Merge pull request #5071 from TimothyZhang7/feature/queen-bee

Feature/queen bee
Timothy @aden
2026-02-18 20:59:04 -08:00
committed by GitHub
46 changed files with 7655 additions and 430 deletions
@@ -0,0 +1,13 @@
"""Framework-provided agents."""
from pathlib import Path
FRAMEWORK_AGENTS_DIR = Path(__file__).parent
def list_framework_agents() -> list[Path]:
"""List all framework agent directories."""
return sorted(
[p for p in FRAMEWORK_AGENTS_DIR.iterdir() if p.is_dir() and (p / "agent.py").exists()],
key=lambda p: p.name,
)
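For context, a minimal usage sketch (assuming this module is importable as `framework.agents`, consistent with the guardian import path shown later in this diff):

```python
# Hypothetical usage sketch: enumerate bundled framework agents (e.g. hive_coder).
from framework.agents import list_framework_agents

for agent_dir in list_framework_agents():
    print(agent_dir.name)  # each listed directory contains an agent.py
```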
@@ -0,0 +1,44 @@
"""
Hive Coder: native coding agent that builds Hive agent packages.
Deeply understands the agent framework and produces complete Python packages
with goals, nodes, edges, system prompts, MCP configuration, and tests
from natural language specifications.
"""
from .agent import (
HiveCoderAgent,
conversation_mode,
default_agent,
edges,
entry_node,
entry_points,
goal,
identity_prompt,
loop_config,
nodes,
pause_nodes,
terminal_nodes,
)
from .config import AgentMetadata, RuntimeConfig, default_config, metadata
__version__ = "1.0.0"
__all__ = [
"HiveCoderAgent",
"default_agent",
"goal",
"nodes",
"edges",
"entry_node",
"entry_points",
"pause_nodes",
"terminal_nodes",
"conversation_mode",
"identity_prompt",
"loop_config",
"RuntimeConfig",
"AgentMetadata",
"default_config",
"metadata",
]
@@ -0,0 +1,223 @@
"""CLI entry point for Hive Coder agent."""
import asyncio
import json
import logging
import sys
import click
from .agent import HiveCoderAgent, default_agent
def setup_logging(verbose=False, debug=False):
"""Configure logging for execution visibility."""
if debug:
level, fmt = logging.DEBUG, "%(asctime)s %(name)s: %(message)s"
elif verbose:
level, fmt = logging.INFO, "%(message)s"
else:
level, fmt = logging.WARNING, "%(levelname)s: %(message)s"
logging.basicConfig(level=level, format=fmt, stream=sys.stderr)
logging.getLogger("framework").setLevel(level)
@click.group()
@click.version_option(version="1.0.0")
def cli():
"""Hive Coder — Build Hive agent packages from natural language."""
pass
@cli.command()
@click.option("--request", "-r", type=str, required=True, help="What agent to build")
@click.option("--mock", is_flag=True, help="Run in mock mode")
@click.option("--quiet", "-q", is_flag=True, help="Only output result JSON")
@click.option("--verbose", "-v", is_flag=True, help="Show execution details")
@click.option("--debug", is_flag=True, help="Show debug logging")
def run(request, mock, quiet, verbose, debug):
"""Execute agent building from a request."""
if not quiet:
setup_logging(verbose=verbose, debug=debug)
context = {"user_request": request}
result = asyncio.run(default_agent.run(context, mock_mode=mock))
output_data = {
"success": result.success,
"steps_executed": result.steps_executed,
"output": result.output,
}
if result.error:
output_data["error"] = result.error
click.echo(json.dumps(output_data, indent=2, default=str))
sys.exit(0 if result.success else 1)
@cli.command()
@click.option("--mock", is_flag=True, help="Run in mock mode")
@click.option("--verbose", "-v", is_flag=True, help="Show execution details")
@click.option("--debug", is_flag=True, help="Show debug logging")
def tui(mock, verbose, debug):
"""Launch the TUI dashboard for interactive agent building."""
setup_logging(verbose=verbose, debug=debug)
try:
from framework.tui.app import AdenTUI
except ImportError:
click.echo("TUI requires the 'textual' package. Install with: pip install textual")
sys.exit(1)
from pathlib import Path
from framework.llm import LiteLLMProvider
from framework.runner.tool_registry import ToolRegistry
from framework.runtime.agent_runtime import create_agent_runtime
from framework.runtime.execution_stream import EntryPointSpec
async def run_with_tui():
agent = HiveCoderAgent()
agent._tool_registry = ToolRegistry()
storage_path = Path.home() / ".hive" / "agents" / "hive_coder"
storage_path.mkdir(parents=True, exist_ok=True)
mcp_config_path = Path(__file__).parent / "mcp_servers.json"
if mcp_config_path.exists():
agent._tool_registry.load_mcp_config(mcp_config_path)
llm = None
if not mock:
llm = LiteLLMProvider(
model=agent.config.model,
api_key=agent.config.api_key,
api_base=agent.config.api_base,
)
tools = list(agent._tool_registry.get_tools().values())
tool_executor = agent._tool_registry.get_executor()
graph = agent._build_graph()
runtime = create_agent_runtime(
graph=graph,
goal=agent.goal,
storage_path=storage_path,
entry_points=[
EntryPointSpec(
id="start",
name="Build Agent",
entry_node="coder",
trigger_type="manual",
isolation_level="isolated",
),
],
llm=llm,
tools=tools,
tool_executor=tool_executor,
)
await runtime.start()
try:
app = AdenTUI(runtime)
await app.run_async()
finally:
await runtime.stop()
asyncio.run(run_with_tui())
@cli.command()
@click.option("--json", "output_json", is_flag=True)
def info(output_json):
"""Show agent information."""
info_data = default_agent.info()
if output_json:
click.echo(json.dumps(info_data, indent=2))
else:
click.echo(f"Agent: {info_data['name']}")
click.echo(f"Version: {info_data['version']}")
click.echo(f"Description: {info_data['description']}")
click.echo(f"\nNodes: {', '.join(info_data['nodes'])}")
click.echo(f"Client-facing: {', '.join(info_data['client_facing_nodes'])}")
click.echo(f"Entry: {info_data['entry_node']}")
click.echo(f"Terminal: {', '.join(info_data['terminal_nodes']) or '(forever-alive)'}")
@cli.command()
def validate():
"""Validate agent structure."""
validation = default_agent.validate()
if validation["valid"]:
click.echo("Agent is valid")
if validation["warnings"]:
for warning in validation["warnings"]:
click.echo(f" WARNING: {warning}")
else:
click.echo("Agent has errors:")
for error in validation["errors"]:
click.echo(f" ERROR: {error}")
sys.exit(0 if validation["valid"] else 1)
@cli.command()
@click.option("--verbose", "-v", is_flag=True)
def shell(verbose):
"""Interactive agent building session (CLI, no TUI)."""
asyncio.run(_interactive_shell(verbose))
async def _interactive_shell(verbose=False):
"""Async interactive shell."""
setup_logging(verbose=verbose)
click.echo("=== Hive Coder ===")
click.echo("Describe the agent you want to build (or 'quit' to exit):\n")
agent = HiveCoderAgent()
await agent.start()
try:
while True:
try:
request = await asyncio.get_event_loop().run_in_executor(None, input, "Build> ")
if request.lower() in ["quit", "exit", "q"]:
click.echo("Goodbye!")
break
if not request.strip():
continue
click.echo("\nBuilding agent...\n")
result = await agent.trigger_and_wait("default", {"user_request": request})
if result is None:
click.echo("\n[Execution timed out]\n")
continue
if result.success:
output = result.output or {}
agent_name = output.get("agent_name", "unknown")
validation = output.get("validation_result", "unknown")
click.echo(f"\nAgent '{agent_name}' built. Validation: {validation}\n")
else:
click.echo(f"\nBuild failed: {result.error}\n")
except KeyboardInterrupt:
click.echo("\nGoodbye!")
break
except Exception as e:
click.echo(f"Error: {e}", err=True)
import traceback
traceback.print_exc()
finally:
await agent.stop()
if __name__ == "__main__":
cli()
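The CLI's `run` command is a thin wrapper over `default_agent.run()`, so the same flow can be driven programmatically. A minimal sketch, with the import path assumed from the guardian module's docstring (`framework.agents.hive_coder`):

```python
# Sketch: programmatic equivalent of `run --mock -r "..."` (import path assumed).
import asyncio

from framework.agents.hive_coder import default_agent

async def main():
    result = await default_agent.run(
        {"user_request": "Build a note-taking agent"},
        mock_mode=True,  # skips LLM construction, like the --mock flag
    )
    print(result.success, result.steps_executed)

asyncio.run(main())
```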
@@ -0,0 +1,314 @@
"""Agent graph construction for Hive Coder."""
from pathlib import Path
from framework.graph import Constraint, Goal, SuccessCriterion
from framework.graph.checkpoint_config import CheckpointConfig
from framework.graph.edge import GraphSpec
from framework.graph.executor import ExecutionResult
from framework.llm import LiteLLMProvider
from framework.runner.tool_registry import ToolRegistry
from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
from framework.runtime.execution_stream import EntryPointSpec
from .config import default_config, metadata
from .nodes import coder_node
# Goal definition
goal = Goal(
id="agent-builder",
name="Hive Agent Builder",
description=(
"Build complete, validated Hive agent packages from natural language "
"specifications. Produces production-ready Python packages with goals, "
"nodes, edges, system prompts, MCP configuration, and tests."
),
success_criteria=[
SuccessCriterion(
id="valid-package",
description="Generated agent package passes structural validation",
metric="validation_pass",
target="true",
weight=0.30,
),
SuccessCriterion(
id="complete-files",
description=(
"All required files generated: agent.py, config.py, "
"nodes/__init__.py, __init__.py, __main__.py, mcp_servers.json"
),
metric="file_count",
target=">=6",
weight=0.25,
),
SuccessCriterion(
id="user-satisfaction",
description="User reviews and approves the generated agent",
metric="user_approval",
target="true",
weight=0.25,
),
SuccessCriterion(
id="framework-compliance",
description=(
"Generated code follows framework patterns: STEP 1/STEP 2 "
"for client-facing, correct imports, entry_points format"
),
metric="pattern_compliance",
target="100%",
weight=0.20,
),
],
constraints=[
Constraint(
id="dynamic-tool-discovery",
description=(
"Always discover available tools dynamically via "
"discover_mcp_tools before referencing tools in agent designs"
),
constraint_type="hard",
category="correctness",
),
Constraint(
id="no-fabricated-tools",
description="Only reference tools that exist in hive-tools MCP",
constraint_type="hard",
category="correctness",
),
Constraint(
id="valid-python",
description="All generated Python files must be syntactically correct",
constraint_type="hard",
category="correctness",
),
Constraint(
id="self-verification",
description="Run validation after writing code; fix errors before presenting",
constraint_type="hard",
category="quality",
),
],
)
# Nodes — single coder node (guardian is now auto-attached by the framework)
nodes = [coder_node]
# No edges needed — single forever-alive event_loop node
edges = []
# Graph configuration
entry_node = "coder"
entry_points = {"start": "coder"}
pause_nodes = []
terminal_nodes = [] # Forever-alive: loops until user exits
# No async entry points — guardian is now auto-attached via attach_guardian()
async_entry_points = []
# Module-level variables read by AgentRunner.load()
conversation_mode = "continuous"
identity_prompt = (
"You are Hive Coder, the best agent-building coding agent on the planet. "
"You deeply understand the Hive agent framework at the source code level "
"and produce production-ready agent packages from natural language. "
"You can dynamically discover available framework tools, inspect runtime "
"sessions and checkpoints from agents you build, and run their test suites. "
"You follow coding agent discipline: read before writing, verify "
"assumptions by reading actual code, adhere to project conventions, "
"self-verify with validation, and fix your own errors. You are concise, "
"direct, and technically rigorous. No emojis. No fluff."
)
loop_config = {
"max_iterations": 100,
"max_tool_calls_per_turn": 20,
"max_history_tokens": 32000,
}
class HiveCoderAgent:
"""
Hive Coder builds Hive agent packages from natural language.
Single-node architecture: the coder runs in a continuous while(true) loop.
The guardian watchdog is auto-attached by the framework in TUI mode.
"""
def __init__(self, config=None):
self.config = config or default_config
self.goal = goal
self.nodes = nodes
self.edges = edges
self.entry_node = entry_node
self.entry_points = entry_points
self.pause_nodes = pause_nodes
self.terminal_nodes = terminal_nodes
self.async_entry_points = async_entry_points
self._graph: GraphSpec | None = None
self._agent_runtime: AgentRuntime | None = None
self._tool_registry: ToolRegistry | None = None
self._storage_path: Path | None = None
def _build_graph(self) -> GraphSpec:
"""Build the GraphSpec."""
return GraphSpec(
id="hive-coder-graph",
goal_id=self.goal.id,
version="1.0.0",
entry_node=self.entry_node,
entry_points=self.entry_points,
terminal_nodes=self.terminal_nodes,
pause_nodes=self.pause_nodes,
nodes=self.nodes,
edges=self.edges,
default_model=self.config.model,
max_tokens=self.config.max_tokens,
loop_config=loop_config,
conversation_mode=conversation_mode,
identity_prompt=identity_prompt,
async_entry_points=self.async_entry_points,
)
def _setup(self, mock_mode=False) -> None:
"""Set up the agent runtime."""
self._storage_path = Path.home() / ".hive" / "agents" / "hive_coder"
self._storage_path.mkdir(parents=True, exist_ok=True)
self._tool_registry = ToolRegistry()
mcp_config_path = Path(__file__).parent / "mcp_servers.json"
if mcp_config_path.exists():
self._tool_registry.load_mcp_config(mcp_config_path)
llm = None
if not mock_mode:
llm = LiteLLMProvider(
model=self.config.model,
api_key=self.config.api_key,
api_base=self.config.api_base,
)
tool_executor = self._tool_registry.get_executor()
tools = list(self._tool_registry.get_tools().values())
self._graph = self._build_graph()
checkpoint_config = CheckpointConfig(
enabled=True,
checkpoint_on_node_start=False,
checkpoint_on_node_complete=True,
checkpoint_max_age_days=7,
async_checkpoint=True,
)
entry_point_specs = [
EntryPointSpec(
id="default",
name="Default",
entry_node=self.entry_node,
trigger_type="manual",
isolation_level="shared",
),
]
self._agent_runtime = create_agent_runtime(
graph=self._graph,
goal=self.goal,
storage_path=self._storage_path,
entry_points=entry_point_specs,
llm=llm,
tools=tools,
tool_executor=tool_executor,
checkpoint_config=checkpoint_config,
graph_id="hive_coder",
)
async def start(self, mock_mode=False) -> None:
"""Set up and start the agent runtime."""
if self._agent_runtime is None:
self._setup(mock_mode=mock_mode)
if not self._agent_runtime.is_running:
await self._agent_runtime.start()
async def stop(self) -> None:
"""Stop the agent runtime and clean up."""
if self._agent_runtime and self._agent_runtime.is_running:
await self._agent_runtime.stop()
self._agent_runtime = None
async def trigger_and_wait(
self,
entry_point: str = "default",
input_data: dict | None = None,
timeout: float | None = None,
session_state: dict | None = None,
) -> ExecutionResult | None:
"""Execute the graph and wait for completion."""
if self._agent_runtime is None:
raise RuntimeError("Agent not started. Call start() first.")
return await self._agent_runtime.trigger_and_wait(
entry_point_id=entry_point,
input_data=input_data or {},
session_state=session_state,
)
async def run(self, context: dict, mock_mode=False, session_state=None) -> ExecutionResult:
"""Run the agent (convenience method for single execution)."""
await self.start(mock_mode=mock_mode)
try:
result = await self.trigger_and_wait("default", context, session_state=session_state)
return result or ExecutionResult(success=False, error="Execution timeout")
finally:
await self.stop()
def info(self):
"""Get agent information."""
return {
"name": metadata.name,
"version": metadata.version,
"description": metadata.description,
"goal": {
"name": self.goal.name,
"description": self.goal.description,
},
"nodes": [n.id for n in self.nodes],
"edges": [e.id for e in self.edges],
"entry_node": self.entry_node,
"entry_points": self.entry_points,
"pause_nodes": self.pause_nodes,
"terminal_nodes": self.terminal_nodes,
"client_facing_nodes": [n.id for n in self.nodes if n.client_facing],
}
def validate(self):
"""Validate agent structure."""
errors = []
warnings = []
node_ids = {node.id for node in self.nodes}
for edge in self.edges:
if edge.source not in node_ids:
errors.append(f"Edge {edge.id}: source '{edge.source}' not found")
if edge.target not in node_ids:
errors.append(f"Edge {edge.id}: target '{edge.target}' not found")
if self.entry_node not in node_ids:
errors.append(f"Entry node '{self.entry_node}' not found")
for terminal in self.terminal_nodes:
if terminal not in node_ids:
errors.append(f"Terminal node '{terminal}' not found")
for ep_id, node_id in self.entry_points.items():
if node_id not in node_ids:
errors.append(f"Entry point '{ep_id}' references unknown node '{node_id}'")
return {
"valid": len(errors) == 0,
"errors": errors,
"warnings": warnings,
}
# Create default instance
default_agent = HiveCoderAgent()
@@ -0,0 +1,51 @@
"""Runtime configuration for Hive Coder agent."""
import json
from dataclasses import dataclass, field
from pathlib import Path
def _load_preferred_model() -> str:
"""Load preferred model from ~/.hive/configuration.json."""
config_path = Path.home() / ".hive" / "configuration.json"
if config_path.exists():
try:
with open(config_path) as f:
config = json.load(f)
llm = config.get("llm", {})
if llm.get("provider") and llm.get("model"):
return f"{llm['provider']}/{llm['model']}"
except Exception:
pass
return "anthropic/claude-sonnet-4-20250514"
@dataclass
class RuntimeConfig:
model: str = field(default_factory=_load_preferred_model)
temperature: float = 0.7
max_tokens: int = 40000
api_key: str | None = None
api_base: str | None = None
default_config = RuntimeConfig()
@dataclass
class AgentMetadata:
name: str = "Hive Coder"
version: str = "1.0.0"
description: str = (
"Native coding agent that builds production-ready Hive agent packages "
"from natural language specifications. Deeply understands the agent framework "
"and produces complete Python packages with goals, nodes, edges, system prompts, "
"MCP configuration, and tests."
)
intro_message: str = (
"I'm Hive Coder — I build Hive agents. Describe what kind of agent "
"you want to create and I'll design, implement, and validate it for you."
)
metadata = AgentMetadata()
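For reference, the `~/.hive/configuration.json` shape that `_load_preferred_model()` expects is implied by its lookups; a minimal sketch of a file that would resolve to `"anthropic/claude-sonnet-4-20250514"`:

```python
# Sketch: write a configuration.json matching the keys _load_preferred_model() reads.
import json
from pathlib import Path

config = {"llm": {"provider": "anthropic", "model": "claude-sonnet-4-20250514"}}
path = Path.home() / ".hive" / "configuration.json"
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json.dumps(config, indent=2))
```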
@@ -0,0 +1,96 @@
"""Attach the Hive Coder's guardian node to any agent runtime.
Usage::
from framework.agents.hive_coder.guardian import attach_guardian
runner._setup()
attach_guardian(runner._agent_runtime, runner._tool_registry)
await runner._agent_runtime.start()
Must be called **before** ``runtime.start()``: it injects the
guardian node into the graph and registers an event-driven entry point.
"""
from __future__ import annotations
import logging
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from framework.runner.tool_registry import ToolRegistry
from framework.runtime.agent_runtime import AgentRuntime
from framework.runtime.execution_stream import EntryPointSpec
from .nodes import ALL_GUARDIAN_TOOLS, guardian_node
logger = logging.getLogger(__name__)
GUARDIAN_ENTRY_POINT = EntryPointSpec(
id="guardian",
name="Agent Guardian",
entry_node="guardian",
trigger_type="event",
trigger_config={
"event_types": [
"execution_failed",
"node_stalled",
"node_tool_doom_loop",
"constraint_violation",
],
"exclude_own_graph": False,
},
isolation_level="shared",
)
def attach_guardian(
runtime: AgentRuntime,
tool_registry: ToolRegistry,
) -> None:
"""Inject hive_coder's guardian node into *runtime*'s graph.
1. Registers graph lifecycle tools if not already present.
2. Refreshes the runtime's tool list and executor.
3. Adds the guardian node (with dynamically filtered tools) to the graph.
4. Registers an event-driven entry point that fires on execution failures,
stalls, tool doom loops, and constraint violations.
Must be called **before** ``runtime.start()``.
Raises:
RuntimeError: If the runtime is already running.
"""
from framework.tools.session_graph_tools import register_graph_tools
# 1. Register graph lifecycle tools if not already present
if not tool_registry.has_tool("load_agent"):
register_graph_tools(tool_registry, runtime)
# 2. Refresh tool schemas and executor on the runtime
runtime._tools = list(tool_registry.get_tools().values())
runtime._tool_executor = tool_registry.get_executor()
# 3. Filter guardian tools to only those available in the registry
available = set(tool_registry.get_tools().keys())
filtered_tools = [t for t in ALL_GUARDIAN_TOOLS if t in available]
# Build guardian node with filtered tool list
node = guardian_node.model_copy(update={"tools": filtered_tools})
# Add to the runtime's graph (so register_entry_point validation passes)
runtime.graph.nodes.append(node)
# Mark guardian as reachable in graph-level entry_points so
# GraphSpec.validate() doesn't flag it as unreachable.
runtime.graph.entry_points["guardian"] = "guardian"
# 4. Register event-driven entry point
runtime.register_entry_point(GUARDIAN_ENTRY_POINT)
logger.info(
"Guardian attached with %d tools: %s",
len(filtered_tools),
filtered_tools,
)
@@ -0,0 +1,9 @@
{
"coder-tools": {
"transport": "stdio",
"command": "uv",
"args": ["run", "python", "coder_tools_server.py", "--stdio"],
"cwd": "../../../../tools",
"description": "Unsandboxed file system tools for code generation and validation"
}
}
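This config is consumed via `ToolRegistry.load_mcp_config()`, as in `_setup()` above; a condensed sketch:

```python
# Sketch: load this MCP server config into a ToolRegistry (pattern from _setup()).
from pathlib import Path
from framework.runner.tool_registry import ToolRegistry

registry = ToolRegistry()
config_path = Path(__file__).parent / "mcp_servers.json"
if config_path.exists():
    registry.load_mcp_config(config_path)
print(sorted(registry.get_tools()))  # tool names the coder node can reference
```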
@@ -0,0 +1,556 @@
"""Node definitions for Hive Coder agent."""
from framework.graph import NodeSpec
# Single node — like opencode's while(true) loop.
# One continuous context handles the entire workflow:
# discover → design → implement → verify → present → iterate.
coder_node = NodeSpec(
id="coder",
name="Hive Coder",
description=(
"Autonomous coding agent that builds Hive agent packages. "
"Handles the full lifecycle: understanding user intent, "
"designing architecture, writing code, validating, and "
"iterating on feedback — all in one continuous conversation."
),
node_type="event_loop",
client_facing=True,
max_node_visits=0,
input_keys=["user_request"],
output_keys=["agent_name", "validation_result"],
success_criteria=(
"A complete, validated Hive agent package exists at "
"exports/{agent_name}/ and passes structural validation."
),
system_prompt="""\
You are Hive Coder, the best agent-building coding agent. You build \
production-ready Hive agent packages from natural language.
# Core Mandates
- **Read before writing.** NEVER write code from assumptions. Read \
reference agents and templates first. Read every file before editing.
- **Conventions first.** Follow existing project patterns exactly. \
Analyze imports, structure, and style in reference agents.
- **Verify assumptions.** Never assume a class, import, or pattern \
exists. Read actual source to confirm. Search if unsure.
- **Discover tools dynamically.** NEVER reference tools from static \
docs. Always run discover_mcp_tools() to see what actually exists.
- **Professional objectivity.** If a use case is a poor fit for the \
framework, say so. Technical accuracy over validation.
- **Concise.** No emojis. No preambles. No postambles. Substance only.
- **Self-verify.** After writing code, run validation and tests. Fix \
errors yourself. Don't declare success until validation passes.
# Tools
## File I/O
- read_file(path, offset?, limit?) - read with line numbers
- write_file(path, content) - create/overwrite, auto-mkdir
- edit_file(path, old_text, new_text, replace_all?) - fuzzy-match edit
- list_directory(path, recursive?) - list contents
- search_files(pattern, path?, include?) - regex search
- run_command(command, cwd?, timeout?) - shell execution
- undo_changes(path?) - restore from git snapshot
## Meta-Agent
- discover_mcp_tools(server_config_path?) - connect to MCP servers \
and list all available tools with full schemas. Default: hive-tools.
- list_agents() - list all agent packages in exports/ with session counts
- list_agent_sessions(agent_name, status?, limit?) - list sessions
- get_agent_session_state(agent_name, session_id) - full session state
- get_agent_session_memory(agent_name, session_id, key?) - memory data
- list_agent_checkpoints(agent_name, session_id) - list checkpoints
- get_agent_checkpoint(agent_name, session_id, checkpoint_id?) - load checkpoint
- run_agent_tests(agent_name, test_types?, fail_fast?) - run pytest with parsing
# Meta-Agent Capabilities
You are not just a file writer. You have deep integration with the \
Hive framework:
## Tool Discovery (MANDATORY before designing)
Before designing any agent, run discover_mcp_tools() to see what \
tools are actually available from the hive-tools MCP server. This \
returns full schemas with parameter names, types, and descriptions. \
NEVER guess tool names or parameters from memory. The tool catalog \
is the ground truth.
To check a specific agent's tools:
discover_mcp_tools("exports/{agent_name}/mcp_servers.json")
## Agent Awareness
Run list_agents() to see what agents already exist. Read their code \
for patterns:
read_file("exports/{name}/agent.py")
read_file("exports/{name}/nodes/__init__.py")
## Post-Build Testing
After writing agent code, validate structurally AND run tests:
run_command("python -c 'from {name} import default_agent; \\
print(default_agent.validate())'")
run_agent_tests("{name}")
## Debugging Built Agents
When a user says "my agent is failing" or "debug this agent":
1. list_agent_sessions("{agent_name}") - find the session
2. get_agent_session_state("{agent_name}", "{session_id}") - see status
3. get_agent_session_memory("{agent_name}", "{session_id}") - inspect data
4. list_agent_checkpoints / get_agent_checkpoint - trace execution
# Workflow
You operate in a continuous loop. The user describes what they want, \
you build it. No rigid phases; use judgment. But the general flow is:
## 1. Understand
When the user describes what they want to build, hear the structure:
- The actors, the trigger, the core loop, the output, the pain.
Play back a model: "Here's what I'm picturing: [concrete picture]. \
Before I start: [1-2 questions you can't infer]."
Ask only what you CANNOT infer. Fill blanks with domain knowledge.
## 2. Qualify
Assess framework fit honestly. Run discover_mcp_tools() to check \
what tools exist. Read the framework guide:
read_file("core/framework/agents/hive_coder/reference/framework_guide.md")
Consider:
- What works well (multi-turn, HITL, tool orchestration)
- Limitations (LLM latency, context limits, cost)
- Deal-breakers (missing tools, wrong paradigm)
Give a clear recommendation: proceed, adjust scope, or reconsider.
## 3. Design
Design the agent architecture:
- Goal: id, name, description, 3-5 success criteria, 2-4 constraints
- Nodes: **2-4 nodes MAXIMUM** (see rules below)
- Edges: on_success for linear, conditional for routing
- Lifecycle: ALWAYS forever-alive (`terminal_nodes=[]`) unless the user \
explicitly requests a one-shot/batch agent. Forever-alive agents loop \
continuously; the user exits by closing the TUI. This is the standard \
pattern for all interactive agents.
### Node Count Rules (HARD LIMITS)
**2-4 nodes** for all agents. Never exceed 4 unless the user explicitly \
requests more. Each node boundary serializes outputs to shared memory \
and DESTROYS all in-context information (tool results, reasoning, history).
**MERGE nodes when:**
- Node has NO tools (pure LLM reasoning) → merge into predecessor/successor
- Node sets only 1 trivial output → collapse into predecessor
- Multiple consecutive autonomous nodes → combine into one rich node
- A "report" or "summary" node → merge into the client-facing node
- A "confirm" or "schedule" node that calls no external service → remove
**SEPARATE nodes only when:**
- Client-facing vs autonomous (different interaction models)
- Fundamentally different tool sets
- Fan-out parallelism (parallel branches MUST be separate)
**Typical patterns:**
- 2 nodes: `interact (client-facing) → process (autonomous) → interact`
- 3 nodes: `intake (CF) → process (auto) → review (CF) → intake`
- WRONG: 7 nodes where half have no tools and just do LLM reasoning
Read reference agents before designing:
list_agents()
read_file("exports/deep_research_agent/agent.py")
read_file("exports/deep_research_agent/nodes/__init__.py")
Present the design with ASCII art graph. Get user approval.
## 4. Implement
Read templates before writing code:
read_file("core/framework/agents/hive_coder/reference/file_templates.md")
read_file("core/framework/agents/hive_coder/reference/anti_patterns.md")
Write files in order:
1. mkdir -p exports/{name}/nodes exports/{name}/tests
2. config.py - RuntimeConfig + AgentMetadata
3. nodes/__init__.py - NodeSpec definitions with system prompts
4. agent.py - Goal, edges, graph, agent class
5. __init__.py - package exports
6. __main__.py - CLI with click
7. mcp_servers.json - tool server config
8. tests/ - fixtures
### Critical Rules
**Imports** (must match exactly; only import what you use):
```python
from framework.graph import (
NodeSpec, EdgeSpec, EdgeCondition,
Goal, SuccessCriterion, Constraint,
)
from framework.graph.edge import GraphSpec
from framework.graph.executor import ExecutionResult
from framework.graph.checkpoint_config import CheckpointConfig
from framework.llm import LiteLLMProvider
from framework.runner.tool_registry import ToolRegistry
from framework.runtime.agent_runtime import (
AgentRuntime, create_agent_runtime,
)
from framework.runtime.execution_stream import EntryPointSpec
```
For agents with async entry points (timers, webhooks, events), also add:
```python
from framework.graph.edge import GraphSpec, AsyncEntryPointSpec
from framework.runtime.agent_runtime import (
AgentRuntime, AgentRuntimeConfig, create_agent_runtime,
)
```
NEVER `from core.framework...`; PYTHONPATH includes core/.
**__init__.py MUST re-export ALL module-level variables** \
(THIS IS THE #1 SOURCE OF AGENT LOAD FAILURES):
The runner imports the package (__init__.py), NOT agent.py. It reads \
goal, nodes, edges, entry_node, entry_points, pause_nodes, \
terminal_nodes, conversation_mode, identity_prompt, loop_config via \
getattr(). If ANY are missing from __init__.py, they silently default \
to None or {} causing "must define goal, nodes, edges" or "node X \
is unreachable" errors. The __init__.py MUST import and re-export \
ALL of these from .agent:
```python
from .agent import (
MyAgent, default_agent, goal, nodes, edges,
entry_node, entry_points, pause_nodes, terminal_nodes,
conversation_mode, identity_prompt, loop_config,
)
```
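In other words, a rough sketch of the load path described above (illustrative package name; not the actual runner source):
```python
# Rough sketch of the loader behavior described above (not the runner's code).
import importlib

pkg = importlib.import_module("exports.my_agent")   # imports __init__.py, never agent.py
goal = getattr(pkg, "goal", None)                   # silently None if not re-exported
entry_points = getattr(pkg, "entry_points", {})     # silently {} if not re-exported
# The failure surfaces later as "must define goal, nodes, edges"
# or "node X is unreachable" - not at import time.
```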
**entry_points**: `{"start": "first-node-id"}`
For agents with multiple entry points (e.g. a reminder trigger), \
add them: `{"start": "intake", "reminder": "reminder"}`
**conversation_mode** - ONLY two valid values:
- `"continuous"` - recommended for interactive agents (context carries \
across node transitions)
- Omit entirely for isolated per-node conversations
NEVER use: "client_facing", "interactive", "adaptive", or any other \
value. These DO NOT EXIST.
**loop_config** - ONLY three valid keys:
```python
loop_config = {
"max_iterations": 100,
"max_tool_calls_per_turn": 20,
"max_history_tokens": 32000,
}
```
NEVER add: "strategy", "mode", "timeout", or other keys.
**mcp_servers.json**:
```json
{
"hive-tools": {
"transport": "stdio",
"command": "uv",
"args": ["run", "python", "mcp_server.py", "--stdio"],
"cwd": "../../tools"
}
}
```
NO "mcpServers" wrapper. cwd "../../tools". command "uv".
**Storage**: `Path.home() / ".hive" / "agents" / "{name}"`
**Client-facing system prompts** - STEP 1/STEP 2 pattern:
```
STEP 1 - Present to user (text only, NO tool calls):
[instructions]
STEP 2 - After user responds, call set_output:
[set_output calls]
```
**Autonomous system prompts** - call set_output in a SEPARATE turn.
**Tools** - NEVER fabricate tool names. Common hallucinations: \
csv_read, csv_write, csv_append, file_upload, database_query. \
If discover_mcp_tools() shows these don't exist, use alternatives \
(e.g. save_data/load_data for data persistence).
**Node rules**:
- **2-4 nodes MAX.** Never exceed 4. Merge thin nodes aggressively.
- A node with 0 tools is NOT a real node; merge it.
- node_type always "event_loop"
- max_node_visits default is 0 (unbounded), which is correct for forever-alive. \
Only set >0 in one-shot agents with bounded feedback loops.
- Feedback inputs: nullable_output_keys
- terminal_nodes=[] for forever-alive (the default)
- Every node MUST have at least one outgoing edge (no dead ends)
- Agents are forever-alive unless user explicitly asks for one-shot
**Agent class**: CamelCase name, default_agent at module level. \
Constructor takes `config=None`. Follow the exact pattern in \
file_templates.md; do NOT invent constructor params like \
`llm_provider` or `tool_registry`.
**Module-level variables** (read by AgentRunner.load()):
goal, nodes, edges, entry_node, entry_points, pause_nodes,
terminal_nodes, conversation_mode, identity_prompt, loop_config
For agents with async triggers, also export:
async_entry_points, runtime_config
**Async entry points** (timers, webhooks, events):
When an agent needs scheduled tasks, webhook reactions, or event-driven \
triggers, use `AsyncEntryPointSpec` (from framework.graph.edge) and \
`AgentRuntimeConfig` (from framework.runtime.agent_runtime):
- Timer (cron): `trigger_type="timer"`, \
`trigger_config={"cron": "0 9 * * *"}` standard 5-field cron expression \
(e.g. `"0 9 * * MON-FRI"` weekdays 9am, `"*/30 * * * *"` every 30 min)
- Timer (interval): `trigger_type="timer"`, \
`trigger_config={"interval_minutes": 20, "run_immediately": False}`
- Event (for webhooks): `trigger_type="event"`, \
`trigger_config={"event_types": ["webhook_received"]}`
- `isolation_level="shared"` so async runs can read primary session memory
- `runtime_config = AgentRuntimeConfig(webhook_routes=[...])` for HTTP webhooks
- Reference: `exports/gmail_inbox_guardian/agent.py`
- Full docs: `core/framework/agents/hive_coder/reference/framework_guide.md` \
(Async Entry Points section)
## 5. Verify
Run THREE validation steps after writing. All must pass:
**Step A - Class validation** (checks graph structure):
```
run_command("python -c 'from {name} import default_agent; \\
print(default_agent.validate())'")
```
**Step B - Runner load test** (checks the package export contract; \
THIS IS THE SAME PATH THE TUI USES):
```
run_command("python -c 'from framework.runner.runner import \\
AgentRunner; r = AgentRunner.load(\"exports/{name}\"); \\
print(\"AgentRunner.load: OK\")'")
```
This catches missing __init__.py exports, bad conversation_mode, \
invalid loop_config, and unreachable nodes. If Step A passes but \
Step B fails, the problem is in __init__.py exports.
**Step C - Run tests:**
```
run_agent_tests("{name}")
```
If anything fails: read the error, fix with edit_file, re-validate. Repeat up to 3 times.
**CRITICAL: Testing forever-alive agents**
Most agents use `terminal_nodes=[]` (forever-alive). This means \
`runner.run()` NEVER returns; it hangs forever waiting for a \
terminal node that doesn't exist. Agent tests MUST be structural:
- Validate graph, node specs, edges, tools, prompts
- Check goal/constraints/success criteria definitions
- Test `AgentRunner.load()` + `_setup()` (skip if no API key)
- NEVER call `runner.run()` or `trigger_and_wait()` in tests for \
forever-alive agents; they will hang and time out.
When you restructure an agent (change nodes/edges), always update \
the tests to match. Stale tests referencing old node names will fail.
## 6. Present
Show the user what you built: agent name, goal summary, graph ASCII \
art, files created, validation status. Offer to revise or build another.
After user confirms satisfaction:
set_output("agent_name", "the_agent_name")
set_output("validation_result", "valid")
If building another agent, just start the loop again; no need to \
set_output until the user is done.
## 7. Live Test (optional)
After the user approves, offer to load and run the agent in-session. \
This runs it alongside you, with the Agent Guardian watching for \
failures automatically.
```
load_agent("exports/{name}") # registers as secondary graph
start_agent("{name}") # triggers default entry point
```
If the agent fails, the guardian fires and triages. You can also:
- `list_agents()` - see all loaded graphs and status
- `restart_agent("{name}")` then `load_agent` - pick up code changes
- `unload_agent("{name}")` - remove it from the session
- `get_user_presence()` - check if user is around
The agent runs in a shared session: it can read memory you've set and \
its outputs are visible to you. If the guardian escalates a failure, \
you'll see the error and can fix the code, then reload.
""",
tools=[
"read_file",
"write_file",
"edit_file",
"list_directory",
"search_files",
"run_command",
"undo_changes",
# Meta-agent tools
"discover_mcp_tools",
"list_agents",
"list_agent_sessions",
"get_agent_session_state",
"get_agent_session_memory",
"list_agent_checkpoints",
"get_agent_checkpoint",
"run_agent_tests",
# Graph lifecycle tools (multi-graph sessions)
"load_agent",
"unload_agent",
"start_agent",
"restart_agent",
"get_user_presence",
],
)
ALL_GUARDIAN_TOOLS = [
# File I/O — available when the agent has hive-tools MCP
"read_file",
"write_file",
"edit_file",
"search_files",
"run_command",
# Graph lifecycle — registered by attach_guardian()
"load_agent",
"unload_agent",
"start_agent",
"restart_agent",
"get_user_presence",
"list_agents",
]
guardian_node = NodeSpec(
id="guardian",
name="Agent Guardian",
description=(
"Event-driven guardian that monitors supervised agent graphs. "
"Triggers on failures, stalls, tool doom loops, and constraint "
"violations. Assesses severity, checks user presence, and decides: "
"ask the user (if present), attempt autonomous fix (if away), or "
"escalate for post-mortem."
),
node_type="event_loop",
client_facing=True,
max_node_visits=0,
input_keys=["event"],
output_keys=["resolution"],
nullable_output_keys=["resolution"],
success_criteria=(
"Failure is resolved — either by user guidance, autonomous fix, or documented escalation."
),
system_prompt="""\
You are the Agent Guardian: a watchdog that monitors supervised agent \
graphs. You fire on failures, stalls, doom loops, and constraint \
violations. Your job: triage, fix, or escalate.
# Event Types
You trigger on these events:
## execution_failed
The agent graph crashed: unhandled exception, LLM error, or tool failure.
- Read the error message and stack trace from the event data.
- Transient errors (rate limit, timeout, network): auto-retry via restart.
- Config errors (bad API key, missing tool): needs user input.
- Logic bugs (bad output, crash in code): read source, fix, reload.
- Catastrophic (data corruption): escalate, unload the agent.
## node_stalled
A node has been running too long without producing output. The LLM may \
be stuck in a reasoning loop, waiting for input that won't come, or \
the tool call is hanging.
- Check what node is stalled and how long it's been running.
- If the node is autonomous: restart the agent to break the stall.
- If the node is client-facing: check user presence; the user may \
have left. Alert them or restart after a timeout.
- If a tool call is hanging: the MCP server may be down. Restart.
## node_tool_doom_loop
The LLM is calling the same tools repeatedly without making progress. \
This usually means the prompt is inadequate, the tool is returning \
unhelpful errors, or the LLM is stuck in a retry loop.
- Identify which tool is looping and what errors it's returning.
- If it's a transient tool error: restart to reset context.
- If it's a prompt/logic issue: read the node's source, fix the \
system prompt or tool configuration, then reload and restart.
- If the tool itself is broken: unload and escalate.
## constraint_violation
The agent violated a defined constraint (e.g., token budget exceeded, \
forbidden action attempted, output format invalid).
- Read which constraint was violated from the event data.
- Soft constraints (budget warning): log and notify user.
- Hard constraints (forbidden action): halt the agent immediately, \
escalate to user.
- Format violations: may be fixable by restarting with better context.
# Decision Protocol
1. **Identify the event type** and read the event data carefully.
2. **Assess severity:**
- Transient / auto-recoverable -> auto-retry
- Configuration / environment -> needs user input
- Logic bug / prompt issue -> needs code fix
- Catastrophic / safety -> escalate immediately
3. **Check user presence.** Call get_user_presence().
- **present** (idle < 2 min): Ask the user for guidance. Present the \
issue clearly and suggest options.
- **idle** (2-10 min): Attempt autonomous fix first. If it fails, \
queue a notification for when user returns.
- **away** (> 10 min) or **never_seen**: Attempt autonomous fix. \
Save escalation log via write_file if fix fails.
4. **Act.**
- Auto-retry: restart_agent(graph_id), then start_agent.
- Config issues: if user present, ask. If away, log and wait.
- Code fixes: read source, fix with edit_file, restart_agent.
- Escalation: save detailed log, unload the agent.
# Tools
- get_user_presence() -- check if user is active
- list_agents() -- see loaded graphs and status
- load_agent(path) -- load an agent graph
- unload_agent(graph_id) -- remove a graph
- start_agent(graph_id, entry_point, input_data) -- trigger execution
- restart_agent(graph_id) -- unload for reload
- read_file, write_file, edit_file -- inspect/fix agent source code \
(available when the agent's MCP server provides them)
- run_command -- run shell commands (available when provided by MCP)
# Rules
- Be concise. State the event type, your assessment, and your action.
- If asking the user, present the issue and 2-3 concrete options.
- After a fix attempt, verify it works before declaring success.
- For doom loops and stalls, prefer restart first; it's the cheapest fix.
- set_output("resolution", "...") only after the issue is resolved or \
escalated. Use a brief description: "auto-fixed: retry after timeout", \
"escalated: missing API key", "user-resolved: updated config", \
"auto-fixed: restarted stalled node", "escalated: doom loop in tool X".
""",
# Placeholder — attach_guardian() replaces with filtered list at runtime
tools=ALL_GUARDIAN_TOOLS,
)
__all__ = ["coder_node", "guardian_node", "ALL_GUARDIAN_TOOLS"]
@@ -0,0 +1,107 @@
# Common Mistakes When Building Hive Agents
## Critical Errors
1. **Using tools that don't exist** — Always verify tools are available in the hive-tools MCP server before assigning them to nodes. Never guess tool names.
2. **Wrong entry_points format** — MUST be `{"start": "first-node-id"}`. NOT a set, NOT `{node_id: [keys]}`.
3. **Wrong mcp_servers.json format** — Flat dict (no `"mcpServers"` wrapper). `cwd` must be `"../../tools"`. `command` must be `"uv"` with args `["run", "python", ...]`.
4. **Missing STEP 1/STEP 2 in client-facing prompts** — Without explicit phases, the LLM calls set_output before the user responds. Always use the pattern.
5. **Forgetting nullable_output_keys** — When a node receives inputs from multiple edges and some inputs only arrive on certain edges (e.g., feedback), mark those as nullable. Without this, the executor blocks waiting for a value that will never arrive. See the sketch at the end of this list.
6. **Creating dead-end nodes in forever-alive graphs** — Every node must have at least one outgoing edge. A node with no outgoing edges ends the execution, breaking the loop.
7. **Setting max_node_visits to a non-zero value in forever-alive agents** — The framework default is `max_node_visits=0` (unbounded). Setting it to any positive value (e.g., 1) means the node stops executing after that many visits, silently breaking the forever-alive loop. Only set `max_node_visits > 0` in one-shot agents with feedback loops that need bounded retries.
8. **Missing module-level exports in `__init__.py`** — The runner loads agents via `importlib.import_module(package_name)`, which imports `__init__.py`. It then reads `goal`, `nodes`, `edges`, `entry_node`, `entry_points`, `pause_nodes`, `terminal_nodes`, `conversation_mode`, `identity_prompt`, `loop_config` via `getattr()`. If ANY of these are missing from `__init__.py`, they default to `None` or `{}` — causing "must define goal, nodes, edges" errors or "node X is unreachable" validation failures. **ALL module-level variables from agent.py must be re-exported in `__init__.py`.**
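A minimal sketch for mistake 5, mirroring the `NodeSpec` fields used in the templates later in this PR (node id and prompts are illustrative):
```python
# Sketch: "feedback" only arrives on the revise edge, so it must be nullable;
# otherwise the executor blocks waiting for a value that never arrives.
from framework.graph import NodeSpec

worker_node = NodeSpec(
    id="worker",
    name="Worker",
    description="Do the main work",
    node_type="event_loop",
    input_keys=["brief", "feedback"],    # "feedback" is set only by the feedback edge
    output_keys=["results"],
    nullable_output_keys=["feedback"],   # mark edge-dependent inputs as nullable
    success_criteria="Results are complete and accurate.",
    system_prompt="Do the work in the brief; if feedback is present, address it.",
    tools=[],
)
```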
## Value Errors
9. **Invalid `conversation_mode` value** — Only two valid values: `"continuous"` (recommended for interactive agents) or omit entirely (for isolated per-node conversations). Values like `"client_facing"`, `"interactive"`, `"adaptive"` do NOT exist and will cause runtime errors.
10. **Invalid `loop_config` keys** — Only three valid keys: `max_iterations` (int), `max_tool_calls_per_turn` (int), `max_history_tokens` (int). Keys like `"strategy"`, `"mode"`, `"timeout"` are NOT valid and are silently ignored or cause errors.
11. **Fabricating tools that don't exist** — Never guess tool names. Always verify via `discover_mcp_tools()`. Common hallucinations: `csv_read`, `csv_write`, `csv_append`, `file_upload`, `database_query`. If a required tool doesn't exist, redesign the agent to use tools that DO exist (e.g., `save_data`/`load_data` for data persistence).
## Design Errors
12. **Too many thin nodes** — Hard limit: **2-4 nodes** for most agents. Each node boundary serializes outputs to shared memory and loses all in-context information (tool results, intermediate reasoning, conversation history). A node with 0 tools that just does LLM reasoning is NOT a real node — merge it into its predecessor or successor.
**Merge when:**
- Node has NO tools — pure LLM reasoning belongs in the node that produces or consumes its data
- Node sets only 1 trivial output (e.g., `set_output("done", "true")`) — collapse into predecessor
- Multiple consecutive autonomous nodes with same/similar tools — combine into one
- A "report" or "summary" node that just presents analysis — merge into the client-facing node
- A "schedule" or "confirm" node that doesn't actually schedule anything — remove entirely
**Keep separate when:**
- Client-facing vs autonomous — different interaction models require separate nodes
- Fundamentally different tool sets (e.g., web search vs file I/O)
- Fan-out parallelism — parallel branches MUST be separate nodes
**Bad example** (7 nodes — WAY too many):
```
profile_setup → daily_intake → update_tracker → analyze_progress → generate_plan → schedule_reminders → report
```
`analyze_progress` has no tools. `schedule_reminders` just sets one boolean. `report` just presents analysis. `update_tracker` and `generate_plan` are sequential autonomous work.
**Good example** (3 nodes):
```
intake (client-facing) → process (autonomous: track + analyze + plan) → intake (loop back)
```
One client-facing node handles ALL user interaction (setup, logging, reports). One autonomous node handles ALL backend work (CSV update, analysis, plan generation) with tools and context preserved.
13. **Adding framework gating for LLM behavior** — Don't add output rollback, premature rejection, or interaction protocol injection. Fix with better prompts or custom judges.
14. **Not using continuous conversation mode** — Interactive agents should use `conversation_mode="continuous"`. Without it, each node starts with blank context.
15. **Adding terminal nodes by default** — ALL agents should use `terminal_nodes=[]` (forever-alive) unless the user explicitly requests a one-shot/batch agent. Forever-alive is the standard pattern. Every node must have at least one outgoing edge. Dead-end nodes break the loop.
16. **Calling set_output in same turn as tool calls** — Instruct the LLM to call set_output in a SEPARATE turn from real tool calls.
## File Template Errors
17. **Wrong import paths** — Use `from framework.graph import ...`, NOT `from core.framework.graph import ...`. The PYTHONPATH includes `core/`.
18. **Missing storage path** — Agent class must set `self._storage_path = Path.home() / ".hive" / "agents" / "agent_name"`.
19. **Missing mcp_servers.json** — Without this, the agent has no tools at runtime.
20. **Bare `python` command in mcp_servers.json** — Use `"command": "uv"` with args `["run", "python", ...]`.
## Testing Errors
21. **Using `runner.run()` on forever-alive agents.** `runner.run()` calls `trigger_and_wait()`, which blocks until the graph reaches a terminal node. Forever-alive agents have `terminal_nodes=[]`, so **`runner.run()` hangs forever**. This is the #1 cause of stuck test suites.
**For forever-alive agents, write structural tests instead:**
- Validate graph structure (nodes, edges, entry points)
- Verify node specs (tools, prompts, client-facing flag)
- Check goal/constraints/success criteria definitions
- Test that `AgentRunner.load()` + `_setup()` succeeds (skip if no API key)
**What NOT to do:**
```python
# WRONG — hangs forever on forever-alive agents
result = await runner.run({"topic": "quantum computing"})
```
**Correct pattern for structure tests:**
```python
def test_research_has_web_tools(self):
assert "web_search" in research_node.tools
def test_research_routes_back_to_interact(self):
edges_to_interact = [e for e in edges if e.source == "research" and e.target == "interact"]
assert edges_to_interact
```
22. **Stale tests after agent restructuring** — When you change an agent's node count or names (e.g., 4 nodes → 2 nodes), the tests MUST be updated too. Tests referencing old node names (e.g., `"review"`, `"report"`) will fail or hang. Always check that test assertions match the current `nodes/__init__.py`.
23. **Running full integration tests without API keys** — Structural tests (validate, import) work without keys. Full integration tests need `ANTHROPIC_API_KEY`. Use `pytest.skip()` in the runner fixture when `_setup()` fails due to missing credentials.
24. **Forgetting sys.path setup in conftest.py** — Tests need `exports/` and `core/` on sys.path. A conftest sketch covering this and the previous mistake follows this list.
25. **Not using auto_responder for client-facing nodes** — Tests with client-facing nodes hang without an auto-responder that injects input. But note: even WITH auto_responder, forever-alive agents still hang because the graph never terminates. Auto-responder only helps for agents with terminal nodes.
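A `conftest.py` sketch covering mistakes 23 and 24 (paths and the fixture name are illustrative):
```python
# tests/conftest.py sketch: sys.path setup plus an API-key skip guard.
import sys
from pathlib import Path

import pytest

REPO_ROOT = Path(__file__).resolve().parents[2]   # adjust to the actual layout
sys.path.insert(0, str(REPO_ROOT / "core"))       # makes framework.* importable
sys.path.insert(0, str(REPO_ROOT / "exports"))    # makes agent packages importable

@pytest.fixture
def runner():
    from framework.runner.runner import AgentRunner

    r = AgentRunner.load(str(REPO_ROOT / "exports" / "my_agent"))
    try:
        r._setup()
    except Exception as exc:  # e.g. missing ANTHROPIC_API_KEY
        pytest.skip(f"runner setup unavailable: {exc}")
    return r
```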
@@ -0,0 +1,597 @@
# Agent File Templates
Complete code templates for each file in a Hive agent package.
## config.py
```python
"""Runtime configuration."""
import json
from dataclasses import dataclass, field
from pathlib import Path
def _load_preferred_model() -> str:
"""Load preferred model from ~/.hive/configuration.json."""
config_path = Path.home() / ".hive" / "configuration.json"
if config_path.exists():
try:
with open(config_path) as f:
config = json.load(f)
llm = config.get("llm", {})
if llm.get("provider") and llm.get("model"):
return f"{llm['provider']}/{llm['model']}"
except Exception:
pass
return "anthropic/claude-sonnet-4-20250514"
@dataclass
class RuntimeConfig:
model: str = field(default_factory=_load_preferred_model)
temperature: float = 0.7
max_tokens: int = 40000
api_key: str | None = None
api_base: str | None = None
default_config = RuntimeConfig()
@dataclass
class AgentMetadata:
name: str = "My Agent Name"
version: str = "1.0.0"
description: str = "What this agent does."
intro_message: str = "Welcome! What would you like me to do?"
metadata = AgentMetadata()
```
## nodes/__init__.py
```python
"""Node definitions for My Agent."""
from framework.graph import NodeSpec
# Node 1: Intake (client-facing)
intake_node = NodeSpec(
id="intake",
name="Intake",
description="Gather requirements from the user",
node_type="event_loop",
client_facing=True,
max_node_visits=0, # Unlimited for forever-alive
input_keys=["topic"],
output_keys=["brief"],
success_criteria="The brief is specific and actionable.",
system_prompt="""\
You are an intake specialist.
**STEP 1 — Read and respond (text only, NO tool calls):**
1. Read the topic provided
2. If vague, ask 1-2 clarifying questions
3. If clear, confirm your understanding
**STEP 2 — After the user confirms, call set_output:**
- set_output("brief", "Clear description of what to do")
""",
tools=[],
)
# Node 2: Worker (autonomous)
worker_node = NodeSpec(
id="worker",
name="Worker",
description="Do the main work",
node_type="event_loop",
max_node_visits=0,
input_keys=["brief", "feedback"],
output_keys=["results"],
nullable_output_keys=["feedback"], # Only on feedback edge
success_criteria="Results are complete and accurate.",
system_prompt="""\
You are a worker agent. Given a brief, do the work.
If feedback is provided, this is a follow-up — address the feedback.
Work in phases:
1. Use tools to gather/process data
2. Analyze results
3. Call set_output for each key in a SEPARATE turn:
- set_output("results", "structured results")
""",
tools=["web_search", "web_scrape", "save_data", "load_data", "list_data_files"],
)
# Node 3: Review (client-facing)
review_node = NodeSpec(
id="review",
name="Review",
description="Present results for user approval",
node_type="event_loop",
client_facing=True,
max_node_visits=0,
input_keys=["results", "brief"],
output_keys=["next_action", "feedback"],
nullable_output_keys=["feedback"],
success_criteria="User has reviewed and decided next steps.",
system_prompt="""\
Present the results to the user.
**STEP 1 — Present (text only, NO tool calls):**
1. Summary of work done
2. Key results
3. Ask: satisfied, or want changes?
**STEP 2 — After user responds, call set_output:**
- set_output("next_action", "new_topic") — if starting fresh
- set_output("next_action", "revise") — if changes needed
- set_output("feedback", "what to change") — only if revising
""",
tools=[],
)
__all__ = ["intake_node", "worker_node", "review_node"]
```
## agent.py
```python
"""Agent graph construction for My Agent."""
from pathlib import Path
from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
from framework.graph.edge import GraphSpec
from framework.graph.executor import ExecutionResult
from framework.graph.checkpoint_config import CheckpointConfig
from framework.llm import LiteLLMProvider
from framework.runner.tool_registry import ToolRegistry
from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
from framework.runtime.execution_stream import EntryPointSpec
from .config import default_config, metadata
from .nodes import intake_node, worker_node, review_node
# Goal definition
goal = Goal(
id="my-agent-goal",
name="My Agent Goal",
description="What this agent achieves.",
success_criteria=[
SuccessCriterion(id="sc-1", description="...", metric="...", target="...", weight=0.5),
SuccessCriterion(id="sc-2", description="...", metric="...", target="...", weight=0.5),
],
constraints=[
Constraint(id="c-1", description="...", constraint_type="hard", category="quality"),
],
)
# Node list
nodes = [intake_node, worker_node, review_node]
# Edge definitions
edges = [
EdgeSpec(id="intake-to-worker", source="intake", target="worker",
condition=EdgeCondition.ON_SUCCESS, priority=1),
EdgeSpec(id="worker-to-review", source="worker", target="review",
condition=EdgeCondition.ON_SUCCESS, priority=1),
# Feedback loop
EdgeSpec(id="review-to-worker", source="review", target="worker",
condition=EdgeCondition.CONDITIONAL,
condition_expr="str(next_action).lower() == 'revise'", priority=2),
# Loop back for new topic
EdgeSpec(id="review-to-intake", source="review", target="intake",
condition=EdgeCondition.CONDITIONAL,
condition_expr="str(next_action).lower() == 'new_topic'", priority=1),
]
# Graph configuration
entry_node = "intake"
entry_points = {"start": "intake"}
pause_nodes = []
terminal_nodes = [] # Forever-alive
# Module-level vars read by AgentRunner.load()
conversation_mode = "continuous"
identity_prompt = "You are a helpful agent."
loop_config = {"max_iterations": 100, "max_tool_calls_per_turn": 20, "max_history_tokens": 32000}
class MyAgent:
def __init__(self, config=None):
self.config = config or default_config
self.goal = goal
self.nodes = nodes
self.edges = edges
self.entry_node = entry_node
self.entry_points = entry_points
self.pause_nodes = pause_nodes
self.terminal_nodes = terminal_nodes
self._graph = None
self._agent_runtime = None
self._tool_registry = None
self._storage_path = None
def _build_graph(self):
return GraphSpec(
id="my-agent-graph",
goal_id=self.goal.id,
version="1.0.0",
entry_node=self.entry_node,
entry_points=self.entry_points,
terminal_nodes=self.terminal_nodes,
pause_nodes=self.pause_nodes,
nodes=self.nodes,
edges=self.edges,
default_model=self.config.model,
max_tokens=self.config.max_tokens,
loop_config=loop_config,
conversation_mode=conversation_mode,
identity_prompt=identity_prompt,
)
def _setup(self, mock_mode=False):
self._storage_path = Path.home() / ".hive" / "agents" / "my_agent"
self._storage_path.mkdir(parents=True, exist_ok=True)
self._tool_registry = ToolRegistry()
mcp_config = Path(__file__).parent / "mcp_servers.json"
if mcp_config.exists():
self._tool_registry.load_mcp_config(mcp_config)
llm = None
if not mock_mode:
llm = LiteLLMProvider(model=self.config.model, api_key=self.config.api_key, api_base=self.config.api_base)
tools = list(self._tool_registry.get_tools().values())
tool_executor = self._tool_registry.get_executor()
self._graph = self._build_graph()
self._agent_runtime = create_agent_runtime(
graph=self._graph, goal=self.goal, storage_path=self._storage_path,
entry_points=[EntryPointSpec(id="default", name="Default", entry_node=self.entry_node,
trigger_type="manual", isolation_level="shared")],
llm=llm, tools=tools, tool_executor=tool_executor,
checkpoint_config=CheckpointConfig(enabled=True, checkpoint_on_node_complete=True,
checkpoint_max_age_days=7, async_checkpoint=True),
)
async def start(self, mock_mode=False):
if self._agent_runtime is None:
self._setup(mock_mode=mock_mode)
if not self._agent_runtime.is_running:
await self._agent_runtime.start()
async def stop(self):
if self._agent_runtime and self._agent_runtime.is_running:
await self._agent_runtime.stop()
self._agent_runtime = None
async def trigger_and_wait(self, entry_point="default", input_data=None, timeout=None, session_state=None):
if self._agent_runtime is None:
raise RuntimeError("Agent not started. Call start() first.")
return await self._agent_runtime.trigger_and_wait(
entry_point_id=entry_point, input_data=input_data or {}, session_state=session_state)
async def run(self, context, mock_mode=False, session_state=None):
await self.start(mock_mode=mock_mode)
try:
result = await self.trigger_and_wait("default", context, session_state=session_state)
return result or ExecutionResult(success=False, error="Execution timeout")
finally:
await self.stop()
def info(self):
return {
"name": metadata.name, "version": metadata.version, "description": metadata.description,
"goal": {"name": self.goal.name, "description": self.goal.description},
"nodes": [n.id for n in self.nodes], "edges": [e.id for e in self.edges],
"entry_node": self.entry_node, "entry_points": self.entry_points,
"terminal_nodes": self.terminal_nodes,
"client_facing_nodes": [n.id for n in self.nodes if n.client_facing],
}
def validate(self):
errors, warnings = [], []
node_ids = {n.id for n in self.nodes}
for e in self.edges:
if e.source not in node_ids: errors.append(f"Edge {e.id}: source '{e.source}' not found")
if e.target not in node_ids: errors.append(f"Edge {e.id}: target '{e.target}' not found")
if self.entry_node not in node_ids: errors.append(f"Entry node '{self.entry_node}' not found")
for t in self.terminal_nodes:
if t not in node_ids: errors.append(f"Terminal node '{t}' not found")
for ep_id, nid in self.entry_points.items():
if nid not in node_ids: errors.append(f"Entry point '{ep_id}' references unknown node '{nid}'")
return {"valid": len(errors) == 0, "errors": errors, "warnings": warnings}
default_agent = MyAgent()
```
## agent.py — Async Entry Points Variant
When an agent needs timers, webhooks, or event-driven triggers, add
`async_entry_points` and optionally `runtime_config` as module-level variables.
These are IN ADDITION to the standard variables above.
```python
# Additional imports for async entry points
from framework.graph.edge import GraphSpec, AsyncEntryPointSpec
from framework.runtime.agent_runtime import (
AgentRuntime, AgentRuntimeConfig, create_agent_runtime,
)
# ... (goal, nodes, edges, entry_node, entry_points, etc. as above) ...
# Async entry points — event-driven triggers
async_entry_points = [
# Timer with cron: daily at 9am
AsyncEntryPointSpec(
id="daily-check",
name="Daily Check",
entry_node="process-node",
trigger_type="timer",
trigger_config={"cron": "0 9 * * *"},
isolation_level="shared",
max_concurrent=1,
),
# Timer with fixed interval: every 20 minutes
AsyncEntryPointSpec(
id="scheduled-check",
name="Scheduled Check",
entry_node="process-node",
trigger_type="timer",
trigger_config={"interval_minutes": 20, "run_immediately": False},
isolation_level="shared",
max_concurrent=1,
),
# Event: reacts to webhook events
AsyncEntryPointSpec(
id="webhook-event",
name="Webhook Event Handler",
entry_node="process-node",
trigger_type="event",
trigger_config={"event_types": ["webhook_received"]},
isolation_level="shared",
max_concurrent=10,
),
]
# Webhook server config (only needed if using webhooks)
runtime_config = AgentRuntimeConfig(
webhook_host="127.0.0.1",
webhook_port=8080,
webhook_routes=[
{
"source_id": "my-source",
"path": "/webhooks/my-source",
"methods": ["POST"],
},
],
)
```
**Key rules for async entry points:**
- `async_entry_points` is a list of `AsyncEntryPointSpec` (NOT `EntryPointSpec`)
- `runtime_config` is `AgentRuntimeConfig` (NOT `RuntimeConfig` from config.py)
- Valid trigger_types: `timer`, `event`, `webhook`, `manual`, `api`
- Valid isolation_levels: `isolated`, `shared`, `synchronized`
- Timer trigger_config (cron): `{"cron": "0 9 * * *"}` — standard 5-field cron expression
- Timer trigger_config (interval): `{"interval_minutes": float, "run_immediately": bool}`
- Event trigger_config: `{"event_types": ["webhook_received"], "filter_stream": "...", "filter_node": "..."}`
- Use `isolation_level="shared"` for async entry points that need to read
the primary session's memory (e.g., user-configured rules)
- The `_build_graph()` method passes `async_entry_points` to GraphSpec (see the sketch after this list)
- Reference: `exports/gmail_inbox_guardian/agent.py`
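A minimal sketch of the `_build_graph()` override for this variant, assuming `GraphSpec` accepts an `async_entry_points` field alongside the fields shown in the standard `agent.py` above:
```python
def _build_graph(self):
    return GraphSpec(
        id="my-agent-graph",
        goal_id=self.goal.id,
        version="1.0.0",
        entry_node=self.entry_node,
        entry_points=self.entry_points,
        terminal_nodes=self.terminal_nodes,
        pause_nodes=self.pause_nodes,
        nodes=self.nodes,
        edges=self.edges,
        # New for this variant: the module-level async_entry_points list
        async_entry_points=async_entry_points,
        default_model=self.config.model,
        max_tokens=self.config.max_tokens,
        loop_config=loop_config,
        conversation_mode=conversation_mode,
        identity_prompt=identity_prompt,
    )
```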
## __init__.py
**CRITICAL:** The runner imports the package (`__init__.py`) and reads ALL module-level
variables via `getattr()`. Every variable defined in `agent.py` that the runner needs
MUST be re-exported here. Missing exports cause silent failures (variables default to
`None` or `{}`), leading to "must define goal, nodes, edges" errors or graph validation
failures like "node X is unreachable".
```python
"""My Agent — description."""
from .agent import (
MyAgent,
default_agent,
goal,
nodes,
edges,
entry_node,
entry_points,
pause_nodes,
terminal_nodes,
conversation_mode,
identity_prompt,
loop_config,
)
from .config import default_config, metadata
__all__ = [
"MyAgent",
"default_agent",
"goal",
"nodes",
"edges",
"entry_node",
"entry_points",
"pause_nodes",
"terminal_nodes",
"conversation_mode",
"identity_prompt",
"loop_config",
"default_config",
"metadata",
]
```
**If the agent uses async entry points**, also import and export:
```python
from .agent import (
...,
async_entry_points,
runtime_config, # Only if using webhooks
)
__all__ = [
...,
"async_entry_points",
"runtime_config",
]
```
## __main__.py
```python
"""CLI entry point for My Agent."""
import asyncio, json, logging, sys
import click
from .agent import default_agent, MyAgent
def setup_logging(verbose=False, debug=False):
if debug: level, fmt = logging.DEBUG, "%(asctime)s %(name)s: %(message)s"
elif verbose: level, fmt = logging.INFO, "%(message)s"
else: level, fmt = logging.WARNING, "%(levelname)s: %(message)s"
logging.basicConfig(level=level, format=fmt, stream=sys.stderr)
@click.group()
@click.version_option(version="1.0.0")
def cli():
"""My Agent — description."""
pass
@cli.command()
@click.option("--topic", "-t", required=True)
@click.option("--mock", is_flag=True)
@click.option("--verbose", "-v", is_flag=True)
def run(topic, mock, verbose):
"""Execute the agent."""
setup_logging(verbose=verbose)
result = asyncio.run(default_agent.run({"topic": topic}, mock_mode=mock))
click.echo(json.dumps({"success": result.success, "output": result.output}, indent=2, default=str))
sys.exit(0 if result.success else 1)
@cli.command()
@click.option("--mock", is_flag=True)
def tui(mock):
"""Launch TUI dashboard."""
from pathlib import Path
from framework.tui.app import AdenTUI
from framework.llm import LiteLLMProvider
from framework.runner.tool_registry import ToolRegistry
from framework.runtime.agent_runtime import create_agent_runtime
from framework.runtime.execution_stream import EntryPointSpec
async def run_tui():
agent = MyAgent()
agent._tool_registry = ToolRegistry()
storage = Path.home() / ".hive" / "agents" / "my_agent"
storage.mkdir(parents=True, exist_ok=True)
mcp_cfg = Path(__file__).parent / "mcp_servers.json"
if mcp_cfg.exists(): agent._tool_registry.load_mcp_config(mcp_cfg)
llm = None if mock else LiteLLMProvider(model=agent.config.model, api_key=agent.config.api_key, api_base=agent.config.api_base)
runtime = create_agent_runtime(
graph=agent._build_graph(), goal=agent.goal, storage_path=storage,
entry_points=[EntryPointSpec(id="start", name="Start", entry_node="intake", trigger_type="manual", isolation_level="isolated")],
llm=llm, tools=list(agent._tool_registry.get_tools().values()), tool_executor=agent._tool_registry.get_executor())
await runtime.start()
try:
app = AdenTUI(runtime)
await app.run_async()
finally:
await runtime.stop()
asyncio.run(run_tui())
@cli.command()
def info():
"""Show agent info."""
data = default_agent.info()
click.echo(f"Agent: {data['name']}\nVersion: {data['version']}\nDescription: {data['description']}")
click.echo(f"Nodes: {', '.join(data['nodes'])}\nClient-facing: {', '.join(data['client_facing_nodes'])}")
@cli.command()
def validate():
"""Validate agent structure."""
v = default_agent.validate()
if v["valid"]: click.echo("Agent is valid")
else:
click.echo("Errors:")
for e in v["errors"]: click.echo(f" {e}")
sys.exit(0 if v["valid"] else 1)
if __name__ == "__main__":
cli()
```
## mcp_servers.json
```json
{
"hive-tools": {
"transport": "stdio",
"command": "uv",
"args": ["run", "python", "mcp_server.py", "--stdio"],
"cwd": "../../tools",
"description": "Hive tools MCP server"
}
}
```
**CRITICAL FORMAT RULES:**
- NO `"mcpServers"` wrapper (flat dict, not nested)
- `cwd` MUST be `"../../tools"` (relative from `exports/AGENT_NAME/` to `tools/`)
- `command` MUST be `"uv"` with `"args": ["run", "python", ...]` (NOT bare `"python"`)
## tests/conftest.py
```python
"""Test fixtures."""
import sys
from pathlib import Path
import pytest
import pytest_asyncio
_repo_root = Path(__file__).resolve().parents[3]
for _p in ["exports", "core"]:
_path = str(_repo_root / _p)
if _path not in sys.path:
sys.path.insert(0, _path)
AGENT_PATH = str(Path(__file__).resolve().parents[1])
@pytest.fixture(scope="session")
def mock_mode():
return True
@pytest_asyncio.fixture(scope="session")
async def runner(tmp_path_factory, mock_mode):
from framework.runner.runner import AgentRunner
storage = tmp_path_factory.mktemp("agent_storage")
r = AgentRunner.load(AGENT_PATH, mock_mode=mock_mode, storage_path=storage)
r._setup()
yield r
await r.cleanup_async()
```
## entry_points Format
MUST be: `{"start": "first-node-id"}`
NOT: `{"first-node-id": ["input_keys"]}` (WRONG)
NOT: `{"first-node-id"}` (WRONG — this is a set)
@@ -0,0 +1,433 @@
# Hive Agent Framework — Condensed Reference
## Architecture
Agents are Python packages in `exports/`:
```
exports/my_agent/
├── __init__.py # MUST re-export ALL module-level vars from agent.py
├── __main__.py # CLI (run, tui, info, validate, shell)
├── agent.py # Graph construction (goal, edges, agent class)
├── config.py # Runtime config
├── nodes/__init__.py # Node definitions (NodeSpec)
├── mcp_servers.json # MCP tool server config
└── tests/ # pytest tests
```
## Agent Loading Contract
`AgentRunner.load()` imports the package (`__init__.py`) and reads these
module-level variables via `getattr()`:
| Variable | Required | Default if missing | Consequence |
|----------|----------|--------------------|-------------|
| `goal` | YES | `None` | **FATAL** — "must define goal, nodes, edges" |
| `nodes` | YES | `None` | **FATAL** — same error |
| `edges` | YES | `None` | **FATAL** — same error |
| `entry_node` | no | `nodes[0].id` | Probably wrong node |
| `entry_points` | no | `{}` | **Nodes unreachable** — validation fails |
| `terminal_nodes` | no | `[]` | OK for forever-alive |
| `pause_nodes` | no | `[]` | OK |
| `conversation_mode` | no | not passed | Isolated mode (no context carryover) |
| `identity_prompt` | no | not passed | No agent-level identity |
| `loop_config` | no | `{}` | No iteration limits |
| `async_entry_points` | no | `[]` | No async triggers (timers, webhooks, events) |
| `runtime_config` | no | `None` | No webhook server |
**CRITICAL:** `__init__.py` MUST import and re-export ALL of these from
`agent.py`. Missing exports silently fall back to defaults, causing
hard-to-debug failures.
**Why `default_agent.validate()` is NOT sufficient:**
`validate()` checks the agent CLASS's internal graph (self.nodes, self.edges).
These are always correct because the constructor references agent.py's module
vars directly. But `AgentRunner.load()` reads from the PACKAGE (`__init__.py`),
not the class, so `validate()` can pass while `AgentRunner.load()` fails.
Always test with `AgentRunner.load("exports/{name}")` — this is the same
code path the TUI and `hive run` use.
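As a concrete check, a minimal pytest sketch of that code path, assuming the `AgentRunner.load()` keyword arguments used in the `tests/conftest.py` template (`exports/my_agent` is a placeholder):
```python
from framework.runner.runner import AgentRunner

def test_package_loading_contract(tmp_path):
    # Fails with "must define goal, nodes, edges" or a graph validation
    # error if __init__.py forgot to re-export a module-level variable.
    runner = AgentRunner.load("exports/my_agent", mock_mode=True, storage_path=tmp_path)
    assert runner is not None
```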
## Goal
Defines success criteria and constraints:
```python
goal = Goal(
id="kebab-case-id",
name="Display Name",
description="What the agent does",
success_criteria=[
SuccessCriterion(id="sc-id", description="...", metric="...", target="...", weight=0.25),
],
constraints=[
Constraint(id="c-id", description="...", constraint_type="hard", category="quality"),
],
)
```
- 3-5 success criteria, weights sum to 1.0
- 1-5 constraints (hard/soft, categories: quality, accuracy, interaction, functional)
## NodeSpec Fields
| Field | Type | Default | Description |
|-------|------|---------|-------------|
| id | str | required | kebab-case identifier |
| name | str | required | Display name |
| description | str | required | What the node does |
| node_type | str | required | Always `"event_loop"` |
| input_keys | list[str] | required | Memory keys this node reads |
| output_keys | list[str] | required | Memory keys this node writes via set_output |
| system_prompt | str | "" | LLM instructions |
| tools | list[str] | [] | Tool names from MCP servers |
| client_facing | bool | False | If True, streams to user and blocks for input |
| nullable_output_keys | list[str] | [] | Keys that may remain unset |
| max_node_visits | int | 0 | 0=unlimited (default); >1 for one-shot feedback loops |
| max_retries | int | 3 | Retries on failure |
| success_criteria | str | "" | Natural language for judge evaluation |
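Putting the fields together, a sketch of a typical autonomous node (the tool names are placeholders; verify real names via `discover_mcp_tools()`):
```python
worker_node = NodeSpec(
    id="worker",
    name="Worker",
    description="Researches the brief and drafts a summary.",
    node_type="event_loop",
    input_keys=["brief"],
    output_keys=["draft"],
    system_prompt="Research the brief, then call set_output('draft', ...).",
    tools=["search", "scrape"],  # placeholder names
    client_facing=False,
    max_retries=3,
)
```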
## EdgeSpec Fields
| Field | Type | Description |
|-------|------|-------------|
| id | str | kebab-case identifier |
| source | str | Source node ID |
| target | str | Target node ID |
| condition | EdgeCondition | ON_SUCCESS, ON_FAILURE, ALWAYS, CONDITIONAL |
| condition_expr | str | Python expression evaluated against memory (for CONDITIONAL) |
| priority | int | Positive=forward (evaluated first), negative=feedback (loop-back) |
## Key Patterns
### STEP 1/STEP 2 (Client-Facing Nodes)
```
**STEP 1 — Respond to the user (text only, NO tool calls):**
[Present information, ask questions]
**STEP 2 — After the user responds, call set_output:**
- set_output("key", "value based on user response")
```
This prevents premature set_output before user interaction.
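A sketch of a client-facing node whose system prompt embeds the two-step structure (ids and keys are illustrative):
```python
review_node = NodeSpec(
    id="review",
    name="Review",
    description="Presents the draft and collects the user's verdict.",
    node_type="event_loop",
    input_keys=["draft"],
    output_keys=["next_action"],
    client_facing=True,
    system_prompt=(
        "**STEP 1 — Respond to the user (text only, NO tool calls):**\n"
        "Present the draft and ask whether to revise or start a new topic.\n"
        "**STEP 2 — After the user responds, call set_output:**\n"
        "- set_output('next_action', 'revise' or 'new_topic')"
    ),
)
```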
### Fewer, Richer Nodes (CRITICAL)
**Hard limit: 2-4 nodes for most agents.** Never exceed 5 unless the user
explicitly requests a complex multi-phase pipeline.
Each node boundary serializes outputs to shared memory and **destroys** all
in-context information: tool call results, intermediate reasoning, conversation
history. A research node that searches, fetches, and analyzes in ONE node keeps
all source material in its conversation context. Split across 3 nodes, each
downstream node only sees the serialized summary string.
**Decision framework — merge unless ANY of these apply:**
1. **Client-facing boundary** — Autonomous and client-facing work MUST be
separate nodes (different interaction models)
2. **Disjoint tool sets** — If tools are fundamentally different (e.g., web
search vs database), separate nodes make sense
3. **Parallel execution** — Fan-out branches must be separate nodes
**Red flags that you have too many nodes:**
- A node with 0 tools (pure LLM reasoning) → merge into predecessor/successor
- A node that sets only 1 trivial output → collapse into predecessor
- Multiple consecutive autonomous nodes → combine into one rich node
- A "report" node that presents analysis → merge into the client-facing node
- A "confirm" or "schedule" node that doesn't call any external service → remove
**Typical agent structure (3 nodes):**
```
intake (client-facing) ←→ process (autonomous) ←→ review (client-facing)
```
Or for simpler agents, just 2 nodes:
```
interact (client-facing) → process (autonomous) → interact (loop)
```
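For example, a single rich research node that searches, fetches, and analyzes in one context (a sketch; tool names are placeholders to verify via `discover_mcp_tools()`):
```python
research_node = NodeSpec(
    id="research",
    name="Research",
    description="Searches, fetches sources, and writes the analysis in one context.",
    node_type="event_loop",
    input_keys=["topic"],
    output_keys=["analysis"],
    tools=["search", "scrape", "save_data"],  # placeholders
    system_prompt=(
        "Search the topic, fetch the top sources, and analyze them. "
        "All source material stays in this conversation context. "
        "Finish by calling set_output('analysis', ...)."
    ),
)
```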
### nullable_output_keys
For inputs that only arrive on certain edges:
```python
research_node = NodeSpec(
input_keys=["brief", "feedback"],
nullable_output_keys=["feedback"], # Only present on feedback edge
max_node_visits=3,
)
```
### Mutually Exclusive Outputs
For routing decisions:
```python
review_node = NodeSpec(
output_keys=["approved", "feedback"],
nullable_output_keys=["approved", "feedback"], # Node sets one or the other
)
```
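The matching edges then route on whichever key was set (a sketch; node ids are illustrative, and the expressions follow the condition_expr style under Edge Conditions below):
```python
edges = [
    EdgeSpec(id="review-approved", source="review", target="publish",
             condition=EdgeCondition.CONDITIONAL,
             condition_expr="approved is not None", priority=1),
    EdgeSpec(id="review-feedback", source="review", target="worker",
             condition=EdgeCondition.CONDITIONAL,
             condition_expr="feedback is not None", priority=2),
]
```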
### Forever-Alive Pattern
`terminal_nodes=[]` — every node has outgoing edges, graph loops until user exits.
Use `conversation_mode="continuous"` to preserve context across transitions.
### set_output
- Synthetic tool injected by framework
- Call separately from real tool calls (separate turn)
- `set_output("key", "value")` stores to shared memory
## Edge Conditions
| Condition | When |
|-----------|------|
| ON_SUCCESS | Node completed successfully |
| ON_FAILURE | Node failed |
| ALWAYS | Unconditional |
| CONDITIONAL | condition_expr evaluates to True against memory |
condition_expr examples:
- `"needs_more_research == True"`
- `"str(next_action).lower() == 'new_agent'"`
- `"feedback is not None"`
## Graph Lifecycle
| Pattern | terminal_nodes | When |
|---------|---------------|------|
| **Forever-alive** | `[]` | **DEFAULT for all agents** |
| Linear | `["last-node"]` | Only if user explicitly requests one-shot/batch |
**Forever-alive is the default.** Always use `terminal_nodes=[]`.
The framework default for `max_node_visits` is 0 (unbounded), so
nodes work correctly in forever-alive loops without explicit override.
Only set `max_node_visits > 0` in one-shot agents with feedback loops.
Every node must have at least one outgoing edge — no dead ends. The
user exits by closing the TUI. Only use terminal nodes if the user
explicitly asks for a batch/one-shot agent that runs once and exits.
## Continuous Conversation Mode
`conversation_mode` has ONLY two valid states:
- `"continuous"` — recommended for interactive agents
- Omit entirely — isolated per-node conversations (each node starts fresh)
**INVALID values** (do NOT use): `"client_facing"`, `"interactive"`,
`"adaptive"`, `"shared"`. These do not exist in the framework.
When `conversation_mode="continuous"`:
- Same conversation thread carries across node transitions
- Layered system prompts: identity (agent-level) + narrative + focus (per-node)
- Transition markers inserted at boundaries
- Compaction happens opportunistically at phase transitions
## loop_config
Only three valid keys:
```python
loop_config = {
"max_iterations": 100, # Max LLM turns per node visit
"max_tool_calls_per_turn": 20, # Max tool calls per LLM response
"max_history_tokens": 32000, # Triggers conversation compaction
}
```
**INVALID keys** (do NOT use): `"strategy"`, `"mode"`, `"timeout"`,
`"temperature"`. These are silently ignored or cause errors.
## Data Tools (Spillover)
For large data that exceeds context:
- `save_data(filename, data)` — Write to session data dir
- `load_data(filename, offset, limit)` — Read with pagination
- `list_data_files()` — List files
- `serve_file_to_user(filename, label)` — Clickable file:// URI
`data_dir` is auto-injected by framework — LLM never sees it.
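A sketch of wiring spillover into a node (tool names come from the list above; prompt text is illustrative):
```python
export_node = NodeSpec(
    id="export",
    name="Export",
    description="Writes the full report to a data file and serves it to the user.",
    node_type="event_loop",
    input_keys=["analysis"],
    output_keys=["report_file"],
    tools=["save_data", "load_data", "list_data_files", "serve_file_to_user"],
    system_prompt=(
        "Write the full report with save_data('report.md', ...), then call "
        "serve_file_to_user('report.md', 'Final report') and "
        "set_output('report_file', 'report.md')."
    ),
)
```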
## Fan-Out / Fan-In
Multiple ON_SUCCESS edges from same source → parallel execution via asyncio.gather().
- Parallel nodes must have disjoint output_keys
- Only one branch may have client_facing nodes
- Fan-in node gets all outputs in shared memory
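A sketch of a two-branch fan-out that re-joins at a summary node (node ids are illustrative; the branches must write disjoint output_keys):
```python
edges = [
    # Fan-out: two ON_SUCCESS edges from the same source run in parallel
    EdgeSpec(id="plan-to-web", source="plan", target="web-branch",
             condition=EdgeCondition.ON_SUCCESS, priority=1),
    EdgeSpec(id="plan-to-db", source="plan", target="db-branch",
             condition=EdgeCondition.ON_SUCCESS, priority=1),
    # Fan-in: both branches feed summary, which reads all outputs from memory
    EdgeSpec(id="web-to-summary", source="web-branch", target="summary",
             condition=EdgeCondition.ON_SUCCESS, priority=1),
    EdgeSpec(id="db-to-summary", source="db-branch", target="summary",
             condition=EdgeCondition.ON_SUCCESS, priority=1),
]
```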
## Judge System
- **Implicit** (default): ACCEPTs when LLM finishes with no tool calls and all required outputs set
- **SchemaJudge**: Validates against Pydantic model
- **Custom**: Implement `evaluate(context) -> JudgeVerdict`
Judge is the SOLE acceptance mechanism — no ad-hoc framework gating.
## Async Entry Points (Webhooks, Timers, Events)
For agents that need to react to external events (incoming emails, scheduled
tasks, API calls), use `AsyncEntryPointSpec` and optionally `AgentRuntimeConfig`.
### Imports
```python
from framework.graph.edge import GraphSpec, AsyncEntryPointSpec
from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig, create_agent_runtime
```
Note: `AsyncEntryPointSpec` is in `framework.graph.edge` (the graph/declarative layer).
`AgentRuntimeConfig` is in `framework.runtime.agent_runtime` (the runtime layer).
### AsyncEntryPointSpec Fields
| Field | Type | Default | Description |
|-------|------|---------|-------------|
| id | str | required | Unique identifier |
| name | str | required | Human-readable name |
| entry_node | str | required | Node ID to start execution from |
| trigger_type | str | `"manual"` | `webhook`, `api`, `timer`, `event`, `manual` |
| trigger_config | dict | `{}` | Trigger-specific config (see below) |
| isolation_level | str | `"shared"` | `isolated`, `shared`, `synchronized` |
| priority | int | `0` | Execution priority (higher = more priority) |
| max_concurrent | int | `10` | Max concurrent executions |
### Trigger Types
**timer** — Fires on a schedule. Two modes: cron expressions or fixed interval.
Cron (preferred for precise scheduling):
```python
AsyncEntryPointSpec(
id="daily-digest",
name="Daily Digest",
entry_node="check-node",
trigger_type="timer",
trigger_config={"cron": "0 9 * * *"}, # daily at 9am
isolation_level="shared",
max_concurrent=1,
)
```
- `cron` (str) — standard cron expression (5 fields: min hour dom month dow)
- Examples: `"0 9 * * *"` (daily 9am), `"0 9 * * MON-FRI"` (weekdays 9am), `"*/30 * * * *"` (every 30 min)
Fixed interval (simpler, for polling-style tasks):
```python
AsyncEntryPointSpec(
id="scheduled-check",
name="Scheduled Check",
entry_node="check-node",
trigger_type="timer",
trigger_config={"interval_minutes": 20, "run_immediately": False},
isolation_level="shared",
max_concurrent=1,
)
```
- `interval_minutes` (float) — how often to fire
- `run_immediately` (bool, default False) — fire once on startup
**event** — Subscribes to EventBus (e.g., webhook events):
```python
AsyncEntryPointSpec(
id="email-event",
name="Email Event Handler",
entry_node="process-emails",
trigger_type="event",
trigger_config={"event_types": ["webhook_received"]},
isolation_level="shared",
max_concurrent=10,
)
```
- `event_types` (list[str]) — EventType values to subscribe to
- `filter_stream` (str, optional) — only receive from this stream
- `filter_node` (str, optional) — only receive from this node
**webhook** — HTTP endpoint (requires AgentRuntimeConfig):
The webhook server publishes `WEBHOOK_RECEIVED` events on the EventBus.
An `event` trigger type with `event_types: ["webhook_received"]` subscribes
to those events. The flow is:
```
HTTP POST /webhooks/gmail → WebhookServer → EventBus (WEBHOOK_RECEIVED)
→ event entry point → triggers graph execution from entry_node
```
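To exercise that flow locally, a sketch that POSTs to the route configured in the `AgentRuntimeConfig` example below (host, port, and path come from that config; the payload shape is an assumption, since the framework wraps whatever arrives as a `WEBHOOK_RECEIVED` event):
```python
import json
import urllib.request

req = urllib.request.Request(
    "http://127.0.0.1:8080/webhooks/gmail",
    data=json.dumps({"message_id": "123"}).encode("utf-8"),  # assumed payload
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(req, timeout=10) as resp:
    print(resp.status)
```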
**manual** — Triggered programmatically via `runtime.trigger()`.
### Isolation Levels
| Level | Meaning |
|-------|---------|
| `isolated` | Private state per execution |
| `shared` | Eventual consistency — async executions can read primary session memory |
| `synchronized` | Shared with write locks (use when ordering matters) |
For most async patterns, use `shared` — the async execution reads the primary
session's memory (e.g., user-configured rules) and runs its own workflow.
### AgentRuntimeConfig (for webhook servers)
```python
from framework.runtime.agent_runtime import AgentRuntimeConfig
runtime_config = AgentRuntimeConfig(
webhook_host="127.0.0.1",
webhook_port=8080,
webhook_routes=[
{
"source_id": "gmail",
"path": "/webhooks/gmail",
"methods": ["POST"],
"secret": None, # Optional HMAC-SHA256 secret
},
],
)
```
`runtime_config` is a module-level variable read by `AgentRunner.load()`.
The runner passes it to `create_agent_runtime()`. On `runtime.start()`,
if webhook_routes is non-empty, an embedded HTTP server starts.
### Session Sharing
Timer and event triggers automatically call `_get_primary_session_state()`
before execution. This finds the active user-facing session and provides
its memory to the async execution, filtered to only the async entry node's
`input_keys`. This means the async flow can read user-configured values
(like rules, preferences) without needing separate configuration.
### Module-Level Variables
Agents with async entry points must export two additional variables:
```python
# In agent.py:
async_entry_points = [AsyncEntryPointSpec(...), ...]
runtime_config = AgentRuntimeConfig(...) # Only if using webhooks
```
Both must be re-exported from `__init__.py`:
```python
from .agent import (
..., async_entry_points, runtime_config,
)
```
### Reference Agent
See `exports/gmail_inbox_guardian/agent.py` for a complete example with:
- Primary client-facing intake node (user configures rules)
- Timer-based scheduled inbox checks (every 20 min)
- Webhook-triggered email event handling
- Shared isolation for memory access across streams
## Framework Capabilities
**Works well:** Multi-turn conversations, HITL review, tool orchestration, structured outputs, parallel execution, context management, error recovery, session persistence.
**Limitations:** LLM latency (2-10s/turn), context window limits (~128K), cost per run, rate limits, node boundaries lose context.
**Not designed for:** Sub-second responses, millions of items, real-time streaming, guaranteed determinism, offline/air-gapped.
## Tool Discovery
Do NOT rely on a static tool list — it will be outdated. Always use
`discover_mcp_tools()` to get the current tool catalog from the
hive-tools MCP server. This returns full schemas including parameter
names, types, and descriptions.
```
discover_mcp_tools() # default: hive-tools
discover_mcp_tools("exports/my_agent/mcp_servers.json") # specific agent
```
Common tool categories (verify via discover_mcp_tools):
- **Web**: search, scrape, PDF
- **Data**: save/load/append/list data files, serve to user
- **File**: view, write, replace, diff, list, grep
- **Communication**: email, gmail, slack, telegram
- **CRM**: hubspot, apollo, calcom
- **GitHub**: stargazers, user profiles, repos
- **Vision**: image analysis
- **Time**: current time
@@ -0,0 +1,31 @@
"""Test fixtures for Hive Coder agent."""
import sys
from pathlib import Path
import pytest
import pytest_asyncio
_repo_root = Path(__file__).resolve().parents[3]
for _p in ["exports", "core"]:
_path = str(_repo_root / _p)
if _path not in sys.path:
sys.path.insert(0, _path)
AGENT_PATH = str(Path(__file__).resolve().parents[1])
@pytest.fixture(scope="session")
def mock_mode():
return True
@pytest_asyncio.fixture(scope="session")
async def runner(tmp_path_factory, mock_mode):
from framework.runner.runner import AgentRunner
storage = tmp_path_factory.mktemp("agent_storage")
r = AgentRunner.load(AGENT_PATH, mock_mode=mock_mode, storage_path=storage)
r._setup()
yield r
await r.cleanup_async()
+7
View File
@@ -56,6 +56,13 @@ def _configure_paths():
if (project_root / "core").is_dir() and core_str not in sys.path:
sys.path.insert(0, core_str)
# Add core/framework/agents/ so framework agents are importable as top-level packages
framework_agents_dir = project_root / "core" / "framework" / "agents"
if framework_agents_dir.is_dir():
fa_str = str(framework_agents_dir)
if fa_str not in sys.path:
sys.path.insert(0, fa_str)
def main():
_configure_paths()
+44 -2
View File
@@ -50,14 +50,55 @@ def get_max_tokens() -> int:
def get_api_key() -> str | None:
"""Return the API key from the environment variable specified in configuration."""
"""Return the API key, supporting env var, Claude Code subscription, and ZAI Code.
Priority:
1. Claude Code subscription (``use_claude_code_subscription: true``)
reads the OAuth token from ``~/.claude/.credentials.json``.
2. Environment variable named in ``api_key_env_var``.
"""
llm = get_hive_config().get("llm", {})
# Claude Code subscription: read OAuth token directly
if llm.get("use_claude_code_subscription"):
try:
from framework.runner.runner import get_claude_code_token
token = get_claude_code_token()
if token:
return token
except ImportError:
pass
# Standard env-var path (covers ZAI Code and all API-key providers)
api_key_env_var = llm.get("api_key_env_var")
if api_key_env_var:
return os.environ.get(api_key_env_var)
return None
def get_api_base() -> str | None:
"""Return the api_base URL for OpenAI-compatible endpoints, if configured."""
return get_hive_config().get("llm", {}).get("api_base")
def get_llm_extra_kwargs() -> dict[str, Any]:
"""Return extra kwargs for LiteLLMProvider (e.g. OAuth headers).
When ``use_claude_code_subscription`` is enabled, returns
``extra_headers`` with the OAuth Bearer token so that litellm's
built-in Anthropic OAuth handler adds the required beta headers.
"""
llm = get_hive_config().get("llm", {})
if llm.get("use_claude_code_subscription"):
api_key = get_api_key()
if api_key:
return {
"extra_headers": {"authorization": f"Bearer {api_key}"},
}
return {}
# ---------------------------------------------------------------------------
# RuntimeConfig shared across agent templates
# ---------------------------------------------------------------------------
@@ -71,4 +112,5 @@ class RuntimeConfig:
temperature: float = 0.7
max_tokens: int = field(default_factory=get_max_tokens)
api_key: str | None = field(default_factory=get_api_key)
api_base: str | None = None
api_base: str | None = field(default_factory=get_api_base)
extra_kwargs: dict[str, Any] = field(default_factory=get_llm_extra_kwargs)
+145 -19
View File
@@ -275,12 +275,25 @@ class EventLoopNode(NodeProtocol):
)
accumulator = OutputAccumulator(store=self._conversation_store)
start_iteration = 0
_restored_recent_responses: list[str] = []
_restored_tool_fingerprints: list[list[tuple[str, str]]] = []
else:
# Try crash-recovery restore from store, then fall back to fresh.
conversation, accumulator, start_iteration = await self._restore(ctx)
if conversation is None:
restored = await self._restore(ctx)
if restored is not None:
conversation = restored.conversation
accumulator = restored.accumulator
start_iteration = restored.start_iteration
_restored_recent_responses = restored.recent_responses
_restored_tool_fingerprints = restored.recent_tool_fingerprints
else:
_restored_recent_responses = []
_restored_tool_fingerprints = []
# Fresh conversation: either isolated mode or first node in continuous mode.
system_prompt = ctx.node_spec.system_prompt or ""
from framework.graph.prompt_composer import _with_datetime
system_prompt = _with_datetime(ctx.node_spec.system_prompt or "")
conversation = NodeConversation(
system_prompt=system_prompt,
@@ -306,6 +319,7 @@ class EventLoopNode(NodeProtocol):
tools.append(set_output_tool)
if ctx.node_spec.client_facing and not ctx.event_triggered:
tools.append(self._build_ask_user_tool())
tools.append(self._build_escalate_tool())
logger.info(
"[%s] Tools available (%d): %s | client_facing=%s | judge=%s",
@@ -319,10 +333,9 @@ class EventLoopNode(NodeProtocol):
# 4. Publish loop started
await self._publish_loop_started(stream_id, node_id)
# 5. Stall / doom loop detection state
recent_responses: list[str] = []
recent_tool_fingerprints: list[list[tuple[str, str]]] = []
user_interaction_count = 0 # tracks how many times this node blocked for user input
# 5. Stall / doom loop detection state (restored from cursor if resuming)
recent_responses: list[str] = _restored_recent_responses
recent_tool_fingerprints: list[list[tuple[str, str]]] = _restored_tool_fingerprints
# 6. Main loop
for iteration in range(start_iteration, self._config.max_iterations):
@@ -576,7 +589,8 @@ class EventLoopNode(NodeProtocol):
mcp_tool_calls = [
tc
for tc in logged_tool_calls
if tc.get("tool_name") not in ("set_output", "ask_user") and not tc.get("is_error")
if tc.get("tool_name") not in ("set_output", "ask_user", "escalate_to_coder")
and not tc.get("is_error")
]
if mcp_tool_calls:
fps = self._fingerprint_tool_calls(mcp_tool_calls)
@@ -623,8 +637,15 @@ class EventLoopNode(NodeProtocol):
# Text-only turn breaks the doom loop chain
recent_tool_fingerprints.clear()
# 6g. Write cursor checkpoint
await self._write_cursor(ctx, conversation, accumulator, iteration)
# 6g. Write cursor checkpoint (includes stall/doom state for resume)
await self._write_cursor(
ctx,
conversation,
accumulator,
iteration,
recent_responses=recent_responses,
recent_tool_fingerprints=recent_tool_fingerprints,
)
# 6h. Client-facing input blocking
#
@@ -741,7 +762,6 @@ class EventLoopNode(NodeProtocol):
conversation=conversation if _is_continuous else None,
)
user_interaction_count += 1
recent_responses.clear()
if _cf_auto:
@@ -1267,6 +1287,26 @@ class EventLoopNode(NodeProtocol):
)
results_by_id[tc.tool_use_id] = result
elif tc.tool_name == "escalate_to_coder":
# --- Framework-level escalation handling ---
if self._event_bus:
await self._event_bus.emit_escalation_requested(
stream_id=stream_id,
node_id=node_id,
reason=tc.tool_input.get("reason", ""),
context=tc.tool_input.get("context", ""),
execution_id=ctx.execution_id,
)
# Block like ask_user — the TUI loads the coder,
# and /back injects a message to unblock us.
user_input_requested = True
result = ToolResult(
tool_use_id=tc.tool_use_id,
content="Escalating to Hive Coder. You will resume when done.",
is_error=False,
)
results_by_id[tc.tool_use_id] = result
else:
# --- Real tool: check for truncated args, else queue ---
if "_raw" in tc.tool_input:
@@ -1313,7 +1353,7 @@ class EventLoopNode(NodeProtocol):
continue # shouldn't happen
# Build log entries for real tools
if tc.tool_name not in ("set_output", "ask_user"):
if tc.tool_name not in ("set_output", "ask_user", "escalate_to_coder"):
tool_entry = {
"tool_use_id": tc.tool_use_id,
"tool_name": tc.tool_name,
@@ -1458,6 +1498,46 @@ class EventLoopNode(NodeProtocol):
},
)
def _build_escalate_tool(self) -> Tool:
"""Build the synthetic escalate_to_coder tool.
Client-facing nodes call this when the user's request requires
capabilities beyond the current agent (code changes, feature
expansion, debugging). The TUI intercepts the event and loads
hive_coder in the foreground.
"""
return Tool(
name="escalate_to_coder",
description=(
"Call this tool when the user requests something you "
"cannot handle — a code change, feature expansion, bug "
"fix, or framework-level modification. This will bring "
"in Hive Coder, a coding agent that can read and write "
"files. Provide a clear reason and relevant context so "
"the coder can pick up where you left off."
),
parameters={
"type": "object",
"properties": {
"reason": {
"type": "string",
"description": (
"Why you are escalating (what the user needs that you cannot do)."
),
},
"context": {
"type": "string",
"description": (
"Relevant context: what you discussed, "
"what files are involved, what the user "
"wants changed."
),
},
},
"required": ["reason"],
},
)
def _build_set_output_tool(self, output_keys: list[str] | None) -> Tool | None:
"""Build the synthetic set_output tool for explicit output declaration."""
if not output_keys:
@@ -2214,29 +2294,60 @@ class EventLoopNode(NodeProtocol):
# Persistence: restore, cursor, injection, pause
# -------------------------------------------------------------------
@dataclass
class _RestoredState:
"""State recovered from a previous checkpoint."""
conversation: NodeConversation
accumulator: OutputAccumulator
start_iteration: int
recent_responses: list[str]
recent_tool_fingerprints: list[list[tuple[str, str]]]
async def _restore(
self,
ctx: NodeContext,
) -> tuple[NodeConversation | None, OutputAccumulator | None, int]:
"""Attempt to restore from a previous checkpoint."""
) -> _RestoredState | None:
"""Attempt to restore from a previous checkpoint.
Returns a ``_RestoredState`` with conversation, accumulator, iteration
counter, and stall/doom-loop detection state: everything needed to
resume exactly where execution stopped.
"""
if self._conversation_store is None:
return None, None, 0
return None
conversation = await NodeConversation.restore(self._conversation_store)
if conversation is None:
return None, None, 0
return None
accumulator = await OutputAccumulator.restore(self._conversation_store)
cursor = await self._conversation_store.read_cursor()
start_iteration = cursor.get("iteration", 0) + 1 if cursor else 0
# Restore stall/doom-loop detection state
recent_responses: list[str] = cursor.get("recent_responses", []) if cursor else []
raw_fps = cursor.get("recent_tool_fingerprints", []) if cursor else []
recent_tool_fingerprints: list[list[tuple[str, str]]] = [
[tuple(pair) for pair in fps] # type: ignore[misc]
for fps in raw_fps
]
logger.info(
f"Restored event loop: iteration={start_iteration}, "
f"messages={conversation.message_count}, "
f"outputs={list(accumulator.values.keys())}"
f"outputs={list(accumulator.values.keys())}, "
f"stall_window={len(recent_responses)}, "
f"doom_window={len(recent_tool_fingerprints)}"
)
return EventLoopNode._RestoredState(
conversation=conversation,
accumulator=accumulator,
start_iteration=start_iteration,
recent_responses=recent_responses,
recent_tool_fingerprints=recent_tool_fingerprints,
)
return conversation, accumulator, start_iteration
async def _write_cursor(
self,
@@ -2244,8 +2355,15 @@ class EventLoopNode(NodeProtocol):
conversation: NodeConversation,
accumulator: OutputAccumulator,
iteration: int,
*,
recent_responses: list[str] | None = None,
recent_tool_fingerprints: list[list[tuple[str, str]]] | None = None,
) -> None:
"""Write checkpoint cursor for crash recovery."""
"""Write checkpoint cursor for crash recovery.
Persists iteration counter, accumulator outputs, and stall/doom-loop
detection state so that resume picks up exactly where execution stopped.
"""
if self._conversation_store:
cursor = await self._conversation_store.read_cursor() or {}
cursor.update(
@@ -2256,6 +2374,14 @@ class EventLoopNode(NodeProtocol):
"outputs": accumulator.to_dict(),
}
)
# Persist stall/doom-loop detection state for reliable resume
if recent_responses is not None:
cursor["recent_responses"] = recent_responses
if recent_tool_fingerprints is not None:
# Convert list[list[tuple]] → list[list[list]] for JSON
cursor["recent_tool_fingerprints"] = [
[list(pair) for pair in fps] for fps in recent_tool_fingerprints
]
await self._conversation_store.write_cursor(cursor)
async def _drain_injection_queue(self, conversation: NodeConversation) -> int:
+50 -1
View File
@@ -618,7 +618,7 @@ class GraphExecutor:
cnt = node_visit_counts.get(current_node_id, 0) + 1
node_visit_counts[current_node_id] = cnt
_is_retry = False
max_visits = getattr(node_spec, "max_node_visits", 1)
max_visits = getattr(node_spec, "max_node_visits", 0)
if max_visits > 0 and node_visit_counts[current_node_id] > max_visits:
self.logger.warning(
f" ⊘ Node '{node_spec.name}' visit limit reached "
@@ -1294,6 +1294,36 @@ class GraphExecutor:
# Handle cancellation (e.g., TUI quit) - save as paused instead of failed
self.logger.info("⏸ Execution cancelled - saving state for resume")
# Flush WIP accumulator outputs from the interrupted node's
# cursor.json into SharedMemory so they survive resume. The
# accumulator writes to cursor.json on every set() call, but
# only writes to SharedMemory when the judge ACCEPTs. Without
# this, edge conditions checking these keys see None on resume.
if current_node_id and self._storage_path:
try:
import json as _json
cursor_path = (
self._storage_path / "conversations" / current_node_id / "cursor.json"
)
if cursor_path.exists():
cursor_data = _json.loads(cursor_path.read_text(encoding="utf-8"))
wip_outputs = cursor_data.get("outputs", {})
for key, value in wip_outputs.items():
if value is not None:
memory.write(key, value, validate=False)
if wip_outputs:
self.logger.info(
"Flushed %d WIP accumulator outputs to memory: %s",
len(wip_outputs),
list(wip_outputs.keys()),
)
except Exception:
self.logger.debug(
"Could not flush accumulator outputs from cursor",
exc_info=True,
)
# Save memory and state for resume
saved_memory = memory.read_all()
session_state_out: dict[str, Any] = {
@@ -1371,6 +1401,25 @@ class GraphExecutor:
execution_quality="failed",
)
# Flush WIP accumulator outputs (same as CancelledError path)
if current_node_id and self._storage_path:
try:
import json as _json
cursor_path = (
self._storage_path / "conversations" / current_node_id / "cursor.json"
)
if cursor_path.exists():
cursor_data = _json.loads(cursor_path.read_text(encoding="utf-8"))
for key, value in cursor_data.get("outputs", {}).items():
if value is not None:
memory.write(key, value, validate=False)
except Exception:
self.logger.debug(
"Could not flush accumulator outputs from cursor",
exc_info=True,
)
# Save memory and state for potential resume
saved_memory = memory.read_all()
session_state_out: dict[str, Any] = {
+3 -2
View File
@@ -201,10 +201,11 @@ class NodeSpec(BaseModel):
# Visit limits (for feedback/callback edges)
max_node_visits: int = Field(
default=1,
default=0,
description=(
"Max times this node executes in one graph run. "
"Set >1 for feedback loops. 0 = unlimited (max_steps guards)."
"0 = unlimited (default, required for forever-alive agents). "
"Set >1 for one-shot agents with feedback loops."
),
)
+10 -2
View File
@@ -16,6 +16,7 @@ Layer 3 — Focus (per-node system_prompt, reframed as focus directive):
from __future__ import annotations
import logging
from datetime import datetime
from pathlib import Path
from typing import TYPE_CHECKING
@@ -26,6 +27,13 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
def _with_datetime(prompt: str) -> str:
"""Append current datetime with local timezone to a system prompt."""
local = datetime.now().astimezone()
stamp = f"Current date and time: {local.strftime('%Y-%m-%d %H:%M %Z (UTC%z)')}"
return f"{prompt}\n\n{stamp}" if prompt else stamp
def compose_system_prompt(
identity_prompt: str | None,
focus_prompt: str | None,
@@ -39,7 +47,7 @@ def compose_system_prompt(
narrative: Layer 2 auto-generated from conversation state.
Returns:
Composed system prompt with all layers present.
Composed system prompt with all layers present, plus current datetime.
"""
parts: list[str] = []
@@ -55,7 +63,7 @@ def compose_system_prompt(
if focus_prompt:
parts.append(f"\n--- Current Focus ---\n{focus_prompt}")
return "\n".join(parts) if parts else ""
return _with_datetime("\n".join(parts) if parts else "")
def build_narrative(
+45
View File
@@ -28,6 +28,51 @@ from framework.llm.stream_events import StreamEvent
logger = logging.getLogger(__name__)
def _patch_litellm_anthropic_oauth() -> None:
"""Patch litellm's Anthropic header construction to fix OAuth token handling.
litellm bug: validate_environment() puts the OAuth token into x-api-key,
but Anthropic's API rejects OAuth tokens in x-api-key. They must be sent
via Authorization: Bearer only, with x-api-key omitted entirely.
This patch wraps validate_environment to remove x-api-key when the
Authorization header carries an OAuth token (sk-ant-oat prefix).
See: https://github.com/BerriAI/litellm/issues/19618
"""
try:
from litellm.llms.anthropic.common_utils import AnthropicModelInfo
from litellm.types.llms.anthropic import ANTHROPIC_OAUTH_TOKEN_PREFIX
except ImportError:
return
original = AnthropicModelInfo.validate_environment
def _patched_validate_environment(
self, headers, model, messages, optional_params, litellm_params, api_key=None, api_base=None
):
result = original(
self,
headers,
model,
messages,
optional_params,
litellm_params,
api_key=api_key,
api_base=api_base,
)
auth = result.get("authorization", "")
if auth.startswith(f"Bearer {ANTHROPIC_OAUTH_TOKEN_PREFIX}"):
result.pop("x-api-key", None)
return result
AnthropicModelInfo.validate_environment = _patched_validate_environment
if litellm is not None:
_patch_litellm_anthropic_oauth()
RATE_LIMIT_MAX_RETRIES = 10
RATE_LIMIT_BACKOFF_BASE = 2 # seconds
RATE_LIMIT_MAX_DELAY = 120 # seconds - cap to prevent absurd waits
+5 -1
View File
@@ -36,7 +36,11 @@ from framework.graph import ( # noqa: E402
NodeSpec,
SuccessCriterion,
)
from framework.testing.prompts import PYTEST_TEST_FILE_HEADER # noqa: E402
# Testing framework imports
from framework.testing.prompts import ( # noqa: E402
PYTEST_TEST_FILE_HEADER,
)
from framework.utils.io import atomic_write # noqa: E402
# Initialize MCP server
+157 -46
View File
@@ -75,6 +75,11 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
default=None,
help="Resume from a specific checkpoint (requires --resume-session)",
)
run_parser.add_argument(
"--no-guardian",
action="store_true",
help="Disable the Agent Guardian watchdog in TUI mode",
)
run_parser.set_defaults(func=cmd_run)
# info command
@@ -206,8 +211,28 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
default=None,
help="LLM model to use (any LiteLLM-compatible name)",
)
tui_parser.add_argument(
"--no-guardian",
action="store_true",
help="Disable the Agent Guardian watchdog",
)
tui_parser.set_defaults(func=cmd_tui)
# code command (Hive Coder — framework agent builder)
code_parser = subparsers.add_parser(
"code",
help="Launch Hive Coder to build agents",
description="Interactive agent builder. Describe what you want and Hive Coder builds it.",
)
code_parser.add_argument(
"--model",
"-m",
type=str,
default=None,
help="LLM model to use (any LiteLLM-compatible name)",
)
code_parser.set_defaults(func=cmd_code)
# sessions command group (checkpoint/resume management)
sessions_parser = subparsers.add_parser(
"sessions",
@@ -524,7 +549,17 @@ def cmd_run(args: argparse.Namespace) -> int:
# Force setup inside the loop
if runner._agent_runtime is None:
try:
runner._setup()
except CredentialError as e:
print(f"\n{e}", file=sys.stderr)
return
# Attach hive_coder's guardian watchdog (before start)
if not getattr(args, "no_guardian", False) and runner._agent_runtime:
from framework.agents.hive_coder.guardian import attach_guardian
attach_guardian(runner._agent_runtime, runner._tool_registry)
# Start runtime before TUI so it's ready for user input
if runner._agent_runtime and not runner._agent_runtime.is_running:
@@ -1343,60 +1378,26 @@ def cmd_shell(args: argparse.Namespace) -> int:
return 0
def cmd_tui(args: argparse.Namespace) -> int:
"""Browse agents and launch the interactive TUI dashboard."""
import logging
def _get_framework_agents_dir() -> Path:
"""Resolve the framework agents directory relative to this file."""
return Path(__file__).resolve().parent.parent / "agents"
def _launch_agent_tui(
agent_path: str | Path,
model: str | None = None,
no_guardian: bool = False,
) -> int:
"""Load an agent and launch the TUI. Shared by cmd_tui and cmd_code."""
from framework.credentials.models import CredentialError
from framework.runner import AgentRunner
from framework.tui.app import AdenTUI
logging.basicConfig(level=logging.WARNING, format="%(message)s")
exports_dir = Path("exports")
examples_dir = Path("examples/templates")
has_exports = _has_agents(exports_dir)
has_examples = _has_agents(examples_dir)
if not has_exports and not has_examples:
print("No agents found in exports/ or examples/templates/", file=sys.stderr)
return 1
# Determine which directory to browse
if has_exports and has_examples:
print("\nAgent sources:\n")
print(" 1. Your Agents (exports/)")
print(" 2. Sample Agents (examples/templates/)")
print()
try:
choice = input("Select source (number): ").strip()
if choice == "1":
agents_dir = exports_dir
elif choice == "2":
agents_dir = examples_dir
else:
print("Invalid selection")
return 1
except (EOFError, KeyboardInterrupt):
print()
return 1
elif has_exports:
agents_dir = exports_dir
else:
agents_dir = examples_dir
# Let user pick an agent
agent_path = _select_agent(agents_dir)
if not agent_path:
return 1
# Launch TUI (same pattern as cmd_run --tui)
async def run_with_tui():
try:
runner = AgentRunner.load(
agent_path,
model=args.model,
model=model,
)
except CredentialError as e:
print(f"\n{e}", file=sys.stderr)
@@ -1416,7 +1417,7 @@ def cmd_tui(args: argparse.Namespace) -> int:
if result.success:
# Retry loading with credentials now configured
try:
runner = AgentRunner.load(agent_path, model=args.model)
runner = AgentRunner.load(agent_path, model=model)
except CredentialError as retry_e:
print(f"\n{retry_e}", file=sys.stderr)
return
@@ -1434,7 +1435,17 @@ def cmd_tui(args: argparse.Namespace) -> int:
return
if runner._agent_runtime is None:
try:
runner._setup()
except CredentialError as e:
print(f"\n{e}", file=sys.stderr)
return
# Attach hive_coder's guardian watchdog (before start)
if not no_guardian and runner._agent_runtime:
from framework.agents.hive_coder.guardian import attach_guardian
attach_guardian(runner._agent_runtime, runner._tool_registry)
if runner._agent_runtime and not runner._agent_runtime.is_running:
await runner._agent_runtime.start()
@@ -1455,6 +1466,106 @@ def cmd_tui(args: argparse.Namespace) -> int:
return 0
def cmd_tui(args: argparse.Namespace) -> int:
"""Launch the interactive TUI dashboard with in-app agent picker."""
import logging
logging.basicConfig(level=logging.WARNING, format="%(message)s")
from framework.tui.app import AdenTUI
async def run_tui():
app = AdenTUI(
model=args.model,
no_guardian=getattr(args, "no_guardian", False),
)
await app.run_async()
asyncio.run(run_tui())
print("TUI session ended.")
return 0
def cmd_code(args: argparse.Namespace) -> int:
"""Launch Hive Coder with multi-graph support.
Unlike ``_launch_agent_tui``, this sets up graph lifecycle tools and
assigns ``graph_id="hive_coder"`` so the coder can load, supervise,
and restart secondary agent graphs within the same session.
"""
import logging
logging.basicConfig(level=logging.WARNING, format="%(message)s")
framework_agents_dir = _get_framework_agents_dir()
hive_coder_path = framework_agents_dir / "hive_coder"
if not (hive_coder_path / "agent.py").exists():
print("Error: Hive Coder agent not found.", file=sys.stderr)
return 1
# Ensure framework agents dir is on sys.path for import
fa_str = str(framework_agents_dir)
if fa_str not in sys.path:
sys.path.insert(0, fa_str)
from framework.credentials.models import CredentialError
from framework.runner import AgentRunner
from framework.tools.session_graph_tools import register_graph_tools
from framework.tui.app import AdenTUI
async def run_with_tui():
try:
runner = AgentRunner.load(hive_coder_path, model=args.model)
except CredentialError as e:
print(f"\n{e}", file=sys.stderr)
return
except Exception as e:
print(f"Error loading agent: {e}")
return
if runner._agent_runtime is None:
try:
runner._setup()
except CredentialError as e:
print(f"\n{e}", file=sys.stderr)
return
runtime = runner._agent_runtime
# -- Multi-graph setup --
# Tag the primary graph so events carry graph_id="hive_coder"
runtime._graph_id = "hive_coder"
runtime._active_graph_id = "hive_coder"
# Register graph lifecycle tools (load_agent, unload_agent, etc.)
register_graph_tools(runner._tool_registry, runtime)
# Refresh tool schemas AND executor so streams see the new tools.
# The executor closure references the registry dict by ref, but
# refreshing both is robust against any copy-on-read behavior.
runtime._tools = list(runner._tool_registry.get_tools().values())
runtime._tool_executor = runner._tool_registry.get_executor()
if not runtime.is_running:
await runtime.start()
app = AdenTUI(runtime)
try:
await app.run_async()
except Exception as e:
import traceback
traceback.print_exc()
print(f"TUI error: {e}")
await runner.cleanup_async()
asyncio.run(run_with_tui())
print("TUI session ended.")
return 0
def _extract_python_agent_metadata(agent_path: Path) -> tuple[str, str]:
"""Extract name and description from a Python-based agent's config.py.
+7 -1
View File
@@ -71,9 +71,15 @@ class AgentOrchestrator:
# Auto-create LLM - LiteLLM auto-detects provider and API key from model name
if self._llm is None:
from framework.config import get_api_base, get_api_key, get_llm_extra_kwargs
from framework.llm.litellm import LiteLLMProvider
self._llm = LiteLLMProvider(model=self._model)
self._llm = LiteLLMProvider(
model=self._model,
api_key=get_api_key(),
api_base=get_api_base(),
**get_llm_extra_kwargs(),
)
def register(
self,
+209 -9
View File
@@ -36,27 +36,130 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
CLAUDE_CREDENTIALS_FILE = Path.home() / ".claude" / ".credentials.json"
CLAUDE_OAUTH_TOKEN_URL = "https://console.anthropic.com/v1/oauth/token"
CLAUDE_OAUTH_CLIENT_ID = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
# Buffer in seconds before token expiry to trigger a proactive refresh
_TOKEN_REFRESH_BUFFER_SECS = 300 # 5 minutes
def _refresh_claude_code_token(refresh_token: str) -> dict | None:
"""Refresh the Claude Code OAuth token using the refresh token.
POSTs to the Anthropic OAuth token endpoint with form-urlencoded data
(per OAuth 2.0 RFC 6749 Section 4.1.3).
Returns:
Dict with new token data (access_token, refresh_token, expires_in)
on success, None on failure.
"""
import urllib.error
import urllib.parse
import urllib.request
data = urllib.parse.urlencode(
{
"grant_type": "refresh_token",
"refresh_token": refresh_token,
"client_id": CLAUDE_OAUTH_CLIENT_ID,
}
).encode("utf-8")
req = urllib.request.Request(
CLAUDE_OAUTH_TOKEN_URL,
data=data,
headers={"Content-Type": "application/x-www-form-urlencoded"},
method="POST",
)
try:
with urllib.request.urlopen(req, timeout=15) as resp:
return json.loads(resp.read())
except (urllib.error.URLError, json.JSONDecodeError, TimeoutError, OSError) as exc:
logger.debug("Claude Code token refresh failed: %s", exc)
return None
def _save_refreshed_credentials(token_data: dict) -> None:
"""Write refreshed token data back to ~/.claude/.credentials.json."""
import time
if not CLAUDE_CREDENTIALS_FILE.exists():
return
try:
with open(CLAUDE_CREDENTIALS_FILE) as f:
creds = json.load(f)
oauth = creds.get("claudeAiOauth", {})
oauth["accessToken"] = token_data["access_token"]
if "refresh_token" in token_data:
oauth["refreshToken"] = token_data["refresh_token"]
if "expires_in" in token_data:
oauth["expiresAt"] = int((time.time() + token_data["expires_in"]) * 1000)
creds["claudeAiOauth"] = oauth
with open(CLAUDE_CREDENTIALS_FILE, "w") as f:
json.dump(creds, f, indent=2)
logger.debug("Claude Code credentials refreshed successfully")
except (json.JSONDecodeError, OSError, KeyError) as exc:
logger.debug("Failed to save refreshed credentials: %s", exc)
def get_claude_code_token() -> str | None:
"""
Get the OAuth token from Claude Code subscription.
"""Get the OAuth token from Claude Code subscription with auto-refresh.
Reads from ~/.claude/.credentials.json which is created by the
Claude Code CLI when users authenticate with their subscription.
If the token is expired or close to expiry, attempts an automatic
refresh using the stored refresh token.
Returns:
The access token if available, None otherwise.
"""
import time
if not CLAUDE_CREDENTIALS_FILE.exists():
return None
try:
with open(CLAUDE_CREDENTIALS_FILE) as f:
creds = json.load(f)
return creds.get("claudeAiOauth", {}).get("accessToken")
except (json.JSONDecodeError, OSError):
return None
oauth = creds.get("claudeAiOauth", {})
access_token = oauth.get("accessToken")
if not access_token:
return None
# Check token expiry (expiresAt is in milliseconds)
expires_at_ms = oauth.get("expiresAt", 0)
now_ms = int(time.time() * 1000)
buffer_ms = _TOKEN_REFRESH_BUFFER_SECS * 1000
if expires_at_ms > now_ms + buffer_ms:
# Token is still valid
return access_token
# Token is expired or near expiry — attempt refresh
refresh_token = oauth.get("refreshToken")
if not refresh_token:
logger.warning("Claude Code token expired and no refresh token available")
return access_token # Return expired token; it may still work briefly
logger.info("Claude Code token expired or near expiry, refreshing...")
token_data = _refresh_claude_code_token(refresh_token)
if token_data and "access_token" in token_data:
_save_refreshed_credentials(token_data)
return token_data["access_token"]
# Refresh failed — return the existing token and warn
logger.warning("Claude Code token refresh failed. Run 'claude' to re-authenticate.")
return access_token
@dataclass
class AgentInfo:
@@ -587,6 +690,7 @@ class AgentRunner:
config = get_hive_config()
llm_config = config.get("llm", {})
use_claude_code = llm_config.get("use_claude_code_subscription", False)
api_base = llm_config.get("api_base")
api_key = None
if use_claude_code:
@@ -596,9 +700,16 @@ class AgentRunner:
print("Warning: Claude Code subscription configured but no token found.")
print("Run 'claude' to authenticate, then try again.")
if api_key:
# Use Claude Code subscription token
self._llm = LiteLLMProvider(model=self.model, api_key=api_key)
if api_key and use_claude_code:
# Use litellm's built-in Anthropic OAuth support.
# The lowercase "authorization" key triggers OAuth detection which
# adds the required anthropic-beta and browser-access headers.
self._llm = LiteLLMProvider(
model=self.model,
api_key=api_key,
api_base=api_base,
extra_headers={"authorization": f"Bearer {api_key}"},
)
else:
# Fall back to environment variable
# First check api_key_env_var from config (set by quickstart)
@@ -606,12 +717,18 @@ class AgentRunner:
self.model
)
if api_key_env and os.environ.get(api_key_env):
self._llm = LiteLLMProvider(model=self.model)
self._llm = LiteLLMProvider(
model=self.model,
api_key=os.environ[api_key_env],
api_base=api_base,
)
else:
# Fall back to credential store
api_key = self._get_api_key_from_credential_store()
if api_key:
self._llm = LiteLLMProvider(model=self.model, api_key=api_key)
self._llm = LiteLLMProvider(
model=self.model, api_key=api_key, api_base=api_base
)
# Set env var so downstream code (e.g. cleanup LLM in
# node._extract_json) can also find it
if api_key_env:
@@ -620,6 +737,20 @@ class AgentRunner:
print(f"Warning: {api_key_env} not set. LLM calls will fail.")
print(f"Set it with: export {api_key_env}=your-api-key")
# Fail fast if the agent needs an LLM but none was configured
if self._llm is None:
has_llm_nodes = any(node.node_type == "event_loop" for node in self.graph.nodes)
if has_llm_nodes:
from framework.credentials.models import CredentialError
api_key_env = self._get_api_key_env_var(self.model)
hint = (
f"Set it with: export {api_key_env}=your-api-key"
if api_key_env
else "Configure an API key for your LLM provider."
)
raise CredentialError(f"LLM API key not found for model '{self.model}'. {hint}")
# Get tools for runtime
tools = list(self._tool_registry.get_tools().values())
tool_executor = self._tool_registry.get_executor()
@@ -731,6 +862,19 @@ class AgentRunner:
async_checkpoint=True, # Non-blocking
)
# Handle runtime_config - ensure it's AgentRuntimeConfig, not RuntimeConfig
# RuntimeConfig is for LLM settings; AgentRuntimeConfig is for AgentRuntime settings
runtime_config = None
if self.runtime_config is not None:
from framework.config import RuntimeConfig
# If it's a RuntimeConfig (LLM config), don't pass it
if isinstance(self.runtime_config, RuntimeConfig):
runtime_config = None
else:
# It's already an AgentRuntimeConfig or compatible type
runtime_config = self.runtime_config
self._agent_runtime = create_agent_runtime(
graph=self.graph,
goal=self.goal,
@@ -741,7 +885,8 @@ class AgentRunner:
tool_executor=tool_executor,
runtime_log_store=log_store,
checkpoint_config=checkpoint_config,
config=runtime_config,
graph_id=self.graph.id or self.agent_path.name,
)
# Pass intro_message through for TUI display
@@ -1309,6 +1454,61 @@ Respond with JSON only:
type=MessageType.RESPONSE,
)
@classmethod
async def setup_as_secondary(
cls,
agent_path: str | Path,
runtime: AgentRuntime,
graph_id: str | None = None,
) -> str:
"""Load an agent and register it as a secondary graph on *runtime*.
Uses :meth:`AgentRunner.load` to parse the agent, then calls
:meth:`AgentRuntime.add_graph` with the extracted graph, goal,
and entry points.
Args:
agent_path: Path to the agent directory
runtime: The running AgentRuntime to attach to
graph_id: Optional graph identifier (defaults to directory name)
Returns:
The graph_id used for registration
"""
agent_path = Path(agent_path)
runner = cls.load(agent_path)
gid = graph_id or agent_path.name
# Build entry points
entry_points: dict[str, EntryPointSpec] = {}
if runner.graph.entry_node:
entry_points["default"] = EntryPointSpec(
id="default",
name="Default",
entry_node=runner.graph.entry_node,
trigger_type="manual",
isolation_level="shared",
)
for aep in runner.graph.async_entry_points:
entry_points[aep.id] = EntryPointSpec(
id=aep.id,
name=aep.name,
entry_node=aep.entry_node,
trigger_type=aep.trigger_type,
trigger_config=aep.trigger_config,
isolation_level=aep.isolation_level,
priority=aep.priority,
max_concurrent=aep.max_concurrent,
)
await runtime.add_graph(
graph_id=gid,
graph=runner.graph,
goal=runner.goal,
entry_points=entry_points,
)
return gid
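A hedged usage sketch for setup_as_secondary (the agent path is a placeholder):

# Attach a helper agent to a running runtime; returns the graph id used.
gid = await AgentRunner.setup_as_secondary("exports/email_agent", runtime)
assert gid in runtime.list_graphs()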
def cleanup(self) -> None:
"""Clean up resources (synchronous)."""
# Clean up MCP client connections
+27 -4
View File
@@ -1,5 +1,6 @@
"""Tool discovery and registration for agent runner."""
import asyncio
import contextvars
import importlib.util
import inspect
@@ -224,8 +225,19 @@ class ToolRegistry:
Get unified tool executor function.
Returns a function that dispatches to the appropriate tool executor.
Handles both sync and async tool implementations; async results are
wrapped so that ``EventLoopNode._execute_tool`` can await them.
"""
def _wrap_result(tool_use_id: str, result: Any) -> ToolResult:
if isinstance(result, ToolResult):
return result
return ToolResult(
tool_use_id=tool_use_id,
content=json.dumps(result) if not isinstance(result, str) else result,
is_error=False,
)
def executor(tool_use: ToolUse) -> ToolResult:
if tool_use.name not in self._tools:
return ToolResult(
@@ -237,13 +249,24 @@ class ToolRegistry:
registered = self._tools[tool_use.name]
try:
result = registered.executor(tool_use.input)
if isinstance(result, ToolResult):
return result
# Async tool: wrap the awaitable so the caller can await it
if asyncio.iscoroutine(result) or asyncio.isfuture(result):
async def _await_and_wrap():
try:
r = await result
return _wrap_result(tool_use.id, r)
except Exception as exc:
return ToolResult(
tool_use_id=tool_use.id,
content=json.dumps({"error": str(exc)}),
is_error=True,
)
return _await_and_wrap()
return _wrap_result(tool_use.id, result)
except Exception as e:
return ToolResult(
tool_use_id=tool_use.id,
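To illustrate the async path (the tool, its registration, and the executor handle are assumptions for this sketch, not part of the diff):

import asyncio

async def fetch_status(inputs: dict) -> dict:
    await asyncio.sleep(0.1)  # stand-in for real async I/O
    return {"status": "ok"}

# After registering "fetch_status" and executor = registry.get_executor():
result = executor(ToolUse(id="t1", name="fetch_status", input={}))
if asyncio.iscoroutine(result):  # async tool: the caller awaits the wrapper
    result = await result        # -> ToolResult with content '{"status": "ok"}'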
+487 -50
View File
@@ -10,6 +10,7 @@ import logging
import time
from collections.abc import Callable
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import TYPE_CHECKING, Any
@@ -47,6 +48,20 @@ class AgentRuntimeConfig:
# Each dict: {"source_id": str, "path": str, "methods": ["POST"], "secret": str|None}
@dataclass
class _GraphRegistration:
"""Tracks a loaded graph and its runtime resources."""
graph: "GraphSpec"
goal: "Goal"
entry_points: dict[str, EntryPointSpec]
streams: dict[str, ExecutionStream] # ep_id -> stream (NOT namespaced)
storage_subpath: str # relative to session root, e.g. "graphs/email_agent"
event_subscriptions: list[str] = field(default_factory=list)
timer_tasks: list[asyncio.Task] = field(default_factory=list)
timer_next_fire: dict[str, float] = field(default_factory=dict)
class AgentRuntime:
"""
Top-level runtime that manages agent lifecycle and concurrent executions.
@@ -110,6 +125,7 @@ class AgentRuntime:
config: AgentRuntimeConfig | None = None,
runtime_log_store: Any = None,
checkpoint_config: CheckpointConfig | None = None,
graph_id: str | None = None,
):
"""
Initialize agent runtime.
@@ -124,6 +140,7 @@ class AgentRuntime:
config: Optional runtime configuration
runtime_log_store: Optional RuntimeLogStore for per-execution logging
checkpoint_config: Optional checkpoint configuration for resumable sessions
graph_id: Optional identifier for the primary graph (defaults to "primary")
"""
self.graph = graph
self.goal = goal
@@ -131,6 +148,16 @@ class AgentRuntime:
self._runtime_log_store = runtime_log_store
self._checkpoint_config = checkpoint_config
# Primary graph identity
self._graph_id: str = graph_id or "primary"
# Multi-graph state
self._graphs: dict[str, _GraphRegistration] = {}
self._active_graph_id: str = self._graph_id
# User presence tracking (monotonic timestamp of last inject_input)
self._last_user_input_time: float = 0.0
# Initialize storage
storage_path_obj = Path(storage_path) if isinstance(storage_path, str) else storage_path
self._storage = ConcurrentStorage(
@@ -152,15 +179,15 @@ class AgentRuntime:
self._tools = tools or []
self._tool_executor = tool_executor
# Entry points and streams (primary graph)
self._entry_points: dict[str, EntryPointSpec] = {}
self._streams: dict[str, ExecutionStream] = {}
# Webhook server (created on start if webhook_routes configured)
self._webhook_server: Any = None
# Event-driven entry point subscriptions (primary graph)
self._event_subscriptions: list[str] = []
# Timer tasks for scheduled entry points (primary graph)
self._timer_tasks: list[asyncio.Task] = []
# Next fire time for each timer entry point (ep_id -> datetime)
self._timer_next_fire: dict[str, float] = {}
@@ -245,6 +272,7 @@ class AgentRuntime:
runtime_log_store=self._runtime_log_store,
session_store=self._session_store,
checkpoint_config=self._checkpoint_config,
graph_id=self._graph_id,
)
await stream.start()
self._streams[ep_id] = stream
@@ -290,10 +318,18 @@ class AgentRuntime:
)
continue
# Capture ep_id and config in closure
exclude_own = tc.get("exclude_own_graph", False)
def _make_handler(entry_point_id: str, _exclude_own: bool):
async def _on_event(event):
if not self._running or entry_point_id not in self._streams:
return
# Skip events originating from this graph's own
# executions (e.g. guardian should not fire on
# hive_coder failures — only secondary graphs).
if _exclude_own and event.graph_id == self._graph_id:
return
# Run in the same session as the primary entry
# point so memory (e.g. user-defined rules) is
# shared and logs land in one session directory.
@@ -310,9 +346,10 @@ class AgentRuntime:
sub_id = self._event_bus.subscribe(
event_types=event_types,
handler=_make_handler(ep_id, exclude_own),
filter_stream=tc.get("filter_stream"),
filter_node=tc.get("filter_node"),
filter_graph=tc.get("filter_graph"),
)
self._event_subscriptions.append(sub_id)
@@ -322,37 +359,111 @@ class AgentRuntime:
continue
tc = spec.trigger_config
cron_expr = tc.get("cron")
interval = tc.get("interval_minutes")
run_immediately = tc.get("run_immediately", False)
if cron_expr:
# Cron expression mode — takes priority over interval_minutes
try:
from croniter import croniter
# Validate the expression upfront
if not croniter.is_valid(cron_expr):
raise ValueError(f"Invalid cron expression: {cron_expr}")
except (ImportError, ValueError) as e:
logger.warning(
f"Entry point '{ep_id}' has trigger_type='timer' "
"but no valid interval_minutes in trigger_config"
"Entry point '%s' has invalid cron config: %s",
ep_id,
e,
)
continue
run_immediately = tc.get("run_immediately", False)
def _make_cron_timer(entry_point_id: str, expr: str, immediate: bool):
async def _cron_loop():
from croniter import croniter
if not immediate:
cron = croniter(expr, datetime.now())
next_dt = cron.get_next(datetime)
sleep_secs = (next_dt - datetime.now()).total_seconds()
self._timer_next_fire[entry_point_id] = (
time.monotonic() + sleep_secs
)
await asyncio.sleep(max(0, sleep_secs))
while self._running:
self._timer_next_fire.pop(entry_point_id, None)
try:
if self._should_skip_timer(entry_point_id):
logger.info(
"Timer '%s' skipped — primary stream busy",
entry_point_id,
)
else:
session_state = self._get_primary_session_state(
exclude_entry_point=entry_point_id
)
await self.trigger(
entry_point_id,
{"event": {"source": "timer", "reason": "scheduled"}},
{
"event": {
"source": "timer",
"reason": "scheduled",
}
},
session_state=session_state,
)
logger.info(
"Cron fired for entry point '%s' (expr: %s)",
entry_point_id,
expr,
)
except Exception:
logger.error(
"Cron trigger failed for '%s'",
entry_point_id,
exc_info=True,
)
# Calculate next fire from now
cron = croniter(expr, datetime.now())
next_dt = cron.get_next(datetime)
sleep_secs = (next_dt - datetime.now()).total_seconds()
self._timer_next_fire[entry_point_id] = (
time.monotonic() + sleep_secs
)
await asyncio.sleep(max(0, sleep_secs))
return _cron_loop
task = asyncio.create_task(
_make_cron_timer(ep_id, cron_expr, run_immediately)()
)
self._timer_tasks.append(task)
logger.info(
"Started cron timer for entry point '%s' with expression '%s'%s",
ep_id,
cron_expr,
" (immediate first run)" if run_immediately else "",
)
elif interval and interval > 0:
# Fixed interval mode (original behavior)
def _make_timer(entry_point_id: str, mins: float, immediate: bool):
async def _timer_loop():
interval_secs = mins * 60
if not immediate:
self._timer_next_fire[entry_point_id] = (
time.monotonic() + interval_secs
)
await asyncio.sleep(interval_secs)
while self._running:
self._timer_next_fire.pop(entry_point_id, None)
try:
session_state = self._get_primary_session_state(
exclude_entry_point=entry_point_id
)
await self.trigger(
entry_point_id,
{
"event": {
"source": "timer",
"reason": "scheduled",
}
},
session_state=session_state,
)
logger.info(
@@ -366,7 +477,9 @@ class AgentRuntime:
entry_point_id,
exc_info=True,
)
self._timer_next_fire[entry_point_id] = (
time.monotonic() + interval_secs
)
await asyncio.sleep(interval_secs)
return _timer_loop
@@ -380,6 +493,25 @@ class AgentRuntime:
" (immediate first run)" if run_immediately else "",
)
else:
logger.warning(
"Entry point '%s' has trigger_type='timer' "
"but no 'cron' or valid 'interval_minutes' in trigger_config",
ep_id,
)
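Both trigger shapes can be declared on an EntryPointSpec; a hedged example with illustrative values (the same shapes the tests below exercise):

nightly = EntryPointSpec(
    id="nightly-sync",
    name="Nightly Sync",
    entry_node="sync",
    trigger_type="timer",
    trigger_config={"cron": "0 2 * * *"},  # fires daily at 02:00
)
periodic = EntryPointSpec(
    id="periodic-sync",
    name="Periodic Sync",
    entry_node="sync",
    trigger_type="timer",
    trigger_config={"interval_minutes": 30, "run_immediately": True},
)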
# Register primary graph
self._graphs[self._graph_id] = _GraphRegistration(
graph=self.graph,
goal=self.goal,
entry_points=dict(self._entry_points),
streams=dict(self._streams),
storage_subpath="",
event_subscriptions=list(self._event_subscriptions),
timer_tasks=list(self._timer_tasks),
timer_next_fire=self._timer_next_fire,
)
self._running = True
logger.info(f"AgentRuntime started with {len(self._streams)} streams")
@@ -389,12 +521,17 @@ class AgentRuntime:
return
async with self._lock:
# Stop secondary graphs first
secondary_ids = [gid for gid in self._graphs if gid != self._graph_id]
for gid in secondary_ids:
await self._teardown_graph(gid)
# Cancel primary timer tasks
for task in self._timer_tasks:
task.cancel()
self._timer_tasks.clear()
# Unsubscribe primary event-driven entry points
for sub_id in self._event_subscriptions:
self._event_bus.unsubscribe(sub_id)
self._event_subscriptions.clear()
@@ -404,11 +541,12 @@ class AgentRuntime:
await self._webhook_server.stop()
self._webhook_server = None
# Stop all primary streams
for stream in self._streams.values():
await stream.stop()
self._streams.clear()
self._graphs.clear()
# Stop storage
await self._storage.stop()
@@ -475,24 +613,280 @@ class AgentRuntime:
raise ValueError(f"Entry point '{entry_point_id}' not found")
return await stream.wait_for_completion(exec_id, timeout)
def _should_skip_timer(self, timer_ep_id: str) -> bool:
"""Return True if a non-timer stream is actively running (not waiting for input).
Timers should only fire when the primary stream is idle (blocked
waiting for client input) or has no active execution. This prevents
concurrent pipeline runs that would race on shared memory.
"""
for ep_id, stream in self._streams.items():
if ep_id == timer_ep_id:
continue
spec = self._entry_points.get(ep_id)
if spec and spec.trigger_type == "timer":
continue
if stream.active_execution_ids and not stream.is_awaiting_input:
return True
return False
# === MULTI-GRAPH MANAGEMENT ===
async def add_graph(
self,
graph_id: str,
graph: "GraphSpec",
goal: "Goal",
entry_points: dict[str, EntryPointSpec],
storage_subpath: str | None = None,
) -> None:
"""Load a secondary graph into this runtime session.
Creates execution streams for the graph's entry points, sets up
event/timer triggers, and registers the graph. Shares the same
EventBus, state.json, and data directory as the primary graph.
Can be called while the runtime is running.
Args:
graph_id: Unique identifier for the graph
graph: Graph specification
goal: Goal driving this graph's execution
entry_points: Entry point specs (ep_id -> spec)
storage_subpath: Relative path under session root for this
graph's conversations/checkpoints. Defaults to
``"graphs/{graph_id}"``.
Raises:
ValueError: If graph_id already registered or entry node missing
"""
if graph_id in self._graphs:
raise ValueError(f"Graph '{graph_id}' already registered")
subpath = storage_subpath or f"graphs/{graph_id}"
# Validate entry nodes exist in graph
for _ep_id, spec in entry_points.items():
if graph.get_node(spec.entry_node) is None:
raise ValueError(f"Entry node '{spec.entry_node}' not found in graph '{graph_id}'")
# Create streams for each entry point
streams: dict[str, ExecutionStream] = {}
for ep_id, spec in entry_points.items():
stream = ExecutionStream(
stream_id=f"{graph_id}::{ep_id}",
entry_spec=spec,
graph=graph,
goal=goal,
state_manager=self._state_manager,
storage=self._storage,
outcome_aggregator=self._outcome_aggregator,
event_bus=self._event_bus,
llm=self._llm,
tools=self._tools,
tool_executor=self._tool_executor,
result_retention_max=self._config.execution_result_max,
result_retention_ttl_seconds=self._config.execution_result_ttl_seconds,
runtime_log_store=self._runtime_log_store,
session_store=self._session_store,
checkpoint_config=self._checkpoint_config,
graph_id=graph_id,
)
if self._running:
await stream.start()
streams[ep_id] = stream
# Set up event-driven subscriptions
from framework.runtime.event_bus import EventType as _ET
event_subs: list[str] = []
for ep_id, spec in entry_points.items():
if spec.trigger_type != "event":
continue
tc = spec.trigger_config
event_types = [_ET(et) for et in tc.get("event_types", [])]
if not event_types:
logger.warning(
"Entry point '%s::%s' has trigger_type='event' "
"but no event_types in trigger_config",
graph_id,
ep_id,
)
continue
namespaced_ep = f"{graph_id}::{ep_id}"
exclude_own = tc.get("exclude_own_graph", False)
def _make_handler(entry_point_id: str, gid: str, _exclude_own: bool):
async def _on_event(event):
if not self._running or gid not in self._graphs:
return
# Skip events from this graph's own executions
if _exclude_own and event.graph_id == gid:
return
reg = self._graphs[gid]
local_ep = entry_point_id.split("::", 1)[-1]
stream = reg.streams.get(local_ep)
if stream is None:
return
session_state = self._get_primary_session_state(
local_ep,
source_graph_id=gid,
)
await stream.execute(
{"event": event.to_dict()},
session_state=session_state,
)
return _on_event
sub_id = self._event_bus.subscribe(
event_types=event_types,
handler=_make_handler(namespaced_ep, graph_id, exclude_own),
filter_stream=tc.get("filter_stream"),
filter_node=tc.get("filter_node"),
filter_graph=tc.get("filter_graph"),
)
event_subs.append(sub_id)
# Set up timer-driven entry points
timer_tasks: list[asyncio.Task] = []
timer_next_fire: dict[str, float] = {}
for ep_id, spec in entry_points.items():
if spec.trigger_type != "timer":
continue
tc = spec.trigger_config
interval = tc.get("interval_minutes")
run_immediately = tc.get("run_immediately", False)
if interval and interval > 0 and self._running:
def _make_timer(gid: str, local_ep: str, mins: float, immediate: bool):
async def _timer_loop():
interval_secs = mins * 60
if not immediate:
timer_next_fire[local_ep] = time.monotonic() + interval_secs
await asyncio.sleep(interval_secs)
while self._running and gid in self._graphs:
timer_next_fire.pop(local_ep, None)
try:
reg = self._graphs.get(gid)
if not reg:
break
stream = reg.streams.get(local_ep)
if not stream:
break
session_state = self._get_primary_session_state(
local_ep, source_graph_id=gid
)
await stream.execute(
{"event": {"source": "timer", "reason": "scheduled"}},
session_state=session_state,
)
except Exception:
logger.error(
"Timer trigger failed for '%s::%s'",
gid,
local_ep,
exc_info=True,
)
timer_next_fire[local_ep] = time.monotonic() + interval_secs
await asyncio.sleep(interval_secs)
return _timer_loop
task = asyncio.create_task(
_make_timer(graph_id, ep_id, interval, run_immediately)()
)
timer_tasks.append(task)
self._graphs[graph_id] = _GraphRegistration(
graph=graph,
goal=goal,
entry_points=entry_points,
streams=streams,
storage_subpath=subpath,
event_subscriptions=event_subs,
timer_tasks=timer_tasks,
timer_next_fire=timer_next_fire,
)
logger.info(
"Added graph '%s' with %d entry points (%d streams)",
graph_id,
len(entry_points),
len(streams),
)
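A hedged end-to-end sketch of the multi-graph API (graph, goal, and ids are placeholders):

await runtime.add_graph(
    graph_id="email_agent",
    graph=email_graph,
    goal=email_goal,
    entry_points={
        "default": EntryPointSpec(
            id="default",
            name="Default",
            entry_node="triage",
            trigger_type="manual",
            isolation_level="shared",
        ),
    },
)
assert "email_agent" in runtime.list_graphs()
await runtime.remove_graph("email_agent")  # stops streams, timers, subscriptions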
async def remove_graph(self, graph_id: str) -> None:
"""Remove a secondary graph from this runtime session.
Stops all streams, cancels timers, unsubscribes events, and
removes the registration. Cannot remove the primary graph.
Args:
graph_id: Graph to remove
Raises:
ValueError: If graph_id is the primary graph or not found
"""
if graph_id == self._graph_id:
raise ValueError("Cannot remove the primary graph")
if graph_id not in self._graphs:
raise ValueError(f"Graph '{graph_id}' not found")
await self._teardown_graph(graph_id)
logger.info("Removed graph '%s'", graph_id)
async def _teardown_graph(self, graph_id: str) -> None:
"""Internal: stop and clean up all resources for a graph."""
reg = self._graphs.pop(graph_id, None)
if reg is None:
return
# Cancel timers
for task in reg.timer_tasks:
task.cancel()
# Unsubscribe events
for sub_id in reg.event_subscriptions:
self._event_bus.unsubscribe(sub_id)
# Stop streams
for stream in reg.streams.values():
await stream.stop()
# Reset active graph if it was the removed one
if self._active_graph_id == graph_id:
self._active_graph_id = self._graph_id
def list_graphs(self) -> list[str]:
"""Return all registered graph IDs (primary first)."""
result = []
if self._graph_id in self._graphs:
result.append(self._graph_id)
for gid in self._graphs:
if gid != self._graph_id:
result.append(gid)
return result
@property
def graph_id(self) -> str:
"""The primary graph's ID."""
return self._graph_id
@property
def active_graph_id(self) -> str:
"""The currently focused graph (for TUI routing)."""
return self._active_graph_id
@active_graph_id.setter
def active_graph_id(self, value: str) -> None:
if value not in self._graphs:
raise ValueError(f"Graph '{value}' not registered")
self._active_graph_id = value
@property
def user_idle_seconds(self) -> float:
"""Seconds since the user last provided input.
Returns ``float('inf')`` if no input has been received yet.
"""
if self._last_user_input_time == 0.0:
return float("inf")
return time.monotonic() - self._last_user_input_time
def get_graph_registration(self, graph_id: str) -> _GraphRegistration | None:
"""Get the registration for a specific graph (or None)."""
return self._graphs.get(graph_id)
def _get_primary_session_state(
self,
exclude_entry_point: str,
*,
source_graph_id: str | None = None,
) -> dict[str, Any] | None:
"""Build session_state so an async entry point runs in the primary session.
Looks for an active execution from another stream (the "primary"
@@ -509,6 +903,15 @@ class AgentRuntime:
which is kept up-to-date by ``GraphExecutor._write_progress()``
at every node transition.
Searches across ALL graphs' streams (primary + secondary) so
event-driven entry points on secondary graphs can share the
primary session.
Args:
exclude_entry_point: Entry point ID to skip (the one being triggered)
source_graph_id: Graph the exclude_entry_point belongs to (for
resolving the entry node spec). Defaults to primary graph.
Returns ``None`` if no primary session is active (the webhook
execution will just create its own session).
"""
@@ -516,13 +919,27 @@ class AgentRuntime:
# Determine which memory keys the async entry node needs.
allowed_keys: set[str] | None = None
# Look up the entry node from the correct graph
src_graph_id = source_graph_id or self._graph_id
src_reg = self._graphs.get(src_graph_id)
ep_spec = (
src_reg.entry_points.get(exclude_entry_point)
if src_reg
else self._entry_points.get(exclude_entry_point)
)
if ep_spec:
graph = src_reg.graph if src_reg else self.graph
entry_node = graph.get_node(ep_spec.entry_node)
if entry_node and entry_node.input_keys:
allowed_keys = set(entry_node.input_keys)
# Search ALL graphs' streams for an active session
all_streams: list[tuple[str, ExecutionStream]] = []
for _gid, reg in self._graphs.items():
for ep_id, stream in reg.streams.items():
all_streams.append((ep_id, stream))
for ep_id, stream in all_streams:
if ep_id == exclude_entry_point:
continue
for exec_id in stream.active_execution_ids:
@@ -552,21 +969,35 @@ class AgentRuntime:
)
return None
async def inject_input(self, node_id: str, content: str, graph_id: str | None = None) -> bool:
"""Inject user input into a running client-facing node.
Routes input to the EventLoopNode identified by ``node_id``. Used by
the TUI ChatRepl to deliver user responses during client-facing node
execution. Searches the specified graph (or the active graph) first,
then all others.
Args:
node_id: The node currently waiting for input
content: The user's input text
graph_id: Optional graph to search first (defaults to active graph)
Returns:
True if input was delivered, False if no matching node found
"""
# Track user presence
self._last_user_input_time = time.monotonic()
# Search target graph first
target = graph_id or self._active_graph_id
if target in self._graphs:
for stream in self._graphs[target].streams.values():
if await stream.inject_input(node_id, content):
return True
# Then search all other graphs
for gid, reg in self._graphs.items():
if gid == target:
continue
for stream in reg.streams.values():
if await stream.inject_input(node_id, content):
return True
return False
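For example (node id hypothetical):

# The TUI delivers a reply; the active graph is searched before the others.
delivered = await runtime.inject_input("ask-user", "yes, proceed")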
@@ -629,6 +1060,7 @@ class AgentRuntime:
event_types: list,
handler: Callable,
filter_stream: str | None = None,
filter_graph: str | None = None,
) -> str:
"""
Subscribe to agent events.
@@ -637,6 +1069,7 @@ class AgentRuntime:
event_types: Types of events to receive
handler: Async function to call when event occurs
filter_stream: Only receive events from this stream
filter_graph: Only receive events from this graph
Returns:
Subscription ID (use to unsubscribe)
@@ -645,6 +1078,7 @@ class AgentRuntime:
event_types=event_types,
handler=handler,
filter_stream=filter_stream,
filter_graph=filter_graph,
)
def unsubscribe_from_events(self, subscription_id: str) -> bool:
@@ -712,6 +1146,7 @@ def create_agent_runtime(
runtime_log_store: Any = None,
enable_logging: bool = True,
checkpoint_config: CheckpointConfig | None = None,
graph_id: str | None = None,
) -> AgentRuntime:
"""
Create and configure an AgentRuntime with entry points.
@@ -734,6 +1169,7 @@ def create_agent_runtime(
Set to False to disable logging entirely.
checkpoint_config: Optional checkpoint configuration for resumable sessions.
If None, uses default checkpointing behavior.
graph_id: Optional identifier for the primary graph (defaults to "primary").
Returns:
Configured AgentRuntime (not yet started)
@@ -755,6 +1191,7 @@ def create_agent_runtime(
config=config,
runtime_log_store=runtime_log_store,
checkpoint_config=checkpoint_config,
graph_id=graph_id,
)
for spec in entry_points:
+35
View File
@@ -83,6 +83,9 @@ class EventType(StrEnum):
# Custom events
CUSTOM = "custom"
# Escalation (agent requests handoff to hive_coder)
ESCALATION_REQUESTED = "escalation_requested"
@dataclass
class AgentEvent:
@@ -95,6 +98,7 @@ class AgentEvent:
data: dict[str, Any] = field(default_factory=dict)
timestamp: datetime = field(default_factory=datetime.now)
correlation_id: str | None = None # For tracking related events
graph_id: str | None = None # Which graph emitted this event (multi-graph sessions)
def to_dict(self) -> dict:
"""Convert to dictionary for serialization."""
@@ -106,6 +110,7 @@ class AgentEvent:
"data": self.data,
"timestamp": self.timestamp.isoformat(),
"correlation_id": self.correlation_id,
"graph_id": self.graph_id,
}
@@ -123,6 +128,7 @@ class Subscription:
filter_stream: str | None = None # Only receive events from this stream
filter_node: str | None = None # Only receive events from this node
filter_execution: str | None = None # Only receive events from this execution
filter_graph: str | None = None # Only receive events from this graph
class EventBus:
@@ -182,6 +188,7 @@ class EventBus:
filter_stream: str | None = None,
filter_node: str | None = None,
filter_execution: str | None = None,
filter_graph: str | None = None,
) -> str:
"""
Subscribe to events.
@@ -192,6 +199,7 @@ class EventBus:
filter_stream: Only receive events from this stream
filter_node: Only receive events from this node
filter_execution: Only receive events from this execution
filter_graph: Only receive events from this graph
Returns:
Subscription ID (use to unsubscribe)
@@ -206,6 +214,7 @@ class EventBus:
filter_stream=filter_stream,
filter_node=filter_node,
filter_execution=filter_execution,
filter_graph=filter_graph,
)
self._subscriptions[sub_id] = subscription
@@ -271,6 +280,10 @@ class EventBus:
if subscription.filter_execution and subscription.filter_execution != event.execution_id:
return False
# Check graph filter
if subscription.filter_graph and subscription.filter_graph != event.graph_id:
return False
return True
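A hedged subscription sketch using the new filter (handler and graph id are placeholders):

async def on_failure(event):
    print("secondary graph failed:", event.data)

sub_id = bus.subscribe(
    event_types=[EventType.EXECUTION_FAILED],
    handler=on_failure,
    filter_graph="email_agent",  # drop events stamped with any other graph_id
)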
async def _execute_handlers(
@@ -820,6 +833,25 @@ class EventBus:
)
)
async def emit_escalation_requested(
self,
stream_id: str,
node_id: str,
reason: str = "",
context: str = "",
execution_id: str | None = None,
) -> None:
"""Emit escalation requested event (agent wants hive_coder)."""
await self.publish(
AgentEvent(
type=EventType.ESCALATION_REQUESTED,
stream_id=stream_id,
node_id=node_id,
execution_id=execution_id,
data={"reason": reason, "context": context},
)
)
# === QUERY OPERATIONS ===
def get_history(
@@ -873,6 +905,7 @@ class EventBus:
stream_id: str | None = None,
node_id: str | None = None,
execution_id: str | None = None,
graph_id: str | None = None,
timeout: float | None = None,
) -> AgentEvent | None:
"""
@@ -883,6 +916,7 @@ class EventBus:
stream_id: Filter by stream
node_id: Filter by node
execution_id: Filter by execution
graph_id: Filter by graph
timeout: Maximum time to wait (seconds)
Returns:
@@ -903,6 +937,7 @@ class EventBus:
filter_stream=stream_id,
filter_node=node_id,
filter_execution=execution_id,
filter_graph=graph_id,
)
try:
+46 -13
View File
@@ -26,7 +26,7 @@ if TYPE_CHECKING:
from framework.graph.edge import GraphSpec
from framework.graph.goal import Goal
from framework.llm.provider import LLMProvider, Tool
from framework.runtime.event_bus import AgentEvent, EventBus
from framework.runtime.outcome_aggregator import OutcomeAggregator
from framework.storage.concurrent import ConcurrentStorage
from framework.storage.session_store import SessionStore
@@ -34,6 +34,31 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
class _GraphScopedEventBus:
"""Thin proxy that stamps ``graph_id`` on every published event.
The ``GraphExecutor`` and ``EventLoopNode`` emit events via the
convenience methods on ``EventBus`` (e.g. ``emit_llm_text_delta``).
Rather than threading ``graph_id`` through every one of those 20+
methods, this proxy intercepts ``publish()`` and sets ``graph_id``
before forwarding to the real bus. All other attribute access is
delegated unchanged.
"""
__slots__ = ("_bus", "_graph_id")
def __init__(self, bus: "EventBus", graph_id: str) -> None:
object.__setattr__(self, "_bus", bus)
object.__setattr__(self, "_graph_id", graph_id)
async def publish(self, event: "AgentEvent") -> None: # type: ignore[override]
event.graph_id = object.__getattribute__(self, "_graph_id")
await object.__getattribute__(self, "_bus").publish(event)
def __getattr__(self, name: str) -> Any:
return getattr(object.__getattribute__(self, "_bus"), name)
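A short sketch of the proxy in use (ids illustrative):

scoped = _GraphScopedEventBus(event_bus, "email_agent")
event = AgentEvent(type=EventType.CUSTOM, stream_id="email_agent::default")
await scoped.publish(event)
assert event.graph_id == "email_agent"  # stamped before forwarding to the real bus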
@dataclass
class EntryPointSpec:
"""Specification for an entry point."""
@@ -117,6 +142,7 @@ class ExecutionStream:
runtime_log_store: Any = None,
session_store: "SessionStore | None" = None,
checkpoint_config: CheckpointConfig | None = None,
graph_id: str | None = None,
):
"""
Initialize execution stream.
@@ -136,11 +162,13 @@ class ExecutionStream:
runtime_log_store: Optional RuntimeLogStore for per-execution logging
session_store: Optional SessionStore for unified session storage
checkpoint_config: Optional checkpoint configuration for resumable sessions
graph_id: Optional graph identifier for multi-graph sessions
"""
self.stream_id = stream_id
self.entry_spec = entry_spec
self.graph = graph
self.goal = goal
self.graph_id = graph_id
self._state_manager = state_manager
self._storage = storage
self._outcome_aggregator = outcome_aggregator
@@ -173,6 +201,11 @@ class ExecutionStream:
self._semaphore = asyncio.Semaphore(entry_spec.max_concurrent)
self._lock = asyncio.Lock()
# Graph-scoped event bus (stamps graph_id on published events)
self._scoped_event_bus = self._event_bus
if self._event_bus and self.graph_id:
self._scoped_event_bus = _GraphScopedEventBus(self._event_bus, self.graph_id)
# State
self._running = False
@@ -185,10 +218,10 @@ class ExecutionStream:
logger.info(f"ExecutionStream '{self.stream_id}' started")
# Emit stream started event
if self._scoped_event_bus:
from framework.runtime.event_bus import AgentEvent, EventType
await self._scoped_event_bus.publish(
AgentEvent(
type=EventType.STREAM_STARTED,
stream_id=self.stream_id,
@@ -262,10 +295,10 @@ class ExecutionStream:
logger.info(f"ExecutionStream '{self.stream_id}' stopped")
# Emit stream stopped event
if self._scoped_event_bus:
from framework.runtime.event_bus import AgentEvent, EventType
await self._scoped_event_bus.publish(
AgentEvent(
type=EventType.STREAM_STOPPED,
stream_id=self.stream_id,
@@ -369,8 +402,8 @@ class ExecutionStream:
try:
# Emit started event
if self._scoped_event_bus:
await self._scoped_event_bus.emit_execution_started(
stream_id=self.stream_id,
execution_id=execution_id,
input_data=ctx.input_data,
@@ -415,7 +448,7 @@ class ExecutionStream:
llm=self._llm,
tools=self._tools,
tool_executor=self._tool_executor,
event_bus=self._scoped_event_bus,
stream_id=self.stream_id,
storage_path=exec_storage,
runtime_logger=runtime_logger,
@@ -465,16 +498,16 @@ class ExecutionStream:
await self._write_session_state(execution_id, ctx, result=result)
# Emit completion/failure event
if self._scoped_event_bus:
if result.success:
await self._scoped_event_bus.emit_execution_completed(
stream_id=self.stream_id,
execution_id=execution_id,
output=result.output,
correlation_id=ctx.correlation_id,
)
else:
await self._scoped_event_bus.emit_execution_failed(
stream_id=self.stream_id,
execution_id=execution_id,
error=result.error or "Unknown error",
@@ -552,8 +585,8 @@ class ExecutionStream:
pass # Don't let end_run errors mask the original error
# Emit failure event
if self._scoped_event_bus:
await self._scoped_event_bus.emit_execution_failed(
stream_id=self.stream_id,
execution_id=execution_id,
error=str(e),
@@ -641,5 +641,185 @@ class TestCreateAgentRuntime:
assert "api" in runtime._entry_points
# === Timer Entry Point Tests ===
class TestTimerEntryPoints:
"""Tests for timer-driven entry points (interval and cron)."""
@pytest.mark.asyncio
async def test_interval_timer_starts_task(self, sample_graph, sample_goal, temp_storage):
"""Test that interval_minutes timer creates an async task."""
runtime = AgentRuntime(
graph=sample_graph,
goal=sample_goal,
storage_path=temp_storage,
)
entry_spec = EntryPointSpec(
id="timer-interval",
name="Interval Timer",
entry_node="process-webhook",
trigger_type="timer",
trigger_config={"interval_minutes": 60},
)
runtime.register_entry_point(entry_spec)
await runtime.start()
try:
assert len(runtime._timer_tasks) == 1
assert not runtime._timer_tasks[0].done()
# Give the async task a moment to set next_fire
await asyncio.sleep(0.05)
assert "timer-interval" in runtime._timer_next_fire
finally:
await runtime.stop()
assert len(runtime._timer_tasks) == 0
@pytest.mark.asyncio
async def test_cron_timer_starts_task(self, sample_graph, sample_goal, temp_storage):
"""Test that cron expression timer creates an async task."""
runtime = AgentRuntime(
graph=sample_graph,
goal=sample_goal,
storage_path=temp_storage,
)
entry_spec = EntryPointSpec(
id="timer-cron",
name="Cron Timer",
entry_node="process-webhook",
trigger_type="timer",
trigger_config={"cron": "*/5 * * * *"}, # Every 5 minutes
)
runtime.register_entry_point(entry_spec)
await runtime.start()
try:
assert len(runtime._timer_tasks) == 1
assert not runtime._timer_tasks[0].done()
# Give the async task a moment to set next_fire
await asyncio.sleep(0.05)
assert "timer-cron" in runtime._timer_next_fire
finally:
await runtime.stop()
@pytest.mark.asyncio
async def test_invalid_cron_expression_skipped(
self, sample_graph, sample_goal, temp_storage, caplog
):
"""Test that an invalid cron expression logs a warning and skips."""
runtime = AgentRuntime(
graph=sample_graph,
goal=sample_goal,
storage_path=temp_storage,
)
entry_spec = EntryPointSpec(
id="timer-bad-cron",
name="Bad Cron Timer",
entry_node="process-webhook",
trigger_type="timer",
trigger_config={"cron": "not a cron expression"},
)
runtime.register_entry_point(entry_spec)
await runtime.start()
try:
assert len(runtime._timer_tasks) == 0
assert "invalid cron" in caplog.text.lower() or "Invalid cron" in caplog.text
finally:
await runtime.stop()
@pytest.mark.asyncio
async def test_cron_takes_priority_over_interval(
self, sample_graph, sample_goal, temp_storage, caplog
):
"""Test that when both cron and interval_minutes are set, cron wins."""
import logging
runtime = AgentRuntime(
graph=sample_graph,
goal=sample_goal,
storage_path=temp_storage,
)
entry_spec = EntryPointSpec(
id="timer-both",
name="Both Timer",
entry_node="process-webhook",
trigger_type="timer",
trigger_config={"cron": "0 9 * * *", "interval_minutes": 30},
)
runtime.register_entry_point(entry_spec)
with caplog.at_level(logging.INFO):
await runtime.start()
try:
assert len(runtime._timer_tasks) == 1
# Should log cron, not interval
assert any("cron" in r.message.lower() for r in caplog.records)
finally:
await runtime.stop()
@pytest.mark.asyncio
async def test_no_interval_or_cron_warns(self, sample_graph, sample_goal, temp_storage, caplog):
"""Test that timer with neither cron nor interval_minutes logs a warning."""
runtime = AgentRuntime(
graph=sample_graph,
goal=sample_goal,
storage_path=temp_storage,
)
entry_spec = EntryPointSpec(
id="timer-empty",
name="Empty Timer",
entry_node="process-webhook",
trigger_type="timer",
trigger_config={},
)
runtime.register_entry_point(entry_spec)
await runtime.start()
try:
assert len(runtime._timer_tasks) == 0
assert "no 'cron' or valid 'interval_minutes'" in caplog.text
finally:
await runtime.stop()
@pytest.mark.asyncio
async def test_cron_immediate_fires_first(self, sample_graph, sample_goal, temp_storage):
"""Test that run_immediately=True with cron doesn't set next_fire before first run."""
runtime = AgentRuntime(
graph=sample_graph,
goal=sample_goal,
storage_path=temp_storage,
)
entry_spec = EntryPointSpec(
id="timer-cron-immediate",
name="Cron Immediate",
entry_node="process-webhook",
trigger_type="timer",
trigger_config={"cron": "0 0 * * *", "run_immediately": True},
)
runtime.register_entry_point(entry_spec)
await runtime.start()
try:
assert len(runtime._timer_tasks) == 1
# With run_immediately, the task enters the while loop directly,
# so _timer_next_fire is NOT set before the first trigger attempt
# (it pops it at the top of the loop)
# Give it a moment to start executing
await asyncio.sleep(0.05)
# Task should still be running (it will try to trigger and likely fail
# since there's no LLM, but the task itself continues)
assert not runtime._timer_tasks[0].done()
finally:
await runtime.stop()
if __name__ == "__main__":
pytest.main([__file__, "-v"])
View File
+325
View File
@@ -0,0 +1,325 @@
"""Graph lifecycle tools for multi-graph sessions.
These tools allow an agent (e.g. hive_coder) to load, unload, start,
restart, and query other agent graphs within the same runtime session.
Usage::
from framework.tools.session_graph_tools import register_graph_tools
register_graph_tools(tool_registry, runtime)
The tools are registered as async Python functions on the ToolRegistry.
They close over the ``AgentRuntime`` instance; no ContextVar is needed,
since the runtime is a stable, long-lived object.
"""
from __future__ import annotations
import json
import logging
from pathlib import Path
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from framework.runner.tool_registry import ToolRegistry
from framework.runtime.agent_runtime import AgentRuntime
logger = logging.getLogger(__name__)
def register_graph_tools(registry: ToolRegistry, runtime: AgentRuntime) -> int:
"""Register graph lifecycle tools bound to *runtime*.
Returns the number of tools registered.
"""
from framework.llm.provider import Tool
tools_registered = 0
# --- load_agent -----------------------------------------------------------
async def load_agent(agent_path: str) -> str:
"""Load an agent graph from disk into the running session.
The agent is imported from *agent_path* (a directory containing
``agent.py``). Its graph, goal, and entry points are registered
as a secondary graph on the runtime. Returns a JSON summary.
"""
from framework.runner.runner import AgentRunner
from framework.runtime.execution_stream import EntryPointSpec
path = Path(agent_path).resolve()
if not path.exists():
return json.dumps({"error": f"Agent path does not exist: {path}"})
try:
runner = AgentRunner.load(path)
except Exception as exc:
return json.dumps({"error": f"Failed to load agent: {exc}"})
graph_id = path.name
if graph_id in list(runtime.list_graphs()):
return json.dumps({"error": f"Graph '{graph_id}' is already loaded"})
# Build entry point dict from the loaded graph
entry_points: dict[str, EntryPointSpec] = {}
# Primary entry point
if runner.graph.entry_node:
entry_points["default"] = EntryPointSpec(
id="default",
name="Default",
entry_node=runner.graph.entry_node,
trigger_type="manual",
isolation_level="shared",
)
# Async entry points
for aep in runner.graph.async_entry_points:
entry_points[aep.id] = EntryPointSpec(
id=aep.id,
name=aep.name,
entry_node=aep.entry_node,
trigger_type=aep.trigger_type,
trigger_config=aep.trigger_config,
isolation_level=aep.isolation_level,
priority=aep.priority,
max_concurrent=aep.max_concurrent,
)
await runtime.add_graph(
graph_id=graph_id,
graph=runner.graph,
goal=runner.goal,
entry_points=entry_points,
)
return json.dumps(
{
"graph_id": graph_id,
"entry_points": list(entry_points.keys()),
"nodes": [n.id for n in runner.graph.nodes],
"status": "loaded",
}
)
_load_tool = Tool(
name="load_agent",
description=(
"Load an agent graph from disk into the current session. "
"The agent runs alongside the primary agent, sharing memory and data."
),
parameters={
"type": "object",
"properties": {
"agent_path": {
"type": "string",
"description": "Path to the agent directory (containing agent.py)",
},
},
"required": ["agent_path"],
},
)
registry.register("load_agent", _load_tool, lambda inputs: load_agent(**inputs))
tools_registered += 1
# --- unload_agent ---------------------------------------------------------
async def unload_agent(graph_id: str) -> str:
"""Stop and remove a secondary agent graph from the session."""
try:
await runtime.remove_graph(graph_id)
return json.dumps({"graph_id": graph_id, "status": "unloaded"})
except ValueError as exc:
return json.dumps({"error": str(exc)})
_unload_tool = Tool(
name="unload_agent",
description="Stop and remove a loaded agent graph from the session.",
parameters={
"type": "object",
"properties": {
"graph_id": {
"type": "string",
"description": "ID of the graph to unload",
},
},
"required": ["graph_id"],
},
)
registry.register("unload_agent", _unload_tool, lambda inputs: unload_agent(**inputs))
tools_registered += 1
# --- start_agent ----------------------------------------------------------
async def start_agent(
graph_id: str, entry_point: str = "default", input_data: str = "{}"
) -> str:
"""Trigger an entry point on a loaded agent graph."""
reg = runtime.get_graph_registration(graph_id)
if reg is None:
return json.dumps({"error": f"Graph '{graph_id}' not found"})
stream = reg.streams.get(entry_point)
if stream is None:
return json.dumps(
{
"error": f"Entry point '{entry_point}' not found on graph '{graph_id}'",
"available": list(reg.streams.keys()),
}
)
try:
data = json.loads(input_data) if isinstance(input_data, str) else input_data
except json.JSONDecodeError as exc:
return json.dumps({"error": f"Invalid JSON input: {exc}"})
session_state = runtime._get_primary_session_state(entry_point, source_graph_id=graph_id)
exec_id = await stream.execute(data, session_state=session_state)
return json.dumps(
{
"graph_id": graph_id,
"entry_point": entry_point,
"execution_id": exec_id,
"status": "triggered",
}
)
_start_tool = Tool(
name="start_agent",
description="Trigger an entry point on a loaded agent graph to start execution.",
parameters={
"type": "object",
"properties": {
"graph_id": {
"type": "string",
"description": "ID of the graph to start",
},
"entry_point": {
"type": "string",
"description": "Entry point to trigger (default: 'default')",
},
"input_data": {
"type": "string",
"description": "JSON string of input data for the execution",
},
},
"required": ["graph_id"],
},
)
registry.register("start_agent", _start_tool, lambda inputs: start_agent(**inputs))
tools_registered += 1
# --- restart_agent --------------------------------------------------------
async def restart_agent(graph_id: str) -> str:
"""Unload and reload an agent graph (picks up code changes)."""
reg = runtime.get_graph_registration(graph_id)
if reg is None:
return json.dumps({"error": f"Graph '{graph_id}' not found"})
if graph_id == runtime.graph_id:
return json.dumps({"error": "Cannot restart the primary graph"})
# The graph_id is the agent directory name by convention, but the
# original agent path is not tracked here, so this tool only unloads;
# the caller should invoke load_agent to reload from the right path.
try:
await runtime.remove_graph(graph_id)
except ValueError as exc:
return json.dumps({"error": f"Failed to unload: {exc}"})
# Reload by calling load_agent with the graph_id as path hint
# The caller should use load_agent explicitly if the path is different
return json.dumps(
{
"graph_id": graph_id,
"status": "unloaded",
"note": "Use load_agent to reload with updated code",
}
)
_restart_tool = Tool(
name="restart_agent",
description=(
"Unload an agent graph. Use load_agent afterwards to reload with updated code."
),
parameters={
"type": "object",
"properties": {
"graph_id": {
"type": "string",
"description": "ID of the graph to restart",
},
},
"required": ["graph_id"],
},
)
registry.register("restart_agent", _restart_tool, lambda inputs: restart_agent(**inputs))
tools_registered += 1
# --- list_agents ----------------------------------------------------------
def list_agents() -> str:
"""List all agent graphs in the current session with their status."""
graphs = []
for gid in runtime.list_graphs():
reg = runtime.get_graph_registration(gid)
if reg is None:
continue
graphs.append(
{
"graph_id": gid,
"is_primary": gid == runtime.graph_id,
"is_active": gid == runtime.active_graph_id,
"entry_points": list(reg.entry_points.keys()),
"active_executions": sum(
len(s.active_execution_ids) for s in reg.streams.values()
),
}
)
return json.dumps({"graphs": graphs})
_list_tool = Tool(
name="list_agents",
description="List all loaded agent graphs and their status.",
parameters={"type": "object", "properties": {}},
)
registry.register("list_agents", _list_tool, lambda inputs: list_agents())
tools_registered += 1
# --- get_user_presence ----------------------------------------------------
def get_user_presence() -> str:
"""Return user idle time and presence status."""
idle = runtime.user_idle_seconds
if idle == float("inf"):
status = "never_seen"
elif idle < 120:
status = "present"
elif idle < 600:
status = "idle"
else:
status = "away"
return json.dumps(
{
"idle_seconds": idle if idle != float("inf") else None,
"status": status,
}
)
_presence_tool = Tool(
name="get_user_presence",
description=(
"Check if the user is currently active. Returns idle time "
"and a status of 'present', 'idle', 'away', or 'never_seen'."
),
parameters={"type": "object", "properties": {}},
)
registry.register("get_user_presence", _presence_tool, lambda inputs: get_user_presence())
tools_registered += 1
logger.info("Registered %d graph lifecycle tools", tools_registered)
return tools_registered
+512 -82
View File
@@ -4,17 +4,18 @@ import subprocess
import threading
import time
from textual import work
from textual.app import App, ComposeResult
from textual.binding import Binding
from textual.containers import Container, Horizontal
from textual.widgets import Footer, Label
from framework.runtime.event_bus import AgentEvent, EventType
from framework.tui.widgets.selectable_rich_log import SelectableRichLog
# AgentRuntime imported lazily where needed to support runtime=None startup.
# ChatRepl and GraphOverview are imported lazily in _mount_agent_widgets.
class StatusBar(Container):
"""Live status bar showing agent execution state."""
@@ -151,6 +152,10 @@ class AdenTUI(App):
padding: 0;
}
#agent-workspace {
height: 1fr;
}
#chat-history {
height: 1fr;
width: 100%;
@@ -188,6 +193,15 @@ class AdenTUI(App):
background: $panel;
color: $text-muted;
}
#empty-workspace {
align: center middle;
height: 1fr;
}
#empty-workspace Label {
text-align: center;
}
"""
BINDINGS = [
@@ -198,23 +212,37 @@ class AdenTUI(App):
Binding("ctrl+l", "toggle_logs", "Toggle Logs", show=True, priority=True),
Binding("ctrl+z", "pause_execution", "Pause", show=True, priority=True),
Binding("ctrl+r", "show_sessions", "Sessions", show=True, priority=True),
Binding("ctrl+p", "attach_pdf", "Attach PDF", show=True, priority=True),
Binding("ctrl+a", "show_agent_picker", "Agents", show=True, priority=True),
Binding("ctrl+e", "escalate_to_coder", "Coder", show=True, priority=True),
Binding("ctrl+e", "return_from_coder", "← Back", show=True, priority=True),
Binding("tab", "focus_next", "Next Panel", show=True),
Binding("shift+tab", "focus_previous", "Previous Panel", show=False),
]
def __init__(
self,
runtime=None,
resume_session: str | None = None,
resume_checkpoint: str | None = None,
model: str | None = None,
no_guardian: bool = False,
):
super().__init__()
self.runtime = runtime
self._model = model
self._no_guardian = no_guardian
self._resume_session = resume_session
self._resume_checkpoint = resume_checkpoint
self._runner = None # AgentRunner — needed for cleanup on swap
# Escalation stack: stores worker state when coder is in foreground
self._escalation_stack: list[dict] = []
# Widgets are created lazily when runtime is available
self.graph_view = None
self.chat_repl = None
self.status_bar = StatusBar(graph_id=runtime.graph.id if runtime else "")
self.is_ready = False
def open_url(self, url: str, *, new_tab: bool = True) -> None:
@@ -239,34 +267,364 @@ class AdenTUI(App):
def compose(self) -> ComposeResult:
yield self.status_bar
yield Horizontal(id="agent-workspace")
yield Footer()
async def on_mount(self) -> None:
"""Called when app starts."""
self.title = "Aden TUI Dashboard"
# Add logging setup
self._setup_logging_queue()
# Set ready immediately so _poll_logs can process messages
self.is_ready = True
if self.runtime is not None:
# Direct launch with agent already loaded
self._mount_agent_widgets()
self.call_later(self._init_runtime_connection)
# Delay initial log messages until layout is fully rendered
def write_initial_logs():
logging.info("TUI Dashboard initialized successfully")
logging.info("Waiting for agent execution to start...")
# Wait for layout to be fully rendered before writing logs
self.set_timer(0.2, write_initial_logs)
else:
# No agent — show picker
self.call_later(self._show_agent_picker_initial)
# -- Agent widget lifecycle --
def _mount_agent_widgets(self) -> None:
"""Mount ChatRepl and GraphOverview into #agent-workspace."""
from framework.tui.widgets.chat_repl import ChatRepl
from framework.tui.widgets.graph_view import GraphOverview
workspace = self.query_one("#agent-workspace", Horizontal)
# Remove empty-state placeholder if present
for child in list(workspace.children):
child.remove()
self.graph_view = GraphOverview(self.runtime)
self.chat_repl = ChatRepl(
self.runtime,
self._resume_session,
self._resume_checkpoint,
)
workspace.mount(self.graph_view)
workspace.mount(self.chat_repl)
self.status_bar.set_graph_id(self.runtime.graph.id)
def _unmount_agent_widgets(self) -> None:
"""Remove ChatRepl and GraphOverview from #agent-workspace."""
# Unsubscribe from events
if hasattr(self, "_subscription_id"):
try:
self.runtime.unsubscribe_from_events(self._subscription_id)
except Exception:
pass
del self._subscription_id
workspace = self.query_one("#agent-workspace", Horizontal)
for child in list(workspace.children):
child.remove()
self.graph_view = None
self.chat_repl = None
async def _load_and_switch_agent(self, agent_path: str) -> None:
"""Load an agent and replace the current one in the TUI."""
from pathlib import Path
from framework.credentials.models import CredentialError
from framework.runner import AgentRunner
# 1. Tear down old agent
if self.runtime is not None:
self._unmount_agent_widgets()
if self._runner is not None:
try:
await self._runner.cleanup_async()
except Exception:
pass
self._runner = None
self.runtime = None
# 2. Show loading state
agent_name = Path(agent_path).name
self.status_bar.set_graph_id(f"Loading {agent_name}...")
self.notify(f"Loading agent: {agent_name}...", timeout=3)
# 3. Load new agent
try:
runner = AgentRunner.load(agent_path, model=self._model)
if runner._agent_runtime is None:
runner._setup()
if not self._no_guardian and runner._agent_runtime:
from framework.agents.hive_coder.guardian import attach_guardian
attach_guardian(runner._agent_runtime, runner._tool_registry)
if runner._agent_runtime and not runner._agent_runtime.is_running:
await runner._agent_runtime.start()
self._runner = runner
self.runtime = runner._agent_runtime
except CredentialError as e:
self.status_bar.set_graph_id("")
self.notify(f"Credential error: {e}", severity="error", timeout=10)
return
except Exception as e:
self.status_bar.set_graph_id("")
self.notify(f"Failed to load agent: {e}", severity="error", timeout=10)
return
# 4. Mount new widgets and subscribe to events
self._mount_agent_widgets()
await self._init_runtime_connection()
# Clear resume state for subsequent loads
self._resume_session = None
self._resume_checkpoint = None
self.notify(f"Agent loaded: {agent_name}", severity="information", timeout=3)
# -- Agent picker --
def _show_agent_picker_initial(self) -> None:
"""Show the agent picker on initial startup (no agent loaded)."""
from framework.tui.screens.agent_picker import AgentPickerScreen, discover_agents
agents = discover_agents()
if not agents:
self.notify("No agents found in exports/ or examples/", severity="error", timeout=5)
self.set_timer(2.0, self.exit)
return
def _on_initial_pick(result: str | None) -> None:
if result is None:
self.exit()
return
self._do_load_agent(result)
self.push_screen(AgentPickerScreen(agents), callback=_on_initial_pick)
def action_show_agent_picker(self) -> None:
"""Open the agent picker (Ctrl+A or /agents)."""
from framework.tui.screens.agent_picker import AgentPickerScreen, discover_agents
agents = discover_agents()
if not agents:
self.notify("No agents found", severity="error", timeout=5)
return
def _on_pick(result: str | None) -> None:
if result is not None:
self._do_load_agent(result)
self.push_screen(AgentPickerScreen(agents), callback=_on_pick)
@work(exclusive=True)
async def _do_load_agent(self, agent_path: str) -> None:
"""Worker wrapper for _load_and_switch_agent."""
await self._load_and_switch_agent(agent_path)
# -- Escalation to Hive Coder --
@work(exclusive=True, group="escalation")
async def _do_escalate_to_coder(
self,
reason: str = "",
context: str = "",
node_id: str = "",
) -> None:
"""Push current agent onto stack and load hive_coder."""
from pathlib import Path
from framework.credentials.models import CredentialError
from framework.runner import AgentRunner
from framework.tools.session_graph_tools import register_graph_tools
if self.runtime is None:
self.notify("No active agent to escalate from", severity="error")
return
# 1. Save current state (do NOT cleanup — worker stays alive)
saved = {
"runner": self._runner,
"runtime": self.runtime,
"blocked_node_id": node_id,
}
self._escalation_stack.append(saved)
# Unsubscribe from worker events
if hasattr(self, "_subscription_id"):
try:
self.runtime.unsubscribe_from_events(self._subscription_id)
except Exception:
pass
del self._subscription_id
# Remember worker agent path for coder context
worker_path = ""
if self._runner and hasattr(self._runner, "agent_path"):
worker_path = str(self._runner.agent_path.resolve())
# 2. Remove worker widgets (they get destroyed)
workspace = self.query_one("#agent-workspace", Horizontal)
for child in list(workspace.children):
child.remove()
self.graph_view = None
self.chat_repl = None
# 3. Show loading state
self.status_bar.set_graph_id("Loading Hive Coder...")
self.notify("Escalating to Hive Coder...", timeout=3)
# 4. Load hive_coder
framework_agents_dir = Path(__file__).resolve().parent.parent / "agents"
hive_coder_path = framework_agents_dir / "hive_coder"
try:
runner = AgentRunner.load(hive_coder_path, model=self._model)
if runner._agent_runtime is None:
runner._setup()
coder_runtime = runner._agent_runtime
coder_runtime._graph_id = "hive_coder"
coder_runtime._active_graph_id = "hive_coder"
# Register graph lifecycle tools
register_graph_tools(runner._tool_registry, coder_runtime)
coder_runtime._tools = list(runner._tool_registry.get_tools().values())
coder_runtime._tool_executor = runner._tool_registry.get_executor()
if not coder_runtime.is_running:
await coder_runtime.start()
self._runner = runner
self.runtime = coder_runtime
except (CredentialError, Exception) as e:
self.status_bar.set_graph_id("")
self.notify(f"Failed to load coder: {e}", severity="error", timeout=10)
self._restore_from_escalation_stack()
return
# 5. Mount coder widgets and subscribe
self._mount_agent_widgets()
await self._init_runtime_connection()
self.status_bar.set_graph_id("hive_coder (escalated)")
# 6. Auto-trigger coder with escalation context
escalation_input = self._build_escalation_input(reason, context, worker_path)
try:
import asyncio
entry_points = self.runtime.get_entry_points()
if entry_points:
ep = entry_points[0]
future = asyncio.run_coroutine_threadsafe(
self.runtime.trigger(
entry_point_id=ep.id,
input_data={"user_request": escalation_input},
),
self.chat_repl._agent_loop,
)
exec_id = await asyncio.wrap_future(future)
self.chat_repl._current_exec_id = exec_id
except Exception as e:
self.notify(f"Error starting coder: {e}", severity="error")
self.notify(
"Hive Coder loaded. Ctrl+E or /back to return.",
severity="information",
timeout=5,
)
self.refresh_bindings()
def _build_escalation_input(self, reason: str, context: str, worker_path: str) -> str:
"""Compose the user_request string for hive_coder."""
parts = []
if worker_path:
parts.append(
f"Modify the agent at: {worker_path}\n"
f"Do NOT ask which agent to modify — it is the path above."
)
if reason:
parts.append(f"Problem: {reason}")
if context:
parts.append(f"Context:\n{context}")
if not parts:
parts.append("The user needs help modifying their agent.")
return "\n\n".join(parts)
async def _return_from_escalation(self, summary: str = "") -> None:
"""Pop escalation stack and restore the worker agent."""
if not self._escalation_stack:
self.notify("No escalation to return from", severity="warning")
return
# 1. Tear down coder
self._unmount_agent_widgets()
if self._runner is not None:
try:
await self._runner.cleanup_async()
except Exception:
pass
# 2. Restore worker
saved = self._escalation_stack.pop()
self._runner = saved["runner"]
self.runtime = saved["runtime"]
# 3. Mount fresh widgets for the worker runtime
self._mount_agent_widgets()
await self._init_runtime_connection()
graph_id = self.runtime.graph.id if self.runtime else ""
self.status_bar.set_graph_id(graph_id)
# 4. Inject return message to unblock the worker node
blocked_node_id = saved.get("blocked_node_id", "")
return_msg = summary or "Coder session completed. Continuing."
if blocked_node_id:
try:
import asyncio
future = asyncio.run_coroutine_threadsafe(
self.runtime.inject_input(blocked_node_id, return_msg),
self.chat_repl._agent_loop,
)
await asyncio.wrap_future(future)
except Exception as e:
self.notify(
f"Could not resume worker: {e}",
severity="warning",
timeout=5,
)
# 5. Show return in chat (deferred — widgets need a tick to mount)
def _show_return():
if self.chat_repl:
self.chat_repl._write_history("[bold cyan]Returned from Hive Coder.[/bold cyan]")
if summary:
self.chat_repl._write_history(f"[dim]{summary}[/dim]")
self.call_later(_show_return)
self.notify("Returned to worker agent", severity="information", timeout=3)
self.refresh_bindings()
def _restore_from_escalation_stack(self) -> None:
"""Emergency restore when coder loading fails."""
if not self._escalation_stack:
return
saved = self._escalation_stack.pop()
self._runner = saved["runner"]
self.runtime = saved["runtime"]
self._mount_agent_widgets()
self.call_later(self._init_runtime_connection)
# -- Logging --
def _setup_logging_queue(self) -> None:
"""Setup a thread-safe queue for logs."""
@@ -302,7 +660,7 @@ class AdenTUI(App):
def _poll_logs(self) -> None:
"""Poll the log queue and update UI."""
if not self.is_ready or self.chat_repl is None:
return
try:
@@ -316,6 +674,8 @@ class AdenTUI(App):
except Exception:
pass
# -- Runtime event routing --
_EVENT_TYPES = [
EventType.LLM_TEXT_DELTA,
EventType.CLIENT_OUTPUT_DELTA,
@@ -342,6 +702,7 @@ class AdenTUI(App):
EventType.EDGE_TRAVERSED,
EventType.EXECUTION_PAUSED,
EventType.EXECUTION_RESUMED,
EventType.ESCALATION_REQUESTED,
]
_LOG_PANE_EVENTS = frozenset(_EVENT_TYPES) - {
@@ -384,17 +745,38 @@ class AdenTUI(App):
def _route_event(self, event: AgentEvent) -> None:
"""Route incoming events to widgets. Runs on Textual's main thread."""
if not self.is_ready or self.chat_repl is None:
return
try:
et = event.type
# --- Multi-graph filtering ---
# If the event has a graph_id and it's not the active graph,
# show a notification for important events and drop the rest.
if event.graph_id is not None and event.graph_id != self.runtime.active_graph_id:
if et == EventType.CLIENT_INPUT_REQUESTED:
self.notify(
f"[bold]{event.graph_id}[/bold] is waiting for input",
severity="warning",
timeout=10,
)
elif et == EventType.EXECUTION_FAILED:
error = event.data.get("error", "Unknown error")[:60]
self.notify(
f"[bold red]{event.graph_id}[/bold red] failed: {error}",
severity="error",
timeout=10,
)
elif et == EventType.EXECUTION_COMPLETED:
self.notify(
f"[bold green]{event.graph_id}[/bold green] completed",
severity="information",
timeout=5,
)
# All other background events are silently dropped (visible in logs)
return
# --- Chat REPL events ---
if et in (EventType.LLM_TEXT_DELTA, EventType.CLIENT_OUTPUT_DELTA):
self.chat_repl.handle_text_delta(
@@ -419,6 +801,14 @@ class AdenTUI(App):
elif et == EventType.CLIENT_INPUT_REQUESTED:
self.chat_repl.handle_input_requested(
event.node_id or event.data.get("node_id", ""),
graph_id=event.graph_id,
)
elif et == EventType.ESCALATION_REQUESTED:
self.chat_repl.handle_escalation_requested(event.data)
self._do_escalate_to_coder(
reason=event.data.get("reason", ""),
context=event.data.get("context", ""),
node_id=event.node_id or "",
)
elif et == EventType.NODE_LOOP_STARTED:
self.chat_repl.handle_node_started(event.node_id or "")
@@ -451,6 +841,7 @@ class AdenTUI(App):
self.chat_repl.handle_constraint_violation(event.data)
# --- Graph view events ---
if self.graph_view is not None:
if et in (
EventType.EXECUTION_STARTED,
EventType.EXECUTION_COMPLETED,
@@ -504,7 +895,10 @@ class AdenTUI(App):
elif et == EventType.EXECUTION_FAILED:
self.status_bar.set_failed(event.data.get("error", ""))
elif et == EventType.NODE_LOOP_STARTED:
nid = event.node_id or ""
node = self.runtime.graph.get_node(nid)
name = node.name if node else nid
self.status_bar.set_active_node(name, "thinking...")
elif et == EventType.NODE_LOOP_ITERATION:
self.status_bar.set_node_detail(f"step {event.data.get('iteration', '?')}")
elif et == EventType.TOOL_CALL_STARTED:
@@ -544,40 +938,62 @@ class AdenTUI(App):
exc_info=True,
)
# -- Actions --
def action_switch_graph(self, graph_id: str) -> None:
"""Switch the active graph focus in the TUI."""
if self.runtime is None:
return
try:
self.runtime.active_graph_id = graph_id
except ValueError:
self.notify(f"Graph '{graph_id}' not found", severity="error", timeout=3)
return
# Update status bar
self.status_bar.set_graph_id(graph_id)
# Update graph view
reg = self.runtime.get_graph_registration(graph_id)
if reg and self.graph_view:
self.graph_view.switch_graph(reg.graph)
# Flush chat streaming state
if self.chat_repl:
self.chat_repl.flush_streaming()
self.notify(f"Switched to graph: {graph_id}", severity="information", timeout=3)
def save_screenshot(self, filename: str | None = None) -> str:
"""Save a screenshot of the current screen as SVG (viewable in browsers).
Args:
filename: Optional filename for the screenshot. If None, generates timestamp-based name.
Returns:
Path to the saved SVG file.
"""
"""Save a screenshot of the current screen as SVG (viewable in browsers)."""
from datetime import datetime
from pathlib import Path
# Create screenshots directory
screenshots_dir = Path("screenshots")
screenshots_dir.mkdir(exist_ok=True)
# Generate filename if not provided
if filename is None:
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"tui_screenshot_{timestamp}.svg"
# Ensure .svg extension
if not filename.endswith(".svg"):
filename += ".svg"
# Full path
filepath = screenshots_dir / filename
# Temporarily hide borders for cleaner screenshot
from framework.tui.widgets.chat_repl import ChatRepl
try:
chat_widget = self.query_one(ChatRepl)
except Exception:
# No ChatRepl mounted yet
svg_data = self.export_screenshot()
filepath.write_text(svg_data, encoding="utf-8")
return str(filepath)
original_chat_border = chat_widget.styles.border_left
chat_widget.styles.border_left = ("none", "transparent")
# Hide all TextArea widget borders
input_widgets = self.query("ChatTextArea")
original_input_borders = []
for input_widget in input_widgets:
@@ -585,11 +1001,9 @@ class AdenTUI(App):
input_widget.styles.border = ("none", "transparent")
try:
# Get SVG data from Textual and save it
svg_data = self.export_screenshot()
filepath.write_text(svg_data, encoding="utf-8")
finally:
# Restore the original borders
chat_widget.styles.border_left = original_chat_border
for i, input_widget in enumerate(input_widgets):
input_widget.styles.border = original_input_borders[i]
@@ -610,15 +1024,18 @@ class AdenTUI(App):
def action_toggle_logs(self) -> None:
"""Toggle inline log display in chat (bound to Ctrl+L)."""
if self.chat_repl is None:
return
self.chat_repl.toggle_logs()
mode = "ON" if self.chat_repl._show_logs else "OFF"
self.notify(f"Logs {mode}", severity="information", timeout=2)
def action_pause_execution(self) -> None:
"""Immediately pause execution by cancelling task (bound to Ctrl+Z)."""
if self.chat_repl is None or self.runtime is None:
return
try:
if not self.chat_repl._current_exec_id:
self.notify(
"No active execution to pause",
severity="information",
@@ -626,16 +1043,26 @@ class AdenTUI(App):
)
return
# Find and cancel the execution task - executor will catch and save state
task_cancelled = False
all_streams = []
active_reg = self.runtime.get_graph_registration(self.runtime.active_graph_id)
if active_reg:
all_streams.extend(active_reg.streams.values())
for gid in self.runtime.list_graphs():
if gid == self.runtime.active_graph_id:
continue
reg = self.runtime.get_graph_registration(gid)
if reg:
all_streams.extend(reg.streams.values())
for stream in all_streams:
exec_id = self.chat_repl._current_exec_id
task = stream._execution_tasks.get(exec_id)
if task and not task.done():
task.cancel()
task_cancelled = True
self.notify(
"Execution paused - state saved",
"Execution paused - state saved",
severity="information",
timeout=3,
)
@@ -656,10 +1083,10 @@ class AdenTUI(App):
async def action_show_sessions(self) -> None:
"""Show sessions list (bound to Ctrl+R)."""
# Send /sessions command to chat input
if self.chat_repl is None:
return
try:
await self.chat_repl._submit_input("/sessions")
except Exception:
self.notify(
"Use /sessions command to see all sessions",
@@ -667,59 +1094,62 @@ class AdenTUI(App):
timeout=3,
)
def check_action(self, action: str, parameters: tuple[object, ...]) -> bool | None:
"""Control which bindings are shown in the footer.
Both escalate_to_coder and return_from_coder are bound to Ctrl+E.
check_action toggles which one is active based on escalation state,
so the footer shows "Coder" or "← Back" accordingly.
"""
if action == "escalate_to_coder":
return not self._escalation_stack
if action == "return_from_coder":
return bool(self._escalation_stack)
return True
def action_escalate_to_coder(self) -> None:
"""Escalate to Hive Coder (bound to Ctrl+E)."""
if self.runtime is None:
self.notify("No active agent to escalate from", severity="error")
return
# _do_escalate_to_coder is already @work-decorated; calling it starts the worker.
self._do_escalate_to_coder(reason="User-initiated escalation")
async def action_return_from_coder(self) -> None:
"""Return from Hive Coder to worker agent (Ctrl+E toggles)."""
await self._return_from_escalation()
async def on_unmount(self) -> None:
"""Cleanup on app shutdown - cancel execution which will save state."""
self.is_ready = False
# Cancel any active execution
try:
import asyncio
if self.chat_repl and self.chat_repl._current_exec_id and self.runtime:
all_streams = []
for gid in self.runtime.list_graphs():
reg = self.runtime.get_graph_registration(gid)
if reg:
all_streams.extend(reg.streams.values())
for stream in all_streams:
exec_id = self.chat_repl._current_exec_id
task = stream._execution_tasks.get(exec_id)
if task and not task.done():
# Cancel the task - executor will catch and save state
task.cancel()
try:
# Wait for executor to save state (may take a few seconds)
# Longer timeout for quit to ensure state is properly saved
await asyncio.wait_for(task, timeout=5.0)
except (TimeoutError, asyncio.CancelledError):
# Expected - task was cancelled
# If timeout, state may not be fully saved
pass
except Exception:
# Ignore other exceptions during cleanup
pass
break
except Exception:
pass
try:
if hasattr(self, "_subscription_id"):
if hasattr(self, "_subscription_id") and self.runtime:
self.runtime.unsubscribe_from_events(self._subscription_id)
except Exception:
pass
+234
View File
@@ -0,0 +1,234 @@
"""Agent picker ModalScreen for selecting agents within the TUI."""
from __future__ import annotations
import json
from dataclasses import dataclass, field
from pathlib import Path
from rich.console import Group
from rich.text import Text
from textual.app import ComposeResult
from textual.binding import Binding
from textual.containers import Vertical
from textual.screen import ModalScreen
from textual.widgets import Label, OptionList, TabbedContent, TabPane
from textual.widgets._option_list import Option
@dataclass
class AgentEntry:
"""Lightweight agent metadata for the picker."""
path: Path
name: str
description: str
category: str
session_count: int = 0
node_count: int = 0
tool_count: int = 0
tags: list[str] = field(default_factory=list)
def _count_sessions(agent_name: str) -> int:
"""Count session directories under ~/.hive/agents/{agent_name}/sessions/."""
sessions_dir = Path.home() / ".hive" / "agents" / agent_name / "sessions"
if not sessions_dir.exists():
return 0
return sum(1 for d in sessions_dir.iterdir() if d.is_dir() and d.name.startswith("session_"))
def _extract_agent_stats(agent_json_path: Path) -> tuple[int, int, list[str]]:
"""Extract node count, tool count, and tags from agent.json."""
try:
data = json.loads(agent_json_path.read_text())
nodes = data.get("nodes", [])
node_count = len(nodes)
tools: set[str] = set()
for node in nodes:
tools.update(node.get("tools", []))
tags = data.get("agent", {}).get("tags", [])
return node_count, len(tools), tags
except Exception:
return 0, 0, []
def discover_agents() -> dict[str, list[AgentEntry]]:
"""Discover agents from all known sources grouped by category."""
from framework.runner.cli import (
_extract_python_agent_metadata,
_get_framework_agents_dir,
_is_valid_agent_dir,
)
groups: dict[str, list[AgentEntry]] = {}
sources = [
("Your Agents", Path("exports")),
("Framework", _get_framework_agents_dir()),
("Examples", Path("examples/templates")),
]
for category, base_dir in sources:
if not base_dir.exists():
continue
entries: list[AgentEntry] = []
for path in sorted(base_dir.iterdir(), key=lambda p: p.name):
if not _is_valid_agent_dir(path):
continue
agent_json = path / "agent.json"
node_count, tool_count, tags = 0, 0, []
if agent_json.exists():
try:
data = json.loads(agent_json.read_text())
meta = data.get("agent", {})
name = meta.get("name", path.name)
desc = meta.get("description", "")
except Exception:
name = path.name
desc = "(error reading agent.json)"
node_count, tool_count, tags = _extract_agent_stats(agent_json)
else:
name, desc = _extract_python_agent_metadata(path)
entries.append(
AgentEntry(
path=path,
name=name,
description=desc,
category=category,
session_count=_count_sessions(path.name),
node_count=node_count,
tool_count=tool_count,
tags=tags,
)
)
if entries:
groups[category] = entries
return groups
def _render_agent_option(agent: AgentEntry) -> Group:
"""Build a Rich renderable for a single agent option."""
# Line 1: name + session badge
line1 = Text()
line1.append(agent.name, style="bold")
if agent.session_count:
line1.append(f" {agent.session_count} sessions", style="dim cyan")
# Line 2: description (word-wrapped by the widget)
desc = agent.description if agent.description else "No description"
line2 = Text(desc, style="dim")
# Line 3: stats chips
chips = Text()
if agent.node_count:
chips.append(f" {agent.node_count} nodes ", style="on dark_green white")
chips.append(" ")
if agent.tool_count:
chips.append(f" {agent.tool_count} tools ", style="on dark_blue white")
chips.append(" ")
for tag in agent.tags[:3]:
chips.append(f" {tag} ", style="on grey37 white")
chips.append(" ")
parts = [line1, line2]
if chips.plain.strip():
parts.append(chips)
return Group(*parts)
class AgentPickerScreen(ModalScreen[str | None]):
"""Modal screen showing available agents organized by tabbed categories.
Returns the selected agent path as a string, or None if dismissed.
"""
BINDINGS = [
Binding("escape", "dismiss_picker", "Cancel"),
]
DEFAULT_CSS = """
AgentPickerScreen {
align: center middle;
}
#picker-container {
width: 90%;
max-width: 120;
height: 85%;
background: $surface;
border: heavy $primary;
padding: 1 2;
}
#picker-title {
text-align: center;
text-style: bold;
width: 100%;
color: $text;
}
#picker-subtitle {
text-align: center;
width: 100%;
margin-bottom: 1;
}
#picker-footer {
text-align: center;
width: 100%;
margin-top: 1;
}
TabPane {
padding: 0;
}
OptionList {
height: 1fr;
}
OptionList > .option-list--option {
padding: 1 2;
}
"""
def __init__(self, agent_groups: dict[str, list[AgentEntry]]) -> None:
super().__init__()
self._groups = agent_groups
# Map (tab_id, option_index) -> AgentEntry
self._option_map: dict[str, dict[int, AgentEntry]] = {}
def compose(self) -> ComposeResult:
total = sum(len(v) for v in self._groups.values())
with Vertical(id="picker-container"):
yield Label("Hive Agent Launcher", id="picker-title")
yield Label(
f"[dim]{total} agents available[/dim]",
id="picker-subtitle",
)
with TabbedContent():
for category, agents in self._groups.items():
tab_id = category.lower().replace(" ", "-")
with TabPane(f"{category} ({len(agents)})", id=tab_id):
option_list = OptionList(id=f"list-{tab_id}")
self._option_map[f"list-{tab_id}"] = {}
for i, agent in enumerate(agents):
option_list.add_option(
Option(
_render_agent_option(agent),
id=str(agent.path),
)
)
self._option_map[f"list-{tab_id}"][i] = agent
yield option_list
yield Label(
"[dim]Enter[/dim] Select [dim]Tab[/dim] Switch category [dim]Esc[/dim] Cancel",
id="picker-footer",
)
def on_option_list_option_selected(self, event: OptionList.OptionSelected) -> None:
list_id = event.option_list.id or ""
idx = event.option_index
agent_map = self._option_map.get(list_id, {})
agent = agent_map.get(idx)
if agent:
self.dismiss(str(agent.path))
def action_dismiss_picker(self) -> None:
self.dismiss(None)
+270 -30
View File
@@ -2,9 +2,12 @@
Chat / REPL Widget - Uses RichLog for append-only, selection-safe display.
Streaming display approach:
- The #streaming-output RichLog shows live LLM output as it streams in.
Each text delta appends new tokens so the user sees the full response forming.
- On flush (tool call, node switch, execution complete, input requested) the
accumulated text is written to #chat-history as permanent history and the
streaming area is cleared.
- The #processing-indicator Label shows brief status messages (tool names, etc.).
- Tool events are written directly to RichLog as discrete status lines.
Client-facing input:
@@ -22,6 +25,7 @@ import threading
from pathlib import Path
from typing import Any
from rich.text import Text
from textual.app import ComposeResult
from textual.containers import Vertical
from textual.message import Message
@@ -77,6 +81,18 @@ class ChatRepl(Vertical):
scrollbar-color: $primary;
}
ChatRepl > #streaming-output {
width: 100%;
height: auto;
min-height: 0;
max-height: 50%;
background: $surface;
border: none;
display: none;
scrollbar-background: $panel;
scrollbar-color: $primary;
}
ChatRepl > #processing-indicator {
width: 100%;
height: 1;
@@ -111,8 +127,10 @@ class ChatRepl(Vertical):
self.runtime = runtime
self._current_exec_id: str | None = None
self._streaming_snapshot: str = ""
self._streaming_written: int = 0 # chars already written to streaming-output
self._waiting_for_input: bool = False
self._input_node_id: str | None = None
self._input_graph_id: str | None = None
self._pending_ask_question: str = ""
self._active_node_id: str | None = None # Currently executing node
self._resume_session = resume_session
@@ -142,6 +160,14 @@ class ChatRepl(Vertical):
wrap=True,
min_width=0,
)
yield RichLog(
id="streaming-output",
highlight=True,
markup=True,
auto_scroll=True,
wrap=True,
min_width=0,
)
yield Label("Agent is processing...", id="processing-indicator")
yield ChatTextArea(id="chat-input", placeholder="Enter input for agent...")
@@ -208,6 +234,13 @@ class ChatRepl(Vertical):
[bold]/resume[/bold] <session_id> - Resume session by ID
[bold]/recover[/bold] <session_id> <cp_id> - Recover from specific checkpoint
[bold]/pause[/bold] - Pause current execution (Ctrl+Z)
[bold]/agents[/bold] - Browse and switch agents (Ctrl+A)
[bold]/coder[/bold] [reason] - Escalate to Hive Coder for code changes
[bold]/back[/bold] [summary] - Return from Hive Coder to worker agent
[bold]/graphs[/bold] - List loaded graphs and their status
[bold]/graph[/bold] <id> - Switch active graph focus
[bold]/load[/bold] <path> - Load an agent graph into the session
[bold]/unload[/bold] <id> - Remove a graph from the session
[bold]/help[/bold] - Show this help message
[dim]Examples:[/dim]
@@ -216,6 +249,10 @@ class ChatRepl(Vertical):
/detach [dim]# Remove attached PDF[/dim]
/sessions [dim]# List all sessions[/dim]
/resume 1 [dim]# Resume first listed session[/dim]
/graphs [dim]# Show loaded agent graphs[/dim]
/graph email_agent [dim]# Switch focus to email_agent[/dim]
/load exports/email_agent [dim]# Load agent into session[/dim]
/unload email_agent [dim]# Remove agent from session[/dim]
/pause [dim]# Pause (or Ctrl+Z)[/dim]
""")
elif cmd == "/sessions":
@@ -268,6 +305,33 @@ class ChatRepl(Vertical):
self._write_history("[dim]No PDF attached.[/dim]")
elif cmd == "/pause":
await self._cmd_pause()
elif cmd == "/agents":
app = self.app
if hasattr(app, "action_show_agent_picker"):
app.action_show_agent_picker()
elif cmd == "/graphs":
self._cmd_graphs()
elif cmd == "/graph":
if len(parts) < 2:
self._write_history("[bold red]Usage:[/bold red] /graph <graph_id>")
else:
self._cmd_switch_graph(parts[1].strip())
elif cmd == "/load":
if len(parts) < 2:
self._write_history("[bold red]Usage:[/bold red] /load <agent_path>")
else:
await self._cmd_load_graph(parts[1].strip())
elif cmd == "/unload":
if len(parts) < 2:
self._write_history("[bold red]Usage:[/bold red] /unload <graph_id>")
else:
await self._cmd_unload_graph(parts[1].strip())
elif cmd == "/coder":
reason = " ".join(parts[1:]) if len(parts) > 1 else ""
await self._cmd_coder(reason)
elif cmd == "/back":
summary = " ".join(parts[1:]) if len(parts) > 1 else ""
await self._cmd_back(summary)
else:
self._write_history(
f"[bold red]Unknown command:[/bold red] {cmd}\n"
@@ -769,6 +833,150 @@ class ChatRepl(Vertical):
if not task_cancelled:
self._write_history("[bold yellow]Execution already completed[/bold yellow]")
async def _cmd_coder(self, reason: str = "") -> None:
"""User-initiated escalation to Hive Coder."""
app = self.app
if not hasattr(app, "_do_escalate_to_coder"):
self._write_history("[bold red]Escalation not available[/bold red]")
return
context_parts = []
if self._active_node_id:
context_parts.append(f"Active node: {self._active_node_id}")
if self._streaming_snapshot:
snippet = self._streaming_snapshot[:500]
context_parts.append(f"Last agent output: {snippet}")
context = "\n".join(context_parts)
if not reason:
reason = "User-initiated escalation via /coder"
self._write_history("[bold cyan]Escalating to Hive Coder...[/bold cyan]")
node_id = self._input_node_id or self._active_node_id or ""
app._do_escalate_to_coder(
reason=reason,
context=context,
node_id=node_id,
)
async def _cmd_back(self, summary: str = "") -> None:
"""Return from Hive Coder to the worker agent."""
app = self.app
if not hasattr(app, "_escalation_stack"):
self._write_history("[bold yellow]Not in an escalation.[/bold yellow]")
return
if not app._escalation_stack:
self._write_history(
"[bold yellow]Not in an escalation.[/bold yellow] "
"/back is only available after /coder or agent escalation."
)
return
self._write_history("[bold cyan]Returning to worker agent...[/bold cyan]")
await app._return_from_escalation(summary)
def _cmd_graphs(self) -> None:
"""List all loaded graphs and their status."""
graphs = self.runtime.list_graphs()
if not graphs:
self._write_history("[dim]No graphs loaded[/dim]")
return
lines = ["[bold cyan]Loaded Graphs:[/bold cyan]"]
for gid in graphs:
reg = self.runtime.get_graph_registration(gid)
if reg is None:
continue
is_primary = gid == self.runtime.graph_id
is_active = gid == self.runtime.active_graph_id
markers = []
if is_primary:
markers.append("primary")
if is_active:
markers.append("active")
marker_str = f" [dim]({', '.join(markers)})[/dim]" if markers else ""
ep_list = ", ".join(reg.entry_points.keys())
active_execs = sum(len(s.active_execution_ids) for s in reg.streams.values())
exec_str = f" [green]{active_execs} running[/green]" if active_execs else ""
lines.append(f" [bold]{gid}[/bold]{marker_str} — eps: {ep_list}{exec_str}")
self._write_history("\n".join(lines))
def _cmd_switch_graph(self, graph_id: str) -> None:
"""Switch the active graph focus."""
try:
self.runtime.active_graph_id = graph_id
except ValueError:
self._write_history(
f"[bold red]Graph '{graph_id}' not found.[/bold red] "
"Use /graphs to see loaded graphs."
)
return
# Tell the app to update the UI
app = self.app
if hasattr(app, "action_switch_graph"):
app.action_switch_graph(graph_id)
else:
self._write_history(f"[bold green]Switched to graph: {graph_id}[/bold green]")
async def _cmd_load_graph(self, agent_path: str) -> None:
"""Load an agent graph into the session."""
from pathlib import Path
path = Path(agent_path).resolve()
if not path.exists():
self._write_history(f"[bold red]Path does not exist:[/bold red] {path}")
return
self._write_history(f"[dim]Loading agent from {path}...[/dim]")
try:
from framework.runner.runner import AgentRunner
graph_id = await AgentRunner.setup_as_secondary(path, self.runtime)
self._write_history(
f"[bold green]Loaded graph '{graph_id}'[/bold green] — "
"use /graphs to see all, /graph to switch"
)
except Exception as e:
self._write_history(f"[bold red]Failed to load agent:[/bold red] {e}")
async def _cmd_unload_graph(self, graph_id: str) -> None:
"""Unload a secondary graph from the session."""
try:
await self.runtime.remove_graph(graph_id)
self._write_history(f"[bold green]Unloaded graph '{graph_id}'[/bold green]")
except ValueError as e:
self._write_history(f"[bold red]Error:[/bold red] {e}")
def _node_label(self, node_id: str | None = None) -> str:
"""Resolve a node_id to a Rich-formatted speaker label."""
nid = node_id or self._active_node_id
if nid:
node = self.runtime.graph.get_node(nid)
name = node.name if node else nid
return f"[bold blue]{name}:[/bold blue]"
return "[bold blue]Agent:[/bold blue]"
def _clear_streaming(self) -> None:
"""Reset streaming state and hide the live output area."""
self._streaming_snapshot = ""
self._streaming_written = 0
stream_log = self.query_one("#streaming-output", RichLog)
stream_log.clear()
stream_log.display = False
def flush_streaming(self) -> None:
"""Flush any accumulated streaming text to history.
Called by the app when switching graphs to ensure in-progress
streaming content is preserved before the UI context changes.
"""
if self._streaming_snapshot:
self._write_history(f"{self._node_label()} {self._streaming_snapshot}")
self._clear_streaming()
def on_mount(self) -> None:
"""Add welcome message and check for resumable sessions."""
history = self.query_one("#chat-history", RichLog)
@@ -903,11 +1111,13 @@ class ChatRepl(Vertical):
indicator.update("Thinking...")
node_id = self._input_node_id
graph_id = self._input_graph_id
self._input_node_id = None
self._input_graph_id = None
try:
future = asyncio.run_coroutine_threadsafe(
self.runtime.inject_input(node_id, user_input, graph_id=graph_id),
self._agent_loop,
)
await asyncio.wrap_future(future)
@@ -956,7 +1166,7 @@ class ChatRepl(Vertical):
input_key = "input"
# Reset streaming state
self._streaming_snapshot = ""
self._clear_streaming()
# Show processing indicator
indicator.update("Thinking...")
@@ -1004,34 +1214,45 @@ class ChatRepl(Vertical):
previous node and resets the processing indicator so the user
sees a clean transition between graph nodes.
"""
# Flush stale snapshot with the PREVIOUS node's label before switching
if self._streaming_snapshot:
self._write_history(f"{self._node_label()} {self._streaming_snapshot}")
self._clear_streaming()
self._active_node_id = node_id
indicator = self.query_one("#processing-indicator", Label)
indicator.update("Thinking...")
def handle_loop_iteration(self, iteration: int) -> None:
"""Flush accumulated streaming text when a new loop iteration starts."""
if self._streaming_snapshot:
self._write_history(f"[bold blue]Agent:[/bold blue] {self._streaming_snapshot}")
self._streaming_snapshot = ""
self._write_history(f"{self._node_label()} {self._streaming_snapshot}")
self._clear_streaming()
def handle_text_delta(self, content: str, snapshot: str) -> None:
"""Handle a streaming text token from the LLM."""
self._streaming_snapshot = snapshot
# Stream into the live output area
stream_log = self.query_one("#streaming-output", RichLog)
if not stream_log.display:
stream_log.display = True
# Rewrite the full snapshot as a single block so text wraps
# naturally instead of one token per line.
stream_log.clear()
stream_log.write(Text.from_markup(f"{self._node_label()} {snapshot}"))
self._streaming_written = len(snapshot)
def handle_tool_started(self, tool_name: str, tool_input: dict[str, Any]) -> None:
"""Handle a tool call starting."""
# Flush any accumulated LLM text before the tool call starts.
# Without this, text from a turn that also issues tool calls
# would sit in _streaming_snapshot and get overwritten by the
# next LLM turn, never appearing in the chat log.
if self._streaming_snapshot:
self._write_history(f"{self._node_label()} {self._streaming_snapshot}")
self._clear_streaming()
indicator = self.query_one("#processing-indicator", Label)
if tool_name == "ask_user":
@@ -1041,6 +1262,10 @@ class ChatRepl(Vertical):
indicator.update("Preparing question...")
return
if tool_name == "escalate_to_coder":
indicator.update("Escalating to coder...")
return
# Update indicator to show tool activity
indicator.update(f"Using tool: {tool_name}...")
@@ -1052,9 +1277,7 @@ class ChatRepl(Vertical):
def handle_tool_completed(self, tool_name: str, result: str, is_error: bool) -> None:
"""Handle a tool call completing."""
if tool_name == "ask_user":
# Suppress the synthetic "Waiting for user input..." result.
# The actual question is displayed by handle_input_requested().
if tool_name in ("ask_user", "escalate_to_coder"):
return
result_str = str(result)
@@ -1080,14 +1303,14 @@ class ChatRepl(Vertical):
# Write the final streaming snapshot to permanent history (if any)
if self._streaming_snapshot:
self._write_history(f"[bold blue]Agent:[/bold blue] {self._streaming_snapshot}")
self._write_history(f"{self._node_label()} {self._streaming_snapshot}")
else:
output_str = str(output.get("output_string", output))
self._write_history(f"[bold blue]Agent:[/bold blue] {output_str}")
self._write_history(f"{self._node_label()} {output_str}")
self._write_history("") # separator
self._current_exec_id = None
self._streaming_snapshot = ""
self._clear_streaming()
self._waiting_for_input = False
self._input_node_id = None
self._active_node_id = None
@@ -1109,7 +1332,7 @@ class ChatRepl(Vertical):
self._write_history("") # separator
self._current_exec_id = None
self._streaming_snapshot = ""
self._clear_streaming()
self._waiting_for_input = False
self._pending_ask_question = ""
self._input_node_id = None
@@ -1122,7 +1345,18 @@ class ChatRepl(Vertical):
chat_input.placeholder = "Enter input for agent..."
chat_input.focus()
def handle_escalation_requested(self, data: dict) -> None:
"""Display escalation request from the worker agent."""
if self._streaming_snapshot:
self._write_history(f"{self._node_label()} {self._streaming_snapshot}")
self._clear_streaming()
reason = data.get("reason", "")
self._write_history("[bold yellow]Agent is escalating to Hive Coder[/bold yellow]")
if reason:
self._write_history(f"[dim]Reason: {reason}[/dim]")
def handle_input_requested(self, node_id: str, graph_id: str | None = None) -> None:
"""Handle a client-facing node requesting user input.
Transitions to 'waiting for input' state: flushes the current
@@ -1130,27 +1364,33 @@ class ChatRepl(Vertical):
and sets a flag so the next submission routes to inject_input().
"""
# Flush accumulated streaming text as agent output
label = self._node_label(node_id)
flushed_snapshot = self._streaming_snapshot
if flushed_snapshot:
self._write_history(f"[bold blue]Agent:[/bold blue] {flushed_snapshot}")
self._streaming_snapshot = ""
self._write_history(f"{label} {flushed_snapshot}")
self._clear_streaming()
# Display the ask_user question if stashed and not already
# present in the streaming snapshot (avoids double-display).
question = self._pending_ask_question
self._pending_ask_question = ""
if question and question not in flushed_snapshot:
self._write_history(f"[bold blue]Agent:[/bold blue] {question}")
self._write_history(f"{label} {question}")
self._waiting_for_input = True
self._input_node_id = node_id or None
self._input_graph_id = graph_id
indicator = self.query_one("#processing-indicator", Label)
indicator.update("Waiting for your input...")
chat_input = self.query_one("#chat-input", ChatTextArea)
chat_input.disabled = False
chat_input.placeholder = "Type your response..."
node = self.runtime.graph.get_node(node_id) if node_id else None
name = node.name if node else None
chat_input.placeholder = (
f"Type your response to {name}..." if name else "Type your response..."
)
chat_input.focus()
def handle_node_completed(self, node_id: str) -> None:
+31 -12
View File
@@ -52,12 +52,26 @@ class GraphOverview(Vertical):
def __init__(self, runtime: AgentRuntime):
super().__init__()
self.runtime = runtime
self._override_graph = None # Set by switch_graph() for secondary graphs
self.active_node: str | None = None
self.execution_path: list[str] = []
# Per-node status strings shown next to the node in the graph display.
# e.g. {"planner": "thinking...", "searcher": "web_search..."}
self._node_status: dict[str, str] = {}
@property
def _graph(self):
"""The graph currently being displayed (may be a secondary graph)."""
return self._override_graph or self.runtime.graph
def switch_graph(self, graph) -> None:
"""Switch to displaying a different graph and refresh."""
self._override_graph = graph
self.active_node = None
self.execution_path = []
self._node_status = {}
self._display_graph()
def compose(self) -> ComposeResult:
# Use RichLog for formatted output
yield RichLog(id="graph-display", highlight=True, markup=True)
@@ -75,7 +89,7 @@ class GraphOverview(Vertical):
def _topo_order(self) -> list[str]:
"""BFS from entry_node following edges."""
graph = self._graph
visited: list[str] = []
seen: set[str] = set()
queue = [graph.entry_node]
@@ -102,7 +116,7 @@ class GraphOverview(Vertical):
order_idx = {nid: i for i, nid in enumerate(ordered)}
back_edges: list[dict] = []
for node_id in ordered:
for edge in self._graph.get_outgoing_edges(node_id):
target_idx = order_idx.get(edge.target, -1)
source_idx = order_idx.get(node_id, -1)
if target_idx != -1 and target_idx <= source_idx:
@@ -129,7 +143,7 @@ class GraphOverview(Vertical):
def _render_node_line(self, node_id: str) -> str:
"""Render a single node with status symbol and optional status text."""
graph = self._graph
is_terminal = node_id in (graph.terminal_nodes or [])
is_active = node_id == self.active_node
is_done = node_id in self.execution_path and not is_active
@@ -160,7 +174,7 @@ class GraphOverview(Vertical):
Back-edges are excluded here; they are drawn by the return-channel
overlay in Pass 2.
"""
all_edges = self._graph.get_outgoing_edges(node_id)
if not all_edges:
return []
@@ -399,7 +413,7 @@ class GraphOverview(Vertical):
display = self.query_one("#graph-display", RichLog)
display.clear()
graph = self._graph
display.write(f"[bold cyan]Agent Graph:[/bold cyan] {graph.id}\n")
ordered = self._topo_order()
@@ -457,18 +471,23 @@ class GraphOverview(Vertical):
for ep in event_sources:
if ep.trigger_type == "timer":
cron_expr = ep.trigger_config.get("cron")
interval = ep.trigger_config.get("interval_minutes", "?")
schedule_label = f"cron: {cron_expr}" if cron_expr else f"every {interval} min"
display.write(f" [green]⏱[/green] {ep.name} [dim]→ {ep.entry_node}[/dim]")
# Show schedule + next fire countdown
next_fire = self.runtime._timer_next_fire.get(ep.id)
if next_fire is not None:
remaining = max(0, next_fire - time.monotonic())
hours, rem = divmod(int(remaining), 3600)
mins, secs = divmod(rem, 60)
if hours > 0:
countdown = f"{hours}h {mins:02d}m {secs:02d}s"
else:
countdown = f"{mins}m {secs:02d}s"
display.write(f" [dim]{schedule_label} — next in {countdown}[/dim]")
else:
display.write(f" [dim]{schedule_label}[/dim]")
elif ep.trigger_type in ("event", "webhook"):
display.write(f" [yellow]⚡[/yellow] {ep.name} [dim]→ {ep.entry_node}[/dim]")
@@ -510,7 +529,7 @@ class GraphOverview(Vertical):
self._node_status.clear()
self.execution_path.clear()
entry_node = event.data.get("entry_node") or (
self._graph.entry_node if self.runtime else None
)
if entry_node:
self.update_active_node(entry_node)
@@ -178,7 +178,12 @@ class SelectableRichLog(RichLog):
# Build full text from all lines
all_text = "\n".join(strip.text for strip in self.lines)
try:
extracted = sel.extract(all_text)
except (IndexError, ValueError):
# Selection coordinates can exceed line count when the virtual
# canvas is larger than the actual content (e.g. after scroll).
return None
return extracted if extracted else None
def copy_selection(self) -> str | None:
+4 -2
View File
@@ -147,16 +147,18 @@ class TestComposeSystemPrompt:
def test_identity_only(self):
result = compose_system_prompt(identity_prompt="I am an agent.", focus_prompt=None)
assert result == "I am an agent."
assert result.startswith("I am an agent.")
assert "Current date and time:" in result
def test_focus_only(self):
result = compose_system_prompt(identity_prompt=None, focus_prompt="Do the thing.")
assert "Current Focus" in result
assert "Do the thing." in result
assert "Current date and time:" in result
def test_empty(self):
result = compose_system_prompt(identity_prompt=None, focus_prompt=None)
assert result == ""
assert "Current date and time:" in result
class TestBuildNarrative:
+2 -2
View File
@@ -80,11 +80,11 @@ def goal():
def test_max_node_visits_default():
"""NodeSpec.max_node_visits should default to 1."""
"""NodeSpec.max_node_visits should default to 0 (unbounded, for forever-alive agents)."""
spec = NodeSpec(
id="n", name="N", description="test", node_type="event_loop", output_keys=["out"]
)
assert spec.max_node_visits == 0
# ---------------------------------------------------------------------------
+27 -4
View File
@@ -11,39 +11,61 @@ from framework.llm.litellm import LiteLLMProvider
from framework.llm.provider import LLMProvider
from framework.runner.orchestrator import AgentOrchestrator
# Patch config helpers so tests don't depend on local ~/.hive/configuration.json
_CONFIG_PATCHES = {
"framework.config.get_api_key": lambda: None,
"framework.config.get_api_base": lambda: None,
"framework.config.get_llm_extra_kwargs": lambda: {},
}
def _patched(fn):
"""Apply config patches to a test function."""
for target, side_effect in _CONFIG_PATCHES.items():
fn = patch(target, side_effect)(fn)
return fn
class TestOrchestratorLLMInitialization:
"""Test AgentOrchestrator LLM provider initialization."""
@_patched
def test_auto_creates_litellm_provider_when_no_llm_passed(self):
"""Test that LiteLLMProvider is auto-created when no llm is passed."""
with patch.object(LiteLLMProvider, "__init__", return_value=None) as mock_init:
orchestrator = AgentOrchestrator()
mock_init.assert_called_once_with(model="claude-haiku-4-5-20251001")
mock_init.assert_called_once_with(
model="claude-haiku-4-5-20251001", api_key=None, api_base=None
)
assert orchestrator._llm is not None
@_patched
def test_uses_custom_model_parameter(self):
"""Test that custom model parameter is passed to LiteLLMProvider."""
with patch.object(LiteLLMProvider, "__init__", return_value=None) as mock_init:
AgentOrchestrator(model="gpt-4o")
mock_init.assert_called_once_with(model="gpt-4o")
mock_init.assert_called_once_with(model="gpt-4o", api_key=None, api_base=None)
@_patched
def test_supports_openai_model_names(self):
"""Test that OpenAI model names are supported."""
with patch.object(LiteLLMProvider, "__init__", return_value=None) as mock_init:
orchestrator = AgentOrchestrator(model="gpt-4o-mini")
mock_init.assert_called_once_with(model="gpt-4o-mini")
mock_init.assert_called_once_with(model="gpt-4o-mini", api_key=None, api_base=None)
assert orchestrator._model == "gpt-4o-mini"
@_patched
def test_supports_anthropic_model_names(self):
"""Test that Anthropic model names are supported."""
with patch.object(LiteLLMProvider, "__init__", return_value=None) as mock_init:
orchestrator = AgentOrchestrator(model="claude-3-haiku-20240307")
mock_init.assert_called_once_with(model="claude-3-haiku-20240307")
mock_init.assert_called_once_with(
model="claude-3-haiku-20240307", api_key=None, api_base=None
)
assert orchestrator._model == "claude-3-haiku-20240307"
def test_skips_auto_creation_when_llm_passed(self):
@@ -56,6 +78,7 @@ class TestOrchestratorLLMInitialization:
mock_init.assert_not_called()
assert orchestrator._llm is mock_llm
@_patched
def test_model_attribute_stored_correctly(self):
"""Test that _model attribute is stored correctly."""
with patch.object(LiteLLMProvider, "__init__", return_value=None):
+112
View File
@@ -0,0 +1,112 @@
# Hive Coder: Meta-Agent Integration Plan
## Problem
The hive_coder agent currently has 7 file I/O tools (`read_file`, `write_file`, `edit_file`, `list_directory`, `search_files`, `run_command`, `undo_changes`) in `tools/coder_tools_server.py`. It can write agent packages but is **not integrated into the Hive ecosystem**:
1. **No dynamic tool discovery** — It references a static list of hive-tools in `reference/framework_guide.md`. It can't discover what MCP tools are actually available or what parameters they accept.
2. **No runtime observability** — It can't inspect sessions, checkpoints, or logs from agents it builds. When something goes wrong, the user has to manually dig through files.
3. **No test execution** — It can't run an agent's test suite structurally (it could use `run_command` with raw pytest, but has no structured test parsing).
## Solution
Add 8 new tools to `coder_tools_server.py` that give hive_coder deep integration with the Hive framework. Update the system prompt to teach the LLM when and how to use these meta-agent capabilities.
---
## New Tools
### 1. Tool Discovery
**`discover_mcp_tools(server_config_path?)`**
Connect to any MCP server and list all available tools with full schemas. Uses `framework.runner.mcp_client.MCPClient` — the same client the runtime uses. Reads a `mcp_servers.json` file (defaults to hive-tools), connects to each server, calls `list_tools()`, returns tool names + descriptions + input schemas, then disconnects.
This replaces the static tools reference. The LLM now discovers tools dynamically before designing an agent.
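A minimal sketch of that discovery loop, assuming `MCPClient` exposes async `connect()`, `list_tools()`, and `disconnect()` methods and that the config file maps server names to launch configs; the real signatures in `framework.runner.mcp_client` may differ:

```python
# Sketch only: MCPClient method names and tool attribute names are assumptions.
import json
from pathlib import Path

from framework.runner.mcp_client import MCPClient


async def discover_mcp_tools(server_config_path: str = "mcp_servers.json") -> dict:
    """Connect to each configured MCP server and collect its tool schemas."""
    servers = json.loads(Path(server_config_path).read_text())
    catalog: dict[str, list[dict]] = {}
    for server_name, config in servers.items():
        client = MCPClient(config)  # assumed constructor shape
        await client.connect()
        try:
            tools = await client.list_tools()
            catalog[server_name] = [
                {"name": t.name, "description": t.description, "schema": t.input_schema}
                for t in tools
            ]
        finally:
            await client.disconnect()
    return catalog
```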
### 2. Agent Inventory
**`list_agents()`**
Scan `exports/` for agent packages and `~/.hive/agents/` for runtime data. Returns agent names, descriptions (from `__init__.py`), and session counts. Gives the LLM awareness of what already exists.
### 3-7. Session & Checkpoint Inspection
Ported from `agent_builder_server.py` lines 3484-3856. Pure filesystem reads — JSON + pathlib, zero framework imports.
| Tool | Purpose |
|------|---------|
| `list_agent_sessions(agent_name, status?, limit?)` | List sessions, filterable by status |
| `get_agent_session_state(agent_name, session_id)` | Full session state (memory excluded to prevent context bloat) |
| `get_agent_session_memory(agent_name, session_id, key?)` | Read memory contents from a session |
| `list_agent_checkpoints(agent_name, session_id)` | List checkpoints for debugging |
| `get_agent_checkpoint(agent_name, session_id, checkpoint_id?)` | Load a checkpoint's full state |
**Key difference from agent-builder:** These tools accept `agent_name` (e.g. `"deep_research_agent"`) instead of raw `agent_work_dir` paths. They resolve to `~/.hive/agents/{agent_name}/` internally. Friendlier for the LLM.
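A rough sketch of that name resolution plus one of the list tools, pure `pathlib` + `json` as described above (the session JSON field names are illustrative):

```python
# Sketch: session file layout and field names are illustrative assumptions.
import json
from pathlib import Path


def _resolve_hive_agent_path(agent_name: str) -> Path:
    """Map a friendly agent name to its runtime data directory."""
    return Path.home() / ".hive" / "agents" / agent_name


def list_agent_sessions(agent_name: str, status: str | None = None, limit: int = 20) -> list[dict]:
    """List an agent's sessions, newest first, optionally filtered by status."""
    sessions_dir = _resolve_hive_agent_path(agent_name) / "sessions"
    if not sessions_dir.exists():
        return []
    results: list[dict] = []
    for session_dir in sorted(sessions_dir.iterdir(), reverse=True):
        state_file = session_dir / "state.json"
        if not session_dir.is_dir() or not state_file.exists():
            continue
        try:
            state = json.loads(state_file.read_text())
        except json.JSONDecodeError:
            continue  # skip corrupt sessions rather than failing the whole call
        if status and state.get("status") != status:
            continue
        results.append({"session_id": session_dir.name, "status": state.get("status")})
        if len(results) >= limit:
            break
    return results
```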
### 8. Test Execution
**`run_agent_tests(agent_name, test_types?, fail_fast?)`**
Ported from `agent_builder_server.py` lines 2756-2920. Runs pytest on an agent's test suite, sets PYTHONPATH automatically, parses output into structured results (passed/failed/skipped counts, per-test status, failure details).
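A hedged sketch of the shape of this tool; the exact pytest flags and output parsing in the ported code will differ:

```python
# Sketch: flags, env handling, and the summary regex are assumptions.
import os
import re
import subprocess


def run_agent_tests(agent_dir: str, fail_fast: bool = False) -> dict:
    """Run an agent package's pytest suite and return structured counts."""
    env = {**os.environ, "PYTHONPATH": agent_dir}  # so package-relative imports resolve
    cmd = ["pytest", agent_dir, "-q"] + (["-x"] if fail_fast else [])
    proc = subprocess.run(cmd, capture_output=True, text=True, env=env, timeout=300)
    counts = {"passed": 0, "failed": 0, "skipped": 0}
    # Parse the trailing summary line, e.g. "3 passed, 1 failed in 0.42s"
    for n, kind in re.findall(r"(\d+) (passed|failed|skipped)", proc.stdout):
        counts[kind] = int(n)
    return {"returncode": proc.returncode, **counts, "output": proc.stdout[-4000:]}
```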
---
## Files to Modify
### `tools/coder_tools_server.py` (~400 new lines)
Add all 8 tools after the existing `undo_changes` tool:
```
# ── Meta-agent: Tool discovery ────────────────────────────────
# discover_mcp_tools()
# ── Meta-agent: Agent inventory ───────────────────────────────
# list_agents()
# ── Meta-agent: Session & checkpoint inspection ───────────────
# _resolve_hive_agent_path(), _read_session_json(), _scan_agent_sessions(), _truncate_value()
# list_agent_sessions(), get_agent_session_state(), get_agent_session_memory()
# list_agent_checkpoints(), get_agent_checkpoint()
# ── Meta-agent: Test execution ────────────────────────────────
# run_agent_tests()
```
### `exports/hive_coder/nodes/__init__.py`
- Add 8 new tool names to the `tools` list
- Rewrite system prompt "Tools Available" section with meta-agent tools
- Add "Meta-Agent Capabilities" section teaching:
- Tool discovery before designing agents
- Post-build test execution
- Debugging via session/checkpoint inspection
- Agent awareness via `list_agents()`
### `exports/hive_coder/agent.py`
- Update `identity_prompt` to mention dynamic tool discovery and runtime observability
- Add `dynamic-tool-discovery` constraint to the goal
### `exports/hive_coder/reference/framework_guide.md`
Replace static tools list with a note to use `discover_mcp_tools()` instead.
---
## What's NOT in Scope (deferred to v2)
- **Agent notifications / webhook listener** — Requires always-on listener architecture
- **`compare_agent_checkpoints`** — LLM can compare by reading two checkpoints sequentially
- **Runtime log query tools** — Available in hive-tools MCP; `run_command` can access them now
---
## Verification
1. MCP server starts with all 15 tools (7 existing + 8 new)
2. `discover_mcp_tools()` connects to hive-tools and returns real tool schemas
3. Agent validation passes (`default_agent.validate()`)
4. Session tools work against existing data in `~/.hive/agents/`
5. Smoke test: launch in TUI, ask it to discover tools
+75
View File
@@ -0,0 +1,75 @@
# Hive Queen Bee: Native agent-building agent
## Problem
Building a Hive agent today requires manual assembly of 7+ files (`agent.py`, `config.py`, `nodes/__init__.py`, `__init__.py`, `__main__.py`, `mcp_servers.json`, tests) with precise framework conventions — correct imports, entry_points format, conversation_mode values, STEP 1/STEP 2 prompt patterns, nullable_output_keys, and more. A single missing re-export in `__init__.py` silently breaks `AgentRunner.load()`. This is the #1 friction point for new users and a recurring source of bugs even for experienced ones.
There is no tool that understands the framework deeply enough to produce correct agents. General-purpose coding assistants hallucinate tool names, use wrong import paths (`from core.framework...`), create too many thin nodes, forget module-level exports, and produce agents that fail validation.
## Proposal
Build **Hive Coder** (codename "Queen Bee") — a framework-native coding agent that lives inside the framework itself and builds complete, validated agent packages from natural language.
### Design principles
1. **Single-node, forever-alive** — One continuous EventLoopNode conversation handles the full lifecycle (understand, qualify, design, implement, verify, iterate). No artificial phase boundaries that destroy context.
2. **Meta-agent capabilities** — Not just a file writer. Can discover available MCP tools at runtime, inspect sessions/checkpoints of agents it builds, run their test suites, and debug failures.
3. **Self-verifying** — Runs three validation steps after every build: class validation (graph structure), `AgentRunner.load()` (package export contract), and pytest. Fixes its own errors up to 3 attempts.
4. **Honest qualification** — Assesses framework fit before building. If a use case is a poor fit (needs sub-second latency, pure CRUD, massive data pipelines), says so instead of producing a bad agent.
5. **Reference-grounded** — Ships with embedded reference docs (framework guide, file templates, anti-patterns) that it reads before writing code. No reliance on training data for framework specifics.
### Components
#### `hive_coder` agent (`core/framework/agents/hive_coder/`)
| File | Purpose |
|------|---------|
| `agent.py` | Goal, single-node graph, `HiveCoderAgent` class |
| `nodes/__init__.py` | `coder` EventLoopNode with comprehensive system prompt |
| `config.py` | RuntimeConfig with `~/.hive/configuration.json` auto-detection |
| `__main__.py` | Click CLI (`run`, `tui`, `info`, `validate`, `shell`) |
| `reference/framework_guide.md` | Node types, edges, patterns, async entry points |
| `reference/file_templates.md` | Complete code templates for every agent file |
| `reference/anti_patterns.md` | 22 common mistakes with explanations |
#### Coder Tools MCP Server (`tools/coder_tools_server.py`)
Dedicated tool server providing:
- **File I/O**: `read_file` (with line numbers, offset/limit), `write_file` (auto-mkdir), `edit_file` (9-strategy fuzzy matching ported from opencode), `list_directory`, `search_files` (regex)
- **Shell**: `run_command` (timeout, cwd, output truncation)
- **Git**: `undo_changes` (snapshot-based rollback)
- **Meta-agent**: `discover_mcp_tools`, `list_agents`, `list_agent_sessions`, `get_agent_session_state`, `get_agent_session_memory`, `list_agent_checkpoints`, `get_agent_checkpoint`, `run_agent_tests`
All file operations sandboxed to a configurable project root.
#### Framework changes
- `hive code` CLI command — direct launch shortcut
- `hive tui` — discovers framework agents as a source
- `AgentRuntime` — cron expression support (`croniter`) for async entry points (see the timer sketch after this list)
- `prompt_composer` — appends current datetime to system prompts
- `NodeSpec.max_node_visits` — default changed from 1 to 0 (unbounded), matching forever-alive as the standard pattern
- TUI graph view — cron display and hours in countdown
- CredentialError graceful handling in TUI launch
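For the cron support specifically, a minimal timer loop built on `croniter` could look like the sketch below; the actual `AgentRuntime` wiring of entry points and `_timer_next_fire` is more involved:

```python
# Sketch: a cron-driven trigger loop; the runtime's real timer task differs.
import asyncio
from datetime import datetime

from croniter import croniter


async def run_cron_timer(cron_expr: str, fire) -> None:
    """Sleep until each cron occurrence, then invoke the async entry point."""
    schedule = croniter(cron_expr, datetime.now())
    while True:
        next_fire = schedule.get_next(datetime)
        delay = (next_fire - datetime.now()).total_seconds()
        await asyncio.sleep(max(0.0, delay))
        await fire()  # e.g. trigger the entry point with empty input_data
```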
## Acceptance criteria
- [ ] `hive code` launches Hive Coder in the TUI
- [ ] `hive tui` lists framework agents alongside exports/ and examples/
- [ ] Given "build me a research agent that searches the web and summarizes findings", Hive Coder produces a valid package in `exports/` that passes `AgentRunner.load()`
- [ ] Tool discovery works: agent calls `discover_mcp_tools()` before designing, never fabricates tool names
- [ ] Self-verification: agent runs all 3 validation steps and fixes errors before presenting
- [ ] Cron timers fire on schedule (unit tested)
- [ ] `max_node_visits=0` default does not break existing agents or tests
- [ ] Reference docs are accurate and match current framework behavior
## Non-goals
- Multi-agent orchestration (queen spawning worker agents at runtime) — future work
- GUI/web interface — TUI only for v1
- Auto-publishing to a registry — agents are local packages
+288
View File
@@ -0,0 +1,288 @@
# Plan: Multi-Graph Sessions with Guardian Pattern
## Context
The target experience: hive_coder builds an agent (e.g., email automation), loads it into the same runtime session, and acts as its guardian. The email agent runs autonomously while hive_coder watches for failures. On error, hive_coder asks the user for help if they're around, attempts an autonomous fix if they're away, and escalates catastrophic failures for post-mortem.
This requires multiple agent graphs sharing a single `AgentRuntime` session — shared memory and data, but isolated conversations. The existing runtime already has most of the primitives: `ExecutionStream` accepts its own `graph`, `trigger_type="event"` subscribes entry points to the EventBus, and `_get_primary_session_state()` bridges memory across streams.
## Architecture Overview
```
AgentRuntime (shared EventBus, shared state.json, shared data/)
├── hive_coder graph
│ ├── Stream "default" → coder node (client_facing, manual)
│ └── Stream "guardian" → guardian node (event-driven, subscribes to EXECUTION_FAILED)
└── email_agent graph
└── Stream "email_agent::default" → intake node (client_facing, manual)
```
The guardian entry point on hive_coder fires when email_agent emits `EXECUTION_FAILED`. It receives the failure event in its input, reads shared memory for context, and decides: ask user (if present), auto-fix (if away), or escalate (if catastrophic).
## Gap 1: Event Scoping — `graph_id` on Events
**Problem**: EventBus events carry `stream_id` and `node_id` but no `graph_id`. The guardian needs to subscribe to events from a specific graph (email_agent), not a specific stream name.
**Solution**: Add `graph_id: str | None = None` to `AgentEvent` and `filter_graph` to `Subscription`; both additions are sketched after the file lists below.
### `core/framework/runtime/event_bus.py`
- `AgentEvent` dataclass: add `graph_id: str | None = None` field, include in `to_dict()`
- `Subscription` dataclass: add `filter_graph: str | None = None`
- `subscribe()`: accept `filter_graph` param, pass to `Subscription`
- `_matches()`: check `filter_graph` against `event.graph_id`
### `core/framework/runtime/execution_stream.py`
- `__init__()`: accept `graph_id: str | None = None`, store as `self.graph_id`
- When emitting events via `_event_bus.publish()`: set `event.graph_id = self.graph_id`
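A minimal sketch of both additions, assuming simplified dataclass shapes (only `graph_id`, `filter_graph`, and `_matches()` come from this plan; the other fields are placeholders):

```python
from collections.abc import Callable
from dataclasses import asdict, dataclass, field

@dataclass
class AgentEvent:
    type: str
    stream_id: str | None = None
    node_id: str | None = None
    graph_id: str | None = None  # NEW: which graph emitted this event
    data: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        return asdict(self)  # graph_id is included automatically

@dataclass
class Subscription:
    callback: Callable[[AgentEvent], None]
    filter_type: str | None = None
    filter_graph: str | None = None  # NEW: only match events from this graph

    def _matches(self, event: AgentEvent) -> bool:
        if self.filter_type is not None and event.type != self.filter_type:
            return False
        if self.filter_graph is not None and event.graph_id != self.filter_graph:
            return False
        return True
```

A guardian subscription then reads naturally: `bus.subscribe(on_failure, filter_type="EXECUTION_FAILED", filter_graph="email_agent")` (signature assumed).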
## Gap 2: Multi-Graph Runtime — `add_graph()` / `remove_graph()`
**Problem**: `AgentRuntime.__init__` takes a single `GraphSpec`. We need to add/remove graphs dynamically at runtime.
**Solution**: Keep the primary graph on `__init__`. Add methods to register secondary graphs, each of which gets its own `ExecutionStream` instances backed by that graph.
### `core/framework/runtime/agent_runtime.py`
New instance state:
```python
self._graph_id: str = graph_id or "primary" # ID for the primary graph
self._graphs: dict[str, _GraphRegistration] = {} # graph_id -> registration
self._active_graph_id: str = self._graph_id # TUI focus
```
Where `_GraphRegistration` is a simple dataclass:
```python
@dataclass
class _GraphRegistration:
graph: GraphSpec
goal: Goal
entry_points: dict[str, EntryPointSpec]
streams: dict[str, ExecutionStream]
storage_subpath: str # relative to session root, e.g. "graphs/email_agent"
event_subscriptions: list[str] # EventBus subscription IDs
timer_tasks: list[asyncio.Task]
```
New methods:
- `add_graph(graph_id, graph, goal, entry_points, storage_subpath=None)` — creates streams for the graph using graph-scoped storage, sets up event/timer triggers, stamps `graph_id` on all streams. Can be called while running (see the sketch after this list).
- `remove_graph(graph_id)` — stops streams, cancels timers, unsubscribes events, removes registration. Cannot remove primary graph.
- `list_graphs() -> list[str]` — returns all graph IDs
- `active_graph_id` property with setter — TUI uses this to control which graph's events are displayed
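A sketch of how `add_graph()` could wire these pieces together. Attribute names such as `trigger_type`, `handle_event`, and `_session_root` are assumptions; only the overall flow comes from this plan:

```python
def add_graph(self, graph_id, graph, goal, entry_points, storage_subpath=None):
    """Register a secondary graph; callable while the runtime is running (sketch)."""
    if graph_id == self._graph_id or graph_id in self._graphs:
        raise ValueError(f"graph {graph_id!r} is already registered")
    subpath = storage_subpath or f"graphs/{graph_id}"
    registration = _GraphRegistration(
        graph=graph, goal=goal, entry_points=entry_points,
        streams={}, storage_subpath=subpath,
        event_subscriptions=[], timer_tasks=[],
    )
    for name, entry_point in entry_points.items():
        # Graph-scoped storage; state.json and data/ stay at the session root.
        stream = ExecutionStream(
            graph=graph,
            graph_id=graph_id,  # stamped onto every event the stream emits
            storage_path=self._session_root / subpath,
        )
        registration.streams[f"{graph_id}::{name}"] = stream
        if entry_point.trigger_type == "event":
            subscription_id = self._event_bus.subscribe(
                stream.handle_event, filter_type=entry_point.event_type,
            )
            registration.event_subscriptions.append(subscription_id)
    self._graphs[graph_id] = registration
```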
Update existing methods:
- `start()`: stamp `self._graph_id` on primary graph streams (via `ExecutionStream.graph_id`)
- `inject_input(node_id, content)`: search active graph's streams first, then all others
- `_get_primary_session_state()`: search across ALL graphs' streams (not just primary's)
- `stop()`: stop all secondary graph streams/timers/subscriptions too
### Storage Layout
```
~/.hive/agents/hive_coder/sessions/{session_id}/
state.json ← SHARED across all graphs
data/ ← SHARED data directory
conversations/coder/ ← hive_coder conversations
graphs/
email_agent/ ← secondary graph storage root
conversations/
intake/
checkpoints/
```
Secondary graph executors get `storage_path = {session_root}/graphs/{graph_id}/` while `state.json` and `data/` remain at the session root. The `resume_session_id` mechanism in `_get_primary_session_state()` already handles this — secondary executions find the primary session's `state.json`.
**Concurrent state.json writes**: For the guardian pattern (sequential: email_agent fails → guardian triggers), no file lock needed. But since both could technically write concurrently, add a simple `fcntl.flock()` wrapper around `_write_progress()` in the executor. Small, defensive change.
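A sketch of that wrapper; the real `_write_progress()` shape is an assumption, and locking a sidecar file avoids holding a lock on `state.json` itself:

```python
import fcntl
import json
import os
from pathlib import Path

def _write_progress(state_path: Path, state: dict) -> None:
    """Write state.json atomically, serialized across graphs via an advisory lock."""
    lock_path = state_path.with_suffix(".lock")
    with open(lock_path, "w") as lock_file:
        fcntl.flock(lock_file, fcntl.LOCK_EX)  # blocks until any concurrent writer finishes
        try:
            tmp_path = state_path.with_suffix(".tmp")
            tmp_path.write_text(json.dumps(state, indent=2, default=str))
            os.replace(tmp_path, state_path)  # atomic rename on POSIX
        finally:
            fcntl.flock(lock_file, fcntl.LOCK_UN)
```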
## Gap 3: Guardian Pattern — User Presence + Autonomous Recovery
**Problem**: When email_agent fails, hive_coder's guardian entry point must decide: ask user or auto-fix.
**Solution**: User presence is a runtime-level signal. The guardian's system prompt and event data give it enough context to decide.
### User Presence Tracking
Add to `AgentRuntime`:
```python
self._last_user_input_time: float = 0.0 # monotonic timestamp
```
Updated in `inject_input()` (called whenever user types in TUI). Exposed as:
```python
@property
def user_idle_seconds(self) -> float:
if self._last_user_input_time == 0:
return float('inf')
return time.monotonic() - self._last_user_input_time
```
The guardian node's system prompt instructs the LLM: "If user_idle_seconds < 120, ask the user for guidance via the client-facing interaction. If user is away, attempt an autonomous fix."
This is NOT framework logic — it's prompt-driven. The guardian node is a regular `event_loop` node with `client_facing=True` and tools for code editing + agent lifecycle. The LLM decides the strategy based on presence info injected as context.
### Escalation Model
Escalation = save a structured log entry. No special framework support needed. The guardian node uses `save_data("escalation_log.jsonl", ...)` via the existing data tools. The LLM writes:
```json
{"timestamp": "...", "severity": "catastrophic", "agent": "email_agent", "error": "...", "attempted_fixes": [...], "recommended_action": "..."}
```
Post-mortem: user opens `/data escalation_log.jsonl` or the TUI shows a notification linking to it.
## Gap 4: Graph Lifecycle Tools — Stop/Reload/Restart
**Problem**: hive_coder needs to programmatically stop a broken agent, fix its code, reload it, and restart it.
**Solution**: MCP tools accessible to the active agent. Uses `ContextVar` to access the runtime (same pattern as `data_dir`).
### `core/framework/tools/session_graph_tools.py` (NEW)
```python
async def load_agent(agent_path: str) -> str:
"""Load an agent graph into the running session."""
async def unload_agent(graph_id: str) -> str:
"""Stop and remove an agent graph from the session."""
async def start_agent(graph_id: str, entry_point: str = "default", input_data: str = "{}") -> str:
"""Trigger an entry point on a loaded agent graph."""
async def restart_agent(graph_id: str) -> str:
"""Unload and re-load an agent (picks up code changes)."""
async def list_agents() -> str:
"""List all agent graphs in the current session with their status."""
async def get_user_presence() -> str:
"""Return user idle time and presence status."""
```
These tools call `runtime.add_graph()`, `runtime.remove_graph()`, `runtime.trigger()`, etc.
### Registration
These tools are registered via `ToolRegistry` with `CONTEXT_PARAM` for `runtime` (injected by the executor, same as `data_dir`). Only available when the runtime is multi-graph capable (set by `cmd_code()`).
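The `ContextVar` plumbing might look like the following; names such as `_current_runtime` and `set_runtime` are hypothetical, and only the pattern (executor-injected context, mirroring `data_dir`) comes from this plan:

```python
import json
from contextvars import ContextVar

# Set by the executor before dispatching a tool call (CONTEXT_PARAM injection).
_current_runtime: ContextVar = ContextVar("current_runtime", default=None)

def set_runtime(runtime) -> None:
    _current_runtime.set(runtime)

async def list_agents() -> str:
    """List all agent graphs in the current session with their status."""
    runtime = _current_runtime.get()
    if runtime is None or not getattr(runtime, "_multi_graph_capable", False):
        return json.dumps({"error": "session is not multi-graph capable"})
    return json.dumps({"graphs": runtime.list_graphs(), "active": runtime.active_graph_id})
```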
## Gap 5: TUI Integration — Graph Switching + Background Notifications
### `core/framework/tui/app.py`
- `_route_event()`: check `event.graph_id` against `runtime.active_graph_id` (sketched after this list)
- Events from active graph: route normally (streaming, chat, etc.)
- `CLIENT_INPUT_REQUESTED` from background graph: show notification bar
- `EXECUTION_FAILED` from background graph: show error notification
- `EXECUTION_COMPLETED` from background: show brief completion notice
- Other background events: silent (visible in logs)
- `action_switch_graph(graph_id)`: update `runtime.active_graph_id`, refresh graph view, show header
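A sketch of that routing; the event type names come from this plan, while `notify()` and the handler names are assumptions:

```python
def _route_event(self, event) -> None:
    # No graph_id (single-graph mode) or active graph: route normally.
    if event.graph_id is None or event.graph_id == self.runtime.active_graph_id:
        self._handle_event(event)  # assumed existing handler
        return
    # Background graphs: surface only what the user must act on.
    if event.type == "CLIENT_INPUT_REQUESTED":
        self.notify(f"[{event.graph_id}] needs input; /graph {event.graph_id} to respond")
    elif event.type == "EXECUTION_FAILED":
        self.notify(f"[{event.graph_id}] failed", severity="error")
    elif event.type == "EXECUTION_COMPLETED":
        self.notify(f"[{event.graph_id}] completed")
    # All other background events stay silent (still visible in logs).
```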
### `core/framework/tui/widgets/chat_repl.py`
- Track `_input_graph_id: str | None` alongside `_input_node_id`
- `handle_input_requested(node_id, graph_id)`: if background graph, show notification instead of enabling input
- `_submit_input()`: pass `graph_id` to help `inject_input()` route correctly
- New TUI commands:
- `/graphs` — list loaded graphs and their status
- `/graph <id>` — switch active graph focus
- `/load <path>` — load an agent graph into the session
- `/unload <id>` — remove a graph from the session
- On graph switch: flush streaming state, render graph header separator
### `core/framework/tui/widgets/graph_view.py`
- `switch_graph(graph_id)` — re-render the graph visualization for the new active graph
- When multi-graph active: show tab-like header listing all loaded graphs
## Gap 6: CLI + Runner Integration
### `core/framework/runner/cli.py`
- `cmd_code()` creates the hive_coder runtime with `graph_id="hive_coder"`
- Registers `session_graph_tools` with the tool config so hive_coder's LLM can call them
- Sets `runtime._multi_graph_capable = True` flag
### `core/framework/runner/runner.py`
- New method: `setup_as_secondary(runtime, graph_id)` — configures this runner to join an existing `AgentRuntime` as a secondary graph. Uses the existing `AgentRunner.load()` to parse agent.json, then calls `runtime.add_graph()` with the parsed graph/goal/entry_points.
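A sketch of that method; the shape of `AgentRunner.load()`'s result is an assumption:

```python
def setup_as_secondary(self, runtime, graph_id: str) -> None:
    """Join an existing AgentRuntime as a secondary graph instead of owning one."""
    loaded = AgentRunner.load(self.agent_path)  # parses agent.json as usual
    runtime.add_graph(
        graph_id=graph_id,
        graph=loaded.graph,
        goal=loaded.goal,
        entry_points=loaded.entry_points,
        storage_subpath=f"graphs/{graph_id}",
    )
```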
## Gap 7: Reliable Mid-Node Resume
**Problem**: When an EventLoopNode is interrupted (crash, Ctrl+Z, context switch), resume doesn't restore to exactly where execution stopped. Several pieces of in-node state are lost, which changes behavior post-resume. In multi-graph sessions with parallel execution and frequent context switching, these gaps compound.
### What's already restored correctly
- **Conversation history**: All messages persisted to disk immediately via `FileConversationStore._persist()` — one file per message in `parts/NNNNNNNNNN.json`
- **OutputAccumulator values**: Write-through to `cursor.json` on every `accumulator.set()` call
- **Iteration counter**: Written to `cursor.json` at the end of each iteration (step 6g)
- **Orphaned tool calls**: `_repair_orphaned_tool_calls()` patches in-flight tool calls with error messages so the LLM knows to retry
### What's lost — and fixes
#### 1. `user_interaction_count` (CRITICAL)
Resets to 0 on resume. This controls client-facing blocking semantics: before the first interaction, `set_output`-only turns don't prevent blocking (the LLM must present to the user first). After resume, a node that had 3 user interactions behaves as if the user never interacted.
**Fix**: Persist `user_interaction_count` to `cursor.json` alongside `iteration` and `outputs`. Write it in `_write_cursor()` (step 6g), restore in `_restore()`.
**Files**: `core/framework/graph/event_loop_node.py`
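A sketch of the cursor write and restore; method and attribute names are assumptions beyond the `iteration` and `outputs` fields this plan describes. Fix 3 below persists its counters through the same write:

```python
import json

def _write_cursor(self) -> None:  # EventLoopNode method, step 6g
    cursor = {
        "iteration": self._iteration,
        "outputs": self._accumulator.to_dict(),
        # Newly persisted in-node state (fixes 1 and 3):
        "user_interaction_count": self._user_interaction_count,
        "recent_responses": self._recent_responses,
        "recent_tool_fingerprints": self._recent_tool_fingerprints,
    }
    self._cursor_path.write_text(json.dumps(cursor, default=str))

def _restore(self) -> None:
    cursor = json.loads(self._cursor_path.read_text())
    self._iteration = cursor.get("iteration", 0)
    self._user_interaction_count = cursor.get("user_interaction_count", 0)
    self._recent_responses = cursor.get("recent_responses", [])
    self._recent_tool_fingerprints = cursor.get("recent_tool_fingerprints", [])
```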
#### 2. Accumulator outputs not in SharedMemory
The `OutputAccumulator` writes to `cursor.json` (durable) but only writes to `SharedMemory` when the judge ACCEPTs. On crash, the CancelledError handler captures `memory.read_all()` — which doesn't include the accumulator's WIP values. On resume, edge conditions checking those memory keys see `None`.
**Fix**: In the executor's `CancelledError` handler, read the interrupted node's `cursor.json` and write any accumulator outputs to `memory` before building `session_state_out`. This ensures resume memory includes WIP output values.
**Files**: `core/framework/graph/executor.py` (CancelledError handler, ~line 1289)
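A sketch of that flush as a helper called from the handler; the `memory` read/write interface is an assumption:

```python
import json
from pathlib import Path

def flush_wip_outputs(node_storage_path: Path, memory) -> None:
    """Merge a cancelled node's durable accumulator outputs into SharedMemory.

    Called from the executor's CancelledError handler before session_state_out
    is built from memory.read_all(), so resume-time edge conditions see WIP values.
    """
    cursor_path = node_storage_path / "cursor.json"
    if not cursor_path.exists():
        return
    wip_outputs = json.loads(cursor_path.read_text()).get("outputs", {})
    for key, value in wip_outputs.items():
        if memory.read(key) is None:  # don't clobber judge-accepted values
            memory.write(key, value)
```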
#### 3. Stall/doom-loop detection counters
`recent_responses` and `recent_tool_fingerprints` reset to empty lists. A previously near-stalled node gets a fresh detection budget.
**Fix**: Persist these to `cursor.json`. They're small (last N strings). Write in `_write_cursor()`, restore in `_restore()`.
**Files**: `core/framework/graph/event_loop_node.py`
#### 4. `continuous_conversation` at executor level
In continuous mode, the executor's `continuous_conversation` variable is `None` on resume. The node's `_restore()` recovers messages from disk, but the executor doesn't pre-populate this variable until the node returns.
**Fix**: After a resumed node completes, set `continuous_conversation = result.conversation` (this already happens in the normal path at line 1155 — verify it also runs on the resume path).
**Files**: `core/framework/graph/executor.py`
### Multi-graph specific: independent resume per graph
Each graph in a multi-graph session has its own storage subdirectory (`graphs/{graph_id}/`) with its own `conversations/`, `checkpoints/`, and `cursor.json` files. Resume is already per-executor, so each graph resumes independently. The shared `state.json` at the session root captures the union of all graphs' memory — the `fcntl.flock()` wrapper on `_write_progress()` (Gap 2) ensures concurrent writes don't corrupt it.
### Implementation
These fixes are prerequisite to multi-graph and should be done as **Phase 0** before the EventBus changes:
1. Persist `user_interaction_count` + stall/doom counters to `cursor.json`
2. Restore them in `_restore()`
3. Flush accumulator outputs to SharedMemory in executor's CancelledError handler
4. Verify continuous_conversation is set on resume path
## Implementation Phases
### Phase 0: Reliable Mid-Node Resume (prerequisite)
1. `event_loop_node.py` — persist `user_interaction_count`, `recent_responses`, `recent_tool_fingerprints` to `cursor.json` via `_write_cursor()`; restore in `_restore()`
2. `executor.py` — in CancelledError handler, read interrupted node's `cursor.json` accumulator outputs and write to `memory` before building `session_state_out`
3. `executor.py` — verify `continuous_conversation` is populated on resume path
### Phase 1: EventBus Foundation
1. `event_bus.py``graph_id` on `AgentEvent`, `filter_graph` on `Subscription` + `_matches()`
2. `execution_stream.py` — accept and stamp `graph_id` on emitted events
### Phase 2: Multi-Graph Runtime
3. `agent_runtime.py``_GraphRegistration` dataclass, `add_graph()`, `remove_graph()`, `list_graphs()`, `active_graph_id` property
4. `agent_runtime.py` — update `inject_input()`, `_get_primary_session_state()`, `stop()` for multi-graph
5. `agent_runtime.py` — user presence tracking (`_last_user_input_time`, `user_idle_seconds`)
6. Storage path logic: secondary graphs get `{session_root}/graphs/{graph_id}/`
### Phase 3: Graph Lifecycle Tools
7. `core/framework/tools/session_graph_tools.py``load_agent`, `unload_agent`, `start_agent`, `restart_agent`, `list_agents`, `get_user_presence`
8. `runner.py``setup_as_secondary()` method
### Phase 4: TUI Integration
9. `app.py``graph_id` event filtering, background notifications, `action_switch_graph`
10. `chat_repl.py``/graphs`, `/graph`, `/load`, `/unload` commands, graph_id tracking
11. `graph_view.py` — multi-graph header, `switch_graph()`
### Phase 5: hive_coder Integration
12. `cli.py``cmd_code()` sets up multi-graph capable runtime, registers graph tools
13. hive_coder's agent config — add guardian entry point with `trigger_type="event"` subscribing to `EXECUTION_FAILED`
14. Guardian node system prompt — presence-aware triage logic (ask user / auto-fix / escalate)
## Backward Compatibility
- Single-graph `hive run exports/my_agent` unchanged: `graph_id` defaults to `None`, no secondary graphs loaded, events carry `graph_id=None`, TUI shows no graph switching UI
- All new fields are optional with `None` defaults
- `_get_primary_session_state()` existing behavior preserved when no secondary graphs exist
## Verification
1. **Unit**: `add_graph()` creates streams with correct `graph_id`, events carry `graph_id`, `filter_graph` works in subscriptions, `inject_input()` routes to correct graph
2. **Integration**: Load hive_coder + email_agent, email_agent fails → guardian fires → reads shared memory → decides action
3. **TUI**: `/graphs` shows both, `/graph` switches, background failure notification appears, input routing works across graphs
4. **Backward compat**: `hive run exports/deep_research_agent --tui` works unchanged
5. **Lifecycle**: `restart_agent` picks up code changes, `unload_agent` cleans up streams and subscriptions
+56
View File
@@ -0,0 +1,56 @@
# feat(queen): Hive Queen Bee — native agent-building agent
## Summary
Introduces **Hive Coder** (codename "Queen Bee"), a framework-native coding agent that builds complete Hive agent packages from natural language descriptions. This is a single-node, forever-alive agent inspired by opencode's `while(true)` loop — one continuous conversation handles the full lifecycle: understand, qualify, design, implement, verify, and iterate.
The agent is deeply integrated with the framework: it can discover available MCP tools at runtime, inspect sessions and checkpoints of agents it builds, run their test suites, and self-verify its own output. It ships with a dedicated MCP tools server (`coder_tools_server.py`) providing rich file I/O, fuzzy-match editing, git snapshots, and shell execution — all scoped to a configurable project root.
## What's included
### New: `hive_coder` agent (`core/framework/agents/hive_coder/`)
- **`agent.py`** — Goal with 4 success criteria and 4 constraints, single-node graph, `HiveCoderAgent` class with full runtime lifecycle (start/stop/trigger_and_wait)
- **`nodes/__init__.py`** — Single `coder` EventLoopNode with a comprehensive system prompt covering coding mandates, tool discovery, meta-agent capabilities, node count rules, implementation templates, and a 6-phase workflow
- **`config.py`** — RuntimeConfig with auto-detection of preferred model from `~/.hive/configuration.json`
- **`__main__.py`** — Click CLI with `run`, `tui`, `info`, `validate`, and `shell` subcommands
- **`reference/`** — Framework guide, file templates, and anti-patterns docs embedded as agent reference material
### New: Coder Tools MCP Server (`tools/coder_tools_server.py`)
- 1500-line MCP server providing 15 tools: `read_file`, `write_file`, `edit_file` (with opencode-style 9-strategy fuzzy matching; one strategy is sketched below), `list_directory`, `search_files`, `run_command`, `undo_changes`, `discover_mcp_tools`, `list_agents`, `list_agent_sessions`, `get_agent_session_state`, `get_agent_session_memory`, `list_agent_checkpoints`, `get_agent_checkpoint`, `run_agent_tests`
- Path-scoped security: all file operations sandboxed to project root
- Git-based undo: automatic snapshots before writes with `undo_changes` rollback
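For flavor, one of the simpler matching strategies might look like this. It is an illustrative sketch only; the nine actual strategies live in `coder_tools_server.py`:

```python
def whitespace_normalized_find(haystack: str, needle: str) -> int:
    """Find `needle` in `haystack`, ignoring each line's leading/trailing whitespace.

    Returns the character offset of the match, or -1 (illustrative sketch only).
    """
    def normalize(s: str) -> str:
        return "\n".join(line.strip() for line in s.splitlines())

    hay_lines = haystack.splitlines()
    needle_norm = normalize(needle)
    window_len = len(needle.splitlines())
    for start in range(len(hay_lines) - window_len + 1):
        window = "\n".join(hay_lines[start:start + window_len])
        if normalize(window) == needle_norm:
            return sum(len(line) + 1 for line in hay_lines[:start])  # +1 per newline
    return -1
```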
### Framework changes
- **`hive code` CLI command** — Direct launch shortcut for Hive Coder via `cmd_code` in `runner/cli.py`
- **`hive tui` updated** — Now discovers framework agents alongside exports/ and examples/
- **Cron timer support** — `AgentRuntime` now supports cron expressions (`croniter`) in addition to fixed-interval timers for async entry points
- **Datetime in system prompts** — `prompt_composer._with_datetime()` appends current datetime to all composed system prompts; EventLoopNode also applies it for isolated conversations
- **`max_node_visits` default → 0** — Changed from 1 to 0 (unbounded) across `NodeSpec` and executor, matching the forever-alive pattern as the standard default
- **TUI graph view** — Timer display updated to show cron expressions and hours in countdown
- **CredentialError handling** — `_setup()` calls in TUI launch paths now catch and display credential errors gracefully
### Tests
- New `test_agent_runtime.py` tests for cron-based timer scheduling
## Architecture
```
User ──▶ [coder] (EventLoopNode, client_facing, forever-alive)
│ Tools: coder_tools_server.py (file I/O, shell, git)
│ + meta-agent tools (discover, inspect, test)
└──▶ loops continuously until user exits
```
Single node. No edges. No terminal nodes. The agent stays alive and handles multiple build requests in one session — context accumulates across interactions.
## Test plan
- [ ] `hive code` launches Hive Coder TUI successfully
- [ ] `hive tui` shows "Framework Agents" as a source option
- [ ] Agent can discover tools via `discover_mcp_tools()`
- [ ] Agent generates a valid agent package from a natural language request
- [ ] Generated packages pass `AgentRunner.load()` validation
- [ ] Cron timer tests pass (`test_agent_runtime.py`)
- [ ] Existing tests unaffected by `max_node_visits` default change
+162 -36
View File
@@ -347,10 +347,9 @@ if [ "$USE_ASSOC_ARRAYS" = true ]; then
["cerebras:1"]="Qwen3 235B - Frontier reasoning"
)
# NOTE: 8192 should match DEFAULT_MAX_TOKENS in core/framework/graph/edge.py
declare -A MODEL_CHOICES_MAXTOKENS=(
["anthropic:0"]=8192
["anthropic:1"]=8192
["anthropic:0"]=32768
["anthropic:1"]=16384
["anthropic:2"]=8192
["anthropic:3"]=8192
["openai:0"]=16384
@@ -454,8 +453,7 @@ else
MC_PROVIDERS=(anthropic anthropic anthropic anthropic openai openai openai gemini gemini groq groq cerebras cerebras)
MC_IDS=("claude-opus-4-6" "claude-sonnet-4-5-20250929" "claude-sonnet-4-20250514" "claude-haiku-4-5-20251001" "gpt-5.2" "gpt-5-mini" "gpt-5-nano" "gemini-3-flash-preview" "gemini-3-pro-preview" "moonshotai/kimi-k2-instruct-0905" "openai/gpt-oss-120b" "zai-glm-4.7" "qwen3-235b-a22b-instruct-2507")
MC_LABELS=("Opus 4.6 - Most capable (recommended)" "Sonnet 4.5 - Best balance" "Sonnet 4 - Fast + capable" "Haiku 4.5 - Fast + cheap" "GPT-5.2 - Most capable (recommended)" "GPT-5 Mini - Fast + cheap" "GPT-5 Nano - Fastest" "Gemini 3 Flash - Fast (recommended)" "Gemini 3 Pro - Best quality" "Kimi K2 - Best quality (recommended)" "GPT-OSS 120B - Fast reasoning" "ZAI-GLM 4.7 - Best quality (recommended)" "Qwen3 235B - Frontier reasoning")
# NOTE: 8192 should match DEFAULT_MAX_TOKENS in core/framework/graph/edge.py
MC_MAXTOKENS=(8192 8192 8192 8192 16384 16384 16384 8192 8192 8192 8192 8192 8192)
MC_MAXTOKENS=(32768 16384 8192 8192 16384 16384 16384 8192 8192 8192 8192 8192 8192)
# Helper: get number of model choices for a provider
get_model_choice_count() {
@@ -616,11 +614,14 @@ prompt_model_selection() {
}
# Function to save configuration
# Args: provider_id env_var model max_tokens [use_claude_code_sub] [api_base]
save_configuration() {
local provider_id="$1"
local env_var="$2"
local model="$3"
local max_tokens="$4"
local use_claude_code_sub="${5:-}"
local api_base="${6:-}"
# Fallbacks if not provided
if [ -z "$model" ]; then
@@ -643,6 +644,12 @@ config = {
},
'created_at': '$(date -u +"%Y-%m-%dT%H:%M:%S+00:00")'
}
if '$use_claude_code_sub' == 'true':
config['llm']['use_claude_code_subscription'] = True
# No api_key_env_var needed for Claude Code subscription
config['llm'].pop('api_key_env_var', None)
if '$api_base':
config['llm']['api_base'] = '$api_base'
with open('$HIVE_CONFIG_FILE', 'w') as f:
json.dump(config, f, indent=2)
print(json.dumps(config, indent=2))
@@ -664,8 +671,47 @@ SELECTED_PROVIDER_ID="" # Will hold the chosen provider ID
SELECTED_ENV_VAR="" # Will hold the chosen env var
SELECTED_MODEL="" # Will hold the chosen model ID
SELECTED_MAX_TOKENS=8192 # Will hold the chosen max_tokens
SUBSCRIPTION_MODE="" # "claude_code" | "zai_code" | ""
if [ "$USE_ASSOC_ARRAYS" = true ]; then
# ── Subscription mode detection ──────────────────────────────
# Claude Code subscription: default when ~/.claude/.credentials.json exists
CLAUDE_CRED_FILE="$HOME/.claude/.credentials.json"
if [ -f "$CLAUDE_CRED_FILE" ]; then
echo -e " ${GREEN}${NC} Claude Code subscription detected"
echo -e " ${DIM}~/.claude/.credentials.json${NC}"
echo -e " ${DIM}Default: claude-opus-4-6 | max_tokens: 32768${NC}"
echo ""
if prompt_yes_no "Use Claude Code subscription? (no API key needed)"; then
SUBSCRIPTION_MODE="claude_code"
SELECTED_PROVIDER_ID="anthropic"
SELECTED_MODEL="claude-opus-4-6"
SELECTED_MAX_TOKENS=32768
echo ""
echo -e "${GREEN}${NC} Using Claude Code subscription"
fi
fi
# ZAI Code subscription: check for ZAI_API_KEY
if [ -z "$SUBSCRIPTION_MODE" ] && [ -n "${ZAI_API_KEY:-}" ]; then
echo -e " ${GREEN}${NC} Found ZAI Code API key"
echo ""
if prompt_yes_no "Use your ZAI Code subscription?"; then
SUBSCRIPTION_MODE="zai_code"
SELECTED_PROVIDER_ID="openai"
SELECTED_ENV_VAR="ZAI_API_KEY"
SELECTED_MODEL="glm-5"
SELECTED_MAX_TOKENS=32768
echo ""
echo -e "${GREEN}${NC} Using ZAI Code subscription"
echo -e " ${DIM}Model: glm-5 | API: api.z.ai${NC}"
fi
fi
# Skip normal provider detection if a subscription mode was selected
if [ -n "$SUBSCRIPTION_MODE" ]; then
# Jump ahead — SELECTED_PROVIDER_ID is already set
:
elif [ "$USE_ASSOC_ARRAYS" = true ]; then
# Bash 4+ - iterate over associative array keys
for env_var in "${!PROVIDER_NAMES[@]}"; do
value="${!env_var}"
@@ -693,28 +739,18 @@ if [ ${#FOUND_PROVIDERS[@]} -gt 0 ]; then
done
echo ""
if [ ${#FOUND_PROVIDERS[@]} -eq 1 ]; then
# Only one provider found, use it automatically
if prompt_yes_no "Use this key?"; then
SELECTED_ENV_VAR="${FOUND_ENV_VARS[0]}"
SELECTED_PROVIDER_ID="$(get_provider_id "$SELECTED_ENV_VAR")"
echo ""
echo -e "${GREEN}${NC} Using ${FOUND_PROVIDERS[0]}"
prompt_model_selection "$SELECTED_PROVIDER_ID"
fi
else
# Multiple providers found, let user pick one
# Show all found providers + ZAI subscription + Other
echo -e "${BOLD}Select your default LLM provider:${NC}"
echo ""
# Build choice menu from found providers
i=1
for provider in "${FOUND_PROVIDERS[@]}"; do
echo -e " ${CYAN}$i)${NC} $provider"
i=$((i + 1))
done
ZAI_CHOICE=$i
echo -e " ${CYAN}$i)${NC} ZAI Code Subscription ${DIM}(use your ZAI Code plan)${NC}"
i=$((i + 1))
echo -e " ${CYAN}$i)${NC} Other"
max_choice=$i
echo ""
@@ -725,6 +761,17 @@ if [ ${#FOUND_PROVIDERS[@]} -gt 0 ]; then
if [ "$choice" -eq "$max_choice" ]; then
# Fall through to the manual provider selection below
break
elif [ "$choice" -eq "$ZAI_CHOICE" ]; then
# ZAI Code Subscription
SUBSCRIPTION_MODE="zai_code"
SELECTED_PROVIDER_ID="openai"
SELECTED_ENV_VAR="ZAI_API_KEY"
SELECTED_MODEL="glm-5"
SELECTED_MAX_TOKENS=32768
echo ""
echo -e "${GREEN}${NC} Using ZAI Code subscription"
echo -e " ${DIM}Model: glm-5 | API: api.z.ai${NC}"
break
fi
idx=$((choice - 1))
SELECTED_ENV_VAR="${FOUND_ENV_VARS[$idx]}"
@@ -739,51 +786,93 @@ if [ ${#FOUND_PROVIDERS[@]} -gt 0 ]; then
echo -e "${RED}Invalid choice. Please enter 1-$max_choice${NC}"
done
fi
fi
if [ -z "$SELECTED_PROVIDER_ID" ]; then
echo ""
prompt_choice "Select your LLM provider:" \
"Anthropic (Claude) - Recommended" \
"OpenAI (GPT)" \
"Google Gemini - Free tier available" \
"Groq - Fast, free tier" \
"Cerebras - Fast, free tier" \
"Skip for now"
choice=$PROMPT_CHOICE
echo -e "${BOLD}Select your LLM provider:${NC}"
echo ""
echo -e " ${CYAN}${BOLD}Subscription modes (no API key purchase needed):${NC}"
echo -e " ${CYAN}1)${NC} Claude Code Subscription ${DIM}(use your Claude Max/Pro plan)${NC}"
echo -e " ${CYAN}2)${NC} ZAI Code Subscription ${DIM}(use your ZAI Code plan)${NC}"
echo ""
echo -e " ${CYAN}${BOLD}API key providers:${NC}"
echo -e " ${CYAN}3)${NC} Anthropic (Claude) - Recommended"
echo -e " ${CYAN}4)${NC} OpenAI (GPT)"
echo -e " ${CYAN}5)${NC} Google Gemini - Free tier available"
echo -e " ${CYAN}6)${NC} Groq - Fast, free tier"
echo -e " ${CYAN}7)${NC} Cerebras - Fast, free tier"
echo -e " ${CYAN}8)${NC} Skip for now"
echo ""
while true; do
read -r -p "Enter choice (1-8): " choice || true
if [[ "$choice" =~ ^[0-9]+$ ]] && [ "$choice" -ge 1 ] && [ "$choice" -le 8 ]; then
break
fi
echo -e "${RED}Invalid choice. Please enter 1-8${NC}"
done
case $choice in
0)
1)
# Claude Code Subscription
CLAUDE_CRED_FILE="$HOME/.claude/.credentials.json"
if [ ! -f "$CLAUDE_CRED_FILE" ]; then
echo ""
echo -e "${YELLOW} ~/.claude/.credentials.json not found.${NC}"
echo -e " Run ${CYAN}claude${NC} first to authenticate with your Claude subscription,"
echo -e " then run this quickstart again."
echo ""
SELECTED_PROVIDER_ID=""
else
SUBSCRIPTION_MODE="claude_code"
SELECTED_PROVIDER_ID="anthropic"
echo ""
echo -e "${GREEN}${NC} Using Claude Code subscription"
fi
;;
2)
# ZAI Code Subscription
SUBSCRIPTION_MODE="zai_code"
SELECTED_PROVIDER_ID="openai"
SELECTED_ENV_VAR="ZAI_API_KEY"
SELECTED_MODEL="glm-5"
SELECTED_MAX_TOKENS=32768
PROVIDER_NAME="ZAI"
echo ""
echo -e "${GREEN}${NC} Using ZAI Code subscription"
echo -e " ${DIM}Model: glm-5 | API: api.z.ai${NC}"
;;
3)
SELECTED_ENV_VAR="ANTHROPIC_API_KEY"
SELECTED_PROVIDER_ID="anthropic"
PROVIDER_NAME="Anthropic"
SIGNUP_URL="https://console.anthropic.com/settings/keys"
;;
1)
4)
SELECTED_ENV_VAR="OPENAI_API_KEY"
SELECTED_PROVIDER_ID="openai"
PROVIDER_NAME="OpenAI"
SIGNUP_URL="https://platform.openai.com/api-keys"
;;
2)
5)
SELECTED_ENV_VAR="GEMINI_API_KEY"
SELECTED_PROVIDER_ID="gemini"
PROVIDER_NAME="Google Gemini"
SIGNUP_URL="https://aistudio.google.com/apikey"
;;
3)
6)
SELECTED_ENV_VAR="GROQ_API_KEY"
SELECTED_PROVIDER_ID="groq"
PROVIDER_NAME="Groq"
SIGNUP_URL="https://console.groq.com/keys"
;;
4)
7)
SELECTED_ENV_VAR="CEREBRAS_API_KEY"
SELECTED_PROVIDER_ID="cerebras"
PROVIDER_NAME="Cerebras"
SIGNUP_URL="https://cloud.cerebras.ai/"
;;
5)
8)
echo ""
echo -e "${YELLOW}Skipped.${NC} An LLM API key is required to test and use worker agents."
echo -e "Add your API key later by running:"
@@ -795,7 +884,8 @@ if [ -z "$SELECTED_PROVIDER_ID" ]; then
;;
esac
if [ -n "$SELECTED_ENV_VAR" ] && [ -z "${!SELECTED_ENV_VAR}" ]; then
# For API-key providers: prompt for key if not already set
if [ -z "$SUBSCRIPTION_MODE" ] && [ -n "$SELECTED_ENV_VAR" ] && [ -z "${!SELECTED_ENV_VAR}" ]; then
echo ""
echo -e "Get your API key from: ${CYAN}$SIGNUP_URL${NC}"
echo ""
@@ -816,6 +906,28 @@ if [ -z "$SELECTED_PROVIDER_ID" ]; then
SELECTED_PROVIDER_ID=""
fi
fi
# For ZAI subscription: prompt for API key if not already set
if [ "$SUBSCRIPTION_MODE" = "zai_code" ] && [ -z "${ZAI_API_KEY:-}" ]; then
echo ""
read -r -p "Paste your ZAI API key (or press Enter to skip): " API_KEY
if [ -n "$API_KEY" ]; then
echo "" >> "$SHELL_RC_FILE"
echo "# Hive Agent Framework - ZAI Code subscription API key" >> "$SHELL_RC_FILE"
echo "export ZAI_API_KEY=\"$API_KEY\"" >> "$SHELL_RC_FILE"
export ZAI_API_KEY="$API_KEY"
echo ""
echo -e "${GREEN}${NC} ZAI API key saved to $SHELL_RC_FILE"
else
echo ""
echo -e "${YELLOW}Skipped.${NC} Add your ZAI API key to $SHELL_RC_FILE when ready:"
echo -e " ${CYAN}echo 'export ZAI_API_KEY=\"your-key\"' >> $SHELL_RC_FILE${NC}"
SELECTED_ENV_VAR=""
SELECTED_PROVIDER_ID=""
SUBSCRIPTION_MODE=""
fi
fi
fi
# Prompt for model if not already selected (manual provider path)
@@ -827,7 +939,13 @@ fi
if [ -n "$SELECTED_PROVIDER_ID" ]; then
echo ""
echo -n " Saving configuration... "
if [ "$SUBSCRIPTION_MODE" = "claude_code" ]; then
save_configuration "$SELECTED_PROVIDER_ID" "" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "true" "" > /dev/null
elif [ "$SUBSCRIPTION_MODE" = "zai_code" ]; then
save_configuration "$SELECTED_PROVIDER_ID" "$SELECTED_ENV_VAR" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "" "https://api.z.ai/api/coding/paas/v4" > /dev/null
else
save_configuration "$SELECTED_PROVIDER_ID" "$SELECTED_ENV_VAR" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" > /dev/null
fi
echo -e "${GREEN}${NC}"
echo -e " ${DIM}~/.hive/configuration.json${NC}"
fi
@@ -1041,7 +1159,15 @@ if [ -n "$SELECTED_PROVIDER_ID" ]; then
SELECTED_MODEL="$(get_default_model "$SELECTED_PROVIDER_ID")"
fi
echo -e "${BOLD}Default LLM:${NC}"
if [ "$SUBSCRIPTION_MODE" = "claude_code" ]; then
echo -e " ${GREEN}${NC} Claude Code Subscription → ${DIM}$SELECTED_MODEL${NC}"
echo -e " ${DIM}Token auto-refresh from ~/.claude/.credentials.json${NC}"
elif [ "$SUBSCRIPTION_MODE" = "zai_code" ]; then
echo -e " ${GREEN}${NC} ZAI Code Subscription → ${DIM}$SELECTED_MODEL${NC}"
echo -e " ${DIM}API: api.z.ai (OpenAI-compatible)${NC}"
else
echo -e " ${CYAN}$SELECTED_PROVIDER_ID${NC}${DIM}$SELECTED_MODEL${NC}"
fi
echo ""
fi
File diff suppressed because it is too large