Merge pull request #5071 from TimothyZhang7/feature/queen-bee
Release / Create Release (push) Waiting to run
Release / Create Release (push) Waiting to run
Feature/queen bee
This commit is contained in:
@@ -0,0 +1,13 @@
|
||||
"""Framework-provided agents."""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
# Directory containing this package; each framework agent lives in a subdir.
FRAMEWORK_AGENTS_DIR = Path(__file__).parent


def list_framework_agents() -> list[Path]:
    """Return the framework agent directories, sorted by directory name.

    An "agent directory" is any immediate subdirectory of this package
    that contains an ``agent.py`` file.
    """
    agents = []
    for entry in FRAMEWORK_AGENTS_DIR.iterdir():
        if entry.is_dir() and (entry / "agent.py").exists():
            agents.append(entry)
    agents.sort(key=lambda entry: entry.name)
    return agents
|
||||
@@ -0,0 +1,44 @@
|
||||
"""
|
||||
Hive Coder — Native coding agent that builds Hive agent packages.
|
||||
|
||||
Deeply understands the agent framework and produces complete Python packages
|
||||
with goals, nodes, edges, system prompts, MCP configuration, and tests
|
||||
from natural language specifications.
|
||||
"""
|
||||
|
||||
from .agent import (
|
||||
HiveCoderAgent,
|
||||
conversation_mode,
|
||||
default_agent,
|
||||
edges,
|
||||
entry_node,
|
||||
entry_points,
|
||||
goal,
|
||||
identity_prompt,
|
||||
loop_config,
|
||||
nodes,
|
||||
pause_nodes,
|
||||
terminal_nodes,
|
||||
)
|
||||
from .config import AgentMetadata, RuntimeConfig, default_config, metadata
|
||||
|
||||
__version__ = "1.0.0"
|
||||
|
||||
__all__ = [
|
||||
"HiveCoderAgent",
|
||||
"default_agent",
|
||||
"goal",
|
||||
"nodes",
|
||||
"edges",
|
||||
"entry_node",
|
||||
"entry_points",
|
||||
"pause_nodes",
|
||||
"terminal_nodes",
|
||||
"conversation_mode",
|
||||
"identity_prompt",
|
||||
"loop_config",
|
||||
"RuntimeConfig",
|
||||
"AgentMetadata",
|
||||
"default_config",
|
||||
"metadata",
|
||||
]
|
||||
@@ -0,0 +1,223 @@
|
||||
"""CLI entry point for Hive Coder agent."""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
|
||||
import click
|
||||
|
||||
from .agent import HiveCoderAgent, default_agent
|
||||
|
||||
|
||||
def setup_logging(verbose=False, debug=False):
    """Configure root logging for execution visibility.

    Precedence: ``debug`` wins over ``verbose``; with neither flag set,
    only warnings and above are shown. Output always goes to stderr so
    stdout stays clean for result JSON.
    """
    if debug:
        selected = (logging.DEBUG, "%(asctime)s %(name)s: %(message)s")
    elif verbose:
        selected = (logging.INFO, "%(message)s")
    else:
        selected = (logging.WARNING, "%(levelname)s: %(message)s")
    level, fmt = selected
    logging.basicConfig(level=level, format=fmt, stream=sys.stderr)
    # Explicitly align the framework logger with the chosen level.
    logging.getLogger("framework").setLevel(level)
||||
|
||||
|
||||
# Top-level click group; subcommands (run, tui, info, validate, shell)
# attach themselves via @cli.command().
# NOTE(review): the version string here duplicates __version__ in the
# package __init__ — keep them in sync, or source one from the other.
@click.group()
@click.version_option(version="1.0.0")
def cli():
    """Hive Coder — Build Hive agent packages from natural language."""
    pass
||||
|
||||
|
||||
@cli.command()
@click.option("--request", "-r", type=str, required=True, help="What agent to build")
@click.option("--mock", is_flag=True, help="Run in mock mode")
@click.option("--quiet", "-q", is_flag=True, help="Only output result JSON")
@click.option("--verbose", "-v", is_flag=True, help="Show execution details")
@click.option("--debug", is_flag=True, help="Show debug logging")
def run(request, mock, quiet, verbose, debug):
    """Execute agent building from a request."""
    # In quiet mode logging is left unconfigured so stdout carries only
    # the result JSON.
    if not quiet:
        setup_logging(verbose=verbose, debug=debug)

    context = {"user_request": request}

    # Single-shot execution: default_agent.run() starts the runtime,
    # triggers the default entry point, and tears the runtime down.
    result = asyncio.run(default_agent.run(context, mock_mode=mock))

    output_data = {
        "success": result.success,
        "steps_executed": result.steps_executed,
        "output": result.output,
    }
    if result.error:
        output_data["error"] = result.error

    # default=str keeps non-JSON-serializable values (paths, datetimes)
    # from crashing the dump.
    click.echo(json.dumps(output_data, indent=2, default=str))
    # Exit code mirrors the execution result for shell/CI consumers.
    sys.exit(0 if result.success else 1)
|
||||
|
||||
|
||||
@cli.command()
@click.option("--mock", is_flag=True, help="Run in mock mode")
@click.option("--verbose", "-v", is_flag=True, help="Show execution details")
@click.option("--debug", is_flag=True, help="Show debug logging")
def tui(mock, verbose, debug):
    """Launch the TUI dashboard for interactive agent building."""
    setup_logging(verbose=verbose, debug=debug)

    # The TUI (textual) is an optional dependency; fail with an install
    # hint instead of a raw traceback.
    try:
        from framework.tui.app import AdenTUI
    except ImportError:
        click.echo("TUI requires the 'textual' package. Install with: pip install textual")
        sys.exit(1)

    from pathlib import Path

    from framework.llm import LiteLLMProvider
    from framework.runner.tool_registry import ToolRegistry
    from framework.runtime.agent_runtime import create_agent_runtime
    from framework.runtime.execution_stream import EntryPointSpec

    async def run_with_tui():
        # NOTE(review): this wiring duplicates HiveCoderAgent._setup()
        # almost line-for-line (registry, storage path, MCP config, LLM,
        # runtime), but with a different entry point id ("start" vs
        # "default") and isolation level — consider delegating to
        # _setup() to avoid drift.
        agent = HiveCoderAgent()

        agent._tool_registry = ToolRegistry()

        storage_path = Path.home() / ".hive" / "agents" / "hive_coder"
        storage_path.mkdir(parents=True, exist_ok=True)

        # MCP tool servers are optional; skip silently if no config file
        # ships next to this module.
        mcp_config_path = Path(__file__).parent / "mcp_servers.json"
        if mcp_config_path.exists():
            agent._tool_registry.load_mcp_config(mcp_config_path)

        # llm=None signals mock mode to the runtime (no real model calls).
        llm = None
        if not mock:
            llm = LiteLLMProvider(
                model=agent.config.model,
                api_key=agent.config.api_key,
                api_base=agent.config.api_base,
            )

        tools = list(agent._tool_registry.get_tools().values())
        tool_executor = agent._tool_registry.get_executor()
        graph = agent._build_graph()

        runtime = create_agent_runtime(
            graph=graph,
            goal=agent.goal,
            storage_path=storage_path,
            entry_points=[
                EntryPointSpec(
                    id="start",
                    name="Build Agent",
                    entry_node="coder",
                    trigger_type="manual",
                    isolation_level="isolated",
                ),
            ],
            llm=llm,
            tools=tools,
            tool_executor=tool_executor,
        )

        await runtime.start()

        # Always stop the runtime, even if the TUI raises.
        try:
            app = AdenTUI(runtime)
            await app.run_async()
        finally:
            await runtime.stop()

    asyncio.run(run_with_tui())
|
||||
|
||||
|
||||
@cli.command()
@click.option("--json", "output_json", is_flag=True)
def info(output_json):
    """Show agent information."""
    info_data = default_agent.info()
    if output_json:
        click.echo(json.dumps(info_data, indent=2))
    else:
        # Human-readable summary of the same payload.
        click.echo(f"Agent: {info_data['name']}")
        click.echo(f"Version: {info_data['version']}")
        click.echo(f"Description: {info_data['description']}")
        click.echo(f"\nNodes: {', '.join(info_data['nodes'])}")
        click.echo(f"Client-facing: {', '.join(info_data['client_facing_nodes'])}")
        click.echo(f"Entry: {info_data['entry_node']}")
        # Empty terminal_nodes means the agent never self-terminates.
        click.echo(f"Terminal: {', '.join(info_data['terminal_nodes']) or '(forever-alive)'}")
|
||||
|
||||
|
||||
@cli.command()
def validate():
    """Validate agent structure."""
    # Structural validation only (node/edge/entry-point references);
    # see HiveCoderAgent.validate() for what is checked.
    validation = default_agent.validate()
    if validation["valid"]:
        click.echo("Agent is valid")
        if validation["warnings"]:
            for warning in validation["warnings"]:
                click.echo(f"  WARNING: {warning}")
    else:
        click.echo("Agent has errors:")
        for error in validation["errors"]:
            click.echo(f"  ERROR: {error}")
    # Non-zero exit on invalid structure for CI usage.
    sys.exit(0 if validation["valid"] else 1)
|
||||
|
||||
|
||||
@cli.command()
@click.option("--verbose", "-v", is_flag=True)
def shell(verbose):
    """Interactive agent building session (CLI, no TUI)."""
    # Thin sync wrapper: all real work happens in _interactive_shell.
    asyncio.run(_interactive_shell(verbose))
|
||||
|
||||
|
||||
async def _interactive_shell(verbose=False):
    """Async interactive shell.

    Reads build requests from stdin in a loop and feeds each one to a
    long-lived HiveCoderAgent until the user quits (quit/exit/q or Ctrl-C).
    """
    setup_logging(verbose=verbose)

    click.echo("=== Hive Coder ===")
    click.echo("Describe the agent you want to build (or 'quit' to exit):\n")

    agent = HiveCoderAgent()
    await agent.start()

    try:
        while True:
            try:
                # input() blocks, so run it in the default executor to keep
                # the event loop responsive. get_running_loop() is the
                # correct call inside a coroutine; get_event_loop() is
                # deprecated in this context since Python 3.10.
                request = await asyncio.get_running_loop().run_in_executor(None, input, "Build> ")
                if request.lower() in ["quit", "exit", "q"]:
                    click.echo("Goodbye!")
                    break

                # Ignore empty input lines.
                if not request.strip():
                    continue

                click.echo("\nBuilding agent...\n")

                result = await agent.trigger_and_wait("default", {"user_request": request})

                # trigger_and_wait returns None on timeout.
                if result is None:
                    click.echo("\n[Execution timed out]\n")
                    continue

                if result.success:
                    output = result.output or {}
                    agent_name = output.get("agent_name", "unknown")
                    validation = output.get("validation_result", "unknown")
                    click.echo(f"\nAgent '{agent_name}' built. Validation: {validation}\n")
                else:
                    click.echo(f"\nBuild failed: {result.error}\n")

            except KeyboardInterrupt:
                click.echo("\nGoodbye!")
                break
            except Exception as e:
                # Keep the shell alive on unexpected errors; print the
                # traceback for debugging rather than crashing the loop.
                click.echo(f"Error: {e}", err=True)
                import traceback

                traceback.print_exc()
    finally:
        # Always release runtime resources, even on Ctrl-C.
        await agent.stop()
|
||||
|
||||
|
||||
# Entry point when executed directly or via `python -m <package>`.
if __name__ == "__main__":
    cli()
|
||||
@@ -0,0 +1,314 @@
|
||||
"""Agent graph construction for Hive Coder."""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from framework.graph import Constraint, Goal, SuccessCriterion
|
||||
from framework.graph.checkpoint_config import CheckpointConfig
|
||||
from framework.graph.edge import GraphSpec
|
||||
from framework.graph.executor import ExecutionResult
|
||||
from framework.llm import LiteLLMProvider
|
||||
from framework.runner.tool_registry import ToolRegistry
|
||||
from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
|
||||
from framework.runtime.execution_stream import EntryPointSpec
|
||||
|
||||
from .config import default_config, metadata
|
||||
from .nodes import coder_node
|
||||
|
||||
# Goal definition — declarative contract for what "success" means for this
# agent. NOTE(review): how weights and hard constraints are enforced is up
# to the framework's Goal/Constraint machinery; not visible here.
goal = Goal(
    id="agent-builder",
    name="Hive Agent Builder",
    description=(
        "Build complete, validated Hive agent packages from natural language "
        "specifications. Produces production-ready Python packages with goals, "
        "nodes, edges, system prompts, MCP configuration, and tests."
    ),
    # Weighted success criteria; the four weights sum to 1.0.
    success_criteria=[
        SuccessCriterion(
            id="valid-package",
            description="Generated agent package passes structural validation",
            metric="validation_pass",
            target="true",
            weight=0.30,
        ),
        SuccessCriterion(
            id="complete-files",
            description=(
                "All required files generated: agent.py, config.py, "
                "nodes/__init__.py, __init__.py, __main__.py, mcp_servers.json"
            ),
            metric="file_count",
            target=">=6",
            weight=0.25,
        ),
        SuccessCriterion(
            id="user-satisfaction",
            description="User reviews and approves the generated agent",
            metric="user_approval",
            target="true",
            weight=0.25,
        ),
        SuccessCriterion(
            id="framework-compliance",
            description=(
                "Generated code follows framework patterns: STEP 1/STEP 2 "
                "for client-facing, correct imports, entry_points format"
            ),
            metric="pattern_compliance",
            target="100%",
            weight=0.20,
        ),
    ],
    # All constraints are "hard": non-negotiable rules for the coder node.
    constraints=[
        Constraint(
            id="dynamic-tool-discovery",
            description=(
                "Always discover available tools dynamically via "
                "discover_mcp_tools before referencing tools in agent designs"
            ),
            constraint_type="hard",
            category="correctness",
        ),
        Constraint(
            id="no-fabricated-tools",
            description="Only reference tools that exist in hive-tools MCP",
            constraint_type="hard",
            category="correctness",
        ),
        Constraint(
            id="valid-python",
            description="All generated Python files must be syntactically correct",
            constraint_type="hard",
            category="correctness",
        ),
        Constraint(
            id="self-verification",
            description="Run validation after writing code; fix errors before presenting",
            constraint_type="hard",
            category="quality",
        ),
    ],
)
|
||||
|
||||
# Nodes — single coder node (guardian is now auto-attached by the framework)
nodes = [coder_node]

# No edges needed — single forever-alive event_loop node
edges = []

# Graph configuration
entry_node = "coder"
entry_points = {"start": "coder"}
pause_nodes = []
terminal_nodes = []  # Forever-alive: loops until user exits

# No async entry points — guardian is now auto-attached via attach_guardian()
async_entry_points = []

# Module-level variables read by AgentRunner.load()
# NOTE(review): the package __init__.py must re-export every one of these,
# otherwise the runner silently falls back to defaults.
conversation_mode = "continuous"
identity_prompt = (
    "You are Hive Coder, the best agent-building coding agent on the planet. "
    "You deeply understand the Hive agent framework at the source code level "
    "and produce production-ready agent packages from natural language. "
    "You can dynamically discover available framework tools, inspect runtime "
    "sessions and checkpoints from agents you build, and run their test suites. "
    "You follow coding agent discipline: read before writing, verify "
    "assumptions by reading actual code, adhere to project conventions, "
    "self-verify with validation, and fix your own errors. You are concise, "
    "direct, and technically rigorous. No emojis. No fluff."
)
# Safety limits for the event-loop node: iteration, per-turn tool-call,
# and conversation-history token caps.
loop_config = {
    "max_iterations": 100,
    "max_tool_calls_per_turn": 20,
    "max_history_tokens": 32000,
}
|
||||
|
||||
|
||||
class HiveCoderAgent:
    """
    Hive Coder — builds Hive agent packages from natural language.

    Single-node architecture: the coder runs in a continuous while(true) loop.
    The guardian watchdog is auto-attached by the framework in TUI mode.

    Lifecycle: start() -> trigger_and_wait() (repeatable) -> stop(), or
    run() for a one-shot execution that handles all three.
    """

    def __init__(self, config: RuntimeConfig | None = None):
        # Fall back to the shared module-level config when none is given.
        self.config = config or default_config
        # Snapshot the module-level graph definition onto the instance.
        self.goal = goal
        self.nodes = nodes
        self.edges = edges
        self.entry_node = entry_node
        self.entry_points = entry_points
        self.pause_nodes = pause_nodes
        self.terminal_nodes = terminal_nodes
        self.async_entry_points = async_entry_points
        # Lazily initialized by _setup(); all None until start() is called.
        self._graph: GraphSpec | None = None
        self._agent_runtime: AgentRuntime | None = None
        self._tool_registry: ToolRegistry | None = None
        self._storage_path: Path | None = None

    def _build_graph(self) -> GraphSpec:
        """Build the GraphSpec from this instance's graph configuration."""
        return GraphSpec(
            id="hive-coder-graph",
            goal_id=self.goal.id,
            version="1.0.0",
            entry_node=self.entry_node,
            entry_points=self.entry_points,
            terminal_nodes=self.terminal_nodes,
            pause_nodes=self.pause_nodes,
            nodes=self.nodes,
            edges=self.edges,
            default_model=self.config.model,
            max_tokens=self.config.max_tokens,
            loop_config=loop_config,
            conversation_mode=conversation_mode,
            identity_prompt=identity_prompt,
            async_entry_points=self.async_entry_points,
        )

    def _setup(self, mock_mode: bool = False) -> None:
        """Set up the agent runtime (storage, tools, LLM, graph, runtime)."""
        # Per-agent storage under the user's home directory.
        self._storage_path = Path.home() / ".hive" / "agents" / "hive_coder"
        self._storage_path.mkdir(parents=True, exist_ok=True)

        self._tool_registry = ToolRegistry()

        # MCP tool servers are optional — skip silently if no config file
        # ships next to this module.
        mcp_config_path = Path(__file__).parent / "mcp_servers.json"
        if mcp_config_path.exists():
            self._tool_registry.load_mcp_config(mcp_config_path)

        # llm=None signals mock mode to the runtime (no real model calls).
        llm = None
        if not mock_mode:
            llm = LiteLLMProvider(
                model=self.config.model,
                api_key=self.config.api_key,
                api_base=self.config.api_base,
            )

        tool_executor = self._tool_registry.get_executor()
        tools = list(self._tool_registry.get_tools().values())

        self._graph = self._build_graph()

        # Checkpoint after each node completes; prune checkpoints older
        # than a week; write them asynchronously.
        checkpoint_config = CheckpointConfig(
            enabled=True,
            checkpoint_on_node_start=False,
            checkpoint_on_node_complete=True,
            checkpoint_max_age_days=7,
            async_checkpoint=True,
        )

        # Manually-triggered entry point used by trigger_and_wait("default").
        entry_point_specs = [
            EntryPointSpec(
                id="default",
                name="Default",
                entry_node=self.entry_node,
                trigger_type="manual",
                isolation_level="shared",
            ),
        ]

        self._agent_runtime = create_agent_runtime(
            graph=self._graph,
            goal=self.goal,
            storage_path=self._storage_path,
            entry_points=entry_point_specs,
            llm=llm,
            tools=tools,
            tool_executor=tool_executor,
            checkpoint_config=checkpoint_config,
            graph_id="hive_coder",
        )

    async def start(self, mock_mode: bool = False) -> None:
        """Set up and start the agent runtime. Idempotent if already running."""
        if self._agent_runtime is None:
            self._setup(mock_mode=mock_mode)
        if not self._agent_runtime.is_running:
            await self._agent_runtime.start()

    async def stop(self) -> None:
        """Stop the agent runtime and clean up.

        Drops the runtime reference so the next start() rebuilds it.
        """
        if self._agent_runtime and self._agent_runtime.is_running:
            await self._agent_runtime.stop()
        self._agent_runtime = None

    async def trigger_and_wait(
        self,
        entry_point: str = "default",
        input_data: dict | None = None,
        timeout: float | None = None,
        session_state: dict | None = None,
    ) -> ExecutionResult | None:
        """Execute the graph and wait for completion.

        Returns None on timeout (mirrors the runtime's contract).
        NOTE(review): the ``timeout`` parameter is accepted but not
        forwarded to the runtime — confirm whether it should be.

        Raises:
            RuntimeError: If start() has not been called.
        """
        if self._agent_runtime is None:
            raise RuntimeError("Agent not started. Call start() first.")

        return await self._agent_runtime.trigger_and_wait(
            entry_point_id=entry_point,
            input_data=input_data or {},
            session_state=session_state,
        )

    async def run(self, context: dict, mock_mode=False, session_state=None) -> ExecutionResult:
        """Run the agent (convenience method for single execution).

        Starts the runtime, triggers the default entry point once, and
        always stops the runtime afterwards.
        """
        await self.start(mock_mode=mock_mode)
        try:
            result = await self.trigger_and_wait("default", context, session_state=session_state)
            # Normalize a timeout (None) into a failed ExecutionResult.
            return result or ExecutionResult(success=False, error="Execution timeout")
        finally:
            await self.stop()

    def info(self) -> dict:
        """Get agent information as a JSON-serializable dict."""
        return {
            "name": metadata.name,
            "version": metadata.version,
            "description": metadata.description,
            "goal": {
                "name": self.goal.name,
                "description": self.goal.description,
            },
            "nodes": [n.id for n in self.nodes],
            "edges": [e.id for e in self.edges],
            "entry_node": self.entry_node,
            "entry_points": self.entry_points,
            "pause_nodes": self.pause_nodes,
            "terminal_nodes": self.terminal_nodes,
            "client_facing_nodes": [n.id for n in self.nodes if n.client_facing],
        }

    def validate(self) -> dict:
        """Validate agent structure.

        Checks that every edge endpoint, the entry node, each terminal
        node, and each entry point references a known node id.

        Returns:
            dict with keys "valid" (bool), "errors", and "warnings" (lists).
        """
        errors = []
        warnings = []

        node_ids = {node.id for node in self.nodes}
        for edge in self.edges:
            if edge.source not in node_ids:
                errors.append(f"Edge {edge.id}: source '{edge.source}' not found")
            if edge.target not in node_ids:
                errors.append(f"Edge {edge.id}: target '{edge.target}' not found")

        if self.entry_node not in node_ids:
            errors.append(f"Entry node '{self.entry_node}' not found")

        for terminal in self.terminal_nodes:
            if terminal not in node_ids:
                errors.append(f"Terminal node '{terminal}' not found")

        for ep_id, node_id in self.entry_points.items():
            if node_id not in node_ids:
                errors.append(f"Entry point '{ep_id}' references unknown node '{node_id}'")

        return {
            "valid": len(errors) == 0,
            "errors": errors,
            "warnings": warnings,
        }


# Create default instance shared by the CLI commands.
default_agent = HiveCoderAgent()
|
||||
@@ -0,0 +1,51 @@
|
||||
"""Runtime configuration for Hive Coder agent."""
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _load_preferred_model() -> str:
|
||||
"""Load preferred model from ~/.hive/configuration.json."""
|
||||
config_path = Path.home() / ".hive" / "configuration.json"
|
||||
if config_path.exists():
|
||||
try:
|
||||
with open(config_path) as f:
|
||||
config = json.load(f)
|
||||
llm = config.get("llm", {})
|
||||
if llm.get("provider") and llm.get("model"):
|
||||
return f"{llm['provider']}/{llm['model']}"
|
||||
except Exception:
|
||||
pass
|
||||
return "anthropic/claude-sonnet-4-20250514"
|
||||
|
||||
|
||||
@dataclass
class RuntimeConfig:
    """LLM runtime settings for the Hive Coder agent."""

    # Model id in "provider/model" form, resolved from the user's
    # ~/.hive/configuration.json at instance-creation time.
    model: str = field(default_factory=_load_preferred_model)
    temperature: float = 0.7
    max_tokens: int = 40000
    # Credential/endpoint overrides; None lets the provider fall back to
    # its own defaults (presumably environment variables — confirm).
    api_key: str | None = None
    api_base: str | None = None


# Shared default instance used when callers don't supply their own config.
default_config = RuntimeConfig()
|
||||
|
||||
|
||||
@dataclass
class AgentMetadata:
    """Static identity/descriptive metadata for the Hive Coder agent."""

    name: str = "Hive Coder"
    version: str = "1.0.0"
    description: str = (
        "Native coding agent that builds production-ready Hive agent packages "
        "from natural language specifications. Deeply understands the agent framework "
        "and produces complete Python packages with goals, nodes, edges, system prompts, "
        "MCP configuration, and tests."
    )
    # Greeting shown to the user at the start of a session.
    intro_message: str = (
        "I'm Hive Coder — I build Hive agents. Describe what kind of agent "
        "you want to create and I'll design, implement, and validate it for you."
    )


# Shared default metadata instance.
metadata = AgentMetadata()
|
||||
@@ -0,0 +1,96 @@
|
||||
"""Attach the Hive Coder's guardian node to any agent runtime.
|
||||
|
||||
Usage::
|
||||
|
||||
from framework.agents.hive_coder.guardian import attach_guardian
|
||||
|
||||
runner._setup()
|
||||
attach_guardian(runner._agent_runtime, runner._tool_registry)
|
||||
await runner._agent_runtime.start()
|
||||
|
||||
Must be called **before** ``runtime.start()`` — it injects the
|
||||
guardian node into the graph and registers an event-driven entry point.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from framework.runner.tool_registry import ToolRegistry
|
||||
from framework.runtime.agent_runtime import AgentRuntime
|
||||
|
||||
from framework.runtime.execution_stream import EntryPointSpec
|
||||
|
||||
from .nodes import ALL_GUARDIAN_TOOLS, guardian_node
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Event-driven entry point: the guardian node wakes on failure/stall events.
# "exclude_own_graph": False means it also reacts to events from its own
# graph, not just the agents it supervises.
GUARDIAN_ENTRY_POINT = EntryPointSpec(
    id="guardian",
    name="Agent Guardian",
    entry_node="guardian",
    trigger_type="event",
    trigger_config={
        "event_types": [
            "execution_failed",
            "node_stalled",
            "node_tool_doom_loop",
            "constraint_violation",
        ],
        "exclude_own_graph": False,
    },
    isolation_level="shared",
)
|
||||
|
||||
|
||||
def attach_guardian(
    runtime: AgentRuntime,
    tool_registry: ToolRegistry,
) -> None:
    """Inject hive_coder's guardian node into *runtime*'s graph.

    1. Registers graph lifecycle tools if not already present.
    2. Refreshes the runtime's tool list and executor.
    3. Adds the guardian node (with dynamically filtered tools) to the graph.
    4. Registers an event-driven entry point that fires on execution failures,
       stalls, tool doom loops, and constraint violations.

    Must be called **before** ``runtime.start()``.

    Raises:
        RuntimeError: If the runtime is already running.
    """
    # Imported lazily to avoid a module-load dependency on the tools package.
    from framework.tools.session_graph_tools import register_graph_tools

    # 1. Register graph lifecycle tools if not already present
    if not tool_registry.has_tool("load_agent"):
        register_graph_tools(tool_registry, runtime)

    # 2. Refresh tool schemas and executor on the runtime
    # NOTE(review): writes private attributes (_tools/_tool_executor) —
    # fragile against runtime refactors; a public setter would be safer.
    runtime._tools = list(tool_registry.get_tools().values())
    runtime._tool_executor = tool_registry.get_executor()

    # 3. Filter guardian tools to only those available in the registry
    available = set(tool_registry.get_tools().keys())
    filtered_tools = [t for t in ALL_GUARDIAN_TOOLS if t in available]

    # Build guardian node with filtered tool list
    node = guardian_node.model_copy(update={"tools": filtered_tools})

    # Add to the runtime's graph (so register_entry_point validation passes)
    runtime.graph.nodes.append(node)

    # Mark guardian as reachable in graph-level entry_points so
    # GraphSpec.validate() doesn't flag it as unreachable.
    runtime.graph.entry_points["guardian"] = "guardian"

    # 4. Register event-driven entry point
    runtime.register_entry_point(GUARDIAN_ENTRY_POINT)

    logger.info(
        "Guardian attached with %d tools: %s",
        len(filtered_tools),
        filtered_tools,
    )
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"coder-tools": {
|
||||
"transport": "stdio",
|
||||
"command": "uv",
|
||||
"args": ["run", "python", "coder_tools_server.py", "--stdio"],
|
||||
"cwd": "../../../../tools",
|
||||
"description": "Unsandboxed file system tools for code generation and validation"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,556 @@
|
||||
"""Node definitions for Hive Coder agent."""
|
||||
|
||||
from framework.graph import NodeSpec
|
||||
|
||||
# Single node — like opencode's while(true) loop.
|
||||
# One continuous context handles the entire workflow:
|
||||
# discover → design → implement → verify → present → iterate.
|
||||
coder_node = NodeSpec(
|
||||
id="coder",
|
||||
name="Hive Coder",
|
||||
description=(
|
||||
"Autonomous coding agent that builds Hive agent packages. "
|
||||
"Handles the full lifecycle: understanding user intent, "
|
||||
"designing architecture, writing code, validating, and "
|
||||
"iterating on feedback — all in one continuous conversation."
|
||||
),
|
||||
node_type="event_loop",
|
||||
client_facing=True,
|
||||
max_node_visits=0,
|
||||
input_keys=["user_request"],
|
||||
output_keys=["agent_name", "validation_result"],
|
||||
success_criteria=(
|
||||
"A complete, validated Hive agent package exists at "
|
||||
"exports/{agent_name}/ and passes structural validation."
|
||||
),
|
||||
system_prompt="""\
|
||||
You are Hive Coder, the best agent-building coding agent. You build \
|
||||
production-ready Hive agent packages from natural language.
|
||||
|
||||
# Core Mandates
|
||||
|
||||
- **Read before writing.** NEVER write code from assumptions. Read \
|
||||
reference agents and templates first. Read every file before editing.
|
||||
- **Conventions first.** Follow existing project patterns exactly. \
|
||||
Analyze imports, structure, and style in reference agents.
|
||||
- **Verify assumptions.** Never assume a class, import, or pattern \
|
||||
exists. Read actual source to confirm. Search if unsure.
|
||||
- **Discover tools dynamically.** NEVER reference tools from static \
|
||||
docs. Always run discover_mcp_tools() to see what actually exists.
|
||||
- **Professional objectivity.** If a use case is a poor fit for the \
|
||||
framework, say so. Technical accuracy over validation.
|
||||
- **Concise.** No emojis. No preambles. No postambles. Substance only.
|
||||
- **Self-verify.** After writing code, run validation and tests. Fix \
|
||||
errors yourself. Don't declare success until validation passes.
|
||||
|
||||
# Tools
|
||||
|
||||
## File I/O
|
||||
- read_file(path, offset?, limit?) — read with line numbers
|
||||
- write_file(path, content) — create/overwrite, auto-mkdir
|
||||
- edit_file(path, old_text, new_text, replace_all?) — fuzzy-match edit
|
||||
- list_directory(path, recursive?) — list contents
|
||||
- search_files(pattern, path?, include?) — regex search
|
||||
- run_command(command, cwd?, timeout?) — shell execution
|
||||
- undo_changes(path?) — restore from git snapshot
|
||||
|
||||
## Meta-Agent
|
||||
- discover_mcp_tools(server_config_path?) — connect to MCP servers \
|
||||
and list all available tools with full schemas. Default: hive-tools.
|
||||
- list_agents() — list all agent packages in exports/ with session counts
|
||||
- list_agent_sessions(agent_name, status?, limit?) — list sessions
|
||||
- get_agent_session_state(agent_name, session_id) — full session state
|
||||
- get_agent_session_memory(agent_name, session_id, key?) — memory data
|
||||
- list_agent_checkpoints(agent_name, session_id) — list checkpoints
|
||||
- get_agent_checkpoint(agent_name, session_id, checkpoint_id?) — load checkpoint
|
||||
- run_agent_tests(agent_name, test_types?, fail_fast?) — run pytest with parsing
|
||||
|
||||
# Meta-Agent Capabilities
|
||||
|
||||
You are not just a file writer. You have deep integration with the \
|
||||
Hive framework:
|
||||
|
||||
## Tool Discovery (MANDATORY before designing)
|
||||
Before designing any agent, run discover_mcp_tools() to see what \
|
||||
tools are actually available from the hive-tools MCP server. This \
|
||||
returns full schemas with parameter names, types, and descriptions. \
|
||||
NEVER guess tool names or parameters from memory. The tool catalog \
|
||||
is the ground truth.
|
||||
|
||||
To check a specific agent's tools:
|
||||
discover_mcp_tools("exports/{agent_name}/mcp_servers.json")
|
||||
|
||||
## Agent Awareness
|
||||
Run list_agents() to see what agents already exist. Read their code \
|
||||
for patterns:
|
||||
read_file("exports/{name}/agent.py")
|
||||
read_file("exports/{name}/nodes/__init__.py")
|
||||
|
||||
## Post-Build Testing
|
||||
After writing agent code, validate structurally AND run tests:
|
||||
run_command("python -c 'from {name} import default_agent; \\
|
||||
print(default_agent.validate())'")
|
||||
run_agent_tests("{name}")
|
||||
|
||||
## Debugging Built Agents
|
||||
When a user says "my agent is failing" or "debug this agent":
|
||||
1. list_agent_sessions("{agent_name}") — find the session
|
||||
2. get_agent_session_state("{agent_name}", "{session_id}") — see status
|
||||
3. get_agent_session_memory("{agent_name}", "{session_id}") — inspect data
|
||||
4. list_agent_checkpoints / get_agent_checkpoint — trace execution
|
||||
|
||||
# Workflow
|
||||
|
||||
You operate in a continuous loop. The user describes what they want, \
|
||||
you build it. No rigid phases — use judgment. But the general flow is:
|
||||
|
||||
## 1. Understand
|
||||
|
||||
When the user describes what they want to build, hear the structure:
|
||||
- The actors, the trigger, the core loop, the output, the pain.
|
||||
|
||||
Play back a model: "Here's what I'm picturing: [concrete picture]. \
|
||||
Before I start — [1-2 questions you can't infer]."
|
||||
|
||||
Ask only what you CANNOT infer. Fill blanks with domain knowledge.
|
||||
|
||||
## 2. Qualify
|
||||
|
||||
Assess framework fit honestly. Run discover_mcp_tools() to check \
|
||||
what tools exist. Read the framework guide:
|
||||
read_file("core/framework/agents/hive_coder/reference/framework_guide.md")
|
||||
|
||||
Consider:
|
||||
- What works well (multi-turn, HITL, tool orchestration)
|
||||
- Limitations (LLM latency, context limits, cost)
|
||||
- Deal-breakers (missing tools, wrong paradigm)
|
||||
|
||||
Give a clear recommendation: proceed, adjust scope, or reconsider.
|
||||
|
||||
## 3. Design
|
||||
|
||||
Design the agent architecture:
|
||||
- Goal: id, name, description, 3-5 success criteria, 2-4 constraints
|
||||
- Nodes: **2-4 nodes MAXIMUM** (see rules below)
|
||||
- Edges: on_success for linear, conditional for routing
|
||||
- Lifecycle: ALWAYS forever-alive (`terminal_nodes=[]`) unless the user \
|
||||
explicitly requests a one-shot/batch agent. Forever-alive agents loop \
|
||||
continuously — the user exits by closing the TUI. This is the standard \
|
||||
pattern for all interactive agents.
|
||||
|
||||
### Node Count Rules (HARD LIMITS)
|
||||
|
||||
**2-4 nodes** for all agents. Never exceed 4 unless the user explicitly \
|
||||
requests more. Each node boundary serializes outputs to shared memory \
|
||||
and DESTROYS all in-context information (tool results, reasoning, history).
|
||||
|
||||
**MERGE nodes when:**
|
||||
- Node has NO tools (pure LLM reasoning) → merge into predecessor/successor
|
||||
- Node sets only 1 trivial output → collapse into predecessor
|
||||
- Multiple consecutive autonomous nodes → combine into one rich node
|
||||
- A "report" or "summary" node → merge into the client-facing node
|
||||
- A "confirm" or "schedule" node that calls no external service → remove
|
||||
|
||||
**SEPARATE nodes only when:**
|
||||
- Client-facing vs autonomous (different interaction models)
|
||||
- Fundamentally different tool sets
|
||||
- Fan-out parallelism (parallel branches MUST be separate)
|
||||
|
||||
**Typical patterns:**
|
||||
- 2 nodes: `interact (client-facing) → process (autonomous) → interact`
|
||||
- 3 nodes: `intake (CF) → process (auto) → review (CF) → intake`
|
||||
- WRONG: 7 nodes where half have no tools and just do LLM reasoning
|
||||
|
||||
Read reference agents before designing:
|
||||
list_agents()
|
||||
read_file("exports/deep_research_agent/agent.py")
|
||||
read_file("exports/deep_research_agent/nodes/__init__.py")
|
||||
|
||||
Present the design with ASCII art graph. Get user approval.
|
||||
|
||||
## 4. Implement
|
||||
|
||||
Read templates before writing code:
|
||||
read_file("core/framework/agents/hive_coder/reference/file_templates.md")
|
||||
read_file("core/framework/agents/hive_coder/reference/anti_patterns.md")
|
||||
|
||||
Write files in order:
|
||||
1. mkdir -p exports/{name}/nodes exports/{name}/tests
|
||||
2. config.py — RuntimeConfig + AgentMetadata
|
||||
3. nodes/__init__.py — NodeSpec definitions with system prompts
|
||||
4. agent.py — Goal, edges, graph, agent class
|
||||
5. __init__.py — package exports
|
||||
6. __main__.py — CLI with click
|
||||
7. mcp_servers.json — tool server config
|
||||
8. tests/ — fixtures
|
||||
|
||||
### Critical Rules
|
||||
|
||||
**Imports** (must match exactly — only import what you use):
|
||||
```python
|
||||
from framework.graph import (
|
||||
NodeSpec, EdgeSpec, EdgeCondition,
|
||||
Goal, SuccessCriterion, Constraint,
|
||||
)
|
||||
from framework.graph.edge import GraphSpec
|
||||
from framework.graph.executor import ExecutionResult
|
||||
from framework.graph.checkpoint_config import CheckpointConfig
|
||||
from framework.llm import LiteLLMProvider
|
||||
from framework.runner.tool_registry import ToolRegistry
|
||||
from framework.runtime.agent_runtime import (
|
||||
AgentRuntime, create_agent_runtime,
|
||||
)
|
||||
from framework.runtime.execution_stream import EntryPointSpec
|
||||
```
|
||||
For agents with async entry points (timers, webhooks, events), also add:
|
||||
```python
|
||||
from framework.graph.edge import GraphSpec, AsyncEntryPointSpec
|
||||
from framework.runtime.agent_runtime import (
|
||||
AgentRuntime, AgentRuntimeConfig, create_agent_runtime,
|
||||
)
|
||||
```
|
||||
NEVER `from core.framework...` — PYTHONPATH includes core/.
|
||||
|
||||
**__init__.py MUST re-export ALL module-level variables** \
|
||||
(THIS IS THE #1 SOURCE OF AGENT LOAD FAILURES):
|
||||
The runner imports the package (__init__.py), NOT agent.py. It reads \
|
||||
goal, nodes, edges, entry_node, entry_points, pause_nodes, \
|
||||
terminal_nodes, conversation_mode, identity_prompt, loop_config via \
|
||||
getattr(). If ANY are missing from __init__.py, they silently default \
|
||||
to None or {} — causing "must define goal, nodes, edges" or "node X \
|
||||
is unreachable" errors. The __init__.py MUST import and re-export \
|
||||
ALL of these from .agent:
|
||||
```python
|
||||
from .agent import (
|
||||
MyAgent, default_agent, goal, nodes, edges,
|
||||
entry_node, entry_points, pause_nodes, terminal_nodes,
|
||||
conversation_mode, identity_prompt, loop_config,
|
||||
)
|
||||
```
|
||||
|
||||
**entry_points**: `{"start": "first-node-id"}`
|
||||
For agents with multiple entry points (e.g. a reminder trigger), \
|
||||
add them: `{"start": "intake", "reminder": "reminder"}`
|
||||
|
||||
**conversation_mode** — ONLY two valid values:
|
||||
- `"continuous"` — recommended for interactive agents (context carries \
|
||||
across node transitions)
|
||||
- Omit entirely — for isolated per-node conversations
|
||||
NEVER use: "client_facing", "interactive", "adaptive", or any other \
|
||||
value. These DO NOT EXIST.
|
||||
|
||||
**loop_config** — ONLY three valid keys:
|
||||
```python
|
||||
loop_config = {
|
||||
"max_iterations": 100,
|
||||
"max_tool_calls_per_turn": 20,
|
||||
"max_history_tokens": 32000,
|
||||
}
|
||||
```
|
||||
NEVER add: "strategy", "mode", "timeout", or other keys.
|
||||
|
||||
**mcp_servers.json**:
|
||||
```json
|
||||
{
|
||||
"hive-tools": {
|
||||
"transport": "stdio",
|
||||
"command": "uv",
|
||||
"args": ["run", "python", "mcp_server.py", "--stdio"],
|
||||
"cwd": "../../tools"
|
||||
}
|
||||
}
|
||||
```
|
||||
NO "mcpServers" wrapper. cwd "../../tools". command "uv".
|
||||
|
||||
**Storage**: `Path.home() / ".hive" / "agents" / "{name}"`
|
||||
|
||||
**Client-facing system prompts** — STEP 1/STEP 2 pattern:
|
||||
```
|
||||
STEP 1 — Present to user (text only, NO tool calls):
|
||||
[instructions]
|
||||
|
||||
STEP 2 — After user responds, call set_output:
|
||||
[set_output calls]
|
||||
```
|
||||
|
||||
**Autonomous system prompts** — set_output in SEPARATE turn.
|
||||
|
||||
**Tools** — NEVER fabricate tool names. Common hallucinations: \
|
||||
csv_read, csv_write, csv_append, file_upload, database_query. \
|
||||
If discover_mcp_tools() shows these don't exist, use alternatives \
|
||||
(e.g. save_data/load_data for data persistence).
|
||||
|
||||
**Node rules**:
|
||||
- **2-4 nodes MAX.** Never exceed 4. Merge thin nodes aggressively.
|
||||
- A node with 0 tools is NOT a real node — merge it.
|
||||
- node_type always "event_loop"
|
||||
- max_node_visits default is 0 (unbounded) — correct for forever-alive. \
|
||||
Only set >0 in one-shot agents with bounded feedback loops.
|
||||
- Feedback inputs: nullable_output_keys
|
||||
- terminal_nodes=[] for forever-alive (the default)
|
||||
- Every node MUST have at least one outgoing edge (no dead ends)
|
||||
- Agents are forever-alive unless user explicitly asks for one-shot
|
||||
|
||||
**Agent class**: CamelCase name, default_agent at module level. \
|
||||
Constructor takes `config=None`. Follow the exact pattern in \
|
||||
file_templates.md — do NOT invent constructor params like \
|
||||
`llm_provider` or `tool_registry`.
|
||||
|
||||
**Module-level variables** (read by AgentRunner.load()):
|
||||
goal, nodes, edges, entry_node, entry_points, pause_nodes,
|
||||
terminal_nodes, conversation_mode, identity_prompt, loop_config
|
||||
|
||||
For agents with async triggers, also export:
|
||||
async_entry_points, runtime_config
|
||||
|
||||
**Async entry points** (timers, webhooks, events):
|
||||
When an agent needs scheduled tasks, webhook reactions, or event-driven \
|
||||
triggers, use `AsyncEntryPointSpec` (from framework.graph.edge) and \
|
||||
`AgentRuntimeConfig` (from framework.runtime.agent_runtime):
|
||||
- Timer (cron): `trigger_type="timer"`, \
|
||||
`trigger_config={"cron": "0 9 * * *"}` — standard 5-field cron expression \
|
||||
(e.g. `"0 9 * * MON-FRI"` weekdays 9am, `"*/30 * * * *"` every 30 min)
|
||||
- Timer (interval): `trigger_type="timer"`, \
|
||||
`trigger_config={"interval_minutes": 20, "run_immediately": False}`
|
||||
- Event (for webhooks): `trigger_type="event"`, \
|
||||
`trigger_config={"event_types": ["webhook_received"]}`
|
||||
- `isolation_level="shared"` so async runs can read primary session memory
|
||||
- `runtime_config = AgentRuntimeConfig(webhook_routes=[...])` for HTTP webhooks
|
||||
- Reference: `exports/gmail_inbox_guardian/agent.py`
|
||||
- Full docs: `core/framework/agents/hive_coder/reference/framework_guide.md` \
|
||||
(Async Entry Points section)
|
||||
|
||||
## 5. Verify
|
||||
|
||||
Run THREE validation steps after writing. All must pass:
|
||||
|
||||
**Step A — Class validation** (checks graph structure):
|
||||
```
|
||||
run_command("python -c 'from {name} import default_agent; \\
|
||||
print(default_agent.validate())'")
|
||||
```
|
||||
|
||||
**Step B — Runner load test** (checks package export contract — \
|
||||
THIS IS THE SAME PATH THE TUI USES):
|
||||
```
|
||||
run_command("python -c 'from framework.runner.runner import \\
|
||||
AgentRunner; r = AgentRunner.load(\"exports/{name}\"); \\
|
||||
print(\"AgentRunner.load: OK\")'")
|
||||
```
|
||||
This catches missing __init__.py exports, bad conversation_mode, \
|
||||
invalid loop_config, and unreachable nodes. If Step A passes but \
|
||||
Step B fails, the problem is in __init__.py exports.
|
||||
|
||||
**Step C — Run tests:**
|
||||
```
|
||||
run_agent_tests("{name}")
|
||||
```
|
||||
|
||||
If anything fails: read error, fix with edit_file, re-validate. Up to 3x.
|
||||
|
||||
**CRITICAL: Testing forever-alive agents**
|
||||
Most agents use `terminal_nodes=[]` (forever-alive). This means \
|
||||
`runner.run()` NEVER returns — it hangs forever waiting for a \
|
||||
terminal node that doesn't exist. Agent tests MUST be structural:
|
||||
- Validate graph, node specs, edges, tools, prompts
|
||||
- Check goal/constraints/success criteria definitions
|
||||
- Test `AgentRunner.load()` + `_setup()` (skip if no API key)
|
||||
- NEVER call `runner.run()` or `trigger_and_wait()` in tests for \
|
||||
forever-alive agents — they will hang and time out.
|
||||
When you restructure an agent (change nodes/edges), always update \
|
||||
the tests to match. Stale tests referencing old node names will fail.
|
||||
|
||||
## 6. Present
|
||||
|
||||
Show the user what you built: agent name, goal summary, graph ASCII \
|
||||
art, files created, validation status. Offer to revise or build another.
|
||||
|
||||
After user confirms satisfaction:
|
||||
set_output("agent_name", "the_agent_name")
|
||||
set_output("validation_result", "valid")
|
||||
|
||||
If building another agent, just start the loop again — no need to \
|
||||
set_output until the user is done.
|
||||
|
||||
## 7. Live Test (optional)
|
||||
|
||||
After the user approves, offer to load and run the agent in-session. \
|
||||
This runs it alongside you, with the Agent Guardian watching for \
|
||||
failures automatically.
|
||||
|
||||
```
|
||||
load_agent("exports/{name}") # registers as secondary graph
|
||||
start_agent("{name}") # triggers default entry point
|
||||
```
|
||||
|
||||
If the agent fails, the guardian fires and triages. You can also:
|
||||
- `list_agents()` — see all loaded graphs and status
|
||||
- `restart_agent("{name}")` then `load_agent` — pick up code changes
|
||||
- `unload_agent("{name}")` — remove it from the session
|
||||
- `get_user_presence()` — check if user is around
|
||||
|
||||
The agent runs in a shared session: it can read memory you've set and \
|
||||
its outputs are visible to you. If the guardian escalates a failure, \
|
||||
you'll see the error and can fix the code, then reload.
|
||||
""",
|
||||
tools=[
|
||||
"read_file",
|
||||
"write_file",
|
||||
"edit_file",
|
||||
"list_directory",
|
||||
"search_files",
|
||||
"run_command",
|
||||
"undo_changes",
|
||||
# Meta-agent tools
|
||||
"discover_mcp_tools",
|
||||
"list_agents",
|
||||
"list_agent_sessions",
|
||||
"get_agent_session_state",
|
||||
"get_agent_session_memory",
|
||||
"list_agent_checkpoints",
|
||||
"get_agent_checkpoint",
|
||||
"run_agent_tests",
|
||||
# Graph lifecycle tools (multi-graph sessions)
|
||||
"load_agent",
|
||||
"unload_agent",
|
||||
"start_agent",
|
||||
"restart_agent",
|
||||
"get_user_presence",
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
# Superset of every tool name the guardian node may be granted. This list is
# only a placeholder: attach_guardian() replaces it with the filtered subset
# that the supervised agent's environment actually provides at runtime.

# File I/O — available when the agent has the hive-tools MCP server.
_FILE_IO_TOOLS = [
    "read_file",
    "write_file",
    "edit_file",
    "search_files",
    "run_command",
]

# Graph lifecycle — registered by attach_guardian().
_GRAPH_LIFECYCLE_TOOLS = [
    "load_agent",
    "unload_agent",
    "start_agent",
    "restart_agent",
    "get_user_presence",
    "list_agents",
]

ALL_GUARDIAN_TOOLS = _FILE_IO_TOOLS + _GRAPH_LIFECYCLE_TOOLS
|
||||
|
||||
# Watchdog node that supervises other agent graphs. Triggered by failure /
# stall / doom-loop / constraint events; triages and either fixes, asks the
# user, or escalates. The `tools` list below is a placeholder that
# attach_guardian() narrows to what is actually available at runtime.
guardian_node = NodeSpec(
    id="guardian",
    name="Agent Guardian",
    description=(
        "Event-driven guardian that monitors supervised agent graphs. "
        "Triggers on failures, stalls, tool doom loops, and constraint "
        "violations. Assesses severity, checks user presence, and decides: "
        "ask the user (if present), attempt autonomous fix (if away), or "
        "escalate for post-mortem."
    ),
    node_type="event_loop",
    client_facing=True,  # the guardian may surface issues directly to the user
    max_node_visits=0,  # unbounded — must be able to fire on every event
    input_keys=["event"],  # payload of the triggering event
    output_keys=["resolution"],  # brief description of how the issue ended
    nullable_output_keys=["resolution"],  # only set once resolved/escalated (per prompt)
    success_criteria=(
        "Failure is resolved — either by user guidance, autonomous fix, or documented escalation."
    ),
    system_prompt="""\
You are the Agent Guardian — a watchdog that monitors supervised agent \
graphs. You fire on failures, stalls, doom loops, and constraint \
violations. Your job: triage, fix, or escalate.

# Event Types

You trigger on these events:

## execution_failed
The agent graph crashed — unhandled exception, LLM error, or tool failure.
- Read the error message and stack trace from the event data.
- Transient errors (rate limit, timeout, network): auto-retry via restart.
- Config errors (bad API key, missing tool): needs user input.
- Logic bugs (bad output, crash in code): read source, fix, reload.
- Catastrophic (data corruption): escalate, unload the agent.

## node_stalled
A node has been running too long without producing output. The LLM may \
be stuck in a reasoning loop, waiting for input that won't come, or \
the tool call is hanging.
- Check what node is stalled and how long it's been running.
- If the node is autonomous: restart the agent to break the stall.
- If the node is client-facing: check user presence — the user may \
have left. Alert them or restart after a timeout.
- If a tool call is hanging: the MCP server may be down. Restart.

## node_tool_doom_loop
The LLM is calling the same tools repeatedly without making progress. \
This usually means the prompt is inadequate, the tool is returning \
unhelpful errors, or the LLM is stuck in a retry loop.
- Identify which tool is looping and what errors it's returning.
- If it's a transient tool error: restart to reset context.
- If it's a prompt/logic issue: read the node's source, fix the \
system prompt or tool configuration, then reload and restart.
- If the tool itself is broken: unload and escalate.

## constraint_violation
The agent violated a defined constraint (e.g., token budget exceeded, \
forbidden action attempted, output format invalid).
- Read which constraint was violated from the event data.
- Soft constraints (budget warning): log and notify user.
- Hard constraints (forbidden action): halt the agent immediately, \
escalate to user.
- Format violations: may be fixable by restarting with better context.

# Decision Protocol

1. **Identify the event type** and read the event data carefully.

2. **Assess severity:**
   - Transient / auto-recoverable -> auto-retry
   - Configuration / environment -> needs user input
   - Logic bug / prompt issue -> needs code fix
   - Catastrophic / safety -> escalate immediately

3. **Check user presence.** Call get_user_presence().
   - **present** (idle < 2 min): Ask the user for guidance. Present the \
issue clearly and suggest options.
   - **idle** (2-10 min): Attempt autonomous fix first. If it fails, \
queue a notification for when user returns.
   - **away** (> 10 min) or **never_seen**: Attempt autonomous fix. \
Save escalation log via write_file if fix fails.

4. **Act.**
   - Auto-retry: restart_agent(graph_id), then start_agent.
   - Config issues: if user present, ask. If away, log and wait.
   - Code fixes: read source, fix with edit_file, restart_agent.
   - Escalation: save detailed log, unload the agent.

# Tools

- get_user_presence() -- check if user is active
- list_agents() -- see loaded graphs and status
- load_agent(path) -- load an agent graph
- unload_agent(graph_id) -- remove a graph
- start_agent(graph_id, entry_point, input_data) -- trigger execution
- restart_agent(graph_id) -- unload for reload
- read_file, write_file, edit_file -- inspect/fix agent source code \
(available when the agent's MCP server provides them)
- run_command -- run shell commands (available when provided by MCP)

# Rules

- Be concise. State the event type, your assessment, and your action.
- If asking the user, present the issue and 2-3 concrete options.
- After a fix attempt, verify it works before declaring success.
- For doom loops and stalls, prefer restart first — it's the cheapest fix.
- set_output("resolution", "...") only after the issue is resolved or \
escalated. Use a brief description: "auto-fixed: retry after timeout", \
"escalated: missing API key", "user-resolved: updated config", \
"auto-fixed: restarted stalled node", "escalated: doom loop in tool X".
""",
    # Placeholder — attach_guardian() replaces with filtered list at runtime
    tools=ALL_GUARDIAN_TOOLS,
)
|
||||
|
||||
|
||||
# Public surface of this module: the coder and guardian node specs, plus the
# guardian tool superset that attach_guardian() filters at runtime.
__all__ = ["coder_node", "guardian_node", "ALL_GUARDIAN_TOOLS"]
|
||||
@@ -0,0 +1,107 @@
|
||||
# Common Mistakes When Building Hive Agents
|
||||
|
||||
## Critical Errors
|
||||
|
||||
1. **Using tools that don't exist** — Always verify tools are available in the hive-tools MCP server before assigning them to nodes. Never guess tool names.
|
||||
|
||||
2. **Wrong entry_points format** — MUST be `{"start": "first-node-id"}`. NOT a set, NOT `{node_id: [keys]}`.
|
||||
|
||||
3. **Wrong mcp_servers.json format** — Flat dict (no `"mcpServers"` wrapper). `cwd` must be `"../../tools"`. `command` must be `"uv"` with args `["run", "python", ...]`.
|
||||
|
||||
4. **Missing STEP 1/STEP 2 in client-facing prompts** — Without explicit phases, the LLM calls set_output before the user responds. Always use the pattern.
|
||||
|
||||
5. **Forgetting nullable_output_keys** — When a node receives inputs from multiple edges and some inputs only arrive on certain edges (e.g., feedback), mark those as nullable. Without this, the executor blocks waiting for a value that will never arrive.
|
||||
|
||||
6. **Creating dead-end nodes in forever-alive graphs** — Every node must have at least one outgoing edge. A node with no outgoing edges ends the execution, breaking the loop.
|
||||
|
||||
7. **Setting max_node_visits to a non-zero value in forever-alive agents** — The framework default is `max_node_visits=0` (unbounded). Setting it to any positive value (e.g., 1) means the node stops executing after that many visits, silently breaking the forever-alive loop. Only set `max_node_visits > 0` in one-shot agents with feedback loops that need bounded retries.
|
||||
|
||||
8. **Missing module-level exports in `__init__.py`** — The runner loads agents via `importlib.import_module(package_name)`, which imports `__init__.py`. It then reads `goal`, `nodes`, `edges`, `entry_node`, `entry_points`, `pause_nodes`, `terminal_nodes`, `conversation_mode`, `identity_prompt`, `loop_config` via `getattr()`. If ANY of these are missing from `__init__.py`, they default to `None` or `{}` — causing "must define goal, nodes, edges" errors or "node X is unreachable" validation failures. **ALL module-level variables from agent.py must be re-exported in `__init__.py`.**

## Value Errors

9. **Invalid `conversation_mode` value** — Only two valid values: `"continuous"` (recommended for interactive agents) or omit entirely (for isolated per-node conversations). Values like `"client_facing"`, `"interactive"`, `"adaptive"` do NOT exist and will cause runtime errors.

10. **Invalid `loop_config` keys** — Only three valid keys: `max_iterations` (int), `max_tool_calls_per_turn` (int), `max_history_tokens` (int). Keys like `"strategy"`, `"mode"`, `"timeout"` are NOT valid and are silently ignored or cause errors.

11. **Fabricating tools that don't exist** — Never guess tool names. Always verify via `discover_mcp_tools()`. Common hallucinations: `csv_read`, `csv_write`, `csv_append`, `file_upload`, `database_query`. If a required tool doesn't exist, redesign the agent to use tools that DO exist (e.g., `save_data`/`load_data` for data persistence).

## Design Errors

12. **Too many thin nodes** — Hard limit: **2-4 nodes** for most agents. Each node boundary serializes outputs to shared memory and loses all in-context information (tool results, intermediate reasoning, conversation history). A node with 0 tools that just does LLM reasoning is NOT a real node — merge it into its predecessor or successor.

**Merge when:**
- Node has NO tools — pure LLM reasoning belongs in the node that produces or consumes its data
- Node sets only 1 trivial output (e.g., `set_output("done", "true")`) — collapse into predecessor
- Multiple consecutive autonomous nodes with same/similar tools — combine into one
- A "report" or "summary" node that just presents analysis — merge into the client-facing node
- A "schedule" or "confirm" node that doesn't actually schedule anything — remove entirely

**Keep separate when:**
- Client-facing vs autonomous — different interaction models require separate nodes
- Fundamentally different tool sets (e.g., web search vs file I/O)
- Fan-out parallelism — parallel branches MUST be separate nodes

**Bad example** (7 nodes — WAY too many):
```
profile_setup → daily_intake → update_tracker → analyze_progress → generate_plan → schedule_reminders → report
```
`analyze_progress` has no tools. `schedule_reminders` just sets one boolean. `report` just presents analysis. `update_tracker` and `generate_plan` are sequential autonomous work.

**Good example** (3 nodes):
```
intake (client-facing) → process (autonomous: track + analyze + plan) → intake (loop back)
```
One client-facing node handles ALL user interaction (setup, logging, reports). One autonomous node handles ALL backend work (CSV update, analysis, plan generation) with tools and context preserved.

13. **Adding framework gating for LLM behavior** — Don't add output rollback, premature rejection, or interaction protocol injection. Fix with better prompts or custom judges.

14. **Not using continuous conversation mode** — Interactive agents should use `conversation_mode="continuous"`. Without it, each node starts with blank context.

15. **Adding terminal nodes by default** — ALL agents should use `terminal_nodes=[]` (forever-alive) unless the user explicitly requests a one-shot/batch agent. Forever-alive is the standard pattern. Every node must have at least one outgoing edge. Dead-end nodes break the loop.

16. **Calling set_output in same turn as tool calls** — Instruct the LLM to call set_output in a SEPARATE turn from real tool calls.

## File Template Errors

17. **Wrong import paths** — Use `from framework.graph import ...`, NOT `from core.framework.graph import ...`. The PYTHONPATH includes `core/`.

18. **Missing storage path** — Agent class must set `self._storage_path = Path.home() / ".hive" / "agents" / "agent_name"`.

19. **Missing mcp_servers.json** — Without this, the agent has no tools at runtime.

20. **Bare `python` command in mcp_servers.json** — Use `"command": "uv"` with args `["run", "python", ...]`.

## Testing Errors

21. **Using `runner.run()` on forever-alive agents** — `runner.run()` calls `trigger_and_wait()` which blocks until the graph reaches a terminal node. Forever-alive agents have `terminal_nodes=[]`, so **`runner.run()` hangs forever**. This is the #1 cause of stuck test suites.

**For forever-alive agents, write structural tests instead:**
- Validate graph structure (nodes, edges, entry points)
- Verify node specs (tools, prompts, client-facing flag)
- Check goal/constraints/success criteria definitions
- Test that `AgentRunner.load()` + `_setup()` succeeds (skip if no API key)

**What NOT to do:**
```python
# WRONG — hangs forever on forever-alive agents
result = await runner.run({"topic": "quantum computing"})
```

**Correct pattern for structure tests:**
```python
def test_research_has_web_tools(self):
    assert "web_search" in research_node.tools


def test_research_routes_back_to_interact(self):
    edges_to_interact = [e for e in edges if e.source == "research" and e.target == "interact"]
    assert edges_to_interact
```

22. **Stale tests after agent restructuring** — When you change an agent's node count or names (e.g., 4 nodes → 2 nodes), the tests MUST be updated too. Tests referencing old node names (e.g., `"review"`, `"report"`) will fail or hang. Always check that test assertions match the current `nodes/__init__.py`.

23. **Running full integration tests without API keys** — Structural tests (validate, import) work without keys. Full integration tests need `ANTHROPIC_API_KEY`. Use `pytest.skip()` in the runner fixture when `_setup()` fails due to missing credentials.

24. **Forgetting sys.path setup in conftest.py** — Tests need `exports/` and `core/` on sys.path.

25. **Not using auto_responder for client-facing nodes** — Tests with client-facing nodes hang without an auto-responder that injects input. But note: even WITH auto_responder, forever-alive agents still hang because the graph never terminates. Auto-responder only helps for agents with terminal nodes.
|
||||
@@ -0,0 +1,597 @@
|
||||
# Agent File Templates
|
||||
|
||||
Complete code templates for each file in a Hive agent package.
|
||||
|
||||
## config.py
|
||||
|
||||
```python
|
||||
"""Runtime configuration."""
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _load_preferred_model() -> str:
|
||||
"""Load preferred model from ~/.hive/configuration.json."""
|
||||
config_path = Path.home() / ".hive" / "configuration.json"
|
||||
if config_path.exists():
|
||||
try:
|
||||
with open(config_path) as f:
|
||||
config = json.load(f)
|
||||
llm = config.get("llm", {})
|
||||
if llm.get("provider") and llm.get("model"):
|
||||
return f"{llm['provider']}/{llm['model']}"
|
||||
except Exception:
|
||||
pass
|
||||
return "anthropic/claude-sonnet-4-20250514"
|
||||
|
||||
|
||||
@dataclass
|
||||
class RuntimeConfig:
|
||||
model: str = field(default_factory=_load_preferred_model)
|
||||
temperature: float = 0.7
|
||||
max_tokens: int = 40000
|
||||
api_key: str | None = None
|
||||
api_base: str | None = None
|
||||
|
||||
|
||||
default_config = RuntimeConfig()
|
||||
|
||||
|
||||
@dataclass
|
||||
class AgentMetadata:
|
||||
name: str = "My Agent Name"
|
||||
version: str = "1.0.0"
|
||||
description: str = "What this agent does."
|
||||
intro_message: str = "Welcome! What would you like me to do?"
|
||||
|
||||
|
||||
metadata = AgentMetadata()
|
||||
```
|
||||
|
||||
## nodes/__init__.py
|
||||
|
||||
```python
|
||||
"""Node definitions for My Agent."""
|
||||
|
||||
from framework.graph import NodeSpec
|
||||
|
||||
# Node 1: Intake (client-facing)
|
||||
intake_node = NodeSpec(
|
||||
id="intake",
|
||||
name="Intake",
|
||||
description="Gather requirements from the user",
|
||||
node_type="event_loop",
|
||||
client_facing=True,
|
||||
max_node_visits=0, # Unlimited for forever-alive
|
||||
input_keys=["topic"],
|
||||
output_keys=["brief"],
|
||||
success_criteria="The brief is specific and actionable.",
|
||||
system_prompt="""\
|
||||
You are an intake specialist.
|
||||
|
||||
**STEP 1 — Read and respond (text only, NO tool calls):**
|
||||
1. Read the topic provided
|
||||
2. If vague, ask 1-2 clarifying questions
|
||||
3. If clear, confirm your understanding
|
||||
|
||||
**STEP 2 — After the user confirms, call set_output:**
|
||||
- set_output("brief", "Clear description of what to do")
|
||||
""",
|
||||
tools=[],
|
||||
)
|
||||
|
||||
# Node 2: Worker (autonomous)
|
||||
worker_node = NodeSpec(
|
||||
id="worker",
|
||||
name="Worker",
|
||||
description="Do the main work",
|
||||
node_type="event_loop",
|
||||
max_node_visits=0,
|
||||
input_keys=["brief", "feedback"],
|
||||
output_keys=["results"],
|
||||
nullable_output_keys=["feedback"], # Only on feedback edge
|
||||
success_criteria="Results are complete and accurate.",
|
||||
system_prompt="""\
|
||||
You are a worker agent. Given a brief, do the work.
|
||||
|
||||
If feedback is provided, this is a follow-up — address the feedback.
|
||||
|
||||
Work in phases:
|
||||
1. Use tools to gather/process data
|
||||
2. Analyze results
|
||||
3. Call set_output for each key in a SEPARATE turn:
|
||||
- set_output("results", "structured results")
|
||||
""",
|
||||
tools=["web_search", "web_scrape", "save_data", "load_data", "list_data_files"],
|
||||
)
|
||||
|
||||
# Node 3: Review (client-facing)
|
||||
review_node = NodeSpec(
|
||||
id="review",
|
||||
name="Review",
|
||||
description="Present results for user approval",
|
||||
node_type="event_loop",
|
||||
client_facing=True,
|
||||
max_node_visits=0,
|
||||
input_keys=["results", "brief"],
|
||||
output_keys=["next_action", "feedback"],
|
||||
nullable_output_keys=["feedback"],
|
||||
success_criteria="User has reviewed and decided next steps.",
|
||||
system_prompt="""\
|
||||
Present the results to the user.
|
||||
|
||||
**STEP 1 — Present (text only, NO tool calls):**
|
||||
1. Summary of work done
|
||||
2. Key results
|
||||
3. Ask: satisfied, or want changes?
|
||||
|
||||
**STEP 2 — After user responds, call set_output:**
|
||||
- set_output("next_action", "new_topic") — if starting fresh
|
||||
- set_output("next_action", "revise") — if changes needed
|
||||
- set_output("feedback", "what to change") — only if revising
|
||||
""",
|
||||
tools=[],
|
||||
)
|
||||
|
||||
__all__ = ["intake_node", "worker_node", "review_node"]
|
||||
```
|
||||
|
||||
## agent.py
|
||||
|
||||
```python
|
||||
"""Agent graph construction for My Agent."""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
|
||||
from framework.graph.edge import GraphSpec
|
||||
from framework.graph.executor import ExecutionResult
|
||||
from framework.graph.checkpoint_config import CheckpointConfig
|
||||
from framework.llm import LiteLLMProvider
|
||||
from framework.runner.tool_registry import ToolRegistry
|
||||
from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
|
||||
from framework.runtime.execution_stream import EntryPointSpec
|
||||
|
||||
from .config import default_config, metadata
|
||||
from .nodes import intake_node, worker_node, review_node
|
||||
|
||||
# Goal definition
|
||||
goal = Goal(
|
||||
id="my-agent-goal",
|
||||
name="My Agent Goal",
|
||||
description="What this agent achieves.",
|
||||
success_criteria=[
|
||||
SuccessCriterion(id="sc-1", description="...", metric="...", target="...", weight=0.5),
|
||||
SuccessCriterion(id="sc-2", description="...", metric="...", target="...", weight=0.5),
|
||||
],
|
||||
constraints=[
|
||||
Constraint(id="c-1", description="...", constraint_type="hard", category="quality"),
|
||||
],
|
||||
)
|
||||
|
||||
# Node list
|
||||
nodes = [intake_node, worker_node, review_node]
|
||||
|
||||
# Edge definitions
|
||||
edges = [
|
||||
EdgeSpec(id="intake-to-worker", source="intake", target="worker",
|
||||
condition=EdgeCondition.ON_SUCCESS, priority=1),
|
||||
EdgeSpec(id="worker-to-review", source="worker", target="review",
|
||||
condition=EdgeCondition.ON_SUCCESS, priority=1),
|
||||
# Feedback loop
|
||||
EdgeSpec(id="review-to-worker", source="review", target="worker",
|
||||
condition=EdgeCondition.CONDITIONAL,
|
||||
condition_expr="str(next_action).lower() == 'revise'", priority=2),
|
||||
# Loop back for new topic
|
||||
EdgeSpec(id="review-to-intake", source="review", target="intake",
|
||||
condition=EdgeCondition.CONDITIONAL,
|
||||
condition_expr="str(next_action).lower() == 'new_topic'", priority=1),
|
||||
]
|
||||
|
||||
# Graph configuration
|
||||
entry_node = "intake"
|
||||
entry_points = {"start": "intake"}
|
||||
pause_nodes = []
|
||||
terminal_nodes = [] # Forever-alive
|
||||
|
||||
# Module-level vars read by AgentRunner.load()
|
||||
conversation_mode = "continuous"
|
||||
identity_prompt = "You are a helpful agent."
|
||||
loop_config = {"max_iterations": 100, "max_tool_calls_per_turn": 20, "max_history_tokens": 32000}
|
||||
|
||||
|
||||
class MyAgent:
|
||||
def __init__(self, config=None):
|
||||
self.config = config or default_config
|
||||
self.goal = goal
|
||||
self.nodes = nodes
|
||||
self.edges = edges
|
||||
self.entry_node = entry_node
|
||||
self.entry_points = entry_points
|
||||
self.pause_nodes = pause_nodes
|
||||
self.terminal_nodes = terminal_nodes
|
||||
self._graph = None
|
||||
self._agent_runtime = None
|
||||
self._tool_registry = None
|
||||
self._storage_path = None
|
||||
|
||||
def _build_graph(self):
|
||||
return GraphSpec(
|
||||
id="my-agent-graph",
|
||||
goal_id=self.goal.id,
|
||||
version="1.0.0",
|
||||
entry_node=self.entry_node,
|
||||
entry_points=self.entry_points,
|
||||
terminal_nodes=self.terminal_nodes,
|
||||
pause_nodes=self.pause_nodes,
|
||||
nodes=self.nodes,
|
||||
edges=self.edges,
|
||||
default_model=self.config.model,
|
||||
max_tokens=self.config.max_tokens,
|
||||
loop_config=loop_config,
|
||||
conversation_mode=conversation_mode,
|
||||
identity_prompt=identity_prompt,
|
||||
)
|
||||
|
||||
def _setup(self, mock_mode=False):
|
||||
self._storage_path = Path.home() / ".hive" / "agents" / "my_agent"
|
||||
self._storage_path.mkdir(parents=True, exist_ok=True)
|
||||
self._tool_registry = ToolRegistry()
|
||||
mcp_config = Path(__file__).parent / "mcp_servers.json"
|
||||
if mcp_config.exists():
|
||||
self._tool_registry.load_mcp_config(mcp_config)
|
||||
llm = None
|
||||
if not mock_mode:
|
||||
llm = LiteLLMProvider(model=self.config.model, api_key=self.config.api_key, api_base=self.config.api_base)
|
||||
tools = list(self._tool_registry.get_tools().values())
|
||||
tool_executor = self._tool_registry.get_executor()
|
||||
self._graph = self._build_graph()
|
||||
self._agent_runtime = create_agent_runtime(
|
||||
graph=self._graph, goal=self.goal, storage_path=self._storage_path,
|
||||
entry_points=[EntryPointSpec(id="default", name="Default", entry_node=self.entry_node,
|
||||
trigger_type="manual", isolation_level="shared")],
|
||||
llm=llm, tools=tools, tool_executor=tool_executor,
|
||||
checkpoint_config=CheckpointConfig(enabled=True, checkpoint_on_node_complete=True,
|
||||
checkpoint_max_age_days=7, async_checkpoint=True),
|
||||
)
|
||||
|
||||
async def start(self, mock_mode=False):
|
||||
if self._agent_runtime is None:
|
||||
self._setup(mock_mode=mock_mode)
|
||||
if not self._agent_runtime.is_running:
|
||||
await self._agent_runtime.start()
|
||||
|
||||
async def stop(self):
|
||||
if self._agent_runtime and self._agent_runtime.is_running:
|
||||
await self._agent_runtime.stop()
|
||||
self._agent_runtime = None
|
||||
|
||||
async def trigger_and_wait(self, entry_point="default", input_data=None, timeout=None, session_state=None):
|
||||
if self._agent_runtime is None:
|
||||
raise RuntimeError("Agent not started. Call start() first.")
|
||||
return await self._agent_runtime.trigger_and_wait(
|
||||
entry_point_id=entry_point, input_data=input_data or {}, session_state=session_state)
|
||||
|
||||
async def run(self, context, mock_mode=False, session_state=None):
|
||||
await self.start(mock_mode=mock_mode)
|
||||
try:
|
||||
result = await self.trigger_and_wait("default", context, session_state=session_state)
|
||||
return result or ExecutionResult(success=False, error="Execution timeout")
|
||||
finally:
|
||||
await self.stop()
|
||||
|
||||
def info(self):
|
||||
return {
|
||||
"name": metadata.name, "version": metadata.version, "description": metadata.description,
|
||||
"goal": {"name": self.goal.name, "description": self.goal.description},
|
||||
"nodes": [n.id for n in self.nodes], "edges": [e.id for e in self.edges],
|
||||
"entry_node": self.entry_node, "entry_points": self.entry_points,
|
||||
"terminal_nodes": self.terminal_nodes,
|
||||
"client_facing_nodes": [n.id for n in self.nodes if n.client_facing],
|
||||
}
|
||||
|
||||
def validate(self):
|
||||
errors, warnings = [], []
|
||||
node_ids = {n.id for n in self.nodes}
|
||||
for e in self.edges:
|
||||
if e.source not in node_ids: errors.append(f"Edge {e.id}: source '{e.source}' not found")
|
||||
if e.target not in node_ids: errors.append(f"Edge {e.id}: target '{e.target}' not found")
|
||||
if self.entry_node not in node_ids: errors.append(f"Entry node '{self.entry_node}' not found")
|
||||
for t in self.terminal_nodes:
|
||||
if t not in node_ids: errors.append(f"Terminal node '{t}' not found")
|
||||
for ep_id, nid in self.entry_points.items():
|
||||
if nid not in node_ids: errors.append(f"Entry point '{ep_id}' references unknown node '{nid}'")
|
||||
return {"valid": len(errors) == 0, "errors": errors, "warnings": warnings}
|
||||
|
||||
|
||||
default_agent = MyAgent()
|
||||
```
|
||||
|
||||
## agent.py — Async Entry Points Variant
|
||||
|
||||
When an agent needs timers, webhooks, or event-driven triggers, add
|
||||
`async_entry_points` and optionally `runtime_config` as module-level variables.
|
||||
These are IN ADDITION to the standard variables above.
|
||||
|
||||
```python
|
||||
# Additional imports for async entry points
|
||||
from framework.graph.edge import GraphSpec, AsyncEntryPointSpec
|
||||
from framework.runtime.agent_runtime import (
|
||||
AgentRuntime, AgentRuntimeConfig, create_agent_runtime,
|
||||
)
|
||||
|
||||
# ... (goal, nodes, edges, entry_node, entry_points, etc. as above) ...
|
||||
|
||||
# Async entry points — event-driven triggers
|
||||
async_entry_points = [
|
||||
# Timer with cron: daily at 9am
|
||||
AsyncEntryPointSpec(
|
||||
id="daily-check",
|
||||
name="Daily Check",
|
||||
entry_node="process-node",
|
||||
trigger_type="timer",
|
||||
trigger_config={"cron": "0 9 * * *"},
|
||||
isolation_level="shared",
|
||||
max_concurrent=1,
|
||||
),
|
||||
# Timer with fixed interval: every 20 minutes
|
||||
AsyncEntryPointSpec(
|
||||
id="scheduled-check",
|
||||
name="Scheduled Check",
|
||||
entry_node="process-node",
|
||||
trigger_type="timer",
|
||||
trigger_config={"interval_minutes": 20, "run_immediately": False},
|
||||
isolation_level="shared",
|
||||
max_concurrent=1,
|
||||
),
|
||||
# Event: reacts to webhook events
|
||||
AsyncEntryPointSpec(
|
||||
id="webhook-event",
|
||||
name="Webhook Event Handler",
|
||||
entry_node="process-node",
|
||||
trigger_type="event",
|
||||
trigger_config={"event_types": ["webhook_received"]},
|
||||
isolation_level="shared",
|
||||
max_concurrent=10,
|
||||
),
|
||||
]
|
||||
|
||||
# Webhook server config (only needed if using webhooks)
|
||||
runtime_config = AgentRuntimeConfig(
|
||||
webhook_host="127.0.0.1",
|
||||
webhook_port=8080,
|
||||
webhook_routes=[
|
||||
{
|
||||
"source_id": "my-source",
|
||||
"path": "/webhooks/my-source",
|
||||
"methods": ["POST"],
|
||||
},
|
||||
],
|
||||
)
|
||||
```
|
||||
|
||||
**Key rules for async entry points:**
|
||||
- `async_entry_points` is a list of `AsyncEntryPointSpec` (NOT `EntryPointSpec`)
|
||||
- `runtime_config` is `AgentRuntimeConfig` (NOT `RuntimeConfig` from config.py)
|
||||
- Valid trigger_types: `timer`, `event`, `webhook`, `manual`, `api`
|
||||
- Valid isolation_levels: `isolated`, `shared`, `synchronized`
|
||||
- Timer trigger_config (cron): `{"cron": "0 9 * * *"}` — standard 5-field cron expression
|
||||
- Timer trigger_config (interval): `{"interval_minutes": float, "run_immediately": bool}`
|
||||
- Event trigger_config: `{"event_types": ["webhook_received"], "filter_stream": "...", "filter_node": "..."}`
|
||||
- Use `isolation_level="shared"` for async entry points that need to read
|
||||
the primary session's memory (e.g., user-configured rules)
|
||||
- The `_build_graph()` method passes `async_entry_points` to GraphSpec
|
||||
- Reference: `exports/gmail_inbox_guardian/agent.py`
|
||||
|
||||
## __init__.py
|
||||
|
||||
**CRITICAL:** The runner imports the package (`__init__.py`) and reads ALL module-level
|
||||
variables via `getattr()`. Every variable defined in `agent.py` that the runner needs
|
||||
MUST be re-exported here. Missing exports cause silent failures (variables default to
|
||||
`None` or `{}`), leading to "must define goal, nodes, edges" errors or graph validation
|
||||
failures like "node X is unreachable".
|
||||
|
||||
```python
|
||||
"""My Agent — description."""
|
||||
|
||||
from .agent import (
|
||||
MyAgent,
|
||||
default_agent,
|
||||
goal,
|
||||
nodes,
|
||||
edges,
|
||||
entry_node,
|
||||
entry_points,
|
||||
pause_nodes,
|
||||
terminal_nodes,
|
||||
conversation_mode,
|
||||
identity_prompt,
|
||||
loop_config,
|
||||
)
|
||||
from .config import default_config, metadata
|
||||
|
||||
__all__ = [
|
||||
"MyAgent",
|
||||
"default_agent",
|
||||
"goal",
|
||||
"nodes",
|
||||
"edges",
|
||||
"entry_node",
|
||||
"entry_points",
|
||||
"pause_nodes",
|
||||
"terminal_nodes",
|
||||
"conversation_mode",
|
||||
"identity_prompt",
|
||||
"loop_config",
|
||||
"default_config",
|
||||
"metadata",
|
||||
]
|
||||
```
|
||||
|
||||
**If the agent uses async entry points**, also import and export:
|
||||
```python
|
||||
from .agent import (
|
||||
...,
|
||||
async_entry_points,
|
||||
runtime_config, # Only if using webhooks
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
...,
|
||||
"async_entry_points",
|
||||
"runtime_config",
|
||||
]
|
||||
```
|
||||
|
||||
## __main__.py
|
||||
|
||||
```python
|
||||
"""CLI entry point for My Agent."""
|
||||
|
||||
import asyncio, json, logging, sys
|
||||
import click
|
||||
from .agent import default_agent, MyAgent
|
||||
|
||||
|
||||
def setup_logging(verbose=False, debug=False):
|
||||
if debug: level, fmt = logging.DEBUG, "%(asctime)s %(name)s: %(message)s"
|
||||
elif verbose: level, fmt = logging.INFO, "%(message)s"
|
||||
else: level, fmt = logging.WARNING, "%(levelname)s: %(message)s"
|
||||
logging.basicConfig(level=level, format=fmt, stream=sys.stderr)
|
||||
|
||||
|
||||
@click.group()
|
||||
@click.version_option(version="1.0.0")
|
||||
def cli():
|
||||
"""My Agent — description."""
|
||||
pass
|
||||
|
||||
|
||||
@cli.command()
|
||||
@click.option("--topic", "-t", required=True)
|
||||
@click.option("--mock", is_flag=True)
|
||||
@click.option("--verbose", "-v", is_flag=True)
|
||||
def run(topic, mock, verbose):
|
||||
"""Execute the agent."""
|
||||
setup_logging(verbose=verbose)
|
||||
result = asyncio.run(default_agent.run({"topic": topic}, mock_mode=mock))
|
||||
click.echo(json.dumps({"success": result.success, "output": result.output}, indent=2, default=str))
|
||||
sys.exit(0 if result.success else 1)
|
||||
|
||||
|
||||
@cli.command()
|
||||
@click.option("--mock", is_flag=True)
|
||||
def tui(mock):
|
||||
"""Launch TUI dashboard."""
|
||||
from pathlib import Path
|
||||
from framework.tui.app import AdenTUI
|
||||
from framework.llm import LiteLLMProvider
|
||||
from framework.runner.tool_registry import ToolRegistry
|
||||
from framework.runtime.agent_runtime import create_agent_runtime
|
||||
from framework.runtime.execution_stream import EntryPointSpec
|
||||
|
||||
async def run_tui():
|
||||
agent = MyAgent()
|
||||
agent._tool_registry = ToolRegistry()
|
||||
storage = Path.home() / ".hive" / "agents" / "my_agent"
|
||||
storage.mkdir(parents=True, exist_ok=True)
|
||||
mcp_cfg = Path(__file__).parent / "mcp_servers.json"
|
||||
if mcp_cfg.exists(): agent._tool_registry.load_mcp_config(mcp_cfg)
|
||||
llm = None if mock else LiteLLMProvider(model=agent.config.model, api_key=agent.config.api_key, api_base=agent.config.api_base)
|
||||
runtime = create_agent_runtime(
|
||||
graph=agent._build_graph(), goal=agent.goal, storage_path=storage,
|
||||
entry_points=[EntryPointSpec(id="start", name="Start", entry_node="intake", trigger_type="manual", isolation_level="isolated")],
|
||||
llm=llm, tools=list(agent._tool_registry.get_tools().values()), tool_executor=agent._tool_registry.get_executor())
|
||||
await runtime.start()
|
||||
try:
|
||||
app = AdenTUI(runtime)
|
||||
await app.run_async()
|
||||
finally:
|
||||
await runtime.stop()
|
||||
asyncio.run(run_tui())
|
||||
|
||||
|
||||
@cli.command()
|
||||
def info():
|
||||
"""Show agent info."""
|
||||
data = default_agent.info()
|
||||
click.echo(f"Agent: {data['name']}\nVersion: {data['version']}\nDescription: {data['description']}")
|
||||
click.echo(f"Nodes: {', '.join(data['nodes'])}\nClient-facing: {', '.join(data['client_facing_nodes'])}")
|
||||
|
||||
|
||||
@cli.command()
|
||||
def validate():
|
||||
"""Validate agent structure."""
|
||||
v = default_agent.validate()
|
||||
if v["valid"]: click.echo("Agent is valid")
|
||||
else:
|
||||
click.echo("Errors:")
|
||||
for e in v["errors"]: click.echo(f" {e}")
|
||||
sys.exit(0 if v["valid"] else 1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
cli()
|
||||
```
|
||||
|
||||
## mcp_servers.json
|
||||
|
||||
```json
|
||||
{
|
||||
"hive-tools": {
|
||||
"transport": "stdio",
|
||||
"command": "uv",
|
||||
"args": ["run", "python", "mcp_server.py", "--stdio"],
|
||||
"cwd": "../../tools",
|
||||
"description": "Hive tools MCP server"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**CRITICAL FORMAT RULES:**
|
||||
- NO `"mcpServers"` wrapper (flat dict, not nested)
|
||||
- `cwd` MUST be `"../../tools"` (relative from `exports/AGENT_NAME/` to `tools/`)
|
||||
- `command` MUST be `"uv"` with `"args": ["run", "python", ...]` (NOT bare `"python"`)
|
||||
|
||||
## tests/conftest.py
|
||||
|
||||
```python
|
||||
"""Test fixtures."""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
_repo_root = Path(__file__).resolve().parents[3]
|
||||
for _p in ["exports", "core"]:
|
||||
_path = str(_repo_root / _p)
|
||||
if _path not in sys.path:
|
||||
sys.path.insert(0, _path)
|
||||
|
||||
AGENT_PATH = str(Path(__file__).resolve().parents[1])
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def mock_mode():
|
||||
return True
|
||||
|
||||
|
||||
@pytest_asyncio.fixture(scope="session")
|
||||
async def runner(tmp_path_factory, mock_mode):
|
||||
from framework.runner.runner import AgentRunner
|
||||
storage = tmp_path_factory.mktemp("agent_storage")
|
||||
r = AgentRunner.load(AGENT_PATH, mock_mode=mock_mode, storage_path=storage)
|
||||
r._setup()
|
||||
yield r
|
||||
await r.cleanup_async()
|
||||
```
|
||||
|
||||
## entry_points Format

MUST be: `{"start": "first-node-id"}`
NOT: `{"first-node-id": ["input_keys"]}` (WRONG)
NOT: `{"first-node-id"}` (WRONG — this is a set)
|
||||
@@ -0,0 +1,433 @@
|
||||
# Hive Agent Framework — Condensed Reference
|
||||
|
||||
## Architecture
|
||||
|
||||
Agents are Python packages in `exports/`:
|
||||
```
|
||||
exports/my_agent/
|
||||
├── __init__.py # MUST re-export ALL module-level vars from agent.py
|
||||
├── __main__.py # CLI (run, tui, info, validate, shell)
|
||||
├── agent.py # Graph construction (goal, edges, agent class)
|
||||
├── config.py # Runtime config
|
||||
├── nodes/__init__.py # Node definitions (NodeSpec)
|
||||
├── mcp_servers.json # MCP tool server config
|
||||
└── tests/ # pytest tests
|
||||
```
|
||||
|
||||
## Agent Loading Contract
|
||||
|
||||
`AgentRunner.load()` imports the package (`__init__.py`) and reads these
|
||||
module-level variables via `getattr()`:
|
||||
|
||||
| Variable | Required | Default if missing | Consequence |
|
||||
|----------|----------|--------------------|-------------|
|
||||
| `goal` | YES | `None` | **FATAL** — "must define goal, nodes, edges" |
|
||||
| `nodes` | YES | `None` | **FATAL** — same error |
|
||||
| `edges` | YES | `None` | **FATAL** — same error |
|
||||
| `entry_node` | no | `nodes[0].id` | Probably wrong node |
|
||||
| `entry_points` | no | `{}` | **Nodes unreachable** — validation fails |
|
||||
| `terminal_nodes` | no | `[]` | OK for forever-alive |
|
||||
| `pause_nodes` | no | `[]` | OK |
|
||||
| `conversation_mode` | no | not passed | Isolated mode (no context carryover) |
|
||||
| `identity_prompt` | no | not passed | No agent-level identity |
|
||||
| `loop_config` | no | `{}` | No iteration limits |
|
||||
| `async_entry_points` | no | `[]` | No async triggers (timers, webhooks, events) |
|
||||
| `runtime_config` | no | `None` | No webhook server |
|
||||
|
||||
**CRITICAL:** `__init__.py` MUST import and re-export ALL of these from
|
||||
`agent.py`. Missing exports silently fall back to defaults, causing
|
||||
hard-to-debug failures.
|
||||
|
||||
**Why `default_agent.validate()` is NOT sufficient:**
|
||||
`validate()` checks the agent CLASS's internal graph (self.nodes, self.edges).
|
||||
These are always correct because the constructor references agent.py's module
|
||||
vars directly. But `AgentRunner.load()` reads from the PACKAGE (`__init__.py`),
|
||||
not the class. So `validate()` passes while `AgentRunner.load()` fails.
|
||||
Always test with `AgentRunner.load("exports/{name}")` — this is the same
|
||||
code path the TUI and `hive run` use.
|
||||
|
||||
## Goal
|
||||
|
||||
Defines success criteria and constraints:
|
||||
```python
|
||||
goal = Goal(
|
||||
id="kebab-case-id",
|
||||
name="Display Name",
|
||||
description="What the agent does",
|
||||
success_criteria=[
|
||||
SuccessCriterion(id="sc-id", description="...", metric="...", target="...", weight=0.25),
|
||||
],
|
||||
constraints=[
|
||||
Constraint(id="c-id", description="...", constraint_type="hard", category="quality"),
|
||||
],
|
||||
)
|
||||
```
|
||||
- 3-5 success criteria, weights sum to 1.0
|
||||
- 1-5 constraints (hard/soft, categories: quality, accuracy, interaction, functional)
|
||||
|
||||
## NodeSpec Fields
|
||||
|
||||
| Field | Type | Default | Description |
|
||||
|-------|------|---------|-------------|
|
||||
| id | str | required | kebab-case identifier |
|
||||
| name | str | required | Display name |
|
||||
| description | str | required | What the node does |
|
||||
| node_type | str | required | Always `"event_loop"` |
|
||||
| input_keys | list[str] | required | Memory keys this node reads |
|
||||
| output_keys | list[str] | required | Memory keys this node writes via set_output |
|
||||
| system_prompt | str | "" | LLM instructions |
|
||||
| tools | list[str] | [] | Tool names from MCP servers |
|
||||
| client_facing | bool | False | If True, streams to user and blocks for input |
|
||||
| nullable_output_keys | list[str] | [] | Keys that may remain unset |
|
||||
| max_node_visits | int | 0 | 0=unlimited (default); >1 for one-shot feedback loops |
|
||||
| max_retries | int | 3 | Retries on failure |
|
||||
| success_criteria | str | "" | Natural language for judge evaluation |
|
||||
|
||||
## EdgeSpec Fields
|
||||
|
||||
| Field | Type | Description |
|
||||
|-------|------|-------------|
|
||||
| id | str | kebab-case identifier |
|
||||
| source | str | Source node ID |
|
||||
| target | str | Target node ID |
|
||||
| condition | EdgeCondition | ON_SUCCESS, ON_FAILURE, ALWAYS, CONDITIONAL |
|
||||
| condition_expr | str | Python expression evaluated against memory (for CONDITIONAL) |
|
||||
| priority | int | Positive=forward (evaluated first), negative=feedback (loop-back) |
|
||||
|
||||
## Key Patterns
|
||||
|
||||
### STEP 1/STEP 2 (Client-Facing Nodes)
|
||||
```
|
||||
**STEP 1 — Respond to the user (text only, NO tool calls):**
|
||||
[Present information, ask questions]
|
||||
|
||||
**STEP 2 — After the user responds, call set_output:**
|
||||
- set_output("key", "value based on user response")
|
||||
```
|
||||
This prevents premature set_output before user interaction.
|
||||
|
||||
### Fewer, Richer Nodes (CRITICAL)
|
||||
|
||||
**Hard limit: 2-4 nodes for most agents.** Never exceed 5 unless the user
|
||||
explicitly requests a complex multi-phase pipeline.
|
||||
|
||||
Each node boundary serializes outputs to shared memory and **destroys** all
|
||||
in-context information: tool call results, intermediate reasoning, conversation
|
||||
history. A research node that searches, fetches, and analyzes in ONE node keeps
|
||||
all source material in its conversation context. Split across 3 nodes, each
|
||||
downstream node only sees the serialized summary string.
|
||||
|
||||
**Decision framework — merge unless ANY of these apply:**
|
||||
1. **Client-facing boundary** — Autonomous and client-facing work MUST be
|
||||
separate nodes (different interaction models)
|
||||
2. **Disjoint tool sets** — If tools are fundamentally different (e.g., web
|
||||
search vs database), separate nodes make sense
|
||||
3. **Parallel execution** — Fan-out branches must be separate nodes
|
||||
|
||||
**Red flags that you have too many nodes:**
|
||||
- A node with 0 tools (pure LLM reasoning) → merge into predecessor/successor
|
||||
- A node that sets only 1 trivial output → collapse into predecessor
|
||||
- Multiple consecutive autonomous nodes → combine into one rich node
|
||||
- A "report" node that presents analysis → merge into the client-facing node
|
||||
- A "confirm" or "schedule" node that doesn't call any external service → remove
|
||||
|
||||
**Typical agent structure (3 nodes):**
|
||||
```
|
||||
intake (client-facing) ←→ process (autonomous) ←→ review (client-facing)
|
||||
```
|
||||
Or for simpler agents, just 2 nodes:
|
||||
```
|
||||
interact (client-facing) → process (autonomous) → interact (loop)
|
||||
```
|
||||
|
||||
### nullable_output_keys
|
||||
For inputs that only arrive on certain edges:
|
||||
```python
|
||||
research_node = NodeSpec(
|
||||
input_keys=["brief", "feedback"],
|
||||
nullable_output_keys=["feedback"], # Only present on feedback edge
|
||||
max_node_visits=3,
|
||||
)
|
||||
```
|
||||
|
||||
### Mutually Exclusive Outputs
|
||||
For routing decisions:
|
||||
```python
|
||||
review_node = NodeSpec(
|
||||
output_keys=["approved", "feedback"],
|
||||
nullable_output_keys=["approved", "feedback"], # Node sets one or the other
|
||||
)
|
||||
```
|
||||
|
||||
### Forever-Alive Pattern
|
||||
`terminal_nodes=[]` — every node has outgoing edges, graph loops until user exits.
|
||||
Use `conversation_mode="continuous"` to preserve context across transitions.
|
||||
|
||||
### set_output
|
||||
- Synthetic tool injected by framework
|
||||
- Call separately from real tool calls (separate turn)
|
||||
- `set_output("key", "value")` stores to shared memory
|
||||
|
||||
## Edge Conditions
|
||||
|
||||
| Condition | When |
|
||||
|-----------|------|
|
||||
| ON_SUCCESS | Node completed successfully |
|
||||
| ON_FAILURE | Node failed |
|
||||
| ALWAYS | Unconditional |
|
||||
| CONDITIONAL | condition_expr evaluates to True against memory |
|
||||
|
||||
condition_expr examples:
|
||||
- `"needs_more_research == True"`
|
||||
- `"str(next_action).lower() == 'new_agent'"`
|
||||
- `"feedback is not None"`
|
||||
|
||||
## Graph Lifecycle
|
||||
|
||||
| Pattern | terminal_nodes | When |
|
||||
|---------|---------------|------|
|
||||
| **Forever-alive** | `[]` | **DEFAULT for all agents** |
|
||||
| Linear | `["last-node"]` | Only if user explicitly requests one-shot/batch |
|
||||
|
||||
**Forever-alive is the default.** Always use `terminal_nodes=[]`.
|
||||
The framework default for `max_node_visits` is 0 (unbounded), so
|
||||
nodes work correctly in forever-alive loops without explicit override.
|
||||
Only set `max_node_visits > 0` in one-shot agents with feedback loops.
|
||||
Every node must have at least one outgoing edge — no dead ends. The
|
||||
user exits by closing the TUI. Only use terminal nodes if the user
|
||||
explicitly asks for a batch/one-shot agent that runs once and exits.
|
||||
|
||||
## Continuous Conversation Mode

`conversation_mode` has ONLY two valid states:
- `"continuous"` — recommended for interactive agents
- Omit entirely — isolated per-node conversations (each node starts fresh)

**INVALID values** (do NOT use): `"client_facing"`, `"interactive"`,
`"adaptive"`, `"shared"`. These do not exist in the framework.
|
||||
|
||||
When `conversation_mode="continuous"`:
|
||||
- Same conversation thread carries across node transitions
|
||||
- Layered system prompts: identity (agent-level) + narrative + focus (per-node)
|
||||
- Transition markers inserted at boundaries
|
||||
- Compaction happens opportunistically at phase transitions
|
||||
|
||||
## loop_config

Only three valid keys:
```python
loop_config = {
    "max_iterations": 100,          # Max LLM turns per node visit
    "max_tool_calls_per_turn": 20,  # Max tool calls per LLM response
    "max_history_tokens": 32000,    # Triggers conversation compaction
}
```
**INVALID keys** (do NOT use): `"strategy"`, `"mode"`, `"timeout"`,
`"temperature"`. These are silently ignored or cause errors.
|
||||
|
||||
## Data Tools (Spillover)
|
||||
|
||||
For large data that exceeds context:
|
||||
- `save_data(filename, data)` — Write to session data dir
|
||||
- `load_data(filename, offset, limit)` — Read with pagination
|
||||
- `list_data_files()` — List files
|
||||
- `serve_file_to_user(filename, label)` — Clickable file:// URI
|
||||
|
||||
`data_dir` is auto-injected by framework — LLM never sees it.
|
||||
|
||||
## Fan-Out / Fan-In
|
||||
|
||||
Multiple ON_SUCCESS edges from same source → parallel execution via asyncio.gather().
|
||||
- Parallel nodes must have disjoint output_keys
|
||||
- Only one branch may have client_facing nodes
|
||||
- Fan-in node gets all outputs in shared memory
|
||||
|
||||
## Judge System
|
||||
|
||||
- **Implicit** (default): ACCEPTs when LLM finishes with no tool calls and all required outputs set
|
||||
- **SchemaJudge**: Validates against Pydantic model
|
||||
- **Custom**: Implement `evaluate(context) -> JudgeVerdict`
|
||||
|
||||
Judge is the SOLE acceptance mechanism — no ad-hoc framework gating.
|
||||
|
||||
## Async Entry Points (Webhooks, Timers, Events)
|
||||
|
||||
For agents that need to react to external events (incoming emails, scheduled
|
||||
tasks, API calls), use `AsyncEntryPointSpec` and optionally `AgentRuntimeConfig`.
|
||||
|
||||
### Imports
|
||||
```python
|
||||
from framework.graph.edge import GraphSpec, AsyncEntryPointSpec
|
||||
from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig, create_agent_runtime
|
||||
```
|
||||
Note: `AsyncEntryPointSpec` is in `framework.graph.edge` (the graph/declarative layer).
|
||||
`AgentRuntimeConfig` is in `framework.runtime.agent_runtime` (the runtime layer).
|
||||
|
||||
### AsyncEntryPointSpec Fields
|
||||
|
||||
| Field | Type | Default | Description |
|
||||
|-------|------|---------|-------------|
|
||||
| id | str | required | Unique identifier |
|
||||
| name | str | required | Human-readable name |
|
||||
| entry_node | str | required | Node ID to start execution from |
|
||||
| trigger_type | str | `"manual"` | `webhook`, `api`, `timer`, `event`, `manual` |
|
||||
| trigger_config | dict | `{}` | Trigger-specific config (see below) |
|
||||
| isolation_level | str | `"shared"` | `isolated`, `shared`, `synchronized` |
|
||||
| priority | int | `0` | Execution priority (higher values take precedence) |
|
||||
| max_concurrent | int | `10` | Max concurrent executions |
|
||||
|
||||
### Trigger Types
|
||||
|
||||
**timer** — Fires on a schedule. Two modes: cron expressions or fixed interval.
|
||||
|
||||
Cron (preferred for precise scheduling):
|
||||
```python
|
||||
AsyncEntryPointSpec(
|
||||
id="daily-digest",
|
||||
name="Daily Digest",
|
||||
entry_node="check-node",
|
||||
trigger_type="timer",
|
||||
trigger_config={"cron": "0 9 * * *"}, # daily at 9am
|
||||
isolation_level="shared",
|
||||
max_concurrent=1,
|
||||
)
|
||||
```
|
||||
- `cron` (str) — standard cron expression (5 fields: min hour dom month dow)
|
||||
- Examples: `"0 9 * * *"` (daily 9am), `"0 9 * * MON-FRI"` (weekdays 9am), `"*/30 * * * *"` (every 30 min)
|
||||
|
||||
Fixed interval (simpler, for polling-style tasks):
|
||||
```python
|
||||
AsyncEntryPointSpec(
|
||||
id="scheduled-check",
|
||||
name="Scheduled Check",
|
||||
entry_node="check-node",
|
||||
trigger_type="timer",
|
||||
trigger_config={"interval_minutes": 20, "run_immediately": False},
|
||||
isolation_level="shared",
|
||||
max_concurrent=1,
|
||||
)
|
||||
```
|
||||
- `interval_minutes` (float) — how often to fire
|
||||
- `run_immediately` (bool, default False) — fire once on startup
|
||||
|
||||
**event** — Subscribes to EventBus (e.g., webhook events):
|
||||
```python
|
||||
AsyncEntryPointSpec(
|
||||
id="email-event",
|
||||
name="Email Event Handler",
|
||||
entry_node="process-emails",
|
||||
trigger_type="event",
|
||||
trigger_config={"event_types": ["webhook_received"]},
|
||||
isolation_level="shared",
|
||||
max_concurrent=10,
|
||||
)
|
||||
```
|
||||
- `event_types` (list[str]) — EventType values to subscribe to
|
||||
- `filter_stream` (str, optional) — only receive from this stream
|
||||
- `filter_node` (str, optional) — only receive from this node
|
||||
|
||||
**webhook** — HTTP endpoint (requires AgentRuntimeConfig):
|
||||
The webhook server publishes `WEBHOOK_RECEIVED` events on the EventBus.
|
||||
An `event` trigger type with `event_types: ["webhook_received"]` subscribes
|
||||
to those events. The flow is:
|
||||
```
|
||||
HTTP POST /webhooks/gmail → WebhookServer → EventBus (WEBHOOK_RECEIVED)
|
||||
→ event entry point → triggers graph execution from entry_node
|
||||
```
|
||||
|
||||
**manual** — Triggered programmatically via `runtime.trigger()`.
|
||||
|
||||
### Isolation Levels
|
||||
|
||||
| Level | Meaning |
|
||||
|-------|---------|
|
||||
| `isolated` | Private state per execution |
|
||||
| `shared` | Eventual consistency — async executions can read primary session memory |
|
||||
| `synchronized` | Shared with write locks (use when ordering matters) |
|
||||
|
||||
For most async patterns, use `shared` — the async execution reads the primary
|
||||
session's memory (e.g., user-configured rules) and runs its own workflow.
|
||||
|
||||
### AgentRuntimeConfig (for webhook servers)
|
||||
|
||||
```python
|
||||
from framework.runtime.agent_runtime import AgentRuntimeConfig
|
||||
|
||||
runtime_config = AgentRuntimeConfig(
|
||||
webhook_host="127.0.0.1",
|
||||
webhook_port=8080,
|
||||
webhook_routes=[
|
||||
{
|
||||
"source_id": "gmail",
|
||||
"path": "/webhooks/gmail",
|
||||
"methods": ["POST"],
|
||||
"secret": None, # Optional HMAC-SHA256 secret
|
||||
},
|
||||
],
|
||||
)
|
||||
```
|
||||
`runtime_config` is a module-level variable read by `AgentRunner.load()`.
|
||||
The runner passes it to `create_agent_runtime()`. On `runtime.start()`,
|
||||
if webhook_routes is non-empty, an embedded HTTP server starts.
|
||||
|
||||
### Session Sharing
|
||||
|
||||
Timer and event triggers automatically call `_get_primary_session_state()`
|
||||
before execution. This finds the active user-facing session and provides
|
||||
its memory to the async execution, filtered to only the async entry node's
|
||||
`input_keys`. This means the async flow can read user-configured values
|
||||
(like rules, preferences) without needing separate configuration.
|
||||
|
||||
### Module-Level Variables
|
||||
|
||||
Agents with async entry points must export two additional variables:
|
||||
```python
|
||||
# In agent.py:
|
||||
async_entry_points = [AsyncEntryPointSpec(...), ...]
|
||||
runtime_config = AgentRuntimeConfig(...) # Only if using webhooks
|
||||
```
|
||||
|
||||
Both must be re-exported from `__init__.py`:
|
||||
```python
|
||||
from .agent import (
|
||||
..., async_entry_points, runtime_config,
|
||||
)
|
||||
```
|
||||
|
||||
### Reference Agent
|
||||
|
||||
See `exports/gmail_inbox_guardian/agent.py` for a complete example with:
|
||||
- Primary client-facing intake node (user configures rules)
|
||||
- Timer-based scheduled inbox checks (every 20 min)
|
||||
- Webhook-triggered email event handling
|
||||
- Shared isolation for memory access across streams
|
||||
|
||||
## Framework Capabilities
|
||||
|
||||
**Works well:** Multi-turn conversations, HITL review, tool orchestration, structured outputs, parallel execution, context management, error recovery, session persistence.
|
||||
|
||||
**Limitations:** LLM latency (2-10s/turn), context window limits (~128K), cost per run, rate limits, node boundaries lose context.
|
||||
|
||||
**Not designed for:** Sub-second responses, millions of items, real-time streaming, guaranteed determinism, offline/air-gapped.
|
||||
|
||||
## Tool Discovery
|
||||
|
||||
Do NOT rely on a static tool list — it will be outdated. Always use
|
||||
`discover_mcp_tools()` to get the current tool catalog from the
|
||||
hive-tools MCP server. This returns full schemas including parameter
|
||||
names, types, and descriptions.
|
||||
|
||||
```
|
||||
discover_mcp_tools() # default: hive-tools
|
||||
discover_mcp_tools("exports/my_agent/mcp_servers.json") # specific agent
|
||||
```
|
||||
|
||||
Common tool categories (verify via discover_mcp_tools):
|
||||
- **Web**: search, scrape, PDF
|
||||
- **Data**: save/load/append/list data files, serve to user
|
||||
- **File**: view, write, replace, diff, list, grep
|
||||
- **Communication**: email, gmail, slack, telegram
|
||||
- **CRM**: hubspot, apollo, calcom
|
||||
- **GitHub**: stargazers, user profiles, repos
|
||||
- **Vision**: image analysis
|
||||
- **Time**: current time
|
||||
@@ -0,0 +1,31 @@
|
||||
"""Test fixtures for Hive Coder agent."""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
_repo_root = Path(__file__).resolve().parents[3]
|
||||
for _p in ["exports", "core"]:
|
||||
_path = str(_repo_root / _p)
|
||||
if _path not in sys.path:
|
||||
sys.path.insert(0, _path)
|
||||
|
||||
AGENT_PATH = str(Path(__file__).resolve().parents[1])
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def mock_mode():
|
||||
return True
|
||||
|
||||
|
||||
@pytest_asyncio.fixture(scope="session")
|
||||
async def runner(tmp_path_factory, mock_mode):
|
||||
from framework.runner.runner import AgentRunner
|
||||
|
||||
storage = tmp_path_factory.mktemp("agent_storage")
|
||||
r = AgentRunner.load(AGENT_PATH, mock_mode=mock_mode, storage_path=storage)
|
||||
r._setup()
|
||||
yield r
|
||||
await r.cleanup_async()
|
||||
@@ -56,6 +56,13 @@ def _configure_paths():
|
||||
if (project_root / "core").is_dir() and core_str not in sys.path:
|
||||
sys.path.insert(0, core_str)
|
||||
|
||||
# Add core/framework/agents/ so framework agents are importable as top-level packages
|
||||
framework_agents_dir = project_root / "core" / "framework" / "agents"
|
||||
if framework_agents_dir.is_dir():
|
||||
fa_str = str(framework_agents_dir)
|
||||
if fa_str not in sys.path:
|
||||
sys.path.insert(0, fa_str)
|
||||
|
||||
|
||||
def main():
|
||||
_configure_paths()
|
||||
|
||||
@@ -50,14 +50,55 @@ def get_max_tokens() -> int:
|
||||
|
||||
|
||||
def get_api_key() -> str | None:
|
||||
"""Return the API key from the environment variable specified in configuration."""
|
||||
"""Return the API key, supporting env var, Claude Code subscription, and ZAI Code.
|
||||
|
||||
Priority:
|
||||
1. Claude Code subscription (``use_claude_code_subscription: true``)
|
||||
reads the OAuth token from ``~/.claude/.credentials.json``.
|
||||
2. Environment variable named in ``api_key_env_var``.
|
||||
"""
|
||||
llm = get_hive_config().get("llm", {})
|
||||
|
||||
# Claude Code subscription: read OAuth token directly
|
||||
if llm.get("use_claude_code_subscription"):
|
||||
try:
|
||||
from framework.runner.runner import get_claude_code_token
|
||||
|
||||
token = get_claude_code_token()
|
||||
if token:
|
||||
return token
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
# Standard env-var path (covers ZAI Code and all API-key providers)
|
||||
api_key_env_var = llm.get("api_key_env_var")
|
||||
if api_key_env_var:
|
||||
return os.environ.get(api_key_env_var)
|
||||
return None
|
||||
|
||||
|
||||
def get_api_base() -> str | None:
|
||||
"""Return the api_base URL for OpenAI-compatible endpoints, if configured."""
|
||||
return get_hive_config().get("llm", {}).get("api_base")
|
||||
|
||||
|
||||
def get_llm_extra_kwargs() -> dict[str, Any]:
|
||||
"""Return extra kwargs for LiteLLMProvider (e.g. OAuth headers).
|
||||
|
||||
When ``use_claude_code_subscription`` is enabled, returns
|
||||
``extra_headers`` with the OAuth Bearer token so that litellm's
|
||||
built-in Anthropic OAuth handler adds the required beta headers.
|
||||
"""
|
||||
llm = get_hive_config().get("llm", {})
|
||||
if llm.get("use_claude_code_subscription"):
|
||||
api_key = get_api_key()
|
||||
if api_key:
|
||||
return {
|
||||
"extra_headers": {"authorization": f"Bearer {api_key}"},
|
||||
}
|
||||
return {}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# RuntimeConfig – shared across agent templates
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -71,4 +112,5 @@ class RuntimeConfig:
|
||||
temperature: float = 0.7
|
||||
max_tokens: int = field(default_factory=get_max_tokens)
|
||||
api_key: str | None = field(default_factory=get_api_key)
|
||||
api_base: str | None = None
|
||||
api_base: str | None = field(default_factory=get_api_base)
|
||||
extra_kwargs: dict[str, Any] = field(default_factory=get_llm_extra_kwargs)
|
||||
|
||||
@@ -275,12 +275,25 @@ class EventLoopNode(NodeProtocol):
|
||||
)
|
||||
accumulator = OutputAccumulator(store=self._conversation_store)
|
||||
start_iteration = 0
|
||||
_restored_recent_responses: list[str] = []
|
||||
_restored_tool_fingerprints: list[list[tuple[str, str]]] = []
|
||||
else:
|
||||
# Try crash-recovery restore from store, then fall back to fresh.
|
||||
conversation, accumulator, start_iteration = await self._restore(ctx)
|
||||
if conversation is None:
|
||||
restored = await self._restore(ctx)
|
||||
if restored is not None:
|
||||
conversation = restored.conversation
|
||||
accumulator = restored.accumulator
|
||||
start_iteration = restored.start_iteration
|
||||
_restored_recent_responses = restored.recent_responses
|
||||
_restored_tool_fingerprints = restored.recent_tool_fingerprints
|
||||
else:
|
||||
_restored_recent_responses = []
|
||||
_restored_tool_fingerprints = []
|
||||
|
||||
# Fresh conversation: either isolated mode or first node in continuous mode.
|
||||
system_prompt = ctx.node_spec.system_prompt or ""
|
||||
from framework.graph.prompt_composer import _with_datetime
|
||||
|
||||
system_prompt = _with_datetime(ctx.node_spec.system_prompt or "")
|
||||
|
||||
conversation = NodeConversation(
|
||||
system_prompt=system_prompt,
|
||||
@@ -306,6 +319,7 @@ class EventLoopNode(NodeProtocol):
|
||||
tools.append(set_output_tool)
|
||||
if ctx.node_spec.client_facing and not ctx.event_triggered:
|
||||
tools.append(self._build_ask_user_tool())
|
||||
tools.append(self._build_escalate_tool())
|
||||
|
||||
logger.info(
|
||||
"[%s] Tools available (%d): %s | client_facing=%s | judge=%s",
|
||||
@@ -319,10 +333,9 @@ class EventLoopNode(NodeProtocol):
|
||||
# 4. Publish loop started
|
||||
await self._publish_loop_started(stream_id, node_id)
|
||||
|
||||
# 5. Stall / doom loop detection state
|
||||
recent_responses: list[str] = []
|
||||
recent_tool_fingerprints: list[list[tuple[str, str]]] = []
|
||||
user_interaction_count = 0 # tracks how many times this node blocked for user input
|
||||
# 5. Stall / doom loop detection state (restored from cursor if resuming)
|
||||
recent_responses: list[str] = _restored_recent_responses
|
||||
recent_tool_fingerprints: list[list[tuple[str, str]]] = _restored_tool_fingerprints
|
||||
|
||||
# 6. Main loop
|
||||
for iteration in range(start_iteration, self._config.max_iterations):
|
||||
@@ -576,7 +589,8 @@ class EventLoopNode(NodeProtocol):
|
||||
mcp_tool_calls = [
|
||||
tc
|
||||
for tc in logged_tool_calls
|
||||
if tc.get("tool_name") not in ("set_output", "ask_user") and not tc.get("is_error")
|
||||
if tc.get("tool_name") not in ("set_output", "ask_user", "escalate_to_coder")
|
||||
and not tc.get("is_error")
|
||||
]
|
||||
if mcp_tool_calls:
|
||||
fps = self._fingerprint_tool_calls(mcp_tool_calls)
|
||||
@@ -623,8 +637,15 @@ class EventLoopNode(NodeProtocol):
|
||||
# Text-only turn breaks the doom loop chain
|
||||
recent_tool_fingerprints.clear()
|
||||
|
||||
# 6g. Write cursor checkpoint
|
||||
await self._write_cursor(ctx, conversation, accumulator, iteration)
|
||||
# 6g. Write cursor checkpoint (includes stall/doom state for resume)
|
||||
await self._write_cursor(
|
||||
ctx,
|
||||
conversation,
|
||||
accumulator,
|
||||
iteration,
|
||||
recent_responses=recent_responses,
|
||||
recent_tool_fingerprints=recent_tool_fingerprints,
|
||||
)
|
||||
|
||||
# 6h. Client-facing input blocking
|
||||
#
|
||||
@@ -741,7 +762,6 @@ class EventLoopNode(NodeProtocol):
|
||||
conversation=conversation if _is_continuous else None,
|
||||
)
|
||||
|
||||
user_interaction_count += 1
|
||||
recent_responses.clear()
|
||||
|
||||
if _cf_auto:
|
||||
@@ -1267,6 +1287,26 @@ class EventLoopNode(NodeProtocol):
|
||||
)
|
||||
results_by_id[tc.tool_use_id] = result
|
||||
|
||||
elif tc.tool_name == "escalate_to_coder":
|
||||
# --- Framework-level escalation handling ---
|
||||
if self._event_bus:
|
||||
await self._event_bus.emit_escalation_requested(
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
reason=tc.tool_input.get("reason", ""),
|
||||
context=tc.tool_input.get("context", ""),
|
||||
execution_id=ctx.execution_id,
|
||||
)
|
||||
# Block like ask_user — the TUI loads the coder,
|
||||
# and /back injects a message to unblock us.
|
||||
user_input_requested = True
|
||||
result = ToolResult(
|
||||
tool_use_id=tc.tool_use_id,
|
||||
content="Escalating to Hive Coder. You will resume when done.",
|
||||
is_error=False,
|
||||
)
|
||||
results_by_id[tc.tool_use_id] = result
|
||||
|
||||
else:
|
||||
# --- Real tool: check for truncated args, else queue ---
|
||||
if "_raw" in tc.tool_input:
|
||||
@@ -1313,7 +1353,7 @@ class EventLoopNode(NodeProtocol):
|
||||
continue # shouldn't happen
|
||||
|
||||
# Build log entries for real tools
|
||||
if tc.tool_name not in ("set_output", "ask_user"):
|
||||
if tc.tool_name not in ("set_output", "ask_user", "escalate_to_coder"):
|
||||
tool_entry = {
|
||||
"tool_use_id": tc.tool_use_id,
|
||||
"tool_name": tc.tool_name,
|
||||
@@ -1458,6 +1498,46 @@ class EventLoopNode(NodeProtocol):
|
||||
},
|
||||
)
|
||||
|
||||
def _build_escalate_tool(self) -> Tool:
|
||||
"""Build the synthetic escalate_to_coder tool.
|
||||
|
||||
Client-facing nodes call this when the user's request requires
|
||||
capabilities beyond the current agent (code changes, feature
|
||||
expansion, debugging). The TUI intercepts the event and loads
|
||||
hive_coder in the foreground.
|
||||
"""
|
||||
return Tool(
|
||||
name="escalate_to_coder",
|
||||
description=(
|
||||
"Call this tool when the user requests something you "
|
||||
"cannot handle — a code change, feature expansion, bug "
|
||||
"fix, or framework-level modification. This will bring "
|
||||
"in Hive Coder, a coding agent that can read and write "
|
||||
"files. Provide a clear reason and relevant context so "
|
||||
"the coder can pick up where you left off."
|
||||
),
|
||||
parameters={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"reason": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"Why you are escalating (what the user needs that you cannot do)."
|
||||
),
|
||||
},
|
||||
"context": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"Relevant context: what you discussed, "
|
||||
"what files are involved, what the user "
|
||||
"wants changed."
|
||||
),
|
||||
},
|
||||
},
|
||||
"required": ["reason"],
|
||||
},
|
||||
)
|
||||
|
||||
def _build_set_output_tool(self, output_keys: list[str] | None) -> Tool | None:
|
||||
"""Build the synthetic set_output tool for explicit output declaration."""
|
||||
if not output_keys:
|
||||
@@ -2214,29 +2294,60 @@ class EventLoopNode(NodeProtocol):
|
||||
# Persistence: restore, cursor, injection, pause
|
||||
# -------------------------------------------------------------------
|
||||
|
||||
@dataclass
|
||||
class _RestoredState:
|
||||
"""State recovered from a previous checkpoint."""
|
||||
|
||||
conversation: NodeConversation
|
||||
accumulator: OutputAccumulator
|
||||
start_iteration: int
|
||||
recent_responses: list[str]
|
||||
recent_tool_fingerprints: list[list[tuple[str, str]]]
|
||||
|
||||
async def _restore(
|
||||
self,
|
||||
ctx: NodeContext,
|
||||
) -> tuple[NodeConversation | None, OutputAccumulator | None, int]:
|
||||
"""Attempt to restore from a previous checkpoint."""
|
||||
) -> _RestoredState | None:
|
||||
"""Attempt to restore from a previous checkpoint.
|
||||
|
||||
Returns a ``_RestoredState`` with conversation, accumulator, iteration
|
||||
counter, and stall/doom-loop detection state — everything needed to
|
||||
resume exactly where execution stopped.
|
||||
"""
|
||||
if self._conversation_store is None:
|
||||
return None, None, 0
|
||||
return None
|
||||
|
||||
conversation = await NodeConversation.restore(self._conversation_store)
|
||||
if conversation is None:
|
||||
return None, None, 0
|
||||
return None
|
||||
|
||||
accumulator = await OutputAccumulator.restore(self._conversation_store)
|
||||
|
||||
cursor = await self._conversation_store.read_cursor()
|
||||
start_iteration = cursor.get("iteration", 0) + 1 if cursor else 0
|
||||
|
||||
# Restore stall/doom-loop detection state
|
||||
recent_responses: list[str] = cursor.get("recent_responses", []) if cursor else []
|
||||
raw_fps = cursor.get("recent_tool_fingerprints", []) if cursor else []
|
||||
recent_tool_fingerprints: list[list[tuple[str, str]]] = [
|
||||
[tuple(pair) for pair in fps] # type: ignore[misc]
|
||||
for fps in raw_fps
|
||||
]
|
||||
|
||||
logger.info(
|
||||
f"Restored event loop: iteration={start_iteration}, "
|
||||
f"messages={conversation.message_count}, "
|
||||
f"outputs={list(accumulator.values.keys())}"
|
||||
f"outputs={list(accumulator.values.keys())}, "
|
||||
f"stall_window={len(recent_responses)}, "
|
||||
f"doom_window={len(recent_tool_fingerprints)}"
|
||||
)
|
||||
return EventLoopNode._RestoredState(
|
||||
conversation=conversation,
|
||||
accumulator=accumulator,
|
||||
start_iteration=start_iteration,
|
||||
recent_responses=recent_responses,
|
||||
recent_tool_fingerprints=recent_tool_fingerprints,
|
||||
)
|
||||
return conversation, accumulator, start_iteration
|
||||
|
||||
async def _write_cursor(
|
||||
self,
|
||||
@@ -2244,8 +2355,15 @@ class EventLoopNode(NodeProtocol):
|
||||
conversation: NodeConversation,
|
||||
accumulator: OutputAccumulator,
|
||||
iteration: int,
|
||||
*,
|
||||
recent_responses: list[str] | None = None,
|
||||
recent_tool_fingerprints: list[list[tuple[str, str]]] | None = None,
|
||||
) -> None:
|
||||
"""Write checkpoint cursor for crash recovery."""
|
||||
"""Write checkpoint cursor for crash recovery.
|
||||
|
||||
Persists iteration counter, accumulator outputs, and stall/doom-loop
|
||||
detection state so that resume picks up exactly where execution stopped.
|
||||
"""
|
||||
if self._conversation_store:
|
||||
cursor = await self._conversation_store.read_cursor() or {}
|
||||
cursor.update(
|
||||
@@ -2256,6 +2374,14 @@ class EventLoopNode(NodeProtocol):
|
||||
"outputs": accumulator.to_dict(),
|
||||
}
|
||||
)
|
||||
# Persist stall/doom-loop detection state for reliable resume
|
||||
if recent_responses is not None:
|
||||
cursor["recent_responses"] = recent_responses
|
||||
if recent_tool_fingerprints is not None:
|
||||
# Convert list[list[tuple]] → list[list[list]] for JSON
|
||||
cursor["recent_tool_fingerprints"] = [
|
||||
[list(pair) for pair in fps] for fps in recent_tool_fingerprints
|
||||
]
|
||||
await self._conversation_store.write_cursor(cursor)
|
||||
|
||||
async def _drain_injection_queue(self, conversation: NodeConversation) -> int:
|
||||
|
||||
@@ -618,7 +618,7 @@ class GraphExecutor:
|
||||
cnt = node_visit_counts.get(current_node_id, 0) + 1
|
||||
node_visit_counts[current_node_id] = cnt
|
||||
_is_retry = False
|
||||
max_visits = getattr(node_spec, "max_node_visits", 1)
|
||||
max_visits = getattr(node_spec, "max_node_visits", 0)
|
||||
if max_visits > 0 and node_visit_counts[current_node_id] > max_visits:
|
||||
self.logger.warning(
|
||||
f" ⊘ Node '{node_spec.name}' visit limit reached "
|
||||
@@ -1294,6 +1294,36 @@ class GraphExecutor:
|
||||
# Handle cancellation (e.g., TUI quit) - save as paused instead of failed
|
||||
self.logger.info("⏸ Execution cancelled - saving state for resume")
|
||||
|
||||
# Flush WIP accumulator outputs from the interrupted node's
|
||||
# cursor.json into SharedMemory so they survive resume. The
|
||||
# accumulator writes to cursor.json on every set() call, but
|
||||
# only writes to SharedMemory when the judge ACCEPTs. Without
|
||||
# this, edge conditions checking these keys see None on resume.
|
||||
if current_node_id and self._storage_path:
|
||||
try:
|
||||
import json as _json
|
||||
|
||||
cursor_path = (
|
||||
self._storage_path / "conversations" / current_node_id / "cursor.json"
|
||||
)
|
||||
if cursor_path.exists():
|
||||
cursor_data = _json.loads(cursor_path.read_text(encoding="utf-8"))
|
||||
wip_outputs = cursor_data.get("outputs", {})
|
||||
for key, value in wip_outputs.items():
|
||||
if value is not None:
|
||||
memory.write(key, value, validate=False)
|
||||
if wip_outputs:
|
||||
self.logger.info(
|
||||
"Flushed %d WIP accumulator outputs to memory: %s",
|
||||
len(wip_outputs),
|
||||
list(wip_outputs.keys()),
|
||||
)
|
||||
except Exception:
|
||||
self.logger.debug(
|
||||
"Could not flush accumulator outputs from cursor",
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
# Save memory and state for resume
|
||||
saved_memory = memory.read_all()
|
||||
session_state_out: dict[str, Any] = {
|
||||
@@ -1371,6 +1401,25 @@ class GraphExecutor:
|
||||
execution_quality="failed",
|
||||
)
|
||||
|
||||
# Flush WIP accumulator outputs (same as CancelledError path)
|
||||
if current_node_id and self._storage_path:
|
||||
try:
|
||||
import json as _json
|
||||
|
||||
cursor_path = (
|
||||
self._storage_path / "conversations" / current_node_id / "cursor.json"
|
||||
)
|
||||
if cursor_path.exists():
|
||||
cursor_data = _json.loads(cursor_path.read_text(encoding="utf-8"))
|
||||
for key, value in cursor_data.get("outputs", {}).items():
|
||||
if value is not None:
|
||||
memory.write(key, value, validate=False)
|
||||
except Exception:
|
||||
self.logger.debug(
|
||||
"Could not flush accumulator outputs from cursor",
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
# Save memory and state for potential resume
|
||||
saved_memory = memory.read_all()
|
||||
session_state_out: dict[str, Any] = {
|
||||
|
||||
@@ -201,10 +201,11 @@ class NodeSpec(BaseModel):
|
||||
|
||||
# Visit limits (for feedback/callback edges)
|
||||
max_node_visits: int = Field(
|
||||
default=1,
|
||||
default=0,
|
||||
description=(
|
||||
"Max times this node executes in one graph run. "
|
||||
"Set >1 for feedback loops. 0 = unlimited (max_steps guards)."
|
||||
"0 = unlimited (default, required for forever-alive agents). "
|
||||
"Set >1 for one-shot agents with feedback loops."
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@ Layer 3 — Focus (per-node system_prompt, reframed as focus directive):
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
@@ -26,6 +27,13 @@ if TYPE_CHECKING:
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _with_datetime(prompt: str) -> str:
|
||||
"""Append current datetime with local timezone to a system prompt."""
|
||||
local = datetime.now().astimezone()
|
||||
stamp = f"Current date and time: {local.strftime('%Y-%m-%d %H:%M %Z (UTC%z)')}"
|
||||
return f"{prompt}\n\n{stamp}" if prompt else stamp
|
||||
|
||||
|
||||
def compose_system_prompt(
|
||||
identity_prompt: str | None,
|
||||
focus_prompt: str | None,
|
||||
@@ -39,7 +47,7 @@ def compose_system_prompt(
|
||||
narrative: Layer 2 — auto-generated from conversation state.
|
||||
|
||||
Returns:
|
||||
Composed system prompt with all layers present.
|
||||
Composed system prompt with all layers present, plus current datetime.
|
||||
"""
|
||||
parts: list[str] = []
|
||||
|
||||
@@ -55,7 +63,7 @@ def compose_system_prompt(
|
||||
if focus_prompt:
|
||||
parts.append(f"\n--- Current Focus ---\n{focus_prompt}")
|
||||
|
||||
return "\n".join(parts) if parts else ""
|
||||
return _with_datetime("\n".join(parts) if parts else "")
|
||||
|
||||
|
||||
def build_narrative(
|
||||
|
||||
@@ -28,6 +28,51 @@ from framework.llm.stream_events import StreamEvent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _patch_litellm_anthropic_oauth() -> None:
|
||||
"""Patch litellm's Anthropic header construction to fix OAuth token handling.
|
||||
|
||||
litellm bug: validate_environment() puts the OAuth token into x-api-key,
|
||||
but Anthropic's API rejects OAuth tokens in x-api-key. They must be sent
|
||||
via Authorization: Bearer only, with x-api-key omitted entirely.
|
||||
|
||||
This patch wraps validate_environment to remove x-api-key when the
|
||||
Authorization header carries an OAuth token (sk-ant-oat prefix).
|
||||
|
||||
See: https://github.com/BerriAI/litellm/issues/19618
|
||||
"""
|
||||
try:
|
||||
from litellm.llms.anthropic.common_utils import AnthropicModelInfo
|
||||
from litellm.types.llms.anthropic import ANTHROPIC_OAUTH_TOKEN_PREFIX
|
||||
except ImportError:
|
||||
return
|
||||
|
||||
original = AnthropicModelInfo.validate_environment
|
||||
|
||||
def _patched_validate_environment(
|
||||
self, headers, model, messages, optional_params, litellm_params, api_key=None, api_base=None
|
||||
):
|
||||
result = original(
|
||||
self,
|
||||
headers,
|
||||
model,
|
||||
messages,
|
||||
optional_params,
|
||||
litellm_params,
|
||||
api_key=api_key,
|
||||
api_base=api_base,
|
||||
)
|
||||
auth = result.get("authorization", "")
|
||||
if auth.startswith(f"Bearer {ANTHROPIC_OAUTH_TOKEN_PREFIX}"):
|
||||
result.pop("x-api-key", None)
|
||||
return result
|
||||
|
||||
AnthropicModelInfo.validate_environment = _patched_validate_environment
|
||||
|
||||
|
||||
if litellm is not None:
|
||||
_patch_litellm_anthropic_oauth()
|
||||
|
||||
RATE_LIMIT_MAX_RETRIES = 10
|
||||
RATE_LIMIT_BACKOFF_BASE = 2 # seconds
|
||||
RATE_LIMIT_MAX_DELAY = 120 # seconds - cap to prevent absurd waits
|
||||
|
||||
@@ -36,7 +36,11 @@ from framework.graph import ( # noqa: E402
|
||||
NodeSpec,
|
||||
SuccessCriterion,
|
||||
)
|
||||
from framework.testing.prompts import PYTEST_TEST_FILE_HEADER # noqa: E402
|
||||
|
||||
# Testing framework imports
|
||||
from framework.testing.prompts import ( # noqa: E402
|
||||
PYTEST_TEST_FILE_HEADER,
|
||||
)
|
||||
from framework.utils.io import atomic_write # noqa: E402
|
||||
|
||||
# Initialize MCP server
|
||||
|
||||
+159
-48
@@ -75,6 +75,11 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
|
||||
default=None,
|
||||
help="Resume from a specific checkpoint (requires --resume-session)",
|
||||
)
|
||||
run_parser.add_argument(
|
||||
"--no-guardian",
|
||||
action="store_true",
|
||||
help="Disable the Agent Guardian watchdog in TUI mode",
|
||||
)
|
||||
run_parser.set_defaults(func=cmd_run)
|
||||
|
||||
# info command
|
||||
@@ -206,8 +211,28 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
|
||||
default=None,
|
||||
help="LLM model to use (any LiteLLM-compatible name)",
|
||||
)
|
||||
tui_parser.add_argument(
|
||||
"--no-guardian",
|
||||
action="store_true",
|
||||
help="Disable the Agent Guardian watchdog",
|
||||
)
|
||||
tui_parser.set_defaults(func=cmd_tui)
|
||||
|
||||
# code command (Hive Coder — framework agent builder)
|
||||
code_parser = subparsers.add_parser(
|
||||
"code",
|
||||
help="Launch Hive Coder to build agents",
|
||||
description="Interactive agent builder. Describe what you want and Hive Coder builds it.",
|
||||
)
|
||||
code_parser.add_argument(
|
||||
"--model",
|
||||
"-m",
|
||||
type=str,
|
||||
default=None,
|
||||
help="LLM model to use (any LiteLLM-compatible name)",
|
||||
)
|
||||
code_parser.set_defaults(func=cmd_code)
|
||||
|
||||
# sessions command group (checkpoint/resume management)
|
||||
sessions_parser = subparsers.add_parser(
|
||||
"sessions",
|
||||
@@ -524,7 +549,17 @@ def cmd_run(args: argparse.Namespace) -> int:
|
||||
|
||||
# Force setup inside the loop
|
||||
if runner._agent_runtime is None:
|
||||
runner._setup()
|
||||
try:
|
||||
runner._setup()
|
||||
except CredentialError as e:
|
||||
print(f"\n{e}", file=sys.stderr)
|
||||
return
|
||||
|
||||
# Attach hive_coder's guardian watchdog (before start)
|
||||
if not getattr(args, "no_guardian", False) and runner._agent_runtime:
|
||||
from framework.agents.hive_coder.guardian import attach_guardian
|
||||
|
||||
attach_guardian(runner._agent_runtime, runner._tool_registry)
|
||||
|
||||
# Start runtime before TUI so it's ready for user input
|
||||
if runner._agent_runtime and not runner._agent_runtime.is_running:
|
||||
@@ -1343,60 +1378,26 @@ def cmd_shell(args: argparse.Namespace) -> int:
|
||||
return 0
|
||||
|
||||
|
||||
def cmd_tui(args: argparse.Namespace) -> int:
|
||||
"""Browse agents and launch the interactive TUI dashboard."""
|
||||
import logging
|
||||
def _get_framework_agents_dir() -> Path:
|
||||
"""Resolve the framework agents directory relative to this file."""
|
||||
return Path(__file__).resolve().parent.parent / "agents"
|
||||
|
||||
|
||||
def _launch_agent_tui(
|
||||
agent_path: str | Path,
|
||||
model: str | None = None,
|
||||
no_guardian: bool = False,
|
||||
) -> int:
|
||||
"""Load an agent and launch the TUI. Shared by cmd_tui and cmd_code."""
|
||||
from framework.credentials.models import CredentialError
|
||||
from framework.runner import AgentRunner
|
||||
from framework.tui.app import AdenTUI
|
||||
|
||||
logging.basicConfig(level=logging.WARNING, format="%(message)s")
|
||||
|
||||
exports_dir = Path("exports")
|
||||
examples_dir = Path("examples/templates")
|
||||
|
||||
has_exports = _has_agents(exports_dir)
|
||||
has_examples = _has_agents(examples_dir)
|
||||
|
||||
if not has_exports and not has_examples:
|
||||
print("No agents found in exports/ or examples/templates/", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Determine which directory to browse
|
||||
if has_exports and has_examples:
|
||||
print("\nAgent sources:\n")
|
||||
print(" 1. Your Agents (exports/)")
|
||||
print(" 2. Sample Agents (examples/templates/)")
|
||||
print()
|
||||
try:
|
||||
choice = input("Select source (number): ").strip()
|
||||
if choice == "1":
|
||||
agents_dir = exports_dir
|
||||
elif choice == "2":
|
||||
agents_dir = examples_dir
|
||||
else:
|
||||
print("Invalid selection")
|
||||
return 1
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
print()
|
||||
return 1
|
||||
elif has_exports:
|
||||
agents_dir = exports_dir
|
||||
else:
|
||||
agents_dir = examples_dir
|
||||
|
||||
# Let user pick an agent
|
||||
agent_path = _select_agent(agents_dir)
|
||||
if not agent_path:
|
||||
return 1
|
||||
|
||||
# Launch TUI (same pattern as cmd_run --tui)
|
||||
async def run_with_tui():
|
||||
try:
|
||||
runner = AgentRunner.load(
|
||||
agent_path,
|
||||
model=args.model,
|
||||
model=model,
|
||||
)
|
||||
except CredentialError as e:
|
||||
print(f"\n{e}", file=sys.stderr)
|
||||
@@ -1416,7 +1417,7 @@ def cmd_tui(args: argparse.Namespace) -> int:
|
||||
if result.success:
|
||||
# Retry loading with credentials now configured
|
||||
try:
|
||||
runner = AgentRunner.load(agent_path, model=args.model)
|
||||
runner = AgentRunner.load(agent_path, model=model)
|
||||
except CredentialError as retry_e:
|
||||
print(f"\n{retry_e}", file=sys.stderr)
|
||||
return
|
||||
@@ -1434,7 +1435,17 @@ def cmd_tui(args: argparse.Namespace) -> int:
|
||||
return
|
||||
|
||||
if runner._agent_runtime is None:
|
||||
runner._setup()
|
||||
try:
|
||||
runner._setup()
|
||||
except CredentialError as e:
|
||||
print(f"\n{e}", file=sys.stderr)
|
||||
return
|
||||
|
||||
# Attach hive_coder's guardian watchdog (before start)
|
||||
if not no_guardian and runner._agent_runtime:
|
||||
from framework.agents.hive_coder.guardian import attach_guardian
|
||||
|
||||
attach_guardian(runner._agent_runtime, runner._tool_registry)
|
||||
|
||||
if runner._agent_runtime and not runner._agent_runtime.is_running:
|
||||
await runner._agent_runtime.start()
|
||||
@@ -1455,6 +1466,106 @@ def cmd_tui(args: argparse.Namespace) -> int:
|
||||
return 0
|
||||
|
||||
|
||||
def cmd_tui(args: argparse.Namespace) -> int:
|
||||
"""Launch the interactive TUI dashboard with in-app agent picker."""
|
||||
import logging
|
||||
|
||||
logging.basicConfig(level=logging.WARNING, format="%(message)s")
|
||||
|
||||
from framework.tui.app import AdenTUI
|
||||
|
||||
async def run_tui():
|
||||
app = AdenTUI(
|
||||
model=args.model,
|
||||
no_guardian=getattr(args, "no_guardian", False),
|
||||
)
|
||||
await app.run_async()
|
||||
|
||||
asyncio.run(run_tui())
|
||||
print("TUI session ended.")
|
||||
return 0
|
||||
|
||||
|
||||
def cmd_code(args: argparse.Namespace) -> int:
|
||||
"""Launch Hive Coder with multi-graph support.
|
||||
|
||||
Unlike ``_launch_agent_tui``, this sets up graph lifecycle tools and
|
||||
assigns ``graph_id="hive_coder"`` so the coder can load, supervise,
|
||||
and restart secondary agent graphs within the same session.
|
||||
"""
|
||||
import logging
|
||||
|
||||
logging.basicConfig(level=logging.WARNING, format="%(message)s")
|
||||
|
||||
framework_agents_dir = _get_framework_agents_dir()
|
||||
hive_coder_path = framework_agents_dir / "hive_coder"
|
||||
|
||||
if not (hive_coder_path / "agent.py").exists():
|
||||
print("Error: Hive Coder agent not found.", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Ensure framework agents dir is on sys.path for import
|
||||
fa_str = str(framework_agents_dir)
|
||||
if fa_str not in sys.path:
|
||||
sys.path.insert(0, fa_str)
|
||||
|
||||
from framework.credentials.models import CredentialError
|
||||
from framework.runner import AgentRunner
|
||||
from framework.tools.session_graph_tools import register_graph_tools
|
||||
from framework.tui.app import AdenTUI
|
||||
|
||||
async def run_with_tui():
|
||||
try:
|
||||
runner = AgentRunner.load(hive_coder_path, model=args.model)
|
||||
except CredentialError as e:
|
||||
print(f"\n{e}", file=sys.stderr)
|
||||
return
|
||||
except Exception as e:
|
||||
print(f"Error loading agent: {e}")
|
||||
return
|
||||
|
||||
if runner._agent_runtime is None:
|
||||
try:
|
||||
runner._setup()
|
||||
except CredentialError as e:
|
||||
print(f"\n{e}", file=sys.stderr)
|
||||
return
|
||||
|
||||
runtime = runner._agent_runtime
|
||||
|
||||
# -- Multi-graph setup --
|
||||
# Tag the primary graph so events carry graph_id="hive_coder"
|
||||
runtime._graph_id = "hive_coder"
|
||||
runtime._active_graph_id = "hive_coder"
|
||||
|
||||
# Register graph lifecycle tools (load_agent, unload_agent, etc.)
|
||||
register_graph_tools(runner._tool_registry, runtime)
|
||||
|
||||
# Refresh tool schemas AND executor so streams see the new tools.
|
||||
# The executor closure references the registry dict by ref, but
|
||||
# refreshing both is robust against any copy-on-read behavior.
|
||||
runtime._tools = list(runner._tool_registry.get_tools().values())
|
||||
runtime._tool_executor = runner._tool_registry.get_executor()
|
||||
|
||||
if not runtime.is_running:
|
||||
await runtime.start()
|
||||
|
||||
app = AdenTUI(runtime)
|
||||
try:
|
||||
await app.run_async()
|
||||
except Exception as e:
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
print(f"TUI error: {e}")
|
||||
|
||||
await runner.cleanup_async()
|
||||
|
||||
asyncio.run(run_with_tui())
|
||||
print("TUI session ended.")
|
||||
return 0
|
||||
|
||||
|
||||
def _extract_python_agent_metadata(agent_path: Path) -> tuple[str, str]:
|
||||
"""Extract name and description from a Python-based agent's config.py.
|
||||
|
||||
|
||||
@@ -71,9 +71,15 @@ class AgentOrchestrator:
|
||||
|
||||
# Auto-create LLM - LiteLLM auto-detects provider and API key from model name
|
||||
if self._llm is None:
|
||||
from framework.config import get_api_base, get_api_key, get_llm_extra_kwargs
|
||||
from framework.llm.litellm import LiteLLMProvider
|
||||
|
||||
self._llm = LiteLLMProvider(model=self._model)
|
||||
self._llm = LiteLLMProvider(
|
||||
model=self._model,
|
||||
api_key=get_api_key(),
|
||||
api_base=get_api_base(),
|
||||
**get_llm_extra_kwargs(),
|
||||
)
|
||||
|
||||
def register(
|
||||
self,
|
||||
|
||||
@@ -36,27 +36,130 @@ if TYPE_CHECKING:
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
CLAUDE_CREDENTIALS_FILE = Path.home() / ".claude" / ".credentials.json"
|
||||
CLAUDE_OAUTH_TOKEN_URL = "https://console.anthropic.com/v1/oauth/token"
|
||||
CLAUDE_OAUTH_CLIENT_ID = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
|
||||
|
||||
# Buffer in seconds before token expiry to trigger a proactive refresh
|
||||
_TOKEN_REFRESH_BUFFER_SECS = 300 # 5 minutes
|
||||
|
||||
|
||||
def _refresh_claude_code_token(refresh_token: str) -> dict | None:
|
||||
"""Refresh the Claude Code OAuth token using the refresh token.
|
||||
|
||||
POSTs to the Anthropic OAuth token endpoint with form-urlencoded data
|
||||
(per OAuth 2.0 RFC 6749 Section 4.1.3).
|
||||
|
||||
Returns:
|
||||
Dict with new token data (access_token, refresh_token, expires_in)
|
||||
on success, None on failure.
|
||||
"""
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
|
||||
data = urllib.parse.urlencode(
|
||||
{
|
||||
"grant_type": "refresh_token",
|
||||
"refresh_token": refresh_token,
|
||||
"client_id": CLAUDE_OAUTH_CLIENT_ID,
|
||||
}
|
||||
).encode("utf-8")
|
||||
|
||||
req = urllib.request.Request(
|
||||
CLAUDE_OAUTH_TOKEN_URL,
|
||||
data=data,
|
||||
headers={"Content-Type": "application/x-www-form-urlencoded"},
|
||||
method="POST",
|
||||
)
|
||||
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=15) as resp:
|
||||
return json.loads(resp.read())
|
||||
except (urllib.error.URLError, json.JSONDecodeError, TimeoutError, OSError) as exc:
|
||||
logger.debug("Claude Code token refresh failed: %s", exc)
|
||||
return None
|
||||
|
||||
|
||||
def _save_refreshed_credentials(token_data: dict) -> None:
|
||||
"""Write refreshed token data back to ~/.claude/.credentials.json."""
|
||||
import time
|
||||
|
||||
if not CLAUDE_CREDENTIALS_FILE.exists():
|
||||
return
|
||||
|
||||
try:
|
||||
with open(CLAUDE_CREDENTIALS_FILE) as f:
|
||||
creds = json.load(f)
|
||||
|
||||
oauth = creds.get("claudeAiOauth", {})
|
||||
oauth["accessToken"] = token_data["access_token"]
|
||||
if "refresh_token" in token_data:
|
||||
oauth["refreshToken"] = token_data["refresh_token"]
|
||||
if "expires_in" in token_data:
|
||||
oauth["expiresAt"] = int((time.time() + token_data["expires_in"]) * 1000)
|
||||
creds["claudeAiOauth"] = oauth
|
||||
|
||||
with open(CLAUDE_CREDENTIALS_FILE, "w") as f:
|
||||
json.dump(creds, f, indent=2)
|
||||
logger.debug("Claude Code credentials refreshed successfully")
|
||||
except (json.JSONDecodeError, OSError, KeyError) as exc:
|
||||
logger.debug("Failed to save refreshed credentials: %s", exc)
|
||||
|
||||
|
||||
def get_claude_code_token() -> str | None:
|
||||
"""
|
||||
Get the OAuth token from Claude Code subscription.
|
||||
"""Get the OAuth token from Claude Code subscription with auto-refresh.
|
||||
|
||||
Reads from ~/.claude/.credentials.json which is created by the
|
||||
Claude Code CLI when users authenticate with their subscription.
|
||||
|
||||
If the token is expired or close to expiry, attempts an automatic
|
||||
refresh using the stored refresh token.
|
||||
|
||||
Returns:
|
||||
The access token if available, None otherwise.
|
||||
"""
|
||||
import time
|
||||
|
||||
if not CLAUDE_CREDENTIALS_FILE.exists():
|
||||
return None
|
||||
|
||||
try:
|
||||
with open(CLAUDE_CREDENTIALS_FILE) as f:
|
||||
creds = json.load(f)
|
||||
return creds.get("claudeAiOauth", {}).get("accessToken")
|
||||
except (json.JSONDecodeError, OSError):
|
||||
return None
|
||||
|
||||
oauth = creds.get("claudeAiOauth", {})
|
||||
access_token = oauth.get("accessToken")
|
||||
if not access_token:
|
||||
return None
|
||||
|
||||
# Check token expiry (expiresAt is in milliseconds)
|
||||
expires_at_ms = oauth.get("expiresAt", 0)
|
||||
now_ms = int(time.time() * 1000)
|
||||
buffer_ms = _TOKEN_REFRESH_BUFFER_SECS * 1000
|
||||
|
||||
if expires_at_ms > now_ms + buffer_ms:
|
||||
# Token is still valid
|
||||
return access_token
|
||||
|
||||
# Token is expired or near expiry — attempt refresh
|
||||
refresh_token = oauth.get("refreshToken")
|
||||
if not refresh_token:
|
||||
logger.warning("Claude Code token expired and no refresh token available")
|
||||
return access_token # Return expired token; it may still work briefly
|
||||
|
||||
logger.info("Claude Code token expired or near expiry, refreshing...")
|
||||
token_data = _refresh_claude_code_token(refresh_token)
|
||||
|
||||
if token_data and "access_token" in token_data:
|
||||
_save_refreshed_credentials(token_data)
|
||||
return token_data["access_token"]
|
||||
|
||||
# Refresh failed — return the existing token and warn
|
||||
logger.warning("Claude Code token refresh failed. Run 'claude' to re-authenticate.")
|
||||
return access_token
|
||||
|
||||
|
||||
@dataclass
|
||||
class AgentInfo:
|
||||
@@ -587,6 +690,7 @@ class AgentRunner:
|
||||
config = get_hive_config()
|
||||
llm_config = config.get("llm", {})
|
||||
use_claude_code = llm_config.get("use_claude_code_subscription", False)
|
||||
api_base = llm_config.get("api_base")
|
||||
|
||||
api_key = None
|
||||
if use_claude_code:
|
||||
@@ -596,9 +700,16 @@ class AgentRunner:
|
||||
print("Warning: Claude Code subscription configured but no token found.")
|
||||
print("Run 'claude' to authenticate, then try again.")
|
||||
|
||||
if api_key:
|
||||
# Use Claude Code subscription token
|
||||
self._llm = LiteLLMProvider(model=self.model, api_key=api_key)
|
||||
if api_key and use_claude_code:
|
||||
# Use litellm's built-in Anthropic OAuth support.
|
||||
# The lowercase "authorization" key triggers OAuth detection which
|
||||
# adds the required anthropic-beta and browser-access headers.
|
||||
self._llm = LiteLLMProvider(
|
||||
model=self.model,
|
||||
api_key=api_key,
|
||||
api_base=api_base,
|
||||
extra_headers={"authorization": f"Bearer {api_key}"},
|
||||
)
|
||||
else:
|
||||
# Fall back to environment variable
|
||||
# First check api_key_env_var from config (set by quickstart)
|
||||
@@ -606,12 +717,18 @@ class AgentRunner:
|
||||
self.model
|
||||
)
|
||||
if api_key_env and os.environ.get(api_key_env):
|
||||
self._llm = LiteLLMProvider(model=self.model)
|
||||
self._llm = LiteLLMProvider(
|
||||
model=self.model,
|
||||
api_key=os.environ[api_key_env],
|
||||
api_base=api_base,
|
||||
)
|
||||
else:
|
||||
# Fall back to credential store
|
||||
api_key = self._get_api_key_from_credential_store()
|
||||
if api_key:
|
||||
self._llm = LiteLLMProvider(model=self.model, api_key=api_key)
|
||||
self._llm = LiteLLMProvider(
|
||||
model=self.model, api_key=api_key, api_base=api_base
|
||||
)
|
||||
# Set env var so downstream code (e.g. cleanup LLM in
|
||||
# node._extract_json) can also find it
|
||||
if api_key_env:
|
||||
@@ -620,6 +737,20 @@ class AgentRunner:
|
||||
print(f"Warning: {api_key_env} not set. LLM calls will fail.")
|
||||
print(f"Set it with: export {api_key_env}=your-api-key")
|
||||
|
||||
# Fail fast if the agent needs an LLM but none was configured
|
||||
if self._llm is None:
|
||||
has_llm_nodes = any(node.node_type == "event_loop" for node in self.graph.nodes)
|
||||
if has_llm_nodes:
|
||||
from framework.credentials.models import CredentialError
|
||||
|
||||
api_key_env = self._get_api_key_env_var(self.model)
|
||||
hint = (
|
||||
f"Set it with: export {api_key_env}=your-api-key"
|
||||
if api_key_env
|
||||
else "Configure an API key for your LLM provider."
|
||||
)
|
||||
raise CredentialError(f"LLM API key not found for model '{self.model}'. {hint}")
|
||||
|
||||
# Get tools for runtime
|
||||
tools = list(self._tool_registry.get_tools().values())
|
||||
tool_executor = self._tool_registry.get_executor()
|
||||
@@ -731,6 +862,19 @@ class AgentRunner:
|
||||
async_checkpoint=True, # Non-blocking
|
||||
)
|
||||
|
||||
# Handle runtime_config - ensure it's AgentRuntimeConfig, not RuntimeConfig
|
||||
# RuntimeConfig is for LLM settings; AgentRuntimeConfig is for AgentRuntime settings
|
||||
runtime_config = None
|
||||
if self.runtime_config is not None:
|
||||
from framework.config import RuntimeConfig
|
||||
|
||||
# If it's a RuntimeConfig (LLM config), don't pass it
|
||||
if isinstance(self.runtime_config, RuntimeConfig):
|
||||
runtime_config = None
|
||||
else:
|
||||
# It's already an AgentRuntimeConfig or compatible type
|
||||
runtime_config = self.runtime_config
|
||||
|
||||
self._agent_runtime = create_agent_runtime(
|
||||
graph=self.graph,
|
||||
goal=self.goal,
|
||||
@@ -741,7 +885,8 @@ class AgentRunner:
|
||||
tool_executor=tool_executor,
|
||||
runtime_log_store=log_store,
|
||||
checkpoint_config=checkpoint_config,
|
||||
config=self.runtime_config,
|
||||
config=runtime_config,
|
||||
graph_id=self.graph.id or self.agent_path.name,
|
||||
)
|
||||
|
||||
# Pass intro_message through for TUI display
|
||||
@@ -1309,6 +1454,61 @@ Respond with JSON only:
|
||||
type=MessageType.RESPONSE,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
async def setup_as_secondary(
|
||||
cls,
|
||||
agent_path: str | Path,
|
||||
runtime: AgentRuntime,
|
||||
graph_id: str | None = None,
|
||||
) -> str:
|
||||
"""Load an agent and register it as a secondary graph on *runtime*.
|
||||
|
||||
Uses :meth:`AgentRunner.load` to parse the agent, then calls
|
||||
:meth:`AgentRuntime.add_graph` with the extracted graph, goal,
|
||||
and entry points.
|
||||
|
||||
Args:
|
||||
agent_path: Path to the agent directory
|
||||
runtime: The running AgentRuntime to attach to
|
||||
graph_id: Optional graph identifier (defaults to directory name)
|
||||
|
||||
Returns:
|
||||
The graph_id used for registration
|
||||
"""
|
||||
agent_path = Path(agent_path)
|
||||
runner = cls.load(agent_path)
|
||||
gid = graph_id or agent_path.name
|
||||
|
||||
# Build entry points
|
||||
entry_points: dict[str, EntryPointSpec] = {}
|
||||
if runner.graph.entry_node:
|
||||
entry_points["default"] = EntryPointSpec(
|
||||
id="default",
|
||||
name="Default",
|
||||
entry_node=runner.graph.entry_node,
|
||||
trigger_type="manual",
|
||||
isolation_level="shared",
|
||||
)
|
||||
for aep in runner.graph.async_entry_points:
|
||||
entry_points[aep.id] = EntryPointSpec(
|
||||
id=aep.id,
|
||||
name=aep.name,
|
||||
entry_node=aep.entry_node,
|
||||
trigger_type=aep.trigger_type,
|
||||
trigger_config=aep.trigger_config,
|
||||
isolation_level=aep.isolation_level,
|
||||
priority=aep.priority,
|
||||
max_concurrent=aep.max_concurrent,
|
||||
)
|
||||
|
||||
await runtime.add_graph(
|
||||
graph_id=gid,
|
||||
graph=runner.graph,
|
||||
goal=runner.goal,
|
||||
entry_points=entry_points,
|
||||
)
|
||||
return gid
|
||||
|
||||
def cleanup(self) -> None:
|
||||
"""Clean up resources (synchronous)."""
|
||||
# Clean up MCP client connections
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
"""Tool discovery and registration for agent runner."""
|
||||
|
||||
import asyncio
|
||||
import contextvars
|
||||
import importlib.util
|
||||
import inspect
|
||||
@@ -224,8 +225,19 @@ class ToolRegistry:
|
||||
Get unified tool executor function.
|
||||
|
||||
Returns a function that dispatches to the appropriate tool executor.
|
||||
Handles both sync and async tool implementations — async results are
|
||||
wrapped so that ``EventLoopNode._execute_tool`` can await them.
|
||||
"""
|
||||
|
||||
def _wrap_result(tool_use_id: str, result: Any) -> ToolResult:
|
||||
if isinstance(result, ToolResult):
|
||||
return result
|
||||
return ToolResult(
|
||||
tool_use_id=tool_use_id,
|
||||
content=json.dumps(result) if not isinstance(result, str) else result,
|
||||
is_error=False,
|
||||
)
|
||||
|
||||
def executor(tool_use: ToolUse) -> ToolResult:
|
||||
if tool_use.name not in self._tools:
|
||||
return ToolResult(
|
||||
@@ -237,13 +249,24 @@ class ToolRegistry:
|
||||
registered = self._tools[tool_use.name]
|
||||
try:
|
||||
result = registered.executor(tool_use.input)
|
||||
if isinstance(result, ToolResult):
|
||||
return result
|
||||
return ToolResult(
|
||||
tool_use_id=tool_use.id,
|
||||
content=json.dumps(result) if not isinstance(result, str) else result,
|
||||
is_error=False,
|
||||
)
|
||||
|
||||
# Async tool: wrap the awaitable so the caller can await it
|
||||
if asyncio.iscoroutine(result) or asyncio.isfuture(result):
|
||||
|
||||
async def _await_and_wrap():
|
||||
try:
|
||||
r = await result
|
||||
return _wrap_result(tool_use.id, r)
|
||||
except Exception as exc:
|
||||
return ToolResult(
|
||||
tool_use_id=tool_use.id,
|
||||
content=json.dumps({"error": str(exc)}),
|
||||
is_error=True,
|
||||
)
|
||||
|
||||
return _await_and_wrap()
|
||||
|
||||
return _wrap_result(tool_use.id, result)
|
||||
except Exception as e:
|
||||
return ToolResult(
|
||||
tool_use_id=tool_use.id,
|
||||
|
||||
@@ -10,6 +10,7 @@ import logging
|
||||
import time
|
||||
from collections.abc import Callable
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
@@ -47,6 +48,20 @@ class AgentRuntimeConfig:
|
||||
# Each dict: {"source_id": str, "path": str, "methods": ["POST"], "secret": str|None}
|
||||
|
||||
|
||||
@dataclass
|
||||
class _GraphRegistration:
|
||||
"""Tracks a loaded graph and its runtime resources."""
|
||||
|
||||
graph: "GraphSpec"
|
||||
goal: "Goal"
|
||||
entry_points: dict[str, EntryPointSpec]
|
||||
streams: dict[str, ExecutionStream] # ep_id -> stream (NOT namespaced)
|
||||
storage_subpath: str # relative to session root, e.g. "graphs/email_agent"
|
||||
event_subscriptions: list[str] = field(default_factory=list)
|
||||
timer_tasks: list[asyncio.Task] = field(default_factory=list)
|
||||
timer_next_fire: dict[str, float] = field(default_factory=dict)
|
||||
|
||||
|
||||
class AgentRuntime:
|
||||
"""
|
||||
Top-level runtime that manages agent lifecycle and concurrent executions.
|
||||
@@ -110,6 +125,7 @@ class AgentRuntime:
|
||||
config: AgentRuntimeConfig | None = None,
|
||||
runtime_log_store: Any = None,
|
||||
checkpoint_config: CheckpointConfig | None = None,
|
||||
graph_id: str | None = None,
|
||||
):
|
||||
"""
|
||||
Initialize agent runtime.
|
||||
@@ -124,6 +140,7 @@ class AgentRuntime:
|
||||
config: Optional runtime configuration
|
||||
runtime_log_store: Optional RuntimeLogStore for per-execution logging
|
||||
checkpoint_config: Optional checkpoint configuration for resumable sessions
|
||||
graph_id: Optional identifier for the primary graph (defaults to "primary")
|
||||
"""
|
||||
self.graph = graph
|
||||
self.goal = goal
|
||||
@@ -131,6 +148,16 @@ class AgentRuntime:
|
||||
self._runtime_log_store = runtime_log_store
|
||||
self._checkpoint_config = checkpoint_config
|
||||
|
||||
# Primary graph identity
|
||||
self._graph_id: str = graph_id or "primary"
|
||||
|
||||
# Multi-graph state
|
||||
self._graphs: dict[str, _GraphRegistration] = {}
|
||||
self._active_graph_id: str = self._graph_id
|
||||
|
||||
# User presence tracking (monotonic timestamp of last inject_input)
|
||||
self._last_user_input_time: float = 0.0
|
||||
|
||||
# Initialize storage
|
||||
storage_path_obj = Path(storage_path) if isinstance(storage_path, str) else storage_path
|
||||
self._storage = ConcurrentStorage(
|
||||
@@ -152,15 +179,15 @@ class AgentRuntime:
|
||||
self._tools = tools or []
|
||||
self._tool_executor = tool_executor
|
||||
|
||||
# Entry points and streams
|
||||
# Entry points and streams (primary graph)
|
||||
self._entry_points: dict[str, EntryPointSpec] = {}
|
||||
self._streams: dict[str, ExecutionStream] = {}
|
||||
|
||||
# Webhook server (created on start if webhook_routes configured)
|
||||
self._webhook_server: Any = None
|
||||
# Event-driven entry point subscriptions
|
||||
# Event-driven entry point subscriptions (primary graph)
|
||||
self._event_subscriptions: list[str] = []
|
||||
# Timer tasks for scheduled entry points
|
||||
# Timer tasks for scheduled entry points (primary graph)
|
||||
self._timer_tasks: list[asyncio.Task] = []
|
||||
# Next fire time for each timer entry point (ep_id -> datetime)
|
||||
self._timer_next_fire: dict[str, float] = {}
|
||||
@@ -245,6 +272,7 @@ class AgentRuntime:
|
||||
runtime_log_store=self._runtime_log_store,
|
||||
session_store=self._session_store,
|
||||
checkpoint_config=self._checkpoint_config,
|
||||
graph_id=self._graph_id,
|
||||
)
|
||||
await stream.start()
|
||||
self._streams[ep_id] = stream
|
||||
@@ -290,29 +318,38 @@ class AgentRuntime:
|
||||
)
|
||||
continue
|
||||
|
||||
# Capture ep_id in closure
|
||||
def _make_handler(entry_point_id: str):
|
||||
# Capture ep_id and config in closure
|
||||
exclude_own = tc.get("exclude_own_graph", False)
|
||||
|
||||
def _make_handler(entry_point_id: str, _exclude_own: bool):
|
||||
async def _on_event(event):
|
||||
if self._running and entry_point_id in self._streams:
|
||||
# Run in the same session as the primary entry
|
||||
# point so memory (e.g. user-defined rules) is
|
||||
# shared and logs land in one session directory.
|
||||
session_state = self._get_primary_session_state(
|
||||
exclude_entry_point=entry_point_id
|
||||
)
|
||||
await self.trigger(
|
||||
entry_point_id,
|
||||
{"event": event.to_dict()},
|
||||
session_state=session_state,
|
||||
)
|
||||
if not self._running or entry_point_id not in self._streams:
|
||||
return
|
||||
# Skip events originating from this graph's own
|
||||
# executions (e.g. guardian should not fire on
|
||||
# hive_coder failures — only secondary graphs).
|
||||
if _exclude_own and event.graph_id == self._graph_id:
|
||||
return
|
||||
# Run in the same session as the primary entry
|
||||
# point so memory (e.g. user-defined rules) is
|
||||
# shared and logs land in one session directory.
|
||||
session_state = self._get_primary_session_state(
|
||||
exclude_entry_point=entry_point_id
|
||||
)
|
||||
await self.trigger(
|
||||
entry_point_id,
|
||||
{"event": event.to_dict()},
|
||||
session_state=session_state,
|
||||
)
|
||||
|
||||
return _on_event
|
||||
|
||||
sub_id = self._event_bus.subscribe(
|
||||
event_types=event_types,
|
||||
handler=_make_handler(ep_id),
|
||||
handler=_make_handler(ep_id, exclude_own),
|
||||
filter_stream=tc.get("filter_stream"),
|
||||
filter_node=tc.get("filter_node"),
|
||||
filter_graph=tc.get("filter_graph"),
|
||||
)
|
||||
self._event_subscriptions.append(sub_id)
|
||||
|
||||
@@ -322,37 +359,111 @@ class AgentRuntime:
|
||||
continue
|
||||
|
||||
tc = spec.trigger_config
|
||||
cron_expr = tc.get("cron")
|
||||
interval = tc.get("interval_minutes")
|
||||
if not interval or interval <= 0:
|
||||
logger.warning(
|
||||
f"Entry point '{ep_id}' has trigger_type='timer' "
|
||||
"but no valid interval_minutes in trigger_config"
|
||||
)
|
||||
continue
|
||||
|
||||
run_immediately = tc.get("run_immediately", False)
|
||||
|
||||
def _make_timer(entry_point_id: str, mins: float, immediate: bool):
|
||||
async def _timer_loop():
|
||||
interval_secs = mins * 60
|
||||
if not immediate:
|
||||
self._timer_next_fire[entry_point_id] = time.monotonic() + interval_secs
|
||||
await asyncio.sleep(interval_secs)
|
||||
while self._running:
|
||||
self._timer_next_fire.pop(entry_point_id, None)
|
||||
try:
|
||||
if self._should_skip_timer(entry_point_id):
|
||||
logger.info(
|
||||
"Timer '%s' skipped — primary stream busy",
|
||||
entry_point_id,
|
||||
)
|
||||
else:
|
||||
if cron_expr:
|
||||
# Cron expression mode — takes priority over interval_minutes
|
||||
try:
|
||||
from croniter import croniter
|
||||
|
||||
# Validate the expression upfront
|
||||
if not croniter.is_valid(cron_expr):
|
||||
raise ValueError(f"Invalid cron expression: {cron_expr}")
|
||||
except (ImportError, ValueError) as e:
|
||||
logger.warning(
|
||||
"Entry point '%s' has invalid cron config: %s",
|
||||
ep_id,
|
||||
e,
|
||||
)
|
||||
continue
|
||||
|
||||
def _make_cron_timer(entry_point_id: str, expr: str, immediate: bool):
|
||||
async def _cron_loop():
|
||||
from croniter import croniter
|
||||
|
||||
if not immediate:
|
||||
cron = croniter(expr, datetime.now())
|
||||
next_dt = cron.get_next(datetime)
|
||||
sleep_secs = (next_dt - datetime.now()).total_seconds()
|
||||
self._timer_next_fire[entry_point_id] = (
|
||||
time.monotonic() + sleep_secs
|
||||
)
|
||||
await asyncio.sleep(max(0, sleep_secs))
|
||||
while self._running:
|
||||
self._timer_next_fire.pop(entry_point_id, None)
|
||||
try:
|
||||
session_state = self._get_primary_session_state(
|
||||
exclude_entry_point=entry_point_id
|
||||
)
|
||||
await self.trigger(
|
||||
entry_point_id,
|
||||
{"event": {"source": "timer", "reason": "scheduled"}},
|
||||
{
|
||||
"event": {
|
||||
"source": "timer",
|
||||
"reason": "scheduled",
|
||||
}
|
||||
},
|
||||
session_state=session_state,
|
||||
)
|
||||
logger.info(
|
||||
"Cron fired for entry point '%s' (expr: %s)",
|
||||
entry_point_id,
|
||||
expr,
|
||||
)
|
||||
except Exception:
|
||||
logger.error(
|
||||
"Cron trigger failed for '%s'",
|
||||
entry_point_id,
|
||||
exc_info=True,
|
||||
)
|
||||
# Calculate next fire from now
|
||||
cron = croniter(expr, datetime.now())
|
||||
next_dt = cron.get_next(datetime)
|
||||
sleep_secs = (next_dt - datetime.now()).total_seconds()
|
||||
self._timer_next_fire[entry_point_id] = (
|
||||
time.monotonic() + sleep_secs
|
||||
)
|
||||
await asyncio.sleep(max(0, sleep_secs))
|
||||
|
||||
return _cron_loop
|
||||
|
||||
task = asyncio.create_task(
|
||||
_make_cron_timer(ep_id, cron_expr, run_immediately)()
|
||||
)
|
||||
self._timer_tasks.append(task)
|
||||
logger.info(
|
||||
"Started cron timer for entry point '%s' with expression '%s'%s",
|
||||
ep_id,
|
||||
cron_expr,
|
||||
" (immediate first run)" if run_immediately else "",
|
||||
)
|
||||
|
||||
elif interval and interval > 0:
|
||||
# Fixed interval mode (original behavior)
|
||||
def _make_timer(entry_point_id: str, mins: float, immediate: bool):
|
||||
async def _timer_loop():
|
||||
interval_secs = mins * 60
|
||||
if not immediate:
|
||||
self._timer_next_fire[entry_point_id] = (
|
||||
time.monotonic() + interval_secs
|
||||
)
|
||||
await asyncio.sleep(interval_secs)
|
||||
while self._running:
|
||||
self._timer_next_fire.pop(entry_point_id, None)
|
||||
try:
|
||||
session_state = self._get_primary_session_state(
|
||||
exclude_entry_point=entry_point_id
|
||||
)
|
||||
await self.trigger(
|
||||
entry_point_id,
|
||||
{
|
||||
"event": {
|
||||
"source": "timer",
|
||||
"reason": "scheduled",
|
||||
}
|
||||
},
|
||||
session_state=session_state,
|
||||
)
|
||||
logger.info(
|
||||
@@ -360,25 +471,46 @@ class AgentRuntime:
|
||||
entry_point_id,
|
||||
mins,
|
||||
)
|
||||
except Exception:
|
||||
logger.error(
|
||||
"Timer trigger failed for '%s'",
|
||||
entry_point_id,
|
||||
exc_info=True,
|
||||
except Exception:
|
||||
logger.error(
|
||||
"Timer trigger failed for '%s'",
|
||||
entry_point_id,
|
||||
exc_info=True,
|
||||
)
|
||||
self._timer_next_fire[entry_point_id] = (
|
||||
time.monotonic() + interval_secs
|
||||
)
|
||||
self._timer_next_fire[entry_point_id] = time.monotonic() + interval_secs
|
||||
await asyncio.sleep(interval_secs)
|
||||
await asyncio.sleep(interval_secs)
|
||||
|
||||
return _timer_loop
|
||||
return _timer_loop
|
||||
|
||||
task = asyncio.create_task(_make_timer(ep_id, interval, run_immediately)())
|
||||
self._timer_tasks.append(task)
|
||||
logger.info(
|
||||
"Started timer for entry point '%s' every %s min%s",
|
||||
ep_id,
|
||||
interval,
|
||||
" (immediate first run)" if run_immediately else "",
|
||||
)
|
||||
task = asyncio.create_task(_make_timer(ep_id, interval, run_immediately)())
|
||||
self._timer_tasks.append(task)
|
||||
logger.info(
|
||||
"Started timer for entry point '%s' every %s min%s",
|
||||
ep_id,
|
||||
interval,
|
||||
" (immediate first run)" if run_immediately else "",
|
||||
)
|
||||
|
||||
else:
|
||||
logger.warning(
|
||||
"Entry point '%s' has trigger_type='timer' "
|
||||
"but no 'cron' or valid 'interval_minutes' in trigger_config",
|
||||
ep_id,
|
||||
)
|
||||
|
||||
# Register primary graph
|
||||
self._graphs[self._graph_id] = _GraphRegistration(
|
||||
graph=self.graph,
|
||||
goal=self.goal,
|
||||
entry_points=dict(self._entry_points),
|
||||
streams=dict(self._streams),
|
||||
storage_subpath="",
|
||||
event_subscriptions=list(self._event_subscriptions),
|
||||
timer_tasks=list(self._timer_tasks),
|
||||
timer_next_fire=self._timer_next_fire,
|
||||
)
|
||||
|
||||
self._running = True
|
||||
logger.info(f"AgentRuntime started with {len(self._streams)} streams")
|
||||
@@ -389,12 +521,17 @@ class AgentRuntime:
|
||||
return
|
||||
|
||||
async with self._lock:
|
||||
# Cancel timer tasks
|
||||
# Stop secondary graphs first
|
||||
secondary_ids = [gid for gid in self._graphs if gid != self._graph_id]
|
||||
for gid in secondary_ids:
|
||||
await self._teardown_graph(gid)
|
||||
|
||||
# Cancel primary timer tasks
|
||||
for task in self._timer_tasks:
|
||||
task.cancel()
|
||||
self._timer_tasks.clear()
|
||||
|
||||
# Unsubscribe event-driven entry points
|
||||
# Unsubscribe primary event-driven entry points
|
||||
for sub_id in self._event_subscriptions:
|
||||
self._event_bus.unsubscribe(sub_id)
|
||||
self._event_subscriptions.clear()
|
||||
@@ -404,11 +541,12 @@ class AgentRuntime:
|
||||
await self._webhook_server.stop()
|
||||
self._webhook_server = None
|
||||
|
||||
# Stop all streams
|
||||
# Stop all primary streams
|
||||
for stream in self._streams.values():
|
||||
await stream.stop()
|
||||
|
||||
self._streams.clear()
|
||||
self._graphs.clear()
|
||||
|
||||
# Stop storage
|
||||
await self._storage.stop()
|
||||
@@ -475,24 +613,280 @@ class AgentRuntime:
|
||||
raise ValueError(f"Entry point '{entry_point_id}' not found")
|
||||
return await stream.wait_for_completion(exec_id, timeout)
|
||||
|
||||
def _should_skip_timer(self, timer_ep_id: str) -> bool:
|
||||
"""Return True if a non-timer stream is actively running (not waiting for input).
|
||||
# === MULTI-GRAPH MANAGEMENT ===
|
||||
|
||||
Timers should only fire when the primary stream is idle (blocked
|
||||
waiting for client input) or has no active execution. This prevents
|
||||
concurrent pipeline runs that would race on shared memory.
|
||||
async def add_graph(
|
||||
self,
|
||||
graph_id: str,
|
||||
graph: "GraphSpec",
|
||||
goal: "Goal",
|
||||
entry_points: dict[str, EntryPointSpec],
|
||||
storage_subpath: str | None = None,
|
||||
) -> None:
|
||||
"""Load a secondary graph into this runtime session.
|
||||
|
||||
Creates execution streams for the graph's entry points, sets up
|
||||
event/timer triggers, and registers the graph. Shares the same
|
||||
EventBus, state.json, and data directory as the primary graph.
|
||||
|
||||
Can be called while the runtime is running.
|
||||
|
||||
Args:
|
||||
graph_id: Unique identifier for the graph
|
||||
graph: Graph specification
|
||||
goal: Goal driving this graph's execution
|
||||
entry_points: Entry point specs (ep_id -> spec)
|
||||
storage_subpath: Relative path under session root for this
|
||||
graph's conversations/checkpoints. Defaults to
|
||||
``"graphs/{graph_id}"``.
|
||||
|
||||
Raises:
|
||||
ValueError: If graph_id already registered or entry node missing
|
||||
"""
|
||||
for ep_id, stream in self._streams.items():
|
||||
if ep_id == timer_ep_id:
|
||||
continue
|
||||
spec = self._entry_points.get(ep_id)
|
||||
if spec and spec.trigger_type == "timer":
|
||||
continue
|
||||
if stream.active_execution_ids and not stream.is_awaiting_input:
|
||||
return True
|
||||
return False
|
||||
if graph_id in self._graphs:
|
||||
raise ValueError(f"Graph '{graph_id}' already registered")
|
||||
|
||||
def _get_primary_session_state(self, exclude_entry_point: str) -> dict[str, Any] | None:
|
||||
subpath = storage_subpath or f"graphs/{graph_id}"
|
||||
|
||||
# Validate entry nodes exist in graph
|
||||
for _ep_id, spec in entry_points.items():
|
||||
if graph.get_node(spec.entry_node) is None:
|
||||
raise ValueError(f"Entry node '{spec.entry_node}' not found in graph '{graph_id}'")
|
||||
|
||||
# Create streams for each entry point
|
||||
streams: dict[str, ExecutionStream] = {}
|
||||
for ep_id, spec in entry_points.items():
|
||||
stream = ExecutionStream(
|
||||
stream_id=f"{graph_id}::{ep_id}",
|
||||
entry_spec=spec,
|
||||
graph=graph,
|
||||
goal=goal,
|
||||
state_manager=self._state_manager,
|
||||
storage=self._storage,
|
||||
outcome_aggregator=self._outcome_aggregator,
|
||||
event_bus=self._event_bus,
|
||||
llm=self._llm,
|
||||
tools=self._tools,
|
||||
tool_executor=self._tool_executor,
|
||||
result_retention_max=self._config.execution_result_max,
|
||||
result_retention_ttl_seconds=self._config.execution_result_ttl_seconds,
|
||||
runtime_log_store=self._runtime_log_store,
|
||||
session_store=self._session_store,
|
||||
checkpoint_config=self._checkpoint_config,
|
||||
graph_id=graph_id,
|
||||
)
|
||||
if self._running:
|
||||
await stream.start()
|
||||
streams[ep_id] = stream
|
||||
|
||||
# Set up event-driven subscriptions
|
||||
from framework.runtime.event_bus import EventType as _ET
|
||||
|
||||
event_subs: list[str] = []
|
||||
for ep_id, spec in entry_points.items():
|
||||
if spec.trigger_type != "event":
|
||||
continue
|
||||
tc = spec.trigger_config
|
||||
event_types = [_ET(et) for et in tc.get("event_types", [])]
|
||||
if not event_types:
|
||||
logger.warning(
|
||||
"Entry point '%s::%s' has trigger_type='event' "
|
||||
"but no event_types in trigger_config",
|
||||
graph_id,
|
||||
ep_id,
|
||||
)
|
||||
continue
|
||||
|
||||
namespaced_ep = f"{graph_id}::{ep_id}"
|
||||
exclude_own = tc.get("exclude_own_graph", False)
|
||||
|
||||
def _make_handler(entry_point_id: str, gid: str, _exclude_own: bool):
|
||||
async def _on_event(event):
|
||||
if not self._running or gid not in self._graphs:
|
||||
return
|
||||
# Skip events from this graph's own executions
|
||||
if _exclude_own and event.graph_id == gid:
|
||||
return
|
||||
reg = self._graphs[gid]
|
||||
local_ep = entry_point_id.split("::", 1)[-1]
|
||||
stream = reg.streams.get(local_ep)
|
||||
if stream is None:
|
||||
return
|
||||
session_state = self._get_primary_session_state(
|
||||
local_ep,
|
||||
source_graph_id=gid,
|
||||
)
|
||||
await stream.execute(
|
||||
{"event": event.to_dict()},
|
||||
session_state=session_state,
|
||||
)
|
||||
|
||||
return _on_event
|
||||
|
||||
sub_id = self._event_bus.subscribe(
|
||||
event_types=event_types,
|
||||
handler=_make_handler(namespaced_ep, graph_id, exclude_own),
|
||||
filter_stream=tc.get("filter_stream"),
|
||||
filter_node=tc.get("filter_node"),
|
||||
filter_graph=tc.get("filter_graph"),
|
||||
)
|
||||
event_subs.append(sub_id)
|
||||
|
||||
# Set up timer-driven entry points
|
||||
timer_tasks: list[asyncio.Task] = []
|
||||
timer_next_fire: dict[str, float] = {}
|
||||
for ep_id, spec in entry_points.items():
|
||||
if spec.trigger_type != "timer":
|
||||
continue
|
||||
tc = spec.trigger_config
|
||||
interval = tc.get("interval_minutes")
|
||||
run_immediately = tc.get("run_immediately", False)
|
||||
|
||||
if interval and interval > 0 and self._running:
|
||||
|
||||
def _make_timer(gid: str, local_ep: str, mins: float, immediate: bool):
|
||||
async def _timer_loop():
|
||||
interval_secs = mins * 60
|
||||
if not immediate:
|
||||
timer_next_fire[local_ep] = time.monotonic() + interval_secs
|
||||
await asyncio.sleep(interval_secs)
|
||||
while self._running and gid in self._graphs:
|
||||
timer_next_fire.pop(local_ep, None)
|
||||
try:
|
||||
reg = self._graphs.get(gid)
|
||||
if not reg:
|
||||
break
|
||||
stream = reg.streams.get(local_ep)
|
||||
if not stream:
|
||||
break
|
||||
session_state = self._get_primary_session_state(
|
||||
local_ep, source_graph_id=gid
|
||||
)
|
||||
await stream.execute(
|
||||
{"event": {"source": "timer", "reason": "scheduled"}},
|
||||
session_state=session_state,
|
||||
)
|
||||
except Exception:
|
||||
logger.error(
|
||||
"Timer trigger failed for '%s::%s'",
|
||||
gid,
|
||||
local_ep,
|
||||
exc_info=True,
|
||||
)
|
||||
timer_next_fire[local_ep] = time.monotonic() + interval_secs
|
||||
await asyncio.sleep(interval_secs)
|
||||
|
||||
return _timer_loop
|
||||
|
||||
task = asyncio.create_task(
|
||||
_make_timer(graph_id, ep_id, interval, run_immediately)()
|
||||
)
|
||||
timer_tasks.append(task)
|
||||
|
||||
self._graphs[graph_id] = _GraphRegistration(
|
||||
graph=graph,
|
||||
goal=goal,
|
||||
entry_points=entry_points,
|
||||
streams=streams,
|
||||
storage_subpath=subpath,
|
||||
event_subscriptions=event_subs,
|
||||
timer_tasks=timer_tasks,
|
||||
timer_next_fire=timer_next_fire,
|
||||
)
|
||||
logger.info(
|
||||
"Added graph '%s' with %d entry points (%d streams)",
|
||||
graph_id,
|
||||
len(entry_points),
|
||||
len(streams),
|
||||
)
|
||||
|
||||
async def remove_graph(self, graph_id: str) -> None:
|
||||
"""Remove a secondary graph from this runtime session.
|
||||
|
||||
Stops all streams, cancels timers, unsubscribes events, and
|
||||
removes the registration. Cannot remove the primary graph.
|
||||
|
||||
Args:
|
||||
graph_id: Graph to remove
|
||||
|
||||
Raises:
|
||||
ValueError: If graph_id is the primary graph or not found
|
||||
"""
|
||||
if graph_id == self._graph_id:
|
||||
raise ValueError("Cannot remove the primary graph")
|
||||
if graph_id not in self._graphs:
|
||||
raise ValueError(f"Graph '{graph_id}' not found")
|
||||
await self._teardown_graph(graph_id)
|
||||
logger.info("Removed graph '%s'", graph_id)
|
||||
|
||||
async def _teardown_graph(self, graph_id: str) -> None:
|
||||
"""Internal: stop and clean up all resources for a graph."""
|
||||
reg = self._graphs.pop(graph_id, None)
|
||||
if reg is None:
|
||||
return
|
||||
|
||||
# Cancel timers
|
||||
for task in reg.timer_tasks:
|
||||
task.cancel()
|
||||
|
||||
# Unsubscribe events
|
||||
for sub_id in reg.event_subscriptions:
|
||||
self._event_bus.unsubscribe(sub_id)
|
||||
|
||||
# Stop streams
|
||||
for stream in reg.streams.values():
|
||||
await stream.stop()
|
||||
|
||||
# Reset active graph if it was the removed one
|
||||
if self._active_graph_id == graph_id:
|
||||
self._active_graph_id = self._graph_id
|
||||
|
||||
def list_graphs(self) -> list[str]:
|
||||
"""Return all registered graph IDs (primary first)."""
|
||||
result = []
|
||||
if self._graph_id in self._graphs:
|
||||
result.append(self._graph_id)
|
||||
for gid in self._graphs:
|
||||
if gid != self._graph_id:
|
||||
result.append(gid)
|
||||
return result
|
||||
|
||||
    @property
    def graph_id(self) -> str:
        """The primary graph's ID."""
        # Read-only view; no setter is defined for the primary graph id.
        return self._graph_id
|
||||
|
||||
    @property
    def active_graph_id(self) -> str:
        """The currently focused graph (for TUI routing)."""
        return self._active_graph_id

    @active_graph_id.setter
    def active_graph_id(self, value: str) -> None:
        # Focus may only move to a graph registered in self._graphs;
        # an unknown id is rejected loudly rather than silently accepted.
        if value not in self._graphs:
            raise ValueError(f"Graph '{value}' not registered")
        self._active_graph_id = value
|
||||
|
||||
@property
|
||||
def user_idle_seconds(self) -> float:
|
||||
"""Seconds since the user last provided input.
|
||||
|
||||
Returns ``float('inf')`` if no input has been received yet.
|
||||
"""
|
||||
if self._last_user_input_time == 0.0:
|
||||
return float("inf")
|
||||
return time.monotonic() - self._last_user_input_time
|
||||
|
||||
def get_graph_registration(self, graph_id: str) -> _GraphRegistration | None:
|
||||
"""Get the registration for a specific graph (or None)."""
|
||||
return self._graphs.get(graph_id)
|
||||
|
||||
def _get_primary_session_state(
|
||||
self,
|
||||
exclude_entry_point: str,
|
||||
*,
|
||||
source_graph_id: str | None = None,
|
||||
) -> dict[str, Any] | None:
|
||||
"""Build session_state so an async entry point runs in the primary session.
|
||||
|
||||
Looks for an active execution from another stream (the "primary"
|
||||
@@ -509,6 +903,15 @@ class AgentRuntime:
|
||||
which is kept up-to-date by ``GraphExecutor._write_progress()``
|
||||
at every node transition.
|
||||
|
||||
Searches across ALL graphs' streams (primary + secondary) so
|
||||
event-driven entry points on secondary graphs can share the
|
||||
primary session.
|
||||
|
||||
Args:
|
||||
exclude_entry_point: Entry point ID to skip (the one being triggered)
|
||||
source_graph_id: Graph the exclude_entry_point belongs to (for
|
||||
resolving the entry node spec). Defaults to primary graph.
|
||||
|
||||
Returns ``None`` if no primary session is active (the webhook
|
||||
execution will just create its own session).
|
||||
"""
|
||||
@@ -516,13 +919,27 @@ class AgentRuntime:
|
||||
|
||||
# Determine which memory keys the async entry node needs.
|
||||
allowed_keys: set[str] | None = None
|
||||
ep_spec = self._entry_points.get(exclude_entry_point)
|
||||
# Look up the entry node from the correct graph
|
||||
src_graph_id = source_graph_id or self._graph_id
|
||||
src_reg = self._graphs.get(src_graph_id)
|
||||
ep_spec = (
|
||||
src_reg.entry_points.get(exclude_entry_point)
|
||||
if src_reg
|
||||
else self._entry_points.get(exclude_entry_point)
|
||||
)
|
||||
if ep_spec:
|
||||
entry_node = self.graph.get_node(ep_spec.entry_node)
|
||||
graph = src_reg.graph if src_reg else self.graph
|
||||
entry_node = graph.get_node(ep_spec.entry_node)
|
||||
if entry_node and entry_node.input_keys:
|
||||
allowed_keys = set(entry_node.input_keys)
|
||||
|
||||
for ep_id, stream in self._streams.items():
|
||||
# Search ALL graphs' streams for an active session
|
||||
all_streams: list[tuple[str, ExecutionStream]] = []
|
||||
for _gid, reg in self._graphs.items():
|
||||
for ep_id, stream in reg.streams.items():
|
||||
all_streams.append((ep_id, stream))
|
||||
|
||||
for ep_id, stream in all_streams:
|
||||
if ep_id == exclude_entry_point:
|
||||
continue
|
||||
for exec_id in stream.active_execution_ids:
|
||||
@@ -552,23 +969,37 @@ class AgentRuntime:
|
||||
)
|
||||
return None
|
||||
|
||||
async def inject_input(self, node_id: str, content: str) -> bool:
|
||||
async def inject_input(self, node_id: str, content: str, graph_id: str | None = None) -> bool:
|
||||
"""Inject user input into a running client-facing node.
|
||||
|
||||
Routes input to the EventLoopNode identified by ``node_id``
|
||||
across all active streams. Used by the TUI ChatRepl to deliver
|
||||
user responses during client-facing node execution.
|
||||
Routes input to the EventLoopNode identified by ``node_id``.
|
||||
Searches the specified graph (or active graph) first, then all others.
|
||||
|
||||
Args:
|
||||
node_id: The node currently waiting for input
|
||||
content: The user's input text
|
||||
graph_id: Optional graph to search first (defaults to active graph)
|
||||
|
||||
Returns:
|
||||
True if input was delivered, False if no matching node found
|
||||
"""
|
||||
for stream in self._streams.values():
|
||||
if await stream.inject_input(node_id, content):
|
||||
return True
|
||||
# Track user presence
|
||||
self._last_user_input_time = time.monotonic()
|
||||
|
||||
# Search target graph first
|
||||
target = graph_id or self._active_graph_id
|
||||
if target in self._graphs:
|
||||
for stream in self._graphs[target].streams.values():
|
||||
if await stream.inject_input(node_id, content):
|
||||
return True
|
||||
|
||||
# Then search all other graphs
|
||||
for gid, reg in self._graphs.items():
|
||||
if gid == target:
|
||||
continue
|
||||
for stream in reg.streams.values():
|
||||
if await stream.inject_input(node_id, content):
|
||||
return True
|
||||
return False
|
||||
|
||||
async def get_goal_progress(self) -> dict[str, Any]:
|
||||
@@ -629,6 +1060,7 @@ class AgentRuntime:
|
||||
event_types: list,
|
||||
handler: Callable,
|
||||
filter_stream: str | None = None,
|
||||
filter_graph: str | None = None,
|
||||
) -> str:
|
||||
"""
|
||||
Subscribe to agent events.
|
||||
@@ -637,6 +1069,7 @@ class AgentRuntime:
|
||||
event_types: Types of events to receive
|
||||
handler: Async function to call when event occurs
|
||||
filter_stream: Only receive events from this stream
|
||||
filter_graph: Only receive events from this graph
|
||||
|
||||
Returns:
|
||||
Subscription ID (use to unsubscribe)
|
||||
@@ -645,6 +1078,7 @@ class AgentRuntime:
|
||||
event_types=event_types,
|
||||
handler=handler,
|
||||
filter_stream=filter_stream,
|
||||
filter_graph=filter_graph,
|
||||
)
|
||||
|
||||
def unsubscribe_from_events(self, subscription_id: str) -> bool:
|
||||
@@ -712,6 +1146,7 @@ def create_agent_runtime(
|
||||
runtime_log_store: Any = None,
|
||||
enable_logging: bool = True,
|
||||
checkpoint_config: CheckpointConfig | None = None,
|
||||
graph_id: str | None = None,
|
||||
) -> AgentRuntime:
|
||||
"""
|
||||
Create and configure an AgentRuntime with entry points.
|
||||
@@ -734,6 +1169,7 @@ def create_agent_runtime(
|
||||
Set to False to disable logging entirely.
|
||||
checkpoint_config: Optional checkpoint configuration for resumable sessions.
|
||||
If None, uses default checkpointing behavior.
|
||||
graph_id: Optional identifier for the primary graph (defaults to "primary").
|
||||
|
||||
Returns:
|
||||
Configured AgentRuntime (not yet started)
|
||||
@@ -755,6 +1191,7 @@ def create_agent_runtime(
|
||||
config=config,
|
||||
runtime_log_store=runtime_log_store,
|
||||
checkpoint_config=checkpoint_config,
|
||||
graph_id=graph_id,
|
||||
)
|
||||
|
||||
for spec in entry_points:
|
||||
|
||||
@@ -83,6 +83,9 @@ class EventType(StrEnum):
|
||||
# Custom events
|
||||
CUSTOM = "custom"
|
||||
|
||||
# Escalation (agent requests handoff to hive_coder)
|
||||
ESCALATION_REQUESTED = "escalation_requested"
|
||||
|
||||
|
||||
@dataclass
|
||||
class AgentEvent:
|
||||
@@ -95,6 +98,7 @@ class AgentEvent:
|
||||
data: dict[str, Any] = field(default_factory=dict)
|
||||
timestamp: datetime = field(default_factory=datetime.now)
|
||||
correlation_id: str | None = None # For tracking related events
|
||||
graph_id: str | None = None # Which graph emitted this event (multi-graph sessions)
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
"""Convert to dictionary for serialization."""
|
||||
@@ -106,6 +110,7 @@ class AgentEvent:
|
||||
"data": self.data,
|
||||
"timestamp": self.timestamp.isoformat(),
|
||||
"correlation_id": self.correlation_id,
|
||||
"graph_id": self.graph_id,
|
||||
}
|
||||
|
||||
|
||||
@@ -123,6 +128,7 @@ class Subscription:
|
||||
filter_stream: str | None = None # Only receive events from this stream
|
||||
filter_node: str | None = None # Only receive events from this node
|
||||
filter_execution: str | None = None # Only receive events from this execution
|
||||
filter_graph: str | None = None # Only receive events from this graph
|
||||
|
||||
|
||||
class EventBus:
|
||||
@@ -182,6 +188,7 @@ class EventBus:
|
||||
filter_stream: str | None = None,
|
||||
filter_node: str | None = None,
|
||||
filter_execution: str | None = None,
|
||||
filter_graph: str | None = None,
|
||||
) -> str:
|
||||
"""
|
||||
Subscribe to events.
|
||||
@@ -192,6 +199,7 @@ class EventBus:
|
||||
filter_stream: Only receive events from this stream
|
||||
filter_node: Only receive events from this node
|
||||
filter_execution: Only receive events from this execution
|
||||
filter_graph: Only receive events from this graph
|
||||
|
||||
Returns:
|
||||
Subscription ID (use to unsubscribe)
|
||||
@@ -206,6 +214,7 @@ class EventBus:
|
||||
filter_stream=filter_stream,
|
||||
filter_node=filter_node,
|
||||
filter_execution=filter_execution,
|
||||
filter_graph=filter_graph,
|
||||
)
|
||||
|
||||
self._subscriptions[sub_id] = subscription
|
||||
@@ -271,6 +280,10 @@ class EventBus:
|
||||
if subscription.filter_execution and subscription.filter_execution != event.execution_id:
|
||||
return False
|
||||
|
||||
# Check graph filter
|
||||
if subscription.filter_graph and subscription.filter_graph != event.graph_id:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
async def _execute_handlers(
|
||||
@@ -820,6 +833,25 @@ class EventBus:
|
||||
)
|
||||
)
|
||||
|
||||
async def emit_escalation_requested(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
reason: str = "",
|
||||
context: str = "",
|
||||
execution_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emit escalation requested event (agent wants hive_coder)."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.ESCALATION_REQUESTED,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={"reason": reason, "context": context},
|
||||
)
|
||||
)
|
||||
|
||||
# === QUERY OPERATIONS ===
|
||||
|
||||
def get_history(
|
||||
@@ -873,6 +905,7 @@ class EventBus:
|
||||
stream_id: str | None = None,
|
||||
node_id: str | None = None,
|
||||
execution_id: str | None = None,
|
||||
graph_id: str | None = None,
|
||||
timeout: float | None = None,
|
||||
) -> AgentEvent | None:
|
||||
"""
|
||||
@@ -883,6 +916,7 @@ class EventBus:
|
||||
stream_id: Filter by stream
|
||||
node_id: Filter by node
|
||||
execution_id: Filter by execution
|
||||
graph_id: Filter by graph
|
||||
timeout: Maximum time to wait (seconds)
|
||||
|
||||
Returns:
|
||||
@@ -903,6 +937,7 @@ class EventBus:
|
||||
filter_stream=stream_id,
|
||||
filter_node=node_id,
|
||||
filter_execution=execution_id,
|
||||
filter_graph=graph_id,
|
||||
)
|
||||
|
||||
try:
|
||||
|
||||
@@ -26,7 +26,7 @@ if TYPE_CHECKING:
|
||||
from framework.graph.edge import GraphSpec
|
||||
from framework.graph.goal import Goal
|
||||
from framework.llm.provider import LLMProvider, Tool
|
||||
from framework.runtime.event_bus import EventBus
|
||||
from framework.runtime.event_bus import AgentEvent, EventBus
|
||||
from framework.runtime.outcome_aggregator import OutcomeAggregator
|
||||
from framework.storage.concurrent import ConcurrentStorage
|
||||
from framework.storage.session_store import SessionStore
|
||||
@@ -34,6 +34,31 @@ if TYPE_CHECKING:
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class _GraphScopedEventBus:
    """Thin proxy that stamps ``graph_id`` on every published event.

    The ``GraphExecutor`` and ``EventLoopNode`` emit events via the
    convenience methods on ``EventBus`` (e.g. ``emit_llm_text_delta``).
    Rather than threading ``graph_id`` through every one of those 20+
    methods, this proxy intercepts ``publish()`` and sets ``graph_id``
    before forwarding to the real bus. All other attribute access is
    delegated unchanged.
    """

    # Only the wrapped bus and the stamped graph id are stored; no
    # per-instance __dict__ is created.
    __slots__ = ("_bus", "_graph_id")

    def __init__(self, bus: "EventBus", graph_id: str) -> None:
        # Store via object.__setattr__ so assignment cannot be diverted
        # by any attribute-interception machinery on the proxy itself.
        object.__setattr__(self, "_bus", bus)
        object.__setattr__(self, "_graph_id", graph_id)

    async def publish(self, event: "AgentEvent") -> None:  # type: ignore[override]
        # Stamp the owning graph onto the event, then forward to the real bus.
        event.graph_id = object.__getattribute__(self, "_graph_id")
        await object.__getattribute__(self, "_bus").publish(event)

    def __getattr__(self, name: str) -> Any:
        # Invoked only for attributes not found on the proxy (e.g. the
        # EventBus emit_* convenience methods) — delegate to the real bus.
        return getattr(object.__getattribute__(self, "_bus"), name)
|
||||
|
||||
|
||||
@dataclass
|
||||
class EntryPointSpec:
|
||||
"""Specification for an entry point."""
|
||||
@@ -117,6 +142,7 @@ class ExecutionStream:
|
||||
runtime_log_store: Any = None,
|
||||
session_store: "SessionStore | None" = None,
|
||||
checkpoint_config: CheckpointConfig | None = None,
|
||||
graph_id: str | None = None,
|
||||
):
|
||||
"""
|
||||
Initialize execution stream.
|
||||
@@ -136,11 +162,13 @@ class ExecutionStream:
|
||||
runtime_log_store: Optional RuntimeLogStore for per-execution logging
|
||||
session_store: Optional SessionStore for unified session storage
|
||||
checkpoint_config: Optional checkpoint configuration for resumable sessions
|
||||
graph_id: Optional graph identifier for multi-graph sessions
|
||||
"""
|
||||
self.stream_id = stream_id
|
||||
self.entry_spec = entry_spec
|
||||
self.graph = graph
|
||||
self.goal = goal
|
||||
self.graph_id = graph_id
|
||||
self._state_manager = state_manager
|
||||
self._storage = storage
|
||||
self._outcome_aggregator = outcome_aggregator
|
||||
@@ -173,6 +201,11 @@ class ExecutionStream:
|
||||
self._semaphore = asyncio.Semaphore(entry_spec.max_concurrent)
|
||||
self._lock = asyncio.Lock()
|
||||
|
||||
# Graph-scoped event bus (stamps graph_id on published events)
|
||||
self._scoped_event_bus = self._event_bus
|
||||
if self._event_bus and self.graph_id:
|
||||
self._scoped_event_bus = _GraphScopedEventBus(self._event_bus, self.graph_id)
|
||||
|
||||
# State
|
||||
self._running = False
|
||||
|
||||
@@ -185,10 +218,10 @@ class ExecutionStream:
|
||||
logger.info(f"ExecutionStream '{self.stream_id}' started")
|
||||
|
||||
# Emit stream started event
|
||||
if self._event_bus:
|
||||
if self._scoped_event_bus:
|
||||
from framework.runtime.event_bus import AgentEvent, EventType
|
||||
|
||||
await self._event_bus.publish(
|
||||
await self._scoped_event_bus.publish(
|
||||
AgentEvent(
|
||||
type=EventType.STREAM_STARTED,
|
||||
stream_id=self.stream_id,
|
||||
@@ -262,10 +295,10 @@ class ExecutionStream:
|
||||
logger.info(f"ExecutionStream '{self.stream_id}' stopped")
|
||||
|
||||
# Emit stream stopped event
|
||||
if self._event_bus:
|
||||
if self._scoped_event_bus:
|
||||
from framework.runtime.event_bus import AgentEvent, EventType
|
||||
|
||||
await self._event_bus.publish(
|
||||
await self._scoped_event_bus.publish(
|
||||
AgentEvent(
|
||||
type=EventType.STREAM_STOPPED,
|
||||
stream_id=self.stream_id,
|
||||
@@ -369,8 +402,8 @@ class ExecutionStream:
|
||||
|
||||
try:
|
||||
# Emit started event
|
||||
if self._event_bus:
|
||||
await self._event_bus.emit_execution_started(
|
||||
if self._scoped_event_bus:
|
||||
await self._scoped_event_bus.emit_execution_started(
|
||||
stream_id=self.stream_id,
|
||||
execution_id=execution_id,
|
||||
input_data=ctx.input_data,
|
||||
@@ -415,7 +448,7 @@ class ExecutionStream:
|
||||
llm=self._llm,
|
||||
tools=self._tools,
|
||||
tool_executor=self._tool_executor,
|
||||
event_bus=self._event_bus,
|
||||
event_bus=self._scoped_event_bus,
|
||||
stream_id=self.stream_id,
|
||||
storage_path=exec_storage,
|
||||
runtime_logger=runtime_logger,
|
||||
@@ -465,16 +498,16 @@ class ExecutionStream:
|
||||
await self._write_session_state(execution_id, ctx, result=result)
|
||||
|
||||
# Emit completion/failure event
|
||||
if self._event_bus:
|
||||
if self._scoped_event_bus:
|
||||
if result.success:
|
||||
await self._event_bus.emit_execution_completed(
|
||||
await self._scoped_event_bus.emit_execution_completed(
|
||||
stream_id=self.stream_id,
|
||||
execution_id=execution_id,
|
||||
output=result.output,
|
||||
correlation_id=ctx.correlation_id,
|
||||
)
|
||||
else:
|
||||
await self._event_bus.emit_execution_failed(
|
||||
await self._scoped_event_bus.emit_execution_failed(
|
||||
stream_id=self.stream_id,
|
||||
execution_id=execution_id,
|
||||
error=result.error or "Unknown error",
|
||||
@@ -552,8 +585,8 @@ class ExecutionStream:
|
||||
pass # Don't let end_run errors mask the original error
|
||||
|
||||
# Emit failure event
|
||||
if self._event_bus:
|
||||
await self._event_bus.emit_execution_failed(
|
||||
if self._scoped_event_bus:
|
||||
await self._scoped_event_bus.emit_execution_failed(
|
||||
stream_id=self.stream_id,
|
||||
execution_id=execution_id,
|
||||
error=str(e),
|
||||
|
||||
@@ -641,5 +641,185 @@ class TestCreateAgentRuntime:
|
||||
assert "api" in runtime._entry_points
|
||||
|
||||
|
||||
# === Timer Entry Point Tests ===
|
||||
|
||||
|
||||
class TestTimerEntryPoints:
|
||||
"""Tests for timer-driven entry points (interval and cron)."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_interval_timer_starts_task(self, sample_graph, sample_goal, temp_storage):
|
||||
"""Test that interval_minutes timer creates an async task."""
|
||||
runtime = AgentRuntime(
|
||||
graph=sample_graph,
|
||||
goal=sample_goal,
|
||||
storage_path=temp_storage,
|
||||
)
|
||||
|
||||
entry_spec = EntryPointSpec(
|
||||
id="timer-interval",
|
||||
name="Interval Timer",
|
||||
entry_node="process-webhook",
|
||||
trigger_type="timer",
|
||||
trigger_config={"interval_minutes": 60},
|
||||
)
|
||||
runtime.register_entry_point(entry_spec)
|
||||
|
||||
await runtime.start()
|
||||
try:
|
||||
assert len(runtime._timer_tasks) == 1
|
||||
assert not runtime._timer_tasks[0].done()
|
||||
# Give the async task a moment to set next_fire
|
||||
await asyncio.sleep(0.05)
|
||||
assert "timer-interval" in runtime._timer_next_fire
|
||||
finally:
|
||||
await runtime.stop()
|
||||
|
||||
assert len(runtime._timer_tasks) == 0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_cron_timer_starts_task(self, sample_graph, sample_goal, temp_storage):
|
||||
"""Test that cron expression timer creates an async task."""
|
||||
runtime = AgentRuntime(
|
||||
graph=sample_graph,
|
||||
goal=sample_goal,
|
||||
storage_path=temp_storage,
|
||||
)
|
||||
|
||||
entry_spec = EntryPointSpec(
|
||||
id="timer-cron",
|
||||
name="Cron Timer",
|
||||
entry_node="process-webhook",
|
||||
trigger_type="timer",
|
||||
trigger_config={"cron": "*/5 * * * *"}, # Every 5 minutes
|
||||
)
|
||||
runtime.register_entry_point(entry_spec)
|
||||
|
||||
await runtime.start()
|
||||
try:
|
||||
assert len(runtime._timer_tasks) == 1
|
||||
assert not runtime._timer_tasks[0].done()
|
||||
# Give the async task a moment to set next_fire
|
||||
await asyncio.sleep(0.05)
|
||||
assert "timer-cron" in runtime._timer_next_fire
|
||||
finally:
|
||||
await runtime.stop()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_invalid_cron_expression_skipped(
|
||||
self, sample_graph, sample_goal, temp_storage, caplog
|
||||
):
|
||||
"""Test that an invalid cron expression logs a warning and skips."""
|
||||
runtime = AgentRuntime(
|
||||
graph=sample_graph,
|
||||
goal=sample_goal,
|
||||
storage_path=temp_storage,
|
||||
)
|
||||
|
||||
entry_spec = EntryPointSpec(
|
||||
id="timer-bad-cron",
|
||||
name="Bad Cron Timer",
|
||||
entry_node="process-webhook",
|
||||
trigger_type="timer",
|
||||
trigger_config={"cron": "not a cron expression"},
|
||||
)
|
||||
runtime.register_entry_point(entry_spec)
|
||||
|
||||
await runtime.start()
|
||||
try:
|
||||
assert len(runtime._timer_tasks) == 0
|
||||
assert "invalid cron" in caplog.text.lower() or "Invalid cron" in caplog.text
|
||||
finally:
|
||||
await runtime.stop()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_cron_takes_priority_over_interval(
|
||||
self, sample_graph, sample_goal, temp_storage, caplog
|
||||
):
|
||||
"""Test that when both cron and interval_minutes are set, cron wins."""
|
||||
import logging
|
||||
|
||||
runtime = AgentRuntime(
|
||||
graph=sample_graph,
|
||||
goal=sample_goal,
|
||||
storage_path=temp_storage,
|
||||
)
|
||||
|
||||
entry_spec = EntryPointSpec(
|
||||
id="timer-both",
|
||||
name="Both Timer",
|
||||
entry_node="process-webhook",
|
||||
trigger_type="timer",
|
||||
trigger_config={"cron": "0 9 * * *", "interval_minutes": 30},
|
||||
)
|
||||
runtime.register_entry_point(entry_spec)
|
||||
|
||||
with caplog.at_level(logging.INFO):
|
||||
await runtime.start()
|
||||
try:
|
||||
assert len(runtime._timer_tasks) == 1
|
||||
# Should log cron, not interval
|
||||
assert any("cron" in r.message.lower() for r in caplog.records)
|
||||
finally:
|
||||
await runtime.stop()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_interval_or_cron_warns(self, sample_graph, sample_goal, temp_storage, caplog):
|
||||
"""Test that timer with neither cron nor interval_minutes logs a warning."""
|
||||
runtime = AgentRuntime(
|
||||
graph=sample_graph,
|
||||
goal=sample_goal,
|
||||
storage_path=temp_storage,
|
||||
)
|
||||
|
||||
entry_spec = EntryPointSpec(
|
||||
id="timer-empty",
|
||||
name="Empty Timer",
|
||||
entry_node="process-webhook",
|
||||
trigger_type="timer",
|
||||
trigger_config={},
|
||||
)
|
||||
runtime.register_entry_point(entry_spec)
|
||||
|
||||
await runtime.start()
|
||||
try:
|
||||
assert len(runtime._timer_tasks) == 0
|
||||
assert "no 'cron' or valid 'interval_minutes'" in caplog.text
|
||||
finally:
|
||||
await runtime.stop()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_cron_immediate_fires_first(self, sample_graph, sample_goal, temp_storage):
|
||||
"""Test that run_immediately=True with cron doesn't set next_fire before first run."""
|
||||
runtime = AgentRuntime(
|
||||
graph=sample_graph,
|
||||
goal=sample_goal,
|
||||
storage_path=temp_storage,
|
||||
)
|
||||
|
||||
entry_spec = EntryPointSpec(
|
||||
id="timer-cron-immediate",
|
||||
name="Cron Immediate",
|
||||
entry_node="process-webhook",
|
||||
trigger_type="timer",
|
||||
trigger_config={"cron": "0 0 * * *", "run_immediately": True},
|
||||
)
|
||||
runtime.register_entry_point(entry_spec)
|
||||
|
||||
await runtime.start()
|
||||
try:
|
||||
assert len(runtime._timer_tasks) == 1
|
||||
# With run_immediately, the task enters the while loop directly,
|
||||
# so _timer_next_fire is NOT set before the first trigger attempt
|
||||
# (it pops it at the top of the loop)
|
||||
# Give it a moment to start executing
|
||||
await asyncio.sleep(0.05)
|
||||
# Task should still be running (it will try to trigger and likely fail
|
||||
# since there's no LLM, but the task itself continues)
|
||||
assert not runtime._timer_tasks[0].done()
|
||||
finally:
|
||||
await runtime.stop()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__, "-v"])
|
||||
|
||||
@@ -0,0 +1,325 @@
|
||||
"""Graph lifecycle tools for multi-graph sessions.
|
||||
|
||||
These tools allow an agent (e.g. hive_coder) to load, unload, start,
|
||||
restart, and query other agent graphs within the same runtime session.
|
||||
|
||||
Usage::
|
||||
|
||||
from framework.tools.session_graph_tools import register_graph_tools
|
||||
|
||||
register_graph_tools(tool_registry, runtime)
|
||||
|
||||
The tools are registered as async Python functions on the ToolRegistry.
|
||||
They close over the ``AgentRuntime`` instance — no ContextVar needed
|
||||
since the runtime is a stable, long-lived object.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from framework.runner.tool_registry import ToolRegistry
|
||||
from framework.runtime.agent_runtime import AgentRuntime
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def register_graph_tools(registry: ToolRegistry, runtime: AgentRuntime) -> int:
|
||||
"""Register graph lifecycle tools bound to *runtime*.
|
||||
|
||||
Returns the number of tools registered.
|
||||
"""
|
||||
from framework.llm.provider import Tool
|
||||
|
||||
tools_registered = 0
|
||||
|
||||
# --- load_agent -----------------------------------------------------------
|
||||
|
||||
async def load_agent(agent_path: str) -> str:
|
||||
"""Load an agent graph from disk into the running session.
|
||||
|
||||
The agent is imported from *agent_path* (a directory containing
|
||||
``agent.py``). Its graph, goal, and entry points are registered
|
||||
as a secondary graph on the runtime. Returns a JSON summary.
|
||||
"""
|
||||
from framework.runner.runner import AgentRunner
|
||||
from framework.runtime.execution_stream import EntryPointSpec
|
||||
|
||||
path = Path(agent_path).resolve()
|
||||
if not path.exists():
|
||||
return json.dumps({"error": f"Agent path does not exist: {path}"})
|
||||
|
||||
try:
|
||||
runner = AgentRunner.load(path)
|
||||
except Exception as exc:
|
||||
return json.dumps({"error": f"Failed to load agent: {exc}"})
|
||||
|
||||
graph_id = path.name
|
||||
if graph_id in list(runtime.list_graphs()):
|
||||
return json.dumps({"error": f"Graph '{graph_id}' is already loaded"})
|
||||
|
||||
# Build entry point dict from the loaded graph
|
||||
entry_points: dict[str, EntryPointSpec] = {}
|
||||
|
||||
# Primary entry point
|
||||
if runner.graph.entry_node:
|
||||
entry_points["default"] = EntryPointSpec(
|
||||
id="default",
|
||||
name="Default",
|
||||
entry_node=runner.graph.entry_node,
|
||||
trigger_type="manual",
|
||||
isolation_level="shared",
|
||||
)
|
||||
|
||||
# Async entry points
|
||||
for aep in runner.graph.async_entry_points:
|
||||
entry_points[aep.id] = EntryPointSpec(
|
||||
id=aep.id,
|
||||
name=aep.name,
|
||||
entry_node=aep.entry_node,
|
||||
trigger_type=aep.trigger_type,
|
||||
trigger_config=aep.trigger_config,
|
||||
isolation_level=aep.isolation_level,
|
||||
priority=aep.priority,
|
||||
max_concurrent=aep.max_concurrent,
|
||||
)
|
||||
|
||||
await runtime.add_graph(
|
||||
graph_id=graph_id,
|
||||
graph=runner.graph,
|
||||
goal=runner.goal,
|
||||
entry_points=entry_points,
|
||||
)
|
||||
|
||||
return json.dumps(
|
||||
{
|
||||
"graph_id": graph_id,
|
||||
"entry_points": list(entry_points.keys()),
|
||||
"nodes": [n.id for n in runner.graph.nodes],
|
||||
"status": "loaded",
|
||||
}
|
||||
)
|
||||
|
||||
_load_tool = Tool(
|
||||
name="load_agent",
|
||||
description=(
|
||||
"Load an agent graph from disk into the current session. "
|
||||
"The agent runs alongside the primary agent, sharing memory and data."
|
||||
),
|
||||
parameters={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"agent_path": {
|
||||
"type": "string",
|
||||
"description": "Path to the agent directory (containing agent.py)",
|
||||
},
|
||||
},
|
||||
"required": ["agent_path"],
|
||||
},
|
||||
)
|
||||
registry.register("load_agent", _load_tool, lambda inputs: load_agent(**inputs))
|
||||
tools_registered += 1
|
||||
|
||||
# --- unload_agent ---------------------------------------------------------
|
||||
|
||||
async def unload_agent(graph_id: str) -> str:
|
||||
"""Stop and remove a secondary agent graph from the session."""
|
||||
try:
|
||||
await runtime.remove_graph(graph_id)
|
||||
return json.dumps({"graph_id": graph_id, "status": "unloaded"})
|
||||
except ValueError as exc:
|
||||
return json.dumps({"error": str(exc)})
|
||||
|
||||
_unload_tool = Tool(
|
||||
name="unload_agent",
|
||||
description="Stop and remove a loaded agent graph from the session.",
|
||||
parameters={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"graph_id": {
|
||||
"type": "string",
|
||||
"description": "ID of the graph to unload",
|
||||
},
|
||||
},
|
||||
"required": ["graph_id"],
|
||||
},
|
||||
)
|
||||
registry.register("unload_agent", _unload_tool, lambda inputs: unload_agent(**inputs))
|
||||
tools_registered += 1
|
||||
|
||||
# --- start_agent ----------------------------------------------------------
|
||||
|
||||
async def start_agent(
    graph_id: str, entry_point: str = "default", input_data: str = "{}"
) -> str:
    """Trigger an entry point on a loaded agent graph.

    Returns a JSON string describing the triggered execution, or an error
    payload when the graph / entry point is unknown or the input is bad JSON.
    """
    registration = runtime.get_graph_registration(graph_id)
    if registration is None:
        return json.dumps({"error": f"Graph '{graph_id}' not found"})

    stream = registration.streams.get(entry_point)
    if stream is None:
        # Tell the caller which entry points actually exist on this graph.
        payload = {
            "error": f"Entry point '{entry_point}' not found on graph '{graph_id}'",
            "available": list(registration.streams.keys()),
        }
        return json.dumps(payload)

    if isinstance(input_data, str):
        try:
            data = json.loads(input_data)
        except json.JSONDecodeError as exc:
            return json.dumps({"error": f"Invalid JSON input: {exc}"})
    else:
        # Already-parsed input is passed through untouched.
        data = input_data

    # Executions triggered here share the primary session's state.
    session_state = runtime._get_primary_session_state(entry_point, source_graph_id=graph_id)
    exec_id = await stream.execute(data, session_state=session_state)
    return json.dumps(
        {
            "graph_id": graph_id,
            "entry_point": entry_point,
            "execution_id": exec_id,
            "status": "triggered",
        }
    )


_start_tool = Tool(
    name="start_agent",
    description="Trigger an entry point on a loaded agent graph to start execution.",
    parameters={
        "type": "object",
        "properties": {
            "graph_id": {
                "type": "string",
                "description": "ID of the graph to start",
            },
            "entry_point": {
                "type": "string",
                "description": "Entry point to trigger (default: 'default')",
            },
            "input_data": {
                "type": "string",
                "description": "JSON string of input data for the execution",
            },
        },
        "required": ["graph_id"],
    },
)
registry.register("start_agent", _start_tool, lambda inputs: start_agent(**inputs))
tools_registered += 1
|
||||
|
||||
# --- restart_agent --------------------------------------------------------
|
||||
|
||||
async def restart_agent(graph_id: str) -> str:
    """Unload and reload an agent graph (picks up code changes).

    NOTE(review): this currently only unloads — the caller must follow up
    with load_agent to bring the updated code back (see returned "note").
    """
    registration = runtime.get_graph_registration(graph_id)
    if registration is None:
        return json.dumps({"error": f"Graph '{graph_id}' not found"})
    if graph_id == runtime.graph_id:
        return json.dumps({"error": "Cannot restart the primary graph"})

    # The graph_id is the agent directory name by convention, but the original
    # load path is not tracked here, so a true in-place reload is not possible.
    try:
        await runtime.remove_graph(graph_id)
    except ValueError as exc:
        return json.dumps({"error": f"Failed to unload: {exc}"})

    # Caller reloads explicitly via load_agent with the (possibly new) path.
    return json.dumps(
        {
            "graph_id": graph_id,
            "status": "unloaded",
            "note": "Use load_agent to reload with updated code",
        }
    )


_restart_tool = Tool(
    name="restart_agent",
    description=(
        "Unload an agent graph. Use load_agent afterwards to reload with updated code."
    ),
    parameters={
        "type": "object",
        "properties": {
            "graph_id": {
                "type": "string",
                "description": "ID of the graph to restart",
            },
        },
        "required": ["graph_id"],
    },
)
registry.register("restart_agent", _restart_tool, lambda inputs: restart_agent(**inputs))
tools_registered += 1
|
||||
|
||||
# --- list_agents ----------------------------------------------------------
|
||||
|
||||
def list_agents() -> str:
    """List all agent graphs in the current session with their status."""
    entries = []
    for gid in runtime.list_graphs():
        registration = runtime.get_graph_registration(gid)
        if registration is None:
            # Graph disappeared between list and lookup — skip it.
            continue
        running = sum(len(s.active_execution_ids) for s in registration.streams.values())
        entries.append(
            {
                "graph_id": gid,
                "is_primary": gid == runtime.graph_id,
                "is_active": gid == runtime.active_graph_id,
                "entry_points": list(registration.entry_points.keys()),
                "active_executions": running,
            }
        )
    return json.dumps({"graphs": entries})


_list_tool = Tool(
    name="list_agents",
    description="List all loaded agent graphs and their status.",
    parameters={"type": "object", "properties": {}},
)
registry.register("list_agents", _list_tool, lambda inputs: list_agents())
tools_registered += 1
|
||||
|
||||
# --- get_user_presence ----------------------------------------------------
|
||||
|
||||
def get_user_presence() -> str:
    """Return user idle time and presence status as a JSON string.

    Status thresholds: < 120s idle is "present", < 600s is "idle",
    anything longer is "away"; infinite idle means the user was never seen.
    """
    idle = runtime.user_idle_seconds
    never_seen = idle == float("inf")
    if never_seen:
        status = "never_seen"
    elif idle < 120:
        status = "present"
    elif idle < 600:
        status = "idle"
    else:
        status = "away"

    return json.dumps(
        {
            # Infinite idle is not JSON-representable; report null instead.
            "idle_seconds": None if never_seen else idle,
            "status": status,
        }
    )


_presence_tool = Tool(
    name="get_user_presence",
    description=(
        "Check if the user is currently active. Returns idle time "
        "and a status of 'present', 'idle', 'away', or 'never_seen'."
    ),
    parameters={"type": "object", "properties": {}},
)
registry.register("get_user_presence", _presence_tool, lambda inputs: get_user_presence())
tools_registered += 1
|
||||
|
||||
logger.info("Registered %d graph lifecycle tools", tools_registered)
|
||||
return tools_registered
|
||||
+556
-126
@@ -4,17 +4,18 @@ import subprocess
|
||||
import threading
|
||||
import time
|
||||
|
||||
from textual import work
|
||||
from textual.app import App, ComposeResult
|
||||
from textual.binding import Binding
|
||||
from textual.containers import Container, Horizontal
|
||||
from textual.widgets import Footer, Label
|
||||
|
||||
from framework.runtime.agent_runtime import AgentRuntime
|
||||
from framework.runtime.event_bus import AgentEvent, EventType
|
||||
from framework.tui.widgets.chat_repl import ChatRepl
|
||||
from framework.tui.widgets.graph_view import GraphOverview
|
||||
from framework.tui.widgets.selectable_rich_log import SelectableRichLog
|
||||
|
||||
# AgentRuntime imported lazily where needed to support runtime=None startup.
|
||||
# ChatRepl and GraphOverview are imported lazily in _mount_agent_widgets.
|
||||
|
||||
|
||||
class StatusBar(Container):
|
||||
"""Live status bar showing agent execution state."""
|
||||
@@ -151,6 +152,10 @@ class AdenTUI(App):
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
#agent-workspace {
|
||||
height: 1fr;
|
||||
}
|
||||
|
||||
#chat-history {
|
||||
height: 1fr;
|
||||
width: 100%;
|
||||
@@ -188,6 +193,15 @@ class AdenTUI(App):
|
||||
background: $panel;
|
||||
color: $text-muted;
|
||||
}
|
||||
|
||||
#empty-workspace {
|
||||
align: center middle;
|
||||
height: 1fr;
|
||||
}
|
||||
|
||||
#empty-workspace Label {
|
||||
text-align: center;
|
||||
}
|
||||
"""
|
||||
|
||||
BINDINGS = [
|
||||
@@ -198,23 +212,37 @@ class AdenTUI(App):
|
||||
Binding("ctrl+l", "toggle_logs", "Toggle Logs", show=True, priority=True),
|
||||
Binding("ctrl+z", "pause_execution", "Pause", show=True, priority=True),
|
||||
Binding("ctrl+r", "show_sessions", "Sessions", show=True, priority=True),
|
||||
Binding("ctrl+p", "attach_pdf", "Attach PDF", show=True, priority=True),
|
||||
Binding("ctrl+a", "show_agent_picker", "Agents", show=True, priority=True),
|
||||
Binding("ctrl+e", "escalate_to_coder", "Coder", show=True, priority=True),
|
||||
Binding("ctrl+e", "return_from_coder", "← Back", show=True, priority=True),
|
||||
Binding("tab", "focus_next", "Next Panel", show=True),
|
||||
Binding("shift+tab", "focus_previous", "Previous Panel", show=False),
|
||||
]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
runtime: AgentRuntime,
|
||||
runtime=None,
|
||||
resume_session: str | None = None,
|
||||
resume_checkpoint: str | None = None,
|
||||
model: str | None = None,
|
||||
no_guardian: bool = False,
|
||||
):
|
||||
super().__init__()
|
||||
|
||||
self.runtime = runtime
|
||||
self.graph_view = GraphOverview(runtime)
|
||||
self.chat_repl = ChatRepl(runtime, resume_session, resume_checkpoint)
|
||||
self.status_bar = StatusBar(graph_id=runtime.graph.id)
|
||||
self._model = model
|
||||
self._no_guardian = no_guardian
|
||||
self._resume_session = resume_session
|
||||
self._resume_checkpoint = resume_checkpoint
|
||||
self._runner = None # AgentRunner — needed for cleanup on swap
|
||||
|
||||
# Escalation stack: stores worker state when coder is in foreground
|
||||
self._escalation_stack: list[dict] = []
|
||||
|
||||
# Widgets are created lazily when runtime is available
|
||||
self.graph_view = None
|
||||
self.chat_repl = None
|
||||
self.status_bar = StatusBar(graph_id=runtime.graph.id if runtime else "")
|
||||
self.is_ready = False
|
||||
|
||||
def open_url(self, url: str, *, new_tab: bool = True) -> None:
|
||||
@@ -239,34 +267,364 @@ class AdenTUI(App):
|
||||
|
||||
def compose(self) -> ComposeResult:
|
||||
yield self.status_bar
|
||||
|
||||
yield Horizontal(
|
||||
self.graph_view,
|
||||
self.chat_repl,
|
||||
)
|
||||
|
||||
yield Horizontal(id="agent-workspace")
|
||||
yield Footer()
|
||||
|
||||
async def on_mount(self) -> None:
|
||||
"""Called when app starts."""
|
||||
self.title = "Aden TUI Dashboard"
|
||||
|
||||
# Add logging setup
|
||||
self._setup_logging_queue()
|
||||
|
||||
# Set ready immediately so _poll_logs can process messages
|
||||
self.is_ready = True
|
||||
|
||||
# Add event subscription with delay to ensure TUI is fully initialized
|
||||
if self.runtime is not None:
|
||||
# Direct launch with agent already loaded
|
||||
self._mount_agent_widgets()
|
||||
self.call_later(self._init_runtime_connection)
|
||||
|
||||
def write_initial_logs():
|
||||
logging.info("TUI Dashboard initialized successfully")
|
||||
logging.info("Waiting for agent execution to start...")
|
||||
|
||||
self.set_timer(0.2, write_initial_logs)
|
||||
else:
|
||||
# No agent — show picker
|
||||
self.call_later(self._show_agent_picker_initial)
|
||||
|
||||
# -- Agent widget lifecycle --
|
||||
|
||||
def _mount_agent_widgets(self) -> None:
    """Mount ChatRepl and GraphOverview into #agent-workspace."""
    # Imported lazily so the app can start with runtime=None.
    from framework.tui.widgets.chat_repl import ChatRepl
    from framework.tui.widgets.graph_view import GraphOverview

    workspace = self.query_one("#agent-workspace", Horizontal)

    # Clear whatever is currently mounted (e.g. the empty-state placeholder).
    for existing in list(workspace.children):
        existing.remove()

    self.graph_view = GraphOverview(self.runtime)
    self.chat_repl = ChatRepl(
        self.runtime,
        self._resume_session,
        self._resume_checkpoint,
    )
    workspace.mount(self.graph_view)
    workspace.mount(self.chat_repl)
    self.status_bar.set_graph_id(self.runtime.graph.id)
|
||||
|
||||
def _unmount_agent_widgets(self) -> None:
    """Remove ChatRepl and GraphOverview from #agent-workspace."""
    # Drop the event subscription first so no events target dead widgets.
    if hasattr(self, "_subscription_id"):
        try:
            self.runtime.unsubscribe_from_events(self._subscription_id)
        except Exception:
            # Best-effort: runtime may already be shutting down.
            pass
        del self._subscription_id

    workspace = self.query_one("#agent-workspace", Horizontal)
    for existing in list(workspace.children):
        existing.remove()

    self.graph_view = None
    self.chat_repl = None
|
||||
|
||||
async def _load_and_switch_agent(self, agent_path: str) -> None:
    """Load an agent and replace the current one in the TUI."""
    from pathlib import Path

    from framework.credentials.models import CredentialError
    from framework.runner import AgentRunner

    # 1. Tear down the currently loaded agent, if any.
    if self.runtime is not None:
        self._unmount_agent_widgets()
        if self._runner is not None:
            try:
                await self._runner.cleanup_async()
            except Exception:
                # Cleanup is best-effort; a failed teardown must not block the swap.
                pass
            self._runner = None
        self.runtime = None

    # 2. Show loading state while the new agent spins up.
    agent_name = Path(agent_path).name
    self.status_bar.set_graph_id(f"Loading {agent_name}...")
    self.notify(f"Loading agent: {agent_name}...", timeout=3)

    # 3. Load and start the new agent.
    try:
        new_runner = AgentRunner.load(agent_path, model=self._model)
        if new_runner._agent_runtime is None:
            new_runner._setup()

        if not self._no_guardian and new_runner._agent_runtime:
            from framework.agents.hive_coder.guardian import attach_guardian

            attach_guardian(new_runner._agent_runtime, new_runner._tool_registry)

        if new_runner._agent_runtime and not new_runner._agent_runtime.is_running:
            await new_runner._agent_runtime.start()

        self._runner = new_runner
        self.runtime = new_runner._agent_runtime
    except CredentialError as e:
        self.status_bar.set_graph_id("")
        self.notify(f"Credential error: {e}", severity="error", timeout=10)
        return
    except Exception as e:
        self.status_bar.set_graph_id("")
        self.notify(f"Failed to load agent: {e}", severity="error", timeout=10)
        return

    # 4. Mount fresh widgets and wire up event routing.
    self._mount_agent_widgets()
    await self._init_runtime_connection()

    # Resume state applies only to the first load; clear it for later swaps.
    self._resume_session = None
    self._resume_checkpoint = None

    self.notify(f"Agent loaded: {agent_name}", severity="information", timeout=3)
|
||||
|
||||
# -- Agent picker --
|
||||
|
||||
def _show_agent_picker_initial(self) -> None:
    """Show the agent picker on initial startup (no agent loaded)."""
    from framework.tui.screens.agent_picker import AgentPickerScreen, discover_agents

    agents = discover_agents()
    if not agents:
        # Nothing to pick from — inform the user, then shut down shortly after.
        self.notify("No agents found in exports/ or examples/", severity="error", timeout=5)
        self.set_timer(2.0, self.exit)
        return

    def _on_initial_pick(result: str | None) -> None:
        # Dismissing the initial picker means there is nothing to run.
        if result is None:
            self.exit()
            return
        self._do_load_agent(result)

    self.push_screen(AgentPickerScreen(agents), callback=_on_initial_pick)
|
||||
|
||||
def action_show_agent_picker(self) -> None:
    """Open the agent picker (Ctrl+A or /agents)."""
    from framework.tui.screens.agent_picker import AgentPickerScreen, discover_agents

    agents = discover_agents()
    if not agents:
        self.notify("No agents found", severity="error", timeout=5)
        return

    def _on_pick(result: str | None) -> None:
        # Cancelling keeps the current agent; only a real pick triggers a swap.
        if result is not None:
            self._do_load_agent(result)

    self.push_screen(AgentPickerScreen(agents), callback=_on_pick)
|
||||
|
||||
@work(exclusive=True)
async def _do_load_agent(self, agent_path: str) -> None:
    """Textual worker wrapper around _load_and_switch_agent.

    exclusive=True ensures only one load/swap runs at a time; a new pick
    cancels a still-running previous load.
    """
    await self._load_and_switch_agent(agent_path)
|
||||
|
||||
# -- Escalation to Hive Coder --
|
||||
|
||||
@work(exclusive=True, group="escalation")
async def _do_escalate_to_coder(
    self,
    reason: str = "",
    context: str = "",
    node_id: str = "",
) -> None:
    """Push current agent onto stack and load hive_coder.

    The worker agent is NOT cleaned up — its runner/runtime are parked on
    ``self._escalation_stack`` so ``_return_from_escalation`` can restore
    them. ``node_id`` records the worker node that blocked on escalation so
    it can be resumed on return.

    Args:
        reason: Short problem statement forwarded to the coder.
        context: Additional context (e.g. logs) forwarded to the coder.
        node_id: The worker node blocked waiting for the escalation result.
    """
    from pathlib import Path

    from framework.runner import AgentRunner
    from framework.tools.session_graph_tools import register_graph_tools

    if self.runtime is None:
        self.notify("No active agent to escalate from", severity="error")
        return

    # 1. Save current state (do NOT cleanup — worker stays alive)
    saved = {
        "runner": self._runner,
        "runtime": self.runtime,
        "blocked_node_id": node_id,
    }
    self._escalation_stack.append(saved)

    # Unsubscribe from worker events so they don't reach the coder's widgets.
    if hasattr(self, "_subscription_id"):
        try:
            self.runtime.unsubscribe_from_events(self._subscription_id)
        except Exception:
            pass
        del self._subscription_id

    # Remember worker agent path so the coder knows which agent to modify.
    worker_path = ""
    if self._runner and hasattr(self._runner, "agent_path"):
        worker_path = str(self._runner.agent_path.resolve())

    # 2. Remove worker widgets (they get destroyed; fresh ones are mounted on return)
    workspace = self.query_one("#agent-workspace", Horizontal)
    for child in list(workspace.children):
        child.remove()
    self.graph_view = None
    self.chat_repl = None

    # 3. Show loading state
    self.status_bar.set_graph_id("Loading Hive Coder...")
    self.notify("Escalating to Hive Coder...", timeout=3)

    # 4. Load hive_coder from the framework's bundled agents directory.
    framework_agents_dir = Path(__file__).resolve().parent.parent / "agents"
    hive_coder_path = framework_agents_dir / "hive_coder"

    try:
        runner = AgentRunner.load(hive_coder_path, model=self._model)
        if runner._agent_runtime is None:
            runner._setup()

        coder_runtime = runner._agent_runtime
        coder_runtime._graph_id = "hive_coder"
        coder_runtime._active_graph_id = "hive_coder"

        # Register graph lifecycle tools so the coder can manage sibling graphs,
        # then refresh the runtime's tool list/executor to include them.
        register_graph_tools(runner._tool_registry, coder_runtime)
        coder_runtime._tools = list(runner._tool_registry.get_tools().values())
        coder_runtime._tool_executor = runner._tool_registry.get_executor()

        if not coder_runtime.is_running:
            await coder_runtime.start()

        self._runner = runner
        self.runtime = coder_runtime
    # Fix: the original caught `(CredentialError, Exception)` — a redundant
    # tuple, since CredentialError is an Exception subclass and both got the
    # same handling. A single Exception handler is equivalent and clearer.
    except Exception as e:
        self.status_bar.set_graph_id("")
        self.notify(f"Failed to load coder: {e}", severity="error", timeout=10)
        self._restore_from_escalation_stack()
        return

    # 5. Mount coder widgets and subscribe
    self._mount_agent_widgets()
    await self._init_runtime_connection()

    self.status_bar.set_graph_id("hive_coder (escalated)")

    # 6. Auto-trigger coder with escalation context
    escalation_input = self._build_escalation_input(reason, context, worker_path)
    try:
        import asyncio

        entry_points = self.runtime.get_entry_points()
        if entry_points:
            ep = entry_points[0]
            # The agent loop runs on a separate thread; bridge into it.
            future = asyncio.run_coroutine_threadsafe(
                self.runtime.trigger(
                    entry_point_id=ep.id,
                    input_data={"user_request": escalation_input},
                ),
                self.chat_repl._agent_loop,
            )
            exec_id = await asyncio.wrap_future(future)
            self.chat_repl._current_exec_id = exec_id
    except Exception as e:
        self.notify(f"Error starting coder: {e}", severity="error")

    self.notify(
        "Hive Coder loaded. Ctrl+E or /back to return.",
        severity="information",
        timeout=5,
    )
    self.refresh_bindings()
|
||||
|
||||
def _build_escalation_input(self, reason: str, context: str, worker_path: str) -> str:
|
||||
"""Compose the user_request string for hive_coder."""
|
||||
parts = []
|
||||
if worker_path:
|
||||
parts.append(
|
||||
f"Modify the agent at: {worker_path}\n"
|
||||
f"Do NOT ask which agent to modify — it is the path above."
|
||||
)
|
||||
if reason:
|
||||
parts.append(f"Problem: {reason}")
|
||||
if context:
|
||||
parts.append(f"Context:\n{context}")
|
||||
if not parts:
|
||||
parts.append("The user needs help modifying their agent.")
|
||||
return "\n\n".join(parts)
|
||||
|
||||
async def _return_from_escalation(self, summary: str = "") -> None:
    """Pop escalation stack and restore the worker agent."""
    if not self._escalation_stack:
        self.notify("No escalation to return from", severity="warning")
        return

    # 1. Tear down the coder (its runner is ours to clean up; the worker's is parked).
    self._unmount_agent_widgets()
    if self._runner is not None:
        try:
            await self._runner.cleanup_async()
        except Exception:
            pass

    # 2. Restore the parked worker runner/runtime.
    saved = self._escalation_stack.pop()
    self._runner = saved["runner"]
    self.runtime = saved["runtime"]

    # 3. Mount fresh widgets for the worker runtime and resubscribe.
    self._mount_agent_widgets()
    await self._init_runtime_connection()

    self.status_bar.set_graph_id(self.runtime.graph.id if self.runtime else "")

    # 4. Inject a return message to unblock the worker node that escalated.
    blocked_node_id = saved.get("blocked_node_id", "")
    return_msg = summary or "Coder session completed. Continuing."
    if blocked_node_id:
        try:
            import asyncio

            # The agent loop lives on another thread; bridge the coroutine over.
            future = asyncio.run_coroutine_threadsafe(
                self.runtime.inject_input(blocked_node_id, return_msg),
                self.chat_repl._agent_loop,
            )
            await asyncio.wrap_future(future)
        except Exception as e:
            self.notify(
                f"Could not resume worker: {e}",
                severity="warning",
                timeout=5,
            )

    # 5. Show return in chat (deferred — widgets need a tick to mount)
    def _show_return():
        if self.chat_repl:
            self.chat_repl._write_history("[bold cyan]Returned from Hive Coder.[/bold cyan]")
            if summary:
                self.chat_repl._write_history(f"[dim]{summary}[/dim]")

    self.call_later(_show_return)
    self.notify("Returned to worker agent", severity="information", timeout=3)
    self.refresh_bindings()
|
||||
|
||||
def _restore_from_escalation_stack(self) -> None:
    """Emergency restore when coder loading fails.

    Synchronous variant of the return path: pops the parked worker state,
    remounts its widgets, and defers the event re-subscription.
    """
    if not self._escalation_stack:
        return

    saved = self._escalation_stack.pop()
    self._runner = saved["runner"]
    self.runtime = saved["runtime"]
    self._mount_agent_widgets()
    # Re-subscription is async; schedule it on the next tick.
    self.call_later(self._init_runtime_connection)
|
||||
|
||||
# Delay initial log messages until layout is fully rendered
|
||||
def write_initial_logs():
|
||||
logging.info("TUI Dashboard initialized successfully")
|
||||
logging.info("Waiting for agent execution to start...")
|
||||
|
||||
# Wait for layout to be fully rendered before writing logs
|
||||
self.set_timer(0.2, write_initial_logs)
|
||||
# -- Logging --
|
||||
|
||||
def _setup_logging_queue(self) -> None:
|
||||
"""Setup a thread-safe queue for logs."""
|
||||
@@ -302,7 +660,7 @@ class AdenTUI(App):
|
||||
|
||||
def _poll_logs(self) -> None:
|
||||
"""Poll the log queue and update UI."""
|
||||
if not self.is_ready:
|
||||
if not self.is_ready or self.chat_repl is None:
|
||||
return
|
||||
|
||||
try:
|
||||
@@ -316,6 +674,8 @@ class AdenTUI(App):
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# -- Runtime event routing --
|
||||
|
||||
_EVENT_TYPES = [
|
||||
EventType.LLM_TEXT_DELTA,
|
||||
EventType.CLIENT_OUTPUT_DELTA,
|
||||
@@ -342,6 +702,7 @@ class AdenTUI(App):
|
||||
EventType.EDGE_TRAVERSED,
|
||||
EventType.EXECUTION_PAUSED,
|
||||
EventType.EXECUTION_RESUMED,
|
||||
EventType.ESCALATION_REQUESTED,
|
||||
]
|
||||
|
||||
_LOG_PANE_EVENTS = frozenset(_EVENT_TYPES) - {
|
||||
@@ -384,17 +745,38 @@ class AdenTUI(App):
|
||||
|
||||
def _route_event(self, event: AgentEvent) -> None:
|
||||
"""Route incoming events to widgets. Runs on Textual's main thread."""
|
||||
if not self.is_ready:
|
||||
logging.getLogger("tui.events").warning(
|
||||
"Event dropped (not ready): %s node=%s",
|
||||
event.type.value,
|
||||
event.node_id or "?",
|
||||
)
|
||||
if not self.is_ready or self.chat_repl is None:
|
||||
return
|
||||
|
||||
try:
|
||||
et = event.type
|
||||
|
||||
# --- Multi-graph filtering ---
|
||||
# If the event has a graph_id and it's not the active graph,
|
||||
# show a notification for important events and drop the rest.
|
||||
if event.graph_id is not None and event.graph_id != self.runtime.active_graph_id:
|
||||
if et == EventType.CLIENT_INPUT_REQUESTED:
|
||||
self.notify(
|
||||
f"[bold]{event.graph_id}[/bold] is waiting for input",
|
||||
severity="warning",
|
||||
timeout=10,
|
||||
)
|
||||
elif et == EventType.EXECUTION_FAILED:
|
||||
error = event.data.get("error", "Unknown error")[:60]
|
||||
self.notify(
|
||||
f"[bold red]{event.graph_id}[/bold red] failed: {error}",
|
||||
severity="error",
|
||||
timeout=10,
|
||||
)
|
||||
elif et == EventType.EXECUTION_COMPLETED:
|
||||
self.notify(
|
||||
f"[bold green]{event.graph_id}[/bold green] completed",
|
||||
severity="information",
|
||||
timeout=5,
|
||||
)
|
||||
# All other background events are silently dropped (visible in logs)
|
||||
return
|
||||
|
||||
# --- Chat REPL events ---
|
||||
if et in (EventType.LLM_TEXT_DELTA, EventType.CLIENT_OUTPUT_DELTA):
|
||||
self.chat_repl.handle_text_delta(
|
||||
@@ -419,6 +801,14 @@ class AdenTUI(App):
|
||||
elif et == EventType.CLIENT_INPUT_REQUESTED:
|
||||
self.chat_repl.handle_input_requested(
|
||||
event.node_id or event.data.get("node_id", ""),
|
||||
graph_id=event.graph_id,
|
||||
)
|
||||
elif et == EventType.ESCALATION_REQUESTED:
|
||||
self.chat_repl.handle_escalation_requested(event.data)
|
||||
self._do_escalate_to_coder(
|
||||
reason=event.data.get("reason", ""),
|
||||
context=event.data.get("context", ""),
|
||||
node_id=event.node_id or "",
|
||||
)
|
||||
elif et == EventType.NODE_LOOP_STARTED:
|
||||
self.chat_repl.handle_node_started(event.node_id or "")
|
||||
@@ -451,47 +841,48 @@ class AdenTUI(App):
|
||||
self.chat_repl.handle_constraint_violation(event.data)
|
||||
|
||||
# --- Graph view events ---
|
||||
if et in (
|
||||
EventType.EXECUTION_STARTED,
|
||||
EventType.EXECUTION_COMPLETED,
|
||||
EventType.EXECUTION_FAILED,
|
||||
):
|
||||
self.graph_view.update_execution(event)
|
||||
if self.graph_view is not None:
|
||||
if et in (
|
||||
EventType.EXECUTION_STARTED,
|
||||
EventType.EXECUTION_COMPLETED,
|
||||
EventType.EXECUTION_FAILED,
|
||||
):
|
||||
self.graph_view.update_execution(event)
|
||||
|
||||
if et == EventType.NODE_LOOP_STARTED:
|
||||
self.graph_view.handle_node_loop_started(event.node_id or "")
|
||||
elif et == EventType.NODE_LOOP_ITERATION:
|
||||
self.graph_view.handle_node_loop_iteration(
|
||||
event.node_id or "",
|
||||
event.data.get("iteration", 0),
|
||||
)
|
||||
elif et == EventType.NODE_LOOP_COMPLETED:
|
||||
self.graph_view.handle_node_loop_completed(event.node_id or "")
|
||||
elif et == EventType.NODE_STALLED:
|
||||
self.graph_view.handle_stalled(
|
||||
event.node_id or "",
|
||||
event.data.get("reason", ""),
|
||||
)
|
||||
if et == EventType.NODE_LOOP_STARTED:
|
||||
self.graph_view.handle_node_loop_started(event.node_id or "")
|
||||
elif et == EventType.NODE_LOOP_ITERATION:
|
||||
self.graph_view.handle_node_loop_iteration(
|
||||
event.node_id or "",
|
||||
event.data.get("iteration", 0),
|
||||
)
|
||||
elif et == EventType.NODE_LOOP_COMPLETED:
|
||||
self.graph_view.handle_node_loop_completed(event.node_id or "")
|
||||
elif et == EventType.NODE_STALLED:
|
||||
self.graph_view.handle_stalled(
|
||||
event.node_id or "",
|
||||
event.data.get("reason", ""),
|
||||
)
|
||||
|
||||
if et == EventType.TOOL_CALL_STARTED:
|
||||
self.graph_view.handle_tool_call(
|
||||
event.node_id or "",
|
||||
event.data.get("tool_name", "unknown"),
|
||||
started=True,
|
||||
)
|
||||
elif et == EventType.TOOL_CALL_COMPLETED:
|
||||
self.graph_view.handle_tool_call(
|
||||
event.node_id or "",
|
||||
event.data.get("tool_name", "unknown"),
|
||||
started=False,
|
||||
)
|
||||
if et == EventType.TOOL_CALL_STARTED:
|
||||
self.graph_view.handle_tool_call(
|
||||
event.node_id or "",
|
||||
event.data.get("tool_name", "unknown"),
|
||||
started=True,
|
||||
)
|
||||
elif et == EventType.TOOL_CALL_COMPLETED:
|
||||
self.graph_view.handle_tool_call(
|
||||
event.node_id or "",
|
||||
event.data.get("tool_name", "unknown"),
|
||||
started=False,
|
||||
)
|
||||
|
||||
# Edge traversal → graph view
|
||||
if et == EventType.EDGE_TRAVERSED:
|
||||
self.graph_view.handle_edge_traversed(
|
||||
event.data.get("source_node", ""),
|
||||
event.data.get("target_node", ""),
|
||||
)
|
||||
# Edge traversal → graph view
|
||||
if et == EventType.EDGE_TRAVERSED:
|
||||
self.graph_view.handle_edge_traversed(
|
||||
event.data.get("source_node", ""),
|
||||
event.data.get("target_node", ""),
|
||||
)
|
||||
|
||||
# --- Status bar events ---
|
||||
if et == EventType.EXECUTION_STARTED:
|
||||
@@ -504,7 +895,10 @@ class AdenTUI(App):
|
||||
elif et == EventType.EXECUTION_FAILED:
|
||||
self.status_bar.set_failed(event.data.get("error", ""))
|
||||
elif et == EventType.NODE_LOOP_STARTED:
|
||||
self.status_bar.set_active_node(event.node_id or "", "thinking...")
|
||||
nid = event.node_id or ""
|
||||
node = self.runtime.graph.get_node(nid)
|
||||
name = node.name if node else nid
|
||||
self.status_bar.set_active_node(name, "thinking...")
|
||||
elif et == EventType.NODE_LOOP_ITERATION:
|
||||
self.status_bar.set_node_detail(f"step {event.data.get('iteration', '?')}")
|
||||
elif et == EventType.TOOL_CALL_STARTED:
|
||||
@@ -544,40 +938,62 @@ class AdenTUI(App):
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
# -- Actions --
|
||||
|
||||
def action_switch_graph(self, graph_id: str) -> None:
|
||||
"""Switch the active graph focus in the TUI."""
|
||||
if self.runtime is None:
|
||||
return
|
||||
try:
|
||||
self.runtime.active_graph_id = graph_id
|
||||
except ValueError:
|
||||
self.notify(f"Graph '{graph_id}' not found", severity="error", timeout=3)
|
||||
return
|
||||
|
||||
# Update status bar
|
||||
self.status_bar.set_graph_id(graph_id)
|
||||
|
||||
# Update graph view
|
||||
reg = self.runtime.get_graph_registration(graph_id)
|
||||
if reg and self.graph_view:
|
||||
self.graph_view.switch_graph(reg.graph)
|
||||
|
||||
# Flush chat streaming state
|
||||
if self.chat_repl:
|
||||
self.chat_repl.flush_streaming()
|
||||
|
||||
self.notify(f"Switched to graph: {graph_id}", severity="information", timeout=3)
|
||||
|
||||
def save_screenshot(self, filename: str | None = None) -> str:
|
||||
"""Save a screenshot of the current screen as SVG (viewable in browsers).
|
||||
|
||||
Args:
|
||||
filename: Optional filename for the screenshot. If None, generates timestamp-based name.
|
||||
|
||||
Returns:
|
||||
Path to the saved SVG file.
|
||||
"""
|
||||
"""Save a screenshot of the current screen as SVG (viewable in browsers)."""
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
# Create screenshots directory
|
||||
screenshots_dir = Path("screenshots")
|
||||
screenshots_dir.mkdir(exist_ok=True)
|
||||
|
||||
# Generate filename if not provided
|
||||
if filename is None:
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
filename = f"tui_screenshot_{timestamp}.svg"
|
||||
|
||||
# Ensure .svg extension
|
||||
if not filename.endswith(".svg"):
|
||||
filename += ".svg"
|
||||
|
||||
# Full path
|
||||
filepath = screenshots_dir / filename
|
||||
|
||||
# Temporarily hide borders for cleaner screenshot
|
||||
chat_widget = self.query_one(ChatRepl)
|
||||
from framework.tui.widgets.chat_repl import ChatRepl
|
||||
|
||||
try:
|
||||
chat_widget = self.query_one(ChatRepl)
|
||||
except Exception:
|
||||
# No ChatRepl mounted yet
|
||||
svg_data = self.export_screenshot()
|
||||
filepath.write_text(svg_data, encoding="utf-8")
|
||||
return str(filepath)
|
||||
|
||||
original_chat_border = chat_widget.styles.border_left
|
||||
chat_widget.styles.border_left = ("none", "transparent")
|
||||
|
||||
# Hide all TextArea widget borders
|
||||
input_widgets = self.query("ChatTextArea")
|
||||
original_input_borders = []
|
||||
for input_widget in input_widgets:
|
||||
@@ -585,11 +1001,9 @@ class AdenTUI(App):
|
||||
input_widget.styles.border = ("none", "transparent")
|
||||
|
||||
try:
|
||||
# Get SVG data from Textual and save it
|
||||
svg_data = self.export_screenshot()
|
||||
filepath.write_text(svg_data, encoding="utf-8")
|
||||
finally:
|
||||
# Restore the original borders
|
||||
chat_widget.styles.border_left = original_chat_border
|
||||
for i, input_widget in enumerate(input_widgets):
|
||||
input_widget.styles.border = original_input_borders[i]
|
||||
@@ -610,15 +1024,18 @@ class AdenTUI(App):
|
||||
|
||||
def action_toggle_logs(self) -> None:
|
||||
"""Toggle inline log display in chat (bound to Ctrl+L)."""
|
||||
if self.chat_repl is None:
|
||||
return
|
||||
self.chat_repl.toggle_logs()
|
||||
mode = "ON" if self.chat_repl._show_logs else "OFF"
|
||||
self.notify(f"Logs {mode}", severity="information", timeout=2)
|
||||
|
||||
def action_pause_execution(self) -> None:
|
||||
"""Immediately pause execution by cancelling task (bound to Ctrl+Z)."""
|
||||
if self.chat_repl is None or self.runtime is None:
|
||||
return
|
||||
try:
|
||||
chat_repl = self.query_one(ChatRepl)
|
||||
if not chat_repl._current_exec_id:
|
||||
if not self.chat_repl._current_exec_id:
|
||||
self.notify(
|
||||
"No active execution to pause",
|
||||
severity="information",
|
||||
@@ -626,16 +1043,26 @@ class AdenTUI(App):
|
||||
)
|
||||
return
|
||||
|
||||
# Find and cancel the execution task - executor will catch and save state
|
||||
task_cancelled = False
|
||||
for stream in self.runtime._streams.values():
|
||||
exec_id = chat_repl._current_exec_id
|
||||
all_streams = []
|
||||
active_reg = self.runtime.get_graph_registration(self.runtime.active_graph_id)
|
||||
if active_reg:
|
||||
all_streams.extend(active_reg.streams.values())
|
||||
for gid in self.runtime.list_graphs():
|
||||
if gid == self.runtime.active_graph_id:
|
||||
continue
|
||||
reg = self.runtime.get_graph_registration(gid)
|
||||
if reg:
|
||||
all_streams.extend(reg.streams.values())
|
||||
|
||||
for stream in all_streams:
|
||||
exec_id = self.chat_repl._current_exec_id
|
||||
task = stream._execution_tasks.get(exec_id)
|
||||
if task and not task.done():
|
||||
task.cancel()
|
||||
task_cancelled = True
|
||||
self.notify(
|
||||
"⏸ Execution paused - state saved",
|
||||
"Execution paused - state saved",
|
||||
severity="information",
|
||||
timeout=3,
|
||||
)
|
||||
@@ -656,10 +1083,10 @@ class AdenTUI(App):
|
||||
|
||||
async def action_show_sessions(self) -> None:
|
||||
"""Show sessions list (bound to Ctrl+R)."""
|
||||
# Send /sessions command to chat input
|
||||
if self.chat_repl is None:
|
||||
return
|
||||
try:
|
||||
chat_repl = self.query_one(ChatRepl)
|
||||
await chat_repl._submit_input("/sessions")
|
||||
await self.chat_repl._submit_input("/sessions")
|
||||
except Exception:
|
||||
self.notify(
|
||||
"Use /sessions command to see all sessions",
|
||||
@@ -667,59 +1094,62 @@ class AdenTUI(App):
|
||||
timeout=3,
|
||||
)
|
||||
|
||||
async def action_attach_pdf(self) -> None:
|
||||
"""Open native OS file dialog for PDF selection (bound to Ctrl+P)."""
|
||||
from framework.tui.widgets.file_browser import _has_gui, pick_pdf_file
|
||||
def check_action(self, action: str, parameters: tuple[object, ...]) -> bool | None:
|
||||
"""Control which bindings are shown in the footer.
|
||||
|
||||
if not _has_gui():
|
||||
self.notify(
|
||||
"No GUI available. Use /attach <path> instead.",
|
||||
severity="warning",
|
||||
timeout=5,
|
||||
)
|
||||
Both escalate_to_coder and return_from_coder are bound to Ctrl+E.
|
||||
check_action toggles which one is active based on escalation state,
|
||||
so the footer shows "Coder" or "← Back" accordingly.
|
||||
"""
|
||||
if action == "escalate_to_coder":
|
||||
return not self._escalation_stack
|
||||
if action == "return_from_coder":
|
||||
return bool(self._escalation_stack)
|
||||
return True
|
||||
|
||||
def action_escalate_to_coder(self) -> None:
|
||||
"""Escalate to Hive Coder (bound to Ctrl+E)."""
|
||||
if self.runtime is None:
|
||||
self.notify("No active agent to escalate from", severity="error")
|
||||
return
|
||||
# _do_escalate_to_coder is already @work-decorated; calling it starts the worker.
|
||||
self._do_escalate_to_coder(reason="User-initiated escalation")
|
||||
|
||||
self.notify("Opening file dialog...", severity="information", timeout=2)
|
||||
path = await pick_pdf_file()
|
||||
|
||||
if path is not None:
|
||||
self.chat_repl.attach_pdf(path)
|
||||
async def action_return_from_coder(self) -> None:
|
||||
"""Return from Hive Coder to worker agent (Ctrl+E toggles)."""
|
||||
await self._return_from_escalation()
|
||||
|
||||
async def on_unmount(self) -> None:
|
||||
"""Cleanup on app shutdown - cancel execution which will save state."""
|
||||
self.is_ready = False
|
||||
|
||||
# Cancel any active execution - the executor will catch CancelledError
|
||||
# and save current state as paused (no waiting needed!)
|
||||
# Cancel any active execution
|
||||
try:
|
||||
import asyncio
|
||||
|
||||
chat_repl = self.query_one(ChatRepl)
|
||||
if chat_repl._current_exec_id:
|
||||
# Find the stream with this execution
|
||||
for stream in self.runtime._streams.values():
|
||||
exec_id = chat_repl._current_exec_id
|
||||
if self.chat_repl and self.chat_repl._current_exec_id and self.runtime:
|
||||
all_streams = []
|
||||
for gid in self.runtime.list_graphs():
|
||||
reg = self.runtime.get_graph_registration(gid)
|
||||
if reg:
|
||||
all_streams.extend(reg.streams.values())
|
||||
for stream in all_streams:
|
||||
exec_id = self.chat_repl._current_exec_id
|
||||
task = stream._execution_tasks.get(exec_id)
|
||||
if task and not task.done():
|
||||
# Cancel the task - executor will catch and save state
|
||||
task.cancel()
|
||||
try:
|
||||
# Wait for executor to save state (may take a few seconds)
|
||||
# Longer timeout for quit to ensure state is properly saved
|
||||
await asyncio.wait_for(task, timeout=5.0)
|
||||
except (TimeoutError, asyncio.CancelledError):
|
||||
# Expected - task was cancelled
|
||||
# If timeout, state may not be fully saved
|
||||
pass
|
||||
except Exception:
|
||||
# Ignore other exceptions during cleanup
|
||||
pass
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
if hasattr(self, "_subscription_id"):
|
||||
if hasattr(self, "_subscription_id") and self.runtime:
|
||||
self.runtime.unsubscribe_from_events(self._subscription_id)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -0,0 +1,234 @@
|
||||
"""Agent picker ModalScreen for selecting agents within the TUI."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
from rich.console import Group
|
||||
from rich.text import Text
|
||||
from textual.app import ComposeResult
|
||||
from textual.binding import Binding
|
||||
from textual.containers import Vertical
|
||||
from textual.screen import ModalScreen
|
||||
from textual.widgets import Label, OptionList, TabbedContent, TabPane
|
||||
from textual.widgets._option_list import Option
|
||||
|
||||
|
||||
@dataclass
|
||||
class AgentEntry:
|
||||
"""Lightweight agent metadata for the picker."""
|
||||
|
||||
path: Path
|
||||
name: str
|
||||
description: str
|
||||
category: str
|
||||
session_count: int = 0
|
||||
node_count: int = 0
|
||||
tool_count: int = 0
|
||||
tags: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
def _count_sessions(agent_name: str) -> int:
|
||||
"""Count session directories under ~/.hive/agents/{agent_name}/sessions/."""
|
||||
sessions_dir = Path.home() / ".hive" / "agents" / agent_name / "sessions"
|
||||
if not sessions_dir.exists():
|
||||
return 0
|
||||
return sum(1 for d in sessions_dir.iterdir() if d.is_dir() and d.name.startswith("session_"))
|
||||
|
||||
|
||||
def _extract_agent_stats(agent_json_path: Path) -> tuple[int, int, list[str]]:
|
||||
"""Extract node count, tool count, and tags from agent.json."""
|
||||
try:
|
||||
data = json.loads(agent_json_path.read_text())
|
||||
nodes = data.get("nodes", [])
|
||||
node_count = len(nodes)
|
||||
tools: set[str] = set()
|
||||
for node in nodes:
|
||||
tools.update(node.get("tools", []))
|
||||
tags = data.get("agent", {}).get("tags", [])
|
||||
return node_count, len(tools), tags
|
||||
except Exception:
|
||||
return 0, 0, []
|
||||
|
||||
|
||||
def discover_agents() -> dict[str, list[AgentEntry]]:
|
||||
"""Discover agents from all known sources grouped by category."""
|
||||
from framework.runner.cli import (
|
||||
_extract_python_agent_metadata,
|
||||
_get_framework_agents_dir,
|
||||
_is_valid_agent_dir,
|
||||
)
|
||||
|
||||
groups: dict[str, list[AgentEntry]] = {}
|
||||
sources = [
|
||||
("Your Agents", Path("exports")),
|
||||
("Framework", _get_framework_agents_dir()),
|
||||
("Examples", Path("examples/templates")),
|
||||
]
|
||||
|
||||
for category, base_dir in sources:
|
||||
if not base_dir.exists():
|
||||
continue
|
||||
entries: list[AgentEntry] = []
|
||||
for path in sorted(base_dir.iterdir(), key=lambda p: p.name):
|
||||
if not _is_valid_agent_dir(path):
|
||||
continue
|
||||
|
||||
agent_json = path / "agent.json"
|
||||
node_count, tool_count, tags = 0, 0, []
|
||||
if agent_json.exists():
|
||||
try:
|
||||
data = json.loads(agent_json.read_text())
|
||||
meta = data.get("agent", {})
|
||||
name = meta.get("name", path.name)
|
||||
desc = meta.get("description", "")
|
||||
except Exception:
|
||||
name = path.name
|
||||
desc = "(error reading agent.json)"
|
||||
node_count, tool_count, tags = _extract_agent_stats(agent_json)
|
||||
else:
|
||||
name, desc = _extract_python_agent_metadata(path)
|
||||
|
||||
entries.append(
|
||||
AgentEntry(
|
||||
path=path,
|
||||
name=name,
|
||||
description=desc,
|
||||
category=category,
|
||||
session_count=_count_sessions(path.name),
|
||||
node_count=node_count,
|
||||
tool_count=tool_count,
|
||||
tags=tags,
|
||||
)
|
||||
)
|
||||
if entries:
|
||||
groups[category] = entries
|
||||
|
||||
return groups
|
||||
|
||||
|
||||
def _render_agent_option(agent: AgentEntry) -> Group:
|
||||
"""Build a Rich renderable for a single agent option."""
|
||||
# Line 1: name + session badge
|
||||
line1 = Text()
|
||||
line1.append(agent.name, style="bold")
|
||||
if agent.session_count:
|
||||
line1.append(f" {agent.session_count} sessions", style="dim cyan")
|
||||
|
||||
# Line 2: description (word-wrapped by the widget)
|
||||
desc = agent.description if agent.description else "No description"
|
||||
line2 = Text(desc, style="dim")
|
||||
|
||||
# Line 3: stats chips
|
||||
chips = Text()
|
||||
if agent.node_count:
|
||||
chips.append(f" {agent.node_count} nodes ", style="on dark_green white")
|
||||
chips.append(" ")
|
||||
if agent.tool_count:
|
||||
chips.append(f" {agent.tool_count} tools ", style="on dark_blue white")
|
||||
chips.append(" ")
|
||||
for tag in agent.tags[:3]:
|
||||
chips.append(f" {tag} ", style="on grey37 white")
|
||||
chips.append(" ")
|
||||
|
||||
parts = [line1, line2]
|
||||
if chips.plain.strip():
|
||||
parts.append(chips)
|
||||
return Group(*parts)
|
||||
|
||||
|
||||
class AgentPickerScreen(ModalScreen[str | None]):
|
||||
"""Modal screen showing available agents organized by tabbed categories.
|
||||
|
||||
Returns the selected agent path as a string, or None if dismissed.
|
||||
"""
|
||||
|
||||
BINDINGS = [
|
||||
Binding("escape", "dismiss_picker", "Cancel"),
|
||||
]
|
||||
|
||||
DEFAULT_CSS = """
|
||||
AgentPickerScreen {
|
||||
align: center middle;
|
||||
}
|
||||
#picker-container {
|
||||
width: 90%;
|
||||
max-width: 120;
|
||||
height: 85%;
|
||||
background: $surface;
|
||||
border: heavy $primary;
|
||||
padding: 1 2;
|
||||
}
|
||||
#picker-title {
|
||||
text-align: center;
|
||||
text-style: bold;
|
||||
width: 100%;
|
||||
color: $text;
|
||||
}
|
||||
#picker-subtitle {
|
||||
text-align: center;
|
||||
width: 100%;
|
||||
margin-bottom: 1;
|
||||
}
|
||||
#picker-footer {
|
||||
text-align: center;
|
||||
width: 100%;
|
||||
margin-top: 1;
|
||||
}
|
||||
TabPane {
|
||||
padding: 0;
|
||||
}
|
||||
OptionList {
|
||||
height: 1fr;
|
||||
}
|
||||
OptionList > .option-list--option {
|
||||
padding: 1 2;
|
||||
}
|
||||
"""
|
||||
|
||||
def __init__(self, agent_groups: dict[str, list[AgentEntry]]) -> None:
|
||||
super().__init__()
|
||||
self._groups = agent_groups
|
||||
# Map (tab_id, option_index) -> AgentEntry
|
||||
self._option_map: dict[str, dict[int, AgentEntry]] = {}
|
||||
|
||||
def compose(self) -> ComposeResult:
|
||||
total = sum(len(v) for v in self._groups.values())
|
||||
with Vertical(id="picker-container"):
|
||||
yield Label("Hive Agent Launcher", id="picker-title")
|
||||
yield Label(
|
||||
f"[dim]{total} agents available[/dim]",
|
||||
id="picker-subtitle",
|
||||
)
|
||||
with TabbedContent():
|
||||
for category, agents in self._groups.items():
|
||||
tab_id = category.lower().replace(" ", "-")
|
||||
with TabPane(f"{category} ({len(agents)})", id=tab_id):
|
||||
option_list = OptionList(id=f"list-{tab_id}")
|
||||
self._option_map[f"list-{tab_id}"] = {}
|
||||
for i, agent in enumerate(agents):
|
||||
option_list.add_option(
|
||||
Option(
|
||||
_render_agent_option(agent),
|
||||
id=str(agent.path),
|
||||
)
|
||||
)
|
||||
self._option_map[f"list-{tab_id}"][i] = agent
|
||||
yield option_list
|
||||
yield Label(
|
||||
"[dim]Enter[/dim] Select [dim]Tab[/dim] Switch category [dim]Esc[/dim] Cancel",
|
||||
id="picker-footer",
|
||||
)
|
||||
|
||||
def on_option_list_option_selected(self, event: OptionList.OptionSelected) -> None:
|
||||
list_id = event.option_list.id or ""
|
||||
idx = event.option_index
|
||||
agent_map = self._option_map.get(list_id, {})
|
||||
agent = agent_map.get(idx)
|
||||
if agent:
|
||||
self.dismiss(str(agent.path))
|
||||
|
||||
def action_dismiss_picker(self) -> None:
|
||||
self.dismiss(None)
|
||||
@@ -2,9 +2,12 @@
|
||||
Chat / REPL Widget - Uses RichLog for append-only, selection-safe display.
|
||||
|
||||
Streaming display approach:
|
||||
- The processing-indicator Label is used as a live status bar during streaming
|
||||
(Label.update() replaces text in-place, unlike RichLog which is append-only).
|
||||
- On EXECUTION_COMPLETED, the final output is written to RichLog as permanent history.
|
||||
- The #streaming-output RichLog shows live LLM output as it streams in.
|
||||
Each text delta appends new tokens so the user sees the full response forming.
|
||||
- On flush (tool call, node switch, execution complete, input requested) the
|
||||
accumulated text is written to #chat-history as permanent history and the
|
||||
streaming area is cleared.
|
||||
- The #processing-indicator Label shows brief status messages (tool names, etc.).
|
||||
- Tool events are written directly to RichLog as discrete status lines.
|
||||
|
||||
Client-facing input:
|
||||
@@ -22,6 +25,7 @@ import threading
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from rich.text import Text
|
||||
from textual.app import ComposeResult
|
||||
from textual.containers import Vertical
|
||||
from textual.message import Message
|
||||
@@ -77,6 +81,18 @@ class ChatRepl(Vertical):
|
||||
scrollbar-color: $primary;
|
||||
}
|
||||
|
||||
ChatRepl > #streaming-output {
|
||||
width: 100%;
|
||||
height: auto;
|
||||
min-height: 0;
|
||||
max-height: 50%;
|
||||
background: $surface;
|
||||
border: none;
|
||||
display: none;
|
||||
scrollbar-background: $panel;
|
||||
scrollbar-color: $primary;
|
||||
}
|
||||
|
||||
ChatRepl > #processing-indicator {
|
||||
width: 100%;
|
||||
height: 1;
|
||||
@@ -111,8 +127,10 @@ class ChatRepl(Vertical):
|
||||
self.runtime = runtime
|
||||
self._current_exec_id: str | None = None
|
||||
self._streaming_snapshot: str = ""
|
||||
self._streaming_written: int = 0 # chars already written to streaming-output
|
||||
self._waiting_for_input: bool = False
|
||||
self._input_node_id: str | None = None
|
||||
self._input_graph_id: str | None = None
|
||||
self._pending_ask_question: str = ""
|
||||
self._active_node_id: str | None = None # Currently executing node
|
||||
self._resume_session = resume_session
|
||||
@@ -142,6 +160,14 @@ class ChatRepl(Vertical):
|
||||
wrap=True,
|
||||
min_width=0,
|
||||
)
|
||||
yield RichLog(
|
||||
id="streaming-output",
|
||||
highlight=True,
|
||||
markup=True,
|
||||
auto_scroll=True,
|
||||
wrap=True,
|
||||
min_width=0,
|
||||
)
|
||||
yield Label("Agent is processing...", id="processing-indicator")
|
||||
yield ChatTextArea(id="chat-input", placeholder="Enter input for agent...")
|
||||
|
||||
@@ -208,6 +234,13 @@ class ChatRepl(Vertical):
|
||||
[bold]/resume[/bold] <session_id> - Resume session by ID
|
||||
[bold]/recover[/bold] <session_id> <cp_id> - Recover from specific checkpoint
|
||||
[bold]/pause[/bold] - Pause current execution (Ctrl+Z)
|
||||
[bold]/agents[/bold] - Browse and switch agents (Ctrl+A)
|
||||
[bold]/coder[/bold] [reason] - Escalate to Hive Coder for code changes
|
||||
[bold]/back[/bold] [summary] - Return from Hive Coder to worker agent
|
||||
[bold]/graphs[/bold] - List loaded graphs and their status
|
||||
[bold]/graph[/bold] <id> - Switch active graph focus
|
||||
[bold]/load[/bold] <path> - Load an agent graph into the session
|
||||
[bold]/unload[/bold] <id> - Remove a graph from the session
|
||||
[bold]/help[/bold] - Show this help message
|
||||
|
||||
[dim]Examples:[/dim]
|
||||
@@ -216,6 +249,10 @@ class ChatRepl(Vertical):
|
||||
/detach [dim]# Remove attached PDF[/dim]
|
||||
/sessions [dim]# List all sessions[/dim]
|
||||
/resume 1 [dim]# Resume first listed session[/dim]
|
||||
/graphs [dim]# Show loaded agent graphs[/dim]
|
||||
/graph email_agent [dim]# Switch focus to email_agent[/dim]
|
||||
/load exports/email_agent [dim]# Load agent into session[/dim]
|
||||
/unload email_agent [dim]# Remove agent from session[/dim]
|
||||
/pause [dim]# Pause (or Ctrl+Z)[/dim]
|
||||
""")
|
||||
elif cmd == "/sessions":
|
||||
@@ -268,6 +305,33 @@ class ChatRepl(Vertical):
|
||||
self._write_history("[dim]No PDF attached.[/dim]")
|
||||
elif cmd == "/pause":
|
||||
await self._cmd_pause()
|
||||
elif cmd == "/agents":
|
||||
app = self.app
|
||||
if hasattr(app, "action_show_agent_picker"):
|
||||
await app.action_show_agent_picker()
|
||||
elif cmd == "/graphs":
|
||||
self._cmd_graphs()
|
||||
elif cmd == "/graph":
|
||||
if len(parts) < 2:
|
||||
self._write_history("[bold red]Usage:[/bold red] /graph <graph_id>")
|
||||
else:
|
||||
self._cmd_switch_graph(parts[1].strip())
|
||||
elif cmd == "/load":
|
||||
if len(parts) < 2:
|
||||
self._write_history("[bold red]Usage:[/bold red] /load <agent_path>")
|
||||
else:
|
||||
await self._cmd_load_graph(parts[1].strip())
|
||||
elif cmd == "/unload":
|
||||
if len(parts) < 2:
|
||||
self._write_history("[bold red]Usage:[/bold red] /unload <graph_id>")
|
||||
else:
|
||||
await self._cmd_unload_graph(parts[1].strip())
|
||||
elif cmd == "/coder":
|
||||
reason = " ".join(parts[1:]) if len(parts) > 1 else ""
|
||||
await self._cmd_coder(reason)
|
||||
elif cmd == "/back":
|
||||
summary = " ".join(parts[1:]) if len(parts) > 1 else ""
|
||||
await self._cmd_back(summary)
|
||||
else:
|
||||
self._write_history(
|
||||
f"[bold red]Unknown command:[/bold red] {cmd}\n"
|
||||
@@ -769,6 +833,150 @@ class ChatRepl(Vertical):
|
||||
if not task_cancelled:
|
||||
self._write_history("[bold yellow]Execution already completed[/bold yellow]")
|
||||
|
||||
async def _cmd_coder(self, reason: str = "") -> None:
|
||||
"""User-initiated escalation to Hive Coder."""
|
||||
app = self.app
|
||||
if not hasattr(app, "_do_escalate_to_coder"):
|
||||
self._write_history("[bold red]Escalation not available[/bold red]")
|
||||
return
|
||||
|
||||
context_parts = []
|
||||
if self._active_node_id:
|
||||
context_parts.append(f"Active node: {self._active_node_id}")
|
||||
if self._streaming_snapshot:
|
||||
snippet = self._streaming_snapshot[:500]
|
||||
context_parts.append(f"Last agent output: {snippet}")
|
||||
context = "\n".join(context_parts)
|
||||
|
||||
if not reason:
|
||||
reason = "User-initiated escalation via /coder"
|
||||
|
||||
self._write_history("[bold cyan]Escalating to Hive Coder...[/bold cyan]")
|
||||
|
||||
node_id = self._input_node_id or self._active_node_id or ""
|
||||
app._do_escalate_to_coder(
|
||||
reason=reason,
|
||||
context=context,
|
||||
node_id=node_id,
|
||||
)
|
||||
|
||||
async def _cmd_back(self, summary: str = "") -> None:
|
||||
"""Return from Hive Coder to the worker agent."""
|
||||
app = self.app
|
||||
if not hasattr(app, "_escalation_stack"):
|
||||
self._write_history("[bold yellow]Not in an escalation.[/bold yellow]")
|
||||
return
|
||||
if not app._escalation_stack:
|
||||
self._write_history(
|
||||
"[bold yellow]Not in an escalation.[/bold yellow] "
|
||||
"/back is only available after /coder or agent escalation."
|
||||
)
|
||||
return
|
||||
|
||||
self._write_history("[bold cyan]Returning to worker agent...[/bold cyan]")
|
||||
await app._return_from_escalation(summary)
|
||||
|
||||
def _cmd_graphs(self) -> None:
|
||||
"""List all loaded graphs and their status."""
|
||||
graphs = self.runtime.list_graphs()
|
||||
if not graphs:
|
||||
self._write_history("[dim]No graphs loaded[/dim]")
|
||||
return
|
||||
|
||||
lines = ["[bold cyan]Loaded Graphs:[/bold cyan]"]
|
||||
for gid in graphs:
|
||||
reg = self.runtime.get_graph_registration(gid)
|
||||
if reg is None:
|
||||
continue
|
||||
is_primary = gid == self.runtime.graph_id
|
||||
is_active = gid == self.runtime.active_graph_id
|
||||
markers = []
|
||||
if is_primary:
|
||||
markers.append("primary")
|
||||
if is_active:
|
||||
markers.append("active")
|
||||
marker_str = f" [dim]({', '.join(markers)})[/dim]" if markers else ""
|
||||
ep_list = ", ".join(reg.entry_points.keys())
|
||||
active_execs = sum(len(s.active_execution_ids) for s in reg.streams.values())
|
||||
exec_str = f" [green]{active_execs} running[/green]" if active_execs else ""
|
||||
lines.append(f" [bold]{gid}[/bold]{marker_str} — eps: {ep_list}{exec_str}")
|
||||
self._write_history("\n".join(lines))
|
||||
|
||||
def _cmd_switch_graph(self, graph_id: str) -> None:
|
||||
"""Switch the active graph focus."""
|
||||
try:
|
||||
self.runtime.active_graph_id = graph_id
|
||||
except ValueError:
|
||||
self._write_history(
|
||||
f"[bold red]Graph '{graph_id}' not found.[/bold red] "
|
||||
"Use /graphs to see loaded graphs."
|
||||
)
|
||||
return
|
||||
|
||||
# Tell the app to update the UI
|
||||
app = self.app
|
||||
if hasattr(app, "action_switch_graph"):
|
||||
app.action_switch_graph(graph_id)
|
||||
else:
|
||||
self._write_history(f"[bold green]Switched to graph: {graph_id}[/bold green]")
|
||||
|
||||
async def _cmd_load_graph(self, agent_path: str) -> None:
|
||||
"""Load an agent graph into the session."""
|
||||
from pathlib import Path
|
||||
|
||||
path = Path(agent_path).resolve()
|
||||
if not path.exists():
|
||||
self._write_history(f"[bold red]Path does not exist:[/bold red] {path}")
|
||||
return
|
||||
|
||||
self._write_history(f"[dim]Loading agent from {path}...[/dim]")
|
||||
|
||||
try:
|
||||
from framework.runner.runner import AgentRunner
|
||||
|
||||
graph_id = await AgentRunner.setup_as_secondary(path, self.runtime)
|
||||
self._write_history(
|
||||
f"[bold green]Loaded graph '{graph_id}'[/bold green] — "
|
||||
"use /graphs to see all, /graph to switch"
|
||||
)
|
||||
except Exception as e:
|
||||
self._write_history(f"[bold red]Failed to load agent:[/bold red] {e}")
|
||||
|
||||
async def _cmd_unload_graph(self, graph_id: str) -> None:
|
||||
"""Unload a secondary graph from the session."""
|
||||
try:
|
||||
await self.runtime.remove_graph(graph_id)
|
||||
self._write_history(f"[bold green]Unloaded graph '{graph_id}'[/bold green]")
|
||||
except ValueError as e:
|
||||
self._write_history(f"[bold red]Error:[/bold red] {e}")
|
||||
|
||||
def _node_label(self, node_id: str | None = None) -> str:
|
||||
"""Resolve a node_id to a Rich-formatted speaker label."""
|
||||
nid = node_id or self._active_node_id
|
||||
if nid:
|
||||
node = self.runtime.graph.get_node(nid)
|
||||
name = node.name if node else nid
|
||||
return f"[bold blue]{name}:[/bold blue]"
|
||||
return "[bold blue]Agent:[/bold blue]"
|
||||
|
||||
def _clear_streaming(self) -> None:
|
||||
"""Reset streaming state and hide the live output area."""
|
||||
self._streaming_snapshot = ""
|
||||
self._streaming_written = 0
|
||||
stream_log = self.query_one("#streaming-output", RichLog)
|
||||
stream_log.clear()
|
||||
stream_log.display = False
|
||||
|
||||
def flush_streaming(self) -> None:
|
||||
"""Flush any accumulated streaming text to history.
|
||||
|
||||
Called by the app when switching graphs to ensure in-progress
|
||||
streaming content is preserved before the UI context changes.
|
||||
"""
|
||||
if self._streaming_snapshot:
|
||||
self._write_history(f"{self._node_label()} {self._streaming_snapshot}")
|
||||
self._clear_streaming()
|
||||
|
||||
def on_mount(self) -> None:
|
||||
"""Add welcome message and check for resumable sessions."""
|
||||
history = self.query_one("#chat-history", RichLog)
|
||||
@@ -903,11 +1111,13 @@ class ChatRepl(Vertical):
|
||||
indicator.update("Thinking...")
|
||||
|
||||
node_id = self._input_node_id
|
||||
graph_id = self._input_graph_id
|
||||
self._input_node_id = None
|
||||
self._input_graph_id = None
|
||||
|
||||
try:
|
||||
future = asyncio.run_coroutine_threadsafe(
|
||||
self.runtime.inject_input(node_id, user_input),
|
||||
self.runtime.inject_input(node_id, user_input, graph_id=graph_id),
|
||||
self._agent_loop,
|
||||
)
|
||||
await asyncio.wrap_future(future)
|
||||
@@ -956,7 +1166,7 @@ class ChatRepl(Vertical):
|
||||
input_key = "input"
|
||||
|
||||
# Reset streaming state
|
||||
self._streaming_snapshot = ""
|
||||
self._clear_streaming()
|
||||
|
||||
# Show processing indicator
|
||||
indicator.update("Thinking...")
|
||||
@@ -1004,34 +1214,45 @@ class ChatRepl(Vertical):
|
||||
previous node and resets the processing indicator so the user
|
||||
sees a clean transition between graph nodes.
|
||||
"""
|
||||
self._active_node_id = node_id
|
||||
# Flush stale snapshot with the PREVIOUS node's label before switching
|
||||
if self._streaming_snapshot:
|
||||
self._write_history(f"[bold blue]Agent:[/bold blue] {self._streaming_snapshot}")
|
||||
self._streaming_snapshot = ""
|
||||
self._write_history(f"{self._node_label()} {self._streaming_snapshot}")
|
||||
self._clear_streaming()
|
||||
self._active_node_id = node_id
|
||||
indicator = self.query_one("#processing-indicator", Label)
|
||||
indicator.update("Thinking...")
|
||||
|
||||
def handle_loop_iteration(self, iteration: int) -> None:
|
||||
"""Flush accumulated streaming text when a new loop iteration starts."""
|
||||
if self._streaming_snapshot:
|
||||
self._write_history(f"[bold blue]Agent:[/bold blue] {self._streaming_snapshot}")
|
||||
self._streaming_snapshot = ""
|
||||
self._write_history(f"{self._node_label()} {self._streaming_snapshot}")
|
||||
self._clear_streaming()
|
||||
|
||||
def handle_text_delta(self, content: str, snapshot: str) -> None:
|
||||
"""Handle a streaming text token from the LLM."""
|
||||
self._streaming_snapshot = snapshot
|
||||
|
||||
# Show a truncated live preview in the indicator label
|
||||
indicator = self.query_one("#processing-indicator", Label)
|
||||
preview = snapshot[-80:] if len(snapshot) > 80 else snapshot
|
||||
# Replace newlines for single-line display
|
||||
preview = preview.replace("\n", " ")
|
||||
indicator.update(
|
||||
f"Thinking: ...{preview}" if len(snapshot) > 80 else f"Thinking: {preview}"
|
||||
)
|
||||
# Stream into the live output area
|
||||
stream_log = self.query_one("#streaming-output", RichLog)
|
||||
if not stream_log.display:
|
||||
stream_log.display = True
|
||||
|
||||
# Rewrite the full snapshot as a single block so text wraps
|
||||
# naturally instead of one token per line.
|
||||
stream_log.clear()
|
||||
stream_log.write(Text.from_markup(f"{self._node_label()} {snapshot}"))
|
||||
self._streaming_written = len(snapshot)
|
||||
|
||||
def handle_tool_started(self, tool_name: str, tool_input: dict[str, Any]) -> None:
|
||||
"""Handle a tool call starting."""
|
||||
# Flush any accumulated LLM text before the tool call starts.
|
||||
# Without this, text from a turn that also issues tool calls
|
||||
# would sit in _streaming_snapshot and get overwritten by the
|
||||
# next LLM turn, never appearing in the chat log.
|
||||
if self._streaming_snapshot:
|
||||
self._write_history(f"{self._node_label()} {self._streaming_snapshot}")
|
||||
self._clear_streaming()
|
||||
|
||||
indicator = self.query_one("#processing-indicator", Label)
|
||||
|
||||
if tool_name == "ask_user":
|
||||
@@ -1041,6 +1262,10 @@ class ChatRepl(Vertical):
|
||||
indicator.update("Preparing question...")
|
||||
return
|
||||
|
||||
if tool_name == "escalate_to_coder":
|
||||
indicator.update("Escalating to coder...")
|
||||
return
|
||||
|
||||
# Update indicator to show tool activity
|
||||
indicator.update(f"Using tool: {tool_name}...")
|
||||
|
||||
@@ -1052,9 +1277,7 @@ class ChatRepl(Vertical):
|
||||
|
||||
def handle_tool_completed(self, tool_name: str, result: str, is_error: bool) -> None:
|
||||
"""Handle a tool call completing."""
|
||||
if tool_name == "ask_user":
|
||||
# Suppress the synthetic "Waiting for user input..." result.
|
||||
# The actual question is displayed by handle_input_requested().
|
||||
if tool_name in ("ask_user", "escalate_to_coder"):
|
||||
return
|
||||
|
||||
result_str = str(result)
|
||||
@@ -1080,14 +1303,14 @@ class ChatRepl(Vertical):
|
||||
|
||||
# Write the final streaming snapshot to permanent history (if any)
|
||||
if self._streaming_snapshot:
|
||||
self._write_history(f"[bold blue]Agent:[/bold blue] {self._streaming_snapshot}")
|
||||
self._write_history(f"{self._node_label()} {self._streaming_snapshot}")
|
||||
else:
|
||||
output_str = str(output.get("output_string", output))
|
||||
self._write_history(f"[bold blue]Agent:[/bold blue] {output_str}")
|
||||
self._write_history(f"{self._node_label()} {output_str}")
|
||||
self._write_history("") # separator
|
||||
|
||||
self._current_exec_id = None
|
||||
self._streaming_snapshot = ""
|
||||
self._clear_streaming()
|
||||
self._waiting_for_input = False
|
||||
self._input_node_id = None
|
||||
self._active_node_id = None
|
||||
@@ -1109,7 +1332,7 @@ class ChatRepl(Vertical):
|
||||
self._write_history("") # separator
|
||||
|
||||
self._current_exec_id = None
|
||||
self._streaming_snapshot = ""
|
||||
self._clear_streaming()
|
||||
self._waiting_for_input = False
|
||||
self._pending_ask_question = ""
|
||||
self._input_node_id = None
|
||||
@@ -1122,7 +1345,18 @@ class ChatRepl(Vertical):
|
||||
chat_input.placeholder = "Enter input for agent..."
|
||||
chat_input.focus()
|
||||
|
||||
def handle_input_requested(self, node_id: str) -> None:
|
||||
def handle_escalation_requested(self, data: dict) -> None:
|
||||
"""Display escalation request from the worker agent."""
|
||||
if self._streaming_snapshot:
|
||||
self._write_history(f"{self._node_label()} {self._streaming_snapshot}")
|
||||
self._clear_streaming()
|
||||
|
||||
reason = data.get("reason", "")
|
||||
self._write_history("[bold yellow]Agent is escalating to Hive Coder[/bold yellow]")
|
||||
if reason:
|
||||
self._write_history(f"[dim]Reason: {reason}[/dim]")
|
||||
|
||||
def handle_input_requested(self, node_id: str, graph_id: str | None = None) -> None:
|
||||
"""Handle a client-facing node requesting user input.
|
||||
|
||||
Transitions to 'waiting for input' state: flushes the current
|
||||
@@ -1130,27 +1364,33 @@ class ChatRepl(Vertical):
|
||||
and sets a flag so the next submission routes to inject_input().
|
||||
"""
|
||||
# Flush accumulated streaming text as agent output
|
||||
label = self._node_label(node_id)
|
||||
flushed_snapshot = self._streaming_snapshot
|
||||
if flushed_snapshot:
|
||||
self._write_history(f"[bold blue]Agent:[/bold blue] {flushed_snapshot}")
|
||||
self._streaming_snapshot = ""
|
||||
self._write_history(f"{label} {flushed_snapshot}")
|
||||
self._clear_streaming()
|
||||
|
||||
# Display the ask_user question if stashed and not already
|
||||
# present in the streaming snapshot (avoids double-display).
|
||||
question = self._pending_ask_question
|
||||
self._pending_ask_question = ""
|
||||
if question and question not in flushed_snapshot:
|
||||
self._write_history(f"[bold blue]Agent:[/bold blue] {question}")
|
||||
self._write_history(f"{label} {question}")
|
||||
|
||||
self._waiting_for_input = True
|
||||
self._input_node_id = node_id or None
|
||||
self._input_graph_id = graph_id
|
||||
|
||||
indicator = self.query_one("#processing-indicator", Label)
|
||||
indicator.update("Waiting for your input...")
|
||||
|
||||
chat_input = self.query_one("#chat-input", ChatTextArea)
|
||||
chat_input.disabled = False
|
||||
chat_input.placeholder = "Type your response..."
|
||||
node = self.runtime.graph.get_node(node_id) if node_id else None
|
||||
name = node.name if node else None
|
||||
chat_input.placeholder = (
|
||||
f"Type your response to {name}..." if name else "Type your response..."
|
||||
)
|
||||
chat_input.focus()
|
||||
|
||||
def handle_node_completed(self, node_id: str) -> None:
|
||||
|
||||
@@ -52,12 +52,26 @@ class GraphOverview(Vertical):
|
||||
def __init__(self, runtime: AgentRuntime):
|
||||
super().__init__()
|
||||
self.runtime = runtime
|
||||
self._override_graph = None # Set by switch_graph() for secondary graphs
|
||||
self.active_node: str | None = None
|
||||
self.execution_path: list[str] = []
|
||||
# Per-node status strings shown next to the node in the graph display.
|
||||
# e.g. {"planner": "thinking...", "searcher": "web_search..."}
|
||||
self._node_status: dict[str, str] = {}
|
||||
|
||||
@property
|
||||
def _graph(self):
|
||||
"""The graph currently being displayed (may be a secondary graph)."""
|
||||
return self._override_graph or self.runtime.graph
|
||||
|
||||
def switch_graph(self, graph) -> None:
|
||||
"""Switch to displaying a different graph and refresh."""
|
||||
self._override_graph = graph
|
||||
self.active_node = None
|
||||
self.execution_path = []
|
||||
self._node_status = {}
|
||||
self._display_graph()
|
||||
|
||||
def compose(self) -> ComposeResult:
|
||||
# Use RichLog for formatted output
|
||||
yield RichLog(id="graph-display", highlight=True, markup=True)
|
||||
@@ -75,7 +89,7 @@ class GraphOverview(Vertical):
|
||||
|
||||
def _topo_order(self) -> list[str]:
|
||||
"""BFS from entry_node following edges."""
|
||||
graph = self.runtime.graph
|
||||
graph = self._graph
|
||||
visited: list[str] = []
|
||||
seen: set[str] = set()
|
||||
queue = [graph.entry_node]
|
||||
@@ -102,7 +116,7 @@ class GraphOverview(Vertical):
|
||||
order_idx = {nid: i for i, nid in enumerate(ordered)}
|
||||
back_edges: list[dict] = []
|
||||
for node_id in ordered:
|
||||
for edge in self.runtime.graph.get_outgoing_edges(node_id):
|
||||
for edge in self._graph.get_outgoing_edges(node_id):
|
||||
target_idx = order_idx.get(edge.target, -1)
|
||||
source_idx = order_idx.get(node_id, -1)
|
||||
if target_idx != -1 and target_idx <= source_idx:
|
||||
@@ -129,7 +143,7 @@ class GraphOverview(Vertical):
|
||||
|
||||
def _render_node_line(self, node_id: str) -> str:
|
||||
"""Render a single node with status symbol and optional status text."""
|
||||
graph = self.runtime.graph
|
||||
graph = self._graph
|
||||
is_terminal = node_id in (graph.terminal_nodes or [])
|
||||
is_active = node_id == self.active_node
|
||||
is_done = node_id in self.execution_path and not is_active
|
||||
@@ -160,7 +174,7 @@ class GraphOverview(Vertical):
|
||||
Back-edges are excluded here — they are drawn by the return-channel
|
||||
overlay in Pass 2.
|
||||
"""
|
||||
all_edges = self.runtime.graph.get_outgoing_edges(node_id)
|
||||
all_edges = self._graph.get_outgoing_edges(node_id)
|
||||
if not all_edges:
|
||||
return []
|
||||
|
||||
@@ -399,7 +413,7 @@ class GraphOverview(Vertical):
|
||||
display = self.query_one("#graph-display", RichLog)
|
||||
display.clear()
|
||||
|
||||
graph = self.runtime.graph
|
||||
graph = self._graph
|
||||
display.write(f"[bold cyan]Agent Graph:[/bold cyan] {graph.id}\n")
|
||||
|
||||
ordered = self._topo_order()
|
||||
@@ -457,18 +471,23 @@ class GraphOverview(Vertical):
|
||||
|
||||
for ep in event_sources:
|
||||
if ep.trigger_type == "timer":
|
||||
cron_expr = ep.trigger_config.get("cron")
|
||||
interval = ep.trigger_config.get("interval_minutes", "?")
|
||||
schedule_label = f"cron: {cron_expr}" if cron_expr else f"every {interval} min"
|
||||
display.write(f" [green]⏱[/green] {ep.name} [dim]→ {ep.entry_node}[/dim]")
|
||||
# Show interval + next fire countdown
|
||||
# Show schedule + next fire countdown
|
||||
next_fire = self.runtime._timer_next_fire.get(ep.id)
|
||||
if next_fire is not None:
|
||||
remaining = max(0, next_fire - time.monotonic())
|
||||
mins, secs = divmod(int(remaining), 60)
|
||||
display.write(
|
||||
f" [dim]every {interval} min — next in {mins}m {secs:02d}s[/dim]"
|
||||
)
|
||||
hours, rem = divmod(int(remaining), 3600)
|
||||
mins, secs = divmod(rem, 60)
|
||||
if hours > 0:
|
||||
countdown = f"{hours}h {mins:02d}m {secs:02d}s"
|
||||
else:
|
||||
countdown = f"{mins}m {secs:02d}s"
|
||||
display.write(f" [dim]{schedule_label} — next in {countdown}[/dim]")
|
||||
else:
|
||||
display.write(f" [dim]every {interval} min[/dim]")
|
||||
display.write(f" [dim]{schedule_label}[/dim]")
|
||||
|
||||
elif ep.trigger_type in ("event", "webhook"):
|
||||
display.write(f" [yellow]⚡[/yellow] {ep.name} [dim]→ {ep.entry_node}[/dim]")
|
||||
@@ -510,7 +529,7 @@ class GraphOverview(Vertical):
|
||||
self._node_status.clear()
|
||||
self.execution_path.clear()
|
||||
entry_node = event.data.get("entry_node") or (
|
||||
self.runtime.graph.entry_node if self.runtime else None
|
||||
self._graph.entry_node if self.runtime else None
|
||||
)
|
||||
if entry_node:
|
||||
self.update_active_node(entry_node)
|
||||
|
||||
@@ -178,7 +178,12 @@ class SelectableRichLog(RichLog):
|
||||
|
||||
# Build full text from all lines
|
||||
all_text = "\n".join(strip.text for strip in self.lines)
|
||||
extracted = sel.extract(all_text)
|
||||
try:
|
||||
extracted = sel.extract(all_text)
|
||||
except (IndexError, ValueError):
|
||||
# Selection coordinates can exceed line count when the virtual
|
||||
# canvas is larger than the actual content (e.g. after scroll).
|
||||
return None
|
||||
return extracted if extracted else None
|
||||
|
||||
def copy_selection(self) -> str | None:
|
||||
|
||||
@@ -147,16 +147,18 @@ class TestComposeSystemPrompt:
|
||||
|
||||
def test_identity_only(self):
|
||||
result = compose_system_prompt(identity_prompt="I am an agent.", focus_prompt=None)
|
||||
assert result == "I am an agent."
|
||||
assert result.startswith("I am an agent.")
|
||||
assert "Current date and time:" in result
|
||||
|
||||
def test_focus_only(self):
|
||||
result = compose_system_prompt(identity_prompt=None, focus_prompt="Do the thing.")
|
||||
assert "Current Focus" in result
|
||||
assert "Do the thing." in result
|
||||
assert "Current date and time:" in result
|
||||
|
||||
def test_empty(self):
|
||||
result = compose_system_prompt(identity_prompt=None, focus_prompt=None)
|
||||
assert result == ""
|
||||
assert "Current date and time:" in result
|
||||
|
||||
|
||||
class TestBuildNarrative:
|
||||
|
||||
@@ -80,11 +80,11 @@ def goal():
|
||||
|
||||
|
||||
def test_max_node_visits_default():
|
||||
"""NodeSpec.max_node_visits should default to 1."""
|
||||
"""NodeSpec.max_node_visits should default to 0 (unbounded, for forever-alive agents)."""
|
||||
spec = NodeSpec(
|
||||
id="n", name="N", description="test", node_type="event_loop", output_keys=["out"]
|
||||
)
|
||||
assert spec.max_node_visits == 1
|
||||
assert spec.max_node_visits == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -11,39 +11,61 @@ from framework.llm.litellm import LiteLLMProvider
|
||||
from framework.llm.provider import LLMProvider
|
||||
from framework.runner.orchestrator import AgentOrchestrator
|
||||
|
||||
# Patch config helpers so tests don't depend on local ~/.hive/configuration.json
|
||||
_CONFIG_PATCHES = {
|
||||
"framework.config.get_api_key": lambda: None,
|
||||
"framework.config.get_api_base": lambda: None,
|
||||
"framework.config.get_llm_extra_kwargs": lambda: {},
|
||||
}
|
||||
|
||||
|
||||
def _patched(fn):
|
||||
"""Apply config patches to a test function."""
|
||||
for target, side_effect in _CONFIG_PATCHES.items():
|
||||
fn = patch(target, side_effect)(fn)
|
||||
return fn
|
||||
|
||||
|
||||
class TestOrchestratorLLMInitialization:
|
||||
"""Test AgentOrchestrator LLM provider initialization."""
|
||||
|
||||
@_patched
|
||||
def test_auto_creates_litellm_provider_when_no_llm_passed(self):
|
||||
"""Test that LiteLLMProvider is auto-created when no llm is passed."""
|
||||
with patch.object(LiteLLMProvider, "__init__", return_value=None) as mock_init:
|
||||
orchestrator = AgentOrchestrator()
|
||||
|
||||
mock_init.assert_called_once_with(model="claude-haiku-4-5-20251001")
|
||||
mock_init.assert_called_once_with(
|
||||
model="claude-haiku-4-5-20251001", api_key=None, api_base=None
|
||||
)
|
||||
assert orchestrator._llm is not None
|
||||
|
||||
@_patched
|
||||
def test_uses_custom_model_parameter(self):
|
||||
"""Test that custom model parameter is passed to LiteLLMProvider."""
|
||||
with patch.object(LiteLLMProvider, "__init__", return_value=None) as mock_init:
|
||||
AgentOrchestrator(model="gpt-4o")
|
||||
|
||||
mock_init.assert_called_once_with(model="gpt-4o")
|
||||
mock_init.assert_called_once_with(model="gpt-4o", api_key=None, api_base=None)
|
||||
|
||||
@_patched
|
||||
def test_supports_openai_model_names(self):
|
||||
"""Test that OpenAI model names are supported."""
|
||||
with patch.object(LiteLLMProvider, "__init__", return_value=None) as mock_init:
|
||||
orchestrator = AgentOrchestrator(model="gpt-4o-mini")
|
||||
|
||||
mock_init.assert_called_once_with(model="gpt-4o-mini")
|
||||
mock_init.assert_called_once_with(model="gpt-4o-mini", api_key=None, api_base=None)
|
||||
assert orchestrator._model == "gpt-4o-mini"
|
||||
|
||||
@_patched
|
||||
def test_supports_anthropic_model_names(self):
|
||||
"""Test that Anthropic model names are supported."""
|
||||
with patch.object(LiteLLMProvider, "__init__", return_value=None) as mock_init:
|
||||
orchestrator = AgentOrchestrator(model="claude-3-haiku-20240307")
|
||||
|
||||
mock_init.assert_called_once_with(model="claude-3-haiku-20240307")
|
||||
mock_init.assert_called_once_with(
|
||||
model="claude-3-haiku-20240307", api_key=None, api_base=None
|
||||
)
|
||||
assert orchestrator._model == "claude-3-haiku-20240307"
|
||||
|
||||
def test_skips_auto_creation_when_llm_passed(self):
|
||||
@@ -56,6 +78,7 @@ class TestOrchestratorLLMInitialization:
|
||||
mock_init.assert_not_called()
|
||||
assert orchestrator._llm is mock_llm
|
||||
|
||||
@_patched
|
||||
def test_model_attribute_stored_correctly(self):
|
||||
"""Test that _model attribute is stored correctly."""
|
||||
with patch.object(LiteLLMProvider, "__init__", return_value=None):
|
||||
|
||||
@@ -0,0 +1,112 @@
|
||||
# Hive Coder: Meta-Agent Integration Plan
|
||||
|
||||
## Problem
|
||||
|
||||
The hive_coder agent currently has 7 file I/O tools (`read_file`, `write_file`, `edit_file`, `list_directory`, `search_files`, `run_command`, `undo_changes`) in `tools/coder_tools_server.py`. It can write agent packages but is **not integrated into the Hive ecosystem**:
|
||||
|
||||
1. **No dynamic tool discovery** — It references a static list of hive-tools in `reference/framework_guide.md`. It can't discover what MCP tools are actually available or what parameters they accept.
|
||||
2. **No runtime observability** — It can't inspect sessions, checkpoints, or logs from agents it builds. When something goes wrong, the user has to manually dig through files.
|
||||
3. **No test execution** — It can't run an agent's test suite structurally (it could use `run_command` with raw pytest, but has no structured test parsing).
|
||||
|
||||
## Solution
|
||||
|
||||
Add 8 new tools to `coder_tools_server.py` that give hive_coder deep integration with the Hive framework. Update the system prompt to teach the LLM when and how to use these meta-agent capabilities.
|
||||
|
||||
---
|
||||
|
||||
## New Tools
|
||||
|
||||
### 1. Tool Discovery
|
||||
|
||||
**`discover_mcp_tools(server_config_path?)`**
|
||||
|
||||
Connect to any MCP server and list all available tools with full schemas. Uses `framework.runner.mcp_client.MCPClient` — the same client the runtime uses. Reads a `mcp_servers.json` file (defaults to hive-tools), connects to each server, calls `list_tools()`, returns tool names + descriptions + input schemas, then disconnects.
|
||||
|
||||
This replaces the static tools reference. The LLM now discovers tools dynamically before designing an agent.
|
||||
|
||||
### 2. Agent Inventory
|
||||
|
||||
**`list_agents()`**
|
||||
|
||||
Scan `exports/` for agent packages and `~/.hive/agents/` for runtime data. Returns agent names, descriptions (from `__init__.py`), and session counts. Gives the LLM awareness of what already exists.
|
||||
|
||||
### 3-7. Session & Checkpoint Inspection
|
||||
|
||||
Ported from `agent_builder_server.py` lines 3484-3856. Pure filesystem reads — JSON + pathlib, zero framework imports.
|
||||
|
||||
| Tool | Purpose |
|
||||
|------|---------|
|
||||
| `list_agent_sessions(agent_name, status?, limit?)` | List sessions, filterable by status |
|
||||
| `get_agent_session_state(agent_name, session_id)` | Full session state (memory excluded to prevent context bloat) |
|
||||
| `get_agent_session_memory(agent_name, session_id, key?)` | Read memory contents from a session |
|
||||
| `list_agent_checkpoints(agent_name, session_id)` | List checkpoints for debugging |
|
||||
| `get_agent_checkpoint(agent_name, session_id, checkpoint_id?)` | Load a checkpoint's full state |
|
||||
|
||||
**Key difference from agent-builder:** These tools accept `agent_name` (e.g. `"deep_research_agent"`) instead of raw `agent_work_dir` paths. They resolve to `~/.hive/agents/{agent_name}/` internally. Friendlier for the LLM.
|
||||
|
||||
### 8. Test Execution
|
||||
|
||||
**`run_agent_tests(agent_name, test_types?, fail_fast?)`**
|
||||
|
||||
Ported from `agent_builder_server.py` lines 2756-2920. Runs pytest on an agent's test suite, sets PYTHONPATH automatically, parses output into structured results (passed/failed/skipped counts, per-test status, failure details).
|
||||
|
||||
---
|
||||
|
||||
## Files to Modify
|
||||
|
||||
### `tools/coder_tools_server.py` (~400 new lines)
|
||||
|
||||
Add all 8 tools after the existing `undo_changes` tool:
|
||||
|
||||
```
|
||||
# ── Meta-agent: Tool discovery ────────────────────────────────
|
||||
# discover_mcp_tools()
|
||||
|
||||
# ── Meta-agent: Agent inventory ───────────────────────────────
|
||||
# list_agents()
|
||||
|
||||
# ── Meta-agent: Session & checkpoint inspection ───────────────
|
||||
# _resolve_hive_agent_path(), _read_session_json(), _scan_agent_sessions(), _truncate_value()
|
||||
# list_agent_sessions(), get_agent_session_state(), get_agent_session_memory()
|
||||
# list_agent_checkpoints(), get_agent_checkpoint()
|
||||
|
||||
# ── Meta-agent: Test execution ────────────────────────────────
|
||||
# run_agent_tests()
|
||||
```
|
||||
|
||||
### `exports/hive_coder/nodes/__init__.py`
|
||||
|
||||
- Add 8 new tool names to the `tools` list
|
||||
- Rewrite system prompt "Tools Available" section with meta-agent tools
|
||||
- Add "Meta-Agent Capabilities" section teaching:
|
||||
- Tool discovery before designing agents
|
||||
- Post-build test execution
|
||||
- Debugging via session/checkpoint inspection
|
||||
- Agent awareness via `list_agents()`
|
||||
|
||||
### `exports/hive_coder/agent.py`
|
||||
|
||||
- Update `identity_prompt` to mention dynamic tool discovery and runtime observability
|
||||
- Add `dynamic-tool-discovery` constraint to the goal
|
||||
|
||||
### `exports/hive_coder/reference/framework_guide.md`
|
||||
|
||||
Replace static tools list with a note to use `discover_mcp_tools()` instead.
|
||||
|
||||
---
|
||||
|
||||
## What's NOT in Scope (deferred to v2)
|
||||
|
||||
- **Agent notifications / webhook listener** — Requires always-on listener architecture
|
||||
- **`compare_agent_checkpoints`** — LLM can compare by reading two checkpoints sequentially
|
||||
- **Runtime log query tools** — Available in hive-tools MCP; `run_command` can access them now
|
||||
|
||||
---
|
||||
|
||||
## Verification
|
||||
|
||||
1. MCP server starts with all 15 tools (7 existing + 8 new)
|
||||
2. `discover_mcp_tools()` connects to hive-tools and returns real tool schemas
|
||||
3. Agent validation passes (`default_agent.validate()`)
|
||||
4. Session tools work against existing data in `~/.hive/agents/`
|
||||
5. Smoke test: launch in TUI, ask it to discover tools
|
||||
@@ -0,0 +1,75 @@
|
||||
# Hive Queen Bee: Native agent-building agent
|
||||
|
||||
## Problem
|
||||
|
||||
Building a Hive agent today requires manual assembly of 7+ files (`agent.py`, `config.py`, `nodes/__init__.py`, `__init__.py`, `__main__.py`, `mcp_servers.json`, tests) with precise framework conventions — correct imports, entry_points format, conversation_mode values, STEP 1/STEP 2 prompt patterns, nullable_output_keys, and more. A single missing re-export in `__init__.py` silently breaks `AgentRunner.load()`. This is the #1 friction point for new users and a recurring source of bugs even for experienced ones.
|
||||
|
||||
There is no tool that understands the framework deeply enough to produce correct agents. General-purpose coding assistants hallucinate tool names, use wrong import paths (`from core.framework...`), create too many thin nodes, forget module-level exports, and produce agents that fail validation.
|
||||
|
||||
## Proposal
|
||||
|
||||
Build **Hive Coder** (codename "Queen Bee") — a framework-native coding agent that lives inside the framework itself and builds complete, validated agent packages from natural language.
|
||||
|
||||
### Design principles
|
||||
|
||||
1. **Single-node, forever-alive** — One continuous EventLoopNode conversation handles the full lifecycle (understand, qualify, design, implement, verify, iterate). No artificial phase boundaries that destroy context.
|
||||
|
||||
2. **Meta-agent capabilities** — Not just a file writer. Can discover available MCP tools at runtime, inspect sessions/checkpoints of agents it builds, run their test suites, and debug failures.
|
||||
|
||||
3. **Self-verifying** — Runs three validation steps after every build: class validation (graph structure), `AgentRunner.load()` (package export contract), and pytest. Fixes its own errors up to 3 attempts.
|
||||
|
||||
4. **Honest qualification** — Assesses framework fit before building. If a use case is a poor fit (needs sub-second latency, pure CRUD, massive data pipelines), says so instead of producing a bad agent.
|
||||
|
||||
5. **Reference-grounded** — Ships with embedded reference docs (framework guide, file templates, anti-patterns) that it reads before writing code. No reliance on training data for framework specifics.
|
||||
|
||||
### Components
|
||||
|
||||
#### `hive_coder` agent (`core/framework/agents/hive_coder/`)
|
||||
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| `agent.py` | Goal, single-node graph, `HiveCoderAgent` class |
|
||||
| `nodes/__init__.py` | `coder` EventLoopNode with comprehensive system prompt |
|
||||
| `config.py` | RuntimeConfig with `~/.hive/configuration.json` auto-detection |
|
||||
| `__main__.py` | Click CLI (`run`, `tui`, `info`, `validate`, `shell`) |
|
||||
| `reference/framework_guide.md` | Node types, edges, patterns, async entry points |
|
||||
| `reference/file_templates.md` | Complete code templates for every agent file |
|
||||
| `reference/anti_patterns.md` | 22 common mistakes with explanations |
|
||||
|
||||
#### Coder Tools MCP Server (`tools/coder_tools_server.py`)
|
||||
|
||||
Dedicated tool server providing:
|
||||
|
||||
- **File I/O**: `read_file` (with line numbers, offset/limit), `write_file` (auto-mkdir), `edit_file` (9-strategy fuzzy matching ported from opencode), `list_directory`, `search_files` (regex)
|
||||
- **Shell**: `run_command` (timeout, cwd, output truncation)
|
||||
- **Git**: `undo_changes` (snapshot-based rollback)
|
||||
- **Meta-agent**: `discover_mcp_tools`, `list_agents`, `list_agent_sessions`, `get_agent_session_state`, `get_agent_session_memory`, `list_agent_checkpoints`, `get_agent_checkpoint`, `run_agent_tests`
|
||||
|
||||
All file operations sandboxed to a configurable project root.
|
||||
|
||||
#### Framework changes
|
||||
|
||||
- `hive code` CLI command — direct launch shortcut
|
||||
- `hive tui` — discovers framework agents as a source
|
||||
- `AgentRuntime` — cron expression support (`croniter`) for async entry points
|
||||
- `prompt_composer` — appends current datetime to system prompts
|
||||
- `NodeSpec.max_node_visits` — default changed from 1 to 0 (unbounded), matching forever-alive as the standard pattern
|
||||
- TUI graph view — cron display and hours in countdown
|
||||
- CredentialError graceful handling in TUI launch
|
||||
|
||||
## Acceptance criteria
|
||||
|
||||
- [ ] `hive code` launches Hive Coder in the TUI
|
||||
- [ ] `hive tui` lists framework agents alongside exports/ and examples/
|
||||
- [ ] Given "build me a research agent that searches the web and summarizes findings", Hive Coder produces a valid package in `exports/` that passes `AgentRunner.load()`
|
||||
- [ ] Tool discovery works: agent calls `discover_mcp_tools()` before designing, never fabricates tool names
|
||||
- [ ] Self-verification: agent runs all 3 validation steps and fixes errors before presenting
|
||||
- [ ] Cron timers fire on schedule (unit tested)
|
||||
- [ ] `max_node_visits=0` default does not break existing agents or tests
|
||||
- [ ] Reference docs are accurate and match current framework behavior
|
||||
|
||||
## Non-goals
|
||||
|
||||
- Multi-agent orchestration (queen spawning worker agents at runtime) — future work
|
||||
- GUI/web interface — TUI only for v1
|
||||
- Auto-publishing to a registry — agents are local packages
|
||||
@@ -0,0 +1,288 @@
|
||||
# Plan: Multi-Graph Sessions with Guardian Pattern
|
||||
|
||||
## Context
|
||||
|
||||
The target experience: hive_coder builds an agent (e.g., email automation), loads it into the same runtime session, and acts as its guardian. The email agent runs autonomously while hive_coder watches for failures. On error, hive_coder asks the user for help if they're around, attempts an autonomous fix if they're away, and escalates catastrophic failures for post-mortem.
|
||||
|
||||
This requires multiple agent graphs sharing a single `AgentRuntime` session — shared memory and data, but isolated conversations. The existing runtime already has most of the primitives: `ExecutionStream` accepts its own `graph`, `trigger_type="event"` subscribes entry points to the EventBus, and `_get_primary_session_state()` bridges memory across streams.
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
```
|
||||
AgentRuntime (shared EventBus, shared state.json, shared data/)
|
||||
├── hive_coder graph
|
||||
│ ├── Stream "default" → coder node (client_facing, manual)
|
||||
│ └── Stream "guardian" → guardian node (event-driven, subscribes to EXECUTION_FAILED)
|
||||
└── email_agent graph
|
||||
└── Stream "email_agent::default" → intake node (client_facing, manual)
|
||||
```
|
||||
|
||||
The guardian entry point on hive_coder fires when email_agent emits `EXECUTION_FAILED`. It receives the failure event in its input, reads shared memory for context, and decides: ask user (if present), auto-fix (if away), or escalate (if catastrophic).
|
||||
|
||||
## Gap 1: Event Scoping — `graph_id` on Events
|
||||
|
||||
**Problem**: EventBus events carry `stream_id` and `node_id` but no `graph_id`. The guardian needs to subscribe to events from a specific graph (email_agent), not a specific stream name.
|
||||
|
||||
**Solution**: Add `graph_id: str | None = None` to `AgentEvent` and `filter_graph` to `Subscription`.
|
||||
|
||||
### `core/framework/runtime/event_bus.py`
|
||||
- `AgentEvent` dataclass: add `graph_id: str | None = None` field, include in `to_dict()`
|
||||
- `Subscription` dataclass: add `filter_graph: str | None = None`
|
||||
- `subscribe()`: accept `filter_graph` param, pass to `Subscription`
|
||||
- `_matches()`: check `filter_graph` against `event.graph_id`
|
||||
|
||||
### `core/framework/runtime/execution_stream.py`
|
||||
- `__init__()`: accept `graph_id: str | None = None`, store as `self.graph_id`
|
||||
- When emitting events via `_event_bus.publish()`: set `event.graph_id = self.graph_id`
|
||||
|
||||
## Gap 2: Multi-Graph Runtime — `add_graph()` / `remove_graph()`
|
||||
|
||||
**Problem**: `AgentRuntime.__init__` takes a single `GraphSpec`. We need to add/remove graphs dynamically at runtime.
|
||||
|
||||
**Solution**: Keep the primary graph on `__init__`. Add methods to register secondary graphs that create their own `ExecutionStream` instances backed by a different graph.
|
||||
|
||||
### `core/framework/runtime/agent_runtime.py`
|
||||
|
||||
New instance state:
|
||||
```python
|
||||
self._graph_id: str = graph_id or "primary" # ID for the primary graph
|
||||
self._graphs: dict[str, _GraphRegistration] = {} # graph_id -> registration
|
||||
self._active_graph_id: str = self._graph_id # TUI focus
|
||||
```
|
||||
|
||||
Where `_GraphRegistration` is a simple dataclass:
|
||||
```python
|
||||
@dataclass
|
||||
class _GraphRegistration:
|
||||
graph: GraphSpec
|
||||
goal: Goal
|
||||
entry_points: dict[str, EntryPointSpec]
|
||||
streams: dict[str, ExecutionStream]
|
||||
storage_subpath: str # relative to session root, e.g. "graphs/email_agent"
|
||||
event_subscriptions: list[str] # EventBus subscription IDs
|
||||
timer_tasks: list[asyncio.Task]
|
||||
```
|
||||
|
||||
New methods:
|
||||
- `add_graph(graph_id, graph, goal, entry_points, storage_subpath=None)` — creates streams for the graph using graph-scoped storage, sets up event/timer triggers, stamps `graph_id` on all streams. Can be called while running.
|
||||
- `remove_graph(graph_id)` — stops streams, cancels timers, unsubscribes events, removes registration. Cannot remove primary graph.
|
||||
- `list_graphs() -> list[str]` — returns all graph IDs
|
||||
- `active_graph_id` property with setter — TUI uses this to control which graph's events are displayed
|
||||
|
||||
Update existing methods:
|
||||
- `start()`: stamp `self._graph_id` on primary graph streams (via `ExecutionStream.graph_id`)
|
||||
- `inject_input(node_id, content)`: search active graph's streams first, then all others
|
||||
- `_get_primary_session_state()`: search across ALL graphs' streams (not just primary's)
|
||||
- `stop()`: stop all secondary graph streams/timers/subscriptions too
|
||||
|
||||
### Storage Layout
|
||||
```
|
||||
~/.hive/agents/hive_coder/sessions/{session_id}/
|
||||
state.json ← SHARED across all graphs
|
||||
data/ ← SHARED data directory
|
||||
conversations/coder/ ← hive_coder conversations
|
||||
graphs/
|
||||
email_agent/ ← secondary graph storage root
|
||||
conversations/
|
||||
intake/
|
||||
checkpoints/
|
||||
```
|
||||
|
||||
Secondary graph executors get `storage_path = {session_root}/graphs/{graph_id}/` while `state.json` and `data/` remain at the session root. The `resume_session_id` mechanism in `_get_primary_session_state()` already handles this — secondary executions find the primary session's `state.json`.
|
||||
|
||||
**Concurrent state.json writes**: For the guardian pattern (sequential: email_agent fails → guardian triggers), no file lock needed. But since both could technically write concurrently, add a simple `fcntl.flock()` wrapper around `_write_progress()` in the executor. Small, defensive change.
|
||||
|
||||
## Gap 3: Guardian Pattern — User Presence + Autonomous Recovery
|
||||
|
||||
**Problem**: When email_agent fails, hive_coder's guardian entry point must decide: ask user or auto-fix.
|
||||
|
||||
**Solution**: User presence is a runtime-level signal. The guardian's system prompt and event data give it enough context to decide.
|
||||
|
||||
### User Presence Tracking
|
||||
Add to `AgentRuntime`:
|
||||
```python
|
||||
self._last_user_input_time: float = 0.0 # monotonic timestamp
|
||||
```
|
||||
|
||||
Updated in `inject_input()` (called whenever user types in TUI). Exposed as:
|
||||
```python
|
||||
@property
|
||||
def user_idle_seconds(self) -> float:
|
||||
if self._last_user_input_time == 0:
|
||||
return float('inf')
|
||||
return time.monotonic() - self._last_user_input_time
|
||||
```
|
||||
|
||||
The guardian node's system prompt instructs the LLM: "If user_idle_seconds < 120, ask the user for guidance via the client-facing interaction. If user is away, attempt an autonomous fix."
|
||||
|
||||
This is NOT framework logic — it's prompt-driven. The guardian node is a regular `event_loop` node with `client_facing=True` and tools for code editing + agent lifecycle. The LLM decides the strategy based on presence info injected as context.
|
||||
|
||||
### Escalation Model
|
||||
Escalation = save a structured log entry. No special framework support needed. The guardian node uses `save_data("escalation_log.jsonl", ...)` via the existing data tools. The LLM writes:
|
||||
```json
|
||||
{"timestamp": "...", "severity": "catastrophic", "agent": "email_agent", "error": "...", "attempted_fixes": [...], "recommended_action": "..."}
|
||||
```
|
||||
|
||||
Post-mortem: user opens `/data escalation_log.jsonl` or the TUI shows a notification linking to it.
|
||||
|
||||
## Gap 4: Graph Lifecycle Tools — Stop/Reload/Restart
|
||||
|
||||
**Problem**: hive_coder needs to programmatically stop a broken agent, fix its code, reload it, and restart it.
|
||||
|
||||
**Solution**: MCP tools accessible to the active agent. Uses `ContextVar` to access the runtime (same pattern as `data_dir`).
|
||||
|
||||
### `core/framework/tools/session_graph_tools.py` (NEW)
|
||||
|
||||
```python
|
||||
async def load_agent(agent_path: str) -> str:
|
||||
"""Load an agent graph into the running session."""
|
||||
|
||||
async def unload_agent(graph_id: str) -> str:
|
||||
"""Stop and remove an agent graph from the session."""
|
||||
|
||||
async def start_agent(graph_id: str, entry_point: str = "default", input_data: str = "{}") -> str:
|
||||
"""Trigger an entry point on a loaded agent graph."""
|
||||
|
||||
async def restart_agent(graph_id: str) -> str:
|
||||
"""Unload and re-load an agent (picks up code changes)."""
|
||||
|
||||
async def list_agents() -> str:
|
||||
"""List all agent graphs in the current session with their status."""
|
||||
|
||||
async def get_user_presence() -> str:
|
||||
"""Return user idle time and presence status."""
|
||||
```
|
||||
|
||||
These tools call `runtime.add_graph()`, `runtime.remove_graph()`, `runtime.trigger()`, etc.
|
||||
|
||||
### Registration
|
||||
These tools are registered via `ToolRegistry` with `CONTEXT_PARAM` for `runtime` (injected by the executor, same as `data_dir`). Only available when the runtime is multi-graph capable (set by `cmd_code()`).
|
||||
|
||||
## Gap 5: TUI Integration — Graph Switching + Background Notifications
|
||||
|
||||
### `core/framework/tui/app.py`
|
||||
- `_route_event()`: check `event.graph_id` against `runtime.active_graph_id`
|
||||
- Events from active graph: route normally (streaming, chat, etc.)
|
||||
- `CLIENT_INPUT_REQUESTED` from background graph: show notification bar
|
||||
- `EXECUTION_FAILED` from background graph: show error notification
|
||||
- `EXECUTION_COMPLETED` from background: show brief completion notice
|
||||
- Other background events: silent (visible in logs)
|
||||
- `action_switch_graph(graph_id)`: update `runtime.active_graph_id`, refresh graph view, show header
|
||||
|
||||
### `core/framework/tui/widgets/chat_repl.py`
|
||||
- Track `_input_graph_id: str | None` alongside `_input_node_id`
|
||||
- `handle_input_requested(node_id, graph_id)`: if background graph, show notification instead of enabling input
|
||||
- `_submit_input()`: pass `graph_id` so that `inject_input()` can route the input to the correct graph
|
||||
- New TUI commands:
|
||||
- `/graphs` — list loaded graphs and their status
|
||||
- `/graph <id>` — switch active graph focus
|
||||
- `/load <path>` — load an agent graph into the session
|
||||
- `/unload <id>` — remove a graph from the session
|
||||
- On graph switch: flush streaming state, render graph header separator
|
||||
|
||||
### `core/framework/tui/widgets/graph_view.py`
|
||||
- `switch_graph(graph_id)` — re-render the graph visualization for the new active graph
|
||||
- When multi-graph active: show tab-like header listing all loaded graphs
|
||||
|
||||
## Gap 6: CLI + Runner Integration
|
||||
|
||||
### `core/framework/runner/cli.py`
|
||||
- `cmd_code()` creates the hive_coder runtime with `graph_id="hive_coder"`
|
||||
- Registers `session_graph_tools` with the tool config so hive_coder's LLM can call them
|
||||
- Sets `runtime._multi_graph_capable = True` flag
|
||||
|
||||
### `core/framework/runner/runner.py`
|
||||
- New method: `setup_as_secondary(runtime, graph_id)` — configures this runner to join an existing `AgentRuntime` as a secondary graph. Uses the existing `AgentRunner.load()` to parse agent.json, then calls `runtime.add_graph()` with the parsed graph/goal/entry_points.
|
||||
|
||||
## Gap 7: Reliable Mid-Node Resume
|
||||
|
||||
**Problem**: When an EventLoopNode is interrupted (crash, Ctrl+Z, context switch), resume doesn't restore to exactly where execution stopped. Several pieces of in-node state are lost, which changes behavior post-resume. In multi-graph sessions with parallel execution and frequent context switching, these gaps compound.
|
||||
|
||||
### What's already restored correctly
|
||||
- **Conversation history**: All messages persisted to disk immediately via `FileConversationStore._persist()` — one file per message in `parts/NNNNNNNNNN.json`
|
||||
- **OutputAccumulator values**: Write-through to `cursor.json` on every `accumulator.set()` call
|
||||
- **Iteration counter**: Written to `cursor.json` at the end of each iteration (step 6g)
|
||||
- **Orphaned tool calls**: `_repair_orphaned_tool_calls()` patches in-flight tool calls with error messages so the LLM knows to retry
|
||||
|
||||
### What's lost — and fixes
|
||||
|
||||
#### 1. `user_interaction_count` (CRITICAL)
|
||||
Resets to 0 on resume. This counter controls client-facing blocking semantics: before the first user interaction, turns that only call `set_output` do not satisfy the blocking requirement — the LLM must present to the user first. After resume, a node that had already completed 3 user interactions behaves as if the user never interacted at all.
|
||||
|
||||
**Fix**: Persist `user_interaction_count` to `cursor.json` alongside `iteration` and `outputs`. Write it in `_write_cursor()` (step 6g), restore in `_restore()`.
|
||||
|
||||
**Files**: `core/framework/graph/event_loop_node.py`
|
||||
|
||||
#### 2. Accumulator outputs not in SharedMemory
|
||||
The `OutputAccumulator` writes to `cursor.json` (durable) but only writes to `SharedMemory` when the judge ACCEPTs. On crash, the CancelledError handler captures `memory.read_all()` — which doesn't include the accumulator's WIP values. On resume, edge conditions checking those memory keys see `None`.
|
||||
|
||||
**Fix**: In the executor's `CancelledError` handler, read the interrupted node's `cursor.json` and write any accumulator outputs to `memory` before building `session_state_out`. This ensures resume memory includes WIP output values.
|
||||
|
||||
**Files**: `core/framework/graph/executor.py` (CancelledError handler, ~line 1289)
|
||||
|
||||
#### 3. Stall/doom-loop detection counters
|
||||
`recent_responses` and `recent_tool_fingerprints` reset to empty lists. A previously near-stalled node gets a fresh detection budget.
|
||||
|
||||
**Fix**: Persist these to `cursor.json`. They're small (last N strings). Write in `_write_cursor()`, restore in `_restore()`.
|
||||
|
||||
**Files**: `core/framework/graph/event_loop_node.py`
|
||||
|
||||
#### 4. `continuous_conversation` at executor level
|
||||
In continuous mode, the executor's `continuous_conversation` variable is `None` on resume. The node's `_restore()` recovers messages from disk, but the executor doesn't pre-populate this variable until the node returns.
|
||||
|
||||
**Fix**: After a resumed node completes, set `continuous_conversation = result.conversation` (this already happens in the normal path at line 1155 — verify it also runs on the resume path).
|
||||
|
||||
**Files**: `core/framework/graph/executor.py`
|
||||
|
||||
### Multi-graph specific: independent resume per graph
|
||||
Each graph in a multi-graph session has its own storage subdirectory (`graphs/{graph_id}/`) with its own `conversations/`, `checkpoints/`, and `cursor.json` files. Resume is already per-executor, so each graph resumes independently. The shared `state.json` at the session root captures the union of all graphs' memory — the `fcntl.flock()` wrapper on `_write_progress()` (Gap 2) ensures concurrent writes don't corrupt it.
|
||||
|
||||
### Implementation
|
||||
These fixes are prerequisite to multi-graph and should be done as **Phase 0** before the EventBus changes:
|
||||
1. Persist `user_interaction_count` + stall/doom counters to `cursor.json`
|
||||
2. Restore them in `_restore()`
|
||||
3. Flush accumulator outputs to SharedMemory in executor's CancelledError handler
|
||||
4. Verify continuous_conversation is set on resume path
|
||||
|
||||
## Implementation Phases
|
||||
|
||||
### Phase 0: Reliable Mid-Node Resume (prerequisite)
|
||||
1. `event_loop_node.py` — persist `user_interaction_count`, `recent_responses`, `recent_tool_fingerprints` to `cursor.json` via `_write_cursor()`; restore in `_restore()`
|
||||
2. `executor.py` — in CancelledError handler, read interrupted node's `cursor.json` accumulator outputs and write to `memory` before building `session_state_out`
|
||||
3. `executor.py` — verify `continuous_conversation` is populated on resume path
|
||||
|
||||
### Phase 1: EventBus Foundation
|
||||
1. `event_bus.py` — `graph_id` on `AgentEvent`, `filter_graph` on `Subscription` + `_matches()`
|
||||
2. `execution_stream.py` — accept and stamp `graph_id` on emitted events
|
||||
|
||||
### Phase 2: Multi-Graph Runtime
|
||||
3. `agent_runtime.py` — `_GraphRegistration` dataclass, `add_graph()`, `remove_graph()`, `list_graphs()`, `active_graph_id` property
|
||||
4. `agent_runtime.py` — update `inject_input()`, `_get_primary_session_state()`, `stop()` for multi-graph
|
||||
5. `agent_runtime.py` — user presence tracking (`_last_user_input_time`, `user_idle_seconds`)
|
||||
6. Storage path logic: secondary graphs get `{session_root}/graphs/{graph_id}/`
|
||||
|
||||
### Phase 3: Graph Lifecycle Tools
|
||||
7. `core/framework/tools/session_graph_tools.py` — `load_agent`, `unload_agent`, `start_agent`, `restart_agent`, `list_agents`, `get_user_presence`
|
||||
8. `runner.py` — `setup_as_secondary()` method
|
||||
|
||||
### Phase 4: TUI Integration
|
||||
9. `app.py` — `graph_id` event filtering, background notifications, `action_switch_graph`
|
||||
10. `chat_repl.py` — `/graphs`, `/graph`, `/load`, `/unload` commands, graph_id tracking
|
||||
11. `graph_view.py` — multi-graph header, `switch_graph()`
|
||||
|
||||
### Phase 5: hive_coder Integration
|
||||
12. `cli.py` — `cmd_code()` sets up multi-graph capable runtime, registers graph tools
|
||||
13. hive_coder's agent config — add guardian entry point with `trigger_type="event"` subscribing to `EXECUTION_FAILED`
|
||||
14. Guardian node system prompt — presence-aware triage logic (ask user / auto-fix / escalate)
|
||||
|
||||
## Backward Compatibility
|
||||
- Single-graph `hive run exports/my_agent` unchanged: `graph_id` defaults to `None`, no secondary graphs loaded, events carry `graph_id=None`, TUI shows no graph switching UI
|
||||
- All new fields are optional with `None` defaults
|
||||
- `_get_primary_session_state()` existing behavior preserved when no secondary graphs exist
|
||||
|
||||
## Verification
|
||||
1. **Unit**: `add_graph()` creates streams with correct `graph_id`, events carry `graph_id`, `filter_graph` works in subscriptions, `inject_input()` routes to correct graph
|
||||
2. **Integration**: Load hive_coder + email_agent, email_agent fails → guardian fires → reads shared memory → decides action
|
||||
3. **TUI**: `/graphs` shows both, `/graph` switches, background failure notification appears, input routing works across graphs
|
||||
4. **Backward compat**: `hive run exports/deep_research_agent --tui` works unchanged
|
||||
5. **Lifecycle**: `restart_agent` picks up code changes, `unload_agent` cleans up streams and subscriptions
|
||||
@@ -0,0 +1,56 @@
|
||||
# feat(queen): Hive Queen Bee — native agent-building agent
|
||||
|
||||
## Summary
|
||||
|
||||
Introduces **Hive Coder** (codename "Queen Bee"), a framework-native coding agent that builds complete Hive agent packages from natural language descriptions. This is a single-node, forever-alive agent inspired by opencode's `while(true)` loop — one continuous conversation handles the full lifecycle: understand, qualify, design, implement, verify, and iterate.
|
||||
|
||||
The agent is deeply integrated with the framework: it can discover available MCP tools at runtime, inspect sessions and checkpoints of agents it builds, run their test suites, and self-verify its own output. It ships with a dedicated MCP tools server (`coder_tools_server.py`) providing rich file I/O, fuzzy-match editing, git snapshots, and shell execution — all scoped to a configurable project root.
|
||||
|
||||
## What's included
|
||||
|
||||
### New: `hive_coder` agent (`core/framework/agents/hive_coder/`)
|
||||
- **`agent.py`** — Goal with 4 success criteria and 4 constraints, single-node graph, `HiveCoderAgent` class with full runtime lifecycle (start/stop/trigger_and_wait)
|
||||
- **`nodes/__init__.py`** — Single `coder` EventLoopNode with a comprehensive system prompt covering coding mandates, tool discovery, meta-agent capabilities, node count rules, implementation templates, and a 6-phase workflow
|
||||
- **`config.py`** — RuntimeConfig with auto-detection of preferred model from `~/.hive/configuration.json`
|
||||
- **`__main__.py`** — Click CLI with `run`, `tui`, `info`, `validate`, and `shell` subcommands
|
||||
- **`reference/`** — Framework guide, file templates, and anti-patterns docs embedded as agent reference material
|
||||
|
||||
### New: Coder Tools MCP Server (`tools/coder_tools_server.py`)
|
||||
- 1500-line MCP server providing 15 tools: `read_file`, `write_file`, `edit_file` (with opencode-style 9-strategy fuzzy matching), `list_directory`, `search_files`, `run_command`, `undo_changes`, `discover_mcp_tools`, `list_agents`, `list_agent_sessions`, `get_agent_session_state`, `get_agent_session_memory`, `list_agent_checkpoints`, `get_agent_checkpoint`, `run_agent_tests`
|
||||
- Path-scoped security: all file operations sandboxed to project root
|
||||
- Git-based undo: automatic snapshots before writes with `undo_changes` rollback
|
||||
|
||||
### Framework changes
|
||||
- **`hive code` CLI command** — Direct launch shortcut for Hive Coder via `cmd_code` in `runner/cli.py`
|
||||
- **`hive tui` updated** — Now discovers framework agents alongside exports/ and examples/
|
||||
- **Cron timer support** — `AgentRuntime` now supports cron expressions (`croniter`) in addition to fixed-interval timers for async entry points
|
||||
- **Datetime in system prompts** — `prompt_composer._with_datetime()` appends current datetime to all composed system prompts; EventLoopNode also applies it for isolated conversations
|
||||
- **`max_node_visits` default → 0** — Changed from 1 to 0 (unbounded) across `NodeSpec` and executor, matching the forever-alive pattern as the standard default
|
||||
- **TUI graph view** — Timer display updated to show cron expressions and hours in countdown
|
||||
- **CredentialError handling** — `_setup()` calls in TUI launch paths now catch and display credential errors gracefully
|
||||
|
||||
### Tests
|
||||
- New `test_agent_runtime.py` tests for cron-based timer scheduling
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
User ──▶ [coder] (EventLoopNode, client_facing, forever-alive)
|
||||
│
|
||||
│ Tools: coder_tools_server.py (file I/O, shell, git)
|
||||
│ + meta-agent tools (discover, inspect, test)
|
||||
│
|
||||
└──▶ loops continuously until user exits
|
||||
```
|
||||
|
||||
Single node. No edges. No terminal nodes. The agent stays alive and handles multiple build requests in one session — context accumulates across interactions.
|
||||
|
||||
## Test plan
|
||||
|
||||
- [ ] `hive code` launches Hive Coder TUI successfully
|
||||
- [ ] `hive tui` shows "Framework Agents" as a source option
|
||||
- [ ] Agent can discover tools via `discover_mcp_tools()`
|
||||
- [ ] Agent generates a valid agent package from a natural language request
|
||||
- [ ] Generated packages pass `AgentRunner.load()` validation
|
||||
- [ ] Cron timer tests pass (`test_agent_runtime.py`)
|
||||
- [ ] Existing tests unaffected by `max_node_visits` default change
|
||||
+189
-63
@@ -347,10 +347,9 @@ if [ "$USE_ASSOC_ARRAYS" = true ]; then
|
||||
["cerebras:1"]="Qwen3 235B - Frontier reasoning"
|
||||
)
|
||||
|
||||
# NOTE: 8192 should match DEFAULT_MAX_TOKENS in core/framework/graph/edge.py
|
||||
declare -A MODEL_CHOICES_MAXTOKENS=(
|
||||
["anthropic:0"]=8192
|
||||
["anthropic:1"]=8192
|
||||
["anthropic:0"]=32768
|
||||
["anthropic:1"]=16384
|
||||
["anthropic:2"]=8192
|
||||
["anthropic:3"]=8192
|
||||
["openai:0"]=16384
|
||||
@@ -454,8 +453,7 @@ else
|
||||
MC_PROVIDERS=(anthropic anthropic anthropic anthropic openai openai openai gemini gemini groq groq cerebras cerebras)
|
||||
MC_IDS=("claude-opus-4-6" "claude-sonnet-4-5-20250929" "claude-sonnet-4-20250514" "claude-haiku-4-5-20251001" "gpt-5.2" "gpt-5-mini" "gpt-5-nano" "gemini-3-flash-preview" "gemini-3-pro-preview" "moonshotai/kimi-k2-instruct-0905" "openai/gpt-oss-120b" "zai-glm-4.7" "qwen3-235b-a22b-instruct-2507")
|
||||
MC_LABELS=("Opus 4.6 - Most capable (recommended)" "Sonnet 4.5 - Best balance" "Sonnet 4 - Fast + capable" "Haiku 4.5 - Fast + cheap" "GPT-5.2 - Most capable (recommended)" "GPT-5 Mini - Fast + cheap" "GPT-5 Nano - Fastest" "Gemini 3 Flash - Fast (recommended)" "Gemini 3 Pro - Best quality" "Kimi K2 - Best quality (recommended)" "GPT-OSS 120B - Fast reasoning" "ZAI-GLM 4.7 - Best quality (recommended)" "Qwen3 235B - Frontier reasoning")
|
||||
# NOTE: 8192 should match DEFAULT_MAX_TOKENS in core/framework/graph/edge.py
|
||||
MC_MAXTOKENS=(8192 8192 8192 8192 16384 16384 16384 8192 8192 8192 8192 8192 8192)
|
||||
MC_MAXTOKENS=(32768 16384 8192 8192 16384 16384 16384 8192 8192 8192 8192 8192 8192)
|
||||
|
||||
# Helper: get number of model choices for a provider
|
||||
get_model_choice_count() {
|
||||
@@ -616,11 +614,14 @@ prompt_model_selection() {
|
||||
}
|
||||
|
||||
# Function to save configuration
|
||||
# Args: provider_id env_var model max_tokens [use_claude_code_sub] [api_base]
|
||||
save_configuration() {
|
||||
local provider_id="$1"
|
||||
local env_var="$2"
|
||||
local model="$3"
|
||||
local max_tokens="$4"
|
||||
local use_claude_code_sub="${5:-}"
|
||||
local api_base="${6:-}"
|
||||
|
||||
# Fallbacks if not provided
|
||||
if [ -z "$model" ]; then
|
||||
@@ -643,6 +644,12 @@ config = {
|
||||
},
|
||||
'created_at': '$(date -u +"%Y-%m-%dT%H:%M:%S+00:00")'
|
||||
}
|
||||
if '$use_claude_code_sub' == 'true':
|
||||
config['llm']['use_claude_code_subscription'] = True
|
||||
# No api_key_env_var needed for Claude Code subscription
|
||||
config['llm'].pop('api_key_env_var', None)
|
||||
if '$api_base':
|
||||
config['llm']['api_base'] = '$api_base'
|
||||
with open('$HIVE_CONFIG_FILE', 'w') as f:
|
||||
json.dump(config, f, indent=2)
|
||||
print(json.dumps(config, indent=2))
|
||||
@@ -664,8 +671,47 @@ SELECTED_PROVIDER_ID="" # Will hold the chosen provider ID
|
||||
SELECTED_ENV_VAR="" # Will hold the chosen env var
|
||||
SELECTED_MODEL="" # Will hold the chosen model ID
|
||||
SELECTED_MAX_TOKENS=8192 # Will hold the chosen max_tokens
|
||||
SUBSCRIPTION_MODE="" # "claude_code" | "zai_code" | ""
|
||||
|
||||
if [ "$USE_ASSOC_ARRAYS" = true ]; then
|
||||
# ── Subscription mode detection ──────────────────────────────
|
||||
# Claude Code subscription: default when ~/.claude/.credentials.json exists
|
||||
CLAUDE_CRED_FILE="$HOME/.claude/.credentials.json"
|
||||
if [ -f "$CLAUDE_CRED_FILE" ]; then
|
||||
echo -e " ${GREEN}⬢${NC} Claude Code subscription detected"
|
||||
echo -e " ${DIM}~/.claude/.credentials.json${NC}"
|
||||
echo -e " ${DIM}Default: claude-opus-4-6 | max_tokens: 32768${NC}"
|
||||
echo ""
|
||||
if prompt_yes_no "Use Claude Code subscription? (no API key needed)"; then
|
||||
SUBSCRIPTION_MODE="claude_code"
|
||||
SELECTED_PROVIDER_ID="anthropic"
|
||||
SELECTED_MODEL="claude-opus-4-6"
|
||||
SELECTED_MAX_TOKENS=32768
|
||||
echo ""
|
||||
echo -e "${GREEN}⬢${NC} Using Claude Code subscription"
|
||||
fi
|
||||
fi
|
||||
|
||||
# ZAI Code subscription: check for ZAI_API_KEY
|
||||
if [ -z "$SUBSCRIPTION_MODE" ] && [ -n "${ZAI_API_KEY:-}" ]; then
|
||||
echo -e " ${GREEN}⬢${NC} Found ZAI Code API key"
|
||||
echo ""
|
||||
if prompt_yes_no "Use your ZAI Code subscription?"; then
|
||||
SUBSCRIPTION_MODE="zai_code"
|
||||
SELECTED_PROVIDER_ID="openai"
|
||||
SELECTED_ENV_VAR="ZAI_API_KEY"
|
||||
SELECTED_MODEL="glm-5"
|
||||
SELECTED_MAX_TOKENS=32768
|
||||
echo ""
|
||||
echo -e "${GREEN}⬢${NC} Using ZAI Code subscription"
|
||||
echo -e " ${DIM}Model: glm-5 | API: api.z.ai${NC}"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Skip normal provider detection if a subscription mode was selected
|
||||
if [ -n "$SUBSCRIPTION_MODE" ]; then
|
||||
# Jump ahead — SELECTED_PROVIDER_ID is already set
|
||||
:
|
||||
elif [ "$USE_ASSOC_ARRAYS" = true ]; then
|
||||
# Bash 4+ - iterate over associative array keys
|
||||
for env_var in "${!PROVIDER_NAMES[@]}"; do
|
||||
value="${!env_var}"
|
||||
@@ -693,97 +739,140 @@ if [ ${#FOUND_PROVIDERS[@]} -gt 0 ]; then
|
||||
done
|
||||
echo ""
|
||||
|
||||
if [ ${#FOUND_PROVIDERS[@]} -eq 1 ]; then
|
||||
# Only one provider found, use it automatically
|
||||
if prompt_yes_no "Use this key?"; then
|
||||
SELECTED_ENV_VAR="${FOUND_ENV_VARS[0]}"
|
||||
# Show all found providers + ZAI subscription + Other
|
||||
echo -e "${BOLD}Select your default LLM provider:${NC}"
|
||||
echo ""
|
||||
|
||||
i=1
|
||||
for provider in "${FOUND_PROVIDERS[@]}"; do
|
||||
echo -e " ${CYAN}$i)${NC} $provider"
|
||||
i=$((i + 1))
|
||||
done
|
||||
ZAI_CHOICE=$i
|
||||
echo -e " ${CYAN}$i)${NC} ZAI Code Subscription ${DIM}(use your ZAI Code plan)${NC}"
|
||||
i=$((i + 1))
|
||||
echo -e " ${CYAN}$i)${NC} Other"
|
||||
max_choice=$i
|
||||
echo ""
|
||||
|
||||
while true; do
|
||||
read -r -p "Enter choice (1-$max_choice): " choice || true
|
||||
if [[ "$choice" =~ ^[0-9]+$ ]] && [ "$choice" -ge 1 ] && [ "$choice" -le "$max_choice" ]; then
|
||||
if [ "$choice" -eq "$max_choice" ]; then
|
||||
# Fall through to the manual provider selection below
|
||||
break
|
||||
elif [ "$choice" -eq "$ZAI_CHOICE" ]; then
|
||||
# ZAI Code Subscription
|
||||
SUBSCRIPTION_MODE="zai_code"
|
||||
SELECTED_PROVIDER_ID="openai"
|
||||
SELECTED_ENV_VAR="ZAI_API_KEY"
|
||||
SELECTED_MODEL="glm-5"
|
||||
SELECTED_MAX_TOKENS=32768
|
||||
echo ""
|
||||
echo -e "${GREEN}⬢${NC} Using ZAI Code subscription"
|
||||
echo -e " ${DIM}Model: glm-5 | API: api.z.ai${NC}"
|
||||
break
|
||||
fi
|
||||
idx=$((choice - 1))
|
||||
SELECTED_ENV_VAR="${FOUND_ENV_VARS[$idx]}"
|
||||
SELECTED_PROVIDER_ID="$(get_provider_id "$SELECTED_ENV_VAR")"
|
||||
|
||||
echo ""
|
||||
echo -e "${GREEN}⬢${NC} Using ${FOUND_PROVIDERS[0]}"
|
||||
echo -e "${GREEN}⬢${NC} Selected: ${FOUND_PROVIDERS[$idx]}"
|
||||
|
||||
prompt_model_selection "$SELECTED_PROVIDER_ID"
|
||||
break
|
||||
fi
|
||||
else
|
||||
# Multiple providers found, let user pick one
|
||||
echo -e "${BOLD}Select your default LLM provider:${NC}"
|
||||
echo ""
|
||||
|
||||
# Build choice menu from found providers
|
||||
i=1
|
||||
for provider in "${FOUND_PROVIDERS[@]}"; do
|
||||
echo -e " ${CYAN}$i)${NC} $provider"
|
||||
i=$((i + 1))
|
||||
done
|
||||
echo -e " ${CYAN}$i)${NC} Other"
|
||||
max_choice=$i
|
||||
echo ""
|
||||
|
||||
while true; do
|
||||
read -r -p "Enter choice (1-$max_choice): " choice || true
|
||||
if [[ "$choice" =~ ^[0-9]+$ ]] && [ "$choice" -ge 1 ] && [ "$choice" -le "$max_choice" ]; then
|
||||
if [ "$choice" -eq "$max_choice" ]; then
|
||||
# Fall through to the manual provider selection below
|
||||
break
|
||||
fi
|
||||
idx=$((choice - 1))
|
||||
SELECTED_ENV_VAR="${FOUND_ENV_VARS[$idx]}"
|
||||
SELECTED_PROVIDER_ID="$(get_provider_id "$SELECTED_ENV_VAR")"
|
||||
|
||||
echo ""
|
||||
echo -e "${GREEN}⬢${NC} Selected: ${FOUND_PROVIDERS[$idx]}"
|
||||
|
||||
prompt_model_selection "$SELECTED_PROVIDER_ID"
|
||||
break
|
||||
fi
|
||||
echo -e "${RED}Invalid choice. Please enter 1-$max_choice${NC}"
|
||||
done
|
||||
fi
|
||||
echo -e "${RED}Invalid choice. Please enter 1-$max_choice${NC}"
|
||||
done
|
||||
fi
|
||||
|
||||
if [ -z "$SELECTED_PROVIDER_ID" ]; then
|
||||
echo ""
|
||||
prompt_choice "Select your LLM provider:" \
|
||||
"Anthropic (Claude) - Recommended" \
|
||||
"OpenAI (GPT)" \
|
||||
"Google Gemini - Free tier available" \
|
||||
"Groq - Fast, free tier" \
|
||||
"Cerebras - Fast, free tier" \
|
||||
"Skip for now"
|
||||
choice=$PROMPT_CHOICE
|
||||
echo -e "${BOLD}Select your LLM provider:${NC}"
|
||||
echo ""
|
||||
echo -e " ${CYAN}${BOLD}Subscription modes (no API key purchase needed):${NC}"
|
||||
echo -e " ${CYAN}1)${NC} Claude Code Subscription ${DIM}(use your Claude Max/Pro plan)${NC}"
|
||||
echo -e " ${CYAN}2)${NC} ZAI Code Subscription ${DIM}(use your ZAI Code plan)${NC}"
|
||||
echo ""
|
||||
echo -e " ${CYAN}${BOLD}API key providers:${NC}"
|
||||
echo -e " ${CYAN}3)${NC} Anthropic (Claude) - Recommended"
|
||||
echo -e " ${CYAN}4)${NC} OpenAI (GPT)"
|
||||
echo -e " ${CYAN}5)${NC} Google Gemini - Free tier available"
|
||||
echo -e " ${CYAN}6)${NC} Groq - Fast, free tier"
|
||||
echo -e " ${CYAN}7)${NC} Cerebras - Fast, free tier"
|
||||
echo -e " ${CYAN}8)${NC} Skip for now"
|
||||
echo ""
|
||||
|
||||
while true; do
|
||||
read -r -p "Enter choice (1-8): " choice || true
|
||||
if [[ "$choice" =~ ^[0-9]+$ ]] && [ "$choice" -ge 1 ] && [ "$choice" -le 8 ]; then
|
||||
break
|
||||
fi
|
||||
echo -e "${RED}Invalid choice. Please enter 1-8${NC}"
|
||||
done
|
||||
|
||||
case $choice in
|
||||
0)
|
||||
1)
|
||||
# Claude Code Subscription
|
||||
CLAUDE_CRED_FILE="$HOME/.claude/.credentials.json"
|
||||
if [ ! -f "$CLAUDE_CRED_FILE" ]; then
|
||||
echo ""
|
||||
echo -e "${YELLOW} ~/.claude/.credentials.json not found.${NC}"
|
||||
echo -e " Run ${CYAN}claude${NC} first to authenticate with your Claude subscription,"
|
||||
echo -e " then run this quickstart again."
|
||||
echo ""
|
||||
SELECTED_PROVIDER_ID=""
|
||||
else
|
||||
SUBSCRIPTION_MODE="claude_code"
|
||||
SELECTED_PROVIDER_ID="anthropic"
|
||||
echo ""
|
||||
echo -e "${GREEN}⬢${NC} Using Claude Code subscription"
|
||||
fi
|
||||
;;
|
||||
2)
|
||||
# ZAI Code Subscription
|
||||
SUBSCRIPTION_MODE="zai_code"
|
||||
SELECTED_PROVIDER_ID="openai"
|
||||
SELECTED_ENV_VAR="ZAI_API_KEY"
|
||||
SELECTED_MODEL="glm-5"
|
||||
SELECTED_MAX_TOKENS=32768
|
||||
PROVIDER_NAME="ZAI"
|
||||
echo ""
|
||||
echo -e "${GREEN}⬢${NC} Using ZAI Code subscription"
|
||||
echo -e " ${DIM}Model: glm-5 | API: api.z.ai${NC}"
|
||||
;;
|
||||
3)
|
||||
SELECTED_ENV_VAR="ANTHROPIC_API_KEY"
|
||||
SELECTED_PROVIDER_ID="anthropic"
|
||||
PROVIDER_NAME="Anthropic"
|
||||
SIGNUP_URL="https://console.anthropic.com/settings/keys"
|
||||
;;
|
||||
1)
|
||||
4)
|
||||
SELECTED_ENV_VAR="OPENAI_API_KEY"
|
||||
SELECTED_PROVIDER_ID="openai"
|
||||
PROVIDER_NAME="OpenAI"
|
||||
SIGNUP_URL="https://platform.openai.com/api-keys"
|
||||
;;
|
||||
2)
|
||||
5)
|
||||
SELECTED_ENV_VAR="GEMINI_API_KEY"
|
||||
SELECTED_PROVIDER_ID="gemini"
|
||||
PROVIDER_NAME="Google Gemini"
|
||||
SIGNUP_URL="https://aistudio.google.com/apikey"
|
||||
;;
|
||||
3)
|
||||
6)
|
||||
SELECTED_ENV_VAR="GROQ_API_KEY"
|
||||
SELECTED_PROVIDER_ID="groq"
|
||||
PROVIDER_NAME="Groq"
|
||||
SIGNUP_URL="https://console.groq.com/keys"
|
||||
;;
|
||||
4)
|
||||
7)
|
||||
SELECTED_ENV_VAR="CEREBRAS_API_KEY"
|
||||
SELECTED_PROVIDER_ID="cerebras"
|
||||
PROVIDER_NAME="Cerebras"
|
||||
SIGNUP_URL="https://cloud.cerebras.ai/"
|
||||
;;
|
||||
5)
|
||||
8)
|
||||
echo ""
|
||||
echo -e "${YELLOW}Skipped.${NC} An LLM API key is required to test and use worker agents."
|
||||
echo -e "Add your API key later by running:"
|
||||
@@ -795,7 +884,8 @@ if [ -z "$SELECTED_PROVIDER_ID" ]; then
|
||||
;;
|
||||
esac
|
||||
|
||||
if [ -n "$SELECTED_ENV_VAR" ] && [ -z "${!SELECTED_ENV_VAR}" ]; then
|
||||
# For API-key providers: prompt for key if not already set
|
||||
if [ -z "$SUBSCRIPTION_MODE" ] && [ -n "$SELECTED_ENV_VAR" ] && [ -z "${!SELECTED_ENV_VAR}" ]; then
|
||||
echo ""
|
||||
echo -e "Get your API key from: ${CYAN}$SIGNUP_URL${NC}"
|
||||
echo ""
|
||||
@@ -816,6 +906,28 @@ if [ -z "$SELECTED_PROVIDER_ID" ]; then
|
||||
SELECTED_PROVIDER_ID=""
|
||||
fi
|
||||
fi
|
||||
|
||||
# For ZAI subscription: prompt for API key if not already set
|
||||
if [ "$SUBSCRIPTION_MODE" = "zai_code" ] && [ -z "${ZAI_API_KEY:-}" ]; then
|
||||
echo ""
|
||||
read -r -p "Paste your ZAI API key (or press Enter to skip): " API_KEY
|
||||
|
||||
if [ -n "$API_KEY" ]; then
|
||||
echo "" >> "$SHELL_RC_FILE"
|
||||
echo "# Hive Agent Framework - ZAI Code subscription API key" >> "$SHELL_RC_FILE"
|
||||
echo "export ZAI_API_KEY=\"$API_KEY\"" >> "$SHELL_RC_FILE"
|
||||
export ZAI_API_KEY="$API_KEY"
|
||||
echo ""
|
||||
echo -e "${GREEN}⬢${NC} ZAI API key saved to $SHELL_RC_FILE"
|
||||
else
|
||||
echo ""
|
||||
echo -e "${YELLOW}Skipped.${NC} Add your ZAI API key to $SHELL_RC_FILE when ready:"
|
||||
echo -e " ${CYAN}echo 'export ZAI_API_KEY=\"your-key\"' >> $SHELL_RC_FILE${NC}"
|
||||
SELECTED_ENV_VAR=""
|
||||
SELECTED_PROVIDER_ID=""
|
||||
SUBSCRIPTION_MODE=""
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# Prompt for model if not already selected (manual provider path)
|
||||
@@ -827,7 +939,13 @@ fi
|
||||
if [ -n "$SELECTED_PROVIDER_ID" ]; then
|
||||
echo ""
|
||||
echo -n " Saving configuration... "
|
||||
save_configuration "$SELECTED_PROVIDER_ID" "$SELECTED_ENV_VAR" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" > /dev/null
|
||||
if [ "$SUBSCRIPTION_MODE" = "claude_code" ]; then
|
||||
save_configuration "$SELECTED_PROVIDER_ID" "" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "true" "" > /dev/null
|
||||
elif [ "$SUBSCRIPTION_MODE" = "zai_code" ]; then
|
||||
save_configuration "$SELECTED_PROVIDER_ID" "$SELECTED_ENV_VAR" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "" "https://api.z.ai/api/coding/paas/v4" > /dev/null
|
||||
else
|
||||
save_configuration "$SELECTED_PROVIDER_ID" "$SELECTED_ENV_VAR" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" > /dev/null
|
||||
fi
|
||||
echo -e "${GREEN}⬢${NC}"
|
||||
echo -e " ${DIM}~/.hive/configuration.json${NC}"
|
||||
fi
|
||||
@@ -1041,7 +1159,15 @@ if [ -n "$SELECTED_PROVIDER_ID" ]; then
|
||||
SELECTED_MODEL="$(get_default_model "$SELECTED_PROVIDER_ID")"
|
||||
fi
|
||||
echo -e "${BOLD}Default LLM:${NC}"
|
||||
echo -e " ${CYAN}$SELECTED_PROVIDER_ID${NC} → ${DIM}$SELECTED_MODEL${NC}"
|
||||
if [ "$SUBSCRIPTION_MODE" = "claude_code" ]; then
|
||||
echo -e " ${GREEN}⬢${NC} Claude Code Subscription → ${DIM}$SELECTED_MODEL${NC}"
|
||||
echo -e " ${DIM}Token auto-refresh from ~/.claude/.credentials.json${NC}"
|
||||
elif [ "$SUBSCRIPTION_MODE" = "zai_code" ]; then
|
||||
echo -e " ${GREEN}⬢${NC} ZAI Code Subscription → ${DIM}$SELECTED_MODEL${NC}"
|
||||
echo -e " ${DIM}API: api.z.ai (OpenAI-compatible)${NC}"
|
||||
else
|
||||
echo -e " ${CYAN}$SELECTED_PROVIDER_ID${NC} → ${DIM}$SELECTED_MODEL${NC}"
|
||||
fi
|
||||
echo ""
|
||||
fi
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user