lint fixes

Merge branch 'main' into feat/vulnerability_agent
vulnerability check to sample agents
2026-02-15 17:45:56 -08:00 · 2026-02-15 17:35:59 -08:00 · 2026-02-15 17:27:09 -08:00 · 2026-02-15 16:58:06 -08:00 · 2026-02-15 16:47:53 -08:00 · 2026-02-15 20:14:00 +08:00
729 changed files with 160757 additions and 29576 deletions
@@ -0,0 +1,9 @@
+{
+  "mcpServers": {
+    "agent-builder": {
+      "command": "uv",
+      "args": ["run", "--directory", "core", "-m", "framework.mcp.agent_builder_server"],
+      "disabled": false
+    }
+  }
+}
@@ -0,0 +1 @@
+../../.claude/skills/hive
@@ -0,0 +1 @@
+../../.claude/skills/hive-concepts
@@ -0,0 +1 @@
+../../.claude/skills/hive-create
@@ -0,0 +1 @@
+../../.claude/skills/hive-credentials
@@ -0,0 +1 @@
+../../.claude/skills/hive-patterns
@@ -0,0 +1 @@
+../../.claude/skills/hive-test
@@ -0,0 +1,5 @@
+---
+description: hive-concepts
+---
+
+use hive-concepts skill
@@ -0,0 +1,5 @@
+---
+description: hive-create
+---
+
+use hive-create skill
@@ -0,0 +1,5 @@
+---
+description: hive-credentials
+---
+
+use hive-credentials skill
@@ -0,0 +1,5 @@
+---
+description: hive-patterns
+---
+
+use hive-patterns skill
@@ -0,0 +1,5 @@
+---
+description: hive-test
+---
+
+use hive-test skill
@@ -0,0 +1,5 @@
+---
+description: hive
+---
+
+use hive skill
@@ -0,0 +1 @@
+../../.claude/skills/hive
@@ -0,0 +1 @@
+../../.claude/skills/hive-concepts
@@ -0,0 +1 @@
+../../.claude/skills/hive-create
@@ -0,0 +1 @@
+../../.claude/skills/hive-credentials
@@ -0,0 +1 @@
+../../.claude/skills/hive-patterns
@@ -0,0 +1 @@
+../../.claude/skills/hive-test
@@ -0,0 +1,15 @@
+{
+  "hooks": {
+    "PostToolUse": [
+      {
+        "matcher": "Edit|Write|NotebookEdit",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "ruff check --fix \"$CLAUDE_FILE_PATH\" 2>/dev/null; ruff format \"$CLAUDE_FILE_PATH\" 2>/dev/null; true"
+          }
+        ]
+      }
+    ]
+  }
+}
@@ -0,0 +1,34 @@
+{
+  "permissions": {
+    "allow": [
+      "mcp__agent-builder__create_session",
+      "mcp__agent-builder__set_goal",
+      "mcp__agent-builder__add_node",
+      "mcp__agent-builder__add_edge",
+      "mcp__agent-builder__configure_loop",
+      "mcp__agent-builder__add_mcp_server",
+      "mcp__agent-builder__validate_graph",
+      "mcp__agent-builder__export_graph",
+      "mcp__agent-builder__load_session_by_id",
+      "Bash(git status:*)",
+      "Bash(gh run view:*)",
+      "Bash(uv run:*)",
+      "Bash(env:*)",
+      "mcp__agent-builder__test_node",
+      "mcp__agent-builder__list_mcp_tools",
+      "Bash(python -m py_compile:*)",
+      "Bash(python -m pytest:*)",
+      "Bash(source:*)",
+      "mcp__agent-builder__update_node",
+      "mcp__agent-builder__check_missing_credentials",
+      "mcp__agent-builder__list_stored_credentials",
+      "Bash(find:*)",
+      "mcp__agent-builder__run_tests",
+      "Bash(PYTHONPATH=core:exports:tools/src uv run pytest:*)",
+      "mcp__agent-builder__list_agent_sessions",
+      "mcp__agent-builder__generate_constraint_tests",
+      "mcp__agent-builder__generate_success_tests"
+    ]
+  },
+  "enabledMcpjsonServers": ["agent-builder", "tools"]
+}
@@ -0,0 +1,399 @@
+---
+name: hive-concepts
+description: Core concepts for goal-driven agents - architecture, node types (event_loop, function), tool discovery, and workflow overview. Use when starting agent development or need to understand agent fundamentals.
+license: Apache-2.0
+metadata:
+  author: hive
+  version: "2.0"
+  type: foundational
+  part_of: hive
+---
+
+# Building Agents - Core Concepts
+
+Foundational knowledge for building goal-driven agents as Python packages.
+
+## Architecture: Python Services (Not JSON Configs)
+
+Agents are built as Python packages:
+
+```
+exports/my_agent/
+├── __init__.py          # Package exports
+├── __main__.py          # CLI (run, info, validate, shell)
+├── agent.py             # Graph construction (goal, edges, agent class)
+├── nodes/__init__.py    # Node definitions (NodeSpec)
+├── config.py            # Runtime config
+└── README.md            # Documentation
+```
+
+**Key Principle: Agent is visible and editable during build**
+
+- Files created immediately as components are approved
+- User can watch files grow in their editor
+- No session state - just direct file writes
+- No "export" step - agent is ready when build completes
+
+## Core Concepts
+
+### Goal
+
+Success criteria and constraints (written to agent.py)
+
+```python
+goal = Goal(
+    id="research-goal",
+    name="Technical Research Agent",
+    description="Research technical topics thoroughly",
+    success_criteria=[
+        SuccessCriterion(
+            id="completeness",
+            description="Cover all aspects of topic",
+            metric="coverage_score",
+            target=">=0.9",
+            weight=0.4,
+        ),
+        # 3-5 success criteria total
+    ],
+    constraints=[
+        Constraint(
+            id="accuracy",
+            description="All information must be verified",
+            constraint_type="hard",
+            category="quality",
+        ),
+        # 1-5 constraints total
+    ],
+)
+```
+
+### Node
+
+Unit of work (written to nodes/__init__.py)
+
+**Node Types:**
+
+- `event_loop` — Multi-turn streaming loop with tool execution and judge-based evaluation. Works with or without tools.
+- `function` — Deterministic Python operations. No LLM involved.
+
+```python
+search_node = NodeSpec(
+    id="search-web",
+    name="Search Web",
+    description="Search for information and extract results",
+    node_type="event_loop",
+    input_keys=["query"],
+    output_keys=["search_results"],
+    system_prompt="Search the web for: {query}. Use the web_search tool to find results, then call set_output to store them.",
+    tools=["web_search"],
+)
+```
+
+**NodeSpec Fields for Event Loop Nodes:**
+
+| Field | Default | Description |
+|-------|---------|-------------|
+| `client_facing` | `False` | If True, streams output to user and blocks for input between turns |
+| `nullable_output_keys` | `[]` | Output keys that may remain unset (for mutually exclusive outputs) |
+| `max_node_visits` | `1` | Max times this node executes per run. Set >1 for feedback loop targets |
+
+### Edge
+
+Connection between nodes (written to agent.py)
+
+**Edge Conditions:**
+
+- `on_success` — Proceed if node succeeds (most common)
+- `on_failure` — Handle errors
+- `always` — Always proceed
+- `conditional` — Based on expression evaluating node output
+
+**Edge Priority:**
+
+Priority controls evaluation order when multiple edges leave the same node. Higher priority edges are evaluated first. Use negative priority for feedback edges (edges that loop back to earlier nodes).
+
+```python
+# Forward edge (evaluated first)
+EdgeSpec(
+    id="review-to-campaign",
+    source="review",
+    target="campaign-builder",
+    condition=EdgeCondition.CONDITIONAL,
+    condition_expr="output.get('approved_contacts') is not None",
+    priority=1,
+)
+
+# Feedback edge (evaluated after forward edges)
+EdgeSpec(
+    id="review-feedback",
+    source="review",
+    target="extractor",
+    condition=EdgeCondition.CONDITIONAL,
+    condition_expr="output.get('redo_extraction') is not None",
+    priority=-1,
+)
+```
+
+### Client-Facing Nodes
+
+For multi-turn conversations with the user, set `client_facing=True` on a node. The node will:
+- Stream its LLM output directly to the end user
+- Block for user input between conversational turns
+- Resume when new input is injected via `inject_event()`
+
+```python
+intake_node = NodeSpec(
+    id="intake",
+    name="Intake",
+    description="Gather requirements from the user",
+    node_type="event_loop",
+    client_facing=True,
+    input_keys=[],
+    output_keys=["repo_url", "project_url"],
+    system_prompt="You are the intake agent. Ask the user for the repo URL and project URL.",
+)
+```
+
+> **Legacy Note:** The old `pause_nodes` / `entry_points` pattern still works but `client_facing=True` is preferred for new agents.
+
+**STEP 1 / STEP 2 Prompt Pattern:** For client-facing nodes, structure the system prompt with two explicit phases:
+
+```python
+system_prompt="""\
+**STEP 1 — Respond to the user (text only, NO tool calls):**
+[Present information, ask questions, etc.]
+
+**STEP 2 — After the user responds, call set_output:**
+[Call set_output with the structured outputs]
+"""
+```
+
+This prevents the LLM from calling `set_output` prematurely before the user has had a chance to respond.
+
+### Node Design: Fewer, Richer Nodes
+
+Prefer fewer nodes that do more work over many thin single-purpose nodes:
+
+- **Bad**: 8 thin nodes (parse query → search → fetch → evaluate → synthesize → write → check → save)
+- **Good**: 4 rich nodes (intake → research → review → report)
+
+Why: Each node boundary requires serializing outputs and passing context. Fewer nodes means the LLM retains full context of its work within the node. A research node that searches, fetches, and analyzes keeps all the source material in its conversation history.
+
+### nullable_output_keys for Cross-Edge Inputs
+
+When a node receives inputs that only arrive on certain edges (e.g., `feedback` only comes from a review → research feedback loop, not from intake → research), mark those keys as `nullable_output_keys`:
+
+```python
+research_node = NodeSpec(
+    id="research",
+    input_keys=["research_brief", "feedback"],
+    nullable_output_keys=["feedback"],  # Not present on first visit
+    max_node_visits=3,
+    ...
+)
+```
+
+## Event Loop Architecture Concepts
+
+### How EventLoopNode Works
+
+An event loop node runs a multi-turn loop:
+1. LLM receives system prompt + conversation history
+2. LLM responds (text and/or tool calls)
+3. Tool calls are executed, results added to conversation
+4. Judge evaluates: ACCEPT (exit loop), RETRY (loop again), or ESCALATE
+5. Repeat until the judge ACCEPTs or `max_iterations` is reached
+
+### EventLoopNode Runtime
+
+EventLoopNodes are **auto-created** by `GraphExecutor` at runtime. You do NOT need to manually register them. Both `GraphExecutor` (direct) and `AgentRuntime` / `create_agent_runtime()` handle event_loop nodes automatically.
+
+```python
+# Direct execution — executor auto-creates EventLoopNodes
+from framework.graph.executor import GraphExecutor
+from framework.runtime.core import Runtime
+
+runtime = Runtime(storage_path)
+executor = GraphExecutor(
+    runtime=runtime,
+    llm=llm,
+    tools=tools,
+    tool_executor=tool_executor,
+    storage_path=storage_path,
+)
+result = await executor.execute(graph=graph, goal=goal, input_data=input_data)
+
+# TUI execution — AgentRuntime also works
+from framework.runtime.agent_runtime import create_agent_runtime
+runtime = create_agent_runtime(
+    graph=graph, goal=goal, storage_path=storage_path,
+    entry_points=[...], llm=llm, tools=tools, tool_executor=tool_executor,
+)
+```
+
+### set_output
+
+Nodes produce structured outputs by calling `set_output(key, value)` — a synthetic tool injected by the framework. When the LLM calls `set_output`, the value is stored in the output accumulator and made available to downstream nodes via shared memory.
+
+`set_output` is NOT a real tool — it is excluded from `real_tool_results`. For client-facing nodes, this means a turn where the LLM only calls `set_output` (no other tools) is treated as a conversational boundary and will block for user input.
+
+### JudgeProtocol
+
+**The judge is the SOLE mechanism for acceptance decisions.** Do not add ad-hoc framework gating, output rollback, or premature rejection logic. If the LLM calls `set_output` too early, fix it with better prompts or a custom judge — not framework-level guards.
+
+The judge controls when a node's loop exits:
+- **Implicit judge** (default, no judge configured): ACCEPTs when the LLM finishes with no tool calls and all required output keys are set
+- **SchemaJudge**: Validates outputs against a Pydantic model
+- **Custom judges**: Implement `evaluate(context) -> JudgeVerdict`
+
+### LoopConfig
+
+Controls loop behavior:
+- `max_iterations` (default 50) — prevents infinite loops
+- `max_tool_calls_per_turn` (default 10) — limits tool calls per LLM response
+- `tool_call_overflow_margin` (default 0.5) — fractional headroom above `max_tool_calls_per_turn` before extra tool calls are discarded (0.5 means a hard cutoff at 150% of the limit)
+- `stall_detection_threshold` (default 3) — detects repeated identical responses
+- `max_history_tokens` (default 32000) — triggers conversation compaction
+
+### Data Tools (Spillover Management)
+
+When tool results exceed the context window, the framework automatically saves them to a spillover directory and truncates with a hint. Nodes that produce or consume large data should include the data tools:
+
+- `save_data(filename, data)` — Write data to a file in the data directory
+- `load_data(filename, offset=0, limit=50)` — Read data with line-based pagination
+- `list_data_files()` — List available data files
+- `serve_file_to_user(filename, label="")` — Get a clickable file:// URI for the user
+
+Note: `data_dir` is a framework-injected context parameter — the LLM never sees or passes it. `GraphExecutor.execute()` sets it per-execution via `contextvars`, so data tools and spillover always share the same session-scoped directory.
+
+These are real MCP tools (not synthetic). Add them to nodes that handle large tool results:
+
+```python
+research_node = NodeSpec(
+    ...
+    tools=["web_search", "web_scrape", "load_data", "save_data", "list_data_files"],
+)
+```
+
+### Fan-Out / Fan-In
+
+Multiple ON_SUCCESS edges from the same source create parallel execution. All branches run concurrently via `asyncio.gather()`. Parallel event_loop nodes must have disjoint `output_keys`.
+
+### max_node_visits
+
+Controls how many times a node can execute in one graph run. The default is 1. Set it higher for nodes that are targets of feedback edges (review-reject loops). Set it to 0 for unlimited visits (guarded by max_steps).
+
+## Tool Discovery & Validation
+
+**CRITICAL:** Before adding a node with tools, you MUST verify the tools exist.
+
+Tools are provided by MCP servers. Never assume a tool exists - always discover dynamically.
+
+### Step 1: Register MCP Server (if not already done)
+
+```python
+mcp__agent-builder__add_mcp_server(
+    name="tools",
+    transport="stdio",
+    command="python",
+    args='["mcp_server.py", "--stdio"]',
+    cwd="../tools"
+)
+```
+
+### Step 2: Discover Available Tools
+
+```python
+# List all tools from all registered servers
+mcp__agent-builder__list_mcp_tools()
+
+# Or list tools from a specific server
+mcp__agent-builder__list_mcp_tools(server_name="tools")
+```
+
+### Step 3: Validate Before Adding Nodes
+
+Before writing a node with `tools=[...]`:
+
+1. Call `list_mcp_tools()` to get available tools
+2. Check each tool in your node exists in the response
+3. If a tool doesn't exist:
+   - **DO NOT proceed** with the node
+   - Inform the user: "The tool 'X' is not available. Available tools are: ..."
+   - Ask if they want to use an alternative or proceed without the tool
+
+### Tool Validation Anti-Patterns
+
+- **Never assume a tool exists** - always call `list_mcp_tools()` first
+- **Never write a node with unverified tools** - validate before writing
+- **Never silently drop tools** - if a tool doesn't exist, inform the user
+- **Never guess tool names** - use exact names from discovery response
+
+## Workflow Overview: Incremental File Construction
+
+```
+1. CREATE PACKAGE → mkdir + write skeletons
+2. DEFINE GOAL → Write to agent.py + config.py
+3. FOR EACH NODE:
+   - Propose design (event_loop for LLM work, function for deterministic)
+   - User approves
+   - Write to nodes/__init__.py IMMEDIATELY
+   - (Optional) Validate with test_node
+4. CONNECT EDGES → Update agent.py
+   - Use priority for feedback edges (negative priority)
+   - (Optional) Validate with validate_graph
+5. FINALIZE → Write agent class to agent.py
+6. DONE - Agent ready at exports/my_agent/
+```
+
+**Files are written immediately. MCP tools are optional and used only for validation/testing bookkeeping.**
+
+## When to Use This Skill
+
+Use hive-concepts when:
+- Starting a new agent project and need to understand fundamentals
+- Need to understand agent architecture before building
+- Want to validate tool availability before proceeding
+- Learning about node types, edges, and graph execution
+
+**Next Steps:**
+- Ready to build? → Use `hive-create` skill
+- Need patterns and examples? → Use `hive-patterns` skill
+
+## MCP Tools for Validation
+
+After writing files, optionally use MCP tools for validation:
+
+**test_node** - Validate node configuration with mock inputs
+```python
+mcp__agent-builder__test_node(
+    node_id="search-web",
+    test_input='{"query": "test query"}',
+    mock_llm_response='{"results": "mock output"}'
+)
+```
+
+**validate_graph** - Check graph structure
+```python
+mcp__agent-builder__validate_graph()
+# Returns: unreachable nodes, missing connections, event_loop validation, etc.
+```
+
+**configure_loop** - Set event loop parameters
+```python
+mcp__agent-builder__configure_loop(
+    max_iterations=50,
+    max_tool_calls_per_turn=10,
+    stall_detection_threshold=3,
+    max_history_tokens=32000
+)
+```
+
+**Key Point:** Files are written FIRST. MCP tools are for validation only.
+
+## Related Skills
+
+- **hive-create** - Step-by-step building process
+- **hive-patterns** - Best practices: judges, feedback edges, fan-out, context management
+- **hive** - Complete workflow orchestrator
+- **hive-test** - Test and validate completed agents
@@ -0,0 +1,24 @@
+"""
+Deep Research Agent - Interactive, rigorous research with TUI conversation.
+
+Research any topic through multi-source web search, quality evaluation,
+and synthesis. Features client-facing TUI interaction at key checkpoints
+for user guidance and iterative deepening.
+"""
+
+from .agent import DeepResearchAgent, default_agent, goal, nodes, edges
+from .config import RuntimeConfig, AgentMetadata, default_config, metadata
+
+__version__ = "1.0.0"
+
+__all__ = [
+    "DeepResearchAgent",
+    "default_agent",
+    "goal",
+    "nodes",
+    "edges",
+    "RuntimeConfig",
+    "AgentMetadata",
+    "default_config",
+    "metadata",
+]
@@ -0,0 +1,241 @@
+"""
+CLI entry point for Deep Research Agent.
+
+Uses AgentRuntime for multi-entrypoint support with HITL pause/resume.
+"""
+
+import asyncio
+import json
+import logging
+import sys
+import click
+
+from .agent import default_agent, DeepResearchAgent
+
+
+def setup_logging(verbose=False, debug=False):
+    """Configure logging for execution visibility."""
+    if debug:
+        level, fmt = logging.DEBUG, "%(asctime)s %(name)s: %(message)s"
+    elif verbose:
+        level, fmt = logging.INFO, "%(message)s"
+    else:
+        level, fmt = logging.WARNING, "%(levelname)s: %(message)s"
+    logging.basicConfig(level=level, format=fmt, stream=sys.stderr)
+    logging.getLogger("framework").setLevel(level)
+
+
+@click.group()
+@click.version_option(version="1.0.0")
+def cli():
+    """Deep Research Agent - Interactive, rigorous research with TUI conversation."""
+    pass
+
+
+@cli.command()
+@click.option("--topic", "-t", type=str, required=True, help="Research topic")
+@click.option("--mock", is_flag=True, help="Run in mock mode")
+@click.option("--quiet", "-q", is_flag=True, help="Only output result JSON")
+@click.option("--verbose", "-v", is_flag=True, help="Show execution details")
+@click.option("--debug", is_flag=True, help="Show debug logging")
+def run(topic, mock, quiet, verbose, debug):
+    """Execute research on a topic."""
+    if not quiet:
+        setup_logging(verbose=verbose, debug=debug)
+
+    context = {"topic": topic}
+
+    result = asyncio.run(default_agent.run(context, mock_mode=mock))
+
+    output_data = {
+        "success": result.success,
+        "steps_executed": result.steps_executed,
+        "output": result.output,
+    }
+    if result.error:
+        output_data["error"] = result.error
+
+    click.echo(json.dumps(output_data, indent=2, default=str))
+    sys.exit(0 if result.success else 1)
+
+
+@cli.command()
+@click.option("--mock", is_flag=True, help="Run in mock mode")
+@click.option("--verbose", "-v", is_flag=True, help="Show execution details")
+@click.option("--debug", is_flag=True, help="Show debug logging")
+def tui(mock, verbose, debug):
+    """Launch the TUI dashboard for interactive research."""
+    setup_logging(verbose=verbose, debug=debug)
+
+    try:
+        from framework.tui.app import AdenTUI
+    except ImportError:
+        click.echo(
+            "TUI requires the 'textual' package. Install with: pip install textual"
+        )
+        sys.exit(1)
+
+    from pathlib import Path
+
+    from framework.llm import LiteLLMProvider
+    from framework.runner.tool_registry import ToolRegistry
+    from framework.runtime.agent_runtime import create_agent_runtime
+    from framework.runtime.event_bus import EventBus
+    from framework.runtime.execution_stream import EntryPointSpec
+
+    async def run_with_tui():
+        agent = DeepResearchAgent()
+
+        # Build graph and tools
+        agent._event_bus = EventBus()
+        agent._tool_registry = ToolRegistry()
+
+        storage_path = Path.home() / ".hive" / "agents" / "deep_research_agent"
+        storage_path.mkdir(parents=True, exist_ok=True)
+
+        mcp_config_path = Path(__file__).parent / "mcp_servers.json"
+        if mcp_config_path.exists():
+            agent._tool_registry.load_mcp_config(mcp_config_path)
+
+        llm = None
+        if not mock:
+            llm = LiteLLMProvider(
+                model=agent.config.model,
+                api_key=agent.config.api_key,
+                api_base=agent.config.api_base,
+            )
+
+        tools = list(agent._tool_registry.get_tools().values())
+        tool_executor = agent._tool_registry.get_executor()
+        graph = agent._build_graph()
+
+        runtime = create_agent_runtime(
+            graph=graph,
+            goal=agent.goal,
+            storage_path=storage_path,
+            entry_points=[
+                EntryPointSpec(
+                    id="start",
+                    name="Start Research",
+                    entry_node="intake",
+                    trigger_type="manual",
+                    isolation_level="isolated",
+                ),
+            ],
+            llm=llm,
+            tools=tools,
+            tool_executor=tool_executor,
+        )
+
+        await runtime.start()
+
+        try:
+            app = AdenTUI(runtime)
+            await app.run_async()
+        finally:
+            await runtime.stop()
+
+    asyncio.run(run_with_tui())
+
+
+@cli.command()
+@click.option("--json", "output_json", is_flag=True)
+def info(output_json):
+    """Show agent information."""
+    info_data = default_agent.info()
+    if output_json:
+        click.echo(json.dumps(info_data, indent=2))
+    else:
+        click.echo(f"Agent: {info_data['name']}")
+        click.echo(f"Version: {info_data['version']}")
+        click.echo(f"Description: {info_data['description']}")
+        click.echo(f"\nNodes: {', '.join(info_data['nodes'])}")
+        click.echo(f"Client-facing: {', '.join(info_data['client_facing_nodes'])}")
+        click.echo(f"Entry: {info_data['entry_node']}")
+        click.echo(f"Terminal: {', '.join(info_data['terminal_nodes'])}")
+
+
+@cli.command()
+def validate():
+    """Validate agent structure."""
+    validation = default_agent.validate()
+    if validation["valid"]:
+        click.echo("Agent is valid")
+        if validation["warnings"]:
+            for warning in validation["warnings"]:
+                click.echo(f"  WARNING: {warning}")
+    else:
+        click.echo("Agent has errors:")
+        for error in validation["errors"]:
+            click.echo(f"  ERROR: {error}")
+    sys.exit(0 if validation["valid"] else 1)
+
+
+@cli.command()
+@click.option("--verbose", "-v", is_flag=True)
+def shell(verbose):
+    """Interactive research session (CLI, no TUI)."""
+    asyncio.run(_interactive_shell(verbose))
+
+
+async def _interactive_shell(verbose=False):
+    """Async interactive shell."""
+    setup_logging(verbose=verbose)
+
+    click.echo("=== Deep Research Agent ===")
+    click.echo("Enter a topic to research (or 'quit' to exit):\n")
+
+    agent = DeepResearchAgent()
+    await agent.start()
+
+    try:
+        while True:
+            try:
+                topic = await asyncio.get_event_loop().run_in_executor(
+                    None, input, "Topic> "
+                )
+                if topic.lower() in ["quit", "exit", "q"]:
+                    click.echo("Goodbye!")
+                    break
+
+                if not topic.strip():
+                    continue
+
+                click.echo("\nResearching...\n")
+
+                result = await agent.trigger_and_wait("start", {"topic": topic})
+
+                if result is None:
+                    click.echo("\n[Execution timed out]\n")
+                    continue
+
+                if result.success:
+                    output = result.output
+                    if "report_content" in output:
+                        click.echo("\n--- Report ---\n")
+                        click.echo(output["report_content"])
+                        click.echo("\n")
+                    if "references" in output:
+                        click.echo("--- References ---\n")
+                        for ref in output.get("references", []):
+                            click.echo(
+                                f"  [{ref.get('number', '?')}] {ref.get('title', '')} - {ref.get('url', '')}"
+                            )
+                        click.echo("\n")
+                else:
+                    click.echo(f"\nResearch failed: {result.error}\n")
+
+            except KeyboardInterrupt:
+                click.echo("\nGoodbye!")
+                break
+            except Exception as e:
+                click.echo(f"Error: {e}", err=True)
+                import traceback
+
+                traceback.print_exc()
+    finally:
+        await agent.stop()
+
+
+if __name__ == "__main__":
+    cli()
@@ -0,0 +1,358 @@
+"""Agent graph construction for Deep Research Agent."""
+
+from pathlib import Path
+
+from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
+from framework.graph.edge import GraphSpec
+from framework.graph.executor import ExecutionResult
+from framework.graph.checkpoint_config import CheckpointConfig
+from framework.llm import LiteLLMProvider
+from framework.runner.tool_registry import ToolRegistry
+from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
+from framework.runtime.execution_stream import EntryPointSpec
+
+from .config import default_config, metadata
+from .nodes import (
+    intake_node,
+    research_node,
+    review_node,
+    report_node,
+)
+
+# Goal definition: four equally weighted success criteria (0.25 each, summing
+# to 1.0) plus three constraints covering accuracy and user interaction.
+goal = Goal(
+    id="rigorous-interactive-research",
+    name="Rigorous Interactive Research",
+    description=(
+        "Research any topic by searching diverse sources, analyzing findings, "
+        "and producing a cited report — with user checkpoints to guide direction."
+    ),
+    success_criteria=[
+        SuccessCriterion(
+            id="source-diversity",
+            description="Use multiple diverse, authoritative sources",
+            metric="source_count",
+            target=">=5",
+            weight=0.25,
+        ),
+        SuccessCriterion(
+            id="citation-coverage",
+            description="Every factual claim in the report cites its source",
+            metric="citation_coverage",
+            target="100%",
+            weight=0.25,
+        ),
+        SuccessCriterion(
+            id="user-satisfaction",
+            description="User reviews findings before report generation",
+            metric="user_approval",
+            target="true",
+            weight=0.25,
+        ),
+        SuccessCriterion(
+            id="report-completeness",
+            description="Final report answers the original research questions",
+            metric="question_coverage",
+            target="90%",
+            weight=0.25,
+        ),
+    ],
+    constraints=[
+        Constraint(
+            id="no-hallucination",
+            description="Only include information found in fetched sources",
+            constraint_type="quality",
+            category="accuracy",
+        ),
+        Constraint(
+            id="source-attribution",
+            description="Every claim must cite its source with a numbered reference",
+            constraint_type="quality",
+            category="accuracy",
+        ),
+        Constraint(
+            id="user-checkpoint",
+            description="Present findings to the user before writing the final report",
+            constraint_type="functional",
+            category="interaction",
+        ),
+    ],
+)
+
+# Node list: the four node specs imported from .nodes, in pipeline order
+# (intake, research, review, report).
+nodes = [
+    intake_node,
+    research_node,
+    review_node,
+    report_node,
+]
+
+# Edge definitions: a forward chain intake -> research -> review -> report,
+# a review -> research feedback edge, and two edges leaving report so the
+# graph loops instead of terminating.
+edges = [
+    # intake -> research
+    EdgeSpec(
+        id="intake-to-research",
+        source="intake",
+        target="research",
+        condition=EdgeCondition.ON_SUCCESS,
+        priority=1,
+    ),
+    # research -> review
+    EdgeSpec(
+        id="research-to-review",
+        source="research",
+        target="review",
+        condition=EdgeCondition.ON_SUCCESS,
+        priority=1,
+    ),
+    # review -> research (feedback loop)
+    # NOTE(review): this condition and the one on review-to-report look mutually
+    # exclusive, assuming needs_more_research is a bool — confirm.
+    EdgeSpec(
+        id="review-to-research-feedback",
+        source="review",
+        target="research",
+        condition=EdgeCondition.CONDITIONAL,
+        condition_expr="needs_more_research == True",
+        priority=1,
+    ),
+    # review -> report (user satisfied)
+    EdgeSpec(
+        id="review-to-report",
+        source="review",
+        target="report",
+        condition=EdgeCondition.CONDITIONAL,
+        condition_expr="needs_more_research == False",
+        priority=2,
+    ),
+    # report -> research (user wants deeper research on current topic)
+    EdgeSpec(
+        id="report-to-research",
+        source="report",
+        target="research",
+        condition=EdgeCondition.CONDITIONAL,
+        condition_expr="str(next_action).lower() == 'more_research'",
+        priority=2,
+    ),
+    # report -> intake (user wants a new topic — default when not more_research)
+    # This expression is the exact negation of the one on report-to-research.
+    EdgeSpec(
+        id="report-to-intake",
+        source="report",
+        target="intake",
+        condition=EdgeCondition.CONDITIONAL,
+        condition_expr="str(next_action).lower() != 'more_research'",
+        priority=1,
+    ),
+]
+
+# Graph configuration: execution starts at "intake" through the single "start"
+# entry point. No pause or terminal nodes are declared; the report node's two
+# edges lead back to research or intake.
+entry_node = "intake"
+entry_points = {"start": "intake"}
+pause_nodes = []
+terminal_nodes = []
+
+
+class DeepResearchAgent:
+    """
+    Deep Research Agent — 4-node pipeline with user checkpoints.
+
+    Flow: intake -> research -> review -> report
+                      ^           |
+                      +-- feedback loop (if user wants more)
+
+    Uses AgentRuntime for proper session management:
+    - Session-scoped storage (sessions/{session_id}/)
+    - Checkpointing for resume capability
+    - Runtime logging
+    - Data folder for save_data/load_data
+    """
+
+    def __init__(self, config=None):
+        self.config = config or default_config
+        self.goal = goal
+        self.nodes = nodes
+        self.edges = edges
+        self.entry_node = entry_node
+        self.entry_points = entry_points
+        self.pause_nodes = pause_nodes
+        self.terminal_nodes = terminal_nodes
+        self._graph: GraphSpec | None = None
+        self._agent_runtime: AgentRuntime | None = None
+        self._tool_registry: ToolRegistry | None = None
+        self._storage_path: Path | None = None
+
+    def _build_graph(self) -> GraphSpec:
+        """Build the GraphSpec."""
+        return GraphSpec(
+            id="deep-research-agent-graph",
+            goal_id=self.goal.id,
+            version="1.0.0",
+            entry_node=self.entry_node,
+            entry_points=self.entry_points,
+            terminal_nodes=self.terminal_nodes,
+            pause_nodes=self.pause_nodes,
+            nodes=self.nodes,
+            edges=self.edges,
+            default_model=self.config.model,
+            max_tokens=self.config.max_tokens,
+            loop_config={
+                "max_iterations": 100,
+                "max_tool_calls_per_turn": 20,
+                "max_history_tokens": 32000,
+            },
+            conversation_mode="continuous",
+            identity_prompt=(
+                "You are a rigorous research agent. You search for information "
+                "from diverse, authoritative sources, analyze findings critically, "
+                "and produce well-cited reports. You never fabricate information — "
+                "every claim must trace back to a source you actually retrieved."
+            ),
+        )
+
+    def _setup(self, mock_mode=False) -> None:
+        """Set up the agent runtime with sessions, checkpoints, and logging."""
+        self._storage_path = Path.home() / ".hive" / "agents" / "deep_research_agent"
+        self._storage_path.mkdir(parents=True, exist_ok=True)
+
+        self._tool_registry = ToolRegistry()
+
+        mcp_config_path = Path(__file__).parent / "mcp_servers.json"
+        if mcp_config_path.exists():
+            self._tool_registry.load_mcp_config(mcp_config_path)
+
+        llm = None
+        if not mock_mode:
+            llm = LiteLLMProvider(
+                model=self.config.model,
+                api_key=self.config.api_key,
+                api_base=self.config.api_base,
+            )
+
+        tool_executor = self._tool_registry.get_executor()
+        tools = list(self._tool_registry.get_tools().values())
+
+        self._graph = self._build_graph()
+
+        checkpoint_config = CheckpointConfig(
+            enabled=True,
+            checkpoint_on_node_start=False,
+            checkpoint_on_node_complete=True,
+            checkpoint_max_age_days=7,
+            async_checkpoint=True,
+        )
+
+        entry_point_specs = [
+            EntryPointSpec(
+                id="default",
+                name="Default",
+                entry_node=self.entry_node,
+                trigger_type="manual",
+                isolation_level="shared",
+            )
+        ]
+
+        self._agent_runtime = create_agent_runtime(
+            graph=self._graph,
+            goal=self.goal,
+            storage_path=self._storage_path,
+            entry_points=entry_point_specs,
+            llm=llm,
+            tools=tools,
+            tool_executor=tool_executor,
+            checkpoint_config=checkpoint_config,
+        )
+
+    async def start(self, mock_mode=False) -> None:
+        """Set up and start the agent runtime."""
+        if self._agent_runtime is None:
+            self._setup(mock_mode=mock_mode)
+        if not self._agent_runtime.is_running:
+            await self._agent_runtime.start()
+
+    async def stop(self) -> None:
+        """Stop the agent runtime and clean up."""
+        if self._agent_runtime and self._agent_runtime.is_running:
+            await self._agent_runtime.stop()
+        self._agent_runtime = None
+
+    async def trigger_and_wait(
+        self,
+        entry_point: str = "default",
+        input_data: dict | None = None,
+        timeout: float | None = None,
+        session_state: dict | None = None,
+    ) -> ExecutionResult | None:
+        """Execute the graph and wait for completion."""
+        if self._agent_runtime is None:
+            raise RuntimeError("Agent not started. Call start() first.")
+
+        return await self._agent_runtime.trigger_and_wait(
+            entry_point_id=entry_point,
+            input_data=input_data or {},
+            session_state=session_state,
+        )
+
+    async def run(
+        self, context: dict, mock_mode=False, session_state=None
+    ) -> ExecutionResult:
+        """Run the agent (convenience method for single execution)."""
+        await self.start(mock_mode=mock_mode)
+        try:
+            result = await self.trigger_and_wait(
+                "default", context, session_state=session_state
+            )
+            return result or ExecutionResult(success=False, error="Execution timeout")
+        finally:
+            await self.stop()
+
+    def info(self):
+        """Get agent information."""
+        return {
+            "name": metadata.name,
+            "version": metadata.version,
+            "description": metadata.description,
+            "goal": {
+                "name": self.goal.name,
+                "description": self.goal.description,
+            },
+            "nodes": [n.id for n in self.nodes],
+            "edges": [e.id for e in self.edges],
+            "entry_node": self.entry_node,
+            "entry_points": self.entry_points,
+            "pause_nodes": self.pause_nodes,
+            "terminal_nodes": self.terminal_nodes,
+            "client_facing_nodes": [n.id for n in self.nodes if n.client_facing],
+        }
+
+    def validate(self):
+        """Validate agent structure."""
+        errors = []
+        warnings = []
+
+        node_ids = {node.id for node in self.nodes}
+        for edge in self.edges:
+            if edge.source not in node_ids:
+                errors.append(f"Edge {edge.id}: source '{edge.source}' not found")
+            if edge.target not in node_ids:
+                errors.append(f"Edge {edge.id}: target '{edge.target}' not found")
+
+        if self.entry_node not in node_ids:
+            errors.append(f"Entry node '{self.entry_node}' not found")
+
+        for terminal in self.terminal_nodes:
+            if terminal not in node_ids:
+                errors.append(f"Terminal node '{terminal}' not found")
+
+        for ep_id, node_id in self.entry_points.items():
+            if node_id not in node_ids:
+                errors.append(
+                    f"Entry point '{ep_id}' references unknown node '{node_id}'"
+                )
+
+        return {
+            "valid": len(errors) == 0,
+            "errors": errors,
+            "warnings": warnings,
+        }
+
+
+# Create default instance
+default_agent = DeepResearchAgent()
@@ -0,0 +1,26 @@
+"""Runtime configuration."""
+
+from dataclasses import dataclass
+
+from framework.config import RuntimeConfig
+
+default_config = RuntimeConfig()
+
+
+@dataclass
+class AgentMetadata:
+    """Descriptive metadata for the Deep Research Agent.
+
+    Every field is a string with a default, so ``AgentMetadata()`` yields the
+    standard values for this agent.
+    """
+
+    # Human-readable agent name.
+    name: str = "Deep Research Agent"
+    # Version string of the agent definition.
+    version: str = "1.0.0"
+    # Short summary of what the agent does.
+    description: str = (
+        "Interactive research agent that rigorously investigates topics through "
+        "multi-source search, quality evaluation, and synthesis - with TUI conversation "
+        "at key checkpoints for user guidance and feedback."
+    )
+    # Greeting text; presumably shown to the user when a session opens —
+    # TODO confirm against whatever UI reads this field.
+    intro_message: str = (
+        "Hi! I'm your deep research assistant. Tell me a topic and I'll investigate it "
+        "thoroughly — searching multiple sources, evaluating quality, and synthesizing "
+        "a comprehensive report. What would you like me to research?"
+    )
+
+
+metadata = AgentMetadata()
@@ -0,0 +1,9 @@
+{
+  "hive-tools": {
+    "transport": "stdio",
+    "command": "uv",
+    "args": ["run", "python", "mcp_server.py", "--stdio"],
+    "cwd": "../../tools",
+    "description": "Hive tools MCP server providing web_search, web_scrape, and write_to_file"
+  }
+}
@@ -0,0 +1,204 @@
+"""Node definitions for Deep Research Agent."""
+
+from framework.graph import NodeSpec
+
+# Node 1: Intake (client-facing)
+# Brief conversation to clarify what the user wants researched.
+# Reads "topic" and produces a single output, "research_brief", once the user
+# has confirmed the scope.
+intake_node = NodeSpec(
+    id="intake",
+    name="Research Intake",
+    description="Discuss the research topic with the user, clarify scope, and confirm direction",
+    node_type="event_loop",
+    client_facing=True,
+    # NOTE(review): 0 presumably means "no visit limit" — confirm against NodeSpec.
+    max_node_visits=0,
+    input_keys=["topic"],
+    output_keys=["research_brief"],
+    success_criteria=(
+        "The research brief is specific and actionable: it states the topic, "
+        "the key questions to answer, the desired scope, and depth."
+    ),
+    system_prompt="""\
+You are a research intake specialist. The user wants to research a topic.
+Have a brief conversation to clarify what they need.
+
+**STEP 1 — Read and respond (text only, NO tool calls):**
+1. Read the topic provided
+2. If it's vague, ask 1-2 clarifying questions (scope, angle, depth)
+3. If it's already clear, confirm your understanding and ask the user to confirm
+
+Keep it short. Don't over-ask.
+
+**STEP 2 — After the user confirms, call set_output:**
+- set_output("research_brief", "A clear paragraph describing exactly what to research, \
+what questions to answer, what scope to cover, and how deep to go.")
+""",
+    # No tools are listed; set_output (used in the prompt) is presumably
+    # provided by the event-loop runtime itself — TODO confirm.
+    tools=[],
+)
+
+# Node 2: Research
+# The workhorse — searches the web, fetches content, analyzes sources.
+# One node holding both web tools (web_search and web_scrape, plus the
+# data-file tools) avoids the context-passing overhead of 5 separate nodes.
+research_node = NodeSpec(
+    id="research",
+    name="Research",
+    description="Search the web, fetch source content, and compile findings",
+    node_type="event_loop",
+    # NOTE(review): 0 presumably means "no visit limit" — confirm against NodeSpec.
+    max_node_visits=0,
+    input_keys=["research_brief", "feedback"],
+    output_keys=["findings", "sources", "gaps"],
+    # NOTE(review): "feedback" is an input key of this node, not one of its
+    # output keys. Presumably it is listed here so the node can run on the
+    # first pass, before the review node has produced any feedback — confirm
+    # against the NodeSpec semantics of nullable_output_keys.
+    nullable_output_keys=["feedback"],
+    success_criteria=(
+        "Findings reference at least 3 distinct sources with URLs. "
+        "Key claims are substantiated by fetched content, not generated."
+    ),
+    system_prompt="""\
+You are a research agent. Given a research brief, find and analyze sources.
+
+If feedback is provided, this is a follow-up round — focus on the gaps identified.
+
+Work in phases:
+1. **Search**: Use web_search with 3-5 diverse queries covering different angles.
+   Prioritize authoritative sources (.edu, .gov, established publications).
+2. **Fetch**: Use web_scrape on the most promising URLs (aim for 5-8 sources).
+   Skip URLs that fail. Extract the substantive content.
+3. **Analyze**: Review what you've collected. Identify key findings, themes,
+   and any contradictions between sources.
+
+Important:
+- Work in batches of 3-4 tool calls at a time — never more than 10 per turn
+- After each batch, assess whether you have enough material
+- Prefer quality over quantity — 5 good sources beat 15 thin ones
+- Track which URL each finding comes from (you'll need citations later)
+- Call set_output for each key in a SEPARATE turn (not in the same turn as other tool calls)
+
+When done, use set_output (one key at a time, separate turns):
+- set_output("findings", "Structured summary: key findings with source URLs for each claim. \
+Include themes, contradictions, and confidence levels.")
+- set_output("sources", [{"url": "...", "title": "...", "summary": "..."}])
+- set_output("gaps", "What aspects of the research brief are NOT well-covered yet, if any.")
+""",
+    tools=[
+        "web_search",
+        "web_scrape",
+        "load_data",
+        "save_data",
+        "append_data",
+        "list_data_files",
+    ],
+)
+
+# Node 3: Review (client-facing)
+# Shows the user what was found and asks whether to dig deeper or proceed.
+# Its "needs_more_research" output is what the conditional edges leaving this
+# node test (e.g. condition_expr="needs_more_research == False" for the edge
+# to the report node).
+# NOTE(review): the prompt tells the model to emit the strings "true"/"false",
+# while the edge expression compares against the boolean False. This only
+# routes correctly if set_output coerces those strings to booleans — confirm.
+review_node = NodeSpec(
+    id="review",
+    name="Review Findings",
+    description="Present findings to user and decide whether to research more or write the report",
+    node_type="event_loop",
+    client_facing=True,
+    # NOTE(review): 0 presumably means "no visit limit" — confirm against NodeSpec.
+    max_node_visits=0,
+    input_keys=["findings", "sources", "gaps", "research_brief"],
+    output_keys=["needs_more_research", "feedback"],
+    success_criteria=(
+        "The user has been presented with findings and has explicitly indicated "
+        "whether they want more research or are ready for the report."
+    ),
+    system_prompt="""\
+Present the research findings to the user clearly and concisely.
+
+**STEP 1 — Present (your first message, text only, NO tool calls):**
+1. **Summary** (2-3 sentences of what was found)
+2. **Key Findings** (bulleted, with confidence levels)
+3. **Sources Used** (count and quality assessment)
+4. **Gaps** (what's still unclear or under-covered)
+
+End by asking: Are they satisfied, or do they want deeper research? \
+Should we proceed to writing the final report?
+
+**STEP 2 — After the user responds, call set_output:**
+- set_output("needs_more_research", "true")  — if they want more
+- set_output("needs_more_research", "false") — if they're satisfied
+- set_output("feedback", "What the user wants explored further, or empty string")
+""",
+    tools=[],
+)
+
+# Node 4: Report (client-facing)
+# Writes an HTML report, serves the link to the user, and answers follow-ups.
+# Its "next_action" output selects the outgoing edge: "more_research" routes
+# back to the research node, any other value routes to intake for a new topic.
+report_node = NodeSpec(
+    id="report",
+    name="Write & Deliver Report",
+    description="Write a cited HTML report from the findings and present it to the user",
+    node_type="event_loop",
+    client_facing=True,
+    # NOTE(review): 0 presumably means "no visit limit" — confirm against NodeSpec.
+    max_node_visits=0,
+    input_keys=["findings", "sources", "research_brief"],
+    output_keys=["delivery_status", "next_action"],
+    success_criteria=(
+        "An HTML report has been saved, the file link has been presented to the user, "
+        "and the user has indicated what they want to do next."
+    ),
+    system_prompt="""\
+Write a research report as an HTML file and present it to the user.
+
+IMPORTANT: save_data requires TWO separate arguments: filename and data.
+Call it like: save_data(filename="report.html", data="<html>...</html>")
+Do NOT use _raw, do NOT nest arguments inside a JSON string.
+
+**STEP 1 — Write and save the HTML report (tool calls, NO text to user yet):**
+
+Build a clean HTML document. Keep the HTML concise — aim for clarity over length.
+Use minimal embedded CSS (a few lines of style, not a full framework).
+
+Report structure:
+- Title & date
+- Executive Summary (2-3 paragraphs)
+- Key Findings (organized by theme, with [n] citation links)
+- Analysis (synthesis, implications)
+- Conclusion (key takeaways)
+- References (numbered list with clickable URLs)
+
+Requirements:
+- Every factual claim must cite its source with [n] notation
+- Be objective — present multiple viewpoints where sources disagree
+- Answer the original research questions from the brief
+
+Save the HTML:
+  save_data(filename="report.html", data="<html>...</html>")
+
+Then get the clickable link:
+  serve_file_to_user(filename="report.html", label="Research Report")
+
+If save_data fails, simplify and shorten the HTML, then retry.
+
+**STEP 2 — Present the link to the user (text only, NO tool calls):**
+
+Tell the user the report is ready and include the file:// URI from
+serve_file_to_user so they can click it to open. Give a brief summary
+of what the report covers. Ask if they have questions or want to continue.
+
+**STEP 3 — After the user responds:**
+- Answer any follow-up questions from the research material
+- When the user is ready to move on, ask what they'd like to do next:
+  - Research a new topic?
+  - Dig deeper into the current topic?
+- Then call set_output:
+  - set_output("delivery_status", "completed")
+  - set_output("next_action", "new_topic")       — if they want a new topic
+  - set_output("next_action", "more_research")   — if they want deeper research
+""",
+    # Data-file tools for writing the report plus serve_file_to_user for the
+    # clickable link referenced in the prompt.
+    tools=[
+        "save_data",
+        "append_data",
+        "edit_data",
+        "serve_file_to_user",
+        "load_data",
+        "list_data_files",
+    ],
+)
+
+__all__ = [
+    "intake_node",
+    "research_node",
+    "review_node",
+    "report_node",
+]
@@ -0,0 +1,640 @@
+---
+name: hive-credentials
+description: Set up and install credentials for an agent. Detects missing credentials from agent config, collects them from the user, and stores them securely in the local encrypted store at ~/.hive/credentials.
+license: Apache-2.0
+metadata:
+  author: hive
+  version: "2.3"
+  type: utility
+---
+
+# Setup Credentials
+
+Interactive credential setup for agents with multiple authentication options. Detects what's missing, offers auth method choices, validates with health checks, and stores credentials securely.
+
+## When to Use
+
+- Before running or testing an agent for the first time
+- When `AgentRunner.run()` fails with "missing required credentials"
+- When a user asks to configure credentials for an agent
+- After building a new agent that uses tools requiring API keys
+
+## Workflow
+
+### Step 1: Identify the Agent
+
+Determine which agent needs credentials. The user will either:
+
+- Name the agent directly (e.g., "set up credentials for hubspot-agent")
+- Have an agent directory open (check `exports/` for agent dirs)
+- Be working on an agent in the current session
+
+Locate the agent's directory under `exports/{agent_name}/`.
+
+### Step 2: Detect Missing Credentials
+
+Use the `check_missing_credentials` MCP tool to detect what the agent needs and what's already configured. This tool loads the agent, inspects its required tools and node types, maps them to credentials via `CREDENTIAL_SPECS`, and checks both the encrypted store and environment variables.
+
+```
+check_missing_credentials(agent_path="exports/{agent_name}")
+```
+
+The tool returns a JSON response:
+
+```json
+{
+  "agent": "exports/{agent_name}",
+  "missing": [
+    {
+      "credential_name": "brave_search",
+      "env_var": "BRAVE_SEARCH_API_KEY",
+      "description": "Brave Search API key for web search",
+      "help_url": "https://brave.com/search/api/",
+      "tools": ["web_search"]
+    }
+  ],
+  "available": [
+    {
+      "credential_name": "anthropic",
+      "env_var": "ANTHROPIC_API_KEY",
+      "source": "encrypted_store"
+    }
+  ],
+  "total_missing": 1,
+  "ready": false
+}
+```
+
+**If `ready` is true (nothing missing):** Report all credentials as configured and skip Steps 3-5. Example:
+
+```
+All required credentials are already configured:
+  ✓ anthropic (ANTHROPIC_API_KEY)
+  ✓ brave_search (BRAVE_SEARCH_API_KEY)
+Your agent is ready to run!
+```
+
+**If credentials are missing:** Continue to Step 3 with the `missing` list.
+
+### Step 3: Present Auth Options for Each Missing Credential
+
+For each missing credential, check what authentication methods are available:
+
+```python
+from aden_tools.credentials import CREDENTIAL_SPECS
+
+spec = CREDENTIAL_SPECS.get("hubspot")
+if spec:
+    # Determine available auth options
+    auth_options = []
+    if spec.aden_supported:
+        auth_options.append("aden")
+    if spec.direct_api_key_supported:
+        auth_options.append("direct")
+    auth_options.append("custom")  # Always available
+
+    # Get setup info
+    setup_info = {
+        "env_var": spec.env_var,
+        "description": spec.description,
+        "help_url": spec.help_url,
+        "api_key_instructions": spec.api_key_instructions,
+    }
+```
+
+Present the available options using AskUserQuestion:
+
+```
+Choose how to configure HUBSPOT_ACCESS_TOKEN:
+
+  1) Aden Platform (OAuth) (Recommended)
+     Secure OAuth2 flow via hive.adenhq.com
+     - Quick setup with automatic token refresh
+     - No need to manage API keys manually
+
+  2) Direct API Key
+     Enter your own API key manually
+     - Requires creating a HubSpot Private App
+     - Full control over scopes and permissions
+
+  3) Local Credential Setup (Advanced)
+     Programmatic configuration for CI/CD
+     - For automated deployments
+     - Requires manual API calls
+```
+
+### Step 4: Execute Auth Flow Based on User Choice
+
+#### Prerequisite: Ensure HIVE_CREDENTIAL_KEY Is Available
+
+Before storing any credentials, verify `HIVE_CREDENTIAL_KEY` is set (needed to encrypt/decrypt the local store). Check both the current session and shell config:
+
+```bash
+# Check current session
+printenv HIVE_CREDENTIAL_KEY > /dev/null 2>&1 && echo "session: set" || echo "session: not set"
+
+# Check shell config files
+for f in ~/.zshrc ~/.bashrc ~/.profile; do [ -f "$f" ] && grep -q 'HIVE_CREDENTIAL_KEY' "$f" && echo "$f"; done
+```
+
+- **In current session** — proceed to store credentials
+- **In shell config but NOT in current session** — run `source ~/.zshrc` (or `~/.bashrc`) first, then proceed
+- **Not set anywhere** — `EncryptedFileStorage` will auto-generate one. After storing, tell the user to persist it: `export HIVE_CREDENTIAL_KEY="{generated_key}"` in their shell profile
+
+> **⚠️ IMPORTANT: After adding `HIVE_CREDENTIAL_KEY` to the user's shell config, always display:**
+> ```
+> ⚠️  Environment variables were added to your shell config.
+>     Open a NEW TERMINAL for them to take effect outside this session.
+> ```
+
+#### Option 1: Aden Platform (OAuth)
+
+This is the recommended flow for supported integrations (HubSpot, etc.).
+
+**How Aden OAuth Works:**
+
+The ADEN_API_KEY represents a user who has already completed OAuth authorization on Aden's platform. When users sign up and connect integrations on Aden, those OAuth tokens are stored server-side. Having an ADEN_API_KEY means:
+
+1. User has an Aden account
+2. User has already authorized integrations (HubSpot, etc.) via OAuth on Aden
+3. We just need to sync those credentials down to the local credential store
+
+**4.1a. Check for ADEN_API_KEY**
+
+```python
+import os
+aden_key = os.environ.get("ADEN_API_KEY")
+```
+
+If not set, guide user to get one from Aden (this is where they do OAuth):
+
+```python
+from aden_tools.credentials import open_browser, get_aden_setup_url
+
+# Open browser to Aden - user will sign up and connect integrations there
+url = get_aden_setup_url()  # https://hive.adenhq.com
+success, msg = open_browser(url)
+
+print("Please sign in to Aden and connect your integrations (HubSpot, etc.).")
+print("Once done, copy your API key and return here.")
+```
+
+Ask user to provide the ADEN_API_KEY they received.
+
+**4.1b. Save ADEN_API_KEY to Shell Config**
+
+With user approval, persist ADEN_API_KEY to their shell config:
+
+```python
+from aden_tools.credentials import (
+    detect_shell,
+    add_env_var_to_shell_config,
+    get_shell_source_command,
+)
+
+shell_type = detect_shell()  # 'bash', 'zsh', or 'unknown'
+
+# Ask user for approval before modifying shell config
+# If approved:
+success, config_path = add_env_var_to_shell_config(
+    "ADEN_API_KEY",
+    user_provided_key,
+    comment="Aden Platform (OAuth) API key"
+)
+
+if success:
+    source_cmd = get_shell_source_command()
+    print(f"Saved to {config_path}")
+    print(f"Run: {source_cmd}")
+```
+
+> **⚠️ IMPORTANT: After adding `ADEN_API_KEY` to the user's shell config, always display:**
+> ```
+> ⚠️  Environment variables were added to your shell config.
+>     Open a NEW TERMINAL for them to take effect outside this session.
+> ```
+
+Also record in `~/.hive/configuration.json` that an Aden key is configured (only a flag and the API URL are written there — never the key itself):
+
+```python
+import json
+from pathlib import Path
+
+config_path = Path.home() / ".hive" / "configuration.json"
+config = json.loads(config_path.read_text()) if config_path.exists() else {}
+
+config["aden"] = {
+    "api_key_configured": True,
+    "api_url": "https://api.adenhq.com"
+}
+
+config_path.parent.mkdir(parents=True, exist_ok=True)
+config_path.write_text(json.dumps(config, indent=2))
+```
+
+**4.1c. Sync Credentials from Aden Server**
+
+Since the user has already authorized integrations on Aden, use the one-liner factory method:
+
+```python
+from core.framework.credentials import CredentialStore
+
+# This single call handles everything:
+# - Creates encrypted local storage at ~/.hive/credentials
+# - Configures Aden client from ADEN_API_KEY env var
+# - Syncs all credentials from Aden server automatically
+store = CredentialStore.with_aden_sync(
+    base_url="https://api.adenhq.com",
+    auto_sync=True,  # Syncs on creation
+)
+
+# Check what was synced
+synced = store.list_credentials()
+print(f"Synced credentials: {synced}")
+
+# If the required credential wasn't synced, the user hasn't authorized it on Aden yet
+if "hubspot" not in synced:
+    print("HubSpot not found in your Aden account.")
+    print("Please visit https://hive.adenhq.com to connect HubSpot, then try again.")
+```
+
+For more control over the sync process:
+
+```python
+from core.framework.credentials import CredentialStore
+from core.framework.credentials.aden import (
+    AdenCredentialClient,
+    AdenClientConfig,
+    AdenSyncProvider,
+)
+
+# Create client (API key loaded from ADEN_API_KEY env var)
+client = AdenCredentialClient(AdenClientConfig(
+    base_url="https://api.adenhq.com",
+))
+
+# Create provider and store
+provider = AdenSyncProvider(client=client)
+store = CredentialStore.with_encrypted_storage()
+
+# Manual sync
+synced_count = provider.sync_all(store)
+print(f"Synced {synced_count} credentials from Aden")
+```
+
+**4.1d. Run Health Check**
+
+```python
+from aden_tools.credentials import check_credential_health
+
+# Get the token from the store
+cred = store.get_credential("hubspot")
+token = cred.keys["access_token"].value.get_secret_value()
+
+result = check_credential_health("hubspot", token)
+if result.valid:
+    print("HubSpot credentials validated successfully!")
+else:
+    print(f"Validation failed: {result.message}")
+    # Offer to retry the OAuth flow
+```
+
+#### Option 2: Direct API Key
+
+For users who prefer manual API key management.
+
+**4.2a. Show Setup Instructions**
+
+```python
+from aden_tools.credentials import CREDENTIAL_SPECS
+
+spec = CREDENTIAL_SPECS.get("hubspot")
+if spec and spec.api_key_instructions:
+    print(spec.api_key_instructions)
+# Output:
+# To get a HubSpot Private App token:
+# 1. Go to HubSpot Settings > Integrations > Private Apps
+# 2. Click "Create a private app"
+# 3. Name your app (e.g., "Hive Agent")
+# ...
+
+if spec and spec.help_url:
+    print(f"More info: {spec.help_url}")
+```
+
+**4.2b. Collect API Key from User**
+
+Use AskUserQuestion to securely collect the API key:
+
+```
+Please provide your HubSpot access token:
+(This will be stored securely in ~/.hive/credentials)
+```
+
+**4.2c. Run Health Check Before Storing**
+
+```python
+from aden_tools.credentials import check_credential_health
+
+result = check_credential_health("hubspot", user_provided_token)
+if not result.valid:
+    print(f"Warning: {result.message}")
+    # Ask user if they want to:
+    # 1. Try a different token
+    # 2. Continue anyway (not recommended)
+```
+
+**4.2d. Store in Local Encrypted Store**
+
+```python
+from core.framework.credentials import CredentialStore, CredentialObject, CredentialKey
+from pydantic import SecretStr
+
+store = CredentialStore.with_encrypted_storage()
+
+cred = CredentialObject(
+    id="hubspot",
+    name="HubSpot Access Token",
+    keys={
+        "access_token": CredentialKey(
+            name="access_token",
+            value=SecretStr(user_provided_token),
+        )
+    },
+)
+store.save_credential(cred)
+```
+
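+**4.2e. Export to Current Session**
+
+Export the token for the current shell session only. Do not add it to `~/.zshrc`/`~/.bashrc` — per the Security Rules, it is persisted only in the encrypted store: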
+
+```bash
+export HUBSPOT_ACCESS_TOKEN="the-value"
+```
+
+#### Option 3: Local Credential Setup (Advanced)
+
+For programmatic/CI/CD setups.
+
+**4.3a. Show Documentation**
+
+```
+For advanced credential management, you can use the CredentialStore API directly:
+
+  from core.framework.credentials import CredentialStore, CredentialObject, CredentialKey
+  from pydantic import SecretStr
+
+  store = CredentialStore.with_encrypted_storage()
+
+  cred = CredentialObject(
+      id="hubspot",
+      name="HubSpot Access Token",
+      keys={"access_token": CredentialKey(name="access_token", value=SecretStr("..."))}
+  )
+  store.save_credential(cred)
+
+For CI/CD environments:
+  - Set HIVE_CREDENTIAL_KEY for encryption
+  - Pre-populate ~/.hive/credentials programmatically
+  - Or use environment variables directly (HUBSPOT_ACCESS_TOKEN)
+
+Documentation: See core/framework/credentials/README.md
+```
+
+### Step 5: Record Configuration Method
+
+Track which auth method was used for each credential in `~/.hive/configuration.json`:
+
+```python
+import json
+from pathlib import Path
+from datetime import datetime
+
+config_path = Path.home() / ".hive" / "configuration.json"
+config = json.loads(config_path.read_text()) if config_path.exists() else {}
+
+if "credential_methods" not in config:
+    config["credential_methods"] = {}
+
+config["credential_methods"]["hubspot"] = {
+    "method": "aden",  # or "direct" or "custom"
+    "configured_at": datetime.now().isoformat(),
+}
+
+config_path.write_text(json.dumps(config, indent=2))
+```
+
+### Step 6: Verify All Credentials
+
+Use the `verify_credentials` MCP tool to confirm everything is properly configured:
+
+```
+verify_credentials(agent_path="exports/{agent_name}")
+```
+
+The tool returns:
+
+```json
+{
+  "agent": "exports/{agent_name}",
+  "ready": true,
+  "missing_credentials": [],
+  "warnings": [],
+  "errors": []
+}
+```
+
+If `ready` is true, report success. If `missing_credentials` is non-empty, identify what failed and loop back to Step 3 for the remaining credentials.
+
+## Health Check Reference
+
+Health checks validate credentials by making lightweight API calls:
+
+| Credential      | Endpoint                                | What It Checks                    |
+| --------------- | --------------------------------------- | --------------------------------- |
+| `anthropic`     | `POST /v1/messages`                     | API key validity                  |
+| `brave_search`  | `GET /res/v1/web/search?q=test&count=1` | API key validity                  |
+| `google_search` | `GET /customsearch/v1?q=test&num=1`     | API key + CSE ID validity         |
+| `github`        | `GET /user`                             | Token validity, user identity     |
+| `hubspot`       | `GET /crm/v3/objects/contacts?limit=1`  | Bearer token validity, CRM scopes |
+| `resend`        | `GET /domains`                          | API key validity                  |
+
+```python
+from aden_tools.credentials import check_credential_health, HealthCheckResult
+
+result: HealthCheckResult = check_credential_health("hubspot", token_value)
+# result.valid: bool
+# result.message: str
+# result.details: dict (status_code, rate_limited, etc.)
+```
+
+## Encryption Key (HIVE_CREDENTIAL_KEY)
+
+The local encrypted store requires `HIVE_CREDENTIAL_KEY` to encrypt/decrypt credentials.
+
+- If the user doesn't have one, `EncryptedFileStorage` will auto-generate one and log it
+- The user MUST persist this key (e.g., in `~/.bashrc`/`~/.zshrc` or a secrets manager)
+- Without this key, stored credentials cannot be decrypted
+
+**Shell config rule:** Only TWO keys belong in shell config (`~/.zshrc`/`~/.bashrc`):
+- `HIVE_CREDENTIAL_KEY` — encryption key for the credential store
+- `ADEN_API_KEY` — Aden platform auth key (needed before the store can sync)
+
+All other API keys (Brave, Google, HubSpot, etc.) must go in the encrypted store only. **Never offer to add them to shell config.**
+
+If `HIVE_CREDENTIAL_KEY` is not set:
+
+1. Let the store generate one
+2. Tell the user to save it: `export HIVE_CREDENTIAL_KEY="{generated_key}"`
+3. Recommend adding it to `~/.bashrc` or their shell profile
+
+## Security Rules
+
+- **NEVER** log, print, or echo credential values in tool output
+- **NEVER** store credentials in plaintext files, git-tracked files, or agent configs
+- **NEVER** hardcode credentials in source code
+- **NEVER** offer to save API keys to shell config (`~/.zshrc`/`~/.bashrc`) — the **only** keys that belong in shell config are `HIVE_CREDENTIAL_KEY` and `ADEN_API_KEY`. All other credentials (Brave, Google, HubSpot, GitHub, Resend, etc.) go in the encrypted store only.
+- **ALWAYS** use `SecretStr` from Pydantic when handling credential values in Python
+- **ALWAYS** use the local encrypted store (`~/.hive/credentials`) for persistence
+- **ALWAYS** run health checks before storing credentials (when possible)
+- **ALWAYS** verify credentials were stored by re-running validation, not by reading them back
+- When modifying `~/.bashrc` or `~/.zshrc`, confirm with the user first
+
+## Credential Sources Reference
+
+All credential specs are defined in `tools/src/aden_tools/credentials/`:
+
+| File              | Category      | Credentials                                   | Aden Supported |
+| ----------------- | ------------- | --------------------------------------------- | -------------- |
+| `llm.py`          | LLM Providers | `anthropic`                                   | No             |
+| `search.py`       | Search Tools  | `brave_search`, `google_search`, `google_cse` | No             |
+| `email.py`        | Email         | `resend`                                      | No             |
+| `integrations.py` | Integrations  | `github`, `hubspot`, `google_calendar_oauth`  | No / Yes       |
+
+**Note:** Additional LLM providers (Cerebras, Groq, OpenAI) are handled by LiteLLM via environment
+variables (`CEREBRAS_API_KEY`, `GROQ_API_KEY`, `OPENAI_API_KEY`) but are not yet in CREDENTIAL_SPECS.
+Add them to `llm.py` as needed.
+
+To check what's registered:
+
+```python
+from aden_tools.credentials import CREDENTIAL_SPECS
+for name, spec in CREDENTIAL_SPECS.items():
+    print(f"{name}: aden={spec.aden_supported}, direct={spec.direct_api_key_supported}")
+```
+
+## Migration: CredentialManager → CredentialStore
+
+**CredentialManager is deprecated.** Use CredentialStore instead.
+
+| Old (Deprecated)                          | New (Recommended)                                                    |
+| ----------------------------------------- | -------------------------------------------------------------------- |
+| `CredentialManager()`                     | `CredentialStore.with_encrypted_storage()`                           |
+| `creds.get("hubspot")`                    | `store.get("hubspot")` or `store.get_key("hubspot", "access_token")` |
+| `creds.validate_for_tools(tools)`         | Use `store.is_available(cred_id)` per credential                     |
+| `creds.get_auth_options("hubspot")`       | Check `CREDENTIAL_SPECS["hubspot"].aden_supported`                   |
+| `creds.get_setup_instructions("hubspot")` | Access `CREDENTIAL_SPECS["hubspot"]` directly                        |
+
+**Why migrate?**
+
+- **CredentialStore** supports encrypted storage, multi-key credentials, template resolution, and automatic token refresh
+- **CredentialManager** only reads from environment variables and .env files (no encryption, no refresh)
+- **CredentialStoreAdapter** exists for backward compatibility during migration
+
+```python
+# Old way (deprecated)
+from aden_tools.credentials import CredentialManager
+creds = CredentialManager()
+token = creds.get("hubspot")
+
+# New way (recommended)
+from core.framework.credentials import CredentialStore
+store = CredentialStore.with_encrypted_storage()
+token = store.get("hubspot")
+
+# With Aden sync (recommended for OAuth integrations)
+store = CredentialStore.with_aden_sync()
+token = store.get_key("hubspot", "access_token")
+```
+
+## Example Session
+
+```
+User: /hive-credentials for my research-agent
+
+Agent: Let me check what credentials your research-agent needs.
+
+[Calls check_missing_credentials(agent_path="exports/research-agent")]
+→ Returns:
+  available: anthropic (encrypted_store), brave_search (encrypted_store)
+  missing: google_search (GOOGLE_API_KEY), google_cse (GOOGLE_CSE_ID)
+  ready: false
+
+Agent: 2 of 4 required credentials are already configured. Only Google Custom
+Search needs setup (2 values).
+
+--- Setting up Google Custom Search (google_search + google_cse) ---
+
+This requires two values that work together.
+
+[Checks HIVE_CREDENTIAL_KEY before storing]
+$ printenv HIVE_CREDENTIAL_KEY > /dev/null 2>&1 && echo "set" || echo "not set"
+set
+
+First, the Google API Key:
+1. Go to https://console.cloud.google.com/apis/credentials
+2. Create a new project (or select an existing one)
+3. Enable the "Custom Search API" from the API Library
+4. Go to Credentials > Create Credentials > API Key
+5. Copy the generated API key
+
+[AskUserQuestion: "Please provide your Google API key:"]
+[User provides key]
+
+Now, the Custom Search Engine ID:
+1. Go to https://programmablesearchengine.google.com/controlpanel/all
+2. Click "Add" to create a new search engine
+3. Under "What to search", select "Search the entire web"
+4. Give your search engine a name
+5. Click "Create"
+6. Copy the Search Engine ID (cx value)
+
+[AskUserQuestion: "Please provide your Google CSE ID:"]
+[User provides ID]
+
+[Runs health check with both values - GET /customsearch/v1?q=test&num=1 → 200 OK]
+[Stores both in local encrypted store, exports to env]
+
+✓ Google Custom Search credentials valid
+
+[Calls verify_credentials(agent_path="exports/research-agent")]
+→ Returns: ready: true, missing_credentials: []
+
+All credentials are now configured:
+  ✓ anthropic (ANTHROPIC_API_KEY) — already in encrypted store
+  ✓ brave_search (BRAVE_SEARCH_API_KEY) — already in encrypted store
+  ✓ google_search (GOOGLE_API_KEY) — stored in encrypted store
+  ✓ google_cse (GOOGLE_CSE_ID) — stored in encrypted store
+
+┌─────────────────────────────────────────────────────────────────────────────┐
+│                      ✅ CREDENTIALS CONFIGURED                              │
+├─────────────────────────────────────────────────────────────────────────────┤
+│                                                                             │
+│     If HIVE_CREDENTIAL_KEY or ADEN_API_KEY was just added to your shell     │
+│     config, OPEN A NEW TERMINAL before running the commands below;          │
+│     shell config changes only take effect in new terminal sessions.         │
+│                                                                             │
+│  NEXT STEPS:                                                                │
+│                                                                             │
+│  1. RUN YOUR AGENT:                                                         │
+│                                                                             │
+│     hive tui                                                                │
+│                                                                             │
+│  2. IF YOU ENCOUNTER ISSUES, USE THE DEBUGGER:                              │
+│                                                                             │
+│     /hive-debugger                                                          │
+│                                                                             │
+│     The debugger analyzes runtime logs, identifies retry loops, tool        │
+│     failures, stalled execution, and provides actionable fix suggestions.   │
+│                                                                             │
+└─────────────────────────────────────────────────────────────────────────────┘
+```
@@ -0,0 +1,385 @@
+---
+name: hive-patterns
+description: Best practices, patterns, and examples for building goal-driven agents. Includes client-facing interaction, feedback edges, judge patterns, fan-out/fan-in, context management, and anti-patterns.
+license: Apache-2.0
+metadata:
+  author: hive
+  version: "2.0"
+  type: reference
+  part_of: hive
+---
+
+# Building Agents - Patterns & Best Practices
+
+Design patterns, examples, and best practices for building robust goal-driven agents.
+
+**Prerequisites:** A complete agent structure built with `hive-create`.
+
+## Practical Example: Hybrid Workflow
+
+How to build a node using both direct file writes and optional MCP validation:
+
+```python
+# 1. WRITE TO FILE FIRST (Primary - makes it visible)
+node_code = '''
+search_node = NodeSpec(
+    id="search-web",
+    node_type="event_loop",
+    input_keys=["query"],
+    output_keys=["search_results"],
+    system_prompt="Search the web for: {query}. Use web_search, then call set_output to store results.",
+    tools=["web_search"],
+)
+'''
+
+Edit(
+    file_path="exports/research_agent/nodes/__init__.py",
+    old_string="# Nodes will be added here",
+    new_string=node_code
+)
+
+# 2. OPTIONALLY VALIDATE WITH MCP (Secondary - bookkeeping)
+validation = mcp__agent-builder__test_node(
+    node_id="search-web",
+    test_input='{"query": "python tutorials"}',
+    mock_llm_response='{"search_results": [...mock results...]}'
+)
+```
+
+**User experience:**
+
+- Immediately sees node in their editor (from step 1)
+- Gets validation feedback (from step 2)
+- Can edit the file directly if needed
+
+## Multi-Turn Interaction Patterns
+
+For agents needing multi-turn conversations with users, use `client_facing=True` on event_loop nodes.
+
+### Client-Facing Nodes
+
+A client-facing node streams LLM output to the user and blocks for user input between conversational turns. This replaces the old pause/resume pattern.
+
+```python
+# Client-facing node with STEP 1/STEP 2 prompt pattern
+intake_node = NodeSpec(
+    id="intake",
+    name="Intake",
+    description="Gather requirements from the user",
+    node_type="event_loop",
+    client_facing=True,
+    input_keys=["topic"],
+    output_keys=["research_brief"],
+    system_prompt="""\
+You are an intake specialist.
+
+**STEP 1 — Read and respond (text only, NO tool calls):**
+1. Read the topic provided
+2. If it's vague, ask 1-2 clarifying questions
+3. If it's clear, confirm your understanding
+
+**STEP 2 — After the user confirms, call set_output:**
+- set_output("research_brief", "Clear description of what to research")
+""",
+)
+
+# Internal node runs without user interaction
+research_node = NodeSpec(
+    id="research",
+    name="Research",
+    description="Search and analyze sources",
+    node_type="event_loop",
+    input_keys=["research_brief"],
+    output_keys=["findings", "sources"],
+    system_prompt="Research the topic using web_search and web_scrape...",
+    tools=["web_search", "web_scrape", "load_data", "save_data"],
+)
+```
+
+**How it works:**
+
+- Client-facing nodes stream LLM text to the user and block for input after each response
+- User input is injected via `node.inject_event(text)`
+- When the LLM calls `set_output` to produce structured outputs, the judge evaluates and ACCEPTs
+- Internal nodes (non-client-facing) run their entire loop without blocking
+- `set_output` is a synthetic tool — a turn with only `set_output` calls (no real tools) triggers user input blocking
+
+**STEP 1/STEP 2 pattern:** Always structure client-facing prompts with explicit phases. STEP 1 is text-only conversation. STEP 2 calls `set_output` after user confirmation. This prevents the LLM from calling `set_output` prematurely before the user responds.
+
+### When to Use client_facing
+
+| Scenario                            | client_facing | Why                    |
+| ----------------------------------- | :-----------: | ---------------------- |
+| Gathering user requirements         |      Yes      | Need user input        |
+| Human review/approval checkpoint    |      Yes      | Need human decision    |
+| Data processing (scanning, scoring) |      No       | Runs autonomously      |
+| Report generation                   |      No       | No user input needed   |
+| Final confirmation before action    |      Yes      | Need explicit approval |
+
+> **Legacy Note:** The `pause_nodes` / `entry_points` pattern still works for backward compatibility but `client_facing=True` is preferred for new agents.
+
+## Edge-Based Routing and Feedback Loops
+
+### Conditional Edge Routing
+
+Multiple conditional edges from the same source replace the old `router` node type. Each edge checks a condition on the node's output.
+
+```python
+# Node with mutually exclusive outputs
+review_node = NodeSpec(
+    id="review",
+    name="Review",
+    node_type="event_loop",
+    client_facing=True,
+    output_keys=["approved_contacts", "redo_extraction"],
+    nullable_output_keys=["approved_contacts", "redo_extraction"],
+    max_node_visits=3,
+    system_prompt="Present the contact list to the operator. If they approve, call set_output('approved_contacts', ...). If they want changes, call set_output('redo_extraction', 'true').",
+)
+
+# Forward edge (positive priority, evaluated first)
+EdgeSpec(
+    id="review-to-campaign",
+    source="review",
+    target="campaign-builder",
+    condition=EdgeCondition.CONDITIONAL,
+    condition_expr="output.get('approved_contacts') is not None",
+    priority=1,
+)
+
+# Feedback edge (negative priority, evaluated after forward edges)
+EdgeSpec(
+    id="review-feedback",
+    source="review",
+    target="extractor",
+    condition=EdgeCondition.CONDITIONAL,
+    condition_expr="output.get('redo_extraction') is not None",
+    priority=-1,
+)
+```
+
+**Key concepts:**
+
+- `nullable_output_keys`: Lists output keys that may remain unset. The node sets exactly one of the mutually exclusive keys per execution.
+- `max_node_visits`: Must be >1 on the feedback target (extractor) so it can re-execute. Default is 1.
+- `priority`: Positive = forward edge (evaluated first). Negative = feedback edge. The executor tries forward edges first; if none match, falls back to feedback edges.
+
+### Routing Decision Table
+
+| Pattern                | Old Approach            | New Approach                                  |
+| ---------------------- | ----------------------- | --------------------------------------------- |
+| Conditional branching  | `router` node           | Conditional edges with `condition_expr`       |
+| Binary approve/reject  | `pause_nodes` + resume  | `client_facing=True` + `nullable_output_keys` |
+| Loop-back on rejection | Manual entry_points     | Feedback edge with `priority=-1`              |
+| Multi-way routing      | Router with routes dict | Multiple conditional edges with priorities    |
+
+## Judge Patterns
+
+**Core Principle: The judge is the SOLE mechanism for acceptance decisions.** Never add ad-hoc framework gating to compensate for LLM behavior. If the LLM calls `set_output` prematurely, fix the system prompt or use a custom judge. Anti-patterns to avoid:
+
+- Output rollback logic
+- `_user_has_responded` flags
+- Premature set_output rejection
+- Interaction protocol injection into system prompts
+
+Judges control when an event_loop node's loop exits. Choose based on validation needs.
+
+### Implicit Judge (Default)
+
+When no judge is configured, the implicit judge ACCEPTs when:
+
+- The LLM finishes its response with no tool calls
+- All required output keys have been set via `set_output`
+
+Best for simple nodes where "all outputs set" is sufficient validation.
+
+### SchemaJudge
+
+Validates outputs against a Pydantic model. Use when you need structural validation.
+
+```python
+from pydantic import BaseModel, ValidationError
+
+class ScannerOutput(BaseModel):
+    github_users: list[dict]  # Must be a list of user objects
+
+class SchemaJudge:
+    def __init__(self, output_model: type[BaseModel]):
+        self._model = output_model
+
+    async def evaluate(self, context: dict) -> JudgeVerdict:
+        missing = context.get("missing_keys", [])
+        if missing:
+            return JudgeVerdict(
+                action="RETRY",
+                feedback=f"Missing output keys: {missing}. Use set_output to provide them.",
+            )
+        try:
+            self._model.model_validate(context["output_accumulator"])
+            return JudgeVerdict(action="ACCEPT")
+        except ValidationError as e:
+            return JudgeVerdict(action="RETRY", feedback=str(e))
+```
+
+### When to Use Which Judge
+
+| Judge           | Use When                              | Example                |
+| --------------- | ------------------------------------- | ---------------------- |
+| Implicit (None) | Output keys are sufficient validation | Simple data extraction |
+| SchemaJudge     | Need structural validation of outputs | API response parsing   |
+| Custom          | Domain-specific validation logic      | Score must be 0.0-1.0  |
+
+## Fan-Out / Fan-In (Parallel Execution)
+
+Multiple ON_SUCCESS edges from the same source trigger parallel execution. All branches run concurrently via `asyncio.gather()`.
+
+```python
+# Scanner fans out to Profiler and Scorer in parallel
+EdgeSpec(id="scanner-to-profiler", source="scanner", target="profiler",
+         condition=EdgeCondition.ON_SUCCESS)
+EdgeSpec(id="scanner-to-scorer", source="scanner", target="scorer",
+         condition=EdgeCondition.ON_SUCCESS)
+
+# Both fan in to Extractor
+EdgeSpec(id="profiler-to-extractor", source="profiler", target="extractor",
+         condition=EdgeCondition.ON_SUCCESS)
+EdgeSpec(id="scorer-to-extractor", source="scorer", target="extractor",
+         condition=EdgeCondition.ON_SUCCESS)
+```
+
+**Requirements:**
+
+- Parallel event_loop nodes must have **disjoint output_keys** (no key written by both)
+- Only one parallel branch may contain a `client_facing` node
+- Fan-in node receives outputs from all completed branches in shared memory
+
+## Context Management Patterns
+
+### Tiered Compaction
+
+EventLoopNode automatically manages context window usage with tiered compaction:
+
+1. **Pruning** — Old tool results replaced with compact placeholders (zero-cost, no LLM call)
+2. **Normal compaction** — LLM summarizes older messages
+3. **Aggressive compaction** — Keeps only recent messages + summary
+4. **Emergency** — Hard reset with tool history preservation
+
+### Spillover Pattern
+
+The framework automatically truncates large tool results and saves full content to a spillover directory. The LLM receives a truncation message with instructions to use `load_data` to read the full result.
+
+For explicit data management, use the data tools (real MCP tools, not synthetic):
+
+```python
+# save_data, load_data, list_data_files, serve_file_to_user are real MCP tools
+# data_dir is auto-injected by the framework — the LLM never sees it
+
+# Saving large results
+save_data(filename="sources.json", data=large_json_string)
+
+# Reading with pagination (line-based offset/limit)
+load_data(filename="sources.json", offset=0, limit=50)
+
+# Listing available files
+list_data_files()
+
+# Serving a file to the user as a clickable link
+serve_file_to_user(filename="report.html", label="Research Report")
+```
+
+Add data tools to nodes that handle large tool results:
+
+```python
+research_node = NodeSpec(
+    ...
+    tools=["web_search", "web_scrape", "load_data", "save_data", "list_data_files"],
+)
+```
+
+`data_dir` is a framework context parameter — auto-injected at call time. `GraphExecutor.execute()` sets it per-execution via `ToolRegistry.set_execution_context(data_dir=...)` (using `contextvars` for concurrency safety), ensuring it matches the session-scoped spillover directory.
+
+## Anti-Patterns
+
+### What NOT to Do
+
+- **Don't rely on `export_graph`** — Write files immediately, not at the end
+- **Don't hide code in the session** — Write to files as components are approved
+- **Don't wait to write files** — The agent should be visible from the first step
+- **Don't batch everything** — Write incrementally, one component at a time
+- **Don't create too many thin nodes** — Prefer fewer, richer nodes (see below)
+- **Don't add framework gating for LLM behavior** — Fix prompts or use judges instead
+
+### Fewer, Richer Nodes
+
+A common mistake is splitting work into too many small single-purpose nodes. Each node boundary requires serializing outputs, losing in-context information, and adding edge complexity.
+
+| Bad (8 thin nodes)  | Good (4 rich nodes)                 |
+| ------------------- | ----------------------------------- |
+| parse-query         | intake (client-facing)              |
+| search-sources      | research (search + fetch + analyze) |
+| fetch-content       | review (client-facing)              |
+| evaluate-sources    | report (write + deliver)            |
+| synthesize-findings |                                     |
+| write-report        |                                     |
+| quality-check       |                                     |
+| save-report         |                                     |
+
+**Why fewer nodes are better:**
+
+- The LLM retains full context of its work within a single node
+- A research node that searches, fetches, and analyzes keeps all source material in its conversation history
+- Fewer edges means simpler graph and fewer failure points
+- Data tools (`save_data`/`load_data`) handle context window limits within a single node
+
+### MCP Tools - Correct Usage
+
+**MCP tools OK for:**
+
+- `test_node` — Validate node configuration with mock inputs
+- `validate_graph` — Check graph structure
+- `configure_loop` — Set event loop parameters
+- `create_session` — Track session state for bookkeeping
+
+**MCP tools NOT OK for:** Serving as the primary construction method, or relying on `export_graph` to produce the agent's files.
+
+## Error Handling Patterns
+
+### Graceful Failure with Fallback
+
+```python
+edges = [
+    # Success path
+    EdgeSpec(id="api-success", source="api-call", target="process-results",
+             condition=EdgeCondition.ON_SUCCESS),
+    # Fallback on failure
+    EdgeSpec(id="api-to-fallback", source="api-call", target="fallback-cache",
+             condition=EdgeCondition.ON_FAILURE, priority=1),
+    # Report if fallback also fails
+    EdgeSpec(id="fallback-to-error", source="fallback-cache", target="report-error",
+             condition=EdgeCondition.ON_FAILURE, priority=1),
+]
+```
+
+## Handoff to Testing
+
+When the agent is complete, transition to the testing phase.
+
+### Pre-Testing Checklist
+
+- [ ] Agent structure validates: `uv run python -m agent_name validate`
+- [ ] All nodes defined in `nodes/__init__.py`
+- [ ] All edges connect valid nodes with correct priorities
+- [ ] Feedback edge targets have `max_node_visits > 1`
+- [ ] Client-facing nodes have meaningful system prompts
+- [ ] Agent can be imported: `from exports.agent_name import default_agent`
+
+## Related Skills
+
+- **hive-concepts** — Fundamental concepts (node types, edges, event loop architecture)
+- **hive-create** — Step-by-step building process
+- **hive-test** — Test and validate agents
+- **hive** — Complete workflow orchestrator
+
+---
+
+**Remember: Agent is actively constructed, visible the whole time. No hidden state. No surprise exports. Just transparent, incremental file building.**
@@ -0,0 +1,940 @@
+---
+name: hive-test
+description: Iterative agent testing with session recovery. Execute, analyze, fix, resume from checkpoints. Use when testing an agent, debugging test failures, or verifying fixes without re-running from scratch.
+---
+
+# Agent Testing
+
+Test agents iteratively: execute, analyze failures, fix, resume from checkpoint, repeat.
+
+## When to Use
+
+- Testing a newly built agent against its goal
+- Debugging a failing agent iteratively
+- Verifying fixes without re-running expensive early nodes
+- Running final regression tests before deployment
+
+## Prerequisites
+
+1. Agent package at `exports/{agent_name}/` (built with `/hive-create`)
+2. Credentials configured (`/hive-credentials`)
+3. `ANTHROPIC_API_KEY` set (or appropriate LLM provider key)
+
+**Path distinction** (critical — don't confuse these):
+- `exports/{agent_name}/` — agent source code (edit here)
+- `~/.hive/agents/{agent_name}/` — runtime data: sessions, checkpoints, logs (read here)
+
+---
+
+## The Iterative Test Loop
+
+This is the core workflow. Don't re-run the entire agent when a late node fails — analyze, fix, and resume from the last clean checkpoint.
+
+```
+┌──────────────────────────────────────┐
+│ PHASE 1: Generate Test Scenarios     │
+│ Goal → synthetic test inputs + tests │
+└──────────────┬───────────────────────┘
+               ↓
+┌──────────────────────────────────────┐
+│ PHASE 2: Execute                     │◄────────────────┐
+│ Run agent (CLI or pytest)            │                 │
+└──────────────┬───────────────────────┘                 │
+               ↓                                         │
+          Pass? ──yes──► PHASE 6: Final Verification     │
+               │                                         │
+               no                                        │
+               ↓                                         │
+┌──────────────────────────────────────┐                 │
+│ PHASE 3: Analyze                     │                 │
+│ Session + runtime logs + checkpoints │                 │
+└──────────────┬───────────────────────┘                 │
+               ↓                                         │
+┌──────────────────────────────────────┐                 │
+│ PHASE 4: Fix                         │                 │
+│ Prompt / code / graph / goal         │                 │
+└──────────────┬───────────────────────┘                 │
+               ↓                                         │
+┌──────────────────────────────────────┐                 │
+│ PHASE 5: Recover & Resume            │─────────────────┘
+│ Checkpoint resume OR fresh re-run    │
+└──────────────────────────────────────┘
+```
+
+---
+
+### Phase 1: Generate Test Scenarios
+
+Create synthetic tests from the agent's goal, constraints, and success criteria.
+
+#### Step 1a: Read the goal
+
+```python
+# Read goal from agent.py
+Read(file_path="exports/{agent_name}/agent.py")
+# Extract the Goal definition and convert to JSON string
+```
+
+#### Step 1b: Get test guidelines
+
+```python
+# Get constraint test guidelines
+generate_constraint_tests(
+    goal_id="your-goal-id",
+    goal_json='{"id": "...", "constraints": [...]}',
+    agent_path="exports/{agent_name}"
+)
+
+# Get success criteria test guidelines
+generate_success_tests(
+    goal_id="your-goal-id",
+    goal_json='{"id": "...", "success_criteria": [...]}',
+    node_names="intake,research,review,report",
+    tool_names="web_search,web_scrape",
+    agent_path="exports/{agent_name}"
+)
+```
+
+These return `file_header`, `test_template`, `constraints_formatted`/`success_criteria_formatted`, and `test_guidelines`. They do NOT generate test code — you write the tests.
+
+#### Step 1c: Write tests
+
+```python
+Write(
+    file_path=result["output_file"],
+    content=result["file_header"] + "\n\n" + your_test_code
+)
+```
+
+#### Test writing rules
+
+- Every test MUST be `async` with `@pytest.mark.asyncio`
+- Every test MUST accept `runner, auto_responder, mock_mode` fixtures
+- Use `await auto_responder.start()` before running, `await auto_responder.stop()` in `finally`
+- Use `await runner.run(input_dict)` — this goes through AgentRunner → AgentRuntime → ExecutionStream
+- Access output via `result.output.get("key")` — NEVER `result.output["key"]`
+- `result.success=True` means no exception, NOT goal achieved — always check output
+- Write 8-15 tests total, not 30+
+- Each real test costs ~3 seconds + LLM tokens
+- NEVER use `default_agent.run()` — it bypasses the runtime (no sessions, no logs, client-facing nodes hang)
+
+#### Step 1d: Check existing tests
+
+Before generating, check if tests already exist:
+
+```python
+list_tests(
+    goal_id="your-goal-id",
+    agent_path="exports/{agent_name}"
+)
+```
+
+---
+
+### Phase 2: Execute
+
+There are two execution paths; use the one that fits your situation.
+
+#### Iterative debugging (for complex agents)
+
+Run the agent via CLI. This creates sessions with checkpoints at `~/.hive/agents/{agent_name}/sessions/`:
+
+```bash
+uv run hive run exports/{agent_name} --input '{"query": "test topic"}'
+```
+
+Sessions and checkpoints are saved automatically.
+
+**Client-facing nodes**: Agents with `client_facing=True` nodes (interactive conversation) work in headless mode when run from a real terminal — the agent streams output to stdout and reads user input from stdin via a `>>> ` prompt. In non-interactive shells (like Claude Code's Bash tool), client-facing nodes will hang because there is no stdin. For testing interactive agents from Claude Code, use `run_tests` with mock mode or have the user run the agent manually in their terminal.
+
+#### Automated regression (for CI or final verification)
+
+Use the `run_tests` MCP tool to run all pytest tests:
+
+```python
+run_tests(
+    goal_id="your-goal-id",
+    agent_path="exports/{agent_name}"
+)
+```
+
+Returns structured results:
+```json
+{
+  "overall_passed": false,
+  "summary": {"total": 12, "passed": 10, "failed": 2, "pass_rate": "83.3%"},
+  "test_results": [{"test_name": "test_success_source_diversity", "status": "failed"}],
+  "failures": [{"test_name": "test_success_source_diversity", "details": "..."}]
+}
+```
+
+**Options:**
+```python
+# Run only constraint tests
+run_tests(goal_id, agent_path, test_types='["constraint"]')
+
+# Stop on first failure
+run_tests(goal_id, agent_path, fail_fast=True)
+
+# Parallel execution
+run_tests(goal_id, agent_path, parallel=4)
+```
+
+**Note:** `run_tests` uses `AgentRunner` with `tmp_path` storage, so sessions are isolated per test run. For checkpoint-based recovery with persistent sessions, use CLI execution. Use `run_tests` for quick regression checks and final verification.
+
+---
+
+### Phase 3: Analyze Failures
+
+When a test fails, drill down systematically. Don't guess — use the tools.
+
+#### Step 3a: Get error category
+
+```python
+debug_test(
+    goal_id="your-goal-id",
+    test_name="test_success_source_diversity",
+    agent_path="exports/{agent_name}"
+)
+```
+
+Returns error category (`IMPLEMENTATION_ERROR`, `ASSERTION_FAILURE`, `TIMEOUT`, `IMPORT_ERROR`, `API_ERROR`) plus full traceback and suggestions.
+
+#### Step 3b: Find the failed session
+
+```python
+list_agent_sessions(
+    agent_work_dir="~/.hive/agents/{agent_name}",
+    status="failed",
+    limit=5
+)
+```
+
+Returns session list with IDs, timestamps, current_node (where it failed), execution_quality.
+
+#### Step 3c: Inspect session state
+
+```python
+get_agent_session_state(
+    agent_work_dir="~/.hive/agents/{agent_name}",
+    session_id="session_20260209_143022_abc12345"
+)
+```
+
+Returns execution path, which node was current, step count, timestamps — but excludes memory values (to avoid context bloat). Shows `memory_keys` and `memory_size` instead.
+
+#### Step 3d: Examine runtime logs (L2/L3)
+
+```python
+# L2: Per-node success/failure, retry counts
+query_runtime_log_details(
+    agent_work_dir="~/.hive/agents/{agent_name}",
+    run_id="session_20260209_143022_abc12345",
+    needs_attention_only=True
+)
+
+# L3: Exact LLM responses, tool call inputs/outputs
+query_runtime_log_raw(
+    agent_work_dir="~/.hive/agents/{agent_name}",
+    run_id="session_20260209_143022_abc12345",
+    node_id="research"
+)
+```
+
+#### Step 3e: Inspect memory data
+
+```python
+# See what data a node actually produced
+get_agent_session_memory(
+    agent_work_dir="~/.hive/agents/{agent_name}",
+    session_id="session_20260209_143022_abc12345",
+    key="research_results"
+)
+```
+
+#### Step 3f: Find recovery points
+
+```python
+list_agent_checkpoints(
+    agent_work_dir="~/.hive/agents/{agent_name}",
+    session_id="session_20260209_143022_abc12345",
+    is_clean="true"
+)
+```
+
+Returns checkpoint summaries with IDs, types (`node_start`, `node_complete`), which node, and `is_clean` flag. Clean checkpoints are safe resume points.
+
+#### Step 3g: Compare checkpoints (optional)
+
+To understand what changed between two points in execution:
+
+```python
+compare_agent_checkpoints(
+    agent_work_dir="~/.hive/agents/{agent_name}",
+    session_id="session_20260209_143022_abc12345",
+    checkpoint_id_before="cp_node_complete_research_143030",
+    checkpoint_id_after="cp_node_complete_review_143115"
+)
+```
+
+Returns memory diff (added/removed/changed keys) and execution path diff.
+
+---
+
+### Phase 4: Fix Based on Root Cause
+
+Use the analysis from Phase 3 to determine what to fix and where.
+
+| Root Cause | What to Fix | Where to Edit |
+|------------|------------|---------------|
+| **Prompt issue** — LLM produces wrong output format, misses instructions | Node `system_prompt` | `exports/{agent}/nodes/__init__.py` |
+| **Code bug** — TypeError, KeyError, logic error in Python | Agent code | `exports/{agent}/agent.py`, `nodes/__init__.py` |
+| **Graph issue** — wrong routing, missing edge, bad condition_expr | Edges, node config | `exports/{agent}/agent.py` |
+| **Tool issue** — MCP tool fails, wrong config, missing credential | Tool config | `exports/{agent}/mcp_servers.json`, `/hive-credentials` |
+| **Goal issue** — success criteria too strict/vague, wrong constraints | Goal definition | `exports/{agent}/agent.py` (goal section) |
+| **Test issue** — test expectations don't match actual agent behavior | Test code | `exports/{agent}/tests/test_*.py` |
+
+#### Fix strategies by error category
+
+**IMPLEMENTATION_ERROR** (TypeError, AttributeError, KeyError):
+```python
+# Read the failing code
+Read(file_path="exports/{agent_name}/nodes/__init__.py")
+
+# Fix the bug
+Edit(
+    file_path="exports/{agent_name}/nodes/__init__.py",
+    old_string="results.get('videos')",
+    new_string="(results or {}).get('videos', [])"
+)
+```
+
+**ASSERTION_FAILURE** (test assertions fail but agent ran successfully):
+- Check if the agent's output is actually wrong → fix the prompt
+- Check if the test's expectations are unrealistic → fix the test
+- Use `get_agent_session_memory` to see what the agent actually produced
+
+**TIMEOUT / STALL** (agent runs too long):
+- Check `node_visit_counts` for feedback loops hitting the `max_node_visits` limit
+- Check L3 logs for tool calls that hang
+- Reduce `max_iterations` in loop_config or fix the prompt to converge faster
+
+**API_ERROR** (connection, rate limit, auth):
+- Verify credentials with `/hive-credentials`
+- Check MCP server configuration
+
+---
+
+### Phase 5: Recover & Resume
+
+After fixing the agent, decide whether to resume or re-run.
+
+#### When to resume from checkpoint
+
+Resume when ALL of these are true:
+- The fix is to a node that comes AFTER existing clean checkpoints
+- Clean checkpoints exist (from a CLI execution with checkpointing)
+- The early nodes are expensive (web scraping, API calls, long LLM chains)
+
+```bash
+# Resume from the last clean checkpoint before the failing node
+uv run hive run exports/{agent_name} \
+  --resume-session session_20260209_143022_abc12345 \
+  --checkpoint cp_node_complete_research_143030
+```
+
+This skips all nodes before the checkpoint and only re-runs the fixed node onward.
+
+#### When to re-run from scratch
+
+Re-run when ANY of these are true:
+- The fix is to the entry node or an early node
+- No checkpoints exist (e.g., agent was run via `run_tests`)
+- The agent is fast (2-3 nodes, completes in seconds)
+- You changed the graph structure (added/removed nodes/edges)
+
+```bash
+uv run hive run exports/{agent_name} --input '{"query": "test topic"}'
+```
+
+#### Inspecting a checkpoint before resuming
+
+```python
+get_agent_checkpoint(
+    agent_work_dir="~/.hive/agents/{agent_name}",
+    session_id="session_20260209_143022_abc12345",
+    checkpoint_id="cp_node_complete_research_143030"
+)
+```
+
+Returns the full checkpoint: shared_memory snapshot, execution_path, current_node, next_node, is_clean.
+
+#### Loop back to Phase 2
+
+Resuming or re-running is a new Phase 2 execution: check whether the fix worked. If it did not, continue to Phase 3 and analyze the new failure.
+
+---
+
+### Phase 6: Final Verification
+
+Once the iterative fix loop converges (the agent produces correct output), run the full automated test suite:
+
+```python
+run_tests(
+    goal_id="your-goal-id",
+    agent_path="exports/{agent_name}"
+)
+```
+
+All tests should pass. If not, repeat the loop for remaining failures.
+
+---
+
+## Credential Requirements
+
+**CRITICAL: Testing requires ALL credentials the agent depends on.** This includes both the LLM API key AND any tool-specific credentials (HubSpot, Brave Search, etc.).
+
+### Prerequisites
+
+Before running agent tests, you MUST collect ALL required credentials from the user.
+
+**Step 1: LLM API Key (always required)**
+```bash
+export ANTHROPIC_API_KEY="your-key-here"
+```
+
+**Step 2: Tool-specific credentials (depends on agent's tools)**
+
+Inspect the agent's `mcp_servers.json` and tool configuration to determine which tools the agent uses, then check for all required credentials:
+
+```python
+from aden_tools.credentials import CredentialManager, CREDENTIAL_SPECS
+
+creds = CredentialManager()
+
+# Determine which tools the agent uses (from agent.json or mcp_servers.json)
+agent_tools = [...]  # e.g., ["hubspot_search_contacts", "web_search", ...]
+
+# Find all missing credentials for those tools
+missing = creds.get_missing_for_tools(agent_tools)
+```
+
+Common tool credentials:
+| Tool | Env Var | Help URL |
+|------|---------|----------|
+| HubSpot CRM | `HUBSPOT_ACCESS_TOKEN` | https://developers.hubspot.com/docs/api/private-apps |
+| Brave Search | `BRAVE_SEARCH_API_KEY` | https://brave.com/search/api/ |
+| Google Search | `GOOGLE_SEARCH_API_KEY` + `GOOGLE_SEARCH_CX` | https://developers.google.com/custom-search |
+
+**Why ALL credentials are required:**
+- Tests need to execute the agent's LLM nodes to validate behavior
+- Tools with missing credentials will return error dicts instead of real data
+- Mock mode bypasses everything, providing no confidence in real-world performance
+
+### Mock Mode Limitations
+
+Mock mode (`--mock` flag or `MOCK_MODE=1`) is **ONLY for structure validation**:
+
+- Validates graph structure (nodes, edges, connections)
+- Validates that `AgentRunner.load()` succeeds and the agent is importable
+- Does NOT execute event_loop agents — MockLLMProvider never calls `set_output`, so event_loop nodes loop forever
+- Does NOT test LLM reasoning, content quality, or constraint validation
+- Does NOT test real API integrations or tool use
+
+**Bottom line:** If you're testing whether an agent achieves its goal, you MUST use real credentials.
+
+### Enforcing Credentials in Tests
+
+When writing tests, **ALWAYS include credential checks**:
+
+```python
+import os
+import pytest
+from aden_tools.credentials import CredentialManager
+
+pytestmark = pytest.mark.skipif(
+    not CredentialManager().is_available("anthropic") and not os.environ.get("MOCK_MODE"),
+    reason="API key required for real testing. Set ANTHROPIC_API_KEY or use MOCK_MODE=1."
+)
+
+
+@pytest.fixture(scope="session", autouse=True)
+def check_credentials():
+    """Ensure ALL required credentials are set for real testing."""
+    creds = CredentialManager()
+    mock_mode = os.environ.get("MOCK_MODE")
+
+    if not creds.is_available("anthropic"):
+        if mock_mode:
+            print("\nRunning in MOCK MODE - structure validation only")
+        else:
+            pytest.fail(
+                "\nANTHROPIC_API_KEY not set!\n"
+                "Set API key: export ANTHROPIC_API_KEY='your-key-here'\n"
+                "Or run structure validation: MOCK_MODE=1 pytest exports/{agent}/tests/"
+            )
+
+    if not mock_mode:
+        agent_tools = []  # Update per agent
+        missing = creds.get_missing_for_tools(agent_tools)
+        if missing:
+            lines = ["\nMissing tool credentials!"]
+            for name in missing:
+                spec = creds.specs.get(name)
+                if spec:
+                    lines.append(f"  {spec.env_var} - {spec.description}")
+            pytest.fail("\n".join(lines))
+```
+
+### User Communication
+
+When the user asks to test an agent, **ALWAYS check for ALL credentials first**:
+
+1. **Identify the agent's tools** from `mcp_servers.json`
+2. **Check ALL required credentials** using `CredentialManager`
+3. **Ask the user to provide any missing credentials** before proceeding
+4. Collect ALL missing credentials in a single prompt — not one at a time
+
+---
+
+## Safe Test Patterns
+
+### OutputCleaner
+
+The framework automatically validates and cleans node outputs using a fast LLM at edge traversal time. Tests should still use safe patterns because OutputCleaner may not catch all issues.
+
+### Safe Access (REQUIRED)
+
+```python
+# UNSAFE - will crash on missing keys
+approval = result.output["approval_decision"]
+category = result.output["analysis"]["category"]
+
+# SAFE - use .get() with defaults
+output = result.output or {}
+approval = output.get("approval_decision", "UNKNOWN")
+
+# SAFE - type check before operations
+analysis = output.get("analysis", {})
+if isinstance(analysis, dict):
+    category = analysis.get("category", "unknown")
+
+# SAFE - handle JSON parsing trap (LLM response as string)
+import json
+recommendation = output.get("recommendation", "{}")
+if isinstance(recommendation, str):
+    try:
+        parsed = json.loads(recommendation)
+        if isinstance(parsed, dict):
+            approval = parsed.get("approval_decision", "UNKNOWN")
+    except json.JSONDecodeError:
+        approval = "UNKNOWN"
+elif isinstance(recommendation, dict):
+    approval = recommendation.get("approval_decision", "UNKNOWN")
+
+# SAFE - type check before iteration
+items = output.get("items", [])
+if isinstance(items, list):
+    for item in items:
+        ...
+```
+
+### Helper Functions for conftest.py
+
+```python
+import json
+import re
+
+def _parse_json_from_output(result, key):
+    """Parse JSON from agent output (framework may store full LLM response as string)."""
+    raw_value = (result.output or {}).get(key)
+    try:
+        # Clean inside the try: a None/dict value raises TypeError and is returned as-is
+        return json.loads(re.sub(r'```json\s*|\s*```', '', raw_value).strip())
+    except (json.JSONDecodeError, AttributeError, TypeError):
+        return raw_value
+
+def safe_get_nested(result, key_path, default=None):
+    """Safely get nested value from result.output."""
+    output = result.output or {}
+    current = output
+    for key in key_path:
+        if isinstance(current, dict):
+            current = current.get(key)
+        elif isinstance(current, str):
+            try:
+                json_text = re.sub(r'```json\s*|\s*```', '', current).strip()
+                parsed = json.loads(json_text)
+                if isinstance(parsed, dict):
+                    current = parsed.get(key)
+                else:
+                    return default
+            except json.JSONDecodeError:
+                return default
+        else:
+            return default
+    return current if current is not None else default
+
+# Make available in tests
+pytest.parse_json_from_output = _parse_json_from_output
+pytest.safe_get_nested = safe_get_nested
+```
+
+### ExecutionResult Fields
+
+**`result.success=True` means NO exception, NOT goal achieved**
+
+```python
+# WRONG - only proves the run raised no exception
+assert result.success
+
+# RIGHT - also verify the output meets the success criterion
+assert result.success, f"Agent failed: {result.error}"
+output = result.output or {}
+approval = output.get("approval_decision")
+assert approval == "APPROVED", f"Expected APPROVED, got {approval}"
+```
+
+All fields:
+- `success: bool` — Completed without exception (NOT goal achieved!)
+- `output: dict` — Complete memory snapshot (may contain raw strings)
+- `error: str | None` — Error message if failed
+- `steps_executed: int` — Number of nodes executed
+- `total_tokens: int` — Cumulative token usage
+- `total_latency_ms: int` — Total execution time
+- `path: list[str]` — Node IDs traversed (may repeat in feedback loops)
+- `paused_at: str | None` — Node ID if paused
+- `session_state: dict` — State for resuming
+- `node_visit_counts: dict[str, int]` — Visit counts per node (feedback loop testing)
+- `execution_quality: str` — "clean", "degraded", or "failed"
+
+### Test Count Guidance
+
+**Write 8-15 tests, not 30+**
+
+- 2-3 tests per success criterion
+- 1 happy path test
+- 1 boundary/edge case test
+- 1 error handling test (optional)
+
+Each real test costs ~3 seconds + LLM tokens. 12 tests = ~36 seconds, $0.12.
+
+---
+
+## Test Patterns
+
+### Happy Path
+```python
+@pytest.mark.asyncio
+async def test_happy_path(runner, auto_responder, mock_mode):
+    """Test normal successful execution."""
+    await auto_responder.start()
+    try:
+        result = await runner.run({"query": "python tutorials"})
+    finally:
+        await auto_responder.stop()
+    assert result.success, f"Agent failed: {result.error}"
+    output = result.output or {}
+    assert output.get("report"), "No report produced"
+```
+
+### Boundary Condition
+```python
+@pytest.mark.asyncio
+async def test_minimum_sources(runner, auto_responder, mock_mode):
+    """Test at minimum source threshold."""
+    await auto_responder.start()
+    try:
+        result = await runner.run({"query": "niche topic"})
+    finally:
+        await auto_responder.stop()
+    assert result.success, f"Agent failed: {result.error}"
+    output = result.output or {}
+    sources = output.get("sources", [])
+    if isinstance(sources, list):
+        assert len(sources) >= 3, f"Expected >= 3 sources, got {len(sources)}"
+```
+
+### Error Handling
+```python
+@pytest.mark.asyncio
+async def test_empty_input(runner, auto_responder, mock_mode):
+    """Test graceful handling of empty input."""
+    await auto_responder.start()
+    try:
+        result = await runner.run({"query": ""})
+    finally:
+        await auto_responder.stop()
+    # Agent should either fail gracefully or produce an error message
+    output = result.output or {}
+    assert not result.success or output.get("error"), "Should handle empty input"
+```
+
+### Feedback Loop
+```python
+@pytest.mark.asyncio
+async def test_feedback_loop_terminates(runner, auto_responder, mock_mode):
+    """Test that feedback loops don't run forever."""
+    await auto_responder.start()
+    try:
+        result = await runner.run({"query": "test"})
+    finally:
+        await auto_responder.stop()
+    visits = result.node_visit_counts or {}
+    for node_id, count in visits.items():
+        assert count <= 5, f"Node {node_id} visited {count} times — possible infinite loop"
+```
+
+---
+
+## MCP Tool Reference
+
+### Phase 1: Test Generation
+
+```python
+# Check existing tests
+list_tests(goal_id, agent_path)
+
+# Get constraint test guidelines (returns templates, NOT generated tests)
+generate_constraint_tests(goal_id, goal_json, agent_path)
+# Returns: output_file, file_header, test_template, constraints_formatted, test_guidelines
+
+# Get success criteria test guidelines
+generate_success_tests(goal_id, goal_json, node_names, tool_names, agent_path)
+# Returns: output_file, file_header, test_template, success_criteria_formatted, test_guidelines
+```
+
+### Phase 2: Execution
+
+```python
+# Automated regression (no checkpoints, fresh runs)
+run_tests(goal_id, agent_path, test_types='["all"]', parallel=-1, fail_fast=False)
+
+# Run only specific test types
+run_tests(goal_id, agent_path, test_types='["constraint"]')
+run_tests(goal_id, agent_path, test_types='["success"]')
+```
+
+```bash
+# Iterative debugging with checkpoints (via CLI)
+uv run hive run exports/{agent_name} --input '{"query": "test"}'
+```
+
+### Phase 3: Analysis
+
+```python
+# Debug a specific failed test
+debug_test(goal_id, test_name, agent_path)
+
+# Find failed sessions
+list_agent_sessions(agent_work_dir, status="failed", limit=5)
+
+# Inspect session state (excludes memory values)
+get_agent_session_state(agent_work_dir, session_id)
+
+# Inspect memory data
+get_agent_session_memory(agent_work_dir, session_id, key="research_results")
+
+# Runtime logs: L1 summaries
+query_runtime_logs(agent_work_dir, status="needs_attention")
+
+# Runtime logs: L2 per-node details
+query_runtime_log_details(agent_work_dir, run_id, needs_attention_only=True)
+
+# Runtime logs: L3 tool/LLM raw data
+query_runtime_log_raw(agent_work_dir, run_id, node_id="research")
+
+# Find clean checkpoints
+list_agent_checkpoints(agent_work_dir, session_id, is_clean="true")
+
+# Compare checkpoints (memory diff)
+compare_agent_checkpoints(agent_work_dir, session_id, cp_before, cp_after)
+```
+
+### Phase 5: Recovery
+
+```python
+# Inspect checkpoint before resuming
+get_agent_checkpoint(agent_work_dir, session_id, checkpoint_id)
+# Empty checkpoint_id = latest checkpoint
+```
+
+```bash
+# Resume from checkpoint via CLI (headless)
+uv run hive run exports/{agent_name} \
+  --resume-session {session_id} --checkpoint {checkpoint_id}
+```
+
+---
+
+## Anti-Patterns
+
+| Don't | Do Instead |
+|-------|-----------|
+| Use `default_agent.run()` in tests | Use `runner.run()` with `auto_responder` fixtures (goes through AgentRuntime) |
+| Re-run entire agent when a late node fails | Resume from last clean checkpoint |
+| Treat `result.success` as goal achieved | Check `result.output` for actual criteria |
+| Access `result.output["key"]` directly | Use `(result.output or {}).get("key")` — `output` may be `None` or lack the key |
+| Fix random things hoping tests pass | Analyze L2/L3 logs to find root cause first |
+| Write 30+ tests | Write 8-15 focused tests |
+| Skip credential check | Use `/hive-credentials` before testing |
+| Confuse `exports/` with `~/.hive/agents/` | Code in `exports/`, runtime data in `~/.hive/` |
+| Use `run_tests` for iterative debugging | Use headless CLI with checkpoints for iterative debugging |
+| Use headless CLI for final regression | Use `run_tests` for automated regression |
+| Use `--tui` from Claude Code | Use headless `run` command — TUI hangs in non-interactive shells |
+| Test client-facing nodes from Claude Code | Use mock mode, or have the user run the agent in their terminal |
+| Run tests without reading goal first | Always understand the goal before writing tests |
+| Skip Phase 3 analysis and guess | Use session + log tools to identify root cause |
+
+---
+
+## Example Walkthrough: Deep Research Agent
+
+A complete iteration showing the test loop for an agent with nodes: `intake → research → review → report`.
+
+### Phase 1: Generate tests
+
+```python
+# Read the goal
+Read(file_path="exports/deep_research_agent/agent.py")
+
+# Get success criteria test guidelines
+result = generate_success_tests(
+    goal_id="rigorous-interactive-research",
+    goal_json='{"id": "rigorous-interactive-research", "success_criteria": [{"id": "source-diversity", "target": ">=5"}, {"id": "citation-coverage", "target": "100%"}, {"id": "report-completeness", "target": "90%"}]}',
+    node_names="intake,research,review,report",
+    tool_names="web_search,web_scrape",
+    agent_path="exports/deep_research_agent"
+)
+
+# Write tests
+Write(
+    file_path=result["output_file"],
+    content=result["file_header"] + "\n\n" + test_code
+)
+```
+
+### Phase 2: First execution
+
+```python
+run_tests(
+    goal_id="rigorous-interactive-research",
+    agent_path="exports/deep_research_agent",
+    fail_fast=True
+)
+```
+
+Result: `test_success_source_diversity` fails — agent only found 2 sources instead of 5.
+
+### Phase 3: Analyze
+
+```python
+# Debug the failing test
+debug_test(
+    goal_id="rigorous-interactive-research",
+    test_name="test_success_source_diversity",
+    agent_path="exports/deep_research_agent"
+)
+# → ASSERTION_FAILURE: Expected >= 5 sources, got 2
+
+# Find the session
+list_agent_sessions(
+    agent_work_dir="~/.hive/agents/deep_research_agent",
+    status="completed",
+    limit=1
+)
+# → session_20260209_150000_abc12345
+
+# See what the research node produced
+get_agent_session_memory(
+    agent_work_dir="~/.hive/agents/deep_research_agent",
+    session_id="session_20260209_150000_abc12345",
+    key="research_results"
+)
+# → Only 2 web_search calls made, each returned 1 source
+
+# Check the LLM's behavior in the research node
+query_runtime_log_raw(
+    agent_work_dir="~/.hive/agents/deep_research_agent",
+    run_id="session_20260209_150000_abc12345",
+    node_id="research"
+)
+# → LLM called web_search only twice, then called set_output
+```
+
+Root cause: The research node's prompt doesn't tell the LLM to search for at least 5 diverse sources. It stops after the first couple of searches.
+
+### Phase 4: Fix the prompt
+
+```python
+Read(file_path="exports/deep_research_agent/nodes/__init__.py")
+
+Edit(
+    file_path="exports/deep_research_agent/nodes/__init__.py",
+    old_string='system_prompt="Search for information on the user\'s topic."',
+    new_string='system_prompt="Search for information on the user\'s topic. You MUST find at least 5 diverse, authoritative sources. Use multiple different search queries to ensure source diversity. Do not stop searching until you have at least 5 distinct sources."'
+)
+```
+
+### Phase 5: Resume from checkpoint
+
+For this example, the fix is to the `research` node. If we had run via CLI with checkpointing, we could resume from the checkpoint after `intake` to skip re-running intake:
+
+```bash
+# MCP tool call (not a shell command): check for a clean checkpoint after intake
+list_agent_checkpoints(
+    agent_work_dir="~/.hive/agents/deep_research_agent",
+    session_id="session_20260209_150000_abc12345",
+    is_clean="true"
+)
+# → cp_node_complete_intake_150005
+
+# Resume from after intake, re-run research with fixed prompt
+uv run hive run exports/deep_research_agent \
+  --resume-session session_20260209_150000_abc12345 \
+  --checkpoint cp_node_complete_intake_150005
+```
+
+Or for this simple case (intake is fast), just re-run:
+
+```bash
+uv run hive run exports/deep_research_agent --input '{"query": "test"}'
+```
+
+### Phase 6: Final verification
+
+```python
+run_tests(
+    goal_id="rigorous-interactive-research",
+    agent_path="exports/deep_research_agent"
+)
+# → All 12 tests pass
+```
+
+---
+
+## Test File Structure
+
+```
+exports/{agent_name}/
+├── agent.py              ← Agent to test (goal, nodes, edges)
+├── nodes/__init__.py     ← Node implementations (prompts, config)
+├── config.py             ← Agent configuration
+├── mcp_servers.json      ← Tool server config
+└── tests/
+    ├── conftest.py           ← Shared fixtures + safe access helpers
+    ├── test_constraints.py   ← Constraint tests
+    ├── test_success_criteria.py  ← Success criteria tests
+    └── test_edge_cases.py    ← Edge case tests
+```
+
+## Integration with Other Skills
+
+| Scenario | From | To | Action |
+|----------|------|----|--------|
+| Agent built, ready to test | `/hive-create` | `/hive-test` | Generate tests, start loop |
+| Prompt fix needed | `/hive-test` Phase 4 | Direct edit | Edit `nodes/__init__.py`, resume |
+| Goal definition wrong | `/hive-test` Phase 4 | `/hive-create` | Update goal, may need rebuild |
+| Missing credentials | `/hive-test` Phase 3 | `/hive-credentials` | Set up credentials |
+| Complex runtime failure | `/hive-test` Phase 3 | `/hive-debugger` | Deep L1/L2/L3 analysis |
+| All tests pass | `/hive-test` Phase 6 | Done | Agent validated |
@@ -0,0 +1,333 @@
+# Example: Iterative Testing of a Research Agent
+
+This example walks through the full iterative test loop for a research agent that searches the web, reviews findings, and produces a cited report.
+
+## Agent Structure
+
+```
+exports/deep_research_agent/
+├── agent.py          # Goal + graph: intake → research → review → report
+├── nodes/__init__.py # Node definitions (system_prompt, input/output keys)
+├── config.py         # Model config
+├── mcp_servers.json  # Tools: web_search, web_scrape
+└── tests/            # Test files (we'll create these)
+```
+
+**Goal:** "Rigorous Interactive Research" — find 5+ diverse sources, cite every claim, produce a complete report.
+
+---
+
+## Phase 1: Generate Tests
+
+### Read the goal
+
+```python
+Read(file_path="exports/deep_research_agent/agent.py")
+# Extract: goal_id="rigorous-interactive-research"
+# success_criteria: source-diversity (>=5), citation-coverage (100%), report-completeness (90%)
+# constraints: no-hallucination, source-attribution
+```
+
+### Get test guidelines
+
+```python
+result = generate_success_tests(
+    goal_id="rigorous-interactive-research",
+    goal_json='{"id": "rigorous-interactive-research", "success_criteria": [{"id": "source-diversity", "description": "Use multiple diverse sources", "target": ">=5"}, {"id": "citation-coverage", "description": "Every claim cites its source", "target": "100%"}, {"id": "report-completeness", "description": "Report answers the research questions", "target": "90%"}]}',
+    node_names="intake,research,review,report",
+    tool_names="web_search,web_scrape",
+    agent_path="exports/deep_research_agent"
+)
+```
+
+### Write tests
+
+```python
+Write(
+    file_path="exports/deep_research_agent/tests/test_success_criteria.py",
+    content=result["file_header"] + '''
+
+@pytest.mark.asyncio
+async def test_success_source_diversity(runner, auto_responder, mock_mode):
+    """At least 5 diverse sources are found."""
+    await auto_responder.start()
+    try:
+        result = await runner.run({"query": "impact of remote work on productivity"})
+    finally:
+        await auto_responder.stop()
+    assert result.success, f"Agent failed: {result.error}"
+    output = result.output or {}
+    sources = output.get("sources", [])
+    if isinstance(sources, list):
+        assert len(sources) >= 5, f"Expected >= 5 sources, got {len(sources)}"
+
+@pytest.mark.asyncio
+async def test_success_citation_coverage(runner, auto_responder, mock_mode):
+    """Every factual claim in the report cites its source."""
+    await auto_responder.start()
+    try:
+        result = await runner.run({"query": "climate change effects on agriculture"})
+    finally:
+        await auto_responder.stop()
+    assert result.success, f"Agent failed: {result.error}"
+    output = result.output or {}
+    report = output.get("report", "")
+    # Check that report contains numbered references
+    assert "[1]" in str(report) or "[source" in str(report).lower(), "Report lacks citations"
+
+@pytest.mark.asyncio
+async def test_success_report_completeness(runner, auto_responder, mock_mode):
+    """Report addresses the original research question."""
+    query = "pros and cons of nuclear energy"
+    await auto_responder.start()
+    try:
+        result = await runner.run({"query": query})
+    finally:
+        await auto_responder.stop()
+    assert result.success, f"Agent failed: {result.error}"
+    output = result.output or {}
+    report = output.get("report", "")
+    assert len(str(report)) > 200, f"Report too short: {len(str(report))} chars"
+
+@pytest.mark.asyncio
+async def test_empty_query_handling(runner, auto_responder, mock_mode):
+    """Agent handles empty input gracefully."""
+    await auto_responder.start()
+    try:
+        result = await runner.run({"query": ""})
+    finally:
+        await auto_responder.stop()
+    output = result.output or {}
+    assert not result.success or output.get("error"), "Should handle empty query"
+
+@pytest.mark.asyncio
+async def test_feedback_loop_terminates(runner, auto_responder, mock_mode):
+    """Feedback loop between review and research terminates."""
+    await auto_responder.start()
+    try:
+        result = await runner.run({"query": "quantum computing basics"})
+    finally:
+        await auto_responder.stop()
+    visits = result.node_visit_counts or {}
+    for node_id, count in visits.items():
+        assert count <= 5, f"Node {node_id} visited {count} times"
+'''
+)
+```
+
+---
+
+## Phase 2: First Execution
+
+```python
+run_tests(
+    goal_id="rigorous-interactive-research",
+    agent_path="exports/deep_research_agent",
+    fail_fast=True
+)
+```
+
+**Result:**
+```json
+{
+  "overall_passed": false,
+  "summary": {"total": 5, "passed": 3, "failed": 2, "pass_rate": "60.0%"},
+  "failures": [
+    {"test_name": "test_success_source_diversity", "details": "AssertionError: Expected >= 5 sources, got 2"},
+    {"test_name": "test_success_citation_coverage", "details": "AssertionError: Report lacks citations"}
+  ]
+}
+```
+
+---
+
+## Phase 3: Analyze (Iteration 1)
+
+### Debug the first failure
+
+```python
+debug_test(
+    goal_id="rigorous-interactive-research",
+    test_name="test_success_source_diversity",
+    agent_path="exports/deep_research_agent"
+)
+# Category: ASSERTION_FAILURE — Expected >= 5 sources, got 2
+```
+
+### Find the session and inspect memory
+
+```python
+list_agent_sessions(
+    agent_work_dir="~/.hive/agents/deep_research_agent",
+    status="completed",
+    limit=1
+)
+# → session_20260209_150000_abc12345
+
+get_agent_session_memory(
+    agent_work_dir="~/.hive/agents/deep_research_agent",
+    session_id="session_20260209_150000_abc12345",
+    key="research_results"
+)
+# → Only 2 sources found. LLM stopped searching after 2 queries.
+```
+
+### Check LLM behavior in the research node
+
+```python
+query_runtime_log_raw(
+    agent_work_dir="~/.hive/agents/deep_research_agent",
+    run_id="session_20260209_150000_abc12345",
+    node_id="research"
+)
+# → LLM called web_search twice, got results, immediately called set_output.
+# → Prompt doesn't instruct it to find at least 5 sources.
+```
+
+**Root cause:** The research node's system_prompt doesn't specify minimum source requirements.
+
+---
+
+## Phase 4: Fix (Iteration 1)
+
+```python
+Read(file_path="exports/deep_research_agent/nodes/__init__.py")
+
+# Fix the research node prompt
+Edit(
+    file_path="exports/deep_research_agent/nodes/__init__.py",
+    old_string='system_prompt="Search for information on the user\'s topic using web search."',
+    new_string='system_prompt="Search for information on the user\'s topic using web search. You MUST find at least 5 diverse, authoritative sources. Use multiple different search queries with varied keywords. Do NOT call set_output until you have gathered at least 5 distinct sources from different domains."'
+)
+```
+
+---
+
+## Phase 5: Recover & Resume (Iteration 1)
+
+The fix is to the `research` node. Since this was a `run_tests` execution (no checkpoints), we re-run from scratch:
+
+```python
+run_tests(
+    goal_id="rigorous-interactive-research",
+    agent_path="exports/deep_research_agent",
+    fail_fast=True
+)
+```
+
+**Result:**
+```json
+{
+  "overall_passed": false,
+  "summary": {"total": 5, "passed": 4, "failed": 1, "pass_rate": "80.0%"},
+  "failures": [
+    {"test_name": "test_success_citation_coverage", "details": "AssertionError: Report lacks citations"}
+  ]
+}
+```
+
+Source diversity now passes. Citation coverage still fails.
+
+---
+
+## Phase 3: Analyze (Iteration 2)
+
+```python
+debug_test(
+    goal_id="rigorous-interactive-research",
+    test_name="test_success_citation_coverage",
+    agent_path="exports/deep_research_agent"
+)
+# Category: ASSERTION_FAILURE — Report lacks citations
+
+# Check what the report node produced
+list_agent_sessions(
+    agent_work_dir="~/.hive/agents/deep_research_agent",
+    status="completed",
+    limit=1
+)
+# → session_20260209_151500_def67890
+
+get_agent_session_memory(
+    agent_work_dir="~/.hive/agents/deep_research_agent",
+    session_id="session_20260209_151500_def67890",
+    key="report"
+)
+# → Report text exists but uses no numbered references.
+# → Sources are in memory but report node doesn't cite them.
+```
+
+**Root cause:** The report node's prompt doesn't instruct the LLM to include numbered citations.
+
+---
+
+## Phase 4: Fix (Iteration 2)
+
+```python
+Edit(
+    file_path="exports/deep_research_agent/nodes/__init__.py",
+    old_string='system_prompt="Write a comprehensive report based on the research findings."',
+    new_string='system_prompt="Write a comprehensive report based on the research findings. You MUST include numbered citations [1], [2], etc. for every factual claim. At the end, include a References section listing all sources with their URLs. Every claim must be traceable to a specific source."'
+)
+```
+
+---
+
+## Phase 5: Resume (Iteration 2)
+
+The fix is to the `report` node (the last node). To demonstrate checkpoint recovery, run via CLI:
+
+```bash
+# Run via CLI to get checkpoints
+uv run hive run exports/deep_research_agent --input '{"query": "climate change effects"}'
+
+# After it runs, find the clean checkpoint before report (MCP tool call, not a shell command)
+list_agent_checkpoints(
+    agent_work_dir="~/.hive/agents/deep_research_agent",
+    session_id="session_20260209_152000_ghi34567",
+    is_clean="true"
+)
+# → cp_node_complete_review_152100 (after review, before report)
+
+# Resume — skips intake, research, review entirely
+uv run hive run exports/deep_research_agent \
+  --resume-session session_20260209_152000_ghi34567 \
+  --checkpoint cp_node_complete_review_152100
+```
+
+Only the `report` node re-runs with the fixed prompt, using research data from the checkpoint.
+
+---
+
+## Phase 6: Final Verification
+
+```python
+run_tests(
+    goal_id="rigorous-interactive-research",
+    agent_path="exports/deep_research_agent"
+)
+```
+
+**Result:**
+```json
+{
+  "overall_passed": true,
+  "summary": {"total": 5, "passed": 5, "failed": 0, "pass_rate": "100.0%"}
+}
+```
+
+All tests pass.
+
+---
+
+## Summary
+
+| Iteration | Failure | Root Cause | Fix | Recovery |
+|-----------|---------|------------|-----|----------|
+| 1 | Source diversity (2 < 5) | Research prompt too vague | Added "at least 5 sources" to prompt | Re-run (no checkpoints) |
+| 2 | No citations in report | Report prompt lacks citation instructions | Added citation requirements | Checkpoint resume (skipped 3 nodes) |
+
+**Key takeaways:**
+- Phase 3 analysis (session memory + L3 logs) identified root causes without guessing
+- Checkpoint recovery in iteration 2 saved time by skipping 3 expensive nodes
+- Final `run_tests` confirms all scenarios pass end-to-end
@@ -0,0 +1,526 @@
+---
+name: hive
+description: Complete workflow for building, implementing, and testing goal-driven agents. Orchestrates hive-* skills. Use when starting a new agent project, unsure which skill to use, or need end-to-end guidance.
+license: Apache-2.0
+metadata:
+  author: hive
+  version: "2.0"
+  type: workflow-orchestrator
+  orchestrates:
+    - hive-concepts
+    - hive-create
+    - hive-patterns
+    - hive-test
+    - hive-credentials
+    - hive-debugger
+---
+
+# Agent Development Workflow
+
+**THIS IS AN EXECUTABLE WORKFLOW. DO NOT explore the codebase or read source files. ROUTE to the correct skill IMMEDIATELY.**
+
+When this skill is loaded, **ALWAYS use the AskUserQuestion tool** to present options:
+
+```
+Use AskUserQuestion with these options:
+- "Build a new agent" → Then invoke /hive-create
+- "Test an existing agent" → Then invoke /hive-test
+- "Learn agent concepts" → Then invoke /hive-concepts
+- "Optimize agent design" → Then invoke /hive-patterns
+- "Set up credentials" → Then invoke /hive-credentials
+- "Debug a failing agent" → Then invoke /hive-debugger
+- "Other" (please describe what you want to achieve)
+```
+
+**DO NOT:** Read source files, explore the codebase, search for code, or do any investigation before routing. The sub-skills handle all of that.
+
+---
+
+Complete Standard Operating Procedure (SOP) for building production-ready goal-driven agents.
+
+## Overview
+
+This workflow orchestrates specialized skills to take you from initial concept to production-ready agent:
+
+1. **Understand Concepts** → `/hive-concepts` (optional)
+2. **Build Structure** → `/hive-create`
+3. **Optimize Design** → `/hive-patterns` (optional)
+4. **Set Up Credentials** → `/hive-credentials` (if agent uses tools requiring API keys)
+5. **Test & Validate** → `/hive-test`
+6. **Debug Issues** → `/hive-debugger` (if agent fails at runtime)
+
+## When to Use This Workflow
+
+Use this meta-skill when:
+- Starting a new agent from scratch
+- Unclear which skill to use first
+- Need end-to-end guidance for agent development
+- Want consistent, repeatable agent builds
+
+**Skip this workflow** if:
+- You only need to test an existing agent → use `/hive-test` directly
+- You know exactly which phase you're in → use specific skill directly
+
+## Quick Decision Tree
+
+```
+"Need to understand agent concepts" → hive-concepts
+"Build a new agent" → hive-create
+"Optimize my agent design" → hive-patterns
+"Need client-facing nodes or feedback loops" → hive-patterns
+"Set up API keys for my agent" → hive-credentials
+"Test my agent" → hive-test
+"My agent is failing/stuck/has errors" → hive-debugger
+"Not sure what I need" → Read phases below, then decide
+"Agent has structure but needs implementation" → See agent directory STATUS.md
+```
+
+## Phase 0: Understand Concepts (Optional)
+
+**Skill**: `/hive-concepts`
+**Input**: Questions about agent architecture
+
+### When to Use
+
+- First time building an agent
+- Need to understand node types, edges, goals
+- Want to validate tool availability
+- Learning about event loop architecture and client-facing nodes
+
+### What This Phase Provides
+
+- Architecture overview (Python packages, not JSON)
+- Core concepts (Goal, Node, Edge, Event Loop, Judges)
+- Tool discovery and validation procedures
+- Workflow overview
+
+**Skip this phase** if you already understand agent fundamentals.
+
+## Phase 1: Build Agent Structure
+
+**Skill**: `/hive-create`
+**Input**: User requirements ("Build an agent that...") or a template to start from
+
+### What This Phase Does
+
+Creates the complete agent architecture:
+- Package structure (`exports/agent_name/`)
+- Goal with success criteria and constraints
+- Workflow graph (nodes and edges)
+- Node specifications
+- CLI interface
+- Documentation
+
+### Process
+
+1. **Create package** - Directory structure with skeleton files
+2. **Define goal** - Success criteria and constraints written to agent.py
+3. **Design nodes** - Each node approved and written incrementally
+4. **Connect edges** - Workflow graph with conditional routing
+5. **Finalize** - Agent class, exports, and documentation
+
+### Outputs
+
+- ✅ `exports/agent_name/` package created
+- ✅ Goal defined in agent.py
+- ✅ 3-5 success criteria defined
+- ✅ 1-5 constraints defined
+- ✅ 5-10 nodes specified in `nodes/__init__.py`
+- ✅ 8-15 edges connecting workflow
+- ✅ Validated structure (passes `PYTHONPATH=exports uv run python -m agent_name validate`)
+- ✅ README.md with usage instructions
+- ✅ CLI commands (info, validate, run, shell)
+
+### Success Criteria
+
+You're ready for Phase 2 when:
+- Agent structure validates without errors
+- All nodes and edges are defined
+- CLI commands work (info, validate)
+- You see: "Agent complete: exports/agent_name/"
+
+### Common Outputs
+
+The hive-create skill produces:
+```
+exports/agent_name/
+├── __init__.py          (package exports)
+├── __main__.py          (CLI interface)
+├── agent.py             (goal, graph, agent class)
+├── nodes/__init__.py    (node specifications)
+├── config.py            (configuration)
+├── implementations.py   (may be created for Python functions)
+└── README.md            (documentation)
+```
+
+### Next Steps
+
+**If structure complete and validated:**
+→ Check `exports/agent_name/STATUS.md` or `IMPLEMENTATION_GUIDE.md`
+→ These files explain implementation options
+→ You may need to add Python functions or MCP tools (not covered by current skills)
+
+**If you want to optimize design:**
+→ Proceed to Phase 1.5 (hive-patterns)
+
+**If ready to test:**
+→ Proceed to Phase 2
+
+## Phase 1.5: Optimize Design (Optional)
+
+**Skill**: `/hive-patterns`
+**Input**: Completed agent structure
+
+### When to Use
+
+- Want to add client-facing blocking or feedback edges
+- Need judge patterns for output validation
+- Want fan-out/fan-in (parallel execution)
+- Need error handling patterns
+- Want best practices guidance
+
+### What This Phase Provides
+
+- Client-facing interaction patterns
+- Feedback edge routing with nullable output keys
+- Judge patterns (implicit, SchemaJudge)
+- Fan-out/fan-in parallel execution
+- Context management and spillover patterns
+- Anti-patterns to avoid
+
+**Skip this phase** if your agent design is straightforward.
+
+## Phase 2: Test & Validate
+
+**Skill**: `/hive-test`
+**Input**: Working agent from Phase 1
+
+### What This Phase Does
+
+Guides the creation and execution of a comprehensive test suite:
+- Constraint tests
+- Success criteria tests
+- Edge case tests
+- Integration tests
+
+### Process
+
+1. **Analyze agent** - Read goal, constraints, success criteria
+2. **Generate tests** - The calling agent writes pytest files in `exports/agent_name/tests/` using hive-test guidelines and templates
+3. **User approval** - Review and approve each test
+4. **Run evaluation** - Execute tests and collect results
+5. **Debug failures** - Identify and fix issues
+6. **Iterate** - Repeat until all tests pass
+
+### Outputs
+
+- ✅ Test files in `exports/agent_name/tests/`
+- ✅ Test report with pass/fail metrics
+- ✅ Coverage of all success criteria
+- ✅ Coverage of all constraints
+- ✅ Edge case handling verified
+
+### Success Criteria
+
+You're done when:
+- All tests pass
+- All success criteria validated
+- All constraints verified
+- Agent handles edge cases
+- Test coverage is comprehensive
+
+### Next Steps
+
+**Agent ready for:**
+- Production deployment
+- Integration into larger systems
+- Documentation and handoff
+- Continuous monitoring
+
+## Phase Transitions
+
+### From Phase 1 to Phase 2
+
+**Trigger signals:**
+- "Agent complete: exports/..."
+- Structure validation passes
+- README indicates implementation complete
+
+**Before proceeding:**
+- Verify agent can be imported: `from exports.agent_name import default_agent`
+- Check if implementation is needed (see STATUS.md or IMPLEMENTATION_GUIDE.md)
+- Confirm agent executes without import errors
+
+### Skipping Phases
+
+**When to skip Phase 1:**
+- Agent structure already exists
+- Only need to add tests
+- Modifying existing agent
+
+**When to skip Phase 2:**
+- Prototyping or exploring
+- Agent not production-bound
+- Manual testing sufficient
+
+## Common Patterns
+
+### Pattern 1: Complete New Build (Simple)
+
+```
+User: "Build an agent that monitors files"
+→ Use /hive-create
+→ Agent structure created
+→ Use /hive-test
+→ Tests created and passing
+→ Done: Production-ready agent
+```
+
+### Pattern 1b: Complete New Build (With Learning)
+
+```
+User: "Build an agent (first time)"
+→ Use /hive-concepts (understand concepts)
+→ Use /hive-create (build structure)
+→ Use /hive-patterns (optimize design)
+→ Use /hive-test (validate)
+→ Done: Production-ready agent
+```
+
+### Pattern 1c: Build from Template
+
+```
+User: "Build an agent based on the deep research template"
+→ Use /hive-create
+→ Select "From a template" path
+→ Pick template, name new agent
+→ Review/modify goal, nodes, graph
+→ Agent exported with customizations
+→ Use /hive-test
+→ Done: Customized agent
+```
+
+### Pattern 2: Test Existing Agent
+
+```
+User: "Test my agent at exports/my_agent"
+→ Skip Phase 1
+→ Use /hive-test directly
+→ Tests created
+→ Done: Validated agent
+```
+
+### Pattern 3: Iterative Development
+
+```
+User: "Build an agent"
+→ Use /hive-create (Phase 1)
+→ Implementation needed (see STATUS.md)
+→ [User implements functions]
+→ Use /hive-test (Phase 2)
+→ Tests reveal bugs
+→ [Fix bugs manually]
+→ Re-run tests
+→ Done: Working agent
+```
+
+### Pattern 4: Agent with Review Loops and HITL Checkpoints
+
+```
+User: "Build an agent with human review and feedback loops"
+→ Use /hive-concepts (learn event loop, client-facing nodes)
+→ Use /hive-create (build structure with feedback edges)
+→ Use /hive-patterns (implement client-facing + feedback patterns)
+→ Use /hive-test (validate review flows and edge routing)
+→ Done: Agent with HITL checkpoints and review loops
+```
+
+## Skill Dependencies
+
+```
+hive (meta-skill)
+    │
+    ├── hive-concepts (foundational)
+    │   ├── Architecture concepts (event loop, judges)
+    │   ├── Node types (event_loop, function)
+    │   ├── Edge routing and priority
+    │   ├── Tool discovery procedures
+    │   └── Workflow overview
+    │
+    ├── hive-create (procedural)
+    │   ├── Creates package structure
+    │   ├── Defines goal
+    │   ├── Adds nodes (event_loop, function)
+    │   ├── Connects edges with priority routing
+    │   ├── Finalizes agent class
+    │   └── Requires: hive-concepts
+    │
+    ├── hive-patterns (reference)
+    │   ├── Client-facing interaction patterns
+    │   ├── Feedback edges and review loops
+    │   ├── Judge patterns (implicit, SchemaJudge)
+    │   ├── Fan-out/fan-in parallel execution
+    │   └── Context management and anti-patterns
+    │
+    ├── hive-credentials (utility)
+    │   ├── Detects missing credentials
+    │   ├── Offers auth method choices (Aden OAuth, direct API key)
+    │   ├── Stores securely in ~/.hive/credentials
+    │   └── Validates with health checks
+    │
+    ├── hive-test (validation)
+    │   ├── Reads agent goal
+    │   ├── Generates tests
+    │   ├── Runs evaluation
+    │   └── Reports results
+    │
+    └── hive-debugger (troubleshooting)
+        ├── Monitors runtime logs (L1/L2/L3)
+        ├── Identifies retry loops, tool failures
+        ├── Categorizes issues (10 categories)
+        └── Provides fix recommendations
+```
+
+## Troubleshooting
+
+### "Agent structure won't validate"
+
+- Check node IDs match between `nodes/__init__.py` and `agent.py`
+- Verify all edges reference valid node IDs
+- Ensure entry_node exists in nodes list
+- Run: `PYTHONPATH=exports uv run python -m agent_name validate`
+
+### "Agent has structure but won't run"
+
+- Check for STATUS.md or IMPLEMENTATION_GUIDE.md in agent directory
+- Implementation may be needed (Python functions or MCP tools)
+- This is expected - hive-create creates structure, not implementation
+- See implementation guide for completion options
+
+### "Tests are failing"
+
+- Review test output for specific failures
+- Check agent goal and success criteria
+- Verify constraints are met
+- Use `/hive-test` to debug and iterate
+- Fix agent code and re-run tests
+
+### "Agent is failing at runtime"
+
+- Use `/hive-debugger` to analyze runtime logs
+- The debugger identifies retry loops, tool failures, and stalled execution
+- Get actionable fix recommendations with code changes
+- Monitor the agent in real-time during TUI sessions
+
+### "Not sure which phase I'm in"
+
+Run these checks:
+
+```bash
+# Check if agent structure exists
+ls exports/my_agent/agent.py
+
+# Check if it validates
+PYTHONPATH=exports uv run python -m my_agent validate
+
+# Check if tests exist
+ls exports/my_agent/tests/
+
+# If structure exists and validates → Phase 2 (testing)
+# If structure doesn't exist → Phase 1 (building)
+# If tests exist but are failing → Debug phase
+```
+
+## Best Practices
+
+### For Phase 1 (Building)
+
+1. **Start with clear requirements** - Know what the agent should do
+2. **Define success criteria early** - Measurable goals drive design
+3. **Keep nodes focused** - One responsibility per node
+4. **Use descriptive names** - Node IDs should explain purpose
+5. **Validate incrementally** - Check structure after each major addition
+
+### For Phase 2 (Testing)
+
+1. **Test constraints first** - Hard requirements must pass
+2. **Mock external dependencies** - Use mock mode for LLMs/APIs
+3. **Cover edge cases** - Test failures, not just success paths
+4. **Iterate quickly** - Fix one test at a time
+5. **Document test patterns** - Future tests follow same structure
+
+### General Workflow
+
+1. **Use version control** - Git commit after each phase
+2. **Document decisions** - Update README with changes
+3. **Keep iterations small** - Build → Test → Fix → Repeat
+4. **Preserve working states** - Tag successful iterations
+5. **Learn from failures** - Failed tests reveal design issues
+
+## Exit Criteria
+
+You're done with the workflow when:
+
+✅ Agent structure validates
+✅ All tests pass
+✅ Success criteria met
+✅ Constraints verified
+✅ Documentation complete
+✅ Agent ready for deployment
+
+## Additional Resources
+
+- **hive-concepts**: See `.claude/skills/hive-concepts/SKILL.md`
+- **hive-create**: See `.claude/skills/hive-create/SKILL.md`
+- **hive-patterns**: See `.claude/skills/hive-patterns/SKILL.md`
+- **hive-test**: See `.claude/skills/hive-test/SKILL.md`
+- **Agent framework docs**: See `core/README.md`
+- **Example agents**: See `exports/` directory
+
+## Summary
+
+This workflow provides a proven path from concept to production-ready agent:
+
+1. **Learn** with `/hive-concepts` → Understand fundamentals (optional)
+2. **Build** with `/hive-create` → Get validated structure
+3. **Optimize** with `/hive-patterns` → Apply best practices (optional)
+4. **Configure** with `/hive-credentials` → Set up API keys (if needed)
+5. **Test** with `/hive-test` → Get verified functionality
+6. **Debug** with `/hive-debugger` → Fix runtime issues (if needed)
+
+The workflow is **flexible** - skip phases as needed, iterate freely, and adapt to your specific requirements. The goal is **production-ready agents** built with **consistent, repeatable processes**.
+
+## Skill Selection Guide
+
+**Choose hive-concepts when:**
+- First time building agents
+- Need to understand event loop architecture
+- Validating tool availability
+- Learning about node types, edges, and judges
+
+**Choose hive-create when:**
+- Actually building an agent
+- Have clear requirements
+- Ready to write code
+- Want step-by-step guidance
+- Want to start from an existing template and customize it
+
+**Choose hive-patterns when:**
+- Agent structure complete
+- Need client-facing nodes or feedback edges
+- Implementing review loops or fan-out/fan-in
+- Want judge patterns or context management
+- Want best practices
+
+**Choose hive-test when:**
+- Agent structure complete
+- Ready to validate functionality
+- Need comprehensive test coverage
+- Testing feedback loops, output keys, or fan-out
+
+**Choose hive-debugger when:**
+- Agent is failing or stuck at runtime
+- Seeing retry loops or escalations
+- Tool calls are failing
+- Need to understand why a node isn't completing
+- Want real-time monitoring of agent execution
@@ -0,0 +1,199 @@
+# Example: File Monitor Agent
+
+This example shows the complete /hive workflow in action for building a file monitoring agent.
+
+## Initial Request
+
+```
+User: "Build an agent that monitors ~/Downloads and copies new files to ~/Documents"
+```
+
+## Phase 1: Building (20 minutes)
+
+### Step 1: Create Structure
+
+Agent invokes `/hive-create` skill and:
+
+1. Creates `exports/file_monitor_agent/` package
+2. Writes skeleton files (`__init__.py`, `__main__.py`, `agent.py`, etc.)
+
+**Output**: Package structure visible immediately
+
+### Step 2: Define Goal
+
+```python
+goal = Goal(
+    id="file-monitor-copy",
+    name="Automated File Monitor & Copy",
+    success_criteria=[
+        # 100% detection rate
+        # 100% copy success
+        # 100% conflict resolution
+        # >99% uptime
+    ],
+    constraints=[
+        # Preserve originals
+        # Handle errors gracefully
+        # Track state
+        # Respect permissions
+    ]
+)
+```
+
+**Output**: Goal written to agent.py
+
+### Step 3: Design Nodes
+
+7 nodes approved and written incrementally:
+
+1. `initialize-state` - Set up tracking
+2. `list-downloads` - Scan directory
+3. `identify-new-files` - Find new files
+4. `check-for-new-files` - Router
+5. `copy-files` - Copy with conflict resolution
+6. `update-state` - Mark as processed
+7. `wait-interval` - Sleep between cycles
+
+**Output**: All nodes in `nodes/__init__.py`
+
+### Step 4: Connect Edges
+
+8 edges connecting the workflow loop:
+
+```
+initialize → list → identify → check
+                                ↓  ↓
+                              copy  wait
+                                ↓    ↑
+                              update ↓
+                                ↓    ↓
+                              wait → list (loop)
+```
+
+**Output**: Edges written to agent.py
+
+### Step 5: Finalize
+
+```bash
+$ PYTHONPATH=exports uv run python -m file_monitor_agent validate
+✓ Agent is valid
+
+$ PYTHONPATH=exports uv run python -m file_monitor_agent info
+Agent: File Monitor & Copy Agent
+Nodes: 7
+Edges: 8
+```
+
+**Phase 1 Complete**: Structure validated ✅
+
+### Status After Phase 1
+
+```
+exports/file_monitor_agent/
+├── __init__.py          ✅ (exports)
+├── __main__.py          ✅ (CLI)
+├── agent.py             ✅ (goal, graph, agent class)
+├── nodes/__init__.py    ✅ (7 nodes)
+├── config.py            ✅ (configuration)
+├── implementations.py   ✅ (Python functions)
+├── README.md            ✅ (documentation)
+├── IMPLEMENTATION_GUIDE.md ✅ (next steps)
+└── STATUS.md            ✅ (current state)
+```
+
+**Note**: Implementation gap exists - data flow needs connection (covered in STATUS.md)
+
+## Phase 2: Testing (25 minutes)
+
+### Step 1: Analyze Agent
+
+Agent invokes `/hive-test` skill and:
+
+1. Reads goal from `exports/file_monitor_agent/agent.py`
+2. Identifies 4 success criteria to test
+3. Identifies 4 constraints to verify
+4. Plans test coverage
+
+### Step 2: Generate Tests
+
+Creates test files:
+
+```
+exports/file_monitor_agent/tests/
+├── conftest.py              (fixtures)
+├── test_constraints.py      (4 constraint tests)
+├── test_success_criteria.py (4 success tests)
+└── test_edge_cases.py       (error handling)
+```
+
+Tests approved incrementally by user.
+
+### Step 3: Run Tests
+
+```bash
+$ PYTHONPATH=exports uv run pytest exports/file_monitor_agent/tests/
+
+test_constraints.py::test_preserves_originals     PASSED
+test_constraints.py::test_handles_errors          PASSED
+test_constraints.py::test_tracks_state            PASSED
+test_constraints.py::test_respects_permissions    PASSED
+
+test_success_criteria.py::test_detects_all_files  PASSED
+test_success_criteria.py::test_copies_all_files   PASSED
+test_success_criteria.py::test_resolves_conflicts PASSED
+test_success_criteria.py::test_continuous_run     PASSED
+
+test_edge_cases.py::test_empty_directory          PASSED
+test_edge_cases.py::test_permission_denied        PASSED
+test_edge_cases.py::test_disk_full                PASSED
+test_edge_cases.py::test_large_files              PASSED
+
+========================== 12 passed in 3.42s ==========================
+```
+
+**Phase 2 Complete**: All tests pass ✅
+
+## Final Output
+
+**Production-Ready Agent:**
+
+```bash
+# Run the agent
+./RUN_AGENT.sh
+
+# Or manually
+PYTHONPATH=exports uv run python -m file_monitor_agent run
+```
+
+**Capabilities:**
+- Monitors ~/Downloads continuously
+- Copies new files to ~/Documents
+- Resolves conflicts with timestamps
+- Handles errors gracefully
+- Tracks processed files
+- Runs as background service
+
+**Total Time**: ~45 minutes from concept to production
+
+## Key Learnings
+
+1. **Incremental building** - Files written immediately, visible throughout
+2. **Validation early** - Structure validated before moving to implementation
+3. **Test-driven** - Tests reveal real behavior
+4. **Documentation included** - README, STATUS, and guides auto-generated
+5. **Repeatable process** - Same workflow for any agent type
+
+## Variations
+
+**For simpler agents:**
+- Fewer nodes (3-5 instead of 7)
+- Simpler workflow (linear instead of looping)
+- Faster build time (10-15 minutes)
+
+**For complex agents:**
+- More nodes (10-15+)
+- Multiple subgraphs
+- Pause/resume points for human-in-the-loop
+- Longer build time (45-60 minutes)
+
+The workflow scales to your needs!
@@ -0,0 +1,145 @@
+# Triage Issue Skill
+
+Analyze a GitHub issue, verify claims against the codebase, and close invalid issues with a technical response.
+
+## Trigger
+
+User provides a GitHub issue URL or number, e.g.:
+- `/triage-issue 1970`
+- `/triage-issue https://github.com/adenhq/hive/issues/1970`
+
+## Workflow
+
+### Step 1: Fetch Issue Details
+
+```bash
+gh issue view <number> --repo adenhq/hive --json title,body,state,labels,author
+```
+
+Extract:
+- Title
+- Body (the claim/bug report)
+- Current state
+- Labels
+- Author
+
+If issue is already closed, inform user and stop.
+
+### Step 2: Analyze the Claim
+
+Read the issue body and identify:
+1. **The core claim** - What is the user asserting?
+2. **Technical specifics** - File paths, function names, code snippets mentioned
+3. **Expected behavior** - What do they think should happen?
+4. **Severity claimed** - Security issue? Bug? Feature request?
+
+### Step 3: Investigate the Codebase
+
+For each technical claim:
+1. Find the referenced code using Grep/Glob/Read
+2. Understand the actual implementation
+3. Check if the claim accurately describes the behavior
+4. Look for related tests, documentation, or design decisions
+
+### Step 4: Evaluate Validity
+
+Categorize the issue as one of:
+
+| Category | Action |
+|----------|--------|
+| **Valid Bug** | Do NOT close. Inform user this is a real issue. |
+| **Valid Feature Request** | Do NOT close. Suggest labeling appropriately. |
+| **Misunderstanding** | Prepare technical explanation for why behavior is correct. |
+| **Fundamentally Flawed** | Prepare critique explaining the technical impossibility or design rationale. |
+| **Duplicate** | Find the original issue and prepare duplicate notice. |
+| **Incomplete** | Prepare request for more information. |
+
+### Step 5: Draft Response
+
+For issues to be closed, draft a response that:
+
+1. **Acknowledges the concern** - Don't be dismissive
+2. **Explains the actual behavior** - With code references
+3. **Provides technical rationale** - Why it works this way
+4. **References industry standards** - If applicable
+5. **Offers alternatives** - If there's a better approach for the user
+
+Use this template:
+
+```markdown
+## Analysis
+
+[Brief summary of what was investigated]
+
+## Technical Details
+
+[Explanation with code references]
+
+## Why This Is Working As Designed
+
+[Rationale]
+
+## Recommendation
+
+[What the user should do instead, if applicable]
+
+---
+*This issue was reviewed and closed by the maintainers.*
+```
+
+### Step 6: User Review
+
+Present the draft to the user with:
+
+```
+## Issue #<number>: <title>
+
+**Claim:** <summary of claim>
+
+**Finding:** <valid/invalid/misunderstanding/etc>
+
+**Draft Response:**
+<the markdown response>
+
+---
+Do you want me to post this comment and close the issue?
+```
+
+Use AskUserQuestion with options:
+- "Post and close" - Post comment, close issue
+- "Edit response" - Let user modify the response
+- "Skip" - Don't take action
+
+### Step 7: Execute Action
+
+If user approves:
+
+```bash
+# Post comment
+gh issue comment <number> --repo adenhq/hive --body "<response>"
+
+# Close issue
+gh issue close <number> --repo adenhq/hive --reason "not planned"
+```
+
+Report success with link to the issue.
+
+## Important Guidelines
+
+1. **Never close valid issues** - If there's any merit to the claim, don't close it
+2. **Be respectful** - The reporter took time to file the issue
+3. **Be technical** - Provide code references and evidence
+4. **Be educational** - Help them understand, don't just dismiss
+5. **Check twice** - Make sure you understand the code before declaring something invalid
+6. **Consider edge cases** - Maybe their environment reveals a real issue
+
+## Example Critiques
+
+### Security Misunderstanding
+> "The claim that secrets are exposed in plaintext misunderstands the encryption architecture. While `SecretStr` is used for logging protection, actual encryption is provided by Fernet (AES-128-CBC) at the storage layer. The code path is: serialize → encrypt → write. Only encrypted bytes touch disk."
+
+### Impossible Request
+> "The requested feature would require [X] which violates [fundamental constraint]. This is not a limitation of our implementation but a fundamental property of [technology/protocol]."
+
+### Already Handled
+> "This scenario is already handled by [code reference]. The reporter may be using an older version or misconfigured environment."
@@ -0,0 +1,7 @@
+# Project-level Codex config for Hive.
+# Keep this file minimal: MCP connectivity + skill discovery.
+
+[mcp_servers.agent-builder]
+command = "uv"
+args = ["run", "--directory", "core", "-m", "framework.mcp.agent_builder_server"]
+cwd = "."
@@ -0,0 +1,20 @@
+{
+  "mcpServers": {
+    "agent-builder": {
+      "command": "python",
+      "args": ["-m", "framework.mcp.agent_builder_server"],
+      "cwd": "core",
+      "env": {
+        "PYTHONPATH": "../tools/src"
+      }
+    },
+    "tools": {
+      "command": "python",
+      "args": ["mcp_server.py", "--stdio"],
+      "cwd": "tools",
+      "env": {
+        "PYTHONPATH": "src"
+      }
+    }
+  }
+}
@@ -0,0 +1 @@
+../../.claude/skills/hive
@@ -0,0 +1 @@
+../../.claude/skills/hive-concepts
@@ -0,0 +1 @@
+../../.claude/skills/hive-create
@@ -0,0 +1 @@
+../../.claude/skills/hive-credentials
@@ -0,0 +1 @@
+../../.claude/skills/hive-patterns
@@ -0,0 +1 @@
+../../.claude/skills/hive-test
@@ -0,0 +1,18 @@
+This project uses ruff for Python linting and formatting.
+
+Rules:
+- Line length: 100 characters
+- Python target: 3.11+
+- Use double quotes for strings
+- Sort imports with isort (ruff I rules): stdlib, third-party, first-party (framework), local
+- Combine as-imports
+- Use type hints on all function signatures
+- Use `from __future__ import annotations` for modern type syntax
+- Raise exceptions with `from` in except blocks (B904)
+- No unused imports (F401), no unused variables (F841)
+- Prefer list/dict/set comprehensions over map/filter (C4)
+
+Run `make lint` to auto-fix, `make check` to verify without modifying files.
+Run `make format` to apply ruff formatting.
+
+The ruff config lives in core/pyproject.toml under [tool.ruff].
@@ -11,6 +11,9 @@ indent_size = 2
 insert_final_newline = true
 trim_trailing_whitespace = true

+[*.py]
+indent_size = 4
+
 [*.md]
 trim_trailing_whitespace = false

@@ -0,0 +1,124 @@
+# Normalize line endings for all text files
+* text=auto
+
+# Source code
+*.py text diff=python
+*.js text
+*.ts text
+*.jsx text
+*.tsx text
+*.json text
+*.yaml text
+*.yml text
+*.toml text
+*.ini text
+*.cfg text
+
+# Shell scripts (must use LF)
+*.sh text eol=lf
+quickstart.sh text eol=lf
+
+# PowerShell scripts (Windows-friendly)
+*.ps1 text eol=lf
+*.psm1 text eol=lf
+
+# Windows batch files (must use CRLF)
+*.bat text eol=crlf
+*.cmd text eol=crlf
+
+# Documentation
+*.md text
+*.txt text
+*.rst text
+*.tex text
+
+# Configuration files
+.gitignore text
+.gitattributes text
+.editorconfig text
+Dockerfile text
+docker-compose.yml text
+requirements*.txt text
+pyproject.toml text
+setup.py text
+setup.cfg text
+MANIFEST.in text
+LICENSE text
+README* text
+CHANGELOG* text
+CONTRIBUTING* text
+CODE_OF_CONDUCT* text
+
+# Web files
+*.html text
+*.css text
+*.scss text
+*.sass text
+
+# Data files
+*.xml text
+*.csv text
+*.sql text
+
+# Graphics (binary)
+*.png binary
+*.jpg binary
+*.jpeg binary
+*.gif binary
+*.ico binary
+*.svg binary
+*.eps binary
+*.bmp binary
+*.tif binary
+*.tiff binary
+
+# Archives (binary)
+*.zip binary
+*.tar binary
+*.gz binary
+*.bz2 binary
+*.7z binary
+*.rar binary
+
+# Python compiled (binary)
+*.pyc binary
+*.pyo binary
+*.pyd binary
+*.whl binary
+*.egg binary
+
+# System libraries (binary)
+*.so binary
+*.dll binary
+*.dylib binary
+*.lib binary
+*.a binary
+
+# Documents (binary)
+*.pdf binary
+*.doc binary
+*.docx binary
+*.ppt binary
+*.pptx binary
+*.xls binary
+*.xlsx binary
+
+# Fonts (binary)
+*.ttf binary
+*.otf binary
+*.woff binary
+*.woff2 binary
+*.eot binary
+
+# Audio/Video (binary)
+*.mp3 binary
+*.mp4 binary
+*.wav binary
+*.avi binary
+*.mov binary
+*.flv binary
+
+# Database files (binary)
+*.db binary
+*.sqlite binary
+*.sqlite3 binary
@@ -8,7 +8,6 @@
 /hive/ @adenhq/maintainers

 # Infrastructure
-/docker-compose*.yml @adenhq/maintainers
 /.github/ @adenhq/maintainers

 # Documentation
@@ -1,9 +1,10 @@
 ---
 name: Bug Report
 about: Report a bug to help us improve
-title: '[Bug]: '
-labels: bug
+title: "[Bug]: "
+labels: bug, enhancement
 assignees: ''
+
 ---

 ## Describe the Bug
@@ -29,13 +30,12 @@ If applicable, add screenshots to help explain your problem.
 ## Environment

 - OS: [e.g., Ubuntu 22.04, macOS 14]
- Docker version: [e.g., 24.0.0]
- Node version: [e.g., 20.10.0]
- Browser (if applicable): [e.g., Chrome 120]
+- Python version: [e.g., 3.11.0]
+- Docker version (if applicable): [e.g., 24.0.0]

 ## Configuration

-Relevant parts of your `config.yaml` (remove any sensitive data):
+Relevant parts of your agent configuration or environment setup (remove any sensitive data):

 ```yaml
 # paste here
@@ -1,9 +1,10 @@
 ---
 name: Feature Request
 about: Suggest a new feature or enhancement
-title: '[Feature]: '
+title: "[Feature]: "
 labels: enhancement
 assignees: ''
+
 ---

 ## Problem Statement
@@ -0,0 +1,71 @@
+---
+name: Integration Request
+about: Suggest a new integration
+title: "[Integration]: "
+labels: ''
+assignees: ''
+
+---
+
+## Service
+
+Name and brief description of the service and what it enables agents to do.
+
+**Description:** [e.g., "API key for Slack Bot" — short one-liner for the credential spec]
+
+## Credential Identity
+
+- **credential_id:** [e.g., `slack`]
+- **env_var:** [e.g., `SLACK_BOT_TOKEN`]
+- **credential_key:** [e.g., `access_token`, `api_key`, `bot_token`]
+
+## Tools
+
+Tool function names that require this credential:
+
+- [e.g., `slack_send_message`]
+- [e.g., `slack_list_channels`]
+
+## Auth Methods
+
+- **Direct API key supported:** Yes / No
+- **Aden OAuth supported:** Yes / No
+
+If Aden OAuth is supported, describe the OAuth scopes/permissions required.
+
+## How to Get the Credential
+
+Link where users obtain the key/token:
+
+[e.g., https://api.slack.com/apps]
+
+Step-by-step instructions:
+
+1. Go to ...
+2. Create a ...
+3. Select scopes/permissions: ...
+4. Copy the key/token
+
+## Health Check
+
+A lightweight API call to validate the credential (no writes, no charges).
+
+- **Endpoint:** [e.g., `https://slack.com/api/auth.test`]
+- **Method:** [e.g., `GET` or `POST`]
+- **Auth header:** [e.g., `Authorization: Bearer {token}` or `X-Api-Key: {key}`]
+- **Parameters (if any):** [e.g., `?limit=1`]
+- **200 means:** [e.g., key is valid]
+- **401 means:** [e.g., invalid or expired]
+- **429 means:** [e.g., rate limited but key is valid]
+
+## Credential Group
+
+Does this require multiple credentials configured together? (e.g., Google Custom Search needs
+both an API key and a CSE ID)
+
+- [ ] No, single credential
+- [ ] Yes — list the other credential IDs in the group:
+
+## Additional Context
+
+Links to API docs, rate limits, free tier availability, or anything else relevant.
@@ -24,8 +24,8 @@ Fixes #(issue number)

 Describe the tests you ran to verify your changes:

- [ ] Unit tests pass (`npm run test`)
- [ ] Lint passes (`npm run lint`)
+- [ ] Unit tests pass (`cd core && pytest tests/`)
+- [ ] Lint passes (`cd core && ruff check .`)
 - [ ] Manual testing performed

 ## Checklist
@@ -0,0 +1,34 @@
+name: Auto-close duplicate issues
+description: Auto-closes issues that are duplicates of existing issues
+on:
+  schedule:
+    - cron: "0 */6 * * *"  # minute 0 of every 6th hour; GitHub evaluates schedules in UTC
+  workflow_dispatch:  # also allows manual runs
+
+jobs:
+  auto-close-duplicates:
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    permissions:
+      contents: read  # enough for the checkout step
+      issues: write  # NOTE(review): presumably what the script needs to comment on and close issues — script not shown here
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Setup Bun
+        uses: oven-sh/setup-bun@v2
+        with:
+          bun-version: latest  # NOTE(review): unpinned; consider pinning for reproducible runs
+
+      - name: Run auto-close-duplicates tests  # a failure here stops the job before any issue is touched
+        run: bun test scripts/auto-close-duplicates
+
+      - name: Auto-close duplicate issues
+        run: bun run scripts/auto-close-duplicates.ts
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GITHUB_REPOSITORY_OWNER: ${{ github.repository_owner }}
+          GITHUB_REPOSITORY_NAME: ${{ github.event.repository.name }}  # NOTE(review): verify this is populated on schedule-triggered runs
+          STATSIG_API_KEY: ${{ secrets.STATSIG_API_KEY }}  # NOTE(review): purpose not visible here — confirm the secret exists in this repo
@@ -12,84 +12,123 @@ concurrency:

 jobs:
  lint:
-    name: Lint
+    name: Lint Python
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

-      - name: Setup Node.js
-        uses: actions/setup-node@v4
+      - name: Setup Python
+        uses: actions/setup-python@v5
        with:
-          node-version: '20'
-          cache: 'npm'
+          python-version: '3.11'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4

      - name: Install dependencies
-        run: npm ci
+        run: uv sync --project core --group dev

-      - name: Run linter
-        run: npm run lint
+      - name: Ruff lint
+        run: |
+          uv run --project core ruff check core/
+          uv run --project core ruff check tools/
+
+      - name: Ruff format
+        run: |
+          uv run --project core ruff format --check core/
+          uv run --project core ruff format --check tools/

  test:
-    name: Test
+    name: Test Python Framework
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-latest, windows-latest]
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+
+      - name: Install dependencies and run tests
+        run: |
+          cd core
+          uv sync
+          uv run pytest tests/ -v
+
+  test-tools:
+    name: Test Tools
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

-      - name: Setup Node.js
-        uses: actions/setup-node@v4
+      - name: Setup Python
+        uses: actions/setup-python@v5
        with:
-          node-version: '20'
-          cache: 'npm'
+          python-version: '3.11'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+
+      - name: Install dependencies and run tests
+        run: |
+          cd tools
+          uv sync --extra dev
+          uv run pytest tests/ -v
+
+  validate:
+    name: Validate Agent Exports
+    runs-on: ubuntu-latest
+    needs: [lint, test, test-tools]
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4

      - name: Install dependencies
-        run: npm ci
+        run: |
+          cd core
+          uv sync

-      - name: Run tests
-        run: npm run test
+      - name: Validate exported agents
+        run: |
+          # Check that agent exports have valid structure
+          if [ ! -d "exports" ]; then
+            echo "No exports/ directory found, skipping validation"
+            exit 0
+          fi

-  build:
-    name: Build
-    runs-on: ubuntu-latest
-    needs: [lint, test]
-    steps:
-      - uses: actions/checkout@v4
+          shopt -s nullglob
+          agent_dirs=(exports/*/)
+          shopt -u nullglob

-      - name: Setup Node.js
-        uses: actions/setup-node@v4
-        with:
-          node-version: '20'
-          cache: 'npm'
+          if [ ${#agent_dirs[@]} -eq 0 ]; then
+            echo "No agent directories in exports/, skipping validation"
+            exit 0
+          fi

-      - name: Install dependencies
-        run: npm ci
+          validated=0
+          for agent_dir in "${agent_dirs[@]}"; do
+            if [ -f "$agent_dir/agent.json" ]; then
+              echo "Validating $agent_dir"
+              uv run python -c "import json; json.load(open('$agent_dir/agent.json'))"
+              validated=$((validated + 1))
+            fi
+          done

-      - name: Build packages
-        run: npm run build
-
-  docker:
-    name: Docker Build
-    runs-on: ubuntu-latest
-    needs: [lint, test]
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Build frontend image
-        uses: docker/build-push-action@v5
-        with:
-          context: ./honeycomb
-          push: false
-          tags: honeycomb-frontend:test
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
-
-      - name: Build backend image
-        uses: docker/build-push-action@v5
-        with:
-          context: ./hive
-          push: false
-          tags: honeycomb-backend:test
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
+          if [ "$validated" -eq 0 ]; then
+            echo "No agent.json files found in exports/, skipping validation"
+          else
+            echo "Validated $validated agent(s)"
+          fi
@@ -0,0 +1,103 @@
+name: Issue Triage  # labels, de-duplicates, and spam-checks each newly opened issue via the prompt below
+
+on:
+  issues:
+    types: [opened]  # fires only when an issue is opened; edits and reopens do not retrigger
+
+jobs:
+  triage:
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    permissions:
+      contents: read
+      issues: write  # the prompt instructs the action to add comments and labels
+      id-token: write  # NOTE(review): presumably for the action's OIDC token exchange — confirm it is needed when github_token is passed
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 1  # shallow clone of the latest commit only
+
+      - name: Triage and check for duplicates
+        uses: anthropics/claude-code-action@v1
+        with:
+          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          allowed_non_write_users: "*"  # NOTE(review): appears to let the action run for authors without write access; issue text is untrusted input to the prompt
+          prompt: |
+            Analyze this new issue and perform triage tasks.
+
+            Issue: #${{ github.event.issue.number }}
+            Repository: ${{ github.repository }}
+
+            ## Your Tasks:
+
+            ### 1. Get issue details
+            Use mcp__github__get_issue to get the full details of issue #${{ github.event.issue.number }}
+
+            ### 2. Check for duplicates
+            Search for similar existing issues using mcp__github__search_issues with relevant keywords from the issue title and body.
+
+            Criteria for duplicates:
+            - Same bug or error being reported
+            - Same feature request (even if worded differently)
+            - Same question being asked
+            - Issues describing the same root problem
+
+            If you find a duplicate:
+            - Add a comment using EXACTLY this format (required for auto-close to work):
+              "Found a possible duplicate of #<issue_number>: <brief explanation of why it's a duplicate>"
+            - Do NOT apply the "duplicate" label yet (the auto-close script will add it after 12 hours if no objections)
+            - Suggest the user react with a thumbs-down if they disagree
+
+            ### 3. Check for Low-Quality / AI Spam
+            Analyze the issue quality. We are receiving many low-effort, AI-generated spam issues.
+            Flag the issue as INVALID if it matches these criteria:
+            - **Vague/Generic**: Title is "Fix bug" or "Error" without specific context.
+            - **Hallucinated**: Refers to files or features that do not exist in this repo.
+            - **Template Filler**: Body contains "Insert description here" or unrelated gibberish.
+            - **Low Effort**: No reproduction steps, no logs, only 1-2 sentences.
+
+            If identified as spam/low-quality:
+            - Add the "invalid" label.
+            - Add a comment:
+              "This issue has been automatically flagged as low-quality or potentially AI-generated spam. It lacks specific details (logs, reproduction steps, file references) required for us to help. Please open a new issue following the template exactly if this is a legitimate request."
+            - Do NOT proceed to other steps.
+
+            ### 4. Check for invalid issues (General)
+            If the issue is not spam but still lacks information:
+            - Add the "invalid" label
+            - Comment asking for clarification
+
+            ### 5. Categorize with labels (if NOT a duplicate or spam)
+            Apply appropriate labels based on the issue content. Use ONLY these labels:
+            - bug: Something isn't working
+            - enhancement: New feature or request
+            - question: Further information is requested
+            - documentation: Improvements or additions to documentation
+            - good first issue: Good for newcomers (if issue is well-defined and small scope)
+            - help wanted: Extra attention is needed (if issue needs community input)
+            - backlog: Tracked for the future, but not currently planned or prioritized
+
+            ### 6. Estimate size (if NOT a duplicate, spam, or invalid)
+            Apply exactly ONE size label to help contributors match their capacity to the task:
+            - "size: small": Docs, typos, single-file fixes, config changes
+            - "size: medium": Bug fixes with tests, adding a single tool, changes within one package
+            - "size: large": Cross-package changes (core + tools), new modules, complex logic, architectural refactors
+
+            You may apply multiple labels if appropriate (e.g., "bug", "size: small", and "good first issue").
+
+            ## Tools Available:
+            - mcp__github__get_issue: Get issue details
+            - mcp__github__search_issues: Search for similar issues
+            - mcp__github__list_issues: List recent issues if needed
+            - mcp__github__add_issue_comment: Add a comment
+            - mcp__github__update_issue: Add labels
+            - mcp__github__get_issue_comments: Get existing comments
+
+            Be thorough but efficient. Focus on accurate categorization and finding true duplicates.
+
+          claude_args: |
+            --model claude-haiku-4-5-20251001
+            --allowedTools "mcp__github__get_issue,mcp__github__search_issues,mcp__github__list_issues,mcp__github__add_issue_comment,mcp__github__update_issue,mcp__github__get_issue_comments"
@@ -0,0 +1,233 @@
+# Manual re-check of PR requirements: commenting "/check" on a pull request
+# re-runs the linked-issue/assignee validation and records the result as a check run.
+name: PR Check Command
+
+on:
+  issue_comment:
+    types: [created]
+
+jobs:
+  check-pr:
+    # Only run on PR comments that start with /check
+    if: github.event.issue.pull_request && startsWith(github.event.comment.body, '/check')
+    runs-on: ubuntu-latest
+    permissions:
+      pull-requests: write
+      issues: write
+      checks: write
+      statuses: write
+
+    steps:
+      - name: Check PR requirements
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const prNumber = context.payload.issue.number;
+            console.log(`Triggered by /check comment on PR #${prNumber}`);
+
+            // Fetch PR data
+            const { data: pr } = await github.rest.pulls.get({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              pull_number: prNumber,
+            });
+
+            const prBody = pr.body || '';
+            const prTitle = pr.title || '';
+            const prAuthor = pr.user.login;
+            const headSha = pr.head.sha;
+            const prLabels = (pr.labels || []).map(l => l.name);
+
+            // Create a check run in progress
+            const { data: checkRun } = await github.rest.checks.create({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              name: 'check-requirements',
+              head_sha: headSha,
+              status: 'in_progress',
+              started_at: new Date().toISOString(),
+            });
+
+            // Micro-fix and documentation PRs are exempt from the linked-issue rule,
+            // matching the PR Requirements Check workflow; without this, /check would
+            // close PRs that the automatic check deliberately lets through.
+            const isMicroFix = prLabels.includes('micro-fix') || /micro-fix/i.test(prTitle);
+            const isDocumentation = prLabels.includes('documentation') || /\bdocs?\b/i.test(prTitle);
+            if (isMicroFix || isDocumentation) {
+              const reason = isMicroFix ? 'micro-fix' : 'documentation';
+              console.log(`PR #${prNumber} is a ${reason}, skipping issue requirement.`);
+
+              await github.rest.checks.update({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                check_run_id: checkRun.id,
+                status: 'completed',
+                conclusion: 'success',
+                completed_at: new Date().toISOString(),
+                output: {
+                  title: 'Exempt from issue requirement',
+                  summary: `PR is a ${reason}; no linked issue is required.`,
+                },
+              });
+              return;
+            }
+
+            // Extract issue numbers
+            // The keyword prefix is optional, so any "#123" in the title or body counts as a reference.
+            const issuePattern = /(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)?\s*#(\d+)/gi;
+            const allText = `${prTitle} ${prBody}`;
+            const matches = [...allText.matchAll(issuePattern)];
+            const issueNumbers = [...new Set(matches.map(m => parseInt(m[1], 10)))];
+
+            console.log(`PR #${prNumber}:`);
+            console.log(`  Author: ${prAuthor}`);
+            console.log(`  Found issue references: ${issueNumbers.length > 0 ? issueNumbers.join(', ') : 'none'}`);
+
+            if (issueNumbers.length === 0) {
+              const message = `## PR Closed - Requirements Not Met
+
+            This PR has been automatically closed because it doesn't meet the requirements.
+
+            **Missing:** No linked issue found.
+
+            **To fix:**
+            1. Create or find an existing issue for this work
+            2. Assign yourself to the issue
+            3. Re-open this PR and add \`Fixes #123\` in the description
+
+            **Why is this required?** See #472 for details.`;
+
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: prNumber,
+                body: message,
+              });
+
+              await github.rest.pulls.update({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                pull_number: prNumber,
+                state: 'closed',
+              });
+
+              // Update check run to failure
+              await github.rest.checks.update({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                check_run_id: checkRun.id,
+                status: 'completed',
+                conclusion: 'failure',
+                completed_at: new Date().toISOString(),
+                output: {
+                  title: 'Missing linked issue',
+                  summary: 'PR must reference an issue (e.g., `Fixes #123`)',
+                },
+              });
+
+              core.setFailed('PR must reference an issue');
+              return;
+            }
+
+            // Check if PR author is assigned to any linked issue
+            let issueWithAuthorAssigned = null;
+            let issuesWithoutAuthor = [];
+
+            for (const issueNum of issueNumbers) {
+              try {
+                const { data: issue } = await github.rest.issues.get({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  issue_number: issueNum,
+                });
+
+                const assigneeLogins = (issue.assignees || []).map(a => a.login);
+                if (assigneeLogins.includes(prAuthor)) {
+                  issueWithAuthorAssigned = issueNum;
+                  console.log(`  Issue #${issueNum} has PR author ${prAuthor} as assignee`);
+                  break;
+                } else {
+                  issuesWithoutAuthor.push({
+                    number: issueNum,
+                    assignees: assigneeLogins
+                  });
+                  console.log(`  Issue #${issueNum} assignees: ${assigneeLogins.length > 0 ? assigneeLogins.join(', ') : 'none'}`);
+                }
+              } catch (error) {
+                // Any lookup error is logged and the reference is skipped so the remaining ones are still checked.
+                console.log(`  Issue #${issueNum} not found`);
+              }
+            }
+
+            if (!issueWithAuthorAssigned) {
+              const issueList = issuesWithoutAuthor.map(i =>
+                `#${i.number} (assignees: ${i.assignees.length > 0 ? i.assignees.join(', ') : 'none'})`
+              ).join(', ');
+
+              const message = `## PR Closed - Requirements Not Met
+
+            This PR has been automatically closed because it doesn't meet the requirements.
+
+            **PR Author:** @${prAuthor}
+            **Found issues:** ${issueList}
+            **Problem:** The PR author must be assigned to the linked issue.
+
+            **To fix:**
+            1. Assign yourself (@${prAuthor}) to one of the linked issues
+            2. Re-open this PR
+
+            **Why is this required?** See #472 for details.`;
+
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: prNumber,
+                body: message,
+              });
+
+              await github.rest.pulls.update({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                pull_number: prNumber,
+                state: 'closed',
+              });
+
+              // Update check run to failure
+              await github.rest.checks.update({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                check_run_id: checkRun.id,
+                status: 'completed',
+                conclusion: 'failure',
+                completed_at: new Date().toISOString(),
+                output: {
+                  title: 'PR author not assigned to issue',
+                  summary: `PR author @${prAuthor} must be assigned to one of the linked issues: ${issueList}`,
+                },
+              });
+
+              core.setFailed('PR author must be assigned to the linked issue');
+            } else {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: prNumber,
+                body: `✅ PR requirements met! Issue #${issueWithAuthorAssigned} has @${prAuthor} as assignee.`,
+              });
+
+              // Update check run to success
+              await github.rest.checks.update({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                check_run_id: checkRun.id,
+                status: 'completed',
+                conclusion: 'success',
+                completed_at: new Date().toISOString(),
+                output: {
+                  title: 'Requirements met',
+                  summary: `Issue #${issueWithAuthorAssigned} has @${prAuthor} as assignee.`,
+                },
+              });
+
+              console.log(`PR requirements met!`);
+            }
@@ -0,0 +1,152 @@
+# One-off, manually triggered sweep that applies the linked-issue/assignee
+# requirement to every open pull request and closes the ones that fail it.
+name: PR Requirements Backfill
+
+on:
+  workflow_dispatch:
+
+jobs:
+  check-all-open-prs:
+    runs-on: ubuntu-latest
+    permissions:
+      pull-requests: write
+      issues: write
+
+    steps:
+      - name: Check all open PRs
+        uses: actions/github-script@v7
+        with:
+          script: |
+            // Paginate so repositories with more than 100 open PRs are fully covered;
+            // a single pulls.list call returns at most one page of results.
+            const pullRequests = await github.paginate(github.rest.pulls.list, {
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              state: 'open',
+              per_page: 100,
+            });
+
+            console.log(`Found ${pullRequests.length} open PRs`);
+
+            for (const pr of pullRequests) {
+              const prNumber = pr.number;
+              const prBody = pr.body || '';
+              const prTitle = pr.title || '';
+              const prAuthor = pr.user.login;
+              const prLabels = (pr.labels || []).map(l => l.name);
+
+              console.log(`\nChecking PR #${prNumber}: ${prTitle}`);
+
+              // Micro-fix and documentation PRs are exempt from the linked-issue rule,
+              // matching the PR Requirements Check workflow; skip them instead of closing.
+              const isMicroFix = prLabels.includes('micro-fix') || /micro-fix/i.test(prTitle);
+              const isDocumentation = prLabels.includes('documentation') || /\bdocs?\b/i.test(prTitle);
+              if (isMicroFix || isDocumentation) {
+                console.log(`  Skipping: exempt ${isMicroFix ? 'micro-fix' : 'documentation'} PR`);
+                continue;
+              }
+
+              // Extract issue numbers from body and title
+              const issuePattern = /(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)?\s*#(\d+)/gi;
+              const allText = `${prTitle} ${prBody}`;
+              const matches = [...allText.matchAll(issuePattern)];
+              const issueNumbers = [...new Set(matches.map(m => parseInt(m[1], 10)))];
+
+              console.log(`  Found issue references: ${issueNumbers.length > 0 ? issueNumbers.join(', ') : 'none'}`);
+
+              if (issueNumbers.length === 0) {
+                console.log(`  ❌ No linked issue - closing PR`);
+
+                const message = `## PR Closed - Requirements Not Met
+
+            This PR has been automatically closed because it doesn't meet the requirements.
+
+            **Missing:** No linked issue found.
+
+            **To fix:**
+            1. Create or find an existing issue for this work
+            2. Assign yourself to the issue
+            3. Re-open this PR and add \`Fixes #123\` in the description`;
+
+                await github.rest.issues.createComment({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  issue_number: prNumber,
+                  body: message,
+                });
+
+                await github.rest.pulls.update({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  pull_number: prNumber,
+                  state: 'closed',
+                });
+
+                continue;
+              }
+
+              // Check if any linked issue has the PR author as assignee
+              let issueWithAuthorAssigned = null;
+              let issuesWithoutAuthor = [];
+
+              for (const issueNum of issueNumbers) {
+                try {
+                  const { data: issue } = await github.rest.issues.get({
+                    owner: context.repo.owner,
+                    repo: context.repo.repo,
+                    issue_number: issueNum,
+                  });
+
+                  const assigneeLogins = (issue.assignees || []).map(a => a.login);
+                  if (assigneeLogins.includes(prAuthor)) {
+                    issueWithAuthorAssigned = issueNum;
+                    break;
+                  } else {
+                    issuesWithoutAuthor.push({
+                      number: issueNum,
+                      assignees: assigneeLogins
+                    });
+                  }
+                } catch (error) {
+                  console.log(`  Issue #${issueNum} not found or inaccessible`);
+                }
+              }
+
+              if (!issueWithAuthorAssigned) {
+                const issueList = issuesWithoutAuthor.map(i =>
+                  `#${i.number} (assignees: ${i.assignees.length > 0 ? i.assignees.join(', ') : 'none'})`
+                ).join(', ');
+
+                console.log(`  ❌ PR author not assigned to any linked issue - closing PR`);
+
+                const message = `## PR Closed - Requirements Not Met
+
+            This PR has been automatically closed because it doesn't meet the requirements.
+
+            **PR Author:** @${prAuthor}
+            **Found issues:** ${issueList}
+            **Problem:** The PR author must be assigned to the linked issue.
+
+            **To fix:**
+            1. Assign yourself (@${prAuthor}) to one of the linked issues
+            2. Re-open this PR`;
+
+                await github.rest.issues.createComment({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  issue_number: prNumber,
+                  body: message,
+                });
+
+                await github.rest.pulls.update({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  pull_number: prNumber,
+                  state: 'closed',
+                });
+              } else {
+                console.log(`  ✅ PR requirements met! Issue #${issueWithAuthorAssigned} has ${prAuthor} as assignee.`);
+              }
+            }
+
+            console.log('\nBackfill complete!');
@@ -0,0 +1,189 @@
+name: PR Requirements Check
+
+# Enforces the contribution policy: a PR must reference at least one issue
+# that has the PR author as an assignee, unless it is labelled/titled as a
+# micro-fix or documentation change. A non-compliant PR receives one
+# explanatory comment, is closed, and the job is marked failed.
+# The job consists of a single github-script step that only calls the GitHub
+# REST API; it does not check out any PR code.
+on:
+  pull_request_target:
+    types: [opened, reopened, edited, synchronize]
+
+jobs:
+  check-requirements:
+    runs-on: ubuntu-latest
+    permissions:
+      pull-requests: write
+      issues: write
+
+    steps:
+      - name: Check PR has linked issue with assignee
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const pr = context.payload.pull_request;
+            const prNumber = pr.number;
+            const prBody = pr.body || '';
+            const prTitle = pr.title || '';
+            const prLabels = (pr.labels || []).map(l => l.name);
+            const { owner, repo } = context.repo;
+
+            // Heading of the bot comment; also used to detect a previous comment.
+            const CLOSED_HEADING = 'PR Closed - Requirements Not Met';
+
+            // Footer shared by every "closed" comment.
+            const bypassHelp = `**Exception:** To bypass this requirement, you can:
+            - Add the \`micro-fix\` label or include \`micro-fix\` in your PR title for trivial fixes
+            - Add the \`documentation\` label or include \`doc\`/\`docs\` in your PR title for documentation changes
+
+            **Micro-fix requirements** (must meet ALL):
+            | Qualifies | Disqualifies |
+            |-----------|--------------|
+            | < 20 lines changed | Any functional bug fix |
+            | Typos & Documentation & Linting | Refactoring for "clean code" |
+            | No logic/API/DB changes | New features (even tiny ones) |
+
+            **Why is this required?** See #472 for details.`;
+
+            // Posts `message` (unless a bot already left a "closed" comment on
+            // this PR), closes the PR, and fails the job with `failureReason`.
+            async function closeWithComment(message, failureReason) {
+              // Paginate: a single listComments call only returns the first
+              // page, so an earlier bot comment could be missed on long threads.
+              const comments = await github.paginate(github.rest.issues.listComments, {
+                owner,
+                repo,
+                issue_number: prNumber,
+                per_page: 100,
+              });
+
+              // `user` and `body` can be null in API responses; guard both.
+              const alreadyCommented = comments.some(
+                (c) => c.user?.type === 'Bot' && (c.body || '').includes(CLOSED_HEADING)
+              );
+
+              if (!alreadyCommented) {
+                await github.rest.issues.createComment({
+                  owner,
+                  repo,
+                  issue_number: prNumber,
+                  body: message,
+                });
+              }
+
+              await github.rest.pulls.update({
+                owner,
+                repo,
+                pull_number: prNumber,
+                state: 'closed',
+              });
+
+              core.setFailed(failureReason);
+            }
+
+            // Allow micro-fix and documentation PRs without a linked issue
+            const isMicroFix = prLabels.includes('micro-fix') || /micro-fix/i.test(prTitle);
+            const isDocumentation = prLabels.includes('documentation') || /\bdocs?\b/i.test(prTitle);
+            if (isMicroFix || isDocumentation) {
+              const reason = isMicroFix ? 'micro-fix' : 'documentation';
+              console.log(`PR #${prNumber} is a ${reason}, skipping issue requirement.`);
+              return;
+            }
+
+            // Extract issue numbers from body and title
+            // Matches: fixes #123, closes #123, resolves #123, or plain #123
+            const issuePattern = /(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)?\s*#(\d+)/gi;
+
+            const allText = `${prTitle} ${prBody}`;
+            const matches = [...allText.matchAll(issuePattern)];
+            // De-duplicate so each referenced issue is fetched only once.
+            const issueNumbers = [...new Set(matches.map(m => parseInt(m[1], 10)))];
+
+            console.log(`PR #${prNumber}:`);
+            console.log(`  Found issue references: ${issueNumbers.length > 0 ? issueNumbers.join(', ') : 'none'}`);
+
+            if (issueNumbers.length === 0) {
+              // The description must be fixed BEFORE re-opening: the `reopened`
+              // event re-runs this check against the current PR body.
+              const message = `## ${CLOSED_HEADING}
+
+            This PR has been automatically closed because it doesn't meet the requirements.
+
+            **Missing:** No linked issue found.
+
+            **To fix:**
+            1. Create or find an existing issue for this work
+            2. Assign yourself to the issue
+            3. Add \`Fixes #123\` to the PR description, then re-open this PR
+
+            ${bypassHelp}`;
+
+              await closeWithComment(message, 'PR must reference an issue');
+              return;
+            }
+
+            // Check if any linked issue has the PR author as assignee
+            const prAuthor = pr.user.login;
+            let issueWithAuthorAssigned = null;
+            // Every referenced issue that does not qualify; `assignees` is null
+            // when the issue could not be fetched.
+            const issuesWithoutAuthor = [];
+
+            for (const issueNum of issueNumbers) {
+              try {
+                const { data: issue } = await github.rest.issues.get({
+                  owner,
+                  repo,
+                  issue_number: issueNum,
+                });
+
+                const assigneeLogins = (issue.assignees || []).map(a => a.login);
+                if (assigneeLogins.includes(prAuthor)) {
+                  issueWithAuthorAssigned = issueNum;
+                  console.log(`  Issue #${issueNum} has PR author ${prAuthor} as assignee`);
+                  break;
+                } else {
+                  issuesWithoutAuthor.push({
+                    number: issueNum,
+                    assignees: assigneeLogins
+                  });
+                  console.log(`  Issue #${issueNum} assignees: ${assigneeLogins.length > 0 ? assigneeLogins.join(', ') : 'none'} (PR author: ${prAuthor})`);
+                }
+              } catch (error) {
+                // Best effort: a bad reference must not abort the whole check,
+                // but record it so the comment never shows an empty issue list.
+                issuesWithoutAuthor.push({ number: issueNum, assignees: null });
+                console.log(`  Issue #${issueNum} not found or inaccessible (${error.message})`);
+              }
+            }
+
+            if (!issueWithAuthorAssigned) {
+              const issueList = issuesWithoutAuthor.map(i => {
+                if (i.assignees === null) {
+                  return `#${i.number} (not found or inaccessible)`;
+                }
+                return `#${i.number} (assignees: ${i.assignees.length > 0 ? i.assignees.join(', ') : 'none'})`;
+              }).join(', ');
+
+              const message = `## ${CLOSED_HEADING}
+
+            This PR has been automatically closed because it doesn't meet the requirements.
+
+            **PR Author:** @${prAuthor}
+            **Found issues:** ${issueList}
+            **Problem:** The PR author must be assigned to the linked issue.
+
+            **To fix:**
+            1. Assign yourself (@${prAuthor}) to one of the linked issues
+            2. Re-open this PR
+
+            ${bypassHelp}`;
+
+              await closeWithComment(message, 'PR author must be assigned to the linked issue');
+            } else {
+              console.log(`PR requirements met! Issue #${issueWithAuthorAssigned} has ${prAuthor} as assignee.`);
+            }
@@ -7,7 +7,6 @@ on:

 permissions:
  contents: write
-  packages: write

 jobs:
  release:
@@ -18,20 +17,23 @@ jobs:
        with:
          fetch-depth: 0

-      - name: Setup Node.js
-        uses: actions/setup-node@v4
+      - name: Setup Python
+        uses: actions/setup-python@v5
        with:
-          node-version: '20'
-          cache: 'npm'
+          python-version: '3.11'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4

      - name: Install dependencies
-        run: npm ci
-
-      - name: Build packages
-        run: npm run build
+        run: |
+          cd core
+          uv sync

      - name: Run tests
-        run: npm run test
+        run: |
+          cd core
+          uv run pytest tests/ -v

      - name: Generate changelog
        id: changelog
@@ -46,50 +48,3 @@ jobs:
          generate_release_notes: true
          draft: false
          prerelease: ${{ contains(github.ref, '-') }}
-
-  docker-publish:
-    name: Publish Docker Images
-    runs-on: ubuntu-latest
-    needs: release
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Login to GitHub Container Registry
-        uses: docker/login-action@v3
-        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Extract metadata
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            ghcr.io/${{ github.repository }}/frontend
-            ghcr.io/${{ github.repository }}/backend
-          tags: |
-            type=semver,pattern={{version}}
-            type=semver,pattern={{major}}.{{minor}}
-            type=semver,pattern={{major}}
-
-      - name: Build and push frontend
-        uses: docker/build-push-action@v5
-        with:
-          context: ./honeycomb
-          push: true
-          tags: ghcr.io/${{ github.repository }}/frontend:${{ github.ref_name }}
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
-
-      - name: Build and push backend
-        uses: docker/build-push-action@v5
-        with:
-          context: ./hive
-          push: true
-          tags: ghcr.io/${{ github.repository }}/backend:${{ github.ref_name }}
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
@@ -5,15 +5,14 @@ node_modules/
 # Build outputs
 dist/
 build/
+workdir/
 .next/
 out/

-# Environment files (generated from config.yaml)
+# Environment files
 .env
 .env.local
 .env.*.local
-honeycomb/.env
-hive/.env

 # User configuration (copied from .example)
 config.yaml
@@ -43,12 +42,38 @@ pnpm-debug.log*
 # Testing
 coverage/
 .nyc_output/
+.pytest_cache/

 # TypeScript
 *.tsbuildinfo

+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.egg-info/
+.eggs/
+*.egg
+
+# Generated runtime data
+core/data/
+
 # Misc
 *.local
 .cache/
 tmp/
 temp/
+
+exports/*
+
+.agent-builder-sessions/*
+
+.claude/settings.local.json
+
+.venv
+
+docs/github-issues/*
+core/tests/*dumps/*
+
+screenshots/*
+
@@ -0,0 +1,9 @@
+{
+  "mcpServers": {
+    "agent-builder": {
+      "command": "uv",
+      "args": ["run", "-m", "framework.mcp.agent_builder_server"],
+      "cwd": "core"
+    }
+  }
+}
@@ -0,0 +1,30 @@
+{
+  "mcpServers": {
+    "agent-builder": {
+      "command": "uv",
+      "args": [
+        "run",
+        "python",
+        "-m",
+        "framework.mcp.agent_builder_server"
+      ],
+      "cwd": "core",
+      "env": {
+        "PYTHONPATH": "../tools/src"
+      }
+    },
+    "tools": {
+      "command": "uv",
+      "args": [
+        "run",
+        "python",
+        "mcp_server.py",
+        "--stdio"
+      ],
+      "cwd": "tools",
+      "env": {
+        "PYTHONPATH": "src"
+      }
+    }
+  }
+}
@@ -0,0 +1 @@
+../../.claude/skills/hive
@@ -0,0 +1 @@
+../../.claude/skills/hive-concepts
@@ -0,0 +1 @@
+../../.claude/skills/hive-create
@@ -0,0 +1 @@
+../../.claude/skills/hive-credentials
@@ -0,0 +1 @@
+../../.claude/skills/hive-debugger
@@ -0,0 +1 @@
+../../.claude/skills/hive-patterns
@@ -0,0 +1 @@
+../../.claude/skills/hive-test
@@ -0,0 +1 @@
+../../.claude/skills/triage-issue
@@ -0,0 +1,18 @@
+# pre-commit configuration: runs ruff's linter (with auto-fix) and ruff's
+# formatter. Each hook is registered twice, once restricted to paths under
+# core/ and once to paths under tools/, via the `files` regex.
+repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.15.0
+    hooks:
+      # Lint hooks: `--fix` lets ruff rewrite files to apply its fixes.
+      - id: ruff
+        name: ruff lint (core)
+        args: [--fix]
+        files: ^core/
+      - id: ruff
+        name: ruff lint (tools)
+        args: [--fix]
+        files: ^tools/
+      # Format hooks: same core/ and tools/ path restriction as above.
+      - id: ruff-format
+        name: ruff format (core)
+        files: ^core/
+      - id: ruff-format
+        name: ruff format (tools)
+        files: ^tools/
@@ -0,0 +1 @@
+3.11
@@ -0,0 +1,7 @@
+{
+  "recommendations": [
+    "charliermarsh.ruff",
+    "editorconfig.editorconfig",
+    "ms-python.python"
+  ]
+}
@@ -1,40 +0,0 @@
-# Changelog
-
-All notable changes to this project will be documented in this file.
-
-The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
-and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-
-## [Unreleased]
-
-### Added
- Initial project structure
- React frontend (honeycomb) with Vite and TypeScript
- Node.js backend (hive) with Express and TypeScript
- Docker Compose configuration for local development
- Configuration system via `config.yaml`
- GitHub Actions CI/CD workflows
- Comprehensive documentation
-
-### Changed
- N/A
-
-### Deprecated
- N/A
-
-### Removed
- N/A
-
-### Fixed
- N/A
-
-### Security
- N/A
-
-## [0.1.0] - 2025-01-13
-
-### Added
- Initial release
-
-[Unreleased]: https://github.com/adenhq/beeline/compare/v0.1.0...HEAD
-[0.1.0]: https://github.com/adenhq/beeline/releases/tag/v0.1.0
@@ -1,37 +1,70 @@
-# Contributing to Beeline
+# Contributing to Aden Agent Framework

-Thank you for your interest in contributing to Beeline! This document provides guidelines and information for contributors.
+Thank you for your interest in contributing to the Aden Agent Framework! This document provides guidelines and information for contributors. We’re especially looking for help building tools, integrations (see [#2805](https://github.com/adenhq/hive/issues/2805)), and example agents for the framework. If you’re interested in extending its functionality, this is the perfect place to start.

 ## Code of Conduct

-By participating in this project, you agree to abide by our [Code of Conduct](CODE_OF_CONDUCT.md).
+By participating in this project, you agree to abide by our [Code of Conduct](docs/CODE_OF_CONDUCT.md).
+
+## Issue Assignment Policy
+
+To prevent duplicate work and respect contributors' time, we require issue assignment before submitting PRs.
+
+### How to Claim an Issue
+
+1. **Find an Issue:** Browse existing issues or create a new one
+2. **Claim It:** Leave a comment (e.g., *"I'd like to work on this!"*)
+3. **Wait for Assignment:** A maintainer will assign you within 24 hours. Issues with reproducible steps or proposals are prioritized.
+4. **Submit Your PR:** Once assigned, you're ready to contribute
+
+> **Note:** PRs for issues you are not assigned to may be delayed or closed, especially if someone else has already been assigned.
+
+### Exceptions (No Assignment Needed)
+
+You may submit PRs without prior assignment for:
+- **Documentation:** Fixing typos or clarifying instructions — add the `documentation` label or include `doc`/`docs` in your PR title to bypass the linked issue requirement
+- **Micro-fixes:** Add the `micro-fix` label or include `micro-fix` in your PR title to bypass the linked issue requirement. Micro-fixes must meet **all** qualification criteria:
+
+  | Qualifies | Disqualifies |
+  |-----------|--------------|
+  | < 20 lines changed | Any functional bug fix |
+  | Typos & Documentation & Linting | Refactoring for "clean code" |
+  | No logic/API/DB changes | New features (even tiny ones) |

 ## Getting Started

 1. Fork the repository
-2. Clone your fork: `git clone https://github.com/YOUR_USERNAME/beeline.git`
-3. Create a feature branch: `git checkout -b feature/your-feature-name`
-4. Make your changes
-5. Run tests: `npm run test`
-6. Commit your changes following our commit conventions
-7. Push to your fork and submit a Pull Request
+2. Clone your fork: `git clone https://github.com/YOUR_USERNAME/hive.git`
+3. Add the upstream repository: `git remote add upstream https://github.com/adenhq/hive.git`
+4. Sync with upstream to ensure you're starting from the latest code:
+   ```bash
+   git fetch upstream
+   git checkout main
+   git merge upstream/main
+   ```
+5. Create a feature branch: `git checkout -b feature/your-feature-name`
+6. Make your changes
+7. Run checks and tests:
+   ```bash
+   make check    # Lint and format checks (ruff check + ruff format --check on core/ and tools/)
+   make test     # Core tests (cd core && uv run python -m pytest tests/ -v)
+   ```
+8. Commit your changes following our commit conventions
+9. Push to your fork and submit a Pull Request

 ## Development Setup

 ```bash
-# Install dependencies
-npm install
-
-# Copy configuration
-cp config.yaml.example config.yaml
-
-# Generate environment files
-npm run setup
-
-# Start development environment
-docker compose up
+# Install Python packages and verify setup
+./quickstart.sh
 ```

+> **Windows Users:**  
+> If you are on native Windows, it is recommended to use **WSL (Windows Subsystem for Linux)**.  
+> Alternatively, make sure to run PowerShell or Git Bash with Python 3.11+ installed, and disable "App Execution Aliases" in Windows settings.
+
+> **Tip:** Installing Claude Code skills is optional for running existing agents, but required if you plan to **build new agents**.
+
 ## Commit Convention

 We follow [Conventional Commits](https://www.conventionalcommits.org/):
@@ -62,10 +95,10 @@ docs(readme): update installation instructions

 ## Pull Request Process

-1. Update documentation if needed
-2. Add tests for new functionality
-3. Ensure all tests pass
-4. Update the CHANGELOG.md if applicable
+1. **Get assigned to the issue first** (see [Issue Assignment Policy](#issue-assignment-policy))
+2. Update documentation if needed
+3. Add tests for new functionality
+4. Ensure `make check` and `make test` pass
 5. Request review from maintainers

 ### PR Title Format
@@ -77,30 +110,53 @@ feat(component): add new feature description

 ## Project Structure

- `honeycomb/` - React frontend application
- `hive/` - Node.js backend API
+- `core/` - Core framework (agent runtime, graph executor, protocols)
+- `tools/` - MCP Tools Package (tools for agent capabilities)
+- `exports/` - Agent packages and examples
 - `docs/` - Documentation
 - `scripts/` - Build and utility scripts
+- `.claude/` - Claude Code skills for building/testing agents

 ## Code Style

- Use TypeScript for all new code
- Follow existing code patterns
+- Use Python 3.11+ for all new code
+- Follow PEP 8 style guide
+- Add type hints to function signatures
+- Write docstrings for classes and public functions
 - Use meaningful variable and function names
- Add comments for complex logic
 - Keep functions focused and small

 ## Testing

-```bash
-# Run all tests
-npm run test
+> **Note:** When testing agents in `exports/`, always set PYTHONPATH:
+>
+> ```bash
+> PYTHONPATH=exports uv run python -m agent_name test
+> ```

-# Run tests for a specific package
-npm run test --workspace=honeycomb
-npm run test --workspace=hive
+```bash
+# Run lint and format checks (mirrors CI lint job)
+make check
+
+# Run core framework tests (mirrors CI test job)
+make test
+
+# Or run tests directly
+cd core && uv run pytest tests/ -v
+
+# Run tools package tests (when contributing to tools/)
+cd tools && uv run pytest tests/ -v
+
+# Run tests for a specific agent
+PYTHONPATH=exports uv run python -m agent_name test
 ```

+> **CI also validates** that all exported agent JSON files (`exports/*/agent.json`) are well-formed JSON. Ensure your agent exports are valid before submitting.
+
+## Contributor License Agreement
+
+By submitting a Pull Request, you agree that your contributions will be licensed under the Aden Agent Framework license.
+
 ## Questions?

 Feel free to open an issue for questions or join our [Discord community](https://discord.com/invite/MXE49hrKDk).
@@ -0,0 +1,28 @@
+# Developer shortcuts for linting, formatting, and testing the core/ and
+# tools/ packages. Run `make help` for the list of targets.
+.PHONY: lint format check test install-hooks help
+
+# Prints every target whose rule line carries a trailing `## description`:
+# grep selects those lines, awk splits on ":...## " and prints name + text.
+help: ## Show this help
+	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \
+		awk 'BEGIN {FS = ":.*?## "}; {printf "  \033[36m%-15s\033[0m %s\n", $$1, $$2}'
+
+# Modifies files in place: applies ruff's auto-fixes, then reformats.
+lint: ## Run ruff linter and formatter (with auto-fix)
+	cd core && ruff check --fix .
+	cd tools && ruff check --fix .
+	cd core && ruff format .
+	cd tools && ruff format .
+
+# Modifies files in place: reformats only, no lint fixes.
+format: ## Run ruff formatter
+	cd core && ruff format .
+	cd tools && ruff format .
+
+# Read-only counterpart of `lint`: no `--fix`, and `format --check` only
+# reports files that would be reformatted.
+check: ## Run all checks without modifying files (CI-safe)
+	cd core && ruff check .
+	cd tools && ruff check .
+	cd core && ruff format --check .
+	cd tools && ruff format --check .
+
+# Runs only the suite under core/tests/; tools/ tests are not run here.
+test: ## Run all tests
+	cd core && uv run python -m pytest tests/ -v
+
+# Installs pre-commit with uv, then registers the git hooks for this clone.
+install-hooks: ## Install pre-commit hooks
+	uv pip install pre-commit
+	pre-commit install
@@ -1,115 +1,374 @@
-# Beeline
-
-Beeline Instrumentation for your AI agents
-
 <p align="center">
-  <img width="100%" alt="Beeline Banner" src="https://storage.googleapis.com/aden-prod-assets/website/title-card.png" />
+  <img width="100%" alt="Hive Banner" src="https://github.com/user-attachments/assets/a027429b-5d3c-4d34-88e4-0feaeaabbab3" />
 </p>

-[![Apache 2.0 License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/adenhq/beeline/blob/main/LICENSE)
-[![Y Combinator](https://img.shields.io/badge/Y%20Combinator-Aden-orange)](https://www.ycombinator.com/companies/aden)
-[![Docker Pulls](https://img.shields.io/docker/pulls/adenhq/beeline?logo=Docker&labelColor=%23528bff)](https://hub.docker.com/u/adenhq)
-[![Discord](https://img.shields.io/discord/1172610340073242735?logo=discord&labelColor=%235462eb&logoColor=%23f5f5f5&color=%235462eb)](https://discord.com/invite/MXE49hrKDk)
-[![Twitter Follow](https://img.shields.io/twitter/follow/teamaden?logo=X&color=%23f5f5f5)](https://x.com/aden_hq)
-[![LinkedIn](https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff)](https://www.linkedin.com/company/teamaden/)
+<p align="center">
+  <a href="README.md">English</a> |
+  <a href="docs/i18n/zh-CN.md">简体中文</a> |
+  <a href="docs/i18n/es.md">Español</a> |
+  <a href="docs/i18n/hi.md">हिन्दी</a> |
+  <a href="docs/i18n/pt.md">Português</a> |
+  <a href="docs/i18n/ja.md">日本語</a> |
+  <a href="docs/i18n/ru.md">Русский</a> |
+  <a href="docs/i18n/ko.md">한국어</a>
+</p>
+
+<p align="center">
+  <a href="https://github.com/adenhq/hive/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-Apache%202.0-blue.svg" alt="Apache 2.0 License" /></a>
+  <a href="https://www.ycombinator.com/companies/aden"><img src="https://img.shields.io/badge/Y%20Combinator-Aden-orange" alt="Y Combinator" /></a>
+  <a href="https://discord.com/invite/MXE49hrKDk"><img src="https://img.shields.io/discord/1172610340073242735?logo=discord&labelColor=%235462eb&logoColor=%23f5f5f5&color=%235462eb" alt="Discord" /></a>
+  <a href="https://x.com/aden_hq"><img src="https://img.shields.io/twitter/follow/teamaden?logo=X&color=%23f5f5f5" alt="Twitter Follow" /></a>
+  <a href="https://www.linkedin.com/company/teamaden/"><img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff" alt="LinkedIn" /></a>
+  <img src="https://img.shields.io/badge/MCP-102_Tools-00ADD8?style=flat-square" alt="MCP" />
+</p>
+
+<p align="center">
+  <img src="https://img.shields.io/badge/AI_Agents-Self--Improving-brightgreen?style=flat-square" alt="AI Agents" />
+  <img src="https://img.shields.io/badge/Multi--Agent-Systems-blue?style=flat-square" alt="Multi-Agent" />
+  <img src="https://img.shields.io/badge/Headless-Development-purple?style=flat-square" alt="Headless" />
+  <img src="https://img.shields.io/badge/Human--in--the--Loop-orange?style=flat-square" alt="HITL" />
+  <img src="https://img.shields.io/badge/Production--Ready-red?style=flat-square" alt="Production" />
+</p>
+<p align="center">
+  <img src="https://img.shields.io/badge/OpenAI-supported-412991?style=flat-square&logo=openai" alt="OpenAI" />
+  <img src="https://img.shields.io/badge/Anthropic-supported-d4a574?style=flat-square" alt="Anthropic" />
+  <img src="https://img.shields.io/badge/Google_Gemini-supported-4285F4?style=flat-square&logo=google" alt="Gemini" />
+</p>

 ## Overview

-Beeline provides advanced runtime control for your AI agents, enabling you to observe, intervene, and dynamically adjust agent behavior as it executes. By giving you real-time visibility and control, Beeline helps you build more reliable AI systems—catching and correcting issues during execution rather than reacting after failures occur.
+Build autonomous, reliable, self-improving AI agents without hardcoding workflows. Define your goal through conversation with a coding agent, and the framework generates a node graph with dynamically created connection code. When things break, the framework captures failure data, evolves the agent through the coding agent, and redeploys. Built-in human-in-the-loop nodes, credential management, and real-time monitoring give you control without sacrificing adaptability.

 Visit [adenhq.com](https://adenhq.com) for complete documentation, examples, and guides.

+https://github.com/user-attachments/assets/846c0cc7-ffd6-47fa-b4b7-495494857a55
+
+## Who Is Hive For?
+
+Hive is designed for developers and teams who want to build **production-grade AI agents** without manually wiring complex workflows.
+
+Hive is a good fit if you:
+
+- Want AI agents that **execute real business processes**, not demos
+- Prefer **goal-driven development** over hardcoded workflows
+- Need **self-healing and adaptive agents** that improve over time
+- Require **human-in-the-loop control**, observability, and cost limits
+- Plan to run agents in **production environments**
+
+Hive may not be the best fit if you’re only experimenting with simple agent chains or one-off scripts.
+
+## When Should You Use Hive?
+
+Use Hive when you need:
+
+- Long-running, autonomous agents
+- Strong guardrails, process, and controls
+- Continuous improvement based on failures
+- Multi-agent coordination
+- A framework that evolves with your goals
+
 ## Quick Links

 - **[Documentation](https://docs.adenhq.com/)** - Complete guides and API reference
- **[Self-Hosting Guide](https://docs.adenhq.com/getting-started/quickstart)** - Deploy Beeline on your infrastructure
- **[Changelog](https://github.com/adenhq/beeline/releases)** - Latest updates and releases
-<!-- - **[Roadmap](https://adenhq.com/roadmap)** - Upcoming features and plans -->
- **[Report Issues](https://github.com/adenhq/beeline/issues)** - Bug reports and feature requests
+- **[Self-Hosting Guide](https://docs.adenhq.com/getting-started/quickstart)** - Deploy Hive on your infrastructure
+- **[Changelog](https://github.com/adenhq/hive/releases)** - Latest updates and releases
+- **[Roadmap](docs/roadmap.md)** - Upcoming features and plans
+- **[Report Issues](https://github.com/adenhq/hive/issues)** - Bug reports and feature requests
+- **[Contributing](CONTRIBUTING.md)** - How to contribute and submit PRs

 ## Quick Start

 ### Prerequisites

- [Docker](https://docs.docker.com/get-docker/) (v20.10+)
- [Docker Compose](https://docs.docker.com/compose/install/) (v2.0+)
+- Python 3.11+ for agent development
+- Claude Code, Codex CLI, or Cursor for utilizing agent skills
+
+> **Note for Windows Users:** It is strongly recommended to use **WSL (Windows Subsystem for Linux)** or **Git Bash** to run this framework. Some core automation scripts may not execute correctly in standard Command Prompt or PowerShell.

 ### Installation

 ```bash
 # Clone the repository
-git clone https://github.com/adenhq/beeline.git
-cd beeline
+git clone https://github.com/adenhq/hive.git
+cd hive

-# Copy and configure
-cp config.yaml.example config.yaml
-
-# Run setup and start services
-npm run setup
-docker compose up
+# Run quickstart setup
+./quickstart.sh
 ```

-**Access the application:**
+This sets up:

- Dashboard: http://localhost:3000
- API: http://localhost:4000
- Health: http://localhost:4000/health
+- **framework** - Core agent runtime and graph executor (in `core/.venv`)
+- **aden_tools** - MCP tools for agent capabilities (in `tools/.venv`)
+- **credential store** - Encrypted API key storage (`~/.hive/credentials`)
+- **LLM provider** - Interactive default model configuration
+- All required Python dependencies with `uv`
+
+### Build Your First Agent
+
+```bash
+# Build an agent using Claude Code
+claude> /hive
+
+# Test your agent
+claude> /hive-debugger
+
+# (in a separate terminal) Launch the interactive dashboard
+hive tui
+
+# Or run directly
+hive run exports/your_agent_name --input '{"key": "value"}'
+```
+
+## Coding Agent Support
+
+### Codex CLI
+
+Hive includes native support for [OpenAI Codex CLI](https://github.com/openai/codex) (v0.101.0+).
+
+1. **Config:** `.codex/config.toml` with `agent-builder` MCP server (tracked in git)
+2. **Skills:** `.agents/skills/` symlinks to Hive skills (tracked in git)
+3. **Launch:** Run `codex` in the repo root, then type `use hive`
+
+Example:
+```
+codex> use hive
+```
+
+### Opencode
+
+Hive includes native support for [Opencode](https://github.com/opencode-ai/opencode).
+
+1. **Setup:** Run the quickstart script (`./quickstart.sh`).
+2. **Launch:** Open Opencode in the project root.
+3. **Activate:** Type `/hive` in the chat to switch to the Hive Agent.
+4. **Verify:** Ask the agent *"List your tools"* to confirm the connection.
+
+The agent has access to all Hive skills and can scaffold agents, add tools, and debug workflows directly from the chat.
+
+**[📖 Complete Setup Guide](docs/environment-setup.md)** - Detailed instructions for agent development
+
+### Antigravity IDE Support
+
+Skills and MCP servers are also available in [Antigravity IDE](https://antigravity.google/) (Google's AI-powered IDE). The easiest way to set them up is to open a terminal in the root of the hive repository and run the setup script (note the leading `./` — the script lives inside the repo):
+
+```bash
+./scripts/setup-antigravity-mcp.sh
+```
+
+**Important:** Always restart/refresh Antigravity IDE after running the setup script—MCP servers only load on startup. After restart, **agent-builder** and **tools** MCP servers should connect. Skills are under `.agent/skills/` (symlinks to `.claude/skills/`). See [docs/antigravity-setup.md](docs/antigravity-setup.md) for manual setup and troubleshooting.

 ## Features

- **Observe** - Real-time visibility into agent execution, decisions, and performance
- **Metrics & Analytics** - Track costs, latency, and token usage with TimescaleDB
- **Cost Control** - Set budgets and policies to manage LLM spending
- **Real-time Events** - WebSocket streaming for live agent monitoring
- **Self-Hostable** - Deploy on your own infrastructure with full control
- **Production-Ready** - Built for scale and reliability
+- **[Goal-Driven Development](docs/key_concepts/goals_outcome.md)** - Define objectives in natural language; the coding agent generates the agent graph and connection code to achieve them
+- **[Adaptiveness](docs/key_concepts/evolution.md)** - The framework captures failures, calibrates against your objectives, and evolves the agent graph
+- **[Dynamic Node Connections](docs/key_concepts/graph.md)** - No predefined edges; connection code is generated by any capable LLM based on your goals
+- **SDK-Wrapped Nodes** - Every node gets shared memory, local RLM memory, monitoring, tools, and LLM access out of the box
+- **[Human-in-the-Loop](docs/key_concepts/graph.md#human-in-the-loop)** - Intervention nodes that pause execution for human input with configurable timeouts and escalation
+- **Real-time Observability** - WebSocket streaming for live monitoring of agent execution, decisions, and node-to-node communication
+- **Interactive TUI Dashboard** - Terminal-based dashboard with live graph view, event log, and chat interface for agent interaction
+- **Cost & Budget Control** - Set spending limits, throttles, and automatic model degradation policies
+- **Production-Ready** - Self-hostable, built for scale and reliability

-## Project Structure
+## Integration

-```
-beeline/
-├── honeycomb/          # Frontend (React + TypeScript + Vite)
-├── hive/               # Backend (Node.js + TypeScript + Express)
-├── docs/               # Documentation
-├── scripts/            # Build and utility scripts
-├── config.yaml.example # Configuration template
-└── docker-compose.yml  # Container orchestration
+<a href="https://github.com/adenhq/hive/tree/main/tools/src/aden_tools/tools"><img width="100%" alt="Integration" src="https://github.com/user-attachments/assets/a1573f93-cf02-4bb8-b3d5-b305b05b1e51" /></a>
+
+Hive is built to be model-agnostic and system-agnostic.
+
+- **LLM flexibility** - Hive Framework is designed to support various types of LLMs, including hosted and local models through LiteLLM-compatible providers.
+- **Business system connectivity** - Hive Framework is designed to connect to all kinds of business systems (CRM, support, messaging, data, files, and internal APIs) and expose them to agents as tools via MCP.
+
+
+## Why Aden
+
+Hive focuses on generating agents that run real business processes rather than generic agents. Instead of requiring you to manually design workflows, define agent interactions, and handle failures reactively, Hive flips the paradigm: **you describe outcomes, and the system builds itself**—delivering an outcome-driven, adaptive experience with an easy-to-use set of tools and integrations.
+
+```mermaid
+flowchart LR
+    GOAL["Define Goal"] --> GEN["Auto-Generate Graph"]
+    GEN --> EXEC["Execute Agents"]
+    EXEC --> MON["Monitor & Observe"]
+    MON --> CHECK{{"Pass?"}}
+    CHECK -- "Yes" --> DONE["Deliver Result"]
+    CHECK -- "No" --> EVOLVE["Evolve Graph"]
+    EVOLVE --> EXEC
+
+    GOAL -.- V1["Natural Language"]
+    GEN -.- V2["Instant Architecture"]
+    EXEC -.- V3["Easy Integrations"]
+    MON -.- V4["Full visibility"]
+    EVOLVE -.- V5["Adaptability"]
+    DONE -.- V6["Reliable outcomes"]
+
+    style GOAL fill:#ffbe42,stroke:#cc5d00,stroke-width:2px,color:#333
+    style GEN fill:#ffb100,stroke:#cc5d00,stroke-width:2px,color:#333
+    style EXEC fill:#ff9800,stroke:#cc5d00,stroke-width:2px,color:#fff
+    style MON fill:#ff9800,stroke:#cc5d00,stroke-width:2px,color:#fff
+    style CHECK fill:#fff59d,stroke:#ed8c00,stroke-width:2px,color:#333
+    style DONE fill:#4caf50,stroke:#2e7d32,stroke-width:2px,color:#fff
+    style EVOLVE fill:#e8763d,stroke:#cc5d00,stroke-width:2px,color:#fff
+    style V1 fill:#fff,stroke:#ed8c00,stroke-width:1px,color:#cc5d00
+    style V2 fill:#fff,stroke:#ed8c00,stroke-width:1px,color:#cc5d00
+    style V3 fill:#fff,stroke:#ed8c00,stroke-width:1px,color:#cc5d00
+    style V4 fill:#fff,stroke:#ed8c00,stroke-width:1px,color:#cc5d00
+    style V5 fill:#fff,stroke:#ed8c00,stroke-width:1px,color:#cc5d00
+    style V6 fill:#fff,stroke:#ed8c00,stroke-width:1px,color:#cc5d00
 ```

-## Development
+### The Hive Advantage

-### Local Development with Hot Reload
+| Traditional Frameworks     | Hive                                   |
+| -------------------------- | -------------------------------------- |
+| Hardcode agent workflows   | Describe goals in natural language     |
+| Manual graph definition    | Auto-generated agent graphs            |
+| Reactive error handling    | Outcome-evaluation and adaptiveness    |
+| Static tool configurations | Dynamic SDK-wrapped nodes              |
+| Separate monitoring setup  | Built-in real-time observability       |
+| DIY budget management      | Integrated cost controls & degradation |
+
+### How It Works
+
+1. **[Define Your Goal](docs/key_concepts/goals_outcome.md)** → Describe what you want to achieve in plain English
+2. **Coding Agent Generates** → Creates the [agent graph](docs/key_concepts/graph.md), connection code, and test cases
+3. **[Workers Execute](docs/key_concepts/worker_agent.md)** → SDK-wrapped nodes run with full observability and tool access
+4. **Control Plane Monitors** → Real-time metrics, budget enforcement, policy management
+5. **[Adaptiveness](docs/key_concepts/evolution.md)** → On failure, the system evolves the graph and redeploys automatically
+
+## Run Agents
+
+The `hive` CLI is the primary interface for running agents.

 ```bash
-# Copy development overrides
-cp docker-compose.override.yml.example docker-compose.override.yml
+# Browse and run agents interactively (Recommended)
+hive tui

-# Start with hot reload enabled
-docker compose up
+# Run a specific agent directly
+hive run exports/my_agent --input '{"task": "Your input here"}'
+
+# Run a specific agent with the TUI dashboard
+hive run exports/my_agent --tui
+
+# Interactive REPL
+hive shell
 ```

-### Running Without Docker
+The TUI scans both `exports/` and `examples/templates/` for available agents.

-```bash
-# Install dependencies
-npm install
+> **Using Python directly (alternative):** You can also run agents with `PYTHONPATH=exports uv run python -m agent_name run --input '{...}'`

-# Generate environment files
-npm run generate:env
-
-# Start frontend (in honeycomb/)
-cd honeycomb && npm run dev
-
-# Start backend (in hive/)
-cd hive && npm run dev
-```
+See [environment-setup.md](docs/environment-setup.md) for complete setup instructions.

 ## Documentation

- **[Developer Guide](DEVELOPER.md)** - Comprehensive guide for developers
+- **[Developer Guide](docs/developer-guide.md)** - Comprehensive guide for developers
 - [Getting Started](docs/getting-started.md) - Quick setup instructions
+- [TUI Guide](docs/tui-selection-guide.md) - Interactive dashboard usage
 - [Configuration Guide](docs/configuration.md) - All configuration options
- [Architecture Overview](docs/architecture.md) - System design and structure
+- [Architecture Overview](docs/architecture/README.md) - System design and structure
+
+## Roadmap
+
+Aden Hive Agent Framework aims to help developers build outcome-oriented, self-adaptive agents. See [roadmap.md](docs/roadmap.md) for details.
+
+```mermaid
+flowchart TD
+subgraph Foundation
+    direction LR
+    subgraph arch["Architecture"]
+        a1["Node-Based Architecture"]:::done
+        a2["Python SDK"]:::done
+        a3["LLM Integration"]:::done
+        a4["Communication Protocol"]:::done
+    end
+    subgraph ca["Coding Agent"]
+        b1["Goal Creation Session"]:::done
+        b2["Worker Agent Creation"]
+        b3["MCP Tools"]:::done
+    end
+    subgraph wa["Worker Agent"]
+        c1["Human-in-the-Loop"]:::done
+        c2["Callback Handlers"]:::done
+        c3["Intervention Points"]:::done
+        c4["Streaming Interface"]
+    end
+    subgraph cred["Credentials"]
+        d1["Setup Process"]:::done
+        d2["Pluggable Sources"]:::done
+        d3["Enterprise Secrets"]
+        d4["Integration Tools"]:::done
+    end
+    subgraph tools["Tools"]
+        e1["File Use"]:::done
+        e2["Memory STM/LTM"]:::done
+        e3["Web Search/Scraper"]:::done
+        e4["CSV/PDF"]:::done
+        e5["Excel/Email"]
+    end
+    subgraph core["Core"]
+        f1["Eval System"]
+        f2["Pydantic Validation"]:::done
+        f3["Documentation"]:::done
+        f4["Adaptiveness"]
+        f5["Sample Agents"]
+    end
+end
+
+subgraph Expansion
+    direction LR
+    subgraph intel["Intelligence"]
+        g1["Guardrails"]
+        g2["Streaming Mode"]
+        g3["Image Generation"]
+        g4["Semantic Search"]
+    end
+    subgraph mem["Memory Iteration"]
+        h1["Message Model & Sessions"]
+        h2["Storage Migration"]
+        h3["Context Building"]
+        h4["Proactive Compaction"]
+        h5["Token Tracking"]
+    end
+    subgraph evt["Event System"]
+        i1["Event Bus for Nodes"]
+    end
+    subgraph cas["Coding Agent Support"]
+        j1["Claude Code"]
+        j2["Cursor"]
+        j3["Opencode"]
+        j4["Antigravity"]
+        j5["Codex CLI"]
+    end
+    subgraph plat["Platform"]
+        k1["JavaScript/TypeScript SDK"]
+        k2["Custom Tool Integrator"]
+        k3["Windows Support"]
+    end
+    subgraph dep["Deployment"]
+        l1["Self-Hosted"]
+        l2["Cloud Services"]
+        l3["CI/CD Pipeline"]
+    end
+    subgraph tmpl["Templates"]
+        m1["Sales Agent"]
+        m2["Marketing Agent"]
+        m3["Analytics Agent"]
+        m4["Training Agent"]
+        m5["Smart Form Agent"]
+    end
+end
+
+classDef done fill:#9e9e9e,color:#fff,stroke:#757575
+```
+
+## Contributing
+
+We welcome contributions from the community! We’re especially looking for help building tools, integrations, and example agents for the framework ([check #2805](https://github.com/adenhq/hive/issues/2805)). If you’re interested in extending its functionality, this is the perfect place to start. Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
+
+**Important:** Please get assigned to an issue before submitting a PR; this helps prevent duplicate work. Comment on an issue to claim it, and a maintainer will assign you. Issues with reproducible steps and proposals are prioritized.
+
+1. Find or create an issue and get assigned
+2. Fork the repository
+3. Create your feature branch (`git checkout -b feature/amazing-feature`)
+4. Commit your changes (`git commit -m 'Add amazing feature'`)
+5. Push to the branch (`git push origin feature/amazing-feature`)
+6. Open a Pull Request

 ## Community & Support

@@ -119,16 +378,6 @@ We use [Discord](https://discord.com/invite/MXE49hrKDk) for support, feature req
 - Twitter/X - [@aden_hq](https://x.com/aden_hq)
 - LinkedIn - [Company Page](https://www.linkedin.com/company/teamaden/)

-## Contributing
-
-We welcome contributions! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
-
-1. Fork the repository
-2. Create your feature branch (`git checkout -b feature/amazing-feature`)
-3. Commit your changes (`git commit -m 'Add amazing feature'`)
-4. Push to the branch (`git push origin feature/amazing-feature`)
-5. Open a Pull Request
-
 ## Join Our Team

 **We're hiring!** Join us in engineering, research, and go-to-market roles.
@@ -143,8 +392,62 @@ For security concerns, please see [SECURITY.md](SECURITY.md).

 This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details.

+## Frequently Asked Questions (FAQ)
+
+**Q: What LLM providers does Hive support?**
+
+Hive supports 100+ LLM providers through LiteLLM integration, including OpenAI (GPT-4, GPT-4o), Anthropic (Claude models), Google Gemini, DeepSeek, Mistral, Groq, and many more. Simply set the appropriate API key environment variable and specify the model name.
+
+**Q: Can I use Hive with local AI models like Ollama?**
+
+Yes! Hive supports local models through LiteLLM. Simply use the model name format `ollama/model-name` (e.g., `ollama/llama3`, `ollama/mistral`) and ensure Ollama is running locally.
+
+**Q: What makes Hive different from other agent frameworks?**
+
+Hive generates your entire agent system from natural language goals using a coding agent—you don't hardcode workflows or manually define graphs. When agents fail, the framework automatically captures failure data, [evolves the agent graph](docs/key_concepts/evolution.md), and redeploys. This self-improving loop is unique to Aden.
+
+**Q: Is Hive open-source?**
+
+Yes, Hive is fully open-source under the Apache License 2.0. We actively encourage community contributions and collaboration.
+
+**Q: Can Hive handle complex, production-scale use cases?**
+
+Yes. Hive is explicitly designed for production environments with features like automatic failure recovery, real-time observability, cost controls, and horizontal scaling support. The framework handles both simple automations and complex multi-agent workflows.
+
+**Q: Does Hive support human-in-the-loop workflows?**
+
+Yes, Hive fully supports [human-in-the-loop](docs/key_concepts/graph.md#human-in-the-loop) workflows through intervention nodes that pause execution for human input. These include configurable timeouts and escalation policies, allowing seamless collaboration between human experts and AI agents.
+
+**Q: What programming languages does Hive support?**
+
+The Hive framework is built in Python. A JavaScript/TypeScript SDK is on the roadmap.
+
+**Q: Can Hive agents interact with external tools and APIs?**
+
+Yes. Aden's SDK-wrapped nodes provide built-in tool access, and the framework supports flexible tool ecosystems. Agents can integrate with external APIs, databases, and services through the node architecture.
+
+**Q: How does cost control work in Hive?**
+
+Hive provides granular budget controls including spending limits, throttles, and automatic model degradation policies. You can set budgets at the team, agent, or workflow level, with real-time cost tracking and alerts.
+
+**Q: Where can I find examples and documentation?**
+
+Visit [docs.adenhq.com](https://docs.adenhq.com/) for complete guides, API reference, and getting started tutorials. The repository also includes documentation in the `docs/` folder and a comprehensive [developer guide](docs/developer-guide.md).
+
+**Q: How can I contribute to Aden?**
+
+Contributions are welcome! Find or create an issue and get assigned, then fork the repository, create your feature branch, implement your changes, and submit a pull request. See [CONTRIBUTING.md](CONTRIBUTING.md) for detailed guidelines.
+
+**Q: When will my team start seeing results from Aden's adaptive agents?**
+
+Aden's adaptation loop begins working from the first execution. When an agent fails, the framework captures the failure data, helping developers evolve the agent graph through the coding agent. How quickly this translates to measurable results depends on the complexity of your use case, the quality of your goal definitions, and the volume of executions generating feedback.
+
+**Q: How does Hive compare to other agent frameworks?**
+
+Hive focuses on generating agents that run real business processes, rather than generic agents. This vision emphasizes outcome-driven design, adaptability, and an easy-to-use set of tools and integrations.
+
 ---

 <p align="center">
-  Made with care by the <a href="https://adenhq.com">Aden</a> team
+  Made with 🔥 Passion in San Francisco
 </p>
@@ -1,118 +0,0 @@
-# Beeline Configuration
-# ======================
-# Copy this file to config.yaml and customize for your environment.
-# Run `npm run setup` to generate .env files from this configuration.
-#
-# For detailed documentation, see: docs/configuration.md
-
-# -----------------------------------------------------------------------------
-# Application Settings
-# -----------------------------------------------------------------------------
-app:
-  # Application name (displayed in UI and logs)
-  name: Beeline
-
-  # Environment: development, production, or test
-  environment: development
-
-  # Log level: debug, info, warn, error
-  log_level: info
-
-# -----------------------------------------------------------------------------
-# Server Configuration
-# -----------------------------------------------------------------------------
-server:
-  # Frontend settings
-  frontend:
-    # Port for the frontend application
-    port: 3000
-
-  # Backend (Hive) settings
-  backend:
-    # Port for the backend API
-    port: 4000
-
-    # Host to bind to (0.0.0.0 for all interfaces)
-    host: 0.0.0.0
-
-# -----------------------------------------------------------------------------
-# TimescaleDB Configuration (Time-series metrics storage)
-# -----------------------------------------------------------------------------
-timescaledb:
-  # Connection URL for TimescaleDB
-  # Format: postgresql://user:password@host:port/database
-  url: postgresql://postgres:postgres@localhost:5432/aden_tsdb
-
-  # External port mapping (for docker-compose)
-  port: 5432
-
-# -----------------------------------------------------------------------------
-# MongoDB Configuration (Policies, pricing, control config)
-# -----------------------------------------------------------------------------
-mongodb:
-  # Connection URL for MongoDB
-  url: mongodb://localhost:27017
-
-  # Database name for main data
-  database: aden
-
-  # Database name for ERP data
-  erp_database: erp
-
-  # External port mapping (for docker-compose)
-  port: 27017
-
-# -----------------------------------------------------------------------------
-# Redis Configuration (Caching and Socket.IO)
-# -----------------------------------------------------------------------------
-redis:
-  # Connection URL for Redis
-  url: redis://localhost:6379
-
-  # External port mapping (for docker-compose)
-  port: 6379
-
-# -----------------------------------------------------------------------------
-# Authentication & Security
-# -----------------------------------------------------------------------------
-auth:
-  # JWT secret key - CHANGE THIS IN PRODUCTION!
-  # Generate with: openssl rand -base64 32
-  jwt_secret: change-this-to-a-secure-random-string-min-32-chars
-
-  # JWT token expiration (e.g., 1h, 7d, 30d)
-  jwt_expires_in: 7d
-
-  # Passphrase for additional encryption - CHANGE THIS IN PRODUCTION!
-  passphrase: change-this-to-a-secure-passphrase
-
-# -----------------------------------------------------------------------------
-# NPM Configuration
-# -----------------------------------------------------------------------------
-npm:
-  # NPM token for private package access (if needed)
-  token: ""
-
-# -----------------------------------------------------------------------------
-# CORS Configuration
-# -----------------------------------------------------------------------------
-cors:
-  # Allowed origin for CORS requests
-  # In production, set this to your frontend URL
-  origin: http://localhost:3000
-
-# -----------------------------------------------------------------------------
-# Feature Flags
-# -----------------------------------------------------------------------------
-features:
-  # Enable user registration
-  registration: true
-
-  # Enable API rate limiting
-  rate_limiting: false
-
-  # Enable request logging
-  request_logging: true
-
-  # Enable MCP (Model Context Protocol) server
-  mcp_server: true
@@ -0,0 +1,5 @@
+exports/
+docs/
+.agent-builder-sessions/
+.pytest_cache/
+**/__pycache__/
@@ -0,0 +1,14 @@
+{
+  "mcpServers": {
+    "agent-builder": {
+      "command": "python",
+      "args": ["-m", "framework.mcp.agent_builder_server"],
+      "cwd": "core"
+    },
+    "tools": {
+      "command": "python",
+      "args": ["-m", "aden_tools.mcp_server", "--stdio"],
+      "cwd": "tools"
+    }
+  }
+}
@@ -0,0 +1,413 @@
+# Agent Builder MCP Tools - MCP Integration Guide
+
+This guide explains how to use the new MCP integration tools in the agent builder MCP server.
+
+## Overview
+
+The agent builder now supports registering external MCP servers as tool sources. This allows you to:
+
+1. Register MCP servers (such as the `tools` server) during agent building
+2. Discover available tools from those servers
+3. Use those tools in your agent nodes
+4. Automatically generate `mcp_servers.json` configuration on export
+
+## New MCP Tools
+
+### `add_mcp_server`
+
+Register an MCP server as a tool source for your agent.
+
+**Parameters:**
+
+- `name` (string, required): Unique name for the MCP server
+- `transport` (string, required): Transport type - "stdio" or "http"
+- `command` (string): Command to run (for stdio transport)
+- `args` (string): JSON array of command arguments (for stdio)
+- `cwd` (string): Working directory (for stdio)
+- `env` (string): JSON object of environment variables (for stdio)
+- `url` (string): Server URL (for http transport)
+- `headers` (string): JSON object of HTTP headers (for http)
+- `description` (string): Description of the MCP server
+
+**Example - STDIO:**
+
+```json
+{
+  "name": "add_mcp_server",
+  "arguments": {
+    "name": "tools",
+    "transport": "stdio",
+    "command": "python",
+    "args": "[\"mcp_server.py\", \"--stdio\"]",
+    "cwd": "../tools",
+    "description": "Aden tools for web search and file operations"
+  }
+}
+```
+
+**Example - HTTP:**
+
+```json
+{
+  "name": "add_mcp_server",
+  "arguments": {
+    "name": "remote-tools",
+    "transport": "http",
+    "url": "http://localhost:4001",
+    "description": "Remote tool server"
+  }
+}
+```
+
+**Response:**
+
+```json
+{
+  "success": true,
+  "server": {
+    "name": "tools",
+    "transport": "stdio",
+    "command": "python",
+    "args": ["mcp_server.py", "--stdio"],
+    "cwd": "../tools",
+    "description": "Aden tools..."
+  },
+  "tools_discovered": 6,
+  "tools": [
+    "web_search",
+    "web_scrape",
+    "file_read",
+    "file_write",
+    "pdf_read",
+    "example_tool"
+  ],
+  "total_mcp_servers": 1,
+  "note": "MCP server 'tools' registered with 6 tools. These tools can now be used in llm_tool_use nodes."
+}
+```
+
+### `list_mcp_servers`
+
+List all registered MCP servers.
+
+**Parameters:** None
+
+**Response:**
+
+```json
+{
+  "mcp_servers": [
+    {
+      "name": "tools",
+      "transport": "stdio",
+      "command": "python",
+      "args": ["mcp_server.py", "--stdio"],
+      "cwd": "../tools",
+      "description": "Aden tools..."
+    }
+  ],
+  "total": 1
+}
+```
+
+### `list_mcp_tools`
+
+List tools available from registered MCP servers.
+
+**Parameters:**
+
+- `server_name` (string, optional): Name of specific server to list tools from. If omitted, lists tools from all servers.
+
+**Example:**
+
+```json
+{
+  "name": "list_mcp_tools",
+  "arguments": {
+    "server_name": "tools"
+  }
+}
+```
+
+**Response:**
+
+```json
+{
+  "success": true,
+  "tools_by_server": {
+    "tools": [
+      {
+        "name": "web_search",
+        "description": "Search the web for information using Brave Search API...",
+        "parameters": ["query", "num_results", "country"]
+      },
+      {
+        "name": "web_scrape",
+        "description": "Scrape and extract text content from a webpage...",
+        "parameters": ["url", "selector", "include_links", "max_length"]
+      }
+    ]
+  },
+  "total_tools": 6,
+  "note": "Use these tool names in the 'tools' parameter when adding llm_tool_use nodes"
+}
+```
+
+### `remove_mcp_server`
+
+Remove a registered MCP server.
+
+**Parameters:**
+
+- `name` (string, required): Name of the MCP server to remove
+
+**Example:**
+
+```json
+{
+  "name": "remove_mcp_server",
+  "arguments": {
+    "name": "tools"
+  }
+}
+```
+
+**Response:**
+
+```json
+{
+  "success": true,
+  "removed": "tools",
+  "remaining_servers": 0
+}
+```
+
+## Workflow Example
+
+Here's a complete workflow for building an agent with MCP tools:
+
+### 1. Create Session
+
+```json
+{
+  "name": "create_session",
+  "arguments": {
+    "name": "web-research-agent"
+  }
+}
+```
+
+### 2. Register MCP Server
+
+```json
+{
+  "name": "add_mcp_server",
+  "arguments": {
+    "name": "tools",
+    "transport": "stdio",
+    "command": "python",
+    "args": "[\"mcp_server.py\", \"--stdio\"]",
+    "cwd": "../tools"
+  }
+}
+```
+
+### 3. List Available Tools
+
+```json
+{
+  "name": "list_mcp_tools",
+  "arguments": {
+    "server_name": "tools"
+  }
+}
+```
+
+### 4. Set Goal
+
+```json
+{
+  "name": "set_goal",
+  "arguments": {
+    "goal_id": "web-research",
+    "name": "Web Research Agent",
+    "description": "Search the web and summarize findings",
+    "success_criteria": "[{\"id\": \"search-success\", \"description\": \"Successfully retrieve search results\", \"metric\": \"results_count\", \"target\": \">= 3\", \"weight\": 1.0}]"
+  }
+}
+```
+
+### 5. Add Node with MCP Tool
+
+```json
+{
+  "name": "add_node",
+  "arguments": {
+    "node_id": "web-searcher",
+    "name": "Web Search",
+    "description": "Search the web for information",
+    "node_type": "llm_tool_use",
+    "input_keys": "[\"query\"]",
+    "output_keys": "[\"search_results\"]",
+    "system_prompt": "Search for {query} using the web_search tool",
+    "tools": "[\"web_search\"]"
+  }
+}
+```
+
+Note: `web_search` is now available because we registered the `tools` MCP server!
+
+### 6. Export Agent
+
+```json
+{
+  "name": "export_graph",
+  "arguments": {}
+}
+```
+
+The export will create:
+
+- `exports/web-research-agent/agent.json` - Agent specification
+- `exports/web-research-agent/README.md` - Documentation
+- `exports/web-research-agent/mcp_servers.json` - **MCP server configuration** ✨
+
+## MCP Configuration File
+
+When you export an agent with registered MCP servers, an `mcp_servers.json` file is automatically created:
+
+```json
+{
+  "servers": [
+    {
+      "name": "tools",
+      "transport": "stdio",
+      "command": "python",
+      "args": ["mcp_server.py", "--stdio"],
+      "cwd": "../tools",
+      "description": "Aden tools for web search and file operations"
+    }
+  ]
+}
+```
+
+This file is automatically loaded by the AgentRunner when the agent is executed, making the MCP tools available at runtime.
+
+## Using the Exported Agent
+
+Once exported, load and run the agent normally:
+
+```python
+from framework.runner.runner import AgentRunner
+
+# Load agent - MCP servers auto-load from mcp_servers.json
+runner = AgentRunner.load("exports/web-research-agent")
+
+# Run with input
+result = await runner.run({"query": "latest AI breakthroughs"})
+
+# The web_search tool from the "tools" MCP server is automatically available!
+```
+
+## Benefits
+
+1. **Discoverable Tools**: See what tools are available before using them
+2. **Validation**: Connection is tested when registering the server
+3. **Automatic Configuration**: No manual file editing required
+4. **Documentation**: README includes MCP server information
+5. **Runtime Ready**: Exported agents work immediately with configured tools
+
+## Common MCP Servers
+
+### tools
+
+Provides:
+
+- `web_search` - Brave Search API integration
+- `web_scrape` - Web page content extraction
+- `file_read` / `file_write` - File operations
+- `pdf_read` - PDF text extraction
+
+### Custom MCP Servers
+
+You can register any MCP server that follows the Model Context Protocol specification.
+
+## Troubleshooting
+
+### "Failed to connect to MCP server"
+
+- Verify the `command` and `args` are correct
+- Check that the server is accessible at the specified path/URL
+- Ensure any required environment variables are set
+- For STDIO: verify the command can be executed from the `cwd`
+- For HTTP: verify the server is running and accessible
+
+### Tools not appearing
+
+- Use `list_mcp_tools` to verify tools were discovered
+- Check the tool names match exactly (case-sensitive)
+- Ensure the MCP server is still registered (`list_mcp_servers`)
+
+### Export doesn't include mcp_servers.json
+
+- Verify you registered at least one MCP server
+- Call `get_session_status` and confirm that `mcp_servers_count` is greater than 0
+- Re-export the agent after registering servers
+
+## Credential Validation
+
+When adding nodes with tools that require API keys (like `web_search`), the agent builder automatically validates that the required credentials are available.
+
+### How It Works
+
+When you call `add_node` or `update_node` with a `tools` parameter, the agent builder:
+
+1. Checks which tools require credentials (e.g., `web_search` requires `BRAVE_SEARCH_API_KEY`)
+2. Validates those credentials are set in the environment or `.env` file
+3. Returns an error if any credentials are missing
+
+### Missing Credentials Error
+
+If credentials are missing, you'll receive a response like:
+
+```json
+{
+  "valid": false,
+  "errors": ["Missing credentials for tools: ['BRAVE_SEARCH_API_KEY']"],
+  "missing_credentials": [
+    {
+      "credential": "brave_search",
+      "env_var": "BRAVE_SEARCH_API_KEY",
+      "tools_affected": ["web_search"],
+      "help_url": "https://brave.com/search/api/",
+      "description": "API key for Brave Search"
+    }
+  ],
+  "action_required": "Add the credentials to your .env file and retry",
+  "example": "Add to .env:\nBRAVE_SEARCH_API_KEY=your_key_here",
+  "message": "Cannot add node: missing API credentials. Add them to .env and retry this command."
+}
+```
+
+### Fixing Credential Errors
+
+1. Get the required API key from the URL in `help_url`
+2. Add it to your environment:
+
+   ```bash
+   # Option 1: Export directly
+   export BRAVE_SEARCH_API_KEY=your-key-here
+
+   # Option 2: Add to tools/.env
+   echo "BRAVE_SEARCH_API_KEY=your-key-here" >> tools/.env
+   ```
+
+3. Retry the `add_node` command
+
+### Required Credentials by Tool
+
+| Tool         | Credential             | Get Key                                               |
+| ------------ | ---------------------- | ----------------------------------------------------- |
+| `web_search` | `BRAVE_SEARCH_API_KEY` | [brave.com/search/api](https://brave.com/search/api/) |
+
+Note: The MCP server itself requires `ANTHROPIC_API_KEY` at startup for LLM operations.
@@ -0,0 +1,364 @@
+# MCP Integration Guide
+
+This guide explains how to integrate Model Context Protocol (MCP) servers with the Hive Core Framework, enabling agents to use tools from external MCP servers.
+
+## Overview
+
+The framework provides built-in support for MCP servers, allowing you to:
+
+- **Register MCP servers** via STDIO or HTTP transport
+- **Auto-discover tools** from registered servers
+- **Use MCP tools** seamlessly in your agents
+- **Manage multiple MCP servers** simultaneously
+
+## Quick Start
+
+### 1. Register an MCP Server Programmatically
+
+```python
+from framework.runner.runner import AgentRunner
+
+# Load your agent
+runner = AgentRunner.load("exports/my-agent")
+
+# Register tools MCP server
+runner.register_mcp_server(
+    name="tools",
+    transport="stdio",
+    command="python",
+    args=["-m", "aden_tools.mcp_server", "--stdio"],
+    cwd="/path/to/tools"
+)
+
+# Tools are now available to your agent
+result = await runner.run({"input": "data"})
+```
+
+### 2. Use Configuration File
+
+Create `mcp_servers.json` in your agent folder:
+
+```json
+{
+  "servers": [
+    {
+      "name": "tools",
+      "transport": "stdio",
+      "command": "python",
+      "args": ["-m", "aden_tools.mcp_server", "--stdio"],
+      "cwd": "../tools"
+    }
+  ]
+}
+```
+
+The framework will automatically load and register these servers when you load the agent:
+
+```python
+runner = AgentRunner.load("exports/my-agent")  # MCP servers auto-loaded
+```
+
+## Transport Types
+
+### STDIO Transport
+
+Best for local MCP servers running as subprocesses:
+
+```python
+runner.register_mcp_server(
+    name="local-tools",
+    transport="stdio",
+    command="python",
+    args=["-m", "my_tools.server", "--stdio"],
+    cwd="/path/to/my-tools",
+    env={
+        "API_KEY": "your-key-here"
+    }
+)
+```
+
+**Configuration:**
+
+- `command`: Executable to run (e.g., "python", "node")
+- `args`: List of command-line arguments
+- `cwd`: Working directory for the process
+- `env`: Environment variables (optional)
+
+### HTTP Transport
+
+Best for remote MCP servers or containerized deployments:
+
+```python
+runner.register_mcp_server(
+    name="remote-tools",
+    transport="http",
+    url="http://localhost:4001",
+    headers={
+        "Authorization": "Bearer token"
+    }
+)
+```
+
+**Configuration:**
+
+- `url`: Base URL of the MCP server
+- `headers`: HTTP headers to include (optional)
+
+## Using MCP Tools in Agents
+
+Once registered, MCP tools are available just like any other tool:
+
+### In Node Specifications
+
+```python
+from framework.builder.workflow import WorkflowBuilder
+
+builder = WorkflowBuilder()
+
+# Add a node that uses MCP tools
+builder.add_node(
+    node_id="researcher",
+    name="Web Researcher",
+    node_type="llm_tool_use",
+    system_prompt="Research the topic using web_search",
+    tools=["web_search"],  # Tool from tools MCP server
+    input_keys=["topic"],
+    output_keys=["findings"]
+)
+```
+
+### In Agent.json
+
+Tools from MCP servers can be referenced in your agent.json just like built-in tools:
+
+```json
+{
+  "nodes": [
+    {
+      "id": "searcher",
+      "name": "Web Searcher",
+      "node_type": "llm_tool_use",
+      "system_prompt": "Search for information about {topic}",
+      "tools": ["web_search", "web_scrape"],
+      "input_keys": ["topic"],
+      "output_keys": ["results"]
+    }
+  ]
+}
+```
+
+## Available Tools from the `tools` Server
+
+When you register the `tools` MCP server, the following tools become available:
+
+- **web_search**: Search the web using Brave Search API
+- **web_scrape**: Scrape content from a URL
+- **file_read**: Read file contents
+- **file_write**: Write content to a file
+- **pdf_read**: Extract text from PDF files
+
+## Environment Variables
+
+Some MCP tools require environment variables. You can pass them in the configuration:
+
+### Via Programmatic Registration
+
+```python
+import os
+
+runner.register_mcp_server(
+    name="tools",
+    transport="stdio",
+    command="python",
+    args=["-m", "aden_tools.mcp_server", "--stdio"],
+    cwd="../tools",
+    env={
+        "BRAVE_SEARCH_API_KEY": os.environ["BRAVE_SEARCH_API_KEY"]
+    }
+)
+```
+
+### Via Configuration File
+
+```json
+{
+  "servers": [
+    {
+      "name": "tools",
+      "transport": "stdio",
+      "command": "python",
+      "args": ["-m", "aden_tools.mcp_server", "--stdio"],
+      "cwd": "../tools",
+      "env": {
+        "BRAVE_SEARCH_API_KEY": "${BRAVE_SEARCH_API_KEY}"
+      }
+    }
+  ]
+}
+```
+
+The framework will substitute `${VAR_NAME}` with values from the environment.
+
+## Multiple MCP Servers
+
+You can register multiple MCP servers to access different sets of tools:
+
+```json
+{
+  "servers": [
+    {
+      "name": "tools",
+      "transport": "stdio",
+      "command": "python",
+      "args": ["-m", "aden_tools.mcp_server", "--stdio"],
+      "cwd": "../tools"
+    },
+    {
+      "name": "database-tools",
+      "transport": "http",
+      "url": "http://localhost:5001"
+    },
+    {
+      "name": "analytics-tools",
+      "transport": "http",
+      "url": "http://analytics-server:6001"
+    }
+  ]
+}
+```
+
+All tools from all servers will be available to your agent.
+
+## Best Practices
+
+### 1. Use STDIO for Development
+
+STDIO transport is easier to debug and doesn't require managing server processes:
+
+```python
+runner.register_mcp_server(
+    name="dev-tools",
+    transport="stdio",
+    command="python",
+    args=["-m", "my_tools.server", "--stdio"]
+)
+```
+
+### 2. Use HTTP for Production
+
+HTTP transport is better for:
+
+- Containerized deployments
+- Shared tools across multiple agents
+- Remote tool execution
+
+```python
+runner.register_mcp_server(
+    name="prod-tools",
+    transport="http",
+    url="http://tools-service:8000"
+)
+```
+
+### 3. Handle Cleanup
+
+Always clean up MCP connections when done:
+
+```python
+runner = AgentRunner.load("exports/my-agent")
+try:
+    runner.register_mcp_server(...)
+    result = await runner.run(input_data)
+finally:
+    runner.cleanup()  # Disconnects all MCP servers
+```
+
+Or use context manager:
+
+```python
+async with AgentRunner.load("exports/my-agent") as runner:
+    runner.register_mcp_server(...)
+    result = await runner.run(input_data)
+    # Automatic cleanup
+```
+
+### 4. Tool Name Conflicts
+
+If multiple MCP servers provide tools with the same name, the last registered server wins. To avoid conflicts:
+
+- Use unique tool names in your MCP servers
+- Register servers in priority order (most important last)
+- Use separate agents for different tool sets
+
+## Troubleshooting
+
+### Connection Errors
+
+If you get connection errors with STDIO transport:
+
+1. Check that the command and path are correct
+2. Verify the MCP server starts successfully standalone
+3. Check environment variables are set correctly
+4. Look at stderr output for error messages
+
+### Tool Not Found
+
+If a tool is registered but not found:
+
+1. Verify the server registered successfully (check logs)
+2. List available tools: `runner._tool_registry.get_registered_names()`
+3. Check tool name spelling in your node configuration
+
+### HTTP Server Not Responding
+
+If HTTP transport fails:
+
+1. Verify the server is running: `curl http://localhost:4001/health`
+2. Check firewall settings
+3. Verify the URL and port are correct
+
+## Example: Full Agent with MCP Tools
+
+Here's a complete example of an agent that uses MCP tools:
+
+```python
+import asyncio
+from pathlib import Path
+from framework.runner.runner import AgentRunner
+
+async def main():
+    # Create agent path
+    agent_path = Path("exports/web-research-agent")
+
+    # Load agent
+    runner = AgentRunner.load(agent_path)
+
+    # Register MCP server
+    runner.register_mcp_server(
+        name="tools",
+        transport="stdio",
+        command="python",
+        args=["-m", "aden_tools.mcp_server", "--stdio"],
+        cwd="../tools",
+        env={
+            "BRAVE_SEARCH_API_KEY": "your-api-key"
+        }
+    )
+
+    try:
+        # Run agent
+        result = await runner.run({
+            "query": "latest developments in quantum computing"
+        })
+
+        print(f"Research complete: {result}")
+    finally:
+        # Cleanup (always disconnect MCP servers, even if the run fails)
+        runner.cleanup()
+
+if __name__ == "__main__":
+    asyncio.run(main())
+```
+
+## See Also
+
+- [MCP_SERVER_GUIDE.md](MCP_SERVER_GUIDE.md) - Building your own MCP servers
+- [examples/mcp_integration_example.py](examples/mcp_integration_example.py) - More examples
+- [examples/mcp_servers.json](examples/mcp_servers.json) - Example configuration
@@ -0,0 +1,393 @@
+# MCP Server Guide - Agent Builder
+
+This guide covers the MCP (Model Context Protocol) server for building goal-driven agents.
+
+## Setup
+
+### Quick Setup
+
+```bash
+# Using the setup script (recommended)
+python setup_mcp.py
+
+# Or using bash
+./setup_mcp.sh
+```
+
+### Manual Configuration
+
+Add to your MCP client configuration (e.g., Claude Desktop):
+
+```json
+{
+  "mcpServers": {
+    "agent-builder": {
+      "command": "python",
+      "args": ["-m", "framework.mcp.agent_builder_server"],
+      "cwd": "/path/to/goal-agent"
+    }
+  }
+}
+```
+
+## Available MCP Tools
+
+### Session Management
+
+#### `create_session`
+Create a new agent building session.
+
+**Parameters:**
+- `name` (string, required): Name of the agent
+
+**Example:**
+```json
+{
+  "name": "research-summary-agent"
+}
+```
+
+#### `get_session_status`
+Get the current status of the build session.
+
+**Returns:**
+- Session name
+- Goal status
+- Number of nodes
+- Number of edges
+- Validation status
+
+---
+
+### Goal Definition
+
+#### `set_goal`
+Define the goal for the agent with success criteria and constraints.
+
+**Parameters:**
+- `goal_id` (string, required): Unique identifier for the goal
+- `name` (string, required): Human-readable name
+- `description` (string, required): What the agent should accomplish
+- `success_criteria` (string, required): JSON array of success criteria
+- `constraints` (string, optional): JSON array of constraints
+
+**Success Criterion Structure:**
+```json
+{
+  "id": "criterion_id",
+  "description": "What should be achieved",
+  "metric": "How to measure it",
+  "target": "Target value",
+  "weight": 1.0
+}
+```
+
+**Constraint Structure:**
+```json
+{
+  "id": "constraint_id",
+  "description": "What must not happen",
+  "constraint_type": "hard|soft",
+  "category": "safety|quality|performance"
+}
+```
+
+---
+
+### Node Management
+
+#### `add_node`
+Add a processing node to the agent graph.
+
+**Parameters:**
+- `node_id` (string, required): Unique node identifier
+- `name` (string, required): Human-readable name
+- `description` (string, required): What this node does
+- `node_type` (string, required): One of: `llm_generate`, `llm_tool_use`, `router`, `function`
+- `input_keys` (string, required): JSON array of input variable names
+- `output_keys` (string, required): JSON array of output variable names
+- `system_prompt` (string, optional): System prompt for LLM nodes
+- `tools` (string, optional): JSON array of tool names for `llm_tool_use` nodes
+- `routes` (string, optional): JSON object of route mappings for router nodes
+
+**Node Types:**
+
+1. **llm_generate**: Uses LLM to generate output from inputs
+   - Requires: `system_prompt`
+   - Tools: Not used
+
+2. **llm_tool_use**: Uses LLM with tools to accomplish tasks
+   - Requires: `system_prompt`, `tools`
+   - Tools: Array of tool names (e.g., `["web_search", "web_scrape"]`)
+
+3. **router**: LLM-powered routing to different paths
+   - Requires: `system_prompt`, `routes`
+   - Routes: Object mapping route names to target node IDs
+   - Example: `{"pass": "success_node", "fail": "retry_node"}`
+
+4. **function**: Executes a pre-defined function
+   - System prompt describes the function behavior
+   - No LLM calls, pure computation
+
+**Example:**
+```json
+{
+  "node_id": "search_sources",
+  "name": "Search Sources",
+  "description": "Searches for relevant sources on the topic",
+  "node_type": "llm_tool_use",
+  "input_keys": "[\"topic\", \"search_queries\"]",
+  "output_keys": "[\"sources\", \"source_count\"]",
+  "system_prompt": "Search for sources using the provided queries...",
+  "tools": "[\"web_search\"]"
+}
+```
+
+---
+
+### Edge Management
+
+#### `add_edge`
+Connect two nodes with an edge to define execution flow.
+
+**Parameters:**
+- `edge_id` (string, required): Unique edge identifier
+- `source` (string, required): Source node ID
+- `target` (string, required): Target node ID
+- `condition` (string, optional): When to traverse: `on_success` (default) or `on_failure`
+- `condition_expr` (string, optional): Python expression for conditional routing
+- `priority` (integer, optional): Edge priority (default: 0)
+
+**Example:**
+```json
+{
+  "edge_id": "search_to_extract",
+  "source": "search_sources",
+  "target": "extract_content",
+  "condition": "on_success"
+}
+```
+
+---
+
+### Graph Validation
+
+#### `validate_graph`
+Validate the complete graph structure.
+
+**Checks:**
+- Entry node exists
+- All nodes are reachable from entry
+- Terminal nodes have no outgoing edges
+- No cycles (unless explicitly allowed)
+- Context flow: all required inputs are available
+
+**Returns:**
+- `valid` (boolean)
+- `errors` (array): List of validation errors
+- `warnings` (array): Non-critical issues
+- `entry_node` (string): Entry node ID
+- `terminal_nodes` (array): Terminal node IDs
+
+---
+
+### Graph Export
+
+#### `export_graph`
+Export the validated graph as an agent specification.
+
+**What it does:**
+1. Validates the graph
+2. Auto-generates missing edges from router routes
+3. Writes files to disk:
+   - `exports/{agent-name}/agent.json` - Full agent specification
+   - `exports/{agent-name}/README.md` - Auto-generated documentation
+
+**Returns:**
+- `success` (boolean)
+- `files_written` (object): Paths and sizes of written files
+- `agent` (object): Agent metadata
+- `graph` (object): Graph specification
+- `goal` (object): Goal definition
+- `required_tools` (array): All tools used by the agent
+
+**Important:** This tool automatically writes files to the `exports/` directory!
+
+---
+
+### Testing
+
+#### `test_node`
+Test a single node with sample inputs.
+
+**Parameters:**
+- `node_id` (string, required): Node to test
+- `test_input` (string, required): JSON object with input values
+- `mock_llm_response` (string, optional): Mock LLM response for testing
+
+**Example:**
+```json
+{
+  "node_id": "research_planner",
+  "test_input": "{\"topic\": \"LLM compaction\"}"
+}
+```
+
+#### `test_graph`
+Test the complete agent graph with sample inputs.
+
+**Parameters:**
+- `test_input` (string, required): JSON object with initial inputs
+- `dry_run` (boolean, optional): Simulate without LLM calls (default: true)
+- `max_steps` (integer, optional): Maximum execution steps (default: 10)
+
+**Example:**
+```json
+{
+  "test_input": "{\"topic\": \"AI safety\"}",
+  "dry_run": true,
+  "max_steps": 10
+}
+```
+
+---
+
+### Evaluation Rules
+
+#### `add_evaluation_rule`
+Add a rule for the HybridJudge to evaluate node outputs.
+
+**Parameters:**
+- `rule_id` (string, required): Unique rule identifier
+- `description` (string, required): What this rule checks
+- `condition` (string, required): Python expression to evaluate
+- `action` (string, required): Action to take: `accept`, `retry`, `escalate`
+- `priority` (integer, optional): Rule priority (default: 0)
+- `feedback_template` (string, optional): Feedback message template
+
+**Condition Examples:**
+- `'result.get("success") == True'` - Check for success flag
+- `'result.get("error_type") == "timeout"'` - Check error type
+- `'len(result.get("data", [])) > 0'` - Check for non-empty data
+
+**Example:**
+```json
+{
+  "rule_id": "timeout_retry",
+  "description": "Retry on timeout errors",
+  "condition": "result.get('error_type') == 'timeout'",
+  "action": "retry",
+  "priority": 10,
+  "feedback_template": "Timeout occurred, retrying..."
+}
+```
+
+#### `list_evaluation_rules`
+List all configured evaluation rules.
+
+#### `remove_evaluation_rule`
+Remove an evaluation rule.
+
+**Parameters:**
+- `rule_id` (string, required): Rule to remove
+
+---
+
+## Example Workflow
+
+Here's a complete workflow for building a research agent:
+
+```python
+# 1. Create session
+create_session(name="research-agent")
+
+# 2. Define goal
+set_goal(
+    goal_id="research-goal",
+    name="Research Topic Agent",
+    description="Research a topic and produce a summary",
+    success_criteria=json.dumps([{
+        "id": "comprehensive",
+        "description": "Cover main aspects",
+        "metric": "Key topics addressed",
+        "target": "At least 3-5 aspects",
+        "weight": 1.0
+    }])
+)
+
+# 3. Add nodes
+add_node(
+    node_id="planner",
+    name="Research Planner",
+    description="Creates research strategy",
+    node_type="llm_generate",
+    input_keys='["topic"]',
+    output_keys='["strategy", "queries"]',
+    system_prompt="Analyze topic and create research plan..."
+)
+
+add_node(
+    node_id="searcher",
+    name="Search Sources",
+    description="Find relevant sources",
+    node_type="llm_tool_use",
+    input_keys='["queries"]',
+    output_keys='["sources"]',
+    system_prompt="Search for sources...",
+    tools='["web_search"]'
+)
+
+# 4. Connect nodes
+add_edge(
+    edge_id="plan_to_search",
+    source="planner",
+    target="searcher"
+)
+
+# 5. Validate
+validate_graph()
+
+# 6. Export
+export_graph()
+```
+
+The exported agent will be saved to `exports/research-agent/`.
+
+---
+
+## Tips
+
+1. **Start with the goal**: Define clear success criteria before building nodes
+2. **Test nodes individually**: Use `test_node` to verify each node works
+3. **Use router nodes for branching**: Don't create edges manually for routers - define routes and they'll be auto-generated
+4. **Add evaluation rules**: Help the judge evaluate outputs deterministically
+5. **Validate early, validate often**: Run `validate_graph` after adding nodes/edges
+6. **Check exports**: Review the generated README.md to verify your agent structure
+
+---
+
+## Common Issues
+
+### "Node X is unreachable from entry"
+- Make sure there's a path of edges from the entry node to all nodes
+- Check that you've defined edges connecting your nodes
+
+### "Missing required input Y for node X"
+- Ensure previous nodes output the required inputs
+- Check your input_keys and output_keys match
+
+### "Router routes don't match edges"
+- Don't worry! The export tool auto-generates missing edges from routes
+- If you see this warning, it's informational only
+
+### "Cannot find tool Z"
+- Verify the tool name matches available tools (e.g., "web_search", "web_scrape")
+- Check the `required_tools` section in the exported agent
+
+---
+
+## Resources
+
+- **Framework Documentation**: See [README.md](README.md)
+- **Example Agents**: Check the `exports/` directory for examples
+- **MCP Protocol**: https://modelcontextprotocol.io
@@ -0,0 +1,203 @@
+# Framework
+
+A goal-driven agent runtime with Builder-friendly observability.
+
+## Overview
+
+Framework provides a runtime that captures **decisions**, not just actions. This enables a "Builder" LLM to analyze and improve agent behavior by understanding:
+
+- What the agent was trying to accomplish
+- What options it considered
+- What it chose and why
+- What happened as a result
+
+## Installation
+
+```bash
+uv pip install -e .
+```
+
+## MCP Server Setup
+
+The framework includes an MCP (Model Context Protocol) server for building agents. To set up the MCP server:
+
+### Automated Setup
+
+**Using bash (Linux/macOS):**
+```bash
+./setup_mcp.sh
+```
+
+**Using Python (cross-platform):**
+```bash
+python setup_mcp.py
+```
+
+The setup script will:
+1. Install the framework package
+2. Install MCP dependencies (mcp, fastmcp)
+3. Create/verify `.mcp.json` configuration
+4. Test the MCP server module
+
+### Manual Setup
+
+If you prefer manual setup:
+
+```bash
+# Install framework
+uv pip install -e .
+
+# Install MCP dependencies
+uv pip install mcp fastmcp
+
+# Test the server
+uv run python -m framework.mcp.agent_builder_server
+```
+
+### Using with MCP Clients
+
+To use the agent builder with Claude Desktop or other MCP clients, add this to your MCP client configuration:
+
+```json
+{
+  "mcpServers": {
+    "agent-builder": {
+      "command": "python",
+      "args": ["-m", "framework.mcp.agent_builder_server"],
+      "cwd": "/path/to/goal-agent"
+    }
+  }
+}
+```
+
+The MCP server provides tools for:
+- Creating agent building sessions
+- Defining goals with success criteria
+- Adding nodes (llm_generate, llm_tool_use, router, function)
+- Connecting nodes with edges
+- Validating and exporting agent graphs
+- Testing nodes and full agent graphs
+
+## Quick Start
+
+### Calculator Agent
+
+Run an LLM-powered calculator:
+
+```bash
+# Single calculation
+uv run python -m framework calculate "2 + 3 * 4"
+
+# Interactive mode
+uv run python -m framework interactive
+
+# Analyze runs with Builder
+uv run python -m framework analyze calculator
+```
+
+### Using the Runtime
+
+```python
+from framework import Runtime
+
+runtime = Runtime("/path/to/storage")
+
+# Start a run
+run_id = runtime.start_run("my_goal", "Description of what we're doing")
+
+# Record a decision
+decision_id = runtime.decide(
+    intent="Choose how to process the data",
+    options=[
+        {"id": "fast", "description": "Quick processing", "pros": ["Fast"], "cons": ["Less accurate"]},
+        {"id": "thorough", "description": "Detailed processing", "pros": ["Accurate"], "cons": ["Slower"]},
+    ],
+    chosen="thorough",
+    reasoning="Accuracy is more important for this task"
+)
+
+# Record the outcome
+runtime.record_outcome(
+    decision_id=decision_id,
+    success=True,
+    result={"processed": 100},
+    summary="Processed 100 items with detailed analysis"
+)
+
+# End the run
+runtime.end_run(success=True, narrative="Successfully processed all data")
+```
+
+### Testing Agents
+
+The framework includes goal-based testing tools for validating agent behavior.
+
+Tests are generated using MCP tools (`generate_constraint_tests`, `generate_success_tests`) which return guidelines. Claude writes tests directly using the Write tool based on these guidelines.
+
+```bash
+# Run tests against an agent
+uv run python -m framework test-run <agent_path> --goal <goal_id> --parallel 4
+
+# Debug failed tests
+uv run python -m framework test-debug <agent_path> <test_name>
+
+# List tests for a goal
+uv run python -m framework test-list <goal_id>
+```
+
+For detailed testing workflows, see the [hive-test skill](../.claude/skills/hive-test/SKILL.md).
+
+### Analyzing Agent Behavior with Builder
+
+The BuilderQuery interface allows you to analyze agent runs and identify improvements:
+
+```python
+from framework import BuilderQuery
+
+query = BuilderQuery("/path/to/storage")
+
+# Find patterns across runs
+patterns = query.find_patterns("my_goal")
+print(f"Success rate: {patterns.success_rate:.1%}")
+
+# Analyze a failure
+analysis = query.analyze_failure("run_123")
+print(f"Root cause: {analysis.root_cause}")
+print(f"Suggestions: {analysis.suggestions}")
+
+# Get improvement recommendations
+suggestions = query.suggest_improvements("my_goal")
+for s in suggestions:
+    print(f"[{s['priority']}] {s['recommendation']}")
+```
+
+## Architecture
+
+```
+┌─────────────────┐
+│  Human Engineer │  ← Supervision, approval
+└────────┬────────┘
+         │
+┌────────▼────────┐
+│   Builder LLM   │  ← Analyzes runs, suggests improvements
+│  (BuilderQuery) │
+└────────┬────────┘
+         │
+┌────────▼────────┐
+│   Agent LLM     │  ← Executes tasks, records decisions
+│    (Runtime)    │
+└─────────────────┘
+```
+
+## Key Concepts
+
+- **Decision**: The atomic unit of agent behavior. Captures intent, options, choice, and reasoning.
+- **Run**: A complete execution with all decisions and outcomes.
+- **Runtime**: Interface agents use to record their behavior.
+- **BuilderQuery**: Interface Builder uses to analyze agent behavior.
+
+## Requirements
+
+- Python 3.11+
+- pydantic >= 2.0
+- anthropic >= 0.40.0 (for LLM-powered agents)
@@ -0,0 +1,740 @@
+#!/usr/bin/env python3
+"""
+EventLoopNode WebSocket Demo
+
+Real LLM, real FileConversationStore, real EventBus.
+Streams EventLoopNode execution to a browser via WebSocket.
+
+Usage:
+    cd /path/to/hive/core
+    python demos/event_loop_wss_demo.py
+
+    Then open http://localhost:8765 in your browser.
+"""
+
+import asyncio
+import json
+import logging
+import sys
+import tempfile
+from http import HTTPStatus
+from pathlib import Path
+
+import httpx
+import websockets
+from bs4 import BeautifulSoup
+from websockets.http11 import Request, Response
+
+# Add core, tools, and hive root to path
+_CORE_DIR = Path(__file__).resolve().parent.parent
+_HIVE_DIR = _CORE_DIR.parent
+sys.path.insert(0, str(_CORE_DIR))  # framework.*
+sys.path.insert(0, str(_HIVE_DIR / "tools" / "src"))  # aden_tools.*
+sys.path.insert(0, str(_HIVE_DIR))  # core.framework.* (for aden_tools imports)
+
+import os  # noqa: E402
+
+from aden_tools.credentials import CREDENTIAL_SPECS, CredentialStoreAdapter  # noqa: E402
+from core.framework.credentials import CredentialStore  # noqa: E402
+
+from framework.credentials.storage import (  # noqa: E402
+    CompositeStorage,
+    EncryptedFileStorage,
+    EnvVarStorage,
+)
+from framework.graph.event_loop_node import EventLoopNode, LoopConfig  # noqa: E402
+from framework.graph.node import NodeContext, NodeSpec, SharedMemory  # noqa: E402
+from framework.llm.litellm import LiteLLMProvider  # noqa: E402
+from framework.llm.provider import Tool  # noqa: E402
+from framework.runner.tool_registry import ToolRegistry  # noqa: E402
+from framework.runtime.core import Runtime  # noqa: E402
+from framework.runtime.event_bus import EventBus, EventType  # noqa: E402
+from framework.storage.conversation_store import FileConversationStore  # noqa: E402
+
+# Root logging is configured at INFO; the "demo" logger below sets no level of
+# its own, so its debug-level messages are not emitted unless this is lowered.
+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(message)s")
+logger = logging.getLogger("demo")
+
+# -------------------------------------------------------------------------
+# Persistent state (shared across WebSocket connections)
+# -------------------------------------------------------------------------
+
+# NOTE: the store root is a fresh temporary directory created at import time,
+# so each launch of the demo starts with new conversation and runtime storage.
+STORE_DIR = Path(tempfile.mkdtemp(prefix="hive_demo_"))
+STORE = FileConversationStore(STORE_DIR / "conversation")
+RUNTIME = Runtime(STORE_DIR / "runtime")
+LLM = LiteLLMProvider(model="claude-sonnet-4-5-20250929")
+
+# -------------------------------------------------------------------------
+# Tool Registry — real tools via ToolRegistry (same pattern as GraphExecutor)
+# -------------------------------------------------------------------------
+
+TOOL_REGISTRY = ToolRegistry()
+
+# Credential store: Aden sync (OAuth2 tokens) + encrypted files + env var fallback
+# Map every credential name in CREDENTIAL_SPECS to its environment variable.
+_env_mapping = {name: spec.env_var for name, spec in CREDENTIAL_SPECS.items()}
+# Local storage: encrypted file storage is primary, environment variables
+# are the fallback.
+_local_storage = CompositeStorage(
+    primary=EncryptedFileStorage(),
+    fallbacks=[EnvVarStorage(env_mapping=_env_mapping)],
+)
+
+if os.environ.get("ADEN_API_KEY"):
+    try:
+        # Imported inside the try so that a failing import also falls back to
+        # the local-only credential store in the except branch below.
+        from framework.credentials.aden import (  # noqa: E402
+            AdenCachedStorage,
+            AdenClientConfig,
+            AdenCredentialClient,
+            AdenSyncProvider,
+        )
+
+        _client = AdenCredentialClient(AdenClientConfig(base_url="https://api.adenhq.com"))
+        _provider = AdenSyncProvider(client=_client)
+        # Layer the Aden provider on top of the local storage built above.
+        _storage = AdenCachedStorage(
+            local_storage=_local_storage,
+            aden_provider=_provider,
+        )
+        _cred_store = CredentialStore(storage=_storage, providers=[_provider], auto_refresh=True)
+        # The return value is logged as a count ("%d") of synced credentials.
+        _synced = _provider.sync_all(_cred_store)
+        logger.info("Synced %d credentials from Aden", _synced)
+    except Exception as e:
+        # Best effort: any Aden setup/sync failure degrades to local storage.
+        logger.warning("Aden sync unavailable: %s", e)
+        _cred_store = CredentialStore(storage=_local_storage)
+else:
+    logger.info("ADEN_API_KEY not set, using local credential storage")
+    _cred_store = CredentialStore(storage=_local_storage)
+
+# Adapter through which the tool executors in this file look up credentials.
+CREDENTIALS = CredentialStoreAdapter(_cred_store)
+
+# Debug: log which credentials resolved
+# Only the length of each value is logged, never the value itself.
+for _name in ["brave_search", "hubspot", "anthropic"]:
+    _val = CREDENTIALS.get(_name)
+    if _val:
+        logger.debug("credential %s: OK (len=%d)", _name, len(_val))
+    else:
+        logger.debug("credential %s: not found", _name)
+
+# --- web_search (Brave Search API) ---
+
+# Register the web_search tool: its JSON-schema parameters plus the executor.
+TOOL_REGISTRY.register(
+    name="web_search",
+    tool=Tool(
+        name="web_search",
+        description=(
+            "Search the web for current information. "
+            "Returns titles, URLs, and snippets from search results."
+        ),
+        parameters={
+            "type": "object",
+            "properties": {
+                "query": {
+                    "type": "string",
+                    "description": "The search query (1-500 characters)",
+                },
+                "num_results": {
+                    "type": "integer",
+                    "description": "Number of results to return (1-20, default 10)",
+                },
+            },
+            "required": ["query"],
+        },
+    ),
+    # The lambda defers the lookup of _exec_web_search to call time; the
+    # function is defined below this registration.
+    executor=lambda inputs: _exec_web_search(inputs),
+)
+
+
+def _exec_web_search(inputs: dict) -> dict:
+    api_key = CREDENTIALS.get("brave_search")
+    if not api_key:
+        return {"error": "brave_search credential not configured"}
+    query = inputs.get("query", "")
+    num_results = min(inputs.get("num_results", 10), 20)
+    resp = httpx.get(
+        "https://api.search.brave.com/res/v1/web/search",
+        params={"q": query, "count": num_results},
+        headers={"X-Subscription-Token": api_key, "Accept": "application/json"},
+        timeout=30.0,
+    )
+    if resp.status_code != 200:
+        return {"error": f"Brave API HTTP {resp.status_code}"}
+    data = resp.json()
+    results = [
+        {
+            "title": item.get("title", ""),
+            "url": item.get("url", ""),
+            "snippet": item.get("description", ""),
+        }
+        for item in data.get("web", {}).get("results", [])[:num_results]
+    ]
+    return {"query": query, "results": results, "total": len(results)}
+
+
+# --- web_scrape (httpx + BeautifulSoup, no playwright for sync compat) ---
+
+# Register the web_scrape tool: its JSON-schema parameters plus the executor.
+TOOL_REGISTRY.register(
+    name="web_scrape",
+    tool=Tool(
+        name="web_scrape",
+        description=(
+            "Scrape and extract text content from a webpage URL. "
+            "Returns the page title and main text content."
+        ),
+        parameters={
+            "type": "object",
+            "properties": {
+                "url": {
+                    "type": "string",
+                    "description": "URL of the webpage to scrape",
+                },
+                "max_length": {
+                    "type": "integer",
+                    "description": "Maximum text length (default 50000)",
+                },
+            },
+            "required": ["url"],
+        },
+    ),
+    # The lambda defers the lookup of _exec_web_scrape to call time; the
+    # function is defined below this registration.
+    executor=lambda inputs: _exec_web_scrape(inputs),
+)
+
+# Request headers sent with every scrape: a desktop Chrome User-Agent string
+# and an Accept header asking for HTML/XHTML.
+_SCRAPE_HEADERS = {
+    "User-Agent": (
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+        "AppleWebKit/537.36 (KHTML, like Gecko) "
+        "Chrome/131.0.0.0 Safari/537.36"
+    ),
+    "Accept": "text/html,application/xhtml+xml",
+}
+
+
+def _exec_web_scrape(inputs: dict) -> dict:
+    """Fetch a web page and return its title and main text content.
+
+    Args:
+        inputs: Tool arguments. ``url`` is required (``https://`` is prepended
+            when no scheme is given); ``max_length`` is optional, defaults to
+            50000 and is clamped to the range 1000-500000.
+
+    Returns:
+        ``{"url", "title", "content", "length"}`` on success, otherwise
+        ``{"error": ...}``. Failures are reported in the result, never raised.
+    """
+    url = str(inputs.get("url") or "").strip()
+    if not url:
+        # An empty URL used to become the request "https://" and come back as
+        # an opaque transport error; report the real problem instead.
+        return {"error": "url is required"}
+    raw_max = inputs.get("max_length")
+    try:
+        requested = 50000 if raw_max is None else int(raw_max)
+    except (TypeError, ValueError, OverflowError):
+        # A non-numeric max_length falls back to the default rather than
+        # raising outside the error-dict contract.
+        requested = 50000
+    max_length = max(1000, min(requested, 500000))
+    if not url.startswith(("http://", "https://")):
+        url = "https://" + url
+    try:
+        resp = httpx.get(url, timeout=30.0, follow_redirects=True, headers=_SCRAPE_HEADERS)
+        if resp.status_code != 200:
+            return {"error": f"HTTP {resp.status_code}"}
+        soup = BeautifulSoup(resp.text, "html.parser")
+        # Drop page chrome and non-content elements before extracting text.
+        for tag in soup(["script", "style", "nav", "footer", "header", "aside", "noscript"]):
+            tag.decompose()
+        title = soup.title.get_text(strip=True) if soup.title else ""
+        # Prefer the most specific content container that exists.
+        main = (
+            soup.find("article")
+            or soup.find("main")
+            or soup.find(attrs={"role": "main"})
+            or soup.find("body")
+        )
+        text = main.get_text(separator=" ", strip=True) if main else ""
+        # Collapse every run of whitespace into a single space.
+        text = " ".join(text.split())
+        if len(text) > max_length:
+            text = text[:max_length] + "..."
+        return {"url": url, "title": title, "content": text, "length": len(text)}
+    except httpx.TimeoutException:
+        return {"error": "Request timed out"}
+    except Exception as e:
+        return {"error": f"Scrape failed: {e}"}
+
+
+# --- HubSpot CRM tools (optional, requires HUBSPOT_ACCESS_TOKEN) ---
+
+# Base URL that the HubSpot request paths below are appended to.
+_HUBSPOT_API = "https://api.hubapi.com"
+
+
+def _hubspot_headers() -> dict | None:
+    """Build HubSpot request headers from the stored "hubspot" credential.
+
+    Returns:
+        Bearer-auth JSON headers, or ``None`` when no token is available.
+    """
+    token = CREDENTIALS.get("hubspot")
+    if not token:
+        logger.debug("HubSpot token: not found")
+        return None
+    # Log only the length: the leading and trailing characters of a bearer
+    # token are secret material and must not end up in log output.
+    logger.debug("HubSpot token: OK (len=%d)", len(token))
+    return {
+        "Authorization": f"Bearer {token}",
+        "Content-Type": "application/json",
+        "Accept": "application/json",
+    }
+
+
+def _exec_hubspot_search(inputs: dict) -> dict:
+    """Search HubSpot CRM objects through the CRM v3 search endpoint.
+
+    Args:
+        inputs: Tool arguments. ``object_type`` selects the CRM object
+            (defaults to ``"contacts"``), ``query`` is an optional search
+            string, and ``limit`` is clamped to 1-100 (default 10).
+
+    Returns:
+        The decoded JSON response on HTTP 200, otherwise ``{"error": ...}``.
+        Failures are reported in the result, never raised.
+    """
+    headers = _hubspot_headers()
+    if not headers:
+        return {"error": "HUBSPOT_ACCESS_TOKEN not set"}
+    object_type = str(inputs.get("object_type") or "contacts")
+    # object_type is interpolated into the request path, so reject anything
+    # that could alter the path or query (slashes, dots, "?", "#", ...).
+    if not all(ch.isascii() and (ch.isalnum() or ch in "_-") for ch in object_type):
+        return {"error": f"Invalid object_type: {object_type!r}"}
+    query = inputs.get("query", "")
+    try:
+        limit = int(inputs.get("limit", 10))
+    except (TypeError, ValueError):
+        limit = 10
+    # Enforce the documented 1-100 range; the lower bound was missing before.
+    limit = max(1, min(limit, 100))
+    body: dict = {"limit": limit}
+    if query:
+        body["query"] = query
+    try:
+        resp = httpx.post(
+            f"{_HUBSPOT_API}/crm/v3/objects/{object_type}/search",
+            headers=headers,
+            json=body,
+            timeout=30.0,
+        )
+        if resp.status_code != 200:
+            # Only the first 200 characters of the body go into the error.
+            return {"error": f"HubSpot API HTTP {resp.status_code}: {resp.text[:200]}"}
+        return resp.json()
+    except httpx.TimeoutException:
+        return {"error": "Request timed out"}
+    except Exception as e:
+        return {"error": f"HubSpot error: {e}"}
+
+
+# Register the hubspot_search tool. Registration does not depend on a token
+# being present: without one, _exec_hubspot_search returns an error result.
+TOOL_REGISTRY.register(
+    name="hubspot_search",
+    tool=Tool(
+        name="hubspot_search",
+        description=(
+            "Search HubSpot CRM objects (contacts, companies, or deals). "
+            "Returns matching records with their properties."
+        ),
+        parameters={
+            "type": "object",
+            "properties": {
+                "object_type": {
+                    "type": "string",
+                    "description": "CRM object type: 'contacts', 'companies', or 'deals'",
+                },
+                "query": {
+                    "type": "string",
+                    "description": "Search query (name, email, domain, etc.)",
+                },
+                "limit": {
+                    "type": "integer",
+                    "description": "Max results (1-100, default 10)",
+                },
+            },
+            "required": ["object_type"],
+        },
+    ),
+    executor=lambda inputs: _exec_hubspot_search(inputs),
+)
+
+# Log the names of all tools registered so far, at import time.
+logger.info(
+    "ToolRegistry loaded: %s",
+    ", ".join(TOOL_REGISTRY.get_registered_names()),
+)
+
+
+# -------------------------------------------------------------------------
+# HTML page (embedded; process_request serves it for every path except /ws)
+# -------------------------------------------------------------------------
+
+HTML_PAGE = (  # noqa: E501
+    """<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>EventLoopNode Live Demo</title>
+<style>
+  * { box-sizing: border-box; margin: 0; padding: 0; }
+  body {
+    font-family: 'SF Mono', 'Fira Code', monospace;
+    background: #0d1117; color: #c9d1d9;
+    height: 100vh; display: flex; flex-direction: column;
+  }
+  header {
+    background: #161b22; padding: 12px 20px;
+    border-bottom: 1px solid #30363d;
+    display: flex; align-items: center; gap: 16px;
+  }
+  header h1 { font-size: 16px; color: #58a6ff; font-weight: 600; }
+  .status {
+    font-size: 12px; padding: 3px 10px; border-radius: 12px;
+    background: #21262d; color: #8b949e;
+  }
+  .status.running { background: #1a4b2e; color: #3fb950; }
+  .status.done { background: #1a3a5c; color: #58a6ff; }
+  .status.error { background: #4b1a1a; color: #f85149; }
+  .chat { flex: 1; overflow-y: auto; padding: 16px; }
+  .msg {
+    margin: 8px 0; padding: 10px 14px; border-radius: 8px;
+    line-height: 1.6; white-space: pre-wrap; word-wrap: break-word;
+  }
+  .msg.user { background: #1a3a5c; color: #58a6ff; }
+  .msg.assistant { background: #161b22; color: #c9d1d9; }
+  .msg.event {
+    background: transparent; color: #8b949e; font-size: 11px;
+    padding: 4px 14px; border-left: 3px solid #30363d;
+  }
+  .msg.event.loop { border-left-color: #58a6ff; }
+  .msg.event.tool { border-left-color: #d29922; }
+  .msg.event.stall { border-left-color: #f85149; }
+  .input-bar {
+    padding: 12px 16px; background: #161b22;
+    border-top: 1px solid #30363d; display: flex; gap: 8px;
+  }
+  .input-bar input {
+    flex: 1; background: #0d1117; border: 1px solid #30363d;
+    color: #c9d1d9; padding: 8px 12px; border-radius: 6px;
+    font-family: inherit; font-size: 14px; outline: none;
+  }
+  .input-bar input:focus { border-color: #58a6ff; }
+  .input-bar button {
+    background: #238636; color: #fff; border: none;
+    padding: 8px 20px; border-radius: 6px; cursor: pointer;
+    font-family: inherit; font-weight: 600;
+  }
+  .input-bar button:hover { background: #2ea043; }
+  .input-bar button:disabled {
+    background: #21262d; color: #484f58; cursor: not-allowed;
+  }
+  .input-bar button.clear { background: #da3633; }
+  .input-bar button.clear:hover { background: #f85149; }
+</style>
+</head>
+<body>
+  <header>
+    <h1>EventLoopNode Live</h1>
+    <span id="status" class="status">Idle</span>
+    <span id="iter" class="status" style="display:none">Step 0</span>
+  </header>
+  <div id="chat" class="chat"></div>
+  <div class="input-bar">
+    <input id="input" type="text"
+           placeholder="Ask anything..." autofocus />
+    <button id="go" onclick="run()">Send</button>
+    <button class="clear"
+            onclick="clearConversation()">Clear</button>
+  </div>
+
+<script>
+let ws = null;
+let currentAssistantEl = null;
+let iterCount = 0;
+const chat = document.getElementById('chat');
+const status = document.getElementById('status');
+const iterEl = document.getElementById('iter');
+const goBtn = document.getElementById('go');
+const inputEl = document.getElementById('input');
+
+inputEl.addEventListener('keydown', e => {
+  if (e.key === 'Enter') run();
+});
+
+function setStatus(text, cls) {
+  status.textContent = text;
+  status.className = 'status ' + cls;
+}
+
+function addMsg(text, cls) {
+  const el = document.createElement('div');
+  el.className = 'msg ' + cls;
+  el.textContent = text;
+  chat.appendChild(el);
+  chat.scrollTop = chat.scrollHeight;
+  return el;
+}
+
+function connect() {
+  ws = new WebSocket('ws://' + location.host + '/ws');
+  ws.onopen = () => {
+    setStatus('Ready', 'done');
+    goBtn.disabled = false;
+  };
+  ws.onmessage = handleEvent;
+  ws.onerror = () => { setStatus('Error', 'error'); };
+  ws.onclose = () => {
+    setStatus('Reconnecting...', '');
+    goBtn.disabled = true;
+    setTimeout(connect, 2000);
+  };
+}
+
+function handleEvent(msg) {
+  const evt = JSON.parse(msg.data);
+
+  if (evt.type === 'llm_text_delta') {
+    if (currentAssistantEl) {
+      currentAssistantEl.textContent += evt.content;
+      chat.scrollTop = chat.scrollHeight;
+    }
+  }
+  else if (evt.type === 'ready') {
+    setStatus('Ready', 'done');
+    if (currentAssistantEl && !currentAssistantEl.textContent)
+      currentAssistantEl.remove();
+    goBtn.disabled = false;
+  }
+  else if (evt.type === 'node_loop_iteration') {
+    iterCount = evt.iteration || (iterCount + 1);
+    iterEl.textContent = 'Step ' + iterCount;
+    iterEl.style.display = '';
+  }
+  else if (evt.type === 'tool_call_started') {
+    var info = evt.tool_name + '('
+      + JSON.stringify(evt.tool_input).slice(0, 120) + ')';
+    addMsg('TOOL  ' + info, 'event tool');
+  }
+  else if (evt.type === 'tool_call_completed') {
+    var preview = (evt.result || '').slice(0, 200);
+    var cls = evt.is_error ? 'stall' : 'tool';
+    addMsg('RESULT  ' + evt.tool_name + ': ' + preview,
+           'event ' + cls);
+    currentAssistantEl = addMsg('', 'assistant');
+  }
+  else if (evt.type === 'result') {
+    setStatus('Session ended', evt.success ? 'done' : 'error');
+    if (evt.error) addMsg('ERROR  ' + evt.error, 'event stall');
+    if (currentAssistantEl && !currentAssistantEl.textContent)
+      currentAssistantEl.remove();
+    goBtn.disabled = false;
+  }
+  else if (evt.type === 'node_stalled') {
+    addMsg('STALLED  ' + evt.reason, 'event stall');
+  }
+  else if (evt.type === 'cleared') {
+    chat.innerHTML = '';
+    iterCount = 0;
+    iterEl.textContent = 'Step 0';
+    iterEl.style.display = 'none';
+    setStatus('Ready', 'done');
+    goBtn.disabled = false;
+  }
+}
+
+function run() {
+  const text = inputEl.value.trim();
+  if (!text || !ws || ws.readyState !== 1) return;
+  addMsg(text, 'user');
+  currentAssistantEl = addMsg('', 'assistant');
+  inputEl.value = '';
+  setStatus('Running', 'running');
+  goBtn.disabled = true;
+  ws.send(JSON.stringify({ topic: text }));
+}
+
+function clearConversation() {
+  if (ws && ws.readyState === 1) {
+    ws.send(JSON.stringify({ command: 'clear' }));
+  }
+}
+
+connect();
+</script>
+</body>
+</html>"""
+)
+
+
+# -------------------------------------------------------------------------
+# WebSocket handler
+# -------------------------------------------------------------------------
+
+
+async def handle_ws(websocket):
+    """Serve one browser WebSocket with a long-lived, client-facing EventLoopNode.
+
+    Incoming JSON messages are either ``{"topic": text}`` (a user message) or
+    ``{"command": "clear"}`` (stop the loop and wipe the stored conversation).
+    Events published on a per-connection EventBus are forwarded to the browser
+    as JSON. The module-level ``STORE`` is closed and rebound on "clear", so
+    that reset affects everything else that uses ``STORE``.
+
+    Args:
+        websocket: The accepted connection; it must support ``send`` and
+            async iteration over incoming frames.
+    """
+    global STORE
+
+    # -- Event forwarding (WebSocket ← EventBus) ----------------------------
+    bus = EventBus()
+
+    async def forward_event(event):
+        # Best effort: a failed send (e.g. the socket already closed) must not
+        # propagate into the event bus.
+        try:
+            payload = {"type": event.type.value, **event.data}
+            if event.node_id:
+                payload["node_id"] = event.node_id
+            await websocket.send(json.dumps(payload))
+        except Exception:
+            pass
+
+    bus.subscribe(
+        event_types=[
+            EventType.NODE_LOOP_STARTED,
+            EventType.NODE_LOOP_ITERATION,
+            EventType.NODE_LOOP_COMPLETED,
+            EventType.LLM_TEXT_DELTA,
+            EventType.TOOL_CALL_STARTED,
+            EventType.TOOL_CALL_COMPLETED,
+            EventType.NODE_STALLED,
+        ],
+        handler=forward_event,
+    )
+
+    # -- Per-connection state -----------------------------------------------
+    node = None
+    loop_task = None
+
+    tools = list(TOOL_REGISTRY.get_tools().values())
+    tool_executor = TOOL_REGISTRY.get_executor()
+
+    node_spec = NodeSpec(
+        id="assistant",
+        name="Chat Assistant",
+        description="A conversational assistant that remembers context across messages",
+        node_type="event_loop",
+        client_facing=True,
+        system_prompt=(
+            "You are a helpful assistant with access to tools. "
+            "You can search the web, scrape webpages, and query HubSpot CRM. "
+            "Use tools when the user asks for current information or external data. "
+            "You have full conversation history, so you can reference previous messages."
+        ),
+    )
+
+    # -- Ready callback: subscribe to CLIENT_INPUT_REQUESTED on the bus ---
+    async def on_input_requested(event):
+        # Tell the browser that the node is waiting for the next user message.
+        try:
+            await websocket.send(json.dumps({"type": "ready"}))
+        except Exception:
+            pass
+
+    bus.subscribe(
+        event_types=[EventType.CLIENT_INPUT_REQUESTED],
+        handler=on_input_requested,
+    )
+
+    async def start_loop(first_message: str):
+        """Create an EventLoopNode and run it as a background task."""
+        nonlocal node, loop_task
+
+        memory = SharedMemory()
+        ctx = NodeContext(
+            runtime=RUNTIME,
+            node_id="assistant",
+            node_spec=node_spec,
+            memory=memory,
+            input_data={},
+            llm=LLM,
+            available_tools=tools,
+        )
+        node = EventLoopNode(
+            event_bus=bus,
+            config=LoopConfig(max_iterations=10_000, max_history_tokens=32_000),
+            conversation_store=STORE,
+            tool_executor=tool_executor,
+        )
+        await node.inject_event(first_message)
+
+        async def _run():
+            # Run the node to completion and report the outcome to the
+            # browser as a single "result" message.
+            try:
+                result = await node.execute(ctx)
+                try:
+                    await websocket.send(
+                        json.dumps(
+                            {
+                                "type": "result",
+                                "success": result.success,
+                                "output": result.output,
+                                "error": result.error,
+                                "tokens": result.tokens_used,
+                            }
+                        )
+                    )
+                except Exception:
+                    pass
+                logger.info(f"Loop ended: success={result.success}, tokens={result.tokens_used}")
+            except websockets.exceptions.ConnectionClosed:
+                logger.info("Loop stopped: WebSocket closed")
+            except Exception as e:
+                logger.exception("Loop error")
+                try:
+                    await websocket.send(
+                        json.dumps(
+                            {
+                                "type": "result",
+                                "success": False,
+                                "error": str(e),
+                                "output": {},
+                            }
+                        )
+                    )
+                except Exception:
+                    pass
+
+        loop_task = asyncio.create_task(_run())
+
+    async def stop_loop():
+        """Signal the node and wait for the loop task to finish."""
+        nonlocal node, loop_task
+        if loop_task and not loop_task.done():
+            if node:
+                node.signal_shutdown()
+            try:
+                await asyncio.wait_for(loop_task, timeout=5.0)
+            # asyncio.TimeoutError is what wait_for raises; before Python 3.11
+            # it is not the builtin TimeoutError, so name it explicitly.
+            except (asyncio.TimeoutError, asyncio.CancelledError):
+                loop_task.cancel()
+        node = None
+        loop_task = None
+
+    # -- Message loop (runs for the lifetime of this WebSocket) -------------
+    try:
+        async for raw in websocket:
+            try:
+                msg = json.loads(raw)
+            except (TypeError, ValueError):
+                continue
+            if not isinstance(msg, dict):
+                # Valid JSON that is not an object (e.g. [] or "text") has no
+                # .get(); ignore it instead of crashing the connection.
+                continue
+
+            # Clear command
+            if msg.get("command") == "clear":
+                import shutil
+
+                await stop_loop()
+                await STORE.close()
+                conv_dir = STORE_DIR / "conversation"
+                if conv_dir.exists():
+                    shutil.rmtree(conv_dir)
+                STORE = FileConversationStore(conv_dir)
+                await websocket.send(json.dumps({"type": "cleared"}))
+                logger.info("Conversation cleared")
+                continue
+
+            topic = msg.get("topic", "")
+            if not topic:
+                continue
+
+            if node is None or loop_task is None or loop_task.done():
+                # First message, or the previous loop already finished: a
+                # finished node no longer consumes injected events, so the
+                # message would be lost. Spin up a fresh loop instead.
+                logger.info(f"Starting persistent loop: {topic}")
+                await start_loop(topic)
+            else:
+                # Subsequent message — inject into the running loop
+                logger.info(f"Injecting message: {topic}")
+                await node.inject_event(topic)
+
+    except websockets.exceptions.ConnectionClosed:
+        pass
+    finally:
+        await stop_loop()
+        logger.info("WebSocket closed, loop stopped")
+
+
+# -------------------------------------------------------------------------
+# HTTP handler for serving the HTML page
+# -------------------------------------------------------------------------
+
+
+async def process_request(connection, request: Request):
+    """Answer plain HTTP requests with the demo page; leave /ws to the upgrade.
+
+    Returns ``None`` for ``/ws`` so the WebSocket handshake goes ahead. Every
+    other path receives the embedded HTML page with a 200 status.
+    """
+    if request.path != "/ws":
+        page_headers = websockets.Headers({"Content-Type": "text/html; charset=utf-8"})
+        return Response(HTTPStatus.OK, "OK", page_headers, HTML_PAGE.encode())
+    # WebSocket endpoint: no HTTP response here, websockets handles the upgrade.
+    return None
+
+
+# -------------------------------------------------------------------------
+# Main
+# -------------------------------------------------------------------------
+
+
+async def main():
+    """Start the combined HTTP/WebSocket server on port 8765 and run forever."""
+    listen_port = 8765
+    # NOTE(review): the server binds to all interfaces ("0.0.0.0") although the
+    # log line below advertises localhost.
+    server = websockets.serve(
+        handle_ws,
+        "0.0.0.0",
+        listen_port,
+        process_request=process_request,
+    )
+    async with server:
+        logger.info(f"Demo running at http://localhost:{listen_port}")
+        logger.info("Open in your browser and enter a topic to research.")
+        # A future that is never resolved keeps the server context open.
+        await asyncio.Future()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
@@ -0,0 +1,930 @@
+#!/usr/bin/env python3
+"""
+Two-Node ContextHandoff Demo
+
+Demonstrates ContextHandoff between two EventLoopNode instances:
+  Node A (Researcher) → ContextHandoff → Node B (Analyst)
+
+Real LLM, real FileConversationStore, real EventBus.
+Streams both nodes to a browser via WebSocket.
+
+Usage:
+    cd core  # the core/ directory of your hive checkout
+    python demos/handoff_demo.py
+
+    Then open http://localhost:8766 in your browser.
+"""
+
+import asyncio
+import json
+import logging
+import sys
+import tempfile
+from http import HTTPStatus
+from pathlib import Path
+
+import httpx
+import websockets
+from bs4 import BeautifulSoup
+from websockets.http11 import Request, Response
+
+# Add core, tools, and hive root to path
+_CORE_DIR = Path(__file__).resolve().parent.parent
+_HIVE_DIR = _CORE_DIR.parent
+sys.path.insert(0, str(_CORE_DIR))  # framework.*
+sys.path.insert(0, str(_HIVE_DIR / "tools" / "src"))  # aden_tools.*
+sys.path.insert(0, str(_HIVE_DIR))  # core.framework.* (for aden_tools imports)
+
+from aden_tools.credentials import CREDENTIAL_SPECS, CredentialStoreAdapter  # noqa: E402
+from core.framework.credentials import CredentialStore  # noqa: E402
+
+from framework.credentials.storage import (  # noqa: E402
+    CompositeStorage,
+    EncryptedFileStorage,
+    EnvVarStorage,
+)
+from framework.graph.context_handoff import ContextHandoff  # noqa: E402
+from framework.graph.conversation import NodeConversation  # noqa: E402
+from framework.graph.event_loop_node import EventLoopNode, LoopConfig  # noqa: E402
+from framework.graph.node import NodeContext, NodeSpec, SharedMemory  # noqa: E402
+from framework.llm.litellm import LiteLLMProvider  # noqa: E402
+from framework.llm.provider import Tool  # noqa: E402
+from framework.runner.tool_registry import ToolRegistry  # noqa: E402
+from framework.runtime.core import Runtime  # noqa: E402
+from framework.runtime.event_bus import EventBus, EventType  # noqa: E402
+from framework.storage.conversation_store import FileConversationStore  # noqa: E402
+
+# Root logging is configured at INFO; the module logger is named after the demo.
+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(message)s")
+logger = logging.getLogger("handoff_demo")
+
+# -------------------------------------------------------------------------
+# Persistent state
+# -------------------------------------------------------------------------
+
+# tempfile.mkdtemp creates a new directory on every start, so anything stored
+# under STORE_DIR belongs to this process run only.
+STORE_DIR = Path(tempfile.mkdtemp(prefix="hive_handoff_"))
+RUNTIME = Runtime(STORE_DIR / "runtime")
+LLM = LiteLLMProvider(model="claude-sonnet-4-5-20250929")
+
+# -------------------------------------------------------------------------
+# Credentials
+# -------------------------------------------------------------------------
+
+# Composite credential store: encrypted files (primary) + env vars (fallback)
+# _env_mapping maps each credential name to the env var named in its spec.
+_env_mapping = {name: spec.env_var for name, spec in CREDENTIAL_SPECS.items()}
+_composite = CompositeStorage(
+    primary=EncryptedFileStorage(),
+    fallbacks=[EnvVarStorage(env_mapping=_env_mapping)],
+)
+CREDENTIALS = CredentialStoreAdapter(CredentialStore(storage=_composite))
+
+# Startup check that reports only presence and length, never the value.
+# These are DEBUG messages, so with the INFO level configured above they are
+# not emitted unless the log level is lowered.
+for _name in ["brave_search", "hubspot"]:
+    _val = CREDENTIALS.get(_name)
+    if _val:
+        logger.debug("credential %s: OK (len=%d)", _name, len(_val))
+    else:
+        logger.debug("credential %s: not found", _name)
+
+# -------------------------------------------------------------------------
+# Tool Registry — web_search + web_scrape for Node A (Researcher)
+# -------------------------------------------------------------------------
+
+# Module-wide registry that the register() calls below add tools to.
+TOOL_REGISTRY = ToolRegistry()
+
+
+def _exec_web_search(inputs: dict) -> dict:
+    """Run a Brave web search and return a trimmed list of results.
+
+    Args:
+        inputs: Tool arguments. ``query`` is the search string and
+            ``num_results`` is clamped to 1-20 (default 10).
+
+    Returns:
+        ``{"query", "results", "total"}`` where each result carries ``title``,
+        ``url`` and ``snippet``; otherwise ``{"error": ...}``. Failures are
+        reported in the result, never raised, matching ``_exec_web_scrape``.
+    """
+    api_key = CREDENTIALS.get("brave_search")
+    if not api_key:
+        return {"error": "brave_search credential not configured"}
+    query = inputs.get("query", "")
+    try:
+        num_results = int(inputs.get("num_results", 10))
+    except (TypeError, ValueError):
+        num_results = 10
+    # Keep the count inside the documented 1-20 range; a zero or negative
+    # value previously went into both the request and the result slice.
+    num_results = max(1, min(num_results, 20))
+    try:
+        resp = httpx.get(
+            "https://api.search.brave.com/res/v1/web/search",
+            params={"q": query, "count": num_results},
+            headers={
+                "X-Subscription-Token": api_key,
+                "Accept": "application/json",
+            },
+            timeout=30.0,
+        )
+        if resp.status_code != 200:
+            return {"error": f"Brave API HTTP {resp.status_code}"}
+        data = resp.json()
+    except httpx.TimeoutException:
+        return {"error": "Request timed out"}
+    except Exception as e:
+        # Network failures and undecodable bodies become error results
+        # instead of propagating to the caller.
+        return {"error": f"Search failed: {e}"}
+    results = [
+        {
+            "title": item.get("title", ""),
+            "url": item.get("url", ""),
+            "snippet": item.get("description", ""),
+        }
+        for item in data.get("web", {}).get("results", [])[:num_results]
+    ]
+    return {"query": query, "results": results, "total": len(results)}
+
+
+# Register the web_search tool: its name, a JSON-schema style description of
+# the arguments ("query" required, "num_results" optional), and its executor.
+TOOL_REGISTRY.register(
+    name="web_search",
+    tool=Tool(
+        name="web_search",
+        description=(
+            "Search the web for current information. "
+            "Returns titles, URLs, and snippets from search results."
+        ),
+        parameters={
+            "type": "object",
+            "properties": {
+                "query": {
+                    "type": "string",
+                    "description": "The search query (1-500 characters)",
+                },
+                "num_results": {
+                    "type": "integer",
+                    "description": "Number of results (1-20, default 10)",
+                },
+            },
+            "required": ["query"],
+        },
+    ),
+    executor=lambda inputs: _exec_web_search(inputs),
+)
+
+# Headers sent with every scrape request: a desktop Chrome User-Agent string
+# and an Accept header that asks for HTML/XHTML.
+_SCRAPE_HEADERS = {
+    "User-Agent": (
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+        "AppleWebKit/537.36 (KHTML, like Gecko) "
+        "Chrome/131.0.0.0 Safari/537.36"
+    ),
+    "Accept": "text/html,application/xhtml+xml",
+}
+
+
+def _exec_web_scrape(inputs: dict) -> dict:
+    """Fetch a web page and return its title and main text content.
+
+    Args:
+        inputs: Tool arguments. ``url`` is required (``https://`` is prepended
+            when no scheme is given); ``max_length`` is optional, defaults to
+            50000 and is clamped to the range 1000-500000.
+
+    Returns:
+        ``{"url", "title", "content", "length"}`` on success, otherwise
+        ``{"error": ...}``. Failures are reported in the result, never raised.
+    """
+    url = str(inputs.get("url") or "").strip()
+    if not url:
+        # An empty URL used to become the request "https://" and come back as
+        # an opaque transport error; report the real problem instead.
+        return {"error": "url is required"}
+    raw_max = inputs.get("max_length")
+    try:
+        requested = 50000 if raw_max is None else int(raw_max)
+    except (TypeError, ValueError, OverflowError):
+        # A non-numeric max_length falls back to the default rather than
+        # raising outside the error-dict contract.
+        requested = 50000
+    max_length = max(1000, min(requested, 500000))
+    if not url.startswith(("http://", "https://")):
+        url = "https://" + url
+    try:
+        resp = httpx.get(
+            url,
+            timeout=30.0,
+            follow_redirects=True,
+            headers=_SCRAPE_HEADERS,
+        )
+        if resp.status_code != 200:
+            return {"error": f"HTTP {resp.status_code}"}
+        soup = BeautifulSoup(resp.text, "html.parser")
+        # Drop page chrome and non-content elements before extracting text.
+        for tag in soup(["script", "style", "nav", "footer", "header", "aside", "noscript"]):
+            tag.decompose()
+        title = soup.title.get_text(strip=True) if soup.title else ""
+        # Prefer the most specific content container that exists.
+        main = (
+            soup.find("article")
+            or soup.find("main")
+            or soup.find(attrs={"role": "main"})
+            or soup.find("body")
+        )
+        text = main.get_text(separator=" ", strip=True) if main else ""
+        # Collapse every run of whitespace into a single space.
+        text = " ".join(text.split())
+        if len(text) > max_length:
+            text = text[:max_length] + "..."
+        return {
+            "url": url,
+            "title": title,
+            "content": text,
+            "length": len(text),
+        }
+    except httpx.TimeoutException:
+        return {"error": "Request timed out"}
+    except Exception as e:
+        return {"error": f"Scrape failed: {e}"}
+
+
+# Register the web_scrape tool: its name, a JSON-schema style description of
+# the arguments ("url" required, "max_length" optional), and its executor.
+TOOL_REGISTRY.register(
+    name="web_scrape",
+    tool=Tool(
+        name="web_scrape",
+        description=(
+            "Scrape and extract text content from a webpage URL. "
+            "Returns the page title and main text content."
+        ),
+        parameters={
+            "type": "object",
+            "properties": {
+                "url": {
+                    "type": "string",
+                    "description": "URL of the webpage to scrape",
+                },
+                "max_length": {
+                    "type": "integer",
+                    "description": "Maximum text length (default 50000)",
+                },
+            },
+            "required": ["url"],
+        },
+    ),
+    executor=lambda inputs: _exec_web_scrape(inputs),
+)
+
+# Log the names of all tools registered so far, at import time.
+logger.info(
+    "ToolRegistry loaded: %s",
+    ", ".join(TOOL_REGISTRY.get_registered_names()),
+)
+
+# -------------------------------------------------------------------------
+# Node Specs
+# -------------------------------------------------------------------------
+
+# Spec for the researcher node: reads "topic" and is expected to produce
+# "research_summary". The prompt directs it to the web_search and web_scrape
+# tools and to a set_output call.
+# NOTE(review): set_output is not registered in the part of the file shown
+# here; presumably the event loop node supplies it, confirm.
+RESEARCHER_SPEC = NodeSpec(
+    id="researcher",
+    name="Researcher",
+    description="Researches a topic using web search and scraping tools",
+    node_type="event_loop",
+    input_keys=["topic"],
+    output_keys=["research_summary"],
+    system_prompt=(
+        "You are a thorough research assistant. Your job is to research "
+        "the given topic using the web_search and web_scrape tools.\n\n"
+        "1. Search for relevant information on the topic\n"
+        "2. Scrape 1-2 of the most promising URLs for details\n"
+        "3. Synthesize your findings into a comprehensive summary\n"
+        "4. Use set_output with key='research_summary' to save your "
+        "findings\n\n"
+        "Be thorough but efficient. Aim for 2-4 search/scrape calls, "
+        "then summarize and set_output."
+    ),
+)
+
+# Spec for the analyst node: reads "context" and is expected to produce
+# "analysis". Its prompt mentions no search or scrape tools, only set_output.
+ANALYST_SPEC = NodeSpec(
+    id="analyst",
+    name="Analyst",
+    description="Analyzes research findings and provides insights",
+    node_type="event_loop",
+    input_keys=["context"],
+    output_keys=["analysis"],
+    system_prompt=(
+        "You are a strategic analyst. You receive research findings from "
+        "a previous researcher and must:\n\n"
+        "1. Identify key themes and patterns\n"
+        "2. Assess the reliability and significance of the findings\n"
+        "3. Provide actionable insights and recommendations\n"
+        "4. Use set_output with key='analysis' to save your analysis\n\n"
+        "Be concise but insightful. Focus on what matters most."
+    ),
+)
+
+
+# -------------------------------------------------------------------------
+# HTML page
+# -------------------------------------------------------------------------
+
+HTML_PAGE = (  # noqa: E501
+    """<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>ContextHandoff Demo</title>
+<style>
+  * {
+    box-sizing: border-box;
+    margin: 0;
+    padding: 0;
+  }
+  body {
+    font-family: 'SF Mono', 'Fira Code', monospace;
+    background: #0d1117;
+    color: #c9d1d9;
+    height: 100vh;
+    display: flex;
+    flex-direction: column;
+  }
+  header {
+    background: #161b22;
+    padding: 12px 20px;
+    border-bottom: 1px solid #30363d;
+    display: flex;
+    align-items: center;
+    gap: 16px;
+  }
+  header h1 {
+    font-size: 16px;
+    color: #58a6ff;
+    font-weight: 600;
+  }
+  .badge {
+    font-size: 12px;
+    padding: 3px 10px;
+    border-radius: 12px;
+    background: #21262d;
+    color: #8b949e;
+  }
+  .badge.researcher {
+    background: #1a3a5c;
+    color: #58a6ff;
+  }
+  .badge.analyst {
+    background: #1a4b2e;
+    color: #3fb950;
+  }
+  .badge.handoff {
+    background: #3d1f00;
+    color: #d29922;
+  }
+  .badge.done {
+    background: #21262d;
+    color: #8b949e;
+  }
+  .badge.error {
+    background: #4b1a1a;
+    color: #f85149;
+  }
+  .chat {
+    flex: 1;
+    overflow-y: auto;
+    padding: 16px;
+  }
+  .msg {
+    margin: 8px 0;
+    padding: 10px 14px;
+    border-radius: 8px;
+    line-height: 1.6;
+    white-space: pre-wrap;
+    word-wrap: break-word;
+  }
+  .msg.user {
+    background: #1a3a5c;
+    color: #58a6ff;
+  }
+  .msg.assistant {
+    background: #161b22;
+    color: #c9d1d9;
+  }
+  .msg.assistant.analyst-msg {
+    border-left: 3px solid #3fb950;
+  }
+  .msg.event {
+    background: transparent;
+    color: #8b949e;
+    font-size: 11px;
+    padding: 4px 14px;
+    border-left: 3px solid #30363d;
+  }
+  .msg.event.loop {
+    border-left-color: #58a6ff;
+  }
+  .msg.event.tool {
+    border-left-color: #d29922;
+  }
+  .msg.event.stall {
+    border-left-color: #f85149;
+  }
+  .handoff-banner {
+    margin: 16px 0;
+    padding: 16px;
+    background: #1c1200;
+    border: 1px solid #d29922;
+    border-radius: 8px;
+    text-align: center;
+  }
+  .handoff-banner h3 {
+    color: #d29922;
+    font-size: 14px;
+    margin-bottom: 8px;
+  }
+  .handoff-banner p, .result-banner p {
+    color: #8b949e;
+    font-size: 12px;
+    line-height: 1.5;
+    max-height: 200px;
+    overflow-y: auto;
+    white-space: pre-wrap;
+    text-align: left;
+  }
+  .result-banner {
+    margin: 16px 0;
+    padding: 16px;
+    background: #0a2614;
+    border: 1px solid #3fb950;
+    border-radius: 8px;
+  }
+  .result-banner h3 {
+    color: #3fb950;
+    font-size: 14px;
+    margin-bottom: 8px;
+    text-align: center;
+  }
+  .result-banner .label {
+    color: #58a6ff;
+    font-size: 11px;
+    font-weight: 600;
+    margin-top: 10px;
+    margin-bottom: 2px;
+  }
+  .result-banner .tokens {
+    color: #484f58;
+    font-size: 11px;
+    text-align: center;
+    margin-top: 10px;
+  }
+  .input-bar {
+    padding: 12px 16px;
+    background: #161b22;
+    border-top: 1px solid #30363d;
+    display: flex;
+    gap: 8px;
+  }
+  .input-bar input {
+    flex: 1;
+    background: #0d1117;
+    border: 1px solid #30363d;
+    color: #c9d1d9;
+    padding: 8px 12px;
+    border-radius: 6px;
+    font-family: inherit;
+    font-size: 14px;
+    outline: none;
+  }
+  .input-bar input:focus {
+    border-color: #58a6ff;
+  }
+  .input-bar button {
+    background: #238636;
+    color: #fff;
+    border: none;
+    padding: 8px 20px;
+    border-radius: 6px;
+    cursor: pointer;
+    font-family: inherit;
+    font-weight: 600;
+  }
+  .input-bar button:hover {
+    background: #2ea043;
+  }
+  .input-bar button:disabled {
+    background: #21262d;
+    color: #484f58;
+    cursor: not-allowed;
+  }
+</style>
+</head>
+<body>
+  <header>
+    <h1>ContextHandoff Demo</h1>
+    <span id="phase" class="badge">Idle</span>
+    <span id="iter" class="badge" style="display:none">Step 0</span>
+  </header>
+  <div id="chat" class="chat"></div>
+  <div class="input-bar">
+    <input id="input" type="text"
+           placeholder="Enter a research topic..." autofocus />
+    <button id="go" onclick="run()">Research</button>
+  </div>
+
+<script>
+let ws = null;
+let currentAssistantEl = null;
+let iterCount = 0;
+let currentPhase = 'idle';
+const chat = document.getElementById('chat');
+const phase = document.getElementById('phase');
+const iterEl = document.getElementById('iter');
+const goBtn = document.getElementById('go');
+const inputEl = document.getElementById('input');
+
+inputEl.addEventListener('keydown', e => {
+  if (e.key === 'Enter') run();
+});
+
+function setPhase(text, cls) {
+  phase.textContent = text;
+  phase.className = 'badge ' + cls;
+  currentPhase = cls;
+}
+
+function addMsg(text, cls) {
+  const el = document.createElement('div');
+  el.className = 'msg ' + cls;
+  el.textContent = text;
+  chat.appendChild(el);
+  chat.scrollTop = chat.scrollHeight;
+  return el;
+}
+
+function addHandoffBanner(summary) {
+  const banner = document.createElement('div');
+  banner.className = 'handoff-banner';
+  const h3 = document.createElement('h3');
+  h3.textContent = 'Context Handoff: Researcher -> Analyst';
+  const p = document.createElement('p');
+  p.textContent = summary || 'Passing research context...';
+  banner.appendChild(h3);
+  banner.appendChild(p);
+  chat.appendChild(banner);
+  chat.scrollTop = chat.scrollHeight;
+}
+
+function addResultBanner(researcher, analyst, tokens) {
+  const banner = document.createElement('div');
+  banner.className = 'result-banner';
+  const h3 = document.createElement('h3');
+  h3.textContent = 'Pipeline Complete';
+  banner.appendChild(h3);
+
+  if (researcher && researcher.research_summary) {
+    const lbl = document.createElement('div');
+    lbl.className = 'label';
+    lbl.textContent = 'RESEARCH SUMMARY';
+    banner.appendChild(lbl);
+    const p = document.createElement('p');
+    p.textContent = researcher.research_summary;
+    banner.appendChild(p);
+  }
+
+  if (analyst && analyst.analysis) {
+    const lbl = document.createElement('div');
+    lbl.className = 'label';
+    lbl.textContent = 'ANALYSIS';
+    lbl.style.color = '#3fb950';
+    banner.appendChild(lbl);
+    const p = document.createElement('p');
+    p.textContent = analyst.analysis;
+    banner.appendChild(p);
+  }
+
+  if (tokens) {
+    const t = document.createElement('div');
+    t.className = 'tokens';
+    t.textContent = 'Total tokens: ' + tokens.toLocaleString();
+    banner.appendChild(t);
+  }
+
+  chat.appendChild(banner);
+  chat.scrollTop = chat.scrollHeight;
+}
+
+function connect() {
+  ws = new WebSocket('ws://' + location.host + '/ws');
+  ws.onopen = () => {
+    setPhase('Ready', 'done');
+    goBtn.disabled = false;
+  };
+  ws.onmessage = handleEvent;
+  ws.onerror = () => { setPhase('Error', 'error'); };
+  ws.onclose = () => {
+    setPhase('Reconnecting...', '');
+    goBtn.disabled = true;
+    setTimeout(connect, 2000);
+  };
+}
+
+function handleEvent(msg) {
+  const evt = JSON.parse(msg.data);
+
+  if (evt.type === 'phase') {
+    if (evt.phase === 'researcher') {
+      setPhase('Researcher', 'researcher');
+    } else if (evt.phase === 'handoff') {
+      setPhase('Handoff', 'handoff');
+    } else if (evt.phase === 'analyst') {
+      setPhase('Analyst', 'analyst');
+    }
+    iterCount = 0;
+    iterEl.style.display = 'none';
+  }
+  else if (evt.type === 'llm_text_delta') {
+    if (currentAssistantEl) {
+      currentAssistantEl.textContent += evt.content;
+      chat.scrollTop = chat.scrollHeight;
+    }
+  }
+  else if (evt.type === 'node_loop_iteration') {
+    iterCount = evt.iteration || (iterCount + 1);
+    iterEl.textContent = 'Step ' + iterCount;
+    iterEl.style.display = '';
+  }
+  else if (evt.type === 'tool_call_started') {
+    var info = evt.tool_name + '('
+      + JSON.stringify(evt.tool_input).slice(0, 120) + ')';
+    addMsg('TOOL  ' + info, 'event tool');
+  }
+  else if (evt.type === 'tool_call_completed') {
+    var preview = (evt.result || '').slice(0, 200);
+    var cls = evt.is_error ? 'stall' : 'tool';
+    addMsg(
+      'RESULT  ' + evt.tool_name + ': ' + preview,
+      'event ' + cls
+    );
+    var assistCls = currentPhase === 'analyst'
+      ? 'assistant analyst-msg' : 'assistant';
+    currentAssistantEl = addMsg('', assistCls);
+  }
+  else if (evt.type === 'handoff_context') {
+    addHandoffBanner(evt.summary);
+    var assistCls = 'assistant analyst-msg';
+    currentAssistantEl = addMsg('', assistCls);
+  }
+  else if (evt.type === 'node_result') {
+    if (evt.node_id === 'researcher') {
+      if (currentAssistantEl
+          && !currentAssistantEl.textContent) {
+        currentAssistantEl.remove();
+      }
+    }
+  }
+  else if (evt.type === 'done') {
+    setPhase('Done', 'done');
+    iterEl.style.display = 'none';
+    if (currentAssistantEl
+        && !currentAssistantEl.textContent) {
+      currentAssistantEl.remove();
+    }
+    currentAssistantEl = null;
+    addResultBanner(
+      evt.researcher, evt.analyst, evt.total_tokens
+    );
+    goBtn.disabled = false;
+    inputEl.placeholder = 'Enter another topic...';
+  }
+  else if (evt.type === 'error') {
+    setPhase('Error', 'error');
+    addMsg('ERROR  ' + evt.message, 'event stall');
+    goBtn.disabled = false;
+  }
+  else if (evt.type === 'node_stalled') {
+    addMsg('STALLED  ' + evt.reason, 'event stall');
+  }
+}
+
+function run() {
+  const text = inputEl.value.trim();
+  if (!text || !ws || ws.readyState !== 1) return;
+  chat.innerHTML = '';
+  addMsg(text, 'user');
+  currentAssistantEl = addMsg('', 'assistant');
+  inputEl.value = '';
+  goBtn.disabled = true;
+  ws.send(JSON.stringify({ topic: text }));
+}
+
+connect();
+</script>
+</body>
+</html>"""
+)
+
+
+# -------------------------------------------------------------------------
+# WebSocket handler — sequential Node A → Handoff → Node B
+# -------------------------------------------------------------------------
+
+
+async def handle_ws(websocket):
+    """Run the two-node handoff pipeline once per valid client message.
+
+    Each incoming frame is expected to be a JSON object with a non-empty
+    string under ``"topic"``. Frames that are not valid JSON, are not JSON
+    objects, or carry no usable topic are skipped, so a malformed message
+    cannot take down the connection handler.
+
+    Args:
+        websocket: Connection to read requests from and to report pipeline
+            errors to.
+    """
+    try:
+        async for raw in websocket:
+            try:
+                msg = json.loads(raw)
+            except (ValueError, RecursionError):
+                # ValueError covers json.JSONDecodeError and undecodable
+                # bytes frames; RecursionError covers absurdly nested input.
+                continue
+
+            # Valid JSON is not necessarily an object (it may be a list, a
+            # number, ...); only a dict can carry a "topic".
+            if not isinstance(msg, dict):
+                continue
+
+            topic = msg.get("topic", "")
+            if not isinstance(topic, str) or not topic:
+                continue
+
+            logger.info("Starting handoff pipeline for: %s", topic)
+
+            try:
+                await _run_pipeline(websocket, topic)
+            except websockets.exceptions.ConnectionClosed:
+                logger.info("WebSocket closed during pipeline")
+                return
+            except Exception as e:
+                logger.exception("Pipeline error")
+                try:
+                    await websocket.send(json.dumps({"type": "error", "message": str(e)}))
+                except Exception:
+                    # Reporting the failure is best-effort: the client may
+                    # already be gone. Keep the handler alive either way.
+                    logger.debug("Could not deliver error to client", exc_info=True)
+
+    except websockets.exceptions.ConnectionClosed:
+        pass
+
+
+async def _run_pipeline(websocket, topic: str):
+    """Execute: Node A (research) → ContextHandoff → Node B (analysis).
+
+    A fresh temporary directory is created under ``STORE_DIR`` to hold both
+    conversation stores for this run. It is removed on every exit path,
+    including early returns and exceptions raised by a stage.
+
+    Args:
+        websocket: Connection that receives phase, event and result messages.
+        topic: Research topic handed to the researcher node as input.
+    """
+    import shutil
+
+    # Fresh stores for each run
+    run_dir = Path(tempfile.mkdtemp(prefix="hive_run_", dir=STORE_DIR))
+    try:
+        await _run_pipeline_stages(websocket, topic, run_dir)
+    finally:
+        # Best-effort removal; a failed cleanup must not mask the real outcome.
+        shutil.rmtree(run_dir, ignore_errors=True)
+
+
+async def _run_pipeline_stages(websocket, topic: str, run_dir):
+    """Run the researcher, handoff and analyst stages with stores in *run_dir*."""
+    store_a = FileConversationStore(run_dir / "node_a")
+    store_b = FileConversationStore(run_dir / "node_b")
+
+    # Shared event bus
+    bus = EventBus()
+
+    async def forward_event(event):
+        """Relay one bus event to the browser as a JSON message."""
+        try:
+            payload = {"type": event.type.value, **event.data}
+            if event.node_id:
+                payload["node_id"] = event.node_id
+            await websocket.send(json.dumps(payload))
+        except Exception:
+            # Forwarding is best-effort so a dropped event never aborts the
+            # node that emitted it; leave a trace for debugging.
+            logger.debug("Dropped event while forwarding", exc_info=True)
+
+    bus.subscribe(
+        event_types=[
+            EventType.NODE_LOOP_STARTED,
+            EventType.NODE_LOOP_ITERATION,
+            EventType.NODE_LOOP_COMPLETED,
+            EventType.LLM_TEXT_DELTA,
+            EventType.TOOL_CALL_STARTED,
+            EventType.TOOL_CALL_COMPLETED,
+            EventType.NODE_STALLED,
+        ],
+        handler=forward_event,
+    )
+
+    tools = list(TOOL_REGISTRY.get_tools().values())
+    tool_executor = TOOL_REGISTRY.get_executor()
+
+    # ---- Phase 1: Researcher ------------------------------------------------
+    await websocket.send(json.dumps({"type": "phase", "phase": "researcher"}))
+
+    node_a = EventLoopNode(
+        event_bus=bus,
+        judge=None,  # implicit judge: accept when output_keys filled
+        config=LoopConfig(
+            max_iterations=20,
+            max_tool_calls_per_turn=10,
+            max_history_tokens=32_000,
+        ),
+        conversation_store=store_a,
+        tool_executor=tool_executor,
+    )
+
+    ctx_a = NodeContext(
+        runtime=RUNTIME,
+        node_id="researcher",
+        node_spec=RESEARCHER_SPEC,
+        memory=SharedMemory(),
+        input_data={"topic": topic},
+        llm=LLM,
+        available_tools=tools,
+    )
+
+    result_a = await node_a.execute(ctx_a)
+    logger.info(
+        "Researcher done: success=%s, tokens=%s",
+        result_a.success,
+        result_a.tokens_used,
+    )
+
+    await websocket.send(
+        json.dumps(
+            {
+                "type": "node_result",
+                "node_id": "researcher",
+                "success": result_a.success,
+                "output": result_a.output,
+            }
+        )
+    )
+
+    if not result_a.success:
+        # Without research output there is nothing to hand off.
+        await websocket.send(
+            json.dumps(
+                {
+                    "type": "error",
+                    "message": f"Researcher failed: {result_a.error}",
+                }
+            )
+        )
+        return
+
+    # ---- Phase 2: Context Handoff -------------------------------------------
+    await websocket.send(json.dumps({"type": "phase", "phase": "handoff"}))
+
+    # Restore the researcher's conversation from store
+    conversation_a = await NodeConversation.restore(store_a)
+    if conversation_a is None:
+        await websocket.send(
+            json.dumps(
+                {
+                    "type": "error",
+                    "message": "Failed to restore researcher conversation",
+                }
+            )
+        )
+        return
+
+    handoff_engine = ContextHandoff(llm=LLM)
+    handoff_context = handoff_engine.summarize_conversation(
+        conversation=conversation_a,
+        node_id="researcher",
+        output_keys=["research_summary"],
+    )
+
+    formatted_handoff = ContextHandoff.format_as_input(handoff_context)
+    logger.info(
+        "Handoff: %d turns, ~%d tokens, keys=%s",
+        handoff_context.turn_count,
+        handoff_context.total_tokens_used,
+        list(handoff_context.key_outputs.keys()),
+    )
+
+    # Send handoff context to browser (summary truncated to 500 characters)
+    await websocket.send(
+        json.dumps(
+            {
+                "type": "handoff_context",
+                "summary": handoff_context.summary[:500],
+                "turn_count": handoff_context.turn_count,
+                "tokens": handoff_context.total_tokens_used,
+                "key_outputs": handoff_context.key_outputs,
+            }
+        )
+    )
+
+    # ---- Phase 3: Analyst ---------------------------------------------------
+    await websocket.send(json.dumps({"type": "phase", "phase": "analyst"}))
+
+    # The analyst gets no tools and no tool executor; its only input is the
+    # formatted handoff text.
+    node_b = EventLoopNode(
+        event_bus=bus,
+        judge=None,  # implicit judge
+        config=LoopConfig(
+            max_iterations=10,
+            max_tool_calls_per_turn=5,
+            max_history_tokens=32_000,
+        ),
+        conversation_store=store_b,
+    )
+
+    ctx_b = NodeContext(
+        runtime=RUNTIME,
+        node_id="analyst",
+        node_spec=ANALYST_SPEC,
+        memory=SharedMemory(),
+        input_data={"context": formatted_handoff},
+        llm=LLM,
+        available_tools=[],
+    )
+
+    result_b = await node_b.execute(ctx_b)
+    logger.info(
+        "Analyst done: success=%s, tokens=%s",
+        result_b.success,
+        result_b.tokens_used,
+    )
+
+    # ---- Done ---------------------------------------------------------------
+    await websocket.send(
+        json.dumps(
+            {
+                "type": "done",
+                "researcher": result_a.output,
+                "analyst": result_b.output,
+                # tokens_used may be None; count it as zero in the total.
+                "total_tokens": ((result_a.tokens_used or 0) + (result_b.tokens_used or 0)),
+            }
+        )
+    )
+
+
+# -------------------------------------------------------------------------
+# HTTP handler
+# -------------------------------------------------------------------------
+
+
+async def process_request(connection, request: Request):
+    """Answer plain HTTP requests with the demo page; pass ``/ws`` through.
+
+    Returns ``None`` for the ``/ws`` path (the WebSocket upgrade route) and,
+    for every other path, a 200 response whose body is ``HTML_PAGE``.
+    """
+    if request.path != "/ws":
+        headers = websockets.Headers({"Content-Type": "text/html; charset=utf-8"})
+        body = HTML_PAGE.encode()
+        return Response(HTTPStatus.OK, "OK", headers, body)
+    return None
+
+
+# -------------------------------------------------------------------------
+# Main
+# -------------------------------------------------------------------------
+
+
+async def main():
+    """Start the demo server on port 8766 and keep it running."""
+    port = 8766
+    server = websockets.serve(
+        handle_ws,
+        "0.0.0.0",
+        port,
+        process_request=process_request,
+    )
+    async with server:
+        logger.info(f"Handoff demo at http://localhost:{port}")
+        logger.info("Enter a research topic to start the pipeline.")
+        # Awaiting a future that nothing resolves keeps the server context open.
+        never_resolved = asyncio.Future()
+        await never_resolved
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
@@ -0,0 +1,123 @@
+"""
+Minimal Manual Agent Example
+----------------------------
+This example demonstrates how to build and run an agent programmatically
+without using the Claude Code CLI or external LLM APIs.
+
+It uses 'function' nodes to define logic in pure Python, making it perfect
+for understanding the core runtime loop:
+Setup -> Graph definition -> Execution -> Result
+
+Run with:
+    uv run python core/examples/manual_agent.py
+"""
+
+import asyncio
+
+from framework.graph import EdgeCondition, EdgeSpec, Goal, GraphSpec, NodeSpec
+from framework.graph.executor import GraphExecutor
+from framework.runtime.core import Runtime
+
+
+# 1. Define Node Logic (Pure Python Functions)
+def greet(name: str) -> str:
+    """Return ``"Hello, <name>!"`` for the given name."""
+    template = "Hello, {}!"
+    return template.format(name)
+
+
+def uppercase(greeting: str) -> str:
+    """Return the greeting with every character upper-cased."""
+    shouted = greeting.upper()
+    return shouted
+
+
+async def main():
+    """Build the greeting graph, run it with name='Alice', and print the result."""
+    from pathlib import Path
+
+    print("🚀 Setting up Manual Agent...")
+
+    # Goal: every agent needs one, along with its success criteria.
+    greeting_criterion = {
+        "id": "greeting_generated",
+        "description": "Greeting produced",
+        "metric": "custom",
+        "target": "any",
+    }
+    goal = Goal(
+        id="greet-user",
+        name="Greet User",
+        description="Generate a friendly uppercase greeting",
+        success_criteria=[greeting_criterion],
+    )
+
+    # Nodes: the two function steps of the process.
+    # NOTE(review): the implementations are registered below under the node
+    # ids ("greeter", "uppercaser"), not under these `function` names —
+    # confirm which key the executor actually resolves.
+    greeter_node = NodeSpec(
+        id="greeter",
+        name="Greeter",
+        description="Generates a simple greeting",
+        node_type="function",
+        function="greet",
+        input_keys=["name"],
+        output_keys=["greeting"],
+    )
+    uppercaser_node = NodeSpec(
+        id="uppercaser",
+        name="Uppercaser",
+        description="Converts greeting to uppercase",
+        node_type="function",
+        function="uppercase",
+        input_keys=["greeting"],
+        output_keys=["final_greeting"],
+    )
+
+    # Edge: greeter -> uppercaser, taken on EdgeCondition.ON_SUCCESS.
+    greeter_to_uppercaser = EdgeSpec(
+        id="greet-to-upper",
+        source="greeter",
+        target="uppercaser",
+        condition=EdgeCondition.ON_SUCCESS,
+    )
+
+    # Graph: the blueprint connecting the nodes and edges defined above.
+    graph = GraphSpec(
+        id="greeting-agent",
+        goal_id="greet-user",
+        entry_node="greeter",
+        terminal_nodes=["uppercaser"],
+        nodes=[greeter_node, uppercaser_node],
+        edges=[greeter_to_uppercaser],
+    )
+
+    # Runtime handles state/memory; the executor runs the graph.
+    storage_dir = Path("./agent_logs")
+    runtime = Runtime(storage_path=storage_dir)
+    executor = GraphExecutor(runtime=runtime)
+
+    # Hook the plain Python functions up to the executor.
+    for key, implementation in (("greeter", greet), ("uppercaser", uppercase)):
+        executor.register_function(key, implementation)
+
+    print("▶ Executing agent with input: name='Alice'...")
+
+    agent_input = {"name": "Alice"}
+    result = await executor.execute(graph=graph, goal=goal, input_data=agent_input)
+
+    # Report the outcome: failure first, otherwise the path and final output.
+    if not result.success:
+        print(f"\n❌ Failed: {result.error}")
+        return
+
+    print("\n✅ Success!")
+    print(f"Path taken: {' -> '.join(result.path)}")
+    print(f"Final output: {result.output.get('final_greeting')}")
+
+
+if __name__ == "__main__":
+    # To see the internal decision flow, enable logging before running, e.g.
+    # with logging.basicConfig at INFO level.
+    asyncio.run(main())
@@ -0,0 +1,194 @@
+#!/usr/bin/env python3
+"""
+Example: Integrating MCP Servers with the Core Framework
+
+This example demonstrates how to:
+1. Register MCP servers programmatically
+2. Use MCP tools in agents
+3. Load MCP servers from configuration files
+"""
+
+import asyncio
+from pathlib import Path
+
+from framework.runner.runner import AgentRunner
+
+
+async def example_1_programmatic_registration():
+    """Example 1: Register an MCP server programmatically and run the agent.
+
+    Loads the ``exports/task-planner`` agent, registers the tools MCP server
+    over STDIO, lists the resulting tools, and runs the agent once. The
+    runner is cleaned up even when registration or the run fails.
+    """
+    print("\n=== Example 1: Programmatic MCP Server Registration ===\n")
+
+    # Load an existing agent
+    runner = AgentRunner.load("exports/task-planner")
+    try:
+        # Register tools MCP server via STDIO
+        num_tools = runner.register_mcp_server(
+            name="tools",
+            transport="stdio",
+            command="python",
+            args=["-m", "aden_tools.mcp_server", "--stdio"],
+            cwd="../tools",
+        )
+
+        print(f"Registered {num_tools} tools from tools MCP server")
+
+        # List all available tools
+        # NOTE(review): reaches into the private _tool_registry attribute;
+        # switch to a public accessor if the runner offers one.
+        tools = runner._tool_registry.get_tools()
+        print(f"\nAvailable tools: {list(tools.keys())}")
+
+        # Run the agent with MCP tools available
+        result = await runner.run(
+            {"objective": "Search for 'Claude AI' and summarize the top 3 results"}
+        )
+
+        print(f"\nAgent result: {result}")
+    finally:
+        # Always release the runner, even if a step above raised.
+        runner.cleanup()
+
+
+async def example_2_http_transport():
+    """Example 2: Connect to an MCP server via HTTP.
+
+    Expects the tools MCP server to be reachable at ``http://localhost:4001``.
+    The runner is cleaned up even when registration fails.
+    """
+    print("\n=== Example 2: HTTP MCP Server Connection ===\n")
+
+    # First, start the tools MCP server in HTTP mode:
+    # cd tools && python mcp_server.py --port 4001
+
+    runner = AgentRunner.load("exports/task-planner")
+    try:
+        # Register tools via HTTP
+        num_tools = runner.register_mcp_server(
+            name="tools-http",
+            transport="http",
+            url="http://localhost:4001",
+        )
+
+        print(f"Registered {num_tools} tools from HTTP MCP server")
+    finally:
+        # Always release the runner, even if registration raised.
+        runner.cleanup()
+
+
+async def example_3_config_file():
+    """Example 3: Load MCP servers from a configuration file.
+
+    Copies ``examples/mcp_servers.json`` into the ``exports/task-planner``
+    agent folder, loads the agent, prints its tools, and then removes the
+    copied config again. Both the runner and the copied file are cleaned up
+    even when a step fails.
+    """
+    print("\n=== Example 3: Load from Configuration File ===\n")
+
+    import shutil
+
+    test_agent_path = Path("exports/task-planner")
+    config_path = test_agent_path / "mcp_servers.json"
+
+    # Copy example config (in practice, you'd place this in your agent folder)
+    # NOTE(review): this overwrites, and later deletes, any mcp_servers.json
+    # already present in the agent folder.
+    shutil.copy("examples/mcp_servers.json", config_path)
+    try:
+        # Load agent - MCP servers will be auto-discovered
+        runner = AgentRunner.load(test_agent_path)
+        try:
+            # Tools are automatically available
+            tools = runner._tool_registry.get_tools()
+            print(f"Available tools: {list(tools.keys())}")
+        finally:
+            runner.cleanup()
+    finally:
+        # Clean up the test config; tolerate it having vanished already so
+        # the cleanup cannot hide an earlier error.
+        config_path.unlink(missing_ok=True)
+
+
+async def example_4_custom_agent_with_mcp_tools():
+    """Example 4: Build a custom agent that uses MCP tools.
+
+    Defines a two-node graph (web search, then summarize) with
+    ``GraphBuilder``, exports it to ``exports/web-research-agent``, loads it,
+    registers the tools MCP server, and runs it once. The runner is cleaned
+    up even when registration or the run fails.
+    """
+    print("\n=== Example 4: Custom Agent with MCP Tools ===\n")
+
+    from framework.builder.workflow import GraphBuilder
+
+    # Create a workflow builder
+    builder = GraphBuilder()
+
+    # Define goal
+    builder.set_goal(
+        goal_id="web-researcher",
+        name="Web Research Agent",
+        description="Search the web and summarize findings",
+    )
+
+    # Add success criteria
+    builder.add_success_criterion(
+        "search-results", "Successfully retrieve at least 3 web search results"
+    )
+    builder.add_success_criterion("summary", "Provide a clear, concise summary of the findings")
+
+    # Add nodes that will use MCP tools
+    builder.add_node(
+        node_id="web-searcher",
+        name="Web Search",
+        description="Search the web for information",
+        node_type="llm_tool_use",
+        system_prompt="Search for {query} and return the top results. Use the web_search tool.",
+        tools=["web_search"],  # This tool comes from tools MCP server
+        input_keys=["query"],
+        output_keys=["search_results"],
+    )
+
+    builder.add_node(
+        node_id="summarizer",
+        name="Summarize Results",
+        description="Summarize the search results",
+        node_type="llm_generate",
+        system_prompt="Summarize the following search results in 2-3 sentences: {search_results}",
+        input_keys=["search_results"],
+        output_keys=["summary"],
+    )
+
+    # Connect nodes
+    builder.add_edge("web-searcher", "summarizer")
+
+    # Set entry point
+    builder.set_entry("web-searcher")
+    builder.set_terminal("summarizer")
+
+    # Export the agent
+    export_path = Path("exports/web-research-agent")
+    export_path.mkdir(parents=True, exist_ok=True)
+    builder.export(export_path)
+
+    # Load and register MCP server
+    runner = AgentRunner.load(export_path)
+    try:
+        runner.register_mcp_server(
+            name="tools",
+            transport="stdio",
+            command="python",
+            args=["-m", "aden_tools.mcp_server", "--stdio"],
+            cwd="../tools",
+        )
+
+        # Run the agent
+        result = await runner.run({"query": "latest AI breakthroughs 2026"})
+
+        print(f"\nAgent completed with result:\n{result}")
+    finally:
+        # Always release the runner, even if a step above raised.
+        runner.cleanup()
+
+
+async def main():
+    """Print a banner, run the enabled examples, and report any failure."""
+    import traceback
+
+    banner = "=" * 60
+    print(banner)
+    print("MCP Integration Examples")
+    print(banner)
+
+    try:
+        await example_1_programmatic_registration()
+        # The remaining examples are disabled by default; uncomment to run.
+        # await example_2_http_transport()  # Requires HTTP server running
+        # await example_3_config_file()
+        # await example_4_custom_agent_with_mcp_tools()
+    except Exception as exc:
+        # Top-level boundary of the script: show the error and its traceback.
+        print(f"\nError running example: {exc}")
+        traceback.print_exc()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
@@ -0,0 +1,22 @@
+{
+  "servers": [
+    {
+      "name": "tools",
+      "description": "Aden tools including web search, file operations, and PDF reading",
+      "transport": "stdio",
+      "command": "uv",
+      "args": ["run", "python", "mcp_server.py", "--stdio"],
+      "cwd": "../tools",
+      "env": {
+        "BRAVE_SEARCH_API_KEY": "${BRAVE_SEARCH_API_KEY}"
+      }
+    },
+    {
+      "name": "tools-http",
+      "description": "Aden tools via HTTP (for Docker deployments)",
+      "transport": "http",
+      "url": "http://localhost:4001",
+      "headers": {}
+    }
+  ]
+}
@@ -0,0 +1,70 @@
+"""
+Aden Hive Framework: A goal-driven agent runtime optimized for Builder observability.
+
+The runtime is designed around DECISIONS, not just actions. Every significant
+choice the agent makes is captured with:
+- What it was trying to do (intent)
+- What options it considered
+- What it chose and why
+- What happened as a result
+- Whether that was good or bad (evaluated post-hoc)
+
+This gives the Builder LLM the information it needs to improve agent behavior.
+
+## Testing Framework
+
+The framework includes a Goal-Based Testing system (Goal → Agent → Eval):
+- Generate tests from Goal success_criteria and constraints
+- Mandatory user approval before tests are stored
+- Parallel test execution with error categorization
+- Debug tools with fix suggestions
+
+See `framework.testing` for details.
+"""
+
+from framework.builder.query import BuilderQuery
+from framework.llm import AnthropicProvider, LLMProvider
+from framework.runner import AgentOrchestrator, AgentRunner
+from framework.runtime.core import Runtime
+from framework.schemas.decision import Decision, DecisionEvaluation, Option, Outcome
+from framework.schemas.run import Problem, Run, RunSummary
+
+# Testing framework
+from framework.testing import (
+    ApprovalStatus,
+    DebugTool,
+    ErrorCategory,
+    Test,
+    TestResult,
+    TestStorage,
+    TestSuiteResult,
+)
+
+__all__ = [
+    # Schemas
+    "Decision",
+    "Option",
+    "Outcome",
+    "DecisionEvaluation",
+    "Run",
+    "RunSummary",
+    "Problem",
+    # Runtime
+    "Runtime",
+    # Builder
+    "BuilderQuery",
+    # LLM
+    "LLMProvider",
+    "AnthropicProvider",
+    # Runner
+    "AgentRunner",
+    "AgentOrchestrator",
+    # Testing
+    "Test",
+    "TestResult",
+    "TestSuiteResult",
+    "TestStorage",
+    "ApprovalStatus",
+    "ErrorCategory",
+    "DebugTool",
+]
@@ -0,0 +1,6 @@
+"""Allow running as ``python -m framework``, which powers the ``hive`` console entry point."""
+
+from framework.cli import main
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,21 @@
+"""Builder interface for analyzing and building agents."""
+
+from framework.builder.query import BuilderQuery
+from framework.builder.workflow import (
+    BuildPhase,
+    BuildSession,
+    GraphBuilder,
+    TestCase,
+    TestResult,
+    ValidationResult,
+)
+
+__all__ = [
+    "BuilderQuery",
+    "GraphBuilder",
+    "BuildSession",
+    "BuildPhase",
+    "ValidationResult",
+    "TestCase",
+    "TestResult",
+]
@@ -0,0 +1,501 @@
+"""
+Builder Query Interface - How I (Builder) analyze agent runs.
+
+This is designed around the questions I need to answer:
+1. What happened? (summaries, narratives)
+2. Why did it fail? (failure analysis, decision traces)
+3. What patterns emerge? (across runs, across nodes)
+4. What should we change? (suggestions)
+"""
+
+from collections import defaultdict
+from pathlib import Path
+from typing import Any
+
+from framework.schemas.decision import Decision
+from framework.schemas.run import Run, RunStatus, RunSummary
+from framework.storage.backend import FileStorage
+
+
+class FailureAnalysis:
+    """Structured analysis of why a run failed."""
+
+    def __init__(
+        self,
+        run_id: str,
+        failure_point: str,
+        root_cause: str,
+        decision_chain: list[str],
+        problems: list[str],
+        suggestions: list[str],
+    ):
+        self.run_id = run_id
+        self.failure_point = failure_point
+        self.root_cause = root_cause
+        self.decision_chain = decision_chain
+        self.problems = problems
+        self.suggestions = suggestions
+
+    def to_dict(self) -> dict[str, Any]:
+        return {
+            "run_id": self.run_id,
+            "failure_point": self.failure_point,
+            "root_cause": self.root_cause,
+            "decision_chain": self.decision_chain,
+            "problems": self.problems,
+            "suggestions": self.suggestions,
+        }
+
+    def __str__(self) -> str:
+        lines = [
+            f"=== Failure Analysis for {self.run_id} ===",
+            "",
+            f"Failure Point: {self.failure_point}",
+            f"Root Cause: {self.root_cause}",
+            "",
+            "Decision Chain Leading to Failure:",
+        ]
+        for i, dec in enumerate(self.decision_chain, 1):
+            lines.append(f"  {i}. {dec}")
+
+        if self.problems:
+            lines.append("")
+            lines.append("Reported Problems:")
+            for prob in self.problems:
+                lines.append(f"  - {prob}")
+
+        if self.suggestions:
+            lines.append("")
+            lines.append("Suggestions:")
+            for sug in self.suggestions:
+                lines.append(f"  → {sug}")
+
+        return "\n".join(lines)
+
+
+class PatternAnalysis:
+    """Patterns detected across multiple runs."""
+
+    def __init__(
+        self,
+        goal_id: str,
+        run_count: int,
+        success_rate: float,
+        common_failures: list[tuple[str, int]],
+        problematic_nodes: list[tuple[str, float]],
+        decision_patterns: dict[str, Any],
+    ):
+        self.goal_id = goal_id
+        self.run_count = run_count
+        self.success_rate = success_rate
+        self.common_failures = common_failures
+        self.problematic_nodes = problematic_nodes
+        self.decision_patterns = decision_patterns
+
+    def to_dict(self) -> dict[str, Any]:
+        return {
+            "goal_id": self.goal_id,
+            "run_count": self.run_count,
+            "success_rate": self.success_rate,
+            "common_failures": self.common_failures,
+            "problematic_nodes": self.problematic_nodes,
+            "decision_patterns": self.decision_patterns,
+        }
+
+    def __str__(self) -> str:
+        lines = [
+            f"=== Pattern Analysis for Goal {self.goal_id} ===",
+            "",
+            f"Runs Analyzed: {self.run_count}",
+            f"Success Rate: {self.success_rate:.1%}",
+        ]
+
+        if self.common_failures:
+            lines.append("")
+            lines.append("Common Failures:")
+            for failure, count in self.common_failures:
+                lines.append(f"  - {failure} ({count} occurrences)")
+
+        if self.problematic_nodes:
+            lines.append("")
+            lines.append("Problematic Nodes (failure rate):")
+            for node, rate in self.problematic_nodes:
+                lines.append(f"  - {node}: {rate:.1%} failure rate")
+
+        return "\n".join(lines)
+
+
+class BuilderQuery:
+    """
+    The interface I (Builder) use to understand what agents are doing.
+
+    This is optimized for the questions I need to answer when analyzing
+    agent behavior and deciding what to improve.
+    """
+
+    def __init__(self, storage_path: str | Path) -> None:
+        """Create the query interface on top of a ``FileStorage`` at ``storage_path``."""
+        self.storage = FileStorage(storage_path)
+
+    # === WHAT HAPPENED? ===
+
+    def get_run_summary(self, run_id: str) -> RunSummary | None:
+        """Get a quick summary of a run.
+
+        Delegates directly to ``self.storage.load_summary``. Per the return
+        annotation the result may be ``None`` (presumably when no summary is
+        stored for ``run_id`` — confirm against ``FileStorage``).
+        """
+        return self.storage.load_summary(run_id)
+
+    def get_full_run(self, run_id: str) -> Run | None:
+        """Get the complete run with all decisions.
+
+        Delegates directly to ``self.storage.load_run``. Per the return
+        annotation the result may be ``None`` (presumably when the run is not
+        in storage — confirm against ``FileStorage``).
+        """
+        return self.storage.load_run(run_id)
+
+    def list_runs_for_goal(self, goal_id: str) -> list[RunSummary]:
+        """Get summaries of all runs for a goal."""
+        run_ids = self.storage.get_runs_by_goal(goal_id)
+        summaries = []
+        for run_id in run_ids:
+            summary = self.storage.load_summary(run_id)
+            if summary:
+                summaries.append(summary)
+        return summaries
+
+    def get_recent_failures(self, limit: int = 10) -> list[RunSummary]:
+        """Get recent failed runs."""
+        run_ids = self.storage.get_runs_by_status(RunStatus.FAILED)
+        summaries = []
+        for run_id in run_ids[:limit]:
+            summary = self.storage.load_summary(run_id)
+            if summary:
+                summaries.append(summary)
+        return summaries
+
+    # === WHY DID IT FAIL? ===
+
+    def analyze_failure(self, run_id: str) -> FailureAnalysis | None:
+        """
+        Deep analysis of why a run failed.
+
+        This is my primary tool for understanding what went wrong.
+        """
+        run = self.storage.load_run(run_id)
+        if run is None or run.status != RunStatus.FAILED:
+            return None
+
+        # Find the first failed decision
+        failed_decisions = [d for d in run.decisions if not d.was_successful]
+        if not failed_decisions:
+            failure_point = "Unknown - no decision marked as failed"
+            root_cause = "Run failed but all decisions succeeded (external cause?)"
+        else:
+            first_failure = failed_decisions[0]
+            failure_point = first_failure.summary_for_builder()
+            root_cause = first_failure.outcome.error if first_failure.outcome else "Unknown"
+
+        # Build the decision chain leading to failure
+        decision_chain = []
+        for d in run.decisions:
+            decision_chain.append(d.summary_for_builder())
+            if not d.was_successful:
+                break
+
+        # Extract problems
+        problems = [f"[{p.severity}] {p.description}" for p in run.problems]
+
+        # Generate suggestions based on the failure
+        suggestions = self._generate_suggestions(run, failed_decisions)
+
+        return FailureAnalysis(
+            run_id=run_id,
+            failure_point=failure_point,
+            root_cause=root_cause,
+            decision_chain=decision_chain,
+            problems=problems,
+            suggestions=suggestions,
+        )
+
+    def get_decision_trace(self, run_id: str) -> list[str]:
+        """Get a readable trace of all decisions in a run."""
+        run = self.storage.load_run(run_id)
+        if run is None:
+            return []
+        return [d.summary_for_builder() for d in run.decisions]
+
+    # === WHAT PATTERNS EMERGE? ===
+
+    def find_patterns(self, goal_id: str) -> PatternAnalysis | None:
+        """
+        Find patterns across runs for a goal.
+
+        This helps me understand systemic issues vs one-off failures.
+        """
+        run_ids = self.storage.get_runs_by_goal(goal_id)
+        if not run_ids:
+            return None
+
+        runs = []
+        for run_id in run_ids:
+            run = self.storage.load_run(run_id)
+            if run:
+                runs.append(run)
+
+        if not runs:
+            return None
+
+        # Calculate success rate
+        completed = [r for r in runs if r.status == RunStatus.COMPLETED]
+        success_rate = len(completed) / len(runs) if runs else 0.0
+
+        # Find common failures
+        failure_counts: dict[str, int] = defaultdict(int)
+        for run in runs:
+            for decision in run.decisions:
+                if not decision.was_successful and decision.outcome:
+                    error = decision.outcome.error or "Unknown error"
+                    failure_counts[error] += 1
+
+        common_failures = sorted(failure_counts.items(), key=lambda x: x[1], reverse=True)[:5]
+
+        # Find problematic nodes
+        node_stats: dict[str, dict[str, int]] = defaultdict(lambda: {"total": 0, "failed": 0})
+        for run in runs:
+            for decision in run.decisions:
+                node_stats[decision.node_id]["total"] += 1
+                if not decision.was_successful:
+                    node_stats[decision.node_id]["failed"] += 1
+
+        problematic_nodes = []
+        for node_id, stats in node_stats.items():
+            if stats["total"] > 0:
+                failure_rate = stats["failed"] / stats["total"]
+                if failure_rate > 0.1:  # More than 10% failure rate
+                    problematic_nodes.append((node_id, failure_rate))
+
+        problematic_nodes.sort(key=lambda x: x[1], reverse=True)
+
+        # Decision patterns
+        decision_patterns = self._analyze_decision_patterns(runs)
+
+        return PatternAnalysis(
+            goal_id=goal_id,
+            run_count=len(runs),
+            success_rate=success_rate,
+            common_failures=common_failures,
+            problematic_nodes=problematic_nodes,
+            decision_patterns=decision_patterns,
+        )
+
+    def compare_runs(self, run_id_1: str, run_id_2: str) -> dict[str, Any]:
+        """Compare two runs to understand what differed."""
+        run1 = self.storage.load_run(run_id_1)
+        run2 = self.storage.load_run(run_id_2)
+
+        if run1 is None or run2 is None:
+            return {"error": "One or both runs not found"}
+
+        return {
+            "run_1": {
+                "id": run1.id,
+                "status": run1.status.value,
+                "decisions": len(run1.decisions),
+                "success_rate": run1.metrics.success_rate,
+            },
+            "run_2": {
+                "id": run2.id,
+                "status": run2.status.value,
+                "decisions": len(run2.decisions),
+                "success_rate": run2.metrics.success_rate,
+            },
+            "differences": self._find_differences(run1, run2),
+        }
+
+    # === WHAT SHOULD WE CHANGE? ===
+
+    def suggest_improvements(self, goal_id: str) -> list[dict[str, Any]]:
+        """
+        Generate improvement suggestions based on run analysis.
+
+        This is what I use to propose changes to the human engineer.
+        """
+        patterns = self.find_patterns(goal_id)
+        if patterns is None:
+            return []
+
+        suggestions = []
+
+        # Suggestion: Fix problematic nodes
+        for node_id, failure_rate in patterns.problematic_nodes:
+            suggestions.append(
+                {
+                    "type": "node_improvement",
+                    "target": node_id,
+                    "reason": f"Node has {failure_rate:.1%} failure rate",
+                    "recommendation": (
+                        f"Review and improve node '{node_id}' - "
+                        "high failure rate suggests prompt or tool issues"
+                    ),
+                    "priority": "high" if failure_rate > 0.3 else "medium",
+                }
+            )
+
+        # Suggestion: Address common failures
+        for failure, count in patterns.common_failures:
+            if count >= 2:
+                suggestions.append(
+                    {
+                        "type": "error_handling",
+                        "target": failure,
+                        "reason": f"Error occurred {count} times",
+                        "recommendation": f"Add handling for: {failure}",
+                        "priority": "high" if count >= 5 else "medium",
+                    }
+                )
+
+        # Suggestion: Overall success rate
+        if patterns.success_rate < 0.8:
+            suggestions.append(
+                {
+                    "type": "architecture",
+                    "target": goal_id,
+                    "reason": f"Goal success rate is only {patterns.success_rate:.1%}",
+                    "recommendation": (
+                        "Consider restructuring the agent graph or improving goal definition"
+                    ),
+                    "priority": "high",
+                }
+            )
+
+        return suggestions
+
+    def get_node_performance(self, node_id: str) -> dict[str, Any]:
+        """Get performance metrics for a specific node across all runs."""
+        run_ids = self.storage.get_runs_by_node(node_id)
+
+        total_decisions = 0
+        successful_decisions = 0
+        total_latency = 0
+        total_tokens = 0
+        decision_types: dict[str, int] = defaultdict(int)
+
+        for run_id in run_ids:
+            run = self.storage.load_run(run_id)
+            if run:
+                for decision in run.decisions:
+                    if decision.node_id == node_id:
+                        total_decisions += 1
+                        if decision.was_successful:
+                            successful_decisions += 1
+                        if decision.outcome:
+                            total_latency += decision.outcome.latency_ms
+                            total_tokens += decision.outcome.tokens_used
+                        decision_types[decision.decision_type.value] += 1
+
+        return {
+            "node_id": node_id,
+            "total_decisions": total_decisions,
+            "success_rate": successful_decisions / total_decisions if total_decisions > 0 else 0,
+            "avg_latency_ms": total_latency / total_decisions if total_decisions > 0 else 0,
+            "total_tokens": total_tokens,
+            "decision_type_distribution": dict(decision_types),
+        }
+
+    # === PRIVATE HELPERS ===
+
+    def _generate_suggestions(
+        self,
+        run: Run,
+        failed_decisions: list[Decision],
+    ) -> list[str]:
+        """Generate suggestions based on failure analysis."""
+        suggestions = []
+
+        for decision in failed_decisions:
+            # Check if there were alternatives
+            if len(decision.options) > 1:
+                chosen = decision.chosen_option
+                alternatives = [o for o in decision.options if o.id != decision.chosen_option_id]
+                if alternatives:
+                    alt_desc = alternatives[0].description
+                    chosen_desc = chosen.description if chosen else "unknown"
+                    suggestions.append(
+                        f"Consider alternative: '{alt_desc}' instead of '{chosen_desc}'"
+                    )
+
+            # Check for missing context
+            if not decision.input_context:
+                suggestions.append(
+                    f"Decision '{decision.intent}' had no input context - "
+                    "ensure relevant data is passed"
+                )
+
+            # Check for constraint issues
+            if decision.active_constraints:
+                constraints = ", ".join(decision.active_constraints)
+                suggestions.append(f"Review constraints: {constraints} - may be too restrictive")
+
+        # Check for reported problems with suggestions
+        for problem in run.problems:
+            if problem.suggested_fix:
+                suggestions.append(problem.suggested_fix)
+
+        return suggestions
+
+    def _analyze_decision_patterns(self, runs: list[Run]) -> dict[str, Any]:
+        """Analyze decision patterns across runs."""
+        type_counts: dict[str, int] = defaultdict(int)
+        option_counts: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
+
+        for run in runs:
+            for decision in run.decisions:
+                type_counts[decision.decision_type.value] += 1
+
+                # Track which options are chosen for similar intents
+                intent_key = decision.intent[:50]  # Truncate for grouping
+                if decision.chosen_option:
+                    option_counts[intent_key][decision.chosen_option.description] += 1
+
+        # Find most common choices per intent
+        common_choices = {}
+        for intent, choices in option_counts.items():
+            if choices:
+                most_common = max(choices.items(), key=lambda x: x[1])
+                common_choices[intent] = {
+                    "choice": most_common[0],
+                    "count": most_common[1],
+                    "alternatives": len(choices) - 1,
+                }
+
+        return {
+            "decision_type_distribution": dict(type_counts),
+            "common_choices": common_choices,
+        }
+
+    def _find_differences(self, run1: Run, run2: Run) -> list[str]:
+        """Find key differences between two runs."""
+        differences = []
+
+        # Status difference
+        if run1.status != run2.status:
+            differences.append(f"Status: {run1.status.value} vs {run2.status.value}")
+
+        # Decision count difference
+        if len(run1.decisions) != len(run2.decisions):
+            differences.append(f"Decision count: {len(run1.decisions)} vs {len(run2.decisions)}")
+
+        # Find first divergence point
+        for i, (d1, d2) in enumerate(zip(run1.decisions, run2.decisions, strict=False)):
+            if d1.chosen_option_id != d2.chosen_option_id:
+                differences.append(
+                    f"Diverged at decision {i}: "
+                    f"chose '{d1.chosen_option_id}' vs '{d2.chosen_option_id}'"
+                )
+                break
+
+        # Node differences
+        nodes1 = set(run1.metrics.nodes_executed)
+        nodes2 = set(run2.metrics.nodes_executed)
+        if nodes1 != nodes2:
+            only_1 = nodes1 - nodes2
+            only_2 = nodes2 - nodes1
+            if only_1:
+                differences.append(f"Nodes only in run 1: {only_1}")
+            if only_2:
+                differences.append(f"Nodes only in run 2: {only_2}")
+
+        return differences
@@ -0,0 +1,807 @@
+"""
+GraphBuilder Workflow - Enforced incremental building with HITL approval.
+
+The build process:
+1. Define Goal → APPROVE
+2. Add Node → VALIDATE → TEST → APPROVE
+3. Add Edge → VALIDATE → TEST → APPROVE
+4. Repeat until graph is complete
+5. Final integration test → APPROVE
+6. Export
+
+Each step requires validation and human approval before proceeding.
+You cannot skip steps or bypass validation.
+"""
+
+from collections.abc import Callable
+from datetime import datetime
+from enum import StrEnum
+from pathlib import Path
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
+from framework.graph.goal import Goal
+from framework.graph.node import NodeSpec
+
+
+class BuildPhase(StrEnum):
+    """Current phase of the build process.
+
+    Members are strings (``StrEnum``), so each one compares equal to its
+    lowercase value and can be stored as plain text.
+    """
+
+    INIT = "init"  # Just started
+    GOAL_DRAFT = "goal_draft"  # Drafting goal
+    GOAL_APPROVED = "goal_approved"  # Goal approved
+    ADDING_NODES = "adding_nodes"  # Adding nodes
+    ADDING_EDGES = "adding_edges"  # Adding edges
+    TESTING = "testing"  # Running tests
+    APPROVED = "approved"  # Fully approved
+    EXPORTED = "exported"  # Exported to file
+
+
+class ValidationResult(BaseModel):
+    """Result of a validation check.
+
+    Only ``valid`` is required; the three message lists default to empty.
+    """
+
+    # Overall verdict supplied by whoever builds the result.
+    valid: bool
+    # Human-readable messages, split by severity.
+    errors: list[str] = Field(default_factory=list)
+    warnings: list[str] = Field(default_factory=list)
+    suggestions: list[str] = Field(default_factory=list)
+
+
+class TestCase(BaseModel):
+    """A test case for validating agent behavior.
+
+    ``id``, ``description`` and ``input`` are required; both expectations are
+    optional.
+    """
+
+    id: str
+    description: str
+    # Input payload for the test, keyed by name.
+    input: dict[str, Any]
+    expected_output: Any = None  # None means just check it doesn't error
+    # Optional text expectation (presumably a substring of the output — confirm in run_test).
+    expected_contains: str | None = None
+
+
+class TestResult(BaseModel):
+    """Result of running a test case.
+
+    ``test_id`` and ``passed`` are required; the remaining fields default to
+    "nothing recorded".
+    """
+
+    # Presumably the ``id`` of the TestCase that produced this result — confirm in run_test.
+    test_id: str
+    passed: bool
+    actual_output: Any = None
+    # Error text, if any was recorded.
+    error: str | None = None
+    # Ordered list of strings; presumably node ids visited — confirm in run_test.
+    execution_path: list[str] = Field(default_factory=list)
+
+
+class BuildSession(BaseModel):
+    """
+    Persistent build session state.
+
+    Saved after each approved step so you can resume later.
+    Only ``id`` and ``name`` are required; everything else starts empty.
+    """
+
+    id: str
+    name: str
+    phase: BuildPhase = BuildPhase.INIT
+    # ``datetime.now`` without a tz argument yields naive local timestamps.
+    created_at: datetime = Field(default_factory=datetime.now)
+    updated_at: datetime = Field(default_factory=datetime.now)
+
+    # The artifacts being built
+    goal: Goal | None = None
+    nodes: list[NodeSpec] = Field(default_factory=list)
+    edges: list[EdgeSpec] = Field(default_factory=list)
+
+    # Test cases
+    test_cases: list[TestCase] = Field(default_factory=list)
+    test_results: list[TestResult] = Field(default_factory=list)
+
+    # Approval history
+    approvals: list[dict[str, Any]] = Field(default_factory=list)
+
+    # Tools (stored as dicts for serialization)
+    tools: list[dict[str, Any]] = Field(default_factory=list)
+
+    # Accept fields not declared above instead of rejecting them.
+    model_config = {"extra": "allow"}
+
+
+class GraphBuilder:
+    """
+    Enforced incremental graph building with HITL approval.
+
+    Usage:
+        builder = GraphBuilder("my-agent")
+
+        # Step 1: Define and approve goal
+        builder.set_goal(goal)
+        builder.validate()  # Must pass
+        builder.approve("Goal looks good")  # Human approval required
+
+        # Step 2: Add nodes one by one
+        builder.add_node(node_spec)
+        builder.validate()  # Must pass
+        builder.test(test_case)  # Must pass
+        builder.approve("Node works")
+
+        # Step 3: Add edges
+        builder.add_edge(edge_spec)
+        builder.validate()
+        builder.approve("Edge correct")
+
+        # Step 4: Final approval
+        builder.run_all_tests()
+        builder.final_approve("Ready for production")
+
+        # Step 5: Export
+        graph = builder.export()
+    """
+
+    def __init__(
+        self,
+        name: str,
+        storage_path: Path | str | None = None,
+        session_id: str | None = None,
+    ):
+        self.storage_path = Path(storage_path) if storage_path else Path.home() / ".core" / "builds"
+        self.storage_path.mkdir(parents=True, exist_ok=True)
+
+        if session_id:
+            self.session = self._load_session(session_id)
+        else:
+            self.session = BuildSession(
+                id=f"build_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
+                name=name,
+            )
+
+        self._pending_validation: ValidationResult | None = None
+
+    # =========================================================================
+    # PHASE 1: GOAL
+    # =========================================================================
+
+    def set_goal(self, goal: Goal) -> ValidationResult:
+        """
+        Set the goal for this agent.
+
+        Returns validation result. Must call approve() after validation passes.
+        """
+        self._require_phase([BuildPhase.INIT, BuildPhase.GOAL_DRAFT])
+
+        self.session.goal = goal
+        self.session.phase = BuildPhase.GOAL_DRAFT
+
+        validation = self._validate_goal(goal)
+        self._pending_validation = validation
+        self._save_session()
+
+        return validation
+
+    def _validate_goal(self, goal: Goal) -> ValidationResult:
+        """Validate a goal definition."""
+        errors = []
+        warnings = []
+        suggestions = []
+
+        if not goal.id:
+            errors.append("Goal must have an id")
+        if not goal.name:
+            errors.append("Goal must have a name")
+        if not goal.description:
+            errors.append("Goal must have a description")
+
+        if not goal.success_criteria:
+            errors.append("Goal must have at least one success criterion")
+        else:
+            for sc in goal.success_criteria:
+                if not sc.description:
+                    errors.append(f"Success criterion '{sc.id}' needs a description")
+
+        if not goal.constraints:
+            warnings.append("Consider adding constraints to define boundaries")
+
+        if not goal.required_capabilities:
+            suggestions.append("Specify required_capabilities (e.g., ['llm', 'tools'])")
+
+        return ValidationResult(
+            valid=len(errors) == 0,
+            errors=errors,
+            warnings=warnings,
+            suggestions=suggestions,
+        )
+
+    # =========================================================================
+    # PHASE 2: NODES
+    # =========================================================================
+
+    def add_node(self, node: NodeSpec) -> ValidationResult:
+        """
+        Add a node to the graph.
+
+        Returns validation result. Must call approve() after validation passes.
+        """
+        self._require_phase([BuildPhase.GOAL_APPROVED, BuildPhase.ADDING_NODES])
+
+        # Check for duplicate
+        if any(n.id == node.id for n in self.session.nodes):
+            return ValidationResult(
+                valid=False,
+                errors=[f"Node with id '{node.id}' already exists"],
+            )
+
+        self.session.nodes.append(node)
+        self.session.phase = BuildPhase.ADDING_NODES
+
+        validation = self._validate_node(node)
+        self._pending_validation = validation
+        self._save_session()
+
+        return validation
+
+    def _validate_node(self, node: NodeSpec) -> ValidationResult:
+        """Validate a node definition."""
+        errors = []
+        warnings = []
+        suggestions = []
+
+        if not node.id:
+            errors.append("Node must have an id")
+        if not node.name:
+            errors.append("Node must have a name")
+        if not node.description:
+            warnings.append(f"Node '{node.id}' should have a description")
+
+        # Type-specific validation
+        if node.node_type == "llm_tool_use":
+            if not node.tools:
+                errors.append(f"LLM tool node '{node.id}' must specify tools")
+            if not node.system_prompt:
+                warnings.append(f"LLM node '{node.id}' should have a system_prompt")
+
+        if node.node_type == "router":
+            if not node.routes:
+                errors.append(f"Router node '{node.id}' must specify routes")
+
+        if node.node_type == "function":
+            if not node.function:
+                errors.append(f"Function node '{node.id}' must specify function name")
+
+        # Check input/output keys
+        if not node.input_keys:
+            suggestions.append(f"Consider specifying input_keys for '{node.id}'")
+        if not node.output_keys:
+            suggestions.append(f"Consider specifying output_keys for '{node.id}'")
+
+        return ValidationResult(
+            valid=len(errors) == 0,
+            errors=errors,
+            warnings=warnings,
+            suggestions=suggestions,
+        )
+
+    def update_node(self, node_id: str, **updates) -> ValidationResult:
+        """Update an existing node."""
+        self._require_phase([BuildPhase.ADDING_NODES])
+
+        for i, node in enumerate(self.session.nodes):
+            if node.id == node_id:
+                node_dict = node.model_dump()
+                node_dict.update(updates)
+                updated_node = NodeSpec(**node_dict)
+                self.session.nodes[i] = updated_node
+
+                validation = self._validate_node(updated_node)
+                self._pending_validation = validation
+                self._save_session()
+                return validation
+
+        return ValidationResult(valid=False, errors=[f"Node '{node_id}' not found"])
+
+    def remove_node(self, node_id: str) -> ValidationResult:
+        """Remove a node (only if no edges reference it)."""
+        self._require_phase([BuildPhase.ADDING_NODES])
+
+        # Check for edge references
+        for edge in self.session.edges:
+            if edge.source == node_id or edge.target == node_id:
+                return ValidationResult(
+                    valid=False,
+                    errors=[f"Cannot remove node '{node_id}': referenced by edge '{edge.id}'"],
+                )
+
+        self.session.nodes = [n for n in self.session.nodes if n.id != node_id]
+        self._save_session()
+
+        return ValidationResult(valid=True)
+
+    # =========================================================================
+    # PHASE 3: EDGES
+    # =========================================================================
+
+    def add_edge(self, edge: EdgeSpec) -> ValidationResult:
+        """
+        Add an edge to the graph.
+
+        Returns validation result. Must call approve() after validation passes.
+        """
+        self._require_phase([BuildPhase.ADDING_NODES, BuildPhase.ADDING_EDGES])
+
+        # Check for duplicate
+        if any(e.id == edge.id for e in self.session.edges):
+            return ValidationResult(
+                valid=False,
+                errors=[f"Edge with id '{edge.id}' already exists"],
+            )
+
+        self.session.edges.append(edge)
+        self.session.phase = BuildPhase.ADDING_EDGES
+
+        validation = self._validate_edge(edge)
+        self._pending_validation = validation
+        self._save_session()
+
+        return validation
+
+    def _validate_edge(self, edge: EdgeSpec) -> ValidationResult:
+        """Validate an edge definition."""
+        errors = []
+        warnings = []
+
+        if not edge.id:
+            errors.append("Edge must have an id")
+
+        # Check source exists
+        if not any(n.id == edge.source for n in self.session.nodes):
+            errors.append(f"Edge source '{edge.source}' not found in nodes")
+
+        # Check target exists
+        if not any(n.id == edge.target for n in self.session.nodes):
+            errors.append(f"Edge target '{edge.target}' not found in nodes")
+
+        # Warn about conditional edges without expressions
+        if edge.condition == EdgeCondition.CONDITIONAL and not edge.condition_expr:
+            warnings.append(f"Conditional edge '{edge.id}' has no condition_expr")
+
+        return ValidationResult(
+            valid=len(errors) == 0,
+            errors=errors,
+            warnings=warnings,
+        )
+
+    # =========================================================================
+    # VALIDATION & TESTING
+    # =========================================================================
+
+    def validate(self) -> ValidationResult:
+        """Validate the entire current graph state.
+
+        Checks that a goal is set, that at least one node exists, that an entry
+        node (no incoming edge) and a terminal node (no outgoing edge) can be
+        inferred, and that every node is reachable from the first entry candidate.
+
+        Returns:
+            The validation outcome. It is also stored as the pending validation,
+            which is what ``approve()`` acts on.
+        """
+        errors: list[str] = []
+        warnings: list[str] = []
+
+        # Must have a goal
+        if not self.session.goal:
+            errors.append("No goal defined")
+            validation = ValidationResult(valid=False, errors=errors)
+            # Record the failure as well, so approve() cannot act on a stale
+            # result left over from an earlier validation.
+            self._pending_validation = validation
+            return validation
+
+        nodes = self.session.nodes
+        edges = self.session.edges
+
+        # Must have at least one node
+        if not nodes:
+            errors.append("No nodes defined")
+
+        # Collect edge endpoints once instead of rescanning every edge per node.
+        edge_targets = {e.target for e in edges}
+        edge_sources = {e.source for e in edges}
+
+        # A node is an entry candidate if no edges point to it
+        entry_candidates = [n.id for n in nodes if n.id not in edge_targets]
+        if not entry_candidates and nodes:
+            errors.append("No entry node found (all nodes have incoming edges)")
+        elif len(entry_candidates) > 1:
+            warnings.append(f"Multiple entry candidates: {entry_candidates}. Specify one.")
+
+        # A node is a terminal candidate if no edges leave it
+        terminal_candidates = [n.id for n in nodes if n.id not in edge_sources]
+        if not terminal_candidates and nodes:
+            warnings.append("No terminal nodes found (all nodes have outgoing edges)")
+
+        # Check reachability from the first entry candidate only
+        if entry_candidates:
+            reachable = self._compute_reachable(entry_candidates[0])
+            unreachable = [n.id for n in nodes if n.id not in reachable]
+            if unreachable:
+                errors.append(f"Unreachable nodes: {unreachable}")
+
+        validation = ValidationResult(
+            valid=not errors,
+            errors=errors,
+            warnings=warnings,
+        )
+        self._pending_validation = validation
+        return validation
+
+    def _compute_reachable(self, start: str) -> set[str]:
+        """Compute all nodes reachable from start.
+
+        Follows both session edges (source -> target) and the targets listed in
+        a node's ``routes`` mapping. ``start`` itself is always part of the result.
+
+        Args:
+            start: Id of the node to start the traversal from.
+
+        Returns:
+            The set of ids visited, including ids that appear only as an edge or
+            route target.
+        """
+        # Build the successor map once; rescanning every edge and node for each
+        # visited id made the traversal quadratic.
+        successors: dict[str, list[str]] = {}
+        for edge in self.session.edges:
+            successors.setdefault(edge.source, []).append(edge.target)
+        for node in self.session.nodes:
+            if node.routes:
+                successors.setdefault(node.id, []).extend(node.routes.values())
+
+        reachable: set[str] = set()
+        to_visit = [start]
+
+        while to_visit:
+            current = to_visit.pop()
+            if current in reachable:
+                continue
+            reachable.add(current)
+            to_visit.extend(successors.get(current, ()))
+
+        return reachable
+
+    def add_test(self, test: TestCase) -> None:
+        """Register ``test`` on the session, then persist the session."""
+        registered = self.session.test_cases
+        registered.append(test)
+        self._save_session()
+
+    def run_test(
+        self,
+        test: TestCase,
+        executor_factory: Callable,
+    ) -> TestResult:
+        """
+        Run a single test case.
+
+        executor_factory should return a configured GraphExecutor.
+
+        The session is moved to the TESTING phase, a graph is built from the
+        current session and executed with ``test.input``. The resulting
+        TestResult is appended to ``session.test_results`` and the session is
+        saved, whether the test passed or not.
+
+        A test passes when the execution reports success and, if set,
+        ``output["result"]`` equals ``test.expected_output`` and
+        ``test.expected_contains`` occurs in ``str(output)``.
+
+        Raises:
+            RuntimeError: If the session is not in the ADDING_NODES,
+                ADDING_EDGES or TESTING phase. Errors raised while building or
+                executing the graph are not propagated; they are recorded as a
+                failed TestResult carrying the error text.
+        """
+        self._require_phase([BuildPhase.ADDING_NODES, BuildPhase.ADDING_EDGES, BuildPhase.TESTING])
+        self.session.phase = BuildPhase.TESTING
+
+        try:
+            # Build temporary graph for testing
+            graph = self._build_graph()
+            executor = executor_factory()
+
+            # Run the test
+            import asyncio
+
+            # NOTE: asyncio.run() raises RuntimeError when an event loop is already
+            # running in this thread; the handler below then reports that as a
+            # failed test instead of raising.
+            result = asyncio.run(
+                executor.execute(
+                    graph=graph,
+                    goal=self.session.goal,
+                    input_data=test.input,
+                )
+            )
+
+            # Check result
+            passed = result.success
+            if test.expected_output is not None:
+                # `and` short-circuits: the output is only inspected after a successful run
+                passed = passed and (result.output.get("result") == test.expected_output)
+            if test.expected_contains:
+                # Substring match against the string form of the whole output
+                output_str = str(result.output)
+                passed = passed and (test.expected_contains in output_str)
+
+            test_result = TestResult(
+                test_id=test.id,
+                passed=passed,
+                actual_output=result.output,
+                execution_path=result.path,
+            )
+
+        except Exception as e:
+            # Any failure while building, executing or checking counts as a failed test
+            test_result = TestResult(
+                test_id=test.id,
+                passed=False,
+                error=str(e),
+            )
+
+        # Results accumulate: running the same test again appends another entry
+        self.session.test_results.append(test_result)
+        self._save_session()
+
+        return test_result
+
+    def run_all_tests(self, executor_factory: Callable) -> list[TestResult]:
+        """Run every registered test case in order and return the results."""
+        return [self.run_test(case, executor_factory) for case in self.session.test_cases]
+
+    # =========================================================================
+    # APPROVAL
+    # =========================================================================
+
+    def approve(self, comment: str) -> bool:
+        """
+        Approve the change covered by the pending validation.
+
+        Returns False without recording anything when the pending validation
+        is not valid. Otherwise an approval entry is appended to the session,
+        a GOAL_DRAFT session advances to GOAL_APPROVED, the pending validation
+        is cleared, the session is saved and True is returned.
+
+        Raises:
+            RuntimeError: If there is no pending validation.
+        """
+        pending = self._pending_validation
+        if pending is None:
+            raise RuntimeError("Nothing to approve. Run validation first.")
+        if not pending.valid:
+            return False
+
+        session = self.session
+        record = {
+            "phase": session.phase.value,
+            "comment": comment,
+            "timestamp": datetime.now().isoformat(),
+            "validation": pending.model_dump(),
+        }
+        session.approvals.append(record)
+
+        # Only the goal draft advances on approval; other phases stay as they are
+        if session.phase == BuildPhase.GOAL_DRAFT:
+            session.phase = BuildPhase.GOAL_APPROVED
+
+        self._pending_validation = None
+        self._save_session()
+        return True
+
+    def final_approve(self, comment: str) -> bool:
+        """
+        Final approval for the complete graph.
+
+        Requires the graph to validate and, when test cases are registered,
+        every one of them to have been run with a passing most-recent result.
+
+        Returns True and moves the session to the APPROVED phase on success.
+        Returns False otherwise, leaving the reasons in the pending validation.
+        """
+        # Run final validation
+        validation = self.validate()
+        if not validation.valid:
+            self._pending_validation = validation
+            return False
+
+        # Check test results
+        if self.session.test_cases:
+            # run_test() appends a result on every run, so the last result per
+            # test id supersedes earlier ones; an old failure must not block a
+            # test that has since passed.
+            latest = {r.test_id: r for r in self.session.test_results}
+            failed_ids = [test_id for test_id, r in latest.items() if not r.passed]
+            # A registered test without any result has not passed either.
+            not_run_ids = [t.id for t in self.session.test_cases if t.id not in latest]
+
+            problems = []
+            if failed_ids:
+                problems.append(f"Failed tests: {failed_ids}")
+            if not_run_ids:
+                problems.append(f"Tests not run: {not_run_ids}")
+            if problems:
+                self._pending_validation = ValidationResult(valid=False, errors=problems)
+                return False
+
+        self.session.phase = BuildPhase.APPROVED
+        self.session.approvals.append(
+            {
+                "phase": "final",
+                "comment": comment,
+                "timestamp": datetime.now().isoformat(),
+            }
+        )
+
+        self._save_session()
+        return True
+
+    # =========================================================================
+    # EXPORT
+    # =========================================================================
+
+    def export(self) -> GraphSpec:
+        """
+        Build and return the graph of an approved session.
+
+        The session is moved to the EXPORTED phase and saved before returning.
+
+        Raises:
+            RuntimeError: If the session is not in the APPROVED phase.
+        """
+        self._require_phase([BuildPhase.APPROVED])
+        exported = self._build_graph()
+
+        # Mark the session as exported only after the graph was built successfully
+        self.session.phase = BuildPhase.EXPORTED
+        self._save_session()
+        return exported
+
+    def _build_graph(self) -> GraphSpec:
+        """Assemble a GraphSpec from the nodes and edges held by the session.
+
+        The entry node is the first node that no edge targets (empty string
+        when there is none); terminal nodes are all nodes that no edge leaves.
+        Memory keys are the union of every node's input and output keys.
+        """
+        session = self.session
+        nodes = session.nodes
+        edges = session.edges
+
+        # First node without an incoming edge, if any
+        entry_node = next(
+            (n.id for n in nodes if all(e.target != n.id for e in edges)),
+            None,
+        )
+
+        # Every node without an outgoing edge
+        terminal_nodes = [n.id for n in nodes if all(e.source != n.id for e in edges)]
+
+        memory_keys = set()
+        for n in nodes:
+            memory_keys.update(n.input_keys, n.output_keys)
+
+        goal = session.goal
+        return GraphSpec(
+            id=f"{session.name}-graph",
+            goal_id=goal.id if goal else "",
+            entry_node=entry_node or "",
+            terminal_nodes=terminal_nodes,
+            nodes=nodes,
+            edges=edges,
+            memory_keys=list(memory_keys),
+        )
+
+    def export_to_file(self, path: Path | str) -> None:
+        """Export the graph to a Python file.
+
+        Builds the graph, generates its source code, writes it to ``path`` and
+        then moves the session to the EXPORTED phase and saves it.
+
+        Args:
+            path: Destination of the generated Python file.
+
+        Raises:
+            RuntimeError: If the session is not in the APPROVED or EXPORTED phase.
+        """
+        self._require_phase([BuildPhase.APPROVED, BuildPhase.EXPORTED])
+
+        graph = self._build_graph()
+
+        # Generate Python code
+        code = self._generate_code(graph)
+
+        # Python source is decoded as UTF-8 by default, so write UTF-8 explicitly
+        # instead of relying on the platform's locale encoding.
+        Path(path).write_text(code, encoding="utf-8")
+        self.session.phase = BuildPhase.EXPORTED
+        self._save_session()
+
+    def _generate_code(self, graph: GraphSpec) -> str:
+        """Generate Python code for the graph.
+
+        The module defines ``GOAL``, ``NODES``, ``EDGES`` and ``GRAPH``, each
+        rebuilt from the JSON dump of the corresponding model.
+
+        Args:
+            graph: The graph whose JSON dump becomes ``GRAPH``.
+
+        Returns:
+            The source text of the generated module.
+        """
+
+        def model_literal(
+            target: str, json_text: str, indent: str = "", tail: str = ""
+        ) -> list[str]:
+            """Return source lines for ``<target>.model_validate_json(<json_text literal>)``."""
+            head = f"{indent}{target}.model_validate_json("
+            if "'''" in json_text:
+                # A triple-quoted literal cannot contain its own delimiter;
+                # repr() always yields a literal that evaluates back to the text.
+                return [f"{head}{json_text!r}){tail}"]
+            # The literal must be raw: in a normal string Python would interpret
+            # JSON escapes such as \n or \" itself and corrupt the JSON.
+            return [f"{head}r'''", json_text, f"{indent}'''){tail}"]
+
+        lines = [
+            '"""',
+            f"Generated agent: {self.session.name}",
+            f"Generated at: {datetime.now().isoformat()}",
+            '"""',
+            "",
+            "from framework.graph import (",
+            "    Goal, SuccessCriterion, Constraint,",
+            "    NodeSpec, EdgeSpec, EdgeCondition,",
+            ")",
+            "from framework.graph.edge import GraphSpec",
+            "from framework.graph.goal import GoalStatus",
+            "",
+            "",
+            "# Goal",
+        ]
+
+        if self.session.goal:
+            lines.extend(model_literal("GOAL = Goal", self.session.goal.model_dump_json(indent=4)))
+        else:
+            lines.append("GOAL = None")
+
+        lines.extend(["", "", "# Nodes", "NODES = ["])
+        for node in self.session.nodes:
+            lines.extend(
+                model_literal("NodeSpec", node.model_dump_json(indent=4), indent="    ", tail=",")
+            )
+
+        lines.extend(["]", "", "", "# Edges", "EDGES = ["])
+        for edge in self.session.edges:
+            lines.extend(
+                model_literal("EdgeSpec", edge.model_dump_json(indent=4), indent="    ", tail=",")
+            )
+
+        lines.extend(["]", "", "", "# Graph"])
+        lines.extend(model_literal("GRAPH = GraphSpec", graph.model_dump_json(indent=4)))
+
+        return "\n".join(lines)
+
+    # =========================================================================
+    # SESSION MANAGEMENT
+    # =========================================================================
+
+    def _require_phase(self, allowed: list[BuildPhase]) -> None:
+        """Raise RuntimeError unless the session's current phase is one of ``allowed``."""
+        current = self.session.phase
+        if current in allowed:
+            return
+
+        allowed_names = [phase.value for phase in allowed]
+        raise RuntimeError(
+            f"Cannot perform this action in phase '{current.value}'. "
+            f"Allowed phases: {allowed_names}"
+        )
+
+    def _save_session(self) -> None:
+        """Save session to disk.
+
+        Stamps ``updated_at`` with the current time and writes the session as
+        JSON to ``<storage_path>/<session id>.json``.
+        """
+        self.session.updated_at = datetime.now()
+        path = self.storage_path / f"{self.session.id}.json"
+        # JSON is written as UTF-8 explicitly so the file does not depend on the
+        # platform's locale encoding; _load_session reads it back the same way.
+        path.write_text(self.session.model_dump_json(indent=2), encoding="utf-8")
+
+    def _load_session(self, session_id: str) -> BuildSession:
+        """Load session from disk.
+
+        Args:
+            session_id: Id of the session; its file is ``<storage_path>/<session_id>.json``.
+
+        Raises:
+            FileNotFoundError: If no file exists for ``session_id``.
+        """
+        path = self.storage_path / f"{session_id}.json"
+        if not path.exists():
+            raise FileNotFoundError(f"Session not found: {session_id}")
+        # Must match the encoding used by _save_session
+        return BuildSession.model_validate_json(path.read_text(encoding="utf-8"))
+
+    @classmethod
+    def list_sessions(cls, storage_path: Path | str | None = None) -> list[str]:
+        """Return the ids (file stems) of the ``*.json`` session files in the storage directory.
+
+        Falls back to ``~/.core/builds`` when ``storage_path`` is not given, and
+        returns an empty list when the directory does not exist.
+        """
+        if storage_path:
+            root = Path(storage_path)
+        else:
+            root = Path.home() / ".core" / "builds"
+
+        if not root.exists():
+            return []
+        return [entry.stem for entry in root.glob("*.json")]
+
+    # =========================================================================
+    # STATUS
+    # =========================================================================
+
+    def status(self) -> dict[str, Any]:
+        """Summarise the build: identity, phase, goal name, counts and pending validation."""
+        session = self.session
+        pending = self._pending_validation
+
+        goal_name = session.goal.name if session.goal else None
+        passed_count = len([r for r in session.test_results if r.passed])
+        pending_dump = pending.model_dump() if pending else None
+
+        return {
+            "session_id": session.id,
+            "name": session.name,
+            "phase": session.phase.value,
+            "goal": goal_name,
+            "nodes": len(session.nodes),
+            "edges": len(session.edges),
+            "tests": len(session.test_cases),
+            "tests_passed": passed_count,
+            "approvals": len(session.approvals),
+            "pending_validation": pending_dump,
+        }
+
+    def show(self) -> str:
+        """Render the build as text: header, goal, nodes, edges and pending validation.
+
+        Sections without content (no goal, no nodes, no edges, nothing pending)
+        are left out.
+        """
+        session = self.session
+        out = [f"=== Build: {session.name} ===", f"Phase: {session.phase.value}", ""]
+
+        goal = session.goal
+        if goal:
+            out += [f"Goal: {goal.name}", f"  {goal.description}", ""]
+
+        if session.nodes:
+            out.append("Nodes:")
+            out += [f"  [{n.id}] {n.name} ({n.node_type})" for n in session.nodes]
+            out.append("")
+
+        if session.edges:
+            out.append("Edges:")
+            out += [f"  {e.source} --{e.condition.value}--> {e.target}" for e in session.edges]
+            out.append("")
+
+        pending = self._pending_validation
+        if pending:
+            out.append("Pending Validation:")
+            out.append(f"  Valid: {pending.valid}")
+            out += [f"  ERROR: {msg}" for msg in pending.errors]
+            out += [f"  WARN: {msg}" for msg in pending.warnings]
+
+        return "\n".join(out)
@@ -0,0 +1,92 @@
+"""
+Command-line interface for Aden Hive.
+
+Usage:
+    hive run exports/my-agent --input '{"key": "value"}'
+    hive info exports/my-agent
+    hive validate exports/my-agent
+    hive list exports/
+    hive dispatch exports/ --input '{"key": "value"}'
+    hive shell exports/my-agent
+
+Testing commands:
+    hive test-run <agent_path> --goal <goal_id>
+    hive test-debug <goal_id> <test_id>
+    hive test-list <goal_id>
+    hive test-stats <goal_id>
+"""
+
+import argparse
+import sys
+from pathlib import Path
+
+
+def _configure_paths():
+    """Put the project's agent and framework directories on sys.path.
+
+    The project root is taken to be three levels above this file
+    (core/framework/cli.py -> project root). If that directory has neither an
+    exports/ nor a core/ sub-directory, the current working directory is used
+    instead. Each of exports/, examples/templates/ and core/ that exists under
+    the root is inserted at the front of sys.path unless it is already listed.
+    """
+    here = Path(__file__).resolve()
+    project_root = here.parent.parent.parent  # framework/ -> core/ -> project root
+
+    # Fall back to CWD when the computed root does not look like the project
+    looks_like_root = (project_root / "exports").is_dir() or (project_root / "core").is_dir()
+    if not looks_like_root:
+        project_root = Path.cwd()
+
+    # Every hit is inserted at index 0, so directories later in this tuple end
+    # up ahead of earlier ones on sys.path.
+    candidates = (
+        project_root / "exports",
+        project_root / "examples" / "templates",
+        project_root / "core",
+    )
+    for directory in candidates:
+        if not directory.is_dir():
+            continue
+        entry = str(directory)
+        if entry not in sys.path:
+            sys.path.insert(0, entry)
+
+
+def main():
+    """Entry point of the ``hive`` CLI: build the parser, register commands, dispatch.
+
+    Exits the process with the selected command handler's return value. Returns
+    normally when the parsed arguments carry no ``func`` handler.
+    """
+    _configure_paths()
+
+    parser = argparse.ArgumentParser(
+        prog="hive", description="Aden Hive - Build and run goal-driven agents"
+    )
+    parser.add_argument(
+        "--model", default="claude-haiku-4-5-20251001", help="Anthropic model to use"
+    )
+    subparsers = parser.add_subparsers(dest="command", required=True)
+
+    # Runner commands (run, info, validate, list, dispatch, shell); the import
+    # stays inside main(), after _configure_paths() has adjusted sys.path.
+    from framework.runner.cli import register_commands
+
+    register_commands(subparsers)
+
+    # Testing commands (test-run, test-debug, test-list, test-stats)
+    from framework.testing.cli import register_testing_commands
+
+    register_testing_commands(subparsers)
+
+    args = parser.parse_args()
+    if not hasattr(args, "func"):
+        return
+    sys.exit(args.func(args))
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,74 @@
+"""Shared Hive configuration utilities.
+
+Centralises reading of ~/.hive/configuration.json so that the runner
+and every agent template share one implementation instead of copy-pasting
+helper functions.
+"""
+
+import json
+import os
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+
+from framework.graph.edge import DEFAULT_MAX_TOKENS
+
+# ---------------------------------------------------------------------------
+# Low-level config file access
+# ---------------------------------------------------------------------------
+
+HIVE_CONFIG_FILE = Path.home() / ".hive" / "configuration.json"
+
+
+def get_hive_config() -> dict[str, Any]:
+    """Load hive configuration from ~/.hive/configuration.json.
+
+    Loading is best-effort: an empty dict is returned when the file is missing,
+    cannot be read or decoded, is not valid JSON, or does not hold a JSON object.
+    """
+    if not HIVE_CONFIG_FILE.exists():
+        return {}
+    try:
+        # utf-8-sig also accepts files that start with a UTF-8 byte order mark
+        with open(HIVE_CONFIG_FILE, encoding="utf-8-sig") as f:
+            config = json.load(f)
+    except (ValueError, OSError):
+        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError
+        return {}
+    # Callers chain .get() on the result, so anything but an object counts as empty
+    return config if isinstance(config, dict) else {}
+
+
+# ---------------------------------------------------------------------------
+# Derived helpers
+# ---------------------------------------------------------------------------
+
+
+def get_preferred_model() -> str:
+    """Return the configured model as ``provider/model``.
+
+    Falls back to 'anthropic/claude-sonnet-4-20250514' unless the ``llm``
+    section of the configuration sets both ``provider`` and ``model``.
+    """
+    llm = get_hive_config().get("llm", {})
+    provider = llm.get("provider")
+    model = llm.get("model")
+    if not (provider and model):
+        return "anthropic/claude-sonnet-4-20250514"
+    return f"{provider}/{model}"
+
+
+def get_max_tokens() -> int:
+    """Return ``llm.max_tokens`` from the configuration, or DEFAULT_MAX_TOKENS when unset."""
+    llm_section = get_hive_config().get("llm", {})
+    return llm_section.get("max_tokens", DEFAULT_MAX_TOKENS)
+
+
+def get_api_key() -> str | None:
+    """Return the API key held in the environment variable named by ``llm.api_key_env_var``.
+
+    Returns None when the configuration names no variable or the variable is unset.
+    """
+    env_var_name = get_hive_config().get("llm", {}).get("api_key_env_var")
+    return os.environ.get(env_var_name) if env_var_name else None
+
+
+# ---------------------------------------------------------------------------
+# RuntimeConfig – shared across agent templates
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class RuntimeConfig:
+    """Agent runtime configuration loaded from ~/.hive/configuration.json.
+
+    ``model``, ``max_tokens`` and ``api_key`` take their defaults from the
+    configuration helpers each time an instance is created; ``temperature``
+    and ``api_base`` have fixed defaults here.
+    """
+
+    # "provider/model" string from get_preferred_model()
+    model: str = field(default_factory=get_preferred_model)
+    # Fixed default; not read from the configuration file by this class
+    temperature: float = 0.7
+    # From get_max_tokens()
+    max_tokens: int = field(default_factory=get_max_tokens)
+    # From get_api_key(); None when no key is configured or the variable is unset
+    api_key: str | None = field(default_factory=get_api_key)
+    # No default endpoint is set here
+    api_base: str | None = None
--- a/Show More
+++ b/Show More