refactor: remove all old unused skills

2026-03-04 16:18:28 -08:00
parent 34a44aa83c
commit 13a8e28ae2
52 changed files with 0 additions and 6522 deletions
@@ -1,9 +0,0 @@
-{
-  "mcpServers": {
-    "agent-builder": {
-      "command": "uv",
-      "args": ["run", "--directory", "core", "-m", "framework.mcp.agent_builder_server"],
-      "disabled": false
-    }
-  }
-}
@@ -1 +0,0 @@
-../../.claude/skills/hive
@@ -1 +0,0 @@
-../../.claude/skills/hive-concepts
@@ -1 +0,0 @@
-../../.claude/skills/hive-create
@@ -1 +0,0 @@
-../../.claude/skills/hive-credentials
@@ -1 +0,0 @@
-../../.claude/skills/hive-patterns
@@ -1 +0,0 @@
-../../.claude/skills/hive-test
@@ -1,5 +0,0 @@
---
-description: hive-concepts
---
-
-use hive-concepts skill
@@ -1,5 +0,0 @@
---
-description: hive-create
---
-
-use hive-create skill
@@ -1,5 +0,0 @@
---
-description: hive-credentials
---
-
-use hive-credentials skill
@@ -1,5 +0,0 @@
---
-description: hive-patterns
---
-
-use hive-patterns skill
@@ -1,5 +0,0 @@
---
-description: hive-test
---
-
-use hive-test skill
@@ -1,5 +0,0 @@
---
-description: hive
---
-
-use hive skill
@@ -1 +0,0 @@
-../../.claude/skills/hive
@@ -1 +0,0 @@
-../../.claude/skills/hive-concepts
@@ -1 +0,0 @@
-../../.claude/skills/hive-create
@@ -1 +0,0 @@
-../../.claude/skills/hive-credentials
@@ -1 +0,0 @@
-../../.claude/skills/hive-patterns
@@ -1 +0,0 @@
-../../.claude/skills/hive-test
@@ -1,399 +0,0 @@
---
-name: hive-concepts
-description: Core concepts for goal-driven agents - architecture, node types (event_loop, function), tool discovery, and workflow overview. Use when starting agent development or need to understand agent fundamentals.
-license: Apache-2.0
-metadata:
-  author: hive
-  version: "2.0"
-  type: foundational
-  part_of: hive
---
-
-# Building Agents - Core Concepts
-
-Foundational knowledge for building goal-driven agents as Python packages.
-
-## Architecture: Python Services (Not JSON Configs)
-
-Agents are built as Python packages:
-
-```
-exports/my_agent/
-├── __init__.py          # Package exports
-├── __main__.py          # CLI (run, info, validate, shell)
-├── agent.py             # Graph construction (goal, edges, agent class)
-├── nodes/__init__.py    # Node definitions (NodeSpec)
-├── config.py            # Runtime config
-└── README.md            # Documentation
-```
-
-**Key Principle: Agent is visible and editable during build**
-
- Files created immediately as components are approved
- User can watch files grow in their editor
- No session state - just direct file writes
- No "export" step - agent is ready when build completes
-
-## Core Concepts
-
-### Goal
-
-Success criteria and constraints (written to agent.py)
-
-```python
-goal = Goal(
-    id="research-goal",
-    name="Technical Research Agent",
-    description="Research technical topics thoroughly",
-    success_criteria=[
-        SuccessCriterion(
-            id="completeness",
-            description="Cover all aspects of topic",
-            metric="coverage_score",
-            target=">=0.9",
-            weight=0.4,
-        ),
-        # 3-5 success criteria total
-    ],
-    constraints=[
-        Constraint(
-            id="accuracy",
-            description="All information must be verified",
-            constraint_type="hard",
-            category="quality",
-        ),
-        # 1-5 constraints total
-    ],
-)
-```
-
-### Node
-
-Unit of work (written to nodes/__init__.py)
-
-**Node Types:**
-
- `event_loop` — Multi-turn streaming loop with tool execution and judge-based evaluation. Works with or without tools.
- `function` — Deterministic Python operations. No LLM involved.
-
-```python
-search_node = NodeSpec(
-    id="search-web",
-    name="Search Web",
-    description="Search for information and extract results",
-    node_type="event_loop",
-    input_keys=["query"],
-    output_keys=["search_results"],
-    system_prompt="Search the web for: {query}. Use the web_search tool to find results, then call set_output to store them.",
-    tools=["web_search"],
-)
-```
-
-**NodeSpec Fields for Event Loop Nodes:**
-
-| Field | Default | Description |
-|-------|---------|-------------|
-| `client_facing` | `False` | If True, streams output to user and blocks for input between turns |
-| `nullable_output_keys` | `[]` | Output keys that may remain unset (for mutually exclusive outputs) |
-| `max_node_visits` | `1` | Max times this node executes per run. Set >1 for feedback loop targets |
-
-### Edge
-
-Connection between nodes (written to agent.py)
-
-**Edge Conditions:**
-
- `on_success` — Proceed if node succeeds (most common)
- `on_failure` — Handle errors
- `always` — Always proceed
- `conditional` — Based on expression evaluating node output
-
-**Edge Priority:**
-
-Priority controls evaluation order when multiple edges leave the same node. Higher priority edges are evaluated first. Use negative priority for feedback edges (edges that loop back to earlier nodes).
-
-```python
-# Forward edge (evaluated first)
-EdgeSpec(
-    id="review-to-campaign",
-    source="review",
-    target="campaign-builder",
-    condition=EdgeCondition.CONDITIONAL,
-    condition_expr="output.get('approved_contacts') is not None",
-    priority=1,
-)
-
-# Feedback edge (evaluated after forward edges)
-EdgeSpec(
-    id="review-feedback",
-    source="review",
-    target="extractor",
-    condition=EdgeCondition.CONDITIONAL,
-    condition_expr="output.get('redo_extraction') is not None",
-    priority=-1,
-)
-```
-
-### Client-Facing Nodes
-
-For multi-turn conversations with the user, set `client_facing=True` on a node. The node will:
- Stream its LLM output directly to the end user
- Block for user input between conversational turns
- Resume when new input is injected via `inject_event()`
-
-```python
-intake_node = NodeSpec(
-    id="intake",
-    name="Intake",
-    description="Gather requirements from the user",
-    node_type="event_loop",
-    client_facing=True,
-    input_keys=[],
-    output_keys=["repo_url", "project_url"],
-    system_prompt="You are the intake agent. Ask the user for the repo URL and project URL.",
-)
-```
-
-> **Legacy Note:** The old `pause_nodes` / `entry_points` pattern still works but `client_facing=True` is preferred for new agents.
-
-**STEP 1 / STEP 2 Prompt Pattern:** For client-facing nodes, structure the system prompt with two explicit phases:
-
-```python
-system_prompt="""\
-**STEP 1 — Respond to the user (text only, NO tool calls):**
-[Present information, ask questions, etc.]
-
-**STEP 2 — After the user responds, call set_output:**
-[Call set_output with the structured outputs]
-"""
-```
-
-This prevents the LLM from calling `set_output` prematurely before the user has had a chance to respond.
-
-### Node Design: Fewer, Richer Nodes
-
-Prefer fewer nodes that do more work over many thin single-purpose nodes:
-
- **Bad**: 8 thin nodes (parse query → search → fetch → evaluate → synthesize → write → check → save)
- **Good**: 4 rich nodes (intake → research → review → report)
-
-Why: Each node boundary requires serializing outputs and passing context. Fewer nodes means the LLM retains full context of its work within the node. A research node that searches, fetches, and analyzes keeps all the source material in its conversation history.
-
-### nullable_output_keys for Cross-Edge Inputs
-
-When a node receives inputs that only arrive on certain edges (e.g., `feedback` only comes from a review → research feedback loop, not from intake → research), list those keys in `nullable_output_keys` so that they are allowed to be unset on visits where no edge supplies them:
-
-```python
-research_node = NodeSpec(
-    id="research",
-    input_keys=["research_brief", "feedback"],
-    nullable_output_keys=["feedback"],  # Not present on first visit
-    max_node_visits=3,
-    ...
-)
-```
-
-## Event Loop Architecture Concepts
-
-### How EventLoopNode Works
-
-An event loop node runs a multi-turn loop:
-1. LLM receives system prompt + conversation history
-2. LLM responds (text and/or tool calls)
-3. Tool calls are executed, results added to conversation
-4. Judge evaluates: ACCEPT (exit loop), RETRY (loop again), or ESCALATE
-5. Repeat until the judge ACCEPTs or `max_iterations` is reached
-
-### EventLoopNode Runtime
-
-EventLoopNodes are **auto-created** by `GraphExecutor` at runtime. You do NOT need to manually register them. Both `GraphExecutor` (direct) and `AgentRuntime` / `create_agent_runtime()` handle event_loop nodes automatically.
-
-```python
-# Direct execution — executor auto-creates EventLoopNodes
-from framework.graph.executor import GraphExecutor
-from framework.runtime.core import Runtime
-
-runtime = Runtime(storage_path)
-executor = GraphExecutor(
-    runtime=runtime,
-    llm=llm,
-    tools=tools,
-    tool_executor=tool_executor,
-    storage_path=storage_path,
-)
-result = await executor.execute(graph=graph, goal=goal, input_data=input_data)
-
-# TUI execution — AgentRuntime also works
-from framework.runtime.agent_runtime import create_agent_runtime
-runtime = create_agent_runtime(
-    graph=graph, goal=goal, storage_path=storage_path,
-    entry_points=[...], llm=llm, tools=tools, tool_executor=tool_executor,
-)
-```
-
-### set_output
-
-Nodes produce structured outputs by calling `set_output(key, value)` — a synthetic tool injected by the framework. When the LLM calls `set_output`, the value is stored in the output accumulator and made available to downstream nodes via shared memory.
-
-`set_output` is NOT a real tool — it is excluded from `real_tool_results`. For client-facing nodes, this means a turn where the LLM only calls `set_output` (no other tools) is treated as a conversational boundary and will block for user input.
-
-### JudgeProtocol
-
-**The judge is the SOLE mechanism for acceptance decisions.** Do not add ad-hoc framework gating, output rollback, or premature rejection logic. If the LLM calls `set_output` too early, fix it with better prompts or a custom judge — not framework-level guards.
-
-The judge controls when a node's loop exits:
- **Implicit judge** (default, no judge configured): ACCEPTs when the LLM finishes with no tool calls and all required output keys are set
- **SchemaJudge**: Validates outputs against a Pydantic model
- **Custom judges**: Implement `evaluate(context) -> JudgeVerdict`
-
-### LoopConfig
-
-Controls loop behavior:
- `max_iterations` (default 50) — prevents infinite loops
- `max_tool_calls_per_turn` (default 10) — limits tool calls per LLM response
- `tool_call_overflow_margin` (default 0.5) — tolerance before extra tool calls are discarded (0.5 means a hard cutoff at 150% of `max_tool_calls_per_turn`)
- `stall_detection_threshold` (default 3) — detects repeated identical responses
- `max_history_tokens` (default 32000) — triggers conversation compaction
-
-### Data Tools (Spillover Management)
-
-When tool results exceed the context window, the framework automatically saves them to a spillover directory and truncates with a hint. Nodes that produce or consume large data should include the data tools:
-
- `save_data(filename, data)` — Write data to a file in the data directory
- `load_data(filename, offset=0, limit=50)` — Read data with line-based pagination
- `list_data_files()` — List available data files
- `serve_file_to_user(filename, label="")` — Get a clickable file:// URI for the user
-
-Note: `data_dir` is a framework-injected context parameter — the LLM never sees or passes it. `GraphExecutor.execute()` sets it per-execution via `contextvars`, so data tools and spillover always share the same session-scoped directory.
-
-These are real MCP tools (not synthetic). Add them to nodes that handle large tool results:
-
-```python
-research_node = NodeSpec(
-    ...
-    tools=["web_search", "web_scrape", "load_data", "save_data", "list_data_files"],
-)
-```
-
-### Fan-Out / Fan-In
-
-Multiple ON_SUCCESS edges from the same source create parallel execution. All branches run concurrently via `asyncio.gather()`. Parallel event_loop nodes must have disjoint `output_keys`.
-
-### max_node_visits
-
-Controls how many times a node can execute in one graph run. The default is 1. Set it higher for nodes that are targets of feedback edges (review-reject loops). Set it to 0 for unlimited visits (the run is then bounded only by max_steps).
-
-## Tool Discovery & Validation
-
-**CRITICAL:** Before adding a node with tools, you MUST verify the tools exist.
-
-Tools are provided by MCP servers. Never assume a tool exists - always discover dynamically.
-
-### Step 1: Register MCP Server (if not already done)
-
-```python
-mcp__agent-builder__add_mcp_server(
-    name="tools",
-    transport="stdio",
-    command="python",
-    args='["mcp_server.py", "--stdio"]',
-    cwd="../tools"
-)
-```
-
-### Step 2: Discover Available Tools
-
-```python
-# List all tools from all registered servers
-mcp__agent-builder__list_mcp_tools()
-
-# Or list tools from a specific server
-mcp__agent-builder__list_mcp_tools(server_name="tools")
-```
-
-### Step 3: Validate Before Adding Nodes
-
-Before writing a node with `tools=[...]`:
-
-1. Call `list_mcp_tools()` to get available tools
-2. Check each tool in your node exists in the response
-3. If a tool doesn't exist:
-   - **DO NOT proceed** with the node
-   - Inform the user: "The tool 'X' is not available. Available tools are: ..."
-   - Ask if they want to use an alternative or proceed without the tool
-
-### Tool Validation Anti-Patterns
-
- **Never assume a tool exists** - always call `list_mcp_tools()` first
- **Never write a node with unverified tools** - validate before writing
- **Never silently drop tools** - if a tool doesn't exist, inform the user
- **Never guess tool names** - use exact names from discovery response
-
-## Workflow Overview: Incremental File Construction
-
-```
-1. CREATE PACKAGE → mkdir + write skeletons
-2. DEFINE GOAL → Write to agent.py + config.py
-3. FOR EACH NODE:
-   - Propose design (event_loop for LLM work, function for deterministic)
-   - User approves
-   - Write to nodes/__init__.py IMMEDIATELY
-   - (Optional) Validate with test_node
-4. CONNECT EDGES → Update agent.py
-   - Use priority for feedback edges (negative priority)
-   - (Optional) Validate with validate_graph
-5. FINALIZE → Write agent class to agent.py
-6. DONE - Agent ready at exports/my_agent/
-```
-
-**Files are written immediately. MCP tools are optional and are used only for validation and testing.**
-
-## When to Use This Skill
-
-Use hive-concepts when:
- Starting a new agent project and need to understand fundamentals
- Need to understand agent architecture before building
- Want to validate tool availability before proceeding
- Learning about node types, edges, and graph execution
-
-**Next Steps:**
- Ready to build? → Use `hive-create` skill
- Need patterns and examples? → Use `hive-patterns` skill
-
-## MCP Tools for Validation
-
-After writing files, optionally use MCP tools for validation:
-
-**test_node** - Validate node configuration with mock inputs
-```python
-mcp__agent-builder__test_node(
-    node_id="search-web",
-    test_input='{"query": "test query"}',
-    mock_llm_response='{"results": "mock output"}'
-)
-```
-
-**validate_graph** - Check graph structure
-```python
-mcp__agent-builder__validate_graph()
-# Returns: unreachable nodes, missing connections, event_loop validation, etc.
-```
-
-**configure_loop** - Set event loop parameters
-```python
-mcp__agent-builder__configure_loop(
-    max_iterations=50,
-    max_tool_calls_per_turn=10,
-    stall_detection_threshold=3,
-    max_history_tokens=32000
-)
-```
-
-**Key Point:** Files are written FIRST. MCP tools are for validation only.
-
-## Related Skills
-
- **hive-create** - Step-by-step building process
- **hive-patterns** - Best practices: judges, feedback edges, fan-out, context management
- **hive** - Complete workflow orchestrator
- **hive-test** - Test and validate completed agents
@@ -1,24 +0,0 @@
-"""
-Deep Research Agent - Interactive, rigorous research with TUI conversation.
-
-Research any topic through multi-source web search, quality evaluation,
-and synthesis. Features client-facing TUI interaction at key checkpoints
-for user guidance and iterative deepening.
-"""
-
-from .agent import DeepResearchAgent, default_agent, goal, nodes, edges
-from .config import RuntimeConfig, AgentMetadata, default_config, metadata
-
-# Package version string.
-__version__ = "1.0.0"
-
-# Public API of the package: the names re-exported from .agent and .config.
-__all__ = [
-    "DeepResearchAgent",
-    "default_agent",
-    "goal",
-    "nodes",
-    "edges",
-    "RuntimeConfig",
-    "AgentMetadata",
-    "default_config",
-    "metadata",
-]
@@ -1,241 +0,0 @@
-"""
-CLI entry point for Deep Research Agent.
-
-Uses AgentRuntime for multi-entrypoint support with HITL pause/resume.
-"""
-
-import asyncio
-import json
-import logging
-import sys
-import click
-
-from .agent import default_agent, DeepResearchAgent
-
-
-def setup_logging(verbose=False, debug=False):
-    """Set up stderr logging at a verbosity selected by the two flags.
-
-    ``debug`` wins over ``verbose``; with neither flag set, only warnings
-    and above are emitted. The ``framework`` logger is put on the same level
-    as the root configuration.
-    """
-    # (flag, level, format) rows in precedence order; the last row is the
-    # unconditional fallback.
-    profiles = (
-        (debug, logging.DEBUG, "%(asctime)s %(name)s: %(message)s"),
-        (verbose, logging.INFO, "%(message)s"),
-        (True, logging.WARNING, "%(levelname)s: %(message)s"),
-    )
-    log_level, log_format = next(
-        (lvl, pattern) for enabled, lvl, pattern in profiles if enabled
-    )
-    logging.basicConfig(level=log_level, format=log_format, stream=sys.stderr)
-    logging.getLogger("framework").setLevel(log_level)
-
-
-@click.group()
-@click.version_option(version="1.0.0")
-def cli():
-    """Deep Research Agent - Interactive, rigorous research with TUI conversation."""
-    # Pure command group: it has no behavior of its own, the subcommands
-    # attached with @cli.command() do the work.
-
-
-@cli.command()
-@click.option("--topic", "-t", type=str, required=True, help="Research topic")
-@click.option("--mock", is_flag=True, help="Run in mock mode")
-@click.option("--quiet", "-q", is_flag=True, help="Only output result JSON")
-@click.option("--verbose", "-v", is_flag=True, help="Show execution details")
-@click.option("--debug", is_flag=True, help="Show debug logging")
-def run(topic, mock, quiet, verbose, debug):
-    """Execute research on a topic."""
-    # With --quiet the logging configuration step is skipped altogether.
-    if not quiet:
-        setup_logging(verbose=verbose, debug=debug)
-
-    # One-shot execution: drive the default agent to completion.
-    outcome = asyncio.run(default_agent.run({"topic": topic}, mock_mode=mock))
-
-    # Summarize the run as JSON; the error field appears only when one is set.
-    payload = dict(
-        success=outcome.success,
-        steps_executed=outcome.steps_executed,
-        output=outcome.output,
-    )
-    if outcome.error:
-        payload["error"] = outcome.error
-    click.echo(json.dumps(payload, indent=2, default=str))
-
-    # The process exit status mirrors the run's success flag.
-    exit_status = 0 if outcome.success else 1
-    sys.exit(exit_status)
-
-
-@cli.command()
-@click.option("--mock", is_flag=True, help="Run in mock mode")
-@click.option("--verbose", "-v", is_flag=True, help="Show execution details")
-@click.option("--debug", is_flag=True, help="Show debug logging")
-def tui(mock, verbose, debug):
-    """Launch the TUI dashboard for interactive research."""
-    setup_logging(verbose=verbose, debug=debug)
-
-    # The TUI front end is imported lazily so that a failed import yields an
-    # install hint and exit status 1 instead of a traceback.
-    try:
-        from framework.tui.app import AdenTUI
-    except ImportError:
-        click.echo(
-            "TUI requires the 'textual' package. Install with: pip install textual"
-        )
-        sys.exit(1)
-
-    # Remaining imports are only needed by this command, so they stay local.
-    from pathlib import Path
-
-    from framework.llm import LiteLLMProvider
-    from framework.runner.tool_registry import ToolRegistry
-    from framework.runtime.agent_runtime import create_agent_runtime
-    from framework.runtime.event_bus import EventBus
-    from framework.runtime.execution_stream import EntryPointSpec
-
-    async def run_with_tui():
-        """Build the agent runtime, run the TUI on it, and always stop the runtime."""
-        agent = DeepResearchAgent()
-
-        # Build graph and tools
-        agent._event_bus = EventBus()
-        agent._tool_registry = ToolRegistry()
-
-        # Per-agent storage directory under the user's home; created on demand.
-        storage_path = Path.home() / ".hive" / "agents" / "deep_research_agent"
-        storage_path.mkdir(parents=True, exist_ok=True)
-
-        # MCP servers are optional: the config is loaded only if the file
-        # exists next to this module. This must happen before the tools and
-        # executor are read from the registry below.
-        mcp_config_path = Path(__file__).parent / "mcp_servers.json"
-        if mcp_config_path.exists():
-            agent._tool_registry.load_mcp_config(mcp_config_path)
-
-        # In mock mode no LLM provider is created and None is passed on.
-        llm = None
-        if not mock:
-            llm = LiteLLMProvider(
-                model=agent.config.model,
-                api_key=agent.config.api_key,
-                api_base=agent.config.api_base,
-            )
-
-        tools = list(agent._tool_registry.get_tools().values())
-        tool_executor = agent._tool_registry.get_executor()
-        graph = agent._build_graph()
-
-        # Single manual entry point "start" that begins at the intake node.
-        runtime = create_agent_runtime(
-            graph=graph,
-            goal=agent.goal,
-            storage_path=storage_path,
-            entry_points=[
-                EntryPointSpec(
-                    id="start",
-                    name="Start Research",
-                    entry_node="intake",
-                    trigger_type="manual",
-                    isolation_level="isolated",
-                ),
-            ],
-            llm=llm,
-            tools=tools,
-            tool_executor=tool_executor,
-        )
-
-        await runtime.start()
-
-        # The runtime is stopped even if the TUI exits with an exception.
-        try:
-            app = AdenTUI(runtime)
-            await app.run_async()
-        finally:
-            await runtime.stop()
-
-    asyncio.run(run_with_tui())
-
-
-@cli.command()
-@click.option("--json", "output_json", is_flag=True)
-def info(output_json):
-    """Show agent information."""
-    details = default_agent.info()
-
-    # Machine-readable form: dump the info mapping as-is and stop.
-    if output_json:
-        click.echo(json.dumps(details, indent=2))
-        return
-
-    # Human-readable form: one labelled line per field.
-    click.echo(f"Agent: {details['name']}")
-    click.echo(f"Version: {details['version']}")
-    click.echo(f"Description: {details['description']}")
-    click.echo(f"\nNodes: {', '.join(details['nodes'])}")
-    click.echo(f"Client-facing: {', '.join(details['client_facing_nodes'])}")
-    click.echo(f"Entry: {details['entry_node']}")
-    click.echo(f"Terminal: {', '.join(details['terminal_nodes'])}")
-
-
-@cli.command()
-def validate():
-    """Validate agent structure."""
-    report = default_agent.validate()
-
-    # Failure path first: list every error and exit with status 1.
-    if not report["valid"]:
-        click.echo("Agent has errors:")
-        for problem in report["errors"]:
-            click.echo(f"  ERROR: {problem}")
-        sys.exit(1)
-
-    # Valid agent: warnings, if any, are informational only.
-    click.echo("Agent is valid")
-    for note in report["warnings"] or ():
-        click.echo(f"  WARNING: {note}")
-    sys.exit(0)
-
-
-@cli.command()
-@click.option("--verbose", "-v", is_flag=True)
-def shell(verbose):
-    """Interactive research session (CLI, no TUI)."""
-    # Build the coroutine first, then hand it to a fresh event loop.
-    session = _interactive_shell(verbose)
-    asyncio.run(session)
-
-
-async def _interactive_shell(verbose=False):
-    """Run a prompt/answer research loop on the terminal.
-
-    Starts a ``DeepResearchAgent``, then repeatedly reads a topic from stdin,
-    runs it through the agent and prints the report and references. The loop
-    ends on ``quit``/``exit``/``q``, on end-of-input (Ctrl-D or a closed
-    stdin) or on Ctrl-C. The agent is stopped on every exit path.
-
-    Args:
-        verbose: Forwarded to ``setup_logging``.
-    """
-    import traceback
-
-    setup_logging(verbose=verbose)
-
-    click.echo("=== Deep Research Agent ===")
-    click.echo("Enter a topic to research (or 'quit' to exit):\n")
-
-    agent = DeepResearchAgent()
-    await agent.start()
-
-    # input() blocks, so it runs on the default executor; the running loop is
-    # looked up once instead of on every iteration.
-    loop = asyncio.get_running_loop()
-
-    try:
-        while True:
-            try:
-                try:
-                    topic = await loop.run_in_executor(None, input, "Topic> ")
-                except EOFError:
-                    # stdin is exhausted: every further input() would raise
-                    # again, so leave instead of looping on the error handler.
-                    click.echo("\nGoodbye!")
-                    break
-
-                command = topic.strip()
-                if command.lower() in ("quit", "exit", "q"):
-                    click.echo("Goodbye!")
-                    break
-
-                if not command:
-                    continue
-
-                click.echo("\nResearching...\n")
-
-                # NOTE(review): this triggers entry point "start", while the
-                # runtime built by DeepResearchAgent._setup registers an entry
-                # point with id "default" — confirm that "start" resolves.
-                result = await agent.trigger_and_wait("start", {"topic": topic})
-
-                if result is None:
-                    click.echo("\n[Execution timed out]\n")
-                    continue
-
-                if not result.success:
-                    click.echo(f"\nResearch failed: {result.error}\n")
-                    continue
-
-                output = result.output
-                if "report_content" in output:
-                    click.echo("\n--- Report ---\n")
-                    click.echo(output["report_content"])
-                    click.echo("\n")
-                if "references" in output:
-                    click.echo("--- References ---\n")
-                    # Tolerate an explicit None under "references".
-                    for ref in output.get("references") or []:
-                        click.echo(
-                            f"  [{ref.get('number', '?')}] {ref.get('title', '')} - {ref.get('url', '')}"
-                        )
-                    click.echo("\n")
-
-            except KeyboardInterrupt:
-                click.echo("\nGoodbye!")
-                break
-            except Exception as e:
-                # Best-effort shell: report the failure and keep prompting.
-                click.echo(f"Error: {e}", err=True)
-                traceback.print_exc()
-    finally:
-        await agent.stop()
-
-
-if __name__ == "__main__":
-    cli()
@@ -1,358 +0,0 @@
-"""Agent graph construction for Deep Research Agent."""
-
-from pathlib import Path
-
-from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
-from framework.graph.edge import GraphSpec
-from framework.graph.executor import ExecutionResult
-from framework.graph.checkpoint_config import CheckpointConfig
-from framework.llm import LiteLLMProvider
-from framework.runner.tool_registry import ToolRegistry
-from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
-from framework.runtime.execution_stream import EntryPointSpec
-
-from .config import default_config, metadata
-from .nodes import (
-    intake_node,
-    research_node,
-    review_node,
-    report_node,
-)
-
-# Goal definition: four success criteria with equal weight (0.25 each) and
-# three constraints.
-goal = Goal(
-    id="rigorous-interactive-research",
-    name="Rigorous Interactive Research",
-    description=(
-        "Research any topic by searching diverse sources, analyzing findings, "
-        "and producing a cited report — with user checkpoints to guide direction."
-    ),
-    success_criteria=[
-        # At least five sources.
-        SuccessCriterion(
-            id="source-diversity",
-            description="Use multiple diverse, authoritative sources",
-            metric="source_count",
-            target=">=5",
-            weight=0.25,
-        ),
-        # Every factual claim is cited.
-        SuccessCriterion(
-            id="citation-coverage",
-            description="Every factual claim in the report cites its source",
-            metric="citation_coverage",
-            target="100%",
-            weight=0.25,
-        ),
-        # The user has reviewed the findings before the report is generated.
-        SuccessCriterion(
-            id="user-satisfaction",
-            description="User reviews findings before report generation",
-            metric="user_approval",
-            target="true",
-            weight=0.25,
-        ),
-        # The report covers the original research questions.
-        SuccessCriterion(
-            id="report-completeness",
-            description="Final report answers the original research questions",
-            metric="question_coverage",
-            target="90%",
-            weight=0.25,
-        ),
-    ],
-    constraints=[
-        # Accuracy: nothing that was not found in a fetched source.
-        Constraint(
-            id="no-hallucination",
-            description="Only include information found in fetched sources",
-            constraint_type="quality",
-            category="accuracy",
-        ),
-        # Accuracy: numbered references for every claim.
-        Constraint(
-            id="source-attribution",
-            description="Every claim must cite its source with a numbered reference",
-            constraint_type="quality",
-            category="accuracy",
-        ),
-        # Interaction: findings are shown to the user before the final report.
-        Constraint(
-            id="user-checkpoint",
-            description="Present findings to the user before writing the final report",
-            constraint_type="functional",
-            category="interaction",
-        ),
-    ],
-)
-
-# Node list: the four nodes imported from .nodes.
-nodes = [
-    intake_node,
-    research_node,
-    review_node,
-    report_node,
-]
-
-# Edge definitions. The two edges leaving "review" have complementary
-# conditions, as do the two edges leaving "report", so exactly one edge of
-# each pair is expected to match.
-edges = [
-    # intake -> research (on success)
-    EdgeSpec(
-        id="intake-to-research",
-        source="intake",
-        target="research",
-        condition=EdgeCondition.ON_SUCCESS,
-        priority=1,
-    ),
-    # research -> review (on success)
-    EdgeSpec(
-        id="research-to-review",
-        source="research",
-        target="review",
-        condition=EdgeCondition.ON_SUCCESS,
-        priority=1,
-    ),
-    # review -> research (feedback loop, taken when more research is needed)
-    EdgeSpec(
-        id="review-to-research-feedback",
-        source="review",
-        target="research",
-        condition=EdgeCondition.CONDITIONAL,
-        condition_expr="needs_more_research == True",
-        priority=1,
-    ),
-    # review -> report (user satisfied)
-    EdgeSpec(
-        id="review-to-report",
-        source="review",
-        target="report",
-        condition=EdgeCondition.CONDITIONAL,
-        condition_expr="needs_more_research == False",
-        priority=2,
-    ),
-    # report -> research (user wants deeper research on current topic)
-    EdgeSpec(
-        id="report-to-research",
-        source="report",
-        target="research",
-        condition=EdgeCondition.CONDITIONAL,
-        condition_expr="str(next_action).lower() == 'more_research'",
-        priority=2,
-    ),
-    # report -> intake (user wants a new topic — default when not more_research)
-    EdgeSpec(
-        id="report-to-intake",
-        source="report",
-        target="intake",
-        condition=EdgeCondition.CONDITIONAL,
-        condition_expr="str(next_action).lower() != 'more_research'",
-        priority=1,
-    ),
-]
-
-# Graph configuration
-# Execution begins at the intake node; "start" is the only named entry point.
-entry_node = "intake"
-entry_points = {"start": "intake"}
-# No pause nodes and no terminal nodes are declared: the report node's
-# outgoing edges lead back to research or intake.
-pause_nodes = []
-terminal_nodes = []
-
-
-class DeepResearchAgent:
-    """
-    Deep Research Agent — 4-node pipeline with user checkpoints.
-
-    Flow: intake -> research -> review -> report
-                      ^           |
-                      +-- feedback loop (if user wants more)
-
-    Uses AgentRuntime for proper session management:
-    - Session-scoped storage (sessions/{session_id}/)
-    - Checkpointing for resume capability
-    - Runtime logging
-    - Data folder for save_data/load_data
-    """
-
-    def __init__(self, config=None):
-        self.config = config or default_config
-        self.goal = goal
-        self.nodes = nodes
-        self.edges = edges
-        self.entry_node = entry_node
-        self.entry_points = entry_points
-        self.pause_nodes = pause_nodes
-        self.terminal_nodes = terminal_nodes
-        self._graph: GraphSpec | None = None
-        self._agent_runtime: AgentRuntime | None = None
-        self._tool_registry: ToolRegistry | None = None
-        self._storage_path: Path | None = None
-
-    def _build_graph(self) -> GraphSpec:
-        """Build the GraphSpec."""
-        return GraphSpec(
-            id="deep-research-agent-graph",
-            goal_id=self.goal.id,
-            version="1.0.0",
-            entry_node=self.entry_node,
-            entry_points=self.entry_points,
-            terminal_nodes=self.terminal_nodes,
-            pause_nodes=self.pause_nodes,
-            nodes=self.nodes,
-            edges=self.edges,
-            default_model=self.config.model,
-            max_tokens=self.config.max_tokens,
-            loop_config={
-                "max_iterations": 100,
-                "max_tool_calls_per_turn": 30,
-                "max_history_tokens": 32000,
-            },
-            conversation_mode="continuous",
-            identity_prompt=(
-                "You are a rigorous research agent. You search for information "
-                "from diverse, authoritative sources, analyze findings critically, "
-                "and produce well-cited reports. You never fabricate information — "
-                "every claim must trace back to a source you actually retrieved."
-            ),
-        )
-
-    def _setup(self, mock_mode=False) -> None:
-        """Set up the agent runtime with sessions, checkpoints, and logging."""
-        self._storage_path = Path.home() / ".hive" / "agents" / "deep_research_agent"
-        self._storage_path.mkdir(parents=True, exist_ok=True)
-
-        self._tool_registry = ToolRegistry()
-
-        mcp_config_path = Path(__file__).parent / "mcp_servers.json"
-        if mcp_config_path.exists():
-            self._tool_registry.load_mcp_config(mcp_config_path)
-
-        llm = None
-        if not mock_mode:
-            llm = LiteLLMProvider(
-                model=self.config.model,
-                api_key=self.config.api_key,
-                api_base=self.config.api_base,
-            )
-
-        tool_executor = self._tool_registry.get_executor()
-        tools = list(self._tool_registry.get_tools().values())
-
-        self._graph = self._build_graph()
-
-        checkpoint_config = CheckpointConfig(
-            enabled=True,
-            checkpoint_on_node_start=False,
-            checkpoint_on_node_complete=True,
-            checkpoint_max_age_days=7,
-            async_checkpoint=True,
-        )
-
-        entry_point_specs = [
-            EntryPointSpec(
-                id="default",
-                name="Default",
-                entry_node=self.entry_node,
-                trigger_type="manual",
-                isolation_level="shared",
-            )
-        ]
-
-        self._agent_runtime = create_agent_runtime(
-            graph=self._graph,
-            goal=self.goal,
-            storage_path=self._storage_path,
-            entry_points=entry_point_specs,
-            llm=llm,
-            tools=tools,
-            tool_executor=tool_executor,
-            checkpoint_config=checkpoint_config,
-        )
-
-    async def start(self, mock_mode=False) -> None:
-        """Set up and start the agent runtime."""
-        if self._agent_runtime is None:
-            self._setup(mock_mode=mock_mode)
-        if not self._agent_runtime.is_running:
-            await self._agent_runtime.start()
-
-    async def stop(self) -> None:
-        """Stop the agent runtime and clean up."""
-        if self._agent_runtime and self._agent_runtime.is_running:
-            await self._agent_runtime.stop()
-        self._agent_runtime = None
-
-    async def trigger_and_wait(
-        self,
-        entry_point: str = "default",
-        input_data: dict | None = None,
-        timeout: float | None = None,
-        session_state: dict | None = None,
-    ) -> ExecutionResult | None:
-        """Execute the graph and wait for completion."""
-        if self._agent_runtime is None:
-            raise RuntimeError("Agent not started. Call start() first.")
-
-        return await self._agent_runtime.trigger_and_wait(
-            entry_point_id=entry_point,
-            input_data=input_data or {},
-            session_state=session_state,
-        )
-
-    async def run(
-        self, context: dict, mock_mode=False, session_state=None
-    ) -> ExecutionResult:
-        """Run the agent (convenience method for single execution)."""
-        await self.start(mock_mode=mock_mode)
-        try:
-            result = await self.trigger_and_wait(
-                "default", context, session_state=session_state
-            )
-            return result or ExecutionResult(success=False, error="Execution timeout")
-        finally:
-            await self.stop()
-
-    def info(self):
-        """Get agent information."""
-        return {
-            "name": metadata.name,
-            "version": metadata.version,
-            "description": metadata.description,
-            "goal": {
-                "name": self.goal.name,
-                "description": self.goal.description,
-            },
-            "nodes": [n.id for n in self.nodes],
-            "edges": [e.id for e in self.edges],
-            "entry_node": self.entry_node,
-            "entry_points": self.entry_points,
-            "pause_nodes": self.pause_nodes,
-            "terminal_nodes": self.terminal_nodes,
-            "client_facing_nodes": [n.id for n in self.nodes if n.client_facing],
-        }
-
-    def validate(self):
-        """Validate agent structure."""
-        errors = []
-        warnings = []
-
-        node_ids = {node.id for node in self.nodes}
-        for edge in self.edges:
-            if edge.source not in node_ids:
-                errors.append(f"Edge {edge.id}: source '{edge.source}' not found")
-            if edge.target not in node_ids:
-                errors.append(f"Edge {edge.id}: target '{edge.target}' not found")
-
-        if self.entry_node not in node_ids:
-            errors.append(f"Entry node '{self.entry_node}' not found")
-
-        for terminal in self.terminal_nodes:
-            if terminal not in node_ids:
-                errors.append(f"Terminal node '{terminal}' not found")
-
-        for ep_id, node_id in self.entry_points.items():
-            if node_id not in node_ids:
-                errors.append(
-                    f"Entry point '{ep_id}' references unknown node '{node_id}'"
-                )
-
-        return {
-            "valid": len(errors) == 0,
-            "errors": errors,
-            "warnings": warnings,
-        }
-
-
-# Module-level default instance, constructed when this module is imported.
-default_agent = DeepResearchAgent()
@@ -1,26 +0,0 @@
-"""Runtime configuration."""
-
-from dataclasses import dataclass
-
-from framework.config import RuntimeConfig
-
-# Runtime settings built from RuntimeConfig with no overrides.
-default_config = RuntimeConfig()
-
-
-@dataclass
-class AgentMetadata:
-    """Descriptive metadata for the Deep Research Agent."""
-
-    # Human-readable agent name.
-    name: str = "Deep Research Agent"
-    # Version string for this agent.
-    version: str = "1.0.0"
-    # One-sentence summary of what the agent does.
-    description: str = (
-        "Interactive research agent that rigorously investigates topics through "
-        "multi-source search, quality evaluation, and synthesis - with TUI conversation "
-        "at key checkpoints for user guidance and feedback."
-    )
-    # Greeting addressed to the user.
-    # NOTE(review): presumably shown at the start of a session — confirm where it is read.
-    intro_message: str = (
-        "Hi! I'm your deep research assistant. Tell me a topic and I'll investigate it "
-        "thoroughly — searching multiple sources, evaluating quality, and synthesizing "
-        "a comprehensive report. What would you like me to research?"
-    )
-
-
-# Module-level metadata instance using the defaults above.
-metadata = AgentMetadata()
@@ -1,9 +0,0 @@
-{
-  "hive-tools": {
-    "transport": "stdio",
-    "command": "uv",
-    "args": ["run", "python", "mcp_server.py", "--stdio"],
-    "cwd": "../../tools",
-    "description": "Hive tools MCP server providing web_search, web_scrape, and write_to_file"
-  }
-}
@@ -1,213 +0,0 @@
-"""Node definitions for Deep Research Agent."""
-
-from framework.graph import NodeSpec
-
-# Node 1: Intake (client-facing)
-# Holds a brief conversation to clarify what the user wants researched, then
-# writes the agreed scope to the "research_brief" output key.
-intake_node = NodeSpec(
-    id="intake",
-    name="Research Intake",
-    description="Discuss the research topic with the user, clarify scope, and confirm direction",
-    node_type="event_loop",
-    client_facing=True,
-    max_node_visits=0,  # NOTE(review): presumably 0 means no visit cap — confirm
-    input_keys=["topic"],
-    output_keys=["research_brief"],
-    success_criteria=(
-        "The research brief is specific and actionable: it states the topic, "
-        "the key questions to answer, the desired scope, and depth."
-    ),
-    system_prompt="""\
-You are a research intake specialist. The user wants to research a topic.
-Have a brief conversation to clarify what they need.
-
-**STEP 1 — Read and respond (text only, NO tool calls):**
-1. Read the topic provided
-2. If it's vague, ask 1-2 clarifying questions (scope, angle, depth)
-3. If it's already clear, confirm your understanding and ask the user to confirm
-
-Keep it short. Don't over-ask.
-
-**STEP 2 — After the user confirms, call set_output:**
- set_output("research_brief", "A clear paragraph describing exactly what to research, \
-what questions to answer, what scope to cover, and how deep to go.")
-""",
-    # No tools are listed; the prompt still calls set_output.
-    # NOTE(review): set_output is presumably supplied by the framework — confirm.
-    tools=[],
-)
-
-# Node 2: Research
-# The workhorse — searches the web, fetches content, analyzes sources.
-# Search and scrape live in one node, which avoids the context-passing
-# overhead of splitting the work across 5 separate nodes.
-research_node = NodeSpec(
-    id="research",
-    name="Research",
-    description="Search the web, fetch source content, and compile findings",
-    node_type="event_loop",
-    max_node_visits=0,  # NOTE(review): presumably 0 means no visit cap — confirm
-    # "feedback" is only relevant on follow-up rounds (see the prompt below).
-    input_keys=["research_brief", "feedback"],
-    output_keys=["findings", "sources", "gaps"],
-    # NOTE(review): "feedback" is an input key here, not one of output_keys —
-    # confirm that listing it under nullable_output_keys is intended.
-    nullable_output_keys=["feedback"],
-    success_criteria=(
-        "Findings reference at least 3 distinct sources with URLs. "
-        "Key claims are substantiated by fetched content, not generated."
-    ),
-    system_prompt="""\
-You are a research agent. Given a research brief, find and analyze sources.
-
-If feedback is provided, this is a follow-up round — focus on the gaps identified.
-
-Work in phases:
-1. **Search**: Use web_search with 3-5 diverse queries covering different angles.
-   Prioritize authoritative sources (.edu, .gov, established publications).
-2. **Fetch**: Use web_scrape on the most promising URLs (aim for 5-8 sources).
-   Skip URLs that fail. Extract the substantive content.
-3. **Analyze**: Review what you've collected. Identify key findings, themes,
-   and any contradictions between sources.
-
-Important:
- Work in batches of 3-4 tool calls at a time — never more than 10 per turn
- After each batch, assess whether you have enough material
- Prefer quality over quantity — 5 good sources beat 15 thin ones
- Track which URL each finding comes from (you'll need citations later)
- Call set_output for each key in a SEPARATE turn (not in the same turn as other tool calls)
-
-Context management:
- Your tool results are automatically saved to files. After compaction, the file \
-references remain in the conversation — use load_data() to recover any content you need.
- Use append_data('research_notes.md', ...) to maintain a running log of key findings \
-as you go. This survives compaction and helps the report node produce a detailed report.
-
-When done, use set_output (one key at a time, separate turns):
- set_output("findings", "Structured summary: key findings with source URLs for each claim. \
-Include themes, contradictions, and confidence levels.")
- set_output("sources", [{"url": "...", "title": "...", "summary": "..."}])
- set_output("gaps", "What aspects of the research brief are NOT well-covered yet, if any.")
-""",
-    # Web tools for gathering sources, plus the data-file tools the prompt
-    # uses for notes and for recovering saved content.
-    tools=[
-        "web_search",
-        "web_scrape",
-        "load_data",
-        "save_data",
-        "append_data",
-        "list_data_files",
-    ],
-)
-
-# Node 3: Review (client-facing)
-# Shows the user what was found and asks whether to dig deeper or proceed.
-# The prompt records the decision in "needs_more_research" as the string
-# "true" or "false", and the requested follow-up in "feedback".
-review_node = NodeSpec(
-    id="review",
-    name="Review Findings",
-    description="Present findings to user and decide whether to research more or write the report",
-    node_type="event_loop",
-    client_facing=True,
-    max_node_visits=0,  # NOTE(review): presumably 0 means no visit cap — confirm
-    input_keys=["findings", "sources", "gaps", "research_brief"],
-    output_keys=["needs_more_research", "feedback"],
-    success_criteria=(
-        "The user has been presented with findings and has explicitly indicated "
-        "whether they want more research or are ready for the report."
-    ),
-    system_prompt="""\
-Present the research findings to the user clearly and concisely.
-
-**STEP 1 — Present (your first message, text only, NO tool calls):**
-1. **Summary** (2-3 sentences of what was found)
-2. **Key Findings** (bulleted, with confidence levels)
-3. **Sources Used** (count and quality assessment)
-4. **Gaps** (what's still unclear or under-covered)
-
-End by asking: Are they satisfied, or do they want deeper research? \
-Should we proceed to writing the final report?
-
-**STEP 2 — After the user responds, call set_output:**
- set_output("needs_more_research", "true")  — if they want more
- set_output("needs_more_research", "false") — if they're satisfied
- set_output("feedback", "What the user wants explored further, or empty string")
-""",
-    # No tools are listed; the prompt still calls set_output.
-    # NOTE(review): set_output is presumably supplied by the framework — confirm.
-    tools=[],
-)
-
-# Node 4: Report (client-facing)
-# Writes an HTML report, serves the link to the user, and answers follow-ups.
-# The prompt sets "next_action" to "new_topic" or "more_research" depending on
-# what the user wants to do afterwards.
-report_node = NodeSpec(
-    id="report",
-    name="Write & Deliver Report",
-    description="Write a cited HTML report from the findings and present it to the user",
-    node_type="event_loop",
-    client_facing=True,
-    max_node_visits=0,  # NOTE(review): presumably 0 means no visit cap — confirm
-    input_keys=["findings", "sources", "research_brief"],
-    output_keys=["delivery_status", "next_action"],
-    success_criteria=(
-        "An HTML report has been saved, the file link has been presented to the user, "
-        "and the user has indicated what they want to do next."
-    ),
-    system_prompt="""\
-Write a research report as an HTML file and present it to the user.
-
-IMPORTANT: save_data requires TWO separate arguments: filename and data.
-Call it like: save_data(filename="report.html", data="<html>...</html>")
-Do NOT use _raw, do NOT nest arguments inside a JSON string.
-
-**STEP 1 — Write and save the HTML report (tool calls, NO text to user yet):**
-
-Build a clean HTML document. Keep the HTML concise — aim for clarity over length.
-Use minimal embedded CSS (a few lines of style, not a full framework).
-
-Report structure:
- Title & date
- Executive Summary (2-3 paragraphs)
- Key Findings (organized by theme, with [n] citation links)
- Analysis (synthesis, implications)
- Conclusion (key takeaways)
- References (numbered list with clickable URLs)
-
-Requirements:
- Every factual claim must cite its source with [n] notation
- Be objective — present multiple viewpoints where sources disagree
- Answer the original research questions from the brief
- If findings appear incomplete or summarized, call list_data_files() and load_data() \
-to access the detailed source material from the research phase. The research node's \
-tool results and research_notes.md contain the full data.
-
-Save the HTML:
-  save_data(filename="report.html", data="<html>...</html>")
-
-Then get the clickable link:
-  serve_file_to_user(filename="report.html", label="Research Report")
-
-If save_data fails, simplify and shorten the HTML, then retry.
-
-**STEP 2 — Present the link to the user (text only, NO tool calls):**
-
-Tell the user the report is ready and include the file:// URI from
-serve_file_to_user so they can click it to open. Give a brief summary
-of what the report covers. Ask if they have questions or want to continue.
-
-**STEP 3 — After the user responds:**
- Answer any follow-up questions from the research material
- When the user is ready to move on, ask what they'd like to do next:
-  - Research a new topic?
-  - Dig deeper into the current topic?
- Then call set_output:
-  - set_output("delivery_status", "completed")
-  - set_output("next_action", "new_topic")       — if they want a new topic
-  - set_output("next_action", "more_research")   — if they want deeper research
-""",
-    # Data-file tools for writing and reading the report and research
-    # material, plus serve_file_to_user for producing the link.
-    tools=[
-        "save_data",
-        "append_data",
-        "edit_data",
-        "serve_file_to_user",
-        "load_data",
-        "list_data_files",
-    ],
-)
-
-# Public names of this module: the four node specs, in workflow order.
-__all__ = [
-    "intake_node",
-    "research_node",
-    "review_node",
-    "report_node",
-]
@@ -1,640 +0,0 @@
---
-name: hive-credentials
-description: Set up and install credentials for an agent. Detects missing credentials from agent config, collects them from the user, and stores them securely in the local encrypted store at ~/.hive/credentials.
-license: Apache-2.0
-metadata:
-  author: hive
-  version: "2.3"
-  type: utility
---
-
-# Setup Credentials
-
-Interactive credential setup for agents with multiple authentication options. Detects what's missing, offers auth method choices, validates with health checks, and stores credentials securely.
-
-## When to Use
-
- Before running or testing an agent for the first time
- When `AgentRunner.run()` fails with "missing required credentials"
- When a user asks to configure credentials for an agent
- After building a new agent that uses tools requiring API keys
-
-## Workflow
-
-### Step 1: Identify the Agent
-
-Determine which agent needs credentials. The user will either:
-
- Name the agent directly (e.g., "set up credentials for hubspot-agent")
- Have an agent directory open (check `exports/` for agent dirs)
- Be working on an agent in the current session
-
-Locate the agent's directory under `exports/{agent_name}/`.
-
-### Step 2: Detect Missing Credentials
-
-Use the `check_missing_credentials` MCP tool to detect what the agent needs and what's already configured. This tool loads the agent, inspects its required tools and node types, maps them to credentials via `CREDENTIAL_SPECS`, and checks both the encrypted store and environment variables.
-
-```
-check_missing_credentials(agent_path="exports/{agent_name}")
-```
-
-The tool returns a JSON response:
-
-```json
-{
-  "agent": "exports/{agent_name}",
-  "missing": [
-    {
-      "credential_name": "brave_search",
-      "env_var": "BRAVE_SEARCH_API_KEY",
-      "description": "Brave Search API key for web search",
-      "help_url": "https://brave.com/search/api/",
-      "tools": ["web_search"]
-    }
-  ],
-  "available": [
-    {
-      "credential_name": "anthropic",
-      "env_var": "ANTHROPIC_API_KEY",
-      "source": "encrypted_store"
-    }
-  ],
-  "total_missing": 1,
-  "ready": false
-}
-```
-
-**If `ready` is true (nothing missing):** Report all credentials as configured and skip Steps 3-5. Example:
-
-```
-All required credentials are already configured:
-  ✓ anthropic (ANTHROPIC_API_KEY)
-  ✓ brave_search (BRAVE_SEARCH_API_KEY)
-Your agent is ready to run!
-```
-
-**If credentials are missing:** Continue to Step 3 with the `missing` list.
-
-### Step 3: Present Auth Options for Each Missing Credential
-
-For each missing credential, check what authentication methods are available:
-
-```python
-from aden_tools.credentials import CREDENTIAL_SPECS
-
-spec = CREDENTIAL_SPECS.get("hubspot")
-if spec:
-    # Determine available auth options
-    auth_options = []
-    if spec.aden_supported:
-        auth_options.append("aden")
-    if spec.direct_api_key_supported:
-        auth_options.append("direct")
-    auth_options.append("custom")  # Always available
-
-    # Get setup info
-    setup_info = {
-        "env_var": spec.env_var,
-        "description": spec.description,
-        "help_url": spec.help_url,
-        "api_key_instructions": spec.api_key_instructions,
-    }
-```
-
-Present the available options using AskUserQuestion:
-
-```
-Choose how to configure HUBSPOT_ACCESS_TOKEN:
-
-  1) Aden Platform (OAuth) (Recommended)
-     Secure OAuth2 flow via hive.adenhq.com
-     - Quick setup with automatic token refresh
-     - No need to manage API keys manually
-
-  2) Direct API Key
-     Enter your own API key manually
-     - Requires creating a HubSpot Private App
-     - Full control over scopes and permissions
-
-  3) Local Credential Setup (Advanced)
-     Programmatic configuration for CI/CD
-     - For automated deployments
-     - Requires manual API calls
-```
-
-### Step 4: Execute Auth Flow Based on User Choice
-
-#### Prerequisite: Ensure HIVE_CREDENTIAL_KEY Is Available
-
-Before storing any credentials, verify `HIVE_CREDENTIAL_KEY` is set (needed to encrypt/decrypt the local store). Check both the current session and shell config:
-
-```bash
-# Check current session
-printenv HIVE_CREDENTIAL_KEY > /dev/null 2>&1 && echo "session: set" || echo "session: not set"
-
-# Check shell config files
-for f in ~/.zshrc ~/.bashrc ~/.profile; do [ -f "$f" ] && grep -q 'HIVE_CREDENTIAL_KEY' "$f" && echo "$f"; done
-```
-
- **In current session** — proceed to store credentials
- **In shell config but NOT in current session** — run `source ~/.zshrc` (or `~/.bashrc`) first, then proceed
- **Not set anywhere** — `EncryptedFileStorage` will auto-generate one. After storing, tell the user to persist it: `export HIVE_CREDENTIAL_KEY="{generated_key}"` in their shell profile
-
-> **⚠️ IMPORTANT: After adding `HIVE_CREDENTIAL_KEY` to the user's shell config, always display:**
-> ```
-> ⚠️  Environment variables were added to your shell config.
->     Open a NEW TERMINAL for them to take effect outside this session.
-> ```
-
-#### Option 1: Aden Platform (OAuth)
-
-This is the recommended flow for supported integrations (HubSpot, etc.).
-
-**How Aden OAuth Works:**
-
-The ADEN_API_KEY represents a user who has already completed OAuth authorization on Aden's platform. When users sign up and connect integrations on Aden, those OAuth tokens are stored server-side. Having an ADEN_API_KEY means:
-
-1. User has an Aden account
-2. User has already authorized integrations (HubSpot, etc.) via OAuth on Aden
-3. We just need to sync those credentials down to the local credential store
-
-**4.1a. Check for ADEN_API_KEY**
-
-```python
-import os
-aden_key = os.environ.get("ADEN_API_KEY")
-```
-
-If it is not set, guide the user to get one from Aden (this is where they complete OAuth):
-
-```python
-from aden_tools.credentials import open_browser, get_aden_setup_url
-
-# Open browser to Aden - user will sign up and connect integrations there
-url = get_aden_setup_url()  # https://hive.adenhq.com
-success, msg = open_browser(url)
-
-print("Please sign in to Aden and connect your integrations (HubSpot, etc.).")
-print("Once done, copy your API key and return here.")
-```
-
-Ask the user to provide the ADEN_API_KEY they received.
-
-**4.1b. Save ADEN_API_KEY to Shell Config**
-
-With user approval, persist ADEN_API_KEY to their shell config:
-
-```python
-from aden_tools.credentials import (
-    detect_shell,
-    add_env_var_to_shell_config,
-    get_shell_source_command,
-)
-
-shell_type = detect_shell()  # 'bash', 'zsh', or 'unknown'
-
-# Ask user for approval before modifying shell config
-# If approved:
-success, config_path = add_env_var_to_shell_config(
-    "ADEN_API_KEY",
-    user_provided_key,
-    comment="Aden Platform (OAuth) API key"
-)
-
-if success:
-    source_cmd = get_shell_source_command()
-    print(f"Saved to {config_path}")
-    print(f"Run: {source_cmd}")
-```
-
-> **⚠️ IMPORTANT: After adding `ADEN_API_KEY` to the user's shell config, always display:**
-> ```
-> ⚠️  Environment variables were added to your shell config.
->     Open a NEW TERMINAL for them to take effect outside this session.
-> ```
-
-Also save to `~/.hive/configuration.json` for the framework:
-
-```python
-import json
-from pathlib import Path
-
-config_path = Path.home() / ".hive" / "configuration.json"
-config = json.loads(config_path.read_text()) if config_path.exists() else {}
-
-config["aden"] = {
-    "api_key_configured": True,
-    "api_url": "https://api.adenhq.com"
-}
-
-config_path.parent.mkdir(parents=True, exist_ok=True)
-config_path.write_text(json.dumps(config, indent=2))
-```
-
-**4.1c. Sync Credentials from Aden Server**
-
-Since the user has already authorized integrations on Aden, use the one-liner factory method:
-
-```python
-from core.framework.credentials import CredentialStore
-
-# This single call handles everything:
-# - Creates encrypted local storage at ~/.hive/credentials
-# - Configures Aden client from ADEN_API_KEY env var
-# - Syncs all credentials from Aden server automatically
-store = CredentialStore.with_aden_sync(
-    base_url="https://api.adenhq.com",
-    auto_sync=True,  # Syncs on creation
-)
-
-# Check what was synced
-synced = store.list_credentials()
-print(f"Synced credentials: {synced}")
-
-# If the required credential wasn't synced, the user hasn't authorized it on Aden yet
-if "hubspot" not in synced:
-    print("HubSpot not found in your Aden account.")
-    print("Please visit https://hive.adenhq.com to connect HubSpot, then try again.")
-```
-
-For more control over the sync process:
-
-```python
-from core.framework.credentials import CredentialStore
-from core.framework.credentials.aden import (
-    AdenCredentialClient,
-    AdenClientConfig,
-    AdenSyncProvider,
-)
-
-# Create client (API key loaded from ADEN_API_KEY env var)
-client = AdenCredentialClient(AdenClientConfig(
-    base_url="https://api.adenhq.com",
-))
-
-# Create provider and store
-provider = AdenSyncProvider(client=client)
-store = CredentialStore.with_encrypted_storage()
-
-# Manual sync
-synced_count = provider.sync_all(store)
-print(f"Synced {synced_count} credentials from Aden")
-```
-
-**4.1d. Run Health Check**
-
-```python
-from aden_tools.credentials import check_credential_health
-
-# Get the token from the store
-cred = store.get_credential("hubspot")
-token = cred.keys["access_token"].value.get_secret_value()
-
-result = check_credential_health("hubspot", token)
-if result.valid:
-    print("HubSpot credentials validated successfully!")
-else:
-    print(f"Validation failed: {result.message}")
-    # Offer to retry the OAuth flow
-```
-
-#### Option 2: Direct API Key
-
-For users who prefer manual API key management.
-
-**4.2a. Show Setup Instructions**
-
-```python
-from aden_tools.credentials import CREDENTIAL_SPECS
-
-spec = CREDENTIAL_SPECS.get("hubspot")
-if spec and spec.api_key_instructions:
-    print(spec.api_key_instructions)
-# Output:
-# To get a HubSpot Private App token:
-# 1. Go to HubSpot Settings > Integrations > Private Apps
-# 2. Click "Create a private app"
-# 3. Name your app (e.g., "Hive Agent")
-# ...
-
-if spec and spec.help_url:
-    print(f"More info: {spec.help_url}")
-```
-
-**4.2b. Collect API Key from User**
-
-Use AskUserQuestion to securely collect the API key:
-
-```
-Please provide your HubSpot access token:
-(This will be stored securely in ~/.hive/credentials)
-```
-
-**4.2c. Run Health Check Before Storing**
-
-```python
-from aden_tools.credentials import check_credential_health
-
-result = check_credential_health("hubspot", user_provided_token)
-if not result.valid:
-    print(f"Warning: {result.message}")
-    # Ask user if they want to:
-    # 1. Try a different token
-    # 2. Continue anyway (not recommended)
-```
-
-**4.2d. Store in Local Encrypted Store**
-
-```python
-from core.framework.credentials import CredentialStore, CredentialObject, CredentialKey
-from pydantic import SecretStr
-
-store = CredentialStore.with_encrypted_storage()
-
-cred = CredentialObject(
-    id="hubspot",
-    name="HubSpot Access Token",
-    keys={
-        "access_token": CredentialKey(
-            name="access_token",
-            value=SecretStr(user_provided_token),
-        )
-    },
-)
-store.save_credential(cred)
-```
-
-**4.2e. Export to Current Session**
-
-```bash
-export HUBSPOT_ACCESS_TOKEN="the-value"
-```
-
-#### Option 3: Local Credential Setup (Advanced)
-
-For programmatic/CI/CD setups.
-
-**4.3a. Show Documentation**
-
-```
-For advanced credential management, you can use the CredentialStore API directly:
-
-  from core.framework.credentials import CredentialStore, CredentialObject, CredentialKey
-  from pydantic import SecretStr
-
-  store = CredentialStore.with_encrypted_storage()
-
-  cred = CredentialObject(
-      id="hubspot",
-      name="HubSpot Access Token",
-      keys={"access_token": CredentialKey(name="access_token", value=SecretStr("..."))}
-  )
-  store.save_credential(cred)
-
-For CI/CD environments:
-  - Set HIVE_CREDENTIAL_KEY for encryption
-  - Pre-populate ~/.hive/credentials programmatically
-  - Or use environment variables directly (HUBSPOT_ACCESS_TOKEN)
-
-Documentation: See core/framework/credentials/README.md
-```
-
-### Step 5: Record Configuration Method
-
-Track which auth method was used for each credential in `~/.hive/configuration.json`:
-
-```python
-import json
-from pathlib import Path
-from datetime import datetime
-
-config_path = Path.home() / ".hive" / "configuration.json"
-config = json.loads(config_path.read_text()) if config_path.exists() else {}
-
-if "credential_methods" not in config:
-    config["credential_methods"] = {}
-
-config["credential_methods"]["hubspot"] = {
-    "method": "aden",  # or "direct" or "custom"
-    "configured_at": datetime.now().isoformat(),
-}
-
-config_path.write_text(json.dumps(config, indent=2))
-```
-
-### Step 6: Verify All Credentials
-
-Use the `verify_credentials` MCP tool to confirm everything is properly configured:
-
-```
-verify_credentials(agent_path="exports/{agent_name}")
-```
-
-The tool returns:
-
-```json
-{
-  "agent": "exports/{agent_name}",
-  "ready": true,
-  "missing_credentials": [],
-  "warnings": [],
-  "errors": []
-}
-```
-
-If `ready` is true, report success. If `missing_credentials` is non-empty, identify what failed and loop back to Step 3 for the remaining credentials.
-
-## Health Check Reference
-
-Health checks validate credentials by making lightweight API calls:
-
-| Credential      | Endpoint                                | What It Checks                    |
-| --------------- | --------------------------------------- | --------------------------------- |
-| `anthropic`     | `POST /v1/messages`                     | API key validity                  |
-| `brave_search`  | `GET /res/v1/web/search?q=test&count=1` | API key validity                  |
-| `google_search` | `GET /customsearch/v1?q=test&num=1`     | API key + CSE ID validity         |
-| `github`        | `GET /user`                             | Token validity, user identity     |
-| `hubspot`       | `GET /crm/v3/objects/contacts?limit=1`  | Bearer token validity, CRM scopes |
-| `resend`        | `GET /domains`                          | API key validity                  |
-
-```python
-from aden_tools.credentials import check_credential_health, HealthCheckResult
-
-result: HealthCheckResult = check_credential_health("hubspot", token_value)
-# result.valid: bool
-# result.message: str
-# result.details: dict (status_code, rate_limited, etc.)
-```
-
-## Encryption Key (HIVE_CREDENTIAL_KEY)
-
-The local encrypted store requires `HIVE_CREDENTIAL_KEY` to encrypt/decrypt credentials.
-
- If the user doesn't have one, `EncryptedFileStorage` will auto-generate one and log it
- The user MUST persist this key (e.g., in `~/.bashrc`/`~/.zshrc` or a secrets manager)
- Without this key, stored credentials cannot be decrypted
-
-**Shell config rule:** Only TWO keys belong in shell config (`~/.zshrc`/`~/.bashrc`):
- `HIVE_CREDENTIAL_KEY` — encryption key for the credential store
- `ADEN_API_KEY` — Aden platform auth key (needed before the store can sync)
-
-All other API keys (Brave, Google, HubSpot, etc.) must go in the encrypted store only. **Never offer to add them to shell config.**
-
-If `HIVE_CREDENTIAL_KEY` is not set:
-
-1. Let the store generate one
-2. Tell the user to save it: `export HIVE_CREDENTIAL_KEY="{generated_key}"`
-3. Recommend adding it to `~/.bashrc` or their shell profile
-
-## Security Rules
-
- **NEVER** log, print, or echo credential values in tool output
- **NEVER** store credentials in plaintext files, git-tracked files, or agent configs
- **NEVER** hardcode credentials in source code
- **NEVER** offer to save API keys to shell config (`~/.zshrc`/`~/.bashrc`) — the **only** keys that belong in shell config are `HIVE_CREDENTIAL_KEY` and `ADEN_API_KEY`. All other credentials (Brave, Google, HubSpot, GitHub, Resend, etc.) go in the encrypted store only.
- **ALWAYS** use `SecretStr` from Pydantic when handling credential values in Python
- **ALWAYS** use the local encrypted store (`~/.hive/credentials`) for persistence
- **ALWAYS** run health checks before storing credentials (when possible)
- **ALWAYS** verify credentials were stored by re-running validation, not by reading them back
- When modifying `~/.bashrc` or `~/.zshrc`, confirm with the user first
-
-## Credential Sources Reference
-
-All credential specs are defined in `tools/src/aden_tools/credentials/`:
-
-| File              | Category      | Credentials                                   | Aden Supported |
-| ----------------- | ------------- | --------------------------------------------- | -------------- |
-| `llm.py`          | LLM Providers | `anthropic`                                   | No             |
-| `search.py`       | Search Tools  | `brave_search`, `google_search`, `google_cse` | No             |
-| `email.py`        | Email         | `resend`                                      | No             |
-| `integrations.py` | Integrations  | `github`, `hubspot`, `google_calendar_oauth`  | No / Yes       |
-
-**Note:** Additional LLM providers (Cerebras, Groq, OpenAI) are handled by LiteLLM via environment
-variables (`CEREBRAS_API_KEY`, `GROQ_API_KEY`, `OPENAI_API_KEY`) but are not yet in CREDENTIAL_SPECS.
-Add them to `llm.py` as needed.
-
-To check what's registered:
-
-```python
-from aden_tools.credentials import CREDENTIAL_SPECS
-for name, spec in CREDENTIAL_SPECS.items():
-    print(f"{name}: aden={spec.aden_supported}, direct={spec.direct_api_key_supported}")
-```
-
-## Migration: CredentialManager → CredentialStore
-
-**CredentialManager is deprecated.** Use CredentialStore instead.
-
-| Old (Deprecated)                          | New (Recommended)                                                    |
-| ----------------------------------------- | -------------------------------------------------------------------- |
-| `CredentialManager()`                     | `CredentialStore.with_encrypted_storage()`                           |
-| `creds.get("hubspot")`                    | `store.get("hubspot")` or `store.get_key("hubspot", "access_token")` |
-| `creds.validate_for_tools(tools)`         | Use `store.is_available(cred_id)` per credential                     |
-| `creds.get_auth_options("hubspot")`       | Check `CREDENTIAL_SPECS["hubspot"].aden_supported`                   |
-| `creds.get_setup_instructions("hubspot")` | Access `CREDENTIAL_SPECS["hubspot"]` directly                        |
-
-**Why migrate?**
-
- **CredentialStore** supports encrypted storage, multi-key credentials, template resolution, and automatic token refresh
- **CredentialManager** only reads from environment variables and .env files (no encryption, no refresh)
- **CredentialStoreAdapter** exists for backward compatibility during migration
-
-```python
-# Old way (deprecated)
-from aden_tools.credentials import CredentialManager
-creds = CredentialManager()
-token = creds.get("hubspot")
-
-# New way (recommended)
-from core.framework.credentials import CredentialStore
-store = CredentialStore.with_encrypted_storage()
-token = store.get("hubspot")
-
-# With Aden sync (recommended for OAuth integrations)
-store = CredentialStore.with_aden_sync()
-token = store.get_key("hubspot", "access_token")
-```
-
-## Example Session
-
-```
-User: /hive-credentials for my research-agent
-
-Agent: Let me check what credentials your research-agent needs.
-
-[Calls check_missing_credentials(agent_path="exports/research-agent")]
-→ Returns:
-  available: anthropic (encrypted_store), brave_search (encrypted_store)
-  missing: google_search (GOOGLE_API_KEY), google_cse (GOOGLE_CSE_ID)
-  ready: false
-
-Agent: 2 of 4 required credentials are already configured. Only Google Custom
-Search needs setup (2 values).
-
--- Setting up Google Custom Search (google_search + google_cse) ---
-
-This requires two values that work together.
-
-[Checks HIVE_CREDENTIAL_KEY before storing]
-$ printenv HIVE_CREDENTIAL_KEY > /dev/null 2>&1 && echo "set" || echo "not set"
-set
-
-First, the Google API Key:
-1. Go to https://console.cloud.google.com/apis/credentials
-2. Create a new project (or select an existing one)
-3. Enable the "Custom Search API" from the API Library
-4. Go to Credentials > Create Credentials > API Key
-5. Copy the generated API key
-
-[AskUserQuestion: "Please provide your Google API key:"]
-[User provides key]
-
-Now, the Custom Search Engine ID:
-1. Go to https://programmablesearchengine.google.com/controlpanel/all
-2. Click "Add" to create a new search engine
-3. Under "What to search", select "Search the entire web"
-4. Give your search engine a name
-5. Click "Create"
-6. Copy the Search Engine ID (cx value)
-
-[AskUserQuestion: "Please provide your Google CSE ID:"]
-[User provides ID]
-
-[Runs health check with both values - GET /customsearch/v1?q=test&num=1 → 200 OK]
-[Stores both in local encrypted store, exports to env]
-
-✓ Google Custom Search credentials valid
-
-[Calls verify_credentials(agent_path="exports/research-agent")]
-→ Returns: ready: true, missing_credentials: []
-
-All credentials are now configured:
-  ✓ anthropic (ANTHROPIC_API_KEY) — already in encrypted store
-  ✓ brave_search (BRAVE_SEARCH_API_KEY) — already in encrypted store
-  ✓ google_search (GOOGLE_API_KEY) — stored in encrypted store
-  ✓ google_cse (GOOGLE_CSE_ID) — stored in encrypted store
-
-┌─────────────────────────────────────────────────────────────────────────────┐
-│                      ✅ CREDENTIALS CONFIGURED                              │
-├─────────────────────────────────────────────────────────────────────────────┤
-│                                                                             │
-│     OPEN A NEW TERMINAL before running commands below.                      │
-│     Environment variables were saved to your shell config but               │
-│     only take effect in new terminal sessions.                              │
-│                                                                             │
-│  NEXT STEPS:                                                                │
-│                                                                             │
-│  1. RUN YOUR AGENT:                                                         │
-│                                                                             │
-│     hive tui                                                                │
-│                                                                             │
-│  2. IF YOU ENCOUNTER ISSUES, USE THE DEBUGGER:                              │
-│                                                                             │
-│     /hive-debugger                                                          │
-│                                                                             │
-│     The debugger analyzes runtime logs, identifies retry loops, tool        │
-│     failures, stalled execution, and provides actionable fix suggestions.   │
-│                                                                             │
-└─────────────────────────────────────────────────────────────────────────────┘
-```
@@ -1,385 +0,0 @@
---
-name: hive-patterns
-description: Best practices, patterns, and examples for building goal-driven agents. Includes client-facing interaction, feedback edges, judge patterns, fan-out/fan-in, context management, and anti-patterns.
-license: Apache-2.0
-metadata:
-  author: hive
-  version: "2.0"
-  type: reference
-  part_of: hive
---
-
-# Building Agents - Patterns & Best Practices
-
-Design patterns, examples, and best practices for building robust goal-driven agents.
-
-**Prerequisites:** A complete agent structure built with `hive-create`.
-
-## Practical Example: Hybrid Workflow
-
-How to build a node using both direct file writes and optional MCP validation:
-
-```python
-# 1. WRITE TO FILE FIRST (Primary - makes it visible)
-node_code = '''
-search_node = NodeSpec(
-    id="search-web",
-    node_type="event_loop",
-    input_keys=["query"],
-    output_keys=["search_results"],
-    system_prompt="Search the web for: {query}. Use web_search, then call set_output to store results.",
-    tools=["web_search"],
-)
-'''
-
-Edit(
-    file_path="exports/research_agent/nodes/__init__.py",
-    old_string="# Nodes will be added here",
-    new_string=node_code
-)
-
-# 2. OPTIONALLY VALIDATE WITH MCP (Secondary - bookkeeping)
-validation = mcp__agent-builder__test_node(
-    node_id="search-web",
-    test_input='{"query": "python tutorials"}',
-    mock_llm_response='{"search_results": [...mock results...]}'
-)
-```
-
-**User experience:**
-
- Immediately sees node in their editor (from step 1)
- Gets validation feedback (from step 2)
- Can edit the file directly if needed
-
-## Multi-Turn Interaction Patterns
-
-For agents needing multi-turn conversations with users, use `client_facing=True` on event_loop nodes.
-
-### Client-Facing Nodes
-
-A client-facing node streams LLM output to the user and blocks for user input between conversational turns. This replaces the old pause/resume pattern.
-
-```python
-# Client-facing node with STEP 1/STEP 2 prompt pattern
-intake_node = NodeSpec(
-    id="intake",
-    name="Intake",
-    description="Gather requirements from the user",
-    node_type="event_loop",
-    client_facing=True,
-    input_keys=["topic"],
-    output_keys=["research_brief"],
-    system_prompt="""\
-You are an intake specialist.
-
-**STEP 1 — Read and respond (text only, NO tool calls):**
-1. Read the topic provided
-2. If it's vague, ask 1-2 clarifying questions
-3. If it's clear, confirm your understanding
-
-**STEP 2 — After the user confirms, call set_output:**
- set_output("research_brief", "Clear description of what to research")
-""",
-)
-
-# Internal node runs without user interaction
-research_node = NodeSpec(
-    id="research",
-    name="Research",
-    description="Search and analyze sources",
-    node_type="event_loop",
-    input_keys=["research_brief"],
-    output_keys=["findings", "sources"],
-    system_prompt="Research the topic using web_search and web_scrape...",
-    tools=["web_search", "web_scrape", "load_data", "save_data"],
-)
-```
-
-**How it works:**
-
- Client-facing nodes stream LLM text to the user and block for input after each response
- User input is injected via `node.inject_event(text)`
- When the LLM calls `set_output` to produce structured outputs, the judge evaluates and ACCEPTs
- Internal nodes (non-client-facing) run their entire loop without blocking
- `set_output` is a synthetic tool — a turn with only `set_output` calls (no real tools) triggers user input blocking
-
-**STEP 1/STEP 2 pattern:** Always structure client-facing prompts with explicit phases. STEP 1 is text-only conversation. STEP 2 calls `set_output` after user confirmation. This prevents the LLM from calling `set_output` prematurely before the user responds.
-
-### When to Use client_facing
-
-| Scenario                            | client_facing | Why                    |
-| ----------------------------------- | :-----------: | ---------------------- |
-| Gathering user requirements         |      Yes      | Need user input        |
-| Human review/approval checkpoint    |      Yes      | Need human decision    |
-| Data processing (scanning, scoring) |      No       | Runs autonomously      |
-| Report generation                   |      No       | No user input needed   |
-| Final confirmation before action    |      Yes      | Need explicit approval |
-
-> **Legacy Note:** The `pause_nodes` / `entry_points` pattern still works for backward compatibility but `client_facing=True` is preferred for new agents.
-
-## Edge-Based Routing and Feedback Loops
-
-### Conditional Edge Routing
-
-Multiple conditional edges from the same source replace the old `router` node type. Each edge checks a condition on the node's output.
-
-```python
-# Node with mutually exclusive outputs
-review_node = NodeSpec(
-    id="review",
-    name="Review",
-    node_type="event_loop",
-    client_facing=True,
-    output_keys=["approved_contacts", "redo_extraction"],
-    nullable_output_keys=["approved_contacts", "redo_extraction"],
-    max_node_visits=3,
-    system_prompt="Present the contact list to the operator. If they approve, call set_output('approved_contacts', ...). If they want changes, call set_output('redo_extraction', 'true').",
-)
-
-# Forward edge (positive priority, evaluated first)
-EdgeSpec(
-    id="review-to-campaign",
-    source="review",
-    target="campaign-builder",
-    condition=EdgeCondition.CONDITIONAL,
-    condition_expr="output.get('approved_contacts') is not None",
-    priority=1,
-)
-
-# Feedback edge (negative priority, evaluated after forward edges)
-EdgeSpec(
-    id="review-feedback",
-    source="review",
-    target="extractor",
-    condition=EdgeCondition.CONDITIONAL,
-    condition_expr="output.get('redo_extraction') is not None",
-    priority=-1,
-)
-```
-
-**Key concepts:**
-
- `nullable_output_keys`: Lists output keys that may remain unset. The node sets exactly one of the mutually exclusive keys per execution.
- `max_node_visits`: Must be >1 on the feedback target (extractor) so it can re-execute, and on any node the loop passes through again, such as `review` in the example above (set to 3). Default is 1.
- `priority`: Positive = forward edge (evaluated first). Negative = feedback edge. The executor tries forward edges first; if none match, falls back to feedback edges.
-
-### Routing Decision Table
-
-| Pattern                | Old Approach            | New Approach                                  |
-| ---------------------- | ----------------------- | --------------------------------------------- |
-| Conditional branching  | `router` node           | Conditional edges with `condition_expr`       |
-| Binary approve/reject  | `pause_nodes` + resume  | `client_facing=True` + `nullable_output_keys` |
-| Loop-back on rejection | Manual entry_points     | Feedback edge with `priority=-1`              |
-| Multi-way routing      | Router with routes dict | Multiple conditional edges with priorities    |
-
-## Judge Patterns
-
-**Core Principle: The judge is the SOLE mechanism for acceptance decisions.** Never add ad-hoc framework gating to compensate for LLM behavior. If the LLM calls `set_output` prematurely, fix the system prompt or use a custom judge. Anti-patterns to avoid:
-
- Output rollback logic
- `_user_has_responded` flags
- Premature set_output rejection
- Interaction protocol injection into system prompts
-
-Judges control when an event_loop node's loop exits. Choose based on validation needs.
-
-### Implicit Judge (Default)
-
-When no judge is configured, the implicit judge ACCEPTs when both of the following hold:
-
- The LLM finishes its response with no tool calls
- All required output keys have been set via `set_output`
-
-Best for simple nodes where "all outputs set" is sufficient validation.
-
-### SchemaJudge
-
-Validates outputs against a Pydantic model. Use when you need structural validation.
-
-```python
-from pydantic import BaseModel
-
-class ScannerOutput(BaseModel):
-    github_users: list[dict]  # Must be a list of user objects
-
-class SchemaJudge:
-    def __init__(self, output_model: type[BaseModel]):
-        self._model = output_model
-
-    async def evaluate(self, context: dict) -> JudgeVerdict:
-        missing = context.get("missing_keys", [])
-        if missing:
-            return JudgeVerdict(
-                action="RETRY",
-                feedback=f"Missing output keys: {missing}. Use set_output to provide them.",
-            )
-        try:
-            self._model.model_validate(context["output_accumulator"])
-            return JudgeVerdict(action="ACCEPT")
-        except ValidationError as e:
-            return JudgeVerdict(action="RETRY", feedback=str(e))
-```
-
-### When to Use Which Judge
-
-| Judge           | Use When                              | Example                |
-| --------------- | ------------------------------------- | ---------------------- |
-| Implicit (None) | Output keys are sufficient validation | Simple data extraction |
-| SchemaJudge     | Need structural validation of outputs | API response parsing   |
-| Custom          | Domain-specific validation logic      | Score must be 0.0-1.0  |
-
-## Fan-Out / Fan-In (Parallel Execution)
-
-Multiple ON_SUCCESS edges from the same source trigger parallel execution. All branches run concurrently via `asyncio.gather()`.
-
-```python
-# Scanner fans out to Profiler and Scorer in parallel
-EdgeSpec(id="scanner-to-profiler", source="scanner", target="profiler",
-         condition=EdgeCondition.ON_SUCCESS)
-EdgeSpec(id="scanner-to-scorer", source="scanner", target="scorer",
-         condition=EdgeCondition.ON_SUCCESS)
-
-# Both fan in to Extractor
-EdgeSpec(id="profiler-to-extractor", source="profiler", target="extractor",
-         condition=EdgeCondition.ON_SUCCESS)
-EdgeSpec(id="scorer-to-extractor", source="scorer", target="extractor",
-         condition=EdgeCondition.ON_SUCCESS)
-```
-
-**Requirements:**
-
- Parallel event_loop nodes must have **disjoint output_keys** (no key written by both)
- Only one parallel branch may contain a `client_facing` node
- Fan-in node receives outputs from all completed branches in shared memory
-
-## Context Management Patterns
-
-### Tiered Compaction
-
-EventLoopNode automatically manages context window usage with tiered compaction:
-
-1. **Pruning** — Old tool results replaced with compact placeholders (zero-cost, no LLM call)
-2. **Normal compaction** — LLM summarizes older messages
-3. **Aggressive compaction** — Keeps only recent messages + summary
-4. **Emergency** — Hard reset with tool history preservation
-
-### Spillover Pattern
-
-The framework automatically truncates large tool results and saves full content to a spillover directory. The LLM receives a truncation message with instructions to use `load_data` to read the full result.
-
-For explicit data management, use the data tools (real MCP tools, not synthetic):
-
-```python
-# save_data, load_data, list_data_files, serve_file_to_user are real MCP tools
-# data_dir is auto-injected by the framework — the LLM never sees it
-
-# Saving large results
-save_data(filename="sources.json", data=large_json_string)
-
-# Reading with pagination (line-based offset/limit)
-load_data(filename="sources.json", offset=0, limit=50)
-
-# Listing available files
-list_data_files()
-
-# Serving a file to the user as a clickable link
-serve_file_to_user(filename="report.html", label="Research Report")
-```
-
-Add data tools to nodes that handle large tool results:
-
-```python
-research_node = NodeSpec(
-    ...
-    tools=["web_search", "web_scrape", "load_data", "save_data", "list_data_files"],
-)
-```
-
-`data_dir` is a framework context parameter — auto-injected at call time. `GraphExecutor.execute()` sets it per-execution via `ToolRegistry.set_execution_context(data_dir=...)` (using `contextvars` for concurrency safety), ensuring it matches the session-scoped spillover directory.
-
-## Anti-Patterns
-
-### What NOT to Do
-
- **Don't rely on `export_graph`** — Write files immediately, not at the end
- **Don't hide code in the session** — Write to files as components are approved
- **Don't wait to write files** — The agent should be visible from the first step
- **Don't batch everything** — Write incrementally, one component at a time
- **Don't create too many thin nodes** — Prefer fewer, richer nodes (see below)
- **Don't add framework gating for LLM behavior** — Fix prompts or use judges instead
-
-### Fewer, Richer Nodes
-
-A common mistake is splitting work into too many small single-purpose nodes. Each node boundary requires serializing outputs, loses in-context information, and adds edge complexity.
-
-| Bad (8 thin nodes)  | Good (4 rich nodes)                 |
-| ------------------- | ----------------------------------- |
-| parse-query         | intake (client-facing)              |
-| search-sources      | research (search + fetch + analyze) |
-| fetch-content       | review (client-facing)              |
-| evaluate-sources    | report (write + deliver)            |
-| synthesize-findings |                                     |
-| write-report        |                                     |
-| quality-check       |                                     |
-| save-report         |                                     |
-
-**Why fewer nodes are better:**
-
- The LLM retains full context of its work within a single node
- A research node that searches, fetches, and analyzes keeps all source material in its conversation history
- Fewer edges means simpler graph and fewer failure points
- Data tools (`save_data`/`load_data`) handle context window limits within a single node
-
-### MCP Tools - Correct Usage
-
-**MCP tools OK for:**
-
- `test_node` — Validate node configuration with mock inputs
- `validate_graph` — Check graph structure
- `configure_loop` — Set event loop parameters
- `create_session` — Track session state for bookkeeping
-
-**Just don't:** Use MCP as the primary construction method or rely on `export_graph`
-
-## Error Handling Patterns
-
-### Graceful Failure with Fallback
-
-```python
-edges = [
-    # Success path
-    EdgeSpec(id="api-success", source="api-call", target="process-results",
-             condition=EdgeCondition.ON_SUCCESS),
-    # Fallback on failure
-    EdgeSpec(id="api-to-fallback", source="api-call", target="fallback-cache",
-             condition=EdgeCondition.ON_FAILURE, priority=1),
-    # Report if fallback also fails
-    EdgeSpec(id="fallback-to-error", source="fallback-cache", target="report-error",
-             condition=EdgeCondition.ON_FAILURE, priority=1),
-]
-```
-
-## Handoff to Testing
-
-When the agent is complete, transition to the testing phase:
-
-### Pre-Testing Checklist
-
- [ ] Agent structure validates: `uv run python -m agent_name validate`
- [ ] All nodes defined in `nodes/__init__.py`
- [ ] All edges connect valid nodes with correct priorities
- [ ] Feedback edge targets have `max_node_visits > 1`
- [ ] Client-facing nodes have meaningful system prompts
- [ ] Agent can be imported: `from exports.agent_name import default_agent`
-
-## Related Skills
-
- **hive-concepts** — Fundamental concepts (node types, edges, event loop architecture)
- **hive-create** — Step-by-step building process
- **hive-test** — Test and validate agents
- **hive** — Complete workflow orchestrator
-
---
-
-**Remember: Agent is actively constructed, visible the whole time. No hidden state. No surprise exports. Just transparent, incremental file building.**
@@ -1,940 +0,0 @@
---
-name: hive-test
-description: Iterative agent testing with session recovery. Execute, analyze, fix, resume from checkpoints. Use when testing an agent, debugging test failures, or verifying fixes without re-running from scratch.
---
-
-# Agent Testing
-
-Test agents iteratively: execute, analyze failures, fix, resume from checkpoint, repeat.
-
-## When to Use
-
- Testing a newly built agent against its goal
- Debugging a failing agent iteratively
- Verifying fixes without re-running expensive early nodes
- Running final regression tests before deployment
-
-## Prerequisites
-
-1. Agent package at `exports/{agent_name}/` (built with `/hive-create`)
-2. Credentials configured (`/hive-credentials`)
-3. `ANTHROPIC_API_KEY` set (or appropriate LLM provider key)
-
-**Path distinction** (critical — don't confuse these):
- `exports/{agent_name}/` — agent source code (edit here)
- `~/.hive/agents/{agent_name}/` — runtime data: sessions, checkpoints, logs (read here)
-
---
-
-## The Iterative Test Loop
-
-This is the core workflow. Don't re-run the entire agent when a late node fails — analyze, fix, and resume from the last clean checkpoint.
-
-```
-┌──────────────────────────────────────┐
-│ PHASE 1: Generate Test Scenarios     │
-│ Goal → synthetic test inputs + tests │
-└──────────────┬───────────────────────┘
-               ↓
-┌──────────────────────────────────────┐
-│ PHASE 2: Execute                     │◄────────────────┐
-│ Run agent (CLI or pytest)            │                 │
-└──────────────┬───────────────────────┘                 │
-               ↓                                         │
-          Pass? ──yes──► PHASE 6: Final Verification     │
-               │                                         │
-               no                                        │
-               ↓                                         │
-┌──────────────────────────────────────┐                 │
-│ PHASE 3: Analyze                     │                 │
-│ Session + runtime logs + checkpoints │                 │
-└──────────────┬───────────────────────┘                 │
-               ↓                                         │
-┌──────────────────────────────────────┐                 │
-│ PHASE 4: Fix                         │                 │
-│ Prompt / code / graph / goal         │                 │
-└──────────────┬───────────────────────┘                 │
-               ↓                                         │
-┌──────────────────────────────────────┐                 │
-│ PHASE 5: Recover & Resume            │─────────────────┘
-│ Checkpoint resume OR fresh re-run    │
-└──────────────────────────────────────┘
-```
-
---
-
-### Phase 1: Generate Test Scenarios
-
-Create synthetic tests from the agent's goal, constraints, and success criteria.
-
-#### Step 1a: Read the goal
-
-```python
-# Read goal from agent.py
-Read(file_path="exports/{agent_name}/agent.py")
-# Extract the Goal definition and convert to JSON string
-```
-
-#### Step 1b: Get test guidelines
-
-```python
-# Get constraint test guidelines
-generate_constraint_tests(
-    goal_id="your-goal-id",
-    goal_json='{"id": "...", "constraints": [...]}',
-    agent_path="exports/{agent_name}"
-)
-
-# Get success criteria test guidelines
-generate_success_tests(
-    goal_id="your-goal-id",
-    goal_json='{"id": "...", "success_criteria": [...]}',
-    node_names="intake,research,review,report",
-    tool_names="web_search,web_scrape",
-    agent_path="exports/{agent_name}"
-)
-```
-
-These return `output_file`, `file_header`, `test_template`, `constraints_formatted`/`success_criteria_formatted`, and `test_guidelines`. They do NOT generate test code — you write the tests.
-
-#### Step 1c: Write tests
-
-```python
-Write(
-    file_path=result["output_file"],
-    content=result["file_header"] + "\n\n" + your_test_code
-)
-```
-
-#### Test writing rules
-
- Every test MUST be `async` with `@pytest.mark.asyncio`
- Every test MUST accept `runner, auto_responder, mock_mode` fixtures
- Use `await auto_responder.start()` before running, `await auto_responder.stop()` in `finally`
- Use `await runner.run(input_dict)` — this goes through AgentRunner → AgentRuntime → ExecutionStream
- Access output via `result.output.get("key")` — NEVER `result.output["key"]`
- `result.success=True` means no exception, NOT goal achieved — always check output
- Write 8-15 tests total, not 30+
- Each real test costs ~3 seconds + LLM tokens
- NEVER use `default_agent.run()` — it bypasses the runtime (no sessions, no logs, client-facing nodes hang)
-
-#### Step 1d: Check existing tests
-
-Before generating tests (that is, before Step 1b), check whether tests already exist:
-
-```python
-list_tests(
-    goal_id="your-goal-id",
-    agent_path="exports/{agent_name}"
-)
-```
-
---
-
-### Phase 2: Execute
-
-Two execution paths, use the right one for your situation.
-
-#### Iterative debugging (for complex agents)
-
-Run the agent via CLI. This creates sessions with checkpoints at `~/.hive/agents/{agent_name}/sessions/`:
-
-```bash
-uv run hive run exports/{agent_name} --input '{"query": "test topic"}'
-```
-
-Sessions and checkpoints are saved automatically.
-
-**Client-facing nodes**: Agents with `client_facing=True` nodes (interactive conversation) work in headless mode when run from a real terminal — the agent streams output to stdout and reads user input from stdin via a `>>> ` prompt. In non-interactive shells (like Claude Code's Bash tool), client-facing nodes will hang because there is no stdin. For testing interactive agents from Claude Code, use `run_tests` with mock mode or have the user run the agent manually in their terminal.
-
-#### Automated regression (for CI or final verification)
-
-Use the `run_tests` MCP tool to run all pytest tests:
-
-```python
-run_tests(
-    goal_id="your-goal-id",
-    agent_path="exports/{agent_name}"
-)
-```
-
-Returns structured results:
-```json
-{
-  "overall_passed": false,
-  "summary": {"total": 12, "passed": 10, "failed": 2, "pass_rate": "83.3%"},
-  "test_results": [{"test_name": "test_success_source_diversity", "status": "failed"}],
-  "failures": [{"test_name": "test_success_source_diversity", "details": "..."}]
-}
-```
-
-**Options:**
-```python
-# Run only constraint tests
-run_tests(goal_id, agent_path, test_types='["constraint"]')
-
-# Stop on first failure
-run_tests(goal_id, agent_path, fail_fast=True)
-
-# Parallel execution
-run_tests(goal_id, agent_path, parallel=4)
-```
-
-**Note:** `run_tests` uses `AgentRunner` with `tmp_path` storage, so sessions are isolated per test run. For checkpoint-based recovery with persistent sessions, use CLI execution. Use `run_tests` for quick regression checks and final verification.
-
---
-
-### Phase 3: Analyze Failures
-
-When a test fails, drill down systematically. Don't guess — use the tools.
-
-#### Step 3a: Get error category
-
-```python
-debug_test(
-    goal_id="your-goal-id",
-    test_name="test_success_source_diversity",
-    agent_path="exports/{agent_name}"
-)
-```
-
-Returns error category (`IMPLEMENTATION_ERROR`, `ASSERTION_FAILURE`, `TIMEOUT`, `IMPORT_ERROR`, `API_ERROR`) plus full traceback and suggestions.
-
-#### Step 3b: Find the failed session
-
-```python
-list_agent_sessions(
-    agent_work_dir="~/.hive/agents/{agent_name}",
-    status="failed",
-    limit=5
-)
-```
-
-Returns session list with IDs, timestamps, current_node (where it failed), execution_quality.
-
-#### Step 3c: Inspect session state
-
-```python
-get_agent_session_state(
-    agent_work_dir="~/.hive/agents/{agent_name}",
-    session_id="session_20260209_143022_abc12345"
-)
-```
-
-Returns execution path, which node was current, step count, timestamps — but excludes memory values (to avoid context bloat). Shows `memory_keys` and `memory_size` instead.
-
-#### Step 3d: Examine runtime logs (L2/L3)
-
-```python
-# L2: Per-node success/failure, retry counts
-query_runtime_log_details(
-    agent_work_dir="~/.hive/agents/{agent_name}",
-    run_id="session_20260209_143022_abc12345",
-    needs_attention_only=True
-)
-
-# L3: Exact LLM responses, tool call inputs/outputs
-query_runtime_log_raw(
-    agent_work_dir="~/.hive/agents/{agent_name}",
-    run_id="session_20260209_143022_abc12345",
-    node_id="research"
-)
-```
-
-#### Step 3e: Inspect memory data
-
-```python
-# See what data a node actually produced
-get_agent_session_memory(
-    agent_work_dir="~/.hive/agents/{agent_name}",
-    session_id="session_20260209_143022_abc12345",
-    key="research_results"
-)
-```
-
-#### Step 3f: Find recovery points
-
-```python
-list_agent_checkpoints(
-    agent_work_dir="~/.hive/agents/{agent_name}",
-    session_id="session_20260209_143022_abc12345",
-    is_clean="true"
-)
-```
-
-Returns checkpoint summaries with IDs, types (`node_start`, `node_complete`), which node, and `is_clean` flag. Clean checkpoints are safe resume points.
-
-#### Step 3g: Compare checkpoints (optional)
-
-To understand what changed between two points in execution:
-
-```python
-compare_agent_checkpoints(
-    agent_work_dir="~/.hive/agents/{agent_name}",
-    session_id="session_20260209_143022_abc12345",
-    checkpoint_id_before="cp_node_complete_research_143030",
-    checkpoint_id_after="cp_node_complete_review_143115"
-)
-```
-
-Returns memory diff (added/removed/changed keys) and execution path diff.
-
---
-
-### Phase 4: Fix Based on Root Cause
-
-Use the analysis from Phase 3 to determine what to fix and where.
-
-| Root Cause | What to Fix | Where to Edit |
-|------------|------------|---------------|
-| **Prompt issue** — LLM produces wrong output format, misses instructions | Node `system_prompt` | `exports/{agent}/nodes/__init__.py` |
-| **Code bug** — TypeError, KeyError, logic error in Python | Agent code | `exports/{agent}/agent.py`, `nodes/__init__.py` |
-| **Graph issue** — wrong routing, missing edge, bad condition_expr | Edges, node config | `exports/{agent}/agent.py` |
-| **Tool issue** — MCP tool fails, wrong config, missing credential | Tool config | `exports/{agent}/mcp_servers.json`, `/hive-credentials` |
-| **Goal issue** — success criteria too strict/vague, wrong constraints | Goal definition | `exports/{agent}/agent.py` (goal section) |
-| **Test issue** — test expectations don't match actual agent behavior | Test code | `exports/{agent}/tests/test_*.py` |
-
-#### Fix strategies by error category
-
-**IMPLEMENTATION_ERROR** (TypeError, AttributeError, KeyError):
-```python
-# Read the failing code
-Read(file_path="exports/{agent_name}/nodes/__init__.py")
-
-# Fix the bug
-Edit(
-    file_path="exports/{agent_name}/nodes/__init__.py",
-    old_string="results.get('videos')",
-    new_string="(results or {}).get('videos', [])"
-)
-```
-
-**ASSERTION_FAILURE** (test assertions fail but agent ran successfully):
- Check if the agent's output is actually wrong → fix the prompt
- Check if the test's expectations are unrealistic → fix the test
- Use `get_agent_session_memory` to see what the agent actually produced
-
-**TIMEOUT / STALL** (agent runs too long):
- Check `node_visit_counts` for feedback loops hitting max_node_visits
- Check L3 logs for tool calls that hang
- Reduce `max_iterations` in loop_config or fix the prompt to converge faster
-
-**API_ERROR** (connection, rate limit, auth):
- Verify credentials with `/hive-credentials`
- Check MCP server configuration
-
---
-
-### Phase 5: Recover & Resume
-
-After fixing the agent, decide whether to resume or re-run.
-
-#### When to resume from checkpoint
-
-Resume when ALL of these are true:
- The fix is to a node that comes AFTER existing clean checkpoints
- Clean checkpoints exist (from a CLI execution with checkpointing)
- The early nodes are expensive (web scraping, API calls, long LLM chains)
-
-```bash
-# Resume from the last clean checkpoint before the failing node
-uv run hive run exports/{agent_name} \
-  --resume-session session_20260209_143022_abc12345 \
-  --checkpoint cp_node_complete_research_143030
-```
-
-This skips all nodes before the checkpoint and only re-runs the fixed node onward.
-
-#### When to re-run from scratch
-
-Re-run when ANY of these are true:
- The fix is to the entry node or an early node
- No checkpoints exist (e.g., agent was run via `run_tests`)
- The agent is fast (2-3 nodes, completes in seconds)
- You changed the graph structure (added/removed nodes/edges)
-
-```bash
-uv run hive run exports/{agent_name} --input '{"query": "test topic"}'
-```
-
-#### Inspecting a checkpoint before resuming
-
-```python
-get_agent_checkpoint(
-    agent_work_dir="~/.hive/agents/{agent_name}",
-    session_id="session_20260209_143022_abc12345",
-    checkpoint_id="cp_node_complete_research_143030"
-)
-```
-
-Returns the full checkpoint: shared_memory snapshot, execution_path, current_node, next_node, is_clean.
-
-#### Loop back to Phase 2
-
-Resuming or re-running is a new Phase 2 execution. Check whether the fix worked; if the agent still fails, continue to Phase 3 and analyze the new failure.
-
---
-
-### Phase 6: Final Verification
-
-Once the iterative fix loop converges (the agent produces correct output), run the full automated test suite:
-
-```python
-run_tests(
-    goal_id="your-goal-id",
-    agent_path="exports/{agent_name}"
-)
-```
-
-All tests should pass. If not, repeat the loop for remaining failures.
-
---
-
-## Credential Requirements
-
-**CRITICAL: Testing requires ALL credentials the agent depends on.** This includes both the LLM API key AND any tool-specific credentials (HubSpot, Brave Search, etc.).
-
-### Prerequisites
-
-Before running agent tests, you MUST collect ALL required credentials from the user.
-
-**Step 1: LLM API Key (always required)**
-```bash
-export ANTHROPIC_API_KEY="your-key-here"
-```
-
-**Step 2: Tool-specific credentials (depends on agent's tools)**
-
-Inspect the agent's `mcp_servers.json` and tool configuration to determine which tools the agent uses, then check for all required credentials:
-
-```python
-from aden_tools.credentials import CredentialManager, CREDENTIAL_SPECS
-
-creds = CredentialManager()
-
-# Determine which tools the agent uses (from agent.json or mcp_servers.json)
-agent_tools = [...]  # e.g., ["hubspot_search_contacts", "web_search", ...]
-
-# Find all missing credentials for those tools
-missing = creds.get_missing_for_tools(agent_tools)
-```
-
-Common tool credentials:
-| Tool | Env Var | Help URL |
-|------|---------|----------|
-| HubSpot CRM | `HUBSPOT_ACCESS_TOKEN` | https://developers.hubspot.com/docs/api/private-apps |
-| Brave Search | `BRAVE_SEARCH_API_KEY` | https://brave.com/search/api/ |
-| Google Search | `GOOGLE_SEARCH_API_KEY` + `GOOGLE_SEARCH_CX` | https://developers.google.com/custom-search |
-
-**Why ALL credentials are required:**
- Tests need to execute the agent's LLM nodes to validate behavior
- Tools with missing credentials will return error dicts instead of real data
- Mock mode bypasses LLM reasoning and real tool/API calls, so it provides no confidence in real-world performance
-
-### Mock Mode Limitations
-
-Mock mode (`--mock` flag or `MOCK_MODE=1`) is **ONLY for structure validation**:
-
- Validates graph structure (nodes, edges, connections)
- Validates that `AgentRunner.load()` succeeds and the agent is importable
- Does NOT execute event_loop agents — MockLLMProvider never calls `set_output`, so event_loop nodes loop forever
- Does NOT test LLM reasoning, content quality, or constraint validation
- Does NOT test real API integrations or tool use
-
-**Bottom line:** If you're testing whether an agent achieves its goal, you MUST use real credentials.
-
-### Enforcing Credentials in Tests
-
-When writing tests, **ALWAYS include credential checks**:
-
-```python
-import os
-import pytest
-from aden_tools.credentials import CredentialManager
-
-pytestmark = pytest.mark.skipif(
-    not CredentialManager().is_available("anthropic") and not os.environ.get("MOCK_MODE"),
-    reason="API key required for real testing. Set ANTHROPIC_API_KEY or use MOCK_MODE=1."
-)
-
-
-@pytest.fixture(scope="session", autouse=True)
-def check_credentials():
-    """Ensure ALL required credentials are set for real testing."""
-    creds = CredentialManager()
-    mock_mode = os.environ.get("MOCK_MODE")
-
-    if not creds.is_available("anthropic"):
-        if mock_mode:
-            print("\nRunning in MOCK MODE - structure validation only")
-        else:
-            pytest.fail(
-                "\nANTHROPIC_API_KEY not set!\n"
-                "Set API key: export ANTHROPIC_API_KEY='your-key-here'\n"
-                "Or run structure validation: MOCK_MODE=1 pytest exports/{agent}/tests/"
-            )
-
-    if not mock_mode:
-        agent_tools = []  # Update per agent
-        missing = creds.get_missing_for_tools(agent_tools)
-        if missing:
-            lines = ["\nMissing tool credentials!"]
-            for name in missing:
-                spec = creds.specs.get(name)
-                if spec:
-                    lines.append(f"  {spec.env_var} - {spec.description}")
-            pytest.fail("\n".join(lines))
-```
-
-### User Communication
-
-When the user asks to test an agent, **ALWAYS check for ALL credentials first**:
-
-1. **Identify the agent's tools** from `mcp_servers.json`
-2. **Check ALL required credentials** using `CredentialManager`
-3. **Ask the user to provide any missing credentials** before proceeding
-4. Collect ALL missing credentials in a single prompt — not one at a time
-
---
-
-## Safe Test Patterns
-
-### OutputCleaner
-
-The framework automatically validates and cleans node outputs using a fast LLM at edge traversal time. Tests should still use safe patterns because OutputCleaner may not catch all issues.
-
-### Safe Access (REQUIRED)
-
-```python
-# UNSAFE - will crash on missing keys
-approval = result.output["approval_decision"]
-category = result.output["analysis"]["category"]
-
-# SAFE - use .get() with defaults
-output = result.output or {}
-approval = output.get("approval_decision", "UNKNOWN")
-
-# SAFE - type check before operations
-analysis = output.get("analysis", {})
-if isinstance(analysis, dict):
-    category = analysis.get("category", "unknown")
-
-# SAFE - handle JSON parsing trap (LLM response as string)
-import json
-recommendation = output.get("recommendation", "{}")
-if isinstance(recommendation, str):
-    try:
-        parsed = json.loads(recommendation)
-        if isinstance(parsed, dict):
-            approval = parsed.get("approval_decision", "UNKNOWN")
-    except json.JSONDecodeError:
-        approval = "UNKNOWN"
-elif isinstance(recommendation, dict):
-    approval = recommendation.get("approval_decision", "UNKNOWN")
-
-# SAFE - type check before iteration
-items = output.get("items", [])
-if isinstance(items, list):
-    for item in items:
-        ...
-```
-
-### Helper Functions for conftest.py
-
-```python
-import json
-import re
-
-def _parse_json_from_output(result, key):
-    """Parse JSON from agent output (framework may store full LLM response as string)."""
-    response_text = result.output.get(key, "")
-    json_text = re.sub(r'```json\s*|\s*```', '', response_text).strip()
-    try:
-        return json.loads(json_text)
-    except (json.JSONDecodeError, AttributeError, TypeError):
-        return result.output.get(key)
-
-def safe_get_nested(result, key_path, default=None):
-    """Safely get nested value from result.output."""
-    output = result.output or {}
-    current = output
-    for key in key_path:
-        if isinstance(current, dict):
-            current = current.get(key)
-        elif isinstance(current, str):
-            try:
-                json_text = re.sub(r'```json\s*|\s*```', '', current).strip()
-                parsed = json.loads(json_text)
-                if isinstance(parsed, dict):
-                    current = parsed.get(key)
-                else:
-                    return default
-            except json.JSONDecodeError:
-                return default
-        else:
-            return default
-    return current if current is not None else default
-
-# Make available in tests
-pytest.parse_json_from_output = _parse_json_from_output
-pytest.safe_get_nested = safe_get_nested
-```
-
-### ExecutionResult Fields
-
-**`result.success=True` means NO exception, NOT goal achieved**
-
-```python
-# WRONG
-assert result.success
-
-# RIGHT
-assert result.success, f"Agent failed: {result.error}"
-output = result.output or {}
-approval = output.get("approval_decision")
-assert approval == "APPROVED", f"Expected APPROVED, got {approval}"
-```
-
-All fields:
- `success: bool` — Completed without exception (NOT goal achieved!)
- `output: dict` — Complete memory snapshot (may contain raw strings)
- `error: str | None` — Error message if failed
- `steps_executed: int` — Number of nodes executed
- `total_tokens: int` — Cumulative token usage
- `total_latency_ms: int` — Total execution time
- `path: list[str]` — Node IDs traversed (may repeat in feedback loops)
- `paused_at: str | None` — Node ID if paused
- `session_state: dict` — State for resuming
- `node_visit_counts: dict[str, int]` — Visit counts per node (feedback loop testing)
- `execution_quality: str` — "clean", "degraded", or "failed"
-
-### Test Count Guidance
-
-**Write 8-15 tests, not 30+**
-
- 2-3 tests per success criterion
- 1 happy path test
- 1 boundary/edge case test
- 1 error handling test (optional)
-
-Each real test costs ~3 seconds + LLM tokens. 12 tests = ~36 seconds, $0.12.
-
---
-
-## Test Patterns
-
-### Happy Path
-```python
-@pytest.mark.asyncio
-async def test_happy_path(runner, auto_responder, mock_mode):
-    """Test normal successful execution."""
-    await auto_responder.start()
-    try:
-        result = await runner.run({"query": "python tutorials"})
-    finally:
-        await auto_responder.stop()
-    assert result.success, f"Agent failed: {result.error}"
-    output = result.output or {}
-    assert output.get("report"), "No report produced"
-```
-
-### Boundary Condition
-```python
-@pytest.mark.asyncio
-async def test_minimum_sources(runner, auto_responder, mock_mode):
-    """Test at minimum source threshold."""
-    await auto_responder.start()
-    try:
-        result = await runner.run({"query": "niche topic"})
-    finally:
-        await auto_responder.stop()
-    assert result.success, f"Agent failed: {result.error}"
-    output = result.output or {}
-    sources = output.get("sources", [])
-    if isinstance(sources, list):
-        assert len(sources) >= 3, f"Expected >= 3 sources, got {len(sources)}"
-```
-
-### Error Handling
-```python
-@pytest.mark.asyncio
-async def test_empty_input(runner, auto_responder, mock_mode):
-    """Test graceful handling of empty input."""
-    await auto_responder.start()
-    try:
-        result = await runner.run({"query": ""})
-    finally:
-        await auto_responder.stop()
-    # Agent should either fail gracefully or produce an error message
-    output = result.output or {}
-    assert not result.success or output.get("error"), "Should handle empty input"
-```
-
-### Feedback Loop
-```python
-@pytest.mark.asyncio
-async def test_feedback_loop_terminates(runner, auto_responder, mock_mode):
-    """Test that feedback loops don't run forever."""
-    await auto_responder.start()
-    try:
-        result = await runner.run({"query": "test"})
-    finally:
-        await auto_responder.stop()
-    visits = result.node_visit_counts or {}
-    for node_id, count in visits.items():
-        assert count <= 5, f"Node {node_id} visited {count} times — possible infinite loop"
-```
-
---
-
-## MCP Tool Reference
-
-### Phase 1: Test Generation
-
-```python
-# Check existing tests
-list_tests(goal_id, agent_path)
-
-# Get constraint test guidelines (returns templates, NOT generated tests)
-generate_constraint_tests(goal_id, goal_json, agent_path)
-# Returns: output_file, file_header, test_template, constraints_formatted, test_guidelines
-
-# Get success criteria test guidelines
-generate_success_tests(goal_id, goal_json, node_names, tool_names, agent_path)
-# Returns: output_file, file_header, test_template, success_criteria_formatted, test_guidelines
-```
-
-### Phase 2: Execution
-
-```python
-# Automated regression (no checkpoints, fresh runs)
-run_tests(goal_id, agent_path, test_types='["all"]', parallel=-1, fail_fast=False)
-
-# Run only specific test types
-run_tests(goal_id, agent_path, test_types='["constraint"]')
-run_tests(goal_id, agent_path, test_types='["success"]')
-```
-
-```bash
-# Iterative debugging with checkpoints (via CLI)
-uv run hive run exports/{agent_name} --input '{"query": "test"}'
-```
-
-### Phase 3: Analysis
-
-```python
-# Debug a specific failed test
-debug_test(goal_id, test_name, agent_path)
-
-# Find failed sessions
-list_agent_sessions(agent_work_dir, status="failed", limit=5)
-
-# Inspect session state (excludes memory values)
-get_agent_session_state(agent_work_dir, session_id)
-
-# Inspect memory data
-get_agent_session_memory(agent_work_dir, session_id, key="research_results")
-
-# Runtime logs: L1 summaries
-query_runtime_logs(agent_work_dir, status="needs_attention")
-
-# Runtime logs: L2 per-node details
-query_runtime_log_details(agent_work_dir, run_id, needs_attention_only=True)
-
-# Runtime logs: L3 tool/LLM raw data
-query_runtime_log_raw(agent_work_dir, run_id, node_id="research")
-
-# Find clean checkpoints
-list_agent_checkpoints(agent_work_dir, session_id, is_clean="true")
-
-# Compare checkpoints (memory diff)
-compare_agent_checkpoints(agent_work_dir, session_id, cp_before, cp_after)
-```
-
-### Phase 5: Recovery
-
-```python
-# Inspect checkpoint before resuming
-get_agent_checkpoint(agent_work_dir, session_id, checkpoint_id)
-# Empty checkpoint_id = latest checkpoint
-```
-
-```bash
-# Resume from checkpoint via CLI (headless)
-uv run hive run exports/{agent_name} \
-  --resume-session {session_id} --checkpoint {checkpoint_id}
-```
-
---
-
-## Anti-Patterns
-
-| Don't | Do Instead |
-|-------|-----------|
-| Use `default_agent.run()` in tests | Use `runner.run()` with `auto_responder` fixtures (goes through AgentRuntime) |
-| Re-run entire agent when a late node fails | Resume from last clean checkpoint |
-| Treat `result.success` as goal achieved | Check `result.output` for actual criteria |
-| Access `result.output["key"]` directly | Use `result.output.get("key")` |
-| Fix random things hoping tests pass | Analyze L2/L3 logs to find root cause first |
-| Write 30+ tests | Write 8-15 focused tests |
-| Skip credential check | Use `/hive-credentials` before testing |
-| Confuse `exports/` with `~/.hive/agents/` | Code in `exports/`, runtime data in `~/.hive/` |
-| Use `run_tests` for iterative debugging | Use headless CLI with checkpoints for iterative debugging |
-| Use headless CLI for final regression | Use `run_tests` for automated regression |
-| Use `--tui` from Claude Code | Use headless `run` command — TUI hangs in non-interactive shells |
-| Test client-facing nodes from Claude Code | Use mock mode, or have the user run the agent in their terminal |
-| Run tests without reading goal first | Always understand the goal before writing tests |
-| Skip Phase 3 analysis and guess | Use session + log tools to identify root cause |
-
---
-
-## Example Walkthrough: Deep Research Agent
-
-A complete iteration showing the test loop for an agent with nodes: `intake → research → review → report`.
-
-### Phase 1: Generate tests
-
-```python
-# Read the goal
-Read(file_path="exports/deep_research_agent/agent.py")
-
-# Get success criteria test guidelines
-result = generate_success_tests(
-    goal_id="rigorous-interactive-research",
-    goal_json='{"id": "rigorous-interactive-research", "success_criteria": [{"id": "source-diversity", "target": ">=5"}, {"id": "citation-coverage", "target": "100%"}, {"id": "report-completeness", "target": "90%"}]}',
-    node_names="intake,research,review,report",
-    tool_names="web_search,web_scrape",
-    agent_path="exports/deep_research_agent"
-)
-
-# Write tests
-Write(
-    file_path=result["output_file"],
-    content=result["file_header"] + "\n\n" + test_code
-)
-```
-
-### Phase 2: First execution
-
-```python
-run_tests(
-    goal_id="rigorous-interactive-research",
-    agent_path="exports/deep_research_agent",
-    fail_fast=True
-)
-```
-
-Result: `test_success_source_diversity` fails — agent only found 2 sources instead of 5.
-
-### Phase 3: Analyze
-
-```python
-# Debug the failing test
-debug_test(
-    goal_id="rigorous-interactive-research",
-    test_name="test_success_source_diversity",
-    agent_path="exports/deep_research_agent"
-)
-# → ASSERTION_FAILURE: Expected >= 5 sources, got 2
-
-# Find the session
-list_agent_sessions(
-    agent_work_dir="~/.hive/agents/deep_research_agent",
-    status="completed",
-    limit=1
-)
-# → session_20260209_150000_abc12345
-
-# See what the research node produced
-get_agent_session_memory(
-    agent_work_dir="~/.hive/agents/deep_research_agent",
-    session_id="session_20260209_150000_abc12345",
-    key="research_results"
-)
-# → Only 2 web_search calls made, each returned 1 source
-
-# Check the LLM's behavior in the research node
-query_runtime_log_raw(
-    agent_work_dir="~/.hive/agents/deep_research_agent",
-    run_id="session_20260209_150000_abc12345",
-    node_id="research"
-)
-# → LLM called web_search only twice, then called set_output
-```
-
-Root cause: The research node's prompt doesn't tell the LLM to search for at least 5 diverse sources. It stops after the first couple of searches.
-
-### Phase 4: Fix the prompt
-
-```python
-Read(file_path="exports/deep_research_agent/nodes/__init__.py")
-
-Edit(
-    file_path="exports/deep_research_agent/nodes/__init__.py",
-    old_string='system_prompt="Search for information on the user\'s topic."',
-    new_string='system_prompt="Search for information on the user\'s topic. You MUST find at least 5 diverse, authoritative sources. Use multiple different search queries to ensure source diversity. Do not stop searching until you have at least 5 distinct sources."'
-)
-```
-
-### Phase 5: Resume from checkpoint
-
-For this example, the fix is to the `research` node. If we had run via CLI with checkpointing, we could resume from the checkpoint after `intake` to skip re-running intake:
-
-```bash
-# Check if clean checkpoint exists after intake (MCP tool call, not a shell command)
-list_agent_checkpoints(
-    agent_work_dir="~/.hive/agents/deep_research_agent",
-    session_id="session_20260209_150000_abc12345",
-    is_clean="true"
-)
-# → cp_node_complete_intake_150005
-
-# Resume from after intake, re-run research with fixed prompt
-uv run hive run exports/deep_research_agent \
-  --resume-session session_20260209_150000_abc12345 \
-  --checkpoint cp_node_complete_intake_150005
-```
-
-Or for this simple case (intake is fast), just re-run:
-
-```bash
-uv run hive run exports/deep_research_agent --input '{"topic": "test"}'
-```
-
-### Phase 6: Final verification
-
-```python
-run_tests(
-    goal_id="rigorous-interactive-research",
-    agent_path="exports/deep_research_agent"
-)
-# → All 12 tests pass
-```
-
---
-
-## Test File Structure
-
-```
-exports/{agent_name}/
-├── agent.py              ← Agent to test (goal, nodes, edges)
-├── nodes/__init__.py     ← Node implementations (prompts, config)
-├── config.py             ← Agent configuration
-├── mcp_servers.json      ← Tool server config
-└── tests/
-    ├── conftest.py           ← Shared fixtures + safe access helpers
-    ├── test_constraints.py   ← Constraint tests
-    ├── test_success_criteria.py  ← Success criteria tests
-    └── test_edge_cases.py    ← Edge case tests
-```
-
-## Integration with Other Skills
-
-| Scenario | From | To | Action |
-|----------|------|----|--------|
-| Agent built, ready to test | `/hive-create` | `/hive-test` | Generate tests, start loop |
-| Prompt fix needed | `/hive-test` Phase 4 | Direct edit | Edit `nodes/__init__.py`, resume |
-| Goal definition wrong | `/hive-test` Phase 4 | `/hive-create` | Update goal, may need rebuild |
-| Missing credentials | `/hive-test` Phase 3 | `/hive-credentials` | Set up credentials |
-| Complex runtime failure | `/hive-test` Phase 3 | `/hive-debugger` | Deep L1/L2/L3 analysis |
-| All tests pass | `/hive-test` Phase 6 | Done | Agent validated |
@@ -1,333 +0,0 @@
-# Example: Iterative Testing of a Research Agent
-
-This example walks through the full iterative test loop for a research agent that searches the web, reviews findings, and produces a cited report.
-
-## Agent Structure
-
-```
-exports/deep_research_agent/
-├── agent.py          # Goal + graph: intake → research → review → report
-├── nodes/__init__.py # Node definitions (system_prompt, input/output keys)
-├── config.py         # Model config
-├── mcp_servers.json  # Tools: web_search, web_scrape
-└── tests/            # Test files (we'll create these)
-```
-
-**Goal:** "Rigorous Interactive Research" — find 5+ diverse sources, cite every claim, produce a complete report.
-
---
-
-## Phase 1: Generate Tests
-
-### Read the goal
-
-```python
-Read(file_path="exports/deep_research_agent/agent.py")
-# Extract: goal_id="rigorous-interactive-research"
-# success_criteria: source-diversity (>=5), citation-coverage (100%), report-completeness (90%)
-# constraints: no-hallucination, source-attribution
-```
-
-### Get test guidelines
-
-```python
-result = generate_success_tests(
-    goal_id="rigorous-interactive-research",
-    goal_json='{"id": "rigorous-interactive-research", "success_criteria": [{"id": "source-diversity", "description": "Use multiple diverse sources", "target": ">=5"}, {"id": "citation-coverage", "description": "Every claim cites its source", "target": "100%"}, {"id": "report-completeness", "description": "Report answers the research questions", "target": "90%"}]}',
-    node_names="intake,research,review,report",
-    tool_names="web_search,web_scrape",
-    agent_path="exports/deep_research_agent"
-)
-```
-
-### Write tests
-
-```python
-Write(
-    file_path="exports/deep_research_agent/tests/test_success_criteria.py",
-    content=result["file_header"] + '''
-
-@pytest.mark.asyncio
-async def test_success_source_diversity(runner, auto_responder, mock_mode):
-    """At least 5 diverse sources are found."""
-    await auto_responder.start()
-    try:
-        result = await runner.run({"query": "impact of remote work on productivity"})
-    finally:
-        await auto_responder.stop()
-    assert result.success, f"Agent failed: {result.error}"
-    output = result.output or {}
-    sources = output.get("sources", [])
-    if isinstance(sources, list):
-        assert len(sources) >= 5, f"Expected >= 5 sources, got {len(sources)}"
-
-@pytest.mark.asyncio
-async def test_success_citation_coverage(runner, auto_responder, mock_mode):
-    """Every factual claim in the report cites its source."""
-    await auto_responder.start()
-    try:
-        result = await runner.run({"query": "climate change effects on agriculture"})
-    finally:
-        await auto_responder.stop()
-    assert result.success, f"Agent failed: {result.error}"
-    output = result.output or {}
-    report = output.get("report", "")
-    # Check that report contains numbered references
-    assert "[1]" in str(report) or "[source" in str(report).lower(), "Report lacks citations"
-
-@pytest.mark.asyncio
-async def test_success_report_completeness(runner, auto_responder, mock_mode):
-    """Report addresses the original research question."""
-    query = "pros and cons of nuclear energy"
-    await auto_responder.start()
-    try:
-        result = await runner.run({"query": query})
-    finally:
-        await auto_responder.stop()
-    assert result.success, f"Agent failed: {result.error}"
-    output = result.output or {}
-    report = output.get("report", "")
-    assert len(str(report)) > 200, f"Report too short: {len(str(report))} chars"
-
-@pytest.mark.asyncio
-async def test_empty_query_handling(runner, auto_responder, mock_mode):
-    """Agent handles empty input gracefully."""
-    await auto_responder.start()
-    try:
-        result = await runner.run({"query": ""})
-    finally:
-        await auto_responder.stop()
-    output = result.output or {}
-    assert not result.success or output.get("error"), "Should handle empty query"
-
-@pytest.mark.asyncio
-async def test_feedback_loop_terminates(runner, auto_responder, mock_mode):
-    """Feedback loop between review and research terminates."""
-    await auto_responder.start()
-    try:
-        result = await runner.run({"query": "quantum computing basics"})
-    finally:
-        await auto_responder.stop()
-    visits = result.node_visit_counts or {}
-    for node_id, count in visits.items():
-        assert count <= 5, f"Node {node_id} visited {count} times"
-'''
-)
-```
-
---
-
-## Phase 2: First Execution
-
-```python
-run_tests(
-    goal_id="rigorous-interactive-research",
-    agent_path="exports/deep_research_agent",
-    fail_fast=True
-)
-```
-
-**Result:**
-```json
-{
-  "overall_passed": false,
-  "summary": {"total": 5, "passed": 3, "failed": 2, "pass_rate": "60.0%"},
-  "failures": [
-    {"test_name": "test_success_source_diversity", "details": "AssertionError: Expected >= 5 sources, got 2"},
-    {"test_name": "test_success_citation_coverage", "details": "AssertionError: Report lacks citations"}
-  ]
-}
-```
-
---
-
-## Phase 3: Analyze (Iteration 1)
-
-### Debug the first failure
-
-```python
-debug_test(
-    goal_id="rigorous-interactive-research",
-    test_name="test_success_source_diversity",
-    agent_path="exports/deep_research_agent"
-)
-# Category: ASSERTION_FAILURE — Expected >= 5 sources, got 2
-```
-
-### Find the session and inspect memory
-
-```python
-list_agent_sessions(
-    agent_work_dir="~/.hive/agents/deep_research_agent",
-    status="completed",
-    limit=1
-)
-# → session_20260209_150000_abc12345
-
-get_agent_session_memory(
-    agent_work_dir="~/.hive/agents/deep_research_agent",
-    session_id="session_20260209_150000_abc12345",
-    key="research_results"
-)
-# → Only 2 sources found. LLM stopped searching after 2 queries.
-```
-
-### Check LLM behavior in the research node
-
-```python
-query_runtime_log_raw(
-    agent_work_dir="~/.hive/agents/deep_research_agent",
-    run_id="session_20260209_150000_abc12345",
-    node_id="research"
-)
-# → LLM called web_search twice, got results, immediately called set_output.
-# → Prompt doesn't instruct it to find at least 5 sources.
-```
-
-**Root cause:** The research node's system_prompt doesn't specify minimum source requirements.
-
---
-
-## Phase 4: Fix (Iteration 1)
-
-```python
-Read(file_path="exports/deep_research_agent/nodes/__init__.py")
-
-# Fix the research node prompt
-Edit(
-    file_path="exports/deep_research_agent/nodes/__init__.py",
-    old_string='system_prompt="Search for information on the user\'s topic using web search."',
-    new_string='system_prompt="Search for information on the user\'s topic using web search. You MUST find at least 5 diverse, authoritative sources. Use multiple different search queries with varied keywords. Do NOT call set_output until you have gathered at least 5 distinct sources from different domains."'
-)
-```
-
---
-
-## Phase 5: Recover & Resume (Iteration 1)
-
-The fix is to the `research` node. Since this was a `run_tests` execution (no checkpoints), we re-run from scratch:
-
-```python
-run_tests(
-    goal_id="rigorous-interactive-research",
-    agent_path="exports/deep_research_agent",
-    fail_fast=True
-)
-```
-
-**Result:**
-```json
-{
-  "overall_passed": false,
-  "summary": {"total": 5, "passed": 4, "failed": 1, "pass_rate": "80.0%"},
-  "failures": [
-    {"test_name": "test_success_citation_coverage", "details": "AssertionError: Report lacks citations"}
-  ]
-}
-```
-
-Source diversity now passes. Citation coverage still fails.
-
---
-
-## Phase 3: Analyze (Iteration 2)
-
-```python
-debug_test(
-    goal_id="rigorous-interactive-research",
-    test_name="test_success_citation_coverage",
-    agent_path="exports/deep_research_agent"
-)
-# Category: ASSERTION_FAILURE — Report lacks citations
-
-# Check what the report node produced
-list_agent_sessions(
-    agent_work_dir="~/.hive/agents/deep_research_agent",
-    status="completed",
-    limit=1
-)
-# → session_20260209_151500_def67890
-
-get_agent_session_memory(
-    agent_work_dir="~/.hive/agents/deep_research_agent",
-    session_id="session_20260209_151500_def67890",
-    key="report"
-)
-# → Report text exists but uses no numbered references.
-# → Sources are in memory but report node doesn't cite them.
-```
-
-**Root cause:** The report node's prompt doesn't instruct the LLM to include numbered citations.
-
---
-
-## Phase 4: Fix (Iteration 2)
-
-```python
-Edit(
-    file_path="exports/deep_research_agent/nodes/__init__.py",
-    old_string='system_prompt="Write a comprehensive report based on the research findings."',
-    new_string='system_prompt="Write a comprehensive report based on the research findings. You MUST include numbered citations [1], [2], etc. for every factual claim. At the end, include a References section listing all sources with their URLs. Every claim must be traceable to a specific source."'
-)
-```
-
---
-
-## Phase 5: Resume (Iteration 2)
-
-The fix is to the `report` node (the last node). To demonstrate checkpoint recovery, run via CLI:
-
-```bash
-# Run via CLI to get checkpoints
-uv run hive run exports/deep_research_agent --input '{"topic": "climate change effects"}'
-
-# After it runs, find the clean checkpoint before report (MCP tool call, not a shell command)
-list_agent_checkpoints(
-    agent_work_dir="~/.hive/agents/deep_research_agent",
-    session_id="session_20260209_152000_ghi34567",
-    is_clean="true"
-)
-# → cp_node_complete_review_152100 (after review, before report)
-
-# Resume — skips intake, research, review entirely
-uv run hive run exports/deep_research_agent \
-  --resume-session session_20260209_152000_ghi34567 \
-  --checkpoint cp_node_complete_review_152100
-```
-
-Only the `report` node re-runs with the fixed prompt, using research data from the checkpoint.
-
---
-
-## Phase 6: Final Verification
-
-```python
-run_tests(
-    goal_id="rigorous-interactive-research",
-    agent_path="exports/deep_research_agent"
-)
-```
-
-**Result:**
-```json
-{
-  "overall_passed": true,
-  "summary": {"total": 5, "passed": 5, "failed": 0, "pass_rate": "100.0%"}
-}
-```
-
-All tests pass.
-
---
-
-## Summary
-
-| Iteration | Failure | Root Cause | Fix | Recovery |
-|-----------|---------|------------|-----|----------|
-| 1 | Source diversity (2 < 5) | Research prompt too vague | Added "at least 5 sources" to prompt | Re-run (no checkpoints) |
-| 2 | No citations in report | Report prompt lacks citation instructions | Added citation requirements | Checkpoint resume (skipped 3 nodes) |
-
-**Key takeaways:**
- Phase 3 analysis (session memory + L3 logs) identified root causes without guessing
- Checkpoint recovery in iteration 2 saved time by skipping 3 expensive nodes
- Final `run_tests` confirms all scenarios pass end-to-end
@@ -1,526 +0,0 @@
---
-name: hive
-description: Complete workflow for building, implementing, and testing goal-driven agents. Orchestrates hive-* skills. Use when starting a new agent project, unsure which skill to use, or need end-to-end guidance.
-license: Apache-2.0
-metadata:
-  author: hive
-  version: "2.0"
-  type: workflow-orchestrator
-  orchestrates:
-    - hive-concepts
-    - hive-create
-    - hive-patterns
-    - hive-test
-    - hive-credentials
-    - hive-debugger
---
-
-# Agent Development Workflow
-
-**THIS IS AN EXECUTABLE WORKFLOW. DO NOT explore the codebase or read source files. ROUTE to the correct skill IMMEDIATELY.**
-
-When this skill is loaded, **ALWAYS use the AskUserQuestion tool** to present options:
-
-```
-Use AskUserQuestion with these options:
- "Build a new agent" → Then invoke /hive-create
- "Test an existing agent" → Then invoke /hive-test
- "Learn agent concepts" → Then invoke /hive-concepts
- "Optimize agent design" → Then invoke /hive-patterns
- "Set up credentials" → Then invoke /hive-credentials
- "Debug a failing agent" → Then invoke /hive-debugger
- "Other" (please describe what you want to achieve)
-```
-
-**DO NOT:** Read source files, explore the codebase, search for code, or do any investigation before routing. The sub-skills handle all of that.
-
---
-
-Complete Standard Operating Procedure (SOP) for building production-ready goal-driven agents.
-
-## Overview
-
-This workflow orchestrates specialized skills to take you from initial concept to production-ready agent:
-
-1. **Understand Concepts** → `/hive-concepts` (optional)
-2. **Build Structure** → `/hive-create`
-3. **Optimize Design** → `/hive-patterns` (optional)
-4. **Setup Credentials** → `/hive-credentials` (if agent uses tools requiring API keys)
-5. **Test & Validate** → `/hive-test`
-6. **Debug Issues** → `/hive-debugger` (if agent fails at runtime)
-
-## When to Use This Workflow
-
-Use this meta-skill when:
- Starting a new agent from scratch
- Unclear which skill to use first
- Need end-to-end guidance for agent development
- Want consistent, repeatable agent builds
-
-**Skip this workflow** if:
- You only need to test an existing agent → use `/hive-test` directly
- You know exactly which phase you're in → use that phase's skill directly
-
-## Quick Decision Tree
-
-```
-"Need to understand agent concepts" → hive-concepts
-"Build a new agent" → hive-create
-"Optimize my agent design" → hive-patterns
-"Need client-facing nodes or feedback loops" → hive-patterns
-"Set up API keys for my agent" → hive-credentials
-"Test my agent" → hive-test
-"My agent is failing/stuck/has errors" → hive-debugger
-"Not sure what I need" → Read phases below, then decide
-"Agent has structure but needs implementation" → See agent directory STATUS.md
-```
-
-## Phase 0: Understand Concepts (Optional)
-
-**Skill**: `/hive-concepts`
-**Input**: Questions about agent architecture
-
-### When to Use
-
- First time building an agent
- Need to understand node types, edges, goals
- Want to validate tool availability
- Learning about event loop architecture and client-facing nodes
-
-### What This Phase Provides
-
- Architecture overview (Python packages, not JSON)
- Core concepts (Goal, Node, Edge, Event Loop, Judges)
- Tool discovery and validation procedures
- Workflow overview
-
-**Skip this phase** if you already understand agent fundamentals.
-
-## Phase 1: Build Agent Structure
-
-**Skill**: `/hive-create`
-**Input**: User requirements ("Build an agent that...") or a template to start from
-
-### What This Phase Does
-
-Creates the complete agent architecture:
- Package structure (`exports/agent_name/`)
- Goal with success criteria and constraints
- Workflow graph (nodes and edges)
- Node specifications
- CLI interface
- Documentation
-
-### Process
-
-1. **Create package** - Directory structure with skeleton files
-2. **Define goal** - Success criteria and constraints written to agent.py
-3. **Design nodes** - Each node approved and written incrementally
-4. **Connect edges** - Workflow graph with conditional routing
-5. **Finalize** - Agent class, exports, and documentation
-
-### Outputs
-
- ✅ `exports/agent_name/` package created
- ✅ Goal defined in agent.py
- ✅ 3-5 success criteria defined
- ✅ 1-5 constraints defined
- ✅ 5-10 nodes specified in nodes/__init__.py
- ✅ 8-15 edges connecting workflow
- ✅ Validated structure (passes `PYTHONPATH=exports uv run python -m agent_name validate`)
- ✅ README.md with usage instructions
- ✅ CLI commands (info, validate, run, shell)
-
-### Success Criteria
-
-You're ready for Phase 2 when:
- Agent structure validates without errors
- All nodes and edges are defined
- CLI commands work (info, validate)
- You see: "Agent complete: exports/agent_name/"
-
-### Common Outputs
-
-The hive-create skill produces:
-```
-exports/agent_name/
-├── __init__.py          (package exports)
-├── __main__.py          (CLI interface)
-├── agent.py             (goal, graph, agent class)
-├── nodes/__init__.py    (node specifications)
-├── config.py            (configuration)
-├── implementations.py   (may be created for Python functions)
-└── README.md            (documentation)
-```
-
-### Next Steps
-
-**If the structure is complete and validated:**
-→ Check `exports/agent_name/STATUS.md` or `IMPLEMENTATION_GUIDE.md`
-→ These files explain implementation options
-→ You may need to add Python functions or MCP tools (not covered by current skills)
-
-**If you want to optimize the design:**
-→ Proceed to Phase 1.5 (hive-patterns)
-
-**If you are ready to test:**
-→ Proceed to Phase 2
-
-## Phase 1.5: Optimize Design (Optional)
-
-**Skill**: `/hive-patterns`
-**Input**: Completed agent structure
-
-### When to Use
-
- Want to add client-facing blocking or feedback edges
- Need judge patterns for output validation
- Want fan-out/fan-in (parallel execution)
- Need error handling patterns
- Want best practices guidance
-
-### What This Phase Provides
-
- Client-facing interaction patterns
- Feedback edge routing with nullable output keys
- Judge patterns (implicit, SchemaJudge)
- Fan-out/fan-in parallel execution
- Context management and spillover patterns
- Anti-patterns to avoid
-
-**Skip this phase** if your agent design is straightforward.
-
-## Phase 2: Test & Validate
-
-**Skill**: `/hive-test`
-**Input**: Working agent from Phase 1
-
-### What This Phase Does
-
-Guides the creation and execution of a comprehensive test suite:
- Constraint tests
- Success criteria tests
- Edge case tests
- Integration tests
-
-### Process
-
-1. **Analyze agent** - Read goal, constraints, success criteria
-2. **Generate tests** - The calling agent writes pytest files in `exports/agent_name/tests/` using hive-test guidelines and templates
-3. **User approval** - Review and approve each test
-4. **Run evaluation** - Execute tests and collect results
-5. **Debug failures** - Identify and fix issues
-6. **Iterate** - Repeat until all tests pass
-
-### Outputs
-
- ✅ Test files in `exports/agent_name/tests/`
- ✅ Test report with pass/fail metrics
- ✅ Coverage of all success criteria
- ✅ Coverage of all constraints
- ✅ Edge case handling verified
-
-### Success Criteria
-
-You're done when:
- All tests pass
- All success criteria validated
- All constraints verified
- Agent handles edge cases
- Test coverage is comprehensive
-
-### Next Steps
-
-**Agent ready for:**
- Production deployment
- Integration into larger systems
- Documentation and handoff
- Continuous monitoring
-
-## Phase Transitions
-
-### From Phase 1 to Phase 2
-
-**Trigger signals:**
- "Agent complete: exports/..."
- Structure validation passes
- README indicates implementation complete
-
-**Before proceeding:**
- Verify agent can be imported: `from exports.agent_name import default_agent`
- Check if implementation is needed (see STATUS.md or IMPLEMENTATION_GUIDE.md)
- Confirm agent executes without import errors
-
-### Skipping Phases
-
-**When to skip Phase 1:**
- Agent structure already exists
- Only need to add tests
- Modifying existing agent
-
-**When to skip Phase 2:**
- Prototyping or exploring
- Agent not production-bound
- Manual testing sufficient
-
-## Common Patterns
-
-### Pattern 1: Complete New Build (Simple)
-
-```
-User: "Build an agent that monitors files"
-→ Use /hive-create
-→ Agent structure created
-→ Use /hive-test
-→ Tests created and passing
-→ Done: Production-ready agent
-```
-
-### Pattern 1b: Complete New Build (With Learning)
-
-```
-User: "Build an agent (first time)"
-→ Use /hive-concepts (understand concepts)
-→ Use /hive-create (build structure)
-→ Use /hive-patterns (optimize design)
-→ Use /hive-test (validate)
-→ Done: Production-ready agent
-```
-
-### Pattern 1c: Build from Template
-
-```
-User: "Build an agent based on the deep research template"
-→ Use /hive-create
-→ Select "From a template" path
-→ Pick template, name new agent
-→ Review/modify goal, nodes, graph
-→ Agent exported with customizations
-→ Use /hive-test
-→ Done: Customized agent
-```
-
-### Pattern 2: Test Existing Agent
-
-```
-User: "Test my agent at exports/my_agent"
-→ Skip Phase 1
-→ Use /hive-test directly
-→ Tests created
-→ Done: Validated agent
-```
-
-### Pattern 3: Iterative Development
-
-```
-User: "Build an agent"
-→ Use /hive-create (Phase 1)
-→ Implementation needed (see STATUS.md)
-→ [User implements functions]
-→ Use /hive-test (Phase 2)
-→ Tests reveal bugs
-→ [Fix bugs manually]
-→ Re-run tests
-→ Done: Working agent
-```
-
-### Pattern 4: Agent with Review Loops and HITL Checkpoints
-
-```
-User: "Build an agent with human review and feedback loops"
-→ Use /hive-concepts (learn event loop, client-facing nodes)
-→ Use /hive-create (build structure with feedback edges)
-→ Use /hive-patterns (implement client-facing + feedback patterns)
-→ Use /hive-test (validate review flows and edge routing)
-→ Done: Agent with HITL checkpoints and review loops
-```
-
-## Skill Dependencies
-
-```
-hive (meta-skill)
-    │
-    ├── hive-concepts (foundational)
-    │   ├── Architecture concepts (event loop, judges)
-    │   ├── Node types (event_loop, function)
-    │   ├── Edge routing and priority
-    │   ├── Tool discovery procedures
-    │   └── Workflow overview
-    │
-    ├── hive-create (procedural)
-    │   ├── Creates package structure
-    │   ├── Defines goal
-    │   ├── Adds nodes (event_loop, function)
-    │   ├── Connects edges with priority routing
-    │   ├── Finalizes agent class
-    │   └── Requires: hive-concepts
-    │
-    ├── hive-patterns (reference)
-    │   ├── Client-facing interaction patterns
-    │   ├── Feedback edges and review loops
-    │   ├── Judge patterns (implicit, SchemaJudge)
-    │   ├── Fan-out/fan-in parallel execution
-    │   └── Context management and anti-patterns
-    │
-    ├── hive-credentials (utility)
-    │   ├── Detects missing credentials
-    │   ├── Offers auth method choices (Aden OAuth, direct API key)
-    │   ├── Stores securely in ~/.hive/credentials
-    │   └── Validates with health checks
-    │
-    ├── hive-test (validation)
-    │   ├── Reads agent goal
-    │   ├── Generates tests
-    │   ├── Runs evaluation
-    │   └── Reports results
-    │
-    └── hive-debugger (troubleshooting)
-        ├── Monitors runtime logs (L1/L2/L3)
-        ├── Identifies retry loops, tool failures
-        ├── Categorizes issues (10 categories)
-        └── Provides fix recommendations
-```
-
-## Troubleshooting
-
-### "Agent structure won't validate"
-
- Check node IDs match between nodes/__init__.py and agent.py
- Verify all edges reference valid node IDs
- Ensure entry_node exists in nodes list
- Run: `PYTHONPATH=exports uv run python -m agent_name validate`
-
-### "Agent has structure but won't run"
-
- Check for STATUS.md or IMPLEMENTATION_GUIDE.md in agent directory
- Implementation may be needed (Python functions or MCP tools)
- This is expected - hive-create creates structure, not implementation
- See implementation guide for completion options
-
-### "Tests are failing"
-
- Review test output for specific failures
- Check agent goal and success criteria
- Verify constraints are met
- Use `/hive-test` to debug and iterate
- Fix agent code and re-run tests
-
-### "Agent is failing at runtime"
-
- Use `/hive-debugger` to analyze runtime logs
- The debugger identifies retry loops, tool failures, and stalled execution
- Get actionable fix recommendations with code changes
- Monitor the agent in real-time during TUI sessions
-
-### "Not sure which phase I'm in"
-
-Run these checks:
-
-```bash
-# Check if agent structure exists
-ls exports/my_agent/agent.py
-
-# Check if it validates
-PYTHONPATH=exports uv run python -m my_agent validate
-
-# Check if tests exist
-ls exports/my_agent/tests/
-
-# If structure exists and validates → Phase 2 (testing)
-# If structure doesn't exist → Phase 1 (building)
-# If tests exist but failing → Debug phase
-```
-
-## Best Practices
-
-### For Phase 1 (Building)
-
-1. **Start with clear requirements** - Know what the agent should do
-2. **Define success criteria early** - Measurable goals drive design
-3. **Keep nodes focused** - One responsibility per node
-4. **Use descriptive names** - Node IDs should explain purpose
-5. **Validate incrementally** - Check structure after each major addition
-
-### For Phase 2 (Testing)
-
-1. **Test constraints first** - Hard requirements must pass
-2. **Mock external dependencies** - Use mock mode for LLMs/APIs
-3. **Cover edge cases** - Test failures, not just success paths
-4. **Iterate quickly** - Fix one test at a time
-5. **Document test patterns** - Future tests follow same structure
-
-### General Workflow
-
-1. **Use version control** - Git commit after each phase
-2. **Document decisions** - Update README with changes
-3. **Keep iterations small** - Build → Test → Fix → Repeat
-4. **Preserve working states** - Tag successful iterations
-5. **Learn from failures** - Failed tests reveal design issues
-
-## Exit Criteria
-
-You're done with the workflow when:
-
-✅ Agent structure validates
-✅ All tests pass
-✅ Success criteria met
-✅ Constraints verified
-✅ Documentation complete
-✅ Agent ready for deployment
-
-## Additional Resources
-
- **hive-concepts**: See `.claude/skills/hive-concepts/SKILL.md`
- **hive-create**: See `.claude/skills/hive-create/SKILL.md`
- **hive-patterns**: See `.claude/skills/hive-patterns/SKILL.md`
- **hive-test**: See `.claude/skills/hive-test/SKILL.md`
- **Agent framework docs**: See `core/README.md`
- **Example agents**: See `exports/` directory
-
-## Summary
-
-This workflow provides a proven path from concept to production-ready agent:
-
-1. **Learn** with `/hive-concepts` → Understand fundamentals (optional)
-2. **Build** with `/hive-create` → Get validated structure
-3. **Optimize** with `/hive-patterns` → Apply best practices (optional)
-4. **Configure** with `/hive-credentials` → Set up API keys (if needed)
-5. **Test** with `/hive-test` → Get verified functionality
-6. **Debug** with `/hive-debugger` → Fix runtime issues (if needed)
-
-The workflow is **flexible** - skip phases as needed, iterate freely, and adapt to your specific requirements. The goal is **production-ready agents** built with **consistent, repeatable processes**.
-
-## Skill Selection Guide
-
-**Choose hive-concepts when:**
- First time building agents
- Need to understand event loop architecture
- Validating tool availability
- Learning about node types, edges, and judges
-
-**Choose hive-create when:**
- Actually building an agent
- Have clear requirements
- Ready to write code
- Want step-by-step guidance
- Want to start from an existing template and customize it
-
-**Choose hive-patterns when:**
- Agent structure complete
- Need client-facing nodes or feedback edges
- Implementing review loops or fan-out/fan-in
- Want judge patterns or context management
- Want best practices
-
-**Choose hive-test when:**
- Agent structure complete
- Ready to validate functionality
- Need comprehensive test coverage
- Testing feedback loops, output keys, or fan-out
-
-**Choose hive-debugger when:**
- Agent is failing or stuck at runtime
- Seeing retry loops or escalations
- Tool calls are failing
- Need to understand why a node isn't completing
- Want real-time monitoring of agent execution
@@ -1,199 +0,0 @@
-# Example: File Monitor Agent
-
-This example shows the complete /hive workflow in action for building a file monitoring agent.
-
-## Initial Request
-
-```
-User: "Build an agent that monitors ~/Downloads and copies new files to ~/Documents"
-```
-
-## Phase 1: Building (20 minutes)
-
-### Step 1: Create Structure
-
-Agent invokes `/hive-create` skill and:
-
-1. Creates `exports/file_monitor_agent/` package
-2. Writes skeleton files (__init__.py, __main__.py, agent.py, etc.)
-
-**Output**: Package structure visible immediately
-
-### Step 2: Define Goal
-
-```python
-goal = Goal(
-    id="file-monitor-copy",
-    name="Automated File Monitor & Copy",
-    success_criteria=[
-        # 100% detection rate
-        # 100% copy success
-        # 100% conflict resolution
-        # >99% uptime
-    ],
-    constraints=[
-        # Preserve originals
-        # Handle errors gracefully
-        # Track state
-        # Respect permissions
-    ]
-)
-```
-
-**Output**: Goal written to agent.py
-
-### Step 3: Design Nodes
-
-7 nodes approved and written incrementally:
-
-1. `initialize-state` - Set up tracking
-2. `list-downloads` - Scan directory
-3. `identify-new-files` - Find new files
-4. `check-for-new-files` - Router
-5. `copy-files` - Copy with conflict resolution
-6. `update-state` - Mark as processed
-7. `wait-interval` - Sleep between cycles
-
-**Output**: All nodes in nodes/__init__.py
-
-### Step 4: Connect Edges
-
-8 edges connecting the workflow loop:
-
-```
-initialize → list → identify → check
-                                ↓  ↓
-                              copy  wait
-                                ↓    ↑
-                              update ↓
-                                ↓    ↓
-                              wait → list (loop)
-```
-
-**Output**: Edges written to agent.py
-
-### Step 5: Finalize
-
-```bash
-$ PYTHONPATH=exports uv run python -m file_monitor_agent validate
-✓ Agent is valid
-
-$ PYTHONPATH=exports uv run python -m file_monitor_agent info
-Agent: File Monitor & Copy Agent
-Nodes: 7
-Edges: 8
-```
-
-**Phase 1 Complete**: Structure validated ✅
-
-### Status After Phase 1
-
-```
-exports/file_monitor_agent/
-├── __init__.py          ✅ (exports)
-├── __main__.py          ✅ (CLI)
-├── agent.py             ✅ (goal, graph, agent class)
-├── nodes/__init__.py    ✅ (7 nodes)
-├── config.py            ✅ (configuration)
-├── implementations.py   ✅ (Python functions)
-├── README.md            ✅ (documentation)
-├── IMPLEMENTATION_GUIDE.md ✅ (next steps)
-└── STATUS.md            ✅ (current state)
-```
-
-**Note**: An implementation gap remains after Phase 1 - the agent's data flow still needs to be connected (see STATUS.md for details)
-
-## Phase 2: Testing (25 minutes)
-
-### Step 1: Analyze Agent
-
-Agent invokes `/hive-test` skill and:
-
-1. Reads goal from `exports/file_monitor_agent/agent.py`
-2. Identifies 4 success criteria to test
-3. Identifies 4 constraints to verify
-4. Plans test coverage
-
-### Step 2: Generate Tests
-
-Creates test files:
-
-```
-exports/file_monitor_agent/tests/
-├── conftest.py              (fixtures)
-├── test_constraints.py      (4 constraint tests)
-├── test_success_criteria.py (4 success tests)
-└── test_edge_cases.py       (error handling)
-```
-
-Tests approved incrementally by user.
-
-### Step 3: Run Tests
-
-```bash
-$ PYTHONPATH=exports uv run pytest exports/file_monitor_agent/tests/
-
-test_constraints.py::test_preserves_originals     PASSED
-test_constraints.py::test_handles_errors          PASSED
-test_constraints.py::test_tracks_state            PASSED
-test_constraints.py::test_respects_permissions    PASSED
-
-test_success_criteria.py::test_detects_all_files  PASSED
-test_success_criteria.py::test_copies_all_files   PASSED
-test_success_criteria.py::test_resolves_conflicts PASSED
-test_success_criteria.py::test_continuous_run     PASSED
-
-test_edge_cases.py::test_empty_directory          PASSED
-test_edge_cases.py::test_permission_denied        PASSED
-test_edge_cases.py::test_disk_full                PASSED
-test_edge_cases.py::test_large_files              PASSED
-
-========================== 12 passed in 3.42s ==========================
-```
-
-**Phase 2 Complete**: All tests pass ✅
-
-## Final Output
-
-**Production-Ready Agent:**
-
-```bash
-# Run the agent
-./RUN_AGENT.sh
-
-# Or manually
-PYTHONPATH=exports uv run python -m file_monitor_agent run
-```
-
-**Capabilities:**
- Monitors ~/Downloads continuously
- Copies new files to ~/Documents
- Resolves conflicts with timestamps
- Handles errors gracefully
- Tracks processed files
- Runs as background service
-
-**Total Time**: ~45 minutes from concept to production
-
-## Key Learnings
-
-1. **Incremental building** - Files written immediately, visible throughout
-2. **Validation early** - Structure validated before moving to implementation
-3. **Test-driven** - Tests reveal real behavior
-4. **Documentation included** - README, STATUS, and guides auto-generated
-5. **Repeatable process** - Same workflow for any agent type
-
-## Variations
-
-**For simpler agents:**
- Fewer nodes (3-5 instead of 7)
- Simpler workflow (linear instead of looping)
- Faster build time (10-15 minutes)
-
-**For complex agents:**
- More nodes (10-15+)
- Multiple subgraphs
- Pause/resume points for human-in-the-loop
- Longer build time (45-60 minutes)
-
-The workflow scales to your needs!
@@ -1,7 +0,0 @@
-# Project-level Codex config for Hive.
-# Keep this file minimal: MCP connectivity + skill discovery.
-
-[mcp_servers.agent-builder]
-command = "uv"
-args = ["run", "--directory", "core", "-m", "framework.mcp.agent_builder_server"]
-cwd = "."
@@ -1,20 +0,0 @@
-{
-  "mcpServers": {
-    "agent-builder": {
-      "command": "python",
-      "args": ["-m", "framework.mcp.agent_builder_server"],
-      "cwd": "core",
-      "env": {
-        "PYTHONPATH": "../tools/src"
-      }
-    },
-    "tools": {
-      "command": "python",
-      "args": ["mcp_server.py", "--stdio"],
-      "cwd": "tools",
-      "env": {
-        "PYTHONPATH": "src"
-      }
-    }
-  }
-}
@@ -1 +0,0 @@
-../../.claude/skills/hive
@@ -1 +0,0 @@
-../../.claude/skills/hive-concepts
@@ -1 +0,0 @@
-../../.claude/skills/hive-create
@@ -1 +0,0 @@
-../../.claude/skills/hive-credentials
@@ -1 +0,0 @@
-../../.claude/skills/hive-patterns
@@ -1 +0,0 @@
-../../.claude/skills/hive-test
@@ -1,30 +0,0 @@
-{
-  "mcpServers": {
-    "agent-builder": {
-      "command": "uv",
-      "args": [
-        "run",
-        "python",
-        "-m",
-        "framework.mcp.agent_builder_server"
-      ],
-      "cwd": "core",
-      "env": {
-        "PYTHONPATH": "../tools/src"
-      }
-    },
-    "tools": {
-      "command": "uv",
-      "args": [
-        "run",
-        "python",
-        "mcp_server.py",
-        "--stdio"
-      ],
-      "cwd": "tools",
-      "env": {
-        "PYTHONPATH": "src"
-      }
-    }
-  }
-}
@@ -1 +0,0 @@
-../../.claude/skills/hive
@@ -1 +0,0 @@
-../../.claude/skills/hive-concepts
@@ -1 +0,0 @@
-../../.claude/skills/hive-create
@@ -1 +0,0 @@
-../../.claude/skills/hive-credentials
@@ -1 +0,0 @@
-../../.claude/skills/hive-debugger
@@ -1 +0,0 @@
-../../.claude/skills/hive-patterns
@@ -1 +0,0 @@
-../../.claude/skills/hive-test
@@ -1 +0,0 @@
-../../.claude/skills/triage-issue
@@ -1,7 +0,0 @@
-{
-  "recommendations": [
-    "charliermarsh.ruff",
-    "editorconfig.editorconfig",
-    "ms-python.python"
-  ]
-}