Merge pull request #5071 from TimothyZhang7/feature/queen-bee

Feature/queen bee
fix: lint
2026-02-18 20:59:04 -08:00 · 2026-02-18 20:51:40 -08:00 · 2026-02-18 20:47:07 -08:00 · 2026-02-18 20:39:25 -08:00 · 2026-02-18 20:34:19 -08:00 · 2026-02-18 20:32:24 -08:00
448 changed files with 84642 additions and 14240 deletions
@@ -0,0 +1,9 @@
+{
+  "mcpServers": {
+    "agent-builder": {
+      "command": "uv",
+      "args": ["run", "--directory", "core", "-m", "framework.mcp.agent_builder_server"],
+      "disabled": false
+    }
+  }
+}
@@ -0,0 +1 @@
+../../.claude/skills/hive
@@ -0,0 +1 @@
+../../.claude/skills/hive-concepts
@@ -0,0 +1 @@
+../../.claude/skills/hive-create
@@ -0,0 +1 @@
+../../.claude/skills/hive-credentials
@@ -0,0 +1 @@
+../../.claude/skills/hive-patterns
@@ -0,0 +1 @@
+../../.claude/skills/hive-test
@@ -0,0 +1,5 @@
+---
+description: hive-concepts
+---
+
+use hive-concepts skill
@@ -0,0 +1,5 @@
+---
+description: hive-create
+---
+
+use hive-create skill
@@ -0,0 +1,5 @@
+---
+description: hive-credentials
+---
+
+use hive-credentials skill
@@ -0,0 +1,5 @@
+---
+description: hive-patterns
+---
+
+use hive-patterns skill
@@ -0,0 +1,5 @@
+---
+description: hive-test
+---
+
+use hive-test skill
@@ -0,0 +1,5 @@
+---
+description: hive
+---
+
+use hive skill
@@ -0,0 +1 @@
+../../.claude/skills/hive
@@ -0,0 +1 @@
+../../.claude/skills/hive-concepts
@@ -0,0 +1 @@
+../../.claude/skills/hive-create
@@ -0,0 +1 @@
+../../.claude/skills/hive-credentials
@@ -0,0 +1 @@
+../../.claude/skills/hive-patterns
@@ -0,0 +1 @@
+../../.claude/skills/hive-test
@@ -0,0 +1,34 @@
+{
+  "permissions": {
+    "allow": [
+      "mcp__agent-builder__create_session",
+      "mcp__agent-builder__set_goal",
+      "mcp__agent-builder__add_node",
+      "mcp__agent-builder__add_edge",
+      "mcp__agent-builder__configure_loop",
+      "mcp__agent-builder__add_mcp_server",
+      "mcp__agent-builder__validate_graph",
+      "mcp__agent-builder__export_graph",
+      "mcp__agent-builder__load_session_by_id",
+      "Bash(git status:*)",
+      "Bash(gh run view:*)",
+      "Bash(uv run:*)",
+      "Bash(env:*)",
+      "mcp__agent-builder__test_node",
+      "mcp__agent-builder__list_mcp_tools",
+      "Bash(python -m py_compile:*)",
+      "Bash(python -m pytest:*)",
+      "Bash(source:*)",
+      "mcp__agent-builder__update_node",
+      "mcp__agent-builder__check_missing_credentials",
+      "mcp__agent-builder__list_stored_credentials",
+      "Bash(find:*)",
+      "mcp__agent-builder__run_tests",
+      "Bash(PYTHONPATH=core:exports:tools/src uv run pytest:*)",
+      "mcp__agent-builder__list_agent_sessions",
+      "mcp__agent-builder__generate_constraint_tests",
+      "mcp__agent-builder__generate_success_tests"
+    ]
+  },
+  "enabledMcpjsonServers": ["agent-builder", "tools"]
+}
@@ -1,361 +0,0 @@
----
-name: building-agents-construction
-description: Step-by-step guide for building goal-driven agents. Creates package structure, defines goals, adds nodes, connects edges, and finalizes agent class. Use when actively building an agent.
-license: Apache-2.0
-metadata:
-  author: hive
-  version: "2.0"
-  type: procedural
-  part_of: building-agents
-  requires: building-agents-core
----
-
-# Agent Construction - EXECUTE THESE STEPS
-
-**THIS IS AN EXECUTABLE WORKFLOW. DO NOT DISPLAY THIS FILE. EXECUTE THE STEPS BELOW.**
-
-When this skill is loaded, IMMEDIATELY begin executing Step 1. Do not explain what you will do - just do it.
-
----
-
-## STEP 1: Initialize Build Environment
-
-**EXECUTE THESE TOOL CALLS NOW:**
-
-1. Register the hive-tools MCP server:
-
-```
-mcp__agent-builder__add_mcp_server(
-    name="hive-tools",
-    transport="stdio",
-    command="python",
-    args='["mcp_server.py", "--stdio"]',
-    cwd="tools",
-    description="Hive tools MCP server"
-)
-```
-
-2. Create a build session (replace AGENT_NAME with the user's requested agent name in snake_case):
-
-```
-mcp__agent-builder__create_session(name="AGENT_NAME")
-```
-
-3. Discover available tools:
-
-```
-mcp__agent-builder__list_mcp_tools()
-```
-
-4. Create the package directory:
-
-```
-mkdir -p exports/AGENT_NAME/nodes
-```
-
-**AFTER completing these calls**, tell the user:
-
-> ✅ Build environment initialized
->
-> - Session created
-> - Available tools: [list the tools from step 3]
->
-> Proceeding to define the agent goal...
-
-**THEN immediately proceed to STEP 2.**
-
----
-
-## STEP 2: Define and Approve Goal
-
-**PROPOSE a goal to the user.** Based on what they asked for, propose:
-
-- Goal ID (kebab-case)
-- Goal name
-- Goal description
-- 3-5 success criteria (each with: id, description, metric, target, weight)
-- 2-4 constraints (each with: id, description, constraint_type, category)
-
-**FORMAT your proposal as a clear summary, then ask for approval:**
-
-> **Proposed Goal: [Name]**
->
-> [Description]
->
-> **Success Criteria:**
->
-> 1. [criterion 1]
-> 2. [criterion 2]
->    ...
->
-> **Constraints:**
->
-> 1. [constraint 1]
-> 2. [constraint 2]
->    ...
-
-**THEN call AskUserQuestion:**
-
-```
-AskUserQuestion(questions=[{
-    "question": "Do you approve this goal definition?",
-    "header": "Goal",
-    "options": [
-        {"label": "Approve", "description": "Goal looks good, proceed"},
-        {"label": "Modify", "description": "I want to change something"}
-    ],
-    "multiSelect": false
-}])
-```
-
-**WAIT for user response.**
-
-- If **Approve**: Call `mcp__agent-builder__set_goal(...)` with the goal details, then proceed to STEP 3
-- If **Modify**: Ask what they want to change, update proposal, ask again
-
----
-
-## STEP 3: Design Node Workflow
-
-**BEFORE designing nodes**, review the available tools from Step 1. Nodes can ONLY use tools that exist.
-
-**DESIGN the workflow** as a series of nodes. For each node, determine:
-
-- node_id (kebab-case)
-- name
-- description
-- node_type: `"llm_generate"` (no tools) or `"llm_tool_use"` (uses tools)
-- input_keys (what data this node receives)
-- output_keys (what data this node produces)
-- tools (ONLY tools that exist - empty list for llm_generate)
-- system_prompt
-
-**PRESENT the workflow to the user:**
-
-> **Proposed Workflow: [N] nodes**
->
-> 1. **[node-id]** - [description]
->
->    - Type: [llm_generate/llm_tool_use]
->    - Input: [keys]
->    - Output: [keys]
->    - Tools: [tools or "none"]
->
-> 2. **[node-id]** - [description]
->    ...
->
-> **Flow:** node1 → node2 → node3 → ...
-
-**THEN call AskUserQuestion:**
-
-```
-AskUserQuestion(questions=[{
-    "question": "Do you approve this workflow design?",
-    "header": "Workflow",
-    "options": [
-        {"label": "Approve", "description": "Workflow looks good, proceed to build nodes"},
-        {"label": "Modify", "description": "I want to change the workflow"}
-    ],
-    "multiSelect": false
-}])
-```
-
-**WAIT for user response.**
-
-- If **Approve**: Proceed to STEP 4
-- If **Modify**: Ask what they want to change, update design, ask again
-
----
-
-## STEP 4: Build Nodes One by One
-
-**FOR EACH node in the approved workflow:**
-
-1. **Call** `mcp__agent-builder__add_node(...)` with the node details
-
-   - input_keys and output_keys must be JSON strings: `'["key1", "key2"]'`
-   - tools must be a JSON string: `'["tool1"]'` or `'[]'`
-
-2. **Call** `mcp__agent-builder__test_node(...)` to validate:
-
-```
-mcp__agent-builder__test_node(
-    node_id="the-node-id",
-    test_input='{"key": "test value"}',
-    mock_llm_response='{"output_key": "test output"}'
-)
-```
-
-3. **Check result:**
-
-   - If valid: Tell user "✅ Node [id] validated" and continue to next node
-   - If invalid: Show errors, fix the node, re-validate
-
-4. **Show progress** after each node:
-
-```
-mcp__agent-builder__get_session_status()
-```
-
-> ✅ Node [X] of [Y] complete: [node-id]
-
-**AFTER all nodes are added and validated**, proceed to STEP 5.
-
----
-
-## STEP 5: Connect Edges
-
-**DETERMINE the edges** based on the workflow flow. For each connection:
-
-- edge_id (kebab-case)
-- source (node that outputs)
-- target (node that receives)
-- condition: `"on_success"`, `"always"`, `"on_failure"`, or `"conditional"`
-- condition_expr (Python expression, only if conditional)
-- priority (integer, lower = higher priority)
-
-**FOR EACH edge, call:**
-
-```
-mcp__agent-builder__add_edge(
-    edge_id="source-to-target",
-    source="source-node-id",
-    target="target-node-id",
-    condition="on_success",
-    condition_expr="",
-    priority=1
-)
-```
-
-**AFTER all edges are added, validate the graph:**
-
-```
-mcp__agent-builder__validate_graph()
-```
-
-- If valid: Tell user "✅ Graph structure validated" and proceed to STEP 6
-- If invalid: Show errors, fix edges, re-validate
-
----
-
-## STEP 6: Generate Agent Package
-
-**EXPORT the graph data:**
-
-```
-mcp__agent-builder__export_graph()
-```
-
-This returns JSON with all the goal, nodes, edges, and MCP server configurations.
-
-**THEN write the Python package files** using the exported data. Create these files in `exports/AGENT_NAME/`:
-
-1. `config.py` - Runtime configuration with model settings
-2. `nodes/__init__.py` - All NodeSpec definitions
-3. `agent.py` - Goal, edges, graph config, and agent class
-4. `__init__.py` - Package exports
-5. `__main__.py` - CLI interface
-6. `mcp_servers.json` - MCP server configurations
-7. `README.md` - Usage documentation
-
-**IMPORTANT entry_points format:**
-
-- MUST be: `{"start": "first-node-id"}`
-- NOT: `{"first-node-id": ["input_keys"]}` (WRONG)
-- NOT: `{"first-node-id"}` (WRONG - this is a set)
-
-**Use the example agent** at `.claude/skills/building-agents-construction/examples/online_research_agent/` as a template for file structure and patterns.
-
-**AFTER writing all files, tell the user:**
-
-> ✅ Agent package created: `exports/AGENT_NAME/`
->
-> **Files generated:**
->
-> - `__init__.py` - Package exports
-> - `agent.py` - Goal, nodes, edges, agent class
-> - `config.py` - Runtime configuration
-> - `__main__.py` - CLI interface
-> - `nodes/__init__.py` - Node definitions
-> - `mcp_servers.json` - MCP server config
-> - `README.md` - Usage documentation
->
-> **Test your agent:**
->
-> ```bash
-> cd /home/timothy/oss/hive
-> PYTHONPATH=core:exports python -m AGENT_NAME validate
-> PYTHONPATH=core:exports python -m AGENT_NAME info
-> ```
-
----
-
-## STEP 7: Verify and Test
-
-**RUN validation:**
-
-```bash
-cd /home/timothy/oss/hive && PYTHONPATH=core:exports python -m AGENT_NAME validate
-```
-
-- If valid: Agent is complete!
-- If errors: Fix the issues and re-run
-
-**SHOW final session summary:**
-
-```
-mcp__agent-builder__get_session_status()
-```
-
-**TELL the user the agent is ready** and suggest next steps:
-
-- Run with mock mode to test without API calls
-- Use `/testing-agent` skill for comprehensive testing
-- Use `/setup-credentials` if the agent needs API keys
-
----
-
-## REFERENCE: Node Types
-
-| Type           | tools param            | Use when                                       |
-| -------------- | ---------------------- | ---------------------------------------------- |
-| `llm_generate` | `'[]'`                 | Pure reasoning, JSON output, no external calls |
-| `llm_tool_use` | `'["tool1", "tool2"]'` | Needs to call MCP tools                        |
-
----
-
-## REFERENCE: Edge Conditions
-
-| Condition     | When edge is followed                 |
-| ------------- | ------------------------------------- |
-| `on_success`  | Source node completed successfully    |
-| `on_failure`  | Source node failed                    |
-| `always`      | Always, regardless of success/failure |
-| `conditional` | When condition_expr evaluates to True |
-
----
-
-## REFERENCE: System Prompt Best Practice
-
-For nodes with JSON output, include this in the system_prompt:
-
-```
-CRITICAL: Return ONLY raw JSON. NO markdown, NO code blocks.
-Just the JSON object starting with { and ending with }.
-
-Return this exact structure:
-{
-  "key1": "...",
-  "key2": "..."
-}
-```
-
----
-
-## COMMON MISTAKES TO AVOID
-
-1. **Using tools that don't exist** - Always check `mcp__agent-builder__list_mcp_tools()` first
-2. **Wrong entry_points format** - Must be `{"start": "node-id"}`, NOT a set or list
-3. **Skipping validation** - Always validate nodes and graph before proceeding
-4. **Not waiting for approval** - Always ask user before major steps
-5. **Displaying this file** - Execute the steps, don't show documentation
@@ -1,80 +0,0 @@
-# Online Research Agent
-
-Deep-dive research agent that searches 10+ sources and produces comprehensive narrative reports with citations.
-
-## Features
-
-- Generates multiple search queries from a topic
-- Searches and fetches 15+ web sources
-- Evaluates and ranks sources by relevance
-- Synthesizes findings into themes
-- Writes narrative report with numbered citations
-- Quality checks for uncited claims
-- Saves report to local markdown file
-
-## Usage
-
-### CLI
-
-```bash
-# Show agent info
-python -m online_research_agent info
-
-# Validate structure
-python -m online_research_agent validate
-
-# Run research on a topic
-python -m online_research_agent run --topic "impact of AI on healthcare"
-
-# Interactive shell
-python -m online_research_agent shell
-```
-
-### Python API
-
-```python
-from online_research_agent import default_agent
-
-# Simple usage
-result = await default_agent.run({"topic": "climate change solutions"})
-
-# Check output
-if result.success:
-    print(f"Report saved to: {result.output['file_path']}")
-    print(result.output['final_report'])
-```
-
-## Workflow
-
-```
-parse-query → search-sources → fetch-content → evaluate-sources
-                                                      ↓
-                                write-report ← synthesize-findings
-                                      ↓
-                               quality-check → save-report
-```
-
-## Output
-
-Reports are saved to `./research_reports/` as markdown files with:
-
-1. Executive Summary
-2. Introduction
-3. Key Findings (by theme)
-4. Analysis
-5. Conclusion
-6. References
-
-## Requirements
-
-- Python 3.11+
-- LLM provider API key (Groq, Cerebras, etc.)
-- Internet access for web search/fetch
-
-## Configuration
-
-Edit `config.py` to change:
-
-- `model`: LLM model (default: groq/moonshotai/kimi-k2-instruct-0905)
-- `temperature`: Generation temperature (default: 0.7)
-- `max_tokens`: Max tokens per response (default: 16384)
@@ -1,23 +0,0 @@
-"""
-Online Research Agent - Deep-dive research with narrative reports.
-
-Research any topic by searching multiple sources, synthesizing information,
-and producing a well-structured narrative report with citations.
-"""
-
-from .agent import OnlineResearchAgent, default_agent, goal, nodes, edges
-from .config import RuntimeConfig, AgentMetadata, default_config, metadata
-
-__version__ = "1.0.0"
-
-__all__ = [
-    "OnlineResearchAgent",
-    "default_agent",
-    "goal",
-    "nodes",
-    "edges",
-    "RuntimeConfig",
-    "AgentMetadata",
-    "default_config",
-    "metadata",
-]
@@ -1,419 +0,0 @@
-"""Agent graph construction for Online Research Agent."""
-
-from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
-from framework.graph.edge import GraphSpec
-from framework.graph.executor import ExecutionResult
-from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
-from framework.runtime.execution_stream import EntryPointSpec
-from framework.llm import LiteLLMProvider
-from framework.runner.tool_registry import ToolRegistry
-
-from .config import default_config, metadata
-from .nodes import (
-    parse_query_node,
-    search_sources_node,
-    fetch_content_node,
-    evaluate_sources_node,
-    synthesize_findings_node,
-    write_report_node,
-    quality_check_node,
-    save_report_node,
-)
-
-# Goal definition
-goal = Goal(
-    id="comprehensive-online-research",
-    name="Comprehensive Online Research",
-    description="Research any topic by searching multiple sources, synthesizing information, and producing a well-structured narrative report with citations.",
-    success_criteria=[
-        SuccessCriterion(
-            id="source-coverage",
-            description="Query 10+ diverse sources",
-            metric="source_count",
-            target=">=10",
-            weight=0.20,
-        ),
-        SuccessCriterion(
-            id="relevance",
-            description="All sources directly address the query",
-            metric="relevance_score",
-            target="90%",
-            weight=0.25,
-        ),
-        SuccessCriterion(
-            id="synthesis",
-            description="Synthesize findings into coherent narrative",
-            metric="coherence_score",
-            target="85%",
-            weight=0.25,
-        ),
-        SuccessCriterion(
-            id="citations",
-            description="Include citations for all claims",
-            metric="citation_coverage",
-            target="100%",
-            weight=0.15,
-        ),
-        SuccessCriterion(
-            id="actionable",
-            description="Report answers the user's question",
-            metric="answer_completeness",
-            target="90%",
-            weight=0.15,
-        ),
-    ],
-    constraints=[
-        Constraint(
-            id="no-hallucination",
-            description="Only include information found in sources",
-            constraint_type="quality",
-            category="accuracy",
-        ),
-        Constraint(
-            id="source-attribution",
-            description="Every factual claim must cite its source",
-            constraint_type="quality",
-            category="accuracy",
-        ),
-        Constraint(
-            id="recency-preference",
-            description="Prefer recent sources when relevant",
-            constraint_type="quality",
-            category="relevance",
-        ),
-        Constraint(
-            id="no-paywalled",
-            description="Avoid sources that require payment to access",
-            constraint_type="functional",
-            category="accessibility",
-        ),
-    ],
-)
-
-# Node list
-nodes = [
-    parse_query_node,
-    search_sources_node,
-    fetch_content_node,
-    evaluate_sources_node,
-    synthesize_findings_node,
-    write_report_node,
-    quality_check_node,
-    save_report_node,
-]
-
-# Edge definitions
-edges = [
-    EdgeSpec(
-        id="parse-to-search",
-        source="parse-query",
-        target="search-sources",
-        condition=EdgeCondition.ON_SUCCESS,
-        priority=1,
-    ),
-    EdgeSpec(
-        id="search-to-fetch",
-        source="search-sources",
-        target="fetch-content",
-        condition=EdgeCondition.ON_SUCCESS,
-        priority=1,
-    ),
-    EdgeSpec(
-        id="fetch-to-evaluate",
-        source="fetch-content",
-        target="evaluate-sources",
-        condition=EdgeCondition.ON_SUCCESS,
-        priority=1,
-    ),
-    EdgeSpec(
-        id="evaluate-to-synthesize",
-        source="evaluate-sources",
-        target="synthesize-findings",
-        condition=EdgeCondition.ON_SUCCESS,
-        priority=1,
-    ),
-    EdgeSpec(
-        id="synthesize-to-write",
-        source="synthesize-findings",
-        target="write-report",
-        condition=EdgeCondition.ON_SUCCESS,
-        priority=1,
-    ),
-    EdgeSpec(
-        id="write-to-quality",
-        source="write-report",
-        target="quality-check",
-        condition=EdgeCondition.ON_SUCCESS,
-        priority=1,
-    ),
-    EdgeSpec(
-        id="quality-to-save",
-        source="quality-check",
-        target="save-report",
-        condition=EdgeCondition.ON_SUCCESS,
-        priority=1,
-    ),
-]
-
-# Graph configuration
-entry_node = "parse-query"
-entry_points = {"start": "parse-query"}
-pause_nodes = []
-terminal_nodes = ["save-report"]
-
-
-class OnlineResearchAgent:
-    """
-    Online Research Agent - Deep-dive research with narrative reports.
-
-    Uses AgentRuntime for multi-entrypoint support with HITL pause/resume.
-    """
-
-    def __init__(self, config=None):
-        self.config = config or default_config
-        self.goal = goal
-        self.nodes = nodes
-        self.edges = edges
-        self.entry_node = entry_node
-        self.entry_points = entry_points
-        self.pause_nodes = pause_nodes
-        self.terminal_nodes = terminal_nodes
-        self._runtime: AgentRuntime | None = None
-        self._graph: GraphSpec | None = None
-
-    def _build_entry_point_specs(self) -> list[EntryPointSpec]:
-        """Convert entry_points dict to EntryPointSpec list."""
-        specs = []
-        for ep_id, node_id in self.entry_points.items():
-            if ep_id == "start":
-                trigger_type = "manual"
-                name = "Start"
-            elif "_resume" in ep_id:
-                trigger_type = "resume"
-                name = f"Resume from {ep_id.replace('_resume', '')}"
-            else:
-                trigger_type = "manual"
-                name = ep_id.replace("-", " ").title()
-
-            specs.append(
-                EntryPointSpec(
-                    id=ep_id,
-                    name=name,
-                    entry_node=node_id,
-                    trigger_type=trigger_type,
-                    isolation_level="shared",
-                )
-            )
-        return specs
-
-    def _create_runtime(self, mock_mode=False) -> AgentRuntime:
-        """Create AgentRuntime instance."""
-        import json
-        from pathlib import Path
-
-        # Persistent storage in ~/.hive for telemetry and run history
-        storage_path = Path.home() / ".hive" / "online_research_agent"
-        storage_path.mkdir(parents=True, exist_ok=True)
-
-        tool_registry = ToolRegistry()
-
-        # Load MCP servers (always load, needed for tool validation)
-        mcp_config_path = Path(__file__).parent / "mcp_servers.json"
-        if mcp_config_path.exists():
-            tool_registry.load_mcp_config(mcp_config_path)
-
-        llm = None
-        if not mock_mode:
-            # LiteLLMProvider uses environment variables for API keys
-            llm = LiteLLMProvider(
-                model=self.config.model,
-                api_key=self.config.api_key,
-                api_base=self.config.api_base,
-            )
-
-        self._graph = GraphSpec(
-            id="online-research-agent-graph",
-            goal_id=self.goal.id,
-            version="1.0.0",
-            entry_node=self.entry_node,
-            entry_points=self.entry_points,
-            terminal_nodes=self.terminal_nodes,
-            pause_nodes=self.pause_nodes,
-            nodes=self.nodes,
-            edges=self.edges,
-            default_model=self.config.model,
-            max_tokens=self.config.max_tokens,
-        )
-
-        # Create AgentRuntime with all entry points
-        self._runtime = create_agent_runtime(
-            graph=self._graph,
-            goal=self.goal,
-            storage_path=storage_path,
-            entry_points=self._build_entry_point_specs(),
-            llm=llm,
-            tools=list(tool_registry.get_tools().values()),
-            tool_executor=tool_registry.get_executor(),
-        )
-
-        return self._runtime
-
-    async def start(self, mock_mode=False) -> None:
-        """Start the agent runtime."""
-        if self._runtime is None:
-            self._create_runtime(mock_mode=mock_mode)
-        await self._runtime.start()
-
-    async def stop(self) -> None:
-        """Stop the agent runtime."""
-        if self._runtime is not None:
-            await self._runtime.stop()
-
-    async def trigger(
-        self,
-        entry_point: str,
-        input_data: dict,
-        correlation_id: str | None = None,
-        session_state: dict | None = None,
-    ) -> str:
-        """
-        Trigger execution at a specific entry point (non-blocking).
-
-        Args:
-            entry_point: Entry point ID (e.g., "start", "pause-node_resume")
-            input_data: Input data for the execution
-            correlation_id: Optional ID to correlate related executions
-            session_state: Optional session state to resume from (with paused_at, memory)
-
-        Returns:
-            Execution ID for tracking
-        """
-        if self._runtime is None or not self._runtime.is_running:
-            raise RuntimeError("Agent runtime not started. Call start() first.")
-        return await self._runtime.trigger(
-            entry_point, input_data, correlation_id, session_state=session_state
-        )
-
-    async def trigger_and_wait(
-        self,
-        entry_point: str,
-        input_data: dict,
-        timeout: float | None = None,
-        session_state: dict | None = None,
-    ) -> ExecutionResult | None:
-        """
-        Trigger execution and wait for completion.
-
-        Args:
-            entry_point: Entry point ID
-            input_data: Input data for the execution
-            timeout: Maximum time to wait (seconds)
-            session_state: Optional session state to resume from (with paused_at, memory)
-
-        Returns:
-            ExecutionResult or None if timeout
-        """
-        if self._runtime is None or not self._runtime.is_running:
-            raise RuntimeError("Agent runtime not started. Call start() first.")
-        return await self._runtime.trigger_and_wait(
-            entry_point, input_data, timeout, session_state=session_state
-        )
-
-    async def run(
-        self, context: dict, mock_mode=False, session_state=None
-    ) -> ExecutionResult:
-        """
-        Run the agent (convenience method for simple single execution).
-
-        For more control, use start() + trigger_and_wait() + stop().
-        """
-        await self.start(mock_mode=mock_mode)
-        try:
-            # Determine entry point based on session_state
-            if session_state and "paused_at" in session_state:
-                paused_node = session_state["paused_at"]
-                resume_key = f"{paused_node}_resume"
-                if resume_key in self.entry_points:
-                    entry_point = resume_key
-                else:
-                    entry_point = "start"
-            else:
-                entry_point = "start"
-
-            result = await self.trigger_and_wait(
-                entry_point, context, session_state=session_state
-            )
-            return result or ExecutionResult(success=False, error="Execution timeout")
-        finally:
-            await self.stop()
-
-    async def get_goal_progress(self) -> dict:
-        """Get goal progress across all executions."""
-        if self._runtime is None:
-            raise RuntimeError("Agent runtime not started")
-        return await self._runtime.get_goal_progress()
-
-    def get_stats(self) -> dict:
-        """Get runtime statistics."""
-        if self._runtime is None:
-            return {"running": False}
-        return self._runtime.get_stats()
-
-    def info(self):
-        """Get agent information."""
-        return {
-            "name": metadata.name,
-            "version": metadata.version,
-            "description": metadata.description,
-            "goal": {
-                "name": self.goal.name,
-                "description": self.goal.description,
-            },
-            "nodes": [n.id for n in self.nodes],
-            "edges": [e.id for e in self.edges],
-            "entry_node": self.entry_node,
-            "entry_points": self.entry_points,
-            "pause_nodes": self.pause_nodes,
-            "terminal_nodes": self.terminal_nodes,
-            "multi_entrypoint": True,
-        }
-
-    def validate(self):
-        """Validate agent structure."""
-        errors = []
-        warnings = []
-
-        node_ids = {node.id for node in self.nodes}
-        for edge in self.edges:
-            if edge.source not in node_ids:
-                errors.append(f"Edge {edge.id}: source '{edge.source}' not found")
-            if edge.target not in node_ids:
-                errors.append(f"Edge {edge.id}: target '{edge.target}' not found")
-
-        if self.entry_node not in node_ids:
-            errors.append(f"Entry node '{self.entry_node}' not found")
-
-        for terminal in self.terminal_nodes:
-            if terminal not in node_ids:
-                errors.append(f"Terminal node '{terminal}' not found")
-
-        for pause in self.pause_nodes:
-            if pause not in node_ids:
-                errors.append(f"Pause node '{pause}' not found")
-
-        # Validate entry points
-        for ep_id, node_id in self.entry_points.items():
-            if node_id not in node_ids:
-                errors.append(
-                    f"Entry point '{ep_id}' references unknown node '{node_id}'"
-                )
-
-        return {
-            "valid": len(errors) == 0,
-            "errors": errors,
-            "warnings": warnings,
-        }
-
-
-# Create default instance
-default_agent = OnlineResearchAgent()
@@ -1,396 +0,0 @@
-"""Node definitions for Online Research Agent."""
-
-from framework.graph import NodeSpec
-
-# Node 1: Parse Query
-parse_query_node = NodeSpec(
-    id="parse-query",
-    name="Parse Query",
-    description="Analyze the research topic and generate 3-5 diverse search queries to cover different aspects",
-    node_type="llm_generate",
-    input_keys=["topic"],
-    output_keys=["search_queries", "research_focus", "key_aspects"],
-    output_schema={
-        "research_focus": {
-            "type": "string",
-            "required": True,
-            "description": "Brief statement of what we're researching",
-        },
-        "key_aspects": {
-            "type": "array",
-            "required": True,
-            "description": "List of 3-5 key aspects to investigate",
-        },
-        "search_queries": {
-            "type": "array",
-            "required": True,
-            "description": "List of 3-5 search queries",
-        },
-    },
-    system_prompt="""\
-You are a research query strategist. Given a research topic, analyze it and generate search queries.
-
-Your task:
-1. Understand the core research question
-2. Identify 3-5 key aspects to investigate
-3. Generate 3-5 diverse search queries that will find comprehensive information
-
-CRITICAL: Return ONLY raw JSON. NO markdown, NO code blocks.
-
-Return this JSON structure:
-{
-  "research_focus": "Brief statement of what we're researching",
-  "key_aspects": ["aspect1", "aspect2", "aspect3"],
-  "search_queries": [
-    "query 1 - broad overview",
-    "query 2 - specific angle",
-    "query 3 - recent developments",
-    "query 4 - expert opinions",
-    "query 5 - data/statistics"
-  ]
-}
-""",
-    tools=[],
-    max_retries=3,
-)
-
-# Node 2: Search Sources
-search_sources_node = NodeSpec(
-    id="search-sources",
-    name="Search Sources",
-    description="Execute web searches using the generated queries to find 15+ source URLs",
-    node_type="llm_tool_use",
-    input_keys=["search_queries", "research_focus"],
-    output_keys=["source_urls", "search_results_summary"],
-    output_schema={
-        "source_urls": {
-            "type": "array",
-            "required": True,
-            "description": "List of source URLs found",
-        },
-        "search_results_summary": {
-            "type": "string",
-            "required": True,
-            "description": "Brief summary of what was found",
-        },
-    },
-    system_prompt="""\
-You are a research assistant executing web searches. Use the web_search tool to find sources.
-
-Your task:
-1. Execute each search query using web_search tool
-2. Collect URLs from search results
-3. Aim for 15+ diverse sources
-
-After searching, return JSON with found sources:
-{
-  "source_urls": ["url1", "url2", ...],
-  "search_results_summary": "Brief summary of what was found"
-}
-""",
-    tools=["web_search"],
-    max_retries=3,
-)
-
-# Node 3: Fetch Content
-fetch_content_node = NodeSpec(
-    id="fetch-content",
-    name="Fetch Content",
-    description="Fetch and extract content from the discovered source URLs",
-    node_type="llm_tool_use",
-    input_keys=["source_urls", "research_focus"],
-    output_keys=["fetched_sources", "fetch_errors"],
-    output_schema={
-        "fetched_sources": {
-            "type": "array",
-            "required": True,
-            "description": "List of fetched source objects with url, title, content",
-        },
-        "fetch_errors": {
-            "type": "array",
-            "required": True,
-            "description": "List of URLs that failed to fetch",
-        },
-    },
-    system_prompt="""\
-You are a content fetcher. Use web_scrape tool to retrieve content from URLs.
-
-Your task:
-1. Fetch content from each source URL using web_scrape tool
-2. Extract the main content relevant to the research focus
-3. Track any URLs that failed to fetch
-
-After fetching, return JSON:
-{
-  "fetched_sources": [
-    {"url": "...", "title": "...", "content": "extracted text..."},
-    ...
-  ],
-  "fetch_errors": ["url that failed", ...]
-}
-""",
-    tools=["web_scrape"],
-    max_retries=3,
-)
-
-# Node 4: Evaluate Sources
-evaluate_sources_node = NodeSpec(
-    id="evaluate-sources",
-    name="Evaluate Sources",
-    description="Score sources for relevance and quality, filter to top 10",
-    node_type="llm_generate",
-    input_keys=["fetched_sources", "research_focus", "key_aspects"],
-    output_keys=["ranked_sources", "source_analysis"],
-    output_schema={
-        "ranked_sources": {
-            "type": "array",
-            "required": True,
-            "description": "List of ranked sources with scores",
-        },
-        "source_analysis": {
-            "type": "string",
-            "required": True,
-            "description": "Overview of source quality and coverage",
-        },
-    },
-    system_prompt="""\
-You are a source evaluator. Assess each source for quality and relevance.
-
-Scoring criteria:
- Relevance to research focus (1-10)
- Source credibility (1-10)
- Information depth (1-10)
- Recency if relevant (1-10)
-
-Your task:
-1. Score each source
-2. Rank by combined score
-3. Select top 10 sources
-4. Note what each source uniquely contributes
-
-Return JSON:
-{
-  "ranked_sources": [
-    {"url": "...", "title": "...", "content": "...", "score": 8.5, "unique_value": "..."},
-    ...
-  ],
-  "source_analysis": "Overview of source quality and coverage"
-}
-""",
-    tools=[],
-    max_retries=3,
-)
-
-# Node 5: Synthesize Findings
-synthesize_findings_node = NodeSpec(
-    id="synthesize-findings",
-    name="Synthesize Findings",
-    description="Extract key facts from sources and identify common themes",
-    node_type="llm_generate",
-    input_keys=["ranked_sources", "research_focus", "key_aspects"],
-    output_keys=["key_findings", "themes", "source_citations"],
-    output_schema={
-        "key_findings": {
-            "type": "array",
-            "required": True,
-            "description": "List of key findings with sources and confidence",
-        },
-        "themes": {
-            "type": "array",
-            "required": True,
-            "description": "List of themes with descriptions and supporting sources",
-        },
-        "source_citations": {
-            "type": "object",
-            "required": True,
-            "description": "Map of facts to supporting URLs",
-        },
-    },
-    system_prompt="""\
-You are a research synthesizer. Analyze multiple sources to extract insights.
-
-Your task:
-1. Identify key facts from each source
-2. Find common themes across sources
-3. Note contradictions or debates
-4. Build a citation map (fact -> source URL)
-
-Return JSON:
-{
-  "key_findings": [
-    {"finding": "...", "sources": ["url1", "url2"], "confidence": "high/medium/low"},
-    ...
-  ],
-  "themes": [
-    {"theme": "...", "description": "...", "supporting_sources": ["url1", ...]},
-    ...
-  ],
-  "source_citations": {
-    "fact or claim": ["supporting url1", "url2"],
-    ...
-  }
-}
-""",
-    tools=[],
-    max_retries=3,
-)
-
-# Node 6: Write Report
-write_report_node = NodeSpec(
-    id="write-report",
-    name="Write Report",
-    description="Generate a narrative report with proper citations",
-    node_type="llm_generate",
-    input_keys=[
-        "key_findings",
-        "themes",
-        "source_citations",
-        "research_focus",
-        "ranked_sources",
-    ],
-    output_keys=["report_content", "references"],
-    output_schema={
-        "report_content": {
-            "type": "string",
-            "required": True,
-            "description": "Full markdown report text with citations",
-        },
-        "references": {
-            "type": "array",
-            "required": True,
-            "description": "List of reference objects with number, url, title",
-        },
-    },
-    system_prompt="""\
-You are a research report writer. Create a well-structured narrative report.
-
-Report structure:
-1. Executive Summary (2-3 paragraphs)
-2. Introduction (context and scope)
-3. Key Findings (organized by theme)
-4. Analysis (synthesis and implications)
-5. Conclusion
-6. References (numbered list of all sources)
-
-Citation format: Use numbered citations like [1], [2] that correspond to the References section.
-
-IMPORTANT:
- Every factual claim MUST have a citation
- Write in clear, professional prose
- Be objective and balanced
- Highlight areas of consensus and debate
-
-Return JSON:
-{
-  "report_content": "Full markdown report text with citations...",
-  "references": [
-    {"number": 1, "url": "...", "title": "..."},
-    ...
-  ]
-}
-""",
-    tools=[],
-    max_retries=3,
-)
-
-# Node 7: Quality Check
-quality_check_node = NodeSpec(
-    id="quality-check",
-    name="Quality Check",
-    description="Verify all claims have citations and report is coherent",
-    node_type="llm_generate",
-    input_keys=["report_content", "references", "source_citations"],
-    output_keys=["quality_score", "issues", "final_report"],
-    output_schema={
-        "quality_score": {
-            "type": "number",
-            "required": True,
-            "description": "Quality score 0-1",
-        },
-        "issues": {
-            "type": "array",
-            "required": True,
-            "description": "List of issues found and fixed",
-        },
-        "final_report": {
-            "type": "string",
-            "required": True,
-            "description": "Corrected full report",
-        },
-    },
-    system_prompt="""\
-You are a quality assurance reviewer. Check the research report for issues.
-
-Check for:
-1. Uncited claims (factual statements without [n] citation)
-2. Broken citations (references to non-existent numbers)
-3. Coherence (logical flow between sections)
-4. Completeness (all key aspects covered)
-5. Accuracy (claims match source content)
-
-If issues found, fix them in the final report.
-
-Return JSON:
-{
-  "quality_score": 0.95,
-  "issues": [
-    {"type": "uncited_claim", "location": "paragraph 3", "fixed": true},
-    ...
-  ],
-  "final_report": "Corrected full report with all issues fixed..."
-}
-""",
-    tools=[],
-    max_retries=3,
-)
-
-# Node 8: Save Report
-save_report_node = NodeSpec(
-    id="save-report",
-    name="Save Report",
-    description="Write the final report to a local markdown file",
-    node_type="llm_tool_use",
-    input_keys=["final_report", "references", "research_focus"],
-    output_keys=["file_path", "save_status"],
-    output_schema={
-        "file_path": {
-            "type": "string",
-            "required": True,
-            "description": "Path where report was saved",
-        },
-        "save_status": {
-            "type": "string",
-            "required": True,
-            "description": "Status of save operation",
-        },
-    },
-    system_prompt="""\
-You are a file manager. Save the research report to disk.
-
-Your task:
-1. Generate a filename from the research focus (slugified, with date)
-2. Use the write_to_file tool to save the report as markdown
-3. Save to the ./research_reports/ directory
-
-Filename format: research_YYYY-MM-DD_topic-slug.md
-
-Return JSON:
-{
-  "file_path": "research_reports/research_2026-01-23_topic-name.md",
-  "save_status": "success"
-}
-""",
-    tools=["write_to_file"],
-    max_retries=3,
-)
-
-__all__ = [
-    "parse_query_node",
-    "search_sources_node",
-    "fetch_content_node",
-    "evaluate_sources_node",
-    "synthesize_findings_node",
-    "write_report_node",
-    "quality_check_node",
-    "save_report_node",
-]
@@ -1,303 +0,0 @@
---
-name: building-agents-core
-description: Core concepts for goal-driven agents - architecture, node types, tool discovery, and workflow overview. Use when starting agent development or need to understand agent fundamentals.
-license: Apache-2.0
-metadata:
-  author: hive
-  version: "1.0"
-  type: foundational
-  part_of: building-agents
---
-
-# Building Agents - Core Concepts
-
-Foundational knowledge for building goal-driven agents as Python packages.
-
-## Architecture: Python Services (Not JSON Configs)
-
-Agents are built as Python packages:
-
-```
-exports/my_agent/
-├── __init__.py          # Package exports
-├── __main__.py          # CLI (run, info, validate, shell)
-├── agent.py             # Graph construction (goal, edges, agent class)
-├── nodes/__init__.py    # Node definitions (NodeSpec)
-├── config.py            # Runtime config
-└── README.md            # Documentation
-```
-
-**Key Principle: Agent is visible and editable during build**
-
- ✅ Files created immediately as components are approved
- ✅ User can watch files grow in their editor
- ✅ No session state - just direct file writes
- ✅ No "export" step - agent is ready when build completes
-
-## Core Concepts
-
-### Goal
-
-Success criteria and constraints (written to agent.py)
-
-```python
-goal = Goal(
-    id="research-goal",
-    name="Technical Research Agent",
-    description="Research technical topics thoroughly",
-    success_criteria=[
-        SuccessCriterion(
-            id="completeness",
-            description="Cover all aspects of topic",
-            metric="coverage_score",
-            target=">=0.9",
-            weight=0.4,
-        ),
-        # 3-5 success criteria total
-    ],
-    constraints=[
-        Constraint(
-            id="accuracy",
-            description="All information must be verified",
-            constraint_type="hard",
-            category="quality",
-        ),
-        # 1-5 constraints total
-    ],
-)
-```
-
-### Node
-
-Unit of work (written to nodes/__init__.py)
-
-**Node Types:**
-
- `llm_generate` - Text generation, parsing
- `llm_tool_use` - Actions requiring tools
- `router` - Conditional branching
- `function` - Deterministic operations
-
-```python
-search_node = NodeSpec(
-    id="search-web",
-    name="Search Web",
-    description="Search for information online",
-    node_type="llm_tool_use",
-    input_keys=["query"],
-    output_keys=["search_results"],
-    system_prompt="Search the web for: {query}",
-    tools=["web_search"],
-    max_retries=3,
-)
-```
-
-### Edge
-
-Connection between nodes (written to agent.py)
-
-**Edge Conditions:**
-
- `on_success` - Proceed if node succeeds
- `on_failure` - Handle errors
- `always` - Always proceed
- `conditional` - Based on expression
-
-```python
-EdgeSpec(
-    id="search-to-analyze",
-    source="search-web",
-    target="analyze-results",
-    condition=EdgeCondition.ON_SUCCESS,
-    priority=1,
-)
-```
-
-### Pause/Resume
-
-Multi-turn conversations
-
- **Pause nodes** - Stop execution, wait for user input
- **Resume entry points** - Continue from pause with user's response
-
-```python
-# Example pause/resume configuration
-pause_nodes = ["request-clarification"]
-entry_points = {
-    "start": "analyze-request",
-    "request-clarification_resume": "process-clarification"
-}
-```
-
-## Tool Discovery & Validation
-
-**CRITICAL:** Before adding a node with tools, you MUST verify the tools exist.
-
-Tools are provided by MCP servers. Never assume a tool exists - always discover dynamically.
-
-### Step 1: Register MCP Server (if not already done)
-
-```python
-mcp__agent-builder__add_mcp_server(
-    name="tools",
-    transport="stdio",
-    command="python",
-    args='["mcp_server.py", "--stdio"]',
-    cwd="../tools"
-)
-```
-
-### Step 2: Discover Available Tools
-
-```python
-# List all tools from all registered servers
-mcp__agent-builder__list_mcp_tools()
-
-# Or list tools from a specific server
-mcp__agent-builder__list_mcp_tools(server_name="tools")
-```
-
-This returns available tools with their descriptions and parameters:
-
-```json
-{
-  "success": true,
-  "tools_by_server": {
-    "tools": [
-      {
-        "name": "web_search",
-        "description": "Search the web...",
-        "parameters": ["query"]
-      },
-      {
-        "name": "web_scrape",
-        "description": "Scrape a URL...",
-        "parameters": ["url"]
-      }
-    ]
-  },
-  "total_tools": 14
-}
-```
-
-### Step 3: Validate Before Adding Nodes
-
-Before writing a node with `tools=[...]`:
-
-1. Call `list_mcp_tools()` to get available tools
-2. Check each tool in your node exists in the response
-3. If a tool doesn't exist:
-   - **DO NOT proceed** with the node
-   - Inform the user: "The tool 'X' is not available. Available tools are: ..."
-   - Ask if they want to use an alternative or proceed without the tool
-
-### Tool Validation Anti-Patterns
-
-❌ **Never assume a tool exists** - always call `list_mcp_tools()` first
-❌ **Never write a node with unverified tools** - validate before writing
-❌ **Never silently drop tools** - if a tool doesn't exist, inform the user
-❌ **Never guess tool names** - use exact names from discovery response
-
-### Example Validation Flow
-
-```python
-# 1. User requests: "Add a node that searches the web"
-# 2. Discover available tools
-tools_response = mcp__agent-builder__list_mcp_tools()
-
-# 3. Check if web_search exists
-available = [t["name"] for tools in tools_response["tools_by_server"].values() for t in tools]
-if "web_search" not in available:
-    # Inform user and ask how to proceed
-    print("❌ 'web_search' not available. Available tools:", available)
-else:
-    # Proceed with node creation
-    # ...
-```
-
-## Workflow Overview: Incremental File Construction
-
-```
-1. CREATE PACKAGE → mkdir + write skeletons
-2. DEFINE GOAL → Write to agent.py + config.py
-3. FOR EACH NODE:
-   - Propose design
-   - User approves
-   - Write to nodes/__init__.py IMMEDIATELY ← FILE WRITTEN
-   - (Optional) Validate with test_node ← MCP VALIDATION
-   - User can open file and see it
-4. CONNECT EDGES → Update agent.py ← FILE WRITTEN
-   - (Optional) Validate with validate_graph ← MCP VALIDATION
-5. FINALIZE → Write agent class to agent.py ← FILE WRITTEN
-6. DONE - Agent ready at exports/my_agent/
-```
-
-**Files written immediately. MCP tools optional for validation/testing bookkeeping.**
-
-### The Key Difference
-
-**OLD (Bad):**
-
-```
-MCP add_node → Session State → MCP add_node → Session State → ...
-                                                                ↓
-                                                     MCP export_graph
-                                                                ↓
-                                                       Files appear
-```
-
-**NEW (Good):**
-
-```
-Write node to file → (Optional: MCP test_node) → Write node to file → ...
-       ↓                                               ↓
-  File visible                                    File visible
-  immediately                                     immediately
-```
-
-**Bottom line:** Use Write/Edit for construction, MCP for validation if needed.
-
-## When to Use This Skill
-
-Use building-agents-core when:
- Starting a new agent project and need to understand fundamentals
- Need to understand agent architecture before building
- Want to validate tool availability before proceeding
- Learning about node types, edges, and graph execution
-
-**Next Steps:**
- Ready to build? → Use `building-agents-construction` skill
- Need patterns and examples? → Use `building-agents-patterns` skill
-
-## MCP Tools for Validation
-
-After writing files, optionally use MCP tools for validation:
-
-**test_node** - Validate node configuration with mock inputs
-```python
-mcp__agent-builder__test_node(
-    node_id="search-web",
-    test_input='{"query": "test query"}',
-    mock_llm_response='{"results": "mock output"}'
-)
-```
-
-**validate_graph** - Check graph structure
-```python
-mcp__agent-builder__validate_graph()
-# Returns: unreachable nodes, missing connections, etc.
-```
-
-**create_session** - Track session state for bookkeeping
-```python
-mcp__agent-builder__create_session(session_name="my-build")
-```
-
-**Key Point:** Files are written FIRST. MCP tools are for validation only.
-
-## Related Skills
-
- **building-agents-construction** - Step-by-step building process
- **building-agents-patterns** - Best practices and examples
- **agent-workflow** - Complete workflow orchestrator
- **testing-agent** - Test and validate completed agents
@@ -1,497 +0,0 @@
---
-name: building-agents-patterns
-description: Best practices, patterns, and examples for building goal-driven agents. Includes pause/resume architecture, hybrid workflows, anti-patterns, and handoff to testing. Use when optimizing agent design.
-license: Apache-2.0
-metadata:
-  author: hive
-  version: "1.0"
-  type: reference
-  part_of: building-agents
---
-
-# Building Agents - Patterns & Best Practices
-
-Design patterns, examples, and best practices for building robust goal-driven agents.
-
-**Prerequisites:** Complete agent structure using `building-agents-construction`.
-
-## Practical Example: Hybrid Workflow
-
-How to build a node using both direct file writes and optional MCP validation:
-
-```python
-# 1. WRITE TO FILE FIRST (Primary - makes it visible)
-node_code = '''
-search_node = NodeSpec(
-    id="search-web",
-    node_type="llm_tool_use",
-    input_keys=["query"],
-    output_keys=["search_results"],
-    system_prompt="Search the web for: {query}",
-    tools=["web_search"],
-)
-'''
-
-Edit(
-    file_path="exports/research_agent/nodes/__init__.py",
-    old_string="# Nodes will be added here",
-    new_string=node_code
-)
-
-print("✅ Added search_node to nodes/__init__.py")
-print("📁 Open exports/research_agent/nodes/__init__.py to see it!")
-
-# 2. OPTIONALLY VALIDATE WITH MCP (Secondary - bookkeeping)
-validation = mcp__agent-builder__test_node(
-    node_id="search-web",
-    test_input='{"query": "python tutorials"}',
-    mock_llm_response='{"search_results": [...mock results...]}'
-)
-
-print(f"✓ Validation: {validation['success']}")
-```
-
-**User experience:**
-
- Immediately sees node in their editor (from step 1)
- Gets validation feedback (from step 2)
- Can edit the file directly if needed
-
-This combines visibility (files) with validation (MCP tools).
-
-## Pause/Resume Architecture
-
-For agents needing multi-turn conversations with user interaction:
-
-### Basic Pause/Resume Flow
-
-```python
-# Define pause nodes - execution stops at these nodes
-pause_nodes = ["request-clarification", "await-approval"]
-
-# Define entry points - where to resume from each pause
-entry_points = {
-    "start": "analyze-request",  # Initial entry
-    "request-clarification_resume": "process-clarification",  # Resume from clarification
-    "await-approval_resume": "execute-action",  # Resume from approval
-}
-```
-
-### Example: Multi-Turn Research Agent
-
-```python
-# Nodes
-nodes = [
-    NodeSpec(id="analyze-request", ...),
-    NodeSpec(id="request-clarification", ...),  # PAUSE NODE
-    NodeSpec(id="process-clarification", ...),
-    NodeSpec(id="generate-results", ...),
-    NodeSpec(id="await-approval", ...),  # PAUSE NODE
-    NodeSpec(id="execute-action", ...),
-]
-
-# Edges with resume flows
-edges = [
-    EdgeSpec(
-        id="analyze-to-clarify",
-        source="analyze-request",
-        target="request-clarification",
-        condition=EdgeCondition.CONDITIONAL,
-        condition_expr="needs_clarification == true",
-    ),
-    # When resumed, goes to process-clarification
-    EdgeSpec(
-        id="clarify-to-process",
-        source="request-clarification",
-        target="process-clarification",
-        condition=EdgeCondition.ALWAYS,
-    ),
-    EdgeSpec(
-        id="results-to-approval",
-        source="generate-results",
-        target="await-approval",
-        condition=EdgeCondition.ALWAYS,
-    ),
-    # When resumed, goes to execute-action
-    EdgeSpec(
-        id="approval-to-execute",
-        source="await-approval",
-        target="execute-action",
-        condition=EdgeCondition.ALWAYS,
-    ),
-]
-
-# Configuration
-pause_nodes = ["request-clarification", "await-approval"]
-entry_points = {
-    "start": "analyze-request",
-    "request-clarification_resume": "process-clarification",
-    "await-approval_resume": "execute-action",
-}
-```
-
-### Running Pause/Resume Agents
-
-```python
-# Initial run - will pause at first pause node
-result1 = await agent.run(
-    context={"query": "research topic"},
-    session_state=None
-)
-
-# Check if paused
-if result1.paused_at:
-    print(f"Paused at: {result1.paused_at}")
-
-    # Resume with user input
-    result2 = await agent.run(
-        context={"user_response": "clarification details"},
-        session_state=result1.session_state  # Pass previous state
-    )
-```
-
-## Anti-Patterns
-
-### What NOT to Do
-
-❌ **Don't rely on `export_graph`** - Write files immediately, not at end
-```python
-# BAD: Building in session state, exporting at end
-mcp__agent-builder__add_node(...)
-mcp__agent-builder__add_node(...)
-mcp__agent-builder__export_graph()  # Files appear only now
-
-# GOOD: Writing files immediately
-Write(file_path="...", content=node_code)  # File visible now
-Write(file_path="...", content=node_code)  # File visible now
-```
-
-❌ **Don't hide code in session** - Write to files as components approved
-```python
-# BAD: Accumulating changes invisibly
-session.add_component(component1)
-session.add_component(component2)
-# User can't see anything yet
-
-# GOOD: Incremental visibility
-Edit(file_path="...", ...)  # User sees change 1
-Edit(file_path="...", ...)  # User sees change 2
-```
-
-❌ **Don't wait to write files** - Agent visible from first step
-```python
-# BAD: Building everything before writing
-design_all_nodes()
-design_all_edges()
-write_everything_at_once()
-
-# GOOD: Write as you go
-write_package_structure()  # Visible
-write_goal()  # Visible
-write_node_1()  # Visible
-write_node_2()  # Visible
-```
-
-❌ **Don't batch everything** - Write incrementally
-```python
-# BAD: Batching all nodes
-nodes = [design_node_1(), design_node_2(), ...]
-write_all_nodes(nodes)
-
-# GOOD: One at a time with user feedback
-write_node_1()  # User approves
-write_node_2()  # User approves
-write_node_3()  # User approves
-```
-
-### MCP Tools - Correct Usage
-
-**MCP tools OK for:**
-✅ `test_node` - Validate node configuration with mock inputs
-✅ `validate_graph` - Check graph structure
-✅ `create_session` - Track session state for bookkeeping
-✅ Other validation tools
-
-**Just don't:** Use MCP as the primary construction method or rely on export_graph
-
-## Best Practices
-
-### 1. Show Progress After Each Write
-
-```python
-# After writing a node
-print("✅ Added analyze_request_node to nodes/__init__.py")
-print("📊 Progress: 1/6 nodes added")
-print("📁 Open exports/my_agent/nodes/__init__.py to see it!")
-```
-
-### 2. Let User Open Files During Build
-
-```python
-# Encourage file inspection
-print("✅ Goal written to agent.py")
-print("")
-print("💡 Tip: Open exports/my_agent/agent.py in your editor to see the goal!")
-```
-
-### 3. Write Incrementally - One Component at a Time
-
-```python
-# Good flow
-write_package_structure()
-show_user("Package created")
-
-write_goal()
-show_user("Goal written")
-
-for node in nodes:
-    get_approval(node)
-    write_node(node)
-    show_user(f"Node {node.id} written")
-```
-
-### 4. Test As You Build
-
-```python
-# After adding several nodes
-print("💡 You can test current state with:")
-print("  PYTHONPATH=core:exports python -m my_agent validate")
-print("  PYTHONPATH=core:exports python -m my_agent info")
-```
-
-### 5. Keep User Informed
-
-```python
-# Clear status updates
-print("🔨 Creating package structure...")
-print("✅ Package created: exports/my_agent/")
-print("")
-print("📝 Next: Define agent goal")
-```
-
-## Continuous Monitoring Agents
-
-For agents that run continuously without terminal nodes:
-
-```python
-# No terminal nodes - loops forever
-terminal_nodes = []
-
-# Workflow loops back to start
-edges = [
-    EdgeSpec(id="monitor-to-check", source="monitor", target="check-condition"),
-    EdgeSpec(id="check-to-wait", source="check-condition", target="wait"),
-    EdgeSpec(id="wait-to-monitor", source="wait", target="monitor"),  # Loop
-]
-
-# Entry node only
-entry_node = "monitor"
-entry_points = {"start": "monitor"}
-pause_nodes = []
-```
-
-**Example: File Monitor**
-
-```python
-nodes = [
-    NodeSpec(id="list-files", ...),
-    NodeSpec(id="check-new-files", node_type="router", ...),
-    NodeSpec(id="process-files", ...),
-    NodeSpec(id="wait-interval", node_type="function", ...),
-]
-
-edges = [
-    EdgeSpec(id="list-to-check", source="list-files", target="check-new-files"),
-    EdgeSpec(
-        id="check-to-process",
-        source="check-new-files",
-        target="process-files",
-        condition=EdgeCondition.CONDITIONAL,
-        condition_expr="new_files_count > 0",
-    ),
-    EdgeSpec(
-        id="check-to-wait",
-        source="check-new-files",
-        target="wait-interval",
-        condition=EdgeCondition.CONDITIONAL,
-        condition_expr="new_files_count == 0",
-    ),
-    EdgeSpec(id="process-to-wait", source="process-files", target="wait-interval"),
-    EdgeSpec(id="wait-to-list", source="wait-interval", target="list-files"),  # Loop back
-]
-
-terminal_nodes = []  # No terminal - runs forever
-```
-
-## Complex Routing Patterns
-
-### Multi-Condition Router
-
-```python
-router_node = NodeSpec(
-    id="decision-router",
-    node_type="router",
-    input_keys=["analysis_result"],
-    output_keys=["decision"],
-    system_prompt="""
-    Based on the analysis result, decide the next action:
-    - If confidence > 0.9: route to "execute"
-    - If 0.5 <= confidence <= 0.9: route to "review"
-    - If confidence < 0.5: route to "clarify"
-
-    Return: {"decision": "execute|review|clarify"}
-    """,
-)
-
-# Edges for each route
-edges = [
-    EdgeSpec(
-        id="router-to-execute",
-        source="decision-router",
-        target="execute-action",
-        condition=EdgeCondition.CONDITIONAL,
-        condition_expr="decision == 'execute'",
-        priority=1,
-    ),
-    EdgeSpec(
-        id="router-to-review",
-        source="decision-router",
-        target="human-review",
-        condition=EdgeCondition.CONDITIONAL,
-        condition_expr="decision == 'review'",
-        priority=2,
-    ),
-    EdgeSpec(
-        id="router-to-clarify",
-        source="decision-router",
-        target="request-clarification",
-        condition=EdgeCondition.CONDITIONAL,
-        condition_expr="decision == 'clarify'",
-        priority=3,
-    ),
-]
-```
-
-## Error Handling Patterns
-
-### Graceful Failure with Fallback
-
-```python
-# Primary node with error handling
-nodes = [
-    NodeSpec(id="api-call", max_retries=3, ...),
-    NodeSpec(id="fallback-cache", ...),
-    NodeSpec(id="report-error", ...),
-]
-
-edges = [
-    # Success path
-    EdgeSpec(
-        id="api-success",
-        source="api-call",
-        target="process-results",
-        condition=EdgeCondition.ON_SUCCESS,
-    ),
-    # Fallback on failure
-    EdgeSpec(
-        id="api-to-fallback",
-        source="api-call",
-        target="fallback-cache",
-        condition=EdgeCondition.ON_FAILURE,
-        priority=1,
-    ),
-    # Report if fallback also fails
-    EdgeSpec(
-        id="fallback-to-error",
-        source="fallback-cache",
-        target="report-error",
-        condition=EdgeCondition.ON_FAILURE,
-        priority=1,
-    ),
-]
-```
-
-## Performance Optimization
-
-### Parallel Node Execution
-
-```python
-# Use multiple edges from same source for parallel execution
-edges = [
-    EdgeSpec(
-        id="start-to-search1",
-        source="start",
-        target="search-source-1",
-        condition=EdgeCondition.ALWAYS,
-    ),
-    EdgeSpec(
-        id="start-to-search2",
-        source="start",
-        target="search-source-2",
-        condition=EdgeCondition.ALWAYS,
-    ),
-    EdgeSpec(
-        id="start-to-search3",
-        source="start",
-        target="search-source-3",
-        condition=EdgeCondition.ALWAYS,
-    ),
-    # Converge results
-    EdgeSpec(
-        id="search1-to-merge",
-        source="search-source-1",
-        target="merge-results",
-    ),
-    EdgeSpec(
-        id="search2-to-merge",
-        source="search-source-2",
-        target="merge-results",
-    ),
-    EdgeSpec(
-        id="search3-to-merge",
-        source="search-source-3",
-        target="merge-results",
-    ),
-]
-```
-
-## Handoff to Testing
-
-When agent is complete, transition to testing phase:
-
-```python
-print("""
-✅ Agent complete: exports/my_agent/
-
-Next steps:
-1. Switch to testing-agent skill
-2. Generate and approve tests
-3. Run evaluation
-4. Debug any failures
-
-Command: "Test the agent at exports/my_agent/"
-""")
-```
-
-### Pre-Testing Checklist
-
-Before handing off to testing-agent:
-
- [ ] Agent structure validates: `python -m agent_name validate`
- [ ] All nodes defined in nodes/__init__.py
- [ ] All edges connect valid nodes
- [ ] Entry node specified
- [ ] Agent can be imported: `from exports.agent_name import default_agent`
- [ ] README.md with usage instructions
- [ ] CLI commands work (info, validate)
-
-## Related Skills
-
- **building-agents-core** - Fundamental concepts
- **building-agents-construction** - Step-by-step building
- **testing-agent** - Test and validate agents
- **agent-workflow** - Complete workflow orchestrator
-
---
-
-**Remember: Agent is actively constructed, visible the whole time. No hidden state. No surprise exports. Just transparent, incremental file building.**
@@ -0,0 +1,399 @@
+---
+name: hive-concepts
+description: Core concepts for goal-driven agents - architecture, node types (event_loop, function), tool discovery, and workflow overview. Use when starting agent development or need to understand agent fundamentals.
+license: Apache-2.0
+metadata:
+  author: hive
+  version: "2.0"
+  type: foundational
+  part_of: hive
+---
+
+# Building Agents - Core Concepts
+
+Foundational knowledge for building goal-driven agents as Python packages.
+
+## Architecture: Python Services (Not JSON Configs)
+
+Agents are built as Python packages:
+
+```
+exports/my_agent/
+├── __init__.py          # Package exports
+├── __main__.py          # CLI (run, info, validate, shell)
+├── agent.py             # Graph construction (goal, edges, agent class)
+├── nodes/__init__.py    # Node definitions (NodeSpec)
+├── config.py            # Runtime config
+└── README.md            # Documentation
+```
+
+**Key Principle: Agent is visible and editable during build**
+
+- Files created immediately as components are approved
+- User can watch files grow in their editor
+- No session state - just direct file writes
+- No "export" step - agent is ready when build completes
+
+## Core Concepts
+
+### Goal
+
+Success criteria and constraints (written to agent.py)
+
+```python
+goal = Goal(
+    id="research-goal",
+    name="Technical Research Agent",
+    description="Research technical topics thoroughly",
+    success_criteria=[
+        SuccessCriterion(
+            id="completeness",
+            description="Cover all aspects of topic",
+            metric="coverage_score",
+            target=">=0.9",
+            weight=0.4,
+        ),
+        # 3-5 success criteria total
+    ],
+    constraints=[
+        Constraint(
+            id="accuracy",
+            description="All information must be verified",
+            constraint_type="hard",
+            category="quality",
+        ),
+        # 1-5 constraints total
+    ],
+)
+```
+
+### Node
+
+Unit of work (written to nodes/__init__.py)
+
+**Node Types:**
+
+- `event_loop` — Multi-turn streaming loop with tool execution and judge-based evaluation. Works with or without tools.
+- `function` — Deterministic Python operations. No LLM involved.
+
+```python
+search_node = NodeSpec(
+    id="search-web",
+    name="Search Web",
+    description="Search for information and extract results",
+    node_type="event_loop",
+    input_keys=["query"],
+    output_keys=["search_results"],
+    system_prompt="Search the web for: {query}. Use the web_search tool to find results, then call set_output to store them.",
+    tools=["web_search"],
+)
+```
+
+**NodeSpec Fields for Event Loop Nodes:**
+
+| Field | Default | Description |
+|-------|---------|-------------|
+| `client_facing` | `False` | If True, streams output to user and blocks for input between turns |
+| `nullable_output_keys` | `[]` | Output keys that may remain unset (for mutually exclusive outputs) |
+| `max_node_visits` | `1` | Max times this node executes per run. Set >1 for feedback loop targets |
+
+### Edge
+
+Connection between nodes (written to agent.py)
+
+**Edge Conditions:**
+
+- `on_success` — Proceed if node succeeds (most common)
+- `on_failure` — Handle errors
+- `always` — Always proceed
+- `conditional` — Based on expression evaluating node output
+
+**Edge Priority:**
+
+Priority controls evaluation order when multiple edges leave the same node. Higher priority edges are evaluated first. Use negative priority for feedback edges (edges that loop back to earlier nodes).
+
+```python
+# Forward edge (evaluated first)
+EdgeSpec(
+    id="review-to-campaign",
+    source="review",
+    target="campaign-builder",
+    condition=EdgeCondition.CONDITIONAL,
+    condition_expr="output.get('approved_contacts') is not None",
+    priority=1,
+)
+
+# Feedback edge (evaluated after forward edges)
+EdgeSpec(
+    id="review-feedback",
+    source="review",
+    target="extractor",
+    condition=EdgeCondition.CONDITIONAL,
+    condition_expr="output.get('redo_extraction') is not None",
+    priority=-1,
+)
+```
+
+### Client-Facing Nodes
+
+For multi-turn conversations with the user, set `client_facing=True` on a node. The node will:
+- Stream its LLM output directly to the end user
+- Block for user input between conversational turns
+- Resume when new input is injected via `inject_event()`
+
+```python
+intake_node = NodeSpec(
+    id="intake",
+    name="Intake",
+    description="Gather requirements from the user",
+    node_type="event_loop",
+    client_facing=True,
+    input_keys=[],
+    output_keys=["repo_url", "project_url"],
+    system_prompt="You are the intake agent. Ask the user for the repo URL and project URL.",
+)
+```
+
+> **Legacy Note:** The old `pause_nodes` / `entry_points` pattern still works but `client_facing=True` is preferred for new agents.
+
+**STEP 1 / STEP 2 Prompt Pattern:** For client-facing nodes, structure the system prompt with two explicit phases:
+
+```python
+system_prompt="""\
+**STEP 1 — Respond to the user (text only, NO tool calls):**
+[Present information, ask questions, etc.]
+
+**STEP 2 — After the user responds, call set_output:**
+[Call set_output with the structured outputs]
+"""
+```
+
+This prevents the LLM from calling `set_output` prematurely before the user has had a chance to respond.
+
+### Node Design: Fewer, Richer Nodes
+
+Prefer fewer nodes that do more work over many thin single-purpose nodes:
+
+- **Bad**: 8 thin nodes (parse query → search → fetch → evaluate → synthesize → write → check → save)
+- **Good**: 4 rich nodes (intake → research → review → report)
+
+Why: Each node boundary requires serializing outputs and passing context. Fewer nodes means the LLM retains full context of its work within the node. A research node that searches, fetches, and analyzes keeps all the source material in its conversation history.
+
+### nullable_output_keys for Cross-Edge Inputs
+
+When a node receives inputs that only arrive on certain edges (e.g., `feedback` only comes from a review → research feedback loop, not from intake → research), mark those keys as `nullable_output_keys`:
+
+```python
+research_node = NodeSpec(
+    id="research",
+    input_keys=["research_brief", "feedback"],
+    nullable_output_keys=["feedback"],  # Not present on first visit
+    max_node_visits=3,
+    ...
+)
+```
+
+## Event Loop Architecture Concepts
+
+### How EventLoopNode Works
+
+An event loop node runs a multi-turn loop:
+1. LLM receives system prompt + conversation history
+2. LLM responds (text and/or tool calls)
+3. Tool calls are executed, results added to conversation
+4. Judge evaluates: ACCEPT (exit loop), RETRY (loop again), or ESCALATE
+5. Repeat until the judge ACCEPTs or `max_iterations` is reached
+
+### EventLoopNode Runtime
+
+EventLoopNodes are **auto-created** by `GraphExecutor` at runtime. You do NOT need to manually register them. Both `GraphExecutor` (direct) and `AgentRuntime` / `create_agent_runtime()` handle event_loop nodes automatically.
+
+```python
+# Direct execution — executor auto-creates EventLoopNodes
+from framework.graph.executor import GraphExecutor
+from framework.runtime.core import Runtime
+
+runtime = Runtime(storage_path)
+executor = GraphExecutor(
+    runtime=runtime,
+    llm=llm,
+    tools=tools,
+    tool_executor=tool_executor,
+    storage_path=storage_path,
+)
+result = await executor.execute(graph=graph, goal=goal, input_data=input_data)
+
+# TUI execution — AgentRuntime also works
+from framework.runtime.agent_runtime import create_agent_runtime
+runtime = create_agent_runtime(
+    graph=graph, goal=goal, storage_path=storage_path,
+    entry_points=[...], llm=llm, tools=tools, tool_executor=tool_executor,
+)
+```
+
+### set_output
+
+Nodes produce structured outputs by calling `set_output(key, value)` — a synthetic tool injected by the framework. When the LLM calls `set_output`, the value is stored in the output accumulator and made available to downstream nodes via shared memory.
+
+`set_output` is NOT a real tool — it is excluded from `real_tool_results`. For client-facing nodes, this means a turn where the LLM only calls `set_output` (no other tools) is treated as a conversational boundary and will block for user input.
+
+### JudgeProtocol
+
+**The judge is the SOLE mechanism for acceptance decisions.** Do not add ad-hoc framework gating, output rollback, or premature rejection logic. If the LLM calls `set_output` too early, fix it with better prompts or a custom judge — not framework-level guards.
+
+The judge controls when a node's loop exits:
+- **Implicit judge** (default, no judge configured): ACCEPTs when the LLM finishes with no tool calls and all required output keys are set
+- **SchemaJudge**: Validates outputs against a Pydantic model
+- **Custom judges**: Implement `evaluate(context) -> JudgeVerdict`
+
+### LoopConfig
+
+Controls loop behavior:
+- `max_iterations` (default 50) — prevents infinite loops
+- `max_tool_calls_per_turn` (default 10) — limits tool calls per LLM response
+- `tool_call_overflow_margin` (default 0.5) — wiggle room before discarding extra tool calls (50% means hard cutoff at 150% of limit)
+- `stall_detection_threshold` (default 3) — detects repeated identical responses
+- `max_history_tokens` (default 32000) — triggers conversation compaction
+
+### Data Tools (Spillover Management)
+
+When tool results exceed the context window, the framework automatically saves them to a spillover directory and truncates with a hint. Nodes that produce or consume large data should include the data tools:
+
+- `save_data(filename, data)` — Write data to a file in the data directory
+- `load_data(filename, offset=0, limit=50)` — Read data with line-based pagination
+- `list_data_files()` — List available data files
+- `serve_file_to_user(filename, label="")` — Get a clickable file:// URI for the user
+
+Note: `data_dir` is a framework-injected context parameter — the LLM never sees or passes it. `GraphExecutor.execute()` sets it per-execution via `contextvars`, so data tools and spillover always share the same session-scoped directory.
+
+These are real MCP tools (not synthetic). Add them to nodes that handle large tool results:
+
+```python
+research_node = NodeSpec(
+    ...
+    tools=["web_search", "web_scrape", "load_data", "save_data", "list_data_files"],
+)
+```
+
+### Fan-Out / Fan-In
+
+Multiple ON_SUCCESS edges from the same source create parallel execution. All branches run concurrently via `asyncio.gather()`. Parallel event_loop nodes must have disjoint `output_keys`.
+
+### max_node_visits
+
+Controls how many times a node can execute in one graph run. The default is 1. Set it higher for nodes that are targets of feedback edges (review-reject loops). Set it to 0 for unlimited visits (still guarded by max_steps).
+
+## Tool Discovery & Validation
+
+**CRITICAL:** Before adding a node with tools, you MUST verify the tools exist.
+
+Tools are provided by MCP servers. Never assume a tool exists - always discover dynamically.
+
+### Step 1: Register MCP Server (if not already done)
+
+```python
+mcp__agent-builder__add_mcp_server(
+    name="tools",
+    transport="stdio",
+    command="python",
+    args='["mcp_server.py", "--stdio"]',
+    cwd="../tools"
+)
+```
+
+### Step 2: Discover Available Tools
+
+```python
+# List all tools from all registered servers
+mcp__agent-builder__list_mcp_tools()
+
+# Or list tools from a specific server
+mcp__agent-builder__list_mcp_tools(server_name="tools")
+```
+
+### Step 3: Validate Before Adding Nodes
+
+Before writing a node with `tools=[...]`:
+
+1. Call `list_mcp_tools()` to get available tools
+2. Check each tool in your node exists in the response
+3. If a tool doesn't exist:
+   - **DO NOT proceed** with the node
+   - Inform the user: "The tool 'X' is not available. Available tools are: ..."
+   - Ask if they want to use an alternative or proceed without the tool
+
+### Tool Validation Anti-Patterns
+
+- **Never assume a tool exists** - always call `list_mcp_tools()` first
+- **Never write a node with unverified tools** - validate before writing
+- **Never silently drop tools** - if a tool doesn't exist, inform the user
+- **Never guess tool names** - use exact names from discovery response
+
+## Workflow Overview: Incremental File Construction
+
+```
+1. CREATE PACKAGE → mkdir + write skeletons
+2. DEFINE GOAL → Write to agent.py + config.py
+3. FOR EACH NODE:
+   - Propose design (event_loop for LLM work, function for deterministic)
+   - User approves
+   - Write to nodes/__init__.py IMMEDIATELY
+   - (Optional) Validate with test_node
+4. CONNECT EDGES → Update agent.py
+   - Use priority for feedback edges (negative priority)
+   - (Optional) Validate with validate_graph
+5. FINALIZE → Write agent class to agent.py
+6. DONE - Agent ready at exports/my_agent/
+```
+
+**Files are written immediately. MCP tools are optional and used only for validation/testing bookkeeping.**
+
+## When to Use This Skill
+
+Use hive-concepts when:
+- Starting a new agent project and need to understand fundamentals
+- Need to understand agent architecture before building
+- Want to validate tool availability before proceeding
+- Learning about node types, edges, and graph execution
+
+**Next Steps:**
+- Ready to build? → Use `hive-create` skill
+- Need patterns and examples? → Use `hive-patterns` skill
+
+## MCP Tools for Validation
+
+After writing files, optionally use MCP tools for validation:
+
+**test_node** - Validate node configuration with mock inputs
+```python
+mcp__agent-builder__test_node(
+    node_id="search-web",
+    test_input='{"query": "test query"}',
+    mock_llm_response='{"results": "mock output"}'
+)
+```
+
+**validate_graph** - Check graph structure
+```python
+mcp__agent-builder__validate_graph()
+# Returns: unreachable nodes, missing connections, event_loop validation, etc.
+```
+
+**configure_loop** - Set event loop parameters
+```python
+mcp__agent-builder__configure_loop(
+    max_iterations=50,
+    max_tool_calls_per_turn=10,
+    stall_detection_threshold=3,
+    max_history_tokens=32000
+)
+```
+
+**Key Point:** Files are written FIRST. MCP tools are for validation only.
+
+## Related Skills
+
+- **hive-create** - Step-by-step building process
+- **hive-patterns** - Best practices: judges, feedback edges, fan-out, context management
+- **hive** - Complete workflow orchestrator
+- **hive-test** - Test and validate completed agents
@@ -0,0 +1,24 @@
+"""
+Deep Research Agent - Interactive, rigorous research with TUI conversation.
+
+Research any topic through multi-source web search, quality evaluation,
+and synthesis. Features client-facing TUI interaction at key checkpoints
+for user guidance and iterative deepening.
+"""
+
+from .agent import DeepResearchAgent, default_agent, goal, nodes, edges
+from .config import RuntimeConfig, AgentMetadata, default_config, metadata
+
+__version__ = "1.0.0"
+
+__all__ = [
+    "DeepResearchAgent",
+    "default_agent",
+    "goal",
+    "nodes",
+    "edges",
+    "RuntimeConfig",
+    "AgentMetadata",
+    "default_config",
+    "metadata",
+]
@@ -0,0 +1,241 @@
+"""
+CLI entry point for Deep Research Agent.
+
+Uses AgentRuntime for multi-entrypoint support with HITL pause/resume.
+"""
+
+import asyncio
+import json
+import logging
+import sys
+import click
+
+from .agent import default_agent, DeepResearchAgent
+
+
+def setup_logging(verbose=False, debug=False):
+    """Configure logging for execution visibility."""
+    if debug:
+        level, fmt = logging.DEBUG, "%(asctime)s %(name)s: %(message)s"
+    elif verbose:
+        level, fmt = logging.INFO, "%(message)s"
+    else:
+        level, fmt = logging.WARNING, "%(levelname)s: %(message)s"
+    logging.basicConfig(level=level, format=fmt, stream=sys.stderr)
+    logging.getLogger("framework").setLevel(level)
+
+
+# Root click command group; the subcommands below attach to it via
+# @cli.command(). The docstring doubles as the group's --help text, so keep
+# it user-facing.
+@click.group()
+@click.version_option(version="1.0.0")
+def cli():
+    """Deep Research Agent - Interactive, rigorous research with TUI conversation."""
+    pass
+
+
+@cli.command()
+@click.option("--topic", "-t", type=str, required=True, help="Research topic")
+@click.option("--mock", is_flag=True, help="Run in mock mode")
+@click.option("--quiet", "-q", is_flag=True, help="Only output result JSON")
+@click.option("--verbose", "-v", is_flag=True, help="Show execution details")
+@click.option("--debug", is_flag=True, help="Show debug logging")
+def run(topic, mock, quiet, verbose, debug):
+    """Execute research on a topic."""
+    if not quiet:
+        setup_logging(verbose=verbose, debug=debug)
+
+    context = {"topic": topic}
+
+    result = asyncio.run(default_agent.run(context, mock_mode=mock))
+
+    output_data = {
+        "success": result.success,
+        "steps_executed": result.steps_executed,
+        "output": result.output,
+    }
+    if result.error:
+        output_data["error"] = result.error
+
+    click.echo(json.dumps(output_data, indent=2, default=str))
+    sys.exit(0 if result.success else 1)
+
+
+@cli.command()
+@click.option("--mock", is_flag=True, help="Run in mock mode")
+@click.option("--verbose", "-v", is_flag=True, help="Show execution details")
+@click.option("--debug", is_flag=True, help="Show debug logging")
+def tui(mock, verbose, debug):
+    """Launch the TUI dashboard for interactive research."""
+    setup_logging(verbose=verbose, debug=debug)
+
+    # Import the TUI lazily so a failed import produces an install hint and
+    # exit status 1 instead of a traceback.
+    try:
+        from framework.tui.app import AdenTUI
+    except ImportError:
+        click.echo(
+            "TUI requires the 'textual' package. Install with: pip install textual"
+        )
+        sys.exit(1)
+
+    # The remaining imports are only needed by this command, so they are
+    # deferred until the TUI import has succeeded.
+    from pathlib import Path
+
+    from framework.llm import LiteLLMProvider
+    from framework.runner.tool_registry import ToolRegistry
+    from framework.runtime.agent_runtime import create_agent_runtime
+    from framework.runtime.event_bus import EventBus
+    from framework.runtime.execution_stream import EntryPointSpec
+
+    async def run_with_tui():
+        # This command wires up its own runtime instead of calling
+        # agent._setup(): the entry point here has id "start" and
+        # isolation_level "isolated", and no checkpoint_config is passed.
+        agent = DeepResearchAgent()
+
+        # Build graph and tools
+        agent._event_bus = EventBus()
+        agent._tool_registry = ToolRegistry()
+
+        storage_path = Path.home() / ".hive" / "agents" / "deep_research_agent"
+        storage_path.mkdir(parents=True, exist_ok=True)
+
+        # MCP servers are optional: the config is loaded only if the file
+        # exists next to this module.
+        mcp_config_path = Path(__file__).parent / "mcp_servers.json"
+        if mcp_config_path.exists():
+            agent._tool_registry.load_mcp_config(mcp_config_path)
+
+        # In mock mode no provider is constructed and llm stays None.
+        llm = None
+        if not mock:
+            llm = LiteLLMProvider(
+                model=agent.config.model,
+                api_key=agent.config.api_key,
+                api_base=agent.config.api_base,
+            )
+
+        tools = list(agent._tool_registry.get_tools().values())
+        tool_executor = agent._tool_registry.get_executor()
+        graph = agent._build_graph()
+
+        runtime = create_agent_runtime(
+            graph=graph,
+            goal=agent.goal,
+            storage_path=storage_path,
+            entry_points=[
+                EntryPointSpec(
+                    id="start",
+                    name="Start Research",
+                    entry_node="intake",
+                    trigger_type="manual",
+                    isolation_level="isolated",
+                ),
+            ],
+            llm=llm,
+            tools=tools,
+            tool_executor=tool_executor,
+        )
+
+        await runtime.start()
+
+        # Always stop the runtime, even if the TUI app raises.
+        try:
+            app = AdenTUI(runtime)
+            await app.run_async()
+        finally:
+            await runtime.stop()
+
+    asyncio.run(run_with_tui())
+
+
+@cli.command()
+@click.option("--json", "output_json", is_flag=True)
+def info(output_json):
+    """Show agent information."""
+    info_data = default_agent.info()
+    if output_json:
+        click.echo(json.dumps(info_data, indent=2))
+    else:
+        click.echo(f"Agent: {info_data['name']}")
+        click.echo(f"Version: {info_data['version']}")
+        click.echo(f"Description: {info_data['description']}")
+        click.echo(f"\nNodes: {', '.join(info_data['nodes'])}")
+        click.echo(f"Client-facing: {', '.join(info_data['client_facing_nodes'])}")
+        click.echo(f"Entry: {info_data['entry_node']}")
+        click.echo(f"Terminal: {', '.join(info_data['terminal_nodes'])}")
+
+
+@cli.command()
+def validate():
+    """Validate agent structure."""
+    validation = default_agent.validate()
+    if validation["valid"]:
+        click.echo("Agent is valid")
+        if validation["warnings"]:
+            for warning in validation["warnings"]:
+                click.echo(f"  WARNING: {warning}")
+    else:
+        click.echo("Agent has errors:")
+        for error in validation["errors"]:
+            click.echo(f"  ERROR: {error}")
+    sys.exit(0 if validation["valid"] else 1)
+
+
+@cli.command()
+@click.option("--verbose", "-v", is_flag=True)
+def shell(verbose):
+    """Interactive research session (CLI, no TUI)."""
+    asyncio.run(_interactive_shell(verbose))
+
+
+async def _interactive_shell(verbose=False):
+    """Async interactive shell."""
+    setup_logging(verbose=verbose)
+
+    click.echo("=== Deep Research Agent ===")
+    click.echo("Enter a topic to research (or 'quit' to exit):\n")
+
+    agent = DeepResearchAgent()
+    await agent.start()
+
+    try:
+        while True:
+            try:
+                topic = await asyncio.get_event_loop().run_in_executor(
+                    None, input, "Topic> "
+                )
+                if topic.lower() in ["quit", "exit", "q"]:
+                    click.echo("Goodbye!")
+                    break
+
+                if not topic.strip():
+                    continue
+
+                click.echo("\nResearching...\n")
+
+                result = await agent.trigger_and_wait("start", {"topic": topic})
+
+                if result is None:
+                    click.echo("\n[Execution timed out]\n")
+                    continue
+
+                if result.success:
+                    output = result.output
+                    if "report_content" in output:
+                        click.echo("\n--- Report ---\n")
+                        click.echo(output["report_content"])
+                        click.echo("\n")
+                    if "references" in output:
+                        click.echo("--- References ---\n")
+                        for ref in output.get("references", []):
+                            click.echo(
+                                f"  [{ref.get('number', '?')}] {ref.get('title', '')} - {ref.get('url', '')}"
+                            )
+                        click.echo("\n")
+                else:
+                    click.echo(f"\nResearch failed: {result.error}\n")
+
+            except KeyboardInterrupt:
+                click.echo("\nGoodbye!")
+                break
+            except Exception as e:
+                click.echo(f"Error: {e}", err=True)
+                import traceback
+
+                traceback.print_exc()
+    finally:
+        await agent.stop()
+
+
+# Invoke the click command group when this module is executed as a script.
+if __name__ == "__main__":
+    cli()
@@ -0,0 +1,358 @@
+"""Agent graph construction for Deep Research Agent."""
+
+from pathlib import Path
+
+from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
+from framework.graph.edge import GraphSpec
+from framework.graph.executor import ExecutionResult
+from framework.graph.checkpoint_config import CheckpointConfig
+from framework.llm import LiteLLMProvider
+from framework.runner.tool_registry import ToolRegistry
+from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
+from framework.runtime.execution_stream import EntryPointSpec
+
+from .config import default_config, metadata
+from .nodes import (
+    intake_node,
+    research_node,
+    review_node,
+    report_node,
+)
+
+# Goal definition
+# Four success criteria, each weighted 0.25 (weights sum to 1.0), and three
+# constraints. Targets are plain strings.
+# NOTE(review): how the framework interprets `metric`/`target` strings and
+# the `constraint_type`/`category` values is not visible here — confirm
+# against the framework's Goal model.
+goal = Goal(
+    id="rigorous-interactive-research",
+    name="Rigorous Interactive Research",
+    description=(
+        "Research any topic by searching diverse sources, analyzing findings, "
+        "and producing a cited report — with user checkpoints to guide direction."
+    ),
+    success_criteria=[
+        SuccessCriterion(
+            id="source-diversity",
+            description="Use multiple diverse, authoritative sources",
+            metric="source_count",
+            target=">=5",
+            weight=0.25,
+        ),
+        SuccessCriterion(
+            id="citation-coverage",
+            description="Every factual claim in the report cites its source",
+            metric="citation_coverage",
+            target="100%",
+            weight=0.25,
+        ),
+        SuccessCriterion(
+            id="user-satisfaction",
+            description="User reviews findings before report generation",
+            metric="user_approval",
+            target="true",
+            weight=0.25,
+        ),
+        SuccessCriterion(
+            id="report-completeness",
+            description="Final report answers the original research questions",
+            metric="question_coverage",
+            target="90%",
+            weight=0.25,
+        ),
+    ],
+    constraints=[
+        Constraint(
+            id="no-hallucination",
+            description="Only include information found in fetched sources",
+            constraint_type="quality",
+            category="accuracy",
+        ),
+        Constraint(
+            id="source-attribution",
+            description="Every claim must cite its source with a numbered reference",
+            constraint_type="quality",
+            category="accuracy",
+        ),
+        Constraint(
+            id="user-checkpoint",
+            description="Present findings to the user before writing the final report",
+            constraint_type="functional",
+            category="interaction",
+        ),
+    ],
+)
+
+# Node list
+# The four NodeSpec objects imported from .nodes, in pipeline order
+# (intake -> research -> review -> report).
+nodes = [
+    intake_node,
+    research_node,
+    review_node,
+    report_node,
+]
+
+# Edge definitions
+# Forward path: intake -> research -> review -> report.
+# "review" and "report" each have two conditional outgoing edges whose
+# expressions are complementary (== True / == False, and
+# == 'more_research' / != 'more_research').
+# NOTE(review): if needs_more_research is neither True nor False (e.g. unset),
+# neither edge out of "review" matches — confirm the review node always sets it.
+# NOTE(review): priority ordering semantics live in the executor, which is
+# not visible here.
+edges = [
+    # intake -> research
+    EdgeSpec(
+        id="intake-to-research",
+        source="intake",
+        target="research",
+        condition=EdgeCondition.ON_SUCCESS,
+        priority=1,
+    ),
+    # research -> review
+    EdgeSpec(
+        id="research-to-review",
+        source="research",
+        target="review",
+        condition=EdgeCondition.ON_SUCCESS,
+        priority=1,
+    ),
+    # review -> research (feedback loop)
+    EdgeSpec(
+        id="review-to-research-feedback",
+        source="review",
+        target="research",
+        condition=EdgeCondition.CONDITIONAL,
+        condition_expr="needs_more_research == True",
+        priority=1,
+    ),
+    # review -> report (user satisfied)
+    EdgeSpec(
+        id="review-to-report",
+        source="review",
+        target="report",
+        condition=EdgeCondition.CONDITIONAL,
+        condition_expr="needs_more_research == False",
+        priority=2,
+    ),
+    # report -> research (user wants deeper research on current topic)
+    EdgeSpec(
+        id="report-to-research",
+        source="report",
+        target="research",
+        condition=EdgeCondition.CONDITIONAL,
+        condition_expr="str(next_action).lower() == 'more_research'",
+        priority=2,
+    ),
+    # report -> intake (user wants a new topic — default when not more_research)
+    EdgeSpec(
+        id="report-to-intake",
+        source="report",
+        target="intake",
+        condition=EdgeCondition.CONDITIONAL,
+        condition_expr="str(next_action).lower() != 'more_research'",
+        priority=1,
+    ),
+]
+
+# Graph configuration
+# Execution begins at "intake". No pause or terminal nodes are declared:
+# every node has an outgoing edge, and "report" routes back to either
+# "research" or "intake".
+entry_node = "intake"
+entry_points = {"start": "intake"}
+pause_nodes = []
+terminal_nodes = []
+
+
+class DeepResearchAgent:
+    """
+    Deep Research Agent — 4-node pipeline with user checkpoints.
+
+    Flow: intake -> research -> review -> report
+                      ^           |
+                      +-- feedback loop (if user wants more)
+
+    Uses AgentRuntime for proper session management:
+    - Session-scoped storage (sessions/{session_id}/)
+    - Checkpointing for resume capability
+    - Runtime logging
+    - Data folder for save_data/load_data
+    """
+
+    def __init__(self, config=None):
+        self.config = config or default_config
+        self.goal = goal
+        self.nodes = nodes
+        self.edges = edges
+        self.entry_node = entry_node
+        self.entry_points = entry_points
+        self.pause_nodes = pause_nodes
+        self.terminal_nodes = terminal_nodes
+        self._graph: GraphSpec | None = None
+        self._agent_runtime: AgentRuntime | None = None
+        self._tool_registry: ToolRegistry | None = None
+        self._storage_path: Path | None = None
+
+    def _build_graph(self) -> GraphSpec:
+        """Build the GraphSpec."""
+        return GraphSpec(
+            id="deep-research-agent-graph",
+            goal_id=self.goal.id,
+            version="1.0.0",
+            entry_node=self.entry_node,
+            entry_points=self.entry_points,
+            terminal_nodes=self.terminal_nodes,
+            pause_nodes=self.pause_nodes,
+            nodes=self.nodes,
+            edges=self.edges,
+            default_model=self.config.model,
+            max_tokens=self.config.max_tokens,
+            loop_config={
+                "max_iterations": 100,
+                "max_tool_calls_per_turn": 20,
+                "max_history_tokens": 32000,
+            },
+            conversation_mode="continuous",
+            identity_prompt=(
+                "You are a rigorous research agent. You search for information "
+                "from diverse, authoritative sources, analyze findings critically, "
+                "and produce well-cited reports. You never fabricate information — "
+                "every claim must trace back to a source you actually retrieved."
+            ),
+        )
+
+    def _setup(self, mock_mode=False) -> None:
+        """Set up the agent runtime with sessions, checkpoints, and logging."""
+        self._storage_path = Path.home() / ".hive" / "agents" / "deep_research_agent"
+        self._storage_path.mkdir(parents=True, exist_ok=True)
+
+        self._tool_registry = ToolRegistry()
+
+        mcp_config_path = Path(__file__).parent / "mcp_servers.json"
+        if mcp_config_path.exists():
+            self._tool_registry.load_mcp_config(mcp_config_path)
+
+        llm = None
+        if not mock_mode:
+            llm = LiteLLMProvider(
+                model=self.config.model,
+                api_key=self.config.api_key,
+                api_base=self.config.api_base,
+            )
+
+        tool_executor = self._tool_registry.get_executor()
+        tools = list(self._tool_registry.get_tools().values())
+
+        self._graph = self._build_graph()
+
+        checkpoint_config = CheckpointConfig(
+            enabled=True,
+            checkpoint_on_node_start=False,
+            checkpoint_on_node_complete=True,
+            checkpoint_max_age_days=7,
+            async_checkpoint=True,
+        )
+
+        entry_point_specs = [
+            EntryPointSpec(
+                id="default",
+                name="Default",
+                entry_node=self.entry_node,
+                trigger_type="manual",
+                isolation_level="shared",
+            )
+        ]
+
+        self._agent_runtime = create_agent_runtime(
+            graph=self._graph,
+            goal=self.goal,
+            storage_path=self._storage_path,
+            entry_points=entry_point_specs,
+            llm=llm,
+            tools=tools,
+            tool_executor=tool_executor,
+            checkpoint_config=checkpoint_config,
+        )
+
+    async def start(self, mock_mode=False) -> None:
+        """Set up and start the agent runtime."""
+        if self._agent_runtime is None:
+            self._setup(mock_mode=mock_mode)
+        if not self._agent_runtime.is_running:
+            await self._agent_runtime.start()
+
+    async def stop(self) -> None:
+        """Stop the agent runtime and clean up."""
+        if self._agent_runtime and self._agent_runtime.is_running:
+            await self._agent_runtime.stop()
+        self._agent_runtime = None
+
+    async def trigger_and_wait(
+        self,
+        entry_point: str = "default",
+        input_data: dict | None = None,
+        timeout: float | None = None,
+        session_state: dict | None = None,
+    ) -> ExecutionResult | None:
+        """Execute the graph and wait for completion."""
+        if self._agent_runtime is None:
+            raise RuntimeError("Agent not started. Call start() first.")
+
+        return await self._agent_runtime.trigger_and_wait(
+            entry_point_id=entry_point,
+            input_data=input_data or {},
+            session_state=session_state,
+        )
+
+    async def run(
+        self, context: dict, mock_mode=False, session_state=None
+    ) -> ExecutionResult:
+        """Run the agent (convenience method for single execution)."""
+        await self.start(mock_mode=mock_mode)
+        try:
+            result = await self.trigger_and_wait(
+                "default", context, session_state=session_state
+            )
+            return result or ExecutionResult(success=False, error="Execution timeout")
+        finally:
+            await self.stop()
+
+    def info(self):
+        """Get agent information."""
+        return {
+            "name": metadata.name,
+            "version": metadata.version,
+            "description": metadata.description,
+            "goal": {
+                "name": self.goal.name,
+                "description": self.goal.description,
+            },
+            "nodes": [n.id for n in self.nodes],
+            "edges": [e.id for e in self.edges],
+            "entry_node": self.entry_node,
+            "entry_points": self.entry_points,
+            "pause_nodes": self.pause_nodes,
+            "terminal_nodes": self.terminal_nodes,
+            "client_facing_nodes": [n.id for n in self.nodes if n.client_facing],
+        }
+
+    def validate(self):
+        """Validate agent structure."""
+        errors = []
+        warnings = []
+
+        node_ids = {node.id for node in self.nodes}
+        for edge in self.edges:
+            if edge.source not in node_ids:
+                errors.append(f"Edge {edge.id}: source '{edge.source}' not found")
+            if edge.target not in node_ids:
+                errors.append(f"Edge {edge.id}: target '{edge.target}' not found")
+
+        if self.entry_node not in node_ids:
+            errors.append(f"Entry node '{self.entry_node}' not found")
+
+        for terminal in self.terminal_nodes:
+            if terminal not in node_ids:
+                errors.append(f"Terminal node '{terminal}' not found")
+
+        for ep_id, node_id in self.entry_points.items():
+            if node_id not in node_ids:
+                errors.append(
+                    f"Entry point '{ep_id}' references unknown node '{node_id}'"
+                )
+
+        return {
+            "valid": len(errors) == 0,
+            "errors": errors,
+            "warnings": warnings,
+        }
+
+
+# Create default instance
+default_agent = DeepResearchAgent()
@@ -0,0 +1,26 @@
+"""Runtime configuration."""
+
+from dataclasses import dataclass
+
+from framework.config import RuntimeConfig
+
+default_config = RuntimeConfig()  # RuntimeConfig built with no arguments
+
+
+@dataclass
+class AgentMetadata:  # Descriptive agent metadata; every field has a default value.
+    name: str = "Deep Research Agent"
+    version: str = "1.0.0"
+    description: str = (  # One-paragraph summary of what the agent does.
+        "Interactive research agent that rigorously investigates topics through "
+        "multi-source search, quality evaluation, and synthesis - with TUI conversation "
+        "at key checkpoints for user guidance and feedback."
+    )
+    intro_message: str = (  # Greeting text addressed to the user.
+        "Hi! I'm your deep research assistant. Tell me a topic and I'll investigate it "
+        "thoroughly — searching multiple sources, evaluating quality, and synthesizing "
+        "a comprehensive report. What would you like me to research?"
+    )
+
+
+metadata = AgentMetadata()  # Instance holding the default field values.
@@ -1,8 +1,8 @@
 {
  "hive-tools": {
    "transport": "stdio",
-    "command": "python",
-    "args": ["mcp_server.py", "--stdio"],
+    "command": "uv",
+    "args": ["run", "python", "mcp_server.py", "--stdio"],
    "cwd": "../../tools",
    "description": "Hive tools MCP server providing web_search, web_scrape, and write_to_file"
  }
@@ -0,0 +1,204 @@
+"""Node definitions for Deep Research Agent."""
+
+from framework.graph import NodeSpec
+
+# Node 1: Intake (client-facing, no tools)
+# Brief conversation to clarify what the user wants researched; emits "research_brief".
+intake_node = NodeSpec(
+    id="intake",
+    name="Research Intake",
+    description="Discuss the research topic with the user, clarify scope, and confirm direction",
+    node_type="event_loop",
+    client_facing=True,
+    max_node_visits=0,  # NOTE(review): presumably 0 = no visit cap — confirm in NodeSpec
+    input_keys=["topic"],
+    output_keys=["research_brief"],
+    success_criteria=(
+        "The research brief is specific and actionable: it states the topic, "
+        "the key questions to answer, the desired scope, and depth."
+    ),
+    system_prompt="""\
+You are a research intake specialist. The user wants to research a topic.
+Have a brief conversation to clarify what they need.
+
+**STEP 1 — Read and respond (text only, NO tool calls):**
+1. Read the topic provided
+2. If it's vague, ask 1-2 clarifying questions (scope, angle, depth)
+3. If it's already clear, confirm your understanding and ask the user to confirm
+
+Keep it short. Don't over-ask.
+
+**STEP 2 — After the user confirms, call set_output:**
+- set_output("research_brief", "A clear paragraph describing exactly what to research, \
+what questions to answer, what scope to cover, and how deep to go.")
+""",
+    tools=[],
+)
+
+# Node 2: Research
+# The workhorse — searches the web, fetches content, analyzes sources.
+# One node with web_search + web_scrape avoids the context-passing overhead of 5 separate nodes.
+research_node = NodeSpec(
+    id="research",
+    name="Research",
+    description="Search the web, fetch source content, and compile findings",
+    node_type="event_loop",
+    max_node_visits=0,
+    input_keys=["research_brief", "feedback"],
+    output_keys=["findings", "sources", "gaps"],
+    nullable_output_keys=["feedback"],  # NOTE(review): not in output_keys — confirm
+    success_criteria=(
+        "Findings reference at least 3 distinct sources with URLs. "
+        "Key claims are substantiated by fetched content, not generated."
+    ),
+    system_prompt="""\
+You are a research agent. Given a research brief, find and analyze sources.
+
+If feedback is provided, this is a follow-up round — focus on the gaps identified.
+
+Work in phases:
+1. **Search**: Use web_search with 3-5 diverse queries covering different angles.
+   Prioritize authoritative sources (.edu, .gov, established publications).
+2. **Fetch**: Use web_scrape on the most promising URLs (aim for 5-8 sources).
+   Skip URLs that fail. Extract the substantive content.
+3. **Analyze**: Review what you've collected. Identify key findings, themes,
+   and any contradictions between sources.
+
+Important:
+- Work in batches of 3-4 tool calls at a time — never more than 10 per turn
+- After each batch, assess whether you have enough material
+- Prefer quality over quantity — 5 good sources beat 15 thin ones
+- Track which URL each finding comes from (you'll need citations later)
+- Call set_output for each key in a SEPARATE turn (not in the same turn as other tool calls)
+
+When done, use set_output (one key at a time, separate turns):
+- set_output("findings", "Structured summary: key findings with source URLs for each claim. \
+Include themes, contradictions, and confidence levels.")
+- set_output("sources", [{"url": "...", "title": "...", "summary": "..."}])
+- set_output("gaps", "What aspects of the research brief are NOT well-covered yet, if any.")
+""",
+    tools=[
+        "web_search",
+        "web_scrape",
+        "load_data",
+        "save_data",
+        "append_data",
+        "list_data_files",
+    ],
+)
+
+# Node 3: Review (client-facing, no tools)
+# Shows the user what was found, then records needs_more_research and feedback.
+review_node = NodeSpec(
+    id="review",
+    name="Review Findings",
+    description="Present findings to user and decide whether to research more or write the report",
+    node_type="event_loop",
+    client_facing=True,
+    max_node_visits=0,
+    input_keys=["findings", "sources", "gaps", "research_brief"],
+    output_keys=["needs_more_research", "feedback"],
+    success_criteria=(
+        "The user has been presented with findings and has explicitly indicated "
+        "whether they want more research or are ready for the report."
+    ),
+    system_prompt="""\
+Present the research findings to the user clearly and concisely.
+
+**STEP 1 — Present (your first message, text only, NO tool calls):**
+1. **Summary** (2-3 sentences of what was found)
+2. **Key Findings** (bulleted, with confidence levels)
+3. **Sources Used** (count and quality assessment)
+4. **Gaps** (what's still unclear or under-covered)
+
+End by asking: Are they satisfied, or do they want deeper research? \
+Should we proceed to writing the final report?
+
+**STEP 2 — After the user responds, call set_output:**
+- set_output("needs_more_research", "true")  — if they want more
+- set_output("needs_more_research", "false") — if they're satisfied
+- set_output("feedback", "What the user wants explored further, or empty string")
+""",
+    tools=[],
+)
+
+# Node 4: Report (client-facing)
+# Writes an HTML report, serves the link, answers follow-ups, then records next_action.
+report_node = NodeSpec(
+    id="report",
+    name="Write & Deliver Report",
+    description="Write a cited HTML report from the findings and present it to the user",
+    node_type="event_loop",
+    client_facing=True,
+    max_node_visits=0,
+    input_keys=["findings", "sources", "research_brief"],
+    output_keys=["delivery_status", "next_action"],
+    success_criteria=(
+        "An HTML report has been saved, the file link has been presented to the user, "
+        "and the user has indicated what they want to do next."
+    ),
+    system_prompt="""\
+Write a research report as an HTML file and present it to the user.
+
+IMPORTANT: save_data requires TWO separate arguments: filename and data.
+Call it like: save_data(filename="report.html", data="<html>...</html>")
+Do NOT use _raw, do NOT nest arguments inside a JSON string.
+
+**STEP 1 — Write and save the HTML report (tool calls, NO text to user yet):**
+
+Build a clean HTML document. Keep the HTML concise — aim for clarity over length.
+Use minimal embedded CSS (a few lines of style, not a full framework).
+
+Report structure:
+- Title & date
+- Executive Summary (2-3 paragraphs)
+- Key Findings (organized by theme, with [n] citation links)
+- Analysis (synthesis, implications)
+- Conclusion (key takeaways)
+- References (numbered list with clickable URLs)
+
+Requirements:
+- Every factual claim must cite its source with [n] notation
+- Be objective — present multiple viewpoints where sources disagree
+- Answer the original research questions from the brief
+
+Save the HTML:
+  save_data(filename="report.html", data="<html>...</html>")
+
+Then get the clickable link:
+  serve_file_to_user(filename="report.html", label="Research Report")
+
+If save_data fails, simplify and shorten the HTML, then retry.
+
+**STEP 2 — Present the link to the user (text only, NO tool calls):**
+
+Tell the user the report is ready and include the file:// URI from
+serve_file_to_user so they can click it to open. Give a brief summary
+of what the report covers. Ask if they have questions or want to continue.
+
+**STEP 3 — After the user responds:**
+- Answer any follow-up questions from the research material
+- When the user is ready to move on, ask what they'd like to do next:
+  - Research a new topic?
+  - Dig deeper into the current topic?
+- Then call set_output:
+  - set_output("delivery_status", "completed")
+  - set_output("next_action", "new_topic")       — if they want a new topic
+  - set_output("next_action", "more_research")   — if they want deeper research
+""",
+    tools=[
+        "save_data",
+        "append_data",
+        "edit_data",
+        "serve_file_to_user",
+        "load_data",
+        "list_data_files",
+    ],
+)
+
+# Public names exported by this module: the four node specs, in Node 1-4 order.
+__all__ = ["intake_node", "research_node", "review_node", "report_node"]
@@ -1,10 +1,10 @@
 ---
-name: setup-credentials
+name: hive-credentials
 description: Set up and install credentials for an agent. Detects missing credentials from agent config, collects them from the user, and stores them securely in the local encrypted store at ~/.hive/credentials.
 license: Apache-2.0
 metadata:
  author: hive
-  version: "2.2"
+  version: "2.3"
  type: utility
 ---

@@ -31,95 +31,50 @@ Determine which agent needs credentials. The user will either:

 Locate the agent's directory under `exports/{agent_name}/`.

-### Step 2: Detect Required Credentials (Bash-First)
+### Step 2: Detect Missing Credentials

-Use bash commands to determine what the agent needs and what's already configured. This avoids Python import issues and works even when `HIVE_CREDENTIAL_KEY` is not set.
+Use the `check_missing_credentials` MCP tool to detect what the agent needs and what's already configured. This tool loads the agent, inspects its required tools and node types, maps them to credentials via `CREDENTIAL_SPECS`, and checks both the encrypted store and environment variables.

-#### Step 2a: Read Agent Requirements
-
-Extract `required_tools` and node types from the agent config:
-
-```bash
-# Get required tools
-jq -r '.required_tools[]?' exports/{agent_name}/agent.json 2>/dev/null
-
-# Get node types from graph nodes
-jq -r '.graph.nodes[]?.node_type' exports/{agent_name}/agent.json 2>/dev/null | sort -u
+```
+check_missing_credentials(agent_path="exports/{agent_name}")
 ```

-Map the extracted tools and node types to credentials by reading the spec files directly:
+The tool returns a JSON response:

-```bash
-# Read all credential specs — each file defines tools, node_types, env_var, and credential_id
-cat tools/src/aden_tools/credentials/llm.py tools/src/aden_tools/credentials/search.py tools/src/aden_tools/credentials/email.py tools/src/aden_tools/credentials/integrations.py
+```json
+{
+  "agent": "exports/{agent_name}",
+  "missing": [
+    {
+      "credential_name": "brave_search",
+      "env_var": "BRAVE_SEARCH_API_KEY",
+      "description": "Brave Search API key for web search",
+      "help_url": "https://brave.com/search/api/",
+      "tools": ["web_search"]
+    }
+  ],
+  "available": [
+    {
+      "credential_name": "anthropic",
+      "env_var": "ANTHROPIC_API_KEY",
+      "source": "encrypted_store"
+    }
+  ],
+  "total_missing": 1,
+  "ready": false
+}
 ```

-For each `CredentialSpec`, match its `tools` and `node_types` lists against the agent's required tools and node types. Extract the `env_var`, `credential_id`, and `credential_group` for every match. This is the list of needed credentials.
-
-#### Step 2b: Check Existing Credential Sources
-
-For each needed credential, check three sources. A credential is "found" if it exists in ANY of them:
-
-**1. Encrypted store metadata index** (unencrypted JSON — no decryption key needed):
-
-```bash
-cat ~/.hive/credentials/metadata/index.json 2>/dev/null | jq -r '.credentials | keys[]'
-```
-
-If a credential ID appears in this list, it is stored in the encrypted store.
-
-**2. Environment variables:**
-
-```bash
-# Check each needed env var, e.g.:
-printenv ANTHROPIC_API_KEY > /dev/null 2>&1 && echo "ANTHROPIC_API_KEY: set" || echo "ANTHROPIC_API_KEY: not set"
-printenv BRAVE_SEARCH_API_KEY > /dev/null 2>&1 && echo "BRAVE_SEARCH_API_KEY: set" || echo "BRAVE_SEARCH_API_KEY: not set"
-```
-
-**3. Project `.env` file:**
-
-```bash
-# Check each needed env var, e.g.:
-grep -q '^ANTHROPIC_API_KEY=' .env 2>/dev/null && echo "ANTHROPIC_API_KEY: in .env" || echo "ANTHROPIC_API_KEY: not in .env"
-grep -q '^BRAVE_SEARCH_API_KEY=' .env 2>/dev/null && echo "BRAVE_SEARCH_API_KEY: in .env" || echo "BRAVE_SEARCH_API_KEY: not in .env"
-```
-
-#### Step 2c: HIVE_CREDENTIAL_KEY Check
-
-If any credentials were found in the encrypted store metadata index, verify the encryption key is available. The key is typically persisted to shell config by a previous setup-credentials run.
-
-Check both the current session AND shell config files:
-
-```bash
-# Check 1: Current session
-printenv HIVE_CREDENTIAL_KEY > /dev/null 2>&1 && echo "session: set" || echo "session: not set"
-
-# Check 2: Shell config files (where setup-credentials persists it)
-# Note: check each file individually to avoid non-zero exit when one doesn't exist
-for f in ~/.zshrc ~/.bashrc ~/.profile; do [ -f "$f" ] && grep -q 'HIVE_CREDENTIAL_KEY' "$f" && echo "$f"; done
-```
-
-Decision logic:
- **In current session** — no action needed, credentials in the store are usable
- **In shell config but NOT in current session** — the key is persisted but this shell hasn't sourced it. Run `source ~/.zshrc` (or `~/.bashrc`), then re-check. Credentials in the store are usable after sourcing.
- **Not in session AND not in shell config** — the key was never persisted. Warn the user that credentials in the store cannot be decrypted. Help fix the key situation (recover/re-persist), do NOT re-collect credential values that are already stored.
-
-#### Step 2d: Compute Missing & Group
-
-Diff the "needed" credentials against the "found" credentials to get the truly missing list.
-
-Group related credentials by their `credential_group` field from the spec files. Credentials that share the same non-empty `credential_group` value should be presented as a single setup step rather than asking for each one individually.
-
-**If nothing is missing and there's no HIVE_CREDENTIAL_KEY issue:** Report all credentials as configured and skip Steps 3-5. Example:
+**If `ready` is true (nothing missing):** Report all credentials as configured and skip Steps 3-5. Example:

 ```
 All required credentials are already configured:
-  ✓ anthropic (ANTHROPIC_API_KEY) — found in encrypted store
-  ✓ brave_search (BRAVE_SEARCH_API_KEY) — found in environment
+  ✓ anthropic (ANTHROPIC_API_KEY)
+  ✓ brave_search (BRAVE_SEARCH_API_KEY)
 Your agent is ready to run!
 ```

-**If credentials are missing:** Continue to Step 3 with only the missing ones.
+**If credentials are missing:** Continue to Step 3 with the `missing` list.

 ### Step 3: Present Auth Options for Each Missing Credential

@@ -153,7 +108,7 @@ Present the available options using AskUserQuestion:
 Choose how to configure HUBSPOT_ACCESS_TOKEN:

  1) Aden Platform (OAuth) (Recommended)
-     Secure OAuth2 flow via integration.adenhq.com
+     Secure OAuth2 flow via hive.adenhq.com
     - Quick setup with automatic token refresh
     - No need to manage API keys manually

@@ -170,6 +125,28 @@ Choose how to configure HUBSPOT_ACCESS_TOKEN:

 ### Step 4: Execute Auth Flow Based on User Choice

+#### Prerequisite: Ensure HIVE_CREDENTIAL_KEY Is Available
+
+Before storing any credentials, verify `HIVE_CREDENTIAL_KEY` is set (needed to encrypt/decrypt the local store). Check both the current session and shell config:
+
+```bash
+# Check current session
+printenv HIVE_CREDENTIAL_KEY > /dev/null 2>&1 && echo "session: set" || echo "session: not set"
+
+# Check shell config files
+for f in ~/.zshrc ~/.bashrc ~/.profile; do [ -f "$f" ] && grep -q 'HIVE_CREDENTIAL_KEY' "$f" && echo "$f"; done
+```
+
+- **In current session** — proceed to store credentials
+- **In shell config but NOT in current session** — run `source ~/.zshrc` (or `~/.bashrc`) first, then proceed
+- **Not set anywhere** — `EncryptedFileStorage` will auto-generate one. After storing, tell the user to persist it by adding `export HIVE_CREDENTIAL_KEY="{generated_key}"` to their shell profile
+
+> **⚠️ IMPORTANT: After adding `HIVE_CREDENTIAL_KEY` to the user's shell config, always display:**
+> ```
+> ⚠️  Environment variables were added to your shell config.
+>     Open a NEW TERMINAL for them to take effect outside this session.
+> ```
+
 #### Option 1: Aden Platform (OAuth)

 This is the recommended flow for supported integrations (HubSpot, etc.).
@@ -195,7 +172,7 @@ If not set, guide user to get one from Aden (this is where they do OAuth):
 from aden_tools.credentials import open_browser, get_aden_setup_url

 # Open browser to Aden - user will sign up and connect integrations there
-url = get_aden_setup_url()  # https://integration.adenhq.com/setup
+url = get_aden_setup_url()  # https://hive.adenhq.com
 success, msg = open_browser(url)

 print("Please sign in to Aden and connect your integrations (HubSpot, etc.).")
@@ -231,6 +208,12 @@ if success:
    print(f"Run: {source_cmd}")
 ```

+> **⚠️ IMPORTANT: After adding `ADEN_API_KEY` to the user's shell config, always display:**
+> ```
+> ⚠️  Environment variables were added to your shell config.
+>     Open a NEW TERMINAL for them to take effect outside this session.
+> ```
+
 Also save to `~/.hive/configuration.json` for the framework:

 ```python
@@ -272,7 +255,7 @@ print(f"Synced credentials: {synced}")
 # If the required credential wasn't synced, the user hasn't authorized it on Aden yet
 if "hubspot" not in synced:
    print("HubSpot not found in your Aden account.")
-    print("Please visit https://integration.adenhq.com to connect HubSpot, then try again.")
+    print("Please visit https://hive.adenhq.com to connect HubSpot, then try again.")
 ```

 For more control over the sync process:
@@ -442,28 +425,38 @@ config_path.write_text(json.dumps(config, indent=2))

 ### Step 6: Verify All Credentials

-Run validation again to confirm everything is set:
+Use the `verify_credentials` MCP tool to confirm everything is properly configured:

-```python
-runner = AgentRunner.load("exports/{agent_name}")
-validation = runner.validate()
-assert not validation.missing_credentials, "Still missing credentials!"
+```
+verify_credentials(agent_path="exports/{agent_name}")
 ```

-Report the result to the user.
+The tool returns:
+
+```json
+{
+  "agent": "exports/{agent_name}",
+  "ready": true,
+  "missing_credentials": [],
+  "warnings": [],
+  "errors": []
+}
+```
+
+If `ready` is true, report success. If `missing_credentials` is non-empty, identify what failed and loop back to Step 3 for the remaining credentials.

 ## Health Check Reference

 Health checks validate credentials by making lightweight API calls:

-| Credential      | Endpoint                                | What It Checks                     |
-| --------------- | --------------------------------------- | ---------------------------------- |
-| `anthropic`     | `POST /v1/messages`                     | API key validity                   |
-| `brave_search`  | `GET /res/v1/web/search?q=test&count=1` | API key validity                   |
-| `google_search` | `GET /customsearch/v1?q=test&num=1`     | API key + CSE ID validity          |
-| `github`        | `GET /user`                             | Token validity, user identity      |
-| `hubspot`       | `GET /crm/v3/objects/contacts?limit=1`  | Bearer token validity, CRM scopes  |
-| `resend`        | `GET /domains`                          | API key validity                   |
+| Credential      | Endpoint                                | What It Checks                    |
+| --------------- | --------------------------------------- | --------------------------------- |
+| `anthropic`     | `POST /v1/messages`                     | API key validity                  |
+| `brave_search`  | `GET /res/v1/web/search?q=test&count=1` | API key validity                  |
+| `google_search` | `GET /customsearch/v1?q=test&num=1`     | API key + CSE ID validity         |
+| `github`        | `GET /user`                             | Token validity, user identity     |
+| `hubspot`       | `GET /crm/v3/objects/contacts?limit=1`  | Bearer token validity, CRM scopes |
+| `resend`        | `GET /domains`                          | API key validity                  |

 ```python
 from aden_tools.credentials import check_credential_health, HealthCheckResult
@@ -479,9 +472,14 @@ result: HealthCheckResult = check_credential_health("hubspot", token_value)
 The local encrypted store requires `HIVE_CREDENTIAL_KEY` to encrypt/decrypt credentials.

 - If the user doesn't have one, `EncryptedFileStorage` will auto-generate one and log it
- The user MUST persist this key (e.g., in `~/.bashrc` or a secrets manager)
+- The user MUST persist this key (e.g., in `~/.bashrc`/`~/.zshrc` or a secrets manager)
 - Without this key, stored credentials cannot be decrypted
- This is the ONLY secret that should live in `~/.bashrc` or environment config
+
+**Shell config rule:** Only TWO keys belong in shell config (`~/.zshrc`/`~/.bashrc`):
+- `HIVE_CREDENTIAL_KEY` — encryption key for the credential store
+- `ADEN_API_KEY` — Aden platform auth key (needed before the store can sync)
+
+All other API keys (Brave, Google, HubSpot, etc.) must go in the encrypted store only. **Never offer to add them to shell config.**

 If `HIVE_CREDENTIAL_KEY` is not set:

@@ -494,6 +492,7 @@ If `HIVE_CREDENTIAL_KEY` is not set:
 - **NEVER** log, print, or echo credential values in tool output
 - **NEVER** store credentials in plaintext files, git-tracked files, or agent configs
 - **NEVER** hardcode credentials in source code
+- **NEVER** offer to save API keys to shell config (`~/.zshrc`/`~/.bashrc`) — the **only** keys that belong in shell config are `HIVE_CREDENTIAL_KEY` and `ADEN_API_KEY`. All other credentials (Brave, Google, HubSpot, GitHub, Resend, etc.) go in the encrypted store only.
 - **ALWAYS** use `SecretStr` from Pydantic when handling credential values in Python
 - **ALWAYS** use the local encrypted store (`~/.hive/credentials`) for persistence
 - **ALWAYS** run health checks before storing credentials (when possible)
@@ -509,7 +508,7 @@ All credential specs are defined in `tools/src/aden_tools/credentials/`:
 | `llm.py`          | LLM Providers | `anthropic`                                   | No             |
 | `search.py`       | Search Tools  | `brave_search`, `google_search`, `google_cse` | No             |
 | `email.py`        | Email         | `resend`                                      | No             |
-| `integrations.py` | Integrations  | `github`, `hubspot`                           | No / Yes       |
+| `integrations.py` | Integrations  | `github`, `hubspot`, `google_calendar_oauth`  | No / Yes       |

 **Note:** Additional LLM providers (Cerebras, Groq, OpenAI) are handled by LiteLLM via environment
 variables (`CEREBRAS_API_KEY`, `GROQ_API_KEY`, `OPENAI_API_KEY`) but are not yet in CREDENTIAL_SPECS.
@@ -560,60 +559,27 @@ token = store.get_key("hubspot", "access_token")
 ## Example Session

 ```
-User: /setup-credentials for my research-agent
+User: /hive-credentials for my research-agent

 Agent: Let me check what credentials your research-agent needs.

-[Reads agent config]
-$ jq -r '.required_tools[]?' exports/research-agent/agent.json
-web_search
-google_search
-
-$ jq -r '.graph.nodes[]?.node_type' exports/research-agent/agent.json | sort -u
-llm_tool_use
-
-[Maps tools/nodes to credentials using lookup table]
-Needed: anthropic, brave_search, google_search, google_cse
-
-[Checks encrypted store metadata index]
-$ cat ~/.hive/credentials/metadata/index.json | jq -r '.credentials | keys[]'
-anthropic
-brave_search
-
-[Checks environment variables]
-$ printenv ANTHROPIC_API_KEY > /dev/null 2>&1 && echo "set" || echo "not set"
-not set
-$ printenv BRAVE_SEARCH_API_KEY > /dev/null 2>&1 && echo "set" || echo "not set"
-not set
-$ printenv GOOGLE_API_KEY > /dev/null 2>&1 && echo "set" || echo "not set"
-not set
-$ printenv GOOGLE_CSE_ID > /dev/null 2>&1 && echo "set" || echo "not set"
-not set
-
-[Checks HIVE_CREDENTIAL_KEY since credentials found in store]
-$ printenv HIVE_CREDENTIAL_KEY > /dev/null 2>&1 && echo "session: set" || echo "session: not set"
-session: not set
-$ for f in ~/.zshrc ~/.bashrc ~/.profile; do [ -f "$f" ] && grep -q 'HIVE_CREDENTIAL_KEY' "$f" && echo "$f"; done
-/Users/user/.zshrc
-
-[Key is in shell config but not current session — sourcing it]
-$ source ~/.zshrc
-
-[Computes missing credentials]
-Found:
-  ✓ anthropic (ANTHROPIC_API_KEY) — in encrypted store
-  ✓ brave_search (BRAVE_SEARCH_API_KEY) — in encrypted store
-Missing:
-  ✗ google_search (GOOGLE_API_KEY)
-  ✗ google_cse (GOOGLE_CSE_ID)
+[Calls check_missing_credentials(agent_path="exports/research-agent")]
+→ Returns:
+  available: anthropic (encrypted_store), brave_search (encrypted_store)
+  missing: google_search (GOOGLE_API_KEY), google_cse (GOOGLE_CSE_ID)
+  ready: false

 Agent: 2 of 4 required credentials are already configured. Only Google Custom
-Search needs setup (2 values as a single group).
+Search needs setup (2 values).

 --- Setting up Google Custom Search (google_search + google_cse) ---

 This requires two values that work together.

+[Checks HIVE_CREDENTIAL_KEY before storing]
+$ printenv HIVE_CREDENTIAL_KEY > /dev/null 2>&1 && echo "set" || echo "not set"
+set
+
 First, the Google API Key:
 1. Go to https://console.cloud.google.com/apis/credentials
 2. Create a new project (or select an existing one)
@@ -640,10 +606,35 @@ Now, the Custom Search Engine ID:

 ✓ Google Custom Search credentials valid

+[Calls verify_credentials(agent_path="exports/research-agent")]
+→ Returns: ready: true, missing_credentials: []
+
 All credentials are now configured:
  ✓ anthropic (ANTHROPIC_API_KEY) — already in encrypted store
  ✓ brave_search (BRAVE_SEARCH_API_KEY) — already in encrypted store
  ✓ google_search (GOOGLE_API_KEY) — stored in encrypted store
  ✓ google_cse (GOOGLE_CSE_ID) — stored in encrypted store
-  Your agent is ready to run!
+
+┌─────────────────────────────────────────────────────────────────────────────┐
+│                      ✅ CREDENTIALS CONFIGURED                              │
+├─────────────────────────────────────────────────────────────────────────────┤
+│                                                                             │
+│     OPEN A NEW TERMINAL before running commands below.                      │
+│     Environment variables were saved to your shell config but               │
+│     only take effect in new terminal sessions.                              │
+│                                                                             │
+│  NEXT STEPS:                                                                │
+│                                                                             │
+│  1. RUN YOUR AGENT:                                                         │
+│                                                                             │
+│     hive tui                                                                │
+│                                                                             │
+│  2. IF YOU ENCOUNTER ISSUES, USE THE DEBUGGER:                              │
+│                                                                             │
+│     /hive-debugger                                                          │
+│                                                                             │
+│     The debugger analyzes runtime logs, identifies retry loops, tool        │
+│     failures, stalled execution, and provides actionable fix suggestions.   │
+│                                                                             │
+└─────────────────────────────────────────────────────────────────────────────┘
 ```
@@ -0,0 +1,385 @@
+---
+name: hive-patterns
+description: Best practices, patterns, and examples for building goal-driven agents. Includes client-facing interaction, feedback edges, judge patterns, fan-out/fan-in, context management, and anti-patterns.
+license: Apache-2.0
+metadata:
+  author: hive
+  version: "2.0"
+  type: reference
+  part_of: hive
+---
+
+# Building Agents - Patterns & Best Practices
+
+Design patterns, examples, and best practices for building robust goal-driven agents.
+
+**Prerequisites:** Complete agent structure using `hive-create`.
+
+## Practical Example: Hybrid Workflow
+
+How to build a node using both direct file writes and optional MCP validation:
+
+```python
+# 1. WRITE TO FILE FIRST (Primary - makes it visible)
+node_code = '''
+search_node = NodeSpec(
+    id="search-web",
+    node_type="event_loop",
+    input_keys=["query"],
+    output_keys=["search_results"],
+    system_prompt="Search the web for: {query}. Use web_search, then call set_output to store results.",
+    tools=["web_search"],
+)
+'''
+
+Edit(
+    file_path="exports/research_agent/nodes/__init__.py",
+    old_string="# Nodes will be added here",
+    new_string=node_code
+)
+
+# 2. OPTIONALLY VALIDATE WITH MCP (Secondary - bookkeeping)
+validation = mcp__agent-builder__test_node(
+    node_id="search-web",
+    test_input='{"query": "python tutorials"}',
+    mock_llm_response='{"search_results": [...mock results...]}'
+)
+```
+
+**User experience:**
+
+- Immediately sees node in their editor (from step 1)
+- Gets validation feedback (from step 2)
+- Can edit the file directly if needed
+
+## Multi-Turn Interaction Patterns
+
+For agents needing multi-turn conversations with users, use `client_facing=True` on event_loop nodes.
+
+### Client-Facing Nodes
+
+A client-facing node streams LLM output to the user and blocks for user input between conversational turns. This replaces the old pause/resume pattern.
+
+```python
+# Client-facing node with STEP 1/STEP 2 prompt pattern
+intake_node = NodeSpec(
+    id="intake",
+    name="Intake",
+    description="Gather requirements from the user",
+    node_type="event_loop",
+    client_facing=True,
+    input_keys=["topic"],
+    output_keys=["research_brief"],
+    system_prompt="""\
+You are an intake specialist.
+
+**STEP 1 — Read and respond (text only, NO tool calls):**
+1. Read the topic provided
+2. If it's vague, ask 1-2 clarifying questions
+3. If it's clear, confirm your understanding
+
+**STEP 2 — After the user confirms, call set_output:**
+- set_output("research_brief", "Clear description of what to research")
+""",
+)
+
+# Internal node runs without user interaction
+research_node = NodeSpec(
+    id="research",
+    name="Research",
+    description="Search and analyze sources",
+    node_type="event_loop",
+    input_keys=["research_brief"],
+    output_keys=["findings", "sources"],
+    system_prompt="Research the topic using web_search and web_scrape...",
+    tools=["web_search", "web_scrape", "load_data", "save_data"],
+)
+```
+
+**How it works:**
+
+- Client-facing nodes stream LLM text to the user and block for input after each response
+- User input is injected via `node.inject_event(text)`
+- When the LLM calls `set_output` to produce structured outputs, the judge evaluates and ACCEPTs
+- Internal nodes (non-client-facing) run their entire loop without blocking
+- `set_output` is a synthetic tool — a turn with only `set_output` calls (no real tools) triggers user input blocking
+
+**STEP 1/STEP 2 pattern:** Always structure client-facing prompts with explicit phases. STEP 1 is text-only conversation. STEP 2 calls `set_output` after user confirmation. This prevents the LLM from calling `set_output` prematurely before the user responds.
+
+### When to Use client_facing
+
+| Scenario                            | client_facing | Why                    |
+| ----------------------------------- | :-----------: | ---------------------- |
+| Gathering user requirements         |      Yes      | Need user input        |
+| Human review/approval checkpoint    |      Yes      | Need human decision    |
+| Data processing (scanning, scoring) |      No       | Runs autonomously      |
+| Report generation                   |      No       | No user input needed   |
+| Final confirmation before action    |      Yes      | Need explicit approval |
+
+> **Legacy Note:** The `pause_nodes` / `entry_points` pattern still works for backward compatibility but `client_facing=True` is preferred for new agents.
+
+## Edge-Based Routing and Feedback Loops
+
+### Conditional Edge Routing
+
+Multiple conditional edges from the same source replace the old `router` node type. Each edge checks a condition on the node's output.
+
+```python
+# Node with mutually exclusive outputs
+review_node = NodeSpec(
+    id="review",
+    name="Review",
+    node_type="event_loop",
+    client_facing=True,
+    output_keys=["approved_contacts", "redo_extraction"],
+    nullable_output_keys=["approved_contacts", "redo_extraction"],
+    max_node_visits=3,
+    system_prompt="Present the contact list to the operator. If they approve, call set_output('approved_contacts', ...). If they want changes, call set_output('redo_extraction', 'true').",
+)
+
+# Forward edge (positive priority, evaluated first)
+EdgeSpec(
+    id="review-to-campaign",
+    source="review",
+    target="campaign-builder",
+    condition=EdgeCondition.CONDITIONAL,
+    condition_expr="output.get('approved_contacts') is not None",
+    priority=1,
+)
+
+# Feedback edge (negative priority, evaluated after forward edges)
+EdgeSpec(
+    id="review-feedback",
+    source="review",
+    target="extractor",
+    condition=EdgeCondition.CONDITIONAL,
+    condition_expr="output.get('redo_extraction') is not None",
+    priority=-1,
+)
+```
+
+**Key concepts:**
+
+- `nullable_output_keys`: Lists output keys that may remain unset. The node sets exactly one of the mutually exclusive keys per execution.
+- `max_node_visits`: Must be >1 on the feedback target (extractor) so it can re-execute. The example also raises it on `review`, which runs again each time the loop returns to it. Default is 1.
+- `priority`: Positive = forward edge (evaluated first). Negative = feedback edge. The executor tries forward edges first; if none match, falls back to feedback edges.
+
+### Routing Decision Table
+
+| Pattern                | Old Approach            | New Approach                                  |
+| ---------------------- | ----------------------- | --------------------------------------------- |
+| Conditional branching  | `router` node           | Conditional edges with `condition_expr`       |
+| Binary approve/reject  | `pause_nodes` + resume  | `client_facing=True` + `nullable_output_keys` |
+| Loop-back on rejection | Manual entry_points     | Feedback edge with `priority=-1`              |
+| Multi-way routing      | Router with routes dict | Multiple conditional edges with priorities    |
+
+## Judge Patterns
+
+**Core Principle: The judge is the SOLE mechanism for acceptance decisions.** Never add ad-hoc framework gating to compensate for LLM behavior. If the LLM calls `set_output` prematurely, fix the system prompt or use a custom judge. Anti-patterns to avoid:
+
+- Output rollback logic
+- `_user_has_responded` flags
+- Premature set_output rejection
+- Interaction protocol injection into system prompts
+
+Judges control when an event_loop node's loop exits. Choose based on validation needs.
+
+### Implicit Judge (Default)
+
+When no judge is configured, the implicit judge ACCEPTs when:
+
+- The LLM finishes its response with no tool calls
+- All required output keys have been set via `set_output`
+
+Best for simple nodes where "all outputs set" is sufficient validation.
+
+### SchemaJudge
+
+Validates outputs against a Pydantic model. Use when you need structural validation.
+
+```python
+from pydantic import BaseModel, ValidationError
+
+class ScannerOutput(BaseModel):
+    github_users: list[dict]  # Must be a list of user objects
+
+class SchemaJudge:
+    def __init__(self, output_model: type[BaseModel]):
+        self._model = output_model
+
+    async def evaluate(self, context: dict) -> JudgeVerdict:
+        missing = context.get("missing_keys", [])
+        if missing:
+            return JudgeVerdict(
+                action="RETRY",
+                feedback=f"Missing output keys: {missing}. Use set_output to provide them.",
+            )
+        try:
+            self._model.model_validate(context["output_accumulator"])
+            return JudgeVerdict(action="ACCEPT")
+        except ValidationError as e:
+            return JudgeVerdict(action="RETRY", feedback=str(e))
+```
+
+### When to Use Which Judge
+
+| Judge           | Use When                              | Example                |
+| --------------- | ------------------------------------- | ---------------------- |
+| Implicit (None) | Output keys are sufficient validation | Simple data extraction |
+| SchemaJudge     | Need structural validation of outputs | API response parsing   |
+| Custom          | Domain-specific validation logic      | Score must be 0.0-1.0  |
+
+## Fan-Out / Fan-In (Parallel Execution)
+
+Multiple ON_SUCCESS edges from the same source trigger parallel execution. All branches run concurrently via `asyncio.gather()`.
+
+```python
+# Scanner fans out to Profiler and Scorer in parallel
+EdgeSpec(id="scanner-to-profiler", source="scanner", target="profiler",
+         condition=EdgeCondition.ON_SUCCESS)
+EdgeSpec(id="scanner-to-scorer", source="scanner", target="scorer",
+         condition=EdgeCondition.ON_SUCCESS)
+
+# Both fan in to Extractor
+EdgeSpec(id="profiler-to-extractor", source="profiler", target="extractor",
+         condition=EdgeCondition.ON_SUCCESS)
+EdgeSpec(id="scorer-to-extractor", source="scorer", target="extractor",
+         condition=EdgeCondition.ON_SUCCESS)
+```
+
+**Requirements:**
+
+- Parallel event_loop nodes must have **disjoint output_keys** (no key written by both)
+- Only one parallel branch may contain a `client_facing` node
+- Fan-in node receives outputs from all completed branches in shared memory
+
+## Context Management Patterns
+
+### Tiered Compaction
+
+EventLoopNode automatically manages context window usage with tiered compaction:
+
+1. **Pruning** — Old tool results replaced with compact placeholders (zero-cost, no LLM call)
+2. **Normal compaction** — LLM summarizes older messages
+3. **Aggressive compaction** — Keeps only recent messages + summary
+4. **Emergency** — Hard reset with tool history preservation
+
+### Spillover Pattern
+
+The framework automatically truncates large tool results and saves full content to a spillover directory. The LLM receives a truncation message with instructions to use `load_data` to read the full result.
+
+For explicit data management, use the data tools (real MCP tools, not synthetic):
+
+```python
+# save_data, load_data, list_data_files, serve_file_to_user are real MCP tools
+# data_dir is auto-injected by the framework — the LLM never sees it
+
+# Saving large results
+save_data(filename="sources.json", data=large_json_string)
+
+# Reading with pagination (line-based offset/limit)
+load_data(filename="sources.json", offset=0, limit=50)
+
+# Listing available files
+list_data_files()
+
+# Serving a file to the user as a clickable link
+serve_file_to_user(filename="report.html", label="Research Report")
+```
+
+Add data tools to nodes that handle large tool results:
+
+```python
+research_node = NodeSpec(
+    ...
+    tools=["web_search", "web_scrape", "load_data", "save_data", "list_data_files"],
+)
+```
+
+`data_dir` is a framework context parameter — auto-injected at call time. `GraphExecutor.execute()` sets it per-execution via `ToolRegistry.set_execution_context(data_dir=...)` (using `contextvars` for concurrency safety), ensuring it matches the session-scoped spillover directory.
+
+## Anti-Patterns
+
+### What NOT to Do
+
+- **Don't rely on `export_graph`** — Write files immediately, not at end
+- **Don't hide code in session** — Write to files as components are approved
+- **Don't wait to write files** — The agent should be visible in the user's editor from the first step
+- **Don't batch everything** — Write incrementally, one component at a time
+- **Don't create too many thin nodes** — Prefer fewer, richer nodes (see below)
+- **Don't add framework gating for LLM behavior** — Fix prompts or use judges instead
+
+### Fewer, Richer Nodes
+
+A common mistake is splitting work into too many small single-purpose nodes. Each node boundary requires serializing outputs, losing in-context information, and adding edge complexity.
+
+| Bad (8 thin nodes)  | Good (4 rich nodes)                 |
+| ------------------- | ----------------------------------- |
+| parse-query         | intake (client-facing)              |
+| search-sources      | research (search + fetch + analyze) |
+| fetch-content       | review (client-facing)              |
+| evaluate-sources    | report (write + deliver)            |
+| synthesize-findings |                                     |
+| write-report        |                                     |
+| quality-check       |                                     |
+| save-report         |                                     |
+
+**Why fewer nodes are better:**
+
+- The LLM retains full context of its work within a single node
+- A research node that searches, fetches, and analyzes keeps all source material in its conversation history
+- Fewer edges means simpler graph and fewer failure points
+- Data tools (`save_data`/`load_data`) handle context window limits within a single node
+
+### MCP Tools - Correct Usage
+
+**MCP tools OK for:**
+
+- `test_node` — Validate node configuration with mock inputs
+- `validate_graph` — Check graph structure
+- `configure_loop` — Set event loop parameters
+- `create_session` — Track session state for bookkeeping
+
+**Just don't:** use MCP as the primary construction method or rely on `export_graph`.
+
+## Error Handling Patterns
+
+### Graceful Failure with Fallback
+
+```python
+edges = [
+    # Success path
+    EdgeSpec(id="api-success", source="api-call", target="process-results",
+             condition=EdgeCondition.ON_SUCCESS),
+    # Fallback on failure
+    EdgeSpec(id="api-to-fallback", source="api-call", target="fallback-cache",
+             condition=EdgeCondition.ON_FAILURE, priority=1),
+    # Report if fallback also fails
+    EdgeSpec(id="fallback-to-error", source="fallback-cache", target="report-error",
+             condition=EdgeCondition.ON_FAILURE, priority=1),
+]
+```
+
+## Handoff to Testing
+
+When the agent is complete, transition to the testing phase:
+
+### Pre-Testing Checklist
+
+- [ ] Agent structure validates: `uv run python -m agent_name validate`
+- [ ] All nodes defined in `nodes/__init__.py`
+- [ ] All edges connect valid nodes with correct priorities
+- [ ] Feedback edge targets have `max_node_visits > 1`
+- [ ] Client-facing nodes have meaningful system prompts
+- [ ] Agent can be imported: `from exports.agent_name import default_agent`
+
+## Related Skills
+
+- **hive-concepts** — Fundamental concepts (node types, edges, event loop architecture)
+- **hive-create** — Step-by-step building process
+- **hive-test** — Test and validate agents
+- **hive** — Complete workflow orchestrator
+
+---
+
+**Remember: Agent is actively constructed, visible the whole time. No hidden state. No surprise exports. Just transparent, incremental file building.**
@@ -0,0 +1,940 @@
+---
+name: hive-test
+description: Iterative agent testing with session recovery. Execute, analyze, fix, resume from checkpoints. Use when testing an agent, debugging test failures, or verifying fixes without re-running from scratch.
+---
+
+# Agent Testing
+
+Test agents iteratively: execute, analyze failures, fix, resume from checkpoint, repeat.
+
+## When to Use
+
+- Testing a newly built agent against its goal
+- Debugging a failing agent iteratively
+- Verifying fixes without re-running expensive early nodes
+- Running final regression tests before deployment
+
+## Prerequisites
+
+1. Agent package at `exports/{agent_name}/` (built with `/hive-create`)
+2. Credentials configured (`/hive-credentials`)
+3. `ANTHROPIC_API_KEY` set (or appropriate LLM provider key)
+
+**Path distinction** (critical — don't confuse these):
+- `exports/{agent_name}/` — agent source code (edit here)
+- `~/.hive/agents/{agent_name}/` — runtime data: sessions, checkpoints, logs (read here)
+
+---
+
+## The Iterative Test Loop
+
+This is the core workflow. Don't re-run the entire agent when a late node fails — analyze, fix, and resume from the last clean checkpoint.
+
+```
+┌──────────────────────────────────────┐
+│ PHASE 1: Generate Test Scenarios     │
+│ Goal → synthetic test inputs + tests │
+└──────────────┬───────────────────────┘
+               ↓
+┌──────────────────────────────────────┐
+│ PHASE 2: Execute                     │◄────────────────┐
+│ Run agent (CLI or pytest)            │                 │
+└──────────────┬───────────────────────┘                 │
+               ↓                                         │
+          Pass? ──yes──► PHASE 6: Final Verification     │
+               │                                         │
+               no                                        │
+               ↓                                         │
+┌──────────────────────────────────────┐                 │
+│ PHASE 3: Analyze                     │                 │
+│ Session + runtime logs + checkpoints │                 │
+└──────────────┬───────────────────────┘                 │
+               ↓                                         │
+┌──────────────────────────────────────┐                 │
+│ PHASE 4: Fix                         │                 │
+│ Prompt / code / graph / goal         │                 │
+└──────────────┬───────────────────────┘                 │
+               ↓                                         │
+┌──────────────────────────────────────┐                 │
+│ PHASE 5: Recover & Resume            │─────────────────┘
+│ Checkpoint resume OR fresh re-run    │
+└──────────────────────────────────────┘
+```
+
+---
+
+### Phase 1: Generate Test Scenarios
+
+Create synthetic tests from the agent's goal, constraints, and success criteria.
+
+#### Step 1a: Read the goal
+
+```python
+# Read goal from agent.py
+Read(file_path="exports/{agent_name}/agent.py")
+# Extract the Goal definition and convert to JSON string
+```
+
+#### Step 1b: Get test guidelines
+
+```python
+# Get constraint test guidelines
+generate_constraint_tests(
+    goal_id="your-goal-id",
+    goal_json='{"id": "...", "constraints": [...]}',
+    agent_path="exports/{agent_name}"
+)
+
+# Get success criteria test guidelines
+generate_success_tests(
+    goal_id="your-goal-id",
+    goal_json='{"id": "...", "success_criteria": [...]}',
+    node_names="intake,research,review,report",
+    tool_names="web_search,web_scrape",
+    agent_path="exports/{agent_name}"
+)
+```
+
+These return `output_file` (the path to write the tests to), `file_header`, `test_template`, `constraints_formatted`/`success_criteria_formatted`, and `test_guidelines`. They do NOT generate test code — you write the tests.
+
+#### Step 1c: Write tests
+
+```python
+Write(
+    file_path=result["output_file"],
+    content=result["file_header"] + "\n\n" + your_test_code
+)
+```
+
+#### Test writing rules
+
+- Every test MUST be `async` with `@pytest.mark.asyncio`
+- Every test MUST accept `runner, auto_responder, mock_mode` fixtures
+- Use `await auto_responder.start()` before running, `await auto_responder.stop()` in `finally`
+- Use `await runner.run(input_dict)` — this goes through AgentRunner → AgentRuntime → ExecutionStream
+- Access output via `result.output.get("key")` — NEVER `result.output["key"]`
+- `result.success=True` means no exception, NOT goal achieved — always check output
+- Write 8-15 tests total, not 30+
+- Each real test costs ~3 seconds + LLM tokens
+- NEVER use `default_agent.run()` — it bypasses the runtime (no sessions, no logs, client-facing nodes hang)
+
+#### Step 1d: Check existing tests
+
+Before generating, check if tests already exist:
+
+```python
+list_tests(
+    goal_id="your-goal-id",
+    agent_path="exports/{agent_name}"
+)
+```
+
+---
+
+### Phase 2: Execute
+
+Two execution paths, use the right one for your situation.
+
+#### Iterative debugging (for complex agents)
+
+Run the agent via CLI. This creates sessions with checkpoints at `~/.hive/agents/{agent_name}/sessions/`:
+
+```bash
+uv run hive run exports/{agent_name} --input '{"query": "test topic"}'
+```
+
+Sessions and checkpoints are saved automatically.
+
+**Client-facing nodes**: Agents with `client_facing=True` nodes (interactive conversation) work in headless mode when run from a real terminal — the agent streams output to stdout and reads user input from stdin via a `>>> ` prompt. In non-interactive shells (like Claude Code's Bash tool), client-facing nodes will hang because there is no stdin. For testing interactive agents from Claude Code, use `run_tests` with mock mode (structure validation only — see Mock Mode Limitations below) or have the user run the agent manually in their terminal.
+
+#### Automated regression (for CI or final verification)
+
+Use the `run_tests` MCP tool to run all pytest tests:
+
+```python
+run_tests(
+    goal_id="your-goal-id",
+    agent_path="exports/{agent_name}"
+)
+```
+
+Returns structured results:
+```json
+{
+  "overall_passed": false,
+  "summary": {"total": 12, "passed": 10, "failed": 2, "pass_rate": "83.3%"},
+  "test_results": [{"test_name": "test_success_source_diversity", "status": "failed"}],
+  "failures": [{"test_name": "test_success_source_diversity", "details": "..."}]
+}
+```
+
+**Options:**
+```python
+# Run only constraint tests
+run_tests(goal_id, agent_path, test_types='["constraint"]')
+
+# Stop on first failure
+run_tests(goal_id, agent_path, fail_fast=True)
+
+# Parallel execution
+run_tests(goal_id, agent_path, parallel=4)
+```
+
+**Note:** `run_tests` uses `AgentRunner` with `tmp_path` storage, so sessions are isolated per test run. For checkpoint-based recovery with persistent sessions, use CLI execution. Use `run_tests` for quick regression checks and final verification.
+
+---
+
+### Phase 3: Analyze Failures
+
+When a test fails, drill down systematically. Don't guess — use the tools.
+
+#### Step 3a: Get error category
+
+```python
+debug_test(
+    goal_id="your-goal-id",
+    test_name="test_success_source_diversity",
+    agent_path="exports/{agent_name}"
+)
+```
+
+Returns error category (`IMPLEMENTATION_ERROR`, `ASSERTION_FAILURE`, `TIMEOUT`, `IMPORT_ERROR`, `API_ERROR`) plus full traceback and suggestions.
+
+#### Step 3b: Find the failed session
+
+```python
+list_agent_sessions(
+    agent_work_dir="~/.hive/agents/{agent_name}",
+    status="failed",
+    limit=5
+)
+```
+
+Returns session list with IDs, timestamps, current_node (where it failed), execution_quality.
+
+#### Step 3c: Inspect session state
+
+```python
+get_agent_session_state(
+    agent_work_dir="~/.hive/agents/{agent_name}",
+    session_id="session_20260209_143022_abc12345"
+)
+```
+
+Returns execution path, which node was current, step count, timestamps — but excludes memory values (to avoid context bloat). Shows `memory_keys` and `memory_size` instead.
+
+#### Step 3d: Examine runtime logs (L2/L3)
+
+```python
+# L2: Per-node success/failure, retry counts
+query_runtime_log_details(
+    agent_work_dir="~/.hive/agents/{agent_name}",
+    run_id="session_20260209_143022_abc12345",
+    needs_attention_only=True
+)
+
+# L3: Exact LLM responses, tool call inputs/outputs
+query_runtime_log_raw(
+    agent_work_dir="~/.hive/agents/{agent_name}",
+    run_id="session_20260209_143022_abc12345",
+    node_id="research"
+)
+```
+
+#### Step 3e: Inspect memory data
+
+```python
+# See what data a node actually produced
+get_agent_session_memory(
+    agent_work_dir="~/.hive/agents/{agent_name}",
+    session_id="session_20260209_143022_abc12345",
+    key="research_results"
+)
+```
+
+#### Step 3f: Find recovery points
+
+```python
+list_agent_checkpoints(
+    agent_work_dir="~/.hive/agents/{agent_name}",
+    session_id="session_20260209_143022_abc12345",
+    is_clean="true"
+)
+```
+
+Returns checkpoint summaries with IDs, types (`node_start`, `node_complete`), which node, and `is_clean` flag. Clean checkpoints are safe resume points.
+
+#### Step 3g: Compare checkpoints (optional)
+
+To understand what changed between two points in execution:
+
+```python
+compare_agent_checkpoints(
+    agent_work_dir="~/.hive/agents/{agent_name}",
+    session_id="session_20260209_143022_abc12345",
+    checkpoint_id_before="cp_node_complete_research_143030",
+    checkpoint_id_after="cp_node_complete_review_143115"
+)
+```
+
+Returns memory diff (added/removed/changed keys) and execution path diff.
+
+---
+
+### Phase 4: Fix Based on Root Cause
+
+Use the analysis from Phase 3 to determine what to fix and where.
+
+| Root Cause | What to Fix | Where to Edit |
+|------------|------------|---------------|
+| **Prompt issue** — LLM produces wrong output format, misses instructions | Node `system_prompt` | `exports/{agent}/nodes/__init__.py` |
+| **Code bug** — TypeError, KeyError, logic error in Python | Agent code | `exports/{agent}/agent.py`, `nodes/__init__.py` |
+| **Graph issue** — wrong routing, missing edge, bad condition_expr | Edges, node config | `exports/{agent}/agent.py` |
+| **Tool issue** — MCP tool fails, wrong config, missing credential | Tool config | `exports/{agent}/mcp_servers.json`, `/hive-credentials` |
+| **Goal issue** — success criteria too strict/vague, wrong constraints | Goal definition | `exports/{agent}/agent.py` (goal section) |
+| **Test issue** — test expectations don't match actual agent behavior | Test code | `exports/{agent}/tests/test_*.py` |
+
+#### Fix strategies by error category
+
+**IMPLEMENTATION_ERROR** (TypeError, AttributeError, KeyError):
+```python
+# Read the failing code
+Read(file_path="exports/{agent_name}/nodes/__init__.py")
+
+# Fix the bug
+Edit(
+    file_path="exports/{agent_name}/nodes/__init__.py",
+    old_string="results.get('videos')",
+    new_string="(results or {}).get('videos', [])"
+)
+```
+
+**ASSERTION_FAILURE** (test assertions fail but agent ran successfully):
+- Check if the agent's output is actually wrong → fix the prompt
+- Check if the test's expectations are unrealistic → fix the test
+- Use `get_agent_session_memory` to see what the agent actually produced
+
+**TIMEOUT / STALL** (agent runs too long):
+- Check `node_visit_counts` for feedback loops hitting max_node_visits
+- Check L3 logs for tool calls that hang
+- Reduce `max_iterations` in loop_config or fix the prompt to converge faster
+
+**API_ERROR** (connection, rate limit, auth):
+- Verify credentials with `/hive-credentials`
+- Check MCP server configuration
+
+---
+
+### Phase 5: Recover & Resume
+
+After fixing the agent, decide whether to resume or re-run.
+
+#### When to resume from checkpoint
+
+Resume when ALL of these are true:
+- The fix is to a node that comes AFTER existing clean checkpoints
+- Clean checkpoints exist (from a CLI execution with checkpointing)
+- The early nodes are expensive (web scraping, API calls, long LLM chains)
+
+```bash
+# Resume from the last clean checkpoint before the failing node
+uv run hive run exports/{agent_name} \
+  --resume-session session_20260209_143022_abc12345 \
+  --checkpoint cp_node_complete_research_143030
+```
+
+This skips all nodes before the checkpoint and only re-runs the fixed node onward.
+
+#### When to re-run from scratch
+
+Re-run when ANY of these are true:
+- The fix is to the entry node or an early node
+- No checkpoints exist (e.g., agent was run via `run_tests`)
+- The agent is fast (2-3 nodes, completes in seconds)
+- You changed the graph structure (added/removed nodes/edges)
+
+```bash
+uv run hive run exports/{agent_name} --input '{"query": "test topic"}'
+```
+
+#### Inspecting a checkpoint before resuming
+
+```python
+get_agent_checkpoint(
+    agent_work_dir="~/.hive/agents/{agent_name}",
+    session_id="session_20260209_143022_abc12345",
+    checkpoint_id="cp_node_complete_research_143030"
+)
+```
+
+Returns the full checkpoint: shared_memory snapshot, execution_path, current_node, next_node, is_clean.
+
+#### Loop back to Phase 2
+
+After resuming or re-running, check if the fix worked. If not, go back to Phase 3.
+
+---
+
+### Phase 6: Final Verification
+
+Once the iterative fix loop converges (the agent produces correct output), run the full automated test suite:
+
+```python
+run_tests(
+    goal_id="your-goal-id",
+    agent_path="exports/{agent_name}"
+)
+```
+
+All tests should pass. If not, repeat the loop for remaining failures.
+
+---
+
+## Credential Requirements
+
+**CRITICAL: Testing requires ALL credentials the agent depends on.** This includes both the LLM API key AND any tool-specific credentials (HubSpot, Brave Search, etc.).
+
+### Prerequisites
+
+Before running agent tests, you MUST collect ALL required credentials from the user.
+
+**Step 1: LLM API Key (always required)**
+```bash
+export ANTHROPIC_API_KEY="your-key-here"
+```
+
+**Step 2: Tool-specific credentials (depends on agent's tools)**
+
+Inspect the agent's `mcp_servers.json` and tool configuration to determine which tools the agent uses, then check for all required credentials:
+
+```python
+from aden_tools.credentials import CredentialManager, CREDENTIAL_SPECS
+
+creds = CredentialManager()
+
+# Determine which tools the agent uses (from agent.json or mcp_servers.json)
+agent_tools = [...]  # e.g., ["hubspot_search_contacts", "web_search", ...]
+
+# Find all missing credentials for those tools
+missing = creds.get_missing_for_tools(agent_tools)
+```
+
+Common tool credentials:
+| Tool | Env Var | Help URL |
+|------|---------|----------|
+| HubSpot CRM | `HUBSPOT_ACCESS_TOKEN` | https://developers.hubspot.com/docs/api/private-apps |
+| Brave Search | `BRAVE_SEARCH_API_KEY` | https://brave.com/search/api/ |
+| Google Search | `GOOGLE_API_KEY` + `GOOGLE_CSE_ID` | https://developers.google.com/custom-search |
+
+**Why ALL credentials are required:**
+- Tests need to execute the agent's LLM nodes to validate behavior
+- Tools with missing credentials will return error dicts instead of real data
+- Mock mode bypasses everything, providing no confidence in real-world performance
+
+### Mock Mode Limitations
+
+Mock mode (`--mock` flag or `MOCK_MODE=1`) is **ONLY for structure validation**:
+
+- Validates graph structure (nodes, edges, connections)
+- Validates that `AgentRunner.load()` succeeds and the agent is importable
+- Does NOT execute event_loop agents — MockLLMProvider never calls `set_output`, so event_loop nodes loop forever
+- Does NOT test LLM reasoning, content quality, or constraint validation
+- Does NOT test real API integrations or tool use
+
+**Bottom line:** If you're testing whether an agent achieves its goal, you MUST use real credentials.
+
+### Enforcing Credentials in Tests
+
+When writing tests, **ALWAYS include credential checks**:
+
+```python
+import os
+import pytest
+from aden_tools.credentials import CredentialManager
+
+pytestmark = pytest.mark.skipif(
+    not CredentialManager().is_available("anthropic") and not os.environ.get("MOCK_MODE"),
+    reason="API key required for real testing. Set ANTHROPIC_API_KEY or use MOCK_MODE=1."
+)
+
+
+@pytest.fixture(scope="session", autouse=True)
+def check_credentials():
+    """Ensure ALL required credentials are set for real testing."""
+    creds = CredentialManager()
+    mock_mode = os.environ.get("MOCK_MODE")
+
+    if not creds.is_available("anthropic"):
+        if mock_mode:
+            print("\nRunning in MOCK MODE - structure validation only")
+        else:
+            pytest.fail(
+                "\nANTHROPIC_API_KEY not set!\n"
+                "Set API key: export ANTHROPIC_API_KEY='your-key-here'\n"
+                "Or run structure validation: MOCK_MODE=1 pytest exports/{agent}/tests/"
+            )
+
+    if not mock_mode:
+        agent_tools = []  # Update per agent
+        missing = creds.get_missing_for_tools(agent_tools)
+        if missing:
+            lines = ["\nMissing tool credentials!"]
+            for name in missing:
+                spec = creds.specs.get(name)
+                if spec:
+                    lines.append(f"  {spec.env_var} - {spec.description}")
+            pytest.fail("\n".join(lines))
+```
+
+### User Communication
+
+When the user asks to test an agent, **ALWAYS check for ALL credentials first**:
+
+1. **Identify the agent's tools** from `mcp_servers.json`
+2. **Check ALL required credentials** using `CredentialManager`
+3. **Ask the user to provide any missing credentials** before proceeding
+4. Collect ALL missing credentials in a single prompt — not one at a time
+
+---
+
+## Safe Test Patterns
+
+### OutputCleaner
+
+The framework automatically validates and cleans node outputs using a fast LLM at edge traversal time. Tests should still use safe patterns because OutputCleaner may not catch all issues.
+
+### Safe Access (REQUIRED)
+
+```python
+# UNSAFE - will crash on missing keys
+approval = result.output["approval_decision"]
+category = result.output["analysis"]["category"]
+
+# SAFE - use .get() with defaults
+output = result.output or {}
+approval = output.get("approval_decision", "UNKNOWN")
+
+# SAFE - type check before operations
+analysis = output.get("analysis", {})
+if isinstance(analysis, dict):
+    category = analysis.get("category", "unknown")
+
+# SAFE - handle JSON parsing trap (LLM response as string)
+import json
+recommendation = output.get("recommendation", "{}")
+if isinstance(recommendation, str):
+    try:
+        parsed = json.loads(recommendation)
+        if isinstance(parsed, dict):
+            approval = parsed.get("approval_decision", "UNKNOWN")
+    except json.JSONDecodeError:
+        approval = "UNKNOWN"
+elif isinstance(recommendation, dict):
+    approval = recommendation.get("approval_decision", "UNKNOWN")
+
+# SAFE - type check before iteration
+items = output.get("items", [])
+if isinstance(items, list):
+    for item in items:
+        ...
+```
+
+### Helper Functions for conftest.py
+
+```python
+import json
+import re
+
+def _parse_json_from_output(result, key):
+    """Parse JSON from agent output (framework may store full LLM response as string)."""
+    output = result.output or {}
+    response_text = output.get(key, "")
+    try:
+        # Strip fences inside the try: a non-string value (e.g. an already-parsed dict) raises TypeError here
+        json_text = re.sub(r'```json\s*|\s*```', '', response_text).strip()
+        return json.loads(json_text)
+    except (json.JSONDecodeError, AttributeError, TypeError):
+        return output.get(key)
+
+def safe_get_nested(result, key_path, default=None):
+    """Safely get nested value from result.output."""
+    output = result.output or {}
+    current = output
+    for key in key_path:
+        if isinstance(current, dict):
+            current = current.get(key)
+        elif isinstance(current, str):
+            try:
+                json_text = re.sub(r'```json\s*|\s*```', '', current).strip()
+                parsed = json.loads(json_text)
+                if isinstance(parsed, dict):
+                    current = parsed.get(key)
+                else:
+                    return default
+            except json.JSONDecodeError:
+                return default
+        else:
+            return default
+    return current if current is not None else default
+
+# Make available in tests
+pytest.parse_json_from_output = _parse_json_from_output
+pytest.safe_get_nested = safe_get_nested
+```
+
+### ExecutionResult Fields
+
+**`result.success=True` means NO exception, NOT goal achieved**
+
+```python
+# WRONG
+assert result.success
+
+# RIGHT
+assert result.success, f"Agent failed: {result.error}"
+output = result.output or {}
+approval = output.get("approval_decision")
+assert approval == "APPROVED", f"Expected APPROVED, got {approval}"
+```
+
+All fields:
+- `success: bool` — Completed without exception (NOT goal achieved!)
+- `output: dict` — Complete memory snapshot (may contain raw strings)
+- `error: str | None` — Error message if failed
+- `steps_executed: int` — Number of nodes executed
+- `total_tokens: int` — Cumulative token usage
+- `total_latency_ms: int` — Total execution time
+- `path: list[str]` — Node IDs traversed (may repeat in feedback loops)
+- `paused_at: str | None` — Node ID if paused
+- `session_state: dict` — State for resuming
+- `node_visit_counts: dict[str, int]` — Visit counts per node (feedback loop testing)
+- `execution_quality: str` — "clean", "degraded", or "failed"
+
+### Test Count Guidance
+
+**Write 8-15 tests, not 30+**
+
+- 2-3 tests per success criterion
+- 1 happy path test
+- 1 boundary/edge case test
+- 1 error handling test (optional)
+
+Each real test costs ~3 seconds plus LLM tokens, so 12 tests take roughly 36 seconds and cost about $0.12.
+
+---
+
+## Test Patterns
+
+### Happy Path
+```python
+@pytest.mark.asyncio
+async def test_happy_path(runner, auto_responder, mock_mode):
+    """Test normal successful execution."""
+    await auto_responder.start()
+    try:
+        result = await runner.run({"query": "python tutorials"})
+    finally:
+        await auto_responder.stop()
+    assert result.success, f"Agent failed: {result.error}"
+    output = result.output or {}
+    assert output.get("report"), "No report produced"
+```
+
+### Boundary Condition
+```python
+@pytest.mark.asyncio
+async def test_minimum_sources(runner, auto_responder, mock_mode):
+    """Test at minimum source threshold."""
+    await auto_responder.start()
+    try:
+        result = await runner.run({"query": "niche topic"})
+    finally:
+        await auto_responder.stop()
+    assert result.success, f"Agent failed: {result.error}"
+    output = result.output or {}
+    sources = output.get("sources", [])
+    if isinstance(sources, list):
+        assert len(sources) >= 3, f"Expected >= 3 sources, got {len(sources)}"
+```
+
+### Error Handling
+```python
+@pytest.mark.asyncio
+async def test_empty_input(runner, auto_responder, mock_mode):
+    """Test graceful handling of empty input."""
+    await auto_responder.start()
+    try:
+        result = await runner.run({"query": ""})
+    finally:
+        await auto_responder.stop()
+    # Agent should either fail gracefully or produce an error message
+    output = result.output or {}
+    assert not result.success or output.get("error"), "Should handle empty input"
+```
+
+### Feedback Loop
+```python
+@pytest.mark.asyncio
+async def test_feedback_loop_terminates(runner, auto_responder, mock_mode):
+    """Test that feedback loops don't run forever."""
+    await auto_responder.start()
+    try:
+        result = await runner.run({"query": "test"})
+    finally:
+        await auto_responder.stop()
+    visits = result.node_visit_counts or {}
+    for node_id, count in visits.items():
+        assert count <= 5, f"Node {node_id} visited {count} times — possible infinite loop"
+```
+
+---
+
+## MCP Tool Reference
+
+### Phase 1: Test Generation
+
+```python
+# Check existing tests
+list_tests(goal_id, agent_path)
+
+# Get constraint test guidelines (returns templates, NOT generated tests)
+generate_constraint_tests(goal_id, goal_json, agent_path)
+# Returns: output_file, file_header, test_template, constraints_formatted, test_guidelines
+
+# Get success criteria test guidelines
+generate_success_tests(goal_id, goal_json, node_names, tool_names, agent_path)
+# Returns: output_file, file_header, test_template, success_criteria_formatted, test_guidelines
+```
+
+### Phase 2: Execution
+
+```python
+# Automated regression (no checkpoints, fresh runs)
+run_tests(goal_id, agent_path, test_types='["all"]', parallel=-1, fail_fast=False)
+
+# Run only specific test types
+run_tests(goal_id, agent_path, test_types='["constraint"]')
+run_tests(goal_id, agent_path, test_types='["success"]')
+```
+
+```bash
+# Iterative debugging with checkpoints (via CLI)
+uv run hive run exports/{agent_name} --input '{"query": "test"}'
+```
+
+### Phase 3: Analysis
+
+```python
+# Debug a specific failed test
+debug_test(goal_id, test_name, agent_path)
+
+# Find failed sessions
+list_agent_sessions(agent_work_dir, status="failed", limit=5)
+
+# Inspect session state (excludes memory values)
+get_agent_session_state(agent_work_dir, session_id)
+
+# Inspect memory data
+get_agent_session_memory(agent_work_dir, session_id, key="research_results")
+
+# Runtime logs: L1 summaries
+query_runtime_logs(agent_work_dir, status="needs_attention")
+
+# Runtime logs: L2 per-node details
+query_runtime_log_details(agent_work_dir, run_id, needs_attention_only=True)
+
+# Runtime logs: L3 tool/LLM raw data
+query_runtime_log_raw(agent_work_dir, run_id, node_id="research")
+
+# Find clean checkpoints
+list_agent_checkpoints(agent_work_dir, session_id, is_clean="true")
+
+# Compare checkpoints (memory diff)
+compare_agent_checkpoints(agent_work_dir, session_id, cp_before, cp_after)
+```
+
+### Phase 5: Recovery
+
+```python
+# Inspect checkpoint before resuming
+get_agent_checkpoint(agent_work_dir, session_id, checkpoint_id)
+# Empty checkpoint_id = latest checkpoint
+```
+
+```bash
+# Resume from checkpoint via CLI (headless)
+uv run hive run exports/{agent_name} \
+  --resume-session {session_id} --checkpoint {checkpoint_id}
+```
+
+---
+
+## Anti-Patterns
+
+| Don't | Do Instead |
+|-------|-----------|
+| Use `default_agent.run()` in tests | Use `runner.run()` with `auto_responder` fixtures (goes through AgentRuntime) |
+| Re-run entire agent when a late node fails | Resume from last clean checkpoint |
+| Treat `result.success` as goal achieved | Check `result.output` for actual criteria |
+| Access `result.output["key"]` directly | Use `(result.output or {}).get("key")` — `output` may be `None` |
+| Fix random things hoping tests pass | Analyze L2/L3 logs to find root cause first |
+| Write 30+ tests | Write 8-15 focused tests |
+| Skip credential check | Use `/hive-credentials` before testing |
+| Confuse `exports/` with `~/.hive/agents/` | Code in `exports/`, runtime data in `~/.hive/` |
+| Use `run_tests` for iterative debugging | Use headless CLI with checkpoints for iterative debugging |
+| Use headless CLI for final regression | Use `run_tests` for automated regression |
+| Use `--tui` from Claude Code | Use headless `run` command — TUI hangs in non-interactive shells |
+| Test client-facing nodes from Claude Code | Use mock mode, or have the user run the agent in their terminal |
+| Run tests without reading goal first | Always understand the goal before writing tests |
+| Skip Phase 3 analysis and guess | Use session + log tools to identify root cause |
+
+---
+
+## Example Walkthrough: Deep Research Agent
+
+A complete iteration showing the test loop for an agent with nodes: `intake → research → review → report`.
+
+### Phase 1: Generate tests
+
+```python
+# Read the goal
+Read(file_path="exports/deep_research_agent/agent.py")
+
+# Get success criteria test guidelines
+result = generate_success_tests(
+    goal_id="rigorous-interactive-research",
+    goal_json='{"id": "rigorous-interactive-research", "success_criteria": [{"id": "source-diversity", "target": ">=5"}, {"id": "citation-coverage", "target": "100%"}, {"id": "report-completeness", "target": "90%"}]}',
+    node_names="intake,research,review,report",
+    tool_names="web_search,web_scrape",
+    agent_path="exports/deep_research_agent"
+)
+
+# Write tests
+Write(
+    file_path=result["output_file"],
+    content=result["file_header"] + "\n\n" + test_code
+)
+```
+
+### Phase 2: First execution
+
+```python
+run_tests(
+    goal_id="rigorous-interactive-research",
+    agent_path="exports/deep_research_agent",
+    fail_fast=True
+)
+```
+
+Result: `test_success_source_diversity` fails — agent only found 2 sources instead of 5.
+
+### Phase 3: Analyze
+
+```python
+# Debug the failing test
+debug_test(
+    goal_id="rigorous-interactive-research",
+    test_name="test_success_source_diversity",
+    agent_path="exports/deep_research_agent"
+)
+# → ASSERTION_FAILURE: Expected >= 5 sources, got 2
+
+# Find the session
+list_agent_sessions(
+    agent_work_dir="~/.hive/agents/deep_research_agent",
+    status="completed",
+    limit=1
+)
+# → session_20260209_150000_abc12345
+
+# See what the research node produced
+get_agent_session_memory(
+    agent_work_dir="~/.hive/agents/deep_research_agent",
+    session_id="session_20260209_150000_abc12345",
+    key="research_results"
+)
+# → Only 2 web_search calls made, each returned 1 source
+
+# Check the LLM's behavior in the research node
+query_runtime_log_raw(
+    agent_work_dir="~/.hive/agents/deep_research_agent",
+    run_id="session_20260209_150000_abc12345",
+    node_id="research"
+)
+# → LLM called web_search only twice, then called set_output
+```
+
+Root cause: The research node's prompt doesn't tell the LLM to search for at least 5 diverse sources. It stops after the first couple of searches.
+
+### Phase 4: Fix the prompt
+
+```python
+Read(file_path="exports/deep_research_agent/nodes/__init__.py")
+
+Edit(
+    file_path="exports/deep_research_agent/nodes/__init__.py",
+    old_string='system_prompt="Search for information on the user\'s topic."',
+    new_string='system_prompt="Search for information on the user\'s topic. You MUST find at least 5 diverse, authoritative sources. Use multiple different search queries to ensure source diversity. Do not stop searching until you have at least 5 distinct sources."'
+)
+```
+
+### Phase 5: Resume from checkpoint
+
+For this example, the fix is to the `research` node. If we had run via CLI with checkpointing, we could resume from the checkpoint after `intake` to skip re-running intake:
+
+```bash
+# Check if clean checkpoint exists after intake (MCP tool call, not a shell command)
+list_agent_checkpoints(
+    agent_work_dir="~/.hive/agents/deep_research_agent",
+    session_id="session_20260209_150000_abc12345",
+    is_clean="true"
+)
+# → cp_node_complete_intake_150005
+
+# Resume from after intake, re-run research with fixed prompt
+uv run hive run exports/deep_research_agent \
+  --resume-session session_20260209_150000_abc12345 \
+  --checkpoint cp_node_complete_intake_150005
+```
+
+Or for this simple case (intake is fast), just re-run:
+
+```bash
+uv run hive run exports/deep_research_agent --input '{"topic": "test"}'
+```
+
+### Phase 6: Final verification
+
+```python
+run_tests(
+    goal_id="rigorous-interactive-research",
+    agent_path="exports/deep_research_agent"
+)
+# → All 12 tests pass
+```
+
+---
+
+## Test File Structure
+
+```
+exports/{agent_name}/
+├── agent.py              ← Agent to test (goal, nodes, edges)
+├── nodes/__init__.py     ← Node implementations (prompts, config)
+├── config.py             ← Agent configuration
+├── mcp_servers.json      ← Tool server config
+└── tests/
+    ├── conftest.py           ← Shared fixtures + safe access helpers
+    ├── test_constraints.py   ← Constraint tests
+    ├── test_success_criteria.py  ← Success criteria tests
+    └── test_edge_cases.py    ← Edge case tests
+```
+
+## Integration with Other Skills
+
+| Scenario | From | To | Action |
+|----------|------|----|--------|
+| Agent built, ready to test | `/hive-create` | `/hive-test` | Generate tests, start loop |
+| Prompt fix needed | `/hive-test` Phase 4 | Direct edit | Edit `nodes/__init__.py`, resume |
+| Goal definition wrong | `/hive-test` Phase 4 | `/hive-create` | Update goal, may need rebuild |
+| Missing credentials | `/hive-test` Phase 3 | `/hive-credentials` | Set up credentials |
+| Complex runtime failure | `/hive-test` Phase 3 | `/hive-debugger` | Deep L1/L2/L3 analysis |
+| All tests pass | `/hive-test` Phase 6 | Done | Agent validated |
@@ -0,0 +1,333 @@
+# Example: Iterative Testing of a Research Agent
+
+This example walks through the full iterative test loop for a research agent that searches the web, reviews findings, and produces a cited report.
+
+## Agent Structure
+
+```
+exports/deep_research_agent/
+├── agent.py          # Goal + graph: intake → research → review → report
+├── nodes/__init__.py # Node definitions (system_prompt, input/output keys)
+├── config.py         # Model config
+├── mcp_servers.json  # Tools: web_search, web_scrape
+└── tests/            # Test files (we'll create these)
+```
+
+**Goal:** "Rigorous Interactive Research" — find 5+ diverse sources, cite every claim, produce a complete report.
+
+---
+
+## Phase 1: Generate Tests
+
+### Read the goal
+
+```python
+Read(file_path="exports/deep_research_agent/agent.py")
+# Extract: goal_id="rigorous-interactive-research"
+# success_criteria: source-diversity (>=5), citation-coverage (100%), report-completeness (90%)
+# constraints: no-hallucination, source-attribution
+```
+
+### Get test guidelines
+
+```python
+result = generate_success_tests(
+    goal_id="rigorous-interactive-research",
+    goal_json='{"id": "rigorous-interactive-research", "success_criteria": [{"id": "source-diversity", "description": "Use multiple diverse sources", "target": ">=5"}, {"id": "citation-coverage", "description": "Every claim cites its source", "target": "100%"}, {"id": "report-completeness", "description": "Report answers the research questions", "target": "90%"}]}',
+    node_names="intake,research,review,report",
+    tool_names="web_search,web_scrape",
+    agent_path="exports/deep_research_agent"
+)
+```
+
+### Write tests
+
+```python
+Write(
+    file_path="exports/deep_research_agent/tests/test_success_criteria.py",
+    content=result["file_header"] + '''
+
+@pytest.mark.asyncio
+async def test_success_source_diversity(runner, auto_responder, mock_mode):
+    """At least 5 diverse sources are found."""
+    await auto_responder.start()
+    try:
+        result = await runner.run({"query": "impact of remote work on productivity"})
+    finally:
+        await auto_responder.stop()
+    assert result.success, f"Agent failed: {result.error}"
+    output = result.output or {}
+    sources = output.get("sources", [])
+    if isinstance(sources, list):
+        assert len(sources) >= 5, f"Expected >= 5 sources, got {len(sources)}"
+
+@pytest.mark.asyncio
+async def test_success_citation_coverage(runner, auto_responder, mock_mode):
+    """Every factual claim in the report cites its source."""
+    await auto_responder.start()
+    try:
+        result = await runner.run({"query": "climate change effects on agriculture"})
+    finally:
+        await auto_responder.stop()
+    assert result.success, f"Agent failed: {result.error}"
+    output = result.output or {}
+    report = output.get("report", "")
+    # Check that report contains numbered references
+    assert "[1]" in str(report) or "[source" in str(report).lower(), "Report lacks citations"
+
+@pytest.mark.asyncio
+async def test_success_report_completeness(runner, auto_responder, mock_mode):
+    """Report addresses the original research question."""
+    query = "pros and cons of nuclear energy"
+    await auto_responder.start()
+    try:
+        result = await runner.run({"query": query})
+    finally:
+        await auto_responder.stop()
+    assert result.success, f"Agent failed: {result.error}"
+    output = result.output or {}
+    report = output.get("report", "")
+    assert len(str(report)) > 200, f"Report too short: {len(str(report))} chars"
+
+@pytest.mark.asyncio
+async def test_empty_query_handling(runner, auto_responder, mock_mode):
+    """Agent handles empty input gracefully."""
+    await auto_responder.start()
+    try:
+        result = await runner.run({"query": ""})
+    finally:
+        await auto_responder.stop()
+    output = result.output or {}
+    assert not result.success or output.get("error"), "Should handle empty query"
+
+@pytest.mark.asyncio
+async def test_feedback_loop_terminates(runner, auto_responder, mock_mode):
+    """Feedback loop between review and research terminates."""
+    await auto_responder.start()
+    try:
+        result = await runner.run({"query": "quantum computing basics"})
+    finally:
+        await auto_responder.stop()
+    visits = result.node_visit_counts or {}
+    for node_id, count in visits.items():
+        assert count <= 5, f"Node {node_id} visited {count} times"
+'''
+)
+```
+
+---
+
+## Phase 2: First Execution
+
+```python
+run_tests(
+    goal_id="rigorous-interactive-research",
+    agent_path="exports/deep_research_agent",
+    fail_fast=True
+)
+```
+
+**Result:**
+```json
+{
+  "overall_passed": false,
+  "summary": {"total": 5, "passed": 3, "failed": 2, "pass_rate": "60.0%"},
+  "failures": [
+    {"test_name": "test_success_source_diversity", "details": "AssertionError: Expected >= 5 sources, got 2"},
+    {"test_name": "test_success_citation_coverage", "details": "AssertionError: Report lacks citations"}
+  ]
+}
+```
+
+---
+
+## Phase 3: Analyze (Iteration 1)
+
+### Debug the first failure
+
+```python
+debug_test(
+    goal_id="rigorous-interactive-research",
+    test_name="test_success_source_diversity",
+    agent_path="exports/deep_research_agent"
+)
+# Category: ASSERTION_FAILURE — Expected >= 5 sources, got 2
+```
+
+### Find the session and inspect memory
+
+```python
+list_agent_sessions(
+    agent_work_dir="~/.hive/agents/deep_research_agent",
+    status="completed",
+    limit=1
+)
+# → session_20260209_150000_abc12345
+
+get_agent_session_memory(
+    agent_work_dir="~/.hive/agents/deep_research_agent",
+    session_id="session_20260209_150000_abc12345",
+    key="research_results"
+)
+# → Only 2 sources found. LLM stopped searching after 2 queries.
+```
+
+### Check LLM behavior in the research node
+
+```python
+query_runtime_log_raw(
+    agent_work_dir="~/.hive/agents/deep_research_agent",
+    run_id="session_20260209_150000_abc12345",
+    node_id="research"
+)
+# → LLM called web_search twice, got results, immediately called set_output.
+# → Prompt doesn't instruct it to find at least 5 sources.
+```
+
+**Root cause:** The research node's system_prompt doesn't specify minimum source requirements.
+
+---
+
+## Phase 4: Fix (Iteration 1)
+
+```python
+Read(file_path="exports/deep_research_agent/nodes/__init__.py")
+
+# Fix the research node prompt
+Edit(
+    file_path="exports/deep_research_agent/nodes/__init__.py",
+    old_string='system_prompt="Search for information on the user\'s topic using web search."',
+    new_string='system_prompt="Search for information on the user\'s topic using web search. You MUST find at least 5 diverse, authoritative sources. Use multiple different search queries with varied keywords. Do NOT call set_output until you have gathered at least 5 distinct sources from different domains."'
+)
+```
+
+---
+
+## Phase 5: Recover & Resume (Iteration 1)
+
+The fix is to the `research` node. Since this was a `run_tests` execution (no checkpoints), we re-run from scratch:
+
+```python
+run_tests(
+    goal_id="rigorous-interactive-research",
+    agent_path="exports/deep_research_agent",
+    fail_fast=True
+)
+```
+
+**Result:**
+```json
+{
+  "overall_passed": false,
+  "summary": {"total": 5, "passed": 4, "failed": 1, "pass_rate": "80.0%"},
+  "failures": [
+    {"test_name": "test_success_citation_coverage", "details": "AssertionError: Report lacks citations"}
+  ]
+}
+```
+
+Source diversity now passes. Citation coverage still fails.
+
+---
+
+## Phase 3: Analyze (Iteration 2)
+
+```python
+debug_test(
+    goal_id="rigorous-interactive-research",
+    test_name="test_success_citation_coverage",
+    agent_path="exports/deep_research_agent"
+)
+# Category: ASSERTION_FAILURE — Report lacks citations
+
+# Check what the report node produced
+list_agent_sessions(
+    agent_work_dir="~/.hive/agents/deep_research_agent",
+    status="completed",
+    limit=1
+)
+# → session_20260209_151500_def67890
+
+get_agent_session_memory(
+    agent_work_dir="~/.hive/agents/deep_research_agent",
+    session_id="session_20260209_151500_def67890",
+    key="report"
+)
+# → Report text exists but uses no numbered references.
+# → Sources are in memory but report node doesn't cite them.
+```
+
+**Root cause:** The report node's prompt doesn't instruct the LLM to include numbered citations.
+
+---
+
+## Phase 4: Fix (Iteration 2)
+
+```python
+Edit(
+    file_path="exports/deep_research_agent/nodes/__init__.py",
+    old_string='system_prompt="Write a comprehensive report based on the research findings."',
+    new_string='system_prompt="Write a comprehensive report based on the research findings. You MUST include numbered citations [1], [2], etc. for every factual claim. At the end, include a References section listing all sources with their URLs. Every claim must be traceable to a specific source."'
+)
+```
+
+---
+
+## Phase 5: Resume (Iteration 2)
+
+The fix is to the `report` node (the last node). To demonstrate checkpoint recovery, run via CLI:
+
+```bash
+# Run via CLI to get checkpoints
+uv run hive run exports/deep_research_agent --input '{"topic": "climate change effects"}'
+
+# After it runs, find the clean checkpoint before report (MCP tool call, not a shell command)
+list_agent_checkpoints(
+    agent_work_dir="~/.hive/agents/deep_research_agent",
+    session_id="session_20260209_152000_ghi34567",
+    is_clean="true"
+)
+# → cp_node_complete_review_152100 (after review, before report)
+
+# Resume — skips intake, research, review entirely
+uv run hive run exports/deep_research_agent \
+  --resume-session session_20260209_152000_ghi34567 \
+  --checkpoint cp_node_complete_review_152100
+```
+
+Only the `report` node re-runs with the fixed prompt, using research data from the checkpoint.
+
+---
+
+## Phase 6: Final Verification
+
+```python
+run_tests(
+    goal_id="rigorous-interactive-research",
+    agent_path="exports/deep_research_agent"
+)
+```
+
+**Result:**
+```json
+{
+  "overall_passed": true,
+  "summary": {"total": 5, "passed": 5, "failed": 0, "pass_rate": "100.0%"}
+}
+```
+
+All tests pass.
+
+---
+
+## Summary
+
+| Iteration | Failure | Root Cause | Fix | Recovery |
+|-----------|---------|------------|-----|----------|
+| 1 | Source diversity (2 < 5) | Research prompt too vague | Added "at least 5 sources" to prompt | Re-run (no checkpoints) |
+| 2 | No citations in report | Report prompt lacks citation instructions | Added citation requirements | Checkpoint resume (skipped 3 nodes) |
+
+**Key takeaways:**
+- Phase 3 analysis (session memory + L3 logs) identified root causes without guessing
+- Checkpoint recovery in iteration 2 saved time by skipping 3 expensive nodes
+- Final `run_tests` confirms all scenarios pass end-to-end
@@ -1,32 +1,53 @@
 ---
-name: agent-workflow
-description: Complete workflow for building, implementing, and testing goal-driven agents. Orchestrates building-agents-* and testing-agent skills. Use when starting a new agent project, unsure which skill to use, or need end-to-end guidance.
+name: hive
+description: Complete workflow for building, implementing, and testing goal-driven agents. Orchestrates hive-* skills. Use when starting a new agent project, unsure which skill to use, or need end-to-end guidance.
 license: Apache-2.0
 metadata:
  author: hive
  version: "2.0"
  type: workflow-orchestrator
  orchestrates:
-    - building-agents-core
-    - building-agents-construction
-    - building-agents-patterns
-    - testing-agent
-    - setup-credentials
+    - hive-concepts
+    - hive-create
+    - hive-patterns
+    - hive-test
+    - hive-credentials
+    - hive-debugger
 ---

 # Agent Development Workflow

+**THIS IS AN EXECUTABLE WORKFLOW. DO NOT explore the codebase or read source files. ROUTE to the correct skill IMMEDIATELY.**
+
+When this skill is loaded, **ALWAYS use the AskUserQuestion tool** to present options:
+
+```
+Use AskUserQuestion with these options:
+- "Build a new agent" → Then invoke /hive-create
+- "Test an existing agent" → Then invoke /hive-test
+- "Learn agent concepts" → Then invoke /hive-concepts
+- "Optimize agent design" → Then invoke /hive-patterns
+- "Set up credentials" → Then invoke /hive-credentials
+- "Debug a failing agent" → Then invoke /hive-debugger
+- "Other" (please describe what you want to achieve)
+```
+
+**DO NOT:** Read source files, explore the codebase, search for code, or do any investigation before routing. The sub-skills handle all of that.
+
+---
+
 Complete Standard Operating Procedure (SOP) for building production-ready goal-driven agents.

 ## Overview

 This workflow orchestrates specialized skills to take you from initial concept to production-ready agent:

-1. **Understand Concepts** → `/building-agents-core` (optional)
-2. **Build Structure** → `/building-agents-construction`
-3. **Optimize Design** → `/building-agents-patterns` (optional)
-4. **Setup Credentials** → `/setup-credentials` (if agent uses tools requiring API keys)
-5. **Test & Validate** → `/testing-agent`
+1. **Understand Concepts** → `/hive-concepts` (optional)
+2. **Build Structure** → `/hive-create`
+3. **Optimize Design** → `/hive-patterns` (optional)
+4. **Setup Credentials** → `/hive-credentials` (if agent uses tools requiring API keys)
+5. **Test & Validate** → `/hive-test`
+6. **Debug Issues** → `/hive-debugger` (if agent fails at runtime)

 ## When to Use This Workflow

@@ -37,25 +58,26 @@ Use this meta-skill when:
 - Want consistent, repeatable agent builds

 **Skip this workflow** if:
- You only need to test an existing agent → use `/testing-agent` directly
+- You only need to test an existing agent → use `/hive-test` directly
 - You know exactly which phase you're in → use specific skill directly

 ## Quick Decision Tree

 ```
-"Need to understand agent concepts" → building-agents-core
-"Build a new agent" → building-agents-construction
-"Optimize my agent design" → building-agents-patterns
-"Set up API keys for my agent" → setup-credentials
-"Test my agent" → testing-agent
+"Need to understand agent concepts" → hive-concepts
+"Build a new agent" → hive-create
+"Optimize my agent design" → hive-patterns
+"Need client-facing nodes or feedback loops" → hive-patterns
+"Set up API keys for my agent" → hive-credentials
+"Test my agent" → hive-test
+"My agent is failing/stuck/has errors" → hive-debugger
 "Not sure what I need" → Read phases below, then decide
 "Agent has structure but needs implementation" → See agent directory STATUS.md
 ```

 ## Phase 0: Understand Concepts (Optional)

-**Duration**: 5-10 minutes
-**Skill**: `/building-agents-core`
+**Skill**: `/hive-concepts`
 **Input**: Questions about agent architecture

 ### When to Use
@@ -63,12 +85,12 @@ Use this meta-skill when:
 - First time building an agent
 - Need to understand node types, edges, goals
 - Want to validate tool availability
- Learning about pause/resume architecture
+- Learning about event loop architecture and client-facing nodes

 ### What This Phase Provides

 - Architecture overview (Python packages, not JSON)
- Core concepts (Goal, Node, Edge, Pause/Resume)
+- Core concepts (Goal, Node, Edge, Event Loop, Judges)
 - Tool discovery and validation procedures
 - Workflow overview

@@ -76,9 +98,8 @@ Use this meta-skill when:

 ## Phase 1: Build Agent Structure

-**Duration**: 15-30 minutes
-**Skill**: `/building-agents-construction`
-**Input**: User requirements ("Build an agent that...")
+**Skill**: `/hive-create`
+**Input**: User requirements ("Build an agent that...") or a template to start from

 ### What This Phase Does

@@ -106,7 +127,7 @@ Creates the complete agent architecture:
 - ✅ 1-5 constraints defined
 - ✅ 5-10 nodes specified in nodes/__init__.py
 - ✅ 8-15 edges connecting workflow
- ✅ Validated structure (passes `python -m agent_name validate`)
+- ✅ Validated structure (passes `uv run python -m agent_name validate`)
 - ✅ README.md with usage instructions
 - ✅ CLI commands (info, validate, run, shell)

@@ -120,7 +141,7 @@ You're ready for Phase 2 when:

 ### Common Outputs

-The building-agents-construction skill produces:
+The hive-create skill produces:
 ```
 exports/agent_name/
 ├── __init__.py          (package exports)
@@ -140,53 +161,52 @@ exports/agent_name/
 → You may need to add Python functions or MCP tools (not covered by current skills)

 **If you want to optimize the design:**
-→ Proceed to Phase 1.5 (building-agents-patterns)
+→ Proceed to Phase 1.5 (hive-patterns)

 **If ready to test:**
 → Proceed to Phase 2

 ## Phase 1.5: Optimize Design (Optional)

-**Duration**: 10-15 minutes
-**Skill**: `/building-agents-patterns`
+**Skill**: `/hive-patterns`
 **Input**: Completed agent structure

 ### When to Use

- Want to add pause/resume functionality
+- Want to add client-facing blocking or feedback edges
+- Need judge patterns for output validation
+- Want fan-out/fan-in (parallel execution)
 - Need error handling patterns
- Want to optimize performance
- Need examples of complex routing
 - Want best practices guidance

 ### What This Phase Provides

- Practical examples and patterns
- Pause/resume architecture
- Error handling strategies
+- Client-facing interaction patterns
+- Feedback edge routing with nullable output keys
+- Judge patterns (implicit, SchemaJudge)
+- Fan-out/fan-in parallel execution
+- Context management and spillover patterns
 - Anti-patterns to avoid
- Performance optimization techniques

 **Skip this phase** if your agent design is straightforward.

 ## Phase 2: Test & Validate

-**Duration**: 20-40 minutes
-**Skill**: `/testing-agent`
+**Skill**: `/hive-test`
 **Input**: Working agent from Phase 1

 ### What This Phase Does

-Creates comprehensive test suite:
- Constraint tests (verify hard requirements)
- Success criteria tests (measure goal achievement)
- Edge case tests (handle failures gracefully)
- Integration tests (end-to-end workflows)
+Guides the creation and execution of a comprehensive test suite:
+- Constraint tests
+- Success criteria tests
+- Edge case tests
+- Integration tests

 ### Process

 1. **Analyze agent** - Read goal, constraints, success criteria
-2. **Generate tests** - Create pytest files in `exports/agent_name/tests/`
+2. **Generate tests** - The calling agent writes pytest files in `exports/agent_name/tests/` using hive-test guidelines and templates
 3. **User approval** - Review and approve each test
 4. **Run evaluation** - Execute tests and collect results
 5. **Debug failures** - Identify and fix issues
@@ -249,9 +269,9 @@ You're done when:

 ```
 User: "Build an agent that monitors files"
-→ Use /building-agents-construction
+→ Use /hive-create
 → Agent structure created
-→ Use /testing-agent
+→ Use /hive-test
 → Tests created and passing
 → Done: Production-ready agent
 ```
@@ -260,19 +280,32 @@ User: "Build an agent that monitors files"

 ```
 User: "Build an agent (first time)"
-→ Use /building-agents-core (understand concepts)
-→ Use /building-agents-construction (build structure)
-→ Use /building-agents-patterns (optimize design)
-→ Use /testing-agent (validate)
+→ Use /hive-concepts (understand concepts)
+→ Use /hive-create (build structure)
+→ Use /hive-patterns (optimize design)
+→ Use /hive-test (validate)
 → Done: Production-ready agent
 ```

+### Pattern 1c: Build from Template
+
+```
+User: "Build an agent based on the deep research template"
+→ Use /hive-create
+→ Select "From a template" path
+→ Pick template, name new agent
+→ Review/modify goal, nodes, graph
+→ Agent exported with customizations
+→ Use /hive-test
+→ Done: Customized agent
+```
+
 ### Pattern 2: Test Existing Agent

 ```
 User: "Test my agent at exports/my_agent"
 → Skip Phase 1
-→ Use /testing-agent directly
+→ Use /hive-test directly
 → Tests created
 → Done: Validated agent
 ```
@@ -281,58 +314,71 @@ User: "Test my agent at exports/my_agent"

 ```
 User: "Build an agent"
-→ Use /building-agents-construction (Phase 1)
+→ Use /hive-create (Phase 1)
 → Implementation needed (see STATUS.md)
 → [User implements functions]
-→ Use /testing-agent (Phase 2)
+→ Use /hive-test (Phase 2)
 → Tests reveal bugs
 → [Fix bugs manually]
 → Re-run tests
 → Done: Working agent
 ```

-### Pattern 4: Complex Agent with Patterns
+### Pattern 4: Agent with Review Loops and Human-in-the-Loop (HITL) Checkpoints

 ```
-User: "Build an agent with multi-turn conversations"
-→ Use /building-agents-core (learn pause/resume)
-→ Use /building-agents-construction (build structure)
-→ Use /building-agents-patterns (implement pause/resume pattern)
-→ Use /testing-agent (validate conversation flows)
-→ Done: Complex conversational agent
+User: "Build an agent with human review and feedback loops"
+→ Use /hive-concepts (learn event loop, client-facing nodes)
+→ Use /hive-create (build structure with feedback edges)
+→ Use /hive-patterns (implement client-facing + feedback patterns)
+→ Use /hive-test (validate review flows and edge routing)
+→ Done: Agent with HITL checkpoints and review loops
 ```

 ## Skill Dependencies

 ```
-agent-workflow (meta-skill)
+hive (meta-skill)
    │
-    ├── building-agents-core (foundational)
-    │   ├── Architecture concepts
-    │   ├── Node/Edge/Goal definitions
+    ├── hive-concepts (foundational)
+    │   ├── Architecture concepts (event loop, judges)
+    │   ├── Node types (event_loop, function)
+    │   ├── Edge routing and priority
    │   ├── Tool discovery procedures
    │   └── Workflow overview
    │
-    ├── building-agents-construction (procedural)
+    ├── hive-create (procedural)
    │   ├── Creates package structure
    │   ├── Defines goal
-    │   ├── Adds nodes incrementally
-    │   ├── Connects edges
+    │   ├── Adds nodes (event_loop, function)
+    │   ├── Connects edges with priority routing
    │   ├── Finalizes agent class
-    │   └── Requires: building-agents-core
+    │   └── Requires: hive-concepts
    │
-    ├── building-agents-patterns (reference)
-    │   ├── Best practices
-    │   ├── Pause/resume patterns
-    │   ├── Error handling
-    │   ├── Anti-patterns
-    │   └── Performance optimization
+    ├── hive-patterns (reference)
+    │   ├── Client-facing interaction patterns
+    │   ├── Feedback edges and review loops
+    │   ├── Judge patterns (implicit, SchemaJudge)
+    │   ├── Fan-out/fan-in parallel execution
+    │   └── Context management and anti-patterns
    │
-    └── testing-agent
-        ├── Reads agent goal
-        ├── Generates tests
-        ├── Runs evaluation
-        └── Reports results
+    ├── hive-credentials (utility)
+    │   ├── Detects missing credentials
+    │   ├── Offers auth method choices (Aden OAuth, direct API key)
+    │   ├── Stores securely in ~/.hive/credentials
+    │   └── Validates with health checks
+    │
+    ├── hive-test (validation)
+    │   ├── Reads agent goal
+    │   ├── Generates tests
+    │   ├── Runs evaluation
+    │   └── Reports results
+    │
+    └── hive-debugger (troubleshooting)
+        ├── Monitors runtime logs (L1/L2/L3)
+        ├── Identifies retry loops, tool failures
+        ├── Categorizes issues (10 categories)
+        └── Provides fix recommendations
 ```

 ## Troubleshooting
@@ -342,13 +388,13 @@ agent-workflow (meta-skill)
 - Check node IDs match between nodes/__init__.py and agent.py
 - Verify all edges reference valid node IDs
 - Ensure entry_node exists in nodes list
- Run: `PYTHONPATH=core:exports python -m agent_name validate`
+- Run: `PYTHONPATH=exports uv run python -m agent_name validate`

 ### "Agent has structure but won't run"

 - Check for STATUS.md or IMPLEMENTATION_GUIDE.md in agent directory
 - Implementation may be needed (Python functions or MCP tools)
- This is expected - building-agents-construction creates structure, not implementation
+- This is expected - hive-create creates structure, not implementation
 - See implementation guide for completion options

 ### "Tests are failing"
@@ -356,9 +402,16 @@ agent-workflow (meta-skill)
 - Review test output for specific failures
 - Check agent goal and success criteria
 - Verify constraints are met
- Use `/testing-agent` to debug and iterate
+- Use `/hive-test` to debug and iterate
 - Fix agent code and re-run tests

+### "Agent is failing at runtime"
+
+- Use `/hive-debugger` to analyze runtime logs
+- The debugger identifies retry loops, tool failures, and stalled execution
+- Get actionable fix recommendations with code changes
+- Monitor the agent in real time during TUI sessions
+
 ### "Not sure which phase I'm in"

 Run these checks:
@@ -368,7 +421,7 @@ Run these checks:
 ls exports/my_agent/agent.py

 # Check if it validates
-PYTHONPATH=core:exports python -m my_agent validate
+PYTHONPATH=exports uv run python -m my_agent validate

 # Check if tests exist
 ls exports/my_agent/tests/
@@ -417,10 +470,10 @@ You're done with the workflow when:

 ## Additional Resources

- **building-agents-core**: See `.claude/skills/building-agents-core/SKILL.md`
- **building-agents-construction**: See `.claude/skills/building-agents-construction/SKILL.md`
- **building-agents-patterns**: See `.claude/skills/building-agents-patterns/SKILL.md`
- **testing-agent**: See `.claude/skills/testing-agent/SKILL.md`
+- **hive-concepts**: See `.claude/skills/hive-concepts/SKILL.md`
+- **hive-create**: See `.claude/skills/hive-create/SKILL.md`
+- **hive-patterns**: See `.claude/skills/hive-patterns/SKILL.md`
+- **hive-test**: See `.claude/skills/hive-test/SKILL.md`
 - **Agent framework docs**: See `core/README.md`
 - **Example agents**: See `exports/` directory

@@ -428,36 +481,46 @@ You're done with the workflow when:

 This workflow provides a proven path from concept to production-ready agent:

-1. **Learn** with `/building-agents-core` → Understand fundamentals (optional)
-2. **Build** with `/building-agents-construction` → Get validated structure
-3. **Optimize** with `/building-agents-patterns` → Apply best practices (optional)
-4. **Test** with `/testing-agent` → Get verified functionality
+1. **Learn** with `/hive-concepts` → Understand fundamentals (optional)
+2. **Build** with `/hive-create` → Get validated structure
+3. **Optimize** with `/hive-patterns` → Apply best practices (optional)
+4. **Configure** with `/hive-credentials` → Set up API keys (if needed)
+5. **Test** with `/hive-test` → Get verified functionality
+6. **Debug** with `/hive-debugger` → Fix runtime issues (if needed)

 The workflow is **flexible** - skip phases as needed, iterate freely, and adapt to your specific requirements. The goal is **production-ready agents** built with **consistent, repeatable processes**.

 ## Skill Selection Guide

-**Choose building-agents-core when:**
+**Choose hive-concepts when:**
 - First time building agents
- Need to understand architecture
+- Need to understand event loop architecture
 - Validating tool availability
- Learning about node types and edges
+- Learning about node types, edges, and judges

-**Choose building-agents-construction when:**
+**Choose hive-create when:**
 - Actually building an agent
 - Have clear requirements
 - Ready to write code
 - Want step-by-step guidance
+- Want to start from an existing template and customize it

-**Choose building-agents-patterns when:**
+**Choose hive-patterns when:**
 - Agent structure complete
- Need advanced patterns
- Implementing pause/resume
- Optimizing performance
+- Need client-facing nodes or feedback edges
+- Implementing review loops or fan-out/fan-in
+- Want judge patterns or context management
 - Want best practices

-**Choose testing-agent when:**
+**Choose hive-test when:**
 - Agent structure complete
 - Ready to validate functionality
 - Need comprehensive test coverage
- Debugging agent behavior
+- Testing feedback loops, output keys, or fan-out
+
+**Choose hive-debugger when:**
+- Agent is failing or stuck at runtime
+- Seeing retry loops or escalations
+- Tool calls are failing
+- Need to understand why a node isn't completing
+- Want real-time monitoring of agent execution
@@ -1,6 +1,6 @@
 # Example: File Monitor Agent

-This example shows the complete agent-workflow in action for building a file monitoring agent.
+This example shows the complete /hive workflow in action for building a file monitoring agent.

 ## Initial Request

@@ -12,7 +12,7 @@ User: "Build an agent that monitors ~/Downloads and copies new files to ~/Docume

 ### Step 1: Create Structure

-Agent invokes `/building-agents` skill and:
+Agent invokes `/hive-create` skill and:

 1. Creates `exports/file_monitor_agent/` package
 2. Writes skeleton files (__init__.py, __main__.py, agent.py, etc.)
@@ -75,10 +75,10 @@ initialize → list → identify → check
 ### Step 5: Finalize

 ```bash
-$ PYTHONPATH=core:exports python -m file_monitor_agent validate
+$ PYTHONPATH=exports uv run python -m file_monitor_agent validate
 ✓ Agent is valid

-$ PYTHONPATH=core:exports python -m file_monitor_agent info
+$ PYTHONPATH=exports uv run python -m file_monitor_agent info
 Agent: File Monitor & Copy Agent
 Nodes: 7
 Edges: 8
@@ -107,7 +107,7 @@ exports/file_monitor_agent/

 ### Step 1: Analyze Agent

-Agent invokes `/testing-agent` skill and:
+Agent invokes `/hive-test` skill and:

 1. Reads goal from `exports/file_monitor_agent/agent.py`
 2. Identifies 4 success criteria to test
@@ -131,7 +131,7 @@ Tests approved incrementally by user.
 ### Step 3: Run Tests

 ```bash
-$ PYTHONPATH=core:exports pytest exports/file_monitor_agent/tests/
+$ PYTHONPATH=exports uv run pytest exports/file_monitor_agent/tests/

 test_constraints.py::test_preserves_originals     PASSED
 test_constraints.py::test_handles_errors          PASSED
@@ -162,7 +162,7 @@ test_edge_cases.py::test_large_files              PASSED
 ./RUN_AGENT.sh

 # Or manually
-PYTHONPATH=core:exports:tools/src python -m file_monitor_agent run
+PYTHONPATH=exports uv run python -m file_monitor_agent run
 ```

 **Capabilities:**
@@ -1,351 +0,0 @@
-# Example: Testing a YouTube Research Agent
-
-This example walks through testing a YouTube research agent that finds relevant videos based on a topic.
-
-## Prerequisites
-
- Agent built with building-agents skill at `exports/youtube-research/`
- Goal defined with success criteria and constraints
-
-## Step 1: Load the Goal
-
-First, load the goal that was defined during the Goal stage:
-
-```json
-{
-    "id": "youtube-research",
-    "name": "YouTube Research Agent",
-    "description": "Find relevant YouTube videos on a given topic",
-    "success_criteria": [
-        {
-            "id": "find_videos",
-            "description": "Find 3-5 relevant videos",
-            "metric": "video_count",
-            "target": "3-5",
-            "weight": 1.0
-        },
-        {
-            "id": "relevance",
-            "description": "Videos must be relevant to the topic",
-            "metric": "relevance_score",
-            "target": ">0.8",
-            "weight": 0.8
-        }
-    ],
-    "constraints": [
-        {
-            "id": "api_limits",
-            "description": "Must not exceed YouTube API rate limits",
-            "constraint_type": "hard",
-            "category": "technical"
-        },
-        {
-            "id": "content_safety",
-            "description": "Must filter out inappropriate content",
-            "constraint_type": "hard",
-            "category": "safety"
-        }
-    ]
-}
-```
-
-## Step 2: Get Constraint Test Guidelines
-
-During the Goal stage (or early Eval), get test guidelines for constraints:
-
-```python
-result = generate_constraint_tests(
-    goal_id="youtube-research",
-    goal_json='<goal JSON above>',
-    agent_path="exports/youtube-research"
-)
-```
-
-**The result contains guidelines (not generated tests):**
- `output_file`: Where to write tests
- `file_header`: Imports and fixtures to use
- `test_template`: Format for test functions
- `constraints_formatted`: The constraints to test
- `test_guidelines`: Rules for writing tests
-
-## Step 3: Write Constraint Tests
-
-Using the guidelines, write tests directly with the Write tool:
-
-```python
-# Write constraint tests using the provided file_header and guidelines
-Write(
-    file_path="exports/youtube-research/tests/test_constraints.py",
-    content='''
-"""Constraint tests for youtube-research agent."""
-
-import os
-import pytest
-from exports.youtube_research import default_agent
-
-
-pytestmark = pytest.mark.skipif(
-    not os.environ.get("ANTHROPIC_API_KEY") and not os.environ.get("MOCK_MODE"),
-    reason="API key required for real testing."
-)
-
-
-@pytest.mark.asyncio
-async def test_constraint_api_limits_respected():
-    """Verify API rate limits are not exceeded."""
-    import time
-    mock_mode = bool(os.environ.get("MOCK_MODE"))
-
-    for i in range(10):
-        result = await default_agent.run({"topic": f"test_{i}"}, mock_mode=mock_mode)
-        time.sleep(0.1)
-
-    # Should complete without rate limit errors
-    assert "rate limit" not in str(result).lower()
-
-
-@pytest.mark.asyncio
-async def test_constraint_content_safety_filter():
-    """Verify inappropriate content is filtered."""
-    mock_mode = bool(os.environ.get("MOCK_MODE"))
-    result = await default_agent.run({"topic": "general topic"}, mock_mode=mock_mode)
-
-    for video in result.videos:
-        assert video.safe_for_work is True
-        assert video.age_restricted is False
-'''
-)
-```
-
-## Step 4: Get Success Criteria Test Guidelines
-
-After the agent is built, get success criteria test guidelines:
-
-```python
-result = generate_success_tests(
-    goal_id="youtube-research",
-    goal_json='<goal JSON>',
-    node_names="search_node,filter_node,rank_node,format_node",
-    tool_names="youtube_search,video_details,channel_info",
-    agent_path="exports/youtube-research"
-)
-```
-
-## Step 5: Write Success Criteria Tests
-
-Using the guidelines, write success criteria tests:
-
-```python
-Write(
-    file_path="exports/youtube-research/tests/test_success_criteria.py",
-    content='''
-"""Success criteria tests for youtube-research agent."""
-
-import os
-import pytest
-from exports.youtube_research import default_agent
-
-
-pytestmark = pytest.mark.skipif(
-    not os.environ.get("ANTHROPIC_API_KEY") and not os.environ.get("MOCK_MODE"),
-    reason="API key required for real testing."
-)
-
-
-@pytest.mark.asyncio
-async def test_find_videos_happy_path():
-    """Test finding videos for a common topic."""
-    mock_mode = bool(os.environ.get("MOCK_MODE"))
-    result = await default_agent.run({"topic": "machine learning"}, mock_mode=mock_mode)
-
-    assert result.success
-    assert 3 <= len(result.videos) <= 5
-    assert all(v.title for v in result.videos)
-    assert all(v.video_id for v in result.videos)
-
-
-@pytest.mark.asyncio
-async def test_find_videos_minimum_boundary():
-    """Test at minimum threshold (3 videos)."""
-    mock_mode = bool(os.environ.get("MOCK_MODE"))
-    result = await default_agent.run({"topic": "niche topic xyz"}, mock_mode=mock_mode)
-
-    assert len(result.videos) >= 3
-
-
-@pytest.mark.asyncio
-async def test_relevance_score_threshold():
-    """Test relevance scoring meets threshold."""
-    mock_mode = bool(os.environ.get("MOCK_MODE"))
-    result = await default_agent.run({"topic": "python programming"}, mock_mode=mock_mode)
-
-    for video in result.videos:
-        assert video.relevance_score > 0.8
-
-
-@pytest.mark.asyncio
-async def test_find_videos_no_results_graceful():
-    """Test graceful handling of no results."""
-    mock_mode = bool(os.environ.get("MOCK_MODE"))
-    result = await default_agent.run({"topic": "xyznonexistent123"}, mock_mode=mock_mode)
-
-    # Should not crash, return empty or message
-    assert result.videos == [] or result.message
-'''
-)
-```
-
-## Step 6: Run All Tests
-
-Execute all tests:
-
-```python
-result = run_tests(
-    goal_id="youtube-research",
-    agent_path="exports/youtube-research",
-    test_types='["all"]',
-    parallel=4
-)
-```
-
-**Results:**
-
-```json
-{
-    "goal_id": "youtube-research",
-    "overall_passed": false,
-    "summary": {
-        "total": 6,
-        "passed": 5,
-        "failed": 1,
-        "pass_rate": "83.3%"
-    },
-    "duration_ms": 4521,
-    "results": [
-        {"test_id": "test_constraint_api_001", "passed": true, "duration_ms": 1234},
-        {"test_id": "test_constraint_content_001", "passed": true, "duration_ms": 456},
-        {"test_id": "test_success_001", "passed": true, "duration_ms": 789},
-        {"test_id": "test_success_002", "passed": true, "duration_ms": 654},
-        {"test_id": "test_success_003", "passed": true, "duration_ms": 543},
-        {"test_id": "test_success_004", "passed": false, "duration_ms": 845,
-         "error_category": "IMPLEMENTATION_ERROR",
-         "error_message": "TypeError: 'NoneType' object has no attribute 'videos'"}
-    ]
-}
-```
-
-## Step 7: Debug the Failed Test
-
-```python
-result = debug_test(
-    goal_id="youtube-research",
-    test_name="test_find_videos_no_results_graceful",
-    agent_path="exports/youtube-research"
-)
-```
-
-**Debug Output:**
-
-```json
-{
-    "test_id": "test_success_004",
-    "test_name": "test_find_videos_no_results_graceful",
-    "input": {"topic": "xyznonexistent123"},
-    "expected": "Empty list or message",
-    "actual": {"error": "TypeError: 'NoneType' object has no attribute 'videos'"},
-    "passed": false,
-    "error_message": "TypeError: 'NoneType' object has no attribute 'videos'",
-    "error_category": "IMPLEMENTATION_ERROR",
-    "stack_trace": "Traceback (most recent call last):\n  File \"filter_node.py\", line 42\n    for video in result.videos:\nTypeError: 'NoneType' object has no attribute 'videos'",
-    "logs": [
-        {"timestamp": "2026-01-20T10:00:01", "node": "search_node", "level": "INFO", "msg": "Searching for: xyznonexistent123"},
-        {"timestamp": "2026-01-20T10:00:02", "node": "search_node", "level": "WARNING", "msg": "No results found"},
-        {"timestamp": "2026-01-20T10:00:02", "node": "filter_node", "level": "ERROR", "msg": "NoneType error"}
-    ],
-    "runtime_data": {
-        "execution_path": ["start", "search_node", "filter_node"],
-        "node_outputs": {
-            "search_node": null
-        }
-    },
-    "suggested_fix": "Add null check in filter_node before accessing .videos attribute",
-    "iteration_guidance": {
-        "stage": "Agent",
-        "action": "Fix the code in nodes/edges",
-        "restart_required": false,
-        "description": "The goal is correct, but filter_node doesn't handle null results from search_node."
-    }
-}
-```
-
-## Step 8: Iterate Based on Category
-
-Since this is an **IMPLEMENTATION_ERROR**, we:
-
-1. **Don't restart** the Goal → Agent → Eval flow
-2. **Fix the agent** using building-agents skill:
-   - Modify `filter_node` to handle null results
-3. **Re-run Eval** (tests only)
-
-### Fix in building-agents:
-
-```python
-# Update the filter_node to handle null
-add_node(
-    node_id="filter_node",
-    name="Filter Node",
-    description="Filter and rank videos",
-    node_type="function",
-    input_keys=["search_results"],
-    output_keys=["filtered_videos"],
-    system_prompt="""
-    Filter videos by relevance.
-    IMPORTANT: Handle case where search_results is None or empty.
-    Return empty list if no results.
-    """
-)
-```
-
-### Re-export and re-test:
-
-```python
-# Re-export the fixed agent
-export_graph(path="exports/youtube-research")
-
-# Re-run tests
-result = run_tests(
-    goal_id="youtube-research",
-    agent_path="exports/youtube-research",
-    test_types='["all"]'
-)
-```
-
-**Updated Results:**
-
-```json
-{
-    "goal_id": "youtube-research",
-    "overall_passed": true,
-    "summary": {
-        "total": 6,
-        "passed": 6,
-        "failed": 0,
-        "pass_rate": "100.0%"
-    }
-}
-```
-
-## Summary
-
-1. **Got guidelines** for constraint tests during Goal stage
-2. **Wrote** constraint tests using Write tool
-3. **Got guidelines** for success criteria tests during Eval stage
-4. **Wrote** success criteria tests using Write tool
-5. **Ran** tests in parallel
-6. **Debugged** the one failure
-7. **Categorized** as IMPLEMENTATION_ERROR
-8. **Fixed** the agent (not the goal)
-9. **Re-ran** Eval only (didn't restart full flow)
-10. **Passed** all tests
-
-The agent is now validated and ready for production use.
@@ -0,0 +1,7 @@
+# Project-level Codex config for Hive.
+# Keep this file minimal: MCP connectivity + skill discovery.
+
+[mcp_servers.agent-builder]
+command = "uv"
+args = ["run", "--directory", "core", "-m", "framework.mcp.agent_builder_server"]
+cwd = "."
@@ -1 +0,0 @@
-../../.claude/skills/agent-workflow
@@ -1 +0,0 @@
-../../.claude/skills/building-agents-construction
@@ -1 +0,0 @@
-../../.claude/skills/building-agents-core
@@ -1 +0,0 @@
-../../.claude/skills/building-agents-patterns
@@ -0,0 +1 @@
+../../.claude/skills/hive
@@ -0,0 +1 @@
+../../.claude/skills/hive-concepts
@@ -0,0 +1 @@
+../../.claude/skills/hive-create
@@ -0,0 +1 @@
+../../.claude/skills/hive-credentials
@@ -0,0 +1 @@
+../../.claude/skills/hive-patterns
@@ -0,0 +1 @@
+../../.claude/skills/hive-test
@@ -1 +0,0 @@
-../../.claude/skills/testing-agent
@@ -55,14 +55,10 @@ jobs:
      - name: Install uv
        uses: astral-sh/setup-uv@v4

-      - name: Install dependencies
+      - name: Install dependencies and run tests
        run: |
          cd core
          uv sync
-
-      - name: Run tests
-        run: |
-          cd core
          uv run pytest tests/ -v

  test-tools:
@@ -126,7 +122,7 @@ jobs:
          for agent_dir in "${agent_dirs[@]}"; do
            if [ -f "$agent_dir/agent.json" ]; then
              echo "Validating $agent_dir"
-              python -c "import json; json.load(open('$agent_dir/agent.json'))"
+              uv run python -c "import json; json.load(open('$agent_dir/agent.json'))"
              validated=$((validated + 1))
            fi
          done
@@ -54,7 +54,6 @@ __pycache__/
 *.egg-info/
 .eggs/
 *.egg
-uv.lock

 # Generated runtime data
 core/data/
@@ -75,3 +74,6 @@ exports/*

 docs/github-issues/*
 core/tests/*dumps/*
+
+screenshots/*
+
@@ -4,11 +4,6 @@
      "command": "uv",
      "args": ["run", "-m", "framework.mcp.agent_builder_server"],
      "cwd": "core"
-    },
-    "tools": {
-      "command": "uv",
-      "args": ["run", "mcp_server.py", "--stdio"],
-      "cwd": "tools"
    }
  }
 }
@@ -0,0 +1,30 @@
+{
+  "mcpServers": {
+    "agent-builder": {
+      "command": "uv",
+      "args": [
+        "run",
+        "python",
+        "-m",
+        "framework.mcp.agent_builder_server"
+      ],
+      "cwd": "core",
+      "env": {
+        "PYTHONPATH": "../tools/src"
+      }
+    },
+    "tools": {
+      "command": "uv",
+      "args": [
+        "run",
+        "python",
+        "mcp_server.py",
+        "--stdio"
+      ],
+      "cwd": "tools",
+      "env": {
+        "PYTHONPATH": "src"
+      }
+    }
+  }
+}
@@ -0,0 +1 @@
+../../.claude/skills/hive
@@ -0,0 +1 @@
+../../.claude/skills/hive-concepts
@@ -0,0 +1 @@
+../../.claude/skills/hive-create
@@ -0,0 +1 @@
+../../.claude/skills/hive-credentials
@@ -0,0 +1 @@
+../../.claude/skills/hive-debugger
@@ -0,0 +1 @@
+../../.claude/skills/hive-patterns
@@ -0,0 +1 @@
+../../.claude/skills/hive-test
@@ -0,0 +1 @@
+../../.claude/skills/triage-issue
@@ -1,6 +1,6 @@
 repos:
  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.8.6
+    rev: v0.15.0
    hooks:
      - id: ruff
        name: ruff lint (core)
@@ -1,41 +0,0 @@
-# Changelog
-
-All notable changes to this project will be documented in this file.
-
-The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
-and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-
-## [Unreleased]
-
-### Added
- Initial project structure
- React frontend (honeycomb) with Vite and TypeScript
- Node.js backend (hive) with Express and TypeScript
- Docker Compose configuration for local development
- Configuration system via `config.yaml`
- GitHub Actions CI/CD workflows
- Comprehensive documentation
-
-### Changed
- N/A
-
-### Deprecated
- N/A
-
-### Removed
- N/A
-
-
-### Fixed
- tools: Fixed web_scrape tool attempting to parse non-HTML content (PDF, JSON) as HTML (#487)
-
-### Security
- N/A
-
-## [0.1.0] - 2025-01-13
-
-### Added
- Initial release
-
-[Unreleased]: https://github.com/adenhq/hive/compare/v0.1.0...HEAD
-[0.1.0]: https://github.com/adenhq/hive/releases/tag/v0.1.0
@@ -1,10 +1,10 @@
 # Contributing to Aden Agent Framework

-Thank you for your interest in contributing to the Aden Agent Framework! This document provides guidelines and information for contributors. We’re especially looking for help building tools, integrations([check #2805](https://github.com/adenhq/hive/issues/2805)), and example agents for the framework. If you’re interested in extending its functionality, this is the perfect place to start. 
+Thank you for your interest in contributing to the Aden Agent Framework! This document provides guidelines and information for contributors. We’re especially looking for help building tools, integrations ([check #2805](https://github.com/adenhq/hive/issues/2805)), and example agents for the framework. If you’re interested in extending its functionality, this is the perfect place to start. 

 ## Code of Conduct

-By participating in this project, you agree to abide by our [Code of Conduct](CODE_OF_CONDUCT.md).
+By participating in this project, you agree to abide by our [Code of Conduct](docs/CODE_OF_CONDUCT.md).

 ## Issue Assignment Policy

@@ -35,15 +35,22 @@ You may submit PRs without prior assignment for:

 1. Fork the repository
 2. Clone your fork: `git clone https://github.com/YOUR_USERNAME/hive.git`
-3. Create a feature branch: `git checkout -b feature/your-feature-name`
-4. Make your changes
-5. Run checks and tests:
+3. Add the upstream repository: `git remote add upstream https://github.com/adenhq/hive.git`
+4. Sync with upstream to ensure you're starting from the latest code:
+   ```bash
+   git fetch upstream
+   git checkout main
+   git merge upstream/main
+   ```
+5. Create a feature branch: `git checkout -b feature/your-feature-name`
+6. Make your changes
+7. Run checks and tests:
   ```bash
   make check    # Lint and format checks (ruff check + ruff format --check on core/ and tools/)
   make test     # Core tests (cd core && uv run python -m pytest tests/ -v)
   ```
-6. Commit your changes following our commit conventions
-7. Push to your fork and submit a Pull Request
+8. Commit your changes following our commit conventions
+9. Push to your fork and submit a Pull Request

 ## Development Setup

@@ -92,8 +99,7 @@ docs(readme): update installation instructions
 2. Update documentation if needed
 3. Add tests for new functionality
 4. Ensure `make check` and `make test` pass
-5. Update the CHANGELOG.md if applicable
-6. Request review from maintainers
+5. Request review from maintainers

 ### PR Title Format

@@ -125,7 +131,7 @@ feat(component): add new feature description
 > **Note:** When testing agents in `exports/`, always set PYTHONPATH:
 >
 > ```bash
-> PYTHONPATH=core:exports python -m agent_name test
+> PYTHONPATH=exports uv run python -m agent_name test
 > ```

 ```bash
@@ -138,8 +144,11 @@ make test
 # Or run tests directly
 cd core && uv run pytest tests/ -v

+# Run tools package tests (when contributing to tools/)
+cd tools && uv run pytest tests/ -v
+
 # Run tests for a specific agent
-PYTHONPATH=core:exports python -m agent_name test
+PYTHONPATH=exports uv run python -m agent_name test
 ```

 > **CI also validates** that all exported agent JSON files (`exports/*/agent.json`) are well-formed JSON. Ensure your agent exports are valid before submitting.
@@ -152,4 +161,4 @@ By submitting a Pull Request, you agree that your contributions will be licensed

 Feel free to open an issue for questions or join our [Discord community](https://discord.com/invite/MXE49hrKDk).

-Thank you for contributing!
+Thank you for contributing!
@@ -4,9 +4,11 @@ help: ## Show this help
 	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \
 		awk 'BEGIN {FS = ":.*?## "}; {printf "  \033[36m%-15s\033[0m %s\n", $$1, $$2}'

-lint: ## Run ruff linter (with auto-fix)
+lint: ## Run ruff linter and formatter (with auto-fix)
 	cd core && ruff check --fix .
 	cd tools && ruff check --fix .
+	cd core && ruff format .
+	cd tools && ruff format .

 format: ## Run ruff formatter
 	cd core && ruff format .
@@ -19,8 +21,8 @@ check: ## Run all checks without modifying files (CI-safe)
 	cd tools && ruff format --check .

 test: ## Run all tests
-	cd core && python -m pytest tests/ -v
+	cd core && uv run python -m pytest tests/ -v

 install-hooks: ## Install pre-commit hooks
-	pip install pre-commit
+	uv pip install pre-commit
 	pre-commit install
@@ -1,51 +0,0 @@
-## Summary
- **Added HubSpot integration** — new HubSpot MCP tool with search, get, create, and update operations for contacts, companies, and deals. Includes OAuth2 provider for HubSpot credentials and credential store adapter for the tools layer.
- **Replaced web_scrape tool with Playwright + stealth** — swapped httpx/BeautifulSoup for a headless Chromium browser using `playwright` (async API) and `playwright-stealth`, enabling JS-rendered page scraping and bot detection evasion
- **Added empty response retry logic** — LLM provider now detects empty responses (e.g. Gemini returning 200 with no content on rate limit) and retries with exponential backoff, preventing hallucinated output from the cleanup LLM
- **Added context-aware input compaction** — LLM nodes now estimate input token count before calling the model and progressively truncate the largest values if they exceed the context window budget
- **Increased rate limit retries to 10** with verbose `[retry]` and `[compaction]` logging that includes model name, finish reason, and attempt count
- **Updated setup scripts** — `scripts/setup-python.sh` now installs Playwright Chromium browser automatically for web scraping support
- **Interactive quickstart onboarding** — `quickstart.sh` rewritten as bee-themed interactive wizard that detects existing API keys (including Claude Code subscription), lets user pick ONE default LLM provider, and saves configuration to `~/.hive/configuration.json`
- **Fixed lint errors** across `hubspot_tool.py` (line length) and `agent_builder_server.py` (unused variable)
-
-## Changed files
-
-### HubSpot Integration
- `tools/src/aden_tools/tools/hubspot_tool/` — New MCP tool: contacts, companies, and deals CRUD
- `tools/src/aden_tools/tools/__init__.py` — Registered HubSpot tools
- `tools/src/aden_tools/credentials/integrations.py` — HubSpot credential integration
- `tools/src/aden_tools/credentials/__init__.py` — Updated credential exports
- `core/framework/credentials/oauth2/hubspot_provider.py` — HubSpot OAuth2 provider
- `core/framework/credentials/oauth2/__init__.py` — Registered HubSpot OAuth2 provider
- `core/framework/runner/runner.py` — Updated runner for credential support
-
-### Web Scrape Rewrite
- `tools/src/aden_tools/tools/web_scrape_tool/web_scrape_tool.py` — Playwright async rewrite
- `tools/src/aden_tools/tools/web_scrape_tool/README.md` — Updated docs
- `tools/pyproject.toml` — Added `playwright`, `playwright-stealth` deps
- `tools/Dockerfile` — Added `playwright install chromium --with-deps`
- `scripts/setup-python.sh` — Added Playwright Chromium browser install step
-
-### LLM Reliability
- `core/framework/llm/litellm.py` — Empty response retry + max retries 10 + verbose logging
- `core/framework/graph/node.py` — Input compaction via `_compact_inputs()`, `_estimate_tokens()`, `_get_context_limit()`
-
-### Quickstart & Setup
- `quickstart.sh` — Interactive bee-themed onboarding wizard with single provider selection
- `~/.hive/configuration.json` — New user config file for default LLM provider/model
-
-### Fixes
- `core/framework/mcp/agent_builder_server.py` — Removed unused variable
- `tools/src/aden_tools/tools/hubspot_tool/hubspot_tool.py` — Fixed E501 line length violations
-
-## Test plan
- [ ] Run `make lint` — passes clean
- [ ] Run `./quickstart.sh` and verify interactive flow works, config saved to `~/.hive/configuration.json`
- [ ] Run `./scripts/setup-python.sh` and verify Playwright Chromium installs
- [ ] Run `pytest tests/tools/test_web_scrape_tool.py -v`
- [ ] Run agent against a JS-heavy site and verify `web_scrape` returns rendered content
- [ ] Set `HUBSPOT_ACCESS_TOKEN` and verify HubSpot tool CRUD operations work
- [ ] Trigger rate limit and verify `[retry]` logs appear with correct attempt counts
- [ ] Run agent with large inputs and verify `[compaction]` logs show truncation
-
-🤖 Generated with [Claude Code](https://claude.com/claude-code)
@@ -1,5 +1,5 @@
 <p align="center">
-  <img width="100%" alt="Hive Banner" src="https://storage.googleapis.com/aden-prod-assets/website/aden-title-card.png" />
+  <img width="100%" alt="Hive Banner" src="https://github.com/user-attachments/assets/a027429b-5d3c-4d34-88e4-0feaeaabbab3" />
 </p>

 <p align="center">
@@ -13,16 +13,19 @@
  <a href="docs/i18n/ko.md">한국어</a>
 </p>

-[![Apache 2.0 License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/adenhq/hive/blob/main/LICENSE)
-[![Y Combinator](https://img.shields.io/badge/Y%20Combinator-Aden-orange)](https://www.ycombinator.com/companies/aden)
-[![Discord](https://img.shields.io/discord/1172610340073242735?logo=discord&labelColor=%235462eb&logoColor=%23f5f5f5&color=%235462eb)](https://discord.com/invite/MXE49hrKDk)
-[![Twitter Follow](https://img.shields.io/twitter/follow/teamaden?logo=X&color=%23f5f5f5)](https://x.com/aden_hq)
-[![LinkedIn](https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff)](https://www.linkedin.com/company/teamaden/)
+<p align="center">
+  <a href="https://github.com/adenhq/hive/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-Apache%202.0-blue.svg" alt="Apache 2.0 License" /></a>
+  <a href="https://www.ycombinator.com/companies/aden"><img src="https://img.shields.io/badge/Y%20Combinator-Aden-orange" alt="Y Combinator" /></a>
+  <a href="https://discord.com/invite/MXE49hrKDk"><img src="https://img.shields.io/discord/1172610340073242735?logo=discord&labelColor=%235462eb&logoColor=%23f5f5f5&color=%235462eb" alt="Discord" /></a>
+  <a href="https://x.com/aden_hq"><img src="https://img.shields.io/twitter/follow/teamaden?logo=X&color=%23f5f5f5" alt="Twitter Follow" /></a>
+  <a href="https://www.linkedin.com/company/teamaden/"><img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff" alt="LinkedIn" /></a>
+  <img src="https://img.shields.io/badge/MCP-102_Tools-00ADD8?style=flat-square" alt="MCP" />
+</p>

 <p align="center">
  <img src="https://img.shields.io/badge/AI_Agents-Self--Improving-brightgreen?style=flat-square" alt="AI Agents" />
  <img src="https://img.shields.io/badge/Multi--Agent-Systems-blue?style=flat-square" alt="Multi-Agent" />
-  <img src="https://img.shields.io/badge/Goal--Driven-Development-purple?style=flat-square" alt="Goal-Driven" />
+  <img src="https://img.shields.io/badge/Headless-Development-purple?style=flat-square" alt="Headless" />
  <img src="https://img.shields.io/badge/Human--in--the--Loop-orange?style=flat-square" alt="HITL" />
  <img src="https://img.shields.io/badge/Production--Ready-red?style=flat-square" alt="Production" />
 </p>
@@ -30,15 +33,16 @@
  <img src="https://img.shields.io/badge/OpenAI-supported-412991?style=flat-square&logo=openai" alt="OpenAI" />
  <img src="https://img.shields.io/badge/Anthropic-supported-d4a574?style=flat-square" alt="Anthropic" />
  <img src="https://img.shields.io/badge/Google_Gemini-supported-4285F4?style=flat-square&logo=google" alt="Gemini" />
-  <img src="https://img.shields.io/badge/MCP-19_Tools-00ADD8?style=flat-square" alt="MCP" />
 </p>

 ## Overview

-Build reliable, self-improving AI agents without hardcoding workflows. Define your goal through conversation with a coding agent, and the framework generates a node graph with dynamically created connection code. When things break, the framework captures failure data, evolves the agent through the coding agent, and redeploys. Built-in human-in-the-loop nodes, credential management, and real-time monitoring give you control without sacrificing adaptability.
+Build autonomous, reliable, self-improving AI agents without hardcoding workflows. Define your goal through conversation with a coding agent, and the framework generates a node graph with dynamically created connection code. When things break, the framework captures failure data, evolves the agent through the coding agent, and redeploys. Built-in human-in-the-loop nodes, credential management, and real-time monitoring give you control without sacrificing adaptability.

 Visit [adenhq.com](https://adenhq.com) for complete documentation, examples, and guides.

+https://github.com/user-attachments/assets/846c0cc7-ffd6-47fa-b4b7-495494857a55
+
 ## Who Is Hive For?

 Hive is designed for developers and teams who want to build **production-grade AI agents** without manually wiring complex workflows.
@@ -58,45 +62,36 @@ Hive may not be the best fit if you’re only experimenting with simple agent ch
 Use Hive when you need:

 - Long-running, autonomous agents
- Multi-agent coordination
+- Strong guardrails, process, and controls
 - Continuous improvement based on failures
- Strong monitoring, safety, and budget controls
+- Multi-agent coordination
 - A framework that evolves with your goals

-
-## What is Aden
-
-<p align="center">
-  <img width="100%" alt="Aden Architecture" src="docs/assets/aden-architecture-diagram.jpg" />
-</p>
-
-Aden is a platform for building, deploying, operating, and adapting AI agents:
-
- **Build** - A Coding Agent generates specialized Worker Agents (Sales, Marketing, Ops) from natural language goals
- **Deploy** - Headless deployment with CI/CD integration and full API lifecycle management
- **Operate** - Real-time monitoring, observability, and runtime guardrails keep agents reliable
- **Adapt** - Continuous evaluation, supervision, and adaptation ensure agents improve over time
- **Infra** - Shared memory, LLM integrations, tools, and skills power every agent
-
 ## Quick Links

 - **[Documentation](https://docs.adenhq.com/)** - Complete guides and API reference
 - **[Self-Hosting Guide](https://docs.adenhq.com/getting-started/quickstart)** - Deploy Hive on your infrastructure
 - **[Changelog](https://github.com/adenhq/hive/releases)** - Latest updates and releases
-<!-- - **[Roadmap](https://adenhq.com/roadmap)** - Upcoming features and plans -->
+- **[Roadmap](docs/roadmap.md)** - Upcoming features and plans
 - **[Report Issues](https://github.com/adenhq/hive/issues)** - Bug reports and feature requests
+- **[Contributing](CONTRIBUTING.md)** - How to contribute and submit PRs

 ## Quick Start

-## Prerequisites
+### Prerequisites

 - Python 3.11+ for agent development
- Claude Code or Cursor for utilizing agent skills
+- Claude Code, Codex CLI, or Cursor for utilizing agent skills

 > **Note for Windows Users:** It is strongly recommended to use **WSL (Windows Subsystem for Linux)** or **Git Bash** to run this framework. Some core automation scripts may not execute correctly in standard Command Prompt or PowerShell.

 ### Installation

+> **Note**
+> Hive uses a `uv` workspace layout and is not installed with `pip install`.
+> Running `pip install -e .` from the repository root will create a placeholder package and Hive will not function correctly.
+> Please use the quickstart script below to set up the environment.
+
 ```bash
 # Clone the repository
 git clone https://github.com/adenhq/hive.git
@@ -107,45 +102,85 @@ cd hive
 ```

 This sets up:
+
 - **framework** - Core agent runtime and graph executor (in `core/.venv`)
 - **aden_tools** - MCP tools for agent capabilities (in `tools/.venv`)
- All required Python dependencies
+- **credential store** - Encrypted API key storage (`~/.hive/credentials`)
+- **LLM provider** - Interactive default model configuration
+- All required Python dependencies with `uv`

 ### Build Your First Agent

 ```bash
 # Build an agent using Claude Code
-claude> /building-agents-construction
+claude> /hive

 # Test your agent
-claude> /testing-agent
+claude> /hive-debugger

-# Run your agent
-PYTHONPATH=core:exports python -m your_agent_name run --input '{...}'
+# (in a separate terminal) Launch the interactive dashboard
+hive tui
+
+# Or run directly
+hive run exports/your_agent_name --input '{"key": "value"}'
+```
+## Coding Agent Support
+### Codex CLI
+Hive includes native support for [OpenAI Codex CLI](https://github.com/openai/codex) (v0.101.0+).
+
+1. **Config:** `.codex/config.toml` with `agent-builder` MCP server (tracked in git)
+2. **Skills:** `.agents/skills/` symlinks to Hive skills (tracked in git)
+3. **Launch:** Run `codex` in the repo root, then type `use hive`
+
+Example:
+```
+codex> use hive
 ```

-**[📖 Complete Setup Guide](ENVIRONMENT_SETUP.md)** - Detailed instructions for agent development
+### Opencode
+Hive includes native support for [Opencode](https://github.com/opencode-ai/opencode).

-### Cursor IDE Support
+1. **Setup:** Run the quickstart script.
+2. **Launch:** Open Opencode in the project root.
+3. **Activate:** Type `/hive` in the chat to switch to the Hive Agent.
+4. **Verify:** Ask the agent *"List your tools"* to confirm the connection.

-Skills are also available in Cursor. To enable:
+The agent has access to all Hive skills and can scaffold agents, add tools, and debug workflows directly from the chat.

-1. Open Command Palette (`Cmd+Shift+P` / `Ctrl+Shift+P`)
-2. Run `MCP: Enable` to enable MCP servers
-3. Restart Cursor to load the MCP servers from `.cursor/mcp.json`
-4. Type `/` in Agent chat and search for skills (e.g., `/building-agents-construction`)
+**[📖 Complete Setup Guide](docs/environment-setup.md)** - Detailed instructions for agent development
+
+### Antigravity IDE Support
+
+Skills and MCP servers are also available in [Antigravity IDE](https://antigravity.google/) (Google's AI-powered IDE). **Easiest:** open a terminal in the hive repo folder and run (use `./` — the script is inside the repo):
+
+```bash
+./scripts/setup-antigravity-mcp.sh
+```
+
+**Important:** Always restart/refresh Antigravity IDE after running the setup script—MCP servers only load on startup. After restart, **agent-builder** and **tools** MCP servers should connect. Skills are under `.agent/skills/` (symlinks to `.claude/skills/`). See [docs/antigravity-setup.md](docs/antigravity-setup.md) for manual setup and troubleshooting.

 ## Features

- **Goal-Driven Development** - Define objectives in natural language; the coding agent generates the agent graph and connection code to achieve them
- **Adaptiveness** - Framework captures failures, calibrates according to the objectives, and evolves the agent graph
- **Dynamic Node Connections** - No predefined edges; connection code is generated by any capable LLM based on your goals
+- **[Goal-Driven Development](docs/key_concepts/goals_outcome.md)** - Define objectives in natural language; the coding agent generates the agent graph and connection code to achieve them
+- **[Adaptiveness](docs/key_concepts/evolution.md)** - Framework captures failures, calibrates according to the objectives, and evolves the agent graph
+- **[Dynamic Node Connections](docs/key_concepts/graph.md)** - No predefined edges; connection code is generated by any capable LLM based on your goals
 - **SDK-Wrapped Nodes** - Every node gets shared memory, local RLM memory, monitoring, tools, and LLM access out of the box
- **Human-in-the-Loop** - Intervention nodes that pause execution for human input with configurable timeouts and escalation
+- **[Human-in-the-Loop](docs/key_concepts/graph.md#human-in-the-loop)** - Intervention nodes that pause execution for human input with configurable timeouts and escalation
 - **Real-time Observability** - WebSocket streaming for live monitoring of agent execution, decisions, and node-to-node communication
+- **Interactive TUI Dashboard** - Terminal-based dashboard with live graph view, event log, and chat interface for agent interaction
 - **Cost & Budget Control** - Set spending limits, throttles, and automatic model degradation policies
 - **Production-Ready** - Self-hostable, built for scale and reliability

+## Integration
+
+<a href="https://github.com/adenhq/hive/tree/main/tools/src/aden_tools/tools"><img width="100%" alt="Integration" src="https://github.com/user-attachments/assets/a1573f93-cf02-4bb8-b3d5-b305b05b1e51" /></a>
+
+Hive is built to be model-agnostic and system-agnostic.
+
+- **LLM flexibility** - The Hive framework is designed to support various types of LLMs, including hosted and local models, through LiteLLM-compatible providers.
+- **Business system connectivity** - The Hive framework is designed to connect to all kinds of business systems as tools, such as CRM, support, messaging, data, file, and internal APIs, via MCP.
+
+
 ## Why Aden

 Hive focuses on generating agents that run real business processes rather than generic agents. Instead of requiring you to manually design workflows, define agent interactions, and handle failures reactively, Hive flips the paradigm: **you describe outcomes, and the system builds itself**—delivering an outcome-driven, adaptive experience with an easy-to-use set of tools and integrations.
@@ -182,67 +217,60 @@ flowchart LR
    style V6 fill:#fff,stroke:#ed8c00,stroke-width:1px,color:#cc5d00
 ```

-### The Aden Advantage
+### The Hive Advantage

-| Traditional Frameworks     | Aden                                   |
+| Traditional Frameworks     | Hive                                   |
 | -------------------------- | -------------------------------------- |
 | Hardcode agent workflows   | Describe goals in natural language     |
 | Manual graph definition    | Auto-generated agent graphs            |
-| Reactive error handling    | Outcome-evaluation and adaptiveness               |
+| Reactive error handling    | Outcome-evaluation and adaptiveness    |
 | Static tool configurations | Dynamic SDK-wrapped nodes              |
 | Separate monitoring setup  | Built-in real-time observability       |
 | DIY budget management      | Integrated cost controls & degradation |

 ### How It Works

-1. **Define Your Goal** → Describe what you want to achieve in plain English
-2. **Coding Agent Generates** → Creates the agent graph, connection code, and test cases
-3. **Workers Execute** → SDK-wrapped nodes run with full observability and tool access
+1. **[Define Your Goal](docs/key_concepts/goals_outcome.md)** → Describe what you want to achieve in plain English
+2. **Coding Agent Generates** → Creates the [agent graph](docs/key_concepts/graph.md), connection code, and test cases
+3. **[Workers Execute](docs/key_concepts/worker_agent.md)** → SDK-wrapped nodes run with full observability and tool access
 4. **Control Plane Monitors** → Real-time metrics, budget enforcement, policy management
-5. **Adaptiveness** → On failure, the system evolves the graph and redeploys automatically
+5. **[Adaptiveness](docs/key_concepts/evolution.md)** → On failure, the system evolves the graph and redeploys automatically

-## Run pre-built Agents (Coming Soon)
+## Run Agents

-### Run a sample agent
-Aden Hive provides a list of featured agents that you can use and build on top of.
-
-### Run an agent shared by others
-Put the agent in `exports/` and run `PYTHONPATH=core:exports python -m your_agent_name run --input '{...}'`
-
-
-For building and running goal-driven agents with the framework:
+The `hive` CLI is the primary interface for running agents.

 ```bash
-# One-time setup
-./quickstart.sh
+# Browse and run agents interactively (Recommended)
+hive tui

-# This sets up:
-# - framework package (core runtime)
-# - aden_tools package (MCP tools)
-# - All Python dependencies
+# Run a specific agent directly
+hive run exports/my_agent --input '{"task": "Your input here"}'

-# Build new agents using Claude Code skills
-claude> /building-agents-construction
+# Run a specific agent with the TUI dashboard
+hive run exports/my_agent --tui

-# Test agents
-claude> /testing-agent
-
-# Run agents
-PYTHONPATH=core:exports python -m agent_name run --input '{...}'
+# Interactive REPL
+hive shell
 ```

-See [ENVIRONMENT_SETUP.md](ENVIRONMENT_SETUP.md) for complete setup instructions.
+The TUI scans both `exports/` and `examples/templates/` for available agents.
+
+> **Using Python directly (alternative):** You can also run agents with `PYTHONPATH=exports uv run python -m agent_name run --input '{...}'`
+
+See [environment-setup.md](docs/environment-setup.md) for complete setup instructions.

 ## Documentation

- **[Developer Guide](DEVELOPER.md)** - Comprehensive guide for developers
+- **[Developer Guide](docs/developer-guide.md)** - Comprehensive guide for developers
 - [Getting Started](docs/getting-started.md) - Quick setup instructions
+- [TUI Guide](docs/tui-selection-guide.md) - Interactive dashboard usage
 - [Configuration Guide](docs/configuration.md) - All configuration options
 - [Architecture Overview](docs/architecture/README.md) - System design and structure

 ## Roadmap

-Aden Hive Agent Framework aims to help developers build outcome-oriented, self-adaptive agents. See [ROADMAP.md](ROADMAP.md) for details.
+Aden Hive Agent Framework aims to help developers build outcome-oriented, self-adaptive agents. See [roadmap.md](docs/roadmap.md) for details.

 ```mermaid
 flowchart TD
@@ -310,6 +338,7 @@ subgraph Expansion
        j2["Cursor"]
        j3["Opencode"]
        j4["Antigravity"]
+        j5["Codex CLI"]
    end
    subgraph plat["Platform"]
        k1["JavaScript/TypeScript SDK"]
@@ -332,11 +361,12 @@ end

 classDef done fill:#9e9e9e,color:#fff,stroke:#757575
 ```
+
 ## Contributing

 We welcome contributions from the community! We’re especially looking for help building tools, integrations, and example agents for the framework ([check #2805](https://github.com/adenhq/hive/issues/2805)). If you’re interested in extending its functionality, this is the perfect place to start. Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.

-**Important:** Please get assigned to an issue before submitting a PR. Comment on an issue to claim it, and a maintainer will assign you. Issues with reproducible steps and proposals are prioritized. This helps prevent duplicate work. 
+**Important:** Please get assigned to an issue before submitting a PR. Comment on an issue to claim it, and a maintainer will assign you. Issues with reproducible steps and proposals are prioritized. This helps prevent duplicate work.

 1. Find or create an issue and get assigned
 2. Fork the repository
@@ -369,10 +399,6 @@ This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENS

 ## Frequently Asked Questions (FAQ)

-**Q: Does Hive depend on LangChain or other agent frameworks?**
-
-No. Hive is built from the ground up with no dependencies on LangChain, CrewAI, or other agent frameworks. The framework is designed to be lean and flexible, generating agent graphs dynamically rather than relying on predefined components.
-
 **Q: What LLM providers does Hive support?**

 Hive supports 100+ LLM providers through LiteLLM integration, including OpenAI (GPT-4, GPT-4o), Anthropic (Claude models), Google Gemini, DeepSeek, Mistral, Groq, and many more. Simply set the appropriate API key environment variable and specify the model name.
@@ -383,37 +409,25 @@ Yes! Hive supports local models through LiteLLM. Simply use the model name forma

 **Q: What makes Hive different from other agent frameworks?**

-Hive generates your entire agent system from natural language goals using a coding agent—you don't hardcode workflows or manually define graphs. When agents fail, the framework automatically captures failure data, evolves the agent graph, and redeploys. This self-improving loop is unique to Aden.
+Hive generates your entire agent system from natural language goals using a coding agent—you don't hardcode workflows or manually define graphs. When agents fail, the framework automatically captures failure data, [evolves the agent graph](docs/key_concepts/evolution.md), and redeploys. This self-improving loop is unique to Aden.

 **Q: Is Hive open-source?**

 Yes, Hive is fully open-source under the Apache License 2.0. We actively encourage community contributions and collaboration.

-**Q: Does Hive collect data from users?**
-
-Hive collects telemetry data for monitoring and observability purposes, including token usage, latency metrics, and cost tracking. Content capture (prompts and responses) is configurable and stored with team-scoped data isolation. All data stays within your infrastructure when self-hosted.
-
-**Q: What deployment options does Hive support?**
-
-Hive supports self-hosted deployments via Python packages. See the [Environment Setup Guide](ENVIRONMENT_SETUP.md) for installation instructions. Cloud deployment options and Kubernetes-ready configurations are on the roadmap.
-
 **Q: Can Hive handle complex, production-scale use cases?**

 Yes. Hive is explicitly designed for production environments with features like automatic failure recovery, real-time observability, cost controls, and horizontal scaling support. The framework handles both simple automations and complex multi-agent workflows.

 **Q: Does Hive support human-in-the-loop workflows?**

-Yes, Hive fully supports human-in-the-loop workflows through intervention nodes that pause execution for human input. These include configurable timeouts and escalation policies, allowing seamless collaboration between human experts and AI agents.
-
-**Q: What monitoring and debugging tools does Hive provide?**
-
-Hive includes comprehensive observability features: real-time WebSocket streaming for live agent execution monitoring, TimescaleDB-powered analytics for cost and performance metrics, health check endpoints for Kubernetes integration, and MCP tools for agent execution, including file operations, web search, data processing, and more.
+Yes, Hive fully supports [human-in-the-loop](docs/key_concepts/graph.md#human-in-the-loop) workflows through intervention nodes that pause execution for human input. These include configurable timeouts and escalation policies, allowing seamless collaboration between human experts and AI agents.

 **Q: What programming languages does Hive support?**

 The Hive framework is built in Python. A JavaScript/TypeScript SDK is on the roadmap.

-**Q: Can Aden agents interact with external tools and APIs?**
+**Q: Can Hive agents interact with external tools and APIs?**

 Yes. Aden's SDK-wrapped nodes provide built-in tool access, and the framework supports flexible tool ecosystems. Agents can integrate with external APIs, databases, and services through the node architecture.

@@ -423,7 +437,7 @@ Hive provides granular budget controls including spending limits, throttles, and

 **Q: Where can I find examples and documentation?**

-Visit [docs.adenhq.com](https://docs.adenhq.com/) for complete guides, API reference, and getting started tutorials. The repository also includes documentation in the `docs/` folder and a comprehensive [DEVELOPER.md](DEVELOPER.md) guide.
+Visit [docs.adenhq.com](https://docs.adenhq.com/) for complete guides, API reference, and getting started tutorials. The repository also includes documentation in the `docs/` folder and a comprehensive [developer guide](docs/developer-guide.md).

 **Q: How can I contribute to Aden?**

@@ -437,10 +451,6 @@ Aden's adaptation loop begins working from the first execution. When an agent fa

 Hive focuses on generating agents that run real business processes, rather than generic agents. This vision emphasizes outcome-driven design, adaptability, and an easy-to-use set of tools and integrations.

-**Q: Does Aden offer enterprise support?**
-
-For enterprise inquiries, contact the Aden team through [adenhq.com](https://adenhq.com) or join our [Discord community](https://discord.com/invite/MXE49hrKDk) for support and discussions.
-
 ---

 <p align="center">
@@ -1,4 +1,5 @@
 exports/
 docs/
+.agent-builder-sessions/
 .pytest_cache/
 **/__pycache__/
@@ -82,7 +82,7 @@ Register an MCP server as a tool source for your agent.
    "example_tool"
  ],
  "total_mcp_servers": 1,
-  "note": "MCP server 'tools' registered with 6 tools. These tools can now be used in llm_tool_use nodes."
+  "note": "MCP server 'tools' registered with 6 tools. These tools can now be used in event_loop nodes."
 }
 ```

@@ -149,7 +149,7 @@ List tools available from registered MCP servers.
    ]
  },
  "total_tools": 6,
-  "note": "Use these tool names in the 'tools' parameter when adding llm_tool_use nodes"
+  "note": "Use these tool names in the 'tools' parameter when adding event_loop nodes"
 }
 ```

@@ -246,7 +246,7 @@ Here's a complete workflow for building an agent with MCP tools:
    "node_id": "web-searcher",
    "name": "Web Search",
    "description": "Search the web for information",
-    "node_type": "llm_tool_use",
+    "node_type": "event_loop",
    "input_keys": "[\"query\"]",
    "output_keys": "[\"search_results\"]",
    "system_prompt": "Search for {query} using the web_search tool",
@@ -119,7 +119,7 @@ builder = WorkflowBuilder()
 builder.add_node(
    node_id="researcher",
    name="Web Researcher",
-    node_type="llm_tool_use",
+    node_type="event_loop",
    system_prompt="Research the topic using web_search",
    tools=["web_search"],  # Tool from tools MCP server
    input_keys=["topic"],
@@ -137,7 +137,7 @@ Tools from MCP servers can be referenced in your agent.json just like built-in t
    {
      "id": "searcher",
      "name": "Web Searcher",
-      "node_type": "llm_tool_use",
+      "node_type": "event_loop",
      "system_prompt": "Search for information about {topic}",
      "tools": ["web_search", "web_scrape"],
      "input_keys": ["topic"],
@@ -103,31 +103,20 @@ Add a processing node to the agent graph.
 - `node_id` (string, required): Unique node identifier
 - `name` (string, required): Human-readable name
 - `description` (string, required): What this node does
- `node_type` (string, required): One of: `llm_generate`, `llm_tool_use`, `router`, `function`
+- `node_type` (string, required): Must be `event_loop` (the only valid type)
 - `input_keys` (string, required): JSON array of input variable names
 - `output_keys` (string, required): JSON array of output variable names
- `system_prompt` (string, optional): System prompt for LLM nodes
- `tools` (string, optional): JSON array of tool names for tool_use nodes
- `routes` (string, optional): JSON object of route mappings for router nodes
+- `system_prompt` (string, optional): System prompt for the LLM
+- `tools` (string, optional): JSON array of tool names
+- `client_facing` (boolean, optional): Set to true for human-in-the-loop interaction

-**Node Types:**
+**Node Type:**

-1. **llm_generate**: Uses LLM to generate output from inputs
-   - Requires: `system_prompt`
-   - Tools: Not used
-
-2. **llm_tool_use**: Uses LLM with tools to accomplish tasks
-   - Requires: `system_prompt`, `tools`
-   - Tools: Array of tool names (e.g., `["web_search", "web_fetch"]`)
-
-3. **router**: LLM-powered routing to different paths
-   - Requires: `system_prompt`, `routes`
-   - Routes: Object mapping route names to target node IDs
-   - Example: `{"pass": "success_node", "fail": "retry_node"}`
-
-4. **function**: Executes a pre-defined function
-   - System prompt describes the function behavior
-   - No LLM calls, pure computation
+**event_loop**: LLM-powered node with self-correction loop
+- Requires: `system_prompt`
+- Optional: `tools` (array of tool names, e.g., `["web_search", "web_fetch"]`)
+- Optional: `client_facing` (set to true for HITL / user interaction)
+- Supports: iterative refinement, judge-based evaluation, tool use, streaming

 **Example:**
 ```json
@@ -135,7 +124,7 @@ Add a processing node to the agent graph.
  "node_id": "search_sources",
  "name": "Search Sources",
  "description": "Searches for relevant sources on the topic",
-  "node_type": "llm_tool_use",
+  "node_type": "event_loop",
  "input_keys": "[\"topic\", \"search_queries\"]",
  "output_keys": "[\"sources\", \"source_count\"]",
  "system_prompt": "Search for sources using the provided queries...",
@@ -198,7 +187,7 @@ Export the validated graph as an agent specification.

 **What it does:**
 1. Validates the graph
-2. Auto-generates missing edges from router routes
+2. Validates edge connectivity
 3. Writes files to disk:
   - `exports/{agent-name}/agent.json` - Full agent specification
   - `exports/{agent-name}/README.md` - Auto-generated documentation
@@ -252,47 +241,6 @@ Test the complete agent graph with sample inputs.

 ---

-### Evaluation Rules
-
-#### `add_evaluation_rule`
-Add a rule for the HybridJudge to evaluate node outputs.
-
-**Parameters:**
- `rule_id` (string, required): Unique rule identifier
- `description` (string, required): What this rule checks
- `condition` (string, required): Python expression to evaluate
- `action` (string, required): Action to take: `accept`, `retry`, `escalate`
- `priority` (integer, optional): Rule priority (default: 0)
- `feedback_template` (string, optional): Feedback message template
-
-**Condition Examples:**
- `'result.get("success") == True'` - Check for success flag
- `'result.get("error_type") == "timeout"'` - Check error type
- `'len(result.get("data", [])) > 0'` - Check for non-empty data
-
-**Example:**
-```json
-{
-  "rule_id": "timeout_retry",
-  "description": "Retry on timeout errors",
-  "condition": "result.get('error_type') == 'timeout'",
-  "action": "retry",
-  "priority": 10,
-  "feedback_template": "Timeout occurred, retrying..."
-}
-```
-
-#### `list_evaluation_rules`
-List all configured evaluation rules.
-
-#### `remove_evaluation_rule`
-Remove an evaluation rule.
-
-**Parameters:**
- `rule_id` (string, required): Rule to remove
-
---
-
 ## Example Workflow

 Here's a complete workflow for building a research agent:
@@ -320,7 +268,7 @@ add_node(
    node_id="planner",
    name="Research Planner",
    description="Creates research strategy",
-    node_type="llm_generate",
+    node_type="event_loop",
    input_keys='["topic"]',
    output_keys='["strategy", "queries"]',
    system_prompt="Analyze topic and create research plan..."
@@ -330,7 +278,7 @@ add_node(
    node_id="searcher",
    name="Search Sources",
    description="Find relevant sources",
-    node_type="llm_tool_use",
+    node_type="event_loop",
    input_keys='["queries"]',
    output_keys='["sources"]',
    system_prompt="Search for sources...",
@@ -359,10 +307,9 @@ The exported agent will be saved to `exports/research-agent/`.

 1. **Start with the goal**: Define clear success criteria before building nodes
 2. **Test nodes individually**: Use `test_node` to verify each node works
-3. **Use router nodes for branching**: Don't create edges manually for routers - define routes and they'll be auto-generated
-4. **Add evaluation rules**: Help the judge evaluate outputs deterministically
-5. **Validate early, validate often**: Run `validate_graph` after adding nodes/edges
-6. **Check exports**: Review the generated README.md to verify your agent structure
+3. **Use conditional edges for branching**: Set `condition_expr` on an edge to route execution at decision points
+4. **Validate early, validate often**: Run `validate_graph` after adding nodes/edges
+5. **Check exports**: Review the generated README.md to verify your agent structure

 ---

@@ -14,7 +14,7 @@ Framework provides a runtime framework that captures **decisions**, not just act
 ## Installation

 ```bash
-pip install -e .
+uv pip install -e .
 ```

 ## MCP Server Setup
@@ -45,13 +45,13 @@ If you prefer manual setup:

 ```bash
 # Install framework
-pip install -e .
+uv pip install -e .

 # Install MCP dependencies
-pip install mcp fastmcp
+uv pip install mcp fastmcp

 # Test the server
-python -m framework.mcp.agent_builder_server
+uv run python -m framework.mcp.agent_builder_server
 ```

 ### Using with MCP Clients
@@ -73,7 +73,7 @@ To use the agent builder with Claude Desktop or other MCP clients, add this to y
 The MCP server provides tools for:
 - Creating agent building sessions
 - Defining goals with success criteria
- Adding nodes (llm_generate, llm_tool_use, router, function)
+- Adding nodes (`event_loop` is the only node type)
 - Connecting nodes with edges
 - Validating and exporting agent graphs
 - Testing nodes and full agent graphs
@@ -86,13 +86,13 @@ Run an LLM-powered calculator:

 ```bash
 # Single calculation
-python -m framework calculate "2 + 3 * 4"
+uv run python -m framework calculate "2 + 3 * 4"

 # Interactive mode
-python -m framework interactive
+uv run python -m framework interactive

 # Analyze runs with Builder
-python -m framework analyze calculator
+uv run python -m framework analyze calculator
 ```

 ### Using the Runtime
@@ -136,16 +136,16 @@ Tests are generated using MCP tools (`generate_constraint_tests`, `generate_succ

 ```bash
 # Run tests against an agent
-python -m framework test-run <agent_path> --goal <goal_id> --parallel 4
+uv run python -m framework test-run <agent_path> --goal <goal_id> --parallel 4

 # Debug failed tests
-python -m framework test-debug <agent_path> <test_name>
+uv run python -m framework test-debug <agent_path> <test_name>

 # List tests for a goal
-python -m framework test-list <goal_id>
+uv run python -m framework test-list <goal_id>
 ```

-For detailed testing workflows, see the [testing-agent skill](../.claude/skills/testing-agent/SKILL.md).
+For detailed testing workflows, see the [hive-test skill](../.claude/skills/hive-test/SKILL.md).

 ### Analyzing Agent Behavior with Builder

@@ -68,7 +68,7 @@ from framework.graph.event_loop_node import (  # noqa: E402
 )
 from framework.graph.executor import GraphExecutor  # noqa: E402
 from framework.graph.goal import Goal  # noqa: E402
-from framework.graph.node import NodeSpec  # noqa: E402
+from framework.graph.node import NodeContext, NodeProtocol, NodeResult, NodeSpec  # noqa: E402
 from framework.llm.litellm import LiteLLMProvider  # noqa: E402
 from framework.runner.tool_registry import ToolRegistry  # noqa: E402
 from framework.runtime.core import Runtime  # noqa: E402
@@ -654,7 +654,7 @@ NODE_SPECS = {
        id="sender",
        name="Sender",
        description="Send approved campaign emails",
-        node_type="function",
+        node_type="event_loop",
        input_keys=["approved_emails"],
        output_keys=["send_results"],
    ),
@@ -823,11 +823,20 @@ def _send_email_via_resend(
        return {"error": f"Network error: {e}"}


+class SenderNode(NodeProtocol):
+    """Graph-node adapter around the ``send_emails`` function.
+
+    Reads ``approved_emails`` from the node input, hands it to
+    ``send_emails``, and publishes the returned string under ``send_results``
+    both in ``ctx.memory`` and in the node output.
+    """
+
+    async def execute(self, ctx: NodeContext) -> NodeResult:
+        # A missing input key is forwarded as an empty string.
+        payload = ctx.input_data.get("approved_emails", "")
+        outcome = send_emails(approved_emails=payload)
+        ctx.memory.write("send_results", outcome)
+        return NodeResult(success=True, output={"send_results": outcome})
+
+
 def send_emails(approved_emails: str = "") -> str:
    """Send approved campaign emails via Resend, or log if unconfigured.

-    Called by FunctionNode which unpacks input_keys as kwargs.
-    Returns a JSON string (FunctionNode wraps it in NodeResult).
+    Returns a JSON string.
    """
    approved = approved_emails
    if not approved:
@@ -1780,7 +1789,7 @@ async def _run_pipeline(websocket, initial_message: str):
    )
    for nid, impl in nodes.items():
        executor.register_node(nid, impl)
-    executor.register_function("sender", send_emails)
+    executor.register_node("sender", SenderNode())

    # --- Event forwarding: bus → WebSocket ---

@@ -4,34 +4,45 @@ Minimal Manual Agent Example
 This example demonstrates how to build and run an agent programmatically
 without using the Claude Code CLI or external LLM APIs.

-It uses 'function' nodes to define logic in pure Python, making it perfect
-for understanding the core runtime loop:
+It uses custom NodeProtocol implementations to define logic in pure Python,
+making it perfect for understanding the core runtime loop:
 Setup -> Graph definition -> Execution -> Result

 Run with:
-    PYTHONPATH=core python core/examples/manual_agent.py
+    uv run python core/examples/manual_agent.py
 """

 import asyncio

 from framework.graph import EdgeCondition, EdgeSpec, Goal, GraphSpec, NodeSpec
 from framework.graph.executor import GraphExecutor
+from framework.graph.node import NodeContext, NodeProtocol, NodeResult
 from framework.runtime.core import Runtime


-# 1. Define Node Logic (Pure Python Functions)
-def greet(name: str) -> str:
+# 1. Define Node Logic (Custom NodeProtocol implementations)
+class GreeterNode(NodeProtocol):
    """Generate a simple greeting."""
-    return f"Hello, {name}!"
+
+    async def execute(self, ctx: NodeContext) -> NodeResult:
+        name = ctx.input_data.get("name", "World")
+        greeting = f"Hello, {name}!"
+        ctx.memory.write("greeting", greeting)
+        return NodeResult(success=True, output={"greeting": greeting})


-def uppercase(greeting: str) -> str:
+class UppercaserNode(NodeProtocol):
    """Convert text to uppercase."""
-    return greeting.upper()
+
+    async def execute(self, ctx: NodeContext) -> NodeResult:
+        greeting = ctx.input_data.get("greeting") or ctx.memory.read("greeting") or ""
+        result = greeting.upper()
+        ctx.memory.write("final_greeting", result)
+        return NodeResult(success=True, output={"final_greeting": result})


 async def main():
-    print("🚀 Setting up Manual Agent...")
+    print("Setting up Manual Agent...")

    # 2. Define the Goal
    # Every agent needs a goal with success criteria
@@ -55,8 +66,7 @@ async def main():
        id="greeter",
        name="Greeter",
        description="Generates a simple greeting",
-        node_type="function",
-        function="greet",  # Matches the registered function name
+        node_type="event_loop",
        input_keys=["name"],
        output_keys=["greeting"],
    )
@@ -65,8 +75,7 @@ async def main():
        id="uppercaser",
        name="Uppercaser",
        description="Converts greeting to uppercase",
-        node_type="function",
-        function="uppercase",
+        node_type="event_loop",
        input_keys=["greeting"],
        output_keys=["final_greeting"],
    )
@@ -98,23 +107,23 @@ async def main():
    runtime = Runtime(storage_path=Path("./agent_logs"))
    executor = GraphExecutor(runtime=runtime)

-    # 7. Register Function Implementations
-    # Connect string names in NodeSpecs to actual Python functions
-    executor.register_function("greeter", greet)
-    executor.register_function("uppercaser", uppercase)
+    # 7. Register Node Implementations
+    # Connect node IDs in the graph to actual Python implementations
+    executor.register_node("greeter", GreeterNode())
+    executor.register_node("uppercaser", UppercaserNode())

    # 8. Execute Agent
-    print("▶ Executing agent with input: name='Alice'...")
+    print("Executing agent with input: name='Alice'...")

    result = await executor.execute(graph=graph, goal=goal, input_data={"name": "Alice"})

    # 9. Verify Results
    if result.success:
-        print("\n✅ Success!")
+        print("\nSuccess!")
        print(f"Path taken: {' -> '.join(result.path)}")
        print(f"Final output: {result.output.get('final_greeting')}")
    else:
-        print(f"\n❌ Failed: {result.error}")
+        print(f"\nFailed: {result.error}")


 if __name__ == "__main__":
@@ -122,7 +122,7 @@ async def example_4_custom_agent_with_mcp_tools():
        node_id="web-searcher",
        name="Web Search",
        description="Search the web for information",
-        node_type="llm_tool_use",
+        node_type="event_loop",
        system_prompt="Search for {query} and return the top results. Use the web_search tool.",
        tools=["web_search"],  # This tool comes from tools MCP server
        input_keys=["query"],
@@ -133,7 +133,7 @@ async def example_4_custom_agent_with_mcp_tools():
        node_id="summarizer",
        name="Summarize Results",
        description="Summarize the search results",
-        node_type="llm_generate",
+        node_type="event_loop",
        system_prompt="Summarize the following search results in 2-3 sentences: {search_results}",
        input_keys=["search_results"],
        output_keys=["summary"],
@@ -4,8 +4,8 @@
      "name": "tools",
      "description": "Aden tools including web search, file operations, and PDF reading",
      "transport": "stdio",
-      "command": "python",
-      "args": ["mcp_server.py", "--stdio"],
+      "command": "uv",
+      "args": ["run", "python", "mcp_server.py", "--stdio"],
      "cwd": "../tools",
      "env": {
        "BRAVE_SEARCH_API_KEY": "${BRAVE_SEARCH_API_KEY}"
@@ -0,0 +1,13 @@
+"""Framework-provided agents."""
+
+from pathlib import Path
+
+FRAMEWORK_AGENTS_DIR = Path(__file__).parent
+
+
+def list_framework_agents() -> list[Path]:
+    """Return the framework agent directories, ordered by directory name.
+
+    A child of ``FRAMEWORK_AGENTS_DIR`` counts as an agent when it is a
+    directory that contains an ``agent.py`` file.
+    """
+    agent_dirs = [
+        entry
+        for entry in FRAMEWORK_AGENTS_DIR.iterdir()
+        if entry.is_dir() and (entry / "agent.py").exists()
+    ]
+    agent_dirs.sort(key=lambda entry: entry.name)
+    return agent_dirs
@@ -0,0 +1,44 @@
+"""
+Hive Coder — Native coding agent that builds Hive agent packages.
+
+Deeply understands the agent framework and produces complete Python packages
+with goals, nodes, edges, system prompts, MCP configuration, and tests
+from natural language specifications.
+"""
+
+from .agent import (
+    HiveCoderAgent,
+    conversation_mode,
+    default_agent,
+    edges,
+    entry_node,
+    entry_points,
+    goal,
+    identity_prompt,
+    loop_config,
+    nodes,
+    pause_nodes,
+    terminal_nodes,
+)
+from .config import AgentMetadata, RuntimeConfig, default_config, metadata
+
+__version__ = "1.0.0"
+
+__all__ = [
+    "HiveCoderAgent",
+    "default_agent",
+    "goal",
+    "nodes",
+    "edges",
+    "entry_node",
+    "entry_points",
+    "pause_nodes",
+    "terminal_nodes",
+    "conversation_mode",
+    "identity_prompt",
+    "loop_config",
+    "RuntimeConfig",
+    "AgentMetadata",
+    "default_config",
+    "metadata",
+]
@@ -0,0 +1,223 @@
+"""CLI entry point for Hive Coder agent."""
+
+import asyncio
+import json
+import logging
+import sys
+
+import click
+
+from .agent import HiveCoderAgent, default_agent
+
+
+def setup_logging(verbose=False, debug=False):
+    """Configure root logging on stderr and set the ``framework`` logger level.
+
+    Args:
+        verbose: Log at INFO with a bare-message format.
+        debug: Log at DEBUG with timestamp and logger name; takes precedence
+            over ``verbose``.
+
+    With neither flag set, logging is at WARNING with a level-name prefix.
+    """
+    # Start from the quiet default, then let the flags override it.
+    level = logging.WARNING
+    fmt = "%(levelname)s: %(message)s"
+    if verbose:
+        level = logging.INFO
+        fmt = "%(message)s"
+    if debug:
+        # Checked last so that debug wins when both flags are given.
+        level = logging.DEBUG
+        fmt = "%(asctime)s %(name)s: %(message)s"
+
+    logging.basicConfig(level=level, format=fmt, stream=sys.stderr)
+    logging.getLogger("framework").setLevel(level)
+
+
+@click.group()
+@click.version_option(version="1.0.0")
+def cli():
+    """Hive Coder — Build Hive agent packages from natural language."""
+    # Command group only: the subcommands below attach via @cli.command().
+
+
+@cli.command()
+@click.option("--request", "-r", type=str, required=True, help="What agent to build")
+@click.option("--mock", is_flag=True, help="Run in mock mode")
+@click.option("--quiet", "-q", is_flag=True, help="Only output result JSON")
+@click.option("--verbose", "-v", is_flag=True, help="Show execution details")
+@click.option("--debug", is_flag=True, help="Show debug logging")
+def run(request, mock, quiet, verbose, debug):
+    """Execute agent building from a request."""
+    # --quiet skips logging configuration entirely.
+    if not quiet:
+        setup_logging(verbose=verbose, debug=debug)
+
+    result = asyncio.run(default_agent.run({"user_request": request}, mock_mode=mock))
+
+    # The "error" key is only present when the result carries an error.
+    summary = {
+        "success": result.success,
+        "steps_executed": result.steps_executed,
+        "output": result.output,
+    }
+    if result.error:
+        summary["error"] = result.error
+
+    click.echo(json.dumps(summary, indent=2, default=str))
+
+    # The exit status mirrors the execution outcome.
+    exit_code = 0 if result.success else 1
+    sys.exit(exit_code)
+
+
+@cli.command()
+@click.option("--mock", is_flag=True, help="Run in mock mode")
+@click.option("--verbose", "-v", is_flag=True, help="Show execution details")
+@click.option("--debug", is_flag=True, help="Show debug logging")
+def tui(mock, verbose, debug):
+    """Launch the TUI dashboard for interactive agent building."""
+    setup_logging(verbose=verbose, debug=debug)
+
+    # Imported lazily so the other subcommands work when the TUI module
+    # cannot be imported; in that case print a hint and exit with status 1.
+    try:
+        from framework.tui.app import AdenTUI
+    except ImportError:
+        click.echo("TUI requires the 'textual' package. Install with: pip install textual")
+        sys.exit(1)
+
+    from pathlib import Path
+
+    from framework.llm import LiteLLMProvider
+    from framework.runner.tool_registry import ToolRegistry
+    from framework.runtime.agent_runtime import create_agent_runtime
+    from framework.runtime.execution_stream import EntryPointSpec
+
+    async def run_with_tui():
+        # Builds the runtime by hand here rather than through
+        # HiveCoderAgent.start(); the entry point id and isolation level
+        # used below differ from the ones HiveCoderAgent._setup() registers.
+        agent = HiveCoderAgent()
+
+        agent._tool_registry = ToolRegistry()
+
+        # Runtime state is stored under ~/.hive/agents/hive_coder.
+        storage_path = Path.home() / ".hive" / "agents" / "hive_coder"
+        storage_path.mkdir(parents=True, exist_ok=True)
+
+        # MCP servers are optional: only loaded when the config file sits
+        # next to this module.
+        mcp_config_path = Path(__file__).parent / "mcp_servers.json"
+        if mcp_config_path.exists():
+            agent._tool_registry.load_mcp_config(mcp_config_path)
+
+        # In mock mode no LLM provider is created and None is passed on.
+        llm = None
+        if not mock:
+            llm = LiteLLMProvider(
+                model=agent.config.model,
+                api_key=agent.config.api_key,
+                api_base=agent.config.api_base,
+            )
+
+        tools = list(agent._tool_registry.get_tools().values())
+        tool_executor = agent._tool_registry.get_executor()
+        graph = agent._build_graph()
+
+        runtime = create_agent_runtime(
+            graph=graph,
+            goal=agent.goal,
+            storage_path=storage_path,
+            entry_points=[
+                EntryPointSpec(
+                    id="start",
+                    name="Build Agent",
+                    entry_node="coder",
+                    trigger_type="manual",
+                    isolation_level="isolated",
+                ),
+            ],
+            llm=llm,
+            tools=tools,
+            tool_executor=tool_executor,
+        )
+
+        await runtime.start()
+
+        # Stop the runtime however the TUI exits, including on error.
+        try:
+            app = AdenTUI(runtime)
+            await app.run_async()
+        finally:
+            await runtime.stop()
+
+    asyncio.run(run_with_tui())
+
+
+@cli.command()
+@click.option("--json", "output_json", is_flag=True)
+def info(output_json):
+    """Show agent information."""
+    details = default_agent.info()
+
+    # Machine-readable output: dump the whole info dict and stop.
+    if output_json:
+        click.echo(json.dumps(details, indent=2))
+        return
+
+    # Human-readable summary, one field per line.
+    node_list = ", ".join(details["nodes"])
+    client_facing = ", ".join(details["client_facing_nodes"])
+    # An empty terminal list is shown as a forever-alive agent.
+    terminals = ", ".join(details["terminal_nodes"]) or "(forever-alive)"
+
+    click.echo(f"Agent: {details['name']}")
+    click.echo(f"Version: {details['version']}")
+    click.echo(f"Description: {details['description']}")
+    click.echo(f"\nNodes: {node_list}")
+    click.echo(f"Client-facing: {client_facing}")
+    click.echo(f"Entry: {details['entry_node']}")
+    click.echo(f"Terminal: {terminals}")
+
+
+@cli.command()
+def validate():
+    """Validate agent structure."""
+    report = default_agent.validate()
+    is_valid = report["valid"]
+
+    if not is_valid:
+        click.echo("Agent has errors:")
+        for problem in report["errors"]:
+            click.echo(f"  ERROR: {problem}")
+    else:
+        click.echo("Agent is valid")
+        # Warnings are only listed for an otherwise valid agent.
+        for caution in report["warnings"] or ():
+            click.echo(f"  WARNING: {caution}")
+
+    # Exit status 0 for a valid agent, 1 otherwise.
+    sys.exit(0 if is_valid else 1)
+
+
+@cli.command()
+@click.option("--verbose", "-v", is_flag=True)
+def shell(verbose):
+    """Interactive agent building session (CLI, no TUI)."""
+    # Synchronous click entry point: drive the async shell to completion.
+    session = _interactive_shell(verbose)
+    asyncio.run(session)
+
+
+async def _interactive_shell(verbose=False):
+    """Run a read-build-report loop on the terminal until the user leaves.
+
+    Each non-blank line read from stdin is sent to the agent's ``default``
+    entry point as ``user_request`` and the outcome is echoed back. The loop
+    ends on ``quit``/``exit``/``q`` (case-insensitive, surrounding whitespace
+    ignored), on Ctrl-C, or on end of input (Ctrl-D / closed stdin). The
+    agent is stopped on every exit path.
+
+    Args:
+        verbose: Passed to ``setup_logging`` to enable INFO-level output.
+    """
+    setup_logging(verbose=verbose)
+
+    click.echo("=== Hive Coder ===")
+    click.echo("Describe the agent you want to build (or 'quit' to exit):\n")
+
+    agent = HiveCoderAgent()
+    await agent.start()
+
+    # input() blocks, so it is run on the default executor to keep the
+    # event loop free while waiting for the user.
+    loop = asyncio.get_running_loop()
+
+    try:
+        while True:
+            try:
+                request = await loop.run_in_executor(None, input, "Build> ")
+                command = request.strip().lower()
+
+                if command in {"quit", "exit", "q"}:
+                    click.echo("Goodbye!")
+                    break
+
+                if not command:
+                    continue
+
+                click.echo("\nBuilding agent...\n")
+
+                result = await agent.trigger_and_wait("default", {"user_request": request})
+
+                if result is None:
+                    click.echo("\n[Execution timed out]\n")
+                    continue
+
+                if result.success:
+                    output = result.output or {}
+                    agent_name = output.get("agent_name", "unknown")
+                    validation = output.get("validation_result", "unknown")
+                    click.echo(f"\nAgent '{agent_name}' built. Validation: {validation}\n")
+                else:
+                    click.echo(f"\nBuild failed: {result.error}\n")
+
+            except (KeyboardInterrupt, EOFError):
+                # EOFError must end the session: once stdin is exhausted every
+                # further input() call raises again, so treating it as a
+                # recoverable error would loop forever printing tracebacks.
+                click.echo("\nGoodbye!")
+                break
+            except Exception as e:
+                # A failed build should not end the session; report and go on.
+                click.echo(f"Error: {e}", err=True)
+                import traceback
+
+                traceback.print_exc()
+    finally:
+        await agent.stop()
+
+
+if __name__ == "__main__":
+    cli()
@@ -0,0 +1,314 @@
+"""Agent graph construction for Hive Coder."""
+
+from pathlib import Path
+
+from framework.graph import Constraint, Goal, SuccessCriterion
+from framework.graph.checkpoint_config import CheckpointConfig
+from framework.graph.edge import GraphSpec
+from framework.graph.executor import ExecutionResult
+from framework.llm import LiteLLMProvider
+from framework.runner.tool_registry import ToolRegistry
+from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
+from framework.runtime.execution_stream import EntryPointSpec
+
+from .config import default_config, metadata
+from .nodes import coder_node
+
+# Goal definition
+# The four success-criterion weights below sum to 1.0
+# (0.30 + 0.25 + 0.25 + 0.20); every constraint is declared "hard".
+goal = Goal(
+    id="agent-builder",
+    name="Hive Agent Builder",
+    description=(
+        "Build complete, validated Hive agent packages from natural language "
+        "specifications. Produces production-ready Python packages with goals, "
+        "nodes, edges, system prompts, MCP configuration, and tests."
+    ),
+    success_criteria=[
+        SuccessCriterion(
+            id="valid-package",
+            description="Generated agent package passes structural validation",
+            metric="validation_pass",
+            target="true",
+            weight=0.30,
+        ),
+        # The description names six files, matching the ">=6" target.
+        SuccessCriterion(
+            id="complete-files",
+            description=(
+                "All required files generated: agent.py, config.py, "
+                "nodes/__init__.py, __init__.py, __main__.py, mcp_servers.json"
+            ),
+            metric="file_count",
+            target=">=6",
+            weight=0.25,
+        ),
+        SuccessCriterion(
+            id="user-satisfaction",
+            description="User reviews and approves the generated agent",
+            metric="user_approval",
+            target="true",
+            weight=0.25,
+        ),
+        SuccessCriterion(
+            id="framework-compliance",
+            description=(
+                "Generated code follows framework patterns: STEP 1/STEP 2 "
+                "for client-facing, correct imports, entry_points format"
+            ),
+            metric="pattern_compliance",
+            target="100%",
+            weight=0.20,
+        ),
+    ],
+    constraints=[
+        Constraint(
+            id="dynamic-tool-discovery",
+            description=(
+                "Always discover available tools dynamically via "
+                "discover_mcp_tools before referencing tools in agent designs"
+            ),
+            constraint_type="hard",
+            category="correctness",
+        ),
+        Constraint(
+            id="no-fabricated-tools",
+            description="Only reference tools that exist in hive-tools MCP",
+            constraint_type="hard",
+            category="correctness",
+        ),
+        Constraint(
+            id="valid-python",
+            description="All generated Python files must be syntactically correct",
+            constraint_type="hard",
+            category="correctness",
+        ),
+        Constraint(
+            id="self-verification",
+            description="Run validation after writing code; fix errors before presenting",
+            constraint_type="hard",
+            category="quality",
+        ),
+    ],
+)
+
+# Nodes — single coder node (guardian is now auto-attached by the framework)
+nodes = [coder_node]
+
+# No edges needed — single forever-alive event_loop node
+edges = []
+
+# Graph configuration
+entry_node = "coder"
+# NOTE(review): HiveCoderAgent._setup() registers its runtime entry point
+# under the id "default", while this mapping uses "start" — confirm which id
+# external loaders expect.
+entry_points = {"start": "coder"}
+pause_nodes = []
+terminal_nodes = []  # Forever-alive: loops until user exits
+
+# No async entry points — guardian is now auto-attached via attach_guardian()
+async_entry_points = []
+
+# Module-level variables read by AgentRunner.load()
+# They are also passed to GraphSpec by HiveCoderAgent._build_graph().
+conversation_mode = "continuous"
+identity_prompt = (
+    "You are Hive Coder, the best agent-building coding agent on the planet. "
+    "You deeply understand the Hive agent framework at the source code level "
+    "and produce production-ready agent packages from natural language. "
+    "You can dynamically discover available framework tools, inspect runtime "
+    "sessions and checkpoints from agents you build, and run their test suites. "
+    "You follow coding agent discipline: read before writing, verify "
+    "assumptions by reading actual code, adhere to project conventions, "
+    "self-verify with validation, and fix your own errors. You are concise, "
+    "direct, and technically rigorous. No emojis. No fluff."
+)
+# Limits handed to the graph as its loop configuration.
+loop_config = {
+    "max_iterations": 100,
+    "max_tool_calls_per_turn": 20,
+    "max_history_tokens": 32000,
+}
+
+
+class HiveCoderAgent:
+    """
+    Hive Coder — builds Hive agent packages from natural language.
+
+    Single-node architecture: the coder runs in a continuous while(true) loop.
+    The guardian watchdog is auto-attached by the framework in TUI mode.
+
+    Lifecycle: ``start()`` builds the runtime on first use and starts it,
+    ``trigger_and_wait()`` runs one execution, and ``stop()`` shuts the
+    runtime down and discards it. ``run()`` wraps all three for one-shot use.
+    """
+
+    def __init__(self, config=None):
+        """Bind the module-level graph definition to this instance.
+
+        Args:
+            config: Runtime configuration; ``default_config`` is used when
+                this is omitted (or otherwise falsy).
+        """
+        self.config = config or default_config
+        # These reference the module-level objects directly (no copies).
+        self.goal = goal
+        self.nodes = nodes
+        self.edges = edges
+        self.entry_node = entry_node
+        self.entry_points = entry_points
+        self.pause_nodes = pause_nodes
+        self.terminal_nodes = terminal_nodes
+        self.async_entry_points = async_entry_points
+        # Populated by _setup(); None until the agent has been started.
+        self._graph: GraphSpec | None = None
+        self._agent_runtime: AgentRuntime | None = None
+        self._tool_registry: ToolRegistry | None = None
+        self._storage_path: Path | None = None
+
+    def _build_graph(self) -> GraphSpec:
+        """Build the GraphSpec."""
+        return GraphSpec(
+            id="hive-coder-graph",
+            goal_id=self.goal.id,
+            version="1.0.0",
+            entry_node=self.entry_node,
+            entry_points=self.entry_points,
+            terminal_nodes=self.terminal_nodes,
+            pause_nodes=self.pause_nodes,
+            nodes=self.nodes,
+            edges=self.edges,
+            default_model=self.config.model,
+            max_tokens=self.config.max_tokens,
+            loop_config=loop_config,
+            conversation_mode=conversation_mode,
+            identity_prompt=identity_prompt,
+            async_entry_points=self.async_entry_points,
+        )
+
+    def _setup(self, mock_mode=False) -> None:
+        """Set up the agent runtime.
+
+        Creates the storage directory, tool registry, optional LLM provider,
+        graph, and finally the runtime itself. Nothing is started here.
+
+        Args:
+            mock_mode: When true, no LLM provider is created and ``None`` is
+                handed to the runtime instead.
+        """
+        self._storage_path = Path.home() / ".hive" / "agents" / "hive_coder"
+        self._storage_path.mkdir(parents=True, exist_ok=True)
+
+        self._tool_registry = ToolRegistry()
+
+        # MCP servers are optional: only loaded when the config file sits
+        # next to this module.
+        mcp_config_path = Path(__file__).parent / "mcp_servers.json"
+        if mcp_config_path.exists():
+            self._tool_registry.load_mcp_config(mcp_config_path)
+
+        llm = None
+        if not mock_mode:
+            llm = LiteLLMProvider(
+                model=self.config.model,
+                api_key=self.config.api_key,
+                api_base=self.config.api_base,
+            )
+
+        tool_executor = self._tool_registry.get_executor()
+        tools = list(self._tool_registry.get_tools().values())
+
+        self._graph = self._build_graph()
+
+        checkpoint_config = CheckpointConfig(
+            enabled=True,
+            checkpoint_on_node_start=False,
+            checkpoint_on_node_complete=True,
+            checkpoint_max_age_days=7,
+            async_checkpoint=True,
+        )
+
+        # The runtime entry point id is "default"; trigger_and_wait() and
+        # run() use that id.
+        entry_point_specs = [
+            EntryPointSpec(
+                id="default",
+                name="Default",
+                entry_node=self.entry_node,
+                trigger_type="manual",
+                isolation_level="shared",
+            ),
+        ]
+
+        self._agent_runtime = create_agent_runtime(
+            graph=self._graph,
+            goal=self.goal,
+            storage_path=self._storage_path,
+            entry_points=entry_point_specs,
+            llm=llm,
+            tools=tools,
+            tool_executor=tool_executor,
+            checkpoint_config=checkpoint_config,
+            graph_id="hive_coder",
+        )
+
+    async def start(self, mock_mode=False) -> None:
+        """Set up and start the agent runtime.
+
+        ``mock_mode`` only has an effect when the runtime is created by this
+        call; an already-built runtime is reused as it is.
+        """
+        if self._agent_runtime is None:
+            self._setup(mock_mode=mock_mode)
+        if not self._agent_runtime.is_running:
+            await self._agent_runtime.start()
+
+    async def stop(self) -> None:
+        """Stop the agent runtime and clean up."""
+        if self._agent_runtime and self._agent_runtime.is_running:
+            await self._agent_runtime.stop()
+        # Dropping the reference makes the next start() build a fresh runtime.
+        self._agent_runtime = None
+
+    async def trigger_and_wait(
+        self,
+        entry_point: str = "default",
+        input_data: dict | None = None,
+        timeout: float | None = None,
+        session_state: dict | None = None,
+    ) -> ExecutionResult | None:
+        """Execute the graph and wait for completion.
+
+        Args:
+            entry_point: Id of the runtime entry point to trigger.
+            input_data: Input for the execution; ``None`` is sent as ``{}``.
+            timeout: Optional limit forwarded to the runtime. When ``None``
+                nothing is forwarded and the runtime's own default applies.
+            session_state: Optional session state passed through unchanged.
+
+        Returns:
+            Whatever the runtime's ``trigger_and_wait`` returns; callers in
+            this package treat ``None`` as a timeout.
+
+        Raises:
+            RuntimeError: If the agent has not been started.
+        """
+        if self._agent_runtime is None:
+            raise RuntimeError("Agent not started. Call start() first.")
+
+        # Previously ``timeout`` was accepted but silently dropped. Forward it
+        # only when the caller supplied one, so default calls are unchanged.
+        extra = {} if timeout is None else {"timeout": timeout}
+
+        return await self._agent_runtime.trigger_and_wait(
+            entry_point_id=entry_point,
+            input_data=input_data or {},
+            session_state=session_state,
+            **extra,
+        )
+
+    async def run(self, context: dict, mock_mode=False, session_state=None) -> ExecutionResult:
+        """Run the agent (convenience method for single execution).
+
+        Starts the agent, triggers the ``default`` entry point with
+        ``context`` as input, and always stops the agent afterwards. A
+        ``None`` result is converted into a failed ``ExecutionResult``.
+        """
+        await self.start(mock_mode=mock_mode)
+        try:
+            result = await self.trigger_and_wait("default", context, session_state=session_state)
+            return result or ExecutionResult(success=False, error="Execution timeout")
+        finally:
+            await self.stop()
+
+    def info(self):
+        """Get agent information.
+
+        Returns:
+            A dict with the agent metadata (name, version, description), the
+            goal summary, and the ids that make up the graph structure.
+        """
+        return {
+            "name": metadata.name,
+            "version": metadata.version,
+            "description": metadata.description,
+            "goal": {
+                "name": self.goal.name,
+                "description": self.goal.description,
+            },
+            "nodes": [n.id for n in self.nodes],
+            "edges": [e.id for e in self.edges],
+            "entry_node": self.entry_node,
+            "entry_points": self.entry_points,
+            "pause_nodes": self.pause_nodes,
+            "terminal_nodes": self.terminal_nodes,
+            "client_facing_nodes": [n.id for n in self.nodes if n.client_facing],
+        }
+
+    def validate(self):
+        """Validate agent structure.
+
+        Checks that every edge endpoint, the entry node, every terminal node,
+        and every entry-point target refers to a known node id.
+
+        Returns:
+            A dict with ``valid`` (bool), ``errors`` (list of messages) and
+            ``warnings`` (list; no check here currently adds any).
+        """
+        errors = []
+        warnings = []
+
+        node_ids = {node.id for node in self.nodes}
+        for edge in self.edges:
+            if edge.source not in node_ids:
+                errors.append(f"Edge {edge.id}: source '{edge.source}' not found")
+            if edge.target not in node_ids:
+                errors.append(f"Edge {edge.id}: target '{edge.target}' not found")
+
+        if self.entry_node not in node_ids:
+            errors.append(f"Entry node '{self.entry_node}' not found")
+
+        for terminal in self.terminal_nodes:
+            if terminal not in node_ids:
+                errors.append(f"Terminal node '{terminal}' not found")
+
+        for ep_id, node_id in self.entry_points.items():
+            if node_id not in node_ids:
+                errors.append(f"Entry point '{ep_id}' references unknown node '{node_id}'")
+
+        return {
+            "valid": len(errors) == 0,
+            "errors": errors,
+            "warnings": warnings,
+        }
+
+
+# Create default instance
+default_agent = HiveCoderAgent()
@@ -1,4 +1,4 @@
-"""Runtime configuration."""
+"""Runtime configuration for Hive Coder agent."""

 import json
 from dataclasses import dataclass, field
@@ -24,7 +24,7 @@ def _load_preferred_model() -> str:
 class RuntimeConfig:
    model: str = field(default_factory=_load_preferred_model)
    temperature: float = 0.7
-    max_tokens: int = 8192
+    max_tokens: int = 40000
    api_key: str | None = None
    api_base: str | None = None

@@ -32,12 +32,20 @@ class RuntimeConfig:
 default_config = RuntimeConfig()


-# Agent metadata
@dataclass
 class AgentMetadata:
-    name: str = "Online Research Agent"
+    name: str = "Hive Coder"
    version: str = "1.0.0"
-    description: str = "Research any topic by searching multiple sources, synthesizing information, and producing a well-structured narrative report with citations."
+    description: str = (
+        "Native coding agent that builds production-ready Hive agent packages "
+        "from natural language specifications. Deeply understands the agent framework "
+        "and produces complete Python packages with goals, nodes, edges, system prompts, "
+        "MCP configuration, and tests."
+    )
+    intro_message: str = (
+        "I'm Hive Coder — I build Hive agents. Describe what kind of agent "
+        "you want to create and I'll design, implement, and validate it for you."
+    )


 metadata = AgentMetadata()
@@ -0,0 +1,96 @@
+"""Attach the Hive Coder's guardian node to any agent runtime.
+
+Usage::
+
+    from framework.agents.hive_coder.guardian import attach_guardian
+
+    runner._setup()
+    attach_guardian(runner._agent_runtime, runner._tool_registry)
+    await runner._agent_runtime.start()
+
+Must be called **before** ``runtime.start()`` — it injects the
+guardian node into the graph and registers an event-driven entry point.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from framework.runner.tool_registry import ToolRegistry
+    from framework.runtime.agent_runtime import AgentRuntime
+
+from framework.runtime.execution_stream import EntryPointSpec
+
+from .nodes import ALL_GUARDIAN_TOOLS, guardian_node
+
+logger = logging.getLogger(__name__)
+
+GUARDIAN_ENTRY_POINT = EntryPointSpec(
+    id="guardian",  # same string attach_guardian() uses as the graph entry_points key
+    name="Agent Guardian",
+    entry_node="guardian",  # id of guardian_node imported from .nodes
+    trigger_type="event",  # event-driven rather than started directly
+    trigger_config={
+        "event_types": [  # the four conditions named in attach_guardian()'s docstring
+            "execution_failed",
+            "node_stalled",
+            "node_tool_doom_loop",
+            "constraint_violation",
+        ],
+        "exclude_own_graph": False,  # NOTE(review): semantics not visible here; confirm
+    },
+    isolation_level="shared",
+)
+
+
+def attach_guardian(
+    runtime: AgentRuntime,
+    tool_registry: ToolRegistry,
+) -> None:
+    """Inject hive_coder's guardian node into *runtime*'s graph.
+
+    1. Registers graph lifecycle tools if not already present.
+    2. Refreshes the runtime's tool list and executor.
+    3. Adds the guardian node (with dynamically filtered tools) to the graph.
+    4. Registers an event-driven entry point that fires on execution failures,
+       stalls, tool doom loops, and constraint violations.
+
+    Must be called **before** ``runtime.start()``.
+
+    If step 4 fails, the node appended in step 3 and the graph-level
+    ``entry_points`` mapping are restored before the exception propagates,
+    so a failed attach does not leave a guardian node that nothing triggers.
+    The tool refresh from steps 1-2 is not undone.
+
+    Args:
+        runtime: Runtime whose graph receives the guardian node.
+        tool_registry: Registry used to register the graph lifecycle tools
+            and to decide which of ``ALL_GUARDIAN_TOOLS`` the node may use.
+
+    Raises:
+        RuntimeError: If the runtime is already running. This function does
+            not check that itself; the error is expected to come from
+            ``runtime.register_entry_point`` (confirm against AgentRuntime).
+            Any other exception from that call is propagated unchanged.
+    """
+    from framework.tools.session_graph_tools import register_graph_tools
+
+    # 1. Register graph lifecycle tools if not already present
+    if not tool_registry.has_tool("load_agent"):
+        register_graph_tools(tool_registry, runtime)
+
+    # 2. Refresh tool schemas and executor on the runtime. The registry is
+    #    read once so the schemas and the filter below see the same snapshot.
+    registered = tool_registry.get_tools()
+    runtime._tools = list(registered.values())
+    runtime._tool_executor = tool_registry.get_executor()
+
+    # 3. Filter guardian tools to only those available in the registry
+    available = set(registered.keys())
+    filtered_tools = [t for t in ALL_GUARDIAN_TOOLS if t in available]
+
+    # Build guardian node with filtered tool list
+    node = guardian_node.model_copy(update={"tools": filtered_tools})
+
+    graph = runtime.graph
+    # Remember the previous mapping (if any) so a failed registration can be
+    # rolled back to exactly the state the caller handed us.
+    missing = object()
+    previous_target = graph.entry_points.get("guardian", missing)
+
+    # Add to the runtime's graph (so register_entry_point validation passes)
+    graph.nodes.append(node)
+
+    # Mark guardian as reachable in graph-level entry_points so
+    # GraphSpec.validate() doesn't flag it as unreachable.
+    graph.entry_points["guardian"] = "guardian"
+
+    # 4. Register event-driven entry point
+    try:
+        runtime.register_entry_point(GUARDIAN_ENTRY_POINT)
+    except Exception:
+        # Undo the graph mutations above; identity check avoids removing an
+        # equal-looking node that was already in the graph.
+        if graph.nodes and graph.nodes[-1] is node:
+            graph.nodes.pop()
+        if previous_target is missing:
+            graph.entry_points.pop("guardian", None)
+        else:
+            graph.entry_points["guardian"] = previous_target
+        raise
+
+    logger.info(
+        "Guardian attached with %d tools: %s",
+        len(filtered_tools),
+        filtered_tools,
+    )
@@ -0,0 +1,9 @@
+{
+  "coder-tools": {
+    "transport": "stdio",
+    "command": "uv",
+    "args": ["run", "python", "coder_tools_server.py", "--stdio"],
+    "cwd": "../../../../tools",
+    "description": "Unsandboxed file system tools for code generation and validation"
+  }
+}
@@ -0,0 +1,556 @@
+"""Node definitions for Hive Coder agent."""
+
+from framework.graph import NodeSpec
+
+# The coder agent is a single event-loop node (like opencode's while(true) loop):
+# one continuous context carries the whole workflow, so nothing is lost at node
+# boundaries: discover → design → implement → verify → present → iterate.
+coder_node = NodeSpec(
+    id="coder",
+    name="Hive Coder",
+    description=(
+        "Autonomous coding agent that builds Hive agent packages. "
+        "Handles the full lifecycle: understanding user intent, "
+        "designing architecture, writing code, validating, and "
+        "iterating on feedback — all in one continuous conversation."
+    ),
+    node_type="event_loop",
+    client_facing=True,
+    max_node_visits=0,  # 0 = unbounded visits (see "Node rules" in the prompt below)
+    input_keys=["user_request"],
+    output_keys=["agent_name", "validation_result"],  # set via set_output in prompt step 6
+    success_criteria=(
+        "A complete, validated Hive agent package exists at "
+        "exports/{agent_name}/ and passes structural validation."
+    ),
+    system_prompt="""\
+You are Hive Coder, the best agent-building coding agent. You build \
+production-ready Hive agent packages from natural language.
+
+# Core Mandates
+
+- **Read before writing.** NEVER write code from assumptions. Read \
+reference agents and templates first. Read every file before editing.
+- **Conventions first.** Follow existing project patterns exactly. \
+Analyze imports, structure, and style in reference agents.
+- **Verify assumptions.** Never assume a class, import, or pattern \
+exists. Read actual source to confirm. Search if unsure.
+- **Discover tools dynamically.** NEVER reference tools from static \
+docs. Always run discover_mcp_tools() to see what actually exists.
+- **Professional objectivity.** If a use case is a poor fit for the \
+framework, say so. Technical accuracy over validation.
+- **Concise.** No emojis. No preambles. No postambles. Substance only.
+- **Self-verify.** After writing code, run validation and tests. Fix \
+errors yourself. Don't declare success until validation passes.
+
+# Tools
+
+## File I/O
+- read_file(path, offset?, limit?) — read with line numbers
+- write_file(path, content) — create/overwrite, auto-mkdir
+- edit_file(path, old_text, new_text, replace_all?) — fuzzy-match edit
+- list_directory(path, recursive?) — list contents
+- search_files(pattern, path?, include?) — regex search
+- run_command(command, cwd?, timeout?) — shell execution
+- undo_changes(path?) — restore from git snapshot
+
+## Meta-Agent
+- discover_mcp_tools(server_config_path?) — connect to MCP servers \
+and list all available tools with full schemas. Default: hive-tools.
+- list_agents() — list all agent packages in exports/ with session counts
+- list_agent_sessions(agent_name, status?, limit?) — list sessions
+- get_agent_session_state(agent_name, session_id) — full session state
+- get_agent_session_memory(agent_name, session_id, key?) — memory data
+- list_agent_checkpoints(agent_name, session_id) — list checkpoints
+- get_agent_checkpoint(agent_name, session_id, checkpoint_id?) — load checkpoint
+- run_agent_tests(agent_name, test_types?, fail_fast?) — run pytest with parsing
+
+# Meta-Agent Capabilities
+
+You are not just a file writer. You have deep integration with the \
+Hive framework:
+
+## Tool Discovery (MANDATORY before designing)
+Before designing any agent, run discover_mcp_tools() to see what \
+tools are actually available from the hive-tools MCP server. This \
+returns full schemas with parameter names, types, and descriptions. \
+NEVER guess tool names or parameters from memory. The tool catalog \
+is the ground truth.
+
+To check a specific agent's tools:
+  discover_mcp_tools("exports/{agent_name}/mcp_servers.json")
+
+## Agent Awareness
+Run list_agents() to see what agents already exist. Read their code \
+for patterns:
+  read_file("exports/{name}/agent.py")
+  read_file("exports/{name}/nodes/__init__.py")
+
+## Post-Build Testing
+After writing agent code, validate structurally AND run tests:
+  run_command("python -c 'from {name} import default_agent; \\
+    print(default_agent.validate())'")
+  run_agent_tests("{name}")
+
+## Debugging Built Agents
+When a user says "my agent is failing" or "debug this agent":
+1. list_agent_sessions("{agent_name}") — find the session
+2. get_agent_session_state("{agent_name}", "{session_id}") — see status
+3. get_agent_session_memory("{agent_name}", "{session_id}") — inspect data
+4. list_agent_checkpoints / get_agent_checkpoint — trace execution
+
+# Workflow
+
+You operate in a continuous loop. The user describes what they want, \
+you build it. No rigid phases — use judgment. But the general flow is:
+
+## 1. Understand
+
+When the user describes what they want to build, hear the structure:
+- The actors, the trigger, the core loop, the output, the pain.
+
+Play back a model: "Here's what I'm picturing: [concrete picture]. \
+Before I start — [1-2 questions you can't infer]."
+
+Ask only what you CANNOT infer. Fill blanks with domain knowledge.
+
+## 2. Qualify
+
+Assess framework fit honestly. Run discover_mcp_tools() to check \
+what tools exist. Read the framework guide:
+  read_file("core/framework/agents/hive_coder/reference/framework_guide.md")
+
+Consider:
+- What works well (multi-turn, HITL, tool orchestration)
+- Limitations (LLM latency, context limits, cost)
+- Deal-breakers (missing tools, wrong paradigm)
+
+Give a clear recommendation: proceed, adjust scope, or reconsider.
+
+## 3. Design
+
+Design the agent architecture:
+- Goal: id, name, description, 3-5 success criteria, 2-4 constraints
+- Nodes: **2-4 nodes MAXIMUM** (see rules below)
+- Edges: on_success for linear, conditional for routing
+- Lifecycle: ALWAYS forever-alive (`terminal_nodes=[]`) unless the user \
+explicitly requests a one-shot/batch agent. Forever-alive agents loop \
+continuously — the user exits by closing the TUI. This is the standard \
+pattern for all interactive agents.
+
+### Node Count Rules (HARD LIMITS)
+
+**2-4 nodes** for all agents. Never exceed 4 unless the user explicitly \
+requests more. Each node boundary serializes outputs to shared memory \
+and DESTROYS all in-context information (tool results, reasoning, history).
+
+**MERGE nodes when:**
+- Node has NO tools (pure LLM reasoning) → merge into predecessor/successor
+- Node sets only 1 trivial output → collapse into predecessor
+- Multiple consecutive autonomous nodes → combine into one rich node
+- A "report" or "summary" node → merge into the client-facing node
+- A "confirm" or "schedule" node that calls no external service → remove
+
+**SEPARATE nodes only when:**
+- Client-facing vs autonomous (different interaction models)
+- Fundamentally different tool sets
+- Fan-out parallelism (parallel branches MUST be separate)
+
+**Typical patterns:**
+- 2 nodes: `interact (client-facing) → process (autonomous) → interact`
+- 3 nodes: `intake (CF) → process (auto) → review (CF) → intake`
+- WRONG: 7 nodes where half have no tools and just do LLM reasoning
+
+Read reference agents before designing:
+  list_agents()
+  read_file("exports/deep_research_agent/agent.py")
+  read_file("exports/deep_research_agent/nodes/__init__.py")
+
+Present the design with ASCII art graph. Get user approval.
+
+## 4. Implement
+
+Read templates before writing code:
+  read_file("core/framework/agents/hive_coder/reference/file_templates.md")
+  read_file("core/framework/agents/hive_coder/reference/anti_patterns.md")
+
+Write files in order:
+1. mkdir -p exports/{name}/nodes exports/{name}/tests
+2. config.py — RuntimeConfig + AgentMetadata
+3. nodes/__init__.py — NodeSpec definitions with system prompts
+4. agent.py — Goal, edges, graph, agent class
+5. __init__.py — package exports
+6. __main__.py — CLI with click
+7. mcp_servers.json — tool server config
+8. tests/ — fixtures
+
+### Critical Rules
+
+**Imports** (must match exactly — only import what you use):
+```python
+from framework.graph import (
+    NodeSpec, EdgeSpec, EdgeCondition,
+    Goal, SuccessCriterion, Constraint,
+)
+from framework.graph.edge import GraphSpec
+from framework.graph.executor import ExecutionResult
+from framework.graph.checkpoint_config import CheckpointConfig
+from framework.llm import LiteLLMProvider
+from framework.runner.tool_registry import ToolRegistry
+from framework.runtime.agent_runtime import (
+    AgentRuntime, create_agent_runtime,
+)
+from framework.runtime.execution_stream import EntryPointSpec
+```
+For agents with async entry points (timers, webhooks, events), also add:
+```python
+from framework.graph.edge import GraphSpec, AsyncEntryPointSpec
+from framework.runtime.agent_runtime import (
+    AgentRuntime, AgentRuntimeConfig, create_agent_runtime,
+)
+```
+NEVER `from core.framework...` — PYTHONPATH includes core/.
+
+**__init__.py MUST re-export ALL module-level variables** \
+(THIS IS THE #1 SOURCE OF AGENT LOAD FAILURES):
+The runner imports the package (__init__.py), NOT agent.py. It reads \
+goal, nodes, edges, entry_node, entry_points, pause_nodes, \
+terminal_nodes, conversation_mode, identity_prompt, loop_config via \
+getattr(). If ANY are missing from __init__.py, they silently default \
+to None or {} — causing "must define goal, nodes, edges" or "node X \
+is unreachable" errors. The __init__.py MUST import and re-export \
+ALL of these from .agent:
+```python
+from .agent import (
+    MyAgent, default_agent, goal, nodes, edges,
+    entry_node, entry_points, pause_nodes, terminal_nodes,
+    conversation_mode, identity_prompt, loop_config,
+)
+```
+
+**entry_points**: `{"start": "first-node-id"}`
+For agents with multiple entry points (e.g. a reminder trigger), \
+add them: `{"start": "intake", "reminder": "reminder"}`
+
+**conversation_mode** — ONLY two valid values:
+- `"continuous"` — recommended for interactive agents (context carries \
+across node transitions)
+- Omit entirely — for isolated per-node conversations
+NEVER use: "client_facing", "interactive", "adaptive", or any other \
+value. These DO NOT EXIST.
+
+**loop_config** — ONLY three valid keys:
+```python
+loop_config = {
+    "max_iterations": 100,
+    "max_tool_calls_per_turn": 20,
+    "max_history_tokens": 32000,
+}
+```
+NEVER add: "strategy", "mode", "timeout", or other keys.
+
+**mcp_servers.json**:
+```json
+{
+  "hive-tools": {
+    "transport": "stdio",
+    "command": "uv",
+    "args": ["run", "python", "mcp_server.py", "--stdio"],
+    "cwd": "../../tools"
+  }
+}
+```
+NO "mcpServers" wrapper. cwd "../../tools". command "uv".
+
+**Storage**: `Path.home() / ".hive" / "agents" / "{name}"`
+
+**Client-facing system prompts** — STEP 1/STEP 2 pattern:
+```
+STEP 1 — Present to user (text only, NO tool calls):
+[instructions]
+
+STEP 2 — After user responds, call set_output:
+[set_output calls]
+```
+
+**Autonomous system prompts** — set_output in SEPARATE turn.
+
+**Tools** — NEVER fabricate tool names. Common hallucinations: \
+csv_read, csv_write, csv_append, file_upload, database_query. \
+If discover_mcp_tools() shows these don't exist, use alternatives \
+(e.g. save_data/load_data for data persistence).
+
+**Node rules**:
+- **2-4 nodes MAX.** Never exceed 4. Merge thin nodes aggressively.
+- A node with 0 tools is NOT a real node — merge it.
+- node_type always "event_loop"
+- max_node_visits default is 0 (unbounded) — correct for forever-alive. \
+Only set >0 in one-shot agents with bounded feedback loops.
+- Feedback inputs: nullable_output_keys
+- terminal_nodes=[] for forever-alive (the default)
+- Every node MUST have at least one outgoing edge (no dead ends)
+- Agents are forever-alive unless user explicitly asks for one-shot
+
+**Agent class**: CamelCase name, default_agent at module level. \
+Constructor takes `config=None`. Follow the exact pattern in \
+file_templates.md — do NOT invent constructor params like \
+`llm_provider` or `tool_registry`.
+
+**Module-level variables** (read by AgentRunner.load()):
+goal, nodes, edges, entry_node, entry_points, pause_nodes,
+terminal_nodes, conversation_mode, identity_prompt, loop_config
+
+For agents with async triggers, also export:
+async_entry_points, runtime_config
+
+**Async entry points** (timers, webhooks, events):
+When an agent needs scheduled tasks, webhook reactions, or event-driven \
+triggers, use `AsyncEntryPointSpec` (from framework.graph.edge) and \
+`AgentRuntimeConfig` (from framework.runtime.agent_runtime):
+- Timer (cron): `trigger_type="timer"`, \
+`trigger_config={"cron": "0 9 * * *"}` — standard 5-field cron expression \
+(e.g. `"0 9 * * MON-FRI"` weekdays 9am, `"*/30 * * * *"` every 30 min)
+- Timer (interval): `trigger_type="timer"`, \
+`trigger_config={"interval_minutes": 20, "run_immediately": False}`
+- Event (for webhooks): `trigger_type="event"`, \
+`trigger_config={"event_types": ["webhook_received"]}`
+- `isolation_level="shared"` so async runs can read primary session memory
+- `runtime_config = AgentRuntimeConfig(webhook_routes=[...])` for HTTP webhooks
+- Reference: `exports/gmail_inbox_guardian/agent.py`
+- Full docs: `core/framework/agents/hive_coder/reference/framework_guide.md` \
+(Async Entry Points section)
+
+## 5. Verify
+
+Run THREE validation steps after writing. All must pass:
+
+**Step A — Class validation** (checks graph structure):
+```
+run_command("python -c 'from {name} import default_agent; \\
+  print(default_agent.validate())'")
+```
+
+**Step B — Runner load test** (checks package export contract — \
+THIS IS THE SAME PATH THE TUI USES):
+```
+run_command("python -c 'from framework.runner.runner import \\
+  AgentRunner; r = AgentRunner.load(\"exports/{name}\"); \\
+  print(\"AgentRunner.load: OK\")'")
+```
+This catches missing __init__.py exports, bad conversation_mode, \
+invalid loop_config, and unreachable nodes. If Step A passes but \
+Step B fails, the problem is in __init__.py exports.
+
+**Step C — Run tests:**
+```
+run_agent_tests("{name}")
+```
+
+If anything fails: read error, fix with edit_file, re-validate. Up to 3x.
+
+**CRITICAL: Testing forever-alive agents**
+Most agents use `terminal_nodes=[]` (forever-alive). This means \
+`runner.run()` NEVER returns — it hangs forever waiting for a \
+terminal node that doesn't exist. Agent tests MUST be structural:
+- Validate graph, node specs, edges, tools, prompts
+- Check goal/constraints/success criteria definitions
+- Test `AgentRunner.load()` + `_setup()` (skip if no API key)
+- NEVER call `runner.run()` or `trigger_and_wait()` in tests for \
+forever-alive agents — they will hang and time out.
+When you restructure an agent (change nodes/edges), always update \
+the tests to match. Stale tests referencing old node names will fail.
+
+## 6. Present
+
+Show the user what you built: agent name, goal summary, graph ASCII \
+art, files created, validation status. Offer to revise or build another.
+
+After user confirms satisfaction:
+  set_output("agent_name", "the_agent_name")
+  set_output("validation_result", "valid")
+
+If building another agent, just start the loop again — no need to \
+set_output until the user is done.
+
+## 7. Live Test (optional)
+
+After the user approves, offer to load and run the agent in-session. \
+This runs it alongside you, with the Agent Guardian watching for \
+failures automatically.
+
+```
+load_agent("exports/{name}")   # registers as secondary graph
+start_agent("{name}")           # triggers default entry point
+```
+
+If the agent fails, the guardian fires and triages. You can also:
+- `list_agents()` — see all loaded graphs and status
+- `restart_agent("{name}")` then `load_agent` — pick up code changes
+- `unload_agent("{name}")` — remove it from the session
+- `get_user_presence()` — check if user is around
+
+The agent runs in a shared session: it can read memory you've set and \
+its outputs are visible to you. If the guardian escalates a failure, \
+you'll see the error and can fix the code, then reload.
+""",
+    tools=[
+        "read_file",  # File I/O tools (the "## File I/O" section of the prompt)
+        "write_file",
+        "edit_file",
+        "list_directory",
+        "search_files",
+        "run_command",
+        "undo_changes",
+        # Meta-agent tools (the "## Meta-Agent" section of the prompt)
+        "discover_mcp_tools",
+        "list_agents",
+        "list_agent_sessions",
+        "get_agent_session_state",
+        "get_agent_session_memory",
+        "list_agent_checkpoints",
+        "get_agent_checkpoint",
+        "run_agent_tests",
+        # Graph lifecycle tools (multi-graph sessions; prompt step "7. Live Test")
+        "load_agent",
+        "unload_agent",
+        "start_agent",
+        "restart_agent",
+        "get_user_presence",
+    ],
+)
+
+
+ALL_GUARDIAN_TOOLS = [  # superset; attach_guardian() keeps only names present in the registry
+    # File I/O — available when the agent has hive-tools MCP
+    "read_file",
+    "write_file",
+    "edit_file",
+    "search_files",
+    "run_command",
+    # Graph lifecycle — attach_guardian() registers these if "load_agent" is missing
+    "load_agent",
+    "unload_agent",
+    "start_agent",
+    "restart_agent",
+    "get_user_presence",
+    "list_agents",
+]
+
+guardian_node = NodeSpec(
+    id="guardian",  # must match GUARDIAN_ENTRY_POINT.entry_node in guardian.py
+    name="Agent Guardian",
+    description=(
+        "Event-driven guardian that monitors supervised agent graphs. "
+        "Triggers on failures, stalls, tool doom loops, and constraint "
+        "violations. Assesses severity, checks user presence, and decides: "
+        "ask the user (if present), attempt autonomous fix (if away), or "
+        "escalate for post-mortem."
+    ),
+    node_type="event_loop",
+    client_facing=True,
+    max_node_visits=0,  # 0 = unbounded visits
+    input_keys=["event"],
+    output_keys=["resolution"],
+    nullable_output_keys=["resolution"],  # NOTE(review): presumably optional output; confirm
+    success_criteria=(
+        "Failure is resolved — either by user guidance, autonomous fix, or documented escalation."
+    ),
+    system_prompt="""\
+You are the Agent Guardian — a watchdog that monitors supervised agent \
+graphs. You fire on failures, stalls, doom loops, and constraint \
+violations. Your job: triage, fix, or escalate.
+
+# Event Types
+
+You trigger on these events:
+
+## execution_failed
+The agent graph crashed — unhandled exception, LLM error, or tool failure.
+- Read the error message and stack trace from the event data.
+- Transient errors (rate limit, timeout, network): auto-retry via restart.
+- Config errors (bad API key, missing tool): needs user input.
+- Logic bugs (bad output, crash in code): read source, fix, reload.
+- Catastrophic (data corruption): escalate, unload the agent.
+
+## node_stalled
+A node has been running too long without producing output. The LLM may \
+be stuck in a reasoning loop, waiting for input that won't come, or \
+the tool call is hanging.
+- Check what node is stalled and how long it's been running.
+- If the node is autonomous: restart the agent to break the stall.
+- If the node is client-facing: check user presence — the user may \
+  have left. Alert them or restart after a timeout.
+- If a tool call is hanging: the MCP server may be down. Restart.
+
+## node_tool_doom_loop
+The LLM is calling the same tools repeatedly without making progress. \
+This usually means the prompt is inadequate, the tool is returning \
+unhelpful errors, or the LLM is stuck in a retry loop.
+- Identify which tool is looping and what errors it's returning.
+- If it's a transient tool error: restart to reset context.
+- If it's a prompt/logic issue: read the node's source, fix the \
+  system prompt or tool configuration, then reload and restart.
+- If the tool itself is broken: unload and escalate.
+
+## constraint_violation
+The agent violated a defined constraint (e.g., token budget exceeded, \
+forbidden action attempted, output format invalid).
+- Read which constraint was violated from the event data.
+- Soft constraints (budget warning): log and notify user.
+- Hard constraints (forbidden action): halt the agent immediately, \
+  escalate to user.
+- Format violations: may be fixable by restarting with better context.
+
+# Decision Protocol
+
+1. **Identify the event type** and read the event data carefully.
+
+2. **Assess severity:**
+   - Transient / auto-recoverable -> auto-retry
+   - Configuration / environment -> needs user input
+   - Logic bug / prompt issue -> needs code fix
+   - Catastrophic / safety -> escalate immediately
+
+3. **Check user presence.** Call get_user_presence().
+   - **present** (idle < 2 min): Ask the user for guidance. Present the \
+     issue clearly and suggest options.
+   - **idle** (2-10 min): Attempt autonomous fix first. If it fails, \
+     queue a notification for when user returns.
+   - **away** (> 10 min) or **never_seen**: Attempt autonomous fix. \
+     Save escalation log via write_file if fix fails.
+
+4. **Act.**
+   - Auto-retry: restart_agent(graph_id), then start_agent.
+   - Config issues: if user present, ask. If away, log and wait.
+   - Code fixes: read source, fix with edit_file, restart_agent.
+   - Escalation: save detailed log, unload the agent.
+
+# Tools
+
+- get_user_presence() -- check if user is active
+- list_agents() -- see loaded graphs and status
+- load_agent(path) -- load an agent graph
+- unload_agent(graph_id) -- remove a graph
+- start_agent(graph_id, entry_point, input_data) -- trigger execution
+- restart_agent(graph_id) -- unload for reload
+- read_file, write_file, edit_file -- inspect/fix agent source code \
+  (available when the agent's MCP server provides them)
+- run_command -- run shell commands (available when provided by MCP)
+
+# Rules
+
+- Be concise. State the event type, your assessment, and your action.
+- If asking the user, present the issue and 2-3 concrete options.
+- After a fix attempt, verify it works before declaring success.
+- For doom loops and stalls, prefer restart first — it's the cheapest fix.
+- set_output("resolution", "...") only after the issue is resolved or \
+  escalated. Use a brief description: "auto-fixed: retry after timeout", \
+  "escalated: missing API key", "user-resolved: updated config", \
+  "auto-fixed: restarted stalled node", "escalated: doom loop in tool X".
+""",
+    # Superset placeholder; attach_guardian() swaps in the registry-filtered list
+    tools=ALL_GUARDIAN_TOOLS,
+)
+
+
+__all__ = ["coder_node", "guardian_node", "ALL_GUARDIAN_TOOLS"]
@@ -0,0 +1,107 @@
+# Common Mistakes When Building Hive Agents
+
+## Critical Errors
+
+1. **Using tools that don't exist** — Always verify tools are available in the hive-tools MCP server before assigning them to nodes. Never guess tool names.
+
+2. **Wrong entry_points format** — MUST be `{"start": "first-node-id"}`. NOT a set, NOT `{node_id: [keys]}`.
+
+3. **Wrong mcp_servers.json format** — Flat dict (no `"mcpServers"` wrapper). `cwd` must be `"../../tools"`. `command` must be `"uv"` with args `["run", "python", ...]`.
+
+4. **Missing STEP 1/STEP 2 in client-facing prompts** — Without explicit phases, the LLM calls set_output before the user responds. Always use the pattern.
+
+5. **Forgetting nullable_output_keys** — When a node receives inputs from multiple edges and some inputs only arrive on certain edges (e.g., feedback), mark those as nullable. Without this, the executor blocks waiting for a value that will never arrive.
+
+6. **Creating dead-end nodes in forever-alive graphs** — Every node must have at least one outgoing edge. A node with no outgoing edges ends the execution, breaking the loop.
+
+7. **Setting max_node_visits to a non-zero value in forever-alive agents** — The framework default is `max_node_visits=0` (unbounded). Setting it to any positive value (e.g., 1) means the node stops executing after that many visits, silently breaking the forever-alive loop. Only set `max_node_visits > 0` in one-shot agents with feedback loops that need bounded retries.
+
+7. **Missing module-level exports in `__init__.py`** — The runner loads agents via `importlib.import_module(package_name)`, which imports `__init__.py`. It then reads `goal`, `nodes`, `edges`, `entry_node`, `entry_points`, `pause_nodes`, `terminal_nodes`, `conversation_mode`, `identity_prompt`, `loop_config` via `getattr()`. If ANY of these are missing from `__init__.py`, they default to `None` or `{}` — causing "must define goal, nodes, edges" errors or "node X is unreachable" validation failures. **ALL module-level variables from agent.py must be re-exported in `__init__.py`.**
+
+## Value Errors
+
+8. **Invalid `conversation_mode` value** — Only two valid values: `"continuous"` (recommended for interactive agents) or omit entirely (for isolated per-node conversations). Values like `"client_facing"`, `"interactive"`, `"adaptive"` do NOT exist and will cause runtime errors.
+
+9. **Invalid `loop_config` keys** — Only three valid keys: `max_iterations` (int), `max_tool_calls_per_turn` (int), `max_history_tokens` (int). Keys like `"strategy"`, `"mode"`, `"timeout"` are NOT valid and are silently ignored or cause errors.
+
+10. **Fabricating tools that don't exist** — Never guess tool names. Always verify via `discover_mcp_tools()`. Common hallucinations: `csv_read`, `csv_write`, `csv_append`, `file_upload`, `database_query`. If a required tool doesn't exist, redesign the agent to use tools that DO exist (e.g., `save_data`/`load_data` for data persistence).
+
+## Design Errors
+
+11. **Too many thin nodes** — Hard limit: **2-4 nodes**; never exceed 4 unless the user explicitly requests more. Each node boundary serializes outputs to shared memory and loses all in-context information (tool results, intermediate reasoning, conversation history). A node with 0 tools that just does LLM reasoning is NOT a real node — merge it into its predecessor or successor.
+
+**Merge when:**
+- Node has NO tools — pure LLM reasoning belongs in the node that produces or consumes its data
+- Node sets only 1 trivial output (e.g., `set_output("done", "true")`) — collapse into predecessor
+- Multiple consecutive autonomous nodes with same/similar tools — combine into one
+- A "report" or "summary" node that just presents analysis — merge into the client-facing node
+- A "schedule" or "confirm" node that doesn't actually schedule anything — remove entirely
+
+**Keep separate when:**
+- Client-facing vs autonomous — different interaction models require separate nodes
+- Fundamentally different tool sets (e.g., web search vs file I/O)
+- Fan-out parallelism — parallel branches MUST be separate nodes
+
+**Bad example** (7 nodes — WAY too many):
+```
+profile_setup → daily_intake → update_tracker → analyze_progress → generate_plan → schedule_reminders → report
+```
+`analyze_progress` has no tools. `schedule_reminders` just sets one boolean. `report` just presents analysis. `update_tracker` and `generate_plan` are sequential autonomous work.
+
+**Good example** (2 nodes, looping back to intake):
+```
+intake (client-facing) → process (autonomous: track + analyze + plan) → intake (loop back)
+```
+One client-facing node handles ALL user interaction (setup, logging, reports). One autonomous node handles ALL backend work (CSV update, analysis, plan generation) with tools and context preserved.
+
+12. **Adding framework gating for LLM behavior** — Don't add output rollback, premature rejection, or interaction protocol injection. Fix with better prompts or custom judges.
+
+13. **Not using continuous conversation mode** — Interactive agents should use `conversation_mode="continuous"`. Without it, each node starts with blank context.
+
+14. **Adding terminal nodes by default** — ALL agents should use `terminal_nodes=[]` (forever-alive) unless the user explicitly requests a one-shot/batch agent. Forever-alive is the standard pattern. Every node must have at least one outgoing edge. Dead-end nodes break the loop.
+
+15. **Calling set_output in same turn as tool calls** — Instruct the LLM to call set_output in a SEPARATE turn from real tool calls.
+
+## File Template Errors
+
+16. **Wrong import paths** — Use `from framework.graph import ...`, NOT `from core.framework.graph import ...`. The PYTHONPATH includes `core/`.
+
+17. **Missing storage path** — Agent class must set `self._storage_path = Path.home() / ".hive" / "agents" / "agent_name"`.
+
+18. **Missing mcp_servers.json** — Without this, the agent has no tools at runtime.
+
+19. **Bare `python` command in mcp_servers.json** — Use `"command": "uv"` with args `["run", "python", ...]`.
+
+## Testing Errors
+
+20. **Using `runner.run()` on forever-alive agents** — `runner.run()` calls `trigger_and_wait()` which blocks until the graph reaches a terminal node. Forever-alive agents have `terminal_nodes=[]`, so **`runner.run()` hangs forever**. This is the #1 cause of stuck test suites.
+
+**For forever-alive agents, write structural tests instead:**
+- Validate graph structure (nodes, edges, entry points)
+- Verify node specs (tools, prompts, client-facing flag)
+- Check goal/constraints/success criteria definitions
+- Test that `AgentRunner.load()` + `_setup()` succeeds (skip if no API key)
+
+**What NOT to do:**
+```python
+# WRONG — hangs forever on forever-alive agents
+result = await runner.run({"topic": "quantum computing"})
+```
+
+**Correct pattern for structure tests:**
+```python
+def test_research_has_web_tools(self):
+    assert "web_search" in research_node.tools
+
+def test_research_routes_back_to_interact(self):
+    edges_to_interact = [e for e in edges if e.source == "research" and e.target == "interact"]
+    assert edges_to_interact
+```
+
+21. **Stale tests after agent restructuring** — When you change an agent's node count or names (e.g., 4 nodes → 2 nodes), the tests MUST be updated too. Tests referencing old node names (e.g., `"review"`, `"report"`) will fail or hang. Always check that test assertions match the current `nodes/__init__.py`.
+
+22. **Running full integration tests without API keys** — Structural tests (validate, import) work without keys. Full integration tests need `ANTHROPIC_API_KEY`. Use `pytest.skip()` in the runner fixture when `_setup()` fails due to missing credentials.
+
+23. **Forgetting sys.path setup in conftest.py** — Tests need `exports/` and `core/` on sys.path.
+
+24. **Not using auto_responder for client-facing nodes** — Tests with client-facing nodes hang without an auto-responder that injects input. But note: even WITH auto_responder, forever-alive agents still hang because the graph never terminates. Auto-responder only helps for agents with terminal nodes.
@@ -0,0 +1,597 @@
+# Agent File Templates
+
+Complete code templates for each file in a Hive agent package.
+
+## config.py
+
+```python
+"""Runtime configuration."""
+
+import json
+from dataclasses import dataclass, field
+from pathlib import Path
+
+
+def _load_preferred_model() -> str:
+    """Load preferred model from ~/.hive/configuration.json."""
+    config_path = Path.home() / ".hive" / "configuration.json"
+    if config_path.exists():
+        try:
+            with open(config_path) as f:
+                config = json.load(f)
+            llm = config.get("llm", {})
+            if llm.get("provider") and llm.get("model"):
+                return f"{llm['provider']}/{llm['model']}"
+        except Exception:
+            pass
+    return "anthropic/claude-sonnet-4-20250514"
+
+
+@dataclass
+class RuntimeConfig:
+    model: str = field(default_factory=_load_preferred_model)
+    temperature: float = 0.7
+    max_tokens: int = 40000
+    api_key: str | None = None
+    api_base: str | None = None
+
+
+default_config = RuntimeConfig()
+
+
+@dataclass
+class AgentMetadata:
+    name: str = "My Agent Name"
+    version: str = "1.0.0"
+    description: str = "What this agent does."
+    intro_message: str = "Welcome! What would you like me to do?"
+
+
+metadata = AgentMetadata()
+```
+
+## nodes/__init__.py
+
+```python
+"""Node definitions for My Agent."""
+
+from framework.graph import NodeSpec
+
+# Node 1: Intake (client-facing)
+intake_node = NodeSpec(
+    id="intake",
+    name="Intake",
+    description="Gather requirements from the user",
+    node_type="event_loop",
+    client_facing=True,
+    max_node_visits=0,  # Unlimited for forever-alive
+    input_keys=["topic"],
+    output_keys=["brief"],
+    success_criteria="The brief is specific and actionable.",
+    system_prompt="""\
+You are an intake specialist.
+
+**STEP 1 — Read and respond (text only, NO tool calls):**
+1. Read the topic provided
+2. If vague, ask 1-2 clarifying questions
+3. If clear, confirm your understanding
+
+**STEP 2 — After the user confirms, call set_output:**
+- set_output("brief", "Clear description of what to do")
+""",
+    tools=[],
+)
+
+# Node 2: Worker (autonomous)
+worker_node = NodeSpec(
+    id="worker",
+    name="Worker",
+    description="Do the main work",
+    node_type="event_loop",
+    max_node_visits=0,
+    input_keys=["brief", "feedback"],
+    output_keys=["results"],
+    nullable_output_keys=["feedback"],  # Only on feedback edge
+    success_criteria="Results are complete and accurate.",
+    system_prompt="""\
+You are a worker agent. Given a brief, do the work.
+
+If feedback is provided, this is a follow-up — address the feedback.
+
+Work in phases:
+1. Use tools to gather/process data
+2. Analyze results
+3. Call set_output for each key in a SEPARATE turn:
+   - set_output("results", "structured results")
+""",
+    tools=["web_search", "web_scrape", "save_data", "load_data", "list_data_files"],
+)
+
+# Node 3: Review (client-facing)
+review_node = NodeSpec(
+    id="review",
+    name="Review",
+    description="Present results for user approval",
+    node_type="event_loop",
+    client_facing=True,
+    max_node_visits=0,
+    input_keys=["results", "brief"],
+    output_keys=["next_action", "feedback"],
+    nullable_output_keys=["feedback"],
+    success_criteria="User has reviewed and decided next steps.",
+    system_prompt="""\
+Present the results to the user.
+
+**STEP 1 — Present (text only, NO tool calls):**
+1. Summary of work done
+2. Key results
+3. Ask: satisfied, or want changes?
+
+**STEP 2 — After user responds, call set_output:**
+- set_output("next_action", "new_topic")   — if starting fresh
+- set_output("next_action", "revise")      — if changes needed
+- set_output("feedback", "what to change") — only if revising
+""",
+    tools=[],
+)
+
+__all__ = ["intake_node", "worker_node", "review_node"]
+```
+
+## agent.py
+
+```python
+"""Agent graph construction for My Agent."""
+
+from pathlib import Path
+
+from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
+from framework.graph.edge import GraphSpec
+from framework.graph.executor import ExecutionResult
+from framework.graph.checkpoint_config import CheckpointConfig
+from framework.llm import LiteLLMProvider
+from framework.runner.tool_registry import ToolRegistry
+from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
+from framework.runtime.execution_stream import EntryPointSpec
+
+from .config import default_config, metadata
+from .nodes import intake_node, worker_node, review_node
+
+# Goal definition
+goal = Goal(
+    id="my-agent-goal",
+    name="My Agent Goal",
+    description="What this agent achieves.",
+    success_criteria=[
+        SuccessCriterion(id="sc-1", description="...", metric="...", target="...", weight=0.5),
+        SuccessCriterion(id="sc-2", description="...", metric="...", target="...", weight=0.5),
+    ],
+    constraints=[
+        Constraint(id="c-1", description="...", constraint_type="hard", category="quality"),
+    ],
+)
+
+# Node list
+nodes = [intake_node, worker_node, review_node]
+
+# Edge definitions
+edges = [
+    EdgeSpec(id="intake-to-worker", source="intake", target="worker",
+             condition=EdgeCondition.ON_SUCCESS, priority=1),
+    EdgeSpec(id="worker-to-review", source="worker", target="review",
+             condition=EdgeCondition.ON_SUCCESS, priority=1),
+    # Feedback loop
+    EdgeSpec(id="review-to-worker", source="review", target="worker",
+             condition=EdgeCondition.CONDITIONAL,
+             condition_expr="str(next_action).lower() == 'revise'", priority=2),
+    # Loop back for new topic
+    EdgeSpec(id="review-to-intake", source="review", target="intake",
+             condition=EdgeCondition.CONDITIONAL,
+             condition_expr="str(next_action).lower() == 'new_topic'", priority=1),
+]
+
+# Graph configuration
+entry_node = "intake"
+entry_points = {"start": "intake"}
+pause_nodes = []
+terminal_nodes = []  # Forever-alive
+
+# Module-level vars read by AgentRunner.load()
+conversation_mode = "continuous"
+identity_prompt = "You are a helpful agent."
+loop_config = {"max_iterations": 100, "max_tool_calls_per_turn": 20, "max_history_tokens": 32000}
+
+
+class MyAgent:
+    def __init__(self, config=None):
+        self.config = config or default_config
+        self.goal = goal
+        self.nodes = nodes
+        self.edges = edges
+        self.entry_node = entry_node
+        self.entry_points = entry_points
+        self.pause_nodes = pause_nodes
+        self.terminal_nodes = terminal_nodes
+        self._graph = None
+        self._agent_runtime = None
+        self._tool_registry = None
+        self._storage_path = None
+
+    def _build_graph(self):
+        return GraphSpec(
+            id="my-agent-graph",
+            goal_id=self.goal.id,
+            version="1.0.0",
+            entry_node=self.entry_node,
+            entry_points=self.entry_points,
+            terminal_nodes=self.terminal_nodes,
+            pause_nodes=self.pause_nodes,
+            nodes=self.nodes,
+            edges=self.edges,
+            default_model=self.config.model,
+            max_tokens=self.config.max_tokens,
+            loop_config=loop_config,
+            conversation_mode=conversation_mode,
+            identity_prompt=identity_prompt,
+        )
+
+    def _setup(self, mock_mode=False):
+        self._storage_path = Path.home() / ".hive" / "agents" / "my_agent"
+        self._storage_path.mkdir(parents=True, exist_ok=True)
+        self._tool_registry = ToolRegistry()
+        mcp_config = Path(__file__).parent / "mcp_servers.json"
+        if mcp_config.exists():
+            self._tool_registry.load_mcp_config(mcp_config)
+        llm = None
+        if not mock_mode:
+            llm = LiteLLMProvider(model=self.config.model, api_key=self.config.api_key, api_base=self.config.api_base)
+        tools = list(self._tool_registry.get_tools().values())
+        tool_executor = self._tool_registry.get_executor()
+        self._graph = self._build_graph()
+        self._agent_runtime = create_agent_runtime(
+            graph=self._graph, goal=self.goal, storage_path=self._storage_path,
+            entry_points=[EntryPointSpec(id="default", name="Default", entry_node=self.entry_node,
+                                         trigger_type="manual", isolation_level="shared")],
+            llm=llm, tools=tools, tool_executor=tool_executor,
+            checkpoint_config=CheckpointConfig(enabled=True, checkpoint_on_node_complete=True,
+                                                checkpoint_max_age_days=7, async_checkpoint=True),
+        )
+
+    async def start(self, mock_mode=False):
+        if self._agent_runtime is None:
+            self._setup(mock_mode=mock_mode)
+        if not self._agent_runtime.is_running:
+            await self._agent_runtime.start()
+
+    async def stop(self):
+        if self._agent_runtime and self._agent_runtime.is_running:
+            await self._agent_runtime.stop()
+        self._agent_runtime = None
+
+    async def trigger_and_wait(self, entry_point="default", input_data=None, timeout=None, session_state=None):
+        if self._agent_runtime is None:
+            raise RuntimeError("Agent not started. Call start() first.")
+        return await self._agent_runtime.trigger_and_wait(
+            entry_point_id=entry_point, input_data=input_data or {}, session_state=session_state)
+
+    async def run(self, context, mock_mode=False, session_state=None):
+        await self.start(mock_mode=mock_mode)
+        try:
+            result = await self.trigger_and_wait("default", context, session_state=session_state)
+            return result or ExecutionResult(success=False, error="Execution timeout")
+        finally:
+            await self.stop()
+
+    def info(self):
+        return {
+            "name": metadata.name, "version": metadata.version, "description": metadata.description,
+            "goal": {"name": self.goal.name, "description": self.goal.description},
+            "nodes": [n.id for n in self.nodes], "edges": [e.id for e in self.edges],
+            "entry_node": self.entry_node, "entry_points": self.entry_points,
+            "terminal_nodes": self.terminal_nodes,
+            "client_facing_nodes": [n.id for n in self.nodes if n.client_facing],
+        }
+
+    def validate(self):
+        errors, warnings = [], []
+        node_ids = {n.id for n in self.nodes}
+        for e in self.edges:
+            if e.source not in node_ids: errors.append(f"Edge {e.id}: source '{e.source}' not found")
+            if e.target not in node_ids: errors.append(f"Edge {e.id}: target '{e.target}' not found")
+        if self.entry_node not in node_ids: errors.append(f"Entry node '{self.entry_node}' not found")
+        for t in self.terminal_nodes:
+            if t not in node_ids: errors.append(f"Terminal node '{t}' not found")
+        for ep_id, nid in self.entry_points.items():
+            if nid not in node_ids: errors.append(f"Entry point '{ep_id}' references unknown node '{nid}'")
+        return {"valid": len(errors) == 0, "errors": errors, "warnings": warnings}
+
+
+default_agent = MyAgent()
+```
+
+## agent.py — Async Entry Points Variant
+
+When an agent needs timers, webhooks, or event-driven triggers, add
+`async_entry_points` and optionally `runtime_config` as module-level variables.
+These are IN ADDITION to the standard variables above.
+
+```python
+# Additional imports for async entry points
+from framework.graph.edge import GraphSpec, AsyncEntryPointSpec
+from framework.runtime.agent_runtime import (
+    AgentRuntime, AgentRuntimeConfig, create_agent_runtime,
+)
+
+# ... (goal, nodes, edges, entry_node, entry_points, etc. as above) ...
+
+# Async entry points — event-driven triggers
+async_entry_points = [
+    # Timer with cron: daily at 9am
+    AsyncEntryPointSpec(
+        id="daily-check",
+        name="Daily Check",
+        entry_node="process-node",
+        trigger_type="timer",
+        trigger_config={"cron": "0 9 * * *"},
+        isolation_level="shared",
+        max_concurrent=1,
+    ),
+    # Timer with fixed interval: every 20 minutes
+    AsyncEntryPointSpec(
+        id="scheduled-check",
+        name="Scheduled Check",
+        entry_node="process-node",
+        trigger_type="timer",
+        trigger_config={"interval_minutes": 20, "run_immediately": False},
+        isolation_level="shared",
+        max_concurrent=1,
+    ),
+    # Event: reacts to webhook events
+    AsyncEntryPointSpec(
+        id="webhook-event",
+        name="Webhook Event Handler",
+        entry_node="process-node",
+        trigger_type="event",
+        trigger_config={"event_types": ["webhook_received"]},
+        isolation_level="shared",
+        max_concurrent=10,
+    ),
+]
+
+# Webhook server config (only needed if using webhooks)
+runtime_config = AgentRuntimeConfig(
+    webhook_host="127.0.0.1",
+    webhook_port=8080,
+    webhook_routes=[
+        {
+            "source_id": "my-source",
+            "path": "/webhooks/my-source",
+            "methods": ["POST"],
+        },
+    ],
+)
+```
+
+**Key rules for async entry points:**
+- `async_entry_points` is a list of `AsyncEntryPointSpec` (NOT `EntryPointSpec`)
+- `runtime_config` is `AgentRuntimeConfig` (NOT `RuntimeConfig` from config.py)
+- Valid trigger_types: `timer`, `event`, `webhook`, `manual`, `api`
+- Valid isolation_levels: `isolated`, `shared`, `synchronized`
+- Timer trigger_config (cron): `{"cron": "0 9 * * *"}` — standard 5-field cron expression
+- Timer trigger_config (interval): `{"interval_minutes": float, "run_immediately": bool}`
+- Event trigger_config: `{"event_types": ["webhook_received"], "filter_stream": "...", "filter_node": "..."}`
+- Use `isolation_level="shared"` for async entry points that need to read
+  the primary session's memory (e.g., user-configured rules)
+- In this variant, `_build_graph()` must also pass `async_entry_points` to `GraphSpec` (the base template above does not)
+- Reference: `exports/gmail_inbox_guardian/agent.py`
+
+## __init__.py
+
+**CRITICAL:** The runner imports the package (`__init__.py`) and reads ALL module-level
+variables via `getattr()`. Every variable defined in `agent.py` that the runner needs
+MUST be re-exported here. Missing exports cause silent failures (variables default to
+`None` or `{}`), leading to "must define goal, nodes, edges" errors or graph validation
+failures like "node X is unreachable".
+
+```python
+"""My Agent — description."""
+
+from .agent import (
+    MyAgent,
+    default_agent,
+    goal,
+    nodes,
+    edges,
+    entry_node,
+    entry_points,
+    pause_nodes,
+    terminal_nodes,
+    conversation_mode,
+    identity_prompt,
+    loop_config,
+)
+from .config import default_config, metadata
+
+__all__ = [
+    "MyAgent",
+    "default_agent",
+    "goal",
+    "nodes",
+    "edges",
+    "entry_node",
+    "entry_points",
+    "pause_nodes",
+    "terminal_nodes",
+    "conversation_mode",
+    "identity_prompt",
+    "loop_config",
+    "default_config",
+    "metadata",
+]
+```
+
+**If the agent uses async entry points**, also import and export:
+```python
+from .agent import (
+    ...,
+    async_entry_points,
+    runtime_config,  # Only if using webhooks
+)
+
+__all__ = [
+    ...,
+    "async_entry_points",
+    "runtime_config",
+]
+```
+
+## __main__.py
+
+```python
+"""CLI entry point for My Agent."""
+
+import asyncio, json, logging, sys
+import click
+from .agent import default_agent, MyAgent
+
+
+def setup_logging(verbose=False, debug=False):
+    if debug: level, fmt = logging.DEBUG, "%(asctime)s %(name)s: %(message)s"
+    elif verbose: level, fmt = logging.INFO, "%(message)s"
+    else: level, fmt = logging.WARNING, "%(levelname)s: %(message)s"
+    logging.basicConfig(level=level, format=fmt, stream=sys.stderr)
+
+
+@click.group()
+@click.version_option(version="1.0.0")
+def cli():
+    """My Agent — description."""
+    pass
+
+
+@cli.command()
+@click.option("--topic", "-t", required=True)
+@click.option("--mock", is_flag=True)
+@click.option("--verbose", "-v", is_flag=True)
+def run(topic, mock, verbose):
+    """Execute the agent."""
+    setup_logging(verbose=verbose)
+    result = asyncio.run(default_agent.run({"topic": topic}, mock_mode=mock))
+    click.echo(json.dumps({"success": result.success, "output": result.output}, indent=2, default=str))
+    sys.exit(0 if result.success else 1)
+
+
+@cli.command()
+@click.option("--mock", is_flag=True)
+def tui(mock):
+    """Launch TUI dashboard."""
+    from pathlib import Path
+    from framework.tui.app import AdenTUI
+    from framework.llm import LiteLLMProvider
+    from framework.runner.tool_registry import ToolRegistry
+    from framework.runtime.agent_runtime import create_agent_runtime
+    from framework.runtime.execution_stream import EntryPointSpec
+
+    async def run_tui():
+        agent = MyAgent()
+        agent._tool_registry = ToolRegistry()
+        storage = Path.home() / ".hive" / "agents" / "my_agent"
+        storage.mkdir(parents=True, exist_ok=True)
+        mcp_cfg = Path(__file__).parent / "mcp_servers.json"
+        if mcp_cfg.exists(): agent._tool_registry.load_mcp_config(mcp_cfg)
+        llm = None if mock else LiteLLMProvider(model=agent.config.model, api_key=agent.config.api_key, api_base=agent.config.api_base)
+        runtime = create_agent_runtime(
+            graph=agent._build_graph(), goal=agent.goal, storage_path=storage,
+            entry_points=[EntryPointSpec(id="start", name="Start", entry_node="intake", trigger_type="manual", isolation_level="isolated")],
+            llm=llm, tools=list(agent._tool_registry.get_tools().values()), tool_executor=agent._tool_registry.get_executor())
+        await runtime.start()
+        try:
+            app = AdenTUI(runtime)
+            await app.run_async()
+        finally:
+            await runtime.stop()
+    asyncio.run(run_tui())
+
+
+@cli.command()
+def info():
+    """Show agent info."""
+    data = default_agent.info()
+    click.echo(f"Agent: {data['name']}\nVersion: {data['version']}\nDescription: {data['description']}")
+    click.echo(f"Nodes: {', '.join(data['nodes'])}\nClient-facing: {', '.join(data['client_facing_nodes'])}")
+
+
+@cli.command()
+def validate():
+    """Validate agent structure."""
+    v = default_agent.validate()
+    if v["valid"]: click.echo("Agent is valid")
+    else:
+        click.echo("Errors:")
+        for e in v["errors"]: click.echo(f"  {e}")
+    sys.exit(0 if v["valid"] else 1)
+
+
+if __name__ == "__main__":
+    cli()
+```
+
+## mcp_servers.json
+
+```json
+{
+  "hive-tools": {
+    "transport": "stdio",
+    "command": "uv",
+    "args": ["run", "python", "mcp_server.py", "--stdio"],
+    "cwd": "../../tools",
+    "description": "Hive tools MCP server"
+  }
+}
+```
+
+**CRITICAL FORMAT RULES:**
+- NO `"mcpServers"` wrapper (flat dict, not nested)
+- `cwd` MUST be `"../../tools"` (relative from `exports/AGENT_NAME/` to `tools/`)
+- `command` MUST be `"uv"` with `"args": ["run", "python", ...]` (NOT bare `"python"`)
+
+## tests/conftest.py
+
+```python
+"""Test fixtures."""
+
+import sys
+from pathlib import Path
+
+import pytest
+import pytest_asyncio
+
+_repo_root = Path(__file__).resolve().parents[3]
+for _p in ["exports", "core"]:
+    _path = str(_repo_root / _p)
+    if _path not in sys.path:
+        sys.path.insert(0, _path)
+
+AGENT_PATH = str(Path(__file__).resolve().parents[1])
+
+
+@pytest.fixture(scope="session")
+def mock_mode():
+    return True
+
+
+@pytest_asyncio.fixture(scope="session")
+async def runner(tmp_path_factory, mock_mode):
+    from framework.runner.runner import AgentRunner
+    storage = tmp_path_factory.mktemp("agent_storage")
+    r = AgentRunner.load(AGENT_PATH, mock_mode=mock_mode, storage_path=storage)
+    r._setup()
+    yield r
+    await r.cleanup_async()
+```
+
+## entry_points Format
+
+MUST be: `{"start": "first-node-id"}`
+NOT: `{"first-node-id": ["input_keys"]}` (WRONG)
+NOT: `{"first-node-id"}` (WRONG — this is a set)
@@ -0,0 +1,433 @@
+# Hive Agent Framework — Condensed Reference
+
+## Architecture
+
+Agents are Python packages in `exports/`:
+```
+exports/my_agent/
+├── __init__.py          # MUST re-export ALL module-level vars from agent.py
+├── __main__.py          # CLI (run, tui, info, validate, shell)
+├── agent.py             # Graph construction (goal, edges, agent class)
+├── config.py            # Runtime config
+├── nodes/__init__.py    # Node definitions (NodeSpec)
+├── mcp_servers.json     # MCP tool server config
+└── tests/               # pytest tests
+```
+
+## Agent Loading Contract
+
+`AgentRunner.load()` imports the package (`__init__.py`) and reads these
+module-level variables via `getattr()`:
+
+| Variable | Required | Default if missing | Consequence |
+|----------|----------|--------------------|-------------|
+| `goal` | YES | `None` | **FATAL** — "must define goal, nodes, edges" |
+| `nodes` | YES | `None` | **FATAL** — same error |
+| `edges` | YES | `None` | **FATAL** — same error |
+| `entry_node` | no | `nodes[0].id` | Probably wrong node |
+| `entry_points` | no | `{}` | **Nodes unreachable** — validation fails |
+| `terminal_nodes` | no | `[]` | OK for forever-alive |
+| `pause_nodes` | no | `[]` | OK |
+| `conversation_mode` | no | not passed | Isolated mode (no context carryover) |
+| `identity_prompt` | no | not passed | No agent-level identity |
+| `loop_config` | no | `{}` | No iteration limits |
+| `async_entry_points` | no | `[]` | No async triggers (timers, webhooks, events) |
+| `runtime_config` | no | `None` | No webhook server |
+
+**CRITICAL:** `__init__.py` MUST import and re-export ALL of these from
+`agent.py`. Missing exports silently fall back to defaults, causing
+hard-to-debug failures.
+
+**Why `default_agent.validate()` is NOT sufficient:**
+`validate()` checks the agent CLASS's internal graph (self.nodes, self.edges).
+These are always correct because the constructor references agent.py's module
+vars directly. But `AgentRunner.load()` reads from the PACKAGE (`__init__.py`),
+not the class. So `validate()` passes while `AgentRunner.load()` fails.
+Always test with `AgentRunner.load("exports/{name}")` — this is the same
+code path the TUI and `hive run` use.
+
+## Goal
+
+Defines success criteria and constraints:
+```python
+goal = Goal(
+    id="kebab-case-id",
+    name="Display Name",
+    description="What the agent does",
+    success_criteria=[
+        SuccessCriterion(id="sc-id", description="...", metric="...", target="...", weight=0.25),
+    ],
+    constraints=[
+        Constraint(id="c-id", description="...", constraint_type="hard", category="quality"),
+    ],
+)
+```
+- 3-5 success criteria, weights sum to 1.0
+- 1-5 constraints (hard/soft, categories: quality, accuracy, interaction, functional)
+
+## NodeSpec Fields
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| id | str | required | kebab-case identifier |
+| name | str | required | Display name |
+| description | str | required | What the node does |
+| node_type | str | required | Always `"event_loop"` |
+| input_keys | list[str] | required | Memory keys this node reads |
+| output_keys | list[str] | required | Memory keys this node writes via set_output |
+| system_prompt | str | "" | LLM instructions |
+| tools | list[str] | [] | Tool names from MCP servers |
+| client_facing | bool | False | If True, streams to user and blocks for input |
+| nullable_output_keys | list[str] | [] | Keys that may remain unset |
+| max_node_visits | int | 0 | 0=unlimited (default); >1 for one-shot feedback loops |
+| max_retries | int | 3 | Retries on failure |
+| success_criteria | str | "" | Natural language for judge evaluation |
+
+## EdgeSpec Fields
+
+| Field | Type | Description |
+|-------|------|-------------|
+| id | str | kebab-case identifier |
+| source | str | Source node ID |
+| target | str | Target node ID |
+| condition | EdgeCondition | ON_SUCCESS, ON_FAILURE, ALWAYS, CONDITIONAL |
+| condition_expr | str | Python expression evaluated against memory (for CONDITIONAL) |
+| priority | int | Positive=forward (evaluated first), negative=feedback (loop-back) |
+
+## Key Patterns
+
+### STEP 1/STEP 2 (Client-Facing Nodes)
+```
+**STEP 1 — Respond to the user (text only, NO tool calls):**
+[Present information, ask questions]
+
+**STEP 2 — After the user responds, call set_output:**
+- set_output("key", "value based on user response")
+```
+This prevents premature set_output before user interaction.
+
+### Fewer, Richer Nodes (CRITICAL)
+
+**Hard limit: 2-4 nodes for most agents.** Never exceed 5 unless the user
+explicitly requests a complex multi-phase pipeline.
+
+Each node boundary serializes outputs to shared memory and **destroys** all
+in-context information: tool call results, intermediate reasoning, conversation
+history. A research node that searches, fetches, and analyzes in ONE node keeps
+all source material in its conversation context. Split across 3 nodes, each
+downstream node only sees the serialized summary string.
+
+**Decision framework — merge unless ANY of these apply:**
+1. **Client-facing boundary** — Autonomous and client-facing work MUST be
+   separate nodes (different interaction models)
+2. **Disjoint tool sets** — If tools are fundamentally different (e.g., web
+   search vs database), separate nodes make sense
+3. **Parallel execution** — Fan-out branches must be separate nodes
+
+**Red flags that you have too many nodes:**
+- An autonomous node with 0 tools (pure LLM reasoning) → merge into predecessor/successor (client-facing nodes may legitimately have no tools)
+- A node that sets only 1 trivial output → collapse into predecessor
+- Multiple consecutive autonomous nodes → combine into one rich node
+- A "report" node that presents analysis → merge into the client-facing node
+- A "confirm" or "schedule" node that doesn't call any external service → remove
+
+**Typical agent structure (3 nodes):**
+```
+intake (client-facing) ←→ process (autonomous) ←→ review (client-facing)
+```
+Or for simpler agents, just 2 nodes:
+```
+interact (client-facing) → process (autonomous) → interact (loop)
+```
+
+### nullable_output_keys
+For input keys that only arrive on certain edges, list them in `nullable_output_keys` so they may remain unset:
+```python
+research_node = NodeSpec(
+    input_keys=["brief", "feedback"],
+    nullable_output_keys=["feedback"],  # Only present on feedback edge
+    max_node_visits=3,
+)
+```
+
+### Mutually Exclusive Outputs
+For routing decisions:
+```python
+review_node = NodeSpec(
+    output_keys=["approved", "feedback"],
+    nullable_output_keys=["approved", "feedback"],  # Node sets one or the other
+)
+```
+
+### Forever-Alive Pattern
+`terminal_nodes=[]` — every node has outgoing edges, graph loops until user exits.
+Use `conversation_mode="continuous"` to preserve context across transitions.
+
+### set_output
+- Synthetic tool injected by framework
+- Call separately from real tool calls (separate turn)
+- `set_output("key", "value")` stores to shared memory
+
+## Edge Conditions
+
+| Condition | When |
+|-----------|------|
+| ON_SUCCESS | Node completed successfully |
+| ON_FAILURE | Node failed |
+| ALWAYS | Unconditional |
+| CONDITIONAL | condition_expr evaluates to True against memory |
+
+condition_expr examples:
+- `"needs_more_research == True"`
+- `"str(next_action).lower() == 'new_agent'"`
+- `"feedback is not None"`
+
+## Graph Lifecycle
+
+| Pattern | terminal_nodes | When |
+|---------|---------------|------|
+| **Forever-alive** | `[]` | **DEFAULT for all agents** |
+| Linear | `["last-node"]` | Only if user explicitly requests one-shot/batch |
+
+**Forever-alive is the default.** Always use `terminal_nodes=[]`.
+The framework default for `max_node_visits` is 0 (unbounded), so
+nodes work correctly in forever-alive loops without explicit override.
+Only set `max_node_visits > 0` in one-shot agents with feedback loops.
+Every node must have at least one outgoing edge — no dead ends. The
+user exits by closing the TUI. Only use terminal nodes if the user
+explicitly asks for a batch/one-shot agent that runs once and exits.
+
+## Continuous Conversation Mode
+
+`conversation_mode` has ONLY two valid states:
+- `"continuous"` — recommended for interactive agents
+- Omit entirely — isolated per-node conversations (each node starts fresh)
+
+**INVALID values** (do NOT use): `"client_facing"`, `"interactive"`,
+`"adaptive"`, `"shared"`. These do not exist in the framework.
+
+When `conversation_mode="continuous"`:
+- Same conversation thread carries across node transitions
+- Layered system prompts: identity (agent-level) + narrative + focus (per-node)
+- Transition markers inserted at boundaries
+- Compaction happens opportunistically at phase transitions
+
+## loop_config
+
+Only three valid keys:
+```python
+loop_config = {
+    "max_iterations": 100,          # Max LLM turns per node visit
+    "max_tool_calls_per_turn": 20,  # Max tool calls per LLM response
+    "max_history_tokens": 32000,    # Triggers conversation compaction
+}
+```
+**INVALID keys** (do NOT use): `"strategy"`, `"mode"`, `"timeout"`,
+`"temperature"`. These are silently ignored or cause errors.
+
+## Data Tools (Spillover)
+
+For large data that exceeds context:
+- `save_data(filename, data)` — Write to session data dir
+- `load_data(filename, offset, limit)` — Read with pagination
+- `list_data_files()` — List files
+- `serve_file_to_user(filename, label)` — Clickable file:// URI
+
+`data_dir` is auto-injected by framework — LLM never sees it.
+
+## Fan-Out / Fan-In
+
+Multiple ON_SUCCESS edges from same source → parallel execution via asyncio.gather().
+- Parallel nodes must have disjoint output_keys
+- Only one branch may have client_facing nodes
+- Fan-in node gets all outputs in shared memory
+
+## Judge System
+
+- **Implicit** (default): ACCEPTs when LLM finishes with no tool calls and all required outputs set
+- **SchemaJudge**: Validates against Pydantic model
+- **Custom**: Implement `evaluate(context) -> JudgeVerdict`
+
+Judge is the SOLE acceptance mechanism — no ad-hoc framework gating.
+
+## Async Entry Points (Webhooks, Timers, Events)
+
+For agents that need to react to external events (incoming emails, scheduled
+tasks, API calls), use `AsyncEntryPointSpec` and optionally `AgentRuntimeConfig`.
+
+### Imports
+```python
+from framework.graph.edge import GraphSpec, AsyncEntryPointSpec
+from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig, create_agent_runtime
+```
+Note: `AsyncEntryPointSpec` is in `framework.graph.edge` (the graph/declarative layer).
+`AgentRuntimeConfig` is in `framework.runtime.agent_runtime` (the runtime layer).
+
+### AsyncEntryPointSpec Fields
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| id | str | required | Unique identifier |
+| name | str | required | Human-readable name |
+| entry_node | str | required | Node ID to start execution from |
+| trigger_type | str | `"manual"` | `webhook`, `api`, `timer`, `event`, `manual` |
+| trigger_config | dict | `{}` | Trigger-specific config (see below) |
+| isolation_level | str | `"shared"` | `isolated`, `shared`, `synchronized` |
+| priority | int | `0` | Execution priority (higher = more priority) |
+| max_concurrent | int | `10` | Max concurrent executions |
+
+### Trigger Types
+
+**timer** — Fires on a schedule. Two modes: cron expressions or fixed interval.
+
+Cron (preferred for precise scheduling):
+```python
+AsyncEntryPointSpec(
+    id="daily-digest",
+    name="Daily Digest",
+    entry_node="check-node",
+    trigger_type="timer",
+    trigger_config={"cron": "0 9 * * *"},  # daily at 9am
+    isolation_level="shared",
+    max_concurrent=1,
+)
+```
+- `cron` (str) — standard cron expression (5 fields: min hour dom month dow)
+- Examples: `"0 9 * * *"` (daily 9am), `"0 9 * * MON-FRI"` (weekdays 9am), `"*/30 * * * *"` (every 30 min)
+
+Fixed interval (simpler, for polling-style tasks):
+```python
+AsyncEntryPointSpec(
+    id="scheduled-check",
+    name="Scheduled Check",
+    entry_node="check-node",
+    trigger_type="timer",
+    trigger_config={"interval_minutes": 20, "run_immediately": False},
+    isolation_level="shared",
+    max_concurrent=1,
+)
+```
+- `interval_minutes` (float) — how often to fire
+- `run_immediately` (bool, default False) — fire once on startup
+
+**event** — Subscribes to EventBus (e.g., webhook events):
+```python
+AsyncEntryPointSpec(
+    id="email-event",
+    name="Email Event Handler",
+    entry_node="process-emails",
+    trigger_type="event",
+    trigger_config={"event_types": ["webhook_received"]},
+    isolation_level="shared",
+    max_concurrent=10,
+)
+```
+- `event_types` (list[str]) — EventType values to subscribe to
+- `filter_stream` (str, optional) — only receive from this stream
+- `filter_node` (str, optional) — only receive from this node
+
+**webhook** — HTTP endpoint (requires AgentRuntimeConfig):
+The webhook server publishes `WEBHOOK_RECEIVED` events on the EventBus.
+An entry point with `trigger_type="event"` and
+`trigger_config={"event_types": ["webhook_received"]}` subscribes to those
+events. The flow is:
+```
+HTTP POST /webhooks/gmail → WebhookServer → EventBus (WEBHOOK_RECEIVED)
+  → event entry point → triggers graph execution from entry_node
+```
+
+**manual** — Triggered programmatically via `runtime.trigger()`.
+
+### Isolation Levels
+
+| Level | Meaning |
+|-------|---------|
+| `isolated` | Private state per execution |
+| `shared` | Eventual consistency — async executions can read primary session memory |
+| `synchronized` | Shared with write locks (use when ordering matters) |
+
+For most async patterns, use `shared` — the async execution reads the primary
+session's memory (e.g., user-configured rules) and runs its own workflow.
+
+### AgentRuntimeConfig (for webhook servers)
+
+```python
+from framework.runtime.agent_runtime import AgentRuntimeConfig
+
+runtime_config = AgentRuntimeConfig(
+    webhook_host="127.0.0.1",
+    webhook_port=8080,
+    webhook_routes=[
+        {
+            "source_id": "gmail",
+            "path": "/webhooks/gmail",
+            "methods": ["POST"],
+            "secret": None,  # Optional HMAC-SHA256 secret
+        },
+    ],
+)
+```
+`runtime_config` is a module-level variable read by `AgentRunner.load()`.
+The runner passes it to `create_agent_runtime()`. On `runtime.start()`,
+if webhook_routes is non-empty, an embedded HTTP server starts.
+
+### Session Sharing
+
+Timer and event triggers automatically call `_get_primary_session_state()`
+before execution. This finds the active user-facing session and provides
+its memory to the async execution, filtered to only the async entry node's
+`input_keys`. This means the async flow can read user-configured values
+(like rules, preferences) without needing separate configuration.
+
+### Module-Level Variables
+
+Agents with async entry points must export `async_entry_points`, and also
+`runtime_config` when they use webhooks:
+```python
+# In agent.py:
+async_entry_points = [AsyncEntryPointSpec(...), ...]
+runtime_config = AgentRuntimeConfig(...)  # Only if using webhooks
+```
+
+Each of these variables that the agent defines must be re-exported from `__init__.py`:
+```python
+from .agent import (
+    ..., async_entry_points, runtime_config,
+)
+```
+
+### Reference Agent
+
+See `exports/gmail_inbox_guardian/agent.py` for a complete example with:
+- Primary client-facing intake node (user configures rules)
+- Timer-based scheduled inbox checks (every 20 min)
+- Webhook-triggered email event handling
+- Shared isolation for memory access across streams
+
+## Framework Capabilities
+
+**Works well:** Multi-turn conversations, HITL review, tool orchestration, structured outputs, parallel execution, context management, error recovery, session persistence.
+
+**Limitations:** LLM latency (2-10s/turn), context window limits (~128K), cost per run, rate limits, node boundaries lose context.
+
+**Not designed for:** Sub-second responses, millions of items, real-time streaming, guaranteed determinism, offline/air-gapped.
+
+## Tool Discovery
+
+Do NOT rely on a static tool list — it will be outdated. Always use
+`discover_mcp_tools()` to get the current tool catalog from the
+hive-tools MCP server. This returns full schemas including parameter
+names, types, and descriptions.
+
+```
+discover_mcp_tools()                          # default: hive-tools
+discover_mcp_tools("exports/my_agent/mcp_servers.json")  # specific agent
+```
+
+Common tool categories (verify via discover_mcp_tools):
+- **Web**: search, scrape, PDF
+- **Data**: save/load/append/list data files, serve to user
+- **File**: view, write, replace, diff, list, grep
+- **Communication**: email, gmail, slack, telegram
+- **CRM**: hubspot, apollo, calcom
+- **GitHub**: stargazers, user profiles, repos
+- **Vision**: image analysis
+- **Time**: current time
@@ -0,0 +1,31 @@
+"""Test fixtures for Hive Coder agent."""
+
+import sys
+from pathlib import Path
+
+import pytest
+import pytest_asyncio
+
+# Fourth ancestor of this file; presumably the repository root — confirm
+# against the actual location of this conftest.
+_repo_root = Path(__file__).resolve().parents[3]
+# Make <repo>/exports and <repo>/core importable. Each missing entry is
+# inserted at index 0, so "core" ends up ahead of "exports" on sys.path.
+for _p in ["exports", "core"]:
+    _path = str(_repo_root / _p)
+    if _path not in sys.path:
+        sys.path.insert(0, _path)
+
+# Directory one level above the folder containing this file; passed to
+# AgentRunner.load() by the ``runner`` fixture as the agent to load.
+AGENT_PATH = str(Path(__file__).resolve().parents[1])
+
+
+@pytest.fixture(scope="session")
+def mock_mode():
+    """Session-scoped flag forwarded to ``AgentRunner.load(mock_mode=...)``; always True."""
+    return True
+
+
+@pytest_asyncio.fixture(scope="session")
+async def runner(tmp_path_factory, mock_mode):
+    """Yield one ``AgentRunner`` for the whole test session.
+
+    The runner is loaded from ``AGENT_PATH`` with a fresh temporary storage
+    directory, set up once, and cleaned up after the last test finishes.
+    """
+    # Imported inside the fixture rather than at module top; presumably so the
+    # sys.path adjustments above are guaranteed to be in place — confirm.
+    from framework.runner.runner import AgentRunner
+
+    storage = tmp_path_factory.mktemp("agent_storage")
+    r = AgentRunner.load(AGENT_PATH, mock_mode=mock_mode, storage_path=storage)
+    # NOTE(review): calls a private method of AgentRunner; its exact effect is
+    # not visible from this file.
+    r._setup()
+    yield r
+    # Teardown: runs once at the end of the session.
+    await r.cleanup_async()
@@ -245,20 +245,14 @@ class GraphBuilder:
            warnings.append(f"Node '{node.id}' should have a description")

        # Type-specific validation
-        if node.node_type == "llm_tool_use":
-            if not node.tools:
-                errors.append(f"LLM tool node '{node.id}' must specify tools")
-            if not node.system_prompt:
-                warnings.append(f"LLM node '{node.id}' should have a system_prompt")
+        if node.node_type == "event_loop":
+            if node.tools and not node.system_prompt:
+                warnings.append(f"Event loop node '{node.id}' should have a system_prompt")

        if node.node_type == "router":
            if not node.routes:
                errors.append(f"Router node '{node.id}' must specify routes")

-        if node.node_type == "function":
-            if not node.function:
-                errors.append(f"Function node '{node.id}' must specify function name")
-
        # Check input/output keys
        if not node.input_keys:
            suggestions.append(f"Consider specifying input_keys for '{node.id}'")
@@ -400,9 +394,13 @@ class GraphBuilder:
        if not terminal_candidates and self.session.nodes:
            warnings.append("No terminal nodes found (all nodes have outgoing edges)")

-        # Check reachability
+        # Check reachability from ALL entry candidates (not just the first one).
+        # Agents with async entry points have multiple nodes with no incoming
+        # edges (e.g., a primary entry node and an event-driven entry node).
        if entry_candidates and self.session.nodes:
-            reachable = self._compute_reachable(entry_candidates[0])
+            reachable = set()
+            for candidate in entry_candidates:
+                reachable |= self._compute_reachable(candidate)
            unreachable = [n.id for n in self.session.nodes if n.id not in reachable]
            if unreachable:
                errors.append(f"Unreachable nodes: {unreachable}")
@@ -44,11 +44,25 @@ def _configure_paths():
        if exports_str not in sys.path:
            sys.path.insert(0, exports_str)

+    # Add examples/templates/ to sys.path so template agents are importable
+    templates_dir = project_root / "examples" / "templates"
+    if templates_dir.is_dir():
+        templates_str = str(templates_dir)
+        if templates_str not in sys.path:
+            sys.path.insert(0, templates_str)
+
    # Ensure core/ is also in sys.path (for non-editable-install scenarios)
    core_str = str(project_root / "core")
    if (project_root / "core").is_dir() and core_str not in sys.path:
        sys.path.insert(0, core_str)

+    # Add core/framework/agents/ so framework agents are importable as top-level packages
+    framework_agents_dir = project_root / "core" / "framework" / "agents"
+    if framework_agents_dir.is_dir():
+        fa_str = str(framework_agents_dir)
+        if fa_str not in sys.path:
+            sys.path.insert(0, fa_str)
+

 def main():
    _configure_paths()
@@ -0,0 +1,116 @@
+"""Shared Hive configuration utilities.
+
+Centralises reading of ~/.hive/configuration.json so that the runner
+and every agent template share one implementation instead of copy-pasting
+helper functions.
+"""
+
+import json
+import os
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+
+from framework.graph.edge import DEFAULT_MAX_TOKENS
+
+# ---------------------------------------------------------------------------
+# Low-level config file access
+# ---------------------------------------------------------------------------
+
+HIVE_CONFIG_FILE = Path.home() / ".hive" / "configuration.json"
+
+
+def get_hive_config() -> dict[str, Any]:
+    """Load hive configuration from ~/.hive/configuration.json.
+
+    Returns:
+        The parsed top-level JSON object, or an empty dict when the file is
+        missing, unreadable, not valid UTF-8, not valid JSON, or when its
+        top-level value is not a JSON object.
+    """
+    try:
+        # "utf-8-sig" transparently skips a leading byte-order mark if present.
+        with open(HIVE_CONFIG_FILE, encoding="utf-8-sig") as f:
+            config = json.load(f)
+    except (ValueError, OSError):
+        # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
+        # OSError covers a missing or unreadable file, so no separate
+        # exists() pre-check (and its race window) is needed.
+        return {}
+    # Callers chain .get() on the result, so any non-object top-level value
+    # (list, string, number, null) is treated as "no configuration".
+    return config if isinstance(config, dict) else {}
+
+
+# ---------------------------------------------------------------------------
+# Derived helpers
+# ---------------------------------------------------------------------------
+
+
+def get_preferred_model() -> str:
+    """Return the preferred LLM model as a ``"<provider>/<model>"`` string.
+
+    Both ``llm.provider`` and ``llm.model`` must be set (truthy) in the hive
+    configuration; otherwise the built-in default model is returned.
+    """
+    llm_settings = get_hive_config().get("llm", {})
+    provider = llm_settings.get("provider")
+    model = llm_settings.get("model")
+    if not (provider and model):
+        return "anthropic/claude-sonnet-4-20250514"
+    return f"{provider}/{model}"
+
+
+def get_max_tokens() -> int:
+    """Return ``llm.max_tokens`` from the hive config, or DEFAULT_MAX_TOKENS if unset."""
+    llm_settings = get_hive_config().get("llm", {})
+    return llm_settings.get("max_tokens", DEFAULT_MAX_TOKENS)
+
+
+def get_api_key() -> str | None:
+    """Resolve the LLM API key from the hive configuration.
+
+    Lookup order:
+    1. If ``llm.use_claude_code_subscription`` is truthy, the OAuth token
+       returned by ``get_claude_code_token()`` (used only when non-empty).
+    2. The environment variable whose name is stored in
+       ``llm.api_key_env_var``.
+
+    Returns:
+        The resolved key, or ``None`` when neither source yields one.
+    """
+    llm_settings = get_hive_config().get("llm", {})
+
+    if llm_settings.get("use_claude_code_subscription"):
+        # Best effort: an ImportError raised while importing or calling the
+        # token helper is ignored and the env-var lookup below is used instead.
+        try:
+            from framework.runner.runner import get_claude_code_token
+
+            oauth_token = get_claude_code_token()
+            if oauth_token:
+                return oauth_token
+        except ImportError:
+            pass
+
+    # Standard env-var path (covers ZAI Code and all API-key providers)
+    env_var_name = llm_settings.get("api_key_env_var")
+    return os.environ.get(env_var_name) if env_var_name else None
+
+
+def get_api_base() -> str | None:
+    """Return ``llm.api_base`` from the hive config, or ``None`` when it is not set."""
+    llm_settings = get_hive_config().get("llm", {})
+    return llm_settings.get("api_base")
+
+
+def get_llm_extra_kwargs() -> dict[str, Any]:
+    """Build extra keyword arguments for LiteLLMProvider.
+
+    Returns:
+        ``{"extra_headers": {"authorization": "Bearer <key>"}}`` when
+        ``llm.use_claude_code_subscription`` is truthy and ``get_api_key()``
+        yields a key; an empty dict in every other case.
+    """
+    if not get_hive_config().get("llm", {}).get("use_claude_code_subscription"):
+        return {}
+    # NOTE(review): get_api_key() falls back to the api_key_env_var value when
+    # no OAuth token is available, so that value would be sent as the Bearer
+    # token here — confirm this is intended.
+    bearer = get_api_key()
+    if not bearer:
+        return {}
+    return {
+        "extra_headers": {"authorization": f"Bearer {bearer}"},
+    }
+
+
+# ---------------------------------------------------------------------------
+# RuntimeConfig – shared across agent templates
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class RuntimeConfig:
+    """Agent runtime configuration loaded from ~/.hive/configuration.json.
+
+    Every field except ``temperature`` uses a ``default_factory``, so its
+    default is resolved when an instance is created (not at import time),
+    and each factory reads the configuration through its own helper call.
+    """
+
+    # "<provider>/<model>" string from get_preferred_model().
+    model: str = field(default_factory=get_preferred_model)
+    # Fixed default; not read from the configuration file.
+    temperature: float = 0.7
+    # llm.max_tokens, falling back to DEFAULT_MAX_TOKENS.
+    max_tokens: int = field(default_factory=get_max_tokens)
+    # Key resolved by get_api_key(); None when no source provides one.
+    api_key: str | None = field(default_factory=get_api_key)
+    # llm.api_base, or None when not configured.
+    api_base: str | None = field(default_factory=get_api_base)
+    # Extra provider kwargs from get_llm_extra_kwargs() (may be empty).
+    extra_kwargs: dict[str, Any] = field(default_factory=get_llm_extra_kwargs)
@@ -59,6 +59,13 @@ from .provider import (
    CredentialProvider,
    StaticProvider,
 )
+from .setup import (
+    CredentialSetupSession,
+    MissingCredential,
+    SetupResult,
+    detect_missing_credentials_from_nodes,
+    run_credential_setup_cli,
+)
 from .storage import (
    CompositeStorage,
    CredentialStorage,
@@ -68,6 +75,7 @@ from .storage import (
 )
 from .store import CredentialStore
 from .template import TemplateResolver
+from .validation import ensure_credential_key_env, validate_agent_credentials

 # Aden sync components (lazy import to avoid httpx dependency when not needed)
 # Usage: from core.framework.credentials.aden import AdenSyncProvider
@@ -111,6 +119,15 @@ __all__ = [
    "CredentialRefreshError",
    "CredentialValidationError",
    "CredentialDecryptionError",
+    # Validation
+    "ensure_credential_key_env",
+    "validate_agent_credentials",
+    # Interactive setup
+    "CredentialSetupSession",
+    "MissingCredential",
+    "SetupResult",
+    "detect_missing_credentials_from_nodes",
+    "run_credential_setup_cli",
    # Aden sync (optional - requires httpx)
    "AdenSyncProvider",
    "AdenCredentialClient",
--- a/Show More
+++ b/Show More
				`@@ -1 +0,0 @@`
				`../../.claude/skills/building-agents-construction`
				`@@ -1 +0,0 @@`
				`../../.claude/skills/building-agents-patterns`