Compare commits
142 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4046e4e379 | |||
| 34782a6b85 | |||
| d25d94e71b | |||
| 51f1b449cd | |||
| 804e47dde4 | |||
| 582c810d15 | |||
| cede629718 | |||
| 10941dc7fc | |||
| c1c16878e4 | |||
| 80a41b434b | |||
| 9a8e117f1d | |||
| 878603033a | |||
| 1c6f17e8db | |||
| 8f32ef8064 | |||
| e12bc96e21 | |||
| 2355d3d729 | |||
| a093a59cb0 | |||
| d7917988c3 | |||
| ae566a2027 | |||
| b15473d3f3 | |||
| 265bf885ec | |||
| e318281989 | |||
| 3e2a11d60d | |||
| 4b9f73310e | |||
| b17c26116d | |||
| 3114af75e4 | |||
| 7a6d10639b | |||
| 6ff29ea6aa | |||
| a23f01973a | |||
| 0aaa3a3eca | |||
| 82f05d1102 | |||
| 8ff6d9c8bd | |||
| a2e102fe15 | |||
| 119280da1a | |||
| 4d49f74d5a | |||
| 6a42b9c66b | |||
| fc4a39480a | |||
| b98afb01c8 | |||
| ccd6bb7656 | |||
| ea30e5c631 | |||
| d16a3c3b22 | |||
| a03bd78c2e | |||
| 3cca41aab1 | |||
| d19aaed946 | |||
| 9a7db8cf94 | |||
| f50630c551 | |||
| 0ef2e64733 | |||
| 3a8e121d43 | |||
| 23e249144d | |||
| 25014bfa89 | |||
| 78ea585779 | |||
| ac13c11f89 | |||
| 51d341b88c | |||
| 7dd70b8e31 | |||
| 84b332d989 | |||
| fd1826a267 | |||
| bcc6848275 | |||
| 75dd053a40 | |||
| 20f2aa09f2 | |||
| fb8c810b3d | |||
| b99b6c5cd3 | |||
| ad21cf4243 | |||
| 1e45cfff67 | |||
| 0280600a47 | |||
| 571ad518dc | |||
| fe37a25cf1 | |||
| e06138628c | |||
| 1ed0edd158 | |||
| 49dbc46082 | |||
| a16a4adc09 | |||
| b4ab1cbd56 | |||
| 6faa63f0d0 | |||
| f4737dcfe7 | |||
| 2b44af427f | |||
| 11f7401bc2 | |||
| db7b5180dd | |||
| 5b4e56252c | |||
| c69bc24598 | |||
| 0cf17e1c63 | |||
| feac803491 | |||
| 4aacec30d8 | |||
| b459a2f7a9 | |||
| ca7f6d3514 | |||
| ca8ede65f0 | |||
| b033c56ae5 | |||
| 9a177c46e1 | |||
| d49e858d32 | |||
| 20bea9cd7f | |||
| d7afa5dcf2 | |||
| 22e816bf86 | |||
| a7709d489c | |||
| 3240616808 | |||
| 18dfc997b8 | |||
| 92d0b6addf | |||
| b9f83d4d61 | |||
| 694feaffd2 | |||
| 9c16826ad3 | |||
| eb68e2143b | |||
| f305745295 | |||
| df4d0ad3fd | |||
| 9034d1dc71 | |||
| 537172d8ce | |||
| 20b2e4b3dd | |||
| fc22586752 | |||
| 646440eba3 | |||
| 53e5579326 | |||
| 29a1630d0f | |||
| 171f4ab2ae | |||
| a86043a2ec | |||
| 3947da2cf1 | |||
| 17caab6563 | |||
| a5ae071a03 | |||
| 9c33da7b8d | |||
| 94d31743b0 | |||
| 70db618c6e | |||
| 960a4549ef | |||
| 363a650dfa | |||
| b6e2634537 | |||
| 23146c8dae | |||
| 9f424f2fc0 | |||
| 25989d9f90 | |||
| 684da96a83 | |||
| abae7979cb | |||
| 49bce57fcf | |||
| 63d017fc21 | |||
| c52ce6bb49 | |||
| bcddd4ce77 | |||
| 017872f71b | |||
| 7e670ce0a8 | |||
| d32308b6d2 | |||
| 604d16e353 | |||
| db577785d6 | |||
| c9ae3a0541 | |||
| ed95dab9f3 | |||
| a6536cef94 | |||
| 3ccc81e81c | |||
| 94197cbcb9 | |||
| 3ee6d98905 | |||
| a96cd546c8 | |||
| eb33d4f1c2 | |||
| 4253956326 | |||
| d6b05bf337 |
@@ -1,46 +0,0 @@
|
||||
{
|
||||
"permissions": {
|
||||
"allow": [
|
||||
"Bash(npm install:*)",
|
||||
"Bash(npm test:*)",
|
||||
"Skill(building-agents-construction)",
|
||||
"Skill(building-agents-construction:*)",
|
||||
"Bash(PYTHONPATH=core:exports pytest:*)",
|
||||
"mcp__agent-builder__create_session",
|
||||
"mcp__agent-builder__get_session_status",
|
||||
"mcp__agent-builder__set_goal",
|
||||
"mcp__agent-builder__list_mcp_servers",
|
||||
"mcp__agent-builder__test_node",
|
||||
"mcp__agent-builder__add_node",
|
||||
"mcp__agent-builder__add_edge",
|
||||
"mcp__agent-builder__validate_graph",
|
||||
"Bash(ruff check:*)",
|
||||
"Bash(PYTHONPATH=core:exports python:*)",
|
||||
"mcp__agent-builder__list_tests",
|
||||
"mcp__agent-builder__generate_constraint_tests",
|
||||
"Bash(python -m agent:*)",
|
||||
"Bash(python agent.py:*)",
|
||||
"Bash(python -c:*)",
|
||||
"Bash(done)",
|
||||
"Bash(xargs cat:*)",
|
||||
"mcp__agent-builder__list_mcp_tools",
|
||||
"mcp__agent-builder__add_mcp_server",
|
||||
"Bash(gh issue list:*)",
|
||||
"WebFetch(domain:github.com)",
|
||||
"Bash(pip install:*)",
|
||||
"Bash(python -m pytest:*)",
|
||||
"Bash(git checkout:*)",
|
||||
"Bash(git add:*)",
|
||||
"Bash(git commit -m \"$\\(cat <<''EOF''\nfeat\\(tools\\): Add Excel tool for spreadsheet operations\n\nAdds a new Excel tool for reading and manipulating .xlsx/.xlsm files:\n- excel_read: Read Excel files with pagination and sheet selection\n- excel_write: Create new Excel files with data\n- excel_append: Append rows to existing files\n- excel_info: Get metadata about Excel files \\(sheets, columns, row counts\\)\n- excel_sheet_list: List all sheets in a workbook\n\nIncludes comprehensive test coverage \\(37 tests\\) and documentation.\n\nReferences #2805\n\nCo-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>\nEOF\n\\)\")",
|
||||
"Bash(git push:*)",
|
||||
"Bash(git pull:*)",
|
||||
"Bash(git stash:*)",
|
||||
"Bash(git merge:*)"
|
||||
]
|
||||
},
|
||||
"enableAllProjectMcpServers": true,
|
||||
"enabledMcpjsonServers": [
|
||||
"agent-builder",
|
||||
"tools"
|
||||
]
|
||||
}
|
||||
@@ -46,6 +46,7 @@ Use this meta-skill when:
|
||||
"Need to understand agent concepts" → building-agents-core
|
||||
"Build a new agent" → building-agents-construction
|
||||
"Optimize my agent design" → building-agents-patterns
|
||||
"Need client-facing nodes or feedback loops" → building-agents-patterns
|
||||
"Set up API keys for my agent" → setup-credentials
|
||||
"Test my agent" → testing-agent
|
||||
"Not sure what I need" → Read phases below, then decide
|
||||
@@ -63,12 +64,12 @@ Use this meta-skill when:
|
||||
- First time building an agent
|
||||
- Need to understand node types, edges, goals
|
||||
- Want to validate tool availability
|
||||
- Learning about pause/resume architecture
|
||||
- Learning about event loop architecture and client-facing nodes
|
||||
|
||||
### What This Phase Provides
|
||||
|
||||
- Architecture overview (Python packages, not JSON)
|
||||
- Core concepts (Goal, Node, Edge, Pause/Resume)
|
||||
- Core concepts (Goal, Node, Edge, Event Loop, Judges)
|
||||
- Tool discovery and validation procedures
|
||||
- Workflow overview
|
||||
|
||||
@@ -106,7 +107,7 @@ Creates the complete agent architecture:
|
||||
- ✅ 1-5 constraints defined
|
||||
- ✅ 5-10 nodes specified in nodes/__init__.py
|
||||
- ✅ 8-15 edges connecting workflow
|
||||
- ✅ Validated structure (passes `python -m agent_name validate`)
|
||||
- ✅ Validated structure (passes `uv run python -m agent_name validate`)
|
||||
- ✅ README.md with usage instructions
|
||||
- ✅ CLI commands (info, validate, run, shell)
|
||||
|
||||
@@ -153,19 +154,20 @@ exports/agent_name/
|
||||
|
||||
### When to Use
|
||||
|
||||
- Want to add pause/resume functionality
|
||||
- Want to add client-facing blocking or feedback edges
|
||||
- Need judge patterns for output validation
|
||||
- Want fan-out/fan-in (parallel execution)
|
||||
- Need error handling patterns
|
||||
- Want to optimize performance
|
||||
- Need examples of complex routing
|
||||
- Want best practices guidance
|
||||
|
||||
### What This Phase Provides
|
||||
|
||||
- Practical examples and patterns
|
||||
- Pause/resume architecture
|
||||
- Error handling strategies
|
||||
- Client-facing interaction patterns
|
||||
- Feedback edge routing with nullable output keys
|
||||
- Judge patterns (implicit, SchemaJudge)
|
||||
- Fan-out/fan-in parallel execution
|
||||
- Context management and spillover patterns
|
||||
- Anti-patterns to avoid
|
||||
- Performance optimization techniques
|
||||
|
||||
**Skip this phase** if your agent design is straightforward.
|
||||
|
||||
@@ -291,15 +293,15 @@ User: "Build an agent"
|
||||
→ Done: Working agent
|
||||
```
|
||||
|
||||
### Pattern 4: Complex Agent with Patterns
|
||||
### Pattern 4: Agent with Review Loops and HITL Checkpoints
|
||||
|
||||
```
|
||||
User: "Build an agent with multi-turn conversations"
|
||||
→ Use /building-agents-core (learn pause/resume)
|
||||
→ Use /building-agents-construction (build structure)
|
||||
→ Use /building-agents-patterns (implement pause/resume pattern)
|
||||
→ Use /testing-agent (validate conversation flows)
|
||||
→ Done: Complex conversational agent
|
||||
User: "Build an agent with human review and feedback loops"
|
||||
→ Use /building-agents-core (learn event loop, client-facing nodes)
|
||||
→ Use /building-agents-construction (build structure with feedback edges)
|
||||
→ Use /building-agents-patterns (implement client-facing + feedback patterns)
|
||||
→ Use /testing-agent (validate review flows and edge routing)
|
||||
→ Done: Agent with HITL checkpoints and review loops
|
||||
```
|
||||
|
||||
## Skill Dependencies
|
||||
@@ -308,25 +310,26 @@ User: "Build an agent with multi-turn conversations"
|
||||
agent-workflow (meta-skill)
|
||||
│
|
||||
├── building-agents-core (foundational)
|
||||
│ ├── Architecture concepts
|
||||
│ ├── Node/Edge/Goal definitions
|
||||
│ ├── Architecture concepts (event loop, judges)
|
||||
│ ├── Node types (event_loop, function)
|
||||
│ ├── Edge routing and priority
|
||||
│ ├── Tool discovery procedures
|
||||
│ └── Workflow overview
|
||||
│
|
||||
├── building-agents-construction (procedural)
|
||||
│ ├── Creates package structure
|
||||
│ ├── Defines goal
|
||||
│ ├── Adds nodes incrementally
|
||||
│ ├── Connects edges
|
||||
│ ├── Adds nodes (event_loop, function)
|
||||
│ ├── Connects edges with priority routing
|
||||
│ ├── Finalizes agent class
|
||||
│ └── Requires: building-agents-core
|
||||
│
|
||||
├── building-agents-patterns (reference)
|
||||
│ ├── Best practices
|
||||
│ ├── Pause/resume patterns
|
||||
│ ├── Error handling
|
||||
│ ├── Anti-patterns
|
||||
│ └── Performance optimization
|
||||
│ ├── Client-facing interaction patterns
|
||||
│ ├── Feedback edges and review loops
|
||||
│ ├── Judge patterns (implicit, SchemaJudge)
|
||||
│ ├── Fan-out/fan-in parallel execution
|
||||
│ └── Context management and anti-patterns
|
||||
│
|
||||
└── testing-agent
|
||||
├── Reads agent goal
|
||||
@@ -342,7 +345,7 @@ agent-workflow (meta-skill)
|
||||
- Check node IDs match between nodes/__init__.py and agent.py
|
||||
- Verify all edges reference valid node IDs
|
||||
- Ensure entry_node exists in nodes list
|
||||
- Run: `PYTHONPATH=core:exports python -m agent_name validate`
|
||||
- Run: `PYTHONPATH=exports uv run python -m agent_name validate`
|
||||
|
||||
### "Agent has structure but won't run"
|
||||
|
||||
@@ -368,7 +371,7 @@ Run these checks:
|
||||
ls exports/my_agent/agent.py
|
||||
|
||||
# Check if it validates
|
||||
PYTHONPATH=core:exports python -m my_agent validate
|
||||
PYTHONPATH=exports uv run python -m my_agent validate
|
||||
|
||||
# Check if tests exist
|
||||
ls exports/my_agent/tests/
|
||||
@@ -439,9 +442,9 @@ The workflow is **flexible** - skip phases as needed, iterate freely, and adapt
|
||||
|
||||
**Choose building-agents-core when:**
|
||||
- First time building agents
|
||||
- Need to understand architecture
|
||||
- Need to understand event loop architecture
|
||||
- Validating tool availability
|
||||
- Learning about node types and edges
|
||||
- Learning about node types, edges, and judges
|
||||
|
||||
**Choose building-agents-construction when:**
|
||||
- Actually building an agent
|
||||
@@ -451,13 +454,13 @@ The workflow is **flexible** - skip phases as needed, iterate freely, and adapt
|
||||
|
||||
**Choose building-agents-patterns when:**
|
||||
- Agent structure complete
|
||||
- Need advanced patterns
|
||||
- Implementing pause/resume
|
||||
- Optimizing performance
|
||||
- Need client-facing nodes or feedback edges
|
||||
- Implementing review loops or fan-out/fan-in
|
||||
- Want judge patterns or context management
|
||||
- Want best practices
|
||||
|
||||
**Choose testing-agent when:**
|
||||
- Agent structure complete
|
||||
- Ready to validate functionality
|
||||
- Need comprehensive test coverage
|
||||
- Debugging agent behavior
|
||||
- Testing feedback loops, output keys, or fan-out
|
||||
|
||||
@@ -75,10 +75,10 @@ initialize → list → identify → check
|
||||
### Step 5: Finalize
|
||||
|
||||
```bash
|
||||
$ PYTHONPATH=core:exports python -m file_monitor_agent validate
|
||||
$ PYTHONPATH=exports uv run python -m file_monitor_agent validate
|
||||
✓ Agent is valid
|
||||
|
||||
$ PYTHONPATH=core:exports python -m file_monitor_agent info
|
||||
$ PYTHONPATH=exports uv run python -m file_monitor_agent info
|
||||
Agent: File Monitor & Copy Agent
|
||||
Nodes: 7
|
||||
Edges: 8
|
||||
@@ -131,7 +131,7 @@ Tests approved incrementally by user.
|
||||
### Step 3: Run Tests
|
||||
|
||||
```bash
|
||||
$ PYTHONPATH=core:exports pytest exports/file_monitor_agent/tests/
|
||||
$ PYTHONPATH=exports uv run pytest exports/file_monitor_agent/tests/
|
||||
|
||||
test_constraints.py::test_preserves_originals PASSED
|
||||
test_constraints.py::test_handles_errors PASSED
|
||||
@@ -162,7 +162,7 @@ test_edge_cases.py::test_large_files PASSED
|
||||
./RUN_AGENT.sh
|
||||
|
||||
# Or manually
|
||||
PYTHONPATH=core:exports:tools/src python -m file_monitor_agent run
|
||||
PYTHONPATH=exports uv run python -m file_monitor_agent run
|
||||
```
|
||||
|
||||
**Capabilities:**
|
||||
|
||||
@@ -124,11 +124,14 @@ AskUserQuestion(questions=[{
|
||||
- node_id (kebab-case)
|
||||
- name
|
||||
- description
|
||||
- node_type: `"llm_generate"` (no tools) or `"llm_tool_use"` (uses tools)
|
||||
- node_type: `"event_loop"` (recommended for all LLM work) or `"function"` (deterministic, no LLM)
|
||||
- input_keys (what data this node receives)
|
||||
- output_keys (what data this node produces)
|
||||
- tools (ONLY tools that exist - empty list for llm_generate)
|
||||
- system_prompt
|
||||
- tools (ONLY tools that exist - empty list if no tools needed)
|
||||
- system_prompt (should mention `set_output` for producing structured outputs)
|
||||
- client_facing: True if this node interacts with the user
|
||||
- nullable_output_keys (for mutually exclusive outputs)
|
||||
- max_node_visits (>1 if this node is a feedback loop target)
|
||||
|
||||
**PRESENT the workflow to the user:**
|
||||
|
||||
@@ -136,7 +139,7 @@ AskUserQuestion(questions=[{
|
||||
>
|
||||
> 1. **[node-id]** - [description]
|
||||
>
|
||||
> - Type: [llm_generate/llm_tool_use]
|
||||
> - Type: event_loop [client-facing] / function
|
||||
> - Input: [keys]
|
||||
> - Output: [keys]
|
||||
> - Tools: [tools or "none"]
|
||||
@@ -211,8 +214,8 @@ mcp__agent-builder__get_session_status()
|
||||
- source (node that outputs)
|
||||
- target (node that receives)
|
||||
- condition: `"on_success"`, `"always"`, `"on_failure"`, or `"conditional"`
|
||||
- condition_expr (Python expression, only if conditional)
|
||||
- priority (integer, lower = higher priority)
|
||||
- condition_expr (Python expression using `output.get(...)`, only if conditional)
|
||||
- priority (positive = forward edge evaluated first, negative = feedback edge)
|
||||
|
||||
**FOR EACH edge, call:**
|
||||
|
||||
@@ -264,7 +267,7 @@ This returns JSON with all the goal, nodes, edges, and MCP server configurations
|
||||
- NOT: `{"first-node-id": ["input_keys"]}` (WRONG)
|
||||
- NOT: `{"first-node-id"}` (WRONG - this is a set)
|
||||
|
||||
**Use the example agent** at `.claude/skills/building-agents-construction/examples/online_research_agent/` as a template for file structure and patterns.
|
||||
**Use the example agent** at `.claude/skills/building-agents-construction/examples/deep_research_agent/` as a template for file structure and patterns. It demonstrates: STEP 1/STEP 2 prompts, client-facing nodes, feedback loops, nullable_output_keys, and data tools.
|
||||
|
||||
**AFTER writing all files, tell the user:**
|
||||
|
||||
@@ -284,8 +287,8 @@ This returns JSON with all the goal, nodes, edges, and MCP server configurations
|
||||
>
|
||||
> ```bash
|
||||
> cd /home/timothy/oss/hive
|
||||
> PYTHONPATH=core:exports python -m AGENT_NAME validate
|
||||
> PYTHONPATH=core:exports python -m AGENT_NAME info
|
||||
> PYTHONPATH=exports uv run python -m AGENT_NAME validate
|
||||
> PYTHONPATH=exports uv run python -m AGENT_NAME info
|
||||
> ```
|
||||
|
||||
---
|
||||
@@ -295,7 +298,7 @@ This returns JSON with all the goal, nodes, edges, and MCP server configurations
|
||||
**RUN validation:**
|
||||
|
||||
```bash
|
||||
cd /home/timothy/oss/hive && PYTHONPATH=core:exports python -m AGENT_NAME validate
|
||||
cd /home/timothy/oss/hive && PYTHONPATH=exports uv run python -m AGENT_NAME validate
|
||||
```
|
||||
|
||||
- If valid: Agent is complete!
|
||||
@@ -317,39 +320,86 @@ mcp__agent-builder__get_session_status()
|
||||
|
||||
## REFERENCE: Node Types
|
||||
|
||||
| Type | tools param | Use when |
|
||||
| -------------- | ---------------------- | ---------------------------------------------- |
|
||||
| `llm_generate` | `'[]'` | Pure reasoning, JSON output, no external calls |
|
||||
| `llm_tool_use` | `'["tool1", "tool2"]'` | Needs to call MCP tools |
|
||||
| Type | tools param | Use when |
|
||||
|------|-------------|----------|
|
||||
| `event_loop` | `'["tool1"]'` or `'[]'` | LLM-powered work with or without tools |
|
||||
| `function` | N/A | Deterministic Python operations, no LLM |
|
||||
|
||||
---
|
||||
|
||||
## REFERENCE: Edge Conditions
|
||||
## REFERENCE: NodeSpec New Fields
|
||||
|
||||
| Condition | When edge is followed |
|
||||
| ------------- | ------------------------------------- |
|
||||
| `on_success` | Source node completed successfully |
|
||||
| `on_failure` | Source node failed |
|
||||
| `always` | Always, regardless of success/failure |
|
||||
| Field | Default | Description |
|
||||
|-------|---------|-------------|
|
||||
| `client_facing` | `False` | Streams output to user, blocks for input between turns |
|
||||
| `nullable_output_keys` | `[]` | Output keys that may remain unset (mutually exclusive outputs) |
|
||||
| `max_node_visits` | `1` | Max executions per run. Set >1 for feedback loop targets. 0=unlimited |
|
||||
|
||||
---
|
||||
|
||||
## REFERENCE: Edge Conditions & Priority
|
||||
|
||||
| Condition | When edge is followed |
|
||||
|-----------|--------------------------------------|
|
||||
| `on_success` | Source node completed successfully |
|
||||
| `on_failure` | Source node failed |
|
||||
| `always` | Always, regardless of success/failure |
|
||||
| `conditional` | When condition_expr evaluates to True |
|
||||
|
||||
**Priority:** Positive = forward edge (evaluated first). Negative = feedback edge (loops back to earlier node). Multiple ON_SUCCESS edges from same source = parallel execution (fan-out).
|
||||
|
||||
---
|
||||
|
||||
## REFERENCE: System Prompt Best Practice
|
||||
|
||||
For nodes with JSON output, include this in the system_prompt:
|
||||
For **internal** event_loop nodes (not client-facing), instruct the LLM to use `set_output`:
|
||||
|
||||
```
|
||||
CRITICAL: Return ONLY raw JSON. NO markdown, NO code blocks.
|
||||
Just the JSON object starting with { and ending with }.
|
||||
Use set_output(key, value) to store your results. For example:
|
||||
- set_output("search_results", <your results as a JSON string>)
|
||||
|
||||
Return this exact structure:
|
||||
{
|
||||
"key1": "...",
|
||||
"key2": "..."
|
||||
}
|
||||
Do NOT return raw JSON. Use the set_output tool to produce outputs.
|
||||
```
|
||||
|
||||
For **client-facing** event_loop nodes, use the STEP 1/STEP 2 pattern:
|
||||
|
||||
```
|
||||
**STEP 1 — Respond to the user (text only, NO tool calls):**
|
||||
[Present information, ask questions, etc.]
|
||||
|
||||
**STEP 2 — After the user responds, call set_output:**
|
||||
- set_output("key", "value based on user's response")
|
||||
```
|
||||
|
||||
This prevents the LLM from calling `set_output` before the user has had a chance to respond. The "NO tool calls" instruction in STEP 1 ensures the node blocks for user input before proceeding.
|
||||
|
||||
---
|
||||
|
||||
## EventLoopNode Runtime
|
||||
|
||||
EventLoopNodes are **auto-created** by `GraphExecutor` at runtime. Both direct `GraphExecutor` and `AgentRuntime` / `create_agent_runtime()` handle event_loop nodes automatically. No manual `node_registry` setup is needed.
|
||||
|
||||
```python
|
||||
# Direct execution
|
||||
from framework.graph.executor import GraphExecutor
|
||||
from framework.runtime.core import Runtime
|
||||
|
||||
storage_path = Path.home() / ".hive" / "my_agent"
|
||||
storage_path.mkdir(parents=True, exist_ok=True)
|
||||
runtime = Runtime(storage_path)
|
||||
|
||||
executor = GraphExecutor(
|
||||
runtime=runtime,
|
||||
llm=llm,
|
||||
tools=tools,
|
||||
tool_executor=tool_executor,
|
||||
storage_path=storage_path,
|
||||
)
|
||||
result = await executor.execute(graph=graph, goal=goal, input_data=input_data)
|
||||
```
|
||||
|
||||
**DO NOT pass `runtime=None` to `GraphExecutor`** — it will crash with `'NoneType' object has no attribute 'start_run'`.
|
||||
|
||||
---
|
||||
|
||||
## COMMON MISTAKES TO AVOID
|
||||
@@ -359,3 +409,7 @@ Return this exact structure:
|
||||
3. **Skipping validation** - Always validate nodes and graph before proceeding
|
||||
4. **Not waiting for approval** - Always ask user before major steps
|
||||
5. **Displaying this file** - Execute the steps, don't show documentation
|
||||
6. **Too many thin nodes** - Prefer fewer, richer nodes (4 nodes > 8 nodes)
|
||||
7. **Missing STEP 1/STEP 2 in client-facing prompts** - Client-facing nodes need explicit phases to prevent premature set_output
|
||||
8. **Forgetting nullable_output_keys** - Mark input_keys that only arrive on certain edges (e.g., feedback) as nullable on the receiving node
|
||||
9. **Adding framework gating for LLM behavior** - Fix prompts or use judges, not ad-hoc code
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
"""
|
||||
Deep Research Agent - Interactive, rigorous research with TUI conversation.
|
||||
|
||||
Research any topic through multi-source web search, quality evaluation,
|
||||
and synthesis. Features client-facing TUI interaction at key checkpoints
|
||||
for user guidance and iterative deepening.
|
||||
"""
|
||||
|
||||
from .agent import DeepResearchAgent, default_agent, goal, nodes, edges
|
||||
from .config import RuntimeConfig, AgentMetadata, default_config, metadata
|
||||
|
||||
__version__ = "1.0.0"
|
||||
|
||||
__all__ = [
|
||||
"DeepResearchAgent",
|
||||
"default_agent",
|
||||
"goal",
|
||||
"nodes",
|
||||
"edges",
|
||||
"RuntimeConfig",
|
||||
"AgentMetadata",
|
||||
"default_config",
|
||||
"metadata",
|
||||
]
|
||||
+96
-18
@@ -1,5 +1,5 @@
|
||||
"""
|
||||
CLI entry point for Online Research Agent.
|
||||
CLI entry point for Deep Research Agent.
|
||||
|
||||
Uses AgentRuntime for multi-entrypoint support with HITL pause/resume.
|
||||
"""
|
||||
@@ -10,7 +10,7 @@ import logging
|
||||
import sys
|
||||
import click
|
||||
|
||||
from .agent import default_agent, OnlineResearchAgent
|
||||
from .agent import default_agent, DeepResearchAgent
|
||||
|
||||
|
||||
def setup_logging(verbose=False, debug=False):
|
||||
@@ -28,7 +28,7 @@ def setup_logging(verbose=False, debug=False):
|
||||
@click.group()
|
||||
@click.version_option(version="1.0.0")
|
||||
def cli():
|
||||
"""Online Research Agent - Deep-dive research with narrative reports."""
|
||||
"""Deep Research Agent - Interactive, rigorous research with TUI conversation."""
|
||||
pass
|
||||
|
||||
|
||||
@@ -59,6 +59,83 @@ def run(topic, mock, quiet, verbose, debug):
|
||||
sys.exit(0 if result.success else 1)
|
||||
|
||||
|
||||
@cli.command()
|
||||
@click.option("--mock", is_flag=True, help="Run in mock mode")
|
||||
@click.option("--verbose", "-v", is_flag=True, help="Show execution details")
|
||||
@click.option("--debug", is_flag=True, help="Show debug logging")
|
||||
def tui(mock, verbose, debug):
|
||||
"""Launch the TUI dashboard for interactive research."""
|
||||
setup_logging(verbose=verbose, debug=debug)
|
||||
|
||||
try:
|
||||
from framework.tui.app import AdenTUI
|
||||
except ImportError:
|
||||
click.echo("TUI requires the 'textual' package. Install with: pip install textual")
|
||||
sys.exit(1)
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from framework.llm import LiteLLMProvider
|
||||
from framework.runner.tool_registry import ToolRegistry
|
||||
from framework.runtime.agent_runtime import create_agent_runtime
|
||||
from framework.runtime.event_bus import EventBus
|
||||
from framework.runtime.execution_stream import EntryPointSpec
|
||||
|
||||
async def run_with_tui():
|
||||
agent = DeepResearchAgent()
|
||||
|
||||
# Build graph and tools
|
||||
agent._event_bus = EventBus()
|
||||
agent._tool_registry = ToolRegistry()
|
||||
|
||||
mcp_config_path = Path(__file__).parent / "mcp_servers.json"
|
||||
if mcp_config_path.exists():
|
||||
agent._tool_registry.load_mcp_config(mcp_config_path)
|
||||
|
||||
llm = None
|
||||
if not mock:
|
||||
llm = LiteLLMProvider(
|
||||
model=agent.config.model,
|
||||
api_key=agent.config.api_key,
|
||||
api_base=agent.config.api_base,
|
||||
)
|
||||
|
||||
tools = list(agent._tool_registry.get_tools().values())
|
||||
tool_executor = agent._tool_registry.get_executor()
|
||||
graph = agent._build_graph()
|
||||
|
||||
storage_path = Path.home() / ".hive" / "deep_research_agent"
|
||||
storage_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
runtime = create_agent_runtime(
|
||||
graph=graph,
|
||||
goal=agent.goal,
|
||||
storage_path=storage_path,
|
||||
entry_points=[
|
||||
EntryPointSpec(
|
||||
id="start",
|
||||
name="Start Research",
|
||||
entry_node="intake",
|
||||
trigger_type="manual",
|
||||
isolation_level="isolated",
|
||||
),
|
||||
],
|
||||
llm=llm,
|
||||
tools=tools,
|
||||
tool_executor=tool_executor,
|
||||
)
|
||||
|
||||
await runtime.start()
|
||||
|
||||
try:
|
||||
app = AdenTUI(runtime)
|
||||
await app.run_async()
|
||||
finally:
|
||||
await runtime.stop()
|
||||
|
||||
asyncio.run(run_with_tui())
|
||||
|
||||
|
||||
@cli.command()
|
||||
@click.option("--json", "output_json", is_flag=True)
|
||||
def info(output_json):
|
||||
@@ -71,6 +148,7 @@ def info(output_json):
|
||||
click.echo(f"Version: {info_data['version']}")
|
||||
click.echo(f"Description: {info_data['description']}")
|
||||
click.echo(f"\nNodes: {', '.join(info_data['nodes'])}")
|
||||
click.echo(f"Client-facing: {', '.join(info_data['client_facing_nodes'])}")
|
||||
click.echo(f"Entry: {info_data['entry_node']}")
|
||||
click.echo(f"Terminal: {', '.join(info_data['terminal_nodes'])}")
|
||||
|
||||
@@ -81,6 +159,9 @@ def validate():
|
||||
validation = default_agent.validate()
|
||||
if validation["valid"]:
|
||||
click.echo("Agent is valid")
|
||||
if validation["warnings"]:
|
||||
for warning in validation["warnings"]:
|
||||
click.echo(f" WARNING: {warning}")
|
||||
else:
|
||||
click.echo("Agent has errors:")
|
||||
for error in validation["errors"]:
|
||||
@@ -91,7 +172,7 @@ def validate():
|
||||
@cli.command()
|
||||
@click.option("--verbose", "-v", is_flag=True)
|
||||
def shell(verbose):
|
||||
"""Interactive research session."""
|
||||
"""Interactive research session (CLI, no TUI)."""
|
||||
asyncio.run(_interactive_shell(verbose))
|
||||
|
||||
|
||||
@@ -99,10 +180,10 @@ async def _interactive_shell(verbose=False):
|
||||
"""Async interactive shell."""
|
||||
setup_logging(verbose=verbose)
|
||||
|
||||
click.echo("=== Online Research Agent ===")
|
||||
click.echo("=== Deep Research Agent ===")
|
||||
click.echo("Enter a topic to research (or 'quit' to exit):\n")
|
||||
|
||||
agent = OnlineResearchAgent()
|
||||
agent = DeepResearchAgent()
|
||||
await agent.start()
|
||||
|
||||
try:
|
||||
@@ -118,7 +199,7 @@ async def _interactive_shell(verbose=False):
|
||||
if not topic.strip():
|
||||
continue
|
||||
|
||||
click.echo("\nResearching... (this may take a few minutes)\n")
|
||||
click.echo("\nResearching...\n")
|
||||
|
||||
result = await agent.trigger_and_wait("start", {"topic": topic})
|
||||
|
||||
@@ -128,16 +209,14 @@ async def _interactive_shell(verbose=False):
|
||||
|
||||
if result.success:
|
||||
output = result.output
|
||||
if "file_path" in output:
|
||||
click.echo(f"\nReport saved to: {output['file_path']}\n")
|
||||
if "final_report" in output:
|
||||
click.echo("\n--- Report Preview ---\n")
|
||||
preview = (
|
||||
output["final_report"][:500] + "..."
|
||||
if len(output.get("final_report", "")) > 500
|
||||
else output.get("final_report", "")
|
||||
)
|
||||
click.echo(preview)
|
||||
if "report_content" in output:
|
||||
click.echo("\n--- Report ---\n")
|
||||
click.echo(output["report_content"])
|
||||
click.echo("\n")
|
||||
if "references" in output:
|
||||
click.echo("--- References ---\n")
|
||||
for ref in output.get("references", []):
|
||||
click.echo(f" [{ref.get('number', '?')}] {ref.get('title', '')} - {ref.get('url', '')}")
|
||||
click.echo("\n")
|
||||
else:
|
||||
click.echo(f"\nResearch failed: {result.error}\n")
|
||||
@@ -148,7 +227,6 @@ async def _interactive_shell(verbose=False):
|
||||
except Exception as e:
|
||||
click.echo(f"Error: {e}", err=True)
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
finally:
|
||||
await agent.stop()
|
||||
@@ -0,0 +1,305 @@
|
||||
"""Agent graph construction for Deep Research Agent."""
|
||||
|
||||
from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
|
||||
from framework.graph.edge import GraphSpec
|
||||
from framework.graph.executor import ExecutionResult, GraphExecutor
|
||||
from framework.runtime.event_bus import EventBus
|
||||
from framework.runtime.core import Runtime
|
||||
from framework.llm import LiteLLMProvider
|
||||
from framework.runner.tool_registry import ToolRegistry
|
||||
|
||||
from .config import default_config, metadata
|
||||
from .nodes import (
|
||||
intake_node,
|
||||
research_node,
|
||||
review_node,
|
||||
report_node,
|
||||
)
|
||||
|
||||
# Goal definition
|
||||
goal = Goal(
|
||||
id="rigorous-interactive-research",
|
||||
name="Rigorous Interactive Research",
|
||||
description=(
|
||||
"Research any topic by searching diverse sources, analyzing findings, "
|
||||
"and producing a cited report — with user checkpoints to guide direction."
|
||||
),
|
||||
success_criteria=[
|
||||
SuccessCriterion(
|
||||
id="source-diversity",
|
||||
description="Use multiple diverse, authoritative sources",
|
||||
metric="source_count",
|
||||
target=">=5",
|
||||
weight=0.25,
|
||||
),
|
||||
SuccessCriterion(
|
||||
id="citation-coverage",
|
||||
description="Every factual claim in the report cites its source",
|
||||
metric="citation_coverage",
|
||||
target="100%",
|
||||
weight=0.25,
|
||||
),
|
||||
SuccessCriterion(
|
||||
id="user-satisfaction",
|
||||
description="User reviews findings before report generation",
|
||||
metric="user_approval",
|
||||
target="true",
|
||||
weight=0.25,
|
||||
),
|
||||
SuccessCriterion(
|
||||
id="report-completeness",
|
||||
description="Final report answers the original research questions",
|
||||
metric="question_coverage",
|
||||
target="90%",
|
||||
weight=0.25,
|
||||
),
|
||||
],
|
||||
constraints=[
|
||||
Constraint(
|
||||
id="no-hallucination",
|
||||
description="Only include information found in fetched sources",
|
||||
constraint_type="quality",
|
||||
category="accuracy",
|
||||
),
|
||||
Constraint(
|
||||
id="source-attribution",
|
||||
description="Every claim must cite its source with a numbered reference",
|
||||
constraint_type="quality",
|
||||
category="accuracy",
|
||||
),
|
||||
Constraint(
|
||||
id="user-checkpoint",
|
||||
description="Present findings to the user before writing the final report",
|
||||
constraint_type="functional",
|
||||
category="interaction",
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
# Node list
|
||||
nodes = [
|
||||
intake_node,
|
||||
research_node,
|
||||
review_node,
|
||||
report_node,
|
||||
]
|
||||
|
||||
# Edge definitions
|
||||
edges = [
|
||||
# intake -> research
|
||||
EdgeSpec(
|
||||
id="intake-to-research",
|
||||
source="intake",
|
||||
target="research",
|
||||
condition=EdgeCondition.ON_SUCCESS,
|
||||
priority=1,
|
||||
),
|
||||
# research -> review
|
||||
EdgeSpec(
|
||||
id="research-to-review",
|
||||
source="research",
|
||||
target="review",
|
||||
condition=EdgeCondition.ON_SUCCESS,
|
||||
priority=1,
|
||||
),
|
||||
# review -> research (feedback loop)
|
||||
EdgeSpec(
|
||||
id="review-to-research-feedback",
|
||||
source="review",
|
||||
target="research",
|
||||
condition=EdgeCondition.CONDITIONAL,
|
||||
condition_expr="needs_more_research == True",
|
||||
priority=1,
|
||||
),
|
||||
# review -> report (user satisfied)
|
||||
EdgeSpec(
|
||||
id="review-to-report",
|
||||
source="review",
|
||||
target="report",
|
||||
condition=EdgeCondition.CONDITIONAL,
|
||||
condition_expr="needs_more_research == False",
|
||||
priority=2,
|
||||
),
|
||||
]
|
||||
|
||||
# Graph configuration
|
||||
entry_node = "intake"
|
||||
entry_points = {"start": "intake"}
|
||||
pause_nodes = []
|
||||
terminal_nodes = ["report"]
|
||||
|
||||
|
||||
class DeepResearchAgent:
|
||||
"""
|
||||
Deep Research Agent — 4-node pipeline with user checkpoints.
|
||||
|
||||
Flow: intake -> research -> review -> report
|
||||
^ |
|
||||
+-- feedback loop (if user wants more)
|
||||
"""
|
||||
|
||||
def __init__(self, config=None):
|
||||
self.config = config or default_config
|
||||
self.goal = goal
|
||||
self.nodes = nodes
|
||||
self.edges = edges
|
||||
self.entry_node = entry_node
|
||||
self.entry_points = entry_points
|
||||
self.pause_nodes = pause_nodes
|
||||
self.terminal_nodes = terminal_nodes
|
||||
self._executor: GraphExecutor | None = None
|
||||
self._graph: GraphSpec | None = None
|
||||
self._event_bus: EventBus | None = None
|
||||
self._tool_registry: ToolRegistry | None = None
|
||||
|
||||
def _build_graph(self) -> GraphSpec:
|
||||
"""Build the GraphSpec."""
|
||||
return GraphSpec(
|
||||
id="deep-research-agent-graph",
|
||||
goal_id=self.goal.id,
|
||||
version="1.0.0",
|
||||
entry_node=self.entry_node,
|
||||
entry_points=self.entry_points,
|
||||
terminal_nodes=self.terminal_nodes,
|
||||
pause_nodes=self.pause_nodes,
|
||||
nodes=self.nodes,
|
||||
edges=self.edges,
|
||||
default_model=self.config.model,
|
||||
max_tokens=self.config.max_tokens,
|
||||
)
|
||||
|
||||
def _setup(self, mock_mode=False) -> GraphExecutor:
|
||||
"""Set up the executor with all components."""
|
||||
from pathlib import Path
|
||||
|
||||
storage_path = Path.home() / ".hive" / "deep_research_agent"
|
||||
storage_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
self._event_bus = EventBus()
|
||||
self._tool_registry = ToolRegistry()
|
||||
|
||||
mcp_config_path = Path(__file__).parent / "mcp_servers.json"
|
||||
if mcp_config_path.exists():
|
||||
self._tool_registry.load_mcp_config(mcp_config_path)
|
||||
|
||||
llm = None
|
||||
if not mock_mode:
|
||||
llm = LiteLLMProvider(
|
||||
model=self.config.model,
|
||||
api_key=self.config.api_key,
|
||||
api_base=self.config.api_base,
|
||||
)
|
||||
|
||||
tool_executor = self._tool_registry.get_executor()
|
||||
tools = list(self._tool_registry.get_tools().values())
|
||||
|
||||
self._graph = self._build_graph()
|
||||
runtime = Runtime(storage_path)
|
||||
|
||||
self._executor = GraphExecutor(
|
||||
runtime=runtime,
|
||||
llm=llm,
|
||||
tools=tools,
|
||||
tool_executor=tool_executor,
|
||||
event_bus=self._event_bus,
|
||||
storage_path=storage_path,
|
||||
)
|
||||
|
||||
return self._executor
|
||||
|
||||
async def start(self, mock_mode=False) -> None:
|
||||
"""Set up the agent (initialize executor and tools)."""
|
||||
if self._executor is None:
|
||||
self._setup(mock_mode=mock_mode)
|
||||
|
||||
async def stop(self) -> None:
|
||||
"""Clean up resources."""
|
||||
self._executor = None
|
||||
self._event_bus = None
|
||||
|
||||
async def trigger_and_wait(
|
||||
self,
|
||||
entry_point: str,
|
||||
input_data: dict,
|
||||
timeout: float | None = None,
|
||||
session_state: dict | None = None,
|
||||
) -> ExecutionResult | None:
|
||||
"""Execute the graph and wait for completion."""
|
||||
if self._executor is None:
|
||||
raise RuntimeError("Agent not started. Call start() first.")
|
||||
if self._graph is None:
|
||||
raise RuntimeError("Graph not built. Call start() first.")
|
||||
|
||||
return await self._executor.execute(
|
||||
graph=self._graph,
|
||||
goal=self.goal,
|
||||
input_data=input_data,
|
||||
session_state=session_state,
|
||||
)
|
||||
|
||||
async def run(
|
||||
self, context: dict, mock_mode=False, session_state=None
|
||||
) -> ExecutionResult:
|
||||
"""Run the agent (convenience method for single execution)."""
|
||||
await self.start(mock_mode=mock_mode)
|
||||
try:
|
||||
result = await self.trigger_and_wait(
|
||||
"start", context, session_state=session_state
|
||||
)
|
||||
return result or ExecutionResult(success=False, error="Execution timeout")
|
||||
finally:
|
||||
await self.stop()
|
||||
|
||||
def info(self):
|
||||
"""Get agent information."""
|
||||
return {
|
||||
"name": metadata.name,
|
||||
"version": metadata.version,
|
||||
"description": metadata.description,
|
||||
"goal": {
|
||||
"name": self.goal.name,
|
||||
"description": self.goal.description,
|
||||
},
|
||||
"nodes": [n.id for n in self.nodes],
|
||||
"edges": [e.id for e in self.edges],
|
||||
"entry_node": self.entry_node,
|
||||
"entry_points": self.entry_points,
|
||||
"pause_nodes": self.pause_nodes,
|
||||
"terminal_nodes": self.terminal_nodes,
|
||||
"client_facing_nodes": [n.id for n in self.nodes if n.client_facing],
|
||||
}
|
||||
|
||||
def validate(self):
|
||||
"""Validate agent structure."""
|
||||
errors = []
|
||||
warnings = []
|
||||
|
||||
node_ids = {node.id for node in self.nodes}
|
||||
for edge in self.edges:
|
||||
if edge.source not in node_ids:
|
||||
errors.append(f"Edge {edge.id}: source '{edge.source}' not found")
|
||||
if edge.target not in node_ids:
|
||||
errors.append(f"Edge {edge.id}: target '{edge.target}' not found")
|
||||
|
||||
if self.entry_node not in node_ids:
|
||||
errors.append(f"Entry node '{self.entry_node}' not found")
|
||||
|
||||
for terminal in self.terminal_nodes:
|
||||
if terminal not in node_ids:
|
||||
errors.append(f"Terminal node '{terminal}' not found")
|
||||
|
||||
for ep_id, node_id in self.entry_points.items():
|
||||
if node_id not in node_ids:
|
||||
errors.append(
|
||||
f"Entry point '{ep_id}' references unknown node '{node_id}'"
|
||||
)
|
||||
|
||||
return {
|
||||
"valid": len(errors) == 0,
|
||||
"errors": errors,
|
||||
"warnings": warnings,
|
||||
}
|
||||
|
||||
|
||||
# Create default instance
|
||||
default_agent = DeepResearchAgent()
|
||||
+6
-3
@@ -32,12 +32,15 @@ class RuntimeConfig:
|
||||
default_config = RuntimeConfig()
|
||||
|
||||
|
||||
# Agent metadata
|
||||
@dataclass
|
||||
class AgentMetadata:
|
||||
name: str = "Online Research Agent"
|
||||
name: str = "Deep Research Agent"
|
||||
version: str = "1.0.0"
|
||||
description: str = "Research any topic by searching multiple sources, synthesizing information, and producing a well-structured narrative report with citations."
|
||||
description: str = (
|
||||
"Interactive research agent that rigorously investigates topics through "
|
||||
"multi-source search, quality evaluation, and synthesis - with TUI conversation "
|
||||
"at key checkpoints for user guidance and feedback."
|
||||
)
|
||||
|
||||
|
||||
metadata = AgentMetadata()
|
||||
+147
@@ -0,0 +1,147 @@
|
||||
"""Node definitions for Deep Research Agent."""
|
||||
|
||||
from framework.graph import NodeSpec
|
||||
|
||||
# Node 1: Intake (client-facing)
|
||||
# Brief conversation to clarify what the user wants researched.
|
||||
intake_node = NodeSpec(
|
||||
id="intake",
|
||||
name="Research Intake",
|
||||
description="Discuss the research topic with the user, clarify scope, and confirm direction",
|
||||
node_type="event_loop",
|
||||
client_facing=True,
|
||||
input_keys=["topic"],
|
||||
output_keys=["research_brief"],
|
||||
system_prompt="""\
|
||||
You are a research intake specialist. The user wants to research a topic.
|
||||
Have a brief conversation to clarify what they need.
|
||||
|
||||
**STEP 1 — Read and respond (text only, NO tool calls):**
|
||||
1. Read the topic provided
|
||||
2. If it's vague, ask 1-2 clarifying questions (scope, angle, depth)
|
||||
3. If it's already clear, confirm your understanding and ask the user to confirm
|
||||
|
||||
Keep it short. Don't over-ask.
|
||||
|
||||
**STEP 2 — After the user confirms, call set_output:**
|
||||
- set_output("research_brief", "A clear paragraph describing exactly what to research, \
|
||||
what questions to answer, what scope to cover, and how deep to go.")
|
||||
""",
|
||||
tools=[],
|
||||
)
|
||||
|
||||
# Node 2: Research
|
||||
# The workhorse — searches the web, fetches content, analyzes sources.
|
||||
# One node with both tools avoids the context-passing overhead of 5 separate nodes.
|
||||
research_node = NodeSpec(
|
||||
id="research",
|
||||
name="Research",
|
||||
description="Search the web, fetch source content, and compile findings",
|
||||
node_type="event_loop",
|
||||
max_node_visits=3,
|
||||
input_keys=["research_brief", "feedback"],
|
||||
output_keys=["findings", "sources", "gaps"],
|
||||
nullable_output_keys=["feedback"],
|
||||
system_prompt="""\
|
||||
You are a research agent. Given a research brief, find and analyze sources.
|
||||
|
||||
If feedback is provided, this is a follow-up round — focus on the gaps identified.
|
||||
|
||||
Work in phases:
|
||||
1. **Search**: Use web_search with 3-5 diverse queries covering different angles.
|
||||
Prioritize authoritative sources (.edu, .gov, established publications).
|
||||
2. **Fetch**: Use web_scrape on the most promising URLs (aim for 5-8 sources).
|
||||
Skip URLs that fail. Extract the substantive content.
|
||||
3. **Analyze**: Review what you've collected. Identify key findings, themes,
|
||||
and any contradictions between sources.
|
||||
|
||||
Important:
|
||||
- Work in batches of 3-4 tool calls at a time to manage context
|
||||
- After each batch, assess whether you have enough material
|
||||
- Prefer quality over quantity — 5 good sources beat 15 thin ones
|
||||
- Track which URL each finding comes from (you'll need citations later)
|
||||
|
||||
When done, use set_output:
|
||||
- set_output("findings", "Structured summary: key findings with source URLs for each claim. \
|
||||
Include themes, contradictions, and confidence levels.")
|
||||
- set_output("sources", [{"url": "...", "title": "...", "summary": "..."}])
|
||||
- set_output("gaps", "What aspects of the research brief are NOT well-covered yet, if any.")
|
||||
""",
|
||||
tools=["web_search", "web_scrape", "load_data", "save_data", "list_data_files"],
|
||||
)
|
||||
|
||||
# Node 3: Review (client-facing)
|
||||
# Shows the user what was found and asks whether to dig deeper or proceed.
|
||||
review_node = NodeSpec(
|
||||
id="review",
|
||||
name="Review Findings",
|
||||
description="Present findings to user and decide whether to research more or write the report",
|
||||
node_type="event_loop",
|
||||
client_facing=True,
|
||||
max_node_visits=3,
|
||||
input_keys=["findings", "sources", "gaps", "research_brief"],
|
||||
output_keys=["needs_more_research", "feedback"],
|
||||
system_prompt="""\
|
||||
Present the research findings to the user clearly and concisely.
|
||||
|
||||
**STEP 1 — Present (your first message, text only, NO tool calls):**
|
||||
1. **Summary** (2-3 sentences of what was found)
|
||||
2. **Key Findings** (bulleted, with confidence levels)
|
||||
3. **Sources Used** (count and quality assessment)
|
||||
4. **Gaps** (what's still unclear or under-covered)
|
||||
|
||||
End by asking: Are they satisfied, or do they want deeper research? \
|
||||
Should we proceed to writing the final report?
|
||||
|
||||
**STEP 2 — After the user responds, call set_output:**
|
||||
- set_output("needs_more_research", "true") — if they want more
|
||||
- set_output("needs_more_research", "false") — if they're satisfied
|
||||
- set_output("feedback", "What the user wants explored further, or empty string")
|
||||
""",
|
||||
tools=[],
|
||||
)
|
||||
|
||||
# Node 4: Report (client-facing)
|
||||
# Writes the final report and presents it to the user.
|
||||
report_node = NodeSpec(
|
||||
id="report",
|
||||
name="Write & Deliver Report",
|
||||
description="Write a cited report from the findings and present it to the user",
|
||||
node_type="event_loop",
|
||||
client_facing=True,
|
||||
input_keys=["findings", "sources", "research_brief"],
|
||||
output_keys=["delivery_status"],
|
||||
system_prompt="""\
|
||||
Write a comprehensive research report and present it to the user.
|
||||
|
||||
**STEP 1 — Write and present the report (text only, NO tool calls):**
|
||||
|
||||
Report structure:
|
||||
1. **Executive Summary** (2-3 paragraphs)
|
||||
2. **Findings** (organized by theme, with [n] citations)
|
||||
3. **Analysis** (synthesis, implications, areas of debate)
|
||||
4. **Conclusion** (key takeaways, confidence assessment)
|
||||
5. **References** (numbered list of sources cited)
|
||||
|
||||
Requirements:
|
||||
- Every factual claim must cite its source with [n] notation
|
||||
- Be objective — present multiple viewpoints where sources disagree
|
||||
- Distinguish well-supported conclusions from speculation
|
||||
- Answer the original research questions from the brief
|
||||
|
||||
End by asking the user if they have questions or want to save the report.
|
||||
|
||||
**STEP 2 — After the user responds:**
|
||||
- Answer follow-up questions from the research material
|
||||
- If they want to save, use write_to_file tool
|
||||
- When the user is satisfied: set_output("delivery_status", "completed")
|
||||
""",
|
||||
tools=["write_to_file"],
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"intake_node",
|
||||
"research_node",
|
||||
"review_node",
|
||||
"report_node",
|
||||
]
|
||||
@@ -1,80 +0,0 @@
|
||||
# Online Research Agent
|
||||
|
||||
Deep-dive research agent that searches 10+ sources and produces comprehensive narrative reports with citations.
|
||||
|
||||
## Features
|
||||
|
||||
- Generates multiple search queries from a topic
|
||||
- Searches and fetches 15+ web sources
|
||||
- Evaluates and ranks sources by relevance
|
||||
- Synthesizes findings into themes
|
||||
- Writes narrative report with numbered citations
|
||||
- Quality checks for uncited claims
|
||||
- Saves report to local markdown file
|
||||
|
||||
## Usage
|
||||
|
||||
### CLI
|
||||
|
||||
```bash
|
||||
# Show agent info
|
||||
python -m online_research_agent info
|
||||
|
||||
# Validate structure
|
||||
python -m online_research_agent validate
|
||||
|
||||
# Run research on a topic
|
||||
python -m online_research_agent run --topic "impact of AI on healthcare"
|
||||
|
||||
# Interactive shell
|
||||
python -m online_research_agent shell
|
||||
```
|
||||
|
||||
### Python API
|
||||
|
||||
```python
|
||||
from online_research_agent import default_agent
|
||||
|
||||
# Simple usage
|
||||
result = await default_agent.run({"topic": "climate change solutions"})
|
||||
|
||||
# Check output
|
||||
if result.success:
|
||||
print(f"Report saved to: {result.output['file_path']}")
|
||||
print(result.output['final_report'])
|
||||
```
|
||||
|
||||
## Workflow
|
||||
|
||||
```
|
||||
parse-query → search-sources → fetch-content → evaluate-sources
|
||||
↓
|
||||
write-report ← synthesize-findings
|
||||
↓
|
||||
quality-check → save-report
|
||||
```
|
||||
|
||||
## Output
|
||||
|
||||
Reports are saved to `./research_reports/` as markdown files with:
|
||||
|
||||
1. Executive Summary
|
||||
2. Introduction
|
||||
3. Key Findings (by theme)
|
||||
4. Analysis
|
||||
5. Conclusion
|
||||
6. References
|
||||
|
||||
## Requirements
|
||||
|
||||
- Python 3.11+
|
||||
- LLM provider API key (Groq, Cerebras, etc.)
|
||||
- Internet access for web search/fetch
|
||||
|
||||
## Configuration
|
||||
|
||||
Edit `config.py` to change:
|
||||
|
||||
- `model`: LLM model (default: groq/moonshotai/kimi-k2-instruct-0905)
|
||||
- `temperature`: Generation temperature (default: 0.7)
|
||||
- `max_tokens`: Max tokens per response (default: 16384)
|
||||
-23
@@ -1,23 +0,0 @@
|
||||
"""
|
||||
Online Research Agent - Deep-dive research with narrative reports.
|
||||
|
||||
Research any topic by searching multiple sources, synthesizing information,
|
||||
and producing a well-structured narrative report with citations.
|
||||
"""
|
||||
|
||||
from .agent import OnlineResearchAgent, default_agent, goal, nodes, edges
|
||||
from .config import RuntimeConfig, AgentMetadata, default_config, metadata
|
||||
|
||||
__version__ = "1.0.0"
|
||||
|
||||
__all__ = [
|
||||
"OnlineResearchAgent",
|
||||
"default_agent",
|
||||
"goal",
|
||||
"nodes",
|
||||
"edges",
|
||||
"RuntimeConfig",
|
||||
"AgentMetadata",
|
||||
"default_config",
|
||||
"metadata",
|
||||
]
|
||||
@@ -1,429 +0,0 @@
|
||||
"""Agent graph construction for Online Research Agent."""
|
||||
|
||||
from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
|
||||
from framework.graph.edge import GraphSpec
|
||||
from framework.graph.executor import ExecutionResult
|
||||
from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
|
||||
from framework.runtime.execution_stream import EntryPointSpec
|
||||
from framework.llm import LiteLLMProvider
|
||||
from framework.runner.tool_registry import ToolRegistry
|
||||
|
||||
from .config import default_config, metadata
|
||||
from .nodes import (
|
||||
parse_query_node,
|
||||
search_sources_node,
|
||||
fetch_content_node,
|
||||
evaluate_sources_node,
|
||||
synthesize_findings_node,
|
||||
write_report_node,
|
||||
quality_check_node,
|
||||
save_report_node,
|
||||
)
|
||||
|
||||
# Goal definition
|
||||
goal = Goal(
|
||||
id="comprehensive-online-research",
|
||||
name="Comprehensive Online Research",
|
||||
description="Research any topic by searching multiple sources, synthesizing information, and producing a well-structured narrative report with citations.",
|
||||
success_criteria=[
|
||||
SuccessCriterion(
|
||||
id="source-coverage",
|
||||
description="Query 10+ diverse sources",
|
||||
metric="source_count",
|
||||
target=">=10",
|
||||
weight=0.20,
|
||||
),
|
||||
SuccessCriterion(
|
||||
id="relevance",
|
||||
description="All sources directly address the query",
|
||||
metric="relevance_score",
|
||||
target="90%",
|
||||
weight=0.25,
|
||||
),
|
||||
SuccessCriterion(
|
||||
id="synthesis",
|
||||
description="Synthesize findings into coherent narrative",
|
||||
metric="coherence_score",
|
||||
target="85%",
|
||||
weight=0.25,
|
||||
),
|
||||
SuccessCriterion(
|
||||
id="citations",
|
||||
description="Include citations for all claims",
|
||||
metric="citation_coverage",
|
||||
target="100%",
|
||||
weight=0.15,
|
||||
),
|
||||
SuccessCriterion(
|
||||
id="actionable",
|
||||
description="Report answers the user's question",
|
||||
metric="answer_completeness",
|
||||
target="90%",
|
||||
weight=0.15,
|
||||
),
|
||||
],
|
||||
constraints=[
|
||||
Constraint(
|
||||
id="no-hallucination",
|
||||
description="Only include information found in sources",
|
||||
constraint_type="quality",
|
||||
category="accuracy",
|
||||
),
|
||||
Constraint(
|
||||
id="source-attribution",
|
||||
description="Every factual claim must cite its source",
|
||||
constraint_type="quality",
|
||||
category="accuracy",
|
||||
),
|
||||
Constraint(
|
||||
id="recency-preference",
|
||||
description="Prefer recent sources when relevant",
|
||||
constraint_type="quality",
|
||||
category="relevance",
|
||||
),
|
||||
Constraint(
|
||||
id="no-paywalled",
|
||||
description="Avoid sources that require payment to access",
|
||||
constraint_type="functional",
|
||||
category="accessibility",
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
# Node list
|
||||
nodes = [
|
||||
parse_query_node,
|
||||
search_sources_node,
|
||||
fetch_content_node,
|
||||
evaluate_sources_node,
|
||||
synthesize_findings_node,
|
||||
write_report_node,
|
||||
quality_check_node,
|
||||
save_report_node,
|
||||
]
|
||||
|
||||
# Edge definitions
|
||||
edges = [
|
||||
EdgeSpec(
|
||||
id="parse-to-search",
|
||||
source="parse-query",
|
||||
target="search-sources",
|
||||
condition=EdgeCondition.ON_SUCCESS,
|
||||
priority=1,
|
||||
),
|
||||
EdgeSpec(
|
||||
id="search-to-fetch",
|
||||
source="search-sources",
|
||||
target="fetch-content",
|
||||
condition=EdgeCondition.ON_SUCCESS,
|
||||
priority=1,
|
||||
),
|
||||
EdgeSpec(
|
||||
id="fetch-to-evaluate",
|
||||
source="fetch-content",
|
||||
target="evaluate-sources",
|
||||
condition=EdgeCondition.ON_SUCCESS,
|
||||
priority=1,
|
||||
),
|
||||
EdgeSpec(
|
||||
id="evaluate-to-synthesize",
|
||||
source="evaluate-sources",
|
||||
target="synthesize-findings",
|
||||
condition=EdgeCondition.ON_SUCCESS,
|
||||
priority=1,
|
||||
),
|
||||
EdgeSpec(
|
||||
id="synthesize-to-write",
|
||||
source="synthesize-findings",
|
||||
target="write-report",
|
||||
condition=EdgeCondition.ON_SUCCESS,
|
||||
priority=1,
|
||||
),
|
||||
EdgeSpec(
|
||||
id="write-to-quality",
|
||||
source="write-report",
|
||||
target="quality-check",
|
||||
condition=EdgeCondition.ON_SUCCESS,
|
||||
priority=1,
|
||||
),
|
||||
EdgeSpec(
|
||||
id="quality-to-save",
|
||||
source="quality-check",
|
||||
target="save-report",
|
||||
condition=EdgeCondition.ON_SUCCESS,
|
||||
priority=1,
|
||||
),
|
||||
]
|
||||
|
||||
# Graph configuration
|
||||
entry_node = "parse-query"
|
||||
entry_points = {"start": "parse-query"}
|
||||
pause_nodes = []
|
||||
terminal_nodes = ["save-report"]
|
||||
|
||||
|
||||
class OnlineResearchAgent:
|
||||
"""
|
||||
Online Research Agent - Deep-dive research with narrative reports.
|
||||
|
||||
Uses AgentRuntime for multi-entrypoint support with HITL pause/resume.
|
||||
"""
|
||||
|
||||
def __init__(self, config=None):
|
||||
self.config = config or default_config
|
||||
self.goal = goal
|
||||
self.nodes = nodes
|
||||
self.edges = edges
|
||||
self.entry_node = entry_node
|
||||
self.entry_points = entry_points
|
||||
self.pause_nodes = pause_nodes
|
||||
self.terminal_nodes = terminal_nodes
|
||||
self._runtime: AgentRuntime | None = None
|
||||
self._graph: GraphSpec | None = None
|
||||
|
||||
def _build_entry_point_specs(self) -> list[EntryPointSpec]:
|
||||
"""Convert entry_points dict to EntryPointSpec list."""
|
||||
specs = []
|
||||
for ep_id, node_id in self.entry_points.items():
|
||||
if ep_id == "start":
|
||||
trigger_type = "manual"
|
||||
name = "Start"
|
||||
elif "_resume" in ep_id:
|
||||
trigger_type = "resume"
|
||||
name = f"Resume from {ep_id.replace('_resume', '')}"
|
||||
else:
|
||||
trigger_type = "manual"
|
||||
name = ep_id.replace("-", " ").title()
|
||||
|
||||
specs.append(
|
||||
EntryPointSpec(
|
||||
id=ep_id,
|
||||
name=name,
|
||||
entry_node=node_id,
|
||||
trigger_type=trigger_type,
|
||||
isolation_level="shared",
|
||||
)
|
||||
)
|
||||
return specs
|
||||
|
||||
def _create_runtime(self, mock_mode=False) -> AgentRuntime:
|
||||
"""Create AgentRuntime instance."""
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
# Persistent storage in ~/.hive for telemetry and run history
|
||||
storage_path = Path.home() / ".hive" / "online_research_agent"
|
||||
storage_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
tool_registry = ToolRegistry()
|
||||
|
||||
# Load MCP servers (always load, needed for tool validation)
|
||||
agent_dir = Path(__file__).parent
|
||||
mcp_config_path = agent_dir / "mcp_servers.json"
|
||||
|
||||
if mcp_config_path.exists():
|
||||
with open(mcp_config_path) as f:
|
||||
mcp_servers = json.load(f)
|
||||
|
||||
for server_config in mcp_servers.get("servers", []):
|
||||
# Resolve relative cwd paths
|
||||
cwd = server_config.get("cwd")
|
||||
if cwd and not Path(cwd).is_absolute():
|
||||
server_config["cwd"] = str(agent_dir / cwd)
|
||||
tool_registry.register_mcp_server(server_config)
|
||||
|
||||
llm = None
|
||||
if not mock_mode:
|
||||
# LiteLLMProvider uses environment variables for API keys
|
||||
llm = LiteLLMProvider(
|
||||
model=self.config.model,
|
||||
api_key=self.config.api_key,
|
||||
api_base=self.config.api_base,
|
||||
)
|
||||
|
||||
self._graph = GraphSpec(
|
||||
id="online-research-agent-graph",
|
||||
goal_id=self.goal.id,
|
||||
version="1.0.0",
|
||||
entry_node=self.entry_node,
|
||||
entry_points=self.entry_points,
|
||||
terminal_nodes=self.terminal_nodes,
|
||||
pause_nodes=self.pause_nodes,
|
||||
nodes=self.nodes,
|
||||
edges=self.edges,
|
||||
default_model=self.config.model,
|
||||
max_tokens=self.config.max_tokens,
|
||||
)
|
||||
|
||||
# Create AgentRuntime with all entry points
|
||||
self._runtime = create_agent_runtime(
|
||||
graph=self._graph,
|
||||
goal=self.goal,
|
||||
storage_path=storage_path,
|
||||
entry_points=self._build_entry_point_specs(),
|
||||
llm=llm,
|
||||
tools=list(tool_registry.get_tools().values()),
|
||||
tool_executor=tool_registry.get_executor(),
|
||||
)
|
||||
|
||||
return self._runtime
|
||||
|
||||
async def start(self, mock_mode=False) -> None:
|
||||
"""Start the agent runtime."""
|
||||
if self._runtime is None:
|
||||
self._create_runtime(mock_mode=mock_mode)
|
||||
await self._runtime.start()
|
||||
|
||||
async def stop(self) -> None:
|
||||
"""Stop the agent runtime."""
|
||||
if self._runtime is not None:
|
||||
await self._runtime.stop()
|
||||
|
||||
async def trigger(
|
||||
self,
|
||||
entry_point: str,
|
||||
input_data: dict,
|
||||
correlation_id: str | None = None,
|
||||
session_state: dict | None = None,
|
||||
) -> str:
|
||||
"""
|
||||
Trigger execution at a specific entry point (non-blocking).
|
||||
|
||||
Args:
|
||||
entry_point: Entry point ID (e.g., "start", "pause-node_resume")
|
||||
input_data: Input data for the execution
|
||||
correlation_id: Optional ID to correlate related executions
|
||||
session_state: Optional session state to resume from (with paused_at, memory)
|
||||
|
||||
Returns:
|
||||
Execution ID for tracking
|
||||
"""
|
||||
if self._runtime is None or not self._runtime.is_running:
|
||||
raise RuntimeError("Agent runtime not started. Call start() first.")
|
||||
return await self._runtime.trigger(
|
||||
entry_point, input_data, correlation_id, session_state=session_state
|
||||
)
|
||||
|
||||
async def trigger_and_wait(
|
||||
self,
|
||||
entry_point: str,
|
||||
input_data: dict,
|
||||
timeout: float | None = None,
|
||||
session_state: dict | None = None,
|
||||
) -> ExecutionResult | None:
|
||||
"""
|
||||
Trigger execution and wait for completion.
|
||||
|
||||
Args:
|
||||
entry_point: Entry point ID
|
||||
input_data: Input data for the execution
|
||||
timeout: Maximum time to wait (seconds)
|
||||
session_state: Optional session state to resume from (with paused_at, memory)
|
||||
|
||||
Returns:
|
||||
ExecutionResult or None if timeout
|
||||
"""
|
||||
if self._runtime is None or not self._runtime.is_running:
|
||||
raise RuntimeError("Agent runtime not started. Call start() first.")
|
||||
return await self._runtime.trigger_and_wait(
|
||||
entry_point, input_data, timeout, session_state=session_state
|
||||
)
|
||||
|
||||
async def run(
|
||||
self, context: dict, mock_mode=False, session_state=None
|
||||
) -> ExecutionResult:
|
||||
"""
|
||||
Run the agent (convenience method for simple single execution).
|
||||
|
||||
For more control, use start() + trigger_and_wait() + stop().
|
||||
"""
|
||||
await self.start(mock_mode=mock_mode)
|
||||
try:
|
||||
# Determine entry point based on session_state
|
||||
if session_state and "paused_at" in session_state:
|
||||
paused_node = session_state["paused_at"]
|
||||
resume_key = f"{paused_node}_resume"
|
||||
if resume_key in self.entry_points:
|
||||
entry_point = resume_key
|
||||
else:
|
||||
entry_point = "start"
|
||||
else:
|
||||
entry_point = "start"
|
||||
|
||||
result = await self.trigger_and_wait(
|
||||
entry_point, context, session_state=session_state
|
||||
)
|
||||
return result or ExecutionResult(success=False, error="Execution timeout")
|
||||
finally:
|
||||
await self.stop()
|
||||
|
||||
async def get_goal_progress(self) -> dict:
|
||||
"""Get goal progress across all executions."""
|
||||
if self._runtime is None:
|
||||
raise RuntimeError("Agent runtime not started")
|
||||
return await self._runtime.get_goal_progress()
|
||||
|
||||
def get_stats(self) -> dict:
|
||||
"""Get runtime statistics."""
|
||||
if self._runtime is None:
|
||||
return {"running": False}
|
||||
return self._runtime.get_stats()
|
||||
|
||||
def info(self):
|
||||
"""Get agent information."""
|
||||
return {
|
||||
"name": metadata.name,
|
||||
"version": metadata.version,
|
||||
"description": metadata.description,
|
||||
"goal": {
|
||||
"name": self.goal.name,
|
||||
"description": self.goal.description,
|
||||
},
|
||||
"nodes": [n.id for n in self.nodes],
|
||||
"edges": [e.id for e in self.edges],
|
||||
"entry_node": self.entry_node,
|
||||
"entry_points": self.entry_points,
|
||||
"pause_nodes": self.pause_nodes,
|
||||
"terminal_nodes": self.terminal_nodes,
|
||||
"multi_entrypoint": True,
|
||||
}
|
||||
|
||||
def validate(self):
|
||||
"""Validate agent structure."""
|
||||
errors = []
|
||||
warnings = []
|
||||
|
||||
node_ids = {node.id for node in self.nodes}
|
||||
for edge in self.edges:
|
||||
if edge.source not in node_ids:
|
||||
errors.append(f"Edge {edge.id}: source '{edge.source}' not found")
|
||||
if edge.target not in node_ids:
|
||||
errors.append(f"Edge {edge.id}: target '{edge.target}' not found")
|
||||
|
||||
if self.entry_node not in node_ids:
|
||||
errors.append(f"Entry node '{self.entry_node}' not found")
|
||||
|
||||
for terminal in self.terminal_nodes:
|
||||
if terminal not in node_ids:
|
||||
errors.append(f"Terminal node '{terminal}' not found")
|
||||
|
||||
for pause in self.pause_nodes:
|
||||
if pause not in node_ids:
|
||||
errors.append(f"Pause node '{pause}' not found")
|
||||
|
||||
# Validate entry points
|
||||
for ep_id, node_id in self.entry_points.items():
|
||||
if node_id not in node_ids:
|
||||
errors.append(
|
||||
f"Entry point '{ep_id}' references unknown node '{node_id}'"
|
||||
)
|
||||
|
||||
return {
|
||||
"valid": len(errors) == 0,
|
||||
"errors": errors,
|
||||
"warnings": warnings,
|
||||
}
|
||||
|
||||
|
||||
# Create default instance
|
||||
default_agent = OnlineResearchAgent()
|
||||
-396
@@ -1,396 +0,0 @@
|
||||
"""Node definitions for Online Research Agent."""
|
||||
|
||||
from framework.graph import NodeSpec
|
||||
|
||||
# Node 1: Parse Query
|
||||
parse_query_node = NodeSpec(
|
||||
id="parse-query",
|
||||
name="Parse Query",
|
||||
description="Analyze the research topic and generate 3-5 diverse search queries to cover different aspects",
|
||||
node_type="llm_generate",
|
||||
input_keys=["topic"],
|
||||
output_keys=["search_queries", "research_focus", "key_aspects"],
|
||||
output_schema={
|
||||
"research_focus": {
|
||||
"type": "string",
|
||||
"required": True,
|
||||
"description": "Brief statement of what we're researching",
|
||||
},
|
||||
"key_aspects": {
|
||||
"type": "array",
|
||||
"required": True,
|
||||
"description": "List of 3-5 key aspects to investigate",
|
||||
},
|
||||
"search_queries": {
|
||||
"type": "array",
|
||||
"required": True,
|
||||
"description": "List of 3-5 search queries",
|
||||
},
|
||||
},
|
||||
system_prompt="""\
|
||||
You are a research query strategist. Given a research topic, analyze it and generate search queries.
|
||||
|
||||
Your task:
|
||||
1. Understand the core research question
|
||||
2. Identify 3-5 key aspects to investigate
|
||||
3. Generate 3-5 diverse search queries that will find comprehensive information
|
||||
|
||||
CRITICAL: Return ONLY raw JSON. NO markdown, NO code blocks.
|
||||
|
||||
Return this JSON structure:
|
||||
{
|
||||
"research_focus": "Brief statement of what we're researching",
|
||||
"key_aspects": ["aspect1", "aspect2", "aspect3"],
|
||||
"search_queries": [
|
||||
"query 1 - broad overview",
|
||||
"query 2 - specific angle",
|
||||
"query 3 - recent developments",
|
||||
"query 4 - expert opinions",
|
||||
"query 5 - data/statistics"
|
||||
]
|
||||
}
|
||||
""",
|
||||
tools=[],
|
||||
max_retries=3,
|
||||
)
|
||||
|
||||
# Node 2: Search Sources
|
||||
search_sources_node = NodeSpec(
|
||||
id="search-sources",
|
||||
name="Search Sources",
|
||||
description="Execute web searches using the generated queries to find 15+ source URLs",
|
||||
node_type="llm_tool_use",
|
||||
input_keys=["search_queries", "research_focus"],
|
||||
output_keys=["source_urls", "search_results_summary"],
|
||||
output_schema={
|
||||
"source_urls": {
|
||||
"type": "array",
|
||||
"required": True,
|
||||
"description": "List of source URLs found",
|
||||
},
|
||||
"search_results_summary": {
|
||||
"type": "string",
|
||||
"required": True,
|
||||
"description": "Brief summary of what was found",
|
||||
},
|
||||
},
|
||||
system_prompt="""\
|
||||
You are a research assistant executing web searches. Use the web_search tool to find sources.
|
||||
|
||||
Your task:
|
||||
1. Execute each search query using web_search tool
|
||||
2. Collect URLs from search results
|
||||
3. Aim for 15+ diverse sources
|
||||
|
||||
After searching, return JSON with found sources:
|
||||
{
|
||||
"source_urls": ["url1", "url2", ...],
|
||||
"search_results_summary": "Brief summary of what was found"
|
||||
}
|
||||
""",
|
||||
tools=["web_search"],
|
||||
max_retries=3,
|
||||
)
|
||||
|
||||
# Node 3: Fetch Content
|
||||
fetch_content_node = NodeSpec(
|
||||
id="fetch-content",
|
||||
name="Fetch Content",
|
||||
description="Fetch and extract content from the discovered source URLs",
|
||||
node_type="llm_tool_use",
|
||||
input_keys=["source_urls", "research_focus"],
|
||||
output_keys=["fetched_sources", "fetch_errors"],
|
||||
output_schema={
|
||||
"fetched_sources": {
|
||||
"type": "array",
|
||||
"required": True,
|
||||
"description": "List of fetched source objects with url, title, content",
|
||||
},
|
||||
"fetch_errors": {
|
||||
"type": "array",
|
||||
"required": True,
|
||||
"description": "List of URLs that failed to fetch",
|
||||
},
|
||||
},
|
||||
system_prompt="""\
|
||||
You are a content fetcher. Use web_scrape tool to retrieve content from URLs.
|
||||
|
||||
Your task:
|
||||
1. Fetch content from each source URL using web_scrape tool
|
||||
2. Extract the main content relevant to the research focus
|
||||
3. Track any URLs that failed to fetch
|
||||
|
||||
After fetching, return JSON:
|
||||
{
|
||||
"fetched_sources": [
|
||||
{"url": "...", "title": "...", "content": "extracted text..."},
|
||||
...
|
||||
],
|
||||
"fetch_errors": ["url that failed", ...]
|
||||
}
|
||||
""",
|
||||
tools=["web_scrape"],
|
||||
max_retries=3,
|
||||
)
|
||||
|
||||
# Node 4: Evaluate Sources
|
||||
evaluate_sources_node = NodeSpec(
|
||||
id="evaluate-sources",
|
||||
name="Evaluate Sources",
|
||||
description="Score sources for relevance and quality, filter to top 10",
|
||||
node_type="llm_generate",
|
||||
input_keys=["fetched_sources", "research_focus", "key_aspects"],
|
||||
output_keys=["ranked_sources", "source_analysis"],
|
||||
output_schema={
|
||||
"ranked_sources": {
|
||||
"type": "array",
|
||||
"required": True,
|
||||
"description": "List of ranked sources with scores",
|
||||
},
|
||||
"source_analysis": {
|
||||
"type": "string",
|
||||
"required": True,
|
||||
"description": "Overview of source quality and coverage",
|
||||
},
|
||||
},
|
||||
system_prompt="""\
|
||||
You are a source evaluator. Assess each source for quality and relevance.
|
||||
|
||||
Scoring criteria:
|
||||
- Relevance to research focus (1-10)
|
||||
- Source credibility (1-10)
|
||||
- Information depth (1-10)
|
||||
- Recency if relevant (1-10)
|
||||
|
||||
Your task:
|
||||
1. Score each source
|
||||
2. Rank by combined score
|
||||
3. Select top 10 sources
|
||||
4. Note what each source uniquely contributes
|
||||
|
||||
Return JSON:
|
||||
{
|
||||
"ranked_sources": [
|
||||
{"url": "...", "title": "...", "content": "...", "score": 8.5, "unique_value": "..."},
|
||||
...
|
||||
],
|
||||
"source_analysis": "Overview of source quality and coverage"
|
||||
}
|
||||
""",
|
||||
tools=[],
|
||||
max_retries=3,
|
||||
)
|
||||
|
||||
# Node 5: Synthesize Findings
|
||||
synthesize_findings_node = NodeSpec(
|
||||
id="synthesize-findings",
|
||||
name="Synthesize Findings",
|
||||
description="Extract key facts from sources and identify common themes",
|
||||
node_type="llm_generate",
|
||||
input_keys=["ranked_sources", "research_focus", "key_aspects"],
|
||||
output_keys=["key_findings", "themes", "source_citations"],
|
||||
output_schema={
|
||||
"key_findings": {
|
||||
"type": "array",
|
||||
"required": True,
|
||||
"description": "List of key findings with sources and confidence",
|
||||
},
|
||||
"themes": {
|
||||
"type": "array",
|
||||
"required": True,
|
||||
"description": "List of themes with descriptions and supporting sources",
|
||||
},
|
||||
"source_citations": {
|
||||
"type": "object",
|
||||
"required": True,
|
||||
"description": "Map of facts to supporting URLs",
|
||||
},
|
||||
},
|
||||
system_prompt="""\
|
||||
You are a research synthesizer. Analyze multiple sources to extract insights.
|
||||
|
||||
Your task:
|
||||
1. Identify key facts from each source
|
||||
2. Find common themes across sources
|
||||
3. Note contradictions or debates
|
||||
4. Build a citation map (fact -> source URL)
|
||||
|
||||
Return JSON:
|
||||
{
|
||||
"key_findings": [
|
||||
{"finding": "...", "sources": ["url1", "url2"], "confidence": "high/medium/low"},
|
||||
...
|
||||
],
|
||||
"themes": [
|
||||
{"theme": "...", "description": "...", "supporting_sources": ["url1", ...]},
|
||||
...
|
||||
],
|
||||
"source_citations": {
|
||||
"fact or claim": ["supporting url1", "url2"],
|
||||
...
|
||||
}
|
||||
}
|
||||
""",
|
||||
tools=[],
|
||||
max_retries=3,
|
||||
)
|
||||
|
||||
# Node 6: Write Report
|
||||
write_report_node = NodeSpec(
|
||||
id="write-report",
|
||||
name="Write Report",
|
||||
description="Generate a narrative report with proper citations",
|
||||
node_type="llm_generate",
|
||||
input_keys=[
|
||||
"key_findings",
|
||||
"themes",
|
||||
"source_citations",
|
||||
"research_focus",
|
||||
"ranked_sources",
|
||||
],
|
||||
output_keys=["report_content", "references"],
|
||||
output_schema={
|
||||
"report_content": {
|
||||
"type": "string",
|
||||
"required": True,
|
||||
"description": "Full markdown report text with citations",
|
||||
},
|
||||
"references": {
|
||||
"type": "array",
|
||||
"required": True,
|
||||
"description": "List of reference objects with number, url, title",
|
||||
},
|
||||
},
|
||||
system_prompt="""\
|
||||
You are a research report writer. Create a well-structured narrative report.
|
||||
|
||||
Report structure:
|
||||
1. Executive Summary (2-3 paragraphs)
|
||||
2. Introduction (context and scope)
|
||||
3. Key Findings (organized by theme)
|
||||
4. Analysis (synthesis and implications)
|
||||
5. Conclusion
|
||||
6. References (numbered list of all sources)
|
||||
|
||||
Citation format: Use numbered citations like [1], [2] that correspond to the References section.
|
||||
|
||||
IMPORTANT:
|
||||
- Every factual claim MUST have a citation
|
||||
- Write in clear, professional prose
|
||||
- Be objective and balanced
|
||||
- Highlight areas of consensus and debate
|
||||
|
||||
Return JSON:
|
||||
{
|
||||
"report_content": "Full markdown report text with citations...",
|
||||
"references": [
|
||||
{"number": 1, "url": "...", "title": "..."},
|
||||
...
|
||||
]
|
||||
}
|
||||
""",
|
||||
tools=[],
|
||||
max_retries=3,
|
||||
)
|
||||
|
||||
# Node 7: Quality Check
|
||||
quality_check_node = NodeSpec(
|
||||
id="quality-check",
|
||||
name="Quality Check",
|
||||
description="Verify all claims have citations and report is coherent",
|
||||
node_type="llm_generate",
|
||||
input_keys=["report_content", "references", "source_citations"],
|
||||
output_keys=["quality_score", "issues", "final_report"],
|
||||
output_schema={
|
||||
"quality_score": {
|
||||
"type": "number",
|
||||
"required": True,
|
||||
"description": "Quality score 0-1",
|
||||
},
|
||||
"issues": {
|
||||
"type": "array",
|
||||
"required": True,
|
||||
"description": "List of issues found and fixed",
|
||||
},
|
||||
"final_report": {
|
||||
"type": "string",
|
||||
"required": True,
|
||||
"description": "Corrected full report",
|
||||
},
|
||||
},
|
||||
system_prompt="""\
|
||||
You are a quality assurance reviewer. Check the research report for issues.
|
||||
|
||||
Check for:
|
||||
1. Uncited claims (factual statements without [n] citation)
|
||||
2. Broken citations (references to non-existent numbers)
|
||||
3. Coherence (logical flow between sections)
|
||||
4. Completeness (all key aspects covered)
|
||||
5. Accuracy (claims match source content)
|
||||
|
||||
If issues found, fix them in the final report.
|
||||
|
||||
Return JSON:
|
||||
{
|
||||
"quality_score": 0.95,
|
||||
"issues": [
|
||||
{"type": "uncited_claim", "location": "paragraph 3", "fixed": true},
|
||||
...
|
||||
],
|
||||
"final_report": "Corrected full report with all issues fixed..."
|
||||
}
|
||||
""",
|
||||
tools=[],
|
||||
max_retries=3,
|
||||
)
|
||||
|
||||
# Node 8: Save Report
|
||||
save_report_node = NodeSpec(
|
||||
id="save-report",
|
||||
name="Save Report",
|
||||
description="Write the final report to a local markdown file",
|
||||
node_type="llm_tool_use",
|
||||
input_keys=["final_report", "references", "research_focus"],
|
||||
output_keys=["file_path", "save_status"],
|
||||
output_schema={
|
||||
"file_path": {
|
||||
"type": "string",
|
||||
"required": True,
|
||||
"description": "Path where report was saved",
|
||||
},
|
||||
"save_status": {
|
||||
"type": "string",
|
||||
"required": True,
|
||||
"description": "Status of save operation",
|
||||
},
|
||||
},
|
||||
system_prompt="""\
|
||||
You are a file manager. Save the research report to disk.
|
||||
|
||||
Your task:
|
||||
1. Generate a filename from the research focus (slugified, with date)
|
||||
2. Use the write_to_file tool to save the report as markdown
|
||||
3. Save to the ./research_reports/ directory
|
||||
|
||||
Filename format: research_YYYY-MM-DD_topic-slug.md
|
||||
|
||||
Return JSON:
|
||||
{
|
||||
"file_path": "research_reports/research_2026-01-23_topic-name.md",
|
||||
"save_status": "success"
|
||||
}
|
||||
""",
|
||||
tools=["write_to_file"],
|
||||
max_retries=3,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"parse_query_node",
|
||||
"search_sources_node",
|
||||
"fetch_content_node",
|
||||
"evaluate_sources_node",
|
||||
"synthesize_findings_node",
|
||||
"write_report_node",
|
||||
"quality_check_node",
|
||||
"save_report_node",
|
||||
]
|
||||
@@ -1,10 +1,10 @@
|
||||
---
|
||||
name: building-agents-core
|
||||
description: Core concepts for goal-driven agents - architecture, node types, tool discovery, and workflow overview. Use when starting agent development or need to understand agent fundamentals.
|
||||
description: Core concepts for goal-driven agents - architecture, node types (event_loop, function), tool discovery, and workflow overview. Use when starting agent development or need to understand agent fundamentals.
|
||||
license: Apache-2.0
|
||||
metadata:
|
||||
author: hive
|
||||
version: "1.0"
|
||||
version: "2.0"
|
||||
type: foundational
|
||||
part_of: building-agents
|
||||
---
|
||||
@@ -29,10 +29,10 @@ exports/my_agent/
|
||||
|
||||
**Key Principle: Agent is visible and editable during build**
|
||||
|
||||
- ✅ Files created immediately as components are approved
|
||||
- ✅ User can watch files grow in their editor
|
||||
- ✅ No session state - just direct file writes
|
||||
- ✅ No "export" step - agent is ready when build completes
|
||||
- Files created immediately as components are approved
|
||||
- User can watch files grow in their editor
|
||||
- No session state - just direct file writes
|
||||
- No "export" step - agent is ready when build completes
|
||||
|
||||
## Core Concepts
|
||||
|
||||
@@ -73,62 +73,212 @@ Unit of work (written to nodes/__init__.py)
|
||||
|
||||
**Node Types:**
|
||||
|
||||
- `llm_generate` - Text generation, parsing
|
||||
- `llm_tool_use` - Actions requiring tools
|
||||
- `router` - Conditional branching
|
||||
- `function` - Deterministic operations
|
||||
- `event_loop` — Multi-turn streaming loop with tool execution and judge-based evaluation. Works with or without tools.
|
||||
- `function` — Deterministic Python operations. No LLM involved.
|
||||
|
||||
```python
|
||||
search_node = NodeSpec(
|
||||
id="search-web",
|
||||
name="Search Web",
|
||||
description="Search for information online",
|
||||
node_type="llm_tool_use",
|
||||
description="Search for information and extract results",
|
||||
node_type="event_loop",
|
||||
input_keys=["query"],
|
||||
output_keys=["search_results"],
|
||||
system_prompt="Search the web for: {query}",
|
||||
system_prompt="Search the web for: {query}. Use the web_search tool to find results, then call set_output to store them.",
|
||||
tools=["web_search"],
|
||||
max_retries=3,
|
||||
)
|
||||
```
|
||||
|
||||
**NodeSpec Fields for Event Loop Nodes:**
|
||||
|
||||
| Field | Default | Description |
|
||||
|-------|---------|-------------|
|
||||
| `client_facing` | `False` | If True, streams output to user and blocks for input between turns |
|
||||
| `nullable_output_keys` | `[]` | Output keys that may remain unset (for mutually exclusive outputs) |
|
||||
| `max_node_visits` | `1` | Max times this node executes per run. Set >1 for feedback loop targets |
|
||||
|
||||
### Edge
|
||||
|
||||
Connection between nodes (written to agent.py)
|
||||
|
||||
**Edge Conditions:**
|
||||
|
||||
- `on_success` - Proceed if node succeeds
|
||||
- `on_failure` - Handle errors
|
||||
- `always` - Always proceed
|
||||
- `conditional` - Based on expression
|
||||
- `on_success` — Proceed if node succeeds (most common)
|
||||
- `on_failure` — Handle errors
|
||||
- `always` — Always proceed
|
||||
- `conditional` — Based on expression evaluating node output
|
||||
|
||||
**Edge Priority:**
|
||||
|
||||
Priority controls evaluation order when multiple edges leave the same node. Higher priority edges are evaluated first. Use negative priority for feedback edges (edges that loop back to earlier nodes).
|
||||
|
||||
```python
|
||||
# Forward edge (evaluated first)
|
||||
EdgeSpec(
|
||||
id="search-to-analyze",
|
||||
source="search-web",
|
||||
target="analyze-results",
|
||||
condition=EdgeCondition.ON_SUCCESS,
|
||||
id="review-to-campaign",
|
||||
source="review",
|
||||
target="campaign-builder",
|
||||
condition=EdgeCondition.CONDITIONAL,
|
||||
condition_expr="output.get('approved_contacts') is not None",
|
||||
priority=1,
|
||||
)
|
||||
|
||||
# Feedback edge (evaluated after forward edges)
|
||||
EdgeSpec(
|
||||
id="review-feedback",
|
||||
source="review",
|
||||
target="extractor",
|
||||
condition=EdgeCondition.CONDITIONAL,
|
||||
condition_expr="output.get('redo_extraction') is not None",
|
||||
priority=-1,
|
||||
)
|
||||
```
|
||||
|
||||
### Pause/Resume
|
||||
### Client-Facing Nodes
|
||||
|
||||
Multi-turn conversations
|
||||
|
||||
- **Pause nodes** - Stop execution, wait for user input
|
||||
- **Resume entry points** - Continue from pause with user's response
|
||||
For multi-turn conversations with the user, set `client_facing=True` on a node. The node will:
|
||||
- Stream its LLM output directly to the end user
|
||||
- Block for user input between conversational turns
|
||||
- Resume when new input is injected via `inject_event()`
|
||||
|
||||
```python
|
||||
# Example pause/resume configuration
|
||||
pause_nodes = ["request-clarification"]
|
||||
entry_points = {
|
||||
"start": "analyze-request",
|
||||
"request-clarification_resume": "process-clarification"
|
||||
}
|
||||
intake_node = NodeSpec(
|
||||
id="intake",
|
||||
name="Intake",
|
||||
description="Gather requirements from the user",
|
||||
node_type="event_loop",
|
||||
client_facing=True,
|
||||
input_keys=[],
|
||||
output_keys=["repo_url", "project_url"],
|
||||
system_prompt="You are the intake agent. Ask the user for the repo URL and project URL.",
|
||||
)
|
||||
```
|
||||
|
||||
> **Legacy Note:** The old `pause_nodes` / `entry_points` pattern still works but `client_facing=True` is preferred for new agents.
|
||||
|
||||
**STEP 1 / STEP 2 Prompt Pattern:** For client-facing nodes, structure the system prompt with two explicit phases:
|
||||
|
||||
```python
|
||||
system_prompt="""\
|
||||
**STEP 1 — Respond to the user (text only, NO tool calls):**
|
||||
[Present information, ask questions, etc.]
|
||||
|
||||
**STEP 2 — After the user responds, call set_output:**
|
||||
[Call set_output with the structured outputs]
|
||||
"""
|
||||
```
|
||||
|
||||
This prevents the LLM from calling `set_output` prematurely before the user has had a chance to respond.
|
||||
|
||||
### Node Design: Fewer, Richer Nodes
|
||||
|
||||
Prefer fewer nodes that do more work over many thin single-purpose nodes:
|
||||
|
||||
- **Bad**: 8 thin nodes (parse query → search → fetch → evaluate → synthesize → write → check → save)
|
||||
- **Good**: 4 rich nodes (intake → research → review → report)
|
||||
|
||||
Why: Each node boundary requires serializing outputs and passing context. Fewer nodes means the LLM retains full context of its work within the node. A research node that searches, fetches, and analyzes keeps all the source material in its conversation history.
|
||||
|
||||
### nullable_output_keys for Cross-Edge Inputs
|
||||
|
||||
When a node receives inputs that only arrive on certain edges (e.g., `feedback` only comes from a review → research feedback loop, not from intake → research), mark those keys as `nullable_output_keys`:
|
||||
|
||||
```python
|
||||
research_node = NodeSpec(
|
||||
id="research",
|
||||
input_keys=["research_brief", "feedback"],
|
||||
nullable_output_keys=["feedback"], # Not present on first visit
|
||||
max_node_visits=3,
|
||||
...
|
||||
)
|
||||
```
|
||||
|
||||
## Event Loop Architecture Concepts
|
||||
|
||||
### How EventLoopNode Works
|
||||
|
||||
An event loop node runs a multi-turn loop:
|
||||
1. LLM receives system prompt + conversation history
|
||||
2. LLM responds (text and/or tool calls)
|
||||
3. Tool calls are executed, results added to conversation
|
||||
4. Judge evaluates: ACCEPT (exit loop), RETRY (loop again), or ESCALATE
|
||||
5. Repeat until judge ACCEPTs or max_iterations reached
|
||||
|
||||
### EventLoopNode Runtime
|
||||
|
||||
EventLoopNodes are **auto-created** by `GraphExecutor` at runtime. You do NOT need to manually register them. Both `GraphExecutor` (direct) and `AgentRuntime` / `create_agent_runtime()` handle event_loop nodes automatically.
|
||||
|
||||
```python
|
||||
# Direct execution — executor auto-creates EventLoopNodes
|
||||
from framework.graph.executor import GraphExecutor
|
||||
from framework.runtime.core import Runtime
|
||||
|
||||
runtime = Runtime(storage_path)
|
||||
executor = GraphExecutor(
|
||||
runtime=runtime,
|
||||
llm=llm,
|
||||
tools=tools,
|
||||
tool_executor=tool_executor,
|
||||
storage_path=storage_path,
|
||||
)
|
||||
result = await executor.execute(graph=graph, goal=goal, input_data=input_data)
|
||||
|
||||
# TUI execution — AgentRuntime also works
|
||||
from framework.runtime.agent_runtime import create_agent_runtime
|
||||
runtime = create_agent_runtime(
|
||||
graph=graph, goal=goal, storage_path=storage_path,
|
||||
entry_points=[...], llm=llm, tools=tools, tool_executor=tool_executor,
|
||||
)
|
||||
```
|
||||
|
||||
### set_output
|
||||
|
||||
Nodes produce structured outputs by calling `set_output(key, value)` — a synthetic tool injected by the framework. When the LLM calls `set_output`, the value is stored in the output accumulator and made available to downstream nodes via shared memory.
|
||||
|
||||
`set_output` is NOT a real tool — it is excluded from `real_tool_results`. For client-facing nodes, this means a turn where the LLM only calls `set_output` (no other tools) is treated as a conversational boundary and will block for user input.
|
||||
|
||||
### JudgeProtocol
|
||||
|
||||
**The judge is the SOLE mechanism for acceptance decisions.** Do not add ad-hoc framework gating, output rollback, or premature rejection logic. If the LLM calls `set_output` too early, fix it with better prompts or a custom judge — not framework-level guards.
|
||||
|
||||
The judge controls when a node's loop exits:
|
||||
- **Implicit judge** (default, no judge configured): ACCEPTs when the LLM finishes with no tool calls and all required output keys are set
|
||||
- **SchemaJudge**: Validates outputs against a Pydantic model
|
||||
- **Custom judges**: Implement `evaluate(context) -> JudgeVerdict`
|
||||
|
||||
### LoopConfig
|
||||
|
||||
Controls loop behavior:
|
||||
- `max_iterations` (default 50) — prevents infinite loops
|
||||
- `max_tool_calls_per_turn` (default 10) — limits tool calls per LLM response
|
||||
- `stall_detection_threshold` (default 3) — detects repeated identical responses
|
||||
- `max_history_tokens` (default 32000) — triggers conversation compaction
|
||||
|
||||
### Data Tools (Spillover Management)
|
||||
|
||||
When tool results exceed the context window, the framework automatically saves them to a spillover directory and truncates with a hint. Nodes that produce or consume large data should include the data tools:
|
||||
|
||||
- `save_data(filename, data, data_dir)` — Write data to a file in the data directory
|
||||
- `load_data(filename, data_dir, offset=0, limit=50)` — Read data with line-based pagination
|
||||
- `list_data_files(data_dir)` — List available data files
|
||||
|
||||
These are real MCP tools (not synthetic). Add them to nodes that handle large tool results:
|
||||
|
||||
```python
|
||||
research_node = NodeSpec(
|
||||
...
|
||||
tools=["web_search", "web_scrape", "load_data", "save_data", "list_data_files"],
|
||||
)
|
||||
```
|
||||
|
||||
### Fan-Out / Fan-In
|
||||
|
||||
Multiple ON_SUCCESS edges from the same source create parallel execution. All branches run concurrently via `asyncio.gather()`. Parallel event_loop nodes must have disjoint `output_keys`.
|
||||
|
||||
### max_node_visits
|
||||
|
||||
Controls how many times a node can execute in one graph run. Default is 1. Set higher for nodes that are targets of feedback edges (review-reject loops). Set 0 for unlimited (guarded by max_steps).
|
||||
|
||||
## Tool Discovery & Validation
|
||||
|
||||
**CRITICAL:** Before adding a node with tools, you MUST verify the tools exist.
|
||||
@@ -157,29 +307,6 @@ mcp__agent-builder__list_mcp_tools()
|
||||
mcp__agent-builder__list_mcp_tools(server_name="tools")
|
||||
```
|
||||
|
||||
This returns available tools with their descriptions and parameters:
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"tools_by_server": {
|
||||
"tools": [
|
||||
{
|
||||
"name": "web_search",
|
||||
"description": "Search the web...",
|
||||
"parameters": ["query"]
|
||||
},
|
||||
{
|
||||
"name": "web_scrape",
|
||||
"description": "Scrape a URL...",
|
||||
"parameters": ["url"]
|
||||
}
|
||||
]
|
||||
},
|
||||
"total_tools": 14
|
||||
}
|
||||
```
|
||||
|
||||
### Step 3: Validate Before Adding Nodes
|
||||
|
||||
Before writing a node with `tools=[...]`:
|
||||
@@ -193,27 +320,10 @@ Before writing a node with `tools=[...]`:
|
||||
|
||||
### Tool Validation Anti-Patterns
|
||||
|
||||
❌ **Never assume a tool exists** - always call `list_mcp_tools()` first
|
||||
❌ **Never write a node with unverified tools** - validate before writing
|
||||
❌ **Never silently drop tools** - if a tool doesn't exist, inform the user
|
||||
❌ **Never guess tool names** - use exact names from discovery response
|
||||
|
||||
### Example Validation Flow
|
||||
|
||||
```python
|
||||
# 1. User requests: "Add a node that searches the web"
|
||||
# 2. Discover available tools
|
||||
tools_response = mcp__agent-builder__list_mcp_tools()
|
||||
|
||||
# 3. Check if web_search exists
|
||||
available = [t["name"] for tools in tools_response["tools_by_server"].values() for t in tools]
|
||||
if "web_search" not in available:
|
||||
# Inform user and ask how to proceed
|
||||
print("❌ 'web_search' not available. Available tools:", available)
|
||||
else:
|
||||
# Proceed with node creation
|
||||
# ...
|
||||
```
|
||||
- **Never assume a tool exists** - always call `list_mcp_tools()` first
|
||||
- **Never write a node with unverified tools** - validate before writing
|
||||
- **Never silently drop tools** - if a tool doesn't exist, inform the user
|
||||
- **Never guess tool names** - use exact names from discovery response
|
||||
|
||||
## Workflow Overview: Incremental File Construction
|
||||
|
||||
@@ -221,42 +331,19 @@ else:
|
||||
1. CREATE PACKAGE → mkdir + write skeletons
|
||||
2. DEFINE GOAL → Write to agent.py + config.py
|
||||
3. FOR EACH NODE:
|
||||
- Propose design
|
||||
- Propose design (event_loop for LLM work, function for deterministic)
|
||||
- User approves
|
||||
- Write to nodes/__init__.py IMMEDIATELY ← FILE WRITTEN
|
||||
- (Optional) Validate with test_node ← MCP VALIDATION
|
||||
- User can open file and see it
|
||||
4. CONNECT EDGES → Update agent.py ← FILE WRITTEN
|
||||
- (Optional) Validate with validate_graph ← MCP VALIDATION
|
||||
5. FINALIZE → Write agent class to agent.py ← FILE WRITTEN
|
||||
- Write to nodes/__init__.py IMMEDIATELY
|
||||
- (Optional) Validate with test_node
|
||||
4. CONNECT EDGES → Update agent.py
|
||||
- Use priority for feedback edges (negative priority)
|
||||
- (Optional) Validate with validate_graph
|
||||
5. FINALIZE → Write agent class to agent.py
|
||||
6. DONE - Agent ready at exports/my_agent/
|
||||
```
|
||||
|
||||
**Files written immediately. MCP tools optional for validation/testing bookkeeping.**
|
||||
|
||||
### The Key Difference
|
||||
|
||||
**OLD (Bad):**
|
||||
|
||||
```
|
||||
MCP add_node → Session State → MCP add_node → Session State → ...
|
||||
↓
|
||||
MCP export_graph
|
||||
↓
|
||||
Files appear
|
||||
```
|
||||
|
||||
**NEW (Good):**
|
||||
|
||||
```
|
||||
Write node to file → (Optional: MCP test_node) → Write node to file → ...
|
||||
↓ ↓
|
||||
File visible File visible
|
||||
immediately immediately
|
||||
```
|
||||
|
||||
**Bottom line:** Use Write/Edit for construction, MCP for validation if needed.
|
||||
|
||||
## When to Use This Skill
|
||||
|
||||
Use building-agents-core when:
|
||||
@@ -285,12 +372,17 @@ mcp__agent-builder__test_node(
|
||||
**validate_graph** - Check graph structure
|
||||
```python
|
||||
mcp__agent-builder__validate_graph()
|
||||
# Returns: unreachable nodes, missing connections, etc.
|
||||
# Returns: unreachable nodes, missing connections, event_loop validation, etc.
|
||||
```
|
||||
|
||||
**create_session** - Track session state for bookkeeping
|
||||
**configure_loop** - Set event loop parameters
|
||||
```python
|
||||
mcp__agent-builder__create_session(session_name="my-build")
|
||||
mcp__agent-builder__configure_loop(
|
||||
max_iterations=50,
|
||||
max_tool_calls_per_turn=10,
|
||||
stall_detection_threshold=3,
|
||||
max_history_tokens=32000
|
||||
)
|
||||
```
|
||||
|
||||
**Key Point:** Files are written FIRST. MCP tools are for validation only.
|
||||
@@ -298,6 +390,6 @@ mcp__agent-builder__create_session(session_name="my-build")
|
||||
## Related Skills
|
||||
|
||||
- **building-agents-construction** - Step-by-step building process
|
||||
- **building-agents-patterns** - Best practices and examples
|
||||
- **building-agents-patterns** - Best practices: judges, feedback edges, fan-out, context management
|
||||
- **agent-workflow** - Complete workflow orchestrator
|
||||
- **testing-agent** - Test and validate completed agents
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
---
|
||||
name: building-agents-patterns
|
||||
description: Best practices, patterns, and examples for building goal-driven agents. Includes pause/resume architecture, hybrid workflows, anti-patterns, and handoff to testing. Use when optimizing agent design.
|
||||
description: Best practices, patterns, and examples for building goal-driven agents. Includes client-facing interaction, feedback edges, judge patterns, fan-out/fan-in, context management, and anti-patterns.
|
||||
license: Apache-2.0
|
||||
metadata:
|
||||
author: hive
|
||||
version: "1.0"
|
||||
version: "2.0"
|
||||
type: reference
|
||||
part_of: building-agents
|
||||
---
|
||||
@@ -24,10 +24,10 @@ How to build a node using both direct file writes and optional MCP validation:
|
||||
node_code = '''
|
||||
search_node = NodeSpec(
|
||||
id="search-web",
|
||||
node_type="llm_tool_use",
|
||||
node_type="event_loop",
|
||||
input_keys=["query"],
|
||||
output_keys=["search_results"],
|
||||
system_prompt="Search the web for: {query}",
|
||||
system_prompt="Search the web for: {query}. Use web_search, then call set_output to store results.",
|
||||
tools=["web_search"],
|
||||
)
|
||||
'''
|
||||
@@ -38,17 +38,12 @@ Edit(
|
||||
new_string=node_code
|
||||
)
|
||||
|
||||
print("✅ Added search_node to nodes/__init__.py")
|
||||
print("📁 Open exports/research_agent/nodes/__init__.py to see it!")
|
||||
|
||||
# 2. OPTIONALLY VALIDATE WITH MCP (Secondary - bookkeeping)
|
||||
validation = mcp__agent-builder__test_node(
|
||||
node_id="search-web",
|
||||
test_input='{"query": "python tutorials"}',
|
||||
mock_llm_response='{"search_results": [...mock results...]}'
|
||||
)
|
||||
|
||||
print(f"✓ Validation: {validation['success']}")
|
||||
```
|
||||
|
||||
**User experience:**
|
||||
@@ -57,401 +52,300 @@ print(f"✓ Validation: {validation['success']}")
|
||||
- Gets validation feedback (from step 2)
|
||||
- Can edit the file directly if needed
|
||||
|
||||
This combines visibility (files) with validation (MCP tools).
|
||||
## Multi-Turn Interaction Patterns
|
||||
|
||||
## Pause/Resume Architecture
|
||||
For agents needing multi-turn conversations with users, use `client_facing=True` on event_loop nodes.
|
||||
|
||||
For agents needing multi-turn conversations with user interaction:
|
||||
### Client-Facing Nodes
|
||||
|
||||
### Basic Pause/Resume Flow
|
||||
A client-facing node streams LLM output to the user and blocks for user input between conversational turns. This replaces the old pause/resume pattern.
|
||||
|
||||
```python
|
||||
# Define pause nodes - execution stops at these nodes
|
||||
pause_nodes = ["request-clarification", "await-approval"]
|
||||
# Client-facing node with STEP 1/STEP 2 prompt pattern
|
||||
intake_node = NodeSpec(
|
||||
id="intake",
|
||||
name="Intake",
|
||||
description="Gather requirements from the user",
|
||||
node_type="event_loop",
|
||||
client_facing=True,
|
||||
input_keys=["topic"],
|
||||
output_keys=["research_brief"],
|
||||
system_prompt="""\
|
||||
You are an intake specialist.
|
||||
|
||||
# Define entry points - where to resume from each pause
|
||||
entry_points = {
|
||||
"start": "analyze-request", # Initial entry
|
||||
"request-clarification_resume": "process-clarification", # Resume from clarification
|
||||
"await-approval_resume": "execute-action", # Resume from approval
|
||||
}
|
||||
```
|
||||
**STEP 1 — Read and respond (text only, NO tool calls):**
|
||||
1. Read the topic provided
|
||||
2. If it's vague, ask 1-2 clarifying questions
|
||||
3. If it's clear, confirm your understanding
|
||||
|
||||
### Example: Multi-Turn Research Agent
|
||||
|
||||
```python
|
||||
# Nodes
|
||||
nodes = [
|
||||
NodeSpec(id="analyze-request", ...),
|
||||
NodeSpec(id="request-clarification", ...), # PAUSE NODE
|
||||
NodeSpec(id="process-clarification", ...),
|
||||
NodeSpec(id="generate-results", ...),
|
||||
NodeSpec(id="await-approval", ...), # PAUSE NODE
|
||||
NodeSpec(id="execute-action", ...),
|
||||
]
|
||||
|
||||
# Edges with resume flows
|
||||
edges = [
|
||||
EdgeSpec(
|
||||
id="analyze-to-clarify",
|
||||
source="analyze-request",
|
||||
target="request-clarification",
|
||||
condition=EdgeCondition.CONDITIONAL,
|
||||
condition_expr="needs_clarification == true",
|
||||
),
|
||||
# When resumed, goes to process-clarification
|
||||
EdgeSpec(
|
||||
id="clarify-to-process",
|
||||
source="request-clarification",
|
||||
target="process-clarification",
|
||||
condition=EdgeCondition.ALWAYS,
|
||||
),
|
||||
EdgeSpec(
|
||||
id="results-to-approval",
|
||||
source="generate-results",
|
||||
target="await-approval",
|
||||
condition=EdgeCondition.ALWAYS,
|
||||
),
|
||||
# When resumed, goes to execute-action
|
||||
EdgeSpec(
|
||||
id="approval-to-execute",
|
||||
source="await-approval",
|
||||
target="execute-action",
|
||||
condition=EdgeCondition.ALWAYS,
|
||||
),
|
||||
]
|
||||
|
||||
# Configuration
|
||||
pause_nodes = ["request-clarification", "await-approval"]
|
||||
entry_points = {
|
||||
"start": "analyze-request",
|
||||
"request-clarification_resume": "process-clarification",
|
||||
"await-approval_resume": "execute-action",
|
||||
}
|
||||
```
|
||||
|
||||
### Running Pause/Resume Agents
|
||||
|
||||
```python
|
||||
# Initial run - will pause at first pause node
|
||||
result1 = await agent.run(
|
||||
context={"query": "research topic"},
|
||||
session_state=None
|
||||
**STEP 2 — After the user confirms, call set_output:**
|
||||
- set_output("research_brief", "Clear description of what to research")
|
||||
""",
|
||||
)
|
||||
|
||||
# Check if paused
|
||||
if result1.paused_at:
|
||||
print(f"Paused at: {result1.paused_at}")
|
||||
|
||||
# Resume with user input
|
||||
result2 = await agent.run(
|
||||
context={"user_response": "clarification details"},
|
||||
session_state=result1.session_state # Pass previous state
|
||||
)
|
||||
# Internal node runs without user interaction
|
||||
research_node = NodeSpec(
|
||||
id="research",
|
||||
name="Research",
|
||||
description="Search and analyze sources",
|
||||
node_type="event_loop",
|
||||
input_keys=["research_brief"],
|
||||
output_keys=["findings", "sources"],
|
||||
system_prompt="Research the topic using web_search and web_scrape...",
|
||||
tools=["web_search", "web_scrape", "load_data", "save_data"],
|
||||
)
|
||||
```
|
||||
|
||||
**How it works:**
|
||||
- Client-facing nodes stream LLM text to the user and block for input after each response
|
||||
- User input is injected via `node.inject_event(text)`
|
||||
- When the LLM calls `set_output` to produce structured outputs, the judge evaluates and ACCEPTs
|
||||
- Internal nodes (non-client-facing) run their entire loop without blocking
|
||||
- `set_output` is a synthetic tool — a turn with only `set_output` calls (no real tools) triggers user input blocking
|
||||
|
||||
**STEP 1/STEP 2 pattern:** Always structure client-facing prompts with explicit phases. STEP 1 is text-only conversation. STEP 2 calls `set_output` after user confirmation. This prevents the LLM from calling `set_output` prematurely before the user responds.
|
||||
|
||||
### When to Use client_facing
|
||||
|
||||
| Scenario | client_facing | Why |
|
||||
|----------|:---:|-----|
|
||||
| Gathering user requirements | Yes | Need user input |
|
||||
| Human review/approval checkpoint | Yes | Need human decision |
|
||||
| Data processing (scanning, scoring) | No | Runs autonomously |
|
||||
| Report generation | No | No user input needed |
|
||||
| Final confirmation before action | Yes | Need explicit approval |
|
||||
|
||||
> **Legacy Note:** The `pause_nodes` / `entry_points` pattern still works for backward compatibility but `client_facing=True` is preferred for new agents.
|
||||
|
||||
## Edge-Based Routing and Feedback Loops
|
||||
|
||||
### Conditional Edge Routing
|
||||
|
||||
Multiple conditional edges from the same source replace the old `router` node type. Each edge checks a condition on the node's output.
|
||||
|
||||
```python
|
||||
# Node with mutually exclusive outputs
|
||||
review_node = NodeSpec(
|
||||
id="review",
|
||||
name="Review",
|
||||
node_type="event_loop",
|
||||
client_facing=True,
|
||||
output_keys=["approved_contacts", "redo_extraction"],
|
||||
nullable_output_keys=["approved_contacts", "redo_extraction"],
|
||||
max_node_visits=3,
|
||||
system_prompt="Present the contact list to the operator. If they approve, call set_output('approved_contacts', ...). If they want changes, call set_output('redo_extraction', 'true').",
|
||||
)
|
||||
|
||||
# Forward edge (positive priority, evaluated first)
|
||||
EdgeSpec(
|
||||
id="review-to-campaign",
|
||||
source="review",
|
||||
target="campaign-builder",
|
||||
condition=EdgeCondition.CONDITIONAL,
|
||||
condition_expr="output.get('approved_contacts') is not None",
|
||||
priority=1,
|
||||
)
|
||||
|
||||
# Feedback edge (negative priority, evaluated after forward edges)
|
||||
EdgeSpec(
|
||||
id="review-feedback",
|
||||
source="review",
|
||||
target="extractor",
|
||||
condition=EdgeCondition.CONDITIONAL,
|
||||
condition_expr="output.get('redo_extraction') is not None",
|
||||
priority=-1,
|
||||
)
|
||||
```
|
||||
|
||||
**Key concepts:**
|
||||
- `nullable_output_keys`: Lists output keys that may remain unset. The node sets exactly one of the mutually exclusive keys per execution.
|
||||
- `max_node_visits`: Must be >1 on the feedback target (extractor) so it can re-execute. Default is 1.
|
||||
- `priority`: Positive = forward edge (evaluated first). Negative = feedback edge. The executor tries forward edges first; if none match, falls back to feedback edges.
|
||||
|
||||
### Routing Decision Table
|
||||
|
||||
| Pattern | Old Approach | New Approach |
|
||||
|---------|-------------|--------------|
|
||||
| Conditional branching | `router` node | Conditional edges with `condition_expr` |
|
||||
| Binary approve/reject | `pause_nodes` + resume | `client_facing=True` + `nullable_output_keys` |
|
||||
| Loop-back on rejection | Manual entry_points | Feedback edge with `priority=-1` |
|
||||
| Multi-way routing | Router with routes dict | Multiple conditional edges with priorities |
|
||||
|
||||
## Judge Patterns
|
||||
|
||||
**Core Principle: The judge is the SOLE mechanism for acceptance decisions.** Never add ad-hoc framework gating to compensate for LLM behavior. If the LLM calls `set_output` prematurely, fix the system prompt or use a custom judge. Anti-patterns to avoid:
|
||||
- Output rollback logic
|
||||
- `_user_has_responded` flags
|
||||
- Premature set_output rejection
|
||||
- Interaction protocol injection into system prompts
|
||||
|
||||
Judges control when an event_loop node's loop exits. Choose based on validation needs.
|
||||
|
||||
### Implicit Judge (Default)
|
||||
|
||||
When no judge is configured, the implicit judge ACCEPTs when:
|
||||
- The LLM finishes its response with no tool calls
|
||||
- All required output keys have been set via `set_output`
|
||||
|
||||
Best for simple nodes where "all outputs set" is sufficient validation.
|
||||
|
||||
### SchemaJudge
|
||||
|
||||
Validates outputs against a Pydantic model. Use when you need structural validation.
|
||||
|
||||
```python
|
||||
from pydantic import BaseModel
|
||||
|
||||
class ScannerOutput(BaseModel):
|
||||
github_users: list[dict] # Must be a list of user objects
|
||||
|
||||
class SchemaJudge:
|
||||
def __init__(self, output_model: type[BaseModel]):
|
||||
self._model = output_model
|
||||
|
||||
async def evaluate(self, context: dict) -> JudgeVerdict:
|
||||
missing = context.get("missing_keys", [])
|
||||
if missing:
|
||||
return JudgeVerdict(
|
||||
action="RETRY",
|
||||
feedback=f"Missing output keys: {missing}. Use set_output to provide them.",
|
||||
)
|
||||
try:
|
||||
self._model.model_validate(context["output_accumulator"])
|
||||
return JudgeVerdict(action="ACCEPT")
|
||||
except ValidationError as e:
|
||||
return JudgeVerdict(action="RETRY", feedback=str(e))
|
||||
```
|
||||
|
||||
### When to Use Which Judge
|
||||
|
||||
| Judge | Use When | Example |
|
||||
|-------|----------|---------|
|
||||
| Implicit (None) | Output keys are sufficient validation | Simple data extraction |
|
||||
| SchemaJudge | Need structural validation of outputs | API response parsing |
|
||||
| Custom | Domain-specific validation logic | Score must be 0.0-1.0 |
|
||||
|
||||
## Fan-Out / Fan-In (Parallel Execution)
|
||||
|
||||
Multiple ON_SUCCESS edges from the same source trigger parallel execution. All branches run concurrently via `asyncio.gather()`.
|
||||
|
||||
```python
|
||||
# Scanner fans out to Profiler and Scorer in parallel
|
||||
EdgeSpec(id="scanner-to-profiler", source="scanner", target="profiler",
|
||||
condition=EdgeCondition.ON_SUCCESS)
|
||||
EdgeSpec(id="scanner-to-scorer", source="scanner", target="scorer",
|
||||
condition=EdgeCondition.ON_SUCCESS)
|
||||
|
||||
# Both fan in to Extractor
|
||||
EdgeSpec(id="profiler-to-extractor", source="profiler", target="extractor",
|
||||
condition=EdgeCondition.ON_SUCCESS)
|
||||
EdgeSpec(id="scorer-to-extractor", source="scorer", target="extractor",
|
||||
condition=EdgeCondition.ON_SUCCESS)
|
||||
```
|
||||
|
||||
**Requirements:**
|
||||
- Parallel event_loop nodes must have **disjoint output_keys** (no key written by both)
|
||||
- Only one parallel branch may contain a `client_facing` node
|
||||
- Fan-in node receives outputs from all completed branches in shared memory
|
||||
|
||||
## Context Management Patterns
|
||||
|
||||
### Tiered Compaction
|
||||
|
||||
EventLoopNode automatically manages context window usage with tiered compaction:
|
||||
1. **Pruning** — Old tool results replaced with compact placeholders (zero-cost, no LLM call)
|
||||
2. **Normal compaction** — LLM summarizes older messages
|
||||
3. **Aggressive compaction** — Keeps only recent messages + summary
|
||||
4. **Emergency** — Hard reset with tool history preservation
|
||||
|
||||
### Spillover Pattern
|
||||
|
||||
The framework automatically truncates large tool results and saves full content to a spillover directory. The LLM receives a truncation message with instructions to use `load_data` to read the full result.
|
||||
|
||||
For explicit data management, use the data tools (real MCP tools, not synthetic):
|
||||
|
||||
```python
|
||||
# save_data, load_data, list_data_files are real MCP tools
|
||||
# Each takes a data_dir parameter since the MCP server is shared
|
||||
|
||||
# Saving large results
|
||||
save_data(filename="sources.json", data=large_json_string, data_dir="/path/to/spillover")
|
||||
|
||||
# Reading with pagination (line-based offset/limit)
|
||||
load_data(filename="sources.json", data_dir="/path/to/spillover", offset=0, limit=50)
|
||||
|
||||
# Listing available files
|
||||
list_data_files(data_dir="/path/to/spillover")
|
||||
```
|
||||
|
||||
Add data tools to nodes that handle large tool results:
|
||||
|
||||
```python
|
||||
research_node = NodeSpec(
|
||||
...
|
||||
tools=["web_search", "web_scrape", "load_data", "save_data", "list_data_files"],
|
||||
)
|
||||
```
|
||||
|
||||
The `data_dir` is passed by the framework (from the node's spillover directory). The LLM sees `data_dir` in truncation messages and uses it when calling `load_data`.
|
||||
|
||||
## Anti-Patterns
|
||||
|
||||
### What NOT to Do
|
||||
|
||||
❌ **Don't rely on `export_graph`** - Write files immediately, not at end
|
||||
```python
|
||||
# BAD: Building in session state, exporting at end
|
||||
mcp__agent-builder__add_node(...)
|
||||
mcp__agent-builder__add_node(...)
|
||||
mcp__agent-builder__export_graph() # Files appear only now
|
||||
- **Don't rely on `export_graph`** — Write files immediately, not at end
|
||||
- **Don't hide code in session** — Write to files as components are approved
|
||||
- **Don't wait to write files** — Agent visible from first step
|
||||
- **Don't batch everything** — Write incrementally, one component at a time
|
||||
- **Don't create too many thin nodes** — Prefer fewer, richer nodes (see below)
|
||||
- **Don't add framework gating for LLM behavior** — Fix prompts or use judges instead
|
||||
|
||||
# GOOD: Writing files immediately
|
||||
Write(file_path="...", content=node_code) # File visible now
|
||||
Write(file_path="...", content=node_code) # File visible now
|
||||
```
|
||||
### Fewer, Richer Nodes
|
||||
|
||||
❌ **Don't hide code in session** - Write to files as components approved
|
||||
```python
|
||||
# BAD: Accumulating changes invisibly
|
||||
session.add_component(component1)
|
||||
session.add_component(component2)
|
||||
# User can't see anything yet
|
||||
A common mistake is splitting work into too many small single-purpose nodes. Each node boundary requires serializing outputs, losing in-context information, and adding edge complexity.
|
||||
|
||||
# GOOD: Incremental visibility
|
||||
Edit(file_path="...", ...) # User sees change 1
|
||||
Edit(file_path="...", ...) # User sees change 2
|
||||
```
|
||||
| Bad (8 thin nodes) | Good (4 rich nodes) |
|
||||
|---------------------|---------------------|
|
||||
| parse-query | intake (client-facing) |
|
||||
| search-sources | research (search + fetch + analyze) |
|
||||
| fetch-content | review (client-facing) |
|
||||
| evaluate-sources | report (write + deliver) |
|
||||
| synthesize-findings | |
|
||||
| write-report | |
|
||||
| quality-check | |
|
||||
| save-report | |
|
||||
|
||||
❌ **Don't wait to write files** - Agent visible from first step
|
||||
```python
|
||||
# BAD: Building everything before writing
|
||||
design_all_nodes()
|
||||
design_all_edges()
|
||||
write_everything_at_once()
|
||||
|
||||
# GOOD: Write as you go
|
||||
write_package_structure() # Visible
|
||||
write_goal() # Visible
|
||||
write_node_1() # Visible
|
||||
write_node_2() # Visible
|
||||
```
|
||||
|
||||
❌ **Don't batch everything** - Write incrementally
|
||||
```python
|
||||
# BAD: Batching all nodes
|
||||
nodes = [design_node_1(), design_node_2(), ...]
|
||||
write_all_nodes(nodes)
|
||||
|
||||
# GOOD: One at a time with user feedback
|
||||
write_node_1() # User approves
|
||||
write_node_2() # User approves
|
||||
write_node_3() # User approves
|
||||
```
|
||||
**Why fewer nodes are better:**
|
||||
- The LLM retains full context of its work within a single node
|
||||
- A research node that searches, fetches, and analyzes keeps all source material in its conversation history
|
||||
- Fewer edges means simpler graph and fewer failure points
|
||||
- Data tools (`save_data`/`load_data`) handle context window limits within a single node
|
||||
|
||||
### MCP Tools - Correct Usage
|
||||
|
||||
**MCP tools OK for:**
|
||||
✅ `test_node` - Validate node configuration with mock inputs
|
||||
✅ `validate_graph` - Check graph structure
|
||||
✅ `create_session` - Track session state for bookkeeping
|
||||
✅ Other validation tools
|
||||
- `test_node` — Validate node configuration with mock inputs
|
||||
- `validate_graph` — Check graph structure
|
||||
- `configure_loop` — Set event loop parameters
|
||||
- `create_session` — Track session state for bookkeeping
|
||||
|
||||
**Just don't:** Use MCP as the primary construction method or rely on export_graph
|
||||
|
||||
## Best Practices
|
||||
|
||||
### 1. Show Progress After Each Write
|
||||
|
||||
```python
|
||||
# After writing a node
|
||||
print("✅ Added analyze_request_node to nodes/__init__.py")
|
||||
print("📊 Progress: 1/6 nodes added")
|
||||
print("📁 Open exports/my_agent/nodes/__init__.py to see it!")
|
||||
```
|
||||
|
||||
### 2. Let User Open Files During Build
|
||||
|
||||
```python
|
||||
# Encourage file inspection
|
||||
print("✅ Goal written to agent.py")
|
||||
print("")
|
||||
print("💡 Tip: Open exports/my_agent/agent.py in your editor to see the goal!")
|
||||
```
|
||||
|
||||
### 3. Write Incrementally - One Component at a Time
|
||||
|
||||
```python
|
||||
# Good flow
|
||||
write_package_structure()
|
||||
show_user("Package created")
|
||||
|
||||
write_goal()
|
||||
show_user("Goal written")
|
||||
|
||||
for node in nodes:
|
||||
get_approval(node)
|
||||
write_node(node)
|
||||
show_user(f"Node {node.id} written")
|
||||
```
|
||||
|
||||
### 4. Test As You Build
|
||||
|
||||
```python
|
||||
# After adding several nodes
|
||||
print("💡 You can test current state with:")
|
||||
print(" PYTHONPATH=core:exports python -m my_agent validate")
|
||||
print(" PYTHONPATH=core:exports python -m my_agent info")
|
||||
```
|
||||
|
||||
### 5. Keep User Informed
|
||||
|
||||
```python
|
||||
# Clear status updates
|
||||
print("🔨 Creating package structure...")
|
||||
print("✅ Package created: exports/my_agent/")
|
||||
print("")
|
||||
print("📝 Next: Define agent goal")
|
||||
```
|
||||
|
||||
## Continuous Monitoring Agents
|
||||
|
||||
For agents that run continuously without terminal nodes:
|
||||
|
||||
```python
|
||||
# No terminal nodes - loops forever
|
||||
terminal_nodes = []
|
||||
|
||||
# Workflow loops back to start
|
||||
edges = [
|
||||
EdgeSpec(id="monitor-to-check", source="monitor", target="check-condition"),
|
||||
EdgeSpec(id="check-to-wait", source="check-condition", target="wait"),
|
||||
EdgeSpec(id="wait-to-monitor", source="wait", target="monitor"), # Loop
|
||||
]
|
||||
|
||||
# Entry node only
|
||||
entry_node = "monitor"
|
||||
entry_points = {"start": "monitor"}
|
||||
pause_nodes = []
|
||||
```
|
||||
|
||||
**Example: File Monitor**
|
||||
|
||||
```python
|
||||
nodes = [
|
||||
NodeSpec(id="list-files", ...),
|
||||
NodeSpec(id="check-new-files", node_type="router", ...),
|
||||
NodeSpec(id="process-files", ...),
|
||||
NodeSpec(id="wait-interval", node_type="function", ...),
|
||||
]
|
||||
|
||||
edges = [
|
||||
EdgeSpec(id="list-to-check", source="list-files", target="check-new-files"),
|
||||
EdgeSpec(
|
||||
id="check-to-process",
|
||||
source="check-new-files",
|
||||
target="process-files",
|
||||
condition=EdgeCondition.CONDITIONAL,
|
||||
condition_expr="new_files_count > 0",
|
||||
),
|
||||
EdgeSpec(
|
||||
id="check-to-wait",
|
||||
source="check-new-files",
|
||||
target="wait-interval",
|
||||
condition=EdgeCondition.CONDITIONAL,
|
||||
condition_expr="new_files_count == 0",
|
||||
),
|
||||
EdgeSpec(id="process-to-wait", source="process-files", target="wait-interval"),
|
||||
EdgeSpec(id="wait-to-list", source="wait-interval", target="list-files"), # Loop back
|
||||
]
|
||||
|
||||
terminal_nodes = [] # No terminal - runs forever
|
||||
```
|
||||
|
||||
## Complex Routing Patterns
|
||||
|
||||
### Multi-Condition Router
|
||||
|
||||
```python
|
||||
router_node = NodeSpec(
|
||||
id="decision-router",
|
||||
node_type="router",
|
||||
input_keys=["analysis_result"],
|
||||
output_keys=["decision"],
|
||||
system_prompt="""
|
||||
Based on the analysis result, decide the next action:
|
||||
- If confidence > 0.9: route to "execute"
|
||||
- If 0.5 <= confidence <= 0.9: route to "review"
|
||||
- If confidence < 0.5: route to "clarify"
|
||||
|
||||
Return: {"decision": "execute|review|clarify"}
|
||||
""",
|
||||
)
|
||||
|
||||
# Edges for each route
|
||||
edges = [
|
||||
EdgeSpec(
|
||||
id="router-to-execute",
|
||||
source="decision-router",
|
||||
target="execute-action",
|
||||
condition=EdgeCondition.CONDITIONAL,
|
||||
condition_expr="decision == 'execute'",
|
||||
priority=1,
|
||||
),
|
||||
EdgeSpec(
|
||||
id="router-to-review",
|
||||
source="decision-router",
|
||||
target="human-review",
|
||||
condition=EdgeCondition.CONDITIONAL,
|
||||
condition_expr="decision == 'review'",
|
||||
priority=2,
|
||||
),
|
||||
EdgeSpec(
|
||||
id="router-to-clarify",
|
||||
source="decision-router",
|
||||
target="request-clarification",
|
||||
condition=EdgeCondition.CONDITIONAL,
|
||||
condition_expr="decision == 'clarify'",
|
||||
priority=3,
|
||||
),
|
||||
]
|
||||
```
|
||||
|
||||
## Error Handling Patterns
|
||||
|
||||
### Graceful Failure with Fallback
|
||||
|
||||
```python
|
||||
# Primary node with error handling
|
||||
nodes = [
|
||||
NodeSpec(id="api-call", max_retries=3, ...),
|
||||
NodeSpec(id="fallback-cache", ...),
|
||||
NodeSpec(id="report-error", ...),
|
||||
]
|
||||
|
||||
edges = [
|
||||
# Success path
|
||||
EdgeSpec(
|
||||
id="api-success",
|
||||
source="api-call",
|
||||
target="process-results",
|
||||
condition=EdgeCondition.ON_SUCCESS,
|
||||
),
|
||||
EdgeSpec(id="api-success", source="api-call", target="process-results",
|
||||
condition=EdgeCondition.ON_SUCCESS),
|
||||
# Fallback on failure
|
||||
EdgeSpec(
|
||||
id="api-to-fallback",
|
||||
source="api-call",
|
||||
target="fallback-cache",
|
||||
condition=EdgeCondition.ON_FAILURE,
|
||||
priority=1,
|
||||
),
|
||||
EdgeSpec(id="api-to-fallback", source="api-call", target="fallback-cache",
|
||||
condition=EdgeCondition.ON_FAILURE, priority=1),
|
||||
# Report if fallback also fails
|
||||
EdgeSpec(
|
||||
id="fallback-to-error",
|
||||
source="fallback-cache",
|
||||
target="report-error",
|
||||
condition=EdgeCondition.ON_FAILURE,
|
||||
priority=1,
|
||||
),
|
||||
]
|
||||
```
|
||||
|
||||
## Performance Optimization
|
||||
|
||||
### Parallel Node Execution
|
||||
|
||||
```python
|
||||
# Use multiple edges from same source for parallel execution
|
||||
edges = [
|
||||
EdgeSpec(
|
||||
id="start-to-search1",
|
||||
source="start",
|
||||
target="search-source-1",
|
||||
condition=EdgeCondition.ALWAYS,
|
||||
),
|
||||
EdgeSpec(
|
||||
id="start-to-search2",
|
||||
source="start",
|
||||
target="search-source-2",
|
||||
condition=EdgeCondition.ALWAYS,
|
||||
),
|
||||
EdgeSpec(
|
||||
id="start-to-search3",
|
||||
source="start",
|
||||
target="search-source-3",
|
||||
condition=EdgeCondition.ALWAYS,
|
||||
),
|
||||
# Converge results
|
||||
EdgeSpec(
|
||||
id="search1-to-merge",
|
||||
source="search-source-1",
|
||||
target="merge-results",
|
||||
),
|
||||
EdgeSpec(
|
||||
id="search2-to-merge",
|
||||
source="search-source-2",
|
||||
target="merge-results",
|
||||
),
|
||||
EdgeSpec(
|
||||
id="search3-to-merge",
|
||||
source="search-source-3",
|
||||
target="merge-results",
|
||||
),
|
||||
EdgeSpec(id="fallback-to-error", source="fallback-cache", target="report-error",
|
||||
condition=EdgeCondition.ON_FAILURE, priority=1),
|
||||
]
|
||||
```
|
||||
|
||||
@@ -459,38 +353,21 @@ edges = [
|
||||
|
||||
When agent is complete, transition to testing phase:
|
||||
|
||||
```python
|
||||
print("""
|
||||
✅ Agent complete: exports/my_agent/
|
||||
|
||||
Next steps:
|
||||
1. Switch to testing-agent skill
|
||||
2. Generate and approve tests
|
||||
3. Run evaluation
|
||||
4. Debug any failures
|
||||
|
||||
Command: "Test the agent at exports/my_agent/"
|
||||
""")
|
||||
```
|
||||
|
||||
### Pre-Testing Checklist
|
||||
|
||||
Before handing off to testing-agent:
|
||||
|
||||
- [ ] Agent structure validates: `python -m agent_name validate`
|
||||
- [ ] Agent structure validates: `uv run python -m agent_name validate`
|
||||
- [ ] All nodes defined in nodes/__init__.py
|
||||
- [ ] All edges connect valid nodes
|
||||
- [ ] Entry node specified
|
||||
- [ ] All edges connect valid nodes with correct priorities
|
||||
- [ ] Feedback edge targets have `max_node_visits > 1`
|
||||
- [ ] Client-facing nodes have meaningful system prompts
|
||||
- [ ] Agent can be imported: `from exports.agent_name import default_agent`
|
||||
- [ ] README.md with usage instructions
|
||||
- [ ] CLI commands work (info, validate)
|
||||
|
||||
## Related Skills
|
||||
|
||||
- **building-agents-core** - Fundamental concepts
|
||||
- **building-agents-construction** - Step-by-step building
|
||||
- **testing-agent** - Test and validate agents
|
||||
- **agent-workflow** - Complete workflow orchestrator
|
||||
- **building-agents-core** — Fundamental concepts (node types, edges, event loop architecture)
|
||||
- **building-agents-construction** — Step-by-step building process
|
||||
- **testing-agent** — Test and validate agents
|
||||
- **agent-workflow** — Complete workflow orchestrator
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
---
|
||||
name: setup-credentials
|
||||
description: Set up and install credentials for an agent. Detects missing credentials from agent config, collects them from the user, and stores them securely in the encrypted credential store at ~/.hive/credentials.
|
||||
description: Set up and install credentials for an agent. Detects missing credentials from agent config, collects them from the user, and stores them securely in the local encrypted store at ~/.hive/credentials.
|
||||
license: Apache-2.0
|
||||
metadata:
|
||||
author: hive
|
||||
version: "2.1"
|
||||
version: "2.2"
|
||||
type: utility
|
||||
---
|
||||
|
||||
@@ -31,48 +31,96 @@ Determine which agent needs credentials. The user will either:
|
||||
|
||||
Locate the agent's directory under `exports/{agent_name}/`.
|
||||
|
||||
### Step 2: Detect Required Credentials
|
||||
### Step 2: Detect Required Credentials (Bash-First)
|
||||
|
||||
Read the agent's configuration to determine which tools and node types it uses:
|
||||
Use bash commands to determine what the agent needs and what's already configured. This avoids Python import issues and works even when `HIVE_CREDENTIAL_KEY` is not set.
|
||||
|
||||
```python
|
||||
from core.framework.runner import AgentRunner
|
||||
#### Step 2a: Read Agent Requirements
|
||||
|
||||
runner = AgentRunner.load("exports/{agent_name}")
|
||||
validation = runner.validate()
|
||||
Extract `required_tools` and node types from the agent config:
|
||||
|
||||
# validation.missing_credentials contains env var names
|
||||
# validation.warnings contains detailed messages with help URLs
|
||||
```bash
|
||||
# Get required tools
|
||||
jq -r '.required_tools[]?' exports/{agent_name}/agent.json 2>/dev/null
|
||||
|
||||
# Get node types from graph nodes
|
||||
jq -r '.graph.nodes[]?.node_type' exports/{agent_name}/agent.json 2>/dev/null | sort -u
|
||||
```
|
||||
|
||||
Alternatively, check the credential store directly:
|
||||
Map the extracted tools and node types to credentials by reading the spec files directly:
|
||||
|
||||
```python
|
||||
from core.framework.credentials import CredentialStore
|
||||
|
||||
# Use encrypted storage (default: ~/.hive/credentials)
|
||||
store = CredentialStore.with_encrypted_storage()
|
||||
|
||||
# Check what's available
|
||||
available = store.list_credentials()
|
||||
print(f"Available credentials: {available}")
|
||||
|
||||
# Check if specific credential exists
|
||||
if store.is_available("hubspot"):
|
||||
print("HubSpot credential found")
|
||||
else:
|
||||
print("HubSpot credential missing")
|
||||
```bash
|
||||
# Read all credential specs — each file defines tools, node_types, env_var, and credential_id
|
||||
cat tools/src/aden_tools/credentials/llm.py tools/src/aden_tools/credentials/search.py tools/src/aden_tools/credentials/email.py tools/src/aden_tools/credentials/integrations.py
|
||||
```
|
||||
|
||||
To see all known credential specs (for help URLs and setup instructions):
|
||||
For each `CredentialSpec`, match its `tools` and `node_types` lists against the agent's required tools and node types. Extract the `env_var`, `credential_id`, and `credential_group` for every match. This is the list of needed credentials.
|
||||
|
||||
```python
|
||||
from aden_tools.credentials import CREDENTIAL_SPECS
|
||||
#### Step 2b: Check Existing Credential Sources
|
||||
|
||||
for name, spec in CREDENTIAL_SPECS.items():
|
||||
print(f"{name}: env_var={spec.env_var}, aden={spec.aden_supported}")
|
||||
For each needed credential, check three sources. A credential is "found" if it exists in ANY of them:
|
||||
|
||||
**1. Encrypted store metadata index** (unencrypted JSON — no decryption key needed):
|
||||
|
||||
```bash
|
||||
cat ~/.hive/credentials/metadata/index.json 2>/dev/null | jq -r '.credentials | keys[]'
|
||||
```
|
||||
|
||||
If a credential ID appears in this list, it is stored in the encrypted store.
|
||||
|
||||
**2. Environment variables:**
|
||||
|
||||
```bash
|
||||
# Check each needed env var, e.g.:
|
||||
printenv ANTHROPIC_API_KEY > /dev/null 2>&1 && echo "ANTHROPIC_API_KEY: set" || echo "ANTHROPIC_API_KEY: not set"
|
||||
printenv BRAVE_SEARCH_API_KEY > /dev/null 2>&1 && echo "BRAVE_SEARCH_API_KEY: set" || echo "BRAVE_SEARCH_API_KEY: not set"
|
||||
```
|
||||
|
||||
**3. Project `.env` file:**
|
||||
|
||||
```bash
|
||||
# Check each needed env var, e.g.:
|
||||
grep -q '^ANTHROPIC_API_KEY=' .env 2>/dev/null && echo "ANTHROPIC_API_KEY: in .env" || echo "ANTHROPIC_API_KEY: not in .env"
|
||||
grep -q '^BRAVE_SEARCH_API_KEY=' .env 2>/dev/null && echo "BRAVE_SEARCH_API_KEY: in .env" || echo "BRAVE_SEARCH_API_KEY: not in .env"
|
||||
```
|
||||
|
||||
#### Step 2c: HIVE_CREDENTIAL_KEY Check
|
||||
|
||||
If any credentials were found in the encrypted store metadata index, verify the encryption key is available. The key is typically persisted to shell config by a previous setup-credentials run.
|
||||
|
||||
Check both the current session AND shell config files:
|
||||
|
||||
```bash
|
||||
# Check 1: Current session
|
||||
printenv HIVE_CREDENTIAL_KEY > /dev/null 2>&1 && echo "session: set" || echo "session: not set"
|
||||
|
||||
# Check 2: Shell config files (where setup-credentials persists it)
|
||||
# Note: check each file individually to avoid non-zero exit when one doesn't exist
|
||||
for f in ~/.zshrc ~/.bashrc ~/.profile; do [ -f "$f" ] && grep -q 'HIVE_CREDENTIAL_KEY' "$f" && echo "$f"; done
|
||||
```
|
||||
|
||||
Decision logic:
|
||||
- **In current session** — no action needed, credentials in the store are usable
|
||||
- **In shell config but NOT in current session** — the key is persisted but this shell hasn't sourced it. Run `source ~/.zshrc` (or `~/.bashrc`), then re-check. Credentials in the store are usable after sourcing.
|
||||
- **Not in session AND not in shell config** — the key was never persisted. Warn the user that credentials in the store cannot be decrypted. Help fix the key situation (recover/re-persist), do NOT re-collect credential values that are already stored.
|
||||
|
||||
#### Step 2d: Compute Missing & Group
|
||||
|
||||
Diff the "needed" credentials against the "found" credentials to get the truly missing list.
|
||||
|
||||
Group related credentials by their `credential_group` field from the spec files. Credentials that share the same non-empty `credential_group` value should be presented as a single setup step rather than asking for each one individually.
|
||||
|
||||
**If nothing is missing and there's no HIVE_CREDENTIAL_KEY issue:** Report all credentials as configured and skip Steps 3-5. Example:
|
||||
|
||||
```
|
||||
All required credentials are already configured:
|
||||
✓ anthropic (ANTHROPIC_API_KEY) — found in encrypted store
|
||||
✓ brave_search (BRAVE_SEARCH_API_KEY) — found in environment
|
||||
Your agent is ready to run!
|
||||
```
|
||||
|
||||
**If credentials are missing:** Continue to Step 3 with only the missing ones.
|
||||
|
||||
### Step 3: Present Auth Options for Each Missing Credential
|
||||
|
||||
For each missing credential, check what authentication methods are available:
|
||||
@@ -104,7 +152,7 @@ Present the available options using AskUserQuestion:
|
||||
```
|
||||
Choose how to configure HUBSPOT_ACCESS_TOKEN:
|
||||
|
||||
1) Aden Authorization Server (Recommended)
|
||||
1) Aden Platform (OAuth) (Recommended)
|
||||
Secure OAuth2 flow via integration.adenhq.com
|
||||
- Quick setup with automatic token refresh
|
||||
- No need to manage API keys manually
|
||||
@@ -114,7 +162,7 @@ Choose how to configure HUBSPOT_ACCESS_TOKEN:
|
||||
- Requires creating a HubSpot Private App
|
||||
- Full control over scopes and permissions
|
||||
|
||||
3) Custom Credential Store (Advanced)
|
||||
3) Local Credential Setup (Advanced)
|
||||
Programmatic configuration for CI/CD
|
||||
- For automated deployments
|
||||
- Requires manual API calls
|
||||
@@ -122,7 +170,7 @@ Choose how to configure HUBSPOT_ACCESS_TOKEN:
|
||||
|
||||
### Step 4: Execute Auth Flow Based on User Choice
|
||||
|
||||
#### Option 1: Aden Authorization Server
|
||||
#### Option 1: Aden Platform (OAuth)
|
||||
|
||||
This is the recommended flow for supported integrations (HubSpot, etc.).
|
||||
|
||||
@@ -174,7 +222,7 @@ shell_type = detect_shell() # 'bash', 'zsh', or 'unknown'
|
||||
success, config_path = add_env_var_to_shell_config(
|
||||
"ADEN_API_KEY",
|
||||
user_provided_key,
|
||||
comment="Aden authorization server API key"
|
||||
comment="Aden Platform (OAuth) API key"
|
||||
)
|
||||
|
||||
if success:
|
||||
@@ -313,7 +361,7 @@ if not result.valid:
|
||||
# 2. Continue anyway (not recommended)
|
||||
```
|
||||
|
||||
**4.2d. Store in Encrypted Credential Store**
|
||||
**4.2d. Store in Local Encrypted Store**
|
||||
|
||||
```python
|
||||
from core.framework.credentials import CredentialStore, CredentialObject, CredentialKey
|
||||
@@ -340,7 +388,7 @@ store.save_credential(cred)
|
||||
export HUBSPOT_ACCESS_TOKEN="the-value"
|
||||
```
|
||||
|
||||
#### Option 3: Custom Credential Store (Advanced)
|
||||
#### Option 3: Local Credential Setup (Advanced)
|
||||
|
||||
For programmatic/CI/CD setups.
|
||||
|
||||
@@ -408,10 +456,14 @@ Report the result to the user.
|
||||
|
||||
Health checks validate credentials by making lightweight API calls:
|
||||
|
||||
| Credential | Endpoint | What It Checks |
|
||||
| -------------- | --------------------------------------- | --------------------------------- |
|
||||
| `hubspot` | `GET /crm/v3/objects/contacts?limit=1` | Bearer token validity, CRM scopes |
|
||||
| `brave_search` | `GET /res/v1/web/search?q=test&count=1` | API key validity |
|
||||
| Credential | Endpoint | What It Checks |
|
||||
| --------------- | --------------------------------------- | ---------------------------------- |
|
||||
| `anthropic` | `POST /v1/messages` | API key validity |
|
||||
| `brave_search` | `GET /res/v1/web/search?q=test&count=1` | API key validity |
|
||||
| `google_search` | `GET /customsearch/v1?q=test&num=1` | API key + CSE ID validity |
|
||||
| `github` | `GET /user` | Token validity, user identity |
|
||||
| `hubspot` | `GET /crm/v3/objects/contacts?limit=1` | Bearer token validity, CRM scopes |
|
||||
| `resend` | `GET /domains` | API key validity |
|
||||
|
||||
```python
|
||||
from aden_tools.credentials import check_credential_health, HealthCheckResult
|
||||
@@ -424,7 +476,7 @@ result: HealthCheckResult = check_credential_health("hubspot", token_value)
|
||||
|
||||
## Encryption Key (HIVE_CREDENTIAL_KEY)
|
||||
|
||||
The encrypted credential store requires `HIVE_CREDENTIAL_KEY` to encrypt/decrypt credentials.
|
||||
The local encrypted store requires `HIVE_CREDENTIAL_KEY` to encrypt/decrypt credentials.
|
||||
|
||||
- If the user doesn't have one, `EncryptedFileStorage` will auto-generate one and log it
|
||||
- The user MUST persist this key (e.g., in `~/.bashrc` or a secrets manager)
|
||||
@@ -443,7 +495,7 @@ If `HIVE_CREDENTIAL_KEY` is not set:
|
||||
- **NEVER** store credentials in plaintext files, git-tracked files, or agent configs
|
||||
- **NEVER** hardcode credentials in source code
|
||||
- **ALWAYS** use `SecretStr` from Pydantic when handling credential values in Python
|
||||
- **ALWAYS** use the encrypted credential store (`~/.hive/credentials`) for persistence
|
||||
- **ALWAYS** use the local encrypted store (`~/.hive/credentials`) for persistence
|
||||
- **ALWAYS** run health checks before storing credentials (when possible)
|
||||
- **ALWAYS** verify credentials were stored by re-running validation, not by reading them back
|
||||
- When modifying `~/.bashrc` or `~/.zshrc`, confirm with the user first
|
||||
@@ -456,7 +508,8 @@ All credential specs are defined in `tools/src/aden_tools/credentials/`:
|
||||
| ----------------- | ------------- | --------------------------------------------- | -------------- |
|
||||
| `llm.py` | LLM Providers | `anthropic` | No |
|
||||
| `search.py` | Search Tools | `brave_search`, `google_search`, `google_cse` | No |
|
||||
| `integrations.py` | Integrations | `hubspot` | Yes |
|
||||
| `email.py` | Email | `resend` | No |
|
||||
| `integrations.py` | Integrations | `github`, `hubspot` | No / Yes |
|
||||
|
||||
**Note:** Additional LLM providers (Cerebras, Groq, OpenAI) are handled by LiteLLM via environment
|
||||
variables (`CEREBRAS_API_KEY`, `GROQ_API_KEY`, `OPENAI_API_KEY`) but are not yet in CREDENTIAL_SPECS.
|
||||
@@ -507,66 +560,90 @@ token = store.get_key("hubspot", "access_token")
|
||||
## Example Session
|
||||
|
||||
```
|
||||
User: /setup-credentials for my hubspot-agent
|
||||
User: /setup-credentials for my research-agent
|
||||
|
||||
Agent: Let me check what credentials your hubspot-agent needs.
|
||||
Agent: Let me check what credentials your research-agent needs.
|
||||
|
||||
[Runs validation, finds ANTHROPIC_API_KEY and HUBSPOT_ACCESS_TOKEN missing]
|
||||
[Reads agent config]
|
||||
$ jq -r '.required_tools[]?' exports/research-agent/agent.json
|
||||
web_search
|
||||
google_search
|
||||
|
||||
Agent: Your hubspot-agent requires 2 credentials:
|
||||
$ jq -r '.graph.nodes[]?.node_type' exports/research-agent/agent.json | sort -u
|
||||
event_loop
|
||||
|
||||
1. ANTHROPIC_API_KEY - Anthropic API key for LLM calls
|
||||
2. HUBSPOT_ACCESS_TOKEN - HubSpot access token
|
||||
[Maps tools/nodes to credentials using lookup table]
|
||||
Needed: anthropic, brave_search, google_search, google_cse
|
||||
|
||||
Let's set these up one at a time.
|
||||
[Checks encrypted store metadata index]
|
||||
$ cat ~/.hive/credentials/metadata/index.json | jq -r '.credentials | keys[]'
|
||||
anthropic
|
||||
brave_search
|
||||
|
||||
--- Setting up ANTHROPIC_API_KEY ---
|
||||
[Checks environment variables]
|
||||
$ printenv ANTHROPIC_API_KEY > /dev/null 2>&1 && echo "set" || echo "not set"
|
||||
not set
|
||||
$ printenv BRAVE_SEARCH_API_KEY > /dev/null 2>&1 && echo "set" || echo "not set"
|
||||
not set
|
||||
$ printenv GOOGLE_API_KEY > /dev/null 2>&1 && echo "set" || echo "not set"
|
||||
not set
|
||||
$ printenv GOOGLE_CSE_ID > /dev/null 2>&1 && echo "set" || echo "not set"
|
||||
not set
|
||||
|
||||
This credential only supports direct API key entry.
|
||||
[Checks HIVE_CREDENTIAL_KEY since credentials found in store]
|
||||
$ printenv HIVE_CREDENTIAL_KEY > /dev/null 2>&1 && echo "session: set" || echo "session: not set"
|
||||
session: not set
|
||||
$ for f in ~/.zshrc ~/.bashrc ~/.profile; do [ -f "$f" ] && grep -q 'HIVE_CREDENTIAL_KEY' "$f" && echo "$f"; done
|
||||
/Users/user/.zshrc
|
||||
|
||||
[Shows instructions for getting Anthropic API key]
|
||||
[AskUserQuestion: "Please provide your Anthropic API key:"]
|
||||
[User provides key]
|
||||
[Stores in credential store, exports to env]
|
||||
[Key is in shell config but not current session — sourcing it]
|
||||
$ source ~/.zshrc
|
||||
|
||||
--- Setting up HUBSPOT_ACCESS_TOKEN ---
|
||||
[Computes missing credentials]
|
||||
Found:
|
||||
✓ anthropic (ANTHROPIC_API_KEY) — in encrypted store
|
||||
✓ brave_search (BRAVE_SEARCH_API_KEY) — in encrypted store
|
||||
Missing:
|
||||
✗ google_search (GOOGLE_API_KEY)
|
||||
✗ google_cse (GOOGLE_CSE_ID)
|
||||
|
||||
[AskUserQuestion with 3 options]
|
||||
Agent: 2 of 4 required credentials are already configured. Only Google Custom
|
||||
Search needs setup (2 values as a single group).
|
||||
|
||||
Choose how to configure HUBSPOT_ACCESS_TOKEN:
|
||||
1) Aden Authorization Server (Recommended)
|
||||
2) Direct API Key
|
||||
3) Custom Credential Store (Advanced)
|
||||
--- Setting up Google Custom Search (google_search + google_cse) ---
|
||||
|
||||
User: 1 (Aden)
|
||||
This requires two values that work together.
|
||||
|
||||
Agent: Great! Let me check if you have an Aden API key configured...
|
||||
First, the Google API Key:
|
||||
1. Go to https://console.cloud.google.com/apis/credentials
|
||||
2. Create a new project (or select an existing one)
|
||||
3. Enable the "Custom Search API" from the API Library
|
||||
4. Go to Credentials > Create Credentials > API Key
|
||||
5. Copy the generated API key
|
||||
|
||||
[Checks for ADEN_API_KEY, not found]
|
||||
[Opens browser to integration.adenhq.com/setup]
|
||||
|
||||
Agent: I've opened Aden's setup page. Please:
|
||||
1. Sign in or create an account
|
||||
2. Connect your HubSpot account (OAuth happens on Aden's site)
|
||||
3. Copy the API key shown after setup
|
||||
|
||||
[AskUserQuestion: "Please provide your Aden API key:"]
|
||||
[AskUserQuestion: "Please provide your Google API key:"]
|
||||
[User provides key]
|
||||
|
||||
Agent: Would you like me to save this to your shell config (~/.zshrc) for future sessions?
|
||||
Now, the Custom Search Engine ID:
|
||||
1. Go to https://programmablesearchengine.google.com/controlpanel/all
|
||||
2. Click "Add" to create a new search engine
|
||||
3. Under "What to search", select "Search the entire web"
|
||||
4. Give your search engine a name
|
||||
5. Click "Create"
|
||||
6. Copy the Search Engine ID (cx value)
|
||||
|
||||
[User approves]
|
||||
[Saves to ~/.zshrc and ~/.hive/configuration.json]
|
||||
[AskUserQuestion: "Please provide your Google CSE ID:"]
|
||||
[User provides ID]
|
||||
|
||||
Agent: Let me sync your credentials from Aden...
|
||||
[Runs health check with both values - GET /customsearch/v1?q=test&num=1 → 200 OK]
|
||||
[Stores both in local encrypted store, exports to env]
|
||||
|
||||
[Syncs credentials from Aden server - OAuth already done on Aden's side]
|
||||
[Runs health check]
|
||||
|
||||
Agent: HubSpot credentials validated successfully!
|
||||
✓ Google Custom Search credentials valid
|
||||
|
||||
All credentials are now configured:
|
||||
- ANTHROPIC_API_KEY: Stored in encrypted credential store
|
||||
- HUBSPOT_ACCESS_TOKEN: Synced from Aden (OAuth completed on Aden)
|
||||
- Validation passed - your agent is ready to run!
|
||||
✓ anthropic (ANTHROPIC_API_KEY) — already in encrypted store
|
||||
✓ brave_search (BRAVE_SEARCH_API_KEY) — already in encrypted store
|
||||
✓ google_search (GOOGLE_API_KEY) — stored in encrypted store
|
||||
✓ google_cse (GOOGLE_CSE_ID) — stored in encrypted store
|
||||
Your agent is ready to run!
|
||||
```
|
||||
|
||||
@@ -930,9 +930,10 @@ assert approval == "APPROVED", f"Expected APPROVED, got {approval}"
|
||||
- `steps_executed: int` - Number of nodes executed
|
||||
- `total_tokens: int` - Cumulative token usage
|
||||
- `total_latency_ms: int` - Total execution time
|
||||
- `path: list[str]` - Node IDs traversed
|
||||
- `path: list[str]` - Node IDs traversed (may contain repeated IDs from feedback loops)
|
||||
- `paused_at: str | None` - Node ID if HITL pause occurred
|
||||
- `session_state: dict` - State for resuming
|
||||
- `node_visit_counts: dict[str, int]` - How many times each node executed (useful for feedback loop testing)
|
||||
|
||||
### Happy Path Test
|
||||
```python
|
||||
@@ -975,6 +976,57 @@ async def test_performance_latency(mock_mode):
|
||||
assert duration < 5.0, f"Took {{duration}}s, expected <5s"
|
||||
```
|
||||
|
||||
### Testing Event Loop Nodes
|
||||
|
||||
Event loop nodes run multi-turn loops internally. Tests should verify:
|
||||
|
||||
**Output Keys Test** — All required keys are set via `set_output`:
|
||||
```python
|
||||
@pytest.mark.asyncio
|
||||
async def test_all_output_keys_set(mock_mode):
|
||||
"""Test that event_loop nodes set all required output keys."""
|
||||
result = await default_agent.run({{"query": "test"}}, mock_mode=mock_mode)
|
||||
assert result.success, f"Agent failed: {{result.error}}"
|
||||
output = result.output or {{}}
|
||||
for key in ["expected_key_1", "expected_key_2"]:
|
||||
assert key in output, f"Output key '{{key}}' not set by event_loop node"
|
||||
```
|
||||
|
||||
**Feedback Loop Test** — Verify feedback loops terminate:
|
||||
```python
|
||||
@pytest.mark.asyncio
|
||||
async def test_feedback_loop_respects_max_visits(mock_mode):
|
||||
"""Test that feedback loops terminate at max_node_visits."""
|
||||
result = await default_agent.run({{"input": "trigger_rejection"}}, mock_mode=mock_mode)
|
||||
assert result.success or result.error is not None
|
||||
visits = getattr(result, "node_visit_counts", {{}}) or {{}}
|
||||
for node_id, count in visits.items():
|
||||
assert count <= 5, f"Node {{node_id}} visited {{count}} times"
|
||||
```
|
||||
|
||||
**Fan-Out Test** — Verify parallel branches both complete:
|
||||
```python
|
||||
@pytest.mark.asyncio
|
||||
async def test_parallel_branches_complete(mock_mode):
|
||||
"""Test that fan-out branches all complete and produce outputs."""
|
||||
result = await default_agent.run({{"query": "test"}}, mock_mode=mock_mode)
|
||||
assert result.success
|
||||
output = result.output or {{}}
|
||||
# Check outputs from both parallel branches
|
||||
assert "branch_a_output" in output, "Branch A output missing"
|
||||
assert "branch_b_output" in output, "Branch B output missing"
|
||||
```
|
||||
|
||||
**Client-Facing Node Test** — In mock mode, client-facing nodes may not block:
|
||||
```python
|
||||
@pytest.mark.asyncio
|
||||
async def test_client_facing_node(mock_mode):
|
||||
"""Test that client-facing nodes produce output."""
|
||||
result = await default_agent.run({{"query": "test"}}, mock_mode=mock_mode)
|
||||
# In mock mode, client-facing blocking is typically bypassed
|
||||
assert result.success or result.paused_at is not None
|
||||
```
|
||||
|
||||
## Integration with building-agents
|
||||
|
||||
### Handoff Points
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
---
|
||||
name: Bug Report
|
||||
about: Report a bug to help us improve
|
||||
title: '[Bug]: '
|
||||
labels: bug
|
||||
title: "[Bug]: "
|
||||
labels: bug, enhancement
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
## Describe the Bug
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
---
|
||||
name: Feature Request
|
||||
about: Suggest a new feature or enhancement
|
||||
title: '[Feature]: '
|
||||
title: "[Feature]: "
|
||||
labels: enhancement
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
## Problem Statement
|
||||
|
||||
@@ -0,0 +1,71 @@
|
||||
---
|
||||
name: Integration Request
|
||||
about: Suggest a new integration
|
||||
title: "[Integration]:"
|
||||
labels: ''
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
## Service
|
||||
|
||||
Name and brief description of the service and what it enables agents to do.
|
||||
|
||||
**Description:** [e.g., "API key for Slack Bot" — short one-liner for the credential spec]
|
||||
|
||||
## Credential Identity
|
||||
|
||||
- **credential_id:** [e.g., `slack`]
|
||||
- **env_var:** [e.g., `SLACK_BOT_TOKEN`]
|
||||
- **credential_key:** [e.g., `access_token`, `api_key`, `bot_token`]
|
||||
|
||||
## Tools
|
||||
|
||||
Tool function names that require this credential:
|
||||
|
||||
- [e.g., `slack_send_message`]
|
||||
- [e.g., `slack_list_channels`]
|
||||
|
||||
## Auth Methods
|
||||
|
||||
- **Direct API key supported:** Yes / No
|
||||
- **Aden OAuth supported:** Yes / No
|
||||
|
||||
If Aden OAuth is supported, describe the OAuth scopes/permissions required.
|
||||
|
||||
## How to Get the Credential
|
||||
|
||||
Link where users obtain the key/token:
|
||||
|
||||
[e.g., https://api.slack.com/apps]
|
||||
|
||||
Step-by-step instructions:
|
||||
|
||||
1. Go to ...
|
||||
2. Create a ...
|
||||
3. Select scopes/permissions: ...
|
||||
4. Copy the key/token
|
||||
|
||||
## Health Check
|
||||
|
||||
A lightweight API call to validate the credential (no writes, no charges).
|
||||
|
||||
- **Endpoint:** [e.g., `https://slack.com/api/auth.test`]
|
||||
- **Method:** [e.g., `GET` or `POST`]
|
||||
- **Auth header:** [e.g., `Authorization: Bearer {token}` or `X-Api-Key: {key}`]
|
||||
- **Parameters (if any):** [e.g., `?limit=1`]
|
||||
- **200 means:** [e.g., key is valid]
|
||||
- **401 means:** [e.g., invalid or expired]
|
||||
- **429 means:** [e.g., rate limited but key is valid]
|
||||
|
||||
## Credential Group
|
||||
|
||||
Does this require multiple credentials configured together? (e.g., Google Custom Search needs
|
||||
both an API key and a CSE ID)
|
||||
|
||||
- [ ] No, single credential
|
||||
- [ ] Yes — list the other credential IDs in the group:
|
||||
|
||||
## Additional Context
|
||||
|
||||
Links to API docs, rate limits, free tier availability, or anything else relevant.
|
||||
+42
-25
@@ -21,23 +21,22 @@ jobs:
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v4
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
cd core
|
||||
pip install -e .
|
||||
pip install -r requirements-dev.txt
|
||||
run: uv sync --project core --group dev
|
||||
|
||||
- name: Ruff lint
|
||||
run: |
|
||||
ruff check core/
|
||||
ruff check tools/
|
||||
uv run --project core ruff check core/
|
||||
uv run --project core ruff check tools/
|
||||
|
||||
- name: Ruff format
|
||||
run: |
|
||||
ruff format --check core/
|
||||
ruff format --check tools/
|
||||
uv run --project core ruff format --check core/
|
||||
uv run --project core ruff format --check tools/
|
||||
|
||||
test:
|
||||
name: Test Python Framework
|
||||
@@ -52,23 +51,19 @@ jobs:
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install dependencies
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v4
|
||||
|
||||
- name: Install dependencies and run tests
|
||||
run: |
|
||||
cd core
|
||||
pip install -e .
|
||||
pip install -r requirements-dev.txt
|
||||
uv sync
|
||||
uv run pytest tests/ -v
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
cd core
|
||||
pytest tests/ -v
|
||||
|
||||
validate:
|
||||
name: Validate Agent Exports
|
||||
test-tools:
|
||||
name: Test Tools
|
||||
runs-on: ubuntu-latest
|
||||
needs: [lint, test]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
@@ -76,13 +71,35 @@ jobs:
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v4
|
||||
|
||||
- name: Install dependencies and run tests
|
||||
run: |
|
||||
cd tools
|
||||
uv sync --extra dev
|
||||
uv run pytest tests/ -v
|
||||
|
||||
validate:
|
||||
name: Validate Agent Exports
|
||||
runs-on: ubuntu-latest
|
||||
needs: [lint, test, test-tools]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v4
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
cd core
|
||||
pip install -e .
|
||||
pip install -r requirements-dev.txt
|
||||
uv sync
|
||||
|
||||
- name: Validate exported agents
|
||||
run: |
|
||||
@@ -105,7 +122,7 @@ jobs:
|
||||
for agent_dir in "${agent_dirs[@]}"; do
|
||||
if [ -f "$agent_dir/agent.json" ]; then
|
||||
echo "Validating $agent_dir"
|
||||
python -c "import json; json.load(open('$agent_dir/agent.json'))"
|
||||
uv run python -c "import json; json.load(open('$agent_dir/agent.json'))"
|
||||
validated=$((validated + 1))
|
||||
fi
|
||||
done
|
||||
|
||||
@@ -80,7 +80,13 @@ jobs:
|
||||
- help wanted: Extra attention is needed (if issue needs community input)
|
||||
- backlog: Tracked for the future, but not currently planned or prioritized
|
||||
|
||||
You may apply multiple labels if appropriate (e.g., "bug" and "help wanted").
|
||||
### 6. Estimate size (if NOT a duplicate, spam, or invalid)
|
||||
Apply exactly ONE size label to help contributors match their capacity to the task:
|
||||
- "size: small": Docs, typos, single-file fixes, config changes
|
||||
- "size: medium": Bug fixes with tests, adding a single tool, changes within one package
|
||||
- "size: large": Cross-package changes (core + tools), new modules, complex logic, architectural refactors
|
||||
|
||||
You may apply multiple labels if appropriate (e.g., "bug", "size: small", and "good first issue").
|
||||
|
||||
## Tools Available:
|
||||
- mcp__github__get_issue: Get issue details
|
||||
|
||||
@@ -21,18 +21,19 @@ jobs:
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v4
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
cd core
|
||||
pip install -e .
|
||||
pip install -r requirements-dev.txt
|
||||
uv sync
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
cd core
|
||||
pytest tests/ -v
|
||||
uv run pytest tests/ -v
|
||||
|
||||
- name: Generate changelog
|
||||
id: changelog
|
||||
|
||||
+6
-2
@@ -54,7 +54,6 @@ __pycache__/
|
||||
*.egg-info/
|
||||
.eggs/
|
||||
*.egg
|
||||
uv.lock
|
||||
|
||||
# Generated runtime data
|
||||
core/data/
|
||||
@@ -69,4 +68,9 @@ exports/*
|
||||
|
||||
.agent-builder-sessions/*
|
||||
|
||||
.venv
|
||||
.claude/settings.local.json
|
||||
|
||||
.venv
|
||||
|
||||
docs/github-issues/*
|
||||
core/tests/*dumps/*
|
||||
|
||||
@@ -1,20 +1,14 @@
|
||||
{
|
||||
"mcpServers": {
|
||||
"agent-builder": {
|
||||
"command": ".venv/bin/python",
|
||||
"args": ["-m", "framework.mcp.agent_builder_server"],
|
||||
"cwd": "core",
|
||||
"env": {
|
||||
"PYTHONPATH": "../tools/src"
|
||||
}
|
||||
"command": "uv",
|
||||
"args": ["run", "-m", "framework.mcp.agent_builder_server"],
|
||||
"cwd": "core"
|
||||
},
|
||||
"tools": {
|
||||
"command": ".venv/bin/python",
|
||||
"args": ["mcp_server.py", "--stdio"],
|
||||
"cwd": "tools",
|
||||
"env": {
|
||||
"PYTHONPATH": "src:../core"
|
||||
}
|
||||
"command": "uv",
|
||||
"args": ["run", "mcp_server.py", "--stdio"],
|
||||
"cwd": "tools"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
repos:
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.8.6
|
||||
rev: v0.15.0
|
||||
hooks:
|
||||
- id: ruff
|
||||
name: ruff lint (core)
|
||||
|
||||
+12
-5
@@ -35,9 +35,16 @@ You may submit PRs without prior assignment for:
|
||||
|
||||
1. Fork the repository
|
||||
2. Clone your fork: `git clone https://github.com/YOUR_USERNAME/hive.git`
|
||||
3. Create a feature branch: `git checkout -b feature/your-feature-name`
|
||||
4. Make your changes
|
||||
5. Run checks and tests:
|
||||
3. Add the upstream repository: `git remote add upstream https://github.com/adenhq/hive.git`
|
||||
4. Sync with upstream to ensure you're starting from the latest code:
|
||||
```bash
|
||||
git fetch upstream
|
||||
git checkout main
|
||||
git merge upstream/main
|
||||
```
|
||||
5. Create a feature branch: `git checkout -b feature/your-feature-name`
|
||||
6. Make your changes
|
||||
7. Run checks and tests:
|
||||
```bash
|
||||
make check # Lint and format checks (ruff check + ruff format --check on core/ and tools/)
|
||||
make test # Core tests (cd core && pytest tests/ -v)
|
||||
@@ -125,7 +132,7 @@ feat(component): add new feature description
|
||||
> **Note:** When testing agents in `exports/`, always set PYTHONPATH:
|
||||
>
|
||||
> ```bash
|
||||
> PYTHONPATH=core:exports python -m agent_name test
|
||||
> PYTHONPATH=exports uv run python -m agent_name test
|
||||
> ```
|
||||
|
||||
```bash
|
||||
@@ -139,7 +146,7 @@ make test
|
||||
cd core && pytest tests/ -v
|
||||
|
||||
# Run tests for a specific agent
|
||||
PYTHONPATH=core:exports python -m agent_name test
|
||||
PYTHONPATH=exports uv run python -m agent_name test
|
||||
```
|
||||
|
||||
> **CI also validates** that all exported agent JSON files (`exports/*/agent.json`) are well-formed JSON. Ensure your agent exports are valid before submitting.
|
||||
|
||||
+38
-41
@@ -44,7 +44,7 @@ Aden Agent Framework is a Python-based system for building goal-driven, self-imp
|
||||
Ensure you have installed:
|
||||
|
||||
- **Python 3.11+** - [Download](https://www.python.org/downloads/) (3.12 or 3.13 recommended)
|
||||
- **pip** - Package installer for Python (comes with Python)
|
||||
- **uv** - Python package manager ([Install](https://docs.astral.sh/uv/getting-started/installation/))
|
||||
- **git** - Version control
|
||||
- **Claude Code** - [Install](https://docs.anthropic.com/claude/docs/claude-code) (optional, for using building skills)
|
||||
|
||||
@@ -52,7 +52,7 @@ Verify installation:
|
||||
|
||||
```bash
|
||||
python --version # Should be 3.11+
|
||||
pip --version # Should be latest
|
||||
uv --version # Should be latest
|
||||
git --version # Any recent version
|
||||
```
|
||||
|
||||
@@ -111,12 +111,12 @@ This installs agent-related Claude Code skills:
|
||||
|
||||
```bash
|
||||
# Verify package imports
|
||||
python -c "import framework; print('✓ framework OK')"
|
||||
python -c "import aden_tools; print('✓ aden_tools OK')"
|
||||
python -c "import litellm; print('✓ litellm OK')"
|
||||
uv run python -c "import framework; print('✓ framework OK')"
|
||||
uv run python -c "import aden_tools; print('✓ aden_tools OK')"
|
||||
uv run python -c "import litellm; print('✓ litellm OK')"
|
||||
|
||||
# Run an agent (after building one via /building-agents-construction)
|
||||
PYTHONPATH=core:exports python -m your_agent_name validate
|
||||
PYTHONPATH=exports uv run python -m your_agent_name validate
|
||||
```
|
||||
|
||||
---
|
||||
@@ -128,8 +128,12 @@ hive/ # Repository root
|
||||
│
|
||||
├── .github/ # GitHub configuration
|
||||
│ ├── workflows/
|
||||
│ │ ├── ci.yml # Runs on every PR
|
||||
│ │ └── release.yml # Runs on tags
|
||||
│ │ ├── ci.yml # Lint, test, validate on every PR
|
||||
│ │ ├── release.yml # Runs on tags
|
||||
│ │ ├── pr-requirements.yml # PR requirement checks
|
||||
│ │ ├── pr-check-command.yml # PR check commands
|
||||
│ │ ├── claude-issue-triage.yml # Automated issue triage
|
||||
│ │ └── auto-close-duplicates.yml # Close duplicate issues
|
||||
│ ├── ISSUE_TEMPLATE/ # Bug report & feature request templates
|
||||
│ ├── PULL_REQUEST_TEMPLATE.md # PR description template
|
||||
│ └── CODEOWNERS # Auto-assign reviewers
|
||||
@@ -166,7 +170,6 @@ hive/ # Repository root
|
||||
│ │ ├── testing/ # Testing utilities
|
||||
│ │ └── __init__.py
|
||||
│ ├── pyproject.toml # Package metadata and dependencies
|
||||
│ ├── requirements.txt # Python dependencies
|
||||
│ ├── README.md # Framework documentation
|
||||
│ ├── MCP_INTEGRATION_GUIDE.md # MCP server integration guide
|
||||
│ └── docs/ # Protocol documentation
|
||||
@@ -182,7 +185,6 @@ hive/ # Repository root
|
||||
│ │ ├── mcp_server.py # HTTP MCP server
|
||||
│ │ └── __init__.py
|
||||
│ ├── pyproject.toml # Package metadata
|
||||
│ ├── requirements.txt # Python dependencies
|
||||
│ └── README.md # Tools documentation
|
||||
│
|
||||
├── exports/ # AGENT PACKAGES (user-created, gitignored)
|
||||
@@ -191,14 +193,15 @@ hive/ # Repository root
|
||||
├── docs/ # Documentation
|
||||
│ ├── getting-started.md # Quick start guide
|
||||
│ ├── configuration.md # Configuration reference
|
||||
│ ├── architecture.md # System architecture
|
||||
│ └── articles/ # Technical articles
|
||||
│ ├── architecture/ # System architecture
|
||||
│ ├── articles/ # Technical articles
|
||||
│ ├── quizzes/ # Developer quizzes
|
||||
│ └── i18n/ # Translations
|
||||
│
|
||||
├── scripts/ # Build & utility scripts
|
||||
│ ├── setup-python.sh # Python environment setup
|
||||
│ └── setup.sh # Legacy setup script
|
||||
├── scripts/ # Utility scripts
|
||||
│ └── auto-close-duplicates.ts # GitHub duplicate issue closer
|
||||
│
|
||||
├── quickstart.sh # Install Claude Code skills
|
||||
├── quickstart.sh # Interactive setup wizard
|
||||
├── ENVIRONMENT_SETUP.md # Complete Python setup guide
|
||||
├── README.md # Project overview
|
||||
├── DEVELOPER.md # This file
|
||||
@@ -252,7 +255,7 @@ claude> /testing-agent
|
||||
4. **Validate the Agent**
|
||||
|
||||
```bash
|
||||
PYTHONPATH=core:exports python -m your_agent_name validate
|
||||
PYTHONPATH=exports uv run python -m your_agent_name validate
|
||||
```
|
||||
|
||||
5. **Test the Agent**
|
||||
@@ -298,19 +301,19 @@ If you prefer to build agents manually:
|
||||
|
||||
```bash
|
||||
# Validate agent structure
|
||||
PYTHONPATH=core:exports python -m agent_name validate
|
||||
PYTHONPATH=exports uv run python -m agent_name validate
|
||||
|
||||
# Show agent information
|
||||
PYTHONPATH=core:exports python -m agent_name info
|
||||
PYTHONPATH=exports uv run python -m agent_name info
|
||||
|
||||
# Run agent with input
|
||||
PYTHONPATH=core:exports python -m agent_name run --input '{
|
||||
PYTHONPATH=exports uv run python -m agent_name run --input '{
|
||||
"ticket_content": "My login is broken",
|
||||
"customer_id": "CUST-123"
|
||||
}'
|
||||
|
||||
# Run in mock mode (no LLM calls)
|
||||
PYTHONPATH=core:exports python -m agent_name run --mock --input '{...}'
|
||||
PYTHONPATH=exports uv run python -m agent_name run --mock --input '{...}'
|
||||
```
|
||||
|
||||
---
|
||||
@@ -334,17 +337,17 @@ This generates and runs:
|
||||
|
||||
```bash
|
||||
# Run all tests for an agent
|
||||
PYTHONPATH=core:exports python -m agent_name test
|
||||
PYTHONPATH=exports uv run python -m agent_name test
|
||||
|
||||
# Run specific test type
|
||||
PYTHONPATH=core:exports python -m agent_name test --type constraint
|
||||
PYTHONPATH=core:exports python -m agent_name test --type success
|
||||
PYTHONPATH=exports uv run python -m agent_name test --type constraint
|
||||
PYTHONPATH=exports uv run python -m agent_name test --type success
|
||||
|
||||
# Run with parallel execution
|
||||
PYTHONPATH=core:exports python -m agent_name test --parallel 4
|
||||
PYTHONPATH=exports uv run python -m agent_name test --parallel 4
|
||||
|
||||
# Fail fast (stop on first failure)
|
||||
PYTHONPATH=core:exports python -m agent_name test --fail-fast
|
||||
PYTHONPATH=exports uv run python -m agent_name test --fail-fast
|
||||
```
|
||||
|
||||
### Writing Custom Tests
|
||||
@@ -375,7 +378,7 @@ def test_ticket_categorization():
|
||||
- **PEP 8** - Follow Python style guide
|
||||
- **Type hints** - Use for function signatures and class attributes
|
||||
- **Docstrings** - Document classes and public functions
|
||||
- **Black** - Code formatter (run with `black .`)
|
||||
- **Ruff** - Linter and formatter (run with `make check`)
|
||||
|
||||
```python
|
||||
# Good
|
||||
@@ -509,8 +512,8 @@ chore(deps): update React to 18.2.0
|
||||
|
||||
1. Create a feature branch from `main`
|
||||
2. Make your changes with clear commits
|
||||
3. Run tests locally: `PYTHONPATH=core:exports python -m pytest`
|
||||
4. Run linting: `black --check .`
|
||||
3. Run tests locally: `make test`
|
||||
4. Run linting: `make check`
|
||||
5. Push and create a PR
|
||||
6. Fill out the PR template
|
||||
7. Request review from CODEOWNERS
|
||||
@@ -528,16 +531,11 @@ chore(deps): update React to 18.2.0
|
||||
```bash
|
||||
# Add to core framework
|
||||
cd core
|
||||
pip install <package>
|
||||
# Then add to requirements.txt or pyproject.toml
|
||||
uv add <package>
|
||||
|
||||
# Add to tools package
|
||||
cd tools
|
||||
pip install <package>
|
||||
# Then add to requirements.txt or pyproject.toml
|
||||
|
||||
# Reinstall in editable mode
|
||||
pip install -e .
|
||||
uv add <package>
|
||||
```
|
||||
|
||||
### Creating a New Agent
|
||||
@@ -636,10 +634,10 @@ import logging
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
|
||||
# Run with verbose output
|
||||
PYTHONPATH=core:exports python -m agent_name run --input '{...}' --verbose
|
||||
PYTHONPATH=exports uv run python -m agent_name run --input '{...}' --verbose
|
||||
|
||||
# Use mock mode to test without LLM calls
|
||||
PYTHONPATH=core:exports python -m agent_name run --mock --input '{...}'
|
||||
PYTHONPATH=exports uv run python -m agent_name run --mock --input '{...}'
|
||||
```
|
||||
|
||||
---
|
||||
@@ -670,9 +668,8 @@ cat .env
|
||||
# Or check shell environment
|
||||
echo $ANTHROPIC_API_KEY
|
||||
|
||||
# Copy from .env.example if needed
|
||||
cp .env.example .env
|
||||
# Then edit .env with your API keys
|
||||
# Create .env if needed
|
||||
# Then add your API keys
|
||||
```
|
||||
|
||||
|
||||
|
||||
+71
-30
@@ -21,6 +21,19 @@ This will:
|
||||
- Fix package compatibility issues (openai + litellm)
|
||||
- Verify all installations
|
||||
|
||||
## Windows Setup
|
||||
|
||||
Windows users should use **WSL (Windows Subsystem for Linux)** to set up and run agents.
|
||||
|
||||
1. [Install WSL 2](https://learn.microsoft.com/en-us/windows/wsl/install) if you haven't already:
|
||||
```powershell
|
||||
wsl --install
|
||||
```
|
||||
2. Open your WSL terminal, clone the repo, and run the quickstart script:
|
||||
```bash
|
||||
./quickstart.sh
|
||||
```
|
||||
|
||||
## Alpine Linux Setup
|
||||
|
||||
If you are using Alpine Linux (e.g., inside a Docker container), you must install system dependencies and use a virtual environment before running the setup script:
|
||||
@@ -32,9 +45,9 @@ apk add bash git python3 py3-pip nodejs npm curl build-base python3-dev linux-he
|
||||
```
|
||||
2. Set up Virtual Environment (Required for Python 3.12+):
|
||||
```
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install --upgrade pip setuptools wheel
|
||||
uv venv
|
||||
source .venv/bin/activate
|
||||
# uv handles pip/setuptools/wheel automatically
|
||||
```
|
||||
3. Run the Quickstart Script:
|
||||
```
|
||||
@@ -49,29 +62,29 @@ If you prefer to set up manually or the script fails:
|
||||
|
||||
```bash
|
||||
cd core
|
||||
pip install -e .
|
||||
uv pip install -e .
|
||||
```
|
||||
|
||||
### 2. Install Tools Package
|
||||
|
||||
```bash
|
||||
cd tools
|
||||
pip install -e .
|
||||
uv pip install -e .
|
||||
```
|
||||
|
||||
### 3. Upgrade OpenAI Package
|
||||
|
||||
```bash
|
||||
# litellm requires openai >= 1.0.0
|
||||
pip install --upgrade "openai>=1.0.0"
|
||||
uv pip install --upgrade "openai>=1.0.0"
|
||||
```
|
||||
|
||||
### 4. Verify Installation
|
||||
|
||||
```bash
|
||||
python -c "import framework; print('✓ framework OK')"
|
||||
python -c "import aden_tools; print('✓ aden_tools OK')"
|
||||
python -c "import litellm; print('✓ litellm OK')"
|
||||
uv run python -c "import framework; print('✓ framework OK')"
|
||||
uv run python -c "import aden_tools; print('✓ aden_tools OK')"
|
||||
uv run python -c "import litellm; print('✓ litellm OK')"
|
||||
```
|
||||
|
||||
> **Windows Tip:**
|
||||
@@ -100,33 +113,44 @@ For running agents with real LLMs:
|
||||
export ANTHROPIC_API_KEY="your-key-here"
|
||||
```
|
||||
|
||||
Windows (PowerShell):
|
||||
|
||||
```powershell
|
||||
$env:ANTHROPIC_API_KEY="your-key-here"
|
||||
```
|
||||
|
||||
## Running Agents
|
||||
|
||||
All agent commands must be run from the project root with `PYTHONPATH` set:
|
||||
|
||||
```bash
|
||||
# From /hive/ directory
|
||||
PYTHONPATH=core:exports python -m agent_name COMMAND
|
||||
PYTHONPATH=exports uv run python -m agent_name COMMAND
|
||||
```
|
||||
|
||||
### Example Commands
|
||||
Windows (PowerShell):
|
||||
|
||||
After building an agent via `/building-agents-construction`, use these commands:
|
||||
```powershell
|
||||
$env:PYTHONPATH="core;exports"
|
||||
python -m agent_name COMMAND
|
||||
```
|
||||
|
||||
### Example: Support Ticket Agent
|
||||
|
||||
```bash
|
||||
# Validate agent structure
|
||||
PYTHONPATH=core:exports python -m your_agent_name validate
|
||||
PYTHONPATH=exports uv run python -m your_agent_name validate
|
||||
|
||||
# Show agent information
|
||||
PYTHONPATH=core:exports python -m your_agent_name info
|
||||
PYTHONPATH=exports uv run python -m your_agent_name info
|
||||
|
||||
# Run agent with input
|
||||
PYTHONPATH=core:exports python -m your_agent_name run --input '{
|
||||
PYTHONPATH=exports uv run python -m your_agent_name run --input '{
|
||||
"task": "Your input here"
|
||||
}'
|
||||
|
||||
# Run in mock mode (no LLM calls)
|
||||
PYTHONPATH=core:exports python -m your_agent_name run --mock --input '{...}'
|
||||
PYTHONPATH=exports uv run python -m your_agent_name run --mock --input '{...}'
|
||||
```
|
||||
|
||||
## Building New Agents and Run Flow
|
||||
@@ -231,7 +255,7 @@ This workflow orchestrates all agent-building skills to take you from idea → p
|
||||
|
||||
```bash
|
||||
# Create virtual environment
|
||||
python3 -m venv .venv
|
||||
uv venv
|
||||
|
||||
# Activate it
|
||||
source .venv/bin/activate # macOS/Linux
|
||||
@@ -245,7 +269,15 @@ Always activate the venv before running agents:
|
||||
|
||||
```bash
|
||||
source .venv/bin/activate
|
||||
PYTHONPATH=core:exports python -m your_agent_name demo
|
||||
PYTHONPATH=exports uv run python -m your_agent_name demo
|
||||
```
|
||||
|
||||
### PowerShell: “running scripts is disabled on this system”
|
||||
|
||||
Run once per session:
|
||||
|
||||
```powershell
|
||||
Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass
|
||||
```
|
||||
|
||||
### "ModuleNotFoundError: No module named 'framework'"
|
||||
@@ -253,7 +285,7 @@ PYTHONPATH=core:exports python -m your_agent_name demo
|
||||
**Solution:** Install the core package:
|
||||
|
||||
```bash
|
||||
cd core && pip install -e .
|
||||
cd core && uv pip install -e .
|
||||
```
|
||||
|
||||
### "ModuleNotFoundError: No module named 'aden_tools'"
|
||||
@@ -261,7 +293,7 @@ cd core && pip install -e .
|
||||
**Solution:** Install the tools package:
|
||||
|
||||
```bash
|
||||
cd tools && pip install -e .
|
||||
cd tools && uv pip install -e .
|
||||
```
|
||||
|
||||
Or run the setup script:
|
||||
@@ -277,17 +309,26 @@ Or run the setup script:
|
||||
**Solution:** Upgrade openai:
|
||||
|
||||
```bash
|
||||
pip install --upgrade "openai>=1.0.0"
|
||||
uv pip install --upgrade "openai>=1.0.0"
|
||||
```
|
||||
|
||||
### "No module named 'your_agent_name'"
|
||||
|
||||
**Cause:** Not running from project root, missing PYTHONPATH, or agent not yet created
|
||||
|
||||
**Solution:** Ensure you're in the project root directory, have built an agent, and use:
|
||||
**Solution:** Ensure you're in `/hive/` and use:
|
||||
|
||||
Linux/macOS:
|
||||
|
||||
```bash
|
||||
PYTHONPATH=core:exports python -m your_agent_name validate
|
||||
PYTHONPATH=exports uv run python -m your_agent_name validate
|
||||
```
|
||||
|
||||
Windows:
|
||||
|
||||
```powershell
|
||||
$env:PYTHONPATH="core;exports"
|
||||
python -m support_ticket_agent validate
|
||||
```
|
||||
|
||||
### Agent imports fail with "broken installation"
|
||||
@@ -298,7 +339,7 @@ PYTHONPATH=core:exports python -m your_agent_name validate
|
||||
|
||||
```bash
|
||||
# Remove broken installations
|
||||
pip uninstall -y framework tools
|
||||
uv pip uninstall framework tools
|
||||
|
||||
# Reinstall correctly
|
||||
./quickstart.sh
|
||||
@@ -352,12 +393,12 @@ If you need to use both packages in a single script (e.g., for testing), you hav
|
||||
|
||||
```bash
|
||||
# Option 1: Install both in a shared environment
|
||||
python -m venv .venv
|
||||
uv venv
|
||||
source .venv/bin/activate
|
||||
pip install -e core/ -e tools/
|
||||
uv pip install -e core/ -e tools/
|
||||
|
||||
# Option 2: Use PYTHONPATH (for quick testing)
|
||||
PYTHONPATH=core:tools/src python your_script.py
|
||||
PYTHONPATH=tools/src uv run python your_script.py
|
||||
```
|
||||
|
||||
### MCP Server Configuration
|
||||
@@ -383,7 +424,7 @@ This ensures each MCP server runs with its correct dependencies.
|
||||
|
||||
### Why PYTHONPATH is Required
|
||||
|
||||
The packages are installed in **editable mode** (`pip install -e`), which means:
|
||||
The packages are installed in **editable mode** (`uv pip install -e`), which means:
|
||||
|
||||
- `framework` and `aden_tools` are globally importable (no PYTHONPATH needed)
|
||||
- `exports` is NOT installed as a package (PYTHONPATH required)
|
||||
@@ -412,7 +453,7 @@ Enter goal: "Build an agent that processes customer support tickets"
|
||||
### 3. Validate Agent
|
||||
|
||||
```bash
|
||||
PYTHONPATH=core:exports python -m your_agent_name validate
|
||||
PYTHONPATH=exports uv run python -m your_agent_name validate
|
||||
```
|
||||
|
||||
### 4. Test Agent
|
||||
@@ -424,7 +465,7 @@ claude> /testing-agent
|
||||
### 5. Run Agent
|
||||
|
||||
```bash
|
||||
PYTHONPATH=core:exports python -m your_agent_name run --input '{...}'
|
||||
PYTHONPATH=exports uv run python -m your_agent_name run --input '{...}'
|
||||
```
|
||||
|
||||
## IDE Setup
|
||||
|
||||
@@ -4,9 +4,11 @@ help: ## Show this help
|
||||
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \
|
||||
awk 'BEGIN {FS = ":.*?## "}; {printf " \033[36m%-15s\033[0m %s\n", $$1, $$2}'
|
||||
|
||||
lint: ## Run ruff linter (with auto-fix)
|
||||
lint: ## Run ruff linter and formatter (with auto-fix)
|
||||
cd core && ruff check --fix .
|
||||
cd tools && ruff check --fix .
|
||||
cd core && ruff format .
|
||||
cd tools && ruff format .
|
||||
|
||||
format: ## Run ruff formatter
|
||||
cd core && ruff format .
|
||||
@@ -19,8 +21,8 @@ check: ## Run all checks without modifying files (CI-safe)
|
||||
cd tools && ruff format --check .
|
||||
|
||||
test: ## Run all tests
|
||||
cd core && python -m pytest tests/ -v
|
||||
cd core && uv run python -m pytest tests/ -v
|
||||
|
||||
install-hooks: ## Install pre-commit hooks
|
||||
pip install pre-commit
|
||||
uv pip install pre-commit
|
||||
pre-commit install
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
- **Added empty response retry logic** — LLM provider now detects empty responses (e.g. Gemini returning 200 with no content on rate limit) and retries with exponential backoff, preventing hallucinated output from the cleanup LLM
|
||||
- **Added context-aware input compaction** — LLM nodes now estimate input token count before calling the model and progressively truncate the largest values if they exceed the context window budget
|
||||
- **Increased rate limit retries to 10** with verbose `[retry]` and `[compaction]` logging that includes model name, finish reason, and attempt count
|
||||
- **Updated setup scripts** — `scripts/setup-python.sh` now installs Playwright Chromium browser automatically for web scraping support
|
||||
- **Interactive quickstart onboarding** — `quickstart.sh` rewritten as bee-themed interactive wizard that detects existing API keys (including Claude Code subscription), lets user pick ONE default LLM provider, and saves configuration to `~/.hive/configuration.json`
|
||||
- **Fixed lint errors** across `hubspot_tool.py` (line length) and `agent_builder_server.py` (unused variable)
|
||||
|
||||
@@ -24,8 +23,6 @@
|
||||
- `tools/src/aden_tools/tools/web_scrape_tool/README.md` — Updated docs
|
||||
- `tools/pyproject.toml` — Added `playwright`, `playwright-stealth` deps
|
||||
- `tools/Dockerfile` — Added `playwright install chromium --with-deps`
|
||||
- `scripts/setup-python.sh` — Added Playwright Chromium browser install step
|
||||
|
||||
### LLM Reliability
|
||||
- `core/framework/llm/litellm.py` — Empty response retry + max retries 10 + verbose logging
|
||||
- `core/framework/graph/node.py` — Input compaction via `_compact_inputs()`, `_estimate_tokens()`, `_get_context_limit()`
|
||||
@@ -41,7 +38,6 @@
|
||||
## Test plan
|
||||
- [ ] Run `make lint` — passes clean
|
||||
- [ ] Run `./quickstart.sh` and verify interactive flow works, config saved to `~/.hive/configuration.json`
|
||||
- [ ] Run `./scripts/setup-python.sh` and verify Playwright Chromium installs
|
||||
- [ ] Run `pytest tests/tools/test_web_scrape_tool.py -v`
|
||||
- [ ] Run agent against a JS-heavy site and verify `web_scrape` returns rendered content
|
||||
- [ ] Set `HUBSPOT_ACCESS_TOKEN` and verify HubSpot tool CRUD operations work
|
||||
|
||||
@@ -15,7 +15,6 @@
|
||||
|
||||
[](https://github.com/adenhq/hive/blob/main/LICENSE)
|
||||
[](https://www.ycombinator.com/companies/aden)
|
||||
[](https://hub.docker.com/u/adenhq)
|
||||
[](https://discord.com/invite/MXE49hrKDk)
|
||||
[](https://x.com/aden_hq)
|
||||
[](https://www.linkedin.com/company/teamaden/)
|
||||
@@ -40,6 +39,31 @@ Build reliable, self-improving AI agents without hardcoding workflows. Define yo
|
||||
|
||||
Visit [adenhq.com](https://adenhq.com) for complete documentation, examples, and guides.
|
||||
|
||||
## Who Is Hive For?
|
||||
|
||||
Hive is designed for developers and teams who want to build **production-grade AI agents** without manually wiring complex workflows.
|
||||
|
||||
Hive is a good fit if you:
|
||||
|
||||
- Want AI agents that **execute real business processes**, not demos
|
||||
- Prefer **goal-driven development** over hardcoded workflows
|
||||
- Need **self-healing and adaptive agents** that improve over time
|
||||
- Require **human-in-the-loop control**, observability, and cost limits
|
||||
- Plan to run agents in **production environments**
|
||||
|
||||
Hive may not be the best fit if you’re only experimenting with simple agent chains or one-off scripts.
|
||||
|
||||
## When Should You Use Hive?
|
||||
|
||||
Use Hive when you need:
|
||||
|
||||
- Long-running, autonomous agents
|
||||
- Multi-agent coordination
|
||||
- Continuous improvement based on failures
|
||||
- Strong monitoring, safety, and budget controls
|
||||
- A framework that evolves with your goals
|
||||
|
||||
|
||||
## What is Aden
|
||||
|
||||
<p align="center">
|
||||
@@ -64,11 +88,13 @@ Aden is a platform for building, deploying, operating, and adapting AI agents:
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Prerequisites
|
||||
## Prerequisites
|
||||
|
||||
- [Python 3.11+](https://www.python.org/downloads/) for agent development
|
||||
- Python 3.11+ for agent development
|
||||
- Claude Code or Cursor for utilizing agent skills
|
||||
|
||||
> **Note for Windows Users:** It is strongly recommended to use **WSL (Windows Subsystem for Linux)** or **Git Bash** to run this framework. Some core automation scripts may not execute correctly in standard Command Prompt or PowerShell.
|
||||
|
||||
### Installation
|
||||
|
||||
```bash
|
||||
@@ -95,7 +121,7 @@ claude> /building-agents-construction
|
||||
claude> /testing-agent
|
||||
|
||||
# Run your agent
|
||||
PYTHONPATH=core:exports python -m your_agent_name run --input '{...}'
|
||||
PYTHONPATH=exports uv run python -m your_agent_name run --input '{...}'
|
||||
```
|
||||
|
||||
**[📖 Complete Setup Guide](ENVIRONMENT_SETUP.md)** - Detailed instructions for agent development
|
||||
@@ -181,7 +207,7 @@ flowchart LR
|
||||
Aden Hive provides a list of featured agents that you can use and build on top of.
|
||||
|
||||
### Run an agent shared by others
|
||||
Put the agent in `exports/` and run `PYTHONPATH=core:exports python -m your_agent_name run --input '{...}'`
|
||||
Put the agent in `exports/` and run `PYTHONPATH=exports uv run python -m your_agent_name run --input '{...}'`
|
||||
|
||||
|
||||
For building and running goal-driven agents with the framework:
|
||||
@@ -202,7 +228,7 @@ claude> /building-agents-construction
|
||||
claude> /testing-agent
|
||||
|
||||
# Run agents
|
||||
PYTHONPATH=core:exports python -m agent_name run --input '{...}'
|
||||
PYTHONPATH=exports uv run python -m agent_name run --input '{...}'
|
||||
```
|
||||
|
||||
See [ENVIRONMENT_SETUP.md](ENVIRONMENT_SETUP.md) for complete setup instructions.
|
||||
|
||||
@@ -0,0 +1,112 @@
|
||||
# 🚀 Release v0.4.0
|
||||
|
||||
**79 commits since v0.3.2** | **Target: `main` @ `80a41b4`**
|
||||
|
||||
---
|
||||
|
||||
## ✨ Highlights
|
||||
|
||||
This is a major release introducing the **Event Loop Node architecture**, an **interactive TUI dashboard**, **ClientIO gateway** for client-facing agents, a **GitHub tool**, **Slack tool integration** (45+ tools), and a full **migration from pip to uv** for package management.
|
||||
|
||||
---
|
||||
|
||||
## 🆕 Features
|
||||
|
||||
### 🔄 Event Loop Node Architecture
|
||||
- Implement event loop node framework (WP1-4, WP8, WP9, WP10, WP12) — a new node type that supports iterative, multi-turn execution with tool calls, judge-based acceptance, and client-facing interaction
|
||||
- Emit bus events for runtime observability
|
||||
- Add graph validation for client-facing nodes
|
||||
- Soft-fail on schema mismatch during context handoff (no more hard failures)
|
||||
|
||||
### 🖥️ Interactive TUI Dashboard
|
||||
- Add interactive TUI dashboard for agent execution with 3-pane layout (logs/graph + chat)
|
||||
- Implement selectable logging, interactive ChatREPL, and thread-safe event handling
|
||||
- Screenshot feature, header polish, keybinding updates
|
||||
- Lazy widget loading, Horizontal/Vertical layout fixes
|
||||
- Integrate agent builder with TUI
|
||||
|
||||
### 💬 ClientIO Gateway
|
||||
- Implement ClientIO gateway for client-facing node I/O routing
|
||||
- Client-facing nodes can now request and receive user input at runtime
|
||||
|
||||
### 🐙 GitHub Tool
|
||||
- Add GitHub tool for repository and issue management
|
||||
- Security and integration fixes from PR feedback
|
||||
|
||||
### 💼 Slack Tool Integration
|
||||
- Add Slack bot integration with 45+ tools for multipurpose integration
|
||||
- Includes CRM support capabilities
|
||||
|
||||
### 🔑 Credential Store
|
||||
- Provider-based credential store (`aden provider credential store by provider`)
|
||||
- Support non-OAuth key setup in credential workflows
|
||||
- Quickstart credential store integration
|
||||
|
||||
### 📦 Migration to uv
|
||||
- Migrate from pip to uv for package management
|
||||
- Consolidate workspace to uv monorepo
|
||||
- Migrate all CI jobs from pip to uv
|
||||
- Check for litellm import in both `CORE_PYTHON` and `TOOLS_PYTHON` environments
|
||||
|
||||
### 🛠️ Other Features
|
||||
- Tool truncation for handling large tool outputs
|
||||
- Inject runtime datetime into LLM system prompts
|
||||
- Add sample agent folder structure and examples
|
||||
- Add message when LLM key is not available
|
||||
- Edit bot prompt to decide on technical size of issues
|
||||
- Update skills and agent builder tools; bump pinned ruff version
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Bug Fixes
|
||||
|
||||
- **ON_FAILURE edge routing**: Follow ON_FAILURE edges when a node fails after max retries
|
||||
- **Malformed JSON tool arguments**: Handle malformed JSON tool arguments safely in LiteLLMProvider
|
||||
- **Quickstart compatibility**: Fix quickstart.sh compatibility and provider selection issues
|
||||
- **Silent exit fix**: Resolve silent exit when selecting non-Anthropic LLM provider
|
||||
- **Robust compaction logic**: Fix conversation compaction edge cases
|
||||
- **Loop prevention**: Prevent infinite loops in feedback edges
|
||||
- **Tool pruning logic**: Fix incorrect tool pruning behavior
|
||||
- **Text delta granularity**: Fix text delta granularity and tool limit problems
|
||||
- **Tool call results**: Fix formulation of tool call results
|
||||
- **Max retry reset**: Reset max retry counter to 0 for event loop nodes
|
||||
- **Graph validation**: Fix graph validation logic
|
||||
- **MCP exports directory**: Handle missing exports directory in test generation tools
|
||||
- **Bash version support**: Fix bash version compatibility
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ Chores & CI
|
||||
|
||||
- Consolidate workspace to uv monorepo
|
||||
- Migrate remaining CI jobs from pip to uv
|
||||
- Clean up use of `setup-python` in CI
|
||||
- Windows lint fixes
|
||||
- Various lint and formatting fixes
|
||||
- Update `.gitignore` and remove local claude settings
|
||||
- Update issue templates
|
||||
|
||||
---
|
||||
|
||||
## 📖 Documentation
|
||||
|
||||
- Add Windows compatibility warning
|
||||
- Update architecture diagram source path in README
|
||||
|
||||
---
|
||||
|
||||
## 👏 Contributors
|
||||
|
||||
Thanks to all contributors for this release:
|
||||
|
||||
- **@mubarakar95** — Interactive TUI dashboard (3-pane layout, ChatREPL, selectable logging, screenshot feature, lazy widget loading)
|
||||
- **@levxn** — Slack bot integration with 45+ tools including CRM support
|
||||
- **@lakshitaa-chellaramani** — GitHub tool for repository and issue management
|
||||
- **@Acid-OP** — ON_FAILURE edge routing fix after max retries
|
||||
- **@Siddharth2624** — Malformed JSON tool argument handling in LiteLLMProvider
|
||||
- **@Antiarin** — Runtime datetime injection into LLM system prompts
|
||||
- **@kuldeepgaur02** — Fix silent exit when selecting non-Anthropic LLM provider
|
||||
- **@Anjali Yadav** — Fix missing exports directory in MCP test generation tools
|
||||
- **@Hundao** — Migrate remaining CI jobs from pip to uv
|
||||
- **@ranjithkumar9343** — Windows compatibility warning documentation
|
||||
- **@Yogesh Sakharam Diwate** — Architecture diagram path update in README
|
||||
+1
-1
@@ -268,7 +268,7 @@ classDef done fill:#9e9e9e,color:#fff,stroke:#757575
|
||||
- [ ] Wake-up Tool (resume agent tasks)
|
||||
|
||||
### Deployment (Self-Hosted)
|
||||
- [ ] Docker container standardization
|
||||
- [ ] Workder agent docker container standardization
|
||||
- [ ] Headless backend execution
|
||||
- [ ] Exposed API for frontend attachment
|
||||
- [ ] Local monitoring & observability
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
# TUI Text Selection and Copy Guide
|
||||
|
||||
## Keybindings
|
||||
|
||||
| Key | Action |
|
||||
|---------------|-----------------------|
|
||||
| `Tab` | Next panel |
|
||||
| `Shift+Tab` | Previous panel |
|
||||
| `Ctrl+S` | Save SVG screenshot |
|
||||
| `Ctrl+O` | Command palette |
|
||||
| `Q` | Quit |
|
||||
|
||||
## Panel Cycle Order
|
||||
|
||||
`Tab` cycles: **Log Pane → Graph View → Chat Input**
|
||||
|
||||
## Text Selection
|
||||
|
||||
Textual apps capture the mouse, so normal click-drag selection won't work by default. To select and copy text from any pane:
|
||||
|
||||
1. **Hold `Shift`** while clicking and dragging — this bypasses Textual's mouse capture and lets your terminal handle selection natively.
|
||||
2. Copy with your terminal's shortcut (`Cmd+C` on macOS, `Ctrl+Shift+C` on most Linux terminals).
|
||||
|
||||
## Log Pane Scrolling
|
||||
|
||||
The log pane uses `auto_scroll=False`. New output only scrolls to the bottom when you are already at the bottom of the log. If you've scrolled up to read earlier output, it stays in place.
|
||||
|
||||
## Screenshots
|
||||
|
||||
`Ctrl+S` saves an SVG screenshot to the `screenshots/` directory with a timestamped filename. Open the SVG in any browser to view it.
|
||||
+10
-10
@@ -14,7 +14,7 @@ Framework provides a runtime framework that captures **decisions**, not just act
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
pip install -e .
|
||||
uv pip install -e .
|
||||
```
|
||||
|
||||
## MCP Server Setup
|
||||
@@ -45,13 +45,13 @@ If you prefer manual setup:
|
||||
|
||||
```bash
|
||||
# Install framework
|
||||
pip install -e .
|
||||
uv pip install -e .
|
||||
|
||||
# Install MCP dependencies
|
||||
pip install mcp fastmcp
|
||||
uv pip install mcp fastmcp
|
||||
|
||||
# Test the server
|
||||
python -m framework.mcp.agent_builder_server
|
||||
uv run python -m framework.mcp.agent_builder_server
|
||||
```
|
||||
|
||||
### Using with MCP Clients
|
||||
@@ -86,13 +86,13 @@ Run an LLM-powered calculator:
|
||||
|
||||
```bash
|
||||
# Single calculation
|
||||
python -m framework calculate "2 + 3 * 4"
|
||||
uv run python -m framework calculate "2 + 3 * 4"
|
||||
|
||||
# Interactive mode
|
||||
python -m framework interactive
|
||||
uv run python -m framework interactive
|
||||
|
||||
# Analyze runs with Builder
|
||||
python -m framework analyze calculator
|
||||
uv run python -m framework analyze calculator
|
||||
```
|
||||
|
||||
### Using the Runtime
|
||||
@@ -136,13 +136,13 @@ Tests are generated using MCP tools (`generate_constraint_tests`, `generate_succ
|
||||
|
||||
```bash
|
||||
# Run tests against an agent
|
||||
python -m framework test-run <agent_path> --goal <goal_id> --parallel 4
|
||||
uv run python -m framework test-run <agent_path> --goal <goal_id> --parallel 4
|
||||
|
||||
# Debug failed tests
|
||||
python -m framework test-debug <agent_path> <test_name>
|
||||
uv run python -m framework test-debug <agent_path> <test_name>
|
||||
|
||||
# List tests for a goal
|
||||
python -m framework test-list <goal_id>
|
||||
uv run python -m framework test-list <goal_id>
|
||||
```
|
||||
|
||||
For detailed testing workflows, see the [testing-agent skill](../.claude/skills/testing-agent/SKILL.md).
|
||||
|
||||
@@ -0,0 +1,740 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
EventLoopNode WebSocket Demo
|
||||
|
||||
Real LLM, real FileConversationStore, real EventBus.
|
||||
Streams EventLoopNode execution to a browser via WebSocket.
|
||||
|
||||
Usage:
|
||||
cd /home/timothy/oss/hive/core
|
||||
python demos/event_loop_wss_demo.py
|
||||
|
||||
Then open http://localhost:8765 in your browser.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
import tempfile
|
||||
from http import HTTPStatus
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
import websockets
|
||||
from bs4 import BeautifulSoup
|
||||
from websockets.http11 import Request, Response
|
||||
|
||||
# Add core, tools, and hive root to path
|
||||
_CORE_DIR = Path(__file__).resolve().parent.parent
|
||||
_HIVE_DIR = _CORE_DIR.parent
|
||||
sys.path.insert(0, str(_CORE_DIR)) # framework.*
|
||||
sys.path.insert(0, str(_HIVE_DIR / "tools" / "src")) # aden_tools.*
|
||||
sys.path.insert(0, str(_HIVE_DIR)) # core.framework.* (for aden_tools imports)
|
||||
|
||||
import os # noqa: E402
|
||||
|
||||
from aden_tools.credentials import CREDENTIAL_SPECS, CredentialStoreAdapter # noqa: E402
|
||||
from core.framework.credentials import CredentialStore # noqa: E402
|
||||
|
||||
from framework.credentials.storage import ( # noqa: E402
|
||||
CompositeStorage,
|
||||
EncryptedFileStorage,
|
||||
EnvVarStorage,
|
||||
)
|
||||
from framework.graph.event_loop_node import EventLoopNode, LoopConfig # noqa: E402
|
||||
from framework.graph.node import NodeContext, NodeSpec, SharedMemory # noqa: E402
|
||||
from framework.llm.litellm import LiteLLMProvider # noqa: E402
|
||||
from framework.llm.provider import Tool # noqa: E402
|
||||
from framework.runner.tool_registry import ToolRegistry # noqa: E402
|
||||
from framework.runtime.core import Runtime # noqa: E402
|
||||
from framework.runtime.event_bus import EventBus, EventType # noqa: E402
|
||||
from framework.storage.conversation_store import FileConversationStore # noqa: E402
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(message)s")
|
||||
logger = logging.getLogger("demo")
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Persistent state (shared across WebSocket connections)
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
STORE_DIR = Path(tempfile.mkdtemp(prefix="hive_demo_"))
|
||||
STORE = FileConversationStore(STORE_DIR / "conversation")
|
||||
RUNTIME = Runtime(STORE_DIR / "runtime")
|
||||
LLM = LiteLLMProvider(model="claude-sonnet-4-5-20250929")
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Tool Registry — real tools via ToolRegistry (same pattern as GraphExecutor)
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
TOOL_REGISTRY = ToolRegistry()
|
||||
|
||||
# Credential store: Aden sync (OAuth2 tokens) + encrypted files + env var fallback
|
||||
_env_mapping = {name: spec.env_var for name, spec in CREDENTIAL_SPECS.items()}
|
||||
_local_storage = CompositeStorage(
|
||||
primary=EncryptedFileStorage(),
|
||||
fallbacks=[EnvVarStorage(env_mapping=_env_mapping)],
|
||||
)
|
||||
|
||||
if os.environ.get("ADEN_API_KEY"):
|
||||
try:
|
||||
from framework.credentials.aden import ( # noqa: E402
|
||||
AdenCachedStorage,
|
||||
AdenClientConfig,
|
||||
AdenCredentialClient,
|
||||
AdenSyncProvider,
|
||||
)
|
||||
|
||||
_client = AdenCredentialClient(AdenClientConfig(base_url="https://api.adenhq.com"))
|
||||
_provider = AdenSyncProvider(client=_client)
|
||||
_storage = AdenCachedStorage(
|
||||
local_storage=_local_storage,
|
||||
aden_provider=_provider,
|
||||
)
|
||||
_cred_store = CredentialStore(storage=_storage, providers=[_provider], auto_refresh=True)
|
||||
_synced = _provider.sync_all(_cred_store)
|
||||
logger.info("Synced %d credentials from Aden", _synced)
|
||||
except Exception as e:
|
||||
logger.warning("Aden sync unavailable: %s", e)
|
||||
_cred_store = CredentialStore(storage=_local_storage)
|
||||
else:
|
||||
logger.info("ADEN_API_KEY not set, using local credential storage")
|
||||
_cred_store = CredentialStore(storage=_local_storage)
|
||||
|
||||
CREDENTIALS = CredentialStoreAdapter(_cred_store)
|
||||
|
||||
# Debug: log which credentials resolved
|
||||
for _name in ["brave_search", "hubspot", "anthropic"]:
|
||||
_val = CREDENTIALS.get(_name)
|
||||
if _val:
|
||||
logger.debug("credential %s: OK (len=%d)", _name, len(_val))
|
||||
else:
|
||||
logger.debug("credential %s: not found", _name)
|
||||
|
||||
# --- web_search (Brave Search API) ---
|
||||
|
||||
TOOL_REGISTRY.register(
|
||||
name="web_search",
|
||||
tool=Tool(
|
||||
name="web_search",
|
||||
description=(
|
||||
"Search the web for current information. "
|
||||
"Returns titles, URLs, and snippets from search results."
|
||||
),
|
||||
parameters={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": "The search query (1-500 characters)",
|
||||
},
|
||||
"num_results": {
|
||||
"type": "integer",
|
||||
"description": "Number of results to return (1-20, default 10)",
|
||||
},
|
||||
},
|
||||
"required": ["query"],
|
||||
},
|
||||
),
|
||||
executor=lambda inputs: _exec_web_search(inputs),
|
||||
)
|
||||
|
||||
|
||||
def _exec_web_search(inputs: dict) -> dict:
    """Execute the web_search tool against the Brave Search API.

    Args:
        inputs: Tool inputs. Expects "query" (str, required) and optionally
            "num_results" (int, clamped to 1-20, default 10).

    Returns:
        A dict with "query", "results" (list of title/url/snippet dicts)
        and "total" on success, or a dict with an "error" key on failure.
        Never raises: network errors are reported in-band, consistent with
        the other tool executors (_exec_web_scrape, _exec_hubspot_search).
    """
    api_key = CREDENTIALS.get("brave_search")
    if not api_key:
        return {"error": "brave_search credential not configured"}
    query = inputs.get("query", "")
    # Clamp to the API's supported 1-20 window (default 10); previously a
    # zero/negative request was passed through unclamped.
    num_results = max(1, min(inputs.get("num_results", 10), 20))
    try:
        resp = httpx.get(
            "https://api.search.brave.com/res/v1/web/search",
            params={"q": query, "count": num_results},
            headers={"X-Subscription-Token": api_key, "Accept": "application/json"},
            timeout=30.0,
        )
    except httpx.TimeoutException:
        return {"error": "Request timed out"}
    except Exception as e:
        # Tool executors must not raise into the event loop; report in-band.
        return {"error": f"Search failed: {e}"}
    if resp.status_code != 200:
        return {"error": f"Brave API HTTP {resp.status_code}"}
    data = resp.json()
    results = [
        {
            "title": item.get("title", ""),
            "url": item.get("url", ""),
            "snippet": item.get("description", ""),
        }
        for item in data.get("web", {}).get("results", [])[:num_results]
    ]
    return {"query": query, "results": results, "total": len(results)}
|
||||
|
||||
|
||||
# --- web_scrape (httpx + BeautifulSoup, no playwright for sync compat) ---
|
||||
|
||||
# Register web_scrape. As with web_search, the executor lambda defers name
# resolution because _exec_web_scrape is defined below this call.
TOOL_REGISTRY.register(
    name="web_scrape",
    tool=Tool(
        name="web_scrape",
        description=(
            "Scrape and extract text content from a webpage URL. "
            "Returns the page title and main text content."
        ),
        parameters={
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "URL of the webpage to scrape",
                },
                "max_length": {
                    "type": "integer",
                    "description": "Maximum text length (default 50000)",
                },
            },
            "required": ["url"],
        },
    ),
    executor=lambda inputs: _exec_web_scrape(inputs),
)
|
||||
|
||||
# Browser-like request headers for scraping; many sites reject or throttle
# clients that present a default HTTP-library User-Agent.
_SCRAPE_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/131.0.0.0 Safari/537.36"
    ),
    "Accept": "text/html,application/xhtml+xml",
}
|
||||
|
||||
|
||||
def _exec_web_scrape(inputs: dict) -> dict:
    """Fetch a webpage and extract its title and main text content.

    Reads "url" (str) and optionally "max_length" (int, clamped to the
    1000-500000 range, default 50000) from *inputs*. Returns a dict with
    "url", "title", "content" and "length", or an "error" key on failure.
    """
    url = inputs.get("url", "")
    limit = max(1000, min(inputs.get("max_length", 50000), 500000))
    # Default to HTTPS when the caller omitted the scheme.
    if not url.startswith(("http://", "https://")):
        url = "https://" + url
    try:
        resp = httpx.get(url, timeout=30.0, follow_redirects=True, headers=_SCRAPE_HEADERS)
        if resp.status_code != 200:
            return {"error": f"HTTP {resp.status_code}"}
        soup = BeautifulSoup(resp.text, "html.parser")
        # Drop non-content elements before extracting text.
        for junk in soup(["script", "style", "nav", "footer", "header", "aside", "noscript"]):
            junk.decompose()
        title = soup.title.get_text(strip=True) if soup.title else ""
        # Prefer semantic content containers, falling back to the whole body.
        container = (
            soup.find("article")
            or soup.find("main")
            or soup.find(attrs={"role": "main"})
            or soup.find("body")
        )
        text = container.get_text(separator=" ", strip=True) if container else ""
        # Collapse every run of whitespace to a single space.
        text = " ".join(text.split())
        if len(text) > limit:
            text = text[:limit] + "..."
        return {"url": url, "title": title, "content": text, "length": len(text)}
    except httpx.TimeoutException:
        return {"error": "Request timed out"}
    except Exception as e:
        return {"error": f"Scrape failed: {e}"}
|
||||
|
||||
|
||||
# --- HubSpot CRM tools (optional, requires HUBSPOT_ACCESS_TOKEN) ---
|
||||
|
||||
# Base URL for all HubSpot CRM v3 REST calls.
_HUBSPOT_API = "https://api.hubapi.com"


def _hubspot_headers() -> dict | None:
    """Build authorization headers for HubSpot API requests.

    Returns:
        A header dict with bearer auth and JSON content negotiation, or
        None when the "hubspot" credential is not configured.
    """
    token = CREDENTIALS.get("hubspot")
    # Single guard replaces the previous redundant double check
    # (an if/else log block followed by a second `if not token`).
    if not token:
        logger.debug("HubSpot token: not found")
        return None
    # Log only a masked prefix/suffix so the full secret never lands in logs.
    logger.debug("HubSpot token: %s...%s (len=%d)", token[:8], token[-4:], len(token))
    return {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
        "Accept": "application/json",
    }
|
||||
|
||||
|
||||
def _exec_hubspot_search(inputs: dict) -> dict:
    """Search HubSpot CRM objects (contacts, companies, or deals).

    Reads "object_type" (default "contacts"), optional "query", and
    "limit" (capped at 100, default 10) from *inputs*. Returns the raw
    HubSpot search response on success, or a dict with an "error" key.
    """
    headers = _hubspot_headers()
    if not headers:
        return {"error": "HUBSPOT_ACCESS_TOKEN not set"}
    object_type = inputs.get("object_type", "contacts")
    search_text = inputs.get("query", "")
    payload: dict = {"limit": min(inputs.get("limit", 10), 100)}
    # Only include the free-text query when the caller supplied one.
    if search_text:
        payload["query"] = search_text
    try:
        resp = httpx.post(
            f"{_HUBSPOT_API}/crm/v3/objects/{object_type}/search",
            headers=headers,
            json=payload,
            timeout=30.0,
        )
        if resp.status_code != 200:
            return {"error": f"HubSpot API HTTP {resp.status_code}: {resp.text[:200]}"}
        return resp.json()
    except httpx.TimeoutException:
        return {"error": "Request timed out"}
    except Exception as e:
        return {"error": f"HubSpot error: {e}"}
|
||||
|
||||
|
||||
# Register hubspot_search; availability is effectively gated at call time
# by _hubspot_headers() returning None when no token is configured.
TOOL_REGISTRY.register(
    name="hubspot_search",
    tool=Tool(
        name="hubspot_search",
        description=(
            "Search HubSpot CRM objects (contacts, companies, or deals). "
            "Returns matching records with their properties."
        ),
        parameters={
            "type": "object",
            "properties": {
                "object_type": {
                    "type": "string",
                    "description": "CRM object type: 'contacts', 'companies', or 'deals'",
                },
                "query": {
                    "type": "string",
                    "description": "Search query (name, email, domain, etc.)",
                },
                "limit": {
                    "type": "integer",
                    "description": "Max results (1-100, default 10)",
                },
            },
            "required": ["object_type"],
        },
    ),
    executor=lambda inputs: _exec_hubspot_search(inputs),
)
|
||||
|
||||
logger.info(
|
||||
"ToolRegistry loaded: %s",
|
||||
", ".join(TOOL_REGISTRY.get_registered_names()),
|
||||
)
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# HTML page (embedded)
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
HTML_PAGE = ( # noqa: E501
|
||||
"""<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>EventLoopNode Live Demo</title>
|
||||
<style>
|
||||
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
body {
|
||||
font-family: 'SF Mono', 'Fira Code', monospace;
|
||||
background: #0d1117; color: #c9d1d9;
|
||||
height: 100vh; display: flex; flex-direction: column;
|
||||
}
|
||||
header {
|
||||
background: #161b22; padding: 12px 20px;
|
||||
border-bottom: 1px solid #30363d;
|
||||
display: flex; align-items: center; gap: 16px;
|
||||
}
|
||||
header h1 { font-size: 16px; color: #58a6ff; font-weight: 600; }
|
||||
.status {
|
||||
font-size: 12px; padding: 3px 10px; border-radius: 12px;
|
||||
background: #21262d; color: #8b949e;
|
||||
}
|
||||
.status.running { background: #1a4b2e; color: #3fb950; }
|
||||
.status.done { background: #1a3a5c; color: #58a6ff; }
|
||||
.status.error { background: #4b1a1a; color: #f85149; }
|
||||
.chat { flex: 1; overflow-y: auto; padding: 16px; }
|
||||
.msg {
|
||||
margin: 8px 0; padding: 10px 14px; border-radius: 8px;
|
||||
line-height: 1.6; white-space: pre-wrap; word-wrap: break-word;
|
||||
}
|
||||
.msg.user { background: #1a3a5c; color: #58a6ff; }
|
||||
.msg.assistant { background: #161b22; color: #c9d1d9; }
|
||||
.msg.event {
|
||||
background: transparent; color: #8b949e; font-size: 11px;
|
||||
padding: 4px 14px; border-left: 3px solid #30363d;
|
||||
}
|
||||
.msg.event.loop { border-left-color: #58a6ff; }
|
||||
.msg.event.tool { border-left-color: #d29922; }
|
||||
.msg.event.stall { border-left-color: #f85149; }
|
||||
.input-bar {
|
||||
padding: 12px 16px; background: #161b22;
|
||||
border-top: 1px solid #30363d; display: flex; gap: 8px;
|
||||
}
|
||||
.input-bar input {
|
||||
flex: 1; background: #0d1117; border: 1px solid #30363d;
|
||||
color: #c9d1d9; padding: 8px 12px; border-radius: 6px;
|
||||
font-family: inherit; font-size: 14px; outline: none;
|
||||
}
|
||||
.input-bar input:focus { border-color: #58a6ff; }
|
||||
.input-bar button {
|
||||
background: #238636; color: #fff; border: none;
|
||||
padding: 8px 20px; border-radius: 6px; cursor: pointer;
|
||||
font-family: inherit; font-weight: 600;
|
||||
}
|
||||
.input-bar button:hover { background: #2ea043; }
|
||||
.input-bar button:disabled {
|
||||
background: #21262d; color: #484f58; cursor: not-allowed;
|
||||
}
|
||||
.input-bar button.clear { background: #da3633; }
|
||||
.input-bar button.clear:hover { background: #f85149; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<h1>EventLoopNode Live</h1>
|
||||
<span id="status" class="status">Idle</span>
|
||||
<span id="iter" class="status" style="display:none">Step 0</span>
|
||||
</header>
|
||||
<div id="chat" class="chat"></div>
|
||||
<div class="input-bar">
|
||||
<input id="input" type="text"
|
||||
placeholder="Ask anything..." autofocus />
|
||||
<button id="go" onclick="run()">Send</button>
|
||||
<button class="clear"
|
||||
onclick="clearConversation()">Clear</button>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
let ws = null;
|
||||
let currentAssistantEl = null;
|
||||
let iterCount = 0;
|
||||
const chat = document.getElementById('chat');
|
||||
const status = document.getElementById('status');
|
||||
const iterEl = document.getElementById('iter');
|
||||
const goBtn = document.getElementById('go');
|
||||
const inputEl = document.getElementById('input');
|
||||
|
||||
inputEl.addEventListener('keydown', e => {
|
||||
if (e.key === 'Enter') run();
|
||||
});
|
||||
|
||||
function setStatus(text, cls) {
|
||||
status.textContent = text;
|
||||
status.className = 'status ' + cls;
|
||||
}
|
||||
|
||||
function addMsg(text, cls) {
|
||||
const el = document.createElement('div');
|
||||
el.className = 'msg ' + cls;
|
||||
el.textContent = text;
|
||||
chat.appendChild(el);
|
||||
chat.scrollTop = chat.scrollHeight;
|
||||
return el;
|
||||
}
|
||||
|
||||
function connect() {
|
||||
ws = new WebSocket('ws://' + location.host + '/ws');
|
||||
ws.onopen = () => {
|
||||
setStatus('Ready', 'done');
|
||||
goBtn.disabled = false;
|
||||
};
|
||||
ws.onmessage = handleEvent;
|
||||
ws.onerror = () => { setStatus('Error', 'error'); };
|
||||
ws.onclose = () => {
|
||||
setStatus('Reconnecting...', '');
|
||||
goBtn.disabled = true;
|
||||
setTimeout(connect, 2000);
|
||||
};
|
||||
}
|
||||
|
||||
function handleEvent(msg) {
|
||||
const evt = JSON.parse(msg.data);
|
||||
|
||||
if (evt.type === 'llm_text_delta') {
|
||||
if (currentAssistantEl) {
|
||||
currentAssistantEl.textContent += evt.content;
|
||||
chat.scrollTop = chat.scrollHeight;
|
||||
}
|
||||
}
|
||||
else if (evt.type === 'ready') {
|
||||
setStatus('Ready', 'done');
|
||||
if (currentAssistantEl && !currentAssistantEl.textContent)
|
||||
currentAssistantEl.remove();
|
||||
goBtn.disabled = false;
|
||||
}
|
||||
else if (evt.type === 'node_loop_iteration') {
|
||||
iterCount = evt.iteration || (iterCount + 1);
|
||||
iterEl.textContent = 'Step ' + iterCount;
|
||||
iterEl.style.display = '';
|
||||
}
|
||||
else if (evt.type === 'tool_call_started') {
|
||||
var info = evt.tool_name + '('
|
||||
+ JSON.stringify(evt.tool_input).slice(0, 120) + ')';
|
||||
addMsg('TOOL ' + info, 'event tool');
|
||||
}
|
||||
else if (evt.type === 'tool_call_completed') {
|
||||
var preview = (evt.result || '').slice(0, 200);
|
||||
var cls = evt.is_error ? 'stall' : 'tool';
|
||||
addMsg('RESULT ' + evt.tool_name + ': ' + preview,
|
||||
'event ' + cls);
|
||||
currentAssistantEl = addMsg('', 'assistant');
|
||||
}
|
||||
else if (evt.type === 'result') {
|
||||
setStatus('Session ended', evt.success ? 'done' : 'error');
|
||||
if (evt.error) addMsg('ERROR ' + evt.error, 'event stall');
|
||||
if (currentAssistantEl && !currentAssistantEl.textContent)
|
||||
currentAssistantEl.remove();
|
||||
goBtn.disabled = false;
|
||||
}
|
||||
else if (evt.type === 'node_stalled') {
|
||||
addMsg('STALLED ' + evt.reason, 'event stall');
|
||||
}
|
||||
else if (evt.type === 'cleared') {
|
||||
chat.innerHTML = '';
|
||||
iterCount = 0;
|
||||
iterEl.textContent = 'Step 0';
|
||||
iterEl.style.display = 'none';
|
||||
setStatus('Ready', 'done');
|
||||
goBtn.disabled = false;
|
||||
}
|
||||
}
|
||||
|
||||
function run() {
|
||||
const text = inputEl.value.trim();
|
||||
if (!text || !ws || ws.readyState !== 1) return;
|
||||
addMsg(text, 'user');
|
||||
currentAssistantEl = addMsg('', 'assistant');
|
||||
inputEl.value = '';
|
||||
setStatus('Running', 'running');
|
||||
goBtn.disabled = true;
|
||||
ws.send(JSON.stringify({ topic: text }));
|
||||
}
|
||||
|
||||
function clearConversation() {
|
||||
if (ws && ws.readyState === 1) {
|
||||
ws.send(JSON.stringify({ command: 'clear' }));
|
||||
}
|
||||
}
|
||||
|
||||
connect();
|
||||
</script>
|
||||
</body>
|
||||
</html>"""
|
||||
)
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# WebSocket handler
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def handle_ws(websocket):
    """Persistent WebSocket: long-lived EventLoopNode with client_facing blocking.

    One connection owns one EventLoopNode. The first "topic" message spins
    up the node as a background task; subsequent messages are injected into
    the running loop. A {"command": "clear"} message stops the loop and
    resets the conversation store. The node is shut down when the socket
    closes.
    """
    global STORE  # reassigned below when the client sends the "clear" command

    # -- Event forwarding (WebSocket ← EventBus) ----------------------------
    bus = EventBus()

    async def forward_event(event):
        # Relay runtime events to the browser as JSON. Send failures are
        # swallowed so a dropped socket never breaks the node loop.
        try:
            payload = {"type": event.type.value, **event.data}
            if event.node_id:
                payload["node_id"] = event.node_id
            await websocket.send(json.dumps(payload))
        except Exception:
            pass

    bus.subscribe(
        event_types=[
            EventType.NODE_LOOP_STARTED,
            EventType.NODE_LOOP_ITERATION,
            EventType.NODE_LOOP_COMPLETED,
            EventType.LLM_TEXT_DELTA,
            EventType.TOOL_CALL_STARTED,
            EventType.TOOL_CALL_COMPLETED,
            EventType.NODE_STALLED,
        ],
        handler=forward_event,
    )

    # -- Per-connection state -----------------------------------------------
    # node/loop_task are mutated by start_loop()/stop_loop() via `nonlocal`.
    node = None
    loop_task = None

    tools = list(TOOL_REGISTRY.get_tools().values())
    tool_executor = TOOL_REGISTRY.get_executor()

    node_spec = NodeSpec(
        id="assistant",
        name="Chat Assistant",
        description="A conversational assistant that remembers context across messages",
        node_type="event_loop",
        # client_facing=True makes the node block waiting for user input
        # instead of terminating when it has nothing to do.
        client_facing=True,
        system_prompt=(
            "You are a helpful assistant with access to tools. "
            "You can search the web, scrape webpages, and query HubSpot CRM. "
            "Use tools when the user asks for current information or external data. "
            "You have full conversation history, so you can reference previous messages."
        ),
    )

    # -- Ready callback: subscribe to CLIENT_INPUT_REQUESTED on the bus ---
    async def on_input_requested(event):
        # Tell the browser the node is idle and waiting for the next message.
        try:
            await websocket.send(json.dumps({"type": "ready"}))
        except Exception:
            pass

    bus.subscribe(
        event_types=[EventType.CLIENT_INPUT_REQUESTED],
        handler=on_input_requested,
    )

    async def start_loop(first_message: str):
        """Create an EventLoopNode and run it as a background task."""
        nonlocal node, loop_task

        memory = SharedMemory()
        ctx = NodeContext(
            runtime=RUNTIME,
            node_id="assistant",
            node_spec=node_spec,
            memory=memory,
            input_data={},
            llm=LLM,
            available_tools=tools,
        )
        node = EventLoopNode(
            event_bus=bus,
            config=LoopConfig(max_iterations=10_000, max_history_tokens=32_000),
            conversation_store=STORE,
            tool_executor=tool_executor,
        )
        # Queue the first user message before the loop starts executing.
        await node.inject_event(first_message)

        async def _run():
            # Background task: run the node to completion and report the
            # final result (or error) back to the browser.
            try:
                result = await node.execute(ctx)
                try:
                    await websocket.send(
                        json.dumps(
                            {
                                "type": "result",
                                "success": result.success,
                                "output": result.output,
                                "error": result.error,
                                "tokens": result.tokens_used,
                            }
                        )
                    )
                except Exception:
                    pass
                logger.info(f"Loop ended: success={result.success}, tokens={result.tokens_used}")
            except websockets.exceptions.ConnectionClosed:
                logger.info("Loop stopped: WebSocket closed")
            except Exception as e:
                logger.exception("Loop error")
                try:
                    await websocket.send(
                        json.dumps(
                            {
                                "type": "result",
                                "success": False,
                                "error": str(e),
                                "output": {},
                            }
                        )
                    )
                except Exception:
                    pass

        loop_task = asyncio.create_task(_run())

    async def stop_loop():
        """Signal the node and wait for the loop task to finish."""
        nonlocal node, loop_task
        if loop_task and not loop_task.done():
            if node:
                node.signal_shutdown()
            try:
                # Give the loop a grace period to drain; hard-cancel after 5s.
                await asyncio.wait_for(loop_task, timeout=5.0)
            except (TimeoutError, asyncio.CancelledError):
                loop_task.cancel()
        node = None
        loop_task = None

    # -- Message loop (runs for the lifetime of this WebSocket) -------------
    try:
        async for raw in websocket:
            try:
                msg = json.loads(raw)
            except Exception:
                # Ignore malformed frames rather than killing the connection.
                continue

            # Clear command
            if msg.get("command") == "clear":
                import shutil

                await stop_loop()
                await STORE.close()
                conv_dir = STORE_DIR / "conversation"
                if conv_dir.exists():
                    shutil.rmtree(conv_dir)
                # Fresh store so the next message starts a clean conversation.
                STORE = FileConversationStore(conv_dir)
                await websocket.send(json.dumps({"type": "cleared"}))
                logger.info("Conversation cleared")
                continue

            topic = msg.get("topic", "")
            if not topic:
                continue

            if node is None:
                # First message — spin up the loop
                logger.info(f"Starting persistent loop: {topic}")
                await start_loop(topic)
            else:
                # Subsequent message — inject into the running loop
                logger.info(f"Injecting message: {topic}")
                await node.inject_event(topic)

    except websockets.exceptions.ConnectionClosed:
        pass
    finally:
        # Always tear the node down, regardless of how the socket ended.
        await stop_loop()
        logger.info("WebSocket closed, loop stopped")
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# HTTP handler for serving the HTML page
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def process_request(connection, request: Request):
    """Route HTTP requests: WebSocket upgrade on /ws, the demo HTML elsewhere."""
    # Returning None tells the websockets server to continue the upgrade.
    if request.path == "/ws":
        return None
    # Every other path gets the embedded single-page app.
    headers = websockets.Headers({"Content-Type": "text/html; charset=utf-8"})
    return Response(HTTPStatus.OK, "OK", headers, HTML_PAGE.encode())
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Main
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def main():
    """Start the combined HTTP + WebSocket server and serve forever."""
    port = 8765
    server = websockets.serve(
        handle_ws,
        "0.0.0.0",
        port,
        process_request=process_request,
    )
    async with server:
        logger.info(f"Demo running at http://localhost:{port}")
        logger.info("Open in your browser and enter a topic to research.")
        # Block indefinitely; the server runs until the process is killed.
        await asyncio.Future()


if __name__ == "__main__":
    asyncio.run(main())
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,930 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Two-Node ContextHandoff Demo
|
||||
|
||||
Demonstrates ContextHandoff between two EventLoopNode instances:
|
||||
Node A (Researcher) → ContextHandoff → Node B (Analyst)
|
||||
|
||||
Real LLM, real FileConversationStore, real EventBus.
|
||||
Streams both nodes to a browser via WebSocket.
|
||||
|
||||
Usage:
|
||||
cd /home/timothy/oss/hive/core
|
||||
python demos/handoff_demo.py
|
||||
|
||||
Then open http://localhost:8766 in your browser.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
import tempfile
|
||||
from http import HTTPStatus
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
import websockets
|
||||
from bs4 import BeautifulSoup
|
||||
from websockets.http11 import Request, Response
|
||||
|
||||
# Add core, tools, and hive root to path
|
||||
_CORE_DIR = Path(__file__).resolve().parent.parent
|
||||
_HIVE_DIR = _CORE_DIR.parent
|
||||
sys.path.insert(0, str(_CORE_DIR)) # framework.*
|
||||
sys.path.insert(0, str(_HIVE_DIR / "tools" / "src")) # aden_tools.*
|
||||
sys.path.insert(0, str(_HIVE_DIR)) # core.framework.* (for aden_tools imports)
|
||||
|
||||
from aden_tools.credentials import CREDENTIAL_SPECS, CredentialStoreAdapter # noqa: E402
|
||||
from core.framework.credentials import CredentialStore # noqa: E402
|
||||
|
||||
from framework.credentials.storage import ( # noqa: E402
|
||||
CompositeStorage,
|
||||
EncryptedFileStorage,
|
||||
EnvVarStorage,
|
||||
)
|
||||
from framework.graph.context_handoff import ContextHandoff # noqa: E402
|
||||
from framework.graph.conversation import NodeConversation # noqa: E402
|
||||
from framework.graph.event_loop_node import EventLoopNode, LoopConfig # noqa: E402
|
||||
from framework.graph.node import NodeContext, NodeSpec, SharedMemory # noqa: E402
|
||||
from framework.llm.litellm import LiteLLMProvider # noqa: E402
|
||||
from framework.llm.provider import Tool # noqa: E402
|
||||
from framework.runner.tool_registry import ToolRegistry # noqa: E402
|
||||
from framework.runtime.core import Runtime # noqa: E402
|
||||
from framework.runtime.event_bus import EventBus, EventType # noqa: E402
|
||||
from framework.storage.conversation_store import FileConversationStore # noqa: E402
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(message)s")
|
||||
logger = logging.getLogger("handoff_demo")
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Persistent state
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
STORE_DIR = Path(tempfile.mkdtemp(prefix="hive_handoff_"))
|
||||
RUNTIME = Runtime(STORE_DIR / "runtime")
|
||||
LLM = LiteLLMProvider(model="claude-sonnet-4-5-20250929")
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Credentials
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
# Composite credential store: encrypted files (primary) + env vars (fallback)
|
||||
# Map each known credential name to its environment-variable fallback.
_env_mapping = {name: spec.env_var for name, spec in CREDENTIAL_SPECS.items()}
_composite = CompositeStorage(
    primary=EncryptedFileStorage(),
    fallbacks=[EnvVarStorage(env_mapping=_env_mapping)],
)
# Read-only facade used by the tool executors below.
CREDENTIALS = CredentialStoreAdapter(CredentialStore(storage=_composite))

# Debug: log which credentials resolved (length only, never the secret).
for _name in ["brave_search", "hubspot"]:
    _val = CREDENTIALS.get(_name)
    if _val:
        logger.debug("credential %s: OK (len=%d)", _name, len(_val))
    else:
        logger.debug("credential %s: not found", _name)
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Tool Registry — web_search + web_scrape for Node A (Researcher)
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
TOOL_REGISTRY = ToolRegistry()
|
||||
|
||||
|
||||
def _exec_web_search(inputs: dict) -> dict:
    """Execute the web_search tool against the Brave Search API.

    Args:
        inputs: Tool inputs. Expects "query" (str, required) and optionally
            "num_results" (int, clamped to 1-20, default 10).

    Returns:
        A dict with "query", "results" (list of title/url/snippet dicts)
        and "total" on success, or a dict with an "error" key on failure.
        Never raises: network errors are reported in-band, consistent with
        _exec_web_scrape.
    """
    api_key = CREDENTIALS.get("brave_search")
    if not api_key:
        return {"error": "brave_search credential not configured"}
    query = inputs.get("query", "")
    # Clamp to the API's supported 1-20 window (default 10); previously a
    # zero/negative request was passed through unclamped.
    num_results = max(1, min(inputs.get("num_results", 10), 20))
    try:
        resp = httpx.get(
            "https://api.search.brave.com/res/v1/web/search",
            params={"q": query, "count": num_results},
            headers={
                "X-Subscription-Token": api_key,
                "Accept": "application/json",
            },
            timeout=30.0,
        )
    except httpx.TimeoutException:
        return {"error": "Request timed out"}
    except Exception as e:
        # Tool executors must not raise into the event loop; report in-band.
        return {"error": f"Search failed: {e}"}
    if resp.status_code != 200:
        return {"error": f"Brave API HTTP {resp.status_code}"}
    data = resp.json()
    results = [
        {
            "title": item.get("title", ""),
            "url": item.get("url", ""),
            "snippet": item.get("description", ""),
        }
        for item in data.get("web", {}).get("results", [])[:num_results]
    ]
    return {"query": query, "results": results, "total": len(results)}
|
||||
|
||||
|
||||
TOOL_REGISTRY.register(
|
||||
name="web_search",
|
||||
tool=Tool(
|
||||
name="web_search",
|
||||
description=(
|
||||
"Search the web for current information. "
|
||||
"Returns titles, URLs, and snippets from search results."
|
||||
),
|
||||
parameters={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": "The search query (1-500 characters)",
|
||||
},
|
||||
"num_results": {
|
||||
"type": "integer",
|
||||
"description": "Number of results (1-20, default 10)",
|
||||
},
|
||||
},
|
||||
"required": ["query"],
|
||||
},
|
||||
),
|
||||
executor=lambda inputs: _exec_web_search(inputs),
|
||||
)
|
||||
|
||||
_SCRAPE_HEADERS = {
|
||||
"User-Agent": (
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||
"Chrome/131.0.0.0 Safari/537.36"
|
||||
),
|
||||
"Accept": "text/html,application/xhtml+xml",
|
||||
}
|
||||
|
||||
|
||||
def _exec_web_scrape(inputs: dict) -> dict:
    """Fetch a webpage and return its title and cleaned main text.

    Reads "url" (str) and optionally "max_length" (int, clamped to
    1000-500000, default 50000) from *inputs*. Returns a dict with "url",
    "title", "content" and "length", or an "error" key on failure.
    """
    url = inputs.get("url", "")
    # Clamp the requested length to a sane window.
    max_length = max(1000, min(inputs.get("max_length", 50000), 500000))
    # Default to HTTPS when the caller omitted the scheme.
    if not url.startswith(("http://", "https://")):
        url = "https://" + url
    try:
        resp = httpx.get(
            url,
            timeout=30.0,
            follow_redirects=True,
            headers=_SCRAPE_HEADERS,
        )
        if resp.status_code != 200:
            return {"error": f"HTTP {resp.status_code}"}
        soup = BeautifulSoup(resp.text, "html.parser")
        # Drop non-content elements before extracting text.
        for tag in soup(["script", "style", "nav", "footer", "header", "aside", "noscript"]):
            tag.decompose()
        title = soup.title.get_text(strip=True) if soup.title else ""
        # Prefer semantic content containers, falling back to the whole body.
        main = (
            soup.find("article")
            or soup.find("main")
            or soup.find(attrs={"role": "main"})
            or soup.find("body")
        )
        text = main.get_text(separator=" ", strip=True) if main else ""
        # Collapse every run of whitespace to a single space.
        text = " ".join(text.split())
        if len(text) > max_length:
            text = text[:max_length] + "..."
        return {
            "url": url,
            "title": title,
            "content": text,
            "length": len(text),
        }
    except httpx.TimeoutException:
        return {"error": "Request timed out"}
    except Exception as e:
        return {"error": f"Scrape failed: {e}"}
|
||||
|
||||
|
||||
TOOL_REGISTRY.register(
|
||||
name="web_scrape",
|
||||
tool=Tool(
|
||||
name="web_scrape",
|
||||
description=(
|
||||
"Scrape and extract text content from a webpage URL. "
|
||||
"Returns the page title and main text content."
|
||||
),
|
||||
parameters={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"url": {
|
||||
"type": "string",
|
||||
"description": "URL of the webpage to scrape",
|
||||
},
|
||||
"max_length": {
|
||||
"type": "integer",
|
||||
"description": "Maximum text length (default 50000)",
|
||||
},
|
||||
},
|
||||
"required": ["url"],
|
||||
},
|
||||
),
|
||||
executor=lambda inputs: _exec_web_scrape(inputs),
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"ToolRegistry loaded: %s",
|
||||
", ".join(TOOL_REGISTRY.get_registered_names()),
|
||||
)
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Node Specs
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
# Node A of the pipeline: gathers material with web_search/web_scrape and
# writes its findings to the "research_summary" output key, which the
# ContextHandoff passes on to the Analyst node.
RESEARCHER_SPEC = NodeSpec(
    id="researcher",
    name="Researcher",
    description="Researches a topic using web search and scraping tools",
    node_type="event_loop",
    input_keys=["topic"],
    output_keys=["research_summary"],
    system_prompt=(
        "You are a thorough research assistant. Your job is to research "
        "the given topic using the web_search and web_scrape tools.\n\n"
        "1. Search for relevant information on the topic\n"
        "2. Scrape 1-2 of the most promising URLs for details\n"
        "3. Synthesize your findings into a comprehensive summary\n"
        "4. Use set_output with key='research_summary' to save your "
        "findings\n\n"
        "Be thorough but efficient. Aim for 2-4 search/scrape calls, "
        "then summarize and set_output."
    ),
)
|
||||
|
||||
# Node B of the pipeline: consumes the researcher's handed-off context
# ("context" input key) and produces the "analysis" output key.
ANALYST_SPEC = NodeSpec(
    id="analyst",
    name="Analyst",
    description="Analyzes research findings and provides insights",
    node_type="event_loop",
    input_keys=["context"],
    output_keys=["analysis"],
    system_prompt=(
        "You are a strategic analyst. You receive research findings from "
        "a previous researcher and must:\n\n"
        "1. Identify key themes and patterns\n"
        "2. Assess the reliability and significance of the findings\n"
        "3. Provide actionable insights and recommendations\n"
        "4. Use set_output with key='analysis' to save your analysis\n\n"
        "Be concise but insightful. Focus on what matters most."
    ),
)
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# HTML page
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
HTML_PAGE = ( # noqa: E501
|
||||
"""<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>ContextHandoff Demo</title>
|
||||
<style>
|
||||
* {
|
||||
box-sizing: border-box;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
body {
|
||||
font-family: 'SF Mono', 'Fira Code', monospace;
|
||||
background: #0d1117;
|
||||
color: #c9d1d9;
|
||||
height: 100vh;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
}
|
||||
header {
|
||||
background: #161b22;
|
||||
padding: 12px 20px;
|
||||
border-bottom: 1px solid #30363d;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 16px;
|
||||
}
|
||||
header h1 {
|
||||
font-size: 16px;
|
||||
color: #58a6ff;
|
||||
font-weight: 600;
|
||||
}
|
||||
.badge {
|
||||
font-size: 12px;
|
||||
padding: 3px 10px;
|
||||
border-radius: 12px;
|
||||
background: #21262d;
|
||||
color: #8b949e;
|
||||
}
|
||||
.badge.researcher {
|
||||
background: #1a3a5c;
|
||||
color: #58a6ff;
|
||||
}
|
||||
.badge.analyst {
|
||||
background: #1a4b2e;
|
||||
color: #3fb950;
|
||||
}
|
||||
.badge.handoff {
|
||||
background: #3d1f00;
|
||||
color: #d29922;
|
||||
}
|
||||
.badge.done {
|
||||
background: #21262d;
|
||||
color: #8b949e;
|
||||
}
|
||||
.badge.error {
|
||||
background: #4b1a1a;
|
||||
color: #f85149;
|
||||
}
|
||||
.chat {
|
||||
flex: 1;
|
||||
overflow-y: auto;
|
||||
padding: 16px;
|
||||
}
|
||||
.msg {
|
||||
margin: 8px 0;
|
||||
padding: 10px 14px;
|
||||
border-radius: 8px;
|
||||
line-height: 1.6;
|
||||
white-space: pre-wrap;
|
||||
word-wrap: break-word;
|
||||
}
|
||||
.msg.user {
|
||||
background: #1a3a5c;
|
||||
color: #58a6ff;
|
||||
}
|
||||
.msg.assistant {
|
||||
background: #161b22;
|
||||
color: #c9d1d9;
|
||||
}
|
||||
.msg.assistant.analyst-msg {
|
||||
border-left: 3px solid #3fb950;
|
||||
}
|
||||
.msg.event {
|
||||
background: transparent;
|
||||
color: #8b949e;
|
||||
font-size: 11px;
|
||||
padding: 4px 14px;
|
||||
border-left: 3px solid #30363d;
|
||||
}
|
||||
.msg.event.loop {
|
||||
border-left-color: #58a6ff;
|
||||
}
|
||||
.msg.event.tool {
|
||||
border-left-color: #d29922;
|
||||
}
|
||||
.msg.event.stall {
|
||||
border-left-color: #f85149;
|
||||
}
|
||||
.handoff-banner {
|
||||
margin: 16px 0;
|
||||
padding: 16px;
|
||||
background: #1c1200;
|
||||
border: 1px solid #d29922;
|
||||
border-radius: 8px;
|
||||
text-align: center;
|
||||
}
|
||||
.handoff-banner h3 {
|
||||
color: #d29922;
|
||||
font-size: 14px;
|
||||
margin-bottom: 8px;
|
||||
}
|
||||
.handoff-banner p, .result-banner p {
|
||||
color: #8b949e;
|
||||
font-size: 12px;
|
||||
line-height: 1.5;
|
||||
max-height: 200px;
|
||||
overflow-y: auto;
|
||||
white-space: pre-wrap;
|
||||
text-align: left;
|
||||
}
|
||||
.result-banner {
|
||||
margin: 16px 0;
|
||||
padding: 16px;
|
||||
background: #0a2614;
|
||||
border: 1px solid #3fb950;
|
||||
border-radius: 8px;
|
||||
}
|
||||
.result-banner h3 {
|
||||
color: #3fb950;
|
||||
font-size: 14px;
|
||||
margin-bottom: 8px;
|
||||
text-align: center;
|
||||
}
|
||||
.result-banner .label {
|
||||
color: #58a6ff;
|
||||
font-size: 11px;
|
||||
font-weight: 600;
|
||||
margin-top: 10px;
|
||||
margin-bottom: 2px;
|
||||
}
|
||||
.result-banner .tokens {
|
||||
color: #484f58;
|
||||
font-size: 11px;
|
||||
text-align: center;
|
||||
margin-top: 10px;
|
||||
}
|
||||
.input-bar {
|
||||
padding: 12px 16px;
|
||||
background: #161b22;
|
||||
border-top: 1px solid #30363d;
|
||||
display: flex;
|
||||
gap: 8px;
|
||||
}
|
||||
.input-bar input {
|
||||
flex: 1;
|
||||
background: #0d1117;
|
||||
border: 1px solid #30363d;
|
||||
color: #c9d1d9;
|
||||
padding: 8px 12px;
|
||||
border-radius: 6px;
|
||||
font-family: inherit;
|
||||
font-size: 14px;
|
||||
outline: none;
|
||||
}
|
||||
.input-bar input:focus {
|
||||
border-color: #58a6ff;
|
||||
}
|
||||
.input-bar button {
|
||||
background: #238636;
|
||||
color: #fff;
|
||||
border: none;
|
||||
padding: 8px 20px;
|
||||
border-radius: 6px;
|
||||
cursor: pointer;
|
||||
font-family: inherit;
|
||||
font-weight: 600;
|
||||
}
|
||||
.input-bar button:hover {
|
||||
background: #2ea043;
|
||||
}
|
||||
.input-bar button:disabled {
|
||||
background: #21262d;
|
||||
color: #484f58;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<h1>ContextHandoff Demo</h1>
|
||||
<span id="phase" class="badge">Idle</span>
|
||||
<span id="iter" class="badge" style="display:none">Step 0</span>
|
||||
</header>
|
||||
<div id="chat" class="chat"></div>
|
||||
<div class="input-bar">
|
||||
<input id="input" type="text"
|
||||
placeholder="Enter a research topic..." autofocus />
|
||||
<button id="go" onclick="run()">Research</button>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
let ws = null;
|
||||
let currentAssistantEl = null;
|
||||
let iterCount = 0;
|
||||
let currentPhase = 'idle';
|
||||
const chat = document.getElementById('chat');
|
||||
const phase = document.getElementById('phase');
|
||||
const iterEl = document.getElementById('iter');
|
||||
const goBtn = document.getElementById('go');
|
||||
const inputEl = document.getElementById('input');
|
||||
|
||||
inputEl.addEventListener('keydown', e => {
|
||||
if (e.key === 'Enter') run();
|
||||
});
|
||||
|
||||
function setPhase(text, cls) {
|
||||
phase.textContent = text;
|
||||
phase.className = 'badge ' + cls;
|
||||
currentPhase = cls;
|
||||
}
|
||||
|
||||
function addMsg(text, cls) {
|
||||
const el = document.createElement('div');
|
||||
el.className = 'msg ' + cls;
|
||||
el.textContent = text;
|
||||
chat.appendChild(el);
|
||||
chat.scrollTop = chat.scrollHeight;
|
||||
return el;
|
||||
}
|
||||
|
||||
function addHandoffBanner(summary) {
|
||||
const banner = document.createElement('div');
|
||||
banner.className = 'handoff-banner';
|
||||
const h3 = document.createElement('h3');
|
||||
h3.textContent = 'Context Handoff: Researcher -> Analyst';
|
||||
const p = document.createElement('p');
|
||||
p.textContent = summary || 'Passing research context...';
|
||||
banner.appendChild(h3);
|
||||
banner.appendChild(p);
|
||||
chat.appendChild(banner);
|
||||
chat.scrollTop = chat.scrollHeight;
|
||||
}
|
||||
|
||||
function addResultBanner(researcher, analyst, tokens) {
|
||||
const banner = document.createElement('div');
|
||||
banner.className = 'result-banner';
|
||||
const h3 = document.createElement('h3');
|
||||
h3.textContent = 'Pipeline Complete';
|
||||
banner.appendChild(h3);
|
||||
|
||||
if (researcher && researcher.research_summary) {
|
||||
const lbl = document.createElement('div');
|
||||
lbl.className = 'label';
|
||||
lbl.textContent = 'RESEARCH SUMMARY';
|
||||
banner.appendChild(lbl);
|
||||
const p = document.createElement('p');
|
||||
p.textContent = researcher.research_summary;
|
||||
banner.appendChild(p);
|
||||
}
|
||||
|
||||
if (analyst && analyst.analysis) {
|
||||
const lbl = document.createElement('div');
|
||||
lbl.className = 'label';
|
||||
lbl.textContent = 'ANALYSIS';
|
||||
lbl.style.color = '#3fb950';
|
||||
banner.appendChild(lbl);
|
||||
const p = document.createElement('p');
|
||||
p.textContent = analyst.analysis;
|
||||
banner.appendChild(p);
|
||||
}
|
||||
|
||||
if (tokens) {
|
||||
const t = document.createElement('div');
|
||||
t.className = 'tokens';
|
||||
t.textContent = 'Total tokens: ' + tokens.toLocaleString();
|
||||
banner.appendChild(t);
|
||||
}
|
||||
|
||||
chat.appendChild(banner);
|
||||
chat.scrollTop = chat.scrollHeight;
|
||||
}
|
||||
|
||||
function connect() {
|
||||
ws = new WebSocket('ws://' + location.host + '/ws');
|
||||
ws.onopen = () => {
|
||||
setPhase('Ready', 'done');
|
||||
goBtn.disabled = false;
|
||||
};
|
||||
ws.onmessage = handleEvent;
|
||||
ws.onerror = () => { setPhase('Error', 'error'); };
|
||||
ws.onclose = () => {
|
||||
setPhase('Reconnecting...', '');
|
||||
goBtn.disabled = true;
|
||||
setTimeout(connect, 2000);
|
||||
};
|
||||
}
|
||||
|
||||
function handleEvent(msg) {
|
||||
const evt = JSON.parse(msg.data);
|
||||
|
||||
if (evt.type === 'phase') {
|
||||
if (evt.phase === 'researcher') {
|
||||
setPhase('Researcher', 'researcher');
|
||||
} else if (evt.phase === 'handoff') {
|
||||
setPhase('Handoff', 'handoff');
|
||||
} else if (evt.phase === 'analyst') {
|
||||
setPhase('Analyst', 'analyst');
|
||||
}
|
||||
iterCount = 0;
|
||||
iterEl.style.display = 'none';
|
||||
}
|
||||
else if (evt.type === 'llm_text_delta') {
|
||||
if (currentAssistantEl) {
|
||||
currentAssistantEl.textContent += evt.content;
|
||||
chat.scrollTop = chat.scrollHeight;
|
||||
}
|
||||
}
|
||||
else if (evt.type === 'node_loop_iteration') {
|
||||
iterCount = evt.iteration || (iterCount + 1);
|
||||
iterEl.textContent = 'Step ' + iterCount;
|
||||
iterEl.style.display = '';
|
||||
}
|
||||
else if (evt.type === 'tool_call_started') {
|
||||
var info = evt.tool_name + '('
|
||||
+ JSON.stringify(evt.tool_input).slice(0, 120) + ')';
|
||||
addMsg('TOOL ' + info, 'event tool');
|
||||
}
|
||||
else if (evt.type === 'tool_call_completed') {
|
||||
var preview = (evt.result || '').slice(0, 200);
|
||||
var cls = evt.is_error ? 'stall' : 'tool';
|
||||
addMsg(
|
||||
'RESULT ' + evt.tool_name + ': ' + preview,
|
||||
'event ' + cls
|
||||
);
|
||||
var assistCls = currentPhase === 'analyst'
|
||||
? 'assistant analyst-msg' : 'assistant';
|
||||
currentAssistantEl = addMsg('', assistCls);
|
||||
}
|
||||
else if (evt.type === 'handoff_context') {
|
||||
addHandoffBanner(evt.summary);
|
||||
var assistCls = 'assistant analyst-msg';
|
||||
currentAssistantEl = addMsg('', assistCls);
|
||||
}
|
||||
else if (evt.type === 'node_result') {
|
||||
if (evt.node_id === 'researcher') {
|
||||
if (currentAssistantEl
|
||||
&& !currentAssistantEl.textContent) {
|
||||
currentAssistantEl.remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (evt.type === 'done') {
|
||||
setPhase('Done', 'done');
|
||||
iterEl.style.display = 'none';
|
||||
if (currentAssistantEl
|
||||
&& !currentAssistantEl.textContent) {
|
||||
currentAssistantEl.remove();
|
||||
}
|
||||
currentAssistantEl = null;
|
||||
addResultBanner(
|
||||
evt.researcher, evt.analyst, evt.total_tokens
|
||||
);
|
||||
goBtn.disabled = false;
|
||||
inputEl.placeholder = 'Enter another topic...';
|
||||
}
|
||||
else if (evt.type === 'error') {
|
||||
setPhase('Error', 'error');
|
||||
addMsg('ERROR ' + evt.message, 'event stall');
|
||||
goBtn.disabled = false;
|
||||
}
|
||||
else if (evt.type === 'node_stalled') {
|
||||
addMsg('STALLED ' + evt.reason, 'event stall');
|
||||
}
|
||||
}
|
||||
|
||||
function run() {
|
||||
const text = inputEl.value.trim();
|
||||
if (!text || !ws || ws.readyState !== 1) return;
|
||||
chat.innerHTML = '';
|
||||
addMsg(text, 'user');
|
||||
currentAssistantEl = addMsg('', 'assistant');
|
||||
inputEl.value = '';
|
||||
goBtn.disabled = true;
|
||||
ws.send(JSON.stringify({ topic: text }));
|
||||
}
|
||||
|
||||
connect();
|
||||
</script>
|
||||
</body>
|
||||
</html>"""
|
||||
)
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# WebSocket handler — sequential Node A → Handoff → Node B
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def handle_ws(websocket):
    """Run the two-node handoff pipeline per user message.

    Listens for JSON frames of the form ``{"topic": ...}`` and launches
    one research -> analysis pipeline per topic. Frames that are not
    valid JSON, or that carry an empty topic, are skipped. A closed
    connection ends the handler quietly.
    """
    try:
        async for frame in websocket:
            try:
                message = json.loads(frame)
            except Exception:
                # Ignore frames that are not valid JSON.
                continue

            topic = message.get("topic", "")
            if not topic:
                continue

            logger.info(f"Starting handoff pipeline for: {topic}")

            try:
                await _run_pipeline(websocket, topic)
            except websockets.exceptions.ConnectionClosed:
                logger.info("WebSocket closed during pipeline")
                return
            except Exception as exc:
                logger.exception("Pipeline error")
                # Best-effort error report; the socket may already be gone.
                try:
                    await websocket.send(json.dumps({"type": "error", "message": str(exc)}))
                except Exception:
                    pass

    except websockets.exceptions.ConnectionClosed:
        # Client disconnected between pipelines — nothing to do.
        pass
|
||||
|
||||
|
||||
async def _run_pipeline(websocket, topic: str):
    """Execute: Node A (research) → ContextHandoff → Node B (analysis).

    Streams runtime events to the browser over ``websocket`` while the
    two nodes run. Fresh, temporary conversation stores are created for
    each run and removed when the pipeline finishes — including on early
    returns and exceptions (previously the cleanup only ran on the happy
    path, leaking one temp directory per failed run).

    Args:
        websocket: Open WebSocket connection to the browser client.
        topic: Research topic entered by the user.
    """
    import shutil

    # Fresh stores for each run
    run_dir = Path(tempfile.mkdtemp(prefix="hive_run_", dir=STORE_DIR))
    try:
        store_a = FileConversationStore(run_dir / "node_a")
        store_b = FileConversationStore(run_dir / "node_b")

        # Shared event bus
        bus = EventBus()

        async def forward_event(event):
            # Relay runtime events to the browser; swallow send failures
            # since the socket may close mid-pipeline.
            try:
                payload = {"type": event.type.value, **event.data}
                if event.node_id:
                    payload["node_id"] = event.node_id
                await websocket.send(json.dumps(payload))
            except Exception:
                pass

        bus.subscribe(
            event_types=[
                EventType.NODE_LOOP_STARTED,
                EventType.NODE_LOOP_ITERATION,
                EventType.NODE_LOOP_COMPLETED,
                EventType.LLM_TEXT_DELTA,
                EventType.TOOL_CALL_STARTED,
                EventType.TOOL_CALL_COMPLETED,
                EventType.NODE_STALLED,
            ],
            handler=forward_event,
        )

        tools = list(TOOL_REGISTRY.get_tools().values())
        tool_executor = TOOL_REGISTRY.get_executor()

        # ---- Phase 1: Researcher --------------------------------------------
        await websocket.send(json.dumps({"type": "phase", "phase": "researcher"}))

        node_a = EventLoopNode(
            event_bus=bus,
            judge=None,  # implicit judge: accept when output_keys filled
            config=LoopConfig(
                max_iterations=20,
                max_tool_calls_per_turn=10,
                max_history_tokens=32_000,
            ),
            conversation_store=store_a,
            tool_executor=tool_executor,
        )

        ctx_a = NodeContext(
            runtime=RUNTIME,
            node_id="researcher",
            node_spec=RESEARCHER_SPEC,
            memory=SharedMemory(),
            input_data={"topic": topic},
            llm=LLM,
            available_tools=tools,
        )

        result_a = await node_a.execute(ctx_a)
        logger.info(
            "Researcher done: success=%s, tokens=%s",
            result_a.success,
            result_a.tokens_used,
        )

        await websocket.send(
            json.dumps(
                {
                    "type": "node_result",
                    "node_id": "researcher",
                    "success": result_a.success,
                    "output": result_a.output,
                }
            )
        )

        if not result_a.success:
            await websocket.send(
                json.dumps(
                    {
                        "type": "error",
                        "message": f"Researcher failed: {result_a.error}",
                    }
                )
            )
            return

        # ---- Phase 2: Context Handoff ---------------------------------------
        await websocket.send(json.dumps({"type": "phase", "phase": "handoff"}))

        # Restore the researcher's conversation from store
        conversation_a = await NodeConversation.restore(store_a)
        if conversation_a is None:
            await websocket.send(
                json.dumps(
                    {
                        "type": "error",
                        "message": "Failed to restore researcher conversation",
                    }
                )
            )
            return

        handoff_engine = ContextHandoff(llm=LLM)
        handoff_context = handoff_engine.summarize_conversation(
            conversation=conversation_a,
            node_id="researcher",
            output_keys=["research_summary"],
        )

        formatted_handoff = ContextHandoff.format_as_input(handoff_context)
        logger.info(
            "Handoff: %d turns, ~%d tokens, keys=%s",
            handoff_context.turn_count,
            handoff_context.total_tokens_used,
            list(handoff_context.key_outputs.keys()),
        )

        # Send handoff context to browser
        await websocket.send(
            json.dumps(
                {
                    "type": "handoff_context",
                    "summary": handoff_context.summary[:500],
                    "turn_count": handoff_context.turn_count,
                    "tokens": handoff_context.total_tokens_used,
                    "key_outputs": handoff_context.key_outputs,
                }
            )
        )

        # ---- Phase 3: Analyst -----------------------------------------------
        await websocket.send(json.dumps({"type": "phase", "phase": "analyst"}))

        node_b = EventLoopNode(
            event_bus=bus,
            judge=None,  # implicit judge
            config=LoopConfig(
                max_iterations=10,
                max_tool_calls_per_turn=5,
                max_history_tokens=32_000,
            ),
            conversation_store=store_b,
        )

        ctx_b = NodeContext(
            runtime=RUNTIME,
            node_id="analyst",
            node_spec=ANALYST_SPEC,
            memory=SharedMemory(),
            input_data={"context": formatted_handoff},
            llm=LLM,
            available_tools=[],
        )

        result_b = await node_b.execute(ctx_b)
        logger.info(
            "Analyst done: success=%s, tokens=%s",
            result_b.success,
            result_b.tokens_used,
        )

        # ---- Done -----------------------------------------------------------
        await websocket.send(
            json.dumps(
                {
                    "type": "done",
                    "researcher": result_a.output,
                    "analyst": result_b.output,
                    "total_tokens": ((result_a.tokens_used or 0) + (result_b.tokens_used or 0)),
                }
            )
        )
    finally:
        # Clean up temp stores even on early return or error.
        try:
            shutil.rmtree(run_dir)
        except Exception:
            pass
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# HTTP handler
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def process_request(connection, request: Request):
    """Serve HTML on GET /, upgrade to WebSocket on /ws."""
    if request.path != "/ws":
        headers = websockets.Headers({"Content-Type": "text/html; charset=utf-8"})
        return Response(HTTPStatus.OK, "OK", headers, HTML_PAGE.encode())
    # Returning None lets the websockets library perform the upgrade.
    return None
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Main
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def main():
    """Start the combined HTTP/WebSocket demo server and run forever."""
    listen_port = 8766
    async with websockets.serve(
        handle_ws,
        "0.0.0.0",
        listen_port,
        process_request=process_request,
    ):
        logger.info(f"Handoff demo at http://localhost:{listen_port}")
        logger.info("Enter a research topic to start the pipeline.")
        # Block forever; the server runs until the process is interrupted.
        await asyncio.Future()


if __name__ == "__main__":
    asyncio.run(main())
|
||||
File diff suppressed because it is too large
Load Diff
@@ -9,7 +9,7 @@ for understanding the core runtime loop:
|
||||
Setup -> Graph definition -> Execution -> Result
|
||||
|
||||
Run with:
|
||||
PYTHONPATH=core python core/examples/manual_agent.py
|
||||
uv run python core/examples/manual_agent.py
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
|
||||
@@ -15,7 +15,7 @@ You cannot skip steps or bypass validation.
|
||||
|
||||
from collections.abc import Callable
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
@@ -26,7 +26,7 @@ from framework.graph.goal import Goal
|
||||
from framework.graph.node import NodeSpec
|
||||
|
||||
|
||||
class BuildPhase(str, Enum):
|
||||
class BuildPhase(StrEnum):
|
||||
"""Current phase of the build process."""
|
||||
|
||||
INIT = "init" # Just started
|
||||
|
||||
@@ -64,6 +64,8 @@ class AdenCachedStorage(CredentialStorage):
|
||||
- **Reads**: Try local cache first, fallback to Aden if stale/missing
|
||||
- **Writes**: Always write to local cache
|
||||
- **Offline resilience**: Uses cached credentials when Aden is unreachable
|
||||
- **Provider-based lookup**: Match credentials by provider name (e.g., "hubspot")
|
||||
when direct ID lookup fails, since Aden uses hash-based IDs internally.
|
||||
|
||||
The cache TTL determines how long to trust local credentials before
|
||||
checking with the Aden server for updates. This balances:
|
||||
@@ -85,6 +87,7 @@ class AdenCachedStorage(CredentialStorage):
|
||||
|
||||
# First access fetches from Aden
|
||||
# Subsequent accesses use cache until TTL expires
|
||||
# Can look up by provider name OR credential ID
|
||||
token = store.get_key("hubspot", "access_token")
|
||||
"""
|
||||
|
||||
@@ -111,21 +114,24 @@ class AdenCachedStorage(CredentialStorage):
|
||||
self._cache_ttl = timedelta(seconds=cache_ttl_seconds)
|
||||
self._prefer_local = prefer_local
|
||||
self._cache_timestamps: dict[str, datetime] = {}
|
||||
# Index: provider name (e.g., "hubspot") -> credential hash ID
|
||||
self._provider_index: dict[str, str] = {}
|
||||
|
||||
def save(self, credential: CredentialObject) -> None:
|
||||
"""
|
||||
Save credential to local cache.
|
||||
Save credential to local cache and update provider index.
|
||||
|
||||
Args:
|
||||
credential: The credential to save.
|
||||
"""
|
||||
self._local.save(credential)
|
||||
self._cache_timestamps[credential.id] = datetime.now(UTC)
|
||||
self._index_provider(credential)
|
||||
logger.debug(f"Cached credential '{credential.id}'")
|
||||
|
||||
def load(self, credential_id: str) -> CredentialObject | None:
|
||||
"""
|
||||
Load credential from cache, with Aden fallback.
|
||||
Load credential from cache, with Aden fallback and provider-based lookup.
|
||||
|
||||
The loading strategy depends on the `prefer_local` setting:
|
||||
|
||||
@@ -141,8 +147,37 @@ class AdenCachedStorage(CredentialStorage):
|
||||
2. Update local cache with response
|
||||
3. Fall back to local cache only if Aden fails
|
||||
|
||||
Provider-based lookup:
|
||||
When a provider index mapping exists for the credential_id (e.g.,
|
||||
"hubspot" → hash ID), the Aden-synced credential is loaded first.
|
||||
This ensures fresh OAuth tokens from Aden take priority over stale
|
||||
local credentials (env vars, old encrypted files).
|
||||
|
||||
Args:
|
||||
credential_id: The credential identifier.
|
||||
credential_id: The credential identifier or provider name.
|
||||
|
||||
Returns:
|
||||
CredentialObject if found, None otherwise.
|
||||
"""
|
||||
# Check provider index first — Aden-synced credentials take priority
|
||||
resolved_id = self._provider_index.get(credential_id)
|
||||
if resolved_id and resolved_id != credential_id:
|
||||
result = self._load_by_id(resolved_id)
|
||||
if result is not None:
|
||||
logger.info(
|
||||
f"Loaded credential '{credential_id}' via provider index (id='{resolved_id}')"
|
||||
)
|
||||
return result
|
||||
|
||||
# Direct lookup (exact credential_id match)
|
||||
return self._load_by_id(credential_id)
|
||||
|
||||
def _load_by_id(self, credential_id: str) -> CredentialObject | None:
|
||||
"""
|
||||
Load credential by exact ID from cache, with Aden fallback.
|
||||
|
||||
Args:
|
||||
credential_id: The exact credential identifier.
|
||||
|
||||
Returns:
|
||||
CredentialObject if found, None otherwise.
|
||||
@@ -200,15 +235,21 @@ class AdenCachedStorage(CredentialStorage):
|
||||
|
||||
def exists(self, credential_id: str) -> bool:
|
||||
"""
|
||||
Check if credential exists in local cache.
|
||||
Check if credential exists in local cache (by ID or provider name).
|
||||
|
||||
Args:
|
||||
credential_id: The credential identifier.
|
||||
credential_id: The credential identifier or provider name.
|
||||
|
||||
Returns:
|
||||
True if credential exists locally.
|
||||
"""
|
||||
return self._local.exists(credential_id)
|
||||
if self._local.exists(credential_id):
|
||||
return True
|
||||
# Check provider index
|
||||
resolved_id = self._provider_index.get(credential_id)
|
||||
if resolved_id and resolved_id != credential_id:
|
||||
return self._local.exists(resolved_id)
|
||||
return False
|
||||
|
||||
def _is_cache_fresh(self, credential_id: str) -> bool:
|
||||
"""
|
||||
@@ -242,6 +283,47 @@ class AdenCachedStorage(CredentialStorage):
|
||||
self._cache_timestamps.clear()
|
||||
logger.debug("Invalidated all cache entries")
|
||||
|
||||
def _index_provider(self, credential: CredentialObject) -> None:
|
||||
"""
|
||||
Index a credential by its provider/integration type.
|
||||
|
||||
Aden credentials carry an ``_integration_type`` key whose value is
|
||||
the provider name (e.g., ``hubspot``). This method maps that
|
||||
provider name to the credential's hash ID so that subsequent
|
||||
``load("hubspot")`` calls resolve to the correct credential.
|
||||
|
||||
Args:
|
||||
credential: The credential to index.
|
||||
"""
|
||||
integration_type_key = credential.keys.get("_integration_type")
|
||||
if integration_type_key is None:
|
||||
return
|
||||
provider_name = integration_type_key.value.get_secret_value()
|
||||
if provider_name:
|
||||
self._provider_index[provider_name] = credential.id
|
||||
logger.debug(f"Indexed provider '{provider_name}' -> '{credential.id}'")
|
||||
|
||||
def rebuild_provider_index(self) -> int:
|
||||
"""
|
||||
Rebuild the provider index from all locally cached credentials.
|
||||
|
||||
Useful after loading from disk when the in-memory index is empty.
|
||||
|
||||
Returns:
|
||||
Number of provider mappings indexed.
|
||||
"""
|
||||
self._provider_index.clear()
|
||||
indexed = 0
|
||||
for cred_id in self._local.list_all():
|
||||
cred = self._local.load(cred_id)
|
||||
if cred:
|
||||
before = len(self._provider_index)
|
||||
self._index_provider(cred)
|
||||
if len(self._provider_index) > before:
|
||||
indexed += 1
|
||||
logger.debug(f"Rebuilt provider index with {indexed} mappings")
|
||||
return indexed
|
||||
|
||||
def sync_all_from_aden(self) -> int:
|
||||
"""
|
||||
Sync all credentials from Aden server to local cache.
|
||||
|
||||
@@ -589,6 +589,149 @@ class TestAdenCachedStorage:
|
||||
assert info["stale"]["is_fresh"] is False
|
||||
assert info["stale"]["ttl_remaining_seconds"] == 0
|
||||
|
||||
def test_save_indexes_provider(self, cached_storage):
|
||||
"""Test save builds the provider index from _integration_type key."""
|
||||
cred = CredentialObject(
|
||||
id="aHVic3BvdDp0ZXN0OjEzNjExOjExNTI1",
|
||||
credential_type=CredentialType.OAUTH2,
|
||||
keys={
|
||||
"access_token": CredentialKey(
|
||||
name="access_token",
|
||||
value=SecretStr("token-value"),
|
||||
),
|
||||
"_integration_type": CredentialKey(
|
||||
name="_integration_type",
|
||||
value=SecretStr("hubspot"),
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
cached_storage.save(cred)
|
||||
|
||||
assert cached_storage._provider_index["hubspot"] == "aHVic3BvdDp0ZXN0OjEzNjExOjExNTI1"
|
||||
|
||||
def test_load_by_provider_name(self, cached_storage):
|
||||
"""Test load resolves provider name to hash-based credential ID."""
|
||||
hash_id = "aHVic3BvdDp0ZXN0OjEzNjExOjExNTI1"
|
||||
cred = CredentialObject(
|
||||
id=hash_id,
|
||||
credential_type=CredentialType.OAUTH2,
|
||||
keys={
|
||||
"access_token": CredentialKey(
|
||||
name="access_token",
|
||||
value=SecretStr("hubspot-token"),
|
||||
),
|
||||
"_integration_type": CredentialKey(
|
||||
name="_integration_type",
|
||||
value=SecretStr("hubspot"),
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
# Save builds the index
|
||||
cached_storage.save(cred)
|
||||
|
||||
# Load by provider name should resolve to the hash ID
|
||||
loaded = cached_storage.load("hubspot")
|
||||
|
||||
assert loaded is not None
|
||||
assert loaded.id == hash_id
|
||||
assert loaded.keys["access_token"].value.get_secret_value() == "hubspot-token"
|
||||
|
||||
def test_load_by_direct_id_still_works(self, cached_storage):
|
||||
"""Test load by direct hash ID still works as before."""
|
||||
hash_id = "aHVic3BvdDp0ZXN0OjEzNjExOjExNTI1"
|
||||
cred = CredentialObject(
|
||||
id=hash_id,
|
||||
credential_type=CredentialType.OAUTH2,
|
||||
keys={
|
||||
"access_token": CredentialKey(
|
||||
name="access_token",
|
||||
value=SecretStr("token"),
|
||||
),
|
||||
"_integration_type": CredentialKey(
|
||||
name="_integration_type",
|
||||
value=SecretStr("hubspot"),
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
cached_storage.save(cred)
|
||||
|
||||
# Direct ID lookup should still work
|
||||
loaded = cached_storage.load(hash_id)
|
||||
|
||||
assert loaded is not None
|
||||
assert loaded.id == hash_id
|
||||
|
||||
def test_exists_by_provider_name(self, cached_storage):
|
||||
"""Test exists resolves provider name to hash-based credential ID."""
|
||||
hash_id = "c2xhY2s6dGVzdDo5OTk="
|
||||
cred = CredentialObject(
|
||||
id=hash_id,
|
||||
credential_type=CredentialType.OAUTH2,
|
||||
keys={
|
||||
"access_token": CredentialKey(
|
||||
name="access_token",
|
||||
value=SecretStr("slack-token"),
|
||||
),
|
||||
"_integration_type": CredentialKey(
|
||||
name="_integration_type",
|
||||
value=SecretStr("slack"),
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
cached_storage.save(cred)
|
||||
|
||||
assert cached_storage.exists("slack") is True
|
||||
assert cached_storage.exists(hash_id) is True
|
||||
assert cached_storage.exists("nonexistent") is False
|
||||
|
||||
def test_rebuild_provider_index(self, cached_storage, local_storage):
|
||||
"""Test rebuild_provider_index reconstructs from local storage."""
|
||||
# Manually save credentials to local storage (bypassing cached_storage.save)
|
||||
for provider_name, hash_id in [("hubspot", "hash_hub"), ("slack", "hash_slack")]:
|
||||
cred = CredentialObject(
|
||||
id=hash_id,
|
||||
credential_type=CredentialType.OAUTH2,
|
||||
keys={
|
||||
"_integration_type": CredentialKey(
|
||||
name="_integration_type",
|
||||
value=SecretStr(provider_name),
|
||||
),
|
||||
},
|
||||
)
|
||||
local_storage.save(cred)
|
||||
|
||||
# Index should be empty (we bypassed save)
|
||||
assert len(cached_storage._provider_index) == 0
|
||||
|
||||
# Rebuild
|
||||
indexed = cached_storage.rebuild_provider_index()
|
||||
|
||||
assert indexed == 2
|
||||
assert cached_storage._provider_index["hubspot"] == "hash_hub"
|
||||
assert cached_storage._provider_index["slack"] == "hash_slack"
|
||||
|
||||
def test_save_without_integration_type_no_index(self, cached_storage):
|
||||
"""Test save does not index credentials without _integration_type key."""
|
||||
cred = CredentialObject(
|
||||
id="plain-cred",
|
||||
credential_type=CredentialType.API_KEY,
|
||||
keys={
|
||||
"api_key": CredentialKey(
|
||||
name="api_key",
|
||||
value=SecretStr("key-value"),
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
cached_storage.save(cred)
|
||||
|
||||
assert "plain-cred" not in cached_storage._provider_index
|
||||
assert len(cached_storage._provider_index) == 0
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Integration Tests
|
||||
|
||||
@@ -8,7 +8,7 @@ containing one or more keys (e.g., api_key, access_token, refresh_token).
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import UTC, datetime
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field, SecretStr
|
||||
@@ -19,7 +19,7 @@ def _utc_now() -> datetime:
|
||||
return datetime.now(UTC)
|
||||
|
||||
|
||||
class CredentialType(str, Enum):
|
||||
class CredentialType(StrEnum):
|
||||
"""Types of credentials the store can manage."""
|
||||
|
||||
API_KEY = "api_key"
|
||||
|
||||
@@ -96,7 +96,7 @@ class BaseOAuth2Provider(CredentialProvider):
|
||||
self._client = httpx.Client(timeout=self.config.request_timeout)
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"OAuth2 provider requires 'httpx'. Install with: pip install httpx"
|
||||
"OAuth2 provider requires 'httpx'. Install with: uv pip install httpx"
|
||||
) from e
|
||||
return self._client
|
||||
|
||||
|
||||
@@ -11,11 +11,11 @@ from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
|
||||
class TokenPlacement(str, Enum):
|
||||
class TokenPlacement(StrEnum):
|
||||
"""Where to place the access token in HTTP requests."""
|
||||
|
||||
HEADER_BEARER = "header_bearer"
|
||||
|
||||
@@ -136,7 +136,8 @@ class EncryptedFileStorage(CredentialStorage):
|
||||
from cryptography.fernet import Fernet
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"Encrypted storage requires 'cryptography'. Install with: pip install cryptography"
|
||||
"Encrypted storage requires 'cryptography'. "
|
||||
"Install with: uv pip install cryptography"
|
||||
) from e
|
||||
|
||||
self.base_path = Path(base_path or self.DEFAULT_PATH).expanduser()
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
HashiCorp Vault storage adapter.
|
||||
|
||||
Provides integration with HashiCorp Vault for enterprise secret management.
|
||||
Requires the 'hvac' package: pip install hvac
|
||||
Requires the 'hvac' package: uv pip install hvac
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -66,7 +66,7 @@ class HashiCorpVaultStorage(CredentialStorage):
|
||||
- AWS IAM auth method
|
||||
|
||||
Requirements:
|
||||
pip install hvac
|
||||
uv pip install hvac
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -97,7 +97,7 @@ class HashiCorpVaultStorage(CredentialStorage):
|
||||
import hvac
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"HashiCorp Vault support requires 'hvac'. Install with: pip install hvac"
|
||||
"HashiCorp Vault support requires 'hvac'. Install with: uv pip install hvac"
|
||||
) from e
|
||||
|
||||
self._url = url
|
||||
|
||||
@@ -1,8 +1,22 @@
|
||||
"""Graph structures: Goals, Nodes, Edges, and Flexible Execution."""
|
||||
|
||||
from framework.graph.client_io import (
|
||||
ActiveNodeClientIO,
|
||||
ClientIOGateway,
|
||||
InertNodeClientIO,
|
||||
NodeClientIO,
|
||||
)
|
||||
from framework.graph.code_sandbox import CodeSandbox, safe_eval, safe_exec
|
||||
from framework.graph.context_handoff import ContextHandoff, HandoffContext
|
||||
from framework.graph.conversation import ConversationStore, Message, NodeConversation
|
||||
from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
|
||||
from framework.graph.event_loop_node import (
|
||||
EventLoopNode,
|
||||
JudgeProtocol,
|
||||
JudgeVerdict,
|
||||
LoopConfig,
|
||||
OutputAccumulator,
|
||||
)
|
||||
from framework.graph.executor import GraphExecutor
|
||||
from framework.graph.flexible_executor import ExecutorConfig, FlexibleGraphExecutor
|
||||
from framework.graph.goal import Constraint, Goal, GoalStatus, SuccessCriterion
|
||||
@@ -77,4 +91,18 @@ __all__ = [
|
||||
"NodeConversation",
|
||||
"ConversationStore",
|
||||
"Message",
|
||||
# Event Loop
|
||||
"EventLoopNode",
|
||||
"LoopConfig",
|
||||
"OutputAccumulator",
|
||||
"JudgeProtocol",
|
||||
"JudgeVerdict",
|
||||
# Context Handoff
|
||||
"ContextHandoff",
|
||||
"HandoffContext",
|
||||
# Client I/O
|
||||
"NodeClientIO",
|
||||
"ActiveNodeClientIO",
|
||||
"InertNodeClientIO",
|
||||
"ClientIOGateway",
|
||||
]
|
||||
|
||||
@@ -0,0 +1,170 @@
|
||||
"""
|
||||
Client I/O gateway for graph nodes.
|
||||
|
||||
Provides the bridge between node code and external clients:
|
||||
- ActiveNodeClientIO: for client_facing=True nodes (streams output, accepts input)
|
||||
- InertNodeClientIO: for client_facing=False nodes (logs internally, redirects input)
|
||||
- ClientIOGateway: factory that creates the right variant per node
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import AsyncIterator
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from framework.runtime.event_bus import EventBus
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class NodeClientIO(ABC):
|
||||
"""Abstract base for node client I/O."""
|
||||
|
||||
@abstractmethod
|
||||
async def emit_output(self, content: str, is_final: bool = False) -> None:
|
||||
"""Emit output content. If is_final=True, signal end of stream."""
|
||||
|
||||
@abstractmethod
|
||||
async def request_input(self, prompt: str = "", timeout: float | None = None) -> str:
|
||||
"""Request input. Behavior depends on whether the node is client-facing."""
|
||||
|
||||
|
||||
class ActiveNodeClientIO(NodeClientIO):
|
||||
"""
|
||||
Client I/O for client_facing=True nodes.
|
||||
|
||||
- emit_output() queues content and publishes CLIENT_OUTPUT_DELTA.
|
||||
- request_input() publishes CLIENT_INPUT_REQUESTED, then awaits provide_input().
|
||||
- output_stream() yields queued content until the final sentinel.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
node_id: str,
|
||||
event_bus: EventBus | None = None,
|
||||
) -> None:
|
||||
self.node_id = node_id
|
||||
self._event_bus = event_bus
|
||||
|
||||
self._output_queue: asyncio.Queue[str | None] = asyncio.Queue()
|
||||
self._output_snapshot = ""
|
||||
|
||||
self._input_event: asyncio.Event | None = None
|
||||
self._input_result: str | None = None
|
||||
|
||||
async def emit_output(self, content: str, is_final: bool = False) -> None:
|
||||
self._output_snapshot += content
|
||||
await self._output_queue.put(content)
|
||||
|
||||
if self._event_bus is not None:
|
||||
await self._event_bus.emit_client_output_delta(
|
||||
stream_id=self.node_id,
|
||||
node_id=self.node_id,
|
||||
content=content,
|
||||
snapshot=self._output_snapshot,
|
||||
)
|
||||
|
||||
if is_final:
|
||||
await self._output_queue.put(None)
|
||||
|
||||
async def request_input(self, prompt: str = "", timeout: float | None = None) -> str:
|
||||
if self._input_event is not None:
|
||||
raise RuntimeError("request_input already pending for this node")
|
||||
|
||||
self._input_event = asyncio.Event()
|
||||
self._input_result = None
|
||||
|
||||
if self._event_bus is not None:
|
||||
await self._event_bus.emit_client_input_requested(
|
||||
stream_id=self.node_id,
|
||||
node_id=self.node_id,
|
||||
prompt=prompt,
|
||||
)
|
||||
|
||||
try:
|
||||
if timeout is not None:
|
||||
await asyncio.wait_for(self._input_event.wait(), timeout=timeout)
|
||||
else:
|
||||
await self._input_event.wait()
|
||||
finally:
|
||||
self._input_event = None
|
||||
|
||||
if self._input_result is None:
|
||||
raise RuntimeError("input event was set but no input was provided")
|
||||
result = self._input_result
|
||||
self._input_result = None
|
||||
return result
|
||||
|
||||
async def provide_input(self, content: str) -> None:
|
||||
"""Called externally to fulfill a pending request_input()."""
|
||||
if self._input_event is None:
|
||||
raise RuntimeError("no pending request_input to fulfill")
|
||||
self._input_result = content
|
||||
self._input_event.set()
|
||||
|
||||
async def output_stream(self) -> AsyncIterator[str]:
|
||||
"""Async iterator that yields output chunks until the final sentinel."""
|
||||
while True:
|
||||
chunk = await self._output_queue.get()
|
||||
if chunk is None:
|
||||
break
|
||||
yield chunk
|
||||
|
||||
|
||||
class InertNodeClientIO(NodeClientIO):
|
||||
"""
|
||||
Client I/O for client_facing=False nodes.
|
||||
|
||||
- emit_output() publishes NODE_INTERNAL_OUTPUT (content is not discarded).
|
||||
- request_input() publishes NODE_INPUT_BLOCKED and returns a redirect string.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
node_id: str,
|
||||
event_bus: EventBus | None = None,
|
||||
) -> None:
|
||||
self.node_id = node_id
|
||||
self._event_bus = event_bus
|
||||
|
||||
async def emit_output(self, content: str, is_final: bool = False) -> None:
|
||||
if self._event_bus is not None:
|
||||
await self._event_bus.emit_node_internal_output(
|
||||
stream_id=self.node_id,
|
||||
node_id=self.node_id,
|
||||
content=content,
|
||||
)
|
||||
|
||||
async def request_input(self, prompt: str = "", timeout: float | None = None) -> str:
|
||||
if self._event_bus is not None:
|
||||
await self._event_bus.emit_node_input_blocked(
|
||||
stream_id=self.node_id,
|
||||
node_id=self.node_id,
|
||||
prompt=prompt,
|
||||
)
|
||||
return (
|
||||
"You are an internal processing node. There is no user to interact with."
|
||||
" Work with the data provided in your inputs to complete your task."
|
||||
)
|
||||
|
||||
|
||||
class ClientIOGateway:
|
||||
"""Factory that creates the appropriate NodeClientIO for a node."""
|
||||
|
||||
def __init__(self, event_bus: EventBus | None = None) -> None:
|
||||
self._event_bus = event_bus
|
||||
|
||||
def create_io(self, node_id: str, client_facing: bool) -> NodeClientIO:
|
||||
if client_facing:
|
||||
return ActiveNodeClientIO(
|
||||
node_id=node_id,
|
||||
event_bus=self._event_bus,
|
||||
)
|
||||
return InertNodeClientIO(
|
||||
node_id=node_id,
|
||||
event_bus=self._event_bus,
|
||||
)
|
||||
@@ -0,0 +1,191 @@
|
||||
"""Context handoff: summarize a completed NodeConversation for the next graph node."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from framework.graph.conversation import _try_extract_key
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from framework.graph.conversation import NodeConversation
|
||||
from framework.llm.provider import LLMProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TRUNCATE_CHARS = 500
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Data
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class HandoffContext:
    """Structured summary of a completed node conversation.

    Carries everything the next node needs from its predecessor:
    the summary text, extracted key outputs, and basic size stats.
    """

    source_node_id: str          # node whose conversation was summarized
    summary: str                 # abstractive (LLM) or extractive summary text
    key_outputs: dict[str, Any]  # values extracted for requested output keys
    turn_count: int              # number of turns in the source conversation
    total_tokens_used: int       # best-available token estimate
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ContextHandoff
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class ContextHandoff:
    """Summarize a completed NodeConversation into a HandoffContext.

    Parameters
    ----------
    llm : LLMProvider | None
        Optional LLM provider for abstractive summarization.
        When *None*, all summarization uses the extractive fallback.
    """

    def __init__(self, llm: LLMProvider | None = None) -> None:
        self.llm = llm

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def summarize_conversation(
        self,
        conversation: NodeConversation,
        node_id: str,
        output_keys: list[str] | None = None,
    ) -> HandoffContext:
        """Produce a HandoffContext from *conversation*.

        1. Extracts turn_count & total_tokens_used (sync properties).
        2. Extracts key_outputs by scanning assistant messages most-recent-first.
        3. Builds a summary via the LLM (if available) or extractive fallback.
        """
        turn_count = conversation.turn_count
        total_tokens_used = conversation.estimate_tokens()
        messages = conversation.messages  # defensive copy

        # --- key outputs ---------------------------------------------------
        # Newest-first scan so each key picks up its most recent value.
        key_outputs: dict[str, Any] = {}
        if output_keys:
            remaining = set(output_keys)
            for msg in reversed(messages):
                if not remaining:
                    # [FIX] Every key has been found — stop scanning instead
                    # of iterating (and skipping) all older messages.
                    break
                if msg.role != "assistant":
                    continue
                for key in list(remaining):
                    value = _try_extract_key(msg.content, key)
                    if value is not None:
                        key_outputs[key] = value
                        remaining.discard(key)

        # --- summary -------------------------------------------------------
        if self.llm is not None:
            try:
                summary = self._llm_summary(messages, output_keys or [])
            except Exception:
                # Best-effort: a summarization failure must not break handoff.
                logger.warning(
                    "LLM summarization failed; falling back to extractive.",
                    exc_info=True,
                )
                summary = self._extractive_summary(messages)
        else:
            summary = self._extractive_summary(messages)

        return HandoffContext(
            source_node_id=node_id,
            summary=summary,
            key_outputs=key_outputs,
            turn_count=turn_count,
            total_tokens_used=total_tokens_used,
        )

    @staticmethod
    def format_as_input(handoff: HandoffContext) -> str:
        """Render *handoff* as structured plain text for the next node's input."""
        header = (
            f"--- CONTEXT FROM: {handoff.source_node_id} "
            f"({handoff.turn_count} turns, ~{handoff.total_tokens_used} tokens) ---"
        )

        sections: list[str] = [header, ""]

        if handoff.key_outputs:
            sections.append("KEY OUTPUTS:")
            for k, v in handoff.key_outputs.items():
                sections.append(f"- {k}: {v}")
            sections.append("")

        summary_text = handoff.summary or "No summary available."
        sections.append("SUMMARY:")
        sections.append(summary_text)
        sections.append("")
        sections.append("--- END CONTEXT ---")

        return "\n".join(sections)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _extractive_summary(messages: list) -> str:
        """Build a summary from key assistant messages without an LLM.

        Strategy:
        - Include the first assistant message (initial assessment).
        - Include the last assistant message (final conclusion).
        - Truncate each to ~500 chars (_TRUNCATE_CHARS).
        """
        if not messages:
            return "Empty conversation."

        assistant_msgs = [m for m in messages if m.role == "assistant"]
        if not assistant_msgs:
            return "No assistant responses."

        parts: list[str] = []

        first = assistant_msgs[0].content
        parts.append(first[:_TRUNCATE_CHARS])

        if len(assistant_msgs) > 1:
            last = assistant_msgs[-1].content
            parts.append(last[:_TRUNCATE_CHARS])

        return "\n\n".join(parts)

    def _llm_summary(self, messages: list, output_keys: list[str]) -> str:
        """Produce a summary by calling the LLM provider.

        Raises ValueError when no provider is configured (programming error:
        callers must check ``self.llm`` first).
        """
        if self.llm is None:
            raise ValueError("_llm_summary called without an LLM provider")

        conversation_text = "\n".join(f"[{m.role}]: {m.content}" for m in messages)

        key_hint = ""
        if output_keys:
            key_hint = (
                "\nThe following output keys are especially important: "
                + ", ".join(output_keys)
                + ".\n"
            )

        system_prompt = (
            "You are a concise summarizer. Given the conversation below, "
            "produce a brief summary (at most ~500 tokens) that captures the "
            "key decisions, findings, and outcomes. Focus on what was concluded "
            "rather than the back-and-forth process." + key_hint
        )

        response = self.llm.complete(
            messages=[{"role": "user", "content": conversation_text}],
            system=system_prompt,
            max_tokens=500,
        )

        return response.content.strip()
|
||||
@@ -75,6 +75,16 @@ class Message:
|
||||
)
|
||||
|
||||
|
||||
def _extract_spillover_filename(content: str) -> str | None:
|
||||
"""Extract spillover filename from a truncated tool result.
|
||||
|
||||
Matches the pattern produced by EventLoopNode._truncate_tool_result():
|
||||
"saved to 'tool_github_list_stargazers_abc123.txt'"
|
||||
"""
|
||||
match = re.search(r"saved to '([^']+)'", content)
|
||||
return match.group(1) if match else None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ConversationStore protocol (Phase 2)
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -108,6 +118,50 @@ class ConversationStore(Protocol):
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _try_extract_key(content: str, key: str) -> str | None:
|
||||
"""Try 4 strategies to extract a *key*'s value from message content.
|
||||
|
||||
Strategies (in order):
|
||||
1. Whole message is JSON — ``json.loads``, check for key.
|
||||
2. Embedded JSON via ``find_json_object`` helper.
|
||||
3. Colon format: ``key: value``.
|
||||
4. Equals format: ``key = value``.
|
||||
"""
|
||||
from framework.graph.node import find_json_object
|
||||
|
||||
# 1. Whole message is JSON
|
||||
try:
|
||||
parsed = json.loads(content)
|
||||
if isinstance(parsed, dict) and key in parsed:
|
||||
val = parsed[key]
|
||||
return json.dumps(val) if not isinstance(val, str) else val
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
|
||||
# 2. Embedded JSON via find_json_object
|
||||
json_str = find_json_object(content)
|
||||
if json_str:
|
||||
try:
|
||||
parsed = json.loads(json_str)
|
||||
if isinstance(parsed, dict) and key in parsed:
|
||||
val = parsed[key]
|
||||
return json.dumps(val) if not isinstance(val, str) else val
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
|
||||
# 3. Colon format: key: value
|
||||
match = re.search(rf"\b{re.escape(key)}\s*:\s*(.+)", content)
|
||||
if match:
|
||||
return match.group(1).strip()
|
||||
|
||||
# 4. Equals format: key = value
|
||||
match = re.search(rf"\b{re.escape(key)}\s*=\s*(.+)", content)
|
||||
if match:
|
||||
return match.group(1).strip()
|
||||
|
||||
return None
|
||||
|
||||
|
||||
class NodeConversation:
|
||||
"""Message history for a graph node with optional write-through persistence.
|
||||
|
||||
@@ -133,6 +187,7 @@ class NodeConversation:
|
||||
self._messages: list[Message] = []
|
||||
self._next_seq: int = 0
|
||||
self._meta_persisted: bool = False
|
||||
self._last_api_input_tokens: int | None = None
|
||||
|
||||
# --- Properties --------------------------------------------------------
|
||||
|
||||
@@ -205,14 +260,78 @@ class NodeConversation:
|
||||
# --- Query -------------------------------------------------------------
|
||||
|
||||
def to_llm_messages(self) -> list[dict[str, Any]]:
    """Return messages as OpenAI-format dicts (system prompt excluded).

    Automatically repairs orphaned tool_use blocks (assistant messages
    with tool_calls that lack corresponding tool-result messages). This
    can happen when a loop is cancelled mid-tool-execution.
    """
    raw = [message.to_llm_dict() for message in self._messages]
    return self._repair_orphaned_tool_calls(raw)
|
||||
|
||||
@staticmethod
def _repair_orphaned_tool_calls(
    msgs: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Ensure every tool_call has a matching tool-result message.

    Unanswered calls get a synthetic error tool message so the history
    stays valid for the LLM API.
    """
    repaired: list[dict[str, Any]] = []
    for idx, message in enumerate(msgs):
        repaired.append(message)
        calls = message.get("tool_calls")
        if not calls or message.get("role") != "assistant":
            continue

        # IDs answered by the run of tool messages directly after this one.
        answered: set[str] = set()
        for follower in msgs[idx + 1:]:
            if follower.get("role") != "tool":
                break  # stop at first non-tool message
            follower_id = follower.get("tool_call_id")
            if follower_id:
                answered.add(follower_id)

        # Synthesize an error result for every unanswered call.
        for call in calls:
            call_id = call.get("id")
            if call_id and call_id not in answered:
                repaired.append(
                    {
                        "role": "tool",
                        "tool_call_id": call_id,
                        "content": "ERROR: Tool execution was interrupted.",
                    }
                )
    return repaired
|
||||
|
||||
def estimate_tokens(self) -> int:
    """Best available token estimate.

    Uses actual API input token count when available (set via
    :meth:`update_token_count`), otherwise falls back to the rough
    ``total_chars / 4`` heuristic.
    """
    actual = self._last_api_input_tokens
    if actual is not None:
        return actual
    return sum(len(m.content) for m in self._messages) // 4
|
||||
|
||||
def update_token_count(self, actual_input_tokens: int) -> None:
    """Store actual API input token count for more accurate compaction.

    Called by EventLoopNode after each LLM call with the ``input_tokens``
    value from the API response. This value includes system prompt and
    tool definitions, so it may be higher than a message-only estimate.
    """
    # Consumed by estimate_tokens(), which prefers this over the chars/4 heuristic.
    self._last_api_input_tokens = actual_input_tokens
|
||||
|
||||
def usage_ratio(self) -> float:
    """Current token usage as a fraction of *max_history_tokens*.

    Returns 0.0 when ``max_history_tokens`` is zero (unlimited).
    """
    limit = self._max_history_tokens
    if limit <= 0:
        return 0.0
    return self.estimate_tokens() / limit
|
||||
|
||||
def needs_compaction(self) -> bool:
    """True once the token estimate crosses the compaction threshold of the budget."""
    budget = self._max_history_tokens * self._compaction_threshold
    return self.estimate_tokens() >= budget
|
||||
|
||||
@@ -244,42 +363,89 @@ class NodeConversation:
|
||||
|
||||
def _try_extract_key(self, content: str, key: str) -> str | None:
    """Try 4 strategies to extract a key's value from message content.

    Thin wrapper retained for backward compatibility; the real logic lives
    in the module-level ``_try_extract_key`` function.
    """
    return _try_extract_key(content, key)
|
||||
|
||||
# --- Lifecycle ---------------------------------------------------------
|
||||
|
||||
async def prune_old_tool_results(
    self,
    protect_tokens: int = 5000,
    min_prune_tokens: int = 2000,
) -> int:
    """Replace old tool result content with compact placeholders.

    Walks backward through messages. Recent tool results (within
    *protect_tokens*) are kept intact. Older tool results have their
    content replaced with a ~100-char placeholder that preserves the
    spillover filename reference (if any). Message structure (role,
    seq, tool_use_id) stays valid for the LLM API.

    Error tool results are never pruned — they prevent re-calling
    failing tools.

    Returns the number of messages pruned (0 if nothing was pruned).
    """
    if not self._messages:
        return 0

    # Phase 1: Walk backward, classify tool results as protected vs pruneable
    protected_tokens = 0
    pruneable: list[int] = []  # indices into self._messages
    pruneable_tokens = 0

    for i in range(len(self._messages) - 1, -1, -1):
        msg = self._messages[i]
        if msg.role != "tool":
            continue
        if msg.is_error:
            continue  # never prune errors
        if msg.content.startswith("[Pruned tool result"):
            continue  # already pruned

        # Same chars/4 heuristic used by estimate_tokens().
        est = len(msg.content) // 4
        if protected_tokens < protect_tokens:
            protected_tokens += est
        else:
            pruneable.append(i)
            pruneable_tokens += est

    # Phase 2: Only prune if enough to be worthwhile
    if pruneable_tokens < min_prune_tokens:
        return 0

    # Phase 3: Replace content with compact placeholder
    count = 0
    for i in pruneable:
        msg = self._messages[i]
        orig_len = len(msg.content)
        spillover = _extract_spillover_filename(msg.content)

        if spillover:
            placeholder = (
                f"[Pruned tool result: {orig_len} chars. "
                f"Full data in '{spillover}'. "
                f"Use load_data('{spillover}') to retrieve.]"
            )
        else:
            placeholder = f"[Pruned tool result: {orig_len} chars cleared from context.]"

        # Rebuild the message so role/seq/tool_use_id stay intact for the API.
        self._messages[i] = Message(
            seq=msg.seq,
            role=msg.role,
            content=placeholder,
            tool_use_id=msg.tool_use_id,
            tool_calls=msg.tool_calls,
            is_error=msg.is_error,
        )
        count += 1

        if self._store:
            # Write-through: persist the pruned placeholder immediately.
            await self._store.write_part(msg.seq, self._messages[i].to_storage_dict())

    # Reset token estimate — content lengths changed
    self._last_api_input_tokens = None
    return count
|
||||
|
||||
async def compact(self, summary: str, keep_recent: int = 2) -> None:
|
||||
"""Replace old messages with a summary, optionally keeping recent ones.
|
||||
|
||||
@@ -294,12 +460,18 @@ class NodeConversation:
|
||||
# Clamp: must discard at least 1 message
|
||||
keep_recent = max(0, min(keep_recent, len(self._messages) - 1))
|
||||
|
||||
if keep_recent > 0:
|
||||
old_messages = self._messages[:-keep_recent]
|
||||
recent_messages = self._messages[-keep_recent:]
|
||||
else:
|
||||
old_messages = self._messages
|
||||
recent_messages = []
|
||||
total = len(self._messages)
|
||||
split = total - keep_recent if keep_recent > 0 else total
|
||||
|
||||
# Advance split past orphaned tool results at the boundary.
|
||||
# Tool-role messages reference a tool_use from the preceding
|
||||
# assistant message; if that assistant message falls into the
|
||||
# compacted (old) portion the tool_result becomes invalid.
|
||||
while split < total and self._messages[split].role == "tool":
|
||||
split += 1
|
||||
|
||||
old_messages = list(self._messages[:split])
|
||||
recent_messages = list(self._messages[split:])
|
||||
|
||||
# Extract protected values from messages being discarded
|
||||
if self._output_keys:
|
||||
@@ -330,6 +502,7 @@ class NodeConversation:
|
||||
await self._store.write_cursor({"next_seq": self._next_seq})
|
||||
|
||||
self._messages = [summary_msg] + recent_messages
|
||||
self._last_api_input_tokens = None # reset; next LLM call will recalibrate
|
||||
|
||||
async def clear(self) -> None:
|
||||
"""Remove all messages, keep system prompt, preserve ``_next_seq``."""
|
||||
@@ -337,6 +510,7 @@ class NodeConversation:
|
||||
await self._store.delete_parts_before(self._next_seq)
|
||||
await self._store.write_cursor({"next_seq": self._next_seq})
|
||||
self._messages.clear()
|
||||
self._last_api_input_tokens = None
|
||||
|
||||
def export_summary(self) -> str:
|
||||
"""Structured summary with [STATS], [CONFIG], [RECENT_MESSAGES] sections."""
|
||||
|
||||
@@ -11,7 +11,6 @@ our edges can be created dynamically by a Builder agent based on the goal.
|
||||
|
||||
Edge Types:
|
||||
- always: Always traverse after source completes
|
||||
- always: Always traverse after source completes
|
||||
- on_success: Traverse only if source succeeds
|
||||
- on_failure: Traverse only if source fails
|
||||
- conditional: Traverse based on expression evaluation (SAFE SUBSET ONLY)
|
||||
@@ -22,7 +21,7 @@ allowing the LLM to evaluate whether proceeding along an edge makes sense
|
||||
given the current goal, context, and execution state.
|
||||
"""
|
||||
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
@@ -30,7 +29,7 @@ from pydantic import BaseModel, Field
|
||||
from framework.graph.safe_eval import safe_eval
|
||||
|
||||
|
||||
class EdgeCondition(str, Enum):
|
||||
class EdgeCondition(StrEnum):
|
||||
"""When an edge should be traversed."""
|
||||
|
||||
ALWAYS = "always" # Always after source completes
|
||||
@@ -609,4 +608,40 @@ class GraphSpec(BaseModel):
|
||||
continue
|
||||
errors.append(f"Node '{node.id}' is unreachable from entry")
|
||||
|
||||
# Client-facing fan-out validation
|
||||
fan_outs = self.detect_fan_out_nodes()
|
||||
for source_id, targets in fan_outs.items():
|
||||
client_facing_targets = [
|
||||
t
|
||||
for t in targets
|
||||
if self.get_node(t) and getattr(self.get_node(t), "client_facing", False)
|
||||
]
|
||||
if len(client_facing_targets) > 1:
|
||||
errors.append(
|
||||
f"Fan-out from '{source_id}' has multiple client-facing nodes: "
|
||||
f"{client_facing_targets}. Only one branch may be client-facing."
|
||||
)
|
||||
|
||||
# Output key overlap on parallel event_loop nodes
|
||||
for source_id, targets in fan_outs.items():
|
||||
event_loop_targets = [
|
||||
t
|
||||
for t in targets
|
||||
if self.get_node(t) and getattr(self.get_node(t), "node_type", "") == "event_loop"
|
||||
]
|
||||
if len(event_loop_targets) > 1:
|
||||
seen_keys: dict[str, str] = {}
|
||||
for node_id in event_loop_targets:
|
||||
node = self.get_node(node_id)
|
||||
for key in getattr(node, "output_keys", []):
|
||||
if key in seen_keys:
|
||||
errors.append(
|
||||
f"Fan-out from '{source_id}': event_loop nodes "
|
||||
f"'{seen_keys[key]}' and '{node_id}' both write to "
|
||||
f"output_key '{key}'. Parallel event_loop nodes must "
|
||||
f"have disjoint output_keys to prevent last-wins data loss."
|
||||
)
|
||||
else:
|
||||
seen_keys[key] = node_id
|
||||
|
||||
return errors
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -11,11 +11,13 @@ The executor:
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import warnings
|
||||
from collections.abc import Callable
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from framework.graph.edge import EdgeSpec, GraphSpec
|
||||
from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
|
||||
from framework.graph.goal import Goal
|
||||
from framework.graph.node import (
|
||||
FunctionNode,
|
||||
@@ -54,6 +56,9 @@ class ExecutionResult:
|
||||
had_partial_failures: bool = False # True if any node failed but eventually succeeded
|
||||
execution_quality: str = "clean" # "clean", "degraded", or "failed"
|
||||
|
||||
# Visit tracking (for feedback/callback edges)
|
||||
node_visit_counts: dict[str, int] = field(default_factory=dict) # {node_id: visit_count}
|
||||
|
||||
@property
|
||||
def is_clean_success(self) -> bool:
|
||||
"""True only if execution succeeded with no retries or failures."""
|
||||
@@ -124,6 +129,9 @@ class GraphExecutor:
|
||||
cleansing_config: CleansingConfig | None = None,
|
||||
enable_parallel_execution: bool = True,
|
||||
parallel_config: ParallelExecutionConfig | None = None,
|
||||
event_bus: Any | None = None,
|
||||
stream_id: str = "",
|
||||
storage_path: str | Path | None = None,
|
||||
):
|
||||
"""
|
||||
Initialize the executor.
|
||||
@@ -138,6 +146,9 @@ class GraphExecutor:
|
||||
cleansing_config: Optional output cleansing configuration
|
||||
enable_parallel_execution: Enable parallel fan-out execution (default True)
|
||||
parallel_config: Configuration for parallel execution behavior
|
||||
event_bus: Optional event bus for emitting node lifecycle events
|
||||
stream_id: Stream ID for event correlation
|
||||
storage_path: Optional base path for conversation persistence
|
||||
"""
|
||||
self.runtime = runtime
|
||||
self.llm = llm
|
||||
@@ -147,6 +158,9 @@ class GraphExecutor:
|
||||
self.approval_callback = approval_callback
|
||||
self.validator = OutputValidator()
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self._event_bus = event_bus
|
||||
self._stream_id = stream_id
|
||||
self._storage_path = Path(storage_path) if storage_path else None
|
||||
|
||||
# Initialize output cleaner
|
||||
self.cleansing_config = cleansing_config or CleansingConfig()
|
||||
@@ -250,6 +264,8 @@ class GraphExecutor:
|
||||
total_tokens = 0
|
||||
total_latency = 0
|
||||
node_retry_counts: dict[str, int] = {} # Track retries per node
|
||||
node_visit_counts: dict[str, int] = {} # Track visits for feedback loops
|
||||
_is_retry = False # True when looping back for a retry (not a new visit)
|
||||
|
||||
# Determine entry point (may differ if resuming)
|
||||
current_node_id = graph.get_entry_point(session_state)
|
||||
@@ -278,6 +294,34 @@ class GraphExecutor:
|
||||
if node_spec is None:
|
||||
raise RuntimeError(f"Node not found: {current_node_id}")
|
||||
|
||||
# Enforce max_node_visits (feedback/callback edge support)
|
||||
# Don't increment visit count on retries — retries are not new visits
|
||||
if not _is_retry:
|
||||
cnt = node_visit_counts.get(current_node_id, 0) + 1
|
||||
node_visit_counts[current_node_id] = cnt
|
||||
_is_retry = False
|
||||
max_visits = getattr(node_spec, "max_node_visits", 1)
|
||||
if max_visits > 0 and node_visit_counts[current_node_id] > max_visits:
|
||||
self.logger.warning(
|
||||
f" ⊘ Node '{node_spec.name}' visit limit reached "
|
||||
f"({node_visit_counts[current_node_id]}/{max_visits}), skipping"
|
||||
)
|
||||
# Skip execution — follow outgoing edges using current memory
|
||||
skip_result = NodeResult(success=True, output=memory.read_all())
|
||||
next_node = self._follow_edges(
|
||||
graph=graph,
|
||||
goal=goal,
|
||||
current_node_id=current_node_id,
|
||||
current_node_spec=node_spec,
|
||||
result=skip_result,
|
||||
memory=memory,
|
||||
)
|
||||
if next_node is None:
|
||||
self.logger.info(" → No more edges after visit limit, ending")
|
||||
break
|
||||
current_node_id = next_node
|
||||
continue
|
||||
|
||||
path.append(current_node_id)
|
||||
|
||||
# Check if pause (HITL) before execution
|
||||
@@ -323,13 +367,33 @@ class GraphExecutor:
|
||||
description=f"Validation errors for {current_node_id}: {validation_errors}",
|
||||
)
|
||||
|
||||
# Emit node-started event (skip event_loop nodes — they emit their own)
|
||||
if self._event_bus and node_spec.node_type != "event_loop":
|
||||
await self._event_bus.emit_node_loop_started(
|
||||
stream_id=self._stream_id, node_id=current_node_id
|
||||
)
|
||||
|
||||
# Execute node
|
||||
self.logger.info(" Executing...")
|
||||
result = await node_impl.execute(ctx)
|
||||
|
||||
# Emit node-completed event (skip event_loop nodes)
|
||||
if self._event_bus and node_spec.node_type != "event_loop":
|
||||
await self._event_bus.emit_node_loop_completed(
|
||||
stream_id=self._stream_id, node_id=current_node_id, iterations=1
|
||||
)
|
||||
|
||||
if result.success:
|
||||
# Validate output before accepting it
|
||||
if result.output and node_spec.output_keys:
|
||||
# Validate output before accepting it.
|
||||
# Skip for event_loop nodes — their judge system is
|
||||
# the sole acceptance mechanism (see WP-8). Empty
|
||||
# strings and other flexible outputs are legitimate
|
||||
# for LLM-driven nodes that already passed the judge.
|
||||
if (
|
||||
result.output
|
||||
and node_spec.output_keys
|
||||
and node_spec.node_type != "event_loop"
|
||||
):
|
||||
validation = self.validator.validate_all(
|
||||
output=result.output,
|
||||
expected_keys=node_spec.output_keys,
|
||||
@@ -380,6 +444,15 @@ class GraphExecutor:
|
||||
# [CORRECTED] Use node_spec.max_retries instead of hardcoded 3
|
||||
max_retries = getattr(node_spec, "max_retries", 3)
|
||||
|
||||
# Event loop nodes handle retry internally via judge —
|
||||
# executor retry is catastrophic (retry multiplication)
|
||||
if node_spec.node_type == "event_loop" and max_retries > 0:
|
||||
self.logger.warning(
|
||||
f"EventLoopNode '{node_spec.id}' has max_retries={max_retries}. "
|
||||
"Overriding to 0 — event loop nodes handle retry internally via judge."
|
||||
)
|
||||
max_retries = 0
|
||||
|
||||
if node_retry_counts[current_node_id] < max_retries:
|
||||
# Retry - don't increment steps for retries
|
||||
steps -= 1
|
||||
@@ -395,49 +468,69 @@ class GraphExecutor:
|
||||
self.logger.info(
|
||||
f" ↻ Retrying ({node_retry_counts[current_node_id]}/{max_retries})..."
|
||||
)
|
||||
_is_retry = True
|
||||
continue
|
||||
else:
|
||||
# Max retries exceeded - fail the execution
|
||||
# Max retries exceeded - check for failure handlers
|
||||
self.logger.error(
|
||||
f" ✗ Max retries ({max_retries}) exceeded for node {current_node_id}"
|
||||
)
|
||||
self.runtime.report_problem(
|
||||
severity="critical",
|
||||
description=(
|
||||
f"Node {current_node_id} failed after "
|
||||
f"{max_retries} attempts: {result.error}"
|
||||
),
|
||||
)
|
||||
self.runtime.end_run(
|
||||
success=False,
|
||||
output_data=memory.read_all(),
|
||||
narrative=(
|
||||
f"Failed at {node_spec.name} after "
|
||||
f"{max_retries} retries: {result.error}"
|
||||
),
|
||||
|
||||
# Check if there's an ON_FAILURE edge to follow
|
||||
next_node = self._follow_edges(
|
||||
graph=graph,
|
||||
goal=goal,
|
||||
current_node_id=current_node_id,
|
||||
current_node_spec=node_spec,
|
||||
result=result, # result.success=False triggers ON_FAILURE
|
||||
memory=memory,
|
||||
)
|
||||
|
||||
# Calculate quality metrics
|
||||
total_retries_count = sum(node_retry_counts.values())
|
||||
nodes_failed = list(node_retry_counts.keys())
|
||||
if next_node:
|
||||
# Found a failure handler - route to it
|
||||
self.logger.info(f" → Routing to failure handler: {next_node}")
|
||||
current_node_id = next_node
|
||||
continue # Continue execution with handler
|
||||
else:
|
||||
# No failure handler - terminate execution
|
||||
self.runtime.report_problem(
|
||||
severity="critical",
|
||||
description=(
|
||||
f"Node {current_node_id} failed after "
|
||||
f"{max_retries} attempts: {result.error}"
|
||||
),
|
||||
)
|
||||
self.runtime.end_run(
|
||||
success=False,
|
||||
output_data=memory.read_all(),
|
||||
narrative=(
|
||||
f"Failed at {node_spec.name} after "
|
||||
f"{max_retries} retries: {result.error}"
|
||||
),
|
||||
)
|
||||
|
||||
return ExecutionResult(
|
||||
success=False,
|
||||
error=(
|
||||
f"Node '{node_spec.name}' failed after "
|
||||
f"{max_retries} attempts: {result.error}"
|
||||
),
|
||||
output=memory.read_all(),
|
||||
steps_executed=steps,
|
||||
total_tokens=total_tokens,
|
||||
total_latency_ms=total_latency,
|
||||
path=path,
|
||||
total_retries=total_retries_count,
|
||||
nodes_with_failures=nodes_failed,
|
||||
retry_details=dict(node_retry_counts),
|
||||
had_partial_failures=len(nodes_failed) > 0,
|
||||
execution_quality="failed",
|
||||
)
|
||||
# Calculate quality metrics
|
||||
total_retries_count = sum(node_retry_counts.values())
|
||||
nodes_failed = list(node_retry_counts.keys())
|
||||
|
||||
return ExecutionResult(
|
||||
success=False,
|
||||
error=(
|
||||
f"Node '{node_spec.name}' failed after "
|
||||
f"{max_retries} attempts: {result.error}"
|
||||
),
|
||||
output=memory.read_all(),
|
||||
steps_executed=steps,
|
||||
total_tokens=total_tokens,
|
||||
total_latency_ms=total_latency,
|
||||
path=path,
|
||||
total_retries=total_retries_count,
|
||||
nodes_with_failures=nodes_failed,
|
||||
retry_details=dict(node_retry_counts),
|
||||
had_partial_failures=len(nodes_failed) > 0,
|
||||
execution_quality="failed",
|
||||
node_visit_counts=dict(node_visit_counts),
|
||||
)
|
||||
|
||||
# Check if we just executed a pause node - if so, save state and return
|
||||
# This must happen BEFORE determining next node, since pause nodes may have no edges
|
||||
@@ -476,6 +569,7 @@ class GraphExecutor:
|
||||
retry_details=dict(node_retry_counts),
|
||||
had_partial_failures=len(nodes_failed) > 0,
|
||||
execution_quality=exec_quality,
|
||||
node_visit_counts=dict(node_visit_counts),
|
||||
)
|
||||
|
||||
# Check if this is a terminal node - if so, we're done
|
||||
@@ -596,6 +690,7 @@ class GraphExecutor:
|
||||
retry_details=dict(node_retry_counts),
|
||||
had_partial_failures=len(nodes_failed) > 0,
|
||||
execution_quality=exec_quality,
|
||||
node_visit_counts=dict(node_visit_counts),
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
@@ -622,6 +717,7 @@ class GraphExecutor:
|
||||
retry_details=dict(node_retry_counts),
|
||||
had_partial_failures=len(nodes_failed) > 0,
|
||||
execution_quality="failed",
|
||||
node_visit_counts=dict(node_visit_counts),
|
||||
)
|
||||
|
||||
def _build_context(
|
||||
@@ -658,7 +754,15 @@ class GraphExecutor:
|
||||
)
|
||||
|
||||
# Valid node types - no ambiguous "llm" type allowed
|
||||
VALID_NODE_TYPES = {"llm_tool_use", "llm_generate", "router", "function", "human_input"}
|
||||
VALID_NODE_TYPES = {
|
||||
"llm_tool_use",
|
||||
"llm_generate",
|
||||
"router",
|
||||
"function",
|
||||
"human_input",
|
||||
"event_loop",
|
||||
}
|
||||
DEPRECATED_NODE_TYPES = {"llm_tool_use": "event_loop", "llm_generate": "event_loop"}
|
||||
|
||||
def _get_node_implementation(
|
||||
self, node_spec: NodeSpec, cleanup_llm_model: str | None = None
|
||||
@@ -676,6 +780,17 @@ class GraphExecutor:
|
||||
f"Use 'llm_tool_use' for nodes that call tools, 'llm_generate' for text generation."
|
||||
)
|
||||
|
||||
# Warn on deprecated node types
|
||||
if node_spec.node_type in self.DEPRECATED_NODE_TYPES:
|
||||
replacement = self.DEPRECATED_NODE_TYPES[node_spec.node_type]
|
||||
warnings.warn(
|
||||
f"Node type '{node_spec.node_type}' is deprecated. "
|
||||
f"Use '{replacement}' instead. "
|
||||
f"Node: '{node_spec.id}'",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
# Create based on type
|
||||
if node_spec.node_type == "llm_tool_use":
|
||||
if not node_spec.tools:
|
||||
@@ -713,6 +828,45 @@ class GraphExecutor:
|
||||
cleanup_llm_model=cleanup_llm_model,
|
||||
)
|
||||
|
||||
if node_spec.node_type == "event_loop":
|
||||
# Auto-create EventLoopNode with sensible defaults.
|
||||
# Custom configs can still be pre-registered via node_registry.
|
||||
from framework.graph.event_loop_node import EventLoopNode, LoopConfig
|
||||
|
||||
# Create a FileConversationStore if a storage path is available
|
||||
conv_store = None
|
||||
if self._storage_path:
|
||||
from framework.storage.conversation_store import FileConversationStore
|
||||
|
||||
store_path = self._storage_path / "conversations" / node_spec.id
|
||||
conv_store = FileConversationStore(base_path=store_path)
|
||||
|
||||
# Auto-configure spillover directory for large tool results.
|
||||
# When a tool result exceeds max_tool_result_chars, the full
|
||||
# content is written to spillover_dir and the agent gets a
|
||||
# truncated preview with instructions to use load_data().
|
||||
spillover = None
|
||||
if self._storage_path:
|
||||
spillover = str(self._storage_path / "data")
|
||||
|
||||
node = EventLoopNode(
|
||||
event_bus=self._event_bus,
|
||||
judge=None, # implicit judge: accept when output_keys are filled
|
||||
config=LoopConfig(
|
||||
max_iterations=100 if node_spec.client_facing else 50,
|
||||
max_tool_calls_per_turn=10,
|
||||
stall_detection_threshold=3,
|
||||
max_history_tokens=32000,
|
||||
max_tool_result_chars=3_000,
|
||||
spillover_dir=spillover,
|
||||
),
|
||||
tool_executor=self.tool_executor,
|
||||
conversation_store=conv_store,
|
||||
)
|
||||
# Cache so inject_event() is reachable for client-facing input
|
||||
self.node_registry[node_spec.id] = node
|
||||
return node
|
||||
|
||||
# Should never reach here due to validation above
|
||||
raise RuntimeError(f"Unhandled node type: {node_spec.node_type}")
|
||||
|
||||
@@ -740,9 +894,12 @@ class GraphExecutor:
|
||||
source_node_name=current_node_spec.name if current_node_spec else current_node_id,
|
||||
target_node_name=target_node_spec.name if target_node_spec else edge.target,
|
||||
):
|
||||
# Validate and clean output before mapping inputs
|
||||
# Validate and clean output before mapping inputs.
|
||||
# Use full memory state (not just result.output) because
|
||||
# target input_keys may come from earlier nodes in the
|
||||
# graph, not only from the immediate source node.
|
||||
if self.cleansing_config.enabled and target_node_spec:
|
||||
output_to_validate = result.output
|
||||
output_to_validate = memory.read_all()
|
||||
|
||||
validation = self.output_cleaner.validate_output(
|
||||
output=output_to_validate,
|
||||
@@ -823,6 +980,21 @@ class GraphExecutor:
|
||||
):
|
||||
traversable.append(edge)
|
||||
|
||||
# Priority filtering for CONDITIONAL edges:
|
||||
# When multiple CONDITIONAL edges match, keep only the highest-priority
|
||||
# group. This prevents mutually-exclusive conditional branches (e.g.
|
||||
# forward vs. feedback) from incorrectly triggering fan-out.
|
||||
# ON_SUCCESS / other edge types are unaffected.
|
||||
if len(traversable) > 1:
|
||||
conditionals = [e for e in traversable if e.condition == EdgeCondition.CONDITIONAL]
|
||||
if len(conditionals) > 1:
|
||||
max_prio = max(e.priority for e in conditionals)
|
||||
traversable = [
|
||||
e
|
||||
for e in traversable
|
||||
if e.condition != EdgeCondition.CONDITIONAL or e.priority == max_prio
|
||||
]
|
||||
|
||||
return traversable
|
||||
|
||||
def _find_convergence_node(
|
||||
@@ -909,13 +1081,27 @@ class GraphExecutor:
|
||||
branch.status = "failed"
|
||||
branch.error = f"Node {branch.node_id} not found in graph"
|
||||
return branch, RuntimeError(branch.error)
|
||||
|
||||
effective_max_retries = node_spec.max_retries
|
||||
if node_spec.node_type == "event_loop":
|
||||
if effective_max_retries > 1:
|
||||
self.logger.warning(
|
||||
f"EventLoopNode '{node_spec.id}' has "
|
||||
f"max_retries={effective_max_retries}. Overriding "
|
||||
"to 1 — event loop nodes handle retry internally."
|
||||
)
|
||||
effective_max_retries = 1
|
||||
|
||||
branch.status = "running"
|
||||
|
||||
try:
|
||||
# Validate and clean output before mapping inputs (same as _follow_edges)
|
||||
# Validate and clean output before mapping inputs (same as _follow_edges).
|
||||
# Use full memory state since target input_keys may come
|
||||
# from earlier nodes, not just the immediate source.
|
||||
if self.cleansing_config.enabled and node_spec:
|
||||
mem_snapshot = memory.read_all()
|
||||
validation = self.output_cleaner.validate_output(
|
||||
output=source_result.output,
|
||||
output=mem_snapshot,
|
||||
source_node_id=source_node_spec.id if source_node_spec else "unknown",
|
||||
target_node_spec=node_spec,
|
||||
)
|
||||
@@ -926,7 +1112,7 @@ class GraphExecutor:
|
||||
f"{branch.node_id}: {validation.errors}"
|
||||
)
|
||||
cleaned_output = self.output_cleaner.clean_output(
|
||||
output=source_result.output,
|
||||
output=mem_snapshot,
|
||||
source_node_id=source_node_spec.id if source_node_spec else "unknown",
|
||||
target_node_spec=node_spec,
|
||||
validation_errors=validation.errors,
|
||||
@@ -942,19 +1128,31 @@ class GraphExecutor:
|
||||
|
||||
# Execute with retries
|
||||
last_result = None
|
||||
for attempt in range(node_spec.max_retries):
|
||||
for attempt in range(effective_max_retries):
|
||||
branch.retry_count = attempt
|
||||
|
||||
# Build context for this branch
|
||||
ctx = self._build_context(node_spec, memory, goal, mapped, graph.max_tokens)
|
||||
node_impl = self._get_node_implementation(node_spec, graph.cleanup_llm_model)
|
||||
|
||||
# Emit node-started event (skip event_loop nodes)
|
||||
if self._event_bus and node_spec.node_type != "event_loop":
|
||||
await self._event_bus.emit_node_loop_started(
|
||||
stream_id=self._stream_id, node_id=branch.node_id
|
||||
)
|
||||
|
||||
self.logger.info(
|
||||
f" ▶ Branch {node_spec.name}: executing (attempt {attempt + 1})"
|
||||
)
|
||||
result = await node_impl.execute(ctx)
|
||||
last_result = result
|
||||
|
||||
# Emit node-completed event (skip event_loop nodes)
|
||||
if self._event_bus and node_spec.node_type != "event_loop":
|
||||
await self._event_bus.emit_node_loop_completed(
|
||||
stream_id=self._stream_id, node_id=branch.node_id, iterations=1
|
||||
)
|
||||
|
||||
if result.success:
|
||||
# Write outputs to shared memory using async write
|
||||
for key, value in result.output.items():
|
||||
@@ -970,7 +1168,7 @@ class GraphExecutor:
|
||||
|
||||
self.logger.warning(
|
||||
f" ↻ Branch {node_spec.name}: "
|
||||
f"retry {attempt + 1}/{node_spec.max_retries}"
|
||||
f"retry {attempt + 1}/{effective_max_retries}"
|
||||
)
|
||||
|
||||
# All retries exhausted
|
||||
@@ -979,7 +1177,7 @@ class GraphExecutor:
|
||||
branch.result = last_result
|
||||
self.logger.error(
|
||||
f" ✗ Branch {node_spec.name}: "
|
||||
f"failed after {node_spec.max_retries} attempts"
|
||||
f"failed after {effective_max_retries} attempts"
|
||||
)
|
||||
return branch, last_result
|
||||
|
||||
|
||||
@@ -12,13 +12,13 @@ Goals are:
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class GoalStatus(str, Enum):
|
||||
class GoalStatus(StrEnum):
|
||||
"""Lifecycle status of a goal."""
|
||||
|
||||
DRAFT = "draft" # Being defined
|
||||
|
||||
@@ -6,11 +6,11 @@ where agents need to gather input from humans.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
|
||||
class HITLInputType(str, Enum):
|
||||
class HITLInputType(StrEnum):
|
||||
"""Type of input expected from human."""
|
||||
|
||||
FREE_TEXT = "free_text" # Open-ended text response
|
||||
|
||||
@@ -16,10 +16,12 @@ Protocol:
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import inspect
|
||||
import logging
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import Callable
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import UTC
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
@@ -153,7 +155,10 @@ class NodeSpec(BaseModel):
|
||||
# Node behavior type
|
||||
node_type: str = Field(
|
||||
default="llm_tool_use",
|
||||
description="Type: 'llm_tool_use', 'llm_generate', 'function', 'router', 'human_input'",
|
||||
description=(
|
||||
"Type: 'event_loop', 'function', 'router', 'human_input'. "
|
||||
"Deprecated: 'llm_tool_use', 'llm_generate' (use 'event_loop' instead)."
|
||||
),
|
||||
)
|
||||
|
||||
# Data flow
|
||||
@@ -205,6 +210,15 @@ class NodeSpec(BaseModel):
|
||||
max_retries: int = Field(default=3)
|
||||
retry_on: list[str] = Field(default_factory=list, description="Error types to retry on")
|
||||
|
||||
# Visit limits (for feedback/callback edges)
|
||||
max_node_visits: int = Field(
|
||||
default=1,
|
||||
description=(
|
||||
"Max times this node executes in one graph run. "
|
||||
"Set >1 for feedback loops. 0 = unlimited (max_steps guards)."
|
||||
),
|
||||
)
|
||||
|
||||
# Pydantic model for output validation
|
||||
output_model: type[BaseModel] | None = Field(
|
||||
default=None,
|
||||
@@ -218,6 +232,12 @@ class NodeSpec(BaseModel):
|
||||
description="Maximum retries when Pydantic validation fails (with feedback to LLM)",
|
||||
)
|
||||
|
||||
# Client-facing behavior
|
||||
client_facing: bool = Field(
|
||||
default=False,
|
||||
description="If True, this node streams output to the end user and can request input.",
|
||||
)
|
||||
|
||||
model_config = {"extra": "allow", "arbitrary_types_allowed": True}
|
||||
|
||||
|
||||
@@ -1348,7 +1368,9 @@ Expected output keys: {output_keys}
|
||||
LLM Response:
|
||||
{raw_response}
|
||||
|
||||
Output ONLY the JSON object, nothing else."""
|
||||
Output ONLY the JSON object, nothing else.
|
||||
If no valid JSON object exists in the response, output exactly: {{"error": "NO_JSON_FOUND"}}
|
||||
Do NOT fabricate data or return empty objects."""
|
||||
|
||||
try:
|
||||
result = cleaner_llm.complete(
|
||||
@@ -1395,6 +1417,14 @@ Output ONLY the JSON object, nothing else."""
|
||||
parsed = json.loads(cleaned)
|
||||
except json.JSONDecodeError:
|
||||
parsed = json.loads(_fix_unescaped_newlines_in_json(cleaned))
|
||||
|
||||
# Validate LLM didn't return empty or fabricated data
|
||||
if parsed.get("error") == "NO_JSON_FOUND":
|
||||
raise ValueError("Cannot parse JSON from response")
|
||||
if not parsed or parsed == {}:
|
||||
raise ValueError("Cannot parse JSON from response")
|
||||
if all(v is None for v in parsed.values()):
|
||||
raise ValueError("Cannot parse JSON from response")
|
||||
logger.info(" ✓ LLM cleaned JSON output")
|
||||
return parsed
|
||||
|
||||
@@ -1504,6 +1534,8 @@ Output ONLY the JSON object, nothing else."""
|
||||
|
||||
def _build_system_prompt(self, ctx: NodeContext) -> str:
|
||||
"""Build the system prompt."""
|
||||
from datetime import datetime
|
||||
|
||||
parts = []
|
||||
|
||||
if ctx.node_spec.system_prompt:
|
||||
@@ -1526,6 +1558,15 @@ Output ONLY the JSON object, nothing else."""
|
||||
|
||||
parts.append(prompt)
|
||||
|
||||
# Inject current datetime so LLM knows "now"
|
||||
utc_dt = datetime.now(UTC)
|
||||
local_dt = datetime.now().astimezone()
|
||||
local_tz_name = local_dt.tzname() or "Unknown"
|
||||
parts.append("\n## Runtime Context")
|
||||
parts.append(f"- Current Date/Time (UTC): {utc_dt.isoformat()}")
|
||||
parts.append(f"- Local Timezone: {local_tz_name}")
|
||||
parts.append(f"- Current Date/Time (Local): {local_dt.isoformat()}")
|
||||
|
||||
if ctx.goal_context:
|
||||
parts.append("\n# Goal Context")
|
||||
parts.append(ctx.goal_context)
|
||||
@@ -1727,8 +1768,19 @@ class FunctionNode(NodeProtocol):
|
||||
start = time.time()
|
||||
|
||||
try:
|
||||
# Call the function
|
||||
result = self.func(**ctx.input_data)
|
||||
# Filter input_data to only declared input_keys to prevent
|
||||
# leaking extra memory keys from upstream nodes.
|
||||
if ctx.node_spec.input_keys:
|
||||
filtered = {
|
||||
k: v for k, v in ctx.input_data.items() if k in ctx.node_spec.input_keys
|
||||
}
|
||||
else:
|
||||
filtered = ctx.input_data
|
||||
|
||||
# Call the function (supports both sync and async)
|
||||
result = self.func(**filtered)
|
||||
if inspect.isawaitable(result):
|
||||
result = await result
|
||||
|
||||
latency_ms = int((time.time() - start) * 1000)
|
||||
|
||||
|
||||
@@ -144,8 +144,11 @@ class OutputCleaner:
|
||||
errors = []
|
||||
warnings = []
|
||||
|
||||
# Check 1: Required input keys present
|
||||
# Check 1: Required input keys present (skip nullable keys)
|
||||
nullable = set(getattr(target_node_spec, "nullable_output_keys", None) or [])
|
||||
for key in target_node_spec.input_keys:
|
||||
if key in nullable:
|
||||
continue
|
||||
if key not in output:
|
||||
errors.append(f"Missing required key: '{key}'")
|
||||
continue
|
||||
|
||||
@@ -11,13 +11,13 @@ The Plan is the contract between the external planner and the executor:
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ActionType(str, Enum):
|
||||
class ActionType(StrEnum):
|
||||
"""Types of actions a PlanStep can perform."""
|
||||
|
||||
LLM_CALL = "llm_call" # Call LLM for generation
|
||||
@@ -27,7 +27,7 @@ class ActionType(str, Enum):
|
||||
CODE_EXECUTION = "code_execution" # Execute dynamic code (sandboxed)
|
||||
|
||||
|
||||
class StepStatus(str, Enum):
|
||||
class StepStatus(StrEnum):
|
||||
"""Status of a plan step."""
|
||||
|
||||
PENDING = "pending"
|
||||
@@ -56,7 +56,7 @@ class StepStatus(str, Enum):
|
||||
return self == StepStatus.COMPLETED
|
||||
|
||||
|
||||
class ApprovalDecision(str, Enum):
|
||||
class ApprovalDecision(StrEnum):
|
||||
"""Human decision on a step requiring approval."""
|
||||
|
||||
APPROVE = "approve" # Execute as planned
|
||||
@@ -91,7 +91,7 @@ class ApprovalResult(BaseModel):
|
||||
model_config = {"extra": "allow"}
|
||||
|
||||
|
||||
class JudgmentAction(str, Enum):
|
||||
class JudgmentAction(StrEnum):
|
||||
"""Actions the judge can take after evaluating a step."""
|
||||
|
||||
ACCEPT = "accept" # Step completed successfully, continue
|
||||
@@ -423,7 +423,7 @@ class Plan(BaseModel):
|
||||
}
|
||||
|
||||
|
||||
class ExecutionStatus(str, Enum):
|
||||
class ExecutionStatus(StrEnum):
|
||||
"""Status of plan execution."""
|
||||
|
||||
COMPLETED = "completed"
|
||||
|
||||
@@ -75,16 +75,6 @@ class SafeEvalVisitor(ast.NodeVisitor):
|
||||
def visit_Constant(self, node: ast.Constant) -> Any:
|
||||
return node.value
|
||||
|
||||
# --- Number/String/Bytes/NameConstant (Python < 3.8 compat if needed) ---
|
||||
def visit_Num(self, node: ast.Num) -> Any:
|
||||
return node.n
|
||||
|
||||
def visit_Str(self, node: ast.Str) -> Any:
|
||||
return node.s
|
||||
|
||||
def visit_NameConstant(self, node: ast.NameConstant) -> Any:
|
||||
return node.value
|
||||
|
||||
# --- Data Structures ---
|
||||
def visit_List(self, node: ast.List) -> list:
|
||||
return [self.visit(elt) for elt in node.elts]
|
||||
|
||||
@@ -126,14 +126,16 @@ class OutputValidator:
|
||||
|
||||
for key in expected_keys:
|
||||
if key not in output:
|
||||
errors.append(f"Missing required output key: '{key}'")
|
||||
if key not in nullable_keys:
|
||||
errors.append(f"Missing required output key: '{key}'")
|
||||
elif not allow_empty:
|
||||
value = output[key]
|
||||
if value is None:
|
||||
if key not in nullable_keys:
|
||||
errors.append(f"Output key '{key}' is None")
|
||||
elif isinstance(value, str) and len(value.strip()) == 0:
|
||||
errors.append(f"Output key '{key}' is empty string")
|
||||
if key not in nullable_keys:
|
||||
errors.append(f"Output key '{key}' is empty string")
|
||||
|
||||
return ValidationResult(success=len(errors) == 0, errors=errors)
|
||||
|
||||
@@ -205,7 +207,7 @@ class OutputValidator:
|
||||
def validate_no_hallucination(
|
||||
self,
|
||||
output: dict[str, Any],
|
||||
max_length: int = 10000,
|
||||
max_length: int = 50000,
|
||||
) -> ValidationResult:
|
||||
"""
|
||||
Check for signs of LLM hallucination in output values.
|
||||
|
||||
@@ -1,8 +1,31 @@
|
||||
"""LLM provider abstraction."""
|
||||
|
||||
from framework.llm.provider import LLMProvider, LLMResponse
|
||||
from framework.llm.stream_events import (
|
||||
FinishEvent,
|
||||
ReasoningDeltaEvent,
|
||||
ReasoningStartEvent,
|
||||
StreamErrorEvent,
|
||||
StreamEvent,
|
||||
TextDeltaEvent,
|
||||
TextEndEvent,
|
||||
ToolCallEvent,
|
||||
ToolResultEvent,
|
||||
)
|
||||
|
||||
__all__ = ["LLMProvider", "LLMResponse"]
|
||||
__all__ = [
|
||||
"LLMProvider",
|
||||
"LLMResponse",
|
||||
"StreamEvent",
|
||||
"TextDeltaEvent",
|
||||
"TextEndEvent",
|
||||
"ToolCallEvent",
|
||||
"ToolResultEvent",
|
||||
"ReasoningStartEvent",
|
||||
"ReasoningDeltaEvent",
|
||||
"FinishEvent",
|
||||
"StreamErrorEvent",
|
||||
]
|
||||
|
||||
try:
|
||||
from framework.llm.anthropic import AnthropicProvider # noqa: F401
|
||||
|
||||
@@ -18,7 +18,7 @@ def _get_api_key_from_credential_store() -> str | None:
|
||||
try:
|
||||
from aden_tools.credentials import CredentialStoreAdapter
|
||||
|
||||
creds = CredentialStoreAdapter.with_env_storage()
|
||||
creds = CredentialStoreAdapter.default()
|
||||
if creds.is_available("anthropic"):
|
||||
return creds.get("anthropic")
|
||||
except ImportError:
|
||||
|
||||
@@ -7,10 +7,11 @@ Groq, and local models.
|
||||
See: https://docs.litellm.ai/docs/providers
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from collections.abc import Callable
|
||||
from collections.abc import AsyncIterator, Callable
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
@@ -23,6 +24,7 @@ except ImportError:
|
||||
RateLimitError = Exception # type: ignore[assignment, misc]
|
||||
|
||||
from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolResult, ToolUse
|
||||
from framework.llm.stream_events import StreamEvent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -145,7 +147,7 @@ class LiteLLMProvider(LLMProvider):
|
||||
|
||||
if litellm is None:
|
||||
raise ImportError(
|
||||
"LiteLLM is not installed. Please install it with: pip install litellm"
|
||||
"LiteLLM is not installed. Please install it with: uv pip install litellm"
|
||||
)
|
||||
|
||||
def _completion_with_rate_limit_retry(self, **kwargs: Any) -> Any:
|
||||
@@ -161,11 +163,24 @@ class LiteLLMProvider(LLMProvider):
|
||||
content = response.choices[0].message.content if response.choices else None
|
||||
has_tool_calls = bool(response.choices and response.choices[0].message.tool_calls)
|
||||
if not content and not has_tool_calls:
|
||||
# If the conversation ends with an assistant message,
|
||||
# an empty response is expected — don't retry.
|
||||
messages = kwargs.get("messages", [])
|
||||
last_role = next(
|
||||
(m["role"] for m in reversed(messages) if m.get("role") != "system"),
|
||||
None,
|
||||
)
|
||||
if last_role == "assistant":
|
||||
logger.debug(
|
||||
"[retry] Empty response after assistant message — "
|
||||
"expected, not retrying."
|
||||
)
|
||||
return response
|
||||
|
||||
finish_reason = (
|
||||
response.choices[0].finish_reason if response.choices else "unknown"
|
||||
)
|
||||
# Dump full request to file for debugging
|
||||
messages = kwargs.get("messages", [])
|
||||
token_count, token_method = _estimate_tokens(model, messages)
|
||||
dump_path = _dump_failed_request(
|
||||
model=model,
|
||||
@@ -378,11 +393,18 @@ class LiteLLMProvider(LLMProvider):
|
||||
|
||||
# Execute tools and add results.
|
||||
for tool_call in message.tool_calls:
|
||||
# Parse arguments
|
||||
try:
|
||||
args = json.loads(tool_call.function.arguments)
|
||||
except json.JSONDecodeError:
|
||||
args = {}
|
||||
# Surface error to LLM and skip tool execution
|
||||
current_messages.append(
|
||||
{
|
||||
"role": "tool",
|
||||
"tool_call_id": tool_call.id,
|
||||
"content": "Invalid JSON arguments provided to tool.",
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
tool_use = ToolUse(
|
||||
id=tool_call.id,
|
||||
@@ -425,3 +447,189 @@ class LiteLLMProvider(LLMProvider):
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
async def stream(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
system: str = "",
|
||||
tools: list[Tool] | None = None,
|
||||
max_tokens: int = 4096,
|
||||
) -> AsyncIterator[StreamEvent]:
|
||||
"""Stream a completion via litellm.acompletion(stream=True).
|
||||
|
||||
Yields StreamEvent objects as chunks arrive from the provider.
|
||||
Tool call arguments are accumulated across chunks and yielded as
|
||||
a single ToolCallEvent with fully parsed JSON when complete.
|
||||
|
||||
Empty responses (e.g. Gemini stealth rate-limits that return 200
|
||||
with no content) are retried with exponential backoff, mirroring
|
||||
the retry behaviour of ``_completion_with_rate_limit_retry``.
|
||||
"""
|
||||
from framework.llm.stream_events import (
|
||||
FinishEvent,
|
||||
StreamErrorEvent,
|
||||
TextDeltaEvent,
|
||||
TextEndEvent,
|
||||
ToolCallEvent,
|
||||
)
|
||||
|
||||
full_messages: list[dict[str, Any]] = []
|
||||
if system:
|
||||
full_messages.append({"role": "system", "content": system})
|
||||
full_messages.extend(messages)
|
||||
|
||||
kwargs: dict[str, Any] = {
|
||||
"model": self.model,
|
||||
"messages": full_messages,
|
||||
"max_tokens": max_tokens,
|
||||
"stream": True,
|
||||
"stream_options": {"include_usage": True},
|
||||
**self.extra_kwargs,
|
||||
}
|
||||
if self.api_key:
|
||||
kwargs["api_key"] = self.api_key
|
||||
if self.api_base:
|
||||
kwargs["api_base"] = self.api_base
|
||||
if tools:
|
||||
kwargs["tools"] = [self._tool_to_openai_format(t) for t in tools]
|
||||
|
||||
for attempt in range(RATE_LIMIT_MAX_RETRIES + 1):
|
||||
# Post-stream events (ToolCall, TextEnd, Finish) are buffered
|
||||
# because they depend on the full stream. TextDeltaEvents are
|
||||
# yielded immediately so callers see tokens in real time.
|
||||
tail_events: list[StreamEvent] = []
|
||||
accumulated_text = ""
|
||||
tool_calls_acc: dict[int, dict[str, str]] = {}
|
||||
input_tokens = 0
|
||||
output_tokens = 0
|
||||
|
||||
try:
|
||||
response = await litellm.acompletion(**kwargs) # type: ignore[union-attr]
|
||||
|
||||
async for chunk in response:
|
||||
choice = chunk.choices[0] if chunk.choices else None
|
||||
if not choice:
|
||||
continue
|
||||
|
||||
delta = choice.delta
|
||||
|
||||
# --- Text content — yield immediately for real-time streaming ---
|
||||
if delta and delta.content:
|
||||
accumulated_text += delta.content
|
||||
yield TextDeltaEvent(
|
||||
content=delta.content,
|
||||
snapshot=accumulated_text,
|
||||
)
|
||||
|
||||
# --- Tool calls (accumulate across chunks) ---
|
||||
if delta and delta.tool_calls:
|
||||
for tc in delta.tool_calls:
|
||||
idx = tc.index if hasattr(tc, "index") and tc.index is not None else 0
|
||||
if idx not in tool_calls_acc:
|
||||
tool_calls_acc[idx] = {"id": "", "name": "", "arguments": ""}
|
||||
if tc.id:
|
||||
tool_calls_acc[idx]["id"] = tc.id
|
||||
if tc.function:
|
||||
if tc.function.name:
|
||||
tool_calls_acc[idx]["name"] = tc.function.name
|
||||
if tc.function.arguments:
|
||||
tool_calls_acc[idx]["arguments"] += tc.function.arguments
|
||||
|
||||
# --- Finish ---
|
||||
if choice.finish_reason:
|
||||
for _idx, tc_data in sorted(tool_calls_acc.items()):
|
||||
try:
|
||||
parsed_args = json.loads(tc_data["arguments"])
|
||||
except (json.JSONDecodeError, KeyError):
|
||||
parsed_args = {"_raw": tc_data.get("arguments", "")}
|
||||
tail_events.append(
|
||||
ToolCallEvent(
|
||||
tool_use_id=tc_data["id"],
|
||||
tool_name=tc_data["name"],
|
||||
tool_input=parsed_args,
|
||||
)
|
||||
)
|
||||
|
||||
if accumulated_text:
|
||||
tail_events.append(TextEndEvent(full_text=accumulated_text))
|
||||
|
||||
usage = getattr(chunk, "usage", None)
|
||||
if usage:
|
||||
input_tokens = getattr(usage, "prompt_tokens", 0) or 0
|
||||
output_tokens = getattr(usage, "completion_tokens", 0) or 0
|
||||
|
||||
tail_events.append(
|
||||
FinishEvent(
|
||||
stop_reason=choice.finish_reason,
|
||||
input_tokens=input_tokens,
|
||||
output_tokens=output_tokens,
|
||||
model=self.model,
|
||||
)
|
||||
)
|
||||
|
||||
# Check whether the stream produced any real content.
|
||||
# (If text deltas were yielded above, has_content is True
|
||||
# and we skip the retry path — nothing was yielded in vain.)
|
||||
has_content = accumulated_text or tool_calls_acc
|
||||
if not has_content and attempt < RATE_LIMIT_MAX_RETRIES:
|
||||
# If the conversation ends with an assistant or tool
|
||||
# message, an empty stream is expected — the LLM has
|
||||
# nothing new to say. Don't burn retries on this;
|
||||
# let the caller (EventLoopNode) decide what to do.
|
||||
# Typical case: client_facing node where the LLM set
|
||||
# all outputs via set_output tool calls, and the tool
|
||||
# results are the last messages.
|
||||
last_role = next(
|
||||
(m["role"] for m in reversed(full_messages) if m.get("role") != "system"),
|
||||
None,
|
||||
)
|
||||
if last_role in ("assistant", "tool"):
|
||||
logger.debug(
|
||||
"[stream] Empty response after %s message — expected, not retrying.",
|
||||
last_role,
|
||||
)
|
||||
for event in tail_events:
|
||||
yield event
|
||||
return
|
||||
wait = RATE_LIMIT_BACKOFF_BASE * (2**attempt)
|
||||
token_count, token_method = _estimate_tokens(
|
||||
self.model,
|
||||
full_messages,
|
||||
)
|
||||
dump_path = _dump_failed_request(
|
||||
model=self.model,
|
||||
kwargs=kwargs,
|
||||
error_type="empty_stream",
|
||||
attempt=attempt,
|
||||
)
|
||||
logger.warning(
|
||||
f"[stream-retry] {self.model} returned empty stream — "
|
||||
f"~{token_count} tokens ({token_method}). "
|
||||
f"Request dumped to: {dump_path}. "
|
||||
f"Retrying in {wait}s "
|
||||
f"(attempt {attempt + 1}/{RATE_LIMIT_MAX_RETRIES})"
|
||||
)
|
||||
await asyncio.sleep(wait)
|
||||
continue
|
||||
|
||||
# Success (or final attempt) — flush remaining events.
|
||||
for event in tail_events:
|
||||
yield event
|
||||
return
|
||||
|
||||
except RateLimitError as e:
|
||||
if attempt < RATE_LIMIT_MAX_RETRIES:
|
||||
wait = RATE_LIMIT_BACKOFF_BASE * (2**attempt)
|
||||
logger.warning(
|
||||
f"[stream-retry] {self.model} rate limited (429): {e!s}. "
|
||||
f"Retrying in {wait}s "
|
||||
f"(attempt {attempt + 1}/{RATE_LIMIT_MAX_RETRIES})"
|
||||
)
|
||||
await asyncio.sleep(wait)
|
||||
continue
|
||||
yield StreamErrorEvent(error=str(e), recoverable=False)
|
||||
return
|
||||
|
||||
except Exception as e:
|
||||
yield StreamErrorEvent(error=str(e), recoverable=False)
|
||||
return
|
||||
|
||||
@@ -2,10 +2,16 @@
|
||||
|
||||
import json
|
||||
import re
|
||||
from collections.abc import Callable
|
||||
from collections.abc import AsyncIterator, Callable
|
||||
from typing import Any
|
||||
|
||||
from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolResult, ToolUse
|
||||
from framework.llm.stream_events import (
|
||||
FinishEvent,
|
||||
StreamEvent,
|
||||
TextDeltaEvent,
|
||||
TextEndEvent,
|
||||
)
|
||||
|
||||
|
||||
class MockLLMProvider(LLMProvider):
|
||||
@@ -175,3 +181,28 @@ class MockLLMProvider(LLMProvider):
|
||||
output_tokens=0,
|
||||
stop_reason="mock_complete",
|
||||
)
|
||||
|
||||
async def stream(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
system: str = "",
|
||||
tools: list[Tool] | None = None,
|
||||
max_tokens: int = 4096,
|
||||
) -> AsyncIterator[StreamEvent]:
|
||||
"""Stream a mock completion as word-level TextDeltaEvents.
|
||||
|
||||
Splits the mock response into words and yields each as a separate
|
||||
TextDeltaEvent with an accumulating snapshot, exercising the full
|
||||
streaming pipeline without any API calls.
|
||||
"""
|
||||
content = self._generate_mock_response(system=system, json_mode=False)
|
||||
words = content.split(" ")
|
||||
accumulated = ""
|
||||
|
||||
for i, word in enumerate(words):
|
||||
chunk = word if i == 0 else " " + word
|
||||
accumulated += chunk
|
||||
yield TextDeltaEvent(content=chunk, snapshot=accumulated)
|
||||
|
||||
yield TextEndEvent(full_text=accumulated)
|
||||
yield FinishEvent(stop_reason="mock_complete", model=self.model)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""LLM Provider abstraction for pluggable LLM backends."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import Callable
|
||||
from collections.abc import AsyncIterator, Callable
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
@@ -108,3 +108,45 @@ class LLMProvider(ABC):
|
||||
Final LLMResponse after tool use completes
|
||||
"""
|
||||
pass
|
||||
|
||||
async def stream(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
system: str = "",
|
||||
tools: list[Tool] | None = None,
|
||||
max_tokens: int = 4096,
|
||||
) -> AsyncIterator["StreamEvent"]:
|
||||
"""
|
||||
Stream a completion as an async iterator of StreamEvents.
|
||||
|
||||
Default implementation wraps complete() with synthetic events.
|
||||
Subclasses SHOULD override for true streaming.
|
||||
|
||||
Tool orchestration is the CALLER's responsibility:
|
||||
- Caller detects ToolCallEvent, executes tool, adds result
|
||||
to messages, calls stream() again.
|
||||
"""
|
||||
from framework.llm.stream_events import (
|
||||
FinishEvent,
|
||||
TextDeltaEvent,
|
||||
TextEndEvent,
|
||||
)
|
||||
|
||||
response = self.complete(
|
||||
messages=messages,
|
||||
system=system,
|
||||
tools=tools,
|
||||
max_tokens=max_tokens,
|
||||
)
|
||||
yield TextDeltaEvent(content=response.content, snapshot=response.content)
|
||||
yield TextEndEvent(full_text=response.content)
|
||||
yield FinishEvent(
|
||||
stop_reason=response.stop_reason,
|
||||
input_tokens=response.input_tokens,
|
||||
output_tokens=response.output_tokens,
|
||||
model=response.model,
|
||||
)
|
||||
|
||||
|
||||
# Deferred import target for type annotation
|
||||
from framework.llm.stream_events import StreamEvent as StreamEvent # noqa: E402, F401
|
||||
|
||||
@@ -0,0 +1,96 @@
|
||||
"""Stream event types for LLM streaming responses.
|
||||
|
||||
Defines a discriminated union of frozen dataclasses representing every event
|
||||
a streaming LLM call can produce. These types form the contract between the
|
||||
LLM provider layer, EventLoopNode, event bus, persistence, and monitoring.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Literal
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class TextDeltaEvent:
|
||||
"""A chunk of text produced by the LLM."""
|
||||
|
||||
type: Literal["text_delta"] = "text_delta"
|
||||
content: str = "" # this chunk's text
|
||||
snapshot: str = "" # accumulated text so far
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class TextEndEvent:
|
||||
"""Signals that text generation is complete."""
|
||||
|
||||
type: Literal["text_end"] = "text_end"
|
||||
full_text: str = ""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ToolCallEvent:
|
||||
"""The LLM has requested a tool call."""
|
||||
|
||||
type: Literal["tool_call"] = "tool_call"
|
||||
tool_use_id: str = ""
|
||||
tool_name: str = ""
|
||||
tool_input: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ToolResultEvent:
|
||||
"""Result of executing a tool call."""
|
||||
|
||||
type: Literal["tool_result"] = "tool_result"
|
||||
tool_use_id: str = ""
|
||||
content: str = ""
|
||||
is_error: bool = False
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ReasoningStartEvent:
|
||||
"""The LLM has started a reasoning/thinking block."""
|
||||
|
||||
type: Literal["reasoning_start"] = "reasoning_start"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ReasoningDeltaEvent:
|
||||
"""A chunk of reasoning/thinking content."""
|
||||
|
||||
type: Literal["reasoning_delta"] = "reasoning_delta"
|
||||
content: str = ""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class FinishEvent:
|
||||
"""The LLM has finished generating."""
|
||||
|
||||
type: Literal["finish"] = "finish"
|
||||
stop_reason: str = ""
|
||||
input_tokens: int = 0
|
||||
output_tokens: int = 0
|
||||
model: str = ""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class StreamErrorEvent:
|
||||
"""An error occurred during streaming."""
|
||||
|
||||
type: Literal["error"] = "error"
|
||||
error: str = ""
|
||||
recoverable: bool = False
|
||||
|
||||
|
||||
# Discriminated union of all stream event types
|
||||
StreamEvent = (
|
||||
TextDeltaEvent
|
||||
| TextEndEvent
|
||||
| ToolCallEvent
|
||||
| ToolResultEvent
|
||||
| ReasoningStartEvent
|
||||
| ReasoningDeltaEvent
|
||||
| FinishEvent
|
||||
| StreamErrorEvent
|
||||
)
|
||||
@@ -4,7 +4,7 @@ MCP Server for Agent Building Tools
|
||||
Exposes tools for building goal-driven agents via the Model Context Protocol.
|
||||
|
||||
Usage:
|
||||
python -m framework.mcp.agent_builder_server
|
||||
uv run python -m framework.mcp.agent_builder_server
|
||||
"""
|
||||
|
||||
import json
|
||||
@@ -44,6 +44,7 @@ class BuildSession:
|
||||
self.nodes: list[NodeSpec] = []
|
||||
self.edges: list[EdgeSpec] = []
|
||||
self.mcp_servers: list[dict] = [] # MCP server configurations
|
||||
self.loop_config: dict = {} # LoopConfig parameters for EventLoopNodes
|
||||
self.created_at = datetime.now().isoformat()
|
||||
self.last_modified = datetime.now().isoformat()
|
||||
|
||||
@@ -56,6 +57,7 @@ class BuildSession:
|
||||
"nodes": [n.model_dump() for n in self.nodes],
|
||||
"edges": [e.model_dump() for e in self.edges],
|
||||
"mcp_servers": self.mcp_servers,
|
||||
"loop_config": self.loop_config,
|
||||
"created_at": self.created_at,
|
||||
"last_modified": self.last_modified,
|
||||
}
|
||||
@@ -102,6 +104,9 @@ class BuildSession:
|
||||
# Restore MCP servers
|
||||
session.mcp_servers = data.get("mcp_servers", [])
|
||||
|
||||
# Restore loop config
|
||||
session.loop_config = data.get("loop_config", {})
|
||||
|
||||
return session
|
||||
|
||||
|
||||
@@ -516,19 +521,63 @@ def _validate_tool_credentials(tools_list: list[str]) -> dict | None:
|
||||
return None
|
||||
|
||||
|
||||
def _validate_agent_path(agent_path: str) -> tuple[Path | None, str | None]:
|
||||
"""
|
||||
Validate and normalize agent_path.
|
||||
|
||||
Returns:
|
||||
(Path, None) if valid
|
||||
(None, error_json) if invalid
|
||||
"""
|
||||
if not agent_path:
|
||||
return None, json.dumps(
|
||||
{
|
||||
"success": False,
|
||||
"error": "agent_path is required (e.g., 'exports/my_agent')",
|
||||
}
|
||||
)
|
||||
|
||||
path = Path(agent_path)
|
||||
|
||||
if not path.exists():
|
||||
return None, json.dumps(
|
||||
{
|
||||
"success": False,
|
||||
"error": f"Agent path not found: {path}",
|
||||
"hint": "Run export_graph to create an agent in exports/ first",
|
||||
}
|
||||
)
|
||||
|
||||
return path, None
|
||||
|
||||
|
||||
@mcp.tool()
|
||||
def add_node(
|
||||
node_id: Annotated[str, "Unique identifier for the node"],
|
||||
name: Annotated[str, "Human-readable name"],
|
||||
description: Annotated[str, "What this node does"],
|
||||
node_type: Annotated[str, "Type: llm_generate, llm_tool_use, router, or function"],
|
||||
node_type: Annotated[
|
||||
str,
|
||||
"Type: event_loop (recommended), function, router. "
|
||||
"Deprecated: llm_generate, llm_tool_use (use event_loop instead)",
|
||||
],
|
||||
input_keys: Annotated[str, "JSON array of keys this node reads from shared memory"],
|
||||
output_keys: Annotated[str, "JSON array of keys this node writes to shared memory"],
|
||||
system_prompt: Annotated[str, "Instructions for LLM nodes"] = "",
|
||||
tools: Annotated[str, "JSON array of tool names for llm_tool_use nodes"] = "[]",
|
||||
tools: Annotated[str, "JSON array of tool names for event_loop or llm_tool_use nodes"] = "[]",
|
||||
routes: Annotated[
|
||||
str, "JSON object mapping conditions to target node IDs for router nodes"
|
||||
] = "{}",
|
||||
client_facing: Annotated[
|
||||
bool, "If True, node streams output to user and blocks for input between turns"
|
||||
] = False,
|
||||
nullable_output_keys: Annotated[
|
||||
str, "JSON array of output keys that may remain unset (for mutually exclusive outputs)"
|
||||
] = "[]",
|
||||
max_node_visits: Annotated[
|
||||
int,
|
||||
"Max times this node executes per graph run. Set >1 for feedback loop targets. 0=unlimited",
|
||||
] = 1,
|
||||
) -> str:
|
||||
"""Add a node to the agent graph. Nodes process inputs and produce outputs."""
|
||||
session = get_session()
|
||||
@@ -539,6 +588,7 @@ def add_node(
|
||||
output_keys_list = json.loads(output_keys)
|
||||
tools_list = json.loads(tools)
|
||||
routes_dict = json.loads(routes)
|
||||
nullable_output_keys_list = json.loads(nullable_output_keys)
|
||||
except json.JSONDecodeError as e:
|
||||
return json.dumps(
|
||||
{
|
||||
@@ -567,6 +617,9 @@ def add_node(
|
||||
system_prompt=system_prompt or None,
|
||||
tools=tools_list,
|
||||
routes=routes_dict,
|
||||
client_facing=client_facing,
|
||||
nullable_output_keys=nullable_output_keys_list,
|
||||
max_node_visits=max_node_visits,
|
||||
)
|
||||
|
||||
session.nodes.append(node)
|
||||
@@ -586,6 +639,26 @@ def add_node(
|
||||
if node_type in ("llm_generate", "llm_tool_use") and not system_prompt:
|
||||
warnings.append(f"LLM node '{node_id}' should have a system_prompt")
|
||||
|
||||
# EventLoopNode validation
|
||||
if node_type == "event_loop" and not system_prompt:
|
||||
warnings.append(f"Event loop node '{node_id}' should have a system_prompt")
|
||||
|
||||
# Deprecated type warnings
|
||||
if node_type in ("llm_generate", "llm_tool_use"):
|
||||
warnings.append(
|
||||
f"Node type '{node_type}' is deprecated. Use 'event_loop' instead. "
|
||||
"EventLoopNode supports tool use, streaming, and judge-based evaluation."
|
||||
)
|
||||
|
||||
# nullable_output_keys must be a subset of output_keys
|
||||
if nullable_output_keys_list:
|
||||
invalid_nullable = [k for k in nullable_output_keys_list if k not in output_keys_list]
|
||||
if invalid_nullable:
|
||||
errors.append(
|
||||
f"nullable_output_keys {invalid_nullable} must be a subset of "
|
||||
f"output_keys {output_keys_list}"
|
||||
)
|
||||
|
||||
_save_session(session) # Auto-save
|
||||
|
||||
return json.dumps(
|
||||
@@ -662,6 +735,7 @@ def add_edge(
|
||||
|
||||
# Validate
|
||||
errors = []
|
||||
warnings = []
|
||||
|
||||
if not any(n.id == source for n in session.nodes):
|
||||
errors.append(f"Source node '{source}' not found")
|
||||
@@ -670,12 +744,24 @@ def add_edge(
|
||||
if edge_condition == EdgeCondition.CONDITIONAL and not condition_expr:
|
||||
errors.append(f"Conditional edge '{edge_id}' needs condition_expr")
|
||||
|
||||
# Feedback edge validation
|
||||
if priority < 0:
|
||||
target_node = next((n for n in session.nodes if n.id == target), None)
|
||||
if target_node and target_node.max_node_visits <= 1:
|
||||
warnings.append(
|
||||
f"Edge '{edge_id}' has negative priority (feedback edge) "
|
||||
f"targeting '{target}', but node '{target}' has "
|
||||
f"max_node_visits={target_node.max_node_visits}. "
|
||||
"Consider increasing max_node_visits on the target node."
|
||||
)
|
||||
|
||||
_save_session(session) # Auto-save
|
||||
|
||||
return json.dumps(
|
||||
{
|
||||
"valid": len(errors) == 0,
|
||||
"errors": errors,
|
||||
"warnings": warnings,
|
||||
"edge": edge.model_dump(),
|
||||
"total_edges": len(session.edges),
|
||||
"approval_required": True,
|
||||
@@ -709,12 +795,23 @@ def update_node(
|
||||
node_id: Annotated[str, "ID of the node to update"],
|
||||
name: Annotated[str, "Updated human-readable name"] = "",
|
||||
description: Annotated[str, "Updated description"] = "",
|
||||
node_type: Annotated[str, "Updated type: llm_generate, llm_tool_use, router, or function"] = "",
|
||||
node_type: Annotated[
|
||||
str,
|
||||
"Updated type: event_loop (recommended), function, router. "
|
||||
"Deprecated: llm_generate, llm_tool_use",
|
||||
] = "",
|
||||
input_keys: Annotated[str, "Updated JSON array of input keys"] = "",
|
||||
output_keys: Annotated[str, "Updated JSON array of output keys"] = "",
|
||||
system_prompt: Annotated[str, "Updated instructions for LLM nodes"] = "",
|
||||
tools: Annotated[str, "Updated JSON array of tool names"] = "",
|
||||
routes: Annotated[str, "Updated JSON object mapping conditions to target node IDs"] = "",
|
||||
client_facing: Annotated[
|
||||
str, "Updated client-facing flag ('true'/'false', empty=no change)"
|
||||
] = "",
|
||||
nullable_output_keys: Annotated[
|
||||
str, "Updated JSON array of nullable output keys (empty=no change)"
|
||||
] = "",
|
||||
max_node_visits: Annotated[int, "Updated max node visits per graph run. 0=no change"] = 0,
|
||||
) -> str:
|
||||
"""Update an existing node in the agent graph. Only provided fields will be updated."""
|
||||
session = get_session()
|
||||
@@ -735,6 +832,9 @@ def update_node(
|
||||
output_keys_list = json.loads(output_keys) if output_keys else None
|
||||
tools_list = json.loads(tools) if tools else None
|
||||
routes_dict = json.loads(routes) if routes else None
|
||||
nullable_output_keys_list = (
|
||||
json.loads(nullable_output_keys) if nullable_output_keys else None
|
||||
)
|
||||
except json.JSONDecodeError as e:
|
||||
return json.dumps(
|
||||
{
|
||||
@@ -767,6 +867,12 @@ def update_node(
|
||||
node.tools = tools_list
|
||||
if routes_dict is not None:
|
||||
node.routes = routes_dict
|
||||
if client_facing:
|
||||
node.client_facing = client_facing.lower() == "true"
|
||||
if nullable_output_keys_list is not None:
|
||||
node.nullable_output_keys = nullable_output_keys_list
|
||||
if max_node_visits > 0:
|
||||
node.max_node_visits = max_node_visits
|
||||
|
||||
# Validate
|
||||
errors = []
|
||||
@@ -779,6 +885,26 @@ def update_node(
|
||||
if node.node_type in ("llm_generate", "llm_tool_use") and not node.system_prompt:
|
||||
warnings.append(f"LLM node '{node_id}' should have a system_prompt")
|
||||
|
||||
# EventLoopNode validation
|
||||
if node.node_type == "event_loop" and not node.system_prompt:
|
||||
warnings.append(f"Event loop node '{node_id}' should have a system_prompt")
|
||||
|
||||
# Deprecated type warnings
|
||||
if node.node_type in ("llm_generate", "llm_tool_use"):
|
||||
warnings.append(
|
||||
f"Node type '{node.node_type}' is deprecated. Use 'event_loop' instead. "
|
||||
"EventLoopNode supports tool use, streaming, and judge-based evaluation."
|
||||
)
|
||||
|
||||
# nullable_output_keys must be a subset of output_keys
|
||||
if node.nullable_output_keys:
|
||||
invalid_nullable = [k for k in node.nullable_output_keys if k not in node.output_keys]
|
||||
if invalid_nullable:
|
||||
errors.append(
|
||||
f"nullable_output_keys {invalid_nullable} must be a subset of "
|
||||
f"output_keys {node.output_keys}"
|
||||
)
|
||||
|
||||
_save_session(session) # Auto-save
|
||||
|
||||
return json.dumps(
|
||||
@@ -979,17 +1105,30 @@ def validate_graph() -> str:
|
||||
errors.append(f"Unreachable nodes: {unreachable}")
|
||||
|
||||
# === CONTEXT FLOW VALIDATION ===
|
||||
# Build dependency map (node_id -> list of nodes it depends on)
|
||||
# Build dependency maps — separate forward edges from feedback edges.
|
||||
# Feedback edges (priority < 0) create cycles; they must not block the
|
||||
# topological sort. Context they carry arrives on *revisits*, not on
|
||||
# the first execution of a node.
|
||||
feedback_edge_ids = {e.id for e in session.edges if e.priority < 0}
|
||||
forward_dependencies: dict[str, list[str]] = {node.id: [] for node in session.nodes}
|
||||
feedback_sources: dict[str, list[str]] = {node.id: [] for node in session.nodes}
|
||||
# Combined map kept for error-message generation (all deps)
|
||||
dependencies: dict[str, list[str]] = {node.id: [] for node in session.nodes}
|
||||
|
||||
for edge in session.edges:
|
||||
if edge.target in dependencies:
|
||||
dependencies[edge.target].append(edge.source)
|
||||
if edge.target not in forward_dependencies:
|
||||
continue
|
||||
dependencies[edge.target].append(edge.source)
|
||||
if edge.id in feedback_edge_ids:
|
||||
feedback_sources[edge.target].append(edge.source)
|
||||
else:
|
||||
forward_dependencies[edge.target].append(edge.source)
|
||||
|
||||
# Build output map (node_id -> keys it produces)
|
||||
node_outputs: dict[str, set[str]] = {node.id: set(node.output_keys) for node in session.nodes}
|
||||
|
||||
# Compute available context for each node (what keys it can read)
|
||||
# Using topological order
|
||||
# Using topological order on the forward-edge DAG
|
||||
available_context: dict[str, set[str]] = {}
|
||||
computed = set()
|
||||
nodes_by_id = {n.id: n for n in session.nodes}
|
||||
@@ -999,7 +1138,8 @@ def validate_graph() -> str:
|
||||
# Entry nodes can only read from initial context
|
||||
initial_context_keys: set[str] = set()
|
||||
|
||||
# Compute in topological order
|
||||
# Compute in topological order (forward edges only — feedback edges
|
||||
# don't block, since their context arrives on revisits)
|
||||
remaining = {n.id for n in session.nodes}
|
||||
max_iterations = len(session.nodes) * 2
|
||||
|
||||
@@ -1008,18 +1148,23 @@ def validate_graph() -> str:
|
||||
break
|
||||
|
||||
for node_id in list(remaining):
|
||||
deps = dependencies.get(node_id, [])
|
||||
fwd_deps = forward_dependencies.get(node_id, [])
|
||||
|
||||
# Can compute if all dependencies are computed (or no dependencies)
|
||||
if all(d in computed for d in deps):
|
||||
# Collect outputs from all dependencies
|
||||
# Can compute if all FORWARD dependencies are computed
|
||||
if all(d in computed for d in fwd_deps):
|
||||
# Collect outputs from all forward dependencies
|
||||
available = set(initial_context_keys)
|
||||
for dep_id in deps:
|
||||
# Add outputs from dependency
|
||||
for dep_id in fwd_deps:
|
||||
available.update(node_outputs.get(dep_id, set()))
|
||||
# Also add what was available to the dependency (transitive)
|
||||
available.update(available_context.get(dep_id, set()))
|
||||
|
||||
# Also include context from already-computed feedback
|
||||
# sources (bonus, not blocking)
|
||||
for fb_src in feedback_sources.get(node_id, []):
|
||||
if fb_src in computed:
|
||||
available.update(node_outputs.get(fb_src, set()))
|
||||
available.update(available_context.get(fb_src, set()))
|
||||
|
||||
available_context[node_id] = available
|
||||
computed.add(node_id)
|
||||
remaining.remove(node_id)
|
||||
@@ -1029,15 +1174,37 @@ def validate_graph() -> str:
|
||||
context_errors = []
|
||||
context_warnings = []
|
||||
missing_inputs: dict[str, list[str]] = {}
|
||||
feedback_only_inputs: dict[str, list[str]] = {}
|
||||
|
||||
for node in session.nodes:
|
||||
available = available_context.get(node.id, set())
|
||||
|
||||
for input_key in node.input_keys:
|
||||
if input_key not in available:
|
||||
if node.id not in missing_inputs:
|
||||
missing_inputs[node.id] = []
|
||||
missing_inputs[node.id].append(input_key)
|
||||
# Check if this input is provided by a feedback source
|
||||
fb_provides = set()
|
||||
for fb_src in feedback_sources.get(node.id, []):
|
||||
fb_provides.update(node_outputs.get(fb_src, set()))
|
||||
fb_provides.update(available_context.get(fb_src, set()))
|
||||
|
||||
if input_key in fb_provides:
|
||||
# Input arrives via feedback edge — warn, don't error
|
||||
if node.id not in feedback_only_inputs:
|
||||
feedback_only_inputs[node.id] = []
|
||||
feedback_only_inputs[node.id].append(input_key)
|
||||
else:
|
||||
if node.id not in missing_inputs:
|
||||
missing_inputs[node.id] = []
|
||||
missing_inputs[node.id].append(input_key)
|
||||
|
||||
# Warn about feedback-only inputs (available on revisits, not first run)
|
||||
for node_id, fb_keys in feedback_only_inputs.items():
|
||||
fb_srcs = feedback_sources.get(node_id, [])
|
||||
context_warnings.append(
|
||||
f"Node '{node_id}' input(s) {fb_keys} are only provided via "
|
||||
f"feedback edge(s) from {fb_srcs}. These will be available on "
|
||||
f"revisits but not on the first execution."
|
||||
)
|
||||
|
||||
# Generate helpful error messages
|
||||
for node_id, missing in missing_inputs.items():
|
||||
@@ -1117,6 +1284,87 @@ def validate_graph() -> str:
|
||||
errors.extend(context_errors)
|
||||
warnings.extend(context_warnings)
|
||||
|
||||
# === EventLoopNode-specific validation ===
|
||||
from collections import defaultdict
|
||||
|
||||
# Detect fan-out: multiple ON_SUCCESS edges from same source
|
||||
outgoing_success: dict[str, list[str]] = defaultdict(list)
|
||||
for edge in session.edges:
|
||||
cond = edge.condition.value if hasattr(edge.condition, "value") else edge.condition
|
||||
if cond == "on_success":
|
||||
outgoing_success[edge.source].append(edge.target)
|
||||
|
||||
for source_id, targets in outgoing_success.items():
|
||||
if len(targets) > 1:
|
||||
# Client-facing fan-out: cannot target multiple client_facing nodes
|
||||
cf_targets = [
|
||||
t for t in targets if any(n.id == t and n.client_facing for n in session.nodes)
|
||||
]
|
||||
if len(cf_targets) > 1:
|
||||
errors.append(
|
||||
f"Fan-out from '{source_id}' targets multiple client_facing "
|
||||
f"nodes: {cf_targets}. Only one branch may be client-facing."
|
||||
)
|
||||
|
||||
# Output key overlap on parallel event_loop nodes
|
||||
el_targets = [
|
||||
t
|
||||
for t in targets
|
||||
if any(n.id == t and n.node_type == "event_loop" for n in session.nodes)
|
||||
]
|
||||
if len(el_targets) > 1:
|
||||
seen_keys: dict[str, str] = {}
|
||||
for nid in el_targets:
|
||||
node_obj = next((n for n in session.nodes if n.id == nid), None)
|
||||
if node_obj:
|
||||
for key in node_obj.output_keys:
|
||||
if key in seen_keys:
|
||||
errors.append(
|
||||
f"Fan-out from '{source_id}': event_loop "
|
||||
f"nodes '{seen_keys[key]}' and '{nid}' both "
|
||||
f"write to output_key '{key}'. Parallel "
|
||||
"nodes must have disjoint output_keys."
|
||||
)
|
||||
else:
|
||||
seen_keys[key] = nid
|
||||
|
||||
# Feedback loop validation: targets should allow re-visits
|
||||
for edge in session.edges:
|
||||
if edge.priority < 0:
|
||||
target_node = next((n for n in session.nodes if n.id == edge.target), None)
|
||||
if target_node and target_node.max_node_visits <= 1:
|
||||
warnings.append(
|
||||
f"Feedback edge '{edge.id}' targets '{edge.target}' "
|
||||
f"which has max_node_visits={target_node.max_node_visits}. "
|
||||
"Consider setting max_node_visits > 1."
|
||||
)
|
||||
|
||||
# nullable_output_keys must be subset of output_keys
|
||||
for node in session.nodes:
|
||||
if node.nullable_output_keys:
|
||||
invalid = [k for k in node.nullable_output_keys if k not in node.output_keys]
|
||||
if invalid:
|
||||
errors.append(
|
||||
f"Node '{node.id}': nullable_output_keys {invalid} "
|
||||
f"must be a subset of output_keys {node.output_keys}"
|
||||
)
|
||||
|
||||
# Deprecated node type warnings
|
||||
deprecated_nodes = [
|
||||
{"node_id": n.id, "type": n.node_type, "replacement": "event_loop"}
|
||||
for n in session.nodes
|
||||
if n.node_type in ("llm_generate", "llm_tool_use")
|
||||
]
|
||||
for dn in deprecated_nodes:
|
||||
warnings.append(
|
||||
f"Node '{dn['node_id']}' uses deprecated type '{dn['type']}'. Use 'event_loop' instead."
|
||||
)
|
||||
|
||||
# Collect summary info
|
||||
event_loop_nodes = [n.id for n in session.nodes if n.node_type == "event_loop"]
|
||||
client_facing_nodes = [n.id for n in session.nodes if n.client_facing]
|
||||
feedback_edges = [e.id for e in session.edges if e.priority < 0]
|
||||
|
||||
return json.dumps(
|
||||
{
|
||||
"valid": len(errors) == 0,
|
||||
@@ -1133,6 +1381,10 @@ def validate_graph() -> str:
|
||||
"context_flow": {node_id: list(keys) for node_id, keys in available_context.items()}
|
||||
if available_context
|
||||
else None,
|
||||
"event_loop_nodes": event_loop_nodes,
|
||||
"client_facing_nodes": client_facing_nodes,
|
||||
"feedback_edges": feedback_edges,
|
||||
"deprecated_node_types": deprecated_nodes,
|
||||
}
|
||||
)
|
||||
|
||||
@@ -1183,6 +1435,12 @@ def _generate_readme(session: BuildSession, export_data: dict, all_tools: set) -
|
||||
if node.routes:
|
||||
routes_str = ", ".join([f"{k}→{v}" for k, v in node.routes.items()])
|
||||
node_info.append(f" - Routes: {routes_str}")
|
||||
if node.client_facing:
|
||||
node_info.append(" - Client-facing: Yes (blocks for user input)")
|
||||
if node.nullable_output_keys:
|
||||
node_info.append(f" - Nullable outputs: `{', '.join(node.nullable_output_keys)}`")
|
||||
if node.max_node_visits > 1:
|
||||
node_info.append(f" - Max visits: {node.max_node_visits}")
|
||||
nodes_section.append("\n".join(node_info))
|
||||
|
||||
# Build success criteria section
|
||||
@@ -1236,7 +1494,12 @@ def _generate_readme(session: BuildSession, export_data: dict, all_tools: set) -
|
||||
|
||||
for edge in edges:
|
||||
cond = edge.condition.value if hasattr(edge.condition, "value") else edge.condition
|
||||
readme += f"- `{edge.source}` → `{edge.target}` (condition: {cond})\n"
|
||||
priority_note = f", priority={edge.priority}" if edge.priority != 0 else ""
|
||||
feedback_note = " **[FEEDBACK]**" if edge.priority < 0 else ""
|
||||
readme += (
|
||||
f"- `{edge.source}` → `{edge.target}` "
|
||||
f"(condition: {cond}{priority_note}){feedback_note}\n"
|
||||
)
|
||||
|
||||
readme += f"""
|
||||
|
||||
@@ -1451,6 +1714,10 @@ def export_graph() -> str:
|
||||
"created_at": datetime.now().isoformat(),
|
||||
}
|
||||
|
||||
# Include loop config if configured
|
||||
if session.loop_config:
|
||||
graph_spec["loop_config"] = session.loop_config
|
||||
|
||||
# Collect all tools referenced by nodes
|
||||
all_tools = set()
|
||||
for node in session.nodes:
|
||||
@@ -1566,6 +1833,50 @@ def get_session_status() -> str:
|
||||
"nodes": [n.id for n in session.nodes],
|
||||
"edges": [(e.source, e.target) for e in session.edges],
|
||||
"mcp_servers": [s["name"] for s in session.mcp_servers],
|
||||
"event_loop_nodes": [n.id for n in session.nodes if n.node_type == "event_loop"],
|
||||
"client_facing_nodes": [n.id for n in session.nodes if n.client_facing],
|
||||
"deprecated_nodes": [
|
||||
n.id for n in session.nodes if n.node_type in ("llm_generate", "llm_tool_use")
|
||||
],
|
||||
"feedback_edges": [e.id for e in session.edges if e.priority < 0],
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@mcp.tool()
|
||||
def configure_loop(
|
||||
max_iterations: Annotated[int, "Maximum loop iterations per node execution (default 50)"] = 50,
|
||||
max_tool_calls_per_turn: Annotated[int, "Maximum tool calls per LLM turn (default 10)"] = 10,
|
||||
stall_detection_threshold: Annotated[
|
||||
int, "Consecutive identical responses before stall detection triggers (default 3)"
|
||||
] = 3,
|
||||
max_history_tokens: Annotated[
|
||||
int, "Maximum conversation history tokens before compaction (default 32000)"
|
||||
] = 32000,
|
||||
) -> str:
|
||||
"""Configure event loop parameters for EventLoopNode execution.
|
||||
|
||||
These settings control how EventLoopNodes behave at runtime:
|
||||
- max_iterations: prevents infinite loops
|
||||
- max_tool_calls_per_turn: limits tool calls per LLM response
|
||||
- stall_detection_threshold: detects when LLM repeats itself
|
||||
- max_history_tokens: triggers conversation compaction
|
||||
"""
|
||||
session = get_session()
|
||||
|
||||
session.loop_config = {
|
||||
"max_iterations": max_iterations,
|
||||
"max_tool_calls_per_turn": max_tool_calls_per_turn,
|
||||
"stall_detection_threshold": stall_detection_threshold,
|
||||
"max_history_tokens": max_history_tokens,
|
||||
}
|
||||
|
||||
_save_session(session)
|
||||
|
||||
return json.dumps(
|
||||
{
|
||||
"success": True,
|
||||
"loop_config": session.loop_config,
|
||||
}
|
||||
)
|
||||
|
||||
@@ -1861,10 +2172,41 @@ def test_node(
|
||||
result["routing_options"] = node_spec.routes
|
||||
result["simulation"] = "Router would evaluate routes based on input and select target node"
|
||||
|
||||
elif node_spec.node_type in ("llm_generate", "llm_tool_use"):
|
||||
# Show what prompt would be sent
|
||||
elif node_spec.node_type == "event_loop":
|
||||
# EventLoopNode simulation
|
||||
result["system_prompt"] = node_spec.system_prompt
|
||||
result["available_tools"] = node_spec.tools
|
||||
result["client_facing"] = node_spec.client_facing
|
||||
result["nullable_output_keys"] = node_spec.nullable_output_keys
|
||||
result["max_node_visits"] = node_spec.max_node_visits
|
||||
|
||||
if mock_llm_response:
|
||||
result["mock_response"] = mock_llm_response
|
||||
result["simulation"] = (
|
||||
"EventLoopNode would run a multi-turn streaming loop. "
|
||||
"Each iteration: LLM call -> tool execution -> judge evaluation. "
|
||||
"Loop continues until judge ACCEPTs or max_iterations reached."
|
||||
)
|
||||
else:
|
||||
cf_note = (
|
||||
"Node is client-facing: will block for user input between turns. "
|
||||
if node_spec.client_facing
|
||||
else ""
|
||||
)
|
||||
result["simulation"] = (
|
||||
"EventLoopNode would stream LLM responses, execute tool calls, "
|
||||
"and use judge evaluation to decide when to stop. "
|
||||
+ cf_note
|
||||
+ f"Max visits per graph run: {node_spec.max_node_visits}."
|
||||
)
|
||||
|
||||
elif node_spec.node_type in ("llm_generate", "llm_tool_use"):
|
||||
# Legacy LLM node types
|
||||
result["system_prompt"] = node_spec.system_prompt
|
||||
result["available_tools"] = node_spec.tools
|
||||
result["deprecation_warning"] = (
|
||||
f"Node type '{node_spec.node_type}' is deprecated. Use 'event_loop' instead."
|
||||
)
|
||||
|
||||
if mock_llm_response:
|
||||
result["mock_response"] = mock_llm_response
|
||||
@@ -1879,6 +2221,7 @@ def test_node(
|
||||
result["expected_memory_state"] = {
|
||||
"inputs_available": {k: input_data.get(k, "<not provided>") for k in node_spec.input_keys},
|
||||
"outputs_to_write": node_spec.output_keys,
|
||||
"nullable_outputs": node_spec.nullable_output_keys or [],
|
||||
}
|
||||
|
||||
return json.dumps(
|
||||
@@ -1967,13 +2310,19 @@ def test_graph(
|
||||
"writes": current_node.output_keys,
|
||||
}
|
||||
|
||||
if current_node.node_type in ("llm_generate", "llm_tool_use"):
|
||||
if current_node.node_type in ("llm_generate", "llm_tool_use", "event_loop"):
|
||||
step_info["prompt_preview"] = (
|
||||
current_node.system_prompt[:200] + "..."
|
||||
if current_node.system_prompt and len(current_node.system_prompt) > 200
|
||||
else current_node.system_prompt
|
||||
)
|
||||
step_info["tools_available"] = current_node.tools
|
||||
if current_node.node_type == "event_loop":
|
||||
step_info["event_loop_config"] = {
|
||||
"client_facing": current_node.client_facing,
|
||||
"max_node_visits": current_node.max_node_visits,
|
||||
"nullable_output_keys": current_node.nullable_output_keys,
|
||||
}
|
||||
|
||||
execution_trace.append(step_info)
|
||||
|
||||
@@ -1982,16 +2331,32 @@ def test_graph(
|
||||
step_info["is_terminal"] = True
|
||||
break
|
||||
|
||||
# Find next node via edges
|
||||
# Find next node via edges (sorted by priority, highest first)
|
||||
outgoing = sorted(
|
||||
[e for e in session.edges if e.source == current_node_id],
|
||||
key=lambda e: -e.priority,
|
||||
)
|
||||
next_node = None
|
||||
for edge in session.edges:
|
||||
if edge.source == current_node_id:
|
||||
# In dry run, assume success path
|
||||
if edge.condition.value in ("always", "on_success"):
|
||||
next_node = edge.target
|
||||
step_info["next_node"] = next_node
|
||||
step_info["edge_condition"] = edge.condition.value
|
||||
break
|
||||
for edge in outgoing:
|
||||
# In dry run, follow success/always edges (highest priority first)
|
||||
if edge.condition.value in ("always", "on_success"):
|
||||
next_node = edge.target
|
||||
step_info["next_node"] = next_node
|
||||
step_info["edge_condition"] = edge.condition.value
|
||||
step_info["edge_priority"] = edge.priority
|
||||
break
|
||||
|
||||
# Note any feedback edges from this node
|
||||
feedback = [e for e in outgoing if e.priority < 0]
|
||||
if feedback:
|
||||
step_info["feedback_edges"] = [
|
||||
{
|
||||
"target": e.target,
|
||||
"condition_expr": e.condition_expr,
|
||||
"priority": e.priority,
|
||||
}
|
||||
for e in feedback
|
||||
]
|
||||
|
||||
if next_node is None:
|
||||
step_info["note"] = "No outgoing edge found (end of path)"
|
||||
@@ -2597,10 +2962,11 @@ def generate_constraint_tests(
|
||||
if not agent_path and _session:
|
||||
agent_path = f"exports/{_session.name}"
|
||||
|
||||
if not agent_path:
|
||||
return json.dumps({"error": "agent_path required (e.g., 'exports/my_agent')"})
|
||||
path, err = _validate_agent_path(agent_path)
|
||||
if err:
|
||||
return err
|
||||
|
||||
agent_module = _get_agent_module_from_path(agent_path)
|
||||
agent_module = _get_agent_module_from_path(path)
|
||||
|
||||
# Format constraints for display
|
||||
constraints_formatted = (
|
||||
@@ -2619,9 +2985,9 @@ def generate_constraint_tests(
|
||||
return json.dumps(
|
||||
{
|
||||
"goal_id": goal_id,
|
||||
"agent_path": agent_path,
|
||||
"agent_path": str(path),
|
||||
"agent_module": agent_module,
|
||||
"output_file": f"{agent_path}/tests/test_constraints.py",
|
||||
"output_file": f"{str(path)}/tests/test_constraints.py",
|
||||
"constraints": [c.model_dump() for c in goal.constraints] if goal.constraints else [],
|
||||
"constraints_formatted": constraints_formatted,
|
||||
"test_guidelines": {
|
||||
@@ -2677,10 +3043,11 @@ def generate_success_tests(
|
||||
if not agent_path and _session:
|
||||
agent_path = f"exports/{_session.name}"
|
||||
|
||||
if not agent_path:
|
||||
return json.dumps({"error": "agent_path required (e.g., 'exports/my_agent')"})
|
||||
path, err = _validate_agent_path(agent_path)
|
||||
if err:
|
||||
return err
|
||||
|
||||
agent_module = _get_agent_module_from_path(agent_path)
|
||||
agent_module = _get_agent_module_from_path(path)
|
||||
|
||||
# Parse node/tool names for context
|
||||
nodes = [n.strip() for n in node_names.split(",") if n.strip()]
|
||||
@@ -2705,9 +3072,9 @@ def generate_success_tests(
|
||||
return json.dumps(
|
||||
{
|
||||
"goal_id": goal_id,
|
||||
"agent_path": agent_path,
|
||||
"agent_path": str(path),
|
||||
"agent_module": agent_module,
|
||||
"output_file": f"{agent_path}/tests/test_success_criteria.py",
|
||||
"output_file": f"{str(path)}/tests/test_success_criteria.py",
|
||||
"success_criteria": [c.model_dump() for c in goal.success_criteria]
|
||||
if goal.success_criteria
|
||||
else [],
|
||||
@@ -2766,7 +3133,11 @@ def run_tests(
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
tests_dir = Path(agent_path) / "tests"
|
||||
path, err = _validate_agent_path(agent_path)
|
||||
if err:
|
||||
return err
|
||||
|
||||
tests_dir = path / "tests"
|
||||
|
||||
if not tests_dir.exists():
|
||||
return json.dumps(
|
||||
@@ -2957,10 +3328,11 @@ def debug_test(
|
||||
if not agent_path and _session:
|
||||
agent_path = f"exports/{_session.name}"
|
||||
|
||||
if not agent_path:
|
||||
return json.dumps({"error": "agent_path required (e.g., 'exports/my_agent')"})
|
||||
path, err = _validate_agent_path(agent_path)
|
||||
if err:
|
||||
return err
|
||||
|
||||
tests_dir = Path(agent_path) / "tests"
|
||||
tests_dir = path / "tests"
|
||||
|
||||
if not tests_dir.exists():
|
||||
return json.dumps(
|
||||
@@ -3101,10 +3473,11 @@ def list_tests(
|
||||
if not agent_path and _session:
|
||||
agent_path = f"exports/{_session.name}"
|
||||
|
||||
if not agent_path:
|
||||
return json.dumps({"error": "agent_path required (e.g., 'exports/my_agent')"})
|
||||
path, err = _validate_agent_path(agent_path)
|
||||
if err:
|
||||
return err
|
||||
|
||||
tests_dir = Path(agent_path) / "tests"
|
||||
tests_dir = path / "tests"
|
||||
|
||||
if not tests_dir.exists():
|
||||
return json.dumps(
|
||||
@@ -3379,7 +3752,7 @@ def store_credential(
|
||||
display_name: Annotated[str, "Human-readable name (e.g., 'HubSpot Access Token')"] = "",
|
||||
) -> str:
|
||||
"""
|
||||
Store a credential securely in the encrypted credential store at ~/.hive/credentials.
|
||||
Store a credential securely in the local encrypted store at ~/.hive/credentials.
|
||||
|
||||
Uses Fernet encryption (AES-128-CBC + HMAC). Requires HIVE_CREDENTIAL_KEY env var.
|
||||
"""
|
||||
@@ -3421,7 +3794,7 @@ def store_credential(
|
||||
@mcp.tool()
|
||||
def list_stored_credentials() -> str:
|
||||
"""
|
||||
List all credentials currently stored in the encrypted credential store.
|
||||
List all credentials currently stored in the local encrypted store.
|
||||
|
||||
Returns credential IDs and metadata (never returns secret values).
|
||||
"""
|
||||
@@ -3461,7 +3834,7 @@ def delete_stored_credential(
|
||||
credential_name: Annotated[str, "Logical credential name to delete (e.g., 'hubspot')"],
|
||||
) -> str:
|
||||
"""
|
||||
Delete a credential from the encrypted credential store.
|
||||
Delete a credential from the local encrypted store.
|
||||
"""
|
||||
try:
|
||||
store = _get_credential_store()
|
||||
|
||||
@@ -56,6 +56,18 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
|
||||
action="store_true",
|
||||
help="Show detailed execution logs (steps, LLM calls, etc.)",
|
||||
)
|
||||
run_parser.add_argument(
|
||||
"--tui",
|
||||
action="store_true",
|
||||
help="Launch interactive terminal dashboard",
|
||||
)
|
||||
run_parser.add_argument(
|
||||
"--model",
|
||||
"-m",
|
||||
type=str,
|
||||
default=None,
|
||||
help="LLM model to use (any LiteLLM-compatible name)",
|
||||
)
|
||||
run_parser.set_defaults(func=cmd_run)
|
||||
|
||||
# info command
|
||||
@@ -205,38 +217,83 @@ def cmd_run(args: argparse.Namespace) -> int:
|
||||
print(f"Error reading input file: {e}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Load and run agent
|
||||
try:
|
||||
runner = AgentRunner.load(
|
||||
args.agent_path,
|
||||
mock_mode=args.mock,
|
||||
model=getattr(args, "model", "claude-haiku-4-5-20251001"),
|
||||
)
|
||||
except FileNotFoundError as e:
|
||||
print(f"Error: {e}", file=sys.stderr)
|
||||
return 1
|
||||
# Run the agent (with TUI or standard)
|
||||
if getattr(args, "tui", False):
|
||||
from framework.tui.app import AdenTUI
|
||||
|
||||
# Auto-inject user_id if the agent expects it but it's not provided
|
||||
entry_input_keys = runner.graph.nodes[0].input_keys if runner.graph.nodes else []
|
||||
if "user_id" in entry_input_keys and context.get("user_id") is None:
|
||||
import os
|
||||
async def run_with_tui():
|
||||
try:
|
||||
# Load runner inside the async loop to ensure strict loop affinity
|
||||
# (only one load — avoids spawning duplicate MCP subprocesses)
|
||||
try:
|
||||
runner = AgentRunner.load(
|
||||
args.agent_path,
|
||||
mock_mode=args.mock,
|
||||
model=args.model,
|
||||
enable_tui=True,
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"Error loading agent: {e}")
|
||||
return
|
||||
|
||||
context["user_id"] = os.environ.get("USER", "default_user")
|
||||
# Force setup inside the loop
|
||||
if runner._agent_runtime is None:
|
||||
runner._setup()
|
||||
|
||||
if not args.quiet:
|
||||
info = runner.info()
|
||||
print(f"Agent: {info.name}")
|
||||
print(f"Goal: {info.goal_name}")
|
||||
print(f"Steps: {info.node_count}")
|
||||
print(f"Input: {json.dumps(context)}")
|
||||
print()
|
||||
print("=" * 60)
|
||||
print("Executing agent...")
|
||||
print("=" * 60)
|
||||
print()
|
||||
# Start runtime before TUI so it's ready for user input
|
||||
if runner._agent_runtime and not runner._agent_runtime.is_running:
|
||||
await runner._agent_runtime.start()
|
||||
|
||||
# Run the agent
|
||||
result = asyncio.run(runner.run(context))
|
||||
app = AdenTUI(runner._agent_runtime)
|
||||
|
||||
# TUI handles execution via ChatRepl — user submits input,
|
||||
# ChatRepl calls runtime.trigger_and_wait(). No auto-launch.
|
||||
await app.run_async()
|
||||
except Exception as e:
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
print(f"TUI error: {e}")
|
||||
|
||||
await runner.cleanup_async()
|
||||
return None
|
||||
|
||||
asyncio.run(run_with_tui())
|
||||
print("TUI session ended.")
|
||||
return 0
|
||||
else:
|
||||
# Standard execution — load runner here (not shared with TUI path)
|
||||
try:
|
||||
runner = AgentRunner.load(
|
||||
args.agent_path,
|
||||
mock_mode=args.mock,
|
||||
model=args.model,
|
||||
enable_tui=False,
|
||||
)
|
||||
except FileNotFoundError as e:
|
||||
print(f"Error: {e}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Auto-inject user_id if the agent expects it but it's not provided
|
||||
entry_input_keys = runner.graph.nodes[0].input_keys if runner.graph.nodes else []
|
||||
if "user_id" in entry_input_keys and context.get("user_id") is None:
|
||||
import os
|
||||
|
||||
context["user_id"] = os.environ.get("USER", "default_user")
|
||||
|
||||
if not args.quiet:
|
||||
info = runner.info()
|
||||
print(f"Agent: {info.name}")
|
||||
print(f"Goal: {info.goal_name}")
|
||||
print(f"Steps: {info.node_count}")
|
||||
print(f"Input: {json.dumps(context)}")
|
||||
print()
|
||||
print("=" * 60)
|
||||
print("Executing agent...")
|
||||
print("=" * 60)
|
||||
print()
|
||||
|
||||
result = asyncio.run(runner.run(context))
|
||||
|
||||
# Format output
|
||||
output = {
|
||||
|
||||
@@ -362,6 +362,15 @@ class MCPClient:
|
||||
# Call tool using persistent session
|
||||
result = await self._session.call_tool(tool_name, arguments=arguments)
|
||||
|
||||
# Check for server-side errors (validation failures, tool exceptions, etc.)
|
||||
if getattr(result, "isError", False):
|
||||
error_text = ""
|
||||
if result.content:
|
||||
content_item = result.content[0]
|
||||
if hasattr(content_item, "text"):
|
||||
error_text = content_item.text
|
||||
raise RuntimeError(f"MCP tool '{tool_name}' failed: {error_text}")
|
||||
|
||||
# Extract content
|
||||
if result.content:
|
||||
# MCP returns content as a list of content items
|
||||
|
||||
+214
-40
@@ -28,6 +28,33 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
# Configuration paths
|
||||
HIVE_CONFIG_FILE = Path.home() / ".hive" / "configuration.json"
|
||||
|
||||
|
||||
def _ensure_credential_key_env() -> None:
|
||||
"""Load HIVE_CREDENTIAL_KEY from shell config if not already in environment.
|
||||
|
||||
The setup-credentials skill writes the encryption key to ~/.zshrc or ~/.bashrc.
|
||||
If the user hasn't sourced their config in the current shell, this reads it
|
||||
directly so the runner (and any MCP subprocesses it spawns) can unlock the
|
||||
encrypted credential store.
|
||||
|
||||
Only HIVE_CREDENTIAL_KEY is loaded this way — all other secrets (API keys, etc.)
|
||||
come from the credential store itself.
|
||||
"""
|
||||
if os.environ.get("HIVE_CREDENTIAL_KEY"):
|
||||
return
|
||||
|
||||
try:
|
||||
from aden_tools.credentials.shell_config import check_env_var_in_shell_config
|
||||
|
||||
found, value = check_env_var_in_shell_config("HIVE_CREDENTIAL_KEY")
|
||||
if found and value:
|
||||
os.environ["HIVE_CREDENTIAL_KEY"] = value
|
||||
logger.debug("Loaded HIVE_CREDENTIAL_KEY from shell config")
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
CLAUDE_CREDENTIALS_FILE = Path.home() / ".claude" / ".credentials.json"
|
||||
|
||||
|
||||
@@ -236,6 +263,15 @@ class AgentRunner:
|
||||
result = await runner.run({"lead_id": "123"})
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def _resolve_default_model() -> str:
|
||||
"""Resolve the default model from ~/.hive/configuration.json."""
|
||||
config = get_hive_config()
|
||||
llm = config.get("llm", {})
|
||||
if llm.get("provider") and llm.get("model"):
|
||||
return f"{llm['provider']}/{llm['model']}"
|
||||
return "anthropic/claude-sonnet-4-20250514"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
agent_path: Path,
|
||||
@@ -243,7 +279,8 @@ class AgentRunner:
|
||||
goal: Goal,
|
||||
mock_mode: bool = False,
|
||||
storage_path: Path | None = None,
|
||||
model: str = "cerebras/zai-glm-4.7",
|
||||
model: str | None = None,
|
||||
enable_tui: bool = False,
|
||||
):
|
||||
"""
|
||||
Initialize the runner (use AgentRunner.load() instead).
|
||||
@@ -254,14 +291,15 @@ class AgentRunner:
|
||||
goal: Loaded Goal object
|
||||
mock_mode: If True, use mock LLM responses
|
||||
storage_path: Path for runtime storage (defaults to temp)
|
||||
model: Model to use - any LiteLLM-compatible model name
|
||||
(e.g., "claude-sonnet-4-20250514", "gpt-4o-mini", "gemini/gemini-pro")
|
||||
model: Model to use (reads from agent config or ~/.hive/configuration.json if None)
|
||||
enable_tui: If True, forces use of AgentRuntime with EventBus
|
||||
"""
|
||||
self.agent_path = agent_path
|
||||
self.graph = graph
|
||||
self.goal = goal
|
||||
self.mock_mode = mock_mode
|
||||
self.model = model
|
||||
self.model = model or self._resolve_default_model()
|
||||
self.enable_tui = enable_tui
|
||||
|
||||
# Set up storage
|
||||
if storage_path:
|
||||
@@ -275,6 +313,10 @@ class AgentRunner:
|
||||
self._storage_path = default_storage
|
||||
self._temp_dir = None
|
||||
|
||||
# Load HIVE_CREDENTIAL_KEY from shell config if not in env.
|
||||
# Must happen before MCP subprocesses are spawned so they inherit it.
|
||||
_ensure_credential_key_env()
|
||||
|
||||
# Initialize components
|
||||
self._tool_registry = ToolRegistry()
|
||||
self._runtime: Runtime | None = None
|
||||
@@ -296,32 +338,121 @@ class AgentRunner:
|
||||
if mcp_config_path.exists():
|
||||
self._load_mcp_servers_from_config(mcp_config_path)
|
||||
|
||||
@staticmethod
|
||||
def _import_agent_module(agent_path: Path):
|
||||
"""Import an agent package from its directory path.
|
||||
|
||||
Tries package import first (works when exports/ is on sys.path,
|
||||
which cli.py:_configure_paths() ensures). Falls back to direct
|
||||
file import of agent.py via importlib.util.
|
||||
"""
|
||||
import importlib
|
||||
|
||||
package_name = agent_path.name
|
||||
|
||||
# Try importing as a package (works when exports/ is on sys.path)
|
||||
try:
|
||||
return importlib.import_module(package_name)
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
# Fallback: import agent.py directly via file path
|
||||
import importlib.util
|
||||
|
||||
agent_py = agent_path / "agent.py"
|
||||
if not agent_py.exists():
|
||||
raise FileNotFoundError(
|
||||
f"No importable agent found at {agent_path}. "
|
||||
f"Expected a Python package with agent.py."
|
||||
)
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
f"{package_name}.agent",
|
||||
agent_py,
|
||||
submodule_search_locations=[str(agent_path)],
|
||||
)
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(module)
|
||||
return module
|
||||
|
||||
@classmethod
|
||||
def load(
|
||||
cls,
|
||||
agent_path: str | Path,
|
||||
mock_mode: bool = False,
|
||||
storage_path: Path | None = None,
|
||||
model: str = "cerebras/zai-glm-4.7",
|
||||
model: str | None = None,
|
||||
enable_tui: bool = False,
|
||||
) -> "AgentRunner":
|
||||
"""
|
||||
Load an agent from an export folder.
|
||||
|
||||
Imports the agent's Python package and reads module-level variables
|
||||
(goal, nodes, edges, etc.) to build a GraphSpec. Falls back to
|
||||
agent.json if no Python module is found.
|
||||
|
||||
Args:
|
||||
agent_path: Path to agent folder (containing agent.json)
|
||||
agent_path: Path to agent folder
|
||||
mock_mode: If True, use mock LLM responses
|
||||
storage_path: Path for runtime storage (defaults to temp)
|
||||
model: LLM model to use (any LiteLLM-compatible model name)
|
||||
storage_path: Path for runtime storage (defaults to ~/.hive/storage/{name})
|
||||
model: LLM model to use (reads from agent's default_config if None)
|
||||
enable_tui: If True, forces use of AgentRuntime with EventBus
|
||||
|
||||
Returns:
|
||||
AgentRunner instance ready to run
|
||||
"""
|
||||
agent_path = Path(agent_path)
|
||||
|
||||
# Load agent.json
|
||||
# Try loading from Python module first (code-based agents)
|
||||
agent_py = agent_path / "agent.py"
|
||||
if agent_py.exists():
|
||||
agent_module = cls._import_agent_module(agent_path)
|
||||
|
||||
goal = getattr(agent_module, "goal", None)
|
||||
nodes = getattr(agent_module, "nodes", None)
|
||||
edges = getattr(agent_module, "edges", None)
|
||||
|
||||
if goal is None or nodes is None or edges is None:
|
||||
raise ValueError(
|
||||
f"Agent at {agent_path} must define 'goal', 'nodes', and 'edges' "
|
||||
f"in agent.py (or __init__.py)"
|
||||
)
|
||||
|
||||
# Read model and max_tokens from agent's config if not explicitly provided
|
||||
agent_config = getattr(agent_module, "default_config", None)
|
||||
if model is None:
|
||||
if agent_config and hasattr(agent_config, "model"):
|
||||
model = agent_config.model
|
||||
|
||||
max_tokens = getattr(agent_config, "max_tokens", 1024) if agent_config else 1024
|
||||
|
||||
# Build GraphSpec from module-level variables
|
||||
graph = GraphSpec(
|
||||
id=f"{agent_path.name}-graph",
|
||||
goal_id=goal.id,
|
||||
version="1.0.0",
|
||||
entry_node=getattr(agent_module, "entry_node", nodes[0].id),
|
||||
entry_points=getattr(agent_module, "entry_points", {}),
|
||||
terminal_nodes=getattr(agent_module, "terminal_nodes", []),
|
||||
pause_nodes=getattr(agent_module, "pause_nodes", []),
|
||||
nodes=nodes,
|
||||
edges=edges,
|
||||
max_tokens=max_tokens,
|
||||
)
|
||||
|
||||
return cls(
|
||||
agent_path=agent_path,
|
||||
graph=graph,
|
||||
goal=goal,
|
||||
mock_mode=mock_mode,
|
||||
storage_path=storage_path,
|
||||
model=model,
|
||||
enable_tui=enable_tui,
|
||||
)
|
||||
|
||||
# Fallback: load from agent.json (legacy JSON-based agents)
|
||||
agent_json_path = agent_path / "agent.json"
|
||||
if not agent_json_path.exists():
|
||||
raise FileNotFoundError(f"agent.json not found in {agent_path}")
|
||||
raise FileNotFoundError(f"No agent.py or agent.json found in {agent_path}")
|
||||
|
||||
with open(agent_json_path) as f:
|
||||
graph, goal = load_agent_export(f.read())
|
||||
@@ -333,6 +464,7 @@ class AgentRunner:
|
||||
mock_mode=mock_mode,
|
||||
storage_path=storage_path,
|
||||
model=model,
|
||||
enable_tui=enable_tui,
|
||||
)
|
||||
|
||||
def register_tool(
|
||||
@@ -411,25 +543,8 @@ class AgentRunner:
|
||||
return self._tool_registry.register_mcp_server(server_config)
|
||||
|
||||
def _load_mcp_servers_from_config(self, config_path: Path) -> None:
|
||||
"""
|
||||
Load and register MCP servers from a configuration file.
|
||||
|
||||
Args:
|
||||
config_path: Path to mcp_servers.json file
|
||||
"""
|
||||
try:
|
||||
with open(config_path) as f:
|
||||
config = json.load(f)
|
||||
|
||||
servers = config.get("servers", [])
|
||||
for server_config in servers:
|
||||
try:
|
||||
self._tool_registry.register_mcp_server(server_config)
|
||||
except Exception as e:
|
||||
server_name = server_config.get("name", "unknown")
|
||||
logger.warning(f"Failed to register MCP server '{server_name}': {e}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to load MCP servers config from {config_path}: {e}")
|
||||
"""Load and register MCP servers from a configuration file."""
|
||||
self._tool_registry.load_mcp_config(config_path)
|
||||
|
||||
def set_approval_callback(self, callback: Callable) -> None:
|
||||
"""
|
||||
@@ -488,16 +603,25 @@ class AgentRunner:
|
||||
api_key_env = self._get_api_key_env_var(self.model)
|
||||
if api_key_env and os.environ.get(api_key_env):
|
||||
self._llm = LiteLLMProvider(model=self.model)
|
||||
elif api_key_env:
|
||||
print(f"Warning: {api_key_env} not set. LLM calls will fail.")
|
||||
print(f"Set it with: export {api_key_env}=your-api-key")
|
||||
else:
|
||||
# Fall back to credential store
|
||||
api_key = self._get_api_key_from_credential_store()
|
||||
if api_key:
|
||||
self._llm = LiteLLMProvider(model=self.model, api_key=api_key)
|
||||
# Set env var so downstream code (e.g. cleanup LLM in
|
||||
# node._extract_json) can also find it
|
||||
if api_key_env:
|
||||
os.environ[api_key_env] = api_key
|
||||
elif api_key_env:
|
||||
print(f"Warning: {api_key_env} not set. LLM calls will fail.")
|
||||
print(f"Set it with: export {api_key_env}=your-api-key")
|
||||
|
||||
# Get tools for executor/runtime
|
||||
tools = list(self._tool_registry.get_tools().values())
|
||||
tool_executor = self._tool_registry.get_executor()
|
||||
|
||||
if self._uses_async_entry_points:
|
||||
# Multi-entry-point mode: use AgentRuntime
|
||||
if self._uses_async_entry_points or self.enable_tui:
|
||||
# Multi-entry-point mode or TUI mode: use AgentRuntime
|
||||
self._setup_agent_runtime(tools, tool_executor)
|
||||
else:
|
||||
# Single-entry-point mode: use legacy GraphExecutor
|
||||
@@ -535,6 +659,33 @@ class AgentRunner:
|
||||
# Default: assume OpenAI-compatible
|
||||
return "OPENAI_API_KEY"
|
||||
|
||||
def _get_api_key_from_credential_store(self) -> str | None:
|
||||
"""Get the LLM API key from the encrypted credential store.
|
||||
|
||||
Maps model name to credential store ID (e.g. "anthropic/..." -> "anthropic")
|
||||
and retrieves the key via CredentialStore.get().
|
||||
"""
|
||||
if not os.environ.get("HIVE_CREDENTIAL_KEY"):
|
||||
return None
|
||||
|
||||
# Map model prefix to credential store ID
|
||||
model_lower = self.model.lower()
|
||||
cred_id = None
|
||||
if model_lower.startswith("anthropic/") or model_lower.startswith("claude"):
|
||||
cred_id = "anthropic"
|
||||
# Add more mappings as providers are added to LLM_CREDENTIALS
|
||||
|
||||
if cred_id is None:
|
||||
return None
|
||||
|
||||
try:
|
||||
from framework.credentials import CredentialStore
|
||||
|
||||
store = CredentialStore.with_encrypted_storage()
|
||||
return store.get(cred_id)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def _setup_legacy_executor(self, tools: list, tool_executor: Callable | None) -> None:
|
||||
"""Set up legacy single-entry-point execution using GraphExecutor."""
|
||||
# Create runtime
|
||||
@@ -566,6 +717,19 @@ class AgentRunner:
|
||||
)
|
||||
entry_points.append(ep)
|
||||
|
||||
# If TUI enabled but no entry points (single-entry agent), create default
|
||||
if not entry_points and self.enable_tui and self.graph.entry_node:
|
||||
logger.info("Creating default entry point for TUI")
|
||||
entry_points.append(
|
||||
EntryPointSpec(
|
||||
id="default",
|
||||
name="Default",
|
||||
entry_node=self.graph.entry_node,
|
||||
trigger_type="manual",
|
||||
isolation_level="shared",
|
||||
)
|
||||
)
|
||||
|
||||
# Create AgentRuntime with all entry points
|
||||
self._agent_runtime = create_agent_runtime(
|
||||
graph=self.graph,
|
||||
@@ -616,7 +780,7 @@ class AgentRunner:
|
||||
error=error_msg,
|
||||
)
|
||||
|
||||
if self._uses_async_entry_points:
|
||||
if self._uses_async_entry_points or self.enable_tui:
|
||||
# Multi-entry-point mode: use AgentRuntime
|
||||
return await self._run_with_agent_runtime(
|
||||
input_data=input_data or {},
|
||||
@@ -908,15 +1072,25 @@ class AgentRunner:
|
||||
EnvVarStorage,
|
||||
)
|
||||
|
||||
# Build env mapping for fallback
|
||||
# Build env mapping for credential lookup
|
||||
env_mapping = {
|
||||
(spec.credential_id or name): spec.env_var
|
||||
for name, spec in CREDENTIAL_SPECS.items()
|
||||
}
|
||||
storage = CompositeStorage(
|
||||
primary=EncryptedFileStorage(),
|
||||
fallbacks=[EnvVarStorage(env_mapping=env_mapping)],
|
||||
)
|
||||
|
||||
# Only use EncryptedFileStorage if the encryption key is configured;
|
||||
# otherwise just check env vars (avoids generating a throwaway key)
|
||||
storages: list = [EnvVarStorage(env_mapping=env_mapping)]
|
||||
if os.environ.get("HIVE_CREDENTIAL_KEY"):
|
||||
storages.insert(0, EncryptedFileStorage())
|
||||
|
||||
if len(storages) == 1:
|
||||
storage = storages[0]
|
||||
else:
|
||||
storage = CompositeStorage(
|
||||
primary=storages[0],
|
||||
fallbacks=storages[1:],
|
||||
)
|
||||
store = CredentialStore(storage=storage)
|
||||
|
||||
# Build reverse mappings
|
||||
|
||||
@@ -33,6 +33,11 @@ class ToolRegistry:
|
||||
4. Manually registered tools
|
||||
"""
|
||||
|
||||
# Framework-internal context keys injected into tool calls.
|
||||
# Stripped from LLM-facing schemas (the LLM doesn't know these values)
|
||||
# and auto-injected at call time for tools that accept them.
|
||||
CONTEXT_PARAMS = frozenset({"workspace_id", "agent_id", "session_id"})
|
||||
|
||||
def __init__(self):
|
||||
self._tools: dict[str, RegisteredTool] = {}
|
||||
self._mcp_clients: list[Any] = [] # List of MCPClient instances
|
||||
@@ -257,6 +262,43 @@ class ToolRegistry:
|
||||
"""
|
||||
self._session_context.update(context)
|
||||
|
||||
def load_mcp_config(self, config_path: Path) -> None:
|
||||
"""
|
||||
Load and register MCP servers from a config file.
|
||||
|
||||
Resolves relative ``cwd`` paths against the config file's parent
|
||||
directory so callers never need to handle path resolution themselves.
|
||||
|
||||
Args:
|
||||
config_path: Path to an ``mcp_servers.json`` file.
|
||||
"""
|
||||
try:
|
||||
with open(config_path) as f:
|
||||
config = json.load(f)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to load MCP config from {config_path}: {e}")
|
||||
return
|
||||
|
||||
base_dir = config_path.parent
|
||||
|
||||
# Support both formats:
|
||||
# {"servers": [{"name": "x", ...}]} (list format)
|
||||
# {"server-name": {"transport": ...}, ...} (dict format)
|
||||
server_list = config.get("servers", [])
|
||||
if not server_list and "servers" not in config:
|
||||
# Treat top-level keys as server names
|
||||
server_list = [{"name": name, **cfg} for name, cfg in config.items()]
|
||||
|
||||
for server_config in server_list:
|
||||
cwd = server_config.get("cwd")
|
||||
if cwd and not Path(cwd).is_absolute():
|
||||
server_config["cwd"] = str((base_dir / cwd).resolve())
|
||||
try:
|
||||
self.register_mcp_server(server_config)
|
||||
except Exception as e:
|
||||
name = server_config.get("name", "unknown")
|
||||
logger.warning(f"Failed to register MCP server '{name}': {e}")
|
||||
|
||||
def register_mcp_server(
|
||||
self,
|
||||
server_config: dict[str, Any],
|
||||
@@ -305,15 +347,25 @@ class ToolRegistry:
|
||||
# Register each tool
|
||||
count = 0
|
||||
for mcp_tool in client.list_tools():
|
||||
# Convert MCP tool to framework Tool
|
||||
# Convert MCP tool to framework Tool (strips context params from LLM schema)
|
||||
tool = self._convert_mcp_tool_to_framework_tool(mcp_tool)
|
||||
|
||||
# Create executor that calls the MCP server
|
||||
def make_mcp_executor(client_ref: MCPClient, tool_name: str, registry_ref):
|
||||
def make_mcp_executor(
|
||||
client_ref: MCPClient,
|
||||
tool_name: str,
|
||||
registry_ref,
|
||||
tool_params: set[str],
|
||||
):
|
||||
def executor(inputs: dict) -> Any:
|
||||
try:
|
||||
# Inject session context for tools that need it
|
||||
merged_inputs = {**registry_ref._session_context, **inputs}
|
||||
# Only inject session context params the tool accepts
|
||||
filtered_context = {
|
||||
k: v
|
||||
for k, v in registry_ref._session_context.items()
|
||||
if k in tool_params
|
||||
}
|
||||
merged_inputs = {**filtered_context, **inputs}
|
||||
result = client_ref.call_tool(tool_name, merged_inputs)
|
||||
# MCP tools return content array, extract the result
|
||||
if isinstance(result, list) and len(result) > 0:
|
||||
@@ -327,10 +379,11 @@ class ToolRegistry:
|
||||
|
||||
return executor
|
||||
|
||||
tool_params = set(mcp_tool.input_schema.get("properties", {}).keys())
|
||||
self.register(
|
||||
mcp_tool.name,
|
||||
tool,
|
||||
make_mcp_executor(client, mcp_tool.name, self),
|
||||
make_mcp_executor(client, mcp_tool.name, self, tool_params),
|
||||
)
|
||||
count += 1
|
||||
|
||||
@@ -356,6 +409,11 @@ class ToolRegistry:
|
||||
properties = input_schema.get("properties", {})
|
||||
required = input_schema.get("required", [])
|
||||
|
||||
# Strip framework-internal context params from LLM-facing schema.
|
||||
# The LLM can't know these values; they're auto-injected at call time.
|
||||
properties = {k: v for k, v in properties.items() if k not in self.CONTEXT_PARAMS}
|
||||
required = [r for r in required if r not in self.CONTEXT_PARAMS]
|
||||
|
||||
# Convert to framework Tool format
|
||||
tool = Tool(
|
||||
name=mcp_tool.name,
|
||||
|
||||
@@ -296,6 +296,25 @@ class AgentRuntime:
|
||||
raise ValueError(f"Entry point '{entry_point_id}' not found")
|
||||
return await stream.wait_for_completion(exec_id, timeout)
|
||||
|
||||
async def inject_input(self, node_id: str, content: str) -> bool:
|
||||
"""Inject user input into a running client-facing node.
|
||||
|
||||
Routes input to the EventLoopNode identified by ``node_id``
|
||||
across all active streams. Used by the TUI ChatRepl to deliver
|
||||
user responses during client-facing node execution.
|
||||
|
||||
Args:
|
||||
node_id: The node currently waiting for input
|
||||
content: The user's input text
|
||||
|
||||
Returns:
|
||||
True if input was delivered, False if no matching node found
|
||||
"""
|
||||
for stream in self._streams.values():
|
||||
if await stream.inject_input(node_id, content):
|
||||
return True
|
||||
return False
|
||||
|
||||
async def get_goal_progress(self) -> dict[str, Any]:
|
||||
"""
|
||||
Evaluate goal progress across all streams.
|
||||
|
||||
@@ -12,13 +12,13 @@ import logging
|
||||
from collections.abc import Awaitable, Callable
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EventType(str, Enum):
|
||||
class EventType(StrEnum):
|
||||
"""Types of events that can be published."""
|
||||
|
||||
# Execution lifecycle
|
||||
@@ -41,6 +41,28 @@ class EventType(str, Enum):
|
||||
STREAM_STARTED = "stream_started"
|
||||
STREAM_STOPPED = "stream_stopped"
|
||||
|
||||
# Node event-loop lifecycle
|
||||
NODE_LOOP_STARTED = "node_loop_started"
|
||||
NODE_LOOP_ITERATION = "node_loop_iteration"
|
||||
NODE_LOOP_COMPLETED = "node_loop_completed"
|
||||
|
||||
# LLM streaming observability
|
||||
LLM_TEXT_DELTA = "llm_text_delta"
|
||||
LLM_REASONING_DELTA = "llm_reasoning_delta"
|
||||
|
||||
# Tool lifecycle
|
||||
TOOL_CALL_STARTED = "tool_call_started"
|
||||
TOOL_CALL_COMPLETED = "tool_call_completed"
|
||||
|
||||
# Client I/O (client_facing=True nodes only)
|
||||
CLIENT_OUTPUT_DELTA = "client_output_delta"
|
||||
CLIENT_INPUT_REQUESTED = "client_input_requested"
|
||||
|
||||
# Internal node observability (client_facing=False nodes)
|
||||
NODE_INTERNAL_OUTPUT = "node_internal_output"
|
||||
NODE_INPUT_BLOCKED = "node_input_blocked"
|
||||
NODE_STALLED = "node_stalled"
|
||||
|
||||
# Custom events
|
||||
CUSTOM = "custom"
|
||||
|
||||
@@ -51,6 +73,7 @@ class AgentEvent:
|
||||
|
||||
type: EventType
|
||||
stream_id: str
|
||||
node_id: str | None = None # Which node emitted this event
|
||||
execution_id: str | None = None
|
||||
data: dict[str, Any] = field(default_factory=dict)
|
||||
timestamp: datetime = field(default_factory=datetime.now)
|
||||
@@ -61,6 +84,7 @@ class AgentEvent:
|
||||
return {
|
||||
"type": self.type.value,
|
||||
"stream_id": self.stream_id,
|
||||
"node_id": self.node_id,
|
||||
"execution_id": self.execution_id,
|
||||
"data": self.data,
|
||||
"timestamp": self.timestamp.isoformat(),
|
||||
@@ -80,6 +104,7 @@ class Subscription:
|
||||
event_types: set[EventType]
|
||||
handler: EventHandler
|
||||
filter_stream: str | None = None # Only receive events from this stream
|
||||
filter_node: str | None = None # Only receive events from this node
|
||||
filter_execution: str | None = None # Only receive events from this execution
|
||||
|
||||
|
||||
@@ -138,6 +163,7 @@ class EventBus:
|
||||
event_types: list[EventType],
|
||||
handler: EventHandler,
|
||||
filter_stream: str | None = None,
|
||||
filter_node: str | None = None,
|
||||
filter_execution: str | None = None,
|
||||
) -> str:
|
||||
"""
|
||||
@@ -147,6 +173,7 @@ class EventBus:
|
||||
event_types: Types of events to receive
|
||||
handler: Async function to call when event occurs
|
||||
filter_stream: Only receive events from this stream
|
||||
filter_node: Only receive events from this node
|
||||
filter_execution: Only receive events from this execution
|
||||
|
||||
Returns:
|
||||
@@ -160,6 +187,7 @@ class EventBus:
|
||||
event_types=set(event_types),
|
||||
handler=handler,
|
||||
filter_stream=filter_stream,
|
||||
filter_node=filter_node,
|
||||
filter_execution=filter_execution,
|
||||
)
|
||||
|
||||
@@ -218,6 +246,10 @@ class EventBus:
|
||||
if subscription.filter_stream and subscription.filter_stream != event.stream_id:
|
||||
return False
|
||||
|
||||
# Check node filter
|
||||
if subscription.filter_node and subscription.filter_node != event.node_id:
|
||||
return False
|
||||
|
||||
# Check execution filter
|
||||
if subscription.filter_execution and subscription.filter_execution != event.execution_id:
|
||||
return False
|
||||
@@ -359,6 +391,248 @@ class EventBus:
|
||||
)
|
||||
)
|
||||
|
||||
# === NODE EVENT-LOOP PUBLISHERS ===
|
||||
|
||||
async def emit_node_loop_started(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
execution_id: str | None = None,
|
||||
max_iterations: int | None = None,
|
||||
) -> None:
|
||||
"""Emit node loop started event."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.NODE_LOOP_STARTED,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={"max_iterations": max_iterations},
|
||||
)
|
||||
)
|
||||
|
||||
async def emit_node_loop_iteration(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
iteration: int,
|
||||
execution_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emit node loop iteration event."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.NODE_LOOP_ITERATION,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={"iteration": iteration},
|
||||
)
|
||||
)
|
||||
|
||||
async def emit_node_loop_completed(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
iterations: int,
|
||||
execution_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emit node loop completed event."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.NODE_LOOP_COMPLETED,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={"iterations": iterations},
|
||||
)
|
||||
)
|
||||
|
||||
# === LLM STREAMING PUBLISHERS ===
|
||||
|
||||
async def emit_llm_text_delta(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
content: str,
|
||||
snapshot: str,
|
||||
execution_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emit LLM text delta event."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.LLM_TEXT_DELTA,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={"content": content, "snapshot": snapshot},
|
||||
)
|
||||
)
|
||||
|
||||
async def emit_llm_reasoning_delta(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
content: str,
|
||||
execution_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emit LLM reasoning delta event."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.LLM_REASONING_DELTA,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={"content": content},
|
||||
)
|
||||
)
|
||||
|
||||
# === TOOL LIFECYCLE PUBLISHERS ===
|
||||
|
||||
async def emit_tool_call_started(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
tool_use_id: str,
|
||||
tool_name: str,
|
||||
tool_input: dict[str, Any] | None = None,
|
||||
execution_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emit tool call started event."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.TOOL_CALL_STARTED,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={
|
||||
"tool_use_id": tool_use_id,
|
||||
"tool_name": tool_name,
|
||||
"tool_input": tool_input or {},
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
async def emit_tool_call_completed(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
tool_use_id: str,
|
||||
tool_name: str,
|
||||
result: str = "",
|
||||
is_error: bool = False,
|
||||
execution_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emit tool call completed event."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.TOOL_CALL_COMPLETED,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={
|
||||
"tool_use_id": tool_use_id,
|
||||
"tool_name": tool_name,
|
||||
"result": result,
|
||||
"is_error": is_error,
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
# === CLIENT I/O PUBLISHERS ===
|
||||
|
||||
async def emit_client_output_delta(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
content: str,
|
||||
snapshot: str,
|
||||
execution_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emit client output delta event (client_facing=True nodes)."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.CLIENT_OUTPUT_DELTA,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={"content": content, "snapshot": snapshot},
|
||||
)
|
||||
)
|
||||
|
||||
async def emit_client_input_requested(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
prompt: str = "",
|
||||
execution_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emit client input requested event (client_facing=True nodes)."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.CLIENT_INPUT_REQUESTED,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={"prompt": prompt},
|
||||
)
|
||||
)
|
||||
|
||||
# === INTERNAL NODE PUBLISHERS ===
|
||||
|
||||
async def emit_node_internal_output(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
content: str,
|
||||
execution_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emit node internal output event (client_facing=False nodes)."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.NODE_INTERNAL_OUTPUT,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={"content": content},
|
||||
)
|
||||
)
|
||||
|
||||
async def emit_node_stalled(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
reason: str = "",
|
||||
execution_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emit node stalled event."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.NODE_STALLED,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={"reason": reason},
|
||||
)
|
||||
)
|
||||
|
||||
async def emit_node_input_blocked(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
prompt: str = "",
|
||||
execution_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emit node input blocked event."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.NODE_INPUT_BLOCKED,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={"prompt": prompt},
|
||||
)
|
||||
)
|
||||
|
||||
# === QUERY OPERATIONS ===
|
||||
|
||||
def get_history(
|
||||
@@ -410,6 +684,7 @@ class EventBus:
|
||||
self,
|
||||
event_type: EventType,
|
||||
stream_id: str | None = None,
|
||||
node_id: str | None = None,
|
||||
execution_id: str | None = None,
|
||||
timeout: float | None = None,
|
||||
) -> AgentEvent | None:
|
||||
@@ -419,6 +694,7 @@ class EventBus:
|
||||
Args:
|
||||
event_type: Type of event to wait for
|
||||
stream_id: Filter by stream
|
||||
node_id: Filter by node
|
||||
execution_id: Filter by execution
|
||||
timeout: Maximum time to wait (seconds)
|
||||
|
||||
@@ -438,6 +714,7 @@ class EventBus:
|
||||
event_types=[event_type],
|
||||
handler=handler,
|
||||
filter_stream=stream_id,
|
||||
filter_node=node_id,
|
||||
filter_execution=execution_id,
|
||||
)
|
||||
|
||||
|
||||
@@ -153,6 +153,7 @@ class ExecutionStream:
|
||||
# Execution tracking
|
||||
self._active_executions: dict[str, ExecutionContext] = {}
|
||||
self._execution_tasks: dict[str, asyncio.Task] = {}
|
||||
self._active_executors: dict[str, GraphExecutor] = {}
|
||||
self._execution_results: OrderedDict[str, ExecutionResult] = OrderedDict()
|
||||
self._execution_result_times: dict[str, float] = {}
|
||||
self._completion_events: dict[str, asyncio.Event] = {}
|
||||
@@ -237,6 +238,21 @@ class ExecutionStream:
|
||||
)
|
||||
)
|
||||
|
||||
async def inject_input(self, node_id: str, content: str) -> bool:
|
||||
"""Inject user input into a running client-facing EventLoopNode.
|
||||
|
||||
Searches active executors for a node matching ``node_id`` and calls
|
||||
its ``inject_event()`` method to unblock ``_await_user_input()``.
|
||||
|
||||
Returns True if input was delivered, False otherwise.
|
||||
"""
|
||||
for executor in self._active_executors.values():
|
||||
node = executor.node_registry.get(node_id)
|
||||
if node is not None and hasattr(node, "inject_event"):
|
||||
await node.inject_event(content)
|
||||
return True
|
||||
return False
|
||||
|
||||
async def execute(
|
||||
self,
|
||||
input_data: dict[str, Any],
|
||||
@@ -314,13 +330,21 @@ class ExecutionStream:
|
||||
# Create runtime adapter for this execution
|
||||
runtime_adapter = StreamRuntimeAdapter(self._runtime, execution_id)
|
||||
|
||||
# Create executor for this execution
|
||||
# Create executor for this execution.
|
||||
# Scope storage by execution_id so each execution gets
|
||||
# fresh conversations and spillover directories.
|
||||
exec_storage = self._storage.base_path / "sessions" / execution_id
|
||||
executor = GraphExecutor(
|
||||
runtime=runtime_adapter,
|
||||
llm=self._llm,
|
||||
tools=self._tools,
|
||||
tool_executor=self._tool_executor,
|
||||
event_bus=self._event_bus,
|
||||
stream_id=self.stream_id,
|
||||
storage_path=exec_storage,
|
||||
)
|
||||
# Track executor so inject_input() can reach EventLoopNode instances
|
||||
self._active_executors[execution_id] = executor
|
||||
|
||||
# Create modified graph with entry point
|
||||
# We need to override the entry_node to use our entry point
|
||||
@@ -334,6 +358,9 @@ class ExecutionStream:
|
||||
session_state=ctx.session_state,
|
||||
)
|
||||
|
||||
# Clean up executor reference
|
||||
self._active_executors.pop(execution_id, None)
|
||||
|
||||
# Store result with retention
|
||||
self._record_execution_result(execution_id, result)
|
||||
|
||||
|
||||
@@ -11,13 +11,13 @@ import asyncio
|
||||
import logging
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class IsolationLevel(str, Enum):
|
||||
class IsolationLevel(StrEnum):
|
||||
"""State isolation level for concurrent executions."""
|
||||
|
||||
ISOLATED = "isolated" # Private state per execution
|
||||
@@ -25,7 +25,7 @@ class IsolationLevel(str, Enum):
|
||||
SYNCHRONIZED = "synchronized" # Shared with write locks (strong consistency)
|
||||
|
||||
|
||||
class StateScope(str, Enum):
|
||||
class StateScope(StrEnum):
|
||||
"""Scope for state operations."""
|
||||
|
||||
EXECUTION = "execution" # Local to a single execution
|
||||
|
||||
@@ -10,13 +10,13 @@ This is MORE important than actions because:
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field, computed_field
|
||||
|
||||
|
||||
class DecisionType(str, Enum):
|
||||
class DecisionType(StrEnum):
|
||||
"""Types of decisions an agent can make."""
|
||||
|
||||
TOOL_SELECTION = "tool_selection" # Which tool to use
|
||||
|
||||
@@ -6,7 +6,7 @@ summaries and metrics that Builder needs to understand what happened.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field, computed_field
|
||||
@@ -14,7 +14,7 @@ from pydantic import BaseModel, Field, computed_field
|
||||
from framework.schemas.decision import Decision, Outcome
|
||||
|
||||
|
||||
class RunStatus(str, Enum):
|
||||
class RunStatus(StrEnum):
|
||||
"""Status of a run."""
|
||||
|
||||
RUNNING = "running"
|
||||
|
||||
@@ -167,14 +167,18 @@ class ConcurrentStorage:
|
||||
run: Run to save
|
||||
immediate: If True, save immediately (bypasses batching)
|
||||
"""
|
||||
# Invalidate summary cache since the run data is changing
|
||||
# This ensures load_summary() fetches fresh data after the save
|
||||
self._cache.pop(f"summary:{run.id}", None)
|
||||
|
||||
if immediate or not self._running:
|
||||
await self._save_run_locked(run)
|
||||
# Update cache only after successful immediate write
|
||||
self._cache[f"run:{run.id}"] = CacheEntry(run, time.time())
|
||||
else:
|
||||
# For batched writes, cache will be updated in _flush_batch after successful write
|
||||
await self._write_queue.put(("run", run))
|
||||
|
||||
# Update cache
|
||||
self._cache[f"run:{run.id}"] = CacheEntry(run, time.time())
|
||||
|
||||
async def _save_run_locked(self, run: Run) -> None:
|
||||
"""Save a run with file locking, including index locks."""
|
||||
lock_key = f"run:{run.id}"
|
||||
@@ -363,8 +367,12 @@ class ConcurrentStorage:
|
||||
try:
|
||||
if item_type == "run":
|
||||
await self._save_run_locked(item)
|
||||
# Update cache only after successful batched write
|
||||
# This fixes the race condition where cache was updated before write completed
|
||||
self._cache[f"run:{item.id}"] = CacheEntry(item, time.time())
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save {item_type}: {e}")
|
||||
# Cache is NOT updated on failure - prevents stale/inconsistent cache state
|
||||
|
||||
async def _flush_pending(self) -> None:
|
||||
"""Flush all pending writes."""
|
||||
|
||||
@@ -26,9 +26,9 @@ Testing tools are integrated into the main agent_builder_server.py:
|
||||
## CLI Commands
|
||||
|
||||
```bash
|
||||
python -m framework test-run <agent_path> --goal <goal_id>
|
||||
python -m framework test-debug <goal_id> <test_id>
|
||||
python -m framework test-list <agent_path> --goal <goal_id>
|
||||
uv run python -m framework test-run <agent_path> --goal <goal_id>
|
||||
uv run python -m framework test-debug <goal_id> <test_id>
|
||||
uv run python -m framework test-list <agent_path> --goal <goal_id>
|
||||
```
|
||||
"""
|
||||
|
||||
|
||||
@@ -6,13 +6,13 @@ programmatic/MCP-based approval.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ApprovalAction(str, Enum):
|
||||
class ApprovalAction(StrEnum):
|
||||
"""Actions a user can take on a generated test."""
|
||||
|
||||
APPROVE = "approve" # Accept as-is
|
||||
|
||||
@@ -24,7 +24,7 @@ def _get_api_key():
|
||||
# 1. Try CredentialStoreAdapter for Anthropic
|
||||
try:
|
||||
from aden_tools.credentials import CredentialStoreAdapter
|
||||
creds = CredentialStoreAdapter.with_env_storage()
|
||||
creds = CredentialStoreAdapter.default()
|
||||
if creds.is_available("anthropic"):
|
||||
return creds.get("anthropic")
|
||||
except (ImportError, KeyError):
|
||||
@@ -57,7 +57,7 @@ def _get_api_key():
|
||||
"""Get API key from CredentialStoreAdapter or environment."""
|
||||
try:
|
||||
from aden_tools.credentials import CredentialStoreAdapter
|
||||
creds = CredentialStoreAdapter.with_env_storage()
|
||||
creds = CredentialStoreAdapter.default()
|
||||
if creds.is_available("anthropic"):
|
||||
return creds.get("anthropic")
|
||||
except (ImportError, KeyError):
|
||||
|
||||
@@ -6,13 +6,13 @@ but require mandatory user approval before being stored.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ApprovalStatus(str, Enum):
|
||||
class ApprovalStatus(StrEnum):
|
||||
"""Status of user approval for a generated test."""
|
||||
|
||||
PENDING = "pending" # Awaiting user review
|
||||
@@ -21,7 +21,7 @@ class ApprovalStatus(str, Enum):
|
||||
REJECTED = "rejected" # User declined (with reason)
|
||||
|
||||
|
||||
class TestType(str, Enum):
|
||||
class TestType(StrEnum):
|
||||
"""Type of test based on what it validates."""
|
||||
|
||||
__test__ = False # Not a pytest test class
|
||||
|
||||
@@ -6,13 +6,13 @@ categorization for guiding iteration strategy.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ErrorCategory(str, Enum):
|
||||
class ErrorCategory(StrEnum):
|
||||
"""
|
||||
Category of test failure for guiding iteration.
|
||||
|
||||
|
||||
@@ -0,0 +1,518 @@
|
||||
import logging
|
||||
import time
|
||||
|
||||
from textual.app import App, ComposeResult
|
||||
from textual.binding import Binding
|
||||
from textual.containers import Container, Horizontal, Vertical
|
||||
from textual.widgets import Footer, Label
|
||||
|
||||
from framework.runtime.agent_runtime import AgentRuntime
|
||||
from framework.runtime.event_bus import AgentEvent, EventType
|
||||
from framework.tui.widgets.chat_repl import ChatRepl
|
||||
from framework.tui.widgets.graph_view import GraphOverview
|
||||
from framework.tui.widgets.log_pane import LogPane
|
||||
|
||||
|
||||
class StatusBar(Container):
|
||||
"""Live status bar showing agent execution state."""
|
||||
|
||||
DEFAULT_CSS = """
|
||||
StatusBar {
|
||||
dock: top;
|
||||
height: 1;
|
||||
background: $panel;
|
||||
color: $text;
|
||||
padding: 0 1;
|
||||
}
|
||||
StatusBar > Label {
|
||||
width: 100%;
|
||||
}
|
||||
"""
|
||||
|
||||
def __init__(self, graph_id: str = ""):
|
||||
super().__init__()
|
||||
self._graph_id = graph_id
|
||||
self._state = "idle"
|
||||
self._active_node: str | None = None
|
||||
self._node_detail: str = ""
|
||||
self._start_time: float | None = None
|
||||
self._final_elapsed: float | None = None
|
||||
|
||||
def compose(self) -> ComposeResult:
|
||||
yield Label(id="status-content")
|
||||
|
||||
def on_mount(self) -> None:
|
||||
self._refresh()
|
||||
self.set_interval(1.0, self._refresh)
|
||||
|
||||
def _format_elapsed(self, seconds: float) -> str:
|
||||
total = int(seconds)
|
||||
hours, remainder = divmod(total, 3600)
|
||||
mins, secs = divmod(remainder, 60)
|
||||
if hours:
|
||||
return f"{hours}:{mins:02d}:{secs:02d}"
|
||||
return f"{mins}:{secs:02d}"
|
||||
|
||||
def _refresh(self) -> None:
|
||||
parts: list[str] = []
|
||||
|
||||
if self._graph_id:
|
||||
parts.append(f"[bold]{self._graph_id}[/bold]")
|
||||
|
||||
if self._state == "idle":
|
||||
parts.append("[dim]○ idle[/dim]")
|
||||
elif self._state == "running":
|
||||
parts.append("[bold green]● running[/bold green]")
|
||||
elif self._state == "completed":
|
||||
parts.append("[green]✓ done[/green]")
|
||||
elif self._state == "failed":
|
||||
parts.append("[bold red]✗ failed[/bold red]")
|
||||
|
||||
if self._active_node:
|
||||
node_str = f"[cyan]{self._active_node}[/cyan]"
|
||||
if self._node_detail:
|
||||
node_str += f" [dim]({self._node_detail})[/dim]"
|
||||
parts.append(node_str)
|
||||
|
||||
if self._state == "running" and self._start_time:
|
||||
parts.append(f"[dim]{self._format_elapsed(time.time() - self._start_time)}[/dim]")
|
||||
elif self._final_elapsed is not None:
|
||||
parts.append(f"[dim]{self._format_elapsed(self._final_elapsed)}[/dim]")
|
||||
|
||||
try:
|
||||
label = self.query_one("#status-content", Label)
|
||||
label.update(" │ ".join(parts))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def set_graph_id(self, graph_id: str) -> None:
|
||||
self._graph_id = graph_id
|
||||
self._refresh()
|
||||
|
||||
def set_running(self, entry_node: str = "") -> None:
|
||||
self._state = "running"
|
||||
self._active_node = entry_node or None
|
||||
self._node_detail = ""
|
||||
self._start_time = time.time()
|
||||
self._final_elapsed = None
|
||||
self._refresh()
|
||||
|
||||
def set_completed(self) -> None:
|
||||
self._state = "completed"
|
||||
if self._start_time:
|
||||
self._final_elapsed = time.time() - self._start_time
|
||||
self._active_node = None
|
||||
self._node_detail = ""
|
||||
self._start_time = None
|
||||
self._refresh()
|
||||
|
||||
def set_failed(self, error: str = "") -> None:
|
||||
self._state = "failed"
|
||||
if self._start_time:
|
||||
self._final_elapsed = time.time() - self._start_time
|
||||
self._node_detail = error[:40] if error else ""
|
||||
self._start_time = None
|
||||
self._refresh()
|
||||
|
||||
def set_active_node(self, node_id: str, detail: str = "") -> None:
|
||||
self._active_node = node_id
|
||||
self._node_detail = detail
|
||||
self._refresh()
|
||||
|
||||
def set_node_detail(self, detail: str) -> None:
|
||||
self._node_detail = detail
|
||||
self._refresh()
|
||||
|
||||
|
||||
class AdenTUI(App):
|
||||
TITLE = "Aden TUI Dashboard"
|
||||
COMMAND_PALETTE_BINDING = "ctrl+o"
|
||||
CSS = """
|
||||
Screen {
|
||||
layout: vertical;
|
||||
background: $surface;
|
||||
}
|
||||
|
||||
#left-pane {
|
||||
width: 60%;
|
||||
height: 100%;
|
||||
layout: vertical;
|
||||
background: $surface;
|
||||
}
|
||||
|
||||
GraphOverview {
|
||||
height: 40%;
|
||||
background: $panel;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
LogPane {
|
||||
height: 60%;
|
||||
background: $surface;
|
||||
padding: 0;
|
||||
margin-bottom: 1;
|
||||
}
|
||||
|
||||
ChatRepl {
|
||||
width: 40%;
|
||||
height: 100%;
|
||||
background: $panel;
|
||||
border-left: tall $primary;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
#chat-history {
|
||||
height: 1fr;
|
||||
width: 100%;
|
||||
background: $surface;
|
||||
border: none;
|
||||
scrollbar-background: $panel;
|
||||
scrollbar-color: $primary;
|
||||
}
|
||||
|
||||
RichLog {
|
||||
background: $surface;
|
||||
border: none;
|
||||
scrollbar-background: $panel;
|
||||
scrollbar-color: $primary;
|
||||
}
|
||||
|
||||
Input {
|
||||
background: $surface;
|
||||
border: tall $primary;
|
||||
margin-top: 1;
|
||||
}
|
||||
|
||||
Input:focus {
|
||||
border: tall $accent;
|
||||
}
|
||||
|
||||
StatusBar {
|
||||
background: $panel;
|
||||
color: $text;
|
||||
height: 1;
|
||||
padding: 0 1;
|
||||
}
|
||||
|
||||
Footer {
|
||||
background: $panel;
|
||||
color: $text-muted;
|
||||
}
|
||||
"""
|
||||
|
||||
BINDINGS = [
|
||||
Binding("q", "quit", "Quit"),
|
||||
Binding("ctrl+s", "screenshot", "Screenshot (SVG)", show=True, priority=True),
|
||||
Binding("tab", "focus_next", "Next Panel", show=True),
|
||||
Binding("shift+tab", "focus_previous", "Previous Panel", show=False),
|
||||
]
|
||||
|
||||
def __init__(self, runtime: AgentRuntime):
|
||||
super().__init__()
|
||||
|
||||
self.runtime = runtime
|
||||
self.log_pane = LogPane()
|
||||
self.graph_view = GraphOverview(runtime)
|
||||
self.chat_repl = ChatRepl(runtime)
|
||||
self.status_bar = StatusBar(graph_id=runtime.graph.id)
|
||||
self.is_ready = False
|
||||
|
||||
def compose(self) -> ComposeResult:
|
||||
yield self.status_bar
|
||||
|
||||
yield Horizontal(
|
||||
Vertical(
|
||||
self.log_pane,
|
||||
self.graph_view,
|
||||
id="left-pane",
|
||||
),
|
||||
self.chat_repl,
|
||||
)
|
||||
|
||||
yield Footer()
|
||||
|
||||
async def on_mount(self) -> None:
|
||||
"""Called when app starts."""
|
||||
self.title = "Aden TUI Dashboard"
|
||||
|
||||
# Add logging setup
|
||||
self._setup_logging_queue()
|
||||
|
||||
# Set ready immediately so _poll_logs can process messages
|
||||
self.is_ready = True
|
||||
|
||||
# Add event subscription with delay to ensure TUI is fully initialized
|
||||
self.call_later(self._init_runtime_connection)
|
||||
|
||||
# Delay initial log messages until layout is fully rendered
|
||||
def write_initial_logs():
|
||||
logging.info("TUI Dashboard initialized successfully")
|
||||
logging.info("Waiting for agent execution to start...")
|
||||
|
||||
# Wait for layout to be fully rendered before writing logs
|
||||
self.set_timer(0.2, write_initial_logs)
|
||||
|
||||
def _setup_logging_queue(self) -> None:
|
||||
"""Setup a thread-safe queue for logs."""
|
||||
try:
|
||||
import queue
|
||||
from logging.handlers import QueueHandler
|
||||
|
||||
self.log_queue = queue.Queue()
|
||||
self.queue_handler = QueueHandler(self.log_queue)
|
||||
self.queue_handler.setLevel(logging.INFO)
|
||||
|
||||
# Get root logger
|
||||
root_logger = logging.getLogger()
|
||||
|
||||
# Remove ALL existing handlers to prevent stdout output
|
||||
# This is critical - StreamHandlers cause text to appear in header
|
||||
for handler in root_logger.handlers[:]:
|
||||
root_logger.removeHandler(handler)
|
||||
|
||||
# Add ONLY our queue handler
|
||||
root_logger.addHandler(self.queue_handler)
|
||||
root_logger.setLevel(logging.INFO)
|
||||
|
||||
# Suppress LiteLLM logging completely
|
||||
litellm_logger = logging.getLogger("LiteLLM")
|
||||
litellm_logger.setLevel(logging.CRITICAL) # Only show critical errors
|
||||
litellm_logger.propagate = False # Don't propagate to root logger
|
||||
|
||||
# Start polling
|
||||
self.set_interval(0.1, self._poll_logs)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _poll_logs(self) -> None:
|
||||
"""Poll the log queue and update UI."""
|
||||
if not self.is_ready:
|
||||
return
|
||||
|
||||
try:
|
||||
while not self.log_queue.empty():
|
||||
record = self.log_queue.get_nowait()
|
||||
# Filter out framework/library logs
|
||||
if record.name.startswith(("textual", "LiteLLM", "litellm")):
|
||||
continue
|
||||
|
||||
self.log_pane.write_python_log(record)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
_EVENT_TYPES = [
|
||||
EventType.LLM_TEXT_DELTA,
|
||||
EventType.CLIENT_OUTPUT_DELTA,
|
||||
EventType.TOOL_CALL_STARTED,
|
||||
EventType.TOOL_CALL_COMPLETED,
|
||||
EventType.EXECUTION_STARTED,
|
||||
EventType.EXECUTION_COMPLETED,
|
||||
EventType.EXECUTION_FAILED,
|
||||
EventType.NODE_LOOP_STARTED,
|
||||
EventType.NODE_LOOP_ITERATION,
|
||||
EventType.NODE_LOOP_COMPLETED,
|
||||
EventType.CLIENT_INPUT_REQUESTED,
|
||||
EventType.NODE_STALLED,
|
||||
EventType.GOAL_PROGRESS,
|
||||
EventType.GOAL_ACHIEVED,
|
||||
EventType.CONSTRAINT_VIOLATION,
|
||||
EventType.STATE_CHANGED,
|
||||
EventType.NODE_INPUT_BLOCKED,
|
||||
]
|
||||
|
||||
_LOG_PANE_EVENTS = frozenset(_EVENT_TYPES) - {
|
||||
EventType.LLM_TEXT_DELTA,
|
||||
EventType.CLIENT_OUTPUT_DELTA,
|
||||
}
|
||||
|
||||
async def _init_runtime_connection(self) -> None:
|
||||
"""Subscribe to runtime events with an async handler."""
|
||||
try:
|
||||
self._subscription_id = self.runtime.subscribe_to_events(
|
||||
event_types=self._EVENT_TYPES,
|
||||
handler=self._handle_event,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
async def _handle_event(self, event: AgentEvent) -> None:
|
||||
"""Called from the agent thread — bridge to Textual's main thread."""
|
||||
try:
|
||||
self.call_from_thread(self._route_event, event)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _route_event(self, event: AgentEvent) -> None:
|
||||
"""Route incoming events to widgets. Runs on Textual's main thread."""
|
||||
if not self.is_ready:
|
||||
return
|
||||
|
||||
try:
|
||||
et = event.type
|
||||
|
||||
# --- Chat REPL events ---
|
||||
if et in (EventType.LLM_TEXT_DELTA, EventType.CLIENT_OUTPUT_DELTA):
|
||||
self.chat_repl.handle_text_delta(
|
||||
event.data.get("content", ""),
|
||||
event.data.get("snapshot", ""),
|
||||
)
|
||||
elif et == EventType.TOOL_CALL_STARTED:
|
||||
self.chat_repl.handle_tool_started(
|
||||
event.data.get("tool_name", "unknown"),
|
||||
event.data.get("tool_input", {}),
|
||||
)
|
||||
elif et == EventType.TOOL_CALL_COMPLETED:
|
||||
self.chat_repl.handle_tool_completed(
|
||||
event.data.get("tool_name", "unknown"),
|
||||
event.data.get("result", ""),
|
||||
event.data.get("is_error", False),
|
||||
)
|
||||
elif et == EventType.EXECUTION_COMPLETED:
|
||||
self.chat_repl.handle_execution_completed(event.data.get("output", {}))
|
||||
elif et == EventType.EXECUTION_FAILED:
|
||||
self.chat_repl.handle_execution_failed(event.data.get("error", "Unknown error"))
|
||||
elif et == EventType.CLIENT_INPUT_REQUESTED:
|
||||
self.chat_repl.handle_input_requested(
|
||||
event.node_id or event.data.get("node_id", ""),
|
||||
)
|
||||
|
||||
# --- Graph view events ---
|
||||
if et in (
|
||||
EventType.EXECUTION_STARTED,
|
||||
EventType.EXECUTION_COMPLETED,
|
||||
EventType.EXECUTION_FAILED,
|
||||
):
|
||||
self.graph_view.update_execution(event)
|
||||
|
||||
if et == EventType.NODE_LOOP_STARTED:
|
||||
self.graph_view.handle_node_loop_started(event.node_id or "")
|
||||
elif et == EventType.NODE_LOOP_ITERATION:
|
||||
self.graph_view.handle_node_loop_iteration(
|
||||
event.node_id or "",
|
||||
event.data.get("iteration", 0),
|
||||
)
|
||||
elif et == EventType.NODE_LOOP_COMPLETED:
|
||||
self.graph_view.handle_node_loop_completed(event.node_id or "")
|
||||
elif et == EventType.NODE_STALLED:
|
||||
self.graph_view.handle_stalled(
|
||||
event.node_id or "",
|
||||
event.data.get("reason", ""),
|
||||
)
|
||||
|
||||
if et == EventType.TOOL_CALL_STARTED:
|
||||
self.graph_view.handle_tool_call(
|
||||
event.node_id or "",
|
||||
event.data.get("tool_name", "unknown"),
|
||||
started=True,
|
||||
)
|
||||
elif et == EventType.TOOL_CALL_COMPLETED:
|
||||
self.graph_view.handle_tool_call(
|
||||
event.node_id or "",
|
||||
event.data.get("tool_name", "unknown"),
|
||||
started=False,
|
||||
)
|
||||
|
||||
# --- Status bar events ---
|
||||
if et == EventType.EXECUTION_STARTED:
|
||||
entry_node = event.data.get("entry_node") or (
|
||||
self.runtime.graph.entry_node if self.runtime else ""
|
||||
)
|
||||
self.status_bar.set_running(entry_node)
|
||||
elif et == EventType.EXECUTION_COMPLETED:
|
||||
self.status_bar.set_completed()
|
||||
elif et == EventType.EXECUTION_FAILED:
|
||||
self.status_bar.set_failed(event.data.get("error", ""))
|
||||
elif et == EventType.NODE_LOOP_STARTED:
|
||||
self.status_bar.set_active_node(event.node_id or "", "thinking...")
|
||||
elif et == EventType.NODE_LOOP_ITERATION:
|
||||
self.status_bar.set_node_detail(f"step {event.data.get('iteration', '?')}")
|
||||
elif et == EventType.TOOL_CALL_STARTED:
|
||||
self.status_bar.set_node_detail(f"{event.data.get('tool_name', '')}...")
|
||||
elif et == EventType.TOOL_CALL_COMPLETED:
|
||||
self.status_bar.set_node_detail("thinking...")
|
||||
elif et == EventType.NODE_STALLED:
|
||||
self.status_bar.set_node_detail(f"stalled: {event.data.get('reason', '')}")
|
||||
|
||||
# --- Log pane events ---
|
||||
if et in self._LOG_PANE_EVENTS:
|
||||
self.log_pane.write_event(event)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def save_screenshot(self, filename: str | None = None) -> str:
    """Export the current screen to an SVG file (viewable in browsers).

    Args:
        filename: Optional output name; a timestamp-based default is
            generated when None. A ``.svg`` suffix is appended if missing.

    Returns:
        Path to the saved SVG file, as a string.
    """
    from datetime import datetime
    from pathlib import Path

    # All captures go into a local "screenshots" directory.
    out_dir = Path("screenshots")
    out_dir.mkdir(exist_ok=True)

    if filename is None:
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"tui_screenshot_{stamp}.svg"
    if not filename.endswith(".svg"):
        filename = f"{filename}.svg"

    target = out_dir / filename

    # Temporarily strip decorative borders so the capture looks clean.
    chat_widget = self.query_one(ChatRepl)
    saved_chat_border = chat_widget.styles.border_left
    chat_widget.styles.border_left = ("none", "transparent")

    inputs = list(self.query("Input"))
    saved_input_borders = [w.styles.border for w in inputs]
    for w in inputs:
        w.styles.border = ("none", "transparent")

    try:
        # Render the current screen to SVG and persist it.
        target.write_text(self.export_screenshot(), encoding="utf-8")
    finally:
        # Always restore the original styling, even if the export fails.
        chat_widget.styles.border_left = saved_chat_border
        for w, border in zip(inputs, saved_input_borders):
            w.styles.border = border

    return str(target)
|
||||
|
||||
def action_screenshot(self) -> None:
    """Take a screenshot (bound to Ctrl+S) and report the outcome."""
    try:
        saved_path = self.save_screenshot()
    except Exception as e:
        # Surface the failure to the user instead of crashing the app.
        self.notify(f"Screenshot failed: {e}", severity="error", timeout=5)
    else:
        self.notify(
            f"Screenshot saved: {saved_path} (SVG - open in browser)",
            severity="information",
            timeout=5,
        )
|
||||
|
||||
async def on_unmount(self) -> None:
    """Cleanup on app shutdown: stop event routing and detach the log handler."""
    self.is_ready = False

    # Teardown is best-effort: shutdown must never raise.
    try:
        sub_id = getattr(self, "_subscription_id", None)
        if sub_id is not None:
            self.runtime.unsubscribe_from_events(sub_id)
    except Exception:
        pass

    try:
        handler = getattr(self, "queue_handler", None)
        if handler is not None:
            logging.getLogger().removeHandler(handler)
    except Exception:
        pass
|
||||
@@ -0,0 +1,303 @@
|
||||
"""
|
||||
Chat / REPL Widget - Uses RichLog for append-only, selection-safe display.
|
||||
|
||||
Streaming display approach:
|
||||
- The processing-indicator Label is used as a live status bar during streaming
|
||||
(Label.update() replaces text in-place, unlike RichLog which is append-only).
|
||||
- On EXECUTION_COMPLETED, the final output is written to RichLog as permanent history.
|
||||
- Tool events are written directly to RichLog as discrete status lines.
|
||||
|
||||
Client-facing input:
|
||||
- When a client_facing=True EventLoopNode emits CLIENT_INPUT_REQUESTED, the
|
||||
ChatRepl transitions to "waiting for input" state: input is re-enabled and
|
||||
subsequent submissions are routed to runtime.inject_input() instead of
|
||||
starting a new execution.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import threading
|
||||
from typing import Any
|
||||
|
||||
from textual.app import ComposeResult
|
||||
from textual.containers import Vertical
|
||||
from textual.widgets import Input, Label, RichLog
|
||||
|
||||
from framework.runtime.agent_runtime import AgentRuntime
|
||||
|
||||
|
||||
class ChatRepl(Vertical):
    """Widget for interactive chat/REPL.

    The RichLog is the permanent, append-only chat history; the
    processing-indicator Label is a live status line updated in place
    during streaming. Agent execution runs on a dedicated event loop in
    a background thread so blocking runtime code never stalls the UI.
    """

    DEFAULT_CSS = """
    ChatRepl {
        width: 100%;
        height: 100%;
        layout: vertical;
    }

    ChatRepl > RichLog {
        width: 100%;
        height: 1fr;
        background: $surface;
        border: none;
        scrollbar-background: $panel;
        scrollbar-color: $primary;
    }

    ChatRepl > #processing-indicator {
        width: 100%;
        height: 1;
        background: $primary 20%;
        color: $text;
        text-style: bold;
        display: none;
    }

    ChatRepl > Input {
        width: 100%;
        height: auto;
        dock: bottom;
        background: $surface;
        border: tall $primary;
        margin-top: 1;
    }

    ChatRepl > Input:focus {
        border: tall $accent;
    }
    """

    def __init__(self, runtime: AgentRuntime):
        """Create the REPL bound to *runtime* and start the agent thread."""
        super().__init__()
        self.runtime = runtime
        # Exec id of the in-flight run; None when idle. Doubles as the
        # double-submit guard in on_input_submitted.
        self._current_exec_id: str | None = None
        # Accumulated streamed text; flushed to history on completion or
        # when a client-facing node requests input.
        self._streaming_snapshot: str = ""
        # True while a client_facing node is waiting for user input.
        self._waiting_for_input: bool = False
        # Node id that the next submission should be injected into.
        self._input_node_id: str | None = None

        # Dedicated event loop for agent execution.
        # Keeps blocking runtime code (LLM calls, MCP tools) off
        # the Textual event loop so the UI stays responsive.
        self._agent_loop = asyncio.new_event_loop()
        self._agent_thread = threading.Thread(
            target=self._agent_loop.run_forever,
            daemon=True,  # don't block interpreter shutdown
            name="agent-execution",
        )
        self._agent_thread.start()

    def compose(self) -> ComposeResult:
        """Yield the history log, live status indicator, and input box."""
        yield RichLog(id="chat-history", highlight=True, markup=True, auto_scroll=False, wrap=True)
        yield Label("Agent is processing...", id="processing-indicator")
        yield Input(placeholder="Enter input for agent...", id="chat-input")

    def _write_history(self, content: str) -> None:
        """Write to chat history, only auto-scrolling if user is at the bottom."""
        history = self.query_one("#chat-history", RichLog)
        # Capture position BEFORE writing: a write can move the scroll end.
        was_at_bottom = history.is_vertical_scroll_end
        history.write(content)
        if was_at_bottom:
            history.scroll_end(animate=False)

    def on_mount(self) -> None:
        """Add welcome message when widget mounts."""
        history = self.query_one("#chat-history", RichLog)
        history.write("[bold cyan]Chat REPL Ready[/bold cyan] — Type your input below\n")

    async def on_input_submitted(self, message: Input.Submitted) -> None:
        """Handle input submission — either start new execution or inject input.

        Two modes:
        - waiting-for-input: route the text to the blocked client-facing
          node via runtime.inject_input() and return.
        - idle: start a new execution via runtime.trigger().
        """
        user_input = message.value.strip()
        if not user_input:
            return

        # Client-facing input: route to the waiting node
        if self._waiting_for_input and self._input_node_id:
            self._write_history(f"[bold green]You:[/bold green] {user_input}")
            message.input.value = ""

            # Disable input while agent processes the response
            chat_input = self.query_one("#chat-input", Input)
            chat_input.disabled = True
            chat_input.placeholder = "Enter input for agent..."
            self._waiting_for_input = False

            indicator = self.query_one("#processing-indicator", Label)
            indicator.update("Thinking...")

            node_id = self._input_node_id
            self._input_node_id = None

            try:
                future = asyncio.run_coroutine_threadsafe(
                    self.runtime.inject_input(node_id, user_input),
                    self._agent_loop,
                )
                await asyncio.wrap_future(future)
            except Exception as e:
                self._write_history(f"[bold red]Error delivering input:[/bold red] {e}")
            # BUGFIX: return unconditionally. Previously only the except
            # branch returned, so a *successful* injection fell through to
            # the guard below and spuriously wrote "Agent is still running".
            return

        # Double-submit guard: reject input while an execution is in-flight
        if self._current_exec_id is not None:
            self._write_history("[dim]Agent is still running — please wait.[/dim]")
            return

        indicator = self.query_one("#processing-indicator", Label)

        # Append user message and clear input
        self._write_history(f"[bold green]You:[/bold green] {user_input}")
        message.input.value = ""

        try:
            # Get entry point
            entry_points = self.runtime.get_entry_points()
            if not entry_points:
                self._write_history("[bold red]Error:[/bold red] No entry points")
                return

            # Determine the input key from the entry node
            entry_point = entry_points[0]
            entry_node = self.runtime.graph.get_node(entry_point.entry_node)

            if entry_node and entry_node.input_keys:
                input_key = entry_node.input_keys[0]
            else:
                input_key = "input"

            # Reset streaming state
            self._streaming_snapshot = ""

            # Show processing indicator
            indicator.update("Thinking...")
            indicator.display = True

            # Disable input while the agent is working
            chat_input = self.query_one("#chat-input", Input)
            chat_input.disabled = True

            # Submit execution to the dedicated agent loop so blocking
            # runtime code (LLM, MCP tools) never touches Textual's loop.
            # trigger() returns immediately with an exec_id; the heavy
            # execution task runs entirely on the agent thread.
            future = asyncio.run_coroutine_threadsafe(
                self.runtime.trigger(
                    entry_point_id=entry_point.id,
                    input_data={input_key: user_input},
                ),
                self._agent_loop,
            )
            # wrap_future lets us await without blocking Textual's loop
            self._current_exec_id = await asyncio.wrap_future(future)

        except Exception as e:
            indicator.display = False
            self._current_exec_id = None
            # Re-enable input on error
            chat_input = self.query_one("#chat-input", Input)
            chat_input.disabled = False
            self._write_history(f"[bold red]Error:[/bold red] {e}")

    # -- Event handlers called by app.py _handle_event --

    def handle_text_delta(self, content: str, snapshot: str) -> None:
        """Handle a streaming text token from the LLM.

        Args:
            content: The new token (unused; the snapshot carries all text).
            snapshot: Full accumulated text so far.
        """
        self._streaming_snapshot = snapshot

        # Show a truncated live preview in the indicator label
        indicator = self.query_one("#processing-indicator", Label)
        preview = snapshot[-80:] if len(snapshot) > 80 else snapshot
        # Replace newlines for single-line display
        preview = preview.replace("\n", " ")
        indicator.update(
            f"Thinking: ...{preview}" if len(snapshot) > 80 else f"Thinking: {preview}"
        )

    def handle_tool_started(self, tool_name: str, tool_input: dict[str, Any]) -> None:
        """Handle a tool call starting."""
        # Update indicator to show tool activity
        indicator = self.query_one("#processing-indicator", Label)
        indicator.update(f"Using tool: {tool_name}...")

        # Write a discrete status line to history
        self._write_history(f"[dim]Tool: {tool_name}[/dim]")

    def handle_tool_completed(self, tool_name: str, result: str, is_error: bool) -> None:
        """Handle a tool call completing: log a result preview, resume 'Thinking'."""
        result_str = str(result)
        preview = result_str[:200] + "..." if len(result_str) > 200 else result_str
        preview = preview.replace("\n", " ")

        if is_error:
            self._write_history(f"[dim red]Tool {tool_name} error: {preview}[/dim red]")
        else:
            self._write_history(f"[dim]Tool {tool_name} result: {preview}[/dim]")

        # Restore thinking indicator
        indicator = self.query_one("#processing-indicator", Label)
        indicator.update("Thinking...")

    def handle_execution_completed(self, output: dict[str, Any]) -> None:
        """Handle execution finishing successfully: flush history, reset state."""
        indicator = self.query_one("#processing-indicator", Label)
        indicator.display = False

        # Write the final streaming snapshot to permanent history (if any)
        if self._streaming_snapshot:
            self._write_history(f"[bold blue]Agent:[/bold blue] {self._streaming_snapshot}")
        else:
            output_str = str(output.get("output_string", output))
            self._write_history(f"[bold blue]Agent:[/bold blue] {output_str}")
        self._write_history("")  # separator

        self._current_exec_id = None
        self._streaming_snapshot = ""
        self._waiting_for_input = False
        self._input_node_id = None

        # Re-enable input
        chat_input = self.query_one("#chat-input", Input)
        chat_input.disabled = False
        chat_input.placeholder = "Enter input for agent..."
        chat_input.focus()

    def handle_execution_failed(self, error: str) -> None:
        """Handle execution failing: show the error and reset state."""
        indicator = self.query_one("#processing-indicator", Label)
        indicator.display = False

        self._write_history(f"[bold red]Error:[/bold red] {error}")
        self._write_history("")  # separator

        self._current_exec_id = None
        self._streaming_snapshot = ""
        self._waiting_for_input = False
        self._input_node_id = None

        # Re-enable input
        chat_input = self.query_one("#chat-input", Input)
        chat_input.disabled = False
        chat_input.placeholder = "Enter input for agent..."
        chat_input.focus()

    def handle_input_requested(self, node_id: str) -> None:
        """Handle a client-facing node requesting user input.

        Transitions to 'waiting for input' state: flushes the current
        streaming snapshot to history, re-enables the input widget,
        and sets a flag so the next submission routes to inject_input().
        """
        # Flush accumulated streaming text as agent output
        if self._streaming_snapshot:
            self._write_history(f"[bold blue]Agent:[/bold blue] {self._streaming_snapshot}")
            self._streaming_snapshot = ""

        self._waiting_for_input = True
        self._input_node_id = node_id or None

        indicator = self.query_one("#processing-indicator", Label)
        indicator.update("Waiting for your input...")

        chat_input = self.query_one("#chat-input", Input)
        chat_input.disabled = False
        chat_input.placeholder = "Type your response..."
        chat_input.focus()
|
||||
@@ -0,0 +1,194 @@
|
||||
"""
|
||||
Graph/Tree Overview Widget - Displays real agent graph structure.
|
||||
"""
|
||||
|
||||
from textual.app import ComposeResult
|
||||
from textual.containers import Vertical
|
||||
from textual.widgets import RichLog
|
||||
|
||||
from framework.runtime.agent_runtime import AgentRuntime
|
||||
from framework.runtime.event_bus import EventType
|
||||
|
||||
|
||||
class GraphOverview(Vertical):
    """Widget to display Agent execution graph/tree with real data.

    Renders the runtime's graph as an ASCII DAG in a RichLog and keeps
    per-node status strings updated from execution events.
    """

    DEFAULT_CSS = """
    GraphOverview {
        width: 100%;
        height: 100%;
        background: $panel;
    }

    GraphOverview > RichLog {
        width: 100%;
        height: 100%;
        background: $panel;
        border: none;
        scrollbar-background: $surface;
        scrollbar-color: $primary;
    }
    """

    def __init__(self, runtime: AgentRuntime):
        """Bind the overview to *runtime*, whose graph it renders."""
        super().__init__()
        self.runtime = runtime
        # Node currently executing, or None when idle.
        self.active_node: str | None = None
        # Ordered list of nodes visited during the current execution.
        self.execution_path: list[str] = []
        # Per-node status strings shown next to the node in the graph display.
        # e.g. {"planner": "thinking...", "searcher": "web_search..."}
        self._node_status: dict[str, str] = {}

    def compose(self) -> ComposeResult:
        # Use RichLog for formatted output
        yield RichLog(id="graph-display", highlight=True, markup=True)

    def on_mount(self) -> None:
        """Display initial graph structure."""
        self._display_graph()

    def _topo_order(self) -> list[str]:
        """BFS from entry_node following edges; unreachable nodes appended last.

        Returns:
            Node ids in BFS order from the entry node, then orphans.
        """
        graph = self.runtime.graph
        visited: list[str] = []
        seen: set[str] = set()
        queue = [graph.entry_node]
        # Index cursor instead of list.pop(0): pop(0) shifts the whole
        # list each time, making the BFS accidentally O(n^2).
        idx = 0
        while idx < len(queue):
            nid = queue[idx]
            idx += 1
            if nid in seen:
                continue
            seen.add(nid)
            visited.append(nid)
            for edge in graph.get_outgoing_edges(nid):
                if edge.target not in seen:
                    queue.append(edge.target)
        # Append orphan nodes not reachable from entry
        for node in graph.nodes:
            if node.id not in seen:
                visited.append(node.id)
        return visited

    def _render_node_line(self, node_id: str) -> str:
        """Render a single node with status symbol and optional status text."""
        graph = self.runtime.graph
        is_terminal = node_id in (graph.terminal_nodes or [])
        is_active = node_id == self.active_node
        is_done = node_id in self.execution_path and not is_active
        status = self._node_status.get(node_id, "")

        # Symbol: active > done > terminal > plain.
        if is_active:
            sym = "[bold green]●[/bold green]"
        elif is_done:
            sym = "[dim]✓[/dim]"
        elif is_terminal:
            sym = "[yellow]■[/yellow]"
        else:
            sym = "○"

        if is_active:
            name = f"[bold green]{node_id}[/bold green]"
        elif is_done:
            name = f"[dim]{node_id}[/dim]"
        else:
            name = node_id

        suffix = f" [italic]{status}[/italic]" if status else ""
        return f" {sym} {name}{suffix}"

    def _render_edges(self, node_id: str) -> list[str]:
        """Render edge connectors from this node to its targets."""
        edges = self.runtime.graph.get_outgoing_edges(node_id)
        if not edges:
            return []
        if len(edges) == 1:
            return ["   │", "   ▼"]
        # Fan-out: show branches
        lines: list[str] = []
        for i, edge in enumerate(edges):
            connector = "└" if i == len(edges) - 1 else "├"
            cond = ""
            # Only annotate non-default conditions to keep the display terse.
            if edge.condition.value not in ("always", "on_success"):
                cond = f" [dim]({edge.condition.value})[/dim]"
            lines.append(f"   {connector}──▶ {edge.target}{cond}")
        return lines

    def _display_graph(self) -> None:
        """Display the graph as an ASCII DAG with edge connectors."""
        display = self.query_one("#graph-display", RichLog)
        display.clear()

        graph = self.runtime.graph
        display.write(f"[bold cyan]Agent Graph:[/bold cyan] {graph.id}\n")

        # Render each node in topological order with edges
        ordered = self._topo_order()
        for node_id in ordered:
            display.write(self._render_node_line(node_id))
            for edge_line in self._render_edges(node_id):
                display.write(edge_line)

        # Execution path footer
        if self.execution_path:
            display.write("")
            display.write(f"[dim]Path:[/dim] {' → '.join(self.execution_path[-5:])}")

    def update_active_node(self, node_id: str) -> None:
        """Update the currently active node and redraw."""
        self.active_node = node_id
        if node_id not in self.execution_path:
            self.execution_path.append(node_id)
        self._display_graph()

    def update_execution(self, event) -> None:
        """Update the displayed node status based on execution lifecycle events."""
        if event.type == EventType.EXECUTION_STARTED:
            # Fresh run: drop any state left over from a previous execution.
            self._node_status.clear()
            self.execution_path.clear()
            entry_node = event.data.get("entry_node") or (
                self.runtime.graph.entry_node if self.runtime else None
            )
            if entry_node:
                self.update_active_node(entry_node)

        elif event.type == EventType.EXECUTION_COMPLETED:
            self.active_node = None
            self._node_status.clear()
            self._display_graph()

        elif event.type == EventType.EXECUTION_FAILED:
            error = event.data.get("error", "Unknown error")
            # Pin the failure message on the node that was running.
            if self.active_node:
                self._node_status[self.active_node] = f"[red]FAILED: {error}[/red]"
            self.active_node = None
            self._display_graph()

    # -- Event handlers called by app.py _handle_event --

    def handle_node_loop_started(self, node_id: str) -> None:
        """A node's event loop has started."""
        self._node_status[node_id] = "thinking..."
        self.update_active_node(node_id)

    def handle_node_loop_iteration(self, node_id: str, iteration: int) -> None:
        """A node advanced to a new loop iteration."""
        self._node_status[node_id] = f"step {iteration}"
        self._display_graph()

    def handle_node_loop_completed(self, node_id: str) -> None:
        """A node's event loop completed."""
        self._node_status.pop(node_id, None)
        self._display_graph()

    def handle_tool_call(self, node_id: str, tool_name: str, *, started: bool) -> None:
        """Show tool activity next to the active node."""
        if started:
            self._node_status[node_id] = f"{tool_name}..."
        else:
            # Restore to generic thinking status after tool completes
            self._node_status[node_id] = "thinking..."
        self._display_graph()

    def handle_stalled(self, node_id: str, reason: str) -> None:
        """Highlight a stalled node."""
        self._node_status[node_id] = f"[red]stalled: {reason}[/red]"
        self._display_graph()
|
||||
@@ -0,0 +1,147 @@
|
||||
"""
|
||||
Log Pane Widget - Uses RichLog for reliable rendering.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from datetime import datetime
|
||||
|
||||
from textual.app import ComposeResult
|
||||
from textual.containers import Container
|
||||
from textual.widgets import RichLog
|
||||
|
||||
from framework.runtime.event_bus import AgentEvent, EventType
|
||||
|
||||
|
||||
class LogPane(Container):
|
||||
"""Widget to display logs with reliable rendering."""
|
||||
|
||||
_EVENT_FORMAT: dict[EventType, tuple[str, str]] = {
|
||||
EventType.EXECUTION_STARTED: (">>", "bold cyan"),
|
||||
EventType.EXECUTION_COMPLETED: ("<<", "bold green"),
|
||||
EventType.EXECUTION_FAILED: ("!!", "bold red"),
|
||||
EventType.TOOL_CALL_STARTED: ("->", "yellow"),
|
||||
EventType.TOOL_CALL_COMPLETED: ("<-", "green"),
|
||||
EventType.NODE_LOOP_STARTED: ("@@", "cyan"),
|
||||
EventType.NODE_LOOP_ITERATION: ("..", "dim"),
|
||||
EventType.NODE_LOOP_COMPLETED: ("@@", "dim"),
|
||||
EventType.NODE_STALLED: ("!!", "bold yellow"),
|
||||
EventType.NODE_INPUT_BLOCKED: ("!!", "yellow"),
|
||||
EventType.GOAL_PROGRESS: ("%%", "blue"),
|
||||
EventType.GOAL_ACHIEVED: ("**", "bold green"),
|
||||
EventType.CONSTRAINT_VIOLATION: ("!!", "bold red"),
|
||||
EventType.STATE_CHANGED: ("~~", "dim"),
|
||||
EventType.CLIENT_INPUT_REQUESTED: ("??", "magenta"),
|
||||
}
|
||||
|
||||
_LOG_LEVEL_COLORS = {
|
||||
logging.DEBUG: "dim",
|
||||
logging.INFO: "",
|
||||
logging.WARNING: "yellow",
|
||||
logging.ERROR: "red",
|
||||
logging.CRITICAL: "bold red",
|
||||
}
|
||||
|
||||
DEFAULT_CSS = """
|
||||
LogPane {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
}
|
||||
|
||||
LogPane > RichLog {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
background: $surface;
|
||||
border: none;
|
||||
scrollbar-background: $panel;
|
||||
scrollbar-color: $primary;
|
||||
}
|
||||
"""
|
||||
|
||||
def compose(self) -> ComposeResult:
|
||||
# RichLog is designed for log display and doesn't have TextArea's rendering issues
|
||||
yield RichLog(id="main-log", highlight=True, markup=True, auto_scroll=False)
|
||||
|
||||
def write_event(self, event: AgentEvent) -> None:
|
||||
"""Format an AgentEvent with timestamp + symbol and write to the log."""
|
||||
ts = event.timestamp.strftime("%H:%M:%S")
|
||||
symbol, color = self._EVENT_FORMAT.get(event.type, ("--", "dim"))
|
||||
text = self._extract_event_text(event)
|
||||
self.write_log(f"[dim]{ts}[/dim] [{color}]{symbol} {text}[/{color}]")
|
||||
|
||||
def _extract_event_text(self, event: AgentEvent) -> str:
|
||||
"""Extract human-readable text from an event's data dict."""
|
||||
et = event.type
|
||||
data = event.data
|
||||
|
||||
if et == EventType.EXECUTION_STARTED:
|
||||
return "Execution started"
|
||||
elif et == EventType.EXECUTION_COMPLETED:
|
||||
return "Execution completed"
|
||||
elif et == EventType.EXECUTION_FAILED:
|
||||
return f"Execution FAILED: {data.get('error', 'unknown')}"
|
||||
elif et == EventType.TOOL_CALL_STARTED:
|
||||
return f"Tool call: {data.get('tool_name', 'unknown')}"
|
||||
elif et == EventType.TOOL_CALL_COMPLETED:
|
||||
name = data.get("tool_name", "unknown")
|
||||
if data.get("is_error"):
|
||||
preview = str(data.get("result", ""))[:80]
|
||||
return f"Tool error: {name} - {preview}"
|
||||
return f"Tool done: {name}"
|
||||
elif et == EventType.NODE_LOOP_STARTED:
|
||||
return f"Node started: {event.node_id or 'unknown'}"
|
||||
elif et == EventType.NODE_LOOP_ITERATION:
|
||||
return f"{event.node_id or 'unknown'} iteration {data.get('iteration', '?')}"
|
||||
elif et == EventType.NODE_LOOP_COMPLETED:
|
||||
return f"Node done: {event.node_id or 'unknown'}"
|
||||
elif et == EventType.NODE_STALLED:
|
||||
reason = data.get("reason", "")
|
||||
node = event.node_id or "unknown"
|
||||
return f"Node stalled: {node} - {reason}" if reason else f"Node stalled: {node}"
|
||||
elif et == EventType.NODE_INPUT_BLOCKED:
|
||||
return f"Node input blocked: {event.node_id or 'unknown'}"
|
||||
elif et == EventType.GOAL_PROGRESS:
|
||||
return f"Goal progress: {data.get('progress', '?')}"
|
||||
elif et == EventType.GOAL_ACHIEVED:
|
||||
return "Goal achieved"
|
||||
elif et == EventType.CONSTRAINT_VIOLATION:
|
||||
return f"Constraint violated: {data.get('description', 'unknown')}"
|
||||
elif et == EventType.STATE_CHANGED:
|
||||
return f"State changed: {data.get('key', 'unknown')}"
|
||||
elif et == EventType.CLIENT_INPUT_REQUESTED:
|
||||
return "Waiting for user input"
|
||||
else:
|
||||
return f"{et.value}: {data}"
|
||||
|
||||
def write_python_log(self, record: logging.LogRecord) -> None:
|
||||
"""Format a Python log record with timestamp and severity color."""
|
||||
ts = datetime.fromtimestamp(record.created).strftime("%H:%M:%S")
|
||||
color = self._LOG_LEVEL_COLORS.get(record.levelno, "")
|
||||
msg = record.getMessage()
|
||||
if color:
|
||||
self.write_log(f"[dim]{ts}[/dim] [{color}]{record.levelname}[/{color}] {msg}")
|
||||
else:
|
||||
self.write_log(f"[dim]{ts}[/dim] {record.levelname} {msg}")
|
||||
|
||||
def write_log(self, message: str) -> None:
|
||||
"""Write a log message to the log pane."""
|
||||
try:
|
||||
# Check if widget is mounted
|
||||
if not self.is_mounted:
|
||||
return
|
||||
|
||||
log = self.query_one("#main-log", RichLog)
|
||||
|
||||
# Check if log is mounted
|
||||
if not log.is_mounted:
|
||||
return
|
||||
|
||||
# Only auto-scroll if user is already at the bottom
|
||||
was_at_bottom = log.is_vertical_scroll_end
|
||||
|
||||
log.write(message)
|
||||
|
||||
if was_at_bottom:
|
||||
log.scroll_end(animate=False)
|
||||
|
||||
except Exception:
|
||||
pass
|
||||
+8
-1
@@ -11,16 +11,22 @@ dependencies = [
|
||||
"litellm>=1.81.0",
|
||||
"mcp>=1.0.0",
|
||||
"fastmcp>=2.0.0",
|
||||
"textual>=1.0.0",
|
||||
"pytest>=8.0",
|
||||
"pytest-asyncio>=0.23",
|
||||
"pytest-xdist>=3.0",
|
||||
"tools",
|
||||
]
|
||||
|
||||
# [project.optional-dependencies]
|
||||
[project.optional-dependencies]
|
||||
tui = ["textual>=0.75.0"]
|
||||
|
||||
[project.scripts]
|
||||
hive = "framework.cli:main"
|
||||
|
||||
[tool.uv.sources]
|
||||
tools = { workspace = true }
|
||||
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
@@ -43,6 +49,7 @@ lint.select = [
|
||||
"W", # pycodestyle warnings
|
||||
]
|
||||
|
||||
lint.per-file-ignores."demos/*" = ["E501"]
|
||||
lint.isort.combine-as-imports = true
|
||||
lint.isort.known-first-party = ["framework"]
|
||||
lint.isort.section-order = [
|
||||
|
||||
@@ -1,10 +0,0 @@
|
||||
# Development dependencies
|
||||
-r requirements.txt
|
||||
|
||||
# Testing
|
||||
pytest>=8.0
|
||||
pytest-asyncio>=0.23
|
||||
|
||||
# Linting & type checking
|
||||
ruff>=0.1.0
|
||||
mypy>=1.0
|
||||
@@ -1,14 +0,0 @@
|
||||
# Core dependencies
|
||||
pydantic>=2.0
|
||||
anthropic>=0.40.0
|
||||
httpx>=0.27.0
|
||||
litellm>=1.81.0
|
||||
|
||||
# MCP server dependencies
|
||||
mcp
|
||||
fastmcp
|
||||
|
||||
# Testing (required for test framework)
|
||||
pytest>=8.0
|
||||
pytest-asyncio>=0.23
|
||||
pytest-xdist>=3.0
|
||||
+1
-1
@@ -143,7 +143,7 @@ def main():
|
||||
logger.info("The MCP server is now ready to use!")
|
||||
logger.info("")
|
||||
logger.info(f"{Colors.BLUE}To start the MCP server manually:{Colors.NC}")
|
||||
logger.info(" python -m framework.mcp.agent_builder_server")
|
||||
logger.info(" uv run python -m framework.mcp.agent_builder_server")
|
||||
logger.info("")
|
||||
logger.info(f"{Colors.BLUE}MCP Configuration location:{Colors.NC}")
|
||||
logger.info(f" {mcp_config_path}")
|
||||
|
||||
+4
-4
@@ -19,7 +19,7 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||
cd "$SCRIPT_DIR"
|
||||
|
||||
echo -e "${YELLOW}Step 1: Installing framework package...${NC}"
|
||||
pip install -e . || {
|
||||
uv pip install -e . || {
|
||||
echo -e "${RED}Failed to install framework package${NC}"
|
||||
exit 1
|
||||
}
|
||||
@@ -27,7 +27,7 @@ echo -e "${GREEN}✓ Framework package installed${NC}"
|
||||
echo ""
|
||||
|
||||
echo -e "${YELLOW}Step 2: Installing MCP dependencies...${NC}"
|
||||
pip install mcp fastmcp || {
|
||||
uv pip install mcp fastmcp || {
|
||||
echo -e "${RED}Failed to install MCP dependencies${NC}"
|
||||
exit 1
|
||||
}
|
||||
@@ -59,7 +59,7 @@ fi
|
||||
echo ""
|
||||
|
||||
echo -e "${YELLOW}Step 4: Testing MCP server...${NC}"
|
||||
python -c "from framework.mcp import agent_builder_server; print('✓ MCP server module loads successfully')" || {
|
||||
uv run python -c "from framework.mcp import agent_builder_server; print('✓ MCP server module loads successfully')" || {
|
||||
echo -e "${RED}Failed to import MCP server module${NC}"
|
||||
exit 1
|
||||
}
|
||||
@@ -71,7 +71,7 @@ echo ""
|
||||
echo "The MCP server is now ready to use!"
|
||||
echo ""
|
||||
echo "To start the MCP server manually:"
|
||||
echo " python -m framework.mcp.agent_builder_server"
|
||||
echo " uv run python -m framework.mcp.agent_builder_server"
|
||||
echo ""
|
||||
echo "MCP Configuration location:"
|
||||
echo " $SCRIPT_DIR/.mcp.json"
|
||||
|
||||
@@ -0,0 +1,237 @@
|
||||
"""
|
||||
Tests for client-facing fan-out and event_loop output_key overlap validation.
|
||||
|
||||
Validates two rules added to GraphSpec.validate():
|
||||
1. Fan-out must not have multiple client_facing=True targets.
|
||||
2. Parallel event_loop nodes must have disjoint output_keys.
|
||||
"""
|
||||
|
||||
from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
|
||||
from framework.graph.node import NodeSpec
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Rule 1: client_facing fan-out
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestClientFacingFanOut:
    """Fan-out to multiple client_facing=True targets must be rejected."""

    @staticmethod
    def _fan_out_graph(a_cf: bool, b_cf: bool) -> GraphSpec:
        """Build src -> {a, b} fan-out with the given client_facing flags."""
        return GraphSpec(
            id="g1",
            goal_id="goal1",
            entry_node="src",
            nodes=[
                NodeSpec(id="src", name="src", description="Source node"),
                NodeSpec(id="a", name="a", description="Node a", client_facing=a_cf),
                NodeSpec(id="b", name="b", description="Node b", client_facing=b_cf),
            ],
            edges=[
                EdgeSpec(id="src->a", source="src", target="a", condition=EdgeCondition.ON_SUCCESS),
                EdgeSpec(id="src->b", source="src", target="b", condition=EdgeCondition.ON_SUCCESS),
            ],
        )

    @staticmethod
    def _client_facing_errors(graph: GraphSpec) -> list[str]:
        """Validate the graph and keep only the fan-out rule's errors."""
        return [e for e in graph.validate() if "multiple client-facing" in e]

    def test_fan_out_two_client_facing_fails(self):
        """Two client-facing targets on the same fan-out -> error."""
        errors = self._client_facing_errors(self._fan_out_graph(True, True))
        assert len(errors) == 1
        assert "'src'" in errors[0]

    def test_fan_out_one_client_facing_passes(self):
        """Only one client-facing target -> no error."""
        assert self._client_facing_errors(self._fan_out_graph(True, False)) == []

    def test_fan_out_zero_client_facing_passes(self):
        """No client-facing targets at all -> no error."""
        assert self._client_facing_errors(self._fan_out_graph(False, False)) == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Rule 2: event_loop output_key overlap
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestEventLoopOutputKeyOverlap:
|
||||
"""Parallel event_loop nodes with overlapping output_keys must be rejected."""
|
||||
|
||||
def test_overlapping_output_keys_event_loop_fails(self):
|
||||
"""Two event_loop nodes sharing an output_key -> error."""
|
||||
graph = GraphSpec(
|
||||
id="g1",
|
||||
goal_id="goal1",
|
||||
entry_node="src",
|
||||
nodes=[
|
||||
NodeSpec(id="src", name="src", description="Source node"),
|
||||
NodeSpec(
|
||||
id="a",
|
||||
name="a",
|
||||
description="Node a",
|
||||
node_type="event_loop",
|
||||
output_keys=["status", "shared"],
|
||||
),
|
||||
NodeSpec(
|
||||
id="b",
|
||||
name="b",
|
||||
description="Node b",
|
||||
node_type="event_loop",
|
||||
output_keys=["result", "shared"],
|
||||
),
|
||||
],
|
||||
edges=[
|
||||
EdgeSpec(id="src->a", source="src", target="a", condition=EdgeCondition.ON_SUCCESS),
|
||||
EdgeSpec(id="src->b", source="src", target="b", condition=EdgeCondition.ON_SUCCESS),
|
||||
],
|
||||
)
|
||||
|
||||
errors = graph.validate()
|
||||
key_errors = [e for e in errors if "output_key" in e]
|
||||
assert len(key_errors) == 1
|
||||
assert "'shared'" in key_errors[0]
|
||||
|
||||
def test_disjoint_output_keys_event_loop_passes(self):
|
||||
"""Two event_loop nodes with disjoint output_keys -> no error."""
|
||||
graph = GraphSpec(
|
||||
id="g1",
|
||||
goal_id="goal1",
|
||||
entry_node="src",
|
||||
nodes=[
|
||||
NodeSpec(id="src", name="src", description="Source node"),
|
||||
NodeSpec(
|
||||
id="a",
|
||||
name="a",
|
||||
description="Node a",
|
||||
node_type="event_loop",
|
||||
output_keys=["status"],
|
||||
),
|
||||
NodeSpec(
|
||||
id="b",
|
||||
name="b",
|
||||
description="Node b",
|
||||
node_type="event_loop",
|
||||
output_keys=["result"],
|
||||
),
|
||||
],
|
||||
edges=[
|
||||
EdgeSpec(id="src->a", source="src", target="a", condition=EdgeCondition.ON_SUCCESS),
|
||||
EdgeSpec(id="src->b", source="src", target="b", condition=EdgeCondition.ON_SUCCESS),
|
||||
],
|
||||
)
|
||||
|
||||
errors = graph.validate()
|
||||
key_errors = [e for e in errors if "output_key" in e]
|
||||
assert len(key_errors) == 0
|
||||
|
||||
def test_overlapping_keys_non_event_loop_no_error(self):
|
||||
"""Non-event_loop nodes with overlapping keys -> no error (last-wins OK)."""
|
||||
graph = GraphSpec(
|
||||
id="g1",
|
||||
goal_id="goal1",
|
||||
entry_node="src",
|
||||
nodes=[
|
||||
NodeSpec(id="src", name="src", description="Source node"),
|
||||
NodeSpec(
|
||||
id="a",
|
||||
name="a",
|
||||
description="Node a",
|
||||
node_type="llm_generate",
|
||||
output_keys=["shared"],
|
||||
),
|
||||
NodeSpec(
|
||||
id="b",
|
||||
name="b",
|
||||
description="Node b",
|
||||
node_type="llm_generate",
|
||||
output_keys=["shared"],
|
||||
),
|
||||
],
|
||||
edges=[
|
||||
EdgeSpec(id="src->a", source="src", target="a", condition=EdgeCondition.ON_SUCCESS),
|
||||
EdgeSpec(id="src->b", source="src", target="b", condition=EdgeCondition.ON_SUCCESS),
|
||||
],
|
||||
)
|
||||
|
||||
errors = graph.validate()
|
||||
key_errors = [e for e in errors if "output_key" in e]
|
||||
assert len(key_errors) == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Baseline: no fan-out -> no errors from these rules
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestNoFanOutUnaffected:
|
||||
"""Linear graphs should not trigger either validation rule."""
|
||||
|
||||
def test_no_fan_out_unaffected(self):
|
||||
"""Linear chain with client_facing and event_loop nodes -> no errors."""
|
||||
graph = GraphSpec(
|
||||
id="g1",
|
||||
goal_id="goal1",
|
||||
entry_node="a",
|
||||
terminal_nodes=["c"],
|
||||
nodes=[
|
||||
NodeSpec(id="a", name="a", description="Node a", client_facing=True),
|
||||
NodeSpec(
|
||||
id="b",
|
||||
name="b",
|
||||
description="Node b",
|
||||
node_type="event_loop",
|
||||
output_keys=["x"],
|
||||
),
|
||||
NodeSpec(
|
||||
id="c",
|
||||
name="c",
|
||||
description="Node c",
|
||||
client_facing=True,
|
||||
node_type="event_loop",
|
||||
output_keys=["x"],
|
||||
),
|
||||
],
|
||||
edges=[
|
||||
EdgeSpec(id="a->b", source="a", target="b", condition=EdgeCondition.ON_SUCCESS),
|
||||
EdgeSpec(id="b->c", source="b", target="c", condition=EdgeCondition.ON_SUCCESS),
|
||||
],
|
||||
)
|
||||
|
||||
errors = graph.validate()
|
||||
cf_errors = [e for e in errors if "multiple client-facing" in e]
|
||||
key_errors = [e for e in errors if "output_key" in e]
|
||||
assert len(cf_errors) == 0
|
||||
assert len(key_errors) == 0
|
||||
@@ -0,0 +1,150 @@
|
||||
"""
|
||||
Tests for ClientIO gateway (WP-9).
|
||||
|
||||
Covers:
|
||||
- ActiveNodeClientIO: emit_output → output_stream round-trip, request_input, timeout
|
||||
- InertNodeClientIO: emit_output publishes NODE_INTERNAL_OUTPUT, request_input returns redirect
|
||||
- ClientIOGateway: factory creates correct variant
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
|
||||
import pytest
|
||||
|
||||
from framework.graph.client_io import (
|
||||
ActiveNodeClientIO,
|
||||
ClientIOGateway,
|
||||
InertNodeClientIO,
|
||||
NodeClientIO,
|
||||
)
|
||||
from framework.runtime.event_bus import AgentEvent, EventType
|
||||
|
||||
_AGENT_EVENT_FIELDS = {"stream_id", "node_id", "execution_id", "correlation_id"}
|
||||
|
||||
|
||||
class MockEventBus:
|
||||
"""Lightweight stand-in for EventBus that records published events."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.events: list[AgentEvent] = []
|
||||
|
||||
async def _record(self, event_type: EventType, **kwargs) -> None:
|
||||
agent_kwargs = {k: v for k, v in kwargs.items() if k in _AGENT_EVENT_FIELDS}
|
||||
data = {k: v for k, v in kwargs.items() if k not in _AGENT_EVENT_FIELDS}
|
||||
self.events.append(AgentEvent(type=event_type, **agent_kwargs, data=data))
|
||||
|
||||
async def emit_client_output_delta(self, **kwargs) -> None:
|
||||
await self._record(EventType.CLIENT_OUTPUT_DELTA, **kwargs)
|
||||
|
||||
async def emit_client_input_requested(self, **kwargs) -> None:
|
||||
await self._record(EventType.CLIENT_INPUT_REQUESTED, **kwargs)
|
||||
|
||||
async def emit_node_internal_output(self, **kwargs) -> None:
|
||||
await self._record(EventType.NODE_INTERNAL_OUTPUT, **kwargs)
|
||||
|
||||
async def emit_node_input_blocked(self, **kwargs) -> None:
|
||||
await self._record(EventType.NODE_INPUT_BLOCKED, **kwargs)
|
||||
|
||||
|
||||
# --- ActiveNodeClientIO tests ---
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_active_emit_and_consume():
|
||||
"""emit_output → output_stream round-trip works correctly."""
|
||||
bus = MockEventBus()
|
||||
io = ActiveNodeClientIO(node_id="n1", event_bus=bus)
|
||||
|
||||
await io.emit_output("Hello ")
|
||||
await io.emit_output("World", is_final=True)
|
||||
|
||||
chunks = []
|
||||
async for chunk in io.output_stream():
|
||||
chunks.append(chunk)
|
||||
|
||||
assert chunks == ["Hello ", "World"]
|
||||
assert len(bus.events) == 2
|
||||
assert all(e.type == EventType.CLIENT_OUTPUT_DELTA for e in bus.events)
|
||||
# Verify snapshot accumulates
|
||||
assert bus.events[0].data["snapshot"] == "Hello "
|
||||
assert bus.events[1].data["snapshot"] == "Hello World"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_active_request_input():
|
||||
"""request_input blocks until provide_input is called."""
|
||||
bus = MockEventBus()
|
||||
io = ActiveNodeClientIO(node_id="n1", event_bus=bus)
|
||||
|
||||
async def fulfill_later():
|
||||
await asyncio.sleep(0.01)
|
||||
await io.provide_input("user says hi")
|
||||
|
||||
task = asyncio.create_task(fulfill_later())
|
||||
result = await io.request_input(prompt="What?")
|
||||
await task
|
||||
|
||||
assert result == "user says hi"
|
||||
assert len(bus.events) == 1
|
||||
assert bus.events[0].type == EventType.CLIENT_INPUT_REQUESTED
|
||||
assert bus.events[0].data["prompt"] == "What?"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_active_request_input_timeout():
|
||||
"""request_input raises TimeoutError when timeout expires."""
|
||||
io = ActiveNodeClientIO(node_id="n1")
|
||||
|
||||
with pytest.raises(TimeoutError):
|
||||
await io.request_input(prompt="waiting", timeout=0.01)
|
||||
|
||||
|
||||
# --- InertNodeClientIO tests ---
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_inert_emit_publishes_internal():
|
||||
"""InertNodeClientIO.emit_output publishes NODE_INTERNAL_OUTPUT."""
|
||||
bus = MockEventBus()
|
||||
io = InertNodeClientIO(node_id="n2", event_bus=bus)
|
||||
|
||||
await io.emit_output("internal log")
|
||||
|
||||
assert len(bus.events) == 1
|
||||
assert bus.events[0].type == EventType.NODE_INTERNAL_OUTPUT
|
||||
assert bus.events[0].data["content"] == "internal log"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_inert_request_input_returns_redirect():
|
||||
"""request_input returns a redirect string and publishes NODE_INPUT_BLOCKED."""
|
||||
bus = MockEventBus()
|
||||
io = InertNodeClientIO(node_id="n2", event_bus=bus)
|
||||
|
||||
result = await io.request_input(prompt="need data")
|
||||
|
||||
assert "internal processing node" in result
|
||||
assert len(bus.events) == 1
|
||||
assert bus.events[0].type == EventType.NODE_INPUT_BLOCKED
|
||||
assert bus.events[0].data["prompt"] == "need data"
|
||||
|
||||
|
||||
# --- ClientIOGateway tests ---
|
||||
|
||||
|
||||
def test_gateway_creates_active_for_client_facing():
|
||||
"""ClientIOGateway.create_io returns ActiveNodeClientIO when client_facing=True."""
|
||||
gateway = ClientIOGateway()
|
||||
io = gateway.create_io(node_id="n1", client_facing=True)
|
||||
|
||||
assert isinstance(io, ActiveNodeClientIO)
|
||||
assert isinstance(io, NodeClientIO)
|
||||
|
||||
|
||||
def test_gateway_creates_inert_for_internal():
|
||||
"""ClientIOGateway.create_io returns InertNodeClientIO when client_facing=False."""
|
||||
gateway = ClientIOGateway()
|
||||
io = gateway.create_io(node_id="n2", client_facing=False)
|
||||
|
||||
assert isinstance(io, InertNodeClientIO)
|
||||
assert isinstance(io, NodeClientIO)
|
||||
@@ -0,0 +1,165 @@
|
||||
"""Tests for ConcurrentStorage race condition and cache invalidation fixes."""
|
||||
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from framework.schemas.run import Run, RunMetrics, RunStatus
|
||||
from framework.storage.concurrent import ConcurrentStorage
|
||||
|
||||
|
||||
def create_test_run(
|
||||
run_id: str, goal_id: str = "test-goal", status: RunStatus = RunStatus.RUNNING
|
||||
) -> Run:
|
||||
"""Create a minimal test Run object."""
|
||||
return Run(
|
||||
id=run_id,
|
||||
goal_id=goal_id,
|
||||
status=status,
|
||||
narrative="Test run",
|
||||
metrics=RunMetrics(
|
||||
nodes_executed=[],
|
||||
),
|
||||
decisions=[],
|
||||
problems=[],
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_cache_invalidation_on_save(tmp_path: Path):
|
||||
"""Test that summary cache is invalidated when a run is saved.
|
||||
|
||||
This tests the fix for the cache invalidation bug where load_summary()
|
||||
would return stale data after a run was updated.
|
||||
"""
|
||||
storage = ConcurrentStorage(tmp_path)
|
||||
await storage.start()
|
||||
|
||||
try:
|
||||
run_id = "test-run-1"
|
||||
|
||||
# Create and save initial run
|
||||
run = create_test_run(run_id, status=RunStatus.RUNNING)
|
||||
await storage.save_run(run, immediate=True)
|
||||
|
||||
# Load summary to populate the cache
|
||||
summary = await storage.load_summary(run_id)
|
||||
assert summary is not None
|
||||
assert summary.status == RunStatus.RUNNING
|
||||
|
||||
# Update run with new status
|
||||
run.status = RunStatus.COMPLETED
|
||||
await storage.save_run(run, immediate=True)
|
||||
|
||||
# Load summary again - should get fresh data, not cached stale data
|
||||
summary = await storage.load_summary(run_id)
|
||||
assert summary is not None
|
||||
assert summary.status == RunStatus.COMPLETED, (
|
||||
"Summary cache should be invalidated on save - got stale data"
|
||||
)
|
||||
finally:
|
||||
await storage.stop()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_batched_write_cache_consistency(tmp_path: Path):
|
||||
"""Test that cache is only updated after successful batched write.
|
||||
|
||||
This tests the fix for the race condition where cache was updated
|
||||
before the batched write completed.
|
||||
"""
|
||||
storage = ConcurrentStorage(tmp_path, batch_interval=0.05)
|
||||
await storage.start()
|
||||
|
||||
try:
|
||||
run_id = "test-run-2"
|
||||
|
||||
# Save via batching (immediate=False)
|
||||
run = create_test_run(run_id, status=RunStatus.RUNNING)
|
||||
await storage.save_run(run, immediate=False)
|
||||
|
||||
# Before batch flush, cache should NOT contain the run
|
||||
# (This is the fix - previously cache was updated immediately)
|
||||
cache_key = f"run:{run_id}"
|
||||
assert cache_key not in storage._cache, (
|
||||
"Cache should not be updated before batch is flushed"
|
||||
)
|
||||
|
||||
# Wait for batch to flush (poll instead of fixed sleep for CI reliability)
|
||||
for _ in range(500): # 500 * 0.01s = 5s max
|
||||
if cache_key in storage._cache:
|
||||
break
|
||||
await asyncio.sleep(0.01)
|
||||
|
||||
# After batch flush, cache should contain the run
|
||||
assert cache_key in storage._cache, "Cache should be updated after batch flush"
|
||||
|
||||
# Verify data on disk matches cache
|
||||
loaded_run = await storage.load_run(run_id, use_cache=False)
|
||||
assert loaded_run is not None
|
||||
assert loaded_run.id == run_id
|
||||
assert loaded_run.status == RunStatus.RUNNING
|
||||
finally:
|
||||
await storage.stop()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_immediate_write_updates_cache(tmp_path: Path):
|
||||
"""Test that immediate writes still update cache correctly."""
|
||||
storage = ConcurrentStorage(tmp_path)
|
||||
await storage.start()
|
||||
|
||||
try:
|
||||
run_id = "test-run-3"
|
||||
|
||||
# Save with immediate=True
|
||||
run = create_test_run(run_id, status=RunStatus.COMPLETED)
|
||||
await storage.save_run(run, immediate=True)
|
||||
|
||||
# Cache should be updated immediately for immediate writes
|
||||
cache_key = f"run:{run_id}"
|
||||
assert cache_key in storage._cache, "Cache should be updated after immediate write"
|
||||
|
||||
# Verify cached value is correct
|
||||
cached_run = storage._cache[cache_key].value
|
||||
assert cached_run.id == run_id
|
||||
assert cached_run.status == RunStatus.COMPLETED
|
||||
finally:
|
||||
await storage.stop()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_summary_cache_invalidated_on_multiple_saves(tmp_path: Path):
|
||||
"""Test that summary cache is invalidated on each save, not just the first."""
|
||||
storage = ConcurrentStorage(tmp_path)
|
||||
await storage.start()
|
||||
|
||||
try:
|
||||
run_id = "test-run-4"
|
||||
|
||||
# First save
|
||||
run = create_test_run(run_id, status=RunStatus.RUNNING)
|
||||
await storage.save_run(run, immediate=True)
|
||||
|
||||
# Load summary to cache it
|
||||
summary1 = await storage.load_summary(run_id)
|
||||
assert summary1.status == RunStatus.RUNNING
|
||||
|
||||
# Second save with new status
|
||||
run.status = RunStatus.RUNNING
|
||||
await storage.save_run(run, immediate=True)
|
||||
|
||||
# Load summary - should be fresh
|
||||
summary2 = await storage.load_summary(run_id)
|
||||
assert summary2.status == RunStatus.RUNNING
|
||||
|
||||
# Third save with final status
|
||||
run.status = RunStatus.COMPLETED
|
||||
await storage.save_run(run, immediate=True)
|
||||
|
||||
# Load summary - should be fresh again
|
||||
summary3 = await storage.load_summary(run_id)
|
||||
assert summary3.status == RunStatus.COMPLETED
|
||||
finally:
|
||||
await storage.stop()
|
||||
@@ -0,0 +1,326 @@
|
||||
"""Tests for ContextHandoff and HandoffContext."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
from framework.graph.context_handoff import ContextHandoff, HandoffContext
|
||||
from framework.graph.conversation import NodeConversation
|
||||
from framework.llm.mock import MockLLMProvider
|
||||
from framework.llm.provider import LLMProvider, LLMResponse
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class SpyLLMProvider(MockLLMProvider):
|
||||
"""MockLLMProvider that records whether complete() was called."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self.complete_called = False
|
||||
self.complete_call_args: dict[str, Any] | None = None
|
||||
|
||||
def complete(self, messages: list[dict[str, Any]], **kwargs: Any) -> LLMResponse:
|
||||
self.complete_called = True
|
||||
self.complete_call_args = {"messages": messages, **kwargs}
|
||||
return super().complete(messages, **kwargs)
|
||||
|
||||
|
||||
class FailingLLMProvider(LLMProvider):
|
||||
"""LLM provider that always raises."""
|
||||
|
||||
def complete(self, messages: list[dict[str, Any]], **kwargs: Any) -> LLMResponse:
|
||||
raise RuntimeError("LLM unavailable")
|
||||
|
||||
def complete_with_tools(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
system: str,
|
||||
tools: list,
|
||||
tool_executor: Any,
|
||||
max_iterations: int = 10,
|
||||
) -> LLMResponse:
|
||||
raise RuntimeError("LLM unavailable")
|
||||
|
||||
|
||||
async def _build_conversation(*pairs: tuple[str, str]) -> NodeConversation:
|
||||
"""Build a NodeConversation from (user, assistant) message pairs."""
|
||||
conv = NodeConversation()
|
||||
for user_msg, assistant_msg in pairs:
|
||||
await conv.add_user_message(user_msg)
|
||||
await conv.add_assistant_message(assistant_msg)
|
||||
return conv
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TestHandoffContext
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestHandoffContext:
|
||||
def test_instantiation(self) -> None:
|
||||
hc = HandoffContext(
|
||||
source_node_id="node_A",
|
||||
summary="Summary text",
|
||||
key_outputs={"result": "42"},
|
||||
turn_count=3,
|
||||
total_tokens_used=1200,
|
||||
)
|
||||
assert hc.source_node_id == "node_A"
|
||||
assert hc.summary == "Summary text"
|
||||
assert hc.key_outputs == {"result": "42"}
|
||||
assert hc.turn_count == 3
|
||||
assert hc.total_tokens_used == 1200
|
||||
|
||||
def test_field_access(self) -> None:
|
||||
hc = HandoffContext(
|
||||
source_node_id="n1",
|
||||
summary="s",
|
||||
key_outputs={},
|
||||
turn_count=0,
|
||||
total_tokens_used=0,
|
||||
)
|
||||
assert hc.key_outputs == {}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TestExtractiveSummary
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestExtractiveSummary:
|
||||
@pytest.mark.asyncio
|
||||
async def test_extractive_summary_includes_first_last(self) -> None:
|
||||
conv = await _build_conversation(
|
||||
("hello", "First response here."),
|
||||
("continue", "Middle response."),
|
||||
("finish", "Final conclusion."),
|
||||
)
|
||||
ch = ContextHandoff()
|
||||
hc = ch.summarize_conversation(conv, node_id="test_node")
|
||||
|
||||
assert "First response here." in hc.summary
|
||||
assert "Final conclusion." in hc.summary
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extractive_summary_metadata(self) -> None:
|
||||
conv = await _build_conversation(
|
||||
("hi", "hello"),
|
||||
("bye", "goodbye"),
|
||||
)
|
||||
ch = ContextHandoff()
|
||||
hc = ch.summarize_conversation(conv, node_id="node_42")
|
||||
|
||||
assert hc.source_node_id == "node_42"
|
||||
assert hc.turn_count == 2
|
||||
assert hc.total_tokens_used > 0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extractive_with_output_keys_colon(self) -> None:
|
||||
conv = await _build_conversation(
|
||||
("what is the answer?", "answer: 42"),
|
||||
)
|
||||
ch = ContextHandoff()
|
||||
hc = ch.summarize_conversation(conv, node_id="n", output_keys=["answer"])
|
||||
|
||||
assert hc.key_outputs["answer"] == "42"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extractive_with_output_keys_equals(self) -> None:
|
||||
conv = await _build_conversation(
|
||||
("compute", "result = success"),
|
||||
)
|
||||
ch = ContextHandoff()
|
||||
hc = ch.summarize_conversation(conv, node_id="n", output_keys=["result"])
|
||||
|
||||
assert hc.key_outputs["result"] == "success"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extractive_json_output_keys(self) -> None:
|
||||
conv = await _build_conversation(
|
||||
("give me json", '{"score": 95, "grade": "A"}'),
|
||||
)
|
||||
ch = ContextHandoff()
|
||||
hc = ch.summarize_conversation(conv, node_id="n", output_keys=["score", "grade"])
|
||||
|
||||
assert hc.key_outputs["score"] == "95"
|
||||
assert hc.key_outputs["grade"] == "A"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extractive_empty_conversation(self) -> None:
|
||||
conv = NodeConversation()
|
||||
ch = ContextHandoff()
|
||||
hc = ch.summarize_conversation(conv, node_id="empty")
|
||||
|
||||
assert hc.summary == "Empty conversation."
|
||||
assert hc.turn_count == 0
|
||||
assert hc.key_outputs == {}
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extractive_no_assistant_messages(self) -> None:
|
||||
conv = NodeConversation()
|
||||
await conv.add_user_message("hello?")
|
||||
await conv.add_user_message("anyone there?")
|
||||
|
||||
ch = ContextHandoff()
|
||||
hc = ch.summarize_conversation(conv, node_id="silent")
|
||||
|
||||
assert hc.summary == "No assistant responses."
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extractive_most_recent_wins(self) -> None:
|
||||
conv = await _build_conversation(
|
||||
("first", "status: old_value"),
|
||||
("second", "status: new_value"),
|
||||
)
|
||||
ch = ContextHandoff()
|
||||
hc = ch.summarize_conversation(conv, node_id="n", output_keys=["status"])
|
||||
|
||||
assert hc.key_outputs["status"] == "new_value"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extractive_truncation(self) -> None:
|
||||
long_text = "x" * 1000
|
||||
conv = await _build_conversation(
|
||||
("go", long_text),
|
||||
)
|
||||
ch = ContextHandoff()
|
||||
hc = ch.summarize_conversation(conv, node_id="n")
|
||||
|
||||
# Summary should be truncated to ~500 chars
|
||||
assert len(hc.summary) <= 500
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TestLLMSummary
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestLLMSummary:
|
||||
@pytest.mark.asyncio
|
||||
async def test_llm_summary_calls_provider(self) -> None:
|
||||
llm = SpyLLMProvider()
|
||||
conv = await _build_conversation(
|
||||
("hi", "hello back"),
|
||||
("what now?", "we are done"),
|
||||
)
|
||||
ch = ContextHandoff(llm=llm)
|
||||
hc = ch.summarize_conversation(conv, node_id="llm_node")
|
||||
|
||||
assert llm.complete_called, "LLM complete() was never invoked"
|
||||
assert hc.summary == "This is a mock response for testing purposes."
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_llm_summary_includes_output_key_hint(self) -> None:
|
||||
llm = SpyLLMProvider()
|
||||
conv = await _build_conversation(
|
||||
("compute", '{"score": 95}'),
|
||||
)
|
||||
ch = ContextHandoff(llm=llm)
|
||||
ch.summarize_conversation(conv, node_id="n", output_keys=["score", "grade"])
|
||||
|
||||
assert llm.complete_call_args is not None
|
||||
system = llm.complete_call_args.get("system", "")
|
||||
assert "score" in system
|
||||
assert "grade" in system
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_llm_fallback_on_error(self) -> None:
|
||||
llm = FailingLLMProvider()
|
||||
conv = await _build_conversation(
|
||||
("start", "First assistant message."),
|
||||
("end", "Last assistant message."),
|
||||
)
|
||||
ch = ContextHandoff(llm=llm)
|
||||
hc = ch.summarize_conversation(conv, node_id="fallback_node")
|
||||
|
||||
# Should fall back to extractive (first + last assistant messages)
|
||||
assert "First assistant message." in hc.summary
|
||||
assert "Last assistant message." in hc.summary
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TestFormatAsInput
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFormatAsInput:
|
||||
def test_format_structure(self) -> None:
|
||||
hc = HandoffContext(
|
||||
source_node_id="analyzer",
|
||||
summary="Analysis complete.",
|
||||
key_outputs={"score": "95"},
|
||||
turn_count=5,
|
||||
total_tokens_used=2000,
|
||||
)
|
||||
output = ContextHandoff.format_as_input(hc)
|
||||
|
||||
assert "--- CONTEXT FROM: analyzer" in output
|
||||
assert "KEY OUTPUTS:" in output
|
||||
assert "SUMMARY:" in output
|
||||
assert "--- END CONTEXT ---" in output
|
||||
|
||||
def test_format_no_key_outputs(self) -> None:
|
||||
hc = HandoffContext(
|
||||
source_node_id="simple",
|
||||
summary="Done.",
|
||||
key_outputs={},
|
||||
turn_count=1,
|
||||
total_tokens_used=100,
|
||||
)
|
||||
output = ContextHandoff.format_as_input(hc)
|
||||
|
||||
assert "KEY OUTPUTS:" not in output
|
||||
assert "SUMMARY:" in output
|
||||
|
||||
def test_format_content_values(self) -> None:
|
||||
hc = HandoffContext(
|
||||
source_node_id="node_X",
|
||||
summary="Found 3 bugs.",
|
||||
key_outputs={"bugs": "3", "severity": "high"},
|
||||
turn_count=7,
|
||||
total_tokens_used=5000,
|
||||
)
|
||||
output = ContextHandoff.format_as_input(hc)
|
||||
|
||||
assert "node_X" in output
|
||||
assert "7 turns" in output
|
||||
assert "~5000 tokens" in output
|
||||
assert "- bugs: 3" in output
|
||||
assert "- severity: high" in output
|
||||
assert "Found 3 bugs." in output
|
||||
|
||||
def test_format_empty_summary(self) -> None:
|
||||
hc = HandoffContext(
|
||||
source_node_id="n",
|
||||
summary="",
|
||||
key_outputs={},
|
||||
turn_count=0,
|
||||
total_tokens_used=0,
|
||||
)
|
||||
output = ContextHandoff.format_as_input(hc)
|
||||
|
||||
assert "No summary available." in output
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_format_as_input_usable_as_message(self) -> None:
|
||||
"""Formatted output can be fed into a NodeConversation as a user message."""
|
||||
hc = HandoffContext(
|
||||
source_node_id="prev_node",
|
||||
summary="Completed analysis.",
|
||||
key_outputs={"result": "42"},
|
||||
turn_count=3,
|
||||
total_tokens_used=900,
|
||||
)
|
||||
text = ContextHandoff.format_as_input(hc)
|
||||
|
||||
conv = NodeConversation()
|
||||
msg = await conv.add_user_message(text)
|
||||
|
||||
assert msg.role == "user"
|
||||
assert "CONTEXT FROM: prev_node" in msg.content
|
||||
assert conv.turn_count == 1
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user