Compare commits

...

651 Commits

Author SHA1 Message Date
bryan faff64c413 chore: agents.md update 2026-03-04 12:12:27 -08:00
Timothy 6fbcdc1d87 fix: auto install node 20 2026-03-04 12:11:29 -08:00
bryan 69a11af949 chore: best effort alignment of windows quickstart 2026-03-04 11:43:50 -08:00
bryan 9ef272020e chore: added llm key health check 2026-03-04 11:35:12 -08:00
bryan 258cfe7de5 chore: added easy way to update llm provider key 2026-03-04 10:42:57 -08:00
bryan 0d53b21133 chore: doc updates about hive open 2026-03-04 10:33:34 -08:00
bryan 0ccb28ffab fix: enter to use previously configured 2026-03-04 10:05:59 -08:00
bryan b30b571b44 chore: update recommended models 2026-03-04 09:54:29 -08:00
bryan bc44c3a401 chore: make gcu enabled by default 2026-03-04 09:52:42 -08:00
bryan 7fbf57cbb7 fix: linter update 2026-03-04 09:52:16 -08:00
bryan 67d094f51a fix: tool tests 2026-03-04 09:22:34 -08:00
bryan 873af04c6e fix: utilize mac keychain for claude code subscription 2026-03-04 09:22:12 -08:00
bryan 1920192656 feat: hive open cmd 2026-03-04 08:55:18 -08:00
Timothy @aden 4cbd5a4c6c Merge pull request #5786 from osb910/fix/charmap-decode-error
fix(core): add utf-8 encoding to backend open calls (micro-fix)
2026-03-04 08:39:10 -08:00
Timothy 65aa5629e8 chore: fix lint 2026-03-04 08:34:01 -08:00
Omar Shareef 7193d09bed formatting warning fix 2026-03-04 16:43:46 +02:00
Omar Shareef 49f8fae0b4 fix: systematically enforce UTF-8 encoding across tools and core to fix Windows charmap decode errors 2026-03-04 16:04:53 +02:00
Omar Shareef e1a490756e fix: systematically enforce UTF-8 encoding across tools and core to fix Windows charmap decode errors 2026-03-04 15:58:03 +02:00
Omar Shareef 91bfaf36e3 fix(core): add utf-8 encoding to backend open calls
This fixes a charmap decoding error on Windows when opening agent files without explicitly specifying the encoding.
2026-03-04 13:32:59 +02:00
Timothy @aden 465adf5b1f Merge pull request #5767 from aden-hive/feat/integrations
Feat/integrations
2026-03-03 22:04:08 -08:00
RichardTang-Aden 132d00d166 Merge pull request #5769 from aden-hive/queen-mode-separation
Queen mode separation: building, staging, and running modes
2026-03-03 21:31:23 -08:00
Richard Tang a604fee3aa chore: mode label update 2026-03-03 20:47:35 -08:00
Timothy 8018325923 style: fix all ruff lint errors (E501, E722, E741, F841)
- Break long lines (E501) across 25+ files
- Replace bare except with except Exception (E722)
- Rename ambiguous variable `l` to `item` (E741)
- Prefix unused variables with underscore (F841)
2026-03-03 20:42:30 -08:00
Richard Tang 3f86bd4009 chore: lint fix 2026-03-03 20:39:04 -08:00
Timothy b4cf10214b chore: lint issues 2026-03-03 20:38:30 -08:00
Bryan @ Aden c7818c2c33 Merge pull request #5766 from aden-hive/fix/credential-modal-delete
(micro-fix): Fix/credential modal delete
2026-03-04 04:38:23 +00:00
Timothy e421bcc326 chore: lint issues 2026-03-03 20:36:28 -08:00
Richard Tang 09e5a4dcc0 chore: frontend verbiage 2026-03-03 20:31:26 -08:00
Richard Tang ce08c44235 feat: improve ui indicator 2026-03-03 20:28:32 -08:00
Richard Tang e743234324 fix: strengthen prompt to collect user intent 2026-03-03 20:23:53 -08:00
Timothy 9b76ac48b7 chore: new dependency 2026-03-03 20:23:10 -08:00
Richard Tang 6ae16345a8 fix: reference err from merging 2026-03-03 20:15:37 -08:00
Richard Tang 8daaf000b1 Merge remote-tracking branch 'origin/feat/question-widget' into queen-mode-separation 2026-03-03 20:09:10 -08:00
Richard Tang 273f411eee feat: replace the reload agent to stop worker 2026-03-03 20:01:27 -08:00
Richard Tang 6929cecf8a fix: tag for frontend 2026-03-03 19:53:18 -08:00
Richard Tang 9221a7ff03 Merge remote-tracking branch 'origin/queen-mode-separation' into queen-mode-separation 2026-03-03 19:43:33 -08:00
Richard Tang a6089c5b3b feat: returning queen bee status when starting session 2026-03-03 19:43:04 -08:00
Richard Tang a7ee972b32 feat: enable the frontend to cancel the current queen run and sync queen mode 2026-03-03 19:30:55 -08:00
Richard Tang c817989b99 feat: allow frontend change to control mode 2026-03-03 19:29:33 -08:00
Richard Tang 2272a6854c refactor: consolidate discover_mcp_tools and list_agent_tools 2026-03-03 19:08:58 -08:00
Timothy 040fc1ee8d feat: corrected agent generation guidelines 2026-03-03 18:53:40 -08:00
Richard Tang f00b8d7b8c fix: update the initial state condition 2026-03-03 18:35:24 -08:00
Timothy @aden 6c8c6d7048 Merge pull request #5234 from Antiarin/fix/guardian-self-trigger-loop
fix(tui): fix pause/stop to cancel all running tasks across all graphs
2026-03-03 18:17:15 -08:00
Richard Tang f27ef52c7a feat: update queen initial state 2026-03-03 18:15:51 -08:00
Richard Tang 0a2ff1db97 feat: new queen stages and tools 2026-03-03 18:07:47 -08:00
Timothy 6da48eac6f feat: split tool loading into verified and unverified tiers
register_all_tools() now only loads verified (stable) tools by default.
Pass include_unverified=True to also load new/community integrations.
This prevents unverified tools from being loaded in production.

Also fixes duplicate register_brevo and register_pushover calls.
2026-03-03 17:54:45 -08:00
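A minimal sketch of the opt-in described above — the import path is an assumption; only `register_all_tools()` and the `include_unverified=True` flag come from the commit message:

```python
# Hypothetical import path; the function and flag names come from the commit above.
from aden_tools.tools import register_all_tools

register_all_tools()  # default: verified (stable) tools only

# Opt in explicitly when new/community integrations are wanted (e.g. in dev):
# register_all_tools(include_unverified=True)
```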
Timothy 638ff04e24 fix: remove duplicate community tool directories and fix credential wiring
- Remove s3_tool (duplicate of aws_s3_tool), power_bi_tool (duplicate of
  powerbi_tool), x_tool (duplicate of twitter_tool)
- Remove integrations/plaid (duplicate of plaid_tool), integrations/sap_s4hana
  (duplicate of sap_tool), stray tools/mssql.py
- Add help key to credential error responses across 14 tool modules
- Fix health checker registry keys (calendly -> calendly_pat, lusha -> lusha_api_key)
- Add health_check_endpoint to calendly and lusha credential specs
- Fix Trello env var (TRELLO_TOKEN -> TRELLO_API_TOKEN) and remove duplicate
  Trello specs from hubspot.py
- Add credential_group="aws" to AWS S3 and Redshift specs sharing env vars
- Update conftest UNREGISTERED_COMMUNITY_MODULES to only contain mssql_tool
2026-03-03 17:46:28 -08:00
Timothy d7075b459b fix: cleanse llm conversations 2026-03-03 17:44:21 -08:00
bryan d0e7aa14b6 fix: hide delete button for Aden-managed credentials 2026-03-03 17:36:04 -08:00
bryan 59fee56c54 fix: share server credential store with runner to avoid redundant Aden syncs 2026-03-03 17:35:24 -08:00
bryan 2207306169 fix: resolve MCP server cwd from repo root instead of agent path 2026-03-03 17:34:51 -08:00
Richard Tang 8ff2e91f2d feat: add queen agent building and running mode switching 2026-03-03 16:01:41 -08:00
Richard Tang 61afaa4c8b feat: add uv instruction to agents 2026-03-03 14:51:58 -08:00
Richard Tang 0de47dbc3f feat: agents.md for agent collaboration 2026-03-03 14:51:58 -08:00
Richard Tang 676ef56134 fix: mcp path 2026-03-03 14:51:58 -08:00
Richard Tang f0899bb35d feat: use send instead of draft for email reply agent 2026-03-03 14:51:58 -08:00
Richard Tang f490038e36 chore: move the email reply sample agent 2026-03-03 14:51:58 -08:00
Richard Tang cbf220eb00 feat: email reply sample agent 2026-03-03 14:51:58 -08:00
Richard Tang bf0d80ea20 docs: reorder section in documentation 2026-03-03 14:51:58 -08:00
Richard Tang 3ae889a6f8 docs: add running screenshot and update the coding agent instruction 2026-03-03 14:51:58 -08:00
Richard Tang 03ca1067ac docs: sync all i18n READMEs with primary README 2026-03-03 14:51:58 -08:00
Richard Tang 3cda30a40a docs: update the latest features from recent changes 2026-03-03 14:51:58 -08:00
Richard Tang 26934527b9 docs: update readme instructions 2026-03-03 14:51:58 -08:00
Richard Tang 2619acde22 docs: remove TUI in the readme 2026-03-03 14:51:58 -08:00
Richard Tang b983d3cfd2 chore: ignore local dev skills 2026-03-03 14:51:58 -08:00
Richard Tang 87a9dd15fe fix: load-new-session from home 2026-03-03 14:51:58 -08:00
RichardTang-Aden 4066962ade Merge pull request #5751 from aden-hive/load-new-session-from-home
Fix new session from home and add email reply agent template
2026-03-03 14:48:17 -08:00
Richard Tang 0f26e34f09 fix: improve the reply template 2026-03-03 14:45:07 -08:00
Richard Tang d76e436e3d fix: new session should have their own id 2026-03-03 14:44:51 -08:00
Timothy 4ff531dec7 fix: update expected health checkers set (add calendly, zoho_crm) 2026-03-03 14:10:34 -08:00
Timothy 4f8b3d7aff fix: update credential specs for community Linear/Trello tools, skip unregistered community modules 2026-03-03 14:09:04 -08:00
Timothy 210fa9c474 fix: use community Brevo implementation (6 tools), remove orphaned x_tool test 2026-03-03 14:06:00 -08:00
Timothy 25361cac8c fix: align tests with community implementations, revert Reddit to httpx (praw unavailable) 2026-03-03 14:02:33 -08:00
Timothy 28defebd6d fix: remove community youtube_transcript tool.py requiring uninstalled SDK 2026-03-03 13:58:45 -08:00
Timothy c74381619e Merge branch 'feature/queen-worker-comm' into feat/question-widget 2026-03-03 13:57:52 -08:00
Timothy d58f3103dd fix: guard register_tools for s3_tool and mssql_tool when SDK not available 2026-03-03 13:54:46 -08:00
Timothy 5d1ed35660 fix: remove shell heredoc artifacts from community power_bi_tool 2026-03-03 13:52:20 -08:00
Timothy 1f3e305534 fix: guard optional SDK imports (boto3, pyodbc) and remove s3_tool registration 2026-03-03 13:51:04 -08:00
Timothy 7d8fdd279c fix: revert Asana to httpx-based implementation (asana SDK not available) 2026-03-03 13:33:35 -08:00
Timothy cacae9f290 fix: compaction logic 2026-03-03 13:33:01 -08:00
Timothy bb061b770f merge: incorporate QuickBooks community PR #4158
# Conflicts:
#	examples/templates/deep_research_agent/config.py
#	examples/templates/tech_news_reporter/config.py
#	tools/README.md
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/quickbooks.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/quickbooks_tool/__init__.py
#	tools/src/aden_tools/tools/quickbooks_tool/quickbooks_tool.py
#	tools/tests/tools/test_quickbooks_tool.py
2026-03-03 13:27:04 -08:00
Timothy a8768b9ed6 merge: incorporate MSSQL community PR #4200
# Conflicts:
#	tools/pyproject.toml
#	tools/src/aden_tools/credentials/integrations.py
#	tools/src/aden_tools/tools/__init__.py
2026-03-03 13:26:36 -08:00
Timothy b437aa5f6c merge: incorporate Linear community PR #3585
# Conflicts:
#	.claude/skills/hive-credentials/SKILL.md
#	tools/README.md
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/linear_tool/__init__.py
#	tools/src/aden_tools/tools/linear_tool/linear_tool.py
2026-03-03 13:24:57 -08:00
Timothy 9248182570 merge: incorporate Trello community PR #3376
# Conflicts:
#	tools/README.md
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/trello_tool/__init__.py
#	tools/src/aden_tools/tools/trello_tool/trello_tool.py
#	tools/tests/tools/test_trello_tool.py
2026-03-03 13:24:23 -08:00
bryan 511c1a6ed5 fix: update queen prompt around ask_user 2026-03-03 13:22:59 -08:00
Timothy 7c77c7170f merge: incorporate YouTube Transcript community PR #3520
# Conflicts:
#	tools/pyproject.toml
#	tools/src/aden_tools/tools/__init__.py
2026-03-03 13:22:46 -08:00
Timothy 85fcb6516c merge: incorporate Redshift community PR #3533
# Conflicts:
#	tools/pyproject.toml
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/redshift_tool/__init__.py
#	tools/src/aden_tools/tools/redshift_tool/redshift_tool.py
#	tools/tests/tools/test_redshift_tool.py
2026-03-03 13:17:41 -08:00
Timothy e8e76d85f7 merge: incorporate Pushover community PR #5424
# Conflicts:
#	tools/src/aden_tools/tools/pushover_tool/__init__.py
#	tools/src/aden_tools/tools/pushover_tool/pushover_tool.py
2026-03-03 13:17:18 -08:00
Timothy 5aaa5ae4d5 merge: incorporate Twitter/X community PR #3807
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/tests/test_credentials.py
2026-03-03 13:16:45 -08:00
Timothy c3a8ee9c7b merge: incorporate Calendly community PR #3947
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/calendly.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/calendly_tool/__init__.py
#	tools/src/aden_tools/tools/calendly_tool/calendly_tool.py
#	tools/tests/test_health_checks.py
#	tools/tests/tools/test_calendly_tool.py
2026-03-03 13:14:20 -08:00
Timothy 5d07a8aba5 merge: incorporate Airtable community PR #3953
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/airtable.py
#	tools/src/aden_tools/credentials/health_check.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/airtable_tool/__init__.py
#	tools/src/aden_tools/tools/airtable_tool/airtable_tool.py
#	tools/tests/test_health_checks.py
#	tools/tests/tools/test_airtable_tool.py
2026-03-03 13:13:47 -08:00
Timothy d18e0594b8 merge: incorporate Reddit community PR #3963
# Conflicts:
#	tools/pyproject.toml
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/health_check.py
#	tools/src/aden_tools/credentials/reddit.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/reddit_tool/__init__.py
#	tools/src/aden_tools/tools/reddit_tool/reddit_tool.py
#	tools/tests/tools/test_reddit_tool.py
#	uv.lock
2026-03-03 13:12:55 -08:00
Timothy 26dcc86a24 merge: incorporate Zoho CRM community PR #4713
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/zoho_crm_tool/__init__.py
#	tools/src/aden_tools/tools/zoho_crm_tool/zoho_crm_tool.py
#	tools/tests/test_health_checks.py
2026-03-03 13:11:51 -08:00
Timothy e928ad19e5 merge: incorporate Lusha community PR #4714
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/lusha.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/lusha_tool/__init__.py
#	tools/src/aden_tools/tools/lusha_tool/lusha_tool.py
#	tools/tests/tools/test_lusha_tool.py
2026-03-03 13:11:33 -08:00
Timothy 6768aaa575 merge: incorporate Apify community PR #4770
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/apify.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/apify_tool/__init__.py
#	tools/src/aden_tools/tools/apify_tool/apify_tool.py
#	tools/tests/tools/test_apify_tool.py
2026-03-03 13:10:45 -08:00
Timothy f561aacbfc merge: incorporate Attio community PR #4832
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/attio.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/attio_tool/__init__.py
#	tools/src/aden_tools/tools/attio_tool/attio_tool.py
2026-03-03 13:10:09 -08:00
RichardTang-Aden af1ece40c2 Merge pull request #5742 from aden-hive/load-new-session-from-home
Load new session from home
2026-03-03 13:09:44 -08:00
Timothy d9edd7adf7 merge: incorporate Asana community PR #4857
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/asana.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/asana_tool/__init__.py
#	tools/tests/tools/test_asana_tool.py
2026-03-03 13:08:30 -08:00
Richard Tang 3541fab363 feat: add uv instruction to agents 2026-03-03 13:06:50 -08:00
Richard Tang 1160dceeff feat: agents.md for agent collaboration 2026-03-03 13:06:09 -08:00
bryan bbe8efeba2 fix: prevent queen auto-block from overwriting pending worker questions 2026-03-03 13:04:33 -08:00
Timothy b4a5323009 merge: incorporate Brevo community PR #5136
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/brevo.py
#	tools/src/aden_tools/tools/brevo_tool/__init__.py
#	tools/src/aden_tools/tools/brevo_tool/brevo_tool.py
2026-03-03 13:04:29 -08:00
Timothy ade8b5b9a7 merge: incorporate Databricks community PR #5428
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/databricks.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/databricks_tool/__init__.py
#	tools/src/aden_tools/tools/databricks_tool/databricks_tool.py
#	tools/tests/tools/test_databricks_tool.py
2026-03-03 13:02:30 -08:00
Timothy e4ace3d484 merge: incorporate YouTube community PR #5673 (resolve conflicts, preserve README) 2026-03-03 12:29:32 -08:00
Timothy f3dd25adc5 merge: incorporate Power BI community PR #4341 2026-03-03 12:27:06 -08:00
Timothy ec251f8168 merge: incorporate SAP S/4HANA community PR #5519 2026-03-03 12:27:02 -08:00
Timothy 1bb9579dc5 merge: incorporate Plaid community PR #5518 2026-03-03 12:26:56 -08:00
Timothy 7ebf4146ce merge: incorporate AWS S3 community PR #5521 2026-03-03 12:26:50 -08:00
Richard Tang a8db4cb2f5 fix: mcp path 2026-03-03 12:19:32 -08:00
Richard Tang 24433396dd feat: use send instead of draft for email reply agent 2026-03-03 12:04:44 -08:00
Richard Tang 02bdf17641 chore: move the email reply sample agent 2026-03-03 11:59:14 -08:00
Timothy e0e05f3488 chore: register Obsidian tool in tool/credential registries 2026-03-03 11:55:12 -08:00
Timothy c92f2510c8 test: add Obsidian tool unit tests (read, write, append, search, list, active) 2026-03-03 11:55:12 -08:00
Timothy ea1fbe9ee1 chore: add Obsidian credential spec (REST API key) 2026-03-03 11:55:11 -08:00
Timothy 84a0be0179 feat: add Obsidian knowledge management integration (#3741)
6 tools: obsidian_read_note, obsidian_write_note, obsidian_append_note,
obsidian_search, obsidian_list_files, obsidian_get_active.
Uses Local REST API plugin with Bearer token auth. Supports vault
browsing, full-text search, and note CRUD with frontmatter metadata.
2026-03-03 11:55:04 -08:00
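For context, a rough sketch of calling the Local REST API plugin this integration wraps — the port, vault path, and self-signed-cert workaround are assumptions about the plugin's defaults, not code from the repo:

```python
import httpx

API_KEY = "<obsidian REST API key>"
BASE = "https://127.0.0.1:27124"  # assumed plugin default (HTTPS, self-signed cert)

# Read one note; the plugin exposes vault files under /vault/<path>.
resp = httpx.get(
    f"{BASE}/vault/Projects/roadmap.md",
    headers={"Authorization": f"Bearer {API_KEY}"},
    verify=False,  # the plugin ships a self-signed certificate by default
)
print(resp.text)
```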
RichardTang-Aden 54f5c0dc91 Merge pull request #5735 from aden-hive/docs/readme/v6
docs: reorder section in documentation
2026-03-03 11:54:09 -08:00
Richard Tang adf1a10318 docs: reorder section in documentation 2026-03-03 11:53:05 -08:00
RichardTang-Aden e2a679a265 Merge pull request #5734 from aden-hive/docs/readme/v6
docs: add running screenshot and update the coding agent instruction
2026-03-03 11:50:56 -08:00
Richard Tang a3916a6932 docs: add running screenshot and update the coding agent instruction 2026-03-03 11:49:19 -08:00
Timothy 1b5780461e chore: register Langfuse tool in tool/credential registries 2026-03-03 11:42:49 -08:00
Timothy c8d35b63a4 test: add Langfuse tool unit tests (traces, scores, prompts) 2026-03-03 11:42:49 -08:00
Timothy feb1ebae04 chore: add Langfuse credential specs (public key, secret key) 2026-03-03 11:42:48 -08:00
Timothy efe49d0a5b feat: add Langfuse LLM observability integration (#5322)
6 tools: langfuse_list_traces, langfuse_get_trace, langfuse_list_scores,
langfuse_create_score, langfuse_list_prompts, langfuse_get_prompt.
Uses HTTP Basic Auth with public/secret key pair. Supports cloud and
self-hosted instances with offset-based pagination.
2026-03-03 11:41:11 -08:00
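A hedged sketch of the Basic-auth scheme named above — the cloud host and endpoint path are assumptions based on Langfuse's public API, not taken from this repo:

```python
import httpx

# The public key is the Basic-auth username, the secret key the password.
resp = httpx.get(
    "https://cloud.langfuse.com/api/public/traces",  # self-hosted instances swap the host
    auth=("pk-lf-...", "sk-lf-..."),
    params={"page": 1, "limit": 50},  # offset-based pagination, as the commit notes
)
traces = resp.json()["data"]
```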
Timothy e50a5ea22a chore: register Zoom and n8n tools in tool/credential registries 2026-03-03 11:31:25 -08:00
Timothy 6382c94d0a test: add n8n tool unit tests (workflows, executions, activate/deactivate) 2026-03-03 11:31:21 -08:00
Timothy 58ce84c9cc chore: add n8n credential specs (API key, base URL) 2026-03-03 11:31:20 -08:00
Timothy 08fd6ff765 feat: add n8n workflow automation integration (#2931)
6 tools: n8n_list_workflows, n8n_get_workflow, n8n_activate_workflow,
n8n_deactivate_workflow, n8n_list_executions, n8n_get_execution.
Uses X-N8N-API-KEY header auth with configurable base URL.
Supports cursor-based pagination and execution status filtering.
2026-03-03 11:31:15 -08:00
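Roughly, the header auth and cursor pagination described above look like this — the base URL and endpoint path are assumptions from n8n's public API, not repo code:

```python
import httpx

BASE = "https://n8n.example.com"  # configurable base URL (placeholder)

resp = httpx.get(
    f"{BASE}/api/v1/workflows",
    headers={"X-N8N-API-KEY": "<api key>"},
)
page = resp.json()
next_cursor = page.get("nextCursor")  # pass back as ?cursor=... to fetch the next page
```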
Timothy a9cb79909c test: add Zoom tool unit tests (user, meetings, recordings) 2026-03-03 11:31:07 -08:00
Timothy 852f8ccd94 chore: add Zoom credential spec (Server-to-Server OAuth token) 2026-03-03 11:31:07 -08:00
Timothy 9388ef3e99 feat: add Zoom meeting management integration (#2867)
6 tools: zoom_get_user, zoom_list_meetings, zoom_get_meeting,
zoom_create_meeting, zoom_delete_meeting, zoom_list_recordings.
Uses Server-to-Server OAuth Bearer token. Supports token-based
pagination and cloud recording retrieval by date range.
2026-03-03 11:31:00 -08:00
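The Server-to-Server OAuth flow mentioned above, sketched with placeholder credentials (endpoints per Zoom's documented flow, not code from this repo):

```python
import httpx

# 1. Exchange account credentials for a short-lived access token.
token = httpx.post(
    "https://zoom.us/oauth/token",
    params={"grant_type": "account_credentials", "account_id": "<account id>"},
    auth=("<client id>", "<client secret>"),
).json()["access_token"]

# 2. Call the REST API with the Bearer token.
me = httpx.get(
    "https://api.zoom.us/v2/users/me",
    headers={"Authorization": f"Bearer {token}"},
).json()
```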
Timothy 04afb0c4bb chore: register Salesforce and Shopify tools in tool/credential registries 2026-03-03 11:22:40 -08:00
Timothy a07fd44de3 test: add Shopify tool unit tests (orders, products, customers, search) 2026-03-03 11:22:35 -08:00
Timothy f6c1b13846 chore: add Shopify credential specs (access token, store name) 2026-03-03 11:22:35 -08:00
Timothy 654fa3dd1f feat: add Shopify Admin REST API integration - orders, products, customers (#2984)
6 tools: shopify_list_orders, shopify_get_order, shopify_list_products,
shopify_get_product, shopify_list_customers, shopify_search_customers.
Uses X-Shopify-Access-Token header auth with store subdomain.
2026-03-03 11:22:29 -08:00
Timothy 8183449d27 test: add Salesforce CRM tool unit tests (SOQL, CRUD, describe, list objects) 2026-03-03 11:22:16 -08:00
Timothy a9acfb86ad chore: add Salesforce credential specs (access token, instance URL) 2026-03-03 11:22:15 -08:00
Timothy d7d070ac5f feat: add Salesforce CRM integration - SOQL, records, and metadata (#2916)
6 tools: salesforce_soql_query, salesforce_get_record, salesforce_create_record,
salesforce_update_record, salesforce_describe_object, salesforce_list_objects.
Uses OAuth2 Bearer token auth with instance URL. Supports pagination via
nextRecordsUrl and field-level describe with picklist values.
2026-03-03 11:22:08 -08:00
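A sketch of the nextRecordsUrl pagination the commit calls out — the API version and instance host are placeholders:

```python
import httpx

INSTANCE = "https://myorg.my.salesforce.com"   # instance URL credential (placeholder)
HEADERS = {"Authorization": "Bearer <access token>"}

url = f"{INSTANCE}/services/data/v59.0/query"  # assumed API version
params = {"q": "SELECT Id, Name FROM Account"}
records = []
while url:
    page = httpx.get(url, headers=HEADERS, params=params).json()
    records.extend(page["records"])
    params = None  # nextRecordsUrl already encodes the query
    url = INSTANCE + page["nextRecordsUrl"] if not page["done"] else None
```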
RichardTang-Aden ead51f1eb6 Merge pull request #5732 from aden-hive/docs/readme/v6
docs: update README and sync all i18n translations
2026-03-03 11:19:06 -08:00
Timothy 8c01b573ce chore: register Redshift and SAP S/4HANA in tool/credential registries 2026-03-03 11:11:12 -08:00
Timothy 7744f21b9d test: add SAP S/4HANA tool unit tests (POs, partners, products, sales orders) 2026-03-03 11:11:08 -08:00
Timothy 9ed23a235f chore: add SAP S/4HANA credential specs (base URL, username, password) 2026-03-03 11:11:07 -08:00
Timothy e88328321f feat: add SAP S/4HANA Cloud read-only procurement integration (#3182) 2026-03-03 11:11:06 -08:00
Timothy a4c516bea1 test: add Redshift tool unit tests (execute, describe, results, databases, tables) 2026-03-03 11:11:00 -08:00
Timothy 1c932a04ef chore: add Redshift credential specs (AWS access key, secret key) 2026-03-03 11:11:00 -08:00
Timothy 76d34be4c2 feat: add Amazon Redshift Data API integration - SQL and schema browsing (#3267) 2026-03-03 11:10:59 -08:00
bryan cb0e9ff9ec chore: fixing tests 2026-03-03 11:07:49 -08:00
Timothy d6e8afe316 chore: register Azure SQL and Kafka in tool/credential registries 2026-03-03 11:03:31 -08:00
Timothy a04f2bcf99 test: add Kafka tool unit tests (topics, produce, consumer groups) 2026-03-03 11:03:27 -08:00
Timothy c138e7c638 chore: add Kafka credential specs (REST URL, cluster ID) 2026-03-03 11:03:27 -08:00
Timothy fc08c7007f feat: add Apache Kafka integration via Confluent REST Proxy (#4774) 2026-03-03 11:03:26 -08:00
Timothy d559bb3446 test: add Azure SQL tool unit tests (servers, databases, firewall rules) 2026-03-03 11:03:18 -08:00
Timothy 55a8c39e4b chore: add Azure SQL credential specs (token, subscription ID) 2026-03-03 11:03:17 -08:00
Timothy 02d6f10e5f feat: add Azure SQL Database management integration (#3377) 2026-03-03 11:03:16 -08:00
Timothy 77428a91cc chore: register Power BI and Snowflake in tool/credential registries 2026-03-03 10:56:46 -08:00
Timothy 51403dc276 test: add Snowflake tool unit tests (execute, status, cancel) 2026-03-03 10:56:43 -08:00
Timothy 914a07a35d chore: add Snowflake credential specs (account, token) 2026-03-03 10:56:42 -08:00
Timothy 3c70d7b424 feat: add Snowflake SQL REST API integration (#3230) 2026-03-03 10:56:41 -08:00
Timothy ce1ee4ff17 test: add Power BI tool unit tests (workspaces, datasets, reports, refresh) 2026-03-03 10:56:35 -08:00
Timothy fca41d9bda chore: add Power BI credential spec (POWERBI_ACCESS_TOKEN) 2026-03-03 10:56:34 -08:00
Timothy ff889e02f7 feat: add Power BI integration - workspaces, datasets, reports (#3973) 2026-03-03 10:56:34 -08:00
Richard Tang cbd2c86bbf docs: sync all i18n READMEs with primary README 2026-03-03 10:53:11 -08:00
Timothy 43ab460462 chore: register Terraform Cloud and Lusha in tool/credential registries 2026-03-03 10:49:21 -08:00
Timothy caa06e266b test: add Lusha tool unit tests (enrich, search, usage) 2026-03-03 10:49:17 -08:00
Timothy 3622ca78ee chore: add Lusha credential spec (LUSHA_API_KEY) 2026-03-03 10:49:17 -08:00
Timothy 019e3f9659 feat: add Lusha B2B contact and company enrichment integration (#3461) 2026-03-03 10:49:16 -08:00
Timothy 208cb579a2 test: add Terraform Cloud tool unit tests (workspaces, runs) 2026-03-03 10:49:09 -08:00
Timothy 17de7e4485 chore: add Terraform Cloud credential spec (TFC_TOKEN) 2026-03-03 10:49:08 -08:00
Timothy 810616eee1 feat: add Terraform Cloud integration - workspaces and runs (#4773) 2026-03-03 10:48:41 -08:00
Timothy 191f583669 chore: register Twitter/X and Tines in tool/credential registries 2026-03-03 10:35:46 -08:00
Timothy 1d638cc18e test: add Tines tool unit tests (stories, actions, logs) 2026-03-03 10:35:42 -08:00
Timothy 3efa1f3b88 chore: add Tines credential specs (domain, api_key) 2026-03-03 10:35:42 -08:00
Timothy 4daa33db09 feat: add Tines integration - security automation stories and actions
Implements 5 tools via Tines REST API:
- tines_list_stories: List workflow stories with search/filter
- tines_get_story: Get story details including entry/exit agents
- tines_list_actions: List actions (agents) in stories
- tines_get_action: Get action details with sources/receivers
- tines_get_action_logs: Get action execution logs by level

Uses Bearer token auth with tenant domain.
2026-03-03 10:35:37 -08:00
Timothy fab2fb0056 test: add Twitter/X tool unit tests (search, user, timeline, tweet) 2026-03-03 10:35:29 -08:00
Timothy ce885c120e chore: add Twitter/X credential spec (bearer_token) 2026-03-03 10:35:28 -08:00
Timothy 75b53c47ff feat: add Twitter/X integration - tweet search and user lookup via API v2
Implements 4 tools via X API v2:
- twitter_search_tweets: Search recent tweets with query operators
- twitter_get_user: Get user profile by username
- twitter_get_user_tweets: Get user timeline
- twitter_get_tweet: Get tweet details by ID

Uses Bearer token auth (app-only, read access).
2026-03-03 10:35:21 -08:00
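App-only Bearer auth against X API v2, as described above — a minimal sketch with a placeholder token and query:

```python
import httpx

resp = httpx.get(
    "https://api.twitter.com/2/tweets/search/recent",
    headers={"Authorization": "Bearer <app-only token>"},
    params={"query": "from:nasa -is:retweet", "max_results": 10},
)
tweets = resp.json().get("data", [])
```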
Timothy 2936f73707 chore: register AWS S3 and QuickBooks in tool/credential registries 2026-03-03 10:22:46 -08:00
Timothy e26426b138 test: add QuickBooks tool unit tests (query, entities, invoices) 2026-03-03 10:22:42 -08:00
Timothy 62cacb8e28 chore: add QuickBooks credential specs (access_token, realm_id) 2026-03-03 10:22:42 -08:00
Timothy f3e37190ce feat: add QuickBooks Online integration - accounting API
Implements 5 tools via QuickBooks Online API v3:
- quickbooks_query: Query entities with SQL-like syntax
- quickbooks_get_entity: Get entity by type and ID
- quickbooks_create_customer: Create customers
- quickbooks_create_invoice: Create invoices with line items
- quickbooks_get_company_info: Get company details

Uses OAuth 2.0 Bearer token auth. Supports sandbox mode.
2026-03-03 10:22:35 -08:00
Timothy 0863bbbd2f test: add AWS S3 tool unit tests (buckets, objects, get, put, delete) 2026-03-03 10:22:25 -08:00
Timothy b23fa1daad chore: add AWS S3 credential specs (access_key_id, secret_access_key) 2026-03-03 10:22:24 -08:00
Timothy 05cc1ce599 feat: add AWS S3 integration - object storage via REST API with SigV4
Implements 5 tools via AWS S3 REST API:
- s3_list_buckets: List all buckets in the account
- s3_list_objects: List objects with prefix/delimiter filtering
- s3_get_object: Get object content and metadata
- s3_put_object: Upload text objects
- s3_delete_object: Delete objects

Uses AWS Signature V4 signing (no boto3 dependency).
2026-03-03 10:22:16 -08:00
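The interesting part of going boto3-free is the Signature V4 key derivation; a sketch of that chain under AWS's documented scheme (the canonical-request and string-to-sign steps are omitted):

```python
import hashlib
import hmac

def _hmac(key: bytes, msg: str) -> bytes:
    return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()

def sigv4_signing_key(secret_key: str, date_stamp: str, region: str,
                      service: str = "s3") -> bytes:
    """Derive the per-request SigV4 signing key (date -> region -> service)."""
    k_date = _hmac(("AWS4" + secret_key).encode("utf-8"), date_stamp)  # e.g. "20260303"
    k_region = _hmac(k_date, region)                                   # e.g. "us-east-1"
    k_service = _hmac(k_region, service)
    return _hmac(k_service, "aws4_request")
```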
RichardTang-Aden a1c045fd91 Merge pull request #5727 from aden-hive/docs/readme/v6
Docs: Remove TUI references from README
2026-03-03 10:14:13 -08:00
Timothy e6939f8d51 chore: register PagerDuty and Calendly in tool/credential registries 2026-03-03 10:13:18 -08:00
Timothy 801fef12e1 test: add Calendly tool unit tests (user, events, invitees) 2026-03-03 10:13:14 -08:00
Timothy 5845629175 chore: add Calendly credential spec (personal_access_token) 2026-03-03 10:13:13 -08:00
Timothy 11b916301a feat: add Calendly integration - scheduling events and invitees
Implements 5 tools via Calendly API v2:
- calendly_get_current_user: Get user URI and profile info
- calendly_list_event_types: List meeting templates
- calendly_list_scheduled_events: List booked meetings with date filters
- calendly_get_scheduled_event: Get event details by URI
- calendly_list_invitees: List invitees for an event

Uses Bearer token auth (Personal Access Token).
2026-03-03 10:13:07 -08:00
Timothy aa5d80b1d2 test: add PagerDuty tool unit tests (incidents, services) 2026-03-03 10:13:02 -08:00
Timothy aa5f990acd chore: add PagerDuty credential specs (api_key, from_email) 2026-03-03 10:13:01 -08:00
Timothy 9764c82c2a feat: add PagerDuty integration - incident management and services
Implements 5 tools via PagerDuty REST API v2:
- pagerduty_list_incidents: List incidents with status/urgency/date filters
- pagerduty_get_incident: Get incident details by ID
- pagerduty_create_incident: Create incidents on a service
- pagerduty_update_incident: Acknowledge or resolve incidents
- pagerduty_list_services: List services with name search

Uses Token auth header, From header for write operations.
2026-03-03 10:12:55 -08:00
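The two headers the commit mentions, in a minimal sketch (endpoint from PagerDuty's public REST API v2; values are placeholders):

```python
import httpx

HEADERS = {
    "Authorization": "Token token=<api key>",  # PagerDuty's token auth scheme
    "From": "oncall@example.com",              # required by write operations
}

incidents = httpx.get(
    "https://api.pagerduty.com/incidents",
    headers=HEADERS,
    params={"statuses[]": "triggered"},
).json()["incidents"]
```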
Richard Tang f921846879 docs: update the latest features from recent changes 2026-03-03 10:12:43 -08:00
Richard Tang a370403b16 docs: update readme instructions 2026-03-03 10:06:13 -08:00
Timothy 543a71eb6c chore: register MongoDB and Airtable in tool/credential registries 2026-03-03 10:06:12 -08:00
Timothy 8285593c13 test: add Airtable tool unit tests (records, bases, schema) 2026-03-03 10:06:08 -08:00
Timothy 6fbfe773fb chore: add Airtable credential spec (personal_access_token) 2026-03-03 10:06:07 -08:00
Timothy a8c54b1e5f feat: add Airtable integration - record CRUD and base metadata
Implements 6 tools via Airtable Web API:
- airtable_list_records: List records with filters, sort, field selection
- airtable_get_record: Get a single record by ID
- airtable_create_records: Create up to 10 records per request
- airtable_update_records: Partial update up to 10 records per request
- airtable_list_bases: List accessible bases
- airtable_get_base_schema: Get table and field schema for a base

Uses Bearer token auth (Personal Access Token).
2026-03-03 10:06:03 -08:00
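The 10-records-per-request cap above implies client-side batching; a hedged sketch (base and table IDs are placeholders):

```python
import httpx

BASE_ID, TABLE = "appXXXXXXXXXXXXXX", "Tasks"
HEADERS = {"Authorization": "Bearer <personal access token>"}

def create_records(fields_list):
    # The Web API accepts at most 10 records per create/update call.
    for i in range(0, len(fields_list), 10):
        httpx.post(
            f"https://api.airtable.com/v0/{BASE_ID}/{TABLE}",
            headers=HEADERS,
            json={"records": [{"fields": f} for f in fields_list[i:i + 10]]},
        )
```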
Timothy a5323abfca test: add MongoDB tool unit tests (find, insert, update, delete, aggregate) 2026-03-03 10:05:53 -08:00
Timothy ba4df2d2c4 chore: add MongoDB credential specs (data_api_url, api_key, data_source) 2026-03-03 10:05:52 -08:00
Timothy 6510633a8c feat: add MongoDB Atlas Data API integration - document CRUD and aggregation
Implements 6 tools via MongoDB Atlas Data API:
- mongodb_find: Find documents with filters, projection, sort, limit
- mongodb_find_one: Find a single document
- mongodb_insert_one: Insert a document
- mongodb_update_one: Update a document with MongoDB operators
- mongodb_delete_one: Delete a document
- mongodb_aggregate: Run aggregation pipelines

Uses API key auth header. All endpoints are POST.
2026-03-03 10:05:42 -08:00
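Since every Data API endpoint is a POST, even reads look like writes — a sketch with a placeholder app URL and cluster name:

```python
import httpx

DATA_API_URL = "https://data.mongodb-api.com/app/<app id>/endpoint/data/v1"  # placeholder

resp = httpx.post(                       # reads are POSTs too
    f"{DATA_API_URL}/action/find",
    headers={"api-key": "<api key>"},
    json={
        "dataSource": "Cluster0",
        "database": "shop",
        "collection": "orders",
        "filter": {"status": "open"},
        "limit": 10,
    },
)
documents = resp.json()["documents"]
```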
Timothy 9172e5f46b chore: register Twilio and Zendesk in tool/credential registries 2026-03-03 09:56:14 -08:00
Timothy ed3e3848c0 test: add Zendesk tool unit tests (list, get, create, update, search) 2026-03-03 09:56:10 -08:00
Timothy ee90185d5c chore: add Zendesk credential specs (subdomain, email, api_token) 2026-03-03 09:56:09 -08:00
Timothy 6eb2633677 feat: add Zendesk integration - ticket management and search
Implements 5 tools via Zendesk Support API v2:
- zendesk_list_tickets: List tickets with status/sort filters
- zendesk_get_ticket: Get ticket details by ID
- zendesk_create_ticket: Create tickets with priority/type/tags
- zendesk_update_ticket: Update ticket fields and add comments
- zendesk_search_tickets: Search tickets with Zendesk query syntax

Uses Basic auth (email/token:api_token).
2026-03-03 09:56:00 -08:00
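The slightly unusual Basic scheme above ("email/token" as the username) in sketch form, with placeholder values:

```python
import httpx

SUBDOMAIN, EMAIL, API_TOKEN = "acme", "agent@acme.com", "<api token>"

resp = httpx.get(
    f"https://{SUBDOMAIN}.zendesk.com/api/v2/tickets.json",
    auth=(f"{EMAIL}/token", API_TOKEN),  # email/token:api_token Basic auth
    params={"sort_by": "updated_at", "sort_order": "desc"},
)
tickets = resp.json()["tickets"]
```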
Timothy c1f215dcf2 test: add Twilio tool unit tests (SMS, WhatsApp, list, get) 2026-03-03 09:55:50 -08:00
Timothy 97cc9a1045 chore: add Twilio credential specs (account_sid, auth_token) 2026-03-03 09:55:49 -08:00
Timothy 5f7b02a4b7 feat: add Twilio integration - SMS and WhatsApp messaging
Implements 4 tools via Twilio REST API:
- twilio_send_sms: Send SMS messages
- twilio_send_whatsapp: Send WhatsApp messages
- twilio_list_messages: List message history with filters
- twilio_get_message: Get message details by SID

Uses Basic auth (AccountSID:AuthToken), form-urlencoded POST.
2026-03-03 09:55:43 -08:00
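Basic auth plus a form-urlencoded POST, per the commit — a sketch with placeholder numbers:

```python
import httpx

SID, TOKEN = "ACxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", "<auth token>"

httpx.post(
    f"https://api.twilio.com/2010-04-01/Accounts/{SID}/Messages.json",
    auth=(SID, TOKEN),   # Basic auth (AccountSID:AuthToken)
    data={               # form-urlencoded body, as the commit notes
        "From": "+15550001111",
        "To": "+15552223333",
        "Body": "Deploy finished.",
    },
)
```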
Richard Tang ad6d504ea4 docs: remove TUI in the readme 2026-03-03 09:52:06 -08:00
Timothy e696b41a0e chore: register GitLab and Google Sheets in tool/credential registries 2026-03-03 09:49:23 -08:00
Timothy 1f9acc6135 test: add Google Sheets tool unit tests (metadata, read, batch read) 2026-03-03 09:49:23 -08:00
Timothy 7e8699cb4b chore: add Google Sheets credential spec (api_key) 2026-03-03 09:49:22 -08:00
Timothy fd4fc657d6 feat: add Google Sheets integration - read spreadsheet data via API v4
3 tools: sheets_get_spreadsheet, sheets_read_range, sheets_batch_read.
Uses API key auth for read-only access to public spreadsheets.
2026-03-03 09:49:21 -08:00
Timothy 34403648b9 test: add GitLab tool unit tests (projects, issues, MRs) 2026-03-03 09:49:15 -08:00
Timothy 3795d50eb9 chore: add GitLab credential spec (personal access token) 2026-03-03 09:49:14 -08:00
Timothy 80515dde5a feat: add GitLab integration - projects, issues, merge requests
6 tools: gitlab_list_projects, gitlab_get_project, gitlab_list_issues,
gitlab_get_issue, gitlab_create_issue, gitlab_list_merge_requests.
Supports GitLab.com and self-hosted via configurable base URL.
2026-03-03 09:49:13 -08:00
Timothy b59094d35f fix: queen should not return on empty stream 2026-03-03 09:44:15 -08:00
Timothy efcd296d83 chore: register Notion and Jira tools in tool/credential registries 2026-03-03 09:43:32 -08:00
Timothy 802cb292b0 test: add Jira tool unit tests (issues, projects, comments) 2026-03-03 09:43:32 -08:00
Timothy 8e55f74d73 chore: add Jira credential specs (domain, email, api_token) 2026-03-03 09:43:31 -08:00
Timothy 3d810485a0 feat: add Jira integration - issues, projects, comments via REST API v3
6 tools: jira_search_issues, jira_get_issue, jira_create_issue,
jira_list_projects, jira_get_project, jira_add_comment. Uses Basic auth
with email + API token and Atlassian Document Format for text fields.
2026-03-03 09:43:30 -08:00
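What "Atlassian Document Format for text fields" means in practice — a sketch of adding a comment via Jira Cloud's documented v3 API (issue key is a placeholder):

```python
import httpx

DOMAIN, EMAIL, TOKEN = "acme.atlassian.net", "me@acme.com", "<api token>"

# Comment bodies must be ADF documents, not plain strings.
adf = {
    "type": "doc",
    "version": 1,
    "content": [{"type": "paragraph",
                 "content": [{"type": "text", "text": "Deployed to staging."}]}],
}
httpx.post(
    f"https://{DOMAIN}/rest/api/3/issue/PROJ-123/comment",  # placeholder issue key
    auth=(EMAIL, TOKEN),
    json={"body": adf},
)
```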
Timothy 94cfd48661 test: add Notion tool unit tests (search, pages, databases) 2026-03-03 09:43:16 -08:00
Timothy 87c8e741f3 chore: add Notion credential spec (api_token) 2026-03-03 09:43:15 -08:00
Timothy d0e92ed18d feat: add Notion integration - pages, databases, and search
5 tools: notion_search, notion_get_page, notion_create_page,
notion_query_database, notion_get_database. Uses Bearer auth
with Notion internal integration token.
2026-03-03 09:43:14 -08:00
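Notion's Bearer auth also requires a version header on every call; a minimal search sketch (the version string is an assumption):

```python
import httpx

resp = httpx.post(
    "https://api.notion.com/v1/search",
    headers={
        "Authorization": "Bearer <internal integration token>",
        "Notion-Version": "2022-06-28",  # required on every request
    },
    json={"query": "roadmap", "page_size": 5},
)
results = resp.json()["results"]
```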
Richard Tang 88640f9222 feat: email reply sample agent 2026-03-03 09:41:20 -08:00
Timothy 1927045519 chore: register Greenhouse and YouTube Transcript in tool/credential registries 2026-03-03 09:36:47 -08:00
Timothy 68cffb86c9 test: add YouTube Transcript tool unit tests (get, list transcripts) 2026-03-03 09:36:47 -08:00
Timothy 5bec989647 feat: add YouTube Transcript integration - captions and transcript retrieval
2 tools: youtube_get_transcript, youtube_list_transcripts.
Uses youtube-transcript-api library, no API key required.
2026-03-03 09:36:46 -08:00
Timothy 66f5d2f36c test: add Greenhouse tool unit tests (jobs, candidates, applications) 2026-03-03 09:36:40 -08:00
Timothy 941f815254 chore: add Greenhouse credential spec (api_token) 2026-03-03 09:36:39 -08:00
Timothy 42afd10518 feat: add Greenhouse integration - ATS jobs, candidates, applications
6 tools: greenhouse_list_jobs, greenhouse_get_job, greenhouse_list_candidates,
greenhouse_get_candidate, greenhouse_list_applications, greenhouse_get_application.
Uses Harvest API v1 with Basic auth (API token).
2026-03-03 09:36:38 -08:00
Timothy 3efa285a59 chore: register Cloudinary and Reddit tools in tool/credential registries 2026-03-03 09:31:22 -08:00
Timothy 4f2b4172b4 test: add Reddit tool unit tests (search, posts, comments, user) 2026-03-03 09:31:18 -08:00
Timothy 0d7de71b94 chore: add Reddit credential specs (client_id, client_secret) 2026-03-03 09:31:17 -08:00
Timothy f0f5b4bede feat: add Reddit integration - search, posts, comments, user info
4 tools: reddit_search, reddit_get_posts, reddit_get_comments, reddit_get_user.
Uses OAuth2 client_credentials flow for app-only access.
2026-03-03 09:31:17 -08:00
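The app-only client_credentials flow above, sketched end to end (the User-Agent string is ours; Reddit rejects generic ones):

```python
import httpx

CLIENT_ID, CLIENT_SECRET = "<client id>", "<client secret>"
UA = "aden-tools/0.1 (app-only)"  # placeholder; a distinctive User-Agent is required

token = httpx.post(
    "https://www.reddit.com/api/v1/access_token",
    auth=(CLIENT_ID, CLIENT_SECRET),
    data={"grant_type": "client_credentials"},
    headers={"User-Agent": UA},
).json()["access_token"]

posts = httpx.get(
    "https://oauth.reddit.com/r/python/hot",
    headers={"Authorization": f"Bearer {token}", "User-Agent": UA},
    params={"limit": 5},
).json()
```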
Timothy bfd27e97d3 test: add Cloudinary tool unit tests (upload, list, get, delete, search) 2026-03-03 09:31:10 -08:00
Timothy f2def27390 chore: add Cloudinary credential specs (cloud_name, api_key, api_secret) 2026-03-03 09:31:10 -08:00
Timothy b3f7bd6cc0 feat: add Cloudinary integration - upload, manage, search media assets
5 tools: cloudinary_upload, cloudinary_list_resources, cloudinary_get_resource,
cloudinary_delete_resource, cloudinary_search. Uses Basic auth with
API key/secret and supports image, video, and raw resource types.
2026-03-03 09:31:09 -08:00
Timothy 0e8e78dc5b chore: register Trello and Confluence tools in tool/credential registries 2026-03-03 09:22:03 -08:00
Timothy b259d85776 test: add Confluence tool tests (9 tests) 2026-03-03 09:22:02 -08:00
Timothy 175d9c3b7c feat: add Confluence credential spec with Basic auth (email + API token) 2026-03-03 09:21:55 -08:00
Timothy a2a810aabf feat: add Confluence integration - spaces, pages, content search via CQL 2026-03-03 09:21:54 -08:00
Timothy 175c7cfd51 test: add Trello tool tests (12 tests) 2026-03-03 09:21:47 -08:00
Timothy 5ada973d38 feat: add Trello credential spec with API key and token auth 2026-03-03 09:21:39 -08:00
Timothy 0103276136 feat: add Trello integration - boards, lists, cards management 2026-03-03 09:21:37 -08:00
Timothy 1d9e8ec138 chore: register HuggingFace tool in tool/credential registries 2026-03-03 09:11:59 -08:00
Timothy 83ac2e71bb test: add HuggingFace tool tests (10 tests) 2026-03-03 09:11:56 -08:00
Timothy 0b35a729a7 feat: add HuggingFace credential spec with token auth 2026-03-03 09:11:55 -08:00
Timothy 56723a519a feat: add HuggingFace Hub integration - models, datasets, spaces search 2026-03-03 09:11:49 -08:00
Timothy ebff394c76 chore: register Plaid tool in tool/credential registries 2026-03-03 09:08:44 -08:00
Timothy ceecc97bc8 test: add Plaid tool tests (13 tests) 2026-03-03 09:08:40 -08:00
Timothy 313154f880 feat: add Plaid credential spec with client_id and secret auth 2026-03-03 09:08:38 -08:00
Timothy 3eb6417cdc feat: add Plaid integration - accounts, balances, transactions, institutions 2026-03-03 09:08:29 -08:00
Timothy 1b35d6ca0a chore: register Pinecone tool in tool/credential registries 2026-03-03 09:05:20 -08:00
Timothy 1d89f0ba9d test: add Pinecone tool tests (18 tests) 2026-03-03 09:05:16 -08:00
Timothy 864df0e21a feat: add Pinecone credential spec with API key auth 2026-03-03 09:05:14 -08:00
Timothy 3f626decc4 feat: add Pinecone vector database integration - indexes, vectors, queries 2026-03-03 09:05:06 -08:00
Timothy bf1760b1a9 chore: register DuckDuckGo tool in tool registry 2026-03-03 08:56:06 -08:00
Timothy 8a58ea6344 test: add DuckDuckGo tool tests (6 tests) 2026-03-03 08:56:06 -08:00
Timothy 662ff4c35f feat: add DuckDuckGo search integration - web search, news, images 2026-03-03 08:56:01 -08:00
Timothy af02352b49 chore: register Linear tool in tool/credential registries 2026-03-03 08:43:41 -08:00
Timothy db9f987d46 test: add Linear tool tests (10 tests) 2026-03-03 08:43:41 -08:00
Timothy 8490ce1389 feat: add Linear credential spec with API key auth 2026-03-03 08:43:41 -08:00
Timothy 55ea9a56a4 feat: add Linear integration - issues, projects, teams, search via GraphQL 2026-03-03 08:43:41 -08:00
Timothy bd2381b10d chore: register Asana tool in tool/credential registries 2026-03-03 08:40:02 -08:00
Timothy 443de755bd test: add Asana tool tests (12 tests) 2026-03-03 08:40:02 -08:00
Timothy 55ec5f14ee feat: add Asana credential spec with PAT auth 2026-03-03 08:40:02 -08:00
Timothy 2e019302c9 feat: add Asana integration - tasks, projects, workspaces, search 2026-03-03 08:40:02 -08:00
Timothy b1e829644b chore: register Yahoo Finance tool in tool registry 2026-03-03 08:36:20 -08:00
Timothy 18f773e91b test: add Yahoo Finance tool tests (8 tests) 2026-03-03 08:36:19 -08:00
Timothy 987cfee930 feat: add Yahoo Finance integration - quotes, history, financials, company info 2026-03-03 08:36:19 -08:00
Timothy 57f6b8498a chore: register Google Search Console tool in tool/credential registries 2026-03-03 08:34:30 -08:00
Timothy 9f0d35977c test: add Google Search Console tool tests (10 tests) 2026-03-03 08:34:30 -08:00
Timothy e5910bbf2f feat: add Google Search Console credential spec with OAuth2 auth 2026-03-03 08:34:30 -08:00
Timothy 0015bf7b38 feat: add Google Search Console integration - analytics, sitemaps, URL inspection 2026-03-03 08:34:30 -08:00
Timothy a6b9234abb chore: register Zoho CRM tool in tool/credential registries 2026-03-03 08:32:13 -08:00
Timothy 086f3942b8 test: add Zoho CRM tool tests (12 tests) 2026-03-03 08:32:13 -08:00
Timothy 924f4abede feat: add Zoho CRM credential spec with OAuth token auth 2026-03-03 08:32:13 -08:00
Timothy 02be91cb08 feat: add Zoho CRM integration - leads, contacts, deals, accounts, notes 2026-03-03 08:32:13 -08:00
Timothy c2298393ab chore: register Apify tool in tool/credential registries 2026-03-03 08:29:33 -08:00
Timothy 4b8c63bf6e test: add Apify tool tests (11 tests) 2026-03-03 08:29:33 -08:00
Timothy e089c3b72c feat: add Apify credential spec with API token auth 2026-03-03 08:29:33 -08:00
Timothy a93983b5db feat: add Apify integration - actors, runs, datasets, key-value stores 2026-03-03 08:29:27 -08:00
Timothy 20f6329004 chore: register Attio tool in tool/credential registries 2026-03-03 08:25:12 -08:00
Timothy 3c2cf71c47 test: add Attio tool tests (14 tests) 2026-03-03 08:25:08 -08:00
Timothy 56288c3137 feat: add Attio credential spec with API key auth 2026-03-03 08:25:04 -08:00
Timothy 79188921a5 feat: add Attio CRM integration - records, lists, notes, tasks 2026-03-03 08:24:58 -08:00
RichardTang-Aden 65962ddf58 Merge pull request #5709 from aden-hive/load-new-session-from-home
Fix new session creation when submitting prompt from home page
2026-03-03 08:20:20 -08:00
Timothy 5ab66008ae chore: register Pipedrive tool in tool/credential registries 2026-03-03 08:18:45 -08:00
Timothy f38c9ee049 test: add Pipedrive tool tests (16 tests) 2026-03-03 08:18:41 -08:00
Timothy 86f5e71ec2 feat: add Pipedrive credential spec with API token auth 2026-03-03 08:18:29 -08:00
Timothy 1e15cc8495 feat: add Pipedrive CRM integration - deals, contacts, orgs, activities, pipelines 2026-03-03 08:18:24 -08:00
Richard Tang bba44430c4 chore: ignore local dev skills 2026-03-03 08:17:32 -08:00
Timothy 077d82ad82 chore: register Docker Hub tool in tool/credential registries 2026-03-03 08:14:27 -08:00
Timothy e4cf7f3da2 test: add Docker Hub tool tests (9 tests) 2026-03-03 08:14:24 -08:00
Timothy e3bdc9e8d7 feat: add Docker Hub credential spec with PAT auth 2026-03-03 08:14:20 -08:00
Timothy f1c1c9aab3 feat: add Docker Hub integration - search, repos, tags, image details 2026-03-03 08:14:15 -08:00
Timothy 97cbcf7658 fix: adapt path guarantee 2026-03-03 08:11:49 -08:00
Richard Tang 69c71d77fb fix: load-new-session from home 2026-03-03 08:09:22 -08:00
Timothy 4860739a2f chore: register Vercel in tool/credential registries (#5044) 2026-03-03 08:08:16 -08:00
Timothy 791ee40cd6 test: add Vercel tool unit tests (#5044) 2026-03-03 08:08:12 -08:00
Timothy e0191ac52b feat: add Vercel credential spec (#5044) 2026-03-03 08:08:07 -08:00
Timothy e0724df196 feat: add Vercel tool - deployments, projects, domains, env vars (#5044) 2026-03-03 08:08:00 -08:00
Timothy 2a56294638 chore: register Databricks in tool/credential registries (#5167) 2026-03-03 08:05:25 -08:00
Timothy d5cd557013 test: add Databricks tool unit tests (#5167) 2026-03-03 08:05:21 -08:00
Timothy 2a43f23a3d feat: add Databricks credential spec (#5167) 2026-03-03 08:05:03 -08:00
Timothy 69af8f569a feat: add Databricks tool - SQL, jobs, clusters, workspace (#5167) 2026-03-03 08:04:34 -08:00
bryan dcc11c9ea3 chore: move test deps to testing extra and dev group 2026-03-03 08:03:02 -08:00
Timothy 4b4abb47b0 Merge branch 'feature/queen-worker-comm' into fix/queen-recovery 2026-03-03 08:02:59 -08:00
Timothy 0e86dbcc9b chore: register Redis tool in tool/credential registries (#5370) 2026-03-03 08:01:43 -08:00
Timothy 92c75aa6f5 test: add Redis tool unit tests (#5370) 2026-03-03 08:01:37 -08:00
Timothy be41d848e5 feat: add Redis credential spec (#5370) 2026-03-03 08:01:32 -08:00
Timothy f7c299f6f0 feat: add Redis tool implementation - KV, hash, list, pub/sub (#5370) 2026-03-03 08:01:25 -08:00
Timothy b6a0f65a09 feat: add Pushover push notification integration (#5415)
4 tools: pushover_send, pushover_validate_user, pushover_list_sounds,
pushover_check_receipt. Supports priority levels, HTML, sounds, TTL.
All 12 unit tests and 13 conformance tests passing.
2026-03-03 07:58:29 -08:00
Timothy 1e7b0068ed chore: register Supabase tool in tool/credential registries 2026-03-03 07:54:34 -08:00
bryan 207d2fb911 feat: wire QuestionWidget into ChatPanel and workspace 2026-03-03 07:54:32 -08:00
Timothy de5105f313 feat: add Supabase integration - DB, Auth, Edge Functions (#5489)
7 tools: supabase_select, supabase_insert, supabase_update, supabase_delete,
supabase_auth_signup, supabase_auth_signin, supabase_edge_invoke.
All 19 unit tests and 13 conformance tests passing.
2026-03-03 07:54:27 -08:00
bryan c65a99c87d feat: add QuestionWidget component 2026-03-03 07:54:21 -08:00
bryan b4d7e57250 feat: update queen prompt for structured ask_user 2026-03-03 07:53:35 -08:00
bryan 63845a07aa feat: add queen-context endpoint and SSE replay 2026-03-03 07:53:22 -08:00
bryan 68ac73aa55 feat: add options support to ask_user tool 2026-03-03 07:53:05 -08:00
Timothy 6d32f1bb36 chore: register YouTube and Microsoft Graph tools in tool/credential registries 2026-03-03 07:51:33 -08:00
Timothy 9c316cee28 feat: add Microsoft Graph integration - Outlook, Teams, OneDrive (#5601)
11 tools: outlook_list_messages, outlook_get_message, outlook_send_mail,
teams_list_teams, teams_list_channels, teams_send_channel_message,
teams_get_channel_messages, onedrive_search_files, onedrive_list_files,
onedrive_download_file, onedrive_upload_file.
All 15 unit tests and 13 conformance tests passing.
2026-03-03 07:47:49 -08:00
Timothy 6af4f2d6e6 feat: add YouTube Data API integration (#5603)
8 tools: search_videos, get_video_details, get_channel, list_channel_videos,
get_playlist, search_channels, get_video_comments, get_video_categories.
All 17 unit tests and 13 conformance tests passing.
2026-03-03 07:47:34 -08:00
Timothy bc9a43d5a9 fix: execution recovery 2026-03-03 07:43:05 -08:00
Amdev-5 57651900f1 Merge remote-tracking branch 'origin/main' into lusha 2026-03-03 18:46:12 +05:30
Amdev-5 46b0617018 Merge remote-tracking branch 'origin/main' into lusha
# Conflicts:
#	tools/src/aden_tools/credentials/health_check.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/tests/test_health_checks.py
2026-03-03 18:34:54 +05:30
RichardTang-Aden 7b98a6613a Merge pull request #5656 from aden-hive/feature/queen-worker-comm
Feature/queen worker comm
2026-03-02 22:50:13 -08:00
Richard Tang 26481e27a6 fix: fix tests and lint 2026-03-02 22:46:38 -08:00
Aaryann Chandola 87a26db779 Merge branch 'aden-hive:main' into fix/guardian-self-trigger-loop 2026-03-03 11:56:15 +05:30
Richard Tang bb227b3d73 chore: ruff lint 2026-03-02 21:30:07 -08:00
Richard Tang 8a0cf5e0ae Merge remote-tracking branch 'origin/feature/queen-worker-comm' into feature/queen-worker-comm 2026-03-02 21:27:22 -08:00
Timothy 69218d5699 chore: lint code 2026-03-02 20:16:34 -08:00
Timothy 7d1433af21 fix: queen agent flakiness 2026-03-02 19:57:18 -08:00
Richard Tang 0bfbf1e9c5 fix: unused /hive-credentials prompts in the validation 2026-03-02 19:53:57 -08:00
Richard Tang 1ca4f5b22b refactor: update the preload_validation logic 2026-03-02 19:46:50 -08:00
Richard Tang 0984e4c1e8 feat: add gcu subagent validation and refactor the prestart validation steps 2026-03-02 18:35:25 -08:00
P Gokul Sree Chandra 7d9bd2e86b feat(tools): add YouTube Data API integration
- Implement 6 YouTube API tools (search videos, get video/channel details, list channel videos, get playlist items, search channels)
- Add YOUTUBE_API_KEY credential spec with help_url and description
- Register YouTube tool in tools/__init__.py
- Add comprehensive test coverage (18 tests) with mocking
- Add detailed README with setup instructions and examples
- Use httpx for HTTP requests to YouTube Data API v3
- Verified with real API integration testing

Implements #5603
2026-03-03 07:35:04 +05:30
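The API-key auth this PR describes is just a query parameter; a sketch against Data API v3 using httpx, as the commit notes:

```python
import httpx

resp = httpx.get(
    "https://www.googleapis.com/youtube/v3/search",
    params={
        "part": "snippet",
        "q": "agent frameworks",
        "type": "video",
        "maxResults": 5,
        "key": "<YOUTUBE_API_KEY>",  # credential spec added by this PR
    },
)
videos = resp.json()["items"]
```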
Sarthak Karode 4cbf5a7434 feat(core): add pytest framework testing integration with helpful error messages (#5485) 2026-03-03 10:01:33 +08:00
Hundao b33178c5be fix(graph): move auto-block grace period check before _await_user_input (#5672)
The grace period logic for client-facing auto-blocks was placed after
_await_user_input(), which blocks forever since no inject_event is
scheduled for text-only turns. This caused test_text_after_user_input_goes_to_judge
to hang indefinitely, blocking CI framework tests.

Move the grace period check before the blocking call so that within
the grace window, auto-blocks with missing outputs skip blocking
entirely and continue to the next LLM turn for judge RETRY pressure.

Also adds an _auto_missing check: nodes with no missing outputs
(e.g. queen monitoring with output_keys=[]) should still block
as their text-only output is legitimate conversation.

Fixes #5633
2026-03-03 09:39:14 +08:00
Richard Tang dc6a336c60 fix: removed the unused build_capability_summary 2026-03-02 16:26:47 -08:00
Antiarin 20ef5cb14f test(runtime): add async test for canceling multiple tasks across streams 2026-03-03 05:54:42 +05:30
Antiarin 2c3ec7e74c fix(tui): fix pause/stop to cancel all running tasks across all graphs 2026-03-03 05:30:20 +05:30
Richard Tang b855336448 chore: ruff format issue 2026-03-02 15:47:30 -08:00
Richard Tang de021977fd Merge remote-tracking branch 'origin/main' into feature/queen-worker-comm 2026-03-02 15:39:15 -08:00
Timothy cd2b3fcd16 Merge branch 'feature/new-inbox-management-agent' into feature/queen-worker-comm 2026-03-02 14:46:14 -08:00
Timothy b64024ede5 fix: gcu error log throwing 2026-03-02 14:45:57 -08:00
bryan a280d23113 fix: removing escalate to coder from worker tools 2026-03-02 12:02:35 -08:00
Timothy 41785abdba fix: rephrasing 2026-03-02 11:54:22 -08:00
Timothy de494c7e55 Merge branch 'feature/queen-worker-comm' into feature/new-inbox-management-agent 2026-03-02 11:44:08 -08:00
Timothy 5fa0903ea8 fix: teach email agent to search emails 2026-03-02 11:43:40 -08:00
Timothy 7bd99fe074 fix: email inbox management agent 2026-03-02 11:01:21 -08:00
bryan c838e1ca6d feat: agent building animation 2026-03-02 10:54:57 -08:00
bryan f475923353 feat: subagents populate node panel 2026-03-02 09:59:24 -08:00
Timothy 43f43c92e3 Merge branch 'feature/queen-worker-comm' into feature/new-inbox-management-agent 2026-03-02 09:40:55 -08:00
Timothy 5463134322 fix: inbox management template v2 2026-03-02 09:40:36 -08:00
Timothy 3fbb392103 fix: add credentials to queen lifecycle tools 2026-03-02 09:39:38 -08:00
RichardTang-Aden a162da17e1 Merge pull request #5639 from RichardTang-Aden/main
feat: support Gemini 3.1 pro
2026-03-02 09:24:27 -08:00
Richard Tang b565134d57 chore: fix the ruff lint 2026-03-02 09:23:02 -08:00
Richard Tang 3aafc89912 feat: support Gemini 3.1 pro 2026-03-02 09:20:48 -08:00
bryan 93449f92fe fix: clear build cache in quickstart 2026-03-02 09:00:48 -08:00
Bryan @ Aden d766e68d42 Merge pull request #5494 from Antiarin/security/harden-validate-agent-path
[Bug][Security]: agent_path accepts arbitrary filesystem paths with no validation
2026-03-02 16:57:51 +00:00
Hundao 1d8b1f9774 fix: enforce 0600 permissions on OAuth token files (#5631)
* fix: enforce 0600 permissions on OAuth token files

Credential files were written with default umask permissions.
Use os.open with explicit 0o600 mode to ensure token files
are always owner-read/write only, regardless of umask.

Fixes #5530

* style: fix line too long in checkpoint_store.py
2026-03-02 18:30:40 +08:00
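The pattern the fix describes, sketched (the helper name is ours, not the repo's):

```python
import os

def write_token_file(path: str, token_json: str) -> None:
    # os.open's explicit mode caps permissions at 0600: umask can only clear
    # bits, never add them, so the file is never group- or world-readable.
    # Plain open() would create it at 0666 & ~umask instead.
    fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as f:
        f.write(token_json)
```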
Rajneesh Chaudhary 5ea9abae83 fix(core): prevent sse critical event queue from blocking event bus (#5533) (#5536)
Disconnects slow clients instead of blocking the publisher task.

Signed-off-by: Rajneesh180 <rajneeshrehsaan48@gmail.com>
2026-03-02 17:57:52 +08:00
ArshpreetSingh04 15957499c5 docs(core): fix outdated goal-agent path reference in README (#5629)
Update the MCP client configuration example in core/README.md to replace the outdated `goal-agent` path with the correct `hive/core` path.

Fixes #5628
2026-03-02 17:07:25 +08:00
Timothy 0b50d9e874 fix: block idle event 2026-03-01 21:01:59 -08:00
Amdev-5 cce073dbdb fix(lusha): add pagination and empty filter validation
- Expose page parameter on search_people and search_companies
  (client + MCP tool) enabling access beyond the first 50 results
- Add guard requiring at least one filter on both search endpoints
  to prevent broad requests that burn API credits
- Add unit tests for pagination and empty filter validation
2026-03-02 10:20:08 +05:30
Timothy a1e54922bd fix: timer countdown update 2026-03-01 20:22:46 -08:00
Timothy 63c0ca34ea Merge branch 'feature/agent-runtime-idling' into feature/queen-worker-comm 2026-03-01 20:14:46 -08:00
Timothy 135477e516 feat: agent idling detection 2026-03-01 20:14:35 -08:00
Timothy 8cac49cd91 feat: frontend display of scheduler countdown 2026-03-01 20:13:21 -08:00
Timothy 28dce63682 fix: conversation ordering 2026-03-01 18:56:41 -08:00
Timothy 313ac952e0 Merge branch 'feature/tool-pill-v2' into feature/queen-worker-comm 2026-03-01 18:33:54 -08:00
Timothy 0633d5130b fix: command line refresh frontend build 2026-03-01 18:33:43 -08:00
Timothy 995e487b49 Merge branch 'feature/tool-pill-v2' into feature/queen-worker-comm 2026-03-01 18:26:49 -08:00
Timothy 64b58b57e0 fix: remove reddish color 2026-03-01 18:26:27 -08:00
Timothy c6465908df feat: colorful tool pills 2026-03-01 18:11:57 -08:00
Timothy ca96bcc09f fix: add pending question content to worker status 2026-03-01 18:11:15 -08:00
Timothy 65ee628fae fix: tool pill turn id 2026-03-01 17:58:31 -08:00
Timothy 02043614e5 feat: consolidate worker status report, fix conversation order 2026-03-01 17:56:27 -08:00
Timothy 212b9bf9d4 fix: load agent 2026-03-01 16:26:55 -08:00
Timothy 6070c30a88 Merge branch 'feat/open-hive' into feature/queen-worker-comm 2026-03-01 16:06:43 -08:00
Timothy 8a653e51bc feat: separate worker and queen input 2026-03-01 15:50:28 -08:00
Vasu Bansal 6a92588264 fix(plaid): update v0.6 credential compatibility and stabilize tests 2026-03-01 01:16:16 +05:30
Vasu Bansal 276aad6f0d feat: add Plaid banking integration
- Implement Plaid connector for account balances
- Add transaction history retrieval
- Include GL reconciliation functionality
- Add institution metadata lookup
- Include comprehensive tests and documentation

Closes #4016
2026-03-01 01:16:16 +05:30
Vasu Bansal 10620bda4f fix(sap): update credential-store compatibility and test imports 2026-03-01 01:07:00 +05:30
Vasu Bansal c214401a00 feat(integration): add SAP S/4HANA connector
Add complete SAP S/4HANA integration with:
- Connector for OData API access
- Credential management following Hive patterns
- Unit tests with mocked responses
- Documentation and usage examples

Refs #3182
2026-03-01 01:07:00 +05:30
Vasu Bansal 260ac33324 fix(s3): support v0.6 credential refs and register S3 tools 2026-03-01 00:56:22 +05:30
Vasu Bansal d4cd643860 feat: add AWS S3 integration for cloud object storage
- Add S3Storage class with upload, download, list, delete operations
- Support IAM roles, environment variables, and credential store
- Implement retry logic with adaptive backoff
- Add MCP tools: s3_upload, s3_download, s3_list, s3_delete, s3_check_credentials
- Include comprehensive tests with moto mocking
- Add documentation for setup and IAM permissions

Closes #3012
2026-03-01 00:54:57 +05:30
IamSayeed dc16cfda21 Merge branch 'main' into feature/add-asana-integration 2026-02-28 11:28:43 +05:30
RichardTang-Aden d562670425 Merge pull request #5501 from aden-hive/feat/open-hive
Feat: v6 windows compatibility support
2026-02-27 19:58:48 -08:00
Timothy Zhang 677bee6fe5 Merge branch 'feat/open-hive' of https://github.com/adenhq/hive into feat/open-hive 2026-02-27 19:55:54 -08:00
Timothy Zhang de27bfe76f fix: windows compatibility 2026-02-27 19:55:48 -08:00
Timothy 1c1dcb9c33 chore: new architecture 2026-02-27 19:55:05 -08:00
RichardTang-Aden 4ba950f155 Merge pull request #5499 from aden-hive/feat/open-hive
Release / Create Release (push) Waiting to run
feat: tool call revamp, Intercom & GA integrations, credential improvements
2026-02-27 19:41:11 -08:00
bryan 9c3a11d7bb chore: remove load agent 2026-02-27 19:14:35 -08:00
Richard Tang b7d357aea2 Merge remote-tracking branch 'upstream/feat/open-hive' into feat/sub-agent-framework 2026-02-27 19:07:45 -08:00
bryan b2fed68346 chore: fix linter 2026-02-27 18:57:52 -08:00
bryan 0e996928be fix: load credentials check into new agent session 2026-02-27 18:50:03 -08:00
Timothy 6ff4ec3643 Merge branch 'feature/tool-call-revamp' into feat/open-hive 2026-02-27 18:45:35 -08:00
Timothy a0eda3e492 fix: event loop iterations 2026-02-27 18:41:13 -08:00
bryan 099f9514ef Merge branch 'main' into feat/open-hive 2026-02-27 18:10:42 -08:00
Timothy b2096e4a55 Merge branch 'feat/open-hive' into feature/tool-call-revamp 2026-02-27 18:10:32 -08:00
Timothy 1bf2164745 fix: spamming session update 2026-02-27 18:10:09 -08:00
bryan 48205bbde7 fix: dismiss credential banner 2026-02-27 18:09:51 -08:00
Bryan @ Aden 296aab6ecb Merge pull request #5171 from Ttian18/feat/tina/intercom-tool-4256
feat(tools): add Intercom tool integration (#4256)
2026-02-28 02:01:57 +00:00
Richard Tang 14182c45fc refactor: reorganized file tools 2026-02-27 17:52:21 -08:00
Richard Tang 2fa8f4283c Merge remote-tracking branch 'upstream/feat/open-hive' into feat/sub-agent-framework 2026-02-27 17:51:43 -08:00
Bryan @ Aden ad3cec2361 Merge pull request #4239 from Ttian18/feat/tina-google-analytics-tool
[Integration]: Google Analytics - Website Traffic & Marketing Performance #3727
2026-02-28 01:50:07 +00:00
bryan eddb628298 fix: remove mock_mode from queen/coder system prompt templates 2026-02-27 17:38:03 -08:00
bryan f63b226d8d fix: pipeline visual update 2026-02-27 17:32:59 -08:00
Timothy cc5bd61d86 feature: new tool calling logic 2026-02-27 17:29:00 -08:00
bryan 8bd14fb16f fix: graph summary for intake 2026-02-27 17:08:34 -08:00
bryan 30b5472e33 fix: center text and open hive 2026-02-27 16:47:20 -08:00
Adam Albarghouthi bc836db0f9 micro-fix: fix incorrect CLI commands and docstring in core docs (#5457)
- Replace non-existent CLI commands (calculate, interactive, analyze)
  with actual commands (run, shell, info) in core/README.md
- Fix test-list argument from <goal_id> to <agent_path> in core/README.md
- Fix misleading docstring on MockProvider.complete_with_tools()

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: hundao <alchemy_wimp@hotmail.com>
2026-02-28 08:40:58 +08:00
bryan bd3b0fb8eb chore: windows quickstart update 2026-02-27 16:13:36 -08:00
Adam Albarghouthi 7f28474967 micro-fix: fix wrong credential path and env var in docs (#5458)
* micro-fix: fix wrong credential path and env var in docs

Both docs/configuration.md and docs/environment-setup.md reference a
non-existent ADEN_CREDENTIALS_PATH env var and wrong default path
(~/.aden/credentials). The actual env var is HIVE_CREDENTIAL_KEY and
the default path is ~/.hive/credentials (see storage.py:119,125).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* micro-fix: clarify HIVE_CREDENTIAL_KEY comment wording

Reword comment to avoid implying the env var controls the path.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 08:01:16 +08:00
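A sketch of the corrected behavior, with illustrative names: the store lives at a fixed default path, and HIVE_CREDENTIAL_KEY supplies the encryption key rather than overriding that path:

```python
import os
from pathlib import Path

# Default location per the docs fix (see storage.py:119,125).
DEFAULT_CREDENTIALS_PATH = Path.home() / ".hive" / "credentials"

def load_credential_key() -> str | None:
    # Encryption key only; it does not control where the file lives.
    return os.environ.get("HIVE_CREDENTIAL_KEY")
```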
bryan 09460b28bc refactor: move credentials from shell config to ~/.hive 2026-02-27 15:55:08 -08:00
wlkjyy 5d8ba1e49c micro-fix: tests: use unified session_* run IDs in runtime logging tests (#5480)
* tests: use session_* run IDs in runtime logging tests

* refactor: extract _sid() helper for session IDs in runtime logger tests
2026-02-28 07:54:59 +08:00
Richard Tang ccb394675b Merge remote-tracking branch 'upstream/feat/open-hive' into feat/sub-agent-framework 2026-02-27 14:48:47 -08:00
Richard Tang 931487a7d4 feat: clean the options for browser open tools that should not be used by LLM 2026-02-27 14:48:31 -08:00
bryan 3654c57f66 Merge branch 'main' into feat/open-hive 2026-02-27 14:48:10 -08:00
Richard Tang fb28280ced feat: human-friendly LLM and tool calls logs 2026-02-27 14:45:12 -08:00
bryan 6215441b58 fix: SSE reconnect on session change, tool pill per-call tracking, cancel/pause event emission 2026-02-27 14:37:54 -08:00
Richard Tang 52f16d5bb6 Merge remote-tracking branch 'upstream/feat/open-hive' into feat/sub-agent-framework 2026-02-27 13:49:14 -08:00
Antiarin e5b6c8581a feat: implement agent path validation and restrict loading to allowed directories 2026-02-28 02:56:31 +05:30
bryan 5dcca99913 fix: credential modal updates 2026-02-26 20:54:11 -08:00
Zhang 890b906f15 fix(tools): address review feedback on Google Analytics tool
- Use Credentials.from_service_account_file() instead of mutating os.environ
- Remove unused dimensions param from _format_report_response
- Remove unused metrics param from _format_realtime_response
- Extract duplicated property_id/limit validation into _validate_inputs helper
- Add credential_group="google_cloud" to GA and BigQuery specs
- Update tests to mock Credentials class
2026-02-26 20:46:20 -08:00
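The review fix in sketch form: build scoped credentials straight from the service-account file instead of mutating os.environ; the read-only analytics scope is an assumption:

```python
from google.analytics.data_v1beta import BetaAnalyticsDataClient
from google.oauth2.service_account import Credentials

def make_ga_client(key_file: str) -> BetaAnalyticsDataClient:
    # No os.environ["GOOGLE_APPLICATION_CREDENTIALS"] mutation needed.
    creds = Credentials.from_service_account_file(
        key_file,
        scopes=["https://www.googleapis.com/auth/analytics.readonly"],
    )
    return BetaAnalyticsDataClient(credentials=creds)
```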
Timothy @aden 6a8286d4cf Merge pull request #5462 from aden-hive/feat/open-hive
Release / Create Release (push) Waiting to run
Feat/open hive
2026-02-26 20:41:58 -08:00
Timothy 680024f790 fix: cancel worker logic 2026-02-26 20:35:17 -08:00
Timothy 6f7bfb92a8 fix: patch the erroneous skip judge logic 2026-02-26 20:31:45 -08:00
Zhang 335a9603e8 feat(tools): add Google Analytics 4 integration (#3727)
Add read-only GA4 Data API v1 tools: ga_run_report, ga_get_realtime,
ga_get_top_pages, and ga_get_traffic_sources. Includes credential spec,
unit tests, and README.
2026-02-26 20:22:12 -08:00
Zhang 5e8a6202e7 fix(credentials): add Intercom health checker to registry (#4256)
Add IntercomHealthChecker (subclass of OAuthBearerHealthChecker) and
register it in HEALTH_CHECKERS so the credential registry completeness
test passes in CI.
2026-02-26 20:01:43 -08:00
Zhang 55a4cdefd7 fix(tools): pass assignee_type through to Intercom API and add README (#4256)
- Pass assignee_type from intercom_assign_conversation tool function
  through to _IntercomClient.assign_conversation() and into the API payload
- Add tests for assignee_type="team" passthrough at client and tool levels
- Add tool README with setup, usage examples, and error handling

Addresses PR #5171 review feedback from @bryanadenhq
2026-02-26 19:56:36 -08:00
Richard Tang 2b63135afb Merge remote-tracking branch 'upstream/feat/open-hive' into feat/sub-agent-framework 2026-02-26 19:33:24 -08:00
Timothy 49d8c3572d fix: stalled agent stop tools 2026-02-26 19:09:01 -08:00
bryan 4b40962186 feat: agent loading after change 2026-02-26 19:08:28 -08:00
Richard Tang 779b376c6e Merge remote-tracking branch 'upstream/feat/open-hive' into feat/sub-agent-framework 2026-02-26 19:02:35 -08:00
bryan 4e2a9a247a patch: credentials modal blocking incorrectly 2026-02-26 18:34:51 -08:00
Richard Tang b1f3d6b155 Merge remote-tracking branch 'upstream/feat/open-hive' into feat/sub-agent-framework 2026-02-26 17:59:15 -08:00
Timothy ea28a9d3c3 fix: turn off judge for now 2026-02-26 17:57:49 -08:00
bryan 69a03e463f cancel + queue msg 2026-02-26 17:57:21 -08:00
Richard Tang e7da62e61c Merge remote-tracking branch 'upstream/feat/open-hive' into feat/sub-agent-framework 2026-02-26 17:17:37 -08:00
Richard Tang 7176745e1c feat: GCU enabled in the quickstart menu 2026-02-26 17:15:37 -08:00
Timothy cce0e26f5c Merge branch 'feature/system-prompt-v2-worker-path' into feat/open-hive 2026-02-26 17:13:46 -08:00
Timothy 641af16dfc fix: nuanced preference tweaking 2026-02-26 17:09:47 -08:00
Timothy a335c427ef fix: worker file path fix 2026-02-26 17:05:51 -08:00
bryan 9ea6c959ae feat: mid-session credential management and MCP resync 2026-02-26 17:03:06 -08:00
Richard Tang 20efd523c9 Merge remote-tracking branch 'upstream/feature/llm-turn-logging' into feat/sub-agent-framework 2026-02-26 16:16:37 -08:00
Timothy 8fc7fff496 feature: log llm turn stop reasons 2026-02-26 16:14:51 -08:00
Richard Tang edf51e6996 feat: prompts for GCU 2026-02-26 15:45:03 -08:00
Richard Tang 6b867883ce chore: ruff lint 2026-02-26 15:03:06 -08:00
Richard Tang 35a05f4120 Merge remote-tracking branch 'upstream/feat/open-hive' into feat/sub-agent-framework 2026-02-26 14:59:48 -08:00
Richard Tang e0e78a97ce refactor: re-organize all the browser tools and make them built-in for the gcu node type 2026-02-26 12:51:10 -08:00
Navya Bijoy ddd30a950d Integration: add Databricks MCP tool integration
Implements the Databricks MCP tool integration for the Hive agent framework
2026-02-26 21:01:59 +05:30
Richard Tang e4e476f463 Merge remote-tracking branch 'origin/feat/open-hive' into fix/codex-and-litellm-improvement 2026-02-26 07:24:16 -08:00
KRYSTALM7 3ca0e63d54 feat(tools): add Pushover push notification integration
Closes #5415
2026-02-26 13:54:34 +00:00
hundao c4c8917ecb fix: skip auto-block when weak models output text instead of calling tools
Client-facing nodes auto-block on text-only turns (wait for user input).
This breaks weak models (Codex) that output text like "Understood" instead
of calling tools after user responds.

Add _cf_expecting_work state: after user input, text-only turns with
missing output keys skip auto-block and go to judge, which pushes the
LLM to call set_output. Tool calls reset the state back to presenting
mode (auto-block on next text-only).

No behavioral change for strong models (they always call tools after
user input, so the new code path is never triggered).
2026-02-26 20:58:33 +08:00
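The described state machine, reduced to a sketch; only _cf_expecting_work comes from the commit, and the surrounding function names are illustrative:

```python
def on_user_input(state: dict) -> None:
    # The user just responded: the next turn should produce work, not chat.
    state["_cf_expecting_work"] = True

def should_auto_block(state: dict, had_tool_calls: bool, outputs_missing: bool) -> bool:
    if had_tool_calls:
        state["_cf_expecting_work"] = False  # back to presenting mode
        return False
    if state.get("_cf_expecting_work") and outputs_missing:
        # Weak model emitted text like "Understood": skip auto-block so
        # the judge can push it toward set_output instead of stalling.
        return False
    return True  # text-only turn in presenting mode: wait for the user
```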
hundao 1524d2ef00 fix: remove implementation hints from judge feedback for weak models
Judge feedback was saying "Use set_output tool to provide them" which
caused Codex to skip all work and call set_output directly. Changed to
"Follow your system prompt instructions to complete the work."
2026-02-26 20:56:14 +08:00
bryan 5032834034 fix: exit quickstart if claude code not configured 2026-02-25 20:38:28 -08:00
Richard Tang 0b83f6ea99 fix(wip): codex tool use bug fixes 2026-02-25 20:09:49 -08:00
bryan 415201f467 remove gpt-nano 2026-02-25 19:58:12 -08:00
bryan 73005a8498 fix: validate credentials before queen-initiated worker start 2026-02-25 19:37:54 -08:00
Timothy 4edb960fbd fix: stupid hard limit 2026-02-25 19:35:41 -08:00
Timothy 42d11ead01 fix: fake goal prompt injection 2026-02-25 19:30:20 -08:00
bryan 5e18f85b10 fix: deferred cred validation + dismissable error banners 2026-02-25 19:14:55 -08:00
Timothy 85b25bf006 fix: missing mcp reference 2026-02-25 19:10:35 -08:00
Timothy c1ba108489 Merge branch 'feature/refactor-system-prompt' into feat/open-hive 2026-02-25 18:46:16 -08:00
Richard Tang 214098aaae fix: remove the run_command tool from the predefined engineering tool set for worker agent 2026-02-25 18:36:00 -08:00
bryan 241a0b7adc fix: clean up stale active sessions on worker load 2026-02-25 18:29:15 -08:00
bryan 9a7b41a4be feat: show schedule info when clicking trigger nodes 2026-02-25 18:12:20 -08:00
Timothy fe918adb16 fix: cosmetics 2026-02-25 18:01:58 -08:00
bryan 746f026654 feat: 3-layer resume prompts + trigger node visualization 2026-02-25 17:49:44 -08:00
Richard Tang 8294cd3dd9 feat: fix codex tool call usage 2026-02-25 17:49:41 -08:00
Timothy 3bbc63b1db feature/refactored-system-prompt-narratives 2026-02-25 17:33:21 -08:00
Richard Tang 337fb6d922 refactor: deprecate the unused llm functions 2026-02-25 17:32:33 -08:00
bryan bda6b18e8a fix: session reconnect + iteration-based message IDs 2026-02-25 16:55:33 -08:00
bryan d256ff929f fix: faster input_requested + execution_id tracking 2026-02-25 15:38:46 -08:00
bryan f71b20cf07 filter out queen judge from action plan 2026-02-25 12:26:03 -08:00
Richard Tang db26b0afd6 feat: pop out the codex OAuth consent page 2026-02-25 11:53:00 -08:00
Timothy 145860f42e fix: consolidate validation endpoints 2026-02-25 09:54:06 -08:00
bryan d9f84648d0 revoke credential ux 2026-02-25 09:46:46 -08:00
Timothy 9fb7e0bae7 fix: load agent graph consistently 2026-02-25 09:02:37 -08:00
Bryan @ Aden b00203702e Merge pull request #5344 from juni2003/docs/fix-readme-org-links
docs(readme): fix broken org links
2026-02-25 16:57:00 +00:00
bryan ead85dd41f fix closing tab, remove 0/0 from credential modal 2026-02-25 08:11:24 -08:00
bryan cf5bf6f174 initial prompt from home page 2026-02-25 07:58:49 -08:00
bryan 46237e7309 kill judge and queen 2026-02-24 20:01:43 -08:00
bryan afa686b47b Merge branch 'main' into feat/open-hive 2026-02-24 19:38:46 -08:00
Timothy 21e02c9e50 Merge branch 'fix/credential-popup' into feat/open-hive 2026-02-24 19:29:21 -08:00
Timothy 30a188d7c8 fix: credential popup 2026-02-24 19:28:53 -08:00
bryan 355f51b25e quickstart update 2026-02-24 19:26:54 -08:00
Timothy 8e1cde86e8 Merge branch 'feat/openhive-cred-fixes' into feat/open-hive 2026-02-24 19:10:30 -08:00
Timothy c13b02c7d9 Merge branch 'fix/credential-loading' into feat/open-hive 2026-02-24 19:09:39 -08:00
bryan 9e72801c28 agent loading 2026-02-24 19:05:12 -08:00
RichardTang-Aden 3a3d538b73 Merge pull request #5367 from RichardTang-Aden/feat/codex-subscription-rebased
Feat/codex subscription rebased
2026-02-24 18:53:39 -08:00
Richard Tang b11bca0c67 chore: lint reformat 2026-02-24 18:53:04 -08:00
Richard Tang faf8975b42 chore: improve script code and solved lint errors 2026-02-24 18:51:37 -08:00
Richard Tang 863168880e fix: unused credential detect path removed 2026-02-24 18:41:36 -08:00
Timothy 384a1f0560 fix: credential loading 2026-02-24 18:40:39 -08:00
bryan 4bd1b1b9e6 credential updated 2026-02-24 18:33:09 -08:00
Richard Tang 8c3866a014 feat: optimized for the LLM selection option 2026-02-24 18:27:03 -08:00
Richard Tang 61283d9bd6 feat: Codex subscription OAuth 2026-02-24 18:24:36 -08:00
Richard Tang 585a7186d4 feat: support openai codex subscription as the LLM provider 2026-02-24 18:24:36 -08:00
Timothy 72a31c2a65 fix: credential validity, update api readme 2026-02-24 18:11:10 -08:00
RichardTang-Aden 10d9e54857 Merge pull request #4576 from mubarakar95/perf/reduce-subprocess-spawning-windows
perf: reduce subprocess spawning in quickstart scripts (#4427)
2026-02-24 17:47:22 -08:00
bryan e68695ee92 merge 2026-02-24 17:43:29 -08:00
RichardTang-Aden 11379fc0ef Merge branch 'main' into perf/reduce-subprocess-spawning-windows 2026-02-24 17:43:25 -08:00
Timothy 6d102382bd fix: session id issues 2026-02-24 17:42:09 -08:00
bryan 56335927e7 change from agentid to session id 2026-02-24 15:53:14 -08:00
Timothy a3fe994b22 fix: remove duplicative queen session starter api 2026-02-24 15:14:02 -08:00
Timothy 5754bdcc78 Merge branch 'feature/session-manager' into feat/open-hive 2026-02-24 15:01:01 -08:00
Timothy eef2fa9ffb feature: session manager, superseding agent manager 2026-02-24 15:00:09 -08:00
bryan 7286907cd4 multiple agent session running 2026-02-24 14:56:24 -08:00
Richard Tang 754e33a1ae feat: browser tools optimization 2026-02-24 14:05:26 -08:00
Timothy 1fbb431f1b Merge branch 'fix/globalize-queen-judge' into feat/open-hive 2026-02-24 13:28:29 -08:00
Timothy 0ad52b90d8 fix: globalize queen and judge agent's storage 2026-02-24 13:27:33 -08:00
bryan c44b12cc8b remove subgraph, persistent tabs, node action plan 2026-02-24 12:42:07 -08:00
Timothy 8381c95617 Merge branch 'fix/session-loading-isolation' into feat/open-hive 2026-02-24 11:18:48 -08:00
Timothy 3963855d1d fix: isolate session loading 2026-02-24 11:02:58 -08:00
Junaid 51154a3070 docs(readme): fix broken org links
Update repository URLs from adenhq/hive to aden-hive/hive to prevent 404s
2026-02-24 23:55:23 +05:00
Richard Tang b11b43bbe1 feat: reorganized the log structure for subagents 2026-02-24 10:41:13 -08:00
bryan 7a7ece1805 markdown support, removed subgraph, stop button 2026-02-24 10:40:24 -08:00
bryan 28a71b70a8 readme for http apis 2026-02-24 09:22:56 -08:00
bryan 33d3a13fde Merge branch 'feature/concurrent-judge-runtime' into feat/open-hive 2026-02-24 09:11:42 -08:00
bryan 5ea278a08d integrated queen, worker, judge 2026-02-24 09:09:28 -08:00
Richard Tang 86f4645d1c fix: inherit the tool call overflow margin for subagent 2026-02-24 08:20:08 -08:00
Richard Tang 2d05e96cd5 fix: spillover for subagent 2026-02-24 08:18:52 -08:00
bryan c1d5952ad9 Merge branch 'feature/concurrent-judge-runtime' into feat/open-hive 2026-02-24 08:07:31 -08:00
RichardTang-Aden ebeac68707 Merge pull request #5272 from SANTHAN-KUMAR/main
fix(web_scrape): reorder status checks before wait & replace hardcoded sleep with networkidle
2026-02-24 08:02:41 -08:00
bryan 72673e12fb remove mock data 2026-02-24 08:02:08 -08:00
bryan 3951ee1a7d Merge branch 'main' into feat/open-hive 2026-02-24 07:28:42 -08:00
bryan 1afde51c7b additional graph update 2026-02-24 07:28:11 -08:00
bryan cbeef18f0a wip graph 2026-02-24 07:27:48 -08:00
SANTHAN-KUMAR de5fcab933 fix(web_scrape): implement robots.txt support & clean up dead mock
- Add respect_robots_txt parameter (default True) using stdlib
  urllib.robotparser, checked before browser launch to skip
  disallowed URLs early
- Remove dead wait_for_timeout mock from test helper
- Restore respect_robots_txt docs in README (param, error, note)
- Add 2 tests: blocked by robots.txt, disabled robots.txt check
- Fix import ordering (ruff I001)
2026-02-24 15:29:13 +05:30
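A sketch of the early robots.txt gate using the stdlib parser named in the commit; the fail-open choice on network errors is an assumption:

```python
from urllib.parse import urlparse
from urllib.robotparser import RobotFileParser

def allowed_by_robots(url: str, user_agent: str = "*") -> bool:
    parsed = urlparse(url)
    parser = RobotFileParser()
    parser.set_url(f"{parsed.scheme}://{parsed.netloc}/robots.txt")
    try:
        parser.read()  # fetches robots.txt before any browser launch
    except OSError:
        return True  # assumed policy: unreachable robots.txt fails open
    return parser.can_fetch(user_agent, url)
```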
SANTHAN-KUMAR a7a2100472 Merge branch 'aden-hive:main' into main 2026-02-24 15:24:10 +05:30
Richard Tang 9c44d3b793 feat: add the upgraded file operation tools 2026-02-23 20:25:25 -08:00
Richard Tang 9b89ac694e feat: new snapshot tools 2026-02-23 19:34:42 -08:00
Richard Tang 630d8208cf fix: avoid using headless browser 2026-02-23 19:09:18 -08:00
bryan ecb038c955 chat now creates multiple chats msgs 2026-02-23 19:07:54 -08:00
Richard Tang 9b342dc593 feat: add health check for the browser start 2026-02-23 18:28:59 -08:00
Richard Tang ad879de6ff feat: clean the browser snapshot tool 2026-02-23 17:56:05 -08:00
bryan 96084fea16 wip chat 2026-02-23 17:41:12 -08:00
Richard Tang 795266aab4 feat: store the subagent logs in the node logs folder 2026-02-23 16:02:39 -08:00
Richard Tang 4e4ef121f9 feat: Progressive feedback in SubagentJudge 2026-02-23 15:48:34 -08:00
Richard Tang ddb9126955 fix: resolve the bug where the snapshot tool was called too many times 2026-02-23 15:38:04 -08:00
Richard Tang bac6d6dd68 feat: subagent ending judge and communication 2026-02-23 15:25:59 -08:00
bryan de9226aae0 credentials 2026-02-23 14:11:16 -08:00
Richard Tang 3451570541 feat: enable subagent to talk back to the parent via tools 2026-02-23 12:31:51 -08:00
bryan 08819b1609 Merge branch 'main' into feat/open-hive 2026-02-23 11:13:32 -08:00
Richard Tang e5e939f344 feat: add a basic test tool for the browser control tools' validity 2026-02-23 11:08:08 -08:00
Richard Tang 0d51d25482 feat: highlight interactive actions 2026-02-23 11:03:19 -08:00
Richard Tang a0a5b10df0 fix: remove the max subagent logic 2026-02-23 10:35:55 -08:00
Richard Tang 04bac93c14 feat: fix tool bugs and add background tabs option 2026-02-23 10:20:52 -08:00
SANTHAN-KUMAR 4961d3ba8c fix(web_scrape): reorder status checks & replace hardcoded wait with networkidle
- Move response validation (null, HTTP status, content-type) before
  the rendering wait so errors return immediately without sleeping
- Replace wait_for_timeout(2000) with wait_for_load_state("networkidle")
  to align code with README (timeout=3000, wrapped in try/except)
- Fix README: remove phantom respect_robots_txt param, fix timeout
  30s→60s, remove false robots.txt claim
- Add 3 tests for early-exit error paths
2026-02-23 23:40:10 +05:30
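Roughly what the reordered flow looks like with Playwright's sync API; the error payloads and exact structure are illustrative:

```python
from playwright.sync_api import TimeoutError as PlaywrightTimeout
from playwright.sync_api import sync_playwright

def fetch_html(url: str) -> dict:
    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            page = browser.new_page()
            response = page.goto(url, timeout=60000)
            # Validate before waiting, so failures return without sleeping.
            if response is None:
                return {"error": "no response"}
            if response.status >= 400:
                return {"error": f"HTTP {response.status}"}
            try:
                page.wait_for_load_state("networkidle", timeout=3000)
            except PlaywrightTimeout:
                pass  # best-effort wait; render whatever has loaded
            return {"html": page.content()}
        finally:
            browser.close()
```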
bryan fcfaca6bd0 Merge branch 'main' into feat/open-hive 2026-02-22 19:50:39 -08:00
bryan 4c7d9ab0fb added click cursor and rename dashboard to workspace 2026-02-22 19:21:37 -08:00
bryan 061aec4b3d my agents configured 2026-02-22 19:04:48 -08:00
Richard Tang 047f4a1a0c Merge branch 'main' into feat/sub-agent-framework 2026-02-22 18:31:47 -08:00
Richard Tang 7994b90dfa feat: add the max_sub_agents config and constrain 2026-02-22 18:23:52 -08:00
Richard Tang 04b6a80370 feat: shared agent profile 2026-02-22 18:17:40 -08:00
bryan 30bd0e483a home page and mock chatroom 2026-02-22 18:03:02 -08:00
bryan 2f31a92d31 Merge branch 'main' into feat/open-hive 2026-02-22 16:06:44 -08:00
bryan 35738c8279 react structure 2026-02-22 14:52:15 -08:00
Shivam Shahi– oss/acc 0f8627f17a format 2026-02-22 00:25:15 +05:30
bryan 6661934fed harden server apis and agent loading 2026-02-20 18:28:52 -08:00
bryan 263d35bbd6 Merge branch 'main' into feat/open-hive 2026-02-20 18:09:01 -08:00
bryan d4788e147a backend apis for open hive 2026-02-20 18:01:51 -08:00
Zhang fc0c3e169f feat(tools): add Intercom tool with conversations, contacts, and tags (#4256) 2026-02-20 17:14:30 -08:00
Zhang 4760f95bda feat(credentials): add Intercom credential spec (#4256)
Register INTERCOM_ACCESS_TOKEN in INTEGRATION_CREDENTIALS for the
8 Intercom tools (search/get conversations, contacts, notes, tags,
assignment, teams). Tool implementation follows in subsequent commits.
2026-02-20 17:13:54 -08:00
Utkarsh Singh cd0cf69099 feat(tools): add Brevo transactional email and SMS integration
- Add brevo_tool with 6 MCP tools: brevo_send_email, brevo_send_sms,
  brevo_create_contact, brevo_get_contact, brevo_update_contact,
  brevo_get_email_stats
- Add CredentialSpec for BREVO_API_KEY in credentials/brevo.py
- Register brevo_tool in tools/__init__.py and credentials/__init__.py
- Add README with setup instructions and usage examples
- Add 34 unit tests covering all tools, validation and error handling

Closes #5127
2026-02-20 13:19:07 +05:30
Richard Tang a04a8a866d fix: sub-agents reachability check 2026-02-19 11:33:32 -08:00
Richard Tang 8c9baa62b0 feat: create default hive profile for browser use 2026-02-18 18:10:37 -08:00
Richard Tang 262eaa6d84 feat: mcp dependencies for gcu 2026-02-18 16:34:19 -08:00
Richard Tang fc1a48f3bc feat: breaking the browser use tools by types 2026-02-18 16:10:17 -08:00
Richard Tang 060f320cd1 feat(wip): gcu node and basic browser tools 2026-02-18 15:52:46 -08:00
Richard Tang bff32bcaa3 feat: allow sub_agent in the agent framework 2026-02-18 14:43:01 -08:00
Amdev-5 9744363342 fix(lusha): address PR review round 2 — structured filters, pagination, correct types
- search_people: replaced freetext searchText concatenation with proper
  structured Lusha API filters (jobTitles, seniority as list[int],
  departments, locations as dict, company_names, industry_ids, search_text)
- search_companies: added locations, company_names, search_text params;
  made all params optional for flexible queries
- Pagination: exposed limit param (clamped 10-50 per Lusha API constraints)
  on both search tools, replacing hardcoded size=25
- get_signals: changed ids from list[str] to list[int], removed internal
  str-to-int conversion as Lusha IDs are always numeric
- seniority type corrected to list[int] (API rejects string-encoded values
  despite OpenAPI spec suggesting strings — verified via live integration)
- Unit tests updated for all changes (19/19 pass)

Verified against live Lusha API: all 6 tools return correct responses.
2026-02-17 22:00:09 +05:30
Amdev-5 6fe8439e94 fix(lusha): use mainIndustriesIds for company search, safer credential handling
- search_companies: replace names filter with mainIndustriesIds (numeric
  industry IDs) per Lusha API schema. Parameter changed from
  industry: str to industry_ids: list[int] | None.
- _get_api_key: return None instead of raising TypeError on unexpected
  credential type. Lets _get_client handle it with the standard error dict
  pattern used across all tools.
- Updated unit tests for new industry_ids parameter and added test for
  non-string credential handling.
2026-02-17 21:33:02 +05:30
Amdev-5 8e61ffe377 fix(tools): remove invalid searchText field from Lusha prospecting filters
Lusha API rejects filters.companies.include.searchText (HTTP 400).
Replaced with valid 'names' field in search_companies and removed
redundant company searchText from search_people. Updated unit tests.
2026-02-17 21:33:02 +05:30
Amdev-5 723476f7a7 feat(tools): add Lusha MCP integration with credentials and health checks 2026-02-17 21:33:02 +05:30
IamSayeed 0f253027ae Merge branch 'main' into feature/add-asana-integration 2026-02-17 12:20:01 +05:30
Sayeed Rizwan 6053895a82 fix(asana): resolve from PR feedback - refactor client, fix specs, add tests 2026-02-17 12:18:06 +05:30
Shivam Shahi– oss/acc ceffa38717 Merge branch 'main' into feat/zoho-crm 2026-02-17 02:46:29 +05:30
Your hh3538962 ae205fa3f2 fix(tools): address Power BI integration code review feedback
- Fix export endpoint: /Export -> /ExportTo
- Add 202 Accepted response handling
- Add notifyOption to refresh_dataset API call
- Rename format parameter to export_format (avoid shadowing builtin)
- Add PNG support to export formats
- All critical API issues from review addressed
2026-02-16 14:00:09 +05:00
Shivam Shahi– oss/acc 669a05892b Merge branch 'main' into feat/zoho-crm 2026-02-15 21:47:52 +05:30
IamSayeed 4898a9759a Merge branch 'main' into feature/add-asana-integration 2026-02-15 13:07:15 +05:30
Sayeed Rizwan 2c2fa25580 fix: Resolve merge conflicts in credential and tool registries 2026-02-15 13:00:23 +05:30
Sayeed Rizwan 56496d7dbd feat: Add Asana integration for project management automation
- Implement 25 MCP tools for comprehensive Asana operations
  - Task management (create, update, search, delete, complete, comment, subtask)
  - Project management (create, update, list, get tasks)
  - Workspace & team operations (list workspaces, get users)
  - Section management for Kanban workflows
  - Tag and custom field support

- Add Personal Access Token (PAT) authentication
- Use official asana>=3.2.0 Python SDK (v5+ API)
- Include comprehensive error handling with ApiException
- Add 5 unit tests with 100% pass rate
- Provide detailed documentation and usage examples

Technical Details:
- Uses asana.ApiClient with Configuration pattern
- Implements workspace resolution by name or GID
- Handles paginated responses automatically
- Follows CredentialStoreAdapter pattern
- Matches existing tool structure (slack_tool, github_tool)

Closes #4156
2026-02-15 11:33:17 +05:30
y0sif dd0696e44d chore: resolve merge conflicts with main 2026-02-14 21:38:44 +02:00
y0sif dcda273e0b chore: resolve merge conflicts with main 2026-02-14 21:32:33 +02:00
y0sif f3b159c650 docs(tools): document Attio CRM in README 2026-02-14 21:23:47 +02:00
y0sif 06df037e28 chore: add Attio credentials to test spec file 2026-02-14 21:22:55 +02:00
y0sif e814e516d1 chore: add Attio credentials to init file 2026-02-14 21:21:37 +02:00
y0sif 0375e068ed test(tools): add Attio tool tests 2026-02-14 21:20:03 +02:00
y0sif 34ffc533d3 feat(tools): add Attio CRM integration 2026-02-14 21:19:14 +02:00
mubarakar95 ea2ea1a4ae Merge branch 'main' into integration/apify 2026-02-14 17:53:39 +05:30
mubarakar95 9e11947687 style: apply ruff formatting to apify_tool.py 2026-02-14 17:22:35 +05:30
mubarakar95 47117281e1 fix(test): resolve E501 line too long in test_apify_tool.py 2026-02-14 17:22:33 +05:30
mubarakar95 032dd13f5a feat(tools): implement Apify integration with 4 tools and comprehensive tests
- Added credential spec with health check endpoint
- Implemented apify_run_actor (sync/async execution)
- Implemented apify_get_dataset (result retrieval)
- Implemented apify_get_run (status checking)
- Implemented apify_search_actors (marketplace search)
- Created comprehensive README with examples and use cases
- Added 24 unit tests with mocked API responses
- All tests passing, conformance validated, linting clean

Resolves: #4510
2026-02-14 17:22:25 +05:30
mubarakar95 13d8ebbeff feat: Add Apify integration (issue #4510)
Implements comprehensive Apify integration for web scraping and automation:

- Added 4 new tools: apify_run_actor, apify_get_dataset, apify_get_run, apify_search_actors
- Credential management for APIFY_API_TOKEN with health check
- Support for synchronous (wait=True) and asynchronous (wait=False) actor execution
- Actor ID validation and comprehensive error handling
- Full test coverage (26 tests passing)
- README with usage examples and documentation

Addresses #4510
2026-02-14 11:53:56 +05:30
Shivam Shahi– oss/acc 2efa0e01df ruff format fix 2026-02-14 00:35:30 +05:30
Shivam Shahi– oss/acc 6044369fdf feat(tools): add Zoho CRM v8 integration with OAuth2 and MCP tools
Add Zoho CRM MCP integration for lead/contact/account/deal workflows with notes support. Implements 5 MCP tools:
- zoho_crm_search: Search Leads/Contacts/Accounts/Deals by criteria or word with pagination
- zoho_crm_get_record: Fetch a single record by module and ID
- zoho_crm_create_record: Create records with pass-through field payloads
- zoho_crm_update_record: Update records by ID with partial field payloads
- zoho_crm_add_note: Create notes linked to CRM records via Parent_Id mapping

Features:
- Zoho OAuth2 provider added in core credentials (refresh-token flow)
- Zoho auth format: Authorization: Zoho-oauthtoken <token>
- Region/DC-aware routing using accounts domain/region + api_domain usage
- Persisted DC metadata on refresh (api_domain/accounts_domain/location)
- Credential spec and health check registration for zoho_crm
- Tool registration and allowed-tool list updates
- Normalized tool responses with retriable 429 handling
- README with setup, auth modes, usage, and testing instructions
- Comprehensive unit/integration coverage updates for tool, provider, and health checks

Validation:
- Scoped ruff lint/format checks passed
- Targeted test suite passed: 563 passed, 18 skipped

Closes #4418
2026-02-13 18:28:12 +05:30
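A sketch of the auth format and DC-aware routing noted above, assuming the v8 REST path; the real tool also persists the api_domain returned by token refresh and retries 429s:

```python
import httpx

def zoho_get(api_domain: str, access_token: str, path: str,
             params: dict | None = None) -> dict:
    # Zoho uses its own scheme name in place of "Bearer".
    headers = {"Authorization": f"Zoho-oauthtoken {access_token}"}
    # api_domain is region/DC specific, e.g. https://www.zohoapis.com
    resp = httpx.get(f"{api_domain}/crm/v8{path}", headers=headers, params=params)
    resp.raise_for_status()
    return resp.json()
```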
mubarakar95 40e74e408b perf: reduce subprocess spawning in quickstart scripts (#4427)
## Problem
Windows process creation (CreateProcess) is 10-100x slower than Linux fork/exec.
The quickstart scripts were spawning 4+ separate `uv run python -c "import X"`
processes to verify imports, adding ~600ms overhead on Windows.

## Solution
Consolidated all import checks into a single batch script that checks multiple
modules in one subprocess call, reducing spawn overhead by ~75%.

## Changes
- **New**: `scripts/check_requirements.py` - Batched import checker
- **New**: `scripts/test_check_requirements.py` - Test suite
- **New**: `scripts/benchmark_quickstart.ps1` - Performance benchmark tool
- **Modified**: `quickstart.ps1` - Updated import verification (2 sections)
- **Modified**: `quickstart.sh` - Updated import verification

## Performance Impact
**Benchmark results on Windows:**
- Before: ~19.8 seconds for import checks
- After: ~4.9 seconds for import checks
- **Improvement: 14.9 seconds saved (75.2% faster)**

## Testing
- ✅ All functional tests pass (`scripts/test_check_requirements.py`)
- ✅ Quickstart scripts work correctly on Windows
- ✅ Error handling verified (invalid imports reported correctly)
- ✅ Performance benchmark confirms 75%+ improvement

Fixes #4427
2026-02-12 15:38:58 +05:30
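The batching idea in miniature: one interpreter checks every module, instead of one subprocess per module. The real scripts/check_requirements.py may differ in interface and output:

```python
import importlib
import sys

def check_imports(modules: list[str]) -> int:
    failed = []
    for name in modules:
        try:
            importlib.import_module(name)
        except ImportError as exc:
            failed.append(f"{name}: {exc}")
    for line in failed:
        print(f"MISSING {line}", file=sys.stderr)
    return 1 if failed else 0

if __name__ == "__main__":
    # e.g. uv run python scripts/check_requirements.py litellm httpx
    sys.exit(check_imports(sys.argv[1:]))
```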
RichardTang-Aden 97440f9e8a Merge branch 'main' into feature/x-twitter-integration 2026-02-11 17:13:33 -08:00
Your hh3538962 765f7cae58 feat(tools): add get_datasets, get_reports, and export_report functions to Power BI integration 2026-02-11 22:19:51 +05:00
Your hh3538962 b455c8a2ad Merge remote-tracking branch 'origin/main' into feat/power-bi-integration 2026-02-11 22:07:00 +05:00
Sapna vishnoi da25e0ffa5 Merge branch 'main' into feat/redshift-integration 2026-02-11 13:42:26 +05:30
Your hh3538962 e07703c01f feat(tools): add Power BI integration - initial structure with workspace and dataset refresh functions 2026-02-10 13:23:32 +05:00
mishrapravin114 a4abf3eb2b Merge upstream/main: resolve conflicts with Apollo integration
- Keep both APOLLO_CREDENTIALS and AIRTABLE_CREDENTIALS
- Keep both apollo_tool and airtable_tool imports (alphabetical)

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-10 00:25:17 +05:30
mishrapravin114 269d72d073 Merge upstream/main: resolve conflicts with Apollo integration
- Keep both APOLLO_CREDENTIALS and CALENDLY_CREDENTIALS
- Keep both apollo_tool and calendly_tool imports (alphabetical)

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-10 00:20:17 +05:30
mishrapravin114 c8f5dccbd2 docs(airtable): add rate limit section to README
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-10 00:17:49 +05:30
mishrapravin114 8b797ee73f feat(airtable): add rate limit retry and retry_after
- Add 429 handling with retry_after from Retry-After header
- Add _request_with_retry (2 retries) for all API calls
- Update tests to use httpx.request

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-10 00:17:37 +05:30
mishrapravin114 de38adb1e4 feat(calendly): add rate limit handling, retry, 7-day validation
- Add 429 handling with retry_after from Retry-After header
- Add _request_with_retry (2 retries) for all API calls
- Validate get_availability date range <= 7 days
- Update tests to use httpx.request

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-10 00:16:37 +05:30
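A sketch of the shared retry helper both commits describe: two retries, honoring Retry-After on 429. Names and the 1-second fallback are assumptions:

```python
import time
import httpx

def request_with_retry(method: str, url: str, retries: int = 2,
                       **kwargs) -> httpx.Response:
    resp = httpx.request(method, url, **kwargs)
    for _ in range(retries):
        if resp.status_code != 429:
            break
        # The server says how long to back off; default to 1s if absent.
        time.sleep(float(resp.headers.get("Retry-After", 1)))
        resp = httpx.request(method, url, **kwargs)
    return resp
```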
Sapna vishnoi c169bcc5d8 Merge branch 'main' into feat/redshift-integration 2026-02-09 23:32:08 +05:30
kubrakaradirek 80ea286beb fix: resolve complex merge conflicts and restore integrations 2026-02-09 16:09:43 +03:00
kubrakaradirek 3499be782e feat: implement MSSQL tool with schema discovery closes #3377 2026-02-09 15:32:57 +03:00
Gordon Ng 16603ae49c Test MCP 2026-02-09 01:48:49 -05:00
Gordon Ng bf6bd9ce7f test mcp 2026-02-09 01:48:46 -05:00
Gordon Ng a54c0f6f46 update 2026-02-09 01:20:25 -05:00
Gordon Ng beeed11d48 update 2026-02-09 01:11:33 -05:00
Manas Dutta 25331590a7 feat(reddit): add Reddit health checker and update tool functions 2026-02-08 19:26:01 +05:30
GastonAQS bff9f8976e Merge branch 'main' into feature/add-trello-integration 2026-02-07 15:57:48 -03:00
Manas Dutta b71628e211 Merge branch 'main' into feature/reddit-integration 2026-02-07 19:35:02 +05:30
Manas Dutta 8c1cb1f55b feat: add Reddit integration with 18 MCP tools
Implements Reddit API integration for community management and content monitoring.

Features:
- Search & Monitoring: search posts/comments, get subreddit feeds (new/hot), get posts/comments (6 tools)
- Content Creation: submit posts, reply, edit, delete comments (5 tools)
- User Engagement: get profiles, upvote, downvote, save posts (4 tools)
- Moderation: remove/approve posts, ban users (3 tools)

Implementation:
- OAuth 2.0 authentication via REDDIT_CREDENTIALS
- PRAW library for Reddit API integration
- Comprehensive error handling and validation
- Full test coverage (25 tests passing)

Resolves #3595
2026-02-07 18:38:59 +05:30
mishrapravin114 66214384a9 fix: add register_airtable import and fix ruff I001 import order
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-07 17:18:26 +05:30
mishrapravin114 6d6646887c feat(tools): add Airtable bases and records integration
- Add Airtable tool with 5 MCP tools:
  - airtable_list_bases
  - airtable_list_tables
  - airtable_list_records (with filter/sort)
  - airtable_create_record
  - airtable_update_record
- Add AIRTABLE_CREDENTIALS with credentialSpec + credentialStore
- Add AirtableHealthChecker for token validation
- Add README with setup and usage
- Add unit tests (9 tests total)

Fixes #2911

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-07 17:14:46 +05:30
mishrapravin114 6f8db0ed08 style: apply ruff format to calendly and health check files
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-07 17:00:05 +05:30
mishrapravin114 6aaf6836ea fix(calendly): resolve ruff lint errors (UP017, E501)
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-07 16:58:48 +05:30
mishrapravin114 4f2348f50e feat(tools): add Calendly scheduling integration
- Add Calendly tool with 4 MCP tools:
  - calendly_list_event_types
  - calendly_get_availability
  - calendly_get_booking_link
  - calendly_cancel_event
- Add CALENDLY_CREDENTIALS with credentialSpec + credentialStore
- Add CalendlyHealthChecker for token validation
- Add README with setup and usage
- Add unit tests (12 tests total)

Fixes #2930

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-07 16:51:27 +05:30
RichardTang-Aden deb7f2f72a Merge pull request #3814 from Amdev-5/feature/x-twitter-integration
fix(tests): update credential group test for X integration
2026-02-06 09:16:42 -08:00
Amdev-5 d989d9c65a fix(tests): update credential group test for X integration
Add test_x_credentials_share_credential_group to verify all X credentials
share the 'x' credential group. Update test_credential_group_default_empty
to account for X credentials alongside existing Google exceptions.
2026-02-06 22:17:40 +05:30
bryan 4173c606ab Merge feature/x-twitter-final-integration from Amdev-5/hive - X (Twitter) tool with DM support 2026-02-06 08:03:43 -08:00
Amdev-5 a01430d20f Merge verification fixes into PR branch 2026-02-06 16:42:56 +05:30
Amdev-5 2a8f775732 feat(tools): enhance X tool with DM support and robust error handling
- Added `x_send_dm` tool using v2 endpoint (`POST /dm_conversations/with/:id/messages`) for reliable 1:1 messaging.
- Fixed 403 Forbidden payload validation errors by simplifying DM payload structure.
- Enhanced `_handle_response` to verify `x_tool.py` returns raw API error details for 403/400 responses, aiding in permission debugging.
- Updated `demo_x_tools.py` to support standard `.env` variable names (e.g., `X_API_KEY`) and added user lookup for DM testing.
- Added unit tests covering new DM functionality and payload verification in `test_x_tool.py`.
- Audited credential handling: Read-only tools (Search/Mentions) correctly use Bearer Token, while Write tools (Post/Reply/Delete/DM) enforce OAuth 1.0a User Context.

Verified with live API tests (see PR description for logs).
2026-02-06 15:48:20 +05:30
Sapna vishnoi 4a0d9b2855 Merge branch 'main' into feat/redshift-integration 2026-02-05 11:44:09 +05:30
y0sif 92c65d69ea chore: resolve merge conflicts with main 2026-02-05 07:13:36 +02:00
Yosif Soliman 910a8968c4 fix(linear): correct GraphQL variable type for workflow states query 2026-02-05 07:00:28 +02:00
Sapna vishnoi cdb4679c5a Merge branch 'main' into feat/redshift-integration 2026-02-05 00:05:38 +05:30
Sapna.Vishnoi 1a9dce89b4 feat(tools): Add Amazon Redshift integration
- Implement 5 core functions for data warehouse querying
- Add boto3 integration with Redshift Data API
- Security: Read-only SELECT queries by default
- Full credential store support
- 26/26 tests passing (100% coverage)
- Complete documentation with examples
2026-02-04 23:58:35 +05:30
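A sketch of the Redshift Data API flow the integration builds on (submit, poll, fetch); identifiers are placeholders and the polling cadence is an assumption:

```python
import time
import boto3

def run_select(sql: str, cluster: str, database: str, db_user: str) -> list:
    client = boto3.client("redshift-data")
    stmt = client.execute_statement(
        ClusterIdentifier=cluster, Database=database, DbUser=db_user, Sql=sql
    )
    while True:
        desc = client.describe_statement(Id=stmt["Id"])
        if desc["Status"] in ("FINISHED", "FAILED", "ABORTED"):
            break
        time.sleep(0.5)  # poll until the statement settles
    if desc["Status"] != "FINISHED":
        raise RuntimeError(desc.get("Error", desc["Status"]))
    return client.get_statement_result(Id=stmt["Id"])["Records"]
```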
Aneesh cf1e4d7f88 Merge remote-tracking branch 'origin/main' into feature/youtube-transcript 2026-02-04 19:46:52 +05:30
Aneesh f2f0b4fc61 feat(tools): add youtube transcript integration via youtube-transcript-api 2026-02-04 19:24:40 +05:30
y0sif b21dd25181 fix(linear): handle credential decryption errors gracefully, handle mcp tool issue with credentials 2026-02-04 05:21:23 +02:00
y0sif 04a18bcbe5 docs(tools): document Linear integration in README and setup credentials claude skill 2026-02-04 04:05:15 +02:00
y0sif 7f66dd67eb feat(linear): add OAuth setup instructions 2026-02-04 04:03:37 +02:00
y0sif cfa03b89c8 test(tools): add comprehensive Linear tool tests 2026-02-04 03:47:28 +02:00
y0sif 9866d7a22b feat(tools): add Linear project management integration 2026-02-04 03:47:03 +02:00
GastonAQS 331a6e442f feat: add Trello integration tools and API client 2026-02-03 10:32:25 -03:00
Sashank Thapa 1c2295b2b5 Merge branch 'adenhq:main' into feature/twitter-x-mcp-tool 2026-02-03 16:20:45 +05:30
Sashank Thapa fa43ca3785 Merge branch 'adenhq:main' into feature/twitter-x-mcp-tool 2026-01-31 16:26:39 +05:30
kozuedoingregression b4a2c3bd14 ruff formatting and lint fixes 2026-01-31 16:18:16 +05:30
kozuedoingregression 2d4ec4f462 lint fix 2026-01-31 16:14:25 +05:30
kozuedoingregression 1e8b933da0 add X (Twitter) integration tool 2026-01-31 15:49:16 +05:30
Aneesh 48b1e0e038 Docs: clarify agent creation assumptions in Getting Started 2026-01-28 22:49:30 +05:30
514 changed files with 86836 additions and 24655 deletions
@@ -195,7 +195,7 @@ class DeepResearchAgent:
max_tokens=self.config.max_tokens,
loop_config={
"max_iterations": 100,
"max_tool_calls_per_turn": 20,
"max_tool_calls_per_turn": 30,
"max_history_tokens": 32000,
},
conversation_mode="continuous",
@@ -71,6 +71,12 @@ Important:
- Track which URL each finding comes from (you'll need citations later)
- Call set_output for each key in a SEPARATE turn (not in the same turn as other tool calls)
Context management:
- Your tool results are automatically saved to files. After compaction, the file \
references remain in the conversation; use load_data() to recover any content you need.
- Use append_data('research_notes.md', ...) to maintain a running log of key findings \
as you go. This survives compaction and helps the report node produce a detailed report.
When done, use set_output (one key at a time, separate turns):
- set_output("findings", "Structured summary: key findings with source URLs for each claim. \
Include themes, contradictions, and confidence levels.")
@@ -161,6 +167,9 @@ Requirements:
- Every factual claim must cite its source with [n] notation
- Be objective; present multiple viewpoints where sources disagree
- Answer the original research questions from the brief
- If findings appear incomplete or summarized, call list_data_files() and load_data() \
to access the detailed source material from the research phase. The research node's \
tool results and research_notes.md contain the full data.
Save the HTML:
save_data(filename="report.html", data="<html>...</html>")
+3
@@ -46,6 +46,7 @@ coverage/
# TypeScript
*.tsbuildinfo
vite.config.d.ts
# Python
__pycache__/
@@ -69,6 +70,7 @@ exports/*
.agent-builder-sessions/*
.claude/settings.local.json
.claude/skills/ship-it/
.venv
@@ -77,3 +79,4 @@ core/tests/*dumps/*
screenshots/*
.gemini/*
+34
@@ -0,0 +1,34 @@
# Repository Guidelines
Shared agent instructions for this workspace.
## Deprecations
- **TUI is deprecated.** The terminal UI (`hive tui`) is no longer maintained. Use the browser-based interface (`hive open`) instead.
## Coding Agent Notes
- When working on a GitHub Issue or PR, print the full URL at the end of the task.
- When answering questions, respond with high-confidence answers only: verify in code; do not guess.
- Do not update dependencies casually. Version bumps, patched dependencies, overrides, or vendored dependency changes require explicit approval.
- Add brief comments for tricky logic. Keep files reasonably small when practical; split or refactor large files instead of growing them indefinitely.
- If shared guardrails are available locally, review them; otherwise follow this repo's guidance.
- Use `uv` for Python execution and package management. Do not use `python` or `python3` directly unless the user explicitly asks for it.
- Prefer `uv run` for scripts and tests, and `uv pip` for package operations.
## Multi-Agent Safety
- Do not create, apply, or drop `git stash` entries unless explicitly requested.
- Do not create, remove, or modify `git worktree` checkouts unless explicitly requested.
- Do not switch branches or check out a different branch unless explicitly requested.
- When the user says `push`, you may `git pull --rebase` to integrate latest changes, but never discard other in-progress work.
- When the user says `commit`, commit only your changes. When the user says `commit all`, commit everything in grouped chunks.
- When you see unrecognized files or unrelated changes, keep going and focus on your scoped changes.
## Change Hygiene
- If staged and unstaged diffs are formatting-only, resolve them without asking.
- If a commit or push was already requested, include formatting-only follow-up changes in that same commit when practical.
- Only stop to ask for confirmation when changes are semantic and may alter behavior.
Symlink
+1
@@ -0,0 +1 @@
AGENTS.md
+10 -1
@@ -1,4 +1,4 @@
-.PHONY: lint format check test install-hooks help
+.PHONY: lint format check test install-hooks help frontend-install frontend-dev frontend-build
help: ## Show this help
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \
@@ -26,3 +26,12 @@ test: ## Run all tests
install-hooks: ## Install pre-commit hooks
uv pip install pre-commit
pre-commit install
frontend-install: ## Install frontend npm packages
cd core/frontend && npm install
frontend-dev: ## Start frontend dev server
cd core/frontend && npm run dev
frontend-build: ## Build frontend for production
cd core/frontend && npm run build
+27 -94
@@ -14,7 +14,7 @@
</p>
<p align="center">
<a href="https://github.com/adenhq/hive/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-Apache%202.0-blue.svg" alt="Apache 2.0 License" /></a>
<a href="https://github.com/aden-hive/hive/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-Apache%202.0-blue.svg" alt="Apache 2.0 License" /></a>
<a href="https://www.ycombinator.com/companies/aden"><img src="https://img.shields.io/badge/Y%20Combinator-Aden-orange" alt="Y Combinator" /></a>
<a href="https://discord.com/invite/MXE49hrKDk"><img src="https://img.shields.io/discord/1172610340073242735?logo=discord&labelColor=%235462eb&logoColor=%23f5f5f5&color=%235462eb" alt="Discord" /></a>
<a href="https://x.com/aden_hq"><img src="https://img.shields.io/twitter/follow/teamaden?logo=X&color=%23f5f5f5" alt="Twitter Follow" /></a>
@@ -37,11 +37,11 @@
## Overview
-Build autonomous, reliable, self-improving AI agents without hardcoding workflows. Define your goal through conversation with a coding agent, and the framework generates a node graph with dynamically created connection code. When things break, the framework captures failure data, evolves the agent through the coding agent, and redeploys. Built-in human-in-the-loop nodes, credential management, and real-time monitoring give you control without sacrificing adaptability.
+Build autonomous, reliable, self-improving AI agents without hardcoding workflows. Define your goal through conversation with the hive coding agent (queen), and the framework generates a node graph with dynamically created connection code. When things break, the framework captures failure data, evolves the agent through the coding agent, and redeploys. Built-in human-in-the-loop nodes, credential management, and real-time monitoring give you control without sacrificing adaptability.
Visit [adenhq.com](https://adenhq.com) for complete documentation, examples, and guides.
-https://github.com/user-attachments/assets/846c0cc7-ffd6-47fa-b4b7-495494857a55
+[![Hive Demo](https://img.youtube.com/vi/XDOG9fOaLjU/maxresdefault.jpg)](https://www.youtube.com/watch?v=XDOG9fOaLjU)
## Who Is Hive For?
@@ -50,7 +50,7 @@ Hive is designed for developers and teams who want to build **production-grade A
Hive is a good fit if you:
- Want AI agents that **execute real business processes**, not demos
- Prefer **goal-driven development** over hardcoded workflows
- Need **fast or high-volume agent execution** over open-ended workflows
- Need **self-healing and adaptive agents** that improve over time
- Require **human-in-the-loop control**, observability, and cost limits
- Plan to run agents in **production environments**
@@ -71,7 +71,7 @@ Use Hive when you need:
- **[Documentation](https://docs.adenhq.com/)** - Complete guides and API reference
- **[Self-Hosting Guide](https://docs.adenhq.com/getting-started/quickstart)** - Deploy Hive on your infrastructure
-- **[Changelog](https://github.com/adenhq/hive/releases)** - Latest updates and releases
+- **[Changelog](https://github.com/aden-hive/hive/releases)** - Latest updates and releases
- **[Roadmap](docs/roadmap.md)** - Upcoming features and plans
- **[Report Issues](https://github.com/adenhq/hive/issues)** - Bug reports and feature requests
- **[Contributing](CONTRIBUTING.md)** - How to contribute and submit PRs
@@ -81,7 +81,7 @@ Use Hive when you need:
### Prerequisites
- Python 3.11+ for agent development
- Claude Code, Codex CLI, or Cursor for utilizing agent skills
- An LLM provider that powers the agents
> **Note for Windows Users:** It is strongly recommended to use **WSL (Windows Subsystem for Linux)** or **Git Bash** to run this framework. Some core automation scripts may not execute correctly in standard Command Prompt or PowerShell.
@@ -94,9 +94,10 @@ Use Hive when you need:
```bash
# Clone the repository
-git clone https://github.com/adenhq/hive.git
+git clone https://github.com/aden-hive/hive.git
cd hive
# Run quickstart setup
./quickstart.sh
```
@@ -109,77 +110,43 @@ This sets up:
- **LLM provider** - Interactive default model configuration
- All required Python dependencies with `uv`
- Finally, it launches the open hive interface in your browser
> **Tip:** To reopen the dashboard later, run `hive open` from the project directory.
<img width="2500" height="1214" alt="home-screen" src="https://github.com/user-attachments/assets/134d897f-5e75-4874-b00b-e0505f6b45c4" />
### Build Your First Agent
```bash
# Build an agent using Claude Code
claude> /hive
Type the agent you want to build in the home input box
# Test your agent
claude> /hive-debugger
<img width="2500" height="1214" alt="Image" src="https://github.com/user-attachments/assets/1ce19141-a78b-46f5-8d64-dbf987e048f4" />
# (at separate terminal) Launch the interactive dashboard
hive tui
### Use Template Agents
# Or run directly
hive run exports/your_agent_name --input '{"key": "value"}'
```
Click "Try a sample agent" and check the templates. You can run a templates directly or choose to build your version on top of the existing template.
## Coding Agent Support
### Run Agents
### Codex CLI
Now you can run an agent by selecting it (either an existing agent or an example agent). Click the Run button on the top left, or ask the queen agent to run it for you.
Hive includes native support for [OpenAI Codex CLI](https://github.com/openai/codex) (v0.101.0+).
1. **Config:** `.codex/config.toml` with `agent-builder` MCP server (tracked in git)
2. **Skills:** `.agents/skills/` symlinks to Hive skills (tracked in git)
3. **Launch:** Run `codex` in the repo root, then type `use hive`
Example:
```
codex> use hive
```
### Opencode
Hive includes native support for [Opencode](https://github.com/opencode-ai/opencode).
1. **Setup:** Run the quickstart script
2. **Launch:** Open Opencode in the project root.
3. **Activate:** Type `/hive` in the chat to switch to the Hive Agent.
4. **Verify:** Ask the agent _"List your tools"_ to confirm the connection.
The agent has access to all Hive skills and can scaffold agents, add tools, and debug workflows directly from the chat.
**[📖 Complete Setup Guide](docs/environment-setup.md)** - Detailed instructions for agent development
### Antigravity IDE Support
Skills and MCP servers are also available in [Antigravity IDE](https://antigravity.google/) (Google's AI-powered IDE). **Easiest:** open a terminal in the hive repo folder and run (use `./` — the script is inside the repo):
```bash
./scripts/setup-antigravity-mcp.sh
```
**Important:** Always restart/refresh Antigravity IDE after running the setup script—MCP servers only load on startup. After restart, **agent-builder** and **tools** MCP servers should connect. Skills are under `.agent/skills/` (symlinks to `.claude/skills/`). See [docs/antigravity-setup.md](docs/antigravity-setup.md) for manual setup and troubleshooting.
<img width="2500" height="1214" alt="Image" src="https://github.com/user-attachments/assets/71c38206-2ad5-49aa-bde8-6698d0bc55f5" />
## Features
-- **[Goal-Driven Development](docs/key_concepts/goals_outcome.md)** - Define objectives in natural language; the coding agent generates the agent graph and connection code to achieve them
+- **Browser-Use** - Control the browser on your computer to achieve hard tasks
+- **Parallel Execution** - Execute the generated graph in parallel, so multiple agents can complete jobs for you
+- **[Goal-Driven Generation](docs/key_concepts/goals_outcome.md)** - Define objectives in natural language; the coding agent generates the agent graph and connection code to achieve them
- **[Adaptiveness](docs/key_concepts/evolution.md)** - Framework captures failures, calibrates according to the objectives, and evolves the agent graph
- **[Dynamic Node Connections](docs/key_concepts/graph.md)** - No predefined edges; connection code is generated by any capable LLM based on your goals
- **SDK-Wrapped Nodes** - Every node gets shared memory, local RLM memory, monitoring, tools, and LLM access out of the box
- **[Human-in-the-Loop](docs/key_concepts/graph.md#human-in-the-loop)** - Intervention nodes that pause execution for human input with configurable timeouts and escalation
- **Real-time Observability** - WebSocket streaming for live monitoring of agent execution, decisions, and node-to-node communication
- **Interactive TUI Dashboard** - Terminal-based dashboard with live graph view, event log, and chat interface for agent interaction
- **Cost & Budget Control** - Set spending limits, throttles, and automatic model degradation policies
- **Production-Ready** - Self-hostable, built for scale and reliability
## Integration
<a href="https://github.com/adenhq/hive/tree/main/tools/src/aden_tools/tools"><img width="100%" alt="Integration" src="https://github.com/user-attachments/assets/a1573f93-cf02-4bb8-b3d5-b305b05b1e51" /></a>
<a href="https://github.com/aden-hive/hive/tree/main/tools/src/aden_tools/tools"><img width="100%" alt="Integration" src="https://github.com/user-attachments/assets/a1573f93-cf02-4bb8-b3d5-b305b05b1e51" /></a>
Hive is built to be model-agnostic and system-agnostic.
- **LLM flexibility** - Hive Framework is designed to support various types of LLMs, including hosted and local models through LiteLLM-compatible providers.
@@ -240,35 +207,10 @@ flowchart LR
4. **Control Plane Monitors** → Real-time metrics, budget enforcement, policy management
5. **[Adaptiveness](docs/key_concepts/evolution.md)** → On failure, the system evolves the graph and redeploys automatically
## Run Agents
The `hive` CLI is the primary interface for running agents.
```bash
# Browse and run agents interactively (Recommended)
hive tui
# Run a specific agent directly
hive run exports/my_agent --input '{"task": "Your input here"}'
# Run a specific agent with the TUI dashboard
hive run exports/my_agent --tui
# Interactive REPL
hive shell
```
The TUI scans both `exports/` and `examples/templates/` for available agents.
> **Using Python directly (alternative):** You can also run agents with `PYTHONPATH=exports uv run python -m agent_name run --input '{...}'`
See [environment-setup.md](docs/environment-setup.md) for complete setup instructions.
## Documentation
- **[Developer Guide](docs/developer-guide.md)** - Comprehensive guide for developers
- [Getting Started](docs/getting-started.md) - Quick setup instructions
- [TUI Guide](docs/tui-selection-guide.md) - Interactive dashboard usage
- [Configuration Guide](docs/configuration.md) - All configuration options
- [Architecture Overview](docs/architecture/README.md) - System design and structure
@@ -398,8 +340,7 @@ flowchart TB
```
## Contributing
-We welcome contributions from the community! We're especially looking for help building tools, integrations, and example agents for the framework ([check #2805](https://github.com/adenhq/hive/issues/2805)). If you're interested in extending its functionality, this is the perfect place to start. Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
+We welcome contributions from the community! We're especially looking for help building tools, integrations, and example agents for the framework ([check #2805](https://github.com/aden-hive/hive/issues/2805)). If you're interested in extending its functionality, this is the perfect place to start. Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
**Important:** Please get assigned to an issue before submitting a PR. Comment on an issue to claim it, and a maintainer will assign you. Issues with reproducible steps and proposals are prioritized. This helps prevent duplicate work.
@@ -436,7 +377,7 @@ This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENS
**Q: What LLM providers does Hive support?**
Hive supports 100+ LLM providers through LiteLLM integration, including OpenAI (GPT-4, GPT-4o), Anthropic (Claude models), Google Gemini, DeepSeek, Mistral, Groq, and many more. Simply set the appropriate API key environment variable and specify the model name.
Hive supports 100+ LLM providers through LiteLLM integration, including OpenAI (GPT-4, GPT-4o), Anthropic (Claude models), Google Gemini, DeepSeek, Mistral, Groq, and many more. Simply set the appropriate API key environment variable and specify the model name. We recommend Claude, GLM, and Gemini, as they currently offer the best performance.
**Q: Can I use Hive with local AI models like Ollama?**
@@ -478,14 +419,6 @@ Visit [docs.adenhq.com](https://docs.adenhq.com/) for complete guides, API refer
Contributions are welcome! Fork the repository, create your feature branch, implement your changes, and submit a pull request. See [CONTRIBUTING.md](CONTRIBUTING.md) for detailed guidelines.
**Q: When will my team start seeing results from Aden's adaptive agents?**
Aden's adaptation loop begins working from the first execution. When an agent fails, the framework captures the failure data, helping developers evolve the agent graph through the coding agent. How quickly this translates to measurable results depends on the complexity of your use case, the quality of your goal definitions, and the volume of executions generating feedback.
**Q: How does Hive compare to other agent frameworks?**
Hive focuses on generating agents that run real business processes, rather than generic agents. This vision emphasizes outcome-driven design, adaptability, and an easy-to-use set of tools and integrations.
---
<p align="center">
+31
View File
@@ -0,0 +1,31 @@
perf: reduce subprocess spawning in quickstart scripts (#4427)
## Problem
Windows process creation (CreateProcess) is 10-100x slower than Linux fork/exec.
The quickstart scripts were spawning 4+ separate `uv run python -c "import X"`
processes to verify imports, adding ~600ms overhead on Windows.
## Solution
Consolidated all import checks into a single batch script that checks multiple
modules in one subprocess call, reducing spawn overhead by ~75%.
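As a hedged sketch of the batching idea (the actual `scripts/check_requirements.py` may differ; the module list here is illustrative):

```python
# check_requirements.py (sketch): verify many imports in ONE interpreter
# instead of spawning one `uv run python -c "import X"` process per module.
import importlib.util
import sys

MODULES = ["litellm", "pydantic", "click"]  # illustrative list

missing = [m for m in MODULES if importlib.util.find_spec(m) is None]
if missing:
    print(f"Missing modules: {', '.join(missing)}", file=sys.stderr)
    sys.exit(1)
print("All imports OK")
```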
## Changes
- **New**: `scripts/check_requirements.py` - Batched import checker
- **New**: `scripts/test_check_requirements.py` - Test suite
- **New**: `scripts/benchmark_quickstart.ps1` - Performance benchmark tool
- **Modified**: `quickstart.ps1` - Updated import verification (2 sections)
- **Modified**: `quickstart.sh` - Updated import verification
## Performance Impact
**Benchmark results on Windows:**
- Before: ~19.8 seconds for import checks
- After: ~4.9 seconds for import checks
- **Improvement: 14.9 seconds saved (75.2% faster)**
## Testing
- ✅ All functional tests pass (`scripts/test_check_requirements.py`)
- ✅ Quickstart scripts work correctly on Windows
- ✅ Error handling verified (invalid imports reported correctly)
- ✅ Performance benchmark confirms 75%+ improvement
Fixes #4427
+9 -9
View File
@@ -64,7 +64,7 @@ To use the agent builder with Claude Desktop or other MCP clients, add this to y
"agent-builder": {
"command": "python",
"args": ["-m", "framework.mcp.agent_builder_server"],
"cwd": "/path/to/goal-agent"
"cwd": "/path/to/hive/core"
}
}
}
@@ -85,14 +85,14 @@ The MCP server provides tools for:
Run an LLM-powered calculator:
```bash
# Single calculation
uv run python -m framework calculate "2 + 3 * 4"
# Run an exported agent
uv run python -m framework run exports/calculator --input '{"expression": "2 + 3 * 4"}'
# Interactive mode
uv run python -m framework interactive
# Interactive shell session
uv run python -m framework shell exports/calculator
# Analyze runs with Builder
uv run python -m framework analyze calculator
# Show agent info
uv run python -m framework info exports/calculator
```
### Using the Runtime
@@ -141,8 +141,8 @@ uv run python -m framework test-run <agent_path> --goal <goal_id> --parallel 4
# Debug failed tests
uv run python -m framework test-debug <agent_path> <test_name>
# List tests for a goal
uv run python -m framework test-list <goal_id>
# List tests for an agent
uv run python -m framework test-list <agent_path>
```
For detailed testing workflows, see the [hive-test skill](../.claude/skills/hive-test/SKILL.md).
+387
View File
@@ -0,0 +1,387 @@
"""OpenAI Codex OAuth PKCE login flow.
Runs the full browser-based OAuth flow so users can authenticate with their
ChatGPT Plus/Pro subscription without needing the Codex CLI installed.
Usage (from quickstart.sh):
uv run python codex_oauth.py
Exit codes:
0 - success (credentials saved to ~/.codex/auth.json)
1 - failure (user cancelled, timeout, or token exchange error)
"""
import base64
import hashlib
import http.server
import json
import os
import platform
import secrets
import subprocess
import sys
import threading
import time
import urllib.error
import urllib.parse
import urllib.request
from datetime import UTC, datetime
from pathlib import Path
# OAuth constants (from the Codex CLI binary)
CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
AUTHORIZE_URL = "https://auth.openai.com/oauth/authorize"
TOKEN_URL = "https://auth.openai.com/oauth/token"
REDIRECT_URI = "http://localhost:1455/auth/callback"
SCOPE = "openid profile email offline_access"
CALLBACK_PORT = 1455
# Where to save credentials (same location the Codex CLI uses)
CODEX_AUTH_FILE = Path.home() / ".codex" / "auth.json"
# JWT claim path for account_id
JWT_CLAIM_PATH = "https://api.openai.com/auth"
def _base64url(data: bytes) -> str:
return base64.urlsafe_b64encode(data).rstrip(b"=").decode("ascii")
def generate_pkce() -> tuple[str, str]:
"""Generate PKCE code_verifier and code_challenge (S256)."""
verifier_bytes = secrets.token_bytes(32)
verifier = _base64url(verifier_bytes)
challenge = _base64url(hashlib.sha256(verifier.encode("ascii")).digest())
return verifier, challenge
def build_authorize_url(state: str, challenge: str) -> str:
"""Build the OpenAI OAuth authorize URL with PKCE."""
params = urllib.parse.urlencode(
{
"response_type": "code",
"client_id": CLIENT_ID,
"redirect_uri": REDIRECT_URI,
"scope": SCOPE,
"code_challenge": challenge,
"code_challenge_method": "S256",
"state": state,
"id_token_add_organizations": "true",
"codex_cli_simplified_flow": "true",
"originator": "hive",
}
)
return f"{AUTHORIZE_URL}?{params}"
def exchange_code_for_tokens(code: str, verifier: str) -> dict | None:
"""Exchange the authorization code for tokens."""
data = urllib.parse.urlencode(
{
"grant_type": "authorization_code",
"client_id": CLIENT_ID,
"code": code,
"code_verifier": verifier,
"redirect_uri": REDIRECT_URI,
}
).encode("utf-8")
req = urllib.request.Request(
TOKEN_URL,
data=data,
headers={"Content-Type": "application/x-www-form-urlencoded"},
method="POST",
)
try:
with urllib.request.urlopen(req, timeout=15) as resp:
token_data = json.loads(resp.read())
except (urllib.error.URLError, json.JSONDecodeError, TimeoutError, OSError) as exc:
print(f"\033[0;31mToken exchange failed: {exc}\033[0m", file=sys.stderr)
return None
if not token_data.get("access_token") or not token_data.get("refresh_token"):
print("\033[0;31mToken response missing required fields\033[0m", file=sys.stderr)
return None
return token_data
def decode_jwt_payload(token: str) -> dict | None:
"""Decode the payload of a JWT (no signature verification)."""
try:
parts = token.split(".")
if len(parts) != 3:
return None
payload = parts[1]
# Add padding
padding = 4 - len(payload) % 4
if padding != 4:
payload += "=" * padding
decoded = base64.urlsafe_b64decode(payload)
return json.loads(decoded)
except Exception:
return None
def get_account_id(access_token: str) -> str | None:
"""Extract the ChatGPT account_id from the access token JWT."""
payload = decode_jwt_payload(access_token)
if not payload:
return None
auth = payload.get(JWT_CLAIM_PATH)
if isinstance(auth, dict):
account_id = auth.get("chatgpt_account_id")
if isinstance(account_id, str) and account_id:
return account_id
return None
def save_credentials(token_data: dict, account_id: str) -> None:
"""Save credentials to ~/.codex/auth.json in the same format the Codex CLI uses."""
auth_data = {
"tokens": {
"access_token": token_data["access_token"],
"refresh_token": token_data["refresh_token"],
"account_id": account_id,
},
"auth_mode": "chatgpt",
"last_refresh": datetime.now(UTC).isoformat(),
}
if "id_token" in token_data:
auth_data["tokens"]["id_token"] = token_data["id_token"]
CODEX_AUTH_FILE.parent.mkdir(parents=True, exist_ok=True, mode=0o700)
fd = os.open(CODEX_AUTH_FILE, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
with os.fdopen(fd, "w") as f:
json.dump(auth_data, f, indent=2)
def open_browser(url: str) -> bool:
"""Open the URL in the user's default browser."""
system = platform.system()
try:
devnull = subprocess.DEVNULL
if system == "Darwin":
subprocess.Popen(["open", url], stdout=devnull, stderr=devnull)
elif system == "Windows":
subprocess.Popen(["cmd", "/c", "start", url], stdout=devnull, stderr=devnull)
else:
subprocess.Popen(["xdg-open", url], stdout=devnull, stderr=devnull)
return True
except OSError:
return False
class OAuthCallbackHandler(http.server.BaseHTTPRequestHandler):
"""HTTP handler that captures the OAuth callback."""
auth_code: str | None = None
received_state: str | None = None
def do_GET(self) -> None:
parsed = urllib.parse.urlparse(self.path)
if parsed.path != "/auth/callback":
self.send_response(404)
self.end_headers()
self.wfile.write(b"Not found")
return
params = urllib.parse.parse_qs(parsed.query)
code = params.get("code", [None])[0]
state = params.get("state", [None])[0]
if not code:
self.send_response(400)
self.end_headers()
self.wfile.write(b"Missing authorization code")
return
OAuthCallbackHandler.auth_code = code
OAuthCallbackHandler.received_state = state
self.send_response(200)
self.send_header("Content-Type", "text/html; charset=utf-8")
self.end_headers()
self.wfile.write(
b"<!doctype html><html><head><meta charset='utf-8'/></head>"
b"<body><h2>Authentication successful</h2>"
b"<p>Return to your terminal to continue.</p></body></html>"
)
def log_message(self, format: str, *args: object) -> None:
# Suppress request logging
pass
def wait_for_callback(state: str, timeout_secs: int = 120) -> str | None:
"""Start a local HTTP server and wait for the OAuth callback.
Returns the authorization code on success, None on timeout.
"""
OAuthCallbackHandler.auth_code = None
OAuthCallbackHandler.received_state = None
server = http.server.HTTPServer(("127.0.0.1", CALLBACK_PORT), OAuthCallbackHandler)
server.timeout = 1
deadline = time.time() + timeout_secs
server_thread = threading.Thread(target=_serve_until_done, args=(server, deadline, state))
server_thread.daemon = True
server_thread.start()
server_thread.join(timeout=timeout_secs + 2)
server.server_close()
if OAuthCallbackHandler.auth_code and OAuthCallbackHandler.received_state == state:
return OAuthCallbackHandler.auth_code
return None
def _serve_until_done(server: http.server.HTTPServer, deadline: float, state: str) -> None:
while time.time() < deadline:
server.handle_request()
if OAuthCallbackHandler.auth_code and OAuthCallbackHandler.received_state == state:
return
def parse_manual_input(value: str, expected_state: str) -> str | None:
"""Parse user-pasted redirect URL or auth code."""
value = value.strip()
if not value:
return None
try:
parsed = urllib.parse.urlparse(value)
params = urllib.parse.parse_qs(parsed.query)
code = params.get("code", [None])[0]
state = params.get("state", [None])[0]
if state and state != expected_state:
return None
return code
except Exception:
pass
# Maybe it's just the raw code
if len(value) > 10 and " " not in value:
return value
return None
def main() -> int:
# Generate PKCE and state
verifier, challenge = generate_pkce()
state = secrets.token_hex(16)
# Build URL
auth_url = build_authorize_url(state, challenge)
print()
print("\033[1mOpenAI Codex OAuth Login\033[0m")
print()
# Try to start the local callback server first
try:
server_available = True
# Quick test that port is free
import socket
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.settimeout(1)
result = sock.connect_ex(("127.0.0.1", CALLBACK_PORT))
sock.close()
if result == 0:
print(f"\033[1;33mPort {CALLBACK_PORT} is in use. Using manual paste mode.\033[0m")
server_available = False
except Exception:
server_available = True
# Open browser
browser_opened = open_browser(auth_url)
if browser_opened:
print(" Browser opened for OpenAI sign-in...")
else:
print(" Could not open browser automatically.")
print()
print(" If the browser didn't open, visit this URL:")
print(f" \033[0;36m{auth_url}\033[0m")
print()
code = None
if server_available:
print(" Waiting for authentication (up to 2 minutes)...")
print(" \033[2mOr paste the redirect URL below if the callback didn't work:\033[0m")
print()
# Start callback server in background
callback_result: list[str | None] = [None]
def run_server() -> None:
callback_result[0] = wait_for_callback(state, timeout_secs=120)
server_thread = threading.Thread(target=run_server)
server_thread.daemon = True
server_thread.start()
# Also accept manual input in parallel
# We poll for both the server result and stdin
try:
import select
while server_thread.is_alive():
# Check if stdin has data (non-blocking on unix)
if hasattr(select, "select"):
ready, _, _ = select.select([sys.stdin], [], [], 0.5)
if ready:
manual = sys.stdin.readline()
if manual.strip():
code = parse_manual_input(manual, state)
if code:
break
else:
time.sleep(0.5)
if callback_result[0]:
code = callback_result[0]
break
except (KeyboardInterrupt, EOFError):
print("\n\033[0;31mCancelled.\033[0m")
return 1
if not code:
code = callback_result[0]
else:
# Manual paste mode
try:
manual = input(" Paste the redirect URL: ").strip()
code = parse_manual_input(manual, state)
except (KeyboardInterrupt, EOFError):
print("\n\033[0;31mCancelled.\033[0m")
return 1
if not code:
print("\n\033[0;31mAuthentication timed out or failed.\033[0m")
return 1
# Exchange code for tokens
print()
print(" Exchanging authorization code for tokens...")
token_data = exchange_code_for_tokens(code, verifier)
if not token_data:
return 1
# Extract account_id from JWT
account_id = get_account_id(token_data["access_token"])
if not account_id:
print("\033[0;31mFailed to extract account ID from token.\033[0m", file=sys.stderr)
return 1
# Save credentials
save_credentials(token_data, account_id)
print(" \033[0;32mAuthentication successful!\033[0m")
print(f" Credentials saved to {CODEX_AUTH_FILE}")
return 0
if __name__ == "__main__":
sys.exit(main())
+1 -1
View File
@@ -1768,7 +1768,7 @@ async def _run_pipeline(websocket, initial_message: str):
judge=judge,
config=LoopConfig(
max_iterations=30,
max_tool_calls_per_turn=15,
max_tool_calls_per_turn=30,
max_history_tokens=64000,
max_tool_result_chars=8_000,
spillover_dir=str(_DATA_DIR),
+2 -2
View File
@@ -751,7 +751,7 @@ async def _run_pipeline(websocket, topic: str):
judge=None, # implicit judge: accept when output_keys filled
config=LoopConfig(
max_iterations=20,
max_tool_calls_per_turn=10,
max_tool_calls_per_turn=30,
max_history_tokens=32_000,
),
conversation_store=store_a,
@@ -849,7 +849,7 @@ async def _run_pipeline(websocket, topic: str):
judge=None, # implicit judge
config=LoopConfig(
max_iterations=10,
max_tool_calls_per_turn=5,
max_tool_calls_per_turn=30,
max_history_tokens=32_000,
),
conversation_store=store_b,
+1 -1
View File
@@ -1257,7 +1257,7 @@ async def _run_org_pipeline(websocket, topic: str):
judge=judge,
config=LoopConfig(
max_iterations=30,
max_tool_calls_per_turn=25,
max_tool_calls_per_turn=30,
max_history_tokens=32_000,
),
conversation_store=store,
@@ -453,7 +453,7 @@ identity_prompt = (
)
loop_config = {
"max_iterations": 50,
"max_tool_calls_per_turn": 10,
"max_tool_calls_per_turn": 30,
"max_history_tokens": 32000,
}
@@ -539,7 +539,7 @@ class CredentialTesterAgent:
max_tokens=self.config.max_tokens,
loop_config={
"max_iterations": 50,
"max_tool_calls_per_turn": 10,
"max_tool_calls_per_turn": 30,
"max_history_tokens": 32000,
},
conversation_mode="continuous",
+5 -4
View File
@@ -13,6 +13,7 @@ from framework.runtime.execution_stream import EntryPointSpec
from .config import default_config, metadata
from .nodes import coder_node, queen_node
# ticket_receiver is no longer needed — the queen runs as an independent
# GraphExecutor and receives escalation tickets via inject_event().
# Keeping the import commented for reference:
@@ -68,7 +69,7 @@ goal = Goal(
id="dynamic-tool-discovery",
description=(
"Always discover available tools dynamically via "
"discover_mcp_tools before referencing tools in agent designs"
"list_agent_tools before referencing tools in agent designs"
),
constraint_type="hard",
category="correctness",
@@ -126,7 +127,7 @@ identity_prompt = (
)
loop_config = {
"max_iterations": 100,
"max_tool_calls_per_turn": 20,
"max_tool_calls_per_turn": 30,
"max_history_tokens": 32000,
}
@@ -159,8 +160,8 @@ queen_graph = GraphSpec(
edges=[],
conversation_mode="continuous",
loop_config={
"max_iterations": 200,
"max_tool_calls_per_turn": 10,
"max_iterations": 999_999,
"max_tool_calls_per_turn": 30,
"max_history_tokens": 32000,
},
)
+2 -2
View File
@@ -10,7 +10,7 @@ def _load_preferred_model() -> str:
config_path = Path.home() / ".hive" / "configuration.json"
if config_path.exists():
try:
with open(config_path) as f:
with open(config_path, encoding="utf-8") as f:
config = json.load(f)
llm = config.get("llm", {})
if llm.get("provider") and llm.get("model"):
@@ -24,7 +24,7 @@ def _load_preferred_model() -> str:
class RuntimeConfig:
model: str = field(default_factory=_load_preferred_model)
temperature: float = 0.7
max_tokens: int = 40000
max_tokens: int = 8000
api_key: str | None = None
api_base: str | None = None
File diff suppressed because it is too large
@@ -24,7 +24,7 @@
9. **Invalid `loop_config` keys** — Only three valid keys: `max_iterations` (int), `max_tool_calls_per_turn` (int), `max_history_tokens` (int). Keys like `"strategy"`, `"mode"`, `"timeout"` are NOT valid and are silently ignored or cause errors. A minimal valid example appears after this list.
10. **Fabricating tools that don't exist** — Never guess tool names. Always verify via `discover_mcp_tools()`. Common hallucinations: `csv_read`, `csv_write`, `csv_append`, `file_upload`, `database_query`. If a required tool doesn't exist, redesign the agent to use tools that DO exist (e.g., `save_data`/`load_data` for data persistence).
10. **Fabricating tools that don't exist** — Never guess tool names. Always verify via `list_agent_tools()` before designing and `validate_agent_tools()` after building. Common hallucinations: `csv_read`, `csv_write`, `csv_append`, `file_upload`, `database_query`, `bulk_fetch_emails`. If a required tool doesn't exist, redesign the agent to use tools that DO exist (e.g., `save_data`/`load_data` for data persistence).
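A minimal valid configuration, restating the rule from item 9 (values are illustrative):

```python
loop_config = {
    "max_iterations": 50,           # int
    "max_tool_calls_per_turn": 30,  # int
    "max_history_tokens": 32_000,   # int
}
# Any other key ("strategy", "mode", "timeout", ...) is invalid and will be
# silently ignored or cause errors.
```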
## Design Errors
@@ -48,11 +48,11 @@ profile_setup → daily_intake → update_tracker → analyze_progress → gener
```
`analyze_progress` has no tools. `schedule_reminders` just sets one boolean. `report` just presents analysis. `update_tracker` and `generate_plan` are sequential autonomous work.
**Good example** (3 nodes):
**Good example** (2 nodes):
```
intake (client-facing) → process (autonomous: track + analyze + plan) → intake (loop back)
process (autonomous: track + analyze + plan) → review (client-facing) → process (loop back)
```
One client-facing node handles ALL user interaction (setup, logging, reports). One autonomous node handles ALL backend work (CSV update, analysis, plan generation) with tools and context preserved.
The queen handles intake (gathering requirements from the user) and passes the task via `run_agent_with_input(task)`. One autonomous node handles ALL backend work (CSV update, analysis, plan generation) with tools and context preserved. One client-facing node handles review/approval when needed.
12. **Adding framework gating for LLM behavior** — Don't add output rollback, premature rejection, or interaction protocol injection. Fix with better prompts or custom judges.
@@ -80,7 +80,7 @@ One client-facing node handles ALL user interaction (setup, logging, reports). O
- Validate graph structure (nodes, edges, entry points)
- Verify node specs (tools, prompts, client-facing flag)
- Check goal/constraints/success criteria definitions
- Test that `AgentRunner.load()` + `_setup()` succeeds (skip if no API key)
- Test that `AgentRunner.load()` succeeds (structural, no API key needed)
**What NOT to do:**
```python
@@ -105,3 +105,9 @@ def test_research_routes_back_to_interact(self):
23. **Forgetting sys.path setup in conftest.py** — Tests need `exports/` and `core/` on sys.path.
24. **Not using auto_responder for client-facing nodes** — Tests with client-facing nodes hang without an auto-responder that injects input. But note: even WITH auto_responder, forever-alive agents still hang because the graph never terminates. Auto-responder only helps for agents with terminal nodes.
25. **Manually wiring browser tools on event_loop nodes** — If the agent needs browser automation, use `node_type="gcu"` which auto-includes all browser tools and prepends best-practices guidance. Do NOT manually list browser tool names on event_loop nodes — they may not exist in the MCP server or may be incomplete. See the GCU Guide appendix.
26. **Using GCU nodes as regular graph nodes** — GCU nodes (`node_type="gcu"`) are exclusively subagents. They must ONLY appear in a parent node's `sub_agents=["gcu-node-id"]` list and be invoked via `delegate_to_sub_agent()`. They must NEVER be connected via edges, used as entry nodes, or used as terminal nodes. If a GCU node appears as an edge source or target, the graph will fail pre-load validation.
27. **Adding a client-facing intake node to worker agents** — The queen owns intake. She defines the entry node's `input_keys` at build time and fills them via `run_agent_with_input(task)` at run time. Worker agents should start with an autonomous processing node, NOT a client-facing intake node that asks the user for requirements. Client-facing nodes in workers are for mid-execution review/approval only.
@@ -57,51 +57,28 @@ metadata = AgentMetadata()
from framework.graph import NodeSpec
# Node 1: Intake (client-facing)
intake_node = NodeSpec(
id="intake",
name="Intake",
description="Gather requirements from the user",
# Node 1: Process (autonomous entry node)
# The queen handles intake and passes structured input via
# run_agent_with_input(task). NO client-facing intake node.
# The queen defines input_keys at build time and fills them at run time.
process_node = NodeSpec(
id="process",
name="Process",
description="Execute the task using available tools",
node_type="event_loop",
client_facing=True,
max_node_visits=0, # Unlimited for forever-alive
input_keys=["topic"],
output_keys=["brief"],
success_criteria="The brief is specific and actionable.",
system_prompt="""\
You are an intake specialist.
**STEP 1 — Read and respond (text only, NO tool calls):**
1. Read the topic provided
2. If vague, ask 1-2 clarifying questions
3. If clear, confirm your understanding
**STEP 2 — After the user confirms, call set_output:**
- set_output("brief", "Clear description of what to do")
""",
tools=[],
)
# Node 2: Worker (autonomous)
worker_node = NodeSpec(
id="worker",
name="Worker",
description="Do the main work",
node_type="event_loop",
max_node_visits=0,
input_keys=["brief", "feedback"],
input_keys=["user_request", "feedback"],
output_keys=["results"],
nullable_output_keys=["feedback"], # Only on feedback edge
success_criteria="Results are complete and accurate.",
system_prompt="""\
You are a worker agent. Given a brief, do the work.
If feedback is provided, this is a follow-up — address the feedback.
You are a processing agent. Your task is in memory under "user_request". \
If "feedback" is present, this is a revision — address the feedback.
Work in phases:
1. Use tools to gather/process data
2. Analyze results
3. Call set_output for each key in a SEPARATE turn:
3. Call set_output in a SEPARATE turn:
- set_output("results", "structured results")
""",
tools=["web_search", "web_scrape", "save_data", "load_data", "list_data_files"],
@@ -115,7 +92,7 @@ review_node = NodeSpec(
node_type="event_loop",
client_facing=True,
max_node_visits=0,
input_keys=["results", "brief"],
input_keys=["results", "user_request"],
output_keys=["next_action", "feedback"],
nullable_output_keys=["feedback"],
success_criteria="User has reviewed and decided next steps.",
@@ -128,14 +105,14 @@ Present the results to the user.
3. Ask: satisfied, or want changes?
**STEP 2 — After user responds, call set_output:**
- set_output("next_action", "new_topic") — if starting fresh
- set_output("next_action", "done") — if satisfied
- set_output("next_action", "revise") — if changes needed
- set_output("feedback", "what to change") — only if revising
""",
tools=[],
)
__all__ = ["intake_node", "worker_node", "review_node"]
__all__ = ["process_node", "review_node"]
```
## agent.py
@@ -155,7 +132,7 @@ from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
from framework.runtime.execution_stream import EntryPointSpec
from .config import default_config, metadata
from .nodes import intake_node, worker_node, review_node
from .nodes import process_node, review_node
# Goal definition
goal = Goal(
@@ -172,27 +149,26 @@ goal = Goal(
)
# Node list
nodes = [intake_node, worker_node, review_node]
nodes = [process_node, review_node]
# Edge definitions
edges = [
EdgeSpec(id="intake-to-worker", source="intake", target="worker",
EdgeSpec(id="process-to-review", source="process", target="review",
condition=EdgeCondition.ON_SUCCESS, priority=1),
EdgeSpec(id="worker-to-review", source="worker", target="review",
condition=EdgeCondition.ON_SUCCESS, priority=1),
# Feedback loop
EdgeSpec(id="review-to-worker", source="review", target="worker",
# Feedback loop — revise results
EdgeSpec(id="review-to-process", source="review", target="process",
condition=EdgeCondition.CONDITIONAL,
condition_expr="str(next_action).lower() == 'revise'", priority=2),
# Loop back for new topic
EdgeSpec(id="review-to-intake", source="review", target="intake",
# Loop back for next task (queen sends new input)
EdgeSpec(id="review-done", source="review", target="process",
condition=EdgeCondition.CONDITIONAL,
condition_expr="str(next_action).lower() == 'new_topic'", priority=1),
condition_expr="str(next_action).lower() == 'done'", priority=1),
]
# Graph configuration
entry_node = "intake"
entry_points = {"start": "intake"}
# Graph configuration — entry is the autonomous process node
# The queen handles intake and passes the task via run_agent_with_input(task)
entry_node = "process"
entry_points = {"start": "process"}
pause_nodes = []
terminal_nodes = [] # Forever-alive
@@ -208,7 +184,7 @@ class MyAgent:
self.goal = goal
self.nodes = nodes
self.edges = edges
self.entry_node = entry_node
self.entry_node = entry_node # "process" — autonomous entry
self.entry_points = entry_points
self.pause_nodes = pause_nodes
self.terminal_nodes = terminal_nodes
@@ -235,16 +211,14 @@ class MyAgent:
identity_prompt=identity_prompt,
)
def _setup(self, mock_mode=False):
def _setup(self):
self._storage_path = Path.home() / ".hive" / "agents" / "my_agent"
self._storage_path.mkdir(parents=True, exist_ok=True)
self._tool_registry = ToolRegistry()
mcp_config = Path(__file__).parent / "mcp_servers.json"
if mcp_config.exists():
self._tool_registry.load_mcp_config(mcp_config)
llm = None
if not mock_mode:
llm = LiteLLMProvider(model=self.config.model, api_key=self.config.api_key, api_base=self.config.api_base)
llm = LiteLLMProvider(model=self.config.model, api_key=self.config.api_key, api_base=self.config.api_base)
tools = list(self._tool_registry.get_tools().values())
tool_executor = self._tool_registry.get_executor()
self._graph = self._build_graph()
@@ -257,9 +231,9 @@ class MyAgent:
checkpoint_max_age_days=7, async_checkpoint=True),
)
async def start(self, mock_mode=False):
async def start(self):
if self._agent_runtime is None:
self._setup(mock_mode=mock_mode)
self._setup()
if not self._agent_runtime.is_running:
await self._agent_runtime.start()
@@ -274,8 +248,8 @@ class MyAgent:
return await self._agent_runtime.trigger_and_wait(
entry_point_id=entry_point, input_data=input_data or {}, session_state=session_state)
async def run(self, context, mock_mode=False, session_state=None):
await self.start(mock_mode=mock_mode)
async def run(self, context, session_state=None):
await self.start()
try:
result = await self.trigger_and_wait("default", context, session_state=session_state)
return result or ExecutionResult(success=False, error="Execution timeout")
@@ -471,19 +445,17 @@ def cli():
@cli.command()
@click.option("--topic", "-t", required=True)
@click.option("--mock", is_flag=True)
@click.option("--verbose", "-v", is_flag=True)
def run(topic, mock, verbose):
def run(topic, verbose):
"""Execute the agent."""
setup_logging(verbose=verbose)
result = asyncio.run(default_agent.run({"topic": topic}, mock_mode=mock))
result = asyncio.run(default_agent.run({"topic": topic}))
click.echo(json.dumps({"success": result.success, "output": result.output}, indent=2, default=str))
sys.exit(0 if result.success else 1)
@cli.command()
@click.option("--mock", is_flag=True)
def tui(mock):
def tui():
"""Launch TUI dashboard."""
from pathlib import Path
from framework.tui.app import AdenTUI
@@ -499,10 +471,10 @@ def tui(mock):
storage.mkdir(parents=True, exist_ok=True)
mcp_cfg = Path(__file__).parent / "mcp_servers.json"
if mcp_cfg.exists(): agent._tool_registry.load_mcp_config(mcp_cfg)
llm = None if mock else LiteLLMProvider(model=agent.config.model, api_key=agent.config.api_key, api_base=agent.config.api_base)
llm = LiteLLMProvider(model=agent.config.model, api_key=agent.config.api_key, api_base=agent.config.api_base)
runtime = create_agent_runtime(
graph=agent._build_graph(), goal=agent.goal, storage_path=storage,
entry_points=[EntryPointSpec(id="start", name="Start", entry_node="intake", trigger_type="manual", isolation_level="isolated")],
entry_points=[EntryPointSpec(id="start", name="Start", entry_node="process", trigger_type="manual", isolation_level="isolated")],
llm=llm, tools=list(agent._tool_registry.get_tools().values()), tool_executor=agent._tool_registry.get_executor())
await runtime.start()
try:
@@ -564,7 +536,6 @@ import sys
from pathlib import Path
import pytest
import pytest_asyncio
_repo_root = Path(__file__).resolve().parents[3]
for _p in ["exports", "core"]:
@@ -576,18 +547,17 @@ AGENT_PATH = str(Path(__file__).resolve().parents[1])
@pytest.fixture(scope="session")
def mock_mode():
return True
def agent_module():
"""Import the agent package for structural validation."""
import importlib
return importlib.import_module(Path(AGENT_PATH).name)
@pytest_asyncio.fixture(scope="session")
async def runner(tmp_path_factory, mock_mode):
@pytest.fixture(scope="session")
def runner_loaded():
"""Load the agent through AgentRunner (structural only, no LLM needed)."""
from framework.runner.runner import AgentRunner
storage = tmp_path_factory.mktemp("agent_storage")
r = AgentRunner.load(AGENT_PATH, mock_mode=mock_mode, storage_path=storage)
r._setup()
yield r
await r.cleanup_async()
return AgentRunner.load(AGENT_PATH)
```
## entry_points Format
@@ -72,7 +72,7 @@ goal = Goal(
| id | str | required | kebab-case identifier |
| name | str | required | Display name |
| description | str | required | What the node does |
| node_type | str | required | Always `"event_loop"` |
| node_type | str | required | `"event_loop"` or `"gcu"` (browser automation — see GCU Guide appendix) |
| input_keys | list[str] | required | Memory keys this node reads |
| output_keys | list[str] | required | Memory keys this node writes via set_output |
| system_prompt | str | "" | LLM instructions |
@@ -131,13 +131,19 @@ downstream node only sees the serialized summary string.
- A "report" node that presents analysis → merge into the client-facing node
- A "confirm" or "schedule" node that doesn't call any external service → remove
**Typical agent structure (3 nodes):**
**Typical agent structure (2 nodes):**
```
intake (client-facing) ←→ process (autonomous) ←→ review (client-facing)
process (autonomous) ←→ review (client-facing)
```
Or for simpler agents, just 2 nodes:
The queen owns intake — she gathers requirements from the user, then
passes structured input via `run_agent_with_input(task)`. When building
the agent, design the entry node's `input_keys` to match what the queen
will provide at run time. Worker agents should NOT have a client-facing
intake node. Client-facing nodes are for mid-execution review/approval only.
For simpler agents, just 1 autonomous node:
```
interact (client-facing) → process (autonomous) → interact (loop)
process (autonomous) — loops back to itself
```
### nullable_output_keys
@@ -397,7 +403,7 @@ from .agent import (
### Reference Agent
See `exports/gmail_inbox_guardian/agent.py` for a complete example with:
- Primary client-facing intake node (user configures rules)
- Primary client-facing node (user configures rules)
- Timer-based scheduled inbox checks (every 20 min)
- Webhook-triggered email event handling
- Shared isolation for memory access across streams
@@ -413,16 +419,18 @@ See `exports/gmail_inbox_guardian/agent.py` for a complete example with:
## Tool Discovery
Do NOT rely on a static tool list — it will be outdated. Always use
`discover_mcp_tools()` to get the current tool catalog from the
hive-tools MCP server. This returns full schemas including parameter
names, types, and descriptions.
`list_agent_tools()` to discover available tools, grouped by category.
```
discover_mcp_tools() # default: hive-tools
discover_mcp_tools("exports/my_agent/mcp_servers.json") # specific agent
list_agent_tools() # names + descriptions, all groups
list_agent_tools(output_schema="full") # include input_schema
list_agent_tools(group="gmail") # only gmail_* tools
list_agent_tools("exports/my_agent/mcp_servers.json") # specific agent's tools
```
Common tool categories (verify via discover_mcp_tools):
After building, validate tools exist: `validate_agent_tools("exports/{name}")`
Common tool categories (verify via list_agent_tools):
- **Web**: search, scrape, PDF
- **Data**: save/load/append/list data files, serve to user
- **File**: view, write, replace, diff, list, grep
@@ -0,0 +1,119 @@
# GCU Browser Automation Guide
## When to Use GCU Nodes
Use `node_type="gcu"` when:
- The user's workflow requires **navigating real websites** (scraping, form-filling, social media interaction, testing web UIs)
- The task involves **dynamic/JS-rendered pages** that `web_scrape` cannot handle (SPAs, infinite scroll, login-gated content)
- The agent needs to **interact with a website** — clicking, typing, scrolling, selecting, uploading files
Do NOT use GCU for:
- Static content that `web_scrape` handles fine
- API-accessible data (use the API directly)
- PDF/file processing
- Anything that doesn't require a browser UI
## What GCU Nodes Are
- `node_type="gcu"` — a declarative enhancement over `event_loop`
- Framework auto-prepends browser best-practices system prompt
- Framework auto-includes all 31 browser tools from `gcu-tools` MCP server
- Same underlying `EventLoopNode` class — no new imports needed
- `tools=[]` is correct — tools are auto-populated at runtime
## GCU Architecture Pattern
GCU nodes are **subagents** — invoked via `delegate_to_sub_agent()`, not connected via edges.
- Primary nodes (`event_loop`, client-facing) orchestrate; GCU nodes do browser work
- Parent node declares `sub_agents=["gcu-node-id"]` and calls `delegate_to_sub_agent(agent_id="gcu-node-id", task="...")`
- GCU nodes set `max_node_visits=1` (single execution per delegation), `client_facing=False`
- GCU nodes use `output_keys=["result"]` and return structured JSON via `set_output("result", ...)`
## GCU Node Definition Template
```python
gcu_browser_node = NodeSpec(
id="gcu-browser-worker",
name="Browser Worker",
description="Browser subagent that does X.",
node_type="gcu",
client_facing=False,
max_node_visits=1,
input_keys=[],
output_keys=["result"],
tools=[], # Auto-populated with all browser tools
system_prompt="""\
You are a browser agent. Your job: [specific task].
## Workflow
1. browser_start (only if no browser is running yet)
2. browser_open(url=TARGET_URL) — note the returned targetId
3. browser_snapshot to read the page
4. [task-specific steps]
5. set_output("result", JSON)
## Output format
set_output("result", JSON) with:
- [field]: [type and description]
""",
)
```
## Parent Node Template (orchestrating GCU subagents)
```python
orchestrator_node = NodeSpec(
id="orchestrator",
...
node_type="event_loop",
sub_agents=["gcu-browser-worker"],
system_prompt="""\
...
delegate_to_sub_agent(
agent_id="gcu-browser-worker",
task="Navigate to [URL]. Do [specific task]. Return JSON with [fields]."
)
...
""",
tools=[], # Orchestrator doesn't need browser tools
)
```
## mcp_servers.json with GCU
```json
{
"hive-tools": { ... },
"gcu-tools": {
"transport": "stdio",
"command": "uv",
"args": ["run", "python", "-m", "gcu.server", "--stdio"],
"cwd": "../../tools",
"description": "GCU tools for browser automation"
}
}
```
Note: `gcu-tools` is auto-added if any node uses `node_type="gcu"`, but including it explicitly is fine.
## GCU System Prompt Best Practices
Key rules to bake into GCU node prompts:
- Prefer `browser_snapshot` over `browser_get_text("body")` — compact accessibility tree vs 100KB+ raw HTML
- Always `browser_wait` after navigation
- Use large scroll amounts (~2000-5000) for lazy-loaded content
- For spillover files, use `run_command` with grep, not `read_file`
- If auth wall detected, report immediately — don't attempt login
- Keep tool calls per turn ≤10
- Tab isolation: when browser is already running, use `browser_open(background=true)` and pass `target_id` to every call
## GCU Anti-Patterns
- Using `browser_screenshot` to read text (use `browser_snapshot`)
- Re-navigating after scrolling (resets scroll position)
- Attempting login on auth walls
- Forgetting `target_id` in multi-tab scenarios
- Putting browser tools directly on `event_loop` nodes instead of using GCU subagent pattern
- Making GCU nodes `client_facing=True` (they should be autonomous subagents)
+3 -3
View File
@@ -660,7 +660,7 @@ class GraphBuilder:
# Generate Python code
code = self._generate_code(graph)
Path(path).write_text(code)
Path(path).write_text(code, encoding="utf-8")
self.session.phase = BuildPhase.EXPORTED
self._save_session()
@@ -754,14 +754,14 @@ class GraphBuilder:
"""Save session to disk."""
self.session.updated_at = datetime.now()
path = self.storage_path / f"{self.session.id}.json"
path.write_text(self.session.model_dump_json(indent=2))
path.write_text(self.session.model_dump_json(indent=2), encoding="utf-8")
def _load_session(self, session_id: str) -> BuildSession:
"""Load session from disk."""
path = self.storage_path / f"{session_id}.json"
if not path.exists():
raise FileNotFoundError(f"Session not found: {session_id}")
return BuildSession.model_validate_json(path.read_text())
return BuildSession.model_validate_json(path.read_text(encoding="utf-8"))
@classmethod
def list_sessions(cls, storage_path: Path | str | None = None) -> list[str]:
+49 -3
View File
@@ -50,12 +50,14 @@ def get_max_tokens() -> int:
def get_api_key() -> str | None:
"""Return the API key, supporting env var, Claude Code subscription, and ZAI Code.
"""Return the API key, supporting env var, Claude Code subscription, Codex, and ZAI Code.
Priority:
1. Claude Code subscription (``use_claude_code_subscription: true``)
reads the OAuth token from ``~/.claude/.credentials.json``.
2. Environment variable named in ``api_key_env_var``.
2. Codex subscription (``use_codex_subscription: true``)
reads the OAuth token from macOS Keychain or ``~/.codex/auth.json``.
3. Environment variable named in ``api_key_env_var``.
"""
llm = get_hive_config().get("llm", {})
@@ -70,6 +72,17 @@ def get_api_key() -> str | None:
except ImportError:
pass
# Codex subscription: read OAuth token from Keychain / auth.json
if llm.get("use_codex_subscription"):
try:
from framework.runner.runner import get_codex_token
token = get_codex_token()
if token:
return token
except ImportError:
pass
# Standard env-var path (covers ZAI Code and all API-key providers)
api_key_env_var = llm.get("api_key_env_var")
if api_key_env_var:
@@ -77,9 +90,18 @@ def get_api_key() -> str | None:
return None
def get_gcu_enabled() -> bool:
"""Return whether GCU (browser automation) is enabled in user config."""
return get_hive_config().get("gcu_enabled", True)
def get_api_base() -> str | None:
"""Return the api_base URL for OpenAI-compatible endpoints, if configured."""
return get_hive_config().get("llm", {}).get("api_base")
llm = get_hive_config().get("llm", {})
if llm.get("use_codex_subscription"):
# Codex subscription routes through the ChatGPT backend, not api.openai.com.
return "https://chatgpt.com/backend-api/codex"
return llm.get("api_base")
def get_llm_extra_kwargs() -> dict[str, Any]:
@@ -88,6 +110,10 @@ def get_llm_extra_kwargs() -> dict[str, Any]:
When ``use_claude_code_subscription`` is enabled, returns
``extra_headers`` with the OAuth Bearer token so that litellm's
built-in Anthropic OAuth handler adds the required beta headers.
When ``use_codex_subscription`` is enabled, returns
``extra_headers`` with the Bearer token, ``ChatGPT-Account-Id``,
and ``store=False`` (required by the ChatGPT backend).
"""
llm = get_hive_config().get("llm", {})
if llm.get("use_claude_code_subscription"):
@@ -96,6 +122,26 @@ def get_llm_extra_kwargs() -> dict[str, Any]:
return {
"extra_headers": {"authorization": f"Bearer {api_key}"},
}
if llm.get("use_codex_subscription"):
api_key = get_api_key()
if api_key:
headers: dict[str, str] = {
"Authorization": f"Bearer {api_key}",
"User-Agent": "CodexBar",
}
try:
from framework.runner.runner import get_codex_account_id
account_id = get_codex_account_id()
if account_id:
headers["ChatGPT-Account-Id"] = account_id
except ImportError:
pass
return {
"extra_headers": headers,
"store": False,
"allowed_openai_params": ["store"],
}
return {}
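To illustrate where these kwargs land — a hypothetical call site; the framework's real provider wiring lives elsewhere, and the model name is a placeholder:

```python
import litellm

kwargs = get_llm_extra_kwargs()
# e.g. {"extra_headers": {...}, "store": False, "allowed_openai_params": ["store"]}
response = litellm.completion(
    model="gpt-4o",  # placeholder model name
    messages=[{"role": "user", "content": "ping"}],
    **kwargs,
)
```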
+25 -3
View File
@@ -42,6 +42,14 @@ For Vault integration:
from core.framework.credentials.vault import HashiCorpVaultStorage
"""
from .key_storage import (
delete_aden_api_key,
generate_and_save_credential_key,
load_aden_api_key,
load_credential_key,
save_aden_api_key,
save_credential_key,
)
from .models import (
CredentialDecryptionError,
CredentialError,
@@ -63,7 +71,7 @@ from .setup import (
CredentialSetupSession,
MissingCredential,
SetupResult,
detect_missing_credentials_from_nodes,
load_agent_nodes,
run_credential_setup_cli,
)
from .storage import (
@@ -75,7 +83,12 @@ from .storage import (
)
from .store import CredentialStore
from .template import TemplateResolver
from .validation import ensure_credential_key_env, validate_agent_credentials
from .validation import (
CredentialStatus,
CredentialValidationResult,
ensure_credential_key_env,
validate_agent_credentials,
)
# Aden sync components (lazy import to avoid httpx dependency when not needed)
# Usage: from core.framework.credentials.aden import AdenSyncProvider
@@ -127,14 +140,23 @@ __all__ = [
"CredentialRefreshError",
"CredentialValidationError",
"CredentialDecryptionError",
# Key storage (bootstrap credentials)
"load_credential_key",
"save_credential_key",
"generate_and_save_credential_key",
"load_aden_api_key",
"save_aden_api_key",
"delete_aden_api_key",
# Validation
"ensure_credential_key_env",
"validate_agent_credentials",
"CredentialStatus",
"CredentialValidationResult",
# Interactive setup
"CredentialSetupSession",
"MissingCredential",
"SetupResult",
"detect_missing_credentials_from_nodes",
"load_agent_nodes",
"run_credential_setup_cli",
# Aden sync (optional - requires httpx)
"AdenSyncProvider",
+11 -10
View File
@@ -26,7 +26,7 @@ Usage:
storage = AdenCachedStorage(
local_storage=EncryptedFileStorage(),
aden_provider=provider,
cache_ttl_seconds=300, # Re-check Aden every 5 minutes
cache_ttl_seconds=600, # Re-check Aden every 10 minutes
)
# Create store
@@ -77,7 +77,7 @@ class AdenCachedStorage(CredentialStorage):
storage = AdenCachedStorage(
local_storage=EncryptedFileStorage(),
aden_provider=provider,
cache_ttl_seconds=300, # 5 minutes
cache_ttl_seconds=600, # 10 minutes
)
store = CredentialStore(
@@ -193,23 +193,24 @@ class AdenCachedStorage(CredentialStorage):
logger.debug(f"Using cached credential '{credential_id}'")
return local_cred
# Try to fetch from Aden
# If nothing local, there's nothing to refresh from Aden.
# sync_all() already fetched all available credentials — anything
# not in local storage doesn't exist on the Aden server.
if local_cred is None:
return None
# Try to refresh stale local credential from Aden
try:
aden_cred = self._aden_provider.fetch_from_aden(credential_id)
if aden_cred:
# Update local cache
self.save(aden_cred)
logger.debug(f"Fetched credential '{credential_id}' from Aden")
return aden_cred
except Exception as e:
logger.warning(f"Failed to fetch '{credential_id}' from Aden: {e}")
logger.info(f"Using stale cached credential '{credential_id}'")
return local_cred
# Fall back to local cache if Aden fails
if local_cred:
logger.info(f"Using stale cached credential '{credential_id}'")
return local_cred
# Return local credential if it exists (may be None)
return local_cred
def load_all_for_provider(self, provider_name: str) -> list[CredentialObject]:
+201
View File
@@ -0,0 +1,201 @@
"""
Dedicated file-based storage for bootstrap credentials.
HIVE_CREDENTIAL_KEY -> ~/.hive/secrets/credential_key (plain text, chmod 600)
ADEN_API_KEY -> ~/.hive/credentials/ (encrypted via EncryptedFileStorage)
Boot order:
1. load_credential_key() -- reads/generates the Fernet key, sets os.environ
2. load_aden_api_key() -- uses the encrypted store (which needs the key from step 1)
"""
from __future__ import annotations
import logging
import os
import stat
from pathlib import Path
logger = logging.getLogger(__name__)
CREDENTIAL_KEY_PATH = Path.home() / ".hive" / "secrets" / "credential_key"
CREDENTIAL_KEY_ENV_VAR = "HIVE_CREDENTIAL_KEY"
ADEN_CREDENTIAL_ID = "aden_api_key"
ADEN_ENV_VAR = "ADEN_API_KEY"
# ---------------------------------------------------------------------------
# HIVE_CREDENTIAL_KEY
# ---------------------------------------------------------------------------
def load_credential_key() -> str | None:
"""Load HIVE_CREDENTIAL_KEY with priority: env > file > shell config.
Sets ``os.environ["HIVE_CREDENTIAL_KEY"]`` as a side-effect when found.
Returns the key string, or ``None`` if unavailable everywhere.
"""
# 1. Already in environment (set by parent process, CI, Windows Registry, etc.)
key = os.environ.get(CREDENTIAL_KEY_ENV_VAR)
if key:
return key
# 2. Dedicated secrets file
key = _read_credential_key_file()
if key:
os.environ[CREDENTIAL_KEY_ENV_VAR] = key
return key
# 3. Shell config fallback (backward compat for old installs)
key = _read_from_shell_config(CREDENTIAL_KEY_ENV_VAR)
if key:
os.environ[CREDENTIAL_KEY_ENV_VAR] = key
return key
return None
def save_credential_key(key: str) -> Path:
"""Save HIVE_CREDENTIAL_KEY to ``~/.hive/secrets/credential_key``.
Creates parent dirs with mode 700, writes the file with mode 600.
Also sets ``os.environ["HIVE_CREDENTIAL_KEY"]``.
Returns:
The path that was written.
"""
path = CREDENTIAL_KEY_PATH
path.parent.mkdir(parents=True, exist_ok=True)
# Restrict the secrets directory itself
path.parent.chmod(stat.S_IRWXU) # 0o700
path.write_text(key, encoding="utf-8")
path.chmod(stat.S_IRUSR | stat.S_IWUSR) # 0o600
os.environ[CREDENTIAL_KEY_ENV_VAR] = key
return path
def generate_and_save_credential_key() -> str:
"""Generate a new Fernet key and persist it to ``~/.hive/secrets/credential_key``.
Returns:
The generated key string.
"""
from cryptography.fernet import Fernet
key = Fernet.generate_key().decode()
save_credential_key(key)
return key
# ---------------------------------------------------------------------------
# ADEN_API_KEY
# ---------------------------------------------------------------------------
def load_aden_api_key() -> str | None:
"""Load ADEN_API_KEY with priority: env > encrypted store > shell config.
**Must** be called after ``load_credential_key()`` because the encrypted
store depends on HIVE_CREDENTIAL_KEY.
Sets ``os.environ["ADEN_API_KEY"]`` as a side-effect when found.
Returns the key string, or ``None`` if unavailable everywhere.
"""
# 1. Already in environment
key = os.environ.get(ADEN_ENV_VAR)
if key:
return key
# 2. Encrypted credential store
key = _read_aden_from_encrypted_store()
if key:
os.environ[ADEN_ENV_VAR] = key
return key
# 3. Shell config fallback (backward compat)
key = _read_from_shell_config(ADEN_ENV_VAR)
if key:
os.environ[ADEN_ENV_VAR] = key
return key
return None
def save_aden_api_key(key: str) -> None:
"""Save ADEN_API_KEY to the encrypted credential store.
Also sets ``os.environ["ADEN_API_KEY"]``.
"""
from pydantic import SecretStr
from .models import CredentialKey, CredentialObject
from .storage import EncryptedFileStorage
storage = EncryptedFileStorage()
cred = CredentialObject(
id=ADEN_CREDENTIAL_ID,
keys={"api_key": CredentialKey(name="api_key", value=SecretStr(key))},
)
storage.save(cred)
os.environ[ADEN_ENV_VAR] = key
def delete_aden_api_key() -> None:
"""Remove ADEN_API_KEY from the encrypted store and ``os.environ``."""
try:
from .storage import EncryptedFileStorage
storage = EncryptedFileStorage()
storage.delete(ADEN_CREDENTIAL_ID)
except Exception:
logger.debug("Could not delete %s from encrypted store", ADEN_CREDENTIAL_ID)
os.environ.pop(ADEN_ENV_VAR, None)
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
def _read_credential_key_file() -> str | None:
"""Read the credential key from ``~/.hive/secrets/credential_key``."""
try:
if CREDENTIAL_KEY_PATH.is_file():
value = CREDENTIAL_KEY_PATH.read_text(encoding="utf-8").strip()
if value:
return value
except Exception:
logger.debug("Could not read %s", CREDENTIAL_KEY_PATH)
return None
def _read_from_shell_config(env_var: str) -> str | None:
"""Fallback: read an env var from ~/.zshrc or ~/.bashrc."""
try:
from aden_tools.credentials.shell_config import check_env_var_in_shell_config
found, value = check_env_var_in_shell_config(env_var)
if found and value:
return value
except ImportError:
pass
return None
def _read_aden_from_encrypted_store() -> str | None:
"""Try to load ADEN_API_KEY from the encrypted credential store."""
if not os.environ.get(CREDENTIAL_KEY_ENV_VAR):
return None
try:
from .storage import EncryptedFileStorage
storage = EncryptedFileStorage()
cred = storage.load(ADEN_CREDENTIAL_ID)
if cred:
return cred.get_key("api_key")
except Exception:
logger.debug("Could not load %s from encrypted store", ADEN_CREDENTIAL_ID)
return None
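A minimal boot sequence honoring the ordering constraint in the module docstring (import path assumed from the package `__init__` shown earlier in this diff):

```python
from framework.credentials import (  # assumed import path
    generate_and_save_credential_key,
    load_aden_api_key,
    load_credential_key,
)

# Step 1: the Fernet key must exist before the encrypted store is usable.
if load_credential_key() is None:       # env > file > shell config
    generate_and_save_credential_key()  # writes ~/.hive/secrets/credential_key

# Step 2: now the encrypted store can decrypt ADEN_API_KEY.
aden_key = load_aden_api_key()          # env > encrypted store > shell config
```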
@@ -73,6 +73,7 @@ from .provider import (
TokenExpiredError,
TokenPlacement,
)
from .zoho_provider import ZohoOAuth2Provider
__all__ = [
# Types
@@ -82,6 +83,7 @@ __all__ = [
# Providers
"BaseOAuth2Provider",
"HubSpotOAuth2Provider",
"ZohoOAuth2Provider",
# Lifecycle
"TokenLifecycleManager",
"TokenRefreshResult",
@@ -0,0 +1,198 @@
"""
Zoho CRM-specific OAuth2 provider.
Pre-configured for Zoho's OAuth2 endpoints and CRM scopes.
Extends BaseOAuth2Provider for Zoho-specific behavior.
Usage:
provider = ZohoOAuth2Provider(
client_id="your-client-id",
client_secret="your-client-secret",
accounts_domain="https://accounts.zoho.com", # or .in, .eu, etc.
)
# Use with credential store
store = CredentialStore(
storage=EncryptedFileStorage(),
providers=[provider],
)
See: https://www.zoho.com/crm/developer/docs/api/v2/access-refresh.html
"""
from __future__ import annotations
import logging
import os
from typing import Any
from ..models import CredentialObject, CredentialRefreshError, CredentialType
from .base_provider import BaseOAuth2Provider
from .provider import OAuth2Config, OAuth2Token, TokenPlacement
logger = logging.getLogger(__name__)
# Default CRM scopes for Phase 1 (Leads, Contacts, Accounts, Deals, Notes)
ZOHO_DEFAULT_SCOPES = [
"ZohoCRM.modules.leads.ALL",
"ZohoCRM.modules.contacts.ALL",
"ZohoCRM.modules.accounts.ALL",
"ZohoCRM.modules.deals.ALL",
"ZohoCRM.modules.notes.CREATE",
]
class ZohoOAuth2Provider(BaseOAuth2Provider):
"""
Zoho CRM OAuth2 provider with pre-configured endpoints.
Handles Zoho-specific OAuth2 behavior:
- Pre-configured token and authorization URLs (region-aware)
- Default CRM scopes for Leads, Contacts, Accounts, Deals, Notes
- Token validation via Zoho CRM API
- Authorization header format: "Authorization: Zoho-oauthtoken {token}"
Example:
provider = ZohoOAuth2Provider(
client_id="your-zoho-client-id",
client_secret="your-zoho-client-secret",
accounts_domain="https://accounts.zoho.com", # US
# or "https://accounts.zoho.in" for India
# or "https://accounts.zoho.eu" for EU
)
"""
def __init__(
self,
client_id: str,
client_secret: str,
accounts_domain: str = "https://accounts.zoho.com",
api_domain: str | None = None,
scopes: list[str] | None = None,
):
"""
Initialize Zoho OAuth2 provider.
Args:
client_id: Zoho OAuth2 client ID
client_secret: Zoho OAuth2 client secret
accounts_domain: Zoho accounts domain (region-specific)
- US: https://accounts.zoho.com
- India: https://accounts.zoho.in
- EU: https://accounts.zoho.eu
- etc.
api_domain: Zoho API domain for CRM calls (used in validate).
Defaults to ZOHO_API_DOMAIN env or https://www.zohoapis.com
scopes: Override default scopes if needed
"""
base = accounts_domain.rstrip("/")
token_url = f"{base}/oauth/v2/token"
auth_url = f"{base}/oauth/v2/auth"
config = OAuth2Config(
token_url=token_url,
authorization_url=auth_url,
client_id=client_id,
client_secret=client_secret,
default_scopes=scopes or ZOHO_DEFAULT_SCOPES,
token_placement=TokenPlacement.HEADER_CUSTOM,
custom_header_name="Authorization",
)
super().__init__(config, provider_id="zoho_crm_oauth2")
self._accounts_domain = base
self._api_domain = (
api_domain or os.getenv("ZOHO_API_DOMAIN", "https://www.zohoapis.com")
).rstrip("/")
@property
def supported_types(self) -> list[CredentialType]:
return [CredentialType.OAUTH2]
def format_for_request(self, token: OAuth2Token) -> dict[str, Any]:
"""
Format token for Zoho CRM API requests.
Zoho uses Authorization header: "Zoho-oauthtoken {access_token}"
(not Bearer).
"""
return {
"headers": {
"Authorization": f"Zoho-oauthtoken {token.access_token}",
"Content-Type": "application/json",
"Accept": "application/json",
}
}
def validate(self, credential: CredentialObject) -> bool:
"""
Validate Zoho credential by making a lightweight API call.
Uses GET /crm/v2/users?type=CurrentUser (doesn't require module access).
Treats 429 as valid-but-rate-limited.
"""
access_token = credential.get_key("access_token")
if not access_token:
return False
try:
client = self._get_client()
response = client.get(
f"{self._api_domain}/crm/v2/users?type=CurrentUser",
headers={
"Authorization": f"Zoho-oauthtoken {access_token}",
"Accept": "application/json",
},
timeout=self.config.request_timeout,
)
return response.status_code in (200, 429)
except Exception as e:
logger.debug("Zoho credential validation failed: %s", e)
return False
def _parse_token_response(self, response_data: dict[str, Any]) -> OAuth2Token:
"""
Parse Zoho token response.
Zoho returns:
{
"access_token": "...",
"refresh_token": "...",
"expires_in": 3600,
"api_domain": "https://www.zohoapis.com",
"token_type": "Bearer"
}
"""
token = OAuth2Token.from_token_response(response_data)
if "api_domain" in response_data:
token.raw_response["api_domain"] = response_data["api_domain"]
return token
def refresh(self, credential: CredentialObject) -> CredentialObject:
"""Refresh Zoho OAuth2 credential and persist DC metadata."""
refresh_tok = credential.get_key("refresh_token")
if not refresh_tok:
raise CredentialRefreshError(f"Credential '{credential.id}' has no refresh_token")
try:
new_token = self.refresh_access_token(refresh_tok)
except Exception as e:
raise CredentialRefreshError(f"Failed to refresh '{credential.id}': {e}") from e
credential.set_key("access_token", new_token.access_token, expires_at=new_token.expires_at)
if new_token.refresh_token and new_token.refresh_token != refresh_tok:
credential.set_key("refresh_token", new_token.refresh_token)
api_domain = new_token.raw_response.get("api_domain")
if isinstance(api_domain, str) and api_domain:
credential.set_key("api_domain", api_domain.rstrip("/"))
accounts_server = new_token.raw_response.get("accounts-server")
if isinstance(accounts_server, str) and accounts_server:
credential.set_key("accounts_domain", accounts_server.rstrip("/"))
location = new_token.raw_response.get("location")
if isinstance(location, str) and location:
credential.set_key("location", location.strip().lower())
return credential
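For illustration, a hedged sketch of the refresh-then-request path (placeholder credentials; in practice the `CredentialStore` drives refresh, and `refresh_access_token` is assumed to come from `BaseOAuth2Provider`):

```python
import httpx

provider = ZohoOAuth2Provider(
    client_id="your-zoho-client-id",
    client_secret="your-zoho-client-secret",
)
token = provider.refresh_access_token("stored-refresh-token")  # OAuth2Token
request_parts = provider.format_for_request(token)

# Zoho wants "Authorization: Zoho-oauthtoken <token>", not a Bearer header.
resp = httpx.get(
    "https://www.zohoapis.com/crm/v2/users?type=CurrentUser",
    headers=request_parts["headers"],
)
```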
+47 -173
View File
@@ -160,27 +160,35 @@ class CredentialSetupSession:
@classmethod
def from_nodes(cls, nodes: list[NodeSpec]) -> CredentialSetupSession:
"""Create a setup session by detecting missing credentials from nodes."""
missing = detect_missing_credentials_from_nodes(nodes)
from framework.credentials.validation import _status_to_missing, validate_agent_credentials
result = validate_agent_credentials(nodes, verify=False, raise_on_error=False)
missing = [_status_to_missing(c) for c in result.credentials if not c.available]
return cls(missing)
@classmethod
def from_agent_path(cls, agent_path: str | Path) -> CredentialSetupSession:
"""Create a setup session for an agent by path."""
agent_path = Path(agent_path)
def from_agent_path(
cls,
agent_path: str | Path,
*,
missing_only: bool = True,
) -> CredentialSetupSession:
"""Create a setup session for an agent by path.
# Load agent to get nodes
agent_json = agent_path / "agent.json"
agent_py = agent_path / "agent.py"
Args:
agent_path: Path to agent folder.
missing_only: If True (default), only include credentials that
are NOT yet available. If False, include all required
credentials regardless of availability.
"""
from framework.credentials.validation import _status_to_missing, validate_agent_credentials
nodes = []
if agent_py.exists():
# Python-based agent
nodes = _load_nodes_from_python_agent(agent_path)
elif agent_json.exists():
# JSON-based agent
nodes = _load_nodes_from_json_agent(agent_json)
missing = detect_missing_credentials_from_nodes(nodes)
nodes = load_agent_nodes(agent_path)
result = validate_agent_credentials(nodes, verify=False, raise_on_error=False)
if missing_only:
missing = [_status_to_missing(c) for c in result.credentials if not c.available]
else:
missing = [_status_to_missing(c) for c in result.credentials]
return cls(missing)
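A hedged usage sketch of the new keyword (the agent path is illustrative):

    # Build a session over ALL required credentials, e.g. to let a user
    # review or rotate keys that are already configured.
    session = CredentialSetupSession.from_agent_path("agents/my_agent", missing_only=False)
    result = session.run_interactive()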
def run_interactive(self) -> SetupResult:
@@ -248,57 +256,23 @@ class CredentialSetupSession:
def _ensure_credential_key(self) -> bool:
"""Ensure HIVE_CREDENTIAL_KEY is available for encrypted storage."""
if os.environ.get("HIVE_CREDENTIAL_KEY"):
from .key_storage import generate_and_save_credential_key, load_credential_key
if load_credential_key():
return True
# Try to load from shell config
try:
from aden_tools.credentials.shell_config import check_env_var_in_shell_config
found, value = check_env_var_in_shell_config("HIVE_CREDENTIAL_KEY")
if found and value:
os.environ["HIVE_CREDENTIAL_KEY"] = value
return True
except ImportError:
pass
# Generate a new key
self._print(f"{Colors.YELLOW}Initializing credential store...{Colors.NC}")
try:
from cryptography.fernet import Fernet
generated_key = Fernet.generate_key().decode()
os.environ["HIVE_CREDENTIAL_KEY"] = generated_key
# Save to shell config
self._save_key_to_shell_config(generated_key)
generate_and_save_credential_key()
self._print(
f"{Colors.GREEN}✓ Encryption key saved to ~/.hive/secrets/credential_key{Colors.NC}"
)
return True
except Exception as e:
self._print(f"{Colors.RED}Failed to initialize credential store: {e}{Colors.NC}")
return False
def _save_key_to_shell_config(self, key: str) -> None:
"""Save HIVE_CREDENTIAL_KEY to shell config."""
try:
from aden_tools.credentials.shell_config import (
add_env_var_to_shell_config,
)
success, config_path = add_env_var_to_shell_config(
"HIVE_CREDENTIAL_KEY",
key,
comment="Encryption key for Hive credential store",
)
if success:
self._print(f"{Colors.GREEN}✓ Encryption key saved to {config_path}{Colors.NC}")
except Exception:
# Fallback: just tell the user
self._print("\n")
self._print(
f"{Colors.YELLOW}Add this to your shell config (~/.zshrc or ~/.bashrc):{Colors.NC}"
)
self._print(f' export HIVE_CREDENTIAL_KEY="{key}"')
def _setup_single_credential(self, cred: MissingCredential) -> bool:
"""Set up a single credential. Returns True if configured."""
self._print(f"\n{Colors.CYAN}{'' * 60}{Colors.NC}")
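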
@@ -436,19 +410,10 @@ class CredentialSetupSession:
self._print(f"{Colors.YELLOW}No key entered. Skipping.{Colors.NC}")
return False
os.environ["ADEN_API_KEY"] = aden_key
# Persist to encrypted store and set os.environ
from .key_storage import save_aden_api_key
# Save to shell config
try:
from aden_tools.credentials.shell_config import add_env_var_to_shell_config
add_env_var_to_shell_config(
"ADEN_API_KEY",
aden_key,
comment="Aden Platform API key",
)
except Exception:
pass
save_aden_api_key(aden_key)
# Sync from Aden
try:
@@ -552,115 +517,24 @@ class CredentialSetupSession:
self._print("")
def detect_missing_credentials_from_nodes(nodes: list) -> list[MissingCredential]:
"""
Detect missing credentials for a list of nodes.
def load_agent_nodes(agent_path: str | Path) -> list:
"""Load NodeSpec list from an agent's agent.py or agent.json.
Args:
nodes: List of NodeSpec objects
agent_path: Path to agent directory.
Returns:
List of MissingCredential objects for credentials that need setup
List of NodeSpec objects (empty list if agent can't be loaded).
"""
try:
from aden_tools.credentials import CREDENTIAL_SPECS
agent_path = Path(agent_path)
agent_py = agent_path / "agent.py"
agent_json = agent_path / "agent.json"
from framework.credentials import CredentialStore
from framework.credentials.storage import (
CompositeStorage,
EncryptedFileStorage,
EnvVarStorage,
)
except ImportError:
return []
# Collect required tools and node types
required_tools: set[str] = set()
node_types: set[str] = set()
for node in nodes:
if hasattr(node, "tools") and node.tools:
required_tools.update(node.tools)
if hasattr(node, "node_type"):
node_types.add(node.node_type)
# Build credential store to check availability.
# Env vars take priority over encrypted store (fresh key wins over stale).
env_mapping = {
(spec.credential_id or name): spec.env_var for name, spec in CREDENTIAL_SPECS.items()
}
env_storage = EnvVarStorage(env_mapping=env_mapping)
if os.environ.get("HIVE_CREDENTIAL_KEY"):
storage = CompositeStorage(primary=env_storage, fallbacks=[EncryptedFileStorage()])
else:
storage = env_storage
store = CredentialStore(storage=storage)
# Build reverse mappings
tool_to_cred: dict[str, str] = {}
node_type_to_cred: dict[str, str] = {}
for cred_name, spec in CREDENTIAL_SPECS.items():
for tool_name in spec.tools:
tool_to_cred[tool_name] = cred_name
for nt in spec.node_types:
node_type_to_cred[nt] = cred_name
missing: list[MissingCredential] = []
checked: set[str] = set()
# Check tool credentials
for tool_name in sorted(required_tools):
cred_name = tool_to_cred.get(tool_name)
if cred_name is None or cred_name in checked:
continue
checked.add(cred_name)
spec = CREDENTIAL_SPECS[cred_name]
cred_id = spec.credential_id or cred_name
if spec.required and not store.is_available(cred_id):
affected_tools = sorted(t for t in required_tools if t in spec.tools)
missing.append(
MissingCredential(
credential_name=cred_name,
env_var=spec.env_var,
description=spec.description,
help_url=spec.help_url,
api_key_instructions=spec.api_key_instructions,
tools=affected_tools,
aden_supported=spec.aden_supported,
direct_api_key_supported=spec.direct_api_key_supported,
credential_id=spec.credential_id,
credential_key=spec.credential_key,
)
)
# Check node type credentials
for nt in sorted(node_types):
cred_name = node_type_to_cred.get(nt)
if cred_name is None or cred_name in checked:
continue
checked.add(cred_name)
spec = CREDENTIAL_SPECS[cred_name]
cred_id = spec.credential_id or cred_name
if spec.required and not store.is_available(cred_id):
affected_types = sorted(t for t in node_types if t in spec.node_types)
missing.append(
MissingCredential(
credential_name=cred_name,
env_var=spec.env_var,
description=spec.description,
help_url=spec.help_url,
api_key_instructions=spec.api_key_instructions,
node_types=affected_types,
aden_supported=spec.aden_supported,
direct_api_key_supported=spec.direct_api_key_supported,
credential_id=spec.credential_id,
credential_key=spec.credential_key,
)
)
return missing
if agent_py.exists():
return _load_nodes_from_python_agent(agent_path)
elif agent_json.exists():
return _load_nodes_from_json_agent(agent_json)
return []
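A hedged usage sketch tying the new helper to validation (the path is illustrative):

    nodes = load_agent_nodes("agents/my_agent")
    result = validate_agent_credentials(nodes, verify=False, raise_on_error=False)
    unavailable = [c.credential_name for c in result.credentials if not c.available]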
def _load_nodes_from_python_agent(agent_path: Path) -> list:
@@ -694,7 +568,7 @@ def _load_nodes_from_python_agent(agent_path: Path) -> list:
def _load_nodes_from_json_agent(agent_json: Path) -> list:
"""Load nodes from a JSON-based agent."""
try:
with open(agent_json) as f:
with open(agent_json, encoding="utf-8") as f:
data = json.load(f)
from framework.graph import NodeSpec
+3 -3
@@ -227,7 +227,7 @@ class EncryptedFileStorage(CredentialStorage):
index_path = self.base_path / "metadata" / "index.json"
if not index_path.exists():
return []
with open(index_path) as f:
with open(index_path, encoding="utf-8") as f:
index = json.load(f)
return list(index.get("credentials", {}).keys())
@@ -268,7 +268,7 @@ class EncryptedFileStorage(CredentialStorage):
index_path = self.base_path / "metadata" / "index.json"
if index_path.exists():
with open(index_path) as f:
with open(index_path, encoding="utf-8") as f:
index = json.load(f)
else:
index = {"credentials": {}, "version": "1.0"}
@@ -283,7 +283,7 @@ class EncryptedFileStorage(CredentialStorage):
index["last_modified"] = datetime.now(UTC).isoformat()
with open(index_path, "w") as f:
with open(index_path, "w", encoding="utf-8") as f:
json.dump(index, f, indent=2)
+9
@@ -396,6 +396,11 @@ class CredentialStore:
Returns:
CredentialObject if found, None otherwise.
"""
# LLMs sometimes pass "provider/alias" as the alias (e.g. "google/wrok"
# instead of just "wrok"). Strip the provider prefix when present.
if alias.startswith(f"{provider_name}/"):
alias = alias[len(provider_name) + 1 :]
if hasattr(self._storage, "load_by_alias"):
return self._storage.load_by_alias(provider_name, alias)
@@ -422,6 +427,10 @@ class CredentialStore:
"""
return self.get_credential(credential_id, refresh_if_needed=False) is not None
def exists(self, credential_id: str) -> bool:
"""Check if a credential exists in storage without triggering provider fetches."""
return self._storage.exists(credential_id)
# --- Validation ---
def validate_for_usage(self, credential_id: str) -> list[str]:
+320 -153
@@ -14,56 +14,165 @@ logger = logging.getLogger(__name__)
def ensure_credential_key_env() -> None:
"""Load credentials from shell config if not in environment.
"""Load bootstrap credentials into ``os.environ``.
The quickstart.sh and setup-credentials skill write API keys to ~/.zshrc
or ~/.bashrc. If the user hasn't sourced their config in the current shell,
this reads them directly so the runner (and any MCP subprocesses) can use them.
Priority chain for each credential:
1. ``os.environ`` (already set, nothing to do)
2. Dedicated file storage (``~/.hive/secrets/`` or encrypted store)
3. Shell config fallback (``~/.zshrc`` / ``~/.bashrc``) for backward compat
Loads:
- HIVE_CREDENTIAL_KEY (encrypted credential store)
- ADEN_API_KEY (Aden OAuth sync)
- All LLM API keys (ANTHROPIC_API_KEY, OPENAI_API_KEY, ZAI_API_KEY, etc.)
Boot order matters: HIVE_CREDENTIAL_KEY must load BEFORE ADEN_API_KEY
because the encrypted store depends on it.
Remaining LLM/tool API keys still load from shell config.
"""
from .key_storage import load_aden_api_key, load_credential_key
# Step 1: HIVE_CREDENTIAL_KEY (must come first — encrypted store depends on it)
load_credential_key()
# Step 2: ADEN_API_KEY (uses encrypted store, then shell config fallback)
load_aden_api_key()
# Step 3: Load remaining LLM/tool API keys from shell config
try:
from aden_tools.credentials.shell_config import check_env_var_in_shell_config
except ImportError:
return
# Core credentials that are always checked
env_vars_to_load = ["HIVE_CREDENTIAL_KEY", "ADEN_API_KEY"]
# Add all LLM/tool API keys from CREDENTIAL_SPECS
try:
from aden_tools.credentials import CREDENTIAL_SPECS
for spec in CREDENTIAL_SPECS.values():
if spec.env_var and spec.env_var not in env_vars_to_load:
env_vars_to_load.append(spec.env_var)
var_name = spec.env_var
if var_name and var_name not in ("HIVE_CREDENTIAL_KEY", "ADEN_API_KEY"):
if not os.environ.get(var_name):
found, value = check_env_var_in_shell_config(var_name)
if found and value:
os.environ[var_name] = value
logger.debug("Loaded %s from shell config", var_name)
except ImportError:
pass
for var_name in env_vars_to_load:
if os.environ.get(var_name):
continue
found, value = check_env_var_in_shell_config(var_name)
if found and value:
os.environ[var_name] = value
logger.debug("Loaded %s from shell config", var_name)
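A hedged usage sketch: this is a boot-time call, made before any CredentialStore is built so that step 1 (os.environ) wins for every later availability check (the call site is illustrative):

    # Run once at process start, before constructing a CredentialStore.
    ensure_credential_key_env()
    store_ready = bool(os.environ.get("HIVE_CREDENTIAL_KEY"))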
@dataclass
class CredentialStatus:
"""Status of a single required credential after validation."""
credential_name: str
credential_id: str
env_var: str
description: str
help_url: str
api_key_instructions: str
tools: list[str]
node_types: list[str]
available: bool
valid: bool | None # None = not checked
validation_message: str | None
aden_supported: bool
direct_api_key_supported: bool
credential_key: str
aden_not_connected: bool # Aden-only cred, ADEN_API_KEY set, but integration missing
alternative_group: str | None = None # non-None when multiple providers can satisfy a tool
@dataclass
class _CredentialCheck:
"""Result of checking a single credential."""
class CredentialValidationResult:
"""Result of validating all credentials required by an agent."""
env_var: str
source: str
used_by: str
available: bool
help_url: str = ""
credentials: list[CredentialStatus]
has_aden_key: bool
@property
def failed(self) -> list[CredentialStatus]:
"""Credentials that are missing, invalid, or Aden-not-connected.
For alternative groups (multi-provider tools like send_email), the group
is satisfied if ANY member is available and valid only report failures
when the entire group is unsatisfied.
"""
# Check which alternative groups are satisfied
alt_satisfied: dict[str, bool] = {}
for c in self.credentials:
if not c.alternative_group:
continue
if c.alternative_group not in alt_satisfied:
alt_satisfied[c.alternative_group] = False
if c.available and c.valid is not False:
alt_satisfied[c.alternative_group] = True
result = []
for c in self.credentials:
if c.alternative_group:
# Skip if any alternative in the group is satisfied
if alt_satisfied.get(c.alternative_group, False):
continue
if not c.available or c.valid is False:
result.append(c)
else:
if not c.available or c.valid is False:
result.append(c)
return result
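A hedged worked example of the group semantics (all field values invented; only the fields that drive ``failed`` matter here):

    def _status(name: str, available: bool) -> CredentialStatus:
        # Illustrative values; two providers share the "send_email" group.
        return CredentialStatus(
            credential_name=name, credential_id=name,
            env_var=f"{name.upper()}_API_KEY",
            description="", help_url="", api_key_instructions="",
            tools=["send_email"], node_types=[],
            available=available, valid=None, validation_message=None,
            aden_supported=False, direct_api_key_supported=True,
            credential_key="api_key", aden_not_connected=False,
            alternative_group="send_email",
        )

    result = CredentialValidationResult(
        credentials=[_status("resend", True), _status("google", False)],
        has_aden_key=False,
    )
    assert not result.failed  # group satisfied by resend

    result.credentials[0].available = False
    assert len(result.failed) == 2  # no provider available: both reported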
@property
def has_errors(self) -> bool:
return bool(self.failed)
@property
def failed_cred_names(self) -> list[str]:
"""Credential names that need (re-)collection, excluding Aden-not-connected."""
return [c.credential_name for c in self.failed if not c.aden_not_connected]
def format_error_message(self) -> str:
"""Format a human-readable error message for CLI/runner output."""
missing = [c for c in self.credentials if not c.available and not c.aden_not_connected]
invalid = [c for c in self.credentials if c.available and c.valid is False]
aden_nc = [c for c in self.credentials if c.aden_not_connected]
lines: list[str] = []
if missing:
lines.append("Missing credentials:\n")
for c in missing:
entry = f" {c.env_var} for {_label(c)}"
if c.help_url:
entry += f"\n Get it at: {c.help_url}"
lines.append(entry)
if invalid:
if missing:
lines.append("")
lines.append("Invalid or expired credentials:\n")
for c in invalid:
entry = f" {c.env_var} for {_label(c)}{c.validation_message}"
if c.help_url:
entry += f"\n Get a new key at: {c.help_url}"
lines.append(entry)
if aden_nc:
if missing or invalid:
lines.append("")
lines.append(
"Aden integrations not connected "
"(ADEN_API_KEY is set but OAuth tokens unavailable):\n"
)
for c in aden_nc:
lines.append(
f" {c.env_var} for {_label(c)}"
f"\n Connect this integration at hive.adenhq.com first."
)
lines.append("\nIf you've already set up credentials, restart your terminal to load them.")
return "\n".join(lines)
def _presync_aden_tokens(credential_specs: dict) -> None:
def _label(c: CredentialStatus) -> str:
"""Build a human-readable label from tools/node_types."""
if c.tools:
return ", ".join(c.tools)
if c.node_types:
return ", ".join(c.node_types) + " nodes"
return c.credential_name
def _presync_aden_tokens(credential_specs: dict, *, force: bool = False) -> None:
"""Sync Aden-backed OAuth tokens into env vars for validation.
When ADEN_API_KEY is available, fetches fresh OAuth tokens from the Aden
@@ -71,6 +180,11 @@ def _presync_aden_tokens(credential_specs: dict) -> None:
tokens instead of stale or mis-stored values in the encrypted store.
Only touches credentials that are ``aden_supported`` AND whose env var
is not already set (so explicit user exports always win).
Args:
force: When True, overwrite env vars that are already set. Used by
the credentials modal to pick up freshly reauthorized tokens
from Aden instead of reusing stale values from a prior sync.
"""
from framework.credentials.store import CredentialStore
@@ -83,9 +197,14 @@ def _presync_aden_tokens(credential_specs: dict) -> None:
for name, spec in credential_specs.items():
if not spec.aden_supported:
continue
if os.environ.get(spec.env_var):
if not force and os.environ.get(spec.env_var):
continue # Already set — don't overwrite
cred_id = spec.credential_id or name
# sync_all() already fetched everything available from Aden.
# Skip credentials not in the store — they aren't connected,
# so fetching individually would fail with "Invalid integration ID".
if not aden_store.exists(cred_id):
continue
try:
value = aden_store.get_key(cred_id, spec.credential_key)
if value:
@@ -107,7 +226,13 @@ def _presync_aden_tokens(credential_specs: dict) -> None:
)
def validate_agent_credentials(nodes: list, quiet: bool = False, verify: bool = True) -> None:
def validate_agent_credentials(
nodes: list,
quiet: bool = False,
verify: bool = True,
raise_on_error: bool = True,
force_refresh: bool = False,
) -> CredentialValidationResult:
"""Check that required credentials are available and valid before running an agent.
Two-phase validation:
@@ -119,15 +244,30 @@ def validate_agent_credentials(nodes: list, quiet: bool = False, verify: bool =
nodes: List of NodeSpec objects from the agent graph.
quiet: If True, suppress the credential summary output.
verify: If True (default), run health checks on present credentials.
raise_on_error: If True (default), raise CredentialError when validation
fails. Set to False to get the result without raising.
force_refresh: If True, force re-sync of Aden OAuth tokens even when
env vars are already set. Used by the credentials modal after
reauthorization.
Returns:
CredentialValidationResult with status of ALL required credentials.
"""
empty_result = CredentialValidationResult(credentials=[], has_aden_key=False)
# Collect required tools and node types
required_tools = {tool for node in nodes if node.tools for tool in node.tools}
node_types = {node.node_type for node in nodes}
required_tools: set[str] = set()
node_types: set[str] = set()
for node in nodes:
if hasattr(node, "tools") and node.tools:
required_tools.update(node.tools)
if hasattr(node, "node_type"):
node_types.add(node.node_type)
try:
from aden_tools.credentials import CREDENTIAL_SPECS
except ImportError:
return # aden_tools not installed, skip check
return empty_result # aden_tools not installed, skip check
from framework.credentials.storage import CompositeStorage, EncryptedFileStorage, EnvVarStorage
from framework.credentials.store import CredentialStore
@@ -140,7 +280,7 @@ def validate_agent_credentials(nodes: list, quiet: bool = False, verify: bool =
# into env vars so validation sees fresh tokens instead of stale values
# in the encrypted store (e.g., a previously mis-stored google.enc).
if os.environ.get("ADEN_API_KEY"):
_presync_aden_tokens(CREDENTIAL_SPECS)
_presync_aden_tokens(CREDENTIAL_SPECS, force=force_refresh)
env_mapping = {
(spec.credential_id or name): spec.env_var for name, spec in CREDENTIAL_SPECS.items()
@@ -152,57 +292,114 @@ def validate_agent_credentials(nodes: list, quiet: bool = False, verify: bool =
storage = env_storage
store = CredentialStore(storage=storage)
# Build reverse mappings
tool_to_cred: dict[str, str] = {}
# Build reverse mappings — 1:many for multi-provider tools (e.g. send_email → resend OR google)
tool_to_creds: dict[str, list[str]] = {}
node_type_to_cred: dict[str, str] = {}
for cred_name, spec in CREDENTIAL_SPECS.items():
for tool_name in spec.tools:
tool_to_cred[tool_name] = cred_name
tool_to_creds.setdefault(tool_name, []).append(cred_name)
for nt in spec.node_types:
node_type_to_cred[nt] = cred_name
missing: list[str] = []
invalid: list[str] = []
# Aden-backed creds where ADEN_API_KEY is set but integration not connected
aden_not_connected: list[str] = []
failed_cred_names: list[str] = [] # all cred names that need (re-)collection
has_aden_key = bool(os.environ.get("ADEN_API_KEY"))
checked: set[str] = set()
all_credentials: list[CredentialStatus] = []
# Credentials that are present and should be health-checked
to_verify: list[tuple[str, str]] = [] # (cred_name, used_by_label)
to_verify: list[int] = [] # indices into all_credentials
def _check_credential(spec, cred_name: str, label: str) -> None:
def _check_credential(
spec,
cred_name: str,
affected_tools: list[str],
affected_node_types: list[str],
alternative_group: str | None = None,
) -> None:
cred_id = spec.credential_id or cred_name
if not store.is_available(cred_id):
# If ADEN_API_KEY is set and this is an Aden-only credential,
# the issue is that the integration isn't connected on hive.adenhq.com,
# NOT that the user needs to re-enter ADEN_API_KEY.
if has_aden_key and spec.aden_supported and not spec.direct_api_key_supported:
aden_not_connected.append(
f" {spec.env_var} for {label}"
f"\n Connect this integration at hive.adenhq.com first."
)
else:
entry = f" {spec.env_var} for {label}"
if spec.help_url:
entry += f"\n Get it at: {spec.help_url}"
missing.append(entry)
failed_cred_names.append(cred_name)
elif verify and spec.health_check_endpoint:
to_verify.append((cred_name, label))
available = store.is_available(cred_id)
# Aden-not-connected: ADEN_API_KEY set, Aden-only cred, but integration missing
is_aden_nc = (
not available
and has_aden_key
and spec.aden_supported
and not spec.direct_api_key_supported
)
status = CredentialStatus(
credential_name=cred_name,
credential_id=cred_id,
env_var=spec.env_var,
description=spec.description,
help_url=spec.help_url,
api_key_instructions=getattr(spec, "api_key_instructions", ""),
tools=affected_tools,
node_types=affected_node_types,
available=available,
valid=None,
validation_message=None,
aden_supported=spec.aden_supported,
direct_api_key_supported=spec.direct_api_key_supported,
credential_key=spec.credential_key,
aden_not_connected=is_aden_nc,
alternative_group=alternative_group,
)
all_credentials.append(status)
if available and verify and spec.health_check_endpoint:
to_verify.append(len(all_credentials) - 1)
# Check tool credentials
for tool_name in sorted(required_tools):
cred_name = tool_to_cred.get(tool_name)
if cred_name is None or cred_name in checked:
cred_names = tool_to_creds.get(tool_name)
if cred_names is None:
continue
checked.add(cred_name)
spec = CREDENTIAL_SPECS[cred_name]
if not spec.required:
# Filter to credentials we haven't already checked
unchecked = [cn for cn in cred_names if cn not in checked]
if not unchecked:
continue
affected = sorted(t for t in required_tools if t in spec.tools)
label = ", ".join(affected)
_check_credential(spec, cred_name, label)
# Single provider — existing behavior
if len(unchecked) == 1:
cred_name = unchecked[0]
checked.add(cred_name)
spec = CREDENTIAL_SPECS[cred_name]
if not spec.required:
continue
affected = sorted(t for t in required_tools if t in spec.tools)
_check_credential(spec, cred_name, affected_tools=affected, affected_node_types=[])
continue
# Multi-provider (e.g. send_email → resend OR google):
# satisfied if ANY provider credential is available.
available_cn = None
for cn in unchecked:
spec = CREDENTIAL_SPECS[cn]
cred_id = spec.credential_id or cn
if store.is_available(cred_id):
available_cn = cn
break
if available_cn is not None:
# Found an available provider — check (and health-check) it
checked.add(available_cn)
spec = CREDENTIAL_SPECS[available_cn]
affected = sorted(t for t in required_tools if t in spec.tools)
_check_credential(spec, available_cn, affected_tools=affected, affected_node_types=[])
else:
# None available — report ALL alternatives so the modal can show them
group_key = tool_name # e.g. "send_email"
for cn in unchecked:
checked.add(cn)
spec = CREDENTIAL_SPECS[cn]
affected = sorted(t for t in required_tools if t in spec.tools)
_check_credential(
spec,
cn,
affected_tools=affected,
affected_node_types=[],
alternative_group=group_key,
)
# Check node type credentials (e.g., ANTHROPIC_API_KEY for LLM nodes)
for nt in sorted(node_types):
@@ -214,8 +411,7 @@ def validate_agent_credentials(nodes: list, quiet: bool = False, verify: bool =
if not spec.required:
continue
affected_types = sorted(t for t in node_types if t in spec.node_types)
label = ", ".join(affected_types) + " nodes"
_check_credential(spec, cred_name, label)
_check_credential(spec, cred_name, affected_tools=[], affected_node_types=affected_types)
# Phase 2: health-check present credentials
if to_verify:
@@ -225,69 +421,52 @@ def validate_agent_credentials(nodes: list, quiet: bool = False, verify: bool =
check_credential_health = None # type: ignore[assignment]
if check_credential_health is not None:
for cred_name, label in to_verify:
spec = CREDENTIAL_SPECS[cred_name]
cred_id = spec.credential_id or cred_name
value = store.get(cred_id)
for idx in to_verify:
status = all_credentials[idx]
spec = CREDENTIAL_SPECS[status.credential_name]
value = store.get(status.credential_id)
if not value:
continue
try:
result = check_credential_health(
cred_name,
status.credential_name,
value,
health_check_endpoint=spec.health_check_endpoint,
health_check_method=spec.health_check_method,
)
if not result.valid:
entry = f" {spec.env_var} for {label}{result.message}"
if spec.help_url:
entry += f"\n Get a new key at: {spec.help_url}"
invalid.append(entry)
failed_cred_names.append(cred_name)
elif result.valid:
status.valid = result.valid
status.validation_message = result.message
if result.valid:
# Persist identity from health check (best-effort)
identity_data = result.details.get("identity")
if identity_data and isinstance(identity_data, dict):
try:
cred_obj = store.get_credential(cred_id, refresh_if_needed=False)
cred_obj = store.get_credential(
status.credential_id, refresh_if_needed=False
)
if cred_obj:
cred_obj.set_identity(**identity_data)
store.save_credential(cred_obj)
except Exception:
pass # Identity persistence is best-effort
except Exception as exc:
logger.debug("Health check for %s failed: %s", cred_name, exc)
logger.debug("Health check for %s failed: %s", status.credential_name, exc)
errors = missing + invalid + aden_not_connected
if errors:
validation_result = CredentialValidationResult(
credentials=all_credentials,
has_aden_key=has_aden_key,
)
if raise_on_error and validation_result.has_errors:
from framework.credentials.models import CredentialError
lines: list[str] = []
if missing:
lines.append("Missing credentials:\n")
lines.extend(missing)
if invalid:
if missing:
lines.append("")
lines.append("Invalid or expired credentials:\n")
lines.extend(invalid)
if aden_not_connected:
if missing or invalid:
lines.append("")
lines.append(
"Aden integrations not connected "
"(ADEN_API_KEY is set but OAuth tokens unavailable):\n"
)
lines.extend(aden_not_connected)
lines.append(
"\nTo fix: run /hive-credentials in Claude Code."
"\nIf you've already set up credentials, "
"restart your terminal to load them."
)
exc = CredentialError("\n".join(lines))
exc.failed_cred_names = failed_cred_names # type: ignore[attr-defined]
exc = CredentialError(validation_result.format_error_message())
exc.validation_result = validation_result # type: ignore[attr-defined]
exc.failed_cred_names = validation_result.failed_cred_names # type: ignore[attr-defined]
raise exc
return validation_result
def build_setup_session_from_error(
credential_error: Exception,
@@ -296,56 +475,44 @@ def build_setup_session_from_error(
):
"""Build a ``CredentialSetupSession`` that covers all failed credentials.
``validate_agent_credentials`` attaches ``failed_cred_names`` (both missing
and invalid) to the ``CredentialError``. This helper converts those names
into ``MissingCredential`` entries so the setup screen can re-collect them.
Falls back to the normal ``from_nodes`` / ``from_agent_path`` detection
when the attribute is absent.
Uses the ``CredentialValidationResult`` attached to the ``CredentialError``
when available. Falls back to re-detecting from nodes / agent_path.
Args:
credential_error: The ``CredentialError`` raised by validation.
nodes: Graph nodes (preferred; avoids re-loading from disk).
agent_path: Agent directory path (used when nodes aren't available).
"""
from framework.credentials.setup import CredentialSetupSession, MissingCredential
from framework.credentials.setup import CredentialSetupSession
# Start with normal detection (picks up truly missing creds)
# Prefer the validation result attached to the exception
result: CredentialValidationResult | None = getattr(credential_error, "validation_result", None)
if result is not None:
missing = [_status_to_missing(c) for c in result.failed]
return CredentialSetupSession(missing)
# Fallback: re-detect from nodes or agent_path
if nodes is not None:
session = CredentialSetupSession.from_nodes(nodes)
return CredentialSetupSession.from_nodes(nodes)
elif agent_path is not None:
session = CredentialSetupSession.from_agent_path(agent_path)
else:
session = CredentialSetupSession(missing=[])
return CredentialSetupSession.from_agent_path(agent_path)
return CredentialSetupSession(missing=[])
# Add credentials that are present but failed health checks
already = {m.credential_name for m in session.missing}
failed_names: list[str] = getattr(credential_error, "failed_cred_names", [])
if failed_names:
try:
from aden_tools.credentials import CREDENTIAL_SPECS
for name in failed_names:
if name in already:
continue
spec = CREDENTIAL_SPECS.get(name)
if spec is None:
continue
session.missing.append(
MissingCredential(
credential_name=name,
env_var=spec.env_var,
description=spec.description,
help_url=spec.help_url,
api_key_instructions=spec.api_key_instructions,
tools=list(spec.tools),
aden_supported=spec.aden_supported,
direct_api_key_supported=spec.direct_api_key_supported,
credential_id=spec.credential_id,
credential_key=spec.credential_key,
)
)
except ImportError:
pass
def _status_to_missing(c: CredentialStatus):
"""Convert a CredentialStatus to a MissingCredential for the setup flow."""
from framework.credentials.setup import MissingCredential
return session
return MissingCredential(
credential_name=c.credential_name,
env_var=c.env_var,
description=c.description,
help_url=c.help_url,
api_key_instructions=c.api_key_instructions,
tools=c.tools,
node_types=c.node_types,
aden_supported=c.aden_supported,
direct_api_key_supported=c.direct_api_key_supported,
credential_id=c.credential_id,
credential_key=c.credential_key,
)
+6 -1
@@ -46,9 +46,11 @@ class ActiveNodeClientIO(NodeClientIO):
self,
node_id: str,
event_bus: EventBus | None = None,
execution_id: str = "",
) -> None:
self.node_id = node_id
self._event_bus = event_bus
self._execution_id = execution_id
self._output_queue: asyncio.Queue[str | None] = asyncio.Queue()
self._output_snapshot = ""
@@ -66,6 +68,7 @@ class ActiveNodeClientIO(NodeClientIO):
node_id=self.node_id,
content=content,
snapshot=self._output_snapshot,
execution_id=self._execution_id or None,
)
if is_final:
@@ -83,6 +86,7 @@ class ActiveNodeClientIO(NodeClientIO):
stream_id=self.node_id,
node_id=self.node_id,
prompt=prompt,
execution_id=self._execution_id or None,
)
try:
@@ -158,11 +162,12 @@ class ClientIOGateway:
def __init__(self, event_bus: EventBus | None = None) -> None:
self._event_bus = event_bus
def create_io(self, node_id: str, client_facing: bool) -> NodeClientIO:
def create_io(self, node_id: str, client_facing: bool, execution_id: str = "") -> NodeClientIO:
if client_facing:
return ActiveNodeClientIO(
node_id=node_id,
event_bus=self._event_bus,
execution_id=execution_id,
)
return InertNodeClientIO(
node_id=node_id,
+373 -8
@@ -5,6 +5,7 @@ from __future__ import annotations
import json
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Literal, Protocol, runtime_checkable
@@ -30,6 +31,8 @@ class Message:
# Phase-aware compaction metadata (continuous mode)
phase_id: str | None = None
is_transition_marker: bool = False
# True when this message is real human input (from /chat), not a system prompt
is_client_input: bool = False
def to_llm_dict(self) -> dict[str, Any]:
"""Convert to OpenAI-format message dict."""
@@ -67,6 +70,8 @@ class Message:
d["phase_id"] = self.phase_id
if self.is_transition_marker:
d["is_transition_marker"] = self.is_transition_marker
if self.is_client_input:
d["is_client_input"] = self.is_client_input
return d
@classmethod
@@ -81,19 +86,138 @@ class Message:
is_error=data.get("is_error", False),
phase_id=data.get("phase_id"),
is_transition_marker=data.get("is_transition_marker", False),
is_client_input=data.get("is_client_input", False),
)
def _extract_spillover_filename(content: str) -> str | None:
"""Extract spillover filename from a truncated tool result.
"""Extract spillover filename from a tool result annotation.
Matches the pattern produced by EventLoopNode._truncate_tool_result():
"saved to 'tool_github_list_stargazers_abc123.txt'"
Matches patterns produced by EventLoopNode._truncate_tool_result():
- Large result: "saved to 'web_search_1.txt'"
- Small result: "[Saved to 'web_search_1.txt']"
"""
match = re.search(r"saved to '([^']+)'", content)
match = re.search(r"[Ss]aved to '([^']+)'", content)
return match.group(1) if match else None
_TC_ARG_LIMIT = 200 # max chars per tool_call argument after compaction
def _compact_tool_calls(tool_calls: list[dict[str, Any]]) -> list[dict[str, Any]]:
"""Truncate tool_call arguments to save context tokens during compaction.
Preserves ``id``, ``type``, and ``function.name`` exactly. When arguments
exceed ``_TC_ARG_LIMIT``, replaces the full JSON string with a compact
**valid** JSON summary. The Anthropic API parses tool_call arguments and
rejects requests with malformed JSON (e.g. unterminated strings), so we
must never produce broken JSON here.
"""
compact = []
for tc in tool_calls:
func = tc.get("function", {})
args = func.get("arguments", "")
if len(args) > _TC_ARG_LIMIT:
# Build a valid JSON summary instead of slicing mid-string.
# Try to extract top-level keys for a meaningful preview.
try:
parsed = json.loads(args)
if isinstance(parsed, dict):
# Preserve key names, truncate values
summary_parts = []
for k, v in parsed.items():
v_str = str(v)
if len(v_str) > 60:
v_str = v_str[:60] + "..."
summary_parts.append(f"{k}={v_str}")
summary = ", ".join(summary_parts)
if len(summary) > _TC_ARG_LIMIT:
summary = summary[:_TC_ARG_LIMIT] + "..."
args = json.dumps({"_compacted": summary})
else:
args = json.dumps({"_compacted": str(parsed)[:_TC_ARG_LIMIT]})
except (json.JSONDecodeError, TypeError):
# Args were already invalid JSON — wrap the preview safely
args = json.dumps({"_compacted": args[:_TC_ARG_LIMIT]})
compact.append(
{
"id": tc.get("id", ""),
"type": tc.get("type", "function"),
"function": {
"name": func.get("name", ""),
"arguments": args,
},
}
)
return compact
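A hedged before/after sketch (tool name and arguments invented):

    big_args = json.dumps({"query": "x" * 300, "limit": 10})
    [tc] = _compact_tool_calls([{
        "id": "tc_1",
        "type": "function",
        "function": {"name": "web_search", "arguments": big_args},
    }])
    # Arguments are replaced by a short, still-valid JSON summary, e.g.
    # {"_compacted": "query=xxx...<truncated>, limit=10"}
    json.loads(tc["function"]["arguments"])  # parses cleanly; no API rejection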
def extract_tool_call_history(messages: list[Message], max_entries: int = 30) -> str:
"""Build a compact tool call history from a list of messages.
Used in compaction summaries to prevent the LLM from re-calling
tools it already called. Extracts tool call details, files saved,
outputs set, and errors encountered.
"""
tool_calls_detail: dict[str, list[str]] = {}
files_saved: list[str] = []
outputs_set: list[str] = []
errors: list[str] = []
def _summarize_input(name: str, args: dict) -> str:
if name == "web_search":
return args.get("query", "")
if name == "web_scrape":
return args.get("url", "")
if name in ("load_data", "save_data"):
return args.get("filename", "")
return ""
for msg in messages:
if msg.role == "assistant" and msg.tool_calls:
for tc in msg.tool_calls:
func = tc.get("function", {})
name = func.get("name", "unknown")
try:
args = json.loads(func.get("arguments", "{}"))
except (json.JSONDecodeError, TypeError):
args = {}
summary = _summarize_input(name, args)
tool_calls_detail.setdefault(name, []).append(summary)
if name == "save_data" and args.get("filename"):
files_saved.append(args["filename"])
if name == "set_output" and args.get("key"):
outputs_set.append(args["key"])
if msg.role == "tool" and msg.is_error:
preview = msg.content[:120].replace("\n", " ")
errors.append(preview)
parts: list[str] = []
if tool_calls_detail:
lines: list[str] = []
for name, inputs in list(tool_calls_detail.items())[:max_entries]:
count = len(inputs)
non_empty = [s for s in inputs if s]
if non_empty:
detail_lines = [f" - {s[:120]}" for s in non_empty[:8]]
lines.append(f" {name} ({count}x):\n" + "\n".join(detail_lines))
else:
lines.append(f" {name} ({count}x)")
parts.append("TOOLS ALREADY CALLED:\n" + "\n".join(lines))
if files_saved:
unique = list(dict.fromkeys(files_saved))
parts.append("FILES SAVED: " + ", ".join(unique))
if outputs_set:
unique = list(dict.fromkeys(outputs_set))
parts.append("OUTPUTS SET: " + ", ".join(unique))
if errors:
parts.append("ERRORS (do NOT retry these):\n" + "\n".join(f" - {e}" for e in errors[:10]))
return "\n\n".join(parts)
# ---------------------------------------------------------------------------
# ConversationStore protocol (Phase 2)
# ---------------------------------------------------------------------------
@@ -248,6 +372,7 @@ class NodeConversation:
content: str,
*,
is_transition_marker: bool = False,
is_client_input: bool = False,
) -> Message:
msg = Message(
seq=self._next_seq,
@@ -255,6 +380,7 @@ class NodeConversation:
content=content,
phase_id=self._current_phase,
is_transition_marker=is_transition_marker,
is_client_input=is_client_input,
)
self._messages.append(msg)
self._next_seq += 1
@@ -313,9 +439,36 @@ class NodeConversation:
def _repair_orphaned_tool_calls(
msgs: list[dict[str, Any]],
) -> list[dict[str, Any]]:
"""Ensure every tool_call has a matching tool-result message."""
"""Ensure tool_call / tool_result pairs are consistent.
1. **Orphaned tool results** (tool_result with no preceding tool_use)
are dropped. This happens when compaction removes an assistant
message but leaves its tool-result messages behind.
2. **Orphaned tool calls** (tool_use with no following tool_result)
get a synthetic error result appended. This happens when a loop
is cancelled mid-tool-execution.
"""
# Pass 1: collect all tool_call IDs from assistant messages so we
# can identify orphaned tool-result messages.
all_tool_call_ids: set[str] = set()
for m in msgs:
if m.get("role") == "assistant":
for tc in m.get("tool_calls") or []:
tc_id = tc.get("id")
if tc_id:
all_tool_call_ids.add(tc_id)
# Pass 2: build repaired list — drop orphaned tool results, patch
# missing tool results.
repaired: list[dict[str, Any]] = []
for i, m in enumerate(msgs):
# Drop tool-result messages whose tool_call_id has no matching
# tool_use in any assistant message (orphaned by compaction).
if m.get("role") == "tool":
tid = m.get("tool_call_id")
if tid and tid not in all_tool_call_ids:
continue # skip orphaned result
repaired.append(m)
tool_calls = m.get("tool_calls")
if m.get("role") != "assistant" or not tool_calls:
@@ -346,12 +499,20 @@ class NodeConversation:
"""Best available token estimate.
Uses actual API input token count when available (set via
:meth:`update_token_count`), otherwise falls back to the rough
``total_chars / 4`` heuristic.
:meth:`update_token_count`), otherwise falls back to a
``total_chars / 4`` heuristic that includes both message content
AND tool_call argument sizes.
"""
if self._last_api_input_tokens is not None:
return self._last_api_input_tokens
total_chars = sum(len(m.content) for m in self._messages)
total_chars = 0
for m in self._messages:
total_chars += len(m.content)
if m.tool_calls:
for tc in m.tool_calls:
func = tc.get("function", {})
total_chars += len(func.get("arguments", ""))
total_chars += len(func.get("name", ""))
return total_chars // 4
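For example, 100,000 characters of message content plus 4,000 characters of tool_call names and arguments estimate to (100,000 + 4,000) // 4 = 26,000 tokens.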
def update_token_count(self, actual_input_tokens: int) -> None:
@@ -580,6 +741,210 @@ class NodeConversation:
self._messages = [summary_msg] + recent_messages
self._last_api_input_tokens = None # reset; next LLM call will recalibrate
async def compact_preserving_structure(
self,
spillover_dir: str,
keep_recent: int = 4,
phase_graduated: bool = False,
aggressive: bool = False,
) -> None:
"""Structure-preserving compaction: save freeform text to file, keep tool messages.
Unlike ``compact()`` which replaces ALL old messages with a single LLM
summary, this method preserves the tool call structure (assistant
messages with tool_calls + tool result messages) that are already tiny
after pruning. Only freeform text exchanges (user messages,
text-only assistant messages) are saved to a file and removed.
When *aggressive* is True, non-essential tool call pairs are also
collapsed into a compact summary instead of being kept individually.
Only ``set_output`` calls and error results are preserved; all other
old tool pairs are replaced by a tool-call history summary.
The result: the agent retains exact knowledge of what tools it called,
where each result is stored, and can load the conversation text if
needed. No LLM summary call. No heuristics. Nothing lost.
"""
if not self._messages:
return
total = len(self._messages)
# Determine split point (same logic as compact)
if phase_graduated and self._current_phase:
split = self._find_phase_graduated_split()
else:
split = None
if split is None:
keep_recent = max(0, min(keep_recent, total - 1))
split = total - keep_recent if keep_recent > 0 else total
# Advance split past orphaned tool results at the boundary
while split < total and self._messages[split].role == "tool":
split += 1
if split == 0:
return
old_messages = self._messages[:split]
# Classify old messages: structural (keep) vs freeform (save to file)
kept_structural: list[Message] = []
freeform_lines: list[str] = []
collapsed_msgs: list[Message] = []
if aggressive:
# Aggressive: only keep set_output tool pairs and error results.
# Everything else is collapsed into a tool-call history summary.
# We need to track tool_call IDs to pair assistant messages with
# their tool results.
protected_tc_ids: set[str] = set()
collapsible_tc_ids: set[str] = set()
# First pass: classify assistant messages
for msg in old_messages:
if msg.role != "assistant" or not msg.tool_calls:
continue
has_protected = any(
tc.get("function", {}).get("name") == "set_output" for tc in msg.tool_calls
)
tc_ids = {tc.get("id", "") for tc in msg.tool_calls}
if has_protected:
protected_tc_ids |= tc_ids
else:
collapsible_tc_ids |= tc_ids
# Second pass: classify all messages
for msg in old_messages:
if msg.role == "tool":
tc_id = msg.tool_use_id or ""
if tc_id in protected_tc_ids:
kept_structural.append(msg)
elif msg.is_error:
# Error results are always protected
kept_structural.append(msg)
# Protect the parent assistant message too
protected_tc_ids.add(tc_id)
else:
collapsed_msgs.append(msg)
elif msg.role == "assistant" and msg.tool_calls:
tc_ids = {tc.get("id", "") for tc in msg.tool_calls}
if tc_ids & protected_tc_ids:
# Has at least one protected tool call — keep entire msg
compact_tcs = _compact_tool_calls(msg.tool_calls)
kept_structural.append(
Message(
seq=msg.seq,
role=msg.role,
content="",
tool_calls=compact_tcs,
is_error=msg.is_error,
phase_id=msg.phase_id,
is_transition_marker=msg.is_transition_marker,
)
)
else:
collapsed_msgs.append(msg)
else:
# Freeform text — save to file
role_label = msg.role
text = msg.content
if len(text) > 2000:
text = text[:2000] + "..."
freeform_lines.append(f"[{role_label}] (seq={msg.seq}): {text}")
else:
# Standard mode: keep all tool call pairs as structural
for msg in old_messages:
if msg.role == "tool":
kept_structural.append(msg)
elif msg.role == "assistant" and msg.tool_calls:
compact_tcs = _compact_tool_calls(msg.tool_calls)
kept_structural.append(
Message(
seq=msg.seq,
role=msg.role,
content="",
tool_calls=compact_tcs,
is_error=msg.is_error,
phase_id=msg.phase_id,
is_transition_marker=msg.is_transition_marker,
)
)
else:
role_label = msg.role
text = msg.content
if len(text) > 2000:
text = text[:2000] + "..."
freeform_lines.append(f"[{role_label}] (seq={msg.seq}): {text}")
# Write freeform text to a numbered conversation file
spill_path = Path(spillover_dir)
spill_path.mkdir(parents=True, exist_ok=True)
# Find next conversation file number
existing = sorted(spill_path.glob("conversation_*.md"))
next_n = len(existing) + 1
conv_filename = f"conversation_{next_n}.md"
if freeform_lines:
header = f"## Compacted conversation (messages 1-{split})\n\n"
conv_text = header + "\n\n".join(freeform_lines)
(spill_path / conv_filename).write_text(conv_text, encoding="utf-8")
else:
# Nothing to save — skip file creation
conv_filename = ""
# Build reference message
ref_parts: list[str] = []
if conv_filename:
ref_parts.append(
f"[Previous conversation saved to '{conv_filename}'. "
f"Use load_data('{conv_filename}') to review if needed.]"
)
elif not collapsed_msgs:
ref_parts.append("[Previous freeform messages compacted.]")
# Aggressive: add collapsed tool-call history to the reference
if collapsed_msgs:
tool_history = extract_tool_call_history(collapsed_msgs)
if tool_history:
ref_parts.append(tool_history)
elif not ref_parts:
ref_parts.append("[Previous tool calls compacted.]")
ref_content = "\n\n".join(ref_parts)
# Use a seq just before the first kept message
recent_messages = list(self._messages[split:])
if kept_structural:
ref_seq = kept_structural[0].seq - 1
elif recent_messages:
ref_seq = recent_messages[0].seq - 1
else:
ref_seq = self._next_seq
self._next_seq += 1
ref_msg = Message(seq=ref_seq, role="user", content=ref_content)
# Persist: delete old messages from store, write reference + kept structural.
# In aggressive mode, collapsed messages may be interspersed with kept
# messages, so we delete everything before the recent boundary and
# rewrite only what we want to keep.
if self._store:
recent_boundary = recent_messages[0].seq if recent_messages else self._next_seq
await self._store.delete_parts_before(recent_boundary)
# Write the reference message
await self._store.write_part(ref_msg.seq, ref_msg.to_storage_dict())
# Write kept structural messages (they may have been modified)
for msg in kept_structural:
await self._store.write_part(msg.seq, msg.to_storage_dict())
await self._store.write_cursor({"next_seq": self._next_seq})
# Reassemble: reference + kept structural (in original order) + recent
self._messages = [ref_msg] + kept_structural + recent_messages
self._last_api_input_tokens = None
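A hedged usage sketch mirroring the escalation this diff applies at phase boundaries (the directory is illustrative):

    before = conversation.usage_ratio()
    await conversation.compact_preserving_structure(spillover_dir="session/data", keep_recent=4)
    if conversation.usage_ratio() >= 0.9 * before:
        # Barely shrank: collapse non-essential tool pairs too.
        await conversation.compact_preserving_structure(
            spillover_dir="session/data", keep_recent=4, aggressive=True
        )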
def _find_phase_graduated_split(self) -> int | None:
"""Find split point that preserves current + previous phase.
+32 -7
@@ -103,7 +103,12 @@ FEEDBACK: (reason if RETRY, empty if ACCEPT)"""
def _extract_recent_context(conversation: NodeConversation, max_messages: int = 10) -> str:
"""Extract recent conversation messages for evaluation."""
"""Extract recent conversation messages for evaluation.
Includes tool-call summaries from assistant messages so the judge
can see what tools were invoked (especially set_output values) even
when the assistant message body is empty.
"""
messages = conversation.messages
recent = messages[-max_messages:] if len(messages) > max_messages else messages
@@ -112,8 +117,24 @@ def _extract_recent_context(conversation: NodeConversation, max_messages: int =
role = msg.role.upper()
content = msg.content or ""
# Truncate long tool results
if msg.role == "tool" and len(content) > 200:
content = content[:200] + "..."
if msg.role == "tool" and len(content) > 500:
content = content[:500] + "..."
# For assistant messages with empty content but tool_calls,
# summarise the tool calls so the judge knows what happened.
if msg.role == "assistant" and not content.strip():
tool_calls = getattr(msg, "tool_calls", None)
if tool_calls:
tc_parts = []
for tc in tool_calls:
fn = tc.get("function", {}) if isinstance(tc, dict) else {}
name = fn.get("name", "")
args = fn.get("arguments", "")
if name == "set_output":
# Show the value so the judge can evaluate content quality
tc_parts.append(f" called {name}({args[:1000]})")
else:
tc_parts.append(f" called {name}(...)")
content = "Tool calls:\n" + "\n".join(tc_parts)
if content.strip():
parts.append(f"[{role}]: {content.strip()}")
@@ -125,6 +146,10 @@ def _format_outputs(accumulator_state: dict[str, Any]) -> str:
Lists and dicts get structural formatting so the judge can assess
quantity and structure, not just a truncated stringification.
String values are given a generous limit (2000 chars) so the judge
can verify substantive content (e.g. a research brief with key
questions, scope boundaries, and deliverables).
"""
if not accumulator_state:
return "(none)"
@@ -144,12 +169,12 @@ def _format_outputs(accumulator_state: dict[str, Any]) -> str:
val_str += f"\n ... and {len(value) - 8} more"
elif isinstance(value, dict):
val_str = str(value)
if len(val_str) > 400:
val_str = val_str[:400] + "..."
if len(val_str) > 2000:
val_str = val_str[:2000] + "..."
else:
val_str = str(value)
if len(val_str) > 300:
val_str = val_str[:300] + "..."
if len(val_str) > 2000:
val_str = val_str[:2000] + "..."
parts.append(f" {key}: {val_str}")
return "\n".join(parts)
+55
@@ -338,6 +338,10 @@ class AsyncEntryPointSpec(BaseModel):
max_concurrent: int = Field(
default=10, description="Maximum concurrent executions for this entry point"
)
max_resurrections: int = Field(
default=3,
description="Auto-restart on non-fatal failure (0 to disable)",
)
model_config = {"extra": "allow"}
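A hedged construction sketch (the entry node name is invented; any other required fields are omitted, and ``entry_node`` is assumed from its use elsewhere in this diff):

    spec = AsyncEntryPointSpec(entry_node="email_poller", max_concurrent=5, max_resurrections=0)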
@@ -644,6 +648,13 @@ class GraphSpec(BaseModel):
for edge in self.get_outgoing_edges(current):
to_visit.append(edge.target)
# Also mark sub-agents as reachable (they're invoked via delegate_to_sub_agent, not edges)
for node in self.nodes:
if node.id in reachable:
sub_agents = getattr(node, "sub_agents", []) or []
for sub_agent_id in sub_agents:
reachable.add(sub_agent_id)
# Build set of async entry point nodes for quick lookup
async_entry_nodes = {ep.entry_node for ep in self.async_entry_points}
@@ -695,4 +706,48 @@ class GraphSpec(BaseModel):
else:
seen_keys[key] = node_id
# GCU nodes must only be used as subagents
gcu_node_ids = {n.id for n in self.nodes if n.node_type == "gcu"}
if gcu_node_ids:
# GCU nodes must not be entry nodes
if self.entry_node in gcu_node_ids:
errors.append(
f"GCU node '{self.entry_node}' is used as entry node. "
"GCU nodes must only be used as subagents via delegate_to_sub_agent()."
)
# GCU nodes must not be terminal nodes
for term in self.terminal_nodes:
if term in gcu_node_ids:
errors.append(
f"GCU node '{term}' is used as terminal node. "
"GCU nodes must only be used as subagents."
)
# GCU nodes must not be connected via edges
for edge in self.edges:
if edge.source in gcu_node_ids:
errors.append(
f"GCU node '{edge.source}' is used as edge source (edge '{edge.id}'). "
"GCU nodes must only be used as subagents, not connected via edges."
)
if edge.target in gcu_node_ids:
errors.append(
f"GCU node '{edge.target}' is used as edge target (edge '{edge.id}'). "
"GCU nodes must only be used as subagents, not connected via edges."
)
# GCU nodes must be referenced in at least one parent's sub_agents
referenced_subagents = set()
for node in self.nodes:
for sa_id in node.sub_agents or []:
referenced_subagents.add(sa_id)
orphaned = gcu_node_ids - referenced_subagents
for nid in orphaned:
errors.append(
f"GCU node '{nid}' is not referenced in any node's sub_agents list. "
"GCU nodes must be declared as subagents of a parent node."
)
return errors
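A hedged shape sketch of a valid GCU placement (plain dicts, not real NodeSpec objects):

    nodes = [
        {"id": "planner", "node_type": "event_loop", "sub_agents": ["helper_gcu"]},
        {"id": "helper_gcu", "node_type": "gcu", "sub_agents": []},
    ]
    entry_node = "planner"   # a GCU node may not be the entry or a terminal node
    edges = []               # and no edge may reference "helper_gcu"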
File diff suppressed because it is too large
+248 -28
@@ -138,6 +138,7 @@ class GraphExecutor:
accounts_prompt: str = "",
accounts_data: list[dict] | None = None,
tool_provider_map: dict[str, str] | None = None,
dynamic_tools_provider: Callable | None = None,
):
"""
Initialize the executor.
@@ -160,6 +161,8 @@ class GraphExecutor:
accounts_prompt: Connected accounts block for system prompt injection
accounts_data: Raw account data for per-node prompt generation
tool_provider_map: Tool name to provider name mapping for account routing
dynamic_tools_provider: Optional callback returning current
tool list (for mode switching)
"""
self.runtime = runtime
self.llm = llm
@@ -178,6 +181,7 @@ class GraphExecutor:
self.accounts_prompt = accounts_prompt
self.accounts_data = accounts_data
self.tool_provider_map = tool_provider_map
self.dynamic_tools_provider = dynamic_tools_provider
# Initialize output cleaner
self.cleansing_config = cleansing_config or CleansingConfig()
@@ -193,6 +197,9 @@ class GraphExecutor:
# Pause/resume control
self._pause_requested = asyncio.Event()
# Track the currently executing node for external injection routing
self.current_node_id: str | None = None
def _write_progress(
self,
current_node: str,
@@ -283,6 +290,125 @@ class GraphExecutor:
return errors
# Max chars of formatted messages before proactively splitting for LLM.
_PHASE_LLM_CHAR_LIMIT = 240_000
_PHASE_LLM_MAX_DEPTH = 10
async def _phase_llm_compact(
self,
conversation: Any,
next_spec: NodeSpec,
messages: list,
_depth: int = 0,
) -> str:
"""Summarise messages for phase-boundary compaction.
Uses the same recursive binary-search splitting as EventLoopNode.
"""
from framework.graph.conversation import extract_tool_call_history
from framework.graph.event_loop_node import _is_context_too_large_error
if _depth > self._PHASE_LLM_MAX_DEPTH:
raise RuntimeError("Phase LLM compaction recursion limit")
# Format messages
lines: list[str] = []
for m in messages:
if m.role == "tool":
c = m.content[:500] + ("..." if len(m.content) > 500 else "")
lines.append(f"[tool result]: {c}")
elif m.role == "assistant" and m.tool_calls:
names = [tc.get("function", {}).get("name", "?") for tc in m.tool_calls]
lines.append(
f"[assistant (calls: {', '.join(names)})]: "
f"{m.content[:200] if m.content else ''}"
)
else:
lines.append(f"[{m.role}]: {m.content}")
formatted = "\n\n".join(lines)
# Proactive split
if len(formatted) > self._PHASE_LLM_CHAR_LIMIT and len(messages) > 1:
summary = await self._phase_llm_compact_split(
conversation,
next_spec,
messages,
_depth,
)
else:
max_tokens = getattr(conversation, "_max_history_tokens", 32000)
target_tokens = max_tokens // 2
target_chars = target_tokens * 4
prompt = (
"You are compacting an AI agent's conversation history "
"at a phase boundary.\n\n"
f"NEXT PHASE: {next_spec.name}\n"
)
if next_spec.description:
prompt += f"NEXT PHASE PURPOSE: {next_spec.description}\n"
prompt += (
f"\nCONVERSATION MESSAGES:\n{formatted}\n\n"
"INSTRUCTIONS:\n"
f"Write a summary of approximately {target_chars} characters "
f"(~{target_tokens} tokens).\n"
"Preserve user-stated rules, constraints, and preferences "
"verbatim. Preserve key decisions and results from earlier "
"phases. Preserve context needed for the next phase.\n"
)
summary_budget = max(1024, max_tokens // 2)
try:
response = await self._llm.acomplete(
messages=[{"role": "user", "content": prompt}],
system=(
"You are a conversation compactor. Write a detailed "
"summary preserving context for the next phase."
),
max_tokens=summary_budget,
)
summary = response.content
except Exception as e:
if _is_context_too_large_error(e) and len(messages) > 1:
summary = await self._phase_llm_compact_split(
conversation,
next_spec,
messages,
_depth,
)
else:
raise
# Append tool history at top level only
if _depth == 0:
tool_history = extract_tool_call_history(messages)
if tool_history and "TOOLS ALREADY CALLED" not in summary:
summary += "\n\n" + tool_history
return summary
async def _phase_llm_compact_split(
self,
conversation: Any,
next_spec: NodeSpec,
messages: list,
_depth: int,
) -> str:
"""Split messages in half and summarise each half."""
mid = max(1, len(messages) // 2)
s1 = await self._phase_llm_compact(
conversation,
next_spec,
messages[:mid],
_depth + 1,
)
s2 = await self._phase_llm_compact(
conversation,
next_spec,
messages[mid:],
_depth + 1,
)
return s1 + "\n\n" + s2
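For example, a history that formats to roughly 500k characters first splits into two ~250k halves; each half is summarised independently (recursing again if a half still exceeds the 240k limit or the provider rejects it as too large), and the partial summaries are concatenated, with the tool-call history appended only once at the top level.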
async def execute(
self,
graph: GraphSpec,
@@ -338,6 +464,9 @@ class GraphExecutor:
cumulative_tool_names: set[str] = set()
cumulative_output_keys: list[str] = [] # Output keys from all visited nodes
# Build node registry for subagent lookup
node_registry: dict[str, NodeSpec] = {node.id: node for node in graph.nodes}
# Initialize checkpoint store if checkpointing is enabled
checkpoint_store: CheckpointStore | None = None
if checkpoint_config and checkpoint_config.enabled and self._storage_path:
@@ -694,6 +823,9 @@ class GraphExecutor:
# Execute this node, then pause
# (We'll check again after execution and save state)
# Expose current node for external injection routing
self.current_node_id = current_node_id
self.logger.info(f"\n▶ Step {steps}: {node_spec.name} ({node_spec.node_type})")
self.logger.info(f" Inputs: {node_spec.input_keys}")
self.logger.info(f" Outputs: {node_spec.output_keys}")
@@ -709,6 +841,14 @@ class GraphExecutor:
if k not in cumulative_output_keys:
cumulative_output_keys.append(k)
# Build resume narrative (Layer 2) when restoring a session
# so the EventLoopNode can rebuild the full 3-layer system prompt.
_resume_narrative = ""
if _is_resuming and path:
from framework.graph.prompt_composer import build_narrative
_resume_narrative = build_narrative(memory, path, graph)
# Build context for node
ctx = self._build_context(
node_spec=node_spec,
@@ -721,6 +861,10 @@ class GraphExecutor:
override_tools=cumulative_tools if is_continuous else None,
cumulative_output_keys=cumulative_output_keys if is_continuous else None,
event_triggered=_event_triggered,
node_registry=node_registry,
identity_prompt=getattr(graph, "identity_prompt", ""),
narrative=_resume_narrative,
graph=graph,
)
# Log actual input data being read
@@ -771,7 +915,8 @@ class GraphExecutor:
# Emit node-started event (skip event_loop nodes — they emit their own)
if self._event_bus and node_spec.node_type != "event_loop":
await self._event_bus.emit_node_loop_started(
stream_id=self._stream_id, node_id=current_node_id,
stream_id=self._stream_id,
node_id=current_node_id,
execution_id=self._execution_id,
)
@@ -782,7 +927,9 @@ class GraphExecutor:
# Emit node-completed event (skip event_loop nodes)
if self._event_bus and node_spec.node_type != "event_loop":
await self._event_bus.emit_node_loop_completed(
stream_id=self._stream_id, node_id=current_node_id, iterations=1,
stream_id=self._stream_id,
node_id=current_node_id,
iterations=1,
execution_id=self._execution_id,
)
@@ -1117,6 +1264,7 @@ class GraphExecutor:
source_result=result,
source_node_spec=node_spec,
path=path,
node_registry=node_registry,
)
total_tokens += branch_tokens
@@ -1266,27 +1414,78 @@ class GraphExecutor:
# Set current phase for phase-aware compaction
continuous_conversation.set_current_phase(next_spec.id)
# Opportunistic compaction at transition:
# 1. Prune old tool results (free, no LLM call)
# 2. If still over 80%, do a phase-graduated compact
# Phase-boundary compaction (same flow as EventLoopNode._compact)
if continuous_conversation.usage_ratio() > 0.5:
await continuous_conversation.prune_old_tool_results(
protect_tokens=2000,
)
if continuous_conversation.needs_compaction():
_phase_ratio = continuous_conversation.usage_ratio()
self.logger.info(
" Phase-boundary compaction (%.0f%% usage)",
continuous_conversation.usage_ratio() * 100,
_phase_ratio * 100,
)
summary = (
f"Summary of earlier phases (before {next_spec.name}). "
"See transition markers for phase details."
)
await continuous_conversation.compact(
summary,
keep_recent=4,
phase_graduated=True,
_data_dir = (
str(self._storage_path / "data") if self._storage_path else None
)
# Step 1: Structural compaction (>=80%)
if _data_dir:
_pre = continuous_conversation.usage_ratio()
await continuous_conversation.compact_preserving_structure(
spillover_dir=_data_dir,
keep_recent=4,
phase_graduated=True,
)
if continuous_conversation.usage_ratio() >= 0.9 * _pre:
await continuous_conversation.compact_preserving_structure(
spillover_dir=_data_dir,
keep_recent=4,
phase_graduated=True,
aggressive=True,
)
# Step 2: LLM compaction (>95%)
if (
continuous_conversation.usage_ratio() > 0.95
and self._llm is not None
):
self.logger.info(
" LLM phase-boundary compaction (%.0f%% usage)",
continuous_conversation.usage_ratio() * 100,
)
try:
_llm_summary = await self._phase_llm_compact(
continuous_conversation,
next_spec,
list(continuous_conversation.messages),
)
await continuous_conversation.compact(
_llm_summary,
keep_recent=2,
phase_graduated=True,
)
except Exception as e:
self.logger.warning(
" Phase LLM compaction failed: %s",
e,
)
# Step 3: Emergency (only if still over budget)
if continuous_conversation.needs_compaction():
self.logger.warning(
" Emergency phase compaction (%.0f%%)",
continuous_conversation.usage_ratio() * 100,
)
summary = (
f"Summary of earlier phases "
f"(before {next_spec.name}). "
"See transition markers for phase details."
)
await continuous_conversation.compact(
summary,
keep_recent=1,
phase_graduated=True,
)
# Update input_data for next node
input_data = result.output
@@ -1361,9 +1560,7 @@ class GraphExecutor:
try:
import json as _json
cursor_path = (
self._storage_path / "conversations" / "cursor.json"
)
cursor_path = self._storage_path / "conversations" / "cursor.json"
if cursor_path.exists():
cursor_data = _json.loads(cursor_path.read_text(encoding="utf-8"))
wip_outputs = cursor_data.get("outputs", {})
@@ -1464,9 +1661,7 @@ class GraphExecutor:
try:
import json as _json
cursor_path = (
self._storage_path / "conversations" / "cursor.json"
)
cursor_path = self._storage_path / "conversations" / "cursor.json"
if cursor_path.exists():
cursor_data = _json.loads(cursor_path.read_text(encoding="utf-8"))
for key, value in cursor_data.get("outputs", {}).items():
@@ -1542,6 +1737,10 @@ class GraphExecutor:
override_tools: list | None = None,
cumulative_output_keys: list[str] | None = None,
event_triggered: bool = False,
identity_prompt: str = "",
narrative: str = "",
node_registry: dict[str, NodeSpec] | None = None,
graph: "GraphSpec | None" = None,
) -> NodeContext:
"""Build execution context for a node."""
# Filter tools to those available to this node
@@ -1570,6 +1769,8 @@ class GraphExecutor:
node_tool_names=node_spec.tools,
)
goal_context = goal.to_prompt_context()
return NodeContext(
runtime=self.runtime,
node_id=node_spec.id,
@@ -1578,7 +1779,7 @@ class GraphExecutor:
input_data=input_data,
llm=self.llm,
available_tools=available_tools,
goal_context=goal.to_prompt_context(),
goal_context=goal_context,
goal=goal, # Pass Goal object for LLM-powered routers
max_tokens=max_tokens,
runtime_logger=self.runtime_logger,
@@ -1588,12 +1789,19 @@ class GraphExecutor:
cumulative_output_keys=cumulative_output_keys or [],
event_triggered=event_triggered,
accounts_prompt=node_accounts_prompt,
identity_prompt=identity_prompt,
narrative=narrative,
execution_id=self._execution_id,
stream_id=self._stream_id,
node_registry=node_registry or {},
all_tools=list(self.tools), # Full catalog for subagent tool resolution
shared_node_registry=self.node_registry, # For subagent escalation routing
dynamic_tools_provider=self.dynamic_tools_provider,
)
VALID_NODE_TYPES = {
"event_loop",
"gcu",
}
# Node types removed in v0.5 — provide migration guidance
REMOVED_NODE_TYPES = {
@@ -1628,8 +1836,8 @@ class GraphExecutor:
f"Must be one of: {sorted(self.VALID_NODE_TYPES)}."
)
# Create based on type (only event_loop is valid)
if node_spec.node_type == "event_loop":
# Create based on type
if node_spec.node_type in ("event_loop", "gcu"):
# Auto-create EventLoopNode with sensible defaults.
# Custom configs can still be pre-registered via node_registry.
from framework.graph.event_loop_node import EventLoopNode, LoopConfig
@@ -1659,11 +1867,11 @@ class GraphExecutor:
judge=None, # implicit judge: accept when output_keys are filled
config=LoopConfig(
max_iterations=lc.get("max_iterations", default_max_iter),
max_tool_calls_per_turn=lc.get("max_tool_calls_per_turn", 10),
max_tool_calls_per_turn=lc.get("max_tool_calls_per_turn", 30),
tool_call_overflow_margin=lc.get("tool_call_overflow_margin", 0.5),
stall_detection_threshold=lc.get("stall_detection_threshold", 3),
max_history_tokens=lc.get("max_history_tokens", 32000),
max_tool_result_chars=lc.get("max_tool_result_chars", 3_000),
max_tool_result_chars=lc.get("max_tool_result_chars", 30_000),
spillover_dir=spillover,
),
tool_executor=self.tool_executor,
@@ -1846,6 +2054,7 @@ class GraphExecutor:
source_result: NodeResult,
source_node_spec: Any,
path: list[str],
node_registry: dict[str, NodeSpec] | None = None,
) -> tuple[dict[str, NodeResult], int, int]:
"""
Execute multiple branches in parallel using asyncio.gather.
@@ -1943,13 +2152,22 @@ class GraphExecutor:
branch.retry_count = attempt
# Build context for this branch
ctx = self._build_context(node_spec, memory, goal, mapped, graph.max_tokens)
ctx = self._build_context(
node_spec,
memory,
goal,
mapped,
graph.max_tokens,
node_registry=node_registry,
graph=graph,
)
node_impl = self._get_node_implementation(node_spec, graph.cleanup_llm_model)
# Emit node-started event (skip event_loop nodes)
if self._event_bus and node_spec.node_type != "event_loop":
await self._event_bus.emit_node_loop_started(
stream_id=self._stream_id, node_id=branch.node_id,
stream_id=self._stream_id,
node_id=branch.node_id,
execution_id=self._execution_id,
)
@@ -1974,7 +2192,9 @@ class GraphExecutor:
# Emit node-completed event (skip event_loop nodes)
if self._event_bus and node_spec.node_type != "event_loop":
await self._event_bus.emit_node_loop_completed(
stream_id=self._stream_id, node_id=branch.node_id, iterations=1,
stream_id=self._stream_id,
node_id=branch.node_id,
iterations=1,
execution_id=self._execution_id,
)
+23
@@ -0,0 +1,23 @@
"""File tools MCP server constants.
Analogous to ``gcu.py``, this module defines the server name and default stdio
config so the runner can auto-register the files MCP server for any agent that
has ``event_loop`` or ``gcu`` nodes.
"""
# ---------------------------------------------------------------------------
# MCP server identity
# ---------------------------------------------------------------------------
FILES_MCP_SERVER_NAME = "files-tools"
"""Name used to identify the file tools MCP server in ``mcp_servers.json``."""
FILES_MCP_SERVER_CONFIG: dict = {
"name": FILES_MCP_SERVER_NAME,
"transport": "stdio",
"command": "uv",
"args": ["run", "python", "files_server.py", "--stdio"],
"cwd": "../../tools",
"description": "File tools for reading, writing, editing, and searching files",
}
"""Default stdio config for the file tools MCP server (relative to exports/<agent>/)."""
+86
@@ -0,0 +1,86 @@
"""GCU (browser automation) node type constants.
A ``gcu`` node is an ``event_loop`` node with two automatic enhancements:
1. A canonical browser best-practices system prompt is prepended.
2. All tools from the GCU MCP server are auto-included.
No new ``NodeProtocol`` subclass is required; the ``gcu`` type is purely a
declarative signal processed by the runner and executor at setup time.
"""
# ---------------------------------------------------------------------------
# MCP server identity
# ---------------------------------------------------------------------------
GCU_SERVER_NAME = "gcu-tools"
"""Name used to identify the GCU MCP server in ``mcp_servers.json``."""
GCU_MCP_SERVER_CONFIG: dict = {
"name": GCU_SERVER_NAME,
"transport": "stdio",
"command": "uv",
"args": ["run", "python", "-m", "gcu.server", "--stdio"],
"cwd": "../../tools",
"description": "GCU tools for browser automation",
}
"""Default stdio config for the GCU MCP server (relative to exports/<agent>/)."""
# ---------------------------------------------------------------------------
# Browser best-practices system prompt
# ---------------------------------------------------------------------------
GCU_BROWSER_SYSTEM_PROMPT = """\
# Browser Automation Best Practices
Follow these rules for reliable, efficient browser interaction.
## Reading Pages
- ALWAYS prefer `browser_snapshot` over `browser_get_text("body")`:
it returns a compact ~1-5 KB accessibility tree vs 100+ KB of raw HTML.
- Use `browser_snapshot_aria` when you need full ARIA properties
for detailed element inspection.
- Do NOT use `browser_screenshot` for reading text content:
it produces huge base64 images with no searchable text.
- Only fall back to `browser_get_text` for extracting specific
small elements by CSS selector.
## Navigation & Waiting
- Always call `browser_wait` after navigation actions
(`browser_open`, `browser_navigate`, `browser_click` on links)
to let the page load.
- NEVER re-navigate to the same URL after scrolling:
this resets your scroll position and loses loaded content.
## Scrolling
- Use large scroll amounts (~2000) when loading more content;
sites like Twitter and LinkedIn lazy-load content as you page.
- After scrolling, take a new `browser_snapshot` to see updated content.
## Error Recovery
- If a tool fails, retry once with the same approach.
- If it fails a second time, STOP retrying and switch approach.
- If `browser_snapshot` fails, try `browser_get_text` with a
specific small selector as a fallback.
- If `browser_open` fails or the page seems stale: `browser_stop`,
then `browser_start`, then retry.
## Tab Management
- Use `browser_tabs` to list open tabs when managing multiple pages.
- Pass `target_id` to tools when operating on a specific tab.
- Open background tabs with `browser_open(url=..., background=true)`
to avoid losing your current context.
- Close tabs you no longer need with `browser_close` to free resources.
## Login & Auth Walls
- If you see a "Log in" or "Sign up" prompt instead of expected
content, report the auth wall immediately; do NOT attempt to log in.
- Check for cookie consent banners and dismiss them if they block content.
## Efficiency
- Minimize tool calls; combine actions where possible.
- When a snapshot result is saved to a spillover file, use
`run_command` with grep to extract specific data rather than
re-reading the full file.
- Call `set_output` in the same turn as your last browser action
when possible; don't waste a turn.
"""
+11 -1
@@ -176,7 +176,17 @@ class Goal(BaseModel):
return True
def to_prompt_context(self) -> str:
"""Generate context string for LLM prompts."""
"""Generate context string for LLM prompts.
Returns empty string when the goal is a stub (no success criteria,
no constraints, no context). Stub goals are metadata-only: used for
graph identification but not communicated to the LLM as actionable
intent. This prevents runtime agents (e.g. the queen) from
misinterpreting their own goal as a user request.
"""
if not self.success_criteria and not self.constraints and not self.context:
return ""
lines = [
f"# Goal: {self.name}",
f"{self.description}",
+35 -1
@@ -166,7 +166,7 @@ class NodeSpec(BaseModel):
# Node behavior type
node_type: str = Field(
default="event_loop",
description="Type: 'event_loop' (recommended), 'router', 'human_input'.",
description="Type: 'event_loop' (recommended), 'gcu' (browser automation).",
)
# Data flow
@@ -204,6 +204,16 @@ class NodeSpec(BaseModel):
default=None, description="Specific model to use (defaults to graph default)"
)
# For subagent delegation
sub_agents: list[str] = Field(
default_factory=list,
description="Node IDs that can be invoked as subagents from this node",
)
# For function nodes
function: str | None = Field(
default=None, description="Function name or path for function nodes"
)
# For router nodes
routes: dict[str, str] = Field(
default_factory=dict, description="Condition -> target_node_id mapping for routers"
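A hypothetical NodeSpec exercising the new field (the exact constructor and the `id`/`name` arguments are assumptions; field names match this diff):

    spec = NodeSpec(
        id="researcher",
        name="Researcher",
        node_type="gcu",            # event_loop plus browser enhancements
        input_keys=["query"],
        output_keys=["findings"],
        sub_agents=["summarizer"],  # may be invoked as a subagent from this node
    )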
@@ -505,6 +515,11 @@ class NodeContext:
# Connected accounts prompt (injected from runner)
accounts_prompt: str = ""
# Resume context — Layer 1 (identity) and Layer 2 (narrative) for
# rebuilding the full system prompt when restoring from conversation store.
identity_prompt: str = ""
narrative: str = ""
# Event-triggered execution (no interactive user attached)
event_triggered: bool = False
@@ -515,6 +530,25 @@ class NodeContext:
# Falls back to node_id when not set (legacy / standalone executor).
stream_id: str = ""
# Subagent mode
is_subagent_mode: bool = False # True when running as a subagent (prevents nested delegation)
report_callback: Any = None # async (message: str, data: dict | None) -> None
node_registry: dict[str, "NodeSpec"] = field(default_factory=dict) # For subagent lookup
# Full tool catalog (unfiltered) — used by _execute_subagent to resolve
# subagent tools that aren't in the parent node's filtered available_tools.
all_tools: list[Tool] = field(default_factory=list)
# Shared reference to the executor's node_registry — used by subagent
# escalation (_EscalationReceiver) to register temporary receivers that
# the inject_input() routing chain can find.
shared_node_registry: dict[str, Any] = field(default_factory=dict)
# Dynamic tool provider — when set, EventLoopNode rebuilds the tool
# list from this callback at the start of each iteration. Used by
# the queen to switch between building-mode and running-mode tools.
dynamic_tools_provider: Any = None # Callable[[], list[Tool]] | None
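A hypothetical provider showing the callback shape; only `Callable[[], list[Tool]] | None` comes from this diff, the mode accessor and tool lists are placeholders:

    def make_tools_provider(get_mode, building_tools: list, running_tools: list):
        """Return a zero-arg callable the EventLoopNode re-invokes each iteration."""
        def provider() -> list:
            return building_tools if get_mode() == "building" else running_tools
        return provider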
@dataclass
class NodeResult:
+1 -1
@@ -280,7 +280,7 @@ def build_transition_marker(
]
if file_lines:
sections.append(
"\nData files (use load_data to access):\n" + "\n".join(file_lines)
"\nData files (use read_file to access):\n" + "\n".join(file_lines)
)
# Agent working memory
+1 -36
@@ -1,11 +1,10 @@
"""Anthropic Claude LLM provider - backward compatible wrapper around LiteLLM."""
import os
from collections.abc import Callable
from typing import Any
from framework.llm.litellm import LiteLLMProvider
from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolResult, ToolUse
from framework.llm.provider import LLMProvider, LLMResponse, Tool
def _get_api_key_from_credential_store() -> str | None:
@@ -83,23 +82,6 @@ class AnthropicProvider(LLMProvider):
max_retries=max_retries,
)
def complete_with_tools(
self,
messages: list[dict[str, Any]],
system: str,
tools: list[Tool],
tool_executor: Callable[[ToolUse], ToolResult],
max_iterations: int = 10,
) -> LLMResponse:
"""Run a tool-use loop until Claude produces a final response (via LiteLLM)."""
return self._provider.complete_with_tools(
messages=messages,
system=system,
tools=tools,
tool_executor=tool_executor,
max_iterations=max_iterations,
)
async def acomplete(
self,
messages: list[dict[str, Any]],
@@ -120,20 +102,3 @@ class AnthropicProvider(LLMProvider):
json_mode=json_mode,
max_retries=max_retries,
)
async def acomplete_with_tools(
self,
messages: list[dict[str, Any]],
system: str,
tools: list[Tool],
tool_executor: Callable[[ToolUse], ToolResult],
max_iterations: int = 10,
) -> LLMResponse:
"""Async tool-use loop via LiteLLM."""
return await self._provider.acomplete_with_tools(
messages=messages,
system=system,
tools=tools,
tool_executor=tool_executor,
max_iterations=max_iterations,
)
+291 -270
@@ -11,7 +11,7 @@ import asyncio
import json
import logging
import time
from collections.abc import AsyncIterator, Callable
from collections.abc import AsyncIterator
from datetime import datetime
from pathlib import Path
from typing import Any
@@ -23,7 +23,7 @@ except ImportError:
litellm = None # type: ignore[assignment]
RateLimitError = Exception # type: ignore[assignment, misc]
from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolResult, ToolUse
from framework.llm.provider import LLMProvider, LLMResponse, Tool
from framework.llm.stream_events import StreamEvent
logger = logging.getLogger(__name__)
@@ -70,13 +70,59 @@ def _patch_litellm_anthropic_oauth() -> None:
AnthropicModelInfo.validate_environment = _patched_validate_environment
def _patch_litellm_metadata_nonetype() -> None:
"""Patch litellm entry points to prevent metadata=None TypeError.
litellm bug: the @client decorator in utils.py has four places that do
"model_group" in kwargs.get("metadata", {})
but kwargs["metadata"] can be explicitly None (set internally by
litellm_params), causing:
TypeError: argument of type 'NoneType' is not iterable
This masks the real API error with a confusing APIConnectionError.
Fix: wrap the four litellm entry points (completion, acompletion,
responses, aresponses) to pop metadata=None before the @client
decorator's error handler can crash on it.
"""
import functools
for fn_name in ("completion", "acompletion", "responses", "aresponses"):
original = getattr(litellm, fn_name, None)
if original is None:
continue
if asyncio.iscoroutinefunction(original):
@functools.wraps(original)
async def _async_wrapper(*args, _orig=original, **kwargs):
if kwargs.get("metadata") is None:
kwargs.pop("metadata", None)
return await _orig(*args, **kwargs)
setattr(litellm, fn_name, _async_wrapper)
else:
@functools.wraps(original)
def _sync_wrapper(*args, _orig=original, **kwargs):
if kwargs.get("metadata") is None:
kwargs.pop("metadata", None)
return _orig(*args, **kwargs)
setattr(litellm, fn_name, _sync_wrapper)
if litellm is not None:
_patch_litellm_anthropic_oauth()
_patch_litellm_metadata_nonetype()
RATE_LIMIT_MAX_RETRIES = 10
RATE_LIMIT_BACKOFF_BASE = 2 # seconds
RATE_LIMIT_MAX_DELAY = 120 # seconds - cap to prevent absurd waits
# Empty-stream retries use a short fixed delay, not the rate-limit backoff.
# Conversation-structure issues are deterministic — long waits don't help.
EMPTY_STREAM_MAX_RETRIES = 3
EMPTY_STREAM_RETRY_DELAY = 1.0 # seconds
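`_compute_retry_delay` itself is not part of this hunk; a plausible shape consistent with these constants (exponential backoff capped at RATE_LIMIT_MAX_DELAY) might look like:

    def _compute_retry_delay_sketch(attempt: int) -> int:
        # attempt 0 -> 2s, 1 -> 4s, 2 -> 8s, ... capped at 120s
        return min(RATE_LIMIT_MAX_DELAY, RATE_LIMIT_BACKOFF_BASE * (2 ** attempt))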
# Directory for dumping failed requests
FAILED_REQUESTS_DIR = Path.home() / ".hive" / "failed_requests"
@@ -124,7 +170,7 @@ def _dump_failed_request(
"temperature": kwargs.get("temperature"),
}
with open(filepath, "w") as f:
with open(filepath, "w", encoding="utf-8") as f:
json.dump(dump_data, f, indent=2, default=str)
return str(filepath)
@@ -191,6 +237,11 @@ def _is_stream_transient_error(exc: BaseException) -> bool:
Transient errors (recoverable=True): network issues, server errors, timeouts.
Permanent errors (recoverable=False): auth, bad request, context window, etc.
NOTE: "Failed to parse tool call arguments" (malformed LLM output) is NOT
transient at the stream level: retrying with the same messages produces the
same malformed output. This error is handled at the EventLoopNode level
where the conversation can be modified before retrying.
"""
try:
from litellm.exceptions import (
@@ -275,12 +326,21 @@ class LiteLLMProvider(LLMProvider):
self.api_key = api_key
self.api_base = api_base
self.extra_kwargs = kwargs
# The Codex ChatGPT backend (chatgpt.com/backend-api/codex) rejects
# several standard OpenAI params: max_output_tokens, stream_options.
self._codex_backend = bool(api_base and "chatgpt.com/backend-api/codex" in api_base)
if litellm is None:
raise ImportError(
"LiteLLM is not installed. Please install it with: uv pip install litellm"
)
# Note: The Codex ChatGPT backend is a Responses API endpoint at
# chatgpt.com/backend-api/codex/responses. LiteLLM's model registry
# correctly marks codex models with mode="responses", so we do NOT
# override the mode. The responses_api_bridge in litellm handles
# converting Chat Completions requests to Responses API format.
def _completion_with_rate_limit_retry(
self, max_retries: int | None = None, **kwargs: Any
) -> Any:
@@ -328,6 +388,20 @@ class LiteLLMProvider(LLMProvider):
f"Full request dumped to: {dump_path}"
)
# finish_reason=length means the model exhausted max_tokens
# before producing content. Retrying with the same max_tokens
# will never help — return immediately instead of looping.
if finish_reason == "length":
max_tok = kwargs.get("max_tokens", "unset")
logger.error(
f"[retry] {model} returned empty content with "
f"finish_reason=length (max_tokens={max_tok}). "
f"The model exhausted its token budget before "
f"producing visible output. Increase max_tokens "
f"or use a different model. Not retrying."
)
return response
if attempt == retries:
logger.error(
f"[retry] GAVE UP on {model} after {retries + 1} "
@@ -390,6 +464,21 @@ class LiteLLMProvider(LLMProvider):
max_retries: int | None = None,
) -> LLMResponse:
"""Generate a completion using LiteLLM."""
# Codex ChatGPT backend requires streaming — delegate to the unified
# async streaming path which properly handles tool calls.
if self._codex_backend:
return asyncio.run(
self.acomplete(
messages=messages,
system=system,
tools=tools,
max_tokens=max_tokens,
response_format=response_format,
json_mode=json_mode,
max_retries=max_retries,
)
)
# Prepare messages with system prompt
full_messages = []
if system:
@@ -452,127 +541,6 @@ class LiteLLMProvider(LLMProvider):
raw_response=response,
)
def complete_with_tools(
self,
messages: list[dict[str, Any]],
system: str,
tools: list[Tool],
tool_executor: Callable[[ToolUse], ToolResult],
max_iterations: int = 10,
max_tokens: int = 4096,
) -> LLMResponse:
"""Run a tool-use loop until the LLM produces a final response."""
# Prepare messages with system prompt
current_messages = []
if system:
current_messages.append({"role": "system", "content": system})
current_messages.extend(messages)
total_input_tokens = 0
total_output_tokens = 0
# Convert tools to OpenAI format
openai_tools = [self._tool_to_openai_format(t) for t in tools]
for _ in range(max_iterations):
# Build kwargs
kwargs: dict[str, Any] = {
"model": self.model,
"messages": current_messages,
"max_tokens": max_tokens,
"tools": openai_tools,
**self.extra_kwargs,
}
if self.api_key:
kwargs["api_key"] = self.api_key
if self.api_base:
kwargs["api_base"] = self.api_base
response = self._completion_with_rate_limit_retry(**kwargs)
# Track tokens
usage = response.usage
if usage:
total_input_tokens += usage.prompt_tokens
total_output_tokens += usage.completion_tokens
choice = response.choices[0]
message = choice.message
# Check if we're done (no tool calls)
if choice.finish_reason == "stop" or not message.tool_calls:
return LLMResponse(
content=message.content or "",
model=response.model or self.model,
input_tokens=total_input_tokens,
output_tokens=total_output_tokens,
stop_reason=choice.finish_reason or "stop",
raw_response=response,
)
# Process tool calls.
# Add assistant message with tool calls.
current_messages.append(
{
"role": "assistant",
"content": message.content,
"tool_calls": [
{
"id": tc.id,
"type": "function",
"function": {
"name": tc.function.name,
"arguments": tc.function.arguments,
},
}
for tc in message.tool_calls
],
}
)
# Execute tools and add results.
for tool_call in message.tool_calls:
try:
args = json.loads(tool_call.function.arguments)
except json.JSONDecodeError:
# Surface error to LLM and skip tool execution
current_messages.append(
{
"role": "tool",
"tool_call_id": tool_call.id,
"content": "Invalid JSON arguments provided to tool.",
}
)
continue
tool_use = ToolUse(
id=tool_call.id,
name=tool_call.function.name,
input=args,
)
result = tool_executor(tool_use)
# Add tool result message
current_messages.append(
{
"role": "tool",
"tool_call_id": result.tool_use_id,
"content": result.content,
}
)
# Max iterations reached
return LLMResponse(
content="Max tool iterations reached",
model=self.model,
input_tokens=total_input_tokens,
output_tokens=total_output_tokens,
stop_reason="max_iterations",
raw_response=None,
)
# ------------------------------------------------------------------
# Async variants — non-blocking on the event loop
# ------------------------------------------------------------------
@@ -621,6 +589,20 @@ class LiteLLMProvider(LLMProvider):
f"Full request dumped to: {dump_path}"
)
# finish_reason=length means the model exhausted max_tokens
# before producing content. Retrying with the same max_tokens
# will never help — return immediately instead of looping.
if finish_reason == "length":
max_tok = kwargs.get("max_tokens", "unset")
logger.error(
f"[async-retry] {model} returned empty content with "
f"finish_reason=length (max_tokens={max_tok}). "
f"The model exhausted its token budget before "
f"producing visible output. Increase max_tokens "
f"or use a different model. Not retrying."
)
return response
if attempt == retries:
logger.error(
f"[async-retry] GAVE UP on {model} after {retries + 1} "
@@ -681,6 +663,19 @@ class LiteLLMProvider(LLMProvider):
max_retries: int | None = None,
) -> LLMResponse:
"""Async version of complete(). Uses litellm.acompletion — non-blocking."""
# Codex ChatGPT backend requires streaming — route through stream() which
# already handles Codex quirks and has proper tool call accumulation.
if self._codex_backend:
stream_iter = self.stream(
messages=messages,
system=system,
tools=tools,
max_tokens=max_tokens,
response_format=response_format,
json_mode=json_mode,
)
return await self._collect_stream_to_response(stream_iter)
full_messages: list[dict[str, Any]] = []
if system:
full_messages.append({"role": "system", "content": system})
@@ -725,115 +720,6 @@ class LiteLLMProvider(LLMProvider):
raw_response=response,
)
async def acomplete_with_tools(
self,
messages: list[dict[str, Any]],
system: str,
tools: list[Tool],
tool_executor: Callable[[ToolUse], ToolResult],
max_iterations: int = 10,
max_tokens: int = 4096,
) -> LLMResponse:
"""Async version of complete_with_tools(). Uses litellm.acompletion — non-blocking."""
current_messages: list[dict[str, Any]] = []
if system:
current_messages.append({"role": "system", "content": system})
current_messages.extend(messages)
total_input_tokens = 0
total_output_tokens = 0
openai_tools = [self._tool_to_openai_format(t) for t in tools]
for _ in range(max_iterations):
kwargs: dict[str, Any] = {
"model": self.model,
"messages": current_messages,
"max_tokens": max_tokens,
"tools": openai_tools,
**self.extra_kwargs,
}
if self.api_key:
kwargs["api_key"] = self.api_key
if self.api_base:
kwargs["api_base"] = self.api_base
response = await self._acompletion_with_rate_limit_retry(**kwargs)
usage = response.usage
if usage:
total_input_tokens += usage.prompt_tokens
total_output_tokens += usage.completion_tokens
choice = response.choices[0]
message = choice.message
if choice.finish_reason == "stop" or not message.tool_calls:
return LLMResponse(
content=message.content or "",
model=response.model or self.model,
input_tokens=total_input_tokens,
output_tokens=total_output_tokens,
stop_reason=choice.finish_reason or "stop",
raw_response=response,
)
current_messages.append(
{
"role": "assistant",
"content": message.content,
"tool_calls": [
{
"id": tc.id,
"type": "function",
"function": {
"name": tc.function.name,
"arguments": tc.function.arguments,
},
}
for tc in message.tool_calls
],
}
)
for tool_call in message.tool_calls:
try:
args = json.loads(tool_call.function.arguments)
except json.JSONDecodeError:
current_messages.append(
{
"role": "tool",
"tool_call_id": tool_call.id,
"content": "Invalid JSON arguments provided to tool.",
}
)
continue
tool_use = ToolUse(
id=tool_call.id,
name=tool_call.function.name,
input=args,
)
result = tool_executor(tool_use)
current_messages.append(
{
"role": "tool",
"tool_call_id": result.tool_use_id,
"content": result.content,
}
)
return LLMResponse(
content="Max tool iterations reached",
model=self.model,
input_tokens=total_input_tokens,
output_tokens=total_output_tokens,
stop_reason="max_iterations",
raw_response=None,
)
def _tool_to_openai_format(self, tool: Tool) -> dict[str, Any]:
"""Convert Tool to OpenAI function calling format."""
return {
@@ -855,6 +741,8 @@ class LiteLLMProvider(LLMProvider):
system: str = "",
tools: list[Tool] | None = None,
max_tokens: int = 4096,
response_format: dict[str, Any] | None = None,
json_mode: bool = False,
) -> AsyncIterator[StreamEvent]:
"""Stream a completion via litellm.acompletion(stream=True).
@@ -879,6 +767,31 @@ class LiteLLMProvider(LLMProvider):
full_messages.append({"role": "system", "content": system})
full_messages.extend(messages)
# Codex Responses API requires an `instructions` field (system prompt).
# Inject a minimal one when callers don't provide a system message.
if self._codex_backend and not any(m["role"] == "system" for m in full_messages):
full_messages.insert(0, {"role": "system", "content": "You are a helpful assistant."})
# Add JSON mode via prompt engineering (works across all providers)
if json_mode:
json_instruction = "\n\nPlease respond with a valid JSON object."
if full_messages and full_messages[0]["role"] == "system":
full_messages[0]["content"] += json_instruction
else:
full_messages.insert(0, {"role": "system", "content": json_instruction.strip()})
# Remove ghost empty assistant messages (content="" and no tool_calls).
# These arise when a model returns an empty stream after a tool result
# (an "expected" no-op turn). Keeping them in history confuses some
# models (notably Codex/gpt-5.3) and causes cascading empty streams.
full_messages = [
m
for m in full_messages
if not (
m.get("role") == "assistant" and not m.get("content") and not m.get("tool_calls")
)
]
kwargs: dict[str, Any] = {
"model": self.model,
"messages": full_messages,
@@ -893,6 +806,12 @@ class LiteLLMProvider(LLMProvider):
kwargs["api_base"] = self.api_base
if tools:
kwargs["tools"] = [self._tool_to_openai_format(t) for t in tools]
if response_format:
kwargs["response_format"] = response_format
# The Codex ChatGPT backend (Responses API) rejects several params.
if self._codex_backend:
kwargs.pop("max_tokens", None)
kwargs.pop("stream_options", None)
for attempt in range(RATE_LIMIT_MAX_RETRIES + 1):
# Post-stream events (ToolCall, TextEnd, Finish) are buffered
@@ -901,8 +820,10 @@ class LiteLLMProvider(LLMProvider):
tail_events: list[StreamEvent] = []
accumulated_text = ""
tool_calls_acc: dict[int, dict[str, str]] = {}
_last_tool_idx = 0 # tracks most recently opened tool call slot
input_tokens = 0
output_tokens = 0
stream_finish_reason: str | None = None
try:
response = await litellm.acompletion(**kwargs) # type: ignore[union-attr]
@@ -923,9 +844,36 @@ class LiteLLMProvider(LLMProvider):
)
# --- Tool calls (accumulate across chunks) ---
# The Codex/Responses API bridge (litellm bug) hardcodes
# index=0 on every ChatCompletionToolCallChunk, even for
# parallel tool calls. We work around this by using tc.id
# (set on output_item.added events) as a "new tool call"
# signal and tracking the most recently opened slot for
# argument deltas that arrive with id=None.
if delta and delta.tool_calls:
for tc in delta.tool_calls:
idx = tc.index if hasattr(tc, "index") and tc.index is not None else 0
if tc.id:
# New tool call announced (or done event re-sent).
# Check if this id already has a slot.
existing_idx = next(
(k for k, v in tool_calls_acc.items() if v["id"] == tc.id),
None,
)
if existing_idx is not None:
idx = existing_idx
elif idx in tool_calls_acc and tool_calls_acc[idx]["id"] not in (
"",
tc.id,
):
# Slot taken by a different call — assign new index
idx = max(tool_calls_acc.keys()) + 1
_last_tool_idx = idx
else:
# Argument delta with no id — route to last opened slot
idx = _last_tool_idx
if idx not in tool_calls_acc:
tool_calls_acc[idx] = {"id": "", "name": "", "arguments": ""}
if tc.id:
@@ -938,6 +886,7 @@ class LiteLLMProvider(LLMProvider):
# --- Finish ---
if choice.finish_reason:
stream_finish_reason = choice.finish_reason
for _idx, tc_data in sorted(tool_calls_acc.items()):
try:
parsed_args = json.loads(tc_data["arguments"])
@@ -972,48 +921,67 @@ class LiteLLMProvider(LLMProvider):
# (If text deltas were yielded above, has_content is True
# and we skip the retry path — nothing was yielded in vain.)
has_content = accumulated_text or tool_calls_acc
if not has_content and attempt < RATE_LIMIT_MAX_RETRIES:
# If the conversation ends with an assistant or tool
# message, an empty stream is expected — the LLM has
# nothing new to say. Don't burn retries on this;
# let the caller (EventLoopNode) decide what to do.
# Typical case: client_facing node where the LLM set
# all outputs via set_output tool calls, and the tool
# results are the last messages.
last_role = next(
(m["role"] for m in reversed(full_messages) if m.get("role") != "system"),
None,
)
if last_role in ("assistant", "tool"):
logger.debug(
"[stream] Empty response after %s message — expected, not retrying.",
last_role,
if not has_content:
# finish_reason=length means the model exhausted
# max_tokens before producing content. Retrying with
# the same max_tokens will never help.
if stream_finish_reason == "length":
max_tok = kwargs.get("max_tokens", "unset")
logger.error(
f"[stream] {self.model} returned empty content "
f"with finish_reason=length "
f"(max_tokens={max_tok}). The model exhausted "
f"its token budget before producing visible "
f"output. Increase max_tokens or use a "
f"different model. Not retrying."
)
for event in tail_events:
yield event
return
wait = _compute_retry_delay(attempt)
token_count, token_method = _estimate_tokens(
self.model,
full_messages,
)
dump_path = _dump_failed_request(
model=self.model,
kwargs=kwargs,
error_type="empty_stream",
attempt=attempt,
)
logger.warning(
f"[stream-retry] {self.model} returned empty stream — "
f"~{token_count} tokens ({token_method}). "
f"Request dumped to: {dump_path}. "
f"Retrying in {wait}s "
f"(attempt {attempt + 1}/{RATE_LIMIT_MAX_RETRIES})"
)
await asyncio.sleep(wait)
continue
# Success (or final attempt) — flush remaining events.
# Empty stream — always retry regardless of last message
# role. Ghost empty streams after tool results are NOT
# expected no-ops; they create infinite loops when the
# conversation doesn't change between iterations.
# After retries, return the empty result and let the
# caller (EventLoopNode) decide how to handle it.
last_role = next(
(m["role"] for m in reversed(full_messages) if m.get("role") != "system"),
None,
)
if attempt < EMPTY_STREAM_MAX_RETRIES:
token_count, token_method = _estimate_tokens(
self.model,
full_messages,
)
dump_path = _dump_failed_request(
model=self.model,
kwargs=kwargs,
error_type="empty_stream",
attempt=attempt,
)
logger.warning(
f"[stream-retry] {self.model} returned empty stream "
f"after {last_role} message — "
f"~{token_count} tokens ({token_method}). "
f"Request dumped to: {dump_path}. "
f"Retrying in {EMPTY_STREAM_RETRY_DELAY}s "
f"(attempt {attempt + 1}/{EMPTY_STREAM_MAX_RETRIES})"
)
await asyncio.sleep(EMPTY_STREAM_RETRY_DELAY)
continue
# All retries exhausted — log and return the empty
# result. EventLoopNode's empty response guard will
# accept if all outputs are set, or handle the ghost
# stream case if outputs are still missing.
logger.error(
f"[stream] {self.model} returned empty stream after "
f"{EMPTY_STREAM_MAX_RETRIES} retries "
f"(last_role={last_role}). Returning empty result."
)
# Success (or empty after exhausted retries) — flush events.
for event in tail_events:
yield event
return
@@ -1045,3 +1013,56 @@ class LiteLLMProvider(LLMProvider):
recoverable = _is_stream_transient_error(e)
yield StreamErrorEvent(error=str(e), recoverable=recoverable)
return
async def _collect_stream_to_response(
self,
stream: AsyncIterator[StreamEvent],
) -> LLMResponse:
"""Consume a stream() iterator and collect it into a single LLMResponse.
Used by acomplete() to route through the unified streaming path so that
all backends (including Codex) get proper tool call handling.
"""
from framework.llm.stream_events import (
FinishEvent,
StreamErrorEvent,
TextDeltaEvent,
ToolCallEvent,
)
content = ""
tool_calls: list[dict[str, Any]] = []
input_tokens = 0
output_tokens = 0
stop_reason = ""
model = self.model
async for event in stream:
if isinstance(event, TextDeltaEvent):
content = event.snapshot # snapshot is the accumulated text
elif isinstance(event, ToolCallEvent):
tool_calls.append(
{
"id": event.tool_use_id,
"name": event.tool_name,
"input": event.tool_input,
}
)
elif isinstance(event, FinishEvent):
input_tokens = event.input_tokens
output_tokens = event.output_tokens
stop_reason = event.stop_reason
if event.model:
model = event.model
elif isinstance(event, StreamErrorEvent):
if not event.recoverable:
raise RuntimeError(f"Stream error: {event.error}")
return LLMResponse(
content=content,
model=model,
input_tokens=input_tokens,
output_tokens=output_tokens,
stop_reason=stop_reason,
raw_response={"tool_calls": tool_calls} if tool_calls else None,
)
+2 -56
@@ -2,10 +2,10 @@
import json
import re
from collections.abc import AsyncIterator, Callable
from collections.abc import AsyncIterator
from typing import Any
from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolResult, ToolUse
from framework.llm.provider import LLMProvider, LLMResponse, Tool
from framework.llm.stream_events import (
FinishEvent,
StreamEvent,
@@ -146,43 +146,6 @@ class MockLLMProvider(LLMProvider):
stop_reason="mock_complete",
)
def complete_with_tools(
self,
messages: list[dict[str, Any]],
system: str,
tools: list[Tool],
tool_executor: Callable[[ToolUse], ToolResult],
max_iterations: int = 10,
) -> LLMResponse:
"""
Generate a mock completion without tool use.
In mock mode, we skip tool execution and return a final response immediately.
Args:
messages: Initial conversation (ignored in mock mode)
system: System prompt (used to extract expected output keys)
tools: Available tools (ignored in mock mode)
tool_executor: Tool executor function (ignored in mock mode)
max_iterations: Max iterations (ignored in mock mode)
Returns:
LLMResponse with mock content
"""
# In mock mode, we don't execute tools - just return a final response
# Try to generate JSON if the system prompt suggests structured output
json_mode = "json" in system.lower() or "output_keys" in system.lower()
content = self._generate_mock_response(system=system, json_mode=json_mode)
return LLMResponse(
content=content,
model=self.model,
input_tokens=0,
output_tokens=0,
stop_reason="mock_complete",
)
async def acomplete(
self,
messages: list[dict[str, Any]],
@@ -204,23 +167,6 @@ class MockLLMProvider(LLMProvider):
max_retries=max_retries,
)
async def acomplete_with_tools(
self,
messages: list[dict[str, Any]],
system: str,
tools: list[Tool],
tool_executor: Callable[[ToolUse], ToolResult],
max_iterations: int = 10,
) -> LLMResponse:
"""Async mock tool-use completion (no I/O, returns immediately)."""
return self.complete_with_tools(
messages=messages,
system=system,
tools=tools,
tool_executor=tool_executor,
max_iterations=max_iterations,
)
async def stream(
self,
messages: list[dict[str, Any]],
+1 -51
@@ -2,7 +2,7 @@
import asyncio
from abc import ABC, abstractmethod
from collections.abc import AsyncIterator, Callable
from collections.abc import AsyncIterator
from dataclasses import dataclass, field
from functools import partial
from typing import Any
@@ -90,30 +90,6 @@ class LLMProvider(ABC):
"""
pass
@abstractmethod
def complete_with_tools(
self,
messages: list[dict[str, Any]],
system: str,
tools: list[Tool],
tool_executor: Callable[["ToolUse"], "ToolResult"],
max_iterations: int = 10,
) -> LLMResponse:
"""
Run a tool-use loop until the LLM produces a final response.
Args:
messages: Initial conversation
system: System prompt
tools: Available tools
tool_executor: Function to execute tools: (ToolUse) -> ToolResult
max_iterations: Max tool calls before stopping
Returns:
Final LLMResponse after tool use completes
"""
pass
async def acomplete(
self,
messages: list[dict[str, Any]],
@@ -144,32 +120,6 @@ class LLMProvider(ABC):
),
)
async def acomplete_with_tools(
self,
messages: list[dict[str, Any]],
system: str,
tools: list["Tool"],
tool_executor: Callable[["ToolUse"], "ToolResult"],
max_iterations: int = 10,
) -> "LLMResponse":
"""Async version of complete_with_tools(). Non-blocking on the event loop.
Default implementation offloads the sync complete_with_tools() to a thread pool.
Subclasses SHOULD override for native async I/O.
"""
loop = asyncio.get_running_loop()
return await loop.run_in_executor(
None,
partial(
self.complete_with_tools,
messages=messages,
system=system,
tools=tools,
tool_executor=tool_executor,
max_iterations=max_iterations,
),
)
async def stream(
self,
messages: list[dict[str, Any]],
+86 -16
@@ -10,6 +10,7 @@ Usage:
import json
import logging
import os
import shutil
import sys
from datetime import datetime
from pathlib import Path
@@ -161,7 +162,7 @@ def _load_session(session_id: str) -> BuildSession:
if not session_file.exists():
raise ValueError(f"Session '{session_id}' not found")
with open(session_file) as f:
with open(session_file, encoding="utf-8") as f:
data = json.load(f)
return BuildSession.from_dict(data)
@@ -173,7 +174,7 @@ def _load_active_session() -> BuildSession | None:
return None
try:
with open(ACTIVE_SESSION_FILE) as f:
with open(ACTIVE_SESSION_FILE, encoding="utf-8") as f:
session_id = f.read().strip()
if session_id:
@@ -227,7 +228,7 @@ def list_sessions() -> str:
if SESSIONS_DIR.exists():
for session_file in SESSIONS_DIR.glob("*.json"):
try:
with open(session_file) as f:
with open(session_file, encoding="utf-8") as f:
data = json.load(f)
sessions.append(
{
@@ -247,7 +248,7 @@ def list_sessions() -> str:
active_id = None
if ACTIVE_SESSION_FILE.exists():
try:
with open(ACTIVE_SESSION_FILE) as f:
with open(ACTIVE_SESSION_FILE, encoding="utf-8") as f:
active_id = f.read().strip()
except Exception:
pass
@@ -309,7 +310,7 @@ def delete_session(session_id: Annotated[str, "ID of the session to delete"]) ->
_session = None
if ACTIVE_SESSION_FILE.exists():
with open(ACTIVE_SESSION_FILE) as f:
with open(ACTIVE_SESSION_FILE, encoding="utf-8") as f:
active_id = f.read().strip()
if active_id == session_id:
ACTIVE_SESSION_FILE.unlink()
@@ -562,16 +563,29 @@ def _validate_agent_path(agent_path: str) -> tuple[Path | None, str | None]:
path = Path(agent_path)
# Resolve relative paths against project root (not MCP server's cwd)
if not path.is_absolute() and not path.exists():
resolved = _PROJECT_ROOT / path
if resolved.exists():
path = resolved
if not path.is_absolute():
path = _PROJECT_ROOT / path
# Restrict to allowed directories BEFORE checking existence to prevent
# leaking whether arbitrary filesystem paths exist on disk.
from framework.server.app import validate_agent_path
try:
path = validate_agent_path(path)
except ValueError:
return None, json.dumps(
{
"success": False,
"error": "agent_path must be inside an allowed directory "
"(exports/, examples/, or ~/.hive/agents/)",
}
)
if not path.exists():
return None, json.dumps(
{
"success": False,
"error": f"Agent path not found: {path}",
"error": f"Agent path not found: {agent_path}",
"hint": "Run export_graph to create an agent in exports/ first",
}
)
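Illustrative outcomes of the reordered checks (paths hypothetical):

    # _validate_agent_path("exports/my-agent") -> (resolved Path, None)
    # _validate_agent_path("/etc/passwd")      -> (None, allow-list error JSON)
    # The allow-list runs BEFORE the existence check, so a rejected path
    # reveals nothing about the filesystem; the not-found error now echoes
    # the caller's original string instead of the resolved absolute path.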
@@ -586,7 +600,7 @@ def add_node(
description: Annotated[str, "What this node does"],
node_type: Annotated[
str,
"Type: event_loop (recommended), router.",
"Type: event_loop (recommended), gcu (browser automation), router.",
],
input_keys: Annotated[str, "JSON array of keys this node reads from shared memory"],
output_keys: Annotated[str, "JSON array of keys this node writes to shared memory"],
@@ -675,8 +689,23 @@ def add_node(
if node_type == "event_loop" and not system_prompt:
warnings.append(f"Event loop node '{node_id}' should have a system_prompt")
# GCU node validation
if node_type == "gcu":
if tools_list:
warnings.append(
f"GCU node '{node_id}' auto-includes all browser tools from the "
f"gcu-tools MCP server. Manually listed tools {tools_list} will be "
f"merged with the auto-included set."
)
if not system_prompt:
warnings.append(
f"GCU node '{node_id}' has a default browser best-practices prompt. "
f"Consider adding a task-specific system_prompt — it will be appended "
f"after the browser instructions."
)
# Warn about client_facing on nodes with tools (likely autonomous work)
if node_type == "event_loop" and client_facing and tools_list:
if node_type in ("event_loop", "gcu") and client_facing and tools_list:
warnings.append(
f"Node '{node_id}' is client_facing=True but has tools {tools_list}. "
"Nodes with tools typically do autonomous work and should be "
@@ -1774,6 +1803,14 @@ def export_graph() -> str:
enriched_criteria.append(crit_dict)
export_data["goal"]["success_criteria"] = enriched_criteria
# Auto-add GCU MCP server if any node uses the gcu type
has_gcu_nodes = any(n.node_type == "gcu" for n in session.nodes)
if has_gcu_nodes:
from framework.graph.gcu import GCU_MCP_SERVER_CONFIG, GCU_SERVER_NAME
if not any(s.get("name") == GCU_SERVER_NAME for s in session.mcp_servers):
session.mcp_servers.append(dict(GCU_MCP_SERVER_CONFIG))
# === WRITE FILES TO DISK ===
# Create exports directory
exports_dir = Path("exports") / session.name
@@ -1864,7 +1901,7 @@ def import_from_export(
return json.dumps({"success": False, "error": f"File not found: {agent_json_path}"})
try:
data = json.loads(path.read_text())
data = json.loads(path.read_text(encoding="utf-8"))
except json.JSONDecodeError as e:
return json.dumps({"success": False, "error": f"Invalid JSON: {e}"})
@@ -1946,7 +1983,7 @@ def get_session_status() -> str:
@mcp.tool()
def configure_loop(
max_iterations: Annotated[int, "Maximum loop iterations per node execution (default 50)"] = 50,
max_tool_calls_per_turn: Annotated[int, "Maximum tool calls per LLM turn (default 10)"] = 10,
max_tool_calls_per_turn: Annotated[int, "Maximum tool calls per LLM turn (default 30)"] = 30,
stall_detection_threshold: Annotated[
int, "Consecutive identical responses before stall detection triggers (default 3)"
] = 3,
@@ -2772,6 +2809,21 @@ def run_tests(
import re
import subprocess
# Guard: pytest must be available as a subprocess command.
# Install with: pip install 'framework[testing]'
if shutil.which("pytest") is None:
return json.dumps(
{
"goal_id": goal_id,
"error": (
"pytest is not installed or not on PATH. "
"Hive's test runner requires pytest at runtime. "
"Install it with: pip install 'framework[testing]' "
"or: uv pip install 'framework[testing]'"
),
}
)
path, err = _validate_agent_path(agent_path)
if err:
return err
@@ -2842,6 +2894,7 @@ def run_tests(
try:
result = subprocess.run(
cmd,
encoding="utf-8",
capture_output=True,
text=True,
timeout=600, # 10 minute timeout
@@ -2965,6 +3018,22 @@ def debug_test(
import re
import subprocess
# Guard: pytest must be available as a subprocess command.
# Install with: pip install 'framework[testing]'
if shutil.which("pytest") is None:
return json.dumps(
{
"goal_id": goal_id,
"test_name": test_name,
"error": (
"pytest is not installed or not on PATH. "
"Hive's test runner requires pytest at runtime. "
"Install it with: pip install 'framework[testing]' "
"or: uv pip install 'framework[testing]'"
),
}
)
# Derive agent_path from session if not provided
if not agent_path and _session:
agent_path = f"exports/{_session.name}"
@@ -2986,7 +3055,7 @@ def debug_test(
# Find which file contains the test
test_file = None
for py_file in tests_dir.glob("test_*.py"):
content = py_file.read_text()
content = py_file.read_text(encoding="utf-8")
if f"def {test_name}" in content or f"async def {test_name}" in content:
test_file = py_file
break
@@ -3017,6 +3086,7 @@ def debug_test(
try:
result = subprocess.run(
cmd,
encoding="utf-8",
capture_output=True,
text=True,
timeout=120, # 2 minute timeout for single test
@@ -3138,7 +3208,7 @@ def list_tests(
tests = []
for test_file in sorted(tests_dir.glob("test_*.py")):
try:
content = test_file.read_text()
content = test_file.read_text(encoding="utf-8")
tree = ast.parse(content)
# Find all async function definitions that start with "test_"
+5 -5
@@ -108,8 +108,8 @@ judge_node = NodeSpec(
"degradation pattern is detected."
),
node_type="event_loop",
client_facing=False, # Autonomous monitor, not interactive
max_node_visits=0, # Unbounded — runs on every timer tick
client_facing=False, # Autonomous monitor, not interactive
max_node_visits=0, # Unbounded — runs on every timer tick
input_keys=[],
output_keys=["health_verdict"],
nullable_output_keys=["health_verdict"],
@@ -244,15 +244,15 @@ judge_graph = GraphSpec(
version="1.0.0",
entry_node="judge",
entry_points={"health_check": "judge"},
terminal_nodes=[], # Forever-alive: fires on every timer tick
terminal_nodes=[], # Forever-alive: fires on every timer tick
pause_nodes=[],
nodes=[judge_node],
edges=[],
conversation_mode="continuous", # Conversation persists across timer ticks
async_entry_points=[HEALTH_JUDGE_ENTRY_POINT],
loop_config={
"max_iterations": 10, # One check shouldn't take many turns
"max_iterations": 10, # One check shouldn't take many turns
"max_tool_calls_per_turn": 3, # get_summary + optionally emit_ticket
"max_history_tokens": 16000, # Compact — judge only needs recent context
"max_history_tokens": 16000, # Compact — judge only needs recent context
},
)
+254 -18
@@ -360,6 +360,84 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
)
setup_creds_parser.set_defaults(func=cmd_setup_credentials)
# serve command (HTTP API server)
serve_parser = subparsers.add_parser(
"serve",
help="Start HTTP API server",
description="Start an HTTP server exposing REST + SSE APIs for agent control.",
)
serve_parser.add_argument(
"--host",
type=str,
default="127.0.0.1",
help="Host to bind (default: 127.0.0.1)",
)
serve_parser.add_argument(
"--port",
"-p",
type=int,
default=8787,
help="Port to listen on (default: 8787)",
)
serve_parser.add_argument(
"--agent",
"-a",
type=str,
action="append",
default=[],
help="Agent path to preload (repeatable)",
)
serve_parser.add_argument(
"--model",
"-m",
type=str,
default=None,
help="LLM model for preloaded agents",
)
serve_parser.add_argument(
"--open",
action="store_true",
help="Open dashboard in browser after server starts",
)
serve_parser.set_defaults(func=cmd_serve)
# open command (serve + auto-open browser)
open_parser = subparsers.add_parser(
"open",
help="Start HTTP server and open dashboard in browser",
description="Shortcut for 'hive serve --open'. "
"Starts the HTTP server and opens the dashboard.",
)
open_parser.add_argument(
"--host",
type=str,
default="127.0.0.1",
help="Host to bind (default: 127.0.0.1)",
)
open_parser.add_argument(
"--port",
"-p",
type=int,
default=8787,
help="Port to listen on (default: 8787)",
)
open_parser.add_argument(
"--agent",
"-a",
type=str,
action="append",
default=[],
help="Agent path to preload (repeatable)",
)
open_parser.add_argument(
"--model",
"-m",
type=str,
default=None,
help="LLM model for preloaded agents",
)
open_parser.set_defaults(func=cmd_open)
def _load_resume_state(
agent_path: str, session_id: str, checkpoint_id: str | None = None
@@ -387,7 +465,7 @@ def _load_resume_state(
if not cp_path.exists():
return None
try:
cp_data = json.loads(cp_path.read_text())
cp_data = json.loads(cp_path.read_text(encoding="utf-8"))
except (json.JSONDecodeError, OSError):
return None
return {
@@ -403,7 +481,7 @@ def _load_resume_state(
if not state_path.exists():
return None
try:
state_data = json.loads(state_path.read_text())
state_data = json.loads(state_path.read_text(encoding="utf-8"))
except (json.JSONDecodeError, OSError):
return None
progress = state_data.get("progress", {})
@@ -476,7 +554,7 @@ def cmd_run(args: argparse.Namespace) -> int:
return 1
elif args.input_file:
try:
with open(args.input_file) as f:
with open(args.input_file, encoding="utf-8") as f:
context = json.load(f)
except (FileNotFoundError, json.JSONDecodeError) as e:
print(f"Error reading input file: {e}", file=sys.stderr)
@@ -618,7 +696,7 @@ def cmd_run(args: argparse.Namespace) -> int:
# Output results
if args.output:
with open(args.output, "w") as f:
with open(args.output, "w", encoding="utf-8") as f:
json.dump(output, f, indent=2, default=str)
if not args.quiet:
print(f"Results written to {args.output}")
@@ -798,7 +876,7 @@ def cmd_list(args: argparse.Namespace) -> int:
agents = []
for path in directory.iterdir():
if path.is_dir() and (path / "agent.json").exists():
if _is_valid_agent_dir(path):
try:
runner = AgentRunner.load(path)
info = runner.info()
@@ -865,14 +943,14 @@ def cmd_dispatch(args: argparse.Namespace) -> int:
# Use specific agents
for agent_name in args.agents:
agent_path = agents_dir / agent_name
if not (agent_path / "agent.json").exists():
if not _is_valid_agent_dir(agent_path):
print(f"Agent not found: {agent_path}", file=sys.stderr)
return 1
agent_paths.append((agent_name, agent_path))
else:
# Discover all agents
for path in agents_dir.iterdir():
if path.is_dir() and (path / "agent.json").exists():
if _is_valid_agent_dir(path):
agent_paths.append((path.name, path))
if not agent_paths:
@@ -1476,7 +1554,7 @@ def _extract_python_agent_metadata(agent_path: Path) -> tuple[str, str]:
return fallback_name, fallback_desc
try:
with open(config_path) as f:
with open(config_path, encoding="utf-8") as f:
tree = ast.parse(f.read())
# Find AgentMetadata class definition
@@ -1623,16 +1701,7 @@ def _select_agent(agents_dir: Path) -> str | None:
# Display agents for current page (with global numbering)
for i, agent_path in enumerate(page_agents, start_idx + 1):
try:
agent_json = agent_path / "agent.json"
if agent_json.exists():
with open(agent_json) as f:
data = json.load(f)
agent_meta = data.get("agent", {})
name = agent_meta.get("name", agent_path.name)
desc = agent_meta.get("description", "")
else:
# Python-based agent - extract from config.py
name, desc = _extract_python_agent_metadata(agent_path)
name, desc = _extract_python_agent_metadata(agent_path)
desc = desc[:50] + "..." if len(desc) > 50 else desc
print(f" {i}. {name}")
print(f" {desc}")
@@ -1891,3 +1960,170 @@ def cmd_setup_credentials(args: argparse.Namespace) -> int:
result = session.run_interactive()
return 0 if result.success else 1
def _open_browser(url: str) -> None:
"""Open URL in the default browser (best-effort, non-blocking)."""
import subprocess
import sys
try:
if sys.platform == "darwin":
subprocess.Popen(
["open", url],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
encoding="utf-8",
)
elif sys.platform == "linux":
subprocess.Popen(
["xdg-open", url],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
encoding="utf-8",
)
elif sys.platform == "win32":
# Windows has no open/xdg-open equivalent on PATH; os.startfile hands
# the URL to the default browser (assumed fix, not part of this diff).
import os
os.startfile(url)
except Exception:
pass # Best-effort — don't crash if browser can't open
def _build_frontend() -> bool:
"""Build the frontend if source is newer than dist. Returns True if dist exists."""
import subprocess
# Find the frontend directory relative to this file or cwd
candidates = [
Path("core/frontend"),
Path(__file__).resolve().parent.parent.parent / "frontend",
]
frontend_dir: Path | None = None
for c in candidates:
if (c / "package.json").is_file():
frontend_dir = c.resolve()
break
if frontend_dir is None:
return False
dist_dir = frontend_dir / "dist"
src_dir = frontend_dir / "src"
# Skip build if dist is up-to-date (newest src file older than dist index.html)
index_html = dist_dir / "index.html"
if index_html.exists() and src_dir.is_dir():
dist_mtime = index_html.stat().st_mtime
needs_build = False
for f in src_dir.rglob("*"):
if f.is_file() and f.stat().st_mtime > dist_mtime:
needs_build = True
break
if not needs_build:
return True
# Need to build
print("Building frontend...")
try:
# Ensure deps are installed
subprocess.run(
["npm", "install", "--no-fund", "--no-audit"],
encoding="utf-8",
cwd=frontend_dir,
check=True,
capture_output=True,
)
subprocess.run(
["npm", "run", "build"],
encoding="utf-8",
cwd=frontend_dir,
check=True,
capture_output=True,
)
print("Frontend built.")
return True
except FileNotFoundError:
print("Node.js not found — skipping frontend build.")
return dist_dir.is_dir()
except subprocess.CalledProcessError as exc:
stderr = exc.stderr or ""  # already a str: subprocess.run() was given encoding="utf-8"
print(f"Frontend build failed: {stderr[:500]}")
return dist_dir.is_dir()
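The staleness check above is a plain mtime comparison: rebuild only when some file under src/ is newer than dist/index.html. The same idea as a reusable predicate (a standalone sketch, not from this repo):
from pathlib import Path

def is_stale(output: Path, source_root: Path) -> bool:
    """True if any file under source_root is newer than output."""
    if not output.exists():
        return True
    out_mtime = output.stat().st_mtime
    return any(
        f.stat().st_mtime > out_mtime
        for f in source_root.rglob("*")
        if f.is_file()
    )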
def cmd_serve(args: argparse.Namespace) -> int:
"""Start the HTTP API server."""
import logging
from aiohttp import web
_build_frontend()
from framework.server.app import create_app
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)
model = getattr(args, "model", None)
app = create_app(model=model)
async def run_server():
manager = app["manager"]
# Preload agents specified via --agent
for agent_path in args.agent:
try:
session = await manager.create_session_with_worker(agent_path, model=model)
info = session.worker_info
name = info.name if info else session.worker_id
print(f"Loaded agent: {session.worker_id} ({name})")
except Exception as e:
print(f"Error loading {agent_path}: {e}")
# Start server using AppRunner/TCPSite (same pattern as webhook_server.py)
runner = web.AppRunner(app, access_log=None)
await runner.setup()
site = web.TCPSite(runner, args.host, args.port)
await site.start()
# Check if frontend is being served
dist_candidates = [
Path("frontend/dist"),
Path("core/frontend/dist"),
]
has_frontend = any((c / "index.html").exists() for c in dist_candidates if c.is_dir())
dashboard_url = f"http://{args.host}:{args.port}"
print()
print(f"Hive API server running on {dashboard_url}")
if has_frontend:
print(f"Dashboard: {dashboard_url}")
print(f"Health: {dashboard_url}/api/health")
print(f"Agents loaded: {sum(1 for s in manager.list_sessions() if s.worker_runtime)}")
print()
print("Press Ctrl+C to stop")
# Auto-open browser if --open flag is set and frontend exists
if getattr(args, "open", False) and has_frontend:
_open_browser(dashboard_url)
# Run forever until interrupted
try:
await asyncio.Event().wait()
except asyncio.CancelledError:
pass
finally:
await manager.shutdown_all()
await runner.cleanup()
try:
asyncio.run(run_server())
except KeyboardInterrupt:
print("\nServer stopped.")
return 0
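cmd_serve drives aiohttp via the AppRunner/TCPSite pair rather than web.run_app, which lets it keep ownership of the event loop for agent preloading and ordered shutdown. The pattern in isolation, assuming nothing beyond aiohttp:
import asyncio
from aiohttp import web

async def health(request: web.Request) -> web.Response:
    return web.json_response({"ok": True})

async def main() -> None:
    app = web.Application()
    app.router.add_get("/api/health", health)
    runner = web.AppRunner(app, access_log=None)
    await runner.setup()
    site = web.TCPSite(runner, "127.0.0.1", 8080)
    await site.start()
    try:
        await asyncio.Event().wait()  # serve until cancelled (Ctrl+C)
    finally:
        await runner.cleanup()

if __name__ == "__main__":
    asyncio.run(main())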
def cmd_open(args: argparse.Namespace) -> int:
"""Start the HTTP API server and open the dashboard in the browser."""
args.open = True
return cmd_serve(args)
+185
@@ -0,0 +1,185 @@
"""Pre-load validation for agent graphs.
Runs structural and credential checks before MCP servers are spawned.
Fails fast with actionable error messages.
"""
from __future__ import annotations
import logging
from dataclasses import dataclass, field
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from framework.graph.edge import GraphSpec
from framework.graph.node import NodeSpec
logger = logging.getLogger(__name__)
class PreloadValidationError(Exception):
"""Raised when pre-load validation fails."""
def __init__(self, errors: list[str]):
self.errors = errors
msg = "Pre-load validation failed:\n" + "\n".join(f" - {e}" for e in errors)
super().__init__(msg)
@dataclass
class PreloadResult:
"""Result of pre-load validation."""
valid: bool
errors: list[str] = field(default_factory=list)
warnings: list[str] = field(default_factory=list)
def validate_graph_structure(graph: GraphSpec) -> list[str]:
"""Run graph structural validation (includes GCU subagent-only checks).
Delegates to GraphSpec.validate() which checks entry/terminal nodes,
edge references, reachability, fan-out rules, and GCU constraints.
"""
return graph.validate()
def validate_credentials(
nodes: list[NodeSpec],
*,
interactive: bool = True,
skip: bool = False,
) -> None:
"""Validate agent credentials.
Calls ``validate_agent_credentials`` which performs two-phase validation:
1. Presence check (env var, encrypted store, Aden sync)
2. Health check (lightweight HTTP call to verify the key works)
On failure raises ``CredentialError`` with ``validation_result`` and
``failed_cred_names`` attributes preserved from the upstream check.
In interactive mode (CLI with TTY), attempts recovery via the
credential setup flow before re-raising.
"""
if skip:
return
from framework.credentials.validation import validate_agent_credentials
if not interactive:
# Non-interactive: let CredentialError propagate with full context.
# validate_agent_credentials attaches .validation_result and
# .failed_cred_names to the exception automatically.
validate_agent_credentials(nodes)
return
import sys
from framework.credentials.models import CredentialError
try:
validate_agent_credentials(nodes)
except CredentialError as e:
if not sys.stdin.isatty():
raise
print(f"\n{e}", file=sys.stderr)
from framework.credentials.validation import build_setup_session_from_error
session = build_setup_session_from_error(e, nodes=nodes)
if not session.missing:
raise
result = session.run_interactive()
if not result.success:
# Preserve the original validation_result so callers can
# inspect which credentials are still missing.
exc = CredentialError(
"Credential setup incomplete. Run again after configuring the required credentials."
)
if hasattr(e, "validation_result"):
exc.validation_result = e.validation_result # type: ignore[attr-defined]
if hasattr(e, "failed_cred_names"):
exc.failed_cred_names = e.failed_cred_names # type: ignore[attr-defined]
raise exc from None
# Re-validate after successful setup — this will raise if still broken,
# with fresh validation_result attached to the new exception.
validate_agent_credentials(nodes)
def credential_errors_to_json(exc: Exception) -> dict:
"""Extract structured credential failure details from a CredentialError.
Returns a dict suitable for JSON serialization with enough detail for
the queen to report actionable guidance to the user. Falls back to
``str(exc)`` when rich metadata is not available.
"""
result = getattr(exc, "validation_result", None)
if result is None:
return {
"error": "credentials_required",
"message": str(exc),
}
failed = result.failed
missing = []
for c in failed:
if c.available:
status = "invalid"
elif c.aden_not_connected:
status = "aden_not_connected"
else:
status = "missing"
entry: dict = {
"credential": c.credential_name,
"env_var": c.env_var,
"status": status,
}
if c.tools:
entry["tools"] = c.tools
if c.node_types:
entry["node_types"] = c.node_types
if c.help_url:
entry["help_url"] = c.help_url
if c.validation_message:
entry["validation_message"] = c.validation_message
missing.append(entry)
return {
"error": "credentials_required",
"message": str(exc),
"missing_credentials": missing,
}
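For reference, the shape this function emits for a single missing key (all values illustrative):
# Illustrative payload; values are made up:
sample = {
    "error": "credentials_required",
    "message": "Missing credentials: SLACK_BOT_TOKEN",
    "missing_credentials": [
        {
            "credential": "slack_bot_token",
            "env_var": "SLACK_BOT_TOKEN",
            "status": "missing",
            "tools": ["slack_post_message"],
            "help_url": "https://api.slack.com/apps",
        }
    ],
}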
def run_preload_validation(
graph: GraphSpec,
*,
interactive: bool = True,
skip_credential_validation: bool = False,
) -> PreloadResult:
"""Run all pre-load validations.
Order:
1. Graph structure (includes GCU subagent-only checks): non-recoverable
2. Credentials: potentially recoverable via interactive setup
Raises PreloadValidationError for structural issues.
Raises CredentialError for credential issues.
"""
# 1. Structural validation (calls graph.validate() which includes GCU checks)
graph_errors = validate_graph_structure(graph)
if graph_errors:
raise PreloadValidationError(graph_errors)
# 2. Credential validation
validate_credentials(
graph.nodes,
interactive=interactive,
skip=skip_credential_validation,
)
return PreloadResult(valid=True)
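A sketch of how a non-interactive caller might consume the module's two failure modes (both exception types come from this codebase; the graph argument is assumed to be a loaded GraphSpec):
from framework.credentials.models import CredentialError
from framework.runner.preload_validation import (
    PreloadValidationError,
    run_preload_validation,
)

def load_or_report(graph) -> bool:
    try:
        run_preload_validation(graph, interactive=False)
        return True
    except PreloadValidationError as exc:
        for err in exc.errors:
            print(f"structure: {err}")
        return False
    except CredentialError as exc:
        print(f"credentials: {exc}")
        return False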
+523 -108
@@ -5,13 +5,13 @@ import logging
import os
from collections.abc import Callable
from dataclasses import dataclass, field
from datetime import UTC
from pathlib import Path
from typing import TYPE_CHECKING, Any
from framework.config import get_hive_config, get_preferred_model
from framework.credentials.validation import (
ensure_credential_key_env as _ensure_credential_key_env,
validate_agent_credentials,
)
from framework.graph import Goal
from framework.graph.edge import (
@@ -24,6 +24,7 @@ from framework.graph.edge import (
from framework.graph.executor import ExecutionResult
from framework.graph.node import NodeSpec
from framework.llm.provider import LLMProvider, Tool
from framework.runner.preload_validation import run_preload_validation
from framework.runner.tool_registry import ToolRegistry
from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig, create_agent_runtime
from framework.runtime.execution_stream import EntryPointSpec
@@ -38,10 +39,108 @@ logger = logging.getLogger(__name__)
CLAUDE_CREDENTIALS_FILE = Path.home() / ".claude" / ".credentials.json"
CLAUDE_OAUTH_TOKEN_URL = "https://console.anthropic.com/v1/oauth/token"
CLAUDE_OAUTH_CLIENT_ID = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
CLAUDE_KEYCHAIN_SERVICE = "Claude Code-credentials"
# Buffer in seconds before token expiry to trigger a proactive refresh
_TOKEN_REFRESH_BUFFER_SECS = 300 # 5 minutes
# Codex (OpenAI) subscription auth
CODEX_AUTH_FILE = Path.home() / ".codex" / "auth.json"
CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token"
CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
CODEX_KEYCHAIN_SERVICE = "Codex Auth"
_CODEX_TOKEN_LIFETIME_SECS = 3600 # 1 hour (no explicit expiry field)
def _read_claude_keychain() -> dict | None:
"""Read Claude Code credentials from macOS Keychain.
Returns the parsed JSON dict, or None if not on macOS or entry missing.
"""
import getpass
import platform
import subprocess
if platform.system() != "Darwin":
return None
try:
account = getpass.getuser()
result = subprocess.run(
[
"security",
"find-generic-password",
"-s",
CLAUDE_KEYCHAIN_SERVICE,
"-a",
account,
"-w",
],
capture_output=True,
encoding="utf-8",
timeout=5,
)
if result.returncode != 0:
return None
raw = result.stdout.strip()
if not raw:
return None
return json.loads(raw)
except (subprocess.TimeoutExpired, json.JSONDecodeError, OSError) as exc:
logger.debug("Claude keychain read failed: %s", exc)
return None
def _save_claude_keychain(creds: dict) -> bool:
"""Write Claude Code credentials to macOS Keychain. Returns True on success."""
import getpass
import platform
import subprocess
if platform.system() != "Darwin":
return False
try:
account = getpass.getuser()
data = json.dumps(creds)
result = subprocess.run(
[
"security",
"add-generic-password",
"-U",
"-s",
CLAUDE_KEYCHAIN_SERVICE,
"-a",
account,
"-w",
data,
],
capture_output=True,
timeout=5,
)
return result.returncode == 0
except (subprocess.TimeoutExpired, OSError) as exc:
logger.debug("Claude keychain write failed: %s", exc)
return False
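Both helpers are thin wrappers over the macOS security CLI. For orientation, the equivalent manual commands and a round-trip through the helpers (macOS only; both return None/False elsewhere):
# Equivalent manual commands (macOS):
#   read:  security find-generic-password -s "Claude Code-credentials" -a "$USER" -w
#   write: security add-generic-password -U -s "Claude Code-credentials" -a "$USER" -w '<json>'
creds = _read_claude_keychain()
if creds is not None:
    # -U updates the existing entry in place rather than erroring.
    assert _save_claude_keychain(creds)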
def _read_claude_credentials() -> dict | None:
"""Read Claude Code credentials from Keychain (macOS) or file (Linux/Windows)."""
# Try macOS Keychain first
creds = _read_claude_keychain()
if creds:
return creds
# Fall back to file
if not CLAUDE_CREDENTIALS_FILE.exists():
return None
try:
with open(CLAUDE_CREDENTIALS_FILE, encoding="utf-8") as f:
return json.load(f)
except (json.JSONDecodeError, OSError):
return None
def _refresh_claude_code_token(refresh_token: str) -> dict | None:
"""Refresh the Claude Code OAuth token using the refresh token.
@@ -81,16 +180,14 @@ def _refresh_claude_code_token(refresh_token: str) -> dict | None:
def _save_refreshed_credentials(token_data: dict) -> None:
"""Write refreshed token data back to ~/.claude/.credentials.json."""
"""Write refreshed token data back to Keychain (macOS) or credentials file."""
import time
if not CLAUDE_CREDENTIALS_FILE.exists():
creds = _read_claude_credentials()
if not creds:
return
try:
with open(CLAUDE_CREDENTIALS_FILE) as f:
creds = json.load(f)
oauth = creds.get("claudeAiOauth", {})
oauth["accessToken"] = token_data["access_token"]
if "refresh_token" in token_data:
@@ -99,9 +196,15 @@ def _save_refreshed_credentials(token_data: dict) -> None:
oauth["expiresAt"] = int((time.time() + token_data["expires_in"]) * 1000)
creds["claudeAiOauth"] = oauth
with open(CLAUDE_CREDENTIALS_FILE, "w") as f:
json.dump(creds, f, indent=2)
logger.debug("Claude Code credentials refreshed successfully")
# Try Keychain first (macOS), fall back to file
if _save_claude_keychain(creds):
logger.debug("Claude Code credentials refreshed in Keychain")
return
if CLAUDE_CREDENTIALS_FILE.exists():
with open(CLAUDE_CREDENTIALS_FILE, "w", encoding="utf-8") as f:
json.dump(creds, f, indent=2)
logger.debug("Claude Code credentials refreshed in file")
except (json.JSONDecodeError, OSError, KeyError) as exc:
logger.debug("Failed to save refreshed credentials: %s", exc)
@@ -109,8 +212,8 @@ def _save_refreshed_credentials(token_data: dict) -> None:
def get_claude_code_token() -> str | None:
"""Get the OAuth token from Claude Code subscription with auto-refresh.
Reads from ~/.claude/.credentials.json which is created by the
Claude Code CLI when users authenticate with their subscription.
Reads from macOS Keychain (on Darwin) or ~/.claude/.credentials.json
(on Linux/Windows), as created by the Claude Code CLI.
If the token is expired or close to expiry, attempts an automatic
refresh using the stored refresh token.
@@ -120,13 +223,8 @@ def get_claude_code_token() -> str | None:
"""
import time
if not CLAUDE_CREDENTIALS_FILE.exists():
return None
try:
with open(CLAUDE_CREDENTIALS_FILE) as f:
creds = json.load(f)
except (json.JSONDecodeError, OSError):
creds = _read_claude_credentials()
if not creds:
return None
oauth = creds.get("claudeAiOauth", {})
@@ -161,6 +259,264 @@ def get_claude_code_token() -> str | None:
return access_token
# ---------------------------------------------------------------------------
# Codex (OpenAI) subscription token helpers
# ---------------------------------------------------------------------------
def _get_codex_keychain_account() -> str:
"""Compute the macOS Keychain account name used by the Codex CLI.
The Codex CLI stores credentials under the account
``cli|<sha256(~/.codex)[:16]>`` in the ``Codex Auth`` service.
"""
import hashlib
codex_dir = str(Path.home() / ".codex")
digest = hashlib.sha256(codex_dir.encode()).hexdigest()[:16]
return f"cli|{digest}"
def _read_codex_keychain() -> dict | None:
"""Read Codex auth data from macOS Keychain (macOS only).
Returns the parsed JSON from the Keychain entry, or None if not
available (wrong platform, entry missing, etc.).
"""
import platform
import subprocess
if platform.system() != "Darwin":
return None
try:
account = _get_codex_keychain_account()
result = subprocess.run(
[
"security",
"find-generic-password",
"-s",
CODEX_KEYCHAIN_SERVICE,
"-a",
account,
"-w",
],
capture_output=True,
encoding="utf-8",
timeout=5,
)
if result.returncode != 0:
return None
raw = result.stdout.strip()
if not raw:
return None
return json.loads(raw)
except (subprocess.TimeoutExpired, json.JSONDecodeError, OSError) as exc:
logger.debug("Codex keychain read failed: %s", exc)
return None
def _read_codex_auth_file() -> dict | None:
"""Read Codex auth data from ~/.codex/auth.json (fallback)."""
if not CODEX_AUTH_FILE.exists():
return None
try:
with open(CODEX_AUTH_FILE, encoding="utf-8") as f:
return json.load(f)
except (json.JSONDecodeError, OSError):
return None
def _is_codex_token_expired(auth_data: dict) -> bool:
"""Check whether the Codex token is expired or close to expiry.
The Codex auth.json has no explicit ``expiresAt`` field, so we infer
expiry as ``last_refresh + _CODEX_TOKEN_LIFETIME_SECS``. Falls back
to the file mtime when ``last_refresh`` is absent.
"""
import time
from datetime import datetime
now = time.time()
last_refresh = auth_data.get("last_refresh")
if last_refresh is None:
# Fall back to file modification time
try:
last_refresh = CODEX_AUTH_FILE.stat().st_mtime
except OSError:
# Cannot determine age — assume expired
return True
elif isinstance(last_refresh, str):
# Codex stores last_refresh as an ISO 8601 timestamp string —
# convert to Unix epoch float for arithmetic.
try:
last_refresh = datetime.fromisoformat(last_refresh.replace("Z", "+00:00")).timestamp()
except (ValueError, TypeError):
return True
expires_at = last_refresh + _CODEX_TOKEN_LIFETIME_SECS
return now >= (expires_at - _TOKEN_REFRESH_BUFFER_SECS)
def _refresh_codex_token(refresh_token: str) -> dict | None:
"""Refresh the Codex OAuth token using the refresh token.
POSTs to the OpenAI auth endpoint with form-urlencoded data.
Returns:
Dict with new token data on success, None on failure.
"""
import urllib.error
import urllib.parse
import urllib.request
data = urllib.parse.urlencode(
{
"grant_type": "refresh_token",
"refresh_token": refresh_token,
"client_id": CODEX_OAUTH_CLIENT_ID,
}
).encode("utf-8")
req = urllib.request.Request(
CODEX_OAUTH_TOKEN_URL,
data=data,
headers={"Content-Type": "application/x-www-form-urlencoded"},
method="POST",
)
try:
with urllib.request.urlopen(req, timeout=15) as resp:
return json.loads(resp.read())
except (urllib.error.URLError, json.JSONDecodeError, TimeoutError, OSError) as exc:
logger.debug("Codex token refresh failed: %s", exc)
return None
def _save_refreshed_codex_credentials(auth_data: dict, token_data: dict) -> None:
"""Write refreshed tokens back to ~/.codex/auth.json only (not Keychain).
The Codex CLI manages its own Keychain entries, so we only update the
file-based credentials.
"""
from datetime import datetime
try:
tokens = auth_data.get("tokens", {})
tokens["access_token"] = token_data["access_token"]
if "refresh_token" in token_data:
tokens["refresh_token"] = token_data["refresh_token"]
if "id_token" in token_data:
tokens["id_token"] = token_data["id_token"]
auth_data["tokens"] = tokens
auth_data["last_refresh"] = datetime.now(UTC).isoformat()
CODEX_AUTH_FILE.parent.mkdir(parents=True, exist_ok=True, mode=0o700)
fd = os.open(CODEX_AUTH_FILE, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
with os.fdopen(fd, "w", encoding="utf-8") as f:
json.dump(auth_data, f, indent=2)
logger.debug("Codex credentials refreshed successfully")
except (OSError, KeyError) as exc:
logger.debug("Failed to save refreshed Codex credentials: %s", exc)
def get_codex_token() -> str | None:
"""Get the OAuth token from Codex subscription with auto-refresh.
Reads from macOS Keychain first, then falls back to
``~/.codex/auth.json``. If the token is expired or close to
expiry, attempts an automatic refresh.
Returns:
The access token if available, None otherwise.
"""
# Try Keychain first, then file
auth_data = _read_codex_keychain() or _read_codex_auth_file()
if not auth_data:
return None
tokens = auth_data.get("tokens", {})
access_token = tokens.get("access_token")
if not access_token:
return None
# Check if token is still valid
if not _is_codex_token_expired(auth_data):
return access_token
# Token is expired or near expiry — attempt refresh
refresh_token = tokens.get("refresh_token")
if not refresh_token:
logger.warning("Codex token expired and no refresh token available")
return access_token # Return expired token; it may still work briefly
logger.info("Codex token expired or near expiry, refreshing...")
token_data = _refresh_codex_token(refresh_token)
if token_data and "access_token" in token_data:
_save_refreshed_codex_credentials(auth_data, token_data)
return token_data["access_token"]
# Refresh failed — return the existing token and warn
logger.warning("Codex token refresh failed. Run 'codex' to re-authenticate.")
return access_token
def _get_account_id_from_jwt(access_token: str) -> str | None:
"""Extract the ChatGPT account_id from the access token JWT.
The OpenAI access token JWT contains a claim at
``https://api.openai.com/auth`` with a ``chatgpt_account_id`` field.
This is used as a fallback when the auth.json doesn't store the
account_id explicitly.
"""
import base64
try:
parts = access_token.split(".")
if len(parts) != 3:
return None
payload = parts[1]
# Add base64 padding
padding = 4 - len(payload) % 4
if padding != 4:
payload += "=" * padding
decoded = base64.urlsafe_b64decode(payload)
claims = json.loads(decoded)
auth = claims.get("https://api.openai.com/auth")
if isinstance(auth, dict):
account_id = auth.get("chatgpt_account_id")
if isinstance(account_id, str) and account_id:
return account_id
except Exception:
pass
return None
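A self-contained exercise of the JWT parsing path using a synthetic, unsigned token. The claim shape mirrors the code above; the account id is made up:
import base64
import json

def b64url(d: dict) -> str:
    raw = base64.urlsafe_b64encode(json.dumps(d).encode()).decode()
    return raw.rstrip("=")  # JWTs strip padding; the parser above re-adds it

header = b64url({"alg": "none"})
payload = b64url(
    {"https://api.openai.com/auth": {"chatgpt_account_id": "acct_123"}}  # made-up id
)
token = f"{header}.{payload}.sig"
assert _get_account_id_from_jwt(token) == "acct_123"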
def get_codex_account_id() -> str | None:
"""Extract the account ID from Codex auth data for the ChatGPT-Account-Id header.
Checks the ``tokens.account_id`` field first, then falls back to
decoding the account ID from the access token JWT.
Returns:
The account_id string if available, None otherwise.
"""
auth_data = _read_codex_keychain() or _read_codex_auth_file()
if not auth_data:
return None
tokens = auth_data.get("tokens", {})
account_id = tokens.get("account_id")
if account_id:
return account_id
# Fallback: extract from JWT
access_token = tokens.get("access_token")
if access_token:
return _get_account_id_from_jwt(access_token)
return None
@dataclass
class AgentInfo:
"""Information about an exported agent."""
@@ -355,6 +711,7 @@ class AgentRunner:
requires_account_selection: bool = False,
configure_for_account: Callable | None = None,
list_accounts: Callable | None = None,
credential_store: Any | None = None,
):
"""
Initialize the runner (use AgentRunner.load() instead).
@@ -374,6 +731,7 @@ class AgentRunner:
requires_account_selection: If True, TUI shows account picker before starting.
configure_for_account: Callback(runner, account_dict) to scope tools after selection.
list_accounts: Callback() -> list[dict] to fetch available accounts.
credential_store: Optional shared CredentialStore (avoids creating redundant stores).
"""
self.agent_path = agent_path
self.graph = graph
@@ -387,6 +745,7 @@ class AgentRunner:
self.requires_account_selection = requires_account_selection
self._configure_for_account = configure_for_account
self._list_accounts = list_accounts
self._credential_store = credential_store
# Set up storage
if storage_path:
@@ -413,103 +772,67 @@ class AgentRunner:
self._agent_runtime: AgentRuntime | None = None
self._uses_async_entry_points = self.graph.has_async_entry_points()
# Validate credentials before spawning MCP servers.
# Pre-load validation: structural checks + credentials.
# Fails fast with actionable guidance — no MCP noise on screen.
self._validate_credentials()
run_preload_validation(
self.graph,
interactive=self._interactive,
skip_credential_validation=self.skip_credential_validation,
)
# Auto-discover tools from tools.py
tools_path = agent_path / "tools.py"
if tools_path.exists():
self._tool_registry.discover_from_module(tools_path)
# Set environment variables for MCP subprocesses
# These are inherited by MCP servers (e.g., GCU browser tools)
os.environ["HIVE_AGENT_NAME"] = agent_path.name
os.environ["HIVE_STORAGE_PATH"] = str(self._storage_path)
# Auto-discover MCP servers from mcp_servers.json
mcp_config_path = agent_path / "mcp_servers.json"
if mcp_config_path.exists():
self._load_mcp_servers_from_config(mcp_config_path)
def _validate_credentials(self) -> None:
"""Check that required credentials are available before spawning MCP servers.
If ``interactive`` is True and stdin is a TTY, automatically launches
the interactive credential setup flow so the user can fix the issue
in-place. Re-validates after setup succeeds.
When ``interactive`` is False (e.g. TUI callers), the CredentialError
propagates immediately so the caller can handle it with its own UI.
"""
if self.skip_credential_validation:
return
if not self._interactive:
# Let the CredentialError propagate — caller handles UI.
validate_agent_credentials(self.graph.nodes)
return
import sys
from framework.credentials.models import CredentialError
try:
validate_agent_credentials(self.graph.nodes)
return # All good
except CredentialError as e:
if not sys.stdin.isatty():
raise
# Interactive: show the error then enter credential setup
print(f"\n{e}", file=sys.stderr)
from framework.credentials.validation import build_setup_session_from_error
session = build_setup_session_from_error(e, nodes=self.graph.nodes)
if not session.missing:
raise
result = session.run_interactive()
if not result.success:
raise CredentialError(
"Credential setup incomplete. "
"Run again after configuring the required credentials."
) from None
# Re-validate after setup
validate_agent_credentials(self.graph.nodes)
@staticmethod
def _import_agent_module(agent_path: Path):
"""Import an agent package from its directory path.
Tries package import first (works when exports/ is on sys.path,
which cli.py:_configure_paths() ensures). Falls back to direct
file import of agent.py via importlib.util.
Ensures the agent's parent directory is on sys.path so the package
can be imported normally (supports relative imports within the agent).
Always reloads the package and its submodules so that code changes
made since the last import (or since a previous session load in the
same server process) are picked up.
"""
import importlib
import sys
package_name = agent_path.name
parent_dir = str(agent_path.resolve().parent)
# Try importing as a package (works when exports/ is on sys.path)
try:
return importlib.import_module(package_name)
except ImportError:
pass
# Always place the correct parent directory first on sys.path.
# Multiple agent dirs can contain packages with the same name
# (e.g. exports/deep_research_agent and examples/deep_research_agent).
# Without this, a previously-added parent dir could shadow the
# agent we actually want to load.
if parent_dir in sys.path:
sys.path.remove(parent_dir)
sys.path.insert(0, parent_dir)
# Fallback: import agent.py directly via file path
import importlib.util
# Evict cached submodules first (e.g. deep_research_agent.nodes,
# deep_research_agent.agent) so the top-level reload picks up
# changes in the entire package — not just __init__.py.
stale = [
name
for name in sys.modules
if name == package_name or name.startswith(f"{package_name}.")
]
for name in stale:
del sys.modules[name]
agent_py = agent_path / "agent.py"
if not agent_py.exists():
raise FileNotFoundError(
f"No importable agent found at {agent_path}. "
f"Expected a Python package with agent.py."
)
spec = importlib.util.spec_from_file_location(
f"{package_name}.agent",
agent_py,
submodule_search_locations=[str(agent_path)],
)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
return module
return importlib.import_module(package_name)
@classmethod
def load(
@@ -519,6 +842,8 @@ class AgentRunner:
storage_path: Path | None = None,
model: str | None = None,
interactive: bool = True,
skip_credential_validation: bool | None = None,
credential_store: Any | None = None,
) -> "AgentRunner":
"""
Load an agent from an export folder.
@@ -534,6 +859,9 @@ class AgentRunner:
model: LLM model to use (reads from agent's default_config if None)
interactive: If True (default), offer interactive credential setup.
Set to False from TUI callers that handle setup via their own UI.
skip_credential_validation: If True, skip credential checks at load time.
When None (default), uses the agent module's setting.
credential_store: Optional shared CredentialStore (avoids creating redundant stores).
Returns:
AgentRunner instance ready to run
@@ -603,6 +931,8 @@ class AgentRunner:
# Read pre-run hooks (e.g., credential_tester needs account selection)
skip_cred = getattr(agent_module, "skip_credential_validation", False)
if skip_credential_validation is not None:
skip_cred = skip_credential_validation
needs_acct = getattr(agent_module, "requires_account_selection", False)
configure_fn = getattr(agent_module, "configure_for_account", None)
list_accts_fn = getattr(agent_module, "list_connected_accounts", None)
@@ -621,6 +951,7 @@ class AgentRunner:
requires_account_selection=needs_acct,
configure_for_account=configure_fn,
list_accounts=list_accts_fn,
credential_store=credential_store,
)
# Fallback: load from agent.json (legacy JSON-based agents)
@@ -628,7 +959,7 @@ class AgentRunner:
if not agent_json_path.exists():
raise FileNotFoundError(f"No agent.py or agent.json found in {agent_path}")
with open(agent_json_path) as f:
with open(agent_json_path, encoding="utf-8") as f:
graph, goal = load_agent_export(f.read())
return cls(
@@ -639,6 +970,8 @@ class AgentRunner:
storage_path=storage_path,
model=model,
interactive=interactive,
skip_credential_validation=skip_credential_validation or False,
credential_store=credential_store,
)
def register_tool(
@@ -729,7 +1062,7 @@ class AgentRunner:
"""
self._approval_callback = callback
def _setup(self) -> None:
def _setup(self, event_bus=None) -> None:
"""Set up runtime, LLM, and executor."""
# Configure structured logging (auto-detects JSON vs human-readable)
from framework.observability import configure_logging
@@ -758,10 +1091,11 @@ class AgentRunner:
else:
from framework.llm.litellm import LiteLLMProvider
# Check if Claude Code subscription is configured
# Check if a subscription mode is configured
config = get_hive_config()
llm_config = config.get("llm", {})
use_claude_code = llm_config.get("use_claude_code_subscription", False)
use_codex = llm_config.get("use_codex_subscription", False)
api_base = llm_config.get("api_base")
api_key = None
@@ -771,6 +1105,12 @@ class AgentRunner:
if not api_key:
print("Warning: Claude Code subscription configured but no token found.")
print("Run 'claude' to authenticate, then try again.")
elif use_codex:
# Get OAuth token from Codex subscription
api_key = get_codex_token()
if not api_key:
print("Warning: Codex subscription configured but no token found.")
print("Run 'codex' to authenticate, then try again.")
if api_key and use_claude_code:
# Use litellm's built-in Anthropic OAuth support.
@@ -782,6 +1122,25 @@ class AgentRunner:
api_base=api_base,
extra_headers={"authorization": f"Bearer {api_key}"},
)
elif api_key and use_codex:
# OpenAI Codex subscription routes through the ChatGPT backend
# (chatgpt.com/backend-api/codex/responses), NOT the standard
# OpenAI API. The consumer OAuth token lacks platform API scopes.
extra_headers: dict[str, str] = {
"Authorization": f"Bearer {api_key}",
"User-Agent": "CodexBar",
}
account_id = get_codex_account_id()
if account_id:
extra_headers["ChatGPT-Account-Id"] = account_id
self._llm = LiteLLMProvider(
model=self.model,
api_key=api_key,
api_base="https://chatgpt.com/backend-api/codex",
extra_headers=extra_headers,
store=False,
allowed_openai_params=["store"],
)
else:
# Local models (e.g. Ollama) don't need an API key
if self._is_local_model(self.model):
@@ -818,7 +1177,9 @@ class AgentRunner:
# Fail fast if the agent needs an LLM but none was configured
if self._llm is None:
has_llm_nodes = any(node.node_type == "event_loop" for node in self.graph.nodes)
has_llm_nodes = any(
node.node_type in ("event_loop", "gcu") for node in self.graph.nodes
)
if has_llm_nodes:
from framework.credentials.models import CredentialError
@@ -836,6 +1197,52 @@ class AgentRunner:
)
raise CredentialError(f"LLM API key not found for model '{self.model}'. {hint}")
# For GCU nodes: auto-register GCU MCP server if needed, then expand tool lists
has_gcu_nodes = any(node.node_type == "gcu" for node in self.graph.nodes)
if has_gcu_nodes:
from framework.graph.gcu import GCU_MCP_SERVER_CONFIG, GCU_SERVER_NAME
# Auto-register GCU MCP server if tools aren't loaded yet
gcu_tool_names = self._tool_registry.get_server_tool_names(GCU_SERVER_NAME)
if not gcu_tool_names:
# Resolve cwd to repo-level tools/ (not relative to agent_path)
gcu_config = dict(GCU_MCP_SERVER_CONFIG)
_repo_root = Path(__file__).resolve().parent.parent.parent.parent
gcu_config["cwd"] = str(_repo_root / "tools")
self._tool_registry.register_mcp_server(gcu_config)
gcu_tool_names = self._tool_registry.get_server_tool_names(GCU_SERVER_NAME)
# Expand each GCU node's tools list to include all GCU server tools
if gcu_tool_names:
for node in self.graph.nodes:
if node.node_type == "gcu":
existing = set(node.tools)
for tool_name in sorted(gcu_tool_names):
if tool_name not in existing:
node.tools.append(tool_name)
# For event_loop/gcu nodes: auto-register file tools MCP server, then expand tool lists
has_loop_nodes = any(node.node_type in ("event_loop", "gcu") for node in self.graph.nodes)
if has_loop_nodes:
from framework.graph.files import FILES_MCP_SERVER_CONFIG, FILES_MCP_SERVER_NAME
files_tool_names = self._tool_registry.get_server_tool_names(FILES_MCP_SERVER_NAME)
if not files_tool_names:
# Resolve cwd to repo-level tools/ (not relative to agent_path)
files_config = dict(FILES_MCP_SERVER_CONFIG)
_repo_root = Path(__file__).resolve().parent.parent.parent.parent
files_config["cwd"] = str(_repo_root / "tools")
self._tool_registry.register_mcp_server(files_config)
files_tool_names = self._tool_registry.get_server_tool_names(FILES_MCP_SERVER_NAME)
if files_tool_names:
for node in self.graph.nodes:
if node.node_type in ("event_loop", "gcu"):
existing = set(node.tools)
for tool_name in sorted(files_tool_names):
if tool_name not in existing:
node.tools.append(tool_name)
# Get tools for runtime
tools = list(self._tool_registry.get_tools().values())
tool_executor = self._tool_registry.get_executor()
@@ -847,7 +1254,10 @@ class AgentRunner:
try:
from aden_tools.credentials.store_adapter import CredentialStoreAdapter
adapter = CredentialStoreAdapter.default()
if self._credential_store is not None:
adapter = CredentialStoreAdapter(store=self._credential_store)
else:
adapter = CredentialStoreAdapter.default()
accounts_data = adapter.get_all_account_info()
tool_provider_map = adapter.get_tool_provider_map()
if accounts_data:
@@ -863,6 +1273,7 @@ class AgentRunner:
accounts_prompt=accounts_prompt,
accounts_data=accounts_data,
tool_provider_map=tool_provider_map,
event_bus=event_bus,
)
def _get_api_key_env_var(self, model: str) -> str | None:
@@ -917,9 +1328,11 @@ class AgentRunner:
return None
try:
from framework.credentials import CredentialStore
store = self._credential_store
if store is None:
from framework.credentials import CredentialStore
store = CredentialStore.with_encrypted_storage()
store = CredentialStore.with_encrypted_storage()
return store.get(cred_id)
except Exception:
return None
@@ -947,6 +1360,7 @@ class AgentRunner:
accounts_prompt: str = "",
accounts_data: list[dict] | None = None,
tool_provider_map: dict[str, str] | None = None,
event_bus=None,
) -> None:
"""Set up multi-entry-point execution using AgentRuntime."""
# Convert AsyncEntryPointSpec to EntryPointSpec for AgentRuntime
@@ -961,6 +1375,7 @@ class AgentRunner:
isolation_level=async_ep.isolation_level,
priority=async_ep.priority,
max_concurrent=async_ep.max_concurrent,
max_resurrections=async_ep.max_resurrections,
)
entry_points.append(ep)
@@ -993,17 +1408,14 @@ class AgentRunner:
async_checkpoint=True, # Non-blocking
)
# Handle runtime_config - ensure it's AgentRuntimeConfig, not RuntimeConfig
# RuntimeConfig is for LLM settings; AgentRuntimeConfig is for AgentRuntime settings
# Handle runtime_config - only pass through if it's actually an AgentRuntimeConfig.
# Agents may export a RuntimeConfig (LLM settings) or queen-generated custom classes
# that would crash AgentRuntime if passed through.
runtime_config = None
if self.runtime_config is not None:
from framework.config import RuntimeConfig
from framework.runtime.agent_runtime import AgentRuntimeConfig
# If it's a RuntimeConfig (LLM config), don't pass it
if isinstance(self.runtime_config, RuntimeConfig):
runtime_config = None
else:
# It's already an AgentRuntimeConfig or compatible type
if isinstance(self.runtime_config, AgentRuntimeConfig):
runtime_config = self.runtime_config
self._agent_runtime = create_agent_runtime(
@@ -1021,6 +1433,7 @@ class AgentRunner:
accounts_prompt=accounts_prompt,
accounts_data=accounts_data,
tool_provider_map=tool_provider_map,
event_bus=event_bus,
)
# Pass intro_message through for TUI display
@@ -1372,7 +1785,9 @@ class AgentRunner:
warnings.append(warning_msg)
except ImportError:
# aden_tools not installed - fall back to direct check
has_llm_nodes = any(node.node_type == "event_loop" for node in self.graph.nodes)
has_llm_nodes = any(
node.node_type in ("event_loop", "gcu") for node in self.graph.nodes
)
if has_llm_nodes:
api_key_env = self._get_api_key_env_var(self.model)
if api_key_env and not os.environ.get(api_key_env):
+97 -2
@@ -6,6 +6,7 @@ import importlib.util
import inspect
import json
import logging
import os
from collections.abc import Callable
from dataclasses import dataclass
from pathlib import Path
@@ -47,11 +48,20 @@ class ToolRegistry:
# and auto-injected at call time for tools that accept them.
CONTEXT_PARAMS = frozenset({"workspace_id", "agent_id", "session_id", "data_dir"})
# Credential directory used for change detection
_CREDENTIAL_DIR = Path("~/.hive/credentials/credentials").expanduser()
def __init__(self):
self._tools: dict[str, RegisteredTool] = {}
self._mcp_clients: list[Any] = [] # List of MCPClient instances
self._session_context: dict[str, Any] = {} # Auto-injected context for tools
self._provider_index: dict[str, set[str]] = {} # provider -> tool names
# MCP resync tracking
self._mcp_config_path: Path | None = None # Path used for initial load
self._mcp_tool_names: set[str] = set() # Tool names registered from MCP
self._mcp_cred_snapshot: set[str] = set() # Credential filenames at MCP load time
self._mcp_aden_key_snapshot: str | None = None # ADEN_API_KEY value at MCP load time
self._mcp_server_tools: dict[str, set[str]] = {} # server name -> tool names
def register(
self,
@@ -285,6 +295,10 @@ class ToolRegistry:
"""Check if a tool is registered."""
return name in self._tools
def get_server_tool_names(self, server_name: str) -> set[str]:
"""Return tool names registered from a specific MCP server."""
return set(self._mcp_server_tools.get(server_name, set()))
def set_session_context(self, **context) -> None:
"""
Set session context to auto-inject into tool calls.
@@ -322,8 +336,11 @@ class ToolRegistry:
Args:
config_path: Path to an ``mcp_servers.json`` file.
"""
# Remember config path for potential resync later
self._mcp_config_path = Path(config_path)
try:
with open(config_path) as f:
with open(config_path, encoding="utf-8") as f:
config = json.load(f)
except Exception as e:
logger.warning(f"Failed to load MCP config from {config_path}: {e}")
@@ -349,6 +366,10 @@ class ToolRegistry:
name = server_config.get("name", "unknown")
logger.warning(f"Failed to register MCP server '{name}': {e}")
# Snapshot credential files and ADEN_API_KEY so we can detect mid-session changes
self._mcp_cred_snapshot = self._snapshot_credentials()
self._mcp_aden_key_snapshot = os.environ.get("ADEN_API_KEY")
def register_mcp_server(
self,
server_config: dict[str, Any],
@@ -395,6 +416,9 @@ class ToolRegistry:
self._mcp_clients.append(client)
# Register each tool
server_name = server_config["name"]
if server_name not in self._mcp_server_tools:
self._mcp_server_tools[server_name] = set()
count = 0
for mcp_tool in client.list_tools():
# Convert MCP tool to framework Tool (strips context params from LLM schema)
@@ -419,7 +443,15 @@ class ToolRegistry:
filtered_context = {
k: v for k, v in base_context.items() if k in tool_params
}
merged_inputs = {**filtered_context, **inputs}
# Strip context params from LLM inputs — the framework
# values are authoritative (prevents the LLM from passing
# e.g. data_dir="/data" and overriding the real path).
clean_inputs = {
k: v
for k, v in inputs.items()
if k not in registry_ref.CONTEXT_PARAMS
}
merged_inputs = {**clean_inputs, **filtered_context}
result = client_ref.call_tool(tool_name, merged_inputs)
# MCP tools return content array, extract the result
if isinstance(result, list) and len(result) > 0:
@@ -439,6 +471,8 @@ class ToolRegistry:
tool,
make_mcp_executor(client, mcp_tool.name, self, tool_params),
)
self._mcp_tool_names.add(mcp_tool.name)
self._mcp_server_tools[server_name].add(mcp_tool.name)
count += 1
logger.info(f"Registered {count} tools from MCP server '{config.name}'")
@@ -531,6 +565,67 @@ class ToolRegistry:
all_names.update(names)
return sorted(name for name in self._tools if name in all_names)
# ------------------------------------------------------------------
# MCP credential resync
# ------------------------------------------------------------------
def _snapshot_credentials(self) -> set[str]:
"""Return the set of credential filenames currently on disk."""
try:
# Use names, matching the set[str] annotation on _mcp_cred_snapshot.
return (
    {p.name for p in self._CREDENTIAL_DIR.iterdir()}
    if self._CREDENTIAL_DIR.is_dir()
    else set()
)
except OSError:
return set()
def resync_mcp_servers_if_needed(self) -> bool:
"""Restart MCP servers if credential files changed since last load.
Compares the current credential directory listing against the snapshot
taken when MCP servers were first loaded. If new files appeared (e.g.
user connected an OAuth account mid-session), disconnects all MCP
clients and re-loads them so the new subprocess picks up the fresh
credentials.
Returns True if a resync was performed, False otherwise.
"""
if not self._mcp_clients or self._mcp_config_path is None:
return False
current = self._snapshot_credentials()
current_aden_key = os.environ.get("ADEN_API_KEY")
files_changed = current != self._mcp_cred_snapshot
aden_key_changed = current_aden_key != self._mcp_aden_key_snapshot
if not files_changed and not aden_key_changed:
return False
reason = (
"Credential files and ADEN_API_KEY changed"
if files_changed and aden_key_changed
else "ADEN_API_KEY changed"
if aden_key_changed
else "Credential files changed"
)
logger.info("%s — resyncing MCP servers", reason)
# 1. Disconnect existing MCP clients
for client in self._mcp_clients:
try:
client.disconnect()
except Exception as e:
logger.warning(f"Error disconnecting MCP client during resync: {e}")
self._mcp_clients.clear()
# 2. Remove MCP-registered tools
for name in self._mcp_tool_names:
self._tools.pop(name, None)
self._mcp_tool_names.clear()
# 3. Re-load MCP servers (spawns fresh subprocesses with new credentials)
self.load_mcp_config(self._mcp_config_path)
logger.info("MCP server resync complete")
return True
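A hedged sketch of where a caller might invoke the resync, for example before each agent turn so an OAuth account connected mid-session becomes usable. The loop shape and config path are assumptions, not from this diff:
from pathlib import Path

registry = ToolRegistry()
registry.load_mcp_config(Path("exports/demo_agent/mcp_servers.json"))  # illustrative path

while True:
    user_msg = input("> ")
    # Cheap check (one directory listing plus one env read); MCP
    # subprocesses restart only when credentials actually changed.
    if registry.resync_mcp_servers_if_needed():
        print("(tools reloaded with fresh credentials)")
    ...  # hand user_msg to the agent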
def cleanup(self) -> None:
"""Clean up all MCP client connections."""
for client in self._mcp_clients:
+371 -35
@@ -19,9 +19,9 @@ from framework.graph.executor import ExecutionResult
from framework.runtime.event_bus import EventBus
from framework.runtime.execution_stream import EntryPointSpec, ExecutionStream
from framework.runtime.outcome_aggregator import OutcomeAggregator
from framework.runtime.runtime_log_store import RuntimeLogStore
from framework.runtime.shared_state import SharedStateManager
from framework.storage.concurrent import ConcurrentStorage
from framework.runtime.runtime_log_store import RuntimeLogStore
from framework.storage.session_store import SessionStore
if TYPE_CHECKING:
@@ -130,6 +130,7 @@ class AgentRuntime:
accounts_prompt: str = "",
accounts_data: list[dict] | None = None,
tool_provider_map: dict[str, str] | None = None,
event_bus: "EventBus | None" = None,
):
"""
Initialize agent runtime.
@@ -148,6 +149,9 @@ class AgentRuntime:
accounts_prompt: Connected accounts block for system prompt injection
accounts_data: Raw account data for per-node prompt generation
tool_provider_map: Tool name to provider name mapping for account routing
event_bus: Optional external EventBus. If provided, the runtime shares
this bus instead of creating its own. Used by SessionManager to
share a single bus between queen, worker, and judge.
"""
self.graph = graph
self.goal = goal
@@ -179,7 +183,7 @@ class AgentRuntime:
# Initialize shared components
self._state_manager = SharedStateManager()
self._event_bus = EventBus(max_history=self._config.max_history)
self._event_bus = event_bus or EventBus(max_history=self._config.max_history)
self._outcome_aggregator = OutcomeAggregator(goal, self._event_bus)
# LLM and tools
@@ -205,6 +209,7 @@ class AgentRuntime:
# State
self._running = False
self._timers_paused = False
self._lock = asyncio.Lock()
# Optional greeting shown to user on TUI load (set by AgentRunner)
@@ -406,7 +411,12 @@ class AgentRuntime:
)
continue
def _make_cron_timer(entry_point_id: str, expr: str, immediate: bool):
def _make_cron_timer(
entry_point_id: str,
expr: str,
immediate: bool,
idle_timeout: float = 300,
):
async def _cron_loop():
from croniter import croniter
@@ -420,19 +430,80 @@ class AgentRuntime:
)
await asyncio.sleep(max(0, sleep_secs))
while self._running:
# Calculate next fire time upfront (used by skip paths too)
cron = croniter(expr, datetime.now())
next_dt = cron.get_next(datetime)
sleep_secs = (next_dt - datetime.now()).total_seconds()
# Gate: skip tick if timers are explicitly paused
if self._timers_paused:
logger.debug(
"Cron '%s': paused, skipping tick",
entry_point_id,
)
self._timer_next_fire[entry_point_id] = (
time.monotonic() + sleep_secs
)
await asyncio.sleep(max(0, sleep_secs))
continue
# Gate: skip tick if ANY stream is actively working.
# If the execution is idle (no LLM/tool activity
# beyond idle_timeout) let the timer proceed —
# execute() will cancel the stale execution.
_any_active = False
_min_idle = float("inf")
for _s in self._streams.values():
if _s.active_execution_ids:
_any_active = True
_idle = _s.agent_idle_seconds
if _idle < _min_idle:
_min_idle = _idle
logger.info(
"Cron '%s': gate — active=%s, idle=%.1fs, timeout=%ds",
entry_point_id,
_any_active,
_min_idle,
idle_timeout,
)
if _any_active and _min_idle < idle_timeout:
logger.info(
"Cron '%s': agent actively working, skipping tick",
entry_point_id,
)
self._timer_next_fire[entry_point_id] = (
time.monotonic() + sleep_secs
)
await asyncio.sleep(max(0, sleep_secs))
continue
self._timer_next_fire.pop(entry_point_id, None)
try:
ep_spec = self._entry_points.get(entry_point_id)
is_isolated = ep_spec and ep_spec.isolation_level == "isolated"
if is_isolated:
if _persistent_session_id:
session_state = {"resume_session_id": _persistent_session_id}
session_state = {
"resume_session_id": _persistent_session_id
}
else:
session_state = None
else:
session_state = self._get_primary_session_state(
exclude_entry_point=entry_point_id
)
# Gate: skip tick if no active session
if session_state is None:
logger.debug(
"Cron '%s': no active session, skipping",
entry_point_id,
)
self._timer_next_fire[entry_point_id] = (
time.monotonic() + sleep_secs
)
await asyncio.sleep(max(0, sleep_secs))
continue
exec_id = await self.trigger(
entry_point_id,
{
@@ -468,7 +539,12 @@ class AgentRuntime:
return _cron_loop
task = asyncio.create_task(
_make_cron_timer(ep_id, cron_expr, run_immediately)()
_make_cron_timer(
ep_id,
cron_expr,
run_immediately,
idle_timeout=tc.get("idle_timeout_seconds", 300),
)()
)
self._timer_tasks.append(task)
logger.info(
@@ -480,7 +556,12 @@ class AgentRuntime:
elif interval and interval > 0:
# Fixed interval mode (original behavior)
def _make_timer(entry_point_id: str, mins: float, immediate: bool):
def _make_timer(
entry_point_id: str,
mins: float,
immediate: bool,
idle_timeout: float = 300,
):
async def _timer_loop():
interval_secs = mins * 60
_persistent_session_id: str | None = None
@@ -490,19 +571,73 @@ class AgentRuntime:
)
await asyncio.sleep(interval_secs)
while self._running:
# Gate: skip tick if timers are explicitly paused
if self._timers_paused:
logger.debug(
"Timer '%s': paused, skipping tick",
entry_point_id,
)
self._timer_next_fire[entry_point_id] = (
time.monotonic() + interval_secs
)
await asyncio.sleep(interval_secs)
continue
# Gate: skip tick if agent is actively working.
# Gate: skip tick if ANY stream is actively working.
_any_active = False
_min_idle = float("inf")
for _s in self._streams.values():
if _s.active_execution_ids:
_any_active = True
_idle = _s.agent_idle_seconds
if _idle < _min_idle:
_min_idle = _idle
logger.info(
"Timer '%s': gate — active=%s, idle=%.1fs, timeout=%ds",
entry_point_id,
_any_active,
_min_idle,
idle_timeout,
)
if _any_active and _min_idle < idle_timeout:
logger.info(
"Timer '%s': agent actively working, skipping tick",
entry_point_id,
)
self._timer_next_fire[entry_point_id] = (
time.monotonic() + interval_secs
)
await asyncio.sleep(interval_secs)
continue
self._timer_next_fire.pop(entry_point_id, None)
try:
ep_spec = self._entry_points.get(entry_point_id)
is_isolated = ep_spec and ep_spec.isolation_level == "isolated"
if is_isolated:
if _persistent_session_id:
session_state = {"resume_session_id": _persistent_session_id}
session_state = {
"resume_session_id": _persistent_session_id
}
else:
session_state = None
else:
session_state = self._get_primary_session_state(
exclude_entry_point=entry_point_id
)
# Gate: skip tick if no active session
if session_state is None:
logger.debug(
"Timer '%s': no active session, skipping",
entry_point_id,
)
self._timer_next_fire[entry_point_id] = (
time.monotonic() + interval_secs
)
await asyncio.sleep(interval_secs)
continue
exec_id = await self.trigger(
entry_point_id,
{
@@ -533,7 +668,14 @@ class AgentRuntime:
return _timer_loop
task = asyncio.create_task(_make_timer(ep_id, interval, run_immediately)())
task = asyncio.create_task(
_make_timer(
ep_id,
interval,
run_immediately,
idle_timeout=tc.get("idle_timeout_seconds", 300),
)()
)
self._timer_tasks.append(task)
logger.info(
"Started timer for entry point '%s' every %s min%s",
@@ -562,6 +704,7 @@ class AgentRuntime:
)
self._running = True
self._timers_paused = False
logger.info(f"AgentRuntime started with {len(self._streams)} streams")
async def stop(self) -> None:
@@ -603,6 +746,19 @@ class AgentRuntime:
self._running = False
logger.info("AgentRuntime stopped")
def pause_timers(self) -> None:
"""Pause all timer-driven entry points.
Timers will skip their ticks until ``resume_timers()`` is called.
"""
self._timers_paused = True
logger.info("Timers paused")
def resume_timers(self) -> None:
"""Resume timer-driven entry points after a pause."""
self._timers_paused = False
logger.info("Timers resumed")
def _resolve_stream(
self,
entry_point_id: str,
@@ -847,12 +1003,19 @@ class AgentRuntime:
if interval and interval > 0 and self._running:
logger.info(
"Creating timer for '%s::%s': interval=%s min, immediate=%s, loop=%s",
graph_id, ep_id, interval, run_immediately,
graph_id,
ep_id,
interval,
run_immediately,
id(asyncio.get_event_loop()),
)
def _make_timer(
gid: str, local_ep: str, mins: float, immediate: bool,
gid: str,
local_ep: str,
mins: float,
immediate: bool,
idle_timeout: float = 300,
):
async def _timer_loop():
interval_secs = mins * 60
@@ -863,12 +1026,54 @@ class AgentRuntime:
logger.info(
"Timer loop started for '%s::%s' (sleep %ss)",
gid, local_ep, interval_secs,
gid,
local_ep,
interval_secs,
)
if not immediate:
timer_next_fire[local_ep] = time.monotonic() + interval_secs
await asyncio.sleep(interval_secs)
while self._running and gid in self._graphs:
# Gate: skip tick if timers are explicitly paused
if self._timers_paused:
logger.debug(
"Timer '%s::%s': paused, skipping tick",
gid,
local_ep,
)
timer_next_fire[local_ep] = time.monotonic() + interval_secs
await asyncio.sleep(interval_secs)
continue
# Gate: skip tick if ANY stream in this graph is actively working.
_reg = self._graphs.get(gid)
_any_active = False
_min_idle = float("inf")
if _reg:
for _sid, _s in _reg.streams.items():
if _s.active_execution_ids:
_any_active = True
_idle = _s.agent_idle_seconds
if _idle < _min_idle:
_min_idle = _idle
logger.info(
"Timer '%s::%s': gate — active=%s, idle=%.1fs, timeout=%ds",
gid,
local_ep,
_any_active,
_min_idle,
idle_timeout,
)
if _any_active and _min_idle < idle_timeout:
logger.info(
"Timer '%s::%s': agent actively working, skipping tick",
gid,
local_ep,
)
timer_next_fire[local_ep] = time.monotonic() + interval_secs
await asyncio.sleep(interval_secs)
continue
logger.info("Timer firing for '%s::%s'", gid, local_ep)
timer_next_fire.pop(local_ep, None)
try:
@@ -878,28 +1083,47 @@ class AgentRuntime:
break
stream = reg.streams.get(local_ep)
if not stream:
logger.warning("Timer: no stream '%s' in '%s', stopping", local_ep, gid)
logger.warning(
"Timer: no stream '%s' in '%s', stopping", local_ep, gid
)
break
# Isolated entry points get their own session;
# shared ones join the primary session.
ep_spec = reg.entry_points.get(local_ep)
if ep_spec and ep_spec.isolation_level == "isolated":
if _persistent_session_id:
session_state = {"resume_session_id": _persistent_session_id}
session_state = {
"resume_session_id": _persistent_session_id
}
else:
session_state = None
else:
session_state = self._get_primary_session_state(
local_ep, source_graph_id=gid
)
# Gate: skip tick if no active session
if session_state is None:
logger.debug(
"Timer '%s::%s': no active session, skipping",
gid,
local_ep,
)
timer_next_fire[local_ep] = time.monotonic() + interval_secs
await asyncio.sleep(interval_secs)
continue
exec_id = await stream.execute(
{"event": {"source": "timer", "reason": "scheduled"}},
session_state=session_state,
)
# Remember session ID for reuse on next tick
if not _persistent_session_id and ep_spec and ep_spec.isolation_level == "isolated":
if (
not _persistent_session_id
and ep_spec
and ep_spec.isolation_level == "isolated"
):
_persistent_session_id = exec_id
except Exception as exc:
except Exception:
logger.error(
"Timer trigger failed for '%s::%s'",
gid,
@@ -913,7 +1137,13 @@ class AgentRuntime:
return _timer_loop
task = asyncio.create_task(
_make_timer(graph_id, ep_id, interval, run_immediately)()
_make_timer(
graph_id,
ep_id,
interval,
run_immediately,
idle_timeout=tc.get("idle_timeout_seconds", 300),
)()
)
timer_tasks.append(task)
logger.info("Timer task created for '%s::%s': %s", graph_id, ep_id, task)
@@ -1021,10 +1251,61 @@ class AgentRuntime:
return float("inf")
return time.monotonic() - self._last_user_input_time
@property
def agent_idle_seconds(self) -> float:
"""Seconds since any stream last had activity (LLM call, tool call, etc.).
Returns the *minimum* idle time across all streams with active
executions. Returns ``float('inf')`` if nothing is running.
"""
min_idle = float("inf")
for reg in self._graphs.values():
for stream in reg.streams.values():
idle = stream.agent_idle_seconds
if idle < min_idle:
min_idle = idle
return min_idle
def get_graph_registration(self, graph_id: str) -> _GraphRegistration | None:
"""Get the registration for a specific graph (or None)."""
return self._graphs.get(graph_id)
def cancel_all_tasks(self, loop: asyncio.AbstractEventLoop) -> bool:
"""Cancel all running execution tasks across all graphs.
Schedules the cancellation on *loop* (the agent event loop) so
that ``_execution_tasks`` is only read from the thread that owns
it, avoiding cross-thread dict access. Safe to call from any
thread (e.g. the Textual UI thread).
Blocks the caller for up to 5 seconds waiting for the result.
For async callers, use :meth:`cancel_all_tasks_async` instead.
"""
future = asyncio.run_coroutine_threadsafe(self.cancel_all_tasks_async(), loop)
try:
return future.result(timeout=5)
except Exception:
logger.warning("cancel_all_tasks: timed out or failed")
return False
async def cancel_all_tasks_async(self) -> bool:
"""Cancel all running execution tasks (runs on the agent loop).
Iterates ``_execution_tasks`` and calls ``task.cancel()`` directly.
Must be awaited on the agent event loop so dict access is
thread-safe. Returns True if at least one task was cancelled.
"""
cancelled = False
for gid in self.list_graphs():
reg = self.get_graph_registration(gid)
if reg:
for stream in reg.streams.values():
for task in list(stream._execution_tasks.values()):
if task and not task.done():
task.cancel()
cancelled = True
return cancelled
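cancel_all_tasks is the standard run_coroutine_threadsafe bridge: schedule a coroutine on the loop that owns the state, then block briefly for the result. The pattern in isolation (generic sketch, not tied to AgentRuntime):
import asyncio
import threading

loop = asyncio.new_event_loop()
threading.Thread(target=loop.run_forever, daemon=True).start()

async def owned_by_loop() -> str:
    # Touches state that must only be accessed on loop's thread.
    return "done"

# From any other thread (e.g. a UI thread): schedule on the agent loop
# and block briefly for the result, exactly like cancel_all_tasks().
future = asyncio.run_coroutine_threadsafe(owned_by_loop(), loop)
print(future.result(timeout=5))
loop.call_soon_threadsafe(loop.stop)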
def _get_primary_session_state(
self,
exclude_entry_point: str,
@@ -1119,7 +1400,14 @@ class AgentRuntime:
)
return None
async def inject_input(self, node_id: str, content: str, graph_id: str | None = None) -> bool:
async def inject_input(
self,
node_id: str,
content: str,
graph_id: str | None = None,
*,
is_client_input: bool = False,
) -> bool:
"""Inject user input into a running client-facing node.
Routes input to the EventLoopNode identified by ``node_id``.
@@ -1129,6 +1417,8 @@ class AgentRuntime:
node_id: The node currently waiting for input
content: The user's input text
graph_id: Optional graph to search first (defaults to active graph)
is_client_input: True when the message originates from a real
human user (e.g. /chat endpoint), False for external events.
Returns:
True if input was delivered, False if no matching node found
@@ -1140,7 +1430,7 @@ class AgentRuntime:
target = graph_id or self._active_graph_id
if target in self._graphs:
for stream in self._graphs[target].streams.values():
if await stream.inject_input(node_id, content):
if await stream.inject_input(node_id, content, is_client_input=is_client_input):
return True
# Then search all other graphs
@@ -1148,7 +1438,7 @@ class AgentRuntime:
if gid == target:
continue
for stream in reg.streams.values():
if await stream.inject_input(node_id, content):
if await stream.inject_input(node_id, content, is_client_input=is_client_input):
return True
return False
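# Illustration (hypothetical node id): input from the /chat endpoint comes
# from a real human, so callers there pass is_client_input=True, while
# webhook- or timer-originated messages leave it False:
#   await runtime.inject_input("gather_info", "yes, proceed", is_client_input=True)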
@@ -1206,9 +1496,43 @@ class AgentRuntime:
# Fallback: primary graph
return list(self._entry_points.values())
def get_stream(self, entry_point_id: str, graph_id: str | None = None) -> ExecutionStream | None:
"""Get a specific execution stream (searches active graph first)."""
return self._resolve_stream(entry_point_id, graph_id)
def get_timer_next_fire_in(self, entry_point_id: str) -> float | None:
"""Return seconds until the next timer fire for *entry_point_id*.
Checks the primary graph's ``_timer_next_fire`` dict as well as
all registered secondary graphs. Returns ``None`` when no fire
time is recorded (e.g. the timer is currently executing or the
entry point is not a timer).
"""
mono = self._timer_next_fire.get(entry_point_id)
if mono is not None:
return max(0.0, mono - time.monotonic())
for reg in self._graphs.values():
mono = reg.timer_next_fire.get(entry_point_id)
if mono is not None:
return max(0.0, mono - time.monotonic())
return None
def get_stream(self, entry_point_id: str) -> ExecutionStream | None:
"""Get a specific execution stream."""
return self._streams.get(entry_point_id)
def find_awaiting_node(self) -> tuple[str | None, str | None]:
"""Find a node that is currently awaiting user input.
Searches all graphs and their streams for any active executor
whose node has ``_awaiting_input`` set to ``True``.
Returns:
(node_id, graph_id) if found, else (None, None).
"""
for graph_id, reg in self._graphs.items():
for stream in reg.streams.values():
for executor in stream._active_executors.values():
for node_id, node in executor.node_registry.items():
if getattr(node, "_awaiting_input", False):
return node_id, graph_id
return None, None
def get_execution_result(
self,
@@ -1284,14 +1608,16 @@ class AgentRuntime:
active = stream.active_execution_ids
if not active:
continue
result.append({
"graph_id": graph_id,
"stream_id": stream.stream_id,
"entry_point_id": ep_id,
"active_execution_ids": active,
"is_awaiting_input": stream.is_awaiting_input,
"waiting_nodes": stream.get_waiting_nodes(),
})
result.append(
{
"graph_id": graph_id,
"stream_id": stream.stream_id,
"entry_point_id": ep_id,
"active_execution_ids": active,
"is_awaiting_input": stream.is_awaiting_input,
"waiting_nodes": stream.get_waiting_nodes(),
}
)
return result
def get_waiting_nodes(self) -> list[dict[str, Any]]:
@@ -1304,11 +1630,13 @@ class AgentRuntime:
for graph_id, reg in self._graphs.items():
for _ep_id, stream in reg.streams.items():
for waiting in stream.get_waiting_nodes():
result.append({
"graph_id": graph_id,
"stream_id": stream.stream_id,
**waiting,
})
result.append(
{
"graph_id": graph_id,
"stream_id": stream.stream_id,
**waiting,
}
)
return result
# === PROPERTIES ===
@@ -1333,6 +1661,11 @@ class AgentRuntime:
"""Access the webhook server (None if no webhook entry points)."""
return self._webhook_server
@property
def timers_paused(self) -> bool:
"""True when timer-driven entry points are paused (e.g. by stop_worker)."""
return self._timers_paused
@property
def is_running(self) -> bool:
"""Check if runtime is running."""
@@ -1358,6 +1691,7 @@ def create_agent_runtime(
accounts_prompt: str = "",
accounts_data: list[dict] | None = None,
tool_provider_map: dict[str, str] | None = None,
event_bus: "EventBus | None" = None,
) -> AgentRuntime:
"""
Create and configure an AgentRuntime with entry points.
@@ -1383,6 +1717,7 @@ def create_agent_runtime(
graph_id: Optional identifier for the primary graph (defaults to "primary").
accounts_data: Raw account data for per-node prompt generation.
tool_provider_map: Tool name to provider name mapping for account routing.
event_bus: Optional external EventBus to share with other components.
Returns:
Configured AgentRuntime (not yet started)
@@ -1408,6 +1743,7 @@ def create_agent_runtime(
accounts_prompt=accounts_prompt,
accounts_data=accounts_data,
tool_provider_map=tool_provider_map,
event_bus=event_bus,
)
for spec in entry_points:
+6 -6
View File
@@ -27,13 +27,13 @@ class EscalationTicket(BaseModel):
# Problem characterization (filled by judge via LLM deliberation)
severity: Literal["low", "medium", "high", "critical"]
cause: str # Human-readable: "Node has produced 18 RETRY verdicts..."
cause: str # Human-readable: "Node has produced 18 RETRY verdicts..."
judge_reasoning: str # Judge's own deliberation chain
suggested_action: str # "Restart node", "Human review", "Kill session", etc.
suggested_action: str # "Restart node", "Human review", "Kill session", etc.
# Evidence
recent_verdicts: list[str] # e.g. ["RETRY", "RETRY", "CONTINUE", "RETRY"]
total_steps_checked: int # How many steps the judge saw
steps_since_last_accept: int # Steps with no ACCEPT verdict
recent_verdicts: list[str] # e.g. ["RETRY", "RETRY", "CONTINUE", "RETRY"]
total_steps_checked: int # How many steps the judge saw
steps_since_last_accept: int # Steps with no ACCEPT verdict
stall_minutes: float | None # Wall-clock minutes since last new log step (None if active)
evidence_snippet: str # Brief excerpt from recent LLM output or error
evidence_snippet: str # Brief excerpt from recent LLM output or error
+103 -3
View File
@@ -83,10 +83,12 @@ class EventType(StrEnum):
NODE_LOOP_STARTED = "node_loop_started"
NODE_LOOP_ITERATION = "node_loop_iteration"
NODE_LOOP_COMPLETED = "node_loop_completed"
NODE_ACTION_PLAN = "node_action_plan"
# LLM streaming observability
LLM_TEXT_DELTA = "llm_text_delta"
LLM_REASONING_DELTA = "llm_reasoning_delta"
LLM_TURN_COMPLETE = "llm_turn_complete"
# Tool lifecycle
TOOL_CALL_STARTED = "tool_call_started"
@@ -128,6 +130,19 @@ class EventType(StrEnum):
WORKER_ESCALATION_TICKET = "worker_escalation_ticket"
QUEEN_INTERVENTION_REQUESTED = "queen_intervention_requested"
# Execution resurrection (auto-restart on non-fatal failure)
EXECUTION_RESURRECTED = "execution_resurrected"
# Worker lifecycle (session manager → frontend)
WORKER_LOADED = "worker_loaded"
CREDENTIALS_REQUIRED = "credentials_required"
# Queen mode changes (building ↔ running)
QUEEN_MODE_CHANGED = "queen_mode_changed"
# Subagent reports (one-way progress updates from sub-agents)
SUBAGENT_REPORT = "subagent_report"
@dataclass
class AgentEvent:
@@ -533,6 +548,24 @@ class EventBus:
)
)
async def emit_node_action_plan(
self,
stream_id: str,
node_id: str,
plan: str,
execution_id: str | None = None,
) -> None:
"""Emit node action plan event."""
await self.publish(
AgentEvent(
type=EventType.NODE_ACTION_PLAN,
stream_id=stream_id,
node_id=node_id,
execution_id=execution_id,
data={"plan": plan},
)
)
# === LLM STREAMING PUBLISHERS ===
async def emit_llm_text_delta(
@@ -572,6 +605,36 @@ class EventBus:
)
)
async def emit_llm_turn_complete(
self,
stream_id: str,
node_id: str,
stop_reason: str,
model: str,
input_tokens: int,
output_tokens: int,
execution_id: str | None = None,
iteration: int | None = None,
) -> None:
"""Emit LLM turn completion with stop reason and model metadata."""
data: dict = {
"stop_reason": stop_reason,
"model": model,
"input_tokens": input_tokens,
"output_tokens": output_tokens,
}
if iteration is not None:
data["iteration"] = iteration
await self.publish(
AgentEvent(
type=EventType.LLM_TURN_COMPLETE,
stream_id=stream_id,
node_id=node_id,
execution_id=execution_id,
data=data,
)
)
# === TOOL LIFECYCLE PUBLISHERS ===
async def emit_tool_call_started(
@@ -633,15 +696,19 @@ class EventBus:
content: str,
snapshot: str,
execution_id: str | None = None,
iteration: int | None = None,
) -> None:
"""Emit client output delta event (client_facing=True nodes)."""
data: dict = {"content": content, "snapshot": snapshot}
if iteration is not None:
data["iteration"] = iteration
await self.publish(
AgentEvent(
type=EventType.CLIENT_OUTPUT_DELTA,
stream_id=stream_id,
node_id=node_id,
execution_id=execution_id,
data={"content": content, "snapshot": snapshot},
data=data,
)
)
@@ -651,15 +718,24 @@ class EventBus:
node_id: str,
prompt: str = "",
execution_id: str | None = None,
options: list[str] | None = None,
) -> None:
"""Emit client input requested event (client_facing=True nodes)."""
"""Emit client input requested event (client_facing=True nodes).
Args:
options: Optional predefined choices for the user (1-3 items).
The frontend appends an "Other" free-text option automatically.
"""
data: dict[str, Any] = {"prompt": prompt}
if options:
data["options"] = options
await self.publish(
AgentEvent(
type=EventType.CLIENT_INPUT_REQUESTED,
stream_id=stream_id,
node_id=node_id,
execution_id=execution_id,
data={"prompt": prompt},
data=data,
)
)
@@ -954,6 +1030,30 @@ class EventBus:
)
)
async def emit_subagent_report(
self,
stream_id: str,
node_id: str,
subagent_id: str,
message: str,
data: dict[str, Any] | None = None,
execution_id: str | None = None,
) -> None:
"""Emit a one-way progress report from a sub-agent."""
await self.publish(
AgentEvent(
type=EventType.SUBAGENT_REPORT,
stream_id=stream_id,
node_id=node_id,
execution_id=execution_id,
data={
"subagent_id": subagent_id,
"message": message,
"data": data,
},
)
)
# === QUERY OPERATIONS ===
def get_history(
+258 -61
View File
@@ -32,6 +32,19 @@ if TYPE_CHECKING:
from framework.storage.concurrent import ConcurrentStorage
from framework.storage.session_store import SessionStore
class ExecutionAlreadyRunningError(RuntimeError):
"""Raised when attempting to start an execution on a stream that already has one running."""
def __init__(self, stream_id: str, active_ids: list[str]):
self.stream_id = stream_id
self.active_ids = active_ids
super().__init__(
f"Stream '{stream_id}' already has an active execution: {active_ids}. "
"Concurrent executions on the same stream are not allowed."
)
logger = logging.getLogger(__name__)
@@ -56,9 +69,11 @@ class GraphScopedEventBus(EventBus):
# (subscriptions, history, semaphore, etc.) to the real bus.
self._real_bus = bus
self._scope_graph_id = graph_id
self.last_activity_time: float = time.monotonic()
async def publish(self, event: "AgentEvent") -> None: # type: ignore[override]
event.graph_id = self._scope_graph_id
self.last_activity_time = time.monotonic()
await self._real_bus.publish(event)
# --- Delegate state-reading methods to the real bus ---
@@ -93,6 +108,7 @@ class EntryPointSpec:
isolation_level: str = "shared" # "isolated" | "shared" | "synchronized"
priority: int = 0
max_concurrent: int = 10 # Max concurrent executions for this entry point
max_resurrections: int = 3 # Auto-restart on non-fatal failure (0 to disable)
def get_isolation_level(self) -> IsolationLevel:
"""Convert string isolation level to enum."""
@@ -233,9 +249,11 @@ class ExecutionStream:
self._lock = asyncio.Lock()
# Graph-scoped event bus (stamps graph_id on published events)
self._scoped_event_bus = self._event_bus
if self._event_bus and self.graph_id:
self._scoped_event_bus = GraphScopedEventBus(self._event_bus, self.graph_id)
# Always wrap in GraphScopedEventBus so we can track last_activity_time.
if self._event_bus:
self._scoped_event_bus = GraphScopedEventBus(self._event_bus, self.graph_id or "")
else:
self._scoped_event_bus = None
# State
self._running = False
@@ -265,6 +283,21 @@ class ExecutionStream:
"""Return IDs of all currently active executions."""
return list(self._active_executions.keys())
@property
def agent_idle_seconds(self) -> float:
"""Seconds since the last agent activity (LLM call, tool call, node transition).
Returns ``float('inf')`` if no event bus is attached or no events have
been published yet. When there are no active executions, also returns
``float('inf')`` (nothing to be idle *about*).
"""
if not self._active_executions:
return float("inf")
bus = self._scoped_event_bus
if isinstance(bus, GraphScopedEventBus):
return time.monotonic() - bus.last_activity_time
return float("inf")
@property
def is_awaiting_input(self) -> bool:
"""True when an active execution is blocked waiting for client input."""
@@ -292,13 +325,21 @@ class ExecutionStream:
"""Return nodes that support message injection (have ``inject_event``).
Each entry is ``{"node_id": ..., "execution_id": ...}``.
The currently executing node is placed first so that
``inject_worker_message`` targets the active node, not a stale one.
"""
injectable: list[dict[str, str]] = []
current_first: list[dict[str, str]] = []
for exec_id, executor in self._active_executors.items():
current = getattr(executor, "current_node_id", None)
for node_id, node in executor.node_registry.items():
if hasattr(node, "inject_event"):
injectable.append({"node_id": node_id, "execution_id": exec_id})
return injectable
entry = {"node_id": node_id, "execution_id": exec_id}
if node_id == current:
current_first.append(entry)
else:
injectable.append(entry)
return current_first + injectable
def _record_execution_result(self, execution_id: str, result: ExecutionResult) -> None:
"""Record a completed execution result with retention pruning."""
@@ -329,20 +370,21 @@ class ExecutionStream:
self._running = False
# Cancel all active executions
tasks_to_wait = []
for _, task in self._execution_tasks.items():
if not task.done():
task.cancel()
try:
await task
except asyncio.CancelledError:
pass
except RuntimeError as e:
# Task may be attached to a different event loop (e.g., when TUI
# uses a separate loop). Log and continue cleanup.
if "attached to a different loop" in str(e):
logger.warning(f"Task cleanup skipped (different event loop): {e}")
else:
raise
tasks_to_wait.append(task)
if tasks_to_wait:
# Wait briefly — don't block indefinitely if tasks are stuck
# in long-running operations (LLM calls, tool executions).
_, pending = await asyncio.wait(tasks_to_wait, timeout=5.0)
if pending:
logger.warning(
"%d execution task(s) did not finish within 5s after cancellation",
len(pending),
)
self._execution_tasks.clear()
self._active_executions.clear()
@@ -360,7 +402,13 @@ class ExecutionStream:
)
)
async def inject_input(self, node_id: str, content: str) -> bool:
async def inject_input(
self,
node_id: str,
content: str,
*,
is_client_input: bool = False,
) -> bool:
"""Inject user input into a running client-facing EventLoopNode.
Searches active executors for a node matching ``node_id`` and calls
@@ -371,7 +419,7 @@ class ExecutionStream:
for executor in self._active_executors.values():
node = executor.node_registry.get(node_id)
if node is not None and hasattr(node, "inject_event"):
await node.inject_event(content)
await node.inject_event(content, is_client_input=is_client_input)
return True
return False
@@ -397,6 +445,27 @@ class ExecutionStream:
if not self._running:
raise RuntimeError(f"ExecutionStream '{self.stream_id}' is not running")
# Only one execution may run on a stream at a time — concurrent
# executions corrupt shared session state. Cancel any running
# execution before starting the new one. The cancelled execution
# writes its state to disk before cleanup, and the new execution
# runs in the same session directory (via resume_session_id).
active = self.active_execution_ids
for eid in active:
logger.info(
"Cancelling running execution %s on stream '%s' before starting new one",
eid,
self.stream_id,
)
executor = self._active_executors.get(eid)
if executor:
for node in executor.node_registry.values():
if hasattr(node, "signal_shutdown"):
node.signal_shutdown()
if hasattr(node, "cancel_current_turn"):
node.cancel_current_turn()
await self.cancel_execution(eid)
# When resuming, reuse the original session ID so the execution
# continues in the same session directory instead of creating a new one.
resume_session_id = session_state.get("resume_session_id") if session_state else None
@@ -442,8 +511,44 @@ class ExecutionStream:
logger.debug(f"Queued execution {execution_id} for stream {self.stream_id}")
return execution_id
# Errors that indicate resurrection won't help — the same error will recur.
# Includes both configuration/environment errors and deterministic node
# failures where the conversation/state hasn't changed.
_FATAL_ERROR_PATTERNS: tuple[str, ...] = (
# Configuration / environment
"credential",
"authentication",
"unauthorized",
"forbidden",
"api key",
"import error",
"module not found",
"no module named",
"permission denied",
"invalid api",
"configuration error",
# Deterministic node failures — resurrecting at the same node with
# the same conversation produces the same result.
"node stalled",
"ghost empty stream",
"max iterations",
)
@classmethod
def _is_fatal_error(cls, error: str | None) -> bool:
"""Return True if the error is life-threatening (no point resurrecting)."""
if not error:
return False
error_lower = error.lower()
return any(pat in error_lower for pat in cls._FATAL_ERROR_PATTERNS)
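# Illustration (hypothetical error strings): "401 Unauthorized: invalid api key"
# matches "unauthorized" and "api key", so it is fatal and no resurrection is
# attempted; a transient "connection reset by peer" matches no pattern and
# stays eligible for resurrection.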
async def _run_execution(self, ctx: ExecutionContext) -> None:
"""Run a single execution within the stream."""
"""Run a single execution within the stream.
Supports automatic resurrection: when the execution fails with a
non-fatal error, it restarts from the failed node up to
``entry_spec.max_resurrections`` times (default 3).
"""
execution_id = ctx.id
# When sharing a session with another entry point (resume_session_id),
@@ -451,6 +556,11 @@ class ExecutionStream:
# owns the state.json and _write_progress() keeps memory up-to-date.
_is_shared_session = bool(ctx.session_state and ctx.session_state.get("resume_session_id"))
max_resurrections = self.entry_spec.max_resurrections
_resurrection_count = 0
_current_session_state = ctx.session_state
_current_input_data = ctx.input_data
# Acquire semaphore to limit concurrency
async with self._semaphore:
ctx.status = "running"
@@ -491,12 +601,6 @@ class ExecutionStream:
store=self._runtime_log_store, agent_id=self.graph.id
)
# Create executor for this execution.
# Each execution gets its own storage under sessions/{exec_id}/
# so conversations, spillover, and data files are all scoped
# to this execution. The executor sets data_dir via execution
# context (contextvars) so data tools and spillover share the
# same session-scoped directory.
# Derive storage from session_store (graph-specific for secondary
# graphs) so that all files — conversations, state, checkpoints,
# data — land under the graph's own sessions/ directory, not the
@@ -505,43 +609,106 @@ class ExecutionStream:
exec_storage = self._session_store.sessions_dir / execution_id
else:
exec_storage = self._storage.base_path / "sessions" / execution_id
executor = GraphExecutor(
runtime=runtime_adapter,
llm=self._llm,
tools=self._tools,
tool_executor=self._tool_executor,
event_bus=self._scoped_event_bus,
stream_id=self.stream_id,
execution_id=execution_id,
storage_path=exec_storage,
runtime_logger=runtime_logger,
loop_config=self.graph.loop_config,
accounts_prompt=self._accounts_prompt,
accounts_data=self._accounts_data,
tool_provider_map=self._tool_provider_map,
)
# Track executor so inject_input() can reach EventLoopNode instances
self._active_executors[execution_id] = executor
# Write initial session state
if not _is_shared_session:
await self._write_session_state(execution_id, ctx)
# Create modified graph with entry point
# We need to override the entry_node to use our entry point
modified_graph = self._create_modified_graph()
# Execute
result = await executor.execute(
graph=modified_graph,
goal=self.goal,
input_data=ctx.input_data,
session_state=ctx.session_state,
checkpoint_config=self._checkpoint_config,
)
# Write initial session state
if not _is_shared_session:
await self._write_session_state(execution_id, ctx)
# Clean up executor reference
self._active_executors.pop(execution_id, None)
# --- Resurrection loop ---
# Each iteration creates a fresh executor. On non-fatal failure,
# the executor's session_state (memory + resume_from) carries
# forward so the next attempt resumes at the failed node.
while True:
# Create executor for this execution.
# Each execution gets its own storage under sessions/{exec_id}/
# so conversations, spillover, and data files are all scoped
# to this execution. The executor sets data_dir via execution
# context (contextvars) so data tools and spillover share the
# same session-scoped directory.
executor = GraphExecutor(
runtime=runtime_adapter,
llm=self._llm,
tools=self._tools,
tool_executor=self._tool_executor,
event_bus=self._scoped_event_bus,
stream_id=self.stream_id,
execution_id=execution_id,
storage_path=exec_storage,
runtime_logger=runtime_logger,
loop_config=self.graph.loop_config,
accounts_prompt=self._accounts_prompt,
accounts_data=self._accounts_data,
tool_provider_map=self._tool_provider_map,
)
# Track executor so inject_input() can reach EventLoopNode instances
self._active_executors[execution_id] = executor
# Execute
result = await executor.execute(
graph=modified_graph,
goal=self.goal,
input_data=_current_input_data,
session_state=_current_session_state,
checkpoint_config=self._checkpoint_config,
)
# Clean up executor reference
self._active_executors.pop(execution_id, None)
# Check if resurrection is appropriate
if (
not result.success
and not result.paused_at
and _resurrection_count < max_resurrections
and result.session_state
and not self._is_fatal_error(result.error)
):
_resurrection_count += 1
logger.warning(
"Execution %s failed (%s) — resurrecting (%d/%d) from node '%s'",
execution_id,
(result.error or "unknown")[:200],
_resurrection_count,
max_resurrections,
result.session_state.get("resume_from", "?"),
)
# Emit resurrection event
if self._scoped_event_bus:
from framework.runtime.event_bus import AgentEvent, EventType
await self._scoped_event_bus.publish(
AgentEvent(
type=EventType.EXECUTION_RESURRECTED,
stream_id=self.stream_id,
execution_id=execution_id,
data={
"attempt": _resurrection_count,
"max_resurrections": max_resurrections,
"error": (result.error or "")[:500],
"resume_from": result.session_state.get("resume_from"),
},
)
)
# Resume from the failed node with preserved memory
_current_session_state = {
**result.session_state,
"resume_session_id": execution_id,
}
# On resurrection, input_data is already in memory —
# pass empty so we don't overwrite intermediate results.
_current_input_data = {}
# Brief cooldown before resurrection
await asyncio.sleep(2.0)
continue
break # success, fatal failure, or resurrections exhausted
# Store result with retention
self._record_execution_result(execution_id, result)
@@ -563,7 +730,7 @@ class ExecutionStream:
if not _is_shared_session:
await self._write_session_state(execution_id, ctx, result=result)
# Emit completion/failure event
# Emit completion/failure/pause event
if self._scoped_event_bus:
if result.success:
await self._scoped_event_bus.emit_execution_completed(
@@ -572,6 +739,16 @@ class ExecutionStream:
output=result.output,
correlation_id=ctx.correlation_id,
)
elif result.paused_at:
# The executor returns paused_at on CancelledError but
# does NOT emit execution_paused itself — we must emit
# it here so the frontend can transition out of "running".
await self._scoped_event_bus.emit_execution_paused(
stream_id=self.stream_id,
node_id=result.paused_at,
reason=result.error or "Execution paused",
execution_id=execution_id,
)
else:
await self._scoped_event_bus.emit_execution_failed(
stream_id=self.stream_id,
@@ -621,6 +798,25 @@ class ExecutionStream:
execution_id, ctx, error="Execution cancelled"
)
# Emit SSE event so the frontend knows the execution stopped.
# The executor does NOT emit on CancelledError, so there is no
# risk of double-emitting.
if self._scoped_event_bus:
if has_result and result.paused_at:
await self._scoped_event_bus.emit_execution_paused(
stream_id=self.stream_id,
node_id=result.paused_at,
reason="Execution cancelled",
execution_id=execution_id,
)
else:
await self._scoped_event_bus.emit_execution_failed(
stream_id=self.stream_id,
execution_id=execution_id,
error="Execution cancelled",
correlation_id=ctx.correlation_id,
)
# Don't re-raise - we've handled it and saved state
except Exception as e:
@@ -871,10 +1067,11 @@ class ExecutionStream:
task = self._execution_tasks.get(execution_id)
if task and not task.done():
task.cancel()
try:
await task
except asyncio.CancelledError:
pass
# Wait briefly for the task to finish. Don't block indefinitely —
# the task may be stuck in a long LLM API call that doesn't
# respond to cancellation quickly. The cancellation is already
# requested; the task will clean up in the background.
done, _ = await asyncio.wait({task}, timeout=5.0)
return True
return False
@@ -0,0 +1,85 @@
"""HIVE_LLM_DEBUG — write every LLM turn to a JSONL file for replay/debugging.
Set the env var to enable:
HIVE_LLM_DEBUG=1 writes to ~/.hive/llm_logs/<ts>.jsonl
HIVE_LLM_DEBUG=/some/path writes to that directory
Each line is a JSON object with the full LLM turn: assistant text, tool calls,
tool results, and token counts. The file is opened lazily on first call and
flushed after every write. Errors are silently swallowed, because debug
logging must never break the agent.
"""
import json
import logging
import os
from datetime import datetime
from pathlib import Path
from typing import IO, Any
logger = logging.getLogger(__name__)
_LLM_DEBUG_RAW = os.environ.get("HIVE_LLM_DEBUG", "").strip()
_LLM_DEBUG_ENABLED = bool(_LLM_DEBUG_RAW) and _LLM_DEBUG_RAW.lower() not in ("0", "false")
_log_file: IO[str] | None = None
_log_ready = False # lazy init guard
def _open_log() -> IO[str] | None:
"""Open a JSONL log file. Returns None if disabled."""
if not _LLM_DEBUG_ENABLED:
return None
raw = _LLM_DEBUG_RAW
if raw.lower() in ("1", "true"):
log_dir = Path.home() / ".hive" / "llm_logs"
else:
log_dir = Path(raw)
log_dir.mkdir(parents=True, exist_ok=True)
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
path = log_dir / f"{ts}.jsonl"
logger.info("LLM debug log → %s", path)
return open(path, "a", encoding="utf-8") # noqa: SIM115
def log_llm_turn(
*,
node_id: str,
stream_id: str,
execution_id: str,
iteration: int,
assistant_text: str,
tool_calls: list[dict[str, Any]],
tool_results: list[dict[str, Any]],
token_counts: dict[str, Any],
) -> None:
"""Write one JSONL line capturing a complete LLM turn.
No-op when HIVE_LLM_DEBUG is not set. Never raises.
"""
if not _LLM_DEBUG_ENABLED:
return
try:
global _log_file, _log_ready # noqa: PLW0603
if not _log_ready:
_log_file = _open_log()
_log_ready = True
if _log_file is None:
return
record = {
"timestamp": datetime.now().isoformat(),
"node_id": node_id,
"stream_id": stream_id,
"execution_id": execution_id,
"iteration": iteration,
"assistant_text": assistant_text,
"tool_calls": tool_calls,
"tool_results": tool_results,
"token_counts": token_counts,
}
_log_file.write(json.dumps(record, default=str) + "\n")
_log_file.flush()
except Exception:
pass # never break the agent
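# For replay, a minimal reader for these JSONL files could look like this
# (illustrative sketch, not part of this module):
#
#     import json
#     from pathlib import Path
#
#     def read_llm_turns(path: Path) -> list[dict]:
#         with open(path, encoding="utf-8") as fh:
#             return [json.loads(line) for line in fh if line.strip()]
#
#     latest = max((Path.home() / ".hive" / "llm_logs").glob("*.jsonl"))
#     for turn in read_llm_turns(latest):
#         print(turn["node_id"], turn["iteration"], turn["token_counts"])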
@@ -24,6 +24,8 @@ class ToolCallLog(BaseModel):
tool_input: dict[str, Any] = Field(default_factory=dict)
result: str = ""
is_error: bool = False
start_timestamp: str = "" # ISO 8601 timestamp when tool execution started
duration_s: float = 0.0 # Wall-clock execution time in seconds
class NodeStepLog(BaseModel):
+2
View File
@@ -114,6 +114,8 @@ class RuntimeLogger:
tool_input=tc.get("tool_input", {}),
result=tc.get("content", ""),
is_error=tc.get("is_error", False),
start_timestamp=tc.get("start_timestamp", ""),
duration_s=tc.get("duration_s", 0.0),
)
)
@@ -821,5 +821,148 @@ class TestTimerEntryPoints:
await runtime.stop()
# === Cancel All Tasks Tests ===
class TestCancelAllTasks:
"""Tests for cancel_all_tasks and cancel_all_tasks_async."""
@pytest.mark.asyncio
async def test_cancel_all_tasks_async_returns_false_when_no_tasks(
self, sample_graph, sample_goal, temp_storage
):
"""Test that cancel_all_tasks_async returns False with no running tasks."""
runtime = AgentRuntime(
graph=sample_graph,
goal=sample_goal,
storage_path=temp_storage,
)
entry_spec = EntryPointSpec(
id="webhook",
name="Webhook",
entry_node="process-webhook",
trigger_type="webhook",
)
runtime.register_entry_point(entry_spec)
await runtime.start()
try:
result = await runtime.cancel_all_tasks_async()
assert result is False
finally:
await runtime.stop()
@pytest.mark.asyncio
async def test_cancel_all_tasks_async_cancels_running_task(
self, sample_graph, sample_goal, temp_storage
):
"""Test that cancel_all_tasks_async cancels a running task and returns True."""
runtime = AgentRuntime(
graph=sample_graph,
goal=sample_goal,
storage_path=temp_storage,
)
entry_spec = EntryPointSpec(
id="webhook",
name="Webhook",
entry_node="process-webhook",
trigger_type="webhook",
)
runtime.register_entry_point(entry_spec)
await runtime.start()
try:
# Inject a fake running task into the stream
stream = runtime._streams["webhook"]
async def hang_forever():
await asyncio.get_running_loop().create_future()
fake_task = asyncio.ensure_future(hang_forever())
stream._execution_tasks["fake-exec"] = fake_task
result = await runtime.cancel_all_tasks_async()
assert result is True
# Let the CancelledError propagate
try:
await fake_task
except asyncio.CancelledError:
pass
assert fake_task.cancelled()
# Clean up
del stream._execution_tasks["fake-exec"]
finally:
await runtime.stop()
@pytest.mark.asyncio
async def test_cancel_all_tasks_async_cancels_multiple_tasks_across_streams(
self, sample_graph, sample_goal, temp_storage
):
"""Test that cancel_all_tasks_async cancels tasks across multiple streams."""
runtime = AgentRuntime(
graph=sample_graph,
goal=sample_goal,
storage_path=temp_storage,
)
# Register two entry points so we get two streams
runtime.register_entry_point(
EntryPointSpec(
id="stream-a",
name="Stream A",
entry_node="process-webhook",
trigger_type="webhook",
)
)
runtime.register_entry_point(
EntryPointSpec(
id="stream-b",
name="Stream B",
entry_node="process-webhook",
trigger_type="webhook",
)
)
await runtime.start()
try:
async def hang_forever():
await asyncio.get_running_loop().create_future()
stream_a = runtime._streams["stream-a"]
stream_b = runtime._streams["stream-b"]
# Two tasks in stream A, one task in stream B
task_a1 = asyncio.ensure_future(hang_forever())
task_a2 = asyncio.ensure_future(hang_forever())
task_b1 = asyncio.ensure_future(hang_forever())
stream_a._execution_tasks["exec-a1"] = task_a1
stream_a._execution_tasks["exec-a2"] = task_a2
stream_b._execution_tasks["exec-b1"] = task_b1
result = await runtime.cancel_all_tasks_async()
assert result is True
# Let CancelledErrors propagate
for task in [task_a1, task_a2, task_b1]:
try:
await task
except asyncio.CancelledError:
pass
assert task.cancelled()
# Clean up
del stream_a._execution_tasks["exec-a1"]
del stream_a._execution_tasks["exec-a2"]
del stream_b._execution_tasks["exec-b1"]
finally:
await runtime.stop()
if __name__ == "__main__":
pytest.main([__file__, "-v"])
+346
View File
@@ -0,0 +1,346 @@
# Hive Server
HTTP API backend for the Hive agent framework. Built on **aiohttp**, fully async, serving the frontend workspace and external clients.
## Architecture
Sessions are the primary entity. A session owns an EventBus + LLM and always has a queen executor. Workers are optional — they can be loaded into and unloaded from a session at any time.
```
Session {
event_bus # owned by session, shared with queen + worker
llm # owned by session
queen_executor # always present
worker_runtime? # optional — loaded/unloaded independently
}
```
## Structure
```
server/
├── app.py # Application factory, middleware, static serving
├── session_manager.py # Session lifecycle (create/load worker/unload/stop)
├── sse.py # Server-Sent Events helper
├── routes_sessions.py # Session lifecycle, info, worker-session browsing, discovery
├── routes_execution.py # Trigger, inject, chat, stop, resume, replay
├── routes_events.py # SSE event streaming
├── routes_graphs.py # Graph topology & node inspection
├── routes_logs.py # Execution logs (summary/details/tools)
├── routes_credentials.py # Credential management & validation
├── routes_agents.py # Legacy backward-compat routes
└── tests/
└── test_api.py # Full test suite with mocked runtimes
```
## Core Components
### `app.py` — Application Factory
`create_app(model)` builds the aiohttp `Application` with:
- **CORS middleware** — allows localhost origins
- **Error middleware** — catches exceptions, returns JSON errors
- **Static serving** — serves the frontend SPA with index.html fallback
- **Graceful shutdown** — stops all sessions on exit
### `session_manager.py` — Session Lifecycle Manager
Manages `Session` objects. Key methods:
- **`create_session()`** — creates EventBus + LLM, starts queen (no worker)
- **`create_session_with_worker()`** — one-step: session + worker + judge
- **`load_worker()`** — loads agent into existing session, starts judge
- **`unload_worker()`** — removes worker + judge, queen stays alive
- **`stop_session()`** — tears down everything (worker + queen)
Three-conversation model:
1. **Queen** — persistent interactive executor for user chat (always present)
2. **Worker** — `AgentRuntime` that executes graphs (optional)
3. **Judge** — timer-driven background executor for health monitoring (active when worker is loaded)
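A minimal lifecycle sketch (the method names are from the list above; the `session.id` attribute and exact signatures are assumptions):

```python
from framework.server.session_manager import SessionManager

async def demo() -> None:
    manager = SessionManager(model="claude-sonnet-4-20250514")
    session = await manager.create_session()                   # queen only
    await manager.load_worker(session.id, "exports/my-agent")  # worker + judge join
    # ... trigger executions via the HTTP routes ...
    await manager.unload_worker(session.id)                    # queen stays alive
    await manager.stop_session(session.id)                     # full teardown
```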
### `sse.py` — SSE Helper
Thin wrapper around `aiohttp.StreamResponse` for Server-Sent Events with keepalive pings.
## API Reference
All session-scoped routes use the `session_id` returned from `POST /api/sessions`.
### Discovery
| Method | Route | Description |
|--------|-------|-------------|
| `GET` | `/api/discover` | Discover agents from filesystem |
Returns agents grouped by category with metadata (name, description, node count, tags, etc.).
### Session Lifecycle
| Method | Route | Description |
|--------|-------|-------------|
| `POST` | `/api/sessions` | Create a session |
| `GET` | `/api/sessions` | List all active sessions |
| `GET` | `/api/sessions/{session_id}` | Session detail (includes entry points + graphs if worker loaded) |
| `DELETE` | `/api/sessions/{session_id}` | Stop session entirely |
**Create session** has two modes:
```jsonc
// Queen-only session (no worker)
POST /api/sessions
{}
// or with custom ID:
{ "session_id": "my-custom-id" }
// Session with worker (one-step)
POST /api/sessions
{
"agent_path": "exports/my-agent",
"agent_id": "custom-worker-name", // optional
"model": "claude-sonnet-4-20250514" // optional
}
```
- Returns `201` with the session object on success
- Returns `409` with `{"loading": true}` if the agent is currently loading
- Returns `404` if `agent_path` doesn't exist
**Get session** returns `202` with `{"loading": true}` while the worker is loading, and `404` if the session is not found.
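A client can create a worker session and poll until loading completes, as in this sketch (`aiohttp` client; the host, port, and `session_id` response field are assumptions):

```python
import asyncio

import aiohttp

async def create_and_wait(base: str = "http://localhost:8000") -> dict:
    async with aiohttp.ClientSession() as http:
        resp = await http.post(f"{base}/api/sessions", json={"agent_path": "exports/my-agent"})
        body = await resp.json()
        sid = body["session_id"]  # field name assumed from this README
        while True:  # 202 + {"loading": true} while the worker loads
            resp = await http.get(f"{base}/api/sessions/{sid}")
            if resp.status != 202:
                return await resp.json()
            await asyncio.sleep(0.5)
```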
### Worker Lifecycle
| Method | Route | Description |
|--------|-------|-------------|
| `POST` | `/api/sessions/{session_id}/worker` | Load a worker into session |
| `DELETE` | `/api/sessions/{session_id}/worker` | Unload worker (queen stays alive) |
```jsonc
// Load worker into existing session
POST /api/sessions/{session_id}/worker
{
"agent_path": "exports/my-agent",
"worker_id": "custom-name", // optional
"model": "..." // optional
}
// Unload worker
DELETE /api/sessions/{session_id}/worker
```
### Execution Control
| Method | Route | Description |
|--------|-------|-------------|
| `POST` | `/api/sessions/{session_id}/trigger` | Start a new execution |
| `POST` | `/api/sessions/{session_id}/inject` | Inject input into a waiting node |
| `POST` | `/api/sessions/{session_id}/chat` | Smart chat routing |
| `POST` | `/api/sessions/{session_id}/stop` | Cancel a running execution |
| `POST` | `/api/sessions/{session_id}/pause` | Alias for stop |
| `POST` | `/api/sessions/{session_id}/resume` | Resume a paused execution |
| `POST` | `/api/sessions/{session_id}/replay` | Re-run from a checkpoint |
| `GET` | `/api/sessions/{session_id}/goal-progress` | Evaluate goal progress |
**Trigger:**
```jsonc
POST /api/sessions/{session_id}/trigger
{
"entry_point_id": "default",
"input_data": { "query": "research topic X" },
"session_state": {} // optional
}
// Returns: { "execution_id": "..." }
```
**Chat** routes messages in priority order:
1. Worker awaiting input -> inject into worker node
2. Queen active -> inject into queen conversation
3. Neither available -> 503
```jsonc
POST /api/sessions/{session_id}/chat
{ "message": "hello" }
// Returns: { "status": "injected"|"queen", "delivered": true }
```
**Inject** into a specific node:
```jsonc
POST /api/sessions/{session_id}/inject
{ "node_id": "gather_info", "content": "user response", "graph_id": "main" }
```
**Stop:**
```jsonc
POST /api/sessions/{session_id}/stop
{ "execution_id": "..." }
```
**Resume:**
```jsonc
POST /api/sessions/{session_id}/resume
{
"session_id": "session_20260224_...", // worker session to resume
"checkpoint_id": "cp_..." // optional — resumes from latest if omitted
}
```
**Replay** (re-run from checkpoint):
```jsonc
POST /api/sessions/{session_id}/replay
{
"session_id": "session_20260224_...",
"checkpoint_id": "cp_..." // required
}
```
### SSE Event Streaming
| Method | Route | Description |
|--------|-------|-------------|
| `GET` | `/api/sessions/{session_id}/events` | SSE event stream |
```
GET /api/sessions/{session_id}/events
GET /api/sessions/{session_id}/events?types=CLIENT_OUTPUT_DELTA,EXECUTION_COMPLETED
```
Keepalive ping every 15s. Streams from the session's EventBus (covers both queen and worker events).
Default event types: `CLIENT_OUTPUT_DELTA`, `CLIENT_INPUT_REQUESTED`, `LLM_TEXT_DELTA`, `TOOL_CALL_STARTED`, `TOOL_CALL_COMPLETED`, `EXECUTION_STARTED`, `EXECUTION_COMPLETED`, `EXECUTION_FAILED`, `EXECUTION_PAUSED`, `NODE_LOOP_STARTED`, `NODE_LOOP_ITERATION`, `NODE_LOOP_COMPLETED`, `NODE_ACTION_PLAN`, `EDGE_TRAVERSED`, `GOAL_PROGRESS`, `QUEEN_INTERVENTION_REQUESTED`, `WORKER_ESCALATION_TICKET`, `NODE_INTERNAL_OUTPUT`, `NODE_STALLED`, `NODE_RETRY`, `NODE_TOOL_DOOM_LOOP`, `CONTEXT_COMPACTED`, `WORKER_LOADED`.
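A minimal consumer, assuming the standard SSE wire format (`data: {...}` lines carrying JSON-serialized events; field names mirror `AgentEvent`):

```python
import json

import aiohttp

async def stream_events(base: str, session_id: str) -> None:
    url = f"{base}/api/sessions/{session_id}/events"
    async with aiohttp.ClientSession() as http:
        async with http.get(url) as resp:
            async for raw in resp.content:  # yields one line per iteration
                line = raw.decode("utf-8").strip()
                if line.startswith("data:"):
                    event = json.loads(line[len("data:"):])
                    print(event.get("type"), event.get("node_id"))
```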
### Session Info
| Method | Route | Description |
|--------|-------|-------------|
| `GET` | `/api/sessions/{session_id}/stats` | Runtime statistics |
| `GET` | `/api/sessions/{session_id}/entry-points` | List entry points |
| `GET` | `/api/sessions/{session_id}/graphs` | List loaded graph IDs |
### Graph & Node Inspection
| Method | Route | Description |
|--------|-------|-------------|
| `GET` | `/api/sessions/{session_id}/graphs/{graph_id}/nodes` | List nodes + edges |
| `GET` | `/api/sessions/{session_id}/graphs/{graph_id}/nodes/{node_id}` | Node detail + outgoing edges |
| `GET` | `/api/sessions/{session_id}/graphs/{graph_id}/nodes/{node_id}/criteria` | Success criteria + last execution info |
| `GET` | `/api/sessions/{session_id}/graphs/{graph_id}/nodes/{node_id}/tools` | Resolved tool metadata |
**List nodes** supports optional enrichment with session progress:
```
GET /api/sessions/{session_id}/graphs/{graph_id}/nodes?session_id=worker_session_id
```
Adds `visit_count`, `has_failures`, `is_current`, `in_path` to each node.
### Logs
| Method | Route | Description |
|--------|-------|-------------|
| `GET` | `/api/sessions/{session_id}/logs` | Session-level logs |
| `GET` | `/api/sessions/{session_id}/graphs/{graph_id}/nodes/{node_id}/logs` | Node-scoped logs |
```
# List recent runs
GET /api/sessions/{session_id}/logs?level=summary&limit=20
# Detailed per-node execution for a specific worker session
GET /api/sessions/{session_id}/logs?session_id=ws_id&level=details
# Tool call logs
GET /api/sessions/{session_id}/logs?session_id=ws_id&level=tools
# Node-scoped (requires session_id query param)
GET .../nodes/{node_id}/logs?session_id=ws_id&level=all
```
Log levels: `summary` (run stats), `details` (per-node execution), `tools` (tool calls + LLM text).
### Worker Session Browsing
Browse persisted execution runs on disk.
| Method | Route | Description |
|--------|-------|-------------|
| `GET` | `/api/sessions/{session_id}/worker-sessions` | List worker sessions |
| `GET` | `/api/sessions/{session_id}/worker-sessions/{ws_id}` | Worker session state |
| `DELETE` | `/api/sessions/{session_id}/worker-sessions/{ws_id}` | Delete worker session |
| `GET` | `/api/sessions/{session_id}/worker-sessions/{ws_id}/checkpoints` | List checkpoints |
| `POST` | `/api/sessions/{session_id}/worker-sessions/{ws_id}/checkpoints/{cp_id}/restore` | Restore from checkpoint |
| `GET` | `/api/sessions/{session_id}/worker-sessions/{ws_id}/messages` | Get conversation messages |
**Messages** support filtering:
```
GET .../messages?node_id=gather_info # filter by node
GET .../messages?client_only=true # only user inputs + client-facing assistant outputs
```
### Credentials
| Method | Route | Description |
|--------|-------|-------------|
| `GET` | `/api/credentials` | List credential metadata (no secrets) |
| `POST` | `/api/credentials` | Save a credential |
| `GET` | `/api/credentials/{credential_id}` | Get credential metadata |
| `DELETE` | `/api/credentials/{credential_id}` | Delete a credential |
| `POST` | `/api/credentials/check-agent` | Validate agent credentials |
**Save credential:**
```jsonc
POST /api/credentials
{ "credential_id": "brave_search", "keys": { "api_key": "BSA..." } }
```
**Check agent credentials** — two-phase validation (same as runtime startup):
```jsonc
POST /api/credentials/check-agent
{
"agent_path": "exports/my-agent",
"verify": true // optional, default true — run health checks
}
// Returns:
{
"required": [
{
"credential_name": "brave_search",
"credential_id": "brave_search",
"env_var": "BRAVE_SEARCH_API_KEY",
"description": "Brave Search API key",
"help_url": "https://...",
"tools": ["brave_web_search"],
"node_types": [],
"available": true,
"valid": true, // true/false/null (null = not checked)
"validation_message": "OK", // human-readable health check result
"direct_api_key_supported": true,
"aden_supported": true,
"credential_key": "api_key"
}
]
}
```
When `verify: true`, runs health checks (lightweight HTTP calls) against each available credential to confirm it actually works — not just that it exists.
## Key Patterns
- **Session-primary** — sessions are the lookup key for all routes; workers are optional children
- **Per-request manager access** — routes get `SessionManager` via `request.app["manager"]`
- **Path validation** — user-provided path segments validated with `safe_path_segment()` to prevent directory traversal
- **Event-driven streaming** — per-client buffer queues (max 1000 events) with 15s keepalive pings
- **Shared EventBus** — session owns the bus, queen and worker both publish to it, SSE always connects to `session.event_bus`
- **No secrets in responses** — credential endpoints never return secret values
## Storage Paths
```
~/.hive/
├── queen/session/{session_id}/ # Queen conversation state
├── judge/session/{session_id}/ # Judge state
├── agents/{agent_name}/sessions/ # Worker execution sessions
└── credentials/ # Encrypted credential store
```
## Running Tests
```bash
pytest framework/server/tests/ -v
```
+1
View File
@@ -0,0 +1 @@
"""HTTP API server for the Hive agent framework."""
+36
View File
@@ -0,0 +1,36 @@
"""Backward-compatibility shim.
The primary implementation is now in ``session_manager.py``.
This module re-exports ``SessionManager`` as ``AgentManager`` and
keeps ``AgentSlot`` for test compatibility.
"""
import asyncio
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from framework.server.session_manager import Session, SessionManager # noqa: F401
@dataclass
class AgentSlot:
"""Legacy data class — kept for test compatibility only.
New code should use ``Session`` from ``session_manager``.
"""
id: str
agent_path: Path
runner: Any
runtime: Any
info: Any
loaded_at: float
queen_executor: Any = None
queen_task: asyncio.Task | None = None
judge_task: asyncio.Task | None = None
escalation_sub: str | None = None
# Backward compat alias
AgentManager = SessionManager
+270
View File
@@ -0,0 +1,270 @@
"""aiohttp Application factory for the Hive HTTP API server."""
import logging
import os
from pathlib import Path
from aiohttp import web
from framework.server.session_manager import Session, SessionManager
logger = logging.getLogger(__name__)
# Anchor to the repository root so allowed roots are independent of CWD.
# app.py lives at core/framework/server/app.py, so four .parent calls
# reach the repo root where exports/ and examples/ live.
_REPO_ROOT = Path(__file__).resolve().parent.parent.parent.parent
_ALLOWED_AGENT_ROOTS: tuple[Path, ...] | None = None
def _get_allowed_agent_roots() -> tuple[Path, ...]:
"""Return resolved allowed root directories for agent loading.
Roots are anchored to the repository root (derived from ``__file__``)
so the allowlist is correct regardless of the process's working
directory.
"""
global _ALLOWED_AGENT_ROOTS
if _ALLOWED_AGENT_ROOTS is None:
_ALLOWED_AGENT_ROOTS = (
(_REPO_ROOT / "exports").resolve(),
(_REPO_ROOT / "examples").resolve(),
(Path.home() / ".hive" / "agents").resolve(),
)
return _ALLOWED_AGENT_ROOTS
def validate_agent_path(agent_path: str | Path) -> Path:
"""Validate that an agent path resolves inside an allowed directory.
Prevents arbitrary code execution via ``importlib.import_module`` by
restricting agent loading to known safe directories: ``exports/``,
``examples/``, and ``~/.hive/agents/``.
Returns the resolved ``Path`` on success.
Raises:
ValueError: If the path is outside all allowed roots.
"""
resolved = Path(agent_path).expanduser().resolve()
for root in _get_allowed_agent_roots():
if resolved.is_relative_to(root) and resolved != root:
return resolved
raise ValueError(
"agent_path must be inside an allowed directory (exports/, examples/, or ~/.hive/agents/)"
)
def safe_path_segment(value: str) -> str:
"""Validate a URL path parameter is a safe filesystem name.
Raises HTTPBadRequest if the value contains path separators or
traversal sequences. aiohttp decodes ``%2F`` inside route params,
so a raw ``{session_id}`` can contain ``/`` or ``..`` after decoding.
"""
if not value or value == "." or "/" in value or "\\" in value or ".." in value:
raise web.HTTPBadRequest(reason="Invalid path parameter")
return value
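# Examples (illustrative):
#   safe_path_segment("session_20260304_abc")   -> returned unchanged
#   safe_path_segment("../other")               -> HTTPBadRequest
#   safe_path_segment("a/b")  (from URL "a%2Fb") -> HTTPBadRequest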
def resolve_session(request: web.Request):
"""Resolve a Session from {session_id} in the URL.
Returns (session, None) on success or (None, error_response) on failure.
"""
manager: SessionManager = request.app["manager"]
sid = request.match_info["session_id"]
session = manager.get_session(sid)
if not session:
return None, web.json_response({"error": f"Session '{sid}' not found"}, status=404)
return session, None
def sessions_dir(session: Session) -> Path:
"""Resolve the worker sessions directory for a session.
Storage layout: ~/.hive/agents/{agent_name}/sessions/
Requires a worker to be loaded (worker_path must be set).
"""
if session.worker_path is None:
raise ValueError("No worker loaded — no worker sessions directory")
agent_name = session.worker_path.name
return Path.home() / ".hive" / "agents" / agent_name / "sessions"
# Allowed CORS origins (localhost on any port)
_CORS_ORIGINS = {"http://localhost", "http://127.0.0.1"}
def _is_cors_allowed(origin: str) -> bool:
"""Check if origin is localhost/127.0.0.1 on any port."""
if not origin:
return False
for base in _CORS_ORIGINS:
if origin == base or origin.startswith(base + ":"):
return True
return False
@web.middleware
async def cors_middleware(request: web.Request, handler):
"""CORS middleware scoped to localhost origins."""
origin = request.headers.get("Origin", "")
# Handle preflight
if request.method == "OPTIONS":
response = web.Response(status=204)
else:
try:
response = await handler(request)
except web.HTTPException as exc:
response = exc
if _is_cors_allowed(origin):
response.headers["Access-Control-Allow-Origin"] = origin
response.headers["Access-Control-Allow-Methods"] = "GET, POST, DELETE, OPTIONS"
response.headers["Access-Control-Allow-Headers"] = "Content-Type"
response.headers["Access-Control-Max-Age"] = "3600"
return response
@web.middleware
async def error_middleware(request: web.Request, handler):
"""Catch exceptions and return JSON error responses."""
try:
return await handler(request)
except web.HTTPException:
raise # Let aiohttp handle its own HTTP exceptions
except Exception as e:
logger.exception(f"Unhandled error: {e}")
return web.json_response(
{"error": str(e), "type": type(e).__name__},
status=500,
)
async def _on_shutdown(app: web.Application) -> None:
"""Gracefully unload all agents on server shutdown."""
manager: SessionManager = app["manager"]
await manager.shutdown_all()
async def handle_health(request: web.Request) -> web.Response:
"""GET /api/health — simple health check."""
manager: SessionManager = request.app["manager"]
sessions = manager.list_sessions()
return web.json_response(
{
"status": "ok",
"sessions": len(sessions),
"agents_loaded": sum(1 for s in sessions if s.worker_runtime is not None),
}
)
def create_app(model: str | None = None) -> web.Application:
"""Create and configure the aiohttp Application.
Args:
model: Default LLM model for agent loading.
Returns:
Configured aiohttp Application ready to run.
"""
app = web.Application(middlewares=[cors_middleware, error_middleware])
# Initialize credential store (before SessionManager so it can be shared)
from framework.credentials.store import CredentialStore
try:
from framework.credentials.validation import ensure_credential_key_env
# Load ALL credentials: HIVE_CREDENTIAL_KEY, ADEN_API_KEY, and LLM keys
ensure_credential_key_env()
# Auto-generate credential key for web-only users who never ran the TUI
if not os.environ.get("HIVE_CREDENTIAL_KEY"):
try:
from framework.credentials.key_storage import generate_and_save_credential_key
generate_and_save_credential_key()
logger.info(
"Generated and persisted HIVE_CREDENTIAL_KEY to ~/.hive/secrets/credential_key"
)
except Exception as exc:
logger.warning("Could not auto-persist HIVE_CREDENTIAL_KEY: %s", exc)
credential_store = CredentialStore.with_aden_sync()
except Exception:
logger.debug("Encrypted credential store unavailable, using in-memory fallback")
credential_store = CredentialStore.for_testing({})
app["credential_store"] = credential_store
app["manager"] = SessionManager(model=model, credential_store=credential_store)
# Register shutdown hook
app.on_shutdown.append(_on_shutdown)
# Health check
app.router.add_get("/api/health", handle_health)
# Register route modules
from framework.server.routes_credentials import register_routes as register_credential_routes
from framework.server.routes_events import register_routes as register_event_routes
from framework.server.routes_execution import register_routes as register_execution_routes
from framework.server.routes_graphs import register_routes as register_graph_routes
from framework.server.routes_logs import register_routes as register_log_routes
from framework.server.routes_sessions import register_routes as register_session_routes
register_credential_routes(app)
register_execution_routes(app)
register_event_routes(app)
register_session_routes(app)
register_graph_routes(app)
register_log_routes(app)
# Static file serving — Option C production mode
# If frontend/dist/ exists, serve built frontend files on /
_setup_static_serving(app)
return app
def _setup_static_serving(app: web.Application) -> None:
"""Serve frontend static files if the dist directory exists."""
# Try: CWD/frontend/dist, core/frontend/dist, repo_root/frontend/dist
_here = Path(__file__).resolve().parent # core/framework/server/
candidates = [
Path("frontend/dist"),
_here.parent.parent / "frontend" / "dist", # core/frontend/dist
_here.parent.parent.parent / "frontend" / "dist", # repo_root/frontend/dist
]
dist_dir: Path | None = None
for candidate in candidates:
if candidate.is_dir() and (candidate / "index.html").exists():
dist_dir = candidate.resolve()
break
if dist_dir is None:
logger.debug("No frontend/dist found — skipping static file serving")
return
logger.info(f"Serving frontend from {dist_dir}")
async def handle_spa(request: web.Request) -> web.FileResponse:
"""Serve static files with SPA fallback to index.html."""
rel_path = request.match_info.get("path", "")
file_path = (dist_dir / rel_path).resolve()
if file_path.is_file() and file_path.is_relative_to(dist_dir):
return web.FileResponse(file_path)
# SPA fallback
return web.FileResponse(dist_dir / "index.html")
# Catch-all for SPA — must be registered LAST so /api routes take priority
app.router.add_get("/{path:.*}", handle_spa)
+211
View File
@@ -0,0 +1,211 @@
"""Credential CRUD routes."""
import asyncio
import logging
from aiohttp import web
from pydantic import SecretStr
from framework.credentials.models import CredentialKey, CredentialObject
from framework.credentials.store import CredentialStore
from framework.server.app import validate_agent_path
logger = logging.getLogger(__name__)
def _get_store(request: web.Request) -> CredentialStore:
return request.app["credential_store"]
def _credential_to_dict(cred: CredentialObject) -> dict:
"""Serialize a CredentialObject to JSON — never include secret values."""
return {
"credential_id": cred.id,
"credential_type": str(cred.credential_type),
"key_names": list(cred.keys.keys()),
"created_at": cred.created_at.isoformat() if cred.created_at else None,
"updated_at": cred.updated_at.isoformat() if cred.updated_at else None,
}
async def handle_list_credentials(request: web.Request) -> web.Response:
"""GET /api/credentials — list all credential metadata (no secrets)."""
store = _get_store(request)
cred_ids = store.list_credentials()
credentials = []
for cid in cred_ids:
cred = store.get_credential(cid, refresh_if_needed=False)
if cred:
credentials.append(_credential_to_dict(cred))
return web.json_response({"credentials": credentials})
async def handle_get_credential(request: web.Request) -> web.Response:
"""GET /api/credentials/{credential_id} — get single credential metadata."""
credential_id = request.match_info["credential_id"]
store = _get_store(request)
cred = store.get_credential(credential_id, refresh_if_needed=False)
if cred is None:
return web.json_response({"error": f"Credential '{credential_id}' not found"}, status=404)
return web.json_response(_credential_to_dict(cred))
async def handle_save_credential(request: web.Request) -> web.Response:
"""POST /api/credentials — store a credential.
Body: {"credential_id": "...", "keys": {"key_name": "value", ...}}
"""
body = await request.json()
credential_id = body.get("credential_id")
keys = body.get("keys")
if not credential_id or not keys or not isinstance(keys, dict):
return web.json_response({"error": "credential_id and keys are required"}, status=400)
# ADEN_API_KEY is stored in the encrypted store via key_storage module
if credential_id == "aden_api_key":
key = keys.get("api_key", "").strip()
if not key:
return web.json_response({"error": "api_key is required"}, status=400)
from framework.credentials.key_storage import save_aden_api_key
save_aden_api_key(key)
# Immediately sync OAuth tokens from Aden (runs in executor because
# _presync_aden_tokens makes blocking HTTP calls to the Aden server).
try:
from aden_tools.credentials import CREDENTIAL_SPECS
from framework.credentials.validation import _presync_aden_tokens
loop = asyncio.get_running_loop()
await loop.run_in_executor(None, _presync_aden_tokens, CREDENTIAL_SPECS)
except Exception as exc:
logger.warning("Aden token sync after key save failed: %s", exc)
return web.json_response({"saved": "aden_api_key"}, status=201)
store = _get_store(request)
cred = CredentialObject(
id=credential_id,
keys={k: CredentialKey(name=k, value=SecretStr(v)) for k, v in keys.items()},
)
store.save_credential(cred)
return web.json_response({"saved": credential_id}, status=201)
async def handle_delete_credential(request: web.Request) -> web.Response:
"""DELETE /api/credentials/{credential_id} — delete a credential."""
credential_id = request.match_info["credential_id"]
if credential_id == "aden_api_key":
from framework.credentials.key_storage import delete_aden_api_key
delete_aden_api_key()
return web.json_response({"deleted": True})
store = _get_store(request)
deleted = store.delete_credential(credential_id)
if not deleted:
return web.json_response({"error": f"Credential '{credential_id}' not found"}, status=404)
return web.json_response({"deleted": True})
async def handle_check_agent(request: web.Request) -> web.Response:
"""POST /api/credentials/check-agent — check and validate agent credentials.
Uses the same ``validate_agent_credentials`` as agent startup:
1. Presence is the credential available (env, encrypted store, Aden)?
2. Health check does the credential actually work (lightweight HTTP call)?
Body: {"agent_path": "...", "verify": true}
"""
body = await request.json()
agent_path = body.get("agent_path")
verify = body.get("verify", True)
if not agent_path:
return web.json_response({"error": "agent_path is required"}, status=400)
try:
agent_path = str(validate_agent_path(agent_path))
except ValueError as e:
return web.json_response({"error": str(e)}, status=400)
try:
from framework.credentials.setup import load_agent_nodes
from framework.credentials.validation import (
ensure_credential_key_env,
validate_agent_credentials,
)
# Load env vars from shell config (same as runtime startup)
ensure_credential_key_env()
nodes = load_agent_nodes(agent_path)
result = validate_agent_credentials(
nodes, verify=verify, raise_on_error=False, force_refresh=True
)
# If any credential needs Aden, include ADEN_API_KEY as a first-class row
if any(c.aden_supported for c in result.credentials):
aden_key_status = {
"credential_name": "Aden Platform",
"credential_id": "aden_api_key",
"env_var": "ADEN_API_KEY",
"description": "API key from the Developers tab in Settings",
"help_url": "https://hive.adenhq.com/",
"tools": [],
"node_types": [],
"available": result.has_aden_key,
"valid": None,
"validation_message": None,
"direct_api_key_supported": True,
"aden_supported": True, # renders with "Authorize" button to open Aden
"credential_key": "api_key",
}
required = [aden_key_status] + [_status_to_dict(c) for c in result.credentials]
else:
required = [_status_to_dict(c) for c in result.credentials]
return web.json_response(
{
"required": required,
"has_aden_key": result.has_aden_key,
}
)
except Exception as e:
logger.exception(f"Error checking agent credentials: {e}")
return web.json_response({"error": str(e)}, status=500)
def _status_to_dict(c) -> dict:
"""Convert a CredentialStatus to the JSON dict expected by the frontend."""
return {
"credential_name": c.credential_name,
"credential_id": c.credential_id,
"env_var": c.env_var,
"description": c.description,
"help_url": c.help_url,
"tools": c.tools,
"node_types": c.node_types,
"available": c.available,
"direct_api_key_supported": c.direct_api_key_supported,
"aden_supported": c.aden_supported,
"credential_key": c.credential_key,
"valid": c.valid,
"validation_message": c.validation_message,
"alternative_group": c.alternative_group,
}
def register_routes(app: web.Application) -> None:
"""Register credential routes on the application."""
# check-agent must be registered BEFORE the {credential_id} wildcard
app.router.add_post("/api/credentials/check-agent", handle_check_agent)
app.router.add_get("/api/credentials", handle_list_credentials)
app.router.add_post("/api/credentials", handle_save_credential)
app.router.add_get("/api/credentials/{credential_id}", handle_get_credential)
app.router.add_delete("/api/credentials/{credential_id}", handle_delete_credential)
@@ -0,0 +1,201 @@
"""SSE event streaming route."""
import asyncio
import logging
from aiohttp import web
from framework.runtime.event_bus import EventType
from framework.server.app import resolve_session
logger = logging.getLogger(__name__)
# Default event types streamed to clients
DEFAULT_EVENT_TYPES = [
EventType.CLIENT_OUTPUT_DELTA,
EventType.CLIENT_INPUT_REQUESTED,
EventType.LLM_TEXT_DELTA,
EventType.TOOL_CALL_STARTED,
EventType.TOOL_CALL_COMPLETED,
EventType.EXECUTION_STARTED,
EventType.EXECUTION_COMPLETED,
EventType.EXECUTION_FAILED,
EventType.EXECUTION_PAUSED,
EventType.NODE_LOOP_STARTED,
EventType.NODE_LOOP_ITERATION,
EventType.NODE_LOOP_COMPLETED,
EventType.LLM_TURN_COMPLETE,
EventType.NODE_ACTION_PLAN,
EventType.EDGE_TRAVERSED,
EventType.GOAL_PROGRESS,
EventType.QUEEN_INTERVENTION_REQUESTED,
EventType.WORKER_ESCALATION_TICKET,
EventType.NODE_INTERNAL_OUTPUT,
EventType.NODE_STALLED,
EventType.NODE_RETRY,
EventType.NODE_TOOL_DOOM_LOOP,
EventType.CONTEXT_COMPACTED,
EventType.WORKER_LOADED,
EventType.CREDENTIALS_REQUIRED,
EventType.SUBAGENT_REPORT,
EventType.QUEEN_MODE_CHANGED,
]
# Keepalive interval in seconds
KEEPALIVE_INTERVAL = 15.0
def _parse_event_types(query_param: str | None) -> list[EventType]:
"""Parse comma-separated event type names into EventType values.
Falls back to DEFAULT_EVENT_TYPES if param is empty or invalid.
"""
if not query_param:
return DEFAULT_EVENT_TYPES
result = []
for name in query_param.split(","):
name = name.strip()
try:
result.append(EventType(name))
except ValueError:
logger.warning(f"Unknown event type filter: {name}")
return result or DEFAULT_EVENT_TYPES
async def handle_events(request: web.Request) -> web.StreamResponse:
"""SSE event stream for a session.
Query params:
types: Comma-separated event type names to filter (optional).
"""
session, err = resolve_session(request)
if err:
return err
# Session always has an event_bus — no runtime guard needed
event_bus = session.event_bus
event_types = _parse_event_types(request.query.get("types"))
# Per-client buffer queue
queue: asyncio.Queue = asyncio.Queue(maxsize=1000)
# Lifecycle events drive frontend state transitions and must never be lost.
_CRITICAL_EVENTS = {
"execution_started",
"execution_completed",
"execution_failed",
"execution_paused",
"client_input_requested",
"node_loop_iteration",
"node_loop_started",
"credentials_required",
"worker_loaded",
"queen_mode_changed",
}
client_disconnected = asyncio.Event()
async def on_event(event) -> None:
"""Push event dict into queue; drop non-critical events if full."""
if client_disconnected.is_set():
return
evt_dict = event.to_dict()
if evt_dict.get("type") in _CRITICAL_EVENTS:
try:
queue.put_nowait(evt_dict)
except asyncio.QueueFull:
logger.warning(
"SSE client queue full on critical event; disconnecting session='%s'",
session.id,
)
client_disconnected.set()
else:
try:
queue.put_nowait(evt_dict)
except asyncio.QueueFull:
pass # high-frequency events can be dropped; client will catch up
# Subscribe to EventBus
from framework.server.sse import SSEResponse
sub_id = event_bus.subscribe(
event_types=event_types,
handler=on_event,
)
sse = SSEResponse()
await sse.prepare(request)
logger.info(
"SSE connected: session='%s', sub_id='%s', types=%d", session.id, sub_id, len(event_types)
)
# Replay buffered events that were published before this SSE connected.
# The EventBus keeps a history ring-buffer; we replay the subset that
# produces visible chat messages so the frontend never misses early
queen output. Most lifecycle events are NOT replayed, to avoid duplicate
state transitions (turn counter increments, etc.).
_REPLAY_TYPES = {
EventType.CLIENT_OUTPUT_DELTA.value,
EventType.EXECUTION_STARTED.value,
EventType.CLIENT_INPUT_REQUESTED.value,
}
event_type_values = {et.value for et in event_types}
replay_types = _REPLAY_TYPES & event_type_values
replayed = 0
for past_event in event_bus._event_history:
if past_event.type.value in replay_types:
try:
queue.put_nowait(past_event.to_dict())
replayed += 1
except asyncio.QueueFull:
break
if replayed:
logger.info("SSE replayed %d buffered events for session='%s'", replayed, session.id)
event_count = 0
close_reason = "unknown"
try:
while not client_disconnected.is_set():
try:
data = await asyncio.wait_for(queue.get(), timeout=KEEPALIVE_INTERVAL)
await sse.send_event(data)
event_count += 1
if event_count == 1:
logger.info(
"SSE first event: session='%s', type='%s'", session.id, data.get("type")
)
except asyncio.TimeoutError:  # alias of builtin TimeoutError on 3.11+; explicit for 3.10
await sse.send_keepalive()
except (ConnectionResetError, ConnectionError):
close_reason = "client_disconnected"
break
except Exception as exc:
close_reason = f"error: {exc}"
break
if client_disconnected.is_set() and close_reason == "unknown":
close_reason = "slow_client"
except asyncio.CancelledError:
close_reason = "cancelled"
finally:
try:
event_bus.unsubscribe(sub_id)
except Exception:
pass
logger.info(
"SSE disconnected: session='%s', events_sent=%d, reason='%s'",
session.id,
event_count,
close_reason,
)
return sse.response
def register_routes(app: web.Application) -> None:
"""Register SSE event streaming routes."""
# Session-primary route
app.router.add_get("/api/sessions/{session_id}/events", handle_events)
@@ -0,0 +1,424 @@
"""Execution control routes — trigger, inject, chat, resume, stop, replay."""
import asyncio
import json
import logging
from aiohttp import web
from framework.credentials.validation import validate_agent_credentials
from framework.server.app import resolve_session, safe_path_segment, sessions_dir
from framework.server.routes_sessions import _credential_error_response
logger = logging.getLogger(__name__)
async def handle_trigger(request: web.Request) -> web.Response:
"""POST /api/sessions/{session_id}/trigger — start an execution.
Body: {"entry_point_id": "default", "input_data": {...}, "session_state": {...}?}
"""
session, err = resolve_session(request)
if err:
return err
if not session.worker_runtime:
return web.json_response({"error": "No worker loaded in this session"}, status=503)
# Validate credentials before running — deferred from load time to avoid
# showing the modal before the user clicks Run. Runs in executor because
# validate_agent_credentials makes blocking HTTP health-check calls.
if session.runner:
loop = asyncio.get_running_loop()
try:
await loop.run_in_executor(
None, lambda: validate_agent_credentials(session.runner.graph.nodes)
)
except Exception as e:
agent_path = str(session.worker_path) if session.worker_path else ""
resp = _credential_error_response(e, agent_path)
if resp is not None:
return resp
# Resync MCP servers if credentials were added since the worker loaded
# (e.g. user connected an OAuth account mid-session via Aden UI).
try:
await loop.run_in_executor(
None, lambda: session.runner._tool_registry.resync_mcp_servers_if_needed()
)
except Exception as e:
logger.warning("MCP resync failed: %s", e)
body = await request.json()
entry_point_id = body.get("entry_point_id", "default")
input_data = body.get("input_data", {})
session_state = body.get("session_state") or {}
# Scope the worker execution to the live session ID
if "resume_session_id" not in session_state:
session_state["resume_session_id"] = session.id
execution_id = await session.worker_runtime.trigger(
entry_point_id,
input_data,
session_state=session_state,
)
# Cancel queen's in-progress LLM turn so it picks up the mode change cleanly
if session.queen_executor:
node = session.queen_executor.node_registry.get("queen")
if node and hasattr(node, "cancel_current_turn"):
node.cancel_current_turn()
# Switch queen to running mode (mirrors run_agent_with_input tool behavior)
if session.mode_state is not None:
await session.mode_state.switch_to_running(source="frontend")
return web.json_response({"execution_id": execution_id})
async def handle_inject(request: web.Request) -> web.Response:
"""POST /api/sessions/{session_id}/inject — inject input into a waiting node.
Body: {"node_id": "...", "content": "...", "graph_id": "..."}
"""
session, err = resolve_session(request)
if err:
return err
if not session.worker_runtime:
return web.json_response({"error": "No worker loaded in this session"}, status=503)
body = await request.json()
node_id = body.get("node_id")
content = body.get("content", "")
graph_id = body.get("graph_id")
if not node_id:
return web.json_response({"error": "node_id is required"}, status=400)
delivered = await session.worker_runtime.inject_input(node_id, content, graph_id=graph_id)
return web.json_response({"delivered": delivered})
async def handle_chat(request: web.Request) -> web.Response:
"""POST /api/sessions/{session_id}/chat — send a message to the queen.
The input box is permanently connected to the queen agent.
Worker input is handled separately via /worker-input.
Body: {"message": "hello"}
"""
session, err = resolve_session(request)
if err:
return err
body = await request.json()
message = body.get("message", "")
if not message:
return web.json_response({"error": "message is required"}, status=400)
queen_executor = session.queen_executor
if queen_executor is not None:
node = queen_executor.node_registry.get("queen")
if node is not None and hasattr(node, "inject_event"):
await node.inject_event(message, is_client_input=True)
return web.json_response(
{
"status": "queen",
"delivered": True,
}
)
return web.json_response({"error": "Queen not available"}, status=503)
async def handle_queen_context(request: web.Request) -> web.Response:
"""POST /api/sessions/{session_id}/queen-context — queue context for the queen.
Unlike /chat, this does NOT trigger an LLM response. The message is
queued in the queen's injection queue and will be drained on her next
natural iteration (prefixed with [External event]:).
Body: {"message": "..."}
"""
session, err = resolve_session(request)
if err:
return err
body = await request.json()
message = body.get("message", "")
if not message:
return web.json_response({"error": "message is required"}, status=400)
queen_executor = session.queen_executor
if queen_executor is not None:
node = queen_executor.node_registry.get("queen")
if node is not None and hasattr(node, "inject_event"):
await node.inject_event(message, is_client_input=False)
return web.json_response({"status": "queued", "delivered": True})
return web.json_response({"error": "Queen not available"}, status=503)
async def handle_worker_input(request: web.Request) -> web.Response:
"""POST /api/sessions/{session_id}/worker-input — send input to waiting worker node.
Auto-discovers the worker node currently awaiting input and injects the message.
Returns 404 if no worker node is awaiting input.
Body: {"message": "..."}
"""
session, err = resolve_session(request)
if err:
return err
body = await request.json()
message = body.get("message", "")
if not message:
return web.json_response({"error": "message is required"}, status=400)
if not session.worker_runtime:
return web.json_response({"error": "No worker loaded"}, status=503)
node_id, graph_id = session.worker_runtime.find_awaiting_node()
if not node_id:
return web.json_response({"error": "No worker node awaiting input"}, status=404)
delivered = await session.worker_runtime.inject_input(
node_id,
message,
graph_id=graph_id,
is_client_input=True,
)
return web.json_response(
{
"status": "injected",
"node_id": node_id,
"delivered": delivered,
}
)
async def handle_goal_progress(request: web.Request) -> web.Response:
"""GET /api/sessions/{session_id}/goal-progress — evaluate goal progress."""
session, err = resolve_session(request)
if err:
return err
if not session.worker_runtime:
return web.json_response({"error": "No worker loaded in this session"}, status=503)
progress = await session.worker_runtime.get_goal_progress()
return web.json_response(progress, dumps=lambda obj: json.dumps(obj, default=str))
async def handle_resume(request: web.Request) -> web.Response:
"""POST /api/sessions/{session_id}/resume — resume a paused execution.
Body: {"session_id": "...", "checkpoint_id": "..." (optional)}
"""
session, err = resolve_session(request)
if err:
return err
if not session.worker_runtime:
return web.json_response({"error": "No worker loaded in this session"}, status=503)
body = await request.json()
worker_session_id = body.get("session_id")
checkpoint_id = body.get("checkpoint_id")
if not worker_session_id:
return web.json_response({"error": "session_id is required"}, status=400)
worker_session_id = safe_path_segment(worker_session_id)
if checkpoint_id:
checkpoint_id = safe_path_segment(checkpoint_id)
# Read session state
session_dir = sessions_dir(session) / worker_session_id
state_path = session_dir / "state.json"
if not state_path.exists():
return web.json_response({"error": "Session not found"}, status=404)
try:
state = json.loads(state_path.read_text(encoding="utf-8"))
except (json.JSONDecodeError, OSError) as e:
return web.json_response({"error": f"Failed to read session: {e}"}, status=500)
if checkpoint_id:
resume_session_state = {
"resume_session_id": worker_session_id,
"resume_from_checkpoint": checkpoint_id,
}
else:
progress = state.get("progress", {})
paused_at = progress.get("paused_at") or progress.get("resume_from")
resume_session_state = {
"resume_session_id": worker_session_id,
"memory": state.get("memory", {}),
"execution_path": progress.get("path", []),
"node_visit_counts": progress.get("node_visit_counts", {}),
}
if paused_at:
resume_session_state["paused_at"] = paused_at
entry_points = session.worker_runtime.get_entry_points()
if not entry_points:
return web.json_response({"error": "No entry points available"}, status=400)
input_data = state.get("input_data", {})
execution_id = await session.worker_runtime.trigger(
entry_points[0].id,
input_data=input_data,
session_state=resume_session_state,
)
return web.json_response(
{
"execution_id": execution_id,
"resumed_from": worker_session_id,
"checkpoint_id": checkpoint_id,
}
)
async def handle_stop(request: web.Request) -> web.Response:
"""POST /api/sessions/{session_id}/stop — cancel a running execution.
Body: {"execution_id": "..."}
"""
session, err = resolve_session(request)
if err:
return err
if not session.worker_runtime:
return web.json_response({"error": "No worker loaded in this session"}, status=503)
body = await request.json()
execution_id = body.get("execution_id")
if not execution_id:
return web.json_response({"error": "execution_id is required"}, status=400)
for graph_id in session.worker_runtime.list_graphs():
reg = session.worker_runtime.get_graph_registration(graph_id)
if reg is None:
continue
for _ep_id, stream in reg.streams.items():
# Signal shutdown on active nodes to abort in-flight LLM streams
for executor in stream._active_executors.values():
for node in executor.node_registry.values():
if hasattr(node, "signal_shutdown"):
node.signal_shutdown()
if hasattr(node, "cancel_current_turn"):
node.cancel_current_turn()
cancelled = await stream.cancel_execution(execution_id)
if cancelled:
# Cancel queen's in-progress LLM turn
if session.queen_executor:
node = session.queen_executor.node_registry.get("queen")
if node and hasattr(node, "cancel_current_turn"):
node.cancel_current_turn()
# Switch to staging (agent still loaded, ready to re-run)
if session.mode_state is not None:
await session.mode_state.switch_to_staging(source="frontend")
return web.json_response(
{
"stopped": True,
"execution_id": execution_id,
}
)
return web.json_response({"stopped": False, "error": "Execution not found"}, status=404)
async def handle_replay(request: web.Request) -> web.Response:
"""POST /api/sessions/{session_id}/replay — re-run from a checkpoint.
Body: {"session_id": "...", "checkpoint_id": "..."}
"""
session, err = resolve_session(request)
if err:
return err
if not session.worker_runtime:
return web.json_response({"error": "No worker loaded in this session"}, status=503)
body = await request.json()
worker_session_id = body.get("session_id")
checkpoint_id = body.get("checkpoint_id")
if not worker_session_id:
return web.json_response({"error": "session_id is required"}, status=400)
if not checkpoint_id:
return web.json_response({"error": "checkpoint_id is required"}, status=400)
worker_session_id = safe_path_segment(worker_session_id)
checkpoint_id = safe_path_segment(checkpoint_id)
cp_path = sessions_dir(session) / worker_session_id / "checkpoints" / f"{checkpoint_id}.json"
if not cp_path.exists():
return web.json_response({"error": "Checkpoint not found"}, status=404)
entry_points = session.worker_runtime.get_entry_points()
if not entry_points:
return web.json_response({"error": "No entry points available"}, status=400)
replay_session_state = {
"resume_session_id": worker_session_id,
"resume_from_checkpoint": checkpoint_id,
}
execution_id = await session.worker_runtime.trigger(
entry_points[0].id,
input_data={},
session_state=replay_session_state,
)
return web.json_response(
{
"execution_id": execution_id,
"replayed_from": worker_session_id,
"checkpoint_id": checkpoint_id,
}
)
async def handle_cancel_queen(request: web.Request) -> web.Response:
"""POST /api/sessions/{session_id}/cancel-queen — cancel the queen's current LLM turn."""
session, err = resolve_session(request)
if err:
return err
queen_executor = session.queen_executor
if queen_executor is None:
return web.json_response({"cancelled": False, "error": "Queen not active"}, status=404)
node = queen_executor.node_registry.get("queen")
if node is None or not hasattr(node, "cancel_current_turn"):
return web.json_response({"cancelled": False, "error": "Queen node not found"}, status=404)
node.cancel_current_turn()
return web.json_response({"cancelled": True})
def register_routes(app: web.Application) -> None:
"""Register execution control routes."""
# Session-primary routes
app.router.add_post("/api/sessions/{session_id}/trigger", handle_trigger)
app.router.add_post("/api/sessions/{session_id}/inject", handle_inject)
app.router.add_post("/api/sessions/{session_id}/chat", handle_chat)
app.router.add_post("/api/sessions/{session_id}/queen-context", handle_queen_context)
app.router.add_post("/api/sessions/{session_id}/worker-input", handle_worker_input)
app.router.add_post("/api/sessions/{session_id}/pause", handle_stop)
app.router.add_post("/api/sessions/{session_id}/resume", handle_resume)
app.router.add_post("/api/sessions/{session_id}/stop", handle_stop)
app.router.add_post("/api/sessions/{session_id}/cancel-queen", handle_cancel_queen)
app.router.add_post("/api/sessions/{session_id}/replay", handle_replay)
app.router.add_get("/api/sessions/{session_id}/goal-progress", handle_goal_progress)
@@ -0,0 +1,251 @@
"""Graph and node inspection routes — node list, node detail, node criteria."""
import json
import logging
from aiohttp import web
from framework.server.app import resolve_session, safe_path_segment
logger = logging.getLogger(__name__)
def _get_graph_registration(session, graph_id: str):
"""Get _GraphRegistration for a graph_id. Returns (reg, None) or (None, error_response)."""
if not session.worker_runtime:
return None, web.json_response({"error": "No worker loaded in this session"}, status=503)
reg = session.worker_runtime.get_graph_registration(graph_id)
if reg is None:
return None, web.json_response({"error": f"Graph '{graph_id}' not found"}, status=404)
return reg, None
def _get_graph_spec(session, graph_id: str):
"""Get GraphSpec for a graph_id. Returns (graph_spec, None) or (None, error_response)."""
reg, err = _get_graph_registration(session, graph_id)
if err:
return None, err
return reg.graph, None
def _node_to_dict(node) -> dict:
"""Serialize a NodeSpec to a JSON-friendly dict."""
return {
"id": node.id,
"name": node.name,
"description": node.description,
"node_type": node.node_type,
"input_keys": node.input_keys,
"output_keys": node.output_keys,
"nullable_output_keys": node.nullable_output_keys,
"tools": node.tools,
"routes": node.routes,
"max_retries": node.max_retries,
"max_node_visits": node.max_node_visits,
"client_facing": node.client_facing,
"success_criteria": node.success_criteria,
"system_prompt": node.system_prompt or "",
"sub_agents": node.sub_agents,
}
async def handle_list_nodes(request: web.Request) -> web.Response:
"""List nodes in a graph."""
session, err = resolve_session(request)
if err:
return err
graph_id = request.match_info["graph_id"]
reg, err = _get_graph_registration(session, graph_id)
if err:
return err
graph = reg.graph
nodes = [_node_to_dict(n) for n in graph.nodes]
# Optionally enrich with session progress
worker_session_id = request.query.get("session_id")
if worker_session_id and session.worker_path:
worker_session_id = safe_path_segment(worker_session_id)
from pathlib import Path
state_path = (
Path.home()
/ ".hive"
/ "agents"
/ session.worker_path.name
/ "sessions"
/ worker_session_id
/ "state.json"
)
if state_path.exists():
try:
state = json.loads(state_path.read_text(encoding="utf-8"))
progress = state.get("progress", {})
visit_counts = progress.get("node_visit_counts", {})
failures = progress.get("nodes_with_failures", [])
current = progress.get("current_node")
path = progress.get("path", [])
for node in nodes:
nid = node["id"]
node["visit_count"] = visit_counts.get(nid, 0)
node["has_failures"] = nid in failures
node["is_current"] = nid == current
node["in_path"] = nid in path
except (json.JSONDecodeError, OSError):
pass
edges = [
{"source": e.source, "target": e.target, "condition": e.condition, "priority": e.priority}
for e in graph.edges
]
rt = session.worker_runtime
entry_points = [
{
"id": ep.id,
"name": ep.name,
"entry_node": ep.entry_node,
"trigger_type": ep.trigger_type,
"trigger_config": ep.trigger_config,
**(
{"next_fire_in": nf}
if rt and (nf := rt.get_timer_next_fire_in(ep.id)) is not None
else {}
),
}
for ep in reg.entry_points.values()
]
return web.json_response(
{
"nodes": nodes,
"edges": edges,
"entry_node": graph.entry_node,
"entry_points": entry_points,
}
)
async def handle_get_node(request: web.Request) -> web.Response:
"""Get node detail."""
session, err = resolve_session(request)
if err:
return err
graph_id = request.match_info["graph_id"]
node_id = request.match_info["node_id"]
graph, err = _get_graph_spec(session, graph_id)
if err:
return err
node_spec = graph.get_node(node_id)
if node_spec is None:
return web.json_response({"error": f"Node '{node_id}' not found"}, status=404)
data = _node_to_dict(node_spec)
edges = [
{"target": e.target, "condition": e.condition, "priority": e.priority}
for e in graph.edges
if e.source == node_id
]
data["edges"] = edges
return web.json_response(data)
async def handle_node_criteria(request: web.Request) -> web.Response:
"""Get node success criteria and last execution info."""
session, err = resolve_session(request)
if err:
return err
graph_id = request.match_info["graph_id"]
node_id = request.match_info["node_id"]
graph, err = _get_graph_spec(session, graph_id)
if err:
return err
node_spec = graph.get_node(node_id)
if node_spec is None:
return web.json_response({"error": f"Node '{node_id}' not found"}, status=404)
result: dict = {
"node_id": node_id,
"success_criteria": node_spec.success_criteria,
"output_keys": node_spec.output_keys,
}
worker_session_id = request.query.get("session_id")
if worker_session_id and session.worker_runtime:
log_store = getattr(session.worker_runtime, "_runtime_log_store", None)
if log_store:
details = await log_store.load_details(worker_session_id)
if details:
node_details = [n for n in details.nodes if n.node_id == node_id]
if node_details:
latest = node_details[-1]
result["last_execution"] = {
"success": latest.success,
"error": latest.error,
"retry_count": latest.retry_count,
"needs_attention": latest.needs_attention,
"attention_reasons": latest.attention_reasons,
}
return web.json_response(result, dumps=lambda obj: json.dumps(obj, default=str))
async def handle_node_tools(request: web.Request) -> web.Response:
"""Get tools available to a node."""
session, err = resolve_session(request)
if err:
return err
graph_id = request.match_info["graph_id"]
node_id = request.match_info["node_id"]
graph, err = _get_graph_spec(session, graph_id)
if err:
return err
node_spec = graph.get_node(node_id)
if node_spec is None:
return web.json_response({"error": f"Node '{node_id}' not found"}, status=404)
tools_out = []
registry = getattr(session.runner, "_tool_registry", None) if session.runner else None
all_tools = registry.get_tools() if registry else {}
for name in node_spec.tools:
tool = all_tools.get(name)
if tool:
tools_out.append(
{
"name": tool.name,
"description": tool.description,
"parameters": tool.parameters,
}
)
else:
tools_out.append({"name": name, "description": "", "parameters": {}})
return web.json_response({"tools": tools_out})
def register_routes(app: web.Application) -> None:
"""Register graph/node inspection routes."""
# Session-primary routes
app.router.add_get("/api/sessions/{session_id}/graphs/{graph_id}/nodes", handle_list_nodes)
app.router.add_get(
"/api/sessions/{session_id}/graphs/{graph_id}/nodes/{node_id}", handle_get_node
)
app.router.add_get(
"/api/sessions/{session_id}/graphs/{graph_id}/nodes/{node_id}/criteria",
handle_node_criteria,
)
app.router.add_get(
"/api/sessions/{session_id}/graphs/{graph_id}/nodes/{node_id}/tools",
handle_node_tools,
)
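# Example (editor's sketch, not part of the diff): reading the node list,
# optionally enriched with per-session progress. graph_id "main" and the IDs
# below are placeholders.
import json as _json
import urllib.request as _request
from urllib.parse import urlencode as _urlencode

_BASE = "http://localhost:8080"  # assumed address

def _list_nodes(session_id: str, graph_id: str, worker_session_id: str | None = None) -> dict:
    qs = f"?{_urlencode({'session_id': worker_session_id})}" if worker_session_id else ""
    url = f"{_BASE}/api/sessions/{session_id}/graphs/{graph_id}/nodes{qs}"
    with _request.urlopen(url) as resp:
        return _json.loads(resp.read())

# data = _list_nodes("my-session", "main", "session_20260304_120000")
# for node in data["nodes"]:
#     print(node["id"], node.get("visit_count"), node.get("is_current"))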
@@ -0,0 +1,114 @@
"""Log and observability routes — agent logs, node-scoped logs."""
import json
import logging
from aiohttp import web
from framework.server.app import resolve_session
logger = logging.getLogger(__name__)
async def handle_logs(request: web.Request) -> web.Response:
"""Session-level logs.
Query params:
session_id: Scope to a specific worker session (optional).
level: "summary" | "details" | "tools" (default: "summary").
limit: Max results when listing summaries (default: 20).
"""
session, err = resolve_session(request)
if err:
return err
if not session.worker_runtime:
return web.json_response({"error": "No worker loaded in this session"}, status=503)
log_store = getattr(session.worker_runtime, "_runtime_log_store", None)
if log_store is None:
return web.json_response({"error": "Logging not enabled for this agent"}, status=404)
worker_session_id = request.query.get("session_id")
level = request.query.get("level", "summary")
try:
limit = min(int(request.query.get("limit", "20")), 1000)
except (ValueError, TypeError):
limit = 20
if not worker_session_id:
summaries = await log_store.list_runs(limit=limit)
return web.json_response(
{"logs": [s.model_dump() for s in summaries]},
dumps=lambda obj: json.dumps(obj, default=str),
)
if level == "details":
details = await log_store.load_details(worker_session_id)
if details is None:
return web.json_response({"error": "No detail logs found"}, status=404)
return web.json_response(
{"session_id": worker_session_id, "nodes": [n.model_dump() for n in details.nodes]},
dumps=lambda obj: json.dumps(obj, default=str),
)
elif level == "tools":
tool_logs = await log_store.load_tool_logs(worker_session_id)
if tool_logs is None:
return web.json_response({"error": "No tool logs found"}, status=404)
return web.json_response(
{"session_id": worker_session_id, "steps": [s.model_dump() for s in tool_logs.steps]},
dumps=lambda obj: json.dumps(obj, default=str),
)
else:
summary = await log_store.load_summary(worker_session_id)
if summary is None:
return web.json_response({"error": "No summary log found"}, status=404)
return web.json_response(
summary.model_dump(),
dumps=lambda obj: json.dumps(obj, default=str),
)
async def handle_node_logs(request: web.Request) -> web.Response:
"""Node-scoped logs."""
session, err = resolve_session(request)
if err:
return err
node_id = request.match_info["node_id"]
if not session.worker_runtime:
return web.json_response({"error": "No worker loaded in this session"}, status=503)
log_store = getattr(session.worker_runtime, "_runtime_log_store", None)
if log_store is None:
return web.json_response({"error": "Logging not enabled"}, status=404)
worker_session_id = request.query.get("session_id")
if not worker_session_id:
return web.json_response({"error": "session_id query param is required"}, status=400)
level = request.query.get("level", "all")
result: dict = {"session_id": worker_session_id, "node_id": node_id}
if level in ("details", "all"):
details = await log_store.load_details(worker_session_id)
if details:
result["details"] = [n.model_dump() for n in details.nodes if n.node_id == node_id]
if level in ("tools", "all"):
tool_logs = await log_store.load_tool_logs(worker_session_id)
if tool_logs:
result["tool_logs"] = [s.model_dump() for s in tool_logs.steps if s.node_id == node_id]
return web.json_response(result, dumps=lambda obj: json.dumps(obj, default=str))
def register_routes(app: web.Application) -> None:
"""Register log routes."""
# Session-primary routes
app.router.add_get("/api/sessions/{session_id}/logs", handle_logs)
app.router.add_get(
"/api/sessions/{session_id}/graphs/{graph_id}/nodes/{node_id}/logs",
handle_node_logs,
)
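# Example (editor's sketch, not part of the diff): walking the three log
# levels the handler accepts. Host/port and IDs are placeholders.
import json as _json
import urllib.request as _request
from urllib.parse import urlencode as _urlencode

_BASE = "http://localhost:8080"  # assumed address

def _fetch_logs(session_id: str, worker_session_id: str, level: str = "summary") -> dict:
    qs = _urlencode({"session_id": worker_session_id, "level": level})
    with _request.urlopen(f"{_BASE}/api/sessions/{session_id}/logs?{qs}") as resp:
        return _json.loads(resp.read())

# for level in ("summary", "details", "tools"):
#     print(level, _fetch_logs("my-session", "session_20260304_120000", level))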
@@ -0,0 +1,738 @@
"""Session lifecycle, info, and worker-session browsing routes.
Session-primary routes:
- POST   /api/sessions                              create session (with or without worker)
- GET    /api/sessions                              list all active sessions
- GET    /api/sessions/{session_id}                 session detail
- DELETE /api/sessions/{session_id}                 stop session entirely
- POST   /api/sessions/{session_id}/worker          load a worker into session
- DELETE /api/sessions/{session_id}/worker          unload worker from session
- GET    /api/sessions/{session_id}/stats           runtime statistics
- GET    /api/sessions/{session_id}/entry-points    list entry points
- GET    /api/sessions/{session_id}/graphs          list graph IDs
- GET    /api/sessions/{session_id}/queen-messages  queen conversation history
Worker session browsing (persisted execution runs on disk):
- GET    /api/sessions/{session_id}/worker-sessions                      list
- GET    /api/sessions/{session_id}/worker-sessions/{ws_id}              detail
- DELETE /api/sessions/{session_id}/worker-sessions/{ws_id}              delete
- GET    /api/sessions/{session_id}/worker-sessions/{ws_id}/checkpoints  list checkpoints
- POST   /api/sessions/{session_id}/worker-sessions/{ws_id}/checkpoints/{cp}/restore
- GET    /api/sessions/{session_id}/worker-sessions/{ws_id}/messages     messages
"""
import json
import logging
import shutil
import time
from pathlib import Path
from aiohttp import web
from framework.server.app import (
resolve_session,
safe_path_segment,
sessions_dir,
validate_agent_path,
)
from framework.server.session_manager import SessionManager
logger = logging.getLogger(__name__)
def _get_manager(request: web.Request) -> SessionManager:
return request.app["manager"]
def _session_to_live_dict(session) -> dict:
"""Serialize a live Session to the session-primary JSON shape."""
info = session.worker_info
mode_state = getattr(session, "mode_state", None)
return {
"session_id": session.id,
"worker_id": session.worker_id,
"worker_name": info.name if info else session.worker_id,
"has_worker": session.worker_runtime is not None,
"agent_path": str(session.worker_path) if session.worker_path else "",
"description": info.description if info else "",
"goal": info.goal_name if info else "",
"node_count": info.node_count if info else 0,
"loaded_at": session.loaded_at,
"uptime_seconds": round(time.time() - session.loaded_at, 1),
"intro_message": getattr(session.runner, "intro_message", "") or "",
"queen_mode": mode_state.mode if mode_state else "building",
}
def _credential_error_response(exc: Exception, agent_path: str | None) -> web.Response | None:
"""If *exc* is a CredentialError, return a 424 with structured credential info.
Returns None if *exc* is not a credential error (caller should handle it).
Uses the CredentialValidationResult attached by validate_agent_credentials.
"""
from framework.credentials.models import CredentialError
if not isinstance(exc, CredentialError):
return None
from framework.server.routes_credentials import _status_to_dict
# Prefer the structured validation result attached to the exception
validation_result = getattr(exc, "validation_result", None)
if validation_result is not None:
required = [_status_to_dict(c) for c in validation_result.failed]
else:
# Fallback for exceptions without a validation result
required = []
return web.json_response(
{
"error": "credentials_required",
"message": str(exc),
"agent_path": agent_path or "",
"required": required,
},
status=424,
)
# ------------------------------------------------------------------
# Session lifecycle
# ------------------------------------------------------------------
async def handle_create_session(request: web.Request) -> web.Response:
"""POST /api/sessions — create a session.
Body: {
"agent_path": "..." (optional if provided, creates session with worker),
"agent_id": "..." (optional worker ID override),
"session_id": "..." (optional custom session ID),
"model": "..." (optional),
"initial_prompt": "..." (optional first user message for the queen),
}
When agent_path is provided, creates a session with a worker in one step
(equivalent to the old POST /api/agents). Otherwise creates a queen-only
session that can later have a worker loaded via POST /sessions/{id}/worker.
"""
manager = _get_manager(request)
body = await request.json() if request.can_read_body else {}
agent_path = body.get("agent_path")
agent_id = body.get("agent_id")
session_id = body.get("session_id")
model = body.get("model")
initial_prompt = body.get("initial_prompt")
if agent_path:
try:
agent_path = str(validate_agent_path(agent_path))
except ValueError as e:
return web.json_response({"error": str(e)}, status=400)
try:
if agent_path:
# One-step: create session + load worker
session = await manager.create_session_with_worker(
agent_path,
agent_id=agent_id,
model=model,
initial_prompt=initial_prompt,
)
else:
# Queen-only session
session = await manager.create_session(
session_id=session_id,
model=model,
initial_prompt=initial_prompt,
)
except ValueError as e:
msg = str(e)
if "currently loading" in msg:
resolved_id = agent_id or (Path(agent_path).name if agent_path else "")
return web.json_response(
{"error": msg, "worker_id": resolved_id, "loading": True},
status=409,
)
return web.json_response({"error": msg}, status=409)
except FileNotFoundError:
return web.json_response(
{"error": f"Agent not found: {agent_path or 'no path'}"},
status=404,
)
except Exception as e:
resp = _credential_error_response(e, agent_path)
if resp is not None:
return resp
logger.exception("Error creating session: %s", e)
return web.json_response({"error": "Internal server error"}, status=500)
return web.json_response(_session_to_live_dict(session), status=201)
async def handle_list_live_sessions(request: web.Request) -> web.Response:
"""GET /api/sessions — list all active sessions."""
manager = _get_manager(request)
sessions = [_session_to_live_dict(s) for s in manager.list_sessions()]
return web.json_response({"sessions": sessions})
async def handle_get_live_session(request: web.Request) -> web.Response:
"""GET /api/sessions/{session_id} — get session detail."""
manager = _get_manager(request)
session_id = request.match_info["session_id"]
session = manager.get_session(session_id)
if session is None:
if manager.is_loading(session_id):
return web.json_response(
{"session_id": session_id, "loading": True},
status=202,
)
return web.json_response(
{"error": f"Session '{session_id}' not found"},
status=404,
)
data = _session_to_live_dict(session)
if session.worker_runtime:
rt = session.worker_runtime
data["entry_points"] = [
{
"id": ep.id,
"name": ep.name,
"entry_node": ep.entry_node,
"trigger_type": ep.trigger_type,
"trigger_config": ep.trigger_config,
**(
{"next_fire_in": nf}
if (nf := rt.get_timer_next_fire_in(ep.id)) is not None
else {}
),
}
for ep in rt.get_entry_points()
]
data["graphs"] = session.worker_runtime.list_graphs()
return web.json_response(data)
async def handle_stop_session(request: web.Request) -> web.Response:
"""DELETE /api/sessions/{session_id} — stop a session entirely."""
manager = _get_manager(request)
session_id = request.match_info["session_id"]
stopped = await manager.stop_session(session_id)
if not stopped:
return web.json_response(
{"error": f"Session '{session_id}' not found"},
status=404,
)
return web.json_response({"session_id": session_id, "stopped": True})
# ------------------------------------------------------------------
# Worker lifecycle
# ------------------------------------------------------------------
async def handle_load_worker(request: web.Request) -> web.Response:
"""POST /api/sessions/{session_id}/worker — load a worker into a session.
Body: {"agent_path": "...", "worker_id": "..." (optional), "model": "..." (optional)}
"""
manager = _get_manager(request)
session_id = request.match_info["session_id"]
body = await request.json()
agent_path = body.get("agent_path")
if not agent_path:
return web.json_response({"error": "agent_path is required"}, status=400)
try:
agent_path = str(validate_agent_path(agent_path))
except ValueError as e:
return web.json_response({"error": str(e)}, status=400)
worker_id = body.get("worker_id")
model = body.get("model")
try:
session = await manager.load_worker(
session_id,
agent_path,
worker_id=worker_id,
model=model,
)
except ValueError as e:
return web.json_response({"error": str(e)}, status=409)
except FileNotFoundError:
return web.json_response({"error": f"Agent not found: {agent_path}"}, status=404)
except Exception as e:
resp = _credential_error_response(e, agent_path)
if resp is not None:
return resp
logger.exception("Error loading worker: %s", e)
return web.json_response({"error": "Internal server error"}, status=500)
return web.json_response(_session_to_live_dict(session))
async def handle_unload_worker(request: web.Request) -> web.Response:
"""DELETE /api/sessions/{session_id}/worker — unload worker, keep queen alive."""
manager = _get_manager(request)
session_id = request.match_info["session_id"]
removed = await manager.unload_worker(session_id)
if not removed:
session = manager.get_session(session_id)
if session is None:
return web.json_response(
{"error": f"Session '{session_id}' not found"},
status=404,
)
return web.json_response(
{"error": "No worker loaded in this session"},
status=409,
)
return web.json_response({"session_id": session_id, "worker_unloaded": True})
# ------------------------------------------------------------------
# Session info (worker details)
# ------------------------------------------------------------------
async def handle_session_stats(request: web.Request) -> web.Response:
"""GET /api/sessions/{session_id}/stats — runtime statistics."""
manager = _get_manager(request)
session_id = request.match_info["session_id"]
session = manager.get_session(session_id)
if session is None:
return web.json_response(
{"error": f"Session '{session_id}' not found"},
status=404,
)
stats = session.worker_runtime.get_stats() if session.worker_runtime else {}
return web.json_response(stats)
async def handle_session_entry_points(request: web.Request) -> web.Response:
"""GET /api/sessions/{session_id}/entry-points — list entry points."""
manager = _get_manager(request)
session_id = request.match_info["session_id"]
session = manager.get_session(session_id)
if session is None:
return web.json_response(
{"error": f"Session '{session_id}' not found"},
status=404,
)
rt = session.worker_runtime
eps = rt.get_entry_points() if rt else []
return web.json_response(
{
"entry_points": [
{
"id": ep.id,
"name": ep.name,
"entry_node": ep.entry_node,
"trigger_type": ep.trigger_type,
"trigger_config": ep.trigger_config,
**(
{"next_fire_in": nf}
if rt and (nf := rt.get_timer_next_fire_in(ep.id)) is not None
else {}
),
}
for ep in eps
]
}
)
async def handle_session_graphs(request: web.Request) -> web.Response:
"""GET /api/sessions/{session_id}/graphs — list loaded graphs."""
manager = _get_manager(request)
session_id = request.match_info["session_id"]
session = manager.get_session(session_id)
if session is None:
return web.json_response(
{"error": f"Session '{session_id}' not found"},
status=404,
)
graphs = session.worker_runtime.list_graphs() if session.worker_runtime else []
return web.json_response({"graphs": graphs})
# ------------------------------------------------------------------
# Worker session browsing (persisted execution runs on disk)
# ------------------------------------------------------------------
async def handle_list_worker_sessions(request: web.Request) -> web.Response:
"""List worker sessions on disk."""
session, err = resolve_session(request)
if err:
return err
if not session.worker_path:
return web.json_response({"sessions": []})
sess_dir = sessions_dir(session)
if not sess_dir.exists():
return web.json_response({"sessions": []})
sessions = []
for d in sorted(sess_dir.iterdir(), reverse=True):
if not d.is_dir() or not d.name.startswith("session_"):
continue
entry: dict = {"session_id": d.name}
state_path = d / "state.json"
if state_path.exists():
try:
state = json.loads(state_path.read_text(encoding="utf-8"))
entry["status"] = state.get("status", "unknown")
entry["started_at"] = state.get("started_at")
entry["completed_at"] = state.get("completed_at")
progress = state.get("progress", {})
entry["steps"] = progress.get("steps_executed", 0)
entry["paused_at"] = progress.get("paused_at")
except (json.JSONDecodeError, OSError):
entry["status"] = "error"
cp_dir = d / "checkpoints"
if cp_dir.exists():
entry["checkpoint_count"] = sum(1 for f in cp_dir.iterdir() if f.suffix == ".json")
else:
entry["checkpoint_count"] = 0
sessions.append(entry)
return web.json_response({"sessions": sessions})
async def handle_get_worker_session(request: web.Request) -> web.Response:
"""Get worker session detail from disk."""
session, err = resolve_session(request)
if err:
return err
if not session.worker_path:
return web.json_response({"error": "No worker loaded"}, status=503)
# Support both URL param names: ws_id (new) or session_id (legacy)
ws_id = request.match_info.get("ws_id") or request.match_info.get("session_id", "")
ws_id = safe_path_segment(ws_id)
state_path = sessions_dir(session) / ws_id / "state.json"
if not state_path.exists():
return web.json_response({"error": "Session not found"}, status=404)
try:
state = json.loads(state_path.read_text(encoding="utf-8"))
except (json.JSONDecodeError, OSError) as e:
return web.json_response({"error": f"Failed to read session: {e}"}, status=500)
return web.json_response(state)
async def handle_list_checkpoints(request: web.Request) -> web.Response:
"""List checkpoints for a worker session."""
session, err = resolve_session(request)
if err:
return err
if not session.worker_path:
return web.json_response({"error": "No worker loaded"}, status=503)
ws_id = request.match_info.get("ws_id") or request.match_info.get("session_id", "")
ws_id = safe_path_segment(ws_id)
cp_dir = sessions_dir(session) / ws_id / "checkpoints"
if not cp_dir.exists():
return web.json_response({"checkpoints": []})
checkpoints = []
for f in sorted(cp_dir.iterdir(), reverse=True):
if f.suffix != ".json":
continue
try:
data = json.loads(f.read_text(encoding="utf-8"))
checkpoints.append(
{
"checkpoint_id": f.stem,
"current_node": data.get("current_node"),
"next_node": data.get("next_node"),
"is_clean": data.get("is_clean", False),
"timestamp": data.get("timestamp"),
}
)
except (json.JSONDecodeError, OSError):
checkpoints.append({"checkpoint_id": f.stem, "error": "unreadable"})
return web.json_response({"checkpoints": checkpoints})
async def handle_delete_worker_session(request: web.Request) -> web.Response:
"""Delete a worker session from disk."""
session, err = resolve_session(request)
if err:
return err
if not session.worker_path:
return web.json_response({"error": "No worker loaded"}, status=503)
ws_id = request.match_info.get("ws_id") or request.match_info.get("session_id", "")
ws_id = safe_path_segment(ws_id)
session_path = sessions_dir(session) / ws_id
if not session_path.exists():
return web.json_response({"error": "Session not found"}, status=404)
shutil.rmtree(session_path)
return web.json_response({"deleted": ws_id})
async def handle_restore_checkpoint(request: web.Request) -> web.Response:
"""Restore from a checkpoint."""
session, err = resolve_session(request)
if err:
return err
if not session.worker_runtime:
return web.json_response({"error": "No worker loaded in this session"}, status=503)
ws_id = request.match_info.get("ws_id") or request.match_info.get("session_id", "")
ws_id = safe_path_segment(ws_id)
checkpoint_id = safe_path_segment(request.match_info["checkpoint_id"])
cp_path = sessions_dir(session) / ws_id / "checkpoints" / f"{checkpoint_id}.json"
if not cp_path.exists():
return web.json_response({"error": "Checkpoint not found"}, status=404)
entry_points = session.worker_runtime.get_entry_points()
if not entry_points:
return web.json_response({"error": "No entry points available"}, status=400)
restore_session_state = {
"resume_session_id": ws_id,
"resume_from_checkpoint": checkpoint_id,
}
execution_id = await session.worker_runtime.trigger(
entry_points[0].id,
input_data={},
session_state=restore_session_state,
)
return web.json_response(
{
"execution_id": execution_id,
"restored_from": ws_id,
"checkpoint_id": checkpoint_id,
}
)
async def handle_messages(request: web.Request) -> web.Response:
"""Get messages for a worker session."""
session, err = resolve_session(request)
if err:
return err
if not session.worker_path:
return web.json_response({"error": "No worker loaded"}, status=503)
ws_id = request.match_info.get("ws_id") or request.match_info.get("session_id", "")
ws_id = safe_path_segment(ws_id)
convs_dir = sessions_dir(session) / ws_id / "conversations"
if not convs_dir.exists():
return web.json_response({"messages": []})
filter_node = request.query.get("node_id")
all_messages = []
for node_dir in convs_dir.iterdir():
if not node_dir.is_dir():
continue
if filter_node and node_dir.name != filter_node:
continue
parts_dir = node_dir / "parts"
if not parts_dir.exists():
continue
for part_file in sorted(parts_dir.iterdir()):
if part_file.suffix != ".json":
continue
try:
part = json.loads(part_file.read_text(encoding="utf-8"))
part["_node_id"] = node_dir.name
part.setdefault("created_at", part_file.stat().st_mtime)
all_messages.append(part)
except (json.JSONDecodeError, OSError):
continue
all_messages.sort(key=lambda m: m.get("created_at", m.get("seq", 0)))
client_only = request.query.get("client_only", "").lower() in ("true", "1")
if client_only:
client_facing_nodes: set[str] = set()
if session.runner and hasattr(session.runner, "graph"):
for node in session.runner.graph.nodes:
if node.client_facing:
client_facing_nodes.add(node.id)
if client_facing_nodes:
all_messages = [
m
for m in all_messages
if not m.get("is_transition_marker")
and m["role"] != "tool"
and not (m["role"] == "assistant" and m.get("tool_calls"))
and (
(m["role"] == "user" and m.get("is_client_input"))
or (m["role"] == "assistant" and m.get("_node_id") in client_facing_nodes)
)
]
return web.json_response({"messages": all_messages})
async def handle_queen_messages(request: web.Request) -> web.Response:
"""GET /api/sessions/{session_id}/queen-messages — get queen conversation."""
session, err = resolve_session(request)
if err:
return err
queen_dir = Path.home() / ".hive" / "queen" / "session" / session.id
convs_dir = queen_dir / "conversations"
if not convs_dir.exists():
return web.json_response({"messages": []})
all_messages: list[dict] = []
for node_dir in convs_dir.iterdir():
if not node_dir.is_dir():
continue
parts_dir = node_dir / "parts"
if not parts_dir.exists():
continue
for part_file in sorted(parts_dir.iterdir()):
if part_file.suffix != ".json":
continue
try:
part = json.loads(part_file.read_text(encoding="utf-8"))
part["_node_id"] = node_dir.name
# Use file mtime as created_at so frontend can order
# queen and worker messages chronologically.
part.setdefault("created_at", part_file.stat().st_mtime)
all_messages.append(part)
except (json.JSONDecodeError, OSError):
continue
all_messages.sort(key=lambda m: m.get("created_at", m.get("seq", 0)))
# Filter to client-facing messages only
all_messages = [
m
for m in all_messages
if not m.get("is_transition_marker")
and m["role"] != "tool"
and not (m["role"] == "assistant" and m.get("tool_calls"))
]
return web.json_response({"messages": all_messages})
# ------------------------------------------------------------------
# Agent discovery (not session-specific)
# ------------------------------------------------------------------
async def handle_discover(request: web.Request) -> web.Response:
"""GET /api/discover — discover agents from filesystem."""
from framework.tui.screens.agent_picker import discover_agents
manager = _get_manager(request)
loaded_paths = {str(s.worker_path) for s in manager.list_sessions() if s.worker_path}
groups = discover_agents()
result = {}
for category, entries in groups.items():
result[category] = [
{
"path": str(entry.path),
"name": entry.name,
"description": entry.description,
"category": entry.category,
"session_count": entry.session_count,
"node_count": entry.node_count,
"tool_count": entry.tool_count,
"tags": entry.tags,
"last_active": entry.last_active,
"is_loaded": str(entry.path) in loaded_paths,
}
for entry in entries
]
return web.json_response(result)
# ------------------------------------------------------------------
# Route registration
# ------------------------------------------------------------------
def register_routes(app: web.Application) -> None:
"""Register session routes."""
# Discovery
app.router.add_get("/api/discover", handle_discover)
# Session lifecycle
app.router.add_post("/api/sessions", handle_create_session)
app.router.add_get("/api/sessions", handle_list_live_sessions)
app.router.add_get("/api/sessions/{session_id}", handle_get_live_session)
app.router.add_delete("/api/sessions/{session_id}", handle_stop_session)
# Worker lifecycle
app.router.add_post("/api/sessions/{session_id}/worker", handle_load_worker)
app.router.add_delete("/api/sessions/{session_id}/worker", handle_unload_worker)
# Session info
app.router.add_get("/api/sessions/{session_id}/stats", handle_session_stats)
app.router.add_get("/api/sessions/{session_id}/entry-points", handle_session_entry_points)
app.router.add_get("/api/sessions/{session_id}/graphs", handle_session_graphs)
app.router.add_get("/api/sessions/{session_id}/queen-messages", handle_queen_messages)
# Worker session browsing (session-primary)
app.router.add_get("/api/sessions/{session_id}/worker-sessions", handle_list_worker_sessions)
app.router.add_get(
"/api/sessions/{session_id}/worker-sessions/{ws_id}", handle_get_worker_session
)
app.router.add_delete(
"/api/sessions/{session_id}/worker-sessions/{ws_id}", handle_delete_worker_session
)
app.router.add_get(
"/api/sessions/{session_id}/worker-sessions/{ws_id}/checkpoints",
handle_list_checkpoints,
)
app.router.add_post(
"/api/sessions/{session_id}/worker-sessions/{ws_id}/checkpoints/{checkpoint_id}/restore",
handle_restore_checkpoint,
)
app.router.add_get(
"/api/sessions/{session_id}/worker-sessions/{ws_id}/messages",
handle_messages,
)
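# Example (editor's sketch, not part of the diff): the session-primary flow
# over plain HTTP. Host/port and the agent path are placeholders.
import json as _json
import urllib.request as _request

_BASE = "http://localhost:8080"  # assumed address

def _call(method: str, path: str, body: dict | None = None) -> dict:
    data = _json.dumps(body).encode() if body is not None else None
    req = _request.Request(
        f"{_BASE}{path}",
        data=data,
        headers={"Content-Type": "application/json"},
        method=method,
    )
    with _request.urlopen(req) as resp:
        return _json.loads(resp.read())

# sess = _call("POST", "/api/sessions", {})                      # queen-only session
# _call("POST", f"/api/sessions/{sess['session_id']}/worker",    # attach a worker
#       {"agent_path": "agents/my_agent"})
# _call("DELETE", f"/api/sessions/{sess['session_id']}/worker")  # detach, queen stays
# _call("DELETE", f"/api/sessions/{sess['session_id']}")         # stop entirely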
@@ -0,0 +1,782 @@
"""Session-primary lifecycle manager for the HTTP API server.
Sessions (queen) are the primary entity. Workers are optional and can be
loaded/unloaded while the queen stays alive.
Architecture:
- Session owns EventBus + LLM, shared with queen and worker
- Queen is always present once a session starts
- Worker is optional, loaded into an existing session
- Judge is active only when a worker is loaded
"""
import asyncio
import json
import logging
import time
import uuid
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any
logger = logging.getLogger(__name__)
@dataclass
class Session:
"""A live session with a queen and optional worker."""
id: str
event_bus: Any # EventBus — owned by session
llm: Any # LLMProvider — owned by session
loaded_at: float
# Queen (always present once started)
queen_executor: Any = None # GraphExecutor for queen input injection
queen_task: asyncio.Task | None = None
# Worker (optional)
worker_id: str | None = None
worker_path: Path | None = None
runner: Any | None = None # AgentRunner
worker_runtime: Any | None = None # AgentRuntime
worker_info: Any | None = None # AgentInfo
# Queen mode state (building/staging/running)
mode_state: Any = None # QueenModeState
# Judge (active when worker is loaded)
judge_task: asyncio.Task | None = None
escalation_sub: str | None = None
class SessionManager:
"""Manages session lifecycles.
Thread-safe via asyncio.Lock. Workers are loaded via run_in_executor
(blocking I/O) then started on the event loop.
"""
def __init__(self, model: str | None = None, credential_store=None) -> None:
self._sessions: dict[str, Session] = {}
self._loading: set[str] = set()
self._model = model
self._credential_store = credential_store
self._lock = asyncio.Lock()
# ------------------------------------------------------------------
# Session lifecycle
# ------------------------------------------------------------------
async def _create_session_core(
self,
session_id: str | None = None,
model: str | None = None,
) -> Session:
"""Create session infrastructure (EventBus, LLM) without starting queen.
Internal helper; use create_session() or create_session_with_worker().
"""
from framework.config import RuntimeConfig
from framework.llm.litellm import LiteLLMProvider
from framework.runtime.event_bus import EventBus
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
resolved_id = session_id or f"session_{ts}_{uuid.uuid4().hex[:8]}"
async with self._lock:
if resolved_id in self._sessions:
raise ValueError(f"Session '{resolved_id}' already exists")
# Load LLM config from ~/.hive/configuration.json
rc = RuntimeConfig(model=model or self._model or RuntimeConfig().model)
# Session owns these — shared with queen and worker
llm = LiteLLMProvider(
model=rc.model,
api_key=rc.api_key,
api_base=rc.api_base,
**rc.extra_kwargs,
)
event_bus = EventBus()
session = Session(
id=resolved_id,
event_bus=event_bus,
llm=llm,
loaded_at=time.time(),
)
async with self._lock:
self._sessions[resolved_id] = session
return session
async def create_session(
self,
session_id: str | None = None,
model: str | None = None,
initial_prompt: str | None = None,
) -> Session:
"""Create a new session with a queen but no worker.
The queen starts immediately with MCP coding tools.
A worker can be loaded later via load_worker().
"""
session = await self._create_session_core(session_id=session_id, model=model)
# Start queen immediately (queen-only, no worker tools yet)
await self._start_queen(session, worker_identity=None, initial_prompt=initial_prompt)
logger.info("Session '%s' created (queen-only)", session.id)
return session
async def create_session_with_worker(
self,
agent_path: str | Path,
agent_id: str | None = None,
model: str | None = None,
initial_prompt: str | None = None,
) -> Session:
"""Create a session and load a worker in one step.
Backward-compatible with the old POST /api/agents flow.
Loads the worker FIRST so the queen starts with full lifecycle
and monitoring tools available.
The session gets an auto-generated unique ID. The agent name
becomes the worker_id (used by the frontend as backendAgentId).
"""
from framework.tools.queen_lifecycle_tools import build_worker_profile
agent_path = Path(agent_path)
resolved_worker_id = agent_id or agent_path.name
# Auto-generate session ID (not the agent name)
session = await self._create_session_core(model=model)
try:
# Load worker FIRST (before queen) so queen gets full tools
await self._load_worker_core(
session,
agent_path,
worker_id=resolved_worker_id,
model=model,
)
# Start queen with worker profile + lifecycle + monitoring tools
worker_identity = (
build_worker_profile(session.worker_runtime, agent_path=agent_path)
if session.worker_runtime
else None
)
await self._start_queen(
session, worker_identity=worker_identity, initial_prompt=initial_prompt
)
# Health judge disabled for simplicity.
# if agent_path.name != "hive_coder" and session.worker_runtime:
# await self._start_judge(session, session.runner._storage_path)
except Exception:
# If anything fails, tear down the session
await self.stop_session(session.id)
raise
return session
# ------------------------------------------------------------------
# Worker lifecycle
# ------------------------------------------------------------------
async def _load_worker_core(
self,
session: Session,
agent_path: str | Path,
worker_id: str | None = None,
model: str | None = None,
) -> None:
"""Load a worker agent into a session (core logic).
Sets up the runner, runtime, and session fields. Does NOT start the
judge or notify the queen; callers handle those steps.
"""
from framework.runner import AgentRunner
agent_path = Path(agent_path)
resolved_worker_id = worker_id or agent_path.name
if session.worker_runtime is not None:
raise ValueError(f"Session '{session.id}' already has worker '{session.worker_id}'")
async with self._lock:
if session.id in self._loading:
raise ValueError(f"Session '{session.id}' is currently loading a worker")
self._loading.add(session.id)
try:
# Blocking I/O — load in executor
loop = asyncio.get_running_loop()
resolved_model = model or self._model
runner = await loop.run_in_executor(
None,
lambda: AgentRunner.load(
agent_path,
model=resolved_model,
interactive=False,
skip_credential_validation=True,
credential_store=self._credential_store,
),
)
# Setup with session's event bus
if runner._agent_runtime is None:
await loop.run_in_executor(
None,
lambda: runner._setup(event_bus=session.event_bus),
)
runtime = runner._agent_runtime
# Start runtime on event loop
if runtime and not runtime.is_running:
await runtime.start()
# Clean up stale "active" sessions from previous (dead) processes
self._cleanup_stale_active_sessions(agent_path)
info = runner.info()
# Update session
session.worker_id = resolved_worker_id
session.worker_path = agent_path
session.runner = runner
session.worker_runtime = runtime
session.worker_info = info
async with self._lock:
self._loading.discard(session.id)
logger.info(
"Worker '%s' loaded into session '%s'",
resolved_worker_id,
session.id,
)
except Exception:
async with self._lock:
self._loading.discard(session.id)
raise
def _cleanup_stale_active_sessions(self, agent_path: Path) -> None:
"""Mark stale 'active' sessions on disk as 'cancelled'.
When a new runtime starts, any on-disk session still marked 'active'
is from a process that no longer exists. 'Paused' sessions are left
intact so they remain resumable.
"""
sessions_path = Path.home() / ".hive" / "agents" / agent_path.name / "sessions"
if not sessions_path.exists():
return
for d in sessions_path.iterdir():
if not d.is_dir() or not d.name.startswith("session_"):
continue
state_path = d / "state.json"
if not state_path.exists():
continue
try:
state = json.loads(state_path.read_text(encoding="utf-8"))
if state.get("status") != "active":
continue
state["status"] = "cancelled"
state.setdefault("result", {})["error"] = "Stale session: runtime restarted"
state.setdefault("timestamps", {})["updated_at"] = datetime.now().isoformat()
state_path.write_text(json.dumps(state, indent=2), encoding="utf-8")
logger.info(
"Marked stale session '%s' as cancelled for agent '%s'", d.name, agent_path.name
)
except (json.JSONDecodeError, OSError) as e:
logger.warning("Failed to clean up stale session %s: %s", d.name, e)
async def load_worker(
self,
session_id: str,
agent_path: str | Path,
worker_id: str | None = None,
model: str | None = None,
) -> Session:
"""Load a worker agent into an existing session (with running queen).
Starts the worker runtime, health judge, and notifies the queen.
"""
agent_path = Path(agent_path)
session = self._sessions.get(session_id)
if session is None:
raise ValueError(f"Session '{session_id}' not found")
await self._load_worker_core(
session,
agent_path,
worker_id=worker_id,
model=model,
)
# Notify queen about the loaded worker (skip for hive_coder itself).
# Health judge disabled for simplicity.
if agent_path.name != "hive_coder" and session.worker_runtime:
# await self._start_judge(session, session.runner._storage_path)
await self._notify_queen_worker_loaded(session)
# Emit SSE event so the frontend can update UI
await self._emit_worker_loaded(session)
return session
async def unload_worker(self, session_id: str) -> bool:
"""Unload the worker from a session. Queen stays alive."""
session = self._sessions.get(session_id)
if session is None:
return False
if session.worker_runtime is None:
return False
# Stop judge + escalation
self._stop_judge(session)
# Cleanup worker
if session.runner:
try:
await session.runner.cleanup_async()
except Exception as e:
logger.error("Error cleaning up worker '%s': %s", session.worker_id, e)
worker_id = session.worker_id
session.worker_id = None
session.worker_path = None
session.runner = None
session.worker_runtime = None
session.worker_info = None
# Notify queen
await self._notify_queen_worker_unloaded(session)
logger.info("Worker '%s' unloaded from session '%s'", worker_id, session_id)
return True
# ------------------------------------------------------------------
# Session teardown
# ------------------------------------------------------------------
async def stop_session(self, session_id: str) -> bool:
"""Stop a session entirely — unload worker + cancel queen."""
async with self._lock:
session = self._sessions.pop(session_id, None)
if session is None:
return False
# Stop judge
self._stop_judge(session)
# Stop queen
if session.queen_task is not None:
session.queen_task.cancel()
session.queen_task = None
session.queen_executor = None
# Cleanup worker
if session.runner:
try:
await session.runner.cleanup_async()
except Exception as e:
logger.error("Error cleaning up worker: %s", e)
logger.info("Session '%s' stopped", session_id)
return True
# ------------------------------------------------------------------
# Queen startup
# ------------------------------------------------------------------
async def _start_queen(
self,
session: Session,
worker_identity: str | None,
initial_prompt: str | None = None,
) -> None:
"""Start the queen executor for a session."""
from framework.agents.hive_coder.agent import (
queen_goal,
queen_graph as _queen_graph,
)
from framework.graph.executor import GraphExecutor
from framework.runner.tool_registry import ToolRegistry
from framework.runtime.core import Runtime
hive_home = Path.home() / ".hive"
queen_dir = hive_home / "queen" / "session" / session.id
queen_dir.mkdir(parents=True, exist_ok=True)
# Register MCP coding tools
queen_registry = ToolRegistry()
import framework.agents.hive_coder as _hive_coder_pkg
hive_coder_dir = Path(_hive_coder_pkg.__file__).parent
mcp_config = hive_coder_dir / "mcp_servers.json"
if mcp_config.exists():
try:
queen_registry.load_mcp_config(mcp_config)
logger.info("Queen: loaded MCP tools from %s", mcp_config)
except Exception:
logger.warning("Queen: MCP config failed to load", exc_info=True)
# Mode state for building/running mode switching
from framework.tools.queen_lifecycle_tools import (
QueenModeState,
register_queen_lifecycle_tools,
)
# Start in staging when the caller provided an agent, building otherwise.
initial_mode = "staging" if worker_identity else "building"
mode_state = QueenModeState(mode=initial_mode, event_bus=session.event_bus)
session.mode_state = mode_state
# Always register lifecycle tools — they check session.worker_runtime
# at call time, so they work even if no worker is loaded yet.
register_queen_lifecycle_tools(
queen_registry,
session=session,
session_id=session.id,
session_manager=self,
manager_session_id=session.id,
mode_state=mode_state,
)
# Monitoring tools need concrete worker paths — only register when present
if session.worker_runtime:
from framework.tools.worker_monitoring_tools import register_worker_monitoring_tools
register_worker_monitoring_tools(
queen_registry,
session.event_bus,
session.worker_path,
stream_id="queen",
worker_graph_id=session.worker_runtime._graph_id,
)
queen_tools = list(queen_registry.get_tools().values())
queen_tool_executor = queen_registry.get_executor()
# Partition tools into mode-specific sets
from framework.agents.hive_coder.nodes import (
_QUEEN_BUILDING_TOOLS,
_QUEEN_RUNNING_TOOLS,
_QUEEN_STAGING_TOOLS,
)
building_names = set(_QUEEN_BUILDING_TOOLS)
staging_names = set(_QUEEN_STAGING_TOOLS)
running_names = set(_QUEEN_RUNNING_TOOLS)
registered_names = {t.name for t in queen_tools}
missing_building = building_names - registered_names
if missing_building:
logger.warning(
"Queen: %d/%d building tools NOT registered: %s",
len(missing_building),
len(building_names),
sorted(missing_building),
)
logger.info("Queen: registered tools: %s", sorted(registered_names))
mode_state.building_tools = [t for t in queen_tools if t.name in building_names]
mode_state.staging_tools = [t for t in queen_tools if t.name in staging_names]
mode_state.running_tools = [t for t in queen_tools if t.name in running_names]
# Build queen graph with adjusted prompt + tools
_orig_node = _queen_graph.nodes[0]
base_prompt = _orig_node.system_prompt or ""
if worker_identity is None:
worker_identity = (
"\n\n# Worker Profile\n"
"No worker agent loaded. You are operating independently.\n"
"Handle all tasks directly using your coding tools."
)
registered_tool_names = set(queen_registry.get_tools().keys())
declared_tools = _orig_node.tools or []
available_tools = [t for t in declared_tools if t in registered_tool_names]
node_updates: dict = {
"system_prompt": base_prompt + worker_identity,
}
if set(available_tools) != set(declared_tools):
missing = sorted(set(declared_tools) - registered_tool_names)
if missing:
logger.warning("Queen: tools not available: %s", missing)
node_updates["tools"] = available_tools
adjusted_node = _orig_node.model_copy(update=node_updates)
queen_graph = _queen_graph.model_copy(update={"nodes": [adjusted_node]})
queen_runtime = Runtime(hive_home / "queen")
async def _queen_loop():
try:
executor = GraphExecutor(
runtime=queen_runtime,
llm=session.llm,
tools=queen_tools,
tool_executor=queen_tool_executor,
event_bus=session.event_bus,
stream_id="queen",
storage_path=queen_dir,
loop_config=queen_graph.loop_config,
execution_id=session.id,
dynamic_tools_provider=mode_state.get_current_tools,
)
session.queen_executor = executor
# Wire inject_notification so mode switches notify the queen LLM
async def _inject_mode_notification(content: str) -> None:
node = executor.node_registry.get("queen")
if node is not None and hasattr(node, "inject_event"):
await node.inject_event(content)
mode_state.inject_notification = _inject_mode_notification
# Auto-switch to staging when worker execution finishes naturally
from framework.runtime.event_bus import EventType as _ET
async def _on_worker_done(event):
if event.stream_id == "queen":
return
if mode_state.mode == "running":
await mode_state.switch_to_staging(source="auto")
session.event_bus.subscribe(
event_types=[_ET.EXECUTION_COMPLETED, _ET.EXECUTION_FAILED],
handler=_on_worker_done,
)
logger.info(
"Queen starting in %s mode with %d tools: %s",
mode_state.mode,
len(mode_state.get_current_tools()),
[t.name for t in mode_state.get_current_tools()],
)
result = await executor.execute(
graph=queen_graph,
goal=queen_goal,
input_data={"greeting": initial_prompt or "Session started."},
session_state={"resume_session_id": session.id},
)
if result.success:
logger.warning("Queen executor returned (should be forever-alive)")
else:
logger.error(
"Queen executor failed: %s",
result.error or "(no error message)",
)
except Exception:
logger.error("Queen conversation crashed", exc_info=True)
finally:
session.queen_executor = None
session.queen_task = asyncio.create_task(_queen_loop())
# ------------------------------------------------------------------
# Judge startup / teardown
# ------------------------------------------------------------------
async def _start_judge(
self,
session: Session,
worker_storage_path: str | Path,
) -> None:
"""Start the health judge for a session's worker."""
from framework.graph.executor import GraphExecutor
from framework.monitoring import judge_goal, judge_graph
from framework.runner.tool_registry import ToolRegistry
from framework.runtime.core import Runtime
from framework.runtime.event_bus import EventType as _ET
from framework.tools.worker_monitoring_tools import register_worker_monitoring_tools
worker_storage_path = Path(worker_storage_path)
try:
# Monitoring tools
monitoring_registry = ToolRegistry()
register_worker_monitoring_tools(
monitoring_registry,
session.event_bus,
worker_storage_path,
worker_graph_id=session.worker_runtime._graph_id,
)
hive_home = Path.home() / ".hive"
judge_dir = hive_home / "judge" / "session" / session.id
judge_dir.mkdir(parents=True, exist_ok=True)
judge_runtime = Runtime(hive_home / "judge")
monitoring_tools = list(monitoring_registry.get_tools().values())
monitoring_executor = monitoring_registry.get_executor()
async def _judge_loop():
interval = 300 # 5 minutes between checks
# Wait before the first check — let the worker actually do something
await asyncio.sleep(interval)
while True:
try:
executor = GraphExecutor(
runtime=judge_runtime,
llm=session.llm,
tools=monitoring_tools,
tool_executor=monitoring_executor,
event_bus=session.event_bus,
stream_id="judge",
storage_path=judge_dir,
loop_config=judge_graph.loop_config,
)
await executor.execute(
graph=judge_graph,
goal=judge_goal,
input_data={
"event": {"source": "timer", "reason": "scheduled"},
},
session_state={"resume_session_id": session.id},
)
except Exception:
logger.error("Health judge tick failed", exc_info=True)
await asyncio.sleep(interval)
session.judge_task = asyncio.create_task(_judge_loop())
# Escalation: judge → queen
async def _on_escalation(event):
ticket = event.data.get("ticket", {})
executor = session.queen_executor
if executor is None:
logger.warning("Escalation received but queen executor is None")
return
node = executor.node_registry.get("queen")
if node is not None and hasattr(node, "inject_event"):
msg = "[ESCALATION TICKET from Health Judge]\n" + json.dumps(
ticket, indent=2, ensure_ascii=False
)
await node.inject_event(msg)
else:
logger.warning("Escalation received but queen node not ready")
session.escalation_sub = session.event_bus.subscribe(
event_types=[_ET.WORKER_ESCALATION_TICKET],
handler=_on_escalation,
)
logger.info("Judge started for session '%s'", session.id)
except Exception as e:
logger.error(
"Failed to start judge for session '%s': %s",
session.id,
e,
exc_info=True,
)
def _stop_judge(self, session: Session) -> None:
"""Cancel judge task and unsubscribe escalation events."""
if session.judge_task is not None:
session.judge_task.cancel()
session.judge_task = None
if session.escalation_sub is not None:
try:
session.event_bus.unsubscribe(session.escalation_sub)
except Exception:
pass
session.escalation_sub = None
# ------------------------------------------------------------------
# Queen notifications
# ------------------------------------------------------------------
async def _notify_queen_worker_loaded(self, session: Session) -> None:
"""Inject a system message into the queen about the loaded worker."""
from framework.tools.queen_lifecycle_tools import build_worker_profile
executor = session.queen_executor
if executor is None:
return
node = executor.node_registry.get("queen")
if node is None or not hasattr(node, "inject_event"):
return
profile = build_worker_profile(session.worker_runtime, agent_path=session.worker_path)
await node.inject_event(f"[SYSTEM] Worker loaded.{profile}")
async def _emit_worker_loaded(self, session: Session) -> None:
"""Publish a WORKER_LOADED event so the frontend can update."""
from framework.runtime.event_bus import AgentEvent, EventType
info = session.worker_info
await session.event_bus.publish(
AgentEvent(
type=EventType.WORKER_LOADED,
stream_id="queen",
data={
"worker_id": session.worker_id,
"worker_name": info.name if info else session.worker_id,
"agent_path": str(session.worker_path) if session.worker_path else "",
"goal": info.goal_name if info else "",
"node_count": info.node_count if info else 0,
},
)
)
async def _notify_queen_worker_unloaded(self, session: Session) -> None:
"""Notify the queen that the worker has been unloaded."""
executor = session.queen_executor
if executor is None:
return
node = executor.node_registry.get("queen")
if node is None or not hasattr(node, "inject_event"):
return
await node.inject_event(
"[SYSTEM] Worker unloaded. You are now operating independently. "
"Handle all tasks directly using your coding tools."
)
# ------------------------------------------------------------------
# Lookups
# ------------------------------------------------------------------
def get_session(self, session_id: str) -> Session | None:
return self._sessions.get(session_id)
def get_session_by_worker_id(self, worker_id: str) -> Session | None:
"""Find a session by its loaded worker's ID."""
for s in self._sessions.values():
if s.worker_id == worker_id:
return s
return None
def get_session_for_agent(self, agent_id: str) -> Session | None:
"""Resolve an agent_id to a session (backward compat).
Checks session.id first, then session.worker_id.
"""
s = self._sessions.get(agent_id)
if s:
return s
return self.get_session_by_worker_id(agent_id)
def is_loading(self, session_id: str) -> bool:
return session_id in self._loading
def list_sessions(self) -> list[Session]:
return list(self._sessions.values())
async def shutdown_all(self) -> None:
"""Gracefully stop all sessions. Called on server shutdown."""
session_ids = list(self._sessions.keys())
for sid in session_ids:
await self.stop_session(sid)
logger.info("All sessions stopped")
+73
@@ -0,0 +1,73 @@
"""Server-Sent Events helper wrapping aiohttp StreamResponse."""
import json
import logging
from aiohttp import web
logger = logging.getLogger(__name__)
class SSEResponse:
"""Thin wrapper around aiohttp StreamResponse for SSE streaming.
Usage:
sse = SSEResponse()
await sse.prepare(request)
await sse.send_event({"key": "value"}, event="update")
await sse.send_keepalive()
"""
def __init__(self) -> None:
self._response: web.StreamResponse | None = None
async def prepare(self, request: web.Request) -> web.StreamResponse:
"""Prepare the SSE response with correct headers."""
self._response = web.StreamResponse(
status=200,
headers={
"Content-Type": "text/event-stream",
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"X-Accel-Buffering": "no",
},
)
await self._response.prepare(request)
return self._response
async def send_event(
self,
data: dict,
event: str | None = None,
id: str | None = None,
) -> None:
"""Serialize and send an SSE event.
Args:
data: JSON-serializable dict to send as the data field.
event: Optional SSE event type.
id: Optional SSE event id.
"""
if self._response is None:
raise RuntimeError("SSEResponse not prepared; call prepare() first")
parts: list[str] = []
if id is not None:
parts.append(f"id: {id}\n")
if event is not None:
parts.append(f"event: {event}\n")
payload = json.dumps(data, default=str)
parts.append(f"data: {payload}\n")
parts.append("\n")
await self._response.write("".join(parts).encode("utf-8"))
async def send_keepalive(self) -> None:
"""Send an SSE comment as a keepalive heartbeat."""
if self._response is None:
raise RuntimeError("SSEResponse not prepared; call prepare() first")
await self._response.write(b": keepalive\n\n")
@property
def response(self) -> web.StreamResponse | None:
return self._response
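A sketch of how a route handler might drive this wrapper; the event queue in request.app is an assumption for illustration, not part of this changeset:

import asyncio
from aiohttp import web

async def stream_events(request: web.Request) -> web.StreamResponse:
    sse = SSEResponse()
    await sse.prepare(request)
    queue: asyncio.Queue = request.app["events"]  # assumed event source
    try:
        while True:
            try:
                event = await asyncio.wait_for(queue.get(), timeout=15)
            except asyncio.TimeoutError:
                await sse.send_keepalive()  # ": keepalive" comment frame
                continue
            # writes "event: update\ndata: {...}\n\n" to the stream
            await sse.send_event(event, event="update")
    except ConnectionResetError:
        pass  # client disconnected
    return sse.response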
File diff suppressed because it is too large.
+4 -2
@@ -95,7 +95,7 @@ class CheckpointStore:
return None
try:
return Checkpoint.model_validate_json(checkpoint_path.read_text())
return Checkpoint.model_validate_json(checkpoint_path.read_text(encoding="utf-8"))
except Exception as e:
logger.error(f"Failed to load checkpoint {checkpoint_id}: {e}")
return None
@@ -123,7 +123,9 @@ class CheckpointStore:
return None
try:
return CheckpointIndex.model_validate_json(self.index_path.read_text())
return CheckpointIndex.model_validate_json(
self.index_path.read_text(encoding="utf-8")
)
except Exception as e:
logger.error(f"Failed to load checkpoint index: {e}")
return None
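Background on why these call sites gain an explicit encoding: without one, Path.read_text() and open() fall back to locale.getpreferredencoding(), which on many Windows installs is a legacy code page (the "charmap" codec), so UTF-8 files written on other machines can fail to decode. A reproduction sketch:

from pathlib import Path

p = Path("checkpoint.json")
# U+201D encodes to UTF-8 bytes containing 0x9D, which cp1252's charmap
# cannot decode.
p.write_text('{"quote": "\u201d"}', encoding="utf-8")

p.read_text()                  # UnicodeDecodeError under a cp1252 locale
p.read_text(encoding="utf-8")  # deterministic on every platform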
+2 -2
@@ -114,7 +114,7 @@ class SessionStore:
if not state_path.exists():
return None
return SessionState.model_validate_json(state_path.read_text())
return SessionState.model_validate_json(state_path.read_text(encoding="utf-8"))
return await asyncio.to_thread(_read)
@@ -151,7 +151,7 @@ class SessionStore:
continue
try:
state = SessionState.model_validate_json(state_path.read_text())
state = SessionState.model_validate_json(state_path.read_text(encoding="utf-8"))
# Apply filters
if status and state.status != status:
+2 -2
@@ -270,10 +270,10 @@ def _edit_test_code(code: str) -> str:
try:
# Open editor
subprocess.run([editor, temp_path], check=True)
subprocess.run([editor, temp_path], check=True, encoding="utf-8")
# Read edited code
with open(temp_path) as f:
with open(temp_path, encoding="utf-8") as f:
return f.read()
except subprocess.CalledProcessError:
print("Editor failed, keeping original code")
+34 -3
@@ -11,10 +11,35 @@ Provides commands:
import argparse
import ast
import os
import shutil
import subprocess
import sys
from pathlib import Path
def _check_pytest_available() -> bool:
"""Check if pytest is available as a runnable command.
Returns True if pytest is found, otherwise prints an error message
with install instructions and returns False.
"""
if shutil.which("pytest") is None:
print(
"Error: pytest is not installed or not on PATH.\n"
"Hive's testing commands require pytest at runtime.\n"
"Install it with:\n"
"\n"
" pip install 'framework[testing]'\n"
"\n"
"or if using uv:\n"
"\n"
" uv pip install 'framework[testing]'",
file=sys.stderr,
)
return False
return True
def register_testing_commands(subparsers: argparse._SubParsersAction) -> None:
"""Register testing CLI commands."""
@@ -105,6 +130,9 @@ def register_testing_commands(subparsers: argparse._SubParsersAction) -> None:
def cmd_test_run(args: argparse.Namespace) -> int:
"""Run tests for an agent using pytest subprocess."""
if not _check_pytest_available():
return 1
agent_path = Path(args.agent_path)
tests_dir = agent_path / "tests"
@@ -162,6 +190,7 @@ def cmd_test_run(args: argparse.Namespace) -> int:
try:
result = subprocess.run(
cmd,
encoding="utf-8",
env=env,
timeout=600, # 10 minute timeout
)
@@ -177,7 +206,8 @@ def cmd_test_run(args: argparse.Namespace) -> int:
def cmd_test_debug(args: argparse.Namespace) -> int:
"""Debug a failed test by re-running with verbose output."""
import subprocess
if not _check_pytest_available():
return 1
agent_path = Path(args.agent_path)
test_name = args.test_name
@@ -190,7 +220,7 @@ def cmd_test_debug(args: argparse.Namespace) -> int:
# Find which file contains the test
test_file = None
for py_file in tests_dir.glob("test_*.py"):
content = py_file.read_text()
content = py_file.read_text(encoding="utf-8")
if f"def {test_name}" in content or f"async def {test_name}" in content:
test_file = py_file
break
@@ -219,6 +249,7 @@ def cmd_test_debug(args: argparse.Namespace) -> int:
try:
result = subprocess.run(
cmd,
encoding="utf-8",
env=env,
timeout=120, # 2 minute timeout for single test
)
@@ -238,7 +269,7 @@ def _scan_test_files(tests_dir: Path) -> list[dict]:
for test_file in sorted(tests_dir.glob("test_*.py")):
try:
content = test_file.read_text()
content = test_file.read_text(encoding="utf-8")
tree = ast.parse(content)
for node in ast.walk(tree):
File diff suppressed because it is too large.
+6 -3
@@ -18,7 +18,6 @@ from __future__ import annotations
import json
import logging
from pathlib import Path
from typing import TYPE_CHECKING
if TYPE_CHECKING:
@@ -48,10 +47,14 @@ def register_graph_tools(registry: ToolRegistry, runtime: AgentRuntime) -> int:
"""
from framework.runner.runner import AgentRunner
from framework.runtime.execution_stream import EntryPointSpec
from framework.server.app import validate_agent_path
path = Path(agent_path).resolve()
try:
path = validate_agent_path(agent_path)
except ValueError as e:
return json.dumps({"error": str(e)})
if not path.exists():
return json.dumps({"error": f"Agent path does not exist: {path}"})
return json.dumps({"error": f"Agent path does not exist: {agent_path}"})
try:
runner = AgentRunner.load(path)
@@ -42,8 +42,8 @@ _DEFAULT_LAST_N_STEPS = 40
def register_worker_monitoring_tools(
registry: "ToolRegistry",
event_bus: "EventBus",
registry: ToolRegistry,
event_bus: EventBus,
storage_path: Path,
stream_id: str = "judge",
worker_graph_id: str | None = None,
@@ -101,8 +101,7 @@ def register_worker_monitoring_tools(
return json.dumps({"error": "No sessions found — worker has not started yet"})
candidates = [
d for d in sessions_dir.iterdir()
if d.is_dir() and (d / "state.json").exists()
d for d in sessions_dir.iterdir() if d.is_dir() and (d / "state.json").exists()
]
if not candidates:
return json.dumps({"error": "No sessions found — worker has not started yet"})
@@ -219,7 +218,9 @@ def register_worker_monitoring_tools(
},
"last_n_steps": {
"type": "integer",
"description": f"How many recent log steps to include (default {_DEFAULT_LAST_N_STEPS})",
"description": (
f"How many recent log steps to include (default {_DEFAULT_LAST_N_STEPS})"
),
},
},
"required": [],
+76 -143
@@ -241,8 +241,8 @@ class AdenTUI(App):
# Health judge + queen monitoring graphs (loaded alongside worker agents)
self._queen_graph_id: str | None = None
self._judge_graph_id: str | None = None
self._judge_task = None # concurrent.futures.Future for the judge loop
self._queen_task = None # concurrent.futures.Future for the queen loop
self._judge_task = None # concurrent.futures.Future for the judge loop
self._queen_task = None # concurrent.futures.Future for the queen loop
self._queen_executor = None # GraphExecutor for queen input injection
self._queen_escalation_sub = None # EventBus subscription for queen
@@ -256,7 +256,7 @@ class AdenTUI(App):
"""Override to use native `open` for file:// URLs on macOS."""
if url.startswith("file://") and platform.system() == "Darwin":
path = url.removeprefix("file://")
subprocess.Popen(["open", path])
subprocess.Popen(["open", path], encoding="utf-8")
else:
super().open_url(url, new_tab=new_tab)
@@ -434,9 +434,7 @@ class AdenTUI(App):
if self.runtime and not self.runtime.is_running:
try:
agent_loop = self.chat_repl._agent_loop
future = asyncio.run_coroutine_threadsafe(
self.runtime.start(), agent_loop
)
future = asyncio.run_coroutine_threadsafe(self.runtime.start(), agent_loop)
await asyncio.wrap_future(future)
except Exception as e:
self.status_bar.set_graph_id("")
@@ -470,17 +468,20 @@ class AdenTUI(App):
into the worker runtime. The worker is completely untouched.
"""
import asyncio
import uuid
from datetime import datetime
from pathlib import Path
from framework.graph.executor import GraphExecutor
from framework.monitoring import judge_goal, judge_graph
from framework.runner.tool_registry import ToolRegistry
from framework.runtime.core import Runtime
from framework.runtime.event_bus import EventType as _ET
from framework.tools.queen_lifecycle_tools import register_queen_lifecycle_tools
from framework.tools.queen_lifecycle_tools import (
QueenModeState,
register_queen_lifecycle_tools,
)
from framework.tools.worker_monitoring_tools import register_worker_monitoring_tools
log = logging.getLogger("tui.judge")
log = logging.getLogger("tui.queen")
try:
storage_path = Path(storage_path)
@@ -488,6 +489,10 @@ class AdenTUI(App):
llm = self.runtime._llm
agent_loop = self.chat_repl._agent_loop
# Generate a shared session ID for queen, judge, and worker.
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
session_id = f"session_{ts}_{uuid.uuid4().hex[:8]}"
# 1. Monitoring tools (health summary, emit ticket, notify operator).
# Registered on a standalone registry — NOT merged into the worker.
monitoring_registry = ToolRegistry()
@@ -498,62 +503,16 @@ class AdenTUI(App):
worker_graph_id=self.runtime._graph_id,
)
# 2. Storage dirs — under worker's base path but completely owned
# by the judge/queen. Worker never writes here.
judge_dir = storage_path / "graphs" / "judge" / "session"
judge_dir.mkdir(parents=True, exist_ok=True)
queen_dir = storage_path / "graphs" / "queen" / "session"
# 2. Storage dirs — global, not per-agent.
hive_home = Path.home() / ".hive"
queen_dir = hive_home / "queen" / "session" / session_id
queen_dir.mkdir(parents=True, exist_ok=True)
# ---------------------------------------------------------------
# 3. Health judge — background task, fires every 2 minutes.
# ---------------------------------------------------------------
judge_runtime = Runtime(storage_path / "graphs" / "judge")
monitoring_tools = list(monitoring_registry.get_tools().values())
monitoring_executor = monitoring_registry.get_executor()
# Scoped event buses — stamp graph_id on every event so
# downstream routing (queen-primary mode) can distinguish
# queen/judge/worker events.
# Health judge disabled for simplicity.
from framework.runtime.execution_stream import GraphScopedEventBus
judge_event_bus = GraphScopedEventBus(event_bus, "judge")
queen_event_bus = GraphScopedEventBus(event_bus, "queen")
async def _judge_loop():
interval = 120 # seconds
first = True
while True:
if not first:
await asyncio.sleep(interval)
first = False
try:
executor = GraphExecutor(
runtime=judge_runtime,
llm=llm,
tools=monitoring_tools,
tool_executor=monitoring_executor,
event_bus=judge_event_bus,
stream_id="judge",
storage_path=judge_dir,
loop_config=judge_graph.loop_config,
)
await executor.execute(
graph=judge_graph,
goal=judge_goal,
input_data={
"event": {"source": "timer", "reason": "scheduled"},
},
session_state={"resume_session_id": "persistent"},
)
except Exception:
log.error("Health judge tick failed", exc_info=True)
self._judge_task = asyncio.run_coroutine_threadsafe(
_judge_loop(), agent_loop,
)
self._judge_graph_id = "judge"
# ---------------------------------------------------------------
# 4. Queen — persistent interactive conversation.
# Runs a continuous event_loop node that is the user's
@@ -580,11 +539,16 @@ class AdenTUI(App):
except Exception:
log.warning("Queen: MCP config failed to load", exc_info=True)
# Worker is already loaded in TUI path → start in staging mode.
mode_state = QueenModeState(mode="staging", event_bus=event_bus)
register_queen_lifecycle_tools(
queen_registry,
worker_runtime=self.runtime,
event_bus=event_bus,
storage_path=storage_path,
session_id=session_id,
mode_state=mode_state,
)
register_worker_monitoring_tools(
queen_registry,
@@ -596,21 +560,27 @@ class AdenTUI(App):
queen_tools = list(queen_registry.get_tools().values())
queen_tool_executor = queen_registry.get_executor()
# Build worker identity to inject into the queen's system prompt.
# This must be in the system prompt (not input_data) because
# persistent sessions restore the old conversation and skip
# _build_initial_message — the queen would lose context.
worker_graph_id = self.runtime._graph_id
worker_goal_name = getattr(self.runtime.goal, "name", worker_graph_id)
worker_goal_desc = getattr(self.runtime.goal, "description", "")
worker_identity = (
f"\n\n# Current Session\n"
f"Worker agent: {worker_graph_id}\n"
f"Goal: {worker_goal_name}\n"
# Partition tools into mode-specific sets
from framework.agents.hive_coder.nodes import (
_QUEEN_BUILDING_TOOLS,
_QUEEN_RUNNING_TOOLS,
_QUEEN_STAGING_TOOLS,
)
building_names = set(_QUEEN_BUILDING_TOOLS)
staging_names = set(_QUEEN_STAGING_TOOLS)
running_names = set(_QUEEN_RUNNING_TOOLS)
mode_state.building_tools = [t for t in queen_tools if t.name in building_names]
mode_state.staging_tools = [t for t in queen_tools if t.name in staging_names]
mode_state.running_tools = [t for t in queen_tools if t.name in running_names]
# Build worker profile for queen's system prompt.
from framework.tools.queen_lifecycle_tools import build_worker_profile
worker_identity = build_worker_profile(
self.runtime,
agent_path=self._runner.agent_path if self._runner else None,
)
if worker_goal_desc:
worker_identity += f"Description: {worker_goal_desc}\n"
worker_identity += "Status at session start: idle (not started)."
# Adjust queen graph: filter tools to what's registered and
# append worker identity to the system prompt.
@@ -631,7 +601,7 @@ class AdenTUI(App):
adjusted_node = _orig_queen_node.model_copy(update=node_updates)
queen_graph = queen_graph.model_copy(update={"nodes": [adjusted_node]})
queen_runtime = Runtime(storage_path / "graphs" / "queen")
queen_runtime = Runtime(hive_home / "queen")
async def _queen_loop():
try:
@@ -644,12 +614,23 @@ class AdenTUI(App):
stream_id="queen",
storage_path=queen_dir,
loop_config=queen_graph.loop_config,
dynamic_tools_provider=mode_state.get_current_tools,
)
self._queen_executor = executor
# Wire inject_notification so mode switches notify the queen LLM
async def _inject_mode_notification(content: str) -> None:
node = executor.node_registry.get("queen")
if node is not None and hasattr(node, "inject_event"):
await node.inject_event(content)
mode_state.inject_notification = _inject_mode_notification
log.info(
"Queen starting with %d tools: %s",
len(queen_tools),
[t.name for t in queen_tools],
"Queen starting in %s mode with %d tools: %s",
mode_state.mode,
len(mode_state.get_current_tools()),
[t.name for t in mode_state.get_current_tools()],
)
# The queen's event_loop node runs forever (continuous mode).
# It blocks on _await_user_input() after each LLM turn,
@@ -658,7 +639,7 @@ class AdenTUI(App):
graph=queen_graph,
goal=queen_goal,
input_data={"greeting": "Session started."},
session_state={"resume_session_id": "persistent"},
session_state={"resume_session_id": session_id},
)
# Should never reach here — queen is forever-alive.
log.warning(
@@ -671,7 +652,8 @@ class AdenTUI(App):
self._queen_executor = None
self._queen_task = asyncio.run_coroutine_threadsafe(
_queen_loop(), agent_loop,
_queen_loop(),
agent_loop,
)
self._queen_graph_id = "queen"
@@ -690,31 +672,8 @@ class AdenTUI(App):
self.chat_repl._queen_inject_callback = _inject_queen
# Judge escalation → inject into queen conversation as a message.
async def _on_escalation(event):
ticket = event.data.get("ticket", {})
executor = self._queen_executor
if executor is None:
log.warning("Escalation received but queen executor is None")
return
node = executor.node_registry.get("queen")
if node is not None and hasattr(node, "inject_event"):
import json as _json
msg = (
"[ESCALATION TICKET from Health Judge]\n"
+ _json.dumps(ticket, indent=2, ensure_ascii=False)
)
await node.inject_event(msg)
else:
log.warning("Escalation received but queen node not ready for injection")
self._queen_escalation_sub = event_bus.subscribe(
event_types=[_ET.WORKER_ESCALATION_TICKET],
handler=_on_escalation,
)
self.notify(
"Queen + health judge active",
"Queen active",
severity="information",
timeout=3,
)
@@ -1041,9 +1000,7 @@ class AdenTUI(App):
if not coder_runtime.is_running:
try:
agent_loop = self.chat_repl._agent_loop
future = asyncio.run_coroutine_threadsafe(
coder_runtime.start(), agent_loop
)
future = asyncio.run_coroutine_threadsafe(coder_runtime.start(), agent_loop)
await asyncio.wrap_future(future)
except Exception as e:
self.notify(f"Failed to start coder runtime: {e}", severity="error")
@@ -1340,9 +1297,7 @@ class AdenTUI(App):
return
elif et == EventType.EXECUTION_FAILED:
error = event.data.get("error", "Unknown error")[:200]
self._inject_worker_status_into_queen(
f"Worker execution failed: {error}"
)
self._inject_worker_status_into_queen(f"Worker execution failed: {error}")
return
elif et in (
EventType.LLM_TEXT_DELTA,
@@ -1365,7 +1320,11 @@ class AdenTUI(App):
# --- Multi-graph filtering (non-queen mode) ---
# If the event has a graph_id and it's not the active graph,
# show a notification for important events and drop the rest.
if not _queen_active and event.graph_id is not None and event.graph_id != self.runtime.active_graph_id:
if (
not _queen_active
and event.graph_id is not None
and event.graph_id != self.runtime.active_graph_id
):
if et == EventType.CLIENT_INPUT_REQUESTED:
self.notify(
f"[bold]{event.graph_id}[/bold] is waiting for input",
@@ -1684,46 +1643,20 @@ class AdenTUI(App):
self.notify(f"Logs {mode}", severity="information", timeout=2)
def action_pause_execution(self) -> None:
"""Immediately pause execution by cancelling task (bound to Ctrl+Z)."""
"""Immediately pause execution by cancelling all running tasks (bound to Ctrl+Z)."""
if self.chat_repl is None or self.runtime is None:
return
try:
if not self.chat_repl._current_exec_id:
if self.runtime.cancel_all_tasks(self.chat_repl._agent_loop):
self.chat_repl._current_exec_id = None
self.notify(
"No active execution to pause",
"All executions stopped",
severity="information",
timeout=3,
)
return
task_cancelled = False
all_streams = []
active_reg = self.runtime.get_graph_registration(self.runtime.active_graph_id)
if active_reg:
all_streams.extend(active_reg.streams.values())
for gid in self.runtime.list_graphs():
if gid == self.runtime.active_graph_id:
continue
reg = self.runtime.get_graph_registration(gid)
if reg:
all_streams.extend(reg.streams.values())
for stream in all_streams:
exec_id = self.chat_repl._current_exec_id
task = stream._execution_tasks.get(exec_id)
if task and not task.done():
task.cancel()
task_cancelled = True
self.notify(
"Execution paused - state saved",
severity="information",
timeout=3,
)
break
if not task_cancelled:
else:
self.notify(
"Execution already completed",
"No active executions",
severity="information",
timeout=2,
)
+85 -27
@@ -37,6 +37,29 @@ class AgentEntry:
node_count: int = 0
tool_count: int = 0
tags: list[str] = field(default_factory=list)
last_active: str | None = None
def _get_last_active(agent_name: str) -> str | None:
"""Return the most recent updated_at timestamp across all sessions."""
sessions_dir = Path.home() / ".hive" / "agents" / agent_name / "sessions"
if not sessions_dir.exists():
return None
latest: str | None = None
for session_dir in sessions_dir.iterdir():
if not session_dir.is_dir() or not session_dir.name.startswith("session_"):
continue
state_file = session_dir / "state.json"
if not state_file.exists():
continue
try:
data = json.loads(state_file.read_text(encoding="utf-8"))
ts = data.get("timestamps", {}).get("updated_at")
if ts and (latest is None or ts > latest):
latest = ts
except Exception:
continue
return latest
def _count_sessions(agent_name: str) -> int:
@@ -47,19 +70,50 @@ def _count_sessions(agent_name: str) -> int:
return sum(1 for d in sessions_dir.iterdir() if d.is_dir() and d.name.startswith("session_"))
def _extract_agent_stats(agent_json_path: Path) -> tuple[int, int, list[str]]:
"""Extract node count, tool count, and tags from agent.json."""
try:
data = json.loads(agent_json_path.read_text())
nodes = data.get("nodes", [])
node_count = len(nodes)
tools: set[str] = set()
for node in nodes:
tools.update(node.get("tools", []))
tags = data.get("agent", {}).get("tags", [])
return node_count, len(tools), tags
except Exception:
return 0, 0, []
def _extract_agent_stats(agent_path: Path) -> tuple[int, int, list[str]]:
"""Extract node count, tool count, and tags from an agent directory.
Prefers agent.py (AST-parsed) over agent.json for node/tool counts
since agent.json may be stale. Tags are only available from agent.json.
"""
import ast
node_count, tool_count, tags = 0, 0, []
# Try agent.py first — source of truth for nodes
agent_py = agent_path / "agent.py"
if agent_py.exists():
try:
tree = ast.parse(agent_py.read_text(encoding="utf-8"))
for node in ast.walk(tree):
# Find `nodes = [...]` assignment
if isinstance(node, ast.Assign):
for target in node.targets:
if isinstance(target, ast.Name) and target.id == "nodes":
if isinstance(node.value, ast.List):
node_count = len(node.value.elts)
except Exception:
pass
# Fall back to / supplement from agent.json
agent_json = agent_path / "agent.json"
if agent_json.exists():
try:
data = json.loads(agent_json.read_text(encoding="utf-8"))
json_nodes = data.get("nodes", [])
if node_count == 0:
node_count = len(json_nodes)
# Tool count: use whichever source gave us nodes, but agent.json
# has the structured tool lists so prefer it for tool counting
tools: set[str] = set()
for n in json_nodes:
tools.update(n.get("tools", []))
tool_count = len(tools)
tags = data.get("agent", {}).get("tags", [])
except Exception:
pass
return node_count, tool_count, tags
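A sketch of what the AST branch extracts from a hypothetical agent.py; the Node constructor is illustrative, since only the `nodes = [...]` assignment matters to the parser (ast.parse never executes the file):

import ast

source = """
nodes = [
    Node(id="plan", tools=["read_file"]),
    Node(id="act", tools=["write_file"]),
]
"""
tree = ast.parse(source)
for node in ast.walk(tree):
    if isinstance(node, ast.Assign):
        for target in node.targets:
            if isinstance(target, ast.Name) and target.id == "nodes":
                if isinstance(node.value, ast.List):
                    print(len(node.value.elts))  # -> 2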
def discover_agents() -> dict[str, list[AgentEntry]]:
@@ -85,20 +139,23 @@ def discover_agents() -> dict[str, list[AgentEntry]]:
if not _is_valid_agent_dir(path):
continue
agent_json = path / "agent.json"
node_count, tool_count, tags = 0, 0, []
if agent_json.exists():
try:
data = json.loads(agent_json.read_text())
meta = data.get("agent", {})
name = meta.get("name", path.name)
desc = meta.get("description", "")
except Exception:
name = path.name
desc = "(error reading agent.json)"
node_count, tool_count, tags = _extract_agent_stats(agent_json)
else:
name, desc = _extract_python_agent_metadata(path)
# config.py is source of truth for name/description
name, desc = _extract_python_agent_metadata(path)
config_fallback_name = path.name.replace("_", " ").title()
used_config = name != config_fallback_name
node_count, tool_count, tags = _extract_agent_stats(path)
if not used_config:
# config.py didn't provide values, fall back to agent.json
agent_json = path / "agent.json"
if agent_json.exists():
try:
data = json.loads(agent_json.read_text(encoding="utf-8"))
meta = data.get("agent", {})
name = meta.get("name", name)
desc = meta.get("description", desc)
except Exception:
pass
entries.append(
AgentEntry(
@@ -110,6 +167,7 @@ def discover_agents() -> dict[str, list[AgentEntry]]:
node_count=node_count,
tool_count=tool_count,
tags=tags,
last_active=_get_last_active(path.name),
)
)
if entries:
+2 -13
@@ -160,20 +160,9 @@ class CredentialSetupScreen(ModalScreen[bool | None]):
aden_input = self.query_one("#key-aden", Input)
aden_key = aden_input.value.strip()
if aden_key:
os.environ["ADEN_API_KEY"] = aden_key
# Persist to shell config
try:
from aden_tools.credentials.shell_config import (
add_env_var_to_shell_config,
)
from framework.credentials.key_storage import save_aden_api_key
add_env_var_to_shell_config(
"ADEN_API_KEY",
aden_key,
comment="Aden Platform API key",
)
except Exception:
pass
save_aden_api_key(aden_key)
configured += 1 # ADEN_API_KEY itself counts as configured
# Run Aden sync for all Aden-backed creds (best-effort)
+19 -36
@@ -488,7 +488,7 @@ class ChatRepl(Vertical):
if not state_file.exists():
continue
with open(state_file) as f:
with open(state_file, encoding="utf-8") as f:
state = json.load(f)
status = state.get("status", "").lower()
@@ -547,7 +547,7 @@ class ChatRepl(Vertical):
# Read session state
try:
with open(state_file) as f:
with open(state_file, encoding="utf-8") as f:
state = json.load(f)
# Track this session for /resume <number> lookup
@@ -599,7 +599,7 @@ class ChatRepl(Vertical):
try:
import json
with open(state_file) as f:
with open(state_file, encoding="utf-8") as f:
state = json.load(f)
# Basic info
@@ -640,7 +640,7 @@ class ChatRepl(Vertical):
# Load and show checkpoints
for i, cp_file in enumerate(checkpoint_files[-5:], 1): # Last 5
try:
with open(cp_file) as f:
with open(cp_file, encoding="utf-8") as f:
cp_data = json.load(f)
cp_id = cp_data.get("checkpoint_id", cp_file.stem)
@@ -687,7 +687,7 @@ class ChatRepl(Vertical):
import json
with open(state_file) as f:
with open(state_file, encoding="utf-8") as f:
state = json.load(f)
# Resume from session state (not checkpoint)
@@ -868,27 +868,17 @@ class ChatRepl(Vertical):
self._write_history(f"[dim]{traceback.format_exc()}[/dim]")
async def _cmd_pause(self) -> None:
"""Immediately pause execution by cancelling task (same as Ctrl+Z)."""
# Check if there's a current execution
if not self._current_exec_id:
self._write_history("[bold yellow]No active execution to pause[/bold yellow]")
self._write_history(" Start an execution first, then use /pause during execution")
return
# Find and cancel the execution task - executor will catch and save state
task_cancelled = False
for stream in self.runtime._streams.values():
exec_id = self._current_exec_id
task = stream._execution_tasks.get(exec_id)
if task and not task.done():
task.cancel()
task_cancelled = True
self._write_history("[bold green]⏸ Execution paused - state saved[/bold green]")
self._write_history(" Resume later with: [bold]/resume[/bold]")
break
if not task_cancelled:
self._write_history("[bold yellow]Execution already completed[/bold yellow]")
"""Immediately pause execution by cancelling all running tasks (same as Ctrl+Z)."""
future = asyncio.run_coroutine_threadsafe(
self.runtime.cancel_all_tasks_async(), self._agent_loop
)
result = await asyncio.wrap_future(future)
if result:
self._current_exec_id = None
self._write_history("[bold green]⏸ All executions stopped[/bold green]")
self._write_history(" Resume later with: [bold]/resume[/bold]")
else:
self._write_history("[bold yellow]No active executions[/bold yellow]")
async def _cmd_coder(self, reason: str = "") -> None:
"""User-initiated escalation to Hive Coder."""
@@ -1112,7 +1102,7 @@ class ChatRepl(Vertical):
continue
try:
with open(state_file) as f:
with open(state_file, encoding="utf-8") as f:
state = json.load(f)
status = state.get("status", "").lower()
@@ -1294,10 +1284,7 @@ class ChatRepl(Vertical):
# Get entry points for the active graph, preferring manual
# (interactive) ones over event/timer-driven ones.
entry_points = self.runtime.get_entry_points()
manual_eps = [
ep for ep in entry_points
if ep.trigger_type in ("manual", "api")
]
manual_eps = [ep for ep in entry_points if ep.trigger_type in ("manual", "api")]
if not manual_eps:
manual_eps = entry_points # fallback: use whatever is available
if not manual_eps:
@@ -1463,10 +1450,6 @@ class ChatRepl(Vertical):
indicator.update("Preparing question...")
return
if tool_name == "escalate_to_coder":
indicator.update("Escalating to coder...")
return
# Update indicator to show tool activity
indicator.update(f"Using tool: {tool_name}...")
@@ -1478,7 +1461,7 @@ class ChatRepl(Vertical):
def handle_tool_completed(self, tool_name: str, result: str, is_error: bool) -> None:
"""Handle a tool call completing."""
if tool_name in ("ask_user", "escalate_to_coder"):
if tool_name == "ask_user":
return
result_str = str(result)
@@ -38,6 +38,7 @@ def _linux_file_dialog() -> subprocess.CompletedProcess | None:
"--title=Select a PDF file",
"--file-filter=PDF files (*.pdf)|*.pdf",
],
encoding="utf-8",
capture_output=True,
text=True,
timeout=300,
@@ -54,6 +55,7 @@ def _linux_file_dialog() -> subprocess.CompletedProcess | None:
".",
"PDF files (*.pdf)",
],
encoding="utf-8",
capture_output=True,
text=True,
timeout=300,
@@ -79,6 +81,7 @@ def _pick_pdf_subprocess() -> Path | None:
'POSIX path of (choose file of type {"com.adobe.pdf"} '
'with prompt "Select a PDF file")',
],
encoding="utf-8",
capture_output=True,
text=True,
timeout=300,
@@ -93,6 +96,7 @@ def _pick_pdf_subprocess() -> Path | None:
)
result = subprocess.run(
["powershell", "-NoProfile", "-Command", ps_script],
encoding="utf-8",
capture_output=True,
text=True,
timeout=300,
+7
@@ -25,6 +25,7 @@ EVENT_FORMAT: dict[EventType, tuple[str, str]] = {
EventType.NODE_LOOP_STARTED: ("@@", "cyan"),
EventType.NODE_LOOP_ITERATION: ("..", "dim"),
EventType.NODE_LOOP_COMPLETED: ("@@", "dim"),
EventType.LLM_TURN_COMPLETE: ("", "green"),
EventType.NODE_STALLED: ("!!", "bold yellow"),
EventType.NODE_INPUT_BLOCKED: ("!!", "yellow"),
EventType.GOAL_PROGRESS: ("%%", "blue"),
@@ -87,6 +88,12 @@ def extract_event_text(event: AgentEvent) -> str:
return f"State changed: {data.get('key', 'unknown')}"
elif et == EventType.CLIENT_INPUT_REQUESTED:
return "Waiting for user input"
elif et == EventType.LLM_TURN_COMPLETE:
stop = data.get("stop_reason", "?")
model = data.get("model", "?")
inp = data.get("input_tokens", 0)
out = data.get("output_tokens", 0)
return f"{model}{stop} ({inp}+{out} tokens)"
else:
return f"{et.value}: {data}"
@@ -199,10 +199,11 @@ def _copy_to_clipboard(text: str) -> None:
"""Copy text to system clipboard using platform-native tools."""
try:
if sys.platform == "darwin":
subprocess.run(["pbcopy"], input=text.encode(), check=True, timeout=5)
subprocess.run(["pbcopy"], encoding="utf-8", input=text.encode(), check=True, timeout=5)
elif sys.platform == "win32":
subprocess.run(
["clip.exe"],
encoding="utf-8",
input=text.encode("utf-16le"),
check=True,
timeout=5,
@@ -211,6 +212,7 @@ def _copy_to_clipboard(text: str) -> None:
try:
subprocess.run(
["xclip", "-selection", "clipboard"],
encoding="utf-8",
input=text.encode(),
check=True,
timeout=5,
@@ -218,6 +220,7 @@ def _copy_to_clipboard(text: str) -> None:
except (subprocess.SubprocessError, FileNotFoundError):
subprocess.run(
["xsel", "--clipboard", "--input"],
encoding="utf-8",
input=text.encode(),
check=True,
timeout=5,
+21
@@ -0,0 +1,21 @@
{
"$schema": "https://ui.shadcn.com/schema.json",
"style": "default",
"rsc": false,
"tsx": true,
"tailwind": {
"config": "",
"css": "src/index.css",
"baseColor": "neutral",
"cssVariables": true,
"prefix": ""
},
"aliases": {
"components": "@/components",
"utils": "@/lib/utils",
"ui": "@/components/ui",
"lib": "@/lib",
"hooks": "@/hooks"
},
"iconLibrary": "lucide"
}
+13
@@ -0,0 +1,13 @@
<!DOCTYPE html>
<html lang="en" class="dark">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<link rel="icon" type="image/png" href="/favicon.png" />
<title>Hive</title>
</head>
<body>
<div id="root"></div>
<script type="module" src="/src/main.tsx"></script>
</body>
</html>
+4368
File diff suppressed because it is too large.
+33
@@ -0,0 +1,33 @@
{
"name": "hive-frontend",
"private": true,
"version": "0.1.0",
"type": "module",
"scripts": {
"dev": "vite",
"build": "tsc -b && vite build",
"preview": "vite preview",
"test": "vitest run"
},
"dependencies": {
"clsx": "^2.1.1",
"lucide-react": "^0.575.0",
"react": "^18.3.1",
"react-dom": "^18.3.1",
"react-markdown": "^10.1.0",
"react-router-dom": "^7.1.0",
"remark-gfm": "^4.0.1",
"tailwind-merge": "^3.5.0"
},
"devDependencies": {
"@tailwindcss/vite": "^4.0.0",
"@types/node": "^25.3.0",
"@types/react": "^18.3.18",
"@types/react-dom": "^18.3.5",
"@vitejs/plugin-react": "^4.3.4",
"tailwindcss": "^4.0.0",
"typescript": "~5.6.2",
"vite": "^6.0.0",
"vitest": "^4.0.18"
}
}
Binary file not shown (438 KiB).
+16
@@ -0,0 +1,16 @@
import { Routes, Route } from "react-router-dom";
import Home from "./pages/home";
import MyAgents from "./pages/my-agents";
import Workspace from "./pages/workspace";
function App() {
return (
<Routes>
<Route path="/" element={<Home />} />
<Route path="/my-agents" element={<MyAgents />} />
<Route path="/workspace" element={<Workspace />} />
</Routes>
);
}
export default App;
+6
@@ -0,0 +1,6 @@
import { api } from "./client";
import type { DiscoverResult } from "./types";
export const agentsApi = {
discover: () => api.get<DiscoverResult>("/discover"),
};
+41
View File
@@ -0,0 +1,41 @@
const API_BASE = "/api";
export class ApiError extends Error {
constructor(
public status: number,
public body: { error: string; type?: string; [key: string]: unknown },
) {
super(body.error);
this.name = "ApiError";
}
}
async function request<T>(path: string, options: RequestInit = {}): Promise<T> {
const url = `${API_BASE}${path}`;
const response = await fetch(url, {
...options,
headers: {
"Content-Type": "application/json",
...options.headers,
},
});
if (!response.ok) {
const body = await response
.json()
.catch(() => ({ error: response.statusText }));
throw new ApiError(response.status, body);
}
return response.json();
}
export const api = {
get: <T>(path: string) => request<T>(path),
post: <T>(path: string, body?: unknown) =>
request<T>(path, {
method: "POST",
body: body ? JSON.stringify(body) : undefined,
}),
delete: <T>(path: string) => request<T>(path, { method: "DELETE" }),
};
+49
@@ -0,0 +1,49 @@
import { api } from "./client";
export interface CredentialInfo {
credential_id: string;
credential_type: string;
key_names: string[];
created_at: string | null;
updated_at: string | null;
}
export interface AgentCredentialRequirement {
credential_name: string;
credential_id: string;
env_var: string;
description: string;
help_url: string;
tools: string[];
node_types: string[];
available: boolean;
valid: boolean | null;
validation_message: string | null;
direct_api_key_supported: boolean;
aden_supported: boolean;
credential_key: string;
alternative_group: string | null;
}
export const credentialsApi = {
list: () =>
api.get<{ credentials: CredentialInfo[] }>("/credentials"),
get: (credentialId: string) =>
api.get<CredentialInfo>(`/credentials/${credentialId}`),
save: (credentialId: string, keys: Record<string, string>) =>
api.post<{ saved: string }>("/credentials", {
credential_id: credentialId,
keys,
}),
delete: (credentialId: string) =>
api.delete<{ deleted: boolean }>(`/credentials/${credentialId}`),
checkAgent: (agentPath: string) =>
api.post<{ required: AgentCredentialRequirement[]; has_aden_key: boolean }>(
"/credentials/check-agent",
{ agent_path: agentPath },
),
};
+74
@@ -0,0 +1,74 @@
import { api } from "./client";
import type {
TriggerResult,
InjectResult,
ChatResult,
StopResult,
ResumeResult,
ReplayResult,
GoalProgress,
} from "./types";
export const executionApi = {
trigger: (
sessionId: string,
entryPointId: string,
inputData: Record<string, unknown>,
sessionState?: Record<string, unknown>,
) =>
api.post<TriggerResult>(`/sessions/${sessionId}/trigger`, {
entry_point_id: entryPointId,
input_data: inputData,
session_state: sessionState,
}),
inject: (
sessionId: string,
nodeId: string,
content: string,
graphId?: string,
) =>
api.post<InjectResult>(`/sessions/${sessionId}/inject`, {
node_id: nodeId,
content,
graph_id: graphId,
}),
chat: (sessionId: string, message: string) =>
api.post<ChatResult>(`/sessions/${sessionId}/chat`, { message }),
/** Queue context for the queen without triggering an LLM response. */
queenContext: (sessionId: string, message: string) =>
api.post<ChatResult>(`/sessions/${sessionId}/queen-context`, { message }),
workerInput: (sessionId: string, message: string) =>
api.post<ChatResult>(`/sessions/${sessionId}/worker-input`, { message }),
stop: (sessionId: string, executionId: string) =>
api.post<StopResult>(`/sessions/${sessionId}/stop`, {
execution_id: executionId,
}),
pause: (sessionId: string, executionId: string) =>
api.post<StopResult>(`/sessions/${sessionId}/pause`, {
execution_id: executionId,
}),
cancelQueen: (sessionId: string) =>
api.post<{ cancelled: boolean }>(`/sessions/${sessionId}/cancel-queen`),
resume: (sessionId: string, workerSessionId: string, checkpointId?: string) =>
api.post<ResumeResult>(`/sessions/${sessionId}/resume`, {
session_id: workerSessionId,
checkpoint_id: checkpointId,
}),
replay: (sessionId: string, workerSessionId: string, checkpointId: string) =>
api.post<ReplayResult>(`/sessions/${sessionId}/replay`, {
session_id: workerSessionId,
checkpoint_id: checkpointId,
}),
goalProgress: (sessionId: string) =>
api.get<GoalProgress>(`/sessions/${sessionId}/goal-progress`),
};
+29
@@ -0,0 +1,29 @@
import { api } from "./client";
import type { GraphTopology, NodeDetail, NodeCriteria, ToolInfo } from "./types";
export const graphsApi = {
nodes: (sessionId: string, graphId: string, workerSessionId?: string) =>
api.get<GraphTopology>(
`/sessions/${sessionId}/graphs/${graphId}/nodes${workerSessionId ? `?session_id=${workerSessionId}` : ""}`,
),
node: (sessionId: string, graphId: string, nodeId: string) =>
api.get<NodeDetail>(
`/sessions/${sessionId}/graphs/${graphId}/nodes/${nodeId}`,
),
nodeCriteria: (
sessionId: string,
graphId: string,
nodeId: string,
workerSessionId?: string,
) =>
api.get<NodeCriteria>(
`/sessions/${sessionId}/graphs/${graphId}/nodes/${nodeId}/criteria${workerSessionId ? `?session_id=${workerSessionId}` : ""}`,
),
nodeTools: (sessionId: string, graphId: string, nodeId: string) =>
api.get<{ tools: ToolInfo[] }>(
`/sessions/${sessionId}/graphs/${graphId}/nodes/${nodeId}/tools`,
),
};

Some files were not shown because too many files have changed in this diff.