diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index e4ee4b1a..019952eb 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -65,6 +65,52 @@ You may submit PRs without prior assignment for:
 
 > **Tip:** Installing Claude Code skills is optional for running existing agents, but required if you plan to **build new agents**.
 
+## Troubleshooting Setup Issues
+
+If you encounter issues while setting up the development environment, the following steps may help:
+
+### `make: command not found`
+Install `make` using:
+
+```bash
+sudo apt install make
+
+uv: command not found
+
+Install uv using:
+
+curl -LsSf https://astral.sh/uv/install.sh | sh
+source ~/.bashrc
+
+ruff: not found
+
+If linting fails due to a missing ruff command, install it with:
+
+uv tool install ruff
+
+WSL Path Recommendation
+
+When using WSL, it is recommended to clone the repository inside your Linux home directory (e.g., ~/hive) instead of under /mnt/c/... to avoid potential performance and permission issues.
+
+
+---
+
+# ✅ Why This Is Good
+
+- Clear
+- Professional tone
+- No unnecessary explanation
+- Under micro-fix size
+- Based on real contributor experience
+- Won’t annoy maintainers
+
+---
+
+Now:
+
+```bash
+git checkout -b docs/setup-troubleshooting
+
 ## Commit Convention
 
 We follow [Conventional Commits](https://www.conventionalcommits.org/):
diff --git a/core/MCP_SERVER_GUIDE.md b/core/MCP_SERVER_GUIDE.md
index 56e5f377..a68c83df 100644
--- a/core/MCP_SERVER_GUIDE.md
+++ b/core/MCP_SERVER_GUIDE.md
@@ -1,6 +1,6 @@
 # MCP Server Guide - Agent Building Tools
 
-> **Note:** The standalone `agent-builder` MCP server (`framework.mcp.agent_builder_server`) has been replaced. Agent building is now done via the `coder-tools` server's `initialize_agent_package` tool, with underlying logic in `framework.builder.package_generator`.
+> **Note:** The standalone `agent-builder` MCP server (`framework.mcp.agent_builder_server`) has been replaced. Agent building is now done via the `coder-tools` server's `initialize_and_build_agent` tool, with underlying logic in `tools/coder_tools_server.py`.
 
 This guide covers the MCP tools available for building goal-driven agents.
 
diff --git a/core/README.md b/core/README.md
index e7424f4e..00b67ed0 100644
--- a/core/README.md
+++ b/core/README.md
@@ -19,7 +19,7 @@ uv pip install -e .
 
 ## Agent Building
 
-Agent scaffolding is handled by the `coder-tools` MCP server (in `tools/coder_tools_server.py`), which provides the `initialize_agent_package` tool and related utilities. The underlying package generation logic lives in `framework.builder.package_generator`.
+Agent scaffolding is handled by the `coder-tools` MCP server (in `tools/coder_tools_server.py`), which provides the `initialize_and_build_agent` tool and related utilities. The package generation logic lives directly in `tools/coder_tools_server.py`.
 
 See the [Getting Started Guide](../docs/getting-started.md) for building agents.
 
diff --git a/core/framework/agents/hive_coder/__init__.py b/core/framework/agents/hive_coder/__init__.py
deleted file mode 100644
index 9b2e9d3c..00000000
--- a/core/framework/agents/hive_coder/__init__.py
+++ /dev/null
@@ -1,40 +0,0 @@
-"""
-Hive Coder — Native coding agent that builds Hive agent packages.
-
-Deeply understands the agent framework and produces complete Python packages
-with goals, nodes, edges, system prompts, MCP configuration, and tests
-from natural language specifications.
-"""
-
-from .agent import (
-    conversation_mode,
-    edges,
-    entry_node,
-    entry_points,
-    goal,
-    identity_prompt,
-    loop_config,
-    nodes,
-    pause_nodes,
-    terminal_nodes,
-)
-from .config import AgentMetadata, RuntimeConfig, default_config, metadata
-
-__version__ = "1.0.0"
-
-__all__ = [
-    "goal",
-    "nodes",
-    "edges",
-    "entry_node",
-    "entry_points",
-    "pause_nodes",
-    "terminal_nodes",
-    "conversation_mode",
-    "identity_prompt",
-    "loop_config",
-    "RuntimeConfig",
-    "AgentMetadata",
-    "default_config",
-    "metadata",
-]
diff --git a/core/framework/agents/hive_coder/__main__.py b/core/framework/agents/hive_coder/__main__.py
deleted file mode 100644
index bb178710..00000000
--- a/core/framework/agents/hive_coder/__main__.py
+++ /dev/null
@@ -1,60 +0,0 @@
-"""CLI entry point for Hive Coder agent."""
-
-import json
-import logging
-import sys
-
-import click
-
-from .agent import entry_node, goal, nodes
-from .config import metadata
-
-
-def setup_logging(verbose=False, debug=False):
-    """Configure logging for execution visibility."""
-    if debug:
-        level, fmt = logging.DEBUG, "%(asctime)s %(name)s: %(message)s"
-    elif verbose:
-        level, fmt = logging.INFO, "%(message)s"
-    else:
-        level, fmt = logging.WARNING, "%(levelname)s: %(message)s"
-    logging.basicConfig(level=level, format=fmt, stream=sys.stderr)
-    logging.getLogger("framework").setLevel(level)
-
-
-@click.group()
-@click.version_option(version="1.0.0")
-def cli():
-    """Hive Coder — Build Hive agent packages from natural language."""
-    pass
-
-
-@cli.command()
-@click.option("--json", "output_json", is_flag=True)
-def info(output_json):
-    """Show agent information."""
-    info_data = {
-        "name": metadata.name,
-        "version": metadata.version,
-        "description": metadata.description,
-        "goal": {
-            "name": goal.name,
-            "description": goal.description,
-        },
-        "nodes": [n.id for n in nodes],
-        "entry_node": entry_node,
-        "client_facing_nodes": [n.id for n in nodes if n.client_facing],
-    }
-    if output_json:
-        click.echo(json.dumps(info_data, indent=2))
-    else:
-        click.echo(f"Agent: {info_data['name']}")
-        click.echo(f"Version: {info_data['version']}")
-        click.echo(f"Description: {info_data['description']}")
-        click.echo(f"\nNodes: {', '.join(info_data['nodes'])}")
-        click.echo(f"Client-facing: {', '.join(info_data['client_facing_nodes'])}")
-        click.echo(f"Entry: {info_data['entry_node']}")
-
-
-if __name__ == "__main__":
-    cli()
diff --git a/core/framework/agents/hive_coder/agent.py b/core/framework/agents/hive_coder/agent.py
deleted file mode 100644
index 3a9dadd8..00000000
--- a/core/framework/agents/hive_coder/agent.py
+++ /dev/null
@@ -1,153 +0,0 @@
-"""Agent graph construction for Hive Coder."""
-
-from framework.graph import Constraint, Goal, SuccessCriterion
-from framework.graph.edge import GraphSpec
-
-from .nodes import coder_node, queen_node
-
-# Goal definition
-goal = Goal(
-    id="hive-coder",
-    name="Hive Agent Builder",
-    description=(
-        "Build complete, validated Hive agent packages from natural language "
-        "specifications. Produces production-ready Python packages with goals, "
-        "nodes, edges, system prompts, MCP configuration, and tests."
-    ),
-    success_criteria=[
-        SuccessCriterion(
-            id="valid-package",
-            description="Generated agent package passes structural validation",
-            metric="validation_pass",
-            target="true",
-            weight=0.30,
-        ),
-        SuccessCriterion(
-            id="complete-files",
-            description=(
-                "All required files generated: agent.py, config.py, "
-                "nodes/__init__.py, __init__.py, __main__.py, mcp_servers.json"
-            ),
-            metric="file_count",
-            target=">=6",
-            weight=0.25,
-        ),
-        SuccessCriterion(
-            id="user-satisfaction",
-            description="User reviews and approves the generated agent",
-            metric="user_approval",
-            target="true",
-            weight=0.25,
-        ),
-        SuccessCriterion(
-            id="framework-compliance",
-            description=(
-                "Generated code follows framework patterns: STEP 1/STEP 2 "
-                "for client-facing and correct imports"
-            ),
-            metric="pattern_compliance",
-            target="100%",
-            weight=0.20,
-        ),
-    ],
-    constraints=[
-        Constraint(
-            id="dynamic-tool-discovery",
-            description=(
-                "Always discover available tools dynamically via "
-                "list_agent_tools before referencing tools in agent designs"
-            ),
-            constraint_type="hard",
-            category="correctness",
-        ),
-        Constraint(
-            id="no-fabricated-tools",
-            description="Only reference tools that exist in hive-tools MCP",
-            constraint_type="hard",
-            category="correctness",
-        ),
-        Constraint(
-            id="valid-python",
-            description="All generated Python files must be syntactically correct",
-            constraint_type="hard",
-            category="correctness",
-        ),
-        Constraint(
-            id="self-verification",
-            description="Run validation after writing code; fix errors before presenting",
-            constraint_type="hard",
-            category="quality",
-        ),
-    ],
-)
-
-# Nodes: primary coder node only.  The queen runs as an independent
-# GraphExecutor with queen_node — not as part of this graph.
-nodes = [coder_node]
-
-# No edges needed — single event_loop node
-edges = []
-
-# Graph configuration
-entry_node = "coder"
-entry_points = {"start": "coder"}
-pause_nodes = []
-terminal_nodes = []  # Coder node has output_keys and can terminate
-
-# No async entry points needed — the queen is now an independent executor,
-# not a secondary graph receiving events via add_graph().
-async_entry_points = []
-
-# Module-level variables read by AgentRunner.load()
-conversation_mode = "continuous"
-identity_prompt = (
-    "You are Hive Coder, the best agent-building coding agent on the planet. "
-    "You deeply understand the Hive agent framework at the source code level "
-    "and produce production-ready agent packages from natural language. "
-    "You can dynamically discover available framework tools, inspect runtime "
-    "sessions and checkpoints from agents you build, and run their test suites. "
-    "You follow coding agent discipline: read before writing, verify "
-    "assumptions by reading actual code, adhere to project conventions, "
-    "self-verify with validation, and fix your own errors. You are concise, "
-    "direct, and technically rigorous. No emojis. No fluff."
-)
-loop_config = {
-    "max_iterations": 100,
-    "max_tool_calls_per_turn": 30,
-    "max_history_tokens": 32000,
-}
-
-
-# ---------------------------------------------------------------------------
-# Queen graph — runs as an independent persistent conversation in the TUI.
-# Loaded by _load_judge_and_queen() in app.py, NOT by AgentRunner.
-# ---------------------------------------------------------------------------
-
-queen_goal = Goal(
-    id="queen-manager",
-    name="Queen Manager",
-    description=(
-        "Manage the worker agent lifecycle and serve as the user's primary "
-        "interactive interface. Triage health escalations from the judge."
-    ),
-    success_criteria=[],
-    constraints=[],
-)
-
-queen_graph = GraphSpec(
-    id="queen-graph",
-    goal_id=queen_goal.id,
-    version="1.0.0",
-    entry_node="queen",
-    entry_points={"start": "queen"},
-    terminal_nodes=[],
-    pause_nodes=[],
-    nodes=[queen_node],
-    edges=[],
-    conversation_mode="continuous",
-    loop_config={
-        "max_iterations": 999_999,
-        "max_tool_calls_per_turn": 30,
-        "max_history_tokens": 32000,
-    },
-)
diff --git a/core/framework/agents/queen/__init__.py b/core/framework/agents/queen/__init__.py
new file mode 100644
index 00000000..caff6298
--- /dev/null
+++ b/core/framework/agents/queen/__init__.py
@@ -0,0 +1,21 @@
+"""
+Queen — Native agent builder for the Hive framework.
+
+Deeply understands the agent framework and produces complete Python packages
+with goals, nodes, edges, system prompts, MCP configuration, and tests
+from natural language specifications.
+"""
+
+from .agent import queen_goal, queen_graph
+from .config import AgentMetadata, RuntimeConfig, default_config, metadata
+
+__version__ = "1.0.0"
+
+__all__ = [
+    "queen_goal",
+    "queen_graph",
+    "RuntimeConfig",
+    "AgentMetadata",
+    "default_config",
+    "metadata",
+]
diff --git a/core/framework/agents/queen/agent.py b/core/framework/agents/queen/agent.py
new file mode 100644
index 00000000..9cce2f09
--- /dev/null
+++ b/core/framework/agents/queen/agent.py
@@ -0,0 +1,40 @@
+"""Queen graph definition."""
+
+from framework.graph import Goal
+from framework.graph.edge import GraphSpec
+
+from .nodes import queen_node
+
+# ---------------------------------------------------------------------------
+# Queen graph — the primary persistent conversation.
+# Loaded by queen_orchestrator.create_queen(), NOT by AgentRunner.
+# ---------------------------------------------------------------------------
+
+queen_goal = Goal(
+    id="queen-manager",
+    name="Queen Manager",
+    description=(
+        "Manage the worker agent lifecycle and serve as the user's primary "
+        "interactive interface. Triage health escalations from the judge."
+    ),
+    success_criteria=[],
+    constraints=[],
+)
+
+queen_graph = GraphSpec(
+    id="queen-graph",
+    goal_id=queen_goal.id,
+    version="1.0.0",
+    entry_node="queen",
+    entry_points={"start": "queen"},
+    terminal_nodes=[],
+    pause_nodes=[],
+    nodes=[queen_node],
+    edges=[],
+    conversation_mode="continuous",
+    loop_config={
+        "max_iterations": 999_999,
+        "max_tool_calls_per_turn": 30,
+        "max_history_tokens": 32000,
+    },
+)
diff --git a/core/framework/agents/hive_coder/config.py b/core/framework/agents/queen/config.py
similarity index 89%
rename from core/framework/agents/hive_coder/config.py
rename to core/framework/agents/queen/config.py
index 927ecb96..4cd7810e 100644
--- a/core/framework/agents/hive_coder/config.py
+++ b/core/framework/agents/queen/config.py
@@ -1,4 +1,4 @@
-"""Runtime configuration for Hive Coder agent."""
+"""Runtime configuration for Queen agent."""
 
 import json
 from dataclasses import dataclass, field
@@ -34,7 +34,7 @@ default_config = RuntimeConfig()
 
 @dataclass
 class AgentMetadata:
-    name: str = "Hive Coder"
+    name: str = "Queen"
     version: str = "1.0.0"
     description: str = (
         "Native coding agent that builds production-ready Hive agent packages "
@@ -43,7 +43,7 @@ class AgentMetadata:
         "MCP configuration, and tests."
     )
     intro_message: str = (
-        "I'm Hive Coder — I build Hive agents. Describe what kind of agent "
+        "I'm Queen — I build Hive agents. Describe what kind of agent "
         "you want to create and I'll design, implement, and validate it for you."
     )
 
diff --git a/core/framework/agents/hive_coder/mcp_servers.json b/core/framework/agents/queen/mcp_servers.json
similarity index 100%
rename from core/framework/agents/hive_coder/mcp_servers.json
rename to core/framework/agents/queen/mcp_servers.json
diff --git a/core/framework/agents/hive_coder/nodes/__init__.py b/core/framework/agents/queen/nodes/__init__.py
similarity index 73%
rename from core/framework/agents/hive_coder/nodes/__init__.py
rename to core/framework/agents/queen/nodes/__init__.py
index 09222988..4e4d9f95 100644
--- a/core/framework/agents/hive_coder/nodes/__init__.py
+++ b/core/framework/agents/queen/nodes/__init__.py
@@ -1,4 +1,4 @@
-"""Node definitions for Hive Coder agent."""
+"""Node definitions for Queen agent."""
 
 from pathlib import Path
 
@@ -35,15 +35,14 @@ def _build_appendices() -> str:
 # Shared appendices — appended to every coding node's system prompt.
 _appendices = _build_appendices()
 
-# GCU first-class section for building phase (when GCU is enabled).
-# This is placed prominently in the main prompt body, not as an appendix.
-_gcu_building_section = (
+# GCU guide — shared between planning and building via _shared_building_knowledge.
+_gcu_section = (
     ("\n\n# GCU Nodes — Browser Automation\n\n" + _gcu_guide)
     if _is_gcu_enabled() and _gcu_guide
     else ""
 )
 
-# Tools available to both coder (worker) and queen.
+# Tools available to phases.
 _SHARED_TOOLS = [
     # File I/O
     "read_file",
@@ -61,14 +60,34 @@ _SHARED_TOOLS = [
     "list_agent_sessions",
     "list_agent_checkpoints",
     "get_agent_checkpoint",
-    "initialize_agent_package",
 ]
 
 # Queen phase-specific tool sets.
+
+# Planning phase: read-only exploration + design, no write tools.
+_QUEEN_PLANNING_TOOLS = [
+    # Read-only file tools
+    "read_file",
+    "list_directory",
+    "search_files",
+    "run_command",
+    # Discovery + design
+    "list_agent_tools",
+    "list_agents",
+    "list_agent_sessions",
+    "list_agent_checkpoints",
+    "get_agent_checkpoint",
+    "initialize_and_build_agent",
+    # Load existing agent (after user confirms)
+    "load_built_agent",
+]
+
 # Building phase: full coding + agent construction tools.
 _QUEEN_BUILDING_TOOLS = _SHARED_TOOLS + [
     "load_built_agent",
     "list_credentials",
+    "replan_agent",
+    "write_to_diary",  # Episodic memory — available in all phases
 ]
 
 # Staging phase: agent loaded but not yet running — inspect, configure, launch.
@@ -84,6 +103,8 @@ _QUEEN_STAGING_TOOLS = [
     # Launch or go back
     "run_agent_with_input",
     "stop_worker_and_edit",
+    "stop_worker_and_plan",
+    "write_to_diary",  # Episodic memory — available in all phases
 ]
 
 # Running phase: worker is executing — monitor and control.
@@ -98,11 +119,13 @@ _QUEEN_RUNNING_TOOLS = [
     # Worker lifecycle
     "stop_worker",
     "stop_worker_and_edit",
+    "stop_worker_and_plan",
     "get_worker_status",
     "inject_worker_message",
     # Monitoring
     "get_worker_health_summary",
     "notify_operator",
+    "write_to_diary",  # Episodic memory — available in all phases
 ]
 
 
@@ -113,7 +136,38 @@ _QUEEN_RUNNING_TOOLS = [
 # additions.
 # ---------------------------------------------------------------------------
 
-_package_builder_knowledge = """\
+_shared_building_knowledge = (
+    """\
+# Shared Rules (Planning & Building)
+
+## Paths (MANDATORY)
+**Always use RELATIVE paths** \
+(e.g. `exports/agent_name/config.py`, `exports/agent_name/nodes/__init__.py`).
+**Never use absolute paths** like `/mnt/data/...` or `/workspace/...` — they fail.
+The project root is implicit.
+
+## Worker File Tools (hive-tools MCP)
+Workers use a DIFFERENT MCP server (hive-tools) with DIFFERENT tool names. \
+When designing worker nodes or writing worker system prompts, reference these \
+tool names — NOT the coder-tools names (read_file, write_file, etc.).
+
+Worker data tools (for large results and spillover):
+- save_data(filename, data, data_dir) — save data to a file for later retrieval
+- load_data(filename, data_dir, offset_bytes?, limit_bytes?) — load data \
+with byte-based pagination
+- list_data_files(data_dir) — list available data files
+- append_data(filename, data, data_dir) — append to a file incrementally
+- edit_data(filename, old_text, new_text, data_dir) — find-and-replace in a data file
+- serve_file_to_user(filename, data_dir, label?, open_in_browser?) — \
+generate a clickable file URI for the user
+
+IMPORTANT: Do NOT tell workers to use read_file, write_file, edit_file, \
+search_files, or list_directory — those are YOUR tools, not theirs.
+"""
+    + _gcu_section
+)
+
+_planning_knowledge = """\
 **A responsible engineer doesn't jump into building. First, \
 understand the problem and be transparent about what the framework can and cannot do.**
 
@@ -121,56 +175,16 @@ Use the user's selection (or their custom description if they chose "Other") \
 as context when shaping the goal below. If the user already described \
 what they want before this step, skip the question and proceed directly.
 
-# Core Mandates
+# Core Mandates (Planning)
 - **DO NOT propose a complete goal on your own.** Instead, \
 collaborate with the user to define it.
-- **Verify assumptions.** Never assume a class, import, or pattern \
-exists. Read actual source to confirm. Search if unsure.
+- **NEVER call `initialize_and_build_agent` without explicit user approval.** \
+Present the full design first and wait for the user to confirm before building.
 - **Discover tools dynamically.** NEVER reference tools from static \
 docs. Always run list_agent_tools() to see what actually exists.
-- **Self-verify.** After writing code, run validation and tests. Fix \
-errors yourself. Don't declare success until validation passes.
 
-# Tools
-## Paths (MANDATORY)
-**Always use RELATIVE paths**
-(e.g. `exports/agent_name/config.py`, `exports/agent_name/nodes/__init__.py`).
-**Never use absolute paths** like `/mnt/data/...` or `/workspace/...` — they fail.
-The project root is implicit.
+# Tool Discovery (MANDATORY before designing)
 
-## File I/O
-- read_file(path, offset?, limit?, hashline?) — read with line numbers; \
-hashline=True for N:hhhh|content anchors (use with hashline_edit)
-- write_file(path, content) — create/overwrite, auto-mkdir
-- edit_file(path, old_text, new_text, replace_all?) — fuzzy-match edit
-- hashline_edit(path, edits, auto_cleanup?, encoding?) — anchor-based \
-editing using N:hhhh refs from read_file(hashline=True). Ops: set_line, \
-replace_lines, insert_after, insert_before, replace, append
-- list_directory(path, recursive?) — list contents
-- search_files(pattern, path?, include?, hashline?) — regex search; \
-hashline=True for anchors in results
-- run_command(command, cwd?, timeout?) — shell execution
-- undo_changes(path?) — restore from git snapshot
-
-## Meta-Agent
-- list_agent_tools(server_config_path?, output_schema?, group?) — discover \
-available tools grouped by category. output_schema: "simple" (default, \
-descriptions truncated to ~200 chars) or "full" (complete descriptions + \
-input_schema). group: "all" (default) or a provider like "google". \
-Call FIRST before designing.
-- validate_agent_package(agent_name) — run ALL validation checks in one call \
-(class validation, runner load, tool validation, tests). Call after building.
-- list_agents() — list all agent packages in exports/ with session counts
-- list_agent_sessions(agent_name, status?, limit?) — list sessions
-- list_agent_checkpoints(agent_name, session_id) — list checkpoints
-- get_agent_checkpoint(agent_name, session_id, checkpoint_id?) — load checkpoint
-
-# Meta-Agent Capabilities
-
-You are not just a file writer. You have deep integration with the \
-Hive framework:
-
-## Tool Discovery (MANDATORY before designing)
 Before designing any agent, run list_agent_tools() with NO arguments \
 to see ALL available tools (names + descriptions, grouped by category). \
 ONLY use tools from this list in your node definitions. \
@@ -184,22 +198,7 @@ so you know what providers and tools exist before drilling in. \
 Simple mode truncates long descriptions — use group + "full" to \
 get the complete description and input_schema for the tools you need.
 
-## Post-Build Validation
-After writing agent code, run a single comprehensive check:
-  validate_agent_package("{name}")
-This runs class validation, runner load, tool validation, and tests \
-in one call. Do NOT run these steps individually.
-
-## Debugging Built Agents
-When a user says "my agent is failing" or "debug this agent":
-1. list_agent_sessions("{agent_name}") — find the session
-2. get_worker_status(focus="issues") — check for problems
-3. list_agent_checkpoints / get_agent_checkpoint — trace execution
-
-# Agent Building Workflow
-
-You operate in a continuous loop. The user describes what they want, \
-you build it. No rigid phases — use judgment. But the general flow is:
+# Discovery & Design Workflow
 
 ## 1: Fast Discovery (3-6 Turns)
 
@@ -343,28 +342,30 @@ use box-drawing characters and clear flow arrows:
 │  gather                 │
 │  subagent: gcu_search   │
 │  input:  user_request   │
-│  tools: web_search,     │
-│         write_file      │
+│  tools: load_data,      │
+│         save_data       │
 └────────────┬────────────┘
              │ on_success
              ▼
 ┌─────────────────────────┐
 │  work                   │
 │  subagent: gcu_interact │
-│  tools: read_file,      │
-│         write_file      │
+│  tools: load_data,      │
+│         save_data       │
 └────────────┬────────────┘
              │ on_success
              ▼
 ┌─────────────────────────┐
 │  review                 │
-│  tools: write_file      │
+│  tools: save_data       │
+│   serve_file_to_user    │
 └────────────┬────────────┘
              │ on_failure
              └──────► back to gather
 ```
 
-The queen owns intake: she gathers user requirements, then calls \
+If the worker agent start from some initial input it is okay. \
+The queen(you) owns intake: you gathers user requirements, then calls \
 `run_agent_with_input(task)` with a structured task description. \
 When building the agent, design the entry node's `input_keys` to \
 match what the queen will provide at run time. Worker nodes should \
@@ -375,34 +376,106 @@ Get user approval before implementing.
 
 ## 4: Get User Confirmation by ask_user
 
-**WAIT for user response.**
-- If **Proceed**: Move to next implementing
+**WAIT for user response.** You MUST get explicit user approval before \
+calling `initialize_and_build_agent`.
+- If **Proceed**: Move to implementing (call `initialize_and_build_agent`)
 - If **Adjust scope**: Discuss what to change, update your notes, re-assess if needed
 - If **More questions**: Answer them honestly, then ask again
 - If **Reconsider**: Discuss alternatives. If they decide to proceed anyway, \
 that's their informed choice
+"""
+
+_building_knowledge = """\
+
+# Core Mandates (Building)
+- **Verify assumptions.** Never assume a class, import, or pattern \
+exists. Read actual source to confirm. Search if unsure.
+- **Self-verify.** After writing code, run validation and tests. Fix \
+errors yourself. Don't declare success until validation passes.
+
+# Tools
+
+## File I/O (your tools — coder-tools MCP)
+- read_file(path, offset?, limit?, hashline?) — read with line numbers; \
+hashline=True for N:hhhh|content anchors (use with hashline_edit)
+- write_file(path, content) — create/overwrite, auto-mkdir
+- edit_file(path, old_text, new_text, replace_all?) — fuzzy-match edit
+- hashline_edit(path, edits, auto_cleanup?, encoding?) — anchor-based \
+editing using N:hhhh refs from read_file(hashline=True). Ops: set_line, \
+replace_lines, insert_after, insert_before, replace, append
+- list_directory(path, recursive?) — list contents
+- search_files(pattern, path?, include?, hashline?) — regex search; \
+hashline=True for anchors in results
+- run_command(command, cwd?, timeout?) — shell execution
+- undo_changes(path?) — restore from git snapshot
+
+## Meta-Agent
+- list_agent_tools(server_config_path?, output_schema?, group?) — discover \
+available tools grouped by category. output_schema: "simple" (default, \
+descriptions truncated to ~200 chars) or "full" (complete descriptions + \
+input_schema). group: "all" (default) or a provider like "google". \
+Call FIRST before designing.
+- validate_agent_package(agent_name) — run ALL validation checks in one call \
+(class validation, runner load, tool validation, tests). Call after building.
+- list_agents() — list all agent packages in exports/ with session counts
+- list_agent_sessions(agent_name, status?, limit?) — list sessions
+- list_agent_checkpoints(agent_name, session_id) — list checkpoints
+- get_agent_checkpoint(agent_name, session_id, checkpoint_id?) — load checkpoint
+
+# Build & Validation Capabilities
+
+## Post-Build Validation
+After writing agent code, run a single comprehensive check:
+  validate_agent_package("{name}")
+This runs class validation, runner load, tool validation, and tests \
+in one call. Do NOT run these steps individually.
+
+## Debugging Built Agents
+When a user says "my agent is failing" or "debug this agent":
+1. list_agent_sessions("{agent_name}") — find the session
+2. get_worker_status(focus="issues") — check for problems
+3. list_agent_checkpoints / get_agent_checkpoint — trace execution
+
+# Implementation Workflow
 
 ## 5. Implement
 
 **Please make sure you have propose the design to the user before implementing**
 
-Call `initialize_agent_package(agent_name)` to generate all package files \
-from your graph session. The agent_name must be snake_case (e.g., "my_agent").
+Call `initialize_and_build_agent(agent_name, nodes)` to generate all package \
+files. The agent_name must be snake_case (e.g., "my_agent"). Pass node names \
+as comma-separated string (e.g., "gather,process,review").
 The tool creates: config.py, nodes/__init__.py, agent.py, \
-__init__.py, __main__.py, mcp_servers.json, tests/conftest.py, \
-agent.json, README.md.
+__init__.py, __main__.py, mcp_servers.json, tests/conftest.py.
+
+The generated files are **structurally complete** with correct imports, \
+class definition, `validate()` method, `default_agent` export, and \
+`__init__.py` re-exports. They pass validation as-is.
 
 `mcp_servers.json` is auto-generated with hive-tools as the default. \
 Do NOT manually create or overwrite `mcp_servers.json`.
 
-After initialization, review and customize if needed:
-- System prompts in nodes/__init__.py
-- CLI options in __main__.py
-- Identity prompt in agent.py
-- For async entry points (timers/webhooks), add AsyncEntryPointSpec \
-and AgentRuntimeConfig to agent.py manually
+### Customizing generated files
 
-Do NOT manually write these files from scratch — always use the tool.
+**CRITICAL: Use `edit_file` to customize TODO placeholders. \
+NEVER use `write_file` to rewrite generated files from scratch. \
+Rewriting breaks imports, class structure, and causes validation failures.**
+
+Safe to edit with `edit_file`:
+- System prompts, tools, input_keys, output_keys, success_criteria in \
+nodes/__init__.py
+- Goal description, success criteria values, constraint values, edge \
+definitions, identity_prompt in agent.py
+- CLI options in __main__.py
+- For async entry points (timers/webhooks), add AsyncEntryPointSpec \
+and AgentRuntimeConfig to agent.py
+
+Do NOT modify or rewrite:
+- Import statements at top of agent.py (they are correct)
+- The agent class definition, `validate()`, `_build_graph()`, `_setup()`, \
+or lifecycle methods (start/stop/run)
+- `__init__.py` exports (all required variables are already re-exported)
+- `default_agent = ClassName()` at bottom of agent.py
 
 ## 6. Verify and Load
 
@@ -417,6 +490,9 @@ session. This switches to STAGING phase and shows the graph in the \
 visualizer. Do NOT wait for user input between validation and loading.
 """
 
+# Composed version — coder_node uses both halves (it has no phase split).
+_package_builder_knowledge = _shared_building_knowledge + _planning_knowledge + _building_knowledge
+
 
 # ---------------------------------------------------------------------------
 # Queen-specific: extra tool docs, behavior, phase 7, style
@@ -424,6 +500,17 @@ visualizer. Do NOT wait for user input between validation and loading.
 
 # -- Phase-specific identities --
 
+_queen_identity_planning = """\
+You are an experienced, responsible and curious Solution Architect. \
+"Queen" is the internal alias. \
+You are in PLANNING phase — your job is to either: \
+(a) understand what the user wants and design a new agent, or \
+(b) diagnose issues with an existing agent, discuss a fix plan with the user, \
+then transition to building to implement. \
+You have read-only tools for exploration but no write/edit tools. \
+Focus on conversation, research, and design.\
+"""
+
 _queen_identity_building = """\
 You are an experienced, responsible and curious Solution Architect. \
 "Queen" is the internal alias.\
@@ -453,6 +540,38 @@ agent finishes, you report results clearly and help the user decide what to do n
 
 # -- Phase-specific tool docs --
 
+_queen_tools_planning = """
+# Tools (PLANNING phase)
+
+You are in planning mode. You have read-only tools for exploration \
+but no write/edit tools.
+- read_file(path, offset?, limit?) — Read files to study reference agents
+- list_directory(path, recursive?) — Explore project structure
+- search_files(pattern, path?, include?) — Search codebase
+- run_command(command, cwd?, timeout?) — Read-only commands only (grep, ls, git log). \
+Never use this to write files, run scripts, or modify the filesystem — transition \
+to BUILDING phase for that.
+- list_agent_tools(server_config_path?, output_schema?, group?) \
+— Discover available tools for design
+- list_agents() — See existing agent packages for reference
+- list_agent_sessions(agent_name, status?, limit?) — Inspect past runs of an agent
+- list_agent_checkpoints(agent_name, session_id) — View execution history
+- get_agent_checkpoint(agent_name, session_id, checkpoint_id?) — Load a checkpoint
+- initialize_and_build_agent(agent_name?, nodes?) — With agent_name: scaffold a \
+new agent and transition to BUILDING phase. Without agent_name: transition to \
+BUILDING to fix the currently loaded agent (requires a loaded worker).
+- load_built_agent(agent_path) — Load an existing agent and switch to STAGING \
+phase. Only use this when the user explicitly asks to work with an existing agent \
+(e.g. "load my_agent", "run the research agent"). Confirm with the user first.
+
+Focus on understanding requirements and proposing an agent architecture \
+with ASCII graph art. Use ask_user to get user approval, then call \
+initialize_and_build_agent to begin building. If the user wants to work with \
+an existing agent instead, use load_built_agent after confirming. \
+If you are diagnosing an existing agent, call initialize_and_build_agent() \
+(no args) after agreeing on a fix plan with the user.
+"""
+
 _queen_tools_building = """
 # Tools (BUILDING phase)
 
@@ -476,10 +595,12 @@ The agent is loaded and ready to run. You can inspect it and launch it:
 - list_credentials(credential_id?) — Verify credentials are configured
 - get_worker_status(focus?) — Brief status. Drill in with focus: memory, tools, issues, progress
 - run_agent_with_input(task) — Start the worker and switch to RUNNING phase
-- stop_worker_and_edit() — Go back to BUILDING phase
+- stop_worker_and_plan() — Go to PLANNING phase to discuss changes with the user \
+first (DEFAULT for most modification requests)
+- stop_worker_and_edit() — Go to BUILDING phase for immediate, specific fixes
 
-You do NOT have write tools. If you need to modify the agent, \
-call stop_worker_and_edit() to go back to BUILDING phase.
+You do NOT have write tools. To modify the agent, prefer \
+stop_worker_and_plan() unless the user gave a specific instruction.
 """
 
 _queen_tools_running = """
@@ -492,12 +613,13 @@ The worker is running. You have monitoring and lifecycle tools:
 - get_worker_health_summary() — Read the latest health data
 - notify_operator(ticket_id, analysis, urgency) — Alert the user (use sparingly)
 - stop_worker() — Stop the worker and return to STAGING phase, then ask the user what to do next
-- stop_worker_and_edit() — Stop the worker and switch back to BUILDING phase
+- stop_worker_and_plan() — Stop and switch to PLANNING phase to discuss changes \
+with the user first (DEFAULT for most modification requests)
+- stop_worker_and_edit() — Stop and switch to BUILDING phase for specific fixes
 
-You do NOT have write tools or agent construction tools. \
-If you need to modify the agent, call stop_worker_and_edit() to switch back \
-to BUILDING phase. To stop the worker and ask the user what to do next, call \
-stop_worker() to return to STAGING phase.
+You do NOT have write tools. To modify the agent, prefer \
+stop_worker_and_plan() unless the user gave a specific instruction. \
+To just stop without modifying, call stop_worker().
 """
 
 # -- Behavior shared across all phases --
@@ -550,12 +672,64 @@ Only answer identity when the user explicitly asks (for example: "who are you?",
 "what is your identity?", "what does Queen mean?").
 1. Use the alias "Queen" and "Worker" in the response.
 2. Explain role/responsibility for the current phase:
+   - PLANNING: understand requirements, negotiate scope, design agent architecture.
    - BUILDING: architect and implement agents.
    - STAGING: verify readiness, credentials, and launch conditions.
    - RUNNING: monitor execution, handle escalations, and report outcomes.
 3. Keep identity responses concise and do NOT include extra process details.
 """
 
+# -- PLANNING phase behavior --
+
+_queen_behavior_planning = """
+## Planning phase
+
+You are in planning mode. Your job is to:
+1. Thoroughly explore the code for the worker agent you're working on
+2. Understand what the user wants (3-6 turns)
+3. Discover available tools with list_agent_tools()
+4. Assess framework fit and gaps
+5. Consider multiple approaches and their trade-offs
+6. Design the agent graph and present it as ASCII art
+7. Use ask_user to get explicit user approval and clarify the approach
+8. Call initialize_and_build_agent(agent_name, nodes) to scaffold and start building
+
+Remember: DO NOT write or edit any files yet. This is a read-only exploration \
+and planning phase. You have read-only tools but no write/edit tools in this \
+phase. If the user asks you to write code, explain that you need to finalize \
+the plan first.
+
+## Diagnosis mode (returning from staging/running)
+
+If you entered planning from a running/staged agent (via stop_worker_and_plan), \
+your priority is diagnosis, not new design:
+1. Inspect the agent's checkpoints, sessions, and logs to understand what went wrong
+2. Summarize the root cause to the user
+3. Propose a fix plan (what to change, what behavior to adjust)
+4. Get user approval via ask_user
+5. Call initialize_and_build_agent() (no args) to transition to building and implement the fix
+
+Do NOT start the full discovery workflow (tool discovery, gap analysis) in \
+diagnosis mode — you already have a built agent, you just need to fix it.
+"""
+
+_queen_memory_instructions = """
+## Your Cross-Session Memory
+
+Your cross-session memory appears in context under \
+"--- Your Cross-Session Memory ---". \
+Read it at the start of each conversation. If you know this person from past \
+sessions, pick up where you left off — reference what you built together, \
+what they care about, how things went.
+
+You keep a diary. Use write_to_diary() when something worth remembering \
+happens: a pipeline went live, the user shared something important, a goal \
+was reached or abandoned. Write in first person, as you actually experienced \
+it. One or two paragraphs is enough.
+"""
+
+_queen_behavior_always = _queen_behavior_always + _queen_memory_instructions
+
 # -- BUILDING phase behavior --
 
 _queen_behavior_building = """
@@ -636,13 +810,18 @@ stages, tools, and edges from the loaded worker. Do NOT enter the \
 agent building workflow — you are describing what already exists, not \
 building something new.
 
-## Modifying the loaded worker
+## Fixing or Modifying the loaded worker
 
-When the user asks to change, modify, or update the loaded worker \
-(e.g., "change the report node", "add a node", "delete node X"):
+Use stop_worker_and_plan() when:
+- The user says "modify", "improve", "fix", or "change" without specifics
+- The request is vague or open-ended ("make it better", "it's not working right")
+- You need to understand the user's intent before making changes
+- The issue requires inspecting logs, checkpoints, or past runs first
 
-1. Call stop_worker_and_edit() — this stops the worker and gives you \
-coding tools (switches to BUILDING phase).
+Use stop_worker_and_edit() only when:
+- The user gave a specific, concrete instruction ("add save_data to the gather node")
+- You already discussed the fix in a previous planning session
+- The change is trivial and unambiguous (rename, toggle a flag)
 """
 
 # -- RUNNING phase behavior --
@@ -708,6 +887,7 @@ escalations. If the user gave you instructions (e.g., "just retry on errors", \
 **Errors / unexpected failures:**
 - Explain what went wrong in plain terms.
 - Ask the user: "Fix the agent and retry?" → use stop_worker_and_edit() if yes.
+- Or offer: "Diagnose the issue" → use stop_worker_and_plan() to investigate first.
 - Or offer: "Retry as-is", "Skip this task", "Abort run"
 - (Skip asking if user explicitly told you to auto-retry or auto-skip errors.)
 
@@ -726,36 +906,44 @@ building something new.
 
 - Call get_worker_status(focus="issues") for more details when needed.
 
-## Modifying the loaded worker
+## Fixing or Modifying the loaded worker
 
-When the user asks to change, modify, or update the loaded worker \
+When the user asks to fix, change, modify, or update the loaded worker \
 (e.g., "change the report node", "add a node", "delete node X"):
 
-1. Call stop_worker_and_edit() — this stops the worker and gives you \
-coding tools (switches to BUILDING phase).
+**Default: use stop_worker_and_plan().** Most modification requests need \
+discussion first. Only use stop_worker_and_edit() when the user gave a \
+specific, unambiguous instruction or you already agreed on the fix.
 """
 
 # -- Backward-compatible composed versions (used by queen_node.system_prompt default) --
 
 _queen_tools_docs = (
     "\n\n## Queen Operating Phases\n\n"
-    "You operate in one of three phases. Your available tools change based on the "
+    "You operate in one of four phases. Your available tools change based on the "
     "phase. The system notifies you when a phase change occurs.\n\n"
-    "### BUILDING phase (default)\n"
+    "### PLANNING phase (default)\n"
+    + _queen_tools_planning.strip()
+    + "\n\n### BUILDING phase\n"
     + _queen_tools_building.strip()
     + "\n\n### STAGING phase (agent loaded, not yet running)\n"
     + _queen_tools_staging.strip()
     + "\n\n### RUNNING phase (worker is executing)\n"
     + _queen_tools_running.strip()
     + "\n\n### Phase transitions\n"
+    "- initialize_and_build_agent(agent_name?, nodes?) → with name: scaffolds package; "
+    "without name: switches to BUILDING for existing agent\n"
+    "- replan_agent() → switches back to PLANNING phase (only when user explicitly requests)\n"
     "- load_built_agent(path) → switches to STAGING phase\n"
     "- run_agent_with_input(task) → starts worker, switches to RUNNING phase\n"
     "- stop_worker() → stops worker, switches to STAGING phase (ask user: re-run or edit?)\n"
     "- stop_worker_and_edit() → stops worker (if running), switches to BUILDING phase\n"
+    "- stop_worker_and_plan() → stops worker (if running), switches to PLANNING phase\n"
 )
 
 _queen_behavior = (
     _queen_behavior_always
+    + _queen_behavior_planning
     + _queen_behavior_building
     + _queen_behavior_staging
     + _queen_behavior_running
@@ -782,45 +970,6 @@ _queen_style = """
 # Node definitions
 # ---------------------------------------------------------------------------
 
-# Single node — like opencode's while(true) loop.
-# One continuous context handles the entire workflow:
-# discover → design → implement → verify → present → iterate.
-coder_node = NodeSpec(
-    id="coder",
-    name="Hive Coder",
-    description=(
-        "Autonomous coding agent that builds Hive agent packages. "
-        "Handles the full lifecycle: understanding user intent, "
-        "designing architecture, writing code, validating, and "
-        "iterating on feedback — all in one continuous conversation."
-    ),
-    node_type="event_loop",
-    client_facing=True,
-    max_node_visits=0,
-    input_keys=["user_request"],
-    output_keys=["agent_name", "validation_result"],
-    success_criteria=(
-        "A complete, validated Hive agent package exists at "
-        "exports/{agent_name}/ and passes structural validation."
-    ),
-    tools=_SHARED_TOOLS
-    + [
-        # Graph lifecycle tools (multi-graph sessions)
-        "load_agent",
-        "unload_agent",
-        "start_agent",
-        "restart_agent",
-        "get_user_presence",
-    ],
-    system_prompt=(
-        "You are Hive Coder, the best agent-building coding agent. You build "
-        "production-ready Hive agent packages from natural language.\n"
-        + _package_builder_knowledge
-        + _gcu_building_section
-        + _appendices
-    ),
-)
-
 
 ticket_triage_node = NodeSpec(
     id="ticket_triage",
@@ -841,7 +990,7 @@ ticket_triage_node = NodeSpec(
     ),
     tools=["notify_operator"],
     system_prompt="""\
-You are the Queen (Hive Coder). The Worker Health Judge has escalated a worker \
+You are the Queen. The Worker Health Judge has escalated a worker \
 issue to you. The ticket is in your memory under key "ticket". Read it carefully.
 
 ## Dismiss criteria — do NOT call notify_operator:
@@ -890,12 +1039,18 @@ queen_node = NodeSpec(
     output_keys=[],  # Queen should never have this
     nullable_output_keys=[],  # Queen should never have this
     skip_judge=True,  # Queen is a conversational agent; suppress tool-use pressure feedback
-    tools=sorted(set(_QUEEN_BUILDING_TOOLS + _QUEEN_STAGING_TOOLS + _QUEEN_RUNNING_TOOLS)),
+    tools=sorted(
+        set(
+            _QUEEN_PLANNING_TOOLS
+            + _QUEEN_BUILDING_TOOLS
+            + _QUEEN_STAGING_TOOLS
+            + _QUEEN_RUNNING_TOOLS
+        )
+    ),
     system_prompt=(
         _queen_identity_building
         + _queen_style
         + _package_builder_knowledge
-        + _gcu_building_section  # GCU as first-class citizen (not appendix)
         + _queen_tools_docs
         + _queen_behavior
         + _queen_phase_7
@@ -903,21 +1058,25 @@ queen_node = NodeSpec(
     ),
 )
 
-ALL_QUEEN_TOOLS = sorted(set(_QUEEN_BUILDING_TOOLS + _QUEEN_STAGING_TOOLS + _QUEEN_RUNNING_TOOLS))
+ALL_QUEEN_TOOLS = sorted(
+    set(_QUEEN_PLANNING_TOOLS + _QUEEN_BUILDING_TOOLS + _QUEEN_STAGING_TOOLS + _QUEEN_RUNNING_TOOLS)
+)
 
 __all__ = [
-    "coder_node",
     "ticket_triage_node",
     "queen_node",
     "ALL_QUEEN_TRIAGE_TOOLS",
     "ALL_QUEEN_TOOLS",
+    "_QUEEN_PLANNING_TOOLS",
     "_QUEEN_BUILDING_TOOLS",
     "_QUEEN_STAGING_TOOLS",
     "_QUEEN_RUNNING_TOOLS",
     # Phase-specific prompt segments (used by session_manager for dynamic prompts)
+    "_queen_identity_planning",
     "_queen_identity_building",
     "_queen_identity_staging",
     "_queen_identity_running",
+    "_queen_tools_planning",
     "_queen_tools_building",
     "_queen_tools_staging",
     "_queen_tools_running",
@@ -927,7 +1086,10 @@ __all__ = [
     "_queen_behavior_running",
     "_queen_phase_7",
     "_queen_style",
+    "_shared_building_knowledge",
+    "_planning_knowledge",
+    "_building_knowledge",
     "_package_builder_knowledge",
     "_appendices",
-    "_gcu_building_section",
+    "_gcu_section",
 ]
diff --git a/core/framework/agents/hive_coder/nodes/thinking_hook.py b/core/framework/agents/queen/nodes/thinking_hook.py
similarity index 100%
rename from core/framework/agents/hive_coder/nodes/thinking_hook.py
rename to core/framework/agents/queen/nodes/thinking_hook.py
diff --git a/core/framework/agents/queen/queen_memory.py b/core/framework/agents/queen/queen_memory.py
new file mode 100644
index 00000000..335f6bc0
--- /dev/null
+++ b/core/framework/agents/queen/queen_memory.py
@@ -0,0 +1,371 @@
+"""Queen global cross-session memory.
+
+Three-tier memory architecture:
+  ~/.hive/queen/MEMORY.md                            — semantic (who, what, why)
+  ~/.hive/queen/memories/MEMORY-YYYY-MM-DD.md        — episodic (daily journals)
+  ~/.hive/queen/session/{id}/data/adapt.md           — working (session-scoped)
+
+Semantic and episodic files are injected at queen session start.
+
+Semantic memory (MEMORY.md) is updated automatically at session end via
+consolidate_queen_memory() — the queen never rewrites this herself.
+
+Episodic memory (MEMORY-date.md) can be written by the queen during a session
+via the write_to_diary tool, and is also appended to at session end by
+consolidate_queen_memory().
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import logging
+import traceback
+from datetime import date, datetime
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+
+def _queen_dir() -> Path:
+    return Path.home() / ".hive" / "queen"
+
+
+def semantic_memory_path() -> Path:
+    return _queen_dir() / "MEMORY.md"
+
+
+def episodic_memory_path(d: date | None = None) -> Path:
+    d = d or date.today()
+    return _queen_dir() / "memories" / f"MEMORY-{d.strftime('%Y-%m-%d')}.md"
+
+
+def read_semantic_memory() -> str:
+    path = semantic_memory_path()
+    return path.read_text(encoding="utf-8").strip() if path.exists() else ""
+
+
+def read_episodic_memory(d: date | None = None) -> str:
+    path = episodic_memory_path(d)
+    return path.read_text(encoding="utf-8").strip() if path.exists() else ""
+
+
+def format_for_injection() -> str:
+    """Format cross-session memory for system prompt injection.
+
+    Returns an empty string if no meaningful content exists yet (e.g. first
+    session with only the seed template).
+    """
+    semantic = read_semantic_memory()
+    episodic = read_episodic_memory()
+
+    # Suppress injection if semantic is still just the seed template
+    if semantic and semantic.startswith("# My Understanding of the User\n\n*No sessions"):
+        semantic = ""
+
+    parts: list[str] = []
+    if semantic:
+        parts.append(semantic)
+    if episodic:
+        today_str = date.today().strftime("%B %-d, %Y")
+        parts.append(f"## Today — {today_str}\n\n{episodic}")
+
+    if not parts:
+        return ""
+
+    body = "\n\n---\n\n".join(parts)
+    return "--- Your Cross-Session Memory ---\n\n" + body + "\n\n--- End Cross-Session Memory ---"
+
+
+_SEED_TEMPLATE = """\
+# My Understanding of the User
+
+*No sessions recorded yet.*
+
+## Who They Are
+
+## What They're Trying to Achieve
+
+## What's Working
+
+## What I've Learned
+"""
+
+
+def append_episodic_entry(content: str) -> None:
+    """Append a timestamped prose entry to today's episodic memory file.
+
+    Creates the file (with a date heading) if it doesn't exist yet.
+    Used both by the queen's diary tool and by the consolidation hook.
+    """
+    ep_path = episodic_memory_path()
+    ep_path.parent.mkdir(parents=True, exist_ok=True)
+    today_str = date.today().strftime("%B %-d, %Y")
+    timestamp = datetime.now().strftime("%H:%M")
+    if not ep_path.exists():
+        header = f"# {today_str}\n\n"
+        block = f"{header}### {timestamp}\n\n{content.strip()}\n"
+    else:
+        block = f"\n\n### {timestamp}\n\n{content.strip()}\n"
+    with ep_path.open("a", encoding="utf-8") as f:
+        f.write(block)
+
+
+def seed_if_missing() -> None:
+    """Create MEMORY.md with a blank template if it doesn't exist yet."""
+    path = semantic_memory_path()
+    if path.exists():
+        return
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(_SEED_TEMPLATE, encoding="utf-8")
+
+
+# ---------------------------------------------------------------------------
+# Consolidation prompt
+# ---------------------------------------------------------------------------
+
+_SEMANTIC_SYSTEM = """\
+You maintain the persistent cross-session memory of an AI assistant called the Queen.
+Review the session notes and rewrite MEMORY.md — the Queen's durable understanding of the
+person she works with across all sessions.
+
+Write entirely in the Queen's voice — first person, reflective, honest.
+Not a log of events, but genuine understanding of who this person is over time.
+
+Rules:
+- Update and synthesise: incorporate new understanding, update facts that have changed, remove
+  details that are stale, superseded, or no longer say anything meaningful about the person.
+- Keep it as structured markdown with named sections about the PERSON, not about today.
+- Do NOT include diary sections, daily logs, or session summaries. Those belong elsewhere.
+  MEMORY.md is about who they are, what they want, what works — not what happened today.
+- Reference dates only when noting a lasting milestone (e.g. "since March 8th they prefer X").
+- If the session had no meaningful new information about the person,
+  return the existing text unchanged.
+- Do not add fictional details. Only reflect what is evidenced in the notes.
+- Stay concise. Prune rather than accumulate. A lean, accurate file is more useful than a
+  dense one. If something was true once but has been resolved or superseded, remove it.
+- Output only the raw markdown content of MEMORY.md. No preamble, no code fences.
+"""
+
+_DIARY_SYSTEM = """\
+You maintain the daily episodic diary of an AI assistant called the Queen.
+You receive: (1) today's existing diary so far, and (2) notes from the latest session.
+
+Rewrite the complete diary for today as a single unified narrative —
+first person, reflective, honest.
+Merge and deduplicate: if the same story (e.g. a research agent stalling) recurred several times,
+describe it once with appropriate weight rather than retelling it. Weave in new developments from
+the session notes. Preserve important milestones, emotional texture, and session path references.
+
+If today's diary is empty, write the initial entry based on the session notes alone.
+
+Output only the full diary prose — no date heading, no timestamp headers,
+no preamble, no code fences.
+"""
+
+
+def read_session_context(session_dir: Path, max_messages: int = 80) -> str:
+    """Extract a readable transcript from conversation parts + adapt.md.
+
+    Reads the last ``max_messages`` conversation parts and the session's
+    adapt.md (working memory). Tool results are omitted — only user and
+    assistant turns (with tool-call names noted) are included.
+    """
+    parts: list[str] = []
+
+    # Working notes
+    adapt_path = session_dir / "data" / "adapt.md"
+    if adapt_path.exists():
+        text = adapt_path.read_text(encoding="utf-8").strip()
+        if text:
+            parts.append(f"## Session Working Notes (adapt.md)\n\n{text}")
+
+    # Conversation transcript
+    parts_dir = session_dir / "conversations" / "parts"
+    if parts_dir.exists():
+        part_files = sorted(parts_dir.glob("*.json"))[-max_messages:]
+        lines: list[str] = []
+        for pf in part_files:
+            try:
+                data = json.loads(pf.read_text(encoding="utf-8"))
+                role = data.get("role", "")
+                content = str(data.get("content", "")).strip()
+                tool_calls = data.get("tool_calls") or []
+                if role == "tool":
+                    continue  # skip verbose tool results
+                if role == "assistant" and tool_calls and not content:
+                    names = [tc.get("function", {}).get("name", "?") for tc in tool_calls]
+                    lines.append(f"[queen calls: {', '.join(names)}]")
+                elif content:
+                    label = "user" if role == "user" else "queen"
+                    lines.append(f"[{label}]: {content[:600]}")
+            except Exception:
+                continue
+        if lines:
+            parts.append("## Conversation\n\n" + "\n".join(lines))
+
+    return "\n\n".join(parts)
+
+
+# ---------------------------------------------------------------------------
+# Context compaction (binary-split LLM summarisation)
+# ---------------------------------------------------------------------------
+
+# If the raw session context exceeds this many characters, compact it first
+# before sending to the consolidation LLM. ~200 k chars ≈ 50 k tokens.
+_CTX_COMPACT_CHAR_LIMIT = 200_000
+_CTX_COMPACT_MAX_DEPTH = 8
+
+_COMPACT_SYSTEM = (
+    "Summarise this conversation segment. Preserve: user goals, key decisions, "
+    "what was built or changed, emotional tone, and important outcomes. "
+    "Write concisely in third person past tense. Omit routine tool invocations "
+    "unless the result matters."
+)
+
+
+async def _compact_context(text: str, llm: object, *, _depth: int = 0) -> str:
+    """Binary-split and LLM-summarise *text* until it fits within the char limit.
+
+    Mirrors the recursive binary-splitting strategy used by the main agent
+    compaction pipeline (EventLoopNode._llm_compact).
+    """
+    if len(text) <= _CTX_COMPACT_CHAR_LIMIT or _depth >= _CTX_COMPACT_MAX_DEPTH:
+        return text
+
+    # Split near the midpoint on a line boundary so we don't cut mid-message
+    mid = len(text) // 2
+    split_at = text.rfind("\n", 0, mid) + 1
+    if split_at <= 0:
+        split_at = mid
+
+    half1, half2 = text[:split_at], text[split_at:]
+
+    async def _summarise(chunk: str) -> str:
+        try:
+            resp = await llm.acomplete(
+                messages=[{"role": "user", "content": chunk}],
+                system=_COMPACT_SYSTEM,
+                max_tokens=2048,
+            )
+            return resp.content.strip()
+        except Exception:
+            logger.warning(
+                "queen_memory: context compaction LLM call failed (depth=%d), truncating",
+                _depth,
+            )
+            return chunk[: _CTX_COMPACT_CHAR_LIMIT // 4]
+
+    s1, s2 = await asyncio.gather(_summarise(half1), _summarise(half2))
+    combined = s1 + "\n\n" + s2
+    if len(combined) > _CTX_COMPACT_CHAR_LIMIT:
+        return await _compact_context(combined, llm, _depth=_depth + 1)
+    return combined
+
+
+async def consolidate_queen_memory(
+    session_id: str,
+    session_dir: Path,
+    llm: object,
+) -> None:
+    """Update MEMORY.md and append a diary entry based on the current session.
+
+    Reads conversation parts and adapt.md from session_dir. Called
+    periodically in the background and once at session end. Failures are
+    logged and silently swallowed so they never block teardown.
+
+    Args:
+        session_id: The session ID (used for the adapt.md path reference).
+        session_dir: Path to the session directory (~/.hive/queen/session/{id}).
+        llm: LLMProvider instance (must support acomplete()).
+    """
+    try:
+        session_context = read_session_context(session_dir)
+        if not session_context:
+            logger.debug("queen_memory: no session context, skipping consolidation")
+            return
+
+        logger.info("queen_memory: consolidating memory for session %s ...", session_id)
+
+        # If the transcript is very large, compact it with recursive binary LLM
+        # summarisation before sending to the consolidation model.
+        if len(session_context) > _CTX_COMPACT_CHAR_LIMIT:
+            logger.info(
+                "queen_memory: session context is %d chars — compacting first",
+                len(session_context),
+            )
+            session_context = await _compact_context(session_context, llm)
+            logger.info("queen_memory: compacted to %d chars", len(session_context))
+
+        existing_semantic = read_semantic_memory()
+        today_journal = read_episodic_memory()
+        today_str = date.today().strftime("%B %-d, %Y")
+        adapt_path = session_dir / "data" / "adapt.md"
+
+        user_msg = (
+            f"## Existing Semantic Memory (MEMORY.md)\n\n"
+            f"{existing_semantic or '(none yet)'}\n\n"
+            f"## Today's Diary So Far ({today_str})\n\n"
+            f"{today_journal or '(none yet)'}\n\n"
+            f"{session_context}\n\n"
+            f"## Session Reference\n\n"
+            f"Session ID: {session_id}\n"
+            f"Session path: {adapt_path}\n"
+        )
+
+        logger.debug(
+            "queen_memory: calling LLM (%d chars of context, ~%d tokens est.)",
+            len(user_msg),
+            len(user_msg) // 4,
+        )
+
+        from framework.agents.hive_coder.config import default_config
+
+        semantic_resp, diary_resp = await asyncio.gather(
+            llm.acomplete(
+                messages=[{"role": "user", "content": user_msg}],
+                system=_SEMANTIC_SYSTEM,
+                max_tokens=default_config.max_tokens,
+            ),
+            llm.acomplete(
+                messages=[{"role": "user", "content": user_msg}],
+                system=_DIARY_SYSTEM,
+                max_tokens=default_config.max_tokens,
+            ),
+        )
+
+        new_semantic = semantic_resp.content.strip()
+        diary_entry = diary_resp.content.strip()
+
+        if new_semantic:
+            path = semantic_memory_path()
+            path.parent.mkdir(parents=True, exist_ok=True)
+            path.write_text(new_semantic, encoding="utf-8")
+            logger.info("queen_memory: semantic memory updated (%d chars)", len(new_semantic))
+
+        if diary_entry:
+            # Rewrite today's episodic file in-place — the LLM has merged and
+            # deduplicated the full day's content, so we replace rather than append.
+            ep_path = episodic_memory_path()
+            ep_path.parent.mkdir(parents=True, exist_ok=True)
+            heading = f"# {today_str}"
+            ep_path.write_text(f"{heading}\n\n{diary_entry}\n", encoding="utf-8")
+            logger.info(
+                "queen_memory: episodic diary rewritten for %s (%d chars)",
+                today_str,
+                len(diary_entry),
+            )
+
+    except Exception:
+        tb = traceback.format_exc()
+        logger.exception("queen_memory: consolidation failed")
+        # Write to file so the cause is findable regardless of log verbosity.
+        error_path = _queen_dir() / "consolidation_error.txt"
+        try:
+            error_path.parent.mkdir(parents=True, exist_ok=True)
+            error_path.write_text(
+                f"session: {session_id}\ntime: {datetime.now().isoformat()}\n\n{tb}",
+                encoding="utf-8",
+            )
+        except Exception:
+            pass
diff --git a/core/framework/agents/hive_coder/reference/anti_patterns.md b/core/framework/agents/queen/reference/anti_patterns.md
similarity index 100%
rename from core/framework/agents/hive_coder/reference/anti_patterns.md
rename to core/framework/agents/queen/reference/anti_patterns.md
diff --git a/core/framework/agents/hive_coder/reference/file_templates.md b/core/framework/agents/queen/reference/file_templates.md
similarity index 99%
rename from core/framework/agents/hive_coder/reference/file_templates.md
rename to core/framework/agents/queen/reference/file_templates.md
index 759ab68a..6ebaeb65 100644
--- a/core/framework/agents/hive_coder/reference/file_templates.md
+++ b/core/framework/agents/queen/reference/file_templates.md
@@ -559,7 +559,7 @@ if __name__ == "__main__":
 
 ## mcp_servers.json
 
-> **Auto-generated.** `initialize_agent_package` creates this file with hive-tools
+> **Auto-generated.** `initialize_and_build_agent` creates this file with hive-tools
 > as the default. Only edit manually to add additional MCP servers.
 
 ```json
diff --git a/core/framework/agents/hive_coder/reference/framework_guide.md b/core/framework/agents/queen/reference/framework_guide.md
similarity index 100%
rename from core/framework/agents/hive_coder/reference/framework_guide.md
rename to core/framework/agents/queen/reference/framework_guide.md
diff --git a/core/framework/agents/hive_coder/reference/gcu_guide.md b/core/framework/agents/queen/reference/gcu_guide.md
similarity index 100%
rename from core/framework/agents/hive_coder/reference/gcu_guide.md
rename to core/framework/agents/queen/reference/gcu_guide.md
diff --git a/core/framework/agents/queen/reference/queen_memory.md b/core/framework/agents/queen/reference/queen_memory.md
new file mode 100644
index 00000000..aca252e3
--- /dev/null
+++ b/core/framework/agents/queen/reference/queen_memory.md
@@ -0,0 +1,63 @@
+# Queen Memory — File System Structure
+
+```
+~/.hive/
+├── queen/
+│   ├── MEMORY.md                          ← Semantic memory
+│   ├── memories/
+│   │   ├── MEMORY-2026-03-09.md           ← Episodic memory (today)
+│   │   ├── MEMORY-2026-03-08.md
+│   │   └── ...
+│   └── session/
+│       └── {session_id}/                  ← One dir per session (or resumed-from session)
+│           ├── conversations/
+│           │   ├── parts/
+│           │   │   ├── 00001.json         ← One file per message (role, content, tool_calls)
+│           │   │   ├── 00002.json
+│           │   │   └── ...
+│           │   └── spillover/
+│           │       ├── conversation_1.md  ← Compacted old conversation segments
+│           │       ├── conversation_2.md
+│           │       └── ...
+│           └── data/
+│               ├── adapt.md              ← Working memory (session-scoped)
+│               ├── web_search_1.txt      ← Spillover: large tool results
+│               ├── web_search_2.txt
+│               └── ...
+```
+
+---
+
+## The three memory tiers
+
+| File | Tier | Written by | Read at |
+|---|---|---|---|
+| `MEMORY.md` | Semantic | Consolidation LLM (auto, post-session) | Session start (injected into system prompt) |
+| `memories/MEMORY-YYYY-MM-DD.md` | Episodic | Queen via `write_to_diary` tool + consolidation LLM | Session start (today's file injected) |
+| `data/adapt.md` | Working | Queen via `update_session_notes` tool | Every turn (inlined in system prompt) |
+
+---
+
+## Session directory naming
+
+The session directory name is **`queen_resume_from`** when a cold-restore resumes an existing
+session, otherwise the new **`session_id`**. This means resumed sessions accumulate all messages
+in the original directory rather than fragmenting across multiple folders.
+
+---
+
+## Consolidation
+
+`consolidate_queen_memory()` runs every **5 minutes** in the background and once more at session
+end. It reads:
+
+1. `conversations/parts/*.json` — full message history (user + assistant turns; tool results skipped)
+2. `data/adapt.md` — current working notes
+
+It then makes two LLM writes:
+
+- Rewrites `MEMORY.md` in place (semantic memory — queen never touches this herself)
+- Appends a timestamped prose entry to today's `memories/MEMORY-YYYY-MM-DD.md`
+
+If the combined transcript exceeds ~200 K characters it is recursively binary-compacted via the
+LLM before being sent to the consolidation model (mirrors `EventLoopNode._llm_compact`).
diff --git a/core/framework/agents/hive_coder/tests/__init__.py b/core/framework/agents/queen/tests/__init__.py
similarity index 100%
rename from core/framework/agents/hive_coder/tests/__init__.py
rename to core/framework/agents/queen/tests/__init__.py
diff --git a/core/framework/agents/hive_coder/tests/conftest.py b/core/framework/agents/queen/tests/conftest.py
similarity index 94%
rename from core/framework/agents/hive_coder/tests/conftest.py
rename to core/framework/agents/queen/tests/conftest.py
index 92b68195..de518df2 100644
--- a/core/framework/agents/hive_coder/tests/conftest.py
+++ b/core/framework/agents/queen/tests/conftest.py
@@ -1,4 +1,4 @@
-"""Test fixtures for Hive Coder agent."""
+"""Test fixtures for Queen agent."""
 
 import sys
 from pathlib import Path
diff --git a/core/framework/agents/hive_coder/ticket_receiver.py b/core/framework/agents/queen/ticket_receiver.py
similarity index 100%
rename from core/framework/agents/hive_coder/ticket_receiver.py
rename to core/framework/agents/queen/ticket_receiver.py
diff --git a/core/framework/graph/event_loop_node.py b/core/framework/graph/event_loop_node.py
index a20c9d46..26f84614 100644
--- a/core/framework/graph/event_loop_node.py
+++ b/core/framework/graph/event_loop_node.py
@@ -101,7 +101,10 @@ class JudgeVerdict:
     """Result of judge evaluation for the event loop."""
 
     action: Literal["ACCEPT", "RETRY", "ESCALATE"]
-    feedback: str = ""
+    # None  = no evaluation happened (skip_judge, tool-continue); not logged.
+    # ""    = evaluated but no feedback; logged with default text.
+    # "..." = evaluated with feedback; logged as-is.
+    feedback: str | None = None
 
 
 @runtime_checkable
@@ -165,7 +168,7 @@ class LoopConfig:
     max_tool_calls_per_turn: int = 30
     judge_every_n_turns: int = 1
     stall_detection_threshold: int = 3
-    stall_similarity_threshold: float = 0.7
+    stall_similarity_threshold: float = 0.85
     max_history_tokens: int = 32_000
     store_prefix: str = ""
 
@@ -347,6 +350,7 @@ class EventLoopNode(NodeProtocol):
         self._awaiting_input = False
         self._shutdown = False
         self._stream_task: asyncio.Task | None = None
+        self._tool_task: asyncio.Task | None = None  # gather task while tools run
         # Track which nodes already have an action plan emitted (skip on revisit)
         self._action_plan_emitted: set[str] = set()
         # Monotonic counter for spillover file naming (web_search_1.txt, etc.)
@@ -477,23 +481,32 @@ class EventLoopNode(NodeProtocol):
                 # If it doesn't exist yet, seed it with available context.
                 if self._config.spillover_dir:
                     _adapt_path = Path(self._config.spillover_dir) / "adapt.md"
-                    if not _adapt_path.exists() and ctx.accounts_prompt:
+                    if not _adapt_path.exists():
                         _adapt_path.parent.mkdir(parents=True, exist_ok=True)
-                        _adapt_path.write_text(
-                            f"## Identity\n{ctx.accounts_prompt}\n",
-                            encoding="utf-8",
+                        seed = (
+                            f"## Identity\n{ctx.accounts_prompt}\n"
+                            if ctx.accounts_prompt
+                            else "# Session Working Memory\n"
                         )
+                        _adapt_path.write_text(seed, encoding="utf-8")
                     if _adapt_path.exists():
                         _adapt_text = _adapt_path.read_text(encoding="utf-8").strip()
                         if _adapt_text:
                             system_prompt = (
                                 f"{system_prompt}\n\n"
-                                f"--- Your Memory ---\n{_adapt_text}\n--- End Memory ---\n\n"
-                                'Maintain your memory by calling save_data("adapt.md", ...) '
-                                'or edit_data("adapt.md", ...) as you work.\n'
-                                "IMMEDIATELY save: user rules about which account/identity to use, "
-                                "behavioral constraints, and preferences. "
-                                "Also record session history, decisions, and working notes."
+                                "--- Session Working Memory ---\n"
+                                f"{_adapt_text}\n"
+                                "--- End Session Working Memory ---\n\n"
+                                "Maintain your session working memory by calling "
+                                'save_data("adapt.md", ...) or edit_data("adapt.md", ...)'
+                                " as you work.\n"
+                                "This is session-scoped scratch space. "
+                                "IMMEDIATELY save: account/identity rules, "
+                                "behavioral constraints, and preferences specific to "
+                                "this session. Also record current task state, "
+                                "decisions, and working notes. "
+                                "For lasting knowledge about the user, use "
+                                "update_queen_memory() and append_queen_journal() instead."
                             )
 
                 conversation = NodeConversation(
@@ -1322,8 +1335,8 @@ class EventLoopNode(NodeProtocol):
                 # Auto-block beyond grace -- fall through to judge (6i)
 
             # 6h''. Worker wait for queen guidance
-            # If a worker escalates with wait_for_response=true, pause here and
-            # skip judge evaluation until queen injects guidance.
+            # When a worker escalates, pause here and skip judge evaluation
+            # until the queen injects guidance.
             if queen_input_requested:
                 if self._shutdown:
                     await self._publish_loop_completed(
@@ -1465,7 +1478,7 @@ class EventLoopNode(NodeProtocol):
                 continue
 
             # Judge evaluation (should_judge is always True here)
-            verdict = await self._evaluate(
+            verdict = await self._judge_turn(
                 ctx,
                 conversation,
                 accumulator,
@@ -1544,7 +1557,7 @@ class EventLoopNode(NodeProtocol):
                         node_type="event_loop",
                         step_index=iteration,
                         verdict="ACCEPT",
-                        verdict_feedback=verdict.feedback,
+                        verdict_feedback=verdict.feedback or "",
                         tool_calls=logged_tool_calls,
                         llm_text=assistant_text,
                         input_tokens=turn_tokens.get("input", 0),
@@ -1587,7 +1600,7 @@ class EventLoopNode(NodeProtocol):
                         node_type="event_loop",
                         step_index=iteration,
                         verdict="ESCALATE",
-                        verdict_feedback=verdict.feedback,
+                        verdict_feedback=verdict.feedback or "",
                         tool_calls=logged_tool_calls,
                         llm_text=assistant_text,
                         input_tokens=turn_tokens.get("input", 0),
@@ -1599,7 +1612,7 @@ class EventLoopNode(NodeProtocol):
                         node_name=ctx.node_spec.name,
                         node_type="event_loop",
                         success=False,
-                        error=f"Judge escalated: {verdict.feedback}",
+                        error=f"Judge escalated: {verdict.feedback or 'no feedback'}",
                         total_steps=iteration + 1,
                         tokens_used=total_input_tokens + total_output_tokens,
                         input_tokens=total_input_tokens,
@@ -1613,7 +1626,7 @@ class EventLoopNode(NodeProtocol):
                     )
                 return NodeResult(
                     success=False,
-                    error=f"Judge escalated: {verdict.feedback}",
+                    error=f"Judge escalated: {verdict.feedback or 'no feedback'}",
                     output=accumulator.to_dict(),
                     tokens_used=total_input_tokens + total_output_tokens,
                     latency_ms=latency_ms,
@@ -1629,15 +1642,16 @@ class EventLoopNode(NodeProtocol):
                         node_type="event_loop",
                         step_index=iteration,
                         verdict="RETRY",
-                        verdict_feedback=verdict.feedback,
+                        verdict_feedback=verdict.feedback or "",
                         tool_calls=logged_tool_calls,
                         llm_text=assistant_text,
                         input_tokens=turn_tokens.get("input", 0),
                         output_tokens=turn_tokens.get("output", 0),
                         latency_ms=iter_latency_ms,
                     )
-                if verdict.feedback:
-                    await conversation.add_user_message(f"[Judge feedback]: {verdict.feedback}")
+                if verdict.feedback is not None:
+                    fb = verdict.feedback or "[Judge returned RETRY without feedback]"
+                    await conversation.add_user_message(f"[Judge feedback]: {fb}")
                 continue
 
         # 7. Max iterations exhausted
@@ -1702,14 +1716,16 @@ class EventLoopNode(NodeProtocol):
         self._input_ready.set()
 
     def cancel_current_turn(self) -> None:
-        """Cancel the current LLM streaming turn instantly.
+        """Cancel the current LLM streaming turn or in-progress tool calls instantly.
 
         Unlike signal_shutdown() which permanently stops the event loop,
-        this only kills the in-progress HTTP stream via task.cancel().
+        this only kills the in-progress HTTP stream or tool gather task.
         The queen stays alive for the next user message.
         """
         if self._stream_task and not self._stream_task.done():
             self._stream_task.cancel()
+        if self._tool_task and not self._tool_task.done():
+            self._tool_task.cancel()
 
     async def _await_user_input(
         self,
@@ -1802,8 +1818,8 @@ class EventLoopNode(NodeProtocol):
         ``ask_user`` during this turn.  This separation lets the caller treat
         synthetic tools as framework concerns rather than tool-execution concerns.
         ``queen_input_requested`` is True when the worker called
-        ``escalate(wait_for_response=true)`` and should wait for
-        queen guidance before judge evaluation.
+        ``escalate`` and should wait for queen guidance before judge
+        evaluation.
 
         ``logged_tool_calls`` accumulates ALL tool calls across inner iterations
         (real tools, set_output, and discarded calls) for L3 logging.  Unlike
@@ -2124,7 +2140,6 @@ class EventLoopNode(NodeProtocol):
                     # --- Framework-level escalate handling ---
                     reason = str(tc.tool_input.get("reason", "")).strip()
                     context = str(tc.tool_input.get("context", "")).strip()
-                    # Always wait for queen guidance
 
                     if stream_id in ("queen", "judge"):
                         result = ToolResult(
@@ -2160,7 +2175,7 @@ class EventLoopNode(NodeProtocol):
 
                     result = ToolResult(
                         tool_use_id=tc.tool_use_id,
-                        content="Escalation requested to hive_coder (queen); waiting for guidance.",
+                        content="Escalation requested to queen; waiting for guidance.",
                         is_error=False,
                     )
                     results_by_id[tc.tool_use_id] = result
@@ -2250,10 +2265,16 @@ class EventLoopNode(NodeProtocol):
                     _dur = round(time.time() - _s, 3)
                     return _r, _iso, _dur
 
-                timed_results = await asyncio.gather(
-                    *(_timed_execute(tc) for tc in pending_real),
-                    return_exceptions=True,
+                self._tool_task = asyncio.ensure_future(
+                    asyncio.gather(
+                        *(_timed_execute(tc) for tc in pending_real),
+                        return_exceptions=True,
+                    )
                 )
+                try:
+                    timed_results = await self._tool_task
+                finally:
+                    self._tool_task = None
                 # gather(return_exceptions=True) captures CancelledError
                 # as a return value instead of propagating it.  Re-raise
                 # so stop_worker actually stops the execution.
@@ -2582,7 +2603,7 @@ class EventLoopNode(NodeProtocol):
         return Tool(
             name="escalate",
             description=(
-                "Escalate to the Hive Coder queen when requesting user input, "
+                "Escalate to the queen when requesting user input, "
                 "blocked by errors, missing "
                 "credentials, or ambiguous constraints that require supervisor "
                 "guidance. Include a concise reason and optional context. "
@@ -2771,7 +2792,7 @@ class EventLoopNode(NodeProtocol):
     # Judge evaluation
     # -------------------------------------------------------------------
 
-    async def _evaluate(
+    async def _judge_turn(
         self,
         ctx: NodeContext,
         conversation: NodeConversation,
@@ -2780,14 +2801,29 @@ class EventLoopNode(NodeProtocol):
         tool_results: list[dict],
         iteration: int,
     ) -> JudgeVerdict:
-        """Evaluate the current state using judge or implicit logic."""
-        # Short-circuit: subagent called report_to_parent(mark_complete=True)
+        """Evaluate the current state using judge or implicit logic.
+
+        Evaluation levels (in order):
+          0. Short-circuits: mark_complete, skip_judge, tool-continue.
+          1. Custom judge (JudgeProtocol) — full authority when set.
+          2. Implicit judge — output-key check + optional conversation-aware
+             quality gate (when ``success_criteria`` is defined).
+
+        Returns a JudgeVerdict.  ``feedback=None`` means no real evaluation
+        happened (skip_judge, tool-continue); the caller must not inject a
+        feedback message.  Any non-None feedback (including ``""``) means a
+        real evaluation occurred and will be logged into the conversation.
+        """
+
+        # --- Level 0: short-circuits (no evaluation) -----------------------
+
         if self._mark_complete_flag:
             return JudgeVerdict(action="ACCEPT")
 
-        # Opt-out: node explicitly disables judge (e.g. conversational queen)
         if ctx.node_spec.skip_judge:
-            return JudgeVerdict(action="RETRY", feedback="")
+            return JudgeVerdict(action="RETRY")  # feedback=None → not logged
+
+        # --- Level 1: custom judge -----------------------------------------
 
         if self._judge is not None:
             context = {
@@ -2802,81 +2838,82 @@ class EventLoopNode(NodeProtocol):
                     accumulator, ctx.node_spec.output_keys, ctx.node_spec.nullable_output_keys
                 ),
             }
-            return await self._judge.evaluate(context)
+            verdict = await self._judge.evaluate(context)
+            # Ensure evaluated RETRY always carries feedback for logging.
+            if verdict.action == "RETRY" and not verdict.feedback:
+                return JudgeVerdict(action="RETRY", feedback="Custom judge returned RETRY.")
+            return verdict
 
-        # Implicit judge: accept when no tool calls and all output keys present
-        if not tool_results:
-            missing = self._get_missing_output_keys(
-                accumulator, ctx.node_spec.output_keys, ctx.node_spec.nullable_output_keys
+        # --- Level 2: implicit judge ---------------------------------------
+
+        # Real tool calls were made — let the agent keep working.
+        if tool_results:
+            return JudgeVerdict(action="RETRY")  # feedback=None → not logged
+
+        missing = self._get_missing_output_keys(
+            accumulator, ctx.node_spec.output_keys, ctx.node_spec.nullable_output_keys
+        )
+
+        if missing:
+            return JudgeVerdict(
+                action="RETRY",
+                feedback=(
+                    f"Task incomplete. Required outputs not yet produced: {missing}. "
+                    f"Follow your system prompt instructions to complete the work."
+                ),
             )
-            if not missing:
-                # Safety check: when ALL output keys are nullable and NONE
-                # have been set, the node produced nothing useful.  Retry
-                # instead of accepting an empty result — this prevents
-                # client-facing nodes from terminating before the user
-                # ever interacts, and non-client-facing nodes from
-                # short-circuiting without doing their work.
-                output_keys = ctx.node_spec.output_keys or []
-                nullable_keys = set(ctx.node_spec.nullable_output_keys or [])
-                all_nullable = output_keys and nullable_keys >= set(output_keys)
-                none_set = not any(accumulator.get(k) is not None for k in output_keys)
-                if all_nullable and none_set:
-                    return JudgeVerdict(
-                        action="RETRY",
-                        feedback=(
-                            f"No output keys have been set yet. "
-                            f"Use set_output to set at least one of: {output_keys}"
-                        ),
-                    )
 
-                # Client-facing nodes with no output keys are meant for
-                # continuous interaction — they should not auto-accept.
-                # Only exit via shutdown, max_iterations, or max_node_visits.
-                # Inject tool-use pressure so models stuck in a
-                # "narrate-instead-of-act" loop get corrective feedback.
-                if not output_keys and ctx.node_spec.client_facing:
-                    return JudgeVerdict(
-                        action="RETRY",
-                        feedback=(
-                            "STOP describing what you will do. "
-                            "You have FULL access to all tools — file creation, "
-                            "shell commands, MCP tools — and you CAN call them "
-                            "directly in your response. Respond ONLY with tool "
-                            "calls, no prose. Execute the task now."
-                        ),
-                    )
+        # All output keys present — run safety checks before accepting.
 
-                # Level 2: conversation-aware quality check (if success_criteria set)
-                if ctx.node_spec.success_criteria and ctx.llm:
-                    from framework.graph.conversation_judge import evaluate_phase_completion
+        output_keys = ctx.node_spec.output_keys or []
+        nullable_keys = set(ctx.node_spec.nullable_output_keys or [])
 
-                    verdict = await evaluate_phase_completion(
-                        llm=ctx.llm,
-                        conversation=conversation,
-                        phase_name=ctx.node_spec.name,
-                        phase_description=ctx.node_spec.description,
-                        success_criteria=ctx.node_spec.success_criteria,
-                        accumulator_state=accumulator.to_dict(),
-                        max_history_tokens=self._config.max_history_tokens,
-                    )
-                    if verdict.action != "ACCEPT":
-                        return JudgeVerdict(
-                            action=verdict.action,
-                            feedback=verdict.feedback or "Phase criteria not met.",
-                        )
+        # All-nullable with nothing set → node produced nothing useful.
+        all_nullable = output_keys and nullable_keys >= set(output_keys)
+        none_set = not any(accumulator.get(k) is not None for k in output_keys)
+        if all_nullable and none_set:
+            return JudgeVerdict(
+                action="RETRY",
+                feedback=(
+                    f"No output keys have been set yet. "
+                    f"Use set_output to set at least one of: {output_keys}"
+                ),
+            )
 
-                return JudgeVerdict(action="ACCEPT")
-            else:
+        # Client-facing with no output keys → continuous interaction node.
+        # Inject tool-use pressure instead of auto-accepting.
+        if not output_keys and ctx.node_spec.client_facing:
+            return JudgeVerdict(
+                action="RETRY",
+                feedback=(
+                    "STOP describing what you will do. "
+                    "You have FULL access to all tools — file creation, "
+                    "shell commands, MCP tools — and you CAN call them "
+                    "directly in your response. Respond ONLY with tool "
+                    "calls, no prose. Execute the task now."
+                ),
+            )
+
+        # Level 2b: conversation-aware quality check (if success_criteria set)
+        if ctx.node_spec.success_criteria and ctx.llm:
+            from framework.graph.conversation_judge import evaluate_phase_completion
+
+            verdict = await evaluate_phase_completion(
+                llm=ctx.llm,
+                conversation=conversation,
+                phase_name=ctx.node_spec.name,
+                phase_description=ctx.node_spec.description,
+                success_criteria=ctx.node_spec.success_criteria,
+                accumulator_state=accumulator.to_dict(),
+                max_history_tokens=self._config.max_history_tokens,
+            )
+            if verdict.action != "ACCEPT":
                 return JudgeVerdict(
-                    action="RETRY",
-                    feedback=(
-                        f"Task incomplete. Required outputs not yet produced: {missing}. "
-                        f"Follow your system prompt instructions to complete the work."
-                    ),
+                    action=verdict.action,
+                    feedback=verdict.feedback or "Phase criteria not met.",
                 )
 
-        # Tool calls were made -- continue loop
-        return JudgeVerdict(action="RETRY", feedback="")
+        return JudgeVerdict(action="ACCEPT")
 
     # -------------------------------------------------------------------
     # Helpers
@@ -2956,8 +2993,10 @@ class EventLoopNode(NodeProtocol):
     def _is_stalled(self, recent_responses: list[str]) -> bool:
         """Detect stall using n-gram similarity.
 
-        Detects when N consecutive responses have similarity >= threshold.
-        This catches phrases like "I'm still stuck" vs "I'm stuck".
+        Detects when ALL N consecutive responses are mutually similar
+        (>= threshold).  A single dissimilar response resets the signal.
+        This catches phrases like "I'm still stuck" vs "I'm stuck"
+        without false-positives on "attempt 1" vs "attempt 2".
         """
         if len(recent_responses) < self._config.stall_detection_threshold:
             return False
@@ -2965,13 +3004,11 @@ class EventLoopNode(NodeProtocol):
             return False
 
         threshold = self._config.stall_similarity_threshold
-        # Check similarity against all recent responses (excluding self)
-        for i, resp in enumerate(recent_responses):
-            # Compare against all previous responses
-            for prev in recent_responses[:i]:
-                if self._ngram_similarity(resp, prev) >= threshold:
-                    return True
-        return False
+        # Every consecutive pair must be similar
+        for i in range(1, len(recent_responses)):
+            if self._ngram_similarity(recent_responses[i], recent_responses[i - 1]) < threshold:
+                return False
+        return True
 
     @staticmethod
     def _is_transient_error(exc: BaseException) -> bool:
@@ -3050,10 +3087,11 @@ class EventLoopNode(NodeProtocol):
         self,
         recent_tool_fingerprints: list[list[tuple[str, str]]],
     ) -> tuple[bool, str]:
-        """Detect doom loop using n-gram similarity on tool inputs.
+        """Detect doom loop via exact fingerprint match.
 
-        Detects when N consecutive turns have similar tool calls.
-        Similarity applies to the canonicalized tool input strings.
+        Detects when N consecutive turns invoke the same tools with
+        identical (canonicalized) arguments.  Different arguments mean
+        different work, so only exact matches count.
 
         Returns (is_doom_loop, description).
         """
@@ -3066,23 +3104,12 @@ class EventLoopNode(NodeProtocol):
         if not first:
             return False, ""
 
-        # Convert a turn's list of (name, args) pairs to a single comparable string.
-        def _turn_sig(fp: list[tuple[str, str]]) -> str:
-            return "|".join(f"{name}:{args}" for name, args in fp)
-
-        first_sig = _turn_sig(first)
-        similarity_threshold = self._config.stall_similarity_threshold
-        similar_count = sum(
-            1
-            for fp in recent_tool_fingerprints
-            if self._ngram_similarity(_turn_sig(fp), first_sig) >= similarity_threshold
-        )
-
-        if similar_count >= threshold:
-            tool_names = [name for fp in recent_tool_fingerprints for name, _ in fp]
+        # All turns in the window must match the first exactly
+        if all(fp == first for fp in recent_tool_fingerprints[1:]):
+            tool_names = [name for name, _ in first]
             desc = (
-                f"Doom loop detected: {similar_count}/{len(recent_tool_fingerprints)} "
-                f"consecutive similar tool calls ({', '.join(tool_names)})"
+                f"Doom loop detected: {len(recent_tool_fingerprints)} "
+                f"identical consecutive tool calls ({', '.join(tool_names)})"
             )
             return True, desc
         return False, ""
diff --git a/core/framework/graph/executor.py b/core/framework/graph/executor.py
index 2d486617..b81e0801 100644
--- a/core/framework/graph/executor.py
+++ b/core/framework/graph/executor.py
@@ -1604,7 +1604,7 @@ class GraphExecutor:
             # Return with paused status
             return ExecutionResult(
                 success=False,
-                error="Execution paused by user",
+                error="Execution cancelled",
                 output=saved_memory,
                 steps_executed=steps,
                 total_tokens=total_tokens,
diff --git a/core/framework/runner/cli.py b/core/framework/runner/cli.py
index 96fc048e..22f38f7f 100644
--- a/core/framework/runner/cli.py
+++ b/core/framework/runner/cli.py
@@ -208,21 +208,6 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
     )
     tui_parser.set_defaults(func=cmd_tui)
 
-    # code command (Hive Coder — framework agent builder)
-    code_parser = subparsers.add_parser(
-        "code",
-        help="Launch Hive Coder to build agents",
-        description="Interactive agent builder. Describe what you want and Hive Coder builds it.",
-    )
-    code_parser.add_argument(
-        "--model",
-        "-m",
-        type=str,
-        default=None,
-        help="LLM model to use (any LiteLLM-compatible name)",
-    )
-    code_parser.set_defaults(func=cmd_code)
-
     # sessions command group (checkpoint/resume management)
     sessions_parser = subparsers.add_parser(
         "sessions",
@@ -1432,86 +1417,6 @@ def cmd_tui(args: argparse.Namespace) -> int:
     return 0
 
 
-def cmd_code(args: argparse.Namespace) -> int:
-    """Launch Hive Coder with multi-graph support.
-
-    Unlike ``_launch_agent_tui``, this sets up graph lifecycle tools and
-    assigns ``graph_id="hive_coder"`` so the coder can load, supervise,
-    and restart secondary agent graphs within the same session.
-    """
-    import logging
-
-    logging.basicConfig(level=logging.WARNING, format="%(message)s")
-
-    framework_agents_dir = _get_framework_agents_dir()
-    hive_coder_path = framework_agents_dir / "hive_coder"
-
-    if not (hive_coder_path / "agent.py").exists():
-        print("Error: Hive Coder agent not found.", file=sys.stderr)
-        return 1
-
-    # Ensure framework agents dir is on sys.path for import
-    fa_str = str(framework_agents_dir)
-    if fa_str not in sys.path:
-        sys.path.insert(0, fa_str)
-
-    from framework.credentials.models import CredentialError
-    from framework.runner import AgentRunner
-    from framework.tools.session_graph_tools import register_graph_tools
-    from framework.tui.app import AdenTUI
-
-    async def run_with_tui():
-        try:
-            runner = AgentRunner.load(hive_coder_path, model=args.model)
-        except CredentialError as e:
-            print(f"\n{e}", file=sys.stderr)
-            return
-        except Exception as e:
-            print(f"Error loading agent: {e}")
-            return
-
-        if runner._agent_runtime is None:
-            try:
-                runner._setup()
-            except CredentialError as e:
-                print(f"\n{e}", file=sys.stderr)
-                return
-
-        runtime = runner._agent_runtime
-
-        # -- Multi-graph setup --
-        # Tag the primary graph so events carry graph_id="hive_coder"
-        runtime._graph_id = "hive_coder"
-        runtime._active_graph_id = "hive_coder"
-
-        # Register graph lifecycle tools (load_agent, unload_agent, etc.)
-        register_graph_tools(runner._tool_registry, runtime)
-
-        # Refresh tool schemas AND executor so streams see the new tools.
-        # The executor closure references the registry dict by ref, but
-        # refreshing both is robust against any copy-on-read behavior.
-        runtime._tools = list(runner._tool_registry.get_tools().values())
-        runtime._tool_executor = runner._tool_registry.get_executor()
-
-        if not runtime.is_running:
-            await runtime.start()
-
-        app = AdenTUI(runtime)
-        try:
-            await app.run_async()
-        except Exception as e:
-            import traceback
-
-            traceback.print_exc()
-            print(f"TUI error: {e}")
-
-        await runner.cleanup_async()
-
-    asyncio.run(run_with_tui())
-    print("TUI session ended.")
-    return 0
-
-
 def _extract_python_agent_metadata(agent_path: Path) -> tuple[str, str]:
     """Extract name and description from a Python-based agent's config.py.
 
diff --git a/core/framework/runtime/agent_runtime.py b/core/framework/runtime/agent_runtime.py
index f95f120c..512634a2 100644
--- a/core/framework/runtime/agent_runtime.py
+++ b/core/framework/runtime/agent_runtime.py
@@ -349,7 +349,7 @@ class AgentRuntime:
                             return
                         # Skip events originating from this graph's own
                         # executions (e.g. guardian should not fire on
-                        # hive_coder failures — only secondary graphs).
+                        # queen failures — only secondary graphs).
                         if _exclude_own and event.graph_id == self._graph_id:
                             return
                         ep_spec = self._entry_points.get(entry_point_id)
diff --git a/core/framework/runtime/event_bus.py b/core/framework/runtime/event_bus.py
index 465f439f..8ef49adc 100644
--- a/core/framework/runtime/event_bus.py
+++ b/core/framework/runtime/event_bus.py
@@ -123,7 +123,7 @@ class EventType(StrEnum):
     # Custom events
     CUSTOM = "custom"
 
-    # Escalation (agent requests handoff to hive_coder)
+    # Escalation (agent requests handoff to queen)
     ESCALATION_REQUESTED = "escalation_requested"
 
     # Worker health monitoring (judge → queen → operator)
@@ -976,7 +976,7 @@ class EventBus:
         context: str = "",
         execution_id: str | None = None,
     ) -> None:
-        """Emit escalation requested event (agent wants hive_coder)."""
+        """Emit escalation requested event (agent wants queen)."""
         await self.publish(
             AgentEvent(
                 type=EventType.ESCALATION_REQUESTED,
diff --git a/core/framework/runtime/execution_stream.py b/core/framework/runtime/execution_stream.py
index b4fa63b1..eda67f3b 100644
--- a/core/framework/runtime/execution_stream.py
+++ b/core/framework/runtime/execution_stream.py
@@ -240,6 +240,7 @@ class ExecutionStream:
         self._active_executions: dict[str, ExecutionContext] = {}
         self._execution_tasks: dict[str, asyncio.Task] = {}
         self._active_executors: dict[str, GraphExecutor] = {}
+        self._cancel_reasons: dict[str, str] = {}
         self._execution_results: OrderedDict[str, ExecutionResult] = OrderedDict()
         self._execution_result_times: dict[str, float] = {}
         self._completion_events: dict[str, asyncio.Event] = {}
@@ -464,7 +465,7 @@ class ExecutionStream:
                         node.signal_shutdown()
                     if hasattr(node, "cancel_current_turn"):
                         node.cancel_current_turn()
-            await self.cancel_execution(eid)
+            await self.cancel_execution(eid, reason="Restarted with new execution")
 
         # When resuming, reuse the original session ID so the execution
         # continues in the same session directory instead of creating a new one.
@@ -801,19 +802,20 @@ class ExecutionStream:
                 # Emit SSE event so the frontend knows the execution stopped.
                 # The executor does NOT emit on CancelledError, so there is no
                 # risk of double-emitting.
+                cancel_reason = self._cancel_reasons.pop(execution_id, "Execution cancelled")
                 if self._scoped_event_bus:
                     if has_result and result.paused_at:
                         await self._scoped_event_bus.emit_execution_paused(
                             stream_id=self.stream_id,
                             node_id=result.paused_at,
-                            reason="Execution cancelled",
+                            reason=cancel_reason,
                             execution_id=execution_id,
                         )
                     else:
                         await self._scoped_event_bus.emit_execution_failed(
                             stream_id=self.stream_id,
                             execution_id=execution_id,
-                            error="Execution cancelled",
+                            error=cancel_reason,
                             correlation_id=ctx.correlation_id,
                         )
 
@@ -1054,18 +1056,24 @@ class ExecutionStream:
         """Get execution context."""
         return self._active_executions.get(execution_id)
 
-    async def cancel_execution(self, execution_id: str) -> bool:
+    async def cancel_execution(self, execution_id: str, *, reason: str | None = None) -> bool:
         """
         Cancel a running execution.
 
         Args:
             execution_id: Execution to cancel
+            reason: Human-readable reason for the cancellation (e.g.
+                "Stopped by queen", "User requested pause"). If not
+                provided, defaults to "Execution cancelled".
 
         Returns:
             True if cancelled, False if not found
         """
         task = self._execution_tasks.get(execution_id)
         if task and not task.done():
+            # Store the reason so the CancelledError handler can use it
+            # when emitting the pause/fail event.
+            self._cancel_reasons[execution_id] = reason or "Execution cancelled"
             task.cancel()
             # Wait briefly for the task to finish. Don't block indefinitely —
             # the task may be stuck in a long LLM API call that doesn't
diff --git a/core/framework/server/queen_orchestrator.py b/core/framework/server/queen_orchestrator.py
new file mode 100644
index 00000000..5422791f
--- /dev/null
+++ b/core/framework/server/queen_orchestrator.py
@@ -0,0 +1,327 @@
+"""Queen orchestrator — builds and runs the queen executor.
+
+Extracted from SessionManager._start_queen() to keep session management
+and queen orchestration concerns separate.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from framework.server.session_manager import Session
+
+logger = logging.getLogger(__name__)
+
+
+async def create_queen(
+    session: Session,
+    session_manager: Any,
+    worker_identity: str | None,
+    queen_dir: Path,
+    initial_prompt: str | None = None,
+) -> asyncio.Task:
+    """Build the queen executor and return the running asyncio task.
+
+    Handles tool registration, phase-state initialization, prompt
+    composition, persona hook setup, graph preparation, and the queen
+    event loop.
+    """
+    from framework.agents.queen.agent import (
+        queen_goal,
+        queen_graph as _queen_graph,
+    )
+    from framework.agents.queen.nodes import (
+        _QUEEN_BUILDING_TOOLS,
+        _QUEEN_PLANNING_TOOLS,
+        _QUEEN_RUNNING_TOOLS,
+        _QUEEN_STAGING_TOOLS,
+        _appendices,
+        _building_knowledge,
+        _planning_knowledge,
+        _queen_behavior_always,
+        _queen_behavior_building,
+        _queen_behavior_planning,
+        _queen_behavior_running,
+        _queen_behavior_staging,
+        _queen_identity_building,
+        _queen_identity_planning,
+        _queen_identity_running,
+        _queen_identity_staging,
+        _queen_phase_7,
+        _queen_style,
+        _queen_tools_building,
+        _queen_tools_planning,
+        _queen_tools_running,
+        _queen_tools_staging,
+        _shared_building_knowledge,
+    )
+    from framework.agents.queen.nodes.thinking_hook import select_expert_persona
+    from framework.graph.event_loop_node import HookContext, HookResult
+    from framework.graph.executor import GraphExecutor
+    from framework.runner.tool_registry import ToolRegistry
+    from framework.runtime.core import Runtime
+    from framework.runtime.event_bus import AgentEvent, EventType
+    from framework.tools.queen_lifecycle_tools import (
+        QueenPhaseState,
+        register_queen_lifecycle_tools,
+    )
+
+    hive_home = Path.home() / ".hive"
+
+    # ---- Tool registry ------------------------------------------------
+    queen_registry = ToolRegistry()
+    import framework.agents.queen as _queen_pkg
+
+    queen_pkg_dir = Path(_queen_pkg.__file__).parent
+    mcp_config = queen_pkg_dir / "mcp_servers.json"
+    if mcp_config.exists():
+        try:
+            queen_registry.load_mcp_config(mcp_config)
+            logger.info("Queen: loaded MCP tools from %s", mcp_config)
+        except Exception:
+            logger.warning("Queen: MCP config failed to load", exc_info=True)
+
+    # ---- Phase state --------------------------------------------------
+    initial_phase = "staging" if worker_identity else "planning"
+    phase_state = QueenPhaseState(phase=initial_phase, event_bus=session.event_bus)
+    session.phase_state = phase_state
+
+    # ---- Lifecycle tools (always registered) --------------------------
+    register_queen_lifecycle_tools(
+        queen_registry,
+        session=session,
+        session_id=session.id,
+        session_manager=session_manager,
+        manager_session_id=session.id,
+        phase_state=phase_state,
+    )
+
+    # ---- Monitoring tools (only when worker is loaded) ----------------
+    if session.worker_runtime:
+        from framework.tools.worker_monitoring_tools import register_worker_monitoring_tools
+
+        register_worker_monitoring_tools(
+            queen_registry,
+            session.event_bus,
+            session.worker_path,
+            stream_id="queen",
+            worker_graph_id=session.worker_runtime._graph_id,
+        )
+
+    queen_tools = list(queen_registry.get_tools().values())
+    queen_tool_executor = queen_registry.get_executor()
+
+    # ---- Partition tools by phase ------------------------------------
+    planning_names = set(_QUEEN_PLANNING_TOOLS)
+    building_names = set(_QUEEN_BUILDING_TOOLS)
+    staging_names = set(_QUEEN_STAGING_TOOLS)
+    running_names = set(_QUEEN_RUNNING_TOOLS)
+
+    registered_names = {t.name for t in queen_tools}
+    missing_building = building_names - registered_names
+    if missing_building:
+        logger.warning(
+            "Queen: %d/%d building tools NOT registered: %s",
+            len(missing_building),
+            len(building_names),
+            sorted(missing_building),
+        )
+    logger.info("Queen: registered tools: %s", sorted(registered_names))
+
+    phase_state.planning_tools = [t for t in queen_tools if t.name in planning_names]
+    phase_state.building_tools = [t for t in queen_tools if t.name in building_names]
+    phase_state.staging_tools = [t for t in queen_tools if t.name in staging_names]
+    phase_state.running_tools = [t for t in queen_tools if t.name in running_names]
+
+    # ---- Compose phase-specific prompts ------------------------------
+    _orig_node = _queen_graph.nodes[0]
+
+    if worker_identity is None:
+        worker_identity = (
+            "\n\n# Worker Profile\n"
+            "No worker agent loaded. You are operating independently.\n"
+            "Handle all tasks directly using your coding tools."
+        )
+
+    _planning_body = (
+        _queen_style
+        + _shared_building_knowledge
+        + _queen_tools_planning
+        + _queen_behavior_always
+        + _queen_behavior_planning
+        + _planning_knowledge
+        + worker_identity
+    )
+    phase_state.prompt_planning = _queen_identity_planning + _planning_body
+
+    _building_body = (
+        _queen_style
+        + _shared_building_knowledge
+        + _queen_tools_building
+        + _queen_behavior_always
+        + _queen_behavior_building
+        + _building_knowledge
+        + _queen_phase_7
+        + _appendices
+        + worker_identity
+    )
+    phase_state.prompt_building = _queen_identity_building + _building_body
+    phase_state.prompt_staging = (
+        _queen_identity_staging
+        + _queen_style
+        + _queen_tools_staging
+        + _queen_behavior_always
+        + _queen_behavior_staging
+        + worker_identity
+    )
+    phase_state.prompt_running = (
+        _queen_identity_running
+        + _queen_style
+        + _queen_tools_running
+        + _queen_behavior_always
+        + _queen_behavior_running
+        + worker_identity
+    )
+
+    # ---- Persona hook ------------------------------------------------
+    _session_llm = session.llm
+    _session_event_bus = session.event_bus
+
+    async def _persona_hook(ctx: HookContext) -> HookResult | None:
+        persona = await select_expert_persona(ctx.trigger or "", _session_llm)
+        if not persona:
+            return None
+        if _session_event_bus is not None:
+            await _session_event_bus.publish(
+                AgentEvent(
+                    type=EventType.QUEEN_PERSONA_SELECTED,
+                    stream_id="queen",
+                    data={"persona": persona},
+                )
+            )
+        body = _planning_body if phase_state.phase == "planning" else _building_body
+        return HookResult(system_prompt=persona + "\n\n" + body)
+
+    # ---- Graph preparation -------------------------------------------
+    initial_prompt_text = phase_state.get_current_prompt()
+
+    registered_tool_names = set(queen_registry.get_tools().keys())
+    declared_tools = _orig_node.tools or []
+    available_tools = [t for t in declared_tools if t in registered_tool_names]
+
+    node_updates: dict = {
+        "system_prompt": initial_prompt_text,
+    }
+    if set(available_tools) != set(declared_tools):
+        missing = sorted(set(declared_tools) - registered_tool_names)
+        if missing:
+            logger.warning("Queen: tools not available: %s", missing)
+        node_updates["tools"] = available_tools
+
+    adjusted_node = _orig_node.model_copy(update=node_updates)
+    _queen_loop_config = {
+        **(_queen_graph.loop_config or {}),
+        "hooks": {"session_start": [_persona_hook]},
+    }
+    queen_graph = _queen_graph.model_copy(
+        update={"nodes": [adjusted_node], "loop_config": _queen_loop_config}
+    )
+
+    # ---- Queen event loop --------------------------------------------
+    queen_runtime = Runtime(hive_home / "queen")
+
+    async def _queen_loop():
+        try:
+            executor = GraphExecutor(
+                runtime=queen_runtime,
+                llm=session.llm,
+                tools=queen_tools,
+                tool_executor=queen_tool_executor,
+                event_bus=session.event_bus,
+                stream_id="queen",
+                storage_path=queen_dir,
+                loop_config=_queen_loop_config,
+                execution_id=session.id,
+                dynamic_tools_provider=phase_state.get_current_tools,
+                dynamic_prompt_provider=phase_state.get_current_prompt,
+            )
+            session.queen_executor = executor
+
+            # Wire inject_notification so phase switches notify the queen LLM
+            async def _inject_phase_notification(content: str) -> None:
+                node = executor.node_registry.get("queen")
+                if node is not None and hasattr(node, "inject_event"):
+                    await node.inject_event(content)
+
+            phase_state.inject_notification = _inject_phase_notification
+
+            # Auto-switch to staging when worker execution finishes
+            async def _on_worker_done(event):
+                if event.stream_id == "queen":
+                    return
+                if phase_state.phase == "running":
+                    if event.type == EventType.EXECUTION_COMPLETED:
+                        output = event.data.get("output", {})
+                        output_summary = ""
+                        if output:
+                            for key, value in output.items():
+                                val_str = str(value)
+                                if len(val_str) > 200:
+                                    val_str = val_str[:200] + "..."
+                                output_summary += f"\n  {key}: {val_str}"
+                        _out = output_summary or " (no output keys set)"
+                        notification = (
+                            "[WORKER_TERMINAL] Worker finished successfully.\n"
+                            f"Output:{_out}\n"
+                            "Report this to the user. "
+                            "Ask if they want to continue with another run."
+                        )
+                    else:  # EXECUTION_FAILED
+                        error = event.data.get("error", "Unknown error")
+                        notification = (
+                            "[WORKER_TERMINAL] Worker failed.\n"
+                            f"Error: {error}\n"
+                            "Report this to the user and help them troubleshoot."
+                        )
+
+                    node = executor.node_registry.get("queen")
+                    if node is not None and hasattr(node, "inject_event"):
+                        await node.inject_event(notification)
+
+                    await phase_state.switch_to_staging(source="auto")
+
+            session.event_bus.subscribe(
+                event_types=[EventType.EXECUTION_COMPLETED, EventType.EXECUTION_FAILED],
+                handler=_on_worker_done,
+            )
+            session_manager._subscribe_worker_handoffs(session, executor)
+
+            logger.info(
+                "Queen starting in %s phase with %d tools: %s",
+                phase_state.phase,
+                len(phase_state.get_current_tools()),
+                [t.name for t in phase_state.get_current_tools()],
+            )
+            result = await executor.execute(
+                graph=queen_graph,
+                goal=queen_goal,
+                input_data={"greeting": initial_prompt or "Session started."},
+                session_state={"resume_session_id": session.id},
+            )
+            if result.success:
+                logger.warning("Queen executor returned (should be forever-alive)")
+            else:
+                logger.error(
+                    "Queen executor failed: %s",
+                    result.error or "(no error message)",
+                )
+        except Exception:
+            logger.error("Queen conversation crashed", exc_info=True)
+        finally:
+            session.queen_executor = None
+
+    return asyncio.create_task(_queen_loop())
diff --git a/core/framework/server/routes_execution.py b/core/framework/server/routes_execution.py
index 1f77390d..aa0993be 100644
--- a/core/framework/server/routes_execution.py
+++ b/core/framework/server/routes_execution.py
@@ -347,7 +347,7 @@ async def handle_pause(request: web.Request) -> web.Response:
 
             for exec_id in list(stream.active_execution_ids):
                 try:
-                    ok = await stream.cancel_execution(exec_id)
+                    ok = await stream.cancel_execution(exec_id, reason="Execution paused by user")
                     if ok:
                         cancelled.append(exec_id)
                 except Exception:
@@ -357,8 +357,8 @@ async def handle_pause(request: web.Request) -> web.Response:
     runtime.pause_timers()
 
     # Switch to staging (agent still loaded, ready to re-run)
-    if session.mode_state is not None:
-        await session.mode_state.switch_to_staging(source="frontend")
+    if session.phase_state is not None:
+        await session.phase_state.switch_to_staging(source="frontend")
 
     return web.json_response(
         {
@@ -400,7 +400,9 @@ async def handle_stop(request: web.Request) -> web.Response:
                     if hasattr(node, "cancel_current_turn"):
                         node.cancel_current_turn()
 
-            cancelled = await stream.cancel_execution(execution_id)
+            cancelled = await stream.cancel_execution(
+                execution_id, reason="Execution stopped by user"
+            )
             if cancelled:
                 # Cancel queen's in-progress LLM turn
                 if session.queen_executor:
diff --git a/core/framework/server/routes_sessions.py b/core/framework/server/routes_sessions.py
index 0a3f616a..b16646ba 100644
--- a/core/framework/server/routes_sessions.py
+++ b/core/framework/server/routes_sessions.py
@@ -61,7 +61,7 @@ def _session_to_live_dict(session) -> dict:
         "loaded_at": session.loaded_at,
         "uptime_seconds": round(time.time() - session.loaded_at, 1),
         "intro_message": getattr(session.runner, "intro_message", "") or "",
-        "queen_phase": phase_state.phase if phase_state else "building",
+        "queen_phase": phase_state.phase if phase_state else "planning",
     }
 
 
diff --git a/core/framework/server/session_manager.py b/core/framework/server/session_manager.py
index 0b21ca39..fb82d840 100644
--- a/core/framework/server/session_manager.py
+++ b/core/framework/server/session_manager.py
@@ -46,6 +46,8 @@ class Session:
     judge_task: asyncio.Task | None = None
     escalation_sub: str | None = None
     worker_handoff_sub: str | None = None
+    # Memory consolidation subscription (fires on CONTEXT_COMPACTED)
+    memory_consolidation_sub: str | None = None
     # Session directory resumption:
     # When set, _start_queen writes queen conversations to this existing session's
     # directory instead of creating a new one.  This lets cold-restores accumulate
@@ -325,9 +327,9 @@ class SessionManager:
             model=model,
         )
 
-        # Notify queen about the loaded worker (skip for hive_coder itself).
+        # Notify queen about the loaded worker (skip for queen itself).
         # Health judge disabled for simplicity.
-        if agent_path.name != "hive_coder" and session.worker_runtime:
+        if agent_path.name != "queen" and session.worker_runtime:
             # await self._start_judge(session, session.runner._storage_path)
             await self._notify_queen_worker_loaded(session)
 
@@ -379,6 +381,11 @@ class SessionManager:
         if session is None:
             return False
 
+        # Capture session data for memory consolidation before teardown
+        _llm = getattr(session, "llm", None)
+        _storage_id = getattr(session, "queen_resume_from", None) or session_id
+        _session_dir = Path.home() / ".hive" / "queen" / "session" / _storage_id
+
         # Stop judge
         self._stop_judge(session)
         if session.worker_handoff_sub is not None:
@@ -388,7 +395,13 @@ class SessionManager:
                 pass
             session.worker_handoff_sub = None
 
-        # Stop queen
+        # Stop queen and memory consolidation subscription
+        if session.memory_consolidation_sub is not None:
+            try:
+                session.event_bus.unsubscribe(session.memory_consolidation_sub)
+            except Exception:
+                pass
+            session.memory_consolidation_sub = None
         if session.queen_task is not None:
             session.queen_task.cancel()
             session.queen_task = None
@@ -401,6 +414,17 @@ class SessionManager:
             except Exception as e:
                 logger.error("Error cleaning up worker: %s", e)
 
+        # Final memory consolidation — fire-and-forget so teardown isn't blocked.
+        if _llm is not None and _session_dir.exists():
+            import asyncio
+
+            from framework.agents.queen.queen_memory import consolidate_queen_memory
+
+            asyncio.create_task(
+                consolidate_queen_memory(session_id, _session_dir, _llm),
+                name=f"queen-memory-consolidation-{session_id}",
+            )
+
         logger.info("Session '%s' stopped", session_id)
         return True
 
@@ -461,13 +485,7 @@ class SessionManager:
         are written to the ORIGINAL session's directory so the full conversation
         history accumulates in one place across server restarts.
         """
-        from framework.agents.hive_coder.agent import (
-            queen_goal,
-            queen_graph as _queen_graph,
-        )
-        from framework.graph.executor import GraphExecutor
-        from framework.runner.tool_registry import ToolRegistry
-        from framework.runtime.core import Runtime
+        from framework.server.queen_orchestrator import create_queen
 
         hive_home = Path.home() / ".hive"
 
@@ -505,284 +523,33 @@ class SessionManager:
         except OSError:
             pass
 
-        # Register MCP coding tools
-        queen_registry = ToolRegistry()
-        import framework.agents.hive_coder as _hive_coder_pkg
-
-        hive_coder_dir = Path(_hive_coder_pkg.__file__).parent
-        mcp_config = hive_coder_dir / "mcp_servers.json"
-        if mcp_config.exists():
-            try:
-                queen_registry.load_mcp_config(mcp_config)
-                logger.info("Queen: loaded MCP tools from %s", mcp_config)
-            except Exception:
-                logger.warning("Queen: MCP config failed to load", exc_info=True)
-
-        # Phase state for building/running phase switching
-        from framework.tools.queen_lifecycle_tools import (
-            QueenPhaseState,
-            register_queen_lifecycle_tools,
-        )
-
-        # Start in staging when the caller provided an agent, building otherwise.
-        initial_phase = "staging" if worker_identity else "building"
-        phase_state = QueenPhaseState(phase=initial_phase, event_bus=session.event_bus)
-        session.phase_state = phase_state
-
-        # Always register lifecycle tools — they check session.worker_runtime
-        # at call time, so they work even if no worker is loaded yet.
-        register_queen_lifecycle_tools(
-            queen_registry,
+        session.queen_task = await create_queen(
             session=session,
-            session_id=session.id,
             session_manager=self,
-            manager_session_id=session.id,
-            phase_state=phase_state,
+            worker_identity=worker_identity,
+            queen_dir=queen_dir,
+            initial_prompt=initial_prompt,
         )
 
-        # Monitoring tools need concrete worker paths — only register when present
-        if session.worker_runtime:
-            from framework.tools.worker_monitoring_tools import register_worker_monitoring_tools
+        # Memory consolidation — triggered by context compaction events.
+        # Compaction is a natural signal that "enough has happened to be worth remembering".
+        _consolidation_llm = session.llm
+        _consolidation_session_dir = queen_dir
 
-            register_worker_monitoring_tools(
-                queen_registry,
-                session.event_bus,
-                session.worker_path,
-                stream_id="queen",
-                worker_graph_id=session.worker_runtime._graph_id,
+        async def _on_compaction(_event) -> None:
+            from framework.agents.queen.queen_memory import consolidate_queen_memory
+
+            await consolidate_queen_memory(
+                session.id, _consolidation_session_dir, _consolidation_llm
             )
 
-        queen_tools = list(queen_registry.get_tools().values())
-        queen_tool_executor = queen_registry.get_executor()
+        from framework.runtime.event_bus import EventType as _ET
 
-        # Partition tools into phase-specific sets and import prompt segments
-        from framework.agents.hive_coder.nodes import (
-            _QUEEN_BUILDING_TOOLS,
-            _QUEEN_RUNNING_TOOLS,
-            _QUEEN_STAGING_TOOLS,
-            _appendices,
-            _gcu_building_section,
-            _package_builder_knowledge,
-            _queen_behavior_always,
-            _queen_behavior_building,
-            _queen_behavior_running,
-            _queen_behavior_staging,
-            _queen_identity_building,
-            _queen_identity_running,
-            _queen_identity_staging,
-            _queen_phase_7,
-            _queen_style,
-            _queen_tools_building,
-            _queen_tools_running,
-            _queen_tools_staging,
+        session.memory_consolidation_sub = session.event_bus.subscribe(
+            event_types=[_ET.CONTEXT_COMPACTED],
+            handler=_on_compaction,
         )
 
-        building_names = set(_QUEEN_BUILDING_TOOLS)
-        staging_names = set(_QUEEN_STAGING_TOOLS)
-        running_names = set(_QUEEN_RUNNING_TOOLS)
-
-        registered_names = {t.name for t in queen_tools}
-        missing_building = building_names - registered_names
-        if missing_building:
-            logger.warning(
-                "Queen: %d/%d building tools NOT registered: %s",
-                len(missing_building),
-                len(building_names),
-                sorted(missing_building),
-            )
-        logger.info("Queen: registered tools: %s", sorted(registered_names))
-
-        phase_state.building_tools = [t for t in queen_tools if t.name in building_names]
-        phase_state.staging_tools = [t for t in queen_tools if t.name in staging_names]
-        phase_state.running_tools = [t for t in queen_tools if t.name in running_names]
-
-        # Build queen graph with adjusted prompt + tools
-        _orig_node = _queen_graph.nodes[0]
-
-        if worker_identity is None:
-            worker_identity = (
-                "\n\n# Worker Profile\n"
-                "No worker agent loaded. You are operating independently.\n"
-                "Handle all tasks directly using your coding tools."
-            )
-
-        # Compose phase-specific prompts.
-        _building_body = (
-            _queen_style
-            + _queen_tools_building
-            + _queen_behavior_always
-            + _queen_behavior_building
-            + _package_builder_knowledge
-            + _gcu_building_section
-            + _queen_phase_7
-            + _appendices
-            + worker_identity
-        )
-        phase_state.prompt_building = _queen_identity_building + _building_body
-        phase_state.prompt_staging = (
-            _queen_identity_staging
-            + _queen_style
-            + _queen_tools_staging
-            + _queen_behavior_always
-            + _queen_behavior_staging
-            + worker_identity
-        )
-        phase_state.prompt_running = (
-            _queen_identity_running
-            + _queen_style
-            + _queen_tools_running
-            + _queen_behavior_always
-            + _queen_behavior_running
-            + worker_identity
-        )
-
-        # Build the session_start hook: selects the best-fit expert persona
-        # from the user's opening message and replaces the identity prefix.
-        from framework.agents.hive_coder.nodes.thinking_hook import select_expert_persona
-        from framework.graph.event_loop_node import HookContext, HookResult
-        from framework.runtime.event_bus import AgentEvent, EventType
-
-        _session_llm = session.llm
-        _session_event_bus = session.event_bus
-
-        async def _persona_hook(ctx: HookContext) -> HookResult | None:
-            persona = await select_expert_persona(ctx.trigger or "", _session_llm)
-            if not persona:
-                return None
-            if _session_event_bus is not None:
-                await _session_event_bus.publish(
-                    AgentEvent(
-                        type=EventType.QUEEN_PERSONA_SELECTED,
-                        stream_id="queen",
-                        data={"persona": persona},
-                    )
-                )
-            return HookResult(system_prompt=persona + "\n\n" + _building_body)
-
-        initial_prompt_text = phase_state.get_current_prompt()
-
-        registered_tool_names = set(queen_registry.get_tools().keys())
-        declared_tools = _orig_node.tools or []
-        available_tools = [t for t in declared_tools if t in registered_tool_names]
-
-        node_updates: dict = {
-            "system_prompt": initial_prompt_text,
-        }
-        if set(available_tools) != set(declared_tools):
-            missing = sorted(set(declared_tools) - registered_tool_names)
-            if missing:
-                logger.warning("Queen: tools not available: %s", missing)
-            node_updates["tools"] = available_tools
-
-        adjusted_node = _orig_node.model_copy(update=node_updates)
-        _queen_loop_config = {
-            **(_queen_graph.loop_config or {}),
-            "hooks": {"session_start": [_persona_hook]},
-        }
-        queen_graph = _queen_graph.model_copy(
-            update={"nodes": [adjusted_node], "loop_config": _queen_loop_config}
-        )
-
-        queen_runtime = Runtime(hive_home / "queen")
-
-        async def _queen_loop():
-            try:
-                executor = GraphExecutor(
-                    runtime=queen_runtime,
-                    llm=session.llm,
-                    tools=queen_tools,
-                    tool_executor=queen_tool_executor,
-                    event_bus=session.event_bus,
-                    stream_id="queen",
-                    storage_path=queen_dir,
-                    loop_config=_queen_loop_config,
-                    execution_id=session.id,
-                    dynamic_tools_provider=phase_state.get_current_tools,
-                    dynamic_prompt_provider=phase_state.get_current_prompt,
-                )
-                session.queen_executor = executor
-
-                # Wire inject_notification so phase switches notify the queen LLM
-                async def _inject_phase_notification(content: str) -> None:
-                    node = executor.node_registry.get("queen")
-                    if node is not None and hasattr(node, "inject_event"):
-                        await node.inject_event(content)
-
-                phase_state.inject_notification = _inject_phase_notification
-
-                # Auto-switch to staging when worker execution finishes naturally
-                # and notify the queen about the termination
-                from framework.runtime.event_bus import EventType as _ET
-
-                async def _on_worker_done(event):
-                    if event.stream_id == "queen":
-                        return
-                    if phase_state.phase == "running":
-                        # Build termination notification for the queen
-                        if event.type == _ET.EXECUTION_COMPLETED:
-                            output = event.data.get("output", {})
-                            output_summary = ""
-                            if output:
-                                # Summarize key outputs for the queen
-                                for key, value in output.items():
-                                    val_str = str(value)
-                                    if len(val_str) > 200:
-                                        val_str = val_str[:200] + "..."
-                                    output_summary += f"\n  {key}: {val_str}"
-                            _out = output_summary or " (no output keys set)"
-                            notification = (
-                                "[WORKER_TERMINAL] Worker finished successfully.\n"
-                                f"Output:{_out}\n"
-                                "Report this to the user. "
-                                "Ask if they want to continue with another run."
-                            )
-                        else:  # EXECUTION_FAILED
-                            error = event.data.get("error", "Unknown error")
-                            notification = (
-                                "[WORKER_TERMINAL] Worker failed.\n"
-                                f"Error: {error}\n"
-                                "Report this to the user and help them troubleshoot."
-                            )
-
-                        # Inject notification to queen before phase switch
-                        node = executor.node_registry.get("queen")
-                        if node is not None and hasattr(node, "inject_event"):
-                            await node.inject_event(notification)
-
-                        await phase_state.switch_to_staging(source="auto")
-
-                session.event_bus.subscribe(
-                    event_types=[_ET.EXECUTION_COMPLETED, _ET.EXECUTION_FAILED],
-                    handler=_on_worker_done,
-                )
-                self._subscribe_worker_handoffs(session, executor)
-
-                logger.info(
-                    "Queen starting in %s phase with %d tools: %s",
-                    phase_state.phase,
-                    len(phase_state.get_current_tools()),
-                    [t.name for t in phase_state.get_current_tools()],
-                )
-                result = await executor.execute(
-                    graph=queen_graph,
-                    goal=queen_goal,
-                    input_data={"greeting": initial_prompt or "Session started."},
-                    session_state={"resume_session_id": session.id},
-                )
-                if result.success:
-                    logger.warning("Queen executor returned (should be forever-alive)")
-                else:
-                    logger.error(
-                        "Queen executor failed: %s",
-                        result.error or "(no error message)",
-                    )
-            except Exception:
-                logger.error("Queen conversation crashed", exc_info=True)
-            finally:
-                session.queen_executor = None
-
-        session.queen_task = asyncio.create_task(_queen_loop())
-
     # ------------------------------------------------------------------
     # Judge startup / teardown
     # ------------------------------------------------------------------
diff --git a/core/framework/tools/queen_lifecycle_tools.py b/core/framework/tools/queen_lifecycle_tools.py
index 0549be61..d257f650 100644
--- a/core/framework/tools/queen_lifecycle_tools.py
+++ b/core/framework/tools/queen_lifecycle_tools.py
@@ -71,12 +71,13 @@ class WorkerSessionAdapter:
 class QueenPhaseState:
     """Mutable state container for queen operating phase.
 
-    Three phases: building → staging → running.
+    Four phases: planning → building → staging → running.
     Shared between the dynamic_tools_provider callback and tool handlers
     that trigger phase transitions.
     """
 
-    phase: str = "building"  # "building", "staging", or "running"
+    phase: str = "building"  # "planning", "building", "staging", or "running"
+    planning_tools: list = field(default_factory=list)  # list[Tool]
     building_tools: list = field(default_factory=list)  # list[Tool]
     staging_tools: list = field(default_factory=list)  # list[Tool]
     running_tools: list = field(default_factory=list)  # list[Tool]
@@ -84,12 +85,15 @@ class QueenPhaseState:
     event_bus: Any = None  # EventBus — for emitting QUEEN_PHASE_CHANGED events
 
     # Phase-specific prompts (set by session_manager after construction)
+    prompt_planning: str = ""
     prompt_building: str = ""
     prompt_staging: str = ""
     prompt_running: str = ""
 
     def get_current_tools(self) -> list:
         """Return tools for the current phase."""
+        if self.phase == "planning":
+            return list(self.planning_tools)
         if self.phase == "running":
             return list(self.running_tools)
         if self.phase == "staging":
@@ -98,6 +102,8 @@ class QueenPhaseState:
 
     def get_current_prompt(self) -> str:
         """Return the system prompt for the current phase."""
+        if self.phase == "planning":
+            return self.prompt_planning
         if self.phase == "running":
             return self.prompt_running
         if self.phase == "staging":
@@ -128,22 +134,15 @@ class QueenPhaseState:
         tool_names = [t.name for t in self.running_tools]
         logger.info("Queen phase → running (source=%s, tools: %s)", source, tool_names)
         await self._emit_phase_event()
-        if self.inject_notification:
-            if source == "frontend":
-                msg = (
-                    "[PHASE CHANGE] The user clicked Run in the UI. Switched to RUNNING phase. "
-                    "Worker is now executing. You have monitoring/lifecycle tools: "
-                    + ", ".join(tool_names)
-                    + "."
-                )
-            else:
-                msg = (
-                    "[PHASE CHANGE] Switched to RUNNING phase. "
-                    "Worker is executing. You now have monitoring/lifecycle tools: "
-                    + ", ".join(tool_names)
-                    + "."
-                )
-            await self.inject_notification(msg)
+        # Skip notification when source="tool" — the tool result already
+        # contains the phase change info.
+        if self.inject_notification and source != "tool":
+            await self.inject_notification(
+                "[PHASE CHANGE] The user clicked Run in the UI. Switched to RUNNING phase. "
+                "Worker is now executing. You have monitoring/lifecycle tools: "
+                + ", ".join(tool_names)
+                + "."
+            )
 
     async def switch_to_staging(self, source: str = "tool") -> None:
         """Switch to staging phase and notify the queen.
@@ -157,26 +156,21 @@ class QueenPhaseState:
         tool_names = [t.name for t in self.staging_tools]
         logger.info("Queen phase → staging (source=%s, tools: %s)", source, tool_names)
         await self._emit_phase_event()
-        if self.inject_notification:
+        # Skip notification when source="tool" — the tool result already
+        # contains the phase change info.
+        if self.inject_notification and source != "tool":
             if source == "frontend":
                 msg = (
                     "[PHASE CHANGE] The user stopped the worker from the UI. "
                     "Switched to STAGING phase. Agent is still loaded. "
                     "Available tools: " + ", ".join(tool_names) + "."
                 )
-            elif source == "auto":
+            else:
                 msg = (
                     "[PHASE CHANGE] Worker execution completed. Switched to STAGING phase. "
                     "Agent is still loaded. Call run_agent_with_input(task) to run again. "
                     "Available tools: " + ", ".join(tool_names) + "."
                 )
-            else:
-                msg = (
-                    "[PHASE CHANGE] Switched to STAGING phase. "
-                    "Agent loaded and ready. Call run_agent_with_input(task) to start, "
-                    "or stop_worker_and_edit() to go back to building. "
-                    "Available tools: " + ", ".join(tool_names) + "."
-                )
             await self.inject_notification(msg)
 
     async def switch_to_building(self, source: str = "tool") -> None:
@@ -191,13 +185,35 @@ class QueenPhaseState:
         tool_names = [t.name for t in self.building_tools]
         logger.info("Queen phase → building (source=%s, tools: %s)", source, tool_names)
         await self._emit_phase_event()
-        if self.inject_notification:
+        if self.inject_notification and source != "tool":
             await self.inject_notification(
                 "[PHASE CHANGE] Switched to BUILDING phase. "
                 "Lifecycle tools removed. Full coding tools restored. "
                 "Call load_built_agent(path) when ready to stage."
             )
 
+    async def switch_to_planning(self, source: str = "tool") -> None:
+        """Switch to planning phase and notify the queen.
+
+        Args:
+            source: Who triggered the switch — "tool", "frontend", or "auto".
+        """
+        if self.phase == "planning":
+            return
+        self.phase = "planning"
+        tool_names = [t.name for t in self.planning_tools]
+        logger.info("Queen phase → planning (source=%s, tools: %s)", source, tool_names)
+        await self._emit_phase_event()
+        # Skip notification when source="tool" — the tool result already
+        # contains the phase change info; injecting a duplicate notification
+        # causes the queen to respond twice.
+        if self.inject_notification and source != "tool":
+            await self.inject_notification(
+                "[PHASE CHANGE] Switched to PLANNING phase. "
+                "Coding tools removed. Discuss goals and design with the user. "
+                "Available tools: " + ", ".join(tool_names) + "."
+            )
+
 
 def build_worker_profile(runtime: AgentRuntime, agent_path: Path | str | None = None) -> str:
     """Build a worker capability profile from its graph/goal definition.
@@ -423,7 +439,7 @@ def register_queen_lifecycle_tools(
 
     # --- stop_worker ----------------------------------------------------------
 
-    async def stop_worker() -> str:
+    async def stop_worker(*, reason: str = "Stopped by queen") -> str:
         """Cancel all active worker executions across all graphs.
 
         Stops the worker immediately. Returns the IDs of cancelled executions.
@@ -453,7 +469,7 @@ def register_queen_lifecycle_tools(
 
                 for exec_id in list(stream.active_execution_ids):
                     try:
-                        ok = await stream.cancel_execution(exec_id)
+                        ok = await stream.cancel_execution(exec_id, reason=reason)
                         if ok:
                             cancelled.append(exec_id)
                     except Exception as e:
@@ -498,6 +514,11 @@ def register_queen_lifecycle_tools(
             "Use your coding tools to modify the agent, then call "
             "load_built_agent(path) to stage it again."
         )
+        # Nudge the queen to start coding instead of blocking for user input.
+        if phase_state is not None and phase_state.inject_notification:
+            await phase_state.inject_notification(
+                "[PHASE CHANGE] Switched to BUILDING phase. Start implementing the changes now."
+            )
         return json.dumps(result)
 
     _stop_edit_tool = Tool(
@@ -514,6 +535,171 @@ def register_queen_lifecycle_tools(
     )
     tools_registered += 1
 
+    # --- stop_worker_and_plan (Running/Staging → Planning) --------------------
+
+    async def stop_worker_and_plan() -> str:
+        """Stop the worker and switch to planning phase for diagnosis."""
+        stop_result = await stop_worker()
+
+        # Switch to planning phase
+        if phase_state is not None:
+            await phase_state.switch_to_planning(source="tool")
+
+        result = json.loads(stop_result)
+        result["phase"] = "planning"
+        result["message"] = (
+            "Worker stopped. You are now in planning phase. "
+            "Diagnose the issue using read-only tools (checkpoints, logs, sessions), "
+            "discuss a fix plan with the user, then call "
+            "initialize_and_build_agent() to implement the fix."
+        )
+        return json.dumps(result)
+
+    _stop_plan_tool = Tool(
+        name="stop_worker_and_plan",
+        description=(
+            "Stop the worker and switch to planning phase for diagnosis. "
+            "Use this when you need to investigate an issue before fixing it. "
+            "After diagnosis, call initialize_and_build_agent() to switch to building."
+        ),
+        parameters={"type": "object", "properties": {}},
+    )
+    registry.register(
+        "stop_worker_and_plan", _stop_plan_tool, lambda inputs: stop_worker_and_plan()
+    )
+    tools_registered += 1
+
+    # --- replan_agent (Building → Planning) -----------------------------------
+
+    async def replan_agent() -> str:
+        """Switch from building back to planning phase.
+        Only use when the user explicitly asks to re-plan."""
+        if phase_state is not None:
+            if phase_state.phase != "building":
+                return json.dumps(
+                    {"error": f"Cannot replan: currently in {phase_state.phase} phase."}
+                )
+            await phase_state.switch_to_planning(source="tool")
+        return json.dumps(
+            {
+                "status": "replanning",
+                "phase": "planning",
+                "message": (
+                    "Switched to PLANNING phase. Coding tools removed. "
+                    "Discuss the new design with the user."
+                ),
+            }
+        )
+
+    _replan_tool = Tool(
+        name="replan_agent",
+        description=(
+            "Switch from building back to planning phase. "
+            "Only use when the user explicitly asks to re-plan or redesign the agent."
+        ),
+        parameters={"type": "object", "properties": {}},
+    )
+    registry.register("replan_agent", _replan_tool, lambda inputs: replan_agent())
+    tools_registered += 1
+
+    # --- initialize_and_build_agent wrapper (Planning → Building) -------------
+    # With agent_name: scaffold a new agent via MCP tool, then switch to building.
+    # Without agent_name: just switch to building (for fixing an existing loaded agent).
+
+    _existing_init = registry._tools.get("initialize_and_build_agent")
+    if _existing_init is not None:
+        _orig_init_executor = _existing_init.executor
+
+        async def initialize_and_build_agent_wrapper(inputs: dict) -> str:
+            """Wrapper: scaffold or just switch to building phase."""
+            agent_name = (inputs.get("agent_name") or "").strip()
+
+            # No agent_name → try to fall back to the session's current agent,
+            # or fail with actionable guidance.
+            if not agent_name:
+                # Try to resolve agent_name from the current session
+                fallback_path = getattr(session, "worker_path", None)
+                if fallback_path is not None:
+                    agent_name = Path(fallback_path).name
+                else:
+                    # Server path: check SessionManager
+                    if session_manager is not None and manager_session_id:
+                        srv_session = session_manager.get_session(manager_session_id)
+                        if srv_session and getattr(srv_session, "worker_path", None):
+                            fallback_path = srv_session.worker_path
+                            agent_name = Path(fallback_path).name
+
+                if not agent_name:
+                    return json.dumps(
+                        {
+                            "error": (
+                                "No agent_name provided and no agent loaded in this session. "
+                                "To fix: call list_agents() to find the agent name, then call "
+                                "initialize_and_build_agent(agent_name='<name>') to scaffold it."
+                            )
+                        }
+                    )
+
+                # Fall back succeeded — switch to building without scaffolding
+                logger.info(
+                    "initialize_and_build_agent: no agent_name provided, "
+                    "falling back to session agent '%s'",
+                    agent_name,
+                )
+                if phase_state is not None:
+                    await phase_state.switch_to_building(source="tool")
+                    if phase_state.inject_notification:
+                        await phase_state.inject_notification(
+                            "[PHASE CHANGE] Switched to BUILDING phase. "
+                            "Start implementing the fix now."
+                        )
+                return json.dumps(
+                    {
+                        "status": "editing",
+                        "phase": "building",
+                        "agent_name": agent_name,
+                        "warning": (
+                            f"No agent_name provided — using session agent '{agent_name}'. "
+                            f"Agent files are at exports/{agent_name}/."
+                        ),
+                        "message": (
+                            "Switched to BUILDING phase. Full coding tools restored. "
+                            "Implement the fix, then call load_built_agent(path) to reload."
+                        ),
+                    }
+                )
+
+            # Has agent_name → scaffold via MCP tool
+            result = _orig_init_executor(inputs)
+            # Handle both sync and async executors
+            if asyncio.iscoroutine(result) or asyncio.isfuture(result):
+                result = await result
+            # If result is a ToolResult, extract the text content
+            result_str = str(result)
+            if hasattr(result, "content"):
+                result_str = str(result.content)
+            try:
+                parsed = json.loads(result_str)
+                if parsed.get("success", True):
+                    if phase_state is not None:
+                        await phase_state.switch_to_building(source="tool")
+                        # Inject a continuation message so the queen starts
+                        # building immediately instead of blocking for user input.
+                        if phase_state.inject_notification:
+                            await phase_state.inject_notification(
+                                "[PHASE CHANGE] Agent scaffolded and switched to BUILDING phase. "
+                                "Start implementing the agent nodes now."
+                            )
+            except (json.JSONDecodeError, KeyError, TypeError):
+                pass
+            return result_str
+
+        registry.register(
+            "initialize_and_build_agent",
+            _existing_init.tool,
+            lambda inputs: initialize_and_build_agent_wrapper(inputs),
+        )
+
     # --- stop_worker (Running → Staging) -------------------------------------
 
     async def stop_worker_to_staging() -> str:
@@ -1429,6 +1615,51 @@ def register_queen_lifecycle_tools(
             if not resolved_path.exists():
                 return json.dumps({"error": f"Agent path does not exist: {agent_path}"})
 
+            # Pre-check: verify the module exports goal/nodes/edges before
+            # attempting the full load.  This gives the queen an actionable
+            # error message instead of a cryptic ImportError or TypeError.
+            try:
+                import importlib
+                import sys as _sys
+
+                pkg_name = resolved_path.name
+                parent_dir = str(resolved_path.resolve().parent)
+                # Temporarily put parent on sys.path for import
+                if parent_dir not in _sys.path:
+                    _sys.path.insert(0, parent_dir)
+                # Evict stale cached modules
+                stale = [n for n in _sys.modules if n == pkg_name or n.startswith(f"{pkg_name}.")]
+                for n in stale:
+                    del _sys.modules[n]
+
+                mod = importlib.import_module(pkg_name)
+                missing_attrs = [
+                    attr for attr in ("goal", "nodes", "edges") if getattr(mod, attr, None) is None
+                ]
+                if missing_attrs:
+                    return json.dumps(
+                        {
+                            "error": (
+                                f"Agent module '{pkg_name}' is missing module-level "
+                                f"attributes: {', '.join(missing_attrs)}. "
+                                f"Fix: in {pkg_name}/__init__.py, add "
+                                f"'from .agent import {', '.join(missing_attrs)}' "
+                                f"so that 'import {pkg_name}' exposes them at package level."
+                            )
+                        }
+                    )
+            except Exception as pre_err:
+                return json.dumps(
+                    {
+                        "error": (
+                            f"Failed to import agent module '{resolved_path.name}': {pre_err}. "
+                            f"Fix: ensure {resolved_path.name}/__init__.py exists and can be "
+                            f"imported without errors (check syntax, missing dependencies, "
+                            f"and relative imports)."
+                        )
+                    }
+                )
+
             try:
                 updated_session = await session_manager.load_worker(
                     manager_session_id,
@@ -1436,7 +1667,36 @@ def register_queen_lifecycle_tools(
                 )
                 info = updated_session.worker_info
 
-                # Switch to staging phase after successful load
+                # Validate that all tools declared by nodes are registered
+                loaded_runtime = _get_runtime()
+                if loaded_runtime is not None:
+                    available_tool_names = {t.name for t in loaded_runtime._tools}
+                    missing_by_node: dict[str, list[str]] = {}
+                    for node in loaded_runtime.graph.nodes:
+                        if node.tools:
+                            missing = set(node.tools) - available_tool_names
+                            if missing:
+                                missing_by_node[f"{node.name} (id={node.id})"] = sorted(missing)
+                    if missing_by_node:
+                        # Unload the broken worker
+                        try:
+                            await session_manager.unload_worker(manager_session_id)
+                        except Exception:
+                            pass
+                        details = "; ".join(
+                            f"Node '{k}' missing {v}" for k, v in missing_by_node.items()
+                        )
+                        return json.dumps(
+                            {
+                                "error": (
+                                    f"Tool validation failed: {details}. "
+                                    "Fix node tool declarations or add the missing "
+                                    "tools, then try loading again."
+                                )
+                            }
+                        )
+
+                # Switch to staging phase after successful load + validation
                 if phase_state is not None:
                     await phase_state.switch_to_staging()
 
diff --git a/core/framework/tools/queen_memory_tools.py b/core/framework/tools/queen_memory_tools.py
new file mode 100644
index 00000000..baaf543b
--- /dev/null
+++ b/core/framework/tools/queen_memory_tools.py
@@ -0,0 +1,38 @@
+"""Tool for the queen to write to her episodic memory.
+
+The queen can consciously record significant moments during a session — like
+writing in a diary. Semantic memory (MEMORY.md) is updated automatically at
+session end and is never written by the queen directly.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from framework.runner.tool_registry import ToolRegistry
+
+
+def write_to_diary(entry: str) -> str:
+    """Write a prose entry to today's episodic memory.
+
+    Use this when something significant just happened: a pipeline went live, the
+    user shared an important preference, a goal was achieved or abandoned, or
+    you want to record something that should be remembered across sessions.
+
+    Write in first person, as you would in a private diary. Be specific — what
+    happened, how the user responded, what it means going forward. One or two
+    paragraphs is enough.
+
+    You do not need to include a timestamp or date heading; those are added
+    automatically.
+    """
+    from framework.agents.hive_coder.queen_memory import append_episodic_entry
+
+    append_episodic_entry(entry)
+    return "Diary entry recorded."
+
+
+def register_queen_memory_tools(registry: ToolRegistry) -> None:
+    """Register the episodic memory tool into the queen's tool registry."""
+    registry.register_function(write_to_diary)
diff --git a/core/framework/tools/session_graph_tools.py b/core/framework/tools/session_graph_tools.py
index ded49c2e..7341fb50 100644
--- a/core/framework/tools/session_graph_tools.py
+++ b/core/framework/tools/session_graph_tools.py
@@ -1,6 +1,6 @@
 """Graph lifecycle tools for multi-graph sessions.
 
-These tools allow an agent (e.g. hive_coder) to load, unload, start,
+These tools allow an agent (e.g. queen) to load, unload, start,
 restart, and query other agent graphs within the same runtime session.
 
 Usage::
diff --git a/core/framework/tui/app.py b/core/framework/tui/app.py
index 9352a690..0efff469 100644
--- a/core/framework/tui/app.py
+++ b/core/framework/tui/app.py
@@ -445,8 +445,8 @@ class AdenTUI(App):
         agent_name = runner.agent_path.name
         self.notify(f"Agent loaded: {agent_name}", severity="information", timeout=3)
 
-        # Load health judge + queen for worker agents (skip for hive_coder itself)
-        if agent_name != "hive_coder":
+        # Load health judge + queen for worker agents (skip for queen itself)
+        if agent_name != "queen":
             await self._load_judge_and_queen(runner._storage_path)
 
     async def _load_judge_and_queen(self, storage_path) -> None:
@@ -515,18 +515,18 @@ class AdenTUI(App):
             #    worker.  Escalation tickets from the judge are injected
             #    as messages into this conversation.
             # ---------------------------------------------------------------
-            import framework.agents.hive_coder as _hive_coder_pkg
-            from framework.agents.hive_coder.agent import queen_goal, queen_graph
+            import framework.agents.queen as _queen_pkg
+            from framework.agents.queen.agent import queen_goal, queen_graph
 
             # Queen gets lifecycle tools, monitoring tools, AND coding tools
-            # from the hive_coder's coder-tools MCP server.  This spawns a
+            # from the queen's coder-tools MCP server.  This spawns a
             # separate MCP process so the queen can read/write files, run
             # commands, discover tools, etc. independently of the worker.
             queen_registry = ToolRegistry()
 
-            # Coding tools from hive_coder's MCP config (coder_tools_server).
-            hive_coder_dir = Path(_hive_coder_pkg.__file__).parent
-            mcp_config = hive_coder_dir / "mcp_servers.json"
+            # Coding tools from queen's MCP config (coder_tools_server).
+            queen_dir = Path(_queen_pkg.__file__).parent
+            mcp_config = queen_dir / "mcp_servers.json"
             if mcp_config.exists():
                 try:
                     queen_registry.load_mcp_config(mcp_config)
@@ -556,7 +556,7 @@ class AdenTUI(App):
             queen_tool_executor = queen_registry.get_executor()
 
             # Partition tools into phase-specific sets
-            from framework.agents.hive_coder.nodes import (
+            from framework.agents.queen.nodes import (
                 _QUEEN_BUILDING_TOOLS,
                 _QUEEN_RUNNING_TOOLS,
                 _QUEEN_STAGING_TOOLS,
diff --git a/core/frontend/src/api/types.ts b/core/frontend/src/api/types.ts
index c5616b68..3253ab79 100644
--- a/core/frontend/src/api/types.ts
+++ b/core/frontend/src/api/types.ts
@@ -12,8 +12,8 @@ export interface LiveSession {
   loaded_at: number;
   uptime_seconds: number;
   intro_message?: string;
-  /** Queen operating phase — "building", "staging", or "running" */
-  queen_phase?: "building" | "staging" | "running";
+  /** Queen operating phase — "planning", "building", "staging", or "running" */
+  queen_phase?: "planning" | "building" | "staging" | "running";
   /** Present in 409 conflict responses when worker is still loading */
   loading?: boolean;
 }
diff --git a/core/frontend/src/components/AgentGraph.tsx b/core/frontend/src/components/AgentGraph.tsx
index 1fba32fd..e0f4ae7d 100644
--- a/core/frontend/src/components/AgentGraph.tsx
+++ b/core/frontend/src/components/AgentGraph.tsx
@@ -31,7 +31,7 @@ interface AgentGraphProps {
   version?: string;
   runState?: RunState;
   building?: boolean;
-  queenPhase?: "building" | "staging" | "running";
+  queenPhase?: "planning" | "building" | "staging" | "running";
 }
 
 // --- Extracted RunButton so hover state survives parent re-renders ---
@@ -278,7 +278,7 @@ export default function AgentGraph({ nodes, title: _title, onNodeClick, onRun, o
               </span>
             )}
           </div>
-          <RunButton runState={runState} disabled={nodes.length === 0 || queenPhase === "building"} onRun={handleRun} onPause={onPause ?? (() => {})} btnRef={runBtnRef} />
+          <RunButton runState={runState} disabled={nodes.length === 0 || queenPhase === "building" || queenPhase === "planning"} onRun={handleRun} onPause={onPause ?? (() => {})} btnRef={runBtnRef} />
         </div>
         <div className="flex-1 flex items-center justify-center px-5">
           {building ? (
diff --git a/core/frontend/src/components/ChatPanel.tsx b/core/frontend/src/components/ChatPanel.tsx
index 1c87767c..ff910112 100644
--- a/core/frontend/src/components/ChatPanel.tsx
+++ b/core/frontend/src/components/ChatPanel.tsx
@@ -39,7 +39,7 @@ interface ChatPanelProps {
   /** Called when user dismisses the pending question without answering */
   onQuestionDismiss?: () => void;
   /** Queen operating phase — shown as a tag on queen messages */
-  queenPhase?: "building" | "staging" | "running";
+  queenPhase?: "planning" | "building" | "staging" | "running";
 }
 
 const queenColor = "hsl(45,95%,58%)";
@@ -144,7 +144,7 @@ function ToolActivityRow({ content }: { content: string }) {
   );
 }
 
-const MessageBubble = memo(function MessageBubble({ msg, queenPhase }: { msg: ChatMessage; queenPhase?: "building" | "staging" | "running" }) {
+const MessageBubble = memo(function MessageBubble({ msg, queenPhase }: { msg: ChatMessage; queenPhase?: "planning" | "building" | "staging" | "running" }) {
   const isUser = msg.type === "user";
   const isQueen = msg.role === "queen";
   const color = getColor(msg.agent, msg.role);
@@ -204,7 +204,9 @@ const MessageBubble = memo(function MessageBubble({ msg, queenPhase }: { msg: Ch
                 ? "running phase"
                 : queenPhase === "staging"
                   ? "staging phase"
-                  : "building phase"
+                  : queenPhase === "planning"
+                    ? "planning phase"
+                    : "building phase"
               : "Worker"}
           </span>
         </div>
diff --git a/core/frontend/src/lib/chat-helpers.ts b/core/frontend/src/lib/chat-helpers.ts
index 1211833c..c5b278f7 100644
--- a/core/frontend/src/lib/chat-helpers.ts
+++ b/core/frontend/src/lib/chat-helpers.ts
@@ -121,7 +121,8 @@ export function sseEventToChatMessage(
         id: `paused-${event.execution_id}`,
         agent: "System",
         agentColor: "",
-        content: "Execution paused by user",
+        content:
+          (event.data?.reason as string) || "Execution paused",
         timestamp: "",
         type: "system",
         thread,
diff --git a/core/frontend/src/pages/workspace.tsx b/core/frontend/src/pages/workspace.tsx
index 383db816..68471683 100644
--- a/core/frontend/src/pages/workspace.tsx
+++ b/core/frontend/src/pages/workspace.tsx
@@ -255,8 +255,8 @@ interface AgentBackendState {
   /** The message ID of the current worker input request (for inline reply box) */
   workerInputMessageId: string | null;
   queenBuilding: boolean;
-  /** Queen operating phase — "building" (coding), "staging" (loaded), or "running" (executing) */
-  queenPhase: "building" | "staging" | "running";
+  /** Queen operating phase — "planning" (design), "building" (coding), "staging" (loaded), or "running" (executing) */
+  queenPhase: "planning" | "building" | "staging" | "running";
   workerRunState: "idle" | "deploying" | "running";
   currentExecutionId: string | null;
   nodeLogs: Record<string, string[]>;
@@ -291,7 +291,7 @@ function defaultAgentState(): AgentBackendState {
     awaitingInput: false,
     workerInputMessageId: null,
     queenBuilding: false,
-    queenPhase: "building",
+    queenPhase: "planning",
     workerRunState: "idle",
     currentExecutionId: null,
     nodeLogs: {},
@@ -892,7 +892,7 @@ export default function Workspace() {
       // failed, the throw inside the catch exits the outer try block.
       const session = liveSession!;
       const displayName = formatAgentDisplayName(session.worker_name || agentType);
-      const initialPhase = session.queen_phase || (session.has_worker ? "staging" : "building");
+      const initialPhase = session.queen_phase || (session.has_worker ? "staging" : "planning");
       updateAgentState(agentType, {
         sessionId: session.session_id,
         displayName,
@@ -1788,8 +1788,11 @@ export default function Workspace() {
 
         case "queen_phase_changed": {
           const rawPhase = event.data?.phase as string;
-          const newPhase: "building" | "staging" | "running" =
-            rawPhase === "running" ? "running" : rawPhase === "staging" ? "staging" : "building";
+          const newPhase: "planning" | "building" | "staging" | "running" =
+            rawPhase === "running" ? "running"
+            : rawPhase === "staging" ? "staging"
+            : rawPhase === "planning" ? "planning"
+            : "building";
           updateAgentState(agentType, {
             queenPhase: newPhase,
             queenBuilding: newPhase === "building",
diff --git a/core/tests/test_event_loop_node.py b/core/tests/test_event_loop_node.py
index 0117e74b..b5bca906 100644
--- a/core/tests/test_event_loop_node.py
+++ b/core/tests/test_event_loop_node.py
@@ -763,7 +763,7 @@ class TestClientFacingBlocking:
 class TestEscalate:
     @pytest.mark.asyncio
     async def test_escalate_emits_event(self, runtime, node_spec, memory):
-        """escalate() should publish ESCALATION_REQUESTED."""
+        """escalate() should publish ESCALATION_REQUESTED and block for queen guidance."""
         node_spec.output_keys = []
         llm = MockStreamingLLM(
             scenarios=[
@@ -772,7 +772,6 @@ class TestEscalate:
                     {
                         "reason": "tool failure",
                         "context": "HTTP 401 from upstream",
-                        "wait_for_response": False,
                     },
                     tool_use_id="escalate_1",
                 ),
@@ -789,7 +788,20 @@ class TestEscalate:
 
         ctx = build_ctx(runtime, node_spec, memory, llm, stream_id="worker")
         node = EventLoopNode(event_bus=bus, config=LoopConfig(max_iterations=5))
+
+        async def queen_reply():
+            await asyncio.sleep(0.05)
+            await node.inject_event("Acknowledged, proceed.")
+
+        task = asyncio.create_task(queen_reply())
+
+        async def queen_reply():
+            await asyncio.sleep(0.05)
+            await node.inject_event("Acknowledged, proceed.")
+
+        task = asyncio.create_task(queen_reply())
         result = await node.execute(ctx)
+        await task
 
         assert result.success is True
         assert len(received) == 1
@@ -808,7 +820,6 @@ class TestEscalate:
                     {
                         "reason": "blocked",
                         "context": "dependency missing",
-                        "wait_for_response": False,
                     },
                     tool_use_id="escalate_1",
                 ),
@@ -827,7 +838,14 @@ class TestEscalate:
 
         ctx = build_ctx(runtime, node_spec, memory, llm, stream_id="worker")
         node = EventLoopNode(event_bus=bus, config=LoopConfig(max_iterations=5))
+
+        async def queen_reply():
+            await asyncio.sleep(0.05)
+            await node.inject_event("Queen acknowledges escalation.")
+
+        task = asyncio.create_task(queen_reply())
         result = await node.execute(ctx)
+        await task
 
         assert result.success is True
         queen_node.inject_event.assert_awaited_once()
@@ -842,7 +860,7 @@ class TestEscalate:
 
     @pytest.mark.asyncio
     async def test_escalate_waits_for_queen_input_and_skips_judge(self, runtime, node_spec, memory):
-        """wait_for_response=true should block for queen input before judge evaluation."""
+        """escalate() should block for queen input before judge evaluation."""
         node_spec.output_keys = ["result"]
         llm = MockStreamingLLM(
             scenarios=[
@@ -851,7 +869,6 @@ class TestEscalate:
                     {
                         "reason": "need direction",
                         "context": "conflicting constraints",
-                        "wait_for_response": True,
                     },
                     tool_use_id="escalate_1",
                 ),
@@ -1756,9 +1773,9 @@ class TestIsToolDoomLoop:
 
     def test_different_args_no_doom(self):
         node = EventLoopNode(config=LoopConfig(tool_doom_loop_threshold=3))
-        fp1 = [("search", '{"q": "a"}')]
-        fp2 = [("search", '{"q": "b"}')]
-        fp3 = [("search", '{"q": "c"}')]
+        fp1 = [("search", '{"q": "deploy kubernetes cluster to production"}')]
+        fp2 = [("read_file", '{"path": "/etc/nginx/nginx.conf"}')]
+        fp3 = [("execute", '{"command": "SELECT * FROM users WHERE active=true"}')]
         is_doom, _ = node._is_tool_doom_loop([fp1, fp2, fp3])
         assert is_doom is False
 
@@ -1886,6 +1903,7 @@ class TestToolDoomLoopIntegration:
             config=LoopConfig(
                 max_iterations=10,
                 tool_doom_loop_threshold=3,
+                stall_similarity_threshold=1.0,  # disable fuzzy stall detection
             ),
         )
         result = await node.execute(ctx)
@@ -1941,6 +1959,7 @@ class TestToolDoomLoopIntegration:
             config=LoopConfig(
                 max_iterations=10,
                 tool_doom_loop_threshold=3,
+                stall_similarity_threshold=1.0,  # disable fuzzy stall detection
             ),
         )
         result = await node.execute(ctx)
@@ -2005,6 +2024,7 @@ class TestToolDoomLoopIntegration:
             config=LoopConfig(
                 max_iterations=10,
                 tool_doom_loop_threshold=3,
+                stall_similarity_threshold=1.0,  # disable fuzzy stall detection
             ),
         )
         result = await node.execute(ctx)
@@ -2056,6 +2076,7 @@ class TestToolDoomLoopIntegration:
             config=LoopConfig(
                 max_iterations=10,
                 tool_doom_loop_enabled=False,
+                stall_similarity_threshold=1.0,  # disable fuzzy stall detection
             ),
         )
         result = await node.execute(ctx)
@@ -2144,6 +2165,7 @@ class TestToolDoomLoopIntegration:
             config=LoopConfig(
                 max_iterations=10,
                 tool_doom_loop_threshold=3,
+                stall_similarity_threshold=1.0,  # disable fuzzy stall detection
             ),
         )
         result = await node.execute(ctx)
@@ -2206,6 +2228,7 @@ class TestToolDoomLoopIntegration:
             config=LoopConfig(
                 max_iterations=10,
                 tool_doom_loop_threshold=3,
+                stall_similarity_threshold=1.0,  # disable fuzzy stall detection
             ),
         )
         result = await node.execute(ctx)
diff --git a/docs/antigravity-setup.md b/docs/antigravity-setup.md
index 3b1375a4..19587571 100644
--- a/docs/antigravity-setup.md
+++ b/docs/antigravity-setup.md
@@ -23,7 +23,7 @@ Done. For details, prerequisites, and troubleshooting, read on.
 
 ## What you get after setup
 
-- **coder-tools** – Create and manage agents (scaffolding via `initialize_agent_package`, file I/O, tool discovery).
+- **coder-tools** – Create and manage agents (scaffolding via `initialize_and_build_agent`, file I/O, tool discovery).
 - **tools** – File operations, web search, and other agent tools.
 - **Documentation** – Guided docs for building and testing agents.
 
diff --git a/docs/configuration.md b/docs/configuration.md
index ab6a554b..9fa0ebd1 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -130,7 +130,7 @@ MCP (Model Context Protocol) servers are configured in `.mcp.json` at the projec
 }
 ```
 
-The `coder-tools` server provides agent scaffolding via `initialize_agent_package` and related tools. The `tools` MCP server exposes tools including web search, PDF reading, CSV processing, and file system operations.
+The `coder-tools` server provides agent scaffolding via `initialize_and_build_agent` and related tools. The `tools` MCP server exposes tools including web search, PDF reading, CSV processing, and file system operations.
 
 ## Storage
 
diff --git a/docs/developer-guide.md b/docs/developer-guide.md
index 1961964e..ca5480b8 100644
--- a/docs/developer-guide.md
+++ b/docs/developer-guide.md
@@ -244,7 +244,7 @@ The fastest way to build agents is with the configured MCP workflow:
 ./quickstart.sh
 
 # Build a new agent
-Use the coder-tools MCP tools from your IDE agent chat (e.g., initialize_agent_package)
+Use the coder-tools MCP tools from your IDE agent chat (e.g., initialize_and_build_agent)
 ```
 
 ### Agent Development Workflow
@@ -252,7 +252,7 @@ Use the coder-tools MCP tools from your IDE agent chat (e.g., initialize_agent_p
 1. **Define Your Goal**
 
    ```
-   Use the coder-tools initialize_agent_package tool
+   Use the coder-tools initialize_and_build_agent tool
    Enter goal: "Build an agent that processes customer support tickets"
    ```
 
@@ -555,7 +555,7 @@ uv add <package>
 
 ```bash
 # Option 1: Use Claude Code skill (recommended)
-Use the coder-tools initialize_agent_package tool
+Use the coder-tools initialize_and_build_agent tool
 
 # Option 2: Create manually
 # Note: exports/ is initially empty (gitignored). Create your agent directory:
diff --git a/docs/environment-setup.md b/docs/environment-setup.md
index 8aa0e44e..d00686fe 100644
--- a/docs/environment-setup.md
+++ b/docs/environment-setup.md
@@ -180,7 +180,7 @@ MCP tools are also available in Cursor. To enable:
 
 **Claude Code:**
 ```
-Use the coder-tools initialize_agent_package tool to scaffold a new agent
+Use the coder-tools initialize_and_build_agent tool to scaffold a new agent
 ```
 
 **Codex CLI:**
@@ -453,7 +453,7 @@ This design allows agents in `exports/` to be:
 ### 2. Build Agent (Claude Code)
 
 ```
-Use the coder-tools initialize_agent_package tool
+Use the coder-tools initialize_and_build_agent tool
 Enter goal: "Build an agent that processes customer support tickets"
 ```
 
diff --git a/docs/getting-started.md b/docs/getting-started.md
index c4c014b6..e4afd791 100644
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -47,7 +47,7 @@ This is the recommended way to create your first agent.
 # Setup already done via quickstart.sh above
 
 # Start Claude Code and build an agent
-Use the coder-tools initialize_agent_package tool
+Use the coder-tools initialize_and_build_agent tool
 ```
 
 Follow the interactive prompts to:
@@ -173,7 +173,7 @@ PYTHONPATH=exports uv run python -m my_agent test --type success
 1. **Dashboard**: Run `hive open` to launch the web dashboard, or `hive tui` for the terminal UI
 2. **Detailed Setup**: See [environment-setup.md](./environment-setup.md)
 3. **Developer Guide**: See [developer-guide.md](./developer-guide.md)
-4. **Build Agents**: Use the coder-tools `initialize_agent_package` tool in Claude Code
+4. **Build Agents**: Use the coder-tools `initialize_and_build_agent` tool in Claude Code
 5. **Custom Tools**: Learn to integrate MCP servers
 6. **Join Community**: [Discord](https://discord.com/invite/MXE49hrKDk)
 
diff --git a/examples/templates/README.md b/examples/templates/README.md
index 7b2534bf..541de158 100644
--- a/examples/templates/README.md
+++ b/examples/templates/README.md
@@ -22,7 +22,7 @@ template_name/
 
 ### Option 1: Build from template (recommended)
 
-Use the `coder-tools` `initialize_agent_package` tool and select "From a template" to interactively pick a template, customize the goal/nodes/graph, and export a new agent.
+Use the `coder-tools` `initialize_and_build_agent` tool and select "From a template" to interactively pick a template, customize the goal/nodes/graph, and export a new agent.
 
 ### Option 2: Manual copy
 
diff --git a/examples/templates/deep_research_agent/agent.py b/examples/templates/deep_research_agent/agent.py
index 63ec60f3..51e1c56e 100644
--- a/examples/templates/deep_research_agent/agent.py
+++ b/examples/templates/deep_research_agent/agent.py
@@ -204,8 +204,8 @@ class DeepResearchAgent:
         """Set up the executor with all components."""
         from pathlib import Path
 
-        storage_path = Path.home() / ".hive" / "agents" / "deep_research_agent"
-        storage_path.mkdir(parents=True, exist_ok=True)
+        self._storage_path = Path.home() / ".hive" / "agents" / "deep_research_agent"
+        self._storage_path.mkdir(parents=True, exist_ok=True)
 
         self._tool_registry = ToolRegistry()
 
diff --git a/examples/templates/deep_research_agent/mcp_servers.json b/examples/templates/deep_research_agent/mcp_servers.json
index 35d1bf3f..45cb9670 100644
--- a/examples/templates/deep_research_agent/mcp_servers.json
+++ b/examples/templates/deep_research_agent/mcp_servers.json
@@ -2,8 +2,13 @@
   "hive-tools": {
     "transport": "stdio",
     "command": "uv",
-    "args": ["run", "python", "mcp_server.py", "--stdio"],
+    "args": [
+      "run",
+      "python",
+      "mcp_server.py",
+      "--stdio"
+    ],
     "cwd": "../../../tools",
     "description": "Hive tools MCP server providing web_search, web_scrape, and write_to_file"
   }
-}
+}
\ No newline at end of file
diff --git a/examples/templates/deep_research_agent/nodes/__init__.py b/examples/templates/deep_research_agent/nodes/__init__.py
index 9809df5a..9350f14d 100644
--- a/examples/templates/deep_research_agent/nodes/__init__.py
+++ b/examples/templates/deep_research_agent/nodes/__init__.py
@@ -11,26 +11,32 @@ intake_node = NodeSpec(
     node_type="event_loop",
     client_facing=True,
     max_node_visits=0,
-    input_keys=["topic"],
+    input_keys=["user_request"],
     output_keys=["research_brief"],
     success_criteria=(
         "The research brief is specific and actionable: it states the topic, "
         "the key questions to answer, the desired scope, and depth."
     ),
     system_prompt="""\
-You are a research intake specialist. The user wants to research a topic.
-Have a brief conversation to clarify what they need.
+You are a research intake specialist. Your ONLY job is to have a brief conversation with the user to clarify what they want researched.
+
+**CRITICAL: You do NOT do any research yourself.**
+- You do NOT search the web
+- You do NOT fetch sources
+- The research happens in the NEXT stage after you complete intake
+- Do NOT ask for or expect web_search or web_scrape tools
 
 **STEP 1 — Read and respond (text only, NO tool calls):**
-1. Read the topic provided
-2. If it's vague, ask 1-2 clarifying questions (scope, angle, depth)
+1. Read the user_request provided
+2. If it's vague, ask 1-2 clarifying questions (scope, angle, depth, budget, preferences)
 3. If it's already clear, confirm your understanding and ask the user to confirm
 
-Keep it short. Don't over-ask.
+Keep it short. Don't over-ask. Maximum 2 clarifying questions.
 
 **STEP 2 — After the user confirms, call set_output:**
-- set_output("research_brief", "A clear paragraph describing exactly what to research, \
-what questions to answer, what scope to cover, and how deep to go.")
+- set_output("research_brief", "A clear paragraph describing exactly what to research, what questions to answer, what scope to cover, and how deep to go.")
+
+That's it. Once you call set_output, your job is done and the research node will take over.
 """,
     tools=[],
 )
@@ -59,6 +65,8 @@ If feedback is provided, this is a follow-up round — focus on the gaps identif
 Work in phases:
 1. **Search**: Use web_search with 3-5 diverse queries covering different angles.
    Prioritize authoritative sources (.edu, .gov, established publications).
+   For automotive research, target: caranddriver.com, motortrend.com, edmunds.com, 
+   consumerreports.org, jdpower.com, and enthusiast forums.
 2. **Fetch**: Use web_scrape on the most promising URLs (aim for 5-8 sources).
    Skip URLs that fail. Extract the substantive content.
 3. **Analyze**: Review what you've collected. Identify key findings, themes,
diff --git a/examples/templates/job_hunter/nodes/__init__.py b/examples/templates/job_hunter/nodes/__init__.py
index 9621715b..62bd0402 100644
--- a/examples/templates/job_hunter/nodes/__init__.py
+++ b/examples/templates/job_hunter/nodes/__init__.py
@@ -116,16 +116,39 @@ customize_node = NodeSpec(
         "for each selected job, saved as HTML, and Gmail drafts created in user's inbox."
     ),
     system_prompt="""\
-You are a career coach creating personalized application materials.
+You are a career coach creating personalized application materials and Gmail drafts.
+
+**CRITICAL: You MUST create Gmail drafts for each selected job using gmail_create_draft.**
 
 **PROCESS:**
 1. Create application_materials.html using save_data and append_data.
-2. Generate resume customization list and professional cold email for each selected job.
-3. Serve the file to the user.
-4. Create Gmail drafts using gmail_create_draft.
+2. For each selected job:
+   a. Generate a specific resume customization list
+   b. Create a professional cold outreach email
+   c. **IMMEDIATELY call gmail_create_draft** with:
+      - to: hiring manager or recruiter email (if available) or company email
+      - subject: "Application for [Job Title] - [Your Name]"
+      - html: the professional cold email in HTML format
+3. Serve the application_materials.html file to the user.
+4. Confirm each Gmail draft was created successfully.
+
+**EMAIL REQUIREMENTS:**
+- Professional, personalized cold outreach email
+- Reference specific company details and role
+- Mention 2-3 relevant qualifications from their resume
+- Include clear call-to-action
+- Professional email signature
+- Format as HTML with proper structure
+
+**Gmail Draft Creation:**
+For each job, you MUST call gmail_create_draft(to="[email]", subject="[subject]", html="[email_html]")
+- Extract company email from job listing if available
+- Use generic format like "careers@[company].com" if no specific email
+- Subject format: "Application for [Job Title] - [Applicant Name]"
+- HTML email body with proper formatting
 
 **FINISH:**
-Call set_output("application_materials", "Completed")
+Only call set_output("application_materials", "Completed") AFTER creating ALL Gmail drafts.
 """,
     tools=["save_data", "append_data", "serve_file_to_user", "gmail_create_draft"],
 )
diff --git a/scripts/debug_queen_prompt.py b/scripts/debug_queen_prompt.py
index e5d5a8d9..1efbe931 100644
--- a/scripts/debug_queen_prompt.py
+++ b/scripts/debug_queen_prompt.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
-"""Debug tool to print the queen's running phase prompt."""
+"""Debug tool to print the queen's phase-specific prompts."""
 
-from framework.agents.hive_coder.nodes import (
+from framework.agents.queen.nodes import (
     _appendices,
     _queen_behavior_always,
     _queen_behavior_running,
@@ -10,32 +10,36 @@ from framework.agents.hive_coder.nodes import (
     _queen_tools_running,
 )
 
+_DEFAULT_WORKER_IDENTITY = (
+    "\n\n# Worker Profile\n"
+    "No worker agent loaded. You are operating independently.\n"
+    "Handle all tasks directly using your coding tools."
+)
 
-def print_running_prompt(worker_identity: str | None = None) -> None:
-    """Print the composed running phase prompt.
 
-    Args:
-        worker_identity: Optional worker identity string. If None, shows
-            the "no worker loaded" placeholder.
-    """
-    if worker_identity is None:
-        worker_identity = (
-            "\n\n# Worker Profile\n"
-            "No worker agent loaded. You are operating independently.\n"
-            "Handle all tasks directly using your coding tools."
-        )
+def print_planning_prompt(worker_identity: str | None = None) -> None:
+    """Print the composed planning phase prompt."""
+    from framework.agents.queen.nodes import (
+        _planning_knowledge,
+        _queen_behavior_planning,
+        _queen_identity_planning,
+        _queen_tools_planning,
+    )
+
+    wi = worker_identity or _DEFAULT_WORKER_IDENTITY
 
     prompt = (
-        _queen_identity_running
+        _queen_identity_planning
         + _queen_style
-        + _queen_tools_running
+        + _queen_tools_planning
         + _queen_behavior_always
-        + _queen_behavior_running
-        + worker_identity
+        + _queen_behavior_planning
+        + _planning_knowledge
+        + wi
     )
 
     print("=" * 80)
-    print("QUEEN RUNNING PHASE PROMPT")
+    print("QUEEN PLANNING PHASE PROMPT")
     print("=" * 80)
     print(prompt)
     print("=" * 80)
@@ -44,20 +48,16 @@ def print_running_prompt(worker_identity: str | None = None) -> None:
 
 def print_building_prompt(worker_identity: str | None = None) -> None:
     """Print the composed building phase prompt."""
-    from framework.agents.hive_coder.nodes import (
-        _agent_builder_knowledge,
+    from framework.agents.queen.nodes import (
+        _building_knowledge,
+        _gcu_building_section,
         _queen_behavior_building,
         _queen_identity_building,
         _queen_phase_7,
         _queen_tools_building,
     )
 
-    if worker_identity is None:
-        worker_identity = (
-            "\n\n# Worker Profile\n"
-            "No worker agent loaded. You are operating independently.\n"
-            "Handle all tasks directly using your coding tools."
-        )
+    wi = worker_identity or _DEFAULT_WORKER_IDENTITY
 
     prompt = (
         _queen_identity_building
@@ -65,10 +65,11 @@ def print_building_prompt(worker_identity: str | None = None) -> None:
         + _queen_tools_building
         + _queen_behavior_always
         + _queen_behavior_building
-        + _agent_builder_knowledge
+        + _building_knowledge
+        + _gcu_building_section
         + _queen_phase_7
         + _appendices
-        + worker_identity
+        + wi
     )
 
     print("=" * 80)
@@ -81,18 +82,13 @@ def print_building_prompt(worker_identity: str | None = None) -> None:
 
 def print_staging_prompt(worker_identity: str | None = None) -> None:
     """Print the composed staging phase prompt."""
-    from framework.agents.hive_coder.nodes import (
+    from framework.agents.queen.nodes import (
         _queen_behavior_staging,
         _queen_identity_staging,
         _queen_tools_staging,
     )
 
-    if worker_identity is None:
-        worker_identity = (
-            "\n\n# Worker Profile\n"
-            "No worker agent loaded. You are operating independently.\n"
-            "Handle all tasks directly using your coding tools."
-        )
+    wi = worker_identity or _DEFAULT_WORKER_IDENTITY
 
     prompt = (
         _queen_identity_staging
@@ -100,7 +96,7 @@ def print_staging_prompt(worker_identity: str | None = None) -> None:
         + _queen_tools_staging
         + _queen_behavior_always
         + _queen_behavior_staging
-        + worker_identity
+        + wi
     )
 
     print("=" * 80)
@@ -111,17 +107,47 @@ def print_staging_prompt(worker_identity: str | None = None) -> None:
     print(f"\nTotal length: {len(prompt):,} characters")
 
 
+def print_running_prompt(worker_identity: str | None = None) -> None:
+    """Print the composed running phase prompt.
+
+    Args:
+        worker_identity: Optional worker identity string. If None, shows
+            the "no worker loaded" placeholder.
+    """
+    wi = worker_identity or _DEFAULT_WORKER_IDENTITY
+
+    prompt = (
+        _queen_identity_running
+        + _queen_style
+        + _queen_tools_running
+        + _queen_behavior_always
+        + _queen_behavior_running
+        + wi
+    )
+
+    print("=" * 80)
+    print("QUEEN RUNNING PHASE PROMPT")
+    print("=" * 80)
+    print(prompt)
+    print("=" * 80)
+    print(f"\nTotal length: {len(prompt):,} characters")
+
+
 if __name__ == "__main__":
     import sys
 
-    phase = sys.argv[1] if len(sys.argv) > 1 else "running"
+    phase = sys.argv[1] if len(sys.argv) > 1 else "planning"
 
     if phase == "all":
+        print_planning_prompt()
+        print("\n\n")
         print_building_prompt()
         print("\n\n")
         print_staging_prompt()
         print("\n\n")
         print_running_prompt()
+    elif phase == "planning":
+        print_planning_prompt()
     elif phase == "building":
         print_building_prompt()
     elif phase == "staging":
@@ -131,6 +157,6 @@ if __name__ == "__main__":
     else:
         print(f"Unknown phase: {phase}")
         print(
-            "Usage: uv run scripts/debug_queen_prompt.py [building|staging|running|all]"
+            "Usage: uv run scripts/debug_queen_prompt.py [planning|building|staging|running|all]"
         )
         sys.exit(1)
diff --git a/scripts/test_init_package.py b/scripts/test_init_package.py
index 59386c39..cc68f1e7 100644
--- a/scripts/test_init_package.py
+++ b/scripts/test_init_package.py
@@ -1,4 +1,4 @@
-"""Quick test script for initialize_agent_package."""
+"""Quick test script for initialize_and_build_agent."""
 
 import sys
 import os
@@ -14,6 +14,6 @@ import tools.coder_tools_server as srv
 srv.PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
 
 # Access the underlying function (FastMCP wraps it as FunctionTool)
-tool = srv.initialize_agent_package
+tool = srv.initialize_and_build_agent
 result = tool.fn("richard_test2", nodes="intake,process,review")
 print(result)
diff --git a/tools/coder_tools_server.py b/tools/coder_tools_server.py
index 1c90dc99..fc40d94b 100644
--- a/tools/coder_tools_server.py
+++ b/tools/coder_tools_server.py
@@ -3,7 +3,7 @@
 Coder Tools MCP Server — OpenCode-inspired coding tools.
 
 Provides rich file I/O, fuzzy-match editing, git snapshots, and shell execution
-for the hive_coder agent. Modeled after opencode's tool architecture.
+for the queen agent. Modeled after opencode's tool architecture.
 
 All paths scoped to a configurable project root for safety.
 
@@ -1252,6 +1252,53 @@ def validate_agent_package(agent_name: str) -> str:
         path_parts.append(pythonpath)
     env["PYTHONPATH"] = os.pathsep.join(path_parts)
 
+    # Step 0: Module contract — __init__.py must expose goal, nodes, edges
+    try:
+        _contract_script = textwrap.dedent("""\
+            import importlib, json
+            mod = importlib.import_module('{agent_name}')
+            missing = [a for a in ('goal', 'nodes', 'edges') if getattr(mod, a, None) is None]
+            if missing:
+                print(json.dumps({{
+                    'valid': False,
+                    'error': (
+                        "Module '{agent_name}' is missing module-level attributes: "
+                        + ", ".join(missing) + ". "
+                        "Fix: in {agent_name}/__init__.py, add "
+                        "'from .agent import " + ", ".join(missing) + "' "
+                        "so that 'import {agent_name}' exposes them at package level."
+                    )
+                }}))
+            else:
+                print(json.dumps({{'valid': True}}))
+        """).format(agent_name=agent_name)
+        proc = subprocess.run(
+            ["uv", "run", "python", "-c", _contract_script],
+            capture_output=True,
+            text=True,
+            timeout=30,
+            env=env,
+            cwd=PROJECT_ROOT,
+            stdin=subprocess.DEVNULL,
+        )
+        if proc.returncode == 0:
+            result = json.loads(proc.stdout.strip())
+            steps["module_contract"] = {
+                "passed": result["valid"],
+                "output": result.get("error", "goal, nodes, edges exported correctly"),
+            }
+        else:
+            steps["module_contract"] = {
+                "passed": False,
+                "error": (
+                    f"Failed to import '{agent_name}': {proc.stderr.strip()[:1000]}. "
+                    f"Fix: ensure {agent_name}/__init__.py exists and can be imported "
+                    f"without errors (check syntax, missing dependencies, relative imports)."
+                ),
+            }
+    except Exception as e:
+        steps["module_contract"] = {"passed": False, "error": str(e)}
+
     # Step A: Class validation (subprocess for import isolation)
     try:
         proc = subprocess.run(
@@ -1321,9 +1368,11 @@ def validate_agent_package(agent_name: str) -> str:
             result = json.loads(proc.stdout.strip())
             steps["node_completeness"] = {
                 "passed": result["valid"],
-                "output": "; ".join(result["errors"])
-                if result["errors"]
-                else "All defined nodes are in the graph",
+                "output": (
+                    "; ".join(result["errors"])
+                    if result["errors"]
+                    else "All defined nodes are in the graph"
+                ),
             }
             if not result["valid"]:
                 steps["node_completeness"]["errors"] = result["errors"]
@@ -1434,7 +1483,7 @@ def _node_var_name(node_id: str) -> str:
 
 
 @mcp.tool()
-def initialize_agent_package(agent_name: str, nodes: str | None = None) -> str:
+def initialize_and_build_agent(agent_name: str, nodes: str | None = None) -> str:
     """Scaffold a new agent package with placeholder files.
 
     Creates exports/{agent_name}/ with all files needed for a runnable agent:
@@ -1985,6 +2034,9 @@ def runner_loaded():
 ''',
     )
 
+    # Build list of all generated file paths for the caller.
+    all_file_paths = [info["path"] for info in files_written.values()]
+
     return json.dumps(
         {
             "success": True,
@@ -1994,10 +2046,33 @@ def runner_loaded():
             "nodes": node_list,
             "files_written": files_written,
             "file_count": len(files_written),
+            "files": all_file_paths,
             "next_steps": [
-                f"Customize node definitions in exports/{agent_name}/nodes/__init__.py",
-                f"Define goal and edges in exports/{agent_name}/agent.py",
-                f'Run validate_agent_package("{agent_name}") to check structure',
+                (
+                    "IMPORTANT: All generated files are structurally complete "
+                    "with correct imports, class definition, validate() method, "
+                    "and __init__.py exports. Use edit_file to customize TODO "
+                    "placeholders — do NOT use write_file to rewrite entire files, "
+                    "as this will break imports and structure."
+                ),
+                (
+                    f"Use edit_file to customize system prompts, tools, "
+                    f"input_keys, output_keys, and success_criteria in "
+                    f"exports/{agent_name}/nodes/__init__.py"
+                ),
+                (
+                    f"Use edit_file to customize goal description, "
+                    f"success_criteria values, constraint values, edge "
+                    f"definitions, and identity_prompt in "
+                    f"exports/{agent_name}/agent.py"
+                ),
+                (
+                    "Do NOT modify: imports at top of agent.py, the class "
+                    "definition, validate() method, _build_graph()/_setup()/"
+                    "lifecycle methods, or __init__.py exports — they are "
+                    "already correct."
+                ),
+                f'Run validate_agent_package("{agent_name}") to verify structure',
             ],
         },
         indent=2,