Merge branch 'main' into feature/openrouter-api-key-support

test: add comprehensive test suite for safe_eval (#4015 )
* test: add comprehensive test suite for safe_eval sandboxed evaluator Adds 113 tests across 14 test classes covering the full surface area of the safe_eval expression evaluator used by edge conditions: - Literals, data structures, arithmetic, unary/binary/boolean operators - Short-circuit semantics for `and`/`or` (including guard patterns) - Ternary expressions, variable lookup, subscript/attribute access - Whitelisted function and method calls - Security boundaries (private attrs, disallowed AST nodes, blocked builtins) - Real-world EdgeSpec.condition_expr patterns from graph executor usage * style: fix import sort order --------- Co-authored-by: mma2027 <mma2027@users.noreply.github.com> Co-authored-by: hundao <alchemy_wimp@hotmail.com>
2026-03-17 11:21:53 -07:00 · 2026-03-18 01:01:31 +08:00 · 2026-03-18 00:36:54 +08:00 · 2026-03-17 07:34:10 -07:00 · 2026-03-16 20:54:36 -07:00 · 2026-03-16 20:44:47 -07:00
393 changed files with 41517 additions and 18871 deletions
@@ -1,31 +0,0 @@
-name: Link Discord Account
-description: Connect your GitHub and Discord for the bounty program
-title: "link: @{{ github.actor }}"
-labels: ["link-discord"]
-body:
-  - type: markdown
-    attributes:
-      value: |
-        Link your Discord account to receive XP and role rewards when your bounty PRs are merged.
-
-        **How to find your Discord ID:**
-        1. Open Discord Settings > Advanced > Enable **Developer Mode**
-        2. Right-click your username > **Copy User ID**
-
-  - type: input
-    id: discord_id
-    attributes:
-      label: Discord User ID
-      description: "Your numeric Discord ID (not your username). Example: 123456789012345678"
-      placeholder: "123456789012345678"
-    validations:
-      required: true
-
-  - type: input
-    id: display_name
-    attributes:
-      label: Display Name (optional)
-      description: How you'd like to be credited
-      placeholder: "Jane Doe"
-    validations:
-      required: false
@@ -0,0 +1,78 @@
+name: Standard Bounty
+description: A bounty task for general framework contributions (not integration-specific)
+title: "[Bounty]: "
+labels: []  # no labels are applied automatically
+body:
+  - type: markdown
+    attributes:
+      value: |
+        ## Standard Bounty
+
+        This issue is part of the [Bounty Program](https://github.com/aden-hive/hive/blob/main/docs/bounty-program/README.md).
+        **Claim this bounty** by commenting below — a maintainer will assign you within 24 hours.
+
+  - type: dropdown
+    id: bounty-size
+    attributes:
+      label: Bounty Size
+      options:
+        - "Small (10 pts)"
+        - "Medium (30 pts)"
+        - "Large (75 pts)"
+        - "Extreme (150 pts)"
+    validations:
+      required: true
+
+  - type: dropdown
+    id: difficulty
+    attributes:
+      label: Difficulty
+      options:
+        - Easy
+        - Medium
+        - Hard
+    validations:
+      required: true
+
+  - type: textarea
+    id: description
+    attributes:
+      label: Description
+      description: What needs to be done to complete this bounty.
+      placeholder: |
+        Describe the specific task, including:
+        - What the contributor needs to do
+        - Links to relevant files in the repo
+        - Any context or motivation for the change
+    validations:
+      required: true
+
+  - type: textarea
+    id: acceptance-criteria
+    attributes:
+      label: Acceptance Criteria
+      description: What "done" looks like. The PR must meet all criteria.
+      placeholder: |
+        - [ ] Criterion 1
+        - [ ] Criterion 2
+        - [ ] CI passes
+    validations:
+      required: true
+
+  - type: textarea
+    id: relevant-files
+    attributes:
+      label: Relevant Files
+      description: Links to files or directories related to this bounty.
+      placeholder: |
+        - `path/to/file.py`
+        - `path/to/directory/`
+
+  - type: textarea
+    id: resources
+    attributes:
+      label: Resources
+      description: Links to docs, issues, or external references that will help.
+      placeholder: |
+        - Related issue: #XXXX
+        - Docs: https://...
@@ -5,7 +5,7 @@ on:
    branches: [main]
  pull_request:
    branches: [main]
-
+
 concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
@@ -24,6 +24,8 @@ jobs:

      - name: Install uv
        uses: astral-sh/setup-uv@v4
+        with:
+          enable-cache: true

      - name: Install dependencies
        run: uv sync --project core --group dev
@@ -54,10 +56,12 @@ jobs:

      - name: Install uv
        uses: astral-sh/setup-uv@v4
+        with:
+          enable-cache: true

      - name: Install dependencies and run tests
+        working-directory: core
        run: |
-          cd core
          uv sync
          uv run pytest tests/ -v

@@ -77,10 +81,12 @@ jobs:

      - name: Install uv
        uses: astral-sh/setup-uv@v4
+        with:
+          enable-cache: true

      - name: Install dependencies and run tests
+        working-directory: tools
        run: |
-          cd tools
          uv sync --extra dev
          uv run pytest tests/ -v

@@ -98,10 +104,12 @@ jobs:

      - name: Install uv
        uses: astral-sh/setup-uv@v4
-
+        with:
+          enable-cache: true
+
      - name: Install dependencies
+        working-directory: core
        run: |
-          cd core
          uv sync

      - name: Validate exported agents
@@ -0,0 +1,54 @@
+# Daily sweep: closes open, non-draft PRs that still carry the `pr-requirements-warning`
+# label (added by pr-requirements.yml) and have had no activity for the 24-hour grace period.
+name: PR Requirements Enforcement
+on:
+  schedule:
+    - cron: "0 0 * * *"  # once a day at 00:00 UTC (GitHub evaluates cron schedules in UTC)
+jobs:
+  enforce:
+    name: Close PRs still failing contribution requirements
+    runs-on: ubuntu-latest
+    permissions:
+      pull-requests: write
+      issues: write
+    steps:
+      - name: Close PRs still failing requirements
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const { owner, repo } = context.repo;
+            const prs = await github.paginate(github.rest.pulls.list, {
+              owner,
+              repo,
+              state: "open",
+              per_page: 100
+            });
+            for (const pr of prs) {
+              // Skip draft PRs — author may still be actively working toward compliance
+              if (pr.draft) continue;
+              const labels = pr.labels.map(l => l.name);
+              if (!labels.includes("pr-requirements-warning")) continue;
+              const gracePeriod = 24 * 60 * 60 * 1000; // 24 hours in milliseconds
+              const lastUpdated = new Date(pr.updated_at); // last activity, not creation: a long-open PR that was only just warned keeps its grace period
+              const now = new Date();
+              if (now - lastUpdated < gracePeriod) {
+                console.log(`Skipping PR #${pr.number} — still within grace period`);
+                continue;
+              }
+              const prNumber = pr.number;
+              const prAuthor = pr.user.login;
+              await github.rest.issues.createComment({
+                owner,
+                repo,
+                issue_number: prNumber,
+                body: `Closing PR because the contribution requirements were not resolved within the 24-hour grace period.
+                If this was closed in error, feel free to reopen the PR after fixing the requirements.`
+              });
+              await github.rest.pulls.update({
+                owner,
+                repo,
+                pull_number: prNumber,
+                state: "closed"
+              });
+              console.log(`Closed PR #${prNumber} by ${prAuthor} (PR requirements were not met)`);
+            }
@@ -43,9 +43,10 @@ jobs:
            console.log(`  Found issue references: ${issueNumbers.length > 0 ? issueNumbers.join(', ') : 'none'}`);

            if (issueNumbers.length === 0) {
-              const message = `## PR Closed - Requirements Not Met
+              const message = `## PR Requirements Warning

-            This PR has been automatically closed because it doesn't meet the requirements.
+            This PR does not meet the contribution requirements.
+            If the issue is not fixed within ~24 hours, it may be automatically closed.

            **Missing:** No linked issue found.

@@ -67,14 +68,15 @@ jobs:

            **Why is this required?** See #472 for details.`;

-              const comments = await github.rest.issues.listComments({
+              const comments = await github.paginate(github.rest.issues.listComments, {
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: prNumber,
+                per_page: 100,
              });

-              const botComment = comments.data.find(
-                (c) => c.user.type === 'Bot' && c.body.includes('PR Closed - Requirements Not Met')
+              const botComment = comments.find(
+                (c) => c.user.type === 'Bot' && c.body.includes('PR Requirements Warning')
              );

              if (!botComment) {
@@ -86,11 +88,11 @@ jobs:
                });
              }

-              await github.rest.pulls.update({
+              await github.rest.issues.addLabels({
                owner: context.repo.owner,
                repo: context.repo.repo,
-                pull_number: prNumber,
-                state: 'closed',
+                issue_number: prNumber,
+                labels: ['pr-requirements-warning'],
              });

              core.setFailed('PR must reference an issue');
@@ -132,9 +134,10 @@ jobs:
                `#${i.number} (assignees: ${i.assignees.length > 0 ? i.assignees.join(', ') : 'none'})`
              ).join(', ');

-              const message = `## PR Closed - Requirements Not Met
+              const message = `## PR Requirements Warning

-            This PR has been automatically closed because it doesn't meet the requirements.
+            This PR does not meet the contribution requirements.
+            If the issue is not fixed within ~24 hours, it may be automatically closed.

            **PR Author:** @${prAuthor}
            **Found issues:** ${issueList}
@@ -157,14 +160,15 @@ jobs:

            **Why is this required?** See #472 for details.`;

-              const comments = await github.rest.issues.listComments({
+              const comments = await github.paginate(github.rest.issues.listComments, {
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: prNumber,
+                per_page: 100,
              });

-              const botComment = comments.data.find(
-                (c) => c.user.type === 'Bot' && c.body.includes('PR Closed - Requirements Not Met')
+              const botComment = comments.find(
+                (c) => c.user.type === 'Bot' && c.body.includes('PR Requirements Warning')
              );

              if (!botComment) {
@@ -176,14 +180,24 @@ jobs:
                });
              }

-              await github.rest.pulls.update({
+              await github.rest.issues.addLabels({
                owner: context.repo.owner,
                repo: context.repo.repo,
-                pull_number: prNumber,
-                state: 'closed',
+                issue_number: prNumber,
+                labels: ['pr-requirements-warning'],
              });

              core.setFailed('PR author must be assigned to the linked issue');
            } else {
              console.log(`PR requirements met! Issue #${issueWithAuthorAssigned} has ${prAuthor} as assignee.`);
-            }
+              try {
+                await github.rest.issues.removeLabel({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  issue_number: prNumber,
+                  name: "pr-requirements-warning"
+                });
+              }catch (error){
+                //ignore if label doesn't exist
+              }
+            }
@@ -2,10 +2,6 @@

 Shared agent instructions for this workspace.

-## Deprecations
-
- **TUI is deprecated.** The terminal UI (`hive tui`) is no longer maintained. Use the browser-based interface (`hive open`) instead.
-
 ## Coding Agent Notes

 - 
@@ -1,17 +1,149 @@
 # Release Notes

+## v0.7.1
+
+**Release Date:** March 13, 2026
+**Tag:** v0.7.1
+
+### Chrome-Native Browser Control
+
+v0.7.1 replaces Playwright with direct Chrome DevTools Protocol (CDP) integration. The GCU now launches the user's system Chrome via `open -n` on macOS, connects over CDP, and manages browser lifecycle end-to-end -- no extra browser binary required.
+
+---
+
+### Highlights
+
+#### System Chrome via CDP
+
+The entire GCU browser stack has been rewritten:
+
+- **Chrome finder & launcher** -- New `chrome_finder.py` discovers installed Chrome and `chrome_launcher.py` manages process lifecycle with `--remote-debugging-port`
+- **Coexist with user's browser** -- `open -n` on macOS launches a separate Chrome instance so the user's tabs stay untouched
+- **Dynamic viewport sizing** -- Viewport auto-sizes to the available display area, suppressing Chrome warning bars
+- **Orphan cleanup** -- Chrome processes are killed on GCU server shutdown to prevent leaks
+- **`--no-startup-window`** -- Chrome launches without opening a window by default until a page is needed
+
+#### Per-Subagent Browser Isolation
+
+Each GCU subagent gets its own Chrome user-data directory, preventing cookie/session cross-contamination:
+
+- Unique browser profiles injected per subagent
+- Profiles cleaned up after top-level GCU node execution
+- Tab origin and age metadata tracked per subagent
+
+#### Dummy Agent Testing Framework
+
+A comprehensive test suite for validating agent graph patterns without LLM calls:
+
+- 8 test modules covering echo, pipeline, branch, parallel merge, retry, feedback loop, worker, and GCU subagent patterns
+- Shared fixtures and a `run_all.py` runner for CI integration
+- Subagent lifecycle tests
+
+---
+
+### What's New
+
+#### GCU Browser
+
+- **Switch from Playwright to system Chrome via CDP** -- Direct CDP connection replaces Playwright dependency. (@bryanadenhq)
+- **Chrome finder and launcher modules** -- `chrome_finder.py` and `chrome_launcher.py` for cross-platform Chrome discovery and process management. (@bryanadenhq)
+- **Dynamic viewport sizing** -- Auto-size viewport and suppress Chrome warning bar. (@bryanadenhq)
+- **Per-subagent browser profile isolation** -- Unique user-data directories per subagent with cleanup. (@bryanadenhq)
+- **Tab origin/age metadata** -- Track which subagent opened each tab and when. (@bryanadenhq)
+- **`browser_close_all` tool** -- Bulk tab cleanup for agents managing many pages. (@bryanadenhq)
+- **Auto-track popup pages** -- Popups are automatically captured and tracked. (@bryanadenhq)
+- **Auto-snapshot from browser interactions** -- Browser interaction tools return screenshots automatically. (@bryanadenhq)
+- **Kill orphaned Chrome processes** -- GCU server shutdown cleans up lingering Chrome instances. (@bryanadenhq)
+- **`--no-startup-window` Chrome flag** -- Prevent empty window on launch. (@bryanadenhq)
+- **Launch Chrome via `open -n` on macOS** -- Coexist with the user's running browser. (@bryanadenhq)
+
+#### Framework & Runtime
+
+- **Session resume fix for new agents** -- Correctly resume sessions when a new agent is loaded. (@bryanadenhq)
+- **Queen upsert fix** -- Prevent duplicate queen entries on session restore. (@bryanadenhq)
+- **Anchor worker monitoring to queen's session ID on cold-restore** -- Worker monitors reconnect to the correct queen after restart. (@bryanadenhq)
+- **Update meta.json when loading workers** -- Worker metadata stays in sync with runtime state. (@RichardTang-Aden)
+- **Generate worker MCP file correctly** -- Fix MCP config generation for spawned workers. (@RichardTang-Aden)
+- **Share event bus so tool events are visible to parent** -- Tool execution events propagate up to parent graphs. (@bryanadenhq)
+- **Subagent activity tracking in queen status** -- Queen instructions include live subagent status. (@bryanadenhq)
+- **GCU system prompt updates** -- Auto-snapshots, batching, popup tracking, and close_all guidance. (@bryanadenhq)
+
+#### Frontend
+
+- **Loading spinner in draft panel** -- Shows spinner during planning phase instead of blank panel. (@bryanadenhq)
+- **Fix credential modal errors** -- Modal no longer eats errors; banner stays visible. (@bryanadenhq)
+- **Fix credentials_required loop** -- Stop clearing the flag on modal close to prevent infinite re-prompting. (@bryanadenhq)
+- **Fix "Add tab" dropdown overflow** -- Dropdown no longer hidden when many agents are open. (@prasoonmhwr)
+
+#### Testing
+
+- **Dummy agent test framework** -- 8 test modules (echo, pipeline, branch, parallel merge, retry, feedback loop, worker, GCU subagent) with shared fixtures and CI runner. (@bryanadenhq)
+- **Subagent lifecycle tests** -- Validate subagent spawn and completion flows. (@bryanadenhq)
+
+#### Documentation & Infrastructure
+
+- **MCP integration PRD** -- Product requirements for MCP server registry. (@TimothyZhang7)
+- **Skills registry PRD** -- Product requirements for skill registry system. (@bryanadenhq)
+- **Bounty program updates** -- Standard bounty issue template and updated contributor guide. (@bryanadenhq)
+- **Windows quickstart** -- Add default context limit for PowerShell setup. (@bryanadenhq)
+- **Remove deprecated files** -- Clean up `setup_mcp.py`, `verify_mcp.py`, `antigravity-setup.md`, and `setup-antigravity-mcp.sh`. (@bryanadenhq)
+
+---
+
+### Bug Fixes
+
+- Fix credential modal eating errors and banner staying open
+- Stop clearing `credentials_required` on modal close to prevent infinite loop
+- Share event bus so tool events are visible to parent graph
+- Use lazy %-formatting in subagent completion log to avoid f-string in logger
+- Anchor worker monitoring to queen's session ID on cold-restore
+- Update meta.json when loading workers
+- Generate worker MCP file correctly
+- Fix "Add tab" dropdown partially hidden when creating multiple agents
+
+---
+
+### Community Contributors
+
+- **Prasoon Mahawar** (@prasoonmhwr) -- Fix UI overflow on agent tab dropdown
+- **Richard Tang** (@RichardTang-Aden) -- Worker MCP generation and meta.json fixes
+
+---
+
+### Upgrading
+
+```bash
+git pull origin main
+uv sync
+```
+
+The Playwright dependency is no longer required for GCU browser operations. Chrome must be installed on the host system.
+
+---
+
+## v0.7.0
+
+**Release Date:** March 5, 2026
+**Tag:** v0.7.0
+
+Session management refactor release.
+
+---
+
+## v0.5.1
+
 **Release Date:** February 18, 2026
 **Tag:** v0.5.1

-## The Hive Gets a Brain
+### The Hive Gets a Brain

 v0.5.1 is our most ambitious release yet. Hive agents can now **build other agents** -- the new Hive Coder meta-agent writes, tests, and fixes agent packages from natural language. The runtime grows multi-graph support so one session can orchestrate multiple agents simultaneously. The TUI gets a complete overhaul with an in-app agent picker, live streaming, and seamless escalation to the Coder. And we're now provider-agnostic: Claude Code subscriptions, OpenAI-compatible endpoints, and any LiteLLM-supported model work out of the box.

 ---

-## Highlights
+### Highlights

-### Hive Coder -- The Agent That Builds Agents
+#### Hive Coder -- The Agent That Builds Agents

 A native meta-agent that lives inside the framework at `core/framework/agents/hive_coder/`. Give it a natural-language specification and it produces a complete agent package -- goal definition, node prompts, edge routing, MCP tool wiring, tests, and all boilerplate files.

@@ -30,7 +162,7 @@ The Coder ships with:
 - **Coder Tools MCP server** -- file I/O, fuzzy-match editing, git snapshots, and sandboxed shell execution (`tools/coder_tools_server.py`)
 - **Test generation** -- structural tests for forever-alive agents that don't hang on `runner.run()`

-### Multi-Graph Agent Runtime
+#### Multi-Graph Agent Runtime

 `AgentRuntime` now supports loading, managing, and switching between multiple agent graphs within a single session. Six new lifecycle tools give agents (and the TUI) full control:

@@ -44,7 +176,7 @@ await runtime.add_graph("exports/deep_research_agent")

 The Hive Coder uses multi-graph internally -- when you escalate from a worker agent, the Coder loads as a separate graph while the worker stays alive in the background.

-### TUI Revamp
+#### TUI Revamp

 The Terminal UI gets a ground-up rebuild with five major additions:

@@ -54,7 +186,7 @@ The Terminal UI gets a ground-up rebuild with five major additions:
 - **PDF attachments** -- `/attach` and `/detach` commands with native OS file dialog (macOS, Linux, Windows)
 - **Multi-graph commands** -- `/graphs`, `/graph <id>`, `/load <path>`, `/unload <id>` for managing agent graphs in-session

-### Provider-Agnostic LLM Support
+#### Provider-Agnostic LLM Support

 Hive is no longer Anthropic-only. v0.5.1 adds first-class support for:

@@ -66,9 +198,9 @@ The quickstart script auto-detects Claude Code subscriptions and ZAI Code instal

 ---

-## What's New
+### What's New

-### Architecture & Runtime
+#### Architecture & Runtime

 - **Hive Coder meta-agent** -- Natural-language agent builder with reference docs, guardian watchdog, and `hive code` CLI command. (@TimothyZhang7)
 - **Multi-graph agent sessions** -- `add_graph`/`remove_graph` on AgentRuntime with 6 lifecycle tools (`load_agent`, `unload_agent`, `start_agent`, `restart_agent`, `list_agents`, `get_user_presence`). (@TimothyZhang7)
@@ -79,7 +211,7 @@ The quickstart script auto-detects Claude Code subscriptions and ZAI Code instal
 - **Pre-start confirmation prompt** -- Interactive prompt before agent execution allowing credential updates or abort. (@RichardTang-Aden)
 - **Event bus multi-graph support** -- `graph_id` on events, `filter_graph` on subscriptions, `ESCALATION_REQUESTED` event type, `exclude_own_graph` filter. (@TimothyZhang7)

-### TUI Improvements
+#### TUI Improvements

 - **In-app agent picker** (Ctrl+A) -- Tabbed modal for browsing agents with metadata badges (nodes, tools, sessions, tags). (@TimothyZhang7)
 - **Runtime-optional TUI startup** -- Launches without a pre-loaded agent, shows agent picker on startup. (@TimothyZhang7)
@@ -89,7 +221,7 @@ The quickstart script auto-detects Claude Code subscriptions and ZAI Code instal
 - **Multi-graph TUI commands** -- `/graphs`, `/graph <id>`, `/load <path>`, `/unload <id>`. (@TimothyZhang7)
 - **Agent Guardian watchdog** -- Event-driven monitor that catches secondary agent failures and triggers automatic remediation, with `--no-guardian` CLI flag. (@TimothyZhang7)

-### New Tool Integrations
+#### New Tool Integrations

 | Tool                   | Description                                                                                                                                                            | Contributor        |
 | ---------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------ |
@@ -99,7 +231,7 @@ The quickstart script auto-detects Claude Code subscriptions and ZAI Code instal
 | **Google Docs**        | Document creation, reading, and editing with OAuth credential support                                                                                                  | @haliaeetusvocifer |
 | **Gmail enhancements** | Expanded mail operations for inbox management                                                                                                                          | @bryanadenhq       |

-### Infrastructure
+#### Infrastructure

 - **Default node type → `event_loop`** -- `NodeSpec.node_type` defaults to `"event_loop"` instead of `"llm_tool_use"`. (@TimothyZhang7)
 - **Default `max_node_visits` → 0 (unlimited)** -- Nodes default to unlimited visits, reducing friction for feedback loops and forever-alive agents. (@TimothyZhang7)
@@ -112,7 +244,7 @@ The quickstart script auto-detects Claude Code subscriptions and ZAI Code instal

 ---

-## Bug Fixes
+### Bug Fixes

 - Flush WIP accumulator outputs on cancel/failure so edge conditions see correct values on resume
 - Stall detection state preserved across resume (no more resets on checkpoint restore)
@@ -125,13 +257,13 @@ The quickstart script auto-detects Claude Code subscriptions and ZAI Code instal
 - Fix email agent version conflicts (@RichardTang-Aden)
 - Fix coder tool timeouts (120s for tests, 300s cap for commands)

-## Documentation
+### Documentation

 - Clarify installation and prevent root pip install misuse (@paarths-collab)

 ---

-## Agent Updates
+### Agent Updates

 - **Email Inbox Management** -- Consolidate `gmail_inbox_guardian` and `inbox_management` into a single unified agent with updated prompts and config. (@RichardTang-Aden, @bryanadenhq)
 - **Job Hunter** -- Updated node prompts, config, and agent metadata; added PDF resume selection. (@bryanadenhq)
@@ -141,7 +273,7 @@ The quickstart script auto-detects Claude Code subscriptions and ZAI Code instal

 ---

-## Breaking Changes
+### Breaking Changes

 - **Deprecated node types raise `RuntimeError`** -- `llm_tool_use`, `llm_generate`, `function`, `router`, `human_input` now fail instead of warning. Migrate to `event_loop`.
 - **`NodeSpec.node_type` defaults to `"event_loop"`** (was `"llm_tool_use"`)
@@ -150,7 +282,7 @@ The quickstart script auto-detects Claude Code subscriptions and ZAI Code instal

 ---

-## Community Contributors
+### Community Contributors

 A huge thank you to everyone who contributed to this release:

@@ -165,14 +297,14 @@ A huge thank you to everyone who contributed to this release:

 ---

-## Upgrading
+### Upgrading

 ```bash
 git pull origin main
 uv sync
 ```

-### Migration Guide
+#### Migration Guide

 If your agents use deprecated node types, update them:

@@ -196,12 +328,3 @@ hive code
 # Or from TUI -- press Ctrl+E to escalate
 hive tui
 ```
-
---
-
-## What's Next
-
- **Agent-to-agent communication** -- one agent's output triggers another agent's entry point
- **Cost visibility** -- detailed runtime log of LLM costs per node and per session
- **Persistent webhook subscriptions** -- survive agent restarts without re-registering
- **Remote agent deployment** -- run agents as long-lived services with HTTP APIs
@@ -5,20 +5,20 @@ help: ## Show this help
 		awk 'BEGIN {FS = ":.*?## "}; {printf "  \033[36m%-15s\033[0m %s\n", $$1, $$2}'

 lint: ## Run ruff linter and formatter (with auto-fix)
-	cd core && ruff check --fix .
-	cd tools && ruff check --fix .
-	cd core && ruff format .
-	cd tools && ruff format .
+	cd core && uv run ruff check --fix .
+	cd tools && uv run ruff check --fix .
+	cd core && uv run ruff format .
+	cd tools && uv run ruff format .

 format: ## Run ruff formatter
-	cd core && ruff format .
-	cd tools && ruff format .
+	cd core && uv run ruff format .
+	cd tools && uv run ruff format .

 check: ## Run all checks without modifying files (CI-safe)
-	cd core && ruff check .
-	cd tools && ruff check .
-	cd core && ruff format --check .
-	cd tools && ruff format --check .
+	cd core && uv run ruff check .
+	cd tools && uv run ruff check .
+	cd core && uv run ruff format --check .
+	cd tools && uv run ruff format --check .

 test: ## Run all tests (core + tools, excludes live)
 	cd core && uv run python -m pytest tests/ -v
@@ -27,7 +27,7 @@
  <img src="https://img.shields.io/badge/Multi--Agent-Systems-blue?style=flat-square" alt="Multi-Agent" />
  <img src="https://img.shields.io/badge/Headless-Development-purple?style=flat-square" alt="Headless" />
  <img src="https://img.shields.io/badge/Human--in--the--Loop-orange?style=flat-square" alt="HITL" />
-  <img src="https://img.shields.io/badge/Production--Ready-red?style=flat-square" alt="Production" />
+  <img src="https://img.shields.io/badge/Browser-Use-red?style=flat-square" alt="Browser Use" />
 </p>
 <p align="center">
  <img src="https://img.shields.io/badge/OpenAI-supported-412991?style=flat-square&logo=openai" alt="OpenAI" />
@@ -37,7 +37,7 @@

 ## Overview

-Build autonomous, reliable, self-improving AI agents without hardcoding workflows. Define your goal through conversation with hive coding agent(queen), and the framework generates a node graph with dynamically created connection code. When things break, the framework captures failure data, evolves the agent through the coding agent, and redeploys. Built-in human-in-the-loop nodes, credential management, and real-time monitoring give you control without sacrificing adaptability.
+Generate a swarm of worker agents with a coding agent (queen) that controls them. Define your goal through conversation with the hive queen, and the framework generates a node graph with dynamically created connection code. When things break, the framework captures failure data, evolves the agent through the coding agent, and redeploys. Built-in human-in-the-loop nodes, browser use, credential management, and real-time monitoring give you control without sacrificing adaptability.

 Visit [adenhq.com](https://adenhq.com) for complete documentation, examples, and guides.

@@ -45,7 +45,7 @@ Visit [adenhq.com](https://adenhq.com) for complete documentation, examples, and

 ## Who Is Hive For?

-Hive is designed for developers and teams who want to build **production-grade AI agents** without manually wiring complex workflows.
+Hive is designed for developers and teams who want to build many **autonomous AI agents** fast without manually wiring complex workflows.

 Hive is a good fit if you:

@@ -84,7 +84,7 @@ Use Hive when you need:
 - An LLM provider that powers the agents
 - **ripgrep (optional, recommended on Windows):** The `search_files` tool uses ripgrep for faster file search. If not installed, a Python fallback is used. On Windows: `winget install BurntSushi.ripgrep` or `scoop install ripgrep`

-> **Note for Windows Users:** It is strongly recommended to use **WSL (Windows Subsystem for Linux)** or **Git Bash** to run this framework. Some core automation scripts may not execute correctly in standard Command Prompt or PowerShell.
+> **Windows Users:** Native Windows is supported via `quickstart.ps1` and `hive.ps1`. Run these in PowerShell 5.1+. WSL is also an option but not required.

 ### Installation

@@ -111,39 +111,36 @@ This sets up:
 - **LLM provider** - Interactive default model configuration
 - All required Python dependencies with `uv`

- At last, it will initiate the open hive interface in your browser
+- Finally, it will open the Hive interface in your browser

 > **Tip:** To reopen the dashboard later, run `hive open` from the project directory.

-<img width="2500" height="1214" alt="home-screen" src="https://github.com/user-attachments/assets/134d897f-5e75-4874-b00b-e0505f6b45c4" />
-
 ### Build Your First Agent

-Type the agent you want to build in the home input box
+Type the agent you want to build in the home input box. The queen is going to ask you questions and work out a solution with you.

 <img width="2500" height="1214" alt="Image" src="https://github.com/user-attachments/assets/1ce19141-a78b-46f5-8d64-dbf987e048f4" />

 ### Use Template Agents

-Click "Try a sample agent" and check the templates. You can run a templates directly or choose to build your version on top of the existing template.
+Click "Try a sample agent" and check the templates. You can run a template directly or choose to build your version on top of the existing template.

 ### Run Agents

-Now you can run an agent by selectiing the agent (either an existing agent or example agent). You can click the Run button on the top left, or talk to the queen agent and it can run the agent for you.
+Now you can run an agent by selecting the agent (either an existing agent or example agent). You can click the Run button on the top left, or talk to the queen agent and it can run the agent for you.

-<img width="2500" height="1214" alt="Image" src="https://github.com/user-attachments/assets/71c38206-2ad5-49aa-bde8-6698d0bc55f5" />
+<img width="2549" height="1174" alt="Screenshot 2026-03-12 at 9 27 36 PM" src="https://github.com/user-attachments/assets/7c7d30fa-9ceb-4c23-95af-b1caa405547d" />

 ## Features

 - **Browser-Use** - Control the browser on your computer to achieve hard tasks
- **Parallel Execution** - Execute the generated graph in parallel. This way you can have multiple agent compelteing the jobs for you
+- **Parallel Execution** - Execute the generated graph in parallel. This way you can have multiple agents completing the jobs for you
 - **[Goal-Driven Generation](docs/key_concepts/goals_outcome.md)** - Define objectives in natural language; the coding agent generates the agent graph and connection code to achieve them
 - **[Adaptiveness](docs/key_concepts/evolution.md)** - Framework captures failures, calibrates according to the objectives, and evolves the agent graph
 - **[Dynamic Node Connections](docs/key_concepts/graph.md)** - No predefined edges; connection code is generated by any capable LLM based on your goals
 - **SDK-Wrapped Nodes** - Every node gets shared memory, local RLM memory, monitoring, tools, and LLM access out of the box
 - **[Human-in-the-Loop](docs/key_concepts/graph.md#human-in-the-loop)** - Intervention nodes that pause execution for human input with configurable timeouts and escalation
 - **Real-time Observability** - WebSocket streaming for live monitoring of agent execution, decisions, and node-to-node communication
- **Production-Ready** - Self-hostable, built for scale and reliability

 ## Integration

@@ -392,10 +389,6 @@ Hive generates your entire agent system from natural language goals using a codi

 Yes, Hive is fully open-source under the Apache License 2.0. We actively encourage community contributions and collaboration.

-**Q: Can Hive handle complex, production-scale use cases?**
-
-Yes. Hive is explicitly designed for production environments with features like automatic failure recovery, real-time observability, cost controls, and horizontal scaling support. The framework handles both simple automations and complex multi-agent workflows.
-
 **Q: Does Hive support human-in-the-loop workflows?**

 Yes, Hive fully supports [human-in-the-loop](docs/key_concepts/graph.md#human-in-the-loop) workflows through intervention nodes that pause execution for human input. These include configurable timeouts and escalation policies, allowing seamless collaboration between human experts and AI agents.
@@ -420,6 +413,16 @@ Visit [docs.adenhq.com](https://docs.adenhq.com/) for complete guides, API refer

 Contributions are welcome! Fork the repository, create your feature branch, implement your changes, and submit a pull request. See [CONTRIBUTING.md](CONTRIBUTING.md) for detailed guidelines.

+## Star History
+
+<a href="https://star-history.com/#aden-hive/hive&Date">
+ <picture>
+   <source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=aden-hive/hive&type=Date&theme=dark" />
+   <source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=aden-hive/hive&type=Date" />
+   <img alt="Star History Chart" src="https://api.star-history.com/svg?repos=aden-hive/hive&type=Date" />
+ </picture>
+</a>
+
 ---

 <p align="center">
@@ -39,8 +39,8 @@ We consider security research conducted in accordance with this policy to be:
 ## Security Best Practices for Users

 1. **Keep Updated**: Always run the latest version
-2. **Secure Configuration**: Review `config.yaml` settings, especially in production
-3. **Environment Variables**: Never commit `.env` files or `config.yaml` with secrets
+2. **Secure Configuration**: Review your `~/.hive/configuration.json`, `.mcp.json`, and environment variable settings, especially in production
+3. **Environment Variables**: Never commit `.env` files or any configuration files that contain secrets
 4. **Network Security**: Use HTTPS in production, configure firewalls appropriately
 5. **Database Security**: Use strong passwords, limit network access

@@ -1,6 +1,6 @@
 # MCP Server Guide - Agent Building Tools

-> **Note:** The standalone `agent-builder` MCP server (`framework.mcp.agent_builder_server`) has been replaced. Agent building is now done via the `coder-tools` server's `initialize_agent_package` tool, with underlying logic in `framework.builder.package_generator`.
+> **Note:** The standalone `agent-builder` MCP server (`framework.mcp.agent_builder_server`) has been replaced. Agent building is now done via the `coder-tools` server's `initialize_and_build_agent` tool, with underlying logic in `tools/coder_tools_server.py`.

 This guide covers the MCP tools available for building goal-driven agents.

@@ -19,7 +19,7 @@ uv pip install -e .

 ## Agent Building

-Agent scaffolding is handled by the `coder-tools` MCP server (in `tools/coder_tools_server.py`), which provides the `initialize_agent_package` tool and related utilities. The underlying package generation logic lives in `framework.builder.package_generator`.
+Agent scaffolding is handled by the `coder-tools` MCP server (in `tools/coder_tools_server.py`), which provides the `initialize_and_build_agent` tool and related utilities. The package generation logic lives directly in `tools/coder_tools_server.py`.

 See the [Getting Started Guide](../docs/getting-started.md) for building agents.

@@ -1,740 +0,0 @@
-#!/usr/bin/env python3
-"""
-EventLoopNode WebSocket Demo
-
-Real LLM, real FileConversationStore, real EventBus.
-Streams EventLoopNode execution to a browser via WebSocket.
-
-Usage:
-    cd /home/timothy/oss/hive/core
-    python demos/event_loop_wss_demo.py
-
-    Then open http://localhost:8765 in your browser.
-"""
-
-import asyncio
-import json
-import logging
-import sys
-import tempfile
-from http import HTTPStatus
-from pathlib import Path
-
-import httpx
-import websockets
-from bs4 import BeautifulSoup
-from websockets.http11 import Request, Response
-
-# Add core, tools, and hive root to path
-_CORE_DIR = Path(__file__).resolve().parent.parent
-_HIVE_DIR = _CORE_DIR.parent
-sys.path.insert(0, str(_CORE_DIR))  # framework.*
-sys.path.insert(0, str(_HIVE_DIR / "tools" / "src"))  # aden_tools.*
-sys.path.insert(0, str(_HIVE_DIR))  # core.framework.* (for aden_tools imports)
-
-import os  # noqa: E402
-
-from aden_tools.credentials import CREDENTIAL_SPECS, CredentialStoreAdapter  # noqa: E402
-from core.framework.credentials import CredentialStore  # noqa: E402
-
-from framework.credentials.storage import (  # noqa: E402
-    CompositeStorage,
-    EncryptedFileStorage,
-    EnvVarStorage,
-)
-from framework.graph.event_loop_node import EventLoopNode, LoopConfig  # noqa: E402
-from framework.graph.node import NodeContext, NodeSpec, SharedMemory  # noqa: E402
-from framework.llm.litellm import LiteLLMProvider  # noqa: E402
-from framework.llm.provider import Tool  # noqa: E402
-from framework.runner.tool_registry import ToolRegistry  # noqa: E402
-from framework.runtime.core import Runtime  # noqa: E402
-from framework.runtime.event_bus import EventBus, EventType  # noqa: E402
-from framework.storage.conversation_store import FileConversationStore  # noqa: E402
-
-logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(message)s")
-logger = logging.getLogger("demo")
-
-# -------------------------------------------------------------------------
-# Persistent state (shared across WebSocket connections)
-# -------------------------------------------------------------------------
-
-STORE_DIR = Path(tempfile.mkdtemp(prefix="hive_demo_"))
-STORE = FileConversationStore(STORE_DIR / "conversation")
-RUNTIME = Runtime(STORE_DIR / "runtime")
-LLM = LiteLLMProvider(model="claude-sonnet-4-5-20250929")
-
-# -------------------------------------------------------------------------
-# Tool Registry — real tools via ToolRegistry (same pattern as GraphExecutor)
-# -------------------------------------------------------------------------
-
-TOOL_REGISTRY = ToolRegistry()
-
-# Credential store: Aden sync (OAuth2 tokens) + encrypted files + env var fallback
-_env_mapping = {name: spec.env_var for name, spec in CREDENTIAL_SPECS.items()}
-_local_storage = CompositeStorage(
-    primary=EncryptedFileStorage(),
-    fallbacks=[EnvVarStorage(env_mapping=_env_mapping)],
-)
-
-if os.environ.get("ADEN_API_KEY"):
-    try:
-        from framework.credentials.aden import (  # noqa: E402
-            AdenCachedStorage,
-            AdenClientConfig,
-            AdenCredentialClient,
-            AdenSyncProvider,
-        )
-
-        _client = AdenCredentialClient(AdenClientConfig(base_url="https://api.adenhq.com"))
-        _provider = AdenSyncProvider(client=_client)
-        _storage = AdenCachedStorage(
-            local_storage=_local_storage,
-            aden_provider=_provider,
-        )
-        _cred_store = CredentialStore(storage=_storage, providers=[_provider], auto_refresh=True)
-        _synced = _provider.sync_all(_cred_store)
-        logger.info("Synced %d credentials from Aden", _synced)
-    except Exception as e:
-        logger.warning("Aden sync unavailable: %s", e)
-        _cred_store = CredentialStore(storage=_local_storage)
-else:
-    logger.info("ADEN_API_KEY not set, using local credential storage")
-    _cred_store = CredentialStore(storage=_local_storage)
-
-CREDENTIALS = CredentialStoreAdapter(_cred_store)
-
-# Debug: log which credentials resolved
-for _name in ["brave_search", "hubspot", "anthropic"]:
-    _val = CREDENTIALS.get(_name)
-    if _val:
-        logger.debug("credential %s: OK (len=%d)", _name, len(_val))
-    else:
-        logger.debug("credential %s: not found", _name)
-
-# --- web_search (Brave Search API) ---
-
-TOOL_REGISTRY.register(
-    name="web_search",
-    tool=Tool(
-        name="web_search",
-        description=(
-            "Search the web for current information. "
-            "Returns titles, URLs, and snippets from search results."
-        ),
-        parameters={
-            "type": "object",
-            "properties": {
-                "query": {
-                    "type": "string",
-                    "description": "The search query (1-500 characters)",
-                },
-                "num_results": {
-                    "type": "integer",
-                    "description": "Number of results to return (1-20, default 10)",
-                },
-            },
-            "required": ["query"],
-        },
-    ),
-    executor=lambda inputs: _exec_web_search(inputs),
-)
-
-
-def _exec_web_search(inputs: dict) -> dict:
-    api_key = CREDENTIALS.get("brave_search")
-    if not api_key:
-        return {"error": "brave_search credential not configured"}
-    query = inputs.get("query", "")
-    num_results = min(inputs.get("num_results", 10), 20)
-    resp = httpx.get(
-        "https://api.search.brave.com/res/v1/web/search",
-        params={"q": query, "count": num_results},
-        headers={"X-Subscription-Token": api_key, "Accept": "application/json"},
-        timeout=30.0,
-    )
-    if resp.status_code != 200:
-        return {"error": f"Brave API HTTP {resp.status_code}"}
-    data = resp.json()
-    results = [
-        {
-            "title": item.get("title", ""),
-            "url": item.get("url", ""),
-            "snippet": item.get("description", ""),
-        }
-        for item in data.get("web", {}).get("results", [])[:num_results]
-    ]
-    return {"query": query, "results": results, "total": len(results)}
-
-
-# --- web_scrape (httpx + BeautifulSoup, no playwright for sync compat) ---
-
-TOOL_REGISTRY.register(
-    name="web_scrape",
-    tool=Tool(
-        name="web_scrape",
-        description=(
-            "Scrape and extract text content from a webpage URL. "
-            "Returns the page title and main text content."
-        ),
-        parameters={
-            "type": "object",
-            "properties": {
-                "url": {
-                    "type": "string",
-                    "description": "URL of the webpage to scrape",
-                },
-                "max_length": {
-                    "type": "integer",
-                    "description": "Maximum text length (default 50000)",
-                },
-            },
-            "required": ["url"],
-        },
-    ),
-    executor=lambda inputs: _exec_web_scrape(inputs),
-)
-
-_SCRAPE_HEADERS = {
-    "User-Agent": (
-        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
-        "AppleWebKit/537.36 (KHTML, like Gecko) "
-        "Chrome/131.0.0.0 Safari/537.36"
-    ),
-    "Accept": "text/html,application/xhtml+xml",
-}
-
-
-def _exec_web_scrape(inputs: dict) -> dict:
-    url = inputs.get("url", "")
-    max_length = max(1000, min(inputs.get("max_length", 50000), 500000))
-    if not url.startswith(("http://", "https://")):
-        url = "https://" + url
-    try:
-        resp = httpx.get(url, timeout=30.0, follow_redirects=True, headers=_SCRAPE_HEADERS)
-        if resp.status_code != 200:
-            return {"error": f"HTTP {resp.status_code}"}
-        soup = BeautifulSoup(resp.text, "html.parser")
-        for tag in soup(["script", "style", "nav", "footer", "header", "aside", "noscript"]):
-            tag.decompose()
-        title = soup.title.get_text(strip=True) if soup.title else ""
-        main = (
-            soup.find("article")
-            or soup.find("main")
-            or soup.find(attrs={"role": "main"})
-            or soup.find("body")
-        )
-        text = main.get_text(separator=" ", strip=True) if main else ""
-        text = " ".join(text.split())
-        if len(text) > max_length:
-            text = text[:max_length] + "..."
-        return {"url": url, "title": title, "content": text, "length": len(text)}
-    except httpx.TimeoutException:
-        return {"error": "Request timed out"}
-    except Exception as e:
-        return {"error": f"Scrape failed: {e}"}
-
-
-# --- HubSpot CRM tools (optional, requires HUBSPOT_ACCESS_TOKEN) ---
-
-_HUBSPOT_API = "https://api.hubapi.com"
-
-
-def _hubspot_headers() -> dict | None:
-    token = CREDENTIALS.get("hubspot")
-    if token:
-        logger.debug("HubSpot token: %s...%s (len=%d)", token[:8], token[-4:], len(token))
-    else:
-        logger.debug("HubSpot token: not found")
-    if not token:
-        return None
-    return {
-        "Authorization": f"Bearer {token}",
-        "Content-Type": "application/json",
-        "Accept": "application/json",
-    }
-
-
-def _exec_hubspot_search(inputs: dict) -> dict:
-    headers = _hubspot_headers()
-    if not headers:
-        return {"error": "HUBSPOT_ACCESS_TOKEN not set"}
-    object_type = inputs.get("object_type", "contacts")
-    query = inputs.get("query", "")
-    limit = min(inputs.get("limit", 10), 100)
-    body: dict = {"limit": limit}
-    if query:
-        body["query"] = query
-    try:
-        resp = httpx.post(
-            f"{_HUBSPOT_API}/crm/v3/objects/{object_type}/search",
-            headers=headers,
-            json=body,
-            timeout=30.0,
-        )
-        if resp.status_code != 200:
-            return {"error": f"HubSpot API HTTP {resp.status_code}: {resp.text[:200]}"}
-        return resp.json()
-    except httpx.TimeoutException:
-        return {"error": "Request timed out"}
-    except Exception as e:
-        return {"error": f"HubSpot error: {e}"}
-
-
-TOOL_REGISTRY.register(
-    name="hubspot_search",
-    tool=Tool(
-        name="hubspot_search",
-        description=(
-            "Search HubSpot CRM objects (contacts, companies, or deals). "
-            "Returns matching records with their properties."
-        ),
-        parameters={
-            "type": "object",
-            "properties": {
-                "object_type": {
-                    "type": "string",
-                    "description": "CRM object type: 'contacts', 'companies', or 'deals'",
-                },
-                "query": {
-                    "type": "string",
-                    "description": "Search query (name, email, domain, etc.)",
-                },
-                "limit": {
-                    "type": "integer",
-                    "description": "Max results (1-100, default 10)",
-                },
-            },
-            "required": ["object_type"],
-        },
-    ),
-    executor=lambda inputs: _exec_hubspot_search(inputs),
-)
-
-logger.info(
-    "ToolRegistry loaded: %s",
-    ", ".join(TOOL_REGISTRY.get_registered_names()),
-)
-
-
-# -------------------------------------------------------------------------
-# HTML page (embedded)
-# -------------------------------------------------------------------------
-
-HTML_PAGE = (  # noqa: E501
-    """<!DOCTYPE html>
-<html lang="en">
-<head>
-<meta charset="utf-8">
-<meta name="viewport" content="width=device-width, initial-scale=1">
-<title>EventLoopNode Live Demo</title>
-<style>
-  * { box-sizing: border-box; margin: 0; padding: 0; }
-  body {
-    font-family: 'SF Mono', 'Fira Code', monospace;
-    background: #0d1117; color: #c9d1d9;
-    height: 100vh; display: flex; flex-direction: column;
-  }
-  header {
-    background: #161b22; padding: 12px 20px;
-    border-bottom: 1px solid #30363d;
-    display: flex; align-items: center; gap: 16px;
-  }
-  header h1 { font-size: 16px; color: #58a6ff; font-weight: 600; }
-  .status {
-    font-size: 12px; padding: 3px 10px; border-radius: 12px;
-    background: #21262d; color: #8b949e;
-  }
-  .status.running { background: #1a4b2e; color: #3fb950; }
-  .status.done { background: #1a3a5c; color: #58a6ff; }
-  .status.error { background: #4b1a1a; color: #f85149; }
-  .chat { flex: 1; overflow-y: auto; padding: 16px; }
-  .msg {
-    margin: 8px 0; padding: 10px 14px; border-radius: 8px;
-    line-height: 1.6; white-space: pre-wrap; word-wrap: break-word;
-  }
-  .msg.user { background: #1a3a5c; color: #58a6ff; }
-  .msg.assistant { background: #161b22; color: #c9d1d9; }
-  .msg.event {
-    background: transparent; color: #8b949e; font-size: 11px;
-    padding: 4px 14px; border-left: 3px solid #30363d;
-  }
-  .msg.event.loop { border-left-color: #58a6ff; }
-  .msg.event.tool { border-left-color: #d29922; }
-  .msg.event.stall { border-left-color: #f85149; }
-  .input-bar {
-    padding: 12px 16px; background: #161b22;
-    border-top: 1px solid #30363d; display: flex; gap: 8px;
-  }
-  .input-bar input {
-    flex: 1; background: #0d1117; border: 1px solid #30363d;
-    color: #c9d1d9; padding: 8px 12px; border-radius: 6px;
-    font-family: inherit; font-size: 14px; outline: none;
-  }
-  .input-bar input:focus { border-color: #58a6ff; }
-  .input-bar button {
-    background: #238636; color: #fff; border: none;
-    padding: 8px 20px; border-radius: 6px; cursor: pointer;
-    font-family: inherit; font-weight: 600;
-  }
-  .input-bar button:hover { background: #2ea043; }
-  .input-bar button:disabled {
-    background: #21262d; color: #484f58; cursor: not-allowed;
-  }
-  .input-bar button.clear { background: #da3633; }
-  .input-bar button.clear:hover { background: #f85149; }
-</style>
-</head>
-<body>
-  <header>
-    <h1>EventLoopNode Live</h1>
-    <span id="status" class="status">Idle</span>
-    <span id="iter" class="status" style="display:none">Step 0</span>
-  </header>
-  <div id="chat" class="chat"></div>
-  <div class="input-bar">
-    <input id="input" type="text"
-           placeholder="Ask anything..." autofocus />
-    <button id="go" onclick="run()">Send</button>
-    <button class="clear"
-            onclick="clearConversation()">Clear</button>
-  </div>
-
-<script>
-let ws = null;
-let currentAssistantEl = null;
-let iterCount = 0;
-const chat = document.getElementById('chat');
-const status = document.getElementById('status');
-const iterEl = document.getElementById('iter');
-const goBtn = document.getElementById('go');
-const inputEl = document.getElementById('input');
-
-inputEl.addEventListener('keydown', e => {
-  if (e.key === 'Enter') run();
-});
-
-function setStatus(text, cls) {
-  status.textContent = text;
-  status.className = 'status ' + cls;
-}
-
-function addMsg(text, cls) {
-  const el = document.createElement('div');
-  el.className = 'msg ' + cls;
-  el.textContent = text;
-  chat.appendChild(el);
-  chat.scrollTop = chat.scrollHeight;
-  return el;
-}
-
-function connect() {
-  ws = new WebSocket('ws://' + location.host + '/ws');
-  ws.onopen = () => {
-    setStatus('Ready', 'done');
-    goBtn.disabled = false;
-  };
-  ws.onmessage = handleEvent;
-  ws.onerror = () => { setStatus('Error', 'error'); };
-  ws.onclose = () => {
-    setStatus('Reconnecting...', '');
-    goBtn.disabled = true;
-    setTimeout(connect, 2000);
-  };
-}
-
-function handleEvent(msg) {
-  const evt = JSON.parse(msg.data);
-
-  if (evt.type === 'llm_text_delta') {
-    if (currentAssistantEl) {
-      currentAssistantEl.textContent += evt.content;
-      chat.scrollTop = chat.scrollHeight;
-    }
-  }
-  else if (evt.type === 'ready') {
-    setStatus('Ready', 'done');
-    if (currentAssistantEl && !currentAssistantEl.textContent)
-      currentAssistantEl.remove();
-    goBtn.disabled = false;
-  }
-  else if (evt.type === 'node_loop_iteration') {
-    iterCount = evt.iteration || (iterCount + 1);
-    iterEl.textContent = 'Step ' + iterCount;
-    iterEl.style.display = '';
-  }
-  else if (evt.type === 'tool_call_started') {
-    var info = evt.tool_name + '('
-      + JSON.stringify(evt.tool_input).slice(0, 120) + ')';
-    addMsg('TOOL  ' + info, 'event tool');
-  }
-  else if (evt.type === 'tool_call_completed') {
-    var preview = (evt.result || '').slice(0, 200);
-    var cls = evt.is_error ? 'stall' : 'tool';
-    addMsg('RESULT  ' + evt.tool_name + ': ' + preview,
-           'event ' + cls);
-    currentAssistantEl = addMsg('', 'assistant');
-  }
-  else if (evt.type === 'result') {
-    setStatus('Session ended', evt.success ? 'done' : 'error');
-    if (evt.error) addMsg('ERROR  ' + evt.error, 'event stall');
-    if (currentAssistantEl && !currentAssistantEl.textContent)
-      currentAssistantEl.remove();
-    goBtn.disabled = false;
-  }
-  else if (evt.type === 'node_stalled') {
-    addMsg('STALLED  ' + evt.reason, 'event stall');
-  }
-  else if (evt.type === 'cleared') {
-    chat.innerHTML = '';
-    iterCount = 0;
-    iterEl.textContent = 'Step 0';
-    iterEl.style.display = 'none';
-    setStatus('Ready', 'done');
-    goBtn.disabled = false;
-  }
-}
-
-function run() {
-  const text = inputEl.value.trim();
-  if (!text || !ws || ws.readyState !== 1) return;
-  addMsg(text, 'user');
-  currentAssistantEl = addMsg('', 'assistant');
-  inputEl.value = '';
-  setStatus('Running', 'running');
-  goBtn.disabled = true;
-  ws.send(JSON.stringify({ topic: text }));
-}
-
-function clearConversation() {
-  if (ws && ws.readyState === 1) {
-    ws.send(JSON.stringify({ command: 'clear' }));
-  }
-}
-
-connect();
-</script>
-</body>
-</html>"""
-)
-
-
-# -------------------------------------------------------------------------
-# WebSocket handler
-# -------------------------------------------------------------------------
-
-
-async def handle_ws(websocket):
-    """Persistent WebSocket: long-lived EventLoopNode with client_facing blocking."""
-    global STORE
-
-    # -- Event forwarding (WebSocket ← EventBus) ----------------------------
-    bus = EventBus()
-
-    async def forward_event(event):
-        try:
-            payload = {"type": event.type.value, **event.data}
-            if event.node_id:
-                payload["node_id"] = event.node_id
-            await websocket.send(json.dumps(payload))
-        except Exception:
-            pass
-
-    bus.subscribe(
-        event_types=[
-            EventType.NODE_LOOP_STARTED,
-            EventType.NODE_LOOP_ITERATION,
-            EventType.NODE_LOOP_COMPLETED,
-            EventType.LLM_TEXT_DELTA,
-            EventType.TOOL_CALL_STARTED,
-            EventType.TOOL_CALL_COMPLETED,
-            EventType.NODE_STALLED,
-        ],
-        handler=forward_event,
-    )
-
-    # -- Per-connection state -----------------------------------------------
-    node = None
-    loop_task = None
-
-    tools = list(TOOL_REGISTRY.get_tools().values())
-    tool_executor = TOOL_REGISTRY.get_executor()
-
-    node_spec = NodeSpec(
-        id="assistant",
-        name="Chat Assistant",
-        description="A conversational assistant that remembers context across messages",
-        node_type="event_loop",
-        client_facing=True,
-        system_prompt=(
-            "You are a helpful assistant with access to tools. "
-            "You can search the web, scrape webpages, and query HubSpot CRM. "
-            "Use tools when the user asks for current information or external data. "
-            "You have full conversation history, so you can reference previous messages."
-        ),
-    )
-
-    # -- Ready callback: subscribe to CLIENT_INPUT_REQUESTED on the bus ---
-    async def on_input_requested(event):
-        try:
-            await websocket.send(json.dumps({"type": "ready"}))
-        except Exception:
-            pass
-
-    bus.subscribe(
-        event_types=[EventType.CLIENT_INPUT_REQUESTED],
-        handler=on_input_requested,
-    )
-
-    async def start_loop(first_message: str):
-        """Create an EventLoopNode and run it as a background task."""
-        nonlocal node, loop_task
-
-        memory = SharedMemory()
-        ctx = NodeContext(
-            runtime=RUNTIME,
-            node_id="assistant",
-            node_spec=node_spec,
-            memory=memory,
-            input_data={},
-            llm=LLM,
-            available_tools=tools,
-        )
-        node = EventLoopNode(
-            event_bus=bus,
-            config=LoopConfig(max_iterations=10_000, max_history_tokens=32_000),
-            conversation_store=STORE,
-            tool_executor=tool_executor,
-        )
-        await node.inject_event(first_message)
-
-        async def _run():
-            try:
-                result = await node.execute(ctx)
-                try:
-                    await websocket.send(
-                        json.dumps(
-                            {
-                                "type": "result",
-                                "success": result.success,
-                                "output": result.output,
-                                "error": result.error,
-                                "tokens": result.tokens_used,
-                            }
-                        )
-                    )
-                except Exception:
-                    pass
-                logger.info(f"Loop ended: success={result.success}, tokens={result.tokens_used}")
-            except websockets.exceptions.ConnectionClosed:
-                logger.info("Loop stopped: WebSocket closed")
-            except Exception as e:
-                logger.exception("Loop error")
-                try:
-                    await websocket.send(
-                        json.dumps(
-                            {
-                                "type": "result",
-                                "success": False,
-                                "error": str(e),
-                                "output": {},
-                            }
-                        )
-                    )
-                except Exception:
-                    pass
-
-        loop_task = asyncio.create_task(_run())
-
-    async def stop_loop():
-        """Signal the node and wait for the loop task to finish."""
-        nonlocal node, loop_task
-        if loop_task and not loop_task.done():
-            if node:
-                node.signal_shutdown()
-            try:
-                await asyncio.wait_for(loop_task, timeout=5.0)
-            except (TimeoutError, asyncio.CancelledError):
-                loop_task.cancel()
-        node = None
-        loop_task = None
-
-    # -- Message loop (runs for the lifetime of this WebSocket) -------------
-    try:
-        async for raw in websocket:
-            try:
-                msg = json.loads(raw)
-            except Exception:
-                continue
-
-            # Clear command
-            if msg.get("command") == "clear":
-                import shutil
-
-                await stop_loop()
-                await STORE.close()
-                conv_dir = STORE_DIR / "conversation"
-                if conv_dir.exists():
-                    shutil.rmtree(conv_dir)
-                STORE = FileConversationStore(conv_dir)
-                await websocket.send(json.dumps({"type": "cleared"}))
-                logger.info("Conversation cleared")
-                continue
-
-            topic = msg.get("topic", "")
-            if not topic:
-                continue
-
-            if node is None:
-                # First message — spin up the loop
-                logger.info(f"Starting persistent loop: {topic}")
-                await start_loop(topic)
-            else:
-                # Subsequent message — inject into the running loop
-                logger.info(f"Injecting message: {topic}")
-                await node.inject_event(topic)
-
-    except websockets.exceptions.ConnectionClosed:
-        pass
-    finally:
-        await stop_loop()
-        logger.info("WebSocket closed, loop stopped")
-
-
-# -------------------------------------------------------------------------
-# HTTP handler for serving the HTML page
-# -------------------------------------------------------------------------
-
-
-async def process_request(connection, request: Request):
-    """Serve HTML on GET /, upgrade to WebSocket on /ws."""
-    if request.path == "/ws":
-        return None  # let websockets handle the upgrade
-    # Serve the HTML page for any other path
-    return Response(
-        HTTPStatus.OK,
-        "OK",
-        websockets.Headers({"Content-Type": "text/html; charset=utf-8"}),
-        HTML_PAGE.encode(),
-    )
-
-
-# -------------------------------------------------------------------------
-# Main
-# -------------------------------------------------------------------------
-
-
-async def main():
-    port = 8765
-    async with websockets.serve(
-        handle_ws,
-        "0.0.0.0",
-        port,
-        process_request=process_request,
-    ):
-        logger.info(f"Demo running at http://localhost:{port}")
-        logger.info("Open in your browser and enter a topic to research.")
-        await asyncio.Future()  # run forever
-
-
-if __name__ == "__main__":
-    asyncio.run(main())
@@ -1,930 +0,0 @@
-#!/usr/bin/env python3
-"""
-Two-Node ContextHandoff Demo
-
-Demonstrates ContextHandoff between two EventLoopNode instances:
-  Node A (Researcher) → ContextHandoff → Node B (Analyst)
-
-Real LLM, real FileConversationStore, real EventBus.
-Streams both nodes to a browser via WebSocket.
-
-Usage:
-    cd core  # from the hive repository root
-    python demos/handoff_demo.py
-
-    Then open http://localhost:8766 in your browser.
-"""
-
-import asyncio
-import json
-import logging
-import sys
-import tempfile
-from http import HTTPStatus
-from pathlib import Path
-
-import httpx
-import websockets
-from bs4 import BeautifulSoup
-from websockets.http11 import Request, Response
-
-# Add core, tools, and hive root to path
-_CORE_DIR = Path(__file__).resolve().parent.parent
-_HIVE_DIR = _CORE_DIR.parent
-sys.path.insert(0, str(_CORE_DIR))  # framework.*
-sys.path.insert(0, str(_HIVE_DIR / "tools" / "src"))  # aden_tools.*
-sys.path.insert(0, str(_HIVE_DIR))  # core.framework.* (for aden_tools imports)
-
-from aden_tools.credentials import CREDENTIAL_SPECS, CredentialStoreAdapter  # noqa: E402
-from core.framework.credentials import CredentialStore  # noqa: E402
-
-from framework.credentials.storage import (  # noqa: E402
-    CompositeStorage,
-    EncryptedFileStorage,
-    EnvVarStorage,
-)
-from framework.graph.context_handoff import ContextHandoff  # noqa: E402
-from framework.graph.conversation import NodeConversation  # noqa: E402
-from framework.graph.event_loop_node import EventLoopNode, LoopConfig  # noqa: E402
-from framework.graph.node import NodeContext, NodeSpec, SharedMemory  # noqa: E402
-from framework.llm.litellm import LiteLLMProvider  # noqa: E402
-from framework.llm.provider import Tool  # noqa: E402
-from framework.runner.tool_registry import ToolRegistry  # noqa: E402
-from framework.runtime.core import Runtime  # noqa: E402
-from framework.runtime.event_bus import EventBus, EventType  # noqa: E402
-from framework.storage.conversation_store import FileConversationStore  # noqa: E402
-
-logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(message)s")
-logger = logging.getLogger("handoff_demo")
-
-# -------------------------------------------------------------------------
-# Persistent state
-# -------------------------------------------------------------------------
-
-STORE_DIR = Path(tempfile.mkdtemp(prefix="hive_handoff_"))
-RUNTIME = Runtime(STORE_DIR / "runtime")
-LLM = LiteLLMProvider(model="claude-sonnet-4-5-20250929")
-
-# -------------------------------------------------------------------------
-# Credentials
-# -------------------------------------------------------------------------
-
-# Composite credential store: encrypted files (primary) + env vars (fallback)
-_env_mapping = {name: spec.env_var for name, spec in CREDENTIAL_SPECS.items()}
-_composite = CompositeStorage(
-    primary=EncryptedFileStorage(),
-    fallbacks=[EnvVarStorage(env_mapping=_env_mapping)],
-)
-CREDENTIALS = CredentialStoreAdapter(CredentialStore(storage=_composite))
-
-for _name in ["brave_search", "hubspot"]:
-    _val = CREDENTIALS.get(_name)
-    if _val:
-        logger.debug("credential %s: OK (len=%d)", _name, len(_val))
-    else:
-        logger.debug("credential %s: not found", _name)
-
-# -------------------------------------------------------------------------
-# Tool Registry — web_search + web_scrape for Node A (Researcher)
-# -------------------------------------------------------------------------
-
-TOOL_REGISTRY = ToolRegistry()
-
-
-def _exec_web_search(inputs: dict) -> dict:
-    """Run a Brave web search on behalf of the ``web_search`` tool.
-
-    Args:
-        inputs: Tool arguments; reads ``query`` (default ``""``) and
-            ``num_results`` (default 10).
-
-    Returns:
-        ``{"query", "results", "total"}`` on success, where each result holds
-        ``title``, ``url`` and ``snippet``; ``{"error": ...}`` when the
-        ``brave_search`` credential is missing or the API answers non-200.
-    """
-    api_key = CREDENTIALS.get("brave_search")
-    if not api_key:
-        return {"error": "brave_search credential not configured"}
-    query = inputs.get("query", "")
-    # Caps the request at 20 results; no lower bound is applied to the value.
-    num_results = min(inputs.get("num_results", 10), 20)
-    # NOTE(review): httpx errors (timeouts, connection failures) are not caught
-    # here and propagate to the caller, unlike in _exec_web_scrape.
-    resp = httpx.get(
-        "https://api.search.brave.com/res/v1/web/search",
-        params={"q": query, "count": num_results},
-        headers={
-            "X-Subscription-Token": api_key,
-            "Accept": "application/json",
-        },
-        timeout=30.0,
-    )
-    if resp.status_code != 200:
-        return {"error": f"Brave API HTTP {resp.status_code}"}
-    data = resp.json()
-    # Missing "web" / "results" keys yield an empty list rather than an error;
-    # the API's "description" field is exposed to the caller as "snippet".
-    results = [
-        {
-            "title": item.get("title", ""),
-            "url": item.get("url", ""),
-            "snippet": item.get("description", ""),
-        }
-        for item in data.get("web", {}).get("results", [])[:num_results]
-    ]
-    return {"query": query, "results": results, "total": len(results)}
-
-
-TOOL_REGISTRY.register(
-    name="web_search",
-    tool=Tool(
-        name="web_search",
-        description=(
-            "Search the web for current information. "
-            "Returns titles, URLs, and snippets from search results."
-        ),
-        parameters={
-            "type": "object",
-            "properties": {
-                "query": {
-                    "type": "string",
-                    "description": "The search query (1-500 characters)",
-                },
-                "num_results": {
-                    "type": "integer",
-                    "description": "Number of results (1-20, default 10)",
-                },
-            },
-            "required": ["query"],
-        },
-    ),
-    executor=lambda inputs: _exec_web_search(inputs),
-)
-
-_SCRAPE_HEADERS = {
-    "User-Agent": (
-        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
-        "AppleWebKit/537.36 (KHTML, like Gecko) "
-        "Chrome/131.0.0.0 Safari/537.36"
-    ),
-    "Accept": "text/html,application/xhtml+xml",
-}
-
-
-def _exec_web_scrape(inputs: dict) -> dict:
-    """Fetch a web page and return its title and main text for ``web_scrape``.
-
-    Args:
-        inputs: Tool arguments; reads ``url`` (default ``""``) and
-            ``max_length`` (default 50000).
-
-    Returns:
-        ``{"url", "title", "content", "length"}`` on success, or
-        ``{"error": ...}`` for a non-200 response, a timeout, or any other
-        exception raised while fetching or parsing.
-    """
-    url = inputs.get("url", "")
-    # Clamp the requested length into the range [1000, 500000].
-    max_length = max(1000, min(inputs.get("max_length", 50000), 500000))
-    # URLs given without a scheme are treated as https.
-    if not url.startswith(("http://", "https://")):
-        url = "https://" + url
-    try:
-        resp = httpx.get(
-            url,
-            timeout=30.0,
-            follow_redirects=True,
-            headers=_SCRAPE_HEADERS,
-        )
-        if resp.status_code != 200:
-            return {"error": f"HTTP {resp.status_code}"}
-        soup = BeautifulSoup(resp.text, "html.parser")
-        # Drop these elements before any text is extracted.
-        for tag in soup(["script", "style", "nav", "footer", "header", "aside", "noscript"]):
-            tag.decompose()
-        title = soup.title.get_text(strip=True) if soup.title else ""
-        # First match wins: <article>, then <main>, then role="main", then <body>.
-        main = (
-            soup.find("article")
-            or soup.find("main")
-            or soup.find(attrs={"role": "main"})
-            or soup.find("body")
-        )
-        text = main.get_text(separator=" ", strip=True) if main else ""
-        # Collapse every run of whitespace into a single space.
-        text = " ".join(text.split())
-        if len(text) > max_length:
-            text = text[:max_length] + "..."
-        return {
-            "url": url,
-            "title": title,
-            "content": text,
-            # Measured after truncation, so it includes the appended "...".
-            "length": len(text),
-        }
-    except httpx.TimeoutException:
-        return {"error": "Request timed out"}
-    except Exception as e:
-        return {"error": f"Scrape failed: {e}"}
-
-
-TOOL_REGISTRY.register(
-    name="web_scrape",
-    tool=Tool(
-        name="web_scrape",
-        description=(
-            "Scrape and extract text content from a webpage URL. "
-            "Returns the page title and main text content."
-        ),
-        parameters={
-            "type": "object",
-            "properties": {
-                "url": {
-                    "type": "string",
-                    "description": "URL of the webpage to scrape",
-                },
-                "max_length": {
-                    "type": "integer",
-                    "description": "Maximum text length (default 50000)",
-                },
-            },
-            "required": ["url"],
-        },
-    ),
-    executor=lambda inputs: _exec_web_scrape(inputs),
-)
-
-logger.info(
-    "ToolRegistry loaded: %s",
-    ", ".join(TOOL_REGISTRY.get_registered_names()),
-)
-
-# -------------------------------------------------------------------------
-# Node Specs
-# -------------------------------------------------------------------------
-
-RESEARCHER_SPEC = NodeSpec(
-    id="researcher",
-    name="Researcher",
-    description="Researches a topic using web search and scraping tools",
-    node_type="event_loop",
-    input_keys=["topic"],
-    output_keys=["research_summary"],
-    system_prompt=(
-        "You are a thorough research assistant. Your job is to research "
-        "the given topic using the web_search and web_scrape tools.\n\n"
-        "1. Search for relevant information on the topic\n"
-        "2. Scrape 1-2 of the most promising URLs for details\n"
-        "3. Synthesize your findings into a comprehensive summary\n"
-        "4. Use set_output with key='research_summary' to save your "
-        "findings\n\n"
-        "Be thorough but efficient. Aim for 2-4 search/scrape calls, "
-        "then summarize and set_output."
-    ),
-)
-
-ANALYST_SPEC = NodeSpec(
-    id="analyst",
-    name="Analyst",
-    description="Analyzes research findings and provides insights",
-    node_type="event_loop",
-    input_keys=["context"],
-    output_keys=["analysis"],
-    system_prompt=(
-        "You are a strategic analyst. You receive research findings from "
-        "a previous researcher and must:\n\n"
-        "1. Identify key themes and patterns\n"
-        "2. Assess the reliability and significance of the findings\n"
-        "3. Provide actionable insights and recommendations\n"
-        "4. Use set_output with key='analysis' to save your analysis\n\n"
-        "Be concise but insightful. Focus on what matters most."
-    ),
-)
-
-
-# -------------------------------------------------------------------------
-# HTML page
-# -------------------------------------------------------------------------
-
-HTML_PAGE = (  # noqa: E501
-    """<!DOCTYPE html>
-<html lang="en">
-<head>
-<meta charset="utf-8">
-<meta name="viewport" content="width=device-width, initial-scale=1">
-<title>ContextHandoff Demo</title>
-<style>
-  * {
-    box-sizing: border-box;
-    margin: 0;
-    padding: 0;
-  }
-  body {
-    font-family: 'SF Mono', 'Fira Code', monospace;
-    background: #0d1117;
-    color: #c9d1d9;
-    height: 100vh;
-    display: flex;
-    flex-direction: column;
-  }
-  header {
-    background: #161b22;
-    padding: 12px 20px;
-    border-bottom: 1px solid #30363d;
-    display: flex;
-    align-items: center;
-    gap: 16px;
-  }
-  header h1 {
-    font-size: 16px;
-    color: #58a6ff;
-    font-weight: 600;
-  }
-  .badge {
-    font-size: 12px;
-    padding: 3px 10px;
-    border-radius: 12px;
-    background: #21262d;
-    color: #8b949e;
-  }
-  .badge.researcher {
-    background: #1a3a5c;
-    color: #58a6ff;
-  }
-  .badge.analyst {
-    background: #1a4b2e;
-    color: #3fb950;
-  }
-  .badge.handoff {
-    background: #3d1f00;
-    color: #d29922;
-  }
-  .badge.done {
-    background: #21262d;
-    color: #8b949e;
-  }
-  .badge.error {
-    background: #4b1a1a;
-    color: #f85149;
-  }
-  .chat {
-    flex: 1;
-    overflow-y: auto;
-    padding: 16px;
-  }
-  .msg {
-    margin: 8px 0;
-    padding: 10px 14px;
-    border-radius: 8px;
-    line-height: 1.6;
-    white-space: pre-wrap;
-    word-wrap: break-word;
-  }
-  .msg.user {
-    background: #1a3a5c;
-    color: #58a6ff;
-  }
-  .msg.assistant {
-    background: #161b22;
-    color: #c9d1d9;
-  }
-  .msg.assistant.analyst-msg {
-    border-left: 3px solid #3fb950;
-  }
-  .msg.event {
-    background: transparent;
-    color: #8b949e;
-    font-size: 11px;
-    padding: 4px 14px;
-    border-left: 3px solid #30363d;
-  }
-  .msg.event.loop {
-    border-left-color: #58a6ff;
-  }
-  .msg.event.tool {
-    border-left-color: #d29922;
-  }
-  .msg.event.stall {
-    border-left-color: #f85149;
-  }
-  .handoff-banner {
-    margin: 16px 0;
-    padding: 16px;
-    background: #1c1200;
-    border: 1px solid #d29922;
-    border-radius: 8px;
-    text-align: center;
-  }
-  .handoff-banner h3 {
-    color: #d29922;
-    font-size: 14px;
-    margin-bottom: 8px;
-  }
-  .handoff-banner p, .result-banner p {
-    color: #8b949e;
-    font-size: 12px;
-    line-height: 1.5;
-    max-height: 200px;
-    overflow-y: auto;
-    white-space: pre-wrap;
-    text-align: left;
-  }
-  .result-banner {
-    margin: 16px 0;
-    padding: 16px;
-    background: #0a2614;
-    border: 1px solid #3fb950;
-    border-radius: 8px;
-  }
-  .result-banner h3 {
-    color: #3fb950;
-    font-size: 14px;
-    margin-bottom: 8px;
-    text-align: center;
-  }
-  .result-banner .label {
-    color: #58a6ff;
-    font-size: 11px;
-    font-weight: 600;
-    margin-top: 10px;
-    margin-bottom: 2px;
-  }
-  .result-banner .tokens {
-    color: #484f58;
-    font-size: 11px;
-    text-align: center;
-    margin-top: 10px;
-  }
-  .input-bar {
-    padding: 12px 16px;
-    background: #161b22;
-    border-top: 1px solid #30363d;
-    display: flex;
-    gap: 8px;
-  }
-  .input-bar input {
-    flex: 1;
-    background: #0d1117;
-    border: 1px solid #30363d;
-    color: #c9d1d9;
-    padding: 8px 12px;
-    border-radius: 6px;
-    font-family: inherit;
-    font-size: 14px;
-    outline: none;
-  }
-  .input-bar input:focus {
-    border-color: #58a6ff;
-  }
-  .input-bar button {
-    background: #238636;
-    color: #fff;
-    border: none;
-    padding: 8px 20px;
-    border-radius: 6px;
-    cursor: pointer;
-    font-family: inherit;
-    font-weight: 600;
-  }
-  .input-bar button:hover {
-    background: #2ea043;
-  }
-  .input-bar button:disabled {
-    background: #21262d;
-    color: #484f58;
-    cursor: not-allowed;
-  }
-</style>
-</head>
-<body>
-  <header>
-    <h1>ContextHandoff Demo</h1>
-    <span id="phase" class="badge">Idle</span>
-    <span id="iter" class="badge" style="display:none">Step 0</span>
-  </header>
-  <div id="chat" class="chat"></div>
-  <div class="input-bar">
-    <input id="input" type="text"
-           placeholder="Enter a research topic..." autofocus />
-    <button id="go" onclick="run()">Research</button>
-  </div>
-
-<script>
-let ws = null;
-let currentAssistantEl = null;
-let iterCount = 0;
-let currentPhase = 'idle';
-const chat = document.getElementById('chat');
-const phase = document.getElementById('phase');
-const iterEl = document.getElementById('iter');
-const goBtn = document.getElementById('go');
-const inputEl = document.getElementById('input');
-
-inputEl.addEventListener('keydown', e => {
-  if (e.key === 'Enter') run();
-});
-
-function setPhase(text, cls) {
-  phase.textContent = text;
-  phase.className = 'badge ' + cls;
-  currentPhase = cls;
-}
-
-function addMsg(text, cls) {
-  const el = document.createElement('div');
-  el.className = 'msg ' + cls;
-  el.textContent = text;
-  chat.appendChild(el);
-  chat.scrollTop = chat.scrollHeight;
-  return el;
-}
-
-function addHandoffBanner(summary) {
-  const banner = document.createElement('div');
-  banner.className = 'handoff-banner';
-  const h3 = document.createElement('h3');
-  h3.textContent = 'Context Handoff: Researcher -> Analyst';
-  const p = document.createElement('p');
-  p.textContent = summary || 'Passing research context...';
-  banner.appendChild(h3);
-  banner.appendChild(p);
-  chat.appendChild(banner);
-  chat.scrollTop = chat.scrollHeight;
-}
-
-function addResultBanner(researcher, analyst, tokens) {
-  const banner = document.createElement('div');
-  banner.className = 'result-banner';
-  const h3 = document.createElement('h3');
-  h3.textContent = 'Pipeline Complete';
-  banner.appendChild(h3);
-
-  if (researcher && researcher.research_summary) {
-    const lbl = document.createElement('div');
-    lbl.className = 'label';
-    lbl.textContent = 'RESEARCH SUMMARY';
-    banner.appendChild(lbl);
-    const p = document.createElement('p');
-    p.textContent = researcher.research_summary;
-    banner.appendChild(p);
-  }
-
-  if (analyst && analyst.analysis) {
-    const lbl = document.createElement('div');
-    lbl.className = 'label';
-    lbl.textContent = 'ANALYSIS';
-    lbl.style.color = '#3fb950';
-    banner.appendChild(lbl);
-    const p = document.createElement('p');
-    p.textContent = analyst.analysis;
-    banner.appendChild(p);
-  }
-
-  if (tokens) {
-    const t = document.createElement('div');
-    t.className = 'tokens';
-    t.textContent = 'Total tokens: ' + tokens.toLocaleString();
-    banner.appendChild(t);
-  }
-
-  chat.appendChild(banner);
-  chat.scrollTop = chat.scrollHeight;
-}
-
-function connect() {
-  ws = new WebSocket('ws://' + location.host + '/ws');
-  ws.onopen = () => {
-    setPhase('Ready', 'done');
-    goBtn.disabled = false;
-  };
-  ws.onmessage = handleEvent;
-  ws.onerror = () => { setPhase('Error', 'error'); };
-  ws.onclose = () => {
-    setPhase('Reconnecting...', '');
-    goBtn.disabled = true;
-    setTimeout(connect, 2000);
-  };
-}
-
-function handleEvent(msg) {
-  const evt = JSON.parse(msg.data);
-
-  if (evt.type === 'phase') {
-    if (evt.phase === 'researcher') {
-      setPhase('Researcher', 'researcher');
-    } else if (evt.phase === 'handoff') {
-      setPhase('Handoff', 'handoff');
-    } else if (evt.phase === 'analyst') {
-      setPhase('Analyst', 'analyst');
-    }
-    iterCount = 0;
-    iterEl.style.display = 'none';
-  }
-  else if (evt.type === 'llm_text_delta') {
-    if (currentAssistantEl) {
-      currentAssistantEl.textContent += evt.content;
-      chat.scrollTop = chat.scrollHeight;
-    }
-  }
-  else if (evt.type === 'node_loop_iteration') {
-    iterCount = evt.iteration || (iterCount + 1);
-    iterEl.textContent = 'Step ' + iterCount;
-    iterEl.style.display = '';
-  }
-  else if (evt.type === 'tool_call_started') {
-    var info = evt.tool_name + '('
-      + JSON.stringify(evt.tool_input).slice(0, 120) + ')';
-    addMsg('TOOL  ' + info, 'event tool');
-  }
-  else if (evt.type === 'tool_call_completed') {
-    var preview = (evt.result || '').slice(0, 200);
-    var cls = evt.is_error ? 'stall' : 'tool';
-    addMsg(
-      'RESULT  ' + evt.tool_name + ': ' + preview,
-      'event ' + cls
-    );
-    var assistCls = currentPhase === 'analyst'
-      ? 'assistant analyst-msg' : 'assistant';
-    currentAssistantEl = addMsg('', assistCls);
-  }
-  else if (evt.type === 'handoff_context') {
-    addHandoffBanner(evt.summary);
-    var assistCls = 'assistant analyst-msg';
-    currentAssistantEl = addMsg('', assistCls);
-  }
-  else if (evt.type === 'node_result') {
-    if (evt.node_id === 'researcher') {
-      if (currentAssistantEl
-          && !currentAssistantEl.textContent) {
-        currentAssistantEl.remove();
-      }
-    }
-  }
-  else if (evt.type === 'done') {
-    setPhase('Done', 'done');
-    iterEl.style.display = 'none';
-    if (currentAssistantEl
-        && !currentAssistantEl.textContent) {
-      currentAssistantEl.remove();
-    }
-    currentAssistantEl = null;
-    addResultBanner(
-      evt.researcher, evt.analyst, evt.total_tokens
-    );
-    goBtn.disabled = false;
-    inputEl.placeholder = 'Enter another topic...';
-  }
-  else if (evt.type === 'error') {
-    setPhase('Error', 'error');
-    addMsg('ERROR  ' + evt.message, 'event stall');
-    goBtn.disabled = false;
-  }
-  else if (evt.type === 'node_stalled') {
-    addMsg('STALLED  ' + evt.reason, 'event stall');
-  }
-}
-
-function run() {
-  const text = inputEl.value.trim();
-  if (!text || !ws || ws.readyState !== 1) return;
-  chat.innerHTML = '';
-  addMsg(text, 'user');
-  currentAssistantEl = addMsg('', 'assistant');
-  inputEl.value = '';
-  goBtn.disabled = true;
-  ws.send(JSON.stringify({ topic: text }));
-}
-
-connect();
-</script>
-</body>
-</html>"""
-)
-
-
-# -------------------------------------------------------------------------
-# WebSocket handler — sequential Node A → Handoff → Node B
-# -------------------------------------------------------------------------
-
-
-async def handle_ws(websocket):
-    """Run the two-node handoff pipeline per user message.
-
-    Each incoming frame is parsed as JSON; frames that fail to parse or that
-    carry no non-empty ``topic`` are ignored. For every accepted topic the
-    pipeline runs to completion before the next frame is read.
-
-    Args:
-        websocket: The connection to read topics from and send events to.
-    """
-    try:
-        async for raw in websocket:
-            try:
-                msg = json.loads(raw)
-            except Exception:
-                continue
-
-            # NOTE(review): a JSON payload that is not an object (e.g. a list)
-            # has no .get(); that error is raised outside the guarded parse
-            # above and is not caught by the outer ConnectionClosed handler.
-            topic = msg.get("topic", "")
-            if not topic:
-                continue
-
-            logger.info(f"Starting handoff pipeline for: {topic}")
-
-            try:
-                await _run_pipeline(websocket, topic)
-            except websockets.exceptions.ConnectionClosed:
-                logger.info("WebSocket closed during pipeline")
-                return
-            except Exception as e:
-                logger.exception("Pipeline error")
-                # Best effort: report the failure to the browser, ignoring any
-                # error raised while sending.
-                try:
-                    await websocket.send(json.dumps({"type": "error", "message": str(e)}))
-                except Exception:
-                    pass
-
-    except websockets.exceptions.ConnectionClosed:
-        pass
-
-
-async def _run_pipeline(websocket, topic: str):
-    """Execute: Node A (research) → ContextHandoff → Node B (analysis).
-
-    Progress is reported to the browser as JSON messages on ``websocket``:
-    ``phase`` markers, forwarded event-bus events, ``node_result``,
-    ``handoff_context``, and finally ``done`` (or ``error`` on an early exit).
-
-    Args:
-        websocket: Connection that receives every progress message.
-        topic: Research topic passed to the researcher node as its input.
-
-    Returns:
-        ``None``. Returns early, after sending an ``error`` message, when the
-        researcher fails or its conversation cannot be restored.
-    """
-    import shutil
-
-    # Fresh stores for each run
-    run_dir = Path(tempfile.mkdtemp(prefix="hive_run_", dir=STORE_DIR))
-    store_a = FileConversationStore(run_dir / "node_a")
-    store_b = FileConversationStore(run_dir / "node_b")
-
-    # Shared event bus
-    bus = EventBus()
-
-    async def forward_event(event):
-        # Relay one bus event to the browser; any failure here (including a
-        # failed send) is deliberately swallowed.
-        try:
-            payload = {"type": event.type.value, **event.data}
-            if event.node_id:
-                payload["node_id"] = event.node_id
-            await websocket.send(json.dumps(payload))
-        except Exception:
-            pass
-
-    bus.subscribe(
-        event_types=[
-            EventType.NODE_LOOP_STARTED,
-            EventType.NODE_LOOP_ITERATION,
-            EventType.NODE_LOOP_COMPLETED,
-            EventType.LLM_TEXT_DELTA,
-            EventType.TOOL_CALL_STARTED,
-            EventType.TOOL_CALL_COMPLETED,
-            EventType.NODE_STALLED,
-        ],
-        handler=forward_event,
-    )
-
-    tools = list(TOOL_REGISTRY.get_tools().values())
-    tool_executor = TOOL_REGISTRY.get_executor()
-
-    # ---- Phase 1: Researcher ------------------------------------------------
-    await websocket.send(json.dumps({"type": "phase", "phase": "researcher"}))
-
-    node_a = EventLoopNode(
-        event_bus=bus,
-        judge=None,  # implicit judge: accept when output_keys filled
-        config=LoopConfig(
-            max_iterations=20,
-            max_tool_calls_per_turn=30,
-            max_history_tokens=32_000,
-        ),
-        conversation_store=store_a,
-        tool_executor=tool_executor,
-    )
-
-    ctx_a = NodeContext(
-        runtime=RUNTIME,
-        node_id="researcher",
-        node_spec=RESEARCHER_SPEC,
-        memory=SharedMemory(),
-        input_data={"topic": topic},
-        llm=LLM,
-        available_tools=tools,
-    )
-
-    result_a = await node_a.execute(ctx_a)
-    logger.info(
-        "Researcher done: success=%s, tokens=%s",
-        result_a.success,
-        result_a.tokens_used,
-    )
-
-    await websocket.send(
-        json.dumps(
-            {
-                "type": "node_result",
-                "node_id": "researcher",
-                "success": result_a.success,
-                "output": result_a.output,
-            }
-        )
-    )
-
-    if not result_a.success:
-        await websocket.send(
-            json.dumps(
-                {
-                    "type": "error",
-                    "message": f"Researcher failed: {result_a.error}",
-                }
-            )
-        )
-        return
-
-    # ---- Phase 2: Context Handoff -------------------------------------------
-    await websocket.send(json.dumps({"type": "phase", "phase": "handoff"}))
-
-    # Restore the researcher's conversation from store
-    conversation_a = await NodeConversation.restore(store_a)
-    if conversation_a is None:
-        await websocket.send(
-            json.dumps(
-                {
-                    "type": "error",
-                    "message": "Failed to restore researcher conversation",
-                }
-            )
-        )
-        return
-
-    handoff_engine = ContextHandoff(llm=LLM)
-    # NOTE(review): this call is not awaited; if it performs an LLM request it
-    # would block the event loop while it runs — confirm.
-    handoff_context = handoff_engine.summarize_conversation(
-        conversation=conversation_a,
-        node_id="researcher",
-        output_keys=["research_summary"],
-    )
-
-    formatted_handoff = ContextHandoff.format_as_input(handoff_context)
-    logger.info(
-        "Handoff: %d turns, ~%d tokens, keys=%s",
-        handoff_context.turn_count,
-        handoff_context.total_tokens_used,
-        list(handoff_context.key_outputs.keys()),
-    )
-
-    # Send handoff context to browser
-    await websocket.send(
-        json.dumps(
-            {
-                "type": "handoff_context",
-                # Only the first 500 characters of the summary go to the
-                # browser; the analyst receives formatted_handoff instead.
-                "summary": handoff_context.summary[:500],
-                "turn_count": handoff_context.turn_count,
-                "tokens": handoff_context.total_tokens_used,
-                "key_outputs": handoff_context.key_outputs,
-            }
-        )
-    )
-
-    # ---- Phase 3: Analyst ---------------------------------------------------
-    await websocket.send(json.dumps({"type": "phase", "phase": "analyst"}))
-
-    # The analyst gets no tool executor and an empty tool list.
-    node_b = EventLoopNode(
-        event_bus=bus,
-        judge=None,  # implicit judge
-        config=LoopConfig(
-            max_iterations=10,
-            max_tool_calls_per_turn=30,
-            max_history_tokens=32_000,
-        ),
-        conversation_store=store_b,
-    )
-
-    ctx_b = NodeContext(
-        runtime=RUNTIME,
-        node_id="analyst",
-        node_spec=ANALYST_SPEC,
-        memory=SharedMemory(),
-        input_data={"context": formatted_handoff},
-        llm=LLM,
-        available_tools=[],
-    )
-
-    result_b = await node_b.execute(ctx_b)
-    logger.info(
-        "Analyst done: success=%s, tokens=%s",
-        result_b.success,
-        result_b.tokens_used,
-    )
-
-    # ---- Done ---------------------------------------------------------------
-    # The analyst's success flag is not checked; "done" is sent either way.
-    await websocket.send(
-        json.dumps(
-            {
-                "type": "done",
-                "researcher": result_a.output,
-                "analyst": result_b.output,
-                "total_tokens": ((result_a.tokens_used or 0) + (result_b.tokens_used or 0)),
-            }
-        )
-    )
-
-    # Clean up temp stores
-    # NOTE(review): the two early returns above, and any exception raised
-    # before this point, skip this cleanup and leave run_dir on disk.
-    try:
-        shutil.rmtree(run_dir)
-    except Exception:
-        pass
-
-
-# -------------------------------------------------------------------------
-# HTTP handler
-# -------------------------------------------------------------------------
-
-
-async def process_request(connection, request: Request):
-    """Serve the demo page, or let the WebSocket upgrade proceed on /ws.
-
-    Args:
-        connection: Not used by this handler.
-        request: Incoming HTTP request; only ``request.path`` is inspected.
-
-    Returns:
-        ``None`` when the path is exactly ``/ws``; otherwise a 200 ``Response``
-        carrying ``HTML_PAGE`` encoded to bytes. The HTTP method is not
-        checked, so every non-``/ws`` path receives the page.
-    """
-    if request.path == "/ws":
-        return None
-    return Response(
-        HTTPStatus.OK,
-        "OK",
-        websockets.Headers({"Content-Type": "text/html; charset=utf-8"}),
-        HTML_PAGE.encode(),
-    )
-
-
-# -------------------------------------------------------------------------
-# Main
-# -------------------------------------------------------------------------
-
-
-async def main():
-    """Start the combined HTTP/WebSocket server on port 8766 and keep it running."""
-    port = 8766
-    # NOTE(review): the host is "0.0.0.0" while the log line below advertises
-    # localhost — confirm that listening beyond loopback is intended.
-    async with websockets.serve(
-        handle_ws,
-        "0.0.0.0",
-        port,
-        process_request=process_request,
-    ):
-        logger.info(f"Handoff demo at http://localhost:{port}")
-        logger.info("Enter a research topic to start the pipeline.")
-        # Awaiting a future that nothing resolves keeps the server context open.
-        await asyncio.Future()
-
-
-if __name__ == "__main__":
-    asyncio.run(main())
@@ -22,7 +22,6 @@ The framework includes a Goal-Based Testing system (Goal → Agent → Eval):
 See `framework.testing` for details.
 """

-from framework.builder.query import BuilderQuery
 from framework.llm import AnthropicProvider, LLMProvider
 from framework.runner import AgentOrchestrator, AgentRunner
 from framework.runtime.core import Runtime
@@ -51,8 +50,6 @@ __all__ = [
    "Problem",
    # Runtime
    "Runtime",
-    # Builder
-    "BuilderQuery",
    # LLM
    "LLMProvider",
    "AnthropicProvider",
@@ -1,8 +1,6 @@
 """CLI entry point for Credential Tester agent."""

 import asyncio
-import logging
-import sys

 import click

@@ -10,13 +8,14 @@ from .agent import CredentialTesterAgent


 def setup_logging(verbose=False, debug=False):
+    from framework.observability import configure_logging
+
    if debug:
-        level, fmt = logging.DEBUG, "%(asctime)s %(name)s: %(message)s"
+        configure_logging(level="DEBUG")
    elif verbose:
-        level, fmt = logging.INFO, "%(message)s"
+        configure_logging(level="INFO")
    else:
-        level, fmt = logging.WARNING, "%(levelname)s: %(message)s"
-    logging.basicConfig(level=level, format=fmt, stream=sys.stderr)
+        configure_logging(level="WARNING")


 def pick_account(agent: CredentialTesterAgent) -> dict | None:
@@ -51,42 +50,6 @@ def cli():
    pass


-@cli.command()
-@click.option("--verbose", "-v", is_flag=True)
-@click.option("--debug", is_flag=True)
-def tui(verbose, debug):
-    """Launch TUI to test a credential interactively.
-
-    Exits with status 1 when the TUI module cannot be imported or when
-    ``pick_account`` returns ``None``.
-    """
-    setup_logging(verbose=verbose, debug=debug)
-
-    # Imported lazily so a missing TUI dependency produces a hint, not a traceback.
-    try:
-        from framework.tui.app import AdenTUI
-    except ImportError:
-        click.echo("TUI requires 'textual'. Install with: pip install textual")
-        sys.exit(1)
-
-    agent = CredentialTesterAgent()
-    account = pick_account(agent)
-    if account is None:
-        sys.exit(1)
-
-    agent.select_account(account)
-    provider = account.get("provider", "?")
-    alias = account.get("alias", "?")
-    click.echo(f"\nTesting {provider}/{alias}...\n")
-
-    async def run_tui():
-        # Reaches into the agent's private members to obtain its runtime.
-        agent._setup()
-        runtime = agent._agent_runtime
-        await runtime.start()
-        try:
-            app = AdenTUI(runtime)
-            await app.run_async()
-        finally:
-            # The runtime is stopped even if the TUI raises.
-            await runtime.stop()
-
-    asyncio.run(run_tui())
-
-
@cli.command()
@click.option("--verbose", "-v", is_flag=True)
@click.option("--debug", is_flag=True)
@@ -19,6 +19,7 @@ from __future__ import annotations
 from pathlib import Path
 from typing import TYPE_CHECKING

+from framework.config import get_max_context_tokens
 from framework.graph import Goal, NodeSpec, SuccessCriterion
 from framework.graph.checkpoint_config import CheckpointConfig
 from framework.graph.edge import GraphSpec
@@ -455,7 +456,6 @@ identity_prompt = (
 loop_config = {
    "max_iterations": 50,
    "max_tool_calls_per_turn": 30,
-    "max_history_tokens": 32000,
 }

 # ---------------------------------------------------------------------------
@@ -541,7 +541,7 @@ class CredentialTesterAgent:
            loop_config={
                "max_iterations": 50,
                "max_tool_calls_per_turn": 30,
-                "max_history_tokens": 32000,
+                "max_context_tokens": get_max_context_tokens(),
            },
            conversation_mode="continuous",
            identity_prompt=(
@@ -0,0 +1,178 @@
+"""Agent discovery — scan known directories and return categorised AgentEntry lists."""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass, field
+from pathlib import Path
+
+
+@dataclass
+class AgentEntry:
+    """Lightweight agent metadata for the picker / API discover endpoint."""
+
+    path: Path
+    name: str
+    description: str
+    category: str
+    session_count: int = 0
+    run_count: int = 0
+    node_count: int = 0
+    tool_count: int = 0
+    tags: list[str] = field(default_factory=list)
+    last_active: str | None = None
+
+
+def _get_last_active(agent_name: str) -> str | None:
+    """Return the most recent updated_at timestamp across all sessions."""
+    sessions_dir = Path.home() / ".hive" / "agents" / agent_name / "sessions"
+    if not sessions_dir.exists():
+        return None
+    latest: str | None = None
+    for session_dir in sessions_dir.iterdir():
+        if not session_dir.is_dir() or not session_dir.name.startswith("session_"):
+            continue
+        state_file = session_dir / "state.json"
+        if not state_file.exists():
+            continue
+        try:
+            data = json.loads(state_file.read_text(encoding="utf-8"))
+            ts = data.get("timestamps", {}).get("updated_at")
+            if ts and (latest is None or ts > latest):
+                latest = ts
+        except Exception:
+            continue
+    return latest
+
+
+def _count_sessions(agent_name: str) -> int:
+    """Count session directories under ~/.hive/agents/{agent_name}/sessions/."""
+    sessions_dir = Path.home() / ".hive" / "agents" / agent_name / "sessions"
+    if not sessions_dir.exists():
+        return 0
+    return sum(1 for d in sessions_dir.iterdir() if d.is_dir() and d.name.startswith("session_"))
+
+
+def _count_runs(agent_name: str) -> int:
+    """Count unique run_ids across all sessions for an agent."""
+    sessions_dir = Path.home() / ".hive" / "agents" / agent_name / "sessions"
+    if not sessions_dir.exists():
+        return 0
+    run_ids: set[str] = set()
+    for session_dir in sessions_dir.iterdir():
+        if not session_dir.is_dir() or not session_dir.name.startswith("session_"):
+            continue
+        # runs.jsonl lives inside workspace subdirectories
+        for runs_file in session_dir.rglob("runs.jsonl"):
+            try:
+                for line in runs_file.read_text(encoding="utf-8").splitlines():
+                    line = line.strip()
+                    if not line:
+                        continue
+                    record = json.loads(line)
+                    rid = record.get("run_id")
+                    if rid:
+                        run_ids.add(rid)
+            except Exception:
+                continue
+    return len(run_ids)
+
+
+def _extract_agent_stats(agent_path: Path) -> tuple[int, int, list[str]]:
+    """Extract node count, tool count, and tags from an agent directory.
+
+    Prefers agent.py (AST-parsed) over agent.json for node/tool counts
+    since agent.json may be stale.  Tags are only available from agent.json.
+    """
+    import ast
+
+    node_count, tool_count, tags = 0, 0, []
+
+    agent_py = agent_path / "agent.py"
+    if agent_py.exists():
+        try:
+            tree = ast.parse(agent_py.read_text(encoding="utf-8"))
+            for node in ast.walk(tree):
+                if isinstance(node, ast.Assign):
+                    for target in node.targets:
+                        if isinstance(target, ast.Name) and target.id == "nodes":
+                            if isinstance(node.value, ast.List):
+                                node_count = len(node.value.elts)
+        except Exception:
+            pass
+
+    agent_json = agent_path / "agent.json"
+    if agent_json.exists():
+        try:
+            data = json.loads(agent_json.read_text(encoding="utf-8"))
+            json_nodes = data.get("graph", {}).get("nodes", []) or data.get("nodes", [])
+            if node_count == 0:
+                node_count = len(json_nodes)
+            tools: set[str] = set()
+            for n in json_nodes:
+                tools.update(n.get("tools", []))
+            tool_count = len(tools)
+            tags = data.get("agent", {}).get("tags", [])
+        except Exception:
+            pass
+
+    return node_count, tool_count, tags
+
+
+def discover_agents() -> dict[str, list[AgentEntry]]:
+    """Discover agents from all known sources grouped by category."""
+    from framework.runner.cli import (
+        _extract_python_agent_metadata,
+        _get_framework_agents_dir,
+        _is_valid_agent_dir,
+    )
+
+    groups: dict[str, list[AgentEntry]] = {}
+    sources = [
+        ("Your Agents", Path("exports")),
+        ("Framework", _get_framework_agents_dir()),
+        ("Examples", Path("examples/templates")),
+    ]
+
+    for category, base_dir in sources:
+        if not base_dir.exists():
+            continue
+        entries: list[AgentEntry] = []
+        for path in sorted(base_dir.iterdir(), key=lambda p: p.name):
+            if not _is_valid_agent_dir(path):
+                continue
+
+            name, desc = _extract_python_agent_metadata(path)
+            config_fallback_name = path.name.replace("_", " ").title()
+            used_config = name != config_fallback_name
+
+            node_count, tool_count, tags = _extract_agent_stats(path)
+            if not used_config:
+                agent_json = path / "agent.json"
+                if agent_json.exists():
+                    try:
+                        data = json.loads(agent_json.read_text(encoding="utf-8"))
+                        meta = data.get("agent", {})
+                        name = meta.get("name", name)
+                        desc = meta.get("description", desc)
+                    except Exception:
+                        pass
+
+            entries.append(
+                AgentEntry(
+                    path=path,
+                    name=name,
+                    description=desc,
+                    category=category,
+                    session_count=_count_sessions(path.name),
+                    run_count=_count_runs(path.name),
+                    node_count=node_count,
+                    tool_count=tool_count,
+                    tags=tags,
+                    last_active=_get_last_active(path.name),
+                )
+            )
+        if entries:
+            groups[category] = entries
+
+    return groups
@@ -1,40 +0,0 @@
-"""
-Hive Coder — Native coding agent that builds Hive agent packages.
-
-Deeply understands the agent framework and produces complete Python packages
-with goals, nodes, edges, system prompts, MCP configuration, and tests
-from natural language specifications.
-"""
-
-from .agent import (
-    conversation_mode,
-    edges,
-    entry_node,
-    entry_points,
-    goal,
-    identity_prompt,
-    loop_config,
-    nodes,
-    pause_nodes,
-    terminal_nodes,
-)
-from .config import AgentMetadata, RuntimeConfig, default_config, metadata
-
-__version__ = "1.0.0"
-
-__all__ = [
-    "goal",
-    "nodes",
-    "edges",
-    "entry_node",
-    "entry_points",
-    "pause_nodes",
-    "terminal_nodes",
-    "conversation_mode",
-    "identity_prompt",
-    "loop_config",
-    "RuntimeConfig",
-    "AgentMetadata",
-    "default_config",
-    "metadata",
-]
@@ -1,60 +0,0 @@
-"""CLI entry point for Hive Coder agent."""
-
-import json
-import logging
-import sys
-
-import click
-
-from .agent import entry_node, goal, nodes
-from .config import metadata
-
-
-def setup_logging(verbose=False, debug=False):
-    """Configure logging for execution visibility."""
-    if debug:
-        level, fmt = logging.DEBUG, "%(asctime)s %(name)s: %(message)s"
-    elif verbose:
-        level, fmt = logging.INFO, "%(message)s"
-    else:
-        level, fmt = logging.WARNING, "%(levelname)s: %(message)s"
-    logging.basicConfig(level=level, format=fmt, stream=sys.stderr)
-    logging.getLogger("framework").setLevel(level)
-
-
-@click.group()
-@click.version_option(version="1.0.0")
-def cli():
-    """Hive Coder — Build Hive agent packages from natural language."""
-    pass
-
-
-@cli.command()
-@click.option("--json", "output_json", is_flag=True)
-def info(output_json):
-    """Show agent information."""
-    info_data = {
-        "name": metadata.name,
-        "version": metadata.version,
-        "description": metadata.description,
-        "goal": {
-            "name": goal.name,
-            "description": goal.description,
-        },
-        "nodes": [n.id for n in nodes],
-        "entry_node": entry_node,
-        "client_facing_nodes": [n.id for n in nodes if n.client_facing],
-    }
-    if output_json:
-        click.echo(json.dumps(info_data, indent=2))
-    else:
-        click.echo(f"Agent: {info_data['name']}")
-        click.echo(f"Version: {info_data['version']}")
-        click.echo(f"Description: {info_data['description']}")
-        click.echo(f"\nNodes: {', '.join(info_data['nodes'])}")
-        click.echo(f"Client-facing: {', '.join(info_data['client_facing_nodes'])}")
-        click.echo(f"Entry: {info_data['entry_node']}")
-
-
-if __name__ == "__main__":
-    cli()
@@ -1,153 +0,0 @@
-"""Agent graph construction for Hive Coder."""
-
-from framework.graph import Constraint, Goal, SuccessCriterion
-from framework.graph.edge import GraphSpec
-
-from .nodes import coder_node, queen_node
-
-# Goal definition
-goal = Goal(
-    id="hive-coder",
-    name="Hive Agent Builder",
-    description=(
-        "Build complete, validated Hive agent packages from natural language "
-        "specifications. Produces production-ready Python packages with goals, "
-        "nodes, edges, system prompts, MCP configuration, and tests."
-    ),
-    success_criteria=[
-        SuccessCriterion(
-            id="valid-package",
-            description="Generated agent package passes structural validation",
-            metric="validation_pass",
-            target="true",
-            weight=0.30,
-        ),
-        SuccessCriterion(
-            id="complete-files",
-            description=(
-                "All required files generated: agent.py, config.py, "
-                "nodes/__init__.py, __init__.py, __main__.py, mcp_servers.json"
-            ),
-            metric="file_count",
-            target=">=6",
-            weight=0.25,
-        ),
-        SuccessCriterion(
-            id="user-satisfaction",
-            description="User reviews and approves the generated agent",
-            metric="user_approval",
-            target="true",
-            weight=0.25,
-        ),
-        SuccessCriterion(
-            id="framework-compliance",
-            description=(
-                "Generated code follows framework patterns: STEP 1/STEP 2 "
-                "for client-facing and correct imports"
-            ),
-            metric="pattern_compliance",
-            target="100%",
-            weight=0.20,
-        ),
-    ],
-    constraints=[
-        Constraint(
-            id="dynamic-tool-discovery",
-            description=(
-                "Always discover available tools dynamically via "
-                "list_agent_tools before referencing tools in agent designs"
-            ),
-            constraint_type="hard",
-            category="correctness",
-        ),
-        Constraint(
-            id="no-fabricated-tools",
-            description="Only reference tools that exist in hive-tools MCP",
-            constraint_type="hard",
-            category="correctness",
-        ),
-        Constraint(
-            id="valid-python",
-            description="All generated Python files must be syntactically correct",
-            constraint_type="hard",
-            category="correctness",
-        ),
-        Constraint(
-            id="self-verification",
-            description="Run validation after writing code; fix errors before presenting",
-            constraint_type="hard",
-            category="quality",
-        ),
-    ],
-)
-
-# Nodes: primary coder node only.  The queen runs as an independent
-# GraphExecutor with queen_node — not as part of this graph.
-nodes = [coder_node]
-
-# No edges needed — single event_loop node
-edges = []
-
-# Graph configuration
-entry_node = "coder"
-entry_points = {"start": "coder"}
-pause_nodes = []
-terminal_nodes = []  # Coder node has output_keys and can terminate
-
-# No async entry points needed — the queen is now an independent executor,
-# not a secondary graph receiving events via add_graph().
-async_entry_points = []
-
-# Module-level variables read by AgentRunner.load()
-conversation_mode = "continuous"
-identity_prompt = (
-    "You are Hive Coder, the best agent-building coding agent on the planet. "
-    "You deeply understand the Hive agent framework at the source code level "
-    "and produce production-ready agent packages from natural language. "
-    "You can dynamically discover available framework tools, inspect runtime "
-    "sessions and checkpoints from agents you build, and run their test suites. "
-    "You follow coding agent discipline: read before writing, verify "
-    "assumptions by reading actual code, adhere to project conventions, "
-    "self-verify with validation, and fix your own errors. You are concise, "
-    "direct, and technically rigorous. No emojis. No fluff."
-)
-loop_config = {
-    "max_iterations": 100,
-    "max_tool_calls_per_turn": 30,
-    "max_history_tokens": 32000,
-}
-
-
-# ---------------------------------------------------------------------------
-# Queen graph — runs as an independent persistent conversation in the TUI.
-# Loaded by _load_judge_and_queen() in app.py, NOT by AgentRunner.
-# ---------------------------------------------------------------------------
-
-queen_goal = Goal(
-    id="queen-manager",
-    name="Queen Manager",
-    description=(
-        "Manage the worker agent lifecycle and serve as the user's primary "
-        "interactive interface. Triage health escalations from the judge."
-    ),
-    success_criteria=[],
-    constraints=[],
-)
-
-queen_graph = GraphSpec(
-    id="queen-graph",
-    goal_id=queen_goal.id,
-    version="1.0.0",
-    entry_node="queen",
-    entry_points={"start": "queen"},
-    terminal_nodes=[],
-    pause_nodes=[],
-    nodes=[queen_node],
-    edges=[],
-    conversation_mode="continuous",
-    loop_config={
-        "max_iterations": 999_999,
-        "max_tool_calls_per_turn": 30,
-        "max_history_tokens": 32000,
-    },
-)
@@ -0,0 +1,21 @@
+"""
+Queen — Native agent builder for the Hive framework.
+
+Deeply understands the agent framework and produces complete Python packages
+with goals, nodes, edges, system prompts, MCP configuration, and tests
+from natural language specifications.
+"""
+
+from .agent import queen_goal, queen_graph
+from .config import AgentMetadata, RuntimeConfig, default_config, metadata
+
+__version__ = "1.0.0"
+
+__all__ = [
+    "queen_goal",
+    "queen_graph",
+    "RuntimeConfig",
+    "AgentMetadata",
+    "default_config",
+    "metadata",
+]
@@ -0,0 +1,38 @@
+"""Queen graph definition."""
+
+from framework.graph import Goal
+from framework.graph.edge import GraphSpec
+
+from .nodes import queen_node
+
+# ---------------------------------------------------------------------------
+# Queen graph — the primary persistent conversation.
+# Loaded by queen_orchestrator.create_queen(), NOT by AgentRunner.
+# ---------------------------------------------------------------------------
+
+queen_goal = Goal(
+    id="queen-manager",
+    name="Queen Manager",
+    description=(
+        "Manage the worker agent lifecycle and serve as the user's primary interactive interface."
+    ),
+    success_criteria=[],
+    constraints=[],
+)
+
+queen_graph = GraphSpec(
+    id="queen-graph",
+    goal_id=queen_goal.id,
+    version="1.0.0",
+    entry_node="queen",
+    entry_points={"start": "queen"},
+    terminal_nodes=[],
+    pause_nodes=[],
+    nodes=[queen_node],
+    edges=[],
+    conversation_mode="continuous",
+    loop_config={
+        "max_iterations": 999_999,
+        "max_tool_calls_per_turn": 30,
+    },
+)
@@ -1,4 +1,4 @@
-"""Runtime configuration for Hive Coder agent."""
+"""Runtime configuration for Queen agent."""

 import json
 from dataclasses import dataclass, field
@@ -34,7 +34,7 @@ default_config = RuntimeConfig()

@dataclass
 class AgentMetadata:
-    name: str = "Hive Coder"
+    name: str = "Queen"
    version: str = "1.0.0"
    description: str = (
        "Native coding agent that builds production-ready Hive agent packages "
@@ -43,7 +43,7 @@ class AgentMetadata:
        "MCP configuration, and tests."
    )
    intro_message: str = (
-        "I'm Hive Coder — I build Hive agents. Describe what kind of agent "
+        "I'm Queen — I build Hive agents. Describe what kind of agent "
        "you want to create and I'll design, implement, and validate it for you."
    )

@@ -0,0 +1,399 @@
+"""Queen global cross-session memory.
+
+Three-tier memory architecture:
+  ~/.hive/queen/MEMORY.md                            — semantic (who, what, why)
+  ~/.hive/queen/memories/MEMORY-YYYY-MM-DD.md        — episodic (daily journals)
+  ~/.hive/queen/session/{id}/data/adapt.md           — working (session-scoped)
+
+Semantic and episodic files are injected at queen session start.
+
+Semantic memory (MEMORY.md) is updated automatically at session end via
+consolidate_queen_memory() — the queen never rewrites this herself.
+
+Episodic memory (MEMORY-date.md) can be written by the queen during a session
+via the write_to_diary tool, and today's file is rewritten (merged and
+deduplicated) by consolidate_queen_memory().
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import logging
+import traceback
+from datetime import date, datetime
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+
+def _queen_dir() -> Path:
+    return Path.home() / ".hive" / "queen"
+
+
+def semantic_memory_path() -> Path:
+    return _queen_dir() / "MEMORY.md"
+
+
+def episodic_memory_path(d: date | None = None) -> Path:
+    d = d or date.today()
+    return _queen_dir() / "memories" / f"MEMORY-{d.strftime('%Y-%m-%d')}.md"
+
+
+def read_semantic_memory() -> str:
+    path = semantic_memory_path()
+    return path.read_text(encoding="utf-8").strip() if path.exists() else ""
+
+
+def read_episodic_memory(d: date | None = None) -> str:
+    path = episodic_memory_path(d)
+    return path.read_text(encoding="utf-8").strip() if path.exists() else ""
+
+
+def _find_recent_episodic(lookback: int = 7) -> tuple[date, str] | None:
+    """Find the most recent non-empty episodic memory within *lookback* days."""
+    from datetime import timedelta
+
+    today = date.today()
+    for offset in range(lookback):
+        d = today - timedelta(days=offset)
+        content = read_episodic_memory(d)
+        if content:
+            return d, content
+    return None
+
+
+# Budget (in characters) for episodic memory in the system prompt.
+_EPISODIC_CHAR_BUDGET = 6_000
+
+
+def format_for_injection() -> str:
+    """Format cross-session memory for system prompt injection.
+
+    Returns an empty string if no meaningful content exists yet (e.g. first
+    session with only the seed template).
+    """
+    semantic = read_semantic_memory()
+    recent = _find_recent_episodic()
+
+    # Suppress injection if semantic is still just the seed template
+    if semantic and semantic.startswith("# My Understanding of the User\n\n*No sessions"):
+        semantic = ""
+
+    parts: list[str] = []
+    if semantic:
+        parts.append(semantic)
+
+    if recent:
+        d, content = recent
+        # Trim oversized episodic entries to keep the prompt manageable
+        if len(content) > _EPISODIC_CHAR_BUDGET:
+            content = content[:_EPISODIC_CHAR_BUDGET] + "\n\n…(truncated)"
+        # Build the date by hand: strftime's "%-d" is a glibc extension and is
+        # rejected on Windows. Same format as append_episodic_entry().
+        day_str = f"{d.strftime('%B')} {d.day}, {d.year}"
+        prefix = "## Today — " if d == date.today() else "## "
+        label = f"{prefix}{day_str}"
+        parts.append(f"{label}\n\n{content}")
+
+    if not parts:
+        return ""
+
+    body = "\n\n---\n\n".join(parts)
+    return "--- Your Cross-Session Memory ---\n\n" + body + "\n\n--- End Cross-Session Memory ---"
+
+
+_SEED_TEMPLATE = """\
+# My Understanding of the User
+
+*No sessions recorded yet.*
+
+## Who They Are
+
+## What They're Trying to Achieve
+
+## What's Working
+
+## What I've Learned
+"""
+
+
+def append_episodic_entry(content: str) -> None:
+    """Append a timestamped prose entry to today's episodic memory file.
+
+    Creates the file (with a date heading) if it doesn't exist yet. Used by the
+    queen's diary tool; consolidate_queen_memory() rewrites the file directly.
+    """
+    ep_path = episodic_memory_path()
+    ep_path.parent.mkdir(parents=True, exist_ok=True)
+    today = date.today()
+    today_str = f"{today.strftime('%B')} {today.day}, {today.year}"
+    timestamp = datetime.now().strftime("%H:%M")
+    if not ep_path.exists():
+        header = f"# {today_str}\n\n"
+        block = f"{header}### {timestamp}\n\n{content.strip()}\n"
+    else:
+        block = f"\n\n### {timestamp}\n\n{content.strip()}\n"
+    with ep_path.open("a", encoding="utf-8") as f:
+        f.write(block)
+
+
+def seed_if_missing() -> None:
+    """Create MEMORY.md with a blank template if it doesn't exist yet."""
+    path = semantic_memory_path()
+    if path.exists():
+        return
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(_SEED_TEMPLATE, encoding="utf-8")
+
+
+# ---------------------------------------------------------------------------
+# Consolidation prompt
+# ---------------------------------------------------------------------------
+
+_SEMANTIC_SYSTEM = """\
+You maintain the persistent cross-session memory of an AI assistant called the Queen.
+Review the session notes and rewrite MEMORY.md — the Queen's durable understanding of the
+person she works with across all sessions.
+
+Write entirely in the Queen's voice — first person, reflective, honest.
+Not a log of events, but genuine understanding of who this person is over time.
+
+Rules:
+- Update and synthesise: incorporate new understanding, update facts that have changed, remove
+  details that are stale, superseded, or no longer say anything meaningful about the person.
+- Keep it as structured markdown with named sections about the PERSON, not about today.
+- Do NOT include diary sections, daily logs, or session summaries. Those belong elsewhere.
+  MEMORY.md is about who they are, what they want, what works — not what happened today.
+- Reference dates only when noting a lasting milestone (e.g. "since March 8th they prefer X").
+- If the session had no meaningful new information about the person,
+  return the existing text unchanged.
+- Do not add fictional details. Only reflect what is evidenced in the notes.
+- Stay concise. Prune rather than accumulate. A lean, accurate file is more useful than a
+  dense one. If something was true once but has been resolved or superseded, remove it.
+- Output only the raw markdown content of MEMORY.md. No preamble, no code fences.
+"""
+
+_DIARY_SYSTEM = """\
+You maintain the daily episodic diary of an AI assistant called the Queen.
+You receive: (1) today's existing diary so far, and (2) notes from the latest session.
+
+Rewrite the complete diary for today as a single unified narrative —
+first person, reflective, honest.
+Merge and deduplicate: if the same story (e.g. a research agent stalling) recurred several times,
+describe it once with appropriate weight rather than retelling it. Weave in new developments from
+the session notes. Preserve important milestones, emotional texture, and session path references.
+
+If today's diary is empty, write the initial entry based on the session notes alone.
+
+Output only the full diary prose — no date heading, no timestamp headers,
+no preamble, no code fences.
+"""
+
+
+def read_session_context(session_dir: Path, max_messages: int = 80) -> str:
+    """Extract a readable transcript from conversation parts + adapt.md.
+
+    Reads adapt.md (working memory) and the last ``max_messages`` part files.
+    Tool results are omitted; turns are cut to 600 chars, tool-only assistant
+    turns are reduced to the called tool names. Unreadable parts are skipped.
+    """
+    parts: list[str] = []
+
+    # Working notes
+    adapt_path = session_dir / "data" / "adapt.md"
+    if adapt_path.exists():
+        text = adapt_path.read_text(encoding="utf-8").strip()
+        if text:
+            parts.append(f"## Session Working Notes (adapt.md)\n\n{text}")
+
+    # Conversation transcript
+    parts_dir = session_dir / "conversations" / "parts"
+    if parts_dir.exists():
+        part_files = sorted(parts_dir.glob("*.json"))[-max_messages:]
+        lines: list[str] = []
+        for pf in part_files:
+            try:
+                data = json.loads(pf.read_text(encoding="utf-8"))
+                role = data.get("role", "")
+                content = str(data.get("content") or "").strip()  # JSON null -> "", not "None"
+                tool_calls = data.get("tool_calls") or []
+                if role == "tool":
+                    continue  # skip verbose tool results
+                if role == "assistant" and tool_calls and not content:
+                    names = [tc.get("function", {}).get("name", "?") for tc in tool_calls]
+                    lines.append(f"[queen calls: {', '.join(names)}]")
+                elif content:
+                    label = "user" if role == "user" else "queen"
+                    lines.append(f"[{label}]: {content[:600]}")
+            except Exception:
+                continue
+        if lines:
+            parts.append("## Conversation\n\n" + "\n".join(lines))
+
+    return "\n\n".join(parts)
+
+
+# ---------------------------------------------------------------------------
+# Context compaction (binary-split LLM summarisation)
+# ---------------------------------------------------------------------------
+
+# If the raw session context exceeds this many characters, compact it first
+# before sending to the consolidation LLM. ~200 k chars ≈ 50 k tokens.
+_CTX_COMPACT_CHAR_LIMIT = 200_000
+_CTX_COMPACT_MAX_DEPTH = 8
+
+_COMPACT_SYSTEM = (
+    "Summarise this conversation segment. Preserve: user goals, key decisions, "
+    "what was built or changed, emotional tone, and important outcomes. "
+    "Write concisely in third person past tense. Omit routine tool invocations "
+    "unless the result matters."
+)
+
+
+async def _compact_context(text: str, llm: object, *, _depth: int = 0) -> str:
+    """Binary-split and LLM-summarise *text* until it fits within the char limit.
+
+    Mirrors the recursive binary-splitting strategy used by the main agent
+    compaction pipeline (EventLoopNode._llm_compact).
+    """
+    if len(text) <= _CTX_COMPACT_CHAR_LIMIT or _depth >= _CTX_COMPACT_MAX_DEPTH:
+        return text
+
+    # Split near the midpoint on a line boundary so we don't cut mid-message
+    mid = len(text) // 2
+    split_at = text.rfind("\n", 0, mid) + 1
+    if split_at <= 0:
+        split_at = mid
+
+    half1, half2 = text[:split_at], text[split_at:]
+
+    async def _summarise(chunk: str) -> str:
+        try:
+            resp = await llm.acomplete(
+                messages=[{"role": "user", "content": chunk}],
+                system=_COMPACT_SYSTEM,
+                max_tokens=2048,
+            )
+            return resp.content.strip()
+        except Exception:
+            logger.warning(
+                "queen_memory: context compaction LLM call failed (depth=%d), truncating",
+                _depth,
+            )
+            return chunk[: _CTX_COMPACT_CHAR_LIMIT // 4]
+
+    s1, s2 = await asyncio.gather(_summarise(half1), _summarise(half2))
+    combined = s1 + "\n\n" + s2
+    if len(combined) > _CTX_COMPACT_CHAR_LIMIT:
+        return await _compact_context(combined, llm, _depth=_depth + 1)
+    return combined
+
+
+async def consolidate_queen_memory(
+    session_id: str,
+    session_dir: Path,
+    llm: object,
+) -> None:
+    """Update MEMORY.md and rewrite today's diary based on the current session.
+
+    Reads conversation parts and adapt.md from session_dir. Called
+    periodically in the background and once at session end. Failures are
+    logged and silently swallowed so they never block teardown.
+
+    Args:
+        session_id: The session ID (used for the adapt.md path reference).
+        session_dir: Path to the session directory (~/.hive/queen/session/{id}).
+        llm: LLMProvider instance (must support acomplete()).
+    """
+    try:
+        session_context = read_session_context(session_dir)
+        if not session_context:
+            logger.debug("queen_memory: no session context, skipping consolidation")
+            return
+
+        logger.info("queen_memory: consolidating memory for session %s ...", session_id)
+
+        # If the transcript is very large, compact it with recursive binary LLM
+        # summarisation before sending to the consolidation model.
+        if len(session_context) > _CTX_COMPACT_CHAR_LIMIT:
+            logger.info(
+                "queen_memory: session context is %d chars — compacting first",
+                len(session_context),
+            )
+            session_context = await _compact_context(session_context, llm)
+            logger.info("queen_memory: compacted to %d chars", len(session_context))
+
+        existing_semantic = read_semantic_memory()
+        today_journal = read_episodic_memory()
+        today = date.today()
+        today_str = f"{today.strftime('%B')} {today.day}, {today.year}"
+        adapt_path = session_dir / "data" / "adapt.md"
+
+        user_msg = (
+            f"## Existing Semantic Memory (MEMORY.md)\n\n"
+            f"{existing_semantic or '(none yet)'}\n\n"
+            f"## Today's Diary So Far ({today_str})\n\n"
+            f"{today_journal or '(none yet)'}\n\n"
+            f"{session_context}\n\n"
+            f"## Session Reference\n\n"
+            f"Session ID: {session_id}\n"
+            f"Session path: {adapt_path}\n"
+        )
+
+        logger.debug(
+            "queen_memory: calling LLM (%d chars of context, ~%d tokens est.)",
+            len(user_msg),
+            len(user_msg) // 4,
+        )
+
+        from framework.agents.queen.config import default_config
+
+        semantic_resp, diary_resp = await asyncio.gather(
+            llm.acomplete(
+                messages=[{"role": "user", "content": user_msg}],
+                system=_SEMANTIC_SYSTEM,
+                max_tokens=default_config.max_tokens,
+            ),
+            llm.acomplete(
+                messages=[{"role": "user", "content": user_msg}],
+                system=_DIARY_SYSTEM,
+                max_tokens=default_config.max_tokens,
+            ),
+        )
+
+        new_semantic = semantic_resp.content.strip()
+        diary_entry = diary_resp.content.strip()
+
+        if new_semantic:
+            path = semantic_memory_path()
+            path.parent.mkdir(parents=True, exist_ok=True)
+            path.write_text(new_semantic, encoding="utf-8")
+            logger.info("queen_memory: semantic memory updated (%d chars)", len(new_semantic))
+
+        if diary_entry:
+            # Rewrite today's episodic file in-place — the LLM has merged and
+            # deduplicated the full day's content, so we replace rather than append.
+            ep_path = episodic_memory_path()
+            ep_path.parent.mkdir(parents=True, exist_ok=True)
+            heading = f"# {today_str}"
+            ep_path.write_text(f"{heading}\n\n{diary_entry}\n", encoding="utf-8")
+            logger.info(
+                "queen_memory: episodic diary rewritten for %s (%d chars)",
+                today_str,
+                len(diary_entry),
+            )
+
+    except Exception:
+        tb = traceback.format_exc()
+        logger.exception("queen_memory: consolidation failed")
+        # Write to file so the cause is findable regardless of log verbosity.
+        error_path = _queen_dir() / "consolidation_error.txt"
+        try:
+            error_path.parent.mkdir(parents=True, exist_ok=True)
+            error_path.write_text(
+                f"session: {session_id}\ntime: {datetime.now().isoformat()}\n\n{tb}",
+                encoding="utf-8",
+            )
+        except Exception:
+            pass
@@ -27,6 +27,9 @@
 ## GCU Errors
 15. **Manually wiring browser tools on event_loop nodes** — Use `node_type="gcu"` which auto-includes browser tools. Do NOT manually list browser tool names.
 16. **Using GCU nodes as regular graph nodes** — GCU nodes are subagents only. They must ONLY appear in `sub_agents=["gcu-node-id"]` and be invoked via `delegate_to_sub_agent()`. Never connect via edges or use as entry/terminal nodes.
+17. **Reusing the same GCU node ID for parallel tasks** — Each concurrent browser task needs a distinct GCU node ID (e.g. `gcu-site-a`, `gcu-site-b`). Two `delegate_to_sub_agent` calls with the same `agent_id` share a browser profile and will interfere with each other's pages.
+18. **Passing `profile=` in GCU tool calls** — Profile isolation for parallel subagents is automatic. The framework injects a unique profile per subagent via an asyncio `ContextVar`. Hardcoding `profile="default"` in a GCU system prompt breaks this isolation.

 ## Worker Agent Errors
-17. **Adding client-facing intake node to workers** — The queen owns intake. Workers should start with an autonomous processing node. Client-facing nodes in workers are for mid-execution review/approval only.
+19. **Adding client-facing intake node to workers** — The queen owns intake. Workers should start with an autonomous processing node. Client-facing nodes in workers are for mid-execution review/approval only.
+20. **Putting `escalate` or `set_output` in NodeSpec `tools=[]`** — These are synthetic framework tools, auto-injected at runtime. Only list MCP tools from `list_agent_tools()`.
@@ -180,7 +180,7 @@ terminal_nodes = []  # Forever-alive
 # Module-level vars read by AgentRunner.load()
 conversation_mode = "continuous"
 identity_prompt = "You are a helpful agent."
-loop_config = {"max_iterations": 100, "max_tool_calls_per_turn": 20, "max_history_tokens": 32000}
+loop_config = {"max_iterations": 100, "max_tool_calls_per_turn": 20, "max_context_tokens": 32000}


 class MyAgent:
@@ -332,81 +332,46 @@ class MyAgent:
 default_agent = MyAgent()
 ```

-## agent.py — Async Entry Points Variant
+## triggers.json — Timer and Webhook Triggers

-When an agent needs timers, webhooks, or event-driven triggers, add
-`async_entry_points` and optionally `runtime_config` as module-level variables.
-These are IN ADDITION to the standard variables above.
+When an agent needs timers, webhooks, or event-driven triggers, create a
+`triggers.json` file in the agent's directory (alongside `agent.py`).
+The queen loads these at session start and the user can manage them via
+the `set_trigger` / `remove_trigger` tools at runtime.

-```python
-# Additional imports for async entry points
-from framework.graph.edge import GraphSpec, AsyncEntryPointSpec
-from framework.runtime.agent_runtime import (
-    AgentRuntime, AgentRuntimeConfig, create_agent_runtime,
-)
-
-# ... (goal, nodes, edges, entry_node, entry_points, etc. as above) ...
-
-# Async entry points — event-driven triggers
-async_entry_points = [
-    # Timer with cron: daily at 9am
-    AsyncEntryPointSpec(
-        id="daily-check",
-        name="Daily Check",
-        entry_node="process-node",
-        trigger_type="timer",
-        trigger_config={"cron": "0 9 * * *"},
-        isolation_level="shared",
-        max_concurrent=1,
-    ),
-    # Timer with fixed interval: every 20 minutes
-    AsyncEntryPointSpec(
-        id="scheduled-check",
-        name="Scheduled Check",
-        entry_node="process-node",
-        trigger_type="timer",
-        trigger_config={"interval_minutes": 20, "run_immediately": False},
-        isolation_level="shared",
-        max_concurrent=1,
-    ),
-    # Event: reacts to webhook events
-    AsyncEntryPointSpec(
-        id="webhook-event",
-        name="Webhook Event Handler",
-        entry_node="process-node",
-        trigger_type="event",
-        trigger_config={"event_types": ["webhook_received"]},
-        isolation_level="shared",
-        max_concurrent=10,
-    ),
+```json
+[
+  {
+    "id": "daily-check",
+    "name": "Daily Check",
+    "trigger_type": "timer",
+    "trigger_config": {"cron": "0 9 * * *"},
+    "task": "Run the daily check process"
+  },
+  {
+    "id": "scheduled-check",
+    "name": "Scheduled Check",
+    "trigger_type": "timer",
+    "trigger_config": {"interval_minutes": 20},
+    "task": "Run the scheduled check"
+  },
+  {
+    "id": "webhook-event",
+    "name": "Webhook Event Handler",
+    "trigger_type": "webhook",
+    "trigger_config": {"event_types": ["webhook_received"]},
+    "task": "Process incoming webhook event"
+  }
 ]
-
-# Webhook server config (only needed if using webhooks)
-runtime_config = AgentRuntimeConfig(
-    webhook_host="127.0.0.1",
-    webhook_port=8080,
-    webhook_routes=[
-        {
-            "source_id": "my-source",
-            "path": "/webhooks/my-source",
-            "methods": ["POST"],
-        },
-    ],
-)
 ```

-**Key rules for async entry points:**
- `async_entry_points` is a list of `AsyncEntryPointSpec` (NOT `EntryPointSpec`)
- `runtime_config` is `AgentRuntimeConfig` (NOT `RuntimeConfig` from config.py)
- Valid trigger_types: `timer`, `event`, `webhook`, `manual`, `api`
- Valid isolation_levels: `isolated`, `shared`, `synchronized`
+**Key rules for triggers.json:**
+- Valid trigger_types: `timer`, `webhook`
 - Timer trigger_config (cron): `{"cron": "0 9 * * *"}` — standard 5-field cron expression
- Timer trigger_config (interval): `{"interval_minutes": float, "run_immediately": bool}`
- Event trigger_config: `{"event_types": ["webhook_received"], "filter_stream": "...", "filter_node": "..."}`
- Use `isolation_level="shared"` for async entry points that need to read
-  the primary session's memory (e.g., user-configured rules)
- The `_build_graph()` method passes `async_entry_points` to GraphSpec
- Reference: `exports/gmail_inbox_guardian/agent.py`
+- Timer trigger_config (interval): `{"interval_minutes": float}`
+- Each trigger must have a unique `id`
+- The `task` field describes what the worker should do when the trigger fires
+- Triggers are persisted back to `triggers.json` when modified via queen tools

 ## __init__.py

@@ -453,21 +418,6 @@ __all__ = [
 ]
 ```

-**If the agent uses async entry points**, also import and export:
-```python
-from .agent import (
-    ...,
-    async_entry_points,
-    runtime_config,  # Only if using webhooks
-)
-
-__all__ = [
-    ...,
-    "async_entry_points",
-    "runtime_config",
-]
-```
-
 ## __main__.py

 ```python
@@ -559,7 +509,7 @@ if __name__ == "__main__":

 ## mcp_servers.json

-> **Auto-generated.** `initialize_agent_package` creates this file with hive-tools
+> **Auto-generated.** `initialize_and_build_agent` creates this file with hive-tools
 > as the default. Only edit manually to add additional MCP servers.

 ```json
@@ -31,8 +31,7 @@ module-level variables via `getattr()`:
 | `conversation_mode` | no | not passed | Isolated mode (no context carryover) |
 | `identity_prompt` | no | not passed | No agent-level identity |
 | `loop_config` | no | `{}` | No iteration limits |
-| `async_entry_points` | no | `[]` | No async triggers (timers, webhooks, events) |
-| `runtime_config` | no | `None` | No webhook server |
+| `triggers.json` (file) | no | not present | No triggers (timers, webhooks) |

 **CRITICAL:** `__init__.py` MUST import and re-export ALL of these from
 `agent.py`. Missing exports silently fall back to defaults, causing
@@ -226,7 +225,7 @@ Only three valid keys:
 loop_config = {
    "max_iterations": 100,          # Max LLM turns per node visit
    "max_tool_calls_per_turn": 20,  # Max tool calls per LLM response
-    "max_history_tokens": 32000,    # Triggers conversation compaction
+    "max_context_tokens": 32000,    # Triggers conversation compaction
 }
 ```
 **INVALID keys** (do NOT use): `"strategy"`, `"mode"`, `"timeout"`,
@@ -257,44 +256,28 @@ Multiple ON_SUCCESS edges from same source → parallel execution via asyncio.ga

 Judge is the SOLE acceptance mechanism — no ad-hoc framework gating.

-## Async Entry Points (Webhooks, Timers, Events)
+## Triggers (Timers, Webhooks)

-For agents that react to external events, use `AsyncEntryPointSpec`:
+For agents that react to external events, create a `triggers.json` file
+in the agent's export directory:

-```python
-from framework.graph.edge import AsyncEntryPointSpec
-from framework.runtime.agent_runtime import AgentRuntimeConfig
-
-# Timer trigger (cron or interval)
-async_entry_points = [
-    AsyncEntryPointSpec(
-        id="daily-check",
-        name="Daily Check",
-        entry_node="process",
-        trigger_type="timer",
-        trigger_config={"cron": "0 9 * * *"},  # daily at 9am
-        isolation_level="shared",
-    )
+```json
+[
+  {
+    "id": "daily-check",
+    "name": "Daily Check",
+    "trigger_type": "timer",
+    "trigger_config": {"cron": "0 9 * * *"},
+    "task": "Run the daily check process"
+  }
 ]
-
-# Webhook server (optional)
-runtime_config = AgentRuntimeConfig(
-    webhook_host="127.0.0.1",
-    webhook_port=8080,
-    webhook_routes=[{"source_id": "gmail", "path": "/webhooks/gmail", "methods": ["POST"]}],
-)
 ```

 ### Key Fields
- `trigger_type`: `"timer"`, `"event"`, `"webhook"`, `"manual"`
+- `trigger_type`: `"timer"` or `"webhook"`
 - `trigger_config`: `{"cron": "0 9 * * *"}` or `{"interval_minutes": 20}`
- `isolation_level`: `"shared"` (recommended), `"isolated"`, `"synchronized"`
- `event_types`: For event triggers, e.g., `["webhook_received"]`
-
-### Exports Required
-Both `async_entry_points` and `runtime_config` must be exported from `__init__.py`.
-
-See `exports/gmail_inbox_guardian/agent.py` for complete example.
+- `task`: describes what the worker should do when the trigger fires
+- Triggers can also be created/removed at runtime via `set_trigger` / `remove_trigger` queen tools

 ## Tool Discovery

@@ -109,6 +109,45 @@ Key rules to bake into GCU node prompts:
 - Keep tool calls per turn ≤10
 - Tab isolation: when browser is already running, use `browser_open(background=true)` and pass `target_id` to every call

+## Multiple Concurrent GCU Subagents
+
+When a task can be parallelized across multiple sites or profiles, declare a distinct GCU
+node for each and invoke them all in the same LLM turn.  The framework batches all
+`delegate_to_sub_agent` calls made in one turn and runs them with `asyncio.gather`, so
+they execute concurrently — not sequentially.
+
+**Each GCU subagent automatically gets its own isolated browser context** — no `profile=`
+argument is needed in tool calls.  The framework derives a unique profile from the subagent's
+node ID and instance counter and injects it via an asyncio `ContextVar` before the subagent
+runs.
+
+### Example: three sites in parallel
+
+```python
+# Three distinct GCU nodes
+gcu_site_a = NodeSpec(id="gcu-site-a", node_type="gcu", ...)
+gcu_site_b = NodeSpec(id="gcu-site-b", node_type="gcu", ...)
+gcu_site_c = NodeSpec(id="gcu-site-c", node_type="gcu", ...)
+
+orchestrator = NodeSpec(
+    id="orchestrator",
+    node_type="event_loop",
+    sub_agents=["gcu-site-a", "gcu-site-b", "gcu-site-c"],
+    system_prompt="""\
+Call all three subagents in a single response to run them in parallel:
+  delegate_to_sub_agent(agent_id="gcu-site-a", task="Scrape prices from site A")
+  delegate_to_sub_agent(agent_id="gcu-site-b", task="Scrape prices from site B")
+  delegate_to_sub_agent(agent_id="gcu-site-c", task="Scrape prices from site C")
+""",
+)
+```
+
+**Rules:**
+- Use distinct node IDs for each concurrent task — sharing an ID shares the browser context.
+- The GCU node prompts do not need to mention `profile=`; isolation is automatic.
+- Cleanup is automatic at session end, but GCU nodes can call `browser_stop()` explicitly
+  if they want to release resources mid-run.
+
 ## GCU Anti-Patterns

 - Using `browser_screenshot` to read text (use `browser_snapshot`)
@@ -0,0 +1,63 @@
+# Queen Memory — File System Structure
+
+```
+~/.hive/
+├── queen/
+│   ├── MEMORY.md                          ← Semantic memory
+│   ├── memories/
+│   │   ├── MEMORY-2026-03-09.md           ← Episodic memory (today)
+│   │   ├── MEMORY-2026-03-08.md
+│   │   └── ...
+│   └── session/
+│       └── {session_id}/                  ← One dir per session (or resumed-from session)
+│           ├── conversations/
+│           │   ├── parts/
+│           │   │   ├── 00001.json         ← One file per message (role, content, tool_calls)
+│           │   │   ├── 00002.json
+│           │   │   └── ...
+│           │   └── spillover/
+│           │       ├── conversation_1.md  ← Compacted old conversation segments
+│           │       ├── conversation_2.md
+│           │       └── ...
+│           └── data/
+│               ├── adapt.md              ← Working memory (session-scoped)
+│               ├── web_search_1.txt      ← Spillover: large tool results
+│               ├── web_search_2.txt
+│               └── ...
+```
+
+---
+
+## The three memory tiers
+
+| File | Tier | Written by | Read at |
+|---|---|---|---|
+| `MEMORY.md` | Semantic | Consolidation LLM (auto, post-session) | Session start (injected into system prompt) |
+| `memories/MEMORY-YYYY-MM-DD.md` | Episodic | Queen via `write_to_diary` tool + consolidation LLM | Session start (today's file injected) |
+| `data/adapt.md` | Working | Queen via `update_session_notes` tool | Every turn (inlined in system prompt) |
+
+---
+
+## Session directory naming
+
+The session directory is named after the value of **`queen_resume_from`** when a cold-restore
+resumes an existing session, otherwise after the new **`session_id`**. This means resumed sessions
+accumulate all messages in the original directory rather than fragmenting across multiple folders.
+
+---
+
+## Consolidation
+
+`consolidate_queen_memory()` runs every **5 minutes** in the background and once more at session
+end. It reads:
+
+1. `conversations/parts/*.json` — full message history (user + assistant turns; tool results skipped)
+2. `data/adapt.md` — current working notes
+
+It then makes two LLM writes:
+
+- Rewrites `MEMORY.md` in place (semantic memory — queen never touches this herself)
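+- Rewrites today's `memories/MEMORY-YYYY-MM-DD.md` in place under a date heading (the LLM merges and deduplicates the full day's content, so the file is replaced rather than appended to)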
+
+If the combined transcript exceeds ~200 K characters it is recursively binary-compacted via the
+LLM before being sent to the consolidation model (mirrors `EventLoopNode._llm_compact`).
@@ -1,4 +1,4 @@
-"""Test fixtures for Hive Coder agent."""
+"""Test fixtures for Queen agent."""

 import sys
 from pathlib import Path
@@ -1,8 +1,8 @@
 """Queen's ticket receiver entry point.

-When the Worker Health Judge emits a WORKER_ESCALATION_TICKET event on the
-shared EventBus, this entry point fires and routes to the ``ticket_triage``
-node, where the Queen deliberates and decides whether to notify the operator.
+When a WORKER_ESCALATION_TICKET event is emitted on the shared EventBus,
+this entry point fires and routes to the ``ticket_triage`` node, where the
+Queen deliberates and decides whether to notify the operator.

 Isolation level is ``isolated`` — the queen's triage memory is kept separate
 from the worker's shared memory. Each ticket triage runs in its own context.
@@ -1,7 +0,0 @@
-"""Builder interface for analyzing and building agents."""
-
-from framework.builder.query import BuilderQuery
-
-__all__ = [
-    "BuilderQuery",
-]
@@ -1,501 +0,0 @@
-"""
-Builder Query Interface - How I (Builder) analyze agent runs.
-
-This is designed around the questions I need to answer:
-1. What happened? (summaries, narratives)
-2. Why did it fail? (failure analysis, decision traces)
-3. What patterns emerge? (across runs, across nodes)
-4. What should we change? (suggestions)
-"""
-
-from collections import defaultdict
-from pathlib import Path
-from typing import Any
-
-from framework.schemas.decision import Decision
-from framework.schemas.run import Run, RunStatus, RunSummary
-from framework.storage.backend import FileStorage
-
-
-class FailureAnalysis:
-    """Structured analysis of why a run failed."""
-
-    def __init__(
-        self,
-        run_id: str,
-        failure_point: str,
-        root_cause: str,
-        decision_chain: list[str],
-        problems: list[str],
-        suggestions: list[str],
-    ):
-        self.run_id = run_id
-        self.failure_point = failure_point
-        self.root_cause = root_cause
-        self.decision_chain = decision_chain
-        self.problems = problems
-        self.suggestions = suggestions
-
-    def to_dict(self) -> dict[str, Any]:
-        return {
-            "run_id": self.run_id,
-            "failure_point": self.failure_point,
-            "root_cause": self.root_cause,
-            "decision_chain": self.decision_chain,
-            "problems": self.problems,
-            "suggestions": self.suggestions,
-        }
-
-    def __str__(self) -> str:
-        lines = [
-            f"=== Failure Analysis for {self.run_id} ===",
-            "",
-            f"Failure Point: {self.failure_point}",
-            f"Root Cause: {self.root_cause}",
-            "",
-            "Decision Chain Leading to Failure:",
-        ]
-        for i, dec in enumerate(self.decision_chain, 1):
-            lines.append(f"  {i}. {dec}")
-
-        if self.problems:
-            lines.append("")
-            lines.append("Reported Problems:")
-            for prob in self.problems:
-                lines.append(f"  - {prob}")
-
-        if self.suggestions:
-            lines.append("")
-            lines.append("Suggestions:")
-            for sug in self.suggestions:
-                lines.append(f"  → {sug}")
-
-        return "\n".join(lines)
-
-
-class PatternAnalysis:
-    """Patterns detected across multiple runs."""
-
-    def __init__(
-        self,
-        goal_id: str,
-        run_count: int,
-        success_rate: float,
-        common_failures: list[tuple[str, int]],
-        problematic_nodes: list[tuple[str, float]],
-        decision_patterns: dict[str, Any],
-    ):
-        self.goal_id = goal_id
-        self.run_count = run_count
-        self.success_rate = success_rate
-        self.common_failures = common_failures
-        self.problematic_nodes = problematic_nodes
-        self.decision_patterns = decision_patterns
-
-    def to_dict(self) -> dict[str, Any]:
-        return {
-            "goal_id": self.goal_id,
-            "run_count": self.run_count,
-            "success_rate": self.success_rate,
-            "common_failures": self.common_failures,
-            "problematic_nodes": self.problematic_nodes,
-            "decision_patterns": self.decision_patterns,
-        }
-
-    def __str__(self) -> str:
-        lines = [
-            f"=== Pattern Analysis for Goal {self.goal_id} ===",
-            "",
-            f"Runs Analyzed: {self.run_count}",
-            f"Success Rate: {self.success_rate:.1%}",
-        ]
-
-        if self.common_failures:
-            lines.append("")
-            lines.append("Common Failures:")
-            for failure, count in self.common_failures:
-                lines.append(f"  - {failure} ({count} occurrences)")
-
-        if self.problematic_nodes:
-            lines.append("")
-            lines.append("Problematic Nodes (failure rate):")
-            for node, rate in self.problematic_nodes:
-                lines.append(f"  - {node}: {rate:.1%} failure rate")
-
-        return "\n".join(lines)
-
-
-class BuilderQuery:
-    """
-    The interface I (Builder) use to understand what agents are doing.
-
-    This is optimized for the questions I need to answer when analyzing
-    agent behavior and deciding what to improve.
-    """
-
-    def __init__(self, storage_path: str | Path):
-        self.storage = FileStorage(storage_path)
-
-    # === WHAT HAPPENED? ===
-
-    def get_run_summary(self, run_id: str) -> RunSummary | None:
-        """Get a quick summary of a run."""
-        return self.storage.load_summary(run_id)
-
-    def get_full_run(self, run_id: str) -> Run | None:
-        """Get the complete run with all decisions."""
-        return self.storage.load_run(run_id)
-
-    def list_runs_for_goal(self, goal_id: str) -> list[RunSummary]:
-        """Get summaries of all runs for a goal."""
-        run_ids = self.storage.get_runs_by_goal(goal_id)
-        summaries = []
-        for run_id in run_ids:
-            summary = self.storage.load_summary(run_id)
-            if summary:
-                summaries.append(summary)
-        return summaries
-
-    def get_recent_failures(self, limit: int = 10) -> list[RunSummary]:
-        """Get recent failed runs."""
-        run_ids = self.storage.get_runs_by_status(RunStatus.FAILED)
-        summaries = []
-        for run_id in run_ids[:limit]:
-            summary = self.storage.load_summary(run_id)
-            if summary:
-                summaries.append(summary)
-        return summaries
-
-    # === WHY DID IT FAIL? ===
-
-    def analyze_failure(self, run_id: str) -> FailureAnalysis | None:
-        """
-        Deep analysis of why a run failed.
-
-        This is my primary tool for understanding what went wrong.
-        """
-        run = self.storage.load_run(run_id)
-        if run is None or run.status != RunStatus.FAILED:
-            return None
-
-        # Find the first failed decision
-        failed_decisions = [d for d in run.decisions if not d.was_successful]
-        if not failed_decisions:
-            failure_point = "Unknown - no decision marked as failed"
-            root_cause = "Run failed but all decisions succeeded (external cause?)"
-        else:
-            first_failure = failed_decisions[0]
-            failure_point = first_failure.summary_for_builder()
-            root_cause = first_failure.outcome.error if first_failure.outcome else "Unknown"
-
-        # Build the decision chain leading to failure
-        decision_chain = []
-        for d in run.decisions:
-            decision_chain.append(d.summary_for_builder())
-            if not d.was_successful:
-                break
-
-        # Extract problems
-        problems = [f"[{p.severity}] {p.description}" for p in run.problems]
-
-        # Generate suggestions based on the failure
-        suggestions = self._generate_suggestions(run, failed_decisions)
-
-        return FailureAnalysis(
-            run_id=run_id,
-            failure_point=failure_point,
-            root_cause=root_cause,
-            decision_chain=decision_chain,
-            problems=problems,
-            suggestions=suggestions,
-        )
-
-    def get_decision_trace(self, run_id: str) -> list[str]:
-        """Get a readable trace of all decisions in a run."""
-        run = self.storage.load_run(run_id)
-        if run is None:
-            return []
-        return [d.summary_for_builder() for d in run.decisions]
-
-    # === WHAT PATTERNS EMERGE? ===
-
-    def find_patterns(self, goal_id: str) -> PatternAnalysis | None:
-        """
-        Find patterns across runs for a goal.
-
-        This helps me understand systemic issues vs one-off failures.
-        """
-        run_ids = self.storage.get_runs_by_goal(goal_id)
-        if not run_ids:
-            return None
-
-        runs = []
-        for run_id in run_ids:
-            run = self.storage.load_run(run_id)
-            if run:
-                runs.append(run)
-
-        if not runs:
-            return None
-
-        # Calculate success rate
-        completed = [r for r in runs if r.status == RunStatus.COMPLETED]
-        success_rate = len(completed) / len(runs) if runs else 0.0
-
-        # Find common failures
-        failure_counts: dict[str, int] = defaultdict(int)
-        for run in runs:
-            for decision in run.decisions:
-                if not decision.was_successful and decision.outcome:
-                    error = decision.outcome.error or "Unknown error"
-                    failure_counts[error] += 1
-
-        common_failures = sorted(failure_counts.items(), key=lambda x: x[1], reverse=True)[:5]
-
-        # Find problematic nodes
-        node_stats: dict[str, dict[str, int]] = defaultdict(lambda: {"total": 0, "failed": 0})
-        for run in runs:
-            for decision in run.decisions:
-                node_stats[decision.node_id]["total"] += 1
-                if not decision.was_successful:
-                    node_stats[decision.node_id]["failed"] += 1
-
-        problematic_nodes = []
-        for node_id, stats in node_stats.items():
-            if stats["total"] > 0:
-                failure_rate = stats["failed"] / stats["total"]
-                if failure_rate > 0.1:  # More than 10% failure rate
-                    problematic_nodes.append((node_id, failure_rate))
-
-        problematic_nodes.sort(key=lambda x: x[1], reverse=True)
-
-        # Decision patterns
-        decision_patterns = self._analyze_decision_patterns(runs)
-
-        return PatternAnalysis(
-            goal_id=goal_id,
-            run_count=len(runs),
-            success_rate=success_rate,
-            common_failures=common_failures,
-            problematic_nodes=problematic_nodes,
-            decision_patterns=decision_patterns,
-        )
-
-    def compare_runs(self, run_id_1: str, run_id_2: str) -> dict[str, Any]:
-        """Compare two runs to understand what differed."""
-        run1 = self.storage.load_run(run_id_1)
-        run2 = self.storage.load_run(run_id_2)
-
-        if run1 is None or run2 is None:
-            return {"error": "One or both runs not found"}
-
-        return {
-            "run_1": {
-                "id": run1.id,
-                "status": run1.status.value,
-                "decisions": len(run1.decisions),
-                "success_rate": run1.metrics.success_rate,
-            },
-            "run_2": {
-                "id": run2.id,
-                "status": run2.status.value,
-                "decisions": len(run2.decisions),
-                "success_rate": run2.metrics.success_rate,
-            },
-            "differences": self._find_differences(run1, run2),
-        }
-
-    # === WHAT SHOULD WE CHANGE? ===
-
-    def suggest_improvements(self, goal_id: str) -> list[dict[str, Any]]:
-        """
-        Generate improvement suggestions based on run analysis.
-
-        This is what I use to propose changes to the human engineer.
-        """
-        patterns = self.find_patterns(goal_id)
-        if patterns is None:
-            return []
-
-        suggestions = []
-
-        # Suggestion: Fix problematic nodes
-        for node_id, failure_rate in patterns.problematic_nodes:
-            suggestions.append(
-                {
-                    "type": "node_improvement",
-                    "target": node_id,
-                    "reason": f"Node has {failure_rate:.1%} failure rate",
-                    "recommendation": (
-                        f"Review and improve node '{node_id}' - "
-                        "high failure rate suggests prompt or tool issues"
-                    ),
-                    "priority": "high" if failure_rate > 0.3 else "medium",
-                }
-            )
-
-        # Suggestion: Address common failures
-        for failure, count in patterns.common_failures:
-            if count >= 2:
-                suggestions.append(
-                    {
-                        "type": "error_handling",
-                        "target": failure,
-                        "reason": f"Error occurred {count} times",
-                        "recommendation": f"Add handling for: {failure}",
-                        "priority": "high" if count >= 5 else "medium",
-                    }
-                )
-
-        # Suggestion: Overall success rate
-        if patterns.success_rate < 0.8:
-            suggestions.append(
-                {
-                    "type": "architecture",
-                    "target": goal_id,
-                    "reason": f"Goal success rate is only {patterns.success_rate:.1%}",
-                    "recommendation": (
-                        "Consider restructuring the agent graph or improving goal definition"
-                    ),
-                    "priority": "high",
-                }
-            )
-
-        return suggestions
-
-    def get_node_performance(self, node_id: str) -> dict[str, Any]:
-        """Get performance metrics for a specific node across all runs."""
-        run_ids = self.storage.get_runs_by_node(node_id)
-
-        total_decisions = 0
-        successful_decisions = 0
-        total_latency = 0
-        total_tokens = 0
-        decision_types: dict[str, int] = defaultdict(int)
-
-        for run_id in run_ids:
-            run = self.storage.load_run(run_id)
-            if run:
-                for decision in run.decisions:
-                    if decision.node_id == node_id:
-                        total_decisions += 1
-                        if decision.was_successful:
-                            successful_decisions += 1
-                        if decision.outcome:
-                            total_latency += decision.outcome.latency_ms
-                            total_tokens += decision.outcome.tokens_used
-                        decision_types[decision.decision_type.value] += 1
-
-        return {
-            "node_id": node_id,
-            "total_decisions": total_decisions,
-            "success_rate": successful_decisions / total_decisions if total_decisions > 0 else 0,
-            "avg_latency_ms": total_latency / total_decisions if total_decisions > 0 else 0,
-            "total_tokens": total_tokens,
-            "decision_type_distribution": dict(decision_types),
-        }
-
-    # === PRIVATE HELPERS ===
-
-    def _generate_suggestions(
-        self,
-        run: Run,
-        failed_decisions: list[Decision],
-    ) -> list[str]:
-        """Generate suggestions based on failure analysis."""
-        suggestions = []
-
-        for decision in failed_decisions:
-            # Check if there were alternatives
-            if len(decision.options) > 1:
-                chosen = decision.chosen_option
-                alternatives = [o for o in decision.options if o.id != decision.chosen_option_id]
-                if alternatives:
-                    alt_desc = alternatives[0].description
-                    chosen_desc = chosen.description if chosen else "unknown"
-                    suggestions.append(
-                        f"Consider alternative: '{alt_desc}' instead of '{chosen_desc}'"
-                    )
-
-            # Check for missing context
-            if not decision.input_context:
-                suggestions.append(
-                    f"Decision '{decision.intent}' had no input context - "
-                    "ensure relevant data is passed"
-                )
-
-            # Check for constraint issues
-            if decision.active_constraints:
-                constraints = ", ".join(decision.active_constraints)
-                suggestions.append(f"Review constraints: {constraints} - may be too restrictive")
-
-        # Check for reported problems with suggestions
-        for problem in run.problems:
-            if problem.suggested_fix:
-                suggestions.append(problem.suggested_fix)
-
-        return suggestions
-
-    def _analyze_decision_patterns(self, runs: list[Run]) -> dict[str, Any]:
-        """Analyze decision patterns across runs."""
-        type_counts: dict[str, int] = defaultdict(int)
-        option_counts: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
-
-        for run in runs:
-            for decision in run.decisions:
-                type_counts[decision.decision_type.value] += 1
-
-                # Track which options are chosen for similar intents
-                intent_key = decision.intent[:50]  # Truncate for grouping
-                if decision.chosen_option:
-                    option_counts[intent_key][decision.chosen_option.description] += 1
-
-        # Find most common choices per intent
-        common_choices = {}
-        for intent, choices in option_counts.items():
-            if choices:
-                most_common = max(choices.items(), key=lambda x: x[1])
-                common_choices[intent] = {
-                    "choice": most_common[0],
-                    "count": most_common[1],
-                    "alternatives": len(choices) - 1,
-                }
-
-        return {
-            "decision_type_distribution": dict(type_counts),
-            "common_choices": common_choices,
-        }
-
-    def _find_differences(self, run1: Run, run2: Run) -> list[str]:
-        """Find key differences between two runs."""
-        differences = []
-
-        # Status difference
-        if run1.status != run2.status:
-            differences.append(f"Status: {run1.status.value} vs {run2.status.value}")
-
-        # Decision count difference
-        if len(run1.decisions) != len(run2.decisions):
-            differences.append(f"Decision count: {len(run1.decisions)} vs {len(run2.decisions)}")
-
-        # Find first divergence point
-        for i, (d1, d2) in enumerate(zip(run1.decisions, run2.decisions, strict=False)):
-            if d1.chosen_option_id != d2.chosen_option_id:
-                differences.append(
-                    f"Diverged at decision {i}: "
-                    f"chose '{d1.chosen_option_id}' vs '{d2.chosen_option_id}'"
-                )
-                break
-
-        # Node differences
-        nodes1 = set(run1.metrics.nodes_executed)
-        nodes2 = set(run2.metrics.nodes_executed)
-        if nodes1 != nodes2:
-            only_1 = nodes1 - nodes2
-            only_2 = nodes2 - nodes1
-            if only_1:
-                differences.append(f"Nodes only in run 1: {only_1}")
-            if only_2:
-                differences.append(f"Nodes only in run 2: {only_2}")
-
-        return differences
@@ -19,6 +19,10 @@ from framework.graph.edge import DEFAULT_MAX_TOKENS
 # ---------------------------------------------------------------------------

 HIVE_CONFIG_FILE = Path.home() / ".hive" / "configuration.json"
+
+# Hive LLM router endpoint (Anthropic-compatible).
+# litellm's Anthropic handler appends /v1/messages, so this is just the base host.
+HIVE_LLM_ENDPOINT = "https://api.adenhq.com"
 logger = logging.getLogger(__name__)


@@ -47,7 +51,13 @@ def get_preferred_model() -> str:
    """Return the user's preferred LLM model string (e.g. 'anthropic/claude-sonnet-4-20250514')."""
    llm = get_hive_config().get("llm", {})
    if llm.get("provider") and llm.get("model"):
-        return f"{llm['provider']}/{llm['model']}"
+        provider = str(llm["provider"])
+        model = str(llm["model"]).strip()
+        # OpenRouter quickstart stores raw model IDs; tolerate pasted "openrouter/<id>" too.
+        if provider.lower() == "openrouter" and model.lower().startswith("openrouter/"):
+            model = model[len("openrouter/") :]
+        if model:
+            return f"{provider}/{model}"
    return "anthropic/claude-sonnet-4-20250514"


@@ -56,6 +66,15 @@ def get_max_tokens() -> int:
    return get_hive_config().get("llm", {}).get("max_tokens", DEFAULT_MAX_TOKENS)


+DEFAULT_MAX_CONTEXT_TOKENS = 32_000
+OPENROUTER_API_BASE = "https://openrouter.ai/api/v1"
+
+
+def get_max_context_tokens() -> int:
+    """Look up ``llm.max_context_tokens`` in the Hive configuration.
+
+    Returns:
+        The stored value as-is (no type or range validation is applied), or
+        ``DEFAULT_MAX_CONTEXT_TOKENS`` when the key is absent.
+    """
+    llm_settings = get_hive_config().get("llm", {})
+    return llm_settings.get("max_context_tokens", DEFAULT_MAX_CONTEXT_TOKENS)
+
+
 def get_api_key() -> str | None:
    """Return the API key, supporting env var, Claude Code subscription, Codex, and ZAI Code.

@@ -90,6 +109,17 @@ def get_api_key() -> str | None:
        except ImportError:
            pass

+    # Kimi Code subscription: read API key from ~/.kimi/config.toml
+    if llm.get("use_kimi_code_subscription"):
+        try:
+            from framework.runner.runner import get_kimi_code_token
+
+            token = get_kimi_code_token()
+            if token:
+                return token
+        except ImportError:
+            pass
+
    # Standard env-var path (covers ZAI Code and all API-key providers)
    api_key_env_var = llm.get("api_key_env_var")
    if api_key_env_var:
@@ -102,13 +132,28 @@ def get_gcu_enabled() -> bool:
    return get_hive_config().get("gcu_enabled", True)


+def get_gcu_viewport_scale() -> float:
+    """Return the GCU viewport scale factor from the Hive configuration.
+
+    Reads the top-level ``gcu_viewport_scale`` key. The value is accepted only
+    when it is a real number (int or float, but not bool) within 0.1-1.0
+    inclusive; anything else falls back to the default of 0.8.
+
+    Returns:
+        The validated scale as a float, or 0.8.
+    """
+    scale = get_hive_config().get("gcu_viewport_scale", 0.8)
+    # bool is a subclass of int, so a JSON ``true`` would otherwise pass the
+    # range check as 1 and silently select full scale.
+    is_number = isinstance(scale, (int, float)) and not isinstance(scale, bool)
+    if is_number and 0.1 <= scale <= 1.0:
+        return float(scale)
+    return 0.8
+
+
 def get_api_base() -> str | None:
    """Return the configured or provider-implied api_base URL, or None when nothing applies.

    Resolution order (first match wins):
      1. ``use_codex_subscription`` is set: the ChatGPT Codex backend URL.
      2. ``use_kimi_code_subscription`` is set: the Kimi Code endpoint.
      3. A non-empty ``api_base`` value in the ``llm`` config section.
      4. ``provider`` is "openrouter" (case-insensitive): ``OPENROUTER_API_BASE``.
    """
    llm = get_hive_config().get("llm", {})
    if llm.get("use_codex_subscription"):
        # Codex subscription routes through the ChatGPT backend, not api.openai.com.
        return "https://chatgpt.com/backend-api/codex"
-    return llm.get("api_base")
+    if llm.get("use_kimi_code_subscription"):
+        # Kimi Code uses an Anthropic-compatible endpoint (no /v1 suffix).
+        return "https://api.kimi.com/coding"
+    if llm.get("api_base"):
+        return llm["api_base"]
+    if str(llm.get("provider", "")).lower() == "openrouter":
+        return OPENROUTER_API_BASE
+    return None


 def get_llm_extra_kwargs() -> dict[str, Any]:
@@ -164,6 +209,7 @@ class RuntimeConfig:
    model: str = field(default_factory=get_preferred_model)
    temperature: float = 0.7
    max_tokens: int = field(default_factory=get_max_tokens)
+    max_context_tokens: int = field(default_factory=get_max_context_tokens)
    api_key: str | None = field(default_factory=get_api_key)
    api_base: str | None = field(default_factory=get_api_base)
    extra_kwargs: dict[str, Any] = field(default_factory=get_llm_extra_kwargs)
@@ -6,7 +6,7 @@ This module provides secure credential storage with:
 - Template-based usage: {{cred.key}} patterns for injection
 - Bipartisan model: Store stores values, tools define usage
 - Provider system: Extensible lifecycle management (refresh, validate)
- Multiple backends: Encrypted files, env vars, HashiCorp Vault
+- Multiple backends: Encrypted files, env vars

 Quick Start:
    from core.framework.credentials import CredentialStore, CredentialObject
@@ -38,8 +38,6 @@ For Aden server sync:
        AdenSyncProvider,
    )

-For Vault integration:
-    from core.framework.credentials.vault import HashiCorpVaultStorage
 """

 from .key_storage import (
@@ -30,6 +30,7 @@ Usage:

 from __future__ import annotations

+import json as _json
 import logging
 import os
 import time
@@ -37,8 +38,6 @@ from dataclasses import dataclass, field
 from datetime import datetime
 from typing import Any

-import json as _json
-
 import httpx

 logger = logging.getLogger(__name__)
@@ -142,17 +142,27 @@ def save_aden_api_key(key: str) -> None:
    os.environ[ADEN_ENV_VAR] = key


-def delete_aden_api_key() -> None:
-    """Remove ADEN_API_KEY from the encrypted store and ``os.environ``."""
+def delete_aden_api_key() -> bool:
+    """Remove ADEN_API_KEY from the encrypted store and ``os.environ``.
+
+    Storage errors are logged and never raised, and the environment variable
+    is removed whether or not the store deletion succeeded.
+
+    Returns True if the key existed and was deleted, False otherwise
+    (including when the deletion attempt raised).
+    """
+    deleted = False
    try:
        from .storage import EncryptedFileStorage

        storage = EncryptedFileStorage()
-        storage.delete(ADEN_CREDENTIAL_ID)
+        deleted = storage.delete(ADEN_CREDENTIAL_ID)
+    except (FileNotFoundError, PermissionError) as e:
+        # Missing-file and permission problems are logged at debug level only.
+        logger.debug("Could not delete %s from encrypted store: %s", ADEN_CREDENTIAL_ID, e)
    except Exception:
-        logger.debug("Could not delete %s from encrypted store", ADEN_CREDENTIAL_ID)
-
+        # Any other failure is logged as a warning with the traceback attached.
+        logger.warning(
+            "Unexpected error deleting %s from encrypted store",
+            ADEN_CREDENTIAL_ID,
+            exc_info=True,
+        )
    os.environ.pop(ADEN_ENV_VAR, None)
+    return deleted


 # ---------------------------------------------------------------------------
@@ -167,8 +177,10 @@ def _read_credential_key_file() -> str | None:
            value = CREDENTIAL_KEY_PATH.read_text(encoding="utf-8").strip()
            if value:
                return value
+    except (FileNotFoundError, PermissionError) as e:
+        logger.debug("Could not read %s: %s", CREDENTIAL_KEY_PATH, e)
    except Exception:
-        logger.debug("Could not read %s", CREDENTIAL_KEY_PATH)
+        logger.warning("Unexpected error reading %s", CREDENTIAL_KEY_PATH, exc_info=True)
    return None


@@ -196,6 +208,12 @@ def _read_aden_from_encrypted_store() -> str | None:
        cred = storage.load(ADEN_CREDENTIAL_ID)
        if cred:
            return cred.get_key("api_key")
+    except (FileNotFoundError, PermissionError, KeyError) as e:
+        logger.debug("Could not load %s from encrypted store: %s", ADEN_CREDENTIAL_ID, e)
    except Exception:
-        logger.debug("Could not load %s from encrypted store", ADEN_CREDENTIAL_ID)
+        logger.warning(
+            "Unexpected error loading %s from encrypted store",
+            ADEN_CREDENTIAL_ID,
+            exc_info=True,
+        )
    return None
@@ -51,6 +51,16 @@ def ensure_credential_key_env() -> None:
                    if found and value:
                        os.environ[var_name] = value
                        logger.debug("Loaded %s from shell config", var_name)
+        # Also load the currently configured LLM env var even if it's not in CREDENTIAL_SPECS.
+        # This keeps quickstart-written keys available to fresh processes on Unix shells.
+        from framework.config import get_hive_config
+
+        llm_env_var = str(get_hive_config().get("llm", {}).get("api_key_env_var", "")).strip()
+        if llm_env_var and not os.environ.get(llm_env_var):
+            found, value = check_env_var_in_shell_config(llm_env_var)
+            if found and value:
+                os.environ[llm_env_var] = value
+                logger.debug("Loaded configured LLM env var %s from shell config", llm_env_var)
    except ImportError:
        pass

@@ -1,55 +0,0 @@
-"""
-HashiCorp Vault integration for the credential store.
-
-This module provides enterprise-grade secret management through
-HashiCorp Vault integration.
-
-Quick Start:
-    from core.framework.credentials import CredentialStore
-    from core.framework.credentials.vault import HashiCorpVaultStorage
-
-    # Configure Vault storage
-    storage = HashiCorpVaultStorage(
-        url="https://vault.example.com:8200",
-        # token read from VAULT_TOKEN env var
-        mount_point="secret",
-        path_prefix="hive/agents/prod"
-    )
-
-    # Create credential store with Vault backend
-    store = CredentialStore(storage=storage)
-
-    # Use normally - credentials are stored in Vault
-    credential = store.get_credential("my_api")
-
-Requirements:
-    pip install hvac
-
-Authentication:
-    Set the VAULT_TOKEN environment variable or pass the token directly:
-
-        export VAULT_TOKEN="hvs.xxxxxxxxxxxxx"
-
-    For production, consider using Vault auth methods:
-    - Kubernetes auth
-    - AppRole auth
-    - AWS IAM auth
-
-Vault Configuration:
-    Ensure KV v2 secrets engine is enabled:
-
-        vault secrets enable -path=secret kv-v2
-
-    Grant appropriate policies:
-
-        path "secret/data/hive/credentials/*" {
-            capabilities = ["create", "read", "update", "delete", "list"]
-        }
-        path "secret/metadata/hive/credentials/*" {
-            capabilities = ["list", "delete"]
-        }
-"""
-
-from .hashicorp import HashiCorpVaultStorage
-
-__all__ = ["HashiCorpVaultStorage"]
@@ -1,394 +0,0 @@
-"""
-HashiCorp Vault storage adapter.
-
-Provides integration with HashiCorp Vault for enterprise secret management.
-Requires the 'hvac' package: uv pip install hvac
-"""
-
-from __future__ import annotations
-
-import logging
-import os
-from datetime import datetime
-from typing import Any
-
-from pydantic import SecretStr
-
-from ..models import CredentialKey, CredentialObject, CredentialType
-from ..storage import CredentialStorage
-
-logger = logging.getLogger(__name__)
-
-
-class HashiCorpVaultStorage(CredentialStorage):
-    """
-    HashiCorp Vault storage adapter.
-
-    Features:
-    - KV v2 secrets engine support
-    - Namespace support (Enterprise)
-    - Automatic secret versioning
-    - Audit logging via Vault
-
-    The adapter stores credentials in Vault's KV v2 secrets engine with
-    the following structure:
-
-        {mount_point}/data/{path_prefix}/{credential_id}
-        └── data:
-            ├── _type: "oauth2"
-            ├── access_token: "xxx"
-            ├── refresh_token: "yyy"
-            ├── _expires_access_token: "2024-01-26T12:00:00"
-            └── _provider_id: "oauth2"
-
-    Example:
-        storage = HashiCorpVaultStorage(
-            url="https://vault.example.com:8200",
-            token="hvs.xxx",  # Or use VAULT_TOKEN env var
-            mount_point="secret",
-            path_prefix="hive/credentials"
-        )
-
-        store = CredentialStore(storage=storage)
-
-        # Credentials are now stored in Vault
-        store.save_credential(credential)
-        credential = store.get_credential("my_api")
-
-    Authentication:
-        The adapter uses token-based authentication. The token can be provided:
-        1. Directly via the 'token' parameter
-        2. Via the VAULT_TOKEN environment variable
-
-        For production, consider using:
-        - Kubernetes auth method
-        - AppRole auth method
-        - AWS IAM auth method
-
-    Requirements:
-        uv pip install hvac
-    """
-
-    def __init__(
-        self,
-        url: str,
-        token: str | None = None,
-        mount_point: str = "secret",
-        path_prefix: str = "hive/credentials",
-        namespace: str | None = None,
-        verify_ssl: bool = True,
-    ):
-        """
-        Initialize Vault storage.
-
-        Args:
-            url: Vault server URL (e.g., https://vault.example.com:8200)
-            token: Vault token. If None, reads from VAULT_TOKEN env var
-            mount_point: KV secrets engine mount point (default: "secret")
-            path_prefix: Path prefix for all credentials
-            namespace: Vault namespace (Enterprise feature)
-            verify_ssl: Whether to verify SSL certificates
-
-        Raises:
-            ImportError: If hvac is not installed
-            ValueError: If authentication fails
-        """
-        try:
-            import hvac
-        except ImportError as e:
-            raise ImportError(
-                "HashiCorp Vault support requires 'hvac'. Install with: uv pip install hvac"
-            ) from e
-
-        self._url = url
-        self._token = token or os.environ.get("VAULT_TOKEN")
-        self._mount = mount_point
-        self._prefix = path_prefix
-        self._namespace = namespace
-
-        if not self._token:
-            raise ValueError(
-                "Vault token required. Set VAULT_TOKEN env var or pass token parameter."
-            )
-
-        self._client = hvac.Client(
-            url=url,
-            token=self._token,
-            namespace=namespace,
-            verify=verify_ssl,
-        )
-
-        if not self._client.is_authenticated():
-            raise ValueError("Vault authentication failed. Check token and server URL.")
-
-        logger.info(f"Connected to HashiCorp Vault at {url}")
-
-    def _path(self, credential_id: str) -> str:
-        """Build Vault path for credential."""
-        # Sanitize credential_id
-        safe_id = credential_id.replace("/", "_").replace("\\", "_")
-        return f"{self._prefix}/{safe_id}"
-
-    def save(self, credential: CredentialObject) -> None:
-        """Save credential to Vault KV v2."""
-        path = self._path(credential.id)
-        data = self._serialize_for_vault(credential)
-
-        try:
-            self._client.secrets.kv.v2.create_or_update_secret(
-                path=path,
-                secret=data,
-                mount_point=self._mount,
-            )
-            logger.debug(f"Saved credential '{credential.id}' to Vault at {path}")
-        except Exception as e:
-            logger.error(f"Failed to save credential '{credential.id}' to Vault: {e}")
-            raise
-
-    def load(self, credential_id: str) -> CredentialObject | None:
-        """Load credential from Vault."""
-        path = self._path(credential_id)
-
-        try:
-            response = self._client.secrets.kv.v2.read_secret_version(
-                path=path,
-                mount_point=self._mount,
-            )
-            data = response["data"]["data"]
-            return self._deserialize_from_vault(credential_id, data)
-        except Exception as e:
-            # Check if it's a "not found" error
-            error_str = str(e).lower()
-            if "not found" in error_str or "404" in error_str:
-                logger.debug(f"Credential '{credential_id}' not found in Vault")
-                return None
-            logger.error(f"Failed to load credential '{credential_id}' from Vault: {e}")
-            raise
-
-    def delete(self, credential_id: str) -> bool:
-        """Delete credential from Vault (all versions)."""
-        path = self._path(credential_id)
-
-        try:
-            self._client.secrets.kv.v2.delete_metadata_and_all_versions(
-                path=path,
-                mount_point=self._mount,
-            )
-            logger.debug(f"Deleted credential '{credential_id}' from Vault")
-            return True
-        except Exception as e:
-            error_str = str(e).lower()
-            if "not found" in error_str or "404" in error_str:
-                return False
-            logger.error(f"Failed to delete credential '{credential_id}' from Vault: {e}")
-            raise
-
-    def list_all(self) -> list[str]:
-        """List all credentials under the prefix."""
-        try:
-            response = self._client.secrets.kv.v2.list_secrets(
-                path=self._prefix,
-                mount_point=self._mount,
-            )
-            keys = response.get("data", {}).get("keys", [])
-            # Remove trailing slashes from folder names
-            return [k.rstrip("/") for k in keys]
-        except Exception as e:
-            error_str = str(e).lower()
-            if "not found" in error_str or "404" in error_str:
-                return []
-            logger.error(f"Failed to list credentials from Vault: {e}")
-            raise
-
-    def exists(self, credential_id: str) -> bool:
-        """Check if credential exists in Vault."""
-        try:
-            path = self._path(credential_id)
-            self._client.secrets.kv.v2.read_secret_version(
-                path=path,
-                mount_point=self._mount,
-            )
-            return True
-        except Exception:
-            return False
-
-    def _serialize_for_vault(self, credential: CredentialObject) -> dict[str, Any]:
-        """Convert credential to Vault secret format."""
-        data: dict[str, Any] = {
-            "_type": credential.credential_type.value,
-        }
-
-        if credential.provider_id:
-            data["_provider_id"] = credential.provider_id
-
-        if credential.description:
-            data["_description"] = credential.description
-
-        if credential.auto_refresh:
-            data["_auto_refresh"] = "true"
-
-        # Store each key
-        for key_name, key in credential.keys.items():
-            data[key_name] = key.get_secret_value()
-
-            if key.expires_at:
-                data[f"_expires_{key_name}"] = key.expires_at.isoformat()
-
-            if key.metadata:
-                data[f"_metadata_{key_name}"] = str(key.metadata)
-
-        return data
-
-    def _deserialize_from_vault(self, credential_id: str, data: dict[str, Any]) -> CredentialObject:
-        """Reconstruct credential from Vault secret."""
-        # Extract metadata fields
-        cred_type = CredentialType(data.pop("_type", "api_key"))
-        provider_id = data.pop("_provider_id", None)
-        description = data.pop("_description", "")
-        auto_refresh = data.pop("_auto_refresh", "") == "true"
-
-        # Build keys dict
-        keys: dict[str, CredentialKey] = {}
-
-        # Find all non-metadata keys
-        key_names = [k for k in data.keys() if not k.startswith("_")]
-
-        for key_name in key_names:
-            value = data[key_name]
-
-            # Check for expiration
-            expires_at = None
-            expires_key = f"_expires_{key_name}"
-            if expires_key in data:
-                try:
-                    expires_at = datetime.fromisoformat(data[expires_key])
-                except (ValueError, TypeError):
-                    pass
-
-            # Check for metadata
-            metadata: dict[str, Any] = {}
-            metadata_key = f"_metadata_{key_name}"
-            if metadata_key in data:
-                try:
-                    import ast
-
-                    metadata = ast.literal_eval(data[metadata_key])
-                except (ValueError, SyntaxError):
-                    pass
-
-            keys[key_name] = CredentialKey(
-                name=key_name,
-                value=SecretStr(value),
-                expires_at=expires_at,
-                metadata=metadata,
-            )
-
-        return CredentialObject(
-            id=credential_id,
-            credential_type=cred_type,
-            keys=keys,
-            provider_id=provider_id,
-            description=description,
-            auto_refresh=auto_refresh,
-        )
-
-    # --- Vault-Specific Operations ---
-
-    def get_secret_metadata(self, credential_id: str) -> dict[str, Any] | None:
-        """
-        Get Vault metadata for a secret (version info, timestamps, etc.).
-
-        Args:
-            credential_id: The credential identifier
-
-        Returns:
-            Metadata dict or None if not found
-        """
-        path = self._path(credential_id)
-
-        try:
-            response = self._client.secrets.kv.v2.read_secret_metadata(
-                path=path,
-                mount_point=self._mount,
-            )
-            return response.get("data", {})
-        except Exception:
-            return None
-
-    def soft_delete(self, credential_id: str, versions: list[int] | None = None) -> bool:
-        """
-        Soft delete specific versions (can be recovered).
-
-        Args:
-            credential_id: The credential identifier
-            versions: Version numbers to delete. If None, deletes latest.
-
-        Returns:
-            True if successful
-        """
-        path = self._path(credential_id)
-
-        try:
-            if versions:
-                self._client.secrets.kv.v2.delete_secret_versions(
-                    path=path,
-                    versions=versions,
-                    mount_point=self._mount,
-                )
-            else:
-                self._client.secrets.kv.v2.delete_latest_version_of_secret(
-                    path=path,
-                    mount_point=self._mount,
-                )
-            return True
-        except Exception as e:
-            logger.error(f"Soft delete failed for '{credential_id}': {e}")
-            return False
-
-    def undelete(self, credential_id: str, versions: list[int]) -> bool:
-        """
-        Recover soft-deleted versions.
-
-        Args:
-            credential_id: The credential identifier
-            versions: Version numbers to recover
-
-        Returns:
-            True if successful
-        """
-        path = self._path(credential_id)
-
-        try:
-            self._client.secrets.kv.v2.undelete_secret_versions(
-                path=path,
-                versions=versions,
-                mount_point=self._mount,
-            )
-            return True
-        except Exception as e:
-            logger.error(f"Undelete failed for '{credential_id}': {e}")
-            return False
-
-    def load_version(self, credential_id: str, version: int) -> CredentialObject | None:
-        """
-        Load a specific version of a credential.
-
-        Args:
-            credential_id: The credential identifier
-            version: Version number to load
-
-        Returns:
-            CredentialObject or None
-        """
-        path = self._path(credential_id)
-
-        try:
-            response = self._client.secrets.kv.v2.read_secret_version(
-                path=path,
-                version=version,
-                mount_point=self._mount,
-            )
-            data = response["data"]["data"]
-            return self._deserialize_from_vault(credential_id, data)
-        except Exception:
-            return None
@@ -307,13 +307,13 @@ class NodeConversation:
    def __init__(
        self,
        system_prompt: str = "",
-        max_history_tokens: int = 32000,
+        max_context_tokens: int = 32000,
        compaction_threshold: float = 0.8,
        output_keys: list[str] | None = None,
        store: ConversationStore | None = None,
    ) -> None:
        self._system_prompt = system_prompt
-        self._max_history_tokens = max_history_tokens
+        self._max_context_tokens = max_context_tokens
        self._compaction_threshold = compaction_threshold
        self._output_keys = output_keys
        self._store = store
@@ -525,16 +525,16 @@ class NodeConversation:
        self._last_api_input_tokens = actual_input_tokens

    def usage_ratio(self) -> float:
-        """Current token usage as a fraction of *max_history_tokens*.
+        """Current token usage as a fraction of *max_context_tokens*.

-        Returns 0.0 when ``max_history_tokens`` is zero (unlimited).
+        Returns 0.0 when ``max_context_tokens`` is zero or negative (unlimited).
        """
-        if self._max_history_tokens <= 0:
+        # The guard also keeps the division below from dividing by zero.
+        if self._max_context_tokens <= 0:
            return 0.0
-        return self.estimate_tokens() / self._max_history_tokens
+        return self.estimate_tokens() / self._max_context_tokens

    def needs_compaction(self) -> bool:
-        return self.estimate_tokens() >= self._max_history_tokens * self._compaction_threshold
+        """Return True once estimated usage reaches the compaction threshold.
+
+        A non-positive ``max_context_tokens`` means "unlimited" (the same
+        convention ``usage_ratio`` documents), so compaction is never
+        requested in that case.
+
+        Returns:
+            True when ``estimate_tokens()`` is at least
+            ``max_context_tokens * compaction_threshold``; False otherwise.
+        """
+        # For a zero budget the comparison below would degenerate to
+        # ``estimate_tokens() >= 0`` and request compaction on every call.
+        if self._max_context_tokens <= 0:
+            return False
+        return self.estimate_tokens() >= self._max_context_tokens * self._compaction_threshold

    # --- Output-key extraction ---------------------------------------------

@@ -1029,7 +1029,7 @@ class NodeConversation:
        await self._store.write_meta(
            {
                "system_prompt": self._system_prompt,
-                "max_history_tokens": self._max_history_tokens,
+                "max_context_tokens": self._max_context_tokens,
                "compaction_threshold": self._compaction_threshold,
                "output_keys": self._output_keys,
            }
@@ -1062,7 +1062,7 @@ class NodeConversation:

        conv = cls(
            system_prompt=meta.get("system_prompt", ""),
-            max_history_tokens=meta.get("max_history_tokens", 32000),
+            max_context_tokens=meta.get("max_context_tokens", 32000),
            compaction_threshold=meta.get("compaction_threshold", 0.8),
            output_keys=meta.get("output_keys"),
            store=store,
@@ -37,7 +37,7 @@ async def evaluate_phase_completion(
    phase_description: str,
    success_criteria: str,
    accumulator_state: dict[str, Any],
-    max_history_tokens: int = 8_196,
+    max_context_tokens: int = 8_196,
 ) -> PhaseVerdict:
    """Level 2 judge: read the conversation and evaluate quality.

@@ -50,7 +50,7 @@ async def evaluate_phase_completion(
        phase_description: Description of the phase
        success_criteria: Natural-language criteria for phase completion
        accumulator_state: Current output key values
-        max_history_tokens: Main conversation token budget (judge gets 20%)
+        max_context_tokens: Main conversation token budget (judge gets 20%)

    Returns:
        PhaseVerdict with action and optional feedback
@@ -89,7 +89,7 @@ FEEDBACK: (reason if RETRY, empty if ACCEPT)"""
        response = await llm.acomplete(
            messages=[{"role": "user", "content": user_prompt}],
            system=system_prompt,
-            max_tokens=max(1024, max_history_tokens // 5),
+            max_tokens=max(1024, max_context_tokens // 5),
            max_retries=1,
        )
        if not response.content or not response.content.strip():
@@ -322,7 +322,11 @@ class AsyncEntryPointSpec(BaseModel):

    id: str = Field(description="Unique identifier for this entry point")
    name: str = Field(description="Human-readable name")
-    entry_node: str = Field(description="Node ID to start execution from")
+    entry_node: str = Field(
+        default="",
+        description="Deprecated: Node ID to start execution from. "
+        "Triggers are graph-level; worker always enters at GraphSpec.entry_node.",
+    )
    trigger_type: str = Field(
        default="manual",
        description="How this entry point is triggered: webhook, api, timer, event, manual",
@@ -331,6 +335,10 @@ class AsyncEntryPointSpec(BaseModel):
        default_factory=dict,
        description="Trigger-specific configuration (e.g., webhook URL, timer interval)",
    )
+    task: str = Field(
+        default="",
+        description="Worker task string when this trigger fires autonomously",
+    )
    isolation_level: str = Field(
        default="shared", description="State isolation: isolated, shared, or synchronized"
    )
@@ -368,28 +376,8 @@ class GraphSpec(BaseModel):
            edges=[...],
        )

-    For multi-entry-point agents (concurrent streams):
-        GraphSpec(
-            id="support-agent-graph",
-            goal_id="support-001",
-            entry_node="process-webhook",  # Default entry
-            async_entry_points=[
-                AsyncEntryPointSpec(
-                    id="webhook",
-                    name="Zendesk Webhook",
-                    entry_node="process-webhook",
-                    trigger_type="webhook",
-                ),
-                AsyncEntryPointSpec(
-                    id="api",
-                    name="API Handler",
-                    entry_node="process-request",
-                    trigger_type="api",
-                ),
-            ],
-            nodes=[...],
-            edges=[...],
-        )
+    Triggers (timer, webhook, event) are now defined in ``triggers.json``
+    alongside the agent directory, not embedded in the graph spec.
    """

    id: str
@@ -402,12 +390,6 @@ class GraphSpec(BaseModel):
        default_factory=dict,
        description="Named entry points for resuming execution. Format: {name: node_id}",
    )
-    async_entry_points: list[AsyncEntryPointSpec] = Field(
-        default_factory=list,
-        description=(
-            "Asynchronous entry points for concurrent execution streams (used with AgentRuntime)"
-        ),
-    )
    terminal_nodes: list[str] = Field(
        default_factory=list, description="IDs of nodes that end execution"
    )
@@ -486,17 +468,6 @@ class GraphSpec(BaseModel):
                return node
        return None

-    def has_async_entry_points(self) -> bool:
-        """Check if this graph uses async entry points (multi-stream execution)."""
-        return len(self.async_entry_points) > 0
-
-    def get_async_entry_point(self, entry_point_id: str) -> AsyncEntryPointSpec | None:
-        """Get an async entry point by ID."""
-        for ep in self.async_entry_points:
-            if ep.id == entry_point_id:
-                return ep
-        return None
-
    def get_outgoing_edges(self, node_id: str) -> list[EdgeSpec]:
        """Get all edges leaving a node, sorted by priority."""
        edges = [e for e in self.edges if e.source == node_id]
@@ -587,37 +558,6 @@ class GraphSpec(BaseModel):
        if not self.get_node(self.entry_node):
            errors.append(f"Entry node '{self.entry_node}' not found")

-        # Check async entry points
-        seen_entry_ids = set()
-        for entry_point in self.async_entry_points:
-            # Check for duplicate IDs
-            if entry_point.id in seen_entry_ids:
-                errors.append(f"Duplicate async entry point ID: '{entry_point.id}'")
-            seen_entry_ids.add(entry_point.id)
-
-            # Check entry node exists
-            if not self.get_node(entry_point.entry_node):
-                errors.append(
-                    f"Async entry point '{entry_point.id}' references "
-                    f"missing node '{entry_point.entry_node}'"
-                )
-
-            # Validate isolation level
-            valid_isolation = {"isolated", "shared", "synchronized"}
-            if entry_point.isolation_level not in valid_isolation:
-                errors.append(
-                    f"Async entry point '{entry_point.id}' has invalid isolation_level "
-                    f"'{entry_point.isolation_level}'. Valid: {valid_isolation}"
-                )
-
-            # Validate trigger type
-            valid_triggers = {"webhook", "api", "timer", "event", "manual"}
-            if entry_point.trigger_type not in valid_triggers:
-                errors.append(
-                    f"Async entry point '{entry_point.id}' has invalid trigger_type "
-                    f"'{entry_point.trigger_type}'. Valid: {valid_triggers}"
-                )
-
        # Check terminal nodes exist
        for term in self.terminal_nodes:
            if not self.get_node(term):
@@ -646,10 +586,6 @@ class GraphSpec(BaseModel):
        for entry_point_node in self.entry_points.values():
            to_visit.append(entry_point_node)

-        # Add all async entry points as valid starting points
-        for async_entry in self.async_entry_points:
-            to_visit.append(async_entry.entry_node)
-
        # Traverse from all entry points
        while to_visit:
            current = to_visit.pop()
@@ -666,18 +602,10 @@ class GraphSpec(BaseModel):
                for sub_agent_id in sub_agents:
                    reachable.add(sub_agent_id)

-        # Build set of async entry point nodes for quick lookup
-        async_entry_nodes = {ep.entry_node for ep in self.async_entry_points}
-
        for node in self.nodes:
            if node.id not in reachable:
-                # Skip if node is a pause node, entry point target, or async entry
-                # (pause/resume architecture and async entry points make reachable)
-                if (
-                    node.id in self.pause_nodes
-                    or node.id in self.entry_points.values()
-                    or node.id in async_entry_nodes
-                ):
+                # Skip if node is a pause node or entry point target
+                if node.id in self.pause_nodes or node.id in self.entry_points.values():
                    continue
                errors.append(f"Node '{node.id}' is unreachable from entry")

@@ -27,11 +27,24 @@ from framework.graph.node import (
    SharedMemory,
 )
 from framework.graph.validator import OutputValidator
-from framework.llm.provider import LLMProvider, Tool
+from framework.llm.provider import LLMProvider, Tool, ToolUse
 from framework.observability import set_trace_context
 from framework.runtime.core import Runtime
 from framework.schemas.checkpoint import Checkpoint
 from framework.storage.checkpoint_store import CheckpointStore
+from framework.utils.io import atomic_write
+
+logger = logging.getLogger(__name__)
+
+
+def _default_max_context_tokens() -> int:
+    """Resolve max_context_tokens from global config, falling back to 32000."""
+    try:
+        from framework.config import get_max_context_tokens
+
+        return get_max_context_tokens()
+    except Exception:
+        return 32_000


@dataclass
@@ -138,6 +151,9 @@ class GraphExecutor:
        tool_provider_map: dict[str, str] | None = None,
        dynamic_tools_provider: Callable | None = None,
        dynamic_prompt_provider: Callable | None = None,
+        iteration_metadata_provider: Callable | None = None,
+        skills_catalog_prompt: str = "",
+        protocols_prompt: str = "",
    ):
        """
        Initialize the executor.
@@ -163,6 +179,8 @@ class GraphExecutor:
                tool list (for mode switching)
            dynamic_prompt_provider: Optional callback returning current
                system prompt (for phase switching)
+            skills_catalog_prompt: Available skills catalog for system prompt
+            protocols_prompt: Default skill operational protocols for system prompt
        """
        self.runtime = runtime
        self.llm = llm
@@ -183,6 +201,21 @@ class GraphExecutor:
        self.tool_provider_map = tool_provider_map
        self.dynamic_tools_provider = dynamic_tools_provider
        self.dynamic_prompt_provider = dynamic_prompt_provider
+        self.iteration_metadata_provider = iteration_metadata_provider
+        self.skills_catalog_prompt = skills_catalog_prompt
+        self.protocols_prompt = protocols_prompt
+
+        if protocols_prompt:
+            self.logger.info(
+                "GraphExecutor[%s] received protocols_prompt (%d chars)",
+                stream_id,
+                len(protocols_prompt),
+            )
+        else:
+            self.logger.warning(
+                "GraphExecutor[%s] received EMPTY protocols_prompt",
+                stream_id,
+            )

        # Parallel execution settings
        self.enable_parallel_execution = enable_parallel_execution
@@ -212,11 +245,11 @@ class GraphExecutor:
        """
        if not self._storage_path:
            return
+        state_path = self._storage_path / "state.json"
        try:
            import json as _json
            from datetime import datetime

-            state_path = self._storage_path / "state.json"
            if state_path.exists():
                state_data = _json.loads(state_path.read_text(encoding="utf-8"))
            else:
@@ -239,9 +272,14 @@ class GraphExecutor:
            state_data["memory"] = memory_snapshot
            state_data["memory_keys"] = list(memory_snapshot.keys())

-            state_path.write_text(_json.dumps(state_data, indent=2), encoding="utf-8")
+            with atomic_write(state_path, encoding="utf-8") as f:
+                _json.dump(state_data, f, indent=2)
        except Exception:
-            pass  # Best-effort — never block execution
+            logger.warning(
+                "Failed to persist progress state to %s",
+                state_path,
+                exc_info=True,
+            )

    def _validate_tools(self, graph: GraphSpec) -> list[str]:
        """
@@ -330,7 +368,7 @@ class GraphExecutor:
                _depth,
            )
        else:
-            max_tokens = getattr(conversation, "_max_history_tokens", 32000)
+            max_tokens = getattr(conversation, "_max_context_tokens", 32000)
            target_tokens = max_tokens // 2
            target_chars = target_tokens * 4

@@ -403,6 +441,14 @@ class GraphExecutor:
        )
        return s1 + "\n\n" + s2

+    def _get_runtime_log_session_id(self) -> str:
+        """Return the session-backed execution ID for runtime logging, if any."""
+        if not self._storage_path:
+            return ""
+        if self._storage_path.parent.name != "sessions":
+            return ""
+        return self._storage_path.name
+
    async def execute(
        self,
        graph: GraphSpec,
@@ -696,10 +742,7 @@ class GraphExecutor:
        )

        if self.runtime_logger:
-            # Extract session_id from storage_path if available (for unified sessions)
-            session_id = ""
-            if self._storage_path and self._storage_path.name.startswith("session_"):
-                session_id = self._storage_path.name
+            session_id = self._get_runtime_log_session_id()
            self.runtime_logger.start_run(goal_id=goal.id, session_id=session_id)

        self.logger.info(f"🚀 Starting execution: {goal.name}")
@@ -925,6 +968,33 @@ class GraphExecutor:
                self.logger.info("   Executing...")
                result = await node_impl.execute(ctx)

+                # GCU tab cleanup: stop the browser profile after a top-level GCU node
+                # finishes so tabs don't accumulate. Mirrors the subagent cleanup in
+                # EventLoopNode._execute_subagent().
+                if node_spec.node_type == "gcu" and self.tool_executor is not None:
+                    try:
+                        from gcu.browser.session import (
+                            _active_profile as _gcu_profile_var,
+                        )
+
+                        _gcu_profile = _gcu_profile_var.get()
+                        _stop_use = ToolUse(
+                            id="gcu-cleanup",
+                            name="browser_stop",
+                            input={"profile": _gcu_profile},
+                        )
+                        _stop_result = self.tool_executor(_stop_use)
+                        if asyncio.iscoroutine(_stop_result) or asyncio.isfuture(_stop_result):
+                            await _stop_result
+                    except ImportError:
+                        pass  # GCU not installed
+                    except Exception as _gcu_exc:
+                        logger.warning(
+                            "GCU browser_stop failed for profile %r: %s",
+                            _gcu_profile,
+                            _gcu_exc,
+                        )
+
                # Emit node-completed event (skip event_loop nodes)
                if self._event_bus and node_spec.node_type != "event_loop":
                    await self._event_bus.emit_node_loop_completed(
@@ -1604,7 +1674,7 @@ class GraphExecutor:
            # Return with paused status
            return ExecutionResult(
                success=False,
-                error="Execution paused by user",
+                error="Execution cancelled",
                output=saved_memory,
                steps_executed=steps,
                total_tokens=total_tokens,
@@ -1753,10 +1823,31 @@ class GraphExecutor:
            if node_spec.tools:
                available_tools = [t for t in self.tools if t.name in node_spec.tools]

-        # Create scoped memory view
+        # Create scoped memory view.
+        # When permissions are restricted (non-empty key lists), auto-include
+        # _-prefixed keys used by default skill protocols so agents can read/write
+        # operational state (e.g. _working_notes, _batch_ledger) regardless of
+        # what the node declares.  When key lists are empty (unrestricted), leave
+        # unchanged — empty means "allow all".
+        read_keys = list(node_spec.input_keys)
+        write_keys = list(node_spec.output_keys)
+        # Only extend lists that were already restricted (non-empty).
+        # Empty means "allow all" — adding keys would accidentally
+        # activate the permission check and block legitimate reads/writes.
+        if read_keys or write_keys:
+            from framework.skills.defaults import SHARED_MEMORY_KEYS as _skill_keys
+
+            existing_underscore = [k for k in memory._data if k.startswith("_")]
+            extra_keys = set(_skill_keys) | set(existing_underscore)
+            for k in extra_keys:
+                if read_keys and k not in read_keys:
+                    read_keys.append(k)
+                if write_keys and k not in write_keys:
+                    write_keys.append(k)
+
        scoped_memory = memory.with_permissions(
-            read_keys=node_spec.input_keys,
-            write_keys=node_spec.output_keys,
+            read_keys=read_keys,
+            write_keys=write_keys,
        )

        # Build per-node accounts prompt (filtered to this node's tools)
@@ -1799,6 +1890,9 @@ class GraphExecutor:
            shared_node_registry=self.node_registry,  # For subagent escalation routing
            dynamic_tools_provider=self.dynamic_tools_provider,
            dynamic_prompt_provider=self.dynamic_prompt_provider,
+            iteration_metadata_provider=self.iteration_metadata_provider,
+            skills_catalog_prompt=self.skills_catalog_prompt,
+            protocols_prompt=self.protocols_prompt,
        )

    VALID_NODE_TYPES = {
@@ -1872,7 +1966,7 @@ class GraphExecutor:
                    max_tool_calls_per_turn=lc.get("max_tool_calls_per_turn", 30),
                    tool_call_overflow_margin=lc.get("tool_call_overflow_margin", 0.5),
                    stall_detection_threshold=lc.get("stall_detection_threshold", 3),
-                    max_history_tokens=lc.get("max_history_tokens", 32000),
+                    max_context_tokens=lc.get("max_context_tokens", _default_max_context_tokens()),
                    max_tool_result_chars=lc.get("max_tool_result_chars", 30_000),
                    spillover_dir=spillover,
                    hooks=lc.get("hooks", {}),
@@ -2039,6 +2133,10 @@ class GraphExecutor:
                edge=edge,
            )

+        # Track which branch wrote which key for memory conflict detection
+        fanout_written_keys: dict[str, str] = {}  # key -> branch_id that wrote it
+        fanout_keys_lock = asyncio.Lock()
+
        self.logger.info(f"   ⑂ Fan-out: executing {len(branches)} branches in parallel")
        for branch in branches.values():
            target_spec = graph.get_node(branch.node_id)
@@ -2130,8 +2228,31 @@ class GraphExecutor:
                        )

                    if result.success:
-                        # Write outputs to shared memory using async write
+                        # Write outputs to shared memory with conflict detection
+                        conflict_strategy = self._parallel_config.memory_conflict_strategy
                        for key, value in result.output.items():
+                            async with fanout_keys_lock:
+                                prior_branch = fanout_written_keys.get(key)
+                                if prior_branch and prior_branch != branch.branch_id:
+                                    if conflict_strategy == "error":
+                                        raise RuntimeError(
+                                            f"Memory conflict: key '{key}' already written "
+                                            f"by branch '{prior_branch}', "
+                                            f"conflicting write from '{branch.branch_id}'"
+                                        )
+                                    elif conflict_strategy == "first_wins":
+                                        self.logger.debug(
+                                            f"      ⚠ Skipping write to '{key}' "
+                                            f"(first_wins: already set by {prior_branch})"
+                                        )
+                                        continue
+                                    else:
+                                        # last_wins (default): write and log
+                                        self.logger.debug(
+                                            f"      ⚠ Key '{key}' overwritten "
+                                            f"(last_wins: {prior_branch} -> {branch.branch_id})"
+                                        )
+                                fanout_written_keys[key] = branch.branch_id
                            await memory.write_async(key, value)

                        branch.result = result
@@ -2178,9 +2299,11 @@ class GraphExecutor:

                return branch, e

-        # Execute all branches concurrently
-        tasks = [execute_single_branch(b) for b in branches.values()]
-        results = await asyncio.gather(*tasks, return_exceptions=False)
+        # Execute all branches concurrently with per-branch timeout
+        timeout = self._parallel_config.branch_timeout_seconds
+        branch_list = list(branches.values())
+        tasks = [asyncio.wait_for(execute_single_branch(b), timeout=timeout) for b in branch_list]
+        results = await asyncio.gather(*tasks, return_exceptions=True)

        # Process results
        total_tokens = 0
@@ -2188,17 +2311,33 @@ class GraphExecutor:
        branch_results: dict[str, NodeResult] = {}
        failed_branches: list[ParallelBranch] = []

-        for branch, result in results:
-            path.append(branch.node_id)
+        for i, result in enumerate(results):
+            branch = branch_list[i]

-            if isinstance(result, Exception):
+            if isinstance(result, asyncio.TimeoutError):
+                # Branch timed out
+                branch.status = "timed_out"
+                branch.error = f"Branch timed out after {timeout}s"
+                self.logger.warning(
+                    f"      ⏱ Branch {graph.get_node(branch.node_id).name}: "
+                    f"timed out after {timeout}s"
+                )
+                path.append(branch.node_id)
                failed_branches.append(branch)
-            elif result is None or not result.success:
+            elif isinstance(result, Exception):
+                path.append(branch.node_id)
                failed_branches.append(branch)
            else:
-                total_tokens += result.tokens_used
-                total_latency += result.latency_ms
-                branch_results[branch.branch_id] = result
+                returned_branch, node_result = result
+                path.append(returned_branch.node_id)
+                if node_result is None or isinstance(node_result, Exception):
+                    failed_branches.append(returned_branch)
+                elif not node_result.success:
+                    failed_branches.append(returned_branch)
+                else:
+                    total_tokens += node_result.tokens_used
+                    total_latency += node_result.latency_ms
+                    branch_results[returned_branch.branch_id] = node_result

        # Handle failures based on config
        if failed_branches:
@@ -37,24 +37,42 @@ Follow these rules for reliable, efficient browser interaction.
 ## Reading Pages
 - ALWAYS prefer `browser_snapshot` over `browser_get_text("body")`
  — it returns a compact ~1-5 KB accessibility tree vs 100+ KB of raw HTML.
- Use `browser_snapshot_aria` when you need full ARIA properties
-  for detailed element inspection.
+- Interaction tools (`browser_click`, `browser_type`, `browser_fill`,
+  `browser_scroll`, etc.) return a page snapshot automatically in their
+  result. Use it to decide your next action — do NOT call
+  `browser_snapshot` separately after every action.
+  Only call `browser_snapshot` when you need a fresh view without
+  performing an action, or after setting `auto_snapshot=false`.
 - Do NOT use `browser_screenshot` for reading text content
  — it produces huge base64 images with no searchable text.
 - Only fall back to `browser_get_text` for extracting specific
  small elements by CSS selector.

 ## Navigation & Waiting
- Always call `browser_wait` after navigation actions
-  (`browser_open`, `browser_navigate`, `browser_click` on links)
-  to let the page load.
+- `browser_navigate` and `browser_open` already wait for the page to
+  load (`domcontentloaded`). Do NOT call `browser_wait` with no
+  arguments after navigation — it wastes time.
+  Only use `browser_wait` when you need a *specific element* or *text*
+  to appear (pass `selector` or `text`).
 - NEVER re-navigate to the same URL after scrolling
  — this resets your scroll position and loses loaded content.

 ## Scrolling
 - Use large scroll amounts ~2000 when loading more content
  — sites like twitter and linkedin have lazy loading for paging.
- After scrolling, take a new `browser_snapshot` to see updated content.
+- The scroll result includes a snapshot automatically — no need to call
+  `browser_snapshot` separately.
+
+## Batching Actions
+- You can call multiple tools in a single turn — they execute in parallel.
+  ALWAYS batch independent actions together. Examples:
+  - Fill multiple form fields in one turn.
+  - Navigate + snapshot in one turn.
+  - Click + scroll if targeting different elements.
+- When batching, set `auto_snapshot=false` on all but the last action
+  to avoid redundant snapshots.
+- Aim for 3-5 tool calls per turn minimum. One tool call per turn is
+  wasteful.

 ## Error Recovery
 - If a tool fails, retry once with the same approach.
@@ -65,11 +83,33 @@ Follow these rules for reliable, efficient browser interaction.
  then `browser_start`, then retry.

 ## Tab Management
- Use `browser_tabs` to list open tabs when managing multiple pages.
- Pass `target_id` to tools when operating on a specific tab.
- Open background tabs with `browser_open(url=..., background=true)`
-  to avoid losing your current context.
- Close tabs you no longer need with `browser_close` to free resources.
+
+**Close tabs as soon as you are done with them** — not only at the end of the task.
+After reading or extracting data from a tab, close it immediately.
+
+**Decision rules:**
+- Finished reading/extracting from a tab? → `browser_close(target_id=...)`
+- Completed a multi-tab workflow? → `browser_close_finished()` to clean up all your tabs
+- More than 3 tabs open? → stop and close finished ones before opening more
+- Popup appeared that you didn't need? → close it immediately
+
+**Origin awareness:** `browser_tabs` returns an `origin` field for each tab:
+- `"agent"` — you opened it; you own it; close it when done
+- `"popup"` — opened by a link or script; close after extracting what you need
+- `"startup"` or `"user"` — leave these alone unless the task requires it
+
+**Cleanup tools:**
+- `browser_close(target_id=...)` — close one specific tab
+- `browser_close_finished()` — close all your agent/popup tabs (safe: leaves startup/user tabs)
+- `browser_close_all()` — close everything except the active tab (use only for full reset)
+
+**Multi-tab workflow pattern:**
+1. Open background tabs with `browser_open(url=..., background=true)` to stay on current tab
+2. Process each tab and close it with `browser_close` when done
+3. When the full workflow completes, call `browser_close_finished()` to confirm cleanup
+4. Check `browser_tabs` at any point — it shows `origin` and `age_seconds` per tab
+
+Never accumulate tabs. Treat every tab you open as a resource you must free.

 ## Login & Auth Walls
 - If you see a "Log in" or "Sign up" prompt instead of expected
@@ -1,203 +0,0 @@
-"""
-Standardized HITL (Human-In-The-Loop) Protocol
-
-This module defines the formal structure for pause/resume interactions
-where agents need to gather input from humans.
-"""
-
-from dataclasses import dataclass, field
-from enum import StrEnum
-from typing import Any
-
-
-class HITLInputType(StrEnum):
-    """Type of input expected from human."""
-
-    FREE_TEXT = "free_text"  # Open-ended text response
-    STRUCTURED = "structured"  # Specific fields to fill
-    SELECTION = "selection"  # Choose from options
-    APPROVAL = "approval"  # Yes/no/modify decision
-    MULTI_FIELD = "multi_field"  # Multiple related inputs
-
-
-@dataclass
-class HITLQuestion:
-    """A single question to ask the human."""
-
-    id: str
-    question: str
-    input_type: HITLInputType = HITLInputType.FREE_TEXT
-
-    # For SELECTION type
-    options: list[str] = field(default_factory=list)
-
-    # For STRUCTURED type
-    fields: dict[str, str] = field(default_factory=dict)  # {field_name: description}
-
-    # Metadata
-    required: bool = True
-    help_text: str = ""
-
-
-@dataclass
-class HITLRequest:
-    """
-    Formal request for human input at a pause node.
-
-    This is what the agent produces when it needs human input.
-    """
-
-    # Context
-    objective: str  # What we're trying to accomplish
-    current_state: str  # Where we are in the process
-
-    # What we need
-    questions: list[HITLQuestion] = field(default_factory=list)
-    missing_info: list[str] = field(default_factory=list)
-
-    # Guidance
-    instructions: str = ""
-    examples: list[str] = field(default_factory=list)
-
-    # Metadata
-    request_id: str = ""
-    node_id: str = ""
-
-    def to_dict(self) -> dict[str, Any]:
-        """Convert to dictionary for serialization."""
-        return {
-            "objective": self.objective,
-            "current_state": self.current_state,
-            "questions": [
-                {
-                    "id": q.id,
-                    "question": q.question,
-                    "input_type": q.input_type.value,
-                    "options": q.options,
-                    "fields": q.fields,
-                    "required": q.required,
-                    "help_text": q.help_text,
-                }
-                for q in self.questions
-            ],
-            "missing_info": self.missing_info,
-            "instructions": self.instructions,
-            "examples": self.examples,
-            "request_id": self.request_id,
-            "node_id": self.node_id,
-        }
-
-
-@dataclass
-class HITLResponse:
-    """
-    Human's response to a HITL request.
-
-    This is what gets passed back when resuming from a pause.
-    """
-
-    # Original request reference
-    request_id: str
-
-    # Human's answers
-    answers: dict[str, Any] = field(default_factory=dict)  # {question_id: answer}
-    raw_input: str = ""  # Raw text if provided
-
-    # Metadata
-    response_time_ms: int = 0
-
-    def to_dict(self) -> dict[str, Any]:
-        """Convert to dictionary for serialization."""
-        return {
-            "request_id": self.request_id,
-            "answers": self.answers,
-            "raw_input": self.raw_input,
-            "response_time_ms": self.response_time_ms,
-        }
-
-
-class HITLProtocol:
-    """
-    Standardized protocol for HITL interactions.
-
-    Usage in pause nodes:
-
-    1. Pause Node: Generates HITLRequest with questions
-    2. Executor: Saves state and returns request to user
-    3. User: Provides HITLResponse with answers
-    4. Resume Node: Processes response and merges into context
-    """
-
-    @staticmethod
-    def create_request(
-        objective: str,
-        questions: list[HITLQuestion],
-        missing_info: list[str] | None = None,
-        node_id: str = "",
-    ) -> HITLRequest:
-        """Create a standardized HITL request."""
-        return HITLRequest(
-            objective=objective,
-            current_state="Awaiting clarification",
-            questions=questions,
-            missing_info=missing_info or [],
-            request_id=f"{node_id}_{hash(objective) % 10000}",
-            node_id=node_id,
-        )
-
-    @staticmethod
-    def parse_response(
-        raw_input: str,
-        request: HITLRequest,
-        use_haiku: bool = True,
-    ) -> HITLResponse:
-        """
-        Parse human's raw input into structured response.
-
-        Maps the raw input to the first question. For multi-question HITL,
-        the caller should present one question at a time.
-        """
-        response = HITLResponse(request_id=request.request_id, raw_input=raw_input)
-
-        # If no questions, just return raw input
-        if not request.questions:
-            return response
-
-        # Map raw input to first question
-        response.answers[request.questions[0].id] = raw_input
-        return response
-
-    @staticmethod
-    def format_for_display(request: HITLRequest) -> str:
-        """Format HITL request for user-friendly display."""
-        parts = []
-
-        if request.objective:
-            parts.append(f"📋 Objective: {request.objective}")
-
-        if request.current_state:
-            parts.append(f"📍 Current State: {request.current_state}")
-
-        if request.instructions:
-            parts.append(f"\n{request.instructions}")
-
-        if request.questions:
-            parts.append(f"\n❓ Questions ({len(request.questions)}):")
-            for i, q in enumerate(request.questions, 1):
-                parts.append(f"{i}. {q.question}")
-                if q.help_text:
-                    parts.append(f"   💡 {q.help_text}")
-                if q.options:
-                    parts.append(f"   Options: {', '.join(q.options)}")
-
-        if request.missing_info:
-            parts.append("\n📝 Missing Information:")
-            for info in request.missing_info:
-                parts.append(f"  • {info}")
-
-        if request.examples:
-            parts.append("\n📚 Examples:")
-            for example in request.examples:
-                parts.append(f"  • {example}")
-
-        return "\n".join(parts)
@@ -565,6 +565,15 @@ class NodeContext:
    # staging / running) without restarting the conversation.
    dynamic_prompt_provider: Any = None  # Callable[[], str] | None

+    # Skill system prompts — injected by the skill discovery pipeline
+    skills_catalog_prompt: str = ""  # Available skills XML catalog
+    protocols_prompt: str = ""  # Default skill operational protocols
+
+    # Per-iteration metadata provider — when set, EventLoopNode merges
+    # the returned dict into node_loop_iteration event data.  Used by
+    # the queen to record the current phase per iteration.
+    iteration_metadata_provider: Any = None  # Callable[[], dict] | None
+

@dataclass
 class NodeResult:
@@ -140,14 +140,18 @@ def compose_system_prompt(
    focus_prompt: str | None,
    narrative: str | None = None,
    accounts_prompt: str | None = None,
+    skills_catalog_prompt: str | None = None,
+    protocols_prompt: str | None = None,
 ) -> str:
-    """Compose the three-layer system prompt.
+    """Compose the multi-layer system prompt.

    Args:
        identity_prompt: Layer 1 — static agent identity (from GraphSpec).
        focus_prompt: Layer 3 — per-node focus directive (from NodeSpec.system_prompt).
        narrative: Layer 2 — auto-generated from conversation state.
        accounts_prompt: Connected accounts block (sits between identity and narrative).
+        skills_catalog_prompt: Available skills catalog XML (Agent Skills standard).
+        protocols_prompt: Default skill operational protocols section.

    Returns:
        Composed system prompt with all layers present, plus current datetime.
@@ -162,6 +166,14 @@ def compose_system_prompt(
    if accounts_prompt:
        parts.append(f"\n{accounts_prompt}")

+    # Skills catalog (discovered skills available for activation)
+    if skills_catalog_prompt:
+        parts.append(f"\n{skills_catalog_prompt}")
+
+    # Operational protocols (default skill behavioral guidance)
+    if protocols_prompt:
+        parts.append(f"\n{protocols_prompt}")
+
    # Layer 2: Narrative (what's happened so far)
    if narrative:
        parts.append(f"\n--- Context (what has happened so far) ---\n{narrative}")
@@ -115,11 +115,23 @@ class SafeEvalVisitor(ast.NodeVisitor):
        return True

    def visit_BoolOp(self, node: ast.BoolOp) -> Any:
-        values = [self.visit(v) for v in node.values]
+        # Short-circuit like Python: stop at the first operand that decides the
+        # result and return that operand itself (not a bool). Eager evaluation
+        # used to break guards like: ``x is not None and x.get("key")``
        if isinstance(node.op, ast.And):
-            return all(values)
+            result = True
+            for v in node.values:
+                result = self.visit(v)
+                if not result:
+                    return result
+            return result
        elif isinstance(node.op, ast.Or):
-            return any(values)
+            result = False
+            for v in node.values:
+                result = self.visit(v)
+                if result:
+                    return result
+            return result
        raise ValueError(f"Boolean operator {type(node.op).__name__} is not allowed")

    def visit_IfExp(self, node: ast.IfExp) -> Any:
@@ -7,9 +7,12 @@ Groq, and local models.
 See: https://docs.litellm.ai/docs/providers
 """

+import ast
 import asyncio
 import json
 import logging
+import os
+import re
 import time
 from collections.abc import AsyncIterator
 from datetime import datetime
@@ -23,6 +26,7 @@ except ImportError:
    litellm = None  # type: ignore[assignment]
    RateLimitError = Exception  # type: ignore[assignment, misc]

+from framework.config import HIVE_LLM_ENDPOINT as HIVE_API_BASE
 from framework.llm.provider import LLMProvider, LLMResponse, Tool
 from framework.llm.stream_events import StreamEvent

@@ -45,6 +49,12 @@ def _patch_litellm_anthropic_oauth() -> None:
        from litellm.llms.anthropic.common_utils import AnthropicModelInfo
        from litellm.types.llms.anthropic import ANTHROPIC_OAUTH_TOKEN_PREFIX
    except ImportError:
+        logger.warning(
+            "Could not apply litellm Anthropic OAuth patch — litellm internals may have "
+            "changed. Anthropic OAuth tokens (Claude Code subscriptions) may fail with 401. "
+            "See BerriAI/litellm#19618. Current litellm version: %s",
+            getattr(litellm, "__version__", "unknown"),
+        )
        return

    original = AnthropicModelInfo.validate_environment
@@ -86,10 +96,12 @@ def _patch_litellm_metadata_nonetype() -> None:
    """
    import functools

+    patched_count = 0
    for fn_name in ("completion", "acompletion", "responses", "aresponses"):
        original = getattr(litellm, fn_name, None)
        if original is None:
            continue
+        patched_count += 1
        if asyncio.iscoroutinefunction(original):

            @functools.wraps(original)
@@ -109,23 +121,75 @@ def _patch_litellm_metadata_nonetype() -> None:

            setattr(litellm, fn_name, _sync_wrapper)

+    if patched_count == 0:
+        logger.warning(
+            "Could not apply litellm metadata=None patch — none of the expected entry "
+            "points (completion, acompletion, responses, aresponses) were found. "
+            "metadata=None TypeError may occur. Current litellm version: %s",
+            getattr(litellm, "__version__", "unknown"),
+        )
+

 if litellm is not None:
+    # litellm imported successfully: apply the two monkey-patches defined above
+    # once, at module import time.
    _patch_litellm_anthropic_oauth()
    _patch_litellm_metadata_nonetype()
+    # Set litellm's suppress_debug_info flag (presumably silences its extra
+    # debug/help output — confirm against the litellm documentation).
+    litellm.suppress_debug_info = True
+    # Unless LITELLM_LOG is set to a non-empty value in the environment, raise
+    # the three litellm loggers to WARNING so lower-level records are dropped.
+    if not os.environ.get("LITELLM_LOG"):
+        logging.getLogger("LiteLLM").setLevel(logging.WARNING)
+        logging.getLogger("LiteLLM Router").setLevel(logging.WARNING)
+        logging.getLogger("LiteLLM Proxy").setLevel(logging.WARNING)

 RATE_LIMIT_MAX_RETRIES = 10
 RATE_LIMIT_BACKOFF_BASE = 2  # seconds
 RATE_LIMIT_MAX_DELAY = 120  # seconds - cap to prevent absurd waits
+# Default API bases returned by LiteLLMProvider._default_api_base_for_model for
+# "minimax/" / "minimax-" and "openrouter/" models when no api_base is passed.
+MINIMAX_API_BASE = "https://api.minimax.io/v1"
+OPENROUTER_API_BASE = "https://openrouter.ai/api/v1"
+
+# Providers that accept cache_control on message content blocks.
+# Anthropic: native ephemeral caching. MiniMax & Z-AI/GLM: pass-through to their APIs.
+# (OpenAI caches automatically server-side; Groq/Gemini/etc. strip the header.)
+_CACHE_CONTROL_PREFIXES = (
+    "anthropic/",
+    "claude-",
+    "minimax/",
+    "minimax-",
+    "MiniMax-",
+    "zai-glm",
+    "glm-",
+)
+
+
+def _model_supports_cache_control(model: str) -> bool:
+    """Return True when *model* begins with one of ``_CACHE_CONTROL_PREFIXES``.
+
+    The comparison is case-sensitive. Callers use the result to decide whether
+    to attach a ``cache_control`` entry to the system message.
+    """
+    # str.startswith accepts a tuple and succeeds if any member is a prefix.
+    return model.startswith(_CACHE_CONTROL_PREFIXES)
+
+
+# Kimi For Coding uses an Anthropic-compatible endpoint (no /v1 suffix).
+# Claude Code integration uses this format; the /v1 OpenAI-compatible endpoint
+# enforces a coding-agent whitelist that blocks unknown User-Agents.
+KIMI_API_BASE = "https://api.kimi.com/coding"

 # Empty-stream retries use a short fixed delay, not the rate-limit backoff.
 # Conversation-structure issues are deterministic — long waits don't help.
 EMPTY_STREAM_MAX_RETRIES = 3
 EMPTY_STREAM_RETRY_DELAY = 1.0  # seconds
+# --- OpenRouter tool-compatibility fallback ---------------------------------
+# Lower-case substrings searched for in an error's text (together with the word
+# "openrouter") to decide that OpenRouter rejected native tool use; see
+# LiteLLMProvider._should_use_openrouter_tool_compat.
+OPENROUTER_TOOL_COMPAT_ERROR_SNIPPETS = (
+    "no endpoints found that support tool use",
+    "no endpoints available that support tool use",
+    "provider routing",
+)
+# Matches <|tool_call_start|> ... <|tool_call_end|> marker blocks in model text.
+# Group 1 is the payload between the markers, with surrounding whitespace
+# excluded; DOTALL lets the payload span multiple lines.
+OPENROUTER_TOOL_CALL_RE = re.compile(
+    r"<\|tool_call_start\|>\s*(.*?)\s*<\|tool_call_end\|>",
+    re.DOTALL,
+)
+# Process-wide set of model names. When tools are supplied and the model is in
+# this set, stream() goes straight to the tool-compat path.
+OPENROUTER_TOOL_COMPAT_MODEL_CACHE: set[str] = set()

 # Directory for dumping failed requests
 FAILED_REQUESTS_DIR = Path.home() / ".hive" / "failed_requests"

+# Maximum number of dump files to retain in ~/.hive/failed_requests/.
+# Older files are pruned automatically to prevent unbounded disk growth.
+MAX_FAILED_REQUEST_DUMPS = 50
+

 def _estimate_tokens(model: str, messages: list[dict]) -> tuple[int, str]:
    """Estimate token count for messages. Returns (token_count, method)."""
@@ -142,6 +206,24 @@ def _estimate_tokens(model: str, messages: list[dict]) -> tuple[int, str]:
    return total_chars // 4, "estimate"


+def _prune_failed_request_dumps(max_files: int = MAX_FAILED_REQUEST_DUMPS) -> None:
+    """Remove the oldest dump files when the count exceeds *max_files*.
+
+    Files are ordered by modification time and the oldest surplus ones are
+    deleted. A file that disappears between listing and ``stat()`` is skipped
+    instead of aborting the whole prune.
+
+    Best-effort: never raises — a pruning failure must not break retry logic.
+    Failures are logged at debug level so they remain diagnosable.
+
+    Args:
+        max_files: Maximum number of ``*.json`` dumps to keep in
+            ``FAILED_REQUESTS_DIR``.
+    """
+    try:
+        dated_dumps: list[tuple[float, Path]] = []
+        for dump_file in FAILED_REQUESTS_DIR.glob("*.json"):
+            try:
+                dated_dumps.append((dump_file.stat().st_mtime, dump_file))
+            except OSError:
+                # Vanished (or became unreadable) after the glob; nothing to prune.
+                continue
+
+        excess = len(dated_dumps) - max_files
+        if excess <= 0:
+            return
+
+        # Oldest first; sort on mtime only so Path objects are never compared.
+        dated_dumps.sort(key=lambda item: item[0])
+        for _mtime, old_file in dated_dumps[:excess]:
+            try:
+                old_file.unlink(missing_ok=True)
+            except OSError as exc:
+                logger.debug("Could not prune failed-request dump %s: %s", old_file, exc)
+    except Exception as exc:
+        # Best-effort — never block the caller, but leave a trace for debugging.
+        logger.debug("Pruning failed-request dumps failed: %s", exc)
+
+
 def _dump_failed_request(
    model: str,
    kwargs: dict[str, Any],
@@ -173,6 +255,9 @@ def _dump_failed_request(
    with open(filepath, "w", encoding="utf-8") as f:
        json.dump(dump_data, f, indent=2, default=str)

+    # Prune old dumps to prevent unbounded disk growth
+    _prune_failed_request_dumps()
+
    return str(filepath)


@@ -322,13 +407,31 @@ class LiteLLMProvider(LLMProvider):
            api_base: Custom API base URL (for proxies or local deployments)
            **kwargs: Additional arguments passed to litellm.completion()
        """
+        # Kimi For Coding exposes an Anthropic-compatible endpoint at
+        # https://api.kimi.com/coding (the same format Claude Code uses natively).
+        # Translate kimi/ prefix to anthropic/ so litellm uses the Anthropic
+        # Messages API handler and routes to that endpoint — no special headers needed.
+        _original_model = model
+        if model.lower().startswith("kimi/"):
+            model = "anthropic/" + model[len("kimi/") :]
+            # Normalise api_base: litellm's Anthropic handler appends /v1/messages,
+            # so the base must be https://api.kimi.com/coding (no /v1 suffix).
+            # Strip a trailing /v1 in case the user's saved config has the old value.
+            if api_base and api_base.rstrip("/").endswith("/v1"):
+                api_base = api_base.rstrip("/")[:-3]
+        elif model.lower().startswith("hive/"):
+            model = "anthropic/" + model[len("hive/") :]
+            if api_base and api_base.rstrip("/").endswith("/v1"):
+                api_base = api_base.rstrip("/")[:-3]
        self.model = model
        self.api_key = api_key
-        self.api_base = api_base
+        self.api_base = api_base or self._default_api_base_for_model(_original_model)
        self.extra_kwargs = kwargs
        # The Codex ChatGPT backend (chatgpt.com/backend-api/codex) rejects
        # several standard OpenAI params: max_output_tokens, stream_options.
-        self._codex_backend = bool(api_base and "chatgpt.com/backend-api/codex" in api_base)
+        self._codex_backend = bool(
+            self.api_base and "chatgpt.com/backend-api/codex" in self.api_base
+        )

        if litellm is None:
            raise ImportError(
@@ -341,6 +444,20 @@ class LiteLLMProvider(LLMProvider):
        # override the mode.  The responses_api_bridge in litellm handles
        # converting Chat Completions requests to Responses API format.

+    @staticmethod
+    def _default_api_base_for_model(model: str) -> str | None:
+        """Return the provider-specific default API base for *model*, if any.
+
+        Matching is done on the lower-cased model name. Returns ``None`` when
+        the model prefix has no built-in default.
+        """
+        lowered = model.lower()
+        # Checked in order; the first matching prefix group wins.
+        prefix_defaults = (
+            (("minimax/", "minimax-"), MINIMAX_API_BASE),
+            (("openrouter/",), OPENROUTER_API_BASE),
+            (("kimi/",), KIMI_API_BASE),
+            (("hive/",), HIVE_API_BASE),
+        )
+        for prefixes, api_base in prefix_defaults:
+            if lowered.startswith(prefixes):
+                return api_base
+        return None
+
    def _completion_with_rate_limit_retry(
        self, max_retries: int | None = None, **kwargs: Any
    ) -> Any:
@@ -678,7 +795,10 @@ class LiteLLMProvider(LLMProvider):

        full_messages: list[dict[str, Any]] = []
        if system:
-            full_messages.append({"role": "system", "content": system})
+            sys_msg: dict[str, Any] = {"role": "system", "content": system}
+            if _model_supports_cache_control(self.model):
+                sys_msg["cache_control"] = {"type": "ephemeral"}
+            full_messages.append(sys_msg)
        full_messages.extend(messages)

        if json_mode:
@@ -735,6 +855,564 @@ class LiteLLMProvider(LLMProvider):
            },
        }

+    def _is_minimax_model(self) -> bool:
+        """Tell whether ``self.model`` names a MiniMax model (case-insensitive)."""
+        # A missing/empty model is treated as "" and therefore never matches.
+        return (self.model or "").lower().startswith(("minimax/", "minimax-"))
+
+    def _is_openrouter_model(self) -> bool:
+        """Tell whether this provider instance targets OpenRouter.
+
+        True when the lower-cased model starts with ``openrouter/`` or, failing
+        that, when the lower-cased API base contains ``openrouter.ai/api/v1``.
+        """
+        model_name = (self.model or "").lower()
+        # ``or`` keeps the api_base lookup lazy: it is only read when the model
+        # prefix did not already settle the question.
+        return (
+            model_name.startswith("openrouter/")
+            or "openrouter.ai/api/v1" in (self.api_base or "").lower()
+        )
+
+    def _should_use_openrouter_tool_compat(
+        self,
+        error: BaseException,
+        tools: list[Tool] | None,
+    ) -> bool:
+        """Decide whether *error* means OpenRouter refused native tool use.
+
+        Only applies when tools were supplied and the provider targets
+        OpenRouter. The lower-cased error text must mention "openrouter" and
+        contain one of ``OPENROUTER_TOOL_COMPAT_ERROR_SNIPPETS``.
+        """
+        if not (tools and self._is_openrouter_model()):
+            return False
+
+        message = str(error).lower()
+        if "openrouter" not in message:
+            return False
+
+        for snippet in OPENROUTER_TOOL_COMPAT_ERROR_SNIPPETS:
+            if snippet in message:
+                return True
+        return False
+
+    @staticmethod
+    def _extract_json_object(text: str) -> dict[str, Any] | None:
+        """Return the first JSON object found in a model response, else None.
+
+        Two candidates are examined in order: the stripped text, and — when the
+        text opens with a ``` fence spanning at least three lines — the text
+        between the first and last line. Each candidate is first parsed whole,
+        then scanned for the first ``{`` position from which a JSON object can
+        be decoded.
+        """
+        stripped = text.strip()
+        candidates = [stripped]
+        if stripped.startswith("```"):
+            lines = stripped.splitlines()
+            if len(lines) >= 3:
+                # Drop the opening and closing fence lines.
+                candidates.append("\n".join(lines[1:-1]).strip())
+
+        decoder = json.JSONDecoder()
+        for candidate in candidates:
+            if not candidate:
+                continue
+
+            try:
+                whole = json.loads(candidate)
+            except json.JSONDecodeError:
+                whole = None
+            if isinstance(whole, dict):
+                return whole
+
+            # Try every "{" as a possible start of an embedded object.
+            brace = candidate.find("{")
+            while brace != -1:
+                try:
+                    embedded, _ = decoder.raw_decode(candidate, brace)
+                except json.JSONDecodeError:
+                    embedded = None
+                if isinstance(embedded, dict):
+                    return embedded
+                brace = candidate.find("{", brace + 1)
+        return None
+
+    def _parse_openrouter_tool_compat_response(
+        self,
+        content: str,
+        tools: list[Tool],
+    ) -> tuple[str, list[dict[str, Any]]]:
+        """Parse JSON tool-compat output into assistant text and tool calls.
+
+        Args:
+            content: Raw text returned by the model in compatibility mode.
+            tools: Tools offered to the model; calls naming any other tool are
+                dropped (with a warning).
+
+        Returns:
+            ``(assistant_text, tool_calls)``. Each tool call is a dict with
+            ``id``, ``name`` and ``input`` keys. When no JSON object can be
+            extracted, textual tool-call parsing is attempted; if that finds
+            nothing either, the stripped content is returned with no calls.
+        """
+        payload = self._extract_json_object(content)
+        if payload is None:
+            text_tool_content, text_tool_calls = self._parse_openrouter_text_tool_calls(
+                content,
+                tools,
+            )
+            if text_tool_calls:
+                logger.info(
+                    "[openrouter-tool-compat] Parsed textual tool-call markers for %s",
+                    self.model,
+                )
+                return text_tool_content, text_tool_calls
+            logger.info(
+                "[openrouter-tool-compat] %s returned non-JSON fallback content; "
+                "treating it as plain text.",
+                self.model,
+            )
+            return content.strip(), []
+
+        # First string-valued key wins; models do not always follow the schema.
+        assistant_text = ""
+        for text_key in ("assistant_response", "content", "response"):
+            text_value = payload.get(text_key)
+            if isinstance(text_value, str):
+                assistant_text = text_value
+                break
+
+        tool_calls_raw = payload.get("tool_calls")
+        if not isinstance(tool_calls_raw, list) or not tool_calls_raw:
+            # Fall back to alternative shapes ONLY when there is no usable
+            # "tool_calls" list, so a stray singular "tool_call" key can never
+            # replace a valid list of calls.
+            if {"name", "arguments"} <= payload.keys():
+                tool_calls_raw = [payload]
+            elif isinstance(payload.get("tool_call"), dict):
+                tool_calls_raw = [payload["tool_call"]]
+            else:
+                tool_calls_raw = []
+
+        allowed_tool_names = {tool.name for tool in tools}
+        tool_calls: list[dict[str, Any]] = []
+        # Synthetic ids: one time-based prefix per response plus the call index.
+        compat_prefix = f"openrouter_compat_{time.time_ns()}"
+
+        for idx, raw_call in enumerate(tool_calls_raw):
+            if not isinstance(raw_call, dict):
+                continue
+
+            # Accept flat {"name": ...} and OpenAI-style {"function": {...}} forms.
+            function_block = raw_call.get("function")
+            function_name = (
+                raw_call.get("name")
+                or raw_call.get("tool_name")
+                or (function_block.get("name") if isinstance(function_block, dict) else None)
+            )
+            if not isinstance(function_name, str) or function_name not in allowed_tool_names:
+                if function_name:
+                    logger.warning(
+                        "[openrouter-tool-compat] Ignoring unknown tool '%s' for model %s",
+                        function_name,
+                        self.model,
+                    )
+                continue
+
+            arguments = raw_call.get("arguments")
+            if arguments is None:
+                arguments = raw_call.get("tool_input")
+            if arguments is None:
+                arguments = raw_call.get("input")
+            if arguments is None and isinstance(function_block, dict):
+                arguments = function_block.get("arguments")
+            if arguments is None:
+                arguments = {}
+
+            if isinstance(arguments, str):
+                # Arguments may arrive JSON-encoded; keep the raw text if not.
+                try:
+                    arguments = json.loads(arguments)
+                except json.JSONDecodeError:
+                    arguments = {"_raw": arguments}
+            elif not isinstance(arguments, dict):
+                arguments = {"value": arguments}
+
+            tool_calls.append(
+                {
+                    "id": f"{compat_prefix}_{idx}",
+                    "name": function_name,
+                    "input": arguments,
+                }
+            )
+
+        return assistant_text.strip(), tool_calls
+
+    @staticmethod
+    def _close_truncated_json_fragment(fragment: str) -> str:
+        """Close a truncated JSON fragment by balancing quotes/brackets.
+
+        Trailing whitespace is removed. If the fragment ends outside a string,
+        dangling structural characters (``, : { [``) are dropped as well. If it
+        ends inside a string, the string is closed as-is so punctuation that
+        belongs to the string value (e.g. ``"http:``) is preserved. Finally any
+        still-open ``{`` / ``[`` are closed in reverse order.
+
+        Args:
+            fragment: A possibly truncated JSON text.
+
+        Returns:
+            The balanced text. It is not guaranteed to be valid JSON.
+        """
+
+        def _scan(text: str) -> tuple[list[str], bool, bool]:
+            """Return (open-bracket stack, in_string, escaped) after *text*."""
+            stack: list[str] = []
+            in_string = False
+            escaped = False
+            for char in text:
+                if in_string:
+                    if escaped:
+                        escaped = False
+                    elif char == "\\":
+                        escaped = True
+                    elif char == '"':
+                        in_string = False
+                    continue
+
+                if char == '"':
+                    in_string = True
+                elif char in "{[":
+                    stack.append(char)
+                elif char == "}" and stack and stack[-1] == "{":
+                    stack.pop()
+                elif char == "]" and stack and stack[-1] == "[":
+                    stack.pop()
+            return stack, in_string, escaped
+
+        normalized = fragment.rstrip()
+        stack, in_string, escaped = _scan(normalized)
+
+        if not in_string:
+            # Only structural characters can trail here; drop the dangling ones.
+            trimmed = normalized
+            while trimmed and trimmed[-1] in ",:{[":
+                trimmed = trimmed[:-1].rstrip()
+            if trimmed != normalized:
+                normalized = trimmed
+                # Removing "{" / "[" changes what is still open.
+                stack, in_string, escaped = _scan(normalized)
+
+        if in_string:
+            if escaped:
+                # A lone trailing backslash would escape the closing quote.
+                normalized = normalized[:-1]
+            normalized += '"'
+
+        for opener in reversed(stack):
+            normalized += "}" if opener == "{" else "]"
+
+        return normalized
+
+    def _repair_truncated_tool_arguments(self, raw_arguments: str) -> dict[str, Any] | None:
+        """Attempt to rebuild a JSON object from truncated tool-call arguments.
+
+        Progressively shorter prefixes (dropping up to 256 trailing characters)
+        are balanced with ``_close_truncated_json_fragment`` and parsed; the
+        first one that yields a dict is returned. Returns ``None`` when the
+        text does not start with ``{`` or no prefix can be repaired.
+        """
+        text = raw_arguments.strip()
+        if not text.startswith("{"):
+            return None
+
+        shortest_end = len(text) - min(len(text), 256)
+        for end in range(len(text), shortest_end - 1, -1):
+            prefix = text[:end].rstrip()
+            if not prefix:
+                break
+            closed = self._close_truncated_json_fragment(prefix)
+            try:
+                repaired = json.loads(closed)
+            except json.JSONDecodeError:
+                continue
+            if isinstance(repaired, dict):
+                return repaired
+        return None
+
+    def _parse_tool_call_arguments(self, raw_arguments: str, tool_name: str) -> dict[str, Any]:
+        """Turn a tool call's raw argument text into a dict.
+
+        Empty input yields ``{}``. Text that parses to a JSON object is
+        returned directly; anything else goes through truncation repair.
+
+        Raises:
+            ValueError: If the text is neither a JSON object nor repairable.
+        """
+        decoded: Any
+        if raw_arguments:
+            try:
+                decoded = json.loads(raw_arguments)
+            except json.JSONDecodeError:
+                decoded = None
+        else:
+            decoded = {}
+
+        if isinstance(decoded, dict):
+            return decoded
+
+        recovered = self._repair_truncated_tool_arguments(raw_arguments)
+        if recovered is None:
+            raise ValueError(
+                f"Failed to parse tool call arguments for '{tool_name}' (likely truncated JSON)."
+            )
+
+        logger.warning(
+            "[tool-args] Recovered truncated arguments for %s on %s",
+            tool_name,
+            self.model,
+        )
+        return recovered
+
+    def _parse_openrouter_text_tool_calls(
+        self,
+        content: str,
+        tools: list[Tool],
+    ) -> tuple[str, list[dict[str, Any]]]:
+        """Parse textual OpenRouter tool calls into synthetic tool calls.
+
+        Supports both:
+        - Marker wrapped payloads: <|tool_call_start|>...<|tool_call_end|>
+        - Plain one-line tool calls: ask_user("...", ["..."])
+
+        Args:
+            content: Raw assistant text that may embed tool calls.
+            tools: Tools the model was offered; only calls to these are parsed.
+
+        Returns:
+            ``(remaining_text, tool_calls)``: the content with all marker blocks
+            and every line recognised as a tool call removed (stripped), and the
+            parsed calls as dicts with ``id``, ``name`` and ``input`` keys.
+        """
+        tools_by_name = {tool.name: tool for tool in tools}
+        compat_prefix = f"openrouter_compat_{time.time_ns()}"
+        tool_calls: list[dict[str, Any]] = []
+        # Counts segments that produced at least one call; part of each call id.
+        segment_index = 0
+
+        # Pass 1: payloads wrapped in explicit start/end markers.
+        for match in OPENROUTER_TOOL_CALL_RE.finditer(content):
+            parsed_calls = self._parse_openrouter_text_tool_call_block(
+                block=match.group(1),
+                tools_by_name=tools_by_name,
+                compat_prefix=f"{compat_prefix}_{segment_index}",
+            )
+            if parsed_calls:
+                segment_index += 1
+                tool_calls.extend(parsed_calls)
+
+        # Marker blocks are removed from the text whether or not they parsed.
+        stripped_content = OPENROUTER_TOOL_CALL_RE.sub("", content)
+        retained_lines: list[str] = []
+        # Pass 2: each remaining non-blank line is tried as a bare tool call.
+        for line in stripped_content.splitlines():
+            stripped_line = line.strip()
+            if not stripped_line:
+                retained_lines.append(line)
+                continue
+
+            candidate = stripped_line
+            # Unwrap a line enclosed in single backticks (inline code).
+            if candidate.startswith("`") and candidate.endswith("`") and len(candidate) > 1:
+                candidate = candidate[1:-1].strip()
+
+            parsed_calls = self._parse_openrouter_text_tool_call_block(
+                block=candidate,
+                tools_by_name=tools_by_name,
+                compat_prefix=f"{compat_prefix}_{segment_index}",
+            )
+            if parsed_calls:
+                # The line was a tool call: record it and drop it from the text.
+                segment_index += 1
+                tool_calls.extend(parsed_calls)
+                continue
+
+            retained_lines.append(line)
+
+        stripped_text = "\n".join(retained_lines).strip()
+        return stripped_text, tool_calls
+
+    def _parse_openrouter_text_tool_call_block(
+        self,
+        block: str,
+        tools_by_name: dict[str, Tool],
+        compat_prefix: str,
+    ) -> list[dict[str, Any]]:
+        """Parse a single textual tool-call block like [tool(arg='x')].
+
+        The block is parsed as a Python expression (never executed): either one
+        call or a list of calls. Calls to unknown tools, non-call entries and
+        calls whose arguments are not plain literals are skipped.
+
+        Args:
+            block: Model-produced text to interpret.
+            tools_by_name: Allowed tools keyed by name.
+            compat_prefix: Prefix for the synthetic tool-call ids.
+
+        Returns:
+            Parsed calls as dicts with ``id``, ``name`` and ``input`` keys; an
+            empty list when the block is not a parsable expression.
+        """
+        try:
+            parsed = ast.parse(block.strip(), mode="eval").body
+        except (SyntaxError, ValueError, RecursionError):
+            # The text comes straight from the model. Besides SyntaxError,
+            # ast.parse raises ValueError for null bytes on older Python
+            # versions and RecursionError for pathological nesting; none of
+            # these may abort the fallback parser.
+            return []
+
+        call_nodes = parsed.elts if isinstance(parsed, ast.List) else [parsed]
+        tool_calls: list[dict[str, Any]] = []
+
+        for call_index, call_node in enumerate(call_nodes):
+            # Only bare-name calls such as tool(...) qualify; obj.method(...) does not.
+            if not isinstance(call_node, ast.Call) or not isinstance(call_node.func, ast.Name):
+                continue
+
+            tool_name = call_node.func.id
+            tool = tools_by_name.get(tool_name)
+            if tool is None:
+                continue
+
+            try:
+                tool_input = self._parse_openrouter_text_tool_call_arguments(
+                    call_node=call_node,
+                    tool=tool,
+                )
+            except (ValueError, TypeError, SyntaxError, RecursionError):
+                # ast.literal_eval can raise any of these on malformed
+                # arguments (e.g. TypeError for an unhashable dict key).
+                continue
+
+            tool_calls.append(
+                {
+                    "id": f"{compat_prefix}_{call_index}",
+                    "name": tool_name,
+                    "input": tool_input,
+                }
+            )
+
+        return tool_calls
+
+    @staticmethod
+    def _parse_openrouter_text_tool_call_arguments(
+        call_node: ast.Call,
+        tool: Tool,
+    ) -> dict[str, Any]:
+        """Convert the arguments of a textual tool call into an input dict.
+
+        Positional arguments are matched, in order, to the keys of the tool's
+        ``properties`` schema; keyword arguments are added afterwards under
+        their own names. Every value is evaluated with ``ast.literal_eval``.
+
+        Raises:
+            ValueError: On too many positional arguments or ``**`` unpacking
+                (``ast.literal_eval`` may also raise for non-literal values).
+        """
+        param_names = list(tool.parameters.get("properties", {}).keys())
+        if len(call_node.args) > len(param_names):
+            raise ValueError("Too many positional args for textual tool call")
+
+        # zip stops at the shorter sequence, i.e. after the last positional arg.
+        parsed_input: dict[str, Any] = {
+            name: ast.literal_eval(value_node)
+            for name, value_node in zip(param_names, call_node.args)
+        }
+
+        for keyword in call_node.keywords:
+            if keyword.arg is None:
+                raise ValueError("Star args are not supported in textual tool calls")
+            parsed_input[keyword.arg] = ast.literal_eval(keyword.value)
+
+        return parsed_input
+
+    def _build_openrouter_tool_compat_messages(
+        self,
+        messages: list[dict[str, Any]],
+        system: str,
+        tools: list[Tool],
+    ) -> list[dict[str, Any]]:
+        """Assemble the message list used for JSON-emulated tool calling.
+
+        The system prompt (if any) is extended with instructions describing the
+        JSON schema and the allowed tools. Assistant messages that carry neither
+        content nor tool calls are left out of the result.
+        """
+        allowed_tools: list[dict[str, Any]] = []
+        for tool in tools:
+            allowed_tools.append(
+                {
+                    "name": tool.name,
+                    "description": tool.description,
+                    "parameters": tool.parameters,
+                }
+            )
+
+        compat_instruction = (
+            "Tool compatibility mode is active because this OpenRouter model does not support "
+            "native function calling on the routed provider.\n"
+            "Return exactly one JSON object and nothing else.\n"
+            'Schema: {"assistant_response": string, '
+            '"tool_calls": [{"name": string, "arguments": object}]}\n'
+            "Rules:\n"
+            "- If a tool is required, put one or more entries in tool_calls "
+            "and do not invent tool results.\n"
+            "- If no tool is required, set tool_calls to [] and put the full "
+            "answer in assistant_response.\n"
+            "- Only use tool names from the allowed tool list.\n"
+            "- arguments must always be valid JSON objects.\n"
+            f"Allowed tools:\n{json.dumps(allowed_tools, ensure_ascii=True)}"
+        )
+        if system:
+            compat_system = f"{system}\n\n{compat_instruction}"
+        else:
+            compat_system = compat_instruction
+
+        def _is_blank_assistant_turn(message: dict[str, Any]) -> bool:
+            """True for an assistant message with no content and no tool calls."""
+            return (
+                message.get("role") == "assistant"
+                and not message.get("content")
+                and not message.get("tool_calls")
+            )
+
+        kept: list[dict[str, Any]] = [{"role": "system", "content": compat_system}]
+        for message in messages:
+            if not _is_blank_assistant_turn(message):
+                kept.append(message)
+        return kept
+
+    async def _acomplete_via_openrouter_tool_compat(
+        self,
+        messages: list[dict[str, Any]],
+        system: str,
+        tools: list[Tool],
+        max_tokens: int,
+    ) -> LLMResponse:
+        """Run one completion with tool calling emulated through JSON output.
+
+        No native ``tools`` parameter is sent; the tool list is embedded in the
+        prompt and the reply is parsed back into text plus tool calls, which
+        are exposed under ``raw_response["tool_calls"]``.
+        """
+        request: dict[str, Any] = {
+            "model": self.model,
+            "messages": self._build_openrouter_tool_compat_messages(messages, system, tools),
+            "max_tokens": max_tokens,
+        }
+        # Caller-supplied extras are applied after (and may override) the base keys.
+        request.update(self.extra_kwargs)
+        if self.api_key:
+            request["api_key"] = self.api_key
+        if self.api_base:
+            request["api_base"] = self.api_base
+
+        response = await self._acompletion_with_rate_limit_retry(**request)
+        assistant_text, tool_calls = self._parse_openrouter_tool_compat_response(
+            response.choices[0].message.content or "",
+            tools,
+        )
+
+        usage = response.usage
+        if usage:
+            input_tokens = usage.prompt_tokens
+            output_tokens = usage.completion_tokens
+        else:
+            input_tokens = 0
+            output_tokens = 0
+
+        # Parsed tool calls take precedence over the provider's finish reason.
+        if tool_calls:
+            stop_reason = "tool_calls"
+        else:
+            stop_reason = response.choices[0].finish_reason or "stop"
+
+        return LLMResponse(
+            content=assistant_text,
+            model=response.model or self.model,
+            input_tokens=input_tokens,
+            output_tokens=output_tokens,
+            stop_reason=stop_reason,
+            raw_response={
+                "compat_mode": "openrouter_tool_emulation",
+                "tool_calls": tool_calls,
+                "response": response,
+            },
+        )
+
+    async def _stream_via_openrouter_tool_compat(
+        self,
+        messages: list[dict[str, Any]],
+        system: str,
+        tools: list[Tool],
+        max_tokens: int,
+    ) -> AsyncIterator[StreamEvent]:
+        """Emit stream events from a single tool-compat completion.
+
+        Yields, in order: the text (one delta plus an end event) when there is
+        any, one ``ToolCallEvent`` per parsed call, then a ``FinishEvent``. A
+        failed completion produces a single non-recoverable ``StreamErrorEvent``.
+        """
+        from framework.llm.stream_events import (
+            FinishEvent,
+            StreamErrorEvent,
+            TextDeltaEvent,
+            TextEndEvent,
+            ToolCallEvent,
+        )
+
+        logger.info(
+            "[openrouter-tool-compat] Using compatibility mode for %s",
+            self.model,
+        )
+        try:
+            response = await self._acomplete_via_openrouter_tool_compat(
+                messages=messages,
+                system=system,
+                tools=tools,
+                max_tokens=max_tokens,
+            )
+        except Exception as e:
+            yield StreamErrorEvent(error=str(e), recoverable=False)
+            return
+
+        text = response.content
+        if text:
+            yield TextDeltaEvent(content=text, snapshot=text)
+            yield TextEndEvent(full_text=text)
+
+        # The compat completion stores its parsed calls inside a dict raw_response.
+        compat_payload = response.raw_response
+        if not isinstance(compat_payload, dict):
+            compat_payload = {}
+        for call in compat_payload.get("tool_calls", []):
+            yield ToolCallEvent(
+                tool_use_id=call["id"],
+                tool_name=call["name"],
+                tool_input=call["input"],
+            )
+
+        yield FinishEvent(
+            stop_reason=response.stop_reason,
+            input_tokens=response.input_tokens,
+            output_tokens=response.output_tokens,
+            model=response.model,
+        )
+
+    async def _stream_via_nonstream_completion(
+        self,
+        messages: list[dict[str, Any]],
+        system: str,
+        tools: list[Tool] | None,
+        max_tokens: int,
+        response_format: dict[str, Any] | None,
+        json_mode: bool,
+    ) -> AsyncIterator[StreamEvent]:
+        """Fallback path: convert non-stream completion to stream events.
+
+        Some providers currently fail in LiteLLM's chunk parser for stream=True.
+        For those providers we do a regular async completion and emit equivalent
+        stream events so higher layers continue to work.
+
+        Yields, in order: one ``ToolCallEvent`` per tool call, the text (delta
+        plus end event) when present, then a ``FinishEvent``. A failed
+        completion, or tool-call arguments that cannot be parsed, produce a
+        single non-recoverable ``StreamErrorEvent`` instead of raising out of
+        the generator.
+        """
+        from framework.llm.stream_events import (
+            FinishEvent,
+            StreamErrorEvent,
+            TextDeltaEvent,
+            TextEndEvent,
+            ToolCallEvent,
+        )
+
+        try:
+            response = await self.acomplete(
+                messages=messages,
+                system=system,
+                tools=tools,
+                max_tokens=max_tokens,
+                response_format=response_format,
+                json_mode=json_mode,
+            )
+        except Exception as e:
+            yield StreamErrorEvent(error=str(e), recoverable=False)
+            return
+
+        raw = response.raw_response
+        tool_calls = []
+        if raw and hasattr(raw, "choices") and raw.choices:
+            msg = raw.choices[0].message
+            tool_calls = msg.tool_calls or []
+
+        # Parse every call's arguments before emitting anything, so a malformed
+        # call surfaces as one error event rather than an exception raised
+        # after some tool calls were already yielded.
+        parsed_calls: list[tuple[str, str, dict[str, Any]]] = []
+        try:
+            for tc in tool_calls:
+                function = tc.function
+                tool_name = function.name if function else ""
+                raw_args = function.arguments if function else ""
+                parsed_calls.append(
+                    (
+                        getattr(tc, "id", ""),
+                        tool_name,
+                        self._parse_tool_call_arguments(raw_args, tool_name),
+                    )
+                )
+        except ValueError as e:
+            yield StreamErrorEvent(error=str(e), recoverable=False)
+            return
+
+        for tool_use_id, tool_name, tool_input in parsed_calls:
+            yield ToolCallEvent(
+                tool_use_id=tool_use_id,
+                tool_name=tool_name,
+                tool_input=tool_input,
+            )
+
+        if response.content:
+            yield TextDeltaEvent(content=response.content, snapshot=response.content)
+            yield TextEndEvent(full_text=response.content)
+
+        yield FinishEvent(
+            stop_reason=response.stop_reason or "stop",
+            input_tokens=response.input_tokens,
+            output_tokens=response.output_tokens,
+            model=response.model,
+        )
+
    async def stream(
        self,
        messages: list[dict[str, Any]],
@@ -762,9 +1440,40 @@ class LiteLLMProvider(LLMProvider):
            ToolCallEvent,
        )

+        # MiniMax currently fails in litellm's stream chunk parser for some
+        # responses (missing "id" in stream chunks). Use non-stream fallback.
+        if self._is_minimax_model():
+            async for event in self._stream_via_nonstream_completion(
+                messages=messages,
+                system=system,
+                tools=tools,
+                max_tokens=max_tokens,
+                response_format=response_format,
+                json_mode=json_mode,
+            ):
+                yield event
+            return
+
+        if (
+            tools
+            and self._is_openrouter_model()
+            and self.model in OPENROUTER_TOOL_COMPAT_MODEL_CACHE
+        ):
+            async for event in self._stream_via_openrouter_tool_compat(
+                messages=messages,
+                system=system,
+                tools=tools,
+                max_tokens=max_tokens,
+            ):
+                yield event
+            return
+
        full_messages: list[dict[str, Any]] = []
        if system:
-            full_messages.append({"role": "system", "content": system})
+            sys_msg: dict[str, Any] = {"role": "system", "content": system}
+            if _model_supports_cache_control(self.model):
+                sys_msg["cache_control"] = {"type": "ephemeral"}
+            full_messages.append(sys_msg)
        full_messages.extend(messages)

        # Codex Responses API requires an `instructions` field (system prompt).
@@ -829,9 +1538,26 @@ class LiteLLMProvider(LLMProvider):
                response = await litellm.acompletion(**kwargs)  # type: ignore[union-attr]

                async for chunk in response:
-                    choice = chunk.choices[0] if chunk.choices else None
-                    if not choice:
+                    # Capture usage from the trailing usage-only chunk that
+                    # stream_options={"include_usage": True} sends with empty choices.
+                    if not chunk.choices:
+                        usage = getattr(chunk, "usage", None)
+                        if usage:
+                            input_tokens = getattr(usage, "prompt_tokens", 0) or 0
+                            output_tokens = getattr(usage, "completion_tokens", 0) or 0
+                            logger.debug(
+                                "[tokens] trailing usage chunk: input=%d output=%d model=%s",
+                                input_tokens,
+                                output_tokens,
+                                self.model,
+                            )
+                        else:
+                            logger.debug(
+                                "[tokens] empty-choices chunk with no usage (model=%s)",
+                                self.model,
+                            )
                        continue
+                    choice = chunk.choices[0]

                    delta = choice.delta

@@ -888,10 +1614,10 @@ class LiteLLMProvider(LLMProvider):
                    if choice.finish_reason:
                        stream_finish_reason = choice.finish_reason
                        for _idx, tc_data in sorted(tool_calls_acc.items()):
-                            try:
-                                parsed_args = json.loads(tc_data["arguments"])
-                            except (json.JSONDecodeError, KeyError):
-                                parsed_args = {"_raw": tc_data.get("arguments", "")}
+                            parsed_args = self._parse_tool_call_arguments(
+                                tc_data.get("arguments", ""),
+                                tc_data.get("name", ""),
+                            )
                            tail_events.append(
                                ToolCallEvent(
                                    tool_use_id=tc_data["id"],
@@ -904,19 +1630,91 @@ class LiteLLMProvider(LLMProvider):
                            tail_events.append(TextEndEvent(full_text=accumulated_text))

                        usage = getattr(chunk, "usage", None)
+                        logger.debug(
+                            "[tokens] finish-chunk raw usage: %r (type=%s)",
+                            usage,
+                            type(usage).__name__,
+                        )
+                        cached_tokens = 0
                        if usage:
                            input_tokens = getattr(usage, "prompt_tokens", 0) or 0
                            output_tokens = getattr(usage, "completion_tokens", 0) or 0
+                            _details = getattr(usage, "prompt_tokens_details", None)
+                            cached_tokens = (
+                                getattr(_details, "cached_tokens", 0) or 0
+                                if _details is not None
+                                else getattr(usage, "cache_read_input_tokens", 0) or 0
+                            )
+                            logger.debug(
+                                "[tokens] finish-chunk usage: "
+                                "input=%d output=%d cached=%d model=%s",
+                                input_tokens,
+                                output_tokens,
+                                cached_tokens,
+                                self.model,
+                            )

+                        logger.debug(
+                            "[tokens] finish event: input=%d output=%d cached=%d stop=%s model=%s",
+                            input_tokens,
+                            output_tokens,
+                            cached_tokens,
+                            choice.finish_reason,
+                            self.model,
+                        )
                        tail_events.append(
                            FinishEvent(
                                stop_reason=choice.finish_reason,
                                input_tokens=input_tokens,
                                output_tokens=output_tokens,
+                                cached_tokens=cached_tokens,
                                model=self.model,
                            )
                        )

+                # Fallback: LiteLLM strips usage from yielded chunks before
+                # returning them to us, but appends the original chunk (with
+                # usage intact) to response.chunks first.  Use LiteLLM's own
+                # calculate_total_usage() on that accumulated list.
+                if input_tokens == 0 and output_tokens == 0:
+                    try:
+                        from litellm.litellm_core_utils.streaming_handler import (
+                            calculate_total_usage,
+                        )
+
+                        _chunks = getattr(response, "chunks", None)
+                        if _chunks:
+                            _usage = calculate_total_usage(chunks=_chunks)
+                            input_tokens = _usage.prompt_tokens or 0
+                            output_tokens = _usage.completion_tokens or 0
+                            _details = getattr(_usage, "prompt_tokens_details", None)
+                            cached_tokens = (
+                                getattr(_details, "cached_tokens", 0) or 0
+                                if _details is not None
+                                else getattr(_usage, "cache_read_input_tokens", 0) or 0
+                            )
+                            logger.debug(
+                                "[tokens] post-loop chunks fallback:"
+                                " input=%d output=%d cached=%d model=%s",
+                                input_tokens,
+                                output_tokens,
+                                cached_tokens,
+                                self.model,
+                            )
+                            # Patch the FinishEvent already queued with 0 tokens
+                            for _i, _ev in enumerate(tail_events):
+                                if isinstance(_ev, FinishEvent) and _ev.input_tokens == 0:
+                                    tail_events[_i] = FinishEvent(
+                                        stop_reason=_ev.stop_reason,
+                                        input_tokens=input_tokens,
+                                        output_tokens=output_tokens,
+                                        cached_tokens=cached_tokens,
+                                        model=_ev.model,
+                                    )
+                                    break
+                    except Exception as _e:
+                        logger.debug("[tokens] chunks fallback failed: %s", _e)
+
                # Check whether the stream produced any real content.
                # (If text deltas were yielded above, has_content is True
                # and we skip the retry path — nothing was yielded in vain.)
@@ -1000,6 +1798,16 @@ class LiteLLMProvider(LLMProvider):
                return

            except Exception as e:
+                if self._should_use_openrouter_tool_compat(e, tools):
+                    OPENROUTER_TOOL_COMPAT_MODEL_CACHE.add(self.model)
+                    async for event in self._stream_via_openrouter_tool_compat(
+                        messages=messages,
+                        system=system,
+                        tools=tools or [],
+                        max_tokens=max_tokens,
+                    ):
+                        yield event
+                    return
                if _is_stream_transient_error(e) and attempt < RATE_LIMIT_MAX_RETRIES:
                    wait = _compute_retry_delay(attempt, exception=e)
                    logger.warning(
@@ -71,6 +71,7 @@ class FinishEvent:
    stop_reason: str = ""
    input_tokens: int = 0
    output_tokens: int = 0
+    cached_tokens: int = 0
    model: str = ""


@@ -1,4 +0,0 @@
-"""MCP servers for worker-bee."""
-
-# Don't auto-import servers to avoid double-import issues when running with -m
-__all__ = []
@@ -1,33 +1 @@
-"""Framework-level worker monitoring package.
-
-Provides the Worker Health Judge: a reusable secondary graph that attaches to
-any worker agent runtime and monitors its execution health via periodic log
-inspection. Emits structured EscalationTickets when degradation is detected.
-
-Usage::
-
-    from framework.monitoring import HEALTH_JUDGE_ENTRY_POINT, judge_goal, judge_graph
-    from framework.tools.worker_monitoring_tools import register_worker_monitoring_tools
-
-    # Register tools bound to the worker runtime's EventBus
-    monitoring_registry = ToolRegistry()
-    register_worker_monitoring_tools(monitoring_registry, worker_runtime._event_bus, storage_path)
-
-    # Load judge as secondary graph on the worker runtime
-    await worker_runtime.add_graph(
-        graph_id="judge",
-        graph=judge_graph,
-        goal=judge_goal,
-        entry_points={"health_check": HEALTH_JUDGE_ENTRY_POINT},
-        storage_subpath="graphs/judge",
-    )
-"""
-
-from .judge import HEALTH_JUDGE_ENTRY_POINT, judge_goal, judge_graph, judge_node
-
-__all__ = [
-    "HEALTH_JUDGE_ENTRY_POINT",
-    "judge_goal",
-    "judge_graph",
-    "judge_node",
-]
+"""Framework-level worker monitoring package."""
@@ -1,258 +0,0 @@
-"""Worker Health Judge — framework-level reusable monitoring graph.
-
-Attaches to any worker agent runtime as a secondary graph. Fires on a
-2-minute timer, reads the worker's session logs via ``get_worker_health_summary``,
-accumulates observations in a continuous conversation context, and emits a
-structured ``EscalationTicket`` when it detects a degradation pattern.
-
-Usage::
-
-    from framework.monitoring import judge_graph, judge_goal, HEALTH_JUDGE_ENTRY_POINT
-    from framework.tools.worker_monitoring_tools import register_worker_monitoring_tools
-
-    # Register tools bound to the worker runtime's event bus
-    monitoring_registry = ToolRegistry()
-    register_worker_monitoring_tools(
-        monitoring_registry, worker_runtime._event_bus, storage_path
-    )
-    monitoring_tools = list(monitoring_registry.get_tools().values())
-    monitoring_executor = monitoring_registry.get_executor()
-
-    # Load judge as secondary graph on the worker runtime
-    await worker_runtime.add_graph(
-        graph_id="judge",
-        graph=judge_graph,
-        goal=judge_goal,
-        entry_points={"health_check": HEALTH_JUDGE_ENTRY_POINT},
-        storage_subpath="graphs/judge",
-    )
-
-Design:
- ``isolation_level="isolated"`` — the judge has its own memory, not
-  polluting the worker's shared memory namespace.
- ``conversation_mode="continuous"`` — the judge's conversation carries
-  across timer ticks. The conversation IS the judge's memory. It tracks
-  trends by referring to its own prior messages ("Last check I saw 47
-  steps; now 52; 5 new steps, 3 RETRY").
- No shared memory keys. No external state files.
-"""
-
-from __future__ import annotations
-
-from framework.graph import Constraint, Goal, NodeSpec, SuccessCriterion
-from framework.graph.edge import AsyncEntryPointSpec, GraphSpec
-
-# ---------------------------------------------------------------------------
-# Goal
-# ---------------------------------------------------------------------------
-
-judge_goal = Goal(
-    id="worker-health-monitor",
-    name="Worker Health Monitor",
-    description=(
-        "Periodically assess the health of the worker agent by reading its "
-        "execution logs. Detect degradation patterns (excessive retries, "
-        "stalls, doom loops) and emit structured EscalationTickets when the "
-        "worker needs attention."
-    ),
-    success_criteria=[
-        SuccessCriterion(
-            id="accurate-detection",
-            description="Only escalates genuine degradation, not normal retry cycles",
-            metric="false_positive_rate",
-            target="low",
-            weight=0.5,
-        ),
-        SuccessCriterion(
-            id="timely-detection",
-            description="Detects genuine stalls within 2 timer ticks (≤4 minutes)",
-            metric="detection_latency_minutes",
-            target="<=4",
-            weight=0.5,
-        ),
-    ],
-    constraints=[
-        Constraint(
-            id="conservative-escalation",
-            description=(
-                "Do not escalate on a single bad verdict or a brief stall. "
-                "Require clear patterns (10+ consecutive bad verdicts or 4+ minute stall) "
-                "before creating a ticket."
-            ),
-            constraint_type="hard",
-            category="quality",
-        ),
-        Constraint(
-            id="complete-ticket",
-            description=(
-                "Every EscalationTicket must have all required fields filled. "
-                "Do not emit partial or placeholder tickets."
-            ),
-            constraint_type="hard",
-            category="correctness",
-        ),
-    ],
-)
-
-# ---------------------------------------------------------------------------
-# Node
-# ---------------------------------------------------------------------------
-
-judge_node = NodeSpec(
-    id="judge",
-    name="Worker Health Judge",
-    description=(
-        "Autonomous health monitor for worker agents. Reads execution logs "
-        "on each timer tick, compares to prior observations (via conversation "
-        "history), and emits a structured EscalationTicket when a genuine "
-        "degradation pattern is detected."
-    ),
-    node_type="event_loop",
-    client_facing=False,  # Autonomous monitor, not interactive
-    max_node_visits=0,  # Unbounded — runs on every timer tick
-    input_keys=[],
-    output_keys=["health_verdict"],
-    nullable_output_keys=["health_verdict"],
-    success_criteria=(
-        "A clear health verdict is produced each check: either 'healthy' with "
-        "a brief observation, or a complete EscalationTicket is emitted via "
-        "emit_escalation_ticket and health_verdict describes the issue."
-    ),
-    tools=[
-        "get_worker_health_summary",
-        "emit_escalation_ticket",
-    ],
-    system_prompt="""\
-You are the Worker Health Judge. You run every 2 minutes alongside a worker \
-agent to monitor its execution health.
-
-# Your Role
-
-You observe the worker's iteration patterns over time and escalate only when \
-you see genuine degradation — not normal retry cycles. Your conversation history \
-IS your memory. On each check, refer to your previous observations to track trends.
-
-# Check Procedure
-
-On each timer tick (every 2 minutes):
-
-## Step 1: Read health snapshot
-Call get_worker_health_summary() with no arguments to auto-discover the active \
-session. This returns:
- worker_agent_id: the worker's agent name — use this for ticket identity fields
- worker_graph_id: the worker's primary graph ID — use this for ticket identity fields
- session_id: the session being monitored — use this for worker_session_id in tickets
- total_steps: how many log steps have been recorded
- recent_verdicts: list of recent ACCEPT/RETRY/CONTINUE verdicts
- steps_since_last_accept: consecutive non-ACCEPT steps
- stall_minutes: wall-clock since last step (null if active)
- evidence_snippet: recent LLM output
-
-## Step 2: Compare to prior check
-Look at your conversation history. What was total_steps last time?
- If total_steps is UNCHANGED from prior check AND prior check was also unchanged:
-  → STALL confirmed (worker has produced no new iterations in 4+ minutes).
-  → Escalate with severity="high" or "critical" depending on stall duration.
- If total_steps increased: worker is making progress. Examine verdicts.
-
-## Step 3: Analyze verdict pattern
- Healthy: Mix of ACCEPT and RETRY, steps_since_last_accept < 5. No action.
- Warning: steps_since_last_accept is 5-9. Note it, no escalation yet.
- Degraded: steps_since_last_accept >= 10. Examine evidence_snippet.
-  - If evidence shows the agent is making real progress (complex reasoning,
-    exploring solutions, productive tool use): may be a hard problem. Note it.
-  - If evidence shows a loop (same error, same tool call, no new information):
-    → Escalate with severity="medium" or "high".
- Critical: steps_since_last_accept >= 20, OR stall_minutes >= 4.
-  → Escalate with severity="critical".
-
-## Step 4: Decide
-
-### If healthy:
-set_output("health_verdict", "healthy: <brief observation>")
-Done.
-
-### If escalating:
-Build an EscalationTicket JSON string with ALL required fields:
-{
-  "worker_agent_id": "<worker_agent_id from get_worker_health_summary>",
-  "worker_session_id": "<session_id from get_worker_health_summary>",
-  "worker_node_id": "<worker_graph_id from get_worker_health_summary>",
-  "worker_graph_id": "<worker_graph_id from get_worker_health_summary>",
-  "severity": "<low|medium|high|critical>",
-  "cause": "<what you observed — concrete, specific>",
-  "judge_reasoning": "<why you decided to escalate, not just dismiss>",
-  "suggested_action": "<what you recommend: restart, human review, etc.>",
-  "recent_verdicts": [<list from get_worker_health_summary>],
-  "total_steps_checked": <int>,
-  "steps_since_last_accept": <int>,
-  "stall_minutes": <float or null>,
-  "evidence_snippet": "<from get_worker_health_summary>"
-}
-
-Call: emit_escalation_ticket(ticket_json=<the JSON string above>)
-Then: set_output("health_verdict", "escalated: <one-line summary>")
-
-# Severity Guide
-
- low: Mild concern, worth noting. 5-9 consecutive bad verdicts.
- medium: Clear degradation pattern. 10-15 bad verdicts or brief stall (1-2 min).
- high: Serious issue. 15+ bad verdicts or stall 2-4 minutes or clear doom loop.
- critical: Worker is definitively stuck. 20+ bad verdicts or stall > 4 minutes.
-
-# Conservative Bias
-
-You MUST resist the urge to escalate prematurely. Worker agents naturally retry.
-A node may legitimately need 5-8 retries before succeeding. Do not escalate unless:
-1. The pattern is clear and sustained across your observation window, AND
-2. The evidence shows no genuine progress
-
-One missed escalation is less costly than two false alarms. The Queen will filter \
-further. But do not be passive — genuine stalls and doom loops must be caught.
-
-# Rules
- Never escalate on the FIRST check unless stall_minutes > 4
- Always call get_worker_health_summary FIRST before deciding anything
- All ticket fields are REQUIRED — do not submit partial tickets
- After any emit_escalation_ticket call, always set_output to complete the check
-""",
-)
-
-# ---------------------------------------------------------------------------
-# Entry Point
-# ---------------------------------------------------------------------------
-
-HEALTH_JUDGE_ENTRY_POINT = AsyncEntryPointSpec(
-    id="health_check",
-    name="Worker Health Check",
-    entry_node="judge",
-    trigger_type="timer",
-    trigger_config={
-        "interval_minutes": 2,
-        "run_immediately": True,  # Fire immediately to establish a baseline
-    },
-    isolation_level="isolated",  # Own memory namespace, not polluting worker's
-)
-
-# ---------------------------------------------------------------------------
-# Graph
-# ---------------------------------------------------------------------------
-
-judge_graph = GraphSpec(
-    id="judge-graph",
-    goal_id=judge_goal.id,
-    version="1.0.0",
-    entry_node="judge",
-    entry_points={"health_check": "judge"},
-    terminal_nodes=["judge"],  # Judge node can terminate after each check
-    pause_nodes=[],
-    nodes=[judge_node],
-    edges=[],
-    conversation_mode="continuous",  # Conversation persists across timer ticks
-    async_entry_points=[HEALTH_JUDGE_ENTRY_POINT],
-    loop_config={
-        "max_iterations": 10,  # One check shouldn't take many turns
-        "max_tool_calls_per_turn": 3,  # get_summary + optionally emit_ticket
-        "max_history_tokens": 16000,  # Compact — judge only needs recent context
-    },
-)
@@ -83,18 +83,18 @@ configure_logging(level="INFO", format="auto")
 - Compact single-line format (easy to stream/parse)
 - All trace context fields included automatically

-### Human-Readable Format (Development)
+### Human-Readable Format (Development / Terminal)

 ```
-[INFO    ] [trace:12345678 | exec:a1b2c3d4 | agent:sales-agent] Starting agent execution
-[INFO    ] [trace:12345678 | exec:a1b2c3d4 | agent:sales-agent] Processing input data [node_id:input-processor]
-[INFO    ] [trace:12345678 | exec:a1b2c3d4 | agent:sales-agent] LLM call completed [latency_ms:1250] [tokens_used:450]
+[INFO    ] [agent:sales-agent] Starting agent execution
+[INFO    ] [agent:sales-agent] Processing input data [node_id:input-processor]
+[INFO    ] [agent:sales-agent] LLM call completed [latency_ms:1250] [tokens_used:450]
 ```

 **Features:**
 - Color-coded log levels
- Shortened IDs for readability (first 8 chars)
- Context prefix shows trace correlation
+- Terminal output omits trace_id and execution_id for readability
+- For full traceability (e.g. debugging), use `ENV=production` to get JSON file logs with trace_id and execution_id

 ## Trace Context Fields

@@ -4,8 +4,9 @@ Structured logging with automatic trace context propagation.
 Key Features:
 - Zero developer friction: Standard logger.info() calls get automatic context
 - ContextVar-based propagation: Thread-safe and async-safe
- Dual output modes: JSON for production, human-readable for development
- Correlation IDs: trace_id follows entire request flow automatically
+- Dual output modes: JSON for production (full trace_id/execution_id), human-readable for terminal
+- Terminal omits trace_id/execution_id for readability
+- Use ENV=production for file logs with full traceability

 Architecture:
    Runtime.start_run() → Generates trace_id, sets context once
@@ -101,10 +102,11 @@ class StructuredFormatter(logging.Formatter):

 class HumanReadableFormatter(logging.Formatter):
    """
-    Human-readable formatter for development.
+    Human-readable formatter for development (terminal output).

-    Provides colorized logs with trace context for local debugging.
-    Includes trace_id prefix for correlation - AUTOMATIC!
+    Provides colorized logs for local debugging. Omits trace_id and execution_id
+    from the terminal for readability; use ENV=production (JSON file logs) when
+    traceability is needed.
    """

    COLORS = {
@@ -118,18 +120,11 @@ class HumanReadableFormatter(logging.Formatter):

    def format(self, record: logging.LogRecord) -> str:
        """Format log record as human-readable string."""
-        # Get trace context - AUTOMATIC!
+        # Get trace context; omit trace_id and execution_id in terminal for readability
        context = trace_context.get() or {}
-        trace_id = context.get("trace_id", "")
-        execution_id = context.get("execution_id", "")
        agent_id = context.get("agent_id", "")

-        # Build context prefix
        prefix_parts = []
-        if trace_id:
-            prefix_parts.append(f"trace:{trace_id[:8]}")
-        if execution_id:
-            prefix_parts.append(f"exec:{execution_id[-8:]}")
        if agent_id:
            prefix_parts.append(f"agent:{agent_id}")

@@ -148,8 +143,9 @@ class HumanReadableFormatter(logging.Formatter):
        if record_event is not None:
            event = f" [{record_event}]"

-        # Format message: [LEVEL] [trace context] message
-        return f"{color}[{level}]{reset} {context_prefix}{record.getMessage()}{event}"
+        timestamp = self.formatTime(record, "%Y-%m-%d %H:%M:%S")
+        # Format message: TIMESTAMP [LEVEL] [agent context] message [event]
+        return f"{timestamp} {color}[{level}]{reset} {context_prefix}{record.getMessage()}{event}"


 def configure_logging(
@@ -210,6 +206,10 @@ def configure_logging(
    root_logger.addHandler(handler)
    root_logger.setLevel(level.upper())

+    # Suppress noisy LiteLLM INFO logs (model/provider line + Provider List URL
+    # printed on every single completion call).  Warnings and errors still show.
+    logging.getLogger("LiteLLM").setLevel(logging.WARNING)
+
    # When in JSON mode, configure known third-party loggers to use JSON formatter
    # This ensures libraries like LiteLLM, httpcore also output clean JSON
    if format == "json":
@@ -51,11 +51,7 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
        action="store_true",
        help="Show detailed execution logs (steps, LLM calls, etc.)",
    )
-    run_parser.add_argument(
-        "--tui",
-        action="store_true",
-        help="Launch interactive terminal dashboard",
-    )
+
    run_parser.add_argument(
        "--model",
        "-m",
@@ -194,158 +190,6 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
    shell_parser.set_defaults(func=cmd_shell)

    # tui command (interactive agent dashboard)
-    tui_parser = subparsers.add_parser(
-        "tui",
-        help="Launch interactive TUI dashboard",
-        description="Browse available agents and launch the terminal dashboard.",
-    )
-    tui_parser.add_argument(
-        "--model",
-        "-m",
-        type=str,
-        default=None,
-        help="LLM model to use (any LiteLLM-compatible name)",
-    )
-    tui_parser.set_defaults(func=cmd_tui)
-
-    # code command (Hive Coder — framework agent builder)
-    code_parser = subparsers.add_parser(
-        "code",
-        help="Launch Hive Coder to build agents",
-        description="Interactive agent builder. Describe what you want and Hive Coder builds it.",
-    )
-    code_parser.add_argument(
-        "--model",
-        "-m",
-        type=str,
-        default=None,
-        help="LLM model to use (any LiteLLM-compatible name)",
-    )
-    code_parser.set_defaults(func=cmd_code)
-
-    # sessions command group (checkpoint/resume management)
-    sessions_parser = subparsers.add_parser(
-        "sessions",
-        help="Manage agent sessions",
-        description="List, inspect, and manage agent execution sessions.",
-    )
-    sessions_subparsers = sessions_parser.add_subparsers(
-        dest="sessions_cmd",
-        help="Session management commands",
-    )
-
-    # sessions list
-    sessions_list_parser = sessions_subparsers.add_parser(
-        "list",
-        help="List agent sessions",
-        description="List all sessions for an agent.",
-    )
-    sessions_list_parser.add_argument(
-        "agent_path",
-        type=str,
-        help="Path to agent folder",
-    )
-    sessions_list_parser.add_argument(
-        "--status",
-        choices=["all", "active", "failed", "completed", "paused"],
-        default="all",
-        help="Filter by session status (default: all)",
-    )
-    sessions_list_parser.add_argument(
-        "--has-checkpoints",
-        action="store_true",
-        help="Show only sessions with checkpoints",
-    )
-    sessions_list_parser.set_defaults(func=cmd_sessions_list)
-
-    # sessions show
-    sessions_show_parser = sessions_subparsers.add_parser(
-        "show",
-        help="Show session details",
-        description="Display detailed information about a specific session.",
-    )
-    sessions_show_parser.add_argument(
-        "agent_path",
-        type=str,
-        help="Path to agent folder",
-    )
-    sessions_show_parser.add_argument(
-        "session_id",
-        type=str,
-        help="Session ID to inspect",
-    )
-    sessions_show_parser.add_argument(
-        "--json",
-        action="store_true",
-        help="Output as JSON",
-    )
-    sessions_show_parser.set_defaults(func=cmd_sessions_show)
-
-    # sessions checkpoints
-    sessions_checkpoints_parser = sessions_subparsers.add_parser(
-        "checkpoints",
-        help="List session checkpoints",
-        description="List all checkpoints for a session.",
-    )
-    sessions_checkpoints_parser.add_argument(
-        "agent_path",
-        type=str,
-        help="Path to agent folder",
-    )
-    sessions_checkpoints_parser.add_argument(
-        "session_id",
-        type=str,
-        help="Session ID",
-    )
-    sessions_checkpoints_parser.set_defaults(func=cmd_sessions_checkpoints)
-
-    # pause command
-    pause_parser = subparsers.add_parser(
-        "pause",
-        help="Pause running session",
-        description="Request graceful pause of a running agent session.",
-    )
-    pause_parser.add_argument(
-        "agent_path",
-        type=str,
-        help="Path to agent folder",
-    )
-    pause_parser.add_argument(
-        "session_id",
-        type=str,
-        help="Session ID to pause",
-    )
-    pause_parser.set_defaults(func=cmd_pause)
-
-    # resume command
-    resume_parser = subparsers.add_parser(
-        "resume",
-        help="Resume session from checkpoint",
-        description="Resume a paused or failed session from a checkpoint.",
-    )
-    resume_parser.add_argument(
-        "agent_path",
-        type=str,
-        help="Path to agent folder",
-    )
-    resume_parser.add_argument(
-        "session_id",
-        type=str,
-        help="Session ID to resume",
-    )
-    resume_parser.add_argument(
-        "--checkpoint",
-        "-c",
-        type=str,
-        help="Specific checkpoint ID to resume from (default: latest)",
-    )
-    resume_parser.add_argument(
-        "--tui",
-        action="store_true",
-        help="Resume in TUI dashboard mode",
-    )
-    resume_parser.set_defaults(func=cmd_resume)
-
    # setup-credentials command
    setup_creds_parser = subparsers.add_parser(
        "setup-credentials",
@@ -399,6 +243,8 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
        action="store_true",
        help="Open dashboard in browser after server starts",
    )
+    serve_parser.add_argument("--verbose", "-v", action="store_true", help="Enable INFO log level")
+    serve_parser.add_argument("--debug", action="store_true", help="Enable DEBUG log level")
    serve_parser.set_defaults(func=cmd_serve)

    # open command (serve + auto-open browser)
@@ -436,6 +282,8 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
        default=None,
        help="LLM model for preloaded agents",
    )
+    open_parser.add_argument("--verbose", "-v", action="store_true", help="Enable INFO log level")
+    open_parser.add_argument("--debug", action="store_true", help="Enable DEBUG log level")
    open_parser.set_defaults(func=cmd_open)


@@ -531,18 +379,18 @@ def _prompt_before_start(agent_path: str, runner, model: str | None = None):

 def cmd_run(args: argparse.Namespace) -> int:
    """Run an exported agent."""
-    import logging

    from framework.credentials.models import CredentialError
+    from framework.observability import configure_logging
    from framework.runner import AgentRunner

    # Set logging level (quiet by default for cleaner output)
    if args.quiet:
-        logging.basicConfig(level=logging.ERROR, format="%(message)s")
+        configure_logging(level="ERROR")
    elif getattr(args, "verbose", False):
-        logging.basicConfig(level=logging.INFO, format="%(message)s")
+        configure_logging(level="INFO")
    else:
-        logging.basicConfig(level=logging.WARNING, format="%(message)s")
+        configure_logging(level="WARNING")

    # Load input context
    context = {}
@@ -577,128 +425,67 @@ def cmd_run(args: argparse.Namespace) -> int:
            )
            return 1

-    # Run the agent (with TUI or standard)
-    if getattr(args, "tui", False):
-        from framework.tui.app import AdenTUI
+    # Load the agent. AgentRunner handles credential setup interactively on a TTY;
+    # a missing, empty, or invalid-JSON export (ValueError) is reported, not raised.
+    try:
+        runner = AgentRunner.load(
+            args.agent_path,
+            model=args.model,
+        )
+    except CredentialError as e:
+        print(f"\n{e}", file=sys.stderr)
+        return 1
+    except (FileNotFoundError, ValueError) as e:
+        print(f"Error: {e}", file=sys.stderr)
+        return 1

-        async def run_with_tui():
-            try:
-                # Load runner inside the async loop to ensure strict loop affinity
-                # (only one load — avoids spawning duplicate MCP subprocesses)
-                # AgentRunner handles credential setup interactively when stdin is a TTY.
-                try:
-                    runner = AgentRunner.load(
-                        args.agent_path,
-                        model=args.model,
-                    )
-                except CredentialError as e:
-                    print(f"\n{e}", file=sys.stderr)
-                    return
-                except Exception as e:
-                    print(f"Error loading agent: {e}")
-                    return
+    # Prompt before starting (allows credential updates)
+    if sys.stdin.isatty() and not args.quiet:
+        runner = _prompt_before_start(args.agent_path, runner, args.model)
+        if runner is None:
+            return 1

-                # Prompt before starting (allows credential updates)
-                if sys.stdin.isatty():
-                    runner = _prompt_before_start(args.agent_path, runner, args.model)
-                    if runner is None:
-                        return
-
-                # Force setup inside the loop
-                if runner._agent_runtime is None:
-                    try:
-                        runner._setup()
-                    except CredentialError as e:
-                        print(f"\n{e}", file=sys.stderr)
-                        return
-
-                # Start runtime before TUI so it's ready for user input
-                if runner._agent_runtime and not runner._agent_runtime.is_running:
-                    await runner._agent_runtime.start()
-
-                app = AdenTUI(
-                    runner._agent_runtime,
-                    resume_session=getattr(args, "resume_session", None),
-                    resume_checkpoint=getattr(args, "checkpoint", None),
-                )
-
-                # TUI handles execution via ChatRepl — user submits input,
-                # ChatRepl calls runtime.trigger_and_wait(). No auto-launch.
-                await app.run_async()
-            except Exception as e:
-                import traceback
-
-                traceback.print_exc()
-                print(f"TUI error: {e}")
-
-            await runner.cleanup_async()
-            return None
-
-        asyncio.run(run_with_tui())
-        print("TUI session ended.")
-        return 0
-    else:
-        # Standard execution — load runner here (not shared with TUI path)
-        # AgentRunner handles credential setup interactively when stdin is a TTY.
-        try:
-            runner = AgentRunner.load(
-                args.agent_path,
-                model=args.model,
+    # Load session/checkpoint state for resume (headless mode)
+    session_state = None
+    resume_session = getattr(args, "resume_session", None)
+    checkpoint = getattr(args, "checkpoint", None)
+    if resume_session:
+        session_state = _load_resume_state(args.agent_path, resume_session, checkpoint)
+        if session_state is None:
+            print(
+                f"Error: Could not load session state for {resume_session}",
+                file=sys.stderr,
            )
-        except CredentialError as e:
-            print(f"\n{e}", file=sys.stderr)
            return 1
-        except FileNotFoundError as e:
-            print(f"Error: {e}", file=sys.stderr)
-            return 1
-
-        # Prompt before starting (allows credential updates)
-        if sys.stdin.isatty() and not args.quiet:
-            runner = _prompt_before_start(args.agent_path, runner, args.model)
-            if runner is None:
-                return 1
-
-        # Load session/checkpoint state for resume (headless mode)
-        session_state = None
-        resume_session = getattr(args, "resume_session", None)
-        checkpoint = getattr(args, "checkpoint", None)
-        if resume_session:
-            session_state = _load_resume_state(args.agent_path, resume_session, checkpoint)
-            if session_state is None:
-                print(
-                    f"Error: Could not load session state for {resume_session}",
-                    file=sys.stderr,
-                )
-                return 1
-            if not args.quiet:
-                resume_node = session_state.get("paused_at", "unknown")
-                if checkpoint:
-                    print(f"Resuming from checkpoint: {checkpoint}")
-                else:
-                    print(f"Resuming session: {resume_session}")
-                print(f"Resume point: {resume_node}")
-                print()
-
-        # Auto-inject user_id if the agent expects it but it's not provided
-        entry_input_keys = runner.graph.nodes[0].input_keys if runner.graph.nodes else []
-        if "user_id" in entry_input_keys and context.get("user_id") is None:
-            import os
-
-            context["user_id"] = os.environ.get("USER", "default_user")
-
        if not args.quiet:
-            info = runner.info()
-            print(f"Agent: {info.name}")
-            print(f"Goal: {info.goal_name}")
-            print(f"Steps: {info.node_count}")
-            print(f"Input: {json.dumps(context)}")
-            print()
-            print("=" * 60)
-            print("Executing agent...")
-            print("=" * 60)
+            resume_node = session_state.get("paused_at", "unknown")
+            if checkpoint:
+                print(f"Resuming from checkpoint: {checkpoint}")
+            else:
+                print(f"Resuming session: {resume_session}")
+            print(f"Resume point: {resume_node}")
            print()

-        result = asyncio.run(runner.run(context, session_state=session_state))
+    # Auto-inject user_id if the agent expects it but it's not provided
+    entry_input_keys = runner.graph.nodes[0].input_keys if runner.graph.nodes else []
+    if "user_id" in entry_input_keys and context.get("user_id") is None:
+        import os
+
+        context["user_id"] = os.environ.get("USER", "default_user")
+
+    if not args.quiet:
+        info = runner.info()
+        print(f"Agent: {info.name}")
+        print(f"Goal: {info.goal_name}")
+        print(f"Steps: {info.node_count}")
+        print(f"Input: {json.dumps(context)}")
+        print()
+        print("=" * 60)
+        print("Executing agent...")
+        print("=" * 60)
+        print()
+
+    result = asyncio.run(runner.run(context, session_state=session_state))

    # Format output
    output = {
@@ -959,6 +746,17 @@ def cmd_dispatch(args: argparse.Namespace) -> int:
    if args.agents:
        # Use specific agents
        for agent_name in args.agents:
+            # Guard against full paths: if the name contains path separators
+            # (e.g. "exports/my_agent"), it will be doubled with agents_dir
+            agent_name_path = Path(agent_name)
+            if len(agent_name_path.parts) > 1:
+                print(
+                    f"Error: --agents expects agent names, not paths. "
+                    f"Use: --agents {agent_name_path.name} "
+                    f"instead of --agents {agent_name}",
+                    file=sys.stderr,
+                )
+                return 1
            agent_path = agents_dir / agent_name
            if not _is_valid_agent_dir(agent_path):
                print(f"Agent not found: {agent_path}", file=sys.stderr)
@@ -1124,16 +922,12 @@ def _format_natural_language_to_json(

 def cmd_shell(args: argparse.Namespace) -> int:
    """Start an interactive agent session."""
-    import logging

    from framework.credentials.models import CredentialError
+    from framework.observability import configure_logging
    from framework.runner import AgentRunner

-    # Configure logging to show runtime visibility
-    logging.basicConfig(
-        level=logging.INFO,
-        format="%(message)s",  # Simple format for clean output
-    )
+    configure_logging(level="INFO")

    agents_dir = Path(args.agents_dir)

@@ -1364,154 +1158,6 @@ def _get_framework_agents_dir() -> Path:
    return Path(__file__).resolve().parent.parent / "agents"


-def _launch_agent_tui(
-    agent_path: str | Path,
-    model: str | None = None,
-) -> int:
-    """Load an agent and launch the TUI. Shared by cmd_tui and cmd_code."""
-    from framework.credentials.models import CredentialError
-    from framework.runner import AgentRunner
-    from framework.tui.app import AdenTUI
-
-    async def run_with_tui():
-        # AgentRunner handles credential setup interactively when stdin is a TTY.
-        try:
-            runner = AgentRunner.load(
-                agent_path,
-                model=model,
-            )
-        except CredentialError as e:
-            print(f"\n{e}", file=sys.stderr)
-            return
-        except Exception as e:
-            print(f"Error loading agent: {e}")
-            return
-
-        if runner._agent_runtime is None:
-            try:
-                runner._setup()
-            except CredentialError as e:
-                print(f"\n{e}", file=sys.stderr)
-                return
-
-        if runner._agent_runtime and not runner._agent_runtime.is_running:
-            await runner._agent_runtime.start()
-
-        app = AdenTUI(runner._agent_runtime)
-        try:
-            await app.run_async()
-        except Exception as e:
-            import traceback
-
-            traceback.print_exc()
-            print(f"TUI error: {e}")
-
-        await runner.cleanup_async()
-
-    asyncio.run(run_with_tui())
-    print("TUI session ended.")
-    return 0
-
-
-def cmd_tui(args: argparse.Namespace) -> int:
-    """Launch the interactive TUI dashboard with in-app agent picker."""
-    import logging
-
-    logging.basicConfig(level=logging.WARNING, format="%(message)s")
-
-    from framework.tui.app import AdenTUI
-
-    async def run_tui():
-        app = AdenTUI(
-            model=args.model,
-        )
-        await app.run_async()
-
-    asyncio.run(run_tui())
-    print("TUI session ended.")
-    return 0
-
-
-def cmd_code(args: argparse.Namespace) -> int:
-    """Launch Hive Coder with multi-graph support.
-
-    Unlike ``_launch_agent_tui``, this sets up graph lifecycle tools and
-    assigns ``graph_id="hive_coder"`` so the coder can load, supervise,
-    and restart secondary agent graphs within the same session.
-    """
-    import logging
-
-    logging.basicConfig(level=logging.WARNING, format="%(message)s")
-
-    framework_agents_dir = _get_framework_agents_dir()
-    hive_coder_path = framework_agents_dir / "hive_coder"
-
-    if not (hive_coder_path / "agent.py").exists():
-        print("Error: Hive Coder agent not found.", file=sys.stderr)
-        return 1
-
-    # Ensure framework agents dir is on sys.path for import
-    fa_str = str(framework_agents_dir)
-    if fa_str not in sys.path:
-        sys.path.insert(0, fa_str)
-
-    from framework.credentials.models import CredentialError
-    from framework.runner import AgentRunner
-    from framework.tools.session_graph_tools import register_graph_tools
-    from framework.tui.app import AdenTUI
-
-    async def run_with_tui():
-        try:
-            runner = AgentRunner.load(hive_coder_path, model=args.model)
-        except CredentialError as e:
-            print(f"\n{e}", file=sys.stderr)
-            return
-        except Exception as e:
-            print(f"Error loading agent: {e}")
-            return
-
-        if runner._agent_runtime is None:
-            try:
-                runner._setup()
-            except CredentialError as e:
-                print(f"\n{e}", file=sys.stderr)
-                return
-
-        runtime = runner._agent_runtime
-
-        # -- Multi-graph setup --
-        # Tag the primary graph so events carry graph_id="hive_coder"
-        runtime._graph_id = "hive_coder"
-        runtime._active_graph_id = "hive_coder"
-
-        # Register graph lifecycle tools (load_agent, unload_agent, etc.)
-        register_graph_tools(runner._tool_registry, runtime)
-
-        # Refresh tool schemas AND executor so streams see the new tools.
-        # The executor closure references the registry dict by ref, but
-        # refreshing both is robust against any copy-on-read behavior.
-        runtime._tools = list(runner._tool_registry.get_tools().values())
-        runtime._tool_executor = runner._tool_registry.get_executor()
-
-        if not runtime.is_running:
-            await runtime.start()
-
-        app = AdenTUI(runtime)
-        try:
-            await app.run_async()
-        except Exception as e:
-            import traceback
-
-            traceback.print_exc()
-            print(f"TUI error: {e}")
-
-        await runner.cleanup_async()
-
-    asyncio.run(run_with_tui())
-    print("TUI session ended.")
-    return 0
-
-
 def _extract_python_agent_metadata(agent_path: Path) -> tuple[str, str]:
    """Extract name and description from a Python-based agent's config.py.

@@ -1864,56 +1510,6 @@ def _interactive_multi(agents_dir: Path) -> int:
    return 0


-def cmd_sessions_list(args: argparse.Namespace) -> int:
-    """List agent sessions."""
-    print("⚠ Sessions list command not yet implemented")
-    print("This will be available once checkpoint infrastructure is complete.")
-    print(f"\nAgent: {args.agent_path}")
-    print(f"Status filter: {args.status}")
-    print(f"Has checkpoints: {args.has_checkpoints}")
-    return 1
-
-
-def cmd_sessions_show(args: argparse.Namespace) -> int:
-    """Show detailed session information."""
-    print("⚠ Session show command not yet implemented")
-    print("This will be available once checkpoint infrastructure is complete.")
-    print(f"\nAgent: {args.agent_path}")
-    print(f"Session: {args.session_id}")
-    return 1
-
-
-def cmd_sessions_checkpoints(args: argparse.Namespace) -> int:
-    """List checkpoints for a session."""
-    print("⚠ Session checkpoints command not yet implemented")
-    print("This will be available once checkpoint infrastructure is complete.")
-    print(f"\nAgent: {args.agent_path}")
-    print(f"Session: {args.session_id}")
-    return 1
-
-
-def cmd_pause(args: argparse.Namespace) -> int:
-    """Pause a running session."""
-    print("⚠ Pause command not yet implemented")
-    print("This will be available once executor pause integration is complete.")
-    print(f"\nAgent: {args.agent_path}")
-    print(f"Session: {args.session_id}")
-    return 1
-
-
-def cmd_resume(args: argparse.Namespace) -> int:
-    """Resume a session from checkpoint."""
-    print("⚠ Resume command not yet implemented")
-    print("This will be available once checkpoint resume integration is complete.")
-    print(f"\nAgent: {args.agent_path}")
-    print(f"Session: {args.session_id}")
-    if args.checkpoint:
-        print(f"Checkpoint: {args.checkpoint}")
-    if args.tui:
-        print("Mode: TUI")
-    return 1
-
-
 def cmd_setup_credentials(args: argparse.Namespace) -> int:
    """Interactive credential setup for an agent."""
    from framework.credentials.setup import CredentialSetupSession
@@ -2029,18 +1625,18 @@ def _build_frontend() -> bool:

 def cmd_serve(args: argparse.Namespace) -> int:
    """Start the HTTP API server."""
-    import logging

    from aiohttp import web

    _build_frontend()

+    from framework.observability import configure_logging
    from framework.server.app import create_app

-    logging.basicConfig(
-        level=logging.INFO,
-        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
-    )
+    if getattr(args, "debug", False):
+        configure_logging(level="DEBUG")
+    else:
+        configure_logging(level="INFO")

    model = getattr(args, "model", None)
    app = create_app(model=model)
@@ -68,6 +68,7 @@ class MCPClient:
        self._read_stream = None
        self._write_stream = None
        self._stdio_context = None  # Context manager for stdio_client
+        self._errlog_handle = None  # Track errlog file handle for cleanup
        self._http_client: httpx.Client | None = None
        self._tools: dict[str, MCPTool] = {}
        self._connected = False
@@ -200,7 +201,8 @@ class MCPClient:
                        if os.name == "nt":
                            errlog = sys.stderr
                        else:
-                            errlog = open(os.devnull, "w")  # noqa: SIM115
+                            self._errlog_handle = open(os.devnull, "w")
+                            errlog = self._errlog_handle
                        self._stdio_context = stdio_client(server_params, errlog=errlog)
                        (
                            self._read_stream,
@@ -475,6 +477,15 @@ class MCPClient:
        finally:
            self._stdio_context = None

+        # Third: close errlog file handle if we opened one
+        if self._errlog_handle is not None:
+            try:
+                self._errlog_handle.close()
+            except Exception as e:
+                logger.debug(f"Error closing errlog handle: {e}")
+            finally:
+                self._errlog_handle = None
+
    def disconnect(self) -> None:
        """Disconnect from the MCP server."""
        # Clean up persistent STDIO connection
@@ -545,6 +556,7 @@ class MCPClient:
            self._write_stream = None
            self._loop = None
            self._loop_thread = None
+            self._errlog_handle = None

        # Clean up HTTP client
        if self._http_client:
@@ -9,14 +9,13 @@ from datetime import UTC
 from pathlib import Path
 from typing import TYPE_CHECKING, Any

-from framework.config import get_hive_config, get_preferred_model
+from framework.config import get_hive_config, get_max_context_tokens, get_preferred_model
 from framework.credentials.validation import (
    ensure_credential_key_env as _ensure_credential_key_env,
 )
 from framework.graph import Goal
 from framework.graph.edge import (
    DEFAULT_MAX_TOKENS,
-    AsyncEntryPointSpec,
    EdgeCondition,
    EdgeSpec,
    GraphSpec,
@@ -29,6 +28,7 @@ from framework.runner.tool_registry import ToolRegistry
 from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig, create_agent_runtime
 from framework.runtime.execution_stream import EntryPointSpec
 from framework.runtime.runtime_log_store import RuntimeLogStore
+from framework.tools.flowchart_utils import generate_fallback_flowchart

 if TYPE_CHECKING:
    from framework.runner.protocol import AgentMessage, CapabilityResponse
@@ -517,6 +517,41 @@ def get_codex_account_id() -> str | None:
    return None


+# ---------------------------------------------------------------------------
+# Kimi Code subscription token helpers
+# ---------------------------------------------------------------------------
+
+
+def get_kimi_code_token() -> str | None:
+    """Get the API key from a Kimi Code CLI installation.
+
+    Reads ``~/.kimi/config.toml`` (created by ``kimi /login``). The key under
+    ``providers.kimi-for-coding`` is preferred; any other provider table with
+    a non-empty string ``api_key`` is used as a fallback.
+
+    Returns:
+        The API key, or None if the config is missing, unreadable, or keyless.
+    """
+    import tomllib
+
+    config_path = Path.home() / ".kimi" / "config.toml"
+    try:
+        with open(config_path, "rb") as f:
+            providers = tomllib.load(f).get("providers", {})
+    except (OSError, ValueError):
+        # Best-effort lookup: a missing, unreadable, or malformed config
+        # (TOMLDecodeError is a ValueError) simply means "no token".
+        return None
+    if not isinstance(providers, dict):
+        return None
+    # Try kimi-cli's own entry first so another provider's key cannot shadow it.
+    for provider_cfg in (providers.get("kimi-for-coding"), *providers.values()):
+        key = provider_cfg.get("api_key") if isinstance(provider_cfg, dict) else None
+        if isinstance(key, str) and key:
+            return key
+    return None
+
+
@dataclass
 class AgentInfo:
    """Information about an exported agent."""
@@ -535,9 +570,6 @@ class AgentInfo:
    constraints: list[dict]
    required_tools: list[str]
    has_tools_module: bool
-    # Multi-entry-point support
-    async_entry_points: list[dict] = field(default_factory=list)
-    is_multi_entry_point: bool = False


@dataclass
@@ -595,22 +627,6 @@ def load_agent_export(data: str | dict) -> tuple[GraphSpec, Goal]:
        )
        edges.append(edge)

-    # Build AsyncEntryPointSpec objects for multi-entry-point support
-    async_entry_points = []
-    for aep_data in graph_data.get("async_entry_points", []):
-        async_entry_points.append(
-            AsyncEntryPointSpec(
-                id=aep_data["id"],
-                name=aep_data.get("name", aep_data["id"]),
-                entry_node=aep_data["entry_node"],
-                trigger_type=aep_data.get("trigger_type", "manual"),
-                trigger_config=aep_data.get("trigger_config", {}),
-                isolation_level=aep_data.get("isolation_level", "shared"),
-                priority=aep_data.get("priority", 0),
-                max_concurrent=aep_data.get("max_concurrent", 10),
-            )
-        )
-
    # Build GraphSpec
    graph = GraphSpec(
        id=graph_data.get("id", "agent-graph"),
@@ -618,7 +634,6 @@ def load_agent_export(data: str | dict) -> tuple[GraphSpec, Goal]:
        version=graph_data.get("version", "1.0.0"),
        entry_node=graph_data.get("entry_node", ""),
        entry_points=graph_data.get("entry_points", {}),  # Support pause/resume architecture
-        async_entry_points=async_entry_points,  # Support multi-entry-point agents
        terminal_nodes=graph_data.get("terminal_nodes", []),
        pause_nodes=graph_data.get("pause_nodes", []),  # Support pause/resume architecture
        nodes=nodes,
@@ -770,8 +785,6 @@ class AgentRunner:

        # AgentRuntime — unified execution path for all agents
        self._agent_runtime: AgentRuntime | None = None
-        self._uses_async_entry_points = self.graph.has_async_entry_points()
-
        # Pre-load validation: structural checks + credentials.
        # Fails fast with actionable guidance — no MCP noise on screen.
        run_preload_validation(
@@ -891,10 +904,32 @@ class AgentRunner:

            if agent_config and hasattr(agent_config, "max_tokens"):
                max_tokens = agent_config.max_tokens
+                logger.info(
+                    "Agent default_config overrides max_tokens: %d "
+                    "(configuration.json value ignored)",
+                    max_tokens,
+                )
            else:
                hive_config = get_hive_config()
                max_tokens = hive_config.get("llm", {}).get("max_tokens", DEFAULT_MAX_TOKENS)

+            # Resolve max_context_tokens with priority:
+            #   1. agent loop_config["max_context_tokens"] (explicit, wins silently)
+            #   2. agent default_config.max_context_tokens (logged)
+            #   3. configuration.json llm.max_context_tokens
+            #   4. hardcoded default (32_000)
+            agent_loop_config: dict = dict(getattr(agent_module, "loop_config", {}))
+            if "max_context_tokens" not in agent_loop_config:
+                if agent_config and hasattr(agent_config, "max_context_tokens"):
+                    agent_loop_config["max_context_tokens"] = agent_config.max_context_tokens
+                    logger.info(
+                        "Agent default_config overrides max_context_tokens: %d"
+                        " (configuration.json value ignored)",
+                        agent_config.max_context_tokens,
+                    )
+                else:
+                    agent_loop_config["max_context_tokens"] = get_max_context_tokens()
+
            # Read intro_message from agent metadata (shown on TUI load)
            agent_metadata = getattr(agent_module, "metadata", None)
            intro_message = ""
@@ -908,13 +943,12 @@ class AgentRunner:
                "version": "1.0.0",
                "entry_node": getattr(agent_module, "entry_node", nodes[0].id),
                "entry_points": getattr(agent_module, "entry_points", {}),
-                "async_entry_points": getattr(agent_module, "async_entry_points", []),
                "terminal_nodes": getattr(agent_module, "terminal_nodes", []),
                "pause_nodes": getattr(agent_module, "pause_nodes", []),
                "nodes": nodes,
                "edges": edges,
                "max_tokens": max_tokens,
-                "loop_config": getattr(agent_module, "loop_config", {}),
+                "loop_config": agent_loop_config,
            }
            # Only pass optional fields if explicitly defined by the agent module
            conversation_mode = getattr(agent_module, "conversation_mode", None)
@@ -926,6 +960,12 @@ class AgentRunner:

            graph = GraphSpec(**graph_kwargs)

+            # Generate flowchart.json if missing (for template/legacy agents)
+            generate_fallback_flowchart(graph, goal, agent_path)
+            # Read skill configuration from agent module
+            agent_default_skills = getattr(agent_module, "default_skills", None)
+            agent_skills = getattr(agent_module, "skills", None)
+
            # Read runtime config (webhook settings, etc.) if defined
            agent_runtime_config = getattr(agent_module, "runtime_config", None)

@@ -937,7 +977,7 @@ class AgentRunner:
            configure_fn = getattr(agent_module, "configure_for_account", None)
            list_accts_fn = getattr(agent_module, "list_connected_accounts", None)

-            return cls(
+            runner = cls(
                agent_path=agent_path,
                graph=graph,
                goal=goal,
@@ -953,19 +993,31 @@ class AgentRunner:
                list_accounts=list_accts_fn,
                credential_store=credential_store,
            )
+            # Stash skill config for use in _setup()
+            runner._agent_default_skills = agent_default_skills
+            runner._agent_skills = agent_skills
+            return runner

        # Fallback: load from agent.json (legacy JSON-based agents)
        agent_json_path = agent_path / "agent.json"
        if not agent_json_path.is_file():
            raise FileNotFoundError(f"No agent.py or agent.json found in {agent_path}")

-        content = agent_json_path.read_text(encoding="utf-8").strip()
-        if not content:
-            raise FileNotFoundError(f"agent.json is empty: {agent_json_path}")
+        with open(agent_json_path, encoding="utf-8") as f:
+            export_data = f.read()

-        graph, goal = load_agent_export(content)
+        if not export_data.strip():
+            raise ValueError(f"Empty agent export file: {agent_json_path}")

-        return cls(
+        try:
+            graph, goal = load_agent_export(export_data)
+        except json.JSONDecodeError as exc:
+            raise ValueError(f"Invalid JSON in agent export file: {agent_json_path}") from exc
+
+        # Generate flowchart.json if missing (for legacy JSON-based agents)
+        generate_fallback_flowchart(graph, goal, agent_path)
+
+        runner = cls(
            agent_path=agent_path,
            graph=graph,
            goal=goal,
@@ -976,6 +1028,9 @@ class AgentRunner:
            skip_credential_validation=skip_credential_validation or False,
            credential_store=credential_store,
        )
+        runner._agent_default_skills = None
+        runner._agent_skills = None
+        return runner

    def register_tool(
        self,
@@ -1099,6 +1154,7 @@ class AgentRunner:
            llm_config = config.get("llm", {})
            use_claude_code = llm_config.get("use_claude_code_subscription", False)
            use_codex = llm_config.get("use_codex_subscription", False)
+            use_kimi_code = llm_config.get("use_kimi_code_subscription", False)
            api_base = llm_config.get("api_base")

            api_key = None
@@ -1114,6 +1170,12 @@ class AgentRunner:
                if not api_key:
                    print("Warning: Codex subscription configured but no token found.")
                    print("Run 'codex' to authenticate, then try again.")
+            elif use_kimi_code:
+                # Get API key from Kimi Code CLI config (~/.kimi/config.toml)
+                api_key = get_kimi_code_token()
+                if not api_key:
+                    print("Warning: Kimi Code subscription configured but no key found.")
+                    print("Run 'kimi /login' to authenticate, then try again.")

            if api_key and use_claude_code:
                # Use litellm's built-in Anthropic OAuth support.
@@ -1144,6 +1206,14 @@ class AgentRunner:
                    store=False,
                    allowed_openai_params=["store"],
                )
+            elif api_key and use_kimi_code:
+                # Kimi Code subscription uses the Kimi coding API (OpenAI-compatible).
+                # The api_base is set automatically by LiteLLMProvider for kimi/ models.
+                self._llm = LiteLLMProvider(
+                    model=self.model,
+                    api_key=api_key,
+                    api_base=api_base,
+                )
            else:
                # Local models (e.g. Ollama) don't need an API key
                if self._is_local_model(self.model):
@@ -1270,6 +1340,19 @@ class AgentRunner:
        except Exception:
            pass  # Best-effort — agent works without account info

+        # Skill configuration — the runtime handles discovery, loading, and
+        # prompt rasterization.  The runner just builds the config.
+        from framework.skills.config import SkillsConfig
+        from framework.skills.manager import SkillsManagerConfig
+
+        skills_manager_config = SkillsManagerConfig(
+            skills_config=SkillsConfig.from_agent_vars(
+                default_skills=getattr(self, "_agent_default_skills", None),
+                skills=getattr(self, "_agent_skills", None),
+            ),
+            project_root=self.agent_path,
+        )
+
        self._setup_agent_runtime(
            tools,
            tool_executor,
@@ -1277,6 +1360,7 @@ class AgentRunner:
            accounts_data=accounts_data,
            tool_provider_map=tool_provider_map,
            event_bus=event_bus,
+            skills_manager_config=skills_manager_config,
        )

    def _get_api_key_env_var(self, model: str) -> str | None:
@@ -1297,6 +1381,8 @@ class AgentRunner:
            return "MISTRAL_API_KEY"
        elif model_lower.startswith("groq/"):
            return "GROQ_API_KEY"
+        elif model_lower.startswith("openrouter/"):
+            return "OPENROUTER_API_KEY"
        elif self._is_local_model(model_lower):
            return None  # Local models don't need an API key
        elif model_lower.startswith("azure/"):
@@ -1307,6 +1393,12 @@ class AgentRunner:
            return "REPLICATE_API_KEY"
        elif model_lower.startswith("together/"):
            return "TOGETHER_API_KEY"
+        elif model_lower.startswith("minimax/") or model_lower.startswith("minimax-"):
+            return "MINIMAX_API_KEY"
+        elif model_lower.startswith("kimi/"):
+            return "KIMI_API_KEY"
+        elif model_lower.startswith("hive/"):
+            return "HIVE_API_KEY"
        else:
            # Default: assume OpenAI-compatible
            return "OPENAI_API_KEY"
@@ -1325,6 +1417,12 @@ class AgentRunner:
        cred_id = None
        if model_lower.startswith("anthropic/") or model_lower.startswith("claude"):
            cred_id = "anthropic"
+        elif model_lower.startswith("minimax/") or model_lower.startswith("minimax-"):
+            cred_id = "minimax"
+        elif model_lower.startswith("kimi/"):
+            cred_id = "kimi"
+        elif model_lower.startswith("hive/"):
+            cred_id = "hive"
        # Add more mappings as providers are added to LLM_CREDENTIALS

        if cred_id is None:
@@ -1364,23 +1462,10 @@ class AgentRunner:
        accounts_data: list[dict] | None = None,
        tool_provider_map: dict[str, str] | None = None,
        event_bus=None,
+        skills_manager_config=None,
    ) -> None:
        """Set up multi-entry-point execution using AgentRuntime."""
-        # Convert AsyncEntryPointSpec to EntryPointSpec for AgentRuntime
        entry_points = []
-        for async_ep in self.graph.async_entry_points:
-            ep = EntryPointSpec(
-                id=async_ep.id,
-                name=async_ep.name,
-                entry_node=async_ep.entry_node,
-                trigger_type=async_ep.trigger_type,
-                trigger_config=async_ep.trigger_config,
-                isolation_level=async_ep.isolation_level,
-                priority=async_ep.priority,
-                max_concurrent=async_ep.max_concurrent,
-                max_resurrections=async_ep.max_resurrections,
-            )
-            entry_points.append(ep)

        # Always create a primary entry point for the graph's entry node.
        # For multi-entry-point agents this ensures the primary path (e.g.
@@ -1437,26 +1522,37 @@ class AgentRunner:
            accounts_data=accounts_data,
            tool_provider_map=tool_provider_map,
            event_bus=event_bus,
+            skills_manager_config=skills_manager_config,
        )

        # Pass intro_message through for TUI display
        self._agent_runtime.intro_message = self.intro_message

+    # ------------------------------------------------------------------
+    # Execution modes
+    #
+    # run()              – One-shot, blocking execution for worker agents
+    #                      (headless CLI via ``hive run``). Validates, runs
+    #                      the graph to completion, and returns the result.
+    #
+    # start() / trigger() – Long-lived runtime for the frontend (queen).
+    #                      start() boots the runtime; trigger() sends
+    #                      non-blocking execution requests. Used by the
+    #                      server session manager and API routes.
+    # ------------------------------------------------------------------
+
    async def run(
        self,
        input_data: dict | None = None,
        session_state: dict | None = None,
        entry_point_id: str | None = None,
    ) -> ExecutionResult:
-        """
-        Execute the agent with given input data.
+        """One-shot execution for worker agents (headless CLI).

-        Validates credentials before execution. If any required credentials
-        are missing, returns an error result with instructions on how to
-        provide them.
+        Validates credentials, runs the graph to completion, and returns
+        the result. Used by ``hive run`` and programmatic callers.

-        For single-entry-point agents, this is the standard execution path.
-        For multi-entry-point agents, you can optionally specify which entry point to use.
+        For the frontend (queen), use start() + trigger() instead.

        Args:
            input_data: Input data for the agent (e.g., {"lead_id": "123"})
@@ -1582,7 +1678,12 @@ class AgentRunner:
    # === Runtime API ===

    async def start(self) -> None:
-        """Start the agent runtime."""
+        """Boot the agent runtime for the frontend (queen).
+
+        Pair with trigger() to send execution requests. Used by the
+        server session manager. For headless worker agents, use run()
+        instead.
+        """
        if self._agent_runtime is None:
            self._setup()

@@ -1599,10 +1700,10 @@ class AgentRunner:
        input_data: dict[str, Any],
        correlation_id: str | None = None,
    ) -> str:
-        """
-        Trigger execution at a specific entry point (non-blocking).
+        """Send a non-blocking execution request to a running runtime.

-        Returns execution ID for tracking.
+        Used by the server API routes after start(). For headless
+        worker agents, use run() instead.

        Args:
            entry_point_id: Which entry point to trigger
@@ -1687,19 +1788,6 @@ class AgentRunner:
            for edge in self.graph.edges
        ]

-        # Build async entry points info
-        async_entry_points_info = [
-            {
-                "id": ep.id,
-                "name": ep.name,
-                "entry_node": ep.entry_node,
-                "trigger_type": ep.trigger_type,
-                "isolation_level": ep.isolation_level,
-                "max_concurrent": ep.max_concurrent,
-            }
-            for ep in self.graph.async_entry_points
-        ]
-
        return AgentInfo(
            name=self.graph.id,
            description=self.graph.description,
@@ -1726,8 +1814,6 @@ class AgentRunner:
            ],
            required_tools=sorted(required_tools),
            has_tools_module=(self.agent_path / "tools.py").exists(),
-            async_entry_points=async_entry_points_info,
-            is_multi_entry_point=self._uses_async_entry_points,
        )

    def validate(self) -> ValidationResult:
@@ -2042,18 +2128,6 @@ Respond with JSON only:
                trigger_type="manual",
                isolation_level="shared",
            )
-        for aep in runner.graph.async_entry_points:
-            entry_points[aep.id] = EntryPointSpec(
-                id=aep.id,
-                name=aep.name,
-                entry_node=aep.entry_node,
-                trigger_type=aep.trigger_type,
-                trigger_config=aep.trigger_config,
-                isolation_level=aep.isolation_level,
-                priority=aep.priority,
-                max_concurrent=aep.max_concurrent,
-            )
-
        await runtime.add_graph(
            graph_id=gid,
            graph=runner.graph,
@@ -455,11 +455,23 @@ class ToolRegistry:

        for server_config in server_list:
            server_config = self._resolve_mcp_server_config(server_config, base_dir)
-            try:
-                self.register_mcp_server(server_config)
-            except Exception as e:
-                name = server_config.get("name", "unknown")
-                logger.warning(f"Failed to register MCP server '{name}': {e}")
+            for _attempt in range(2):
+                try:
+                    self.register_mcp_server(server_config)
+                    break
+                except Exception as e:
+                    name = server_config.get("name", "unknown")
+                    if _attempt == 0:
+                        logger.warning(
+                            "MCP server '%s' failed to register, retrying in 2s: %s",
+                            name,
+                            e,
+                        )
+                        import time
+
+                        time.sleep(2)
+                    else:
+                        logger.warning("MCP server '%s' failed after retry: %s", name, e)

        # Snapshot credential files and ADEN_API_KEY so we can detect mid-session changes
        self._mcp_cred_snapshot = self._snapshot_credentials()
@@ -454,11 +454,11 @@ An agent has requested handoff to the Hive Coder (via the `escalate` synthetic t

 ## Worker Health Monitoring

-These events form the **judge → queen → operator** escalation pipeline.
+These events form the **queen → operator** escalation pipeline.

 ### `worker_escalation_ticket`

-The Worker Health Judge has detected a degradation pattern and is escalating to the Queen.
+A worker degradation pattern has been detected and is being escalated to the Queen.

 | Data Field | Type   | Description                          |
 | ---------- | ------ | ------------------------------------ |
@@ -8,6 +8,7 @@ while preserving the goal-driven approach.
 import asyncio
 import logging
 import time
+import uuid
 from collections.abc import Callable
 from dataclasses import dataclass, field
 from datetime import datetime
@@ -28,6 +29,7 @@ if TYPE_CHECKING:
    from framework.graph.edge import GraphSpec
    from framework.graph.goal import Goal
    from framework.llm.provider import LLMProvider, Tool
+    from framework.skills.manager import SkillsManagerConfig

 logger = logging.getLogger(__name__)

@@ -131,6 +133,10 @@ class AgentRuntime:
        accounts_data: list[dict] | None = None,
        tool_provider_map: dict[str, str] | None = None,
        event_bus: "EventBus | None" = None,
+        skills_manager_config: "SkillsManagerConfig | None" = None,
+        # Deprecated — pass skills_manager_config instead.
+        skills_catalog_prompt: str = "",
+        protocols_prompt: str = "",
    ):
        """
        Initialize agent runtime.
@@ -152,7 +158,13 @@ class AgentRuntime:
            event_bus: Optional external EventBus. If provided, the runtime shares
                this bus instead of creating its own. Used by SessionManager to
                share a single bus between queen, worker, and judge.
+            skills_manager_config: Skill configuration — the runtime owns
+                discovery, loading, and prompt rendering internally.
+            skills_catalog_prompt: Deprecated. Pre-rendered skills catalog.
+            protocols_prompt: Deprecated. Pre-rendered operational protocols.
        """
+        from framework.skills.manager import SkillsManager
+
        self.graph = graph
        self.goal = goal
        self._config = config or AgentRuntimeConfig()
@@ -160,6 +172,29 @@ class AgentRuntime:
        self._checkpoint_config = checkpoint_config
        self.accounts_prompt = accounts_prompt

+        # --- Skill lifecycle: runtime owns the SkillsManager ---
+        if skills_manager_config is not None:
+            # New path: config-driven, runtime handles loading
+            self._skills_manager = SkillsManager(skills_manager_config)
+            self._skills_manager.load()
+        elif skills_catalog_prompt or protocols_prompt:
+            # Legacy path: caller passed pre-rendered strings
+            import warnings
+
+            warnings.warn(
+                "Passing pre-rendered skills_catalog_prompt/protocols_prompt "
+                "is deprecated. Pass skills_manager_config instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            self._skills_manager = SkillsManager.from_precomputed(
+                skills_catalog_prompt, protocols_prompt
+            )
+        else:
+            # Bare constructor: auto-load defaults
+            self._skills_manager = SkillsManager()
+            self._skills_manager.load()
+
        # Primary graph identity
        self._graph_id: str = graph_id or "primary"

@@ -215,6 +250,18 @@ class AgentRuntime:
        # Optional greeting shown to user on TUI load (set by AgentRunner)
        self.intro_message: str = ""

+    # ------------------------------------------------------------------
+    # Skill prompt accessors (read by ExecutionStream constructors)
+    # ------------------------------------------------------------------
+
+    @property
+    def skills_catalog_prompt(self) -> str:
+        return self._skills_manager.skills_catalog_prompt
+
+    @property
+    def protocols_prompt(self) -> str:
+        return self._skills_manager.protocols_prompt
+
    def register_entry_point(self, spec: EntryPointSpec) -> None:
        """
        Register a named entry point for the agent.
@@ -292,6 +339,8 @@ class AgentRuntime:
                    accounts_prompt=self._accounts_prompt,
                    accounts_data=self._accounts_data,
                    tool_provider_map=self._tool_provider_map,
+                    skills_catalog_prompt=self.skills_catalog_prompt,
+                    protocols_prompt=self.protocols_prompt,
                )
                await stream.start()
                self._streams[ep_id] = stream
@@ -349,7 +398,7 @@ class AgentRuntime:
                            return
                        # Skip events originating from this graph's own
                        # executions (e.g. guardian should not fire on
-                        # hive_coder failures — only secondary graphs).
+                        # queen failures — only secondary graphs).
                        if _exclude_own and event.graph_id == self._graph_id:
                            return
                        ep_spec = self._entry_points.get(entry_point_id)
@@ -392,18 +441,24 @@ class AgentRuntime:

                tc = spec.trigger_config
                cron_expr = tc.get("cron")
-                interval = tc.get("interval_minutes")
+                _raw_interval = tc.get("interval_minutes")
+                interval = float(_raw_interval) if _raw_interval is not None else None
                run_immediately = tc.get("run_immediately", False)

                if cron_expr:
                    # Cron expression mode — takes priority over interval_minutes
                    try:
                        from croniter import croniter
+                    except ImportError as e:
+                        raise RuntimeError(
+                            "croniter is required for cron-based entry points. "
+                            "Install it with: uv pip install croniter"
+                        ) from e

-                        # Validate the expression upfront
+                    try:
                        if not croniter.is_valid(cron_expr):
                            raise ValueError(f"Invalid cron expression: {cron_expr}")
-                    except (ImportError, ValueError) as e:
+                    except ValueError as e:
                        logger.warning(
                            "Entry point '%s' has invalid cron config: %s",
                            ep_id,
@@ -543,7 +598,7 @@ class AgentRuntime:
                            ep_id,
                            cron_expr,
                            run_immediately,
-                            idle_timeout=tc.get("idle_timeout_seconds", 300),
+                            idle_timeout=float(tc.get("idle_timeout_seconds", 300)),
                        )()
                    )
                    self._timer_tasks.append(task)
@@ -673,7 +728,7 @@ class AgentRuntime:
                            ep_id,
                            interval,
                            run_immediately,
-                            idle_timeout=tc.get("idle_timeout_seconds", 300),
+                            idle_timeout=float(tc.get("idle_timeout_seconds", 300)),
                        )()
                    )
                    self._timer_tasks.append(task)
@@ -822,7 +877,8 @@ class AgentRuntime:
        if stream is None:
            raise ValueError(f"Entry point '{entry_point_id}' not found")

-        return await stream.execute(input_data, correlation_id, session_state)
+        run_id = uuid.uuid4().hex[:12]
+        return await stream.execute(input_data, correlation_id, session_state, run_id=run_id)

    async def trigger_and_wait(
        self,
@@ -919,6 +975,8 @@ class AgentRuntime:
                accounts_prompt=self._accounts_prompt,
                accounts_data=self._accounts_data,
                tool_provider_map=self._tool_provider_map,
+                skills_catalog_prompt=self.skills_catalog_prompt,
+                protocols_prompt=self.protocols_prompt,
            )
            if self._running:
                await stream.start()
@@ -997,7 +1055,8 @@ class AgentRuntime:
            if spec.trigger_type != "timer":
                continue
            tc = spec.trigger_config
-            interval = tc.get("interval_minutes")
+            _raw_interval = tc.get("interval_minutes")
+            interval = float(_raw_interval) if _raw_interval is not None else None
            run_immediately = tc.get("run_immediately", False)

            if interval and interval > 0 and self._running:
@@ -1142,7 +1201,7 @@ class AgentRuntime:
                        ep_id,
                        interval,
                        run_immediately,
-                        idle_timeout=tc.get("idle_timeout_seconds", 300),
+                        idle_timeout=float(tc.get("idle_timeout_seconds", 300)),
                    )()
                )
                timer_tasks.append(task)
@@ -1359,8 +1418,8 @@ class AgentRuntime:
                allowed_keys = set(entry_node.input_keys)

        # Search primary graph's streams for an active session.
-        # Skip isolated streams (e.g. health judge) — they have their own
-        # session directories and must never be used as a shared session.
+        # Skip isolated streams — they have their own session directories
+        # and must never be used as a shared session.
        all_streams: list[tuple[str, ExecutionStream]] = []
        for _gid, reg in self._graphs.items():
            for ep_id, stream in reg.streams.items():
@@ -1531,6 +1590,11 @@ class AgentRuntime:
                for executor in stream._active_executors.values():
                    for node_id, node in executor.node_registry.items():
                        if getattr(node, "_awaiting_input", False):
+                            # Skip escalation receivers — those are handled
+                            # by the queen via inject_worker_message(), not
+                            # by the user directly.
+                            if ":escalation:" in node_id:
+                                continue
                            return node_id, graph_id
        return None, None

@@ -1692,6 +1756,10 @@ def create_agent_runtime(
    accounts_data: list[dict] | None = None,
    tool_provider_map: dict[str, str] | None = None,
    event_bus: "EventBus | None" = None,
+    skills_manager_config: "SkillsManagerConfig | None" = None,
+    # Deprecated — pass skills_manager_config instead.
+    skills_catalog_prompt: str = "",
+    protocols_prompt: str = "",
 ) -> AgentRuntime:
    """
    Create and configure an AgentRuntime with entry points.
@@ -1718,6 +1786,10 @@ def create_agent_runtime(
        accounts_data: Raw account data for per-node prompt generation.
        tool_provider_map: Tool name to provider name mapping for account routing.
        event_bus: Optional external EventBus to share with other components.
+        skills_manager_config: Skill configuration — the runtime owns
+            discovery, loading, and prompt rendering internally.
+        skills_catalog_prompt: Deprecated. Pre-rendered skills catalog.
+        protocols_prompt: Deprecated. Pre-rendered operational protocols.

    Returns:
        Configured AgentRuntime (not yet started)
@@ -1744,6 +1816,9 @@ def create_agent_runtime(
        accounts_data=accounts_data,
        tool_provider_map=tool_provider_map,
        event_bus=event_bus,
+        skills_manager_config=skills_manager_config,
+        skills_catalog_prompt=skills_catalog_prompt,
+        protocols_prompt=protocols_prompt,
    )

    for spec in entry_points:
@@ -1,4 +1,4 @@
-"""EscalationTicket — structured schema for worker health judge escalations."""
+"""EscalationTicket — structured schema for worker health escalations."""

 from __future__ import annotations

@@ -10,10 +10,10 @@ from pydantic import BaseModel, Field


 class EscalationTicket(BaseModel):
-    """Structured escalation report emitted by the Worker Health Judge.
+    """Structured escalation report for worker health monitoring.

-    The judge must fill every field before calling emit_escalation_ticket.
-    Pydantic validation rejects partial tickets, preventing impulsive escalation.
+    All fields must be filled before calling emit_escalation_ticket.
+    Pydantic validation rejects partial tickets.
    """

    ticket_id: str = Field(default_factory=lambda: str(uuid4()))
@@ -25,7 +25,7 @@ class EscalationTicket(BaseModel):
    worker_node_id: str
    worker_graph_id: str

-    # Problem characterization (filled by judge via LLM deliberation)
+    # Problem characterization
    severity: Literal["low", "medium", "high", "critical"]
    cause: str  # Human-readable: "Node has produced 18 RETRY verdicts..."
    judge_reasoning: str  # Judge's own deliberation chain
@@ -97,6 +97,7 @@ class EventType(StrEnum):
    # Client I/O (client_facing=True nodes only)
    CLIENT_OUTPUT_DELTA = "client_output_delta"
    CLIENT_INPUT_REQUESTED = "client_input_requested"
+    CLIENT_INPUT_RECEIVED = "client_input_received"

    # Internal node observability (client_facing=False nodes)
    NODE_INTERNAL_OUTPUT = "node_internal_output"
@@ -104,7 +105,7 @@ class EventType(StrEnum):
    NODE_STALLED = "node_stalled"
    NODE_TOOL_DOOM_LOOP = "node_tool_doom_loop"

-    # Judge decisions
+    # Judge decisions (implicit judge in event loop nodes)
    JUDGE_VERDICT = "judge_verdict"

    # Output tracking
@@ -123,10 +124,10 @@ class EventType(StrEnum):
    # Custom events
    CUSTOM = "custom"

-    # Escalation (agent requests handoff to hive_coder)
+    # Escalation (agent requests handoff to queen)
    ESCALATION_REQUESTED = "escalation_requested"

-    # Worker health monitoring (judge → queen → operator)
+    # Worker health monitoring
    WORKER_ESCALATION_TICKET = "worker_escalation_ticket"
    QUEEN_INTERVENTION_REQUESTED = "queen_intervention_requested"

@@ -137,6 +138,12 @@ class EventType(StrEnum):
    WORKER_LOADED = "worker_loaded"
    CREDENTIALS_REQUIRED = "credentials_required"

+    # Draft graph (planning phase — lightweight graph preview)
+    DRAFT_GRAPH_UPDATED = "draft_graph_updated"
+
+    # Flowchart map updated (after reconciliation with runtime graph)
+    FLOWCHART_MAP_UPDATED = "flowchart_map_updated"
+
    # Queen phase changes (building <-> staging <-> running)
    QUEEN_PHASE_CHANGED = "queen_phase_changed"

@@ -146,6 +153,14 @@ class EventType(StrEnum):
    # Subagent reports (one-way progress updates from sub-agents)
    SUBAGENT_REPORT = "subagent_report"

+    # Trigger lifecycle (queen-level triggers / heartbeats)
+    TRIGGER_AVAILABLE = "trigger_available"
+    TRIGGER_ACTIVATED = "trigger_activated"
+    TRIGGER_DEACTIVATED = "trigger_deactivated"
+    TRIGGER_FIRED = "trigger_fired"
+    TRIGGER_REMOVED = "trigger_removed"
+    TRIGGER_UPDATED = "trigger_updated"
+

@dataclass
 class AgentEvent:
@@ -159,10 +174,11 @@ class AgentEvent:
    timestamp: datetime = field(default_factory=datetime.now)
    correlation_id: str | None = None  # For tracking related events
    graph_id: str | None = None  # Which graph emitted this event (multi-graph sessions)
+    run_id: str | None = None  # Unique ID per trigger() invocation — used for run dividers

    def to_dict(self) -> dict:
        """Convert to dictionary for serialization."""
-        return {
+        d = {
            "type": self.type.value,
            "stream_id": self.stream_id,
            "node_id": self.node_id,
@@ -172,6 +188,9 @@ class AgentEvent:
            "correlation_id": self.correlation_id,
            "graph_id": self.graph_id,
        }
+        if self.run_id is not None:
+            d["run_id"] = self.run_id
+        return d


 # Type for event handlers
@@ -240,6 +259,128 @@ class EventBus:
        self._semaphore = asyncio.Semaphore(max_concurrent_handlers)
        self._subscription_counter = 0
        self._lock = asyncio.Lock()
+        # Per-session persistent event log (always-on, survives restarts)
+        self._session_log: IO[str] | None = None
+        self._session_log_iteration_offset: int = 0
+        # Accumulator for streaming-delta snapshots — flushed on llm_turn_complete.
+        # Key: (stream_id, node_id, execution_id, iteration, inner_turn) → latest AgentEvent
+        self._pending_output_snapshots: dict[tuple, AgentEvent] = {}
+
+    def set_session_log(self, path: Path, *, iteration_offset: int = 0) -> None:
+        """Enable per-session event persistence to a JSONL file.
+
+        Called once when the queen starts so that all events survive server
+        restarts and can be replayed to reconstruct the frontend state.
+
+        ``iteration_offset`` is added to the ``iteration`` field in logged
+        events so that cold-resumed sessions produce monotonically increasing
+        iteration values — preventing frontend message ID collisions between
+        the original run and resumed runs.
+        """
+        if self._session_log is not None:
+            try:
+                self._session_log.close()
+            except Exception:
+                pass
+        path.parent.mkdir(parents=True, exist_ok=True)
+        self._session_log = open(path, "a", encoding="utf-8")  # noqa: SIM115
+        self._session_log_iteration_offset = iteration_offset
+        logger.info("Session event log → %s (iteration_offset=%d)", path, iteration_offset)
+
+    def close_session_log(self) -> None:
+        """Close the per-session event log file."""
+        # Flush any pending output snapshots before closing
+        self._flush_pending_snapshots()
+        if self._session_log is not None:
+            try:
+                self._session_log.close()
+            except Exception:
+                pass
+            self._session_log = None
+
+    # Event types that are high-frequency streaming deltas — accumulated rather
+    # than written individually to the session log.
+    _STREAMING_DELTA_TYPES = frozenset(
+        {
+            EventType.CLIENT_OUTPUT_DELTA,
+            EventType.LLM_TEXT_DELTA,
+            EventType.LLM_REASONING_DELTA,
+        }
+    )
+
+    def _write_session_log_event(self, event: AgentEvent) -> None:
+        """Write an event to the per-session log with streaming coalescing.
+
+        Streaming deltas (the types in _STREAMING_DELTA_TYPES) are accumulated
+        in memory.  When llm_turn_complete fires, any pending snapshots for that
+        (stream_id, node_id, execution_id) are flushed as single consolidated
+        events before the turn-complete event itself is written.
+
+        Note: iteration offset is already applied in publish() before this is
+        called, so events here already have correct iteration values.
+        """
+        if self._session_log is None:
+            return
+
+        if event.type in self._STREAMING_DELTA_TYPES:
+            # Accumulate — keep only the latest event (which carries the full snapshot)
+            key = (
+                event.stream_id,
+                event.node_id,
+                event.execution_id,
+                event.data.get("iteration"),
+                event.data.get("inner_turn", 0),
+            )
+            self._pending_output_snapshots[key] = event
+            return
+
+        # On turn-complete, flush accumulated snapshots for this stream first
+        if event.type == EventType.LLM_TURN_COMPLETE:
+            self._flush_pending_snapshots(
+                stream_id=event.stream_id,
+                node_id=event.node_id,
+                execution_id=event.execution_id,
+            )
+
+        line = json.dumps(event.to_dict(), default=str)
+        self._session_log.write(line + "\n")
+        self._session_log.flush()
+
+    def _flush_pending_snapshots(
+        self,
+        stream_id: str | None = None,
+        node_id: str | None = None,
+        execution_id: str | None = None,
+    ) -> None:
+        """Flush accumulated streaming snapshots to the session log.
+
+        When stream_id is given, only entries matching stream_id, node_id, and
+        execution_id are flushed.  Without it (e.g. on close), everything is flushed.
+        """
+        if self._session_log is None or not self._pending_output_snapshots:
+            return
+
+        to_flush: list[tuple] = []
+        for key, _evt in self._pending_output_snapshots.items():
+            if stream_id is not None:
+                k_stream, k_node, k_exec, _, _ = key
+                if k_stream != stream_id or k_node != node_id or k_exec != execution_id:
+                    continue
+            to_flush.append(key)
+
+        for key in to_flush:
+            evt = self._pending_output_snapshots.pop(key)
+            try:
+                line = json.dumps(evt.to_dict(), default=str)
+                self._session_log.write(line + "\n")
+            except Exception:
+                pass
+
+        if to_flush:
+            try:
+                self._session_log.flush()
+            except Exception:
+                pass

    def subscribe(
        self,
@@ -305,6 +446,19 @@ class EventBus:
        Args:
            event: Event to publish
        """
+        # Apply iteration offset at the source so ALL consumers (SSE subscribers,
+        # event history, session log) see the same monotonically increasing
+        # iteration values.  Without this, live SSE would use raw iterations
+        # while events.jsonl would use offset iterations, causing ID collisions
+        # on the frontend when replaying after cold resume.
+        if (
+            self._session_log_iteration_offset
+            and isinstance(event.data, dict)
+            and "iteration" in event.data
+        ):
+            offset = self._session_log_iteration_offset
+            event.data = {**event.data, "iteration": event.data["iteration"] + offset}
+
        # Add to history
        async with self._lock:
            self._event_history.append(event)
@@ -325,6 +479,15 @@ class EventBus:
                except Exception:
                    pass  # never break event delivery

+        # Per-session persistent log (always-on when set_session_log was called).
+        # Streaming deltas are coalesced: every _STREAMING_DELTA_TYPES event is
+        # accumulated and flushed as a single snapshot event on llm_turn_complete.
+        if self._session_log is not None:
+            try:
+                self._write_session_log_event(event)
+            except Exception:
+                pass  # never break event delivery
+
        # Find matching subscriptions
        matching_handlers: list[EventHandler] = []

@@ -385,6 +548,7 @@ class EventBus:
        execution_id: str,
        input_data: dict[str, Any] | None = None,
        correlation_id: str | None = None,
+        run_id: str | None = None,
    ) -> None:
        """Emit execution started event."""
        await self.publish(
@@ -394,6 +558,7 @@ class EventBus:
                execution_id=execution_id,
                data={"input": input_data or {}},
                correlation_id=correlation_id,
+                run_id=run_id,
            )
        )

@@ -403,6 +568,7 @@ class EventBus:
        execution_id: str,
        output: dict[str, Any] | None = None,
        correlation_id: str | None = None,
+        run_id: str | None = None,
    ) -> None:
        """Emit execution completed event."""
        await self.publish(
@@ -412,6 +578,7 @@ class EventBus:
                execution_id=execution_id,
                data={"output": output or {}},
                correlation_id=correlation_id,
+                run_id=run_id,
            )
        )

@@ -421,6 +588,7 @@ class EventBus:
        execution_id: str,
        error: str,
        correlation_id: str | None = None,
+        run_id: str | None = None,
    ) -> None:
        """Emit execution failed event."""
        await self.publish(
@@ -430,6 +598,7 @@ class EventBus:
                execution_id=execution_id,
                data={"error": error},
                correlation_id=correlation_id,
+                run_id=run_id,
            )
        )

@@ -521,15 +690,19 @@ class EventBus:
        node_id: str,
        iteration: int,
        execution_id: str | None = None,
+        extra_data: dict[str, Any] | None = None,
    ) -> None:
        """Emit node loop iteration event."""
+        data: dict[str, Any] = {"iteration": iteration}
+        if extra_data:
+            data.update(extra_data)
        await self.publish(
            AgentEvent(
                type=EventType.NODE_LOOP_ITERATION,
                stream_id=stream_id,
                node_id=node_id,
                execution_id=execution_id,
-                data={"iteration": iteration},
+                data=data,
            )
        )

@@ -578,6 +751,7 @@ class EventBus:
        content: str,
        snapshot: str,
        execution_id: str | None = None,
+        inner_turn: int = 0,
    ) -> None:
        """Emit LLM text delta event."""
        await self.publish(
@@ -586,7 +760,7 @@ class EventBus:
                stream_id=stream_id,
                node_id=node_id,
                execution_id=execution_id,
-                data={"content": content, "snapshot": snapshot},
+                data={"content": content, "snapshot": snapshot, "inner_turn": inner_turn},
            )
        )

@@ -616,6 +790,7 @@ class EventBus:
        model: str,
        input_tokens: int,
        output_tokens: int,
+        cached_tokens: int = 0,
        execution_id: str | None = None,
        iteration: int | None = None,
    ) -> None:
@@ -625,6 +800,7 @@ class EventBus:
            "model": model,
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
+            "cached_tokens": cached_tokens,
        }
        if iteration is not None:
            data["iteration"] = iteration
@@ -700,9 +876,10 @@ class EventBus:
        snapshot: str,
        execution_id: str | None = None,
        iteration: int | None = None,
+        inner_turn: int = 0,
    ) -> None:
        """Emit client output delta event (client_facing=True nodes)."""
-        data: dict = {"content": content, "snapshot": snapshot}
+        data: dict = {"content": content, "snapshot": snapshot, "inner_turn": inner_turn}
        if iteration is not None:
            data["iteration"] = iteration
        await self.publish(
@@ -722,16 +899,23 @@ class EventBus:
        prompt: str = "",
        execution_id: str | None = None,
        options: list[str] | None = None,
+        questions: list[dict] | None = None,
    ) -> None:
        """Emit client input requested event (client_facing=True nodes).

        Args:
            options: Optional predefined choices for the user (1-3 items).
-                     The frontend appends an "Other" free-text option automatically.
+                     The frontend appends an "Other" free-text option
+                     automatically.
+            questions: Optional list of question dicts for multi-question
+                       batches (from ask_user_multiple). Each dict has id,
+                       prompt, and optional options.
        """
        data: dict[str, Any] = {"prompt": prompt}
        if options:
            data["options"] = options
+        if questions:
+            data["questions"] = questions
        await self.publish(
            AgentEvent(
                type=EventType.CLIENT_INPUT_REQUESTED,
@@ -976,7 +1160,7 @@ class EventBus:
        context: str = "",
        execution_id: str | None = None,
    ) -> None:
-        """Emit escalation requested event (agent wants hive_coder)."""
+        """Emit escalation requested event (agent wants queen)."""
        await self.publish(
            AgentEvent(
                type=EventType.ESCALATION_REQUESTED,
@@ -994,7 +1178,7 @@ class EventBus:
        ticket: dict,
        execution_id: str | None = None,
    ) -> None:
-        """Emitted by health judge when worker shows a degradation pattern."""
+        """Emitted when worker shows a degradation pattern."""
        await self.publish(
            AgentEvent(
                type=EventType.WORKER_ESCALATION_TICKET,
@@ -9,6 +9,7 @@ Each stream has:

 import asyncio
 import logging
+import os
 import time
 import uuid
 from collections import OrderedDict
@@ -126,6 +127,7 @@ class ExecutionContext:
    input_data: dict[str, Any]
    isolation_level: IsolationLevel
    session_state: dict[str, Any] | None = None  # For resuming from pause
+    run_id: str | None = None  # Unique ID per trigger() invocation
    started_at: datetime = field(default_factory=datetime.now)
    completed_at: datetime | None = None
    status: str = "pending"  # pending, running, completed, failed, paused
@@ -184,6 +186,8 @@ class ExecutionStream:
        accounts_prompt: str = "",
        accounts_data: list[dict] | None = None,
        tool_provider_map: dict[str, str] | None = None,
+        skills_catalog_prompt: str = "",
+        protocols_prompt: str = "",
    ):
        """
        Initialize execution stream.
@@ -207,6 +211,8 @@ class ExecutionStream:
            accounts_prompt: Connected accounts block for system prompt injection
            accounts_data: Raw account data for per-node prompt generation
            tool_provider_map: Tool name to provider name mapping for account routing
+            skills_catalog_prompt: Available skills catalog for system prompt
+            protocols_prompt: Default skill operational protocols for system prompt
        """
        self.stream_id = stream_id
        self.entry_spec = entry_spec
@@ -228,6 +234,21 @@ class ExecutionStream:
        self._accounts_prompt = accounts_prompt
        self._accounts_data = accounts_data
        self._tool_provider_map = tool_provider_map
+        self._skills_catalog_prompt = skills_catalog_prompt
+        self._protocols_prompt = protocols_prompt
+
+        _es_logger = logging.getLogger(__name__)
+        if protocols_prompt:
+            _es_logger.info(
+                "ExecutionStream[%s] received protocols_prompt (%d chars)",
+                stream_id,
+                len(protocols_prompt),
+            )
+        else:
+            _es_logger.warning(
+                "ExecutionStream[%s] received EMPTY protocols_prompt",
+                stream_id,
+            )

        # Create stream-scoped runtime
        self._runtime = StreamRuntime(
@@ -240,6 +261,7 @@ class ExecutionStream:
        self._active_executions: dict[str, ExecutionContext] = {}
        self._execution_tasks: dict[str, asyncio.Task] = {}
        self._active_executors: dict[str, GraphExecutor] = {}
+        self._cancel_reasons: dict[str, str] = {}
        self._execution_results: OrderedDict[str, ExecutionResult] = OrderedDict()
        self._execution_result_times: dict[str, float] = {}
        self._completion_events: dict[str, asyncio.Event] = {}
@@ -423,11 +445,36 @@ class ExecutionStream:
                return True
        return False

+    async def inject_trigger(
+        self,
+        node_id: str,
+        trigger: Any,
+    ) -> bool:
+        """Deliver a trigger to the first active node able to accept it.
+
+        Scans the active executors for ``node_id`` in their ``node_registry``
+        and awaits ``inject_trigger()`` on the first match that defines it.
+
+        Args:
+            node_id: ID of the target node (the queen EventLoopNode).
+            trigger: ``TriggerEvent`` passed through (Any avoids circular imports).
+
+        Returns True if the trigger was delivered, False otherwise.
+        """
+        for active in self._active_executors.values():
+            target = active.node_registry.get(node_id)
+            if target is None or not hasattr(target, "inject_trigger"):
+                continue
+            await target.inject_trigger(trigger)
+            return True
+        return False
+
    async def execute(
        self,
        input_data: dict[str, Any],
        correlation_id: str | None = None,
        session_state: dict[str, Any] | None = None,
+        run_id: str | None = None,
    ) -> str:
        """
        Queue an execution and return its ID.
@@ -438,6 +485,7 @@ class ExecutionStream:
            input_data: Input data for this execution
            correlation_id: Optional ID to correlate related executions
            session_state: Optional session state to resume from (with paused_at, memory)
+            run_id: Unique ID for this trigger invocation (for run dividers)

        Returns:
            Execution ID for tracking
@@ -464,7 +512,7 @@ class ExecutionStream:
                        node.signal_shutdown()
                    if hasattr(node, "cancel_current_turn"):
                        node.cancel_current_turn()
-            await self.cancel_execution(eid)
+            await self.cancel_execution(eid, reason="Restarted with new execution")

        # When resuming, reuse the original session ID so the execution
        # continues in the same session directory instead of creating a new one.
@@ -498,6 +546,7 @@ class ExecutionStream:
            input_data=input_data,
            isolation_level=self.entry_spec.get_isolation_level(),
            session_state=session_state,
+            run_id=run_id,
        )

        async with self._lock:
@@ -573,7 +622,9 @@ class ExecutionStream:
                        execution_id=execution_id,
                        input_data=ctx.input_data,
                        correlation_id=ctx.correlation_id,
+                        run_id=ctx.run_id,
                    )
+                self._write_run_event(execution_id, ctx.run_id, "run_started")

                # Create execution-scoped memory
                self._state_manager.create_memory(
@@ -643,6 +694,8 @@ class ExecutionStream:
                        accounts_prompt=self._accounts_prompt,
                        accounts_data=self._accounts_data,
                        tool_provider_map=self._tool_provider_map,
+                        skills_catalog_prompt=self._skills_catalog_prompt,
+                        protocols_prompt=self._protocols_prompt,
                    )
                    # Track executor so inject_input() can reach EventLoopNode instances
                    self._active_executors[execution_id] = executor
@@ -738,6 +791,7 @@ class ExecutionStream:
                            execution_id=execution_id,
                            output=result.output,
                            correlation_id=ctx.correlation_id,
+                            run_id=ctx.run_id,
                        )
                    elif result.paused_at:
                        # The executor returns paused_at on CancelledError but
@@ -755,8 +809,22 @@ class ExecutionStream:
                            execution_id=execution_id,
                            error=result.error or "Unknown error",
                            correlation_id=ctx.correlation_id,
+                            run_id=ctx.run_id,
                        )

+                # Write run event for historical restoration
+                if result.success:
+                    self._write_run_event(execution_id, ctx.run_id, "run_completed")
+                elif result.paused_at:
+                    self._write_run_event(execution_id, ctx.run_id, "run_paused")
+                else:
+                    self._write_run_event(
+                        execution_id,
+                        ctx.run_id,
+                        "run_failed",
+                        {"error": result.error or "Unknown error"},
+                    )
+
                logger.debug(f"Execution {execution_id} completed: success={result.success}")

            except asyncio.CancelledError:
@@ -801,22 +869,25 @@ class ExecutionStream:
                # Emit SSE event so the frontend knows the execution stopped.
                # The executor does NOT emit on CancelledError, so there is no
                # risk of double-emitting.
+                cancel_reason = self._cancel_reasons.pop(execution_id, "Execution cancelled")
                if self._scoped_event_bus:
                    if has_result and result.paused_at:
                        await self._scoped_event_bus.emit_execution_paused(
                            stream_id=self.stream_id,
                            node_id=result.paused_at,
-                            reason="Execution cancelled",
+                            reason=cancel_reason,
                            execution_id=execution_id,
                        )
                    else:
                        await self._scoped_event_bus.emit_execution_failed(
                            stream_id=self.stream_id,
                            execution_id=execution_id,
-                            error="Execution cancelled",
+                            error=cancel_reason,
                            correlation_id=ctx.correlation_id,
+                            run_id=ctx.run_id,
                        )

+                self._write_run_event(execution_id, ctx.run_id, "run_cancelled")
                # Don't re-raise - we've handled it and saved state

            except Exception as e:
@@ -853,7 +924,9 @@ class ExecutionStream:
                        execution_id=execution_id,
                        error=str(e),
                        correlation_id=ctx.correlation_id,
+                        run_id=ctx.run_id,
                    )
+                self._write_run_event(execution_id, ctx.run_id, "run_failed", {"error": str(e)})

            finally:
                # Clean up state
@@ -869,6 +942,36 @@ class ExecutionStream:
                    self._completion_events.pop(execution_id, None)
                    self._execution_tasks.pop(execution_id, None)

+    def _write_run_event(
+        self,
+        execution_id: str,
+        run_id: str | None,
+        event: str,
+        extra: dict[str, Any] | None = None,
+    ) -> None:
+        """Best-effort append of one run lifecycle record to the session's runs.jsonl."""
+        import json as _json
+
+        # Nothing to record without a session store or a run ID.
+        if not (self._session_store and run_id):
+            return
+        stamp = datetime.now()
+        payload = {
+            "run_id": run_id,
+            "event": event,
+            "timestamp": stamp.isoformat(),
+            "created_at": stamp.timestamp(),
+            # Caller-supplied keys go last and override on clashes.
+            **(extra or {}),
+        }
+        log_path = self._session_store.get_session_path(execution_id) / "runs.jsonl"
+        try:
+            log_path.parent.mkdir(parents=True, exist_ok=True)
+            with open(log_path, "a", encoding="utf-8") as sink:
+                sink.write(_json.dumps(payload) + "\n")
+        except OSError:
+            pass  # Non-critical — don't break execution
+
    async def _write_session_state(
        self,
        execution_id: str,
@@ -961,6 +1064,9 @@ class ExecutionStream:
            if error:
                state.result.error = error

+            # Stamp the owning process ID for cross-process stale detection
+            state.pid = os.getpid()
+
            # Write state.json
            await self._session_store.write_state(execution_id, state)
            logger.debug(f"Wrote state.json for session {execution_id} (status={status})")
@@ -972,8 +1078,8 @@ class ExecutionStream:
    def _create_modified_graph(self) -> "GraphSpec":
        """Create a graph with the entry point overridden.

-        Preserves the original graph's entry_points and async_entry_points
-        so that validation correctly considers ALL entry nodes reachable.
+        Preserves the original graph's entry_points so that validation
+        correctly considers ALL entry nodes reachable.
        Each stream only executes from its own entry_node, but the full
        graph must validate with all entry points accounted for.
        """
@@ -998,7 +1104,6 @@ class ExecutionStream:
            version=self.graph.version,
            entry_node=self.entry_spec.entry_node,  # Use our entry point
            entry_points=merged_entry_points,
-            async_entry_points=self.graph.async_entry_points,
            terminal_nodes=self.graph.terminal_nodes,
            pause_nodes=self.graph.pause_nodes,
            nodes=self.graph.nodes,
@@ -1054,18 +1159,24 @@ class ExecutionStream:
        """Get execution context."""
        return self._active_executions.get(execution_id)

-    async def cancel_execution(self, execution_id: str) -> bool:
+    async def cancel_execution(self, execution_id: str, *, reason: str | None = None) -> bool:
        """
        Cancel a running execution.

        Args:
            execution_id: Execution to cancel
+            reason: Human-readable reason for the cancellation (e.g.
+                "Stopped by queen", "User requested pause"). If not
+                provided, defaults to "Execution cancelled".

        Returns:
            True if cancelled, False if not found
        """
        task = self._execution_tasks.get(execution_id)
        if task and not task.done():
+            # Store the reason so the CancelledError handler can use it
+            # when emitting the pause/fail event.
+            self._cancel_reasons[execution_id] = reason or "Execution cancelled"
            task.cancel()
            # Wait briefly for the task to finish. Don't block indefinitely —
            # the task may be stuck in a long LLM API call that doesn't
@@ -47,25 +47,34 @@ class RuntimeLogStore:
        self._base_path = base_path
        # Note: _runs_dir is determined per-run_id by _get_run_dir()

+    def _session_logs_dir(self, run_id: str) -> Path:
+        """Build ``<storage root>/sessions/<run_id>/logs`` for the given run ID."""
+        base = self._base_path
+        storage_root = base.parent if base.name == "runtime_logs" else base
+        return storage_root.joinpath("sessions", run_id, "logs")
+
+    def _legacy_run_dir(self, run_id: str) -> Path:
+        """Build the deprecated standalone ``<base_path>/runs/<run_id>`` location."""
+        return self._base_path.joinpath("runs", run_id)
+
    def _get_run_dir(self, run_id: str) -> Path:
        """Determine run directory path based on run_id format.

-        - New format (session_*): {storage_root}/sessions/{run_id}/logs/
+        - Session-backed (logs dir exists or session_* id): {storage_root}/sessions/{run_id}/logs/
        - Old format (anything else): {base_path}/runs/{run_id}/ (deprecated)
        """
-        if run_id.startswith("session_"):
-            is_runtime_logs = self._base_path.name == "runtime_logs"
-            root = self._base_path.parent if is_runtime_logs else self._base_path
-            return root / "sessions" / run_id / "logs"
+        session_run_dir = self._session_logs_dir(run_id)
+        if session_run_dir.exists() or run_id.startswith("session_"):
+            return session_run_dir
        import warnings

        warnings.warn(
            f"Reading logs from deprecated location for run_id={run_id}. "
-            "New sessions use unified storage at sessions/session_*/logs/",
+            "New sessions use unified storage at sessions/<session_id>/logs/",
            DeprecationWarning,
            stacklevel=3,
        )
-        return self._base_path / "runs" / run_id
+        return self._legacy_run_dir(run_id)

    # -------------------------------------------------------------------
    # Incremental write (sync — called from locked sections)
@@ -76,6 +85,10 @@ class RuntimeLogStore:
        run_dir = self._get_run_dir(run_id)
        run_dir.mkdir(parents=True, exist_ok=True)

+    def ensure_session_run_dir(self, run_id: str) -> None:
+        """Create the session-backed logs directory (and any missing parents) up front."""
+        self._session_logs_dir(run_id).mkdir(parents=True, exist_ok=True)
+
    def append_step(self, run_id: str, step: NodeStepLog) -> None:
        """Append one JSONL line to tool_logs.jsonl. Sync."""
        path = self._get_run_dir(run_id) / "tool_logs.jsonl"
@@ -200,17 +213,17 @@ class RuntimeLogStore:
        run_ids = []

        # Scan new location: base_path/sessions/{session_id}/logs/
-        # Determine the correct base path for sessions
        is_runtime_logs = self._base_path.name == "runtime_logs"
        root = self._base_path.parent if is_runtime_logs else self._base_path
        sessions_dir = root / "sessions"

        if sessions_dir.exists():
            for session_dir in sessions_dir.iterdir():
-                if session_dir.is_dir() and session_dir.name.startswith("session_"):
-                    logs_dir = session_dir / "logs"
-                    if logs_dir.exists() and logs_dir.is_dir():
-                        run_ids.append(session_dir.name)
+                if not session_dir.is_dir():
+                    continue
+                logs_dir = session_dir / "logs"
+                if logs_dir.exists() and logs_dir.is_dir():
+                    run_ids.append(session_dir.name)

        # Scan old location: base_path/runs/ (deprecated)
        old_runs_dir = self._base_path / "runs"
@@ -66,15 +66,16 @@ class RuntimeLogger:
        """
        if session_id:
            self._run_id = session_id
+            self._store.ensure_session_run_dir(self._run_id)
        else:
            ts = datetime.now(UTC).strftime("%Y%m%dT%H%M%S")
            short_uuid = uuid.uuid4().hex[:8]
            self._run_id = f"{ts}_{short_uuid}"
+            self._store.ensure_run_dir(self._run_id)

        self._goal_id = goal_id
        self._started_at = datetime.now(UTC).isoformat()
        self._logged_node_ids = set()
-        self._store.ensure_run_dir(self._run_id)
        return self._run_id

    def log_step(
@@ -17,7 +17,7 @@ from pathlib import Path
 import pytest

 from framework.graph import Goal
-from framework.graph.edge import AsyncEntryPointSpec, EdgeCondition, EdgeSpec, GraphSpec
+from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
 from framework.graph.goal import Constraint, SuccessCriterion
 from framework.graph.node import NodeSpec
 from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
@@ -101,30 +101,12 @@ def sample_graph():
        ),
    ]

-    async_entry_points = [
-        AsyncEntryPointSpec(
-            id="webhook",
-            name="Webhook Handler",
-            entry_node="process-webhook",
-            trigger_type="webhook",
-            isolation_level="shared",
-        ),
-        AsyncEntryPointSpec(
-            id="api",
-            name="API Handler",
-            entry_node="process-api",
-            trigger_type="api",
-            isolation_level="shared",
-        ),
-    ]
-
    return GraphSpec(
        id="test-graph",
        goal_id="test-goal",
        version="1.0.0",
        entry_node="process-webhook",
        entry_points={"start": "process-webhook"},
-        async_entry_points=async_entry_points,
        terminal_nodes=["complete"],
        pause_nodes=[],
        nodes=nodes,
@@ -504,108 +486,6 @@ class TestAgentRuntime:
 # === GraphSpec Validation Tests ===


-class TestGraphSpecValidation:
-    """Tests for GraphSpec with async_entry_points."""
-
-    def test_has_async_entry_points(self, sample_graph):
-        """Test checking for async entry points."""
-        assert sample_graph.has_async_entry_points() is True
-
-        # Graph without async entry points
-        simple_graph = GraphSpec(
-            id="simple",
-            goal_id="goal",
-            entry_node="start",
-            nodes=[],
-            edges=[],
-        )
-        assert simple_graph.has_async_entry_points() is False
-
-    def test_get_async_entry_point(self, sample_graph):
-        """Test getting async entry point by ID."""
-        ep = sample_graph.get_async_entry_point("webhook")
-        assert ep is not None
-        assert ep.id == "webhook"
-        assert ep.entry_node == "process-webhook"
-
-        ep_not_found = sample_graph.get_async_entry_point("nonexistent")
-        assert ep_not_found is None
-
-    def test_validate_async_entry_points(self):
-        """Test validation catches async entry point errors."""
-        nodes = [
-            NodeSpec(
-                id="valid-node",
-                name="Valid Node",
-                description="A valid node",
-                node_type="event_loop",
-                input_keys=[],
-                output_keys=[],
-            ),
-        ]
-
-        # Invalid entry node
-        graph = GraphSpec(
-            id="test",
-            goal_id="goal",
-            entry_node="valid-node",
-            async_entry_points=[
-                AsyncEntryPointSpec(
-                    id="invalid",
-                    name="Invalid",
-                    entry_node="nonexistent-node",
-                    trigger_type="webhook",
-                ),
-            ],
-            nodes=nodes,
-            edges=[],
-        )
-
-        errors = graph.validate()["errors"]
-        assert any("nonexistent-node" in e for e in errors)
-
-        # Invalid isolation level
-        graph2 = GraphSpec(
-            id="test",
-            goal_id="goal",
-            entry_node="valid-node",
-            async_entry_points=[
-                AsyncEntryPointSpec(
-                    id="bad-isolation",
-                    name="Bad Isolation",
-                    entry_node="valid-node",
-                    trigger_type="webhook",
-                    isolation_level="invalid",
-                ),
-            ],
-            nodes=nodes,
-            edges=[],
-        )
-
-        errors2 = graph2.validate()["errors"]
-        assert any("isolation_level" in e for e in errors2)
-
-        # Invalid trigger type
-        graph3 = GraphSpec(
-            id="test",
-            goal_id="goal",
-            entry_node="valid-node",
-            async_entry_points=[
-                AsyncEntryPointSpec(
-                    id="bad-trigger",
-                    name="Bad Trigger",
-                    entry_node="valid-node",
-                    trigger_type="invalid_trigger",
-                ),
-            ],
-            nodes=nodes,
-            edges=[],
-        )
-
-        errors3 = graph3.validate()["errors"]
-        assert any("trigger_type" in e for e in errors3)
-
-
 # === Integration Tests ===


@@ -0,0 +1,29 @@
+"""Tests for custom session-backed runtime logging paths."""
+
+from pathlib import Path
+from unittest.mock import MagicMock
+
+from framework.graph.executor import GraphExecutor
+from framework.runtime.runtime_log_store import RuntimeLogStore
+from framework.runtime.runtime_logger import RuntimeLogger
+
+
+def test_graph_executor_uses_custom_session_dir_name_for_runtime_logs():
+    """A non-``session_*`` storage directory name is used as the log session ID."""
+    session_name = "my-custom-session"
+    storage = Path("/tmp/test-agent/sessions") / session_name
+    executor = GraphExecutor(runtime=MagicMock(), storage_path=storage)
+
+    assert executor._get_runtime_log_session_id() == session_name
+
+
+def test_runtime_logger_creates_session_log_dir_for_custom_session_id(tmp_path):
+    """start_run() echoes a custom session ID and creates its logs directory."""
+    agent_root = tmp_path.joinpath(".hive", "agents", "test_agent")
+    agent_root.mkdir(parents=True)
+    session_id = "my-custom-session"
+    runtime_logger = RuntimeLogger(store=RuntimeLogStore(agent_root), agent_id="test-agent")
+    returned_id = runtime_logger.start_run(goal_id="goal-1", session_id=session_id)
+
+    assert returned_id == session_id
+    assert agent_root.joinpath("sessions", session_id, "logs").is_dir()
@@ -483,7 +483,6 @@ class TestEventDrivenEntryPoints:
            version="1.0.0",
            entry_node="process-event",
            entry_points={"start": "process-event"},
-            async_entry_points=[],
            terminal_nodes=[],
            pause_nodes=[],
            nodes=nodes,
@@ -0,0 +1,22 @@
+"""Trigger definitions for queen-level heartbeats (timers, webhooks)."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any
+
+
+@dataclass
+class TriggerDefinition:
+    """A registered trigger that can be activated on the queen runtime.
+
+    Trigger *definitions* come from the worker's ``triggers.json``.
+    Activation state is per-session (persisted in ``SessionState.active_triggers``).
+    """
+
+    id: str  # trigger ID; presumably the ID stored in SessionState.active_triggers — confirm
+    trigger_type: str  # "timer" | "webhook"
+    trigger_config: dict[str, Any] = field(default_factory=dict)  # fresh dict per instance
+    description: str = ""
+    task: str = ""  # NOTE(review): presumably the default task text for this trigger — confirm
+    active: bool = False  # inactive by default
@@ -134,6 +134,9 @@ class SessionState(BaseModel):
    # Input data (for debugging/replay)
    input_data: dict[str, Any] = Field(default_factory=dict)

+    # Process ID of the owning process (for cross-process stale session detection)
+    pid: int | None = None
+
    # Isolation level (from ExecutionContext)
    isolation_level: str = "shared"

@@ -141,6 +144,13 @@ class SessionState(BaseModel):
    checkpoint_enabled: bool = False
    latest_checkpoint_id: str | None = None

+    # Trigger activation state (IDs of triggers the queen/user turned on)
+    active_triggers: list[str] = Field(default_factory=list)
+    # Per-trigger task strings (user overrides, keyed by trigger ID)
+    trigger_tasks: dict[str, str] = Field(default_factory=dict)
+    # True after first successful worker execution (gates trigger delivery on restart)
+    worker_configured: bool = Field(default=False)
+
    model_config = {"extra": "allow"}

    @computed_field
@@ -1,36 +0,0 @@
-"""Backward-compatibility shim.
-
-The primary implementation is now in ``session_manager.py``.
-This module re-exports ``SessionManager`` as ``AgentManager`` and
-keeps ``AgentSlot`` for test compatibility.
-"""
-
-import asyncio
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Any
-
-from framework.server.session_manager import Session, SessionManager  # noqa: F401
-
-
-@dataclass
-class AgentSlot:
-    """Legacy data class — kept for test compatibility only.
-
-    New code should use ``Session`` from ``session_manager``.
-    """
-
-    id: str
-    agent_path: Path
-    runner: Any
-    runtime: Any
-    info: Any
-    loaded_at: float
-    queen_executor: Any = None
-    queen_task: asyncio.Task | None = None
-    judge_task: asyncio.Task | None = None
-    escalation_sub: str | None = None
-
-
-# Backward compat alias
-AgentManager = SessionManager
@@ -94,6 +94,29 @@ def sessions_dir(session: Session) -> Path:
    return Path.home() / ".hive" / "agents" / agent_name / "sessions"


+def cold_sessions_dir(session_id: str) -> Path | None:
+    """Resolve the worker sessions directory from disk for a cold/stopped session.
+
+    Reads agent_path from the queen session's meta.json to find the agent name,
+    then returns ~/.hive/agents/{agent_name}/sessions/.
+    Returns None if meta.json is missing, unreadable, not valid JSON, not a
+    JSON object, or has no non-empty string agent_path.
+    """
+    import json
+
+    meta_path = Path.home() / ".hive" / "queen" / "session" / session_id / "meta.json"
+    try:
+        # A missing file raises FileNotFoundError (an OSError), handled below.
+        meta = json.loads(meta_path.read_text(encoding="utf-8"))
+    except (ValueError, OSError):
+        return None
+    agent_path = meta.get("agent_path") if isinstance(meta, dict) else None
+    if not isinstance(agent_path, str) or not agent_path:
+        return None
+    # Only the final path component (the agent directory name) is used.
+    return Path.home() / ".hive" / "agents" / Path(agent_path).name / "sessions"
+
+
 # Allowed CORS origins (localhost on any port)
 _CORS_ORIGINS = {"http://localhost", "http://127.0.0.1"}

@@ -0,0 +1,368 @@
+"""Queen orchestrator — builds and runs the queen executor.
+
+Extracted from SessionManager._start_queen() to keep session management
+and queen orchestration concerns separate.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from framework.server.session_manager import Session
+
+logger = logging.getLogger(__name__)
+
+
+async def create_queen(
+    session: Session,
+    session_manager: Any,
+    worker_identity: str | None,
+    queen_dir: Path,
+    initial_prompt: str | None = None,
+) -> asyncio.Task:
+    """Build the queen executor and return the running asyncio task.
+
+    Handles tool registration, phase-state initialization, prompt
+    composition, persona hook setup, graph preparation, and the queen
+    event loop.
+    """
+    from framework.agents.queen.agent import (
+        queen_goal,
+        queen_graph as _queen_graph,
+    )
+    from framework.agents.queen.nodes import (
+        _QUEEN_BUILDING_TOOLS,
+        _QUEEN_PLANNING_TOOLS,
+        _QUEEN_RUNNING_TOOLS,
+        _QUEEN_STAGING_TOOLS,
+        _appendices,
+        _building_knowledge,
+        _planning_knowledge,
+        _queen_behavior_always,
+        _queen_behavior_building,
+        _queen_behavior_planning,
+        _queen_behavior_running,
+        _queen_behavior_staging,
+        _queen_identity_building,
+        _queen_identity_planning,
+        _queen_identity_running,
+        _queen_identity_staging,
+        _queen_phase_7,
+        _queen_style,
+        _queen_tools_building,
+        _queen_tools_planning,
+        _queen_tools_running,
+        _queen_tools_staging,
+        _shared_building_knowledge,
+    )
+    from framework.agents.queen.nodes.thinking_hook import select_expert_persona
+    from framework.graph.event_loop_node import HookContext, HookResult
+    from framework.graph.executor import GraphExecutor
+    from framework.runner.tool_registry import ToolRegistry
+    from framework.runtime.core import Runtime
+    from framework.runtime.event_bus import AgentEvent, EventType
+    from framework.tools.queen_lifecycle_tools import (
+        QueenPhaseState,
+        register_queen_lifecycle_tools,
+    )
+
+    hive_home = Path.home() / ".hive"
+
+    # ---- Tool registry ------------------------------------------------
+    queen_registry = ToolRegistry()
+    import framework.agents.queen as _queen_pkg
+
+    queen_pkg_dir = Path(_queen_pkg.__file__).parent
+    mcp_config = queen_pkg_dir / "mcp_servers.json"
+    if mcp_config.exists():
+        try:
+            queen_registry.load_mcp_config(mcp_config)
+            logger.info("Queen: loaded MCP tools from %s", mcp_config)
+        except Exception:
+            logger.warning("Queen: MCP config failed to load", exc_info=True)
+
+    # ---- Phase state --------------------------------------------------
+    initial_phase = "staging" if worker_identity else "planning"
+    phase_state = QueenPhaseState(phase=initial_phase, event_bus=session.event_bus)
+    session.phase_state = phase_state
+
+    # ---- Track ask rounds during planning ----------------------------
+    # Increment planning_ask_rounds each time the queen requests user
+    # input (ask_user or ask_user_multiple) while in the planning phase.
+    async def _track_planning_asks(event: AgentEvent) -> None:
+        """Count one planning ask round per explicit user-input request.
+
+        Ignored outside the planning phase. Auto-block events (text-only
+        turns emit CLIENT_INPUT_REQUESTED with an empty prompt and no
+        options/questions) are not counted.
+        """
+        if phase_state.phase != "planning":
+            return
+        payload = event.data or {}
+        # Any non-empty prompt, questions, or options marks an explicit
+        # ask_user / ask_user_multiple call.
+        if any(payload.get(field) for field in ("prompt", "questions", "options")):
+            phase_state.planning_ask_rounds += 1
+
+    session.event_bus.subscribe(
+        [EventType.CLIENT_INPUT_REQUESTED],
+        _track_planning_asks,
+        filter_stream="queen",
+    )
+
+    # ---- Lifecycle tools (always registered) --------------------------
+    register_queen_lifecycle_tools(
+        queen_registry,
+        session=session,
+        session_id=session.id,
+        session_manager=session_manager,
+        manager_session_id=session.id,
+        phase_state=phase_state,
+    )
+
+    # ---- Monitoring tools (only when worker is loaded) ----------------
+    if session.worker_runtime:
+        from framework.tools.worker_monitoring_tools import register_worker_monitoring_tools
+
+        register_worker_monitoring_tools(
+            queen_registry,
+            session.event_bus,
+            session.worker_path,
+            stream_id="queen",
+            worker_graph_id=session.worker_runtime._graph_id,
+            default_session_id=session.id,
+        )
+
+    queen_tools = list(queen_registry.get_tools().values())
+    queen_tool_executor = queen_registry.get_executor()
+
+    # ---- Partition tools by phase ------------------------------------
+    planning_names = set(_QUEEN_PLANNING_TOOLS)
+    building_names = set(_QUEEN_BUILDING_TOOLS)
+    staging_names = set(_QUEEN_STAGING_TOOLS)
+    running_names = set(_QUEEN_RUNNING_TOOLS)
+
+    registered_names = {t.name for t in queen_tools}
+    missing_building = building_names - registered_names
+    if missing_building:
+        logger.warning(
+            "Queen: %d/%d building tools NOT registered: %s",
+            len(missing_building),
+            len(building_names),
+            sorted(missing_building),
+        )
+    logger.info("Queen: registered tools: %s", sorted(registered_names))
+
+    phase_state.planning_tools = [t for t in queen_tools if t.name in planning_names]
+    phase_state.building_tools = [t for t in queen_tools if t.name in building_names]
+    phase_state.staging_tools = [t for t in queen_tools if t.name in staging_names]
+    phase_state.running_tools = [t for t in queen_tools if t.name in running_names]
+
+    # ---- Cross-session memory ----------------------------------------
+    from framework.agents.queen.queen_memory import seed_if_missing
+
+    seed_if_missing()
+
+    # ---- Compose phase-specific prompts ------------------------------
+    _orig_node = _queen_graph.nodes[0]
+
+    if worker_identity is None:
+        worker_identity = (
+            "\n\n# Worker Profile\n"
+            "No worker agent loaded. You are operating independently.\n"
+            "Design or build the agent to solve the user's problem "
+            "according to your current phase."
+        )
+
+    _planning_body = (
+        _queen_style
+        + _shared_building_knowledge
+        + _queen_tools_planning
+        + _queen_behavior_always
+        + _queen_behavior_planning
+        + _planning_knowledge
+        + worker_identity
+    )
+    phase_state.prompt_planning = _queen_identity_planning + _planning_body
+
+    _building_body = (
+        _queen_style
+        + _shared_building_knowledge
+        + _queen_tools_building
+        + _queen_behavior_always
+        + _queen_behavior_building
+        + _building_knowledge
+        + _queen_phase_7
+        + _appendices
+        + worker_identity
+    )
+    phase_state.prompt_building = _queen_identity_building + _building_body
+    phase_state.prompt_staging = (
+        _queen_identity_staging
+        + _queen_style
+        + _queen_tools_staging
+        + _queen_behavior_always
+        + _queen_behavior_staging
+        + worker_identity
+    )
+    phase_state.prompt_running = (
+        _queen_identity_running
+        + _queen_style
+        + _queen_tools_running
+        + _queen_behavior_always
+        + _queen_behavior_running
+        + worker_identity
+    )
+
+    # ---- Default skill protocols -------------------------------------
+    try:
+        from framework.skills.manager import SkillsManager
+
+        _queen_skills_mgr = SkillsManager()
+        _queen_skills_mgr.load()
+        phase_state.protocols_prompt = _queen_skills_mgr.protocols_prompt
+    except Exception:
+        logger.debug("Queen skill loading failed (non-fatal)", exc_info=True)
+
+    # ---- Persona hook ------------------------------------------------
+    _session_llm = session.llm
+    _session_event_bus = session.event_bus
+
+    async def _persona_hook(ctx: HookContext) -> HookResult | None:
+        """Pick an expert persona for the trigger text and prepend it to the prompt.
+
+        Returns ``None`` when no persona is selected. Otherwise publishes a
+        QUEEN_PERSONA_SELECTED event on the queen stream (when the session has
+        an event bus) and returns a HookResult whose system prompt is the
+        persona, a blank line, then the current phase prompt.
+        """
+        trigger_text = ctx.trigger or ""
+        selected = await select_expert_persona(trigger_text, _session_llm)
+        if not selected:
+            return None
+
+        if _session_event_bus is not None:
+            persona_event = AgentEvent(
+                type=EventType.QUEEN_PERSONA_SELECTED,
+                stream_id="queen",
+                data={"persona": selected},
+            )
+            await _session_event_bus.publish(persona_event)
+
+        combined_prompt = selected + "\n\n" + phase_state.get_current_prompt()
+        return HookResult(system_prompt=combined_prompt)
+
+    # ---- Graph preparation -------------------------------------------
+    initial_prompt_text = phase_state.get_current_prompt()
+
+    registered_tool_names = set(queen_registry.get_tools().keys())
+    declared_tools = _orig_node.tools or []
+    available_tools = [t for t in declared_tools if t in registered_tool_names]
+
+    node_updates: dict = {
+        "system_prompt": initial_prompt_text,
+    }
+    if set(available_tools) != set(declared_tools):
+        missing = sorted(set(declared_tools) - registered_tool_names)
+        if missing:
+            logger.warning("Queen: tools not available: %s", missing)
+        node_updates["tools"] = available_tools
+
+    adjusted_node = _orig_node.model_copy(update=node_updates)
+    _queen_loop_config = {
+        **(_queen_graph.loop_config or {}),
+        "hooks": {"session_start": [_persona_hook]},
+    }
+    queen_graph = _queen_graph.model_copy(
+        update={"nodes": [adjusted_node], "loop_config": _queen_loop_config}
+    )
+
+    # ---- Queen event loop --------------------------------------------
+    queen_runtime = Runtime(hive_home / "queen")
+
+    async def _queen_loop():
+        try:
+            executor = GraphExecutor(
+                runtime=queen_runtime,
+                llm=session.llm,
+                tools=queen_tools,
+                tool_executor=queen_tool_executor,
+                event_bus=session.event_bus,
+                stream_id="queen",
+                storage_path=queen_dir,
+                loop_config=_queen_loop_config,
+                execution_id=session.id,
+                dynamic_tools_provider=phase_state.get_current_tools,
+                dynamic_prompt_provider=phase_state.get_current_prompt,
+                iteration_metadata_provider=lambda: {"phase": phase_state.phase},
+            )
+            session.queen_executor = executor
+
+            # Wire inject_notification so phase switches notify the queen LLM
+            async def _inject_phase_notification(content: str) -> None:
+                """Forward ``content`` to the queen node's ``inject_event``, if available."""
+                queen_node = executor.node_registry.get("queen")
+                if queen_node is None or not hasattr(queen_node, "inject_event"):
+                    return
+                await queen_node.inject_event(content)
+
+            phase_state.inject_notification = _inject_phase_notification
+
+            # Auto-switch to staging when worker execution finishes
+            async def _on_worker_done(event):
+                """Notify the queen that a worker run ended, then switch to staging.
+
+                Handles EXECUTION_COMPLETED and EXECUTION_FAILED events. Events
+                on the queen's own stream, and events that arrive while the
+                queen is not in the "running" phase, are ignored.
+                """
+                if event.stream_id == "queen":
+                    return
+                if phase_state.phase != "running":
+                    return
+
+                # Tolerate events published without a payload, matching the
+                # `event.data or {}` guard used by _track_planning_asks.
+                data = event.data or {}
+
+                if event.type == EventType.EXECUTION_COMPLETED:
+                    # Mark worker as configured after first successful run
+                    session.worker_configured = True
+                    output = data.get("output") or {}
+                    output_summary = ""
+                    for key, value in output.items():
+                        val_str = str(value)
+                        # Keep the notification short: cap each value at 200 chars.
+                        if len(val_str) > 200:
+                            val_str = val_str[:200] + "..."
+                        output_summary += f"\n  {key}: {val_str}"
+                    _out = output_summary or " (no output keys set)"
+                    notification = (
+                        "[WORKER_TERMINAL] Worker finished successfully.\n"
+                        f"Output:{_out}\n"
+                        "Report this to the user. "
+                        "Ask if they want to continue with another run."
+                    )
+                else:  # EXECUTION_FAILED
+                    error = data.get("error", "Unknown error")
+                    notification = (
+                        "[WORKER_TERMINAL] Worker failed.\n"
+                        f"Error: {error}\n"
+                        "Report this to the user and help them troubleshoot."
+                    )
+
+                node = executor.node_registry.get("queen")
+                if node is not None and hasattr(node, "inject_event"):
+                    await node.inject_event(notification)
+
+                await phase_state.switch_to_staging(source="auto")
+
+            session.event_bus.subscribe(
+                event_types=[EventType.EXECUTION_COMPLETED, EventType.EXECUTION_FAILED],
+                handler=_on_worker_done,
+            )
+            session_manager._subscribe_worker_handoffs(session, executor)
+
+            logger.info(
+                "Queen starting in %s phase with %d tools: %s",
+                phase_state.phase,
+                len(phase_state.get_current_tools()),
+                [t.name for t in phase_state.get_current_tools()],
+            )
+            result = await executor.execute(
+                graph=queen_graph,
+                goal=queen_goal,
+                input_data={"greeting": initial_prompt or "Session started."},
+                session_state={"resume_session_id": session.id},
+            )
+            if result.success:
+                logger.warning("Queen executor returned (should be forever-alive)")
+            else:
+                logger.error(
+                    "Queen executor failed: %s",
+                    result.error or "(no error message)",
+                )
+        except Exception:
+            logger.error("Queen conversation crashed", exc_info=True)
+        finally:
+            session.queen_executor = None
+
+    return asyncio.create_task(_queen_loop())
@@ -103,7 +103,9 @@ async def handle_delete_credential(request: web.Request) -> web.Response:
    if credential_id == "aden_api_key":
        from framework.credentials.key_storage import delete_aden_api_key

-        delete_aden_api_key()
+        deleted = delete_aden_api_key()
+        if not deleted:
+            return web.json_response({"error": "Credential 'aden_api_key' not found"}, status=404)
        return web.json_response({"deleted": True})

    store = _get_store(request)
@@ -178,7 +180,10 @@ async def handle_check_agent(request: web.Request) -> web.Response:
        )
    except Exception as e:
        logger.exception(f"Error checking agent credentials: {e}")
-        return web.json_response({"error": str(e)}, status=500)
+        return web.json_response(
+            {"error": "Internal server error while checking credentials"},
+            status=500,
+        )


 def _status_to_dict(c) -> dict:
@@ -6,7 +6,7 @@ import logging
 from aiohttp import web
 from aiohttp.client_exceptions import ClientConnectionResetError as _AiohttpConnReset

-from framework.runtime.event_bus import EventType
+from framework.runtime.event_bus import AgentEvent, EventType
 from framework.server.app import resolve_session

 logger = logging.getLogger(__name__)
@@ -15,6 +15,7 @@ logger = logging.getLogger(__name__)
 DEFAULT_EVENT_TYPES = [
    EventType.CLIENT_OUTPUT_DELTA,
    EventType.CLIENT_INPUT_REQUESTED,
+    EventType.CLIENT_INPUT_RECEIVED,
    EventType.LLM_TEXT_DELTA,
    EventType.TOOL_CALL_STARTED,
    EventType.TOOL_CALL_COMPLETED,
@@ -40,6 +41,13 @@ DEFAULT_EVENT_TYPES = [
    EventType.CREDENTIALS_REQUIRED,
    EventType.SUBAGENT_REPORT,
    EventType.QUEEN_PHASE_CHANGED,
+    EventType.TRIGGER_AVAILABLE,
+    EventType.TRIGGER_ACTIVATED,
+    EventType.TRIGGER_DEACTIVATED,
+    EventType.TRIGGER_FIRED,
+    EventType.TRIGGER_REMOVED,
+    EventType.TRIGGER_UPDATED,
+    EventType.DRAFT_GRAPH_UPDATED,
 ]

 # Keepalive interval in seconds
@@ -89,6 +97,7 @@ async def handle_events(request: web.Request) -> web.StreamResponse:
        "execution_failed",
        "execution_paused",
        "client_input_requested",
+        "client_input_received",
        "node_loop_iteration",
        "node_loop_started",
        "credentials_required",
@@ -142,6 +151,7 @@ async def handle_events(request: web.Request) -> web.StreamResponse:
        EventType.CLIENT_OUTPUT_DELTA.value,
        EventType.EXECUTION_STARTED.value,
        EventType.CLIENT_INPUT_REQUESTED.value,
+        EventType.CLIENT_INPUT_RECEIVED.value,
    }
    event_type_values = {et.value for et in event_types}
    replay_types = _REPLAY_TYPES & event_type_values
@@ -156,6 +166,54 @@ async def handle_events(request: web.Request) -> web.StreamResponse:
    if replayed:
        logger.info("SSE replayed %d buffered events for session='%s'", replayed, session.id)

+    # Inject a live-status snapshot so the frontend knows which nodes are
+    # currently running.  This covers the case where the user navigated away
+    # and back — the localStorage snapshot is stale, and the ring-buffer
+    # replay may not include the original node_loop_started events.
+    worker_runtime = getattr(session, "worker_runtime", None)
+    if worker_runtime and getattr(worker_runtime, "is_running", False):
+        try:
+            for stream_info in worker_runtime.get_active_streams():
+                graph_id = stream_info.get("graph_id")
+                stream_id = stream_info.get("stream_id", "default")
+                for exec_id in stream_info.get("active_execution_ids", []):
+                    # Synthesize execution_started so frontend sets workerRunState
+                    synth_exec = AgentEvent(
+                        type=EventType.EXECUTION_STARTED,
+                        stream_id=stream_id,
+                        execution_id=exec_id,
+                        graph_id=graph_id,
+                        data={"synthetic": True},
+                    ).to_dict()
+                    try:
+                        queue.put_nowait(synth_exec)
+                    except asyncio.QueueFull:
+                        pass
+
+                # Find the currently executing node via the executor
+                for _gid, reg in worker_runtime._graphs.items():
+                    if _gid != graph_id:
+                        continue
+                    for _ep_id, stream in reg.streams.items():
+                        for exec_id, executor in stream._active_executors.items():
+                            current = getattr(executor, "current_node_id", None)
+                            if current:
+                                synth_node = AgentEvent(
+                                    type=EventType.NODE_LOOP_STARTED,
+                                    stream_id=stream_id,
+                                    node_id=current,
+                                    execution_id=exec_id,
+                                    graph_id=graph_id,
+                                    data={"synthetic": True},
+                                ).to_dict()
+                                try:
+                                    queue.put_nowait(synth_node)
+                                except asyncio.QueueFull:
+                                    pass
+            logger.info("SSE injected live-status snapshot for session='%s'", session.id)
+        except Exception:
+            logger.debug("Failed to inject live-status snapshot", exc_info=True)
+
    event_count = 0
    close_reason = "unknown"
    try:
@@ -125,6 +125,18 @@ async def handle_chat(request: web.Request) -> web.Response:
        node = queen_executor.node_registry.get("queen")
        if node is not None and hasattr(node, "inject_event"):
            await node.inject_event(message, is_client_input=True)
+            # Publish to EventBus so the session event log captures user messages
+            from framework.runtime.event_bus import AgentEvent, EventType
+
+            await session.event_bus.publish(
+                AgentEvent(
+                    type=EventType.CLIENT_INPUT_RECEIVED,
+                    stream_id="queen",
+                    node_id="queen",
+                    execution_id=session.id,
+                    data={"content": message},
+                )
+            )
            return web.json_response(
                {
                    "status": "queen",
@@ -347,7 +359,7 @@ async def handle_pause(request: web.Request) -> web.Response:

            for exec_id in list(stream.active_execution_ids):
                try:
-                    ok = await stream.cancel_execution(exec_id)
+                    ok = await stream.cancel_execution(exec_id, reason="Execution paused by user")
                    if ok:
                        cancelled.append(exec_id)
                except Exception:
@@ -357,8 +369,8 @@ async def handle_pause(request: web.Request) -> web.Response:
    runtime.pause_timers()

    # Switch to staging (agent still loaded, ready to re-run)
-    if session.mode_state is not None:
-        await session.mode_state.switch_to_staging(source="frontend")
+    if session.phase_state is not None:
+        await session.phase_state.switch_to_staging(source="frontend")

    return web.json_response(
        {
@@ -400,7 +412,9 @@ async def handle_stop(request: web.Request) -> web.Response:
                    if hasattr(node, "cancel_current_turn"):
                        node.cancel_current_turn()

-            cancelled = await stream.cancel_execution(execution_id)
+            cancelled = await stream.cancel_execution(
+                execution_id, reason="Execution stopped by user"
+            )
            if cancelled:
                # Cancel queen's in-progress LLM turn
                if session.queen_executor:
@@ -2,6 +2,7 @@

 import json
 import logging
+import time

 from aiohttp import web

@@ -116,6 +117,20 @@ async def handle_list_nodes(request: web.Request) -> web.Response:
        }
        for ep in reg.entry_points.values()
    ]
+    # Append triggers from triggers.json (stored on session)
+    for t in getattr(session, "available_triggers", {}).values():
+        entry = {
+            "id": t.id,
+            "name": t.description or t.id,
+            "entry_node": graph.entry_node,
+            "trigger_type": t.trigger_type,
+            "trigger_config": t.trigger_config,
+            "task": t.task,
+        }
+        mono = getattr(session, "trigger_next_fire", {}).get(t.id)
+        if mono is not None:
+            entry["next_fire_in"] = max(0.0, mono - time.monotonic())
+        entry_points.append(entry)
    return web.json_response(
        {
            "nodes": nodes,
@@ -234,8 +249,73 @@ async def handle_node_tools(request: web.Request) -> web.Response:
    return web.json_response({"tools": tools_out})


+async def handle_draft_graph(request: web.Request) -> web.Response:
+    """Return the session's current draft graph as ``{"draft": ...}``.
+
+    ``draft`` is ``None`` when the session has no ``phase_state`` or no draft
+    graph has been set on it.
+    """
+    session, err = resolve_session(request)
+    if err:
+        return err
+
+    state = getattr(session, "phase_state", None)
+    draft = state.draft_graph if state is not None else None
+    return web.json_response({"draft": draft})
+
+
+async def handle_flowchart_map(request: web.Request) -> web.Response:
+    """Return the flowchart→runtime node mapping and the original (pre-dissolution) draft.
+
+    Lookup order:
+
+    1. ``phase_state`` in memory, when ``original_draft_graph`` is already set
+       (per the original note, after confirm_and_build() dissolves decision
+       nodes).
+    2. ``flowchart.json`` in the session's worker folder; a non-empty
+       ``original_draft`` read from it is cached on ``phase_state``.
+    3. Otherwise ``{"map": None, "original_draft": None}``.
+
+    A ``flowchart.json`` that cannot be read or parsed is logged and treated
+    as absent rather than failing the request.
+    """
+    session, err = resolve_session(request)
+    if err:
+        return err
+
+    phase_state = getattr(session, "phase_state", None)
+
+    # Fast path: already in memory
+    if phase_state is not None and phase_state.original_draft_graph is not None:
+        return web.json_response(
+            {
+                "map": phase_state.flowchart_map,
+                "original_draft": phase_state.original_draft_graph,
+            }
+        )
+
+    # Try loading from flowchart.json in the agent folder
+    worker_path = getattr(session, "worker_path", None)
+    if worker_path is not None:
+        from pathlib import Path
+
+        target = Path(worker_path) / "flowchart.json"
+        if target.is_file():
+            try:
+                data = json.loads(target.read_text(encoding="utf-8"))
+                original_draft = data.get("original_draft")
+                fmap = data.get("flowchart_map")
+                # Cache in phase_state for future requests
+                if phase_state is not None and original_draft:
+                    phase_state.original_draft_graph = original_draft
+                    phase_state.flowchart_map = fmap
+                return web.json_response(
+                    {
+                        "map": fmap,
+                        "original_draft": original_draft,
+                    }
+                )
+            except Exception:
+                # Include the traceback so a corrupt or unreadable file can be
+                # diagnosed from the log.
+                logger.warning(
+                    "Failed to read flowchart.json from %s", worker_path, exc_info=True
+                )
+
+    return web.json_response({"map": None, "original_draft": None})
+
+
 def register_routes(app: web.Application) -> None:
    """Register graph/node inspection routes."""
+    # Draft graph (planning phase — visual only, no loaded worker required)
+    app.router.add_get("/api/sessions/{session_id}/draft-graph", handle_draft_graph)
+    # Flowchart map (post-dissolution — maps runtime nodes to original draft nodes)
+    app.router.add_get("/api/sessions/{session_id}/flowchart-map", handle_flowchart_map)
    # Session-primary routes
    app.router.add_get("/api/sessions/{session_id}/graphs/{graph_id}/nodes", handle_list_nodes)
    app.router.add_get(
@@ -9,8 +9,10 @@ Session-primary routes:
 - DELETE /api/sessions/{session_id}/worker           — unload worker from session
 - GET    /api/sessions/{session_id}/stats            — runtime statistics
 - GET    /api/sessions/{session_id}/entry-points     — list entry points
+- PATCH  /api/sessions/{session_id}/triggers/{id}   — update trigger task
 - GET    /api/sessions/{session_id}/graphs           — list graph IDs
 - GET    /api/sessions/{session_id}/queen-messages   — queen conversation history
+- GET    /api/sessions/{session_id}/events/history  — persisted eventbus log (for replay)

 Worker session browsing (persisted execution runs on disk):
 - GET    /api/sessions/{session_id}/worker-sessions                             — list
@@ -22,6 +24,8 @@ Worker session browsing (persisted execution runs on disk):

 """

+import asyncio
+import contextlib
 import json
 import logging
 import shutil
@@ -31,6 +35,7 @@ from pathlib import Path
 from aiohttp import web

 from framework.server.app import (
+    cold_sessions_dir,
    resolve_session,
    safe_path_segment,
    sessions_dir,
@@ -61,7 +66,9 @@ def _session_to_live_dict(session) -> dict:
        "loaded_at": session.loaded_at,
        "uptime_seconds": round(time.time() - session.loaded_at, 1),
        "intro_message": getattr(session.runner, "intro_message", "") or "",
-        "queen_phase": phase_state.phase if phase_state else "building",
+        "queen_phase": phase_state.phase
+        if phase_state
+        else ("staging" if session.worker_runtime else "planning"),
    }


@@ -140,6 +147,7 @@ async def handle_create_session(request: web.Request) -> web.Response:
            session = await manager.create_session_with_worker(
                agent_path,
                agent_id=agent_id,
+                session_id=session_id,
                model=model,
                initial_prompt=initial_prompt,
                queen_resume_from=queen_resume_from,
@@ -228,6 +236,22 @@ async def handle_get_live_session(request: web.Request) -> web.Response:
            }
            for ep in rt.get_entry_points()
        ]
+        # Append triggers from triggers.json (stored on session)
+        runner = getattr(session, "runner", None)
+        graph_entry = runner.graph.entry_node if runner else ""
+        for t in getattr(session, "available_triggers", {}).values():
+            entry = {
+                "id": t.id,
+                "name": t.description or t.id,
+                "entry_node": graph_entry,
+                "trigger_type": t.trigger_type,
+                "trigger_config": t.trigger_config,
+                "task": t.task,
+            }
+            mono = getattr(session, "trigger_next_fire", {}).get(t.id)
+            if mono is not None:
+                entry["next_fire_in"] = max(0.0, mono - time.monotonic())
+            data["entry_points"].append(entry)
        data["graphs"] = session.worker_runtime.list_graphs()

    return web.json_response(data)
@@ -351,23 +375,190 @@ async def handle_session_entry_points(request: web.Request) -> web.Response:

    rt = session.worker_runtime
    eps = rt.get_entry_points() if rt else []
+    entry_points = [
+        {
+            "id": ep.id,
+            "name": ep.name,
+            "entry_node": ep.entry_node,
+            "trigger_type": ep.trigger_type,
+            "trigger_config": ep.trigger_config,
+            **(
+                {"next_fire_in": nf}
+                if rt and (nf := rt.get_timer_next_fire_in(ep.id)) is not None
+                else {}
+            ),
+        }
+        for ep in eps
+    ]
+    # Append triggers from triggers.json (stored on session)
+    runner = getattr(session, "runner", None)
+    graph_entry = runner.graph.entry_node if runner else ""
+    for t in getattr(session, "available_triggers", {}).values():
+        entry = {
+            "id": t.id,
+            "name": t.description or t.id,
+            "entry_node": graph_entry,
+            "trigger_type": t.trigger_type,
+            "trigger_config": t.trigger_config,
+            "task": t.task,
+        }
+        mono = getattr(session, "trigger_next_fire", {}).get(t.id)
+        if mono is not None:
+            entry["next_fire_in"] = max(0.0, mono - time.monotonic())
+        entry_points.append(entry)
+    return web.json_response({"entry_points": entry_points})
+
+
+async def handle_update_trigger_task(request: web.Request) -> web.Response:
+    """PATCH /api/sessions/{session_id}/triggers/{trigger_id} — update trigger fields.
+
+    The JSON body may carry ``task`` (a string) and/or ``trigger_config`` (an
+    object that is merged over the trigger's current config).  Every field is
+    validated before anything is written, so a rejected request leaves the
+    trigger definition exactly as it was.
+
+    Responses:
+        200: ``{"trigger_id", "task", "trigger_config"}`` reflecting the update.
+        400: invalid JSON, a non-object body, a wrongly typed field, an invalid
+            cron expression or interval, or neither field supplied.
+        404: ``trigger_id`` is not among the session's available triggers.
+        500: a cron expression was supplied but ``croniter`` is not installed.
+
+    Side effects: when the trigger is currently active and its config changed,
+    its timer task / webhook subscription is torn down and started again; the
+    change is saved through the queen lifecycle helpers and a
+    ``TRIGGER_UPDATED`` event is published on the session's event bus.
+    """
+    session, err = resolve_session(request)
+    if err:
+        return err
+
+    trigger_id = request.match_info["trigger_id"]
+    available = getattr(session, "available_triggers", {})
+    tdef = available.get(trigger_id)
+    if tdef is None:
+        return web.json_response(
+            {"error": f"Trigger '{trigger_id}' not found"},
+            status=404,
+        )
+
+    try:
+        body = await request.json()
+    except Exception:
+        return web.json_response({"error": "Invalid JSON body"}, status=400)
+
+    # A JSON array, string or number parses fine but has no ``.get``; reject it
+    # here instead of failing with a 500 further down.
+    if not isinstance(body, dict):
+        return web.json_response({"error": "JSON body must be an object"}, status=400)
+
+    # ---- Validation phase: nothing on ``tdef`` is touched until it all passes.
+    has_task = "task" in body
+    new_task = body.get("task")
+    if has_task and not isinstance(new_task, str):
+        return web.json_response({"error": "'task' must be a string"}, status=400)
+
+    merged_trigger_config = None
+    trigger_config_update = body.get("trigger_config")
+    if trigger_config_update is not None:
+        if not isinstance(trigger_config_update, dict):
+            return web.json_response(
+                {"error": "'trigger_config' must be an object"},
+                status=400,
+            )
+        # Merge over a copy so the live config is untouched if validation fails.
+        merged_trigger_config = dict(tdef.trigger_config)
+        merged_trigger_config.update(trigger_config_update)
+
+        if tdef.trigger_type == "timer":
+            cron_expr = merged_trigger_config.get("cron")
+            interval = merged_trigger_config.get("interval_minutes")
+            if cron_expr is not None and not isinstance(cron_expr, str):
+                return web.json_response(
+                    {"error": "'trigger_config.cron' must be a string"},
+                    status=400,
+                )
+            if cron_expr:
+                try:
+                    from croniter import croniter
+
+                    if not croniter.is_valid(cron_expr):
+                        return web.json_response(
+                            {"error": f"Invalid cron expression: {cron_expr}"},
+                            status=400,
+                        )
+                except ImportError:
+                    return web.json_response(
+                        {
+                            "error": (
+                                "croniter package not installed — cannot validate cron expression."
+                            )
+                        },
+                        status=500,
+                    )
+                # A cron schedule takes precedence; drop any leftover interval.
+                merged_trigger_config.pop("interval_minutes", None)
+            elif interval is None:
+                return web.json_response(
+                    {
+                        "error": (
+                            "Timer trigger needs 'cron' or 'interval_minutes' in trigger_config."
+                        )
+                    },
+                    status=400,
+                )
+            elif (
+                # bool is an int subclass, so ``true`` would otherwise pass as 1.
+                isinstance(interval, bool)
+                or not isinstance(interval, (int, float))
+                or interval <= 0
+            ):
+                return web.json_response(
+                    {"error": "'trigger_config.interval_minutes' must be > 0"},
+                    status=400,
+                )
+
+    config_changed = merged_trigger_config is not None
+    if not has_task and not config_changed:
+        return web.json_response(
+            {"error": "Provide at least one of 'task' or 'trigger_config'"},
+            status=400,
+        )
+
+    # ---- Apply phase: the request is known to be valid.
+    if has_task:
+        tdef.task = new_task
+    if config_changed:
+        tdef.trigger_config = merged_trigger_config
+
+    # Persist to session state and agent definition
+    from framework.tools.queen_lifecycle_tools import (
+        _persist_active_triggers,
+        _save_trigger_to_agent,
+        _start_trigger_timer,
+        _start_trigger_webhook,
+    )
+
+    if config_changed and trigger_id in getattr(session, "active_trigger_ids", set()):
+        # Stop whatever is running with the old config before starting anew.
+        timer_task = session.active_timer_tasks.pop(trigger_id, None)
+        if timer_task and not timer_task.done():
+            timer_task.cancel()
+            with contextlib.suppress(asyncio.CancelledError):
+                await timer_task
+        getattr(session, "trigger_next_fire", {}).pop(trigger_id, None)
+
+        webhook_subs = getattr(session, "active_webhook_subs", {})
+        if sub_id := webhook_subs.pop(trigger_id, None):
+            with contextlib.suppress(Exception):
+                session.event_bus.unsubscribe(sub_id)
+
+        if tdef.trigger_type == "timer":
+            await _start_trigger_timer(session, trigger_id, tdef)
+        elif tdef.trigger_type == "webhook":
+            await _start_trigger_webhook(session, trigger_id, tdef)
+
+    if trigger_id in getattr(session, "active_trigger_ids", set()):
+        session_id = request.match_info["session_id"]
+        await _persist_active_triggers(session, session_id)
+
+    _save_trigger_to_agent(session, trigger_id, tdef)
+
+    # Emit SSE event so the frontend updates the graph and detail panel
+    bus = getattr(session, "event_bus", None)
+    if bus:
+        from framework.runtime.event_bus import AgentEvent, EventType
+
+        await bus.publish(
+            AgentEvent(
+                type=EventType.TRIGGER_UPDATED,
+                stream_id="queen",
+                data={
+                    "trigger_id": trigger_id,
+                    "task": tdef.task,
+                    "trigger_config": tdef.trigger_config,
+                    "trigger_type": tdef.trigger_type,
+                    "name": tdef.description or trigger_id,
+                    "entry_node": getattr(
+                        getattr(getattr(session, "runner", None), "graph", None),
+                        "entry_node",
+                        None,
+                    ),
+                },
+            )
+        )
+
    return web.json_response(
        {
-            "entry_points": [
-                {
-                    "id": ep.id,
-                    "name": ep.name,
-                    "entry_node": ep.entry_node,
-                    "trigger_type": ep.trigger_type,
-                    "trigger_config": ep.trigger_config,
-                    **(
-                        {"next_fire_in": nf}
-                        if rt and (nf := rt.get_timer_next_fire_in(ep.id)) is not None
-                        else {}
-                    ),
-                }
-                for ep in eps
-            ]
+            "trigger_id": trigger_id,
+            "task": tdef.task,
+            "trigger_config": tdef.trigger_config,
        }
    )

@@ -397,23 +588,28 @@ async def handle_list_worker_sessions(request: web.Request) -> web.Response:
    """List worker sessions on disk."""
    session, err = resolve_session(request)
    if err:
-        return err
-
-    if not session.worker_path:
-        return web.json_response({"sessions": []})
-
-    sess_dir = sessions_dir(session)
+        # Fall back to cold session lookup from disk
+        sid = request.match_info["session_id"]
+        sess_dir = cold_sessions_dir(sid)
+        if sess_dir is None:
+            return err
+    else:
+        if not session.worker_path:
+            return web.json_response({"sessions": []})
+        sess_dir = sessions_dir(session)
    if not sess_dir.exists():
        return web.json_response({"sessions": []})

    sessions = []
    for d in sorted(sess_dir.iterdir(), reverse=True):
-        if not d.is_dir() or not d.name.startswith("session_"):
+        if not d.is_dir():
+            continue
+        state_path = d / "state.json"
+        if not d.name.startswith("session_") and not state_path.exists():
            continue

        entry: dict = {"session_id": d.name}

-        state_path = d / "state.json"
        if state_path.exists():
            try:
                state = json.loads(state_path.read_text(encoding="utf-8"))
@@ -564,48 +760,85 @@ async def handle_messages(request: web.Request) -> web.Response:
    """Get messages for a worker session."""
    session, err = resolve_session(request)
    if err:
-        return err
-
-    if not session.worker_path:
-        return web.json_response({"error": "No worker loaded"}, status=503)
+        # Fall back to cold session lookup from disk
+        sid = request.match_info["session_id"]
+        sess_dir = cold_sessions_dir(sid)
+        if sess_dir is None:
+            return err
+    else:
+        if not session.worker_path:
+            return web.json_response({"error": "No worker loaded"}, status=503)
+        sess_dir = sessions_dir(session)

    ws_id = request.match_info.get("ws_id") or request.match_info.get("session_id", "")
    ws_id = safe_path_segment(ws_id)

-    convs_dir = sessions_dir(session) / ws_id / "conversations"
+    convs_dir = sess_dir / ws_id / "conversations"
    if not convs_dir.exists():
        return web.json_response({"messages": []})

    filter_node = request.query.get("node_id")
    all_messages = []

-    for node_dir in convs_dir.iterdir():
-        if not node_dir.is_dir():
-            continue
-        if filter_node and node_dir.name != filter_node:
-            continue
-
-        parts_dir = node_dir / "parts"
+    def _collect_msg_parts(parts_dir: Path, node_id: str) -> None:
        if not parts_dir.exists():
-            continue
-
+            return
        for part_file in sorted(parts_dir.iterdir()):
            if part_file.suffix != ".json":
                continue
            try:
                part = json.loads(part_file.read_text(encoding="utf-8"))
-                part["_node_id"] = node_dir.name
+                part["_node_id"] = node_id
                part.setdefault("created_at", part_file.stat().st_mtime)
                all_messages.append(part)
            except (json.JSONDecodeError, OSError):
                continue

+    # Flat layout: conversations/parts/*.json
+    if not filter_node:
+        _collect_msg_parts(convs_dir / "parts", "worker")
+
+    # Node-based layout: conversations/<node_id>/parts/*.json
+    for node_dir in convs_dir.iterdir():
+        if not node_dir.is_dir() or node_dir.name == "parts":
+            continue
+        if filter_node and node_dir.name != filter_node:
+            continue
+        _collect_msg_parts(node_dir / "parts", node_dir.name)
+
+    # Merge run lifecycle markers from runs.jsonl (for historical dividers)
+    runs_file = sess_dir / ws_id / "runs.jsonl"
+    if runs_file.exists():
+        try:
+            for line in runs_file.read_text(encoding="utf-8").splitlines():
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    record = json.loads(line)
+                    all_messages.append(
+                        {
+                            "seq": -1,
+                            "role": "system",
+                            "content": "",
+                            "_node_id": "_run_marker",
+                            "is_run_marker": True,
+                            "run_id": record.get("run_id"),
+                            "run_event": record.get("event"),
+                            "created_at": record.get("created_at", 0),
+                        }
+                    )
+                except json.JSONDecodeError:
+                    continue
+        except OSError:
+            pass
+
    all_messages.sort(key=lambda m: m.get("created_at", m.get("seq", 0)))

    client_only = request.query.get("client_only", "").lower() in ("true", "1")
    if client_only:
        client_facing_nodes: set[str] = set()
-        if session.runner and hasattr(session.runner, "graph"):
+        if session and session.runner and hasattr(session.runner, "graph"):
            for node in session.runner.graph.nodes:
                if node.client_facing:
                    client_facing_nodes.add(node.id)
@@ -614,12 +847,15 @@ async def handle_messages(request: web.Request) -> web.Response:
            all_messages = [
                m
                for m in all_messages
-                if not m.get("is_transition_marker")
-                and m["role"] != "tool"
-                and not (m["role"] == "assistant" and m.get("tool_calls"))
-                and (
-                    (m["role"] == "user" and m.get("is_client_input"))
-                    or (m["role"] == "assistant" and m.get("_node_id") in client_facing_nodes)
+                if m.get("is_run_marker")
+                or (
+                    not m.get("is_transition_marker")
+                    and m["role"] != "tool"
+                    and not (m["role"] == "assistant" and m.get("tool_calls"))
+                    and (
+                        (m["role"] == "user" and m.get("is_client_input"))
+                        or (m["role"] == "assistant" and m.get("_node_id") in client_facing_nodes)
+                    )
                )
            ]

@@ -640,18 +876,16 @@ async def handle_queen_messages(request: web.Request) -> web.Response:
        return web.json_response({"messages": [], "session_id": session_id})

    all_messages: list[dict] = []
-    for node_dir in convs_dir.iterdir():
-        if not node_dir.is_dir():
-            continue
-        parts_dir = node_dir / "parts"
+
+    def _read_parts(parts_dir: Path, node_id: str) -> None:
        if not parts_dir.exists():
-            continue
+            return
        for part_file in sorted(parts_dir.iterdir()):
            if part_file.suffix != ".json":
                continue
            try:
                part = json.loads(part_file.read_text(encoding="utf-8"))
-                part["_node_id"] = node_dir.name
+                part["_node_id"] = node_id
                # Use file mtime as created_at so frontend can order
                # queen and worker messages chronologically.
                part.setdefault("created_at", part_file.stat().st_mtime)
@@ -659,6 +893,15 @@ async def handle_queen_messages(request: web.Request) -> web.Response:
            except (json.JSONDecodeError, OSError):
                continue

+    # Flat layout: conversations/parts/*.json
+    _read_parts(convs_dir / "parts", "queen")
+
+    # Node-based layout: conversations/<node_id>/parts/*.json
+    for node_dir in convs_dir.iterdir():
+        if not node_dir.is_dir() or node_dir.name == "parts":
+            continue
+        _read_parts(node_dir / "parts", node_dir.name)
+
    all_messages.sort(key=lambda m: m.get("created_at", m.get("seq", 0)))

    # Filter to client-facing messages only
@@ -673,6 +916,38 @@ async def handle_queen_messages(request: web.Request) -> web.Response:
    return web.json_response({"messages": all_messages, "session_id": session_id})


+async def handle_session_events_history(request: web.Request) -> web.Response:
+    """GET /api/sessions/{session_id}/events/history — persisted eventbus log.
+
+    Reads ``events.jsonl`` from the session directory on disk so it works for
+    both live sessions and cold (post-server-restart) sessions.  The frontend
+    replays these events through ``sseEventToChatMessage`` to fully reconstruct
+    the UI state on resume.
+
+    Returns ``{"events": [...], "session_id": ...}``.  A missing or unreadable
+    log yields an empty list; blank lines, malformed JSON and non-object
+    records are skipped.
+    """
+    # The id comes straight from the URL and becomes part of a filesystem path,
+    # so pass it through the same helper handle_messages applies to ws_id.
+    # NOTE(review): assumes safe_path_segment rejects or neutralises segments
+    # such as ".." — confirm against framework.server.app.
+    session_id = safe_path_segment(request.match_info["session_id"])
+
+    events_path = Path.home() / ".hive" / "queen" / "session" / session_id / "events.jsonl"
+
+    events: list[dict] = []
+    try:
+        with open(events_path, encoding="utf-8") as f:
+            for line in f:
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    record = json.loads(line)
+                except json.JSONDecodeError:
+                    continue
+                # Only JSON objects are replayable events.
+                if isinstance(record, dict):
+                    events.append(record)
+    except (OSError, UnicodeDecodeError):
+        # Covers a missing file as well as read/decoding failures: in every
+        # case there is nothing trustworthy to replay.
+        return web.json_response({"events": [], "session_id": session_id})
+
+    return web.json_response({"events": events, "session_id": session_id})
+
+
 async def handle_session_history(request: web.Request) -> web.Response:
    """GET /api/sessions/history — all queen sessions on disk (live + cold).

@@ -731,7 +1006,7 @@ async def handle_delete_history_session(request: web.Request) -> web.Response:

 async def handle_discover(request: web.Request) -> web.Response:
    """GET /api/discover — discover agents from filesystem."""
-    from framework.tui.screens.agent_picker import discover_agents
+    from framework.agents.discovery import discover_agents

    manager = _get_manager(request)
    loaded_paths = {str(s.worker_path) for s in manager.list_sessions() if s.worker_path}
@@ -746,6 +1021,7 @@ async def handle_discover(request: web.Request) -> web.Response:
                "description": entry.description,
                "category": entry.category,
                "session_count": entry.session_count,
+                "run_count": entry.run_count,
                "node_count": entry.node_count,
                "tool_count": entry.tool_count,
                "tags": entry.tags,
@@ -783,8 +1059,12 @@ def register_routes(app: web.Application) -> None:
    # Session info
    app.router.add_get("/api/sessions/{session_id}/stats", handle_session_stats)
    app.router.add_get("/api/sessions/{session_id}/entry-points", handle_session_entry_points)
+    app.router.add_patch(
+        "/api/sessions/{session_id}/triggers/{trigger_id}", handle_update_trigger_task
+    )
    app.router.add_get("/api/sessions/{session_id}/graphs", handle_session_graphs)
    app.router.add_get("/api/sessions/{session_id}/queen-messages", handle_queen_messages)
+    app.router.add_get("/api/sessions/{session_id}/events/history", handle_session_events_history)

    # Worker session browsing (session-primary)
    app.router.add_get("/api/sessions/{session_id}/worker-sessions", handle_list_worker_sessions)
@@ -7,7 +7,6 @@ Architecture:
 - Session owns EventBus + LLM, shared with queen and worker
 - Queen is always present once a session starts
 - Worker is optional — loaded into an existing session
- Judge is active only when a worker is loaded
 """

 import asyncio
@@ -15,11 +14,13 @@ import json
 import logging
 import time
 import uuid
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
 from typing import Any

+from framework.runtime.triggers import TriggerDefinition
+
 logger = logging.getLogger(__name__)


@@ -42,10 +43,23 @@ class Session:
    worker_info: Any | None = None  # AgentInfo
    # Queen phase state (building/staging/running)
    phase_state: Any = None  # QueenPhaseState
-    # Judge (active when worker is loaded)
-    judge_task: asyncio.Task | None = None
-    escalation_sub: str | None = None
+    # Worker handoff subscription
    worker_handoff_sub: str | None = None
+    # Memory consolidation subscription (fires on CONTEXT_COMPACTED)
+    memory_consolidation_sub: str | None = None
+    # Trigger definitions loaded from agent's triggers.json (available but inactive)
+    available_triggers: dict[str, TriggerDefinition] = field(default_factory=dict)
+    # Active trigger tracking (IDs currently firing + their asyncio tasks)
+    active_trigger_ids: set[str] = field(default_factory=set)
+    active_timer_tasks: dict[str, asyncio.Task] = field(default_factory=dict)
+    # Queen-owned webhook server (lazy singleton, created on first webhook trigger activation)
+    queen_webhook_server: Any = None
+    # EventBus subscription IDs for active webhook triggers (trigger_id -> sub_id)
+    active_webhook_subs: dict[str, str] = field(default_factory=dict)
+    # True after first successful worker execution (gates trigger delivery)
+    worker_configured: bool = False
+    # Monotonic timestamps for next trigger fire (mirrors AgentRuntime._timer_next_fire)
+    trigger_next_fire: dict[str, float] = field(default_factory=dict)
    # Session directory resumption:
    # When set, _start_queen writes queen conversations to this existing session's
    # directory instead of creating a new one.  This lets cold-restores accumulate
@@ -128,7 +142,9 @@ class SessionManager:
        to that existing session's directory instead of creating a new one.
        This preserves full conversation history across server restarts.
        """
-        session = await self._create_session_core(session_id=session_id, model=model)
+        # Reuse the original session ID when cold-restoring
+        resolved_session_id = queen_resume_from or session_id
+        session = await self._create_session_core(session_id=resolved_session_id, model=model)
        session.queen_resume_from = queen_resume_from

        # Start queen immediately (queen-only, no worker tools yet)
@@ -145,22 +161,28 @@ class SessionManager:
        self,
        agent_path: str | Path,
        agent_id: str | None = None,
+        session_id: str | None = None,
        model: str | None = None,
        initial_prompt: str | None = None,
        queen_resume_from: str | None = None,
    ) -> Session:
        """Create a session and load a worker in one step.

-        When ``queen_resume_from`` is set the queen writes conversation messages
-        to that existing session's directory instead of creating a new one.
+        When ``queen_resume_from`` is set the session reuses the original session
+        ID so the frontend sees a single continuous session.  The queen writes
+        conversation messages to that existing directory, preserving full history.
        """
        from framework.tools.queen_lifecycle_tools import build_worker_profile

        agent_path = Path(agent_path)
        resolved_worker_id = agent_id or agent_path.name

-        # Auto-generate session ID (not the agent name)
-        session = await self._create_session_core(model=model)
+        # Reuse the original session ID when cold-restoring so the frontend
+        # sees one continuous session instead of a new one each time.
+        session = await self._create_session_core(
+            session_id=queen_resume_from,
+            model=model,
+        )
        session.queen_resume_from = queen_resume_from
        try:
            # Load worker FIRST (before queen) so queen gets full tools
@@ -200,8 +222,8 @@ class SessionManager:
    ) -> None:
        """Load a worker agent into a session (core logic).

-        Sets up the runner, runtime, and session fields. Does NOT start the
-        judge or notify the queen — callers handle those steps.
+        Sets up the runner, runtime, and session fields. Does NOT notify
+        the queen — callers handle that step.
        """
        from framework.runner import AgentRunner

@@ -240,6 +262,25 @@ class SessionManager:

            runtime = runner._agent_runtime

+            # Load triggers from the agent's triggers.json definition file.
+            from framework.tools.queen_lifecycle_tools import _read_agent_triggers_json
+
+            for tdata in _read_agent_triggers_json(agent_path):
+                tid = tdata.get("id", "")
+                ttype = tdata.get("trigger_type", "")
+                if tid and ttype in ("timer", "webhook"):
+                    session.available_triggers[tid] = TriggerDefinition(
+                        id=tid,
+                        trigger_type=ttype,
+                        trigger_config=tdata.get("trigger_config", {}),
+                        description=tdata.get("name", tid),
+                        task=tdata.get("task", ""),
+                    )
+                    logger.info("Loaded trigger '%s' (%s) from triggers.json", tid, ttype)
+
+            if session.available_triggers:
+                await self._emit_trigger_events(session, "available", session.available_triggers)
+
            # Start runtime on event loop
            if runtime and not runtime.is_running:
                await runtime.start()
@@ -276,11 +317,20 @@ class SessionManager:
        When a new runtime starts, any on-disk session still marked 'active'
        is from a process that no longer exists. 'Paused' sessions are left
        intact so they remain resumable.
+
+        Two-layer protection against corrupting live sessions:
+        1. In-memory: skip any session ID currently tracked in self._sessions
+           (guaranteed alive in this process).
+        2. PID validation: if state.json contains a ``pid`` field, check whether
+           that process is still running on the host. If it is, the session is
+           owned by another healthy worker process, so leave it alone.
        """
        sessions_path = Path.home() / ".hive" / "agents" / agent_path.name / "sessions"
        if not sessions_path.exists():
            return

+        live_session_ids = set(self._sessions.keys())
+
        for d in sessions_path.iterdir():
            if not d.is_dir() or not d.name.startswith("session_"):
                continue
@@ -291,6 +341,26 @@ class SessionManager:
                state = json.loads(state_path.read_text(encoding="utf-8"))
                if state.get("status") != "active":
                    continue
+
+                # Layer 1: skip sessions that are alive in this process
+                session_id = state.get("session_id", d.name)
+                if session_id in live_session_ids or d.name in live_session_ids:
+                    logger.debug(
+                        "Skipping live in-memory session '%s' during stale cleanup",
+                        d.name,
+                    )
+                    continue
+
+                # Layer 2: skip sessions whose owning process is still alive
+                recorded_pid = state.get("pid")
+                if recorded_pid is not None and self._is_pid_alive(recorded_pid):
+                    logger.debug(
+                        "Skipping session '%s' — owning process %d is still running",
+                        d.name,
+                        recorded_pid,
+                    )
+                    continue
+
                state["status"] = "cancelled"
                state.setdefault("result", {})["error"] = "Stale session: runtime restarted"
                state.setdefault("timestamps", {})["updated_at"] = datetime.now().isoformat()
@@ -301,6 +371,34 @@ class SessionManager:
            except (json.JSONDecodeError, OSError) as e:
                logger.warning("Failed to clean up stale session %s: %s", d.name, e)

+    @staticmethod
+    def _is_pid_alive(pid: int) -> bool:
+        """Check whether a process with the given PID is still running.
+
+        Args:
+            pid: Process ID to probe (callers pass the ``pid`` value recorded
+                in a session's ``state.json``).
+
+        Returns:
+            True when a process with that PID exists — including when it
+            exists but this process is not permitted to signal or query it.
+            False when no such process exists or ``pid`` is not a positive
+            integer.
+        """
+        import os
+        import platform
+
+        # The value originates from a JSON file, so do not trust its type.
+        # bool is an int subclass; 0 and negative values make os.kill() address
+        # a process group rather than a single process.
+        if isinstance(pid, bool) or not isinstance(pid, int) or pid <= 0:
+            return False
+
+        if platform.system() == "Windows":
+            import ctypes
+
+            # use_last_error=True makes ctypes snapshot the Win32 error code
+            # right after each call, so it cannot be overwritten before we
+            # read it (calling GetLastError() through ctypes is unreliable).
+            kernel32 = ctypes.WinDLL("kernel32", use_last_error=True)
+            # PROCESS_QUERY_LIMITED_INFORMATION = 0x1000
+            handle = kernel32.OpenProcess(0x1000, False, pid)
+            if not handle:
+                # 5 is ERROR_ACCESS_DENIED, meaning the process exists but is protected
+                return ctypes.get_last_error() == 5
+
+            try:
+                exit_code = ctypes.c_ulong()
+                if not kernel32.GetExitCodeProcess(handle, ctypes.byref(exit_code)):
+                    return False
+                # 259 is STILL_ACTIVE
+                return exit_code.value == 259
+            finally:
+                kernel32.CloseHandle(handle)
+        else:
+            try:
+                # Signal 0 performs the existence/permission check only.
+                os.kill(pid, 0)
+            except PermissionError:
+                # EPERM: the process exists but belongs to another user.
+                return True
+            except (OSError, OverflowError):
+                # ESRCH (no such process) or a PID too large for the platform.
+                return False
+            return True
+
    async def load_worker(
        self,
        session_id: str,
@@ -310,7 +408,7 @@ class SessionManager:
    ) -> Session:
        """Load a worker agent into an existing session (with running queen).

-        Starts the worker runtime, health judge, and notifies the queen.
+        Starts the worker runtime and notifies the queen.
        """
        agent_path = Path(agent_path)

@@ -325,12 +423,69 @@ class SessionManager:
            model=model,
        )

-        # Notify queen about the loaded worker (skip for hive_coder itself).
-        # Health judge disabled for simplicity.
-        if agent_path.name != "hive_coder" and session.worker_runtime:
-            # await self._start_judge(session, session.runner._storage_path)
+        # Notify queen about the loaded worker (skip for queen itself).
+        if agent_path.name != "queen" and session.worker_runtime:
            await self._notify_queen_worker_loaded(session)

+        # Update meta.json so cold-restore can discover this session by agent_path
+        storage_session_id = session.queen_resume_from or session.id
+        meta_path = Path.home() / ".hive" / "queen" / "session" / storage_session_id / "meta.json"
+        try:
+            _agent_name = (
+                session.worker_info.name
+                if session.worker_info
+                else str(agent_path.name).replace("_", " ").title()
+            )
+            existing_meta = {}
+            if meta_path.exists():
+                existing_meta = json.loads(meta_path.read_text(encoding="utf-8"))
+            existing_meta["agent_name"] = _agent_name
+            existing_meta["agent_path"] = (
+                str(session.worker_path) if session.worker_path else str(agent_path)
+            )
+            meta_path.write_text(json.dumps(existing_meta), encoding="utf-8")
+        except OSError:
+            pass
+
+        # Restore previously active triggers from persisted session state
+        if session.available_triggers and session.worker_runtime:
+            try:
+                store = session.worker_runtime._session_store
+                state = await store.read_state(session_id)
+                if state and state.active_triggers:
+                    from framework.tools.queen_lifecycle_tools import (
+                        _start_trigger_timer,
+                        _start_trigger_webhook,
+                    )
+
+                    saved_tasks = getattr(state, "trigger_tasks", {}) or {}
+                    for tid in state.active_triggers:
+                        tdef = session.available_triggers.get(tid)
+                        if tdef:
+                            # Restore user-configured task override
+                            saved_task = saved_tasks.get(tid, "")
+                            if saved_task:
+                                tdef.task = saved_task
+                            tdef.active = True
+                            session.active_trigger_ids.add(tid)
+                            if tdef.trigger_type == "timer":
+                                await _start_trigger_timer(session, tid, tdef)
+                                logger.info("Restored trigger timer '%s'", tid)
+                            elif tdef.trigger_type == "webhook":
+                                await _start_trigger_webhook(session, tid, tdef)
+                                logger.info("Restored webhook trigger '%s'", tid)
+                        else:
+                            logger.warning(
+                                "Saved trigger '%s' not found in worker entry points, skipping",
+                                tid,
+                            )
+
+                # Restore worker_configured flag
+                if state and getattr(state, "worker_configured", False):
+                    session.worker_configured = True
+            except Exception as e:
+                logger.warning("Failed to restore active triggers: %s", e)
+
        # Emit SSE event so the frontend can update UI
        await self._emit_worker_loaded(session)

@@ -344,9 +499,6 @@ class SessionManager:
        if session.worker_runtime is None:
            return False

-        # Stop judge + escalation
-        self._stop_judge(session)
-
        # Cleanup worker
        if session.runner:
            try:
@@ -354,6 +506,26 @@ class SessionManager:
            except Exception as e:
                logger.error("Error cleaning up worker '%s': %s", session.worker_id, e)

+        # Cancel active trigger timers
+        for tid, task in session.active_timer_tasks.items():
+            task.cancel()
+            logger.info("Cancelled trigger timer '%s' on unload", tid)
+        session.active_timer_tasks.clear()
+
+        # Unsubscribe webhook handlers (server stays alive — queen-owned)
+        for sub_id in session.active_webhook_subs.values():
+            try:
+                session.event_bus.unsubscribe(sub_id)
+            except Exception:
+                pass
+        session.active_webhook_subs.clear()
+        session.active_trigger_ids.clear()
+
+        # Clean up triggers
+        if session.available_triggers:
+            await self._emit_trigger_events(session, "removed", session.available_triggers)
+            session.available_triggers.clear()
+
        worker_id = session.worker_id
        session.worker_id = None
        session.worker_path = None
@@ -379,8 +551,11 @@ class SessionManager:
        if session is None:
            return False

-        # Stop judge
-        self._stop_judge(session)
+        # Capture session data for memory consolidation before teardown
+        _llm = getattr(session, "llm", None)
+        _storage_id = getattr(session, "queen_resume_from", None) or session_id
+        _session_dir = Path.home() / ".hive" / "queen" / "session" / _storage_id
+
        if session.worker_handoff_sub is not None:
            try:
                session.event_bus.unsubscribe(session.worker_handoff_sub)
@@ -388,12 +563,37 @@ class SessionManager:
                pass
            session.worker_handoff_sub = None

-        # Stop queen
+        # Stop queen and memory consolidation subscription
+        if session.memory_consolidation_sub is not None:
+            try:
+                session.event_bus.unsubscribe(session.memory_consolidation_sub)
+            except Exception:
+                pass
+            session.memory_consolidation_sub = None
        if session.queen_task is not None:
            session.queen_task.cancel()
            session.queen_task = None
        session.queen_executor = None

+        # Cancel active trigger timers
+        for task in session.active_timer_tasks.values():
+            task.cancel()
+        session.active_timer_tasks.clear()
+
+        # Unsubscribe webhook handlers and stop queen webhook server
+        for sub_id in session.active_webhook_subs.values():
+            try:
+                session.event_bus.unsubscribe(sub_id)
+            except Exception:
+                pass
+        session.active_webhook_subs.clear()
+        if session.queen_webhook_server is not None:
+            try:
+                await session.queen_webhook_server.stop()
+            except Exception:
+                logger.error("Error stopping queen webhook server", exc_info=True)
+            session.queen_webhook_server = None
+
        # Cleanup worker
        if session.runner:
            try:
@@ -401,6 +601,20 @@ class SessionManager:
            except Exception as e:
                logger.error("Error cleaning up worker: %s", e)

+        # Final memory consolidation — fire-and-forget so teardown isn't blocked.
+        if _llm is not None and _session_dir.exists():
+            import asyncio
+
+            from framework.agents.queen.queen_memory import consolidate_queen_memory
+
+            asyncio.create_task(
+                consolidate_queen_memory(session_id, _session_dir, _llm),
+                name=f"queen-memory-consolidation-{session_id}",
+            )
+
+        # Close per-session event log
+        session.event_bus.close_session_log()
+
        logger.info("Session '%s' stopped", session_id)
        return True

@@ -410,7 +624,7 @@ class SessionManager:

    async def _handle_worker_handoff(self, session: Session, executor: Any, event: Any) -> None:
        """Route worker escalation events into the queen conversation."""
-        if event.stream_id in ("queen", "judge"):
+        if event.stream_id == "queen":
            return

        reason = str(event.data.get("reason", "")).strip()
@@ -461,13 +675,7 @@ class SessionManager:
        are written to the ORIGINAL session's directory so the full conversation
        history accumulates in one place across server restarts.
        """
-        from framework.agents.hive_coder.agent import (
-            queen_goal,
-            queen_graph as _queen_graph,
-        )
-        from framework.graph.executor import GraphExecutor
-        from framework.runner.tool_registry import ToolRegistry
-        from framework.runtime.core import Runtime
+        from framework.server.queen_orchestrator import create_queen

        hive_home = Path.home() / ".hive"

@@ -505,391 +713,81 @@ class SessionManager:
        except OSError:
            pass

-        # Register MCP coding tools
-        queen_registry = ToolRegistry()
-        import framework.agents.hive_coder as _hive_coder_pkg
-
-        hive_coder_dir = Path(_hive_coder_pkg.__file__).parent
-        mcp_config = hive_coder_dir / "mcp_servers.json"
-        if mcp_config.exists():
-            try:
-                queen_registry.load_mcp_config(mcp_config)
-                logger.info("Queen: loaded MCP tools from %s", mcp_config)
-            except Exception:
-                logger.warning("Queen: MCP config failed to load", exc_info=True)
-
-        # Phase state for building/running phase switching
-        from framework.tools.queen_lifecycle_tools import (
-            QueenPhaseState,
-            register_queen_lifecycle_tools,
-        )
-
-        # Start in staging when the caller provided an agent, building otherwise.
-        initial_phase = "staging" if worker_identity else "building"
-        phase_state = QueenPhaseState(phase=initial_phase, event_bus=session.event_bus)
-        session.phase_state = phase_state
-
-        # Always register lifecycle tools — they check session.worker_runtime
-        # at call time, so they work even if no worker is loaded yet.
-        register_queen_lifecycle_tools(
-            queen_registry,
-            session=session,
-            session_id=session.id,
-            session_manager=self,
-            manager_session_id=session.id,
-            phase_state=phase_state,
-        )
-
-        # Monitoring tools need concrete worker paths — only register when present
-        if session.worker_runtime:
-            from framework.tools.worker_monitoring_tools import register_worker_monitoring_tools
-
-            register_worker_monitoring_tools(
-                queen_registry,
-                session.event_bus,
-                session.worker_path,
-                stream_id="queen",
-                worker_graph_id=session.worker_runtime._graph_id,
-            )
-
-        queen_tools = list(queen_registry.get_tools().values())
-        queen_tool_executor = queen_registry.get_executor()
-
-        # Partition tools into phase-specific sets and import prompt segments
-        from framework.agents.hive_coder.nodes import (
-            _QUEEN_BUILDING_TOOLS,
-            _QUEEN_RUNNING_TOOLS,
-            _QUEEN_STAGING_TOOLS,
-            _package_builder_knowledge,
-            _appendices,
-            _queen_behavior_always,
-            _queen_behavior_building,
-            _queen_behavior_running,
-            _queen_behavior_staging,
-            _queen_identity_building,
-            _queen_identity_running,
-            _queen_identity_staging,
-            _queen_phase_7,
-            _queen_style,
-            _queen_tools_building,
-            _queen_tools_running,
-            _queen_tools_staging,
-        )
-
-        building_names = set(_QUEEN_BUILDING_TOOLS)
-        staging_names = set(_QUEEN_STAGING_TOOLS)
-        running_names = set(_QUEEN_RUNNING_TOOLS)
-
-        registered_names = {t.name for t in queen_tools}
-        missing_building = building_names - registered_names
-        if missing_building:
-            logger.warning(
-                "Queen: %d/%d building tools NOT registered: %s",
-                len(missing_building),
-                len(building_names),
-                sorted(missing_building),
-            )
-        logger.info("Queen: registered tools: %s", sorted(registered_names))
-
-        phase_state.building_tools = [t for t in queen_tools if t.name in building_names]
-        phase_state.staging_tools = [t for t in queen_tools if t.name in staging_names]
-        phase_state.running_tools = [t for t in queen_tools if t.name in running_names]
-
-        # Build queen graph with adjusted prompt + tools
-        _orig_node = _queen_graph.nodes[0]
-
-        if worker_identity is None:
-            worker_identity = (
-                "\n\n# Worker Profile\n"
-                "No worker agent loaded. You are operating independently.\n"
-                "Handle all tasks directly using your coding tools."
-            )
-
-        # Compose phase-specific prompts.
-        _building_body = (
-            _queen_style
-            + _queen_tools_building
-            + _queen_behavior_always
-            + _queen_behavior_building
-            + _package_builder_knowledge
-            + _queen_phase_7
-            + _appendices
-            + worker_identity
-        )
-        phase_state.prompt_building = _queen_identity_building + _building_body
-        phase_state.prompt_staging = (
-            _queen_identity_staging
-            + _queen_style
-            + _queen_tools_staging
-            + _queen_behavior_always
-            + _queen_behavior_staging
-            + worker_identity
-        )
-        phase_state.prompt_running = (
-            _queen_identity_running
-            + _queen_style
-            + _queen_tools_running
-            + _queen_behavior_always
-            + _queen_behavior_running
-            + worker_identity
-        )
-
-        # Build the session_start hook: selects the best-fit expert persona
-        # from the user's opening message and replaces the identity prefix.
-        from framework.agents.hive_coder.nodes.thinking_hook import select_expert_persona
-        from framework.graph.event_loop_node import HookContext, HookResult
-        from framework.runtime.event_bus import AgentEvent, EventType
-
-        _session_llm = session.llm
-        _session_event_bus = session.event_bus
-
-        async def _persona_hook(ctx: HookContext) -> HookResult | None:
-            persona = await select_expert_persona(ctx.trigger or "", _session_llm)
-            if not persona:
-                return None
-            if _session_event_bus is not None:
-                await _session_event_bus.publish(
-                    AgentEvent(
-                        type=EventType.QUEEN_PERSONA_SELECTED,
-                        stream_id="queen",
-                        data={"persona": persona},
-                    )
-                )
-            return HookResult(system_prompt=persona + "\n\n" + _building_body)
-
-        initial_prompt_text = phase_state.get_current_prompt()
-
-        registered_tool_names = set(queen_registry.get_tools().keys())
-        declared_tools = _orig_node.tools or []
-        available_tools = [t for t in declared_tools if t in registered_tool_names]
-
-        node_updates: dict = {
-            "system_prompt": initial_prompt_text,
-        }
-        if set(available_tools) != set(declared_tools):
-            missing = sorted(set(declared_tools) - registered_tool_names)
-            if missing:
-                logger.warning("Queen: tools not available: %s", missing)
-            node_updates["tools"] = available_tools
-
-        adjusted_node = _orig_node.model_copy(update=node_updates)
-        _queen_loop_config = {
-            **(_queen_graph.loop_config or {}),
-            "hooks": {"session_start": [_persona_hook]},
-        }
-        queen_graph = _queen_graph.model_copy(
-            update={"nodes": [adjusted_node], "loop_config": _queen_loop_config}
-        )
-
-        queen_runtime = Runtime(hive_home / "queen")
-
-        async def _queen_loop():
-            try:
-                executor = GraphExecutor(
-                    runtime=queen_runtime,
-                    llm=session.llm,
-                    tools=queen_tools,
-                    tool_executor=queen_tool_executor,
-                    event_bus=session.event_bus,
-                    stream_id="queen",
-                    storage_path=queen_dir,
-                    loop_config=_queen_loop_config,
-                    execution_id=session.id,
-                    dynamic_tools_provider=phase_state.get_current_tools,
-                    dynamic_prompt_provider=phase_state.get_current_prompt,
-                )
-                session.queen_executor = executor
-
-                # Wire inject_notification so phase switches notify the queen LLM
-                async def _inject_phase_notification(content: str) -> None:
-                    node = executor.node_registry.get("queen")
-                    if node is not None and hasattr(node, "inject_event"):
-                        await node.inject_event(content)
-
-                phase_state.inject_notification = _inject_phase_notification
-
-                # Auto-switch to staging when worker execution finishes naturally
-                # and notify the queen about the termination
-                from framework.runtime.event_bus import EventType as _ET
-
-                async def _on_worker_done(event):
-                    if event.stream_id == "queen":
-                        return
-                    if phase_state.phase == "running":
-                        # Build termination notification for the queen
-                        if event.type == _ET.EXECUTION_COMPLETED:
-                            output = event.data.get("output", {})
-                            output_summary = ""
-                            if output:
-                                # Summarize key outputs for the queen
-                                for key, value in output.items():
-                                    val_str = str(value)
-                                    if len(val_str) > 200:
-                                        val_str = val_str[:200] + "..."
-                                    output_summary += f"\n  {key}: {val_str}"
-                            _out = output_summary or " (no output keys set)"
-                            notification = (
-                                "[WORKER_TERMINAL] Worker finished successfully.\n"
-                                f"Output:{_out}\n"
-                                "Report this to the user. "
-                                "Ask if they want to continue with another run."
-                            )
-                        else:  # EXECUTION_FAILED
-                            error = event.data.get("error", "Unknown error")
-                            notification = (
-                                "[WORKER_TERMINAL] Worker failed.\n"
-                                f"Error: {error}\n"
-                                "Report this to the user and help them troubleshoot."
-                            )
-
-                        # Inject notification to queen before phase switch
-                        node = executor.node_registry.get("queen")
-                        if node is not None and hasattr(node, "inject_event"):
-                            await node.inject_event(notification)
-
-                        await phase_state.switch_to_staging(source="auto")
-
-                session.event_bus.subscribe(
-                    event_types=[_ET.EXECUTION_COMPLETED, _ET.EXECUTION_FAILED],
-                    handler=_on_worker_done,
-                )
-                self._subscribe_worker_handoffs(session, executor)
-
-                logger.info(
-                    "Queen starting in %s phase with %d tools: %s",
-                    phase_state.phase,
-                    len(phase_state.get_current_tools()),
-                    [t.name for t in phase_state.get_current_tools()],
-                )
-                result = await executor.execute(
-                    graph=queen_graph,
-                    goal=queen_goal,
-                    input_data={"greeting": initial_prompt or "Session started."},
-                    session_state={"resume_session_id": session.id},
-                )
-                if result.success:
-                    logger.warning("Queen executor returned (should be forever-alive)")
-                else:
-                    logger.error(
-                        "Queen executor failed: %s",
-                        result.error or "(no error message)",
-                    )
-            except Exception:
-                logger.error("Queen conversation crashed", exc_info=True)
-            finally:
-                session.queen_executor = None
-
-        session.queen_task = asyncio.create_task(_queen_loop())
-
-    # ------------------------------------------------------------------
-    # Judge startup / teardown
-    # ------------------------------------------------------------------
-
-    async def _start_judge(
-        self,
-        session: Session,
-        worker_storage_path: str | Path,
-    ) -> None:
-        """Start the health judge for a session's worker."""
-        from framework.graph.executor import GraphExecutor
-        from framework.monitoring import judge_goal, judge_graph
-        from framework.runner.tool_registry import ToolRegistry
-        from framework.runtime.core import Runtime
-        from framework.runtime.event_bus import EventType as _ET
-        from framework.tools.worker_monitoring_tools import register_worker_monitoring_tools
-
-        worker_storage_path = Path(worker_storage_path)
-
+        # Enable per-session event persistence so that all eventbus events
+        # survive server restarts and can be replayed on cold-session resume.
+        # Scan the existing event log to find the max iteration ever written,
+        # then use max+1 as offset so resumed sessions produce monotonically
+        # increasing iteration values — preventing frontend message ID collisions.
+        iteration_offset = 0
+        events_path = queen_dir / "events.jsonl"
        try:
-            # Monitoring tools
-            monitoring_registry = ToolRegistry()
-            register_worker_monitoring_tools(
-                monitoring_registry,
-                session.event_bus,
-                worker_storage_path,
-                worker_graph_id=session.worker_runtime._graph_id,
-            )
-
-            hive_home = Path.home() / ".hive"
-            judge_dir = hive_home / "judge" / "session" / session.id
-            judge_dir.mkdir(parents=True, exist_ok=True)
-
-            judge_runtime = Runtime(hive_home / "judge")
-            monitoring_tools = list(monitoring_registry.get_tools().values())
-            monitoring_executor = monitoring_registry.get_executor()
-
-            async def _judge_loop():
-                interval = 300  # 5 minutes between checks
-                # Wait before the first check — let the worker actually do something
-                await asyncio.sleep(interval)
-                while True:
-                    try:
-                        executor = GraphExecutor(
-                            runtime=judge_runtime,
-                            llm=session.llm,
-                            tools=monitoring_tools,
-                            tool_executor=monitoring_executor,
-                            event_bus=session.event_bus,
-                            stream_id="judge",
-                            storage_path=judge_dir,
-                            loop_config=judge_graph.loop_config,
-                        )
-                        await executor.execute(
-                            graph=judge_graph,
-                            goal=judge_goal,
-                            input_data={
-                                "event": {"source": "timer", "reason": "scheduled"},
-                            },
-                            session_state={"resume_session_id": session.id},
-                        )
-                    except Exception:
-                        logger.error("Health judge tick failed", exc_info=True)
-                    await asyncio.sleep(interval)
-
-            session.judge_task = asyncio.create_task(_judge_loop())
-
-            # Escalation: judge → queen
-            async def _on_escalation(event):
-                ticket = event.data.get("ticket", {})
-                executor = session.queen_executor
-                if executor is None:
-                    logger.warning("Escalation received but queen executor is None")
-                    return
-                node = executor.node_registry.get("queen")
-                if node is not None and hasattr(node, "inject_event"):
-                    msg = "[ESCALATION TICKET from Health Judge]\n" + json.dumps(
-                        ticket, indent=2, ensure_ascii=False
+            if events_path.exists():
+                max_iter = -1
+                with open(events_path, encoding="utf-8") as f:
+                    for line in f:
+                        line = line.strip()
+                        if not line:
+                            continue
+                        try:
+                            # Non-object JSON or "data": null raises AttributeError on .get.
+                            it = json.loads(line).get("data", {}).get("iteration")
+                        except (json.JSONDecodeError, TypeError, AttributeError):
+                            continue
+                        if isinstance(it, int) and it > max_iter:
+                            max_iter = it
+                if max_iter >= 0:
+                    iteration_offset = max_iter + 1
+                    logger.info(
+                        "Session '%s' resuming with iteration_offset=%d (from events.jsonl max)",
+                        session.id,
+                        iteration_offset,
                    )
-                    await node.inject_event(msg)
-                else:
-                    logger.warning("Escalation received but queen node not ready")
+        except OSError:
+            pass
+        session.event_bus.set_session_log(events_path, iteration_offset=iteration_offset)

-            session.escalation_sub = session.event_bus.subscribe(
-                event_types=[_ET.WORKER_ESCALATION_TICKET],
-                handler=_on_escalation,
+        session.queen_task = await create_queen(
+            session=session,
+            session_manager=self,
+            worker_identity=worker_identity,
+            queen_dir=queen_dir,
+            initial_prompt=initial_prompt,
+        )
+
+        # Auto-load worker on cold restore — the queen's conversation expects
+        # the agent to be loaded, but the new session has no worker.
+        if session.queen_resume_from and not session.worker_runtime:
+            meta_path = queen_dir / "meta.json"
+            if meta_path.exists():
+                try:
+                    _meta = json.loads(meta_path.read_text(encoding="utf-8"))
+                    _agent_path = _meta.get("agent_path")
+                    if _agent_path and Path(_agent_path).exists():
+                        await self.load_worker(session.id, _agent_path)
+                        if session.phase_state:
+                            await session.phase_state.switch_to_staging(source="auto")
+                        logger.info("Cold restore: auto-loaded worker from %s", _agent_path)
+                except Exception:
+                    logger.warning("Cold restore: failed to auto-load worker", exc_info=True)
+
+        # Memory consolidation — triggered by context compaction events.
+        # Compaction is a natural signal that "enough has happened to be worth remembering".
+        _consolidation_llm = session.llm
+        _consolidation_session_dir = queen_dir
+
+        async def _on_compaction(_event) -> None:
+            # The event argument is unused; the handler only runs the consolidation.
+            from framework.agents.queen.queen_memory import consolidate_queen_memory
+
+            _args = (session.id, _consolidation_session_dir, _consolidation_llm)
+            await consolidate_queen_memory(*_args)

-            logger.info("Judge started for session '%s'", session.id)
+        from framework.runtime.event_bus import EventType as _ET

-        except Exception as e:
-            logger.error(
-                "Failed to start judge for session '%s': %s",
-                session.id,
-                e,
-                exc_info=True,
-            )
-
-    def _stop_judge(self, session: Session) -> None:
-        """Cancel judge task and unsubscribe escalation events."""
-        if session.judge_task is not None:
-            session.judge_task.cancel()
-            session.judge_task = None
-        if session.escalation_sub is not None:
-            try:
-                session.event_bus.unsubscribe(session.escalation_sub)
-            except Exception:
-                pass
-            session.escalation_sub = None
+        session.memory_consolidation_sub = session.event_bus.subscribe(
+            event_types=[_ET.CONTEXT_COMPACTED],
+            handler=_on_compaction,
+        )

    # ------------------------------------------------------------------
    # Queen notifications
@@ -907,7 +805,22 @@ class SessionManager:
            return

        profile = build_worker_profile(session.worker_runtime, agent_path=session.worker_path)
-        await node.inject_event(f"[SYSTEM] Worker loaded.{profile}")
+
+        # Append available trigger info so the queen knows what's schedulable
+        trigger_lines = ""
+        if session.available_triggers:
+            parts = []
+            for t in session.available_triggers.values():
+                cfg = t.trigger_config
+                detail = cfg.get("cron") or f"every {cfg.get('interval_minutes', '?')} min"
+                task_info = f' -> task: "{t.task}"' if t.task else " (no task configured)"
+                parts.append(f"  - {t.id} ({t.trigger_type}: {detail}){task_info}")
+            trigger_lines = (
+                "\n\nAvailable triggers (inactive — use set_trigger to activate):\n"
+                + "\n".join(parts)
+            )
+
+        await node.inject_event(f"[SYSTEM] Worker loaded.{profile}{trigger_lines}")

    async def _emit_worker_loaded(self, session: Session) -> None:
        """Publish a WORKER_LOADED event so the frontend can update."""
@@ -939,9 +852,41 @@ class SessionManager:

        await node.inject_event(
            "[SYSTEM] Worker unloaded. You are now operating independently. "
-            "Handle all tasks directly using your coding tools."
+            "Design or build the agent to solve the user's problem "
+            "according to your current phase."
        )

+    async def _emit_trigger_events(
+        self,
+        session: Session,
+        kind: str,
+        triggers: dict[str, TriggerDefinition],
+    ) -> None:
+        """Publish a TRIGGER_AVAILABLE or TRIGGER_REMOVED event for every trigger."""
+        from framework.runtime.event_bus import AgentEvent, EventType
+
+        # Only kind == "available" maps to TRIGGER_AVAILABLE; anything else is a removal.
+        if kind == "available":
+            event_type = EventType.TRIGGER_AVAILABLE
+        else:
+            event_type = EventType.TRIGGER_REMOVED
+
+        # Include "entry_node" only when a runner is attached and its entry node is truthy.
+        runner = getattr(session, "runner", None)
+        entry = runner.graph.entry_node if runner else None
+        entry_extra = {"entry_node": entry} if entry else {}
+
+        for trig in triggers.values():
+            payload = {
+                "trigger_id": trig.id,
+                "trigger_type": trig.trigger_type,
+                "trigger_config": trig.trigger_config,
+                "name": trig.description or trig.id,
+                **entry_extra,
+            }
+            event = AgentEvent(type=event_type, stream_id="queen", data=payload)
+            await session.event_bus.publish(event)
+
    async def revive_queen(self, session: Session, initial_prompt: str | None = None) -> None:
        """Revive a dead queen executor on an existing session.

@@ -1013,13 +958,19 @@ class SessionManager:
        # Check whether any message part files are actually present
        has_messages = False
        try:
-            for node_dir in convs_dir.iterdir():
-                if not node_dir.is_dir():
-                    continue
-                parts_dir = node_dir / "parts"
-                if parts_dir.exists() and any(f.suffix == ".json" for f in parts_dir.iterdir()):
-                    has_messages = True
-                    break
+            # Flat layout: conversations/parts/*.json
+            flat_parts = convs_dir / "parts"
+            if flat_parts.exists() and any(f.suffix == ".json" for f in flat_parts.iterdir()):
+                has_messages = True
+            else:
+                # Node-based layout: conversations/<node_id>/parts/*.json
+                for node_dir in convs_dir.iterdir():
+                    if not node_dir.is_dir() or node_dir.name == "parts":
+                        continue
+                    parts_dir = node_dir / "parts"
+                    if parts_dir.exists() and any(f.suffix == ".json" for f in parts_dir.iterdir()):
+                        has_messages = True
+                        break
        except OSError:
            pass

@@ -1096,21 +1047,27 @@ class SessionManager:
            if convs_dir.exists():
                try:
                    all_parts: list[dict] = []
-                    for node_dir in convs_dir.iterdir():
-                        if not node_dir.is_dir():
-                            continue
-                        parts_dir = node_dir / "parts"
+
+                    def _collect_parts(parts_dir: Path, _dest: list[dict] = all_parts) -> None:
                        if not parts_dir.exists():
-                            continue
+                            return
                        for part_file in sorted(parts_dir.iterdir()):
                            if part_file.suffix != ".json":
                                continue
                            try:
                                part = json.loads(part_file.read_text(encoding="utf-8"))
                                part.setdefault("created_at", part_file.stat().st_mtime)
-                                all_parts.append(part)
+                                _dest.append(part)
                            except (json.JSONDecodeError, OSError):
                                continue
+
+                    # Flat layout: conversations/parts/*.json
+                    _collect_parts(convs_dir / "parts")
+                    # Node-based layout: conversations/<node_id>/parts/*.json
+                    for node_dir in convs_dir.iterdir():
+                        if not node_dir.is_dir() or node_dir.name == "parts":
+                            continue
+                        _collect_parts(node_dir / "parts")
                    # Filter to client-facing messages only
                    client_msgs = [
                        p
@@ -5,6 +5,7 @@ Uses aiohttp TestClient with mocked sessions to test all endpoints
 without requiring actual LLM calls or agent loading.
 """

+import asyncio
 import json
 from dataclasses import dataclass, field
 from pathlib import Path
@@ -13,9 +14,13 @@ from unittest.mock import AsyncMock, MagicMock
 import pytest
 from aiohttp.test_utils import TestClient, TestServer

+from framework.runtime.triggers import TriggerDefinition
 from framework.server.app import create_app
 from framework.server.session_manager import Session

+REPO_ROOT = Path(__file__).resolve().parents[4]
+EXAMPLE_AGENT_PATH = REPO_ROOT / "examples" / "templates" / "deep_research_agent"
+
 # ---------------------------------------------------------------------------
 # Mock helpers
 # ---------------------------------------------------------------------------
@@ -37,6 +42,7 @@ class MockNodeSpec:
    client_facing: bool = False
    success_criteria: str | None = None
    system_prompt: str | None = None
+    sub_agents: list = field(default_factory=list)


@dataclass
@@ -67,6 +73,7 @@ class MockEntryPoint:
    name: str = "Default"
    entry_node: str = "start"
    trigger_type: str = "manual"
+    trigger_config: dict = field(default_factory=dict)


@dataclass
@@ -130,6 +137,9 @@ class MockRuntime:
    def get_stats(self):
        return {"running": True, "executions": 1}

+    def get_timer_next_fire_in(self, ep_id):
+        return None
+

 class MockAgentInfo:
    name: str = "test_agent"
@@ -164,6 +174,7 @@ def _make_session(
    runner.intro_message = "Test intro"

    mock_event_bus = MagicMock()
+    mock_event_bus.publish = AsyncMock()
    mock_llm = MagicMock()

    queen_executor = _make_queen_executor() if with_queen else None
@@ -202,11 +213,8 @@ def tmp_agent_dir(tmp_path, monkeypatch):
    return tmp_path, agent_name, base


-@pytest.fixture
-def sample_session(tmp_agent_dir):
-    """Create a sample session with state.json, checkpoints, and conversations."""
-    tmp_path, agent_name, base = tmp_agent_dir
-    session_id = "session_20260220_120000_abc12345"
+def _write_sample_session(base: Path, session_id: str):
+    """Create a sample worker session on disk."""
    session_dir = base / "sessions" / session_id

    # state.json
@@ -287,6 +295,20 @@ def sample_session(tmp_agent_dir):
    return session_id, session_dir, state


+@pytest.fixture
+def sample_session(tmp_agent_dir):
+    """Create a sample session with state.json, checkpoints, and conversations."""
+    _tmp_path, _agent_name, base = tmp_agent_dir
+    return _write_sample_session(base, "session_20260220_120000_abc12345")
+
+
+@pytest.fixture
+def custom_id_session(tmp_agent_dir):
+    """Create a sample session that uses a custom non-session_* ID."""
+    _tmp_path, _agent_name, base = tmp_agent_dir
+    return _write_sample_session(base, "my-custom-session")
+
+
 def _make_app_with_session(session):
    """Create an aiohttp app with a pre-loaded session."""
    app = create_app()
@@ -342,6 +364,35 @@ class TestHealth:


 class TestSessionCRUD:
+    @pytest.mark.asyncio
+    async def test_create_session_with_worker_forwards_session_id(self):
+        app = create_app()
+        manager = app["manager"]
+        manager.create_session_with_worker = AsyncMock(
+            return_value=_make_session(agent_id="my-custom-session")
+        )
+
+        async with TestClient(TestServer(app)) as client:
+            resp = await client.post(
+                "/api/sessions",
+                json={
+                    "session_id": "my-custom-session",
+                    "agent_path": str(EXAMPLE_AGENT_PATH),
+                },
+            )
+            data = await resp.json()
+
+        assert resp.status == 201
+        assert data["session_id"] == "my-custom-session"
+        manager.create_session_with_worker.assert_awaited_once_with(
+            str(EXAMPLE_AGENT_PATH.resolve()),
+            agent_id=None,
+            session_id="my-custom-session",
+            model=None,
+            initial_prompt=None,
+            queen_resume_from=None,
+        )
+
    @pytest.mark.asyncio
    async def test_list_sessions_empty(self):
        app = create_app()
@@ -436,6 +487,70 @@ class TestSessionCRUD:
            data = await resp.json()
            assert "primary" in data["graphs"]

+    @pytest.mark.asyncio
+    async def test_update_trigger_task(self, tmp_path):
+        session = _make_session(tmp_dir=tmp_path)
+        session.available_triggers["daily"] = TriggerDefinition(
+            id="daily",
+            trigger_type="timer",
+            trigger_config={"cron": "0 5 * * *"},
+            task="Old task",
+        )
+        app = _make_app_with_session(session)
+        async with TestClient(TestServer(app)) as client:
+            resp = await client.patch(
+                "/api/sessions/test_agent/triggers/daily",
+                json={"task": "New task"},
+            )
+            assert resp.status == 200
+            data = await resp.json()
+            assert data["task"] == "New task"
+            assert data["trigger_config"]["cron"] == "0 5 * * *"
+            assert session.available_triggers["daily"].task == "New task"
+
+    @pytest.mark.asyncio
+    async def test_update_trigger_cron_restarts_active_timer(self, tmp_path):
+        session = _make_session(tmp_dir=tmp_path)
+        session.available_triggers["daily"] = TriggerDefinition(
+            id="daily",
+            trigger_type="timer",
+            trigger_config={"cron": "0 5 * * *"},
+            task="Run task",
+            active=True,
+        )
+        session.active_trigger_ids.add("daily")
+        session.active_timer_tasks["daily"] = asyncio.create_task(asyncio.sleep(60))
+        app = _make_app_with_session(session)
+        async with TestClient(TestServer(app)) as client:
+            resp = await client.patch(
+                "/api/sessions/test_agent/triggers/daily",
+                json={"trigger_config": {"cron": "0 6 * * *"}},
+            )
+            assert resp.status == 200
+            data = await resp.json()
+            assert data["trigger_config"]["cron"] == "0 6 * * *"
+            assert "daily" in session.active_timer_tasks
+            assert session.active_timer_tasks["daily"] is not None
+            assert session.available_triggers["daily"].trigger_config["cron"] == "0 6 * * *"
+            session.active_timer_tasks["daily"].cancel()
+
+    @pytest.mark.asyncio
+    async def test_update_trigger_cron_rejects_invalid_expression(self, tmp_path):
+        session = _make_session(tmp_dir=tmp_path)
+        session.available_triggers["daily"] = TriggerDefinition(
+            id="daily",
+            trigger_type="timer",
+            trigger_config={"cron": "0 5 * * *"},
+            task="Run task",
+        )
+        app = _make_app_with_session(session)
+        async with TestClient(TestServer(app)) as client:
+            resp = await client.patch(
+                "/api/sessions/test_agent/triggers/daily",
+                json={"trigger_config": {"cron": "not a cron"}},
+            )
+            assert resp.status == 400
+

 class TestExecution:
    @pytest.mark.asyncio
@@ -762,6 +877,22 @@ class TestWorkerSessions:
            assert data["sessions"][0]["status"] == "paused"
            assert data["sessions"][0]["steps"] == 5

+    @pytest.mark.asyncio
+    async def test_list_sessions_includes_custom_id(self, custom_id_session, tmp_agent_dir):
+        session_id, session_dir, state = custom_id_session
+        tmp_path, agent_name, base = tmp_agent_dir
+
+        session = _make_session(tmp_dir=tmp_path / ".hive" / "agents" / agent_name)
+        app = _make_app_with_session(session)
+
+        async with TestClient(TestServer(app)) as client:
+            resp = await client.get("/api/sessions/test_agent/worker-sessions")
+            assert resp.status == 200
+            data = await resp.json()
+            assert len(data["sessions"]) == 1
+            assert data["sessions"][0]["session_id"] == session_id
+            assert data["sessions"][0]["status"] == "paused"
+
    @pytest.mark.asyncio
    async def test_list_sessions_empty(self, tmp_agent_dir):
        tmp_path, agent_name, base = tmp_agent_dir
@@ -1279,6 +1410,28 @@ class TestLogs:
            assert len(data["logs"]) >= 1
            assert data["logs"][0]["run_id"] == session_id

+    @pytest.mark.asyncio
+    async def test_logs_list_summaries_with_custom_id(self, custom_id_session, tmp_agent_dir):
+        session_id, session_dir, state = custom_id_session
+        tmp_path, agent_name, base = tmp_agent_dir
+
+        from framework.runtime.runtime_log_store import RuntimeLogStore
+
+        log_store = RuntimeLogStore(base)
+        session = _make_session(
+            tmp_dir=tmp_path / ".hive" / "agents" / agent_name,
+            log_store=log_store,
+        )
+        app = _make_app_with_session(session)
+
+        async with TestClient(TestServer(app)) as client:
+            resp = await client.get("/api/sessions/test_agent/logs")
+            assert resp.status == 200
+            data = await resp.json()
+            assert "logs" in data
+            assert len(data["logs"]) >= 1
+            assert data["logs"][0]["run_id"] == session_id
+
    @pytest.mark.asyncio
    async def test_logs_session_summary(self, sample_session, tmp_agent_dir):
        session_id, session_dir, state = sample_session
@@ -1556,3 +1709,106 @@ class TestErrorMiddleware:
        async with TestClient(TestServer(app)) as client:
            resp = await client.get("/api/nonexistent")
            assert resp.status == 404
+
+
+class TestCleanupStaleActiveSessions:
+    """Tests for _cleanup_stale_active_sessions with two-layer protection."""
+
+    def _make_manager(self):
+        from framework.server.session_manager import SessionManager
+
+        return SessionManager()
+
+    def _write_state(self, session_dir: Path, status: str, pid: int | None = None) -> None:
+        session_dir.mkdir(parents=True, exist_ok=True)
+        state: dict = {"status": status, "session_id": session_dir.name}
+        if pid is not None:
+            state["pid"] = pid
+        (session_dir / "state.json").write_text(json.dumps(state))
+
+    def _read_state(self, session_dir: Path) -> dict:
+        return json.loads((session_dir / "state.json").read_text())
+
+    def test_stale_session_is_cancelled(self, tmp_path, monkeypatch):
+        """Truly stale active sessions (no live tracking, no PID) get cancelled."""
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+        agent_path = Path("my_agent")
+        sessions_dir = tmp_path / ".hive" / "agents" / "my_agent" / "sessions"
+        session_dir = sessions_dir / "session_stale_001"
+
+        self._write_state(session_dir, "active")
+
+        mgr = self._make_manager()
+        mgr._cleanup_stale_active_sessions(agent_path)
+
+        state = self._read_state(session_dir)
+        assert state["status"] == "cancelled"
+        assert "Stale session" in state["result"]["error"]
+
+    def test_live_in_memory_session_is_skipped(self, tmp_path, monkeypatch):
+        """Sessions tracked in self._sessions must NOT be cancelled (Layer 1)."""
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+        agent_path = Path("my_agent")
+        sessions_dir = tmp_path / ".hive" / "agents" / "my_agent" / "sessions"
+        session_dir = sessions_dir / "session_live_002"
+
+        self._write_state(session_dir, "active")
+
+        mgr = self._make_manager()
+        # Simulate a live session in the manager's in-memory map
+        mgr._sessions["session_live_002"] = MagicMock()
+
+        mgr._cleanup_stale_active_sessions(agent_path)
+
+        state = self._read_state(session_dir)
+        assert state["status"] == "active", "Live in-memory session should NOT be cancelled"
+
+    def test_session_with_live_pid_is_skipped(self, tmp_path, monkeypatch):
+        """Sessions whose owning PID is still alive must NOT be cancelled (Layer 2)."""
+        import os
+
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+        agent_path = Path("my_agent")
+        sessions_dir = tmp_path / ".hive" / "agents" / "my_agent" / "sessions"
+        session_dir = sessions_dir / "session_pid_003"
+
+        # Use the current process PID — guaranteed to be alive
+        self._write_state(session_dir, "active", pid=os.getpid())
+
+        mgr = self._make_manager()
+        mgr._cleanup_stale_active_sessions(agent_path)
+
+        state = self._read_state(session_dir)
+        assert state["status"] == "active", "Session with live PID should NOT be cancelled"
+
+    def test_session_with_dead_pid_is_cancelled(self, tmp_path, monkeypatch):
+        """Sessions whose owning PID is dead should be cancelled."""
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+        agent_path = Path("my_agent")
+        sessions_dir = tmp_path / ".hive" / "agents" / "my_agent" / "sessions"
+        session_dir = sessions_dir / "session_dead_004"
+
+        # Use a PID that is almost certainly not running
+        self._write_state(session_dir, "active", pid=999999999)
+
+        mgr = self._make_manager()
+        mgr._cleanup_stale_active_sessions(agent_path)
+
+        state = self._read_state(session_dir)
+        assert state["status"] == "cancelled"
+        assert "Stale session" in state["result"]["error"]
+
+    def test_paused_session_is_never_touched(self, tmp_path, monkeypatch):
+        """Paused sessions should remain intact regardless of PID or tracking."""
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+        agent_path = Path("my_agent")
+        sessions_dir = tmp_path / ".hive" / "agents" / "my_agent" / "sessions"
+        session_dir = sessions_dir / "session_paused_005"
+
+        self._write_state(session_dir, "paused")
+
+        mgr = self._make_manager()
+        mgr._cleanup_stale_active_sessions(agent_path)
+
+        state = self._read_state(session_dir)
+        assert state["status"] == "paused", "Paused sessions must remain untouched"
@@ -0,0 +1,26 @@
+"""Hive Agent Skills — discovery, parsing, and injection of SKILL.md packages.
+
+Implements the open Agent Skills standard (agentskills.io) for portable
+skill discovery and activation, plus built-in default skills for runtime
+operational discipline.
+"""
+
+from framework.skills.catalog import SkillCatalog
+from framework.skills.config import DefaultSkillConfig, SkillsConfig
+from framework.skills.defaults import DefaultSkillManager
+from framework.skills.discovery import DiscoveryConfig, SkillDiscovery
+from framework.skills.manager import SkillsManager, SkillsManagerConfig
+from framework.skills.parser import ParsedSkill, parse_skill_md
+
+__all__ = [
+    "DefaultSkillConfig",
+    "DefaultSkillManager",
+    "DiscoveryConfig",
+    "ParsedSkill",
+    "SkillCatalog",
+    "SkillDiscovery",
+    "SkillsConfig",
+    "SkillsManager",
+    "SkillsManagerConfig",
+    "parse_skill_md",
+]
@@ -0,0 +1,24 @@
+---
+name: hive.batch-ledger
+description: Track per-item status when processing collections to prevent skipped or duplicated items.
+metadata:
+  author: hive
+  type: default-skill
+---
+
+## Operational Protocol: Batch Progress Ledger
+
+When processing a collection of items, maintain a batch ledger in `_batch_ledger`.
+
+Initialize when you identify the batch:
+- `_batch_total`: total item count
+- `_batch_ledger`: JSON with per-item status
+
+Per-item statuses: pending → in_progress → completed|failed|skipped
+
+- Set `in_progress` BEFORE processing
+- Set final status AFTER processing with 1-line result_summary
+- Include error reason for failed/skipped items
+- Update aggregate counts after each item
+- NEVER remove items from the ledger
+- If resuming, skip items already marked completed
@@ -0,0 +1,22 @@
+---
+name: hive.context-preservation
+description: Proactively preserve critical information before automatic context pruning destroys it.
+metadata:
+  author: hive
+  type: default-skill
+---
+
+## Operational Protocol: Context Preservation
+
+You operate under a finite context window. Important information WILL be pruned.
+
+Save-As-You-Go: After any tool call producing information you'll need later,
+immediately extract key data into `_working_notes` or `_preserved_data`.
+Do NOT rely on referring back to old tool results.
+
+What to extract: URLs and key snippets (not full pages), relevant API fields
+(not raw JSON), specific lines/values (not entire files), analysis results
+(not raw data).
+
+Before transitioning to the next phase/node, write a handoff summary to
+`_handoff_context` with everything the next phase needs to know.
@@ -0,0 +1,18 @@
+---
+name: hive.error-recovery
+description: Follow a structured recovery protocol when tool calls fail instead of blindly retrying or giving up.
+metadata:
+  author: hive
+  type: default-skill
+---
+
+## Operational Protocol: Error Recovery
+
+When a tool call fails:
+
+1. Diagnose — record error in notes, classify as transient or structural
+2. Decide — transient: retry once. Structural fixable: fix and retry.
+   Structural unfixable: record as failed, move to next item.
+   Blocking all progress: record escalation note.
+3. Adapt — if same tool failed 3+ times, stop using it and find alternative.
+   Update plan in notes. Never silently drop the failed item.
@@ -0,0 +1,27 @@
+---
+name: hive.note-taking
+description: Maintain structured working notes throughout execution to prevent information loss during context pruning.
+metadata:
+  author: hive
+  type: default-skill
+---
+
+## Operational Protocol: Structured Note-Taking
+
+Maintain structured working notes in the shared memory key `_working_notes`.
+Update at these checkpoints:
+
+- After completing each discrete subtask or batch item
+- After receiving new information that changes your plan
+- Before any tool call that will produce substantial output
+
+Structure:
+
+### Objective — restate the goal
+### Current Plan — numbered steps, mark completed with ✓
+### Key Decisions — decisions made and WHY
+### Working Data — intermediate results, extracted values
+### Open Questions — uncertainties to verify
+### Blockers — anything preventing progress
+
+Update incrementally — do not rewrite from scratch each time.
@@ -0,0 +1,20 @@
+---
+name: hive.quality-monitor
+description: Periodically self-assess output quality to catch degradation before the judge does.
+metadata:
+  author: hive
+  type: default-skill
+---
+
+## Operational Protocol: Quality Self-Assessment
+
+Every 5 iterations, self-assess:
+
+1. On-task? Still working toward the stated objective?
+2. Thorough? Cutting corners compared to earlier?
+3. Non-repetitive? Producing new value or rehashing?
+4. Consistent? Does the latest output contradict earlier decisions?
+5. Complete? Tracking all items, or silently dropping some?
+
+If degrading: write assessment to `_quality_log`, re-read `_working_notes`,
+change approach explicitly. If acceptable: brief note in `_quality_log`.
@@ -0,0 +1,17 @@
+---
+name: hive.task-decomposition
+description: Decompose complex tasks into explicit subtasks before diving in.
+metadata:
+  author: hive
+  type: default-skill
+---
+
+## Operational Protocol: Task Decomposition
+
+Before starting a complex task:
+
+1. Decompose — break into numbered subtasks under the Current Plan section of `_working_notes`
+2. Estimate — relative effort per subtask (small/medium/large)
+3. Execute — work through in order, mark ✓ when complete
+4. Budget — if running low on iterations, prioritize by impact
+5. Verify — before declaring done, every subtask must be ✓, skipped (with reason), or blocked
@@ -0,0 +1,107 @@
+"""Skill catalog — in-memory index with system prompt generation.
+
+Builds the XML catalog injected into the system prompt for model-driven
+skill activation per the Agent Skills standard.
+"""
+
+from __future__ import annotations
+
+import logging
+from xml.sax.saxutils import escape
+
+from framework.skills.parser import ParsedSkill
+
+logger = logging.getLogger(__name__)
+
+_BEHAVIORAL_INSTRUCTION = (
+    "The following skills provide specialized instructions for specific tasks.\n"
+    "When a task matches a skill's description, read the SKILL.md at the listed\n"
+    "location to load the full instructions before proceeding.\n"
+    "When a skill references relative paths, resolve them against the skill's\n"
+    "directory (the parent of SKILL.md) and use absolute paths in tool calls."
+)
+
+
+class SkillCatalog:
+    """In-memory catalog of discovered skills, keyed by name, with activation tracking."""
+
+    def __init__(self, skills: list[ParsedSkill] | None = None):
+        self._skills: dict[str, ParsedSkill] = {}
+        self._activated: set[str] = set()
+        if skills:
+            for skill in skills:
+                self.add(skill)
+
+    def add(self, skill: ParsedSkill) -> None:
+        """Add a skill to the catalog, replacing any existing skill with the same name."""
+        self._skills[skill.name] = skill
+
+    def get(self, name: str) -> ParsedSkill | None:
+        """Look up a skill by name; returns None if it is not in the catalog."""
+        return self._skills.get(name)
+
+    def mark_activated(self, name: str) -> None:
+        """Mark a skill as activated in the current session (the name is not validated)."""
+        self._activated.add(name)
+
+    def is_activated(self, name: str) -> bool:
+        """Check if a skill has been activated."""
+        return name in self._activated
+
+    @property
+    def skill_count(self) -> int:
+        return len(self._skills)
+
+    @property
+    def allowlisted_dirs(self) -> list[str]:
+        """All skill base directories for file access allowlisting."""
+        return [skill.base_dir for skill in self._skills.values()]
+
+    def to_prompt(self) -> str:
+        """Generate the catalog prompt for system prompt injection.
+
+        Returns empty string if no community/user skills are discovered
+        (default skills are handled separately by DefaultSkillManager).
+        """
+        # Filter out framework-scope skills (default skills) — they're
+        # injected via the protocols prompt, not the catalog
+        community_skills = [s for s in self._skills.values() if s.source_scope != "framework"]
+
+        if not community_skills:
+            return ""
+
+        lines = ["<available_skills>"]
+        for skill in sorted(community_skills, key=lambda s: s.name):
+            lines.append("  <skill>")
+            lines.append(f"    <name>{escape(skill.name)}</name>")
+            lines.append(f"    <description>{escape(skill.description)}</description>")
+            lines.append(f"    <location>{escape(skill.location)}</location>")
+            lines.append("  </skill>")
+        lines.append("</available_skills>")
+
+        xml_block = "\n".join(lines)
+        return f"{_BEHAVIORAL_INSTRUCTION}\n\n{xml_block}"
+
+    def build_pre_activated_prompt(self, skill_names: list[str]) -> str:
+        """Build prompt content for pre-activated skills.
+
+        Pre-activated skills get their full SKILL.md body loaded into
+        the system prompt at startup (tier 2), bypassing model-driven
+        activation.
+
+        Returns empty string if no skills match.
+        """
+        parts: list[str] = []
+
+        for name in skill_names:
+            skill = self.get(name)
+            if skill is None:
+                logger.warning("Pre-activated skill '%s' not found in catalog", name)
+                continue
+            if self.is_activated(name):
+                continue  # Already activated, skip duplicate
+
+            self.mark_activated(name)
+            parts.append(f"--- Pre-Activated Skill: {skill.name} ---\n{skill.body}")
+
+        return "\n\n".join(parts)
--- a/Show More
+++ b/Show More