Merge remote-tracking branch 'upstream/main' into event-loop-arch

Resolve conflict in tools/mcp_server.py: take main's CredentialStoreAdapter.default() which encapsulates the same CompositeStorage logic our branch had inline. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Merge pull request #3372 from ranjithkumar9343/ranjithkumar9343-patch-1
2026-02-03 19:42:47 -08:00 · 2026-02-03 19:36:50 -08:00 · 2026-02-03 19:36:35 -08:00 · 2026-02-03 19:31:46 -08:00 · 2026-02-03 18:01:39 -08:00 · 2026-02-03 17:57:02 -08:00
181 changed files with 32088 additions and 3444 deletions
@@ -1,30 +0,0 @@
-{
-  "permissions": {
-    "allow": [
-      "Bash(npm install:*)",
-      "Bash(npm test:*)",
-      "Skill(building-agents-construction)",
-      "Skill(building-agents-construction:*)",
-      "Bash(PYTHONPATH=core:exports pytest:*)",
-      "mcp__agent-builder__create_session",
-      "mcp__agent-builder__get_session_status",
-      "mcp__agent-builder__set_goal",
-      "mcp__agent-builder__list_mcp_servers",
-      "mcp__agent-builder__test_node",
-      "mcp__agent-builder__add_node",
-      "mcp__agent-builder__add_edge",
-      "mcp__agent-builder__validate_graph",
-      "Bash(ruff check:*)",
-      "Bash(PYTHONPATH=core:exports python:*)",
-      "mcp__agent-builder__list_tests",
-      "mcp__agent-builder__generate_constraint_tests",
-      "Bash(python -m agent:*)",
-      "Bash(python agent.py:*)",
-      "Bash(python -c:*)",
-      "Bash(done)",
-      "Bash(xargs cat:*)",
-      "mcp__agent-builder__list_mcp_tools",
-      "mcp__agent-builder__add_mcp_server"
-    ]
-  }
-}
@@ -11,6 +11,7 @@ metadata:
    - building-agents-construction
    - building-agents-patterns
    - testing-agent
+    - setup-credentials
 ---

 # Agent Development Workflow
@@ -21,10 +22,11 @@ Complete Standard Operating Procedure (SOP) for building production-ready goal-d

 This workflow orchestrates specialized skills to take you from initial concept to production-ready agent:

-1. **Understand Concepts** (5-10 min) → `/building-agents-core` (optional)
-2. **Build Structure** (15-30 min) → `/building-agents-construction`
-3. **Optimize Design** (10-15 min) → `/building-agents-patterns` (optional)
-4. **Test & Validate** (20-40 min) → `/testing-agent`
+1. **Understand Concepts** → `/building-agents-core` (optional)
+2. **Build Structure** → `/building-agents-construction`
+3. **Optimize Design** → `/building-agents-patterns` (optional)
+4. **Setup Credentials** → `/setup-credentials` (if agent uses tools requiring API keys)
+5. **Test & Validate** → `/testing-agent`

 ## When to Use This Workflow

@@ -44,6 +46,7 @@ Use this meta-skill when:
 "Need to understand agent concepts" → building-agents-core
 "Build a new agent" → building-agents-construction
 "Optimize my agent design" → building-agents-patterns
+"Set up API keys for my agent" → setup-credentials
 "Test my agent" → testing-agent
 "Not sure what I need" → Read phases below, then decide
 "Agent has structure but needs implementation" → See agent directory STATUS.md
@@ -218,22 +218,9 @@ class OnlineResearchAgent:
        tool_registry = ToolRegistry()

        # Load MCP servers (always load, needed for tool validation)
-        agent_dir = Path(__file__).parent
-        mcp_config_path = agent_dir / "mcp_servers.json"
-
+        mcp_config_path = Path(__file__).parent / "mcp_servers.json"
        if mcp_config_path.exists():
-            with open(mcp_config_path) as f:
-                mcp_servers = json.load(f)
-
-            for server_name, server_config in mcp_servers.items():
-                server_config["name"] = server_name
-                # Resolve relative cwd paths
-                if (
-                    "cwd" in server_config
-                    and not Path(server_config["cwd"]).is_absolute()
-                ):
-                    server_config["cwd"] = str(agent_dir / server_config["cwd"])
-                tool_registry.register_mcp_server(server_config)
+            tool_registry.load_mcp_config(mcp_config_path)

        llm = None
        if not mock_mode:
@@ -1,13 +1,30 @@
 """Runtime configuration."""

-from dataclasses import dataclass
+import json
+from dataclasses import dataclass, field
+from pathlib import Path
+
+
+def _load_preferred_model() -> str:
+    """Load preferred model from ~/.hive/configuration.json."""
+    config_path = Path.home() / ".hive" / "configuration.json"
+    if config_path.exists():
+        try:
+            with open(config_path) as f:
+                config = json.load(f)
+            llm = config.get("llm", {})
+            if llm.get("provider") and llm.get("model"):
+                return f"{llm['provider']}/{llm['model']}"
+        except Exception:
+            pass
+    return "anthropic/claude-sonnet-4-20250514"


@dataclass
 class RuntimeConfig:
-    model: str = "groq/moonshotai/kimi-k2-instruct-0905"
+    model: str = field(default_factory=_load_preferred_model)
    temperature: float = 0.7
-    max_tokens: int = 16384
+    max_tokens: int = 8192
    api_key: str | None = None
    api_base: str | None = None

@@ -0,0 +1,649 @@
+---
+name: setup-credentials
+description: Set up and install credentials for an agent. Detects missing credentials from agent config, collects them from the user, and stores them securely in the local encrypted store at ~/.hive/credentials.
+license: Apache-2.0
+metadata:
+  author: hive
+  version: "2.2"
+  type: utility
+---
+
+# Setup Credentials
+
+Interactive credential setup for agents with multiple authentication options. Detects what's missing, offers auth method choices, validates with health checks, and stores credentials securely.
+
+## When to Use
+
+- Before running or testing an agent for the first time
+- When `AgentRunner.run()` fails with "missing required credentials"
+- When a user asks to configure credentials for an agent
+- After building a new agent that uses tools requiring API keys
+
+## Workflow
+
+### Step 1: Identify the Agent
+
+Determine which agent needs credentials. The user will either:
+
+- Name the agent directly (e.g., "set up credentials for hubspot-agent")
+- Have an agent directory open (check `exports/` for agent dirs)
+- Be working on an agent in the current session
+
+Locate the agent's directory under `exports/{agent_name}/`.
+
+### Step 2: Detect Required Credentials (Bash-First)
+
+Use bash commands to determine what the agent needs and what's already configured. This avoids Python import issues and works even when `HIVE_CREDENTIAL_KEY` is not set.
+
+#### Step 2a: Read Agent Requirements
+
+Extract `required_tools` and node types from the agent config:
+
+```bash
+# Get required tools
+jq -r '.required_tools[]?' exports/{agent_name}/agent.json 2>/dev/null
+
+# Get node types from graph nodes
+jq -r '.graph.nodes[]?.node_type' exports/{agent_name}/agent.json 2>/dev/null | sort -u
+```
+
+Map the extracted tools and node types to credentials by reading the spec files directly:
+
+```bash
+# Read all credential specs — each file defines tools, node_types, env_var, and credential_id
+cat tools/src/aden_tools/credentials/llm.py tools/src/aden_tools/credentials/search.py tools/src/aden_tools/credentials/email.py tools/src/aden_tools/credentials/integrations.py
+```
+
+For each `CredentialSpec`, match its `tools` and `node_types` lists against the agent's required tools and node types. Extract the `env_var`, `credential_id`, and `credential_group` for every match. This is the list of needed credentials.
+
+#### Step 2b: Check Existing Credential Sources
+
+For each needed credential, check three sources. A credential is "found" if it exists in ANY of them:
+
+**1. Encrypted store metadata index** (unencrypted JSON — no decryption key needed):
+
+```bash
+cat ~/.hive/credentials/metadata/index.json 2>/dev/null | jq -r '.credentials | keys[]'
+```
+
+If a credential ID appears in this list, it is stored in the encrypted store.
+
+**2. Environment variables:**
+
+```bash
+# Check each needed env var, e.g.:
+printenv ANTHROPIC_API_KEY > /dev/null 2>&1 && echo "ANTHROPIC_API_KEY: set" || echo "ANTHROPIC_API_KEY: not set"
+printenv BRAVE_SEARCH_API_KEY > /dev/null 2>&1 && echo "BRAVE_SEARCH_API_KEY: set" || echo "BRAVE_SEARCH_API_KEY: not set"
+```
+
+**3. Project `.env` file:**
+
+```bash
+# Check each needed env var, e.g.:
+grep -q '^ANTHROPIC_API_KEY=' .env 2>/dev/null && echo "ANTHROPIC_API_KEY: in .env" || echo "ANTHROPIC_API_KEY: not in .env"
+grep -q '^BRAVE_SEARCH_API_KEY=' .env 2>/dev/null && echo "BRAVE_SEARCH_API_KEY: in .env" || echo "BRAVE_SEARCH_API_KEY: not in .env"
+```
+
+#### Step 2c: HIVE_CREDENTIAL_KEY Check
+
+If any credentials were found in the encrypted store metadata index, verify the encryption key is available. The key is typically persisted to shell config by a previous setup-credentials run.
+
+Check both the current session AND shell config files:
+
+```bash
+# Check 1: Current session
+printenv HIVE_CREDENTIAL_KEY > /dev/null 2>&1 && echo "session: set" || echo "session: not set"
+
+# Check 2: Shell config files (where setup-credentials persists it)
+# Note: check each file individually to avoid non-zero exit when one doesn't exist
+for f in ~/.zshrc ~/.bashrc ~/.profile; do [ -f "$f" ] && grep -q 'HIVE_CREDENTIAL_KEY' "$f" && echo "$f"; done
+```
+
+Decision logic:
+- **In current session** — no action needed, credentials in the store are usable
+- **In shell config but NOT in current session** — the key is persisted but this shell hasn't sourced it. Run `source ~/.zshrc` (or `~/.bashrc`), then re-check. Credentials in the store are usable after sourcing.
+- **Not in session AND not in shell config** — the key was never persisted. Warn the user that credentials in the store cannot be decrypted. Help the user recover or re-persist the key; do NOT re-collect credential values that are already stored.
+
+#### Step 2d: Compute Missing & Group
+
+Diff the "needed" credentials against the "found" credentials to get the truly missing list.
+
+Group related credentials by their `credential_group` field from the spec files. Credentials that share the same non-empty `credential_group` value should be presented as a single setup step rather than asking for each one individually.
+
+**If nothing is missing and there's no HIVE_CREDENTIAL_KEY issue:** Report all credentials as configured and skip Steps 3-5. Example:
+
+```
+All required credentials are already configured:
+  ✓ anthropic (ANTHROPIC_API_KEY) — found in encrypted store
+  ✓ brave_search (BRAVE_SEARCH_API_KEY) — found in environment
+Your agent is ready to run!
+```
+
+**If credentials are missing:** Continue to Step 3 with only the missing ones.
+
+### Step 3: Present Auth Options for Each Missing Credential
+
+For each missing credential, check what authentication methods are available:
+
+```python
+from aden_tools.credentials import CREDENTIAL_SPECS
+
+spec = CREDENTIAL_SPECS.get("hubspot")
+if spec:
+    # Determine available auth options
+    auth_options = []
+    if spec.aden_supported:
+        auth_options.append("aden")
+    if spec.direct_api_key_supported:
+        auth_options.append("direct")
+    auth_options.append("custom")  # Always available
+
+    # Get setup info
+    setup_info = {
+        "env_var": spec.env_var,
+        "description": spec.description,
+        "help_url": spec.help_url,
+        "api_key_instructions": spec.api_key_instructions,
+    }
+```
+
+Present the available options using AskUserQuestion:
+
+```
+Choose how to configure HUBSPOT_ACCESS_TOKEN:
+
+  1) Aden Platform (OAuth) (Recommended)
+     Secure OAuth2 flow via integration.adenhq.com
+     - Quick setup with automatic token refresh
+     - No need to manage API keys manually
+
+  2) Direct API Key
+     Enter your own API key manually
+     - Requires creating a HubSpot Private App
+     - Full control over scopes and permissions
+
+  3) Local Credential Setup (Advanced)
+     Programmatic configuration for CI/CD
+     - For automated deployments
+     - Requires manual API calls
+```
+
+### Step 4: Execute Auth Flow Based on User Choice
+
+#### Option 1: Aden Platform (OAuth)
+
+This is the recommended flow for supported integrations (HubSpot, etc.).
+
+**How Aden OAuth Works:**
+
+The ADEN_API_KEY represents a user who has already completed OAuth authorization on Aden's platform. When users sign up and connect integrations on Aden, those OAuth tokens are stored server-side. Having an ADEN_API_KEY means:
+
+1. User has an Aden account
+2. User has already authorized integrations (HubSpot, etc.) via OAuth on Aden
+3. We just need to sync those credentials down to the local credential store
+
+**4.1a. Check for ADEN_API_KEY**
+
+```python
+import os
+aden_key = os.environ.get("ADEN_API_KEY")
+```
+
+If not set, guide user to get one from Aden (this is where they do OAuth):
+
+```python
+from aden_tools.credentials import open_browser, get_aden_setup_url
+
+# Open browser to Aden - user will sign up and connect integrations there
+url = get_aden_setup_url()  # https://integration.adenhq.com/setup
+success, msg = open_browser(url)
+
+print("Please sign in to Aden and connect your integrations (HubSpot, etc.).")
+print("Once done, copy your API key and return here.")
+```
+
+Ask user to provide the ADEN_API_KEY they received.
+
+**4.1b. Save ADEN_API_KEY to Shell Config**
+
+With user approval, persist ADEN_API_KEY to their shell config:
+
+```python
+from aden_tools.credentials import (
+    detect_shell,
+    add_env_var_to_shell_config,
+    get_shell_source_command,
+)
+
+shell_type = detect_shell()  # 'bash', 'zsh', or 'unknown'
+
+# Ask user for approval before modifying shell config
+# If approved:
+success, config_path = add_env_var_to_shell_config(
+    "ADEN_API_KEY",
+    user_provided_key,
+    comment="Aden Platform (OAuth) API key"
+)
+
+if success:
+    source_cmd = get_shell_source_command()
+    print(f"Saved to {config_path}")
+    print(f"Run: {source_cmd}")
+```
+
+Also save to `~/.hive/configuration.json` for the framework:
+
+```python
+import json
+from pathlib import Path
+
+config_path = Path.home() / ".hive" / "configuration.json"
+config = json.loads(config_path.read_text()) if config_path.exists() else {}
+
+config["aden"] = {
+    "api_key_configured": True,
+    "api_url": "https://api.adenhq.com"
+}
+
+config_path.parent.mkdir(parents=True, exist_ok=True)
+config_path.write_text(json.dumps(config, indent=2))
+```
+
+**4.1c. Sync Credentials from Aden Server**
+
+Since the user has already authorized integrations on Aden, use the one-liner factory method:
+
+```python
+from core.framework.credentials import CredentialStore
+
+# This single call handles everything:
+# - Creates encrypted local storage at ~/.hive/credentials
+# - Configures Aden client from ADEN_API_KEY env var
+# - Syncs all credentials from Aden server automatically
+store = CredentialStore.with_aden_sync(
+    base_url="https://api.adenhq.com",
+    auto_sync=True,  # Syncs on creation
+)
+
+# Check what was synced
+synced = store.list_credentials()
+print(f"Synced credentials: {synced}")
+
+# If the required credential wasn't synced, the user hasn't authorized it on Aden yet
+if "hubspot" not in synced:
+    print("HubSpot not found in your Aden account.")
+    print("Please visit https://integration.adenhq.com to connect HubSpot, then try again.")
+```
+
+For more control over the sync process:
+
+```python
+from core.framework.credentials import CredentialStore
+from core.framework.credentials.aden import (
+    AdenCredentialClient,
+    AdenClientConfig,
+    AdenSyncProvider,
+)
+
+# Create client (API key loaded from ADEN_API_KEY env var)
+client = AdenCredentialClient(AdenClientConfig(
+    base_url="https://api.adenhq.com",
+))
+
+# Create provider and store
+provider = AdenSyncProvider(client=client)
+store = CredentialStore.with_encrypted_storage()
+
+# Manual sync
+synced_count = provider.sync_all(store)
+print(f"Synced {synced_count} credentials from Aden")
+```
+
+**4.1d. Run Health Check**
+
+```python
+from aden_tools.credentials import check_credential_health
+
+# Get the token from the store
+cred = store.get_credential("hubspot")
+token = cred.keys["access_token"].value.get_secret_value()
+
+result = check_credential_health("hubspot", token)
+if result.valid:
+    print("HubSpot credentials validated successfully!")
+else:
+    print(f"Validation failed: {result.message}")
+    # Offer to retry the OAuth flow
+```
+
+#### Option 2: Direct API Key
+
+For users who prefer manual API key management.
+
+**4.2a. Show Setup Instructions**
+
+```python
+from aden_tools.credentials import CREDENTIAL_SPECS
+
+spec = CREDENTIAL_SPECS.get("hubspot")
+if spec and spec.api_key_instructions:
+    print(spec.api_key_instructions)
+# Output:
+# To get a HubSpot Private App token:
+# 1. Go to HubSpot Settings > Integrations > Private Apps
+# 2. Click "Create a private app"
+# 3. Name your app (e.g., "Hive Agent")
+# ...
+
+if spec and spec.help_url:
+    print(f"More info: {spec.help_url}")
+```
+
+**4.2b. Collect API Key from User**
+
+Use AskUserQuestion to securely collect the API key:
+
+```
+Please provide your HubSpot access token:
+(This will be stored securely in ~/.hive/credentials)
+```
+
+**4.2c. Run Health Check Before Storing**
+
+```python
+from aden_tools.credentials import check_credential_health
+
+result = check_credential_health("hubspot", user_provided_token)
+if not result.valid:
+    print(f"Warning: {result.message}")
+    # Ask user if they want to:
+    # 1. Try a different token
+    # 2. Continue anyway (not recommended)
+```
+
+**4.2d. Store in Local Encrypted Store**
+
+```python
+from core.framework.credentials import CredentialStore, CredentialObject, CredentialKey
+from pydantic import SecretStr
+
+store = CredentialStore.with_encrypted_storage()
+
+cred = CredentialObject(
+    id="hubspot",
+    name="HubSpot Access Token",
+    keys={
+        "access_token": CredentialKey(
+            name="access_token",
+            value=SecretStr(user_provided_token),
+        )
+    },
+)
+store.save_credential(cred)
+```
+
+**4.2e. Export to Current Session**
+
+```bash
+export HUBSPOT_ACCESS_TOKEN="the-value"
+```
+
+#### Option 3: Local Credential Setup (Advanced)
+
+For programmatic/CI/CD setups.
+
+**4.3a. Show Documentation**
+
+```
+For advanced credential management, you can use the CredentialStore API directly:
+
+  from core.framework.credentials import CredentialStore, CredentialObject, CredentialKey
+  from pydantic import SecretStr
+
+  store = CredentialStore.with_encrypted_storage()
+
+  cred = CredentialObject(
+      id="hubspot",
+      name="HubSpot Access Token",
+      keys={"access_token": CredentialKey(name="access_token", value=SecretStr("..."))}
+  )
+  store.save_credential(cred)
+
+For CI/CD environments:
+  - Set HIVE_CREDENTIAL_KEY for encryption
+  - Pre-populate ~/.hive/credentials programmatically
+  - Or use environment variables directly (HUBSPOT_ACCESS_TOKEN)
+
+Documentation: See core/framework/credentials/README.md
+```
+
+### Step 5: Record Configuration Method
+
+Track which auth method was used for each credential in `~/.hive/configuration.json`:
+
+```python
+import json
+from pathlib import Path
+from datetime import datetime
+
+config_path = Path.home() / ".hive" / "configuration.json"
+config = json.loads(config_path.read_text()) if config_path.exists() else {}
+
+if "credential_methods" not in config:
+    config["credential_methods"] = {}
+
+config["credential_methods"]["hubspot"] = {
+    "method": "aden",  # or "direct" or "custom"
+    "configured_at": datetime.now().isoformat(),
+}
+
+config_path.write_text(json.dumps(config, indent=2))
+```
+
+### Step 6: Verify All Credentials
+
+Run validation again to confirm everything is set:
+
+```python
+runner = AgentRunner.load("exports/{agent_name}")
+validation = runner.validate()
+assert not validation.missing_credentials, "Still missing credentials!"
+```
+
+Report the result to the user.
+
+## Health Check Reference
+
+Health checks validate credentials by making lightweight API calls:
+
+| Credential      | Endpoint                                | What It Checks                     |
+| --------------- | --------------------------------------- | ---------------------------------- |
+| `anthropic`     | `POST /v1/messages`                     | API key validity                   |
+| `brave_search`  | `GET /res/v1/web/search?q=test&count=1` | API key validity                   |
+| `google_search` | `GET /customsearch/v1?q=test&num=1`     | API key + CSE ID validity          |
+| `github`        | `GET /user`                             | Token validity, user identity      |
+| `hubspot`       | `GET /crm/v3/objects/contacts?limit=1`  | Bearer token validity, CRM scopes  |
+| `resend`        | `GET /domains`                          | API key validity                   |
+
+```python
+from aden_tools.credentials import check_credential_health, HealthCheckResult
+
+result: HealthCheckResult = check_credential_health("hubspot", token_value)
+# result.valid: bool
+# result.message: str
+# result.details: dict (status_code, rate_limited, etc.)
+```
+
+## Encryption Key (HIVE_CREDENTIAL_KEY)
+
+The local encrypted store requires `HIVE_CREDENTIAL_KEY` to encrypt/decrypt credentials.
+
+- If the user doesn't have one, `EncryptedFileStorage` will auto-generate one and log it
+- The user MUST persist this key (e.g., in `~/.bashrc` or a secrets manager)
+- Without this key, stored credentials cannot be decrypted
+- Apart from `ADEN_API_KEY` (saved to shell config with user approval in Step 4.1b), this is the ONLY secret that should live in `~/.bashrc` or environment config
+
+If `HIVE_CREDENTIAL_KEY` is not set:
+
+1. Let the store generate one
+2. Tell the user to save it: `export HIVE_CREDENTIAL_KEY="{generated_key}"`
+3. Recommend adding it to `~/.bashrc` or their shell profile
+
+## Security Rules
+
+- **NEVER** log, print, or echo credential values in tool output
+- **NEVER** store credentials in plaintext files, git-tracked files, or agent configs
+- **NEVER** hardcode credentials in source code
+- **ALWAYS** use `SecretStr` from Pydantic when handling credential values in Python
+- **ALWAYS** use the local encrypted store (`~/.hive/credentials`) for persistence
+- **ALWAYS** run health checks before storing credentials (when possible)
+- **ALWAYS** verify credentials were stored by re-running validation, not by reading them back
+- When modifying `~/.bashrc` or `~/.zshrc`, confirm with the user first
+
+## Credential Sources Reference
+
+All credential specs are defined in `tools/src/aden_tools/credentials/`:
+
+| File              | Category      | Credentials                                   | Aden Supported |
+| ----------------- | ------------- | --------------------------------------------- | -------------- |
+| `llm.py`          | LLM Providers | `anthropic`                                   | No             |
+| `search.py`       | Search Tools  | `brave_search`, `google_search`, `google_cse` | No             |
+| `email.py`        | Email         | `resend`                                      | No             |
+| `integrations.py` | Integrations  | `github`, `hubspot`                           | `hubspot` only |
+
+**Note:** Additional LLM providers (Cerebras, Groq, OpenAI) are handled by LiteLLM via environment
+variables (`CEREBRAS_API_KEY`, `GROQ_API_KEY`, `OPENAI_API_KEY`) but are not yet in CREDENTIAL_SPECS.
+Add them to `llm.py` as needed.
+
+To check what's registered:
+
+```python
+from aden_tools.credentials import CREDENTIAL_SPECS
+for name, spec in CREDENTIAL_SPECS.items():
+    print(f"{name}: aden={spec.aden_supported}, direct={spec.direct_api_key_supported}")
+```
+
+## Migration: CredentialManager → CredentialStore
+
+**CredentialManager is deprecated.** Use CredentialStore instead.
+
+| Old (Deprecated)                          | New (Recommended)                                                    |
+| ----------------------------------------- | -------------------------------------------------------------------- |
+| `CredentialManager()`                     | `CredentialStore.with_encrypted_storage()`                           |
+| `creds.get("hubspot")`                    | `store.get("hubspot")` or `store.get_key("hubspot", "access_token")` |
+| `creds.validate_for_tools(tools)`         | Use `store.is_available(cred_id)` per credential                     |
+| `creds.get_auth_options("hubspot")`       | Check `CREDENTIAL_SPECS["hubspot"].aden_supported`                   |
+| `creds.get_setup_instructions("hubspot")` | Access `CREDENTIAL_SPECS["hubspot"]` directly                        |
+
+**Why migrate?**
+
+- **CredentialStore** supports encrypted storage, multi-key credentials, template resolution, and automatic token refresh
+- **CredentialManager** only reads from environment variables and .env files (no encryption, no refresh)
+- **CredentialStoreAdapter** exists for backward compatibility during migration
+
+```python
+# Old way (deprecated)
+from aden_tools.credentials import CredentialManager
+creds = CredentialManager()
+token = creds.get("hubspot")
+
+# New way (recommended)
+from core.framework.credentials import CredentialStore
+store = CredentialStore.with_encrypted_storage()
+token = store.get("hubspot")
+
+# With Aden sync (recommended for OAuth integrations)
+store = CredentialStore.with_aden_sync()
+token = store.get_key("hubspot", "access_token")
+```
+
+## Example Session
+
+```
+User: /setup-credentials for my research-agent
+
+Agent: Let me check what credentials your research-agent needs.
+
+[Reads agent config]
+$ jq -r '.required_tools[]?' exports/research-agent/agent.json
+web_search
+google_search
+
+$ jq -r '.graph.nodes[]?.node_type' exports/research-agent/agent.json | sort -u
+llm_tool_use
+
+[Maps tools/nodes to credentials by reading the CredentialSpec files]
+Needed: anthropic, brave_search, google_search, google_cse
+
+[Checks encrypted store metadata index]
+$ cat ~/.hive/credentials/metadata/index.json | jq -r '.credentials | keys[]'
+anthropic
+brave_search
+
+[Checks environment variables]
+$ printenv ANTHROPIC_API_KEY > /dev/null 2>&1 && echo "set" || echo "not set"
+not set
+$ printenv BRAVE_SEARCH_API_KEY > /dev/null 2>&1 && echo "set" || echo "not set"
+not set
+$ printenv GOOGLE_API_KEY > /dev/null 2>&1 && echo "set" || echo "not set"
+not set
+$ printenv GOOGLE_CSE_ID > /dev/null 2>&1 && echo "set" || echo "not set"
+not set
+
+[Checks HIVE_CREDENTIAL_KEY since credentials found in store]
+$ printenv HIVE_CREDENTIAL_KEY > /dev/null 2>&1 && echo "session: set" || echo "session: not set"
+session: not set
+$ for f in ~/.zshrc ~/.bashrc ~/.profile; do [ -f "$f" ] && grep -q 'HIVE_CREDENTIAL_KEY' "$f" && echo "$f"; done
+/Users/user/.zshrc
+
+[Key is in shell config but not current session — sourcing it]
+$ source ~/.zshrc
+
+[Computes missing credentials]
+Found:
+  ✓ anthropic (ANTHROPIC_API_KEY) — in encrypted store
+  ✓ brave_search (BRAVE_SEARCH_API_KEY) — in encrypted store
+Missing:
+  ✗ google_search (GOOGLE_API_KEY)
+  ✗ google_cse (GOOGLE_CSE_ID)
+
+Agent: 2 of 4 required credentials are already configured. Only Google Custom
+Search needs setup (2 values as a single group).
+
+--- Setting up Google Custom Search (google_search + google_cse) ---
+
+This requires two values that work together.
+
+First, the Google API Key:
+1. Go to https://console.cloud.google.com/apis/credentials
+2. Create a new project (or select an existing one)
+3. Enable the "Custom Search API" from the API Library
+4. Go to Credentials > Create Credentials > API Key
+5. Copy the generated API key
+
+[AskUserQuestion: "Please provide your Google API key:"]
+[User provides key]
+
+Now, the Custom Search Engine ID:
+1. Go to https://programmablesearchengine.google.com/controlpanel/all
+2. Click "Add" to create a new search engine
+3. Under "What to search", select "Search the entire web"
+4. Give your search engine a name
+5. Click "Create"
+6. Copy the Search Engine ID (cx value)
+
+[AskUserQuestion: "Please provide your Google CSE ID:"]
+[User provides ID]
+
+[Runs health check with both values - GET /customsearch/v1?q=test&num=1 → 200 OK]
+[Stores both in local encrypted store, exports to env]
+
+✓ Google Custom Search credentials valid
+
+All credentials are now configured:
+  ✓ anthropic (ANTHROPIC_API_KEY) — already in encrypted store
+  ✓ brave_search (BRAVE_SEARCH_API_KEY) — already in encrypted store
+  ✓ google_search (GOOGLE_API_KEY) — stored in encrypted store
+  ✓ google_cse (GOOGLE_CSE_ID) — stored in encrypted store
+  Your agent is ready to run!
+```
@@ -117,22 +117,47 @@ async def test_happy_path(mock_mode):
 5. **Debug failures** - `debug_test(goal_id, test_name, agent_path)`
 6. **Iterate** - Repeat steps 4-5 until all pass

-## ⚠️ API Key Requirement for Real Testing
+## ⚠️ Credential Requirements for Testing

-**CRITICAL: Real LLM testing requires an API key.** Mock mode only validates structure and does NOT test actual agent behavior.
+**CRITICAL: Testing requires ALL credentials the agent depends on.** This includes both the LLM API key AND any tool-specific credentials (HubSpot, Brave Search, etc.).

 ### Prerequisites

-Before running agent tests, you MUST set your API key:
+Before running agent tests, you MUST collect ALL required credentials from the user.

+**Step 1: LLM API Key (always required)**
 ```bash
 export ANTHROPIC_API_KEY="your-key-here"
 ```

-**Why API keys are required:**
+**Step 2: Tool-specific credentials (depends on agent's tools)**
+
+Inspect the agent's `mcp_servers.json` and tool configuration to determine which tools the agent uses, then check for all required credentials:
+
+```python
+from aden_tools.credentials import CredentialManager, CREDENTIAL_SPECS
+
+creds = CredentialManager()
+
+# Determine which tools the agent uses (from agent.json or mcp_servers.json)
+agent_tools = [...]  # e.g., ["hubspot_search_contacts", "web_search", ...]
+
+# Find all missing credentials for those tools
+missing = creds.get_missing_for_tools(agent_tools)
+```
+
+Common tool credentials:
+| Tool | Env Var | Help URL |
+|------|---------|----------|
+| HubSpot CRM | `HUBSPOT_ACCESS_TOKEN` | https://developers.hubspot.com/docs/api/private-apps |
+| Brave Search | `BRAVE_SEARCH_API_KEY` | https://brave.com/search/api/ |
+| Google Search | `GOOGLE_API_KEY` + `GOOGLE_CSE_ID` | https://developers.google.com/custom-search |
+
+**Why ALL credentials are required:**
 - Tests need to execute the agent's LLM nodes to validate behavior
- Mock mode bypasses LLM calls, providing no confidence in real-world performance
- Success criteria (personalization, reasoning quality, constraint adherence) can only be tested with real LLM calls
+- Tools with missing credentials will return error dicts instead of real data
+- Mock mode bypasses both LLM calls and real tool calls, providing no confidence in real-world performance
+- The `AgentRunner.run()` method validates credentials at startup and will fail fast if any are missing

 ### Mock Mode Limitations

@@ -146,11 +171,11 @@ Mock mode (`--mock` flag or `mock_mode=True`) is **ONLY for structure validation
 ✗ Does NOT test real API integrations or tool use
 ✗ Does NOT test personalization or content quality

-**Bottom line:** If you're testing whether an agent achieves its goal, you MUST use a real API key.
+**Bottom line:** If you're testing whether an agent achieves its goal, you MUST use real credentials for ALL services.

-### Enforcing API Key in Tests
+### Enforcing Credentials in Tests

-When generating tests, **ALWAYS include API key checks**:
+When generating tests, **ALWAYS include credential checks for ALL required services**:

 ```python
 import os
@@ -165,11 +190,14 @@ pytestmark = pytest.mark.skipif(


@pytest.fixture(scope="session", autouse=True)
-def check_api_key():
-    """Ensure API key is set for real testing."""
+def check_credentials():
+    """Ensure ALL required credentials are set for real testing."""
    creds = CredentialManager()
+    mock_mode = os.environ.get("MOCK_MODE")
+
+    # Always check LLM key
    if not creds.is_available("anthropic"):
-        if os.environ.get("MOCK_MODE"):
+        if mock_mode:
            print("\n⚠️  Running in MOCK MODE - structure validation only")
            print("   This does NOT test LLM behavior or agent quality")
            print("   Set ANTHROPIC_API_KEY for real testing\n")
@@ -183,39 +211,69 @@ def check_api_key():
                "   MOCK_MODE=1 pytest exports/{agent}/tests/\n\n"
                "Note: Mock mode does NOT validate agent behavior or quality."
            )
+
+    # Check tool-specific credentials (skip in mock mode)
+    if not mock_mode:
+        # List the tools this agent uses - update per agent
+        agent_tools = []  # e.g., ["hubspot_search_contacts", "hubspot_get_contact"]
+        missing = creds.get_missing_for_tools(agent_tools)
+        if missing:
+            lines = ["\n❌ Missing tool credentials!\n"]
+            for name in missing:
+                spec = creds.specs.get(name)
+                if spec:
+                    lines.append(f"  {spec.env_var} - {spec.description}")
+                    if spec.help_url:
+                        lines.append(f"    Setup: {spec.help_url}")
+            lines.append("\nSet the required environment variables and re-run.")
+            pytest.fail("\n".join(lines))
 ```

 ### User Communication

-When the user asks to test an agent, **ALWAYS check for the API key first**:
+When the user asks to test an agent, **ALWAYS check for ALL credentials first** — not just the LLM key:
+
+1. **Identify the agent's tools** from `agent.json` or `mcp_servers.json`
+2. **Check ALL required credentials** using `CredentialManager`
+3. **Ask the user to provide any missing credentials** before proceeding

 ```python
-from aden_tools.credentials import CredentialManager
+from aden_tools.credentials import CredentialManager, CREDENTIAL_SPECS

-# Before running any tests
 creds = CredentialManager()
-if not creds.is_available("anthropic"):
-    print("⚠️  No ANTHROPIC_API_KEY found!")
-    print()
-    print("Testing requires a real API key to validate agent behavior.")
-    print()
-    print("Options:")
-    print("1. Set your API key (RECOMMENDED):")
-    print("   export ANTHROPIC_API_KEY='your-key-here'")
-    print()
-    print("2. Run in mock mode (structure validation only):")
-    print("   MOCK_MODE=1 pytest exports/{agent}/tests/")
-    print()
-    print("Mock mode does NOT test:")
-    print("  - LLM message generation")
-    print("  - Reasoning or decision quality")
-    print("  - Constraint validation")
-    print("  - Real API integrations")

-    # Ask user what to do
+# 1. Check LLM key
+missing_creds = []
+if not creds.is_available("anthropic"):
+    missing_creds.append(("ANTHROPIC_API_KEY", "Anthropic API key for LLM calls"))
+
+# 2. Check tool-specific credentials
+agent_tools = [...]  # Determined from agent config
+missing_tools = creds.get_missing_for_tools(agent_tools)
+for name in missing_tools:
+    spec = CREDENTIAL_SPECS.get(name)
+    if spec:
+        missing_creds.append((spec.env_var, spec.description))
+
+# 3. Present ALL missing credentials to the user at once
+if missing_creds:
+    print("⚠️  Missing credentials required by this agent:\n")
+    for env_var, description in missing_creds:
+        print(f"  • {env_var} — {description}")
+    print()
+    print("Please set the missing environment variables:")
+    for env_var, _ in missing_creds:
+        print(f"  export {env_var}='your-value-here'")
+    print()
+    print("Or run in mock mode (structure validation only):")
+    print("  MOCK_MODE=1 pytest exports/{agent}/tests/")
+
+    # Ask user to provide credentials or choose mock mode
    AskUserQuestion(...)
 ```

+**IMPORTANT:** Do NOT skip credential collection. If an agent uses HubSpot tools, the user MUST provide `HUBSPOT_ACCESS_TOKEN`. If it uses web search, the user MUST provide the appropriate search API key. Collect ALL missing credentials in a single prompt rather than discovering them one at a time during test failures.
+
 ## The Three-Stage Flow

 ```
@@ -0,0 +1,145 @@
+# Triage Issue Skill
+
+Analyze a GitHub issue, verify claims against the codebase, and close invalid issues with a technical response.
+
+## Trigger
+
+User provides a GitHub issue URL or number, e.g.:
+- `/triage-issue 1970`
+- `/triage-issue https://github.com/adenhq/hive/issues/1970`
+
+## Workflow
+
+### Step 1: Fetch Issue Details
+
+```bash
+gh issue view <number> --repo adenhq/hive --json title,body,state,labels,author
+```
+
+Extract:
+- Title
+- Body (the claim/bug report)
+- Current state
+- Labels
+- Author
+
+If issue is already closed, inform user and stop.
+
+### Step 2: Analyze the Claim
+
+Read the issue body and identify:
+1. **The core claim** - What is the user asserting?
+2. **Technical specifics** - File paths, function names, code snippets mentioned
+3. **Expected behavior** - What do they think should happen?
+4. **Severity claimed** - Security issue? Bug? Feature request?
+
+### Step 3: Investigate the Codebase
+
+For each technical claim:
+1. Find the referenced code using Grep/Glob/Read
+2. Understand the actual implementation
+3. Check if the claim accurately describes the behavior
+4. Look for related tests, documentation, or design decisions
+
+### Step 4: Evaluate Validity
+
+Categorize the issue as one of:
+
+| Category | Action |
+|----------|--------|
+| **Valid Bug** | Do NOT close. Inform user this is a real issue. |
+| **Valid Feature Request** | Do NOT close. Suggest labeling appropriately. |
+| **Misunderstanding** | Prepare technical explanation for why behavior is correct. |
+| **Fundamentally Flawed** | Prepare critique explaining the technical impossibility or design rationale. |
+| **Duplicate** | Find the original issue and prepare duplicate notice. |
+| **Incomplete** | Prepare request for more information. |
+
+### Step 5: Draft Response
+
+For issues to be closed, draft a response that:
+
+1. **Acknowledges the concern** - Don't be dismissive
+2. **Explains the actual behavior** - With code references
+3. **Provides technical rationale** - Why it works this way
+4. **References industry standards** - If applicable
+5. **Offers alternatives** - If there's a better approach for the user
+
+Use this template:
+
+```markdown
+## Analysis
+
+[Brief summary of what was investigated]
+
+## Technical Details
+
+[Explanation with code references]
+
+## Why This Is Working As Designed
+
+[Rationale]
+
+## Recommendation
+
+[What the user should do instead, if applicable]
+
+---
+*This issue was reviewed and closed by the maintainers.*
+```
+
+### Step 6: User Review
+
+Present the draft to the user with:
+
+```
+## Issue #<number>: <title>
+
+**Claim:** <summary of claim>
+
+**Finding:** <valid/invalid/misunderstanding/etc>
+
+**Draft Response:**
+<the markdown response>
+
+---
+Do you want me to post this comment and close the issue?
+```
+
+Use AskUserQuestion with options:
+- "Post and close" - Post comment, close issue
+- "Edit response" - Let user modify the response
+- "Skip" - Don't take action
+
+### Step 7: Execute Action
+
+If user approves:
+
+```bash
+# Post comment
+gh issue comment <number> --repo adenhq/hive --body "<response>"
+
+# Close issue
+gh issue close <number> --repo adenhq/hive --reason "not planned"
+```
+
+Report success with link to the issue.
+
+## Important Guidelines
+
+1. **Never close valid issues** - If there's any merit to the claim, don't close it
+2. **Be respectful** - The reporter took time to file the issue
+3. **Be technical** - Provide code references and evidence
+4. **Be educational** - Help them understand, don't just dismiss
+5. **Check twice** - Make sure you understand the code before declaring something invalid
+6. **Consider edge cases** - Maybe their environment reveals a real issue
+
+## Example Critiques
+
+### Security Misunderstanding
+> "The claim that secrets are exposed in plaintext misunderstands the encryption architecture. While `SecretStr` is used for logging protection, actual encryption is provided by Fernet (AES-128-CBC) at the storage layer. The code path is: serialize → encrypt → write. Only encrypted bytes touch disk."
+
+### Impossible Request
+> "The requested feature would require [X] which violates [fundamental constraint]. This is not a limitation of our implementation but a fundamental property of [technology/protocol]."
+
+### Already Handled
+> "This scenario is already handled by [code reference]. The reporter may be using an older version or misconfigured environment."
@@ -1,9 +1,10 @@
 ---
 name: Bug Report
 about: Report a bug to help us improve
-title: '[Bug]: '
-labels: bug
+title: "[Bug]: "
+labels: bug, enhancement
 assignees: ''
+
 ---

 ## Describe the Bug
@@ -1,9 +1,10 @@
 ---
 name: Feature Request
 about: Suggest a new feature or enhancement
-title: '[Feature]: '
+title: "[Feature]: "
 labels: enhancement
 assignees: ''
+
 ---

 ## Problem Statement
@@ -0,0 +1,71 @@
+---
+name: Integration Request
+about: Suggest a new integration
+title: "[Integration]: "
+labels: ''
+assignees: ''
+
+---
+
+## Service                                                                                      
+                                                                                                 
+ Name and brief description of the service and what it enables agents to do.                     
+                                                                                                 
+ **Description:** [e.g., "API key for Slack Bot" — short one-liner for the credential spec]      
+                                                                                                 
+ ## Credential Identity                                                                          
+                                                                                                 
+ - **credential_id:** [e.g., `slack`]                                                            
+ - **env_var:** [e.g., `SLACK_BOT_TOKEN`]                                                        
+ - **credential_key:** [e.g., `access_token`, `api_key`, `bot_token`]                            
+                                                                                                 
+ ## Tools                                                                                        
+                                                                                                 
+ Tool function names that require this credential:                                               
+                                                                                                 
+ - [e.g., `slack_send_message`]                                                                  
+ - [e.g., `slack_list_channels`]                                                                 
+                                                                                                 
+ ## Auth Methods                                                                                 
+                                                                                                 
+ - **Direct API key supported:** Yes / No                                                        
+ - **Aden OAuth supported:** Yes / No                                                            
+                                                                                                 
+ If Aden OAuth is supported, describe the OAuth scopes/permissions required.                     
+                                                                                                 
+ ## How to Get the Credential                                                                    
+                                                                                                 
+ Link where users obtain the key/token:                                                          
+                                                                                                 
+ [e.g., https://api.slack.com/apps]                                                              
+                                                                                                 
+ Step-by-step instructions:                                                                      
+                                                                                                 
+ 1. Go to ...                                                                                    
+ 2. Create a ...                                                                                 
+ 3. Select scopes/permissions: ...                                                               
+ 4. Copy the key/token                                                                           
+                                                                                                 
+ ## Health Check                                                                                 
+                                                                                                 
+ A lightweight API call to validate the credential (no writes, no charges).                      
+                                                                                                 
+ - **Endpoint:** [e.g., `https://slack.com/api/auth.test`]                                       
+ - **Method:** [e.g., `GET` or `POST`]                                                           
+ - **Auth header:** [e.g., `Authorization: Bearer {token}` or `X-Api-Key: {key}`]                
+ - **Parameters (if any):** [e.g., `?limit=1`]                                                   
+ - **200 means:** [e.g., key is valid]                                                           
+ - **401 means:** [e.g., invalid or expired]                                                     
+ - **429 means:** [e.g., rate limited but key is valid]                                          
+                                                                                                 
+ ## Credential Group                                                                             
+                                                                                                 
+ Does this require multiple credentials configured together? (e.g., Google Custom Search needs   
+ both an API key and a CSE ID)                                                                   
+                                                                                                 
+ - [ ] No, single credential                                                                     
+ - [ ] Yes — list the other credential IDs in the group:                                         
+                                                                                                 
+ ## Additional Context                                                                           
+                                                                                                 
+ Links to API docs, rate limits, free tier availability, or anything else relevant.
@@ -22,6 +22,9 @@ jobs:
        with:
          bun-version: latest

+      - name: Run auto-close-duplicates tests
+        run: bun test scripts/auto-close-duplicates
+
      - name: Auto-close duplicate issues
        run: bun run scripts/auto-close-duplicates.ts
        env:
@@ -21,27 +21,29 @@ jobs:
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
-          cache: 'pip'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4

      - name: Install dependencies
-        run: |
-          cd core
-          pip install -e .
-          pip install -r requirements-dev.txt
+        run: uv sync --project core --group dev

      - name: Ruff lint
        run: |
-          ruff check core/
-          ruff check tools/
+          uv run --project core ruff check core/
+          uv run --project core ruff check tools/

      - name: Ruff format
        run: |
-          ruff format --check core/
-          ruff format --check tools/
+          uv run --project core ruff format --check core/
+          uv run --project core ruff format --check tools/

  test:
    name: Test Python Framework
-    runs-on: ubuntu-latest
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-latest, windows-latest]
    steps:
      - uses: actions/checkout@v4

@@ -49,23 +51,23 @@ jobs:
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
-          cache: 'pip'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4

      - name: Install dependencies
        run: |
          cd core
-          pip install -e .
-          pip install -r requirements-dev.txt
+          uv sync

      - name: Run tests
        run: |
          cd core
-          pytest tests/ -v
+          uv run pytest tests/ -v

-  validate:
-    name: Validate Agent Exports
+  test-tools:
+    name: Test Tools
    runs-on: ubuntu-latest
-    needs: [lint, test]
    steps:
      - uses: actions/checkout@v4

@@ -73,13 +75,35 @@ jobs:
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
-          cache: 'pip'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+
+      - name: Install dependencies and run tests
+        run: |
+          cd tools
+          uv sync --extra dev
+          uv run pytest tests/ -v
+
+  validate:
+    name: Validate Agent Exports
+    runs-on: ubuntu-latest
+    needs: [lint, test, test-tools]
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4

      - name: Install dependencies
        run: |
          cd core
-          pip install -e .
-          pip install -r requirements-dev.txt
+          uv sync

      - name: Validate exported agents
        run: |
@@ -51,12 +51,26 @@ jobs:
            - Do NOT apply the "duplicate" label yet (the auto-close script will add it after 12 hours if no objections)
            - Suggest the user react with a thumbs-down if they disagree

-            ### 3. Check for invalid issues
-            If the issue lacks sufficient information, is spam, or doesn't make sense:
-            - Add the "invalid" label
-            - Comment asking for clarification or explaining why it's invalid
+            ### 3. Check for Low-Quality / AI Spam
+            Analyze the issue quality. We are receiving many low-effort, AI-generated spam issues.
+            Flag the issue as INVALID if it matches these criteria:
+            - **Vague/Generic**: Title is "Fix bug" or "Error" without specific context.
+            - **Hallucinated**: Refers to files or features that do not exist in this repo.
+            - **Template Filler**: Body contains "Insert description here" or unrelated gibberish.
+            - **Low Effort**: No reproduction steps, no logs, only 1-2 sentences.

-            ### 4. Categorize with labels (if NOT a duplicate)
+            If identified as spam/low-quality:
+            - Add the "invalid" label.
+            - Add a comment:
+              "This issue has been automatically flagged as low-quality or potentially AI-generated spam. It lacks specific details (logs, reproduction steps, file references) required for us to help. Please open a new issue following the template exactly if this is a legitimate request."
+            - Do NOT proceed to other steps.
+
+            ### 4. Check for invalid issues (General)
+            If the issue is not spam but still lacks information:
+            - Add the "invalid" label
+            - Comment asking for clarification
+
+            ### 5. Categorize with labels (if NOT a duplicate or spam)
            Apply appropriate labels based on the issue content. Use ONLY these labels:
            - bug: Something isn't working
            - enhancement: New feature or request
@@ -66,7 +80,13 @@ jobs:
            - help wanted: Extra attention is needed (if issue needs community input)
            - backlog: Tracked for the future, but not currently planned or prioritized

-            You may apply multiple labels if appropriate (e.g., "bug" and "help wanted").
+            ### 6. Estimate size (if NOT a duplicate, spam, or invalid)
+            Apply exactly ONE size label to help contributors match their capacity to the task:
+            - "size: small": Docs, typos, single-file fixes, config changes
+            - "size: medium": Bug fixes with tests, adding a single tool, changes within one package
+            - "size: large": Cross-package changes (core + tools), new modules, complex logic, architectural refactors
+
+            You may combine category labels with the single size label where appropriate (e.g., "bug", "size: small", and "good first issue").

            ## Tools Available:
            - mcp__github__get_issue: Get issue details
@@ -58,6 +58,13 @@ jobs:
            - Add the \`micro-fix\` label or include \`micro-fix\` in your PR title for trivial fixes
            - Add the \`documentation\` label or include \`doc\`/\`docs\` in your PR title for documentation changes

+            **Micro-fix requirements** (must meet ALL):
+            | Qualifies | Disqualifies |
+            |-----------|--------------|
+            | < 20 lines changed | Any functional bug fix |
+            | Typos & Documentation & Linting | Refactoring for "clean code" |
+            | No logic/API/DB changes | New features (even tiny ones) |
+
            **Why is this required?** See #472 for details.`;

              const comments = await github.rest.issues.listComments({
@@ -141,6 +148,13 @@ jobs:
            - Add the \`micro-fix\` label or include \`micro-fix\` in your PR title for trivial fixes
            - Add the \`documentation\` label or include \`doc\`/\`docs\` in your PR title for documentation changes

+            **Micro-fix requirements** (must meet ALL):
+            | Qualifies | Disqualifies |
+            |-----------|--------------|
+            | < 20 lines changed | Any functional bug fix |
+            | Typos & Documentation & Linting | Refactoring for "clean code" |
+            | No logic/API/DB changes | New features (even tiny ones) |
+
            **Why is this required?** See #472 for details.`;

              const comments = await github.rest.issues.listComments({
@@ -21,18 +21,19 @@ jobs:
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
-          cache: 'pip'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4

      - name: Install dependencies
        run: |
          cd core
-          pip install -e .
-          pip install -r requirements-dev.txt
+          uv sync

      - name: Run tests
        run: |
          cd core
-          pytest tests/ -v
+          uv run pytest tests/ -v

      - name: Generate changelog
        id: changelog
@@ -1,20 +1,14 @@
 {
  "mcpServers": {
    "agent-builder": {
-      "command": "python",
-      "args": ["-m", "framework.mcp.agent_builder_server"],
-      "cwd": "core",
-      "env": {
-        "PYTHONPATH": "../tools/src"
-      }
+      "command": "uv",
+      "args": ["run", "-m", "framework.mcp.agent_builder_server"],
+      "cwd": "core"
    },
    "tools": {
-      "command": "python",
-      "args": ["mcp_server.py", "--stdio"],
-      "cwd": "tools",
-      "env": {
-        "PYTHONPATH": "src"
-      }
+      "command": "uv",
+      "args": ["run", "mcp_server.py", "--stdio"],
+      "cwd": "tools"
    }
  }
 }
@@ -0,0 +1 @@
+3.11
@@ -1,15 +1,11 @@
 # Contributing to Aden Agent Framework

-Thank you for your interest in contributing to the Aden Agent Framework! This document provides guidelines and information for contributors.
+Thank you for your interest in contributing to the Aden Agent Framework! This document provides guidelines and information for contributors. We’re especially looking for help building tools, integrations ([check #2805](https://github.com/adenhq/hive/issues/2805)), and example agents for the framework. If you’re interested in extending its functionality, this is the perfect place to start.

 ## Code of Conduct

 By participating in this project, you agree to abide by our [Code of Conduct](CODE_OF_CONDUCT.md).

-## Contributor License Agreement
-
-By submitting a Pull Request, you agree that your contributions will be licensed under the Aden Agent Framework license.
-
 ## Issue Assignment Policy

 To prevent duplicate work and respect contributors' time, we require issue assignment before submitting PRs.
@@ -18,23 +14,22 @@ To prevent duplicate work and respect contributors' time, we require issue assig

 1. **Find an Issue:** Browse existing issues or create a new one
 2. **Claim It:** Leave a comment (e.g., *"I'd like to work on this!"*)
-3. **Wait for Assignment:** A maintainer will assign you within 24 hours
+3. **Wait for Assignment:** A maintainer will assign you within 24 hours. Issues with reproducible steps or proposals are prioritized.
 4. **Submit Your PR:** Once assigned, you're ready to contribute

 > **Note:** PRs for unassigned issues may be delayed or closed if someone else was already assigned.

-### The 5-Day Momentum Rule
-
-To keep the project moving, issues with **no activity for 5 days** (no PR or status update) will be unassigned. If you need more time, just drop a quick comment!
-
 ### Exceptions (No Assignment Needed)

 You may submit PRs without prior assignment for:
 - **Documentation:** Fixing typos or clarifying instructions — add the `documentation` label or include `doc`/`docs` in your PR title to bypass the linked issue requirement
- **Micro-fixes:** Minor tweaks or obvious linting errors — add the `micro-fix` label or include `micro-fix` in your PR title to bypass the linked issue requirement
- **Small Refactors:** Tiny improvements that don't change core logic
+- **Micro-fixes:** Add the `micro-fix` label or include `micro-fix` in your PR title to bypass the linked issue requirement. Micro-fixes must meet **all** qualification criteria:

-If a high-quality PR is submitted for a "stale" assigned issue (no activity for 7+ days), we may proceed with the submitted code.
+  | Qualifies | Disqualifies |
+  |-----------|--------------|
+  | < 20 lines changed | Any functional bug fix |
+  | Typos & Documentation & Linting | Refactoring for "clean code" |
+  | No logic/API/DB changes | New features (even tiny ones) |

 ## Getting Started

@@ -42,20 +37,18 @@ If a high-quality PR is submitted for a "stale" assigned issue (no activity for
 2. Clone your fork: `git clone https://github.com/YOUR_USERNAME/hive.git`
 3. Create a feature branch: `git checkout -b feature/your-feature-name`
 4. Make your changes
-5. Run tests: `PYTHONPATH=core:exports python -m pytest`
+5. Run checks and tests:
+   ```bash
+   make check    # Lint and format checks (ruff check + ruff format --check on core/ and tools/)
+   make test     # Core tests (cd core && pytest tests/ -v)
+   ```
 6. Commit your changes following our commit conventions
 7. Push to your fork and submit a Pull Request

 ## Development Setup

 ```bash
-# Install Python packages
-./scripts/setup-python.sh
-
-# Verify installation
-python -c "import framework; import aden_tools; print('✓ Setup complete')"
-
-# Install Claude Code skills (optional)
+# Install Python packages and verify setup
 ./quickstart.sh
 ```

@@ -98,7 +91,7 @@ docs(readme): update installation instructions
 1. **Get assigned to the issue first** (see [Issue Assignment Policy](#issue-assignment-policy))
 2. Update documentation if needed
 3. Add tests for new functionality
-4. Ensure all tests pass
+4. Ensure `make check` and `make test` pass
 5. Update the CHANGELOG.md if applicable
 6. Request review from maintainers

@@ -112,7 +105,7 @@ feat(component): add new feature description
 ## Project Structure

 - `core/` - Core framework (agent runtime, graph executor, protocols)
- `tools/` - MCP Tools Package (19 tools for agent capabilities)
+- `tools/` - MCP Tools Package (tools for agent capabilities)
 - `exports/` - Agent packages and examples
 - `docs/` - Documentation
 - `scripts/` - Build and utility scripts
@@ -136,16 +129,25 @@ feat(component): add new feature description
 > ```

 ```bash
-# Run all tests for the framework
-cd core && python -m pytest
+# Run lint and format checks (mirrors CI lint job)
+make check

-# Run all tests for tools
-cd tools && python -m pytest
+# Run core framework tests (mirrors CI test job)
+make test
+
+# Or run tests directly
+cd core && pytest tests/ -v

 # Run tests for a specific agent
 PYTHONPATH=core:exports python -m agent_name test
 ```

+> **CI also validates** that all exported agent JSON files (`exports/*/agent.json`) are well-formed JSON. Ensure your agent exports are valid before submitting.
+
+## Contributor License Agreement
+
+By submitting a Pull Request, you agree that your contributions will be licensed under the Aden Agent Framework license.
+
 ## Questions?

 Feel free to open an issue for questions or join our [Discord community](https://discord.com/invite/MXE49hrKDk).
@@ -23,8 +23,8 @@ Aden Agent Framework is a Python-based system for building goal-driven, self-imp
 | Package       | Directory  | Description                             | Tech Stack   |
 | ------------- | ---------- | --------------------------------------- | ------------ |
 | **framework** | `/core`    | Core runtime, graph executor, protocols | Python 3.11+ |
-| **tools**     | `/tools`   | 19 MCP tools for agent capabilities     | Python 3.11+ |
-| **exports**   | `/exports` | Agent packages and examples             | Python 3.11+ |
+| **tools**     | `/tools`   | MCP tools for agent capabilities        | Python 3.11+ |
+| **exports**   | `/exports` | Agent packages (user-created, gitignored) | Python 3.11+ |
 | **skills**    | `.claude`  | Claude Code skills for building/testing | Markdown     |

 ### Key Principles
@@ -44,7 +44,7 @@ Aden Agent Framework is a Python-based system for building goal-driven, self-imp
 Ensure you have installed:

 - **Python 3.11+** - [Download](https://www.python.org/downloads/) (3.12 or 3.13 recommended)
- **pip** - Package installer for Python (comes with Python)
+- **uv** - Python package manager ([Install](https://docs.astral.sh/uv/getting-started/installation/))
 - **git** - Version control
 - **Claude Code** - [Install](https://docs.anthropic.com/claude/docs/claude-code) (optional, for using building skills)

@@ -52,7 +52,7 @@ Verify installation:

 ```bash
 python --version    # Should be 3.11+
-pip --version       # Should be latest
+uv --version        # Should be latest
 git --version       # Any recent version
 ```

@@ -63,8 +63,8 @@ git --version       # Any recent version
 git clone https://github.com/adenhq/hive.git
 cd hive

-# 2. Run automated Python setup
-./scripts/setup-python.sh
+# 2. Run automated setup
+./quickstart.sh
 ```

 The setup script performs these actions:
@@ -115,8 +115,8 @@ python -c "import framework; print('✓ framework OK')"
 python -c "import aden_tools; print('✓ aden_tools OK')"
 python -c "import litellm; print('✓ litellm OK')"

-# Run an example agent
-PYTHONPATH=core:exports python -m support_ticket_agent validate
+# Run an agent (after building one via /building-agents-construction)
+PYTHONPATH=core:exports python -m your_agent_name validate
 ```

 ---
@@ -128,8 +128,12 @@ hive/                                    # Repository root
 │
 ├── .github/                             # GitHub configuration
 │   ├── workflows/
-│   │   ├── ci.yml                       # Runs on every PR
-│   │   └── release.yml                  # Runs on tags
+│   │   ├── ci.yml                       # Lint, test, validate on every PR
+│   │   ├── release.yml                  # Runs on tags
+│   │   ├── pr-requirements.yml          # PR requirement checks
+│   │   ├── pr-check-command.yml         # PR check commands
+│   │   ├── claude-issue-triage.yml      # Automated issue triage
+│   │   └── auto-close-duplicates.yml    # Close duplicate issues
 │   ├── ISSUE_TEMPLATE/                  # Bug report & feature request templates
 │   ├── PULL_REQUEST_TEMPLATE.md         # PR description template
 │   └── CODEOWNERS                       # Auto-assign reviewers
@@ -151,55 +155,54 @@ hive/                                    # Repository root
 │       └── agent-workflow/              # Complete workflow 
 |           ├── SKILL.md
 │           └── examples
-orchestration
 │
 ├── core/                                # CORE FRAMEWORK PACKAGE
 │   ├── framework/                       # Main package code
-│   │   ├── runner/                      # AgentRunner - loads and runs agents
-│   │   ├── executor/                    # GraphExecutor - executes node graphs
-│   │   ├── protocols/                   # Standard protocols (hooks, tracing, etc.)
+│   │   ├── builder/                     # Agent builder utilities
+│   │   ├── credentials/                 # Credential management
+│   │   ├── graph/                       # GraphExecutor - executes node graphs
 │   │   ├── llm/                         # LLM provider integrations (Anthropic, OpenAI, etc.)
-│   │   ├── memory/                      # Memory systems (STM, LTM/RLM)
-│   │   ├── tools/                       # Tool registry and management
+│   │   ├── mcp/                         # MCP server integration
+│   │   ├── runner/                      # AgentRunner - loads and runs agents
+│   │   ├── runtime/                     # Runtime environment
+│   │   ├── schemas/                     # Data schemas
+│   │   ├── storage/                     # File-based persistence
+│   │   ├── testing/                     # Testing utilities
 │   │   └── __init__.py
 │   ├── pyproject.toml                   # Package metadata and dependencies
-│   ├── requirements.txt                 # Python dependencies
 │   ├── README.md                        # Framework documentation
 │   ├── MCP_INTEGRATION_GUIDE.md         # MCP server integration guide
 │   └── docs/                            # Protocol documentation
 │
-├── tools/                               # TOOLS PACKAGE (19 MCP tools)
+├── tools/                               # TOOLS PACKAGE (MCP tools)
 │   ├── src/
 │   │   └── aden_tools/
 │   │       ├── tools/                   # Individual tool implementations
 │   │       │   ├── web_search_tool/
 │   │       │   ├── web_scrape_tool/
 │   │       │   ├── file_system_toolkits/
-│   │       │   └── ...                  # 19 tools total
+│   │       │   └── ...                  # Additional tools
 │   │       ├── mcp_server.py            # HTTP MCP server
 │   │       └── __init__.py
 │   ├── pyproject.toml                   # Package metadata
-│   ├── requirements.txt                 # Python dependencies
 │   └── README.md                        # Tools documentation
 │
-├── exports/                             # AGENT PACKAGES
-│   ├── support_ticket_agent/            # Example: Support ticket handler
-│   ├── market_research_agent/           # Example: Market research
-│   ├── outbound_sales_agent/            # Example: Sales outreach
-│   ├── personal_assistant_agent/        # Example: Personal assistant
-│   └── ...                              # More agent examples
+├── exports/                             # AGENT PACKAGES (user-created, gitignored)
+│   └── your_agent_name/                 # Created via /building-agents-construction
 │
 ├── docs/                                # Documentation
 │   ├── getting-started.md               # Quick start guide
 │   ├── configuration.md                 # Configuration reference
-│   ├── architecture.md                  # System architecture
-│   └── articles/                        # Technical articles
+│   ├── architecture/                    # System architecture
+│   ├── articles/                        # Technical articles
+│   ├── quizzes/                         # Developer quizzes
+│   └── i18n/                            # Translations
 │
 ├── scripts/                             # Build & utility scripts
 │   ├── setup-python.sh                  # Python environment setup
 │   └── setup.sh                         # Legacy setup script
 │
-├── quickstart.sh                        # Install Claude Code skills
+├── quickstart.sh                        # Interactive setup wizard
 ├── ENVIRONMENT_SETUP.md                 # Complete Python setup guide
 ├── README.md                            # Project overview
 ├── DEVELOPER.md                         # This file
@@ -376,7 +379,7 @@ def test_ticket_categorization():
 - **PEP 8** - Follow Python style guide
 - **Type hints** - Use for function signatures and class attributes
 - **Docstrings** - Document classes and public functions
- **Black** - Code formatter (run with `black .`)
+- **Ruff** - Linter and formatter (run with `make check`)

 ```python
 # Good
@@ -510,8 +513,8 @@ chore(deps): update React to 18.2.0

 1. Create a feature branch from `main`
 2. Make your changes with clear commits
-3. Run tests locally: `npm run test`
-4. Run linting: `npm run lint`
+3. Run tests locally: `make test`
+4. Run linting: `make check`
 5. Push and create a PR
 6. Fill out the PR template
 7. Request review from CODEOWNERS
@@ -520,66 +523,6 @@ chore(deps): update React to 18.2.0

 ---

-## Debugging
-
-### Frontend Debugging
-
-**React Developer Tools:**
-
-1. Install the [React DevTools browser extension](https://react.dev/learn/react-developer-tools)
-2. Open browser DevTools → React tab
-3. Inspect component tree, props, state, and hooks
-
-**VS Code Debugging:**
-
-1. Add Chrome debug configuration to `.vscode/launch.json`:
-
-```json
-{
-  "type": "chrome",
-  "request": "launch",
-  "name": "Debug Frontend",
-  "url": "http://localhost:3000",
-  "webRoot": "${workspaceFolder}/honeycomb/src"
-}
-```
-
-2. Start the dev server: `npm run dev -w honeycomb`
-3. Press F5 in VS Code
-
-### Backend Debugging
-
-**VS Code Debugging:**
-
-1. Add Node debug configuration:
-
-```json
-{
-  "type": "node",
-  "request": "launch",
-  "name": "Debug Backend",
-  "runtimeExecutable": "npm",
-  "runtimeArgs": ["run", "dev"],
-  "cwd": "${workspaceFolder}/hive",
-  "console": "integratedTerminal"
-}
-```
-
-2. Set breakpoints in your code
-3. Press F5 to start debugging
-
-**Logging:**
-
-```typescript
-import { logger } from "../utils/logger";
-
-// Add debug logs
-logger.debug("Processing request", {
-  userId: req.user.id,
-  body: req.body,
-});
-```
-
 ---

 ## Common Tasks
@@ -589,16 +532,11 @@ logger.debug("Processing request", {
 ```bash
 # Add to core framework
 cd core
-pip install <package>
-# Then add to requirements.txt or pyproject.toml
+uv add <package>

 # Add to tools package
 cd tools
-pip install <package>
-# Then add to requirements.txt or pyproject.toml
-
-# Reinstall in editable mode
-pip install -e .
+uv add <package>
 ```

 ### Creating a New Agent
@@ -720,61 +658,22 @@ kill -9 <PID>
 # Or change ports in config.yaml and regenerate
 ```

-### Node Modules Issues

-```bash
-# Clean everything and reinstall
-npm run clean
-rm -rf node_modules package-lock.json
-npm install
-```
-
-### Docker Issues
-
-```bash
-# Reset Docker state
-docker compose down -v
-docker system prune -f
-docker compose build --no-cache
-docker compose up
-```
-
-### TypeScript Errors After Pull
-
-```bash
-# Rebuild TypeScript
-npm run build
-
-# Or restart TS server in VS Code
-# Cmd/Ctrl + Shift + P → "TypeScript: Restart TS Server"
-```

 ### Environment Variables Not Loading

 ```bash
-# Regenerate from config.yaml
-npm run generate:env
-
-# Verify files exist
+# Verify .env file exists at project root
 cat .env
-cat honeycomb/.env
-cat hive/.env

-# Restart dev servers after changing env
+# Or check shell environment
+echo $ANTHROPIC_API_KEY
+
+# If .env is missing, create it and add your API keys, e.g.:
+# echo 'ANTHROPIC_API_KEY=your-key-here' >> .env
 ```

-### Tests Failing

-```bash
-# Run with verbose output
-npm run test -w honeycomb -- --reporter=verbose
-
-# Run single test file
-npm run test -w honeycomb -- src/components/Button.test.tsx
-
-# Clear test cache
-npm run test -w honeycomb -- --clearCache
-```

 ---

@@ -6,7 +6,7 @@ Complete setup guide for building and running goal-driven agents with the Aden A

 ```bash
 # Run the automated setup script
-./scripts/setup-python.sh
+./quickstart.sh
 ```

 > **Note for Windows Users:**  
@@ -21,6 +21,63 @@ This will:
 - Fix package compatibility issues (openai + litellm)
 - Verify all installations

+## Quick Setup (Windows – PowerShell)
+
+Windows users can use the native PowerShell setup script.
+
+Before running the script, allow script execution for the current session:
+
+```powershell
+Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass
+```
+
+Run setup from the project root:
+
+```powershell
+./scripts/setup-python.ps1
+```
+
+This will:
+
+- Check Python version (requires 3.11+)
+- Create a local `.venv` virtual environment
+- Install the core framework package (`framework`)
+- Install the tools package (`aden_tools`)
+- Fix package compatibility issues (openai + litellm)
+- Verify all installations
+
+After setup, activate the virtual environment:
+
+```powershell
+.\.venv\Scripts\Activate.ps1
+```
+
+Set `PYTHONPATH` (required in every new PowerShell session):
+
+```powershell
+$env:PYTHONPATH="core;exports"
+```
+
+## Alpine Linux Setup
+
+If you are using Alpine Linux (e.g., inside a Docker container), you must install system dependencies and use a virtual environment before running the setup script:
+
+1. Install System Dependencies:
+```bash
+apk update
+apk add bash git python3 py3-pip nodejs npm curl build-base python3-dev linux-headers libffi-dev
+```
+2. Set up Virtual Environment (Required for Python 3.12+):
+```bash
+python3 -m venv venv
+source venv/bin/activate
+pip install --upgrade pip setuptools wheel
+```
+3. Run the Quickstart Script:
+```bash
+./quickstart.sh
+```
+
 ## Manual Setup (Alternative)

 If you prefer to set up manually or the script fails:
@@ -80,6 +137,12 @@ For running agents with real LLMs:
 export ANTHROPIC_API_KEY="your-key-here"
 ```

+Windows (PowerShell):
+
+```powershell
+$env:ANTHROPIC_API_KEY="your-key-here"
+```
+
 ## Running Agents

 All agent commands must be run from the project root with `PYTHONPATH` set:
@@ -89,37 +152,29 @@ All agent commands must be run from the project root with `PYTHONPATH` set:
 PYTHONPATH=core:exports python -m agent_name COMMAND
 ```

+Windows (PowerShell):
+
+```powershell
+$env:PYTHONPATH="core;exports"
+python -m agent_name COMMAND
+```
+
 ### Example: Support Ticket Agent

 ```bash
 # Validate agent structure
-PYTHONPATH=core:exports python -m support_ticket_agent validate
+PYTHONPATH=core:exports python -m your_agent_name validate

 # Show agent information
-PYTHONPATH=core:exports python -m support_ticket_agent info
+PYTHONPATH=core:exports python -m your_agent_name info

 # Run agent with input
-PYTHONPATH=core:exports python -m support_ticket_agent run --input '{
-  "ticket_content": "My login is broken. Error 401.",
-  "customer_id": "CUST-123",
-  "ticket_id": "TKT-456"
+PYTHONPATH=core:exports python -m your_agent_name run --input '{
+  "task": "Your input here"
 }'

 # Run in mock mode (no LLM calls)
-PYTHONPATH=core:exports python -m support_ticket_agent run --mock --input '{...}'
-```
-
-### Example: Other Agents
-
-```bash
-# Market Research Agent
-PYTHONPATH=core:exports python -m market_research_agent info
-
-# Outbound Sales Agent
-PYTHONPATH=core:exports python -m outbound_sales_agent validate
-
-# Personal Assistant Agent
-PYTHONPATH=core:exports python -m personal_assistant_agent run --input '{...}'
+PYTHONPATH=core:exports python -m your_agent_name run --mock --input '{...}'
 ```

 ## Building New Agents and Run Flow
@@ -132,7 +187,7 @@ Build and run an agent using Claude Code CLI with the agent building skills:
 ./quickstart.sh
 ```

-This installs agent-related Claude Code skills:
+This verifies agent-related Claude Code skills are available:

 - `/building-agents-construction` - Step-by-step build guide
 - `/building-agents-core` - Fundamental concepts
@@ -231,7 +286,7 @@ source .venv/bin/activate  # macOS/Linux
 # .venv\Scripts\activate   # Windows

 # Then run setup
-./scripts/setup-python.sh
+./quickstart.sh
 ```

 Always activate the venv before running agents:
@@ -241,6 +296,14 @@ source .venv/bin/activate
 PYTHONPATH=core:exports python -m your_agent_name demo
 ```

+### PowerShell: "running scripts is disabled on this system"
+
+Run once per session:
+
+```powershell
+Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass
+```
+
 ### "ModuleNotFoundError: No module named 'framework'"

 **Solution:** Install the core package:
@@ -260,7 +323,13 @@ cd tools && pip install -e .
 Or run the setup script:

 ```bash
-./scripts/setup-python.sh
+./quickstart.sh
+```
+
+Windows:
+
+```powershell
+./scripts/setup-python.ps1
 ```

 ### "ModuleNotFoundError: No module named 'openai.\_models'"
@@ -273,14 +342,23 @@ Or run the setup script:
 pip install --upgrade "openai>=1.0.0"
 ```

-### "No module named 'support_ticket_agent'"
+### "No module named 'your_agent_name'"

-**Cause:** Not running from project root or missing PYTHONPATH
+**Cause:** Not running from project root, missing PYTHONPATH, or agent not yet created

-**Solution:** Ensure you're in the project root directory and use:
+**Solution:** Ensure you're in the project root directory (`hive/`) and use:
+
+Linux/macOS:

 ```bash
-PYTHONPATH=core:exports python -m support_ticket_agent validate
+PYTHONPATH=core:exports python -m your_agent_name validate
+```
+
+Windows:
+
+```powershell
+$env:PYTHONPATH="core;exports"
+python -m your_agent_name validate
 ```

 ### Agent imports fail with "broken installation"
@@ -294,7 +372,13 @@ PYTHONPATH=core:exports python -m support_ticket_agent validate
 pip uninstall -y framework tools

 # Reinstall correctly
-./scripts/setup-python.sh
+./quickstart.sh
+```
+
+Windows:
+
+```powershell
+./scripts/setup-python.ps1
 ```

 ## Package Structure
@@ -305,22 +389,75 @@ The Hive framework consists of three Python packages:
 hive/
 ├── core/                    # Core framework (runtime, graph executor, LLM providers)
 │   ├── framework/
-│   ├── pyproject.toml
-│   └── requirements.txt
+│   ├── .venv/              # Created by quickstart.sh
+│   └── pyproject.toml
 │
 ├── tools/                   # Tools and MCP servers
 │   ├── src/
 │   │   └── aden_tools/     # Actual package location
-│   ├── pyproject.toml
-│   └── README.md
+│   ├── .venv/              # Created by quickstart.sh
+│   └── pyproject.toml
 │
-└── exports/                 # Agent packages (your agents go here)
-    ├── support_ticket_agent/
-    ├── market_research_agent/
-    ├── outbound_sales_agent/
-    └── personal_assistant_agent/
+└── exports/                 # Agent packages (user-created, gitignored)
+    └── your_agent_name/     # Created via /building-agents-construction
 ```

+## Separate Virtual Environments
+
+The project uses **separate virtual environments** for `core` and `tools` packages to:
+
+- Isolate dependencies and avoid conflicts
+- Allow independent development and testing of each package
+- Enable MCP servers to run with their specific dependencies
+
+### How It Works
+
+Running `./quickstart.sh` from the project root (or `uv sync` inside each of `core/` and `tools/`) creates two virtual environments:
+
+1. **core/.venv/** - Contains the `framework` package and its dependencies (anthropic, litellm, mcp, etc.)
+2. **tools/.venv/** - Contains the `aden_tools` package and its dependencies (beautifulsoup4, pandas, etc.)
+
+### Cross-Package Imports
+
+The `core` and `tools` packages are **intentionally independent**:
+
+- **No cross-imports**: `framework` does not import `aden_tools` directly, and vice versa
+- **Communication via MCP**: Tools are exposed to agents through MCP servers, not direct Python imports
+- **Runtime integration**: The agent runner loads tools via the MCP protocol at runtime
+
+If you need to use both packages in a single script (e.g., for testing), you have two options:
+
+```bash
+# Option 1: Install both in a shared environment
+python -m venv .venv
+source .venv/bin/activate
+pip install -e core/ -e tools/
+
+# Option 2: Use PYTHONPATH (for quick testing)
+PYTHONPATH=core:tools/src python your_script.py
+```
+
+### MCP Server Configuration
+
+The `.mcp.json` at project root configures MCP servers to use their respective virtual environments:
+
+```json
+{
+  "mcpServers": {
+    "agent-builder": {
+      "command": "core/.venv/bin/python",
+      "args": ["-m", "framework.mcp.agent_builder_server"]
+    },
+    "tools": {
+      "command": "tools/.venv/bin/python",
+      "args": ["-m", "aden_tools.mcp_server", "--stdio"]
+    }
+  }
+}
+```
+
+This ensures each MCP server runs with its correct dependencies.
+
 ### Why PYTHONPATH is Required

 The packages are installed in **editable mode** (`pip install -e`), which means:
@@ -339,7 +476,13 @@ This design allows agents in `exports/` to be:
 ### 1. Setup (Once)

 ```bash
-./scripts/setup-python.sh
+./quickstart.sh
+```
+
+Windows:
+
+```powershell
+./scripts/setup-python.ps1
 ```

 ### 2. Build Agent (Claude Code)
@@ -352,7 +495,7 @@ Enter goal: "Build an agent that processes customer support tickets"
 ### 3. Validate Agent

 ```bash
-PYTHONPATH=core:exports python -m support_ticket_agent validate
+PYTHONPATH=core:exports python -m your_agent_name validate
 ```

 ### 4. Test Agent
@@ -364,7 +507,7 @@ claude> /testing-agent
 ### 5. Run Agent

 ```bash
-PYTHONPATH=core:exports python -m support_ticket_agent run --input '{...}'
+PYTHONPATH=core:exports python -m your_agent_name run --input '{...}'
 ```

 ## IDE Setup
@@ -432,4 +575,4 @@ When contributing agent packages:

 - **Issues:** https://github.com/adenhq/hive/issues
 - **Discord:** https://discord.com/invite/MXE49hrKDk
- **Documentation:** https://docs.adenhq.com/
+- **Documentation:** https://docs.adenhq.com/
@@ -0,0 +1,51 @@
+## Summary
+- **Added HubSpot integration** — new HubSpot MCP tool with search, get, create, and update operations for contacts, companies, and deals. Includes OAuth2 provider for HubSpot credentials and credential store adapter for the tools layer.
+- **Replaced web_scrape tool with Playwright + stealth** — swapped httpx/BeautifulSoup for a headless Chromium browser using `playwright` (async API) and `playwright-stealth`, enabling JS-rendered page scraping and bot detection evasion
+- **Added empty response retry logic** — LLM provider now detects empty responses (e.g. Gemini returning 200 with no content on rate limit) and retries with exponential backoff, preventing hallucinated output from the cleanup LLM
+- **Added context-aware input compaction** — LLM nodes now estimate input token count before calling the model and progressively truncate the largest values if they exceed the context window budget
+- **Increased rate limit retries to 10** with verbose `[retry]` and `[compaction]` logging that includes model name, finish reason, and attempt count
+- **Updated setup scripts** — `scripts/setup-python.sh` now installs Playwright Chromium browser automatically for web scraping support
+- **Interactive quickstart onboarding** — `quickstart.sh` rewritten as bee-themed interactive wizard that detects existing API keys (including Claude Code subscription), lets user pick ONE default LLM provider, and saves configuration to `~/.hive/configuration.json`
+- **Fixed lint errors** across `hubspot_tool.py` (line length) and `agent_builder_server.py` (unused variable)
+
+## Changed files
+
+### HubSpot Integration
+- `tools/src/aden_tools/tools/hubspot_tool/` — New MCP tool: contacts, companies, and deals CRUD
+- `tools/src/aden_tools/tools/__init__.py` — Registered HubSpot tools
+- `tools/src/aden_tools/credentials/integrations.py` — HubSpot credential integration
+- `tools/src/aden_tools/credentials/__init__.py` — Updated credential exports
+- `core/framework/credentials/oauth2/hubspot_provider.py` — HubSpot OAuth2 provider
+- `core/framework/credentials/oauth2/__init__.py` — Registered HubSpot OAuth2 provider
+- `core/framework/runner/runner.py` — Updated runner for credential support
+
+### Web Scrape Rewrite
+- `tools/src/aden_tools/tools/web_scrape_tool/web_scrape_tool.py` — Playwright async rewrite
+- `tools/src/aden_tools/tools/web_scrape_tool/README.md` — Updated docs
+- `tools/pyproject.toml` — Added `playwright`, `playwright-stealth` deps
+- `tools/Dockerfile` — Added `playwright install chromium --with-deps`
+- `scripts/setup-python.sh` — Added Playwright Chromium browser install step
+
+### LLM Reliability
+- `core/framework/llm/litellm.py` — Empty response retry + max retries 10 + verbose logging
+- `core/framework/graph/node.py` — Input compaction via `_compact_inputs()`, `_estimate_tokens()`, `_get_context_limit()`
+
+### Quickstart & Setup
+- `quickstart.sh` — Interactive bee-themed onboarding wizard with single provider selection
+- `~/.hive/configuration.json` — New user config file for default LLM provider/model
+
+### Fixes
+- `core/framework/mcp/agent_builder_server.py` — Removed unused variable
+- `tools/src/aden_tools/tools/hubspot_tool/hubspot_tool.py` — Fixed E501 line length violations
+
+## Test plan
+- [ ] Run `make lint` — passes clean
+- [ ] Run `./quickstart.sh` and verify interactive flow works, config saved to `~/.hive/configuration.json`
+- [ ] Run `./scripts/setup-python.sh` and verify Playwright Chromium installs
+- [ ] Run `pytest tests/tools/test_web_scrape_tool.py -v`
+- [ ] Run agent against a JS-heavy site and verify `web_scrape` returns rendered content
+- [ ] Set `HUBSPOT_ACCESS_TOKEN` and verify HubSpot tool CRUD operations work
+- [ ] Trigger rate limit and verify `[retry]` logs appear with correct attempt counts
+- [ ] Run agent with large inputs and verify `[compaction]` logs show truncation
+
+🤖 Generated with [Claude Code](https://claude.com/claude-code)
@@ -4,17 +4,17 @@

 <p align="center">
  <a href="README.md">English</a> |
-  <a href="README.zh-CN.md">简体中文</a> |
-  <a href="README.es.md">Español</a> |
-  <a href="README.pt.md">Português</a> |
-  <a href="README.ja.md">日本語</a> |
-  <a href="README.ru.md">Русский</a> |
-  <a href="README.ko.md">한국어</a>
+  <a href="docs/i18n/zh-CN.md">简体中文</a> |
+  <a href="docs/i18n/es.md">Español</a> |
+  <a href="docs/i18n/hi.md">हिन्दी</a> |
+  <a href="docs/i18n/pt.md">Português</a> |
+  <a href="docs/i18n/ja.md">日本語</a> |
+  <a href="docs/i18n/ru.md">Русский</a> |
+  <a href="docs/i18n/ko.md">한국어</a>
 </p>

 [![Apache 2.0 License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/adenhq/hive/blob/main/LICENSE)
 [![Y Combinator](https://img.shields.io/badge/Y%20Combinator-Aden-orange)](https://www.ycombinator.com/companies/aden)
-[![Docker Pulls](https://img.shields.io/docker/pulls/adenhq/hive?logo=Docker&labelColor=%23528bff)](https://hub.docker.com/u/adenhq)
 [![Discord](https://img.shields.io/discord/1172610340073242735?logo=discord&labelColor=%235462eb&logoColor=%23f5f5f5&color=%235462eb)](https://discord.com/invite/MXE49hrKDk)
 [![Twitter Follow](https://img.shields.io/twitter/follow/teamaden?logo=X&color=%23f5f5f5)](https://x.com/aden_hq)
 [![LinkedIn](https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff)](https://www.linkedin.com/company/teamaden/)
@@ -39,6 +39,31 @@ Build reliable, self-improving AI agents without hardcoding workflows. Define yo

 Visit [adenhq.com](https://adenhq.com) for complete documentation, examples, and guides.

+## Who Is Hive For?
+
+Hive is designed for developers and teams who want to build **production-grade AI agents** without manually wiring complex workflows.
+
+Hive is a good fit if you:
+
+- Want AI agents that **execute real business processes**, not demos
+- Prefer **goal-driven development** over hardcoded workflows
+- Need **self-healing and adaptive agents** that improve over time
+- Require **human-in-the-loop control**, observability, and cost limits
+- Plan to run agents in **production environments**
+
+Hive may not be the best fit if you’re only experimenting with simple agent chains or one-off scripts.
+
+## When Should You Use Hive?
+
+Use Hive when you need:
+
+- Long-running, autonomous agents
+- Multi-agent coordination
+- Continuous improvement based on failures
+- Strong monitoring, safety, and budget controls
+- A framework that evolves with your goals
+
+
 ## What is Aden

 <p align="center">
@@ -63,10 +88,12 @@ Aden is a platform for building, deploying, operating, and adapting AI agents:

 ## Quick Start

-### Prerequisites
+## Prerequisites

- [Python 3.11+](https://www.python.org/downloads/) for agent development
- [Docker](https://docs.docker.com/get-docker/) (v20.10+) - Optional, for containerized tools
+- Python 3.11+ for agent development
+- Claude Code or Cursor for utilizing agent skills
+
+> **Note for Windows Users:** It is strongly recommended to use **WSL (Windows Subsystem for Linux)** or **Git Bash** to run this framework. Some core automation scripts may not execute correctly in standard Command Prompt or PowerShell.

 ### Installation

@@ -75,21 +102,18 @@ Aden is a platform for building, deploying, operating, and adapting AI agents:
 git clone https://github.com/adenhq/hive.git
 cd hive

-# Run Python environment setup
-./scripts/setup-python.sh
+# Run quickstart setup
+./quickstart.sh
 ```

-This installs:
- **framework** - Core agent runtime and graph executor
- **aden_tools** - 19 MCP tools for agent capabilities
- All required dependencies
+This sets up:
+- **framework** - Core agent runtime and graph executor (in `core/.venv`)
+- **aden_tools** - MCP tools for agent capabilities (in `tools/.venv`)
+- All required Python dependencies

 ### Build Your First Agent

 ```bash
-# Install Claude Code skills (one-time)
-./quickstart.sh
-
 # Build an agent using Claude Code
 claude> /building-agents-construction

@@ -114,7 +138,7 @@ Skills are also available in Cursor. To enable:
 ## Features

 - **Goal-Driven Development** - Define objectives in natural language; the coding agent generates the agent graph and connection code to achieve them
- **Self-Adapting Agents** - Framework captures failures, updates objectives and updates the agent graph
+- **Adaptiveness** - Framework captures failures, calibrates according to the objectives, and evolves the agent graph
 - **Dynamic Node Connections** - No predefined edges; connection code is generated by any capable LLM based on your goals
 - **SDK-Wrapped Nodes** - Every node gets shared memory, local RLM memory, monitoring, tools, and LLM access out of the box
 - **Human-in-the-Loop** - Intervention nodes that pause execution for human input with configurable timeouts and escalation
@@ -124,51 +148,38 @@ Skills are also available in Cursor. To enable:

 ## Why Aden

-Traditional agent frameworks require you to manually design workflows, define agent interactions, and handle failures reactively. Aden flips this paradigm—**you describe outcomes, and the system builds itself**.
+Hive focuses on generating agents that run real business processes rather than generic agents. Instead of requiring you to manually design workflows, define agent interactions, and handle failures reactively, Hive flips the paradigm: **you describe outcomes, and the system builds itself**—delivering an outcome-driven, adaptive experience with an easy-to-use set of tools and integrations.

 ```mermaid
 flowchart LR
-    subgraph BUILD["🏗️ BUILD"]
-        GOAL["Define Goal<br/>+ Success Criteria"] --> NODES["Add Nodes<br/>LLM/Router/Function"]
-        NODES --> EDGES["Connect Edges<br/>on_success/failure/conditional"]
-        EDGES --> TEST["Test & Validate"] --> APPROVE["Approve & Export"]
-    end
+    GOAL["Define Goal"] --> GEN["Auto-Generate Graph"]
+    GEN --> EXEC["Execute Agents"]
+    EXEC --> MON["Monitor & Observe"]
+    MON --> CHECK{{"Pass?"}}
+    CHECK -- "Yes" --> DONE["Deliver Result"]
+    CHECK -- "No" --> EVOLVE["Evolve Graph"]
+    EVOLVE --> EXEC

-    subgraph EXPORT["📦 EXPORT"]
-        direction TB
-        JSON["agent.json<br/>(GraphSpec)"]
-        TOOLS["tools.py<br/>(Functions)"]
-        MCP["mcp_servers.json<br/>(Integrations)"]
-    end
+    GOAL -.- V1["Natural Language"]
+    GEN -.- V2["Instant Architecture"]
+    EXEC -.- V3["Easy Integrations"]
+    MON -.- V4["Full visibility"]
+    EVOLVE -.- V5["Adaptability"]
+    DONE -.- V6["Reliable outcomes"]

-    subgraph RUN["🚀 RUNTIME"]
-        LOAD["AgentRunner<br/>Load + Parse"] --> SETUP["Setup Runtime<br/>+ ToolRegistry"]
-        SETUP --> EXEC["GraphExecutor<br/>Execute Nodes"]
-
-        subgraph DECISION["Decision Recording"]
-            DEC1["runtime.decide()<br/>intent → options → choice"]
-            DEC2["runtime.record_outcome()<br/>success, result, metrics"]
-        end
-    end
-
-    subgraph INFRA["⚙️ INFRASTRUCTURE"]
-        CTX["NodeContext<br/>memory • llm • tools"]
-        STORE[("FileStorage<br/>Runs & Decisions")]
-    end
-
-    APPROVE --> EXPORT
-    EXPORT --> LOAD
-    EXEC --> DECISION
-    EXEC --> CTX
-    DECISION --> STORE
-    STORE -.->|"Analyze & Improve"| NODES
-
-    style BUILD fill:#ffbe42,stroke:#cc5d00,stroke-width:3px,color:#333
-    style EXPORT fill:#fff59d,stroke:#ed8c00,stroke-width:2px,color:#333
-    style RUN fill:#ffb100,stroke:#cc5d00,stroke-width:3px,color:#333
-    style DECISION fill:#ffcc80,stroke:#ed8c00,stroke-width:2px,color:#333
-    style INFRA fill:#e8763d,stroke:#cc5d00,stroke-width:3px,color:#fff
-    style STORE fill:#ed8c00,stroke:#cc5d00,stroke-width:2px,color:#fff
+    style GOAL fill:#ffbe42,stroke:#cc5d00,stroke-width:2px,color:#333
+    style GEN fill:#ffb100,stroke:#cc5d00,stroke-width:2px,color:#333
+    style EXEC fill:#ff9800,stroke:#cc5d00,stroke-width:2px,color:#fff
+    style MON fill:#ff9800,stroke:#cc5d00,stroke-width:2px,color:#fff
+    style CHECK fill:#fff59d,stroke:#ed8c00,stroke-width:2px,color:#333
+    style DONE fill:#4caf50,stroke:#2e7d32,stroke-width:2px,color:#fff
+    style EVOLVE fill:#e8763d,stroke:#cc5d00,stroke-width:2px,color:#fff
+    style V1 fill:#fff,stroke:#ed8c00,stroke-width:1px,color:#cc5d00
+    style V2 fill:#fff,stroke:#ed8c00,stroke-width:1px,color:#cc5d00
+    style V3 fill:#fff,stroke:#ed8c00,stroke-width:1px,color:#cc5d00
+    style V4 fill:#fff,stroke:#ed8c00,stroke-width:1px,color:#cc5d00
+    style V5 fill:#fff,stroke:#ed8c00,stroke-width:1px,color:#cc5d00
+    style V6 fill:#fff,stroke:#ed8c00,stroke-width:1px,color:#cc5d00
 ```

 ### The Aden Advantage
@@ -177,7 +188,7 @@ flowchart LR
 | -------------------------- | -------------------------------------- |
 | Hardcode agent workflows   | Describe goals in natural language     |
 | Manual graph definition    | Auto-generated agent graphs            |
-| Reactive error handling    | Proactive self-evolution               |
+| Reactive error handling    | Outcome evaluation and adaptiveness    |
 | Static tool configurations | Dynamic SDK-wrapped nodes              |
 | Separate monitoring setup  | Built-in real-time observability       |
 | DIY budget management      | Integrated cost controls & degradation |
@@ -188,74 +199,27 @@ flowchart LR
 2. **Coding Agent Generates** → Creates the agent graph, connection code, and test cases
 3. **Workers Execute** → SDK-wrapped nodes run with full observability and tool access
 4. **Control Plane Monitors** → Real-time metrics, budget enforcement, policy management
-5. **Self-Improve** → On failure, the system evolves the graph and redeploys automatically
+5. **Adaptiveness** → On failure, the system evolves the graph and redeploys automatically

-## How Aden Compares
+## Run pre-built Agents (Coming Soon)

-Aden takes a fundamentally different approach to agent development. While most frameworks require you to hardcode workflows or manually define agent graphs, Aden uses a **coding agent to generate your entire agent system** from natural language goals. When agents fail, the framework doesn't just log errors—it **automatically evolves the agent graph** and redeploys.
+### Run a sample agent
+Aden Hive provides a list of featured agents that you can use and build on top of.

-### Comparison Table
+### Run an agent shared by others
+Put the agent in `exports/` and run `PYTHONPATH=core:exports python -m your_agent_name run --input '{...}'`

-| Framework                           | Category                  | Approach                                                        | Aden Difference                                           |
-| ----------------------------------- | ------------------------- | --------------------------------------------------------------- | --------------------------------------------------------- |
-| **LangChain, LlamaIndex, Haystack** | Component Libraries       | Predefined components for RAG/LLM apps; manual connection logic | Generates entire graph and connection code upfront        |
-| **CrewAI, AutoGen, Swarm**          | Multi-Agent Orchestration | Role-based agents with predefined collaboration patterns        | Dynamically creates agents/connections; adapts on failure |
-| **PydanticAI, Mastra, Agno**        | Type-Safe Frameworks      | Structured outputs and validation for known workflows           | Evolving workflows; structure emerges through iteration   |
-| **Agent Zero, Letta**               | Personal AI Assistants    | Memory and learning; OS-as-tool or stateful memory focus        | Production multi-agent systems with self-healing          |
-| **CAMEL**                           | Research Framework        | Emergent behavior in large-scale simulations (up to 1M agents)  | Production-oriented with reliable execution and recovery  |
-| **TEN Framework, Genkit**           | Infrastructure Frameworks | Real-time multimodal (TEN) or full-stack AI (Genkit)            | Higher abstraction—generates and evolves agent logic      |
-| **GPT Engineer, Motia**             | Code Generation           | Code from specs (GPT Engineer) or "Step" primitive (Motia)      | Self-adapting graphs with automatic failure recovery      |
-| **Trading Agents**                  | Domain-Specific           | Hardcoded trading firm roles on LangGraph                       | Domain-agnostic; generates structures for any use case    |
-
-### When to Choose Aden
-
-Choose Aden when you need:
-
- Agents that **self-improve from failures** without manual intervention
- **Goal-driven development** where you describe outcomes, not workflows
- **Production reliability** with automatic recovery and redeployment
- **Rapid iteration** on agent architectures without rewriting code
- **Full observability** with real-time monitoring and human oversight
-
-Choose other frameworks when you need:
-
- **Type-safe, predictable workflows** (PydanticAI, Mastra)
- **RAG and document processing** (LlamaIndex, Haystack)
- **Research on agent emergence** (CAMEL)
- **Real-time voice/multimodal** (TEN Framework)
- **Simple component chaining** (LangChain, Swarm)
-
-## Project Structure
-
-```
-hive/
-├── core/                   # Core framework - Agent runtime, graph executor, protocols
-├── tools/                  # MCP Tools Package - 19 tools for agent capabilities
-├── exports/                # Agent packages - Pre-built agents and examples
-├── docs/                   # Documentation and guides
-├── scripts/                # Build and utility scripts
-├── .claude/                # Claude Code skills for building agents
-├── .cursor/                # Cursor IDE skills (symlinks to .claude/skills)
-├── ENVIRONMENT_SETUP.md    # Python setup guide for agent development
-├── DEVELOPER.md            # Developer guide
-├── CONTRIBUTING.md         # Contribution guidelines
-└── ROADMAP.md              # Product roadmap
-```
-
-## Development
-
-### Python Agent Development

 For building and running goal-driven agents with the framework:

 ```bash
 # One-time setup
-./scripts/setup-python.sh
+./quickstart.sh

-# This installs:
+# This sets up:
 # - framework package (core runtime)
-# - aden_tools package (19 MCP tools)
-# - All dependencies
+# - aden_tools package (MCP tools)
+# - All Python dependencies

 # Build new agents using Claude Code skills
 claude> /building-agents-construction
@@ -278,25 +242,108 @@ See [ENVIRONMENT_SETUP.md](ENVIRONMENT_SETUP.md) for complete setup instructions

 ## Roadmap

-Aden Agent Framework aims to help developers build outcome oriented, self-adaptive agents. Please find our roadmap here
-
-[ROADMAP.md](ROADMAP.md)
+Aden Hive Agent Framework aims to help developers build outcome-oriented, self-adaptive agents. See [ROADMAP.md](ROADMAP.md) for details.

 ```mermaid
-timeline
-    title Aden Agent Framework Roadmap
-    section Foundation
-        Architecture : Node-Based Architecture : Python SDK : LLM Integration (OpenAI, Anthropic, Google) : Communication Protocol
-        Coding Agent : Goal Creation Session : Worker Agent Creation : MCP Tools Integration
-        Worker Agent : Human-in-the-Loop : Callback Handlers : Intervention Points : Streaming Interface
-        Tools : File Use : Memory (STM/LTM) : Web Search : Web Scraper : Audit Trail
-        Core : Eval System : Pydantic Validation : Docker Deployment : Documentation : Sample Agents
-    section Expansion
-        Intelligence : Guardrails : Streaming Mode : Semantic Search
-        Platform : JavaScript SDK : Custom Tool Integrator : Credential Store
-        Deployment : Self-Hosted : Cloud Services : CI/CD Pipeline
-        Templates : Sales Agent : Marketing Agent : Analytics Agent : Training Agent : Smart Form Agent
+flowchart TD
+subgraph Foundation
+    direction LR
+    subgraph arch["Architecture"]
+        a1["Node-Based Architecture"]:::done
+        a2["Python SDK"]:::done
+        a3["LLM Integration"]:::done
+        a4["Communication Protocol"]:::done
+    end
+    subgraph ca["Coding Agent"]
+        b1["Goal Creation Session"]:::done
+        b2["Worker Agent Creation"]
+        b3["MCP Tools"]:::done
+    end
+    subgraph wa["Worker Agent"]
+        c1["Human-in-the-Loop"]:::done
+        c2["Callback Handlers"]:::done
+        c3["Intervention Points"]:::done
+        c4["Streaming Interface"]
+    end
+    subgraph cred["Credentials"]
+        d1["Setup Process"]:::done
+        d2["Pluggable Sources"]:::done
+        d3["Enterprise Secrets"]
+        d4["Integration Tools"]:::done
+    end
+    subgraph tools["Tools"]
+        e1["File Use"]:::done
+        e2["Memory STM/LTM"]:::done
+        e3["Web Search/Scraper"]:::done
+        e4["CSV/PDF"]:::done
+        e5["Excel/Email"]
+    end
+    subgraph core["Core"]
+        f1["Eval System"]
+        f2["Pydantic Validation"]:::done
+        f3["Documentation"]:::done
+        f4["Adaptiveness"]
+        f5["Sample Agents"]
+    end
+end
+
+subgraph Expansion
+    direction LR
+    subgraph intel["Intelligence"]
+        g1["Guardrails"]
+        g2["Streaming Mode"]
+        g3["Image Generation"]
+        g4["Semantic Search"]
+    end
+    subgraph mem["Memory Iteration"]
+        h1["Message Model & Sessions"]
+        h2["Storage Migration"]
+        h3["Context Building"]
+        h4["Proactive Compaction"]
+        h5["Token Tracking"]
+    end
+    subgraph evt["Event System"]
+        i1["Event Bus for Nodes"]
+    end
+    subgraph cas["Coding Agent Support"]
+        j1["Claude Code"]
+        j2["Cursor"]
+        j3["Opencode"]
+        j4["Antigravity"]
+    end
+    subgraph plat["Platform"]
+        k1["JavaScript/TypeScript SDK"]
+        k2["Custom Tool Integrator"]
+        k3["Windows Support"]
+    end
+    subgraph dep["Deployment"]
+        l1["Self-Hosted"]
+        l2["Cloud Services"]
+        l3["CI/CD Pipeline"]
+    end
+    subgraph tmpl["Templates"]
+        m1["Sales Agent"]
+        m2["Marketing Agent"]
+        m3["Analytics Agent"]
+        m4["Training Agent"]
+        m5["Smart Form Agent"]
+    end
+end
+
+classDef done fill:#9e9e9e,color:#fff,stroke:#757575
 ```
+
+## Contributing
+
+We welcome contributions from the community! We’re especially looking for help building tools, integrations, and example agents for the framework ([check #2805](https://github.com/adenhq/hive/issues/2805)). If you’re interested in extending its functionality, this is the perfect place to start. Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
+
+**Important:** Please get assigned to an issue before submitting a PR. Comment on an issue to claim it, and a maintainer will assign you. Issues with reproducible steps and proposals are prioritized. This helps prevent duplicate work. 
+
+1. Find or create an issue and get assigned
+2. Fork the repository
+3. Create your feature branch (`git checkout -b feature/amazing-feature`)
+4. Commit your changes (`git commit -m 'Add amazing feature'`)
+5. Push to the branch (`git push origin feature/amazing-feature`)
+6. Open a Pull Request

 ## Community & Support

@@ -306,19 +353,6 @@ We use [Discord](https://discord.com/invite/MXE49hrKDk) for support, feature req
 - Twitter/X - [@aden_hq](https://x.com/aden_hq)
 - LinkedIn - [Company Page](https://www.linkedin.com/company/teamaden/)

-## Contributing
-
-We welcome contributions! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
-
-**Important:** Please get assigned to an issue before submitting a PR. Comment on an issue to claim it, and a maintainer will assign you within 24 hours. This helps prevent duplicate work.
-
-1. Find or create an issue and get assigned
-2. Fork the repository
-3. Create your feature branch (`git checkout -b feature/amazing-feature`)
-4. Commit your changes (`git commit -m 'Add amazing feature'`)
-5. Push to the branch (`git push origin feature/amazing-feature`)
-6. Open a Pull Request
-
 ## Join Our Team

 **We're hiring!** Join us in engineering, research, and go-to-market roles.
@@ -335,57 +369,57 @@ This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENS

 ## Frequently Asked Questions (FAQ)

-**Q: Does Aden depend on LangChain or other agent frameworks?**
+**Q: Does Hive depend on LangChain or other agent frameworks?**

-No. Aden is built from the ground up with no dependencies on LangChain, CrewAI, or other agent frameworks. The framework is designed to be lean and flexible, generating agent graphs dynamically rather than relying on predefined components.
+No. Hive is built from the ground up with no dependencies on LangChain, CrewAI, or other agent frameworks. The framework is designed to be lean and flexible, generating agent graphs dynamically rather than relying on predefined components.

-**Q: What LLM providers does Aden support?**
+**Q: What LLM providers does Hive support?**

-Aden supports 100+ LLM providers through LiteLLM integration, including OpenAI (GPT-4, GPT-4o), Anthropic (Claude models), Google Gemini, DeepSeek, Mistral, Groq, and many more. Simply set the appropriate API key environment variable and specify the model name.
+Hive supports 100+ LLM providers through LiteLLM integration, including OpenAI (GPT-4, GPT-4o), Anthropic (Claude models), Google Gemini, DeepSeek, Mistral, Groq, and many more. Simply set the appropriate API key environment variable and specify the model name.

-**Q: Can I use Aden with local AI models like Ollama?**
+**Q: Can I use Hive with local AI models like Ollama?**

-Yes! Aden supports local models through LiteLLM. Simply use the model name format `ollama/model-name` (e.g., `ollama/llama3`, `ollama/mistral`) and ensure Ollama is running locally.
+Yes! Hive supports local models through LiteLLM. Simply use the model name format `ollama/model-name` (e.g., `ollama/llama3`, `ollama/mistral`) and ensure Ollama is running locally.

-**Q: What makes Aden different from other agent frameworks?**
+**Q: What makes Hive different from other agent frameworks?**

-Aden generates your entire agent system from natural language goals using a coding agent—you don't hardcode workflows or manually define graphs. When agents fail, the framework automatically captures failure data, evolves the agent graph, and redeploys. This self-improving loop is unique to Aden.
+Hive generates your entire agent system from natural language goals using a coding agent—you don't hardcode workflows or manually define graphs. When agents fail, the framework automatically captures failure data, evolves the agent graph, and redeploys. This self-improving loop is unique to Hive.

-**Q: Is Aden open-source?**
+**Q: Is Hive open-source?**

-Yes, Aden is fully open-source under the Apache License 2.0. We actively encourage community contributions and collaboration.
+Yes, Hive is fully open-source under the Apache License 2.0. We actively encourage community contributions and collaboration.

-**Q: Does Aden collect data from users?**
+**Q: Does Hive collect data from users?**

-Aden collects telemetry data for monitoring and observability purposes, including token usage, latency metrics, and cost tracking. Content capture (prompts and responses) is configurable and stored with team-scoped data isolation. All data stays within your infrastructure when self-hosted.
+Hive collects telemetry data for monitoring and observability purposes, including token usage, latency metrics, and cost tracking. Content capture (prompts and responses) is configurable and stored with team-scoped data isolation. All data stays within your infrastructure when self-hosted.

-**Q: What deployment options does Aden support?**
+**Q: What deployment options does Hive support?**

-Aden supports self-hosted deployments via Python packages. See the [Environment Setup Guide](ENVIRONMENT_SETUP.md) for installation instructions. Cloud deployment options and Kubernetes-ready configurations are on the roadmap.
+Hive supports self-hosted deployments via Python packages. See the [Environment Setup Guide](ENVIRONMENT_SETUP.md) for installation instructions. Cloud deployment options and Kubernetes-ready configurations are on the roadmap.

-**Q: Can Aden handle complex, production-scale use cases?**
+**Q: Can Hive handle complex, production-scale use cases?**

-Yes. Aden is explicitly designed for production environments with features like automatic failure recovery, real-time observability, cost controls, and horizontal scaling support. The framework handles both simple automations and complex multi-agent workflows.
+Yes. Hive is explicitly designed for production environments with features like automatic failure recovery, real-time observability, cost controls, and horizontal scaling support. The framework handles both simple automations and complex multi-agent workflows.

-**Q: Does Aden support human-in-the-loop workflows?**
+**Q: Does Hive support human-in-the-loop workflows?**

-Yes, Aden fully supports human-in-the-loop workflows through intervention nodes that pause execution for human input. These include configurable timeouts and escalation policies, allowing seamless collaboration between human experts and AI agents.
+Yes, Hive fully supports human-in-the-loop workflows through intervention nodes that pause execution for human input. These include configurable timeouts and escalation policies, allowing seamless collaboration between human experts and AI agents.

-**Q: What monitoring and debugging tools does Aden provide?**
+**Q: What monitoring and debugging tools does Hive provide?**

-Aden includes comprehensive observability features: real-time WebSocket streaming for live agent execution monitoring, TimescaleDB-powered analytics for cost and performance metrics, health check endpoints for Kubernetes integration, and 19 MCP tools for budget management, agent status, and policy control.
+Hive includes comprehensive observability features: real-time WebSocket streaming for live agent execution monitoring, TimescaleDB-powered analytics for cost and performance metrics, health check endpoints for Kubernetes integration, and MCP tools for agent execution, including file operations, web search, data processing, and more.

-**Q: What programming languages does Aden support?**
+**Q: What programming languages does Hive support?**

-Aden provides SDKs for both Python and JavaScript/TypeScript. The Python SDK includes integration templates for LangGraph, LangFlow, and LiveKit. The backend is Node.js/TypeScript, and the frontend is React/TypeScript.
+The Hive framework is built in Python. A JavaScript/TypeScript SDK is on the roadmap.

 **Q: Can Hive agents interact with external tools and APIs?**

 Yes. Hive's SDK-wrapped nodes provide built-in tool access, and the framework supports flexible tool ecosystems. Agents can integrate with external APIs, databases, and services through the node architecture.

-**Q: How does cost control work in Aden?**
+**Q: How does cost control work in Hive?**

-Aden provides granular budget controls including spending limits, throttles, and automatic model degradation policies. You can set budgets at the team, agent, or workflow level, with real-time cost tracking and alerts.
+Hive provides granular budget controls including spending limits, throttles, and automatic model degradation policies. You can set budgets at the team, agent, or workflow level, with real-time cost tracking and alerts.

 **Q: Where can I find examples and documentation?**

@@ -395,6 +429,14 @@ Visit [docs.adenhq.com](https://docs.adenhq.com/) for complete guides, API refer

 Contributions are welcome! Fork the repository, create your feature branch, implement your changes, and submit a pull request. See [CONTRIBUTING.md](CONTRIBUTING.md) for detailed guidelines.

+**Q: When will my team start seeing results from Hive's adaptive agents?**
+
+Hive's adaptation loop begins working from the first execution. When an agent fails, the framework captures the failure data, helping developers evolve the agent graph through the coding agent. How quickly this translates to measurable results depends on the complexity of your use case, the quality of your goal definitions, and the volume of executions generating feedback.
+
+**Q: How does Hive compare to other agent frameworks?**
+
+Hive focuses on generating agents that run real business processes, rather than generic agents. This vision emphasizes outcome-driven design, adaptability, and an easy-to-use set of tools and integrations.
+
 **Q: Does Aden offer enterprise support?**

 For enterprise inquiries, contact the Aden team through [adenhq.com](https://adenhq.com) or join our [Discord community](https://discord.com/invite/MXE49hrKDk) for support and discussions.
@@ -1,21 +1,94 @@
-Product Roadmap
+# Product Roadmap

 Aden Agent Framework aims to help developers build outcome-oriented, self-adaptive agents. Please find our roadmap below.

 ```mermaid
-timeline
-    title Aden Agent Framework Roadmap
-    section Foundation
-        Architecture : Node-Based Architecture : Python SDK : LLM Integration (OpenAI, Anthropic, Google) : Communication Protocol
-        Coding Agent : Goal Creation Session : Worker Agent Creation : MCP Tools Integration
-        Worker Agent : Human-in-the-Loop : Callback Handlers : Intervention Points : Streaming Interface
-        Tools : File Use : Memory (STM/LTM) : Web Search : Web Scraper : Audit Trail
-        Core : Eval System : Pydantic Validation : Docker Deployment : Documentation : Sample Agents
-    section Expansion
-        Intelligence : Guardrails : Streaming Mode : Semantic Search
-        Platform : JavaScript SDK : Custom Tool Integrator : Credential Store
-        Deployment : Self-Hosted : Cloud Services : CI/CD Pipeline
-        Templates : Sales Agent : Marketing Agent : Analytics Agent : Training Agent : Smart Form Agent
+flowchart TD
+subgraph Foundation
+    direction LR
+    subgraph arch["Architecture"]
+        a1["Node-Based Architecture"]:::done
+        a2["Python SDK"]:::done
+        a3["LLM Integration"]:::done
+        a4["Communication Protocol"]:::done
+    end
+    subgraph ca["Coding Agent"]
+        b1["Goal Creation Session"]:::done
+        b2["Worker Agent Creation"]
+        b3["MCP Tools"]:::done
+    end
+    subgraph wa["Worker Agent"]
+        c1["Human-in-the-Loop"]:::done
+        c2["Callback Handlers"]:::done
+        c3["Intervention Points"]:::done
+        c4["Streaming Interface"]
+    end
+    subgraph cred["Credentials"]
+        d1["Setup Process"]:::done
+        d2["Pluggable Sources"]:::done
+        d3["Enterprise Secrets"]
+        d4["Integration Tools"]:::done
+    end
+    subgraph tools["Tools"]
+        e1["File Use"]:::done
+        e2["Memory STM/LTM"]:::done
+        e3["Web Search/Scraper"]:::done
+        e4["CSV/PDF"]:::done
+        e5["Excel/Email"]
+    end
+    subgraph core["Core"]
+        f1["Eval System"]
+        f2["Pydantic Validation"]:::done
+        f3["Documentation"]:::done
+        f4["Adaptiveness"]
+        f5["Sample Agents"]
+    end
+end
+
+subgraph Expansion
+    direction LR
+    subgraph intel["Intelligence"]
+        g1["Guardrails"]
+        g2["Streaming Mode"]
+        g3["Image Generation"]
+        g4["Semantic Search"]
+    end
+    subgraph mem["Memory Iteration"]
+        h1["Message Model & Sessions"]
+        h2["Storage Migration"]
+        h3["Context Building"]
+        h4["Proactive Compaction"]
+        h5["Token Tracking"]
+    end
+    subgraph evt["Event System"]
+        i1["Event Bus for Nodes"]
+    end
+    subgraph cas["Coding Agent Support"]
+        j1["Claude Code"]
+        j2["Cursor"]
+        j3["Opencode"]
+        j4["Antigravity"]
+    end
+    subgraph plat["Platform"]
+        k1["JavaScript/TypeScript SDK"]
+        k2["Custom Tool Integrator"]
+        k3["Windows Support"]
+    end
+    subgraph dep["Deployment"]
+        l1["Self-Hosted"]
+        l2["Cloud Services"]
+        l3["CI/CD Pipeline"]
+    end
+    subgraph tmpl["Templates"]
+        m1["Sales Agent"]
+        m2["Marketing Agent"]
+        m3["Analytics Agent"]
+        m4["Training Agent"]
+        m5["Smart Form Agent"]
+    end
+end
+
+classDef done fill:#9e9e9e,color:#fff,stroke:#757575
 ```

 ---
@@ -26,19 +99,19 @@ timeline
 - [ ] **Node-Based Architecture (Agent as a node)**
    - [x] Object schema definition
    - [x] Node wrapper SDK
-    - [ ] Shared memory access
+    - [x] Shared memory access
    - [ ] Default monitoring hooks
-    - [ ] Tool access layer
+    - [x] Tool access layer
    - [x] LLM integration layer (Natively supports all mainstream LLMs through LiteLLM)
        - [x] Anthropic
        - [x] OpenAI
        - [x] Google
- [ ] **Communication protocol between nodes**
- [ ] **[Coding Agent] Goal Creation Session** (separate from coding session)
-    - [ ] Instruction back and forth
+- [x] **Communication protocol between nodes**
+- [x] **[Coding Agent] Goal Creation Session** (separate from coding session)
+    - [x] Instruction back and forth
    - [x] Goal Object schema definition
-    - [ ] Being able to generate the test cases
-    - [ ] Test case validation for worker agent (Outcome driven)
+    - [x] Being able to generate the test cases
+    - [x] Test case validation for worker agent (Outcome driven)
 - [ ] **[Coding Agent] Worker Agent Creation**
    - [x] Coding Agent tools
    - [ ] Use Template Agent as a start
@@ -46,21 +119,62 @@ timeline
 - [ ] **[Worker Agent] Human-in-the-Loop**
    - [x] Worker Agents request with questions and options
    - [x] Callback Handler System to receive events throughout execution
-    - [ ] Tool-Based Intervention Points (tool to pause execution and request human input)
+    - [x] Tool-Based Intervention Points (tool to pause execution and request human input)
    - [x] Multiple entrypoint for different event source (e.g. Human input, webhook)
    - [ ] Streaming Interface for Real-time Monitoring
-    - [ ] Request State Management
+    - [x] Request State Management
+
+### Credential Management
+- [x] **Credentials Setup Process**
+    - [x] Install Credential MCP
+- [x] **Pluggable Credential Sources**
+    - [x] **Abstraction & Local Sources**
+        - [x] Introduce `CredentialSource` base class
+        - [x] Refactor existing logic into `EnvVarSource`
+        - [x] Implementation of Source Priority Chain mechanism
+        - [ ] Foundation unit tests
+    - [ ] **Enterprise Secret Managers**
+        - [x] `VaultSource` (HashiCorp Vault)
+        - [ ] `AWSSecretsSource` (AWS Secrets Manager)
+        - [ ] `AzureKeyVaultSource` (Azure Key Vault)
+        - [ ] Management of optional provider dependencies
+    - [ ] **Advanced Features**
+        - [x] Credential expiration and auto-refresh
+        - [ ] Audit logging for compliance/tracking
+        - [ ] Per-environment configuration support
+    - [ ] **Documentation & DX**
+        - [ ] Comprehensive source documentation
+        - [ ] Example configurations for all providers
+    - [x] **Integration as tools coverage**
+        - [x] G Suite Tools
+        - [x] Social Media
+            - [ ] Twitter (X)
+            - [x] GitHub
+            - [ ] Instagram
+        - [ ] SaaS
+            - [ ] HubSpot
+            - [ ] Slack
+            - [ ] Teams
+            - [ ] Zoom
+            - [ ] Stripe
+            - [ ] Salesforce
+
+> [!IMPORTANT]
+> **Community Contribution Wanted**: We appreciate help from the community in expanding the "Integration as tools" capability. Open an issue for any integration you would like Hive to support!

 ### Essential Tools
 - [x] **File Use Tool Kit**
- [ ] **Memory Tools**
+- [x] **Memory Tools**
    - [x] STM Layer Tool (state-based short-term memory)
    - [x] LTM Layer Tool (RLM - long-term memory)
 - [ ] **Infrastructure Tools**
    - [x] Runtime Log Tool (logs for coding agent)
-    - [ ] Audit Trail Tool (decision timeline generation)
-    - [ ] Web Search
-    - [ ] Web Scraper
+    - [x] Web Search
+    - [x] Web Scraper
+    - [x] CSV tools
+    - [x] PDF tools
+    - [ ] Excel tools
+    - [ ] Email Tools
    - [ ] Recipe for "Add your own tools"

 ### Memory & File System
@@ -75,20 +189,25 @@ timeline
 - [ ] User-driven log analysis (OSS approach)

 ### Data Validation
- [ ] Natively Support data validation of LLMs output with Pydantic
+- [x] Natively Support data validation of LLMs output with Pydantic

 ### Developer Experience
- [ ] **Debugging mode**
- [ ] **Documentation**
-    - [ ] Quick start guide
-    - [ ] Goal creation guide
-    - [ ] Agent creation guide
-    - [ ] GitHub Page setup
-    - [ ] README with examples
-    - [ ] Contributing guidelines
- [ ] **Distribution**
-    - [ ] PyPI package
-    - [ ] Docker image on Docker Hub
+- [ ] **MVP Features**
+    - [ ] Debugging mode
+    - [ ] CLI tools for memory management
+    - [ ] CLI tools for credential management
+- [ ] **MVP Resources & Documentation**
+    - [x] Quick start guide
+    - [x] Goal creation guide
+    - [x] Agent creation guide
+    - [x] GitHub Page setup
+    - [x] README with examples
+    - [x] Contributing guidelines
+    - [ ] Introduction Video
+
+### Adaptiveness
+- [ ] Runtime data feedback loop
+- [ ] Instant Developer Feedback for improvement

 ### Sample Agents
 - [ ] Knowledge Agent
@@ -106,9 +225,35 @@ timeline

 ### Agent Capability
 - [ ] Streaming mode support
+- [ ] Image Generation support
+- [ ] Image and flat-file understanding for end-user input

-### Cross-Platform
- [ ] JavaScript / TypeScript Version SDK
+### Event-loop For Nodes (Opencode-style)
+- [ ] **Event bus**
+
+### Memory System Iteration
+- [ ] **Message Model & Session Management**
+    - [ ] Introduce `Message` class with structured content types
+    - [ ] Implement `Session` classes for conversation state
+- [ ] **Storage Migration**
+    - [ ] Implement granular per-message file persistence (`/message/[agentID]/...`)
+    - [ ] Migrate from monolithic run storage
+- [ ] **Context Building & Conversation Loop**
+    - [ ] Implement `Message.stream(sessionID)`
+    - [ ] Update `LLMNode.execute()` for full context building
+    - [ ] Implement `Message.toModelMessages()` conversion
+- [ ] **Proactive Compaction**
+    - [ ] Implement proactive overflow detection
+    - [ ] Develop backward-scanning pruning strategy (e.g., clearing old tool outputs)
+- [ ] **Enhanced Token Tracking**
+    - [ ] Extend `LLMResponse` to track reasoning and cache tokens
+    - [ ] Integrate granular token metrics into compaction logic
+
+### Coding Agent Support
+- [ ] Claude Code
+- [ ] Cursor
+- [ ] Opencode
+- [ ] Antigravity

 ### File System Enhancement
 - [ ] Semantic Search integration
@@ -123,7 +268,7 @@ timeline
    - [ ] Wake-up Tool (resume agent tasks)

 ### Deployment (Self-Hosted)
- [ ] Docker container standardization
+- [ ] Worker agent Docker container standardization
 - [ ] Headless backend execution
 - [ ] Exposed API for frontend attachment
 - [ ] Local monitoring & observability
@@ -148,3 +293,7 @@ timeline
 - [ ] Analytics Agent
 - [ ] Training Agent
 - [ ] Smart Entry / Form Agent (self-evolution emphasis)
+
+### Cross-Platform
+- [ ] JavaScript / TypeScript Version SDK
+- [ ] Better Windows support
@@ -145,7 +145,7 @@ python -m framework test-debug <agent_path> <test_name>
 python -m framework test-list <goal_id>
 ```

-For detailed testing workflows, see the [testing-agent skill](.claude/skills/testing-agent/SKILL.md).
+For detailed testing workflows, see the [testing-agent skill](../.claude/skills/testing-agent/SKILL.md).

 ### Analyzing Agent Behavior with Builder

@@ -0,0 +1,740 @@
+#!/usr/bin/env python3
+"""
+EventLoopNode WebSocket Demo
+
+Real LLM, real FileConversationStore, real EventBus.
+Streams EventLoopNode execution to a browser via WebSocket.
+
+Usage:
+    cd core  # from the hive repository root
+    python demos/event_loop_wss_demo.py
+
+    Then open http://localhost:8765 in your browser.
+"""
+
+import asyncio
+import json
+import logging
+import sys
+import tempfile
+from http import HTTPStatus
+from pathlib import Path
+
+import httpx
+import websockets
+from bs4 import BeautifulSoup
+from websockets.http11 import Request, Response
+
+# Add core, tools, and hive root to path
+_CORE_DIR = Path(__file__).resolve().parent.parent
+_HIVE_DIR = _CORE_DIR.parent
+sys.path.insert(0, str(_CORE_DIR))  # framework.*
+sys.path.insert(0, str(_HIVE_DIR / "tools" / "src"))  # aden_tools.*
+sys.path.insert(0, str(_HIVE_DIR))  # core.framework.* (for aden_tools imports)
+
+import os  # noqa: E402
+
+from aden_tools.credentials import CREDENTIAL_SPECS, CredentialStoreAdapter  # noqa: E402
+from core.framework.credentials import CredentialStore  # noqa: E402
+
+from framework.credentials.storage import (  # noqa: E402
+    CompositeStorage,
+    EncryptedFileStorage,
+    EnvVarStorage,
+)
+from framework.graph.event_loop_node import EventLoopNode, LoopConfig  # noqa: E402
+from framework.graph.node import NodeContext, NodeSpec, SharedMemory  # noqa: E402
+from framework.llm.litellm import LiteLLMProvider  # noqa: E402
+from framework.llm.provider import Tool  # noqa: E402
+from framework.runner.tool_registry import ToolRegistry  # noqa: E402
+from framework.runtime.core import Runtime  # noqa: E402
+from framework.runtime.event_bus import EventBus, EventType  # noqa: E402
+from framework.storage.conversation_store import FileConversationStore  # noqa: E402
+
+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(message)s")
+logger = logging.getLogger("demo")
+
+# -------------------------------------------------------------------------
+# Persistent state (shared across WebSocket connections)
+# -------------------------------------------------------------------------
+
+# Fresh temporary directory created at import time. Nothing in this script removes
+# it on exit, so "persistent" here means "for the lifetime of this process".
+STORE_DIR = Path(tempfile.mkdtemp(prefix="hive_demo_"))
+# Conversation store shared by every WebSocket connection; the "clear" command in
+# handle_ws() closes it and rebinds this global to a new instance.
+STORE = FileConversationStore(STORE_DIR / "conversation")
+RUNTIME = Runtime(STORE_DIR / "runtime")
+LLM = LiteLLMProvider(model="claude-sonnet-4-5-20250929")
+
+# -------------------------------------------------------------------------
+# Tool Registry — real tools via ToolRegistry (same pattern as GraphExecutor)
+# -------------------------------------------------------------------------
+
+TOOL_REGISTRY = ToolRegistry()
+
+# Credential store: Aden sync (OAuth2 tokens) + encrypted files + env var fallback
+# Map every known credential name to the environment variable declared in its spec.
+_env_mapping = {name: spec.env_var for name, spec in CREDENTIAL_SPECS.items()}
+# Local lookup chain: encrypted file storage first, environment variables as fallback.
+_local_storage = CompositeStorage(
+    primary=EncryptedFileStorage(),
+    fallbacks=[EnvVarStorage(env_mapping=_env_mapping)],
+)
+
+# Aden sync is attempted only when ADEN_API_KEY is set to a non-empty value.
+if os.environ.get("ADEN_API_KEY"):
+    try:
+        from framework.credentials.aden import (  # noqa: E402
+            AdenCachedStorage,
+            AdenClientConfig,
+            AdenCredentialClient,
+            AdenSyncProvider,
+        )
+
+        _client = AdenCredentialClient(AdenClientConfig(base_url="https://api.adenhq.com"))
+        _provider = AdenSyncProvider(client=_client)
+        # The Aden-backed storage wraps the local chain built above.
+        _storage = AdenCachedStorage(
+            local_storage=_local_storage,
+            aden_provider=_provider,
+        )
+        _cred_store = CredentialStore(storage=_storage, providers=[_provider], auto_refresh=True)
+        _synced = _provider.sync_all(_cred_store)
+        logger.info("Synced %d credentials from Aden", _synced)
+    except Exception as e:
+        # Any failure (import, client setup, or sync) degrades to local-only
+        # storage so the demo can still start.
+        logger.warning("Aden sync unavailable: %s", e)
+        _cred_store = CredentialStore(storage=_local_storage)
+else:
+    logger.info("ADEN_API_KEY not set, using local credential storage")
+    _cred_store = CredentialStore(storage=_local_storage)
+
+# Adapter the tool executors below use to look credentials up by name.
+CREDENTIALS = CredentialStoreAdapter(_cred_store)
+
+# Debug: log which credentials resolved
+# Only the length is logged, never the value. logging is configured at INFO above,
+# so these lines are not emitted unless the level is lowered.
+for _name in ["brave_search", "hubspot", "anthropic"]:
+    _val = CREDENTIALS.get(_name)
+    if _val:
+        logger.debug("credential %s: OK (len=%d)", _name, len(_val))
+    else:
+        logger.debug("credential %s: not found", _name)
+
+# --- web_search (Brave Search API) ---
+
+# Register the web_search schema together with its executor. The executor is a
+# lambda because _exec_web_search is defined further down the module; the lambda
+# defers the name lookup until the tool is actually invoked.
+TOOL_REGISTRY.register(
+    name="web_search",
+    tool=Tool(
+        name="web_search",
+        description=(
+            "Search the web for current information. "
+            "Returns titles, URLs, and snippets from search results."
+        ),
+        # JSON-Schema style description of the arguments; only "query" is mandatory.
+        parameters={
+            "type": "object",
+            "properties": {
+                "query": {
+                    "type": "string",
+                    "description": "The search query (1-500 characters)",
+                },
+                "num_results": {
+                    "type": "integer",
+                    "description": "Number of results to return (1-20, default 10)",
+                },
+            },
+            "required": ["query"],
+        },
+    ),
+    executor=lambda inputs: _exec_web_search(inputs),
+)
+
+
+def _exec_web_search(inputs: dict) -> dict:
+    api_key = CREDENTIALS.get("brave_search")
+    if not api_key:
+        return {"error": "brave_search credential not configured"}
+    query = inputs.get("query", "")
+    num_results = min(inputs.get("num_results", 10), 20)
+    resp = httpx.get(
+        "https://api.search.brave.com/res/v1/web/search",
+        params={"q": query, "count": num_results},
+        headers={"X-Subscription-Token": api_key, "Accept": "application/json"},
+        timeout=30.0,
+    )
+    if resp.status_code != 200:
+        return {"error": f"Brave API HTTP {resp.status_code}"}
+    data = resp.json()
+    results = [
+        {
+            "title": item.get("title", ""),
+            "url": item.get("url", ""),
+            "snippet": item.get("description", ""),
+        }
+        for item in data.get("web", {}).get("results", [])[:num_results]
+    ]
+    return {"query": query, "results": results, "total": len(results)}
+
+
+# --- web_scrape (httpx + BeautifulSoup, no playwright for sync compat) ---
+
+# Register the web_scrape schema together with its executor. As with web_search,
+# the lambda defers the lookup of _exec_web_scrape (defined below) to call time.
+TOOL_REGISTRY.register(
+    name="web_scrape",
+    tool=Tool(
+        name="web_scrape",
+        description=(
+            "Scrape and extract text content from a webpage URL. "
+            "Returns the page title and main text content."
+        ),
+        # JSON-Schema style description of the arguments; only "url" is mandatory.
+        parameters={
+            "type": "object",
+            "properties": {
+                "url": {
+                    "type": "string",
+                    "description": "URL of the webpage to scrape",
+                },
+                "max_length": {
+                    "type": "integer",
+                    "description": "Maximum text length (default 50000)",
+                },
+            },
+            "required": ["url"],
+        },
+    ),
+    executor=lambda inputs: _exec_web_scrape(inputs),
+)
+
+# Browser-like request headers passed to httpx.get() by _exec_web_scrape.
+_SCRAPE_HEADERS = {
+    "User-Agent": (
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+        "AppleWebKit/537.36 (KHTML, like Gecko) "
+        "Chrome/131.0.0.0 Safari/537.36"
+    ),
+    "Accept": "text/html,application/xhtml+xml",
+}
+
+
+def _exec_web_scrape(inputs: dict) -> dict:
+    url = inputs.get("url", "")
+    max_length = max(1000, min(inputs.get("max_length", 50000), 500000))
+    if not url.startswith(("http://", "https://")):
+        url = "https://" + url
+    try:
+        resp = httpx.get(url, timeout=30.0, follow_redirects=True, headers=_SCRAPE_HEADERS)
+        if resp.status_code != 200:
+            return {"error": f"HTTP {resp.status_code}"}
+        soup = BeautifulSoup(resp.text, "html.parser")
+        for tag in soup(["script", "style", "nav", "footer", "header", "aside", "noscript"]):
+            tag.decompose()
+        title = soup.title.get_text(strip=True) if soup.title else ""
+        main = (
+            soup.find("article")
+            or soup.find("main")
+            or soup.find(attrs={"role": "main"})
+            or soup.find("body")
+        )
+        text = main.get_text(separator=" ", strip=True) if main else ""
+        text = " ".join(text.split())
+        if len(text) > max_length:
+            text = text[:max_length] + "..."
+        return {"url": url, "title": title, "content": text, "length": len(text)}
+    except httpx.TimeoutException:
+        return {"error": "Request timed out"}
+    except Exception as e:
+        return {"error": f"Scrape failed: {e}"}
+
+
+# --- HubSpot CRM tools (optional, requires HUBSPOT_ACCESS_TOKEN) ---
+
+_HUBSPOT_API = "https://api.hubapi.com"
+
+
+def _hubspot_headers() -> dict | None:
+    token = CREDENTIALS.get("hubspot")
+    if token:
+        logger.debug("HubSpot token: %s...%s (len=%d)", token[:8], token[-4:], len(token))
+    else:
+        logger.debug("HubSpot token: not found")
+    if not token:
+        return None
+    return {
+        "Authorization": f"Bearer {token}",
+        "Content-Type": "application/json",
+        "Accept": "application/json",
+    }
+
+
+def _exec_hubspot_search(inputs: dict) -> dict:
+    headers = _hubspot_headers()
+    if not headers:
+        return {"error": "HUBSPOT_ACCESS_TOKEN not set"}
+    object_type = inputs.get("object_type", "contacts")
+    query = inputs.get("query", "")
+    limit = min(inputs.get("limit", 10), 100)
+    body: dict = {"limit": limit}
+    if query:
+        body["query"] = query
+    try:
+        resp = httpx.post(
+            f"{_HUBSPOT_API}/crm/v3/objects/{object_type}/search",
+            headers=headers,
+            json=body,
+            timeout=30.0,
+        )
+        if resp.status_code != 200:
+            return {"error": f"HubSpot API HTTP {resp.status_code}: {resp.text[:200]}"}
+        return resp.json()
+    except httpx.TimeoutException:
+        return {"error": "Request timed out"}
+    except Exception as e:
+        return {"error": f"HubSpot error: {e}"}
+
+
+# Register the hubspot_search schema together with its executor. The tool is
+# always registered; _exec_hubspot_search returns an error dict at call time
+# when no HubSpot token resolves.
+TOOL_REGISTRY.register(
+    name="hubspot_search",
+    tool=Tool(
+        name="hubspot_search",
+        description=(
+            "Search HubSpot CRM objects (contacts, companies, or deals). "
+            "Returns matching records with their properties."
+        ),
+        # JSON-Schema style description of the arguments; only "object_type" is mandatory.
+        parameters={
+            "type": "object",
+            "properties": {
+                "object_type": {
+                    "type": "string",
+                    "description": "CRM object type: 'contacts', 'companies', or 'deals'",
+                },
+                "query": {
+                    "type": "string",
+                    "description": "Search query (name, email, domain, etc.)",
+                },
+                "limit": {
+                    "type": "integer",
+                    "description": "Max results (1-100, default 10)",
+                },
+            },
+            "required": ["object_type"],
+        },
+    ),
+    executor=lambda inputs: _exec_hubspot_search(inputs),
+)
+
+# Startup summary of every tool name registered above.
+logger.info(
+    "ToolRegistry loaded: %s",
+    ", ".join(TOOL_REGISTRY.get_registered_names()),
+)
+
+
+# -------------------------------------------------------------------------
+# HTML page (embedded)
+# -------------------------------------------------------------------------
+
+HTML_PAGE = (  # noqa: E501
+    """<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>EventLoopNode Live Demo</title>
+<style>
+  * { box-sizing: border-box; margin: 0; padding: 0; }
+  body {
+    font-family: 'SF Mono', 'Fira Code', monospace;
+    background: #0d1117; color: #c9d1d9;
+    height: 100vh; display: flex; flex-direction: column;
+  }
+  header {
+    background: #161b22; padding: 12px 20px;
+    border-bottom: 1px solid #30363d;
+    display: flex; align-items: center; gap: 16px;
+  }
+  header h1 { font-size: 16px; color: #58a6ff; font-weight: 600; }
+  .status {
+    font-size: 12px; padding: 3px 10px; border-radius: 12px;
+    background: #21262d; color: #8b949e;
+  }
+  .status.running { background: #1a4b2e; color: #3fb950; }
+  .status.done { background: #1a3a5c; color: #58a6ff; }
+  .status.error { background: #4b1a1a; color: #f85149; }
+  .chat { flex: 1; overflow-y: auto; padding: 16px; }
+  .msg {
+    margin: 8px 0; padding: 10px 14px; border-radius: 8px;
+    line-height: 1.6; white-space: pre-wrap; word-wrap: break-word;
+  }
+  .msg.user { background: #1a3a5c; color: #58a6ff; }
+  .msg.assistant { background: #161b22; color: #c9d1d9; }
+  .msg.event {
+    background: transparent; color: #8b949e; font-size: 11px;
+    padding: 4px 14px; border-left: 3px solid #30363d;
+  }
+  .msg.event.loop { border-left-color: #58a6ff; }
+  .msg.event.tool { border-left-color: #d29922; }
+  .msg.event.stall { border-left-color: #f85149; }
+  .input-bar {
+    padding: 12px 16px; background: #161b22;
+    border-top: 1px solid #30363d; display: flex; gap: 8px;
+  }
+  .input-bar input {
+    flex: 1; background: #0d1117; border: 1px solid #30363d;
+    color: #c9d1d9; padding: 8px 12px; border-radius: 6px;
+    font-family: inherit; font-size: 14px; outline: none;
+  }
+  .input-bar input:focus { border-color: #58a6ff; }
+  .input-bar button {
+    background: #238636; color: #fff; border: none;
+    padding: 8px 20px; border-radius: 6px; cursor: pointer;
+    font-family: inherit; font-weight: 600;
+  }
+  .input-bar button:hover { background: #2ea043; }
+  .input-bar button:disabled {
+    background: #21262d; color: #484f58; cursor: not-allowed;
+  }
+  .input-bar button.clear { background: #da3633; }
+  .input-bar button.clear:hover { background: #f85149; }
+</style>
+</head>
+<body>
+  <header>
+    <h1>EventLoopNode Live</h1>
+    <span id="status" class="status">Idle</span>
+    <span id="iter" class="status" style="display:none">Step 0</span>
+  </header>
+  <div id="chat" class="chat"></div>
+  <div class="input-bar">
+    <input id="input" type="text"
+           placeholder="Ask anything..." autofocus />
+    <button id="go" onclick="run()">Send</button>
+    <button class="clear"
+            onclick="clearConversation()">Clear</button>
+  </div>
+
+<script>
+let ws = null;
+let currentAssistantEl = null;
+let iterCount = 0;
+const chat = document.getElementById('chat');
+const status = document.getElementById('status');
+const iterEl = document.getElementById('iter');
+const goBtn = document.getElementById('go');
+const inputEl = document.getElementById('input');
+
+inputEl.addEventListener('keydown', e => {
+  if (e.key === 'Enter') run();
+});
+
+function setStatus(text, cls) {
+  status.textContent = text;
+  status.className = 'status ' + cls;
+}
+
+function addMsg(text, cls) {
+  const el = document.createElement('div');
+  el.className = 'msg ' + cls;
+  el.textContent = text;
+  chat.appendChild(el);
+  chat.scrollTop = chat.scrollHeight;
+  return el;
+}
+
+function connect() {
+  ws = new WebSocket('ws://' + location.host + '/ws');
+  ws.onopen = () => {
+    setStatus('Ready', 'done');
+    goBtn.disabled = false;
+  };
+  ws.onmessage = handleEvent;
+  ws.onerror = () => { setStatus('Error', 'error'); };
+  ws.onclose = () => {
+    setStatus('Reconnecting...', '');
+    goBtn.disabled = true;
+    setTimeout(connect, 2000);
+  };
+}
+
+function handleEvent(msg) {
+  const evt = JSON.parse(msg.data);
+
+  if (evt.type === 'llm_text_delta') {
+    if (currentAssistantEl) {
+      currentAssistantEl.textContent += evt.content;
+      chat.scrollTop = chat.scrollHeight;
+    }
+  }
+  else if (evt.type === 'ready') {
+    setStatus('Ready', 'done');
+    if (currentAssistantEl && !currentAssistantEl.textContent)
+      currentAssistantEl.remove();
+    goBtn.disabled = false;
+  }
+  else if (evt.type === 'node_loop_iteration') {
+    iterCount = evt.iteration || (iterCount + 1);
+    iterEl.textContent = 'Step ' + iterCount;
+    iterEl.style.display = '';
+  }
+  else if (evt.type === 'tool_call_started') {
+    var info = evt.tool_name + '('
+      + JSON.stringify(evt.tool_input).slice(0, 120) + ')';
+    addMsg('TOOL  ' + info, 'event tool');
+  }
+  else if (evt.type === 'tool_call_completed') {
+    var preview = (evt.result || '').slice(0, 200);
+    var cls = evt.is_error ? 'stall' : 'tool';
+    addMsg('RESULT  ' + evt.tool_name + ': ' + preview,
+           'event ' + cls);
+    currentAssistantEl = addMsg('', 'assistant');
+  }
+  else if (evt.type === 'result') {
+    setStatus('Session ended', evt.success ? 'done' : 'error');
+    if (evt.error) addMsg('ERROR  ' + evt.error, 'event stall');
+    if (currentAssistantEl && !currentAssistantEl.textContent)
+      currentAssistantEl.remove();
+    goBtn.disabled = false;
+  }
+  else if (evt.type === 'node_stalled') {
+    addMsg('STALLED  ' + evt.reason, 'event stall');
+  }
+  else if (evt.type === 'cleared') {
+    chat.innerHTML = '';
+    iterCount = 0;
+    iterEl.textContent = 'Step 0';
+    iterEl.style.display = 'none';
+    setStatus('Ready', 'done');
+    goBtn.disabled = false;
+  }
+}
+
+function run() {
+  const text = inputEl.value.trim();
+  if (!text || !ws || ws.readyState !== 1) return;
+  addMsg(text, 'user');
+  currentAssistantEl = addMsg('', 'assistant');
+  inputEl.value = '';
+  setStatus('Running', 'running');
+  goBtn.disabled = true;
+  ws.send(JSON.stringify({ topic: text }));
+}
+
+function clearConversation() {
+  if (ws && ws.readyState === 1) {
+    ws.send(JSON.stringify({ command: 'clear' }));
+  }
+}
+
+connect();
+</script>
+</body>
+</html>"""
+)
+
+
+# -------------------------------------------------------------------------
+# WebSocket handler
+# -------------------------------------------------------------------------
+
+
+async def handle_ws(websocket):
+    """Persistent WebSocket: long-lived EventLoopNode with client_facing blocking."""
+    global STORE
+
+    # -- Event forwarding (WebSocket ← EventBus) ----------------------------
+    bus = EventBus()
+
+    async def forward_event(event):
+        try:
+            payload = {"type": event.type.value, **event.data}
+            if event.node_id:
+                payload["node_id"] = event.node_id
+            await websocket.send(json.dumps(payload))
+        except Exception:
+            pass
+
+    bus.subscribe(
+        event_types=[
+            EventType.NODE_LOOP_STARTED,
+            EventType.NODE_LOOP_ITERATION,
+            EventType.NODE_LOOP_COMPLETED,
+            EventType.LLM_TEXT_DELTA,
+            EventType.TOOL_CALL_STARTED,
+            EventType.TOOL_CALL_COMPLETED,
+            EventType.NODE_STALLED,
+        ],
+        handler=forward_event,
+    )
+
+    # -- Per-connection state -----------------------------------------------
+    node = None
+    loop_task = None
+
+    tools = list(TOOL_REGISTRY.get_tools().values())
+    tool_executor = TOOL_REGISTRY.get_executor()
+
+    node_spec = NodeSpec(
+        id="assistant",
+        name="Chat Assistant",
+        description="A conversational assistant that remembers context across messages",
+        node_type="event_loop",
+        client_facing=True,
+        system_prompt=(
+            "You are a helpful assistant with access to tools. "
+            "You can search the web, scrape webpages, and query HubSpot CRM. "
+            "Use tools when the user asks for current information or external data. "
+            "You have full conversation history, so you can reference previous messages."
+        ),
+    )
+
+    # -- Ready callback: subscribe to CLIENT_INPUT_REQUESTED on the bus ---
+    async def on_input_requested(event):
+        try:
+            await websocket.send(json.dumps({"type": "ready"}))
+        except Exception:
+            pass
+
+    bus.subscribe(
+        event_types=[EventType.CLIENT_INPUT_REQUESTED],
+        handler=on_input_requested,
+    )
+
+    async def start_loop(first_message: str):
+        """Create an EventLoopNode and run it as a background task."""
+        nonlocal node, loop_task
+
+        memory = SharedMemory()
+        ctx = NodeContext(
+            runtime=RUNTIME,
+            node_id="assistant",
+            node_spec=node_spec,
+            memory=memory,
+            input_data={},
+            llm=LLM,
+            available_tools=tools,
+        )
+        node = EventLoopNode(
+            event_bus=bus,
+            config=LoopConfig(max_iterations=10_000, max_history_tokens=32_000),
+            conversation_store=STORE,
+            tool_executor=tool_executor,
+        )
+        await node.inject_event(first_message)
+
+        async def _run():
+            try:
+                result = await node.execute(ctx)
+                try:
+                    await websocket.send(
+                        json.dumps(
+                            {
+                                "type": "result",
+                                "success": result.success,
+                                "output": result.output,
+                                "error": result.error,
+                                "tokens": result.tokens_used,
+                            }
+                        )
+                    )
+                except Exception:
+                    pass
+                logger.info(f"Loop ended: success={result.success}, tokens={result.tokens_used}")
+            except websockets.exceptions.ConnectionClosed:
+                logger.info("Loop stopped: WebSocket closed")
+            except Exception as e:
+                logger.exception("Loop error")
+                try:
+                    await websocket.send(
+                        json.dumps(
+                            {
+                                "type": "result",
+                                "success": False,
+                                "error": str(e),
+                                "output": {},
+                            }
+                        )
+                    )
+                except Exception:
+                    pass
+
+        loop_task = asyncio.create_task(_run())
+
+    async def stop_loop():
+        """Signal the node and wait for the loop task to finish."""
+        nonlocal node, loop_task
+        if loop_task and not loop_task.done():
+            if node:
+                node.signal_shutdown()
+            try:
+                await asyncio.wait_for(loop_task, timeout=5.0)
+            except (TimeoutError, asyncio.CancelledError):
+                loop_task.cancel()
+        node = None
+        loop_task = None
+
+    # -- Message loop (runs for the lifetime of this WebSocket) -------------
+    try:
+        async for raw in websocket:
+            try:
+                msg = json.loads(raw)
+            except Exception:
+                continue
+
+            # Clear command
+            if msg.get("command") == "clear":
+                import shutil
+
+                await stop_loop()
+                await STORE.close()
+                conv_dir = STORE_DIR / "conversation"
+                if conv_dir.exists():
+                    shutil.rmtree(conv_dir)
+                STORE = FileConversationStore(conv_dir)
+                await websocket.send(json.dumps({"type": "cleared"}))
+                logger.info("Conversation cleared")
+                continue
+
+            topic = msg.get("topic", "")
+            if not topic:
+                continue
+
+            if node is None:
+                # First message — spin up the loop
+                logger.info(f"Starting persistent loop: {topic}")
+                await start_loop(topic)
+            else:
+                # Subsequent message — inject into the running loop
+                logger.info(f"Injecting message: {topic}")
+                await node.inject_event(topic)
+
+    except websockets.exceptions.ConnectionClosed:
+        pass
+    finally:
+        await stop_loop()
+        logger.info("WebSocket closed, loop stopped")
+
+
+# -------------------------------------------------------------------------
+# HTTP handler for serving the HTML page
+# -------------------------------------------------------------------------
+
+
+async def process_request(connection, request: Request):
+    """Serve HTML on GET /, upgrade to WebSocket on /ws."""
+    if request.path == "/ws":
+        return None  # let websockets handle the upgrade
+    # Serve the HTML page for any other path
+    return Response(
+        HTTPStatus.OK,
+        "OK",
+        websockets.Headers({"Content-Type": "text/html; charset=utf-8"}),
+        HTML_PAGE.encode(),
+    )
+
+
+# -------------------------------------------------------------------------
+# Main
+# -------------------------------------------------------------------------
+
+
+async def main():
+    port = 8765
+    async with websockets.serve(
+        handle_ws,
+        "0.0.0.0",
+        port,
+        process_request=process_request,
+    ):
+        logger.info(f"Demo running at http://localhost:{port}")
+        logger.info("Open in your browser and enter a topic to research.")
+        await asyncio.Future()  # run forever
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
@@ -0,0 +1,930 @@
+#!/usr/bin/env python3
+"""
+Two-Node ContextHandoff Demo
+
+Demonstrates ContextHandoff between two EventLoopNode instances:
+  Node A (Researcher) → ContextHandoff → Node B (Analyst)
+
+Real LLM, real FileConversationStore, real EventBus.
+Streams both nodes to a browser via WebSocket.
+
+Usage:
+    cd core  # from the hive repository root
+    python demos/handoff_demo.py
+
+    Then open http://localhost:8766 in your browser.
+"""
+
+import asyncio
+import json
+import logging
+import sys
+import tempfile
+from http import HTTPStatus
+from pathlib import Path
+
+import httpx
+import websockets
+from bs4 import BeautifulSoup
+from websockets.http11 import Request, Response
+
+# Add core, tools, and hive root to path
+_CORE_DIR = Path(__file__).resolve().parent.parent
+_HIVE_DIR = _CORE_DIR.parent
+sys.path.insert(0, str(_CORE_DIR))  # framework.*
+sys.path.insert(0, str(_HIVE_DIR / "tools" / "src"))  # aden_tools.*
+sys.path.insert(0, str(_HIVE_DIR))  # core.framework.* (for aden_tools imports)
+
+from aden_tools.credentials import CREDENTIAL_SPECS, CredentialStoreAdapter  # noqa: E402
+from core.framework.credentials import CredentialStore  # noqa: E402
+
+from framework.credentials.storage import (  # noqa: E402
+    CompositeStorage,
+    EncryptedFileStorage,
+    EnvVarStorage,
+)
+from framework.graph.context_handoff import ContextHandoff  # noqa: E402
+from framework.graph.conversation import NodeConversation  # noqa: E402
+from framework.graph.event_loop_node import EventLoopNode, LoopConfig  # noqa: E402
+from framework.graph.node import NodeContext, NodeSpec, SharedMemory  # noqa: E402
+from framework.llm.litellm import LiteLLMProvider  # noqa: E402
+from framework.llm.provider import Tool  # noqa: E402
+from framework.runner.tool_registry import ToolRegistry  # noqa: E402
+from framework.runtime.core import Runtime  # noqa: E402
+from framework.runtime.event_bus import EventBus, EventType  # noqa: E402
+from framework.storage.conversation_store import FileConversationStore  # noqa: E402
+
+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(message)s")
+logger = logging.getLogger("handoff_demo")
+
+# -------------------------------------------------------------------------
+# Persistent state
+# -------------------------------------------------------------------------
+
+# Fresh temporary directory created at import time. Nothing visible in this part
+# of the script removes it, so the state lasts for the lifetime of the process.
+STORE_DIR = Path(tempfile.mkdtemp(prefix="hive_handoff_"))
+RUNTIME = Runtime(STORE_DIR / "runtime")
+LLM = LiteLLMProvider(model="claude-sonnet-4-5-20250929")
+
+# -------------------------------------------------------------------------
+# Credentials
+# -------------------------------------------------------------------------
+
+# Composite credential store: encrypted files (primary) + env vars (fallback)
+# Map every known credential name to the environment variable declared in its spec.
+_env_mapping = {name: spec.env_var for name, spec in CREDENTIAL_SPECS.items()}
+_composite = CompositeStorage(
+    primary=EncryptedFileStorage(),
+    fallbacks=[EnvVarStorage(env_mapping=_env_mapping)],
+)
+# Adapter the tool executors below use to look credentials up by name.
+CREDENTIALS = CredentialStoreAdapter(CredentialStore(storage=_composite))
+
+# Log which credentials resolved. Only the length is logged, never the value, and
+# with logging configured at INFO above these debug lines are not emitted by default.
+for _name in ["brave_search", "hubspot"]:
+    _val = CREDENTIALS.get(_name)
+    if _val:
+        logger.debug("credential %s: OK (len=%d)", _name, len(_val))
+    else:
+        logger.debug("credential %s: not found", _name)
+
+# -------------------------------------------------------------------------
+# Tool Registry — web_search + web_scrape for Node A (Researcher)
+# -------------------------------------------------------------------------
+
+TOOL_REGISTRY = ToolRegistry()
+
+
+def _exec_web_search(inputs: dict) -> dict:
+    api_key = CREDENTIALS.get("brave_search")
+    if not api_key:
+        return {"error": "brave_search credential not configured"}
+    query = inputs.get("query", "")
+    num_results = min(inputs.get("num_results", 10), 20)
+    resp = httpx.get(
+        "https://api.search.brave.com/res/v1/web/search",
+        params={"q": query, "count": num_results},
+        headers={
+            "X-Subscription-Token": api_key,
+            "Accept": "application/json",
+        },
+        timeout=30.0,
+    )
+    if resp.status_code != 200:
+        return {"error": f"Brave API HTTP {resp.status_code}"}
+    data = resp.json()
+    results = [
+        {
+            "title": item.get("title", ""),
+            "url": item.get("url", ""),
+            "snippet": item.get("description", ""),
+        }
+        for item in data.get("web", {}).get("results", [])[:num_results]
+    ]
+    return {"query": query, "results": results, "total": len(results)}
+
+
+# Register the web_search schema together with its executor. The lambda simply
+# forwards the tool inputs to _exec_web_search defined above.
+TOOL_REGISTRY.register(
+    name="web_search",
+    tool=Tool(
+        name="web_search",
+        description=(
+            "Search the web for current information. "
+            "Returns titles, URLs, and snippets from search results."
+        ),
+        # JSON-Schema style description of the arguments; only "query" is mandatory.
+        parameters={
+            "type": "object",
+            "properties": {
+                "query": {
+                    "type": "string",
+                    "description": "The search query (1-500 characters)",
+                },
+                "num_results": {
+                    "type": "integer",
+                    "description": "Number of results (1-20, default 10)",
+                },
+            },
+            "required": ["query"],
+        },
+    ),
+    executor=lambda inputs: _exec_web_search(inputs),
+)
+
+# Browser-like request headers passed to httpx.get() by _exec_web_scrape.
+_SCRAPE_HEADERS = {
+    "User-Agent": (
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+        "AppleWebKit/537.36 (KHTML, like Gecko) "
+        "Chrome/131.0.0.0 Safari/537.36"
+    ),
+    "Accept": "text/html,application/xhtml+xml",
+}
+
+
+def _exec_web_scrape(inputs: dict) -> dict:
+    """Fetch a web page and return its title and main text content.
+
+    Args:
+        inputs: Tool arguments. ``url`` is required; a missing scheme
+            defaults to ``https://``. ``max_length`` is optional and is
+            clamped to the range 1000-500000 (default 50000).
+
+    Returns:
+        ``{"url", "title", "content", "length"}`` on success, or
+        ``{"error": message}`` on failure (missing URL, non-200 status,
+        timeout, or any other fetch/parse error).
+    """
+    url = str(inputs.get("url") or "").strip()
+    if not url:
+        return {"error": "Missing required parameter: url"}
+
+    # Coerce defensively: a null or non-numeric max_length would otherwise
+    # raise here, before the error-dict handling below can catch it.
+    raw_limit = inputs.get("max_length")
+    try:
+        requested = 50000 if raw_limit is None else int(raw_limit)
+    except (TypeError, ValueError, OverflowError):
+        requested = 50000
+    max_length = max(1000, min(requested, 500000))
+
+    if not url.startswith(("http://", "https://")):
+        url = "https://" + url
+    try:
+        resp = httpx.get(
+            url,
+            timeout=30.0,
+            follow_redirects=True,
+            headers=_SCRAPE_HEADERS,
+        )
+        if resp.status_code != 200:
+            return {"error": f"HTTP {resp.status_code}"}
+        soup = BeautifulSoup(resp.text, "html.parser")
+        # Drop page chrome and non-content elements before extracting text.
+        for tag in soup(["script", "style", "nav", "footer", "header", "aside", "noscript"]):
+            tag.decompose()
+        title = soup.title.get_text(strip=True) if soup.title else ""
+        # Prefer the most specific content container that exists.
+        main = (
+            soup.find("article")
+            or soup.find("main")
+            or soup.find(attrs={"role": "main"})
+            or soup.find("body")
+        )
+        text = main.get_text(separator=" ", strip=True) if main else ""
+        # Collapse all runs of whitespace to single spaces.
+        text = " ".join(text.split())
+        if len(text) > max_length:
+            text = text[:max_length] + "..."
+        return {
+            "url": url,
+            "title": title,
+            "content": text,
+            "length": len(text),
+        }
+    except httpx.TimeoutException:
+        return {"error": "Request timed out"}
+    except Exception as e:
+        return {"error": f"Scrape failed: {e}"}
+
+
+# Register the web_scrape tool: its Tool definition (name, description,
+# JSON-schema parameters) together with the executor that handles calls.
+TOOL_REGISTRY.register(
+    name="web_scrape",
+    tool=Tool(
+        name="web_scrape",
+        description=(
+            "Scrape and extract text content from a webpage URL. "
+            "Returns the page title and main text content."
+        ),
+        parameters={
+            "type": "object",
+            "properties": {
+                "url": {
+                    "type": "string",
+                    "description": "URL of the webpage to scrape",
+                },
+                "max_length": {
+                    "type": "integer",
+                    "description": "Maximum text length (default 50000)",
+                },
+            },
+            "required": ["url"],
+        },
+    ),
+    # The lambda forwards the tool inputs unchanged to _exec_web_scrape.
+    executor=lambda inputs: _exec_web_scrape(inputs),
+)
+
+# Log the names of every tool registered so far.
+logger.info(
+    "ToolRegistry loaded: %s",
+    ", ".join(TOOL_REGISTRY.get_registered_names()),
+)
+
+# -------------------------------------------------------------------------
+# Node Specs
+# -------------------------------------------------------------------------
+
+# Node A of the pipeline: takes a "topic" input and is instructed (prompt
+# step 4) to store its findings under the "research_summary" output key.
+RESEARCHER_SPEC = NodeSpec(
+    id="researcher",
+    name="Researcher",
+    description="Researches a topic using web search and scraping tools",
+    node_type="event_loop",
+    input_keys=["topic"],
+    output_keys=["research_summary"],
+    system_prompt=(
+        "You are a thorough research assistant. Your job is to research "
+        "the given topic using the web_search and web_scrape tools.\n\n"
+        "1. Search for relevant information on the topic\n"
+        "2. Scrape 1-2 of the most promising URLs for details\n"
+        "3. Synthesize your findings into a comprehensive summary\n"
+        "4. Use set_output with key='research_summary' to save your "
+        "findings\n\n"
+        "Be thorough but efficient. Aim for 2-4 search/scrape calls, "
+        "then summarize and set_output."
+    ),
+)
+
+# Node B of the pipeline: takes a "context" input (the researcher's
+# findings) and is instructed to store its result under "analysis".
+ANALYST_SPEC = NodeSpec(
+    id="analyst",
+    name="Analyst",
+    description="Analyzes research findings and provides insights",
+    node_type="event_loop",
+    input_keys=["context"],
+    output_keys=["analysis"],
+    system_prompt=(
+        "You are a strategic analyst. You receive research findings from "
+        "a previous researcher and must:\n\n"
+        "1. Identify key themes and patterns\n"
+        "2. Assess the reliability and significance of the findings\n"
+        "3. Provide actionable insights and recommendations\n"
+        "4. Use set_output with key='analysis' to save your analysis\n\n"
+        "Be concise but insightful. Focus on what matters most."
+    ),
+)
+
+
+# -------------------------------------------------------------------------
+# HTML page
+# -------------------------------------------------------------------------
+
+HTML_PAGE = (  # noqa: E501
+    """<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>ContextHandoff Demo</title>
+<style>
+  * {
+    box-sizing: border-box;
+    margin: 0;
+    padding: 0;
+  }
+  body {
+    font-family: 'SF Mono', 'Fira Code', monospace;
+    background: #0d1117;
+    color: #c9d1d9;
+    height: 100vh;
+    display: flex;
+    flex-direction: column;
+  }
+  header {
+    background: #161b22;
+    padding: 12px 20px;
+    border-bottom: 1px solid #30363d;
+    display: flex;
+    align-items: center;
+    gap: 16px;
+  }
+  header h1 {
+    font-size: 16px;
+    color: #58a6ff;
+    font-weight: 600;
+  }
+  .badge {
+    font-size: 12px;
+    padding: 3px 10px;
+    border-radius: 12px;
+    background: #21262d;
+    color: #8b949e;
+  }
+  .badge.researcher {
+    background: #1a3a5c;
+    color: #58a6ff;
+  }
+  .badge.analyst {
+    background: #1a4b2e;
+    color: #3fb950;
+  }
+  .badge.handoff {
+    background: #3d1f00;
+    color: #d29922;
+  }
+  .badge.done {
+    background: #21262d;
+    color: #8b949e;
+  }
+  .badge.error {
+    background: #4b1a1a;
+    color: #f85149;
+  }
+  .chat {
+    flex: 1;
+    overflow-y: auto;
+    padding: 16px;
+  }
+  .msg {
+    margin: 8px 0;
+    padding: 10px 14px;
+    border-radius: 8px;
+    line-height: 1.6;
+    white-space: pre-wrap;
+    word-wrap: break-word;
+  }
+  .msg.user {
+    background: #1a3a5c;
+    color: #58a6ff;
+  }
+  .msg.assistant {
+    background: #161b22;
+    color: #c9d1d9;
+  }
+  .msg.assistant.analyst-msg {
+    border-left: 3px solid #3fb950;
+  }
+  .msg.event {
+    background: transparent;
+    color: #8b949e;
+    font-size: 11px;
+    padding: 4px 14px;
+    border-left: 3px solid #30363d;
+  }
+  .msg.event.loop {
+    border-left-color: #58a6ff;
+  }
+  .msg.event.tool {
+    border-left-color: #d29922;
+  }
+  .msg.event.stall {
+    border-left-color: #f85149;
+  }
+  .handoff-banner {
+    margin: 16px 0;
+    padding: 16px;
+    background: #1c1200;
+    border: 1px solid #d29922;
+    border-radius: 8px;
+    text-align: center;
+  }
+  .handoff-banner h3 {
+    color: #d29922;
+    font-size: 14px;
+    margin-bottom: 8px;
+  }
+  .handoff-banner p, .result-banner p {
+    color: #8b949e;
+    font-size: 12px;
+    line-height: 1.5;
+    max-height: 200px;
+    overflow-y: auto;
+    white-space: pre-wrap;
+    text-align: left;
+  }
+  .result-banner {
+    margin: 16px 0;
+    padding: 16px;
+    background: #0a2614;
+    border: 1px solid #3fb950;
+    border-radius: 8px;
+  }
+  .result-banner h3 {
+    color: #3fb950;
+    font-size: 14px;
+    margin-bottom: 8px;
+    text-align: center;
+  }
+  .result-banner .label {
+    color: #58a6ff;
+    font-size: 11px;
+    font-weight: 600;
+    margin-top: 10px;
+    margin-bottom: 2px;
+  }
+  .result-banner .tokens {
+    color: #484f58;
+    font-size: 11px;
+    text-align: center;
+    margin-top: 10px;
+  }
+  .input-bar {
+    padding: 12px 16px;
+    background: #161b22;
+    border-top: 1px solid #30363d;
+    display: flex;
+    gap: 8px;
+  }
+  .input-bar input {
+    flex: 1;
+    background: #0d1117;
+    border: 1px solid #30363d;
+    color: #c9d1d9;
+    padding: 8px 12px;
+    border-radius: 6px;
+    font-family: inherit;
+    font-size: 14px;
+    outline: none;
+  }
+  .input-bar input:focus {
+    border-color: #58a6ff;
+  }
+  .input-bar button {
+    background: #238636;
+    color: #fff;
+    border: none;
+    padding: 8px 20px;
+    border-radius: 6px;
+    cursor: pointer;
+    font-family: inherit;
+    font-weight: 600;
+  }
+  .input-bar button:hover {
+    background: #2ea043;
+  }
+  .input-bar button:disabled {
+    background: #21262d;
+    color: #484f58;
+    cursor: not-allowed;
+  }
+</style>
+</head>
+<body>
+  <header>
+    <h1>ContextHandoff Demo</h1>
+    <span id="phase" class="badge">Idle</span>
+    <span id="iter" class="badge" style="display:none">Step 0</span>
+  </header>
+  <div id="chat" class="chat"></div>
+  <div class="input-bar">
+    <input id="input" type="text"
+           placeholder="Enter a research topic..." autofocus />
+    <button id="go" onclick="run()">Research</button>
+  </div>
+
+<script>
+let ws = null;
+let currentAssistantEl = null;
+let iterCount = 0;
+let currentPhase = 'idle';
+const chat = document.getElementById('chat');
+const phase = document.getElementById('phase');
+const iterEl = document.getElementById('iter');
+const goBtn = document.getElementById('go');
+const inputEl = document.getElementById('input');
+
+inputEl.addEventListener('keydown', e => {
+  if (e.key === 'Enter') run();
+});
+
+function setPhase(text, cls) {
+  // Update the header badge (label + style) and remember the phase class.
+  currentPhase = cls;
+  phase.className = 'badge ' + cls;
+  phase.textContent = text;
+}
+
+function addMsg(text, cls) {
+  // Append one transcript row (plain text, never HTML), scroll it into
+  // view, and hand the element back so callers can stream into it.
+  const row = document.createElement('div');
+  row.textContent = text;
+  row.className = 'msg ' + cls;
+  chat.appendChild(row);
+  chat.scrollTop = chat.scrollHeight;
+  return row;
+}
+
+function addHandoffBanner(summary) {
+  // Show the researcher -> analyst handoff card with the summary text
+  // (or a placeholder when no summary was supplied).
+  const card = document.createElement('div');
+  card.className = 'handoff-banner';
+
+  const heading = document.createElement('h3');
+  heading.textContent = 'Context Handoff: Researcher -> Analyst';
+  card.appendChild(heading);
+
+  const body = document.createElement('p');
+  body.textContent = summary ? summary : 'Passing research context...';
+  card.appendChild(body);
+
+  chat.appendChild(card);
+  chat.scrollTop = chat.scrollHeight;
+}
+
+function addResultBanner(researcher, analyst, tokens) {
+  // Final summary card: optional research and analysis sections followed
+  // by the token total (each part is skipped when its value is falsy).
+  const card = document.createElement('div');
+  card.className = 'result-banner';
+
+  const heading = document.createElement('h3');
+  heading.textContent = 'Pipeline Complete';
+  card.appendChild(heading);
+
+  // Append a label + paragraph pair; labelColor overrides the CSS colour.
+  const addSection = (title, body, labelColor) => {
+    const label = document.createElement('div');
+    label.className = 'label';
+    label.textContent = title;
+    if (labelColor) label.style.color = labelColor;
+    card.appendChild(label);
+    const para = document.createElement('p');
+    para.textContent = body;
+    card.appendChild(para);
+  };
+
+  if (researcher && researcher.research_summary) {
+    addSection('RESEARCH SUMMARY', researcher.research_summary, null);
+  }
+  if (analyst && analyst.analysis) {
+    addSection('ANALYSIS', analyst.analysis, '#3fb950');
+  }
+  if (tokens) {
+    const total = document.createElement('div');
+    total.className = 'tokens';
+    total.textContent = 'Total tokens: ' + tokens.toLocaleString();
+    card.appendChild(total);
+  }
+
+  chat.appendChild(card);
+  chat.scrollTop = chat.scrollHeight;
+}
+
+function connect() {
+  // Open the event socket and reconnect automatically after a close.
+  // Match the page scheme: browsers block ws:// from pages served over
+  // https, so use wss:// there.
+  const scheme = location.protocol === 'https:' ? 'wss://' : 'ws://';
+  ws = new WebSocket(scheme + location.host + '/ws');
+  ws.onopen = () => {
+    setPhase('Ready', 'done');
+    goBtn.disabled = false;
+  };
+  ws.onmessage = handleEvent;
+  ws.onerror = () => { setPhase('Error', 'error'); };
+  ws.onclose = () => {
+    // Keep the button disabled until the next successful open.
+    setPhase('Reconnecting...', '');
+    goBtn.disabled = true;
+    setTimeout(connect, 2000);
+  };
+}
+
+function handleEvent(msg) {
+  // Dispatch one server event (a JSON text frame) to its UI update.
+  let evt;
+  try {
+    evt = JSON.parse(msg.data);
+  } catch (err) {
+    return;  // not JSON: nothing to render
+  }
+  if (!evt || typeof evt !== 'object') return;
+
+  // Remove the streaming placeholder if nothing was ever written to it.
+  const dropEmptyAssistant = () => {
+    if (currentAssistantEl && !currentAssistantEl.textContent) {
+      currentAssistantEl.remove();
+    }
+  };
+
+  if (evt.type === 'phase') {
+    if (evt.phase === 'researcher') {
+      setPhase('Researcher', 'researcher');
+    } else if (evt.phase === 'handoff') {
+      setPhase('Handoff', 'handoff');
+    } else if (evt.phase === 'analyst') {
+      setPhase('Analyst', 'analyst');
+    }
+    // Each phase restarts the step counter.
+    iterCount = 0;
+    iterEl.style.display = 'none';
+  }
+  else if (evt.type === 'llm_text_delta') {
+    if (currentAssistantEl) {
+      // A delta without content must not append the text "undefined".
+      currentAssistantEl.textContent += (evt.content || '');
+      chat.scrollTop = chat.scrollHeight;
+    }
+  }
+  else if (evt.type === 'node_loop_iteration') {
+    iterCount = evt.iteration || (iterCount + 1);
+    iterEl.textContent = 'Step ' + iterCount;
+    iterEl.style.display = '';
+  }
+  else if (evt.type === 'tool_call_started') {
+    // JSON.stringify(undefined) yields undefined, so fall back to ''.
+    const args = JSON.stringify(evt.tool_input) || '';
+    const info = evt.tool_name + '(' + args.slice(0, 120) + ')';
+    addMsg('TOOL  ' + info, 'event tool');
+  }
+  else if (evt.type === 'tool_call_completed') {
+    // The result may arrive as a non-string (object, number); those have
+    // no string slice(), so serialise them before building the preview.
+    let resultText = '';
+    if (typeof evt.result === 'string') {
+      resultText = evt.result;
+    } else if (evt.result != null) {
+      resultText = JSON.stringify(evt.result) || '';
+    }
+    const preview = resultText.slice(0, 200);
+    const cls = evt.is_error ? 'stall' : 'tool';
+    addMsg(
+      'RESULT  ' + evt.tool_name + ': ' + preview,
+      'event ' + cls
+    );
+    // Start a fresh bubble for the text that follows the tool result.
+    const assistCls = currentPhase === 'analyst'
+      ? 'assistant analyst-msg' : 'assistant';
+    currentAssistantEl = addMsg('', assistCls);
+  }
+  else if (evt.type === 'handoff_context') {
+    addHandoffBanner(evt.summary);
+    currentAssistantEl = addMsg('', 'assistant analyst-msg');
+  }
+  else if (evt.type === 'node_result') {
+    if (evt.node_id === 'researcher') {
+      dropEmptyAssistant();
+    }
+  }
+  else if (evt.type === 'done') {
+    setPhase('Done', 'done');
+    iterEl.style.display = 'none';
+    dropEmptyAssistant();
+    currentAssistantEl = null;
+    addResultBanner(
+      evt.researcher, evt.analyst, evt.total_tokens
+    );
+    goBtn.disabled = false;
+    inputEl.placeholder = 'Enter another topic...';
+  }
+  else if (evt.type === 'error') {
+    setPhase('Error', 'error');
+    // Do not leave an empty assistant bubble behind the error line.
+    dropEmptyAssistant();
+    currentAssistantEl = null;
+    addMsg('ERROR  ' + evt.message, 'event stall');
+    goBtn.disabled = false;
+  }
+  else if (evt.type === 'node_stalled') {
+    addMsg('STALLED  ' + evt.reason, 'event stall');
+  }
+}
+
+function run() {
+  // Submit the typed topic: clear the transcript, echo the topic, open a
+  // placeholder bubble for streamed text, and send the request.
+  const topic = inputEl.value.trim();
+  const socketOpen = ws && ws.readyState === WebSocket.OPEN;
+  if (!topic || !socketOpen) return;
+
+  chat.innerHTML = '';
+  addMsg(topic, 'user');
+  currentAssistantEl = addMsg('', 'assistant');
+  inputEl.value = '';
+  goBtn.disabled = true;
+  ws.send(JSON.stringify({ topic: topic }));
+}
+
+connect();
+</script>
+</body>
+</html>"""
+)
+
+
+# -------------------------------------------------------------------------
+# WebSocket handler — sequential Node A → Handoff → Node B
+# -------------------------------------------------------------------------
+
+
+async def handle_ws(websocket):
+    """Run the two-node handoff pipeline once per valid client message.
+
+    Each incoming frame must be a JSON object with a non-empty string
+    ``topic``; anything else is ignored. A pipeline failure is logged and
+    reported to the client as an ``error`` event, after which the handler
+    keeps reading. A closed connection ends the handler quietly.
+    """
+    try:
+        async for raw in websocket:
+            try:
+                msg = json.loads(raw)
+            except ValueError:
+                # Malformed JSON or undecodable bytes: ignore the frame.
+                continue
+
+            # Valid JSON that is not an object (a list, number, ...) has no
+            # .get(); skip it instead of crashing the connection handler.
+            topic = msg.get("topic") if isinstance(msg, dict) else None
+            if not isinstance(topic, str) or not topic:
+                continue
+
+            logger.info("Starting handoff pipeline for: %s", topic)
+
+            try:
+                await _run_pipeline(websocket, topic)
+            except websockets.exceptions.ConnectionClosed:
+                logger.info("WebSocket closed during pipeline")
+                return
+            except Exception as e:
+                logger.exception("Pipeline error")
+                try:
+                    await websocket.send(json.dumps({"type": "error", "message": str(e)}))
+                except Exception:
+                    # Best effort: the client may already be gone.
+                    logger.debug("Could not deliver error event", exc_info=True)
+
+    except websockets.exceptions.ConnectionClosed:
+        pass
+
+
+async def _run_pipeline(websocket, topic: str):
+    """Execute: Node A (research) → ContextHandoff → Node B (analysis).
+
+    Creates a fresh temp directory for this run's conversation stores,
+    runs the pipeline stages, and removes the directory on every exit
+    path (success, early return, or exception).
+
+    Args:
+        websocket: Open connection used to send JSON events to the client.
+        topic: Research topic passed to the researcher node as input.
+    """
+    import shutil
+
+    # Fresh stores for each run
+    run_dir = Path(tempfile.mkdtemp(prefix="hive_run_", dir=STORE_DIR))
+    try:
+        await _run_pipeline_stages(websocket, topic, run_dir)
+    finally:
+        # Clean up temp stores (best effort, never masks a pipeline error)
+        shutil.rmtree(run_dir, ignore_errors=True)
+
+
+async def _run_pipeline_stages(websocket, topic: str, run_dir: Path):
+    """Run researcher, handoff and analyst, streaming events to the client.
+
+    Sends ``phase`` markers, forwarded event-bus events, the researcher's
+    ``node_result``, a ``handoff_context`` event and finally ``done``. If
+    the researcher fails or its conversation cannot be restored, an
+    ``error`` event is sent instead and the function returns early.
+    """
+    store_a = FileConversationStore(run_dir / "node_a")
+    store_b = FileConversationStore(run_dir / "node_b")
+
+    # Shared event bus
+    bus = EventBus()
+
+    async def forward_event(event):
+        # Best-effort relay of bus events to the browser: failures are
+        # logged at debug level and otherwise ignored.
+        try:
+            payload = {"type": event.type.value, **event.data}
+            if event.node_id:
+                payload["node_id"] = event.node_id
+            await websocket.send(json.dumps(payload))
+        except Exception:
+            logger.debug("Dropped event for client", exc_info=True)
+
+    bus.subscribe(
+        event_types=[
+            EventType.NODE_LOOP_STARTED,
+            EventType.NODE_LOOP_ITERATION,
+            EventType.NODE_LOOP_COMPLETED,
+            EventType.LLM_TEXT_DELTA,
+            EventType.TOOL_CALL_STARTED,
+            EventType.TOOL_CALL_COMPLETED,
+            EventType.NODE_STALLED,
+        ],
+        handler=forward_event,
+    )
+
+    tools = list(TOOL_REGISTRY.get_tools().values())
+    tool_executor = TOOL_REGISTRY.get_executor()
+
+    # ---- Phase 1: Researcher ------------------------------------------------
+    await websocket.send(json.dumps({"type": "phase", "phase": "researcher"}))
+
+    node_a = EventLoopNode(
+        event_bus=bus,
+        judge=None,  # implicit judge: accept when output_keys filled
+        config=LoopConfig(
+            max_iterations=20,
+            max_tool_calls_per_turn=10,
+            max_history_tokens=32_000,
+        ),
+        conversation_store=store_a,
+        tool_executor=tool_executor,
+    )
+
+    ctx_a = NodeContext(
+        runtime=RUNTIME,
+        node_id="researcher",
+        node_spec=RESEARCHER_SPEC,
+        memory=SharedMemory(),
+        input_data={"topic": topic},
+        llm=LLM,
+        available_tools=tools,
+    )
+
+    result_a = await node_a.execute(ctx_a)
+    logger.info(
+        "Researcher done: success=%s, tokens=%s",
+        result_a.success,
+        result_a.tokens_used,
+    )
+
+    await websocket.send(
+        json.dumps(
+            {
+                "type": "node_result",
+                "node_id": "researcher",
+                "success": result_a.success,
+                "output": result_a.output,
+            }
+        )
+    )
+
+    if not result_a.success:
+        await websocket.send(
+            json.dumps(
+                {
+                    "type": "error",
+                    "message": f"Researcher failed: {result_a.error}",
+                }
+            )
+        )
+        return
+
+    # ---- Phase 2: Context Handoff -------------------------------------------
+    await websocket.send(json.dumps({"type": "phase", "phase": "handoff"}))
+
+    # Restore the researcher's conversation from store
+    conversation_a = await NodeConversation.restore(store_a)
+    if conversation_a is None:
+        await websocket.send(
+            json.dumps(
+                {
+                    "type": "error",
+                    "message": "Failed to restore researcher conversation",
+                }
+            )
+        )
+        return
+
+    handoff_engine = ContextHandoff(llm=LLM)
+    handoff_context = handoff_engine.summarize_conversation(
+        conversation=conversation_a,
+        node_id="researcher",
+        output_keys=["research_summary"],
+    )
+
+    formatted_handoff = ContextHandoff.format_as_input(handoff_context)
+    logger.info(
+        "Handoff: %d turns, ~%d tokens, keys=%s",
+        handoff_context.turn_count,
+        handoff_context.total_tokens_used,
+        list(handoff_context.key_outputs.keys()),
+    )
+
+    # Send handoff context to browser (summary truncated to 500 characters)
+    await websocket.send(
+        json.dumps(
+            {
+                "type": "handoff_context",
+                "summary": handoff_context.summary[:500],
+                "turn_count": handoff_context.turn_count,
+                "tokens": handoff_context.total_tokens_used,
+                "key_outputs": handoff_context.key_outputs,
+            }
+        )
+    )
+
+    # ---- Phase 3: Analyst ---------------------------------------------------
+    await websocket.send(json.dumps({"type": "phase", "phase": "analyst"}))
+
+    # The analyst gets no tools: no tool_executor and an empty tool list.
+    node_b = EventLoopNode(
+        event_bus=bus,
+        judge=None,  # implicit judge
+        config=LoopConfig(
+            max_iterations=10,
+            max_tool_calls_per_turn=5,
+            max_history_tokens=32_000,
+        ),
+        conversation_store=store_b,
+    )
+
+    ctx_b = NodeContext(
+        runtime=RUNTIME,
+        node_id="analyst",
+        node_spec=ANALYST_SPEC,
+        memory=SharedMemory(),
+        input_data={"context": formatted_handoff},
+        llm=LLM,
+        available_tools=[],
+    )
+
+    result_b = await node_b.execute(ctx_b)
+    logger.info(
+        "Analyst done: success=%s, tokens=%s",
+        result_b.success,
+        result_b.tokens_used,
+    )
+
+    # ---- Done ---------------------------------------------------------------
+    await websocket.send(
+        json.dumps(
+            {
+                "type": "done",
+                "researcher": result_a.output,
+                "analyst": result_b.output,
+                "total_tokens": ((result_a.tokens_used or 0) + (result_b.tokens_used or 0)),
+            }
+        )
+    )
+
+
+# -------------------------------------------------------------------------
+# HTTP handler
+# -------------------------------------------------------------------------
+
+
+async def process_request(connection, request: Request):
+    """Answer plain HTTP requests and let ``/ws`` proceed as a WebSocket.
+
+    Returns ``None`` for the ``/ws`` path; every other path receives the
+    demo HTML page with a 200 status.
+    """
+    if request.path != "/ws":
+        html_headers = websockets.Headers({"Content-Type": "text/html; charset=utf-8"})
+        return Response(HTTPStatus.OK, "OK", html_headers, HTML_PAGE.encode())
+    return None
+
+
+# -------------------------------------------------------------------------
+# Main
+# -------------------------------------------------------------------------
+
+
+async def main():
+    """Serve the demo on port 8766 (bound to all interfaces) and never return."""
+    port = 8766
+    server = websockets.serve(
+        handle_ws,
+        "0.0.0.0",
+        port,
+        process_request=process_request,
+    )
+    async with server:
+        logger.info(f"Handoff demo at http://localhost:{port}")
+        logger.info("Enter a research topic to start the pipeline.")
+        # A future that is never resolved keeps the server running.
+        await asyncio.Future()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
@@ -99,10 +99,10 @@ async def example_4_custom_agent_with_mcp_tools():
    """Example 4: Build custom agent that uses MCP tools"""
    print("\n=== Example 4: Custom Agent with MCP Tools ===\n")

-    from framework.builder.workflow import WorkflowBuilder
+    from framework.builder.workflow import GraphBuilder

    # Create a workflow builder
-    builder = WorkflowBuilder()
+    builder = GraphBuilder()

    # Define goal
    builder.set_goal(
@@ -1,4 +1,4 @@
-"""Allow running as python -m framework"""
+"""Allow running as ``python -m framework``, which powers the ``hive`` console entry point."""

 from framework.cli import main

@@ -15,7 +15,7 @@ You cannot skip steps or bypass validation.

 from collections.abc import Callable
 from datetime import datetime
-from enum import Enum
+from enum import StrEnum
 from pathlib import Path
 from typing import Any

@@ -26,7 +26,7 @@ from framework.graph.goal import Goal
 from framework.graph.node import NodeSpec


-class BuildPhase(str, Enum):
+class BuildPhase(StrEnum):
    """Current phase of the build process."""

    INIT = "init"  # Just started
@@ -1,27 +1,62 @@
 """
-Command-line interface for Goal Agent.
+Command-line interface for Aden Hive.

 Usage:
-    python -m core run exports/my-agent --input '{"key": "value"}'
-    python -m core info exports/my-agent
-    python -m core validate exports/my-agent
-    python -m core list exports/
-    python -m core dispatch exports/ --input '{"key": "value"}'
-    python -m core shell exports/my-agent
+    hive run exports/my-agent --input '{"key": "value"}'
+    hive info exports/my-agent
+    hive validate exports/my-agent
+    hive list exports/
+    hive dispatch exports/ --input '{"key": "value"}'
+    hive shell exports/my-agent

 Testing commands:
-    python -m core test-run <agent_path> --goal <goal_id>
-    python -m core test-debug <goal_id> <test_id>
-    python -m core test-list <goal_id>
-    python -m core test-stats <goal_id>
+    hive test-run <agent_path> --goal <goal_id>
+    hive test-debug <goal_id> <test_id>
+    hive test-list <goal_id>
+    hive test-stats <goal_id>
 """

 import argparse
 import sys
+from pathlib import Path
+
+
+def _configure_paths():
+    """Make agents under exports/ (and the core/ directory) importable.
+
+    The project root is taken to be two levels above this file's directory
+    (core/framework/ -> core/ -> root). If that directory contains neither
+    exports/ nor core/, the current working directory is used instead.
+    Whichever of exports/ and core/ exist under the root are then prepended
+    to sys.path unless already present, so no manual PYTHONPATH setup is
+    needed.
+    """
+    # framework dir -> core dir -> project root
+    root = Path(__file__).resolve().parent.parent.parent
+
+    # Fall back to CWD when the file-relative root has neither directory
+    if not any((root / name).is_dir() for name in ("exports", "core")):
+        root = Path.cwd()
+
+    # exports/ is inserted first and core/ second, so core/ ends up ahead
+    # of exports/ on sys.path when both are added.
+    for name in ("exports", "core"):
+        candidate = root / name
+        entry = str(candidate)
+        if candidate.is_dir() and entry not in sys.path:
+            sys.path.insert(0, entry)


 def main():
-    parser = argparse.ArgumentParser(description="Goal Agent - Build and run goal-driven agents")
+    _configure_paths()
+
+    parser = argparse.ArgumentParser(
+        prog="hive",
+        description="Aden Hive - Build and run goal-driven agents",
+    )
    parser.add_argument(
        "--model",
        default="claude-haiku-4-5-20251001",
@@ -12,7 +12,7 @@ Quick Start:
    from core.framework.credentials import CredentialStore, CredentialObject

    # Create store with encrypted storage
-    store = CredentialStore.with_encrypted_storage("/var/hive/credentials")
+    store = CredentialStore.with_encrypted_storage()  # defaults to ~/.hive/credentials

    # Get a credential
    api_key = store.get("brave_search")
@@ -31,6 +31,13 @@ Quick Start:
 For OAuth2 support:
    from core.framework.credentials.oauth2 import BaseOAuth2Provider, OAuth2Config

+For Aden server sync:
+    from core.framework.credentials.aden import (
+        AdenCredentialClient,
+        AdenClientConfig,
+        AdenSyncProvider,
+    )
+
 For Vault integration:
    from core.framework.credentials.vault import HashiCorpVaultStorage
 """
@@ -62,6 +69,21 @@ from .storage import (
 from .store import CredentialStore
 from .template import TemplateResolver

+# Aden sync components (optional import: skipped if it fails, e.g. when httpx is not installed)
+# Usage: from core.framework.credentials.aden import AdenSyncProvider
+# Or: from core.framework.credentials import AdenSyncProvider
+try:
+    from .aden import (
+        AdenCachedStorage,
+        AdenClientConfig,
+        AdenCredentialClient,
+        AdenSyncProvider,
+    )
+
+    _ADEN_AVAILABLE = True
+except ImportError:
+    _ADEN_AVAILABLE = False
+
 __all__ = [
    # Main store
    "CredentialStore",
@@ -89,4 +111,12 @@ __all__ = [
    "CredentialRefreshError",
    "CredentialValidationError",
    "CredentialDecryptionError",
+    # Aden sync (optional - requires httpx)
+    "AdenSyncProvider",
+    "AdenCredentialClient",
+    "AdenClientConfig",
+    "AdenCachedStorage",
 ]
+
+# Track Aden availability for runtime checks
+ADEN_AVAILABLE = _ADEN_AVAILABLE
+
+if not ADEN_AVAILABLE:
+    # The optional import failed, so the Aden names were never bound in this
+    # module. Drop them from __all__; otherwise a star-import of this
+    # package raises AttributeError on the first missing name.
+    _ADEN_EXPORTS = frozenset(
+        {
+            "AdenSyncProvider",
+            "AdenCredentialClient",
+            "AdenClientConfig",
+            "AdenCachedStorage",
+        }
+    )
+    __all__ = [name for name in __all__ if name not in _ADEN_EXPORTS]
@@ -0,0 +1,76 @@
+"""
+Aden Credential Sync.
+
+Components for synchronizing credentials with the Aden authentication server.
+
+The Aden server handles OAuth2 authorization flows and maintains refresh tokens.
+These components fetch and cache access tokens locally while delegating
+lifecycle management to Aden.
+
+Components:
+- AdenCredentialClient: HTTP client for Aden API
+- AdenSyncProvider: CredentialProvider that syncs with Aden
+- AdenCachedStorage: Storage with local cache + Aden fallback
+
+Quick Start:
+    import os
+
+    from core.framework.credentials import CredentialStore
+    from core.framework.credentials.storage import EncryptedFileStorage
+    from core.framework.credentials.aden import (
+        AdenCredentialClient,
+        AdenClientConfig,
+        AdenSyncProvider,
+    )
+
+    # Configure (API key loaded from ADEN_API_KEY env var)
+    client = AdenCredentialClient(AdenClientConfig(
+        base_url=os.environ["ADEN_API_URL"],
+    ))
+
+    provider = AdenSyncProvider(client=client)
+
+    store = CredentialStore(
+        storage=EncryptedFileStorage(),
+        providers=[provider],
+        auto_refresh=True,
+    )
+
+    # Initial sync
+    provider.sync_all(store)
+
+    # Use normally
+    token = store.get_key("hubspot", "access_token")
+
+See docs/aden-credential-sync.md for detailed documentation.
+"""
+
+from .client import (
+    AdenAuthenticationError,
+    AdenClientConfig,
+    AdenClientError,
+    AdenCredentialClient,
+    AdenCredentialResponse,
+    AdenIntegrationInfo,
+    AdenNotFoundError,
+    AdenRateLimitError,
+    AdenRefreshError,
+)
+from .provider import AdenSyncProvider
+from .storage import AdenCachedStorage
+
+__all__ = [
+    # Client
+    "AdenCredentialClient",
+    "AdenClientConfig",
+    "AdenCredentialResponse",
+    "AdenIntegrationInfo",
+    # Client errors
+    "AdenClientError",
+    "AdenAuthenticationError",
+    "AdenNotFoundError",
+    "AdenRateLimitError",
+    "AdenRefreshError",
+    # Provider
+    "AdenSyncProvider",
+    # Storage
+    "AdenCachedStorage",
+]
@@ -0,0 +1,466 @@
+"""
+Aden Credential Client.
+
+HTTP client for communicating with the Aden authentication server.
+The Aden server handles OAuth2 authorization flows and token management.
+This client fetches tokens and delegates refresh operations to Aden.
+
+Usage:
+    # API key loaded from ADEN_API_KEY environment variable by default
+    client = AdenCredentialClient(AdenClientConfig(
+        base_url="https://api.adenhq.com",
+    ))
+
+    # Or explicitly provide the API key
+    client = AdenCredentialClient(AdenClientConfig(
+        base_url="https://api.adenhq.com",
+        api_key="your-api-key",
+    ))
+
+    # Fetch a credential
+    response = client.get_credential("hubspot")
+    if response:
+        print(f"Token expires at: {response.expires_at}")
+
+    # Request a refresh
+    refreshed = client.request_refresh("hubspot")
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import time
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from typing import Any
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+
+class AdenClientError(Exception):
+    """Root of the Aden client exception hierarchy."""
+
+
+class AdenAuthenticationError(AdenClientError):
+    """The Aden server rejected the API key (invalid or revoked)."""
+
+
+class AdenNotFoundError(AdenClientError):
+    """The requested integration does not exist on the Aden server."""
+
+
+class AdenRefreshError(AdenClientError):
+    """
+    A token refresh request failed.
+
+    Attributes:
+        requires_reauthorization: True when the user must authorize again.
+        reauthorization_url: Where to re-authorize, if the server supplied one.
+    """
+
+    def __init__(
+        self,
+        message: str,
+        requires_reauthorization: bool = False,
+        reauthorization_url: str | None = None,
+    ):
+        self.requires_reauthorization = requires_reauthorization
+        self.reauthorization_url = reauthorization_url
+        super().__init__(message)
+
+
+class AdenRateLimitError(AdenClientError):
+    """
+    The Aden server throttled the request.
+
+    Attributes:
+        retry_after: Seconds to wait before trying again (defaults to 60).
+    """
+
+    def __init__(self, message: str, retry_after: int = 60):
+        self.retry_after = retry_after
+        super().__init__(message)
+
+
+@dataclass
+class AdenClientConfig:
+    """
+    Configuration for Aden API client.
+
+    Raises:
+        ValueError: If no non-empty API key is passed and the ADEN_API_KEY
+            environment variable is unset or empty.
+    """
+
+    base_url: str
+    """Base URL of the Aden server (e.g., 'https://api.adenhq.com')."""
+
+    api_key: str | None = None
+    """Agent's API key for authenticating with Aden.
+    If not provided (None or empty), loaded from ADEN_API_KEY environment variable."""
+
+    tenant_id: str | None = None
+    """Optional tenant ID for multi-tenant deployments."""
+
+    timeout: float = 30.0
+    """Request timeout in seconds."""
+
+    retry_attempts: int = 3
+    """Number of retry attempts for transient failures."""
+
+    retry_delay: float = 1.0
+    """Base delay between retries in seconds (exponential backoff)."""
+
+    def __post_init__(self) -> None:
+        """Resolve the API key, falling back to the environment."""
+        # An empty string is as unusable as None: it would otherwise be sent
+        # as "Authorization: Bearer " and only fail later with a 401.
+        if not self.api_key:
+            self.api_key = os.environ.get("ADEN_API_KEY")
+
+        if not self.api_key:
+            raise ValueError(
+                "Aden API key not provided. Either pass api_key to AdenClientConfig "
+                "or set the ADEN_API_KEY environment variable."
+            )
+
+
+@dataclass
+class AdenCredentialResponse:
+    """Response from Aden server containing credential data."""
+
+    integration_id: str
+    """Unique identifier for the integration (e.g., 'hubspot')."""
+
+    integration_type: str
+    """Type of integration (e.g., 'hubspot', 'github', 'slack')."""
+
+    access_token: str
+    """The access token for API calls."""
+
+    token_type: str = "Bearer"
+    """Token type (usually 'Bearer')."""
+
+    expires_at: datetime | None = None
+    """When the access token expires (UTC, always timezone-aware when set)."""
+
+    scopes: list[str] = field(default_factory=list)
+    """OAuth2 scopes granted to this token."""
+
+    metadata: dict[str, Any] = field(default_factory=dict)
+    """Additional integration-specific metadata."""
+
+    @classmethod
+    def from_dict(
+        cls, data: dict[str, Any], integration_id: str | None = None
+    ) -> AdenCredentialResponse:
+        """
+        Create from API response dictionary.
+
+        Args:
+            data: Decoded JSON body. Must contain 'access_token'; 'provider',
+                'alias', 'token_type', 'expires_at', 'scopes' and 'email'
+                are read when present.
+            integration_id: Identifier to use; when omitted, falls back to
+                the 'alias' and then the 'provider' field of ``data``.
+
+        Returns:
+            The parsed credential response.
+
+        Raises:
+            KeyError: If 'access_token' is missing.
+            ValueError: If 'expires_at' is not an ISO-8601 timestamp.
+        """
+        expires_at = None
+        raw_expiry = data.get("expires_at")
+        if raw_expiry:
+            expires_at = datetime.fromisoformat(raw_expiry.replace("Z", "+00:00"))
+            if expires_at.tzinfo is None:
+                # The field is documented as UTC. A naive value would raise
+                # TypeError as soon as it is compared with an aware "now".
+                expires_at = expires_at.replace(tzinfo=timezone.utc)
+
+        email = data.get("email")
+
+        return cls(
+            # `or` chains also cover keys that are present but null/empty.
+            integration_id=integration_id or data.get("alias") or data.get("provider") or "",
+            integration_type=data.get("provider") or "",
+            access_token=data["access_token"],
+            token_type=data.get("token_type", "Bearer"),
+            expires_at=expires_at,
+            scopes=data.get("scopes") or [],
+            metadata={"email": email} if email else {},
+        )
+
+
+@dataclass
+class AdenIntegrationInfo:
+    """Information about an available integration."""
+
+    integration_id: str
+    integration_type: str
+    status: str  # "active", "requires_reauth", "expired"
+    expires_at: datetime | None = None  # timezone-aware (UTC assumed) when set
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> AdenIntegrationInfo:
+        """
+        Create from API response dictionary.
+
+        Args:
+            data: One entry of the integration listing. Must contain
+                'integration_id'; 'provider', 'status' and 'expires_at'
+                are read when present.
+
+        Returns:
+            The parsed integration info. ``integration_type`` defaults to the
+            integration id and ``status`` to "unknown".
+
+        Raises:
+            KeyError: If 'integration_id' is missing.
+            ValueError: If 'expires_at' is not an ISO-8601 timestamp.
+        """
+        expires_at = None
+        raw_expiry = data.get("expires_at")
+        if raw_expiry:
+            expires_at = datetime.fromisoformat(raw_expiry.replace("Z", "+00:00"))
+            if expires_at.tzinfo is None:
+                # Treat offset-less timestamps as UTC so the value can be
+                # compared with timezone-aware datetimes without TypeError.
+                expires_at = expires_at.replace(tzinfo=timezone.utc)
+
+        return cls(
+            integration_id=data["integration_id"],
+            integration_type=data.get("provider", data["integration_id"]),
+            status=data.get("status", "unknown"),
+            expires_at=expires_at,
+        )
+
+
+class AdenCredentialClient:
+    """
+    HTTP client for Aden credential server.
+
+    Handles communication with the Aden authentication server,
+    including fetching credentials, requesting refreshes, and
+    reporting usage statistics.
+
+    The client automatically handles:
+    - Retries with exponential backoff for transient failures
+    - Proper error classification (auth, not found, rate limit, etc.)
+    - Request headers for authentication and tenant isolation
+
+    Usage:
+        # API key loaded from ADEN_API_KEY environment variable
+        config = AdenClientConfig(
+            base_url="https://api.adenhq.com",
+        )
+
+        client = AdenCredentialClient(config)
+
+        # Fetch a credential
+        cred = client.get_credential("hubspot")
+        if cred:
+            headers = {"Authorization": f"Bearer {cred.access_token}"}
+
+        # List all integrations
+        integrations = client.list_integrations()
+        for info in integrations:
+            print(f"{info.integration_id}: {info.status}")
+
+        # Clean up
+        client.close()
+    """
+
+    def __init__(self, config: AdenClientConfig):
+        """
+        Remember the configuration; no connection is opened yet.
+
+        Args:
+            config: Client configuration including base URL and API key.
+        """
+        # The underlying httpx client is built on first use by _get_client().
+        self._client: httpx.Client | None = None
+        self.config = config
+
+    def _get_client(self) -> httpx.Client:
+        """Return the shared HTTP client, building it on the first call."""
+        if self._client is not None:
+            return self._client
+
+        cfg = self.config
+        default_headers = {
+            "Authorization": f"Bearer {cfg.api_key}",
+            "Content-Type": "application/json",
+            "User-Agent": "hive-credential-store/1.0",
+        }
+
+        # The tenant header is only sent when a tenant is configured.
+        if cfg.tenant_id:
+            default_headers["X-Tenant-ID"] = cfg.tenant_id
+
+        self._client = httpx.Client(
+            base_url=cfg.base_url,
+            timeout=cfg.timeout,
+            headers=default_headers,
+        )
+        return self._client
+
+    def _request_with_retry(
+        self,
+        method: str,
+        path: str,
+        **kwargs: Any,
+    ) -> httpx.Response:
+        """
+        Send a request, retrying connection failures and timeouts.
+
+        Connection errors and timeouts are retried up to
+        ``config.retry_attempts`` times, sleeping
+        ``config.retry_delay * 2**attempt`` seconds between attempts.
+        Responses with an error status are never retried.
+
+        Args:
+            method: HTTP method name.
+            path: Request path, resolved against the client's base URL.
+            **kwargs: Passed through to ``httpx.Client.request``.
+
+        Returns:
+            The response, once it has passed ``raise_for_status()``.
+
+        Raises:
+            AdenAuthenticationError: On HTTP 401.
+            AdenNotFoundError: On HTTP 404.
+            AdenRateLimitError: On HTTP 429; ``retry_after`` comes from the
+                Retry-After header, or 60 if it is absent or not an integer.
+            AdenRefreshError: On HTTP 400 whose JSON body has
+                ``error == "refresh_failed"``.
+            AdenClientError: When the last attempt fails to connect or times out.
+            httpx.HTTPStatusError: For any other error status.
+        """
+        client = self._get_client()
+        last_error: Exception | None = None
+
+        for attempt in range(self.config.retry_attempts):
+            try:
+                response = client.request(method, path, **kwargs)
+
+                # Handle specific error codes
+                if response.status_code == 401:
+                    raise AdenAuthenticationError("Agent API key is invalid or revoked")
+
+                if response.status_code == 404:
+                    raise AdenNotFoundError(f"Integration not found: {path}")
+
+                if response.status_code == 429:
+                    # Retry-After may be delay-seconds or an HTTP-date; only
+                    # the integer form is understood here, anything else
+                    # falls back to the 60 second default instead of crashing.
+                    try:
+                        retry_after = int(response.headers.get("Retry-After", 60))
+                    except (TypeError, ValueError):
+                        retry_after = 60
+                    raise AdenRateLimitError(
+                        "Rate limited by Aden server",
+                        retry_after=retry_after,
+                    )
+
+                if response.status_code == 400:
+                    # A 400 body is not guaranteed to be JSON (or a JSON
+                    # object); in that case fall through to raise_for_status().
+                    try:
+                        data = response.json()
+                    except ValueError:
+                        data = None
+                    if isinstance(data, dict) and data.get("error") == "refresh_failed":
+                        raise AdenRefreshError(
+                            data.get("message", "Token refresh failed"),
+                            requires_reauthorization=data.get("requires_reauthorization", False),
+                            reauthorization_url=data.get("reauthorization_url"),
+                        )
+
+                # Success or other error.
+                # NOTE(review): other 4xx/5xx statuses escape here as
+                # httpx.HTTPStatusError, which is not an AdenClientError, so
+                # callers that only catch AdenClientError will not see them.
+                response.raise_for_status()
+                return response
+
+            except (httpx.ConnectError, httpx.TimeoutException) as e:
+                last_error = e
+                if attempt < self.config.retry_attempts - 1:
+                    delay = self.config.retry_delay * (2**attempt)
+                    logger.warning(
+                        f"Aden request failed (attempt {attempt + 1}), retrying in {delay}s: {e}"
+                    )
+                    time.sleep(delay)
+                else:
+                    raise AdenClientError(f"Failed to connect to Aden server: {e}") from e
+
+            except (
+                AdenAuthenticationError,
+                AdenNotFoundError,
+                AdenRefreshError,
+                AdenRateLimitError,
+            ):
+                # Don't retry these errors
+                raise
+
+        # Only reachable when retry_attempts is zero or negative.
+        raise AdenClientError(
+            f"Request failed after {self.config.retry_attempts} attempts"
+        ) from last_error
+
+    def get_credential(self, integration_id: str) -> AdenCredentialResponse | None:
+        """
+        Look up the credential currently held for an integration.
+
+        The Aden server may refresh the token internally if it's expired
+        before returning it.
+
+        Args:
+            integration_id: The integration identifier (e.g., 'hubspot').
+
+        Returns:
+            The parsed credential, or None when the server answers
+            "not found" for this integration.
+
+        Raises:
+            AdenAuthenticationError: If API key is invalid.
+            AdenClientError: For connection failures.
+        """
+        credential_path = f"/v1/credentials/{integration_id}"
+        try:
+            response = self._request_with_retry("GET", credential_path)
+        except AdenNotFoundError:
+            # A missing integration is an expected outcome, not an error.
+            return None
+
+        payload = response.json()
+        return AdenCredentialResponse.from_dict(payload, integration_id=integration_id)
+
+    def request_refresh(self, integration_id: str) -> AdenCredentialResponse:
+        """
+        Ask the Aden server to refresh an integration's token.
+
+        Intended for when the local store sees an expired or near-expiry
+        token; the OAuth2 refresh-token exchange itself happens on Aden.
+
+        Args:
+            integration_id: The integration identifier.
+
+        Returns:
+            Credential response with new access token.
+
+        Raises:
+            AdenRefreshError: If refresh fails (may require re-authorization).
+            AdenNotFoundError: If integration not found.
+            AdenAuthenticationError: If API key is invalid.
+            AdenRateLimitError: If rate limited.
+        """
+        refresh_path = f"/v1/credentials/{integration_id}/refresh"
+        payload = self._request_with_retry("POST", refresh_path).json()
+        return AdenCredentialResponse.from_dict(payload, integration_id=integration_id)
+
+    def list_integrations(self) -> list[AdenIntegrationInfo]:
+        """
+        Enumerate the integrations available to this agent/tenant.
+
+        Returns:
+            One info object per entry under the response's "integrations"
+            key; an empty list when that key is absent.
+
+        Raises:
+            AdenAuthenticationError: If API key is invalid.
+            AdenClientError: For connection failures.
+        """
+        payload = self._request_with_retry("GET", "/v1/credentials").json()
+
+        infos: list[AdenIntegrationInfo] = []
+        for entry in payload.get("integrations", []):
+            infos.append(AdenIntegrationInfo.from_dict(entry))
+        return infos
+
+    def validate_token(self, integration_id: str) -> dict[str, Any]:
+        """
+        Ask the server whether a token is still valid, without fetching it.
+
+        Args:
+            integration_id: The integration identifier.
+
+        Returns:
+            The decoded response body: a dict with 'valid' bool and optional
+            'expires_at', 'reason', 'requires_reauthorization',
+            'reauthorization_url'.
+
+        Raises:
+            AdenNotFoundError: If integration not found.
+            AdenAuthenticationError: If API key is invalid.
+        """
+        validate_path = f"/v1/credentials/{integration_id}/validate"
+        return self._request_with_retry("GET", validate_path).json()
+
+    def report_usage(
+        self,
+        integration_id: str,
+        operation: str,
+        status: str = "success",
+        metadata: dict[str, Any] | None = None,
+    ) -> None:
+        """
+        Report credential usage statistics to Aden.
+
+        This is optional and used for analytics/billing. Reporting is
+        best-effort: any failure is logged as a warning and swallowed.
+
+        Args:
+            integration_id: The integration identifier.
+            operation: Operation name (e.g., 'api_call').
+            status: Operation status ('success', 'error').
+            metadata: Additional operation metadata.
+        """
+        # datetime.utcnow() is deprecated; take an aware UTC "now" and drop
+        # the offset so the wire format stays "<naive ISO-8601>Z" as before.
+        timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
+
+        try:
+            self._request_with_retry(
+                "POST",
+                f"/v1/credentials/{integration_id}/usage",
+                json={
+                    "operation": operation,
+                    "status": status,
+                    "timestamp": timestamp,
+                    "metadata": metadata or {},
+                },
+            )
+        except Exception as e:
+            # Usage reporting is best-effort, don't fail on errors
+            logger.warning(f"Failed to report usage for '{integration_id}': {e}")
+
+    def health_check(self) -> dict[str, Any]:
+        """
+        Probe the Aden server's /health endpoint.
+
+        Never raises: failures are reported through the returned dict.
+
+        Returns:
+            On HTTP 200, the server's JSON body plus a 'latency_ms' entry.
+            On any other status, {'status': 'degraded', 'error': ...}.
+            If anything raises, {'status': 'unhealthy', 'error': ...}.
+        """
+        try:
+            response = self._get_client().get("/health")
+
+            if response.status_code != 200:
+                return {
+                    "status": "degraded",
+                    "error": f"Unexpected status code: {response.status_code}",
+                }
+
+            report = response.json()
+            # Round-trip time as measured by the HTTP client, in milliseconds.
+            report["latency_ms"] = response.elapsed.total_seconds() * 1000
+            return report
+        except Exception as e:
+            return {
+                "status": "unhealthy",
+                "error": str(e),
+            }
+
+    def close(self) -> None:
+        """Shut down the HTTP client, if one was created, and forget it."""
+        if not self._client:
+            return
+
+        self._client.close()
+        # Dropping the reference lets _get_client() build a fresh client later.
+        self._client = None
+
+    def __enter__(self) -> AdenCredentialClient:
+        """Enter a ``with`` block, yielding this client itself."""
+        return self
+
+    def __exit__(self, *args: Any) -> None:
+        """Leave a ``with`` block by closing the client; exceptions propagate."""
+        self.close()
@@ -0,0 +1,415 @@
+"""
+Aden Sync Provider.
+
+Provider that synchronizes credentials with the Aden authentication server.
+The Aden server is the authoritative source for OAuth2 tokens - this provider
+fetches and caches tokens locally while delegating refresh operations to Aden.
+
+Usage:
+    import os
+
+    from core.framework.credentials import CredentialStore
+    from core.framework.credentials.storage import EncryptedFileStorage
+    from core.framework.credentials.aden import (
+        AdenCredentialClient,
+        AdenClientConfig,
+        AdenSyncProvider,
+    )
+
+    # Configure client (API key loaded from ADEN_API_KEY env var)
+    client = AdenCredentialClient(AdenClientConfig(
+        base_url=os.environ["ADEN_API_URL"],
+    ))
+
+    # Create provider
+    provider = AdenSyncProvider(client=client)
+
+    # Create store
+    store = CredentialStore(
+        storage=EncryptedFileStorage(),
+        providers=[provider],
+        auto_refresh=True,
+    )
+
+    # Initial sync from Aden
+    provider.sync_all(store)
+
+    # Use normally - auto-refreshes via Aden when needed
+    token = store.get_key("hubspot", "access_token")
+"""
+
+from __future__ import annotations
+
+import logging
+from datetime import UTC, datetime, timedelta
+from typing import TYPE_CHECKING
+
+from pydantic import SecretStr
+
+from ..models import CredentialKey, CredentialObject, CredentialRefreshError, CredentialType
+from ..provider import CredentialProvider
+from .client import (
+    AdenClientError,
+    AdenCredentialClient,
+    AdenCredentialResponse,
+    AdenRefreshError,
+)
+
+if TYPE_CHECKING:
+    from ..store import CredentialStore
+
+logger = logging.getLogger(__name__)
+
+
+class AdenSyncProvider(CredentialProvider):
+    """
+    Provider that synchronizes credentials with the Aden server.
+
+    The Aden server handles OAuth2 authorization flows and maintains
+    refresh tokens. This provider:
+
+    - Fetches access tokens from the Aden server
+    - Delegates token refresh to the Aden server
+    - Caches tokens locally in the credential store
+    - Optionally reports usage statistics back to Aden
+
+    Key benefits:
+    - Client secrets never leave the Aden server
+    - Refresh token security (stored only on Aden)
+    - Centralized audit logging
+    - Multi-tenant support
+
+    Usage:
+        client = AdenCredentialClient(AdenClientConfig(
+            base_url="https://api.adenhq.com",
+            api_key=os.environ["ADEN_API_KEY"],
+        ))
+
+        provider = AdenSyncProvider(client=client)
+
+        store = CredentialStore(
+            storage=EncryptedFileStorage(),
+            providers=[provider],
+            auto_refresh=True,
+        )
+    """
+
+    def __init__(
+        self,
+        client: AdenCredentialClient,
+        provider_id: str = "aden_sync",
+        refresh_buffer_minutes: int = 5,
+        report_usage: bool = False,
+    ):
+        """
+        Set up a provider bound to one Aden client.
+
+        Args:
+            client: Configured Aden API client.
+            provider_id: Unique identifier for this provider instance.
+                        Useful for multi-tenant scenarios (e.g., 'aden_tenant_123').
+            refresh_buffer_minutes: How many minutes ahead of expiry a token
+                                   is already considered due for refresh.
+            report_usage: Whether to report usage statistics to Aden server.
+        """
+        self._provider_id = provider_id
+        self._client = client
+        self._report_usage = report_usage
+        # Kept as a timedelta so it can be subtracted from expiry datetimes.
+        self._refresh_buffer = timedelta(minutes=refresh_buffer_minutes)
+
+    @property
+    def provider_id(self) -> str:
+        """The identifier this provider instance was constructed with."""
+        return self._provider_id
+
+    @property
+    def supported_types(self) -> list[CredentialType]:
+        """The credential types handled here: OAuth2 and bearer tokens."""
+        handled = [CredentialType.OAUTH2, CredentialType.BEARER_TOKEN]
+        return handled
+
+    def can_handle(self, credential: CredentialObject) -> bool:
+        """
+        Decide whether this provider is responsible for a credential.
+
+        The credential type must be one of ``supported_types``. Beyond that,
+        either of the following is sufficient:
+        - the credential's provider_id equals this provider's id
+        - the credential carries an '_aden_managed' key whose value is "true"
+        """
+        if credential.credential_type not in self.supported_types:
+            return False
+
+        # Explicit link to this provider instance.
+        if credential.provider_id == self.provider_id:
+            return True
+
+        # Otherwise rely on the marker key written when syncing from Aden.
+        marker = credential.keys.get("_aden_managed")
+        return bool(marker) and marker.value.get_secret_value() == "true"
+
+    def refresh(self, credential: CredentialObject) -> CredentialObject:
+        """
+        Refresh credential by requesting new token from Aden server.
+
+        The Aden server handles the actual OAuth2 refresh token flow.
+        This method simply fetches the result and writes it into the
+        given credential object.
+
+        If the Aden server cannot be used (any ``AdenClientError`` other
+        than a refresh failure), the locally cached credential is returned
+        unchanged as long as its access token has not expired. A token with
+        no expiry is treated as still usable, matching ``validate()``.
+
+        Args:
+            credential: The credential to refresh.
+
+        Returns:
+            Updated credential with new access token, or the unchanged
+            cached credential when falling back.
+
+        Raises:
+            CredentialRefreshError: If refresh fails and no usable cached
+                token is available.
+        """
+        try:
+            # Request Aden to refresh the token
+            aden_response = self._client.request_refresh(credential.id)
+
+            # Update credential with new values
+            credential = self._update_credential_from_aden(credential, aden_response)
+
+            logger.info(f"Refreshed credential '{credential.id}' via Aden server")
+
+            # Report usage if enabled
+            if self._report_usage:
+                self._client.report_usage(
+                    integration_id=credential.id,
+                    operation="token_refresh",
+                    status="success",
+                )
+
+            return credential
+
+        except AdenRefreshError as e:
+            logger.error(f"Aden refresh failed for '{credential.id}': {e}")
+
+            if e.requires_reauthorization:
+                raise CredentialRefreshError(
+                    f"Integration '{credential.id}' requires re-authorization. "
+                    f"Visit: {e.reauthorization_url or 'your Aden dashboard'}"
+                ) from e
+
+            raise CredentialRefreshError(
+                f"Failed to refresh credential '{credential.id}': {e}"
+            ) from e
+
+        except AdenClientError as e:
+            logger.error(f"Aden client error for '{credential.id}': {e}")
+
+            # Check if local token is still valid
+            access_key = credential.keys.get("access_token")
+            if access_key:
+                expires_at = access_key.expires_at
+                # No expiry means the token cannot have expired; previously
+                # this case was wrongly reported as "token expired".
+                if expires_at is None or datetime.now(UTC) < expires_at:
+                    logger.warning(f"Aden unavailable, using cached token for '{credential.id}'")
+                    return credential
+
+            raise CredentialRefreshError(
+                f"Aden server unavailable and token expired for '{credential.id}'"
+            ) from e
+
+    def validate(self, credential: CredentialObject) -> bool:
+        """
+        Check a credential's validity, preferring the Aden server's answer.
+
+        If the Aden call raises ``AdenClientError``, the decision is made
+        locally from the cached access token's expiry instead.
+
+        Args:
+            credential: The credential to validate.
+
+        Returns:
+            True if credential is valid.
+        """
+        try:
+            return self._client.validate_token(credential.id).get("valid", False)
+        except AdenClientError:
+            # Local fallback: no token is invalid, no expiry is assumed valid.
+            token = credential.keys.get("access_token")
+            if token is None:
+                return False
+
+            expiry = token.expires_at
+            return expiry is None or datetime.now(UTC) < expiry
+
+    def should_refresh(self, credential: CredentialObject) -> bool:
+        """
+        Tell whether the access token is due for a refresh.
+
+        A token is due once the current time reaches its expiry minus the
+        configured refresh buffer. Credentials without an access token, or
+        whose token has no expiry, are never due.
+
+        Args:
+            credential: The credential to check.
+
+        Returns:
+            True if credential should be refreshed.
+        """
+        token = credential.keys.get("access_token")
+        if token is None or token.expires_at is None:
+            return False
+
+        refresh_at = token.expires_at - self._refresh_buffer
+        return datetime.now(UTC) >= refresh_at
+
+    def fetch_from_aden(self, integration_id: str) -> CredentialObject | None:
+        """
+        Retrieve one credential straight from the Aden server.
+
+        Meant for initial population or when the local cache has no entry.
+
+        Args:
+            integration_id: The integration identifier (e.g., 'hubspot').
+
+        Returns:
+            CredentialObject if found, None otherwise.
+
+        Raises:
+            AdenClientError: For connection failures.
+        """
+        fetched = self._client.get_credential(integration_id)
+        return None if fetched is None else self._aden_response_to_credential(fetched)
+
+    def sync_all(self, store: CredentialStore) -> int:
+        """
+        Copy every active integration's credential from Aden into a store.
+
+        Integrations whose status is not "active" are skipped with a
+        warning. A failure on one integration is logged and does not stop
+        the others; a failure to list integrations is logged and yields 0.
+
+        Args:
+            store: The credential store to populate.
+
+        Returns:
+            Number of credentials synced.
+        """
+        try:
+            integrations = self._client.list_integrations()
+        except AdenClientError as e:
+            logger.error(f"Failed to list integrations from Aden: {e}")
+            return 0
+
+        synced = 0
+        for info in integrations:
+            if info.status != "active":
+                logger.warning(
+                    f"Skipping integration '{info.integration_id}': status={info.status}"
+                )
+                continue
+
+            # Best-effort per integration: one bad entry must not abort the run.
+            try:
+                cred = self.fetch_from_aden(info.integration_id)
+                if not cred:
+                    continue
+                store.save_credential(cred)
+                synced += 1
+                logger.info(f"Synced credential '{info.integration_id}' from Aden")
+            except Exception as e:
+                logger.warning(f"Failed to sync '{info.integration_id}': {e}")
+
+        return synced
+
+    def report_credential_usage(
+        self,
+        credential: CredentialObject,
+        operation: str,
+        status: str = "success",
+        metadata: dict | None = None,
+    ) -> None:
+        """
+        Forward a usage event to Aden, if usage reporting is enabled.
+
+        Does nothing when the provider was created with report_usage=False.
+
+        Args:
+            credential: The credential that was used.
+            operation: Operation name (e.g., 'api_call').
+            status: Operation status ('success', 'error').
+            metadata: Additional metadata.
+        """
+        if not self._report_usage:
+            return
+
+        self._client.report_usage(
+            integration_id=credential.id,
+            operation=operation,
+            status=status,
+            metadata=metadata or {},
+        )
+
+    def _update_credential_from_aden(
+        self,
+        credential: CredentialObject,
+        aden_response: AdenCredentialResponse,
+    ) -> CredentialObject:
+        """
+        Write the values of an Aden response into an existing credential.
+
+        The credential is modified in place and also returned.
+        """
+
+        def secret_key(name: str, raw: str, **extra) -> CredentialKey:
+            # Every stored value is wrapped in SecretStr.
+            return CredentialKey(name=name, value=SecretStr(raw), **extra)
+
+        keys = credential.keys
+
+        # New access token, carrying the expiry reported by Aden.
+        keys["access_token"] = secret_key(
+            "access_token",
+            aden_response.access_token,
+            expires_at=aden_response.expires_at,
+        )
+
+        # Scopes are stored as one space-separated string, only when present.
+        if aden_response.scopes:
+            keys["scope"] = secret_key("scope", " ".join(aden_response.scopes))
+
+        # Marker and integration type.
+        keys["_aden_managed"] = secret_key("_aden_managed", "true")
+        keys["_integration_type"] = secret_key(
+            "_integration_type", aden_response.integration_type
+        )
+
+        # Record when and by which provider the credential was refreshed.
+        credential.last_refreshed = datetime.now(UTC)
+        credential.provider_id = self.provider_id
+
+        return credential
+
+    def _aden_response_to_credential(
+        self,
+        aden_response: AdenCredentialResponse,
+    ) -> CredentialObject:
+        """Build a new OAuth2 CredentialObject from an Aden response."""
+        keys: dict[str, CredentialKey] = {}
+
+        keys["access_token"] = CredentialKey(
+            name="access_token",
+            value=SecretStr(aden_response.access_token),
+            expires_at=aden_response.expires_at,
+        )
+        # Marker key, then the integration type.
+        keys["_aden_managed"] = CredentialKey(
+            name="_aden_managed",
+            value=SecretStr("true"),
+        )
+        keys["_integration_type"] = CredentialKey(
+            name="_integration_type",
+            value=SecretStr(aden_response.integration_type),
+        )
+
+        # Scopes are optional and stored as one space-separated string.
+        if aden_response.scopes:
+            joined_scopes = " ".join(aden_response.scopes)
+            keys["scope"] = CredentialKey(name="scope", value=SecretStr(joined_scopes))
+
+        return CredentialObject(
+            id=aden_response.integration_id,
+            credential_type=CredentialType.OAUTH2,
+            keys=keys,
+            provider_id=self.provider_id,
+            auto_refresh=True,
+        )
@@ -0,0 +1,389 @@
+"""
+Aden Cached Storage.
+
+Storage backend that combines local cache with Aden server fallback.
+Provides offline resilience by caching credentials locally while
+keeping them synchronized with the Aden server.
+
+Usage:
+    import os
+
+    from core.framework.credentials import CredentialStore
+    from core.framework.credentials.storage import EncryptedFileStorage
+    from core.framework.credentials.aden import (
+        AdenCredentialClient,
+        AdenClientConfig,
+        AdenSyncProvider,
+        AdenCachedStorage,
+    )
+
+    # Configure
+    client = AdenCredentialClient(AdenClientConfig(
+        base_url=os.environ["ADEN_API_URL"],
+        api_key=os.environ["ADEN_API_KEY"],
+    ))
+    provider = AdenSyncProvider(client=client)
+
+    # Create cached storage
+    storage = AdenCachedStorage(
+        local_storage=EncryptedFileStorage(),
+        aden_provider=provider,
+        cache_ttl_seconds=300,  # Re-check Aden every 5 minutes
+    )
+
+    # Create store
+    store = CredentialStore(
+        storage=storage,
+        providers=[provider],
+        auto_refresh=True,
+    )
+
+    # Credentials automatically fetched from Aden on first access
+    # Cached locally for 5 minutes
+    # Falls back to cache if Aden is unreachable
+"""
+
+from __future__ import annotations
+
+import logging
+from datetime import UTC, datetime, timedelta
+from typing import TYPE_CHECKING
+
+from ..storage import CredentialStorage
+
+if TYPE_CHECKING:
+    from ..models import CredentialObject
+    from .provider import AdenSyncProvider
+
+logger = logging.getLogger(__name__)
+
+
+class AdenCachedStorage(CredentialStorage):
+    """
+    Storage with local cache and Aden server fallback.
+
+    This storage provides:
+    - **Reads**: Try local cache first, fallback to Aden if stale/missing
+    - **Writes**: Always write to local cache
+    - **Offline resilience**: Uses cached credentials when Aden is unreachable
+    - **Provider-based lookup**: Match credentials by provider name (e.g., "hubspot")
+      when direct ID lookup fails, since Aden uses hash-based IDs internally.
+
+    The cache TTL determines how long to trust local credentials before
+    checking with the Aden server for updates. This balances:
+    - Performance (fewer network calls)
+    - Freshness (tokens stay current)
+    - Resilience (works during brief outages)
+
+    Usage:
+        storage = AdenCachedStorage(
+            local_storage=EncryptedFileStorage(),
+            aden_provider=provider,
+            cache_ttl_seconds=300,  # 5 minutes
+        )
+
+        store = CredentialStore(
+            storage=storage,
+            providers=[provider],
+        )
+
+        # First access fetches from Aden
+        # Subsequent accesses use cache until TTL expires
+        # Can look up by provider name OR credential ID
+        token = store.get_key("hubspot", "access_token")
+    """
+
+    def __init__(
+        self,
+        local_storage: CredentialStorage,
+        aden_provider: AdenSyncProvider,
+        cache_ttl_seconds: int = 300,
+        prefer_local: bool = True,
+    ):
+        """
+        Set up the cache layer on top of a local storage backend.
+
+        Args:
+            local_storage: Backend that holds the cached credentials
+                          (e.g., EncryptedFileStorage).
+            aden_provider: Provider used to fetch credentials from the Aden server.
+            cache_ttl_seconds: Seconds a cached credential counts as fresh before
+                              Aden is consulted again. Defaults to 300 (5 minutes).
+            prefer_local: When True, a fresh cached credential is returned without
+                         contacting Aden. When False, Aden is always tried first.
+        """
+        # Collaborators
+        self._aden_provider = aden_provider
+        self._local = local_storage
+
+        # Cache policy
+        self._prefer_local = prefer_local
+        self._cache_ttl = timedelta(seconds=cache_ttl_seconds)
+
+        # In-memory bookkeeping; both maps start out empty.
+        # credential ID -> time the credential was last written to the cache
+        self._cache_timestamps: dict[str, datetime] = {}
+        # provider name (e.g., "hubspot") -> credential hash ID
+        self._provider_index: dict[str, str] = {}
+
+    def save(self, credential: CredentialObject) -> None:
+        """
+        Write a credential to the local cache and record it as fresh.
+
+        Besides delegating to the local backend, this stamps the credential
+        with the current UTC time (restarting its TTL) and updates the
+        provider-name index entry for it.
+
+        Args:
+            credential: The credential to cache.
+        """
+        self._local.save(credential)
+
+        cached_id = credential.id
+        self._cache_timestamps[cached_id] = datetime.now(UTC)
+        self._index_provider(credential)
+        logger.debug(f"Cached credential '{credential.id}'")
+
+    def load(self, credential_id: str) -> CredentialObject | None:
+        """
+        Resolve and load a credential, given either its ID or a provider name.
+
+        Lookup order:
+        1. If the provider index maps `credential_id` to a different
+           credential ID (e.g., "hubspot" → hash ID), that credential is
+           loaded first, so the Aden-synced entry takes priority over a
+           local credential stored under the same name.
+        2. If there is no such mapping, or it yields nothing, `credential_id`
+           is treated as an exact credential ID.
+
+        Each lookup goes through `_load_by_id`, which applies the strategy
+        selected by `prefer_local`:
+
+        If prefer_local=True (default):
+        - A local copy that is still within the TTL is returned as is.
+        - A stale or missing copy triggers a fetch from Aden, and the
+          response replaces the local copy.
+        - If Aden fails, the stale local copy is used.
+
+        If prefer_local=False:
+        - Aden is always tried first and its response is cached.
+        - The local copy is used only if Aden fails or has no credential.
+
+        Args:
+            credential_id: The credential identifier or provider name.
+
+        Returns:
+            CredentialObject if found, None otherwise.
+        """
+        resolved_id = self._provider_index.get(credential_id)
+        is_alias = bool(resolved_id) and resolved_id != credential_id
+        result = self._load_by_id(resolved_id) if is_alias else None
+
+        if result is None:
+            # No usable provider mapping: fall back to an exact ID match.
+            return self._load_by_id(credential_id)
+
+        logger.info(
+            f"Loaded credential '{credential_id}' via provider index (id='{resolved_id}')"
+        )
+        return result
+
+    def _load_by_id(self, credential_id: str) -> CredentialObject | None:
+        """
+        Load credential by exact ID from cache, with Aden fallback.
+
+        Strategy:
+        1. Read the local copy. If `prefer_local` is set and that copy is
+           still within the cache TTL, return it without contacting Aden.
+        2. Otherwise ask Aden for the credential.
+        3. If Aden returns one, store it in the local cache and return it.
+           A failure to write the cache is logged but does not discard the
+           freshly fetched credential.
+        4. If Aden raises or has no such credential, return the local copy
+           (possibly stale), or None when there is none.
+
+        Args:
+            credential_id: The exact credential identifier.
+
+        Returns:
+            CredentialObject if found, None otherwise.
+        """
+        local_cred = self._local.load(credential_id)
+
+        # If we prefer local and have a fresh cache, use it
+        if self._prefer_local and local_cred and self._is_cache_fresh(credential_id):
+            logger.debug(f"Using cached credential '{credential_id}'")
+            return local_cred
+
+        # Try to fetch from Aden. Only the fetch itself is guarded here so
+        # that a cache-write problem is never reported as an Aden failure.
+        try:
+            aden_cred = self._aden_provider.fetch_from_aden(credential_id)
+        except Exception as e:
+            logger.warning(f"Failed to fetch '{credential_id}' from Aden: {e}")
+            if local_cred:
+                logger.info(f"Using stale cached credential '{credential_id}'")
+            return local_cred
+
+        if not aden_cred:
+            # Aden has no such credential: keep whatever is cached (may be None)
+            return local_cred
+
+        # Update local cache. The fetched credential is valid even if it
+        # cannot be cached, so a write failure must not hide it.
+        try:
+            self.save(aden_cred)
+        except Exception as e:
+            logger.warning(f"Failed to cache credential '{credential_id}' locally: {e}")
+
+        logger.debug(f"Fetched credential '{credential_id}' from Aden")
+        return aden_cred
+
+    def delete(self, credential_id: str) -> bool:
+        """
+        Remove a credential from the local cache only.
+
+        Note: The credential on the Aden server is NOT deleted; only the
+        local cache entry and its freshness timestamp are removed. Provider
+        index entries that point at this ID are left untouched.
+
+        Args:
+            credential_id: The credential identifier.
+
+        Returns:
+            True if credential existed and was deleted.
+        """
+        # Forget the freshness stamp before touching the backend.
+        self._cache_timestamps.pop(credential_id, None)
+        was_deleted = self._local.delete(credential_id)
+        return was_deleted
+
+    def list_all(self) -> list[str]:
+        """
+        List credentials from local cache.
+
+        Delegates to the local storage backend; the Aden server is not
+        queried, so credentials that exist only in Aden are not included.
+
+        Returns:
+            List of credential IDs in local cache.
+        """
+        return self._local.list_all()
+
+    def exists(self, credential_id: str) -> bool:
+        """
+        Report whether the local cache holds a credential for this ID or provider name.
+
+        The identifier is first checked as an exact credential ID. If that
+        misses and the provider index maps it to a different ID, the mapped
+        ID is checked instead. The Aden server is never contacted.
+
+        Args:
+            credential_id: The credential identifier or provider name.
+
+        Returns:
+            True if credential exists locally.
+        """
+        found_directly = self._local.exists(credential_id)
+        if found_directly:
+            return True
+
+        # Fall back to the provider index; a missing entry or one that maps
+        # the name onto itself cannot change the answer.
+        alias_target = self._provider_index.get(credential_id)
+        if not alias_target or alias_target == credential_id:
+            return False
+        return self._local.exists(alias_target)
+
+    def _is_cache_fresh(self, credential_id: str) -> bool:
+        """
+        Tell whether a credential was cached less than one TTL ago.
+
+        Args:
+            credential_id: The credential identifier.
+
+        Returns:
+            True if cache is fresh, False if stale or not cached.
+        """
+        stamped_at = self._cache_timestamps.get(credential_id)
+        if stamped_at is not None:
+            age = datetime.now(UTC) - stamped_at
+            return age < self._cache_ttl
+        # No timestamp recorded: treat as not cached.
+        return False
+
+    def invalidate_cache(self, credential_id: str) -> None:
+        """
+        Drop the freshness timestamp of a single credential.
+
+        Without a timestamp the credential no longer counts as fresh, so the
+        next load() will try Aden regardless of TTL. The cached credential
+        itself stays in local storage.
+
+        Args:
+            credential_id: The credential identifier.
+        """
+        if credential_id in self._cache_timestamps:
+            del self._cache_timestamps[credential_id]
+        logger.debug(f"Invalidated cache for '{credential_id}'")
+
+    def invalidate_all(self) -> None:
+        """
+        Invalidate all cache entries.
+
+        Clears every freshness timestamp, so no cached credential counts as
+        fresh afterwards. The credentials themselves stay in local storage,
+        and the provider index is left as is.
+        """
+        self._cache_timestamps.clear()
+        logger.debug("Invalidated all cache entries")
+
+    def _index_provider(self, credential: CredentialObject) -> None:
+        """
+        Record which credential ID serves a given provider name.
+
+        The provider name is read from the credential's ``_integration_type``
+        key (e.g., ``hubspot``) and mapped to the credential's ID, replacing
+        any earlier mapping for that name. Credentials without that key, or
+        with an empty value for it, are not indexed.
+
+        Args:
+            credential: The credential to index.
+        """
+        integration_type_key = credential.keys.get("_integration_type")
+        if integration_type_key is not None:
+            provider_name = integration_type_key.value.get_secret_value()
+            if provider_name:
+                self._provider_index[provider_name] = credential.id
+                logger.debug(f"Indexed provider '{provider_name}' -> '{credential.id}'")
+
+    def rebuild_provider_index(self) -> int:
+        """
+        Recreate the provider-name index from the locally cached credentials.
+
+        The existing index is discarded, then every credential listed by the
+        local backend is loaded and re-indexed. Useful after loading from
+        disk, when the in-memory index is still empty.
+
+        Returns:
+            Number of provider mappings indexed.
+        """
+        self._provider_index.clear()
+
+        for cred_id in self._local.list_all():
+            cred = self._local.load(cred_id)
+            if not cred:
+                continue
+            self._index_provider(cred)
+
+        # The index started out empty and each credential contributes at most
+        # one provider name, so its size is the number of mappings created.
+        indexed = len(self._provider_index)
+        logger.debug(f"Rebuilt provider index with {indexed} mappings")
+        return indexed
+
+    def sync_all_from_aden(self) -> int:
+        """
+        Pull every active Aden integration into the local cache.
+
+        Lists the integrations known to Aden, skips those whose status is
+        not "active", and fetches and caches a credential for each of the
+        rest. Failures are logged rather than raised: a failed fetch for one
+        integration does not stop the others, while an error raised while
+        listing (or anywhere outside the per-integration fetch) ends the sync
+        with whatever was stored up to that point.
+
+        Returns:
+            Number of credentials synced.
+        """
+        synced = 0
+
+        try:
+            for info in self._aden_provider._client.list_integrations():
+                if info.status == "active":
+                    try:
+                        cred = self._aden_provider.fetch_from_aden(info.integration_id)
+                        if not cred:
+                            # Aden had nothing to hand back for this integration.
+                            continue
+                        self.save(cred)
+                        synced += 1
+                        logger.info(f"Synced credential '{info.integration_id}' from Aden")
+                    except Exception as e:
+                        logger.warning(f"Failed to sync '{info.integration_id}': {e}")
+                else:
+                    logger.warning(
+                        f"Skipping integration '{info.integration_id}': status={info.status}"
+                    )
+        except Exception as e:
+            logger.error(f"Failed to list integrations from Aden: {e}")
+
+        return synced
+
+    def get_cache_info(self) -> dict[str, dict]:
+        """
+        Report the cache status of every locally stored credential.
+
+        Credentials that are stored locally but have no recorded timestamp
+        are reported as not fresh, with no cache time and zero TTL left.
+
+        Returns:
+            Dict mapping credential_id to cache info (cached_at, is_fresh, ttl_remaining).
+        """
+        now = datetime.now(UTC)
+        report = {}
+
+        for cred_id in self.list_all():
+            cached_at = self._cache_timestamps.get(cred_id)
+            if not cached_at:
+                report[cred_id] = {
+                    "cached_at": None,
+                    "is_fresh": False,
+                    "ttl_remaining_seconds": 0,
+                }
+                continue
+
+            # Negative once the TTL has run out; clamped to zero in the report.
+            ttl_remaining = (cached_at + self._cache_ttl - now).total_seconds()
+            report[cred_id] = {
+                "cached_at": cached_at.isoformat(),
+                "is_fresh": ttl_remaining > 0,
+                "ttl_remaining_seconds": max(0, ttl_remaining),
+            }
+
+        return report
@@ -0,0 +1 @@
+"""Tests for Aden credential sync components."""
@@ -0,0 +1,813 @@
+"""
+Tests for Aden credential sync components.
+
+Tests cover:
+- AdenCredentialClient: HTTP client for Aden API
+- AdenSyncProvider: Provider that syncs with Aden
+- AdenCachedStorage: Storage with local cache + Aden fallback
+"""
+
+from datetime import UTC, datetime, timedelta
+from unittest.mock import Mock
+
+import pytest
+from pydantic import SecretStr
+
+from framework.credentials import (
+    CredentialKey,
+    CredentialObject,
+    CredentialStore,
+    CredentialType,
+    InMemoryStorage,
+)
+from framework.credentials.aden import (
+    AdenCachedStorage,
+    AdenClientConfig,
+    AdenClientError,
+    AdenCredentialClient,
+    AdenCredentialResponse,
+    AdenIntegrationInfo,
+    AdenRefreshError,
+    AdenSyncProvider,
+)
+
+# =============================================================================
+# Fixtures
+# =============================================================================
+
+
+@pytest.fixture
+def aden_config():
+    """Provide an Aden client config aimed at a fake endpoint, with short retry settings."""
+    settings = {
+        "base_url": "https://api.test-aden.com",
+        "api_key": "test-api-key",
+        "tenant_id": "test-tenant",
+        "timeout": 5.0,
+        "retry_attempts": 2,
+        "retry_delay": 0.1,
+    }
+    return AdenClientConfig(**settings)
+
+
+@pytest.fixture
+def mock_client(aden_config):
+    """Provide a Mock restricted to the AdenCredentialClient interface, carrying the test config."""
+    fake_client = Mock(spec=AdenCredentialClient)
+    fake_client.config = aden_config
+    return fake_client
+
+
+@pytest.fixture
+def aden_response():
+    """Provide a sample HubSpot credential response whose token expires one hour from now."""
+    one_hour_from_now = datetime.now(UTC) + timedelta(hours=1)
+    granted_scopes = ["crm.objects.contacts.read", "crm.objects.contacts.write"]
+    return AdenCredentialResponse(
+        integration_id="hubspot",
+        integration_type="hubspot",
+        access_token="test-access-token",
+        token_type="Bearer",
+        expires_at=one_hour_from_now,
+        scopes=granted_scopes,
+        metadata={"portal_id": "12345"},
+    )
+
+
+@pytest.fixture
+def provider(mock_client):
+    """Provide an AdenSyncProvider wired to the mock client, with usage reporting off."""
+    options = {
+        "provider_id": "test_aden",
+        "refresh_buffer_minutes": 5,
+        "report_usage": False,
+    }
+    return AdenSyncProvider(client=mock_client, **options)
+
+
+@pytest.fixture
+def local_storage():
+    """Create an in-memory storage to serve as the local cache backend in tests."""
+    return InMemoryStorage()
+
+
+@pytest.fixture
+def cached_storage(local_storage, provider):
+    """Provide an AdenCachedStorage over the in-memory backend (60 s TTL, local preferred)."""
+    storage = AdenCachedStorage(
+        local_storage=local_storage,
+        aden_provider=provider,
+        cache_ttl_seconds=60,
+        prefer_local=True,
+    )
+    return storage
+
+
+# =============================================================================
+# AdenCredentialResponse Tests
+# =============================================================================
+
+
+class TestAdenCredentialResponse:
+    """Tests for building AdenCredentialResponse objects from API payloads."""
+
+    def test_from_dict_basic(self):
+        """A minimal payload parses, and omitted fields take their defaults."""
+        parsed = AdenCredentialResponse.from_dict(
+            {
+                "integration_id": "github",
+                "integration_type": "github",
+                "access_token": "ghp_xxxxx",
+            }
+        )
+
+        # Fields supplied in the payload
+        assert parsed.integration_id == "github"
+        assert parsed.integration_type == "github"
+        assert parsed.access_token == "ghp_xxxxx"
+        # Fields left out of the payload
+        assert parsed.token_type == "Bearer"
+        assert parsed.expires_at is None
+        assert parsed.scopes == []
+
+    def test_from_dict_full(self):
+        """A payload carrying every field is parsed field by field."""
+        payload = {
+            "integration_id": "hubspot",
+            "integration_type": "hubspot",
+            "access_token": "token123",
+            "token_type": "Bearer",
+            "expires_at": "2026-01-28T15:30:00Z",
+            "scopes": ["read", "write"],
+            "metadata": {"key": "value"},
+        }
+
+        parsed = AdenCredentialResponse.from_dict(payload)
+
+        assert parsed.integration_id == "hubspot"
+        assert parsed.access_token == "token123"
+        assert parsed.expires_at is not None
+        assert parsed.scopes == ["read", "write"]
+        assert parsed.metadata == {"key": "value"}
+
+
+class TestAdenIntegrationInfo:
+    """Tests for building AdenIntegrationInfo objects from API payloads."""
+
+    def test_from_dict(self):
+        """An integration listing entry is parsed into its fields."""
+        parsed = AdenIntegrationInfo.from_dict(
+            {
+                "integration_id": "slack",
+                "integration_type": "slack",
+                "status": "active",
+                "expires_at": "2026-02-01T00:00:00Z",
+            }
+        )
+
+        assert parsed.integration_id == "slack"
+        assert parsed.integration_type == "slack"
+        assert parsed.status == "active"
+        assert parsed.expires_at is not None
+
+
+# =============================================================================
+# AdenSyncProvider Tests
+# =============================================================================
+
+
+class TestAdenSyncProvider:
+    """Tests for AdenSyncProvider, run against a mocked Aden client."""
+
+    def test_provider_id(self, provider):
+        """The provider reports the ID it was constructed with."""
+        assert provider.provider_id == "test_aden"
+
+    def test_supported_types(self, provider):
+        """OAuth2 and bearer-token credentials are both supported."""
+        supported = provider.supported_types
+
+        assert CredentialType.OAUTH2 in supported
+        assert CredentialType.BEARER_TOKEN in supported
+
+    def test_can_handle_oauth2(self, provider):
+        """An OAUTH2 credential with a matching provider_id is handled."""
+        credential = CredentialObject(
+            id="test",
+            credential_type=CredentialType.OAUTH2,
+            keys={},
+            provider_id="test_aden",
+        )
+
+        handled = provider.can_handle(credential)
+
+        assert handled is True
+
+    def test_can_handle_aden_managed(self, provider):
+        """A credential flagged as Aden-managed is handled."""
+        managed_flag = CredentialKey(name="_aden_managed", value=SecretStr("true"))
+        credential = CredentialObject(
+            id="test",
+            credential_type=CredentialType.OAUTH2,
+            keys={"_aden_managed": managed_flag},
+        )
+
+        handled = provider.can_handle(credential)
+
+        assert handled is True
+
+    def test_can_handle_wrong_type(self, provider):
+        """A credential of an unsupported type is rejected."""
+        credential = CredentialObject(
+            id="test",
+            credential_type=CredentialType.API_KEY,
+            keys={},
+        )
+
+        handled = provider.can_handle(credential)
+
+        assert handled is False
+
+    def test_refresh_success(self, provider, mock_client, aden_response):
+        """A successful refresh swaps in the token returned by Aden."""
+        mock_client.request_refresh.return_value = aden_response
+
+        old_token = CredentialKey(name="access_token", value=SecretStr("old-token"))
+        credential = CredentialObject(
+            id="hubspot",
+            credential_type=CredentialType.OAUTH2,
+            keys={"access_token": old_token},
+            provider_id="test_aden",
+        )
+
+        refreshed = provider.refresh(credential)
+
+        refreshed_keys = refreshed.keys
+        assert refreshed_keys["access_token"].value.get_secret_value() == "test-access-token"
+        assert refreshed_keys["_aden_managed"].value.get_secret_value() == "true"
+        assert refreshed.last_refreshed is not None
+        mock_client.request_refresh.assert_called_once_with("hubspot")
+
+    def test_refresh_requires_reauth(self, provider, mock_client):
+        """A refresh that needs re-authorization surfaces as CredentialRefreshError."""
+        from framework.credentials import CredentialRefreshError
+
+        mock_client.request_refresh.side_effect = AdenRefreshError(
+            "Token revoked",
+            requires_reauthorization=True,
+            reauthorization_url="https://aden.com/reauth",
+        )
+        credential = CredentialObject(
+            id="hubspot",
+            credential_type=CredentialType.OAUTH2,
+            keys={},
+        )
+
+        with pytest.raises(CredentialRefreshError) as exc_info:
+            provider.refresh(credential)
+
+        message = str(exc_info.value).lower()
+        assert "re-authorization" in message
+
+    def test_refresh_aden_unavailable_cached_valid(self, provider, mock_client):
+        """With Aden unreachable, a still-valid cached token is returned unchanged."""
+        mock_client.request_refresh.side_effect = AdenClientError("Connection failed")
+
+        # The cached token has an hour of life left.
+        expires_in_an_hour = datetime.now(UTC) + timedelta(hours=1)
+        cached_token = CredentialKey(
+            name="access_token",
+            value=SecretStr("cached-token"),
+            expires_at=expires_in_an_hour,
+        )
+        credential = CredentialObject(
+            id="hubspot",
+            credential_type=CredentialType.OAUTH2,
+            keys={"access_token": cached_token},
+        )
+
+        # The refresh hands back the cached credential instead of raising.
+        result = provider.refresh(credential)
+
+        assert result.keys["access_token"].value.get_secret_value() == "cached-token"
+
+    def test_should_refresh_expired(self, provider):
+        """A token that expired an hour ago needs a refresh."""
+        expired_at = datetime.now(UTC) - timedelta(hours=1)
+        token = CredentialKey(
+            name="access_token",
+            value=SecretStr("token"),
+            expires_at=expired_at,
+        )
+        credential = CredentialObject(
+            id="test",
+            credential_type=CredentialType.OAUTH2,
+            keys={"access_token": token},
+        )
+
+        assert provider.should_refresh(credential) is True
+
+    def test_should_refresh_within_buffer(self, provider):
+        """A token expiring inside the refresh buffer needs a refresh."""
+        # 3 minutes of life left, against a 5-minute buffer
+        expires_soon = datetime.now(UTC) + timedelta(minutes=3)
+        token = CredentialKey(
+            name="access_token",
+            value=SecretStr("token"),
+            expires_at=expires_soon,
+        )
+        credential = CredentialObject(
+            id="test",
+            credential_type=CredentialType.OAUTH2,
+            keys={"access_token": token},
+        )
+
+        assert provider.should_refresh(credential) is True
+
+    def test_should_refresh_still_valid(self, provider):
+        """A token with an hour of life left does not need a refresh."""
+        expires_in_an_hour = datetime.now(UTC) + timedelta(hours=1)
+        token = CredentialKey(
+            name="access_token",
+            value=SecretStr("token"),
+            expires_at=expires_in_an_hour,
+        )
+        credential = CredentialObject(
+            id="test",
+            credential_type=CredentialType.OAUTH2,
+            keys={"access_token": token},
+        )
+
+        assert provider.should_refresh(credential) is False
+
+    def test_fetch_from_aden(self, provider, mock_client, aden_response):
+        """Fetching converts the Aden response into an auto-refreshing credential."""
+        mock_client.get_credential.return_value = aden_response
+
+        fetched = provider.fetch_from_aden("hubspot")
+
+        assert fetched is not None
+        assert fetched.id == "hubspot"
+        assert fetched.keys["access_token"].value.get_secret_value() == "test-access-token"
+        assert fetched.auto_refresh is True
+
+    def test_fetch_from_aden_not_found(self, provider, mock_client):
+        """Fetching yields None when the client returns no credential."""
+        mock_client.get_credential.return_value = None
+
+        fetched = provider.fetch_from_aden("nonexistent")
+
+        assert fetched is None
+
+    def test_sync_all(self, provider, mock_client, aden_response):
+        """sync_all stores active integrations and skips the rest."""
+        active = AdenIntegrationInfo(
+            integration_id="hubspot",
+            integration_type="hubspot",
+            status="active",
+        )
+        # Not active, so it must be skipped
+        needs_reauth = AdenIntegrationInfo(
+            integration_id="github",
+            integration_type="github",
+            status="requires_reauth",
+        )
+        mock_client.list_integrations.return_value = [active, needs_reauth]
+        mock_client.get_credential.return_value = aden_response
+
+        store = CredentialStore(storage=InMemoryStorage())
+        synced = provider.sync_all(store)
+
+        # Only the active integration was synced
+        assert synced == 1
+        assert store.get_credential("hubspot") is not None
+
+    def test_validate_via_aden(self, provider, mock_client):
+        """A credential is valid when Aden's introspection reports it valid."""
+        mock_client.validate_token.return_value = {"valid": True}
+        credential = CredentialObject(
+            id="hubspot",
+            credential_type=CredentialType.OAUTH2,
+            keys={},
+        )
+
+        assert provider.validate(credential) is True
+
+    def test_validate_fallback_to_local(self, provider, mock_client):
+        """When Aden validation fails, an unexpired token still validates locally."""
+        mock_client.validate_token.side_effect = AdenClientError("Failed")
+
+        expires_in_an_hour = datetime.now(UTC) + timedelta(hours=1)
+        token = CredentialKey(
+            name="access_token",
+            value=SecretStr("token"),
+            expires_at=expires_in_an_hour,
+        )
+        credential = CredentialObject(
+            id="hubspot",
+            credential_type=CredentialType.OAUTH2,
+            keys={"access_token": token},
+        )
+
+        assert provider.validate(credential) is True
+
+
+# =============================================================================
+# AdenCachedStorage Tests
+# =============================================================================
+
+
+class TestAdenCachedStorage:
+    """Tests for AdenCachedStorage."""
+
+    def test_save_updates_cache_timestamp(self, cached_storage):
+        """Test save updates cache timestamp."""
+        cred = CredentialObject(
+            id="test",
+            credential_type=CredentialType.OAUTH2,
+            keys={
+                "access_token": CredentialKey(
+                    name="access_token",
+                    value=SecretStr("token"),
+                )
+            },
+        )
+
+        cached_storage.save(cred)
+
+        assert "test" in cached_storage._cache_timestamps
+        assert cached_storage.exists("test")
+
+    def test_load_from_fresh_cache(self, cached_storage, local_storage):
+        """Test load returns cached credential when fresh."""
+        cred = CredentialObject(
+            id="test",
+            credential_type=CredentialType.OAUTH2,
+            keys={
+                "access_token": CredentialKey(
+                    name="access_token",
+                    value=SecretStr("cached-token"),
+                )
+            },
+        )
+
+        # Save to both local storage and update timestamp
+        local_storage.save(cred)
+        cached_storage._cache_timestamps["test"] = datetime.now(UTC)
+
+        loaded = cached_storage.load("test")
+
+        assert loaded is not None
+        assert loaded.keys["access_token"].value.get_secret_value() == "cached-token"
+
+    def test_load_from_aden_when_stale(
+        self, cached_storage, local_storage, provider, mock_client, aden_response
+    ):
+        """Test load fetches from Aden when cache is stale."""
+        # Create stale cached credential
+        cred = CredentialObject(
+            id="hubspot",
+            credential_type=CredentialType.OAUTH2,
+            keys={
+                "access_token": CredentialKey(
+                    name="access_token",
+                    value=SecretStr("stale-token"),
+                )
+            },
+        )
+        local_storage.save(cred)
+
+        # Set cache timestamp to be stale (2 minutes ago, TTL is 60 seconds)
+        cached_storage._cache_timestamps["hubspot"] = datetime.now(UTC) - timedelta(minutes=2)
+
+        # Mock Aden response
+        mock_client.get_credential.return_value = aden_response
+
+        loaded = cached_storage.load("hubspot")
+
+        assert loaded is not None
+        assert loaded.keys["access_token"].value.get_secret_value() == "test-access-token"
+
+    def test_load_falls_back_to_stale_when_aden_fails(
+        self, cached_storage, local_storage, provider, mock_client
+    ):
+        """Test load falls back to stale cache when Aden fails."""
+        # Create stale cached credential
+        cred = CredentialObject(
+            id="hubspot",
+            credential_type=CredentialType.OAUTH2,
+            keys={
+                "access_token": CredentialKey(
+                    name="access_token",
+                    value=SecretStr("stale-token"),
+                )
+            },
+        )
+        local_storage.save(cred)
+        cached_storage._cache_timestamps["hubspot"] = datetime.now(UTC) - timedelta(minutes=2)
+
+        # Aden fails
+        mock_client.get_credential.side_effect = AdenClientError("Connection failed")
+
+        loaded = cached_storage.load("hubspot")
+
+        assert loaded is not None
+        assert loaded.keys["access_token"].value.get_secret_value() == "stale-token"
+
+    def test_delete_removes_cache_timestamp(self, cached_storage, local_storage):
+        """Test delete removes cache timestamp."""
+        cred = CredentialObject(
+            id="test",
+            credential_type=CredentialType.OAUTH2,
+            keys={},
+        )
+        cached_storage.save(cred)
+
+        assert "test" in cached_storage._cache_timestamps
+
+        cached_storage.delete("test")
+
+        assert "test" not in cached_storage._cache_timestamps
+        assert not cached_storage.exists("test")
+
+    def test_invalidate_cache(self, cached_storage, local_storage):
+        """invalidate_cache forgets the timestamp but keeps the stored credential."""
+        credential = CredentialObject(id="test", credential_type=CredentialType.OAUTH2, keys={})
+        cached_storage.save(credential)
+
+        cached_storage.invalidate_cache("test")
+
+        # Only the cache bookkeeping is gone ...
+        assert "test" not in cached_storage._cache_timestamps
+        # ... the credential itself is still in the backing local storage.
+        assert local_storage.exists("test")
+
+    def test_invalidate_all(self, cached_storage):
+        """invalidate_all leaves no cache timestamps behind."""
+        # Plant three timestamp entries directly, without saving any credentials.
+        for suffix in (0, 1, 2):
+            entry_id = f"test_{suffix}"
+            cached_storage._cache_timestamps[entry_id] = datetime.now(UTC)
+
+        cached_storage.invalidate_all()
+
+        remaining = len(cached_storage._cache_timestamps)
+        assert remaining == 0
+
+    def test_is_cache_fresh(self, cached_storage):
+        """_is_cache_fresh: True for a new stamp, False for an old or missing one."""
+        # Stamped right now -> fresh
+        just_now = datetime.now(UTC)
+        cached_storage._cache_timestamps["fresh"] = just_now
+        fresh_result = cached_storage._is_cache_fresh("fresh")
+        assert fresh_result is True
+
+        # Stamped five minutes ago -> stale
+        five_minutes_ago = datetime.now(UTC) - timedelta(minutes=5)
+        cached_storage._cache_timestamps["stale"] = five_minutes_ago
+        stale_result = cached_storage._is_cache_fresh("stale")
+        assert stale_result is False
+
+        # Never stamped -> not fresh
+        missing_result = cached_storage._is_cache_fresh("nonexistent")
+        assert missing_result is False
+
+    def test_get_cache_info(self, cached_storage, local_storage):
+        """get_cache_info reports freshness and remaining TTL per credential."""
+        # Two bare credentials written straight to the backing storage.
+        for cred_id in ("fresh", "stale"):
+            local_storage.save(
+                CredentialObject(id=cred_id, credential_type=CredentialType.OAUTH2, keys={})
+            )
+
+        # One stamped now, the other five minutes in the past.
+        cached_storage._cache_timestamps["fresh"] = datetime.now(UTC)
+        cached_storage._cache_timestamps["stale"] = datetime.now(UTC) - timedelta(minutes=5)
+
+        report = cached_storage.get_cache_info()
+
+        assert "fresh" in report
+        fresh_entry = report["fresh"]
+        assert fresh_entry["is_fresh"] is True
+        assert fresh_entry["ttl_remaining_seconds"] > 0
+
+        assert "stale" in report
+        stale_entry = report["stale"]
+        assert stale_entry["is_fresh"] is False
+        assert stale_entry["ttl_remaining_seconds"] == 0
+
+    def test_save_indexes_provider(self, cached_storage):
+        """Saving a credential with an _integration_type key records it in the provider index."""
+        hash_id = "aHVic3BvdDp0ZXN0OjEzNjExOjExNTI1"
+        keys = {
+            "access_token": CredentialKey(name="access_token", value=SecretStr("token-value")),
+            "_integration_type": CredentialKey(
+                name="_integration_type", value=SecretStr("hubspot")
+            ),
+        }
+        credential = CredentialObject(
+            id=hash_id,
+            credential_type=CredentialType.OAUTH2,
+            keys=keys,
+        )
+
+        cached_storage.save(credential)
+
+        indexed_id = cached_storage._provider_index["hubspot"]
+        assert indexed_id == "aHVic3BvdDp0ZXN0OjEzNjExOjExNTI1"
+
+    def test_load_by_provider_name(self, cached_storage):
+        """Loading by provider name returns the credential stored under its hash ID."""
+        hash_id = "aHVic3BvdDp0ZXN0OjEzNjExOjExNTI1"
+        keys = {
+            "access_token": CredentialKey(name="access_token", value=SecretStr("hubspot-token")),
+            "_integration_type": CredentialKey(
+                name="_integration_type", value=SecretStr("hubspot")
+            ),
+        }
+
+        # Going through save() is what populates the provider index.
+        cached_storage.save(
+            CredentialObject(id=hash_id, credential_type=CredentialType.OAUTH2, keys=keys)
+        )
+
+        # Ask for the provider name, not the hash ID.
+        result = cached_storage.load("hubspot")
+
+        assert result is not None
+        assert result.id == hash_id
+        token = result.keys["access_token"].value
+        assert token.get_secret_value() == "hubspot-token"
+
+    def test_load_by_direct_id_still_works(self, cached_storage):
+        """Loading by the hash ID itself keeps working for indexed credentials."""
+        hash_id = "aHVic3BvdDp0ZXN0OjEzNjExOjExNTI1"
+        keys = {
+            "access_token": CredentialKey(name="access_token", value=SecretStr("token")),
+            "_integration_type": CredentialKey(
+                name="_integration_type", value=SecretStr("hubspot")
+            ),
+        }
+        cached_storage.save(
+            CredentialObject(id=hash_id, credential_type=CredentialType.OAUTH2, keys=keys)
+        )
+
+        # Look up with the ID the credential was saved under.
+        result = cached_storage.load(hash_id)
+
+        assert result is not None
+        assert result.id == hash_id
+
+    def test_exists_by_provider_name(self, cached_storage):
+        """exists() accepts either the provider name or the hash ID."""
+        hash_id = "c2xhY2s6dGVzdDo5OTk="
+        keys = {
+            "access_token": CredentialKey(name="access_token", value=SecretStr("slack-token")),
+            "_integration_type": CredentialKey(
+                name="_integration_type", value=SecretStr("slack")
+            ),
+        }
+        cached_storage.save(
+            CredentialObject(id=hash_id, credential_type=CredentialType.OAUTH2, keys=keys)
+        )
+
+        found_by_name = cached_storage.exists("slack")
+        assert found_by_name is True
+        found_by_id = cached_storage.exists(hash_id)
+        assert found_by_id is True
+        found_unknown = cached_storage.exists("nonexistent")
+        assert found_unknown is False
+
+    def test_rebuild_provider_index(self, cached_storage, local_storage):
+        """rebuild_provider_index repopulates the index from what local storage holds."""
+        # Write straight to local storage so cached_storage.save never sees these.
+        hash_by_provider = {"hubspot": "hash_hub", "slack": "hash_slack"}
+        for provider_name, hash_id in hash_by_provider.items():
+            type_key = CredentialKey(name="_integration_type", value=SecretStr(provider_name))
+            local_storage.save(
+                CredentialObject(
+                    id=hash_id,
+                    credential_type=CredentialType.OAUTH2,
+                    keys={"_integration_type": type_key},
+                )
+            )
+
+        # Nothing has been indexed yet because save() was bypassed.
+        assert len(cached_storage._provider_index) == 0
+
+        count = cached_storage.rebuild_provider_index()
+
+        assert count == 2
+        assert cached_storage._provider_index["hubspot"] == "hash_hub"
+        assert cached_storage._provider_index["slack"] == "hash_slack"
+
+    def test_save_without_integration_type_no_index(self, cached_storage):
+        """A credential lacking _integration_type leaves the provider index untouched."""
+        api_key = CredentialKey(name="api_key", value=SecretStr("key-value"))
+        credential = CredentialObject(
+            id="plain-cred",
+            credential_type=CredentialType.API_KEY,
+            keys={"api_key": api_key},
+        )
+
+        cached_storage.save(credential)
+
+        assert "plain-cred" not in cached_storage._provider_index
+        index_size = len(cached_storage._provider_index)
+        assert index_size == 0
+
+
+# =============================================================================
+# Integration Tests
+# =============================================================================
+
+
+class TestAdenIntegration:
+    """End-to-end checks that wire the Aden sync components together."""
+
+    def test_full_workflow(self, mock_client, aden_response):
+        """Sync from Aden, read the credential back, then refresh it."""
+        # Canned responses for the mocked Aden client.
+        hubspot_info = AdenIntegrationInfo(
+            integration_id="hubspot",
+            integration_type="hubspot",
+            status="active",
+        )
+        mock_client.list_integrations.return_value = [hubspot_info]
+        mock_client.get_credential.return_value = aden_response
+        refreshed_response = AdenCredentialResponse(
+            integration_id="hubspot",
+            integration_type="hubspot",
+            access_token="refreshed-token",
+            expires_at=datetime.now(UTC) + timedelta(hours=2),
+            scopes=[],
+        )
+        mock_client.request_refresh.return_value = refreshed_response
+
+        # Store backed by in-memory storage with the Aden provider registered.
+        provider = AdenSyncProvider(client=mock_client)
+        storage = InMemoryStorage()
+        store = CredentialStore(storage=storage, providers=[provider], auto_refresh=True)
+
+        # Step 1: initial sync pulls the single listed integration.
+        synced_count = provider.sync_all(store)
+        assert synced_count == 1
+
+        # Step 2: the synced credential is readable through the store.
+        cred = store.get_credential("hubspot")
+        assert cred is not None
+        current_token = cred.keys["access_token"].value.get_secret_value()
+        assert current_token == "test-access-token"
+
+        # Step 3: overwrite the access token with one that expired an hour ago.
+        an_hour_ago = datetime.now(UTC) - timedelta(hours=1)
+        cred.keys["access_token"] = CredentialKey(
+            name="access_token",
+            value=SecretStr("test-access-token"),
+            expires_at=an_hour_ago,
+        )
+        storage.save(cred)
+
+        # Step 4: refreshing through the provider yields the refreshed token.
+        refreshed = provider.refresh(cred)
+        new_token = refreshed.keys["access_token"].value.get_secret_value()
+        assert new_token == "refreshed-token"
+
+    def test_cached_storage_with_store(self, mock_client, aden_response):
+        """First load goes to Aden; the second is answered without calling it."""
+        mock_client.get_credential.return_value = aden_response
+
+        backing_storage = InMemoryStorage()
+        cached = AdenCachedStorage(
+            local_storage=backing_storage,
+            aden_provider=AdenSyncProvider(client=mock_client),
+            cache_ttl_seconds=300,
+        )
+
+        # Nothing stored locally yet, so this load calls the Aden client once.
+        first = cached.load("hubspot")
+        assert first is not None
+        mock_client.get_credential.assert_called_once()
+
+        # Reset the call record; the next load must not touch the client.
+        mock_client.get_credential.reset_mock()
+        second = cached.load("hubspot")
+        assert second is not None
+        mock_client.get_credential.assert_not_called()
@@ -8,7 +8,7 @@ containing one or more keys (e.g., api_key, access_token, refresh_token).
 from __future__ import annotations

 from datetime import UTC, datetime
-from enum import Enum
+from enum import StrEnum
 from typing import Any

 from pydantic import BaseModel, Field, SecretStr
@@ -19,7 +19,7 @@ def _utc_now() -> datetime:
    return datetime.now(UTC)


-class CredentialType(str, Enum):
+class CredentialType(StrEnum):
    """Types of credentials the store can manage."""

    API_KEY = "api_key"
@@ -20,8 +20,7 @@ Quick Start:

    # Create store with OAuth2 provider
    store = CredentialStore.with_encrypted_storage(
-        "/var/hive/credentials",
-        providers=[provider]
+        providers=[provider]  # defaults to ~/.hive/credentials
    )

    # Get token using client credentials
@@ -64,6 +63,7 @@ For advanced lifecycle management:
 """

 from .base_provider import BaseOAuth2Provider
+from .hubspot_provider import HubSpotOAuth2Provider
 from .lifecycle import TokenLifecycleManager, TokenRefreshResult
 from .provider import (
    OAuth2Config,
@@ -79,8 +79,9 @@ __all__ = [
    "OAuth2Token",
    "OAuth2Config",
    "TokenPlacement",
-    # Provider
+    # Providers
    "BaseOAuth2Provider",
+    "HubSpotOAuth2Provider",
    # Lifecycle
    "TokenLifecycleManager",
    "TokenRefreshResult",
@@ -0,0 +1,112 @@
+"""
+HubSpot-specific OAuth2 provider.
+
+Pre-configured for HubSpot's OAuth2 endpoints and CRM scopes.
+Extends BaseOAuth2Provider for HubSpot-specific behavior.
+
+Usage:
+    provider = HubSpotOAuth2Provider(
+        client_id="your-client-id",
+        client_secret="your-client-secret",
+    )
+
+    # Use with credential store
+    store = CredentialStore(
+        storage=EncryptedFileStorage(),  # defaults to ~/.hive/credentials
+        providers=[provider],
+    )
+
+See: https://developers.hubspot.com/docs/api/oauth-quickstart-guide
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any
+
+from ..models import CredentialObject, CredentialType
+from .base_provider import BaseOAuth2Provider
+from .provider import OAuth2Config
+
+logger = logging.getLogger(__name__)
+
+# HubSpot OAuth2 endpoints
+HUBSPOT_TOKEN_URL = "https://api.hubapi.com/oauth/v1/token"
+HUBSPOT_AUTHORIZATION_URL = "https://app.hubspot.com/oauth/authorize"
+
+# Default CRM scopes for contacts, companies, and deals
+HUBSPOT_DEFAULT_SCOPES = [
+    "crm.objects.contacts.read",
+    "crm.objects.contacts.write",
+    "crm.objects.companies.read",
+    "crm.objects.companies.write",
+    "crm.objects.deals.read",
+    "crm.objects.deals.write",
+]
+
+
+class HubSpotOAuth2Provider(BaseOAuth2Provider):
+    """
+    HubSpot OAuth2 provider with pre-configured endpoints.
+
+    Handles HubSpot-specific OAuth2 behavior:
+    - Pre-configured token and authorization URLs
+    - Default CRM scopes for contacts, companies, and deals
+    - Token validation via HubSpot API
+
+    Example:
+        provider = HubSpotOAuth2Provider(
+            client_id="your-hubspot-client-id",
+            client_secret="your-hubspot-client-secret",
+            scopes=["crm.objects.contacts.read"],  # Override default scopes
+        )
+    """
+
+    def __init__(
+        self,
+        client_id: str,
+        client_secret: str,
+        scopes: list[str] | None = None,
+    ):
+        """
+        Build the provider around HubSpot's fixed OAuth2 endpoints.
+
+        Args:
+            client_id: OAuth2 client ID of the HubSpot app.
+            client_secret: OAuth2 client secret of the HubSpot app.
+            scopes: Scopes to request. ``None`` or an empty list selects
+                HUBSPOT_DEFAULT_SCOPES.
+        """
+        config = OAuth2Config(
+            token_url=HUBSPOT_TOKEN_URL,
+            authorization_url=HUBSPOT_AUTHORIZATION_URL,
+            client_id=client_id,
+            client_secret=client_secret,
+            # Copy so the config never aliases the caller's list or the shared
+            # module-level default list.
+            default_scopes=list(scopes or HUBSPOT_DEFAULT_SCOPES),
+        )
+        super().__init__(config, provider_id="hubspot_oauth2")
+
+    @property
+    def supported_types(self) -> list[CredentialType]:
+        """Credential types this provider handles (OAuth2 only)."""
+        return [CredentialType.OAUTH2]
+
+    def validate(self, credential: CredentialObject) -> bool:
+        """
+        Validate HubSpot credential by making a lightweight API call.
+
+        Tests the access token against the contacts endpoint with limit=1.
+
+        Args:
+            credential: Credential whose ``access_token`` key is checked.
+
+        Returns:
+            True only when the request returns HTTP 200. False when the
+            credential has no access token, when HubSpot answers with any other
+            status, or when the request itself raises.
+        """
+        access_token = credential.get_key("access_token")
+        if not access_token:
+            return False
+
+        # NOTE(review): any non-200 counts as invalid, so a working token that lacks
+        # the contacts read scope would presumably be reported invalid - confirm.
+        try:
+            client = self._get_client()
+            response = client.get(
+                "https://api.hubapi.com/crm/v3/objects/contacts",
+                headers={
+                    "Authorization": f"Bearer {access_token}",
+                    "Accept": "application/json",
+                },
+                params={"limit": "1"},
+            )
+            return response.status_code == 200
+        except Exception:
+            # Best-effort check: failure still means "not valid", but leave a trace
+            # so transport problems can be told apart from rejected tokens.
+            logger.debug("HubSpot credential validation request failed", exc_info=True)
+            return False
+
+    def _parse_token_response(self, response_data: dict[str, Any]) -> Any:
+        """Parse HubSpot token response into an OAuth2Token."""
+        from .provider import OAuth2Token
+
+        return OAuth2Token.from_token_response(response_data)
@@ -11,11 +11,11 @@ from __future__ import annotations

 from dataclasses import dataclass, field
 from datetime import UTC, datetime, timedelta
-from enum import Enum
+from enum import StrEnum
 from typing import Any


-class TokenPlacement(str, Enum):
+class TokenPlacement(StrEnum):
    """Where to place the access token in HTTP requests."""

    HEADER_BEARER = "header_bearer"
@@ -111,14 +111,16 @@ class EncryptedFileStorage(CredentialStorage):
    If not set, a new key is generated (and must be persisted for data recovery).

    Example:
-        storage = EncryptedFileStorage("/var/hive/credentials")
+        storage = EncryptedFileStorage("~/.hive/credentials")
        storage.save(credential)
        credential = storage.load("brave_search")
    """

+    DEFAULT_PATH = "~/.hive/credentials"
+
    def __init__(
        self,
-        base_path: str | Path,
+        base_path: str | Path | None = None,
        encryption_key: bytes | None = None,
        key_env_var: str = "HIVE_CREDENTIAL_KEY",
    ):
@@ -126,7 +128,7 @@ class EncryptedFileStorage(CredentialStorage):
        Initialize encrypted storage.

        Args:
-            base_path: Directory for credential files
+            base_path: Directory for credential files. Defaults to ~/.hive/credentials.
            encryption_key: 32-byte Fernet key. If None, reads from env var.
            key_env_var: Environment variable containing encryption key
        """
@@ -137,7 +139,7 @@ class EncryptedFileStorage(CredentialStorage):
                "Encrypted storage requires 'cryptography'. Install with: pip install cryptography"
            ) from e

-        self.base_path = Path(base_path)
+        self.base_path = Path(base_path or self.DEFAULT_PATH).expanduser()
        self._ensure_dirs()
        self._key_env_var = key_env_var

@@ -459,7 +461,7 @@ class CompositeStorage(CredentialStorage):

    Example:
        storage = CompositeStorage(
-            primary=EncryptedFileStorage("/var/hive/credentials"),
+            primary=EncryptedFileStorage("~/.hive/credentials"),
            fallbacks=[EnvVarStorage({"brave_search": "BRAVE_SEARCH_API_KEY"})]
        )
    """
@@ -45,7 +45,7 @@ class CredentialStore:
    Usage:
        # Basic usage
        store = CredentialStore(
-            storage=EncryptedFileStorage("/path/to/creds"),
+            storage=EncryptedFileStorage("~/.hive/credentials"),
            providers=[OAuth2Provider(), StaticProvider()]
        )

@@ -566,7 +566,7 @@ class CredentialStore:
    @classmethod
    def with_encrypted_storage(
        cls,
-        base_path: str,
+        base_path: str | None = None,
        providers: list[CredentialProvider] | None = None,
        **kwargs: Any,
    ) -> CredentialStore:
@@ -574,7 +574,7 @@ class CredentialStore:
        Create a credential store with encrypted file storage.

        Args:
-            base_path: Directory for credential files
+            base_path: Directory for credential files. Defaults to ~/.hive/credentials.
            providers: List of credential providers
            **kwargs: Additional arguments passed to CredentialStore

@@ -612,3 +612,97 @@ class CredentialStore:
            providers=providers,
            **kwargs,
        )
+
+    @classmethod
+    def with_aden_sync(
+        cls,
+        base_url: str = "https://api.adenhq.com",
+        cache_ttl_seconds: int = 300,
+        local_path: str | None = None,
+        auto_sync: bool = True,
+        **kwargs: Any,
+    ) -> CredentialStore:
+        """
+        Create a credential store with Aden server sync.
+
+        Automatically syncs OAuth2 tokens from the Aden authentication server.
+        Falls back to local-only storage if ADEN_API_KEY is not set or Aden
+        is unreachable.
+
+        Args:
+            base_url: Aden server URL (default: https://api.adenhq.com)
+            cache_ttl_seconds: How long to cache credentials locally (default: 5 min)
+            local_path: Path for local credential storage (default: ~/.hive/credentials)
+            auto_sync: Whether to sync all credentials on startup (default: True)
+            **kwargs: Additional arguments passed to CredentialStore. ``providers``
+                are registered in addition to the Aden provider, and
+                ``auto_refresh`` overrides the default of True.
+
+        Returns:
+            CredentialStore configured with Aden sync, or a local-only store
+            when Aden is not configured or its setup fails.
+
+        Example:
+            # Simple usage - just set ADEN_API_KEY env var
+            store = CredentialStore.with_aden_sync()
+
+            # Get HubSpot token (auto-refreshed via Aden)
+            token = store.get_key("hubspot", "access_token")
+        """
+        import os
+        from pathlib import Path
+
+        from .storage import EncryptedFileStorage
+
+        # Determine local storage path
+        if local_path is None:
+            local_path = str(Path.home() / ".hive" / "credentials")
+
+        local_storage = EncryptedFileStorage(base_path=local_path)
+
+        # Without an API key there is nothing to sync against.
+        if not os.environ.get("ADEN_API_KEY"):
+            logger.info("ADEN_API_KEY not set, using local-only credential storage")
+            return cls(storage=local_storage, **kwargs)
+
+        # ``providers`` and ``auto_refresh`` are also passed explicitly below. Left in
+        # **kwargs they would raise a duplicate-keyword TypeError that the broad
+        # handler at the bottom swallows, silently disabling Aden sync. Work on a
+        # copy so the fallback paths still receive the caller's original kwargs.
+        aden_kwargs = dict(kwargs)
+        extra_providers = list(aden_kwargs.pop("providers", None) or [])
+        auto_refresh = aden_kwargs.pop("auto_refresh", True)
+
+        try:
+            from .aden import (
+                AdenCachedStorage,
+                AdenClientConfig,
+                AdenCredentialClient,
+                AdenSyncProvider,
+            )
+
+            client = AdenCredentialClient(AdenClientConfig(base_url=base_url))
+            provider = AdenSyncProvider(client=client)
+
+            # Use cached storage for offline resilience
+            cached_storage = AdenCachedStorage(
+                local_storage=local_storage,
+                aden_provider=provider,
+                cache_ttl_seconds=cache_ttl_seconds,
+            )
+
+            store = cls(
+                storage=cached_storage,
+                # Aden provider first, then any caller-supplied providers.
+                providers=[provider, *extra_providers],
+                auto_refresh=auto_refresh,
+                **aden_kwargs,
+            )
+
+            # Initial sync
+            if auto_sync:
+                synced = provider.sync_all(store)
+                logger.info(f"Synced {synced} credentials from Aden server")
+
+            return store
+
+        except ImportError:
+            logger.warning("Aden components not available, using local storage")
+            return cls(storage=local_storage, **kwargs)
+
+        except Exception as e:
+            # Deliberate best-effort: any setup or sync failure degrades to local-only.
+            logger.warning(f"Failed to setup Aden sync: {e}. Using local storage.")
+            return cls(storage=local_storage, **kwargs)
@@ -1,7 +1,22 @@
 """Graph structures: Goals, Nodes, Edges, and Flexible Execution."""

+from framework.graph.client_io import (
+    ActiveNodeClientIO,
+    ClientIOGateway,
+    InertNodeClientIO,
+    NodeClientIO,
+)
 from framework.graph.code_sandbox import CodeSandbox, safe_eval, safe_exec
+from framework.graph.context_handoff import ContextHandoff, HandoffContext
+from framework.graph.conversation import ConversationStore, Message, NodeConversation
 from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
+from framework.graph.event_loop_node import (
+    EventLoopNode,
+    JudgeProtocol,
+    JudgeVerdict,
+    LoopConfig,
+    OutputAccumulator,
+)
 from framework.graph.executor import GraphExecutor
 from framework.graph.flexible_executor import ExecutorConfig, FlexibleGraphExecutor
 from framework.graph.goal import Constraint, Goal, GoalStatus, SuccessCriterion
@@ -72,4 +87,22 @@ __all__ = [
    "CodeSandbox",
    "safe_exec",
    "safe_eval",
+    # Conversation
+    "NodeConversation",
+    "ConversationStore",
+    "Message",
+    # Event Loop
+    "EventLoopNode",
+    "LoopConfig",
+    "OutputAccumulator",
+    "JudgeProtocol",
+    "JudgeVerdict",
+    # Context Handoff
+    "ContextHandoff",
+    "HandoffContext",
+    # Client I/O
+    "NodeClientIO",
+    "ActiveNodeClientIO",
+    "InertNodeClientIO",
+    "ClientIOGateway",
 ]
@@ -0,0 +1,170 @@
+"""
+Client I/O gateway for graph nodes.
+
+Provides the bridge between node code and external clients:
+- ActiveNodeClientIO: for client_facing=True nodes (streams output, accepts input)
+- InertNodeClientIO: for client_facing=False nodes (logs internally, redirects input)
+- ClientIOGateway: factory that creates the right variant per node
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+from abc import ABC, abstractmethod
+from collections.abc import AsyncIterator
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from framework.runtime.event_bus import EventBus
+
+logger = logging.getLogger(__name__)
+
+
+class NodeClientIO(ABC):
+    """Abstract base for node client I/O.
+
+    Declares the two coroutines a node uses to talk to a client: one to emit
+    output and one to request input. Subclasses must implement both.
+    """
+
+    @abstractmethod
+    async def emit_output(self, content: str, is_final: bool = False) -> None:
+        """Emit output content. If is_final=True, signal end of stream.
+
+        Args:
+            content: Text chunk to emit.
+            is_final: Marks this chunk as the last one of the stream.
+        """
+
+    @abstractmethod
+    async def request_input(self, prompt: str = "", timeout: float | None = None) -> str:
+        """Request input. Behavior depends on whether the node is client-facing.
+
+        Args:
+            prompt: Text accompanying the input request.
+            timeout: Optional time limit; its handling is left to the implementation.
+
+        Returns:
+            The string handed back to the node.
+        """
+
+
+class ActiveNodeClientIO(NodeClientIO):
+    """
+    Client I/O for client_facing=True nodes.
+
+    - emit_output() queues content and publishes CLIENT_OUTPUT_DELTA.
+    - request_input() publishes CLIENT_INPUT_REQUESTED, then awaits provide_input().
+    - output_stream() yields queued content until the final sentinel.
+
+    At most one request_input() call may be pending at a time.
+    """
+
+    def __init__(
+        self,
+        node_id: str,
+        event_bus: EventBus | None = None,
+    ) -> None:
+        self.node_id = node_id
+        self._event_bus = event_bus
+
+        # Output chunks in emission order; ``None`` is the end-of-stream sentinel.
+        self._output_queue: asyncio.Queue[str | None] = asyncio.Queue()
+        # Concatenation of everything emitted so far.
+        self._output_snapshot = ""
+
+        # Set while a request_input() call is waiting; None otherwise.
+        self._input_event: asyncio.Event | None = None
+        self._input_result: str | None = None
+
+    async def emit_output(self, content: str, is_final: bool = False) -> None:
+        """Queue *content*, publish it on the event bus, and optionally end the stream."""
+        self._output_snapshot += content
+        await self._output_queue.put(content)
+
+        if self._event_bus is not None:
+            await self._event_bus.emit_client_output_delta(
+                stream_id=self.node_id,
+                node_id=self.node_id,
+                content=content,
+                snapshot=self._output_snapshot,
+            )
+
+        if is_final:
+            await self._output_queue.put(None)
+
+    async def request_input(self, prompt: str = "", timeout: float | None = None) -> str:
+        """
+        Announce an input request and wait until provide_input() fulfills it.
+
+        Args:
+            prompt: Text published with the input request.
+            timeout: Seconds to wait for input; None waits indefinitely.
+
+        Returns:
+            The content passed to provide_input().
+
+        Raises:
+            RuntimeError: If another request is already pending, or the wait
+                ended without any input having been stored.
+            TimeoutError: If *timeout* elapses first (raised by asyncio.wait_for).
+        """
+        if self._input_event is not None:
+            raise RuntimeError("request_input already pending for this node")
+
+        pending = asyncio.Event()
+        self._input_event = pending
+        self._input_result = None
+
+        try:
+            # Publishing happens inside the try so that a failing event bus cannot
+            # leave the pending marker set and block every later request_input().
+            if self._event_bus is not None:
+                await self._event_bus.emit_client_input_requested(
+                    stream_id=self.node_id,
+                    node_id=self.node_id,
+                    prompt=prompt,
+                )
+
+            if timeout is not None:
+                await asyncio.wait_for(pending.wait(), timeout=timeout)
+            else:
+                await pending.wait()
+        finally:
+            self._input_event = None
+
+        if self._input_result is None:
+            raise RuntimeError("input event was set but no input was provided")
+        result = self._input_result
+        self._input_result = None
+        return result
+
+    async def provide_input(self, content: str) -> None:
+        """Called externally to fulfill a pending request_input().
+
+        Raises:
+            RuntimeError: If no request_input() call is currently waiting.
+        """
+        if self._input_event is None:
+            raise RuntimeError("no pending request_input to fulfill")
+        self._input_result = content
+        self._input_event.set()
+
+    async def output_stream(self) -> AsyncIterator[str]:
+        """Async iterator that yields output chunks until the final sentinel."""
+        while True:
+            chunk = await self._output_queue.get()
+            if chunk is None:
+                break
+            yield chunk
+
+
+class InertNodeClientIO(NodeClientIO):
+    """
+    Client I/O for client_facing=False nodes.
+
+    - emit_output() forwards content to the event bus as NODE_INTERNAL_OUTPUT.
+    - request_input() reports NODE_INPUT_BLOCKED and answers with a fixed redirect
+      message instead of waiting for anyone.
+    """
+
+    def __init__(
+        self,
+        node_id: str,
+        event_bus: EventBus | None = None,
+    ) -> None:
+        self._event_bus = event_bus
+        self.node_id = node_id
+
+    async def emit_output(self, content: str, is_final: bool = False) -> None:
+        """Publish *content* as internal output; a no-op without an event bus."""
+        bus = self._event_bus
+        if bus is None:
+            return
+        await bus.emit_node_internal_output(
+            stream_id=self.node_id,
+            node_id=self.node_id,
+            content=content,
+        )
+
+    async def request_input(self, prompt: str = "", timeout: float | None = None) -> str:
+        """Report the blocked request (if a bus is set) and return the redirect text."""
+        bus = self._event_bus
+        if bus is not None:
+            await bus.emit_node_input_blocked(
+                stream_id=self.node_id,
+                node_id=self.node_id,
+                prompt=prompt,
+            )
+
+        redirect = (
+            "You are an internal processing node. There is no user to interact with."
+            " Work with the data provided in your inputs to complete your task."
+        )
+        return redirect
+
+
+class ClientIOGateway:
+    """Builds the NodeClientIO variant matching a node's client_facing flag."""
+
+    def __init__(self, event_bus: EventBus | None = None) -> None:
+        self._event_bus = event_bus
+
+    def create_io(self, node_id: str, client_facing: bool) -> NodeClientIO:
+        """Return an ActiveNodeClientIO when client_facing is truthy, else an inert one."""
+        io_cls = ActiveNodeClientIO if client_facing else InertNodeClientIO
+        return io_cls(node_id=node_id, event_bus=self._event_bus)
@@ -0,0 +1,191 @@
+"""Context handoff: summarize a completed NodeConversation for the next graph node."""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Any
+
+from framework.graph.conversation import _try_extract_key
+
+if TYPE_CHECKING:
+    from framework.graph.conversation import NodeConversation
+    from framework.llm.provider import LLMProvider
+
+logger = logging.getLogger(__name__)
+
+_TRUNCATE_CHARS = 500
+
+
+# ---------------------------------------------------------------------------
+# Data
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class HandoffContext:
+    """Structured summary of a completed node conversation.
+
+    Built by ContextHandoff.summarize_conversation() and rendered to text by
+    ContextHandoff.format_as_input().
+    """
+
+    source_node_id: str  # node_id passed to summarize_conversation()
+    summary: str  # LLM-written or extractive summary text
+    key_outputs: dict[str, Any]  # output key -> value extracted from assistant messages
+    turn_count: int  # conversation.turn_count at summarization time
+    total_tokens_used: int  # conversation.estimate_tokens() at summarization time
+
+
+# ---------------------------------------------------------------------------
+# ContextHandoff
+# ---------------------------------------------------------------------------
+
+
+class ContextHandoff:
+    """Summarize a completed NodeConversation into a HandoffContext.
+
+    Parameters
+    ----------
+    llm : LLMProvider | None
+        Optional LLM provider for abstractive summarization.
+        When *None*, all summarization uses the extractive fallback.
+    """
+
+    def __init__(self, llm: LLMProvider | None = None) -> None:
+        # Stored as-is; summarize_conversation() checks it against None to choose
+        # between LLM-based and extractive summarization.
+        self.llm = llm
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+
+    def summarize_conversation(
+        self,
+        conversation: NodeConversation,
+        node_id: str,
+        output_keys: list[str] | None = None,
+    ) -> HandoffContext:
+        """Produce a HandoffContext from *conversation*.
+
+        1. Reads ``turn_count`` (property) and ``estimate_tokens()`` (method).
+        2. For each requested output key, takes the value from the most recent
+           assistant message that yields one.
+        3. Builds a summary via the LLM when one is configured; if that call
+           raises, a warning is logged and the extractive summary is used.
+
+        Args:
+            conversation: Conversation to summarize.
+            node_id: Stored as ``source_node_id`` on the result.
+            output_keys: Keys to look for in assistant messages. ``None`` or an
+                empty list skips key extraction.
+
+        Returns:
+            The populated HandoffContext.
+        """
+        turn_count = conversation.turn_count
+        total_tokens_used = conversation.estimate_tokens()
+        messages = conversation.messages  # defensive copy
+
+        # --- key outputs ---------------------------------------------------
+        key_outputs: dict[str, Any] = {}
+        if output_keys:
+            remaining = set(output_keys)
+            for msg in reversed(messages):
+                if not remaining:
+                    # Every key is resolved; older messages cannot change the
+                    # result, so stop instead of walking the rest of the history.
+                    break
+                if msg.role != "assistant":
+                    continue
+                # Iterate over a snapshot because keys are removed as they are found.
+                for key in list(remaining):
+                    value = _try_extract_key(msg.content, key)
+                    if value is not None:
+                        key_outputs[key] = value
+                        remaining.discard(key)
+
+        # --- summary -------------------------------------------------------
+        if self.llm is not None:
+            try:
+                summary = self._llm_summary(messages, output_keys or [])
+            except Exception:
+                # Deliberate best-effort: any provider failure degrades to the
+                # extractive summary rather than failing the handoff.
+                logger.warning(
+                    "LLM summarization failed; falling back to extractive.",
+                    exc_info=True,
+                )
+                summary = self._extractive_summary(messages)
+        else:
+            summary = self._extractive_summary(messages)
+
+        return HandoffContext(
+            source_node_id=node_id,
+            summary=summary,
+            key_outputs=key_outputs,
+            turn_count=turn_count,
+            total_tokens_used=total_tokens_used,
+        )
+
+    @staticmethod
+    def format_as_input(handoff: HandoffContext) -> str:
+        """Serialize *handoff* into the plain-text block given to the next node.
+
+        Layout: a header naming the source node with its turn and token counts,
+        an optional KEY OUTPUTS list, the SUMMARY text (a placeholder when the
+        summary is empty), and a closing END CONTEXT marker.
+        """
+        lines: list[str] = [
+            f"--- CONTEXT FROM: {handoff.source_node_id} "
+            f"({handoff.turn_count} turns, ~{handoff.total_tokens_used} tokens) ---",
+            "",
+        ]
+
+        outputs = handoff.key_outputs
+        if outputs:
+            lines.append("KEY OUTPUTS:")
+            lines.extend(f"- {name}: {value}" for name, value in outputs.items())
+            lines.append("")
+
+        lines += [
+            "SUMMARY:",
+            handoff.summary if handoff.summary else "No summary available.",
+            "",
+            "--- END CONTEXT ---",
+        ]
+        return "\n".join(lines)
+
+    # ------------------------------------------------------------------
+    # Private helpers
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _extractive_summary(messages: list) -> str:
+        """Summarize without an LLM by quoting assistant messages.
+
+        The result is the first assistant message followed, when more than one
+        exists, by the last one. Each is cut to ``_TRUNCATE_CHARS`` characters
+        and they are separated by a blank line. Fixed placeholder strings are
+        returned for an empty conversation or one without assistant messages.
+        """
+        if not messages:
+            return "Empty conversation."
+
+        replies = [msg for msg in messages if msg.role == "assistant"]
+        if not replies:
+            return "No assistant responses."
+
+        # Opening assessment first, final conclusion second (if distinct messages).
+        picked = [replies[0]]
+        if len(replies) > 1:
+            picked.append(replies[-1])
+
+        return "\n\n".join(reply.content[:_TRUNCATE_CHARS] for reply in picked)
+
+    def _llm_summary(self, messages: list, output_keys: list[str]) -> str:
+        """Ask the LLM provider for an abstractive summary of *messages*.
+
+        The conversation is flattened to ``[role]: content`` lines and sent as a
+        single user message. When *output_keys* is non-empty, the keys are named
+        in the system prompt as especially important.
+
+        Returns:
+            The provider's response content with surrounding whitespace removed.
+
+        Raises:
+            ValueError: If no LLM provider is configured.
+        """
+        provider = self.llm
+        if provider is None:
+            raise ValueError("_llm_summary called without an LLM provider")
+
+        if output_keys:
+            key_hint = (
+                "\nThe following output keys are especially important: "
+                + ", ".join(output_keys)
+                + ".\n"
+            )
+        else:
+            key_hint = ""
+
+        instructions = (
+            "You are a concise summarizer. Given the conversation below, "
+            "produce a brief summary (at most ~500 tokens) that captures the "
+            "key decisions, findings, and outcomes. Focus on what was concluded "
+            "rather than the back-and-forth process."
+        )
+        transcript = "\n".join([f"[{m.role}]: {m.content}" for m in messages])
+
+        reply = provider.complete(
+            messages=[{"role": "user", "content": transcript}],
+            system=instructions + key_hint,
+            max_tokens=500,
+        )
+        return reply.content.strip()
@@ -0,0 +1,600 @@
+"""NodeConversation: Message history management for graph nodes."""
+
+from __future__ import annotations
+
+import json
+import re
+from dataclasses import dataclass
+from typing import Any, Literal, Protocol, runtime_checkable
+
+
+@dataclass
+class Message:
+    """One entry of a node's conversation history.
+
+    Attributes:
+        seq: Monotonic sequence number.
+        role: One of "user", "assistant", or "tool".
+        content: Message text.
+        tool_use_id: Tool-use identifier; emitted as ``tool_call_id`` by ``to_llm_dict``.
+        tool_calls: OpenAI-format tool call list carried by assistant messages.
+        is_error: For tool messages, makes ``to_llm_dict`` prefix the content with "ERROR: ".
+    """
+
+    seq: int
+    role: Literal["user", "assistant", "tool"]
+    content: str
+    tool_use_id: str | None = None
+    tool_calls: list[dict[str, Any]] | None = None
+    is_error: bool = False
+
+    def to_llm_dict(self) -> dict[str, Any]:
+        """Render the message as an OpenAI-format message dict."""
+        if self.role == "user":
+            return {"role": "user", "content": self.content}
+
+        if self.role != "assistant":
+            # Everything that is neither user nor assistant is emitted as a tool result.
+            text = f"ERROR: {self.content}" if self.is_error else self.content
+            return {
+                "role": "tool",
+                "tool_call_id": self.tool_use_id,
+                "content": text,
+            }
+
+        payload: dict[str, Any] = {"role": "assistant", "content": self.content}
+        if self.tool_calls:
+            # An empty or missing tool_calls list is left out entirely.
+            payload["tool_calls"] = self.tool_calls
+        return payload
+
+    def to_storage_dict(self) -> dict[str, Any]:
+        """Serialize for persistence, leaving out fields that are None or a False ``is_error``."""
+        stored: dict[str, Any] = {
+            "seq": self.seq,
+            "role": self.role,
+            "content": self.content,
+        }
+        optional = {"tool_use_id": self.tool_use_id, "tool_calls": self.tool_calls}
+        stored.update({name: value for name, value in optional.items() if value is not None})
+        if self.is_error:
+            stored["is_error"] = self.is_error
+        return stored
+
+    @classmethod
+    def from_storage_dict(cls, data: dict[str, Any]) -> Message:
+        """Rebuild a message from ``to_storage_dict`` output; omitted fields take their defaults."""
+        return cls(
+            data["seq"],
+            data["role"],
+            data["content"],
+            tool_use_id=data.get("tool_use_id"),
+            tool_calls=data.get("tool_calls"),
+            is_error=data.get("is_error", False),
+        )
+
+
+def _extract_spillover_filename(content: str) -> str | None:
+    """Return the filename quoted after ``saved to`` in *content*, or None.
+
+    Targets the note left in truncated tool results (attributed to
+    EventLoopNode._truncate_tool_result()), e.g.
+        "saved to 'tool_github_list_stargazers_abc123.txt'"
+    Only the first occurrence is considered.
+    """
+    found = re.search(r"saved to '([^']+)'", content)
+    if found is None:
+        return None
+    return found.group(1)
+
+
+# ---------------------------------------------------------------------------
+# ConversationStore protocol (Phase 2)
+# ---------------------------------------------------------------------------
+
+
+@runtime_checkable
+class ConversationStore(Protocol):
+    """Protocol for conversation persistence backends.
+
+    Any object providing these coroutine methods can serve as a store. The
+    comments on each method describe how NodeConversation uses it.
+    """
+
+    # Called with a message's seq and its to_storage_dict() payload; also used to
+    # overwrite an existing seq (pruned tool results, compaction summary).
+    async def write_part(self, seq: int, data: dict[str, Any]) -> None: ...
+
+    # Each returned dict is passed to Message.from_storage_dict() on restore.
+    # NOTE(review): restore() does not sort — presumably returned in seq order; confirm.
+    async def read_parts(self) -> list[dict[str, Any]]: ...
+
+    # Receives system_prompt, max_history_tokens, compaction_threshold, output_keys.
+    async def write_meta(self, data: dict[str, Any]) -> None: ...
+
+    # None means the conversation was never persisted; restore() then returns None.
+    async def read_meta(self) -> dict[str, Any] | None: ...
+
+    # Receives {"next_seq": <int>} after each message write, compaction and clear.
+    async def write_cursor(self, data: dict[str, Any]) -> None: ...
+
+    # Read back by restore() to recover next_seq.
+    async def read_cursor(self) -> dict[str, Any] | None: ...
+
+    # Used by compact() and clear() to drop stored parts below a seq.
+    # NOTE(review): the bound is presumably exclusive (seq itself is kept) — confirm.
+    async def delete_parts_before(self, seq: int) -> None: ...
+
+    # close() and destroy() are not called by NodeConversation; their semantics
+    # are left to the implementation.
+    async def close(self) -> None: ...
+
+    async def destroy(self) -> None: ...
+
+
+# ---------------------------------------------------------------------------
+# NodeConversation
+# ---------------------------------------------------------------------------
+
+
+def _try_extract_key(content: str, key: str) -> str | None:
+    """Look up *key*'s value in message content, trying four formats in order.
+
+    1. The whole message parses as a JSON object containing the key.
+    2. A JSON object embedded in the text (located by ``find_json_object``)
+       contains the key.
+    3. A ``key: value`` line.
+    4. A ``key = value`` line.
+
+    JSON values that are not strings are returned re-encoded with
+    ``json.dumps``. For the line formats, the rest of the line after the
+    separator is returned, stripped. Returns None when nothing matches.
+    """
+    from framework.graph.node import find_json_object
+
+    def _lookup(raw: str) -> str | None:
+        # Shared by strategies 1 and 2: decode *raw* and pull *key* out of a dict.
+        try:
+            decoded = json.loads(raw)
+            if isinstance(decoded, dict) and key in decoded:
+                hit = decoded[key]
+                return hit if isinstance(hit, str) else json.dumps(hit)
+        except (json.JSONDecodeError, TypeError):
+            pass
+        return None
+
+    # 1. Whole message is JSON
+    whole = _lookup(content)
+    if whole is not None:
+        return whole
+
+    # 2. Embedded JSON object
+    embedded = find_json_object(content)
+    if embedded:
+        nested = _lookup(embedded)
+        if nested is not None:
+            return nested
+
+    # 3. and 4. "key: value", then "key = value"
+    escaped = re.escape(key)
+    for separator in (":", "="):
+        found = re.search(rf"\b{escaped}\s*{separator}\s*(.+)", content)
+        if found:
+            return found.group(1).strip()
+
+    return None
+
+
+class NodeConversation:
+    """Message history for a graph node with optional write-through persistence.
+
+    When *store* is ``None`` the conversation works purely in-memory.
+    When a :class:`ConversationStore` is supplied every mutation is
+    persisted via write-through (meta is lazily written on the first
+    ``_persist`` call).
+    """
+
+    def __init__(
+        self,
+        system_prompt: str = "",
+        max_history_tokens: int = 32000,
+        compaction_threshold: float = 0.8,
+        output_keys: list[str] | None = None,
+        store: ConversationStore | None = None,
+    ) -> None:
+        """Create an empty conversation.
+
+        Args:
+            system_prompt: Prompt text exposed via ``system_prompt``.
+            max_history_tokens: Token budget used by ``usage_ratio`` and
+                ``needs_compaction``.
+            compaction_threshold: Fraction of the budget at which
+                ``needs_compaction`` reports True.
+            output_keys: Keys whose values are carried into the summary when
+                ``compact`` discards messages.
+            store: Optional persistence backend; ``None`` keeps everything in memory.
+        """
+        # Mutable runtime state
+        self._messages: list[Message] = []
+        self._next_seq: int = 0
+        self._last_api_input_tokens: int | None = None
+        self._meta_persisted: bool = False
+
+        # Configuration (written to the store as metadata on first persist)
+        self._system_prompt = system_prompt
+        self._max_history_tokens = max_history_tokens
+        self._compaction_threshold = compaction_threshold
+        self._output_keys = output_keys
+
+        self._store = store
+
+    # --- Properties --------------------------------------------------------
+
+    @property
+    def system_prompt(self) -> str:
+        """The system prompt this conversation was configured with."""
+        return self._system_prompt
+
+    @property
+    def messages(self) -> list[Message]:
+        """Shallow copy of the history; mutating it leaves the conversation untouched."""
+        return self._messages[:]
+
+    @property
+    def turn_count(self) -> int:
+        """Number of conversational turns, counted as user-role messages."""
+        return len([m for m in self._messages if m.role == "user"])
+
+    @property
+    def message_count(self) -> int:
+        """Number of messages currently held, regardless of role."""
+        return len(self._messages)
+
+    @property
+    def next_seq(self) -> int:
+        """Sequence number the next added message will receive."""
+        return self._next_seq
+
+    # --- Add messages ------------------------------------------------------
+
+    async def _append_and_persist(self, msg: Message) -> Message:
+        """Append *msg*, advance ``_next_seq``, write it through to the store, return it."""
+        self._messages.append(msg)
+        self._next_seq += 1
+        await self._persist(msg)
+        return msg
+
+    async def add_user_message(self, content: str) -> Message:
+        """Add a user message carrying *content* and return it."""
+        return await self._append_and_persist(
+            Message(seq=self._next_seq, role="user", content=content)
+        )
+
+    async def add_assistant_message(
+        self,
+        content: str,
+        tool_calls: list[dict[str, Any]] | None = None,
+    ) -> Message:
+        """Add an assistant message, optionally with its tool calls, and return it."""
+        return await self._append_and_persist(
+            Message(
+                seq=self._next_seq,
+                role="assistant",
+                content=content,
+                tool_calls=tool_calls,
+            )
+        )
+
+    async def add_tool_result(
+        self,
+        tool_use_id: str,
+        content: str,
+        is_error: bool = False,
+    ) -> Message:
+        """Add a tool-result message for *tool_use_id* and return it.
+
+        *is_error* marks the result as a failure (see ``Message.is_error``).
+        """
+        return await self._append_and_persist(
+            Message(
+                seq=self._next_seq,
+                role="tool",
+                content=content,
+                tool_use_id=tool_use_id,
+                is_error=is_error,
+            )
+        )
+
+    # --- Query -------------------------------------------------------------
+
+    def to_llm_messages(self) -> list[dict[str, Any]]:
+        """Render the history as OpenAI-format dicts, without the system prompt.
+
+        The result is passed through ``_repair_orphaned_tool_calls`` so that an
+        assistant message whose tool calls never received results (e.g. a loop
+        cancelled mid-tool-execution) is followed by synthetic error results.
+        """
+        return self._repair_orphaned_tool_calls(
+            [entry.to_llm_dict() for entry in self._messages]
+        )
+
+    @staticmethod
+    def _repair_orphaned_tool_calls(
+        msgs: list[dict[str, Any]],
+    ) -> list[dict[str, Any]]:
+        """Return a copy of *msgs* in which every tool call has a tool-result message.
+
+        For each assistant message with tool calls, the run of tool messages
+        directly after it is inspected. Any call id without a result gets a
+        synthetic error result, inserted immediately after the assistant message.
+        """
+        repaired: list[dict[str, Any]] = []
+        total = len(msgs)
+        for idx, entry in enumerate(msgs):
+            repaired.append(entry)
+            calls = entry.get("tool_calls")
+            if entry.get("role") == "assistant" and calls:
+                # Ids answered by the contiguous tool messages that follow.
+                answered: set[str] = set()
+                cursor = idx + 1
+                while cursor < total and msgs[cursor].get("role") == "tool":
+                    result_id = msgs[cursor].get("tool_call_id")
+                    if result_id:
+                        answered.add(result_id)
+                    cursor += 1
+                # Synthesize a result for each call that was never answered.
+                repaired.extend(
+                    {
+                        "role": "tool",
+                        "tool_call_id": call.get("id"),
+                        "content": "ERROR: Tool execution was interrupted.",
+                    }
+                    for call in calls
+                    if call.get("id") and call.get("id") not in answered
+                )
+        return repaired
+
+    def estimate_tokens(self) -> int:
+        """Return the best token estimate currently available.
+
+        The API-reported input token count (recorded through
+        :meth:`update_token_count`) wins when present. Otherwise the estimate
+        is the total content length of all messages divided by four.
+        """
+        reported = self._last_api_input_tokens
+        if reported is None:
+            # Rough heuristic: about four characters per token.
+            return sum(len(m.content) for m in self._messages) // 4
+        return reported
+
+    def update_token_count(self, actual_input_tokens: int) -> None:
+        """Store actual API input token count for more accurate compaction.
+
+        Called by EventLoopNode after each LLM call with the ``input_tokens``
+        value from the API response.  This value includes system prompt and
+        tool definitions, so it may be higher than a message-only estimate.
+
+        ``estimate_tokens`` returns this value until it is reset to ``None``
+        by ``prune_old_tool_results``, ``compact`` or ``clear``.
+        """
+        self._last_api_input_tokens = actual_input_tokens
+
+    def usage_ratio(self) -> float:
+        """Return estimated tokens divided by *max_history_tokens*.
+
+        A zero or negative ``max_history_tokens`` is treated as "unlimited"
+        and yields 0.0 without computing an estimate.
+        """
+        limit = self._max_history_tokens
+        unlimited = limit <= 0
+        return 0.0 if unlimited else self.estimate_tokens() / limit
+
+    def needs_compaction(self) -> bool:
+        """Return True once estimated usage reaches the compaction threshold.
+
+        The trigger point is ``max_history_tokens * compaction_threshold``.
+        A zero or negative ``max_history_tokens`` means "unlimited" (the same
+        convention as :meth:`usage_ratio`), so compaction is never requested.
+        Without this guard the trigger point would collapse to zero and every
+        conversation, even an empty one, would report that it needs compaction.
+        """
+        if self._max_history_tokens <= 0:
+            return False
+        return self.estimate_tokens() >= self._max_history_tokens * self._compaction_threshold
+
+    # --- Output-key extraction ---------------------------------------------
+
+    def _extract_protected_values(self, messages: list[Message]) -> dict[str, str]:
+        """Scan assistant messages for output_key values before compaction.
+
+        Iterates most-recent-first, so the latest value for each key wins.
+        The scan stops as soon as every key has a value instead of walking the
+        remainder of the history.
+
+        Args:
+            messages: Messages about to be discarded, oldest first.
+
+        Returns:
+            Mapping of output key to extracted value. Empty when no output keys
+            are configured or nothing was found.
+        """
+        if not self._output_keys:
+            return {}
+
+        found: dict[str, str] = {}
+        remaining_keys = set(self._output_keys)
+
+        for msg in reversed(messages):
+            if not remaining_keys:
+                break  # all keys resolved; older messages cannot change the result
+            if msg.role != "assistant":
+                continue
+
+            # Iterate over a snapshot because keys are removed as they are found.
+            for key in list(remaining_keys):
+                value = self._try_extract_key(msg.content, key)
+                if value is not None:
+                    found[key] = value
+                    remaining_keys.discard(key)
+
+        return found
+
+    def _try_extract_key(self, content: str, key: str) -> str | None:
+        """Try 4 strategies to extract a key's value from message content.
+
+        Thin wrapper that delegates to the module-level ``_try_extract_key``.
+        """
+        return _try_extract_key(content, key)
+
+    # --- Lifecycle ---------------------------------------------------------
+
+    async def prune_old_tool_results(
+        self,
+        protect_tokens: int = 5000,
+        min_prune_tokens: int = 2000,
+    ) -> int:
+        """Replace old tool result content with compact placeholders.
+
+        Walks backward through messages. Recent tool results (within
+        *protect_tokens*) are kept intact. Older tool results have their
+        content replaced with a ~100-char placeholder that preserves the
+        spillover filename reference (if any). Message structure (role,
+        seq, tool_use_id) stays valid for the LLM API.
+
+        Error tool results are never pruned — they prevent re-calling
+        failing tools.
+
+        Args:
+            protect_tokens: Estimated-token budget (chars // 4) of the most
+                recent tool results that stay untouched.
+            min_prune_tokens: Minimum estimated tokens that must be reclaimable;
+                below this nothing is pruned.
+
+        Returns the number of messages pruned (0 if nothing was pruned).
+        """
+        if not self._messages:
+            return 0
+
+        # Phase 1: Walk backward, classify tool results as protected vs pruneable
+        protected_tokens = 0
+        pruneable: list[int] = []  # indices into self._messages
+        pruneable_tokens = 0
+
+        for i in range(len(self._messages) - 1, -1, -1):
+            msg = self._messages[i]
+            if msg.role != "tool":
+                continue
+            if msg.is_error:
+                continue  # never prune errors
+            if msg.content.startswith("[Pruned tool result"):
+                continue  # already pruned
+
+            est = len(msg.content) // 4
+            # The result that crosses the budget is still protected; only results
+            # encountered after the budget is used up become pruneable.
+            if protected_tokens < protect_tokens:
+                protected_tokens += est
+            else:
+                pruneable.append(i)
+                pruneable_tokens += est
+
+        # Phase 2: Only prune if enough to be worthwhile
+        if pruneable_tokens < min_prune_tokens:
+            return 0
+
+        # Phase 3: Replace content with compact placeholder
+        count = 0
+        for i in pruneable:
+            msg = self._messages[i]
+            orig_len = len(msg.content)
+            spillover = _extract_spillover_filename(msg.content)
+
+            if spillover:
+                placeholder = (
+                    f"[Pruned tool result: {orig_len} chars. "
+                    f"Full data in '{spillover}'. "
+                    f"Use load_data('{spillover}') to retrieve.]"
+                )
+            else:
+                placeholder = f"[Pruned tool result: {orig_len} chars cleared from context.]"
+
+            # Swap in a new Message with identical metadata; only the content changes.
+            self._messages[i] = Message(
+                seq=msg.seq,
+                role=msg.role,
+                content=placeholder,
+                tool_use_id=msg.tool_use_id,
+                tool_calls=msg.tool_calls,
+                is_error=msg.is_error,
+            )
+            count += 1
+
+            # Overwrite the stored part under the same seq so the store matches memory.
+            if self._store:
+                await self._store.write_part(msg.seq, self._messages[i].to_storage_dict())
+
+        # Reset token estimate — content lengths changed
+        self._last_api_input_tokens = None
+        return count
+
+    async def compact(self, summary: str, keep_recent: int = 2) -> None:
+        """Replace old messages with a summary, optionally keeping recent ones.
+
+        Afterwards the history is ``[summary message] + kept messages``, with
+        the summary stored as a user-role message. When ``output_keys`` are
+        configured, values found for them in the discarded assistant messages
+        are prepended to the summary text. With a store attached, older parts
+        are deleted and the summary and cursor are written. Does nothing on an
+        empty conversation.
+
+        Args:
+            summary: Caller-provided summary text.
+            keep_recent: Number of recent messages to preserve (default 2).
+                         Clamped to [0, len(messages) - 1]. Fewer are kept when
+                         the preserved span would start with tool-result messages.
+        """
+        if not self._messages:
+            return
+
+        # Clamp: must discard at least 1 message
+        keep_recent = max(0, min(keep_recent, len(self._messages) - 1))
+
+        total = len(self._messages)
+        split = total - keep_recent if keep_recent > 0 else total
+
+        # Advance split past orphaned tool results at the boundary.
+        # Tool-role messages reference a tool_use from the preceding
+        # assistant message; if that assistant message falls into the
+        # compacted (old) portion the tool_result becomes invalid.
+        while split < total and self._messages[split].role == "tool":
+            split += 1
+
+        old_messages = list(self._messages[:split])
+        recent_messages = list(self._messages[split:])
+
+        # Extract protected values from messages being discarded
+        if self._output_keys:
+            protected = self._extract_protected_values(old_messages)
+            if protected:
+                lines = ["PRESERVED VALUES (do not lose these):"]
+                for k, v in protected.items():
+                    lines.append(f"- {k}: {v}")
+                lines.append("")
+                lines.append("CONVERSATION SUMMARY:")
+                lines.append(summary)
+                summary = "\n".join(lines)
+
+        # Determine summary seq
+        if recent_messages:
+            # Reuse the slot just before the first kept message so the summary
+            # sorts ahead of it without consuming a new sequence number.
+            summary_seq = recent_messages[0].seq - 1
+        else:
+            # Nothing kept: the summary takes a fresh sequence number.
+            summary_seq = self._next_seq
+            self._next_seq += 1
+
+        summary_msg = Message(seq=summary_seq, role="user", content=summary)
+
+        # Persist
+        if self._store:
+            # Delete first, then write the summary: summary_seq is below
+            # delete_before in both branches, so the opposite order would
+            # remove the summary that was just written.
+            delete_before = recent_messages[0].seq if recent_messages else self._next_seq
+            await self._store.delete_parts_before(delete_before)
+            await self._store.write_part(summary_msg.seq, summary_msg.to_storage_dict())
+            await self._store.write_cursor({"next_seq": self._next_seq})
+
+        self._messages = [summary_msg] + recent_messages
+        self._last_api_input_tokens = None  # reset; next LLM call will recalibrate
+
+    async def clear(self) -> None:
+        """Drop every message while keeping the system prompt and ``_next_seq``.
+
+        With a store attached, all parts below ``_next_seq`` are deleted and
+        the cursor is rewritten before the in-memory list is emptied.
+        """
+        upto = self._next_seq
+        if self._store:
+            await self._store.delete_parts_before(upto)
+            await self._store.write_cursor({"next_seq": self._next_seq})
+        # Empty the list in place rather than rebinding the attribute.
+        del self._messages[:]
+        self._last_api_input_tokens = None
+
+    def export_summary(self) -> str:
+        """Render a text report with [STATS], [CONFIG] and [RECENT_MESSAGES] sections.
+
+        The system prompt preview is cut to 80 characters and each of the last
+        five messages to 60, with "..." appended when something was cut.
+        """
+        prompt = self._system_prompt
+        prompt_preview = prompt if len(prompt) <= 80 else prompt[:80] + "..."
+
+        report = [
+            "[STATS]",
+            f"turns: {self.turn_count}",
+            f"messages: {self.message_count}",
+            f"estimated_tokens: {self.estimate_tokens()}",
+            "",
+            "[CONFIG]",
+            f"system_prompt: {prompt_preview!r}",
+        ]
+
+        if self._output_keys:
+            report.append(f"output_keys: {', '.join(self._output_keys)}")
+
+        report.extend(["", "[RECENT_MESSAGES]"])
+        for entry in self._messages[-5:]:
+            text = entry.content
+            preview = text if len(text) <= 60 else text[:60] + "..."
+            report.append(f"  [{entry.role}] {preview}")
+
+        return "\n".join(report)
+
+    # --- Persistence internals ---------------------------------------------
+
+    async def _persist(self, message: Message) -> None:
+        """Write *message* and the current cursor through to the store.
+
+        Does nothing without a store. The first call also writes the
+        conversation metadata before the message itself.
+        """
+        if self._store is not None:
+            if not self._meta_persisted:
+                await self._persist_meta()
+            # Part first, cursor second.
+            await self._store.write_part(message.seq, message.to_storage_dict())
+            await self._store.write_cursor({"next_seq": self._next_seq})
+
+    async def _persist_meta(self) -> None:
+        """Write the conversation's configuration to the store and mark it as written.
+
+        Does nothing without a store. ``_persist`` calls this once, before the
+        first message is written.
+        """
+        store = self._store
+        if store is None:
+            return
+        meta = {
+            "system_prompt": self._system_prompt,
+            "max_history_tokens": self._max_history_tokens,
+            "compaction_threshold": self._compaction_threshold,
+            "output_keys": self._output_keys,
+        }
+        await store.write_meta(meta)
+        self._meta_persisted = True
+
+    # --- Restore -----------------------------------------------------------
+
+    @classmethod
+    async def restore(cls, store: ConversationStore) -> NodeConversation | None:
+        """Reconstruct a NodeConversation from a store.
+
+        Configuration comes from the stored metadata (missing entries fall back
+        to the constructor defaults) and messages from the stored parts.
+
+        ``next_seq`` is the larger of the cursor's ``next_seq`` and one past the
+        highest stored message seq. The cursor alone is not trusted because
+        ``_persist`` writes the message part before the cursor: an interruption
+        between the two writes leaves the cursor one step behind, and reusing
+        it would give the next message the seq of the last stored one. A
+        cursor without a ``next_seq`` entry is treated as absent.
+
+        Args:
+            store: Backend to read metadata, parts and cursor from. It is also
+                attached to the restored conversation for subsequent writes.
+
+        Returns:
+            The restored conversation, or ``None`` if the store contains no
+            metadata (i.e. the conversation was never persisted).
+        """
+        meta = await store.read_meta()
+        if meta is None:
+            return None
+
+        conv = cls(
+            system_prompt=meta.get("system_prompt", ""),
+            max_history_tokens=meta.get("max_history_tokens", 32000),
+            compaction_threshold=meta.get("compaction_threshold", 0.8),
+            output_keys=meta.get("output_keys"),
+            store=store,
+        )
+        # Metadata already exists in the store; do not rewrite it on the next persist.
+        conv._meta_persisted = True
+
+        parts = await store.read_parts()
+        conv._messages = [Message.from_storage_dict(p) for p in parts]
+
+        # One past the highest stored seq (0 for an empty history). max() rather
+        # than the last element, so the result does not depend on part order.
+        next_seq = max((m.seq + 1 for m in conv._messages), default=0)
+
+        cursor = await store.read_cursor()
+        if cursor:
+            next_seq = max(next_seq, cursor.get("next_seq", 0))
+
+        conv._next_seq = next_seq
+        return conv
@@ -11,7 +11,6 @@ our edges can be created dynamically by a Builder agent based on the goal.

 Edge Types:
 - always: Always traverse after source completes
- always: Always traverse after source completes
 - on_success: Traverse only if source succeeds
 - on_failure: Traverse only if source fails
 - conditional: Traverse based on expression evaluation (SAFE SUBSET ONLY)
@@ -22,7 +21,7 @@ allowing the LLM to evaluate whether proceeding along an edge makes sense
 given the current goal, context, and execution state.
 """

-from enum import Enum
+from enum import StrEnum
 from typing import Any

 from pydantic import BaseModel, Field
@@ -30,7 +29,7 @@ from pydantic import BaseModel, Field
 from framework.graph.safe_eval import safe_eval


-class EdgeCondition(str, Enum):
+class EdgeCondition(StrEnum):
    """When an edge should be traversed."""

    ALWAYS = "always"  # Always after source completes
@@ -609,4 +608,40 @@ class GraphSpec(BaseModel):
                    continue
                errors.append(f"Node '{node.id}' is unreachable from entry")

+        # Client-facing fan-out validation
+        fan_outs = self.detect_fan_out_nodes()
+        for source_id, targets in fan_outs.items():
+            client_facing_targets = [
+                t
+                for t in targets
+                if self.get_node(t) and getattr(self.get_node(t), "client_facing", False)
+            ]
+            if len(client_facing_targets) > 1:
+                errors.append(
+                    f"Fan-out from '{source_id}' has multiple client-facing nodes: "
+                    f"{client_facing_targets}. Only one branch may be client-facing."
+                )
+
+        # Output key overlap on parallel event_loop nodes
+        for source_id, targets in fan_outs.items():
+            event_loop_targets = [
+                t
+                for t in targets
+                if self.get_node(t) and getattr(self.get_node(t), "node_type", "") == "event_loop"
+            ]
+            if len(event_loop_targets) > 1:
+                seen_keys: dict[str, str] = {}
+                for node_id in event_loop_targets:
+                    node = self.get_node(node_id)
+                    for key in getattr(node, "output_keys", []):
+                        if key in seen_keys:
+                            errors.append(
+                                f"Fan-out from '{source_id}': event_loop nodes "
+                                f"'{seen_keys[key]}' and '{node_id}' both write to "
+                                f"output_key '{key}'. Parallel event_loop nodes must "
+                                f"have disjoint output_keys to prevent last-wins data loss."
+                            )
+                        else:
+                            seen_keys[key] = node_id
+
        return errors
@@ -11,11 +11,12 @@ The executor:

 import asyncio
 import logging
+import warnings
 from collections.abc import Callable
 from dataclasses import dataclass, field
 from typing import Any

-from framework.graph.edge import EdgeSpec, GraphSpec
+from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
 from framework.graph.goal import Goal
 from framework.graph.node import (
    FunctionNode,
@@ -47,6 +48,26 @@ class ExecutionResult:
    paused_at: str | None = None  # Node ID where execution paused for HITL
    session_state: dict[str, Any] = field(default_factory=dict)  # State to resume from

+    # Execution quality metrics
+    total_retries: int = 0  # Total number of retries across all nodes
+    nodes_with_failures: list[str] = field(default_factory=list)  # Failed but recovered
+    retry_details: dict[str, int] = field(default_factory=dict)  # {node_id: retry_count}
+    had_partial_failures: bool = False  # True if any node failed but eventually succeeded
+    execution_quality: str = "clean"  # "clean", "degraded", or "failed"
+
+    # Visit tracking (for feedback/callback edges)
+    node_visit_counts: dict[str, int] = field(default_factory=dict)  # {node_id: visit_count}
+
+    @property
+    def is_clean_success(self) -> bool:
+        """Whether the run succeeded and its execution_quality is "clean"."""
+        # A failed run is never a clean success, whatever its quality label says.
+        if not self.success:
+            return False
+        return self.execution_quality == "clean"
+
+    @property
+    def is_degraded_success(self) -> bool:
+        """Whether the run succeeded and its execution_quality is "degraded"."""
+        # A failed run is reported as "failed", never as a degraded success.
+        if not self.success:
+            return False
+        return self.execution_quality == "degraded"
+

@dataclass
 class ParallelBranch:
@@ -233,6 +254,8 @@ class GraphExecutor:
        total_tokens = 0
        total_latency = 0
        node_retry_counts: dict[str, int] = {}  # Track retries per node
+        node_visit_counts: dict[str, int] = {}  # Track visits for feedback loops
+        _is_retry = False  # True when looping back for a retry (not a new visit)

        # Determine entry point (may differ if resuming)
        current_node_id = graph.get_entry_point(session_state)
@@ -261,6 +284,34 @@ class GraphExecutor:
                if node_spec is None:
                    raise RuntimeError(f"Node not found: {current_node_id}")

+                # Enforce max_node_visits (feedback/callback edge support)
+                # Don't increment visit count on retries — retries are not new visits
+                if not _is_retry:
+                    cnt = node_visit_counts.get(current_node_id, 0) + 1
+                    node_visit_counts[current_node_id] = cnt
+                _is_retry = False
+                max_visits = getattr(node_spec, "max_node_visits", 1)
+                if max_visits > 0 and node_visit_counts[current_node_id] > max_visits:
+                    self.logger.warning(
+                        f"   ⊘ Node '{node_spec.name}' visit limit reached "
+                        f"({node_visit_counts[current_node_id]}/{max_visits}), skipping"
+                    )
+                    # Skip execution — follow outgoing edges using current memory
+                    skip_result = NodeResult(success=True, output=memory.read_all())
+                    next_node = self._follow_edges(
+                        graph=graph,
+                        goal=goal,
+                        current_node_id=current_node_id,
+                        current_node_spec=node_spec,
+                        result=skip_result,
+                        memory=memory,
+                    )
+                    if next_node is None:
+                        self.logger.info("   → No more edges after visit limit, ending")
+                        break
+                    current_node_id = next_node
+                    continue
+
                path.append(current_node_id)

                # Check if pause (HITL) before execution
@@ -363,6 +414,15 @@ class GraphExecutor:
                    # [CORRECTED] Use node_spec.max_retries instead of hardcoded 3
                    max_retries = getattr(node_spec, "max_retries", 3)

+                    # Event loop nodes handle retry internally via judge —
+                    # executor retry is catastrophic (retry multiplication)
+                    if node_spec.node_type == "event_loop" and max_retries > 0:
+                        self.logger.warning(
+                            f"EventLoopNode '{node_spec.id}' has max_retries={max_retries}. "
+                            "Overriding to 0 — event loop nodes handle retry internally via judge."
+                        )
+                        max_retries = 0
+
                    if node_retry_counts[current_node_id] < max_retries:
                        # Retry - don't increment steps for retries
                        steps -= 1
@@ -378,6 +438,7 @@ class GraphExecutor:
                        self.logger.info(
                            f"   ↻ Retrying ({node_retry_counts[current_node_id]}/{max_retries})..."
                        )
+                        _is_retry = True
                        continue
                    else:
                        # Max retries exceeded - fail the execution
@@ -399,6 +460,11 @@ class GraphExecutor:
                                f"{max_retries} retries: {result.error}"
                            ),
                        )
+
+                        # Calculate quality metrics
+                        total_retries_count = sum(node_retry_counts.values())
+                        nodes_failed = list(node_retry_counts.keys())
+
                        return ExecutionResult(
                            success=False,
                            error=(
@@ -410,6 +476,12 @@ class GraphExecutor:
                            total_tokens=total_tokens,
                            total_latency_ms=total_latency,
                            path=path,
+                            total_retries=total_retries_count,
+                            nodes_with_failures=nodes_failed,
+                            retry_details=dict(node_retry_counts),
+                            had_partial_failures=len(nodes_failed) > 0,
+                            execution_quality="failed",
+                            node_visit_counts=dict(node_visit_counts),
                        )

                # Check if we just executed a pause node - if so, save state and return
@@ -430,6 +502,11 @@ class GraphExecutor:
                        narrative=f"Paused at {node_spec.name} after {steps} steps",
                    )

+                    # Calculate quality metrics
+                    total_retries_count = sum(node_retry_counts.values())
+                    nodes_failed = [nid for nid, count in node_retry_counts.items() if count > 0]
+                    exec_quality = "degraded" if total_retries_count > 0 else "clean"
+
                    return ExecutionResult(
                        success=True,
                        output=saved_memory,
@@ -439,6 +516,12 @@ class GraphExecutor:
                        path=path,
                        paused_at=node_spec.id,
                        session_state=session_state_out,
+                        total_retries=total_retries_count,
+                        nodes_with_failures=nodes_failed,
+                        retry_details=dict(node_retry_counts),
+                        had_partial_failures=len(nodes_failed) > 0,
+                        execution_quality=exec_quality,
+                        node_visit_counts=dict(node_visit_counts),
                    )

                # Check if this is a terminal node - if so, we're done
@@ -527,10 +610,24 @@ class GraphExecutor:
            self.logger.info(f"   Total tokens: {total_tokens}")
            self.logger.info(f"   Total latency: {total_latency}ms")

+            # Calculate execution quality metrics
+            total_retries_count = sum(node_retry_counts.values())
+            nodes_failed = [nid for nid, count in node_retry_counts.items() if count > 0]
+            exec_quality = "degraded" if total_retries_count > 0 else "clean"
+
+            # Update narrative to reflect execution quality
+            quality_suffix = ""
+            if exec_quality == "degraded":
+                retries = total_retries_count
+                failed = len(nodes_failed)
+                quality_suffix = f" ({retries} retries across {failed} nodes)"
+
            self.runtime.end_run(
                success=True,
                output_data=output,
-                narrative=f"Executed {steps} steps through path: {' -> '.join(path)}",
+                narrative=(
+                    f"Executed {steps} steps through path: {' -> '.join(path)}{quality_suffix}"
+                ),
            )

            return ExecutionResult(
@@ -540,6 +637,12 @@ class GraphExecutor:
                total_tokens=total_tokens,
                total_latency_ms=total_latency,
                path=path,
+                total_retries=total_retries_count,
+                nodes_with_failures=nodes_failed,
+                retry_details=dict(node_retry_counts),
+                had_partial_failures=len(nodes_failed) > 0,
+                execution_quality=exec_quality,
+                node_visit_counts=dict(node_visit_counts),
            )

        except Exception as e:
@@ -551,11 +654,22 @@ class GraphExecutor:
                success=False,
                narrative=f"Failed at step {steps}: {e}",
            )
+
+            # Calculate quality metrics even for exceptions
+            total_retries_count = sum(node_retry_counts.values())
+            nodes_failed = list(node_retry_counts.keys())
+
            return ExecutionResult(
                success=False,
                error=str(e),
                steps_executed=steps,
                path=path,
+                total_retries=total_retries_count,
+                nodes_with_failures=nodes_failed,
+                retry_details=dict(node_retry_counts),
+                had_partial_failures=len(nodes_failed) > 0,
+                execution_quality="failed",
+                node_visit_counts=dict(node_visit_counts),
            )

    def _build_context(
@@ -592,7 +706,15 @@ class GraphExecutor:
        )

    # Valid node types - no ambiguous "llm" type allowed
-    VALID_NODE_TYPES = {"llm_tool_use", "llm_generate", "router", "function", "human_input"}
+    VALID_NODE_TYPES = {
+        "llm_tool_use",
+        "llm_generate",
+        "router",
+        "function",
+        "human_input",
+        "event_loop",
+    }
+    # Deprecated node type -> replacement type. _get_node_implementation looks a
+    # node's type up here and emits a DeprecationWarning naming the replacement.
+    DEPRECATED_NODE_TYPES = {"llm_tool_use": "event_loop", "llm_generate": "event_loop"}

    def _get_node_implementation(
        self, node_spec: NodeSpec, cleanup_llm_model: str | None = None
@@ -610,6 +732,17 @@ class GraphExecutor:
                f"Use 'llm_tool_use' for nodes that call tools, 'llm_generate' for text generation."
            )

+        # Warn on deprecated node types
+        if node_spec.node_type in self.DEPRECATED_NODE_TYPES:
+            replacement = self.DEPRECATED_NODE_TYPES[node_spec.node_type]
+            warnings.warn(
+                f"Node type '{node_spec.node_type}' is deprecated. "
+                f"Use '{replacement}' instead. "
+                f"Node: '{node_spec.id}'",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+
        # Create based on type
        if node_spec.node_type == "llm_tool_use":
            if not node_spec.tools:
@@ -647,6 +780,13 @@ class GraphExecutor:
                cleanup_llm_model=cleanup_llm_model,
            )

+        if node_spec.node_type == "event_loop":
+            # Event loop nodes must be pre-registered (like function nodes)
+            raise RuntimeError(
+                f"EventLoopNode '{node_spec.id}' not found in registry. "
+                "Register it with executor.register_node() before execution."
+            )
+
        # Should never reach here due to validation above
        raise RuntimeError(f"Unhandled node type: {node_spec.node_type}")

@@ -757,6 +897,21 @@ class GraphExecutor:
            ):
                traversable.append(edge)

+        # Priority filtering for CONDITIONAL edges:
+        # When multiple CONDITIONAL edges match, keep only the highest-priority
+        # group.  This prevents mutually-exclusive conditional branches (e.g.
+        # forward vs. feedback) from incorrectly triggering fan-out.
+        # ON_SUCCESS / other edge types are unaffected.
+        if len(traversable) > 1:
+            conditionals = [e for e in traversable if e.condition == EdgeCondition.CONDITIONAL]
+            if len(conditionals) > 1:
+                max_prio = max(e.priority for e in conditionals)
+                traversable = [
+                    e
+                    for e in traversable
+                    if e.condition != EdgeCondition.CONDITIONAL or e.priority == max_prio
+                ]
+
        return traversable

    def _find_convergence_node(
@@ -843,6 +998,17 @@ class GraphExecutor:
                branch.status = "failed"
                branch.error = f"Node {branch.node_id} not found in graph"
                return branch, RuntimeError(branch.error)
+
+            effective_max_retries = node_spec.max_retries
+            if node_spec.node_type == "event_loop":
+                if effective_max_retries > 1:
+                    self.logger.warning(
+                        f"EventLoopNode '{node_spec.id}' has "
+                        f"max_retries={effective_max_retries}. Overriding "
+                        "to 1 — event loop nodes handle retry internally."
+                    )
+                effective_max_retries = 1
+
            branch.status = "running"

            try:
@@ -876,7 +1042,7 @@ class GraphExecutor:

                # Execute with retries
                last_result = None
-                for attempt in range(node_spec.max_retries):
+                for attempt in range(effective_max_retries):
                    branch.retry_count = attempt

                    # Build context for this branch
@@ -904,7 +1070,7 @@ class GraphExecutor:

                    self.logger.warning(
                        f"      ↻ Branch {node_spec.name}: "
-                        f"retry {attempt + 1}/{node_spec.max_retries}"
+                        f"retry {attempt + 1}/{effective_max_retries}"
                    )

                # All retries exhausted
@@ -913,7 +1079,7 @@ class GraphExecutor:
                branch.result = last_result
                self.logger.error(
                    f"      ✗ Branch {node_spec.name}: "
-                    f"failed after {node_spec.max_retries} attempts"
+                    f"failed after {effective_max_retries} attempts"
                )
                return branch, last_result

@@ -12,13 +12,13 @@ Goals are:
 """

 from datetime import datetime
-from enum import Enum
+from enum import StrEnum
 from typing import Any

 from pydantic import BaseModel, Field


-class GoalStatus(str, Enum):
+class GoalStatus(StrEnum):
    """Lifecycle status of a goal."""

    DRAFT = "draft"  # Being defined
@@ -6,11 +6,11 @@ where agents need to gather input from humans.
 """

 from dataclasses import dataclass, field
-from enum import Enum
+from enum import StrEnum
 from typing import Any


-class HITLInputType(str, Enum):
+class HITLInputType(StrEnum):
    """Type of input expected from human."""

    FREE_TEXT = "free_text"  # Open-ended text response
@@ -16,10 +16,12 @@ Protocol:
 """

 import asyncio
+import inspect
 import logging
 from abc import ABC, abstractmethod
 from collections.abc import Callable
 from dataclasses import dataclass, field
+from datetime import UTC
 from typing import Any

 from pydantic import BaseModel, Field
@@ -153,7 +155,10 @@ class NodeSpec(BaseModel):
    # Node behavior type
    node_type: str = Field(
        default="llm_tool_use",
-        description="Type: 'llm_tool_use', 'llm_generate', 'function', 'router', 'human_input'",
+        description=(
+            "Type: 'event_loop', 'function', 'router', 'human_input'. "
+            "Deprecated: 'llm_tool_use', 'llm_generate' (use 'event_loop' instead)."
+        ),
    )

    # Data flow
@@ -205,6 +210,15 @@ class NodeSpec(BaseModel):
    max_retries: int = Field(default=3)
    retry_on: list[str] = Field(default_factory=list, description="Error types to retry on")

+    # Visit limits (for feedback/callback edges)
+    max_node_visits: int = Field(
+        default=1,
+        description=(
+            "Max times this node executes in one graph run. "
+            "Set >1 for feedback loops. 0 = unlimited (max_steps guards)."
+        ),
+    )
+
    # Pydantic model for output validation
    output_model: type[BaseModel] | None = Field(
        default=None,
@@ -218,6 +232,12 @@ class NodeSpec(BaseModel):
        description="Maximum retries when Pydantic validation fails (with feedback to LLM)",
    )

+    # Client-facing behavior
+    client_facing: bool = Field(
+        default=False,
+        description="If True, this node streams output to the end user and can request input.",
+    )
+
    model_config = {"extra": "allow", "arbitrary_types_allowed": True}


@@ -669,6 +689,137 @@ Keep the same JSON structure but with shorter content values.
            return match.group(1).strip()
        return content

+    def _estimate_tokens(
+        self, model: str, system: str, messages: list[dict], tools: list | None
+    ) -> int:
+        """Estimate total input tokens for an LLM call.
+
+        Counts the system prompt, the stringified ``content`` of every message
+        and, when tools are given, a JSON rendering of each tool's name,
+        description and parameters. litellm's tokenizer is used when litellm is
+        importable; otherwise a rough 4-characters-per-token heuristic applies.
+
+        Args:
+            model: Model name passed to litellm's token counter.
+            system: System prompt text; an empty or missing prompt counts as zero.
+            messages: Chat messages; only each message's ``content`` is counted.
+            tools: Objects exposing ``name``, ``description`` and ``parameters``,
+                or None/empty when no tools are sent.
+
+        Returns:
+            The estimated number of input tokens.
+        """
+        import json
+
+        # Collect every text fragment once so both counting strategies see
+        # exactly the same input (and the tool JSON is built in one place).
+        fragments = [system or ""]
+        fragments.extend(str(m.get("content", "")) for m in messages)
+        if tools:
+            tool_specs = [
+                {"name": t.name, "description": t.description, "parameters": t.parameters}
+                for t in tools
+            ]
+            fragments.append(json.dumps(tool_specs, default=str))
+
+        try:
+            import litellm as _litellm
+        except ImportError:
+            # Rough estimate: 1 token ≈ 4 chars
+            return sum(len(text) for text in fragments) // 4
+
+        # Empty fragments are skipped rather than handed to the tokenizer.
+        return sum(_litellm.token_counter(model=model, text=text) for text in fragments if text)
+
+    def _get_context_limit(self, model: str) -> int:
+        """Return the usable input-token budget for ``model``.
+
+        The budget is 80% of the model's ``max_input_tokens`` (or ``max_tokens``,
+        or 8192 when litellm reports neither). If litellm cannot be imported or
+        the lookup fails for any reason, a flat 8192 is returned instead.
+        """
+        default_window = 8192
+        try:
+            import litellm as _litellm
+
+            model_info = _litellm.get_model_info(model)
+            window = model_info.get("max_input_tokens")
+            if not window:
+                window = model_info.get("max_tokens") or default_window
+            return int(window * 0.8)
+        except Exception:
+            # Deliberate best-effort: any import or lookup problem yields the default.
+            return default_window
+
+    def _compact_inputs(
+        self, ctx: NodeContext, system: str, messages: list[dict], tools: list | None
+    ) -> list[dict]:
+        """Compact message inputs if they exceed the model's context window.
+
+        Only the first message is compacted. Its content is read as
+        newline-separated ``key: value`` lines, and the longest value is
+        repeatedly halved (never below 100 characters, with "..." appended)
+        until the estimated token count fits the budget from
+        ``_get_context_limit`` or nothing can be shortened any further.
+
+        Args:
+            ctx: Node context; ``ctx.llm.model`` selects the model, falling back
+                to "gpt-3.5-turbo" when the provider has no ``model`` attribute.
+            system: System prompt, counted towards the estimate.
+            messages: Chat messages about to be sent. The list is not mutated.
+            tools: Tool definitions, counted towards the estimate.
+
+        Returns:
+            ``messages`` itself when it already fits or its first message has no
+            plain-text content; otherwise a new list whose first message carries
+            the (possibly truncated) content. The first message's other fields
+            and all later messages are kept. The result may still exceed the
+            budget; in that case a warning is logged and it is returned anyway.
+        """
+        min_value_len = 100  # never cut a value below this many characters
+        ellipsis = "..."  # marker appended to every truncated value
+        max_iterations = 20
+
+        model = ctx.llm.model if hasattr(ctx.llm, "model") else "gpt-3.5-turbo"
+        budget = self._get_context_limit(model)
+        estimated = self._estimate_tokens(model, system, messages, tools)
+
+        if estimated <= budget:
+            return messages
+
+        logger.warning(
+            f"[compaction] Input tokens (~{estimated}) exceed budget ({budget}) "
+            f"for model {model}. Compacting inputs..."
+        )
+
+        # Only non-empty plain-text content can be split into lines; structured
+        # (non-string) content is left untouched rather than crashing on split().
+        content = messages[0].get("content") if messages else None
+        if not content or not isinstance(content, str):
+            return messages
+
+        # Parse the first message into (key, value) pairs for selective
+        # truncation. Lines without a "key: " prefix (including lines that start
+        # with ": ") are stored verbatim under an empty key so that reassembly
+        # reproduces them exactly.
+        pairs: list[tuple[str, str]] = []
+        for line in content.split("\n"):
+            key, sep, value = line.partition(": ")
+            pairs.append((key, value) if sep and key else ("", line))
+
+        def rebuild() -> list[dict]:
+            """Reassemble the message list around the current pairs."""
+            text = "\n".join(f"{k}: {v}" if k else v for k, v in pairs)
+            # Keep the first message's role/extra fields and every later message.
+            return [{**messages[0], "content": text}, *messages[1:]]
+
+        # Iteratively halve the longest value until we fit
+        for round_no in range(1, max_iterations + 1):
+            # Only keyed values are candidates; verbatim lines are never cut.
+            keyed = [idx for idx, (k, _) in enumerate(pairs) if k]
+            if not keyed:
+                break
+            longest_idx = max(keyed, key=lambda idx: len(pairs[idx][1]))
+            longest_len = len(pairs[longest_idx][1])
+
+            # Stop once truncating to the floor plus the marker would no longer
+            # make the value shorter.
+            if longest_len <= min_value_len + len(ellipsis):
+                break
+
+            key, value = pairs[longest_idx]
+            new_len = max(longest_len // 2, min_value_len)
+            pairs[longest_idx] = (key, value[:new_len] + ellipsis)
+            logger.warning(f"[compaction] Truncated '{key}' from {longest_len} to {new_len} chars")
+
+            # Re-estimate against the full message list, not just the first message
+            compacted = rebuild()
+            estimated = self._estimate_tokens(model, system, compacted, tools)
+            if estimated <= budget:
+                logger.warning(
+                    f"[compaction] Fits within budget after {round_no} rounds "
+                    f"(~{estimated} tokens)"
+                )
+                return compacted
+
+        # Final reassembly even if still over budget
+        final_messages = rebuild()
+        final_est = self._estimate_tokens(model, system, final_messages, tools)
+        logger.warning(
+            f"[compaction] Still ~{final_est} tokens after max compaction "
+            f"(budget={budget}). Proceeding anyway."
+        )
+        return final_messages
+
    async def execute(self, ctx: NodeContext) -> NodeResult:
        """Execute the LLM node."""
        import time
@@ -711,6 +862,9 @@ Keep the same JSON structure but with shorter content values.
            # Build system prompt
            system = self._build_system_prompt(ctx)

+            # Compact inputs if they exceed the model's context window
+            messages = self._compact_inputs(ctx, system, messages, ctx.available_tools)
+
            # Log the LLM call details
            logger.info("      🤖 LLM Call:")
            logger.info(
@@ -1185,10 +1339,7 @@ Keep the same JSON structure but with shorter content values.
        # Use configured cleanup model, or fall back to defaults
        if cleanup_llm_model:
            # Use the configured cleanup model (LiteLLM handles API keys via env vars)
-            cleaner_llm = LiteLLMProvider(
-                model=cleanup_llm_model,
-                temperature=0.0,
-            )
+            cleaner_llm = LiteLLMProvider(model=cleanup_llm_model)
            logger.info(f"      Using configured cleanup LLM: {cleanup_llm_model}")
        else:
            # Fall back to default logic: Cerebras preferred, then Haiku
@@ -1203,13 +1354,11 @@ Keep the same JSON structure but with shorter content values.
                cleaner_llm = LiteLLMProvider(
                    api_key=os.environ.get("CEREBRAS_API_KEY"),
                    model="cerebras/llama-3.3-70b",
-                    temperature=0.0,
                )
            else:
                cleaner_llm = LiteLLMProvider(
                    api_key=api_key,
                    model="claude-3-5-haiku-20241022",
-                    temperature=0.0,
                )

        prompt = f"""Extract the JSON object from this LLM response.
@@ -1219,7 +1368,9 @@ Expected output keys: {output_keys}
 LLM Response:
 {raw_response}

-Output ONLY the JSON object, nothing else."""
+Output ONLY the JSON object, nothing else.
+If no valid JSON object exists in the response, output exactly: {{"error": "NO_JSON_FOUND"}}
+Do NOT fabricate data or return empty objects."""

        try:
            result = cleaner_llm.complete(
@@ -1266,6 +1417,14 @@ Output ONLY the JSON object, nothing else."""
                parsed = json.loads(cleaned)
            except json.JSONDecodeError:
                parsed = json.loads(_fix_unescaped_newlines_in_json(cleaned))
+
+            # Validate LLM didn't return empty or fabricated data
+            if parsed.get("error") == "NO_JSON_FOUND":
+                raise ValueError("Cannot parse JSON from response")
+            if not parsed or parsed == {}:
+                raise ValueError("Cannot parse JSON from response")
+            if all(v is None for v in parsed.values()):
+                raise ValueError("Cannot parse JSON from response")
            logger.info("      ✓ LLM cleaned JSON output")
            return parsed

@@ -1375,6 +1534,8 @@ Output ONLY the JSON object, nothing else."""

    def _build_system_prompt(self, ctx: NodeContext) -> str:
        """Build the system prompt."""
+        from datetime import datetime
+
        parts = []

        if ctx.node_spec.system_prompt:
@@ -1397,6 +1558,15 @@ Output ONLY the JSON object, nothing else."""

            parts.append(prompt)

+        # Inject current datetime so LLM knows "now"
+        utc_dt = datetime.now(UTC)
+        local_dt = datetime.now().astimezone()
+        local_tz_name = local_dt.tzname() or "Unknown"
+        parts.append("\n## Runtime Context")
+        parts.append(f"- Current Date/Time (UTC): {utc_dt.isoformat()}")
+        parts.append(f"- Local Timezone: {local_tz_name}")
+        parts.append(f"- Current Date/Time (Local): {local_dt.isoformat()}")
+
        if ctx.goal_context:
            parts.append("\n# Goal Context")
            parts.append(ctx.goal_context)
@@ -1598,8 +1768,19 @@ class FunctionNode(NodeProtocol):
        start = time.time()

        try:
-            # Call the function
-            result = self.func(**ctx.input_data)
+            # Filter input_data to only declared input_keys to prevent
+            # leaking extra memory keys from upstream nodes.
+            if ctx.node_spec.input_keys:
+                filtered = {
+                    k: v for k, v in ctx.input_data.items() if k in ctx.node_spec.input_keys
+                }
+            else:
+                filtered = ctx.input_data
+
+            # Call the function (supports both sync and async)
+            result = self.func(**filtered)
+            if inspect.isawaitable(result):
+                result = await result

            latency_ms = int((time.time() - start) * 1000)

@@ -118,7 +118,6 @@ class OutputCleaner:
                    self.llm = LiteLLMProvider(
                        api_key=api_key,
                        model=config.fast_model,
-                        temperature=0.0,  # Deterministic cleaning
                    )
                    logger.info(f"✓ Initialized OutputCleaner with {config.fast_model}")
                else:
@@ -11,13 +11,13 @@ The Plan is the contract between the external planner and the executor:
 """

 from datetime import datetime
-from enum import Enum
+from enum import StrEnum
 from typing import Any

 from pydantic import BaseModel, Field


-class ActionType(str, Enum):
+class ActionType(StrEnum):
    """Types of actions a PlanStep can perform."""

    LLM_CALL = "llm_call"  # Call LLM for generation
@@ -27,7 +27,7 @@ class ActionType(str, Enum):
    CODE_EXECUTION = "code_execution"  # Execute dynamic code (sandboxed)


-class StepStatus(str, Enum):
+class StepStatus(StrEnum):
    """Status of a plan step."""

    PENDING = "pending"
@@ -38,8 +38,25 @@ class StepStatus(str, Enum):
    SKIPPED = "skipped"
    REJECTED = "rejected"  # Human rejected execution

+    def is_terminal(self) -> bool:
+        """Check if this status represents a terminal (finished) state.

-class ApprovalDecision(str, Enum):
+        Terminal states are states where the step will not execute further,
+        either because it completed successfully or failed/was skipped.
+
+        Returns:
+            True for COMPLETED, FAILED, SKIPPED and REJECTED; False for every
+            other status.
+        """
+        return self in (
+            StepStatus.COMPLETED,
+            StepStatus.FAILED,
+            StepStatus.SKIPPED,
+            StepStatus.REJECTED,
+        )
+
+    def is_successful(self) -> bool:
+        """Return True only when this status is COMPLETED."""
+        # Enum members are singletons, so identity is enough here.
+        return self is StepStatus.COMPLETED
+
+
+class ApprovalDecision(StrEnum):
    """Human decision on a step requiring approval."""

    APPROVE = "approve"  # Execute as planned
@@ -74,7 +91,7 @@ class ApprovalResult(BaseModel):
    model_config = {"extra": "allow"}


-class JudgmentAction(str, Enum):
+class JudgmentAction(StrEnum):
    """Actions the judge can take after evaluating a step."""

    ACCEPT = "accept"  # Step completed successfully, continue
@@ -161,11 +178,23 @@ class PlanStep(BaseModel):

    model_config = {"extra": "allow"}

-    def is_ready(self, completed_step_ids: set[str]) -> bool:
-        """Check if this step is ready to execute (all dependencies met)."""
+    def is_ready(self, terminal_step_ids: set[str]) -> bool:
+        """Check if this step is ready to execute (all dependencies finished).
+
+        A step is ready when:
+        1. Its status is PENDING (not yet started)
+        2. All its dependencies are in a terminal state (completed, failed, skipped, or rejected)
+
+        Note: This allows dependent steps to become "ready" even if their dependencies
+        failed. The executor should check if any dependencies failed and handle
+        accordingly (e.g., skip the step or mark it as blocked).
+
+        Args:
+            terminal_step_ids: Set of step IDs that are in a terminal state
+
+        Returns:
+            True if this step is PENDING and every dependency ID is present in
+            ``terminal_step_ids`` (trivially so when there are no dependencies);
+            False otherwise.
+        """
        if self.status != StepStatus.PENDING:
            return False
-        return all(dep in completed_step_ids for dep in self.dependencies)
+        return all(dep in terminal_step_ids for dep in self.dependencies)


 class Judgment(BaseModel):
@@ -327,18 +356,46 @@ class Plan(BaseModel):
        return None

    def get_ready_steps(self) -> list[PlanStep]:
-        """Get all steps that are ready to execute."""
-        completed_ids = {s.id for s in self.steps if s.status == StepStatus.COMPLETED}
-        return [s for s in self.steps if s.is_ready(completed_ids)]
+        """Get all steps that are ready to execute.
+
+        A step is ready when all its dependencies are in terminal states
+        (completed, failed, skipped, or rejected).
+        """
+        # Gather the terminal step IDs into a set once, so each dependency
+        # lookup inside is_ready() is a plain membership test.
+        terminal_ids = {s.id for s in self.steps if s.status.is_terminal()}
+        return [s for s in self.steps if s.is_ready(terminal_ids)]

    def get_completed_steps(self) -> list[PlanStep]:
        """Get all steps whose status is COMPLETED, in plan order."""
        return [s for s in self.steps if s.status == StepStatus.COMPLETED]

    def is_complete(self) -> bool:
-        """Check if all steps are completed."""
+        """Check if all steps are in terminal states (finished executing).
+
+        Returns True when all steps have reached a terminal state, regardless
+        of whether they succeeded or failed. Use has_failed_steps() to check
+        if any steps failed.
+        """
+        # all() is vacuously True for an empty step list, so a plan with no
+        # steps counts as complete.
+        return all(s.status.is_terminal() for s in self.steps)
+
+    def is_successful(self) -> bool:
+        """Check if all steps completed successfully."""
+        # Every step must be COMPLETED; this is vacuously True for a plan
+        # that has no steps.
        return all(s.status == StepStatus.COMPLETED for s in self.steps)

+    def has_failed_steps(self) -> bool:
+        """Report whether at least one step is FAILED, SKIPPED or REJECTED."""
+        for step in self.steps:
+            # Stop at the first unsuccessful step; the rest do not matter.
+            if step.status in (StepStatus.FAILED, StepStatus.SKIPPED, StepStatus.REJECTED):
+                return True
+        return False
+
+    def get_failed_steps(self) -> list[PlanStep]:
+        """Return the steps whose status is FAILED, SKIPPED or REJECTED, in plan order."""
+        unsuccessful: list[PlanStep] = []
+        for step in self.steps:
+            if step.status in (StepStatus.FAILED, StepStatus.SKIPPED, StepStatus.REJECTED):
+                unsuccessful.append(step)
+        return unsuccessful
+
    def to_feedback_context(self) -> dict[str, Any]:
        """Create context for replanning."""
        return {
@@ -366,7 +423,7 @@ class Plan(BaseModel):
        }


-class ExecutionStatus(str, Enum):
+class ExecutionStatus(StrEnum):
    """Status of plan execution."""

    COMPLETED = "completed"
@@ -75,16 +75,6 @@ class SafeEvalVisitor(ast.NodeVisitor):
    def visit_Constant(self, node: ast.Constant) -> Any:
        return node.value

-    # --- Number/String/Bytes/NameConstant (Python < 3.8 compat if needed) ---
-    def visit_Num(self, node: ast.Num) -> Any:
-        return node.n
-
-    def visit_Str(self, node: ast.Str) -> Any:
-        return node.s
-
-    def visit_NameConstant(self, node: ast.NameConstant) -> Any:
-        return node.value
-
    # --- Data Structures ---
    def visit_List(self, node: ast.List) -> list:
        return [self.visit(elt) for elt in node.elts]
@@ -126,14 +126,16 @@ class OutputValidator:

        for key in expected_keys:
            if key not in output:
-                errors.append(f"Missing required output key: '{key}'")
+                if key not in nullable_keys:
+                    errors.append(f"Missing required output key: '{key}'")
            elif not allow_empty:
                value = output[key]
                if value is None:
                    if key not in nullable_keys:
                        errors.append(f"Output key '{key}' is None")
                elif isinstance(value, str) and len(value.strip()) == 0:
-                    errors.append(f"Output key '{key}' is empty string")
+                    if key not in nullable_keys:
+                        errors.append(f"Output key '{key}' is empty string")

        return ValidationResult(success=len(errors) == 0, errors=errors)

@@ -1,8 +1,31 @@
 """LLM provider abstraction."""

 from framework.llm.provider import LLMProvider, LLMResponse
+from framework.llm.stream_events import (
+    FinishEvent,
+    ReasoningDeltaEvent,
+    ReasoningStartEvent,
+    StreamErrorEvent,
+    StreamEvent,
+    TextDeltaEvent,
+    TextEndEvent,
+    ToolCallEvent,
+    ToolResultEvent,
+)

-__all__ = ["LLMProvider", "LLMResponse"]
+__all__ = [
+    "LLMProvider",
+    "LLMResponse",
+    "StreamEvent",
+    "TextDeltaEvent",
+    "TextEndEvent",
+    "ToolCallEvent",
+    "ToolResultEvent",
+    "ReasoningStartEvent",
+    "ReasoningDeltaEvent",
+    "FinishEvent",
+    "StreamErrorEvent",
+]

 try:
    from framework.llm.anthropic import AnthropicProvider  # noqa: F401
@@ -8,17 +8,17 @@ from framework.llm.litellm import LiteLLMProvider
 from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolResult, ToolUse


-def _get_api_key_from_credential_manager() -> str | None:
-    """Get API key from CredentialManager or environment.
+def _get_api_key_from_credential_store() -> str | None:
+    """Get API key from CredentialStoreAdapter or environment.

    Priority:
-    1. CredentialManager (supports .env hot-reload)
+    1. CredentialStoreAdapter (supports encrypted storage + env vars)
    2. os.environ fallback
    """
    try:
-        from aden_tools.credentials import CredentialManager
+        from aden_tools.credentials import CredentialStoreAdapter

-        creds = CredentialManager()
+        creds = CredentialStoreAdapter.default()
        if creds.is_available("anthropic"):
            return creds.get("anthropic")
    except ImportError:
@@ -44,12 +44,12 @@ class AnthropicProvider(LLMProvider):
        Initialize the Anthropic provider.

        Args:
-            api_key: Anthropic API key. If not provided, uses CredentialManager
+            api_key: Anthropic API key. If not provided, uses CredentialStoreAdapter
                     or ANTHROPIC_API_KEY env var.
            model: Model to use (default: claude-haiku-4-5-20251001)
        """
        # Delegate to LiteLLMProvider internally.
-        self.api_key = api_key or _get_api_key_from_credential_manager()
+        self.api_key = api_key or _get_api_key_from_credential_store()
        if not self.api_key:
            raise ValueError(
                "Anthropic API key required. Set ANTHROPIC_API_KEY env var or pass api_key."
@@ -7,16 +7,81 @@ Groq, and local models.
 See: https://docs.litellm.ai/docs/providers
 """

+import asyncio
 import json
-from collections.abc import Callable
+import logging
+import time
+from collections.abc import AsyncIterator, Callable
+from datetime import datetime
+from pathlib import Path
 from typing import Any

 try:
    import litellm
+    from litellm.exceptions import RateLimitError
 except ImportError:
    litellm = None  # type: ignore[assignment]
+    RateLimitError = Exception  # type: ignore[assignment, misc]

 from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolResult, ToolUse
+from framework.llm.stream_events import StreamEvent
+
+logger = logging.getLogger(__name__)
+
+RATE_LIMIT_MAX_RETRIES = 10
+RATE_LIMIT_BACKOFF_BASE = 2  # seconds
+
+# Directory for dumping failed requests
+FAILED_REQUESTS_DIR = Path.home() / ".hive" / "failed_requests"
+
+
+def _estimate_tokens(model: str, messages: list[dict]) -> tuple[int, str]:
+    """Estimate token count for messages. Returns (token_count, method).
+
+    Args:
+        model: Model name handed to litellm's token counter.
+        messages: Chat messages. The fallback only looks at each message's
+            "content" value.
+
+    Returns:
+        ``(token_count, method)`` where ``method`` is ``"litellm"`` when
+        litellm's counter produced the number and ``"estimate"`` when the
+        character-count heuristic was used instead.
+    """
+    # Try litellm's token counter first
+    if litellm is not None:
+        try:
+            count = litellm.token_counter(model=model, messages=messages)
+            return count, "litellm"
+        except Exception:
+            # Best-effort: any failure in the counter falls through to the
+            # heuristic below rather than breaking the caller's logging path.
+            pass
+
+    # Fallback: rough estimate based on character count (~4 chars per token).
+    # "content" can be present but None (e.g. assistant messages that only
+    # carry tool calls); count that as empty instead of the string "None".
+    total_chars = sum(len(str(m.get("content") or "")) for m in messages)
+    return total_chars // 4, "estimate"
+
+
+def _dump_failed_request(
+    model: str,
+    kwargs: dict[str, Any],
+    error_type: str,
+    attempt: int,
+) -> str:
+    """Dump failed request to a file for debugging. Returns the file path.
+
+    The dump is best-effort: it is called from retry paths, so a filesystem
+    error is logged and reported in the return value instead of being raised.
+
+    Args:
+        model: Model name; used in the file name and stored in the dump.
+        kwargs: The completion kwargs. Only "messages", "tools",
+            "max_tokens" and "temperature" are written out.
+        error_type: Short label used as the file-name prefix.
+        attempt: Zero-based retry attempt the failure occurred on.
+
+    Returns:
+        The path of the written file, or a ``"<dump failed: ...>"``
+        placeholder when the file could not be written.
+    """
+    # Take one timestamp so the file name and the payload agree.
+    now = datetime.now()
+    # Model ids may contain characters that are not valid in file names on
+    # every platform (e.g. "/" or ":"), so keep only a conservative subset.
+    safe_model = "".join(c if c.isalnum() or c in "-_." else "_" for c in model)
+    filename = f"{error_type}_{safe_model}_{now.strftime('%Y%m%d_%H%M%S_%f')}.json"
+    filepath = FAILED_REQUESTS_DIR / filename
+
+    # Build dump data
+    messages = kwargs.get("messages", [])
+    dump_data = {
+        "timestamp": now.isoformat(),
+        "model": model,
+        "error_type": error_type,
+        "attempt": attempt,
+        "estimated_tokens": _estimate_tokens(model, messages),
+        "num_messages": len(messages),
+        "messages": messages,
+        "tools": kwargs.get("tools"),
+        "max_tokens": kwargs.get("max_tokens"),
+        "temperature": kwargs.get("temperature"),
+    }
+
+    try:
+        FAILED_REQUESTS_DIR.mkdir(parents=True, exist_ok=True)
+        with open(filepath, "w", encoding="utf-8") as f:
+            # default=str stringifies anything json cannot serialize natively.
+            json.dump(dump_data, f, indent=2, default=str)
+    except OSError as exc:
+        # A read-only or full disk must not turn a retryable LLM error into
+        # a crash; the dump is only a debugging aid.
+        logger.warning(f"Could not dump failed request to {filepath}: {exc}")
+        return f"<dump failed: {exc}>"
+
+    return str(filepath)


 class LiteLLMProvider(LLMProvider):
@@ -85,6 +150,101 @@ class LiteLLMProvider(LLMProvider):
                "LiteLLM is not installed. Please install it with: pip install litellm"
            )

+    def _completion_with_rate_limit_retry(self, **kwargs: Any) -> Any:
+        """Call litellm.completion with retry on 429 rate limit errors and empty responses.
+
+        Makes up to ``RATE_LIMIT_MAX_RETRIES + 1`` attempts, sleeping
+        ``RATE_LIMIT_BACKOFF_BASE * 2**attempt`` seconds between them. Each
+        failed attempt is dumped to disk via ``_dump_failed_request``.
+
+        Args:
+            **kwargs: Passed unchanged to ``litellm.completion``.
+
+        Returns:
+            The litellm response. An empty response is returned as-is when
+            the conversation ends with an assistant message, or when the
+            final attempt is still empty.
+
+        Raises:
+            RateLimitError: Re-raised if the final attempt is rate limited.
+        """
+        model = kwargs.get("model", self.model)
+        for attempt in range(RATE_LIMIT_MAX_RETRIES + 1):
+            try:
+                response = litellm.completion(**kwargs)  # type: ignore[union-attr]
+
+                # Some providers (e.g. Gemini) return 200 with empty content on
+                # rate limit / quota exhaustion instead of a proper 429.  Treat
+                # empty responses the same as a rate-limit error and retry.
+                content = response.choices[0].message.content if response.choices else None
+                has_tool_calls = bool(response.choices and response.choices[0].message.tool_calls)
+                if not content and not has_tool_calls:
+                    # If the conversation ends with an assistant message,
+                    # an empty response is expected — don't retry.
+                    messages = kwargs.get("messages", [])
+                    # Use .get() for the yielded value too: a message without
+                    # a "role" key passes the filter and must not raise KeyError.
+                    last_role = next(
+                        (m.get("role") for m in reversed(messages) if m.get("role") != "system"),
+                        None,
+                    )
+                    if last_role == "assistant":
+                        logger.debug(
+                            "[retry] Empty response after assistant message — "
+                            "expected, not retrying."
+                        )
+                        return response
+
+                    finish_reason = (
+                        response.choices[0].finish_reason if response.choices else "unknown"
+                    )
+                    # Dump full request to file for debugging
+                    token_count, token_method = _estimate_tokens(model, messages)
+                    dump_path = _dump_failed_request(
+                        model=model,
+                        kwargs=kwargs,
+                        error_type="empty_response",
+                        attempt=attempt,
+                    )
+                    logger.warning(
+                        f"[retry] Empty response - {len(messages)} messages, "
+                        f"~{token_count} tokens ({token_method}). "
+                        f"Full request dumped to: {dump_path}"
+                    )
+
+                    # Out of attempts: hand the empty response back to the
+                    # caller rather than raising.
+                    if attempt == RATE_LIMIT_MAX_RETRIES:
+                        logger.error(
+                            f"[retry] GAVE UP on {model} after {RATE_LIMIT_MAX_RETRIES + 1} "
+                            f"attempts — empty response "
+                            f"(finish_reason={finish_reason}, "
+                            f"choices={len(response.choices) if response.choices else 0})"
+                        )
+                        return response
+                    # Exponential backoff: base * 2**attempt seconds.
+                    wait = RATE_LIMIT_BACKOFF_BASE * (2**attempt)
+                    logger.warning(
+                        f"[retry] {model} returned empty response "
+                        f"(finish_reason={finish_reason}, "
+                        f"choices={len(response.choices) if response.choices else 0}) — "
+                        f"likely rate limited or quota exceeded. "
+                        f"Retrying in {wait}s "
+                        f"(attempt {attempt + 1}/{RATE_LIMIT_MAX_RETRIES})"
+                    )
+                    time.sleep(wait)
+                    continue
+
+                return response
+            except RateLimitError as e:
+                # Dump full request to file for debugging
+                messages = kwargs.get("messages", [])
+                token_count, token_method = _estimate_tokens(model, messages)
+                dump_path = _dump_failed_request(
+                    model=model,
+                    kwargs=kwargs,
+                    error_type="rate_limit",
+                    attempt=attempt,
+                )
+                if attempt == RATE_LIMIT_MAX_RETRIES:
+                    logger.error(
+                        f"[retry] GAVE UP on {model} after {RATE_LIMIT_MAX_RETRIES + 1} "
+                        f"attempts — rate limit error: {e!s}. "
+                        f"~{token_count} tokens ({token_method}). "
+                        f"Full request dumped to: {dump_path}"
+                    )
+                    raise
+                wait = RATE_LIMIT_BACKOFF_BASE * (2**attempt)
+                logger.warning(
+                    f"[retry] {model} rate limited (429): {e!s}. "
+                    f"~{token_count} tokens ({token_method}). "
+                    f"Full request dumped to: {dump_path}. "
+                    f"Retrying in {wait}s "
+                    f"(attempt {attempt + 1}/{RATE_LIMIT_MAX_RETRIES})"
+                )
+                time.sleep(wait)
+        # unreachable, but satisfies type checker
+        raise RuntimeError("Exhausted rate limit retries")
+
    def complete(
        self,
        messages: list[dict[str, Any]],
@@ -133,7 +293,7 @@ class LiteLLMProvider(LLMProvider):
            kwargs["response_format"] = response_format

        # Make the call
-        response = litellm.completion(**kwargs)  # type: ignore[union-attr]
+        response = self._completion_with_rate_limit_retry(**kwargs)

        # Extract content
        content = response.choices[0].message.content or ""
@@ -189,7 +349,7 @@ class LiteLLMProvider(LLMProvider):
            if self.api_base:
                kwargs["api_base"] = self.api_base

-            response = litellm.completion(**kwargs)  # type: ignore[union-attr]
+            response = self._completion_with_rate_limit_retry(**kwargs)

            # Track tokens
            usage = response.usage
@@ -233,11 +393,18 @@ class LiteLLMProvider(LLMProvider):

            # Execute tools and add results.
            for tool_call in message.tool_calls:
-                # Parse arguments
                try:
                    args = json.loads(tool_call.function.arguments)
                except json.JSONDecodeError:
-                    args = {}
+                    # Surface error to LLM and skip tool execution
+                    current_messages.append(
+                        {
+                            "role": "tool",
+                            "tool_call_id": tool_call.id,
+                            "content": "Invalid JSON arguments provided to tool.",
+                        }
+                    )
+                    continue

                tool_use = ToolUse(
                    id=tool_call.id,
@@ -280,3 +447,185 @@ class LiteLLMProvider(LLMProvider):
                },
            },
        }
+
+    async def stream(
+        self,
+        messages: list[dict[str, Any]],
+        system: str = "",
+        tools: list[Tool] | None = None,
+        max_tokens: int = 4096,
+    ) -> AsyncIterator[StreamEvent]:
+        """Stream a completion via litellm.acompletion(stream=True).
+
+        Yields StreamEvent objects as chunks arrive from the provider.
+        Tool call arguments are accumulated across chunks and yielded as
+        a single ToolCallEvent with fully parsed JSON when complete.
+
+        Empty responses (e.g. Gemini stealth rate-limits that return 200
+        with no content) are retried with exponential backoff, mirroring
+        the retry behaviour of ``_completion_with_rate_limit_retry``.
+
+        Args:
+            messages: Conversation messages (without the system prompt).
+            system: Optional system prompt, prepended as a system message.
+            tools: Optional tools, converted with ``_tool_to_openai_format``.
+            max_tokens: Completion token limit passed to litellm.
+
+        Yields:
+            TextDeltaEvent for each text chunk as it arrives; then, once the
+            stream has ended with a finish reason, one ToolCallEvent per
+            accumulated tool call, a TextEndEvent if any text was produced,
+            and a FinishEvent. Errors are reported as a non-recoverable
+            StreamErrorEvent instead of being raised.
+        """
+        from framework.llm.stream_events import (
+            FinishEvent,
+            StreamErrorEvent,
+            TextDeltaEvent,
+            TextEndEvent,
+            ToolCallEvent,
+        )
+
+        full_messages: list[dict[str, Any]] = []
+        if system:
+            full_messages.append({"role": "system", "content": system})
+        full_messages.extend(messages)
+
+        kwargs: dict[str, Any] = {
+            "model": self.model,
+            "messages": full_messages,
+            "max_tokens": max_tokens,
+            "stream": True,
+            "stream_options": {"include_usage": True},
+            **self.extra_kwargs,
+        }
+        if self.api_key:
+            kwargs["api_key"] = self.api_key
+        if self.api_base:
+            kwargs["api_base"] = self.api_base
+        if tools:
+            kwargs["tools"] = [self._tool_to_openai_format(t) for t in tools]
+
+        # Role of the last non-system message; it does not change between
+        # attempts. .get() is used for the yielded value as well so that a
+        # message without a "role" key cannot raise KeyError.
+        last_role = next(
+            (m.get("role") for m in reversed(full_messages) if m.get("role") != "system"),
+            None,
+        )
+
+        for attempt in range(RATE_LIMIT_MAX_RETRIES + 1):
+            # Post-stream events (ToolCall, TextEnd, Finish) are buffered
+            # because they depend on the full stream.  TextDeltaEvents are
+            # yielded immediately so callers see tokens in real time.
+            tail_events: list[StreamEvent] = []
+            accumulated_text = ""
+            tool_calls_acc: dict[int, dict[str, str]] = {}
+            input_tokens = 0
+            output_tokens = 0
+            finish_reason: str | None = None
+
+            try:
+                response = await litellm.acompletion(**kwargs)  # type: ignore[union-attr]
+
+                async for chunk in response:
+                    # With stream_options.include_usage the usage block can
+                    # arrive on a chunk other than the one carrying
+                    # finish_reason (possibly one with no choices at all), so
+                    # read it from every chunk before the empty-choices check.
+                    usage = getattr(chunk, "usage", None)
+                    if usage:
+                        input_tokens = getattr(usage, "prompt_tokens", 0) or input_tokens
+                        output_tokens = getattr(usage, "completion_tokens", 0) or output_tokens
+
+                    choice = chunk.choices[0] if chunk.choices else None
+                    if not choice:
+                        continue
+
+                    delta = choice.delta
+
+                    # --- Text content — yield immediately for real-time streaming ---
+                    if delta and delta.content:
+                        accumulated_text += delta.content
+                        yield TextDeltaEvent(
+                            content=delta.content,
+                            snapshot=accumulated_text,
+                        )
+
+                    # --- Tool calls (accumulate across chunks) ---
+                    if delta and delta.tool_calls:
+                        for tc in delta.tool_calls:
+                            idx = tc.index if hasattr(tc, "index") and tc.index is not None else 0
+                            if idx not in tool_calls_acc:
+                                tool_calls_acc[idx] = {"id": "", "name": "", "arguments": ""}
+                            if tc.id:
+                                tool_calls_acc[idx]["id"] = tc.id
+                            if tc.function:
+                                if tc.function.name:
+                                    tool_calls_acc[idx]["name"] = tc.function.name
+                                if tc.function.arguments:
+                                    tool_calls_acc[idx]["arguments"] += tc.function.arguments
+
+                    # --- Finish (remember it; tail events are built after the loop) ---
+                    if choice.finish_reason:
+                        finish_reason = choice.finish_reason
+
+                # Build the tail events once, after the stream is exhausted,
+                # so the FinishEvent carries usage from a trailing chunk and a
+                # repeated finish_reason cannot duplicate events.
+                if finish_reason:
+                    for _idx, tc_data in sorted(tool_calls_acc.items()):
+                        try:
+                            parsed_args = json.loads(tc_data["arguments"])
+                        except (json.JSONDecodeError, KeyError):
+                            # Pass unparseable arguments through verbatim so
+                            # the caller can decide how to report them.
+                            parsed_args = {"_raw": tc_data.get("arguments", "")}
+                        tail_events.append(
+                            ToolCallEvent(
+                                tool_use_id=tc_data["id"],
+                                tool_name=tc_data["name"],
+                                tool_input=parsed_args,
+                            )
+                        )
+
+                    if accumulated_text:
+                        tail_events.append(TextEndEvent(full_text=accumulated_text))
+
+                    tail_events.append(
+                        FinishEvent(
+                            stop_reason=finish_reason,
+                            input_tokens=input_tokens,
+                            output_tokens=output_tokens,
+                            model=self.model,
+                        )
+                    )
+
+                # Check whether the stream produced any real content.
+                # (If text deltas were yielded above, has_content is True
+                # and we skip the retry path — nothing was yielded in vain.)
+                has_content = accumulated_text or tool_calls_acc
+                if not has_content and attempt < RATE_LIMIT_MAX_RETRIES:
+                    # If the conversation ends with an assistant message,
+                    # an empty stream is expected (nothing new to say).
+                    # Don't retry — just flush whatever we have.
+                    if last_role == "assistant":
+                        logger.debug(
+                            "[stream] Empty response after assistant message — "
+                            "expected, not retrying."
+                        )
+                        for event in tail_events:
+                            yield event
+                        return
+                    wait = RATE_LIMIT_BACKOFF_BASE * (2**attempt)
+                    token_count, token_method = _estimate_tokens(
+                        self.model,
+                        full_messages,
+                    )
+                    dump_path = _dump_failed_request(
+                        model=self.model,
+                        kwargs=kwargs,
+                        error_type="empty_stream",
+                        attempt=attempt,
+                    )
+                    logger.warning(
+                        f"[stream-retry] {self.model} returned empty stream — "
+                        f"~{token_count} tokens ({token_method}). "
+                        f"Request dumped to: {dump_path}. "
+                        f"Retrying in {wait}s "
+                        f"(attempt {attempt + 1}/{RATE_LIMIT_MAX_RETRIES})"
+                    )
+                    await asyncio.sleep(wait)
+                    continue
+
+                # Success (or final attempt) — flush remaining events.
+                for event in tail_events:
+                    yield event
+                return
+
+            except RateLimitError as e:
+                # Only retry while nothing has been yielded: a retry after
+                # partial output would replay text the caller already consumed.
+                if attempt < RATE_LIMIT_MAX_RETRIES and not accumulated_text:
+                    wait = RATE_LIMIT_BACKOFF_BASE * (2**attempt)
+                    logger.warning(
+                        f"[stream-retry] {self.model} rate limited (429): {e!s}. "
+                        f"Retrying in {wait}s "
+                        f"(attempt {attempt + 1}/{RATE_LIMIT_MAX_RETRIES})"
+                    )
+                    await asyncio.sleep(wait)
+                    continue
+                yield StreamErrorEvent(error=str(e), recoverable=False)
+                return
+
+            except Exception as e:
+                # Stream boundary: report any other failure as an event
+                # rather than raising into the consumer.
+                yield StreamErrorEvent(error=str(e), recoverable=False)
+                return
@@ -2,10 +2,16 @@

 import json
 import re
-from collections.abc import Callable
+from collections.abc import AsyncIterator, Callable
 from typing import Any

 from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolResult, ToolUse
+from framework.llm.stream_events import (
+    FinishEvent,
+    StreamEvent,
+    TextDeltaEvent,
+    TextEndEvent,
+)


 class MockLLMProvider(LLMProvider):
@@ -175,3 +181,28 @@ class MockLLMProvider(LLMProvider):
            output_tokens=0,
            stop_reason="mock_complete",
        )
+
+    async def stream(
+        self,
+        messages: list[dict[str, Any]],
+        system: str = "",
+        tools: list[Tool] | None = None,
+        max_tokens: int = 4096,
+    ) -> AsyncIterator[StreamEvent]:
+        """Stream a mock completion as word-level TextDeltaEvents.
+
+        Splits the mock response into words and yields each as a separate
+        TextDeltaEvent with an accumulating snapshot, exercising the full
+        streaming pipeline without any API calls.
+
+        Args:
+            messages: Not read by this implementation.
+            system: Forwarded to ``_generate_mock_response``.
+            tools: Not read by this implementation.
+            max_tokens: Not read by this implementation.
+
+        Yields:
+            One TextDeltaEvent per space-separated word, then a TextEndEvent
+            with the full text, then a FinishEvent whose stop_reason is
+            "mock_complete".
+        """
+        content = self._generate_mock_response(system=system, json_mode=False)
+        # Split on single spaces (not arbitrary whitespace) so that joining
+        # the chunks back together reproduces `content` exactly.
+        words = content.split(" ")
+        accumulated = ""
+
+        for i, word in enumerate(words):
+            # Every chunk after the first carries the space it was split on.
+            chunk = word if i == 0 else " " + word
+            accumulated += chunk
+            yield TextDeltaEvent(content=chunk, snapshot=accumulated)
+
+        yield TextEndEvent(full_text=accumulated)
+        yield FinishEvent(stop_reason="mock_complete", model=self.model)
@@ -1,7 +1,7 @@
 """LLM Provider abstraction for pluggable LLM backends."""

 from abc import ABC, abstractmethod
-from collections.abc import Callable
+from collections.abc import AsyncIterator, Callable
 from dataclasses import dataclass, field
 from typing import Any

@@ -108,3 +108,45 @@ class LLMProvider(ABC):
            Final LLMResponse after tool use completes
        """
        pass
+
+    async def stream(
+        self,
+        messages: list[dict[str, Any]],
+        system: str = "",
+        tools: list[Tool] | None = None,
+        max_tokens: int = 4096,
+    ) -> AsyncIterator["StreamEvent"]:
+        """
+        Stream a completion as an async iterator of StreamEvents.
+
+        Default implementation wraps complete() with synthetic events.
+        Subclasses SHOULD override for true streaming.
+
+        Tool orchestration is the CALLER's responsibility:
+        - Caller detects ToolCallEvent, executes tool, adds result
+          to messages, calls stream() again.
+
+        Args:
+            messages: Conversation messages, forwarded to complete().
+            system: System prompt, forwarded to complete().
+            tools: Optional tools, forwarded to complete().
+            max_tokens: Token limit, forwarded to complete().
+
+        Yields:
+            Exactly three events built from the complete() result: one
+            TextDeltaEvent holding the whole content, a TextEndEvent, and a
+            FinishEvent. This default never yields a ToolCallEvent.
+        """
+        # Imported here rather than at module top; the module-level import of
+        # StreamEvent sits at the bottom of this file.
+        from framework.llm.stream_events import (
+            FinishEvent,
+            TextDeltaEvent,
+            TextEndEvent,
+        )
+
+        # NOTE(review): complete() is called synchronously (no await) inside
+        # this async generator, so it runs to completion before the first
+        # event is yielded — confirm this is acceptable for callers that
+        # share an event loop.
+        response = self.complete(
+            messages=messages,
+            system=system,
+            tools=tools,
+            max_tokens=max_tokens,
+        )
+        # The whole response is delivered as a single delta, so content and
+        # snapshot are identical.
+        yield TextDeltaEvent(content=response.content, snapshot=response.content)
+        yield TextEndEvent(full_text=response.content)
+        yield FinishEvent(
+            stop_reason=response.stop_reason,
+            input_tokens=response.input_tokens,
+            output_tokens=response.output_tokens,
+            model=response.model,
+        )
+
+
+# Deferred import target for type annotation
+from framework.llm.stream_events import StreamEvent as StreamEvent  # noqa: E402, F401
@@ -0,0 +1,96 @@
+"""Stream event types for LLM streaming responses.
+
+Defines a discriminated union of frozen dataclasses representing every event
+a streaming LLM call can produce. These types form the contract between the
+LLM provider layer, EventLoopNode, event bus, persistence, and monitoring.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any, Literal
+
+
+@dataclass(frozen=True)
+class TextDeltaEvent:
+    """A chunk of text produced by the LLM.
+
+    Instances are immutable (frozen dataclass).
+    """
+
+    # Constant tag that identifies this member of the StreamEvent union.
+    type: Literal["text_delta"] = "text_delta"
+    content: str = ""  # this chunk's text
+    snapshot: str = ""  # accumulated text so far
+
+
+@dataclass(frozen=True)
+class TextEndEvent:
+    """Signals that text generation is complete.
+
+    Instances are immutable (frozen dataclass).
+    """
+
+    # Constant tag that identifies this member of the StreamEvent union.
+    type: Literal["text_end"] = "text_end"
+    full_text: str = ""  # the complete generated text
+
+
+@dataclass(frozen=True)
+class ToolCallEvent:
+    """The LLM has requested a tool call.
+
+    The dataclass is frozen, but ``tool_input`` is a plain dict and can
+    still be mutated in place.
+    """
+
+    # Constant tag that identifies this member of the StreamEvent union.
+    type: Literal["tool_call"] = "tool_call"
+    tool_use_id: str = ""  # identifier of this tool call
+    tool_name: str = ""  # name of the tool to invoke
+    # Arguments for the tool; a fresh empty dict per instance by default.
+    tool_input: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass(frozen=True)
+class ToolResultEvent:
+    """Result of executing a tool call.
+
+    Instances are immutable (frozen dataclass).
+    """
+
+    # Constant tag that identifies this member of the StreamEvent union.
+    type: Literal["tool_result"] = "tool_result"
+    # NOTE(review): presumably matches ToolCallEvent.tool_use_id of the call
+    # this result answers — confirm against the code that emits this event.
+    tool_use_id: str = ""
+    content: str = ""  # result text
+    is_error: bool = False  # defaults to a non-error result
+
+
+@dataclass(frozen=True)
+class ReasoningStartEvent:
+    """The LLM has started a reasoning/thinking block.
+
+    Carries no payload beyond its type tag.
+    """
+
+    # Constant tag that identifies this member of the StreamEvent union.
+    type: Literal["reasoning_start"] = "reasoning_start"
+
+
+@dataclass(frozen=True)
+class ReasoningDeltaEvent:
+    """A chunk of reasoning/thinking content.
+
+    Instances are immutable (frozen dataclass).
+    """
+
+    # Constant tag that identifies this member of the StreamEvent union.
+    type: Literal["reasoning_delta"] = "reasoning_delta"
+    content: str = ""  # this chunk's reasoning text
+
+
+@dataclass(frozen=True)
+class FinishEvent:
+    """The LLM has finished generating.
+
+    Instances are immutable (frozen dataclass).
+    """
+
+    # Constant tag that identifies this member of the StreamEvent union.
+    type: Literal["finish"] = "finish"
+    stop_reason: str = ""  # why generation stopped, as reported by the provider
+    input_tokens: int = 0  # prompt token count; 0 when not reported
+    output_tokens: int = 0  # completion token count; 0 when not reported
+    model: str = ""  # model that produced the response
+
+
+@dataclass(frozen=True)
+class StreamErrorEvent:
+    """An error occurred during streaming.
+
+    Instances are immutable (frozen dataclass).
+    """
+
+    # Constant tag that identifies this member of the StreamEvent union.
+    type: Literal["error"] = "error"
+    error: str = ""  # error message text
+    # NOTE(review): presumably tells the consumer whether retrying makes
+    # sense — confirm the intended contract with the event consumers.
+    recoverable: bool = False
+
+
+# Discriminated union of all stream event types
+StreamEvent = (
+    TextDeltaEvent
+    | TextEndEvent
+    | ToolCallEvent
+    | ToolResultEvent
+    | ReasoningStartEvent
+    | ReasoningDeltaEvent
+    | FinishEvent
+    | StreamErrorEvent
+)
@@ -1,5 +1,4 @@
 """MCP servers for worker-bee."""

-from framework.mcp.agent_builder_server import mcp as agent_builder_server
-
-__all__ = ["agent_builder_server"]
+# Don't auto-import servers to avoid double-import issues when running with -m
+__all__ = []
@@ -22,6 +22,7 @@ from framework.graph.plan import Plan
 from framework.testing.prompts import (
    PYTEST_TEST_FILE_HEADER,
 )
+from framework.utils.io import atomic_write

 # Initialize MCP server
 mcp = FastMCP("agent-builder")
@@ -122,11 +123,11 @@ def _save_session(session: BuildSession):

    # Save session file
    session_file = SESSIONS_DIR / f"{session.id}.json"
-    with open(session_file, "w") as f:
+    with atomic_write(session_file) as f:
        json.dump(session.to_dict(), f, indent=2, default=str)

    # Update active session pointer
-    with open(ACTIVE_SESSION_FILE, "w") as f:
+    with atomic_write(ACTIVE_SESSION_FILE) as f:
        f.write(session.id)


@@ -246,7 +247,7 @@ def load_session_by_id(session_id: Annotated[str, "ID of the session to load"])
        _session = _load_session(session_id)

        # Update active session pointer
-        with open(ACTIVE_SESSION_FILE, "w") as f:
+        with atomic_write(ACTIVE_SESSION_FILE) as f:
            f.write(session_id)

        return json.dumps(
@@ -457,14 +458,27 @@ def _validate_tool_credentials(tools_list: list[str]) -> dict | None:
        return None

    try:
-        from aden_tools.credentials import CredentialManager
+        from aden_tools.credentials import CREDENTIAL_SPECS

-        cred_manager = CredentialManager()
-        missing_creds = cred_manager.get_missing_for_tools(tools_list)
+        store = _get_credential_store()

-        if missing_creds:
-            cred_errors = []
-            for cred_name, spec in missing_creds:
+        # Build tool -> credential mapping
+        tool_to_cred: dict[str, str] = {}
+        for cred_name, spec in CREDENTIAL_SPECS.items():
+            for tool_name in spec.tools:
+                tool_to_cred[tool_name] = cred_name
+
+        # Find missing credentials
+        cred_errors = []
+        checked: set[str] = set()
+        for tool_name in tools_list:
+            cred_name = tool_to_cred.get(tool_name)
+            if cred_name is None or cred_name in checked:
+                continue
+            checked.add(cred_name)
+            spec = CREDENTIAL_SPECS[cred_name]
+            cred_id = spec.credential_id or cred_name
+            if spec.required and not store.is_available(cred_id):
                affected_tools = [t for t in tools_list if t in spec.tools]
                cred_errors.append(
                    {
@@ -476,15 +490,16 @@ def _validate_tool_credentials(tools_list: list[str]) -> dict | None:
                    }
                )

+        if cred_errors:
            return {
                "valid": False,
                "errors": [f"Missing credentials for tools: {[e['env_var'] for e in cred_errors]}"],
                "missing_credentials": cred_errors,
-                "action_required": "Add the credentials to your .env file and retry",
+                "action_required": "Store credentials via store_credential and retry",
                "example": f"Add to .env:\n{cred_errors[0]['env_var']}=your_key_here",
                "message": (
                    "Cannot add node: missing API credentials. "
-                    "Add them to .env and retry this command."
+                    "Store them via store_credential and retry this command."
                ),
            }
    except ImportError as e:
@@ -492,7 +507,7 @@ def _validate_tool_credentials(tools_list: list[str]) -> dict | None:
        return {
            "valid": True,
            "warnings": [
-                f"⚠️ Credential validation SKIPPED: aden_tools not available ({e}). "
+                f"Credential validation SKIPPED: aden_tools not available ({e}). "
                "Tools may fail at runtime if credentials are missing. "
                "Add tools/src to PYTHONPATH to enable validation."
            ],
@@ -501,6 +516,36 @@ def _validate_tool_credentials(tools_list: list[str]) -> dict | None:
    return None


+def _validate_agent_path(agent_path: str) -> tuple[Path | None, str | None]:
+    """
+    Check that ``agent_path`` is non-empty and exists on disk.
+
+    Args:
+        agent_path: Filesystem path to an agent, as supplied by the caller.
+
+    Returns:
+        ``(Path(agent_path), None)`` when the path exists, otherwise
+        ``(None, error_json)`` where ``error_json`` is a JSON string carrying
+        ``"success": false`` and an ``"error"`` message.
+    """
+    failure: dict | None = None
+    candidate: Path | None = None
+
+    if not agent_path:
+        failure = {
+            "success": False,
+            "error": "agent_path is required (e.g., 'exports/my_agent')",
+        }
+    else:
+        candidate = Path(agent_path)
+        if not candidate.exists():
+            failure = {
+                "success": False,
+                "error": f"Agent path not found: {candidate}",
+                "hint": "Run export_graph to create an agent in exports/ first",
+            }
+
+    # Single exit point: either the serialized failure or the resolved path.
+    if failure is not None:
+        return None, json.dumps(failure)
+    return candidate, None
+
+
@mcp.tool()
 def add_node(
    node_id: Annotated[str, "Unique identifier for the node"],
@@ -1474,13 +1519,13 @@ def export_graph() -> str:

    # Write agent.json
    agent_json_path = exports_dir / "agent.json"
-    with open(agent_json_path, "w") as f:
+    with atomic_write(agent_json_path) as f:
        json.dump(export_data, f, indent=2, default=str)

    # Generate README.md
    readme_content = _generate_readme(session, export_data, all_tools)
    readme_path = exports_dir / "README.md"
-    with open(readme_path, "w") as f:
+    with atomic_write(readme_path) as f:
        f.write(readme_content)

    # Write mcp_servers.json if MCP servers are configured
@@ -1489,8 +1534,9 @@ def export_graph() -> str:
    if session.mcp_servers:
        mcp_config = {"servers": session.mcp_servers}
        mcp_servers_path = exports_dir / "mcp_servers.json"
-        with open(mcp_servers_path, "w") as f:
+        with atomic_write(mcp_servers_path) as f:
            json.dump(mcp_config, f, indent=2)
+
        mcp_servers_size = mcp_servers_path.stat().st_size

    # Get file sizes
@@ -2581,10 +2627,11 @@ def generate_constraint_tests(
    if not agent_path and _session:
        agent_path = f"exports/{_session.name}"

-    if not agent_path:
-        return json.dumps({"error": "agent_path required (e.g., 'exports/my_agent')"})
+    path, err = _validate_agent_path(agent_path)
+    if err:
+        return err

-    agent_module = _get_agent_module_from_path(agent_path)
+    agent_module = _get_agent_module_from_path(path)

    # Format constraints for display
    constraints_formatted = (
@@ -2603,9 +2650,9 @@ def generate_constraint_tests(
    return json.dumps(
        {
            "goal_id": goal_id,
-            "agent_path": agent_path,
+            "agent_path": str(path),
            "agent_module": agent_module,
-            "output_file": f"{agent_path}/tests/test_constraints.py",
+            "output_file": f"{str(path)}/tests/test_constraints.py",
            "constraints": [c.model_dump() for c in goal.constraints] if goal.constraints else [],
            "constraints_formatted": constraints_formatted,
            "test_guidelines": {
@@ -2661,10 +2708,11 @@ def generate_success_tests(
    if not agent_path and _session:
        agent_path = f"exports/{_session.name}"

-    if not agent_path:
-        return json.dumps({"error": "agent_path required (e.g., 'exports/my_agent')"})
+    path, err = _validate_agent_path(agent_path)
+    if err:
+        return err

-    agent_module = _get_agent_module_from_path(agent_path)
+    agent_module = _get_agent_module_from_path(path)

    # Parse node/tool names for context
    nodes = [n.strip() for n in node_names.split(",") if n.strip()]
@@ -2689,9 +2737,9 @@ def generate_success_tests(
    return json.dumps(
        {
            "goal_id": goal_id,
-            "agent_path": agent_path,
+            "agent_path": str(path),
            "agent_module": agent_module,
-            "output_file": f"{agent_path}/tests/test_success_criteria.py",
+            "output_file": f"{str(path)}/tests/test_success_criteria.py",
            "success_criteria": [c.model_dump() for c in goal.success_criteria]
            if goal.success_criteria
            else [],
@@ -2750,7 +2798,11 @@ def run_tests(
    import re
    import subprocess

-    tests_dir = Path(agent_path) / "tests"
+    path, err = _validate_agent_path(agent_path)
+    if err:
+        return err
+
+    tests_dir = path / "tests"

    if not tests_dir.exists():
        return json.dumps(
@@ -2941,10 +2993,11 @@ def debug_test(
    if not agent_path and _session:
        agent_path = f"exports/{_session.name}"

-    if not agent_path:
-        return json.dumps({"error": "agent_path required (e.g., 'exports/my_agent')"})
+    path, err = _validate_agent_path(agent_path)
+    if err:
+        return err

-    tests_dir = Path(agent_path) / "tests"
+    tests_dir = path / "tests"

    if not tests_dir.exists():
        return json.dumps(
@@ -3085,10 +3138,11 @@ def list_tests(
    if not agent_path and _session:
        agent_path = f"exports/{_session.name}"

-    if not agent_path:
-        return json.dumps({"error": "agent_path required (e.g., 'exports/my_agent')"})
+    path, err = _validate_agent_path(agent_path)
+    if err:
+        return err

-    tests_dir = Path(agent_path) / "tests"
+    tests_dir = path / "tests"

    if not tests_dir.exists():
        return json.dumps(
@@ -3229,9 +3283,273 @@ def load_exported_plan(
        return json.dumps({"success": False, "error": str(e)})


+# =============================================================================
+# CREDENTIAL STORE TOOLS
+# =============================================================================
+
+
+def _get_credential_store():
+    """Build a CredentialStore backed by encrypted files with an env var fallback.
+
+    The store wraps a CompositeStorage whose primary backend is
+    EncryptedFileStorage and whose only fallback is EnvVarStorage, so
+    credentials saved via `store_credential` and credentials exported as
+    environment variables are both looked up.
+    """
+    from framework.credentials import CredentialStore
+    from framework.credentials.storage import CompositeStorage, EncryptedFileStorage, EnvVarStorage
+
+    # Map each credential id to its env var name for the fallback backend.
+    # If aden_tools cannot be imported the mapping simply stays empty.
+    env_mapping: dict[str, str] = {}
+    try:
+        from aden_tools.credentials import CREDENTIAL_SPECS
+
+        env_mapping.update(
+            (spec.credential_id or name, spec.env_var)
+            for name, spec in CREDENTIAL_SPECS.items()
+        )
+    except ImportError:
+        pass
+
+    encrypted_backend = EncryptedFileStorage()
+    env_backend = EnvVarStorage(env_mapping=env_mapping)
+    composite = CompositeStorage(primary=encrypted_backend, fallbacks=[env_backend])
+    return CredentialStore(storage=composite)
+
+
+@mcp.tool()
+def check_missing_credentials(
+    agent_path: Annotated[str, "Path to the exported agent directory (e.g., 'exports/my-agent')"],
+) -> str:
+    """
+    Detect missing credentials for an agent by inspecting its tools and node types.
+
+    Returns a list of missing credentials with env var names, descriptions, and help URLs.
+    Use this before running or testing an agent to identify what needs to be configured.
+
+    Args:
+        agent_path: Path to the exported agent directory.
+
+    Returns:
+        JSON string with ``agent``, ``missing``, ``available``, ``total_missing``
+        and ``ready`` keys, or ``{"error": ...}`` if loading or inspection raised.
+    """
+    try:
+        from aden_tools.credentials import CREDENTIAL_SPECS
+
+        from framework.runner import AgentRunner
+
+        runner = AgentRunner.load(agent_path)
+        # The return value is not used here.
+        # NOTE(review): presumably called so validation problems raise early - confirm.
+        runner.validate()
+
+        store = _get_credential_store()
+        info = runner.info()
+        node_types = list({node.node_type for node in runner.graph.nodes})
+
+        # Build reverse mappings: tool/node_type -> credential name
+        tool_to_cred: dict[str, str] = {}
+        node_type_to_cred: dict[str, str] = {}
+        for cred_name, spec in CREDENTIAL_SPECS.items():
+            for tool_name in spec.tools:
+                tool_to_cred[tool_name] = cred_name
+            for nt in spec.node_types:
+                node_type_to_cred[nt] = cred_name
+
+        # Gather missing credentials (tools + node types), deduplicated.
+        # `seen` records every credential that was checked, missing or not.
+        seen: set[str] = set()
+        all_missing = []
+
+        for name_list, mapping in [
+            (info.required_tools, tool_to_cred),
+            (node_types, node_type_to_cred),
+        ]:
+            for item_name in name_list:
+                cred_name = mapping.get(item_name)
+                if cred_name is None or cred_name in seen:
+                    continue
+                seen.add(cred_name)
+                spec = CREDENTIAL_SPECS[cred_name]
+                cred_id = spec.credential_id or cred_name
+                if spec.required and not store.is_available(cred_id):
+                    all_missing.append(
+                        {
+                            "credential_name": cred_name,
+                            "env_var": spec.env_var,
+                            "description": spec.description,
+                            "help_url": spec.help_url,
+                            "tools": spec.tools,
+                        }
+                    )
+
+        # Also report what's already set and relevant to this agent.
+        # Skip only the credentials reported as missing: filtering on `seen`
+        # would also drop every credential that was checked and found
+        # available, leaving this list empty.
+        missing_names = {entry["credential_name"] for entry in all_missing}
+        available = []
+        for name, spec in CREDENTIAL_SPECS.items():
+            if name in missing_names:
+                continue
+            cred_id = spec.credential_id or name
+            if store.is_available(cred_id):
+                relevant_tools = [t for t in spec.tools if t in info.required_tools]
+                relevant_nodes = [n for n in spec.node_types if n in node_types]
+                if relevant_tools or relevant_nodes:
+                    available.append(
+                        {
+                            "credential_name": name,
+                            "env_var": spec.env_var,
+                            "description": spec.description,
+                            "status": "available",
+                        }
+                    )
+
+        return json.dumps(
+            {
+                "agent": agent_path,
+                "missing": all_missing,
+                "available": available,
+                "total_missing": len(all_missing),
+                "ready": len(all_missing) == 0,
+            },
+            indent=2,
+        )
+    except Exception as e:
+        # Tool boundary: report any failure to the MCP caller as JSON.
+        return json.dumps({"error": str(e)})
+
+
+@mcp.tool()
+def store_credential(
+    credential_name: Annotated[
+        str, "Logical credential name (e.g., 'hubspot', 'brave_search', 'anthropic')"
+    ],
+    credential_value: Annotated[str, "The secret value to store (API key, token, etc.)"],
+    key_name: Annotated[
+        str, "Key name within the credential (e.g., 'api_key', 'access_token')"
+    ] = "api_key",
+    display_name: Annotated[str, "Human-readable name (e.g., 'HubSpot Access Token')"] = "",
+) -> str:
+    """
+    Store a credential securely in the local encrypted store at ~/.hive/credentials.
+
+    Uses Fernet encryption (AES-128-CBC + HMAC). Requires HIVE_CREDENTIAL_KEY env var.
+
+    Args:
+        credential_name: Identifier the credential is saved under.
+        credential_value: Secret to store; must not be empty.
+        key_name: Name of the key inside the credential object.
+        display_name: Optional label; derived from ``credential_name`` when empty.
+
+    Returns:
+        JSON string with ``success: true`` and storage details, or
+        ``success: false`` with an ``error`` message. The secret value is
+        never included in the response.
+    """
+    try:
+        # Refuse blank input up front so an empty secret is never persisted.
+        if not credential_name or not credential_name.strip():
+            return json.dumps({"success": False, "error": "credential_name must not be empty"})
+        if not credential_value:
+            return json.dumps({"success": False, "error": "credential_value must not be empty"})
+
+        from pydantic import SecretStr
+
+        from framework.credentials import CredentialKey, CredentialObject
+
+        store = _get_credential_store()
+
+        if not display_name:
+            # e.g. "brave_search" -> "Brave Search"
+            display_name = credential_name.replace("_", " ").title()
+
+        cred = CredentialObject(
+            id=credential_name,
+            name=display_name,
+            keys={
+                key_name: CredentialKey(
+                    name=key_name,
+                    value=SecretStr(credential_value),
+                )
+            },
+        )
+        store.save_credential(cred)
+
+        return json.dumps(
+            {
+                "success": True,
+                "credential": credential_name,
+                "key": key_name,
+                "location": "~/.hive/credentials",
+                "encrypted": True,
+            }
+        )
+    except Exception as e:
+        # Tool boundary: report any failure to the MCP caller as JSON.
+        return json.dumps({"success": False, "error": str(e)})
+
+
+@mcp.tool()
+def list_stored_credentials() -> str:
+    """
+    Enumerate the credentials held in the local credential store.
+
+    Each entry carries the credential id, display name, key names and creation
+    timestamp; secret values are never included. A credential that fails to
+    load is reported as ``{"id": ..., "error": "Could not load"}``.
+    """
+
+    def _summarize(store, cred_id):
+        # Best-effort per credential: one broken entry must not hide the rest.
+        try:
+            cred = store.get_credential(cred_id)
+            created = cred.created_at.isoformat() if cred.created_at else None
+            return {
+                "id": cred.id,
+                "name": cred.name,
+                "keys": list(cred.keys.keys()),
+                "created_at": created,
+            }
+        except Exception:
+            return {"id": cred_id, "error": "Could not load"}
+
+    try:
+        store = _get_credential_store()
+        summaries = [_summarize(store, cred_id) for cred_id in store.list_credentials()]
+        payload = {
+            "count": len(summaries),
+            "credentials": summaries,
+            "location": "~/.hive/credentials",
+        }
+        return json.dumps(payload, indent=2)
+    except Exception as e:
+        return json.dumps({"error": str(e)})
+
+
+@mcp.tool()
+def delete_stored_credential(
+    credential_name: Annotated[str, "Logical credential name to delete (e.g., 'hubspot')"],
+) -> str:
+    """
+    Remove a credential from the local credential store.
+
+    Returns a JSON string whose ``success`` field is the store's delete result
+    and whose ``message`` says whether the credential was deleted or not found;
+    on an exception, ``success`` is false and ``error`` holds the message.
+    """
+    try:
+        was_deleted = _get_credential_store().delete_credential(credential_name)
+        if was_deleted:
+            outcome = f"Credential '{credential_name}' deleted"
+        else:
+            outcome = f"Credential '{credential_name}' not found"
+        response = {
+            "success": was_deleted,
+            "credential": credential_name,
+            "message": outcome,
+        }
+        return json.dumps(response)
+    except Exception as e:
+        return json.dumps({"success": False, "error": str(e)})
+
+
+@mcp.tool()
+def verify_credentials(
+    agent_path: Annotated[str, "Path to the exported agent directory (e.g., 'exports/my-agent')"],
+) -> str:
+    """
+    Load an agent, run its validation, and report credential readiness.
+
+    The JSON result echoes the agent path and the validation's missing
+    credentials, warnings and errors; ``ready`` is true exactly when no
+    credentials are reported missing. Any exception is returned as
+    ``{"error": ...}``.
+    """
+    try:
+        from framework.runner import AgentRunner
+
+        report = AgentRunner.load(agent_path).validate()
+        missing = report.missing_credentials
+        summary = {
+            "agent": agent_path,
+            "ready": not missing,
+            "missing_credentials": missing,
+            "warnings": report.warnings,
+            "errors": report.errors,
+        }
+        return json.dumps(summary, indent=2)
+    except Exception as e:
+        return json.dumps({"error": str(e)})
+
+
 # =============================================================================
 # MAIN
 # =============================================================================

 if __name__ == "__main__":
-    mcp.run()
+    mcp.run(transport="stdio")
@@ -417,8 +417,9 @@ def cmd_list(args: argparse.Namespace) -> int:

    directory = Path(args.directory)
    if not directory.exists():
-        print(f"Directory not found: {directory}", file=sys.stderr)
-        return 1
+        # FIX: Handle missing directory gracefully on fresh install
+        print(f"No agents found in {directory}")
+        return 0

    agents = []
    for path in directory.iterdir():
@@ -458,7 +459,7 @@ def cmd_list(args: argparse.Namespace) -> int:
            print(f"  {agent['name']}")
            print(f"    Path: {agent['path']}")
            print(f"    Description: {agent['description']}")
-            print(f"    Steps: {agent['steps']}, Tools: {agent['tools']}")
+            print(f"    Nodes: {agent['nodes']}, Tools: {agent['tools']}")
            print()

    return 0
@@ -931,7 +932,10 @@ def _select_agent(agents_dir: Path) -> str | None:
    """Let user select an agent from available agents."""
    if not agents_dir.exists():
        print(f"Directory not found: {agents_dir}", file=sys.stderr)
-        return None
+        # fixes issue #696, creates an exports folder if it does not exist
+        agents_dir.mkdir(parents=True, exist_ok=True)
+        print(f"Created directory: {agents_dir}", file=sys.stderr)
+        # return None

    agents = []
    for path in agents_dir.iterdir():
@@ -86,10 +86,14 @@ class MCPClient:
        """
        # If we have a persistent loop (for STDIO), use it
        if self._loop is not None:
-            future = asyncio.run_coroutine_threadsafe(coro, self._loop)
-            return future.result()
+            # Check if loop is running AND not closed
+            if self._loop.is_running() and not self._loop.is_closed():
+                future = asyncio.run_coroutine_threadsafe(coro, self._loop)
+                return future.result()
+            # else: fall through to the standard approach below
+            # This handles the case when STDIO loop exists but is stopped/closed

-        # Otherwise, use the standard approach
+        # Standard approach: handle both sync and async contexts
        try:
            # Try to get the current event loop
            asyncio.get_running_loop()
@@ -400,19 +404,110 @@ class MCPClient:
        except Exception as e:
            raise RuntimeError(f"Failed to call tool via HTTP: {e}") from e

+    _CLEANUP_TIMEOUT = 10
+    _THREAD_JOIN_TIMEOUT = 12
+
+    async def _cleanup_stdio_async(self) -> None:
+        """Async cleanup for STDIO session and context managers.
+
+        Cleanup order is critical:
+        - The session must be closed BEFORE the stdio_context because the session
+          depends on the streams provided by stdio_context.
+        - This mirrors the initialization order in _connect_stdio(), where
+          stdio_context is entered first (providing streams), then the session is
+          created with those streams and entered.
+        - Do not change this ordering without carefully considering these dependencies.
+
+        Both steps are best-effort: an ``Exception`` or ``asyncio.CancelledError``
+        raised while closing is logged as a warning and not re-raised, and the
+        corresponding attribute is reset to ``None`` in every case.
+        """
+        # First: close session (depends on stdio_context streams)
+        try:
+            if self._session:
+                await self._session.__aexit__(None, None, None)
+        except asyncio.CancelledError:
+            # Deliberately not re-raised so the stdio_context below is still closed.
+            logger.warning(
+                "MCP session cleanup was cancelled; proceeding with best-effort shutdown"
+            )
+        except Exception as e:
+            logger.warning(f"Error closing MCP session: {e}")
+        finally:
+            # Drop the reference whether or not closing succeeded.
+            self._session = None
+
+        # Second: close stdio_context (provides the underlying streams)
+        try:
+            if self._stdio_context:
+                await self._stdio_context.__aexit__(None, None, None)
+        except asyncio.CancelledError:
+            # Swallowed as above; there is nothing further to clean up after this step.
+            logger.warning(
+                "STDIO context cleanup was cancelled; proceeding with best-effort shutdown"
+            )
+        except Exception as e:
+            logger.warning(f"Error closing STDIO context: {e}")
+        finally:
+            # Drop the reference whether or not closing succeeded.
+            self._stdio_context = None
+
    def disconnect(self) -> None:
        """Disconnect from the MCP server."""
        # Clean up persistent STDIO connection
        if self._loop is not None:
-            # Stop event loop - this will cause context managers to clean up naturally
-            if self._loop and self._loop.is_running():
-                self._loop.call_soon_threadsafe(self._loop.stop)
+            cleanup_attempted = False

-            # Wait for thread to finish
+            # Properly close session and context managers before stopping loop
+            # Note: There's an inherent race condition between checking is_running()
+            # and calling run_coroutine_threadsafe(). We handle this by catching
+            # any exceptions that may occur if the loop stops between these calls.
+            if self._loop.is_running():
+                try:
+                    cleanup_future = asyncio.run_coroutine_threadsafe(
+                        self._cleanup_stdio_async(), self._loop
+                    )
+                    cleanup_future.result(timeout=self._CLEANUP_TIMEOUT)
+                    cleanup_attempted = True
+                except TimeoutError:
+                    # Cleanup took too long - may indicate stuck resources or slow MCP server
+                    cleanup_attempted = True
+                    logger.warning(f"Async cleanup timed out after {self._CLEANUP_TIMEOUT} seconds")
+                except RuntimeError as e:
+                    # Likely: loop stopped between is_running() check and run_coroutine_threadsafe()
+                    cleanup_attempted = True
+                    logger.debug(f"Event loop stopped during async cleanup: {e}")
+                except Exception as e:
+                    # Cleanup was attempted but failed (e.g., error in _cleanup_stdio_async())
+                    cleanup_attempted = True
+                    logger.warning(f"Error during async cleanup: {e}")
+
+                # Now stop the event loop
+                try:
+                    self._loop.call_soon_threadsafe(self._loop.stop)
+                except RuntimeError:
+                    # Loop may have already stopped
+                    pass
+
+            if not cleanup_attempted:
+                # Fallback: loop exists but is not running (e.g., crashed or stopped externally).
+                # At this point the loop and associated resources are in an undefined state.
+                # The context managers (_session, _stdio_context) were created in the loop's
+                # thread and may not be safely cleanable from here. Just log and proceed
+                # with reference clearing - the OS will reclaim resources on process exit.
+                logger.warning(
+                    "Event loop for STDIO MCP connection exists but is not running; "
+                    "skipping async cleanup. Resources may not be fully released."
+                )
+
+            # Wait for thread to finish (timeout proportional to cleanup timeout)
            if self._loop_thread and self._loop_thread.is_alive():
-                self._loop_thread.join(timeout=2)
+                self._loop_thread.join(timeout=self._THREAD_JOIN_TIMEOUT)
+                if self._loop_thread.is_alive():
+                    logger.warning(
+                        "Event loop thread for STDIO MCP connection did not terminate "
+                        f"within {self._THREAD_JOIN_TIMEOUT}s; thread may still be running."
+                    )

-            # Clear references
+            # Clear remaining references
+            # Note: _session and _stdio_context may already be None if _cleanup_stdio_async()
+            # succeeded. This redundant assignment is intentional for safety in cases where:
+            # 1. Cleanup timed out or failed
+            # 2. Cleanup was skipped (loop not running)
+            # 3. CancelledError interrupted cleanup
+            # Setting None to None is safe and ensures clean state.
            self._session = None
            self._stdio_context = None
            self._read_stream = None
@@ -1,6 +1,7 @@
 """Agent Runner - loads and runs exported agents."""

 import json
+import logging
 import os
 from collections.abc import Callable
 from dataclasses import dataclass, field
@@ -23,6 +24,44 @@ if TYPE_CHECKING:
    from framework.runner.protocol import AgentMessage, CapabilityResponse


+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+
+# Configuration paths, resolved against the current user's home directory at import time.
+# Hive settings file read by get_hive_config().
+HIVE_CONFIG_FILE = Path.home() / ".hive" / "configuration.json"
+# Credentials file read by get_claude_code_token().
+CLAUDE_CREDENTIALS_FILE = Path.home() / ".claude" / ".credentials.json"
+
+
+def get_hive_config() -> dict[str, Any]:
+    """Load hive configuration from ~/.hive/configuration.json.
+
+    Returns:
+        The parsed configuration mapping, or an empty dict when the file is
+        missing, unreadable, not valid UTF-8 JSON, or its top-level value is
+        not a JSON object.
+    """
+    try:
+        # Open directly instead of checking exists() first: a missing file
+        # raises FileNotFoundError (an OSError), so there is no check/open race.
+        with open(HIVE_CONFIG_FILE, encoding="utf-8") as f:
+            config = json.load(f)
+    except (OSError, ValueError):
+        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
+        return {}
+    # Callers use dict methods on the result, so reject e.g. a top-level list or null.
+    return config if isinstance(config, dict) else {}
+
+
+def get_claude_code_token() -> str | None:
+    """
+    Get the OAuth token from Claude Code subscription.
+
+    Reads from ~/.claude/.credentials.json which is created by the
+    Claude Code CLI when users authenticate with their subscription.
+
+    Returns:
+        The access token if available, None otherwise. None is also returned
+        when the file is missing, unreadable, not valid UTF-8 JSON, or does not
+        contain a non-empty string at ``claudeAiOauth.accessToken``.
+    """
+    try:
+        # A missing file raises FileNotFoundError (an OSError), so no exists() check is needed.
+        with open(CLAUDE_CREDENTIALS_FILE, encoding="utf-8") as f:
+            creds = json.load(f)
+    except (OSError, ValueError):
+        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
+        return None
+
+    # Guard each level: the JSON root or "claudeAiOauth" may be null, a list, etc.,
+    # in which case chained .get() calls would raise AttributeError.
+    oauth = creds.get("claudeAiOauth") if isinstance(creds, dict) else None
+    if not isinstance(oauth, dict):
+        return None
+    token = oauth.get("accessToken")
+    return token if isinstance(token, str) and token else None
+
+
@dataclass
 class AgentInfo:
    """Information about an exported agent."""
@@ -372,25 +411,8 @@ class AgentRunner:
        return self._tool_registry.register_mcp_server(server_config)

    def _load_mcp_servers_from_config(self, config_path: Path) -> None:
-        """
-        Load and register MCP servers from a configuration file.
-
-        Args:
-            config_path: Path to mcp_servers.json file
-        """
-        try:
-            with open(config_path) as f:
-                config = json.load(f)
-
-            servers = config.get("servers", [])
-            for server_config in servers:
-                try:
-                    self._tool_registry.register_mcp_server(server_config)
-                except Exception as e:
-                    server_name = server_config.get("name", "unknown")
-                    print(f"Warning: Failed to register MCP server '{server_name}': {e}")
-        except Exception as e:
-            print(f"Warning: Failed to load MCP servers config from {config_path}: {e}")
+        """Load and register MCP servers from a configuration file."""
+        self._tool_registry.load_mcp_config(config_path)

    def set_approval_callback(self, callback: Callable) -> None:
        """
@@ -426,15 +448,32 @@ class AgentRunner:

            self._llm = MockLLMProvider(model=self.model)
        else:
-            # Detect required API key from model name
-            api_key_env = self._get_api_key_env_var(self.model)
-            if api_key_env and os.environ.get(api_key_env):
-                from framework.llm.litellm import LiteLLMProvider
+            from framework.llm.litellm import LiteLLMProvider

-                self._llm = LiteLLMProvider(model=self.model)
-            elif api_key_env:
-                print(f"Warning: {api_key_env} not set. LLM calls will fail.")
-                print(f"Set it with: export {api_key_env}=your-api-key")
+            # Check if Claude Code subscription is configured
+            config = get_hive_config()
+            llm_config = config.get("llm", {})
+            use_claude_code = llm_config.get("use_claude_code_subscription", False)
+
+            api_key = None
+            if use_claude_code:
+                # Get OAuth token from Claude Code subscription
+                api_key = get_claude_code_token()
+                if not api_key:
+                    print("Warning: Claude Code subscription configured but no token found.")
+                    print("Run 'claude' to authenticate, then try again.")
+
+            if api_key:
+                # Use Claude Code subscription token
+                self._llm = LiteLLMProvider(model=self.model, api_key=api_key)
+            else:
+                # Fall back to environment variable
+                api_key_env = self._get_api_key_env_var(self.model)
+                if api_key_env and os.environ.get(api_key_env):
+                    self._llm = LiteLLMProvider(model=self.model)
+                elif api_key_env:
+                    print(f"Warning: {api_key_env} not set. LLM calls will fail.")
+                    print(f"Set it with: export {api_key_env}=your-api-key")

        # Get tools for executor/runtime
        tools = list(self._tool_registry.get_tools().values())
@@ -530,6 +569,10 @@ class AgentRunner:
        """
        Execute the agent with given input data.

+        Validates credentials before execution. If any required credentials
+        are missing, returns an error result with instructions on how to
+        provide them.
+
        For single-entry-point agents, this is the standard execution path.
        For multi-entry-point agents, you can optionally specify which entry point to use.

@@ -542,6 +585,20 @@ class AgentRunner:
        Returns:
            ExecutionResult with output, path, and metrics
        """
+        # Validate credentials before execution (fail-fast)
+        validation = self.validate()
+        if validation.missing_credentials:
+            error_lines = ["Cannot run agent: missing required credentials\n"]
+            for warning in validation.warnings:
+                if "Missing " in warning:
+                    error_lines.append(f"  {warning}")
+            error_lines.append("\nSet the required environment variables and re-run the agent.")
+            error_msg = "\n".join(error_lines)
+            return ExecutionResult(
+                success=False,
+                error=error_msg,
+            )
+
        if self._uses_async_entry_points:
            # Multi-entry-point mode: use AgentRuntime
            return await self._run_with_agent_runtime(
@@ -822,28 +879,66 @@ class AgentRunner:
            warnings.append(f"Missing tool implementations: {', '.join(missing_tools)}")

        # Check credentials for required tools and node types
+        # Uses CredentialStore (encrypted files + env var fallback)
        missing_credentials = []
        try:
-            from aden_tools.credentials import CredentialManager
+            from aden_tools.credentials import CREDENTIAL_SPECS

-            cred_manager = CredentialManager()
+            from framework.credentials import CredentialStore
+            from framework.credentials.storage import (
+                CompositeStorage,
+                EncryptedFileStorage,
+                EnvVarStorage,
+            )

-            # Check tool credentials (Tier 2)
-            missing_creds = cred_manager.get_missing_for_tools(info.required_tools)
-            for _, spec in missing_creds:
-                missing_credentials.append(spec.env_var)
-                affected_tools = [t for t in info.required_tools if t in spec.tools]
-                tools_str = ", ".join(affected_tools)
-                warning_msg = f"Missing {spec.env_var} for {tools_str}"
-                if spec.help_url:
-                    warning_msg += f"\n  Get it at: {spec.help_url}"
-                warnings.append(warning_msg)
+            # Build env mapping for fallback
+            env_mapping = {
+                (spec.credential_id or name): spec.env_var
+                for name, spec in CREDENTIAL_SPECS.items()
+            }
+            storage = CompositeStorage(
+                primary=EncryptedFileStorage(),
+                fallbacks=[EnvVarStorage(env_mapping=env_mapping)],
+            )
+            store = CredentialStore(storage=storage)
+
+            # Build reverse mappings
+            tool_to_cred: dict[str, str] = {}
+            node_type_to_cred: dict[str, str] = {}
+            for cred_name, spec in CREDENTIAL_SPECS.items():
+                for tool_name in spec.tools:
+                    tool_to_cred[tool_name] = cred_name
+                for nt in spec.node_types:
+                    node_type_to_cred[nt] = cred_name
+
+            # Check tool credentials
+            checked: set[str] = set()
+            for tool_name in info.required_tools:
+                cred_name = tool_to_cred.get(tool_name)
+                if cred_name is None or cred_name in checked:
+                    continue
+                checked.add(cred_name)
+                spec = CREDENTIAL_SPECS[cred_name]
+                cred_id = spec.credential_id or cred_name
+                if spec.required and not store.is_available(cred_id):
+                    missing_credentials.append(spec.env_var)
+                    affected_tools = [t for t in info.required_tools if t in spec.tools]
+                    tools_str = ", ".join(affected_tools)
+                    warning_msg = f"Missing {spec.env_var} for {tools_str}"
+                    if spec.help_url:
+                        warning_msg += f"\n  Get it at: {spec.help_url}"
+                    warnings.append(warning_msg)

            # Check node type credentials (e.g., ANTHROPIC_API_KEY for LLM nodes)
            node_types = list({node.node_type for node in self.graph.nodes})
-            missing_node_creds = cred_manager.get_missing_for_node_types(node_types)
-            for _, spec in missing_node_creds:
-                if spec.env_var not in missing_credentials:  # Avoid duplicates
+            for nt in node_types:
+                cred_name = node_type_to_cred.get(nt)
+                if cred_name is None or cred_name in checked:
+                    continue
+                checked.add(cred_name)
+                spec = CREDENTIAL_SPECS[cred_name]
+                cred_id = spec.credential_id or cred_name
+                if spec.required and not store.is_available(cred_id):
                    missing_credentials.append(spec.env_var)
                    affected_types = [t for t in node_types if t in spec.node_types]
                    types_str = ", ".join(affected_types)
@@ -158,7 +158,26 @@ class ToolRegistry:
                            )
                            result = executor_func(tool_use)
                            if isinstance(result, ToolResult):
-                                return json.loads(result.content) if result.content else {}
+                                # ToolResult.content is expected to be JSON, but tools may
+                                # sometimes return invalid JSON. Guard against crashes here
+                                # and surface a structured error instead.
+                                if not result.content:
+                                    return {}
+                                try:
+                                    return json.loads(result.content)
+                                except json.JSONDecodeError as e:
+                                    logger.warning(
+                                        "Tool '%s' returned invalid JSON: %s",
+                                        tool_name,
+                                        str(e),
+                                    )
+                                    return {
+                                        "error": (
+                                            f"Invalid JSON response from tool '{tool_name}': "
+                                            f"{str(e)}"
+                                        ),
+                                        "raw_content": result.content,
+                                    }
                            return result

                        return executor
@@ -238,6 +257,34 @@ class ToolRegistry:
        """
        self._session_context.update(context)

+    def load_mcp_config(self, config_path: Path) -> None:
+        """
+        Load and register MCP servers from a config file (best effort).
+
+        Relative ``cwd`` paths are resolved against the config file's parent
+        directory. An unreadable file or a bad entry is logged and skipped.
+
+        Args:
+            config_path: Path to an ``mcp_servers.json`` file.
+        """
+        try:
+            with open(config_path, encoding="utf-8") as f:
+                servers = json.load(f).get("servers") or []
+        except Exception as e:
+            logger.warning(f"Failed to load MCP config from {config_path}: {e}")
+            return
+        base_dir = Path(config_path).parent
+        for server_config in servers:
+            name = "unknown"
+            try:  # covers cwd handling too, so one malformed entry cannot abort the rest
+                name = server_config.get("name", "unknown")
+                cwd = server_config.get("cwd")
+                if cwd and not Path(cwd).is_absolute():
+                    server_config["cwd"] = str((base_dir / cwd).resolve())
+                self.register_mcp_server(server_config)
+            except Exception as e:
+                logger.warning(f"Failed to register MCP server '{name}': {e}")
+
    def register_mcp_server(
        self,
        server_config: dict[str, Any],
@@ -290,11 +337,21 @@ class ToolRegistry:
                tool = self._convert_mcp_tool_to_framework_tool(mcp_tool)

                # Create executor that calls the MCP server
-                def make_mcp_executor(client_ref: MCPClient, tool_name: str, registry_ref):
+                def make_mcp_executor(
+                    client_ref: MCPClient,
+                    tool_name: str,
+                    registry_ref,
+                    tool_params: set[str],
+                ):
                    def executor(inputs: dict) -> Any:
                        try:
-                            # Inject session context for tools that need it
-                            merged_inputs = {**registry_ref._session_context, **inputs}
+                            # Only inject session context params the tool accepts
+                            filtered_context = {
+                                k: v
+                                for k, v in registry_ref._session_context.items()
+                                if k in tool_params
+                            }
+                            merged_inputs = {**filtered_context, **inputs}
                            result = client_ref.call_tool(tool_name, merged_inputs)
                            # MCP tools return content array, extract the result
                            if isinstance(result, list) and len(result) > 0:
@@ -308,10 +365,11 @@ class ToolRegistry:

                    return executor

+                tool_params = set(mcp_tool.input_schema.get("properties", {}).keys())
                self.register(
                    mcp_tool.name,
                    tool,
-                    make_mcp_executor(client, mcp_tool.name, self),
+                    make_mcp_executor(client, mcp_tool.name, self, tool_params),
                )
                count += 1

@@ -12,13 +12,13 @@ import logging
 from collections.abc import Awaitable, Callable
 from dataclasses import dataclass, field
 from datetime import datetime
-from enum import Enum
+from enum import StrEnum
 from typing import Any

 logger = logging.getLogger(__name__)


-class EventType(str, Enum):
+class EventType(StrEnum):
    """Types of events that can be published."""

    # Execution lifecycle
@@ -41,6 +41,28 @@ class EventType(str, Enum):
    STREAM_STARTED = "stream_started"
    STREAM_STOPPED = "stream_stopped"

+    # Node event-loop lifecycle
+    NODE_LOOP_STARTED = "node_loop_started"
+    NODE_LOOP_ITERATION = "node_loop_iteration"
+    NODE_LOOP_COMPLETED = "node_loop_completed"
+
+    # LLM streaming observability
+    LLM_TEXT_DELTA = "llm_text_delta"
+    LLM_REASONING_DELTA = "llm_reasoning_delta"
+
+    # Tool lifecycle
+    TOOL_CALL_STARTED = "tool_call_started"
+    TOOL_CALL_COMPLETED = "tool_call_completed"
+
+    # Client I/O (client_facing=True nodes only)
+    CLIENT_OUTPUT_DELTA = "client_output_delta"
+    CLIENT_INPUT_REQUESTED = "client_input_requested"
+
+    # Internal node observability (client_facing=False nodes)
+    NODE_INTERNAL_OUTPUT = "node_internal_output"
+    NODE_INPUT_BLOCKED = "node_input_blocked"
+    NODE_STALLED = "node_stalled"
+
    # Custom events
    CUSTOM = "custom"

@@ -51,6 +73,7 @@ class AgentEvent:

    type: EventType
    stream_id: str
+    node_id: str | None = None  # Which node emitted this event
    execution_id: str | None = None
    data: dict[str, Any] = field(default_factory=dict)
    timestamp: datetime = field(default_factory=datetime.now)
@@ -61,6 +84,7 @@ class AgentEvent:
        return {
            "type": self.type.value,
            "stream_id": self.stream_id,
+            "node_id": self.node_id,
            "execution_id": self.execution_id,
            "data": self.data,
            "timestamp": self.timestamp.isoformat(),
@@ -80,6 +104,7 @@ class Subscription:
    event_types: set[EventType]
    handler: EventHandler
    filter_stream: str | None = None  # Only receive events from this stream
+    filter_node: str | None = None  # Only receive events from this node
    filter_execution: str | None = None  # Only receive events from this execution


@@ -138,6 +163,7 @@ class EventBus:
        event_types: list[EventType],
        handler: EventHandler,
        filter_stream: str | None = None,
+        filter_node: str | None = None,
        filter_execution: str | None = None,
    ) -> str:
        """
@@ -147,6 +173,7 @@ class EventBus:
            event_types: Types of events to receive
            handler: Async function to call when event occurs
            filter_stream: Only receive events from this stream
+            filter_node: Only receive events from this node
            filter_execution: Only receive events from this execution

        Returns:
@@ -160,6 +187,7 @@ class EventBus:
            event_types=set(event_types),
            handler=handler,
            filter_stream=filter_stream,
+            filter_node=filter_node,
            filter_execution=filter_execution,
        )

@@ -218,6 +246,10 @@ class EventBus:
        if subscription.filter_stream and subscription.filter_stream != event.stream_id:
            return False

+        # Check node filter
+        if subscription.filter_node and subscription.filter_node != event.node_id:
+            return False
+
        # Check execution filter
        if subscription.filter_execution and subscription.filter_execution != event.execution_id:
            return False
@@ -359,6 +391,248 @@ class EventBus:
            )
        )

+    # === NODE EVENT-LOOP PUBLISHERS ===
+
+    async def emit_node_loop_started(
+        self,
+        stream_id: str,
+        node_id: str,
+        execution_id: str | None = None,
+        max_iterations: int | None = None,
+    ) -> None:
+        """Publish a NODE_LOOP_STARTED event carrying ``max_iterations``."""
+        payload = {"max_iterations": max_iterations}
+        event = AgentEvent(
+            type=EventType.NODE_LOOP_STARTED,
+            stream_id=stream_id,
+            node_id=node_id,
+            execution_id=execution_id,
+            data=payload,
+        )
+        await self.publish(event)
+
+    async def emit_node_loop_iteration(
+        self,
+        stream_id: str,
+        node_id: str,
+        iteration: int,
+        execution_id: str | None = None,
+    ) -> None:
+        """Publish a NODE_LOOP_ITERATION event carrying ``iteration``."""
+        payload = {"iteration": iteration}
+        event = AgentEvent(
+            type=EventType.NODE_LOOP_ITERATION,
+            stream_id=stream_id,
+            node_id=node_id,
+            execution_id=execution_id,
+            data=payload,
+        )
+        await self.publish(event)
+
+    async def emit_node_loop_completed(
+        self,
+        stream_id: str,
+        node_id: str,
+        iterations: int,
+        execution_id: str | None = None,
+    ) -> None:
+        """Publish a NODE_LOOP_COMPLETED event carrying ``iterations``."""
+        payload = {"iterations": iterations}
+        event = AgentEvent(
+            type=EventType.NODE_LOOP_COMPLETED,
+            stream_id=stream_id,
+            node_id=node_id,
+            execution_id=execution_id,
+            data=payload,
+        )
+        await self.publish(event)
+
+    # === LLM STREAMING PUBLISHERS ===
+
+    async def emit_llm_text_delta(
+        self,
+        stream_id: str,
+        node_id: str,
+        content: str,
+        snapshot: str,
+        execution_id: str | None = None,
+    ) -> None:
+        """Publish an LLM_TEXT_DELTA event with ``content`` and ``snapshot``."""
+        payload = {"content": content, "snapshot": snapshot}
+        event = AgentEvent(
+            type=EventType.LLM_TEXT_DELTA,
+            stream_id=stream_id,
+            node_id=node_id,
+            execution_id=execution_id,
+            data=payload,
+        )
+        await self.publish(event)
+
+    async def emit_llm_reasoning_delta(
+        self,
+        stream_id: str,
+        node_id: str,
+        content: str,
+        execution_id: str | None = None,
+    ) -> None:
+        """Publish an LLM_REASONING_DELTA event with ``content``."""
+        payload = {"content": content}
+        event = AgentEvent(
+            type=EventType.LLM_REASONING_DELTA,
+            stream_id=stream_id,
+            node_id=node_id,
+            execution_id=execution_id,
+            data=payload,
+        )
+        await self.publish(event)
+
+    # === TOOL LIFECYCLE PUBLISHERS ===
+
+    async def emit_tool_call_started(
+        self,
+        stream_id: str,
+        node_id: str,
+        tool_use_id: str,
+        tool_name: str,
+        tool_input: dict[str, Any] | None = None,
+        execution_id: str | None = None,
+    ) -> None:
+        """Publish a TOOL_CALL_STARTED event; a falsy ``tool_input`` is sent as ``{}``."""
+        payload = {
+            "tool_use_id": tool_use_id,
+            "tool_name": tool_name,
+            "tool_input": tool_input or {},
+        }
+        event = AgentEvent(
+            type=EventType.TOOL_CALL_STARTED,
+            stream_id=stream_id,
+            node_id=node_id,
+            execution_id=execution_id,
+            data=payload,
+        )
+        await self.publish(event)
+
+    async def emit_tool_call_completed(
+        self,
+        stream_id: str,
+        node_id: str,
+        tool_use_id: str,
+        tool_name: str,
+        result: str = "",
+        is_error: bool = False,
+        execution_id: str | None = None,
+    ) -> None:
+        """Publish a TOOL_CALL_COMPLETED event with ``result`` and ``is_error``."""
+        payload = {
+            "tool_use_id": tool_use_id,
+            "tool_name": tool_name,
+            "result": result,
+            "is_error": is_error,
+        }
+        event = AgentEvent(
+            type=EventType.TOOL_CALL_COMPLETED,
+            stream_id=stream_id,
+            node_id=node_id,
+            execution_id=execution_id,
+            data=payload,
+        )
+        await self.publish(event)
+
+    # === CLIENT I/O PUBLISHERS ===
+
+    async def emit_client_output_delta(
+        self,
+        stream_id: str,
+        node_id: str,
+        content: str,
+        snapshot: str,
+        execution_id: str | None = None,
+    ) -> None:
+        """Publish a CLIENT_OUTPUT_DELTA event (client_facing=True nodes)."""
+        payload = {"content": content, "snapshot": snapshot}
+        event = AgentEvent(
+            type=EventType.CLIENT_OUTPUT_DELTA,
+            stream_id=stream_id,
+            node_id=node_id,
+            execution_id=execution_id,
+            data=payload,
+        )
+        await self.publish(event)
+
+    async def emit_client_input_requested(
+        self,
+        stream_id: str,
+        node_id: str,
+        prompt: str = "",
+        execution_id: str | None = None,
+    ) -> None:
+        """Publish a CLIENT_INPUT_REQUESTED event (client_facing=True nodes)."""
+        payload = {"prompt": prompt}
+        event = AgentEvent(
+            type=EventType.CLIENT_INPUT_REQUESTED,
+            stream_id=stream_id,
+            node_id=node_id,
+            execution_id=execution_id,
+            data=payload,
+        )
+        await self.publish(event)
+
+    # === INTERNAL NODE PUBLISHERS ===
+
+    async def emit_node_internal_output(
+        self,
+        stream_id: str,
+        node_id: str,
+        content: str,
+        execution_id: str | None = None,
+    ) -> None:
+        """Publish a NODE_INTERNAL_OUTPUT event (client_facing=False nodes)."""
+        payload = {"content": content}
+        event = AgentEvent(
+            type=EventType.NODE_INTERNAL_OUTPUT,
+            stream_id=stream_id,
+            node_id=node_id,
+            execution_id=execution_id,
+            data=payload,
+        )
+        await self.publish(event)
+
+    async def emit_node_stalled(
+        self,
+        stream_id: str,
+        node_id: str,
+        reason: str = "",
+        execution_id: str | None = None,
+    ) -> None:
+        """Publish a NODE_STALLED event carrying ``reason``."""
+        payload = {"reason": reason}
+        event = AgentEvent(
+            type=EventType.NODE_STALLED,
+            stream_id=stream_id,
+            node_id=node_id,
+            execution_id=execution_id,
+            data=payload,
+        )
+        await self.publish(event)
+
+    async def emit_node_input_blocked(
+        self,
+        stream_id: str,
+        node_id: str,
+        prompt: str = "",
+        execution_id: str | None = None,
+    ) -> None:
+        """Publish a NODE_INPUT_BLOCKED event carrying ``prompt``."""
+        payload = {"prompt": prompt}
+        event = AgentEvent(
+            type=EventType.NODE_INPUT_BLOCKED,
+            stream_id=stream_id,
+            node_id=node_id,
+            execution_id=execution_id,
+            data=payload,
+        )
+        await self.publish(event)
+
    # === QUERY OPERATIONS ===

    def get_history(
@@ -410,6 +684,7 @@ class EventBus:
        self,
        event_type: EventType,
        stream_id: str | None = None,
+        node_id: str | None = None,
        execution_id: str | None = None,
        timeout: float | None = None,
    ) -> AgentEvent | None:
@@ -419,6 +694,7 @@ class EventBus:
        Args:
            event_type: Type of event to wait for
            stream_id: Filter by stream
+            node_id: Filter by node
            execution_id: Filter by execution
            timeout: Maximum time to wait (seconds)

@@ -438,6 +714,7 @@ class EventBus:
            event_types=[event_type],
            handler=handler,
            filter_stream=stream_id,
+            filter_node=node_id,
            filter_execution=execution_id,
        )

@@ -11,13 +11,13 @@ import asyncio
 import logging
 import time
 from dataclasses import dataclass, field
-from enum import Enum
+from enum import StrEnum
 from typing import Any

 logger = logging.getLogger(__name__)


-class IsolationLevel(str, Enum):
+class IsolationLevel(StrEnum):
    """State isolation level for concurrent executions."""

    ISOLATED = "isolated"  # Private state per execution
@@ -25,7 +25,7 @@ class IsolationLevel(str, Enum):
    SYNCHRONIZED = "synchronized"  # Shared with write locks (strong consistency)


-class StateScope(str, Enum):
+class StateScope(StrEnum):
    """Scope for state operations."""

    EXECUTION = "execution"  # Local to a single execution
@@ -10,13 +10,13 @@ This is MORE important than actions because:
 """

 from datetime import datetime
-from enum import Enum
+from enum import StrEnum
 from typing import Any

 from pydantic import BaseModel, Field, computed_field


-class DecisionType(str, Enum):
+class DecisionType(StrEnum):
    """Types of decisions an agent can make."""

    TOOL_SELECTION = "tool_selection"  # Which tool to use
@@ -6,7 +6,7 @@ summaries and metrics that Builder needs to understand what happened.
 """

 from datetime import datetime
-from enum import Enum
+from enum import StrEnum
 from typing import Any

 from pydantic import BaseModel, Field, computed_field
@@ -14,7 +14,7 @@ from pydantic import BaseModel, Field, computed_field
 from framework.schemas.decision import Decision, Outcome


-class RunStatus(str, Enum):
+class RunStatus(StrEnum):
    """Status of a run."""

    RUNNING = "running"
@@ -1,5 +1,6 @@
 """Storage backends for runtime data."""

 from framework.storage.backend import FileStorage
+from framework.storage.conversation_store import FileConversationStore

-__all__ = ["FileStorage"]
+__all__ = ["FileStorage", "FileConversationStore"]
@@ -9,6 +9,7 @@ import json
 from pathlib import Path

 from framework.schemas.run import Run, RunStatus, RunSummary
+from framework.utils.io import atomic_write


 class FileStorage:
@@ -46,19 +47,53 @@ class FileStorage:
        for d in dirs:
            d.mkdir(parents=True, exist_ok=True)

+    def _validate_key(self, key: str) -> None:
+        """
+        Validate an index key before it is used to build a file path.
+
+        Args:
+            key: The key to validate
+
+        Raises:
+            ValueError: If key is empty or contains path traversal or dangerous patterns
+        """
+        if not key or key.strip() == "":
+            raise ValueError("Key cannot be empty")
+
+        # Block path separators (this also rejects every POSIX absolute path)
+        if "/" in key or "\\" in key:
+            raise ValueError(f"Invalid key format: path separators not allowed in '{key}'")
+
+        # Block parent directory references and leading-dot names
+        if ".." in key or key.startswith("."):
+            raise ValueError(f"Invalid key format: path traversal detected in '{key}'")
+
+        # Block drive-letter prefixes such as "C:"; a leading "/" cannot reach this point
+        if len(key) > 1 and key[1] == ":":
+            raise ValueError(f"Invalid key format: absolute paths not allowed in '{key}'")
+
+        # Block null bytes (Unix path injection)
+        if "\x00" in key:
+            raise ValueError("Invalid key format: null bytes not allowed")
+
+        # Block other dangerous special characters
+        dangerous_chars = {"<", ">", "|", "&", "$", "`", "'", '"'}
+        if any(char in key for char in dangerous_chars):
+            raise ValueError(f"Invalid key format: contains dangerous characters in '{key}'")
+
    # === RUN OPERATIONS ===

    def save_run(self, run: Run) -> None:
        """Save a run to storage."""
        # Save full run using Pydantic's model_dump_json
        run_path = self.base_path / "runs" / f"{run.id}.json"
-        with open(run_path, "w") as f:
+        with atomic_write(run_path) as f:
            f.write(run.model_dump_json(indent=2))

        # Save summary
        summary = RunSummary.from_run(run)
        summary_path = self.base_path / "summaries" / f"{run.id}.json"
-        with open(summary_path, "w") as f:
+        with atomic_write(summary_path) as f:
            f.write(summary.model_dump_json(indent=2))

        # Update indexes
@@ -72,7 +107,7 @@ class FileStorage:
        run_path = self.base_path / "runs" / f"{run_id}.json"
        if not run_path.exists():
            return None
-        with open(run_path) as f:
+        with open(run_path, encoding="utf-8") as f:
            return Run.model_validate_json(f.read())

    def load_summary(self, run_id: str) -> RunSummary | None:
@@ -85,7 +120,7 @@ class FileStorage:
                return RunSummary.from_run(run)
            return None

-        with open(summary_path) as f:
+        with open(summary_path, encoding="utf-8") as f:
            return RunSummary.model_validate_json(f.read())

    def delete_run(self, run_id: str) -> bool:
@@ -140,29 +175,32 @@ class FileStorage:

    def _get_index(self, index_type: str, key: str) -> list[str]:
        """Get values from an index."""
+        self._validate_key(key)  # Prevent path traversal
        index_path = self.base_path / "indexes" / index_type / f"{key}.json"
        if not index_path.exists():
            return []
-        with open(index_path) as f:
+        with open(index_path, encoding="utf-8") as f:
            return json.load(f)

    def _add_to_index(self, index_type: str, key: str, value: str) -> None:
        """Add a value to an index."""
+        self._validate_key(key)  # Prevent path traversal
        index_path = self.base_path / "indexes" / index_type / f"{key}.json"
-        values = self._get_index(index_type, key)
+        values = self._get_index(index_type, key)  # Already validated in _get_index
        if value not in values:
            values.append(value)
-            with open(index_path, "w") as f:
-                json.dump(values, f)
+            with atomic_write(index_path) as f:
+                json.dump(values, f, indent=2)

    def _remove_from_index(self, index_type: str, key: str, value: str) -> None:
        """Remove a value from an index."""
+        self._validate_key(key)  # Prevent path traversal
        index_path = self.base_path / "indexes" / index_type / f"{key}.json"
-        values = self._get_index(index_type, key)
+        values = self._get_index(index_type, key)  # Already validated in _get_index
        if value in values:
            values.remove(value)
-            with open(index_path, "w") as f:
-                json.dump(values, f)
+            with atomic_write(index_path) as f:
+                json.dump(values, f, indent=2)

    # === UTILITY ===

@@ -167,14 +167,18 @@ class ConcurrentStorage:
            run: Run to save
            immediate: If True, save immediately (bypasses batching)
        """
+        # Invalidate summary cache since the run data is changing
+        # This ensures load_summary() fetches fresh data after the save
+        self._cache.pop(f"summary:{run.id}", None)
+
        if immediate or not self._running:
            await self._save_run_locked(run)
+            # Update cache only after successful immediate write
+            self._cache[f"run:{run.id}"] = CacheEntry(run, time.time())
        else:
+            # For batched writes, cache will be updated in _flush_batch after successful write
            await self._write_queue.put(("run", run))

-        # Update cache
-        self._cache[f"run:{run.id}"] = CacheEntry(run, time.time())
-
    async def _save_run_locked(self, run: Run) -> None:
        """Save a run with file locking, including index locks."""
        lock_key = f"run:{run.id}"
@@ -363,8 +367,12 @@ class ConcurrentStorage:
            try:
                if item_type == "run":
                    await self._save_run_locked(item)
+                    # Update cache only after successful batched write
+                    # This fixes the race condition where cache was updated before write completed
+                    self._cache[f"run:{item.id}"] = CacheEntry(item, time.time())
            except Exception as e:
                logger.error(f"Failed to save {item_type}: {e}")
+                # Cache is NOT updated on failure - prevents stale/inconsistent cache state

    async def _flush_pending(self) -> None:
        """Flush all pending writes."""
@@ -0,0 +1,140 @@
+"""File-per-part ConversationStore implementation.
+
+Each conversation part is stored as a separate JSON file under a
+``parts/`` subdirectory.  Meta and cursor are stored as ``meta.json``
+and ``cursor.json`` in the base directory.
+
+Directory layout::
+
+    {base_path}/
+        meta.json
+        cursor.json
+        parts/
+            0000000000.json
+            0000000001.json
+            ...
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import os
+import shutil
+import uuid
+from pathlib import Path
+from typing import Any
+
+
+class FileConversationStore:
+    """File-per-part ConversationStore.
+
+    Uses one JSON file per message part, with ``pathlib.Path`` for
+    cross-platform path handling and ``asyncio.to_thread`` for
+    non-blocking I/O.  Each file is written to a temporary sibling and
+    renamed into place, so a reader sees either the old or the new content.
+    """
+
+    def __init__(self, base_path: str | Path) -> None:
+        self._base = Path(base_path)
+        self._parts_dir = self._base / "parts"
+
+    # --- sync helpers --------------------------------------------------------
+
+    def _write_json(self, path: Path, data: dict) -> None:
+        """Serialize ``data`` as JSON to ``path`` via write-then-rename."""
+        path.parent.mkdir(parents=True, exist_ok=True)
+        # The temp name does not end in ".json", so the ``*.json`` globs in
+        # ``read_parts`` and ``delete_parts_before`` never match an in-flight write.
+        tmp_path = path.with_name(f".{path.name}.{uuid.uuid4().hex}.tmp")
+        try:
+            with open(tmp_path, "w", encoding="utf-8") as f:
+                json.dump(data, f)
+            os.replace(tmp_path, path)
+        except BaseException:
+            # Do not leave a stray temp file behind when the write fails.
+            tmp_path.unlink(missing_ok=True)
+            raise
+
+    def _read_json(self, path: Path) -> dict | None:
+        """Return the parsed JSON at ``path``, or ``None`` if missing or invalid."""
+        try:
+            with open(path, encoding="utf-8") as f:
+                return json.load(f)
+        except (FileNotFoundError, NotADirectoryError, ValueError):
+            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
+            return None
+
+    # --- async wrapper -------------------------------------------------------
+
+    async def _run(self, fn, *args):
+        """Run a blocking callable in a worker thread and return its result."""
+        return await asyncio.to_thread(fn, *args)
+
+    # --- ConversationStore interface -----------------------------------------
+
+    async def write_part(self, seq: int, data: dict[str, Any]) -> None:
+        """Persist one part under its zero-padded, ten-digit sequence number."""
+        path = self._parts_dir / f"{seq:010d}.json"
+        await self._run(self._write_json, path, data)
+
+    async def read_parts(self) -> list[dict[str, Any]]:
+        """Return every readable part, ordered by file name."""
+
+        def _read_all() -> list[dict[str, Any]]:
+            if not self._parts_dir.exists():
+                return []
+            files = sorted(self._parts_dir.glob("*.json"))
+            parts = []
+            for f in files:
+                data = self._read_json(f)
+                if data is not None:
+                    parts.append(data)
+            return parts
+
+        return await self._run(_read_all)
+
+    async def write_meta(self, data: dict[str, Any]) -> None:
+        """Persist ``data`` to ``meta.json`` in the base directory."""
+        await self._run(self._write_json, self._base / "meta.json", data)
+
+    async def read_meta(self) -> dict[str, Any] | None:
+        """Return the contents of ``meta.json``, or ``None`` if unavailable."""
+        return await self._run(self._read_json, self._base / "meta.json")
+
+    async def write_cursor(self, data: dict[str, Any]) -> None:
+        """Persist ``data`` to ``cursor.json`` in the base directory."""
+        await self._run(self._write_json, self._base / "cursor.json", data)
+
+    async def read_cursor(self) -> dict[str, Any] | None:
+        """Return the contents of ``cursor.json``, or ``None`` if unavailable."""
+        return await self._run(self._read_json, self._base / "cursor.json")
+
+    async def delete_parts_before(self, seq: int) -> None:
+        """Delete every part whose sequence number is lower than ``seq``."""
+
+        def _delete() -> None:
+            if not self._parts_dir.exists():
+                return
+            for f in self._parts_dir.glob("*.json"):
+                try:
+                    file_seq = int(f.stem)
+                except ValueError:
+                    # Not named by a sequence number; leave it alone instead of aborting.
+                    continue
+                if file_seq < seq:
+                    f.unlink(missing_ok=True)
+
+        await self._run(_delete)
+
+    async def close(self) -> None:
+        """No-op — no persistent handles for file-per-part storage."""
+
+    async def destroy(self) -> None:
+        """Delete the entire base directory and all persisted data."""
+
+        def _destroy() -> None:
+            if self._base.exists():
+                shutil.rmtree(self._base)
+
+        await self._run(_destroy)
@@ -6,13 +6,13 @@ programmatic/MCP-based approval.
 """

 from datetime import datetime
-from enum import Enum
+from enum import StrEnum
 from typing import Any

 from pydantic import BaseModel, Field


-class ApprovalAction(str, Enum):
+class ApprovalAction(StrEnum):
    """Actions a user can take on a generated test."""

    APPROVE = "approve"  # Accept as-is
@@ -20,11 +20,11 @@ from {agent_module} import default_agent


 def _get_api_key():
-    """Get API key from CredentialManager (Anthropic) or environment (Any)."""
-    # 1. Try CredentialManager for Anthropic (the only provider it currently supports)
+    """Get API key from CredentialStoreAdapter or environment."""
+    # 1. Try CredentialStoreAdapter for Anthropic
    try:
-        from aden_tools.credentials import CredentialManager
-        creds = CredentialManager()
+        from aden_tools.credentials import CredentialStoreAdapter
+        creds = CredentialStoreAdapter.default()
        if creds.is_available("anthropic"):
            return creds.get("anthropic")
    except (ImportError, KeyError):
@@ -54,10 +54,10 @@ import pytest


 def _get_api_key():
-    """Get API key from CredentialManager (Anthropic) or environment (Any)."""
+    """Get API key from CredentialStoreAdapter or environment."""
    try:
-        from aden_tools.credentials import CredentialManager
-        creds = CredentialManager()
+        from aden_tools.credentials import CredentialStoreAdapter
+        creds = CredentialStoreAdapter.default()
        if creds.is_available("anthropic"):
            return creds.get("anthropic")
    except (ImportError, KeyError):
@@ -6,13 +6,13 @@ but require mandatory user approval before being stored.
 """

 from datetime import datetime
-from enum import Enum
+from enum import StrEnum
 from typing import Any

 from pydantic import BaseModel, Field


-class ApprovalStatus(str, Enum):
+class ApprovalStatus(StrEnum):
    """Status of user approval for a generated test."""

    PENDING = "pending"  # Awaiting user review
@@ -21,7 +21,7 @@ class ApprovalStatus(str, Enum):
    REJECTED = "rejected"  # User declined (with reason)


-class TestType(str, Enum):
+class TestType(StrEnum):
    """Type of test based on what it validates."""

    __test__ = False  # Not a pytest test class
@@ -6,13 +6,13 @@ categorization for guiding iteration strategy.
 """

 from datetime import datetime
-from enum import Enum
+from enum import StrEnum
 from typing import Any

 from pydantic import BaseModel, Field


-class ErrorCategory(str, Enum):
+class ErrorCategory(StrEnum):
    """
    Category of test failure for guiding iteration.

@@ -65,7 +65,7 @@ class TestStorage:

        # Save full test
        test_path = goal_dir / f"{test.id}.json"
-        with open(test_path, "w") as f:
+        with open(test_path, "w", encoding="utf-8") as f:
            f.write(test.model_dump_json(indent=2))

        # Update indexes
@@ -79,7 +79,7 @@ class TestStorage:
        test_path = self.base_path / "tests" / goal_id / f"{test_id}.json"
        if not test_path.exists():
            return None
-        with open(test_path) as f:
+        with open(test_path, encoding="utf-8") as f:
            return Test.model_validate_json(f.read())

    def delete_test(self, goal_id: str, test_id: str) -> bool:
@@ -175,12 +175,12 @@ class TestStorage:
        # Save with timestamp
        timestamp = result.timestamp.strftime("%Y%m%d_%H%M%S")
        result_path = results_dir / f"{timestamp}.json"
-        with open(result_path, "w") as f:
+        with open(result_path, "w", encoding="utf-8") as f:
            f.write(result.model_dump_json(indent=2))

        # Update latest
        latest_path = results_dir / "latest.json"
-        with open(latest_path, "w") as f:
+        with open(latest_path, "w", encoding="utf-8") as f:
            f.write(result.model_dump_json(indent=2))

    def get_latest_result(self, test_id: str) -> TestResult | None:
@@ -188,7 +188,7 @@ class TestStorage:
        latest_path = self.base_path / "results" / test_id / "latest.json"
        if not latest_path.exists():
            return None
-        with open(latest_path) as f:
+        with open(latest_path, encoding="utf-8") as f:
            return TestResult.model_validate_json(f.read())

    def get_result_history(self, test_id: str, limit: int = 10) -> list[TestResult]:
@@ -204,7 +204,7 @@ class TestStorage:

        results = []
        for f in result_files:
-            with open(f) as file:
+            with open(f, encoding="utf-8") as file:
                results.append(TestResult.model_validate_json(file.read()))

        return results
@@ -216,7 +216,7 @@ class TestStorage:
        index_path = self.base_path / "indexes" / index_type / f"{key}.json"
        if not index_path.exists():
            return []
-        with open(index_path) as f:
+        with open(index_path, encoding="utf-8") as f:
            return json.load(f)

    def _add_to_index(self, index_type: str, key: str, value: str) -> None:
@@ -225,7 +225,7 @@ class TestStorage:
        values = self._get_index(index_type, key)
        if value not in values:
            values.append(value)
-            with open(index_path, "w") as f:
+            with open(index_path, "w", encoding="utf-8") as f:
                json.dump(values, f)

    def _remove_from_index(self, index_type: str, key: str, value: str) -> None:
@@ -234,7 +234,7 @@ class TestStorage:
        values = self._get_index(index_type, key)
        if value in values:
            values.remove(value)
-            with open(index_path, "w") as f:
+            with open(index_path, "w", encoding="utf-8") as f:
                json.dump(values, f)

    # === UTILITY ===
@@ -0,0 +1,17 @@
+import os
+from contextlib import contextmanager
+from pathlib import Path
+
+
+@contextmanager
+def atomic_write(path: Path, mode: str = "w", encoding: str = "utf-8"):
+    """Write to ``path`` atomically by way of a sibling ``.tmp`` file.
+
+    The caller writes into a temporary file whose name is ``path`` plus a
+    ``.tmp`` suffix. On a clean exit the data is flushed, fsynced and the
+    temporary file is renamed over ``path``. If anything raises, the
+    temporary file is removed and the exception is re-raised, so ``path`` is
+    never replaced by partial content.
+
+    Args:
+        path: Final destination of the file.
+        mode: Mode handed to ``open`` for the temporary file.
+        encoding: Text encoding; ignored when ``mode`` is a binary mode.
+
+    Yields:
+        The open temporary file object to write into.
+    """
+    tmp_path = path.with_suffix(path.suffix + ".tmp")
+    # open() raises ValueError if an encoding is supplied in binary mode.
+    text_encoding = None if "b" in mode else encoding
+    try:
+        with open(tmp_path, mode, encoding=text_encoding) as f:
+            yield f
+            # Push data to disk before the rename makes it visible.
+            f.flush()
+            os.fsync(f.fileno())
+        tmp_path.replace(path)
+    except BaseException:
+        # Includes KeyboardInterrupt/GeneratorExit: never leave a stray .tmp.
+        tmp_path.unlink(missing_ok=True)
+        raise
@@ -5,22 +5,25 @@ description = "Goal-driven agent runtime with Builder-friendly observability"
 readme = "README.md"
 requires-python = ">=3.11"
 dependencies = [
-    "pydantic>=2.0",
-    "anthropic>=0.40.0",
-    "httpx>=0.27.0",
-    "litellm>=1.81.0",
-    "mcp>=1.0.0",
-    "fastmcp>=2.0.0",
-    "pytest>=8.0",
-    "pytest-asyncio>=0.23",
-    "pytest-xdist>=3.0",
+  "pydantic>=2.0",
+  "anthropic>=0.40.0",
+  "httpx>=0.27.0",
+  "litellm>=1.81.0",
+  "mcp>=1.0.0",
+  "fastmcp>=2.0.0",
+  "pytest>=8.0",
+  "pytest-asyncio>=0.23",
+  "pytest-xdist>=3.0",
+  "tools",
 ]

-[project.optional-dependencies]
-dev = [
-    "ruff>=0.1.0",
-    "mypy>=1.0",
-]
+# [project.optional-dependencies]
+
+[project.scripts]
+hive = "framework.cli:main"
+
+[tool.uv.sources]
+tools = { workspace = true }

 [build-system]
 requires = ["hatchling"]
@@ -34,16 +37,17 @@ target-version = "py311"
 line-length = 100

 lint.select = [
-  "B",   # bugbear errors
-  "C4",  # flake8-comprehensions errors
-  "E",   # pycodestyle errors
-  "F",   # pyflakes errors
-  "I",   # import sorting
-  "Q",   # flake8-quotes errors
-  "UP",  # py-upgrade
-  "W",   # pycodestyle warnings
+  "B", # bugbear errors
+  "C4", # flake8-comprehensions errors
+  "E", # pycodestyle errors
+  "F", # pyflakes errors
+  "I", # import sorting
+  "Q", # flake8-quotes errors
+  "UP", # py-upgrade
+  "W", # pycodestyle warnings
 ]

+lint.per-file-ignores."demos/*" = ["E501"]
 lint.isort.combine-as-imports = true
 lint.isort.known-first-party = ["framework"]
 lint.isort.section-order = [
@@ -52,4 +56,7 @@ lint.isort.section-order = [
  "third-party",
  "first-party",
  "local-folder",
-]
+]
+
+[dependency-groups]
+dev = ["ty>=0.0.13", "ruff>=0.14.14"]
@@ -1,10 +0,0 @@
-# Development dependencies
-r requirements.txt
-
-# Testing
-pytest>=8.0
-pytest-asyncio>=0.23
-
-# Linting & type checking
-ruff>=0.1.0
-mypy>=1.0
@@ -1,14 +0,0 @@
-# Core dependencies
-pydantic>=2.0
-anthropic>=0.40.0
-httpx>=0.27.0
-litellm>=1.81.0
-
-# MCP server dependencies
-mcp
-fastmcp
-
-# Testing (required for test framework)
-pytest>=8.0
-pytest-asyncio>=0.23
-pytest-xdist>=3.0
@@ -0,0 +1,128 @@
+"""Tests for the hive CLI entry point and path auto-configuration."""
+
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+
+import pytest
+
+from framework.cli import _configure_paths
+
+
+@pytest.fixture
+def project_root():
+    """Provide the directory three levels above this test file."""
+    this_file = Path(__file__).resolve()
+    return this_file.parent.parent.parent
+
+
+class TestConfigurePaths:
+    """Test _configure_paths auto-discovers exports/ and core/.
+
+    Every test snapshots ``sys.path`` and restores it afterwards so that the
+    mutations made by ``_configure_paths`` do not leak into other tests.
+    """
+
+    def test_adds_exports_to_sys_path(self, project_root):
+        """exports/ ends up on sys.path after it was removed beforehand."""
+        exports_dir = project_root / "exports"
+        if not exports_dir.is_dir():
+            pytest.skip("exports/ directory does not exist in this environment")
+
+        exports_str = str(exports_dir)
+        # Remove if already present to test fresh addition
+        original_path = sys.path.copy()
+        sys.path = [p for p in sys.path if p != exports_str]
+
+        try:
+            _configure_paths()
+            assert exports_str in sys.path
+        finally:
+            sys.path = original_path
+
+    def test_adds_core_to_sys_path(self, project_root):
+        """core/ ends up on sys.path after it was removed beforehand."""
+        core_dir = project_root / "core"
+        core_str = str(core_dir)
+        original_path = sys.path.copy()
+        sys.path = [p for p in sys.path if p != core_str]
+
+        try:
+            _configure_paths()
+            assert core_str in sys.path
+        finally:
+            sys.path = original_path
+
+    def test_does_not_duplicate_paths(self):
+        """A second call leaves sys.path exactly as the first call left it."""
+        original_path = sys.path.copy()
+        try:
+            _configure_paths()
+            # Call twice — should not create duplicates
+            before = sys.path.copy()
+            _configure_paths()
+            assert sys.path == before
+        finally:
+            sys.path = original_path
+
+    def test_handles_missing_exports_gracefully(self):
+        """If exports/ doesn't exist, _configure_paths should not crash."""
+        # NOTE(review): this only checks that the call does not raise in the
+        # current checkout; it does not simulate a missing exports/ directory.
+        original_path = sys.path.copy()
+        try:
+            _configure_paths()
+        finally:
+            sys.path = original_path
+
+
+class TestFrameworkModule:
+    """Exercise the CLI through ``python -m framework`` (the underlying module)."""
+
+    @staticmethod
+    def _invoke(project_root, *cli_args):
+        """Run ``python -m framework <cli_args>`` from core/ and capture its output."""
+        command = [sys.executable, "-m", "framework", *cli_args]
+        return subprocess.run(
+            command,
+            capture_output=True,
+            text=True,
+            cwd=str(project_root / "core"),
+        )
+
+    def test_module_help(self, project_root):
+        """``python -m framework --help`` exits 0 and mentions hive or goal."""
+        completed = self._invoke(project_root, "--help")
+        assert completed.returncode == 0
+        usage = completed.stdout.lower()
+        assert "hive" in usage or "goal" in usage
+
+    def test_module_list_subcommand(self, project_root):
+        """``python -m framework list --help`` exits 0, so ``list`` is registered."""
+        completed = self._invoke(project_root, "list", "--help")
+        assert completed.returncode == 0
+        usage = completed.stdout.lower()
+        assert "agents" in usage or "directory" in usage
+
+
+class TestHiveEntryPoint:
+    """Exercise the installed ``hive`` console_scripts entry point.
+
+    The command is the one installed by ``pip install -e core/``. When it is
+    not found on PATH every test in this class is skipped.
+    """
+
+    @pytest.fixture(autouse=True)
+    def _require_hive(self):
+        hive_path = shutil.which("hive")
+        if hive_path is None:
+            pytest.skip("'hive' entry point not installed (run: pip install -e core/)")
+
+    @staticmethod
+    def _hive(*cli_args):
+        """Run ``hive <cli_args>`` and return the completed process with text output."""
+        return subprocess.run(["hive", *cli_args], capture_output=True, text=True)
+
+    def test_hive_help(self):
+        """``hive --help`` exits 0 and lists the run and validate commands."""
+        completed = self._hive("--help")
+        assert completed.returncode == 0
+        usage = completed.stdout.lower()
+        assert "run" in usage
+        assert "validate" in usage
+
+    def test_hive_list_help(self):
+        """``hive list --help`` exits 0."""
+        completed = self._hive("list", "--help")
+        assert completed.returncode == 0
+
+    def test_hive_run_missing_agent(self):
+        """``hive run`` on an unknown agent exits with a non-zero status."""
+        completed = self._hive("run", "nonexistent_agent_xyz")
+        assert completed.returncode != 0
@@ -0,0 +1,237 @@
+"""
+Tests for client-facing fan-out and event_loop output_key overlap validation.
+
+Validates two rules added to GraphSpec.validate():
+1. Fan-out must not have multiple client_facing=True targets.
+2. Parallel event_loop nodes must have disjoint output_keys.
+"""
+
+from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
+from framework.graph.node import NodeSpec
+
+# ---------------------------------------------------------------------------
+# Rule 1: client_facing fan-out
+# ---------------------------------------------------------------------------
+
+
+class TestClientFacingFanOut:
+    """A fan-out may reach at most one client_facing=True target."""
+
+    @staticmethod
+    def _client_facing_errors(a_kwargs, b_kwargs):
+        """Validate a src -> {a, b} fan-out and return the client-facing errors.
+
+        ``a_kwargs`` / ``b_kwargs`` are extra NodeSpec keyword arguments for
+        the two target nodes (empty dict means "use NodeSpec defaults").
+        """
+        targets = {"a": a_kwargs, "b": b_kwargs}
+        branches = [
+            NodeSpec(id=node_id, name=node_id, description=f"Node {node_id}", **extra)
+            for node_id, extra in targets.items()
+        ]
+        fan_out = [
+            EdgeSpec(
+                id=f"src->{node_id}",
+                source="src",
+                target=node_id,
+                condition=EdgeCondition.ON_SUCCESS,
+            )
+            for node_id in targets
+        ]
+        graph = GraphSpec(
+            id="g1",
+            goal_id="goal1",
+            entry_node="src",
+            nodes=[NodeSpec(id="src", name="src", description="Source node"), *branches],
+            edges=fan_out,
+        )
+        return [msg for msg in graph.validate() if "multiple client-facing" in msg]
+
+    def test_fan_out_two_client_facing_fails(self):
+        """Both targets client-facing -> exactly one error naming the source."""
+        cf_errors = self._client_facing_errors({"client_facing": True}, {"client_facing": True})
+        assert len(cf_errors) == 1
+        assert "'src'" in cf_errors[0]
+
+    def test_fan_out_one_client_facing_passes(self):
+        """A single client-facing target -> no error."""
+        cf_errors = self._client_facing_errors({"client_facing": True}, {"client_facing": False})
+        assert len(cf_errors) == 0
+
+    def test_fan_out_zero_client_facing_passes(self):
+        """Neither target client-facing -> no error."""
+        cf_errors = self._client_facing_errors({}, {})
+        assert len(cf_errors) == 0
+
+
+# ---------------------------------------------------------------------------
+# Rule 2: event_loop output_key overlap
+# ---------------------------------------------------------------------------
+
+
+class TestEventLoopOutputKeyOverlap:
+    """Parallel event_loop nodes must not share any output_key."""
+
+    @staticmethod
+    def _output_key_errors(node_type, a_keys, b_keys):
+        """Validate a src -> {a, b} fan-out and return the output_key errors.
+
+        Both targets use ``node_type``; ``a_keys`` / ``b_keys`` are their
+        respective ``output_keys`` lists.
+        """
+        branch_keys = {"a": a_keys, "b": b_keys}
+        branches = [
+            NodeSpec(
+                id=node_id,
+                name=node_id,
+                description=f"Node {node_id}",
+                node_type=node_type,
+                output_keys=keys,
+            )
+            for node_id, keys in branch_keys.items()
+        ]
+        fan_out = [
+            EdgeSpec(
+                id=f"src->{node_id}",
+                source="src",
+                target=node_id,
+                condition=EdgeCondition.ON_SUCCESS,
+            )
+            for node_id in branch_keys
+        ]
+        graph = GraphSpec(
+            id="g1",
+            goal_id="goal1",
+            entry_node="src",
+            nodes=[NodeSpec(id="src", name="src", description="Source node"), *branches],
+            edges=fan_out,
+        )
+        return [msg for msg in graph.validate() if "output_key" in msg]
+
+    def test_overlapping_output_keys_event_loop_fails(self):
+        """Two event_loop targets sharing a key -> one error naming that key."""
+        key_errors = self._output_key_errors(
+            "event_loop", ["status", "shared"], ["result", "shared"]
+        )
+        assert len(key_errors) == 1
+        assert "'shared'" in key_errors[0]
+
+    def test_disjoint_output_keys_event_loop_passes(self):
+        """Two event_loop targets with disjoint keys -> no error."""
+        key_errors = self._output_key_errors("event_loop", ["status"], ["result"])
+        assert len(key_errors) == 0
+
+    def test_overlapping_keys_non_event_loop_no_error(self):
+        """Overlapping keys on non-event_loop targets -> no error (last-wins OK)."""
+        key_errors = self._output_key_errors("llm_generate", ["shared"], ["shared"])
+        assert len(key_errors) == 0
+
+
+# ---------------------------------------------------------------------------
+# Baseline: no fan-out -> no errors from these rules
+# ---------------------------------------------------------------------------
+
+
+class TestNoFanOutUnaffected:
+    """Neither validation rule should fire for a graph without fan-out."""
+
+    def test_no_fan_out_unaffected(self):
+        """A linear a -> b -> c chain yields no client-facing or output_key errors."""
+        first = NodeSpec(id="a", name="a", description="Node a", client_facing=True)
+        middle = NodeSpec(
+            id="b",
+            name="b",
+            description="Node b",
+            node_type="event_loop",
+            output_keys=["x"],
+        )
+        last = NodeSpec(
+            id="c",
+            name="c",
+            description="Node c",
+            client_facing=True,
+            node_type="event_loop",
+            output_keys=["x"],
+        )
+        chain = [
+            EdgeSpec(
+                id=f"{source}->{target}",
+                source=source,
+                target=target,
+                condition=EdgeCondition.ON_SUCCESS,
+            )
+            for source, target in (("a", "b"), ("b", "c"))
+        ]
+        graph = GraphSpec(
+            id="g1",
+            goal_id="goal1",
+            entry_node="a",
+            terminal_nodes=["c"],
+            nodes=[first, middle, last],
+            edges=chain,
+        )
+
+        errors = graph.validate()
+        assert not [e for e in errors if "multiple client-facing" in e]
+        assert not [e for e in errors if "output_key" in e]
@@ -0,0 +1,150 @@
+"""
+Tests for ClientIO gateway (WP-9).
+
+Covers:
+- ActiveNodeClientIO: emit_output → output_stream round-trip, request_input, timeout
+- InertNodeClientIO: emit_output publishes NODE_INTERNAL_OUTPUT, request_input returns redirect
+- ClientIOGateway: factory creates correct variant
+"""
+
+import asyncio
+
+import pytest
+
+from framework.graph.client_io import (
+    ActiveNodeClientIO,
+    ClientIOGateway,
+    InertNodeClientIO,
+    NodeClientIO,
+)
+from framework.runtime.event_bus import AgentEvent, EventType
+
+_AGENT_EVENT_FIELDS = {"stream_id", "node_id", "execution_id", "correlation_id"}
+
+
+class MockEventBus:
+    """Test double for EventBus that stores every emitted event in ``events``."""
+
+    def __init__(self) -> None:
+        self.events: list[AgentEvent] = []
+
+    async def _record(self, event_type: EventType, **kwargs) -> None:
+        """Split kwargs into AgentEvent fields vs. ``data`` payload and store the event."""
+        envelope: dict = {}
+        payload: dict = {}
+        for key, value in kwargs.items():
+            bucket = envelope if key in _AGENT_EVENT_FIELDS else payload
+            bucket[key] = value
+        self.events.append(AgentEvent(type=event_type, **envelope, data=payload))
+
+    async def emit_client_output_delta(self, **kwargs) -> None:
+        """Record a CLIENT_OUTPUT_DELTA event."""
+        await self._record(EventType.CLIENT_OUTPUT_DELTA, **kwargs)
+
+    async def emit_client_input_requested(self, **kwargs) -> None:
+        """Record a CLIENT_INPUT_REQUESTED event."""
+        await self._record(EventType.CLIENT_INPUT_REQUESTED, **kwargs)
+
+    async def emit_node_internal_output(self, **kwargs) -> None:
+        """Record a NODE_INTERNAL_OUTPUT event."""
+        await self._record(EventType.NODE_INTERNAL_OUTPUT, **kwargs)
+
+    async def emit_node_input_blocked(self, **kwargs) -> None:
+        """Record a NODE_INPUT_BLOCKED event."""
+        await self._record(EventType.NODE_INPUT_BLOCKED, **kwargs)
+
+
+# --- ActiveNodeClientIO tests ---
+
+
+@pytest.mark.asyncio
+async def test_active_emit_and_consume():
+    """emit_output → output_stream round-trip works correctly."""
+    bus = MockEventBus()
+    io = ActiveNodeClientIO(node_id="n1", event_bus=bus)
+
+    # Emit both chunks before anything consumes the stream; the second is
+    # flagged final (presumably what lets the consumer loop below terminate —
+    # confirm against ActiveNodeClientIO).
+    await io.emit_output("Hello ")
+    await io.emit_output("World", is_final=True)
+
+    chunks = []
+    async for chunk in io.output_stream():
+        chunks.append(chunk)
+
+    # Chunks come back in emission order, and one bus event is recorded per emit.
+    assert chunks == ["Hello ", "World"]
+    assert len(bus.events) == 2
+    assert all(e.type == EventType.CLIENT_OUTPUT_DELTA for e in bus.events)
+    # Verify snapshot accumulates
+    assert bus.events[0].data["snapshot"] == "Hello "
+    assert bus.events[1].data["snapshot"] == "Hello World"
+
+
+@pytest.mark.asyncio
+async def test_active_request_input():
+    """request_input blocks until provide_input is called."""
+    bus = MockEventBus()
+    io = ActiveNodeClientIO(node_id="n1", event_bus=bus)
+
+    async def fulfill_later():
+        # Small delay so provide_input runs after request_input started waiting.
+        await asyncio.sleep(0.01)
+        await io.provide_input("user says hi")
+
+    # Schedule the answer in the background, then wait for it in the foreground.
+    task = asyncio.create_task(fulfill_later())
+    result = await io.request_input(prompt="What?")
+    await task
+
+    assert result == "user says hi"
+    # Exactly one event is recorded: the input request carrying the prompt.
+    assert len(bus.events) == 1
+    assert bus.events[0].type == EventType.CLIENT_INPUT_REQUESTED
+    assert bus.events[0].data["prompt"] == "What?"
+
+
+@pytest.mark.asyncio
+async def test_active_request_input_timeout():
+    """request_input raises TimeoutError when timeout expires."""
+    # No event bus is passed and nobody calls provide_input, so the request
+    # can only end through the timeout.
+    io = ActiveNodeClientIO(node_id="n1")
+
+    with pytest.raises(TimeoutError):
+        await io.request_input(prompt="waiting", timeout=0.01)
+
+
+# --- InertNodeClientIO tests ---
+
+
+@pytest.mark.asyncio
+async def test_inert_emit_publishes_internal():
+    """InertNodeClientIO.emit_output publishes NODE_INTERNAL_OUTPUT."""
+    bus = MockEventBus()
+    io = InertNodeClientIO(node_id="n2", event_bus=bus)
+
+    await io.emit_output("internal log")
+
+    # One event, typed as internal output, carrying the emitted text as "content".
+    assert len(bus.events) == 1
+    assert bus.events[0].type == EventType.NODE_INTERNAL_OUTPUT
+    assert bus.events[0].data["content"] == "internal log"
+
+
+@pytest.mark.asyncio
+async def test_inert_request_input_returns_redirect():
+    """request_input returns a redirect string and publishes NODE_INPUT_BLOCKED."""
+    bus = MockEventBus()
+    io = InertNodeClientIO(node_id="n2", event_bus=bus)
+
+    # Returns without any provide_input call in this test.
+    result = await io.request_input(prompt="need data")
+
+    # The returned text identifies the node as internal; the blocked request
+    # is reported on the bus together with the original prompt.
+    assert "internal processing node" in result
+    assert len(bus.events) == 1
+    assert bus.events[0].type == EventType.NODE_INPUT_BLOCKED
+    assert bus.events[0].data["prompt"] == "need data"
+
+
+# --- ClientIOGateway tests ---
+
+
+def test_gateway_creates_active_for_client_facing():
+    """A client-facing node gets an ActiveNodeClientIO from ClientIOGateway.create_io."""
+    created = ClientIOGateway().create_io(node_id="n1", client_facing=True)
+
+    for expected_type in (ActiveNodeClientIO, NodeClientIO):
+        assert isinstance(created, expected_type)
+
+
+def test_gateway_creates_inert_for_internal():
+    """A non-client-facing node gets an InertNodeClientIO from ClientIOGateway.create_io."""
+    created = ClientIOGateway().create_io(node_id="n2", client_facing=False)
+
+    for expected_type in (InertNodeClientIO, NodeClientIO):
+        assert isinstance(created, expected_type)
@@ -0,0 +1,162 @@
+"""Tests for ConcurrentStorage race condition and cache invalidation fixes."""
+
+import asyncio
+from pathlib import Path
+
+import pytest
+
+from framework.schemas.run import Run, RunMetrics, RunStatus
+from framework.storage.concurrent import ConcurrentStorage
+
+
+def create_test_run(
+    run_id: str, goal_id: str = "test-goal", status: RunStatus = RunStatus.RUNNING
+) -> Run:
+    """Build a bare-bones Run with empty metrics, decisions and problems."""
+    empty_metrics = RunMetrics(nodes_executed=[])
+    return Run(
+        id=run_id,
+        goal_id=goal_id,
+        status=status,
+        narrative="Test run",
+        metrics=empty_metrics,
+        decisions=[],
+        problems=[],
+    )
+
+
+@pytest.mark.asyncio
+async def test_cache_invalidation_on_save(tmp_path: Path):
+    """Test that summary cache is invalidated when a run is saved.
+
+    This tests the fix for the cache invalidation bug where load_summary()
+    would return stale data after a run was updated.
+    """
+    storage = ConcurrentStorage(tmp_path)
+    await storage.start()
+
+    # try/finally guarantees the storage is stopped even if an assertion fails.
+    try:
+        run_id = "test-run-1"
+
+        # Create and save initial run
+        run = create_test_run(run_id, status=RunStatus.RUNNING)
+        await storage.save_run(run, immediate=True)
+
+        # Load summary to populate the cache
+        summary = await storage.load_summary(run_id)
+        assert summary is not None
+        assert summary.status == RunStatus.RUNNING
+
+        # Update run with new status
+        run.status = RunStatus.COMPLETED
+        await storage.save_run(run, immediate=True)
+
+        # Load summary again - should get fresh data, not cached stale data
+        summary = await storage.load_summary(run_id)
+        assert summary is not None
+        assert summary.status == RunStatus.COMPLETED, (
+            "Summary cache should be invalidated on save - got stale data"
+        )
+    finally:
+        await storage.stop()
+
+
+@pytest.mark.asyncio
+async def test_batched_write_cache_consistency(tmp_path: Path):
+    """Test that cache is only updated after successful batched write.
+
+    This tests the fix for the race condition where cache was updated
+    before the batched write completed.
+    """
+    # Short batch interval so the flush happens within the sleep below.
+    storage = ConcurrentStorage(tmp_path, batch_interval=0.05)
+    await storage.start()
+
+    try:
+        run_id = "test-run-2"
+
+        # Save via batching (immediate=False)
+        run = create_test_run(run_id, status=RunStatus.RUNNING)
+        await storage.save_run(run, immediate=False)
+
+        # Before batch flush, cache should NOT contain the run
+        # (This is the fix - previously cache was updated immediately)
+        # The test inspects the private _cache mapping directly.
+        cache_key = f"run:{run_id}"
+        assert cache_key not in storage._cache, (
+            "Cache should not be updated before batch is flushed"
+        )
+
+        # Wait for batch to flush
+        # NOTE(review): relies on wall-clock timing (0.1s sleep vs. 0.05s
+        # batch_interval); may be flaky on a heavily loaded machine.
+        await asyncio.sleep(0.1)
+
+        # After batch flush, cache should contain the run
+        assert cache_key in storage._cache, "Cache should be updated after batch flush"
+
+        # Verify data on disk matches cache
+        loaded_run = await storage.load_run(run_id, use_cache=False)
+        assert loaded_run is not None
+        assert loaded_run.id == run_id
+        assert loaded_run.status == RunStatus.RUNNING
+    finally:
+        await storage.stop()
+
+
+@pytest.mark.asyncio
+async def test_immediate_write_updates_cache(tmp_path: Path):
+    """Test that immediate writes still update cache correctly."""
+    storage = ConcurrentStorage(tmp_path)
+    await storage.start()
+
+    try:
+        run_id = "test-run-3"
+
+        # Save with immediate=True
+        run = create_test_run(run_id, status=RunStatus.COMPLETED)
+        await storage.save_run(run, immediate=True)
+
+        # Cache should be updated immediately for immediate writes
+        cache_key = f"run:{run_id}"
+        assert cache_key in storage._cache, "Cache should be updated after immediate write"
+
+        # Verify cached value is correct
+        # Cache entries are wrappers; the stored Run is read from ``.value``.
+        cached_run = storage._cache[cache_key].value
+        assert cached_run.id == run_id
+        assert cached_run.status == RunStatus.COMPLETED
+    finally:
+        await storage.stop()
+
+
+@pytest.mark.asyncio
+async def test_summary_cache_invalidated_on_multiple_saves(tmp_path: Path):
+    """Test that summary cache is invalidated on each save, not just the first."""
+    storage = ConcurrentStorage(tmp_path)
+    await storage.start()
+
+    try:
+        run_id = "test-run-4"
+
+        # First save
+        run = create_test_run(run_id, status=RunStatus.RUNNING)
+        await storage.save_run(run, immediate=True)
+
+        # Load summary to cache it
+        summary1 = await storage.load_summary(run_id)
+        assert summary1.status == RunStatus.RUNNING
+
+        # Second save
+        # NOTE(review): the status is still RUNNING here, so a stale cached
+        # summary would pass the next assertion too; use a distinct
+        # intermediate status if this step is meant to detect staleness.
+        run.status = RunStatus.RUNNING
+        await storage.save_run(run, immediate=True)
+
+        # Load summary - should be fresh
+        summary2 = await storage.load_summary(run_id)
+        assert summary2.status == RunStatus.RUNNING
+
+        # Third save with final status
+        run.status = RunStatus.COMPLETED
+        await storage.save_run(run, immediate=True)
+
+        # Load summary - should be fresh again
+        summary3 = await storage.load_summary(run_id)
+        assert summary3.status == RunStatus.COMPLETED
+    finally:
+        await storage.stop()
@@ -0,0 +1,326 @@
+"""Tests for ContextHandoff and HandoffContext."""
+
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+
+from framework.graph.context_handoff import ContextHandoff, HandoffContext
+from framework.graph.conversation import NodeConversation
+from framework.llm.mock import MockLLMProvider
+from framework.llm.provider import LLMProvider, LLMResponse
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+class SpyLLMProvider(MockLLMProvider):
+    """MockLLMProvider that records whether complete() was called."""
+
+    def __init__(self) -> None:
+        super().__init__()
+        self.complete_called = False
+        self.complete_call_args: dict[str, Any] | None = None
+
+    def complete(self, messages: list[dict[str, Any]], **kwargs: Any) -> LLMResponse:
+        self.complete_called = True
+        self.complete_call_args = {"messages": messages, **kwargs}
+        return super().complete(messages, **kwargs)
+
+
+class FailingLLMProvider(LLMProvider):
+    """LLM provider that always raises."""
+
+    def complete(self, messages: list[dict[str, Any]], **kwargs: Any) -> LLMResponse:
+        raise RuntimeError("LLM unavailable")
+
+    def complete_with_tools(
+        self,
+        messages: list[dict[str, Any]],
+        system: str,
+        tools: list,
+        tool_executor: Any,
+        max_iterations: int = 10,
+    ) -> LLMResponse:
+        raise RuntimeError("LLM unavailable")
+
+
+async def _build_conversation(*pairs: tuple[str, str]) -> NodeConversation:
+    """Build a NodeConversation from (user, assistant) message pairs."""
+    conv = NodeConversation()
+    for user_msg, assistant_msg in pairs:
+        await conv.add_user_message(user_msg)
+        await conv.add_assistant_message(assistant_msg)
+    return conv
+
+
+# ---------------------------------------------------------------------------
+# TestHandoffContext
+# ---------------------------------------------------------------------------
+
+
+class TestHandoffContext:
+    def test_instantiation(self) -> None:
+        hc = HandoffContext(
+            source_node_id="node_A",
+            summary="Summary text",
+            key_outputs={"result": "42"},
+            turn_count=3,
+            total_tokens_used=1200,
+        )
+        assert hc.source_node_id == "node_A"
+        assert hc.summary == "Summary text"
+        assert hc.key_outputs == {"result": "42"}
+        assert hc.turn_count == 3
+        assert hc.total_tokens_used == 1200
+
+    def test_field_access(self) -> None:
+        hc = HandoffContext(
+            source_node_id="n1",
+            summary="s",
+            key_outputs={},
+            turn_count=0,
+            total_tokens_used=0,
+        )
+        assert hc.key_outputs == {}
+
+
+# ---------------------------------------------------------------------------
+# TestExtractiveSummary
+# ---------------------------------------------------------------------------
+
+
+class TestExtractiveSummary:
+    @pytest.mark.asyncio
+    async def test_extractive_summary_includes_first_last(self) -> None:
+        conv = await _build_conversation(
+            ("hello", "First response here."),
+            ("continue", "Middle response."),
+            ("finish", "Final conclusion."),
+        )
+        ch = ContextHandoff()
+        hc = ch.summarize_conversation(conv, node_id="test_node")
+
+        assert "First response here." in hc.summary
+        assert "Final conclusion." in hc.summary
+
+    @pytest.mark.asyncio
+    async def test_extractive_summary_metadata(self) -> None:
+        conv = await _build_conversation(
+            ("hi", "hello"),
+            ("bye", "goodbye"),
+        )
+        ch = ContextHandoff()
+        hc = ch.summarize_conversation(conv, node_id="node_42")
+
+        assert hc.source_node_id == "node_42"
+        assert hc.turn_count == 2
+        assert hc.total_tokens_used > 0
+
+    @pytest.mark.asyncio
+    async def test_extractive_with_output_keys_colon(self) -> None:
+        conv = await _build_conversation(
+            ("what is the answer?", "answer: 42"),
+        )
+        ch = ContextHandoff()
+        hc = ch.summarize_conversation(conv, node_id="n", output_keys=["answer"])
+
+        assert hc.key_outputs["answer"] == "42"
+
+    @pytest.mark.asyncio
+    async def test_extractive_with_output_keys_equals(self) -> None:
+        conv = await _build_conversation(
+            ("compute", "result = success"),
+        )
+        ch = ContextHandoff()
+        hc = ch.summarize_conversation(conv, node_id="n", output_keys=["result"])
+
+        assert hc.key_outputs["result"] == "success"
+
+    @pytest.mark.asyncio
+    async def test_extractive_json_output_keys(self) -> None:
+        conv = await _build_conversation(
+            ("give me json", '{"score": 95, "grade": "A"}'),
+        )
+        ch = ContextHandoff()
+        hc = ch.summarize_conversation(conv, node_id="n", output_keys=["score", "grade"])
+
+        assert hc.key_outputs["score"] == "95"
+        assert hc.key_outputs["grade"] == "A"
+
+    @pytest.mark.asyncio
+    async def test_extractive_empty_conversation(self) -> None:
+        conv = NodeConversation()
+        ch = ContextHandoff()
+        hc = ch.summarize_conversation(conv, node_id="empty")
+
+        assert hc.summary == "Empty conversation."
+        assert hc.turn_count == 0
+        assert hc.key_outputs == {}
+
+    @pytest.mark.asyncio
+    async def test_extractive_no_assistant_messages(self) -> None:
+        conv = NodeConversation()
+        await conv.add_user_message("hello?")
+        await conv.add_user_message("anyone there?")
+
+        ch = ContextHandoff()
+        hc = ch.summarize_conversation(conv, node_id="silent")
+
+        assert hc.summary == "No assistant responses."
+
+    @pytest.mark.asyncio
+    async def test_extractive_most_recent_wins(self) -> None:
+        conv = await _build_conversation(
+            ("first", "status: old_value"),
+            ("second", "status: new_value"),
+        )
+        ch = ContextHandoff()
+        hc = ch.summarize_conversation(conv, node_id="n", output_keys=["status"])
+
+        assert hc.key_outputs["status"] == "new_value"
+
+    @pytest.mark.asyncio
+    async def test_extractive_truncation(self) -> None:
+        long_text = "x" * 1000
+        conv = await _build_conversation(
+            ("go", long_text),
+        )
+        ch = ContextHandoff()
+        hc = ch.summarize_conversation(conv, node_id="n")
+
+        # Summary should be truncated to ~500 chars
+        assert len(hc.summary) <= 500
+
+
+# ---------------------------------------------------------------------------
+# TestLLMSummary
+# ---------------------------------------------------------------------------
+
+
+class TestLLMSummary:
+    @pytest.mark.asyncio
+    async def test_llm_summary_calls_provider(self) -> None:
+        llm = SpyLLMProvider()
+        conv = await _build_conversation(
+            ("hi", "hello back"),
+            ("what now?", "we are done"),
+        )
+        ch = ContextHandoff(llm=llm)
+        hc = ch.summarize_conversation(conv, node_id="llm_node")
+
+        assert llm.complete_called, "LLM complete() was never invoked"
+        assert hc.summary == "This is a mock response for testing purposes."
+
+    @pytest.mark.asyncio
+    async def test_llm_summary_includes_output_key_hint(self) -> None:
+        llm = SpyLLMProvider()
+        conv = await _build_conversation(
+            ("compute", '{"score": 95}'),
+        )
+        ch = ContextHandoff(llm=llm)
+        ch.summarize_conversation(conv, node_id="n", output_keys=["score", "grade"])
+
+        assert llm.complete_call_args is not None
+        system = llm.complete_call_args.get("system", "")
+        assert "score" in system
+        assert "grade" in system
+
+    @pytest.mark.asyncio
+    async def test_llm_fallback_on_error(self) -> None:
+        llm = FailingLLMProvider()
+        conv = await _build_conversation(
+            ("start", "First assistant message."),
+            ("end", "Last assistant message."),
+        )
+        ch = ContextHandoff(llm=llm)
+        hc = ch.summarize_conversation(conv, node_id="fallback_node")
+
+        # Should fall back to extractive (first + last assistant messages)
+        assert "First assistant message." in hc.summary
+        assert "Last assistant message." in hc.summary
+
+
+# ---------------------------------------------------------------------------
+# TestFormatAsInput
+# ---------------------------------------------------------------------------
+
+
+class TestFormatAsInput:
+    def test_format_structure(self) -> None:
+        hc = HandoffContext(
+            source_node_id="analyzer",
+            summary="Analysis complete.",
+            key_outputs={"score": "95"},
+            turn_count=5,
+            total_tokens_used=2000,
+        )
+        output = ContextHandoff.format_as_input(hc)
+
+        assert "--- CONTEXT FROM: analyzer" in output
+        assert "KEY OUTPUTS:" in output
+        assert "SUMMARY:" in output
+        assert "--- END CONTEXT ---" in output
+
+    def test_format_no_key_outputs(self) -> None:
+        hc = HandoffContext(
+            source_node_id="simple",
+            summary="Done.",
+            key_outputs={},
+            turn_count=1,
+            total_tokens_used=100,
+        )
+        output = ContextHandoff.format_as_input(hc)
+
+        assert "KEY OUTPUTS:" not in output
+        assert "SUMMARY:" in output
+
+    def test_format_content_values(self) -> None:
+        hc = HandoffContext(
+            source_node_id="node_X",
+            summary="Found 3 bugs.",
+            key_outputs={"bugs": "3", "severity": "high"},
+            turn_count=7,
+            total_tokens_used=5000,
+        )
+        output = ContextHandoff.format_as_input(hc)
+
+        assert "node_X" in output
+        assert "7 turns" in output
+        assert "~5000 tokens" in output
+        assert "- bugs: 3" in output
+        assert "- severity: high" in output
+        assert "Found 3 bugs." in output
+
+    def test_format_empty_summary(self) -> None:
+        hc = HandoffContext(
+            source_node_id="n",
+            summary="",
+            key_outputs={},
+            turn_count=0,
+            total_tokens_used=0,
+        )
+        output = ContextHandoff.format_as_input(hc)
+
+        assert "No summary available." in output
+
+    @pytest.mark.asyncio
+    async def test_format_as_input_usable_as_message(self) -> None:
+        """Formatted output can be fed into a NodeConversation as a user message."""
+        hc = HandoffContext(
+            source_node_id="prev_node",
+            summary="Completed analysis.",
+            key_outputs={"result": "42"},
+            turn_count=3,
+            total_tokens_used=900,
+        )
+        text = ContextHandoff.format_as_input(hc)
+
+        conv = NodeConversation()
+        msg = await conv.add_user_message(text)
+
+        assert msg.role == "user"
+        assert "CONTEXT FROM: prev_node" in msg.content
+        assert conv.turn_count == 1
@@ -0,0 +1,906 @@
+"""WP-8: Tests for EventLoopNode, OutputAccumulator, LoopConfig, JudgeProtocol.
+
+Uses real FileConversationStore (no mocks for storage) and a MockStreamingLLM
+that yields pre-programmed StreamEvents to control the loop deterministically.
+"""
+
+from __future__ import annotations
+
+import asyncio
+from collections.abc import AsyncIterator
+from typing import Any
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from framework.graph.conversation import NodeConversation
+from framework.graph.event_loop_node import (
+    EventLoopNode,
+    JudgeProtocol,
+    JudgeVerdict,
+    LoopConfig,
+    OutputAccumulator,
+)
+from framework.graph.node import NodeContext, NodeProtocol, NodeSpec, SharedMemory
+from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolResult, ToolUse
+from framework.llm.stream_events import (
+    FinishEvent,
+    StreamErrorEvent,
+    TextDeltaEvent,
+    ToolCallEvent,
+)
+from framework.runtime.core import Runtime
+from framework.runtime.event_bus import EventBus, EventType
+from framework.storage.conversation_store import FileConversationStore
+
+# ---------------------------------------------------------------------------
+# Mock LLM that yields pre-programmed stream events
+# ---------------------------------------------------------------------------
+
+
+class MockStreamingLLM(LLMProvider):
+    """Mock LLM that yields pre-programmed StreamEvent sequences.
+
+    Each call to stream() consumes the next scenario from the list.
+    Cycles back to the beginning if more calls are made than scenarios.
+    """
+
+    def __init__(self, scenarios: list[list] | None = None):
+        self.scenarios = scenarios or []
+        self._call_index = 0
+        self.stream_calls: list[dict] = []
+
+    async def stream(
+        self,
+        messages: list[dict[str, Any]],
+        system: str = "",
+        tools: list[Tool] | None = None,
+        max_tokens: int = 4096,
+    ) -> AsyncIterator:
+        self.stream_calls.append({"messages": messages, "system": system, "tools": tools})
+        if not self.scenarios:
+            return
+        events = self.scenarios[self._call_index % len(self.scenarios)]
+        self._call_index += 1
+        for event in events:
+            yield event
+
+    def complete(self, messages, system="", **kwargs) -> LLMResponse:
+        return LLMResponse(content="Summary of conversation.", model="mock", stop_reason="stop")
+
+    def complete_with_tools(self, messages, system, tools, tool_executor, **kwargs) -> LLMResponse:
+        return LLMResponse(content="", model="mock", stop_reason="stop")
+
+
+# ---------------------------------------------------------------------------
+# Helper: build a simple text-only scenario
+# ---------------------------------------------------------------------------
+
+
+def text_scenario(text: str, input_tokens: int = 10, output_tokens: int = 5) -> list:
+    """Build a stream scenario that produces text and finishes."""
+    return [
+        TextDeltaEvent(content=text, snapshot=text),
+        FinishEvent(
+            stop_reason="stop", input_tokens=input_tokens, output_tokens=output_tokens, model="mock"
+        ),
+    ]
+
+
+def tool_call_scenario(
+    tool_name: str,
+    tool_input: dict,
+    tool_use_id: str = "call_1",
+    text: str = "",
+) -> list:
+    """Build a stream scenario that produces a tool call."""
+    events = []
+    if text:
+        events.append(TextDeltaEvent(content=text, snapshot=text))
+    events.append(
+        ToolCallEvent(tool_use_id=tool_use_id, tool_name=tool_name, tool_input=tool_input)
+    )
+    events.append(
+        FinishEvent(stop_reason="tool_calls", input_tokens=10, output_tokens=5, model="mock")
+    )
+    return events
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def runtime():
+    rt = MagicMock(spec=Runtime)
+    rt.start_run = MagicMock(return_value="run_1")
+    rt.decide = MagicMock(return_value="dec_1")
+    rt.record_outcome = MagicMock()
+    rt.end_run = MagicMock()
+    rt.report_problem = MagicMock()
+    rt.set_node = MagicMock()
+    return rt
+
+
+@pytest.fixture
+def node_spec():
+    return NodeSpec(
+        id="test_loop",
+        name="Test Loop",
+        description="A test event loop node",
+        node_type="event_loop",
+        output_keys=["result"],
+        system_prompt="You are a test assistant.",
+    )
+
+
+@pytest.fixture
+def memory():
+    return SharedMemory()
+
+
+def build_ctx(runtime, node_spec, memory, llm, tools=None, input_data=None, goal_context=""):
+    """Build a NodeContext for testing."""
+    return NodeContext(
+        runtime=runtime,
+        node_id=node_spec.id,
+        node_spec=node_spec,
+        memory=memory,
+        input_data=input_data or {},
+        llm=llm,
+        available_tools=tools or [],
+        goal_context=goal_context,
+    )
+
+
+# ===========================================================================
+# NodeProtocol conformance
+# ===========================================================================
+
+
+class TestNodeProtocolConformance:
+    def test_subclasses_node_protocol(self):
+        """EventLoopNode must be a subclass of NodeProtocol."""
+        assert issubclass(EventLoopNode, NodeProtocol)
+
+    def test_has_execute_method(self):
+        node = EventLoopNode()
+        assert hasattr(node, "execute")
+        assert asyncio.iscoroutinefunction(node.execute)
+
+    def test_has_validate_input(self):
+        node = EventLoopNode()
+        assert hasattr(node, "validate_input")
+
+
+# ===========================================================================
+# Basic loop execution
+# ===========================================================================
+
+
+class TestBasicLoop:
+    @pytest.mark.asyncio
+    async def test_basic_text_only_implicit_accept(self, runtime, node_spec, memory):
+        """No tools, no judge. LLM produces text, implicit accept on stop."""
+        # Override to no output_keys so implicit judge accepts immediately
+        node_spec.output_keys = []
+        llm = MockStreamingLLM(scenarios=[text_scenario("Hello world")])
+        ctx = build_ctx(runtime, node_spec, memory, llm)
+
+        node = EventLoopNode(config=LoopConfig(max_iterations=5))
+        result = await node.execute(ctx)
+
+        assert result.success is True
+        assert result.tokens_used > 0
+
+    @pytest.mark.asyncio
+    async def test_no_llm_returns_failure(self, runtime, node_spec, memory):
+        """ctx.llm=None should return failure immediately."""
+        ctx = build_ctx(runtime, node_spec, memory, llm=None)
+
+        node = EventLoopNode()
+        result = await node.execute(ctx)
+
+        assert result.success is False
+        assert "LLM" in result.error
+
+    @pytest.mark.asyncio
+    async def test_max_iterations_failure(self, runtime, node_spec, memory):
+        """When max_iterations is reached without acceptance, should fail."""
+        # LLM always produces text but never calls set_output, so implicit
+        # judge retries asking for missing keys
+        llm = MockStreamingLLM(scenarios=[text_scenario("thinking...")])
+        ctx = build_ctx(runtime, node_spec, memory, llm)
+
+        node = EventLoopNode(config=LoopConfig(max_iterations=2))
+        result = await node.execute(ctx)
+
+        assert result.success is False
+        assert "Max iterations" in result.error
+
+
+# ===========================================================================
+# Judge integration
+# ===========================================================================
+
+
+class TestJudgeIntegration:
+    @pytest.mark.asyncio
+    async def test_judge_accept(self, runtime, node_spec, memory):
+        """Mock judge ACCEPT -> success."""
+        node_spec.output_keys = []
+        llm = MockStreamingLLM(scenarios=[text_scenario("Done!")])
+
+        judge = AsyncMock(spec=JudgeProtocol)
+        judge.evaluate = AsyncMock(return_value=JudgeVerdict(action="ACCEPT"))
+
+        ctx = build_ctx(runtime, node_spec, memory, llm)
+        node = EventLoopNode(judge=judge, config=LoopConfig(max_iterations=5))
+        result = await node.execute(ctx)
+
+        assert result.success is True
+        judge.evaluate.assert_called_once()
+
+    @pytest.mark.asyncio
+    async def test_judge_escalate(self, runtime, node_spec, memory):
+        """Mock judge ESCALATE -> failure."""
+        node_spec.output_keys = []
+        llm = MockStreamingLLM(scenarios=[text_scenario("Attempt")])
+
+        judge = AsyncMock(spec=JudgeProtocol)
+        judge.evaluate = AsyncMock(
+            return_value=JudgeVerdict(action="ESCALATE", feedback="Tone violation")
+        )
+
+        ctx = build_ctx(runtime, node_spec, memory, llm)
+        node = EventLoopNode(judge=judge, config=LoopConfig(max_iterations=5))
+        result = await node.execute(ctx)
+
+        assert result.success is False
+        assert "escalated" in result.error.lower()
+        assert "Tone violation" in result.error
+
+    @pytest.mark.asyncio
+    async def test_judge_retry_then_accept(self, runtime, node_spec, memory):
+        """RETRY twice, then ACCEPT. Should run 3 iterations."""
+        node_spec.output_keys = []
+        llm = MockStreamingLLM(
+            scenarios=[
+                text_scenario("attempt 1"),
+                text_scenario("attempt 2"),
+                text_scenario("attempt 3"),
+            ]
+        )
+
+        call_count = 0
+
+        async def evaluate_fn(context):
+            nonlocal call_count
+            call_count += 1
+            if call_count < 3:
+                return JudgeVerdict(action="RETRY", feedback="Try harder")
+            return JudgeVerdict(action="ACCEPT")
+
+        judge = AsyncMock(spec=JudgeProtocol)
+        judge.evaluate = AsyncMock(side_effect=evaluate_fn)
+
+        ctx = build_ctx(runtime, node_spec, memory, llm)
+        node = EventLoopNode(judge=judge, config=LoopConfig(max_iterations=10))
+        result = await node.execute(ctx)
+
+        assert result.success is True
+        assert call_count == 3
+
+
+# ===========================================================================
+# set_output tool
+# ===========================================================================
+
+
+class TestSetOutput:
+    @pytest.mark.asyncio
+    async def test_set_output_accumulates(self, runtime, node_spec, memory):
+        """LLM calls set_output -> values appear in NodeResult.output."""
+        llm = MockStreamingLLM(
+            scenarios=[
+                # Turn 1: call set_output
+                tool_call_scenario("set_output", {"key": "result", "value": "42"}),
+                # Turn 2: text response (triggers implicit judge)
+                text_scenario("Done, result is 42"),
+            ]
+        )
+
+        ctx = build_ctx(runtime, node_spec, memory, llm)
+        node = EventLoopNode(config=LoopConfig(max_iterations=5))
+        result = await node.execute(ctx)
+
+        assert result.success is True
+        assert result.output["result"] == "42"
+
+    @pytest.mark.asyncio
+    async def test_set_output_rejects_invalid_key(self, runtime, node_spec, memory):
+        """set_output with key not in output_keys -> is_error=True."""
+        llm = MockStreamingLLM(
+            scenarios=[
+                # Turn 1: call set_output with bad key
+                tool_call_scenario("set_output", {"key": "bad_key", "value": "x"}),
+                # Turn 2: call set_output with good key
+                tool_call_scenario("set_output", {"key": "result", "value": "ok"}),
+                # Turn 3: text done
+                text_scenario("Done"),
+            ]
+        )
+
+        ctx = build_ctx(runtime, node_spec, memory, llm)
+        node = EventLoopNode(config=LoopConfig(max_iterations=5))
+        result = await node.execute(ctx)
+
+        assert result.success is True
+        assert result.output["result"] == "ok"
+        assert "bad_key" not in result.output
+
+    @pytest.mark.asyncio
+    async def test_missing_keys_triggers_retry(self, runtime, node_spec, memory):
+        """Judge accepts but output keys are missing -> retry with hint."""
+        judge = AsyncMock(spec=JudgeProtocol)
+        judge.evaluate = AsyncMock(return_value=JudgeVerdict(action="ACCEPT"))
+
+        llm = MockStreamingLLM(
+            scenarios=[
+                # Turn 1: text without set_output -> judge accepts but keys missing -> retry
+                text_scenario("I'll get to it"),
+                # Turn 2: set_output
+                tool_call_scenario("set_output", {"key": "result", "value": "done"}),
+                # Turn 3: text -> judge accepts, keys present -> success
+                text_scenario("All done"),
+            ]
+        )
+
+        ctx = build_ctx(runtime, node_spec, memory, llm)
+        node = EventLoopNode(judge=judge, config=LoopConfig(max_iterations=5))
+        result = await node.execute(ctx)
+
+        assert result.success is True
+        assert result.output["result"] == "done"
+
+
+# ===========================================================================
+# Stall detection
+# ===========================================================================
+
+
+class TestStallDetection:
+    @pytest.mark.asyncio
+    async def test_stall_detection(self, runtime, node_spec, memory):
+        """3 identical responses should trigger stall detection."""
+        node_spec.output_keys = []  # so implicit judge would accept
+        # But we need the judge to RETRY so we actually get 3 identical responses
+        judge = AsyncMock(spec=JudgeProtocol)
+        judge.evaluate = AsyncMock(return_value=JudgeVerdict(action="RETRY"))
+
+        llm = MockStreamingLLM(scenarios=[text_scenario("same answer")])
+
+        ctx = build_ctx(runtime, node_spec, memory, llm)
+        node = EventLoopNode(
+            judge=judge,
+            config=LoopConfig(max_iterations=10, stall_detection_threshold=3),
+        )
+        result = await node.execute(ctx)
+
+        assert result.success is False
+        assert "stalled" in result.error.lower()
+
+
+# ===========================================================================
+# EventBus lifecycle events
+# ===========================================================================
+
+
+class TestEventBusLifecycle:
+    @pytest.mark.asyncio
+    async def test_lifecycle_events_published(self, runtime, node_spec, memory):
+        """NODE_LOOP_STARTED, NODE_LOOP_ITERATION, NODE_LOOP_COMPLETED should be published."""
+        node_spec.output_keys = []
+        llm = MockStreamingLLM(scenarios=[text_scenario("ok")])
+        bus = EventBus()
+
+        received_events = []
+        bus.subscribe(
+            event_types=[
+                EventType.NODE_LOOP_STARTED,
+                EventType.NODE_LOOP_ITERATION,
+                EventType.NODE_LOOP_COMPLETED,
+            ],
+            handler=lambda e: received_events.append(e.type),
+        )
+
+        ctx = build_ctx(runtime, node_spec, memory, llm)
+        node = EventLoopNode(event_bus=bus, config=LoopConfig(max_iterations=5))
+        result = await node.execute(ctx)
+
+        assert result.success is True
+        assert EventType.NODE_LOOP_STARTED in received_events
+        assert EventType.NODE_LOOP_ITERATION in received_events
+        assert EventType.NODE_LOOP_COMPLETED in received_events
+
+    @pytest.mark.asyncio
+    async def test_client_facing_uses_client_output_delta(self, runtime, memory):
+        """client_facing=True should emit CLIENT_OUTPUT_DELTA instead of LLM_TEXT_DELTA."""
+        spec = NodeSpec(
+            id="ui_node",
+            name="UI Node",
+            description="Streams to user",
+            node_type="event_loop",
+            output_keys=[],
+            client_facing=True,
+        )
+        llm = MockStreamingLLM(scenarios=[text_scenario("visible to user")])
+        bus = EventBus()
+
+        received_types = []
+        bus.subscribe(
+            event_types=[EventType.CLIENT_OUTPUT_DELTA, EventType.LLM_TEXT_DELTA],
+            handler=lambda e: received_types.append(e.type),
+        )
+
+        ctx = build_ctx(runtime, spec, memory, llm)
+        node = EventLoopNode(event_bus=bus, config=LoopConfig(max_iterations=5))
+
+        # client_facing + text-only blocks for user input; use shutdown to unblock
+        async def auto_shutdown():
+            await asyncio.sleep(0.05)
+            node.signal_shutdown()
+
+        task = asyncio.create_task(auto_shutdown())
+        await node.execute(ctx)
+        await task
+
+        assert EventType.CLIENT_OUTPUT_DELTA in received_types
+        assert EventType.LLM_TEXT_DELTA not in received_types
+
+
+# ===========================================================================
+# Client-facing blocking
+# ===========================================================================
+
+
+class TestClientFacingBlocking:
+    """Tests for native client_facing input blocking in EventLoopNode."""
+
+    @pytest.fixture
+    def client_spec(self):
+        return NodeSpec(
+            id="chat",
+            name="Chat",
+            description="chat node",
+            node_type="event_loop",
+            output_keys=[],
+            client_facing=True,
+        )
+
+    @pytest.mark.asyncio
+    async def test_client_facing_blocks_on_text(self, runtime, memory, client_spec):
+        """client_facing + text-only response blocks until inject_event."""
+        llm = MockStreamingLLM(
+            scenarios=[
+                text_scenario("Hello!"),
+                text_scenario("Got your message."),
+            ]
+        )
+        bus = EventBus()
+        node = EventLoopNode(event_bus=bus, config=LoopConfig(max_iterations=5))
+        ctx = build_ctx(runtime, client_spec, memory, llm)
+
+        async def user_responds():
+            await asyncio.sleep(0.05)
+            await node.inject_event("I need help")
+            await asyncio.sleep(0.05)
+            node.signal_shutdown()
+
+        user_task = asyncio.create_task(user_responds())
+        result = await node.execute(ctx)
+        await user_task
+
+        assert result.success is True
+        # LLM should have been called at least twice (first response + after inject)
+        assert llm._call_index >= 2
+
+    @pytest.mark.asyncio
+    async def test_client_facing_does_not_block_on_tools(self, runtime, memory):
+        """client_facing + tool calls should NOT block — judge evaluates normally."""
+        spec = NodeSpec(
+            id="chat",
+            name="Chat",
+            description="chat node",
+            node_type="event_loop",
+            output_keys=["result"],
+            client_facing=True,
+        )
+        # Scenario 1: LLM calls set_output (tool call present → no blocking, judge RETRYs)
+        # Scenario 2: LLM produces text (implicit judge sees output key set → ACCEPT)
+        # But scenario 2 is text-only on client_facing → would block.
+        # So we need shutdown to handle that case.
+        llm = MockStreamingLLM(
+            scenarios=[
+                tool_call_scenario("set_output", {"key": "result", "value": "done"}),
+                text_scenario("All set!"),
+            ]
+        )
+        node = EventLoopNode(config=LoopConfig(max_iterations=5))
+        ctx = build_ctx(runtime, spec, memory, llm)
+
+        # After set_output, implicit judge RETRYs (tool calls present).
+        # Next turn: text-only on client_facing → blocks.
+        # But implicit judge should ACCEPT first (output key is set, no tools).
+        # Actually, client_facing check happens BEFORE judge, so it blocks.
+        # Use shutdown as safety net.
+        async def auto_shutdown():
+            await asyncio.sleep(0.1)
+            node.signal_shutdown()
+
+        task = asyncio.create_task(auto_shutdown())
+        result = await node.execute(ctx)
+        await task
+
+        assert result.success is True
+        assert result.output["result"] == "done"
+
+    @pytest.mark.asyncio
+    async def test_non_client_facing_unchanged(self, runtime, memory):
+        """client_facing=False should not block — existing behavior."""
+        spec = NodeSpec(
+            id="internal",
+            name="Internal",
+            description="internal node",
+            node_type="event_loop",
+            output_keys=[],  # client_facing is not passed, so the NodeSpec default applies
+        )
+        llm = MockStreamingLLM(scenarios=[text_scenario("thinking...")])
+        node = EventLoopNode(config=LoopConfig(max_iterations=2))
+        ctx = build_ctx(runtime, spec, memory, llm)
+
+        # Should complete without blocking (implicit judge ACCEPTs on no tools + no keys)
+        result = await node.execute(ctx)  # no shutdown helper: a blocked node would hang here
+        assert result is not None  # NOTE(review): success flag is not asserted
+
+    @pytest.mark.asyncio
+    async def test_signal_shutdown_unblocks(self, runtime, memory, client_spec):
+        """signal_shutdown should unblock a waiting client_facing node."""
+        llm = MockStreamingLLM(scenarios=[text_scenario("Waiting...")])
+        bus = EventBus()
+        node = EventLoopNode(event_bus=bus, config=LoopConfig(max_iterations=10))
+        ctx = build_ctx(runtime, client_spec, memory, llm)
+
+        async def shutdown_after_delay():
+            await asyncio.sleep(0.05)  # let execute() start (and block) first
+            node.signal_shutdown()
+
+        task = asyncio.create_task(shutdown_after_delay())  # scheduled now, runs once we await
+        result = await node.execute(ctx)
+        await task  # make sure the helper has finished before asserting
+
+        assert result.success is True
+
+    @pytest.mark.asyncio
+    async def test_client_input_requested_event_published(self, runtime, memory, client_spec):
+        """CLIENT_INPUT_REQUESTED should be published when blocking."""
+        llm = MockStreamingLLM(scenarios=[text_scenario("Hello!")])
+        bus = EventBus()
+        received = []  # events delivered to the subscriber below, in arrival order
+
+        async def capture(e):
+            received.append(e)
+
+        bus.subscribe(
+            event_types=[EventType.CLIENT_INPUT_REQUESTED],  # only this event type is captured
+            handler=capture,
+        )
+
+        node = EventLoopNode(event_bus=bus, config=LoopConfig(max_iterations=5))
+        ctx = build_ctx(runtime, client_spec, memory, llm)
+
+        async def shutdown():
+            await asyncio.sleep(0.05)  # let execute() start (and block) first
+            node.signal_shutdown()
+
+        task = asyncio.create_task(shutdown())
+        await node.execute(ctx)  # result is ignored; only the published events matter here
+        await task
+
+        assert len(received) >= 1
+        assert received[0].type == EventType.CLIENT_INPUT_REQUESTED
+
+
+# ===========================================================================
+# Tool execution
+# ===========================================================================
+
+
+class TestToolExecution:  # tool call -> executor -> result fed back into the next LLM turn
+    @pytest.mark.asyncio
+    async def test_tool_execution_feedback(self, runtime, node_spec, memory):
+        """Tool call -> result fed back to conversation via stream loop."""
+        node_spec.output_keys = []  # mutates the fixture's spec: no output keys for this test
+
+        def my_tool_executor(tool_use: ToolUse) -> ToolResult:  # stub: echoes the tool name
+            return ToolResult(
+                tool_use_id=tool_use.id,
+                content=f"Result for {tool_use.name}",
+                is_error=False,
+            )
+
+        llm = MockStreamingLLM(
+            scenarios=[
+                # Turn 1: call a tool
+                tool_call_scenario("search", {"query": "test"}, tool_use_id="call_search"),
+                # Turn 2: text response after seeing tool result
+                text_scenario("Found the answer"),
+            ]
+        )
+
+        ctx = build_ctx(
+            runtime,
+            node_spec,
+            memory,
+            llm,
+            tools=[Tool(name="search", description="Search", parameters={})],
+        )
+        node = EventLoopNode(
+            tool_executor=my_tool_executor,
+            config=LoopConfig(max_iterations=5),
+        )
+        result = await node.execute(ctx)
+
+        assert result.success is True
+        # stream() should have been called twice (tool call turn + final text turn)
+        assert llm._call_index >= 2  # reads the mock's private call counter
+
+
+# ===========================================================================
+# Write-through persistence with real FileConversationStore
+# ===========================================================================
+
+
+class TestWriteThroughPersistence:  # uses a real FileConversationStore under tmp_path
+    @pytest.mark.asyncio
+    async def test_messages_written_to_store(self, tmp_path, runtime, node_spec, memory):
+        """Messages should be persisted immediately via write-through."""
+        store = FileConversationStore(tmp_path / "conv")
+        node_spec.output_keys = []  # mutates the fixture's spec: no output keys for this test
+        llm = MockStreamingLLM(scenarios=[text_scenario("Hello")])
+
+        ctx = build_ctx(runtime, node_spec, memory, llm)
+        node = EventLoopNode(
+            conversation_store=store,
+            config=LoopConfig(max_iterations=5),
+        )
+        result = await node.execute(ctx)
+
+        assert result.success is True
+
+        # Verify parts were written to disk
+        parts = await store.read_parts()
+        assert len(parts) >= 2  # at least initial user msg + assistant msg
+
+    @pytest.mark.asyncio
+    async def test_output_accumulator_write_through(self, tmp_path, runtime, node_spec, memory):
+        """set_output values should be persisted in cursor immediately."""
+        store = FileConversationStore(tmp_path / "conv")
+        llm = MockStreamingLLM(
+            scenarios=[
+                tool_call_scenario("set_output", {"key": "result", "value": "persisted_value"}),
+                text_scenario("Done"),  # second turn: text only, after the output was set
+            ]
+        )
+
+        ctx = build_ctx(runtime, node_spec, memory, llm)
+        node = EventLoopNode(
+            conversation_store=store,
+            config=LoopConfig(max_iterations=5),
+        )
+        result = await node.execute(ctx)
+
+        assert result.success is True
+        assert result.output["result"] == "persisted_value"
+
+        # Verify output was written to cursor on disk
+        cursor = await store.read_cursor()
+        assert cursor is not None
+        assert cursor["outputs"]["result"] == "persisted_value"
+
+
+# ===========================================================================
+# Crash recovery (restore from real FileConversationStore)
+# ===========================================================================
+
+
+class TestCrashRecovery:  # a fresh EventLoopNode picks up state left in the store
+    @pytest.mark.asyncio
+    async def test_restore_from_checkpoint(self, tmp_path, runtime, node_spec, memory):
+        """Populate a store with state, then verify EventLoopNode restores from it."""
+        store = FileConversationStore(tmp_path / "conv")
+
+        # Simulate a previous run that wrote conversation + cursor
+        conv = NodeConversation(
+            system_prompt="You are a test assistant.",
+            output_keys=["result"],
+            store=store,
+        )
+        await conv.add_user_message("Initial input")
+        await conv.add_assistant_message("Working on it...")
+
+        # Write cursor with iteration and outputs
+        await store.write_cursor(
+            {
+                "iteration": 1,
+                "next_seq": conv.next_seq,  # taken from the conversation populated above
+                "outputs": {"result": "partial_value"},
+            }
+        )
+
+        # Now create a new EventLoopNode and execute -- it should restore
+        node_spec.output_keys = []  # no required keys so implicit accept works
+        llm = MockStreamingLLM(scenarios=[text_scenario("Continuing...")])
+
+        ctx = build_ctx(runtime, node_spec, memory, llm)
+        node = EventLoopNode(
+            conversation_store=store,  # same store instance the simulated run wrote to
+            config=LoopConfig(max_iterations=5),
+        )
+        result = await node.execute(ctx)
+
+        assert result.success is True
+        # Should have the restored output
+        assert result.output.get("result") == "partial_value"
+
+
+# ===========================================================================
+# External event injection
+# ===========================================================================
+
+
+class TestEventInjection:  # inject_event() content must reach the LLM as a message
+    @pytest.mark.asyncio
+    async def test_inject_event(self, runtime, node_spec, memory):
+        """inject_event() content should appear as user message in next iteration."""
+        node_spec.output_keys = []
+
+        judge_calls = []  # one entry per judge.evaluate() call
+
+        async def evaluate_fn(context):  # RETRY on the first call, ACCEPT from the second on
+            judge_calls.append(context)
+            if len(judge_calls) >= 2:
+                return JudgeVerdict(action="ACCEPT")
+            return JudgeVerdict(action="RETRY")
+
+        judge = AsyncMock(spec=JudgeProtocol)
+        judge.evaluate = AsyncMock(side_effect=evaluate_fn)
+
+        llm = MockStreamingLLM(
+            scenarios=[
+                text_scenario("iteration 1"),
+                text_scenario("iteration 2"),
+            ]
+        )
+
+        ctx = build_ctx(runtime, node_spec, memory, llm)
+        node = EventLoopNode(
+            judge=judge,
+            config=LoopConfig(max_iterations=5),
+        )
+
+        # Pre-inject an event before execute runs
+        await node.inject_event("Priority: CEO wants meeting rescheduled")
+
+        result = await node.execute(ctx)
+        assert result.success is True
+
+        # Verify the injected content made it into the LLM messages
+        all_messages = []  # messages from every recorded stream() call, flattened
+        for call in llm.stream_calls:
+            all_messages.extend(call["messages"])
+        injected_found = any("[External event]" in str(m.get("content", "")) for m in all_messages)
+        assert injected_found  # matches the "[External event]" marker, not the injected text
+
+
+# ===========================================================================
+# Pause/resume
+# ===========================================================================
+
+
+class TestPauseResume:  # pause_requested in the input short-circuits the loop
+    @pytest.mark.asyncio
+    async def test_pause_returns_early(self, runtime, node_spec, memory):
+        """pause_requested in input_data should trigger early return."""
+        node_spec.output_keys = []
+        llm = MockStreamingLLM(scenarios=[text_scenario("should not run")])
+
+        ctx = build_ctx(
+            runtime,
+            node_spec,
+            memory,
+            llm,
+            input_data={"pause_requested": True},  # the flag under test
+        )
+        node = EventLoopNode(config=LoopConfig(max_iterations=10))
+        result = await node.execute(ctx)
+
+        # Should return success (paused, not failed)
+        assert result.success is True
+        # LLM should not have been called (paused before first turn)
+        assert llm._call_index == 0  # reads the mock's private call counter
+
+
+# ===========================================================================
+# Stream errors
+# ===========================================================================
+
+
+class TestStreamErrors:  # error events emitted by the LLM stream
+    @pytest.mark.asyncio
+    async def test_non_recoverable_stream_error_raises(self, runtime, node_spec, memory):
+        """Non-recoverable StreamErrorEvent should raise RuntimeError."""
+        node_spec.output_keys = []
+        llm = MockStreamingLLM(
+            scenarios=[
+                [StreamErrorEvent(error="Connection lost", recoverable=False)],  # raw event list
+            ]
+        )
+
+        ctx = build_ctx(runtime, node_spec, memory, llm)
+        node = EventLoopNode(config=LoopConfig(max_iterations=5))
+
+        with pytest.raises(RuntimeError, match="Stream error"):  # match is a regex search
+            await node.execute(ctx)
+
+
+# ===========================================================================
+# OutputAccumulator unit tests
+# ===========================================================================
+
+
+class TestOutputAccumulator:  # unit tests; no EventLoopNode involved
+    @pytest.mark.asyncio
+    async def test_set_and_get(self):
+        acc = OutputAccumulator()  # constructed without a store
+        await acc.set("key1", "value1")
+        assert acc.get("key1") == "value1"
+        assert acc.get("nonexistent") is None  # missing keys yield None rather than raising
+
+    @pytest.mark.asyncio
+    async def test_to_dict(self):
+        acc = OutputAccumulator()
+        await acc.set("a", 1)
+        await acc.set("b", 2)
+        assert acc.to_dict() == {"a": 1, "b": 2}  # every value set so far, keyed by name
+
+    @pytest.mark.asyncio
+    async def test_has_all_keys(self):
+        acc = OutputAccumulator()
+        assert acc.has_all_keys([]) is True  # an empty requirement list is trivially satisfied
+        assert acc.has_all_keys(["x"]) is False
+        await acc.set("x", "val")
+        assert acc.has_all_keys(["x"]) is True
+
+    @pytest.mark.asyncio
+    async def test_write_through_to_real_store(self, tmp_path):
+        """OutputAccumulator should write through to FileConversationStore cursor."""
+        store = FileConversationStore(tmp_path / "acc_test")
+        acc = OutputAccumulator(store=store)
+
+        await acc.set("result", "hello")
+
+        cursor = await store.read_cursor()  # read back through the store, not the accumulator
+        assert cursor["outputs"]["result"] == "hello"
+
+    @pytest.mark.asyncio
+    async def test_restore_from_real_store(self, tmp_path):
+        """OutputAccumulator.restore() should rebuild from FileConversationStore."""
+        store = FileConversationStore(tmp_path / "acc_restore")
+        await store.write_cursor({"outputs": {"key1": "val1", "key2": "val2"}})
+
+        acc = await OutputAccumulator.restore(store)  # restore() is awaited and returns the acc
+        assert acc.get("key1") == "val1"
+        assert acc.get("key2") == "val2"
+        assert acc.has_all_keys(["key1", "key2"]) is True
@@ -0,0 +1,265 @@
+"""
+Tests for event_loop node type wiring (Issue #2513).
+
+Covers:
+- NodeSpec.client_facing field
+- event_loop in VALID_NODE_TYPES
+- _get_node_implementation() event_loop branch
+- no-retry enforcement in serial execution path
+"""
+
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from framework.graph.edge import GraphSpec
+from framework.graph.executor import GraphExecutor
+from framework.graph.goal import Goal
+from framework.graph.node import NodeContext, NodeProtocol, NodeResult, NodeSpec
+from framework.runtime.core import Runtime
+
+
+class AlwaysFailsNode(NodeProtocol):
+    """A test node that always fails and counts how often it was executed."""
+
+    def __init__(self):
+        self.attempt_count = 0  # number of execute() calls so far; tests assert on it
+
+    async def execute(self, ctx: NodeContext) -> NodeResult:
+        self.attempt_count += 1
+        return NodeResult(success=False, error=f"Permanent error (attempt {self.attempt_count})")
+
+
+class SucceedsOnceNode(NodeProtocol):
+    """A test node that succeeds on every call (despite the name) with a fixed output."""
+
+    async def execute(self, ctx: NodeContext) -> NodeResult:
+        return NodeResult(success=True, output={"result": "ok"})  # ctx is unused
+
+
+@pytest.fixture(autouse=True)
+def fast_sleep(monkeypatch):
+    """Replace asyncio.sleep with an AsyncMock for every test here, so backoff never waits."""
+    monkeypatch.setattr("asyncio.sleep", AsyncMock())  # undone by monkeypatch after each test
+
+
+@pytest.fixture
+def runtime():
+    """Create a mock Runtime for testing."""
+    runtime = MagicMock(spec=Runtime)  # spec= limits attribute access to Runtime's API
+    runtime.start_run = MagicMock(return_value="test_run_id")
+    runtime.decide = MagicMock(return_value="test_decision_id")
+    runtime.record_outcome = MagicMock()
+    runtime.end_run = MagicMock()
+    runtime.report_problem = MagicMock()
+    runtime.set_node = MagicMock()
+    return runtime
+
+
+# --- NodeSpec.client_facing tests ---
+
+
+def test_client_facing_defaults_false():
+    """NodeSpec without client_facing should default to False."""
+    spec = NodeSpec(
+        id="n1",
+        name="Node 1",
+        description="test",
+        node_type="llm_generate",  # a non-event_loop type; client_facing is left unset
+    )
+    assert spec.client_facing is False
+
+
+def test_client_facing_explicit_true():
+    """NodeSpec with client_facing=True should retain the value."""
+    spec = NodeSpec(
+        id="n1",
+        name="Node 1",
+        description="test",
+        node_type="event_loop",
+        client_facing=True,  # the field under test
+    )
+    assert spec.client_facing is True
+
+
+# --- VALID_NODE_TYPES tests ---
+
+
+def test_event_loop_in_valid_node_types():
+    """'event_loop' must be in GraphExecutor.VALID_NODE_TYPES."""
+    assert "event_loop" in GraphExecutor.VALID_NODE_TYPES  # class attribute; no instance needed
+
+
+def test_event_loop_node_spec_accepted():
+    """Creating a NodeSpec with node_type='event_loop' should not raise."""
+    spec = NodeSpec(
+        id="el1",
+        name="Event Loop",
+        description="test",
+        node_type="event_loop",
+    )
+    assert spec.node_type == "event_loop"  # value is stored unchanged
+
+
+# --- _get_node_implementation() tests ---
+
+
+def test_unregistered_event_loop_raises(runtime):
+    """An event_loop node not in the registry should raise RuntimeError."""
+    spec = NodeSpec(
+        id="el1",
+        name="Event Loop",
+        description="test",
+        node_type="event_loop",
+    )
+    executor = GraphExecutor(runtime=runtime)  # register_node() is deliberately never called
+
+    with pytest.raises(RuntimeError, match="not found in registry"):
+        executor._get_node_implementation(spec)  # private method exercised directly
+
+
+def test_registered_event_loop_returns_impl(runtime):
+    """A registered event_loop node should be returned from the registry."""
+    spec = NodeSpec(
+        id="el1",
+        name="Event Loop",
+        description="test",
+        node_type="event_loop",
+    )
+    impl = SucceedsOnceNode()
+    executor = GraphExecutor(runtime=runtime)
+    executor.register_node("el1", impl)  # registered under the same id as the spec
+
+    result = executor._get_node_implementation(spec)
+    assert result is impl  # identity check: the very same object must come back
+
+
+# --- No-retry enforcement (serial path) ---
+
+
+@pytest.mark.asyncio
+async def test_event_loop_max_retries_forced_zero(runtime):
+    """An event_loop node with max_retries=3 should only execute once (no retry)."""
+    node_spec = NodeSpec(
+        id="el_fail",
+        name="Failing Event Loop",
+        description="event loop that fails",
+        node_type="event_loop",
+        max_retries=3,  # deliberately non-zero; the executor is expected to ignore it
+        output_keys=["result"],
+    )
+
+    graph = GraphSpec(
+        id="test_graph",
+        goal_id="test_goal",
+        name="Test Graph",
+        entry_node="el_fail",
+        nodes=[node_spec],
+        edges=[],
+        terminal_nodes=["el_fail"],  # single-node graph: the entry node is also terminal
+    )
+
+    goal = Goal(id="test_goal", name="Test", description="test")
+
+    executor = GraphExecutor(runtime=runtime)
+    failing_node = AlwaysFailsNode()
+    executor.register_node("el_fail", failing_node)
+
+    result = await executor.execute(graph, goal, {})
+
+    # Event loop nodes get max_retries overridden to 0, meaning execute once then fail
+    assert not result.success
+    assert failing_node.attempt_count == 1  # exactly one execute() call, i.e. no retries
+
+
+@pytest.mark.asyncio
+async def test_event_loop_max_retries_zero_no_warning(runtime, caplog):
+    """An event_loop node with max_retries=0 should not log a warning."""
+    node_spec = NodeSpec(
+        id="el_zero",
+        name="Zero Retry Event Loop",
+        description="event loop with 0 retries",
+        node_type="event_loop",
+        max_retries=0,  # already zero, so there is nothing to override
+        output_keys=["result"],
+    )
+
+    graph = GraphSpec(
+        id="test_graph",
+        goal_id="test_goal",
+        name="Test Graph",
+        entry_node="el_zero",
+        nodes=[node_spec],
+        edges=[],
+        terminal_nodes=["el_zero"],
+    )
+
+    goal = Goal(id="test_goal", name="Test", description="test")
+
+    executor = GraphExecutor(runtime=runtime)
+    failing_node = AlwaysFailsNode()
+    executor.register_node("el_zero", failing_node)
+
+    import logging  # local import; only needed for the caplog level below
+
+    with caplog.at_level(logging.WARNING):
+        await executor.execute(graph, goal, {})  # result ignored; only the log output matters
+
+    # max_retries=0 should not trigger the override warning
+    assert "Overriding to 0" not in caplog.text
+
+
+@pytest.mark.asyncio
+async def test_event_loop_max_retries_positive_logs_warning(runtime, caplog):
+    """An event_loop node with max_retries=3 should log a warning about override."""
+    node_spec = NodeSpec(
+        id="el_warn",
+        name="Warning Event Loop",
+        description="event loop with retries",
+        node_type="event_loop",
+        max_retries=3,  # positive value that should trigger the override warning
+        output_keys=["result"],
+    )
+
+    graph = GraphSpec(
+        id="test_graph",
+        goal_id="test_goal",
+        name="Test Graph",
+        entry_node="el_warn",
+        nodes=[node_spec],
+        edges=[],
+        terminal_nodes=["el_warn"],
+    )
+
+    goal = Goal(id="test_goal", name="Test", description="test")
+
+    executor = GraphExecutor(runtime=runtime)
+    failing_node = AlwaysFailsNode()
+    executor.register_node("el_warn", failing_node)
+
+    import logging  # local import; only needed for the caplog level below
+
+    with caplog.at_level(logging.WARNING):
+        await executor.execute(graph, goal, {})  # result ignored; only the log output matters
+
+    assert "Overriding to 0" in caplog.text
+    assert "el_warn" in caplog.text  # the warning must identify the offending node
+
+
+# --- Existing node types unaffected ---
+
+
+def test_existing_node_types_unchanged():
+    """All pre-existing node types must still be in VALID_NODE_TYPES with defaults preserved."""
+    expected = {"llm_tool_use", "llm_generate", "router", "function", "human_input"}
+    assert expected.issubset(GraphExecutor.VALID_NODE_TYPES)  # subset: extra types are allowed
+
+    # Default node_type is still llm_tool_use
+    spec = NodeSpec(id="x", name="X", description="x")
+    assert spec.node_type == "llm_tool_use"
+
+    # Default max_retries on the spec is still 3 (this checks the spec, not the executor)
+    assert spec.max_retries == 3
+
+    # Default client_facing is False
+    assert spec.client_facing is False
@@ -0,0 +1,978 @@
+"""Tests for extending the stream event type system.
+
+Validates that the StreamEvent discriminated union pattern supports:
+- Type-based dispatch (matching on event.type)
+- Pattern matching / isinstance branching
+- Custom event subclasses following the same frozen-dataclass convention
+- Serialization of mixed event sequences
+
+WP-2 tests validate EventType enum extension and node-level event routing:
+- All 12 new EventType enum members with correct string values
+- node_id routing on AgentEvent
+- filter_node on Subscription
+- Backward compatibility with existing enum members
+"""
+
+import asyncio
+from dataclasses import FrozenInstanceError, asdict, dataclass, field
+from typing import Any, Literal
+
+import pytest
+
+from framework.llm.stream_events import (
+    FinishEvent,
+    ReasoningDeltaEvent,
+    ReasoningStartEvent,
+    StreamErrorEvent,
+    TextDeltaEvent,
+    TextEndEvent,
+    ToolCallEvent,
+    ToolResultEvent,
+)
+from framework.runtime.event_bus import AgentEvent, EventBus, EventType, Subscription
+
+
+# ---------------------------------------------------------------------------
+# Helpers: type-based dispatch
+# ---------------------------------------------------------------------------
+def dispatch_event(event) -> str:
+    """Map an event to a short label via its ``type`` string; unknown types give "unknown:<type>"."""
+    handlers = {  # event.type string -> formatter; rebuilt on every call
+        "text_delta": lambda e: f"text:{e.content}",
+        "text_end": lambda e: f"end:{len(e.full_text)}chars",
+        "tool_call": lambda e: f"call:{e.tool_name}",
+        "tool_result": lambda e: f"result:{e.tool_use_id}",
+        "reasoning_start": lambda _: "reasoning:start",
+        "reasoning_delta": lambda e: f"reasoning:{e.content[:20]}",
+        "finish": lambda e: f"finish:{e.stop_reason}",
+        "error": lambda e: f"error:{e.error}",
+    }
+    handler = handlers.get(event.type)
+    if handler is None:
+        return f"unknown:{event.type}"
+    return handler(event)
+
+
+def collect_text(events: list) -> str:
+    """Return the text carried by the last text event in the stream ("" if there is none)."""
+    for event in reversed(events):  # scan from the end: the latest text event wins
+        if isinstance(event, TextEndEvent):
+            return event.full_text
+        if isinstance(event, TextDeltaEvent):
+            return event.snapshot
+    return ""
+
+
+def extract_tool_calls(events: list) -> list[dict[str, Any]]:
+    """Return one ``{"id", "name", "input"}`` dict per ToolCallEvent, in stream order."""
+    return [
+        {"id": e.tool_use_id, "name": e.tool_name, "input": e.tool_input}
+        for e in events
+        if isinstance(e, ToolCallEvent)  # all other event types are skipped
+    ]
+
+
+# ---------------------------------------------------------------------------
+# Type-based dispatch tests
+# ---------------------------------------------------------------------------
+class TestTypeDispatch:
+    """Dispatch on event.type string for handler routing (one test per built-in event type)."""
+
+    def test_dispatch_text_delta(self):
+        e = TextDeltaEvent(content="hello")
+        assert dispatch_event(e) == "text:hello"
+
+    def test_dispatch_text_end(self):
+        e = TextEndEvent(full_text="hello world")
+        assert dispatch_event(e) == "end:11chars"  # len("hello world") == 11
+
+    def test_dispatch_tool_call(self):
+        e = ToolCallEvent(tool_name="web_search")
+        assert dispatch_event(e) == "call:web_search"
+
+    def test_dispatch_tool_result(self):
+        e = ToolResultEvent(tool_use_id="abc")
+        assert dispatch_event(e) == "result:abc"
+
+    def test_dispatch_reasoning_start(self):
+        e = ReasoningStartEvent()
+        assert dispatch_event(e) == "reasoning:start"
+
+    def test_dispatch_reasoning_delta(self):
+        e = ReasoningDeltaEvent(content="Let me think step by step")
+        assert dispatch_event(e) == "reasoning:Let me think step by"  # content[:20]
+
+    def test_dispatch_finish(self):
+        e = FinishEvent(stop_reason="end_turn")
+        assert dispatch_event(e) == "finish:end_turn"
+
+    def test_dispatch_error(self):
+        e = StreamErrorEvent(error="timeout")
+        assert dispatch_event(e) == "error:timeout"
+
+
+# ---------------------------------------------------------------------------
+# isinstance-based filtering
+# ---------------------------------------------------------------------------
+class TestInstanceFiltering:
+    """Filter event streams using isinstance for each event type."""
+
+    @pytest.fixture
+    def text_stream(self) -> list:
+        """Simulate a text-only stream."""
+        return [
+            TextDeltaEvent(content="Hello", snapshot="Hello"),
+            TextDeltaEvent(content=" world", snapshot="Hello world"),
+            TextDeltaEvent(content="!", snapshot="Hello world!"),
+            TextEndEvent(full_text="Hello world!"),
+            FinishEvent(stop_reason="stop", input_tokens=10, output_tokens=3, model="test"),
+        ]
+
+    @pytest.fixture
+    def tool_stream(self) -> list:
+        """Simulate a tool call stream."""
+        return [
+            ToolCallEvent(
+                tool_use_id="call_1",
+                tool_name="get_weather",
+                tool_input={"city": "London"},
+            ),
+            ToolCallEvent(
+                tool_use_id="call_2",
+                tool_name="calculator",
+                tool_input={"expression": "2+2"},
+            ),
+            FinishEvent(stop_reason="tool_calls"),
+        ]
+
+    @pytest.fixture
+    def reasoning_stream(self) -> list:
+        """Simulate a stream with reasoning blocks."""
+        return [
+            ReasoningStartEvent(),
+            ReasoningDeltaEvent(content="Let me analyze this..."),
+            ReasoningDeltaEvent(content="The answer is 42."),
+            TextDeltaEvent(content="The answer is 42.", snapshot="The answer is 42."),
+            TextEndEvent(full_text="The answer is 42."),
+            FinishEvent(stop_reason="end_turn"),
+        ]
+
+    def test_collect_text(self, text_stream):
+        assert collect_text(text_stream) == "Hello world!"
+
+    def test_collect_text_from_tool_stream(self, tool_stream):
+        assert collect_text(tool_stream) == ""  # no text events at all -> empty string
+
+    def test_extract_tool_calls(self, tool_stream):
+        calls = extract_tool_calls(tool_stream)
+        assert len(calls) == 2
+        assert calls[0]["name"] == "get_weather"  # stream order is preserved
+        assert calls[1]["name"] == "calculator"
+
+    def test_extract_tool_calls_from_text_stream(self, text_stream):
+        assert extract_tool_calls(text_stream) == []
+
+    def test_filter_text_deltas(self, text_stream):
+        deltas = [e for e in text_stream if isinstance(e, TextDeltaEvent)]
+        assert len(deltas) == 3
+
+    def test_filter_finish(self, text_stream):
+        finishes = [e for e in text_stream if isinstance(e, FinishEvent)]
+        assert len(finishes) == 1
+        assert finishes[0].stop_reason == "stop"
+
+    def test_reasoning_then_text(self, reasoning_stream):
+        reasoning = [e for e in reasoning_stream if isinstance(e, ReasoningDeltaEvent)]
+        text = collect_text(reasoning_stream)
+        assert len(reasoning) == 2
+        assert text == "The answer is 42."  # reasoning deltas are not part of the text
+
+    def test_mixed_stream_type_counts(self, reasoning_stream):
+        type_counts = {}  # event.type string -> number of occurrences
+        for e in reasoning_stream:
+            type_counts[e.type] = type_counts.get(e.type, 0) + 1
+        assert type_counts == {
+            "reasoning_start": 1,
+            "reasoning_delta": 2,
+            "text_delta": 1,
+            "text_end": 1,
+            "finish": 1,
+        }
+
+
+# ---------------------------------------------------------------------------
+# Custom event extension pattern
+# ---------------------------------------------------------------------------
+@dataclass(frozen=True)
+class CustomMetricsEvent:
+    """Example custom event: a frozen dataclass with a Literal ``type`` tag and defaults."""
+
+    type: Literal["custom_metrics"] = "custom_metrics"  # discriminator used for dispatch
+    latency_ms: float = 0.0
+    tokens_per_second: float = 0.0
+    metadata: dict[str, Any] = field(default_factory=dict)  # factory avoids a shared dict
+
+
+@dataclass(frozen=True)
+class CustomCitationEvent:
+    """Example citation event: a second custom type built the same way, all fields defaulted."""
+
+    type: Literal["citation"] = "citation"  # discriminator used for dispatch
+    source_url: str = ""
+    quote: str = ""
+    confidence: float = 0.0
+
+
+class TestCustomEventExtension:
+    """Custom events should follow the same frozen-dataclass convention."""
+
+    def test_custom_event_construction(self):
+        e = CustomMetricsEvent(latency_ms=150.5, tokens_per_second=42.3)
+        assert e.type == "custom_metrics"  # default tag, not passed explicitly
+        assert e.latency_ms == 150.5
+
+    def test_custom_event_frozen(self):
+        e = CustomMetricsEvent()
+        with pytest.raises(FrozenInstanceError):  # frozen=True forbids attribute assignment
+            e.type = "modified"
+
+    def test_custom_event_serialization(self):
+        e = CustomMetricsEvent(
+            latency_ms=100.0,
+            tokens_per_second=50.0,
+            metadata={"provider": "anthropic"},
+        )
+        d = asdict(e)
+        assert d["type"] == "custom_metrics"
+        assert d["metadata"] == {"provider": "anthropic"}
+
+    def test_custom_event_dispatch(self):
+        """Custom events absent from dispatch_event's handler map get the "unknown:" label."""
+        e = CustomMetricsEvent(latency_ms=200.0)
+        # Falls through to "unknown" in our dispatch_event
+        assert dispatch_event(e) == "unknown:custom_metrics"
+
+    def test_custom_event_in_mixed_stream(self):
+        """Custom events can coexist with standard events in a list."""
+        stream = [
+            TextDeltaEvent(content="hi", snapshot="hi"),
+            CustomMetricsEvent(latency_ms=50.0),
+            TextEndEvent(full_text="hi"),
+            CustomCitationEvent(source_url="https://example.com", quote="hi"),
+            FinishEvent(stop_reason="stop"),
+        ]
+        standard = [
+            e
+            for e in stream
+            if hasattr(e, "type")  # NOTE(review): guard is absent from the filter below
+            and e.type
+            in {
+                "text_delta",
+                "text_end",
+                "tool_call",
+                "tool_result",
+                "reasoning_start",
+                "reasoning_delta",
+                "finish",
+                "error",
+            }
+        ]
+        custom = [
+            e
+            for e in stream
+            if e.type
+            not in {  # same eight built-in type names as the set above
+                "text_delta",
+                "text_end",
+                "tool_call",
+                "tool_result",
+                "reasoning_start",
+                "reasoning_delta",
+                "finish",
+                "error",
+            }
+        ]
+        assert len(standard) == 3
+        assert len(custom) == 2
+
+
+# ---------------------------------------------------------------------------
+# Serialization of full event sequences
+# ---------------------------------------------------------------------------
+class TestSequenceSerialization:
+    """Serialize entire event sequences, as done by the dump tests."""
+
+    def test_serialize_text_sequence(self):
+        events = [
+            TextDeltaEvent(content="Hello", snapshot="Hello"),
+            TextDeltaEvent(content=" world", snapshot="Hello world"),
+            TextEndEvent(full_text="Hello world"),
+            FinishEvent(stop_reason="stop", model="test-model"),
+        ]
+        serialized = [{"index": i, **asdict(e)} for i, e in enumerate(events)]  # add position
+        assert len(serialized) == 4
+        assert serialized[0]["index"] == 0
+        assert serialized[0]["type"] == "text_delta"
+        assert serialized[-1]["type"] == "finish"
+        assert serialized[-1]["model"] == "test-model"
+
+    def test_serialize_tool_sequence(self):
+        events = [
+            ToolCallEvent(
+                tool_use_id="call_1",
+                tool_name="search",
+                tool_input={"query": "test"},
+            ),
+            FinishEvent(stop_reason="tool_calls"),
+        ]
+        serialized = [{"index": i, **asdict(e)} for i, e in enumerate(events)]
+        assert serialized[0]["tool_input"] == {"query": "test"}  # nested dict survives asdict
+        assert serialized[1]["stop_reason"] == "tool_calls"
+
+    def test_serialize_error_sequence(self):
+        events = [
+            TextDeltaEvent(content="partial"),
+            StreamErrorEvent(error="connection reset", recoverable=True),
+            FinishEvent(stop_reason="error"),
+        ]
+        serialized = [{"index": i, **asdict(e)} for i, e in enumerate(events)]
+        assert serialized[1]["type"] == "error"
+        assert serialized[1]["recoverable"] is True
+
+    def test_roundtrip_snapshot_accumulation(self):
+        """Verify snapshot length strictly increases from one serialized delta to the next."""
+        chunks = ["Hello", " beautiful", " world", "!"]
+        events = []
+        snapshot = ""  # running concatenation of all chunks seen so far
+        for chunk in chunks:
+            snapshot += chunk
+            events.append(TextDeltaEvent(content=chunk, snapshot=snapshot))
+
+        serialized = [asdict(e) for e in events]
+        for i in range(1, len(serialized)):  # compare each snapshot with its predecessor
+            assert len(serialized[i]["snapshot"]) > len(serialized[i - 1]["snapshot"])
+        assert serialized[-1]["snapshot"] == "Hello beautiful world!"
+
+
+# ===========================================================================
+# WP-2: EventType Enum Extension + Node-Level Event Routing
+# ===========================================================================
+
+# The 12 new EventType members added by WP-2, mapped to their expected string values
+WP2_EVENT_TYPES = {
+    # Node event-loop lifecycle
+    EventType.NODE_LOOP_STARTED: "node_loop_started",
+    EventType.NODE_LOOP_ITERATION: "node_loop_iteration",
+    EventType.NODE_LOOP_COMPLETED: "node_loop_completed",
+    # LLM streaming observability
+    EventType.LLM_TEXT_DELTA: "llm_text_delta",
+    EventType.LLM_REASONING_DELTA: "llm_reasoning_delta",
+    # Tool lifecycle
+    EventType.TOOL_CALL_STARTED: "tool_call_started",
+    EventType.TOOL_CALL_COMPLETED: "tool_call_completed",
+    # Client I/O
+    EventType.CLIENT_OUTPUT_DELTA: "client_output_delta",
+    EventType.CLIENT_INPUT_REQUESTED: "client_input_requested",
+    # Internal node observability
+    EventType.NODE_INTERNAL_OUTPUT: "node_internal_output",
+    EventType.NODE_INPUT_BLOCKED: "node_input_blocked",
+    EventType.NODE_STALLED: "node_stalled",
+}
+
+# Pre-existing EventType members mapped to the string values they must keep
+ORIGINAL_EVENT_TYPES = {
+    EventType.EXECUTION_STARTED: "execution_started",
+    EventType.EXECUTION_COMPLETED: "execution_completed",
+    EventType.EXECUTION_FAILED: "execution_failed",
+    EventType.EXECUTION_PAUSED: "execution_paused",
+    EventType.EXECUTION_RESUMED: "execution_resumed",
+    EventType.STATE_CHANGED: "state_changed",
+    EventType.STATE_CONFLICT: "state_conflict",
+    EventType.GOAL_PROGRESS: "goal_progress",
+    EventType.GOAL_ACHIEVED: "goal_achieved",
+    EventType.CONSTRAINT_VIOLATION: "constraint_violation",
+    EventType.STREAM_STARTED: "stream_started",
+    EventType.STREAM_STOPPED: "stream_stopped",
+    EventType.CUSTOM: "custom",
+}
+
+
+# ---------------------------------------------------------------------------
+# WP-2 Part A: EventType enum members
+# ---------------------------------------------------------------------------
+class TestWP2EventTypeEnumMembers:
+    """New WP-2 EventType members have the expected values; original ones are unchanged."""
+
+    @pytest.mark.parametrize(  # one case per (member, value) pair in WP2_EVENT_TYPES
+        "member,expected_value",
+        WP2_EVENT_TYPES.items(),
+        ids=lambda x: x.name if isinstance(x, EventType) else x,
+    )
+    def test_new_member_value(self, member, expected_value):
+        assert member.value == expected_value
+
+    def test_all_12_new_members_exist(self):  # checks the size of the expectation table
+        assert len(WP2_EVENT_TYPES) == 12
+
+    def test_new_member_string_values_are_unique(self):
+        values = list(WP2_EVENT_TYPES.values())
+        assert len(values) == len(set(values))  # a duplicate would shrink the set
+
+    def test_no_collision_with_original_members(self):
+        new_values = set(WP2_EVENT_TYPES.values())
+        old_values = set(ORIGINAL_EVENT_TYPES.values())
+        overlap = new_values & old_values  # string values present in both tables
+        assert overlap == set(), f"Colliding values: {overlap}"
+
+    @pytest.mark.parametrize(  # one case per (member, value) pair in ORIGINAL_EVENT_TYPES
+        "member,expected_value",
+        ORIGINAL_EVENT_TYPES.items(),
+        ids=lambda x: x.name if isinstance(x, EventType) else x,
+    )
+    def test_original_members_unchanged(self, member, expected_value):
+        assert member.value == expected_value
+
+    def test_event_type_is_str_enum(self):
+        """EventType members compare equal to their plain string values."""
+        assert EventType.NODE_LOOP_STARTED == "node_loop_started"
+        assert EventType.LLM_TEXT_DELTA == "llm_text_delta"
+        assert EventType.LLM_TEXT_DELTA.value == "llm_text_delta"
+
+    def test_event_type_accessible_by_name(self):  # EventType["NAME"] lookup
+        assert EventType["NODE_LOOP_STARTED"] is EventType.NODE_LOOP_STARTED
+        assert EventType["TOOL_CALL_COMPLETED"] is EventType.TOOL_CALL_COMPLETED
+
+    def test_event_type_accessible_by_value(self):  # EventType("value") lookup
+        assert EventType("node_loop_started") is EventType.NODE_LOOP_STARTED
+        assert EventType("tool_call_completed") is EventType.TOOL_CALL_COMPLETED
+
+
+# ---------------------------------------------------------------------------
+# WP-2 Part B: AgentEvent.node_id and Subscription.filter_node
+# ---------------------------------------------------------------------------
+class TestWP2AgentEventNodeId:
+    """AgentEvent exposes node_id as a field and includes it in to_dict()."""
+
+    def test_node_id_defaults_to_none(self):  # node_id not passed to the constructor
+        event = AgentEvent(
+            type=EventType.EXECUTION_STARTED,
+            stream_id="stream-1",
+        )
+        assert event.node_id is None
+
+    def test_node_id_can_be_set(self):  # node_id passed as a keyword argument
+        event = AgentEvent(
+            type=EventType.LLM_TEXT_DELTA,
+            stream_id="stream-1",
+            node_id="email_composer",
+        )
+        assert event.node_id == "email_composer"
+
+    def test_node_id_in_to_dict(self):  # to_dict() carries the node_id value
+        event = AgentEvent(
+            type=EventType.TOOL_CALL_STARTED,
+            stream_id="stream-1",
+            node_id="search_node",
+        )
+        d = event.to_dict()
+        assert d["node_id"] == "search_node"
+
+    def test_node_id_none_in_to_dict(self):  # key must be present even when unset
+        event = AgentEvent(
+            type=EventType.EXECUTION_STARTED,
+            stream_id="stream-1",
+        )
+        d = event.to_dict()
+        assert "node_id" in d
+        assert d["node_id"] is None
+
+
+class TestWP2SubscriptionFilterNode:
+    """Subscription accepts an optional filter_node used for node-level routing."""
+
+    @staticmethod
+    async def _noop_handler(event: AgentEvent) -> None:  # placeholder; not invoked here
+        pass
+
+    def test_filter_node_defaults_to_none(self):  # filter_node not passed
+        sub = Subscription(
+            id="sub_1",
+            event_types={EventType.LLM_TEXT_DELTA},
+            handler=self._noop_handler,
+        )
+        assert sub.filter_node is None
+
+    def test_filter_node_can_be_set(self):  # filter_node passed as a keyword argument
+        sub = Subscription(
+            id="sub_1",
+            event_types={EventType.LLM_TEXT_DELTA},
+            handler=self._noop_handler,
+            filter_node="email_composer",
+        )
+        assert sub.filter_node == "email_composer"
+
+
+# ---------------------------------------------------------------------------
+# WP-2 Part B: Node-level event routing integration tests
+# ---------------------------------------------------------------------------
+class TestWP2NodeLevelRouting:
+    """EventBus delivers events to subscribers according to filter_node and node_id."""
+
+    @pytest.fixture
+    def bus(self):  # a fresh EventBus for every test
+        return EventBus()
+
+    @pytest.mark.asyncio
+    async def test_filter_node_receives_matching_events(self, bus):
+        """Subscriber with filter_node='node-A' receives events from node-A."""
+        received = []
+
+        async def handler(event):
+            received.append(event)
+
+        bus.subscribe(
+            event_types=[EventType.LLM_TEXT_DELTA],
+            handler=handler,
+            filter_node="node-A",
+        )
+
+        await bus.publish(
+            AgentEvent(
+                type=EventType.LLM_TEXT_DELTA,
+                stream_id="stream-1",
+                node_id="node-A",
+                data={"content": "hello"},
+            )
+        )
+
+        assert len(received) == 1
+        assert received[0].node_id == "node-A"
+        assert received[0].data["content"] == "hello"
+
+    @pytest.mark.asyncio
+    async def test_filter_node_rejects_non_matching_events(self, bus):
+        """Subscriber with filter_node='node-B' does NOT receive node-A events."""
+        received = []
+
+        async def handler(event):
+            received.append(event)
+
+        bus.subscribe(
+            event_types=[EventType.LLM_TEXT_DELTA],
+            handler=handler,
+            filter_node="node-B",
+        )
+
+        await bus.publish(
+            AgentEvent(
+                type=EventType.LLM_TEXT_DELTA,
+                stream_id="stream-1",
+                node_id="node-A",
+                data={"content": "hello"},
+            )
+        )
+
+        assert len(received) == 0
+
+    @pytest.mark.asyncio
+    async def test_no_filter_node_receives_all_events(self, bus):
+        """Subscriber with no filter_node receives events from all nodes."""
+        received = []
+
+        async def handler(event):
+            received.append(event)
+
+        bus.subscribe(
+            event_types=[EventType.LLM_TEXT_DELTA],
+            handler=handler,
+        )
+
+        await bus.publish(
+            AgentEvent(
+                type=EventType.LLM_TEXT_DELTA,
+                stream_id="stream-1",
+                node_id="node-A",
+            )
+        )
+        await bus.publish(
+            AgentEvent(
+                type=EventType.LLM_TEXT_DELTA,
+                stream_id="stream-1",
+                node_id="node-B",
+            )
+        )
+        await bus.publish(
+            AgentEvent(
+                type=EventType.LLM_TEXT_DELTA,
+                stream_id="stream-1",
+                node_id=None,  # an event with no node must be delivered as well
+            )
+        )
+
+        assert len(received) == 3
+
+    @pytest.mark.asyncio
+    async def test_interleaved_nodes_separated_by_filter(self, bus):
+        """Two subscribers on different nodes get only their node's events."""
+        node_a_events = []
+        node_b_events = []
+
+        async def handler_a(event):
+            node_a_events.append(event)
+
+        async def handler_b(event):
+            node_b_events.append(event)
+
+        bus.subscribe(
+            event_types=[EventType.LLM_TEXT_DELTA],
+            handler=handler_a,
+            filter_node="email_sender",
+        )
+        bus.subscribe(
+            event_types=[EventType.LLM_TEXT_DELTA],
+            handler=handler_b,
+            filter_node="inbox_scanner",
+        )
+
+        # Events from both nodes alternate on the same stream
+        await bus.publish(
+            AgentEvent(
+                type=EventType.LLM_TEXT_DELTA,
+                stream_id="webhook",
+                node_id="email_sender",
+                data={"content": "Dear Jo"},
+            )
+        )
+        await bus.publish(
+            AgentEvent(
+                type=EventType.LLM_TEXT_DELTA,
+                stream_id="webhook",
+                node_id="inbox_scanner",
+                data={"content": "RE: Meeting conf"},
+            )
+        )
+        await bus.publish(
+            AgentEvent(
+                type=EventType.LLM_TEXT_DELTA,
+                stream_id="webhook",
+                node_id="email_sender",
+                data={"content": "hn, Thank you for"},
+            )
+        )
+        await bus.publish(
+            AgentEvent(
+                type=EventType.LLM_TEXT_DELTA,
+                stream_id="webhook",
+                node_id="inbox_scanner",
+                data={"content": "irmed for Thursday"},
+            )
+        )
+
+        assert len(node_a_events) == 2
+        assert len(node_b_events) == 2
+        assert node_a_events[0].data["content"] == "Dear Jo"  # publish order is kept per node
+        assert node_a_events[1].data["content"] == "hn, Thank you for"
+        assert node_b_events[0].data["content"] == "RE: Meeting conf"
+        assert node_b_events[1].data["content"] == "irmed for Thursday"
+
+    @pytest.mark.asyncio
+    async def test_filter_node_combined_with_filter_stream(self, bus):
+        """An event is delivered only when it matches both filter_stream and filter_node."""
+        received = []
+
+        async def handler(event):
+            received.append(event)
+
+        bus.subscribe(
+            event_types=[EventType.TOOL_CALL_STARTED],
+            handler=handler,
+            filter_stream="webhook",
+            filter_node="search_node",
+        )
+
+        # Matching both filters
+        await bus.publish(
+            AgentEvent(
+                type=EventType.TOOL_CALL_STARTED,
+                stream_id="webhook",
+                node_id="search_node",
+            )
+        )
+        # Wrong stream
+        await bus.publish(
+            AgentEvent(
+                type=EventType.TOOL_CALL_STARTED,
+                stream_id="api",
+                node_id="search_node",
+            )
+        )
+        # Wrong node
+        await bus.publish(
+            AgentEvent(
+                type=EventType.TOOL_CALL_STARTED,
+                stream_id="webhook",
+                node_id="other_node",
+            )
+        )
+
+        assert len(received) == 1
+        assert received[0].stream_id == "webhook"
+        assert received[0].node_id == "search_node"
+
+    @pytest.mark.asyncio
+    async def test_wait_for_with_node_id(self, bus):
+        """wait_for() accepts node_id parameter for filtering."""
+
+        async def publish_later():
+            await asyncio.sleep(0.01)  # presumably lets wait_for() start listening first
+            await bus.publish(
+                AgentEvent(
+                    type=EventType.NODE_LOOP_COMPLETED,
+                    stream_id="stream-1",
+                    node_id="target_node",
+                    data={"iterations": 3},
+                )
+            )
+
+        task = asyncio.create_task(publish_later())
+        event = await bus.wait_for(
+            event_type=EventType.NODE_LOOP_COMPLETED,
+            node_id="target_node",
+            timeout=2.0,
+        )
+        await task  # make sure the publisher task has finished
+
+        assert event is not None
+        assert event.node_id == "target_node"
+        assert event.data["iterations"] == 3
+
+    @pytest.mark.asyncio
+    async def test_wait_for_ignores_wrong_node(self, bus):
+        """wait_for() with node_id ignores events from other nodes."""
+
+        async def publish_wrong_then_right():
+            await asyncio.sleep(0.01)  # presumably lets wait_for() start listening first
+            # Wrong node — should be ignored
+            await bus.publish(
+                AgentEvent(
+                    type=EventType.NODE_LOOP_COMPLETED,
+                    stream_id="stream-1",
+                    node_id="wrong_node",
+                )
+            )
+            await asyncio.sleep(0.01)
+            # Right node
+            await bus.publish(
+                AgentEvent(
+                    type=EventType.NODE_LOOP_COMPLETED,
+                    stream_id="stream-1",
+                    node_id="target_node",
+                    data={"iterations": 5},
+                )
+            )
+
+        task = asyncio.create_task(publish_wrong_then_right())
+        event = await bus.wait_for(
+            event_type=EventType.NODE_LOOP_COMPLETED,
+            node_id="target_node",
+            timeout=2.0,
+        )
+        await task  # make sure the publisher task has finished
+
+        assert event is not None
+        assert event.node_id == "target_node"
+        assert event.data["iterations"] == 5  # payload of the second, matching event
+
+
+# ---------------------------------------------------------------------------
+# WP-2: Convenience publisher methods
+# ---------------------------------------------------------------------------
+class TestWP2ConveniencePublishers:
+    """EventBus emit_* convenience methods publish the matching WP-2 event types."""
+
+    @pytest.fixture
+    def bus(self):  # a fresh EventBus for every test
+        return EventBus()
+
+    @pytest.mark.asyncio
+    async def test_emit_node_loop_started(self, bus):  # max_iterations lands in event.data
+        received = []
+
+        async def handler(event):
+            received.append(event)
+
+        bus.subscribe(event_types=[EventType.NODE_LOOP_STARTED], handler=handler)
+        await bus.emit_node_loop_started(
+            stream_id="s1",
+            node_id="n1",
+            max_iterations=10,
+        )
+
+        assert len(received) == 1
+        assert received[0].node_id == "n1"
+        assert received[0].data["max_iterations"] == 10
+
+    @pytest.mark.asyncio
+    async def test_emit_node_loop_iteration(self, bus):  # iteration lands in event.data
+        received = []
+
+        async def handler(event):
+            received.append(event)
+
+        bus.subscribe(event_types=[EventType.NODE_LOOP_ITERATION], handler=handler)
+        await bus.emit_node_loop_iteration(
+            stream_id="s1",
+            node_id="n1",
+            iteration=3,
+        )
+
+        assert len(received) == 1
+        assert received[0].data["iteration"] == 3
+
+    @pytest.mark.asyncio
+    async def test_emit_node_loop_completed(self, bus):  # iterations lands in event.data
+        received = []
+
+        async def handler(event):
+            received.append(event)
+
+        bus.subscribe(event_types=[EventType.NODE_LOOP_COMPLETED], handler=handler)
+        await bus.emit_node_loop_completed(
+            stream_id="s1",
+            node_id="n1",
+            iterations=5,
+        )
+
+        assert len(received) == 1
+        assert received[0].data["iterations"] == 5
+
+    @pytest.mark.asyncio
+    async def test_emit_llm_text_delta(self, bus):  # content and snapshot land in event.data
+        received = []
+
+        async def handler(event):
+            received.append(event)
+
+        bus.subscribe(event_types=[EventType.LLM_TEXT_DELTA], handler=handler)
+        await bus.emit_llm_text_delta(
+            stream_id="s1",
+            node_id="n1",
+            content="hello",
+            snapshot="hello world",
+        )
+
+        assert len(received) == 1
+        assert received[0].data["content"] == "hello"
+        assert received[0].data["snapshot"] == "hello world"
+
+    @pytest.mark.asyncio
+    async def test_emit_tool_call_started(self, bus):  # tool name and input land in event.data
+        received = []
+
+        async def handler(event):
+            received.append(event)
+
+        bus.subscribe(event_types=[EventType.TOOL_CALL_STARTED], handler=handler)
+        await bus.emit_tool_call_started(
+            stream_id="s1",
+            node_id="n1",
+            tool_use_id="call_1",
+            tool_name="web_search",
+            tool_input={"query": "test"},
+        )
+
+        assert len(received) == 1
+        assert received[0].data["tool_name"] == "web_search"
+        assert received[0].data["tool_input"] == {"query": "test"}
+
+    @pytest.mark.asyncio
+    async def test_emit_tool_call_completed(self, bus):  # result lands in event.data
+        received = []
+
+        async def handler(event):
+            received.append(event)
+
+        bus.subscribe(event_types=[EventType.TOOL_CALL_COMPLETED], handler=handler)
+        await bus.emit_tool_call_completed(
+            stream_id="s1",
+            node_id="n1",
+            tool_use_id="call_1",
+            tool_name="web_search",
+            result="3 results found",
+        )
+
+        assert len(received) == 1
+        assert received[0].data["result"] == "3 results found"
+        assert received[0].data["is_error"] is False  # expected although is_error was not passed
+
+    @pytest.mark.asyncio
+    async def test_emit_client_output_delta(self, bus):  # content lands in event.data
+        received = []
+
+        async def handler(event):
+            received.append(event)
+
+        bus.subscribe(event_types=[EventType.CLIENT_OUTPUT_DELTA], handler=handler)
+        await bus.emit_client_output_delta(
+            stream_id="s1",
+            node_id="n1",
+            content="chunk",
+            snapshot="full chunk",
+        )
+
+        assert len(received) == 1
+        assert received[0].data["content"] == "chunk"
+
+    @pytest.mark.asyncio
+    async def test_emit_node_stalled(self, bus):  # reason lands in event.data
+        received = []
+
+        async def handler(event):
+            received.append(event)
+
+        bus.subscribe(event_types=[EventType.NODE_STALLED], handler=handler)
+        await bus.emit_node_stalled(
+            stream_id="s1",
+            node_id="n1",
+            reason="no progress after 10 iterations",
+        )
+
+        assert len(received) == 1
+        assert received[0].data["reason"] == "no progress after 10 iterations"
+
+    @pytest.mark.asyncio
+    async def test_convenience_publishers_set_node_id(self, bus):
+        """emit_llm_text_delta/emit_tool_call_started set node_id so filter_node can match."""
+        received = []
+
+        async def handler(event):
+            received.append(event)
+
+        bus.subscribe(
+            event_types=[EventType.LLM_TEXT_DELTA, EventType.TOOL_CALL_STARTED],
+            handler=handler,
+            filter_node="my_node",
+        )
+
+        await bus.emit_llm_text_delta(
+            stream_id="s1",
+            node_id="my_node",
+            content="hi",
+            snapshot="hi",
+        )
+        await bus.emit_tool_call_started(  # tool_input is omitted in this call
+            stream_id="s1",
+            node_id="my_node",
+            tool_use_id="c1",
+            tool_name="calc",
+        )
+        # Wrong node — should not be received
+        await bus.emit_llm_text_delta(
+            stream_id="s1",
+            node_id="other_node",
+            content="bye",
+            snapshot="bye",
+        )
+
+        assert len(received) == 2
+        assert all(e.node_id == "my_node" for e in received)
--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				`"""Tests for Aden credential sync components."""`