chore: update developer doc

fix: remove requirements.txt
feat: add tool tests in CI
2026-02-01 19:49:35 -08:00 · 2026-02-01 19:45:32 -08:00 · 2026-02-01 19:38:33 -08:00 · 2026-02-01 19:11:44 -08:00 · 2026-02-01 15:19:11 +08:00 · 2026-01-31 17:59:31 +08:00
492 changed files with 81424 additions and 30714 deletions
@@ -0,0 +1,15 @@
+{
+  "hooks": {
+    "PostToolUse": [
+      {
+        "matcher": "Edit|Write|NotebookEdit",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "ruff check --fix \"$CLAUDE_FILE_PATH\" 2>/dev/null; ruff format \"$CLAUDE_FILE_PATH\" 2>/dev/null; true"
+          }
+        ]
+      }
+    ]
+  }
+}
@@ -0,0 +1,40 @@
+{
+  "permissions": {
+    "allow": [
+      "Bash(npm install:*)",
+      "Bash(npm test:*)",
+      "Skill(building-agents-construction)",
+      "Skill(building-agents-construction:*)",
+      "Bash(PYTHONPATH=core:exports pytest:*)",
+      "mcp__agent-builder__create_session",
+      "mcp__agent-builder__get_session_status",
+      "mcp__agent-builder__set_goal",
+      "mcp__agent-builder__list_mcp_servers",
+      "mcp__agent-builder__test_node",
+      "mcp__agent-builder__add_node",
+      "mcp__agent-builder__add_edge",
+      "mcp__agent-builder__validate_graph",
+      "Bash(ruff check:*)",
+      "Bash(PYTHONPATH=core:exports python:*)",
+      "mcp__agent-builder__list_tests",
+      "mcp__agent-builder__generate_constraint_tests",
+      "Bash(python -m agent:*)",
+      "Bash(python agent.py:*)",
+      "Bash(python -c:*)",
+      "Bash(done)",
+      "Bash(xargs cat:*)",
+      "mcp__agent-builder__list_mcp_tools",
+      "mcp__agent-builder__add_mcp_server",
+      "mcp__agent-builder__check_missing_credentials",
+      "mcp__agent-builder__store_credential",
+      "mcp__agent-builder__list_stored_credentials",
+      "mcp__agent-builder__delete_stored_credential",
+      "mcp__agent-builder__verify_credentials",
+      "Bash(PYTHONPATH=/home/timothy/oss/hive/core:/home/timothy/oss/hive/exports python:*)",
+      "Bash(PYTHONPATH=core:exports:tools/src python -m hubspot_input:*)",
+      "mcp__agent-builder__export_graph"
+    ]
+  },
+  "enabledMcpjsonServers": ["agent-builder", "tools"],
+  "enableAllProjectMcpServers": true
+}
@@ -0,0 +1,463 @@
+---
+name: agent-workflow
+description: Complete workflow for building, implementing, and testing goal-driven agents. Orchestrates building-agents-* and testing-agent skills. Use when starting a new agent project, unsure which skill to use, or need end-to-end guidance.
+license: Apache-2.0
+metadata:
+  author: hive
+  version: "2.0"
+  type: workflow-orchestrator
+  orchestrates:
+    - building-agents-core
+    - building-agents-construction
+    - building-agents-patterns
+    - testing-agent
+    - setup-credentials
+---
+
+# Agent Development Workflow
+
+Complete Standard Operating Procedure (SOP) for building production-ready goal-driven agents.
+
+## Overview
+
+This workflow orchestrates specialized skills to take you from initial concept to production-ready agent:
+
+1. **Understand Concepts** → `/building-agents-core` (optional)
+2. **Build Structure** → `/building-agents-construction`
+3. **Optimize Design** → `/building-agents-patterns` (optional)
+4. **Setup Credentials** → `/setup-credentials` (if agent uses tools requiring API keys)
+5. **Test & Validate** → `/testing-agent`
+
+## When to Use This Workflow
+
+Use this meta-skill when:
+- Starting a new agent from scratch
+- Unclear which skill to use first
+- Need end-to-end guidance for agent development
+- Want consistent, repeatable agent builds
+
+**Skip this workflow** if:
+- You only need to test an existing agent → use `/testing-agent` directly
+- You know exactly which phase you're in → use specific skill directly
+
+## Quick Decision Tree
+
+```
+"Need to understand agent concepts" → building-agents-core
+"Build a new agent" → building-agents-construction
+"Optimize my agent design" → building-agents-patterns
+"Set up API keys for my agent" → setup-credentials
+"Test my agent" → testing-agent
+"Not sure what I need" → Read phases below, then decide
+"Agent has structure but needs implementation" → See agent directory STATUS.md
+```
+
+## Phase 0: Understand Concepts (Optional)
+
+**Duration**: 5-10 minutes
+**Skill**: `/building-agents-core`
+**Input**: Questions about agent architecture
+
+### When to Use
+
+- First time building an agent
+- Need to understand node types, edges, goals
+- Want to validate tool availability
+- Learning about pause/resume architecture
+
+### What This Phase Provides
+
+- Architecture overview (Python packages, not JSON)
+- Core concepts (Goal, Node, Edge, Pause/Resume)
+- Tool discovery and validation procedures
+- Workflow overview
+
+**Skip this phase** if you already understand agent fundamentals.
+
+## Phase 1: Build Agent Structure
+
+**Duration**: 15-30 minutes
+**Skill**: `/building-agents-construction`
+**Input**: User requirements ("Build an agent that...")
+
+### What This Phase Does
+
+Creates the complete agent architecture:
+- Package structure (`exports/agent_name/`)
+- Goal with success criteria and constraints
+- Workflow graph (nodes and edges)
+- Node specifications
+- CLI interface
+- Documentation
+
+### Process
+
+1. **Create package** - Directory structure with skeleton files
+2. **Define goal** - Success criteria and constraints written to agent.py
+3. **Design nodes** - Each node approved and written incrementally
+4. **Connect edges** - Workflow graph with conditional routing
+5. **Finalize** - Agent class, exports, and documentation
+
+### Outputs
+
+- ✅ `exports/agent_name/` package created
+- ✅ Goal defined in agent.py
+- ✅ 3-5 success criteria defined
+- ✅ 1-5 constraints defined
+- ✅ 5-10 nodes specified in nodes/__init__.py
+- ✅ 8-15 edges connecting workflow
+- ✅ Validated structure (passes `PYTHONPATH=core:exports python -m agent_name validate`)
+- ✅ README.md with usage instructions
+- ✅ CLI commands (info, validate, run, shell)
+
+### Success Criteria
+
+You're ready for Phase 2 when:
+- Agent structure validates without errors
+- All nodes and edges are defined
+- CLI commands work (info, validate)
+- You see: "Agent complete: exports/agent_name/"
+
+### Common Outputs
+
+The building-agents-construction skill produces:
+```
+exports/agent_name/
+├── __init__.py          (package exports)
+├── __main__.py          (CLI interface)
+├── agent.py             (goal, graph, agent class)
+├── nodes/__init__.py    (node specifications)
+├── config.py            (configuration)
+├── implementations.py   (may be created for Python functions)
+└── README.md            (documentation)
+```
+
+### Next Steps
+
+**If structure complete and validated:**
+→ Check `exports/agent_name/STATUS.md` or `IMPLEMENTATION_GUIDE.md`
+→ These files explain implementation options
+→ You may need to add Python functions or MCP tools (not covered by current skills)
+
+**If you want to optimize the design:**
+→ Proceed to Phase 1.5 (building-agents-patterns)
+
+**If ready to test:**
+→ Proceed to Phase 2
+
+## Phase 1.5: Optimize Design (Optional)
+
+**Duration**: 10-15 minutes
+**Skill**: `/building-agents-patterns`
+**Input**: Completed agent structure
+
+### When to Use
+
+- Want to add pause/resume functionality
+- Need error handling patterns
+- Want to optimize performance
+- Need examples of complex routing
+- Want best practices guidance
+
+### What This Phase Provides
+
+- Practical examples and patterns
+- Pause/resume architecture
+- Error handling strategies
+- Anti-patterns to avoid
+- Performance optimization techniques
+
+**Skip this phase** if your agent design is straightforward.
+
+## Phase 2: Test & Validate
+
+**Duration**: 20-40 minutes
+**Skill**: `/testing-agent`
+**Input**: Working agent from Phase 1
+
+### What This Phase Does
+
+Creates comprehensive test suite:
+- Constraint tests (verify hard requirements)
+- Success criteria tests (measure goal achievement)
+- Edge case tests (handle failures gracefully)
+- Integration tests (end-to-end workflows)
+
+### Process
+
+1. **Analyze agent** - Read goal, constraints, success criteria
+2. **Generate tests** - Create pytest files in `exports/agent_name/tests/`
+3. **User approval** - Review and approve each test
+4. **Run evaluation** - Execute tests and collect results
+5. **Debug failures** - Identify and fix issues
+6. **Iterate** - Repeat until all tests pass
+
+### Outputs
+
+- ✅ Test files in `exports/agent_name/tests/`
+- ✅ Test report with pass/fail metrics
+- ✅ Coverage of all success criteria
+- ✅ Coverage of all constraints
+- ✅ Edge case handling verified
+
+### Success Criteria
+
+You're done when:
+- All tests pass
+- All success criteria validated
+- All constraints verified
+- Agent handles edge cases
+- Test coverage is comprehensive
+
+### Next Steps
+
+**Agent ready for:**
+- Production deployment
+- Integration into larger systems
+- Documentation and handoff
+- Continuous monitoring
+
+## Phase Transitions
+
+### From Phase 1 to Phase 2
+
+**Trigger signals:**
+- "Agent complete: exports/..."
+- Structure validation passes
+- README indicates implementation complete
+
+**Before proceeding:**
+- Verify agent can be imported: `from exports.agent_name import default_agent`
+- Check if implementation is needed (see STATUS.md or IMPLEMENTATION_GUIDE.md)
+- Confirm agent executes without import errors
+
+### Skipping Phases
+
+**When to skip Phase 1:**
+- Agent structure already exists
+- Only need to add tests
+- Modifying existing agent
+
+**When to skip Phase 2:**
+- Prototyping or exploring
+- Agent not production-bound
+- Manual testing sufficient
+
+## Common Patterns
+
+### Pattern 1: Complete New Build (Simple)
+
+```
+User: "Build an agent that monitors files"
+→ Use /building-agents-construction
+→ Agent structure created
+→ Use /testing-agent
+→ Tests created and passing
+→ Done: Production-ready agent
+```
+
+### Pattern 1b: Complete New Build (With Learning)
+
+```
+User: "Build an agent (first time)"
+→ Use /building-agents-core (understand concepts)
+→ Use /building-agents-construction (build structure)
+→ Use /building-agents-patterns (optimize design)
+→ Use /testing-agent (validate)
+→ Done: Production-ready agent
+```
+
+### Pattern 2: Test Existing Agent
+
+```
+User: "Test my agent at exports/my_agent"
+→ Skip Phase 1
+→ Use /testing-agent directly
+→ Tests created
+→ Done: Validated agent
+```
+
+### Pattern 3: Iterative Development
+
+```
+User: "Build an agent"
+→ Use /building-agents-construction (Phase 1)
+→ Implementation needed (see STATUS.md)
+→ [User implements functions]
+→ Use /testing-agent (Phase 2)
+→ Tests reveal bugs
+→ [Fix bugs manually]
+→ Re-run tests
+→ Done: Working agent
+```
+
+### Pattern 4: Complex Agent with Patterns
+
+```
+User: "Build an agent with multi-turn conversations"
+→ Use /building-agents-core (learn pause/resume)
+→ Use /building-agents-construction (build structure)
+→ Use /building-agents-patterns (implement pause/resume pattern)
+→ Use /testing-agent (validate conversation flows)
+→ Done: Complex conversational agent
+```
+
+## Skill Dependencies
+
+```
+agent-workflow (meta-skill)
+    │
+    ├── building-agents-core (foundational)
+    │   ├── Architecture concepts
+    │   ├── Node/Edge/Goal definitions
+    │   ├── Tool discovery procedures
+    │   └── Workflow overview
+    │
+    ├── building-agents-construction (procedural)
+    │   ├── Creates package structure
+    │   ├── Defines goal
+    │   ├── Adds nodes incrementally
+    │   ├── Connects edges
+    │   ├── Finalizes agent class
+    │   └── Requires: building-agents-core
+    │
+    ├── building-agents-patterns (reference)
+    │   ├── Best practices
+    │   ├── Pause/resume patterns
+    │   ├── Error handling
+    │   ├── Anti-patterns
+    │   └── Performance optimization
+    │
+    └── testing-agent
+        ├── Reads agent goal
+        ├── Generates tests
+        ├── Runs evaluation
+        └── Reports results
+```
+
+## Troubleshooting
+
+### "Agent structure won't validate"
+
+- Check node IDs match between nodes/__init__.py and agent.py
+- Verify all edges reference valid node IDs
+- Ensure entry_node exists in nodes list
+- Run: `PYTHONPATH=core:exports python -m agent_name validate`
+
+### "Agent has structure but won't run"
+
+- Check for STATUS.md or IMPLEMENTATION_GUIDE.md in agent directory
+- Implementation may be needed (Python functions or MCP tools)
+- This is expected - building-agents-construction creates structure, not implementation
+- See implementation guide for completion options
+
+### "Tests are failing"
+
+- Review test output for specific failures
+- Check agent goal and success criteria
+- Verify constraints are met
+- Use `/testing-agent` to debug and iterate
+- Fix agent code and re-run tests
+
+### "Not sure which phase I'm in"
+
+Run these checks:
+
+```bash
+# Check if agent structure exists
+ls exports/my_agent/agent.py
+
+# Check if it validates
+PYTHONPATH=core:exports python -m my_agent validate
+
+# Check if tests exist
+ls exports/my_agent/tests/
+
+# If structure exists and validates → Phase 2 (testing)
+# If structure doesn't exist → Phase 1 (building)
+# If tests exist but failing → Debug phase
+```
+
+## Best Practices
+
+### For Phase 1 (Building)
+
+1. **Start with clear requirements** - Know what the agent should do
+2. **Define success criteria early** - Measurable goals drive design
+3. **Keep nodes focused** - One responsibility per node
+4. **Use descriptive names** - Node IDs should explain purpose
+5. **Validate incrementally** - Check structure after each major addition
+
+### For Phase 2 (Testing)
+
+1. **Test constraints first** - Hard requirements must pass
+2. **Mock external dependencies** - Use mock mode for LLMs/APIs
+3. **Cover edge cases** - Test failures, not just success paths
+4. **Iterate quickly** - Fix one test at a time
+5. **Document test patterns** - Future tests follow same structure
+
+### General Workflow
+
+1. **Use version control** - Git commit after each phase
+2. **Document decisions** - Update README with changes
+3. **Keep iterations small** - Build → Test → Fix → Repeat
+4. **Preserve working states** - Tag successful iterations
+5. **Learn from failures** - Failed tests reveal design issues
+
+## Exit Criteria
+
+You're done with the workflow when:
+
+✅ Agent structure validates
+✅ All tests pass
+✅ Success criteria met
+✅ Constraints verified
+✅ Documentation complete
+✅ Agent ready for deployment
+
+## Additional Resources
+
+- **building-agents-core**: See `.claude/skills/building-agents-core/SKILL.md`
+- **building-agents-construction**: See `.claude/skills/building-agents-construction/SKILL.md`
+- **building-agents-patterns**: See `.claude/skills/building-agents-patterns/SKILL.md`
+- **testing-agent**: See `.claude/skills/testing-agent/SKILL.md`
+- **Agent framework docs**: See `core/README.md`
+- **Example agents**: See `exports/` directory
+
+## Summary
+
+This workflow provides a proven path from concept to production-ready agent:
+
+1. **Learn** with `/building-agents-core` → Understand fundamentals (optional)
+2. **Build** with `/building-agents-construction` → Get validated structure
+3. **Optimize** with `/building-agents-patterns` → Apply best practices (optional)
+4. **Configure** with `/setup-credentials` → Provide API keys (only if the agent uses tools that require them)
+5. **Test** with `/testing-agent` → Get verified functionality
+
+The workflow is **flexible** - skip phases as needed, iterate freely, and adapt to your specific requirements. The goal is **production-ready agents** built with **consistent, repeatable processes**.
+
+## Skill Selection Guide
+
+**Choose building-agents-core when:**
+- First time building agents
+- Need to understand architecture
+- Validating tool availability
+- Learning about node types and edges
+
+**Choose building-agents-construction when:**
+- Actually building an agent
+- Have clear requirements
+- Ready to write code
+- Want step-by-step guidance
+
+**Choose building-agents-patterns when:**
+- Agent structure complete
+- Need advanced patterns
+- Implementing pause/resume
+- Optimizing performance
+- Want best practices
+
+**Choose testing-agent when:**
+- Agent structure complete
+- Ready to validate functionality
+- Need comprehensive test coverage
+- Debugging agent behavior
@@ -0,0 +1,199 @@
+# Example: File Monitor Agent
+
+This example shows the complete agent-workflow in action for building a file monitoring agent.
+
+## Initial Request
+
+```
+User: "Build an agent that monitors ~/Downloads and copies new files to ~/Documents"
+```
+
+## Phase 1: Building (20 minutes)
+
+### Step 1: Create Structure
+
+Agent invokes the `/building-agents-construction` skill and:
+
+1. Creates `exports/file_monitor_agent/` package
+2. Writes skeleton files (__init__.py, __main__.py, agent.py, etc.)
+
+**Output**: Package structure visible immediately
+
+### Step 2: Define Goal
+
+```python
+goal = Goal(
+    id="file-monitor-copy",
+    name="Automated File Monitor & Copy",
+    success_criteria=[
+        # 100% detection rate
+        # 100% copy success
+        # 100% conflict resolution
+        # >99% uptime
+    ],
+    constraints=[
+        # Preserve originals
+        # Handle errors gracefully
+        # Track state
+        # Respect permissions
+    ]
+)
+```
+
+**Output**: Goal written to agent.py
+
+### Step 3: Design Nodes
+
+7 nodes approved and written incrementally:
+
+1. `initialize-state` - Set up tracking
+2. `list-downloads` - Scan directory
+3. `identify-new-files` - Find new files
+4. `check-for-new-files` - Router
+5. `copy-files` - Copy with conflict resolution
+6. `update-state` - Mark as processed
+7. `wait-interval` - Sleep between cycles
+
+**Output**: All nodes in nodes/__init__.py
+
+### Step 4: Connect Edges
+
+8 edges connecting the workflow loop:
+
+```
+initialize → list → identify → check
+                                ↓  ↓
+                              copy  wait
+                                ↓    ↑
+                              update ↓
+                                ↓    ↓
+                              wait → list (loop)
+```
+
+**Output**: Edges written to agent.py
+
+### Step 5: Finalize
+
+```bash
+$ PYTHONPATH=core:exports python -m file_monitor_agent validate
+✓ Agent is valid
+
+$ PYTHONPATH=core:exports python -m file_monitor_agent info
+Agent: File Monitor & Copy Agent
+Nodes: 7
+Edges: 8
+```
+
+**Phase 1 Complete**: Structure validated ✅
+
+### Status After Phase 1
+
+```
+exports/file_monitor_agent/
+├── __init__.py          ✅ (exports)
+├── __main__.py          ✅ (CLI)
+├── agent.py             ✅ (goal, graph, agent class)
+├── nodes/__init__.py    ✅ (7 nodes)
+├── config.py            ✅ (configuration)
+├── implementations.py   ✅ (Python functions)
+├── README.md            ✅ (documentation)
+├── IMPLEMENTATION_GUIDE.md ✅ (next steps)
+└── STATUS.md            ✅ (current state)
+```
+
+**Note**: Implementation gap exists - data flow needs connection (covered in STATUS.md)
+
+## Phase 2: Testing (25 minutes)
+
+### Step 1: Analyze Agent
+
+Agent invokes `/testing-agent` skill and:
+
+1. Reads goal from `exports/file_monitor_agent/agent.py`
+2. Identifies 4 success criteria to test
+3. Identifies 4 constraints to verify
+4. Plans test coverage
+
+### Step 2: Generate Tests
+
+Creates test files:
+
+```
+exports/file_monitor_agent/tests/
+├── conftest.py              (fixtures)
+├── test_constraints.py      (4 constraint tests)
+├── test_success_criteria.py (4 success tests)
+└── test_edge_cases.py       (error handling)
+```
+
+Tests approved incrementally by user.
+
+### Step 3: Run Tests
+
+```bash
+$ PYTHONPATH=core:exports pytest exports/file_monitor_agent/tests/
+
+test_constraints.py::test_preserves_originals     PASSED
+test_constraints.py::test_handles_errors          PASSED
+test_constraints.py::test_tracks_state            PASSED
+test_constraints.py::test_respects_permissions    PASSED
+
+test_success_criteria.py::test_detects_all_files  PASSED
+test_success_criteria.py::test_copies_all_files   PASSED
+test_success_criteria.py::test_resolves_conflicts PASSED
+test_success_criteria.py::test_continuous_run     PASSED
+
+test_edge_cases.py::test_empty_directory          PASSED
+test_edge_cases.py::test_permission_denied        PASSED
+test_edge_cases.py::test_disk_full                PASSED
+test_edge_cases.py::test_large_files              PASSED
+
+========================== 12 passed in 3.42s ==========================
+```
+
+**Phase 2 Complete**: All tests pass ✅
+
+## Final Output
+
+**Production-Ready Agent:**
+
+```bash
+# Run the agent
+./RUN_AGENT.sh
+
+# Or manually
+PYTHONPATH=core:exports:tools/src python -m file_monitor_agent run
+```
+
+**Capabilities:**
+- Monitors ~/Downloads continuously
+- Copies new files to ~/Documents
+- Resolves conflicts with timestamps
+- Handles errors gracefully
+- Tracks processed files
+- Runs as background service
+
+**Total Time**: ~45 minutes from concept to production
+
+## Key Learnings
+
+1. **Incremental building** - Files written immediately, visible throughout
+2. **Validation early** - Structure validated before moving to implementation
+3. **Test-driven** - Tests reveal real behavior
+4. **Documentation included** - README, STATUS, and guides auto-generated
+5. **Repeatable process** - Same workflow for any agent type
+
+## Variations
+
+**For simpler agents:**
+- Fewer nodes (3-5 instead of 7)
+- Simpler workflow (linear instead of looping)
+- Faster build time (10-15 minutes)
+
+**For complex agents:**
+- More nodes (10-15+)
+- Multiple subgraphs
+- Pause/resume points for human-in-the-loop
+- Longer build time (45-60 minutes)
+
+The workflow scales to your needs!
@@ -0,0 +1,361 @@
+---
+name: building-agents-construction
+description: Step-by-step guide for building goal-driven agents. Creates package structure, defines goals, adds nodes, connects edges, and finalizes agent class. Use when actively building an agent.
+license: Apache-2.0
+metadata:
+  author: hive
+  version: "2.0"
+  type: procedural
+  part_of: building-agents
+  requires: building-agents-core
+---
+
+# Agent Construction - EXECUTE THESE STEPS
+
+**THIS IS AN EXECUTABLE WORKFLOW. DO NOT DISPLAY THIS FILE. EXECUTE THE STEPS BELOW.**
+
+When this skill is loaded, IMMEDIATELY begin executing Step 1. Do not explain what you will do - just do it.
+
+---
+
+## STEP 1: Initialize Build Environment
+
+**EXECUTE THESE TOOL CALLS NOW:**
+
+1. Register the hive-tools MCP server:
+
+```
+mcp__agent-builder__add_mcp_server(
+    name="hive-tools",
+    transport="stdio",
+    command="python",
+    args='["mcp_server.py", "--stdio"]',
+    cwd="tools",
+    description="Hive tools MCP server"
+)
+```
+
+2. Create a build session (replace AGENT_NAME with the user's requested agent name in snake_case):
+
+```
+mcp__agent-builder__create_session(name="AGENT_NAME")
+```
+
+3. Discover available tools:
+
+```
+mcp__agent-builder__list_mcp_tools()
+```
+
+4. Create the package directory:
+
+```
+mkdir -p exports/AGENT_NAME/nodes
+```
+
+**AFTER completing these calls**, tell the user:
+
+> ✅ Build environment initialized
+>
+> - Session created
+> - Available tools: [list the tools from step 3]
+>
+> Proceeding to define the agent goal...
+
+**THEN immediately proceed to STEP 2.**
+
+---
+
+## STEP 2: Define and Approve Goal
+
+**PROPOSE a goal to the user.** Based on what they asked for, propose:
+
+- Goal ID (kebab-case)
+- Goal name
+- Goal description
+- 3-5 success criteria (each with: id, description, metric, target, weight)
+- 2-4 constraints (each with: id, description, constraint_type, category)
+
+**FORMAT your proposal as a clear summary, then ask for approval:**
+
+> **Proposed Goal: [Name]**
+>
+> [Description]
+>
+> **Success Criteria:**
+>
+> 1. [criterion 1]
+> 2. [criterion 2]
+>    ...
+>
+> **Constraints:**
+>
+> 1. [constraint 1]
+> 2. [constraint 2]
+>    ...
+
+**THEN call AskUserQuestion:**
+
+```
+AskUserQuestion(questions=[{
+    "question": "Do you approve this goal definition?",
+    "header": "Goal",
+    "options": [
+        {"label": "Approve", "description": "Goal looks good, proceed"},
+        {"label": "Modify", "description": "I want to change something"}
+    ],
+    "multiSelect": false
+}])
+```
+
+**WAIT for user response.**
+
+- If **Approve**: Call `mcp__agent-builder__set_goal(...)` with the goal details, then proceed to STEP 3
+- If **Modify**: Ask what they want to change, update proposal, ask again
+
+---
+
+## STEP 3: Design Node Workflow
+
+**BEFORE designing nodes**, review the available tools from Step 1. Nodes can ONLY use tools that exist.
+
+**DESIGN the workflow** as a series of nodes. For each node, determine:
+
+- node_id (kebab-case)
+- name
+- description
+- node_type: `"llm_generate"` (no tools) or `"llm_tool_use"` (uses tools)
+- input_keys (what data this node receives)
+- output_keys (what data this node produces)
+- tools (ONLY tools that exist - empty list for llm_generate)
+- system_prompt
+
+**PRESENT the workflow to the user:**
+
+> **Proposed Workflow: [N] nodes**
+>
+> 1. **[node-id]** - [description]
+>
+>    - Type: [llm_generate/llm_tool_use]
+>    - Input: [keys]
+>    - Output: [keys]
+>    - Tools: [tools or "none"]
+>
+> 2. **[node-id]** - [description]
+>    ...
+>
+> **Flow:** node1 → node2 → node3 → ...
+
+**THEN call AskUserQuestion:**
+
+```
+AskUserQuestion(questions=[{
+    "question": "Do you approve this workflow design?",
+    "header": "Workflow",
+    "options": [
+        {"label": "Approve", "description": "Workflow looks good, proceed to build nodes"},
+        {"label": "Modify", "description": "I want to change the workflow"}
+    ],
+    "multiSelect": false
+}])
+```
+
+**WAIT for user response.**
+
+- If **Approve**: Proceed to STEP 4
+- If **Modify**: Ask what they want to change, update design, ask again
+
+---
+
+## STEP 4: Build Nodes One by One
+
+**FOR EACH node in the approved workflow:**
+
+1. **Call** `mcp__agent-builder__add_node(...)` with the node details
+
+   - input_keys and output_keys must be JSON strings: `'["key1", "key2"]'`
+   - tools must be a JSON string: `'["tool1"]'` or `'[]'`
+
+2. **Call** `mcp__agent-builder__test_node(...)` to validate:
+
+```
+mcp__agent-builder__test_node(
+    node_id="the-node-id",
+    test_input='{"key": "test value"}',
+    mock_llm_response='{"output_key": "test output"}'
+)
+```
+
+3. **Check result:**
+
+   - If valid: Tell user "✅ Node [id] validated" and continue to next node
+   - If invalid: Show errors, fix the node, re-validate
+
+4. **Show progress** after each node:
+
+```
+mcp__agent-builder__get_session_status()
+```
+
+> ✅ Node [X] of [Y] complete: [node-id]
+
+**AFTER all nodes are added and validated**, proceed to STEP 5.
+
+---
+
+## STEP 5: Connect Edges
+
+**DETERMINE the edges** based on the workflow flow. For each connection:
+
+- edge_id (kebab-case)
+- source (node that outputs)
+- target (node that receives)
+- condition: `"on_success"`, `"always"`, `"on_failure"`, or `"conditional"`
+- condition_expr (Python expression, only if conditional)
+- priority (integer, lower = higher priority)
+
+**FOR EACH edge, call:**
+
+```
+mcp__agent-builder__add_edge(
+    edge_id="source-to-target",
+    source="source-node-id",
+    target="target-node-id",
+    condition="on_success",
+    condition_expr="",
+    priority=1
+)
+```
+
+**AFTER all edges are added, validate the graph:**
+
+```
+mcp__agent-builder__validate_graph()
+```
+
+- If valid: Tell user "✅ Graph structure validated" and proceed to STEP 6
+- If invalid: Show errors, fix edges, re-validate
+
+---
+
+## STEP 6: Generate Agent Package
+
+**EXPORT the graph data:**
+
+```
+mcp__agent-builder__export_graph()
+```
+
+This returns JSON containing the goal, all nodes and edges, and the MCP server configurations.
+
+**THEN write the Python package files** using the exported data. Create these files in `exports/AGENT_NAME/`:
+
+1. `config.py` - Runtime configuration with model settings
+2. `nodes/__init__.py` - All NodeSpec definitions
+3. `agent.py` - Goal, edges, graph config, and agent class
+4. `__init__.py` - Package exports
+5. `__main__.py` - CLI interface
+6. `mcp_servers.json` - MCP server configurations
+7. `README.md` - Usage documentation
+
+**IMPORTANT entry_points format:**
+
+- MUST be: `{"start": "first-node-id"}`
+- NOT: `{"first-node-id": ["input_keys"]}` (WRONG)
+- NOT: `{"first-node-id"}` (WRONG - this is a set)
+
+**Use the example agent** at `.claude/skills/building-agents-construction/examples/online_research_agent/` as a template for file structure and patterns.
+
+**AFTER writing all files, tell the user:**
+
+> ✅ Agent package created: `exports/AGENT_NAME/`
+>
+> **Files generated:**
+>
+> - `__init__.py` - Package exports
+> - `agent.py` - Goal, edges, graph config, agent class
+> - `config.py` - Runtime configuration
+> - `__main__.py` - CLI interface
+> - `nodes/__init__.py` - Node definitions
+> - `mcp_servers.json` - MCP server config
+> - `README.md` - Usage documentation
+>
+> **Test your agent:**
+>
+> ```bash
+> # Run from the hive repository root
+> PYTHONPATH=core:exports python -m AGENT_NAME validate
+> PYTHONPATH=core:exports python -m AGENT_NAME info
+> ```
+
+---
+
+## STEP 7: Verify and Test
+
+**RUN validation:**
+
+```bash
+# Run from the hive repository root
+PYTHONPATH=core:exports python -m AGENT_NAME validate
+```
+
+- If valid: Agent is complete!
+- If errors: Fix the issues and re-run
+
+**SHOW final session summary:**
+
+```
+mcp__agent-builder__get_session_status()
+```
+
+**TELL the user the agent is ready** and suggest next steps:
+
+- Run with mock mode to test without API calls
+- Use `/testing-agent` skill for comprehensive testing
+- Use `/setup-credentials` if the agent needs API keys
+
+---
+
+## REFERENCE: Node Types
+
+| Type           | tools param            | Use when                                       |
+| -------------- | ---------------------- | ---------------------------------------------- |
+| `llm_generate` | `'[]'`                 | Pure reasoning, JSON output, no external calls |
+| `llm_tool_use` | `'["tool1", "tool2"]'` | Needs to call MCP tools                        |
+
+---
+
+## REFERENCE: Edge Conditions
+
+| Condition     | When edge is followed                 |
+| ------------- | ------------------------------------- |
+| `on_success`  | Source node completed successfully    |
+| `on_failure`  | Source node failed                    |
+| `always`      | Always, regardless of success/failure |
+| `conditional` | When condition_expr evaluates to True |
+
+---
+
+## REFERENCE: System Prompt Best Practice
+
+For nodes with JSON output, include this in the system_prompt:
+
+```
+CRITICAL: Return ONLY raw JSON. NO markdown, NO code blocks.
+Just the JSON object starting with { and ending with }.
+
+Return this exact structure:
+{
+  "key1": "...",
+  "key2": "..."
+}
+```
+
+---
+
+## COMMON MISTAKES TO AVOID
+
+1. **Using tools that don't exist** - Always check `mcp__agent-builder__list_mcp_tools()` first
+2. **Wrong entry_points format** - Must be `{"start": "node-id"}`, NOT a set or list
+3. **Skipping validation** - Always validate nodes and graph before proceeding
+4. **Not waiting for approval** - Always ask user before major steps
+5. **Displaying this file** - Execute the steps, don't show documentation
@@ -0,0 +1,80 @@
+# Online Research Agent
+
+Deep-dive research agent that searches 10+ sources and produces comprehensive narrative reports with citations.
+
+## Features
+
+- Generates multiple search queries from a topic
+- Searches and fetches 15+ web sources
+- Evaluates and ranks sources by relevance
+- Synthesizes findings into themes
+- Writes narrative report with numbered citations
+- Quality checks for uncited claims
+- Saves report to local markdown file
+
+## Usage
+
+### CLI
+
+```bash
+# Show agent info
+python -m online_research_agent info
+
+# Validate structure
+python -m online_research_agent validate
+
+# Run research on a topic
+python -m online_research_agent run --topic "impact of AI on healthcare"
+
+# Interactive shell
+python -m online_research_agent shell
+```
+
+### Python API
+
+```python
+import asyncio
+
+from online_research_agent import default_agent
+
+
+async def main():
+    # Simple usage (run() is a coroutine, so it must be awaited inside an event loop)
+    result = await default_agent.run({"topic": "climate change solutions"})
+
+    # Check output
+    if result.success:
+        print(f"Report saved to: {result.output['file_path']}")
+        print(result.output['final_report'])
+
+
+asyncio.run(main())
+```
+
+## Workflow
+
+```
+parse-query → search-sources → fetch-content → evaluate-sources
+                                                      ↓
+                                write-report ← synthesize-findings
+                                      ↓
+                               quality-check → save-report
+```
+
+## Output
+
+Reports are saved to `./research_reports/` as markdown files with:
+
+1. Executive Summary
+2. Introduction
+3. Key Findings (by theme)
+4. Analysis
+5. Conclusion
+6. References
+
+## Requirements
+
+- Python 3.11+
+- LLM provider API key (Groq, Cerebras, etc.)
+- Internet access for web search/fetch
+
+## Configuration
+
+Edit `config.py` to change:
+
+- `model`: LLM model (default: `provider/model` read from `~/.hive/configuration.json`, falling back to `anthropic/claude-sonnet-4-20250514`)
+- `temperature`: Generation temperature (default: 0.7)
+- `max_tokens`: Max tokens per response (default: 8192)
@@ -0,0 +1,23 @@
+"""
+Online Research Agent - Deep-dive research with narrative reports.
+
+Research any topic by searching multiple sources, synthesizing information,
+and producing a well-structured narrative report with citations.
+"""
+
+from .agent import OnlineResearchAgent, default_agent, goal, nodes, edges
+from .config import RuntimeConfig, AgentMetadata, default_config, metadata
+
+__version__ = "1.0.0"
+
+__all__ = [
+    "OnlineResearchAgent",
+    "default_agent",
+    "goal",
+    "nodes",
+    "edges",
+    "RuntimeConfig",
+    "AgentMetadata",
+    "default_config",
+    "metadata",
+]
@@ -0,0 +1,158 @@
+"""
+CLI entry point for Online Research Agent.
+
+Uses AgentRuntime for multi-entrypoint support with HITL pause/resume.
+"""
+
+import asyncio
+import json
+import logging
+import sys
+import click
+
+from .agent import default_agent, OnlineResearchAgent
+
+
+def setup_logging(verbose=False, debug=False):
+    """Set up stderr logging at a level chosen by the CLI verbosity flags.
+
+    ``debug`` wins over ``verbose``; with neither flag only warnings and
+    above are shown. The chosen level is also applied to the ``framework``
+    logger.
+    """
+    presets = {
+        "debug": (logging.DEBUG, "%(asctime)s %(name)s: %(message)s"),
+        "verbose": (logging.INFO, "%(message)s"),
+        "default": (logging.WARNING, "%(levelname)s: %(message)s"),
+    }
+    mode = "debug" if debug else ("verbose" if verbose else "default")
+    log_level, log_format = presets[mode]
+    logging.basicConfig(level=log_level, format=log_format, stream=sys.stderr)
+    logging.getLogger("framework").setLevel(log_level)
+
+
+@click.group()
+@click.version_option(version="1.0.0")
+def cli():
+    """Online Research Agent - Deep-dive research with narrative reports."""
+    # Root command group: the subcommands below attach via @cli.command().
+    # The docstring above doubles as the --help text, so keep it user-facing.
+
+
+@cli.command()
+@click.option("--topic", "-t", type=str, required=True, help="Research topic")
+@click.option("--mock", is_flag=True, help="Run in mock mode")
+@click.option("--quiet", "-q", is_flag=True, help="Only output result JSON")
+@click.option("--verbose", "-v", is_flag=True, help="Show execution details")
+@click.option("--debug", is_flag=True, help="Show debug logging")
+def run(topic, mock, quiet, verbose, debug):
+    """Execute research on a topic."""
+    # --quiet leaves logging unconfigured so the JSON result is the only
+    # thing this command sets up for output.
+    if not quiet:
+        setup_logging(verbose=verbose, debug=debug)
+
+    # One-shot execution: default_agent.run() starts and stops the runtime.
+    outcome = asyncio.run(default_agent.run({"topic": topic}, mock_mode=mock))
+
+    payload = {
+        "success": outcome.success,
+        "steps_executed": outcome.steps_executed,
+        "output": outcome.output,
+    }
+    if outcome.error:
+        payload["error"] = outcome.error
+
+    # default=str stringifies any value json cannot serialize natively.
+    click.echo(json.dumps(payload, indent=2, default=str))
+
+    # The process exit status mirrors the run's success flag.
+    exit_code = 0 if outcome.success else 1
+    sys.exit(exit_code)
+
+
+@cli.command()
+@click.option("--json", "output_json", is_flag=True)
+def info(output_json):
+    """Show agent information."""
+    details = default_agent.info()
+
+    # Machine-readable form: dump the whole info dict and stop.
+    if output_json:
+        click.echo(json.dumps(details, indent=2))
+        return
+
+    # Human-readable form: a fixed subset of the fields, one per line.
+    emit = click.echo
+    emit(f"Agent: {details['name']}")
+    emit(f"Version: {details['version']}")
+    emit(f"Description: {details['description']}")
+    emit(f"\nNodes: {', '.join(details['nodes'])}")
+    emit(f"Entry: {details['entry_node']}")
+    emit(f"Terminal: {', '.join(details['terminal_nodes'])}")
+
+
+@cli.command()
+def validate():
+    """Validate agent structure."""
+    report = default_agent.validate()
+
+    # Failure path first: list every structural error, then exit non-zero.
+    if not report["valid"]:
+        click.echo("Agent has errors:")
+        for problem in report["errors"]:
+            click.echo(f"  ERROR: {problem}")
+        sys.exit(1)
+
+    click.echo("Agent is valid")
+    sys.exit(0)
+
+
+@cli.command()
+@click.option("--verbose", "-v", is_flag=True)
+def shell(verbose):
+    """Interactive research session."""
+    # Synchronous click entry point; the prompt loop itself is a coroutine.
+    session = _interactive_shell(verbose)
+    asyncio.run(session)
+
+
+# Words that end the interactive session (compared case-insensitively).
+_EXIT_COMMANDS = frozenset({"quit", "exit", "q"})
+
+
+def _echo_shell_result(result):
+    """Print the outcome of one interactive research run.
+
+    Args:
+        result: Value returned by ``agent.trigger_and_wait``; ``None`` is
+            reported as a timeout.
+    """
+    if result is None:
+        click.echo("\n[Execution timed out]\n")
+        return
+
+    if not result.success:
+        click.echo(f"\nResearch failed: {result.error}\n")
+        return
+
+    output = result.output
+    if "file_path" in output:
+        click.echo(f"\nReport saved to: {output['file_path']}\n")
+    if "final_report" in output:
+        report = output["final_report"]
+        # Show only the first 500 characters so long reports don't flood
+        # the terminal.
+        preview = report[:500] + "..." if len(report) > 500 else report
+        click.echo("\n--- Report Preview ---\n")
+        click.echo(preview)
+        click.echo("\n")
+
+
+async def _interactive_shell(verbose=False):
+    """Run the interactive prompt loop until the user quits.
+
+    Starts one ``OnlineResearchAgent`` for the whole session, reads topics
+    from stdin, triggers the ``start`` entry point for each topic and prints
+    a short summary. The agent is always stopped on the way out.
+
+    Args:
+        verbose: Forwarded to ``setup_logging``.
+    """
+    import traceback
+
+    setup_logging(verbose=verbose)
+
+    click.echo("=== Online Research Agent ===")
+    click.echo("Enter a topic to research (or 'quit' to exit):\n")
+
+    agent = OnlineResearchAgent()
+    await agent.start()
+
+    # We are inside a coroutine, so the running loop is the right handle.
+    loop = asyncio.get_running_loop()
+
+    try:
+        while True:
+            try:
+                # input() blocks, so run it in the default executor.
+                raw_topic = await loop.run_in_executor(None, input, "Topic> ")
+            except (EOFError, KeyboardInterrupt):
+                # EOF (Ctrl-D or a closed/piped stdin) must end the session:
+                # retrying would raise EOFError again on every iteration.
+                click.echo("\nGoodbye!")
+                break
+
+            topic = raw_topic.strip()
+            if topic.lower() in _EXIT_COMMANDS:
+                click.echo("Goodbye!")
+                break
+            if not topic:
+                continue
+
+            click.echo("\nResearching... (this may take a few minutes)\n")
+
+            try:
+                result = await agent.trigger_and_wait("start", {"topic": topic})
+                _echo_shell_result(result)
+            except KeyboardInterrupt:
+                click.echo("\nGoodbye!")
+                break
+            except Exception as e:
+                # Deliberately broad: one failed run should be reported and
+                # must not end the interactive session.
+                click.echo(f"Error: {e}", err=True)
+                traceback.print_exc()
+    finally:
+        await agent.stop()
+
+
+if __name__ == "__main__":
+    cli()
@@ -0,0 +1,429 @@
+"""Agent graph construction for Online Research Agent."""
+
+from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
+from framework.graph.edge import GraphSpec
+from framework.graph.executor import ExecutionResult
+from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
+from framework.runtime.execution_stream import EntryPointSpec
+from framework.llm import LiteLLMProvider
+from framework.runner.tool_registry import ToolRegistry
+
+from .config import default_config, metadata
+from .nodes import (
+    parse_query_node,
+    search_sources_node,
+    fetch_content_node,
+    evaluate_sources_node,
+    synthesize_findings_node,
+    write_report_node,
+    quality_check_node,
+    save_report_node,
+)
+
+# Goal definition
+# Declares what a successful research run looks like: five weighted success
+# criteria plus four constraints. Exported as `goal` and handed to the
+# runtime by OnlineResearchAgent.
+goal = Goal(
+    id="comprehensive-online-research",
+    name="Comprehensive Online Research",
+    description="Research any topic by searching multiple sources, synthesizing information, and producing a well-structured narrative report with citations.",
+    # The five weights below add up to 1.0 (0.20 + 0.25 + 0.25 + 0.15 + 0.15).
+    # Targets are plain strings in mixed notations (">=10" vs "90%").
+    # NOTE(review): how the framework interprets `metric`/`target` is not
+    # visible in this package - confirm before changing the notation.
+    success_criteria=[
+        SuccessCriterion(
+            id="source-coverage",
+            description="Query 10+ diverse sources",
+            metric="source_count",
+            target=">=10",
+            weight=0.20,
+        ),
+        SuccessCriterion(
+            id="relevance",
+            description="All sources directly address the query",
+            metric="relevance_score",
+            target="90%",
+            weight=0.25,
+        ),
+        SuccessCriterion(
+            id="synthesis",
+            description="Synthesize findings into coherent narrative",
+            metric="coherence_score",
+            target="85%",
+            weight=0.25,
+        ),
+        SuccessCriterion(
+            id="citations",
+            description="Include citations for all claims",
+            metric="citation_coverage",
+            target="100%",
+            weight=0.15,
+        ),
+        SuccessCriterion(
+            id="actionable",
+            description="Report answers the user's question",
+            metric="answer_completeness",
+            target="90%",
+            weight=0.15,
+        ),
+    ],
+    # Three "quality" constraints and one "functional" constraint.
+    constraints=[
+        Constraint(
+            id="no-hallucination",
+            description="Only include information found in sources",
+            constraint_type="quality",
+            category="accuracy",
+        ),
+        Constraint(
+            id="source-attribution",
+            description="Every factual claim must cite its source",
+            constraint_type="quality",
+            category="accuracy",
+        ),
+        Constraint(
+            id="recency-preference",
+            description="Prefer recent sources when relevant",
+            constraint_type="quality",
+            category="relevance",
+        ),
+        Constraint(
+            id="no-paywalled",
+            description="Avoid sources that require payment to access",
+            constraint_type="functional",
+            category="accessibility",
+        ),
+    ],
+)
+
+# Nodes, listed in pipeline order
+nodes = [
+    parse_query_node,
+    search_sources_node,
+    fetch_content_node,
+    evaluate_sources_node,
+    synthesize_findings_node,
+    write_report_node,
+    quality_check_node,
+    save_report_node,
+]
+
+# The graph is a straight line: each step hands over to the next one when it
+# succeeds. One row per edge: (edge id, source node id, target node id).
+_LINEAR_PIPELINE = (
+    ("parse-to-search", "parse-query", "search-sources"),
+    ("search-to-fetch", "search-sources", "fetch-content"),
+    ("fetch-to-evaluate", "fetch-content", "evaluate-sources"),
+    ("evaluate-to-synthesize", "evaluate-sources", "synthesize-findings"),
+    ("synthesize-to-write", "synthesize-findings", "write-report"),
+    ("write-to-quality", "write-report", "quality-check"),
+    ("quality-to-save", "quality-check", "save-report"),
+)
+
+# Edge definitions, generated from the table above
+edges = [
+    EdgeSpec(
+        id=edge_id,
+        source=source_id,
+        target=target_id,
+        condition=EdgeCondition.ON_SUCCESS,
+        priority=1,
+    )
+    for edge_id, source_id, target_id in _LINEAR_PIPELINE
+]
+
+# Graph configuration: a single manual entry point, no pause nodes, and the
+# save step as the only terminal node.
+entry_node = "parse-query"
+entry_points = {"start": "parse-query"}
+pause_nodes = []
+terminal_nodes = ["save-report"]
+
+
+class OnlineResearchAgent:
+    """
+    Online Research Agent - Deep-dive research with narrative reports.
+
+    Uses AgentRuntime for multi-entrypoint support with HITL pause/resume.
+
+    Use ``run()`` for a single execution, or ``start()`` +
+    ``trigger_and_wait()`` + ``stop()`` to keep one runtime across several
+    executions. The runtime is built lazily on the first ``start()``.
+    """
+
+    def __init__(self, config=None):
+        """Bind the module-level graph definition to this instance.
+
+        Args:
+            config: Runtime configuration; ``default_config`` when omitted.
+        """
+        self.config = config or default_config
+        # These are the module-level objects themselves (not copies), so all
+        # instances share them.
+        self.goal = goal
+        self.nodes = nodes
+        self.edges = edges
+        self.entry_node = entry_node
+        self.entry_points = entry_points
+        self.pause_nodes = pause_nodes
+        self.terminal_nodes = terminal_nodes
+        self._runtime: AgentRuntime | None = None
+        self._graph: GraphSpec | None = None
+
+    def _build_entry_point_specs(self) -> list[EntryPointSpec]:
+        """Convert entry_points dict to EntryPointSpec list.
+
+        ``"start"`` and any other plain id become manual triggers; ids
+        containing ``"_resume"`` become resume triggers.
+        """
+        specs = []
+        for ep_id, node_id in self.entry_points.items():
+            if ep_id == "start":
+                trigger_type = "manual"
+                name = "Start"
+            elif "_resume" in ep_id:
+                trigger_type = "resume"
+                name = f"Resume from {ep_id.replace('_resume', '')}"
+            else:
+                trigger_type = "manual"
+                name = ep_id.replace("-", " ").title()
+
+            specs.append(
+                EntryPointSpec(
+                    id=ep_id,
+                    name=name,
+                    entry_node=node_id,
+                    trigger_type=trigger_type,
+                    isolation_level="shared",
+                )
+            )
+        return specs
+
+    @staticmethod
+    def _mcp_server_configs(raw_config: dict, base_dir) -> list[dict]:
+        """Extract MCP server configs from a parsed ``mcp_servers.json``.
+
+        Two layouts are accepted:
+
+        * ``{"servers": [{...}, ...]}`` - a list of server configs;
+        * ``{"<server-name>": {...}, ...}`` - a name-keyed mapping, which is
+          the layout of the ``mcp_servers.json`` shipped with this agent.
+
+        Relative ``cwd`` values are resolved against ``base_dir``.
+
+        Args:
+            raw_config: The decoded JSON document.
+            base_dir: Directory that relative ``cwd`` entries are relative to.
+
+        Returns:
+            A list of new server config dicts (the input is not mutated).
+        """
+        from pathlib import Path
+
+        if "servers" in raw_config:
+            configs = [dict(cfg) for cfg in raw_config["servers"]]
+        else:
+            # NOTE(review): assumes register_mcp_server identifies a server
+            # by a "name" key - confirm against ToolRegistry.
+            configs = [
+                {"name": name, **cfg}
+                for name, cfg in raw_config.items()
+                if isinstance(cfg, dict)
+            ]
+
+        for cfg in configs:
+            cwd = cfg.get("cwd")
+            if cwd and not Path(cwd).is_absolute():
+                cfg["cwd"] = str(base_dir / cwd)
+        return configs
+
+    def _create_runtime(self, mock_mode=False) -> AgentRuntime:
+        """Create AgentRuntime instance.
+
+        Registers the MCP servers listed in ``mcp_servers.json`` (if the file
+        exists next to this module), builds the ``GraphSpec`` and creates the
+        runtime. Both are stored on the instance.
+
+        Args:
+            mock_mode: When true, no LLM provider is created and ``llm=None``
+                is passed to the runtime.
+
+        Returns:
+            The newly created runtime (also stored as ``self._runtime``).
+        """
+        import json
+        from pathlib import Path
+
+        # Persistent storage in ~/.hive for telemetry and run history
+        storage_path = Path.home() / ".hive" / "online_research_agent"
+        storage_path.mkdir(parents=True, exist_ok=True)
+
+        tool_registry = ToolRegistry()
+
+        # Load MCP servers (always load, needed for tool validation)
+        agent_dir = Path(__file__).parent
+        mcp_config_path = agent_dir / "mcp_servers.json"
+
+        if mcp_config_path.exists():
+            with open(mcp_config_path, encoding="utf-8") as f:
+                mcp_servers = json.load(f)
+
+            for server_config in self._mcp_server_configs(mcp_servers, agent_dir):
+                tool_registry.register_mcp_server(server_config)
+
+        llm = None
+        if not mock_mode:
+            # LiteLLMProvider uses environment variables for API keys
+            llm = LiteLLMProvider(
+                model=self.config.model,
+                api_key=self.config.api_key,
+                api_base=self.config.api_base,
+            )
+
+        self._graph = GraphSpec(
+            id="online-research-agent-graph",
+            goal_id=self.goal.id,
+            version="1.0.0",
+            entry_node=self.entry_node,
+            entry_points=self.entry_points,
+            terminal_nodes=self.terminal_nodes,
+            pause_nodes=self.pause_nodes,
+            nodes=self.nodes,
+            edges=self.edges,
+            default_model=self.config.model,
+            max_tokens=self.config.max_tokens,
+        )
+
+        # Create AgentRuntime with all entry points
+        self._runtime = create_agent_runtime(
+            graph=self._graph,
+            goal=self.goal,
+            storage_path=storage_path,
+            entry_points=self._build_entry_point_specs(),
+            llm=llm,
+            tools=list(tool_registry.get_tools().values()),
+            tool_executor=tool_registry.get_executor(),
+        )
+
+        return self._runtime
+
+    async def start(self, mock_mode=False) -> None:
+        """Start the agent runtime.
+
+        The runtime is created on the first call only, so ``mock_mode`` has
+        no effect once a runtime already exists on this instance.
+        """
+        if self._runtime is None:
+            self._create_runtime(mock_mode=mock_mode)
+        await self._runtime.start()
+
+    async def stop(self) -> None:
+        """Stop the agent runtime (no-op if it was never created)."""
+        if self._runtime is not None:
+            await self._runtime.stop()
+
+    async def trigger(
+        self,
+        entry_point: str,
+        input_data: dict,
+        correlation_id: str | None = None,
+        session_state: dict | None = None,
+    ) -> str:
+        """
+        Trigger execution at a specific entry point (non-blocking).
+
+        Args:
+            entry_point: Entry point ID (e.g., "start", "pause-node_resume")
+            input_data: Input data for the execution
+            correlation_id: Optional ID to correlate related executions
+            session_state: Optional session state to resume from (with paused_at, memory)
+
+        Returns:
+            Execution ID for tracking
+
+        Raises:
+            RuntimeError: If the runtime has not been started.
+        """
+        if self._runtime is None or not self._runtime.is_running:
+            raise RuntimeError("Agent runtime not started. Call start() first.")
+        return await self._runtime.trigger(
+            entry_point, input_data, correlation_id, session_state=session_state
+        )
+
+    async def trigger_and_wait(
+        self,
+        entry_point: str,
+        input_data: dict,
+        timeout: float | None = None,
+        session_state: dict | None = None,
+    ) -> ExecutionResult | None:
+        """
+        Trigger execution and wait for completion.
+
+        Args:
+            entry_point: Entry point ID
+            input_data: Input data for the execution
+            timeout: Maximum time to wait (seconds)
+            session_state: Optional session state to resume from (with paused_at, memory)
+
+        Returns:
+            ExecutionResult or None if timeout
+
+        Raises:
+            RuntimeError: If the runtime has not been started.
+        """
+        if self._runtime is None or not self._runtime.is_running:
+            raise RuntimeError("Agent runtime not started. Call start() first.")
+        return await self._runtime.trigger_and_wait(
+            entry_point, input_data, timeout, session_state=session_state
+        )
+
+    async def run(
+        self, context: dict, mock_mode=False, session_state=None
+    ) -> ExecutionResult:
+        """
+        Run the agent (convenience method for simple single execution).
+
+        For more control, use start() + trigger_and_wait() + stop().
+
+        Args:
+            context: Input data for the execution (e.g. ``{"topic": ...}``).
+            mock_mode: Forwarded to ``start()``.
+            session_state: Optional state to resume from. When it carries a
+                ``paused_at`` node whose ``<node>_resume`` entry point exists,
+                execution resumes there; otherwise it begins at ``"start"``.
+
+        Returns:
+            The execution result; a failed result with error
+            ``"Execution timeout"`` when the runtime returned nothing.
+        """
+        await self.start(mock_mode=mock_mode)
+        try:
+            # Default to a fresh run; switch to the resume entry point only
+            # when the paused node actually has one registered.
+            entry_point = "start"
+            if session_state and "paused_at" in session_state:
+                resume_key = f"{session_state['paused_at']}_resume"
+                if resume_key in self.entry_points:
+                    entry_point = resume_key
+
+            result = await self.trigger_and_wait(
+                entry_point, context, session_state=session_state
+            )
+            return result or ExecutionResult(success=False, error="Execution timeout")
+        finally:
+            await self.stop()
+
+    async def get_goal_progress(self) -> dict:
+        """Get goal progress across all executions.
+
+        Raises:
+            RuntimeError: If no runtime has been created yet.
+        """
+        if self._runtime is None:
+            raise RuntimeError("Agent runtime not started")
+        return await self._runtime.get_goal_progress()
+
+    def get_stats(self) -> dict:
+        """Get runtime statistics (``{"running": False}`` before first start)."""
+        if self._runtime is None:
+            return {"running": False}
+        return self._runtime.get_stats()
+
+    def info(self):
+        """Get agent information.
+
+        Returns:
+            A dict with the package metadata, the goal's name and
+            description, and the ids that make up the graph.
+        """
+        return {
+            "name": metadata.name,
+            "version": metadata.version,
+            "description": metadata.description,
+            "goal": {
+                "name": self.goal.name,
+                "description": self.goal.description,
+            },
+            "nodes": [n.id for n in self.nodes],
+            "edges": [e.id for e in self.edges],
+            "entry_node": self.entry_node,
+            "entry_points": self.entry_points,
+            "pause_nodes": self.pause_nodes,
+            "terminal_nodes": self.terminal_nodes,
+            "multi_entrypoint": True,
+        }
+
+    def validate(self):
+        """Validate agent structure.
+
+        Checks that every edge endpoint, the entry node, each terminal node,
+        each pause node and each entry-point target refers to a known node id.
+
+        Returns:
+            ``{"valid": bool, "errors": [...], "warnings": [...]}``; no check
+            currently produces warnings, so that list is always empty.
+        """
+        errors = []
+        warnings = []
+
+        node_ids = {node.id for node in self.nodes}
+        for edge in self.edges:
+            if edge.source not in node_ids:
+                errors.append(f"Edge {edge.id}: source '{edge.source}' not found")
+            if edge.target not in node_ids:
+                errors.append(f"Edge {edge.id}: target '{edge.target}' not found")
+
+        if self.entry_node not in node_ids:
+            errors.append(f"Entry node '{self.entry_node}' not found")
+
+        for terminal in self.terminal_nodes:
+            if terminal not in node_ids:
+                errors.append(f"Terminal node '{terminal}' not found")
+
+        for pause in self.pause_nodes:
+            if pause not in node_ids:
+                errors.append(f"Pause node '{pause}' not found")
+
+        # Validate entry points
+        for ep_id, node_id in self.entry_points.items():
+            if node_id not in node_ids:
+                errors.append(
+                    f"Entry point '{ep_id}' references unknown node '{node_id}'"
+                )
+
+        return {
+            "valid": len(errors) == 0,
+            "errors": errors,
+            "warnings": warnings,
+        }
+
+
+# Create default instance
+default_agent = OnlineResearchAgent()
@@ -0,0 +1,43 @@
+"""Runtime configuration."""
+
+import json
+from dataclasses import dataclass, field
+from pathlib import Path
+
+
+def _load_preferred_model() -> str:
+    """Load preferred model from ~/.hive/configuration.json.
+
+    The file is expected to hold ``{"llm": {"provider": ..., "model": ...}}``.
+    Lookup is best-effort: a missing or unreadable file, invalid JSON, an
+    unexpected document shape, or an empty provider/model all yield the
+    built-in fallback instead of raising.
+
+    Returns:
+        ``"<provider>/<model>"`` from the user configuration, or
+        ``"anthropic/claude-sonnet-4-20250514"``.
+    """
+    fallback = "anthropic/claude-sonnet-4-20250514"
+    config_path = Path.home() / ".hive" / "configuration.json"
+
+    try:
+        with open(config_path, encoding="utf-8") as f:
+            config = json.load(f)
+    except (OSError, ValueError):
+        # OSError: file missing or unreadable. ValueError: invalid JSON
+        # (JSONDecodeError) or undecodable bytes (UnicodeDecodeError).
+        return fallback
+
+    # Tolerate documents that are valid JSON but not the expected shape.
+    llm = config.get("llm") if isinstance(config, dict) else None
+    if not isinstance(llm, dict):
+        return fallback
+
+    provider = llm.get("provider")
+    model = llm.get("model")
+    if provider and model:
+        return f"{provider}/{model}"
+    return fallback
+
+
+# Runtime settings for the agent. agent.py passes model/api_key/api_base to
+# LiteLLMProvider and model/max_tokens to GraphSpec.
+@dataclass
+class RuntimeConfig:
+    # "provider/model" string; resolved once per instance from the user's
+    # ~/.hive/configuration.json, with a built-in fallback.
+    model: str = field(default_factory=_load_preferred_model)
+    # NOTE(review): not referenced by agent.py in this package - confirm
+    # whether anything else consumes it.
+    temperature: float = 0.7
+    # Max tokens per response.
+    max_tokens: int = 8192
+    # Left as None by default; agent.py notes that LiteLLMProvider uses
+    # environment variables for API keys.
+    api_key: str | None = None
+    api_base: str | None = None
+
+
+# Shared default used when OnlineResearchAgent is built without a config.
+default_config = RuntimeConfig()
+
+
+# Agent metadata
+# Static identity of the package, surfaced through OnlineResearchAgent.info().
+@dataclass
+class AgentMetadata:
+    name: str = "Online Research Agent"
+    # Same version string as __version__ in __init__.py and the CLI's
+    # --version option; keep them in sync when bumping.
+    version: str = "1.0.0"
+    # Same wording as the goal description in agent.py.
+    description: str = "Research any topic by searching multiple sources, synthesizing information, and producing a well-structured narrative report with citations."
+
+
+# Module-level instance imported by agent.py.
+metadata = AgentMetadata()
@@ -0,0 +1,9 @@
+{
+  "hive-tools": {
+    "transport": "stdio",
+    "command": "python",
+    "args": ["mcp_server.py", "--stdio"],
+    "cwd": "../../tools",
+    "description": "Hive tools MCP server providing web_search, web_scrape, and write_to_file"
+  }
+}
@@ -0,0 +1,396 @@
+"""Node definitions for Online Research Agent."""
+
+from framework.graph import NodeSpec
+
+# Node 1: Parse Query
+# Entry node of the graph. Turns the user's `topic` into a research focus,
+# key aspects and search queries. The output key names match the input_keys
+# of later nodes: `search_queries` is read by search-sources, `key_aspects`
+# by evaluate-sources and synthesize-findings, and `research_focus` by most
+# of the later nodes.
+parse_query_node = NodeSpec(
+    id="parse-query",
+    name="Parse Query",
+    description="Analyze the research topic and generate 3-5 diverse search queries to cover different aspects",
+    # Pure LLM step: no tools are offered (see tools=[] below).
+    node_type="llm_generate",
+    input_keys=["topic"],
+    output_keys=["search_queries", "research_focus", "key_aspects"],
+    output_schema={
+        "research_focus": {
+            "type": "string",
+            "required": True,
+            "description": "Brief statement of what we're researching",
+        },
+        "key_aspects": {
+            "type": "array",
+            "required": True,
+            "description": "List of 3-5 key aspects to investigate",
+        },
+        "search_queries": {
+            "type": "array",
+            "required": True,
+            "description": "List of 3-5 search queries",
+        },
+    },
+    # The JSON skeleton in the prompt mirrors output_schema; keep the two in
+    # sync when adding or renaming keys.
+    system_prompt="""\
+You are a research query strategist. Given a research topic, analyze it and generate search queries.
+
+Your task:
+1. Understand the core research question
+2. Identify 3-5 key aspects to investigate
+3. Generate 3-5 diverse search queries that will find comprehensive information
+
+CRITICAL: Return ONLY raw JSON. NO markdown, NO code blocks.
+
+Return this JSON structure:
+{
+  "research_focus": "Brief statement of what we're researching",
+  "key_aspects": ["aspect1", "aspect2", "aspect3"],
+  "search_queries": [
+    "query 1 - broad overview",
+    "query 2 - specific angle",
+    "query 3 - recent developments",
+    "query 4 - expert opinions",
+    "query 5 - data/statistics"
+  ]
+}
+""",
+    tools=[],
+    max_retries=3,
+)
+
+# Node 2: Search Sources
+# Runs the generated queries through the `web_search` tool and collects
+# candidate URLs. `source_urls` is read by fetch-content;
+# `search_results_summary` is not listed in any other node's input_keys.
+search_sources_node = NodeSpec(
+    id="search-sources",
+    name="Search Sources",
+    description="Execute web searches using the generated queries to find 15+ source URLs",
+    # Tool-using step: the LLM may call the tools listed in `tools` below.
+    node_type="llm_tool_use",
+    input_keys=["search_queries", "research_focus"],
+    output_keys=["source_urls", "search_results_summary"],
+    output_schema={
+        "source_urls": {
+            "type": "array",
+            "required": True,
+            "description": "List of source URLs found",
+        },
+        "search_results_summary": {
+            "type": "string",
+            "required": True,
+            "description": "Brief summary of what was found",
+        },
+    },
+    system_prompt="""\
+You are a research assistant executing web searches. Use the web_search tool to find sources.
+
+Your task:
+1. Execute each search query using web_search tool
+2. Collect URLs from search results
+3. Aim for 15+ diverse sources
+
+After searching, return JSON with found sources:
+{
+  "source_urls": ["url1", "url2", ...],
+  "search_results_summary": "Brief summary of what was found"
+}
+""",
+    # Tool name must match one exposed by the MCP server in mcp_servers.json.
+    tools=["web_search"],
+    max_retries=3,
+)
+
+# Node 3: Fetch Content
+# Retrieves page content for each discovered URL via the `web_scrape` tool.
+# `fetched_sources` is read by evaluate-sources; `fetch_errors` is not listed
+# in any other node's input_keys.
+fetch_content_node = NodeSpec(
+    id="fetch-content",
+    name="Fetch Content",
+    description="Fetch and extract content from the discovered source URLs",
+    node_type="llm_tool_use",
+    input_keys=["source_urls", "research_focus"],
+    output_keys=["fetched_sources", "fetch_errors"],
+    output_schema={
+        "fetched_sources": {
+            "type": "array",
+            "required": True,
+            "description": "List of fetched source objects with url, title, content",
+        },
+        # Marked required, so the key must be present in the output.
+        "fetch_errors": {
+            "type": "array",
+            "required": True,
+            "description": "List of URLs that failed to fetch",
+        },
+    },
+    system_prompt="""\
+You are a content fetcher. Use web_scrape tool to retrieve content from URLs.
+
+Your task:
+1. Fetch content from each source URL using web_scrape tool
+2. Extract the main content relevant to the research focus
+3. Track any URLs that failed to fetch
+
+After fetching, return JSON:
+{
+  "fetched_sources": [
+    {"url": "...", "title": "...", "content": "extracted text..."},
+    ...
+  ],
+  "fetch_errors": ["url that failed", ...]
+}
+""",
+    # Tool name must match one exposed by the MCP server in mcp_servers.json.
+    tools=["web_scrape"],
+    max_retries=3,
+)
+
+# Node 4: Evaluate Sources
+# Scores the fetched sources and keeps the ten best. `ranked_sources` is read
+# by synthesize-findings and write-report; `source_analysis` is not listed in
+# any other node's input_keys.
+evaluate_sources_node = NodeSpec(
+    id="evaluate-sources",
+    name="Evaluate Sources",
+    description="Score sources for relevance and quality, filter to top 10",
+    # Pure LLM step: no tools are offered (see tools=[] below).
+    node_type="llm_generate",
+    input_keys=["fetched_sources", "research_focus", "key_aspects"],
+    output_keys=["ranked_sources", "source_analysis"],
+    output_schema={
+        "ranked_sources": {
+            "type": "array",
+            "required": True,
+            "description": "List of ranked sources with scores",
+        },
+        "source_analysis": {
+            "type": "string",
+            "required": True,
+            "description": "Overview of source quality and coverage",
+        },
+    },
+    # The requested JSON repeats each source's "content" field, so the kept
+    # sources carry their text forward to the next nodes.
+    system_prompt="""\
+You are a source evaluator. Assess each source for quality and relevance.
+
+Scoring criteria:
+- Relevance to research focus (1-10)
+- Source credibility (1-10)
+- Information depth (1-10)
+- Recency if relevant (1-10)
+
+Your task:
+1. Score each source
+2. Rank by combined score
+3. Select top 10 sources
+4. Note what each source uniquely contributes
+
+Return JSON:
+{
+  "ranked_sources": [
+    {"url": "...", "title": "...", "content": "...", "score": 8.5, "unique_value": "..."},
+    ...
+  ],
+  "source_analysis": "Overview of source quality and coverage"
+}
+""",
+    tools=[],
+    max_retries=3,
+)
+
+# Node 5: Synthesize Findings
+# Distils the ranked sources into findings, themes and a fact-to-URL citation
+# map. All three outputs are read by write-report; `source_citations` is also
+# read by quality-check.
+synthesize_findings_node = NodeSpec(
+    id="synthesize-findings",
+    name="Synthesize Findings",
+    description="Extract key facts from sources and identify common themes",
+    # Pure LLM step: no tools are offered (see tools=[] below).
+    node_type="llm_generate",
+    input_keys=["ranked_sources", "research_focus", "key_aspects"],
+    output_keys=["key_findings", "themes", "source_citations"],
+    output_schema={
+        "key_findings": {
+            "type": "array",
+            "required": True,
+            "description": "List of key findings with sources and confidence",
+        },
+        "themes": {
+            "type": "array",
+            "required": True,
+            "description": "List of themes with descriptions and supporting sources",
+        },
+        # The only object-typed (mapping) output in this pipeline.
+        "source_citations": {
+            "type": "object",
+            "required": True,
+            "description": "Map of facts to supporting URLs",
+        },
+    },
+    system_prompt="""\
+You are a research synthesizer. Analyze multiple sources to extract insights.
+
+Your task:
+1. Identify key facts from each source
+2. Find common themes across sources
+3. Note contradictions or debates
+4. Build a citation map (fact -> source URL)
+
+Return JSON:
+{
+  "key_findings": [
+    {"finding": "...", "sources": ["url1", "url2"], "confidence": "high/medium/low"},
+    ...
+  ],
+  "themes": [
+    {"theme": "...", "description": "...", "supporting_sources": ["url1", ...]},
+    ...
+  ],
+  "source_citations": {
+    "fact or claim": ["supporting url1", "url2"],
+    ...
+  }
+}
+""",
+    tools=[],
+    max_retries=3,
+)
+
+# Node 6: Write Report
+# Drafts the narrative report with numbered citations. `report_content` and
+# `references` are read by quality-check; `references` is also read by
+# save-report.
+write_report_node = NodeSpec(
+    id="write-report",
+    name="Write Report",
+    description="Generate a narrative report with proper citations",
+    # Pure LLM step: no tools are offered (see tools=[] below).
+    node_type="llm_generate",
+    input_keys=[
+        "key_findings",
+        "themes",
+        "source_citations",
+        "research_focus",
+        "ranked_sources",
+    ],
+    output_keys=["report_content", "references"],
+    output_schema={
+        "report_content": {
+            "type": "string",
+            "required": True,
+            "description": "Full markdown report text with citations",
+        },
+        "references": {
+            "type": "array",
+            "required": True,
+            "description": "List of reference objects with number, url, title",
+        },
+    },
+    # The six-part report structure below is the same list the README gives
+    # under "Output"; keep the two in sync.
+    system_prompt="""\
+You are a research report writer. Create a well-structured narrative report.
+
+Report structure:
+1. Executive Summary (2-3 paragraphs)
+2. Introduction (context and scope)
+3. Key Findings (organized by theme)
+4. Analysis (synthesis and implications)
+5. Conclusion
+6. References (numbered list of all sources)
+
+Citation format: Use numbered citations like [1], [2] that correspond to the References section.
+
+IMPORTANT:
+- Every factual claim MUST have a citation
+- Write in clear, professional prose
+- Be objective and balanced
+- Highlight areas of consensus and debate
+
+Return JSON:
+{
+  "report_content": "Full markdown report text with citations...",
+  "references": [
+    {"number": 1, "url": "...", "title": "..."},
+    ...
+  ]
+}
+""",
+    tools=[],
+    max_retries=3,
+)
+
+# Node 7: Quality Check
+# Reviews the draft for uncited claims, broken citations and coherence, and
+# emits a corrected report. `final_report` is read by save-report;
+# `quality_score` and `issues` are not listed in any other node's input_keys.
+quality_check_node = NodeSpec(
+    id="quality-check",
+    name="Quality Check",
+    description="Verify all claims have citations and report is coherent",
+    # Pure LLM step: no tools are offered (see tools=[] below).
+    node_type="llm_generate",
+    input_keys=["report_content", "references", "source_citations"],
+    output_keys=["quality_score", "issues", "final_report"],
+    output_schema={
+        "quality_score": {
+            "type": "number",
+            "required": True,
+            "description": "Quality score 0-1",
+        },
+        "issues": {
+            "type": "array",
+            "required": True,
+            "description": "List of issues found and fixed",
+        },
+        "final_report": {
+            "type": "string",
+            "required": True,
+            "description": "Corrected full report",
+        },
+    },
+    system_prompt="""\
+You are a quality assurance reviewer. Check the research report for issues.
+
+Check for:
+1. Uncited claims (factual statements without [n] citation)
+2. Broken citations (references to non-existent numbers)
+3. Coherence (logical flow between sections)
+4. Completeness (all key aspects covered)
+5. Accuracy (claims match source content)
+
+If issues found, fix them in the final report.
+
+Return JSON:
+{
+  "quality_score": 0.95,
+  "issues": [
+    {"type": "uncited_claim", "location": "paragraph 3", "fixed": true},
+    ...
+  ],
+  "final_report": "Corrected full report with all issues fixed..."
+}
+""",
+    tools=[],
+    max_retries=3,
+)
+
+# Node 8: Save Report
+# Last step of the pipeline (listed as the terminal node in agent.py). Writes
+# the corrected report to disk through the `write_to_file` tool and reports
+# where it was saved.
+save_report_node = NodeSpec(
+    id="save-report",
+    name="Save Report",
+    description="Write the final report to a local markdown file",
+    # Tool-using step: the LLM may call the tools listed in `tools` below.
+    node_type="llm_tool_use",
+    input_keys=["final_report", "references", "research_focus"],
+    output_keys=["file_path", "save_status"],
+    output_schema={
+        # Read by the CLI shell and the README example as output['file_path'].
+        "file_path": {
+            "type": "string",
+            "required": True,
+            "description": "Path where report was saved",
+        },
+        "save_status": {
+            "type": "string",
+            "required": True,
+            "description": "Status of save operation",
+        },
+    },
+    # The filename (date and slug) is produced by the LLM following the
+    # format in the prompt; nothing in this package computes it.
+    system_prompt="""\
+You are a file manager. Save the research report to disk.
+
+Your task:
+1. Generate a filename from the research focus (slugified, with date)
+2. Use the write_to_file tool to save the report as markdown
+3. Save to the ./research_reports/ directory
+
+Filename format: research_YYYY-MM-DD_topic-slug.md
+
+Return JSON:
+{
+  "file_path": "research_reports/research_2026-01-23_topic-name.md",
+  "save_status": "success"
+}
+""",
+    # Tool name must match one exposed by the MCP server in mcp_servers.json.
+    tools=["write_to_file"],
+    max_retries=3,
+)
+
+__all__ = [
+    "parse_query_node",
+    "search_sources_node",
+    "fetch_content_node",
+    "evaluate_sources_node",
+    "synthesize_findings_node",
+    "write_report_node",
+    "quality_check_node",
+    "save_report_node",
+]
@@ -0,0 +1,303 @@
+---
+name: building-agents-core
+description: Core concepts for goal-driven agents - architecture, node types, tool discovery, and workflow overview. Use when starting agent development or need to understand agent fundamentals.
+license: Apache-2.0
+metadata:
+  author: hive
+  version: "1.0"
+  type: foundational
+  part_of: building-agents
+---
+
+# Building Agents - Core Concepts
+
+Foundational knowledge for building goal-driven agents as Python packages.
+
+## Architecture: Python Services (Not JSON Configs)
+
+Agents are built as Python packages:
+
+```
+exports/my_agent/
+├── __init__.py          # Package exports
+├── __main__.py          # CLI (run, info, validate, shell)
+├── agent.py             # Graph construction (goal, edges, agent class)
+├── nodes/__init__.py    # Node definitions (NodeSpec)
+├── config.py            # Runtime config
+└── README.md            # Documentation
+```
+
+**Key Principle: Agent is visible and editable during build**
+
+- ✅ Files created immediately as components are approved
+- ✅ User can watch files grow in their editor
+- ✅ No session state - just direct file writes
+- ✅ No "export" step - agent is ready when build completes
+
+## Core Concepts
+
+### Goal
+
+Success criteria and constraints (written to agent.py)
+
+```python
+goal = Goal(
+    id="research-goal",
+    name="Technical Research Agent",
+    description="Research technical topics thoroughly",
+    success_criteria=[
+        SuccessCriterion(
+            id="completeness",
+            description="Cover all aspects of topic",
+            metric="coverage_score",
+            target=">=0.9",
+            weight=0.4,
+        ),
+        # 3-5 success criteria total
+    ],
+    constraints=[
+        Constraint(
+            id="accuracy",
+            description="All information must be verified",
+            constraint_type="hard",
+            category="quality",
+        ),
+        # 1-5 constraints total
+    ],
+)
+```
+
+### Node
+
+Unit of work (written to nodes/__init__.py)
+
+**Node Types:**
+
+- `llm_generate` - Text generation, parsing
+- `llm_tool_use` - Actions requiring tools
+- `router` - Conditional branching
+- `function` - Deterministic operations
+
+```python
+search_node = NodeSpec(
+    id="search-web",
+    name="Search Web",
+    description="Search for information online",
+    node_type="llm_tool_use",
+    input_keys=["query"],
+    output_keys=["search_results"],
+    system_prompt="Search the web for: {query}",
+    tools=["web_search"],
+    max_retries=3,
+)
+```
+
+### Edge
+
+Connection between nodes (written to agent.py)
+
+**Edge Conditions:**
+
+- `on_success` - Proceed if node succeeds
+- `on_failure` - Handle errors
+- `always` - Always proceed
+- `conditional` - Based on expression
+
+```python
+EdgeSpec(
+    id="search-to-analyze",
+    source="search-web",
+    target="analyze-results",
+    condition=EdgeCondition.ON_SUCCESS,
+    priority=1,
+)
+```
+
+### Pause/Resume
+
+Multi-turn conversations
+
+- **Pause nodes** - Stop execution, wait for user input
+- **Resume entry points** - Continue from pause with user's response
+
+```python
+# Example pause/resume configuration
+pause_nodes = ["request-clarification"]
+entry_points = {
+    "start": "analyze-request",
+    "request-clarification_resume": "process-clarification"
+}
+```
+
+## Tool Discovery & Validation
+
+**CRITICAL:** Before adding a node with tools, you MUST verify the tools exist.
+
+Tools are provided by MCP servers. Never assume a tool exists - always discover dynamically.
+
+### Step 1: Register MCP Server (if not already done)
+
+```python
+mcp__agent-builder__add_mcp_server(
+    name="tools",
+    transport="stdio",
+    command="python",
+    args='["mcp_server.py", "--stdio"]',
+    cwd="../tools"
+)
+```
+
+### Step 2: Discover Available Tools
+
+```python
+# List all tools from all registered servers
+mcp__agent-builder__list_mcp_tools()
+
+# Or list tools from a specific server
+mcp__agent-builder__list_mcp_tools(server_name="tools")
+```
+
+This returns available tools with their descriptions and parameters:
+
+```json
+{
+  "success": true,
+  "tools_by_server": {
+    "tools": [
+      {
+        "name": "web_search",
+        "description": "Search the web...",
+        "parameters": ["query"]
+      },
+      {
+        "name": "web_scrape",
+        "description": "Scrape a URL...",
+        "parameters": ["url"]
+      }
+    ]
+  },
+  "total_tools": 14
+}
+```
+
+### Step 3: Validate Before Adding Nodes
+
+Before writing a node with `tools=[...]`:
+
+1. Call `list_mcp_tools()` to get available tools
+2. Check each tool in your node exists in the response
+3. If a tool doesn't exist:
+   - **DO NOT proceed** with the node
+   - Inform the user: "The tool 'X' is not available. Available tools are: ..."
+   - Ask if they want to use an alternative or proceed without the tool
+
+### Tool Validation Anti-Patterns
+
+- ❌ **Never assume a tool exists** - always call `list_mcp_tools()` first
+- ❌ **Never write a node with unverified tools** - validate before writing
+- ❌ **Never silently drop tools** - if a tool doesn't exist, inform the user
+- ❌ **Never guess tool names** - use exact names from discovery response
+
+### Example Validation Flow
+
+```python
+# 1. User requests: "Add a node that searches the web"
+# 2. Discover available tools
+tools_response = mcp__agent-builder__list_mcp_tools()
+
+# 3. Check if web_search exists
+available = [t["name"] for tools in tools_response["tools_by_server"].values() for t in tools]
+if "web_search" not in available:
+    # Inform user and ask how to proceed
+    print("❌ 'web_search' not available. Available tools:", available)
+else:
+    # Proceed with node creation
+    ...
+```
+
+## Workflow Overview: Incremental File Construction
+
+```
+1. CREATE PACKAGE → mkdir + write skeletons
+2. DEFINE GOAL → Write to agent.py + config.py
+3. FOR EACH NODE:
+   - Propose design
+   - User approves
+   - Write to nodes/__init__.py IMMEDIATELY ← FILE WRITTEN
+   - (Optional) Validate with test_node ← MCP VALIDATION
+   - User can open file and see it
+4. CONNECT EDGES → Update agent.py ← FILE WRITTEN
+   - (Optional) Validate with validate_graph ← MCP VALIDATION
+5. FINALIZE → Write agent class to agent.py ← FILE WRITTEN
+6. DONE - Agent ready at exports/my_agent/
+```
+
+**Files written immediately. MCP tools optional for validation/testing bookkeeping.**
+
+### The Key Difference
+
+**OLD (Bad):**
+
+```
+MCP add_node → Session State → MCP add_node → Session State → ...
+                                                                ↓
+                                                     MCP export_graph
+                                                                ↓
+                                                       Files appear
+```
+
+**NEW (Good):**
+
+```
+Write node to file → (Optional: MCP test_node) → Write node to file → ...
+       ↓                                               ↓
+  File visible                                    File visible
+  immediately                                     immediately
+```
+
+**Bottom line:** Use Write/Edit for construction, MCP for validation if needed.
+
+## When to Use This Skill
+
+Use building-agents-core when:
+- Starting a new agent project and need to understand fundamentals
+- Need to understand agent architecture before building
+- Want to validate tool availability before proceeding
+- Learning about node types, edges, and graph execution
+
+**Next Steps:**
+- Ready to build? → Use `building-agents-construction` skill
+- Need patterns and examples? → Use `building-agents-patterns` skill
+
+## MCP Tools for Validation
+
+After writing files, optionally use MCP tools for validation:
+
+**test_node** - Validate node configuration with mock inputs
+```python
+mcp__agent-builder__test_node(
+    node_id="search-web",
+    test_input='{"query": "test query"}',
+    mock_llm_response='{"results": "mock output"}'
+)
+```
+
+**validate_graph** - Check graph structure
+```python
+mcp__agent-builder__validate_graph()
+# Returns: unreachable nodes, missing connections, etc.
+```
+
+**create_session** - Track session state for bookkeeping
+```python
+mcp__agent-builder__create_session(session_name="my-build")
+```
+
+**Key Point:** Files are written FIRST. MCP tools are for validation only.
+
+## Related Skills
+
+- **building-agents-construction** - Step-by-step building process
+- **building-agents-patterns** - Best practices and examples
+- **agent-workflow** - Complete workflow orchestrator
+- **testing-agent** - Test and validate completed agents
@@ -0,0 +1,497 @@
+---
+name: building-agents-patterns
+description: Best practices, patterns, and examples for building goal-driven agents. Includes pause/resume architecture, hybrid workflows, anti-patterns, and handoff to testing. Use when optimizing agent design.
+license: Apache-2.0
+metadata:
+  author: hive
+  version: "1.0"
+  type: reference
+  part_of: building-agents
+---
+
+# Building Agents - Patterns & Best Practices
+
+Design patterns, examples, and best practices for building robust goal-driven agents.
+
+**Prerequisites:** Complete agent structure using `building-agents-construction`.
+
+## Practical Example: Hybrid Workflow
+
+How to build a node using both direct file writes and optional MCP validation:
+
+```python
+# 1. WRITE TO FILE FIRST (Primary - makes it visible)
+node_code = '''
+search_node = NodeSpec(
+    id="search-web",
+    node_type="llm_tool_use",
+    input_keys=["query"],
+    output_keys=["search_results"],
+    system_prompt="Search the web for: {query}",
+    tools=["web_search"],
+)
+'''
+
+Edit(
+    file_path="exports/research_agent/nodes/__init__.py",
+    old_string="# Nodes will be added here",
+    new_string=node_code
+)
+
+print("✅ Added search_node to nodes/__init__.py")
+print("📁 Open exports/research_agent/nodes/__init__.py to see it!")
+
+# 2. OPTIONALLY VALIDATE WITH MCP (Secondary - bookkeeping)
+validation = mcp__agent-builder__test_node(
+    node_id="search-web",
+    test_input='{"query": "python tutorials"}',
+    mock_llm_response='{"search_results": [...mock results...]}'
+)
+
+print(f"✓ Validation: {validation['success']}")
+```
+
+**User experience:**
+
+- Immediately sees node in their editor (from step 1)
+- Gets validation feedback (from step 2)
+- Can edit the file directly if needed
+
+This combines visibility (files) with validation (MCP tools).
+
+## Pause/Resume Architecture
+
+For agents needing multi-turn conversations with user interaction:
+
+### Basic Pause/Resume Flow
+
+```python
+# Define pause nodes - execution stops at these nodes
+pause_nodes = ["request-clarification", "await-approval"]
+
+# Define entry points - where to resume from each pause
+entry_points = {
+    "start": "analyze-request",  # Initial entry
+    "request-clarification_resume": "process-clarification",  # Resume from clarification
+    "await-approval_resume": "execute-action",  # Resume from approval
+}
+```
+
+### Example: Multi-Turn Research Agent
+
+```python
+# Nodes
+nodes = [
+    NodeSpec(id="analyze-request", ...),
+    NodeSpec(id="request-clarification", ...),  # PAUSE NODE
+    NodeSpec(id="process-clarification", ...),
+    NodeSpec(id="generate-results", ...),
+    NodeSpec(id="await-approval", ...),  # PAUSE NODE
+    NodeSpec(id="execute-action", ...),
+]
+
+# Edges with resume flows
+edges = [
+    EdgeSpec(
+        id="analyze-to-clarify",
+        source="analyze-request",
+        target="request-clarification",
+        condition=EdgeCondition.CONDITIONAL,
+        condition_expr="needs_clarification == true",
+    ),
+    # When resumed, goes to process-clarification
+    EdgeSpec(
+        id="clarify-to-process",
+        source="request-clarification",
+        target="process-clarification",
+        condition=EdgeCondition.ALWAYS,
+    ),
+    EdgeSpec(
+        id="results-to-approval",
+        source="generate-results",
+        target="await-approval",
+        condition=EdgeCondition.ALWAYS,
+    ),
+    # When resumed, goes to execute-action
+    EdgeSpec(
+        id="approval-to-execute",
+        source="await-approval",
+        target="execute-action",
+        condition=EdgeCondition.ALWAYS,
+    ),
+]
+
+# Configuration
+pause_nodes = ["request-clarification", "await-approval"]
+entry_points = {
+    "start": "analyze-request",
+    "request-clarification_resume": "process-clarification",
+    "await-approval_resume": "execute-action",
+}
+```
+
+### Running Pause/Resume Agents
+
+```python
+# Initial run - will pause at first pause node
+result1 = await agent.run(
+    context={"query": "research topic"},
+    session_state=None
+)
+
+# Check if paused
+if result1.paused_at:
+    print(f"Paused at: {result1.paused_at}")
+
+    # Resume with user input
+    result2 = await agent.run(
+        context={"user_response": "clarification details"},
+        session_state=result1.session_state  # Pass previous state
+    )
+```
+
+## Anti-Patterns
+
+### What NOT to Do
+
+❌ **Don't rely on `export_graph`** - Write files immediately, not at end
+```python
+# BAD: Building in session state, exporting at end
+mcp__agent-builder__add_node(...)
+mcp__agent-builder__add_node(...)
+mcp__agent-builder__export_graph()  # Files appear only now
+
+# GOOD: Writing files immediately
+Write(file_path="...", content=node_code)  # File visible now
+Write(file_path="...", content=node_code)  # File visible now
+```
+
+❌ **Don't hide code in session** - Write to files as components are approved
+```python
+# BAD: Accumulating changes invisibly
+session.add_component(component1)
+session.add_component(component2)
+# User can't see anything yet
+
+# GOOD: Incremental visibility
+Edit(file_path="...", ...)  # User sees change 1
+Edit(file_path="...", ...)  # User sees change 2
+```
+
+❌ **Don't wait to write files** - Agent visible from first step
+```python
+# BAD: Building everything before writing
+design_all_nodes()
+design_all_edges()
+write_everything_at_once()
+
+# GOOD: Write as you go
+write_package_structure()  # Visible
+write_goal()  # Visible
+write_node_1()  # Visible
+write_node_2()  # Visible
+```
+
+❌ **Don't batch everything** - Write incrementally
+```python
+# BAD: Batching all nodes
+nodes = [design_node_1(), design_node_2(), ...]
+write_all_nodes(nodes)
+
+# GOOD: One at a time with user feedback
+write_node_1()  # User approves
+write_node_2()  # User approves
+write_node_3()  # User approves
+```
+
+### MCP Tools - Correct Usage
+
+**MCP tools OK for:**
+
+- ✅ `test_node` - Validate node configuration with mock inputs
+- ✅ `validate_graph` - Check graph structure
+- ✅ `create_session` - Track session state for bookkeeping
+- ✅ Other validation tools
+
+**Just don't:** Use MCP as the primary construction method or rely on export_graph
+
+## Best Practices
+
+### 1. Show Progress After Each Write
+
+```python
+# After writing a node
+print("✅ Added analyze_request_node to nodes/__init__.py")
+print("📊 Progress: 1/6 nodes added")
+print("📁 Open exports/my_agent/nodes/__init__.py to see it!")
+```
+
+### 2. Let User Open Files During Build
+
+```python
+# Encourage file inspection
+print("✅ Goal written to agent.py")
+print("")
+print("💡 Tip: Open exports/my_agent/agent.py in your editor to see the goal!")
+```
+
+### 3. Write Incrementally - One Component at a Time
+
+```python
+# Good flow
+write_package_structure()
+show_user("Package created")
+
+write_goal()
+show_user("Goal written")
+
+for node in nodes:
+    get_approval(node)
+    write_node(node)
+    show_user(f"Node {node.id} written")
+```
+
+### 4. Test As You Build
+
+```python
+# After adding several nodes
+print("💡 You can test current state with:")
+print("  PYTHONPATH=core:exports python -m my_agent validate")
+print("  PYTHONPATH=core:exports python -m my_agent info")
+```
+
+### 5. Keep User Informed
+
+```python
+# Clear status updates
+print("🔨 Creating package structure...")
+print("✅ Package created: exports/my_agent/")
+print("")
+print("📝 Next: Define agent goal")
+```
+
+## Continuous Monitoring Agents
+
+For agents that run continuously without terminal nodes:
+
+```python
+# No terminal nodes - loops forever
+terminal_nodes = []
+
+# Workflow loops back to start
+edges = [
+    EdgeSpec(id="monitor-to-check", source="monitor", target="check-condition"),
+    EdgeSpec(id="check-to-wait", source="check-condition", target="wait"),
+    EdgeSpec(id="wait-to-monitor", source="wait", target="monitor"),  # Loop
+]
+
+# Entry node only
+entry_node = "monitor"
+entry_points = {"start": "monitor"}
+pause_nodes = []
+```
+
+**Example: File Monitor**
+
+```python
+nodes = [
+    NodeSpec(id="list-files", ...),
+    NodeSpec(id="check-new-files", node_type="router", ...),
+    NodeSpec(id="process-files", ...),
+    NodeSpec(id="wait-interval", node_type="function", ...),
+]
+
+edges = [
+    EdgeSpec(id="list-to-check", source="list-files", target="check-new-files"),
+    EdgeSpec(
+        id="check-to-process",
+        source="check-new-files",
+        target="process-files",
+        condition=EdgeCondition.CONDITIONAL,
+        condition_expr="new_files_count > 0",
+    ),
+    EdgeSpec(
+        id="check-to-wait",
+        source="check-new-files",
+        target="wait-interval",
+        condition=EdgeCondition.CONDITIONAL,
+        condition_expr="new_files_count == 0",
+    ),
+    EdgeSpec(id="process-to-wait", source="process-files", target="wait-interval"),
+    EdgeSpec(id="wait-to-list", source="wait-interval", target="list-files"),  # Loop back
+]
+
+terminal_nodes = []  # No terminal - runs forever
+```
+
+## Complex Routing Patterns
+
+### Multi-Condition Router
+
+```python
+router_node = NodeSpec(
+    id="decision-router",
+    node_type="router",
+    input_keys=["analysis_result"],
+    output_keys=["decision"],
+    system_prompt="""
+    Based on the analysis result, decide the next action:
+    - If confidence > 0.9: route to "execute"
+    - If 0.5 <= confidence <= 0.9: route to "review"
+    - If confidence < 0.5: route to "clarify"
+
+    Return: {"decision": "execute|review|clarify"}
+    """,
+)
+
+# Edges for each route
+edges = [
+    EdgeSpec(
+        id="router-to-execute",
+        source="decision-router",
+        target="execute-action",
+        condition=EdgeCondition.CONDITIONAL,
+        condition_expr="decision == 'execute'",
+        priority=1,
+    ),
+    EdgeSpec(
+        id="router-to-review",
+        source="decision-router",
+        target="human-review",
+        condition=EdgeCondition.CONDITIONAL,
+        condition_expr="decision == 'review'",
+        priority=2,
+    ),
+    EdgeSpec(
+        id="router-to-clarify",
+        source="decision-router",
+        target="request-clarification",
+        condition=EdgeCondition.CONDITIONAL,
+        condition_expr="decision == 'clarify'",
+        priority=3,
+    ),
+]
+```
+
+## Error Handling Patterns
+
+### Graceful Failure with Fallback
+
+```python
+# Primary node with error handling
+nodes = [
+    NodeSpec(id="api-call", max_retries=3, ...),
+    NodeSpec(id="fallback-cache", ...),
+    NodeSpec(id="report-error", ...),
+]
+
+edges = [
+    # Success path
+    EdgeSpec(
+        id="api-success",
+        source="api-call",
+        target="process-results",
+        condition=EdgeCondition.ON_SUCCESS,
+    ),
+    # Fallback on failure
+    EdgeSpec(
+        id="api-to-fallback",
+        source="api-call",
+        target="fallback-cache",
+        condition=EdgeCondition.ON_FAILURE,
+        priority=1,
+    ),
+    # Report if fallback also fails
+    EdgeSpec(
+        id="fallback-to-error",
+        source="fallback-cache",
+        target="report-error",
+        condition=EdgeCondition.ON_FAILURE,
+        priority=1,
+    ),
+]
+```
+
+## Performance Optimization
+
+### Parallel Node Execution
+
+```python
+# Use multiple edges from same source for parallel execution
+edges = [
+    EdgeSpec(
+        id="start-to-search1",
+        source="start",
+        target="search-source-1",
+        condition=EdgeCondition.ALWAYS,
+    ),
+    EdgeSpec(
+        id="start-to-search2",
+        source="start",
+        target="search-source-2",
+        condition=EdgeCondition.ALWAYS,
+    ),
+    EdgeSpec(
+        id="start-to-search3",
+        source="start",
+        target="search-source-3",
+        condition=EdgeCondition.ALWAYS,
+    ),
+    # Converge results
+    EdgeSpec(
+        id="search1-to-merge",
+        source="search-source-1",
+        target="merge-results",
+    ),
+    EdgeSpec(
+        id="search2-to-merge",
+        source="search-source-2",
+        target="merge-results",
+    ),
+    EdgeSpec(
+        id="search3-to-merge",
+        source="search-source-3",
+        target="merge-results",
+    ),
+]
+```
+
+## Handoff to Testing
+
+When agent is complete, transition to testing phase:
+
+```python
+print("""
+✅ Agent complete: exports/my_agent/
+
+Next steps:
+1. Switch to testing-agent skill
+2. Generate and approve tests
+3. Run evaluation
+4. Debug any failures
+
+Command: "Test the agent at exports/my_agent/"
+""")
+```
+
+### Pre-Testing Checklist
+
+Before handing off to testing-agent:
+
+- [ ] Agent structure validates: `PYTHONPATH=core:exports python -m agent_name validate`
+- [ ] All nodes defined in nodes/__init__.py
+- [ ] All edges connect valid nodes
+- [ ] Entry node specified
+- [ ] Agent can be imported: `from exports.agent_name import default_agent`
+- [ ] README.md with usage instructions
+- [ ] CLI commands work (info, validate)
+
+## Related Skills
+
+- **building-agents-core** - Fundamental concepts
+- **building-agents-construction** - Step-by-step building
+- **testing-agent** - Test and validate agents
+- **agent-workflow** - Complete workflow orchestrator
+
+---
+
+**Remember: Agent is actively constructed, visible the whole time. No hidden state. No surprise exports. Just transparent, incremental file building.**
@@ -0,0 +1,572 @@
+---
+name: setup-credentials
+description: Set up and install credentials for an agent. Detects missing credentials from agent config, collects them from the user, and stores them securely in the encrypted credential store at ~/.hive/credentials.
+license: Apache-2.0
+metadata:
+  author: hive
+  version: "2.1"
+  type: utility
+---
+
+# Setup Credentials
+
+Interactive credential setup for agents with multiple authentication options. Detects what's missing, offers auth method choices, validates with health checks, and stores credentials securely.
+
+## When to Use
+
+- Before running or testing an agent for the first time
+- When `AgentRunner.run()` fails with "missing required credentials"
+- When a user asks to configure credentials for an agent
+- After building a new agent that uses tools requiring API keys
+
+## Workflow
+
+### Step 1: Identify the Agent
+
+Determine which agent needs credentials. The user will either:
+
+- Name the agent directly (e.g., "set up credentials for hubspot-agent")
+- Have an agent directory open (check `exports/` for agent dirs)
+- Be working on an agent in the current session
+
+Locate the agent's directory under `exports/{agent_name}/`.
+
+### Step 2: Detect Required Credentials
+
+Read the agent's configuration to determine which tools and node types it uses:
+
+```python
+from core.framework.runner import AgentRunner
+
+runner = AgentRunner.load("exports/{agent_name}")
+validation = runner.validate()
+
+# validation.missing_credentials contains env var names
+# validation.warnings contains detailed messages with help URLs
+```
+
+Alternatively, check the credential store directly:
+
+```python
+from core.framework.credentials import CredentialStore
+
+# Use encrypted storage (default: ~/.hive/credentials)
+store = CredentialStore.with_encrypted_storage()
+
+# Check what's available
+available = store.list_credentials()
+print(f"Available credentials: {available}")
+
+# Check if specific credential exists
+if store.is_available("hubspot"):
+    print("HubSpot credential found")
+else:
+    print("HubSpot credential missing")
+```
+
+To see all known credential specs (for help URLs and setup instructions):
+
+```python
+from aden_tools.credentials import CREDENTIAL_SPECS
+
+for name, spec in CREDENTIAL_SPECS.items():
+    print(f"{name}: env_var={spec.env_var}, aden={spec.aden_supported}")
+```
+
+### Step 3: Present Auth Options for Each Missing Credential
+
+For each missing credential, check what authentication methods are available:
+
+```python
+from aden_tools.credentials import CREDENTIAL_SPECS
+
+spec = CREDENTIAL_SPECS.get("hubspot")
+if spec:
+    # Determine available auth options
+    auth_options = []
+    if spec.aden_supported:
+        auth_options.append("aden")
+    if spec.direct_api_key_supported:
+        auth_options.append("direct")
+    auth_options.append("custom")  # Always available
+
+    # Get setup info
+    setup_info = {
+        "env_var": spec.env_var,
+        "description": spec.description,
+        "help_url": spec.help_url,
+        "api_key_instructions": spec.api_key_instructions,
+    }
+```
+
+Present the available options using AskUserQuestion:
+
+```
+Choose how to configure HUBSPOT_ACCESS_TOKEN:
+
+  1) Aden Authorization Server (Recommended)
+     Secure OAuth2 flow via integration.adenhq.com
+     - Quick setup with automatic token refresh
+     - No need to manage API keys manually
+
+  2) Direct API Key
+     Enter your own API key manually
+     - Requires creating a HubSpot Private App
+     - Full control over scopes and permissions
+
+  3) Custom Credential Store (Advanced)
+     Programmatic configuration for CI/CD
+     - For automated deployments
+     - Requires manual API calls
+```
+
+### Step 4: Execute Auth Flow Based on User Choice
+
+#### Option 1: Aden Authorization Server
+
+This is the recommended flow for supported integrations (HubSpot, etc.).
+
+**How Aden OAuth Works:**
+
+The ADEN_API_KEY represents a user who has already completed OAuth authorization on Aden's platform. When users sign up and connect integrations on Aden, those OAuth tokens are stored server-side. Having an ADEN_API_KEY means:
+
+1. User has an Aden account
+2. User has already authorized integrations (HubSpot, etc.) via OAuth on Aden
+3. We just need to sync those credentials down to the local credential store
+
+**4.1a. Check for ADEN_API_KEY**
+
+```python
+import os
+aden_key = os.environ.get("ADEN_API_KEY")
+```
+
+If not set, guide user to get one from Aden (this is where they do OAuth):
+
+```python
+from aden_tools.credentials import open_browser, get_aden_setup_url
+
+# Open browser to Aden - user will sign up and connect integrations there
+url = get_aden_setup_url()  # https://integration.adenhq.com/setup
+success, msg = open_browser(url)
+
+print("Please sign in to Aden and connect your integrations (HubSpot, etc.).")
+print("Once done, copy your API key and return here.")
+```
+
+Ask user to provide the ADEN_API_KEY they received.
+
+**4.1b. Save ADEN_API_KEY to Shell Config**
+
+With user approval, persist ADEN_API_KEY to their shell config:
+
+```python
+from aden_tools.credentials import (
+    detect_shell,
+    add_env_var_to_shell_config,
+    get_shell_source_command,
+)
+
+shell_type = detect_shell()  # 'bash', 'zsh', or 'unknown'
+
+# Ask user for approval before modifying shell config
+# If approved:
+success, config_path = add_env_var_to_shell_config(
+    "ADEN_API_KEY",
+    user_provided_key,
+    comment="Aden authorization server API key"
+)
+
+if success:
+    source_cmd = get_shell_source_command()
+    print(f"Saved to {config_path}")
+    print(f"Run: {source_cmd}")
+```
+
+Also save to `~/.hive/configuration.json` for the framework:
+
+```python
+import json
+from pathlib import Path
+
+config_path = Path.home() / ".hive" / "configuration.json"
+config = json.loads(config_path.read_text()) if config_path.exists() else {}
+
+config["aden"] = {
+    "api_key_configured": True,
+    "api_url": "https://api.adenhq.com"
+}
+
+config_path.parent.mkdir(parents=True, exist_ok=True)
+config_path.write_text(json.dumps(config, indent=2))
+```
+
+**4.1c. Sync Credentials from Aden Server**
+
+Since the user has already authorized integrations on Aden, use the one-liner factory method:
+
+```python
+from core.framework.credentials import CredentialStore
+
+# This single call handles everything:
+# - Creates encrypted local storage at ~/.hive/credentials
+# - Configures Aden client from ADEN_API_KEY env var
+# - Syncs all credentials from Aden server automatically
+store = CredentialStore.with_aden_sync(
+    base_url="https://api.adenhq.com",
+    auto_sync=True,  # Syncs on creation
+)
+
+# Check what was synced
+synced = store.list_credentials()
+print(f"Synced credentials: {synced}")
+
+# If the required credential wasn't synced, the user hasn't authorized it on Aden yet
+if "hubspot" not in synced:
+    print("HubSpot not found in your Aden account.")
+    print("Please visit https://integration.adenhq.com to connect HubSpot, then try again.")
+```
+
+For more control over the sync process:
+
+```python
+from core.framework.credentials import CredentialStore
+from core.framework.credentials.aden import (
+    AdenCredentialClient,
+    AdenClientConfig,
+    AdenSyncProvider,
+)
+
+# Create client (API key loaded from ADEN_API_KEY env var)
+client = AdenCredentialClient(AdenClientConfig(
+    base_url="https://api.adenhq.com",
+))
+
+# Create provider and store
+provider = AdenSyncProvider(client=client)
+store = CredentialStore.with_encrypted_storage()
+
+# Manual sync
+synced_count = provider.sync_all(store)
+print(f"Synced {synced_count} credentials from Aden")
+```
+
+**4.1d. Run Health Check**
+
+```python
+from aden_tools.credentials import check_credential_health
+
+# Get the token from the store
+cred = store.get_credential("hubspot")
+token = cred.keys["access_token"].value.get_secret_value()
+
+result = check_credential_health("hubspot", token)
+if result.valid:
+    print("HubSpot credentials validated successfully!")
+else:
+    print(f"Validation failed: {result.message}")
+    # Offer to retry the OAuth flow
+```
+
+#### Option 2: Direct API Key
+
+For users who prefer manual API key management.
+
+**4.2a. Show Setup Instructions**
+
+```python
+from aden_tools.credentials import CREDENTIAL_SPECS
+
+spec = CREDENTIAL_SPECS.get("hubspot")
+if spec and spec.api_key_instructions:
+    print(spec.api_key_instructions)
+# Output:
+# To get a HubSpot Private App token:
+# 1. Go to HubSpot Settings > Integrations > Private Apps
+# 2. Click "Create a private app"
+# 3. Name your app (e.g., "Hive Agent")
+# ...
+
+if spec and spec.help_url:
+    print(f"More info: {spec.help_url}")
+```
+
+**4.2b. Collect API Key from User**
+
+Use AskUserQuestion to securely collect the API key:
+
+```
+Please provide your HubSpot access token:
+(This will be stored securely in ~/.hive/credentials)
+```
+
+**4.2c. Run Health Check Before Storing**
+
+```python
+from aden_tools.credentials import check_credential_health
+
+result = check_credential_health("hubspot", user_provided_token)
+if not result.valid:
+    print(f"Warning: {result.message}")
+    # Ask user if they want to:
+    # 1. Try a different token
+    # 2. Continue anyway (not recommended)
+```
+
+**4.2d. Store in Encrypted Credential Store**
+
+```python
+from core.framework.credentials import CredentialStore, CredentialObject, CredentialKey
+from pydantic import SecretStr
+
+store = CredentialStore.with_encrypted_storage()
+
+cred = CredentialObject(
+    id="hubspot",
+    name="HubSpot Access Token",
+    keys={
+        "access_token": CredentialKey(
+            name="access_token",
+            value=SecretStr(user_provided_token),
+        )
+    },
+)
+store.save_credential(cred)
+```
+
+**4.2e. Export to Current Session**
+
+```bash
+export HUBSPOT_ACCESS_TOKEN="the-value"
+```
+
+#### Option 3: Custom Credential Store (Advanced)
+
+For programmatic/CI/CD setups.
+
+**4.3a. Show Documentation**
+
+```
+For advanced credential management, you can use the CredentialStore API directly:
+
+  from core.framework.credentials import CredentialStore, CredentialObject, CredentialKey
+  from pydantic import SecretStr
+
+  store = CredentialStore.with_encrypted_storage()
+
+  cred = CredentialObject(
+      id="hubspot",
+      name="HubSpot Access Token",
+      keys={"access_token": CredentialKey(name="access_token", value=SecretStr("..."))}
+  )
+  store.save_credential(cred)
+
+For CI/CD environments:
+  - Set HIVE_CREDENTIAL_KEY for encryption
+  - Pre-populate ~/.hive/credentials programmatically
+  - Or use environment variables directly (HUBSPOT_ACCESS_TOKEN)
+
+Documentation: See core/framework/credentials/README.md
+```
+
+### Step 5: Record Configuration Method
+
+Track which auth method was used for each credential in `~/.hive/configuration.json`:
+
+```python
+import json
+from pathlib import Path
+from datetime import datetime
+
+config_path = Path.home() / ".hive" / "configuration.json"
+config = json.loads(config_path.read_text()) if config_path.exists() else {}
+
+if "credential_methods" not in config:
+    config["credential_methods"] = {}
+
+config["credential_methods"]["hubspot"] = {
+    "method": "aden",  # or "direct" or "custom"
+    "configured_at": datetime.now().isoformat(),
+}
+
+config_path.write_text(json.dumps(config, indent=2))
+```
+
+### Step 6: Verify All Credentials
+
+Run validation again to confirm everything is set:
+
+```python
+runner = AgentRunner.load("exports/{agent_name}")
+validation = runner.validate()
+assert not validation.missing_credentials, "Still missing credentials!"
+```
+
+Report the result to the user.
+
+## Health Check Reference
+
+Health checks validate credentials by making lightweight API calls:
+
+| Credential     | Endpoint                                | What It Checks                    |
+| -------------- | --------------------------------------- | --------------------------------- |
+| `hubspot`      | `GET /crm/v3/objects/contacts?limit=1`  | Bearer token validity, CRM scopes |
+| `brave_search` | `GET /res/v1/web/search?q=test&count=1` | API key validity                  |
+
+```python
+from aden_tools.credentials import check_credential_health, HealthCheckResult
+
+result: HealthCheckResult = check_credential_health("hubspot", token_value)
+# result.valid: bool
+# result.message: str
+# result.details: dict (status_code, rate_limited, etc.)
+```
+
+## Encryption Key (HIVE_CREDENTIAL_KEY)
+
+The encrypted credential store requires `HIVE_CREDENTIAL_KEY` to encrypt/decrypt credentials.
+
+- If the user doesn't have one, `EncryptedFileStorage` will auto-generate one and log it
+- The user MUST persist this key (e.g., in `~/.bashrc` or a secrets manager)
+- Without this key, stored credentials cannot be decrypted
+- This is the ONLY secret that should live in `~/.bashrc` or environment config
+
+If `HIVE_CREDENTIAL_KEY` is not set:
+
+1. Let the store generate one
+2. Tell the user to save it: `export HIVE_CREDENTIAL_KEY="{generated_key}"`
+3. Recommend adding it to `~/.bashrc` or their shell profile
+
+## Security Rules
+
+- **NEVER** log, print, or echo credential values in tool output
+- **NEVER** store credentials in plaintext files, git-tracked files, or agent configs
+- **NEVER** hardcode credentials in source code
+- **ALWAYS** use `SecretStr` from Pydantic when handling credential values in Python
+- **ALWAYS** use the encrypted credential store (`~/.hive/credentials`) for persistence
+- **ALWAYS** run health checks before storing credentials (when possible)
+- **ALWAYS** verify credentials were stored by re-running validation, not by reading them back
+- When modifying `~/.bashrc` or `~/.zshrc`, confirm with the user first
+
+## Credential Sources Reference
+
+All credential specs are defined in `tools/src/aden_tools/credentials/`:
+
+| File              | Category      | Credentials                                   | Aden Supported |
+| ----------------- | ------------- | --------------------------------------------- | -------------- |
+| `llm.py`          | LLM Providers | `anthropic`                                   | No             |
+| `search.py`       | Search Tools  | `brave_search`, `google_search`, `google_cse` | No             |
+| `integrations.py` | Integrations  | `hubspot`                                     | Yes            |
+
+**Note:** Additional LLM providers (Cerebras, Groq, OpenAI) are handled by LiteLLM via environment
+variables (`CEREBRAS_API_KEY`, `GROQ_API_KEY`, `OPENAI_API_KEY`) but are not yet in CREDENTIAL_SPECS.
+Add them to `llm.py` as needed.
+
+To check what's registered:
+
+```python
+from aden_tools.credentials import CREDENTIAL_SPECS
+for name, spec in CREDENTIAL_SPECS.items():
+    print(f"{name}: aden={spec.aden_supported}, direct={spec.direct_api_key_supported}")
+```
+
+## Migration: CredentialManager → CredentialStore
+
+**CredentialManager is deprecated.** Use CredentialStore instead.
+
+| Old (Deprecated)                          | New (Recommended)                                                    |
+| ----------------------------------------- | -------------------------------------------------------------------- |
+| `CredentialManager()`                     | `CredentialStore.with_encrypted_storage()`                           |
+| `creds.get("hubspot")`                    | `store.get("hubspot")` or `store.get_key("hubspot", "access_token")` |
+| `creds.validate_for_tools(tools)`         | Use `store.is_available(cred_id)` per credential                     |
+| `creds.get_auth_options("hubspot")`       | Check `CREDENTIAL_SPECS["hubspot"].aden_supported`                   |
+| `creds.get_setup_instructions("hubspot")` | Access `CREDENTIAL_SPECS["hubspot"]` directly                        |
+
+**Why migrate?**
+
+- **CredentialStore** supports encrypted storage, multi-key credentials, template resolution, and automatic token refresh
+- **CredentialManager** only reads from environment variables and .env files (no encryption, no refresh)
+- **CredentialStoreAdapter** exists for backward compatibility during migration
+
+```python
+# Old way (deprecated)
+from aden_tools.credentials import CredentialManager
+creds = CredentialManager()
+token = creds.get("hubspot")
+
+# New way (recommended)
+from core.framework.credentials import CredentialStore
+store = CredentialStore.with_encrypted_storage()
+token = store.get("hubspot")
+
+# With Aden sync (recommended for OAuth integrations)
+store = CredentialStore.with_aden_sync()
+token = store.get_key("hubspot", "access_token")
+```
+
+## Example Session
+
+```
+User: /setup-credentials for my hubspot-agent
+
+Agent: Let me check what credentials your hubspot-agent needs.
+
+[Runs validation, finds ANTHROPIC_API_KEY and HUBSPOT_ACCESS_TOKEN missing]
+
+Agent: Your hubspot-agent requires 2 credentials:
+
+  1. ANTHROPIC_API_KEY - Anthropic API key for LLM calls
+  2. HUBSPOT_ACCESS_TOKEN - HubSpot access token
+
+Let's set these up one at a time.
+
+--- Setting up ANTHROPIC_API_KEY ---
+
+This credential only supports direct API key entry.
+
+[Shows instructions for getting Anthropic API key]
+[AskUserQuestion: "Please provide your Anthropic API key:"]
+[User provides key]
+[Stores in credential store, exports to env]
+
+--- Setting up HUBSPOT_ACCESS_TOKEN ---
+
+[AskUserQuestion with 3 options]
+
+Choose how to configure HUBSPOT_ACCESS_TOKEN:
+  1) Aden Authorization Server (Recommended)
+  2) Direct API Key
+  3) Custom Credential Store (Advanced)
+
+User: 1 (Aden)
+
+Agent: Great! Let me check if you have an Aden API key configured...
+
+[Checks for ADEN_API_KEY, not found]
+[Opens browser to integration.adenhq.com/setup]
+
+Agent: I've opened Aden's setup page. Please:
+  1. Sign in or create an account
+  2. Connect your HubSpot account (OAuth happens on Aden's site)
+  3. Copy the API key shown after setup
+
+[AskUserQuestion: "Please provide your Aden API key:"]
+[User provides key]
+
+Agent: Would you like me to save this to your shell config (~/.zshrc) for future sessions?
+
+[User approves]
+[Saves to ~/.zshrc and ~/.hive/configuration.json]
+
+Agent: Let me sync your credentials from Aden...
+
+[Syncs credentials from Aden server - OAuth already done on Aden's side]
+[Runs health check]
+
+Agent: HubSpot credentials validated successfully!
+
+All credentials are now configured:
+  - ANTHROPIC_API_KEY: Stored in encrypted credential store
+  - HUBSPOT_ACCESS_TOKEN: Synced from Aden (OAuth completed on Aden)
+  - Validation passed - your agent is ready to run!
+```
@@ -0,0 +1,351 @@
+# Example: Testing a YouTube Research Agent
+
+This example walks through testing a YouTube research agent that finds relevant videos based on a topic.
+
+## Prerequisites
+
+- Agent built with building-agents skill at `exports/youtube-research/`
+- Goal defined with success criteria and constraints
+
+## Step 1: Load the Goal
+
+First, load the goal that was defined during the Goal stage:
+
+```json
+{
+    "id": "youtube-research",
+    "name": "YouTube Research Agent",
+    "description": "Find relevant YouTube videos on a given topic",
+    "success_criteria": [
+        {
+            "id": "find_videos",
+            "description": "Find 3-5 relevant videos",
+            "metric": "video_count",
+            "target": "3-5",
+            "weight": 1.0
+        },
+        {
+            "id": "relevance",
+            "description": "Videos must be relevant to the topic",
+            "metric": "relevance_score",
+            "target": ">0.8",
+            "weight": 0.8
+        }
+    ],
+    "constraints": [
+        {
+            "id": "api_limits",
+            "description": "Must not exceed YouTube API rate limits",
+            "constraint_type": "hard",
+            "category": "technical"
+        },
+        {
+            "id": "content_safety",
+            "description": "Must filter out inappropriate content",
+            "constraint_type": "hard",
+            "category": "safety"
+        }
+    ]
+}
+```
+
+## Step 2: Get Constraint Test Guidelines
+
+During the Goal stage (or early Eval), get test guidelines for constraints:
+
+```python
+result = generate_constraint_tests(
+    goal_id="youtube-research",
+    goal_json='<goal JSON above>',
+    agent_path="exports/youtube-research"
+)
+```
+
+**The result contains guidelines (not generated tests):**
+- `output_file`: Where to write tests
+- `file_header`: Imports and fixtures to use
+- `test_template`: Format for test functions
+- `constraints_formatted`: The constraints to test
+- `test_guidelines`: Rules for writing tests
+
+## Step 3: Write Constraint Tests
+
+Using the guidelines, write tests directly with the Write tool:
+
+```python
+# Write constraint tests using the provided file_header and guidelines
+Write(
+    file_path="exports/youtube-research/tests/test_constraints.py",
+    content='''
+"""Constraint tests for youtube-research agent."""
+
+import os
+import pytest
+from exports.youtube_research import default_agent
+
+
+pytestmark = pytest.mark.skipif(
+    not os.environ.get("ANTHROPIC_API_KEY") and not os.environ.get("MOCK_MODE"),
+    reason="API key required for real testing."
+)
+
+
+@pytest.mark.asyncio
+async def test_constraint_api_limits_respected():
+    """Verify API rate limits are not exceeded."""
+    import time
+    mock_mode = bool(os.environ.get("MOCK_MODE"))
+
+    for i in range(10):
+        result = await default_agent.run({"topic": f"test_{i}"}, mock_mode=mock_mode)
+        time.sleep(0.1)
+
+    # Should complete without rate limit errors
+    assert "rate limit" not in str(result).lower()
+
+
+@pytest.mark.asyncio
+async def test_constraint_content_safety_filter():
+    """Verify inappropriate content is filtered."""
+    mock_mode = bool(os.environ.get("MOCK_MODE"))
+    result = await default_agent.run({"topic": "general topic"}, mock_mode=mock_mode)
+
+    for video in result.videos:
+        assert video.safe_for_work is True
+        assert video.age_restricted is False
+'''
+)
+```
+
+## Step 4: Get Success Criteria Test Guidelines
+
+After the agent is built, get success criteria test guidelines:
+
+```python
+result = generate_success_tests(
+    goal_id="youtube-research",
+    goal_json='<goal JSON>',
+    node_names="search_node,filter_node,rank_node,format_node",
+    tool_names="youtube_search,video_details,channel_info",
+    agent_path="exports/youtube-research"
+)
+```
+
+## Step 5: Write Success Criteria Tests
+
+Using the guidelines, write success criteria tests:
+
+```python
+Write(
+    file_path="exports/youtube-research/tests/test_success_criteria.py",
+    content='''
+"""Success criteria tests for youtube-research agent."""
+
+import os
+import pytest
+from exports.youtube_research import default_agent
+
+
+pytestmark = pytest.mark.skipif(
+    not os.environ.get("ANTHROPIC_API_KEY") and not os.environ.get("MOCK_MODE"),
+    reason="API key required for real testing."
+)
+
+
+@pytest.mark.asyncio
+async def test_find_videos_happy_path():
+    """Test finding videos for a common topic."""
+    mock_mode = bool(os.environ.get("MOCK_MODE"))
+    result = await default_agent.run({"topic": "machine learning"}, mock_mode=mock_mode)
+
+    assert result.success
+    assert 3 <= len(result.videos) <= 5
+    assert all(v.title for v in result.videos)
+    assert all(v.video_id for v in result.videos)
+
+
+@pytest.mark.asyncio
+async def test_find_videos_minimum_boundary():
+    """Test at minimum threshold (3 videos)."""
+    mock_mode = bool(os.environ.get("MOCK_MODE"))
+    result = await default_agent.run({"topic": "niche topic xyz"}, mock_mode=mock_mode)
+
+    assert len(result.videos) >= 3
+
+
+@pytest.mark.asyncio
+async def test_relevance_score_threshold():
+    """Test relevance scoring meets threshold."""
+    mock_mode = bool(os.environ.get("MOCK_MODE"))
+    result = await default_agent.run({"topic": "python programming"}, mock_mode=mock_mode)
+
+    for video in result.videos:
+        assert video.relevance_score > 0.8
+
+
+@pytest.mark.asyncio
+async def test_find_videos_no_results_graceful():
+    """Test graceful handling of no results."""
+    mock_mode = bool(os.environ.get("MOCK_MODE"))
+    result = await default_agent.run({"topic": "xyznonexistent123"}, mock_mode=mock_mode)
+
+    # Should not crash, return empty or message
+    assert result.videos == [] or result.message
+'''
+)
+```
+
+## Step 6: Run All Tests
+
+Execute all tests:
+
+```python
+result = run_tests(
+    goal_id="youtube-research",
+    agent_path="exports/youtube-research",
+    test_types='["all"]',
+    parallel=4
+)
+```
+
+**Results:**
+
+```json
+{
+    "goal_id": "youtube-research",
+    "overall_passed": false,
+    "summary": {
+        "total": 6,
+        "passed": 5,
+        "failed": 1,
+        "pass_rate": "83.3%"
+    },
+    "duration_ms": 4521,
+    "results": [
+        {"test_id": "test_constraint_api_001", "passed": true, "duration_ms": 1234},
+        {"test_id": "test_constraint_content_001", "passed": true, "duration_ms": 456},
+        {"test_id": "test_success_001", "passed": true, "duration_ms": 789},
+        {"test_id": "test_success_002", "passed": true, "duration_ms": 654},
+        {"test_id": "test_success_003", "passed": true, "duration_ms": 543},
+        {"test_id": "test_success_004", "passed": false, "duration_ms": 845,
+         "error_category": "IMPLEMENTATION_ERROR",
+         "error_message": "TypeError: 'NoneType' object has no attribute 'videos'"}
+    ]
+}
+```
+
+## Step 7: Debug the Failed Test
+
+```python
+result = debug_test(
+    goal_id="youtube-research",
+    test_name="test_find_videos_no_results_graceful",
+    agent_path="exports/youtube-research"
+)
+```
+
+**Debug Output:**
+
+```json
+{
+    "test_id": "test_success_004",
+    "test_name": "test_find_videos_no_results_graceful",
+    "input": {"topic": "xyznonexistent123"},
+    "expected": "Empty list or message",
+    "actual": {"error": "TypeError: 'NoneType' object has no attribute 'videos'"},
+    "passed": false,
+    "error_message": "TypeError: 'NoneType' object has no attribute 'videos'",
+    "error_category": "IMPLEMENTATION_ERROR",
+    "stack_trace": "Traceback (most recent call last):\n  File \"filter_node.py\", line 42\n    for video in result.videos:\nTypeError: 'NoneType' object has no attribute 'videos'",
+    "logs": [
+        {"timestamp": "2026-01-20T10:00:01", "node": "search_node", "level": "INFO", "msg": "Searching for: xyznonexistent123"},
+        {"timestamp": "2026-01-20T10:00:02", "node": "search_node", "level": "WARNING", "msg": "No results found"},
+        {"timestamp": "2026-01-20T10:00:02", "node": "filter_node", "level": "ERROR", "msg": "NoneType error"}
+    ],
+    "runtime_data": {
+        "execution_path": ["start", "search_node", "filter_node"],
+        "node_outputs": {
+            "search_node": null
+        }
+    },
+    "suggested_fix": "Add null check in filter_node before accessing .videos attribute",
+    "iteration_guidance": {
+        "stage": "Agent",
+        "action": "Fix the code in nodes/edges",
+        "restart_required": false,
+        "description": "The goal is correct, but filter_node doesn't handle null results from search_node."
+    }
+}
+```
+
+## Step 8: Iterate Based on Category
+
+Since this is an **IMPLEMENTATION_ERROR**, we:
+
+1. **Don't restart** the Goal → Agent → Eval flow
+2. **Fix the agent** using building-agents skill:
+   - Modify `filter_node` to handle null results
+3. **Re-run Eval** (tests only)
+
+### Fix in building-agents:
+
+```python
+# Update the filter_node to handle null
+add_node(
+    node_id="filter_node",
+    name="Filter Node",
+    description="Filter and rank videos",
+    node_type="function",
+    input_keys=["search_results"],
+    output_keys=["filtered_videos"],
+    system_prompt="""
+    Filter videos by relevance.
+    IMPORTANT: Handle case where search_results is None or empty.
+    Return empty list if no results.
+    """
+)
+```
+
+### Re-export and re-test:
+
+```python
+# Re-export the fixed agent
+export_graph(path="exports/youtube-research")
+
+# Re-run tests
+result = run_tests(
+    goal_id="youtube-research",
+    agent_path="exports/youtube-research",
+    test_types='["all"]'
+)
+```
+
+**Updated Results:**
+
+```json
+{
+    "goal_id": "youtube-research",
+    "overall_passed": true,
+    "summary": {
+        "total": 6,
+        "passed": 6,
+        "failed": 0,
+        "pass_rate": "100.0%"
+    }
+}
+```
+
+## Summary
+
+1. **Got guidelines** for constraint tests during Goal stage
+2. **Wrote** constraint tests using Write tool
+3. **Got guidelines** for success criteria tests during Eval stage
+4. **Wrote** success criteria tests using Write tool
+5. **Ran** tests in parallel
+6. **Debugged** the one failure
+7. **Categorized** as IMPLEMENTATION_ERROR
+8. **Fixed** the agent (not the goal)
+9. **Re-ran** Eval only (didn't restart full flow)
+10. **Passed** all tests
+
+The agent is now validated and ready for production use.
@@ -0,0 +1,145 @@
+# Triage Issue Skill
+
+Analyze a GitHub issue, verify claims against the codebase, and close invalid issues with a technical response.
+
+## Trigger
+
+User provides a GitHub issue URL or number, e.g.:
+- `/triage-issue 1970`
+- `/triage-issue https://github.com/adenhq/hive/issues/1970`
+
+## Workflow
+
+### Step 1: Fetch Issue Details
+
+```bash
+gh issue view <number> --repo adenhq/hive --json title,body,state,labels,author
+```
+
+Extract:
+- Title
+- Body (the claim/bug report)
+- Current state
+- Labels
+- Author
+
+If the issue is already closed, inform the user and stop.
+
+### Step 2: Analyze the Claim
+
+Read the issue body and identify:
+1. **The core claim** - What is the user asserting?
+2. **Technical specifics** - File paths, function names, code snippets mentioned
+3. **Expected behavior** - What do they think should happen?
+4. **Severity claimed** - Security issue? Bug? Feature request?
+
+### Step 3: Investigate the Codebase
+
+For each technical claim:
+1. Find the referenced code using Grep/Glob/Read
+2. Understand the actual implementation
+3. Check if the claim accurately describes the behavior
+4. Look for related tests, documentation, or design decisions
+
+### Step 4: Evaluate Validity
+
+Categorize the issue as one of:
+
+| Category | Action |
+|----------|--------|
+| **Valid Bug** | Do NOT close. Inform user this is a real issue. |
+| **Valid Feature Request** | Do NOT close. Suggest labeling appropriately. |
+| **Misunderstanding** | Prepare technical explanation for why behavior is correct. |
+| **Fundamentally Flawed** | Prepare critique explaining the technical impossibility or design rationale. |
+| **Duplicate** | Find the original issue and prepare duplicate notice. |
+| **Incomplete** | Prepare request for more information. |
+
+### Step 5: Draft Response
+
+For issues to be closed, draft a response that:
+
+1. **Acknowledges the concern** - Don't be dismissive
+2. **Explains the actual behavior** - With code references
+3. **Provides technical rationale** - Why it works this way
+4. **References industry standards** - If applicable
+5. **Offers alternatives** - If there's a better approach for the user
+
+Use this template:
+
+```markdown
+## Analysis
+
+[Brief summary of what was investigated]
+
+## Technical Details
+
+[Explanation with code references]
+
+## Why This Is Working As Designed
+
+[Rationale]
+
+## Recommendation
+
+[What the user should do instead, if applicable]
+
+---
+*This issue was reviewed and closed by the maintainers.*
+```
+
+### Step 6: User Review
+
+Present the draft to the user with:
+
+```
+## Issue #<number>: <title>
+
+**Claim:** <summary of claim>
+
+**Finding:** <valid/invalid/misunderstanding/etc>
+
+**Draft Response:**
+<the markdown response>
+
+---
+Do you want me to post this comment and close the issue?
+```
+
+Use AskUserQuestion with options:
+- "Post and close" - Post comment, close issue
+- "Edit response" - Let user modify the response
+- "Skip" - Don't take action
+
+### Step 7: Execute Action
+
+If the user approves:
+
+```bash
+# Post comment
+gh issue comment <number> --repo adenhq/hive --body "<response>"
+
+# Close issue
+gh issue close <number> --repo adenhq/hive --reason "not planned"
+```
+
+Report success with link to the issue.
+
+## Important Guidelines
+
+1. **Never close valid issues** - If there's any merit to the claim, don't close it
+2. **Be respectful** - The reporter took time to file the issue
+3. **Be technical** - Provide code references and evidence
+4. **Be educational** - Help them understand, don't just dismiss
+5. **Check twice** - Make sure you understand the code before declaring something invalid
+6. **Consider edge cases** - Maybe their environment reveals a real issue
+
+## Example Critiques
+
+### Security Misunderstanding
+> "The claim that secrets are exposed in plaintext misunderstands the encryption architecture. While `SecretStr` is used for logging protection, actual encryption is provided by Fernet (AES-128-CBC with HMAC-SHA256 authentication) at the storage layer. The code path is: serialize → encrypt → write. Only encrypted bytes touch disk."
+
+### Impossible Request
+> "The requested feature would require [X] which violates [fundamental constraint]. This is not a limitation of our implementation but a fundamental property of [technology/protocol]."
+
+### Already Handled
+> "This scenario is already handled by [code reference]. The reporter may be using an older version or misconfigured environment."
@@ -0,0 +1,20 @@
+{
+  "mcpServers": {
+    "agent-builder": {
+      "command": "python",
+      "args": ["-m", "framework.mcp.agent_builder_server"],
+      "cwd": "core",
+      "env": {
+        "PYTHONPATH": "../tools/src"
+      }
+    },
+    "tools": {
+      "command": "python",
+      "args": ["mcp_server.py", "--stdio"],
+      "cwd": "tools",
+      "env": {
+        "PYTHONPATH": "src"
+      }
+    }
+  }
+}
@@ -0,0 +1 @@
+../../.claude/skills/agent-workflow
@@ -0,0 +1 @@
+../../.claude/skills/building-agents-construction
@@ -0,0 +1 @@
+../../.claude/skills/building-agents-core
@@ -0,0 +1 @@
+../../.claude/skills/building-agents-patterns
@@ -0,0 +1 @@
+../../.claude/skills/testing-agent
@@ -0,0 +1,18 @@
+This project uses ruff for Python linting and formatting.
+
+Rules:
+- Line length: 100 characters
+- Python target: 3.11+
+- Use double quotes for strings
+- Sort imports with isort (ruff I rules): stdlib, third-party, first-party (framework), local
+- Combine as-imports
+- Use type hints on all function signatures
+- Use `from __future__ import annotations` for modern type syntax
+- Raise exceptions with `from` in except blocks (B904)
+- No unused imports (F401), no unused variables (F841)
+- Prefer list/dict/set comprehensions over map/filter (C4)
+
+Run `make lint` to auto-fix, `make check` to verify without modifying files.
+Run `make format` to apply ruff formatting.
+
+The ruff config lives in core/pyproject.toml under [tool.ruff].
@@ -11,6 +11,9 @@ indent_size = 2
 insert_final_newline = true
 trim_trailing_whitespace = true

+[*.py]
+indent_size = 4
+
 [*.md]
 trim_trailing_whitespace = false

@@ -0,0 +1,124 @@
+# Normalize line endings for all text files
+* text=auto
+
+# Source code
+*.py text diff=python
+*.js text
+*.ts text
+*.jsx text
+*.tsx text
+*.json text
+*.yaml text
+*.yml text
+*.toml text
+*.ini text
+*.cfg text
+
+# Shell scripts (must use LF)
+*.sh text eol=lf
+quickstart.sh text eol=lf
+
+# PowerShell scripts (normalized to LF; PowerShell reads LF line endings on every platform)
+*.ps1 text eol=lf
+*.psm1 text eol=lf
+
+# Windows batch files (must use CRLF)
+*.bat text eol=crlf
+*.cmd text eol=crlf
+
+# Documentation
+*.md text
+*.txt text
+*.rst text
+*.tex text
+
+# Configuration files
+.gitignore text
+.gitattributes text
+.editorconfig text
+Dockerfile text
+docker-compose.yml text
+requirements*.txt text
+pyproject.toml text
+setup.py text
+setup.cfg text
+MANIFEST.in text
+LICENSE text
+README* text
+CHANGELOG* text
+CONTRIBUTING* text
+CODE_OF_CONDUCT* text
+
+# Web files
+*.html text
+*.css text
+*.scss text
+*.sass text
+
+# Data files
+*.xml text
+*.csv text
+*.sql text
+
+# Graphics (binary)
+*.png binary
+*.jpg binary
+*.jpeg binary
+*.gif binary
+*.ico binary
+*.svg binary
+*.eps binary
+*.bmp binary
+*.tif binary
+*.tiff binary
+
+# Archives (binary)
+*.zip binary
+*.tar binary
+*.gz binary
+*.bz2 binary
+*.7z binary
+*.rar binary
+
+# Python compiled (binary)
+*.pyc binary
+*.pyo binary
+*.pyd binary
+*.whl binary
+*.egg binary
+
+# System libraries (binary)
+*.so binary
+*.dll binary
+*.dylib binary
+*.lib binary
+*.a binary
+
+# Documents (binary)
+*.pdf binary
+*.doc binary
+*.docx binary
+*.ppt binary
+*.pptx binary
+*.xls binary
+*.xlsx binary
+
+# Fonts (binary)
+*.ttf binary
+*.otf binary
+*.woff binary
+*.woff2 binary
+*.eot binary
+
+# Audio/Video (binary)
+*.mp3 binary
+*.mp4 binary
+*.wav binary
+*.avi binary
+*.mov binary
+*.flv binary
+
+# Database files (binary)
+*.db binary
+*.sqlite binary
+*.sqlite3 binary
@@ -8,7 +8,6 @@
 /hive/ @adenhq/maintainers

 # Infrastructure
-/docker-compose*.yml @adenhq/maintainers
 /.github/ @adenhq/maintainers

 # Documentation
@@ -29,13 +29,12 @@ If applicable, add screenshots to help explain your problem.
 ## Environment

 - OS: [e.g., Ubuntu 22.04, macOS 14]
- Docker version: [e.g., 24.0.0]
- Node version: [e.g., 20.10.0]
- Browser (if applicable): [e.g., Chrome 120]
+- Python version: [e.g., 3.11.0]
+- Docker version (if applicable): [e.g., 24.0.0]

 ## Configuration

-Relevant parts of your `config.yaml` (remove any sensitive data):
+Relevant parts of your agent configuration or environment setup (remove any sensitive data):

 ```yaml
 # paste here
@@ -24,8 +24,8 @@ Fixes #(issue number)

 Describe the tests you ran to verify your changes:

- [ ] Unit tests pass (`npm run test`)
- [ ] Lint passes (`npm run lint`)
+- [ ] Unit tests pass (`cd core && pytest tests/`)
+- [ ] Lint passes (`cd core && ruff check .`)
 - [ ] Manual testing performed

 ## Checklist
@@ -0,0 +1,34 @@
+name: Auto-close duplicate issues
+description: Auto-closes issues that are duplicates of existing issues
+on:
+  schedule:
+    - cron: "0 */6 * * *"
+  workflow_dispatch:
+
+jobs:
+  auto-close-duplicates:
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    permissions:
+      contents: read
+      issues: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Setup Bun
+        uses: oven-sh/setup-bun@v2
+        with:
+          bun-version: latest
+
+      - name: Run auto-close-duplicates tests
+        run: bun test scripts/auto-close-duplicates
+
+      - name: Auto-close duplicate issues
+        run: bun run scripts/auto-close-duplicates.ts
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GITHUB_REPOSITORY_OWNER: ${{ github.repository_owner }}
+          GITHUB_REPOSITORY_NAME: ${{ github.event.repository.name }}
+          STATSIG_API_KEY: ${{ secrets.STATSIG_API_KEY }}
@@ -12,84 +12,127 @@ concurrency:

 jobs:
  lint:
-    name: Lint
+    name: Lint Python
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

-      - name: Setup Node.js
-        uses: actions/setup-node@v4
+      - name: Setup Python
+        uses: actions/setup-python@v5
        with:
-          node-version: '20'
-          cache: 'npm'
+          python-version: '3.11'
+          cache: 'pip'

      - name: Install dependencies
-        run: npm ci
+        run: |
+          cd core
+          pip install -e .
+          pip install -r requirements-dev.txt

-      - name: Run linter
-        run: npm run lint
+      - name: Ruff lint
+        run: |
+          ruff check core/
+          ruff check tools/
+
+      - name: Ruff format
+        run: |
+          ruff format --check core/
+          ruff format --check tools/

  test:
-    name: Test
-    runs-on: ubuntu-latest
+    name: Test Python Framework
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-latest, windows-latest]
    steps:
      - uses: actions/checkout@v4

-      - name: Setup Node.js
-        uses: actions/setup-node@v4
+      - name: Setup Python
+        uses: actions/setup-python@v5
        with:
-          node-version: '20'
-          cache: 'npm'
+          python-version: '3.11'
+          cache: 'pip'

      - name: Install dependencies
-        run: npm ci
+        run: |
+          cd core
+          pip install -e .
+          pip install -r requirements-dev.txt

      - name: Run tests
-        run: npm run test
+        run: |
+          cd core
+          pytest tests/ -v

-  build:
-    name: Build
+  test-tools:
+    name: Test Tools
    runs-on: ubuntu-latest
-    needs: [lint, test]
    steps:
      - uses: actions/checkout@v4

-      - name: Setup Node.js
-        uses: actions/setup-node@v4
+      - name: Setup Python
+        uses: actions/setup-python@v5
        with:
-          node-version: '20'
-          cache: 'npm'
+          python-version: '3.11'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+
+      - name: Install dependencies and run tests
+        run: |
+          cd tools
+          uv sync --extra dev
+          uv pip install --python .venv/bin/python -e ../core
+          uv run --extra dev pytest tests/ -v
+
+  validate:
+    name: Validate Agent Exports
+    runs-on: ubuntu-latest
+    needs: [lint, test, test-tools]
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+          cache: 'pip'

      - name: Install dependencies
-        run: npm ci
+        run: |
+          cd core
+          pip install -e .
+          pip install -r requirements-dev.txt

-      - name: Build packages
-        run: npm run build
+      - name: Validate exported agents
+        run: |
+          # Check that agent exports have valid structure
+          if [ ! -d "exports" ]; then
+            echo "No exports/ directory found, skipping validation"
+            exit 0
+          fi

-  docker:
-    name: Docker Build
-    runs-on: ubuntu-latest
-    needs: [lint, test]
-    steps:
-      - uses: actions/checkout@v4
+          shopt -s nullglob
+          agent_dirs=(exports/*/)
+          shopt -u nullglob

-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+          if [ ${#agent_dirs[@]} -eq 0 ]; then
+            echo "No agent directories in exports/, skipping validation"
+            exit 0
+          fi

-      - name: Build frontend image
-        uses: docker/build-push-action@v5
-        with:
-          context: ./honeycomb
-          push: false
-          tags: honeycomb-frontend:test
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
+          validated=0
+          for agent_dir in "${agent_dirs[@]}"; do
+            if [ -f "$agent_dir/agent.json" ]; then
+              echo "Validating $agent_dir"
+              python -c "import json; json.load(open('$agent_dir/agent.json'))"
+              validated=$((validated + 1))
+            fi
+          done

-      - name: Build backend image
-        uses: docker/build-push-action@v5
-        with:
-          context: ./hive
-          push: false
-          tags: honeycomb-backend:test
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
+          if [ "$validated" -eq 0 ]; then
+            echo "No agent.json files found in exports/, skipping validation"
+          else
+            echo "Validated $validated agent(s)"
+          fi
@@ -0,0 +1,97 @@
+name: Issue Triage
+
+on:
+  issues:
+    types: [opened]
+
+jobs:
+  triage:
+    runs-on: ubuntu-latest
+    timeout-minutes: 10  # hard cap on a single triage run
+    permissions:
+      contents: read  # the job only checks out the repository
+      issues: write  # the triage prompt adds labels and comments to the issue
+      id-token: write  # NOTE(review): presumably for the action's OIDC token exchange; confirm it is still needed
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      - name: Triage and check for duplicates
+        uses: anthropics/claude-code-action@v1
+        with:
+          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          allowed_non_write_users: "*"
+          prompt: |
+            Analyze this new issue and perform triage tasks.
+
+            Issue: #${{ github.event.issue.number }}
+            Repository: ${{ github.repository }}
+
+            ## Your Tasks:
+
+            ### 1. Get issue details
+            Use mcp__github__get_issue to get the full details of issue #${{ github.event.issue.number }}
+
+            ### 2. Check for duplicates
+            Search for similar existing issues using mcp__github__search_issues with relevant keywords from the issue title and body.
+
+            Criteria for duplicates:
+            - Same bug or error being reported
+            - Same feature request (even if worded differently)
+            - Same question being asked
+            - Issues describing the same root problem
+
+            If you find a duplicate:
+            - Add a comment using EXACTLY this format (required for auto-close to work):
+              "Found a possible duplicate of #<issue_number>: <brief explanation of why it's a duplicate>"
+            - Do NOT apply the "duplicate" label yet (the auto-close script will add it after 12 hours if no objections)
+            - Suggest the user react with a thumbs-down if they disagree
+
+            ### 3. Check for Low-Quality / AI Spam
+            Analyze the issue quality. We are receiving many low-effort, AI-generated spam issues.
+            Flag the issue as INVALID if it matches these criteria:
+            - **Vague/Generic**: Title is "Fix bug" or "Error" without specific context.
+            - **Hallucinated**: Refers to files or features that do not exist in this repo.
+            - **Template Filler**: Body contains "Insert description here" or unrelated gibberish.
+            - **Low Effort**: No reproduction steps, no logs, only 1-2 sentences.
+
+            If identified as spam/low-quality:
+            - Add the "invalid" label.
+            - Add a comment:
+              "This issue has been automatically flagged as low-quality or potentially AI-generated spam. It lacks specific details (logs, reproduction steps, file references) required for us to help. Please open a new issue following the template exactly if this is a legitimate request."
+            - Do NOT proceed to other steps.
+
+            ### 4. Check for invalid issues (General)
+            If the issue is not spam but still lacks information:
+            - Add the "invalid" label
+            - Comment asking for clarification
+
+            ### 5. Categorize with labels (if NOT a duplicate or spam)
+            Apply appropriate labels based on the issue content. Use ONLY these labels:
+            - bug: Something isn't working
+            - enhancement: New feature or request
+            - question: Further information is requested
+            - documentation: Improvements or additions to documentation
+            - good first issue: Good for newcomers (if issue is well-defined and small scope)
+            - help wanted: Extra attention is needed (if issue needs community input)
+            - backlog: Tracked for the future, but not currently planned or prioritized
+
+            You may apply multiple labels if appropriate (e.g., "bug" and "help wanted").
+
+            ## Tools Available:
+            - mcp__github__get_issue: Get issue details
+            - mcp__github__search_issues: Search for similar issues
+            - mcp__github__list_issues: List recent issues if needed
+            - mcp__github__add_issue_comment: Add a comment
+            - mcp__github__update_issue: Add labels
+            - mcp__github__get_issue_comments: Get existing comments
+
+            Be thorough but efficient. Focus on accurate categorization and finding true duplicates.
+
+          claude_args: |
+            --model claude-haiku-4-5-20251001
+            --allowedTools "mcp__github__get_issue,mcp__github__search_issues,mcp__github__list_issues,mcp__github__add_issue_comment,mcp__github__update_issue,mcp__github__get_issue_comments"
@@ -0,0 +1,204 @@
+name: PR Check Command
+
+on:
+  issue_comment:
+    types: [created]
+
+jobs:
+  check-pr:
+    # Only run on PR comments that start with /check
+    if: github.event.issue.pull_request && startsWith(github.event.comment.body, '/check')
+    runs-on: ubuntu-latest
+    permissions:
+      pull-requests: write
+      issues: write
+      checks: write
+      statuses: write
+
+    steps:
+      - name: Check PR requirements
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const prNumber = context.payload.issue.number;
+            console.log(`Triggered by /check comment on PR #${prNumber}`);
+
+            // Fetch PR data
+            const { data: pr } = await github.rest.pulls.get({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              pull_number: prNumber,
+            });
+
+            const prBody = pr.body || '';
+            const prTitle = pr.title || '';
+            const prAuthor = pr.user.login;
+            const headSha = pr.head.sha;
+
+            // Create a check run in progress
+            const { data: checkRun } = await github.rest.checks.create({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              name: 'check-requirements',
+              head_sha: headSha,
+              status: 'in_progress',
+              started_at: new Date().toISOString(),
+            });
+
+            // Extract issue numbers
+            const issuePattern = /(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)?\s*#(\d+)/gi;
+            const allText = `${prTitle} ${prBody}`;
+            const matches = [...allText.matchAll(issuePattern)];
+            const issueNumbers = [...new Set(matches.map(m => parseInt(m[1], 10)))];
+
+            console.log(`PR #${prNumber}:`);
+            console.log(`  Author: ${prAuthor}`);
+            console.log(`  Found issue references: ${issueNumbers.length > 0 ? issueNumbers.join(', ') : 'none'}`);
+
+            if (issueNumbers.length === 0) {
+              const message = `## PR Closed - Requirements Not Met
+
+            This PR has been automatically closed because it doesn't meet the requirements.
+
+            **Missing:** No linked issue found.
+
+            **To fix:**
+            1. Create or find an existing issue for this work
+            2. Assign yourself to the issue
+            3. Re-open this PR and add \`Fixes #123\` in the description
+
+            **Why is this required?** See #472 for details.`;
+
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: prNumber,
+                body: message,
+              });
+
+              await github.rest.pulls.update({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                pull_number: prNumber,
+                state: 'closed',
+              });
+
+              // Update check run to failure
+              await github.rest.checks.update({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                check_run_id: checkRun.id,
+                status: 'completed',
+                conclusion: 'failure',
+                completed_at: new Date().toISOString(),
+                output: {
+                  title: 'Missing linked issue',
+                  summary: 'PR must reference an issue (e.g., `Fixes #123`)',
+                },
+              });
+
+              core.setFailed('PR must reference an issue');
+              return;
+            }
+
+            // Check if PR author is assigned to any linked issue
+            let issueWithAuthorAssigned = null;
+            let issuesWithoutAuthor = [];
+
+            for (const issueNum of issueNumbers) {
+              try {
+                const { data: issue } = await github.rest.issues.get({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  issue_number: issueNum,
+                });
+
+                const assigneeLogins = (issue.assignees || []).map(a => a.login);
+                if (assigneeLogins.includes(prAuthor)) {
+                  issueWithAuthorAssigned = issueNum;
+                  console.log(`  Issue #${issueNum} has PR author ${prAuthor} as assignee`);
+                  break;
+                } else {
+                  issuesWithoutAuthor.push({
+                    number: issueNum,
+                    assignees: assigneeLogins
+                  });
+                  console.log(`  Issue #${issueNum} assignees: ${assigneeLogins.length > 0 ? assigneeLogins.join(', ') : 'none'}`);
+                }
+              } catch (error) {
+                console.log(`  Issue #${issueNum} not found`);
+              }
+            }
+
+            if (!issueWithAuthorAssigned) {
+              const issueList = issuesWithoutAuthor.map(i =>
+                `#${i.number} (assignees: ${i.assignees.length > 0 ? i.assignees.join(', ') : 'none'})`
+              ).join(', ');
+
+              const message = `## PR Closed - Requirements Not Met
+
+            This PR has been automatically closed because it doesn't meet the requirements.
+
+            **PR Author:** @${prAuthor}
+            **Found issues:** ${issueList}
+            **Problem:** The PR author must be assigned to the linked issue.
+
+            **To fix:**
+            1. Assign yourself (@${prAuthor}) to one of the linked issues
+            2. Re-open this PR
+
+            **Why is this required?** See #472 for details.`;
+
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: prNumber,
+                body: message,
+              });
+
+              await github.rest.pulls.update({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                pull_number: prNumber,
+                state: 'closed',
+              });
+
+              // Update check run to failure
+              await github.rest.checks.update({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                check_run_id: checkRun.id,
+                status: 'completed',
+                conclusion: 'failure',
+                completed_at: new Date().toISOString(),
+                output: {
+                  title: 'PR author not assigned to issue',
+                  summary: `PR author @${prAuthor} must be assigned to one of the linked issues: ${issueList}`,
+                },
+              });
+
+              core.setFailed('PR author must be assigned to the linked issue');
+            } else {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: prNumber,
+                body: `✅ PR requirements met! Issue #${issueWithAuthorAssigned} has @${prAuthor} as assignee.`,
+              });
+
+              // Update check run to success
+              await github.rest.checks.update({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                check_run_id: checkRun.id,
+                status: 'completed',
+                conclusion: 'success',
+                completed_at: new Date().toISOString(),
+                output: {
+                  title: 'Requirements met',
+                  summary: `Issue #${issueWithAuthorAssigned} has @${prAuthor} as assignee.`,
+                },
+              });
+
+              console.log(`PR requirements met!`);
+            }
@@ -0,0 +1,138 @@
+name: PR Requirements Backfill
+
+on:
+  workflow_dispatch:
+
+jobs:
+  check-all-open-prs:
+    runs-on: ubuntu-latest
+    permissions:
+      pull-requests: write
+      issues: write
+
+    steps:
+      - name: Check all open PRs
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const { data: pullRequests } = await github.rest.pulls.list({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              state: 'open',
+              per_page: 100,
+            });
+
+            console.log(`Found ${pullRequests.length} open PRs`);
+
+            for (const pr of pullRequests) {
+              const prNumber = pr.number;
+              const prBody = pr.body || '';
+              const prTitle = pr.title || '';
+              const prAuthor = pr.user.login;
+
+              console.log(`\nChecking PR #${prNumber}: ${prTitle}`);
+
+              // Extract issue numbers from body and title
+              const issuePattern = /(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)?\s*#(\d+)/gi;
+              const allText = `${prTitle} ${prBody}`;
+              const matches = [...allText.matchAll(issuePattern)];
+              const issueNumbers = [...new Set(matches.map(m => parseInt(m[1], 10)))];
+
+              console.log(`  Found issue references: ${issueNumbers.length > 0 ? issueNumbers.join(', ') : 'none'}`);
+
+              if (issueNumbers.length === 0) {
+                console.log(`  ❌ No linked issue - closing PR`);
+
+                const message = `## PR Closed - Requirements Not Met
+
+            This PR has been automatically closed because it doesn't meet the requirements.
+
+            **Missing:** No linked issue found.
+
+            **To fix:**
+            1. Create or find an existing issue for this work
+            2. Assign yourself to the issue
+            3. Re-open this PR and add \`Fixes #123\` in the description`;
+
+                await github.rest.issues.createComment({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  issue_number: prNumber,
+                  body: message,
+                });
+
+                await github.rest.pulls.update({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  pull_number: prNumber,
+                  state: 'closed',
+                });
+
+                continue;
+              }
+
+              // Check if any linked issue has the PR author as assignee
+              let issueWithAuthorAssigned = null;
+              let issuesWithoutAuthor = [];
+
+              for (const issueNum of issueNumbers) {
+                try {
+                  const { data: issue } = await github.rest.issues.get({
+                    owner: context.repo.owner,
+                    repo: context.repo.repo,
+                    issue_number: issueNum,
+                  });
+
+                  const assigneeLogins = (issue.assignees || []).map(a => a.login);
+                  if (assigneeLogins.includes(prAuthor)) {
+                    issueWithAuthorAssigned = issueNum;
+                    break;
+                  } else {
+                    issuesWithoutAuthor.push({
+                      number: issueNum,
+                      assignees: assigneeLogins
+                    });
+                  }
+                } catch (error) {
+                  console.log(`  Issue #${issueNum} not found or inaccessible`);
+                }
+              }
+
+              if (!issueWithAuthorAssigned) {
+                const issueList = issuesWithoutAuthor.map(i =>
+                  `#${i.number} (assignees: ${i.assignees.length > 0 ? i.assignees.join(', ') : 'none'})`
+                ).join(', ');
+
+                console.log(`  ❌ PR author not assigned to any linked issue - closing PR`);
+
+                const message = `## PR Closed - Requirements Not Met
+
+            This PR has been automatically closed because it doesn't meet the requirements.
+
+            **PR Author:** @${prAuthor}
+            **Found issues:** ${issueList}
+            **Problem:** The PR author must be assigned to the linked issue.
+
+            **To fix:**
+            1. Assign yourself (@${prAuthor}) to one of the linked issues
+            2. Re-open this PR`;
+
+                await github.rest.issues.createComment({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  issue_number: prNumber,
+                  body: message,
+                });
+
+                await github.rest.pulls.update({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  pull_number: prNumber,
+                  state: 'closed',
+                });
+              } else {
+                console.log(`  ✅ PR requirements met! Issue #${issueWithAuthorAssigned} has ${prAuthor} as assignee.`);
+              }
+            }
+
+            console.log('\nBackfill complete!');
@@ -0,0 +1,189 @@
+name: PR Requirements Check
+
+on:
+  pull_request_target:  # runs in the base repository's context; this job's only step is github-script, so no PR code is checked out or executed
+    types: [opened, reopened, edited, synchronize]
+
+jobs:
+  check-requirements:
+    runs-on: ubuntu-latest
+    permissions:
+      pull-requests: write  # the script closes PRs that fail the check
+      issues: write  # the script posts the explanatory comment on the PR
+
+    steps:
+      - name: Check PR has linked issue with assignee
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const pr = context.payload.pull_request;
+            const prNumber = pr.number;
+            const prBody = pr.body || '';
+            const prTitle = pr.title || '';
+            const prLabels = (pr.labels || []).map(l => l.name);
+
+            // Allow micro-fix and documentation PRs without a linked issue
+            const isMicroFix = prLabels.includes('micro-fix') || /micro-fix/i.test(prTitle);
+            const isDocumentation = prLabels.includes('documentation') || /\bdocs?\b/i.test(prTitle);
+            if (isMicroFix || isDocumentation) {
+              const reason = isMicroFix ? 'micro-fix' : 'documentation';
+              console.log(`PR #${prNumber} is a ${reason}, skipping issue requirement.`);
+              return;
+            }
+
+            // Extract issue numbers from body and title
+            // Matches: fixes #123, closes #123, resolves #123, or plain #123
+            const issuePattern = /(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)?\s*#(\d+)/gi;
+
+            const allText = `${prTitle} ${prBody}`;
+            const matches = [...allText.matchAll(issuePattern)];
+            const issueNumbers = [...new Set(matches.map(m => parseInt(m[1], 10)))];
+
+            console.log(`PR #${prNumber}:`);
+            console.log(`  Found issue references: ${issueNumbers.length > 0 ? issueNumbers.join(', ') : 'none'}`);
+
+            if (issueNumbers.length === 0) {
+              const message = `## PR Closed - Requirements Not Met
+
+            This PR has been automatically closed because it doesn't meet the requirements.
+
+            **Missing:** No linked issue found.
+
+            **To fix:**
+            1. Create or find an existing issue for this work
+            2. Assign yourself to the issue
+            3. Re-open this PR and add \`Fixes #123\` in the description
+
+            **Exception:** To bypass this requirement, you can:
+            - Add the \`micro-fix\` label or include \`micro-fix\` in your PR title for trivial fixes
+            - Add the \`documentation\` label or include \`doc\`/\`docs\` in your PR title for documentation changes
+
+            **Micro-fix requirements** (must meet ALL):
+            | Qualifies | Disqualifies |
+            |-----------|--------------|
+            | < 20 lines changed | Any functional bug fix |
+            | Typos & Documentation & Linting | Refactoring for "clean code" |
+            | No logic/API/DB changes | New features (even tiny ones) |
+
+            **Why is this required?** See #472 for details.`;
+
+              const comments = await github.rest.issues.listComments({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: prNumber,
+              });
+
+              const botComment = comments.data.find(
+                (c) => c.user.type === 'Bot' && c.body.includes('PR Closed - Requirements Not Met')
+              );
+
+              if (!botComment) {
+                await github.rest.issues.createComment({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  issue_number: prNumber,
+                  body: message,
+                });
+              }
+
+              await github.rest.pulls.update({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                pull_number: prNumber,
+                state: 'closed',
+              });
+
+              core.setFailed('PR must reference an issue');
+              return;
+            }
+
+            // Check if any linked issue has the PR author as assignee
+            const prAuthor = pr.user.login;
+            let issueWithAuthorAssigned = null;
+            let issuesWithoutAuthor = [];
+
+            for (const issueNum of issueNumbers) {
+              try {
+                const { data: issue } = await github.rest.issues.get({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  issue_number: issueNum,
+                });
+
+                const assigneeLogins = (issue.assignees || []).map(a => a.login);
+                if (assigneeLogins.includes(prAuthor)) {
+                  issueWithAuthorAssigned = issueNum;
+                  console.log(`  Issue #${issueNum} has PR author ${prAuthor} as assignee`);
+                  break;
+                } else {
+                  issuesWithoutAuthor.push({
+                    number: issueNum,
+                    assignees: assigneeLogins
+                  });
+                  console.log(`  Issue #${issueNum} assignees: ${assigneeLogins.length > 0 ? assigneeLogins.join(', ') : 'none'} (PR author: ${prAuthor})`);
+                }
+              } catch (error) {
+                console.log(`  Issue #${issueNum} not found or inaccessible`);
+              }
+            }
+
+            if (!issueWithAuthorAssigned) {
+              const issueList = issuesWithoutAuthor.map(i =>
+                `#${i.number} (assignees: ${i.assignees.length > 0 ? i.assignees.join(', ') : 'none'})`
+              ).join(', ');
+
+              const message = `## PR Closed - Requirements Not Met
+
+            This PR has been automatically closed because it doesn't meet the requirements.
+
+            **PR Author:** @${prAuthor}
+            **Found issues:** ${issueList}
+            **Problem:** The PR author must be assigned to the linked issue.
+
+            **To fix:**
+            1. Assign yourself (@${prAuthor}) to one of the linked issues
+            2. Re-open this PR
+
+            **Exception:** To bypass this requirement, you can:
+            - Add the \`micro-fix\` label or include \`micro-fix\` in your PR title for trivial fixes
+            - Add the \`documentation\` label or include \`doc\`/\`docs\` in your PR title for documentation changes
+
+            **Micro-fix requirements** (must meet ALL):
+            | Qualifies | Disqualifies |
+            |-----------|--------------|
+            | < 20 lines changed | Any functional bug fix |
+            | Typos & Documentation & Linting | Refactoring for "clean code" |
+            | No logic/API/DB changes | New features (even tiny ones) |
+
+            **Why is this required?** See #472 for details.`;
+
+              const comments = await github.rest.issues.listComments({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: prNumber,
+              });
+
+              const botComment = comments.data.find(
+                (c) => c.user.type === 'Bot' && c.body.includes('PR Closed - Requirements Not Met')
+              );
+
+              if (!botComment) {
+                await github.rest.issues.createComment({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  issue_number: prNumber,
+                  body: message,
+                });
+              }
+
+              await github.rest.pulls.update({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                pull_number: prNumber,
+                state: 'closed',
+              });
+
+              core.setFailed('PR author must be assigned to the linked issue');
+            } else {
+              console.log(`PR requirements met! Issue #${issueWithAuthorAssigned} has ${prAuthor} as assignee.`);
+            }
@@ -7,7 +7,6 @@ on:

 permissions:
  contents: write
-  packages: write

 jobs:
  release:
@@ -18,20 +17,22 @@ jobs:
        with:
          fetch-depth: 0

-      - name: Setup Node.js
-        uses: actions/setup-node@v4
+      - name: Setup Python
+        uses: actions/setup-python@v5
        with:
-          node-version: '20'
-          cache: 'npm'
+          python-version: '3.11'
+          cache: 'pip'

      - name: Install dependencies
-        run: npm ci
-
-      - name: Build packages
-        run: npm run build
+        run: |
+          cd core
+          pip install -e .
+          pip install -r requirements-dev.txt

      - name: Run tests
-        run: npm run test
+        run: |
+          cd core
+          pytest tests/ -v

      - name: Generate changelog
        id: changelog
@@ -46,50 +47,3 @@ jobs:
          generate_release_notes: true
          draft: false
          prerelease: ${{ contains(github.ref, '-') }}
-
-  docker-publish:
-    name: Publish Docker Images
-    runs-on: ubuntu-latest
-    needs: release
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Login to GitHub Container Registry
-        uses: docker/login-action@v3
-        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Extract metadata
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            ghcr.io/${{ github.repository }}/frontend
-            ghcr.io/${{ github.repository }}/backend
-          tags: |
-            type=semver,pattern={{version}}
-            type=semver,pattern={{major}}.{{minor}}
-            type=semver,pattern={{major}}
-
-      - name: Build and push frontend
-        uses: docker/build-push-action@v5
-        with:
-          context: ./honeycomb
-          push: true
-          tags: ghcr.io/${{ github.repository }}/frontend:${{ github.ref_name }}
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
-
-      - name: Build and push backend
-        uses: docker/build-push-action@v5
-        with:
-          context: ./hive
-          push: true
-          tags: ghcr.io/${{ github.repository }}/backend:${{ github.ref_name }}
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
@@ -5,15 +5,14 @@ node_modules/
 # Build outputs
 dist/
 build/
+workdir/
 .next/
 out/

-# Environment files (generated from config.yaml)
+# Environment files
 .env
 .env.local
 .env.*.local
-honeycomb/.env
-hive/.env

 # User configuration (copied from .example)
 config.yaml
@@ -43,12 +42,31 @@ pnpm-debug.log*
 # Testing
 coverage/
 .nyc_output/
+.pytest_cache/

 # TypeScript
 *.tsbuildinfo

+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.egg-info/
+.eggs/
+*.egg
+uv.lock
+
+# Generated runtime data
+core/data/
+
 # Misc
 *.local
 .cache/
 tmp/
 temp/
+
+exports/*
+
+.agent-builder-sessions/*
+
+.venv
@@ -0,0 +1,20 @@
+{
+  "mcpServers": {
+    "agent-builder": {
+      "command": ".venv/bin/python",
+      "args": ["-m", "framework.mcp.agent_builder_server"],
+      "cwd": "core",
+      "env": {
+        "PYTHONPATH": "../tools/src"
+      }
+    },
+    "tools": {
+      "command": ".venv/bin/python",
+      "args": ["mcp_server.py", "--stdio"],
+      "cwd": "tools",
+      "env": {
+        "PYTHONPATH": "src:../core"
+      }
+    }
+  }
+}
@@ -0,0 +1,18 @@
+repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.8.6  # pinned hook release; bump deliberately
+    hooks:
+      - id: ruff
+        name: ruff lint (core)
+        args: [--fix]  # apply ruff's auto-fixes in place
+        files: ^core/  # regex on the repo-relative path: only files under core/
+      - id: ruff
+        name: ruff lint (tools)
+        args: [--fix]  # apply ruff's auto-fixes in place
+        files: ^tools/  # only files under tools/
+      - id: ruff-format
+        name: ruff format (core)
+        files: ^core/  # only files under core/
+      - id: ruff-format
+        name: ruff format (tools)
+        files: ^tools/  # only files under tools/
@@ -0,0 +1 @@
+3.11
@@ -0,0 +1,7 @@
+{
+  "recommendations": [
+    "charliermarsh.ruff",
+    "editorconfig.editorconfig",
+    "ms-python.python"
+  ]
+}
@@ -25,8 +25,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Removed
 - N/A

+
 ### Fixed
- N/A
+- tools: Fixed web_scrape tool attempting to parse non-HTML content (PDF, JSON) as HTML (#487)

 ### Security
 - N/A
@@ -1,37 +1,63 @@
-# Contributing to Hive
+# Contributing to Aden Agent Framework

-Thank you for your interest in contributing to Hive! This document provides guidelines and information for contributors.
+Thank you for your interest in contributing to the Aden Agent Framework! This document provides guidelines and information for contributors. We’re especially looking for help building tools, integrations ([see #2805](https://github.com/adenhq/hive/issues/2805)), and example agents for the framework. If you’re interested in extending its functionality, this is the perfect place to start.

 ## Code of Conduct

 By participating in this project, you agree to abide by our [Code of Conduct](CODE_OF_CONDUCT.md).

+## Issue Assignment Policy
+
+To prevent duplicate work and respect contributors' time, we require issue assignment before submitting PRs.
+
+### How to Claim an Issue
+
+1. **Find an Issue:** Browse existing issues or create a new one
+2. **Claim It:** Leave a comment (e.g., *"I'd like to work on this!"*)
+3. **Wait for Assignment:** A maintainer will assign you within 24 hours. Issues with reproducible steps or proposals are prioritized.
+4. **Submit Your PR:** Once assigned, you're ready to contribute
+
+> **Note:** PRs for issues you are not assigned to may be delayed or closed, especially if someone else was already assigned.
+
+### Exceptions (No Assignment Needed)
+
+You may submit PRs without prior assignment for:
+- **Documentation:** Fixing typos or clarifying instructions — add the `documentation` label or include `doc`/`docs` in your PR title to bypass the linked issue requirement
+- **Micro-fixes:** Add the `micro-fix` label or include `micro-fix` in your PR title to bypass the linked issue requirement. Micro-fixes must meet **all** qualification criteria:
+
+  | Qualifies | Disqualifies |
+  |-----------|--------------|
+  | < 20 lines changed | Any functional bug fix |
+  | Typos & Documentation & Linting | Refactoring for "clean code" |
+  | No logic/API/DB changes | New features (even tiny ones) |
+
 ## Getting Started

 1. Fork the repository
 2. Clone your fork: `git clone https://github.com/YOUR_USERNAME/hive.git`
 3. Create a feature branch: `git checkout -b feature/your-feature-name`
 4. Make your changes
-5. Run tests: `npm run test`
+5. Run checks and tests:
+   ```bash
+   make check    # Lint and format checks (ruff check + ruff format --check on core/ and tools/)
+   make test     # Core tests (cd core && pytest tests/ -v)
+   ```
 6. Commit your changes following our commit conventions
 7. Push to your fork and submit a Pull Request

 ## Development Setup

 ```bash
-# Install dependencies
-npm install
-
-# Copy configuration
-cp config.yaml.example config.yaml
-
-# Generate environment files
-npm run setup
-
-# Start development environment
-docker compose up
+# Install Python packages and verify setup
+./quickstart.sh
 ```

+> **Windows Users:**  
+> If you are on native Windows, it is recommended to use **WSL (Windows Subsystem for Linux)**.  
+> Alternatively, make sure to run PowerShell or Git Bash with Python 3.11+ installed, and disable "App Execution Aliases" in Windows settings.
+
+> **Tip:** Installing Claude Code skills is optional for running existing agents, but required if you plan to **build new agents**.
+
 ## Commit Convention

 We follow [Conventional Commits](https://www.conventionalcommits.org/):
@@ -62,11 +88,12 @@ docs(readme): update installation instructions

 ## Pull Request Process

-1. Update documentation if needed
-2. Add tests for new functionality
-3. Ensure all tests pass
-4. Update the CHANGELOG.md if applicable
-5. Request review from maintainers
+1. **Get assigned to the issue first** (see [Issue Assignment Policy](#issue-assignment-policy))
+2. Update documentation if needed
+3. Add tests for new functionality
+4. Ensure `make check` and `make test` pass
+5. Update the CHANGELOG.md if applicable
+6. Request review from maintainers

 ### PR Title Format

@@ -77,32 +104,52 @@ feat(component): add new feature description

 ## Project Structure

- `honeycomb/` - React frontend application
- `hive/` - Node.js backend API
+- `core/` - Core framework (agent runtime, graph executor, protocols)
+- `tools/` - MCP Tools Package (tools for agent capabilities)
+- `exports/` - Agent packages and examples
 - `docs/` - Documentation
 - `scripts/` - Build and utility scripts
+- `.claude/` - Claude Code skills for building/testing agents

 ## Code Style

- Use TypeScript for all new code
- Follow existing code patterns
+- Use Python 3.11+ for all new code
+- Follow PEP 8 style guide
+- Add type hints to function signatures
+- Write docstrings for classes and public functions
 - Use meaningful variable and function names
- Add comments for complex logic
 - Keep functions focused and small

 ## Testing

-```bash
-# Run all tests
-npm run test
+> **Note:** When testing agents in `exports/`, always set PYTHONPATH:
+>
+> ```bash
+> PYTHONPATH=core:exports python -m agent_name test
+> ```

-# Run tests for a specific package
-npm run test --workspace=honeycomb
-npm run test --workspace=hive
+```bash
+# Run lint and format checks (mirrors CI lint job)
+make check
+
+# Run core framework tests (mirrors CI test job)
+make test
+
+# Or run tests directly
+cd core && pytest tests/ -v
+
+# Run tests for a specific agent
+PYTHONPATH=core:exports python -m agent_name test
 ```

+> **CI also validates** that all exported agent JSON files (`exports/*/agent.json`) are well-formed JSON. Ensure your agent exports are valid before submitting.
+
+## Contributor License Agreement
+
+By submitting a Pull Request, you agree that your contributions will be licensed under the Aden Agent Framework license.
+
 ## Questions?

 Feel free to open an issue for questions or join our [Discord community](https://discord.com/invite/MXE49hrKDk).

-Thank you for contributing!
+Thank you for contributing!
@@ -0,0 +1,495 @@
+# Agent Development Environment Setup
+
+Complete setup guide for building and running goal-driven agents with the Aden Agent Framework.
+
+## Quick Setup
+
+```bash
+# Run the automated setup script
+./quickstart.sh
+```
+
+> **Note for Windows Users:**  
+> Running the setup script on native Windows shells (PowerShell / Git Bash) may sometimes fail due to Python App Execution Aliases.  
+> It is **strongly recommended to use WSL (Windows Subsystem for Linux)** for a smoother setup experience.
+
+This will:
+
+- Check Python version (requires 3.11+)
+- Install the core framework package (`framework`)
+- Install the tools package (`aden_tools`)
+- Fix package compatibility issues (openai + litellm)
+- Verify all installations
+
+## Alpine Linux Setup
+
+If you are using Alpine Linux (e.g., inside a Docker container), you must install system dependencies and use a virtual environment before running the setup script:
+
+1. Install System Dependencies:
+```bash
+apk update
+apk add bash git python3 py3-pip nodejs npm curl build-base python3-dev linux-headers libffi-dev
+```
+2. Set up Virtual Environment (Required for Python 3.12+):
+```bash
+python3 -m venv venv
+source venv/bin/activate
+pip install --upgrade pip setuptools wheel
+```
+3. Run the Quickstart Script:
+```bash
+./quickstart.sh
+```
+
+## Manual Setup (Alternative)
+
+If you prefer to set up manually or the script fails:
+
+### 1. Install Core Framework
+
+```bash
+cd core
+pip install -e .
+```
+
+### 2. Install Tools Package
+
+```bash
+# Run from the project root (run `cd ..` first if you are still inside core/)
+cd tools
+pip install -e .
+```
+
+### 3. Upgrade OpenAI Package
+
+```bash
+# litellm requires openai >= 1.0.0
+pip install --upgrade "openai>=1.0.0"
+```
+
+### 4. Verify Installation
+
+```bash
+python -c "import framework; print('✓ framework OK')"
+python -c "import aden_tools; print('✓ aden_tools OK')"
+python -c "import litellm; print('✓ litellm OK')"
+```
+
+> **Windows Tip:**  
+> On Windows, if the verification commands fail, ensure you are running them in **WSL** or after **disabling Python App Execution Aliases** in Windows Settings → Apps → App Execution Aliases.
+
+## Requirements
+
+### Python Version
+
+- **Minimum:** Python 3.11
+- **Recommended:** Python 3.11 or 3.12
+- **Tested on:** Python 3.11, 3.12, 3.13
+
+### System Requirements
+
+- pip (latest version)
+- 2GB+ RAM
+- Internet connection (for LLM API calls)
+- For Windows users: WSL 2 is recommended for full compatibility.
+
+### API Keys (Optional)
+
+For running agents with real LLMs:
+
+```bash
+export ANTHROPIC_API_KEY="your-key-here"
+```
+
+## Running Agents
+
+All agent commands must be run from the project root with `PYTHONPATH` set:
+
+```bash
+# From /hive/ directory
+PYTHONPATH=core:exports python -m agent_name COMMAND
+```
+
+### Example Commands
+
+After building an agent via `/building-agents-construction`, use these commands:
+
+```bash
+# Validate agent structure
+PYTHONPATH=core:exports python -m your_agent_name validate
+
+# Show agent information
+PYTHONPATH=core:exports python -m your_agent_name info
+
+# Run agent with input
+PYTHONPATH=core:exports python -m your_agent_name run --input '{
+  "task": "Your input here"
+}'
+
+# Run in mock mode (no LLM calls)
+PYTHONPATH=core:exports python -m your_agent_name run --mock --input '{...}'
+```
+
+## Building New Agents and Run Flow
+
+Build and run an agent using Claude Code CLI with the agent building skills:
+
+### 1. Install Claude Skills (One-time)
+
+```bash
+./quickstart.sh
+```
+
+This verifies agent-related Claude Code skills are available:
+
+- `/building-agents-construction` - Step-by-step build guide
+- `/building-agents-core` - Fundamental concepts
+- `/building-agents-patterns` - Best practices
+- `/testing-agent` - Test and validate agents
+- `/agent-workflow` - Complete workflow
+
+### 2. Build an Agent
+
+```
+claude> /building-agents-construction
+```
+
+Follow the prompts to:
+
+1. Define your agent's goal
+2. Design the workflow nodes
+3. Connect nodes with edges
+4. Generate the agent package under `exports/`
+
+This step creates the initial agent structure required for further development.
+
+### 3. Define Agent Logic
+
+```
+claude> /building-agents-core
+```
+
+Follow the prompts to:
+
+1. Understand the agent architecture and file structure
+2. Define the agent's goal, success criteria, and constraints
+3. Learn node types (LLM, tool-use, router, function)
+4. Discover and validate available tools before use
+
+This step establishes the core concepts and rules needed before building an agent.
+
+### 4. Apply Agent Patterns
+
+```
+claude> /building-agents-patterns
+```
+
+Follow the prompts to:
+
+1. Apply best-practice agent design patterns
+2. Add pause/resume flows for multi-turn interactions
+3. Improve robustness with routing, fallbacks, and retries
+4. Avoid common anti-patterns during agent construction
+
+This step helps optimize agent design before final testing.
+
+### 5. Test Your Agent
+
+```
+claude> /testing-agent
+```
+
+Follow the prompts to:
+
+1. Generate test guidelines for constraints and success criteria
+2. Write agent tests directly under `exports/{agent}/tests/`
+3. Run goal-based evaluation tests
+4. Debug failing tests and iterate on agent improvements
+
+This step verifies that the agent meets its goals before production use.
+
+### 6. Agent Development Workflow (End-to-End)
+
+```
+claude> /agent-workflow
+```
+
+Follow the guided flow to:
+
+1. Understand core agent concepts (optional)
+2. Build the agent structure step by step
+3. Apply best-practice design patterns (optional)
+4. Test and validate the agent against its goals
+
+This workflow orchestrates all agent-building skills to take you from idea → production-ready agent.
+
+## Troubleshooting
+
+### "externally-managed-environment" error (PEP 668)
+
+**Cause:** Python 3.12+ on macOS/Homebrew, WSL, or some Linux distros prevents system-wide pip installs.
+
+**Solution:** Create and use a virtual environment:
+
+```bash
+# Create virtual environment
+python3 -m venv .venv
+
+# Activate it
+source .venv/bin/activate  # macOS/Linux
+# .venv\Scripts\activate   # Windows
+
+# Then run setup
+./quickstart.sh
+```
+
+Always activate the venv before running agents:
+
+```bash
+source .venv/bin/activate
+PYTHONPATH=core:exports python -m your_agent_name demo
+```
+
+### "ModuleNotFoundError: No module named 'framework'"
+
+**Solution:** Install the core package:
+
+```bash
+cd core && pip install -e .
+```
+
+### "ModuleNotFoundError: No module named 'aden_tools'"
+
+**Solution:** Install the tools package:
+
+```bash
+cd tools && pip install -e .
+```
+
+Or run the setup script:
+
+```bash
+./quickstart.sh
+```
+
+### "ModuleNotFoundError: No module named 'openai.\_models'"
+
+**Cause:** Outdated `openai` package (0.27.x) incompatible with `litellm`
+
+**Solution:** Upgrade openai:
+
+```bash
+pip install --upgrade "openai>=1.0.0"
+```
+
+### "No module named 'your_agent_name'"
+
+**Cause:** Not running from project root, missing PYTHONPATH, or agent not yet created
+
+**Solution:** Ensure you're in the project root directory, have built an agent, and use:
+
+```bash
+PYTHONPATH=core:exports python -m your_agent_name validate
+```
+
+### Agent imports fail with "broken installation"
+
+**Symptom:** `pip list` shows packages pointing to non-existent directories
+
+**Solution:** Reinstall packages properly:
+
+```bash
+# Remove broken installations
+pip uninstall -y framework tools
+
+# Reinstall correctly
+./quickstart.sh
+```
+
+## Package Structure
+
+The Hive framework consists of three Python packages:
+
+```
+hive/
+├── core/                    # Core framework (runtime, graph executor, LLM providers)
+│   ├── framework/
+│   ├── .venv/              # Created by quickstart.sh
+│   └── pyproject.toml
+│
+├── tools/                   # Tools and MCP servers
+│   ├── src/
+│   │   └── aden_tools/     # Actual package location
+│   ├── .venv/              # Created by quickstart.sh
+│   └── pyproject.toml
+│
+└── exports/                 # Agent packages (user-created, gitignored)
+    └── your_agent_name/     # Created via /building-agents-construction
+```
+
+## Separate Virtual Environments
+
+The project uses **separate virtual environments** for `core` and `tools` packages to:
+
+- Isolate dependencies and avoid conflicts
+- Allow independent development and testing of each package
+- Enable MCP servers to run with their specific dependencies
+
+### How It Works
+
+Running `./quickstart.sh` (or `uv sync` in each directory) creates two environments:
+
+1. **core/.venv/** - Contains the `framework` package and its dependencies (anthropic, litellm, mcp, etc.)
+2. **tools/.venv/** - Contains the `aden_tools` package and its dependencies (beautifulsoup4, pandas, etc.)
+
+### Cross-Package Imports
+
+The `core` and `tools` packages are **intentionally independent**:
+
+- **No cross-imports**: `framework` does not import `aden_tools` directly, and vice versa
+- **Communication via MCP**: Tools are exposed to agents through MCP servers, not direct Python imports
+- **Runtime integration**: The agent runner loads tools via the MCP protocol at runtime
+
+If you need to use both packages in a single script (e.g., for testing), you have two options:
+
+```bash
+# Option 1: Install both in a shared environment
+python -m venv .venv
+source .venv/bin/activate
+pip install -e core/ -e tools/
+
+# Option 2: Use PYTHONPATH (for quick testing)
+PYTHONPATH=core:tools/src python your_script.py
+```
+
+### MCP Server Configuration
+
+The `.mcp.json` at project root configures MCP servers to use their respective virtual environments:
+
+```json
+{
+  "mcpServers": {
+    "agent-builder": {
+      "command": "core/.venv/bin/python",
+      "args": ["-m", "framework.mcp.agent_builder_server"]
+    },
+    "tools": {
+      "command": "tools/.venv/bin/python",
+      "args": ["-m", "aden_tools.mcp_server", "--stdio"]
+    }
+  }
+}
+```
+
+This ensures each MCP server runs with its correct dependencies.
+
+### Why PYTHONPATH is Required
+
+The packages are installed in **editable mode** (`pip install -e`), which means:
+
+- `framework` and `aden_tools` are globally importable (no PYTHONPATH needed)
+- `exports` is NOT installed as a package (PYTHONPATH required)
+
+This design allows agents in `exports/` to be:
+
+- Developed independently
+- Version controlled separately
+- Deployed as standalone packages
+
+## Development Workflow
+
+### 1. Setup (Once)
+
+```bash
+./quickstart.sh
+```
+
+### 2. Build Agent (Claude Code)
+
+```
+claude> /building-agents-construction
+Enter goal: "Build an agent that processes customer support tickets"
+```
+
+### 3. Validate Agent
+
+```bash
+PYTHONPATH=core:exports python -m your_agent_name validate
+```
+
+### 4. Test Agent
+
+```
+claude> /testing-agent
+```
+
+### 5. Run Agent
+
+```bash
+PYTHONPATH=core:exports python -m your_agent_name run --input '{...}'
+```
+
+## IDE Setup
+
+### VSCode
+
+Add to `.vscode/settings.json`:
+
+```json
+{
+  "python.analysis.extraPaths": [
+    "${workspaceFolder}/core",
+    "${workspaceFolder}/exports"
+  ],
+  "python.autoComplete.extraPaths": [
+    "${workspaceFolder}/core",
+    "${workspaceFolder}/exports"
+  ]
+}
+```
+
+### PyCharm
+
+1. Open Project Settings → Project Structure
+2. Mark `core` as Sources Root
+3. Mark `exports` as Sources Root
+
+## Environment Variables
+
+### Required for LLM Operations
+
+```bash
+export ANTHROPIC_API_KEY="sk-ant-..."
+```
+
+### Optional Configuration
+
+```bash
+# Credentials storage location (default: ~/.aden/credentials)
+export ADEN_CREDENTIALS_PATH="/custom/path"
+
+# Agent storage location (default: /tmp)
+export AGENT_STORAGE_PATH="/custom/storage"
+```
+
+## Additional Resources
+
+- **Framework Documentation:** [core/README.md](core/README.md)
+- **Tools Documentation:** [tools/README.md](tools/README.md)
+- **Example Agents:** [exports/](exports/)
+- **Agent Building Guide:** [.claude/skills/building-agents-construction/SKILL.md](.claude/skills/building-agents-construction/SKILL.md)
+- **Testing Guide:** [.claude/skills/testing-agent/SKILL.md](.claude/skills/testing-agent/SKILL.md)
+
+## Contributing
+
+When contributing agent packages:
+
+1. Place agents in `exports/agent_name/`
+2. Follow the standard agent structure (see existing agents)
+3. Include README.md with usage instructions
+4. Add tests if using `/testing-agent`
+5. Document required environment variables
+
+## Support
+
+- **Issues:** https://github.com/adenhq/hive/issues
+- **Discord:** https://discord.com/invite/MXE49hrKDk
+- **Documentation:** https://docs.adenhq.com/
@@ -0,0 +1,26 @@
+.PHONY: lint format check test install-hooks help
+
+help: ## Show this help
+	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \
+		awk 'BEGIN {FS = ":.*?## "}; {printf "  \033[36m%-15s\033[0m %s\n", $$1, $$2}'
+
+lint: ## Run ruff linter (with auto-fix)
+	cd core && ruff check --fix .
+	cd tools && ruff check --fix .
+
+format: ## Run ruff formatter
+	cd core && ruff format .
+	cd tools && ruff format .
+
+check: ## Run all checks without modifying files (CI-safe)
+	cd core && ruff check .
+	cd tools && ruff check .
+	cd core && ruff format --check .
+	cd tools && ruff format --check .
+
+test: ## Run all tests
+	cd core && python -m pytest tests/ -v
+
+install-hooks: ## Install pre-commit hooks
+	pip install pre-commit
+	pre-commit install
@@ -0,0 +1,51 @@
+## Summary
+- **Added HubSpot integration** — new HubSpot MCP tool with search, get, create, and update operations for contacts, companies, and deals. Includes OAuth2 provider for HubSpot credentials and credential store adapter for the tools layer.
+- **Replaced web_scrape tool with Playwright + stealth** — swapped httpx/BeautifulSoup for a headless Chromium browser using `playwright` (async API) and `playwright-stealth`, enabling JS-rendered page scraping and bot detection evasion
+- **Added empty response retry logic** — LLM provider now detects empty responses (e.g. Gemini returning 200 with no content on rate limit) and retries with exponential backoff, preventing hallucinated output from the cleanup LLM
+- **Added context-aware input compaction** — LLM nodes now estimate input token count before calling the model and progressively truncate the largest values if they exceed the context window budget
+- **Increased rate limit retries to 10** with verbose `[retry]` and `[compaction]` logging that includes model name, finish reason, and attempt count
+- **Updated setup scripts** — `scripts/setup-python.sh` now installs Playwright Chromium browser automatically for web scraping support
+- **Interactive quickstart onboarding** — `quickstart.sh` rewritten as bee-themed interactive wizard that detects existing API keys (including Claude Code subscription), lets user pick ONE default LLM provider, and saves configuration to `~/.hive/configuration.json`
+- **Fixed lint errors** across `hubspot_tool.py` (line length) and `agent_builder_server.py` (unused variable)
+
+## Changed files
+
+### HubSpot Integration
+- `tools/src/aden_tools/tools/hubspot_tool/` — New MCP tool: contacts, companies, and deals CRUD
+- `tools/src/aden_tools/tools/__init__.py` — Registered HubSpot tools
+- `tools/src/aden_tools/credentials/integrations.py` — HubSpot credential integration
+- `tools/src/aden_tools/credentials/__init__.py` — Updated credential exports
+- `core/framework/credentials/oauth2/hubspot_provider.py` — HubSpot OAuth2 provider
+- `core/framework/credentials/oauth2/__init__.py` — Registered HubSpot OAuth2 provider
+- `core/framework/runner/runner.py` — Updated runner for credential support
+
+### Web Scrape Rewrite
+- `tools/src/aden_tools/tools/web_scrape_tool/web_scrape_tool.py` — Playwright async rewrite
+- `tools/src/aden_tools/tools/web_scrape_tool/README.md` — Updated docs
+- `tools/pyproject.toml` — Added `playwright`, `playwright-stealth` deps
+- `tools/Dockerfile` — Added `playwright install chromium --with-deps`
+- `scripts/setup-python.sh` — Added Playwright Chromium browser install step
+
+### LLM Reliability
+- `core/framework/llm/litellm.py` — Empty response retry + max retries 10 + verbose logging
+- `core/framework/graph/node.py` — Input compaction via `_compact_inputs()`, `_estimate_tokens()`, `_get_context_limit()`
+
+### Quickstart & Setup
+- `quickstart.sh` — Interactive bee-themed onboarding wizard with single provider selection
+- `~/.hive/configuration.json` — New user config file for default LLM provider/model
+
+### Fixes
+- `core/framework/mcp/agent_builder_server.py` — Removed unused variable
+- `tools/src/aden_tools/tools/hubspot_tool/hubspot_tool.py` — Fixed E501 line length violations
+
+## Test plan
+- [ ] Run `make lint` — passes clean
+- [ ] Run `./quickstart.sh` and verify interactive flow works, config saved to `~/.hive/configuration.json`
+- [ ] Run `./scripts/setup-python.sh` and verify Playwright Chromium installs
+- [ ] Run `pytest tests/tools/test_web_scrape_tool.py -v`
+- [ ] Run agent against a JS-heavy site and verify `web_scrape` returns rendered content
+- [ ] Set `HUBSPOT_ACCESS_TOKEN` and verify HubSpot tool CRUD operations work
+- [ ] Trigger rate limit and verify `[retry]` logs appear with correct attempt counts
+- [ ] Run agent with large inputs and verify `[compaction]` logs show truncation
+
+🤖 Generated with [Claude Code](https://claude.com/claude-code)
@@ -1,9 +1,16 @@
-# Hive
-
-Hive is an easy way to craete reliable agenst with expanding toolkits. 
+<p align="center">
+  <img width="100%" alt="Hive Banner" src="https://storage.googleapis.com/aden-prod-assets/website/aden-title-card.png" />
+</p>

 <p align="center">
-  <img width="100%" alt="Hive Banner" src="https://storage.googleapis.com/aden-prod-assets/website/title-card.png" />
+  <a href="README.md">English</a> |
+  <a href="docs/i18n/zh-CN.md">简体中文</a> |
+  <a href="docs/i18n/es.md">Español</a> |
+  <a href="docs/i18n/hi.md">हिन्दी</a> |
+  <a href="docs/i18n/pt.md">Português</a> |
+  <a href="docs/i18n/ja.md">日本語</a> |
+  <a href="docs/i18n/ru.md">Русский</a> |
+  <a href="docs/i18n/ko.md">한국어</a>
 </p>

 [![Apache 2.0 License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/adenhq/hive/blob/main/LICENSE)
@@ -13,12 +20,40 @@ Hive is an easy way to craete reliable agenst with expanding toolkits.
 [![Twitter Follow](https://img.shields.io/twitter/follow/teamaden?logo=X&color=%23f5f5f5)](https://x.com/aden_hq)
 [![LinkedIn](https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff)](https://www.linkedin.com/company/teamaden/)

+<p align="center">
+  <img src="https://img.shields.io/badge/AI_Agents-Self--Improving-brightgreen?style=flat-square" alt="AI Agents" />
+  <img src="https://img.shields.io/badge/Multi--Agent-Systems-blue?style=flat-square" alt="Multi-Agent" />
+  <img src="https://img.shields.io/badge/Goal--Driven-Development-purple?style=flat-square" alt="Goal-Driven" />
+  <img src="https://img.shields.io/badge/Human--in--the--Loop-orange?style=flat-square" alt="HITL" />
+  <img src="https://img.shields.io/badge/Production--Ready-red?style=flat-square" alt="Production" />
+</p>
+<p align="center">
+  <img src="https://img.shields.io/badge/OpenAI-supported-412991?style=flat-square&logo=openai" alt="OpenAI" />
+  <img src="https://img.shields.io/badge/Anthropic-supported-d4a574?style=flat-square" alt="Anthropic" />
+  <img src="https://img.shields.io/badge/Google_Gemini-supported-4285F4?style=flat-square&logo=google" alt="Gemini" />
+  <img src="https://img.shields.io/badge/MCP-19_Tools-00ADD8?style=flat-square" alt="MCP" />
+</p>
+
 ## Overview

-Hive provides advanced runtime control for your AI agents, enabling you to observe, intervene, and dynamically adjust agent behavior as it executes. By giving you real-time visibility and control, Hive helps you build more reliable AI systems—catching and correcting issues during execution rather than reacting after failures occur.
+Build reliable, self-improving AI agents without hardcoding workflows. Define your goal through conversation with a coding agent, and the framework generates a node graph with dynamically created connection code. When things break, the framework captures failure data, evolves the agent through the coding agent, and redeploys. Built-in human-in-the-loop nodes, credential management, and real-time monitoring give you control without sacrificing adaptability.

 Visit [adenhq.com](https://adenhq.com) for complete documentation, examples, and guides.

+## What is Aden
+
+<p align="center">
+  <img width="100%" alt="Aden Architecture" src="docs/assets/aden-architecture-diagram.jpg" />
+</p>
+
+Aden is a platform for building, deploying, operating, and adapting AI agents:
+
+- **Build** - A Coding Agent generates specialized Worker Agents (Sales, Marketing, Ops) from natural language goals
+- **Deploy** - Headless deployment with CI/CD integration and full API lifecycle management
+- **Operate** - Real-time monitoring, observability, and runtime guardrails keep agents reliable
+- **Adapt** - Continuous evaluation, supervision, and adaptation ensure agents improve over time
+- **Infra** - Shared memory, LLM integrations, tools, and skills power every agent
+
 ## Quick Links

 - **[Documentation](https://docs.adenhq.com/)** - Complete guides and API reference
@@ -31,8 +66,8 @@ Visit [adenhq.com](https://adenhq.com) for complete documentation, examples, and

 ### Prerequisites

- [Docker](https://docs.docker.com/get-docker/) (v20.10+)
- [Docker Compose](https://docs.docker.com/compose/install/) (v2.0+)
+- [Python 3.11+](https://www.python.org/downloads/) for agent development
+- Claude Code or Cursor for utilizing agent skills

 ### Installation

@@ -41,75 +76,248 @@ Visit [adenhq.com](https://adenhq.com) for complete documentation, examples, and
 git clone https://github.com/adenhq/hive.git
 cd hive

-# Copy and configure
-cp config.yaml.example config.yaml
-
-# Run setup and start services
-npm run setup
-docker compose up
+# Run quickstart setup
+./quickstart.sh
 ```

-**Access the application:**
+This sets up:
+- **framework** - Core agent runtime and graph executor (in `core/.venv`)
+- **aden_tools** - MCP tools for agent capabilities (in `tools/.venv`)
+- All required Python dependencies

- Dashboard: http://localhost:3000
- API: http://localhost:4000
- Health: http://localhost:4000/health
+### Build Your First Agent
+
+```bash
+# Build an agent using Claude Code
+claude> /building-agents-construction
+
+# Test your agent
+claude> /testing-agent
+
+# Run your agent
+PYTHONPATH=core:exports python -m your_agent_name run --input '{...}'
+```
+
+**[📖 Complete Setup Guide](ENVIRONMENT_SETUP.md)** - Detailed instructions for agent development
+
+### Cursor IDE Support
+
+Skills are also available in Cursor. To enable:
+
+1. Open Command Palette (`Cmd+Shift+P` / `Ctrl+Shift+P`)
+2. Run `MCP: Enable` to enable MCP servers
+3. Restart Cursor to load the MCP servers from `.cursor/mcp.json`
+4. Type `/` in Agent chat and search for skills (e.g., `/building-agents-construction`)

 ## Features

- **Observe** - Real-time visibility into agent execution, decisions, and performance
- **Metrics & Analytics** - Track costs, latency, and token usage with TimescaleDB
- **Cost Control** - Set budgets and policies to manage LLM spending
- **Real-time Events** - WebSocket streaming for live agent monitoring
- **Self-Hostable** - Deploy on your own infrastructure with full control
- **Production-Ready** - Built for scale and reliability
+- **Goal-Driven Development** - Define objectives in natural language; the coding agent generates the agent graph and connection code to achieve them
+- **Adaptiveness** - Framework captures failures, calibrates according to the objectives, and evolves the agent graph
+- **Dynamic Node Connections** - No predefined edges; connection code is generated by any capable LLM based on your goals
+- **SDK-Wrapped Nodes** - Every node gets shared memory, local RLM memory, monitoring, tools, and LLM access out of the box
+- **Human-in-the-Loop** - Intervention nodes that pause execution for human input with configurable timeouts and escalation
+- **Real-time Observability** - WebSocket streaming for live monitoring of agent execution, decisions, and node-to-node communication
+- **Cost & Budget Control** - Set spending limits, throttles, and automatic model degradation policies
+- **Production-Ready** - Self-hostable, built for scale and reliability

-## Project Structure
+## Why Aden

-```
-hive/
-├── honeycomb/          # Frontend (React + TypeScript + Vite)
-├── hive/               # Backend (Node.js + TypeScript + Express)
-├── docs/               # Documentation
-├── scripts/            # Build and utility scripts
-├── config.yaml.example # Configuration template
-└── docker-compose.yml  # Container orchestration
+Hive focuses on generating agents that run real business processes rather than generic agents. Instead of requiring you to manually design workflows, define agent interactions, and handle failures reactively, Hive flips the paradigm: **you describe outcomes, and the system builds itself**—delivering an outcome-driven, adaptive experience with an easy-to-use set of tools and integrations.
+
+```mermaid
+flowchart LR
+    GOAL["Define Goal"] --> GEN["Auto-Generate Graph"]
+    GEN --> EXEC["Execute Agents"]
+    EXEC --> MON["Monitor & Observe"]
+    MON --> CHECK{{"Pass?"}}
+    CHECK -- "Yes" --> DONE["Deliver Result"]
+    CHECK -- "No" --> EVOLVE["Evolve Graph"]
+    EVOLVE --> EXEC
+
+    GOAL -.- V1["Natural Language"]
+    GEN -.- V2["Instant Architecture"]
+    EXEC -.- V3["Easy Integrations"]
+    MON -.- V4["Full visibility"]
+    EVOLVE -.- V5["Adaptability"]
+    DONE -.- V6["Reliable outcomes"]
+
+    style GOAL fill:#ffbe42,stroke:#cc5d00,stroke-width:2px,color:#333
+    style GEN fill:#ffb100,stroke:#cc5d00,stroke-width:2px,color:#333
+    style EXEC fill:#ff9800,stroke:#cc5d00,stroke-width:2px,color:#fff
+    style MON fill:#ff9800,stroke:#cc5d00,stroke-width:2px,color:#fff
+    style CHECK fill:#fff59d,stroke:#ed8c00,stroke-width:2px,color:#333
+    style DONE fill:#4caf50,stroke:#2e7d32,stroke-width:2px,color:#fff
+    style EVOLVE fill:#e8763d,stroke:#cc5d00,stroke-width:2px,color:#fff
+    style V1 fill:#fff,stroke:#ed8c00,stroke-width:1px,color:#cc5d00
+    style V2 fill:#fff,stroke:#ed8c00,stroke-width:1px,color:#cc5d00
+    style V3 fill:#fff,stroke:#ed8c00,stroke-width:1px,color:#cc5d00
+    style V4 fill:#fff,stroke:#ed8c00,stroke-width:1px,color:#cc5d00
+    style V5 fill:#fff,stroke:#ed8c00,stroke-width:1px,color:#cc5d00
+    style V6 fill:#fff,stroke:#ed8c00,stroke-width:1px,color:#cc5d00
 ```

-## Development
+### The Aden Advantage

-### Local Development with Hot Reload
+| Traditional Frameworks     | Aden                                   |
+| -------------------------- | -------------------------------------- |
+| Hardcode agent workflows   | Describe goals in natural language     |
+| Manual graph definition    | Auto-generated agent graphs            |
+| Reactive error handling    | Outcome-evaluation and adaptiveness               |
+| Static tool configurations | Dynamic SDK-wrapped nodes              |
+| Separate monitoring setup  | Built-in real-time observability       |
+| DIY budget management      | Integrated cost controls & degradation |
+
+### How It Works
+
+1. **Define Your Goal** → Describe what you want to achieve in plain English
+2. **Coding Agent Generates** → Creates the agent graph, connection code, and test cases
+3. **Workers Execute** → SDK-wrapped nodes run with full observability and tool access
+4. **Control Plane Monitors** → Real-time metrics, budget enforcement, policy management
+5. **Adaptiveness** → On failure, the system evolves the graph and redeploys automatically
+
+## Run pre-built Agents (Coming Soon)
+
+### Run a sample agent
+Aden Hive provides a list of featured agents that you can use and build on top of.
+
+### Run an agent shared by others
+Put the agent in `exports/` and run `PYTHONPATH=core:exports python -m your_agent_name run --input '{...}'`
+
+
+For building and running goal-driven agents with the framework:

 ```bash
-# Copy development overrides
-cp docker-compose.override.yml.example docker-compose.override.yml
+# One-time setup
+./quickstart.sh

-# Start with hot reload enabled
-docker compose up
+# This sets up:
+# - framework package (core runtime)
+# - aden_tools package (MCP tools)
+# - All Python dependencies
+
+# Build new agents using Claude Code skills
+claude> /building-agents-construction
+
+# Test agents
+claude> /testing-agent
+
+# Run agents
+PYTHONPATH=core:exports python -m agent_name run --input '{...}'
 ```

-### Running Without Docker
-
-```bash
-# Install dependencies
-npm install
-
-# Generate environment files
-npm run generate:env
-
-# Start frontend (in honeycomb/)
-cd honeycomb && npm run dev
-
-# Start backend (in hive/)
-cd hive && npm run dev
-```
+See [ENVIRONMENT_SETUP.md](ENVIRONMENT_SETUP.md) for complete setup instructions.

 ## Documentation

 - **[Developer Guide](DEVELOPER.md)** - Comprehensive guide for developers
 - [Getting Started](docs/getting-started.md) - Quick setup instructions
 - [Configuration Guide](docs/configuration.md) - All configuration options
- [Architecture Overview](docs/architecture.md) - System design and structure
+- [Architecture Overview](docs/architecture/README.md) - System design and structure
+
+## Roadmap
+
+Aden Hive Agent Framework aims to help developers build outcome-oriented, self-adaptive agents. See [ROADMAP.md](ROADMAP.md) for details.
+
+```mermaid
+flowchart TD
+subgraph Foundation
+    direction LR
+    subgraph arch["Architecture"]
+        a1["Node-Based Architecture"]:::done
+        a2["Python SDK"]:::done
+        a3["LLM Integration"]:::done
+        a4["Communication Protocol"]:::done
+    end
+    subgraph ca["Coding Agent"]
+        b1["Goal Creation Session"]:::done
+        b2["Worker Agent Creation"]
+        b3["MCP Tools"]:::done
+    end
+    subgraph wa["Worker Agent"]
+        c1["Human-in-the-Loop"]:::done
+        c2["Callback Handlers"]:::done
+        c3["Intervention Points"]:::done
+        c4["Streaming Interface"]
+    end
+    subgraph cred["Credentials"]
+        d1["Setup Process"]:::done
+        d2["Pluggable Sources"]:::done
+        d3["Enterprise Secrets"]
+        d4["Integration Tools"]:::done
+    end
+    subgraph tools["Tools"]
+        e1["File Use"]:::done
+        e2["Memory STM/LTM"]:::done
+        e3["Web Search/Scraper"]:::done
+        e4["CSV/PDF"]:::done
+        e5["Excel/Email"]
+    end
+    subgraph core["Core"]
+        f1["Eval System"]
+        f2["Pydantic Validation"]:::done
+        f3["Documentation"]:::done
+        f4["Adaptiveness"]
+        f5["Sample Agents"]
+    end
+end
+
+subgraph Expansion
+    direction LR
+    subgraph intel["Intelligence"]
+        g1["Guardrails"]
+        g2["Streaming Mode"]
+        g3["Image Generation"]
+        g4["Semantic Search"]
+    end
+    subgraph mem["Memory Iteration"]
+        h1["Message Model & Sessions"]
+        h2["Storage Migration"]
+        h3["Context Building"]
+        h4["Proactive Compaction"]
+        h5["Token Tracking"]
+    end
+    subgraph evt["Event System"]
+        i1["Event Bus for Nodes"]
+    end
+    subgraph cas["Coding Agent Support"]
+        j1["Claude Code"]
+        j2["Cursor"]
+        j3["Opencode"]
+        j4["Antigravity"]
+    end
+    subgraph plat["Platform"]
+        k1["JavaScript/TypeScript SDK"]
+        k2["Custom Tool Integrator"]
+        k3["Windows Support"]
+    end
+    subgraph dep["Deployment"]
+        l1["Self-Hosted"]
+        l2["Cloud Services"]
+        l3["CI/CD Pipeline"]
+    end
+    subgraph tmpl["Templates"]
+        m1["Sales Agent"]
+        m2["Marketing Agent"]
+        m3["Analytics Agent"]
+        m4["Training Agent"]
+        m5["Smart Form Agent"]
+    end
+end
+
+classDef done fill:#9e9e9e,color:#fff,stroke:#757575
+```
+
+## Contributing
+
+We welcome contributions from the community! We’re especially looking for help building tools, integrations, and example agents for the framework ([check #2805](https://github.com/adenhq/hive/issues/2805)). If you’re interested in extending its functionality, this is the perfect place to start. Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
+
+**Important:** Please get assigned to an issue before submitting a PR. Comment on an issue to claim it, and a maintainer will assign you. Issues with reproducible steps and proposals are prioritized. This helps prevent duplicate work. 
+
+1. Find or create an issue and get assigned
+2. Fork the repository
+3. Create your feature branch (`git checkout -b feature/amazing-feature`)
+4. Commit your changes (`git commit -m 'Add amazing feature'`)
+5. Push to the branch (`git push origin feature/amazing-feature`)
+6. Open a Pull Request

 ## Community & Support

@@ -119,16 +327,6 @@ We use [Discord](https://discord.com/invite/MXE49hrKDk) for support, feature req
 - Twitter/X - [@adenhq](https://x.com/aden_hq)
 - LinkedIn - [Company Page](https://www.linkedin.com/company/teamaden/)

-## Contributing
-
-We welcome contributions! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
-
-1. Fork the repository
-2. Create your feature branch (`git checkout -b feature/amazing-feature`)
-3. Commit your changes (`git commit -m 'Add amazing feature'`)
-4. Push to the branch (`git push origin feature/amazing-feature`)
-5. Open a Pull Request
-
 ## Join Our Team

 **We're hiring!** Join us in engineering, research, and go-to-market roles.
@@ -143,8 +341,82 @@ For security concerns, please see [SECURITY.md](SECURITY.md).

 This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details.

+## Frequently Asked Questions (FAQ)
+
+**Q: Does Hive depend on LangChain or other agent frameworks?**
+
+No. Hive is built from the ground up with no dependencies on LangChain, CrewAI, or other agent frameworks. The framework is designed to be lean and flexible, generating agent graphs dynamically rather than relying on predefined components.
+
+**Q: What LLM providers does Hive support?**
+
+Hive supports 100+ LLM providers through LiteLLM integration, including OpenAI (GPT-4, GPT-4o), Anthropic (Claude models), Google Gemini, DeepSeek, Mistral, Groq, and many more. Simply set the appropriate API key environment variable and specify the model name.
+
+**Q: Can I use Hive with local AI models like Ollama?**
+
+Yes! Hive supports local models through LiteLLM. Simply use the model name format `ollama/model-name` (e.g., `ollama/llama3`, `ollama/mistral`) and ensure Ollama is running locally.
+
+**Q: What makes Hive different from other agent frameworks?**
+
+Hive generates your entire agent system from natural language goals using a coding agent—you don't hardcode workflows or manually define graphs. When agents fail, the framework automatically captures failure data, evolves the agent graph, and redeploys. This self-improving loop is unique to Aden.
+
+**Q: Is Hive open-source?**
+
+Yes, Hive is fully open-source under the Apache License 2.0. We actively encourage community contributions and collaboration.
+
+**Q: Does Hive collect data from users?**
+
+Hive collects telemetry data for monitoring and observability purposes, including token usage, latency metrics, and cost tracking. Content capture (prompts and responses) is configurable and stored with team-scoped data isolation. All data stays within your infrastructure when self-hosted.
+
+**Q: What deployment options does Hive support?**
+
+Hive supports self-hosted deployments via Python packages. See the [Environment Setup Guide](ENVIRONMENT_SETUP.md) for installation instructions. Cloud deployment options and Kubernetes-ready configurations are on the roadmap.
+
+**Q: Can Hive handle complex, production-scale use cases?**
+
+Yes. Hive is explicitly designed for production environments with features like automatic failure recovery, real-time observability, cost controls, and horizontal scaling support. The framework handles both simple automations and complex multi-agent workflows.
+
+**Q: Does Hive support human-in-the-loop workflows?**
+
+Yes, Hive fully supports human-in-the-loop workflows through intervention nodes that pause execution for human input. These include configurable timeouts and escalation policies, allowing seamless collaboration between human experts and AI agents.
+
+**Q: What monitoring and debugging tools does Hive provide?**
+
+Hive includes comprehensive observability features: real-time WebSocket streaming for live agent execution monitoring, TimescaleDB-powered analytics for cost and performance metrics, health check endpoints for Kubernetes integration, and MCP tools for agent execution, including file operations, web search, data processing, and more.
+
+**Q: What programming languages does Hive support?**
+
+The Hive framework is built in Python. A JavaScript/TypeScript SDK is on the roadmap.
+
+**Q: Can Aden agents interact with external tools and APIs?**
+
+Yes. Aden's SDK-wrapped nodes provide built-in tool access, and the framework supports flexible tool ecosystems. Agents can integrate with external APIs, databases, and services through the node architecture.
+
+**Q: How does cost control work in Hive?**
+
+Hive provides granular budget controls including spending limits, throttles, and automatic model degradation policies. You can set budgets at the team, agent, or workflow level, with real-time cost tracking and alerts.
+
+**Q: Where can I find examples and documentation?**
+
+Visit [docs.adenhq.com](https://docs.adenhq.com/) for complete guides, API reference, and getting started tutorials. The repository also includes documentation in the `docs/` folder and a comprehensive [DEVELOPER.md](DEVELOPER.md) guide.
+
+**Q: How can I contribute to Aden?**
+
+Contributions are welcome! Fork the repository, create your feature branch, implement your changes, and submit a pull request. See [CONTRIBUTING.md](CONTRIBUTING.md) for detailed guidelines.
+
+**Q: When will my team start seeing results from Aden's adaptive agents?**
+
+Aden's adaptation loop begins working from the first execution. When an agent fails, the framework captures the failure data, helping developers evolve the agent graph through the coding agent. How quickly this translates to measurable results depends on the complexity of your use case, the quality of your goal definitions, and the volume of executions generating feedback.
+
+**Q: How does Hive compare to other agent frameworks?**
+
+Hive focuses on generating agents that run real business processes, rather than generic agents. This vision emphasizes outcome-driven design, adaptability, and an easy-to-use set of tools and integrations.
+
+**Q: Does Aden offer enterprise support?**
+
+For enterprise inquiries, contact the Aden team through [adenhq.com](https://adenhq.com) or join our [Discord community](https://discord.com/invite/MXE49hrKDk) for support and discussions.
+
 ---

 <p align="center">
-  Made with care by the <a href="https://adenhq.com">Aden</a> team
+  Made with 🔥 Passion in San Francisco
 </p>
@@ -0,0 +1,299 @@
+# Product Roadmap
+
+Aden Agent Framework aims to help developers build outcome-oriented, self-adaptive agents. The roadmap is outlined below.
+
+```mermaid
+flowchart TD
+subgraph Foundation
+    direction LR
+    subgraph arch["Architecture"]
+        a1["Node-Based Architecture"]:::done
+        a2["Python SDK"]:::done
+        a3["LLM Integration"]:::done
+        a4["Communication Protocol"]:::done
+    end
+    subgraph ca["Coding Agent"]
+        b1["Goal Creation Session"]:::done
+        b2["Worker Agent Creation"]
+        b3["MCP Tools"]:::done
+    end
+    subgraph wa["Worker Agent"]
+        c1["Human-in-the-Loop"]:::done
+        c2["Callback Handlers"]:::done
+        c3["Intervention Points"]:::done
+        c4["Streaming Interface"]
+    end
+    subgraph cred["Credentials"]
+        d1["Setup Process"]:::done
+        d2["Pluggable Sources"]:::done
+        d3["Enterprise Secrets"]
+        d4["Integration Tools"]:::done
+    end
+    subgraph tools["Tools"]
+        e1["File Use"]:::done
+        e2["Memory STM/LTM"]:::done
+        e3["Web Search/Scraper"]:::done
+        e4["CSV/PDF"]:::done
+        e5["Excel/Email"]
+    end
+    subgraph core["Core"]
+        f1["Eval System"]
+        f2["Pydantic Validation"]:::done
+        f3["Documentation"]:::done
+        f4["Adaptiveness"]
+        f5["Sample Agents"]
+    end
+end
+
+subgraph Expansion
+    direction LR
+    subgraph intel["Intelligence"]
+        g1["Guardrails"]
+        g2["Streaming Mode"]
+        g3["Image Generation"]
+        g4["Semantic Search"]
+    end
+    subgraph mem["Memory Iteration"]
+        h1["Message Model & Sessions"]
+        h2["Storage Migration"]
+        h3["Context Building"]
+        h4["Proactive Compaction"]
+        h5["Token Tracking"]
+    end
+    subgraph evt["Event System"]
+        i1["Event Bus for Nodes"]
+    end
+    subgraph cas["Coding Agent Support"]
+        j1["Claude Code"]
+        j2["Cursor"]
+        j3["Opencode"]
+        j4["Antigravity"]
+    end
+    subgraph plat["Platform"]
+        k1["JavaScript/TypeScript SDK"]
+        k2["Custom Tool Integrator"]
+        k3["Windows Support"]
+    end
+    subgraph dep["Deployment"]
+        l1["Self-Hosted"]
+        l2["Cloud Services"]
+        l3["CI/CD Pipeline"]
+    end
+    subgraph tmpl["Templates"]
+        m1["Sales Agent"]
+        m2["Marketing Agent"]
+        m3["Analytics Agent"]
+        m4["Training Agent"]
+        m5["Smart Form Agent"]
+    end
+end
+
+classDef done fill:#9e9e9e,color:#fff,stroke:#757575
+```
+
+---
+
+## Phase 1: Foundation
+
+### Backbone Architecture
+- [ ] **Node-Based Architecture (Agent as a node)**
+    - [x] Object schema definition
+    - [x] Node wrapper SDK
+    - [x] Shared memory access
+    - [ ] Default monitoring hooks
+    - [x] Tool access layer
+    - [x] LLM integration layer (Natively supports all mainstream LLMs through LiteLLM)
+        - [x] Anthropic
+        - [x] OpenAI
+        - [x] Google
+- [x] **Communication protocol between nodes**
+- [x] **[Coding Agent] Goal Creation Session** (separate from coding session)
+    - [x] Instruction back and forth
+    - [x] Goal Object schema definition
+    - [x] Being able to generate the test cases
+    - [x] Test case validation for worker agent (Outcome driven)
+- [ ] **[Coding Agent] Worker Agent Creation**
+    - [x] Coding Agent tools
+    - [ ] Use Template Agent as a start
+    - [x] Use our MCP tools
+- [ ] **[Worker Agent] Human-in-the-Loop**
+    - [x] Worker Agents request with questions and options
+    - [x] Callback Handler System to receive events throughout execution
+    - [x] Tool-Based Intervention Points (tool to pause execution and request human input)
+    - [x] Multiple entry points for different event sources (e.g., human input, webhook)
+    - [ ] Streaming Interface for Real-time Monitoring
+    - [x] Request State Management
+
+### Credential Management
+- [x] **Credentials Setup Process**
+    - [x] Install Credential MCP
+- [x] **Pluggable Credential Sources**
+    - [x] **Abstraction & Local Sources**
+        - [x] Introduce `CredentialSource` base class
+        - [x] Refactor existing logic into `EnvVarSource`
+        - [x] Implementation of Source Priority Chain mechanism
+        - [ ] Foundation unit tests
+    - [ ] **Enterprise Secret Managers**
+        - [x] `VaultSource` (HashiCorp Vault)
+        - [ ] `AWSSecretsSource` (AWS Secrets Manager)
+        - [ ] `AzureKeyVaultSource` (Azure Key Vault)
+        - [ ] Management of optional provider dependencies
+    - [ ] **Advanced Features**
+        - [x] Credential expiration and auto-refresh
+        - [ ] Audit logging for compliance/tracking
+        - [ ] Per-environment configuration support
+    - [ ] **Documentation & DX**
+        - [ ] Comprehensive source documentation
+        - [ ] Example configurations for all providers
+    - [x] **Integration as tools coverage**
+        - [x] Gsuite Tools
+        - [x] Social Media
+            - [ ] Twitter(X)
+            - [x] GitHub
+            - [ ] Instagram
+        - [ ] SaaS
+            - [ ] Hubspot
+            - [ ] Slack
+            - [ ] Teams
+            - [ ] Zoom
+            - [ ] Stripe
+            - [ ] Salesforce
+
+> [!IMPORTANT]
+> **Community Contribution Wanted**: We appreciate help from the community to expand the "Integration as tools" capability. Open an issue for any integration you would like Hive to support!
+
+### Essential Tools
+- [x] **File Use Tool Kit**
+- [x] **Memory Tools**
+    - [x] STM Layer Tool (state-based short-term memory)
+    - [x] LTM Layer Tool (RLM - long-term memory)
+- [ ] **Infrastructure Tools**
+    - [x] Runtime Log Tool (logs for coding agent)
+    - [x] Web Search
+    - [x] Web Scraper
+    - [x] CSV tools
+    - [x] PDF tools
+    - [ ] Excel tools
+    - [ ] Email Tools
+    - [ ] Recipe for "Add your own tools"
+
+### Memory & File System
+- [x] DB for long-term persistent memory (Filesystem as durable scratchpad pattern)
+- [x] Session Local memory isolation
+
+### Eval System (Basic)
+- [x] Test-driven: run the test cases for every agent iteration
+- [ ] Failure recording mechanism
+- [ ] SDK for defining failure conditions
+- [ ] Basic observability hooks
+- [ ] User-driven log analysis (OSS approach)
+
+### Data Validation
+- [x] Natively support validation of LLM output with Pydantic
+
+### Developer Experience
+- [ ] **MVP Features**
+    - [ ] Debugging mode
+    - [ ] CLI tools for memory management
+    - [ ] CLI tools for credential management
+- [ ] **MVP Resources & Documentation**
+    - [x] Quick start guide
+    - [x] Goal creation guide
+    - [x] Agent creation guide
+    - [x] GitHub Page setup
+    - [x] README with examples
+    - [x] Contributing guidelines
+    - [ ] Introduction Video
+
+### Adaptiveness
+- [ ] Runtime data feedback loop
+- [ ] Instant Developer Feedback for improvement
+
+### Sample Agents
+- [ ] Knowledge Agent
+- [ ] Blog Writer Agent
+- [ ] SDR Agent
+
+---
+
+## Phase 2: Expansion
+
+### Basic Guardrails
+- [ ] Support Basic Monitoring from Agent node SDK
+- [ ] SDK guardrail implementation (in node)
+- [ ] Guardrail type support (Determined Condition as Guardrails)
+
+### Agent Capability
+- [ ] Streaming mode support
+- [ ] Image Generation support
+- [ ] Image and flat-file understanding for end-user input
+
+### Event-loop For Nodes (Opencode-style)
+- [ ] **Event bus**
+
+### Memory System Iteration
+- [ ] **Message Model & Session Management**
+    - [ ] Introduce `Message` class with structured content types
+    - [ ] Implement `Session` classes for conversation state
+- [ ] **Storage Migration**
+    - [ ] Implement granular per-message file persistence (`/message/[agentID]/...`)
+    - [ ] Migrate from monolithic run storage
+- [ ] **Context Building & Conversation Loop**
+    - [ ] Implement `Message.stream(sessionID)`
+    - [ ] Update `LLMNode.execute()` for full context building
+    - [ ] Implement `Message.toModelMessages()` conversion
+- [ ] **Proactive Compaction**
+    - [ ] Implement proactive overflow detection
+    - [ ] Develop backward-scanning pruning strategy (e.g., clearing old tool outputs)
+- [ ] **Enhanced Token Tracking**
+    - [ ] Extend `LLMResponse` to track reasoning and cache tokens
+    - [ ] Integrate granular token metrics into compaction logic
+
+### Coding Agent Support
+- [ ] Claude Code
+- [ ] Cursor
+- [ ] Opencode
+- [ ] Antigravity
+
+### File System Enhancement
+- [ ] Semantic Search integration
+- [ ] Interactive File System in product (frontend integration)
+
+### More Worker Tools
+- [ ] Custom Tool Integrator
+- [ ] Integration as a tool (Credential Store & Support)
+- [ ] **Core Agent Tools**
+    - [ ] Node Discovery Tool (find other agents in the graph)
+    - [ ] HITL Tool (pause execution for human approval)
+    - [ ] Wake-up Tool (resume agent tasks)
+
+### Deployment (Self-Hosted)
+- [ ] Docker container standardization
+- [ ] Headless backend execution
+- [ ] Exposed API for frontend attachment
+- [ ] Local monitoring & observability
+- [ ] Basic lifecycle APIs (Start, Stop, Pause, Resume)
+
+### Deployment (Cloud)
+- [ ] Cloud Service Options
+- [ ] Support deployment to 3rd-party platforms
+- [ ] Self-deploy + orchestrator connection
+- [ ] **CI/CD Pipeline**
+    - [ ] Automated test execution
+    - [ ] Agent version control
+    - [ ] All tests must pass for deployment
+
+### Developer Experience Enhancement
+- [ ] Tool usage documentation
+- [ ] Discord Support Channel
+
+### More Agent Templates
+- [ ] GTM Sales Agent (workflow)
+- [ ] GTM Marketing Agent (workflow)
+- [ ] Analytics Agent
+- [ ] Training Agent
+- [ ] Smart Entry / Form Agent (self-evolution emphasis)
+
+### Cross-Platform
+- [ ] JavaScript / TypeScript Version SDK
+- [ ] Better Windows support
@@ -1,118 +0,0 @@
-# Hive Configuration
-# ======================
-# Copy this file to config.yaml and customize for your environment.
-# Run `npm run setup` to generate .env files from this configuration.
-#
-# For detailed documentation, see: docs/configuration.md
-
-# -----------------------------------------------------------------------------
-# Application Settings
-# -----------------------------------------------------------------------------
-app:
-  # Application name (displayed in UI and logs)
-  name: Hive
-
-  # Environment: development, production, or test
-  environment: development
-
-  # Log level: debug, info, warn, error
-  log_level: info
-
-# -----------------------------------------------------------------------------
-# Server Configuration
-# -----------------------------------------------------------------------------
-server:
-  # Frontend settings
-  frontend:
-    # Port for the frontend application
-    port: 3000
-
-  # Backend (Hive) settings
-  backend:
-    # Port for the backend API
-    port: 4000
-
-    # Host to bind to (0.0.0.0 for all interfaces)
-    host: 0.0.0.0
-
-# -----------------------------------------------------------------------------
-# TimescaleDB Configuration (Time-series metrics storage)
-# -----------------------------------------------------------------------------
-timescaledb:
-  # Connection URL for TimescaleDB
-  # Format: postgresql://user:password@host:port/database
-  url: postgresql://postgres:postgres@localhost:5432/aden_tsdb
-
-  # External port mapping (for docker-compose)
-  port: 5432
-
-# -----------------------------------------------------------------------------
-# MongoDB Configuration (Policies, pricing, control config)
-# -----------------------------------------------------------------------------
-mongodb:
-  # Connection URL for MongoDB
-  url: mongodb://localhost:27017
-
-  # Database name for main data
-  database: aden
-
-  # Database name for ERP data
-  erp_database: erp
-
-  # External port mapping (for docker-compose)
-  port: 27017
-
-# -----------------------------------------------------------------------------
-# Redis Configuration (Caching and Socket.IO)
-# -----------------------------------------------------------------------------
-redis:
-  # Connection URL for Redis
-  url: redis://localhost:6379
-
-  # External port mapping (for docker-compose)
-  port: 6379
-
-# -----------------------------------------------------------------------------
-# Authentication & Security
-# -----------------------------------------------------------------------------
-auth:
-  # JWT secret key - CHANGE THIS IN PRODUCTION!
-  # Generate with: openssl rand -base64 32
-  jwt_secret: change-this-to-a-secure-random-string-min-32-chars
-
-  # JWT token expiration (e.g., 1h, 7d, 30d)
-  jwt_expires_in: 7d
-
-  # Passphrase for additional encryption - CHANGE THIS IN PRODUCTION!
-  passphrase: change-this-to-a-secure-passphrase
-
-# -----------------------------------------------------------------------------
-# NPM Configuration
-# -----------------------------------------------------------------------------
-npm:
-  # NPM token for private package access (if needed)
-  token: ""
-
-# -----------------------------------------------------------------------------
-# CORS Configuration
-# -----------------------------------------------------------------------------
-cors:
-  # Allowed origin for CORS requests
-  # In production, set this to your frontend URL
-  origin: http://localhost:3000
-
-# -----------------------------------------------------------------------------
-# Feature Flags
-# -----------------------------------------------------------------------------
-features:
-  # Enable user registration
-  registration: true
-
-  # Enable API rate limiting
-  rate_limiting: false
-
-  # Enable request logging
-  request_logging: true
-
-  # Enable MCP (Model Context Protocol) server
-  mcp_server: true
@@ -0,0 +1,4 @@
+exports/
+docs/
+.pytest_cache/
+**/__pycache__/
@@ -0,0 +1,14 @@
+{
+  "mcpServers": {
+    "agent-builder": {
+      "command": "python",
+      "args": ["-m", "framework.mcp.agent_builder_server"],
+      "cwd": "core"
+    },
+    "tools": {
+      "command": "python",
+      "args": ["-m", "aden_tools.mcp_server", "--stdio"],
+      "cwd": "tools"
+    }
+  }
+}
@@ -0,0 +1,413 @@
+# Agent Builder MCP Tools - MCP Integration Guide
+
+This guide explains how to use the new MCP integration tools in the agent builder MCP server.
+
+## Overview
+
+The agent builder now supports registering external MCP servers as tool sources. This allows you to:
+
+1. Register MCP servers (such as the `tools` server) during agent building
+2. Discover available tools from those servers
+3. Use those tools in your agent nodes
+4. Automatically generate `mcp_servers.json` configuration on export
+
+## New MCP Tools
+
+### `add_mcp_server`
+
+Register an MCP server as a tool source for your agent.
+
+**Parameters:**
+
+- `name` (string, required): Unique name for the MCP server
+- `transport` (string, required): Transport type - "stdio" or "http"
+- `command` (string): Command to run (for stdio transport)
+- `args` (string): JSON array of command arguments (for stdio)
+- `cwd` (string): Working directory (for stdio)
+- `env` (string): JSON object of environment variables (for stdio)
+- `url` (string): Server URL (for http transport)
+- `headers` (string): JSON object of HTTP headers (for http)
+- `description` (string): Description of the MCP server
+
+**Example - STDIO:**
+
+```json
+{
+  "name": "add_mcp_server",
+  "arguments": {
+    "name": "tools",
+    "transport": "stdio",
+    "command": "python",
+    "args": "[\"mcp_server.py\", \"--stdio\"]",
+    "cwd": "../tools",
+    "description": "Aden tools for web search and file operations"
+  }
+}
+```
+
+**Example - HTTP:**
+
+```json
+{
+  "name": "add_mcp_server",
+  "arguments": {
+    "name": "remote-tools",
+    "transport": "http",
+    "url": "http://localhost:4001",
+    "description": "Remote tool server"
+  }
+}
+```
+
+**Response:**
+
+```json
+{
+  "success": true,
+  "server": {
+    "name": "tools",
+    "transport": "stdio",
+    "command": "python",
+    "args": ["mcp_server.py", "--stdio"],
+    "cwd": "../tools",
+    "description": "Aden tools..."
+  },
+  "tools_discovered": 6,
+  "tools": [
+    "web_search",
+    "web_scrape",
+    "file_read",
+    "file_write",
+    "pdf_read",
+    "example_tool"
+  ],
+  "total_mcp_servers": 1,
+  "note": "MCP server 'tools' registered with 6 tools. These tools can now be used in llm_tool_use nodes."
+}
+```
+
+### `list_mcp_servers`
+
+List all registered MCP servers.
+
+**Parameters:** None
+
+**Response:**
+
+```json
+{
+  "mcp_servers": [
+    {
+      "name": "tools",
+      "transport": "stdio",
+      "command": "python",
+      "args": ["mcp_server.py", "--stdio"],
+      "cwd": "../tools",
+      "description": "Aden tools..."
+    }
+  ],
+  "total": 1
+}
+```
+
+### `list_mcp_tools`
+
+List tools available from registered MCP servers.
+
+**Parameters:**
+
+- `server_name` (string, optional): Name of specific server to list tools from. If omitted, lists tools from all servers.
+
+**Example:**
+
+```json
+{
+  "name": "list_mcp_tools",
+  "arguments": {
+    "server_name": "tools"
+  }
+}
+```
+
+**Response:**
+
+```json
+{
+  "success": true,
+  "tools_by_server": {
+    "tools": [
+      {
+        "name": "web_search",
+        "description": "Search the web for information using Brave Search API...",
+        "parameters": ["query", "num_results", "country"]
+      },
+      {
+        "name": "web_scrape",
+        "description": "Scrape and extract text content from a webpage...",
+        "parameters": ["url", "selector", "include_links", "max_length"]
+      }
+    ]
+  },
+  "total_tools": 6,
+  "note": "Use these tool names in the 'tools' parameter when adding llm_tool_use nodes"
+}
+```
+
+### `remove_mcp_server`
+
+Remove a registered MCP server.
+
+**Parameters:**
+
+- `name` (string, required): Name of the MCP server to remove
+
+**Example:**
+
+```json
+{
+  "name": "remove_mcp_server",
+  "arguments": {
+    "name": "tools"
+  }
+}
+```
+
+**Response:**
+
+```json
+{
+  "success": true,
+  "removed": "tools",
+  "remaining_servers": 0
+}
+```
+
+## Workflow Example
+
+Here's a complete workflow for building an agent with MCP tools:
+
+### 1. Create Session
+
+```json
+{
+  "name": "create_session",
+  "arguments": {
+    "name": "web-research-agent"
+  }
+}
+```
+
+### 2. Register MCP Server
+
+```json
+{
+  "name": "add_mcp_server",
+  "arguments": {
+    "name": "tools",
+    "transport": "stdio",
+    "command": "python",
+    "args": "[\"mcp_server.py\", \"--stdio\"]",
+    "cwd": "../tools"
+  }
+}
+```
+
+### 3. List Available Tools
+
+```json
+{
+  "name": "list_mcp_tools",
+  "arguments": {
+    "server_name": "tools"
+  }
+}
+```
+
+### 4. Set Goal
+
+```json
+{
+  "name": "set_goal",
+  "arguments": {
+    "goal_id": "web-research",
+    "name": "Web Research Agent",
+    "description": "Search the web and summarize findings",
+    "success_criteria": "[{\"id\": \"search-success\", \"description\": \"Successfully retrieve search results\", \"metric\": \"results_count\", \"target\": \">= 3\", \"weight\": 1.0}]"
+  }
+}
+```
+
+### 5. Add Node with MCP Tool
+
+```json
+{
+  "name": "add_node",
+  "arguments": {
+    "node_id": "web-searcher",
+    "name": "Web Search",
+    "description": "Search the web for information",
+    "node_type": "llm_tool_use",
+    "input_keys": "[\"query\"]",
+    "output_keys": "[\"search_results\"]",
+    "system_prompt": "Search for {query} using the web_search tool",
+    "tools": "[\"web_search\"]"
+  }
+}
+```
+
+Note: `web_search` is now available because we registered the tools MCP server!
+
+### 6. Export Agent
+
+```json
+{
+  "name": "export_graph",
+  "arguments": {}
+}
+```
+
+The export will create:
+
+- `exports/web-research-agent/agent.json` - Agent specification
+- `exports/web-research-agent/README.md` - Documentation
+- `exports/web-research-agent/mcp_servers.json` - **MCP server configuration** ✨
+
+## MCP Configuration File
+
+When you export an agent with registered MCP servers, an `mcp_servers.json` file is automatically created:
+
+```json
+{
+  "servers": [
+    {
+      "name": "tools",
+      "transport": "stdio",
+      "command": "python",
+      "args": ["mcp_server.py", "--stdio"],
+      "cwd": "../tools",
+      "description": "Aden tools for web search and file operations"
+    }
+  ]
+}
+```
+
+This file is automatically loaded by the AgentRunner when the agent is executed, making the MCP tools available at runtime.
+
+## Using the Exported Agent
+
+Once exported, load and run the agent normally:
+
+```python
+from framework.runner.runner import AgentRunner
+
+# Load agent - MCP servers auto-load from mcp_servers.json
+runner = AgentRunner.load("exports/web-research-agent")
+
+# Run with input
+result = await runner.run({"query": "latest AI breakthroughs"})
+
+# The web_search tool from tools is automatically available!
+```
+
+## Benefits
+
+1. **Discoverable Tools**: See what tools are available before using them
+2. **Validation**: Connection is tested when registering the server
+3. **Automatic Configuration**: No manual file editing required
+4. **Documentation**: README includes MCP server information
+5. **Runtime Ready**: Exported agents work immediately with configured tools
+
+## Common MCP Servers
+
+### tools
+
+Provides:
+
+- `web_search` - Brave Search API integration
+- `web_scrape` - Web page content extraction
+- `file_read` / `file_write` - File operations
+- `pdf_read` - PDF text extraction
+
+### Custom MCP Servers
+
+You can register any MCP server that follows the Model Context Protocol specification.
+
+## Troubleshooting
+
+### "Failed to connect to MCP server"
+
+- Verify the `command` and `args` are correct
+- Check that the server is accessible at the specified path/URL
+- Ensure any required environment variables are set
+- For STDIO: verify the command can be executed from the `cwd`
+- For HTTP: verify the server is running and accessible
+
+### Tools not appearing
+
+- Use `list_mcp_tools` to verify tools were discovered
+- Check the tool names match exactly (case-sensitive)
+- Ensure the MCP server is still registered (`list_mcp_servers`)
+
+### Export doesn't include mcp_servers.json
+
+- Verify you registered at least one MCP server
+- Check `get_session_status` to see `mcp_servers_count > 0`
+- Re-export the agent after registering servers
+
+## Credential Validation
+
+When adding nodes with tools that require API keys (like `web_search`), the agent builder automatically validates that the required credentials are available.
+
+### How It Works
+
+When you call `add_node` or `update_node` with a `tools` parameter, the agent builder:
+
+1. Checks which tools require credentials (e.g., `web_search` requires `BRAVE_SEARCH_API_KEY`)
+2. Validates those credentials are set in the environment or `.env` file
+3. Returns an error if any credentials are missing
+
+### Missing Credentials Error
+
+If credentials are missing, you'll receive a response like:
+
+```json
+{
+  "valid": false,
+  "errors": ["Missing credentials for tools: ['BRAVE_SEARCH_API_KEY']"],
+  "missing_credentials": [
+    {
+      "credential": "brave_search",
+      "env_var": "BRAVE_SEARCH_API_KEY",
+      "tools_affected": ["web_search"],
+      "help_url": "https://brave.com/search/api/",
+      "description": "API key for Brave Search"
+    }
+  ],
+  "action_required": "Add the credentials to your .env file and retry",
+  "example": "Add to .env:\nBRAVE_SEARCH_API_KEY=your_key_here",
+  "message": "Cannot add node: missing API credentials. Add them to .env and retry this command."
+}
+```
+
+### Fixing Credential Errors
+
+1. Get the required API key from the URL in `help_url`
+2. Add it to your environment:
+
+   ```bash
+   # Option 1: Export directly
+   export BRAVE_SEARCH_API_KEY=your-key-here
+
+   # Option 2: Add to tools/.env
+   echo "BRAVE_SEARCH_API_KEY=your-key-here" >> tools/.env
+   ```
+
+3. Retry the `add_node` command
+
+### Required Credentials by Tool
+
+| Tool         | Credential             | Get Key                                               |
+| ------------ | ---------------------- | ----------------------------------------------------- |
+| `web_search` | `BRAVE_SEARCH_API_KEY` | [brave.com/search/api](https://brave.com/search/api/) |
+
+Note: The MCP server itself requires `ANTHROPIC_API_KEY` at startup for LLM operations.
@@ -0,0 +1,364 @@
+# MCP Integration Guide
+
+This guide explains how to integrate Model Context Protocol (MCP) servers with the Hive Core Framework, enabling agents to use tools from external MCP servers.
+
+## Overview
+
+The framework provides built-in support for MCP servers, allowing you to:
+
+- **Register MCP servers** via STDIO or HTTP transport
+- **Auto-discover tools** from registered servers
+- **Use MCP tools** seamlessly in your agents
+- **Manage multiple MCP servers** simultaneously
+
+## Quick Start
+
+### 1. Register an MCP Server Programmatically
+
+```python
+from framework.runner.runner import AgentRunner
+
+# Load your agent
+runner = AgentRunner.load("exports/my-agent")
+
+# Register tools MCP server
+runner.register_mcp_server(
+    name="tools",
+    transport="stdio",
+    command="python",
+    args=["-m", "aden_tools.mcp_server", "--stdio"],
+    cwd="/path/to/tools"
+)
+
+# Tools are now available to your agent
+result = await runner.run({"input": "data"})
+```
+
+### 2. Use Configuration File
+
+Create `mcp_servers.json` in your agent folder:
+
+```json
+{
+  "servers": [
+    {
+      "name": "tools",
+      "transport": "stdio",
+      "command": "python",
+      "args": ["-m", "aden_tools.mcp_server", "--stdio"],
+      "cwd": "../tools"
+    }
+  ]
+}
+```
+
+The framework will automatically load and register these servers when you load the agent:
+
+```python
+runner = AgentRunner.load("exports/my-agent")  # MCP servers auto-loaded
+```
+
+## Transport Types
+
+### STDIO Transport
+
+Best for local MCP servers running as subprocesses:
+
+```python
+runner.register_mcp_server(
+    name="local-tools",
+    transport="stdio",
+    command="python",
+    args=["-m", "my_tools.server", "--stdio"],
+    cwd="/path/to/my-tools",
+    env={
+        "API_KEY": "your-key-here"
+    }
+)
+```
+
+**Configuration:**
+
+- `command`: Executable to run (e.g., "python", "node")
+- `args`: List of command-line arguments
+- `cwd`: Working directory for the process
+- `env`: Environment variables (optional)
+
+### HTTP Transport
+
+Best for remote MCP servers or containerized deployments:
+
+```python
+runner.register_mcp_server(
+    name="remote-tools",
+    transport="http",
+    url="http://localhost:4001",
+    headers={
+        "Authorization": "Bearer token"
+    }
+)
+```
+
+**Configuration:**
+
+- `url`: Base URL of the MCP server
+- `headers`: HTTP headers to include (optional)
+
+## Using MCP Tools in Agents
+
+Once registered, MCP tools are available just like any other tool:
+
+### In Node Specifications
+
+```python
+from framework.builder.workflow import GraphBuilder
+
+builder = GraphBuilder()
+
+# Add a node that uses MCP tools
+builder.add_node(
+    node_id="researcher",
+    name="Web Researcher",
+    node_type="llm_tool_use",
+    system_prompt="Research the topic using web_search",
+    tools=["web_search"],  # Tool from tools MCP server
+    input_keys=["topic"],
+    output_keys=["findings"]
+)
+```
+
+### In Agent.json
+
+Tools from MCP servers can be referenced in your agent.json just like built-in tools:
+
+```json
+{
+  "nodes": [
+    {
+      "id": "searcher",
+      "name": "Web Searcher",
+      "node_type": "llm_tool_use",
+      "system_prompt": "Search for information about {topic}",
+      "tools": ["web_search", "web_scrape"],
+      "input_keys": ["topic"],
+      "output_keys": ["results"]
+    }
+  ]
+}
+```
+
+## Available Tools from the `tools` Server
+
+When you register the `tools` MCP server, the following tools become available:
+
+- **web_search**: Search the web using Brave Search API
+- **web_scrape**: Scrape content from a URL
+- **file_read**: Read file contents
+- **file_write**: Write content to a file
+- **pdf_read**: Extract text from PDF files
+
+## Environment Variables
+
+Some MCP tools require environment variables. You can pass them in the configuration:
+
+### Via Programmatic Registration
+
+```python
+import os
+
+runner.register_mcp_server(
+    name="tools",
+    transport="stdio",
+    command="python",
+    args=["-m", "aden_tools.mcp_server", "--stdio"],
+    cwd="../tools",
+    env={
+        "BRAVE_SEARCH_API_KEY": os.environ["BRAVE_SEARCH_API_KEY"]
+    }
+)
+```
+
+### Via Configuration File
+
+```json
+{
+  "servers": [
+    {
+      "name": "tools",
+      "transport": "stdio",
+      "command": "python",
+      "args": ["-m", "aden_tools.mcp_server", "--stdio"],
+      "cwd": "../tools",
+      "env": {
+        "BRAVE_SEARCH_API_KEY": "${BRAVE_SEARCH_API_KEY}"
+      }
+    }
+  ]
+}
+```
+
+The framework will substitute `${VAR_NAME}` with values from the environment.
+
+## Multiple MCP Servers
+
+You can register multiple MCP servers to access different sets of tools:
+
+```json
+{
+  "servers": [
+    {
+      "name": "tools",
+      "transport": "stdio",
+      "command": "python",
+      "args": ["-m", "aden_tools.mcp_server", "--stdio"],
+      "cwd": "../tools"
+    },
+    {
+      "name": "database-tools",
+      "transport": "http",
+      "url": "http://localhost:5001"
+    },
+    {
+      "name": "analytics-tools",
+      "transport": "http",
+      "url": "http://analytics-server:6001"
+    }
+  ]
+}
+```
+
+All tools from all servers will be available to your agent.
+
+## Best Practices
+
+### 1. Use STDIO for Development
+
+STDIO transport is easier to debug and doesn't require managing server processes:
+
+```python
+runner.register_mcp_server(
+    name="dev-tools",
+    transport="stdio",
+    command="python",
+    args=["-m", "my_tools.server", "--stdio"]
+)
+```
+
+### 2. Use HTTP for Production
+
+HTTP transport is better for:
+
+- Containerized deployments
+- Shared tools across multiple agents
+- Remote tool execution
+
+```python
+runner.register_mcp_server(
+    name="prod-tools",
+    transport="http",
+    url="http://tools-service:8000"
+)
+```
+
+### 3. Handle Cleanup
+
+Always clean up MCP connections when done:
+
+```python
+runner = AgentRunner.load("exports/my-agent")
+try:
+    runner.register_mcp_server(...)
+    result = await runner.run(input_data)
+finally:
+    runner.cleanup()  # Disconnects all MCP servers
+```
+
+Or use a context manager:
+
+```python
+async with AgentRunner.load("exports/my-agent") as runner:
+    runner.register_mcp_server(...)
+    result = await runner.run(input_data)
+    # Automatic cleanup
+```
+
+### 4. Tool Name Conflicts
+
+If multiple MCP servers provide tools with the same name, the last registered server wins. To avoid conflicts:
+
+- Use unique tool names in your MCP servers
+- Register servers in priority order (most important last)
+- Use separate agents for different tool sets
+
+## Troubleshooting
+
+### Connection Errors
+
+If you get connection errors with STDIO transport:
+
+1. Check that the command and path are correct
+2. Verify the MCP server starts successfully standalone
+3. Check environment variables are set correctly
+4. Look at stderr output for error messages
+
+### Tool Not Found
+
+If a tool is registered but not found:
+
+1. Verify the server registered successfully (check logs)
+2. List available tools: `runner._tool_registry.get_registered_names()`
+3. Check tool name spelling in your node configuration
+
+### HTTP Server Not Responding
+
+If HTTP transport fails:
+
+1. Verify the server is running: `curl http://localhost:4001/health`
+2. Check firewall settings
+3. Verify the URL and port are correct
+
+## Example: Full Agent with MCP Tools
+
+Here's a complete example of an agent that uses MCP tools:
+
+```python
+import asyncio
+from pathlib import Path
+from framework.runner.runner import AgentRunner
+
+async def main():
+    # Create agent path
+    agent_path = Path("exports/web-research-agent")
+
+    # Load agent
+    runner = AgentRunner.load(agent_path)
+
+    try:
+        # Register MCP server
+        runner.register_mcp_server(
+            name="tools",
+            transport="stdio",
+            command="python",
+            args=["-m", "aden_tools.mcp_server", "--stdio"],
+            cwd="../tools",
+            env={
+                "BRAVE_SEARCH_API_KEY": "your-api-key"
+            }
+        )
+
+        # Run agent
+        result = await runner.run({
+            "query": "latest developments in quantum computing"
+        })
+
+        print(f"Research complete: {result}")
+    finally:
+        # Cleanup, even if registration or the run fails
+        runner.cleanup()
+
+if __name__ == "__main__":
+    asyncio.run(main())
+```
+
+## See Also
+
+- [MCP_SERVER_GUIDE.md](MCP_SERVER_GUIDE.md) - Building your own MCP servers
+- [examples/mcp_integration_example.py](examples/mcp_integration_example.py) - More examples
+- [examples/mcp_servers.json](examples/mcp_servers.json) - Example configuration
@@ -0,0 +1,393 @@
+# MCP Server Guide - Agent Builder
+
+This guide covers the MCP (Model Context Protocol) server for building goal-driven agents.
+
+## Setup
+
+### Quick Setup
+
+```bash
+# Using the setup script (recommended)
+python setup_mcp.py
+
+# Or using bash
+./setup_mcp.sh
+```
+
+### Manual Configuration
+
+Add to your MCP client configuration (e.g., Claude Desktop):
+
+```json
+{
+  "mcpServers": {
+    "agent-builder": {
+      "command": "python",
+      "args": ["-m", "framework.mcp.agent_builder_server"],
+      "cwd": "/path/to/goal-agent"
+    }
+  }
+}
+```
+
+## Available MCP Tools
+
+### Session Management
+
+#### `create_session`
+Create a new agent building session.
+
+**Parameters:**
+- `name` (string, required): Name of the agent
+
+**Example:**
+```json
+{
+  "name": "research-summary-agent"
+}
+```
+
+#### `get_session_status`
+Get the current status of the build session.
+
+**Returns:**
+- Session name
+- Goal status
+- Number of nodes
+- Number of edges
+- Validation status
+
+---
+
+### Goal Definition
+
+#### `set_goal`
+Define the goal for the agent with success criteria and constraints.
+
+**Parameters:**
+- `goal_id` (string, required): Unique identifier for the goal
+- `name` (string, required): Human-readable name
+- `description` (string, required): What the agent should accomplish
+- `success_criteria` (string, required): JSON array of success criteria
+- `constraints` (string, optional): JSON array of constraints
+
+**Success Criterion Structure:**
+```json
+{
+  "id": "criterion_id",
+  "description": "What should be achieved",
+  "metric": "How to measure it",
+  "target": "Target value",
+  "weight": 1.0
+}
+```
+
+**Constraint Structure:**
+```json
+{
+  "id": "constraint_id",
+  "description": "What must not happen",
+  "constraint_type": "hard|soft",
+  "category": "safety|quality|performance"
+}
+```
+
+---
+
+### Node Management
+
+#### `add_node`
+Add a processing node to the agent graph.
+
+**Parameters:**
+- `node_id` (string, required): Unique node identifier
+- `name` (string, required): Human-readable name
+- `description` (string, required): What this node does
+- `node_type` (string, required): One of: `llm_generate`, `llm_tool_use`, `router`, `function`
+- `input_keys` (string, required): JSON array of input variable names
+- `output_keys` (string, required): JSON array of output variable names
+- `system_prompt` (string, optional): System prompt for LLM nodes
+- `tools` (string, optional): JSON array of tool names for tool_use nodes
+- `routes` (string, optional): JSON object of route mappings for router nodes
+
+**Node Types:**
+
+1. **llm_generate**: Uses LLM to generate output from inputs
+   - Requires: `system_prompt`
+   - Tools: Not used
+
+2. **llm_tool_use**: Uses LLM with tools to accomplish tasks
+   - Requires: `system_prompt`, `tools`
+   - Tools: Array of tool names (e.g., `["web_search", "web_scrape"]`)
+
+3. **router**: LLM-powered routing to different paths
+   - Requires: `system_prompt`, `routes`
+   - Routes: Object mapping route names to target node IDs
+   - Example: `{"pass": "success_node", "fail": "retry_node"}`
+
+4. **function**: Executes a pre-defined function
+   - System prompt describes the function behavior
+   - No LLM calls, pure computation
+
+**Example:**
+```json
+{
+  "node_id": "search_sources",
+  "name": "Search Sources",
+  "description": "Searches for relevant sources on the topic",
+  "node_type": "llm_tool_use",
+  "input_keys": "[\"topic\", \"search_queries\"]",
+  "output_keys": "[\"sources\", \"source_count\"]",
+  "system_prompt": "Search for sources using the provided queries...",
+  "tools": "[\"web_search\"]"
+}
+```
+
+---
+
+### Edge Management
+
+#### `add_edge`
+Connect two nodes with an edge to define execution flow.
+
+**Parameters:**
+- `edge_id` (string, required): Unique edge identifier
+- `source` (string, required): Source node ID
+- `target` (string, required): Target node ID
+- `condition` (string, optional): When to traverse: `on_success` (default) or `on_failure`
+- `condition_expr` (string, optional): Python expression for conditional routing
+- `priority` (integer, optional): Edge priority (default: 0)
+
+**Example:**
+```json
+{
+  "edge_id": "search_to_extract",
+  "source": "search_sources",
+  "target": "extract_content",
+  "condition": "on_success"
+}
+```
+
+---
+
+### Graph Validation
+
+#### `validate_graph`
+Validate the complete graph structure.
+
+**Checks:**
+- Entry node exists
+- All nodes are reachable from entry
+- Terminal nodes have no outgoing edges
+- No cycles (unless explicitly allowed)
+- Context flow: all required inputs are available
+
+**Returns:**
+- `valid` (boolean)
+- `errors` (array): List of validation errors
+- `warnings` (array): Non-critical issues
+- `entry_node` (string): Entry node ID
+- `terminal_nodes` (array): Terminal node IDs
+
+---
+
+### Graph Export
+
+#### `export_graph`
+Export the validated graph as an agent specification.
+
+**What it does:**
+1. Validates the graph
+2. Auto-generates missing edges from router routes
+3. Writes files to disk:
+   - `exports/{agent-name}/agent.json` - Full agent specification
+   - `exports/{agent-name}/README.md` - Auto-generated documentation
+
+**Returns:**
+- `success` (boolean)
+- `files_written` (object): Paths and sizes of written files
+- `agent` (object): Agent metadata
+- `graph` (object): Graph specification
+- `goal` (object): Goal definition
+- `required_tools` (array): All tools used by the agent
+
+**Important:** This tool automatically writes files to the `exports/` directory!
+
+---
+
+### Testing
+
+#### `test_node`
+Test a single node with sample inputs.
+
+**Parameters:**
+- `node_id` (string, required): Node to test
+- `test_input` (string, required): JSON object with input values
+- `mock_llm_response` (string, optional): Mock LLM response for testing
+
+**Example:**
+```json
+{
+  "node_id": "research_planner",
+  "test_input": "{\"topic\": \"LLM compaction\"}"
+}
+```
+
+#### `test_graph`
+Test the complete agent graph with sample inputs.
+
+**Parameters:**
+- `test_input` (string, required): JSON object with initial inputs
+- `dry_run` (boolean, optional): Simulate without LLM calls (default: true)
+- `max_steps` (integer, optional): Maximum execution steps (default: 10)
+
+**Example:**
+```json
+{
+  "test_input": "{\"topic\": \"AI safety\"}",
+  "dry_run": true,
+  "max_steps": 10
+}
+```
+
+---
+
+### Evaluation Rules
+
+#### `add_evaluation_rule`
+Add a rule for the HybridJudge to evaluate node outputs.
+
+**Parameters:**
+- `rule_id` (string, required): Unique rule identifier
+- `description` (string, required): What this rule checks
+- `condition` (string, required): Python expression to evaluate
+- `action` (string, required): Action to take: `accept`, `retry`, `escalate`
+- `priority` (integer, optional): Rule priority (default: 0)
+- `feedback_template` (string, optional): Feedback message template
+
+**Condition Examples:**
+- `'result.get("success") == True'` - Check for success flag
+- `'result.get("error_type") == "timeout"'` - Check error type
+- `'len(result.get("data", [])) > 0'` - Check for non-empty data
+
+**Example:**
+```json
+{
+  "rule_id": "timeout_retry",
+  "description": "Retry on timeout errors",
+  "condition": "result.get('error_type') == 'timeout'",
+  "action": "retry",
+  "priority": 10,
+  "feedback_template": "Timeout occurred, retrying..."
+}
+```
+
+#### `list_evaluation_rules`
+List all configured evaluation rules.
+
+#### `remove_evaluation_rule`
+Remove an evaluation rule.
+
+**Parameters:**
+- `rule_id` (string, required): Rule to remove
+
+---
+
+## Example Workflow
+
+Here's a complete workflow for building a research agent:
+
+```python
+# 1. Create session
+create_session(name="research-agent")
+
+# 2. Define goal
+set_goal(
+    goal_id="research-goal",
+    name="Research Topic Agent",
+    description="Research a topic and produce a summary",
+    success_criteria=json.dumps([{
+        "id": "comprehensive",
+        "description": "Cover main aspects",
+        "metric": "Key topics addressed",
+        "target": "At least 3-5 aspects",
+        "weight": 1.0
+    }])
+)
+
+# 3. Add nodes
+add_node(
+    node_id="planner",
+    name="Research Planner",
+    description="Creates research strategy",
+    node_type="llm_generate",
+    input_keys='["topic"]',
+    output_keys='["strategy", "queries"]',
+    system_prompt="Analyze topic and create research plan..."
+)
+
+add_node(
+    node_id="searcher",
+    name="Search Sources",
+    description="Find relevant sources",
+    node_type="llm_tool_use",
+    input_keys='["queries"]',
+    output_keys='["sources"]',
+    system_prompt="Search for sources...",
+    tools='["web_search"]'
+)
+
+# 4. Connect nodes
+add_edge(
+    edge_id="plan_to_search",
+    source="planner",
+    target="searcher"
+)
+
+# 5. Validate
+validate_graph()
+
+# 6. Export
+export_graph()
+```
+
+The exported agent will be saved to `exports/research-agent/`.
+
+---
+
+## Tips
+
+1. **Start with the goal**: Define clear success criteria before building nodes
+2. **Test nodes individually**: Use `test_node` to verify each node works
+3. **Use router nodes for branching**: Don't create edges manually for routers - define routes and they'll be auto-generated
+4. **Add evaluation rules**: Help the judge evaluate outputs deterministically
+5. **Validate early, validate often**: Run `validate_graph` after adding nodes/edges
+6. **Check exports**: Review the generated README.md to verify your agent structure
+
+---
+
+## Common Issues
+
+### "Node X is unreachable from entry"
+- Make sure there's a path of edges from the entry node to all nodes
+- Check that you've defined edges connecting your nodes
+
+### "Missing required input Y for node X"
+- Ensure previous nodes output the required inputs
+- Check your input_keys and output_keys match
+
+### "Router routes don't match edges"
+- Don't worry! The export tool auto-generates missing edges from routes
+- If you see this warning, it's informational only
+
+### "Cannot find tool Z"
+- Verify the tool name matches available tools (e.g., "web_search", "web_scrape")
+- Check the `required_tools` section in the exported agent
+
+---
+
+## Resources
+
+- **Framework Documentation**: See [README.md](README.md)
+- **Example Agents**: Check the `exports/` directory for examples
+- **MCP Protocol**: https://modelcontextprotocol.io
@@ -0,0 +1,203 @@
+# Framework
+
+A goal-driven agent runtime with Builder-friendly observability.
+
+## Overview
+
+Framework provides a runtime that captures **decisions**, not just actions. This enables a "Builder" LLM to analyze and improve agent behavior by understanding:
+
+- What the agent was trying to accomplish
+- What options it considered
+- What it chose and why
+- What happened as a result
+
+## Installation
+
+```bash
+pip install -e .
+```
+
+## MCP Server Setup
+
+The framework includes an MCP (Model Context Protocol) server for building agents. To set up the MCP server:
+
+### Automated Setup
+
+**Using bash (Linux/macOS):**
+```bash
+./setup_mcp.sh
+```
+
+**Using Python (cross-platform):**
+```bash
+python setup_mcp.py
+```
+
+The setup script will:
+1. Install the framework package
+2. Install MCP dependencies (mcp, fastmcp)
+3. Create/verify `.mcp.json` configuration
+4. Test the MCP server module
+
+### Manual Setup
+
+If you prefer manual setup:
+
+```bash
+# Install framework
+pip install -e .
+
+# Install MCP dependencies
+pip install mcp fastmcp
+
+# Test the server
+python -m framework.mcp.agent_builder_server
+```
+
+### Using with MCP Clients
+
+To use the agent builder with Claude Desktop or other MCP clients, add this to your MCP client configuration:
+
+```json
+{
+  "mcpServers": {
+    "agent-builder": {
+      "command": "python",
+      "args": ["-m", "framework.mcp.agent_builder_server"],
+      "cwd": "/path/to/goal-agent"
+    }
+  }
+}
+```
+
+The MCP server provides tools for:
+- Creating agent building sessions
+- Defining goals with success criteria
+- Adding nodes (llm_generate, llm_tool_use, router, function)
+- Connecting nodes with edges
+- Validating and exporting agent graphs
+- Testing nodes and full agent graphs
+
+## Quick Start
+
+### Calculator Agent
+
+Run an LLM-powered calculator:
+
+```bash
+# Single calculation
+python -m framework calculate "2 + 3 * 4"
+
+# Interactive mode
+python -m framework interactive
+
+# Analyze runs with Builder
+python -m framework analyze calculator
+```
+
+### Using the Runtime
+
+```python
+from framework import Runtime
+
+runtime = Runtime("/path/to/storage")
+
+# Start a run
+run_id = runtime.start_run("my_goal", "Description of what we're doing")
+
+# Record a decision
+decision_id = runtime.decide(
+    intent="Choose how to process the data",
+    options=[
+        {"id": "fast", "description": "Quick processing", "pros": ["Fast"], "cons": ["Less accurate"]},
+        {"id": "thorough", "description": "Detailed processing", "pros": ["Accurate"], "cons": ["Slower"]},
+    ],
+    chosen="thorough",
+    reasoning="Accuracy is more important for this task"
+)
+
+# Record the outcome
+runtime.record_outcome(
+    decision_id=decision_id,
+    success=True,
+    result={"processed": 100},
+    summary="Processed 100 items with detailed analysis"
+)
+
+# End the run
+runtime.end_run(success=True, narrative="Successfully processed all data")
+```
+
+### Testing Agents
+
+The framework includes a goal-based testing framework for validating agent behavior.
+
+Tests are generated using MCP tools (`generate_constraint_tests`, `generate_success_tests`) which return guidelines. Claude writes tests directly using the Write tool based on these guidelines.
+
+```bash
+# Run tests against an agent
+python -m framework test-run <agent_path> --goal <goal_id> --parallel 4
+
+# Debug failed tests
+python -m framework test-debug <agent_path> <test_name>
+
+# List tests for a goal
+python -m framework test-list <goal_id>
+```
+
+For detailed testing workflows, see the [testing-agent skill](../.claude/skills/testing-agent/SKILL.md).
+
+### Analyzing Agent Behavior with Builder
+
+The BuilderQuery interface allows you to analyze agent runs and identify improvements:
+
+```python
+from framework import BuilderQuery
+
+query = BuilderQuery("/path/to/storage")
+
+# Find patterns across runs
+patterns = query.find_patterns("my_goal")
+print(f"Success rate: {patterns.success_rate:.1%}")
+
+# Analyze a failure
+analysis = query.analyze_failure("run_123")
+print(f"Root cause: {analysis.root_cause}")
+print(f"Suggestions: {analysis.suggestions}")
+
+# Get improvement recommendations
+suggestions = query.suggest_improvements("my_goal")
+for s in suggestions:
+    print(f"[{s['priority']}] {s['recommendation']}")
+```
+
+## Architecture
+
+```
+┌─────────────────┐
+│  Human Engineer │  ← Supervision, approval
+└────────┬────────┘
+         │
+┌────────▼────────┐
+│   Builder LLM   │  ← Analyzes runs, suggests improvements
+│  (BuilderQuery) │
+└────────┬────────┘
+         │
+┌────────▼────────┐
+│   Agent LLM     │  ← Executes tasks, records decisions
+│    (Runtime)    │
+└─────────────────┘
+```
+
+## Key Concepts
+
+- **Decision**: The atomic unit of agent behavior. Captures intent, options, choice, and reasoning.
+- **Run**: A complete execution with all decisions and outcomes.
+- **Runtime**: Interface agents use to record their behavior.
+- **BuilderQuery**: Interface Builder uses to analyze agent behavior.
+
+## Requirements
+
+- Python 3.11+
+- pydantic >= 2.0
+- anthropic >= 0.40.0 (for LLM-powered agents)
@@ -0,0 +1,123 @@
+"""
+Minimal Manual Agent Example
+----------------------------
+This example demonstrates how to build and run an agent programmatically
+without using the Claude Code CLI or external LLM APIs.
+
+It uses 'function' nodes to define logic in pure Python, making it perfect
+for understanding the core runtime loop:
+Setup -> Graph definition -> Execution -> Result
+
+Run with:
+    PYTHONPATH=core python core/examples/manual_agent.py
+"""
+
+import asyncio
+
+from framework.graph import EdgeCondition, EdgeSpec, Goal, GraphSpec, NodeSpec
+from framework.graph.executor import GraphExecutor
+from framework.runtime.core import Runtime
+
+
+# 1. Define Node Logic (Pure Python Functions)
+def greet(name: str) -> str:
+    """Generate a simple greeting."""
+    return f"Hello, {name}!"
+
+
+def uppercase(greeting: str) -> str:
+    """Convert text to uppercase."""
+    return greeting.upper()
+
+
+async def main():
+    print("🚀 Setting up Manual Agent...")
+
+    # 2. Define the Goal
+    # Every agent needs a goal with success criteria
+    goal = Goal(
+        id="greet-user",
+        name="Greet User",
+        description="Generate a friendly uppercase greeting",
+        success_criteria=[
+            {
+                "id": "greeting_generated",
+                "description": "Greeting produced",
+                "metric": "custom",
+                "target": "any",
+            }
+        ],
+    )
+
+    # 3. Define Nodes
+    # Nodes describe steps in the process
+    node1 = NodeSpec(
+        id="greeter",
+        name="Greeter",
+        description="Generates a simple greeting",
+        node_type="function",
+        function="greet",  # Matches the registered function name
+        input_keys=["name"],
+        output_keys=["greeting"],
+    )
+
+    node2 = NodeSpec(
+        id="uppercaser",
+        name="Uppercaser",
+        description="Converts greeting to uppercase",
+        node_type="function",
+        function="uppercase",
+        input_keys=["greeting"],
+        output_keys=["final_greeting"],
+    )
+
+    # 4. Define Edges
+    # Edges define the flow between nodes
+    edge1 = EdgeSpec(
+        id="greet-to-upper",
+        source="greeter",
+        target="uppercaser",
+        condition=EdgeCondition.ON_SUCCESS,
+    )
+
+    # 5. Create Graph
+    # The graph works like a blueprint connecting nodes and edges
+    graph = GraphSpec(
+        id="greeting-agent",
+        goal_id="greet-user",
+        entry_node="greeter",
+        terminal_nodes=["uppercaser"],
+        nodes=[node1, node2],
+        edges=[edge1],
+    )
+
+    # 6. Initialize Runtime & Executor
+    # Runtime handles state/memory; Executor runs the graph
+    from pathlib import Path
+
+    runtime = Runtime(storage_path=Path("./agent_logs"))
+    executor = GraphExecutor(runtime=runtime)
+
+    # 7. Register Function Implementations
+    # Connect string names in NodeSpecs to actual Python functions
+    executor.register_function("greeter", greet)
+    executor.register_function("uppercaser", uppercase)
+
+    # 8. Execute Agent
+    print("▶ Executing agent with input: name='Alice'...")
+
+    result = await executor.execute(graph=graph, goal=goal, input_data={"name": "Alice"})
+
+    # 9. Verify Results
+    if result.success:
+        print("\n✅ Success!")
+        print(f"Path taken: {' -> '.join(result.path)}")
+        print(f"Final output: {result.output.get('final_greeting')}")
+    else:
+        print(f"\n❌ Failed: {result.error}")
+
+
+if __name__ == "__main__":
+    # Script entry point: run the async demo to completion.
+    # Optional: Enable logging to see internal decision flow
+    # (this file does not import `logging`; add `import logging` first)
+    # logging.basicConfig(level=logging.INFO)
+    asyncio.run(main())
@@ -0,0 +1,194 @@
+#!/usr/bin/env python3
+"""
+Example: Integrating MCP Servers with the Core Framework
+
+This example demonstrates how to:
+1. Register MCP servers programmatically
+2. Use MCP tools in agents
+3. Load MCP servers from configuration files
+"""
+
+import asyncio
+from pathlib import Path
+
+from framework.runner.runner import AgentRunner
+
+
+async def example_1_programmatic_registration():
+    """Example 1: Register MCP server programmatically"""
+    print("\n=== Example 1: Programmatic MCP Server Registration ===\n")
+
+    # Load an existing agent
+    runner = AgentRunner.load("exports/task-planner")
+
+    # Register tools MCP server via STDIO
+    num_tools = runner.register_mcp_server(
+        name="tools",
+        transport="stdio",
+        command="python",
+        args=["-m", "aden_tools.mcp_server", "--stdio"],
+        cwd="../tools",
+    )
+
+    print(f"Registered {num_tools} tools from tools MCP server")
+
+    # List all available tools
+    tools = runner._tool_registry.get_tools()
+    print(f"\nAvailable tools: {list(tools.keys())}")
+
+    # Run the agent with MCP tools available
+    result = await runner.run(
+        {"objective": "Search for 'Claude AI' and summarize the top 3 results"}
+    )
+
+    print(f"\nAgent result: {result}")
+
+    # Cleanup
+    runner.cleanup()
+
+
+async def example_2_http_transport():
+    """Example 2: Connect to MCP server via HTTP"""
+    print("\n=== Example 2: HTTP MCP Server Connection ===\n")
+
+    # First, start the tools MCP server in HTTP mode:
+    # cd tools && python mcp_server.py --port 4001
+
+    runner = AgentRunner.load("exports/task-planner")
+
+    # Register tools via HTTP
+    num_tools = runner.register_mcp_server(
+        name="tools-http",
+        transport="http",
+        url="http://localhost:4001",
+    )
+
+    print(f"Registered {num_tools} tools from HTTP MCP server")
+
+    # Cleanup
+    runner.cleanup()
+
+
+async def example_3_config_file():
+    """Example 3: Load MCP servers from configuration file"""
+    print("\n=== Example 3: Load from Configuration File ===\n")
+
+    # Create a test agent folder with mcp_servers.json
+    test_agent_path = Path("exports/task-planner")
+
+    # Copy example config (in practice, you'd place this in your agent folder)
+    import shutil
+
+    shutil.copy("examples/mcp_servers.json", test_agent_path / "mcp_servers.json")
+
+    # Load agent - MCP servers will be auto-discovered
+    runner = AgentRunner.load(test_agent_path)
+
+    # Tools are automatically available
+    tools = runner._tool_registry.get_tools()
+    print(f"Available tools: {list(tools.keys())}")
+
+    # Cleanup
+    runner.cleanup()
+
+    # Clean up the test config
+    (test_agent_path / "mcp_servers.json").unlink()
+
+
+async def example_4_custom_agent_with_mcp_tools():
+    """Example 4: Build custom agent that uses MCP tools"""
+    print("\n=== Example 4: Custom Agent with MCP Tools ===\n")
+
+    from framework.builder.workflow import GraphBuilder
+
+    # Create a workflow builder
+    builder = GraphBuilder()
+
+    # Define goal
+    builder.set_goal(
+        goal_id="web-researcher",
+        name="Web Research Agent",
+        description="Search the web and summarize findings",
+    )
+
+    # Add success criteria
+    builder.add_success_criterion(
+        "search-results", "Successfully retrieve at least 3 web search results"
+    )
+    builder.add_success_criterion("summary", "Provide a clear, concise summary of the findings")
+
+    # Add nodes that will use MCP tools
+    builder.add_node(
+        node_id="web-searcher",
+        name="Web Search",
+        description="Search the web for information",
+        node_type="llm_tool_use",
+        system_prompt="Search for {query} and return the top results. Use the web_search tool.",
+        tools=["web_search"],  # This tool comes from tools MCP server
+        input_keys=["query"],
+        output_keys=["search_results"],
+    )
+
+    builder.add_node(
+        node_id="summarizer",
+        name="Summarize Results",
+        description="Summarize the search results",
+        node_type="llm_generate",
+        system_prompt="Summarize the following search results in 2-3 sentences: {search_results}",
+        input_keys=["search_results"],
+        output_keys=["summary"],
+    )
+
+    # Connect nodes
+    builder.add_edge("web-searcher", "summarizer")
+
+    # Set entry point
+    builder.set_entry("web-searcher")
+    builder.set_terminal("summarizer")
+
+    # Export the agent
+    export_path = Path("exports/web-research-agent")
+    export_path.mkdir(parents=True, exist_ok=True)
+    builder.export(export_path)
+
+    # Load and register MCP server
+    runner = AgentRunner.load(export_path)
+    runner.register_mcp_server(
+        name="tools",
+        transport="stdio",
+        command="python",
+        args=["-m", "aden_tools.mcp_server", "--stdio"],
+        cwd="../tools",
+    )
+
+    # Run the agent
+    result = await runner.run({"query": "latest AI breakthroughs 2026"})
+
+    print(f"\nAgent completed with result:\n{result}")
+
+    # Cleanup
+    runner.cleanup()
+
+
+async def main():
+    """Run all examples"""
+    print("=" * 60)
+    print("MCP Integration Examples")
+    print("=" * 60)
+
+    try:
+        # Run examples
+        await example_1_programmatic_registration()
+        # await example_2_http_transport()  # Requires HTTP server running
+        # await example_3_config_file()
+        # await example_4_custom_agent_with_mcp_tools()
+
+    except Exception as e:
+        print(f"\nError running example: {e}")
+        import traceback
+
+        traceback.print_exc()
+
+
+if __name__ == "__main__":
+    # Script entry point: drive the async examples to completion.
+    asyncio.run(main())
@@ -0,0 +1,22 @@
+{
+  "servers": [
+    {
+      "name": "tools",
+      "description": "Aden tools including web search, file operations, and PDF reading",
+      "transport": "stdio",
+      "command": "python",
+      "args": ["mcp_server.py", "--stdio"],
+      "cwd": "../tools",
+      "env": {
+        "BRAVE_SEARCH_API_KEY": "${BRAVE_SEARCH_API_KEY}"
+      }
+    },
+    {
+      "name": "tools-http",
+      "description": "Aden tools via HTTP (for Docker deployments)",
+      "transport": "http",
+      "url": "http://localhost:4001",
+      "headers": {}
+    }
+  ]
+}
@@ -0,0 +1,70 @@
+"""
+Aden Hive Framework: A goal-driven agent runtime optimized for Builder observability.
+
+The runtime is designed around DECISIONS, not just actions. Every significant
+choice the agent makes is captured with:
+- What it was trying to do (intent)
+- What options it considered
+- What it chose and why
+- What happened as a result
+- Whether that was good or bad (evaluated post-hoc)
+
+This gives the Builder LLM the information it needs to improve agent behavior.
+
+## Testing Framework
+
+The framework includes a Goal-Based Testing system (Goal → Agent → Eval):
+- Generate tests from Goal success_criteria and constraints
+- Mandatory user approval before tests are stored
+- Parallel test execution with error categorization
+- Debug tools with fix suggestions
+
+See `framework.testing` for details.
+"""
+
+from framework.builder.query import BuilderQuery
+from framework.llm import AnthropicProvider, LLMProvider
+from framework.runner import AgentOrchestrator, AgentRunner
+from framework.runtime.core import Runtime
+from framework.schemas.decision import Decision, DecisionEvaluation, Option, Outcome
+from framework.schemas.run import Problem, Run, RunSummary
+
+# Testing framework
+from framework.testing import (
+    ApprovalStatus,
+    DebugTool,
+    ErrorCategory,
+    Test,
+    TestResult,
+    TestStorage,
+    TestSuiteResult,
+)
+
+# Names re-exported as the package's public API. Each group below mirrors the
+# import statement above that brings those names into this namespace.
+__all__ = [
+    # Schemas
+    "Decision",
+    "Option",
+    "Outcome",
+    "DecisionEvaluation",
+    "Run",
+    "RunSummary",
+    "Problem",
+    # Runtime
+    "Runtime",
+    # Builder
+    "BuilderQuery",
+    # LLM
+    "LLMProvider",
+    "AnthropicProvider",
+    # Runner
+    "AgentRunner",
+    "AgentOrchestrator",
+    # Testing
+    "Test",
+    "TestResult",
+    "TestSuiteResult",
+    "TestStorage",
+    "ApprovalStatus",
+    "ErrorCategory",
+    "DebugTool",
+]
@@ -0,0 +1,6 @@
+"""Allow running as ``python -m framework``, which powers the ``hive`` console entry point."""
+
+from framework.cli import main
+
+# Invoke the CLI only when this module is executed, never on plain import.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,21 @@
+"""Builder interface for analyzing and building agents."""
+
+from framework.builder.query import BuilderQuery
+from framework.builder.workflow import (
+    BuildPhase,
+    BuildSession,
+    GraphBuilder,
+    TestCase,
+    TestResult,
+    ValidationResult,
+)
+
+# Public API of the builder package: the query interface plus the workflow
+# types imported from framework.builder.workflow above.
+__all__ = [
+    "BuilderQuery",
+    "GraphBuilder",
+    "BuildSession",
+    "BuildPhase",
+    "ValidationResult",
+    "TestCase",
+    "TestResult",
+]
@@ -0,0 +1,501 @@
+"""
+Builder Query Interface - How I (Builder) analyze agent runs.
+
+This is designed around the questions I need to answer:
+1. What happened? (summaries, narratives)
+2. Why did it fail? (failure analysis, decision traces)
+3. What patterns emerge? (across runs, across nodes)
+4. What should we change? (suggestions)
+"""
+
+from collections import defaultdict
+from pathlib import Path
+from typing import Any
+
+from framework.schemas.decision import Decision
+from framework.schemas.run import Run, RunStatus, RunSummary
+from framework.storage.backend import FileStorage
+
+
+class FailureAnalysis:
+    """Plain value object describing why one run failed.
+
+    Attributes:
+        run_id: Identifier of the analysed run.
+        failure_point: Description of where the run failed.
+        root_cause: Description of the underlying cause.
+        decision_chain: Decision descriptions, in order, leading to the failure.
+        problems: Problems reported for the run.
+        suggestions: Proposed follow-ups.
+    """
+
+    # Attribute names in the order they appear in ``to_dict``.
+    _FIELDS = (
+        "run_id",
+        "failure_point",
+        "root_cause",
+        "decision_chain",
+        "problems",
+        "suggestions",
+    )
+
+    def __init__(
+        self,
+        run_id: str,
+        failure_point: str,
+        root_cause: str,
+        decision_chain: list[str],
+        problems: list[str],
+        suggestions: list[str],
+    ):
+        self.run_id = run_id
+        self.failure_point = failure_point
+        self.root_cause = root_cause
+        self.decision_chain = decision_chain
+        self.problems = problems
+        self.suggestions = suggestions
+
+    def to_dict(self) -> dict[str, Any]:
+        """Return the attributes as a dict (the list values are not copied)."""
+        return {field: getattr(self, field) for field in self._FIELDS}
+
+    def __str__(self) -> str:
+        """Render a multi-line, human-readable report.
+
+        The "Reported Problems" and "Suggestions" sections are emitted only
+        when the corresponding list is non-empty.
+        """
+        report = [
+            f"=== Failure Analysis for {self.run_id} ===",
+            "",
+            f"Failure Point: {self.failure_point}",
+            f"Root Cause: {self.root_cause}",
+            "",
+            "Decision Chain Leading to Failure:",
+        ]
+        report.extend(
+            f"  {step}. {summary}" for step, summary in enumerate(self.decision_chain, start=1)
+        )
+
+        if self.problems:
+            report += ["", "Reported Problems:"]
+            report.extend(f"  - {problem}" for problem in self.problems)
+
+        if self.suggestions:
+            report += ["", "Suggestions:"]
+            report.extend(f"  → {tip}" for tip in self.suggestions)
+
+        return "\n".join(report)
+
+
+class PatternAnalysis:
+    """Plain value object holding patterns aggregated over several runs of a goal.
+
+    Attributes:
+        goal_id: Goal the runs belong to.
+        run_count: Number of runs analysed.
+        success_rate: Fraction in ``[0, 1]``; rendered as a percentage by ``__str__``.
+        common_failures: ``(failure, occurrence_count)`` pairs.
+        problematic_nodes: ``(node_id, failure_rate)`` pairs.
+        decision_patterns: Free-form pattern data; not included in ``__str__``.
+    """
+
+    # Attribute names in the order they appear in ``to_dict``.
+    _FIELDS = (
+        "goal_id",
+        "run_count",
+        "success_rate",
+        "common_failures",
+        "problematic_nodes",
+        "decision_patterns",
+    )
+
+    def __init__(
+        self,
+        goal_id: str,
+        run_count: int,
+        success_rate: float,
+        common_failures: list[tuple[str, int]],
+        problematic_nodes: list[tuple[str, float]],
+        decision_patterns: dict[str, Any],
+    ):
+        self.goal_id = goal_id
+        self.run_count = run_count
+        self.success_rate = success_rate
+        self.common_failures = common_failures
+        self.problematic_nodes = problematic_nodes
+        self.decision_patterns = decision_patterns
+
+    def to_dict(self) -> dict[str, Any]:
+        """Return the attributes as a dict (container values are not copied)."""
+        return {field: getattr(self, field) for field in self._FIELDS}
+
+    def __str__(self) -> str:
+        """Render a multi-line, human-readable report.
+
+        The failure and node sections are emitted only when their lists are
+        non-empty.
+        """
+        report = [
+            f"=== Pattern Analysis for Goal {self.goal_id} ===",
+            "",
+            f"Runs Analyzed: {self.run_count}",
+            f"Success Rate: {self.success_rate:.1%}",
+        ]
+
+        if self.common_failures:
+            report += ["", "Common Failures:"]
+            report.extend(
+                f"  - {failure} ({count} occurrences)" for failure, count in self.common_failures
+            )
+
+        if self.problematic_nodes:
+            report += ["", "Problematic Nodes (failure rate):"]
+            report.extend(
+                f"  - {node}: {rate:.1%} failure rate" for node, rate in self.problematic_nodes
+            )
+
+        return "\n".join(report)
+
+
+class BuilderQuery:
+    """
+    The interface I (Builder) use to understand what agents are doing.
+
+    This is optimized for the questions I need to answer when analyzing
+    agent behavior and deciding what to improve. Every method only reads
+    from the ``FileStorage`` opened in ``__init__``.
+    """
+
+    # A node is reported as problematic when its failure rate exceeds this fraction.
+    _PROBLEMATIC_FAILURE_RATE = 0.1
+
+    def __init__(self, storage_path: str | Path):
+        """Open the run storage located at ``storage_path``."""
+        self.storage = FileStorage(storage_path)
+
+    # === WHAT HAPPENED? ===
+
+    def get_run_summary(self, run_id: str) -> RunSummary | None:
+        """Get a quick summary of a run, or ``None`` if storage has none."""
+        return self.storage.load_summary(run_id)
+
+    def get_full_run(self, run_id: str) -> Run | None:
+        """Get the complete run with all decisions, or ``None`` if not stored."""
+        return self.storage.load_run(run_id)
+
+    def list_runs_for_goal(self, goal_id: str) -> list[RunSummary]:
+        """Get summaries of all runs for a goal, skipping runs without a summary."""
+        return self._load_summaries(self.storage.get_runs_by_goal(goal_id))
+
+    def get_recent_failures(self, limit: int = 10) -> list[RunSummary]:
+        """
+        Get up to ``limit`` summaries of failed runs.
+
+        Runs are taken in the order the storage index returns them. Run ids
+        whose summary cannot be loaded do not count towards ``limit``, so the
+        result is only shorter than ``limit`` when there are not enough failed
+        runs with a summary. A ``limit`` of zero or less yields an empty list.
+        """
+        failed_ids = self.storage.get_runs_by_status(RunStatus.FAILED)
+        return self._load_summaries(failed_ids, limit=limit)
+
+    # === WHY DID IT FAIL? ===
+
+    def analyze_failure(self, run_id: str) -> FailureAnalysis | None:
+        """
+        Deep analysis of why a run failed.
+
+        This is my primary tool for understanding what went wrong.
+
+        Returns ``None`` when the run is not found or its status is not
+        ``RunStatus.FAILED``.
+        """
+        run = self.storage.load_run(run_id)
+        if run is None or run.status != RunStatus.FAILED:
+            return None
+
+        failed_decisions = [d for d in run.decisions if not d.was_successful]
+        if failed_decisions:
+            first_failure = failed_decisions[0]
+            failure_point = first_failure.summary_for_builder()
+            outcome = first_failure.outcome
+            # An outcome may exist without an error message; never report None.
+            root_cause = (outcome.error if outcome else None) or "Unknown"
+        else:
+            failure_point = "Unknown - no decision marked as failed"
+            root_cause = "Run failed but all decisions succeeded (external cause?)"
+
+        # Decision summaries up to and including the first failed decision.
+        decision_chain = []
+        for d in run.decisions:
+            decision_chain.append(d.summary_for_builder())
+            if not d.was_successful:
+                break
+
+        problems = [f"[{p.severity}] {p.description}" for p in run.problems]
+        suggestions = self._generate_suggestions(run, failed_decisions)
+
+        return FailureAnalysis(
+            run_id=run_id,
+            failure_point=failure_point,
+            root_cause=root_cause,
+            decision_chain=decision_chain,
+            problems=problems,
+            suggestions=suggestions,
+        )
+
+    def get_decision_trace(self, run_id: str) -> list[str]:
+        """Get a readable trace of all decisions in a run (empty if not found)."""
+        run = self.storage.load_run(run_id)
+        if run is None:
+            return []
+        return [d.summary_for_builder() for d in run.decisions]
+
+    # === WHAT PATTERNS EMERGE? ===
+
+    def find_patterns(self, goal_id: str) -> PatternAnalysis | None:
+        """
+        Find patterns across runs for a goal.
+
+        This helps me understand systemic issues vs one-off failures.
+
+        Returns ``None`` when the goal has no run ids or none of them can be
+        loaded. ``common_failures`` holds at most the five most frequent error
+        messages; ``problematic_nodes`` holds nodes whose failure rate exceeds
+        10%, highest rate first.
+        """
+        run_ids = self.storage.get_runs_by_goal(goal_id)
+        if not run_ids:
+            return None
+
+        runs = [run for run_id in run_ids if (run := self.storage.load_run(run_id))]
+        if not runs:
+            return None
+
+        completed = [r for r in runs if r.status == RunStatus.COMPLETED]
+        success_rate = len(completed) / len(runs)
+
+        # One pass over every decision collects both per-error and per-node counts.
+        failure_counts: dict[str, int] = defaultdict(int)
+        node_stats: dict[str, dict[str, int]] = defaultdict(lambda: {"total": 0, "failed": 0})
+        for run in runs:
+            for decision in run.decisions:
+                stats = node_stats[decision.node_id]
+                stats["total"] += 1
+                if decision.was_successful:
+                    continue
+                stats["failed"] += 1
+                # Failures without an outcome count against the node but carry no error text.
+                if decision.outcome:
+                    failure_counts[decision.outcome.error or "Unknown error"] += 1
+
+        common_failures = sorted(failure_counts.items(), key=lambda x: x[1], reverse=True)[:5]
+
+        problematic_nodes = []
+        for node_id, stats in node_stats.items():
+            # "total" is at least 1 here: an entry only exists once a decision was counted.
+            failure_rate = stats["failed"] / stats["total"]
+            if failure_rate > self._PROBLEMATIC_FAILURE_RATE:
+                problematic_nodes.append((node_id, failure_rate))
+        problematic_nodes.sort(key=lambda x: x[1], reverse=True)
+
+        return PatternAnalysis(
+            goal_id=goal_id,
+            run_count=len(runs),
+            success_rate=success_rate,
+            common_failures=common_failures,
+            problematic_nodes=problematic_nodes,
+            decision_patterns=self._analyze_decision_patterns(runs),
+        )
+
+    def compare_runs(self, run_id_1: str, run_id_2: str) -> dict[str, Any]:
+        """
+        Compare two runs to understand what differed.
+
+        Returns ``{"error": ...}`` when either run cannot be loaded; otherwise
+        a dict with a short overview of each run and a list of differences.
+        """
+        run1 = self.storage.load_run(run_id_1)
+        run2 = self.storage.load_run(run_id_2)
+
+        if run1 is None or run2 is None:
+            return {"error": "One or both runs not found"}
+
+        def overview(run: Run) -> dict[str, Any]:
+            return {
+                "id": run.id,
+                "status": run.status.value,
+                "decisions": len(run.decisions),
+                "success_rate": run.metrics.success_rate,
+            }
+
+        return {
+            "run_1": overview(run1),
+            "run_2": overview(run2),
+            "differences": self._find_differences(run1, run2),
+        }
+
+    # === WHAT SHOULD WE CHANGE? ===
+
+    def suggest_improvements(self, goal_id: str) -> list[dict[str, Any]]:
+        """
+        Generate improvement suggestions based on run analysis.
+
+        This is what I use to propose changes to the human engineer.
+
+        Each suggestion is a dict with ``type``, ``target``, ``reason``,
+        ``recommendation`` and ``priority`` keys. Returns an empty list when
+        ``find_patterns`` has nothing to report for the goal.
+        """
+        patterns = self.find_patterns(goal_id)
+        if patterns is None:
+            return []
+
+        suggestions = []
+
+        # Suggestion: Fix problematic nodes
+        for node_id, failure_rate in patterns.problematic_nodes:
+            suggestions.append(
+                {
+                    "type": "node_improvement",
+                    "target": node_id,
+                    "reason": f"Node has {failure_rate:.1%} failure rate",
+                    "recommendation": (
+                        f"Review and improve node '{node_id}' - "
+                        "high failure rate suggests prompt or tool issues"
+                    ),
+                    "priority": "high" if failure_rate > 0.3 else "medium",
+                }
+            )
+
+        # Suggestion: Address common failures (only errors seen at least twice)
+        for failure, count in patterns.common_failures:
+            if count >= 2:
+                suggestions.append(
+                    {
+                        "type": "error_handling",
+                        "target": failure,
+                        "reason": f"Error occurred {count} times",
+                        "recommendation": f"Add handling for: {failure}",
+                        "priority": "high" if count >= 5 else "medium",
+                    }
+                )
+
+        # Suggestion: Overall success rate
+        if patterns.success_rate < 0.8:
+            suggestions.append(
+                {
+                    "type": "architecture",
+                    "target": goal_id,
+                    "reason": f"Goal success rate is only {patterns.success_rate:.1%}",
+                    "recommendation": (
+                        "Consider restructuring the agent graph or improving goal definition"
+                    ),
+                    "priority": "high",
+                }
+            )
+
+        return suggestions
+
+    def get_node_performance(self, node_id: str) -> dict[str, Any]:
+        """
+        Get performance metrics for a specific node across all runs.
+
+        Latency and token totals only include decisions that have an outcome,
+        while ``avg_latency_ms`` divides by every decision of the node, so
+        decisions without an outcome pull the average down.
+        """
+        run_ids = self.storage.get_runs_by_node(node_id)
+
+        total_decisions = 0
+        successful_decisions = 0
+        total_latency = 0
+        total_tokens = 0
+        decision_types: dict[str, int] = defaultdict(int)
+
+        for run_id in run_ids:
+            run = self.storage.load_run(run_id)
+            if not run:
+                continue
+            for decision in run.decisions:
+                if decision.node_id != node_id:
+                    continue
+                total_decisions += 1
+                if decision.was_successful:
+                    successful_decisions += 1
+                if decision.outcome:
+                    total_latency += decision.outcome.latency_ms
+                    total_tokens += decision.outcome.tokens_used
+                decision_types[decision.decision_type.value] += 1
+
+        return {
+            "node_id": node_id,
+            "total_decisions": total_decisions,
+            "success_rate": successful_decisions / total_decisions if total_decisions > 0 else 0,
+            "avg_latency_ms": total_latency / total_decisions if total_decisions > 0 else 0,
+            "total_tokens": total_tokens,
+            "decision_type_distribution": dict(decision_types),
+        }
+
+    # === PRIVATE HELPERS ===
+
+    def _load_summaries(self, run_ids, limit: int | None = None) -> list[RunSummary]:
+        """Load summaries for ``run_ids`` in order, skipping ids without one.
+
+        Stops as soon as ``limit`` summaries have been collected; ``None``
+        means no limit.
+        """
+        summaries: list[RunSummary] = []
+        for run_id in run_ids:
+            if limit is not None and len(summaries) >= limit:
+                break
+            summary = self.storage.load_summary(run_id)
+            if summary:
+                summaries.append(summary)
+        return summaries
+
+    def _generate_suggestions(
+        self,
+        run: Run,
+        failed_decisions: list[Decision],
+    ) -> list[str]:
+        """Generate suggestions based on failure analysis.
+
+        Per failed decision: propose the first non-chosen option, flag missing
+        input context, and flag active constraints. Then append every
+        ``suggested_fix`` attached to the run's reported problems.
+        """
+        suggestions = []
+
+        for decision in failed_decisions:
+            # Check if there were alternatives
+            if len(decision.options) > 1:
+                chosen = decision.chosen_option
+                alternatives = [o for o in decision.options if o.id != decision.chosen_option_id]
+                if alternatives:
+                    alt_desc = alternatives[0].description
+                    chosen_desc = chosen.description if chosen else "unknown"
+                    suggestions.append(
+                        f"Consider alternative: '{alt_desc}' instead of '{chosen_desc}'"
+                    )
+
+            # Check for missing context
+            if not decision.input_context:
+                suggestions.append(
+                    f"Decision '{decision.intent}' had no input context - "
+                    "ensure relevant data is passed"
+                )
+
+            # Check for constraint issues
+            if decision.active_constraints:
+                constraints = ", ".join(decision.active_constraints)
+                suggestions.append(f"Review constraints: {constraints} - may be too restrictive")
+
+        # Check for reported problems with suggestions
+        suggestions.extend(p.suggested_fix for p in run.problems if p.suggested_fix)
+
+        return suggestions
+
+    def _analyze_decision_patterns(self, runs: list[Run]) -> dict[str, Any]:
+        """Analyze decision patterns across runs.
+
+        Returns the count of decisions per decision type and, per intent
+        (grouped by its first 50 characters), the most frequently chosen
+        option together with how many other options were also chosen.
+        """
+        type_counts: dict[str, int] = defaultdict(int)
+        option_counts: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
+
+        for run in runs:
+            for decision in run.decisions:
+                type_counts[decision.decision_type.value] += 1
+
+                # Track which options are chosen for similar intents
+                intent_key = decision.intent[:50]  # Truncate for grouping
+                if decision.chosen_option:
+                    option_counts[intent_key][decision.chosen_option.description] += 1
+
+        # Find most common choices per intent
+        common_choices = {}
+        for intent, choices in option_counts.items():
+            if choices:
+                top_choice, top_count = max(choices.items(), key=lambda x: x[1])
+                common_choices[intent] = {
+                    "choice": top_choice,
+                    "count": top_count,
+                    "alternatives": len(choices) - 1,
+                }
+
+        return {
+            "decision_type_distribution": dict(type_counts),
+            "common_choices": common_choices,
+        }
+
+    def _find_differences(self, run1: Run, run2: Run) -> list[str]:
+        """Find key differences between two runs.
+
+        Compares status, decision count, the first position at which the
+        chosen option ids differ, and the sets of executed nodes.
+        """
+        differences = []
+
+        # Status difference
+        if run1.status != run2.status:
+            differences.append(f"Status: {run1.status.value} vs {run2.status.value}")
+
+        # Decision count difference
+        if len(run1.decisions) != len(run2.decisions):
+            differences.append(f"Decision count: {len(run1.decisions)} vs {len(run2.decisions)}")
+
+        # Find first divergence point (only over the positions both runs share)
+        for i, (d1, d2) in enumerate(zip(run1.decisions, run2.decisions, strict=False)):
+            if d1.chosen_option_id != d2.chosen_option_id:
+                differences.append(
+                    f"Diverged at decision {i}: "
+                    f"chose '{d1.chosen_option_id}' vs '{d2.chosen_option_id}'"
+                )
+                break
+
+        # Node differences
+        nodes1 = set(run1.metrics.nodes_executed)
+        nodes2 = set(run2.metrics.nodes_executed)
+        only_1 = nodes1 - nodes2
+        only_2 = nodes2 - nodes1
+        if only_1:
+            differences.append(f"Nodes only in run 1: {only_1}")
+        if only_2:
+            differences.append(f"Nodes only in run 2: {only_2}")
+
+        return differences
@@ -0,0 +1,807 @@
+"""
+GraphBuilder Workflow - Enforced incremental building with HITL approval.
+
+The build process:
+1. Define Goal → APPROVE
+2. Add Node → VALIDATE → TEST → APPROVE
+3. Add Edge → VALIDATE → TEST → APPROVE
+4. Repeat until graph is complete
+5. Final integration test → APPROVE
+6. Export
+
+Each step requires validation and human approval before proceeding.
+You cannot skip steps or bypass validation.
+"""
+
+from collections.abc import Callable
+from datetime import datetime
+from enum import Enum
+from pathlib import Path
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
+from framework.graph.goal import Goal
+from framework.graph.node import NodeSpec
+
+
+class BuildPhase(str, Enum):
+    """Current phase of the build process.
+
+    Mixes in ``str`` so every member compares equal to its plain string value.
+    Members are listed roughly in the order of the build steps described in
+    the module docstring.
+    """
+
+    INIT = "init"  # Just started
+    GOAL_DRAFT = "goal_draft"  # Drafting goal
+    GOAL_APPROVED = "goal_approved"  # Goal approved
+    ADDING_NODES = "adding_nodes"  # Adding nodes
+    ADDING_EDGES = "adding_edges"  # Adding edges
+    TESTING = "testing"  # Running tests
+    APPROVED = "approved"  # Fully approved
+    EXPORTED = "exported"  # Exported to file
+
+
+class ValidationResult(BaseModel):
+    """Result of a validation check.
+
+    The ``_validate_*`` helpers visible in this module set ``valid`` to
+    ``True`` exactly when ``errors`` is empty; warnings and suggestions never
+    make a result invalid.
+    """
+
+    # Whether the checked item passed validation.
+    valid: bool
+    # Blocking problems; each list below defaults to a fresh empty list.
+    errors: list[str] = Field(default_factory=list)
+    # Non-blocking concerns.
+    warnings: list[str] = Field(default_factory=list)
+    # Optional improvements.
+    suggestions: list[str] = Field(default_factory=list)
+
+
+class TestCase(BaseModel):
+    """A test case for validating agent behavior."""
+
+    id: str
+    description: str
+    # Input payload for the test.
+    input: dict[str, Any]
+    expected_output: Any = None  # None means just check it doesn't error
+    # NOTE(review): presumably a substring the output must contain, with None
+    # disabling the check - confirm against the test runner.
+    expected_contains: str | None = None
+
+
+class TestResult(BaseModel):
+    """Result of running a test case."""
+
+    # NOTE(review): presumably the ``id`` of the TestCase that was run - confirm.
+    test_id: str
+    passed: bool
+    actual_output: Any = None
+    # Error text, or None when no error was recorded.
+    error: str | None = None
+    # NOTE(review): presumably the ids of the nodes visited, in order - confirm.
+    execution_path: list[str] = Field(default_factory=list)
+
+
+class BuildSession(BaseModel):
+    """
+    Persistent build session state.
+
+    Saved after each approved step so you can resume later.
+
+    All collection fields default to fresh empty lists, and both timestamps
+    default to ``datetime.now()`` (naive local time) at construction.
+    """
+
+    id: str
+    name: str
+    phase: BuildPhase = BuildPhase.INIT
+    created_at: datetime = Field(default_factory=datetime.now)
+    updated_at: datetime = Field(default_factory=datetime.now)
+
+    # The artifacts being built
+    goal: Goal | None = None
+    nodes: list[NodeSpec] = Field(default_factory=list)
+    edges: list[EdgeSpec] = Field(default_factory=list)
+
+    # Test cases
+    test_cases: list[TestCase] = Field(default_factory=list)
+    test_results: list[TestResult] = Field(default_factory=list)
+
+    # Approval history
+    approvals: list[dict[str, Any]] = Field(default_factory=list)
+
+    # Tools (stored as dicts for serialization)
+    tools: list[dict[str, Any]] = Field(default_factory=list)
+
+    # Pydantic config: accept and keep fields that are not declared above.
+    model_config = {"extra": "allow"}
+
+
+class GraphBuilder:
+    """
+    Enforced incremental graph building with HITL approval.
+
+    Usage:
+        builder = GraphBuilder("my-agent")
+
+        # Step 1: Define and approve goal
+        builder.set_goal(goal)
+        builder.validate()  # Must pass
+        builder.approve("Goal looks good")  # Human approval required
+
+        # Step 2: Add nodes one by one
+        builder.add_node(node_spec)
+        builder.validate()  # Must pass
+        builder.test(test_case)  # Must pass
+        builder.approve("Node works")
+
+        # Step 3: Add edges
+        builder.add_edge(edge_spec)
+        builder.validate()
+        builder.approve("Edge correct")
+
+        # Step 4: Final approval
+        builder.run_all_tests()
+        builder.final_approve("Ready for production")
+
+        # Step 5: Export
+        graph = builder.export()
+    """
+
+    def __init__(
+        self,
+        name: str,
+        storage_path: Path | str | None = None,
+        session_id: str | None = None,
+    ):
+        """Create a builder, either resuming a session or starting a new one.
+
+        Args:
+            name: Name given to a newly created session; not used when
+                ``session_id`` is supplied.
+            storage_path: Directory for session files. Defaults to
+                ``~/.core/builds``. The directory is created if missing.
+            session_id: When given, the session is loaded via
+                ``_load_session`` instead of being created.
+        """
+        if storage_path:
+            self.storage_path = Path(storage_path)
+        else:
+            self.storage_path = Path.home() / ".core" / "builds"
+        self.storage_path.mkdir(parents=True, exist_ok=True)
+
+        if not session_id:
+            # New session ids are timestamped to one-second resolution.
+            stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            self.session = BuildSession(id=f"build_{stamp}", name=name)
+        else:
+            self.session = self._load_session(session_id)
+
+        # Result of the most recent validation; nothing has been validated yet.
+        self._pending_validation: ValidationResult | None = None
+
+    # =========================================================================
+    # PHASE 1: GOAL
+    # =========================================================================
+
+    def set_goal(self, goal: Goal) -> ValidationResult:
+        """
+        Store ``goal`` on the session and validate it.
+
+        Permitted only in the INIT or GOAL_DRAFT phase (checked through
+        ``_require_phase``). The session moves to GOAL_DRAFT and is saved
+        whether or not the goal is valid; the validation result is also kept
+        as the pending validation. Call approve() once validation passes.
+        """
+        self._require_phase([BuildPhase.INIT, BuildPhase.GOAL_DRAFT])
+
+        session = self.session
+        session.goal = goal
+        session.phase = BuildPhase.GOAL_DRAFT
+
+        result = self._validate_goal(goal)
+        self._pending_validation = result
+        self._save_session()
+
+        return result
+
+    def _validate_goal(self, goal: Goal) -> ValidationResult:
+        """Check a goal definition and report errors, warnings and suggestions.
+
+        Errors: missing id, name or description; no success criteria; any
+        success criterion without a description. Missing constraints produce a
+        warning and missing required_capabilities a suggestion. The result is
+        valid exactly when there are no errors.
+        """
+        required_fields = (
+            ("id", "Goal must have an id"),
+            ("name", "Goal must have a name"),
+            ("description", "Goal must have a description"),
+        )
+        errors = [message for attr, message in required_fields if not getattr(goal, attr)]
+
+        criteria = goal.success_criteria
+        if criteria:
+            errors.extend(
+                f"Success criterion '{sc.id}' needs a description"
+                for sc in criteria
+                if not sc.description
+            )
+        else:
+            errors.append("Goal must have at least one success criterion")
+
+        warnings = []
+        if not goal.constraints:
+            warnings.append("Consider adding constraints to define boundaries")
+
+        suggestions = []
+        if not goal.required_capabilities:
+            suggestions.append("Specify required_capabilities (e.g., ['llm', 'tools'])")
+
+        return ValidationResult(
+            valid=not errors,
+            errors=errors,
+            warnings=warnings,
+            suggestions=suggestions,
+        )
+
+    # =========================================================================
+    # PHASE 2: NODES
+    # =========================================================================
+
+    def add_node(self, node: NodeSpec) -> ValidationResult:
+        """
+        Append ``node`` to the session and validate it.
+
+        Permitted only in the GOAL_APPROVED or ADDING_NODES phase. A node whose
+        id is already present is rejected without changing the session.
+        Otherwise the node is stored, the phase becomes ADDING_NODES and the
+        session is saved even when validation reports errors. Call approve()
+        once validation passes.
+        """
+        self._require_phase([BuildPhase.GOAL_APPROVED, BuildPhase.ADDING_NODES])
+
+        # Reject duplicates before touching any state.
+        for existing in self.session.nodes:
+            if existing.id == node.id:
+                return ValidationResult(
+                    valid=False,
+                    errors=[f"Node with id '{node.id}' already exists"],
+                )
+
+        self.session.nodes.append(node)
+        self.session.phase = BuildPhase.ADDING_NODES
+
+        result = self._validate_node(node)
+        self._pending_validation = result
+        self._save_session()
+
+        return result
+
+    def _validate_node(self, node: NodeSpec) -> ValidationResult:
+        """Check a node definition and report errors, warnings and suggestions.
+
+        Every node needs an id and a name; a missing description is only a
+        warning. Additional per-type rules: ``llm_tool_use`` nodes must list
+        tools (and should have a system prompt), ``router`` nodes must define
+        routes, and ``function`` nodes must name a function. Missing input or
+        output keys produce suggestions. The result is valid exactly when
+        there are no errors.
+        """
+        errors = []
+        warnings = []
+
+        if not node.id:
+            errors.append("Node must have an id")
+        if not node.name:
+            errors.append("Node must have a name")
+        if not node.description:
+            warnings.append(f"Node '{node.id}' should have a description")
+
+        # Type-specific validation
+        kind = node.node_type
+        if kind == "llm_tool_use":
+            if not node.tools:
+                errors.append(f"LLM tool node '{node.id}' must specify tools")
+            if not node.system_prompt:
+                warnings.append(f"LLM node '{node.id}' should have a system_prompt")
+        elif kind == "router":
+            if not node.routes:
+                errors.append(f"Router node '{node.id}' must specify routes")
+        elif kind == "function":
+            if not node.function:
+                errors.append(f"Function node '{node.id}' must specify function name")
+
+        # Check input/output keys
+        suggestions = [
+            f"Consider specifying {label} for '{node.id}'"
+            for label, keys in (("input_keys", node.input_keys), ("output_keys", node.output_keys))
+            if not keys
+        ]
+
+        return ValidationResult(
+            valid=not errors,
+            errors=errors,
+            warnings=warnings,
+            suggestions=suggestions,
+        )
+
+    def update_node(self, node_id: str, **updates) -> ValidationResult:
+        """Replace fields of an existing node and re-validate it.
+
+        Permitted only in the ADDING_NODES phase. The node is dumped to a
+        dict, ``updates`` are laid over it, and a new ``NodeSpec`` built from
+        the result takes the old node's position. The session is saved even
+        when validation reports errors. Returns an invalid result when no node
+        has the given id.
+        """
+        self._require_phase([BuildPhase.ADDING_NODES])
+
+        nodes = self.session.nodes
+        position = next((i for i, n in enumerate(nodes) if n.id == node_id), None)
+        if position is None:
+            return ValidationResult(valid=False, errors=[f"Node '{node_id}' not found"])
+
+        # Later keys win, so values in ``updates`` override the current fields.
+        replacement = NodeSpec(**{**nodes[position].model_dump(), **updates})
+        nodes[position] = replacement
+
+        result = self._validate_node(replacement)
+        self._pending_validation = result
+        self._save_session()
+        return result
+
+    def remove_node(self, node_id: str) -> ValidationResult:
+        """Remove a node (only if no edges reference it).
+
+        Permitted only in the ADDING_NODES phase. Returns an invalid result,
+        leaving the session untouched, when an edge uses the node as source or
+        target, or when no node has the given id (mirroring ``update_node``).
+        On success the node is dropped and the session is saved.
+        """
+        self._require_phase([BuildPhase.ADDING_NODES])
+
+        # Check for edge references
+        for edge in self.session.edges:
+            if node_id in (edge.source, edge.target):
+                return ValidationResult(
+                    valid=False,
+                    errors=[f"Cannot remove node '{node_id}': referenced by edge '{edge.id}'"],
+                )
+
+        remaining = [n for n in self.session.nodes if n.id != node_id]
+        if len(remaining) == len(self.session.nodes):
+            # Nothing matched: report it instead of claiming a successful removal.
+            return ValidationResult(valid=False, errors=[f"Node '{node_id}' not found"])
+
+        self.session.nodes = remaining
+        self._save_session()
+
+        return ValidationResult(valid=True)
+
+    # =========================================================================
+    # PHASE 3: EDGES
+    # =========================================================================
+
+    def add_edge(self, edge: EdgeSpec) -> ValidationResult:
+        """
+        Add an edge to the graph.
+
+        Returns validation result. Must call approve() after validation passes.
+        """
+        self._require_phase([BuildPhase.ADDING_NODES, BuildPhase.ADDING_EDGES])
+
+        # Check for duplicate
+        if any(e.id == edge.id for e in self.session.edges):
+            return ValidationResult(
+                valid=False,
+                errors=[f"Edge with id '{edge.id}' already exists"],
+            )
+
+        self.session.edges.append(edge)
+        self.session.phase = BuildPhase.ADDING_EDGES
+
+        validation = self._validate_edge(edge)
+        self._pending_validation = validation
+        self._save_session()
+
+        return validation
+
+    def _validate_edge(self, edge: EdgeSpec) -> ValidationResult:
+        """Validate an edge definition."""
+        errors = []
+        warnings = []
+
+        if not edge.id:
+            errors.append("Edge must have an id")
+
+        # Check source exists
+        if not any(n.id == edge.source for n in self.session.nodes):
+            errors.append(f"Edge source '{edge.source}' not found in nodes")
+
+        # Check target exists
+        if not any(n.id == edge.target for n in self.session.nodes):
+            errors.append(f"Edge target '{edge.target}' not found in nodes")
+
+        # Warn about conditional edges without expressions
+        if edge.condition == EdgeCondition.CONDITIONAL and not edge.condition_expr:
+            warnings.append(f"Conditional edge '{edge.id}' has no condition_expr")
+
+        return ValidationResult(
+            valid=len(errors) == 0,
+            errors=errors,
+            warnings=warnings,
+        )
+
+    # =========================================================================
+    # VALIDATION & TESTING
+    # =========================================================================
+
+    def validate(self) -> ValidationResult:
+        """Validate the entire current graph state."""
+        errors = []
+        warnings = []
+
+        # Must have a goal
+        if not self.session.goal:
+            errors.append("No goal defined")
+            return ValidationResult(valid=False, errors=errors)
+
+        # Must have at least one node
+        if not self.session.nodes:
+            errors.append("No nodes defined")
+
+        # Check for entry node
+        entry_candidates = []
+        for node in self.session.nodes:
+            # A node is an entry candidate if no edges point to it
+            if not any(e.target == node.id for e in self.session.edges):
+                entry_candidates.append(node.id)
+
+        if len(entry_candidates) == 0 and self.session.nodes:
+            errors.append("No entry node found (all nodes have incoming edges)")
+        elif len(entry_candidates) > 1:
+            warnings.append(f"Multiple entry candidates: {entry_candidates}. Specify one.")
+
+        # Check for terminal nodes
+        terminal_candidates = []
+        for node in self.session.nodes:
+            if not any(e.source == node.id for e in self.session.edges):
+                terminal_candidates.append(node.id)
+
+        if not terminal_candidates and self.session.nodes:
+            warnings.append("No terminal nodes found (all nodes have outgoing edges)")
+
+        # Check reachability
+        if entry_candidates and self.session.nodes:
+            reachable = self._compute_reachable(entry_candidates[0])
+            unreachable = [n.id for n in self.session.nodes if n.id not in reachable]
+            if unreachable:
+                errors.append(f"Unreachable nodes: {unreachable}")
+
+        validation = ValidationResult(
+            valid=len(errors) == 0,
+            errors=errors,
+            warnings=warnings,
+        )
+        self._pending_validation = validation
+        return validation
+
+    def _compute_reachable(self, start: str) -> set[str]:
+        """Compute all nodes reachable from start."""
+        reachable = set()
+        to_visit = [start]
+
+        while to_visit:
+            current = to_visit.pop()
+            if current in reachable:
+                continue
+            reachable.add(current)
+
+            for edge in self.session.edges:
+                if edge.source == current:
+                    to_visit.append(edge.target)
+
+            # Also follow router routes
+            for node in self.session.nodes:
+                if node.id == current and node.routes:
+                    for target in node.routes.values():
+                        to_visit.append(target)
+
+        return reachable
+
+    def add_test(self, test: TestCase) -> None:
+        """Add a test case."""
+        self.session.test_cases.append(test)
+        self._save_session()
+
+    def run_test(
+        self,
+        test: TestCase,
+        executor_factory: Callable,
+    ) -> TestResult:
+        """
+        Run a single test case.
+
+        executor_factory should return a configured GraphExecutor.
+        """
+        self._require_phase([BuildPhase.ADDING_NODES, BuildPhase.ADDING_EDGES, BuildPhase.TESTING])
+        self.session.phase = BuildPhase.TESTING
+
+        try:
+            # Build temporary graph for testing
+            graph = self._build_graph()
+            executor = executor_factory()
+
+            # Run the test
+            import asyncio
+
+            result = asyncio.run(
+                executor.execute(
+                    graph=graph,
+                    goal=self.session.goal,
+                    input_data=test.input,
+                )
+            )
+
+            # Check result
+            passed = result.success
+            if test.expected_output is not None:
+                passed = passed and (result.output.get("result") == test.expected_output)
+            if test.expected_contains:
+                output_str = str(result.output)
+                passed = passed and (test.expected_contains in output_str)
+
+            test_result = TestResult(
+                test_id=test.id,
+                passed=passed,
+                actual_output=result.output,
+                execution_path=result.path,
+            )
+
+        except Exception as e:
+            test_result = TestResult(
+                test_id=test.id,
+                passed=False,
+                error=str(e),
+            )
+
+        self.session.test_results.append(test_result)
+        self._save_session()
+
+        return test_result
+
+    def run_all_tests(self, executor_factory: Callable) -> list[TestResult]:
+        """Run all test cases."""
+        results = []
+        for test in self.session.test_cases:
+            result = self.run_test(test, executor_factory)
+            results.append(result)
+        return results
+
+    # =========================================================================
+    # APPROVAL
+    # =========================================================================
+
+    def approve(self, comment: str) -> bool:
+        """
+        Approve the current pending change.
+
+        Must have a passing validation to approve.
+        Returns True if approved, False if validation failed.
+        """
+        if self._pending_validation is None:
+            raise RuntimeError("Nothing to approve. Run validation first.")
+
+        if not self._pending_validation.valid:
+            return False
+
+        self.session.approvals.append(
+            {
+                "phase": self.session.phase.value,
+                "comment": comment,
+                "timestamp": datetime.now().isoformat(),
+                "validation": self._pending_validation.model_dump(),
+            }
+        )
+
+        # Advance phase if appropriate
+        if self.session.phase == BuildPhase.GOAL_DRAFT:
+            self.session.phase = BuildPhase.GOAL_APPROVED
+
+        self._pending_validation = None
+        self._save_session()
+
+        return True
+
+    def final_approve(self, comment: str) -> bool:
+        """
+        Final approval for the complete graph.
+
+        Requires all tests to pass.
+        """
+        # Run final validation
+        validation = self.validate()
+        if not validation.valid:
+            self._pending_validation = validation
+            return False
+
+        # Check test results
+        if self.session.test_cases:
+            failed_tests = [t for t in self.session.test_results if not t.passed]
+            if failed_tests:
+                self._pending_validation = ValidationResult(
+                    valid=False,
+                    errors=[f"Failed tests: {[t.test_id for t in failed_tests]}"],
+                )
+                return False
+
+        self.session.phase = BuildPhase.APPROVED
+        self.session.approvals.append(
+            {
+                "phase": "final",
+                "comment": comment,
+                "timestamp": datetime.now().isoformat(),
+            }
+        )
+
+        self._save_session()
+        return True
+
+    # =========================================================================
+    # EXPORT
+    # =========================================================================
+
+    def export(self) -> GraphSpec:
+        """
+        Export the approved graph.
+
+        Requires final approval.
+        """
+        self._require_phase([BuildPhase.APPROVED])
+
+        graph = self._build_graph()
+
+        self.session.phase = BuildPhase.EXPORTED
+        self._save_session()
+
+        return graph
+
+    def _build_graph(self) -> GraphSpec:
+        """Build a GraphSpec from current session."""
+        # Determine entry node
+        entry_node = None
+        for node in self.session.nodes:
+            if not any(e.target == node.id for e in self.session.edges):
+                entry_node = node.id
+                break
+
+        # Determine terminal nodes
+        terminal_nodes = []
+        for node in self.session.nodes:
+            if not any(e.source == node.id for e in self.session.edges):
+                terminal_nodes.append(node.id)
+
+        # Collect all memory keys
+        memory_keys = set()
+        for node in self.session.nodes:
+            memory_keys.update(node.input_keys)
+            memory_keys.update(node.output_keys)
+
+        return GraphSpec(
+            id=f"{self.session.name}-graph",
+            goal_id=self.session.goal.id if self.session.goal else "",
+            entry_node=entry_node or "",
+            terminal_nodes=terminal_nodes,
+            nodes=self.session.nodes,
+            edges=self.session.edges,
+            memory_keys=list(memory_keys),
+        )
+
+    def export_to_file(self, path: Path | str) -> None:
+        """Export the graph to a Python file."""
+        self._require_phase([BuildPhase.APPROVED, BuildPhase.EXPORTED])
+
+        graph = self._build_graph()
+
+        # Generate Python code
+        code = self._generate_code(graph)
+
+        Path(path).write_text(code)
+        self.session.phase = BuildPhase.EXPORTED
+        self._save_session()
+
+    def _generate_code(self, graph: GraphSpec) -> str:
+        """Generate Python code for the graph."""
+        lines = [
+            '"""',
+            f"Generated agent: {self.session.name}",
+            f"Generated at: {datetime.now().isoformat()}",
+            '"""',
+            "",
+            "from framework.graph import (",
+            "    Goal, SuccessCriterion, Constraint,",
+            "    NodeSpec, EdgeSpec, EdgeCondition,",
+            ")",
+            "from framework.graph.edge import GraphSpec",
+            "from framework.graph.goal import GoalStatus",
+            "",
+            "",
+            "# Goal",
+        ]
+
+        if self.session.goal:
+            goal_json = self.session.goal.model_dump_json(indent=4)
+            lines.append("GOAL = Goal.model_validate_json('''")
+            lines.append(goal_json)
+            lines.append("''')")
+        else:
+            lines.append("GOAL = None")
+
+        lines.extend(
+            [
+                "",
+                "",
+                "# Nodes",
+                "NODES = [",
+            ]
+        )
+
+        for node in self.session.nodes:
+            node_json = node.model_dump_json(indent=4)
+            lines.append("    NodeSpec.model_validate_json('''")
+            lines.append(node_json)
+            lines.append("    '''),")
+
+        lines.extend(
+            [
+                "]",
+                "",
+                "",
+                "# Edges",
+                "EDGES = [",
+            ]
+        )
+
+        for edge in self.session.edges:
+            edge_json = edge.model_dump_json(indent=4)
+            lines.append("    EdgeSpec.model_validate_json('''")
+            lines.append(edge_json)
+            lines.append("    '''),")
+
+        lines.extend(
+            [
+                "]",
+                "",
+                "",
+                "# Graph",
+            ]
+        )
+
+        graph_json = graph.model_dump_json(indent=4)
+        lines.append("GRAPH = GraphSpec.model_validate_json('''")
+        lines.append(graph_json)
+        lines.append("''')")
+
+        return "\n".join(lines)
+
+    # =========================================================================
+    # SESSION MANAGEMENT
+    # =========================================================================
+
+    def _require_phase(self, allowed: list[BuildPhase]) -> None:
+        """Ensure we're in an allowed phase."""
+        if self.session.phase not in allowed:
+            raise RuntimeError(
+                f"Cannot perform this action in phase '{self.session.phase.value}'. "
+                f"Allowed phases: {[p.value for p in allowed]}"
+            )
+
+    def _save_session(self) -> None:
+        """Save session to disk."""
+        self.session.updated_at = datetime.now()
+        path = self.storage_path / f"{self.session.id}.json"
+        path.write_text(self.session.model_dump_json(indent=2))
+
+    def _load_session(self, session_id: str) -> BuildSession:
+        """Load session from disk."""
+        path = self.storage_path / f"{session_id}.json"
+        if not path.exists():
+            raise FileNotFoundError(f"Session not found: {session_id}")
+        return BuildSession.model_validate_json(path.read_text())
+
+    @classmethod
+    def list_sessions(cls, storage_path: Path | str | None = None) -> list[str]:
+        """List all saved sessions."""
+        path = Path(storage_path) if storage_path else Path.home() / ".core" / "builds"
+        if not path.exists():
+            return []
+        return [f.stem for f in path.glob("*.json")]
+
+    # =========================================================================
+    # STATUS
+    # =========================================================================
+
+    def status(self) -> dict[str, Any]:
+        """Get current build status."""
+        return {
+            "session_id": self.session.id,
+            "name": self.session.name,
+            "phase": self.session.phase.value,
+            "goal": self.session.goal.name if self.session.goal else None,
+            "nodes": len(self.session.nodes),
+            "edges": len(self.session.edges),
+            "tests": len(self.session.test_cases),
+            "tests_passed": sum(1 for t in self.session.test_results if t.passed),
+            "approvals": len(self.session.approvals),
+            "pending_validation": self._pending_validation.model_dump()
+            if self._pending_validation
+            else None,
+        }
+
+    def show(self) -> str:
+        """Show current graph as text."""
+        lines = [
+            f"=== Build: {self.session.name} ===",
+            f"Phase: {self.session.phase.value}",
+            "",
+        ]
+
+        if self.session.goal:
+            lines.extend(
+                [
+                    f"Goal: {self.session.goal.name}",
+                    f"  {self.session.goal.description}",
+                    "",
+                ]
+            )
+
+        if self.session.nodes:
+            lines.append("Nodes:")
+            for node in self.session.nodes:
+                lines.append(f"  [{node.id}] {node.name} ({node.node_type})")
+            lines.append("")
+
+        if self.session.edges:
+            lines.append("Edges:")
+            for edge in self.session.edges:
+                lines.append(f"  {edge.source} --{edge.condition.value}--> {edge.target}")
+            lines.append("")
+
+        if self._pending_validation:
+            lines.append("Pending Validation:")
+            lines.append(f"  Valid: {self._pending_validation.valid}")
+            for err in self._pending_validation.errors:
+                lines.append(f"  ERROR: {err}")
+            for warn in self._pending_validation.warnings:
+                lines.append(f"  WARN: {warn}")
+
+        return "\n".join(lines)
@@ -0,0 +1,85 @@
+"""
+Command-line interface for Aden Hive.
+
+Usage:
+    hive run exports/my-agent --input '{"key": "value"}'
+    hive info exports/my-agent
+    hive validate exports/my-agent
+    hive list exports/
+    hive dispatch exports/ --input '{"key": "value"}'
+    hive shell exports/my-agent
+
+Testing commands:
+    hive test-run <agent_path> --goal <goal_id>
+    hive test-debug <goal_id> <test_id>
+    hive test-list <goal_id>
+    hive test-stats <goal_id>
+"""
+
+import argparse
+import sys
+from pathlib import Path
+
+
+def _configure_paths():
+    """Auto-configure sys.path so agents in exports/ are discoverable.
+
+    Resolves the project root by walking up from this file (framework/cli.py lives
+    inside core/framework/) or from CWD, then adds the exports/ directory to sys.path
+    if it exists. This eliminates the need for manual PYTHONPATH configuration.
+    """
+    # Strategy 1: resolve relative to this file (works when installed via pip install -e core/)
+    framework_dir = Path(__file__).resolve().parent  # core/framework/
+    core_dir = framework_dir.parent  # core/
+    project_root = core_dir.parent  # project root
+
+    # Strategy 2: if project_root doesn't look right, fall back to CWD
+    if not (project_root / "exports").is_dir() and not (project_root / "core").is_dir():
+        project_root = Path.cwd()
+
+    # Add exports/ to sys.path so agents are importable as top-level packages
+    exports_dir = project_root / "exports"
+    if exports_dir.is_dir():
+        exports_str = str(exports_dir)
+        if exports_str not in sys.path:
+            sys.path.insert(0, exports_str)
+
+    # Ensure core/ is also in sys.path (for non-editable-install scenarios)
+    core_str = str(project_root / "core")
+    if (project_root / "core").is_dir() and core_str not in sys.path:
+        sys.path.insert(0, core_str)
+
+
+def main():
+    _configure_paths()
+
+    parser = argparse.ArgumentParser(
+        prog="hive",
+        description="Aden Hive - Build and run goal-driven agents",
+    )
+    parser.add_argument(
+        "--model",
+        default="claude-haiku-4-5-20251001",
+        help="Anthropic model to use",
+    )
+
+    subparsers = parser.add_subparsers(dest="command", required=True)
+
+    # Register runner commands (run, info, validate, list, dispatch, shell)
+    from framework.runner.cli import register_commands
+
+    register_commands(subparsers)
+
+    # Register testing commands (test-run, test-debug, test-list, test-stats)
+    from framework.testing.cli import register_testing_commands
+
+    register_testing_commands(subparsers)
+
+    args = parser.parse_args()
+
+    if hasattr(args, "func"):
+        sys.exit(args.func(args))
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,122 @@
+"""
+Credential Store - Production-ready credential management for Hive.
+
+This module provides secure credential storage with:
+- Key-vault structure: Credentials as objects with multiple keys
+- Template-based usage: {{cred.key}} patterns for injection
+- Bipartisan model: Store stores values, tools define usage
+- Provider system: Extensible lifecycle management (refresh, validate)
+- Multiple backends: Encrypted files, env vars, HashiCorp Vault
+
+Quick Start:
+    from core.framework.credentials import CredentialStore, CredentialObject
+
+    # Create store with encrypted storage
+    store = CredentialStore.with_encrypted_storage()  # defaults to ~/.hive/credentials
+
+    # Get a credential
+    api_key = store.get("brave_search")
+
+    # Resolve templates in headers
+    headers = store.resolve_headers({
+        "Authorization": "Bearer {{github_oauth.access_token}}"
+    })
+
+    # Save a new credential
+    store.save_credential(CredentialObject(
+        id="my_api",
+        keys={"api_key": CredentialKey(name="api_key", value=SecretStr("xxx"))}
+    ))
+
+For OAuth2 support:
+    from core.framework.credentials.oauth2 import BaseOAuth2Provider, OAuth2Config
+
+For Aden server sync:
+    from core.framework.credentials.aden import (
+        AdenCredentialClient,
+        AdenClientConfig,
+        AdenSyncProvider,
+    )
+
+For Vault integration:
+    from core.framework.credentials.vault import HashiCorpVaultStorage
+"""
+
+from .models import (
+    CredentialDecryptionError,
+    CredentialError,
+    CredentialKey,
+    CredentialKeyNotFoundError,
+    CredentialNotFoundError,
+    CredentialObject,
+    CredentialRefreshError,
+    CredentialType,
+    CredentialUsageSpec,
+    CredentialValidationError,
+)
+from .provider import (
+    BearerTokenProvider,
+    CredentialProvider,
+    StaticProvider,
+)
+from .storage import (
+    CompositeStorage,
+    CredentialStorage,
+    EncryptedFileStorage,
+    EnvVarStorage,
+    InMemoryStorage,
+)
+from .store import CredentialStore
+from .template import TemplateResolver
+
+# Aden sync components (lazy import to avoid httpx dependency when not needed)
+# Usage: from core.framework.credentials.aden import AdenSyncProvider
+# Or: from core.framework.credentials import AdenSyncProvider
+try:
+    from .aden import (
+        AdenCachedStorage,
+        AdenClientConfig,
+        AdenCredentialClient,
+        AdenSyncProvider,
+    )
+
+    _ADEN_AVAILABLE = True
+except ImportError:
+    _ADEN_AVAILABLE = False
+
+__all__ = [
+    # Main store
+    "CredentialStore",
+    # Models
+    "CredentialObject",
+    "CredentialKey",
+    "CredentialType",
+    "CredentialUsageSpec",
+    # Providers
+    "CredentialProvider",
+    "StaticProvider",
+    "BearerTokenProvider",
+    # Storage backends
+    "CredentialStorage",
+    "EncryptedFileStorage",
+    "EnvVarStorage",
+    "InMemoryStorage",
+    "CompositeStorage",
+    # Template resolution
+    "TemplateResolver",
+    # Exceptions
+    "CredentialError",
+    "CredentialNotFoundError",
+    "CredentialKeyNotFoundError",
+    "CredentialRefreshError",
+    "CredentialValidationError",
+    "CredentialDecryptionError",
+    # Aden sync (optional - requires httpx)
+    "AdenSyncProvider",
+    "AdenCredentialClient",
+    "AdenClientConfig",
+    "AdenCachedStorage",
+]
+
+# Track Aden availability for runtime checks
+ADEN_AVAILABLE = _ADEN_AVAILABLE
@@ -0,0 +1,76 @@
+"""
+Aden Credential Sync.
+
+Components for synchronizing credentials with the Aden authentication server.
+
+The Aden server handles OAuth2 authorization flows and maintains refresh tokens.
+These components fetch and cache access tokens locally while delegating
+lifecycle management to Aden.
+
+Components:
+- AdenCredentialClient: HTTP client for Aden API
+- AdenSyncProvider: CredentialProvider that syncs with Aden
+- AdenCachedStorage: Storage with local cache + Aden fallback
+
+Quick Start:
+    import os
+
+    from core.framework.credentials import CredentialStore
+    from core.framework.credentials.storage import EncryptedFileStorage
+    from core.framework.credentials.aden import (
+        AdenCredentialClient,
+        AdenClientConfig,
+        AdenSyncProvider,
+    )
+
+    # Configure (API key loaded from ADEN_API_KEY env var)
+    client = AdenCredentialClient(AdenClientConfig(
+        base_url=os.environ["ADEN_API_URL"],
+    ))
+
+    provider = AdenSyncProvider(client=client)
+
+    store = CredentialStore(
+        storage=EncryptedFileStorage(),
+        providers=[provider],
+        auto_refresh=True,
+    )
+
+    # Initial sync
+    provider.sync_all(store)
+
+    # Use normally
+    token = store.get_key("hubspot", "access_token")
+
+See docs/aden-credential-sync.md for detailed documentation.
+"""
+
+from .client import (
+    AdenAuthenticationError,
+    AdenClientConfig,
+    AdenClientError,
+    AdenCredentialClient,
+    AdenCredentialResponse,
+    AdenIntegrationInfo,
+    AdenNotFoundError,
+    AdenRateLimitError,
+    AdenRefreshError,
+)
+from .provider import AdenSyncProvider
+from .storage import AdenCachedStorage
+
+# Public API of this package: every name imported above from the client,
+# provider and storage submodules.
+__all__ = [
+    # Client
+    "AdenCredentialClient",
+    "AdenClientConfig",
+    "AdenCredentialResponse",
+    "AdenIntegrationInfo",
+    # Client errors
+    "AdenClientError",
+    "AdenAuthenticationError",
+    "AdenNotFoundError",
+    "AdenRateLimitError",
+    "AdenRefreshError",
+    # Provider
+    "AdenSyncProvider",
+    # Storage
+    "AdenCachedStorage",
+]
@@ -0,0 +1,466 @@
+"""
+Aden Credential Client.
+
+HTTP client for communicating with the Aden authentication server.
+The Aden server handles OAuth2 authorization flows and token management.
+This client fetches tokens and delegates refresh operations to Aden.
+
+Usage:
+    # API key loaded from ADEN_API_KEY environment variable by default
+    client = AdenCredentialClient(AdenClientConfig(
+        base_url="https://api.adenhq.com",
+    ))
+
+    # Or explicitly provide the API key
+    client = AdenCredentialClient(AdenClientConfig(
+        base_url="https://api.adenhq.com",
+        api_key="your-api-key",
+    ))
+
+    # Fetch a credential
+    response = client.get_credential("hubspot")
+    if response:
+        print(f"Token expires at: {response.expires_at}")
+
+    # Request a refresh
+    refreshed = client.request_refresh("hubspot")
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import time
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import Any
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+
+class AdenClientError(Exception):
+    """Base exception for Aden client errors."""
+
+    pass
+
+
+class AdenAuthenticationError(AdenClientError):
+    """Raised when API key is invalid or revoked."""
+
+    pass
+
+
+class AdenNotFoundError(AdenClientError):
+    """Raised when integration is not found."""
+
+    pass
+
+
+class AdenRefreshError(AdenClientError):
+    """Raised when token refresh fails."""
+
+    def __init__(
+        self,
+        message: str,
+        requires_reauthorization: bool = False,
+        reauthorization_url: str | None = None,
+    ):
+        super().__init__(message)
+        self.requires_reauthorization = requires_reauthorization
+        self.reauthorization_url = reauthorization_url
+
+
+class AdenRateLimitError(AdenClientError):
+    """Raised when rate limited."""
+
+    def __init__(self, message: str, retry_after: int = 60):
+        super().__init__(message)
+        self.retry_after = retry_after
+
+
+@dataclass
+class AdenClientConfig:
+    """Configuration for Aden API client."""
+
+    base_url: str
+    """Base URL of the Aden server (e.g., 'https://api.adenhq.com')."""
+
+    api_key: str | None = None
+    """Agent's API key for authenticating with Aden.
+    If not provided, loaded from ADEN_API_KEY environment variable."""
+
+    tenant_id: str | None = None
+    """Optional tenant ID for multi-tenant deployments."""
+
+    timeout: float = 30.0
+    """Request timeout in seconds."""
+
+    retry_attempts: int = 3
+    """Number of retry attempts for transient failures."""
+
+    retry_delay: float = 1.0
+    """Base delay between retries in seconds (exponential backoff)."""
+
+    def __post_init__(self) -> None:
+        """Load API key from environment if not provided."""
+        if self.api_key is None:
+            self.api_key = os.environ.get("ADEN_API_KEY")
+            if not self.api_key:
+                raise ValueError(
+                    "Aden API key not provided. Either pass api_key to AdenClientConfig "
+                    "or set the ADEN_API_KEY environment variable."
+                )
+
+
+@dataclass
+class AdenCredentialResponse:
+    """Response from Aden server containing credential data."""
+
+    integration_id: str
+    """Unique identifier for the integration (e.g., 'hubspot')."""
+
+    integration_type: str
+    """Type of integration (e.g., 'hubspot', 'github', 'slack')."""
+
+    access_token: str
+    """The access token for API calls."""
+
+    token_type: str = "Bearer"
+    """Token type (usually 'Bearer')."""
+
+    expires_at: datetime | None = None
+    """When the access token expires (UTC)."""
+
+    scopes: list[str] = field(default_factory=list)
+    """OAuth2 scopes granted to this token."""
+
+    metadata: dict[str, Any] = field(default_factory=dict)
+    """Additional integration-specific metadata."""
+
+    @classmethod
+    def from_dict(
+        cls, data: dict[str, Any], integration_id: str | None = None
+    ) -> AdenCredentialResponse:
+        """Create from API response dictionary."""
+        expires_at = None
+        if data.get("expires_at"):
+            expires_at = datetime.fromisoformat(data["expires_at"].replace("Z", "+00:00"))
+
+        return cls(
+            integration_id=integration_id or data.get("alias", data.get("provider", "")),
+            integration_type=data.get("provider", ""),
+            access_token=data["access_token"],
+            token_type=data.get("token_type", "Bearer"),
+            expires_at=expires_at,
+            scopes=data.get("scopes", []),
+            metadata={"email": data.get("email")} if data.get("email") else {},
+        )
+
+
+@dataclass
+class AdenIntegrationInfo:
+    """Information about an available integration."""
+
+    integration_id: str
+    integration_type: str
+    status: str  # "active", "requires_reauth", "expired"
+    expires_at: datetime | None = None
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> AdenIntegrationInfo:
+        """Create from API response dictionary."""
+        expires_at = None
+        if data.get("expires_at"):
+            expires_at = datetime.fromisoformat(data["expires_at"].replace("Z", "+00:00"))
+
+        return cls(
+            integration_id=data["integration_id"],
+            integration_type=data.get("provider", data["integration_id"]),
+            status=data.get("status", "unknown"),
+            expires_at=expires_at,
+        )
+
+
+class AdenCredentialClient:
+    """
+    HTTP client for Aden credential server.
+
+    Handles communication with the Aden authentication server,
+    including fetching credentials, requesting refreshes, and
+    reporting usage statistics.
+
+    The client automatically handles:
+    - Retries with exponential backoff for transient failures
+    - Proper error classification (auth, not found, rate limit, etc.)
+    - Request headers for authentication and tenant isolation
+
+    Usage:
+        # API key loaded from ADEN_API_KEY environment variable
+        config = AdenClientConfig(
+            base_url="https://api.adenhq.com",
+        )
+
+        client = AdenCredentialClient(config)
+
+        # Fetch a credential
+        cred = client.get_credential("hubspot")
+        if cred:
+            headers = {"Authorization": f"Bearer {cred.access_token}"}
+
+        # List all integrations
+        integrations = client.list_integrations()
+        for info in integrations:
+            print(f"{info.integration_id}: {info.status}")
+
+        # Clean up
+        client.close()
+    """
+
+    def __init__(self, config: AdenClientConfig):
+        """
+        Store the configuration; no HTTP client is created here.
+
+        Args:
+            config: Client configuration including base URL and API key.
+        """
+        # The httpx client is built on first use by _get_client().
+        self._client: httpx.Client | None = None
+        self.config = config
+
+    def _get_client(self) -> httpx.Client:
+        """Return the shared HTTP client, building it on the first call."""
+        if self._client is not None:
+            return self._client
+
+        cfg = self.config
+        default_headers = {
+            "Authorization": f"Bearer {cfg.api_key}",
+            "Content-Type": "application/json",
+            "User-Agent": "hive-credential-store/1.0",
+        }
+        # The tenant header is only sent when a tenant is configured.
+        if cfg.tenant_id:
+            default_headers["X-Tenant-ID"] = cfg.tenant_id
+
+        self._client = httpx.Client(
+            base_url=cfg.base_url,
+            timeout=cfg.timeout,
+            headers=default_headers,
+        )
+        return self._client
+
+    def _request_with_retry(
+        self,
+        method: str,
+        path: str,
+        **kwargs: Any,
+    ) -> httpx.Response:
+        """
+        Send a request, retrying connection failures and timeouts.
+
+        Connection errors and timeouts are retried up to
+        ``config.retry_attempts`` times, sleeping
+        ``config.retry_delay * 2**attempt`` seconds between tries.
+        HTTP-level errors are never retried.
+
+        Args:
+            method: HTTP method name.
+            path: Request path handed to the HTTP client.
+            **kwargs: Extra keyword arguments forwarded to ``client.request``.
+
+        Returns:
+            The response, once ``raise_for_status()`` has accepted it.
+
+        Raises:
+            AdenAuthenticationError: On HTTP 401.
+            AdenNotFoundError: On HTTP 404.
+            AdenRateLimitError: On HTTP 429; carries ``retry_after`` seconds.
+            AdenRefreshError: On HTTP 400 whose JSON body has
+                ``error == "refresh_failed"``.
+            AdenClientError: When every attempt ended in a connection error
+                or timeout.
+            httpx.HTTPStatusError: For any other error status.
+        """
+        client = self._get_client()
+        last_error: Exception | None = None
+
+        for attempt in range(self.config.retry_attempts):
+            try:
+                response = client.request(method, path, **kwargs)
+
+                # Handle specific error codes
+                if response.status_code == 401:
+                    raise AdenAuthenticationError("Agent API key is invalid or revoked")
+
+                if response.status_code == 404:
+                    raise AdenNotFoundError(f"Integration not found: {path}")
+
+                if response.status_code == 429:
+                    # Retry-After may be an HTTP-date rather than a number of
+                    # seconds; fall back to the 60s default instead of letting
+                    # int() raise an unrelated ValueError to the caller.
+                    try:
+                        retry_after = int(response.headers.get("Retry-After", 60))
+                    except (TypeError, ValueError):
+                        retry_after = 60
+                    raise AdenRateLimitError(
+                        "Rate limited by Aden server",
+                        retry_after=retry_after,
+                    )
+
+                if response.status_code == 400:
+                    # A 400 body is not guaranteed to be a JSON object. Anything
+                    # unparseable falls through to raise_for_status() below.
+                    try:
+                        data = response.json()
+                    except ValueError:
+                        data = None
+                    if isinstance(data, dict) and data.get("error") == "refresh_failed":
+                        raise AdenRefreshError(
+                            data.get("message", "Token refresh failed"),
+                            requires_reauthorization=data.get("requires_reauthorization", False),
+                            reauthorization_url=data.get("reauthorization_url"),
+                        )
+
+                # Success or other error
+                response.raise_for_status()
+                return response
+
+            except (httpx.ConnectError, httpx.TimeoutException) as e:
+                last_error = e
+                if attempt < self.config.retry_attempts - 1:
+                    # Exponential backoff: the delay doubles on each attempt.
+                    delay = self.config.retry_delay * (2**attempt)
+                    logger.warning(
+                        f"Aden request failed (attempt {attempt + 1}), retrying in {delay}s: {e}"
+                    )
+                    time.sleep(delay)
+                else:
+                    raise AdenClientError(f"Failed to connect to Aden server: {e}") from e
+
+            except (
+                AdenAuthenticationError,
+                AdenNotFoundError,
+                AdenRefreshError,
+                AdenRateLimitError,
+            ):
+                # Don't retry these errors
+                raise
+
+        # Only reached when retry_attempts is zero or negative, so the loop
+        # body never ran.
+        raise AdenClientError(
+            f"Request failed after {self.config.retry_attempts} attempts"
+        ) from last_error
+
+    def get_credential(self, integration_id: str) -> AdenCredentialResponse | None:
+        """
+        Fetch the current credential for one integration.
+
+        Issues ``GET /v1/credentials/{integration_id}`` and converts the JSON
+        body into an ``AdenCredentialResponse``.
+
+        Args:
+            integration_id: The integration identifier (e.g., 'hubspot').
+
+        Returns:
+            The parsed credential, or None when the server reports not found.
+
+        Raises:
+            AdenAuthenticationError: If API key is invalid.
+            AdenClientError: For connection failures.
+        """
+        credential_path = f"/v1/credentials/{integration_id}"
+        try:
+            payload = self._request_with_retry("GET", credential_path).json()
+            return AdenCredentialResponse.from_dict(payload, integration_id=integration_id)
+        except AdenNotFoundError:
+            # A missing integration is reported as None, not as an error.
+            return None
+
+    def request_refresh(self, integration_id: str) -> AdenCredentialResponse:
+        """
+        Ask the Aden server to refresh the token for an integration.
+
+        Issues ``POST /v1/credentials/{integration_id}/refresh`` and parses
+        the JSON body into an ``AdenCredentialResponse``.
+
+        Args:
+            integration_id: The integration identifier.
+
+        Returns:
+            Credential response built from the server's reply.
+
+        Raises:
+            AdenRefreshError: If refresh fails (may require re-authorization).
+            AdenNotFoundError: If integration not found.
+            AdenAuthenticationError: If API key is invalid.
+            AdenRateLimitError: If rate limited.
+        """
+        refresh_path = f"/v1/credentials/{integration_id}/refresh"
+        payload = self._request_with_retry("POST", refresh_path).json()
+        return AdenCredentialResponse.from_dict(payload, integration_id=integration_id)
+
+    def list_integrations(self) -> list[AdenIntegrationInfo]:
+        """
+        List the integrations returned by ``GET /v1/credentials``.
+
+        Returns:
+            One ``AdenIntegrationInfo`` per entry under the response's
+            ``integrations`` key; an empty list when the key is absent.
+
+        Raises:
+            AdenAuthenticationError: If API key is invalid.
+            AdenClientError: For connection failures.
+        """
+        payload = self._request_with_retry("GET", "/v1/credentials").json()
+        entries = payload.get("integrations", [])
+
+        results: list[AdenIntegrationInfo] = []
+        for entry in entries:
+            results.append(AdenIntegrationInfo.from_dict(entry))
+        return results
+
+    def validate_token(self, integration_id: str) -> dict[str, Any]:
+        """
+        Ask the server whether an integration's token is valid.
+
+        Issues ``GET /v1/credentials/{integration_id}/validate`` and returns
+        the decoded JSON body unchanged.
+
+        Args:
+            integration_id: The integration identifier.
+
+        Returns:
+            Dict with 'valid' bool and optional 'expires_at', 'reason',
+            'requires_reauthorization', 'reauthorization_url'.
+
+        Raises:
+            AdenNotFoundError: If integration not found.
+            AdenAuthenticationError: If API key is invalid.
+        """
+        validate_path = f"/v1/credentials/{integration_id}/validate"
+        return self._request_with_retry("GET", validate_path).json()
+
+    def report_usage(
+        self,
+        integration_id: str,
+        operation: str,
+        status: str = "success",
+        metadata: dict[str, Any] | None = None,
+    ) -> None:
+        """
+        Report credential usage statistics to Aden (best-effort).
+
+        Posts to ``/v1/credentials/{integration_id}/usage``. Any exception
+        raised while sending is logged as a warning and swallowed, so this
+        method does not fail the caller.
+
+        Args:
+            integration_id: The integration identifier.
+            operation: Operation name (e.g., 'api_call').
+            status: Operation status ('success', 'error').
+            metadata: Additional operation metadata; None is sent as ``{}``.
+        """
+        # Imported locally so this method does not depend on the module's
+        # top-level datetime imports.
+        from datetime import timezone
+
+        # datetime.utcnow() is deprecated; take an aware UTC "now" and drop
+        # the tzinfo so the wire format stays "<naive ISO timestamp>Z".
+        timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
+
+        try:
+            self._request_with_retry(
+                "POST",
+                f"/v1/credentials/{integration_id}/usage",
+                json={
+                    "operation": operation,
+                    "status": status,
+                    "timestamp": timestamp,
+                    "metadata": metadata or {},
+                },
+            )
+        except Exception as e:
+            # Usage reporting is best-effort, don't fail on errors
+            logger.warning(f"Failed to report usage for '{integration_id}': {e}")
+
+    def health_check(self) -> dict[str, Any]:
+        """
+        Probe the server's ``/health`` endpoint.
+
+        Exceptions raised during the probe are reported in the returned dict
+        rather than propagated.
+
+        Returns:
+            On HTTP 200, the response JSON plus a ``latency_ms`` entry.
+            On any other status, ``{"status": "degraded", "error": ...}``.
+            On an exception, ``{"status": "unhealthy", "error": ...}``.
+        """
+        try:
+            response = self._get_client().get("/health")
+            if response.status_code != 200:
+                return {
+                    "status": "degraded",
+                    "error": f"Unexpected status code: {response.status_code}",
+                }
+
+            payload = response.json()
+            # Latency is taken from the elapsed time recorded on the response.
+            payload["latency_ms"] = response.elapsed.total_seconds() * 1000
+            return payload
+        except Exception as e:
+            return {
+                "status": "unhealthy",
+                "error": str(e),
+            }
+
+    def close(self) -> None:
+        """Close the HTTP client if one was created, then forget it."""
+        if not self._client:
+            return
+        self._client.close()
+        self._client = None
+
+    def __enter__(self) -> AdenCredentialClient:
+        """Enter the ``with`` block and return this client unchanged."""
+        return self
+
+    def __exit__(self, *args: Any) -> None:
+        """Leave the ``with`` block by calling ``close()``; exception details are ignored."""
+        self.close()
@@ -0,0 +1,415 @@
+"""
+Aden Sync Provider.
+
+Provider that synchronizes credentials with the Aden authentication server.
+The Aden server is the authoritative source for OAuth2 tokens - this provider
+fetches and caches tokens locally while delegating refresh operations to Aden.
+
+Usage:
+    from core.framework.credentials import CredentialStore
+    from core.framework.credentials.storage import EncryptedFileStorage
+    from core.framework.credentials.aden import (
+        AdenCredentialClient,
+        AdenClientConfig,
+        AdenSyncProvider,
+    )
+
+    # Configure client (API key loaded from ADEN_API_KEY env var)
+    client = AdenCredentialClient(AdenClientConfig(
+        base_url=os.environ["ADEN_API_URL"],
+    ))
+
+    # Create provider
+    provider = AdenSyncProvider(client=client)
+
+    # Create store
+    store = CredentialStore(
+        storage=EncryptedFileStorage(),
+        providers=[provider],
+        auto_refresh=True,
+    )
+
+    # Initial sync from Aden
+    provider.sync_all(store)
+
+    # Use normally - auto-refreshes via Aden when needed
+    token = store.get_key("hubspot", "access_token")
+"""
+
+from __future__ import annotations
+
+import logging
+from datetime import UTC, datetime, timedelta
+from typing import TYPE_CHECKING
+
+from pydantic import SecretStr
+
+from ..models import CredentialKey, CredentialObject, CredentialRefreshError, CredentialType
+from ..provider import CredentialProvider
+from .client import (
+    AdenClientError,
+    AdenCredentialClient,
+    AdenCredentialResponse,
+    AdenRefreshError,
+)
+
+if TYPE_CHECKING:
+    from ..store import CredentialStore
+
+logger = logging.getLogger(__name__)
+
+
+class AdenSyncProvider(CredentialProvider):
+    """
+    Provider that synchronizes credentials with the Aden server.
+
+    The Aden server handles OAuth2 authorization flows and maintains
+    refresh tokens. This provider:
+
+    - Fetches access tokens from the Aden server
+    - Delegates token refresh to the Aden server
+    - Caches tokens locally in the credential store
+    - Optionally reports usage statistics back to Aden
+
+    Key benefits:
+    - Client secrets never leave the Aden server
+    - Refresh token security (stored only on Aden)
+    - Centralized audit logging
+    - Multi-tenant support
+
+    Usage:
+        client = AdenCredentialClient(AdenClientConfig(
+            base_url="https://api.adenhq.com",
+            api_key=os.environ["ADEN_API_KEY"],
+        ))
+
+        provider = AdenSyncProvider(client=client)
+
+        store = CredentialStore(
+            storage=EncryptedFileStorage(),
+            providers=[provider],
+            auto_refresh=True,
+        )
+    """
+
+    def __init__(
+        self,
+        client: AdenCredentialClient,
+        provider_id: str = "aden_sync",
+        refresh_buffer_minutes: int = 5,
+        report_usage: bool = False,
+    ):
+        """
+        Set up the provider around an existing Aden client.
+
+        Args:
+            client: Configured Aden API client.
+            provider_id: Unique identifier for this provider instance.
+                        Useful for multi-tenant scenarios (e.g., 'aden_tenant_123').
+            refresh_buffer_minutes: Minutes before expiry at which
+                                   should_refresh() starts returning True.
+                                   Default is 5 minutes.
+            report_usage: Whether to report usage statistics to Aden server.
+        """
+        # The buffer is kept as a timedelta so it can be subtracted directly
+        # from a key's expires_at.
+        self._refresh_buffer = timedelta(minutes=refresh_buffer_minutes)
+        self._report_usage = report_usage
+        self._provider_id = provider_id
+        self._client = client
+
+    @property
+    def provider_id(self) -> str:
+        """Identifier given to this provider at construction time."""
+        return self._provider_id
+
+    @property
+    def supported_types(self) -> list[CredentialType]:
+        """Credential types this provider accepts: OAUTH2 and BEARER_TOKEN."""
+        return [CredentialType.OAUTH2, CredentialType.BEARER_TOKEN]
+
+    def can_handle(self, credential: CredentialObject) -> bool:
+        """
+        Decide whether this provider is responsible for a credential.
+
+        The credential type must be one of ``supported_types``. Beyond that,
+        either of the following is sufficient:
+
+        - the credential's ``provider_id`` equals this provider's id, or
+        - the credential carries an ``_aden_managed`` key whose secret value
+          is the string ``"true"``.
+        """
+        if credential.credential_type not in self.supported_types:
+            return False
+
+        # An explicit link to this provider wins without looking at the keys.
+        if credential.provider_id == self.provider_id:
+            return True
+
+        marker = credential.keys.get("_aden_managed")
+        return bool(marker) and marker.value.get_secret_value() == "true"
+
+    def refresh(self, credential: CredentialObject) -> CredentialObject:
+        """
+        Refresh a credential by asking the Aden server for a new token.
+
+        On success the credential is updated from the server's response and
+        returned. If the client raises ``AdenClientError`` and the cached
+        access token has an expiry that is still in the future, the
+        credential is returned as-is instead of failing.
+
+        Args:
+            credential: The credential to refresh.
+
+        Returns:
+            The credential carrying the new access token, or the cached one
+            in the fallback case described above.
+
+        Raises:
+            CredentialRefreshError: If the server rejects the refresh, or the
+                client fails and no unexpired cached token is available.
+        """
+        try:
+            # The server performs the refresh; we only apply its answer.
+            server_reply = self._client.request_refresh(credential.id)
+            credential = self._update_credential_from_aden(credential, server_reply)
+
+            logger.info(f"Refreshed credential '{credential.id}' via Aden server")
+
+            if self._report_usage:
+                self._client.report_usage(
+                    integration_id=credential.id,
+                    operation="token_refresh",
+                    status="success",
+                )
+
+            return credential
+
+        except AdenRefreshError as e:
+            logger.error(f"Aden refresh failed for '{credential.id}': {e}")
+
+            if not e.requires_reauthorization:
+                raise CredentialRefreshError(
+                    f"Failed to refresh credential '{credential.id}': {e}"
+                ) from e
+
+            # Point the user at the re-authorization URL when one is given.
+            raise CredentialRefreshError(
+                f"Integration '{credential.id}' requires re-authorization. "
+                f"Visit: {e.reauthorization_url or 'your Aden dashboard'}"
+            ) from e
+
+        except AdenClientError as e:
+            logger.error(f"Aden client error for '{credential.id}': {e}")
+
+            # Fall back to the cached token only when it has a known expiry
+            # that has not passed yet.
+            cached_key = credential.keys.get("access_token")
+            if (
+                cached_key
+                and cached_key.expires_at
+                and datetime.now(UTC) < cached_key.expires_at
+            ):
+                logger.warning(f"Aden unavailable, using cached token for '{credential.id}'")
+                return credential
+
+            raise CredentialRefreshError(
+                f"Aden server unavailable and token expired for '{credential.id}'"
+            ) from e
+
+    def validate(self, credential: CredentialObject) -> bool:
+        """
+        Validate a credential, preferring the Aden server's verdict.
+
+        When the client raises ``AdenClientError``, the check falls back to
+        the local ``access_token`` key: a missing key is invalid, a key with
+        no expiry is assumed valid, otherwise it must not have expired.
+
+        Args:
+            credential: The credential to validate.
+
+        Returns:
+            True if credential is valid.
+        """
+        try:
+            verdict = self._client.validate_token(credential.id)
+            return verdict.get("valid", False)
+        except AdenClientError:
+            token_key = credential.keys.get("access_token")
+            if token_key is None:
+                return False
+
+            expiry = token_key.expires_at
+            # No recorded expiry is assumed to mean the token is still valid.
+            return True if expiry is None else datetime.now(UTC) < expiry
+
+    def should_refresh(self, credential: CredentialObject) -> bool:
+        """
+        Report whether the access token is expired or about to expire.
+
+        A credential with no ``access_token`` key, or whose key has no
+        expiry, is never refreshed.
+
+        Args:
+            credential: The credential to check.
+
+        Returns:
+            True once the current time is within the refresh buffer of the
+            token's expiry, or past it.
+        """
+        token_key = credential.keys.get("access_token")
+        if token_key is None or token_key.expires_at is None:
+            return False
+
+        refresh_at = token_key.expires_at - self._refresh_buffer
+        return datetime.now(UTC) >= refresh_at
+
+    def fetch_from_aden(self, integration_id: str) -> CredentialObject | None:
+        """
+        Fetch a credential from the Aden server and convert it.
+
+        Use this for initial population or when local cache is missing.
+
+        Args:
+            integration_id: The integration identifier (e.g., 'hubspot').
+
+        Returns:
+            A new CredentialObject, or None when the client returns None.
+
+        Raises:
+            AdenClientError: For connection failures.
+        """
+        server_reply = self._client.get_credential(integration_id)
+        return None if server_reply is None else self._aden_response_to_credential(server_reply)
+
+    def sync_all(self, store: CredentialStore) -> int:
+        """
+        Copy every active integration's credential into a store.
+
+        Integrations whose status is not ``"active"`` are skipped with a
+        warning. A failure on one integration is logged and does not stop
+        the others. A failure to list integrations is logged and yields 0.
+
+        Args:
+            store: The credential store to populate.
+
+        Returns:
+            Number of credentials synced.
+        """
+        saved_count = 0
+
+        try:
+            for entry in self._client.list_integrations():
+                name = entry.integration_id
+
+                if entry.status == "active":
+                    try:
+                        fetched = self.fetch_from_aden(name)
+                        if fetched:
+                            store.save_credential(fetched)
+                            saved_count += 1
+                            logger.info(f"Synced credential '{entry.integration_id}' from Aden")
+                    except Exception as e:
+                        # One bad integration must not abort the whole sync.
+                        logger.warning(f"Failed to sync '{entry.integration_id}': {e}")
+                else:
+                    logger.warning(
+                        f"Skipping integration '{entry.integration_id}': status={entry.status}"
+                    )
+
+        except AdenClientError as e:
+            logger.error(f"Failed to list integrations from Aden: {e}")
+
+        return saved_count
+
+    def report_credential_usage(
+        self,
+        credential: CredentialObject,
+        operation: str,
+        status: str = "success",
+        metadata: dict | None = None,
+    ) -> None:
+        """
+        Forward a usage report to the Aden client when reporting is enabled.
+
+        Does nothing when the provider was created with ``report_usage=False``.
+
+        Args:
+            credential: The credential that was used.
+            operation: Operation name (e.g., 'api_call').
+            status: Operation status ('success', 'error').
+            metadata: Additional metadata; None is forwarded as ``{}``.
+        """
+        if not self._report_usage:
+            return
+
+        self._client.report_usage(
+            integration_id=credential.id,
+            operation=operation,
+            status=status,
+            metadata=metadata or {},
+        )
+
+    def _update_credential_from_aden(
+        self,
+        credential: CredentialObject,
+        aden_response: AdenCredentialResponse,
+    ) -> CredentialObject:
+        """
+        Apply an Aden response to an existing credential, in place.
+
+        Overwrites the ``access_token``, ``_aden_managed`` and
+        ``_integration_type`` keys, overwrites ``scope`` only when the
+        response lists scopes, stamps ``last_refreshed`` and claims the
+        credential for this provider. The same object is returned.
+        """
+        keys = credential.keys
+
+        keys["access_token"] = CredentialKey(
+            name="access_token",
+            value=SecretStr(aden_response.access_token),
+            expires_at=aden_response.expires_at,
+        )
+
+        # Scopes are stored as one space-separated string.
+        if aden_response.scopes:
+            joined_scopes = " ".join(aden_response.scopes)
+            keys["scope"] = CredentialKey(name="scope", value=SecretStr(joined_scopes))
+
+        # Bookkeeping keys: the managed flag read by can_handle(), and the
+        # integration type reported by the server.
+        keys["_aden_managed"] = CredentialKey(name="_aden_managed", value=SecretStr("true"))
+        keys["_integration_type"] = CredentialKey(
+            name="_integration_type",
+            value=SecretStr(aden_response.integration_type),
+        )
+
+        credential.last_refreshed = datetime.now(UTC)
+        credential.provider_id = self.provider_id
+
+        return credential
+
+    def _aden_response_to_credential(
+        self,
+        aden_response: AdenCredentialResponse,
+    ) -> CredentialObject:
+        """
+        Build a new CredentialObject from an Aden response.
+
+        The result is typed OAUTH2, owned by this provider, has auto-refresh
+        enabled, and carries the access token plus the ``_aden_managed`` and
+        ``_integration_type`` keys. A ``scope`` key is added only when the
+        response lists scopes.
+        """
+        token_key = CredentialKey(
+            name="access_token",
+            value=SecretStr(aden_response.access_token),
+            expires_at=aden_response.expires_at,
+        )
+        managed_key = CredentialKey(name="_aden_managed", value=SecretStr("true"))
+        type_key = CredentialKey(
+            name="_integration_type",
+            value=SecretStr(aden_response.integration_type),
+        )
+
+        keys: dict[str, CredentialKey] = {
+            "access_token": token_key,
+            "_aden_managed": managed_key,
+            "_integration_type": type_key,
+        }
+
+        # Scopes are stored as one space-separated string.
+        if aden_response.scopes:
+            joined_scopes = " ".join(aden_response.scopes)
+            keys["scope"] = CredentialKey(name="scope", value=SecretStr(joined_scopes))
+
+        return CredentialObject(
+            id=aden_response.integration_id,
+            credential_type=CredentialType.OAUTH2,
+            keys=keys,
+            provider_id=self.provider_id,
+            auto_refresh=True,
+        )
@@ -0,0 +1,307 @@
+"""
+Aden Cached Storage.
+
+Storage backend that combines local cache with Aden server fallback.
+Provides offline resilience by caching credentials locally while
+keeping them synchronized with the Aden server.
+
+Usage:
+    from core.framework.credentials import CredentialStore
+    from core.framework.credentials.storage import EncryptedFileStorage
+    from core.framework.credentials.aden import (
+        AdenCredentialClient,
+        AdenClientConfig,
+        AdenSyncProvider,
+        AdenCachedStorage,
+    )
+
+    # Configure
+    client = AdenCredentialClient(AdenClientConfig(
+        base_url=os.environ["ADEN_API_URL"],
+        api_key=os.environ["ADEN_API_KEY"],
+    ))
+    provider = AdenSyncProvider(client=client)
+
+    # Create cached storage
+    storage = AdenCachedStorage(
+        local_storage=EncryptedFileStorage(),
+        aden_provider=provider,
+        cache_ttl_seconds=300,  # Re-check Aden every 5 minutes
+    )
+
+    # Create store
+    store = CredentialStore(
+        storage=storage,
+        providers=[provider],
+        auto_refresh=True,
+    )
+
+    # Credentials automatically fetched from Aden on first access
+    # Cached locally for 5 minutes
+    # Falls back to cache if Aden is unreachable
+"""
+
+from __future__ import annotations
+
+import logging
+from datetime import UTC, datetime, timedelta
+from typing import TYPE_CHECKING
+
+from ..storage import CredentialStorage
+
+if TYPE_CHECKING:
+    from ..models import CredentialObject
+    from .provider import AdenSyncProvider
+
+logger = logging.getLogger(__name__)
+
+
+class AdenCachedStorage(CredentialStorage):
+    """
+    Storage with local cache and Aden server fallback.
+
+    This storage provides:
+    - **Reads**: Try local cache first, fallback to Aden if stale/missing
+    - **Writes**: Always write to local cache
+    - **Offline resilience**: Uses cached credentials when Aden is unreachable
+
+    The cache TTL determines how long to trust local credentials before
+    checking with the Aden server for updates. This balances:
+    - Performance (fewer network calls)
+    - Freshness (tokens stay current)
+    - Resilience (works during brief outages)
+
+    Usage:
+        storage = AdenCachedStorage(
+            local_storage=EncryptedFileStorage(),
+            aden_provider=provider,
+            cache_ttl_seconds=300,  # 5 minutes
+        )
+
+        store = CredentialStore(
+            storage=storage,
+            providers=[provider],
+        )
+
+        # First access fetches from Aden
+        # Subsequent accesses use cache until TTL expires
+        token = store.get_key("hubspot", "access_token")
+    """
+
+    def __init__(
+        self,
+        local_storage: CredentialStorage,
+        aden_provider: AdenSyncProvider,
+        cache_ttl_seconds: int = 300,
+        prefer_local: bool = True,
+    ):
+        """
+        Wire a local storage backend to an Aden provider.
+
+        Args:
+            local_storage: Local storage backend for caching (e.g., EncryptedFileStorage).
+            aden_provider: Provider for fetching from Aden server.
+            cache_ttl_seconds: How long to trust local cache before checking Aden.
+                              Default is 300 seconds (5 minutes).
+            prefer_local: If True, use local cache when available and fresh.
+                         If False, always check Aden first.
+        """
+        # Save times live in this in-memory dict only; it starts empty, so
+        # nothing counts as fresh until it is saved through this instance.
+        self._cache_timestamps: dict[str, datetime] = {}
+        self._cache_ttl = timedelta(seconds=cache_ttl_seconds)
+        self._prefer_local = prefer_local
+        self._aden_provider = aden_provider
+        self._local = local_storage
+
+    def save(self, credential: CredentialObject) -> None:
+        """
+        Save credential to local cache and record when it was cached.
+
+        Args:
+            credential: The credential to save.
+        """
+        self._local.save(credential)
+        # Stamped only after the local save returns, so a failed save does
+        # not mark the entry as fresh.
+        self._cache_timestamps[credential.id] = datetime.now(UTC)
+        logger.debug(f"Cached credential '{credential.id}'")
+
+    def load(self, credential_id: str) -> CredentialObject | None:
+        """
+        Load a credential, consulting the local cache and the Aden server.
+
+        With ``prefer_local=True`` (default), a local copy that is still
+        within the TTL is returned immediately. Otherwise, and always when
+        ``prefer_local=False``, Aden is asked first:
+
+        1. A credential returned by Aden is saved locally and returned.
+        2. If Aden returns nothing, or the fetch or save raises, the local
+           copy is returned, even if stale.
+
+        Args:
+            credential_id: The credential identifier.
+
+        Returns:
+            CredentialObject if found, None otherwise.
+        """
+        cached = self._local.load(credential_id)
+
+        if self._prefer_local and cached and self._is_cache_fresh(credential_id):
+            logger.debug(f"Using cached credential '{credential_id}'")
+            return cached
+
+        try:
+            remote = self._aden_provider.fetch_from_aden(credential_id)
+            if remote:
+                # Refresh the local copy (and its timestamp) before returning.
+                self.save(remote)
+                logger.debug(f"Fetched credential '{credential_id}' from Aden")
+                return remote
+        except Exception as e:
+            logger.warning(f"Failed to fetch '{credential_id}' from Aden: {e}")
+
+            if cached:
+                logger.info(f"Using stale cached credential '{credential_id}'")
+
+        # Aden failed or had nothing: the local copy (possibly None) is all
+        # that is left.
+        return cached
+
+    def delete(self, credential_id: str) -> bool:
+        """
+        Remove a credential from the local cache only.
+
+        No request is made to the Aden server, so the credential is left
+        untouched there.
+
+        Args:
+            credential_id: The credential identifier.
+
+        Returns:
+            The result of the local backend's delete().
+        """
+        # Drop the freshness stamp first; a missing stamp is not an error.
+        self._cache_timestamps.pop(credential_id, None)
+        was_deleted = self._local.delete(credential_id)
+        return was_deleted
+
+    def list_all(self) -> list[str]:
+        """
+        List credentials from local cache.
+
+        Delegates to the local storage backend; the Aden server is not
+        consulted.
+
+        Returns:
+            List of credential IDs in local cache.
+        """
+        return self._local.list_all()
+
+    def exists(self, credential_id: str) -> bool:
+        """
+        Check if credential exists in local cache.
+
+        Delegates to the local storage backend; the Aden server is not
+        consulted.
+
+        Args:
+            credential_id: The credential identifier.
+
+        Returns:
+            True if credential exists locally.
+        """
+        return self._local.exists(credential_id)
+
+    def _is_cache_fresh(self, credential_id: str) -> bool:
+        """
+        Tell whether a credential was cached less than one TTL ago.
+
+        Args:
+            credential_id: The credential identifier.
+
+        Returns:
+            True if cache is fresh, False if stale or if no save time is
+            recorded for the credential.
+        """
+        stamp = self._cache_timestamps.get(credential_id)
+        if stamp is None:
+            return False
+
+        age = datetime.now(UTC) - stamp
+        return age < self._cache_ttl
+
+    def invalidate_cache(self, credential_id: str) -> None:
+        """
+        Invalidate cache for a specific credential.
+
+        Only the recorded save time is discarded; the stored credential
+        itself is left in local storage. The next load() call will therefore
+        try Aden regardless of TTL.
+
+        Args:
+            credential_id: The credential identifier.
+        """
+        # pop() with a default keeps this a no-op for unknown ids.
+        self._cache_timestamps.pop(credential_id, None)
+        logger.debug(f"Invalidated cache for '{credential_id}'")
+
+    def invalidate_all(self) -> None:
+        """Discard every recorded save time; stored credentials are not deleted."""
+        self._cache_timestamps.clear()
+        logger.debug("Invalidated all cache entries")
+
+    def sync_all_from_aden(self) -> int:
+        """
+        Pull every active integration's credential into the local cache.
+
+        Integrations whose status is not ``"active"`` are skipped with a
+        warning. A failure on one integration is logged and does not stop
+        the others. A failure to list integrations is logged and yields 0.
+
+        Returns:
+            Number of credentials synced.
+        """
+        saved_count = 0
+
+        try:
+            # NOTE: reaches into the provider's private client to list integrations.
+            for entry in self._aden_provider._client.list_integrations():
+                if entry.status == "active":
+                    try:
+                        fetched = self._aden_provider.fetch_from_aden(entry.integration_id)
+                        if fetched:
+                            self.save(fetched)
+                            saved_count += 1
+                            logger.info(f"Synced credential '{entry.integration_id}' from Aden")
+                    except Exception as e:
+                        # One bad integration must not abort the whole sync.
+                        logger.warning(f"Failed to sync '{entry.integration_id}': {e}")
+                else:
+                    logger.warning(
+                        f"Skipping integration '{entry.integration_id}': status={entry.status}"
+                    )
+
+        except Exception as e:
+            logger.error(f"Failed to list integrations from Aden: {e}")
+
+        return saved_count
+
+    def get_cache_info(self) -> dict[str, dict]:
+        """
+        Describe the cache state of every locally stored credential.
+
+        Returns:
+            Dict mapping credential_id to ``cached_at`` (ISO string or None),
+            ``is_fresh`` and ``ttl_remaining_seconds`` (never negative).
+            Credentials with no recorded save time are reported as not fresh.
+        """
+        # One "now" for the whole report so entries are comparable.
+        now = datetime.now(UTC)
+        report = {}
+
+        for cred_id in self.list_all():
+            stamp = self._cache_timestamps.get(cred_id)
+
+            if not stamp:
+                report[cred_id] = {
+                    "cached_at": None,
+                    "is_fresh": False,
+                    "ttl_remaining_seconds": 0,
+                }
+                continue
+
+            seconds_left = (stamp + self._cache_ttl - now).total_seconds()
+            report[cred_id] = {
+                "cached_at": stamp.isoformat(),
+                "is_fresh": seconds_left > 0,
+                "ttl_remaining_seconds": max(0, seconds_left),
+            }
+
+        return report
@@ -0,0 +1 @@
+"""Tests for Aden credential sync components."""
@@ -0,0 +1,670 @@
+"""
+Tests for Aden credential sync components.
+
+Tests cover:
+- AdenCredentialClient: HTTP client for Aden API
+- AdenSyncProvider: Provider that syncs with Aden
+- AdenCachedStorage: Storage with local cache + Aden fallback
+"""
+
+from datetime import UTC, datetime, timedelta
+from unittest.mock import Mock
+
+import pytest
+from pydantic import SecretStr
+
+from framework.credentials import (
+    CredentialKey,
+    CredentialObject,
+    CredentialStore,
+    CredentialType,
+    InMemoryStorage,
+)
+from framework.credentials.aden import (
+    AdenCachedStorage,
+    AdenClientConfig,
+    AdenClientError,
+    AdenCredentialClient,
+    AdenCredentialResponse,
+    AdenIntegrationInfo,
+    AdenRefreshError,
+    AdenSyncProvider,
+)
+
+# =============================================================================
+# Fixtures
+# =============================================================================
+
+
+@pytest.fixture
+def aden_config():
+    """Provide an AdenClientConfig that targets a placeholder Aden endpoint."""
+    settings = {
+        "base_url": "https://api.test-aden.com",
+        "api_key": "test-api-key",
+        "tenant_id": "test-tenant",
+        "timeout": 5.0,
+        "retry_attempts": 2,
+        "retry_delay": 0.1,
+    }
+    return AdenClientConfig(**settings)
+
+
+@pytest.fixture
+def mock_client(aden_config):
+    """Provide a Mock constrained to the AdenCredentialClient interface, carrying the test config."""
+    fake_client = Mock(spec=AdenCredentialClient)
+    fake_client.config = aden_config
+    return fake_client
+
+
+@pytest.fixture
+def aden_response():
+    """Provide a HubSpot AdenCredentialResponse whose token expires one hour from now."""
+    one_hour_ahead = datetime.now(UTC) + timedelta(hours=1)
+    granted_scopes = ["crm.objects.contacts.read", "crm.objects.contacts.write"]
+    return AdenCredentialResponse(
+        integration_id="hubspot",
+        integration_type="hubspot",
+        access_token="test-access-token",
+        token_type="Bearer",
+        expires_at=one_hour_ahead,
+        scopes=granted_scopes,
+        metadata={"portal_id": "12345"},
+    )
+
+
+@pytest.fixture
+def provider(mock_client):
+    """Provide an AdenSyncProvider wired to the mock client, with usage reporting switched off."""
+    sync_provider = AdenSyncProvider(
+        client=mock_client,
+        provider_id="test_aden",
+        refresh_buffer_minutes=5,
+        report_usage=False,
+    )
+    return sync_provider
+
+
+@pytest.fixture
+def local_storage():
+    """Provide a fresh InMemoryStorage instance for each test."""
+    storage = InMemoryStorage()
+    return storage
+
+
+@pytest.fixture
+def cached_storage(local_storage, provider):
+    """Provide an AdenCachedStorage over the in-memory storage with a 60-second cache TTL."""
+    storage = AdenCachedStorage(
+        local_storage=local_storage,
+        aden_provider=provider,
+        cache_ttl_seconds=60,
+        prefer_local=True,
+    )
+    return storage
+
+
+# =============================================================================
+# AdenCredentialResponse Tests
+# =============================================================================
+
+
+class TestAdenCredentialResponse:
+    """Checks that AdenCredentialResponse.from_dict maps payload fields onto the response."""
+
+    def test_from_dict_basic(self):
+        """A payload with only the three core fields parses, and the rest fall back to defaults."""
+        payload = dict(
+            integration_id="github",
+            integration_type="github",
+            access_token="ghp_xxxxx",
+        )
+
+        parsed = AdenCredentialResponse.from_dict(payload)
+
+        # Fields taken straight from the payload
+        assert parsed.integration_id == "github"
+        assert parsed.integration_type == "github"
+        assert parsed.access_token == "ghp_xxxxx"
+        # Fields that were absent from the payload
+        assert parsed.token_type == "Bearer"
+        assert parsed.expires_at is None
+        assert parsed.scopes == []
+
+    def test_from_dict_full(self):
+        """A payload that sets every field round-trips into the response."""
+        granted = ["read", "write"]
+        extra = {"key": "value"}
+        payload = dict(
+            integration_id="hubspot",
+            integration_type="hubspot",
+            access_token="token123",
+            token_type="Bearer",
+            expires_at="2026-01-28T15:30:00Z",
+            scopes=["read", "write"],
+            metadata={"key": "value"},
+        )
+
+        parsed = AdenCredentialResponse.from_dict(payload)
+
+        assert parsed.integration_id == "hubspot"
+        assert parsed.access_token == "token123"
+        assert parsed.expires_at is not None
+        assert parsed.scopes == granted
+        assert parsed.metadata == extra
+
+
+class TestAdenIntegrationInfo:
+    """Checks that AdenIntegrationInfo.from_dict maps payload fields onto the info object."""
+
+    def test_from_dict(self):
+        """An active Slack integration payload is parsed field by field."""
+        payload = dict(
+            integration_id="slack",
+            integration_type="slack",
+            status="active",
+            expires_at="2026-02-01T00:00:00Z",
+        )
+
+        parsed = AdenIntegrationInfo.from_dict(payload)
+
+        assert parsed.integration_id == "slack"
+        assert parsed.integration_type == "slack"
+        assert parsed.status == "active"
+        assert parsed.expires_at is not None
+
+
+# =============================================================================
+# AdenSyncProvider Tests
+# =============================================================================
+
+
+class TestAdenSyncProvider:
+    """Behavioural tests for AdenSyncProvider, driven through a mocked Aden client."""
+
+    @staticmethod
+    def _token_key(secret, expires_at=None):
+        """Build an ``access_token`` key; ``expires_at`` is passed only when supplied."""
+        fields = {"name": "access_token", "value": SecretStr(secret)}
+        if expires_at is not None:
+            fields["expires_at"] = expires_at
+        return CredentialKey(**fields)
+
+    @classmethod
+    def _credential_expiring_in(cls, delta):
+        """Build OAUTH2 credential ``test`` whose access token expires ``delta`` from now."""
+        deadline = datetime.now(UTC) + delta
+        return CredentialObject(
+            id="test",
+            credential_type=CredentialType.OAUTH2,
+            keys={"access_token": cls._token_key("token", expires_at=deadline)},
+        )
+
+    def test_provider_id(self, provider):
+        """The provider reports the ID it was constructed with."""
+        expected_id = "test_aden"
+        assert provider.provider_id == expected_id
+
+    def test_supported_types(self, provider):
+        """OAUTH2 and BEARER_TOKEN are both listed as supported types."""
+        for cred_type in (CredentialType.OAUTH2, CredentialType.BEARER_TOKEN):
+            assert cred_type in provider.supported_types
+
+    def test_can_handle_oauth2(self, provider):
+        """An OAUTH2 credential carrying the provider's own ID is accepted."""
+        candidate = CredentialObject(
+            id="test",
+            credential_type=CredentialType.OAUTH2,
+            keys={},
+            provider_id="test_aden",
+        )
+
+        assert provider.can_handle(candidate) is True
+
+    def test_can_handle_aden_managed(self, provider):
+        """A credential flagged with an ``_aden_managed`` key is accepted."""
+        managed_flag = CredentialKey(name="_aden_managed", value=SecretStr("true"))
+        candidate = CredentialObject(
+            id="test",
+            credential_type=CredentialType.OAUTH2,
+            keys={"_aden_managed": managed_flag},
+        )
+
+        assert provider.can_handle(candidate) is True
+
+    def test_can_handle_wrong_type(self, provider):
+        """An API_KEY credential is rejected."""
+        candidate = CredentialObject(
+            id="test",
+            credential_type=CredentialType.API_KEY,
+            keys={},
+        )
+
+        assert provider.can_handle(candidate) is False
+
+    def test_refresh_success(self, provider, mock_client, aden_response):
+        """A successful refresh swaps in Aden's token and flags the credential as Aden-managed."""
+        mock_client.request_refresh.return_value = aden_response
+        outdated = CredentialObject(
+            id="hubspot",
+            credential_type=CredentialType.OAUTH2,
+            keys={"access_token": self._token_key("old-token")},
+            provider_id="test_aden",
+        )
+
+        result = provider.refresh(outdated)
+
+        assert result.keys["access_token"].value.get_secret_value() == "test-access-token"
+        assert result.keys["_aden_managed"].value.get_secret_value() == "true"
+        assert result.last_refreshed is not None
+        mock_client.request_refresh.assert_called_once_with("hubspot")
+
+    def test_refresh_requires_reauth(self, provider, mock_client):
+        """An AdenRefreshError demanding re-authorization surfaces as CredentialRefreshError."""
+        from framework.credentials import CredentialRefreshError
+
+        mock_client.request_refresh.side_effect = AdenRefreshError(
+            "Token revoked",
+            requires_reauthorization=True,
+            reauthorization_url="https://aden.com/reauth",
+        )
+        revoked = CredentialObject(
+            id="hubspot",
+            credential_type=CredentialType.OAUTH2,
+            keys={},
+        )
+
+        with pytest.raises(CredentialRefreshError) as caught:
+            provider.refresh(revoked)
+
+        message = str(caught.value).lower()
+        assert "re-authorization" in message
+
+    def test_refresh_aden_unavailable_cached_valid(self, provider, mock_client):
+        """When Aden is unreachable and the token is still valid, the cached credential is kept."""
+        mock_client.request_refresh.side_effect = AdenClientError("Connection failed")
+
+        # One hour of validity left on the cached token
+        still_valid_until = datetime.now(UTC) + timedelta(hours=1)
+        cached = CredentialObject(
+            id="hubspot",
+            credential_type=CredentialType.OAUTH2,
+            keys={"access_token": self._token_key("cached-token", expires_at=still_valid_until)},
+        )
+
+        # The refresh must hand back the cached credential rather than raise
+        outcome = provider.refresh(cached)
+
+        assert outcome.keys["access_token"].value.get_secret_value() == "cached-token"
+
+    def test_should_refresh_expired(self, provider):
+        """A token that expired an hour ago needs a refresh."""
+        expired = self._credential_expiring_in(timedelta(hours=-1))
+
+        assert provider.should_refresh(expired) is True
+
+    def test_should_refresh_within_buffer(self, provider):
+        """A token expiring in 3 minutes falls inside the 5-minute buffer and needs a refresh."""
+        expiring_soon = self._credential_expiring_in(timedelta(minutes=3))
+
+        assert provider.should_refresh(expiring_soon) is True
+
+    def test_should_refresh_still_valid(self, provider):
+        """A token with an hour of validity left does not need a refresh."""
+        healthy = self._credential_expiring_in(timedelta(hours=1))
+
+        assert provider.should_refresh(healthy) is False
+
+    def test_fetch_from_aden(self, provider, mock_client, aden_response):
+        """A credential returned by the client is converted into a CredentialObject."""
+        mock_client.get_credential.return_value = aden_response
+
+        fetched = provider.fetch_from_aden("hubspot")
+
+        assert fetched is not None
+        assert fetched.id == "hubspot"
+        assert fetched.keys["access_token"].value.get_secret_value() == "test-access-token"
+        assert fetched.auto_refresh is True
+
+    def test_fetch_from_aden_not_found(self, provider, mock_client):
+        """A missing integration yields None."""
+        mock_client.get_credential.return_value = None
+
+        fetched = provider.fetch_from_aden("nonexistent")
+
+        assert fetched is None
+
+    def test_sync_all(self, provider, mock_client, aden_response):
+        """sync_all stores active integrations and skips the rest."""
+        active = AdenIntegrationInfo(
+            integration_id="hubspot",
+            integration_type="hubspot",
+            status="active",
+        )
+        # Not active, so the sync is expected to skip it
+        needs_reauth = AdenIntegrationInfo(
+            integration_id="github",
+            integration_type="github",
+            status="requires_reauth",
+        )
+        mock_client.list_integrations.return_value = [active, needs_reauth]
+        mock_client.get_credential.return_value = aden_response
+
+        store = CredentialStore(storage=InMemoryStorage())
+        synced_count = provider.sync_all(store)
+
+        # Only the active integration counts
+        assert synced_count == 1
+        assert store.get_credential("hubspot") is not None
+
+    def test_validate_via_aden(self, provider, mock_client):
+        """A positive answer from Aden's token validation makes the credential valid."""
+        mock_client.validate_token.return_value = {"valid": True}
+        subject = CredentialObject(
+            id="hubspot",
+            credential_type=CredentialType.OAUTH2,
+            keys={},
+        )
+
+        assert provider.validate(subject) is True
+
+    def test_validate_fallback_to_local(self, provider, mock_client):
+        """When Aden validation errors out, an unexpired local token still validates."""
+        mock_client.validate_token.side_effect = AdenClientError("Failed")
+
+        valid_until = datetime.now(UTC) + timedelta(hours=1)
+        subject = CredentialObject(
+            id="hubspot",
+            credential_type=CredentialType.OAUTH2,
+            keys={"access_token": self._token_key("token", expires_at=valid_until)},
+        )
+
+        assert provider.validate(subject) is True
+
+
+# =============================================================================
+# AdenCachedStorage Tests
+# =============================================================================
+
+
+class TestAdenCachedStorage:
+    """Behavioural tests for AdenCachedStorage: cache timestamps, freshness and Aden fallback."""
+
+    @staticmethod
+    def _oauth2_credential(cred_id, token=None):
+        """Build an OAUTH2 credential; an ``access_token`` key is added only if ``token`` is given."""
+        keys = {}
+        if token is not None:
+            keys["access_token"] = CredentialKey(name="access_token", value=SecretStr(token))
+        return CredentialObject(
+            id=cred_id,
+            credential_type=CredentialType.OAUTH2,
+            keys=keys,
+        )
+
+    def test_save_updates_cache_timestamp(self, cached_storage):
+        """Saving a credential records a cache timestamp and makes it exist."""
+        cached_storage.save(self._oauth2_credential("test", token="token"))
+
+        assert "test" in cached_storage._cache_timestamps
+        assert cached_storage.exists("test")
+
+    def test_load_from_fresh_cache(self, cached_storage, local_storage):
+        """A credential with a fresh timestamp is served from local storage."""
+        # Seed local storage directly, then mark the entry as cached just now
+        local_storage.save(self._oauth2_credential("test", token="cached-token"))
+        cached_storage._cache_timestamps["test"] = datetime.now(UTC)
+
+        result = cached_storage.load("test")
+
+        assert result is not None
+        assert result.keys["access_token"].value.get_secret_value() == "cached-token"
+
+    def test_load_from_aden_when_stale(
+        self, cached_storage, local_storage, provider, mock_client, aden_response
+    ):
+        """A stale cache entry is replaced by the credential fetched from Aden."""
+        local_storage.save(self._oauth2_credential("hubspot", token="stale-token"))
+
+        # Two minutes old against a 60-second TTL, so the entry is stale
+        two_minutes_ago = datetime.now(UTC) - timedelta(minutes=2)
+        cached_storage._cache_timestamps["hubspot"] = two_minutes_ago
+
+        # Aden answers with the fixture response
+        mock_client.get_credential.return_value = aden_response
+
+        result = cached_storage.load("hubspot")
+
+        assert result is not None
+        assert result.keys["access_token"].value.get_secret_value() == "test-access-token"
+
+    def test_load_falls_back_to_stale_when_aden_fails(
+        self, cached_storage, local_storage, provider, mock_client
+    ):
+        """If Aden errors out, the stale local credential is returned instead."""
+        local_storage.save(self._oauth2_credential("hubspot", token="stale-token"))
+        two_minutes_ago = datetime.now(UTC) - timedelta(minutes=2)
+        cached_storage._cache_timestamps["hubspot"] = two_minutes_ago
+
+        # Simulate an unreachable Aden
+        mock_client.get_credential.side_effect = AdenClientError("Connection failed")
+
+        result = cached_storage.load("hubspot")
+
+        assert result is not None
+        assert result.keys["access_token"].value.get_secret_value() == "stale-token"
+
+    def test_delete_removes_cache_timestamp(self, cached_storage, local_storage):
+        """Deleting a credential drops both its timestamp and the stored entry."""
+        cached_storage.save(self._oauth2_credential("test"))
+
+        assert "test" in cached_storage._cache_timestamps
+
+        cached_storage.delete("test")
+
+        assert "test" not in cached_storage._cache_timestamps
+        assert not cached_storage.exists("test")
+
+    def test_invalidate_cache(self, cached_storage, local_storage):
+        """invalidate_cache drops the timestamp but leaves the stored credential alone."""
+        cached_storage.save(self._oauth2_credential("test"))
+
+        cached_storage.invalidate_cache("test")
+
+        assert "test" not in cached_storage._cache_timestamps
+        # The credential itself is still held by local storage
+        assert local_storage.exists("test")
+
+    def test_invalidate_all(self, cached_storage):
+        """invalidate_all leaves no cache timestamps behind."""
+        for index in range(3):
+            cached_storage._cache_timestamps[f"test_{index}"] = datetime.now(UTC)
+
+        cached_storage.invalidate_all()
+
+        assert len(cached_storage._cache_timestamps) == 0
+
+    def test_is_cache_fresh(self, cached_storage):
+        """_is_cache_fresh is True for a new stamp, False for an old or missing one."""
+        timestamps = cached_storage._cache_timestamps
+
+        # Stamped just now
+        timestamps["fresh"] = datetime.now(UTC)
+        assert cached_storage._is_cache_fresh("fresh") is True
+
+        # Stamped five minutes ago (TTL is 60 seconds)
+        timestamps["stale"] = datetime.now(UTC) - timedelta(minutes=5)
+        assert cached_storage._is_cache_fresh("stale") is False
+
+        # Never stamped
+        assert cached_storage._is_cache_fresh("nonexistent") is False
+
+    def test_get_cache_info(self, cached_storage, local_storage):
+        """get_cache_info reports freshness and remaining TTL per stored credential."""
+        # Store two key-less credentials directly in local storage
+        for cred_id in ["fresh", "stale"]:
+            local_storage.save(self._oauth2_credential(cred_id))
+
+        cached_storage._cache_timestamps["fresh"] = datetime.now(UTC)
+        cached_storage._cache_timestamps["stale"] = datetime.now(UTC) - timedelta(minutes=5)
+
+        report = cached_storage.get_cache_info()
+
+        assert "fresh" in report
+        fresh_entry = report["fresh"]
+        assert fresh_entry["is_fresh"] is True
+        assert fresh_entry["ttl_remaining_seconds"] > 0
+
+        assert "stale" in report
+        stale_entry = report["stale"]
+        assert stale_entry["is_fresh"] is False
+        assert stale_entry["ttl_remaining_seconds"] == 0
+
+
+# =============================================================================
+# Integration Tests
+# =============================================================================
+
+
+class TestAdenIntegration:
+    """End-to-end style tests combining provider, storage and store over a mocked client."""
+
+    def test_full_workflow(self, mock_client, aden_response):
+        """Sync from Aden, read the credential back, then refresh it after it expires."""
+        # Arrange the mocked Aden API
+        hubspot_info = AdenIntegrationInfo(
+            integration_id="hubspot",
+            integration_type="hubspot",
+            status="active",
+        )
+        mock_client.list_integrations.return_value = [hubspot_info]
+        mock_client.get_credential.return_value = aden_response
+        mock_client.request_refresh.return_value = AdenCredentialResponse(
+            integration_id="hubspot",
+            integration_type="hubspot",
+            access_token="refreshed-token",
+            expires_at=datetime.now(UTC) + timedelta(hours=2),
+            scopes=[],
+        )
+
+        sync_provider = AdenSyncProvider(client=mock_client)
+        backing = InMemoryStorage()
+        store = CredentialStore(
+            storage=backing,
+            providers=[sync_provider],
+            auto_refresh=True,
+        )
+
+        # Step 1: initial sync
+        assert sync_provider.sync_all(store) == 1
+
+        # Step 2: read the synced credential back
+        synced = store.get_credential("hubspot")
+        assert synced is not None
+        assert synced.keys["access_token"].value.get_secret_value() == "test-access-token"
+
+        # Step 3: overwrite the token with one that expired an hour ago
+        an_hour_ago = datetime.now(UTC) - timedelta(hours=1)
+        synced.keys["access_token"] = CredentialKey(
+            name="access_token",
+            value=SecretStr("test-access-token"),
+            expires_at=an_hour_ago,
+        )
+        backing.save(synced)
+
+        # Step 4: an explicit refresh returns the new token
+        renewed = sync_provider.refresh(synced)
+        assert renewed.keys["access_token"].value.get_secret_value() == "refreshed-token"
+
+    def test_cached_storage_with_store(self, mock_client, aden_response):
+        """The first load hits Aden; the second is answered from the cache."""
+        fetch = mock_client.get_credential
+        fetch.return_value = aden_response
+
+        sync_provider = AdenSyncProvider(client=mock_client)
+        backing = InMemoryStorage()
+        storage = AdenCachedStorage(
+            local_storage=backing,
+            aden_provider=sync_provider,
+            cache_ttl_seconds=300,
+        )
+
+        # Nothing is cached yet, so this load goes to Aden
+        first = storage.load("hubspot")
+        assert first is not None
+        fetch.assert_called_once()
+
+        # The entry is now cached, so Aden must not be called again
+        fetch.reset_mock()
+        second = storage.load("hubspot")
+        assert second is not None
+        fetch.assert_not_called()
@@ -0,0 +1,293 @@
+"""
+Core data models for the credential store.
+
+This module defines the key-vault structure where credentials are objects
+containing one or more keys (e.g., api_key, access_token, refresh_token).
+"""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime
+from enum import Enum
+from typing import Any
+
+from pydantic import BaseModel, Field, SecretStr
+
+
+def _utc_now() -> datetime:
+    """Return the current moment as a timezone-aware datetime in UTC."""
+    return datetime.now(tz=UTC)
+
+
+class CredentialType(str, Enum):
+    """
+    Types of credentials the store can manage.
+
+    The enum mixes in ``str``, so each member is also a string equal to its
+    value (for example ``CredentialType.OAUTH2 == "oauth2"``).
+    """
+
+    # The string literal under each member documents that member.
+    API_KEY = "api_key"
+    """Simple API key (e.g., Brave Search, OpenAI)"""
+
+    OAUTH2 = "oauth2"
+    """OAuth2 with refresh token support"""
+
+    BASIC_AUTH = "basic_auth"
+    """Username/password pair"""
+
+    BEARER_TOKEN = "bearer_token"
+    """JWT or bearer token without refresh"""
+
+    CUSTOM = "custom"
+    """User-defined credential type"""
+
+
+class CredentialKey(BaseModel):
+    """
+    A single key within a credential object.
+
+    Example: 'api_key' within a 'brave_search' credential
+
+    Attributes:
+        name: Key name (e.g., 'api_key', 'access_token')
+        value: Secret value (SecretStr prevents accidental logging)
+        expires_at: Optional expiration time. Timezone-aware values are
+            recommended; a naive value is interpreted as UTC.
+        metadata: Additional key-specific metadata
+    """
+
+    name: str
+    value: SecretStr
+    expires_at: datetime | None = None
+    metadata: dict[str, Any] = Field(default_factory=dict)
+
+    # Unknown fields are accepted and kept on the model instead of being rejected.
+    model_config = {"extra": "allow"}
+
+    @property
+    def is_expired(self) -> bool:
+        """
+        Check if this key has expired.
+
+        Returns:
+            False when no expiration is set; otherwise True once the current
+            UTC time has reached ``expires_at``.
+        """
+        deadline = self.expires_at
+        if deadline is None:
+            return False
+        if deadline.tzinfo is None or deadline.utcoffset() is None:
+            # Comparing a naive datetime with the aware "now" raises TypeError.
+            # The module works in UTC throughout, so read naive values as UTC.
+            deadline = deadline.replace(tzinfo=UTC)
+        return datetime.now(UTC) >= deadline
+
+    def get_secret_value(self) -> str:
+        """Return the unwrapped secret as a plain string (use sparingly)."""
+        return self.value.get_secret_value()
+
+
+class CredentialObject(BaseModel):
+    """
+    A named credential made up of one or more keys.
+
+    This is the key-vault structure: a single credential groups several
+    related keys (e.g., access_token and refresh_token) under one ID.
+
+    Example:
+        CredentialObject(
+            id="github_oauth",
+            credential_type=CredentialType.OAUTH2,
+            keys={
+                "access_token": CredentialKey(name="access_token", value=SecretStr("ghp_xxx")),
+                "refresh_token": CredentialKey(name="refresh_token", value=SecretStr("ghr_xxx")),
+            },
+            provider_id="oauth2"
+        )
+
+    Attributes:
+        id: Unique identifier (e.g., 'brave_search', 'github_oauth')
+        credential_type: Type of credential (API_KEY, OAUTH2, etc.)
+        keys: Dictionary of key name to CredentialKey
+        provider_id: ID of provider responsible for lifecycle management
+        auto_refresh: Whether to automatically refresh when expired
+    """
+
+    id: str = Field(description="Unique identifier (e.g., 'brave_search', 'github_oauth')")
+    credential_type: CredentialType = CredentialType.API_KEY
+    keys: dict[str, CredentialKey] = Field(default_factory=dict)
+
+    # Lifecycle management
+    provider_id: str | None = Field(
+        default=None,
+        description="ID of provider responsible for lifecycle (e.g., 'oauth2', 'static')",
+    )
+    last_refreshed: datetime | None = None
+    auto_refresh: bool = False
+
+    # Usage tracking
+    last_used: datetime | None = None
+    use_count: int = 0
+
+    # Metadata
+    description: str = ""
+    tags: list[str] = Field(default_factory=list)
+    created_at: datetime = Field(default_factory=_utc_now)
+    updated_at: datetime = Field(default_factory=_utc_now)
+
+    model_config = {"extra": "allow"}
+
+    def get_key(self, key_name: str) -> str | None:
+        """
+        Look up the secret value stored under a key name.
+
+        Args:
+            key_name: Name of the key to retrieve
+
+        Returns:
+            The key's secret value, or None if no such key exists
+        """
+        entry = self.keys.get(key_name)
+        return None if entry is None else entry.get_secret_value()
+
+    def set_key(
+        self,
+        key_name: str,
+        value: str,
+        expires_at: datetime | None = None,
+        metadata: dict[str, Any] | None = None,
+    ) -> None:
+        """
+        Create or replace a key and bump ``updated_at``.
+
+        Args:
+            key_name: Name of the key
+            value: Secret value
+            expires_at: Optional expiration time
+            metadata: Optional key-specific metadata (an empty dict is used when omitted)
+        """
+        replacement = CredentialKey(
+            name=key_name,
+            value=SecretStr(value),
+            expires_at=expires_at,
+            metadata=metadata or {},
+        )
+        self.keys[key_name] = replacement
+        self.updated_at = datetime.now(UTC)
+
+    def has_key(self, key_name: str) -> bool:
+        """Report whether a key with this name is present."""
+        return key_name in self.keys
+
+    @property
+    def needs_refresh(self) -> bool:
+        """Report whether at least one key has expired."""
+        return any(key.is_expired for key in self.keys.values())
+
+    @property
+    def is_valid(self) -> bool:
+        """Report whether the credential holds at least one non-expired key."""
+        # any() over an empty key set is False, which covers the no-keys case.
+        return any(not key.is_expired for key in self.keys.values())
+
+    def record_usage(self) -> None:
+        """Stamp ``last_used`` with the current UTC time and increment ``use_count``."""
+        self.last_used = datetime.now(UTC)
+        self.use_count += 1
+
+    def get_default_key(self) -> str | None:
+        """
+        Return the value of the credential's default key.
+
+        Priority: 'value' > 'api_key' > 'access_token' > first key
+
+        Returns:
+            The default key's value, or None if no keys exist
+        """
+        preferred_names = ("value", "api_key", "access_token")
+        chosen = next((name for name in preferred_names if name in self.keys), None)
+
+        if chosen is None:
+            # None of the well-known names is present: use the first stored key, if any.
+            chosen = next(iter(self.keys), None)
+
+        return None if chosen is None else self.get_key(chosen)
+
+
+class CredentialUsageSpec(BaseModel):
+    """
+    Specification for how a tool uses credentials.
+
+    This implements the "bipartisan" model where the credential store
+    just stores values, and tools define how those values are used
+    in HTTP requests (headers, query params, body).
+
+    Example:
+        CredentialUsageSpec(
+            credential_id="brave_search",
+            required_keys=["api_key"],
+            headers={"X-Subscription-Token": "{{api_key}}"}
+        )
+
+        CredentialUsageSpec(
+            credential_id="github_oauth",
+            required_keys=["access_token"],
+            headers={"Authorization": "Bearer {{access_token}}"}
+        )
+
+    Attributes:
+        credential_id: ID of credential to use
+        required_keys: Keys that must be present
+        headers: Header templates with {{key}} placeholders
+        query_params: Query parameter templates
+        body_fields: Request body field templates
+        required: Defaults to True (presumably marks the credential as mandatory;
+            confirm against the code that consumes this spec)
+        description: Free-form text, empty by default
+        help_url: URL string, empty by default (presumably points at setup help; confirm)
+    """
+
+    credential_id: str = Field(description="ID of credential to use (e.g., 'brave_search')")
+    required_keys: list[str] = Field(default_factory=list, description="Keys that must be present")
+
+    # Injection templates (bipartisan model)
+    # This model only stores the template strings; nothing here substitutes the
+    # {{key}} placeholders.
+    headers: dict[str, str] = Field(
+        default_factory=dict,
+        description="Header templates (e.g., {'Authorization': 'Bearer {{access_token}}'})",
+    )
+    query_params: dict[str, str] = Field(
+        default_factory=dict,
+        description="Query param templates (e.g., {'api_key': '{{api_key}}'})",
+    )
+    body_fields: dict[str, str] = Field(
+        default_factory=dict,
+        description="Request body field templates",
+    )
+
+    # Metadata
+    required: bool = True
+    description: str = ""
+    help_url: str = ""
+
+    # Unknown fields are accepted and kept on the model instead of being rejected.
+    model_config = {"extra": "allow"}
+
+
+class CredentialError(Exception):
+    """Root of the credential exception hierarchy; catch this to handle any credential failure."""
+
+
+class CredentialNotFoundError(CredentialError):
+    """Signals that a referenced credential does not exist."""
+
+
+class CredentialKeyNotFoundError(CredentialError):
+    """Signals that a referenced key is missing from a credential."""
+
+
+class CredentialRefreshError(CredentialError):
+    """Signals that refreshing a credential failed."""
+
+
+class CredentialValidationError(CredentialError):
+    """Signals that validating a credential failed."""
+
+
+class CredentialDecryptionError(CredentialError):
+    """Signals that decrypting a credential failed."""
@@ -0,0 +1,92 @@
+"""
+OAuth2 support for the credential store.
+
+This module provides OAuth2 credential management with:
+- Token types and configuration (OAuth2Token, OAuth2Config)
+- Generic OAuth2 provider (BaseOAuth2Provider)
+- Token lifecycle management (TokenLifecycleManager)
+
+Quick Start:
+    from core.framework.credentials import CredentialStore
+    from core.framework.credentials.oauth2 import BaseOAuth2Provider, OAuth2Config
+
+    # Configure OAuth2 provider
+    provider = BaseOAuth2Provider(OAuth2Config(
+        token_url="https://oauth2.example.com/token",
+        client_id="your-client-id",
+        client_secret="your-client-secret",
+        default_scopes=["read", "write"],
+    ))
+
+    # Create store with OAuth2 provider
+    store = CredentialStore.with_encrypted_storage(
+        providers=[provider]  # defaults to ~/.hive/credentials
+    )
+
+    # Get token using client credentials
+    token = provider.client_credentials_grant()
+
+    # Save to store
+    from core.framework.credentials import CredentialObject, CredentialKey, CredentialType
+    from pydantic import SecretStr
+
+    keys = {
+        "access_token": CredentialKey(
+            name="access_token",
+            value=SecretStr(token.access_token),
+            expires_at=token.expires_at,
+        ),
+    }
+    # Only store a refresh token when the server actually issued one
+    if token.refresh_token:
+        keys["refresh_token"] = CredentialKey(
+            name="refresh_token",
+            value=SecretStr(token.refresh_token),
+        )
+
+    store.save_credential(CredentialObject(
+        id="my_api",
+        credential_type=CredentialType.OAUTH2,
+        keys=keys,
+        provider_id="oauth2",
+        auto_refresh=True,
+    ))
+
+For advanced lifecycle management:
+    from core.framework.credentials.oauth2 import TokenLifecycleManager
+
+    manager = TokenLifecycleManager(
+        provider=provider,
+        credential_id="my_api",
+        store=store,
+    )
+
+    # Get valid token (auto-refreshes if needed)
+    token = manager.sync_get_valid_token()
+    headers = manager.get_request_headers()
+"""
+
+from .base_provider import BaseOAuth2Provider
+from .hubspot_provider import HubSpotOAuth2Provider
+from .lifecycle import TokenLifecycleManager, TokenRefreshResult
+from .provider import (
+    OAuth2Config,
+    OAuth2Error,
+    OAuth2Token,
+    RefreshTokenInvalidError,
+    TokenExpiredError,
+    TokenPlacement,
+)
+
+# Public API of the oauth2 package; every name below is imported above
+# from one of the package's submodules.
+__all__ = [
+    # Types
+    "OAuth2Token",
+    "OAuth2Config",
+    "TokenPlacement",
+    # Providers
+    "BaseOAuth2Provider",
+    "HubSpotOAuth2Provider",
+    # Lifecycle
+    "TokenLifecycleManager",
+    "TokenRefreshResult",
+    # Errors
+    "OAuth2Error",
+    "TokenExpiredError",
+    "RefreshTokenInvalidError",
+]
@@ -0,0 +1,486 @@
+"""
+Base OAuth2 provider implementation.
+
+This module provides a generic OAuth2 provider that works with standard
+OAuth2 servers. OSS users can extend this class for custom providers.
+"""
+
+from __future__ import annotations
+
+import logging
+from datetime import UTC, datetime, timedelta
+from typing import Any
+from urllib.parse import urlencode
+
+from ..models import CredentialObject, CredentialRefreshError, CredentialType
+from ..provider import CredentialProvider
+from .provider import (
+    OAuth2Config,
+    OAuth2Error,
+    OAuth2Token,
+    TokenPlacement,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class BaseOAuth2Provider(CredentialProvider):
+    """
+    Generic OAuth2 provider implementation.
+
+    Works with standard OAuth2 servers (RFC 6749). Override methods for
+    provider-specific behavior.
+
+    Supported grant types:
+    - Client Credentials: For server-to-server authentication
+    - Refresh Token: For refreshing expired access tokens
+    - Authorization Code: For user-authorized access (requires callback handling)
+
+    OSS users can extend this class for custom providers:
+
+        class GitHubOAuth2Provider(BaseOAuth2Provider):
+            def __init__(self, client_id: str, client_secret: str):
+                super().__init__(OAuth2Config(
+                    token_url="https://github.com/login/oauth/access_token",
+                    authorization_url="https://github.com/login/oauth/authorize",
+                    client_id=client_id,
+                    client_secret=client_secret,
+                    default_scopes=["repo", "user"],
+                ))
+
+            def exchange_code(self, code: str, redirect_uri: str, **kwargs) -> OAuth2Token:
+                # GitHub returns data as form-encoded by default
+                # Override to handle this
+                ...
+
+    Example usage:
+        provider = BaseOAuth2Provider(OAuth2Config(
+            token_url="https://oauth2.example.com/token",
+            client_id="my-client-id",
+            client_secret="my-client-secret",
+        ))
+
+        # Get token using client credentials
+        token = provider.client_credentials_grant()
+
+        # Refresh an expired token
+        new_token = provider.refresh_access_token(old_token.refresh_token)
+    """
+
+    def __init__(self, config: OAuth2Config, provider_id: str = "oauth2"):
+        """
+        Initialize the OAuth2 provider.
+
+        Args:
+            config: OAuth2 configuration
+            provider_id: Unique identifier for this provider instance
+        """
+        self.config = config
+        self._provider_id = provider_id
+        self._client: Any | None = None
+
+    @property
+    def provider_id(self) -> str:
+        """Identifier passed to ``__init__`` for this provider instance."""
+        return self._provider_id
+
+    @property
+    def supported_types(self) -> list[CredentialType]:
+        """Credential types this provider reports as supported (OAuth2 and bearer token)."""
+        return [CredentialType.OAUTH2, CredentialType.BEARER_TOKEN]
+
+    def _get_client(self) -> Any:
+        """Get or create HTTP client."""
+        if self._client is None:
+            try:
+                import httpx
+
+                self._client = httpx.Client(timeout=self.config.request_timeout)
+            except ImportError as e:
+                raise ImportError(
+                    "OAuth2 provider requires 'httpx'. Install with: pip install httpx"
+                ) from e
+        return self._client
+
+    def _close_client(self) -> None:
+        """Close the HTTP client."""
+        if self._client is not None:
+            self._client.close()
+            self._client = None
+
+    def __del__(self) -> None:
+        """Release the HTTP client (via ``_close_client``) when the provider is finalized."""
+        self._close_client()
+
+    # --- Grant Types ---
+
+    def get_authorization_url(
+        self,
+        state: str,
+        redirect_uri: str,
+        scopes: list[str] | None = None,
+        **kwargs: Any,
+    ) -> str:
+        """
+        Generate authorization URL for user consent (Authorization Code flow).
+
+        Args:
+            state: Anti-CSRF state parameter (should be random and verified)
+            redirect_uri: Callback URL to receive the authorization code
+            scopes: Requested scopes (defaults to config.default_scopes)
+            **kwargs: Additional provider-specific parameters
+
+        Returns:
+            URL to redirect user for authorization
+
+        Raises:
+            ValueError: If authorization_url is not configured
+        """
+        if not self.config.authorization_url:
+            raise ValueError("authorization_url not configured for this provider")
+
+        params = {
+            "client_id": self.config.client_id,
+            "redirect_uri": redirect_uri,
+            "response_type": "code",
+            "state": state,
+            "scope": " ".join(scopes or self.config.default_scopes),
+            **kwargs,
+        }
+
+        return f"{self.config.authorization_url}?{urlencode(params)}"
+
+    def exchange_code(
+        self,
+        code: str,
+        redirect_uri: str,
+        **kwargs: Any,
+    ) -> OAuth2Token:
+        """
+        Exchange authorization code for tokens (Authorization Code flow).
+
+        Args:
+            code: Authorization code from callback
+            redirect_uri: Same redirect_uri used in authorization request
+            **kwargs: Additional provider-specific parameters
+
+        Returns:
+            OAuth2Token with access_token and optional refresh_token
+
+        Raises:
+            OAuth2Error: If token exchange fails
+        """
+        data = {
+            "grant_type": "authorization_code",
+            "client_id": self.config.client_id,
+            "client_secret": self.config.client_secret,
+            "code": code,
+            "redirect_uri": redirect_uri,
+            **self.config.extra_token_params,
+            **kwargs,
+        }
+
+        return self._token_request(data)
+
+    def client_credentials_grant(
+        self,
+        scopes: list[str] | None = None,
+        **kwargs: Any,
+    ) -> OAuth2Token:
+        """
+        Obtain token using client credentials (Client Credentials flow).
+
+        This is for server-to-server authentication where no user is involved.
+
+        Args:
+            scopes: Requested scopes (defaults to config.default_scopes)
+            **kwargs: Additional provider-specific parameters
+
+        Returns:
+            OAuth2Token (typically without refresh_token)
+
+        Raises:
+            OAuth2Error: If token request fails
+        """
+        data = {
+            "grant_type": "client_credentials",
+            "client_id": self.config.client_id,
+            "client_secret": self.config.client_secret,
+            **self.config.extra_token_params,
+            **kwargs,
+        }
+
+        if scopes or self.config.default_scopes:
+            data["scope"] = " ".join(scopes or self.config.default_scopes)
+
+        return self._token_request(data)
+
+    def refresh_access_token(
+        self,
+        refresh_token: str,
+        scopes: list[str] | None = None,
+        **kwargs: Any,
+    ) -> OAuth2Token:
+        """
+        Refresh an expired access token (Refresh Token flow).
+
+        Args:
+            refresh_token: The refresh token
+            scopes: Scopes to request (defaults to original scopes)
+            **kwargs: Additional provider-specific parameters
+
+        Returns:
+            New OAuth2Token (may include new refresh_token)
+
+        Raises:
+            OAuth2Error: If refresh fails
+            RefreshTokenInvalidError: If refresh token is revoked/invalid
+        """
+        data = {
+            "grant_type": "refresh_token",
+            "client_id": self.config.client_id,
+            "client_secret": self.config.client_secret,
+            "refresh_token": refresh_token,
+            **self.config.extra_token_params,
+            **kwargs,
+        }
+
+        if scopes:
+            data["scope"] = " ".join(scopes)
+
+        return self._token_request(data)
+
+    def revoke_token(
+        self,
+        token: str,
+        token_type_hint: str = "access_token",
+    ) -> bool:
+        """
+        Revoke a token (RFC 7009).
+
+        Args:
+            token: The token to revoke
+            token_type_hint: "access_token" or "refresh_token"
+
+        Returns:
+            True if revocation succeeded
+        """
+        if not self.config.revocation_url:
+            logger.warning("revocation_url not configured, cannot revoke token")
+            return False
+
+        try:
+            client = self._get_client()
+            response = client.post(
+                self.config.revocation_url,
+                data={
+                    "token": token,
+                    "token_type_hint": token_type_hint,
+                    "client_id": self.config.client_id,
+                    "client_secret": self.config.client_secret,
+                },
+                headers={"Accept": "application/json", **self.config.extra_headers},
+            )
+            # RFC 7009: 200 indicates success (even if token was already invalid)
+            return response.status_code == 200
+        except Exception as e:
+            logger.error(f"Token revocation failed: {e}")
+            return False
+
+    # --- CredentialProvider Interface ---
+
+    def refresh(self, credential: CredentialObject) -> CredentialObject:
+        """
+        Refresh a credential using its refresh token.
+
+        Implements CredentialProvider.refresh().
+
+        Args:
+            credential: The credential to refresh
+
+        Returns:
+            Updated credential with new access_token
+
+        Raises:
+            CredentialRefreshError: If refresh fails
+        """
+        refresh_tok = credential.get_key("refresh_token")
+        if not refresh_tok:
+            raise CredentialRefreshError(f"Credential '{credential.id}' has no refresh_token")
+
+        try:
+            new_token = self.refresh_access_token(refresh_tok)
+        except OAuth2Error as e:
+            if e.error == "invalid_grant":
+                raise CredentialRefreshError(
+                    f"Refresh token for '{credential.id}' is invalid or revoked. "
+                    "Re-authorization required."
+                ) from e
+            raise CredentialRefreshError(f"Failed to refresh '{credential.id}': {e}") from e
+
+        # Update credential
+        credential.set_key("access_token", new_token.access_token, expires_at=new_token.expires_at)
+
+        # Update refresh token if a new one was issued
+        if new_token.refresh_token and new_token.refresh_token != refresh_tok:
+            credential.set_key("refresh_token", new_token.refresh_token)
+
+        credential.last_refreshed = datetime.now(UTC)
+        logger.info(f"Refreshed OAuth2 credential '{credential.id}'")
+
+        return credential
+
+    def validate(self, credential: CredentialObject) -> bool:
+        """
+        Validate that credential has a valid (non-expired) access_token.
+
+        Args:
+            credential: The credential to validate
+
+        Returns:
+            True if credential has valid access_token
+        """
+        access_key = credential.keys.get("access_token")
+        if access_key is None:
+            return False
+        return not access_key.is_expired
+
+    def should_refresh(self, credential: CredentialObject) -> bool:
+        """
+        Check if credential should be refreshed.
+
+        Returns True if access_token is expired or within 5 minutes of expiry.
+        """
+        access_key = credential.keys.get("access_token")
+        if access_key is None:
+            return False
+
+        if access_key.expires_at is None:
+            return False
+
+        buffer = timedelta(minutes=5)
+        return datetime.now(UTC) >= (access_key.expires_at - buffer)
+
+    def revoke(self, credential: CredentialObject) -> bool:
+        """
+        Revoke all tokens in a credential.
+
+        Args:
+            credential: The credential to revoke
+
+        Returns:
+            True if all revocations succeeded
+        """
+        success = True
+
+        # Revoke access token
+        access_token = credential.get_key("access_token")
+        if access_token:
+            if not self.revoke_token(access_token, "access_token"):
+                success = False
+
+        # Revoke refresh token
+        refresh_token = credential.get_key("refresh_token")
+        if refresh_token:
+            if not self.revoke_token(refresh_token, "refresh_token"):
+                success = False
+
+        return success
+
+    # --- Token Request Helpers ---
+
+    def _token_request(self, data: dict[str, Any]) -> OAuth2Token:
+        """
+        Make a token request to the OAuth2 server.
+
+        Args:
+            data: Form data for the token request
+
+        Returns:
+            OAuth2Token from the response
+
+        Raises:
+            OAuth2Error: If request fails or returns an error
+        """
+        client = self._get_client()
+
+        headers = {
+            "Accept": "application/json",
+            "Content-Type": "application/x-www-form-urlencoded",
+            **self.config.extra_headers,
+        }
+
+        response = client.post(self.config.token_url, data=data, headers=headers)
+
+        # Parse response
+        content_type = response.headers.get("content-type", "")
+        if "application/json" in content_type:
+            response_data = response.json()
+        else:
+            # Some providers (like GitHub) may return form-encoded
+            response_data = self._parse_form_response(response.text)
+
+        # Check for error
+        if response.status_code != 200 or "error" in response_data:
+            error = response_data.get("error", "unknown_error")
+            description = response_data.get("error_description", response.text)
+            raise OAuth2Error(
+                error=error, description=description, status_code=response.status_code
+            )
+
+        return OAuth2Token.from_token_response(response_data)
+
+    def _parse_form_response(self, text: str) -> dict[str, str]:
+        """Parse form-encoded response (some providers use this instead of JSON)."""
+        from urllib.parse import parse_qs
+
+        parsed = parse_qs(text)
+        return {k: v[0] if len(v) == 1 else v for k, v in parsed.items()}
+
+    # --- Token Formatting for Requests ---
+
+    def format_for_request(self, token: OAuth2Token) -> dict[str, Any]:
+        """
+        Format token for use in HTTP requests (bipartisan model).
+
+        Args:
+            token: The OAuth2 token
+
+        Returns:
+            Dict with 'headers', 'params', or 'data' keys as appropriate
+        """
+        placement = self.config.token_placement
+
+        if placement == TokenPlacement.HEADER_BEARER:
+            return {"headers": {"Authorization": f"{token.token_type} {token.access_token}"}}
+
+        elif placement == TokenPlacement.HEADER_CUSTOM:
+            header_name = self.config.custom_header_name or "X-Access-Token"
+            return {"headers": {header_name: token.access_token}}
+
+        elif placement == TokenPlacement.QUERY_PARAM:
+            return {"params": {self.config.query_param_name: token.access_token}}
+
+        elif placement == TokenPlacement.BODY_PARAM:
+            return {"data": {"access_token": token.access_token}}
+
+        return {}
+
+    def format_credential_for_request(self, credential: CredentialObject) -> dict[str, Any]:
+        """
+        Format a credential for use in HTTP requests.
+
+        Args:
+            credential: The credential containing access_token
+
+        Returns:
+            Dict with 'headers', 'params', or 'data' keys as appropriate
+        """
+        access_token = credential.get_key("access_token")
+        if not access_token:
+            return {}
+
+        token = OAuth2Token(
+            access_token=access_token,
+            token_type=credential.keys.get("token_type", "Bearer") or "Bearer",
+        )
+
+        return self.format_for_request(token)
@@ -0,0 +1,112 @@
+"""
+HubSpot-specific OAuth2 provider.
+
+Pre-configured for HubSpot's OAuth2 endpoints and CRM scopes.
+Extends BaseOAuth2Provider for HubSpot-specific behavior.
+
+Usage:
+    provider = HubSpotOAuth2Provider(
+        client_id="your-client-id",
+        client_secret="your-client-secret",
+    )
+
+    # Use with credential store
+    store = CredentialStore(
+        storage=EncryptedFileStorage(),  # defaults to ~/.hive/credentials
+        providers=[provider],
+    )
+
+See: https://developers.hubspot.com/docs/api/oauth-quickstart-guide
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any
+
+from ..models import CredentialObject, CredentialType
+from .base_provider import BaseOAuth2Provider
+from .provider import OAuth2Config
+
+logger = logging.getLogger(__name__)
+
+# HubSpot OAuth2 endpoints: token exchange/refresh and user authorization
+HUBSPOT_TOKEN_URL = "https://api.hubapi.com/oauth/v1/token"
+HUBSPOT_AUTHORIZATION_URL = "https://app.hubspot.com/oauth/authorize"
+
+# Default CRM scopes (read/write for contacts, companies, and deals), used by
+# HubSpotOAuth2Provider when the caller does not pass its own scopes
+HUBSPOT_DEFAULT_SCOPES = [
+    "crm.objects.contacts.read",
+    "crm.objects.contacts.write",
+    "crm.objects.companies.read",
+    "crm.objects.companies.write",
+    "crm.objects.deals.read",
+    "crm.objects.deals.write",
+]
+
+
+class HubSpotOAuth2Provider(BaseOAuth2Provider):
+    """
+    HubSpot OAuth2 provider with pre-configured endpoints.
+
+    Handles HubSpot-specific OAuth2 behavior:
+    - Pre-configured token and authorization URLs
+    - Default CRM scopes for contacts, companies, and deals
+    - Token validation via HubSpot API
+
+    Example:
+        provider = HubSpotOAuth2Provider(
+            client_id="your-hubspot-client-id",
+            client_secret="your-hubspot-client-secret",
+            scopes=["crm.objects.contacts.read"],  # Override default scopes
+        )
+    """
+
+    def __init__(
+        self,
+        client_id: str,
+        client_secret: str,
+        scopes: list[str] | None = None,
+    ):
+        config = OAuth2Config(
+            token_url=HUBSPOT_TOKEN_URL,
+            authorization_url=HUBSPOT_AUTHORIZATION_URL,
+            client_id=client_id,
+            client_secret=client_secret,
+            default_scopes=scopes or HUBSPOT_DEFAULT_SCOPES,
+        )
+        super().__init__(config, provider_id="hubspot_oauth2")
+
+    @property
+    def supported_types(self) -> list[CredentialType]:
+        return [CredentialType.OAUTH2]
+
+    def validate(self, credential: CredentialObject) -> bool:
+        """
+        Validate HubSpot credential by making a lightweight API call.
+
+        Tests the access token against the contacts endpoint with limit=1.
+        """
+        access_token = credential.get_key("access_token")
+        if not access_token:
+            return False
+
+        try:
+            client = self._get_client()
+            response = client.get(
+                "https://api.hubapi.com/crm/v3/objects/contacts",
+                headers={
+                    "Authorization": f"Bearer {access_token}",
+                    "Accept": "application/json",
+                },
+                params={"limit": "1"},
+            )
+            return response.status_code == 200
+        except Exception:
+            return False
+
+    def _parse_token_response(self, response_data: dict[str, Any]) -> Any:
+        """Parse HubSpot token response."""
+        from .provider import OAuth2Token
+
+        return OAuth2Token.from_token_response(response_data)
@@ -0,0 +1,363 @@
+"""
+Token lifecycle management for OAuth2 credentials.
+
+This module provides the TokenLifecycleManager which coordinates
+automatic token refresh with the credential store.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+from collections.abc import Callable
+from dataclasses import dataclass
+from datetime import UTC, datetime, timedelta
+from typing import TYPE_CHECKING
+
+from pydantic import SecretStr
+
+from ..models import CredentialKey, CredentialObject, CredentialType
+from .base_provider import BaseOAuth2Provider
+from .provider import OAuth2Token
+
+if TYPE_CHECKING:
+    from ..store import CredentialStore
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class TokenRefreshResult:
+    """
+    Result of a token refresh operation.
+
+    Produced by TokenLifecycleManager when it attempts a refresh.
+    """
+
+    # True when a new token was obtained
+    success: bool
+    # The newly issued token; set on success
+    token: OAuth2Token | None = None
+    # Error message text; set on failure
+    error: str | None = None
+    # True when refreshing cannot work (no refresh token available, or the
+    # error message contains "invalid_grant") and re-authorization is needed
+    needs_reauthorization: bool = False
+
+
+class TokenLifecycleManager:
+    """
+    Manages the complete lifecycle of OAuth2 tokens.
+
+    Responsibilities:
+    - Coordinate with CredentialStore for persistence
+    - Automatically refresh expired tokens
+    - Handle refresh failures gracefully
+    - Provide callbacks for monitoring
+
+    This class is useful when you need more control over token management
+    than the basic auto-refresh in CredentialStore provides.
+
+    Usage:
+        manager = TokenLifecycleManager(
+            provider=github_provider,
+            credential_id="github_oauth",
+            store=credential_store,
+        )
+
+        # Get valid token (auto-refreshes if needed)
+        token = await manager.get_valid_token()
+
+        # Use token
+        headers = provider.format_for_request(token)
+
+    Synchronous usage:
+        # For synchronous code, use sync_ methods
+        token = manager.sync_get_valid_token()
+    """
+
+    def __init__(
+        self,
+        provider: BaseOAuth2Provider,
+        credential_id: str,
+        store: CredentialStore,
+        refresh_buffer_minutes: int = 5,
+        on_token_refreshed: Callable[[OAuth2Token], None] | None = None,
+        on_refresh_failed: Callable[[str], None] | None = None,
+    ):
+        """
+        Initialize the lifecycle manager.
+
+        Args:
+            provider: OAuth2 provider for token operations
+            credential_id: ID of the credential in the store
+            store: Credential store for persistence
+            refresh_buffer_minutes: Minutes before expiry to trigger refresh
+            on_token_refreshed: Callback when token is refreshed
+            on_refresh_failed: Callback when refresh fails
+        """
+        self.provider = provider
+        self.credential_id = credential_id
+        self.store = store
+        self.refresh_buffer = timedelta(minutes=refresh_buffer_minutes)
+        self.on_token_refreshed = on_token_refreshed
+        self.on_refresh_failed = on_refresh_failed
+
+        # In-memory cache for performance
+        self._cached_token: OAuth2Token | None = None
+        self._cache_time: datetime | None = None
+
+    # --- Async Token Access ---
+
+    async def get_valid_token(self) -> OAuth2Token | None:
+        """
+        Get a valid access token, refreshing if necessary.
+
+        This is the main entry point for async code.
+
+        Returns:
+            Valid OAuth2Token or None if unavailable
+        """
+        # Check cache first
+        if self._cached_token and not self._needs_refresh(self._cached_token):
+            return self._cached_token
+
+        # Load from store
+        credential = self.store.get_credential(self.credential_id, refresh_if_needed=False)
+        if credential is None:
+            return None
+
+        # Convert to OAuth2Token
+        token = self._credential_to_token(credential)
+        if token is None:
+            return None
+
+        # Refresh if needed
+        if self._needs_refresh(token):
+            result = await self._async_refresh_token(credential)
+            if result.success and result.token:
+                token = result.token
+            elif result.needs_reauthorization:
+                logger.warning(f"Token for {self.credential_id} needs reauthorization")
+                return None
+            else:
+                # Use existing token if still technically valid
+                if token.is_expired:
+                    return None
+                logger.warning(f"Refresh failed for {self.credential_id}, using existing token")
+
+        self._cached_token = token
+        self._cache_time = datetime.now(UTC)
+        return token
+
+    async def acquire_token_client_credentials(
+        self,
+        scopes: list[str] | None = None,
+    ) -> OAuth2Token:
+        """
+        Acquire a new token using client credentials flow.
+
+        For service-to-service authentication.
+
+        Args:
+            scopes: Scopes to request
+
+        Returns:
+            New OAuth2Token
+        """
+        # Run in executor to avoid blocking
+        loop = asyncio.get_event_loop()
+        token = await loop.run_in_executor(
+            None, lambda: self.provider.client_credentials_grant(scopes=scopes)
+        )
+
+        self._save_token_to_store(token)
+        self._cached_token = token
+        return token
+
+    async def revoke(self) -> bool:
+        """
+        Revoke tokens and clear from store.
+
+        Returns:
+            True if revocation succeeded
+        """
+        credential = self.store.get_credential(self.credential_id, refresh_if_needed=False)
+        if credential:
+            self.provider.revoke(credential)
+
+        self.store.delete_credential(self.credential_id)
+        self._cached_token = None
+        return True
+
+    # --- Synchronous Token Access ---
+
+    def sync_get_valid_token(self) -> OAuth2Token | None:
+        """
+        Synchronous version of get_valid_token().
+
+        For use in synchronous code.
+        """
+        # Check cache
+        if self._cached_token and not self._needs_refresh(self._cached_token):
+            return self._cached_token
+
+        # Load from store
+        credential = self.store.get_credential(self.credential_id, refresh_if_needed=False)
+        if credential is None:
+            return None
+
+        token = self._credential_to_token(credential)
+        if token is None:
+            return None
+
+        # Refresh if needed
+        if self._needs_refresh(token):
+            result = self._sync_refresh_token(credential)
+            if result.success and result.token:
+                token = result.token
+            elif result.needs_reauthorization:
+                logger.warning(f"Token for {self.credential_id} needs reauthorization")
+                return None
+            else:
+                if token.is_expired:
+                    return None
+
+        self._cached_token = token
+        self._cache_time = datetime.now(UTC)
+        return token
+
+    def sync_acquire_token_client_credentials(
+        self,
+        scopes: list[str] | None = None,
+    ) -> OAuth2Token:
+        """Synchronous version of acquire_token_client_credentials()."""
+        token = self.provider.client_credentials_grant(scopes=scopes)
+        self._save_token_to_store(token)
+        self._cached_token = token
+        return token
+
+    # --- Helper Methods ---
+
+    def _needs_refresh(self, token: OAuth2Token) -> bool:
+        """Check if token needs refresh."""
+        if token.expires_at is None:
+            return False
+        return datetime.now(UTC) >= (token.expires_at - self.refresh_buffer)
+
+    def _credential_to_token(self, credential: CredentialObject) -> OAuth2Token | None:
+        """Convert credential to OAuth2Token."""
+        access_token = credential.get_key("access_token")
+        if not access_token:
+            return None
+
+        expires_at = None
+        access_key = credential.keys.get("access_token")
+        if access_key:
+            expires_at = access_key.expires_at
+
+        return OAuth2Token(
+            access_token=access_token,
+            token_type="Bearer",
+            expires_at=expires_at,
+            refresh_token=credential.get_key("refresh_token"),
+            scope=credential.get_key("scope"),
+        )
+
+    def _save_token_to_store(self, token: OAuth2Token) -> None:
+        """Save token to credential store."""
+        credential = CredentialObject(
+            id=self.credential_id,
+            credential_type=CredentialType.OAUTH2,
+            keys={
+                "access_token": CredentialKey(
+                    name="access_token",
+                    value=SecretStr(token.access_token),
+                    expires_at=token.expires_at,
+                ),
+            },
+            provider_id=self.provider.provider_id,
+            auto_refresh=True,
+        )
+
+        if token.refresh_token:
+            credential.keys["refresh_token"] = CredentialKey(
+                name="refresh_token",
+                value=SecretStr(token.refresh_token),
+            )
+
+        if token.scope:
+            credential.keys["scope"] = CredentialKey(
+                name="scope",
+                value=SecretStr(token.scope),
+            )
+
+        self.store.save_credential(credential)
+
+    async def _async_refresh_token(self, credential: CredentialObject) -> TokenRefreshResult:
+        """Async wrapper for token refresh."""
+        loop = asyncio.get_event_loop()
+        return await loop.run_in_executor(None, lambda: self._sync_refresh_token(credential))
+
+    def _sync_refresh_token(self, credential: CredentialObject) -> TokenRefreshResult:
+        """Synchronously refresh token."""
+        refresh_token = credential.get_key("refresh_token")
+        if not refresh_token:
+            return TokenRefreshResult(
+                success=False,
+                error="No refresh token available",
+                needs_reauthorization=True,
+            )
+
+        try:
+            new_token = self.provider.refresh_access_token(refresh_token)
+
+            # Save to store
+            self._save_token_to_store(new_token)
+
+            # Notify callback
+            if self.on_token_refreshed:
+                self.on_token_refreshed(new_token)
+
+            logger.info(f"Token refreshed for {self.credential_id}")
+            return TokenRefreshResult(success=True, token=new_token)
+
+        except Exception as e:
+            error_msg = str(e)
+
+            # Check for refresh token revocation
+            if "invalid_grant" in error_msg.lower():
+                return TokenRefreshResult(
+                    success=False,
+                    error=error_msg,
+                    needs_reauthorization=True,
+                )
+
+            if self.on_refresh_failed:
+                self.on_refresh_failed(error_msg)
+
+            logger.error(f"Token refresh failed for {self.credential_id}: {e}")
+            return TokenRefreshResult(success=False, error=error_msg)
+
+    def invalidate_cache(self) -> None:
+        """Clear cached token."""
+        self._cached_token = None
+        self._cache_time = None
+
+    # --- Convenience Methods ---
+
+    def get_request_headers(self) -> dict[str, str]:
+        """
+        Get headers for HTTP request with current token.
+
+        Returns empty dict if no valid token.
+        """
+        token = self.sync_get_valid_token()
+        if token is None:
+            return {}
+
+        result = self.provider.format_for_request(token)
+        return result.get("headers", {})
+
+    def get_request_kwargs(self) -> dict:
+        """
+        Get kwargs for HTTP request (headers, params, etc.).
+
+        Returns empty dict if no valid token.
+        """
+        token = self.sync_get_valid_token()
+        if token is None:
+            return {}
+
+        return self.provider.format_for_request(token)
@@ -0,0 +1,213 @@
+"""
+OAuth2 types and configuration.
+
+This module defines the core OAuth2 data structures:
+- OAuth2Token: Represents an access token with metadata
+- OAuth2Config: Configuration for OAuth2 endpoints
+- TokenPlacement: Where to place tokens in requests
+- OAuth2Error, TokenExpiredError, RefreshTokenInvalidError: OAuth2 error types
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from datetime import UTC, datetime, timedelta
+from enum import Enum
+from typing import Any
+
+
+class TokenPlacement(str, Enum):
+    """Where to place the access token in HTTP requests."""
+
+    HEADER_BEARER = "header_bearer"
+    """Authorization: Bearer <token> (most common)"""
+
+    HEADER_CUSTOM = "header_custom"
+    """Custom header name (e.g., X-Access-Token)"""
+
+    QUERY_PARAM = "query_param"
+    """Query parameter (e.g., ?access_token=<token>)"""
+
+    BODY_PARAM = "body_param"
+    """Form body parameter"""
+
+
+@dataclass
+class OAuth2Token:
+    """
+    Represents an OAuth2 token with metadata.
+
+    Attributes:
+        access_token: The access token string
+        token_type: Token type (usually "Bearer")
+        expires_at: When the token expires
+        refresh_token: Optional refresh token
+        scope: Granted scopes (space-separated)
+        raw_response: Original token response from server
+    """
+
+    access_token: str
+    token_type: str = "Bearer"
+    expires_at: datetime | None = None
+    refresh_token: str | None = None
+    scope: str | None = None
+    raw_response: dict[str, Any] = field(default_factory=dict)
+
+    @property
+    def is_expired(self) -> bool:
+        """
+        Check if token is expired.
+
+        Uses a 5-minute buffer to account for clock skew and
+        request latency.
+        """
+        if self.expires_at is None:
+            return False
+        buffer = timedelta(minutes=5)
+        return datetime.now(UTC) >= (self.expires_at - buffer)
+
+    @property
+    def can_refresh(self) -> bool:
+        """Check if token can be refreshed (has refresh_token)."""
+        return self.refresh_token is not None and self.refresh_token.strip() != ""
+
+    @property
+    def expires_in_seconds(self) -> int | None:
+        """Get seconds until expiration, or None if no expiration."""
+        if self.expires_at is None:
+            return None
+        delta = self.expires_at - datetime.now(UTC)
+        return max(0, int(delta.total_seconds()))
+
+    @classmethod
+    def from_token_response(cls, data: dict[str, Any]) -> OAuth2Token:
+        """
+        Create OAuth2Token from an OAuth2 token endpoint response.
+
+        Args:
+            data: Token response JSON (access_token, token_type, expires_in, etc.)
+
+        Returns:
+            OAuth2Token instance
+        """
+        expires_at = None
+        if "expires_in" in data:
+            expires_at = datetime.now(UTC) + timedelta(seconds=data["expires_in"])
+
+        return cls(
+            access_token=data["access_token"],
+            token_type=data.get("token_type", "Bearer"),
+            expires_at=expires_at,
+            refresh_token=data.get("refresh_token"),
+            scope=data.get("scope"),
+            raw_response=data,
+        )
+
+
+@dataclass
+class OAuth2Config:
+    """
+    Configuration for an OAuth2 provider.
+
+    This contains all the information needed to perform OAuth2 operations
+    for a specific provider (GitHub, Google, Salesforce, etc.).
+
+    Attributes:
+        token_url: URL for token endpoint (required)
+        authorization_url: URL for authorization endpoint (optional, for auth code flow)
+        revocation_url: URL for token revocation (optional)
+        introspection_url: URL for token introspection (optional)
+        client_id: OAuth2 client ID
+        client_secret: OAuth2 client secret
+        default_scopes: Default scopes to request
+        token_placement: How to include token in requests
+        custom_header_name: Header name when using HEADER_CUSTOM placement
+        query_param_name: Query param name when using QUERY_PARAM placement
+        extra_token_params: Additional parameters for token requests
+        request_timeout: Timeout for HTTP requests in seconds
+
+    Example:
+        config = OAuth2Config(
+            token_url="https://github.com/login/oauth/access_token",
+            authorization_url="https://github.com/login/oauth/authorize",
+            client_id="your-client-id",
+            client_secret="your-client-secret",
+            default_scopes=["repo", "user"],
+        )
+    """
+
+    # Endpoints (only token_url is strictly required)
+    token_url: str
+    authorization_url: str | None = None
+    revocation_url: str | None = None
+    introspection_url: str | None = None
+
+    # Client credentials
+    client_id: str = ""
+    client_secret: str = ""
+
+    # Scopes
+    default_scopes: list[str] = field(default_factory=list)
+
+    # Token placement for API calls (bipartisan model)
+    token_placement: TokenPlacement = TokenPlacement.HEADER_BEARER
+    custom_header_name: str | None = None
+    query_param_name: str = "access_token"
+
+    # Request configuration
+    extra_token_params: dict[str, str] = field(default_factory=dict)
+    request_timeout: float = 30.0
+
+    # Additional headers for token requests
+    extra_headers: dict[str, str] = field(default_factory=dict)
+
+    def __post_init__(self) -> None:
+        """Validate configuration."""
+        if not self.token_url:
+            raise ValueError("token_url is required")
+
+        if self.token_placement == TokenPlacement.HEADER_CUSTOM and not self.custom_header_name:
+            raise ValueError("custom_header_name is required when using HEADER_CUSTOM placement")
+
+
+class OAuth2Error(Exception):
+    """
+    OAuth2 protocol error.
+
+    Attributes:
+        error: OAuth2 error code (e.g., 'invalid_grant', 'invalid_client')
+        description: Human-readable error description
+        status_code: HTTP status code from the response
+    """
+
+    def __init__(
+        self,
+        error: str,
+        description: str = "",
+        status_code: int = 0,
+    ):
+        self.error = error
+        self.description = description
+        self.status_code = status_code
+        super().__init__(f"{error}: {description}" if description else error)
+
+
+class TokenExpiredError(OAuth2Error):
+    """Raised when a token has expired and cannot be used."""
+
+    def __init__(self, credential_id: str):
+        super().__init__(
+            error="token_expired",
+            description=f"Token for '{credential_id}' has expired",
+        )
+        self.credential_id = credential_id
+
+
+class RefreshTokenInvalidError(OAuth2Error):
+    """Raised when the refresh token is invalid or revoked."""
+
+    def __init__(self, credential_id: str, reason: str = ""):
+        description = f"Refresh token for '{credential_id}' is invalid"
+        if reason:
+            description += f": {reason}"
+        super().__init__(error="invalid_grant", description=description)
+        self.credential_id = credential_id
@@ -0,0 +1,283 @@
+"""
+Provider interface for credential lifecycle management.
+
+Providers handle credential lifecycle operations:
+- Refresh: Obtain new tokens when expired
+- Validate: Check if credentials are still working
+- Revoke: Invalidate credentials when no longer needed
+
+OSS users can implement custom providers by subclassing CredentialProvider.
+"""
+
+from __future__ import annotations
+
+import logging
+from abc import ABC, abstractmethod
+from datetime import UTC, datetime, timedelta
+
+from .models import CredentialObject, CredentialRefreshError, CredentialType
+
+logger = logging.getLogger(__name__)
+
+
+class CredentialProvider(ABC):
+    """
+    Abstract base class for credential providers.
+
+    Providers handle credential lifecycle operations:
+    - refresh(): Obtain new tokens when expired
+    - validate(): Check if credentials are still working
+    - should_refresh(): Determine if a credential needs refresh
+    - revoke(): Invalidate credentials (optional)
+
+    Example custom provider:
+        class MyCustomProvider(CredentialProvider):
+            @property
+            def provider_id(self) -> str:
+                return "my_custom"
+
+            @property
+            def supported_types(self) -> List[CredentialType]:
+                return [CredentialType.CUSTOM]
+
+            def refresh(self, credential: CredentialObject) -> CredentialObject:
+                # Custom refresh logic
+                new_token = my_api.refresh(credential.get_key("api_key"))
+                credential.set_key("access_token", new_token)
+                return credential
+
+            def validate(self, credential: CredentialObject) -> bool:
+                token = credential.get_key("access_token")
+                return my_api.validate(token)
+    """
+
+    @property
+    @abstractmethod
+    def provider_id(self) -> str:
+        """
+        Unique identifier for this provider.
+
+        Examples: 'static', 'oauth2', 'my_custom_auth'
+        """
+        pass
+
+    @property
+    @abstractmethod
+    def supported_types(self) -> list[CredentialType]:
+        """
+        Credential types this provider can manage.
+
+        Returns:
+            List of CredentialType enums this provider supports
+        """
+        pass
+
+    @abstractmethod
+    def refresh(self, credential: CredentialObject) -> CredentialObject:
+        """
+        Refresh the credential (e.g., use refresh_token to get new access_token).
+
+        This method should:
+        1. Use existing credential data to obtain new values
+        2. Update the credential object with new values
+        3. Set appropriate expiration times
+        4. Update last_refreshed timestamp
+
+        Args:
+            credential: The credential to refresh
+
+        Returns:
+            Updated credential with new values
+
+        Raises:
+            CredentialRefreshError: If refresh fails
+        """
+        pass
+
+    @abstractmethod
+    def validate(self, credential: CredentialObject) -> bool:
+        """
+        Validate that a credential is still working.
+
+        This might involve:
+        - Checking expiration times
+        - Making a test API call
+        - Validating token signatures
+
+        Args:
+            credential: The credential to validate
+
+        Returns:
+            True if credential is valid, False otherwise
+        """
+        pass
+
+    def should_refresh(self, credential: CredentialObject) -> bool:
+        """
+        Determine if a credential should be refreshed.
+
+        Default implementation: refresh if any key is expired or within
+        5 minutes of expiry. Override for custom logic.
+
+        Args:
+            credential: The credential to check
+
+        Returns:
+            True if credential should be refreshed
+        """
+        buffer = timedelta(minutes=5)
+        now = datetime.now(UTC)
+
+        for key in credential.keys.values():
+            if key.expires_at is not None:
+                if key.expires_at <= now + buffer:
+                    return True
+        return False
+
+    def revoke(self, credential: CredentialObject) -> bool:
+        """
+        Revoke a credential (optional operation).
+
+        Not all providers support revocation. The default implementation
+        logs a warning and returns False.
+
+        Args:
+            credential: The credential to revoke
+
+        Returns:
+            True if revocation succeeded, False otherwise
+        """
+        logger.warning(f"Provider '{self.provider_id}' does not support revocation")
+        return False
+
+    def can_handle(self, credential: CredentialObject) -> bool:
+        """
+        Check if this provider can handle a credential.
+
+        Args:
+            credential: The credential to check
+
+        Returns:
+            True if this provider can manage the credential
+        """
+        return credential.credential_type in self.supported_types
+
+
+class StaticProvider(CredentialProvider):
+    """
+    Provider for static credentials that never need refresh.
+
+    Use for simple API keys that don't expire, such as:
+    - Brave Search API key
+    - OpenAI API key
+    - Basic auth credentials
+
+    Static credentials are always considered valid if they have at least one key.
+    """
+
+    @property
+    def provider_id(self) -> str:
+        return "static"
+
+    @property
+    def supported_types(self) -> list[CredentialType]:
+        return [CredentialType.API_KEY, CredentialType.BASIC_AUTH, CredentialType.CUSTOM]
+
+    def refresh(self, credential: CredentialObject) -> CredentialObject:
+        """
+        Static credentials don't need refresh.
+
+        Returns the credential unchanged.
+        """
+        logger.debug(f"Static credential '{credential.id}' does not need refresh")
+        return credential
+
+    def validate(self, credential: CredentialObject) -> bool:
+        """
+        Validate that credential has at least one key with a value.
+
+        For static credentials, we can't verify the key works without
+        making an API call, so we just check existence.
+        """
+        if not credential.keys:
+            return False
+
+        # Check at least one key has a non-empty value
+        for key in credential.keys.values():
+            try:
+                value = key.get_secret_value()
+                if value and value.strip():
+                    return True
+            except Exception:
+                continue
+
+        return False
+
+    def should_refresh(self, credential: CredentialObject) -> bool:
+        """Static credentials never need refresh."""
+        return False
+
+
+class BearerTokenProvider(CredentialProvider):
+    """
+    Provider for bearer tokens without refresh capability.
+
+    Use for JWTs or tokens that:
+    - Have an expiration time
+    - Cannot be refreshed (no refresh token)
+    - Must be re-obtained when expired
+
+    This provider validates based on expiration time only.
+    """
+
+    @property
+    def provider_id(self) -> str:
+        return "bearer_token"
+
+    @property
+    def supported_types(self) -> list[CredentialType]:
+        return [CredentialType.BEARER_TOKEN]
+
+    def refresh(self, credential: CredentialObject) -> CredentialObject:
+        """
+        Bearer tokens without refresh capability cannot be refreshed.
+
+        Raises:
+            CredentialRefreshError: Always, as refresh is not supported
+        """
+        raise CredentialRefreshError(
+            f"Bearer token '{credential.id}' cannot be refreshed. "
+            "Obtain a new token and save it to the credential store."
+        )
+
+    def validate(self, credential: CredentialObject) -> bool:
+        """
+        Validate based on expiration time.
+
+        Returns True if token exists and is not expired.
+        """
+        access_key = credential.keys.get("access_token") or credential.keys.get("token")
+        if access_key is None:
+            return False
+
+        # Check if expired
+        return not access_key.is_expired
+
+    def should_refresh(self, credential: CredentialObject) -> bool:
+        """
+        Check if token is expired or near expiration.
+
+        Note: Even though this returns True for expired tokens,
+        refresh() will fail. This allows the store to know the
+        credential needs attention.
+        """
+        buffer = timedelta(minutes=5)
+        now = datetime.now(UTC)
+
+        for key_name in ["access_token", "token"]:
+            key = credential.keys.get(key_name)
+            if key and key.expires_at:
+                if key.expires_at <= now + buffer:
+                    return True
+
+        return False
@@ -0,0 +1,518 @@
+"""
+Storage backends for the credential store.
+
+This module provides abstract and concrete storage implementations:
+- CredentialStorage: Abstract base class
+- EncryptedFileStorage: Fernet-encrypted JSON files (default for production)
+- EnvVarStorage: Environment variable reading (backward compatibility)
+- InMemoryStorage: For testing
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+from abc import ABC, abstractmethod
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import Any
+
+from pydantic import SecretStr
+
+from .models import CredentialDecryptionError, CredentialKey, CredentialObject, CredentialType
+
+logger = logging.getLogger(__name__)
+
+
+class CredentialStorage(ABC):
+    """
+    Abstract storage backend for credentials.
+
+    Implementations must provide save, load, delete, list_all, and exists methods.
+    All implementations should handle serialization of SecretStr values securely.
+    """
+
+    @abstractmethod
+    def save(self, credential: CredentialObject) -> None:
+        """
+        Save a credential to storage.
+
+        Args:
+            credential: The credential object to save
+        """
+        pass
+
+    @abstractmethod
+    def load(self, credential_id: str) -> CredentialObject | None:
+        """
+        Load a credential from storage.
+
+        Args:
+            credential_id: The ID of the credential to load
+
+        Returns:
+            CredentialObject if found, None otherwise
+        """
+        pass
+
+    @abstractmethod
+    def delete(self, credential_id: str) -> bool:
+        """
+        Delete a credential from storage.
+
+        Args:
+            credential_id: The ID of the credential to delete
+
+        Returns:
+            True if the credential existed and was deleted, False otherwise
+        """
+        pass
+
+    @abstractmethod
+    def list_all(self) -> list[str]:
+        """
+        List all credential IDs in storage.
+
+        Returns:
+            List of credential IDs
+        """
+        pass
+
+    @abstractmethod
+    def exists(self, credential_id: str) -> bool:
+        """
+        Check if a credential exists in storage.
+
+        Args:
+            credential_id: The ID to check
+
+        Returns:
+            True if credential exists, False otherwise
+        """
+        pass
+
+
+class EncryptedFileStorage(CredentialStorage):
+    """
+    Encrypted file-based credential storage.
+
+    Uses Fernet symmetric encryption (AES-128-CBC + HMAC) for at-rest encryption.
+    Each credential is stored as a separate encrypted JSON file.
+
+    Directory structure:
+        {base_path}/
+            credentials/
+                {credential_id}.enc   # Encrypted credential JSON
+            metadata/
+                index.json            # Index of all credentials (unencrypted)
+
+    The encryption key is read from the HIVE_CREDENTIAL_KEY environment variable.
+    If not set, a new key is generated (and must be persisted for data recovery).
+
+    Example:
+        storage = EncryptedFileStorage("~/.hive/credentials")
+        storage.save(credential)
+        credential = storage.load("brave_search")
+    """
+
+    DEFAULT_PATH = "~/.hive/credentials"
+
+    def __init__(
+        self,
+        base_path: str | Path | None = None,
+        encryption_key: bytes | None = None,
+        key_env_var: str = "HIVE_CREDENTIAL_KEY",
+    ):
+        """
+        Initialize encrypted storage.
+
+        Args:
+            base_path: Directory for credential files. Defaults to ~/.hive/credentials.
+            encryption_key: 32-byte Fernet key. If None, reads from env var.
+            key_env_var: Environment variable containing encryption key
+        """
+        try:
+            from cryptography.fernet import Fernet
+        except ImportError as e:
+            raise ImportError(
+                "Encrypted storage requires 'cryptography'. Install with: pip install cryptography"
+            ) from e
+
+        self.base_path = Path(base_path or self.DEFAULT_PATH).expanduser()
+        self._ensure_dirs()
+        self._key_env_var = key_env_var
+
+        # Get or generate encryption key
+        if encryption_key:
+            self._key = encryption_key
+        else:
+            key_str = os.environ.get(key_env_var)
+            if key_str:
+                self._key = key_str.encode()
+            else:
+                # Generate new key
+                self._key = Fernet.generate_key()
+                logger.warning(
+                    f"Generated new encryption key. To persist credentials across restarts, "
+                    f"set {key_env_var}={self._key.decode()}"
+                )
+
+        self._fernet = Fernet(self._key)
+
+    def _ensure_dirs(self) -> None:
+        """Create directory structure."""
+        (self.base_path / "credentials").mkdir(parents=True, exist_ok=True)
+        (self.base_path / "metadata").mkdir(parents=True, exist_ok=True)
+
+    def _cred_path(self, credential_id: str) -> Path:
+        """Get the file path for a credential."""
+        # Sanitize credential_id to prevent path traversal
+        safe_id = credential_id.replace("/", "_").replace("\\", "_").replace("..", "_")
+        return self.base_path / "credentials" / f"{safe_id}.enc"
+
+    def save(self, credential: CredentialObject) -> None:
+        """Encrypt and save credential."""
+        # Serialize credential
+        data = self._serialize_credential(credential)
+        json_bytes = json.dumps(data, default=str).encode()
+
+        # Encrypt
+        encrypted = self._fernet.encrypt(json_bytes)
+
+        # Write to file
+        cred_path = self._cred_path(credential.id)
+        with open(cred_path, "wb") as f:
+            f.write(encrypted)
+
+        # Update index
+        self._update_index(credential.id, "save", credential.credential_type.value)
+        logger.debug(f"Saved encrypted credential '{credential.id}'")
+
+    def load(self, credential_id: str) -> CredentialObject | None:
+        """Load and decrypt credential."""
+        cred_path = self._cred_path(credential_id)
+        if not cred_path.exists():
+            return None
+
+        # Read encrypted data
+        with open(cred_path, "rb") as f:
+            encrypted = f.read()
+
+        # Decrypt
+        try:
+            json_bytes = self._fernet.decrypt(encrypted)
+            data = json.loads(json_bytes.decode())
+        except Exception as e:
+            raise CredentialDecryptionError(
+                f"Failed to decrypt credential '{credential_id}': {e}"
+            ) from e
+
+        # Deserialize
+        return self._deserialize_credential(data)
+
+    def delete(self, credential_id: str) -> bool:
+        """Delete a credential file."""
+        cred_path = self._cred_path(credential_id)
+        if cred_path.exists():
+            cred_path.unlink()
+            self._update_index(credential_id, "delete")
+            logger.debug(f"Deleted credential '{credential_id}'")
+            return True
+        return False
+
+    def list_all(self) -> list[str]:
+        """List all credential IDs."""
+        index_path = self.base_path / "metadata" / "index.json"
+        if not index_path.exists():
+            return []
+        with open(index_path) as f:
+            index = json.load(f)
+        return list(index.get("credentials", {}).keys())
+
+    def exists(self, credential_id: str) -> bool:
+        """Check if credential exists."""
+        return self._cred_path(credential_id).exists()
+
+    def _serialize_credential(self, credential: CredentialObject) -> dict[str, Any]:
+        """Convert credential to JSON-serializable dict, extracting secret values."""
+        data = credential.model_dump(mode="json")
+
+        # Extract actual secret values from SecretStr
+        for key_name, key_data in data.get("keys", {}).items():
+            if "value" in key_data:
+                # SecretStr serializes as "**********", need actual value
+                actual_key = credential.keys.get(key_name)
+                if actual_key:
+                    key_data["value"] = actual_key.get_secret_value()
+
+        return data
+
+    def _deserialize_credential(self, data: dict[str, Any]) -> CredentialObject:
+        """Reconstruct credential from dict, wrapping values in SecretStr."""
+        # Convert plain values back to SecretStr
+        for key_data in data.get("keys", {}).values():
+            if "value" in key_data and isinstance(key_data["value"], str):
+                key_data["value"] = SecretStr(key_data["value"])
+
+        return CredentialObject.model_validate(data)
+
+    def _update_index(
+        self,
+        credential_id: str,
+        operation: str,
+        credential_type: str | None = None,
+    ) -> None:
+        """Update the metadata index."""
+        index_path = self.base_path / "metadata" / "index.json"
+
+        if index_path.exists():
+            with open(index_path) as f:
+                index = json.load(f)
+        else:
+            index = {"credentials": {}, "version": "1.0"}
+
+        if operation == "save":
+            index["credentials"][credential_id] = {
+                "updated_at": datetime.now(UTC).isoformat(),
+                "type": credential_type,
+            }
+        elif operation == "delete":
+            index["credentials"].pop(credential_id, None)
+
+        index["last_modified"] = datetime.now(UTC).isoformat()
+
+        with open(index_path, "w") as f:
+            json.dump(index, f, indent=2)
+
+
+class EnvVarStorage(CredentialStorage):
+    """
+    Environment variable-based storage for backward compatibility.
+
+    Maps credential IDs to environment variable patterns.
+    Supports hot-reload from .env files using python-dotenv.
+
+    This storage is READ-ONLY - credentials cannot be saved at runtime.
+
+    Example:
+        storage = EnvVarStorage(
+            env_mapping={"brave_search": "BRAVE_SEARCH_API_KEY"},
+            dotenv_path=Path(".env")
+        )
+        credential = storage.load("brave_search")
+    """
+
+    def __init__(
+        self,
+        env_mapping: dict[str, str] | None = None,
+        dotenv_path: Path | None = None,
+    ):
+        """
+        Initialize env var storage.
+
+        Args:
+            env_mapping: Map of credential_id -> env_var_name
+                        e.g., {"brave_search": "BRAVE_SEARCH_API_KEY"}
+                        If not provided, uses {CREDENTIAL_ID}_API_KEY pattern
+            dotenv_path: Path to .env file for hot-reload support
+        """
+        self._env_mapping = env_mapping or {}
+        self._dotenv_path = dotenv_path or Path.cwd() / ".env"
+
+    def _get_env_var_name(self, credential_id: str) -> str:
+        """Get the environment variable name for a credential."""
+        if credential_id in self._env_mapping:
+            return self._env_mapping[credential_id]
+        # Default pattern: CREDENTIAL_ID_API_KEY
+        return f"{credential_id.upper().replace('-', '_')}_API_KEY"
+
+    def _read_env_value(self, env_var: str) -> str | None:
+        """Read value from env var or .env file."""
+        # Check os.environ first (takes precedence)
+        value = os.environ.get(env_var)
+        if value:
+            return value
+
+        # Fallback: read from .env file (hot-reload)
+        if self._dotenv_path.exists():
+            try:
+                from dotenv import dotenv_values
+
+                values = dotenv_values(self._dotenv_path)
+                return values.get(env_var)
+            except ImportError:
+                logger.debug("python-dotenv not installed, skipping .env file")
+                return None
+
+        return None
+
+    def save(self, credential: CredentialObject) -> None:
+        """Cannot save to environment variables at runtime."""
+        raise NotImplementedError(
+            "EnvVarStorage is read-only. Set environment variables "
+            "externally or use EncryptedFileStorage."
+        )
+
+    def load(self, credential_id: str) -> CredentialObject | None:
+        """Load credential from environment variable."""
+        env_var = self._get_env_var_name(credential_id)
+        value = self._read_env_value(env_var)
+
+        if not value:
+            return None
+
+        return CredentialObject(
+            id=credential_id,
+            credential_type=CredentialType.API_KEY,
+            keys={"api_key": CredentialKey(name="api_key", value=SecretStr(value))},
+            description=f"Loaded from {env_var}",
+        )
+
+    def delete(self, credential_id: str) -> bool:
+        """Cannot delete environment variables at runtime."""
+        raise NotImplementedError(
+            "EnvVarStorage is read-only. Unset environment variables externally."
+        )
+
+    def list_all(self) -> list[str]:
+        """List credentials that are available in environment."""
+        available = []
+
+        # Check mapped credentials
+        for cred_id in self._env_mapping.keys():
+            if self.exists(cred_id):
+                available.append(cred_id)
+
+        return available
+
+    def exists(self, credential_id: str) -> bool:
+        """Check if credential is available in environment."""
+        env_var = self._get_env_var_name(credential_id)
+        return self._read_env_value(env_var) is not None
+
+    def add_mapping(self, credential_id: str, env_var: str) -> None:
+        """
+        Add a credential ID to environment variable mapping.
+
+        Args:
+            credential_id: The credential identifier
+            env_var: The environment variable name
+        """
+        self._env_mapping[credential_id] = env_var
+
+
+class InMemoryStorage(CredentialStorage):
+    """
+    In-memory storage for testing.
+
+    Credentials are stored in a dictionary and lost when the process exits.
+
+    Example:
+        storage = InMemoryStorage()
+        storage.save(credential)
+        credential = storage.load("test_cred")
+    """
+
+    def __init__(self, initial_data: dict[str, CredentialObject] | None = None):
+        """
+        Initialize in-memory storage.
+
+        Args:
+            initial_data: Optional dict of credential_id -> CredentialObject
+        """
+        self._data: dict[str, CredentialObject] = initial_data or {}
+
+    def save(self, credential: CredentialObject) -> None:
+        """Save credential to memory."""
+        self._data[credential.id] = credential
+
+    def load(self, credential_id: str) -> CredentialObject | None:
+        """Load credential from memory."""
+        return self._data.get(credential_id)
+
+    def delete(self, credential_id: str) -> bool:
+        """Delete credential from memory."""
+        if credential_id in self._data:
+            del self._data[credential_id]
+            return True
+        return False
+
+    def list_all(self) -> list[str]:
+        """List all credential IDs."""
+        return list(self._data.keys())
+
+    def exists(self, credential_id: str) -> bool:
+        """Check if credential exists."""
+        return credential_id in self._data
+
+    def clear(self) -> None:
+        """Clear all credentials."""
+        self._data.clear()
+
+
+class CompositeStorage(CredentialStorage):
+    """
+    Composite storage that reads from multiple backends.
+
+    Useful for layering storages, e.g., encrypted file with env var fallback:
+    - Writes go to the primary storage
+    - Reads check primary first, then fallback storages
+
+    Example:
+        storage = CompositeStorage(
+            primary=EncryptedFileStorage("~/.hive/credentials"),
+            fallbacks=[EnvVarStorage({"brave_search": "BRAVE_SEARCH_API_KEY"})]
+        )
+    """
+
+    def __init__(
+        self,
+        primary: CredentialStorage,
+        fallbacks: list[CredentialStorage] | None = None,
+    ):
+        """
+        Initialize composite storage.
+
+        Args:
+            primary: Primary storage for writes and first read attempt
+            fallbacks: List of fallback storages to check if primary doesn't have credential
+        """
+        self._primary = primary
+        self._fallbacks = fallbacks or []
+
+    def save(self, credential: CredentialObject) -> None:
+        """Save to primary storage."""
+        self._primary.save(credential)
+
+    def load(self, credential_id: str) -> CredentialObject | None:
+        """Load from primary, then fallbacks."""
+        # Try primary first
+        credential = self._primary.load(credential_id)
+        if credential is not None:
+            return credential
+
+        # Try fallbacks
+        for fallback in self._fallbacks:
+            credential = fallback.load(credential_id)
+            if credential is not None:
+                return credential
+
+        return None
+
+    def delete(self, credential_id: str) -> bool:
+        """Delete from primary storage only."""
+        return self._primary.delete(credential_id)
+
+    def list_all(self) -> list[str]:
+        """List credentials from all storages."""
+        all_ids = set(self._primary.list_all())
+        for fallback in self._fallbacks:
+            all_ids.update(fallback.list_all())
+        return list(all_ids)
+
+    def exists(self, credential_id: str) -> bool:
+        """Check if credential exists in any storage."""
+        if self._primary.exists(credential_id):
+            return True
+        return any(fallback.exists(credential_id) for fallback in self._fallbacks)
@@ -0,0 +1,708 @@
+"""
+Main credential store orchestrating storage, providers, and template resolution.
+
+The CredentialStore is the primary interface for credential management, providing:
+- Multi-backend storage (file, env, vault)
+- Provider-based lifecycle management (refresh, validate)
+- Template resolution for {{cred.key}} patterns
+- Caching with TTL for performance
+- Thread-safe operations
+"""
+
+from __future__ import annotations
+
+import logging
+import threading
+from datetime import UTC, datetime
+from typing import Any
+
+from pydantic import SecretStr
+
+from .models import (
+    CredentialKey,
+    CredentialObject,
+    CredentialRefreshError,
+    CredentialUsageSpec,
+)
+from .provider import CredentialProvider, StaticProvider
+from .storage import CredentialStorage, EnvVarStorage, InMemoryStorage
+from .template import TemplateResolver
+
+logger = logging.getLogger(__name__)
+
+
+class CredentialStore:
+    """
+    Main credential store orchestrating storage, providers, and template resolution.
+
+    Features:
+    - Multi-backend storage (file, env, vault)
+    - Provider-based lifecycle management (refresh, validate)
+    - Template resolution for {{cred.key}} patterns
+    - Caching with TTL for performance
+    - Thread-safe operations
+
+    Usage:
+        # Basic usage
+        store = CredentialStore(
+            storage=EncryptedFileStorage("~/.hive/credentials"),
+            providers=[OAuth2Provider(), StaticProvider()]
+        )
+
+        # Get a credential
+        cred = store.get_credential("github_oauth")
+
+        # Resolve templates in headers
+        headers = store.resolve_headers({
+            "Authorization": "Bearer {{github_oauth.access_token}}"
+        })
+
+        # Register a tool's credential requirements
+        store.register_usage(CredentialUsageSpec(
+            credential_id="brave_search",
+            required_keys=["api_key"],
+            headers={"X-Subscription-Token": "{{brave_search.api_key}}"}
+        ))
+    """
+
+    def __init__(
+        self,
+        storage: CredentialStorage | None = None,
+        providers: list[CredentialProvider] | None = None,
+        cache_ttl_seconds: int = 300,
+        auto_refresh: bool = True,
+    ):
+        """
+        Initialize the credential store.
+
+        Args:
+            storage: Storage backend. Defaults to EnvVarStorage for compatibility.
+            providers: List of credential providers. Defaults to [StaticProvider()].
+            cache_ttl_seconds: How long to cache credentials in memory (default: 5 minutes).
+            auto_refresh: Whether to auto-refresh expired credentials on access.
+        """
+        self._storage = storage or EnvVarStorage()
+        self._providers: dict[str, CredentialProvider] = {}
+        self._usage_specs: dict[str, CredentialUsageSpec] = {}
+
+        # Cache: credential_id -> (CredentialObject, cached_at)
+        self._cache: dict[str, tuple[CredentialObject, datetime]] = {}
+        self._cache_ttl = cache_ttl_seconds
+        self._lock = threading.RLock()
+
+        self._auto_refresh = auto_refresh
+
+        # Register providers
+        for provider in providers or [StaticProvider()]:
+            self.register_provider(provider)
+
+        # Template resolver
+        self._resolver = TemplateResolver(self)
+
+    # --- Provider Management ---
+
+    def register_provider(self, provider: CredentialProvider) -> None:
+        """
+        Register a credential provider.
+
+        Args:
+            provider: The provider to register
+        """
+        self._providers[provider.provider_id] = provider
+        logger.debug(f"Registered credential provider: {provider.provider_id}")
+
+    def get_provider(self, provider_id: str) -> CredentialProvider | None:
+        """
+        Get a provider by ID.
+
+        Args:
+            provider_id: The provider identifier
+
+        Returns:
+            The provider if found, None otherwise
+        """
+        return self._providers.get(provider_id)
+
+    def get_provider_for_credential(
+        self, credential: CredentialObject
+    ) -> CredentialProvider | None:
+        """
+        Get the appropriate provider for a credential.
+
+        Args:
+            credential: The credential to find a provider for
+
+        Returns:
+            The provider if found, None otherwise
+        """
+        # First, check if credential specifies a provider
+        if credential.provider_id:
+            provider = self._providers.get(credential.provider_id)
+            if provider:
+                return provider
+
+        # Fall back to finding a provider that supports this type
+        for provider in self._providers.values():
+            if provider.can_handle(credential):
+                return provider
+
+        return None
+
+    # --- Usage Spec Management ---
+
+    def register_usage(self, spec: CredentialUsageSpec) -> None:
+        """
+        Register how a tool uses credentials.
+
+        Args:
+            spec: The usage specification
+        """
+        self._usage_specs[spec.credential_id] = spec
+
+    def get_usage_spec(self, credential_id: str) -> CredentialUsageSpec | None:
+        """
+        Get the usage spec for a credential.
+
+        Args:
+            credential_id: The credential identifier
+
+        Returns:
+            The usage spec if registered, None otherwise
+        """
+        return self._usage_specs.get(credential_id)
+
+    # --- Credential Access ---
+
+    def get_credential(
+        self,
+        credential_id: str,
+        refresh_if_needed: bool = True,
+    ) -> CredentialObject | None:
+        """
+        Get a credential by ID.
+
+        Args:
+            credential_id: The credential identifier
+            refresh_if_needed: If True, refresh expired credentials
+
+        Returns:
+            CredentialObject or None if not found
+        """
+        with self._lock:
+            # Check cache
+            cached = self._get_from_cache(credential_id)
+            if cached is not None:
+                if refresh_if_needed and self._should_refresh(cached):
+                    return self._refresh_credential(cached)
+                return cached
+
+            # Load from storage
+            credential = self._storage.load(credential_id)
+            if credential is None:
+                return None
+
+            # Refresh if needed
+            if refresh_if_needed and self._should_refresh(credential):
+                credential = self._refresh_credential(credential)
+
+            # Cache
+            self._add_to_cache(credential)
+
+            return credential
+
+    def get_key(self, credential_id: str, key_name: str) -> str | None:
+        """
+        Convenience method to get a specific key value.
+
+        Args:
+            credential_id: The credential identifier
+            key_name: The key within the credential
+
+        Returns:
+            The key value or None if not found
+        """
+        credential = self.get_credential(credential_id)
+        if credential is None:
+            return None
+        return credential.get_key(key_name)
+
+    def get(self, credential_id: str) -> str | None:
+        """
+        Legacy compatibility: get the primary key value.
+
+        For single-key credentials, returns that key.
+        For multi-key, returns 'value', 'api_key', or 'access_token'.
+
+        Args:
+            credential_id: The credential identifier
+
+        Returns:
+            The primary key value or None
+        """
+        credential = self.get_credential(credential_id)
+        if credential is None:
+            return None
+        return credential.get_default_key()
+
+    # --- Template Resolution ---
+
+    def resolve(self, template: str) -> str:
+        """
+        Resolve credential templates in a string.
+
+        Args:
+            template: String containing {{cred.key}} patterns
+
+        Returns:
+            Template with all references resolved
+
+        Example:
+            >>> store.resolve("Bearer {{github.access_token}}")
+            "Bearer ghp_xxxxxxxxxxxx"
+        """
+        return self._resolver.resolve(template)
+
+    def resolve_headers(self, headers: dict[str, str]) -> dict[str, str]:
+        """
+        Resolve credential templates in headers dictionary.
+
+        Args:
+            headers: Dict of header name to template value
+
+        Returns:
+            Dict with all templates resolved
+
+        Example:
+            >>> store.resolve_headers({
+            ...     "Authorization": "Bearer {{github.access_token}}"
+            ... })
+            {"Authorization": "Bearer ghp_xxx"}
+        """
+        return self._resolver.resolve_headers(headers)
+
+    def resolve_params(self, params: dict[str, str]) -> dict[str, str]:
+        """
+        Resolve credential templates in query parameters dictionary.
+
+        Args:
+            params: Dict of param name to template value
+
+        Returns:
+            Dict with all templates resolved
+        """
+        return self._resolver.resolve_params(params)
+
+    def resolve_for_usage(self, credential_id: str) -> dict[str, Any]:
+        """
+        Get resolved request kwargs for a registered usage spec.
+
+        Args:
+            credential_id: The credential identifier
+
+        Returns:
+            Dict with 'headers', 'params', etc. keys as appropriate
+
+        Raises:
+            ValueError: If no usage spec is registered for the credential
+        """
+        spec = self._usage_specs.get(credential_id)
+        if spec is None:
+            raise ValueError(f"No usage spec registered for '{credential_id}'")
+
+        result: dict[str, Any] = {}
+
+        if spec.headers:
+            result["headers"] = self.resolve_headers(spec.headers)
+
+        if spec.query_params:
+            result["params"] = self.resolve_params(spec.query_params)
+
+        if spec.body_fields:
+            result["data"] = {key: self.resolve(value) for key, value in spec.body_fields.items()}
+
+        return result
+
+    # --- Credential Management ---
+
+    def save_credential(self, credential: CredentialObject) -> None:
+        """
+        Save a credential to storage.
+
+        Args:
+            credential: The credential to save
+        """
+        with self._lock:
+            self._storage.save(credential)
+            self._add_to_cache(credential)
+            logger.info(f"Saved credential '{credential.id}'")
+
+    def delete_credential(self, credential_id: str) -> bool:
+        """
+        Delete a credential from storage.
+
+        Args:
+            credential_id: The credential identifier
+
+        Returns:
+            True if the credential existed and was deleted
+        """
+        with self._lock:
+            self._remove_from_cache(credential_id)
+            result = self._storage.delete(credential_id)
+            if result:
+                logger.info(f"Deleted credential '{credential_id}'")
+            return result
+
+    def list_credentials(self) -> list[str]:
+        """
+        List all available credential IDs.
+
+        Returns:
+            List of credential IDs
+        """
+        return self._storage.list_all()
+
+    def is_available(self, credential_id: str) -> bool:
+        """
+        Check if a credential is available.
+
+        Args:
+            credential_id: The credential identifier
+
+        Returns:
+            True if credential exists and is accessible
+        """
+        return self.get_credential(credential_id, refresh_if_needed=False) is not None
+
+    # --- Validation ---
+
+    def validate_for_usage(self, credential_id: str) -> list[str]:
+        """
+        Validate that a credential meets its usage spec requirements.
+
+        Args:
+            credential_id: The credential identifier
+
+        Returns:
+            List of missing keys or errors. Empty list if valid.
+        """
+        spec = self._usage_specs.get(credential_id)
+        if spec is None:
+            return []  # No requirements registered
+
+        credential = self.get_credential(credential_id)
+        if credential is None:
+            return [f"Credential '{credential_id}' not found"]
+
+        errors = []
+        for key_name in spec.required_keys:
+            if not credential.has_key(key_name):
+                errors.append(f"Missing required key '{key_name}'")
+
+        return errors
+
+    def validate_all(self) -> dict[str, list[str]]:
+        """
+        Validate all registered usage specs.
+
+        Returns:
+            Dict mapping credential_id to list of errors.
+            Only includes credentials with errors.
+        """
+        errors = {}
+        for cred_id in self._usage_specs.keys():
+            cred_errors = self.validate_for_usage(cred_id)
+            if cred_errors:
+                errors[cred_id] = cred_errors
+        return errors
+
+    def validate_credential(self, credential_id: str) -> bool:
+        """
+        Validate a credential using its provider.
+
+        Args:
+            credential_id: The credential identifier
+
+        Returns:
+            True if credential is valid
+        """
+        credential = self.get_credential(credential_id, refresh_if_needed=False)
+        if credential is None:
+            return False
+
+        provider = self.get_provider_for_credential(credential)
+        if provider is None:
+            # No provider, assume valid if has keys
+            return bool(credential.keys)
+
+        return provider.validate(credential)
+
+    # --- Lifecycle Management ---
+
+    def _should_refresh(self, credential: CredentialObject) -> bool:
+        """Check if credential should be refreshed."""
+        if not self._auto_refresh:
+            return False
+
+        if not credential.auto_refresh:
+            return False
+
+        provider = self.get_provider_for_credential(credential)
+        if provider is None:
+            return False
+
+        return provider.should_refresh(credential)
+
+    def _refresh_credential(self, credential: CredentialObject) -> CredentialObject:
+        """Refresh a credential using its provider."""
+        provider = self.get_provider_for_credential(credential)
+        if provider is None:
+            logger.warning(f"No provider found for credential '{credential.id}'")
+            return credential
+
+        try:
+            refreshed = provider.refresh(credential)
+            refreshed.last_refreshed = datetime.now(UTC)
+
+            # Persist the refreshed credential
+            self._storage.save(refreshed)
+            self._add_to_cache(refreshed)
+
+            logger.info(f"Refreshed credential '{credential.id}'")
+            return refreshed
+
+        except CredentialRefreshError as e:
+            logger.error(f"Failed to refresh credential '{credential.id}': {e}")
+            return credential
+
+    def refresh_credential(self, credential_id: str) -> CredentialObject | None:
+        """
+        Manually refresh a credential.
+
+        Args:
+            credential_id: The credential identifier
+
+        Returns:
+            The refreshed credential, or None if not found
+
+        Raises:
+            CredentialRefreshError: If refresh fails
+        """
+        credential = self.get_credential(credential_id, refresh_if_needed=False)
+        if credential is None:
+            return None
+
+        return self._refresh_credential(credential)
+
+    # --- Caching ---
+
+    def _get_from_cache(self, credential_id: str) -> CredentialObject | None:
+        """Get credential from cache if not expired."""
+        if credential_id not in self._cache:
+            return None
+
+        credential, cached_at = self._cache[credential_id]
+        age = (datetime.now(UTC) - cached_at).total_seconds()
+
+        if age > self._cache_ttl:
+            del self._cache[credential_id]
+            return None
+
+        return credential
+
+    def _add_to_cache(self, credential: CredentialObject) -> None:
+        """Add credential to cache."""
+        self._cache[credential.id] = (credential, datetime.now(UTC))
+
+    def _remove_from_cache(self, credential_id: str) -> None:
+        """Remove credential from cache."""
+        self._cache.pop(credential_id, None)
+
+    def clear_cache(self) -> None:
+        """Clear the credential cache."""
+        with self._lock:
+            self._cache.clear()
+
+    # --- Factory Methods ---
+
+    @classmethod
+    def for_testing(
+        cls,
+        credentials: dict[str, dict[str, str]],
+    ) -> CredentialStore:
+        """
+        Create a credential store for testing with mock credentials.
+
+        Args:
+            credentials: Dict mapping credential_id to {key_name: value}
+                        e.g., {"brave_search": {"api_key": "test-key"}}
+
+        Returns:
+            CredentialStore with in-memory credentials
+
+        Example:
+            store = CredentialStore.for_testing({
+                "brave_search": {"api_key": "test-brave-key"},
+                "github_oauth": {
+                    "access_token": "test-token",
+                    "refresh_token": "test-refresh"
+                }
+            })
+        """
+        # Convert test data to CredentialObjects
+        cred_objects: dict[str, CredentialObject] = {}
+
+        for cred_id, keys in credentials.items():
+            cred_objects[cred_id] = CredentialObject(
+                id=cred_id,
+                keys={k: CredentialKey(name=k, value=SecretStr(v)) for k, v in keys.items()},
+            )
+
+        return cls(
+            storage=InMemoryStorage(cred_objects),
+            auto_refresh=False,
+        )
+
+    @classmethod
+    def with_encrypted_storage(
+        cls,
+        base_path: str | None = None,
+        providers: list[CredentialProvider] | None = None,
+        **kwargs: Any,
+    ) -> CredentialStore:
+        """
+        Create a credential store with encrypted file storage.
+
+        Args:
+            base_path: Directory for credential files. Defaults to ~/.hive/credentials.
+            providers: List of credential providers
+            **kwargs: Additional arguments passed to CredentialStore
+
+        Returns:
+            CredentialStore with EncryptedFileStorage
+        """
+        from .storage import EncryptedFileStorage
+
+        return cls(
+            storage=EncryptedFileStorage(base_path),
+            providers=providers,
+            **kwargs,
+        )
+
+    @classmethod
+    def with_env_storage(
+        cls,
+        env_mapping: dict[str, str] | None = None,
+        providers: list[CredentialProvider] | None = None,
+        **kwargs: Any,
+    ) -> CredentialStore:
+        """
+        Create a credential store with environment variable storage.
+
+        Args:
+            env_mapping: Map of credential_id -> env_var_name
+            providers: List of credential providers
+            **kwargs: Additional arguments passed to CredentialStore
+
+        Returns:
+            CredentialStore with EnvVarStorage
+        """
+        return cls(
+            storage=EnvVarStorage(env_mapping),
+            providers=providers,
+            **kwargs,
+        )
+
+    @classmethod
+    def with_aden_sync(
+        cls,
+        base_url: str = "https://api.adenhq.com",
+        cache_ttl_seconds: int = 300,
+        local_path: str | None = None,
+        auto_sync: bool = True,
+        **kwargs: Any,
+    ) -> CredentialStore:
+        """
+        Create a credential store with Aden server sync.
+
+        Automatically syncs OAuth2 tokens from the Aden authentication server.
+        Falls back to local-only storage if ADEN_API_KEY is not set or Aden
+        is unreachable.
+
+        Args:
+            base_url: Aden server URL (default: https://api.adenhq.com)
+            cache_ttl_seconds: How long to cache credentials locally (default: 5 min)
+            local_path: Path for local credential storage (default: ~/.hive/credentials)
+            auto_sync: Whether to sync all credentials on startup (default: True)
+            **kwargs: Additional arguments passed to CredentialStore
+
+        Returns:
+            CredentialStore configured with Aden sync
+
+        Example:
+            # Simple usage - just set ADEN_API_KEY env var
+            store = CredentialStore.with_aden_sync()
+
+            # Get HubSpot token (auto-refreshed via Aden)
+            token = store.get_key("hubspot", "access_token")
+        """
+        import os
+        from pathlib import Path
+
+        from .storage import EncryptedFileStorage
+
+        # Determine local storage path
+        if local_path is None:
+            local_path = str(Path.home() / ".hive" / "credentials")
+
+        local_storage = EncryptedFileStorage(base_path=local_path)
+
+        # Check if Aden is configured
+        api_key = os.environ.get("ADEN_API_KEY")
+        if not api_key:
+            logger.info("ADEN_API_KEY not set, using local-only credential storage")
+            return cls(storage=local_storage, **kwargs)
+
+        # Try to setup Aden sync
+        try:
+            from .aden import (
+                AdenCachedStorage,
+                AdenClientConfig,
+                AdenCredentialClient,
+                AdenSyncProvider,
+            )
+
+            # Create Aden client
+            client = AdenCredentialClient(AdenClientConfig(base_url=base_url))
+
+            # Create sync provider
+            provider = AdenSyncProvider(client=client)
+
+            # Use cached storage for offline resilience
+            cached_storage = AdenCachedStorage(
+                local_storage=local_storage,
+                aden_provider=provider,
+                cache_ttl_seconds=cache_ttl_seconds,
+            )
+
+            store = cls(
+                storage=cached_storage,
+                providers=[provider],
+                auto_refresh=True,
+                **kwargs,
+            )
+
+            # Initial sync
+            if auto_sync:
+                synced = provider.sync_all(store)
+                logger.info(f"Synced {synced} credentials from Aden server")
+
+            return store
+
+        except ImportError:
+            logger.warning("Aden components not available, using local storage")
+            return cls(storage=local_storage, **kwargs)
+
+        except Exception as e:
+            logger.warning(f"Failed to setup Aden sync: {e}. Using local storage.")
+            return cls(storage=local_storage, **kwargs)
@@ -0,0 +1,219 @@
+"""
+Template resolution system for credential injection.
+
+This module handles {{cred.key}} patterns, enabling the bipartisan model
+where tools specify how credentials are used in HTTP requests.
+
+Template Syntax:
+    {{credential_id.key_name}} - Access specific key
+    {{credential_id}}          - Access default key (value, api_key, or access_token)
+
+Examples:
+    "Bearer {{github_oauth.access_token}}" -> "Bearer ghp_xxx"
+    "X-API-Key: {{brave_search.api_key}}"  -> "X-API-Key: BSAKxxx"
+    "{{brave_search}}"                      -> "BSAKxxx" (uses default key)
+"""
+
+from __future__ import annotations
+
+import re
+from typing import TYPE_CHECKING
+
+from .models import CredentialKeyNotFoundError, CredentialNotFoundError
+
+if TYPE_CHECKING:
+    from .store import CredentialStore
+
+
+class TemplateResolver:
+    """
+    Resolves credential templates like {{cred.key}} into actual values.
+
+    Usage:
+        resolver = TemplateResolver(credential_store)
+
+        # Resolve single template string
+        auth_header = resolver.resolve("Bearer {{github_oauth.access_token}}")
+
+        # Resolve all headers at once
+        headers = resolver.resolve_headers({
+            "Authorization": "Bearer {{github_oauth.access_token}}",
+            "X-API-Key": "{{brave_search.api_key}}"
+        })
+    """
+
+    # Matches {{credential_id}} or {{credential_id.key_name}}
+    TEMPLATE_PATTERN = re.compile(r"\{\{([a-zA-Z0-9_-]+)(?:\.([a-zA-Z0-9_-]+))?\}\}")
+
+    def __init__(self, credential_store: CredentialStore):
+        """
+        Initialize the template resolver.
+
+        Args:
+            credential_store: The credential store to resolve references against
+        """
+        self._store = credential_store
+
+    def resolve(self, template: str, fail_on_missing: bool = True) -> str:
+        """
+        Resolve all credential references in a template string.
+
+        Args:
+            template: String containing {{cred.key}} patterns
+            fail_on_missing: If True, raise error on missing credentials
+
+        Returns:
+            Template with all references replaced with actual values
+
+        Raises:
+            CredentialNotFoundError: If credential doesn't exist and fail_on_missing=True
+            CredentialKeyNotFoundError: If key doesn't exist in credential
+
+        Example:
+            >>> resolver.resolve("Bearer {{github_oauth.access_token}}")
+            "Bearer ghp_xxxxxxxxxxxx"
+        """
+
+        def replace_match(match: re.Match) -> str:
+            cred_id = match.group(1)
+            key_name = match.group(2)  # May be None
+
+            credential = self._store.get_credential(cred_id, refresh_if_needed=True)
+            if credential is None:
+                if fail_on_missing:
+                    raise CredentialNotFoundError(f"Credential '{cred_id}' not found")
+                return match.group(0)  # Return original template
+
+            # Get specific key or default
+            if key_name:
+                value = credential.get_key(key_name)
+                if value is None:
+                    raise CredentialKeyNotFoundError(
+                        f"Key '{key_name}' not found in credential '{cred_id}'"
+                    )
+            else:
+                # Use default key
+                value = credential.get_default_key()
+                if value is None:
+                    raise CredentialKeyNotFoundError(f"Credential '{cred_id}' has no keys")
+
+            # Record usage
+            credential.record_usage()
+
+            return value
+
+        return self.TEMPLATE_PATTERN.sub(replace_match, template)
+
+    def resolve_headers(
+        self,
+        header_templates: dict[str, str],
+        fail_on_missing: bool = True,
+    ) -> dict[str, str]:
+        """
+        Resolve templates in a headers dictionary.
+
+        Args:
+            header_templates: Dict of header name to template value
+            fail_on_missing: If True, raise error on missing credentials
+
+        Returns:
+            Dict with all templates resolved to actual values
+
+        Example:
+            >>> resolver.resolve_headers({
+            ...     "Authorization": "Bearer {{github_oauth.access_token}}",
+            ...     "X-API-Key": "{{brave_search.api_key}}"
+            ... })
+            {"Authorization": "Bearer ghp_xxx", "X-API-Key": "BSAKxxx"}
+        """
+        return {
+            key: self.resolve(value, fail_on_missing) for key, value in header_templates.items()
+        }
+
+    def resolve_params(
+        self,
+        param_templates: dict[str, str],
+        fail_on_missing: bool = True,
+    ) -> dict[str, str]:
+        """
+        Resolve templates in a query parameters dictionary.
+
+        Args:
+            param_templates: Dict of param name to template value
+            fail_on_missing: If True, raise error on missing credentials
+
+        Returns:
+            Dict with all templates resolved to actual values
+        """
+        return {key: self.resolve(value, fail_on_missing) for key, value in param_templates.items()}
+
+    def has_templates(self, text: str) -> bool:
+        """
+        Check if text contains any credential templates.
+
+        Args:
+            text: String to check
+
+        Returns:
+            True if text contains {{...}} patterns
+        """
+        return bool(self.TEMPLATE_PATTERN.search(text))
+
+    def extract_references(self, text: str) -> list[tuple[str, str | None]]:
+        """
+        Extract all credential references from text.
+
+        Args:
+            text: String to extract references from
+
+        Returns:
+            List of (credential_id, key_name) tuples.
+            key_name is None if only credential_id was specified.
+
+        Example:
+            >>> resolver.extract_references("{{github.token}} and {{brave_search.api_key}}")
+            [("github", "token"), ("brave_search", "api_key")]
+        """
+        return [(match.group(1), match.group(2)) for match in self.TEMPLATE_PATTERN.finditer(text)]
+
+    def validate_references(self, text: str) -> list[str]:
+        """
+        Validate all credential references in text without resolving.
+
+        Args:
+            text: String containing template references
+
+        Returns:
+            List of error messages for invalid references.
+            Empty list if all references are valid.
+        """
+        errors = []
+        references = self.extract_references(text)
+
+        for cred_id, key_name in references:
+            credential = self._store.get_credential(cred_id, refresh_if_needed=False)
+
+            if credential is None:
+                errors.append(f"Credential '{cred_id}' not found")
+                continue
+
+            if key_name:
+                if not credential.has_key(key_name):
+                    errors.append(f"Key '{key_name}' not found in credential '{cred_id}'")
+            elif not credential.keys:
+                errors.append(f"Credential '{cred_id}' has no keys")
+
+        return errors
+
+    def get_required_credentials(self, text: str) -> list[str]:
+        """
+        Get list of credential IDs required by a template string.
+
+        Args:
+            text: String containing template references
+
+        Returns:
+            List of unique credential IDs referenced in the text
+        """
+        references = self.extract_references(text)
+        return list(dict.fromkeys(cred_id for cred_id, _ in references))
@@ -0,0 +1 @@
+"""Tests for the credential store module."""
@@ -0,0 +1,707 @@
+"""
+Comprehensive tests for the credential store module.
+
+Tests cover:
+- Core models (CredentialObject, CredentialKey, CredentialUsageSpec)
+- Template resolution
+- Storage backends (InMemoryStorage, EnvVarStorage, EncryptedFileStorage)
+- Providers (StaticProvider, BearerTokenProvider)
+- Main CredentialStore
+- OAuth2 module
+"""
+
+import os
+import tempfile
+from datetime import UTC, datetime, timedelta
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+from core.framework.credentials import (
+    CompositeStorage,
+    CredentialKey,
+    CredentialKeyNotFoundError,
+    CredentialNotFoundError,
+    CredentialObject,
+    CredentialStore,
+    CredentialType,
+    CredentialUsageSpec,
+    EncryptedFileStorage,
+    EnvVarStorage,
+    InMemoryStorage,
+    StaticProvider,
+    TemplateResolver,
+)
+from pydantic import SecretStr
+
+
+class TestCredentialKey:
+    """Tests for CredentialKey model."""
+
+    def test_create_basic_key(self):
+        """Test creating a basic credential key."""
+        key = CredentialKey(name="api_key", value=SecretStr("test-value"))
+        assert key.name == "api_key"
+        assert key.get_secret_value() == "test-value"
+        assert key.expires_at is None
+        assert not key.is_expired
+
+    def test_key_with_expiration(self):
+        """Test key with expiration time."""
+        future = datetime.now(UTC) + timedelta(hours=1)
+        key = CredentialKey(name="token", value=SecretStr("xxx"), expires_at=future)
+        assert not key.is_expired
+
+    def test_expired_key(self):
+        """Test that expired key is detected."""
+        past = datetime.now(UTC) - timedelta(hours=1)
+        key = CredentialKey(name="token", value=SecretStr("xxx"), expires_at=past)
+        assert key.is_expired
+
+    def test_key_with_metadata(self):
+        """Test key with metadata."""
+        key = CredentialKey(
+            name="token",
+            value=SecretStr("xxx"),
+            metadata={"client_id": "abc", "scope": "read"},
+        )
+        assert key.metadata["client_id"] == "abc"
+
+
+class TestCredentialObject:
+    """Tests for CredentialObject model."""
+
+    def test_create_simple_credential(self):
+        """Test creating a simple API key credential."""
+        cred = CredentialObject(
+            id="brave_search",
+            credential_type=CredentialType.API_KEY,
+            keys={"api_key": CredentialKey(name="api_key", value=SecretStr("test-key"))},
+        )
+        assert cred.id == "brave_search"
+        assert cred.credential_type == CredentialType.API_KEY
+        assert cred.get_key("api_key") == "test-key"
+
+    def test_create_multi_key_credential(self):
+        """Test creating a credential with multiple keys."""
+        cred = CredentialObject(
+            id="github_oauth",
+            credential_type=CredentialType.OAUTH2,
+            keys={
+                "access_token": CredentialKey(name="access_token", value=SecretStr("ghp_xxx")),
+                "refresh_token": CredentialKey(name="refresh_token", value=SecretStr("ghr_xxx")),
+            },
+        )
+        assert cred.get_key("access_token") == "ghp_xxx"
+        assert cred.get_key("refresh_token") == "ghr_xxx"
+        assert cred.get_key("nonexistent") is None
+
+    def test_set_key(self):
+        """Test setting a key on a credential."""
+        cred = CredentialObject(id="test", keys={})
+        cred.set_key("new_key", "new_value")
+        assert cred.get_key("new_key") == "new_value"
+
+    def test_set_key_with_expiration(self):
+        """Test setting a key with expiration."""
+        cred = CredentialObject(id="test", keys={})
+        expires = datetime.now(UTC) + timedelta(hours=1)
+        cred.set_key("token", "xxx", expires_at=expires)
+        assert cred.keys["token"].expires_at == expires
+
+    def test_needs_refresh(self):
+        """Test needs_refresh property."""
+        past = datetime.now(UTC) - timedelta(hours=1)
+        cred = CredentialObject(
+            id="test",
+            keys={"token": CredentialKey(name="token", value=SecretStr("xxx"), expires_at=past)},
+        )
+        assert cred.needs_refresh
+
+    def test_get_default_key(self):
+        """Test get_default_key returns appropriate default."""
+        # With api_key
+        cred = CredentialObject(
+            id="test",
+            keys={"api_key": CredentialKey(name="api_key", value=SecretStr("key-value"))},
+        )
+        assert cred.get_default_key() == "key-value"
+
+        # With access_token
+        cred2 = CredentialObject(
+            id="test",
+            keys={
+                "access_token": CredentialKey(name="access_token", value=SecretStr("token-value"))
+            },
+        )
+        assert cred2.get_default_key() == "token-value"
+
+    def test_record_usage(self):
+        """Test recording credential usage."""
+        cred = CredentialObject(id="test", keys={})
+        assert cred.use_count == 0
+        assert cred.last_used is None
+
+        cred.record_usage()
+        assert cred.use_count == 1
+        assert cred.last_used is not None
+
+
+class TestCredentialUsageSpec:
+    """Tests for CredentialUsageSpec model."""
+
+    def test_create_usage_spec(self):
+        """Test creating a usage spec."""
+        spec = CredentialUsageSpec(
+            credential_id="brave_search",
+            required_keys=["api_key"],
+            headers={"X-Subscription-Token": "{{api_key}}"},
+        )
+        assert spec.credential_id == "brave_search"
+        assert "api_key" in spec.required_keys
+        assert "{{api_key}}" in spec.headers.values()
+
+
+class TestInMemoryStorage:
+    """Tests for InMemoryStorage."""
+
+    def test_save_and_load(self):
+        """Test saving and loading a credential."""
+        storage = InMemoryStorage()
+        cred = CredentialObject(
+            id="test",
+            keys={"key": CredentialKey(name="key", value=SecretStr("value"))},
+        )
+
+        storage.save(cred)
+        loaded = storage.load("test")
+
+        assert loaded is not None
+        assert loaded.id == "test"
+        assert loaded.get_key("key") == "value"
+
+    def test_load_nonexistent(self):
+        """Test loading a nonexistent credential."""
+        storage = InMemoryStorage()
+        assert storage.load("nonexistent") is None
+
+    def test_delete(self):
+        """Test deleting a credential."""
+        storage = InMemoryStorage()
+        cred = CredentialObject(id="test", keys={})
+        storage.save(cred)
+
+        assert storage.delete("test")
+        assert storage.load("test") is None
+        assert not storage.delete("test")
+
+    def test_list_all(self):
+        """Test listing all credentials."""
+        storage = InMemoryStorage()
+        storage.save(CredentialObject(id="a", keys={}))
+        storage.save(CredentialObject(id="b", keys={}))
+
+        ids = storage.list_all()
+        assert "a" in ids
+        assert "b" in ids
+
+    def test_exists(self):
+        """Test checking if credential exists."""
+        storage = InMemoryStorage()
+        storage.save(CredentialObject(id="test", keys={}))
+
+        assert storage.exists("test")
+        assert not storage.exists("nonexistent")
+
+    def test_clear(self):
+        """Test clearing all credentials."""
+        storage = InMemoryStorage()
+        storage.save(CredentialObject(id="test", keys={}))
+        storage.clear()
+
+        assert storage.list_all() == []
+
+
+class TestEnvVarStorage:
+    """Tests for EnvVarStorage."""
+
+    def test_load_from_env(self):
+        """Test loading credential from environment variable."""
+        with patch.dict(os.environ, {"TEST_API_KEY": "test-value"}):
+            storage = EnvVarStorage(env_mapping={"test": "TEST_API_KEY"})
+            cred = storage.load("test")
+
+            assert cred is not None
+            assert cred.get_key("api_key") == "test-value"
+
+    def test_load_nonexistent(self):
+        """Test loading when env var is not set."""
+        storage = EnvVarStorage(env_mapping={"test": "NONEXISTENT_VAR"})
+        assert storage.load("test") is None
+
+    def test_default_env_var_pattern(self):
+        """Test default env var naming pattern."""
+        with patch.dict(os.environ, {"MY_SERVICE_API_KEY": "value"}):
+            storage = EnvVarStorage()
+            cred = storage.load("my_service")
+
+            assert cred is not None
+            assert cred.get_key("api_key") == "value"
+
+    def test_save_raises(self):
+        """Test that save raises NotImplementedError."""
+        storage = EnvVarStorage()
+        with pytest.raises(NotImplementedError):
+            storage.save(CredentialObject(id="test", keys={}))
+
+    def test_delete_raises(self):
+        """Test that delete raises NotImplementedError."""
+        storage = EnvVarStorage()
+        with pytest.raises(NotImplementedError):
+            storage.delete("test")
+
+
+class TestEncryptedFileStorage:
+    """Tests for EncryptedFileStorage."""
+
+    @pytest.fixture
+    def temp_dir(self):
+        """Create a temporary directory for tests."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            yield Path(tmpdir)
+
+    @pytest.fixture
+    def storage(self, temp_dir):
+        """Create EncryptedFileStorage for tests."""
+        return EncryptedFileStorage(temp_dir)
+
+    def test_save_and_load(self, storage):
+        """Test saving and loading encrypted credential."""
+        cred = CredentialObject(
+            id="test",
+            credential_type=CredentialType.API_KEY,
+            keys={"api_key": CredentialKey(name="api_key", value=SecretStr("secret-value"))},
+        )
+
+        storage.save(cred)
+        loaded = storage.load("test")
+
+        assert loaded is not None
+        assert loaded.id == "test"
+        assert loaded.get_key("api_key") == "secret-value"
+
+    def test_encryption_key_from_env(self, temp_dir):
+        """Test using encryption key from environment variable."""
+        from cryptography.fernet import Fernet
+
+        key = Fernet.generate_key().decode()
+        with patch.dict(os.environ, {"HIVE_CREDENTIAL_KEY": key}):
+            storage = EncryptedFileStorage(temp_dir)
+            cred = CredentialObject(
+                id="test", keys={"k": CredentialKey(name="k", value=SecretStr("v"))}
+            )
+            storage.save(cred)
+
+            # Create new storage instance with same key
+            storage2 = EncryptedFileStorage(temp_dir)
+            loaded = storage2.load("test")
+            assert loaded is not None
+            assert loaded.get_key("k") == "v"
+
+    def test_list_all(self, storage):
+        """Test listing all credentials."""
+        storage.save(CredentialObject(id="cred1", keys={}))
+        storage.save(CredentialObject(id="cred2", keys={}))
+
+        ids = storage.list_all()
+        assert "cred1" in ids
+        assert "cred2" in ids
+
+    def test_delete(self, storage):
+        """Test deleting a credential."""
+        storage.save(CredentialObject(id="test", keys={}))
+        assert storage.delete("test")
+        assert storage.load("test") is None
+
+
+class TestCompositeStorage:
+    """Tests for CompositeStorage."""
+
+    def test_read_from_primary(self):
+        """Test reading from primary storage."""
+        primary = InMemoryStorage()
+        primary.save(
+            CredentialObject(
+                id="test", keys={"k": CredentialKey(name="k", value=SecretStr("primary"))}
+            )
+        )
+
+        fallback = InMemoryStorage()
+        fallback.save(
+            CredentialObject(
+                id="test", keys={"k": CredentialKey(name="k", value=SecretStr("fallback"))}
+            )
+        )
+
+        storage = CompositeStorage(primary, [fallback])
+        cred = storage.load("test")
+
+        # Should get from primary
+        assert cred.get_key("k") == "primary"
+
+    def test_fallback_when_not_in_primary(self):
+        """Test fallback when credential not in primary."""
+        primary = InMemoryStorage()
+        fallback = InMemoryStorage()
+        fallback.save(
+            CredentialObject(
+                id="test", keys={"k": CredentialKey(name="k", value=SecretStr("fallback"))}
+            )
+        )
+
+        storage = CompositeStorage(primary, [fallback])
+        cred = storage.load("test")
+
+        assert cred.get_key("k") == "fallback"
+
+    def test_write_to_primary_only(self):
+        """Test that writes go to primary only."""
+        primary = InMemoryStorage()
+        fallback = InMemoryStorage()
+
+        storage = CompositeStorage(primary, [fallback])
+        storage.save(CredentialObject(id="test", keys={}))
+
+        assert primary.exists("test")
+        assert not fallback.exists("test")
+
+
+class TestStaticProvider:
+    """Tests for StaticProvider."""
+
+    def test_provider_id(self):
+        """Test provider ID."""
+        provider = StaticProvider()
+        assert provider.provider_id == "static"
+
+    def test_supported_types(self):
+        """Test supported credential types."""
+        provider = StaticProvider()
+        assert CredentialType.API_KEY in provider.supported_types
+        assert CredentialType.CUSTOM in provider.supported_types
+
+    def test_refresh_returns_unchanged(self):
+        """Test that refresh returns credential unchanged."""
+        provider = StaticProvider()
+        cred = CredentialObject(
+            id="test", keys={"k": CredentialKey(name="k", value=SecretStr("v"))}
+        )
+
+        refreshed = provider.refresh(cred)
+        assert refreshed.get_key("k") == "v"
+
+    def test_validate_with_keys(self):
+        """Test validation with keys present."""
+        provider = StaticProvider()
+        cred = CredentialObject(
+            id="test", keys={"k": CredentialKey(name="k", value=SecretStr("v"))}
+        )
+
+        assert provider.validate(cred)
+
+    def test_validate_without_keys(self):
+        """Test validation without keys."""
+        provider = StaticProvider()
+        cred = CredentialObject(id="test", keys={})
+
+        assert not provider.validate(cred)
+
+    def test_should_refresh(self):
+        """Test that static provider never needs refresh."""
+        provider = StaticProvider()
+        cred = CredentialObject(id="test", keys={})
+
+        assert not provider.should_refresh(cred)
+
+
+class TestTemplateResolver:
+    """Tests for TemplateResolver."""
+
+    @pytest.fixture
+    def store(self):
+        """Create a test store with credentials."""
+        return CredentialStore.for_testing(
+            {
+                "brave_search": {"api_key": "test-brave-key"},
+                "github_oauth": {"access_token": "ghp_xxx", "refresh_token": "ghr_xxx"},
+            }
+        )
+
+    @pytest.fixture
+    def resolver(self, store):
+        """Create a resolver with the test store."""
+        return TemplateResolver(store)
+
+    def test_resolve_simple(self, resolver):
+        """Test resolving a simple template."""
+        result = resolver.resolve("Bearer {{github_oauth.access_token}}")
+        assert result == "Bearer ghp_xxx"
+
+    def test_resolve_multiple(self, resolver):
+        """Test resolving multiple templates."""
+        result = resolver.resolve("{{github_oauth.access_token}} and {{brave_search.api_key}}")
+        assert "ghp_xxx" in result
+        assert "test-brave-key" in result
+
+    def test_resolve_default_key(self, resolver):
+        """Test resolving credential without key specified."""
+        result = resolver.resolve("Key: {{brave_search}}")
+        assert "test-brave-key" in result
+
+    def test_resolve_headers(self, resolver):
+        """Test resolving headers dict."""
+        headers = resolver.resolve_headers(
+            {
+                "Authorization": "Bearer {{github_oauth.access_token}}",
+                "X-API-Key": "{{brave_search.api_key}}",
+            }
+        )
+        assert headers["Authorization"] == "Bearer ghp_xxx"
+        assert headers["X-API-Key"] == "test-brave-key"
+
+    def test_resolve_missing_credential(self, resolver):
+        """Test error on missing credential."""
+        with pytest.raises(CredentialNotFoundError):
+            resolver.resolve("{{nonexistent.key}}")
+
+    def test_resolve_missing_key(self, resolver):
+        """Test error on missing key."""
+        with pytest.raises(CredentialKeyNotFoundError):
+            resolver.resolve("{{github_oauth.nonexistent}}")
+
+    def test_has_templates(self, resolver):
+        """Test detecting templates in text."""
+        assert resolver.has_templates("{{cred.key}}")
+        assert resolver.has_templates("Bearer {{token}}")
+        assert not resolver.has_templates("no templates here")
+
+    def test_extract_references(self, resolver):
+        """Test extracting credential references."""
+        refs = resolver.extract_references("{{github.token}} and {{brave.key}}")
+        assert ("github", "token") in refs
+        assert ("brave", "key") in refs
+
+
+class TestCredentialStore:
+    """Tests for CredentialStore."""
+
+    def test_for_testing_factory(self):
+        """Test creating store for testing."""
+        store = CredentialStore.for_testing({"test": {"api_key": "value"}})
+
+        assert store.get("test") == "value"
+        assert store.get_key("test", "api_key") == "value"
+
+    def test_get_credential(self):
+        """Test getting a credential."""
+        store = CredentialStore.for_testing({"test": {"key": "value"}})
+
+        cred = store.get_credential("test")
+        assert cred is not None
+        assert cred.get_key("key") == "value"
+
+    def test_get_nonexistent(self):
+        """Test getting nonexistent credential."""
+        store = CredentialStore.for_testing({})
+        assert store.get_credential("nonexistent") is None
+        assert store.get("nonexistent") is None
+
+    def test_save_and_load(self):
+        """Test saving and loading a credential."""
+        store = CredentialStore.for_testing({})
+
+        cred = CredentialObject(id="new", keys={"k": CredentialKey(name="k", value=SecretStr("v"))})
+        store.save_credential(cred)
+
+        loaded = store.get_credential("new")
+        assert loaded is not None
+        assert loaded.get_key("k") == "v"
+
+    def test_delete_credential(self):
+        """Test deleting a credential."""
+        store = CredentialStore.for_testing({"test": {"k": "v"}})
+
+        assert store.delete_credential("test")
+        assert store.get_credential("test") is None
+
+    def test_list_credentials(self):
+        """Test listing all credentials."""
+        store = CredentialStore.for_testing({"a": {"k": "v"}, "b": {"k": "v"}})
+
+        ids = store.list_credentials()
+        assert "a" in ids
+        assert "b" in ids
+
+    def test_is_available(self):
+        """Test checking credential availability."""
+        store = CredentialStore.for_testing({"test": {"k": "v"}})
+
+        assert store.is_available("test")
+        assert not store.is_available("nonexistent")
+
+    def test_resolve_templates(self):
+        """Test template resolution through store."""
+        store = CredentialStore.for_testing({"test": {"api_key": "value"}})
+
+        result = store.resolve("Key: {{test.api_key}}")
+        assert result == "Key: value"
+
+    def test_resolve_headers(self):
+        """Test resolving headers through store."""
+        store = CredentialStore.for_testing({"test": {"token": "xxx"}})
+
+        headers = store.resolve_headers({"Authorization": "Bearer {{test.token}}"})
+        assert headers["Authorization"] == "Bearer xxx"
+
+    def test_register_provider(self):
+        """Test registering a provider."""
+        store = CredentialStore.for_testing({})
+        provider = StaticProvider()
+
+        store.register_provider(provider)
+        assert store.get_provider("static") is provider
+
+    def test_register_usage_spec(self):
+        """Test registering a usage spec."""
+        store = CredentialStore.for_testing({})
+        spec = CredentialUsageSpec(
+            credential_id="test",
+            required_keys=["api_key"],
+            headers={"X-Key": "{{api_key}}"},
+        )
+
+        store.register_usage(spec)
+        assert store.get_usage_spec("test") is spec
+
+    def test_validate_for_usage(self):
+        """Test validating credential for usage spec."""
+        store = CredentialStore.for_testing({"test": {"api_key": "value"}})
+        spec = CredentialUsageSpec(credential_id="test", required_keys=["api_key"])
+        store.register_usage(spec)
+
+        errors = store.validate_for_usage("test")
+        assert errors == []
+
+    def test_validate_for_usage_missing_key(self):
+        """Test validation with missing required key."""
+        store = CredentialStore.for_testing({"test": {"other_key": "value"}})
+        spec = CredentialUsageSpec(credential_id="test", required_keys=["api_key"])
+        store.register_usage(spec)
+
+        errors = store.validate_for_usage("test")
+        assert "api_key" in errors[0]
+
+    def test_caching(self):
+        """Test that credentials are cached."""
+        storage = InMemoryStorage()
+        store = CredentialStore(storage=storage, cache_ttl_seconds=60)
+
+        storage.save(
+            CredentialObject(id="test", keys={"k": CredentialKey(name="k", value=SecretStr("v"))})
+        )
+
+        # First load
+        store.get_credential("test")
+
+        # Delete from storage
+        storage.delete("test")
+
+        # Should still get from cache
+        cred2 = store.get_credential("test")
+        assert cred2 is not None
+
+    def test_clear_cache(self):
+        """Test clearing the cache."""
+        storage = InMemoryStorage()
+        store = CredentialStore(storage=storage)
+
+        storage.save(CredentialObject(id="test", keys={}))
+        store.get_credential("test")  # Cache it
+
+        storage.delete("test")
+        store.clear_cache()
+
+        # Should not find in cache now
+        assert store.get_credential("test") is None
+
+
+class TestOAuth2Module:
+    """Tests for OAuth2 module."""
+
+    def test_oauth2_token_from_response(self):
+        """Test creating OAuth2Token from token response."""
+        from core.framework.credentials.oauth2 import OAuth2Token
+
+        response = {
+            "access_token": "xxx",
+            "token_type": "Bearer",
+            "expires_in": 3600,
+            "refresh_token": "yyy",
+            "scope": "read write",
+        }
+
+        token = OAuth2Token.from_token_response(response)
+        assert token.access_token == "xxx"
+        assert token.token_type == "Bearer"
+        assert token.refresh_token == "yyy"
+        assert token.scope == "read write"
+        assert token.expires_at is not None
+
+    def test_token_is_expired(self):
+        """Test token expiration check."""
+        from core.framework.credentials.oauth2 import OAuth2Token
+
+        # Not expired
+        future = datetime.now(UTC) + timedelta(hours=1)
+        token = OAuth2Token(access_token="xxx", expires_at=future)
+        assert not token.is_expired
+
+        # Expired
+        past = datetime.now(UTC) - timedelta(hours=1)
+        expired_token = OAuth2Token(access_token="xxx", expires_at=past)
+        assert expired_token.is_expired
+
+    def test_token_can_refresh(self):
+        """Test token refresh capability check."""
+        from core.framework.credentials.oauth2 import OAuth2Token
+
+        with_refresh = OAuth2Token(access_token="xxx", refresh_token="yyy")
+        assert with_refresh.can_refresh
+
+        without_refresh = OAuth2Token(access_token="xxx")
+        assert not without_refresh.can_refresh
+
+    def test_oauth2_config_validation(self):
+        """Test OAuth2Config validation."""
+        from core.framework.credentials.oauth2 import OAuth2Config, TokenPlacement
+
+        # Valid config
+        config = OAuth2Config(
+            token_url="https://example.com/token", client_id="id", client_secret="secret"
+        )
+        assert config.token_url == "https://example.com/token"
+
+        # Missing token_url
+        with pytest.raises(ValueError):
+            OAuth2Config(token_url="")
+
+        # HEADER_CUSTOM without custom_header_name
+        with pytest.raises(ValueError):
+            OAuth2Config(
+                token_url="https://example.com/token",
+                token_placement=TokenPlacement.HEADER_CUSTOM,
+            )
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
@@ -0,0 +1,55 @@
+"""
+HashiCorp Vault integration for the credential store.
+
+This module provides enterprise-grade secret management through
+HashiCorp Vault integration.
+
+Quick Start:
+    from core.framework.credentials import CredentialStore
+    from core.framework.credentials.vault import HashiCorpVaultStorage
+
+    # Configure Vault storage
+    storage = HashiCorpVaultStorage(
+        url="https://vault.example.com:8200",
+        # token read from VAULT_TOKEN env var
+        mount_point="secret",
+        path_prefix="hive/agents/prod"
+    )
+
+    # Create credential store with Vault backend
+    store = CredentialStore(storage=storage)
+
+    # Use normally - credentials are stored in Vault
+    credential = store.get_credential("my_api")
+
+Requirements:
+    pip install hvac
+
+Authentication:
+    Set the VAULT_TOKEN environment variable or pass the token directly:
+
+        export VAULT_TOKEN="hvs.xxxxxxxxxxxxx"
+
+    For production, consider using Vault auth methods:
+    - Kubernetes auth
+    - AppRole auth
+    - AWS IAM auth
+
+Vault Configuration:
+    Ensure KV v2 secrets engine is enabled:
+
+        vault secrets enable -path=secret kv-v2
+
+    Grant appropriate policies (the paths below assume path_prefix="hive/credentials"):
+
+        path "secret/data/hive/credentials/*" {
+            capabilities = ["create", "read", "update", "delete", "list"]
+        }
+        path "secret/metadata/hive/credentials/*" {
+            capabilities = ["list", "delete"]
+        }
+"""
+
+from .hashicorp import HashiCorpVaultStorage
+
+__all__ = ["HashiCorpVaultStorage"]
@@ -0,0 +1,394 @@
+"""
+HashiCorp Vault storage adapter.
+
+Provides integration with HashiCorp Vault for enterprise secret management.
+Requires the 'hvac' package: pip install hvac
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+from datetime import datetime
+from typing import Any
+
+from pydantic import SecretStr
+
+from ..models import CredentialKey, CredentialObject, CredentialType
+from ..storage import CredentialStorage
+
+logger = logging.getLogger(__name__)
+
+
+class HashiCorpVaultStorage(CredentialStorage):
+    """
+    HashiCorp Vault storage adapter.
+
+    Features:
+    - KV v2 secrets engine support
+    - Namespace support (Enterprise)
+    - Automatic secret versioning
+    - Audit logging via Vault
+
+    The adapter stores credentials in Vault's KV v2 secrets engine with
+    the following structure:
+
+        {mount_point}/data/{path_prefix}/{credential_id}
+        └── data:
+            ├── _type: "oauth2"
+            ├── access_token: "xxx"
+            ├── refresh_token: "yyy"
+            ├── _expires_access_token: "2024-01-26T12:00:00"
+            └── _provider_id: "oauth2"
+
+    Example:
+        storage = HashiCorpVaultStorage(
+            url="https://vault.example.com:8200",
+            token="hvs.xxx",  # Or use VAULT_TOKEN env var
+            mount_point="secret",
+            path_prefix="hive/credentials"
+        )
+
+        store = CredentialStore(storage=storage)
+
+        # Credentials are now stored in Vault
+        store.save_credential(credential)
+        credential = store.get_credential("my_api")
+
+    Authentication:
+        The adapter uses token-based authentication. The token can be provided:
+        1. Directly via the 'token' parameter
+        2. Via the VAULT_TOKEN environment variable
+
+        For production, consider using:
+        - Kubernetes auth method
+        - AppRole auth method
+        - AWS IAM auth method
+
+    Requirements:
+        pip install hvac
+    """
+
+    def __init__(
+        self,
+        url: str,
+        token: str | None = None,
+        mount_point: str = "secret",
+        path_prefix: str = "hive/credentials",
+        namespace: str | None = None,
+        verify_ssl: bool = True,
+    ):
+        """
+        Connect to Vault and verify that the token is accepted.
+
+        Args:
+            url: Base URL of the Vault server (e.g., https://vault.example.com:8200)
+            token: Vault token; when empty or None the VAULT_TOKEN env var is used
+            mount_point: Mount point of the KV secrets engine
+            path_prefix: Prefix prepended to every credential path
+            namespace: Vault namespace, forwarded to the hvac client
+            verify_ssl: Forwarded to the hvac client as its ``verify`` option
+
+        Raises:
+            ImportError: If the 'hvac' package cannot be imported
+            ValueError: If no token is available or Vault rejects the token
+        """
+        # hvac is an optional dependency, so it is imported lazily here.
+        try:
+            import hvac
+        except ImportError as exc:
+            raise ImportError(
+                "HashiCorp Vault support requires 'hvac'. Install with: pip install hvac"
+            ) from exc
+
+        resolved_token = token or os.environ.get("VAULT_TOKEN")
+
+        self._url = url
+        self._token = resolved_token
+        self._mount = mount_point
+        self._prefix = path_prefix
+        self._namespace = namespace
+
+        if not resolved_token:
+            raise ValueError(
+                "Vault token required. Set VAULT_TOKEN env var or pass token parameter."
+            )
+
+        client_options = {
+            "url": url,
+            "token": resolved_token,
+            "namespace": namespace,
+            "verify": verify_ssl,
+        }
+        self._client = hvac.Client(**client_options)
+
+        # Fail fast: an unauthenticated client would only fail later on first use.
+        authenticated = self._client.is_authenticated()
+        if not authenticated:
+            raise ValueError("Vault authentication failed. Check token and server URL.")
+
+        logger.info(f"Connected to HashiCorp Vault at {url}")
+
+    def _path(self, credential_id: str) -> str:
+        """Return the Vault path for a credential: ``<prefix>/<sanitized id>``."""
+        # Path separators in the id become "_" so the id stays a single path segment.
+        sanitized = credential_id.translate(str.maketrans("/\\", "__"))
+        return f"{self._prefix}/{sanitized}"
+
+    def save(self, credential: CredentialObject) -> None:
+        """
+        Write the credential to Vault KV v2 (create or update).
+
+        Args:
+            credential: The credential to persist
+
+        Raises:
+            Exception: Any error from the Vault client is logged and re-raised
+        """
+        vault_path = self._path(credential.id)
+        payload = self._serialize_for_vault(credential)
+
+        try:
+            self._client.secrets.kv.v2.create_or_update_secret(
+                path=vault_path,
+                secret=payload,
+                mount_point=self._mount,
+            )
+        except Exception as exc:
+            logger.error(f"Failed to save credential '{credential.id}' to Vault: {exc}")
+            raise
+        else:
+            logger.debug(f"Saved credential '{credential.id}' to Vault at {vault_path}")
+
+    def load(self, credential_id: str) -> CredentialObject | None:
+        """
+        Load a credential from Vault KV v2.
+
+        Args:
+            credential_id: The credential identifier
+
+        Returns:
+            The reconstructed CredentialObject, or None if Vault reports that
+            the secret does not exist
+
+        Raises:
+            Exception: Any error not classified as "not found" is logged and re-raised
+        """
+        path = self._path(credential_id)
+
+        try:
+            response = self._client.secrets.kv.v2.read_secret_version(
+                path=path,
+                mount_point=self._mount,
+            )
+            data = response["data"]["data"]
+            return self._deserialize_from_vault(credential_id, data)
+        except Exception as e:
+            # hvac reports HTTP 404 as an InvalidPath exception whose text is not
+            # guaranteed to contain "404" or "not found", so the exception class
+            # is matched by name (no hvac import needed) before the text match.
+            error_str = str(e).lower()
+            is_not_found = (
+                any(cls.__name__ == "InvalidPath" for cls in type(e).__mro__)
+                or "not found" in error_str
+                or "404" in error_str
+            )
+            if is_not_found:
+                logger.debug(f"Credential '{credential_id}' not found in Vault")
+                return None
+            logger.error(f"Failed to load credential '{credential_id}' from Vault: {e}")
+            raise
+
+    def delete(self, credential_id: str) -> bool:
+        """
+        Permanently delete a credential from Vault (metadata and all versions).
+
+        Args:
+            credential_id: The credential identifier
+
+        Returns:
+            True if Vault accepted the delete call, False if the error raised
+            by the client is classified as "not found"
+
+        Raises:
+            Exception: Any other error is logged and re-raised
+        """
+        path = self._path(credential_id)
+
+        try:
+            self._client.secrets.kv.v2.delete_metadata_and_all_versions(
+                path=path,
+                mount_point=self._mount,
+            )
+            logger.debug(f"Deleted credential '{credential_id}' from Vault")
+            return True
+        except Exception as e:
+            # hvac reports HTTP 404 as an InvalidPath exception whose text is not
+            # guaranteed to contain "404" or "not found", so the exception class
+            # is matched by name (no hvac import needed) before the text match.
+            error_str = str(e).lower()
+            is_not_found = (
+                any(cls.__name__ == "InvalidPath" for cls in type(e).__mro__)
+                or "not found" in error_str
+                or "404" in error_str
+            )
+            if is_not_found:
+                return False
+            logger.error(f"Failed to delete credential '{credential_id}' from Vault: {e}")
+            raise
+
+    def list_all(self) -> list[str]:
+        """
+        List all credential names stored directly under the path prefix.
+
+        Returns:
+            The key names reported by Vault with trailing slashes removed, or
+            an empty list if the prefix does not exist
+
+        Raises:
+            Exception: Any error not classified as "not found" is logged and re-raised
+        """
+        try:
+            response = self._client.secrets.kv.v2.list_secrets(
+                path=self._prefix,
+                mount_point=self._mount,
+            )
+            keys = response.get("data", {}).get("keys", [])
+            # Remove trailing slashes from folder names
+            return [k.rstrip("/") for k in keys]
+        except Exception as e:
+            # hvac reports HTTP 404 as an InvalidPath exception whose text is not
+            # guaranteed to contain "404" or "not found", so the exception class
+            # is matched by name (no hvac import needed) before the text match.
+            error_str = str(e).lower()
+            is_not_found = (
+                any(cls.__name__ == "InvalidPath" for cls in type(e).__mro__)
+                or "not found" in error_str
+                or "404" in error_str
+            )
+            if is_not_found:
+                return []
+            logger.error(f"Failed to list credentials from Vault: {e}")
+            raise
+
+    def exists(self, credential_id: str) -> bool:
+        """
+        Check whether a credential can be read from Vault.
+
+        This method never raises. A failed read returns False; failures that
+        are not a plain "not found" (for example an outage or a permission
+        error) are additionally logged so they are not mistaken for a
+        missing credential.
+
+        Args:
+            credential_id: The credential identifier
+
+        Returns:
+            True if the secret was read successfully, False otherwise
+        """
+        try:
+            path = self._path(credential_id)
+            self._client.secrets.kv.v2.read_secret_version(
+                path=path,
+                mount_point=self._mount,
+            )
+            return True
+        except Exception as e:
+            # hvac reports HTTP 404 as an InvalidPath exception whose text is not
+            # guaranteed to contain "404" or "not found", so the exception class
+            # is matched by name (no hvac import needed) before the text match.
+            error_str = str(e).lower()
+            is_not_found = (
+                any(cls.__name__ == "InvalidPath" for cls in type(e).__mro__)
+                or "not found" in error_str
+                or "404" in error_str
+            )
+            if not is_not_found:
+                logger.warning(f"Could not check credential '{credential_id}' in Vault: {e}")
+            return False
+
+    def _serialize_for_vault(self, credential: CredentialObject) -> dict[str, Any]:
+        """
+        Flatten a credential into the key/value dict stored as a Vault secret.
+
+        Credential-level fields use reserved "_"-prefixed names; each key is
+        stored under its own name, with optional "_expires_<name>" and
+        "_metadata_<name>" companions.
+        """
+        payload: dict[str, Any] = {"_type": credential.credential_type.value}
+
+        # Optional credential-level fields are written only when truthy.
+        optional_fields = (
+            ("_provider_id", credential.provider_id),
+            ("_description", credential.description),
+            ("_auto_refresh", "true" if credential.auto_refresh else None),
+        )
+        payload.update({name: val for name, val in optional_fields if val})
+
+        for name, cred_key in credential.keys.items():
+            payload[name] = cred_key.get_secret_value()
+
+            expiry = cred_key.expires_at
+            if expiry:
+                payload[f"_expires_{name}"] = expiry.isoformat()
+
+            meta = cred_key.metadata
+            if meta:
+                # Stored as the dict's str() form; the loader parses it back.
+                payload[f"_metadata_{name}"] = str(meta)
+
+        return payload
+
+    def _deserialize_from_vault(self, credential_id: str, data: dict[str, Any]) -> CredentialObject:
+        """
+        Reconstruct a credential from the key/value dict of a Vault secret.
+
+        Entries whose name starts with "_" are treated as bookkeeping fields
+        ("_type", "_provider_id", "_description", "_auto_refresh",
+        "_expires_<key>", "_metadata_<key>"); every other entry becomes a
+        CredentialKey. Unparseable expiry or metadata values are ignored.
+
+        Args:
+            credential_id: Identifier assigned to the returned credential
+            data: Secret payload as read from Vault; it is not modified
+
+        Returns:
+            The reconstructed CredentialObject
+        """
+        import ast
+
+        # Work on a shallow copy: the pops below must not mutate the caller's dict.
+        fields = dict(data)
+
+        # Extract credential-level fields
+        cred_type = CredentialType(fields.pop("_type", "api_key"))
+        provider_id = fields.pop("_provider_id", None)
+        description = fields.pop("_description", "")
+        auto_refresh = fields.pop("_auto_refresh", "") == "true"
+
+        keys: dict[str, CredentialKey] = {}
+
+        for key_name, value in fields.items():
+            if key_name.startswith("_"):
+                continue
+
+            # Check for expiration
+            expires_at = None
+            expires_key = f"_expires_{key_name}"
+            if expires_key in fields:
+                try:
+                    expires_at = datetime.fromisoformat(fields[expires_key])
+                except (ValueError, TypeError):
+                    pass
+
+            # Check for metadata (stored as the str() form of a dict)
+            metadata: dict[str, Any] = {}
+            metadata_key = f"_metadata_{key_name}"
+            if metadata_key in fields:
+                try:
+                    parsed = ast.literal_eval(fields[metadata_key])
+                except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
+                    parsed = None
+                # Only a dict is valid metadata; any other literal is discarded.
+                if isinstance(parsed, dict):
+                    metadata = parsed
+
+            keys[key_name] = CredentialKey(
+                name=key_name,
+                value=SecretStr(value),
+                expires_at=expires_at,
+                metadata=metadata,
+            )
+
+        return CredentialObject(
+            id=credential_id,
+            credential_type=cred_type,
+            keys=keys,
+            provider_id=provider_id,
+            description=description,
+            auto_refresh=auto_refresh,
+        )
+
+    # --- Vault-Specific Operations ---
+
+    def get_secret_metadata(self, credential_id: str) -> dict[str, Any] | None:
+        """
+        Fetch the KV v2 metadata record Vault keeps for a secret.
+
+        Args:
+            credential_id: The credential identifier
+
+        Returns:
+            The "data" section of the metadata response (an empty dict if the
+            response has none), or None if the lookup fails for any reason
+        """
+        vault_path = self._path(credential_id)
+        kv_v2 = self._client.secrets.kv.v2
+
+        try:
+            return kv_v2.read_secret_metadata(
+                path=vault_path,
+                mount_point=self._mount,
+            ).get("data", {})
+        except Exception:
+            # Best effort: every failure is reported as "no metadata".
+            return None
+
+    def soft_delete(self, credential_id: str, versions: list[int] | None = None) -> bool:
+        """
+        Soft delete specific versions (can be recovered).
+
+        Args:
+            credential_id: The credential identifier
+            versions: Version numbers to delete. If None, deletes latest.
+                An empty list deletes nothing.
+
+        Returns:
+            True if successful (including the empty-list no-op), False if the
+            Vault call failed; the failure is logged
+        """
+        path = self._path(credential_id)
+
+        # Distinguish "not given" from "given but empty": an empty list must
+        # not fall through to deleting the latest version.
+        if versions is not None and not versions:
+            return True
+
+        try:
+            if versions is not None:
+                self._client.secrets.kv.v2.delete_secret_versions(
+                    path=path,
+                    versions=versions,
+                    mount_point=self._mount,
+                )
+            else:
+                self._client.secrets.kv.v2.delete_latest_version_of_secret(
+                    path=path,
+                    mount_point=self._mount,
+                )
+            return True
+        except Exception as e:
+            logger.error(f"Soft delete failed for '{credential_id}': {e}")
+            return False
+
+    def undelete(self, credential_id: str, versions: list[int]) -> bool:
+        """
+        Restore versions that were previously soft-deleted.
+
+        Args:
+            credential_id: The credential identifier
+            versions: Version numbers to recover
+
+        Returns:
+            True if the Vault call succeeded, False if it raised (the error is logged)
+        """
+        request = {
+            "path": self._path(credential_id),
+            "versions": versions,
+            "mount_point": self._mount,
+        }
+
+        try:
+            self._client.secrets.kv.v2.undelete_secret_versions(**request)
+        except Exception as exc:
+            logger.error(f"Undelete failed for '{credential_id}': {exc}")
+            return False
+        return True
+
+    def load_version(self, credential_id: str, version: int) -> CredentialObject | None:
+        """
+        Read one specific stored version of a credential.
+
+        Args:
+            credential_id: The credential identifier
+            version: Version number to load
+
+        Returns:
+            The reconstructed CredentialObject, or None if reading or
+            deserializing fails for any reason
+        """
+        read_args = {
+            "path": self._path(credential_id),
+            "version": version,
+            "mount_point": self._mount,
+        }
+
+        try:
+            secret = self._client.secrets.kv.v2.read_secret_version(**read_args)
+            return self._deserialize_from_vault(credential_id, secret["data"]["data"])
+        except Exception:
+            # Best effort: every failure is reported as "no such version".
+            return None
@@ -0,0 +1,80 @@
+"""Graph structures: Goals, Nodes, Edges, and Flexible Execution."""
+
+from framework.graph.code_sandbox import CodeSandbox, safe_eval, safe_exec
+from framework.graph.conversation import ConversationStore, Message, NodeConversation
+from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
+from framework.graph.executor import GraphExecutor
+from framework.graph.flexible_executor import ExecutorConfig, FlexibleGraphExecutor
+from framework.graph.goal import Constraint, Goal, GoalStatus, SuccessCriterion
+from framework.graph.judge import HybridJudge, create_default_judge
+from framework.graph.node import NodeContext, NodeProtocol, NodeResult, NodeSpec
+
+# Flexible execution (Worker-Judge pattern)
+from framework.graph.plan import (
+    ActionSpec,
+    ActionType,
+    # HITL (Human-in-the-loop)
+    ApprovalDecision,
+    ApprovalRequest,
+    ApprovalResult,
+    EvaluationRule,
+    ExecutionStatus,
+    Judgment,
+    JudgmentAction,
+    Plan,
+    PlanExecutionResult,
+    PlanStep,
+    StepStatus,
+    load_export,
+)
+from framework.graph.worker_node import StepExecutionResult, WorkerNode
+
+__all__ = [
+    # Goal
+    "Goal",
+    "SuccessCriterion",
+    "Constraint",
+    "GoalStatus",
+    # Node
+    "NodeSpec",
+    "NodeContext",
+    "NodeResult",
+    "NodeProtocol",
+    # Edge
+    "EdgeSpec",
+    "EdgeCondition",
+    "GraphSpec",
+    # Executor (fixed graph)
+    "GraphExecutor",
+    # Plan (flexible execution)
+    "Plan",
+    "PlanStep",
+    "ActionSpec",
+    "ActionType",
+    "StepStatus",
+    "Judgment",
+    "JudgmentAction",
+    "EvaluationRule",
+    "PlanExecutionResult",
+    "ExecutionStatus",
+    "load_export",
+    # HITL (Human-in-the-loop)
+    "ApprovalDecision",
+    "ApprovalRequest",
+    "ApprovalResult",
+    # Worker-Judge
+    "HybridJudge",
+    "create_default_judge",
+    "WorkerNode",
+    "StepExecutionResult",
+    "FlexibleGraphExecutor",
+    "ExecutorConfig",
+    # Code Sandbox
+    "CodeSandbox",
+    "safe_exec",
+    "safe_eval",
+    # Conversation
+    "NodeConversation",
+    "ConversationStore",
+    "Message",
+]
@@ -0,0 +1,413 @@
+"""
+Code Sandbox for Safe Execution of Dynamic Code.
+
+Provides a restricted execution environment for code generated by
+the external planner. This is critical for open-ended planning where
+the planner can create arbitrary code actions.
+
+Security measures:
+1. Restricted builtins (no file I/O, no imports of dangerous modules)
+2. Timeout enforcement
+3. Memory limits (via resource module on Unix) -- NOTE: not implemented in
+   this module yet; no memory limit is currently enforced
+4. Namespace isolation
+"""
+
+import ast
+import signal
+import sys
+from contextlib import contextmanager
+from dataclasses import dataclass, field
+from typing import Any
+
+# Safe builtins whitelist.
+# CodeSandbox copies this mapping into the "__builtins__" entry of the
+# execution namespace, so these are the only builtins sandboxed code can see.
+# Names that are not listed (for example print, open, getattr) are unavailable.
+SAFE_BUILTINS = {
+    # Basic types
+    "True": True,
+    "False": False,
+    "None": None,
+    # Type constructors
+    "bool": bool,
+    "int": int,
+    "float": float,
+    "str": str,
+    "list": list,
+    "dict": dict,
+    "set": set,
+    "tuple": tuple,
+    "frozenset": frozenset,
+    # Basic functions
+    "abs": abs,
+    "all": all,
+    "any": any,
+    "bin": bin,
+    "chr": chr,
+    "divmod": divmod,
+    "enumerate": enumerate,
+    "filter": filter,
+    "format": format,
+    "hex": hex,
+    "isinstance": isinstance,
+    "issubclass": issubclass,
+    "iter": iter,
+    "len": len,
+    "map": map,
+    "max": max,
+    "min": min,
+    "next": next,
+    "oct": oct,
+    "ord": ord,
+    "pow": pow,
+    "range": range,
+    "repr": repr,
+    "reversed": reversed,
+    "round": round,
+    "slice": slice,
+    "sorted": sorted,
+    "sum": sum,
+    "zip": zip,
+}
+
+# Modules that can be imported through RestrictedImporter (exact names only).
+# NOTE(review): Import/ImportFrom are also listed in BLOCKED_AST_NODES below, so
+# `import` statements are rejected during validation regardless of this
+# whitelist -- confirm that is intended.
+ALLOWED_MODULES = {
+    "math",
+    "json",
+    "re",
+    "datetime",
+    "collections",
+    "itertools",
+    "functools",
+    "operator",
+    "string",
+    "random",
+    "statistics",
+    "decimal",
+    "fractions",
+}
+
+# Dangerous AST nodes to block.
+# Default for CodeValidator: code containing any of these node types fails validation.
+BLOCKED_AST_NODES = {
+    ast.Import,
+    ast.ImportFrom,
+    ast.Global,
+    ast.Nonlocal,
+}
+
+
+class CodeSandboxError(Exception):
+    """Base class for every error raised by the code sandbox."""
+
+
+class TimeoutError(CodeSandboxError):
+    """Raised when sandboxed code runs longer than the configured timeout.
+
+    Note: inside this module the name shadows Python's builtin ``TimeoutError``.
+    """
+
+
+class SecurityError(CodeSandboxError):
+    """Raised when sandboxed code attempts an operation that is not allowed."""
+
+
+@dataclass
+class SandboxResult:
+    """
+    Result of sandboxed code execution.
+
+    Returned by CodeSandbox.execute() and CodeSandbox.execute_expression();
+    errors those methods catch are reported here instead of being raised.
+    """
+
+    # True when the code ran to completion without an error
+    success: bool
+    # execute(): value of the variable named "result" (None if unset);
+    # execute_expression(): value of the evaluated expression
+    result: Any = None
+    # Human-readable failure description; None on success
+    error: str | None = None
+    # Text written to sys.stdout while the code ran (set by execute())
+    stdout: str = ""
+    # Variables collected from the namespace after execute() finished
+    variables: dict[str, Any] = field(default_factory=dict)
+    # Execution time in milliseconds (set by execute())
+    execution_time_ms: int = 0
+
+
+class RestrictedImporter:
+    """
+    Callable stand-in for ``__import__`` that only serves whitelisted modules.
+
+    Only exact names found in ``allowed_modules`` are accepted; any extra
+    positional or keyword arguments are accepted and ignored. Imported
+    modules are cached per importer instance.
+    """
+
+    def __init__(self, allowed_modules: set[str]):
+        self.allowed_modules = allowed_modules
+        self._cache: dict[str, Any] = {}
+
+    def __call__(self, name: str, *args, **kwargs):
+        """Return module ``name``; raise SecurityError if it is not whitelisted."""
+        if name in self.allowed_modules:
+            try:
+                return self._cache[name]
+            except KeyError:
+                from importlib import import_module
+
+                module = self._cache[name] = import_module(name)
+                return module
+
+        raise SecurityError(f"Import of module '{name}' is not allowed")
+
+
+class CodeValidator:
+    """
+    Validates code for safety before execution.
+
+    The check is purely static (an AST walk) and reports:
+    - any node whose type is in ``blocked_nodes``
+    - attribute access where the attribute name starts with "_"
+    - direct calls by name to exec, eval, compile or __import__
+    """
+
+    def __init__(self, blocked_nodes: set[type] | None = None):
+        # A missing or empty set falls back to the module defaults.
+        self.blocked_nodes = blocked_nodes or BLOCKED_AST_NODES
+
+    def validate(self, code: str) -> list[str]:
+        """
+        Validate code and return list of issues.
+
+        Code that cannot be parsed is reported as a single issue rather than
+        raising, so callers always get a list back.
+
+        Args:
+            code: Python source to inspect
+
+        Returns:
+            Human-readable issue descriptions; empty list if code is safe.
+        """
+        try:
+            tree = ast.parse(code)
+        except SyntaxError as e:
+            return [f"Syntax error: {e}"]
+        except (ValueError, MemoryError, RecursionError) as e:
+            # ast.parse raises ValueError for null bytes on Python < 3.12 and
+            # may raise MemoryError/RecursionError for pathological input.
+            return [f"Unparseable code: {type(e).__name__}: {e}"]
+
+        issues = []
+        blocked_calls = ("exec", "eval", "compile", "__import__")
+
+        for node in ast.walk(tree):
+            # Check for blocked node types
+            if type(node) in self.blocked_nodes:
+                lineno = getattr(node, "lineno", "?")
+                issues.append(f"Blocked operation: {type(node).__name__} at line {lineno}")
+
+            # Check for dangerous attribute access
+            if isinstance(node, ast.Attribute) and node.attr.startswith("_"):
+                issues.append(f"Access to private attribute '{node.attr}' at line {node.lineno}")
+
+            # Check for exec/eval calls made directly by name
+            if (
+                isinstance(node, ast.Call)
+                and isinstance(node.func, ast.Name)
+                and node.func.id in blocked_calls
+            ):
+                issues.append(f"Blocked function call: {node.func.id} at line {node.lineno}")
+
+        return issues
+
+
+class CodeSandbox:
+    """
+    Sandboxed environment for executing dynamic code.
+
+    Usage:
+        sandbox = CodeSandbox(timeout_seconds=5)
+        result = sandbox.execute(
+            code="x = 1 + 2\\nresult = x * multiplier",
+            inputs={"multiplier": 2},
+        )
+        if result.success:
+            print(result.variables["result"])  # 6
+    """
+
+    def __init__(
+        self,
+        timeout_seconds: int = 10,
+        allowed_modules: set[str] | None = None,
+        safe_builtins: dict[str, Any] | None = None,
+    ):
+        """
+        Configure the sandbox.
+
+        Args:
+            timeout_seconds: Maximum execution time passed to the timeout context
+            allowed_modules: Module names the restricted importer may load;
+                None selects ALLOWED_MODULES, an empty set allows none
+            safe_builtins: Builtins exposed to executed code; None selects
+                SAFE_BUILTINS, an empty dict exposes none
+        """
+        self.timeout_seconds = timeout_seconds
+        # Compare against None (not truthiness): an explicitly empty set/dict
+        # means "allow nothing" and must not silently fall back to the defaults.
+        self.allowed_modules = ALLOWED_MODULES if allowed_modules is None else allowed_modules
+        self.safe_builtins = SAFE_BUILTINS if safe_builtins is None else safe_builtins
+        self.validator = CodeValidator()
+        self.importer = RestrictedImporter(self.allowed_modules)
+
+    @contextmanager
+    def _timeout_context(self, seconds: int):
+        """
+        Run the ``with`` body under a SIGALRM-based time limit.
+
+        When the alarm fires, TimeoutError is raised from the signal handler.
+        On platforms without SIGALRM the body runs with no time limit.
+        """
+        if not hasattr(signal, "SIGALRM"):
+            # Windows: no timeout support, just execute
+            yield
+            return
+
+        def _on_alarm(signum, frame):
+            raise TimeoutError(f"Code execution timed out after {seconds} seconds")
+
+        previous_handler = signal.signal(signal.SIGALRM, _on_alarm)
+        signal.alarm(seconds)
+        try:
+            yield
+        finally:
+            # Cancel any pending alarm before restoring the previous handler.
+            signal.alarm(0)
+            signal.signal(signal.SIGALRM, previous_handler)
+
+    def _create_namespace(self, inputs: dict[str, Any]) -> dict[str, Any]:
+        """
+        Build the globals dict the sandboxed code runs in.
+
+        The namespace holds a private copy of the safe builtins and the
+        restricted importer; input variables are merged in last, so an input
+        with the same name replaces the corresponding entry.
+        """
+        return {
+            "__builtins__": dict(self.safe_builtins),
+            "__import__": self.importer,
+            **inputs,
+        }
+
+    def execute(
+        self,
+        code: str,
+        inputs: dict[str, Any] | None = None,
+        extract_vars: list[str] | None = None,
+    ) -> SandboxResult:
+        """
+        Validate and run a block of code inside the sandbox.
+
+        Args:
+            code: Python code to execute
+            inputs: Variables to inject into namespace
+            extract_vars: Names to copy out of the namespace after the run,
+                in addition to the variables that are collected automatically
+
+        Returns:
+            SandboxResult describing the outcome. Validation failures,
+            timeouts, security violations and other exceptions raised by the
+            code are reported through the result.
+        """
+        import io
+        import time
+
+        if not inputs:
+            inputs = {}
+        if not extract_vars:
+            extract_vars = []
+
+        # Static validation comes first; rejected code is never executed.
+        problems = self.validator.validate(code)
+        if problems:
+            return SandboxResult(
+                success=False,
+                error=f"Code validation failed: {'; '.join(problems)}",
+            )
+
+        namespace = self._create_namespace(inputs)
+
+        # Redirect stdout for the duration of the run; restored in ``finally``.
+        saved_stdout = sys.stdout
+        buffer = io.StringIO()
+        sys.stdout = buffer
+
+        started = time.time()
+
+        def elapsed_ms() -> int:
+            return int((time.time() - started) * 1000)
+
+        try:
+            with self._timeout_context(self.timeout_seconds):
+                exec(compile(code, "<sandbox>", "exec"), namespace)
+
+            duration_ms = elapsed_ms()
+
+            # Explicitly requested names first ...
+            collected = {name: namespace[name] for name in extract_vars if name in namespace}
+            # ... then every variable that is not an input, not named like a
+            # safe builtin, and not underscore-prefixed.
+            collected.update(
+                (name, value)
+                for name, value in namespace.items()
+                if not (name in inputs or name in self.safe_builtins or name.startswith("_"))
+            )
+
+            return SandboxResult(
+                success=True,
+                result=namespace.get("result"),  # Convention: 'result' is the return value
+                stdout=buffer.getvalue(),
+                variables=collected,
+                execution_time_ms=duration_ms,
+            )
+
+        except TimeoutError as exc:
+            return SandboxResult(
+                success=False,
+                error=str(exc),
+                execution_time_ms=self.timeout_seconds * 1000,
+            )
+
+        except SecurityError as exc:
+            return SandboxResult(
+                success=False,
+                error=f"Security violation: {exc}",
+                execution_time_ms=elapsed_ms(),
+            )
+
+        except Exception as exc:
+            return SandboxResult(
+                success=False,
+                error=f"{type(exc).__name__}: {exc}",
+                stdout=buffer.getvalue(),
+                execution_time_ms=elapsed_ms(),
+            )
+
+        finally:
+            sys.stdout = saved_stdout
+
+    def execute_expression(
+        self,
+        expression: str,
+        inputs: dict[str, Any] | None = None,
+    ) -> SandboxResult:
+        """
+        Execute a single expression and return its value.
+
+        Simpler than execute() - just evaluates one expression. The expression
+        goes through the same static validation as execute(), so blocked
+        calls and private-attribute access are rejected before evaluation.
+
+        Args:
+            expression: Python expression to evaluate
+            inputs: Variables to inject into namespace
+
+        Returns:
+            SandboxResult with the value in ``result`` on success, or
+            ``success=False`` and an error description otherwise
+        """
+        inputs = inputs or {}
+
+        # Must parse as a single expression
+        try:
+            ast.parse(expression, mode="eval")
+        except SyntaxError as e:
+            return SandboxResult(success=False, error=f"Syntax error: {e}")
+
+        # Apply the same safety checks as execute(); without them an expression
+        # such as ().__class__ would be evaluated unchecked.
+        issues = self.validator.validate(expression)
+        if issues:
+            return SandboxResult(
+                success=False,
+                error=f"Code validation failed: {'; '.join(issues)}",
+            )
+
+        namespace = self._create_namespace(inputs)
+
+        try:
+            with self._timeout_context(self.timeout_seconds):
+                result = eval(expression, namespace)
+
+            return SandboxResult(success=True, result=result)
+
+        except Exception as e:
+            return SandboxResult(
+                success=False,
+                error=f"{type(e).__name__}: {e}",
+            )
+
+
+# Singleton instance with default settings (created at import time).
+# Note: safe_exec() and safe_eval() below build their own CodeSandbox and do
+# not use this instance.
+default_sandbox = CodeSandbox()
+
+
+def safe_exec(
+    code: str,
+    inputs: dict[str, Any] | None = None,
+    timeout_seconds: int = 10,
+) -> SandboxResult:
+    """
+    Run a block of code in a fresh, default-configured sandbox.
+
+    Args:
+        code: Python code to execute
+        inputs: Variables to inject
+        timeout_seconds: Max execution time
+
+    Returns:
+        SandboxResult produced by CodeSandbox.execute()
+    """
+    return CodeSandbox(timeout_seconds=timeout_seconds).execute(code, inputs)
+
+
+def safe_eval(
+    expression: str,
+    inputs: dict[str, Any] | None = None,
+    timeout_seconds: int = 5,
+) -> SandboxResult:
+    """
+    Evaluate a single expression in a fresh, default-configured sandbox.
+
+    Args:
+        expression: Python expression to evaluate
+        inputs: Variables to inject
+        timeout_seconds: Max execution time
+
+    Returns:
+        SandboxResult produced by CodeSandbox.execute_expression()
+    """
+    return CodeSandbox(timeout_seconds=timeout_seconds).execute_expression(expression, inputs)
@@ -0,0 +1,426 @@
+"""NodeConversation: Message history management for graph nodes."""
+
+from __future__ import annotations
+
+import json
+import re
+from dataclasses import dataclass
+from typing import Any, Literal, Protocol, runtime_checkable
+
+
+@dataclass
+class Message:
+    """A single message in a conversation.
+
+    Attributes:
+        seq: Monotonic sequence number.
+        role: One of "user", "assistant", or "tool".
+        content: Message text.
+        tool_use_id: Internal tool-use identifier (output as ``tool_call_id`` in LLM dicts).
+        tool_calls: OpenAI-format tool call list for assistant messages.
+        is_error: When True and role is "tool", ``to_llm_dict`` prepends "ERROR: " to content.
+    """
+
+    seq: int
+    role: Literal["user", "assistant", "tool"]
+    content: str
+    tool_use_id: str | None = None
+    tool_calls: list[dict[str, Any]] | None = None
+    is_error: bool = False
+
+    def to_llm_dict(self) -> dict[str, Any]:
+        """Convert to OpenAI-format message dict."""
+        if self.role == "user":
+            return {"role": "user", "content": self.content}
+
+        if self.role == "assistant":
+            d: dict[str, Any] = {"role": "assistant", "content": self.content}
+            if self.tool_calls:
+                d["tool_calls"] = self.tool_calls
+            return d
+
+        # role == "tool"
+        content = f"ERROR: {self.content}" if self.is_error else self.content
+        return {
+            "role": "tool",
+            "tool_call_id": self.tool_use_id,
+            "content": content,
+        }
+
+    def to_storage_dict(self) -> dict[str, Any]:
+        """Serialize all fields for persistence.  Omits None/default-False fields."""
+        d: dict[str, Any] = {
+            "seq": self.seq,
+            "role": self.role,
+            "content": self.content,
+        }
+        if self.tool_use_id is not None:
+            d["tool_use_id"] = self.tool_use_id
+        if self.tool_calls is not None:
+            d["tool_calls"] = self.tool_calls
+        if self.is_error:
+            d["is_error"] = self.is_error
+        return d
+
+    @classmethod
+    def from_storage_dict(cls, data: dict[str, Any]) -> Message:
+        """Deserialize from a storage dict."""
+        return cls(
+            seq=data["seq"],
+            role=data["role"],
+            content=data["content"],
+            tool_use_id=data.get("tool_use_id"),
+            tool_calls=data.get("tool_calls"),
+            is_error=data.get("is_error", False),
+        )
+
+
+# ---------------------------------------------------------------------------
+# ConversationStore protocol (Phase 2)
+# ---------------------------------------------------------------------------
+
+
+@runtime_checkable
+class ConversationStore(Protocol):
+    """Protocol for conversation persistence backends.
+
+    Structural interface consumed by ``NodeConversation``: any object that
+    provides these async methods qualifies.  ``@runtime_checkable`` permits
+    ``isinstance`` checks, which only verify that the methods exist.
+    """
+
+    # Store one serialized message; NodeConversation passes the message's
+    # seq and its ``to_storage_dict()`` output.
+    async def write_part(self, seq: int, data: dict[str, Any]) -> None: ...
+
+    # Return the stored message dicts; NodeConversation.restore feeds each one
+    # to Message.from_storage_dict.
+    # NOTE(review): restore appears to assume ascending seq order — confirm
+    # against implementations.
+    async def read_parts(self) -> list[dict[str, Any]]: ...
+
+    # Store conversation-level settings (system prompt, token limits,
+    # output keys as written by NodeConversation._persist_meta).
+    async def write_meta(self, data: dict[str, Any]) -> None: ...
+
+    # Return the stored metadata, or None; restore treats None as
+    # "conversation was never persisted".
+    async def read_meta(self) -> dict[str, Any] | None: ...
+
+    # Store the cursor dict; NodeConversation writes ``{"next_seq": <int>}``.
+    async def write_cursor(self, data: dict[str, Any]) -> None: ...
+
+    # Return the stored cursor dict, or None when absent.
+    async def read_cursor(self) -> dict[str, Any] | None: ...
+
+    # Remove stored parts older than ``seq``.
+    # NOTE(review): callers rely on this being exclusive (seq itself is kept)
+    # — confirm against implementations.
+    async def delete_parts_before(self, seq: int) -> None: ...
+
+    # NOTE(review): not called in this module; presumably releases backend
+    # resources — confirm.
+    async def close(self) -> None: ...
+
+    # NOTE(review): not called in this module; presumably deletes all
+    # persisted data for the conversation — confirm.
+    async def destroy(self) -> None: ...
+
+
+# ---------------------------------------------------------------------------
+# NodeConversation
+# ---------------------------------------------------------------------------
+
+
+class NodeConversation:
+    """Message history for a graph node with optional write-through persistence.
+
+    When *store* is ``None`` the conversation works purely in-memory.
+    When a :class:`ConversationStore` is supplied every mutation is
+    persisted via write-through (meta is lazily written on the first
+    ``_persist`` call).
+    """
+
+    def __init__(
+        self,
+        system_prompt: str = "",
+        max_history_tokens: int = 32000,
+        compaction_threshold: float = 0.8,
+        output_keys: list[str] | None = None,
+        store: ConversationStore | None = None,
+    ) -> None:
+        self._system_prompt = system_prompt
+        self._max_history_tokens = max_history_tokens
+        self._compaction_threshold = compaction_threshold
+        self._output_keys = output_keys
+        self._store = store
+        self._messages: list[Message] = []
+        self._next_seq: int = 0
+        self._meta_persisted: bool = False
+
+    # --- Properties --------------------------------------------------------
+
+    @property
+    def system_prompt(self) -> str:
+        return self._system_prompt
+
+    @property
+    def messages(self) -> list[Message]:
+        """Return a defensive copy of the message list."""
+        return list(self._messages)
+
+    @property
+    def turn_count(self) -> int:
+        """Number of conversational turns (one turn = one user message)."""
+        return sum(1 for m in self._messages if m.role == "user")
+
+    @property
+    def message_count(self) -> int:
+        """Total number of messages (all roles)."""
+        return len(self._messages)
+
+    @property
+    def next_seq(self) -> int:
+        return self._next_seq
+
+    # --- Add messages ------------------------------------------------------
+
+    async def add_user_message(self, content: str) -> Message:
+        msg = Message(seq=self._next_seq, role="user", content=content)
+        self._messages.append(msg)
+        self._next_seq += 1
+        await self._persist(msg)
+        return msg
+
+    async def add_assistant_message(
+        self,
+        content: str,
+        tool_calls: list[dict[str, Any]] | None = None,
+    ) -> Message:
+        msg = Message(
+            seq=self._next_seq,
+            role="assistant",
+            content=content,
+            tool_calls=tool_calls,
+        )
+        self._messages.append(msg)
+        self._next_seq += 1
+        await self._persist(msg)
+        return msg
+
+    async def add_tool_result(
+        self,
+        tool_use_id: str,
+        content: str,
+        is_error: bool = False,
+    ) -> Message:
+        msg = Message(
+            seq=self._next_seq,
+            role="tool",
+            content=content,
+            tool_use_id=tool_use_id,
+            is_error=is_error,
+        )
+        self._messages.append(msg)
+        self._next_seq += 1
+        await self._persist(msg)
+        return msg
+
+    # --- Query -------------------------------------------------------------
+
+    def to_llm_messages(self) -> list[dict[str, Any]]:
+        """Return messages as OpenAI-format dicts (system prompt excluded)."""
+        return [m.to_llm_dict() for m in self._messages]
+
+    def estimate_tokens(self) -> int:
+        """Rough token estimate: total characters / 4."""
+        total_chars = sum(len(m.content) for m in self._messages)
+        return total_chars // 4
+
+    def needs_compaction(self) -> bool:
+        return self.estimate_tokens() >= self._max_history_tokens * self._compaction_threshold
+
+    # --- Output-key extraction ---------------------------------------------
+
+    def _extract_protected_values(self, messages: list[Message]) -> dict[str, str]:
+        """Scan assistant messages for output_key values before compaction.
+
+        Iterates most-recent-first. Once a key is found, it's skipped for
+        older messages (latest value wins).
+        """
+        if not self._output_keys:
+            return {}
+
+        found: dict[str, str] = {}
+        remaining_keys = set(self._output_keys)
+
+        for msg in reversed(messages):
+            if msg.role != "assistant" or not remaining_keys:
+                continue
+
+            for key in list(remaining_keys):
+                value = self._try_extract_key(msg.content, key)
+                if value is not None:
+                    found[key] = value
+                    remaining_keys.discard(key)
+
+        return found
+
+    def _try_extract_key(self, content: str, key: str) -> str | None:
+        """Try 4 strategies to extract a key's value from message content."""
+        from framework.graph.node import find_json_object
+
+        # 1. Whole message is JSON
+        try:
+            parsed = json.loads(content)
+            if isinstance(parsed, dict) and key in parsed:
+                val = parsed[key]
+                return json.dumps(val) if not isinstance(val, str) else val
+        except (json.JSONDecodeError, TypeError):
+            pass
+
+        # 2. Embedded JSON via find_json_object
+        json_str = find_json_object(content)
+        if json_str:
+            try:
+                parsed = json.loads(json_str)
+                if isinstance(parsed, dict) and key in parsed:
+                    val = parsed[key]
+                    return json.dumps(val) if not isinstance(val, str) else val
+            except (json.JSONDecodeError, TypeError):
+                pass
+
+        # 3. Colon format: key: value
+        match = re.search(rf"\b{re.escape(key)}\s*:\s*(.+)", content)
+        if match:
+            return match.group(1).strip()
+
+        # 4. Equals format: key = value
+        match = re.search(rf"\b{re.escape(key)}\s*=\s*(.+)", content)
+        if match:
+            return match.group(1).strip()
+
+        return None
+
+    # --- Lifecycle ---------------------------------------------------------
+
+    async def compact(self, summary: str, keep_recent: int = 2) -> None:
+        """Replace old messages with a summary, optionally keeping recent ones.
+
+        Args:
+            summary: Caller-provided summary text.
+            keep_recent: Number of recent messages to preserve (default 2).
+                         Clamped to [0, len(messages) - 1].
+        """
+        if not self._messages:
+            return
+
+        # Clamp: must discard at least 1 message
+        keep_recent = max(0, min(keep_recent, len(self._messages) - 1))
+
+        if keep_recent > 0:
+            old_messages = self._messages[:-keep_recent]
+            recent_messages = self._messages[-keep_recent:]
+        else:
+            old_messages = self._messages
+            recent_messages = []
+
+        # Extract protected values from messages being discarded
+        if self._output_keys:
+            protected = self._extract_protected_values(old_messages)
+            if protected:
+                lines = ["PRESERVED VALUES (do not lose these):"]
+                for k, v in protected.items():
+                    lines.append(f"- {k}: {v}")
+                lines.append("")
+                lines.append("CONVERSATION SUMMARY:")
+                lines.append(summary)
+                summary = "\n".join(lines)
+
+        # Determine summary seq
+        if recent_messages:
+            summary_seq = recent_messages[0].seq - 1
+        else:
+            summary_seq = self._next_seq
+            self._next_seq += 1
+
+        summary_msg = Message(seq=summary_seq, role="user", content=summary)
+
+        # Persist
+        if self._store:
+            delete_before = recent_messages[0].seq if recent_messages else self._next_seq
+            await self._store.delete_parts_before(delete_before)
+            await self._store.write_part(summary_msg.seq, summary_msg.to_storage_dict())
+            await self._store.write_cursor({"next_seq": self._next_seq})
+
+        self._messages = [summary_msg] + recent_messages
+
+    async def clear(self) -> None:
+        """Remove all messages, keep system prompt, preserve ``_next_seq``."""
+        if self._store:
+            await self._store.delete_parts_before(self._next_seq)
+            await self._store.write_cursor({"next_seq": self._next_seq})
+        self._messages.clear()
+
+    def export_summary(self) -> str:
+        """Structured summary with [STATS], [CONFIG], [RECENT_MESSAGES] sections."""
+        prompt_preview = (
+            self._system_prompt[:80] + "..."
+            if len(self._system_prompt) > 80
+            else self._system_prompt
+        )
+
+        lines = [
+            "[STATS]",
+            f"turns: {self.turn_count}",
+            f"messages: {self.message_count}",
+            f"estimated_tokens: {self.estimate_tokens()}",
+            "",
+            "[CONFIG]",
+            f"system_prompt: {prompt_preview!r}",
+        ]
+
+        if self._output_keys:
+            lines.append(f"output_keys: {', '.join(self._output_keys)}")
+
+        lines.append("")
+        lines.append("[RECENT_MESSAGES]")
+        for m in self._messages[-5:]:
+            preview = m.content[:60] + "..." if len(m.content) > 60 else m.content
+            lines.append(f"  [{m.role}] {preview}")
+
+        return "\n".join(lines)
+
+    # --- Persistence internals ---------------------------------------------
+
+    async def _persist(self, message: Message) -> None:
+        """Write-through a single message.  No-op when store is None."""
+        if self._store is None:
+            return
+        if not self._meta_persisted:
+            await self._persist_meta()
+        await self._store.write_part(message.seq, message.to_storage_dict())
+        await self._store.write_cursor({"next_seq": self._next_seq})
+
+    async def _persist_meta(self) -> None:
+        """Lazily write conversation metadata to the store (called once)."""
+        if self._store is None:
+            return
+        await self._store.write_meta(
+            {
+                "system_prompt": self._system_prompt,
+                "max_history_tokens": self._max_history_tokens,
+                "compaction_threshold": self._compaction_threshold,
+                "output_keys": self._output_keys,
+            }
+        )
+        self._meta_persisted = True
+
+    # --- Restore -----------------------------------------------------------
+
+    @classmethod
+    async def restore(cls, store: ConversationStore) -> NodeConversation | None:
+        """Reconstruct a NodeConversation from a store.
+
+        Returns ``None`` if the store contains no metadata (i.e. the
+        conversation was never persisted).
+        """
+        meta = await store.read_meta()
+        if meta is None:
+            return None
+
+        conv = cls(
+            system_prompt=meta.get("system_prompt", ""),
+            max_history_tokens=meta.get("max_history_tokens", 32000),
+            compaction_threshold=meta.get("compaction_threshold", 0.8),
+            output_keys=meta.get("output_keys"),
+            store=store,
+        )
+        conv._meta_persisted = True
+
+        parts = await store.read_parts()
+        conv._messages = [Message.from_storage_dict(p) for p in parts]
+
+        cursor = await store.read_cursor()
+        if cursor:
+            conv._next_seq = cursor["next_seq"]
+        elif conv._messages:
+            conv._next_seq = conv._messages[-1].seq + 1
+
+        return conv
@@ -0,0 +1,611 @@
+"""
+Edge Protocol - How nodes connect in a graph.
+
+Edges define:
+1. Source and target nodes
+2. Conditions for traversal
+3. Data mapping between nodes
+
+Unlike traditional graph frameworks where edges are programmatic,
+our edges can be created dynamically by a Builder agent based on the goal.
+
+Edge Types:
+- always: Always traverse after source completes
+- on_success: Traverse only if source succeeds
+- on_failure: Traverse only if source fails
+- conditional: Traverse based on expression evaluation (SAFE SUBSET ONLY)
+- llm_decide: Let LLM decide based on goal and context (goal-aware routing)
+
+The llm_decide condition is particularly powerful for goal-driven agents,
+allowing the LLM to evaluate whether proceeding along an edge makes sense
+given the current goal, context, and execution state.
+"""
+
+from enum import Enum
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+from framework.graph.safe_eval import safe_eval
+
+
+class EdgeCondition(str, Enum):
+    """When an edge should be traversed."""
+
+    ALWAYS = "always"  # Always after source completes
+    ON_SUCCESS = "on_success"  # Only if source succeeds
+    ON_FAILURE = "on_failure"  # Only if source fails
+    CONDITIONAL = "conditional"  # Based on expression
+    LLM_DECIDE = "llm_decide"  # Let LLM decide based on goal and context
+
+
+class EdgeSpec(BaseModel):
+    """
+    Specification for an edge between nodes.
+
+    Examples:
+        # Simple success-based routing
+        EdgeSpec(
+            id="calc-to-format",
+            source="calculator",
+            target="formatter",
+            condition=EdgeCondition.ON_SUCCESS,
+            input_mapping={"result": "value_to_format"}
+        )
+
+        # Conditional routing based on output
+        EdgeSpec(
+            id="validate-to-retry",
+            source="validator",
+            target="retry_handler",
+            condition=EdgeCondition.CONDITIONAL,
+            condition_expr="output.confidence < 0.8",
+        )
+
+        # LLM-powered routing (goal-aware)
+        EdgeSpec(
+            id="search-to-filter",
+            source="search_results",
+            target="filter_results",
+            condition=EdgeCondition.LLM_DECIDE,
+            description="Only filter if results need refinement to meet goal",
+        )
+    """
+
+    id: str
+    source: str = Field(description="Source node ID")
+    target: str = Field(description="Target node ID")
+
+    # When to traverse
+    condition: EdgeCondition = EdgeCondition.ALWAYS
+    condition_expr: str | None = Field(
+        default=None,
+        description="Expression for CONDITIONAL edges, e.g., 'output.confidence > 0.8'",
+    )
+
+    # Data flow
+    input_mapping: dict[str, str] = Field(
+        default_factory=dict,
+        description="Map source outputs to target inputs: {target_key: source_key}",
+    )
+
+    # Priority for multiple outgoing edges
+    priority: int = Field(default=0, description="Higher priority edges are evaluated first")
+
+    # Metadata
+    description: str = ""
+
+    model_config = {"extra": "allow"}
+
+    def should_traverse(
+        self,
+        source_success: bool,
+        source_output: dict[str, Any],
+        memory: dict[str, Any],
+        llm: Any | None = None,
+        goal: Any | None = None,
+        source_node_name: str | None = None,
+        target_node_name: str | None = None,
+    ) -> bool:
+        """
+        Determine if this edge should be traversed.
+
+        Args:
+            source_success: Whether the source node succeeded
+            source_output: Output from the source node
+            memory: Current shared memory state
+            llm: LLM provider for LLM_DECIDE edges
+            goal: Goal object for LLM_DECIDE edges
+            source_node_name: Name of source node (for LLM context)
+            target_node_name: Name of target node (for LLM context)
+
+        Returns:
+            True if the edge should be traversed
+        """
+        if self.condition == EdgeCondition.ALWAYS:
+            return True
+
+        if self.condition == EdgeCondition.ON_SUCCESS:
+            return source_success
+
+        if self.condition == EdgeCondition.ON_FAILURE:
+            return not source_success
+
+        if self.condition == EdgeCondition.CONDITIONAL:
+            return self._evaluate_condition(source_output, memory)
+
+        if self.condition == EdgeCondition.LLM_DECIDE:
+            if llm is None or goal is None:
+                # Fallback to ON_SUCCESS if LLM not available
+                return source_success
+            return self._llm_decide(
+                llm=llm,
+                goal=goal,
+                source_success=source_success,
+                source_output=source_output,
+                memory=memory,
+                source_node_name=source_node_name,
+                target_node_name=target_node_name,
+            )
+
+        return False
+
+    def _evaluate_condition(
+        self,
+        output: dict[str, Any],
+        memory: dict[str, Any],
+    ) -> bool:
+        """Evaluate a conditional expression."""
+        if not self.condition_expr:
+            return True
+
+        # Build evaluation context
+        # Include memory keys directly for easier access in conditions
+        context = {
+            "output": output,
+            "memory": memory,
+            "result": output.get("result"),
+            "true": True,  # Allow lowercase true/false in conditions
+            "false": False,
+            **memory,  # Unpack memory keys directly into context
+        }
+
+        try:
+            # Safe evaluation using AST-based whitelist
+            return bool(safe_eval(self.condition_expr, context))
+        except Exception as e:
+            # Log the error for debugging
+            import logging
+
+            logger = logging.getLogger(__name__)
+            logger.warning(f"      ⚠ Condition evaluation failed: {self.condition_expr}")
+            logger.warning(f"         Error: {e}")
+            logger.warning(f"         Available context keys: {list(context.keys())}")
+            return False
+
+    def _llm_decide(
+        self,
+        llm: Any,
+        goal: Any,
+        source_success: bool,
+        source_output: dict[str, Any],
+        memory: dict[str, Any],
+        source_node_name: str | None,
+        target_node_name: str | None,
+    ) -> bool:
+        """
+        Use LLM to decide if this edge should be traversed.
+
+        The LLM evaluates whether proceeding to the target node
+        is the best next step toward achieving the goal.
+        """
+        import json
+
+        # Build context for LLM
+        prompt = f"""You are evaluating whether to proceed along an edge in an agent workflow.
+
+**Goal**: {goal.name}
+{goal.description}
+
+**Current State**:
+- Just completed: {source_node_name or "unknown node"}
+- Success: {source_success}
+- Output: {json.dumps(source_output, default=str)}
+
+**Decision**:
+Should we proceed to: {target_node_name or self.target}?
+Edge description: {self.description or "No description"}
+
+**Context from memory**:
+{json.dumps({k: str(v)[:100] for k, v in list(memory.items())[:5]}, indent=2)}
+
+Evaluate whether proceeding to this next node is the right step toward achieving the goal.
+Consider:
+1. Does the current output suggest we should proceed?
+2. Is this the logical next step given the goal?
+3. Are there any issues that would make proceeding unwise?
+
+Respond with ONLY a JSON object:
+{{"proceed": true/false, "reasoning": "brief explanation"}}"""
+
+        try:
+            response = llm.complete(
+                messages=[{"role": "user", "content": prompt}],
+                system="You are a routing agent. Respond with JSON only.",
+                max_tokens=150,
+            )
+
+            # Parse response
+            import re
+
+            json_match = re.search(r"\{[^{}]*\}", response.content, re.DOTALL)
+            if json_match:
+                data = json.loads(json_match.group())
+                proceed = data.get("proceed", False)
+                reasoning = data.get("reasoning", "")
+
+                # Log the decision (using basic print for now)
+                import logging
+
+                logger = logging.getLogger(__name__)
+                logger.info(f"      🤔 LLM routing decision: {'PROCEED' if proceed else 'SKIP'}")
+                logger.info(f"         Reason: {reasoning}")
+
+                return proceed
+
+        except Exception as e:
+            # Fallback: proceed on success
+            import logging
+
+            logger = logging.getLogger(__name__)
+            logger.warning(f"      ⚠ LLM routing failed, defaulting to on_success: {e}")
+            return source_success
+
+        return source_success
+
+    def map_inputs(
+        self,
+        source_output: dict[str, Any],
+        memory: dict[str, Any],
+    ) -> dict[str, Any]:
+        """
+        Map source outputs to target inputs.
+
+        Args:
+            source_output: Output from source node
+            memory: Current shared memory
+
+        Returns:
+            Input dict for target node
+        """
+        if not self.input_mapping:
+            # Default: pass through all outputs
+            return dict(source_output)
+
+        result = {}
+        for target_key, source_key in self.input_mapping.items():
+            # Try source output first, then memory
+            if source_key in source_output:
+                result[target_key] = source_output[source_key]
+            elif source_key in memory:
+                result[target_key] = memory[source_key]
+
+        return result
+
+
+class AsyncEntryPointSpec(BaseModel):
+    """
+    Specification for an asynchronous entry point.
+
+    Used with AgentRuntime for multi-entry-point agents that handle
+    concurrent execution streams (e.g., webhook + API handlers).
+
+    This model only declares data; nothing in this class enforces the
+    listed ``trigger_type`` or ``isolation_level`` values (both are plain
+    strings).
+
+    Example:
+        AsyncEntryPointSpec(
+            id="webhook",
+            name="Zendesk Webhook Handler",
+            entry_node="process-webhook",
+            trigger_type="webhook",
+            isolation_level="shared",
+        )
+    """
+
+    # Identity and starting node (all three are required)
+    id: str = Field(description="Unique identifier for this entry point")
+    name: str = Field(description="Human-readable name")
+    entry_node: str = Field(description="Node ID to start execution from")
+    # Trigger settings
+    trigger_type: str = Field(
+        default="manual",
+        description="How this entry point is triggered: webhook, api, timer, event, manual",
+    )
+    trigger_config: dict[str, Any] = Field(
+        default_factory=dict,
+        description="Trigger-specific configuration (e.g., webhook URL, timer interval)",
+    )
+    # Execution policy
+    isolation_level: str = Field(
+        default="shared", description="State isolation: isolated, shared, or synchronized"
+    )
+    priority: int = Field(default=0, description="Execution priority (higher = more priority)")
+    max_concurrent: int = Field(
+        default=10, description="Maximum concurrent executions for this entry point"
+    )
+
+    # Pydantic config: unknown extra fields are accepted rather than rejected.
+    model_config = {"extra": "allow"}
+
+
+class GraphSpec(BaseModel):
+    """
+    Complete specification of an agent graph.
+
+    Contains all nodes, edges, and metadata needed to execute.
+
+    For single-entry-point agents (traditional pattern):
+        GraphSpec(
+            id="calculator-graph",
+            goal_id="calc-001",
+            entry_node="input_parser",
+            terminal_nodes=["output_formatter", "error_handler"],
+            nodes=[...],
+            edges=[...],
+        )
+
+    For multi-entry-point agents (concurrent streams):
+        GraphSpec(
+            id="support-agent-graph",
+            goal_id="support-001",
+            entry_node="process-webhook",  # Default entry
+            async_entry_points=[
+                AsyncEntryPointSpec(
+                    id="webhook",
+                    name="Zendesk Webhook",
+                    entry_node="process-webhook",
+                    trigger_type="webhook",
+                ),
+                AsyncEntryPointSpec(
+                    id="api",
+                    name="API Handler",
+                    entry_node="process-request",
+                    trigger_type="api",
+                ),
+            ],
+            nodes=[...],
+            edges=[...],
+        )
+    """
+
+    id: str
+    goal_id: str
+    version: str = "1.0.0"
+
+    # Graph structure
+    entry_node: str = Field(description="ID of the first node to execute")
+    entry_points: dict[str, str] = Field(
+        default_factory=dict,
+        description="Named entry points for resuming execution. Format: {name: node_id}",
+    )
+    async_entry_points: list[AsyncEntryPointSpec] = Field(
+        default_factory=list,
+        description=(
+            "Asynchronous entry points for concurrent execution streams (used with AgentRuntime)"
+        ),
+    )
+    terminal_nodes: list[str] = Field(
+        default_factory=list, description="IDs of nodes that end execution"
+    )
+    pause_nodes: list[str] = Field(
+        default_factory=list, description="IDs of nodes that pause execution for HITL input"
+    )
+
+    # Components
+    nodes: list[Any] = Field(  # NodeSpec, but avoiding circular import
+        default_factory=list, description="All node specifications"
+    )
+    edges: list[EdgeSpec] = Field(default_factory=list, description="All edge specifications")
+
+    # Shared memory keys
+    memory_keys: list[str] = Field(
+        default_factory=list, description="Keys available in shared memory"
+    )
+
+    # Default LLM settings
+    default_model: str = "claude-haiku-4-5-20251001"
+    max_tokens: int = 1024
+
+    # Cleanup LLM for JSON extraction fallback (fast/cheap model preferred)
+    # If not set, uses CEREBRAS_API_KEY -> cerebras/llama-3.3-70b or
+    # ANTHROPIC_API_KEY -> claude-3-5-haiku as fallback
+    cleanup_llm_model: str | None = None
+
+    # Execution limits
+    max_steps: int = Field(default=100, description="Maximum node executions before timeout")
+    max_retries_per_node: int = 3
+
+    # Metadata
+    description: str = ""
+    created_by: str = ""  # "human" or "builder_agent"
+
+    model_config = {"extra": "allow"}
+
+    def get_node(self, node_id: str) -> Any | None:
+        """Get a node by ID."""
+        for node in self.nodes:
+            if node.id == node_id:
+                return node
+        return None
+
+    def has_async_entry_points(self) -> bool:
+        """Check if this graph uses async entry points (multi-stream execution)."""
+        return len(self.async_entry_points) > 0
+
+    def get_async_entry_point(self, entry_point_id: str) -> AsyncEntryPointSpec | None:
+        """Get an async entry point by ID."""
+        for ep in self.async_entry_points:
+            if ep.id == entry_point_id:
+                return ep
+        return None
+
+    def get_outgoing_edges(self, node_id: str) -> list[EdgeSpec]:
+        """Get all edges leaving a node, sorted by priority."""
+        edges = [e for e in self.edges if e.source == node_id]
+        return sorted(edges, key=lambda e: -e.priority)
+
+    def get_incoming_edges(self, node_id: str) -> list[EdgeSpec]:
+        """Get all edges entering a node."""
+        return [e for e in self.edges if e.target == node_id]
+
+    def detect_fan_out_nodes(self) -> dict[str, list[str]]:
+        """
+        Detect nodes that fan-out to multiple targets.
+
+        A fan-out occurs when a node has multiple outgoing edges with the same
+        condition (typically ON_SUCCESS) that should execute in parallel.
+
+        Returns:
+            Dict mapping source_node_id -> list of parallel target_node_ids
+        """
+        fan_outs: dict[str, list[str]] = {}
+        for node in self.nodes:
+            outgoing = self.get_outgoing_edges(node.id)
+            # Fan-out: multiple edges with ON_SUCCESS condition
+            success_edges = [e for e in outgoing if e.condition == EdgeCondition.ON_SUCCESS]
+            if len(success_edges) > 1:
+                fan_outs[node.id] = [e.target for e in success_edges]
+        return fan_outs
+
+    def detect_fan_in_nodes(self) -> dict[str, list[str]]:
+        """
+        Detect nodes that receive from multiple sources (fan-in / convergence).
+
+        A fan-in occurs when a node has multiple incoming edges, meaning
+        it should wait for all predecessor branches to complete.
+
+        Returns:
+            Dict mapping target_node_id -> list of source_node_ids
+        """
+        fan_ins: dict[str, list[str]] = {}
+        for node in self.nodes:
+            incoming = self.get_incoming_edges(node.id)
+            if len(incoming) > 1:
+                fan_ins[node.id] = [e.source for e in incoming]
+        return fan_ins
+
+    def get_entry_point(self, session_state: dict | None = None) -> str:
+        """
+        Get the appropriate entry point based on session state.
+
+        Args:
+            session_state: Optional session state with 'paused_at' or 'resume_from' key
+
+        Returns:
+            Node ID to start execution from
+        """
+        if not session_state:
+            return self.entry_node
+
+        # Check if resuming from a pause node
+        paused_at = session_state.get("paused_at")
+        if paused_at and paused_at in self.pause_nodes:
+            # Look for a resume entry point
+            resume_key = f"{paused_at}_resume"
+            if resume_key in self.entry_points:
+                return self.entry_points[resume_key]
+
+        # Check for explicit resume_from
+        resume_from = session_state.get("resume_from")
+        if resume_from:
+            if resume_from in self.entry_points:
+                return self.entry_points[resume_from]
+            elif resume_from in [n.id for n in self.nodes]:
+                return resume_from
+
+        # Default to main entry
+        return self.entry_node
+
+    def validate(self) -> list[str]:
+        """Validate the graph structure."""
+        errors = []
+
+        # Check entry node exists
+        if not self.get_node(self.entry_node):
+            errors.append(f"Entry node '{self.entry_node}' not found")
+
+        # Check async entry points
+        seen_entry_ids = set()
+        for entry_point in self.async_entry_points:
+            # Check for duplicate IDs
+            if entry_point.id in seen_entry_ids:
+                errors.append(f"Duplicate async entry point ID: '{entry_point.id}'")
+            seen_entry_ids.add(entry_point.id)
+
+            # Check entry node exists
+            if not self.get_node(entry_point.entry_node):
+                errors.append(
+                    f"Async entry point '{entry_point.id}' references "
+                    f"missing node '{entry_point.entry_node}'"
+                )
+
+            # Validate isolation level
+            valid_isolation = {"isolated", "shared", "synchronized"}
+            if entry_point.isolation_level not in valid_isolation:
+                errors.append(
+                    f"Async entry point '{entry_point.id}' has invalid isolation_level "
+                    f"'{entry_point.isolation_level}'. Valid: {valid_isolation}"
+                )
+
+            # Validate trigger type
+            valid_triggers = {"webhook", "api", "timer", "event", "manual"}
+            if entry_point.trigger_type not in valid_triggers:
+                errors.append(
+                    f"Async entry point '{entry_point.id}' has invalid trigger_type "
+                    f"'{entry_point.trigger_type}'. Valid: {valid_triggers}"
+                )
+
+        # Check terminal nodes exist
+        for term in self.terminal_nodes:
+            if not self.get_node(term):
+                errors.append(f"Terminal node '{term}' not found")
+
+        # Check edge references
+        for edge in self.edges:
+            if not self.get_node(edge.source):
+                errors.append(f"Edge '{edge.id}' references missing source '{edge.source}'")
+            if not self.get_node(edge.target):
+                errors.append(f"Edge '{edge.id}' references missing target '{edge.target}'")
+
+        # Check for unreachable nodes
+        # Start with main entry node and all entry points (for pause/resume architecture)
+        reachable = set()
+        to_visit = [self.entry_node]
+
+        # Add all entry points as valid starting points (they're reachable by definition)
+        for entry_point_node in self.entry_points.values():
+            to_visit.append(entry_point_node)
+
+        # Add all async entry points as valid starting points
+        for async_entry in self.async_entry_points:
+            to_visit.append(async_entry.entry_node)
+
+        # Traverse from all entry points
+        while to_visit:
+            current = to_visit.pop()
+            if current in reachable:
+                continue
+            reachable.add(current)
+            for edge in self.get_outgoing_edges(current):
+                to_visit.append(edge.target)
+
+        # Build set of async entry point nodes for quick lookup
+        async_entry_nodes = {ep.entry_node for ep in self.async_entry_points}
+
+        for node in self.nodes:
+            if node.id not in reachable:
+                # Skip if node is a pause node, entry point target, or async entry
+                # (pause/resume architecture and async entry points make reachable)
+                if (
+                    node.id in self.pause_nodes
+                    or node.id in self.entry_points.values()
+                    or node.id in async_entry_nodes
+                ):
+                    continue
+                errors.append(f"Node '{node.id}' is unreachable from entry")
+
+        return errors
@@ -0,0 +1,552 @@
+"""
+Flexible Graph Executor with Worker-Judge Loop.
+
+Executes plans created by external planner (Claude Code, etc.)
+using a Worker-Judge loop:
+
+1. External planner creates Plan
+2. FlexibleGraphExecutor receives Plan
+3. Worker executes each step
+4. Judge evaluates each result
+5. If Judge says "replan" → return to external planner with feedback
+6. If Judge says "escalate" → request human intervention
+7. If all steps complete → return success
+
+This keeps planning external while execution/evaluation is internal.
+"""
+
+from collections.abc import Callable
+from dataclasses import dataclass
+from datetime import datetime
+from typing import Any
+
+from framework.graph.code_sandbox import CodeSandbox
+from framework.graph.goal import Goal
+from framework.graph.judge import HybridJudge, create_default_judge
+from framework.graph.plan import (
+    ApprovalDecision,
+    ApprovalRequest,
+    ApprovalResult,
+    ExecutionStatus,
+    Judgment,
+    JudgmentAction,
+    Plan,
+    PlanExecutionResult,
+    PlanStep,
+    StepStatus,
+)
+from framework.graph.worker_node import StepExecutionResult, WorkerNode
+from framework.llm.provider import LLMProvider, Tool
+from framework.runtime.core import Runtime
+
+# Type alias for approval callback
+ApprovalCallback = Callable[[ApprovalRequest], ApprovalResult]
+
+
+@dataclass
+class ExecutorConfig:
+    """
+    Configuration for FlexibleGraphExecutor.
+
+    Of these settings, only max_total_steps is read by the executor code in
+    this module; the others are declared but not referenced here.
+    """
+
+    # NOTE(review): the retry check in _handle_judgment compares against
+    # step.max_retries, not this value - confirm which one is meant to apply.
+    max_retries_per_step: int = 3
+    # Upper bound on worker executions within a single execute_plan() call
+    max_total_steps: int = 100
+    # NOTE(review): no timeout is enforced by the executor in this module
+    timeout_seconds: int = 300
+    enable_parallel_execution: bool = False  # Future: parallel step execution
+
+
+class FlexibleGraphExecutor:
+    """
+    Executes plans with Worker-Judge loop.
+
+    Plans come from external source (Claude Code, etc.).
+    Returns feedback for replanning if needed.
+
+    Usage:
+        executor = FlexibleGraphExecutor(
+            runtime=runtime,
+            llm=llm_provider,
+            tools=tools,
+        )
+
+        result = await executor.execute_plan(plan, goal, context)
+
+        if result.status == ExecutionStatus.NEEDS_REPLAN:
+            # External planner should create a new plan from result.feedback_context
+            new_plan = external_planner.replan(result.feedback_context)
+            result = await executor.execute_plan(new_plan, goal, result.feedback_context)
+    """
+
+    def __init__(
+        self,
+        runtime: Runtime,
+        llm: LLMProvider | None = None,
+        tools: dict[str, Tool] | None = None,
+        tool_executor: Callable | None = None,
+        functions: dict[str, Callable] | None = None,
+        judge: HybridJudge | None = None,
+        config: ExecutorConfig | None = None,
+        approval_callback: ApprovalCallback | None = None,
+    ):
+        """
+        Initialize the FlexibleGraphExecutor.
+
+        Args:
+            runtime: Runtime for decision logging
+            llm: LLM provider for Worker and Judge
+            tools: Available tools
+            tool_executor: Function to execute tools
+            functions: Registered functions
+            judge: Custom judge (defaults to HybridJudge with default rules)
+            config: Executor configuration
+            approval_callback: Callback for human-in-the-loop approval.
+                If None, steps requiring approval will pause execution.
+        """
+        self.runtime = runtime
+        self.llm = llm
+        self.tools = tools or {}
+        self.tool_executor = tool_executor
+        self.functions = functions or {}
+        self.config = config or ExecutorConfig()
+        self.approval_callback = approval_callback
+
+        # Create judge
+        self.judge = judge or create_default_judge(llm)
+
+        # Create worker
+        self.worker = WorkerNode(
+            runtime=runtime,
+            llm=llm,
+            tools=tools,
+            tool_executor=tool_executor,
+            functions=functions,
+            sandbox=CodeSandbox(),
+        )
+
+    async def execute_plan(
+        self,
+        plan: Plan,
+        goal: Goal,
+        context: dict[str, Any] | None = None,
+    ) -> PlanExecutionResult:
+        """
+        Execute a plan created by external planner.
+
+        Args:
+            plan: The plan to execute
+            goal: The goal context
+            context: Initial context (e.g., from previous execution)
+
+        Returns:
+            PlanExecutionResult with status and feedback
+        """
+        context = context or {}
+        context.update(plan.context)  # Merge plan's accumulated context
+
+        # Start run
+        _run_id = self.runtime.start_run(
+            goal_id=goal.id,
+            goal_description=goal.description,
+            input_data={"plan_id": plan.id, "revision": plan.revision},
+        )
+
+        steps_executed = 0
+        total_tokens = 0
+        total_latency = 0
+
+        try:
+            while steps_executed < self.config.max_total_steps:
+                # Get next ready steps
+                ready_steps = plan.get_ready_steps()
+
+                if not ready_steps:
+                    # Check if we're done or stuck
+                    if plan.is_complete():
+                        break
+                    else:
+                        # No ready steps but not complete - something's wrong
+                        return self._create_result(
+                            status=ExecutionStatus.NEEDS_REPLAN,
+                            plan=plan,
+                            context=context,
+                            feedback=(
+                                "No executable steps available but plan not complete. "
+                                "Check dependencies."
+                            ),
+                            steps_executed=steps_executed,
+                            total_tokens=total_tokens,
+                            total_latency=total_latency,
+                        )
+
+                # Execute next step (for now, sequential; could be parallel)
+                step = ready_steps[0]
+                # Debug: show ready steps
+                # ready_ids = [s.id for s in ready_steps]
+                # print(f"  [DEBUG] Ready steps: {ready_ids}, executing: {step.id}")
+
+                # APPROVAL CHECK - before execution
+                if step.requires_approval:
+                    approval_result = await self._request_approval(step, context)
+
+                    if approval_result is None:
+                        # No callback, pause execution
+                        step.status = StepStatus.AWAITING_APPROVAL
+                        return self._create_result(
+                            status=ExecutionStatus.AWAITING_APPROVAL,
+                            plan=plan,
+                            context=context,
+                            feedback=f"Step '{step.id}' requires approval: {step.description}",
+                            steps_executed=steps_executed,
+                            total_tokens=total_tokens,
+                            total_latency=total_latency,
+                        )
+
+                    if approval_result.decision == ApprovalDecision.REJECT:
+                        step.status = StepStatus.REJECTED
+                        step.error = approval_result.reason or "Rejected by human"
+                        # Skip this step and continue with dependents marked as skipped
+                        self._skip_dependent_steps(plan, step.id)
+                        continue
+
+                    if approval_result.decision == ApprovalDecision.ABORT:
+                        return self._create_result(
+                            status=ExecutionStatus.ABORTED,
+                            plan=plan,
+                            context=context,
+                            feedback=approval_result.reason or "Aborted by human",
+                            steps_executed=steps_executed,
+                            total_tokens=total_tokens,
+                            total_latency=total_latency,
+                        )
+
+                    if approval_result.decision == ApprovalDecision.MODIFY:
+                        # Apply modifications to step
+                        if approval_result.modifications:
+                            self._apply_modifications(step, approval_result.modifications)
+
+                    # APPROVE - continue to execution
+
+                step.status = StepStatus.IN_PROGRESS
+                step.started_at = datetime.now()
+                step.attempts += 1
+
+                # WORK
+                work_result = await self.worker.execute(step, context)
+                steps_executed += 1
+                total_tokens += work_result.tokens_used
+                total_latency += work_result.latency_ms
+
+                # JUDGE
+                judgment = await self.judge.evaluate(
+                    step=step,
+                    result=work_result.__dict__,
+                    goal=goal,
+                    context=context,
+                )
+
+                # Handle judgment
+                result = await self._handle_judgment(
+                    step=step,
+                    work_result=work_result,
+                    judgment=judgment,
+                    plan=plan,
+                    goal=goal,
+                    context=context,
+                    steps_executed=steps_executed,
+                    total_tokens=total_tokens,
+                    total_latency=total_latency,
+                )
+
+                if result is not None:
+                    # Judgment resulted in early return (replan/escalate)
+                    self.runtime.end_run(
+                        success=False,
+                        narrative=f"Execution stopped: {result.status.value}",
+                    )
+                    return result
+
+            # All steps completed successfully
+            self.runtime.end_run(
+                success=True,
+                output_data=context,
+                narrative=f"Plan completed: {steps_executed} steps executed",
+            )
+
+            return self._create_result(
+                status=ExecutionStatus.COMPLETED,
+                plan=plan,
+                context=context,
+                steps_executed=steps_executed,
+                total_tokens=total_tokens,
+                total_latency=total_latency,
+            )
+
+        except Exception as e:
+            self.runtime.report_problem(
+                severity="critical",
+                description=str(e),
+            )
+            self.runtime.end_run(
+                success=False,
+                narrative=f"Execution failed: {e}",
+            )
+
+            return PlanExecutionResult(
+                status=ExecutionStatus.FAILED,
+                error=str(e),
+                feedback=f"Execution error: {e}",
+                feedback_context=plan.to_feedback_context(),
+                completed_steps=[s.id for s in plan.get_completed_steps()],
+                steps_executed=steps_executed,
+                total_tokens=total_tokens,
+                total_latency_ms=total_latency,
+            )
+
+    async def _handle_judgment(
+        self,
+        step: PlanStep,
+        work_result: StepExecutionResult,
+        judgment: Judgment,
+        plan: Plan,
+        goal: Goal,
+        context: dict[str, Any],
+        steps_executed: int,
+        total_tokens: int,
+        total_latency: int,
+    ) -> PlanExecutionResult | None:
+        """
+        Handle judgment and return result if execution should stop.
+
+        Returns None to continue execution, or PlanExecutionResult to stop.
+        """
+        if judgment.action == JudgmentAction.ACCEPT:
+            # Step succeeded - update state and continue
+            step.status = StepStatus.COMPLETED
+            step.completed_at = datetime.now()
+            step.result = work_result.outputs
+
+            # Map outputs to expected output keys
+            # If output has generic "result" key but step expects specific keys, map it
+            outputs_to_store = work_result.outputs.copy()
+            if step.expected_outputs and "result" in outputs_to_store:
+                result_value = outputs_to_store["result"]
+                # For each expected output key that's not in outputs, map from "result"
+                for expected_key in step.expected_outputs:
+                    if expected_key not in outputs_to_store:
+                        outputs_to_store[expected_key] = result_value
+
+            # Update context with mapped outputs
+            context.update(outputs_to_store)
+
+            # Store in plan context for replanning feedback
+            plan.context[step.id] = outputs_to_store
+
+            return None  # Continue execution
+
+        elif judgment.action == JudgmentAction.RETRY:
+            # Retry step if under limit
+            if step.attempts < step.max_retries:
+                step.status = StepStatus.PENDING
+                step.error = judgment.feedback
+
+                # Record retry decision
+                self.runtime.decide(
+                    intent=f"Retry step {step.id}",
+                    options=[{"id": "retry", "description": "Retry with feedback"}],
+                    chosen="retry",
+                    reasoning=judgment.reasoning,
+                    context={"attempt": step.attempts, "feedback": judgment.feedback},
+                )
+
+                return None  # Continue (step will be retried)
+            else:
+                # Max retries exceeded - escalate to replan
+                step.status = StepStatus.FAILED
+                step.error = f"Max retries ({step.max_retries}) exceeded: {judgment.feedback}"
+
+                return self._create_result(
+                    status=ExecutionStatus.NEEDS_REPLAN,
+                    plan=plan,
+                    context=context,
+                    feedback=(
+                        f"Step '{step.id}' failed after {step.attempts} attempts: "
+                        f"{judgment.feedback}"
+                    ),
+                    steps_executed=steps_executed,
+                    total_tokens=total_tokens,
+                    total_latency=total_latency,
+                )
+
+        elif judgment.action == JudgmentAction.REPLAN:
+            # Return to external planner
+            step.status = StepStatus.FAILED
+            step.error = judgment.feedback
+
+            return self._create_result(
+                status=ExecutionStatus.NEEDS_REPLAN,
+                plan=plan,
+                context=context,
+                feedback=judgment.feedback or f"Step '{step.id}' requires replanning",
+                steps_executed=steps_executed,
+                total_tokens=total_tokens,
+                total_latency=total_latency,
+            )
+
+        elif judgment.action == JudgmentAction.ESCALATE:
+            # Request human intervention
+            return self._create_result(
+                status=ExecutionStatus.NEEDS_ESCALATION,
+                plan=plan,
+                context=context,
+                feedback=judgment.feedback or f"Step '{step.id}' requires human intervention",
+                steps_executed=steps_executed,
+                total_tokens=total_tokens,
+                total_latency=total_latency,
+            )
+
+        return None  # Unknown action - continue
+
+    def _create_result(
+        self,
+        status: ExecutionStatus,
+        plan: Plan,
+        context: dict[str, Any],
+        feedback: str | None = None,
+        steps_executed: int = 0,
+        total_tokens: int = 0,
+        total_latency: int = 0,
+    ) -> PlanExecutionResult:
+        """Create a PlanExecutionResult."""
+        return PlanExecutionResult(
+            status=status,
+            results=context,
+            feedback=feedback,
+            feedback_context=plan.to_feedback_context(),
+            completed_steps=[s.id for s in plan.get_completed_steps()],
+            steps_executed=steps_executed,
+            total_tokens=total_tokens,
+            total_latency_ms=total_latency,
+        )
+
+    def register_function(self, name: str, func: Callable) -> None:
+        """
+        Register a function for FUNCTION actions.
+
+        The function is stored in self.functions and also registered on the
+        worker.
+
+        Args:
+            name: Key under which the function is stored
+            func: The callable to register
+        """
+        self.functions[name] = func
+        self.worker.register_function(name, func)
+
+    def register_tool(self, tool: Tool) -> None:
+        """
+        Register a tool for TOOL_USE actions.
+
+        The tool is stored in self.tools under tool.name and also registered
+        on the worker.
+
+        Args:
+            tool: The tool to register
+        """
+        self.tools[tool.name] = tool
+        self.worker.register_tool(tool)
+
+    def add_evaluation_rule(self, rule) -> None:
+        """
+        Add an evaluation rule to the judge.
+
+        Args:
+            rule: Rule object passed straight through to self.judge.add_rule()
+        """
+        self.judge.add_rule(rule)
+
+    async def _request_approval(
+        self,
+        step: PlanStep,
+        context: dict[str, Any],
+    ) -> ApprovalResult | None:
+        """
+        Request human approval for a step.
+
+        Returns None if no callback is set (execution should pause).
+        """
+        if self.approval_callback is None:
+            return None
+
+        # Build preview of what will happen
+        preview_parts = []
+        if step.action.tool_name:
+            preview_parts.append(f"Tool: {step.action.tool_name}")
+            if step.action.tool_args:
+                import json
+
+                args_preview = json.dumps(step.action.tool_args, indent=2, default=str)
+                if len(args_preview) > 500:
+                    args_preview = args_preview[:500] + "..."
+                preview_parts.append(f"Args: {args_preview}")
+        elif step.action.prompt:
+            prompt_preview = (
+                step.action.prompt[:300] + "..."
+                if len(step.action.prompt) > 300
+                else step.action.prompt
+            )
+            preview_parts.append(f"Prompt: {prompt_preview}")
+
+        # Include step inputs resolved from context (what will be sent/used)
+        relevant_context = {}
+        for input_key, input_value in step.inputs.items():
+            # Resolve variable references like "$email_sequence"
+            if isinstance(input_value, str) and input_value.startswith("$"):
+                context_key = input_value[1:]  # Remove $ prefix
+                if context_key in context:
+                    relevant_context[input_key] = context[context_key]
+            else:
+                relevant_context[input_key] = input_value
+
+        request = ApprovalRequest(
+            step_id=step.id,
+            step_description=step.description,
+            action_type=step.action.action_type.value,
+            action_details={
+                "tool_name": step.action.tool_name,
+                "tool_args": step.action.tool_args,
+                "prompt": step.action.prompt,
+            },
+            context=relevant_context,
+            approval_message=step.approval_message,
+            preview="\n".join(preview_parts) if preview_parts else None,
+        )
+
+        return self.approval_callback(request)
+
+    def _skip_dependent_steps(self, plan: Plan, rejected_step_id: str) -> None:
+        """Mark steps that depend on a rejected step as skipped."""
+        for step in plan.steps:
+            if rejected_step_id in step.dependencies:
+                if step.status == StepStatus.PENDING:
+                    step.status = StepStatus.SKIPPED
+                    step.error = f"Skipped because dependency '{rejected_step_id}' was rejected"
+                    # Recursively skip dependents
+                    self._skip_dependent_steps(plan, step.id)
+
+    def _apply_modifications(self, step: PlanStep, modifications: dict[str, Any]) -> None:
+        """Apply human modifications to a step before execution."""
+        # Allow modifying tool args
+        if "tool_args" in modifications and step.action.tool_args:
+            step.action.tool_args.update(modifications["tool_args"])
+
+        # Allow modifying prompt
+        if "prompt" in modifications:
+            step.action.prompt = modifications["prompt"]
+
+        # Allow modifying inputs
+        if "inputs" in modifications:
+            step.inputs.update(modifications["inputs"])
+
+    def set_approval_callback(self, callback: ApprovalCallback) -> None:
+        """
+        Set the approval callback for HITL steps.
+
+        Replaces any callback given at construction time.
+
+        Args:
+            callback: Called with an ApprovalRequest for each step that
+                requires approval
+        """
+        self.approval_callback = callback
+
+
+# Convenience function for simple execution
+async def execute_plan(
+    plan: Plan,
+    goal: Goal,
+    runtime: Runtime,
+    llm: LLMProvider | None = None,
+    tools: dict[str, Tool] | None = None,
+    tool_executor: Callable | None = None,
+    context: dict[str, Any] | None = None,
+) -> PlanExecutionResult:
+    """
+    Execute a plan with default configuration.
+
+    Convenience function for simple use cases.
+    """
+    executor = FlexibleGraphExecutor(
+        runtime=runtime,
+        llm=llm,
+        tools=tools,
+        tool_executor=tool_executor,
+    )
+    return await executor.execute_plan(plan, goal, context)
@@ -0,0 +1,198 @@
+"""
+Goal Schema - The source of truth for agent behavior.
+
+A Goal defines WHAT the agent should achieve, not HOW. The graph structure
+(nodes and edges) is derived from the goal, not hardcoded.
+
+Goals are:
+- Declarative: Define success criteria, not implementation
+- Measurable: Success criteria are checkable
+- Constrained: Boundaries the agent must respect
+- Versionable: Can evolve based on runtime feedback
+"""
+
+from datetime import datetime
+from enum import Enum
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+
+class GoalStatus(str, Enum):
+    """
+    Lifecycle status of a goal.
+
+    Members are also str instances (str mixin), so each compares equal to its
+    lowercase string value.
+    """
+
+    DRAFT = "draft"  # Being defined
+    READY = "ready"  # Ready for agent creation
+    ACTIVE = "active"  # Has an agent graph, can execute
+    COMPLETED = "completed"  # Achieved
+    FAILED = "failed"  # Could not be achieved
+    SUSPENDED = "suspended"  # Paused for revision
+
+
+class SuccessCriterion(BaseModel):
+    """
+    A measurable condition that defines success.
+
+    Each criterion should be:
+    - Specific: Clear what it means
+    - Measurable: Can be evaluated programmatically or by LLM
+    - Achievable: Within the agent's capabilities
+    """
+
+    id: str
+    description: str = Field(description="Human-readable description of what success looks like")
+    metric: str = Field(
+        description="How to measure: 'output_contains', 'output_equals', 'llm_judge', 'custom'"
+    )
+    target: Any = Field(description="The target value or condition")
+    # Validated to lie in [0, 1]; Goal.is_success() sums these weights
+    weight: float = Field(default=1.0, ge=0.0, le=1.0, description="Relative importance (0-1)")
+    # Whether the criterion is currently satisfied; read by Goal.is_success()
+    met: bool = False
+
+    # Pydantic config: fields not declared above are accepted rather than rejected
+    model_config = {"extra": "allow"}
+
+
+class Constraint(BaseModel):
+    """
+    A boundary the agent must respect.
+
+    Constraints are either:
+    - Hard: Violation means failure
+    - Soft: Violation is discouraged but allowed
+    """
+
+    id: str
+    description: str
+    # Free-form string, not validated here. Goal.to_prompt_context() renders
+    # "hard" as MUST and every other value as SHOULD.
+    constraint_type: str = Field(
+        description="Type: 'hard' (must not violate) or 'soft' (prefer not to violate)"
+    )
+    category: str = Field(
+        default="general", description="Category: 'time', 'cost', 'safety', 'scope', 'quality'"
+    )
+    check: str = Field(
+        default="", description="How to check: expression, function name, or 'llm_judge'"
+    )
+
+    # Pydantic config: fields not declared above are accepted rather than rejected
+    model_config = {"extra": "allow"}
+
+
+class Goal(BaseModel):
+    """
+    The source of truth for agent behavior.
+
+    A Goal defines:
+    - WHAT to achieve (success criteria)
+    - WHAT NOT to do (constraints)
+    - CONTEXT for decision-making
+
+    The agent graph (nodes, edges) is derived from this goal.
+
+    Example:
+        goal = Goal(
+            id="calc-001",
+            name="Calculator",
+            description="Perform mathematical calculations accurately",
+            success_criteria=[
+                SuccessCriterion(
+                    id="accuracy",
+                    description="Result matches expected mathematical answer",
+                    metric="output_equals",
+                    target="expected_result",
+                    weight=1.0
+                )
+            ],
+            constraints=[
+                Constraint(
+                    id="no-crash",
+                    description="Handle invalid inputs gracefully, return 'Error'",
+                    constraint_type="hard",
+                    category="safety",
+                    check="output != exception"
+                )
+            ]
+        )
+    """
+
+    id: str
+    name: str
+    description: str
+    status: GoalStatus = GoalStatus.DRAFT
+
+    # What defines success
+    success_criteria: list[SuccessCriterion] = Field(default_factory=list)
+
+    # What the agent must respect
+    constraints: list[Constraint] = Field(default_factory=list)
+
+    # Context for the agent
+    context: dict[str, Any] = Field(
+        default_factory=dict,
+        description="Additional context: domain knowledge, user preferences, etc.",
+    )
+
+    # Capabilities required
+    required_capabilities: list[str] = Field(
+        default_factory=list,
+        description="What the agent needs: 'llm', 'web_search', 'code_execution', etc.",
+    )
+
+    # Input/output schema
+    input_schema: dict[str, Any] = Field(default_factory=dict, description="Expected input format")
+    output_schema: dict[str, Any] = Field(
+        default_factory=dict, description="Expected output format"
+    )
+
+    # Versioning for evolution
+    version: str = "1.0.0"
+    parent_version: str | None = None
+    evolution_reason: str | None = None
+
+    # Timestamps
+    created_at: datetime = Field(default_factory=datetime.now)
+    updated_at: datetime = Field(default_factory=datetime.now)
+
+    model_config = {"extra": "allow"}
+
+    def is_success(self) -> bool:
+        """Check if all weighted success criteria are met."""
+        if not self.success_criteria:
+            return False
+
+        total_weight = sum(c.weight for c in self.success_criteria)
+        met_weight = sum(c.weight for c in self.success_criteria if c.met)
+
+        return met_weight >= total_weight * 0.9  # 90% threshold
+
+    def check_constraint(self, constraint_id: str, value: Any) -> bool:
+        """Check if a specific constraint is satisfied."""
+        for c in self.constraints:
+            if c.id == constraint_id:
+                # This would be expanded with actual evaluation logic
+                return True
+        return True
+
+    def to_prompt_context(self) -> str:
+        """Generate context string for LLM prompts."""
+        lines = [
+            f"# Goal: {self.name}",
+            f"{self.description}",
+            "",
+            "## Success Criteria:",
+        ]
+
+        for sc in self.success_criteria:
+            lines.append(f"- {sc.description}")
+
+        if self.constraints:
+            lines.append("")
+            lines.append("## Constraints:")
+            for c in self.constraints:
+                severity = "MUST" if c.constraint_type == "hard" else "SHOULD"
+                lines.append(f"- [{severity}] {c.description}")
+
+        if self.context:
+            lines.append("")
+            lines.append("## Context:")
+            for key, value in self.context.items():
+                lines.append(f"- {key}: {value}")
+
+        return "\n".join(lines)
@@ -0,0 +1,255 @@
+"""
+Standardized HITL (Human-In-The-Loop) Protocol
+
+This module defines the formal structure for pause/resume interactions
+where agents need to gather input from humans.
+"""
+
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Any
+
+
+class HITLInputType(str, Enum):
+    """Type of input expected from human.
+
+    Members are string-valued; ``HITLRequest.to_dict`` serializes a
+    question's input type through ``.value``.
+    """
+
+    FREE_TEXT = "free_text"  # Open-ended text response
+    STRUCTURED = "structured"  # Specific fields to fill
+    SELECTION = "selection"  # Choose from options
+    APPROVAL = "approval"  # Yes/no/modify decision
+    MULTI_FIELD = "multi_field"  # Multiple related inputs
+
+
+@dataclass
+class HITLQuestion:
+    """A single question to ask the human.
+
+    ``id`` is the key under which the answer is stored in
+    ``HITLResponse.answers``; ``question`` is the text shown to the user.
+    """
+
+    # Identifier used as the answer key, and the prompt text itself
+    id: str
+    question: str
+    # Kind of answer expected; defaults to open-ended text
+    input_type: HITLInputType = HITLInputType.FREE_TEXT
+
+    # For SELECTION type
+    options: list[str] = field(default_factory=list)
+
+    # For STRUCTURED type
+    fields: dict[str, str] = field(default_factory=dict)  # {field_name: description}
+
+    # Metadata
+    required: bool = True
+    help_text: str = ""  # Optional hint shown under the question when non-empty
+
+
+@dataclass
+class HITLRequest:
+    """
+    Formal request for human input at a pause node.
+
+    This is what the agent produces when it needs human input.
+    """
+
+    # Context
+    objective: str  # What we're trying to accomplish
+    current_state: str  # Where we are in the process
+
+    # What we need
+    questions: list[HITLQuestion] = field(default_factory=list)
+    missing_info: list[str] = field(default_factory=list)
+
+    # Guidance
+    instructions: str = ""
+    examples: list[str] = field(default_factory=list)
+
+    # Metadata
+    request_id: str = ""
+    node_id: str = ""
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for serialization."""
+        return {
+            "objective": self.objective,
+            "current_state": self.current_state,
+            "questions": [
+                {
+                    "id": q.id,
+                    "question": q.question,
+                    "input_type": q.input_type.value,
+                    "options": q.options,
+                    "fields": q.fields,
+                    "required": q.required,
+                    "help_text": q.help_text,
+                }
+                for q in self.questions
+            ],
+            "missing_info": self.missing_info,
+            "instructions": self.instructions,
+            "examples": self.examples,
+            "request_id": self.request_id,
+            "node_id": self.node_id,
+        }
+
+
+@dataclass
+class HITLResponse:
+    """
+    Human's response to a HITL request.
+
+    This is what gets passed back when resuming from a pause.
+    """
+
+    # Original request reference
+    request_id: str
+
+    # Human's answers
+    answers: dict[str, Any] = field(default_factory=dict)  # {question_id: answer}
+    raw_input: str = ""  # Raw text if provided
+
+    # Metadata
+    response_time_ms: int = 0
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for serialization."""
+        return {
+            "request_id": self.request_id,
+            "answers": self.answers,
+            "raw_input": self.raw_input,
+            "response_time_ms": self.response_time_ms,
+        }
+
+
+class HITLProtocol:
+    """
+    Standardized protocol for HITL interactions.
+
+    Usage in pause nodes:
+
+    1. Pause Node: Generates HITLRequest with questions
+    2. Executor: Saves state and returns request to user
+    3. User: Provides HITLResponse with answers
+    4. Resume Node: Processes response and merges into context
+    """
+
+    @staticmethod
+    def create_request(
+        objective: str,
+        questions: list[HITLQuestion],
+        missing_info: list[str] | None = None,
+        node_id: str = "",
+    ) -> HITLRequest:
+        """Create a standardized HITL request."""
+        return HITLRequest(
+            objective=objective,
+            current_state="Awaiting clarification",
+            questions=questions,
+            missing_info=missing_info or [],
+            request_id=f"{node_id}_{hash(objective) % 10000}",
+            node_id=node_id,
+        )
+
+    @staticmethod
+    def parse_response(
+        raw_input: str,
+        request: HITLRequest,
+        use_haiku: bool = True,
+    ) -> HITLResponse:
+        """
+        Parse human's raw input into structured response.
+
+        Uses Haiku to intelligently extract answers for each question.
+        """
+        import os
+
+        response = HITLResponse(request_id=request.request_id, raw_input=raw_input)
+
+        # If no questions, just return raw input
+        if not request.questions:
+            return response
+
+        # Try to use Haiku for intelligent parsing
+        api_key = os.environ.get("ANTHROPIC_API_KEY")
+        if not use_haiku or not api_key:
+            # Simple fallback: treat as answer to first question
+            if request.questions:
+                response.answers[request.questions[0].id] = raw_input
+            return response
+
+        # Use Haiku to extract answers
+        try:
+            import json
+
+            import anthropic
+
+            questions_str = "\n".join(
+                [f"{i + 1}. {q.question} (id: {q.id})" for i, q in enumerate(request.questions)]
+            )
+
+            prompt = f"""Parse the user's response and extract answers for each question.
+
+Questions asked:
+{questions_str}
+
+User's response:
+{raw_input}
+
+Extract the answer for each question. Output JSON with question IDs as keys.
+
+Example format:
+{{"question-1": "answer here", "question-2": "answer here"}}"""
+
+            client = anthropic.Anthropic(api_key=api_key)
+            message = client.messages.create(
+                model="claude-3-5-haiku-20241022",
+                max_tokens=500,
+                messages=[{"role": "user", "content": prompt}],
+            )
+
+            # Parse Haiku's response
+            import re
+
+            response_text = message.content[0].text.strip()
+            json_match = re.search(r"\{[^{}]*\}", response_text, re.DOTALL)
+
+            if json_match:
+                parsed = json.loads(json_match.group())
+                response.answers = parsed
+
+        except Exception:
+            # Fallback: use raw input for first question
+            if request.questions:
+                response.answers[request.questions[0].id] = raw_input
+
+        return response
+
+    @staticmethod
+    def format_for_display(request: HITLRequest) -> str:
+        """Format HITL request for user-friendly display."""
+        parts = []
+
+        if request.objective:
+            parts.append(f"📋 Objective: {request.objective}")
+
+        if request.current_state:
+            parts.append(f"📍 Current State: {request.current_state}")
+
+        if request.instructions:
+            parts.append(f"\n{request.instructions}")
+
+        if request.questions:
+            parts.append(f"\n❓ Questions ({len(request.questions)}):")
+            for i, q in enumerate(request.questions, 1):
+                parts.append(f"{i}. {q.question}")
+                if q.help_text:
+                    parts.append(f"   💡 {q.help_text}")
+                if q.options:
+                    parts.append(f"   Options: {', '.join(q.options)}")
+
+        if request.missing_info:
+            parts.append("\n📝 Missing Information:")
+            for info in request.missing_info:
+                parts.append(f"  • {info}")
+
+        if request.examples:
+            parts.append("\n📚 Examples:")
+            for example in request.examples:
+                parts.append(f"  • {example}")
+
+        return "\n".join(parts)
@@ -0,0 +1,406 @@
+"""
+Hybrid Judge for Evaluating Plan Step Results.
+
+The HybridJudge evaluates step execution results using:
+1. Rule-based evaluation (fast, deterministic)
+2. LLM-based evaluation (fallback for ambiguous cases)
+
+Escalation path: rules → LLM → human
+"""
+
+from dataclasses import dataclass, field
+from typing import Any
+
+from framework.graph.code_sandbox import safe_eval
+from framework.graph.goal import Goal
+from framework.graph.plan import (
+    EvaluationRule,
+    Judgment,
+    JudgmentAction,
+    PlanStep,
+)
+from framework.llm.provider import LLMProvider
+
+
+@dataclass
+class RuleEvaluationResult:
+    """Result of rule-based evaluation.
+
+    Returned by ``HybridJudge._evaluate_rules``. When a rule matches,
+    ``is_definitive`` is True and ``judgment`` holds the rule's verdict;
+    otherwise ``judgment`` stays None and ``context`` carries the evaluation
+    context that was built for the rules.
+    """
+
+    is_definitive: bool  # True if a rule matched definitively
+    judgment: Judgment | None = None  # Verdict of the matching rule, if any
+    context: dict[str, Any] = field(default_factory=dict)  # Eval context when no rule matched
+    rules_checked: int = 0  # Number of rules evaluated before returning
+    rule_matched: str | None = None  # ID of the rule that matched, if any
+
+
+class HybridJudge:
+    """
+    Evaluates plan step results using rules first, then LLM fallback.
+
+    Usage:
+        judge = HybridJudge(llm=llm_provider)
+        judge.add_rule(EvaluationRule(
+            id="success_check",
+            condition="result.get('success') == True",
+            action=JudgmentAction.ACCEPT,
+        ))
+
+        judgment = await judge.evaluate(step, result, goal)
+    """
+
+    def __init__(
+        self,
+        llm: LLMProvider | None = None,
+        rules: list[EvaluationRule] | None = None,
+        llm_confidence_threshold: float = 0.7,
+    ):
+        """
+        Initialize the HybridJudge.
+
+        Args:
+            llm: LLM provider for ambiguous cases
+            rules: Initial evaluation rules
+            llm_confidence_threshold: Confidence below this triggers escalation
+        """
+        self.llm = llm
+        self.rules: list[EvaluationRule] = rules or []
+        self.llm_confidence_threshold = llm_confidence_threshold
+
+        # Sort rules by priority (higher first)
+        self._sort_rules()
+
+    def _sort_rules(self):
+        """Sort rules by priority."""
+        self.rules.sort(key=lambda r: -r.priority)
+
+    def add_rule(self, rule: EvaluationRule) -> None:
+        """Add an evaluation rule."""
+        self.rules.append(rule)
+        self._sort_rules()
+
+    def remove_rule(self, rule_id: str) -> bool:
+        """Remove a rule by ID. Returns True if found and removed."""
+        for i, rule in enumerate(self.rules):
+            if rule.id == rule_id:
+                self.rules.pop(i)
+                return True
+        return False
+
+    async def evaluate(
+        self,
+        step: PlanStep,
+        result: Any,
+        goal: Goal,
+        context: dict[str, Any] | None = None,
+    ) -> Judgment:
+        """
+        Evaluate a step result.
+
+        Args:
+            step: The executed plan step
+            result: The result of executing the step
+            goal: The goal context for evaluation
+            context: Additional context from previous steps
+
+        Returns:
+            Judgment with action and feedback
+        """
+        context = context or {}
+
+        # Try rule-based evaluation first
+        rule_result = self._evaluate_rules(step, result, goal, context)
+
+        if rule_result.is_definitive:
+            return rule_result.judgment
+
+        # Fall back to LLM evaluation
+        if self.llm:
+            return await self._evaluate_llm(step, result, goal, context, rule_result)
+
+        # No LLM available - default to accept with low confidence
+        return Judgment(
+            action=JudgmentAction.ACCEPT,
+            reasoning="No definitive rule matched and no LLM available for evaluation",
+            confidence=0.5,
+            llm_used=False,
+        )
+
+    def _evaluate_rules(
+        self,
+        step: PlanStep,
+        result: Any,
+        goal: Goal,
+        context: dict[str, Any],
+    ) -> RuleEvaluationResult:
+        """Evaluate step using rules."""
+        rules_checked = 0
+
+        # Build evaluation context
+        eval_context = {
+            "step": step.model_dump() if hasattr(step, "model_dump") else step,
+            "result": result,
+            "goal": goal.model_dump() if hasattr(goal, "model_dump") else goal,
+            "context": context,
+            "success": isinstance(result, dict) and result.get("success", False),
+            "error": isinstance(result, dict) and result.get("error"),
+        }
+
+        for rule in self.rules:
+            rules_checked += 1
+
+            # Evaluate rule condition
+            eval_result = safe_eval(rule.condition, eval_context)
+
+            if eval_result.success and eval_result.result:
+                # Rule matched!
+                feedback = self._format_feedback(rule.feedback_template, eval_context)
+
+                return RuleEvaluationResult(
+                    is_definitive=True,
+                    judgment=Judgment(
+                        action=rule.action,
+                        reasoning=rule.description,
+                        feedback=feedback if feedback else None,
+                        rule_matched=rule.id,
+                        confidence=1.0,
+                        llm_used=False,
+                    ),
+                    rules_checked=rules_checked,
+                    rule_matched=rule.id,
+                )
+
+        # No rule matched definitively
+        return RuleEvaluationResult(
+            is_definitive=False,
+            context=eval_context,
+            rules_checked=rules_checked,
+        )
+
+    def _format_feedback(
+        self,
+        template: str,
+        context: dict[str, Any],
+    ) -> str:
+        """Format feedback template with context values."""
+        if not template:
+            return ""
+
+        try:
+            return template.format(**context)
+        except (KeyError, ValueError):
+            return template
+
+    async def _evaluate_llm(
+        self,
+        step: PlanStep,
+        result: Any,
+        goal: Goal,
+        context: dict[str, Any],
+        rule_result: RuleEvaluationResult,
+    ) -> Judgment:
+        """Evaluate step using LLM."""
+        system_prompt = self._build_llm_system_prompt(goal)
+        user_prompt = self._build_llm_user_prompt(step, result, context, rule_result)
+
+        try:
+            response = self.llm.complete(
+                messages=[{"role": "user", "content": user_prompt}],
+                system=system_prompt,
+            )
+
+            # Parse LLM response
+            judgment = self._parse_llm_response(response.content)
+            judgment.llm_used = True
+
+            # Check confidence threshold
+            if judgment.confidence < self.llm_confidence_threshold:
+                # Low confidence - escalate
+                return Judgment(
+                    action=JudgmentAction.ESCALATE,
+                    reasoning=(
+                        f"LLM confidence ({judgment.confidence:.2f}) "
+                        f"below threshold ({self.llm_confidence_threshold})"
+                    ),
+                    feedback=judgment.feedback,
+                    confidence=judgment.confidence,
+                    llm_used=True,
+                    context={"original_judgment": judgment.model_dump()},
+                )
+
+            return judgment
+
+        except Exception as e:
+            # LLM failed - escalate
+            return Judgment(
+                action=JudgmentAction.ESCALATE,
+                reasoning=f"LLM evaluation failed: {e}",
+                feedback="Human review needed due to LLM error",
+                llm_used=True,
+            )
+
+    def _build_llm_system_prompt(self, goal: Goal) -> str:
+        """Build system prompt for LLM judge."""
+        return f"""You are a judge evaluating the execution of a plan step.
+
+GOAL: {goal.description}
+
+SUCCESS CRITERIA:
+{chr(10).join(f"- {sc.description}" for sc in goal.success_criteria)}
+
+CONSTRAINTS:
+{chr(10).join(f"- {c.description}" for c in goal.constraints)}
+
+Your task is to evaluate whether the step was executed successfully and decide the next action.
+
+Respond in this exact format:
+ACTION: [ACCEPT|RETRY|REPLAN|ESCALATE]
+CONFIDENCE: [0.0-1.0]
+REASONING: [Your reasoning]
+FEEDBACK: [Feedback for retry/replan, or empty if accepting]
+
+Actions:
+- ACCEPT: Step completed successfully, continue to next step
+- RETRY: Step failed but can be retried with feedback
+- REPLAN: Step failed in a way that requires replanning
+- ESCALATE: Requires human intervention
+"""
+
+    def _build_llm_user_prompt(
+        self,
+        step: PlanStep,
+        result: Any,
+        context: dict[str, Any],
+        rule_result: RuleEvaluationResult,
+    ) -> str:
+        """Build user prompt for LLM judge."""
+        return f"""Evaluate this step execution:
+
+STEP: {step.description}
+STEP ID: {step.id}
+ACTION TYPE: {step.action.action_type}
+EXPECTED OUTPUTS: {step.expected_outputs}
+
+RESULT:
+{result}
+
+CONTEXT FROM PREVIOUS STEPS:
+{context}
+
+RULES CHECKED: {rule_result.rules_checked} (none matched definitively)
+
+Please evaluate and provide your judgment."""
+
+    def _parse_llm_response(self, response: str) -> Judgment:
+        """Parse LLM response into Judgment."""
+        lines = response.strip().split("\n")
+
+        action = JudgmentAction.ACCEPT
+        confidence = 0.8
+        reasoning = ""
+        feedback = ""
+
+        for line in lines:
+            line = line.strip()
+            if line.startswith("ACTION:"):
+                action_str = line.split(":", 1)[1].strip().upper()
+                try:
+                    action = JudgmentAction(action_str.lower())
+                except ValueError:
+                    action = JudgmentAction.ESCALATE
+
+            elif line.startswith("CONFIDENCE:"):
+                try:
+                    confidence = float(line.split(":", 1)[1].strip())
+                except ValueError:
+                    confidence = 0.5
+
+            elif line.startswith("REASONING:"):
+                reasoning = line.split(":", 1)[1].strip()
+
+            elif line.startswith("FEEDBACK:"):
+                feedback = line.split(":", 1)[1].strip()
+
+        return Judgment(
+            action=action,
+            reasoning=reasoning or "LLM evaluation",
+            feedback=feedback if feedback else None,
+            confidence=confidence,
+        )
+
+
+# Factory function for creating judge with common rules
+def create_default_judge(llm: LLMProvider | None = None) -> HybridJudge:
+    """
+    Create a HybridJudge with commonly useful default rules.
+
+    Args:
+        llm: LLM provider for fallback evaluation
+
+    Returns:
+        Configured HybridJudge instance
+    """
+    judge = HybridJudge(llm=llm)
+
+    # Rule: Accept on explicit success flag
+    judge.add_rule(
+        EvaluationRule(
+            id="explicit_success",
+            description="Step explicitly marked as successful",
+            condition="isinstance(result, dict) and result.get('success') == True",
+            action=JudgmentAction.ACCEPT,
+            priority=100,
+        )
+    )
+
+    # Rule: Retry on transient errors
+    judge.add_rule(
+        EvaluationRule(
+            id="transient_error_retry",
+            description="Transient error that can be retried",
+            condition=(
+                "isinstance(result, dict) and "
+                "result.get('error_type') in ['timeout', 'rate_limit', 'connection_error']"
+            ),
+            action=JudgmentAction.RETRY,
+            feedback_template="Transient error: {result[error]}. Please retry.",
+            priority=90,
+        )
+    )
+
+    # Rule: Replan on missing data
+    judge.add_rule(
+        EvaluationRule(
+            id="missing_data_replan",
+            description="Required data not available",
+            condition="isinstance(result, dict) and result.get('error_type') == 'missing_data'",
+            action=JudgmentAction.REPLAN,
+            feedback_template="Missing required data: {result[error]}. Plan needs adjustment.",
+            priority=80,
+        )
+    )
+
+    # Rule: Escalate on security issues
+    judge.add_rule(
+        EvaluationRule(
+            id="security_escalate",
+            description="Security issue detected",
+            condition="isinstance(result, dict) and result.get('error_type') == 'security'",
+            action=JudgmentAction.ESCALATE,
+            feedback_template="Security issue detected: {result[error]}",
+            priority=200,
+        )
+    )
+
+    # Rule: Fail on max retries exceeded
+    judge.add_rule(
+        EvaluationRule(
+            id="max_retries_fail",
+            description="Maximum retries exceeded",
+            condition="step.get('attempts', 0) >= step.get('max_retries', 3)",
+            action=JudgmentAction.REPLAN,
+            feedback_template="Step '{step[id]}' failed after {step[attempts]} attempts",
+            priority=150,
+        )
+    )
+
+    return judge
@@ -0,0 +1,392 @@
+"""
+Output Cleaner - Framework-level I/O validation and cleaning.
+
+Validates node outputs match expected schemas and uses fast LLM
+to clean malformed outputs before they flow to the next node.
+
+This prevents cascading failures and dramatically improves execution success rates.
+"""
+
+import json
+import logging
+import re
+from dataclasses import dataclass, field
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+
+def _heuristic_repair(text: str) -> dict | None:
+    """
+    Attempt to repair JSON without an LLM call.
+
+    Handles common errors:
+    - Markdown code blocks
+    - Python booleans/None (True -> true)
+    - Single quotes instead of double quotes
+    """
+    if not isinstance(text, str):
+        return None
+
+    # 1. Strip Markdown code blocks
+    text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.MULTILINE)
+    text = re.sub(r"\s*```$", "", text, flags=re.MULTILINE)
+    text = text.strip()
+
+    # 2. Find outermost JSON-like structure (greedy match)
+    match = re.search(r"(\{.*\}|\[.*\])", text, re.DOTALL)
+    if match:
+        candidate = match.group(1)
+
+        # 3. Common fixes
+        # Fix Python constants
+        candidate = re.sub(r"\bTrue\b", "true", candidate)
+        candidate = re.sub(r"\bFalse\b", "false", candidate)
+        candidate = re.sub(r"\bNone\b", "null", candidate)
+
+        # 4. Attempt load
+        try:
+            return json.loads(candidate)
+        except json.JSONDecodeError:
+            # 5. Advanced: Try swapping single quotes if double quotes fail
+            # This is risky but effective for simple dicts
+            try:
+                if "'" in candidate and '"' not in candidate:
+                    candidate_swapped = candidate.replace("'", '"')
+                    return json.loads(candidate_swapped)
+            except json.JSONDecodeError:
+                pass
+
+    return None
+
+
+@dataclass
+class CleansingConfig:
+    """Configuration for output cleansing.
+
+    Consumed by ``OutputCleaner``. ``max_retries`` and
+    ``cache_successful_patterns`` are not read by the code visible in this
+    module section; presumably they are used elsewhere (TODO confirm).
+    """
+
+    enabled: bool = True  # When False, clean_output returns its input unchanged
+    fast_model: str = "cerebras/llama-3.3-70b"  # Fast, cheap model for cleaning
+    max_retries: int = 2
+    cache_successful_patterns: bool = True
+    fallback_to_raw: bool = True  # If cleaning fails, pass raw output
+    log_cleanings: bool = True  # Log when cleansing happens
+
+
+@dataclass
+class ValidationResult:
+    """Result of output validation.
+
+    ``valid`` is False whenever ``errors`` is non-empty; warnings alone do
+    not fail validation.
+    """
+
+    valid: bool
+    errors: list[str] = field(default_factory=list)  # Problems that make the output invalid
+    warnings: list[str] = field(default_factory=list)  # Suspicious but non-fatal findings
+    # Not populated by validate_output; presumably set by callers (TODO confirm)
+    cleaned_output: dict[str, Any] | None = None
+
+
+class OutputCleaner:
+    """
+    Framework-level output validation and cleaning.
+
+    Uses heuristics and fast LLM to clean malformed outputs
+    before they flow to the next node.
+    """
+
+    def __init__(self, config: CleansingConfig, llm_provider=None):
+        """
+        Initialize the output cleaner.
+
+        Args:
+            config: Cleansing configuration
+            llm_provider: Optional LLM provider.
+        """
+        self.config = config
+        self.success_cache: dict[str, Any] = {}  # Cache successful patterns
+        self.failure_count: dict[str, int] = {}  # Track edge failures
+        self.cleansing_count = 0  # Track total cleanings performed
+
+        # Initialize LLM provider for cleaning
+        if llm_provider:
+            self.llm = llm_provider
+        elif config.enabled:
+            # Create dedicated fast LLM provider for cleaning
+            try:
+                import os
+
+                from framework.llm.litellm import LiteLLMProvider
+
+                api_key = os.environ.get("CEREBRAS_API_KEY")
+                if api_key:
+                    self.llm = LiteLLMProvider(
+                        api_key=api_key,
+                        model=config.fast_model,
+                    )
+                    logger.info(f"✓ Initialized OutputCleaner with {config.fast_model}")
+                else:
+                    logger.warning("⚠ CEREBRAS_API_KEY not found, output cleaning will be disabled")
+                    self.llm = None
+            except ImportError:
+                logger.warning("⚠ LiteLLMProvider not available, output cleaning disabled")
+                self.llm = None
+        else:
+            self.llm = None
+
+    def validate_output(
+        self,
+        output: dict[str, Any],
+        source_node_id: str,
+        target_node_spec: Any,  # NodeSpec
+    ) -> ValidationResult:
+        """
+        Validate output matches target node's expected input schema.
+
+        Returns:
+            ValidationResult with errors and optionally cleaned output
+        """
+        errors = []
+        warnings = []
+
+        # Check 1: Required input keys present
+        for key in target_node_spec.input_keys:
+            if key not in output:
+                errors.append(f"Missing required key: '{key}'")
+                continue
+
+            value = output[key]
+
+            # Check 2: Detect if value is JSON string (the JSON parsing trap!)
+            if isinstance(value, str):
+                # Try parsing as JSON to detect the trap
+                try:
+                    parsed = json.loads(value)
+                    if isinstance(parsed, dict):
+                        if key in parsed:
+                            # Key exists in parsed JSON - classic parsing failure!
+                            errors.append(
+                                f"Key '{key}' contains JSON string with nested '{key}' field - "
+                                f"likely parsing failure from LLM node"
+                            )
+                        elif len(value) > 100:
+                            # Large JSON string, but doesn't contain the key
+                            warnings.append(
+                                f"Key '{key}' contains JSON string ({len(value)} chars)"
+                            )
+                except json.JSONDecodeError:
+                    # Not JSON, check if suspiciously large
+                    if len(value) > 500:
+                        warnings.append(
+                            f"Key '{key}' contains large string ({len(value)} chars), "
+                            f"possibly entire LLM response"
+                        )
+
+            # Check 3: Type validation (if schema provided)
+            if hasattr(target_node_spec, "input_schema") and target_node_spec.input_schema:
+                expected_schema = target_node_spec.input_schema.get(key)
+                if expected_schema:
+                    expected_type = expected_schema.get("type")
+                    if expected_type and not self._type_matches(value, expected_type):
+                        actual_type = type(value).__name__
+                        errors.append(
+                            f"Key '{key}': expected type '{expected_type}', got '{actual_type}'"
+                        )
+
+        # Warnings don't make validation fail, but errors do
+        is_valid = len(errors) == 0
+
+        if not is_valid and self.config.log_cleanings:
+            logger.warning(
+                f"⚠ Output validation failed for {source_node_id} → {target_node_spec.id}: "
+                f"{len(errors)} error(s), {len(warnings)} warning(s)"
+            )
+
+        return ValidationResult(
+            valid=is_valid,
+            errors=errors,
+            warnings=warnings,
+        )
+
+    def clean_output(
+        self,
+        output: dict[str, Any],
+        source_node_id: str,
+        target_node_spec: Any,  # NodeSpec
+        validation_errors: list[str],
+    ) -> dict[str, Any]:
+        """
+        Use heuristics and fast LLM to clean malformed output.
+
+        Args:
+            output: Raw output from source node
+            source_node_id: ID of source node
+            target_node_spec: Target node spec (for schema)
+            validation_errors: Errors from validation
+
+        Returns:
+            Cleaned output matching target schema
+        """
+        if not self.config.enabled:
+            logger.warning("⚠ Output cleansing disabled in config")
+            return output
+
+        # --- PHASE 1: Fast Heuristic Repair (Avoids LLM call) ---
+        # Often the output is just a string containing JSON, or has minor syntax errors
+        # If output is a dictionary but malformed, we might need to serialize it first
+        # to try and fix the underlying string representation if it came from raw text
+
+        # Heuristic: Check if any value is actually a JSON string that should be promoted
+        # This handles the "JSON Parsing Trap" where LLM returns {"key": "{\"nested\": ...}"}
+        heuristic_fixed = False
+        fixed_output = output.copy()
+
+        for key, value in output.items():
+            if isinstance(value, str):
+                repaired = _heuristic_repair(value)
+                if repaired and isinstance(repaired, dict | list):
+                    # Check if this repaired structure looks like what we want
+                    # e.g. if the key is 'data' and the string contained valid JSON
+                    fixed_output[key] = repaired
+                    heuristic_fixed = True
+
+        # If we fixed something, re-validate manually to see if it's enough
+        if heuristic_fixed:
+            logger.info("⚡ Heuristic repair applied (nested JSON expansion)")
+            return fixed_output
+
+        # --- PHASE 2: LLM-based Repair ---
+        if not self.llm:
+            logger.warning("⚠ No LLM provider available for cleansing")
+            return output
+
+        # Build schema description for target node
+        schema_desc = self._build_schema_description(target_node_spec)
+
+        # Create cleansing prompt
+        prompt = f"""Clean this malformed agent output to match the expected schema.
+
+VALIDATION ERRORS:
+{chr(10).join(f"- {e}" for e in validation_errors)}
+
+EXPECTED SCHEMA for node '{target_node_spec.id}':
+{schema_desc}
+
+RAW OUTPUT from node '{source_node_id}':
+{json.dumps(output, indent=2, default=str)}
+
+INSTRUCTIONS:
+1. Extract values that match the expected schema keys
+2. If a value is a JSON string, parse it and extract the correct field
+3. Convert types to match the schema (string, dict, list, number, boolean)
+4. Remove extra fields not in the expected schema
+5. Ensure all required keys are present
+
+Return ONLY valid JSON matching the expected schema. No explanations, no markdown."""
+
+        try:
+            if self.config.log_cleanings:
+                logger.info(
+                    f"🧹 Cleaning output from '{source_node_id}' using {self.config.fast_model}"
+                )
+
+            response = self.llm.complete(
+                messages=[{"role": "user", "content": prompt}],
+                system=(
+                    "You clean malformed agent outputs. Return only valid JSON matching the schema."
+                ),
+                max_tokens=2048,  # Sufficient for cleaning most outputs
+            )
+
+            # Parse cleaned output
+            cleaned_text = response.content.strip()
+
+            # Apply heuristic repair to the LLM's output too (just in case)
+            cleaned = _heuristic_repair(cleaned_text)
+
+            if not cleaned:
+                # Fallback to standard load if heuristic returns None (unlikely for LLM output)
+                cleaned = json.loads(cleaned_text)
+
+            if isinstance(cleaned, dict):
+                self.cleansing_count += 1
+                if self.config.log_cleanings:
+                    logger.info(
+                        f"✓ Output cleaned successfully (total cleanings: {self.cleansing_count})"
+                    )
+                return cleaned
+            else:
+                logger.warning(f"⚠ Cleaned output is not a dict: {type(cleaned)}")
+                if self.config.fallback_to_raw:
+                    return output
+                else:
+                    raise ValueError(f"Cleaning produced {type(cleaned)}, expected dict")
+
+        except json.JSONDecodeError as e:
+            logger.error(f"✗ Failed to parse cleaned JSON: {e}")
+            if self.config.fallback_to_raw:
+                logger.info("↩ Falling back to raw output")
+                return output
+            else:
+                raise
+
+        except Exception as e:
+            logger.error(f"✗ Output cleaning failed: {e}")
+            if self.config.fallback_to_raw:
+                logger.info("↩ Falling back to raw output")
+                return output
+            else:
+                raise
+
+    def _build_schema_description(self, node_spec: Any) -> str:
+        """Build human-readable schema description from NodeSpec."""
+        lines = ["{"]
+
+        for key in node_spec.input_keys:
+            # Get type hint and description if available
+            if hasattr(node_spec, "input_schema") and node_spec.input_schema:
+                schema = node_spec.input_schema.get(key, {})
+                type_hint = schema.get("type", "any")
+                description = schema.get("description", "")
+                required = schema.get("required", True)
+
+                line = f'  "{key}": {type_hint}'
+                if description:
+                    line += f"  // {description}"
+                if required:
+                    line += " (required)"
+                lines.append(line + ",")
+            else:
+                # No schema, just show the key
+                lines.append(f'  "{key}": any  // (required)')
+
+        lines.append("}")
+        return "\n".join(lines)
+
+    def _type_matches(self, value: Any, expected_type: str) -> bool:
+        """Check if value matches expected type."""
+        type_map = {
+            "string": str,
+            "str": str,
+            "int": int,
+            "integer": int,
+            "float": float,
+            "number": (int, float),
+            "bool": bool,
+            "boolean": bool,
+            "dict": dict,
+            "object": dict,
+            "list": list,
+            "array": list,
+            "any": object,  # Matches everything
+        }
+
+        expected_class = type_map.get(expected_type.lower())
+        if expected_class:
+            return isinstance(value, expected_class)
+
+        # Unknown type, allow it
+        return True
+
+    def get_stats(self) -> dict[str, Any]:
+        """Get cleansing statistics."""
+        return {
+            "total_cleanings": self.cleansing_count,
+            "failure_count": dict(self.failure_count),
+            "cache_size": len(self.success_cache),
+        }
@@ -0,0 +1,513 @@
+"""
+Plan Data Structures for Flexible Execution.
+
+Plans are created externally (by Claude Code or another LLM agent) and
+executed internally by the FlexibleGraphExecutor with Worker-Judge loop.
+
+The Plan is the contract between the external planner and the executor:
+- Planner creates a Plan with PlanSteps
+- Executor runs steps and judges results
+- If replanning needed, returns feedback to external planner
+"""
+
+from datetime import datetime
+from enum import Enum
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+
+class ActionType(str, Enum):
+    """Types of actions a PlanStep can perform.
+
+    Members subclass ``str``, so each one compares equal to its raw string
+    value and can be looked up from it, e.g. ``ActionType("llm_call")``.
+    """
+
+    LLM_CALL = "llm_call"  # Call LLM for generation
+    TOOL_USE = "tool_use"  # Use a registered tool
+    SUB_GRAPH = "sub_graph"  # Execute a sub-graph
+    FUNCTION = "function"  # Call a Python function
+    CODE_EXECUTION = "code_execution"  # Execute dynamic code (sandboxed)
+
+
+class StepStatus(str, Enum):
+    """Lifecycle state of a single plan step."""
+
+    PENDING = "pending"
+    # Waiting for human approval
+    AWAITING_APPROVAL = "awaiting_approval"
+    IN_PROGRESS = "in_progress"
+    COMPLETED = "completed"
+    FAILED = "failed"
+    SKIPPED = "skipped"
+    # Human rejected execution
+    REJECTED = "rejected"
+
+    def is_terminal(self) -> bool:
+        """Tell whether the step has finished and will not execute further.
+
+        COMPLETED, FAILED, SKIPPED and REJECTED are terminal; PENDING,
+        AWAITING_APPROVAL and IN_PROGRESS are not.
+        """
+        still_open = (
+            StepStatus.PENDING,
+            StepStatus.AWAITING_APPROVAL,
+            StepStatus.IN_PROGRESS,
+        )
+        return self not in still_open
+
+    def is_successful(self) -> bool:
+        """Tell whether the step finished successfully (status is COMPLETED)."""
+        return self is StepStatus.COMPLETED
+
+
+class ApprovalDecision(str, Enum):
+    """Human decision on a step requiring approval.
+
+    Members subclass ``str`` and therefore compare equal to their raw string
+    values (``"approve"``, ``"reject"``, ``"modify"``, ``"abort"``).
+    """
+
+    APPROVE = "approve"  # Execute as planned
+    REJECT = "reject"  # Skip this step
+    MODIFY = "modify"  # Execute with modifications
+    ABORT = "abort"  # Stop entire execution
+
+
+class ApprovalRequest(BaseModel):
+    """Request for human approval before executing a step.
+
+    Only ``step_id``, ``step_description`` and ``action_type`` are required;
+    every other field has a default.
+    """
+
+    step_id: str
+    step_description: str
+    # Declared as a plain string here, not as the ActionType enum.
+    action_type: str
+    action_details: dict[str, Any] = Field(default_factory=dict)
+    context: dict[str, Any] = Field(default_factory=dict)
+    approval_message: str | None = None
+
+    # Preview of what will happen
+    preview: str | None = None
+
+    # Keep fields that are not declared above instead of rejecting them.
+    model_config = {"extra": "allow"}
+
+
+class ApprovalResult(BaseModel):
+    """Result of human approval decision.
+
+    ``decision`` is the only required field.
+    """
+
+    decision: ApprovalDecision
+    # Optional free-text explanation for the decision.
+    reason: str | None = None
+    # Presumably the changes to apply when decision is MODIFY; the consumer
+    # is not visible in this module -- confirm against the executor.
+    modifications: dict[str, Any] = Field(default_factory=dict)
+
+    # Keep fields that are not declared above instead of rejecting them.
+    model_config = {"extra": "allow"}
+
+
+class JudgmentAction(str, Enum):
+    """Actions the judge can take after evaluating a step.
+
+    Members subclass ``str`` and therefore compare equal to their raw string
+    values (``"accept"``, ``"retry"``, ``"replan"``, ``"escalate"``).
+    """
+
+    ACCEPT = "accept"  # Step completed successfully, continue
+    RETRY = "retry"  # Retry the step with feedback
+    REPLAN = "replan"  # Return to external planner for new plan
+    ESCALATE = "escalate"  # Request human intervention
+
+
+class ActionSpec(BaseModel):
+    """
+    Specification for an action to be executed.
+
+    This is the "what to do" part of a PlanStep.
+
+    ``action_type`` is the only required field. The remaining fields are
+    grouped by the action type they belong to; this model itself does not
+    check that the group matching ``action_type`` is filled in.
+    """
+
+    action_type: ActionType
+
+    # For LLM_CALL
+    prompt: str | None = None
+    system_prompt: str | None = None
+    model: str | None = None
+
+    # For TOOL_USE
+    tool_name: str | None = None
+    tool_args: dict[str, Any] = Field(default_factory=dict)
+
+    # For SUB_GRAPH
+    graph_id: str | None = None
+
+    # For FUNCTION
+    function_name: str | None = None
+    function_args: dict[str, Any] = Field(default_factory=dict)
+
+    # For CODE_EXECUTION
+    code: str | None = None
+    language: str = "python"
+
+    # Keep fields that are not declared above instead of rejecting them.
+    model_config = {"extra": "allow"}
+
+
+class PlanStep(BaseModel):
+    """
+    One unit of work inside a Plan.
+
+    The external planner creates it, the Worker executes it and the Judge
+    evaluates the outcome. Besides the static description of the work it also
+    carries the mutable execution state (status, result, error, attempts).
+    """
+
+    id: str
+    description: str
+    action: ActionSpec
+
+    # Data flow
+    inputs: dict[str, Any] = Field(
+        default_factory=dict,
+        description="Input data for this step (can reference previous step outputs)",
+    )
+    expected_outputs: list[str] = Field(
+        default_factory=list,
+        description="Keys this step should produce",
+    )
+
+    # Dependencies
+    dependencies: list[str] = Field(
+        default_factory=list,
+        description="IDs of steps that must complete before this one",
+    )
+
+    # Human-in-the-loop (HITL)
+    requires_approval: bool = Field(
+        default=False,
+        description="If True, requires human approval before execution",
+    )
+    approval_message: str | None = Field(
+        default=None,
+        description="Message to show human when requesting approval",
+    )
+
+    # Execution state
+    status: StepStatus = StepStatus.PENDING
+    result: Any | None = None
+    error: str | None = None
+    attempts: int = 0
+    max_retries: int = 3
+
+    # Metadata
+    started_at: datetime | None = None
+    completed_at: datetime | None = None
+
+    model_config = {"extra": "allow"}
+
+    def is_ready(self, terminal_step_ids: set[str]) -> bool:
+        """Tell whether this step may be picked up for execution.
+
+        The step is ready when it is still PENDING and every ID listed in
+        ``dependencies`` is contained in ``terminal_step_ids``.
+
+        Note: "terminal" includes failed, skipped and rejected steps, so a
+        step can become ready even though a dependency did not succeed. The
+        executor is expected to inspect the dependencies' outcome and react
+        (e.g. skip the step or mark it as blocked).
+
+        Args:
+            terminal_step_ids: Set of step IDs that are in a terminal state
+        """
+        not_started = self.status == StepStatus.PENDING
+        return not_started and all(
+            dep_id in terminal_step_ids for dep_id in self.dependencies
+        )
+
+
+class Judgment(BaseModel):
+    """
+    Result of judging a step execution.
+
+    The Judge evaluates step results and decides what to do next.
+
+    ``action`` and ``reasoning`` are required; all other fields have defaults.
+    """
+
+    action: JudgmentAction
+    reasoning: str
+    feedback: str | None = None  # For retry/replan - what went wrong
+
+    # For rule-based judgments
+    # Presumably the id of the EvaluationRule that fired -- confirm in the judge.
+    rule_matched: str | None = None
+
+    # For LLM-based judgments
+    # Defaults to full confidence (1.0) with no LLM involved.
+    confidence: float = 1.0
+    llm_used: bool = False
+
+    # Context for replanning
+    context: dict[str, Any] = Field(default_factory=dict)
+
+    # Keep fields that are not declared above instead of rejecting them.
+    model_config = {"extra": "allow"}
+
+
+class EvaluationRule(BaseModel):
+    """
+    A rule for the HybridJudge to evaluate step results.
+
+    Rules are checked before falling back to LLM evaluation.
+
+    ``id``, ``description``, ``condition`` and ``action`` are required.
+    """
+
+    id: str
+    description: str
+
+    # Condition (Python expression evaluated with result, step, goal context)
+    # NOTE(review): stored as a raw string; the evaluator is not visible in
+    # this module -- confirm it goes through a restricted evaluator, not eval().
+    condition: str
+
+    # What to do if condition matches
+    action: JudgmentAction
+    feedback_template: str = ""  # Can use {result}, {step}, etc.
+
+    # Priority (higher = checked first)
+    priority: int = 0
+
+    # Keep fields that are not declared above instead of rejecting them.
+    model_config = {"extra": "allow"}
+
+
+class Plan(BaseModel):
+    """
+    A complete execution plan.
+
+    Created by external planner (Claude Code, etc).
+    Executed by FlexibleGraphExecutor.
+    """
+
+    id: str
+    goal_id: str
+    description: str
+
+    # Steps to execute
+    steps: list[PlanStep] = Field(default_factory=list)
+
+    # Execution state
+    revision: int = 1  # Incremented on replan
+    current_step_idx: int = 0
+
+    # Accumulated context from execution
+    context: dict[str, Any] = Field(default_factory=dict)
+
+    # Metadata
+    created_at: datetime = Field(default_factory=datetime.now)
+    created_by: str = "external"  # Who created this plan
+
+    # Previous attempt info (for replanning)
+    previous_feedback: str | None = None
+
+    model_config = {"extra": "allow"}
+
+    @classmethod
+    def from_json(cls, data: str | dict) -> "Plan":
+        """
+        Load a Plan from exported JSON.
+
+        This handles the output from export_graph() and properly converts
+        action_type strings to ActionType enums.
+
+        Every declared ActionSpec field is carried over (including ``model``,
+        ``graph_id`` and ``language``), as are a step's ``max_retries`` and the
+        plan's ``created_by`` / ``previous_feedback`` when present. Per-step
+        execution state (status, result, error, attempts, timestamps) is
+        deliberately not restored: loaded steps always start as PENDING.
+        Keys whose JSON value is ``null`` fall back to the field default.
+
+        Args:
+            data: JSON string or dict from export_graph()
+
+        Returns:
+            Plan object ready for FlexibleGraphExecutor
+
+        Raises:
+            json.JSONDecodeError: If ``data`` is a string that is not valid JSON.
+            KeyError: If a step has no ``id``.
+            ValueError: If an ``action_type`` is not a known ActionType, or a
+                value fails model validation.
+
+        Example:
+            # Load from export_graph() output
+            exported = export_graph()
+            plan = Plan.from_json(exported)
+
+            # Load from file
+            with open("plan.json") as f:
+                plan = Plan.from_json(json.load(f))
+        """
+        import json as json_module
+
+        if isinstance(data, str):
+            data = json_module.loads(data)
+
+        # Handle nested "plan" key from export_graph output
+        if "plan" in data:
+            data = data["plan"]
+
+        # Convert steps
+        steps = []
+        for step_data in data.get("steps") or []:
+            action_data = step_data.get("action") or {}
+
+            action = ActionSpec(
+                # Convert action_type string to enum
+                action_type=ActionType(action_data.get("action_type", "function")),
+                prompt=action_data.get("prompt"),
+                system_prompt=action_data.get("system_prompt"),
+                model=action_data.get("model"),
+                tool_name=action_data.get("tool_name"),
+                tool_args=action_data.get("tool_args") or {},
+                graph_id=action_data.get("graph_id"),
+                function_name=action_data.get("function_name"),
+                function_args=action_data.get("function_args") or {},
+                code=action_data.get("code"),
+                language=action_data.get("language") or "python",
+            )
+
+            step_kwargs: dict[str, Any] = {
+                "id": step_data["id"],
+                "description": step_data.get("description", ""),
+                "action": action,
+                "inputs": step_data.get("inputs") or {},
+                "expected_outputs": step_data.get("expected_outputs") or [],
+                "dependencies": step_data.get("dependencies") or [],
+                "requires_approval": step_data.get("requires_approval", False),
+                "approval_message": step_data.get("approval_message"),
+            }
+            # Only override the model default when the export carries a value.
+            if step_data.get("max_retries") is not None:
+                step_kwargs["max_retries"] = step_data["max_retries"]
+
+            steps.append(PlanStep(**step_kwargs))
+
+        plan_kwargs: dict[str, Any] = {
+            "id": data.get("id", "plan"),
+            "goal_id": data.get("goal_id", ""),
+            "description": data.get("description", ""),
+            "steps": steps,
+            "context": data.get("context") or {},
+            "revision": data.get("revision", 1),
+        }
+        # Optional metadata: keep the model defaults unless the export sets them.
+        for optional_key in ("created_by", "previous_feedback"):
+            if data.get(optional_key) is not None:
+                plan_kwargs[optional_key] = data[optional_key]
+
+        return cls(**plan_kwargs)
+
+    def get_step(self, step_id: str) -> PlanStep | None:
+        """Get a step by ID, or None when no step has that ID.
+
+        If several steps share the ID, the first one in ``steps`` is returned.
+        """
+        return next((step for step in self.steps if step.id == step_id), None)
+
+    def get_ready_steps(self) -> list[PlanStep]:
+        """Get all steps that are ready to execute.
+
+        A step is ready when it is PENDING and all its dependencies are in
+        terminal states (completed, failed, skipped, or rejected).
+        """
+        terminal_ids = {s.id for s in self.steps if s.status.is_terminal()}
+        return [s for s in self.steps if s.is_ready(terminal_ids)]
+
+    def get_completed_steps(self) -> list[PlanStep]:
+        """Get all completed steps."""
+        return [s for s in self.steps if s.status == StepStatus.COMPLETED]
+
+    def is_complete(self) -> bool:
+        """Check if all steps are in terminal states (finished executing).
+
+        Returns True when all steps have reached a terminal state, regardless
+        of whether they succeeded or failed. Use has_failed_steps() to check
+        if any steps failed. A plan without steps is considered complete.
+        """
+        return all(s.status.is_terminal() for s in self.steps)
+
+    def is_successful(self) -> bool:
+        """Check if all steps completed successfully.
+
+        A plan without steps is considered successful.
+        """
+        return all(s.status == StepStatus.COMPLETED for s in self.steps)
+
+    def has_failed_steps(self) -> bool:
+        """Check if any steps failed, were skipped, or were rejected."""
+        return any(
+            s.status in (StepStatus.FAILED, StepStatus.SKIPPED, StepStatus.REJECTED)
+            for s in self.steps
+        )
+
+    def get_failed_steps(self) -> list[PlanStep]:
+        """Get all steps that failed, were skipped, or were rejected."""
+        return [
+            s
+            for s in self.steps
+            if s.status in (StepStatus.FAILED, StepStatus.SKIPPED, StepStatus.REJECTED)
+        ]
+
+    def to_feedback_context(self) -> dict[str, Any]:
+        """Create context for replanning.
+
+        Returns:
+            A dict with the plan id and revision, a summary of every COMPLETED
+            step (id, description, result), a summary of every FAILED step
+            (id, description, error, attempts) and the accumulated ``context``.
+            Unlike get_failed_steps(), skipped and rejected steps are not
+            listed under ``failed_steps``.
+        """
+        return {
+            "plan_id": self.id,
+            "revision": self.revision,
+            "completed_steps": [
+                {
+                    "id": s.id,
+                    "description": s.description,
+                    "result": s.result,
+                }
+                for s in self.get_completed_steps()
+            ],
+            "failed_steps": [
+                {
+                    "id": s.id,
+                    "description": s.description,
+                    "error": s.error,
+                    "attempts": s.attempts,
+                }
+                for s in self.steps
+                if s.status == StepStatus.FAILED
+            ],
+            "context": self.context,
+        }
+
+
+class ExecutionStatus(str, Enum):
+    """Status of plan execution.
+
+    Members subclass ``str`` and therefore compare equal to their raw string
+    values.
+    """
+
+    COMPLETED = "completed"
+    AWAITING_APPROVAL = "awaiting_approval"  # Paused for human approval
+    NEEDS_REPLAN = "needs_replan"
+    NEEDS_ESCALATION = "needs_escalation"
+    REJECTED = "rejected"  # Human rejected a step
+    ABORTED = "aborted"  # Human aborted execution
+    FAILED = "failed"
+
+
+class PlanExecutionResult(BaseModel):
+    """
+    Result of executing a plan.
+
+    Returned to external planner with status and feedback.
+
+    ``status`` is the only required field; collections default to empty and
+    the metric counters default to 0.
+    """
+
+    status: ExecutionStatus
+
+    # Results from completed steps
+    results: dict[str, Any] = Field(default_factory=dict)
+
+    # For needs_replan - what to tell the planner
+    feedback: str | None = None
+    feedback_context: dict[str, Any] = Field(default_factory=dict)
+
+    # Steps that completed before stopping
+    # (presumably step IDs, given the list[str] type -- confirm in the executor)
+    completed_steps: list[str] = Field(default_factory=list)
+
+    # Metrics
+    steps_executed: int = 0
+    total_tokens: int = 0
+    total_latency_ms: int = 0
+
+    # Error info (for failed status)
+    error: str | None = None
+
+    # Keep fields that are not declared above instead of rejecting them.
+    model_config = {"extra": "allow"}
+
+
+def load_export(data: str | dict) -> tuple["Plan", Any]:
+    """
+    Build the (Plan, Goal) pair described by an export_graph() payload.
+
+    export_graph() returns the plan together with the goal that was defined
+    and approved while the agent was built. Both are loaded here so they can
+    be handed straight to FlexibleGraphExecutor. When the payload has no
+    (or an empty) ``"goal"`` entry, a minimal Goal is derived from the plan's
+    ``goal_id`` and ``description``.
+
+    Args:
+        data: JSON string or dict from export_graph()
+
+    Returns:
+        Tuple of (Plan, Goal) ready for FlexibleGraphExecutor
+
+    Example:
+        # Load from export_graph() output
+        exported = export_graph()
+        plan, goal = load_export(exported)
+
+        result = await executor.execute_plan(plan, goal, context)
+    """
+    import json as json_module
+
+    from framework.graph.goal import Goal
+
+    payload = json_module.loads(data) if isinstance(data, str) else data
+
+    # The plan is loaded first: the goal fallback below needs its metadata.
+    plan = Plan.from_json(payload)
+
+    goal_data = payload.get("goal", {})
+    if not goal_data:
+        # Fallback: create minimal goal from plan metadata
+        fallback_goal = Goal(
+            id=plan.goal_id,
+            name=plan.goal_id,
+            description=plan.description,
+            success_criteria=[],
+            constraints=[],
+        )
+        return plan, fallback_goal
+
+    return plan, Goal.model_validate(goal_data)
@@ -0,0 +1,252 @@
+import ast
+import operator
+from typing import Any
+
+# Safe operators whitelist
+# Maps an ast operator node class to the callable that implements it.
+# Binary, comparison and unary operators share this one table.
+# NOTE(review): Pow and LShift are unbounded here, so an expression such as
+# ``9 ** 9 ** 9`` can consume large amounts of CPU/memory -- consider a guard
+# if expressions come from untrusted input.
+SAFE_OPERATORS = {
+    # Arithmetic
+    ast.Add: operator.add,
+    ast.Sub: operator.sub,
+    ast.Mult: operator.mul,
+    ast.Div: operator.truediv,
+    ast.FloorDiv: operator.floordiv,
+    ast.Mod: operator.mod,
+    ast.Pow: operator.pow,
+    # Bitwise
+    ast.LShift: operator.lshift,
+    ast.RShift: operator.rshift,
+    ast.BitOr: operator.or_,
+    ast.BitXor: operator.xor,
+    ast.BitAnd: operator.and_,
+    # Comparison
+    ast.Eq: operator.eq,
+    ast.NotEq: operator.ne,
+    ast.Lt: operator.lt,
+    ast.LtE: operator.le,
+    ast.Gt: operator.gt,
+    ast.GtE: operator.ge,
+    ast.Is: operator.is_,
+    ast.IsNot: operator.is_not,
+    # Membership: the left operand is looked up in the right operand.
+    ast.In: lambda x, y: x in y,
+    ast.NotIn: lambda x, y: x not in y,
+    # Unary
+    ast.USub: operator.neg,
+    ast.UAdd: operator.pos,
+    ast.Not: operator.not_,
+    ast.Invert: operator.inv,
+}
+
+# Safe functions whitelist
+# Maps the name usable inside an expression to the builtin it resolves to.
+SAFE_FUNCTIONS = {
+    "len": len,
+    "int": int,
+    "float": float,
+    "str": str,
+    "bool": bool,
+    "list": list,
+    "dict": dict,
+    "tuple": tuple,
+    "set": set,
+    "min": min,
+    "max": max,
+    "sum": sum,
+    "abs": abs,
+    "round": round,
+    "all": all,
+    "any": any,
+}
+
+
+class SafeEvalVisitor(ast.NodeVisitor):
+    """Evaluate a parsed expression using only an allowlisted subset of Python.
+
+    Every supported syntax node has an explicit ``visit_*`` method; any other
+    node type is rejected with ValueError. Names are resolved exclusively from
+    ``context``, attributes starting with ``_`` are never readable, and calls
+    are limited to SAFE_FUNCTIONS plus a short list of read-only methods.
+    """
+
+    # Method names that may be called on evaluated objects, e.g. output.get("key").
+    SAFE_METHODS = frozenset(
+        {"get", "keys", "values", "items", "lower", "upper", "strip", "split"}
+    )
+
+    def __init__(self, context: dict[str, Any]):
+        """Store the mapping of variable names visible to the expression."""
+        self.context = context
+
+    def visit(self, node: ast.AST) -> Any:
+        """Dispatch to ``visit_<NodeType>``; unknown node types are rejected."""
+        # Override visit to prevent default behavior and ensure only explicitly allowed nodes work
+        handler = getattr(self, "visit_" + node.__class__.__name__, self.generic_visit)
+        return handler(node)
+
+    def generic_visit(self, node: ast.AST):
+        """Reject any node type without a dedicated handler.
+
+        Raises:
+            ValueError: Always.
+        """
+        raise ValueError(f"Use of {node.__class__.__name__} is not allowed")
+
+    def visit_Expression(self, node: ast.Expression) -> Any:
+        """Evaluate the body of a tree parsed with ``mode="eval"``."""
+        return self.visit(node.body)
+
+    def visit_Expr(self, node: ast.Expr) -> Any:
+        """Evaluate an expression statement."""
+        return self.visit(node.value)
+
+    def visit_Constant(self, node: ast.Constant) -> Any:
+        """Return the literal value."""
+        return node.value
+
+    # --- Data Structures ---
+    def visit_List(self, node: ast.List) -> list:
+        """Build a list from the evaluated elements."""
+        return [self.visit(elt) for elt in node.elts]
+
+    def visit_Tuple(self, node: ast.Tuple) -> tuple:
+        """Build a tuple from the evaluated elements."""
+        return tuple(self.visit(elt) for elt in node.elts)
+
+    def visit_Dict(self, node: ast.Dict) -> dict:
+        """Build a dict literal, merging ``**mapping`` entries in order.
+
+        Raises:
+            ValueError: If a ``**`` entry does not evaluate to a dict.
+        """
+        built: dict = {}
+        for key_node, value_node in zip(node.keys, node.values, strict=False):
+            if key_node is None:
+                # A None key marks a ``**spread`` entry in the literal.
+                spread = self.visit(value_node)
+                if not isinstance(spread, dict):
+                    raise ValueError("Only dicts can be unpacked with ** in a dict literal")
+                built.update(spread)
+            else:
+                built[self.visit(key_node)] = self.visit(value_node)
+        return built
+
+    # --- Operations ---
+    def visit_BinOp(self, node: ast.BinOp) -> Any:
+        """Apply an allowlisted binary operator to both evaluated operands."""
+        op_func = SAFE_OPERATORS.get(type(node.op))
+        if op_func is None:
+            raise ValueError(f"Operator {type(node.op).__name__} is not allowed")
+        return op_func(self.visit(node.left), self.visit(node.right))
+
+    def visit_UnaryOp(self, node: ast.UnaryOp) -> Any:
+        """Apply an allowlisted unary operator to the evaluated operand."""
+        op_func = SAFE_OPERATORS.get(type(node.op))
+        if op_func is None:
+            raise ValueError(f"Operator {type(node.op).__name__} is not allowed")
+        return op_func(self.visit(node.operand))
+
+    def visit_Compare(self, node: ast.Compare) -> Any:
+        """Evaluate a (possibly chained) comparison such as ``a < b <= c``.
+
+        Stops at the first link that is falsy and returns False; returns True
+        when every link holds.
+        """
+        left = self.visit(node.left)
+        for op, comparator in zip(node.ops, node.comparators, strict=False):
+            op_func = SAFE_OPERATORS.get(type(op))
+            if op_func is None:
+                raise ValueError(f"Operator {type(op).__name__} is not allowed")
+            right = self.visit(comparator)
+            if not op_func(left, right):
+                return False
+            left = right  # Chain comparisons
+        return True
+
+    def visit_BoolOp(self, node: ast.BoolOp) -> Any:
+        """Evaluate ``and`` / ``or`` with Python's short-circuit semantics.
+
+        Operands are evaluated left to right and evaluation stops at the first
+        operand that decides the outcome, so guards such as
+        ``x is not None and x.value > 1`` work. The deciding operand itself is
+        returned (not a coerced bool), so ``name or "default"`` yields a value.
+        """
+        if isinstance(node.op, ast.And):
+            stop_on_truthy = False
+        elif isinstance(node.op, ast.Or):
+            stop_on_truthy = True
+        else:
+            raise ValueError(f"Boolean operator {type(node.op).__name__} is not allowed")
+
+        outcome: Any = None
+        for operand in node.values:
+            outcome = self.visit(operand)
+            if bool(outcome) is stop_on_truthy:
+                break
+        return outcome
+
+    def visit_IfExp(self, node: ast.IfExp) -> Any:
+        """Evaluate ``body if test else orelse``; only the chosen branch runs."""
+        # Ternary: true_val if test else false_val
+        if self.visit(node.test):
+            return self.visit(node.body)
+        else:
+            return self.visit(node.orelse)
+
+    # --- Variables and Attributes ---
+    def visit_Name(self, node: ast.Name) -> Any:
+        """Look a variable up in the context.
+
+        Raises:
+            NameError: If the name is not in the context.
+            ValueError: If the name is used in a non-load (store/delete) position.
+        """
+        if isinstance(node.ctx, ast.Load):
+            if node.id in self.context:
+                return self.context[node.id]
+            raise NameError(f"Name '{node.id}' is not defined")
+        raise ValueError("Only reading variables is allowed")
+
+    def visit_Subscript(self, node: ast.Subscript) -> Any:
+        """Evaluate ``value[index]`` or ``value[start:stop:step]``."""
+        val = self.visit(node.value)
+        idx = self.visit(node.slice)
+        return val[idx]
+
+    def visit_Slice(self, node: ast.Slice) -> slice:
+        """Build a ``slice`` object from the optional lower/upper/step parts."""
+        lower = None if node.lower is None else self.visit(node.lower)
+        upper = None if node.upper is None else self.visit(node.upper)
+        step = None if node.step is None else self.visit(node.step)
+        return slice(lower, upper, step)
+
+    def visit_Attribute(self, node: ast.Attribute) -> Any:
+        """Evaluate ``value.attr`` for non-private attributes.
+
+        Raises:
+            ValueError: If the attribute name starts with ``_``.
+            AttributeError: If the object has no such attribute.
+        """
+        # STRICT CHECK: no access to private attributes (starting with _).
+        # This also blocks dunder escapes such as __class__ or __init__, and it
+        # runs before the target object is even evaluated.
+        if node.attr.startswith("_"):
+            raise ValueError(f"Access to private attribute '{node.attr}' is not allowed")
+
+        val = self.visit(node.value)
+
+        # Standard Python behavior only: no dict-style dot-access sugar.
+        try:
+            return getattr(val, node.attr)
+        except AttributeError as exc:
+            raise AttributeError(f"Object has no attribute '{node.attr}'") from exc
+
+    def visit_Call(self, node: ast.Call) -> Any:
+        """Call an allowlisted function or method.
+
+        A call is permitted when the callee is written as a bare name found in
+        SAFE_FUNCTIONS, when it is a method whose name is in SAFE_METHODS, or
+        when the evaluated callee is itself one of the SAFE_FUNCTIONS objects.
+
+        Raises:
+            ValueError: If the call is not allowlisted, or uses ``**kwargs``
+                unpacking.
+        """
+        func = self.visit(node.func)
+
+        # Checking the syntactic form is simpler than verifying where the
+        # function object came from.
+        is_safe = False
+        if isinstance(node.func, ast.Name):
+            is_safe = node.func.id in SAFE_FUNCTIONS
+        elif isinstance(node.func, ast.Attribute):
+            # Method call: only a small set of read-only helpers is allowed,
+            # e.g. output.get("key") or text.lower().
+            is_safe = node.func.attr in self.SAFE_METHODS
+
+        if not is_safe and func not in SAFE_FUNCTIONS.values():
+            raise ValueError("Call to function/method is not allowed")
+
+        args = [self.visit(arg) for arg in node.args]
+
+        keywords = {}
+        for kw in node.keywords:
+            if kw.arg is None:
+                # ``f(**mapping)`` has no keyword name; passing None through
+                # would fail later with an unrelated TypeError.
+                raise ValueError("Keyword unpacking (**) is not allowed in calls")
+            keywords[kw.arg] = self.visit(kw.value)
+
+        return func(*args, **keywords)
+
+    def visit_Index(self, node: ast.AST) -> Any:
+        """Unwrap the legacy ``ast.Index`` node (Python < 3.9).
+
+        The parameter is annotated as ``ast.AST`` so that this class does not
+        look up the deprecated ``ast.Index`` name when it is defined.
+        """
+        return self.visit(node.value)
+
+
+def safe_eval(expr: str, context: dict[str, Any] | None = None) -> Any:
+    """
+    Evaluate a Python expression string through SafeEvalVisitor.
+
+    The expression sees the entries of ``context`` plus the functions in
+    SAFE_FUNCTIONS. The safe functions are applied last, so a context entry
+    with the same name as a safe function is shadowed by it. The caller's
+    ``context`` is copied and never modified.
+
+    Args:
+        expr: The expression string to evaluate.
+        context: Dictionary of variables available in the expression.
+
+    Returns:
+        The result of the evaluation.
+
+    Raises:
+        ValueError: If unsafe operations or syntax are detected.
+        SyntaxError: If the expression is invalid Python.
+    """
+    # Add safe builtins to context
+    scope = {} if context is None else context.copy()
+    scope.update(SAFE_FUNCTIONS)
+
+    try:
+        parsed = ast.parse(expr, mode="eval")
+    except SyntaxError as e:
+        raise SyntaxError(f"Invalid syntax in expression: {e}") from e
+
+    return SafeEvalVisitor(scope).visit(parsed)
--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				`../../.claude/skills/building-agents-construction`
				`@@ -0,0 +1 @@`
				`../../.claude/skills/building-agents-patterns`
				`@@ -0,0 +1 @@`
				`"""Tests for Aden credential sync components."""`
				`@@ -0,0 +1 @@`
				`"""Tests for the credential store module."""`