Compare commits

..

1 Commits

Author SHA1 Message Date
Richard T 1e161ca006 feat: testing composio tools 2026-01-25 16:35:44 -08:00
701 changed files with 36095 additions and 167201 deletions
-9
View File
@@ -1,9 +0,0 @@
{
"mcpServers": {
"agent-builder": {
"command": "uv",
"args": ["run", "--directory", "core", "-m", "framework.mcp.agent_builder_server"],
"disabled": false
}
}
}
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-concepts
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-create
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-credentials
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-patterns
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-test
-5
View File
@@ -1,5 +0,0 @@
---
description: hive-concepts
---
use hive-concepts skill
-5
View File
@@ -1,5 +0,0 @@
---
description: hive-create
---
use hive-create skill
-5
View File
@@ -1,5 +0,0 @@
---
description: hive-credentials
---
use hive-credentials skill
-5
View File
@@ -1,5 +0,0 @@
---
description: hive-patterns
---
use hive-patterns skill
-5
View File
@@ -1,5 +0,0 @@
---
description: hive-test
---
use hive-test skill
-5
View File
@@ -1,5 +0,0 @@
---
description: hive
---
use hive skill
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-concepts
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-create
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-credentials
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-patterns
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-test
-15
View File
@@ -1,15 +0,0 @@
{
"hooks": {
"PostToolUse": [
{
"matcher": "Edit|Write|NotebookEdit",
"hooks": [
{
"type": "command",
"command": "ruff check --fix \"$CLAUDE_FILE_PATH\" 2>/dev/null; ruff format \"$CLAUDE_FILE_PATH\" 2>/dev/null; true"
}
]
}
]
}
}
+34
View File
@@ -0,0 +1,34 @@
{
"permissions": {
"allow": [
"Bash(npm install:*)",
"Bash(npm test:*)",
"Skill(building-agents-construction)",
"Skill(building-agents-construction:*)",
"Bash(PYTHONPATH=core:exports pytest:*)",
"mcp__agent-builder__create_session",
"mcp__agent-builder__get_session_status",
"mcp__agent-builder__set_goal",
"mcp__agent-builder__list_mcp_servers",
"mcp__agent-builder__test_node",
"mcp__agent-builder__add_node",
"mcp__agent-builder__add_edge",
"mcp__agent-builder__validate_graph",
"Bash(ruff check:*)",
"Bash(PYTHONPATH=core:exports python:*)",
"mcp__agent-builder__list_tests",
"mcp__agent-builder__generate_constraint_tests",
"Bash(python -m agent:*)",
"Bash(python agent.py:*)",
"Bash(python -c:*)",
"Bash(done)",
"Bash(xargs cat:*)",
"mcp__agent-builder__list_mcp_tools",
"mcp__agent-builder__add_mcp_server",
"Bash(python3:*)",
"Bash(PYTHONPATH=core:tools/src:exports pytest:*)",
"Bash(source .env)",
"Bash(PYTHONPATH=core:tools/src:exports python:*)"
]
}
}
-34
View File
@@ -1,34 +0,0 @@
{
"permissions": {
"allow": [
"mcp__agent-builder__create_session",
"mcp__agent-builder__set_goal",
"mcp__agent-builder__add_node",
"mcp__agent-builder__add_edge",
"mcp__agent-builder__configure_loop",
"mcp__agent-builder__add_mcp_server",
"mcp__agent-builder__validate_graph",
"mcp__agent-builder__export_graph",
"mcp__agent-builder__load_session_by_id",
"Bash(git status:*)",
"Bash(gh run view:*)",
"Bash(uv run:*)",
"Bash(env:*)",
"mcp__agent-builder__test_node",
"mcp__agent-builder__list_mcp_tools",
"Bash(python -m py_compile:*)",
"Bash(python -m pytest:*)",
"Bash(source:*)",
"mcp__agent-builder__update_node",
"mcp__agent-builder__check_missing_credentials",
"mcp__agent-builder__list_stored_credentials",
"Bash(find:*)",
"mcp__agent-builder__run_tests",
"Bash(PYTHONPATH=core:exports:tools/src uv run pytest:*)",
"mcp__agent-builder__list_agent_sessions",
"mcp__agent-builder__generate_constraint_tests",
"mcp__agent-builder__generate_success_tests"
]
},
"enabledMcpjsonServers": ["agent-builder", "tools"]
}
@@ -1,53 +1,30 @@
---
name: hive
description: Complete workflow for building, implementing, and testing goal-driven agents. Orchestrates hive-* skills. Use when starting a new agent project, unsure which skill to use, or need end-to-end guidance.
name: agent-workflow
description: Complete workflow for building, implementing, and testing goal-driven agents. Orchestrates building-agents-* and testing-agent skills. Use when starting a new agent project, unsure which skill to use, or need end-to-end guidance.
license: Apache-2.0
metadata:
author: hive
version: "2.0"
type: workflow-orchestrator
orchestrates:
- hive-concepts
- hive-create
- hive-patterns
- hive-test
- hive-credentials
- hive-debugger
- building-agents-core
- building-agents-construction
- building-agents-patterns
- testing-agent
---
# Agent Development Workflow
**THIS IS AN EXECUTABLE WORKFLOW. DO NOT explore the codebase or read source files. ROUTE to the correct skill IMMEDIATELY.**
When this skill is loaded, **ALWAYS use the AskUserQuestion tool** to present options:
```
Use AskUserQuestion with these options:
- "Build a new agent" → Then invoke /hive-create
- "Test an existing agent" → Then invoke /hive-test
- "Learn agent concepts" → Then invoke /hive-concepts
- "Optimize agent design" → Then invoke /hive-patterns
- "Set up credentials" → Then invoke /hive-credentials
- "Debug a failing agent" → Then invoke /hive-debugger
- "Other" (please describe what you want to achieve)
```
**DO NOT:** Read source files, explore the codebase, search for code, or do any investigation before routing. The sub-skills handle all of that.
---
Complete Standard Operating Procedure (SOP) for building production-ready goal-driven agents.
## Overview
This workflow orchestrates specialized skills to take you from initial concept to production-ready agent:
1. **Understand Concepts** `/hive-concepts` (optional)
2. **Build Structure** `/hive-create`
3. **Optimize Design** `/hive-patterns` (optional)
4. **Setup Credentials**`/hive-credentials` (if agent uses tools requiring API keys)
5. **Test & Validate**`/hive-test`
6. **Debug Issues**`/hive-debugger` (if agent fails at runtime)
1. **Understand Concepts** (5-10 min) → `/building-agents-core` (optional)
2. **Build Structure** (15-30 min) → `/building-agents-construction`
3. **Optimize Design** (10-15 min) → `/building-agents-patterns` (optional)
4. **Test & Validate** (20-40 min) → `/testing-agent`
## When to Use This Workflow
@@ -58,26 +35,24 @@ Use this meta-skill when:
- Want consistent, repeatable agent builds
**Skip this workflow** if:
- You only need to test an existing agent → use `/hive-test` directly
- You only need to test an existing agent → use `/testing-agent` directly
- You know exactly which phase you're in → use specific skill directly
## Quick Decision Tree
```
"Need to understand agent concepts" → hive-concepts
"Build a new agent" → hive-create
"Optimize my agent design" → hive-patterns
"Need client-facing nodes or feedback loops" → hive-patterns
"Set up API keys for my agent" → hive-credentials
"Test my agent" → hive-test
"My agent is failing/stuck/has errors" → hive-debugger
"Need to understand agent concepts" → building-agents-core
"Build a new agent" → building-agents-construction
"Optimize my agent design" → building-agents-patterns
"Test my agent" → testing-agent
"Not sure what I need" → Read phases below, then decide
"Agent has structure but needs implementation" → See agent directory STATUS.md
```
## Phase 0: Understand Concepts (Optional)
**Skill**: `/hive-concepts`
**Duration**: 5-10 minutes
**Skill**: `/building-agents-core`
**Input**: Questions about agent architecture
### When to Use
@@ -85,12 +60,12 @@ Use this meta-skill when:
- First time building an agent
- Need to understand node types, edges, goals
- Want to validate tool availability
- Learning about event loop architecture and client-facing nodes
- Learning about pause/resume architecture
### What This Phase Provides
- Architecture overview (Python packages, not JSON)
- Core concepts (Goal, Node, Edge, Event Loop, Judges)
- Core concepts (Goal, Node, Edge, Pause/Resume)
- Tool discovery and validation procedures
- Workflow overview
@@ -98,8 +73,9 @@ Use this meta-skill when:
## Phase 1: Build Agent Structure
**Skill**: `/hive-create`
**Input**: User requirements ("Build an agent that...") or a template to start from
**Duration**: 15-30 minutes
**Skill**: `/building-agents-construction`
**Input**: User requirements ("Build an agent that...")
### What This Phase Does
@@ -127,7 +103,7 @@ Creates the complete agent architecture:
- ✅ 1-5 constraints defined
- ✅ 5-10 nodes specified in nodes/__init__.py
- ✅ 8-15 edges connecting workflow
- ✅ Validated structure (passes `uv run python -m agent_name validate`)
- ✅ Validated structure (passes `python -m agent_name validate`)
- ✅ README.md with usage instructions
- ✅ CLI commands (info, validate, run, shell)
@@ -141,7 +117,7 @@ You're ready for Phase 2 when:
### Common Outputs
The hive-create skill produces:
The building-agents-construction skill produces:
```
exports/agent_name/
├── __init__.py (package exports)
@@ -161,52 +137,53 @@ exports/agent_name/
→ You may need to add Python functions or MCP tools (not covered by current skills)
**If want to optimize design:**
→ Proceed to Phase 1.5 (hive-patterns)
→ Proceed to Phase 1.5 (building-agents-patterns)
**If ready to test:**
→ Proceed to Phase 2
## Phase 1.5: Optimize Design (Optional)
**Skill**: `/hive-patterns`
**Duration**: 10-15 minutes
**Skill**: `/building-agents-patterns`
**Input**: Completed agent structure
### When to Use
- Want to add client-facing blocking or feedback edges
- Need judge patterns for output validation
- Want fan-out/fan-in (parallel execution)
- Want to add pause/resume functionality
- Need error handling patterns
- Want to optimize performance
- Need examples of complex routing
- Want best practices guidance
### What This Phase Provides
- Client-facing interaction patterns
- Feedback edge routing with nullable output keys
- Judge patterns (implicit, SchemaJudge)
- Fan-out/fan-in parallel execution
- Context management and spillover patterns
- Practical examples and patterns
- Pause/resume architecture
- Error handling strategies
- Anti-patterns to avoid
- Performance optimization techniques
**Skip this phase** if your agent design is straightforward.
## Phase 2: Test & Validate
**Skill**: `/hive-test`
**Duration**: 20-40 minutes
**Skill**: `/testing-agent`
**Input**: Working agent from Phase 1
### What This Phase Does
Guides the creation and execution of a comprehensive test suite:
- Constraint tests
- Success criteria tests
- Edge case tests
- Integration tests
Creates comprehensive test suite:
- Constraint tests (verify hard requirements)
- Success criteria tests (measure goal achievement)
- Edge case tests (handle failures gracefully)
- Integration tests (end-to-end workflows)
### Process
1. **Analyze agent** - Read goal, constraints, success criteria
2. **Generate tests** - The calling agent writes pytest files in `exports/agent_name/tests/` using hive-test guidelines and templates
2. **Generate tests** - Create pytest files in `exports/agent_name/tests/`
3. **User approval** - Review and approve each test
4. **Run evaluation** - Execute tests and collect results
5. **Debug failures** - Identify and fix issues
@@ -269,9 +246,9 @@ You're done when:
```
User: "Build an agent that monitors files"
→ Use /hive-create
→ Use /building-agents-construction
→ Agent structure created
→ Use /hive-test
→ Use /testing-agent
→ Tests created and passing
→ Done: Production-ready agent
```
@@ -280,32 +257,19 @@ User: "Build an agent that monitors files"
```
User: "Build an agent (first time)"
→ Use /hive-concepts (understand concepts)
→ Use /hive-create (build structure)
→ Use /hive-patterns (optimize design)
→ Use /hive-test (validate)
→ Use /building-agents-core (understand concepts)
→ Use /building-agents-construction (build structure)
→ Use /building-agents-patterns (optimize design)
→ Use /testing-agent (validate)
→ Done: Production-ready agent
```
### Pattern 1c: Build from Template
```
User: "Build an agent based on the deep research template"
→ Use /hive-create
→ Select "From a template" path
→ Pick template, name new agent
→ Review/modify goal, nodes, graph
→ Agent exported with customizations
→ Use /hive-test
→ Done: Customized agent
```
### Pattern 2: Test Existing Agent
```
User: "Test my agent at exports/my_agent"
→ Skip Phase 1
→ Use /hive-test directly
→ Use /testing-agent directly
→ Tests created
→ Done: Validated agent
```
@@ -314,71 +278,58 @@ User: "Test my agent at exports/my_agent"
```
User: "Build an agent"
→ Use /hive-create (Phase 1)
→ Use /building-agents-construction (Phase 1)
→ Implementation needed (see STATUS.md)
→ [User implements functions]
→ Use /hive-test (Phase 2)
→ Use /testing-agent (Phase 2)
→ Tests reveal bugs
→ [Fix bugs manually]
→ Re-run tests
→ Done: Working agent
```
### Pattern 4: Agent with Review Loops and HITL Checkpoints
### Pattern 4: Complex Agent with Patterns
```
User: "Build an agent with human review and feedback loops"
→ Use /hive-concepts (learn event loop, client-facing nodes)
→ Use /hive-create (build structure with feedback edges)
→ Use /hive-patterns (implement client-facing + feedback patterns)
→ Use /hive-test (validate review flows and edge routing)
→ Done: Agent with HITL checkpoints and review loops
User: "Build an agent with multi-turn conversations"
→ Use /building-agents-core (learn pause/resume)
→ Use /building-agents-construction (build structure)
→ Use /building-agents-patterns (implement pause/resume pattern)
→ Use /testing-agent (validate conversation flows)
→ Done: Complex conversational agent
```
## Skill Dependencies
```
hive (meta-skill)
agent-workflow (meta-skill)
├── hive-concepts (foundational)
│ ├── Architecture concepts (event loop, judges)
│ ├── Node types (event_loop, function)
│ ├── Edge routing and priority
├── building-agents-core (foundational)
│ ├── Architecture concepts
│ ├── Node/Edge/Goal definitions
│ ├── Tool discovery procedures
│ └── Workflow overview
├── hive-create (procedural)
├── building-agents-construction (procedural)
│ ├── Creates package structure
│ ├── Defines goal
│ ├── Adds nodes (event_loop, function)
│ ├── Connects edges with priority routing
│ ├── Adds nodes incrementally
│ ├── Connects edges
│ ├── Finalizes agent class
│ └── Requires: hive-concepts
│ └── Requires: building-agents-core
├── hive-patterns (reference)
│ ├── Client-facing interaction patterns
│ ├── Feedback edges and review loops
│ ├── Judge patterns (implicit, SchemaJudge)
│ ├── Fan-out/fan-in parallel execution
│ └── Context management and anti-patterns
├── building-agents-patterns (reference)
│ ├── Best practices
│ ├── Pause/resume patterns
│ ├── Error handling
│ ├── Anti-patterns
│ └── Performance optimization
── hive-credentials (utility)
├── Detects missing credentials
├── Offers auth method choices (Aden OAuth, direct API key)
├── Stores securely in ~/.hive/credentials
└── Validates with health checks
├── hive-test (validation)
│ ├── Reads agent goal
│ ├── Generates tests
│ ├── Runs evaluation
│ └── Reports results
└── hive-debugger (troubleshooting)
├── Monitors runtime logs (L1/L2/L3)
├── Identifies retry loops, tool failures
├── Categorizes issues (10 categories)
└── Provides fix recommendations
── testing-agent
├── Reads agent goal
├── Generates tests
├── Runs evaluation
└── Reports results
```
## Troubleshooting
@@ -388,13 +339,13 @@ hive (meta-skill)
- Check node IDs match between nodes/__init__.py and agent.py
- Verify all edges reference valid node IDs
- Ensure entry_node exists in nodes list
- Run: `PYTHONPATH=exports uv run python -m agent_name validate`
- Run: `PYTHONPATH=core:exports python -m agent_name validate`
### "Agent has structure but won't run"
- Check for STATUS.md or IMPLEMENTATION_GUIDE.md in agent directory
- Implementation may be needed (Python functions or MCP tools)
- This is expected - hive-create creates structure, not implementation
- This is expected - building-agents-construction creates structure, not implementation
- See implementation guide for completion options
### "Tests are failing"
@@ -402,16 +353,9 @@ hive (meta-skill)
- Review test output for specific failures
- Check agent goal and success criteria
- Verify constraints are met
- Use `/hive-test` to debug and iterate
- Use `/testing-agent` to debug and iterate
- Fix agent code and re-run tests
### "Agent is failing at runtime"
- Use `/hive-debugger` to analyze runtime logs
- The debugger identifies retry loops, tool failures, and stalled execution
- Get actionable fix recommendations with code changes
- Monitor the agent in real-time during TUI sessions
### "Not sure which phase I'm in"
Run these checks:
@@ -421,7 +365,7 @@ Run these checks:
ls exports/my_agent/agent.py
# Check if it validates
PYTHONPATH=exports uv run python -m my_agent validate
PYTHONPATH=core:exports python -m my_agent validate
# Check if tests exist
ls exports/my_agent/tests/
@@ -470,10 +414,10 @@ You're done with the workflow when:
## Additional Resources
- **hive-concepts**: See `.claude/skills/hive-concepts/SKILL.md`
- **hive-create**: See `.claude/skills/hive-create/SKILL.md`
- **hive-patterns**: See `.claude/skills/hive-patterns/SKILL.md`
- **hive-test**: See `.claude/skills/hive-test/SKILL.md`
- **building-agents-core**: See `.claude/skills/building-agents-core/SKILL.md`
- **building-agents-construction**: See `.claude/skills/building-agents-construction/SKILL.md`
- **building-agents-patterns**: See `.claude/skills/building-agents-patterns/SKILL.md`
- **testing-agent**: See `.claude/skills/testing-agent/SKILL.md`
- **Agent framework docs**: See `core/README.md`
- **Example agents**: See `exports/` directory
@@ -481,46 +425,36 @@ You're done with the workflow when:
This workflow provides a proven path from concept to production-ready agent:
1. **Learn** with `/hive-concepts` → Understand fundamentals (optional)
2. **Build** with `/hive-create` → Get validated structure
3. **Optimize** with `/hive-patterns` → Apply best practices (optional)
4. **Configure** with `/hive-credentials`Set up API keys (if needed)
5. **Test** with `/hive-test` → Get verified functionality
6. **Debug** with `/hive-debugger` → Fix runtime issues (if needed)
1. **Learn** with `/building-agents-core` → Understand fundamentals (optional)
2. **Build** with `/building-agents-construction` → Get validated structure
3. **Optimize** with `/building-agents-patterns` → Apply best practices (optional)
4. **Test** with `/testing-agent`Get verified functionality
The workflow is **flexible** - skip phases as needed, iterate freely, and adapt to your specific requirements. The goal is **production-ready agents** built with **consistent, repeatable processes**.
## Skill Selection Guide
**Choose hive-concepts when:**
**Choose building-agents-core when:**
- First time building agents
- Need to understand event loop architecture
- Need to understand architecture
- Validating tool availability
- Learning about node types, edges, and judges
- Learning about node types and edges
**Choose hive-create when:**
**Choose building-agents-construction when:**
- Actually building an agent
- Have clear requirements
- Ready to write code
- Want step-by-step guidance
- Want to start from an existing template and customize it
**Choose hive-patterns when:**
**Choose building-agents-patterns when:**
- Agent structure complete
- Need client-facing nodes or feedback edges
- Implementing review loops or fan-out/fan-in
- Want judge patterns or context management
- Need advanced patterns
- Implementing pause/resume
- Optimizing performance
- Want best practices
**Choose hive-test when:**
**Choose testing-agent when:**
- Agent structure complete
- Ready to validate functionality
- Need comprehensive test coverage
- Testing feedback loops, output keys, or fan-out
**Choose hive-debugger when:**
- Agent is failing or stuck at runtime
- Seeing retry loops or escalations
- Tool calls are failing
- Need to understand why a node isn't completing
- Want real-time monitoring of agent execution
- Debugging agent behavior
@@ -1,6 +1,6 @@
# Example: File Monitor Agent
This example shows the complete /hive workflow in action for building a file monitoring agent.
This example shows the complete agent-workflow in action for building a file monitoring agent.
## Initial Request
@@ -12,7 +12,7 @@ User: "Build an agent that monitors ~/Downloads and copies new files to ~/Docume
### Step 1: Create Structure
Agent invokes `/hive-create` skill and:
Agent invokes `/building-agents` skill and:
1. Creates `exports/file_monitor_agent/` package
2. Writes skeleton files (__init__.py, __main__.py, agent.py, etc.)
@@ -75,10 +75,10 @@ initialize → list → identify → check
### Step 5: Finalize
```bash
$ PYTHONPATH=exports uv run python -m file_monitor_agent validate
$ PYTHONPATH=core:exports python -m file_monitor_agent validate
✓ Agent is valid
$ PYTHONPATH=exports uv run python -m file_monitor_agent info
$ PYTHONPATH=core:exports python -m file_monitor_agent info
Agent: File Monitor & Copy Agent
Nodes: 7
Edges: 8
@@ -107,7 +107,7 @@ exports/file_monitor_agent/
### Step 1: Analyze Agent
Agent invokes `/hive-test` skill and:
Agent invokes `/testing-agent` skill and:
1. Reads goal from `exports/file_monitor_agent/agent.py`
2. Identifies 4 success criteria to test
@@ -131,7 +131,7 @@ Tests approved incrementally by user.
### Step 3: Run Tests
```bash
$ PYTHONPATH=exports uv run pytest exports/file_monitor_agent/tests/
$ PYTHONPATH=core:exports pytest exports/file_monitor_agent/tests/
test_constraints.py::test_preserves_originals PASSED
test_constraints.py::test_handles_errors PASSED
@@ -162,7 +162,7 @@ test_edge_cases.py::test_large_files PASSED
./RUN_AGENT.sh
# Or manually
PYTHONPATH=exports uv run python -m file_monitor_agent run
PYTHONPATH=core:exports:tools/src python -m file_monitor_agent run
```
**Capabilities:**
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,80 @@
# Online Research Agent
Deep-dive research agent that searches 10+ sources and produces comprehensive narrative reports with citations.
## Features
- Generates multiple search queries from a topic
- Searches and fetches 15+ web sources
- Evaluates and ranks sources by relevance
- Synthesizes findings into themes
- Writes narrative report with numbered citations
- Quality checks for uncited claims
- Saves report to local markdown file
## Usage
### CLI
```bash
# Show agent info
python -m online_research_agent info
# Validate structure
python -m online_research_agent validate
# Run research on a topic
python -m online_research_agent run --topic "impact of AI on healthcare"
# Interactive shell
python -m online_research_agent shell
```
### Python API
```python
from online_research_agent import default_agent
# Simple usage
result = await default_agent.run({"topic": "climate change solutions"})
# Check output
if result.success:
print(f"Report saved to: {result.output['file_path']}")
print(result.output['final_report'])
```
## Workflow
```
parse-query → search-sources → fetch-content → evaluate-sources
write-report ← synthesize-findings
quality-check → save-report
```
## Output
Reports are saved to `./research_reports/` as markdown files with:
1. Executive Summary
2. Introduction
3. Key Findings (by theme)
4. Analysis
5. Conclusion
6. References
## Requirements
- Python 3.11+
- LLM provider API key (Groq, Cerebras, etc.)
- Internet access for web search/fetch
## Configuration
Edit `config.py` to change:
- `model`: LLM model (default: groq/moonshotai/kimi-k2-instruct-0905)
- `temperature`: Generation temperature (default: 0.7)
- `max_tokens`: Max tokens per response (default: 16384)
@@ -0,0 +1,23 @@
"""
Online Research Agent - Deep-dive research with narrative reports.
Research any topic by searching multiple sources, synthesizing information,
and producing a well-structured narrative report with citations.
"""
from .agent import OnlineResearchAgent, default_agent, goal, nodes, edges
from .config import RuntimeConfig, AgentMetadata, default_config, metadata
__version__ = "1.0.0"
__all__ = [
"OnlineResearchAgent",
"default_agent",
"goal",
"nodes",
"edges",
"RuntimeConfig",
"AgentMetadata",
"default_config",
"metadata",
]
@@ -1,5 +1,5 @@
"""
CLI entry point for Deep Research Agent.
CLI entry point for Online Research Agent.
Uses AgentRuntime for multi-entrypoint support with HITL pause/resume.
"""
@@ -10,7 +10,7 @@ import logging
import sys
import click
from .agent import default_agent, DeepResearchAgent
from .agent import default_agent, OnlineResearchAgent
def setup_logging(verbose=False, debug=False):
@@ -28,23 +28,24 @@ def setup_logging(verbose=False, debug=False):
@click.group()
@click.version_option(version="1.0.0")
def cli():
"""Deep Research Agent - Interactive, rigorous research with TUI conversation."""
"""Online Research Agent - Deep-dive research with narrative reports."""
pass
@cli.command()
@click.option("--topic", "-t", type=str, required=True, help="Research topic")
@click.option("--mock", is_flag=True, help="Run in mock mode")
@click.option("--quiet", "-q", is_flag=True, help="Only output result JSON")
@click.option("--verbose", "-v", is_flag=True, help="Show execution details")
@click.option("--debug", is_flag=True, help="Show debug logging")
def run(topic, quiet, verbose, debug):
def run(topic, mock, quiet, verbose, debug):
"""Execute research on a topic."""
if not quiet:
setup_logging(verbose=verbose, debug=debug)
context = {"topic": topic}
result = asyncio.run(default_agent.run(context))
result = asyncio.run(default_agent.run(context, mock_mode=mock))
output_data = {
"success": result.success,
@@ -58,82 +59,6 @@ def run(topic, quiet, verbose, debug):
sys.exit(0 if result.success else 1)
@cli.command()
@click.option("--verbose", "-v", is_flag=True, help="Show execution details")
@click.option("--debug", is_flag=True, help="Show debug logging")
def tui(verbose, debug):
"""Launch the TUI dashboard for interactive research."""
setup_logging(verbose=verbose, debug=debug)
try:
from framework.tui.app import AdenTUI
except ImportError:
click.echo(
"TUI requires the 'textual' package. Install with: pip install textual"
)
sys.exit(1)
from pathlib import Path
from framework.llm import LiteLLMProvider
from framework.runner.tool_registry import ToolRegistry
from framework.runtime.agent_runtime import create_agent_runtime
from framework.runtime.event_bus import EventBus
from framework.runtime.execution_stream import EntryPointSpec
async def run_with_tui():
agent = DeepResearchAgent()
# Build graph and tools
agent._event_bus = EventBus()
agent._tool_registry = ToolRegistry()
storage_path = Path.home() / ".hive" / "agents" / "deep_research_agent"
storage_path.mkdir(parents=True, exist_ok=True)
mcp_config_path = Path(__file__).parent / "mcp_servers.json"
if mcp_config_path.exists():
agent._tool_registry.load_mcp_config(mcp_config_path)
llm = LiteLLMProvider(
model=agent.config.model,
api_key=agent.config.api_key,
api_base=agent.config.api_base,
)
tools = list(agent._tool_registry.get_tools().values())
tool_executor = agent._tool_registry.get_executor()
graph = agent._build_graph()
runtime = create_agent_runtime(
graph=graph,
goal=agent.goal,
storage_path=storage_path,
entry_points=[
EntryPointSpec(
id="start",
name="Start Research",
entry_node="intake",
trigger_type="manual",
isolation_level="isolated",
),
],
llm=llm,
tools=tools,
tool_executor=tool_executor,
)
await runtime.start()
try:
app = AdenTUI(runtime)
await app.run_async()
finally:
await runtime.stop()
asyncio.run(run_with_tui())
@cli.command()
@click.option("--json", "output_json", is_flag=True)
def info(output_json):
@@ -146,7 +71,6 @@ def info(output_json):
click.echo(f"Version: {info_data['version']}")
click.echo(f"Description: {info_data['description']}")
click.echo(f"\nNodes: {', '.join(info_data['nodes'])}")
click.echo(f"Client-facing: {', '.join(info_data['client_facing_nodes'])}")
click.echo(f"Entry: {info_data['entry_node']}")
click.echo(f"Terminal: {', '.join(info_data['terminal_nodes'])}")
@@ -157,9 +81,6 @@ def validate():
validation = default_agent.validate()
if validation["valid"]:
click.echo("Agent is valid")
if validation["warnings"]:
for warning in validation["warnings"]:
click.echo(f" WARNING: {warning}")
else:
click.echo("Agent has errors:")
for error in validation["errors"]:
@@ -170,7 +91,7 @@ def validate():
@cli.command()
@click.option("--verbose", "-v", is_flag=True)
def shell(verbose):
"""Interactive research session (CLI, no TUI)."""
"""Interactive research session."""
asyncio.run(_interactive_shell(verbose))
@@ -178,26 +99,24 @@ async def _interactive_shell(verbose=False):
"""Async interactive shell."""
setup_logging(verbose=verbose)
click.echo("=== Deep Research Agent ===")
click.echo("=== Online Research Agent ===")
click.echo("Enter a topic to research (or 'quit' to exit):\n")
agent = DeepResearchAgent()
agent = OnlineResearchAgent()
await agent.start()
try:
while True:
try:
topic = await asyncio.get_event_loop().run_in_executor(
None, input, "Topic> "
)
if topic.lower() in ["quit", "exit", "q"]:
topic = await asyncio.get_event_loop().run_in_executor(None, input, "Topic> ")
if topic.lower() in ['quit', 'exit', 'q']:
click.echo("Goodbye!")
break
if not topic.strip():
continue
click.echo("\nResearching...\n")
click.echo("\nResearching... (this may take a few minutes)\n")
result = await agent.trigger_and_wait("start", {"topic": topic})
@@ -207,8 +126,13 @@ async def _interactive_shell(verbose=False):
if result.success:
output = result.output
status = output.get("delivery_status", "unknown")
click.echo(f"\nResearch complete (status: {status})\n")
if "file_path" in output:
click.echo(f"\nReport saved to: {output['file_path']}\n")
if "final_report" in output:
click.echo("\n--- Report Preview ---\n")
preview = output["final_report"][:500] + "..." if len(output.get("final_report", "")) > 500 else output.get("final_report", "")
click.echo(preview)
click.echo("\n")
else:
click.echo(f"\nResearch failed: {result.error}\n")
@@ -218,7 +142,6 @@ async def _interactive_shell(verbose=False):
except Exception as e:
click.echo(f"Error: {e}", err=True)
import traceback
traceback.print_exc()
finally:
await agent.stop()
@@ -0,0 +1,413 @@
"""Agent graph construction for Online Research Agent."""
from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
from framework.graph.edge import GraphSpec
from framework.graph.executor import ExecutionResult
from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
from framework.runtime.execution_stream import EntryPointSpec
from framework.llm import LiteLLMProvider
from framework.runner.tool_registry import ToolRegistry
from .config import default_config, metadata
# Goal definition
goal = Goal(
id="comprehensive-online-research",
name="Comprehensive Online Research",
description="Research any topic by searching multiple sources, synthesizing information, and producing a well-structured narrative report with citations.",
success_criteria=[
SuccessCriterion(
id="source-coverage",
description="Query 10+ diverse sources",
metric="source_count",
target=">=10",
weight=0.20,
),
SuccessCriterion(
id="relevance",
description="All sources directly address the query",
metric="relevance_score",
target="90%",
weight=0.25,
),
SuccessCriterion(
id="synthesis",
description="Synthesize findings into coherent narrative",
metric="coherence_score",
target="85%",
weight=0.25,
),
SuccessCriterion(
id="citations",
description="Include citations for all claims",
metric="citation_coverage",
target="100%",
weight=0.15,
),
SuccessCriterion(
id="actionable",
description="Report answers the user's question",
metric="answer_completeness",
target="90%",
weight=0.15,
),
],
constraints=[
Constraint(
id="no-hallucination",
description="Only include information found in sources",
constraint_type="quality",
category="accuracy",
),
Constraint(
id="source-attribution",
description="Every factual claim must cite its source",
constraint_type="quality",
category="accuracy",
),
Constraint(
id="recency-preference",
description="Prefer recent sources when relevant",
constraint_type="quality",
category="relevance",
),
Constraint(
id="no-paywalled",
description="Avoid sources that require payment to access",
constraint_type="functional",
category="accessibility",
),
],
)
# Import nodes
from .nodes import (
parse_query_node,
search_sources_node,
fetch_content_node,
evaluate_sources_node,
synthesize_findings_node,
write_report_node,
quality_check_node,
save_report_node,
)
# Node list
nodes = [
parse_query_node,
search_sources_node,
fetch_content_node,
evaluate_sources_node,
synthesize_findings_node,
write_report_node,
quality_check_node,
save_report_node,
]
# Edge definitions
edges = [
EdgeSpec(
id="parse-to-search",
source="parse-query",
target="search-sources",
condition=EdgeCondition.ON_SUCCESS,
priority=1,
),
EdgeSpec(
id="search-to-fetch",
source="search-sources",
target="fetch-content",
condition=EdgeCondition.ON_SUCCESS,
priority=1,
),
EdgeSpec(
id="fetch-to-evaluate",
source="fetch-content",
target="evaluate-sources",
condition=EdgeCondition.ON_SUCCESS,
priority=1,
),
EdgeSpec(
id="evaluate-to-synthesize",
source="evaluate-sources",
target="synthesize-findings",
condition=EdgeCondition.ON_SUCCESS,
priority=1,
),
EdgeSpec(
id="synthesize-to-write",
source="synthesize-findings",
target="write-report",
condition=EdgeCondition.ON_SUCCESS,
priority=1,
),
EdgeSpec(
id="write-to-quality",
source="write-report",
target="quality-check",
condition=EdgeCondition.ON_SUCCESS,
priority=1,
),
EdgeSpec(
id="quality-to-save",
source="quality-check",
target="save-report",
condition=EdgeCondition.ON_SUCCESS,
priority=1,
),
]
# Graph configuration
entry_node = "parse-query"
entry_points = {"start": "parse-query"}
pause_nodes = []
terminal_nodes = ["save-report"]
class OnlineResearchAgent:
"""
Online Research Agent - Deep-dive research with narrative reports.
Uses AgentRuntime for multi-entrypoint support with HITL pause/resume.
"""
def __init__(self, config=None):
self.config = config or default_config
self.goal = goal
self.nodes = nodes
self.edges = edges
self.entry_node = entry_node
self.entry_points = entry_points
self.pause_nodes = pause_nodes
self.terminal_nodes = terminal_nodes
self._runtime: AgentRuntime | None = None
self._graph: GraphSpec | None = None
def _build_entry_point_specs(self) -> list[EntryPointSpec]:
"""Convert entry_points dict to EntryPointSpec list."""
specs = []
for ep_id, node_id in self.entry_points.items():
if ep_id == "start":
trigger_type = "manual"
name = "Start"
elif "_resume" in ep_id:
trigger_type = "resume"
name = f"Resume from {ep_id.replace('_resume', '')}"
else:
trigger_type = "manual"
name = ep_id.replace("-", " ").title()
specs.append(EntryPointSpec(
id=ep_id,
name=name,
entry_node=node_id,
trigger_type=trigger_type,
isolation_level="shared",
))
return specs
def _create_runtime(self, mock_mode=False) -> AgentRuntime:
"""Create AgentRuntime instance."""
import json
from pathlib import Path
# Persistent storage in ~/.hive for telemetry and run history
storage_path = Path.home() / ".hive" / "online_research_agent"
storage_path.mkdir(parents=True, exist_ok=True)
tool_registry = ToolRegistry()
# Load MCP servers (always load, needed for tool validation)
agent_dir = Path(__file__).parent
mcp_config_path = agent_dir / "mcp_servers.json"
if mcp_config_path.exists():
with open(mcp_config_path) as f:
mcp_servers = json.load(f)
for server_name, server_config in mcp_servers.items():
server_config["name"] = server_name
# Resolve relative cwd paths
if "cwd" in server_config and not Path(server_config["cwd"]).is_absolute():
server_config["cwd"] = str(agent_dir / server_config["cwd"])
tool_registry.register_mcp_server(server_config)
llm = None
if not mock_mode:
# LiteLLMProvider uses environment variables for API keys
llm = LiteLLMProvider(model=self.config.model)
self._graph = GraphSpec(
id="online-research-agent-graph",
goal_id=self.goal.id,
version="1.0.0",
entry_node=self.entry_node,
entry_points=self.entry_points,
terminal_nodes=self.terminal_nodes,
pause_nodes=self.pause_nodes,
nodes=self.nodes,
edges=self.edges,
default_model=self.config.model,
max_tokens=self.config.max_tokens,
)
# Create AgentRuntime with all entry points
self._runtime = create_agent_runtime(
graph=self._graph,
goal=self.goal,
storage_path=storage_path,
entry_points=self._build_entry_point_specs(),
llm=llm,
tools=list(tool_registry.get_tools().values()),
tool_executor=tool_registry.get_executor(),
)
return self._runtime
async def start(self, mock_mode=False) -> None:
"""Start the agent runtime."""
if self._runtime is None:
self._create_runtime(mock_mode=mock_mode)
await self._runtime.start()
async def stop(self) -> None:
"""Stop the agent runtime."""
if self._runtime is not None:
await self._runtime.stop()
async def trigger(
self,
entry_point: str,
input_data: dict,
correlation_id: str | None = None,
session_state: dict | None = None,
) -> str:
"""
Trigger execution at a specific entry point (non-blocking).
Args:
entry_point: Entry point ID (e.g., "start", "pause-node_resume")
input_data: Input data for the execution
correlation_id: Optional ID to correlate related executions
session_state: Optional session state to resume from (with paused_at, memory)
Returns:
Execution ID for tracking
"""
if self._runtime is None or not self._runtime.is_running:
raise RuntimeError("Agent runtime not started. Call start() first.")
return await self._runtime.trigger(entry_point, input_data, correlation_id, session_state=session_state)
async def trigger_and_wait(
self,
entry_point: str,
input_data: dict,
timeout: float | None = None,
session_state: dict | None = None,
) -> ExecutionResult | None:
"""
Trigger execution and wait for completion.
Args:
entry_point: Entry point ID
input_data: Input data for the execution
timeout: Maximum time to wait (seconds)
session_state: Optional session state to resume from (with paused_at, memory)
Returns:
ExecutionResult or None if timeout
"""
if self._runtime is None or not self._runtime.is_running:
raise RuntimeError("Agent runtime not started. Call start() first.")
return await self._runtime.trigger_and_wait(entry_point, input_data, timeout, session_state=session_state)
async def run(self, context: dict, mock_mode=False, session_state=None) -> ExecutionResult:
"""
Run the agent (convenience method for simple single execution).
For more control, use start() + trigger_and_wait() + stop().
"""
await self.start(mock_mode=mock_mode)
try:
# Determine entry point based on session_state
if session_state and "paused_at" in session_state:
paused_node = session_state["paused_at"]
resume_key = f"{paused_node}_resume"
if resume_key in self.entry_points:
entry_point = resume_key
else:
entry_point = "start"
else:
entry_point = "start"
result = await self.trigger_and_wait(entry_point, context, session_state=session_state)
return result or ExecutionResult(success=False, error="Execution timeout")
finally:
await self.stop()
async def get_goal_progress(self) -> dict:
"""Get goal progress across all executions."""
if self._runtime is None:
raise RuntimeError("Agent runtime not started")
return await self._runtime.get_goal_progress()
def get_stats(self) -> dict:
"""Get runtime statistics."""
if self._runtime is None:
return {"running": False}
return self._runtime.get_stats()
def info(self):
"""Get agent information."""
return {
"name": metadata.name,
"version": metadata.version,
"description": metadata.description,
"goal": {
"name": self.goal.name,
"description": self.goal.description,
},
"nodes": [n.id for n in self.nodes],
"edges": [e.id for e in self.edges],
"entry_node": self.entry_node,
"entry_points": self.entry_points,
"pause_nodes": self.pause_nodes,
"terminal_nodes": self.terminal_nodes,
"multi_entrypoint": True,
}
def validate(self):
"""Validate agent structure."""
errors = []
warnings = []
node_ids = {node.id for node in self.nodes}
for edge in self.edges:
if edge.source not in node_ids:
errors.append(f"Edge {edge.id}: source '{edge.source}' not found")
if edge.target not in node_ids:
errors.append(f"Edge {edge.id}: target '{edge.target}' not found")
if self.entry_node not in node_ids:
errors.append(f"Entry node '{self.entry_node}' not found")
for terminal in self.terminal_nodes:
if terminal not in node_ids:
errors.append(f"Terminal node '{terminal}' not found")
for pause in self.pause_nodes:
if pause not in node_ids:
errors.append(f"Pause node '{pause}' not found")
# Validate entry points
for ep_id, node_id in self.entry_points.items():
if node_id not in node_ids:
errors.append(f"Entry point '{ep_id}' references unknown node '{node_id}'")
return {
"valid": len(errors) == 0,
"errors": errors,
"warnings": warnings,
}
# Create default instance
default_agent = OnlineResearchAgent()
@@ -0,0 +1,22 @@
"""Runtime configuration."""
from dataclasses import dataclass
@dataclass
class RuntimeConfig:
model: str = "groq/moonshotai/kimi-k2-instruct-0905"
temperature: float = 0.7
max_tokens: int = 16384
default_config = RuntimeConfig()
# Agent metadata
@dataclass
class AgentMetadata:
name: str = "Online Research Agent"
version: str = "1.0.0"
description: str = "Research any topic by searching multiple sources, synthesizing information, and producing a well-structured narrative report with citations."
metadata = AgentMetadata()
@@ -1,8 +1,8 @@
{
"hive-tools": {
"transport": "stdio",
"command": "uv",
"args": ["run", "python", "mcp_server.py", "--stdio"],
"command": "python",
"args": ["mcp_server.py", "--stdio"],
"cwd": "../../tools",
"description": "Hive tools MCP server providing web_search, web_scrape, and write_to_file"
}
@@ -0,0 +1,313 @@
"""Node definitions for Online Research Agent."""
from framework.graph import NodeSpec
# Node 1: Parse Query
parse_query_node = NodeSpec(
id="parse-query",
name="Parse Query",
description="Analyze the research topic and generate 3-5 diverse search queries to cover different aspects",
node_type="llm_generate",
input_keys=["topic"],
output_keys=["search_queries", "research_focus", "key_aspects"],
output_schema={
"research_focus": {"type": "string", "required": True, "description": "Brief statement of what we're researching"},
"key_aspects": {"type": "array", "required": True, "description": "List of 3-5 key aspects to investigate"},
"search_queries": {"type": "array", "required": True, "description": "List of 3-5 search queries"},
},
system_prompt="""\
You are a research query strategist. Given a research topic, analyze it and generate search queries.
Your task:
1. Understand the core research question
2. Identify 3-5 key aspects to investigate
3. Generate 3-5 diverse search queries that will find comprehensive information
CRITICAL: Return ONLY raw JSON. NO markdown, NO code blocks.
Return this JSON structure:
{
"research_focus": "Brief statement of what we're researching",
"key_aspects": ["aspect1", "aspect2", "aspect3"],
"search_queries": [
"query 1 - broad overview",
"query 2 - specific angle",
"query 3 - recent developments",
"query 4 - expert opinions",
"query 5 - data/statistics"
]
}
""",
tools=[],
max_retries=3,
)
# Node 2: Search Sources
search_sources_node = NodeSpec(
id="search-sources",
name="Search Sources",
description="Execute web searches using the generated queries to find 15+ source URLs",
node_type="llm_tool_use",
input_keys=["search_queries", "research_focus"],
output_keys=["source_urls", "search_results_summary"],
output_schema={
"source_urls": {"type": "array", "required": True, "description": "List of source URLs found"},
"search_results_summary": {"type": "string", "required": True, "description": "Brief summary of what was found"},
},
system_prompt="""\
You are a research assistant executing web searches. Use the web_search tool to find sources.
Your task:
1. Execute each search query using web_search tool
2. Collect URLs from search results
3. Aim for 15+ diverse sources
After searching, return JSON with found sources:
{
"source_urls": ["url1", "url2", ...],
"search_results_summary": "Brief summary of what was found"
}
""",
tools=["web_search"],
max_retries=3,
)
# Node 3: Fetch Content
fetch_content_node = NodeSpec(
id="fetch-content",
name="Fetch Content",
description="Fetch and extract content from the discovered source URLs",
node_type="llm_tool_use",
input_keys=["source_urls", "research_focus"],
output_keys=["fetched_sources", "fetch_errors"],
output_schema={
"fetched_sources": {"type": "array", "required": True, "description": "List of fetched source objects with url, title, content"},
"fetch_errors": {"type": "array", "required": True, "description": "List of URLs that failed to fetch"},
},
system_prompt="""\
You are a content fetcher. Use web_scrape tool to retrieve content from URLs.
Your task:
1. Fetch content from each source URL using web_scrape tool
2. Extract the main content relevant to the research focus
3. Track any URLs that failed to fetch
After fetching, return JSON:
{
"fetched_sources": [
{"url": "...", "title": "...", "content": "extracted text..."},
...
],
"fetch_errors": ["url that failed", ...]
}
""",
tools=["web_scrape"],
max_retries=3,
)
# Node 4: Evaluate Sources
evaluate_sources_node = NodeSpec(
id="evaluate-sources",
name="Evaluate Sources",
description="Score sources for relevance and quality, filter to top 10",
node_type="llm_generate",
input_keys=["fetched_sources", "research_focus", "key_aspects"],
output_keys=["ranked_sources", "source_analysis"],
output_schema={
"ranked_sources": {"type": "array", "required": True, "description": "List of ranked sources with scores"},
"source_analysis": {"type": "string", "required": True, "description": "Overview of source quality and coverage"},
},
system_prompt="""\
You are a source evaluator. Assess each source for quality and relevance.
Scoring criteria:
- Relevance to research focus (1-10)
- Source credibility (1-10)
- Information depth (1-10)
- Recency if relevant (1-10)
Your task:
1. Score each source
2. Rank by combined score
3. Select top 10 sources
4. Note what each source uniquely contributes
Return JSON:
{
"ranked_sources": [
{"url": "...", "title": "...", "content": "...", "score": 8.5, "unique_value": "..."},
...
],
"source_analysis": "Overview of source quality and coverage"
}
""",
tools=[],
max_retries=3,
)
# Node 5: Synthesize Findings
synthesize_findings_node = NodeSpec(
id="synthesize-findings",
name="Synthesize Findings",
description="Extract key facts from sources and identify common themes",
node_type="llm_generate",
input_keys=["ranked_sources", "research_focus", "key_aspects"],
output_keys=["key_findings", "themes", "source_citations"],
output_schema={
"key_findings": {"type": "array", "required": True, "description": "List of key findings with sources and confidence"},
"themes": {"type": "array", "required": True, "description": "List of themes with descriptions and supporting sources"},
"source_citations": {"type": "object", "required": True, "description": "Map of facts to supporting URLs"},
},
system_prompt="""\
You are a research synthesizer. Analyze multiple sources to extract insights.
Your task:
1. Identify key facts from each source
2. Find common themes across sources
3. Note contradictions or debates
4. Build a citation map (fact -> source URL)
Return JSON:
{
"key_findings": [
{"finding": "...", "sources": ["url1", "url2"], "confidence": "high/medium/low"},
...
],
"themes": [
{"theme": "...", "description": "...", "supporting_sources": ["url1", ...]},
...
],
"source_citations": {
"fact or claim": ["supporting url1", "url2"],
...
}
}
""",
tools=[],
max_retries=3,
)
# Node 6: Write Report
write_report_node = NodeSpec(
id="write-report",
name="Write Report",
description="Generate a narrative report with proper citations",
node_type="llm_generate",
input_keys=["key_findings", "themes", "source_citations", "research_focus", "ranked_sources"],
output_keys=["report_content", "references"],
output_schema={
"report_content": {"type": "string", "required": True, "description": "Full markdown report text with citations"},
"references": {"type": "array", "required": True, "description": "List of reference objects with number, url, title"},
},
system_prompt="""\
You are a research report writer. Create a well-structured narrative report.
Report structure:
1. Executive Summary (2-3 paragraphs)
2. Introduction (context and scope)
3. Key Findings (organized by theme)
4. Analysis (synthesis and implications)
5. Conclusion
6. References (numbered list of all sources)
Citation format: Use numbered citations like [1], [2] that correspond to the References section.
IMPORTANT:
- Every factual claim MUST have a citation
- Write in clear, professional prose
- Be objective and balanced
- Highlight areas of consensus and debate
Return JSON:
{
"report_content": "Full markdown report text with citations...",
"references": [
{"number": 1, "url": "...", "title": "..."},
...
]
}
""",
tools=[],
max_retries=3,
)
# Node 7: Quality Check
quality_check_node = NodeSpec(
id="quality-check",
name="Quality Check",
description="Verify all claims have citations and report is coherent",
node_type="llm_generate",
input_keys=["report_content", "references", "source_citations"],
output_keys=["quality_score", "issues", "final_report"],
output_schema={
"quality_score": {"type": "number", "required": True, "description": "Quality score 0-1"},
"issues": {"type": "array", "required": True, "description": "List of issues found and fixed"},
"final_report": {"type": "string", "required": True, "description": "Corrected full report"},
},
system_prompt="""\
You are a quality assurance reviewer. Check the research report for issues.
Check for:
1. Uncited claims (factual statements without [n] citation)
2. Broken citations (references to non-existent numbers)
3. Coherence (logical flow between sections)
4. Completeness (all key aspects covered)
5. Accuracy (claims match source content)
If issues found, fix them in the final report.
Return JSON:
{
"quality_score": 0.95,
"issues": [
{"type": "uncited_claim", "location": "paragraph 3", "fixed": true},
...
],
"final_report": "Corrected full report with all issues fixed..."
}
""",
tools=[],
max_retries=3,
)
# Node 8: Save Report
save_report_node = NodeSpec(
id="save-report",
name="Save Report",
description="Write the final report to a local markdown file",
node_type="llm_tool_use",
input_keys=["final_report", "references", "research_focus"],
output_keys=["file_path", "save_status"],
output_schema={
"file_path": {"type": "string", "required": True, "description": "Path where report was saved"},
"save_status": {"type": "string", "required": True, "description": "Status of save operation"},
},
system_prompt="""\
You are a file manager. Save the research report to disk.
Your task:
1. Generate a filename from the research focus (slugified, with date)
2. Use the write_to_file tool to save the report as markdown
3. Save to the ./research_reports/ directory
Filename format: research_YYYY-MM-DD_topic-slug.md
Return JSON:
{
"file_path": "research_reports/research_2026-01-23_topic-name.md",
"save_status": "success"
}
""",
tools=["write_to_file"],
max_retries=3,
)
__all__ = [
"parse_query_node",
"search_sources_node",
"fetch_content_node",
"evaluate_sources_node",
"synthesize_findings_node",
"write_report_node",
"quality_check_node",
"save_report_node",
]
@@ -0,0 +1,303 @@
---
name: building-agents-core
description: Core concepts for goal-driven agents - architecture, node types, tool discovery, and workflow overview. Use when starting agent development or need to understand agent fundamentals.
license: Apache-2.0
metadata:
author: hive
version: "1.0"
type: foundational
part_of: building-agents
---
# Building Agents - Core Concepts
Foundational knowledge for building goal-driven agents as Python packages.
## Architecture: Python Services (Not JSON Configs)
Agents are built as Python packages:
```
exports/my_agent/
├── __init__.py # Package exports
├── __main__.py # CLI (run, info, validate, shell)
├── agent.py # Graph construction (goal, edges, agent class)
├── nodes/__init__.py # Node definitions (NodeSpec)
├── config.py # Runtime config
└── README.md # Documentation
```
**Key Principle: Agent is visible and editable during build**
- ✅ Files created immediately as components are approved
- ✅ User can watch files grow in their editor
- ✅ No session state - just direct file writes
- ✅ No "export" step - agent is ready when build completes
## Core Concepts
### Goal
Success criteria and constraints (written to agent.py)
```python
goal = Goal(
id="research-goal",
name="Technical Research Agent",
description="Research technical topics thoroughly",
success_criteria=[
SuccessCriterion(
id="completeness",
description="Cover all aspects of topic",
metric="coverage_score",
target=">=0.9",
weight=0.4,
),
# 3-5 success criteria total
],
constraints=[
Constraint(
id="accuracy",
description="All information must be verified",
constraint_type="hard",
category="quality",
),
# 1-5 constraints total
],
)
```
### Node
Unit of work (written to nodes/__init__.py)
**Node Types:**
- `llm_generate` - Text generation, parsing
- `llm_tool_use` - Actions requiring tools
- `router` - Conditional branching
- `function` - Deterministic operations
```python
search_node = NodeSpec(
id="search-web",
name="Search Web",
description="Search for information online",
node_type="llm_tool_use",
input_keys=["query"],
output_keys=["search_results"],
system_prompt="Search the web for: {query}",
tools=["web_search"],
max_retries=3,
)
```
### Edge
Connection between nodes (written to agent.py)
**Edge Conditions:**
- `on_success` - Proceed if node succeeds
- `on_failure` - Handle errors
- `always` - Always proceed
- `conditional` - Based on expression
```python
EdgeSpec(
id="search-to-analyze",
source="search-web",
target="analyze-results",
condition=EdgeCondition.ON_SUCCESS,
priority=1,
)
```
### Pause/Resume
Multi-turn conversations
- **Pause nodes** - Stop execution, wait for user input
- **Resume entry points** - Continue from pause with user's response
```python
# Example pause/resume configuration
pause_nodes = ["request-clarification"]
entry_points = {
"start": "analyze-request",
"request-clarification_resume": "process-clarification"
}
```
## Tool Discovery & Validation
**CRITICAL:** Before adding a node with tools, you MUST verify the tools exist.
Tools are provided by MCP servers. Never assume a tool exists - always discover dynamically.
### Step 1: Register MCP Server (if not already done)
```python
mcp__agent-builder__add_mcp_server(
name="tools",
transport="stdio",
command="python",
args='["mcp_server.py", "--stdio"]',
cwd="../tools"
)
```
### Step 2: Discover Available Tools
```python
# List all tools from all registered servers
mcp__agent-builder__list_mcp_tools()
# Or list tools from a specific server
mcp__agent-builder__list_mcp_tools(server_name="tools")
```
This returns available tools with their descriptions and parameters:
```json
{
"success": true,
"tools_by_server": {
"tools": [
{
"name": "web_search",
"description": "Search the web...",
"parameters": ["query"]
},
{
"name": "web_scrape",
"description": "Scrape a URL...",
"parameters": ["url"]
}
]
},
"total_tools": 14
}
```
### Step 3: Validate Before Adding Nodes
Before writing a node with `tools=[...]`:
1. Call `list_mcp_tools()` to get available tools
2. Check each tool in your node exists in the response
3. If a tool doesn't exist:
- **DO NOT proceed** with the node
- Inform the user: "The tool 'X' is not available. Available tools are: ..."
- Ask if they want to use an alternative or proceed without the tool
### Tool Validation Anti-Patterns
**Never assume a tool exists** - always call `list_mcp_tools()` first
**Never write a node with unverified tools** - validate before writing
**Never silently drop tools** - if a tool doesn't exist, inform the user
**Never guess tool names** - use exact names from discovery response
### Example Validation Flow
```python
# 1. User requests: "Add a node that searches the web"
# 2. Discover available tools
tools_response = mcp__agent-builder__list_mcp_tools()
# 3. Check if web_search exists
available = [t["name"] for tools in tools_response["tools_by_server"].values() for t in tools]
if "web_search" not in available:
# Inform user and ask how to proceed
print("'web_search' not available. Available tools:", available)
else:
# Proceed with node creation
# ...
```
## Workflow Overview: Incremental File Construction
```
1. CREATE PACKAGE → mkdir + write skeletons
2. DEFINE GOAL → Write to agent.py + config.py
3. FOR EACH NODE:
- Propose design
- User approves
- Write to nodes/__init__.py IMMEDIATELY ← FILE WRITTEN
- (Optional) Validate with test_node ← MCP VALIDATION
- User can open file and see it
4. CONNECT EDGES → Update agent.py ← FILE WRITTEN
- (Optional) Validate with validate_graph ← MCP VALIDATION
5. FINALIZE → Write agent class to agent.py ← FILE WRITTEN
6. DONE - Agent ready at exports/my_agent/
```
**Files written immediately. MCP tools optional for validation/testing bookkeeping.**
### The Key Difference
**OLD (Bad):**
```
MCP add_node → Session State → MCP add_node → Session State → ...
MCP export_graph
Files appear
```
**NEW (Good):**
```
Write node to file → (Optional: MCP test_node) → Write node to file → ...
↓ ↓
File visible File visible
immediately immediately
```
**Bottom line:** Use Write/Edit for construction, MCP for validation if needed.
## When to Use This Skill
Use building-agents-core when:
- Starting a new agent project and need to understand fundamentals
- Need to understand agent architecture before building
- Want to validate tool availability before proceeding
- Learning about node types, edges, and graph execution
**Next Steps:**
- Ready to build? → Use `building-agents-construction` skill
- Need patterns and examples? → Use `building-agents-patterns` skill
## MCP Tools for Validation
After writing files, optionally use MCP tools for validation:
**test_node** - Validate node configuration with mock inputs
```python
mcp__agent-builder__test_node(
node_id="search-web",
test_input='{"query": "test query"}',
mock_llm_response='{"results": "mock output"}'
)
```
**validate_graph** - Check graph structure
```python
mcp__agent-builder__validate_graph()
# Returns: unreachable nodes, missing connections, etc.
```
**create_session** - Track session state for bookkeeping
```python
mcp__agent-builder__create_session(session_name="my-build")
```
**Key Point:** Files are written FIRST. MCP tools are for validation only.
## Related Skills
- **building-agents-construction** - Step-by-step building process
- **building-agents-patterns** - Best practices and examples
- **agent-workflow** - Complete workflow orchestrator
- **testing-agent** - Test and validate completed agents
@@ -0,0 +1,497 @@
---
name: building-agents-patterns
description: Best practices, patterns, and examples for building goal-driven agents. Includes pause/resume architecture, hybrid workflows, anti-patterns, and handoff to testing. Use when optimizing agent design.
license: Apache-2.0
metadata:
author: hive
version: "1.0"
type: reference
part_of: building-agents
---
# Building Agents - Patterns & Best Practices
Design patterns, examples, and best practices for building robust goal-driven agents.
**Prerequisites:** Complete agent structure using `building-agents-construction`.
## Practical Example: Hybrid Workflow
How to build a node using both direct file writes and optional MCP validation:
```python
# 1. WRITE TO FILE FIRST (Primary - makes it visible)
node_code = '''
search_node = NodeSpec(
id="search-web",
node_type="llm_tool_use",
input_keys=["query"],
output_keys=["search_results"],
system_prompt="Search the web for: {query}",
tools=["web_search"],
)
'''
Edit(
file_path="exports/research_agent/nodes/__init__.py",
old_string="# Nodes will be added here",
new_string=node_code
)
print("✅ Added search_node to nodes/__init__.py")
print("📁 Open exports/research_agent/nodes/__init__.py to see it!")
# 2. OPTIONALLY VALIDATE WITH MCP (Secondary - bookkeeping)
validation = mcp__agent-builder__test_node(
node_id="search-web",
test_input='{"query": "python tutorials"}',
mock_llm_response='{"search_results": [...mock results...]}'
)
print(f"✓ Validation: {validation['success']}")
```
**User experience:**
- Immediately sees node in their editor (from step 1)
- Gets validation feedback (from step 2)
- Can edit the file directly if needed
This combines visibility (files) with validation (MCP tools).
## Pause/Resume Architecture
For agents needing multi-turn conversations with user interaction:
### Basic Pause/Resume Flow
```python
# Define pause nodes - execution stops at these nodes
pause_nodes = ["request-clarification", "await-approval"]
# Define entry points - where to resume from each pause
entry_points = {
"start": "analyze-request", # Initial entry
"request-clarification_resume": "process-clarification", # Resume from clarification
"await-approval_resume": "execute-action", # Resume from approval
}
```
### Example: Multi-Turn Research Agent
```python
# Nodes
nodes = [
NodeSpec(id="analyze-request", ...),
NodeSpec(id="request-clarification", ...), # PAUSE NODE
NodeSpec(id="process-clarification", ...),
NodeSpec(id="generate-results", ...),
NodeSpec(id="await-approval", ...), # PAUSE NODE
NodeSpec(id="execute-action", ...),
]
# Edges with resume flows
edges = [
EdgeSpec(
id="analyze-to-clarify",
source="analyze-request",
target="request-clarification",
condition=EdgeCondition.CONDITIONAL,
condition_expr="needs_clarification == true",
),
# When resumed, goes to process-clarification
EdgeSpec(
id="clarify-to-process",
source="request-clarification",
target="process-clarification",
condition=EdgeCondition.ALWAYS,
),
EdgeSpec(
id="results-to-approval",
source="generate-results",
target="await-approval",
condition=EdgeCondition.ALWAYS,
),
# When resumed, goes to execute-action
EdgeSpec(
id="approval-to-execute",
source="await-approval",
target="execute-action",
condition=EdgeCondition.ALWAYS,
),
]
# Configuration
pause_nodes = ["request-clarification", "await-approval"]
entry_points = {
"start": "analyze-request",
"request-clarification_resume": "process-clarification",
"await-approval_resume": "execute-action",
}
```
### Running Pause/Resume Agents
```python
# Initial run - will pause at first pause node
result1 = await agent.run(
context={"query": "research topic"},
session_state=None
)
# Check if paused
if result1.paused_at:
print(f"Paused at: {result1.paused_at}")
# Resume with user input
result2 = await agent.run(
context={"user_response": "clarification details"},
session_state=result1.session_state # Pass previous state
)
```
## Anti-Patterns
### What NOT to Do
**Don't rely on `export_graph`** - Write files immediately, not at end
```python
# BAD: Building in session state, exporting at end
mcp__agent-builder__add_node(...)
mcp__agent-builder__add_node(...)
mcp__agent-builder__export_graph() # Files appear only now
# GOOD: Writing files immediately
Write(file_path="...", content=node_code) # File visible now
Write(file_path="...", content=node_code) # File visible now
```
**Don't hide code in session** - Write to files as components approved
```python
# BAD: Accumulating changes invisibly
session.add_component(component1)
session.add_component(component2)
# User can't see anything yet
# GOOD: Incremental visibility
Edit(file_path="...", ...) # User sees change 1
Edit(file_path="...", ...) # User sees change 2
```
**Don't wait to write files** - Agent visible from first step
```python
# BAD: Building everything before writing
design_all_nodes()
design_all_edges()
write_everything_at_once()
# GOOD: Write as you go
write_package_structure() # Visible
write_goal() # Visible
write_node_1() # Visible
write_node_2() # Visible
```
**Don't batch everything** - Write incrementally
```python
# BAD: Batching all nodes
nodes = [design_node_1(), design_node_2(), ...]
write_all_nodes(nodes)
# GOOD: One at a time with user feedback
write_node_1() # User approves
write_node_2() # User approves
write_node_3() # User approves
```
### MCP Tools - Correct Usage
**MCP tools OK for:**
`test_node` - Validate node configuration with mock inputs
`validate_graph` - Check graph structure
`create_session` - Track session state for bookkeeping
✅ Other validation tools
**Just don't:** Use MCP as the primary construction method or rely on export_graph
## Best Practices
### 1. Show Progress After Each Write
```python
# After writing a node
print("✅ Added analyze_request_node to nodes/__init__.py")
print("📊 Progress: 1/6 nodes added")
print("📁 Open exports/my_agent/nodes/__init__.py to see it!")
```
### 2. Let User Open Files During Build
```python
# Encourage file inspection
print("✅ Goal written to agent.py")
print("")
print("💡 Tip: Open exports/my_agent/agent.py in your editor to see the goal!")
```
### 3. Write Incrementally - One Component at a Time
```python
# Good flow
write_package_structure()
show_user("Package created")
write_goal()
show_user("Goal written")
for node in nodes:
get_approval(node)
write_node(node)
show_user(f"Node {node.id} written")
```
### 4. Test As You Build
```python
# After adding several nodes
print("💡 You can test current state with:")
print(" PYTHONPATH=core:exports python -m my_agent validate")
print(" PYTHONPATH=core:exports python -m my_agent info")
```
### 5. Keep User Informed
```python
# Clear status updates
print("🔨 Creating package structure...")
print("✅ Package created: exports/my_agent/")
print("")
print("📝 Next: Define agent goal")
```
## Continuous Monitoring Agents
For agents that run continuously without terminal nodes:
```python
# No terminal nodes - loops forever
terminal_nodes = []
# Workflow loops back to start
edges = [
EdgeSpec(id="monitor-to-check", source="monitor", target="check-condition"),
EdgeSpec(id="check-to-wait", source="check-condition", target="wait"),
EdgeSpec(id="wait-to-monitor", source="wait", target="monitor"), # Loop
]
# Entry node only
entry_node = "monitor"
entry_points = {"start": "monitor"}
pause_nodes = []
```
**Example: File Monitor**
```python
nodes = [
NodeSpec(id="list-files", ...),
NodeSpec(id="check-new-files", node_type="router", ...),
NodeSpec(id="process-files", ...),
NodeSpec(id="wait-interval", node_type="function", ...),
]
edges = [
EdgeSpec(id="list-to-check", source="list-files", target="check-new-files"),
EdgeSpec(
id="check-to-process",
source="check-new-files",
target="process-files",
condition=EdgeCondition.CONDITIONAL,
condition_expr="new_files_count > 0",
),
EdgeSpec(
id="check-to-wait",
source="check-new-files",
target="wait-interval",
condition=EdgeCondition.CONDITIONAL,
condition_expr="new_files_count == 0",
),
EdgeSpec(id="process-to-wait", source="process-files", target="wait-interval"),
EdgeSpec(id="wait-to-list", source="wait-interval", target="list-files"), # Loop back
]
terminal_nodes = [] # No terminal - runs forever
```
## Complex Routing Patterns
### Multi-Condition Router
```python
router_node = NodeSpec(
id="decision-router",
node_type="router",
input_keys=["analysis_result"],
output_keys=["decision"],
system_prompt="""
Based on the analysis result, decide the next action:
- If confidence > 0.9: route to "execute"
- If 0.5 <= confidence <= 0.9: route to "review"
- If confidence < 0.5: route to "clarify"
Return: {"decision": "execute|review|clarify"}
""",
)
# Edges for each route
edges = [
EdgeSpec(
id="router-to-execute",
source="decision-router",
target="execute-action",
condition=EdgeCondition.CONDITIONAL,
condition_expr="decision == 'execute'",
priority=1,
),
EdgeSpec(
id="router-to-review",
source="decision-router",
target="human-review",
condition=EdgeCondition.CONDITIONAL,
condition_expr="decision == 'review'",
priority=2,
),
EdgeSpec(
id="router-to-clarify",
source="decision-router",
target="request-clarification",
condition=EdgeCondition.CONDITIONAL,
condition_expr="decision == 'clarify'",
priority=3,
),
]
```
## Error Handling Patterns
### Graceful Failure with Fallback
```python
# Primary node with error handling
nodes = [
NodeSpec(id="api-call", max_retries=3, ...),
NodeSpec(id="fallback-cache", ...),
NodeSpec(id="report-error", ...),
]
edges = [
# Success path
EdgeSpec(
id="api-success",
source="api-call",
target="process-results",
condition=EdgeCondition.ON_SUCCESS,
),
# Fallback on failure
EdgeSpec(
id="api-to-fallback",
source="api-call",
target="fallback-cache",
condition=EdgeCondition.ON_FAILURE,
priority=1,
),
# Report if fallback also fails
EdgeSpec(
id="fallback-to-error",
source="fallback-cache",
target="report-error",
condition=EdgeCondition.ON_FAILURE,
priority=1,
),
]
```
## Performance Optimization
### Parallel Node Execution
```python
# Use multiple edges from same source for parallel execution
edges = [
EdgeSpec(
id="start-to-search1",
source="start",
target="search-source-1",
condition=EdgeCondition.ALWAYS,
),
EdgeSpec(
id="start-to-search2",
source="start",
target="search-source-2",
condition=EdgeCondition.ALWAYS,
),
EdgeSpec(
id="start-to-search3",
source="start",
target="search-source-3",
condition=EdgeCondition.ALWAYS,
),
# Converge results
EdgeSpec(
id="search1-to-merge",
source="search-source-1",
target="merge-results",
),
EdgeSpec(
id="search2-to-merge",
source="search-source-2",
target="merge-results",
),
EdgeSpec(
id="search3-to-merge",
source="search-source-3",
target="merge-results",
),
]
```
## Handoff to Testing
When agent is complete, transition to testing phase:
```python
print("""
✅ Agent complete: exports/my_agent/
Next steps:
1. Switch to testing-agent skill
2. Generate and approve tests
3. Run evaluation
4. Debug any failures
Command: "Test the agent at exports/my_agent/"
""")
```
### Pre-Testing Checklist
Before handing off to testing-agent:
- [ ] Agent structure validates: `python -m agent_name validate`
- [ ] All nodes defined in nodes/__init__.py
- [ ] All edges connect valid nodes
- [ ] Entry node specified
- [ ] Agent can be imported: `from exports.agent_name import default_agent`
- [ ] README.md with usage instructions
- [ ] CLI commands work (info, validate)
## Related Skills
- **building-agents-core** - Fundamental concepts
- **building-agents-construction** - Step-by-step building
- **testing-agent** - Test and validate agents
- **agent-workflow** - Complete workflow orchestrator
---
**Remember: Agent is actively constructed, visible the whole time. No hidden state. No surprise exports. Just transparent, incremental file building.**
-399
View File
@@ -1,399 +0,0 @@
---
name: hive-concepts
description: Core concepts for goal-driven agents - architecture, node types (event_loop, function), tool discovery, and workflow overview. Use when starting agent development or need to understand agent fundamentals.
license: Apache-2.0
metadata:
author: hive
version: "2.0"
type: foundational
part_of: hive
---
# Building Agents - Core Concepts
Foundational knowledge for building goal-driven agents as Python packages.
## Architecture: Python Services (Not JSON Configs)
Agents are built as Python packages:
```
exports/my_agent/
├── __init__.py # Package exports
├── __main__.py # CLI (run, info, validate, shell)
├── agent.py # Graph construction (goal, edges, agent class)
├── nodes/__init__.py # Node definitions (NodeSpec)
├── config.py # Runtime config
└── README.md # Documentation
```
**Key Principle: Agent is visible and editable during build**
- Files created immediately as components are approved
- User can watch files grow in their editor
- No session state - just direct file writes
- No "export" step - agent is ready when build completes
## Core Concepts
### Goal
Success criteria and constraints (written to agent.py)
```python
goal = Goal(
id="research-goal",
name="Technical Research Agent",
description="Research technical topics thoroughly",
success_criteria=[
SuccessCriterion(
id="completeness",
description="Cover all aspects of topic",
metric="coverage_score",
target=">=0.9",
weight=0.4,
),
# 3-5 success criteria total
],
constraints=[
Constraint(
id="accuracy",
description="All information must be verified",
constraint_type="hard",
category="quality",
),
# 1-5 constraints total
],
)
```
### Node
Unit of work (written to nodes/__init__.py)
**Node Types:**
- `event_loop` — Multi-turn streaming loop with tool execution and judge-based evaluation. Works with or without tools.
- `function` — Deterministic Python operations. No LLM involved.
```python
search_node = NodeSpec(
id="search-web",
name="Search Web",
description="Search for information and extract results",
node_type="event_loop",
input_keys=["query"],
output_keys=["search_results"],
system_prompt="Search the web for: {query}. Use the web_search tool to find results, then call set_output to store them.",
tools=["web_search"],
)
```
**NodeSpec Fields for Event Loop Nodes:**
| Field | Default | Description |
|-------|---------|-------------|
| `client_facing` | `False` | If True, streams output to user and blocks for input between turns |
| `nullable_output_keys` | `[]` | Output keys that may remain unset (for mutually exclusive outputs) |
| `max_node_visits` | `1` | Max times this node executes per run. Set >1 for feedback loop targets |
### Edge
Connection between nodes (written to agent.py)
**Edge Conditions:**
- `on_success` — Proceed if node succeeds (most common)
- `on_failure` — Handle errors
- `always` — Always proceed
- `conditional` — Based on expression evaluating node output
**Edge Priority:**
Priority controls evaluation order when multiple edges leave the same node. Higher priority edges are evaluated first. Use negative priority for feedback edges (edges that loop back to earlier nodes).
```python
# Forward edge (evaluated first)
EdgeSpec(
id="review-to-campaign",
source="review",
target="campaign-builder",
condition=EdgeCondition.CONDITIONAL,
condition_expr="output.get('approved_contacts') is not None",
priority=1,
)
# Feedback edge (evaluated after forward edges)
EdgeSpec(
id="review-feedback",
source="review",
target="extractor",
condition=EdgeCondition.CONDITIONAL,
condition_expr="output.get('redo_extraction') is not None",
priority=-1,
)
```
### Client-Facing Nodes
For multi-turn conversations with the user, set `client_facing=True` on a node. The node will:
- Stream its LLM output directly to the end user
- Block for user input between conversational turns
- Resume when new input is injected via `inject_event()`
```python
intake_node = NodeSpec(
id="intake",
name="Intake",
description="Gather requirements from the user",
node_type="event_loop",
client_facing=True,
input_keys=[],
output_keys=["repo_url", "project_url"],
system_prompt="You are the intake agent. Ask the user for the repo URL and project URL.",
)
```
> **Legacy Note:** The old `pause_nodes` / `entry_points` pattern still works but `client_facing=True` is preferred for new agents.
**STEP 1 / STEP 2 Prompt Pattern:** For client-facing nodes, structure the system prompt with two explicit phases:
```python
system_prompt="""\
**STEP 1 — Respond to the user (text only, NO tool calls):**
[Present information, ask questions, etc.]
**STEP 2 — After the user responds, call set_output:**
[Call set_output with the structured outputs]
"""
```
This prevents the LLM from calling `set_output` prematurely before the user has had a chance to respond.
### Node Design: Fewer, Richer Nodes
Prefer fewer nodes that do more work over many thin single-purpose nodes:
- **Bad**: 8 thin nodes (parse query → search → fetch → evaluate → synthesize → write → check → save)
- **Good**: 4 rich nodes (intake → research → review → report)
Why: Each node boundary requires serializing outputs and passing context. Fewer nodes means the LLM retains full context of its work within the node. A research node that searches, fetches, and analyzes keeps all the source material in its conversation history.
### nullable_output_keys for Cross-Edge Inputs
When a node receives inputs that only arrive on certain edges (e.g., `feedback` only comes from a review → research feedback loop, not from intake → research), mark those keys as `nullable_output_keys`:
```python
research_node = NodeSpec(
id="research",
input_keys=["research_brief", "feedback"],
nullable_output_keys=["feedback"], # Not present on first visit
max_node_visits=3,
...
)
```
## Event Loop Architecture Concepts
### How EventLoopNode Works
An event loop node runs a multi-turn loop:
1. LLM receives system prompt + conversation history
2. LLM responds (text and/or tool calls)
3. Tool calls are executed, results added to conversation
4. Judge evaluates: ACCEPT (exit loop), RETRY (loop again), or ESCALATE
5. Repeat until judge ACCEPTs or max_iterations reached
### EventLoopNode Runtime
EventLoopNodes are **auto-created** by `GraphExecutor` at runtime. You do NOT need to manually register them. Both `GraphExecutor` (direct) and `AgentRuntime` / `create_agent_runtime()` handle event_loop nodes automatically.
```python
# Direct execution — executor auto-creates EventLoopNodes
from framework.graph.executor import GraphExecutor
from framework.runtime.core import Runtime
runtime = Runtime(storage_path)
executor = GraphExecutor(
runtime=runtime,
llm=llm,
tools=tools,
tool_executor=tool_executor,
storage_path=storage_path,
)
result = await executor.execute(graph=graph, goal=goal, input_data=input_data)
# TUI execution — AgentRuntime also works
from framework.runtime.agent_runtime import create_agent_runtime
runtime = create_agent_runtime(
graph=graph, goal=goal, storage_path=storage_path,
entry_points=[...], llm=llm, tools=tools, tool_executor=tool_executor,
)
```
### set_output
Nodes produce structured outputs by calling `set_output(key, value)` — a synthetic tool injected by the framework. When the LLM calls `set_output`, the value is stored in the output accumulator and made available to downstream nodes via shared memory.
`set_output` is NOT a real tool — it is excluded from `real_tool_results`. For client-facing nodes, this means a turn where the LLM only calls `set_output` (no other tools) is treated as a conversational boundary and will block for user input.
### JudgeProtocol
**The judge is the SOLE mechanism for acceptance decisions.** Do not add ad-hoc framework gating, output rollback, or premature rejection logic. If the LLM calls `set_output` too early, fix it with better prompts or a custom judge — not framework-level guards.
The judge controls when a node's loop exits:
- **Implicit judge** (default, no judge configured): ACCEPTs when the LLM finishes with no tool calls and all required output keys are set
- **SchemaJudge**: Validates outputs against a Pydantic model
- **Custom judges**: Implement `evaluate(context) -> JudgeVerdict`
### LoopConfig
Controls loop behavior:
- `max_iterations` (default 50) — prevents infinite loops
- `max_tool_calls_per_turn` (default 10) — limits tool calls per LLM response
- `tool_call_overflow_margin` (default 0.5) — wiggle room before discarding extra tool calls (50% means hard cutoff at 150% of limit)
- `stall_detection_threshold` (default 3) — detects repeated identical responses
- `max_history_tokens` (default 32000) — triggers conversation compaction
### Data Tools (Spillover Management)
When tool results exceed the context window, the framework automatically saves them to a spillover directory and truncates with a hint. Nodes that produce or consume large data should include the data tools:
- `save_data(filename, data)` — Write data to a file in the data directory
- `load_data(filename, offset=0, limit=50)` — Read data with line-based pagination
- `list_data_files()` — List available data files
- `serve_file_to_user(filename, label="")` — Get a clickable file:// URI for the user
Note: `data_dir` is a framework-injected context parameter — the LLM never sees or passes it. `GraphExecutor.execute()` sets it per-execution via `contextvars`, so data tools and spillover always share the same session-scoped directory.
These are real MCP tools (not synthetic). Add them to nodes that handle large tool results:
```python
research_node = NodeSpec(
...
tools=["web_search", "web_scrape", "load_data", "save_data", "list_data_files"],
)
```
### Fan-Out / Fan-In
Multiple ON_SUCCESS edges from the same source create parallel execution. All branches run concurrently via `asyncio.gather()`. Parallel event_loop nodes must have disjoint `output_keys`.
### max_node_visits
Controls how many times a node can execute in one graph run. Default is 1. Set higher for nodes that are targets of feedback edges (review-reject loops). Set 0 for unlimited (guarded by max_steps).
## Tool Discovery & Validation
**CRITICAL:** Before adding a node with tools, you MUST verify the tools exist.
Tools are provided by MCP servers. Never assume a tool exists - always discover dynamically.
### Step 1: Register MCP Server (if not already done)
```python
mcp__agent-builder__add_mcp_server(
name="tools",
transport="stdio",
command="python",
args='["mcp_server.py", "--stdio"]',
cwd="../tools"
)
```
### Step 2: Discover Available Tools
```python
# List all tools from all registered servers
mcp__agent-builder__list_mcp_tools()
# Or list tools from a specific server
mcp__agent-builder__list_mcp_tools(server_name="tools")
```
### Step 3: Validate Before Adding Nodes
Before writing a node with `tools=[...]`:
1. Call `list_mcp_tools()` to get available tools
2. Check each tool in your node exists in the response
3. If a tool doesn't exist:
- **DO NOT proceed** with the node
- Inform the user: "The tool 'X' is not available. Available tools are: ..."
- Ask if they want to use an alternative or proceed without the tool
### Tool Validation Anti-Patterns
- **Never assume a tool exists** - always call `list_mcp_tools()` first
- **Never write a node with unverified tools** - validate before writing
- **Never silently drop tools** - if a tool doesn't exist, inform the user
- **Never guess tool names** - use exact names from discovery response
## Workflow Overview: Incremental File Construction
```
1. CREATE PACKAGE → mkdir + write skeletons
2. DEFINE GOAL → Write to agent.py + config.py
3. FOR EACH NODE:
- Propose design (event_loop for LLM work, function for deterministic)
- User approves
- Write to nodes/__init__.py IMMEDIATELY
- (Optional) Validate with test_node
4. CONNECT EDGES → Update agent.py
- Use priority for feedback edges (negative priority)
- (Optional) Validate with validate_graph
5. FINALIZE → Write agent class to agent.py
6. DONE - Agent ready at exports/my_agent/
```
**Files written immediately. MCP tools optional for validation/testing bookkeeping.**
## When to Use This Skill
Use hive-concepts when:
- Starting a new agent project and need to understand fundamentals
- Need to understand agent architecture before building
- Want to validate tool availability before proceeding
- Learning about node types, edges, and graph execution
**Next Steps:**
- Ready to build? → Use `hive-create` skill
- Need patterns and examples? → Use `hive-patterns` skill
## MCP Tools for Validation
After writing files, optionally use MCP tools for validation:
**test_node** - Validate node configuration with mock inputs
```python
mcp__agent-builder__test_node(
node_id="search-web",
test_input='{"query": "test query"}',
mock_llm_response='{"results": "mock output"}'
)
```
**validate_graph** - Check graph structure
```python
mcp__agent-builder__validate_graph()
# Returns: unreachable nodes, missing connections, event_loop validation, etc.
```
**configure_loop** - Set event loop parameters
```python
mcp__agent-builder__configure_loop(
max_iterations=50,
max_tool_calls_per_turn=10,
stall_detection_threshold=3,
max_history_tokens=32000
)
```
**Key Point:** Files are written FIRST. MCP tools are for validation only.
## Related Skills
- **hive-create** - Step-by-step building process
- **hive-patterns** - Best practices: judges, feedback edges, fan-out, context management
- **hive** - Complete workflow orchestrator
- **hive-test** - Test and validate completed agents
File diff suppressed because it is too large Load Diff
@@ -1,24 +0,0 @@
"""
Deep Research Agent - Interactive, rigorous research with TUI conversation.
Research any topic through multi-source web search, quality evaluation,
and synthesis. Features client-facing TUI interaction at key checkpoints
for user guidance and iterative deepening.
"""
from .agent import DeepResearchAgent, default_agent, goal, nodes, edges
from .config import RuntimeConfig, AgentMetadata, default_config, metadata
__version__ = "1.0.0"
__all__ = [
"DeepResearchAgent",
"default_agent",
"goal",
"nodes",
"edges",
"RuntimeConfig",
"AgentMetadata",
"default_config",
"metadata",
]
@@ -1,241 +0,0 @@
"""
CLI entry point for Deep Research Agent.
Uses AgentRuntime for multi-entrypoint support with HITL pause/resume.
"""
import asyncio
import json
import logging
import sys
import click
from .agent import default_agent, DeepResearchAgent
def setup_logging(verbose=False, debug=False):
"""Configure logging for execution visibility."""
if debug:
level, fmt = logging.DEBUG, "%(asctime)s %(name)s: %(message)s"
elif verbose:
level, fmt = logging.INFO, "%(message)s"
else:
level, fmt = logging.WARNING, "%(levelname)s: %(message)s"
logging.basicConfig(level=level, format=fmt, stream=sys.stderr)
logging.getLogger("framework").setLevel(level)
@click.group()
@click.version_option(version="1.0.0")
def cli():
"""Deep Research Agent - Interactive, rigorous research with TUI conversation."""
pass
@cli.command()
@click.option("--topic", "-t", type=str, required=True, help="Research topic")
@click.option("--mock", is_flag=True, help="Run in mock mode")
@click.option("--quiet", "-q", is_flag=True, help="Only output result JSON")
@click.option("--verbose", "-v", is_flag=True, help="Show execution details")
@click.option("--debug", is_flag=True, help="Show debug logging")
def run(topic, mock, quiet, verbose, debug):
"""Execute research on a topic."""
if not quiet:
setup_logging(verbose=verbose, debug=debug)
context = {"topic": topic}
result = asyncio.run(default_agent.run(context, mock_mode=mock))
output_data = {
"success": result.success,
"steps_executed": result.steps_executed,
"output": result.output,
}
if result.error:
output_data["error"] = result.error
click.echo(json.dumps(output_data, indent=2, default=str))
sys.exit(0 if result.success else 1)
@cli.command()
@click.option("--mock", is_flag=True, help="Run in mock mode")
@click.option("--verbose", "-v", is_flag=True, help="Show execution details")
@click.option("--debug", is_flag=True, help="Show debug logging")
def tui(mock, verbose, debug):
"""Launch the TUI dashboard for interactive research."""
setup_logging(verbose=verbose, debug=debug)
try:
from framework.tui.app import AdenTUI
except ImportError:
click.echo(
"TUI requires the 'textual' package. Install with: pip install textual"
)
sys.exit(1)
from pathlib import Path
from framework.llm import LiteLLMProvider
from framework.runner.tool_registry import ToolRegistry
from framework.runtime.agent_runtime import create_agent_runtime
from framework.runtime.event_bus import EventBus
from framework.runtime.execution_stream import EntryPointSpec
async def run_with_tui():
agent = DeepResearchAgent()
# Build graph and tools
agent._event_bus = EventBus()
agent._tool_registry = ToolRegistry()
storage_path = Path.home() / ".hive" / "agents" / "deep_research_agent"
storage_path.mkdir(parents=True, exist_ok=True)
mcp_config_path = Path(__file__).parent / "mcp_servers.json"
if mcp_config_path.exists():
agent._tool_registry.load_mcp_config(mcp_config_path)
llm = None
if not mock:
llm = LiteLLMProvider(
model=agent.config.model,
api_key=agent.config.api_key,
api_base=agent.config.api_base,
)
tools = list(agent._tool_registry.get_tools().values())
tool_executor = agent._tool_registry.get_executor()
graph = agent._build_graph()
runtime = create_agent_runtime(
graph=graph,
goal=agent.goal,
storage_path=storage_path,
entry_points=[
EntryPointSpec(
id="start",
name="Start Research",
entry_node="intake",
trigger_type="manual",
isolation_level="isolated",
),
],
llm=llm,
tools=tools,
tool_executor=tool_executor,
)
await runtime.start()
try:
app = AdenTUI(runtime)
await app.run_async()
finally:
await runtime.stop()
asyncio.run(run_with_tui())
@cli.command()
@click.option("--json", "output_json", is_flag=True)
def info(output_json):
"""Show agent information."""
info_data = default_agent.info()
if output_json:
click.echo(json.dumps(info_data, indent=2))
else:
click.echo(f"Agent: {info_data['name']}")
click.echo(f"Version: {info_data['version']}")
click.echo(f"Description: {info_data['description']}")
click.echo(f"\nNodes: {', '.join(info_data['nodes'])}")
click.echo(f"Client-facing: {', '.join(info_data['client_facing_nodes'])}")
click.echo(f"Entry: {info_data['entry_node']}")
click.echo(f"Terminal: {', '.join(info_data['terminal_nodes'])}")
@cli.command()
def validate():
"""Validate agent structure."""
validation = default_agent.validate()
if validation["valid"]:
click.echo("Agent is valid")
if validation["warnings"]:
for warning in validation["warnings"]:
click.echo(f" WARNING: {warning}")
else:
click.echo("Agent has errors:")
for error in validation["errors"]:
click.echo(f" ERROR: {error}")
sys.exit(0 if validation["valid"] else 1)
@cli.command()
@click.option("--verbose", "-v", is_flag=True)
def shell(verbose):
"""Interactive research session (CLI, no TUI)."""
asyncio.run(_interactive_shell(verbose))
async def _interactive_shell(verbose=False):
"""Async interactive shell."""
setup_logging(verbose=verbose)
click.echo("=== Deep Research Agent ===")
click.echo("Enter a topic to research (or 'quit' to exit):\n")
agent = DeepResearchAgent()
await agent.start()
try:
while True:
try:
topic = await asyncio.get_event_loop().run_in_executor(
None, input, "Topic> "
)
if topic.lower() in ["quit", "exit", "q"]:
click.echo("Goodbye!")
break
if not topic.strip():
continue
click.echo("\nResearching...\n")
result = await agent.trigger_and_wait("start", {"topic": topic})
if result is None:
click.echo("\n[Execution timed out]\n")
continue
if result.success:
output = result.output
if "report_content" in output:
click.echo("\n--- Report ---\n")
click.echo(output["report_content"])
click.echo("\n")
if "references" in output:
click.echo("--- References ---\n")
for ref in output.get("references", []):
click.echo(
f" [{ref.get('number', '?')}] {ref.get('title', '')} - {ref.get('url', '')}"
)
click.echo("\n")
else:
click.echo(f"\nResearch failed: {result.error}\n")
except KeyboardInterrupt:
click.echo("\nGoodbye!")
break
except Exception as e:
click.echo(f"Error: {e}", err=True)
import traceback
traceback.print_exc()
finally:
await agent.stop()
if __name__ == "__main__":
cli()
@@ -1,358 +0,0 @@
"""Agent graph construction for Deep Research Agent."""
from pathlib import Path
from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
from framework.graph.edge import GraphSpec
from framework.graph.executor import ExecutionResult
from framework.graph.checkpoint_config import CheckpointConfig
from framework.llm import LiteLLMProvider
from framework.runner.tool_registry import ToolRegistry
from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
from framework.runtime.execution_stream import EntryPointSpec
from .config import default_config, metadata
from .nodes import (
intake_node,
research_node,
review_node,
report_node,
)
# Goal definition
goal = Goal(
id="rigorous-interactive-research",
name="Rigorous Interactive Research",
description=(
"Research any topic by searching diverse sources, analyzing findings, "
"and producing a cited report — with user checkpoints to guide direction."
),
success_criteria=[
SuccessCriterion(
id="source-diversity",
description="Use multiple diverse, authoritative sources",
metric="source_count",
target=">=5",
weight=0.25,
),
SuccessCriterion(
id="citation-coverage",
description="Every factual claim in the report cites its source",
metric="citation_coverage",
target="100%",
weight=0.25,
),
SuccessCriterion(
id="user-satisfaction",
description="User reviews findings before report generation",
metric="user_approval",
target="true",
weight=0.25,
),
SuccessCriterion(
id="report-completeness",
description="Final report answers the original research questions",
metric="question_coverage",
target="90%",
weight=0.25,
),
],
constraints=[
Constraint(
id="no-hallucination",
description="Only include information found in fetched sources",
constraint_type="quality",
category="accuracy",
),
Constraint(
id="source-attribution",
description="Every claim must cite its source with a numbered reference",
constraint_type="quality",
category="accuracy",
),
Constraint(
id="user-checkpoint",
description="Present findings to the user before writing the final report",
constraint_type="functional",
category="interaction",
),
],
)
# Node list
nodes = [
intake_node,
research_node,
review_node,
report_node,
]
# Edge definitions
edges = [
# intake -> research
EdgeSpec(
id="intake-to-research",
source="intake",
target="research",
condition=EdgeCondition.ON_SUCCESS,
priority=1,
),
# research -> review
EdgeSpec(
id="research-to-review",
source="research",
target="review",
condition=EdgeCondition.ON_SUCCESS,
priority=1,
),
# review -> research (feedback loop)
EdgeSpec(
id="review-to-research-feedback",
source="review",
target="research",
condition=EdgeCondition.CONDITIONAL,
condition_expr="needs_more_research == True",
priority=1,
),
# review -> report (user satisfied)
EdgeSpec(
id="review-to-report",
source="review",
target="report",
condition=EdgeCondition.CONDITIONAL,
condition_expr="needs_more_research == False",
priority=2,
),
# report -> research (user wants deeper research on current topic)
EdgeSpec(
id="report-to-research",
source="report",
target="research",
condition=EdgeCondition.CONDITIONAL,
condition_expr="str(next_action).lower() == 'more_research'",
priority=2,
),
# report -> intake (user wants a new topic — default when not more_research)
EdgeSpec(
id="report-to-intake",
source="report",
target="intake",
condition=EdgeCondition.CONDITIONAL,
condition_expr="str(next_action).lower() != 'more_research'",
priority=1,
),
]
# Graph configuration
entry_node = "intake"
entry_points = {"start": "intake"}
pause_nodes = []
terminal_nodes = []
class DeepResearchAgent:
"""
Deep Research Agent — 4-node pipeline with user checkpoints.
Flow: intake -> research -> review -> report
^ |
+-- feedback loop (if user wants more)
Uses AgentRuntime for proper session management:
- Session-scoped storage (sessions/{session_id}/)
- Checkpointing for resume capability
- Runtime logging
- Data folder for save_data/load_data
"""
def __init__(self, config=None):
self.config = config or default_config
self.goal = goal
self.nodes = nodes
self.edges = edges
self.entry_node = entry_node
self.entry_points = entry_points
self.pause_nodes = pause_nodes
self.terminal_nodes = terminal_nodes
self._graph: GraphSpec | None = None
self._agent_runtime: AgentRuntime | None = None
self._tool_registry: ToolRegistry | None = None
self._storage_path: Path | None = None
def _build_graph(self) -> GraphSpec:
"""Build the GraphSpec."""
return GraphSpec(
id="deep-research-agent-graph",
goal_id=self.goal.id,
version="1.0.0",
entry_node=self.entry_node,
entry_points=self.entry_points,
terminal_nodes=self.terminal_nodes,
pause_nodes=self.pause_nodes,
nodes=self.nodes,
edges=self.edges,
default_model=self.config.model,
max_tokens=self.config.max_tokens,
loop_config={
"max_iterations": 100,
"max_tool_calls_per_turn": 20,
"max_history_tokens": 32000,
},
conversation_mode="continuous",
identity_prompt=(
"You are a rigorous research agent. You search for information "
"from diverse, authoritative sources, analyze findings critically, "
"and produce well-cited reports. You never fabricate information — "
"every claim must trace back to a source you actually retrieved."
),
)
def _setup(self, mock_mode=False) -> None:
"""Set up the agent runtime with sessions, checkpoints, and logging."""
self._storage_path = Path.home() / ".hive" / "agents" / "deep_research_agent"
self._storage_path.mkdir(parents=True, exist_ok=True)
self._tool_registry = ToolRegistry()
mcp_config_path = Path(__file__).parent / "mcp_servers.json"
if mcp_config_path.exists():
self._tool_registry.load_mcp_config(mcp_config_path)
llm = None
if not mock_mode:
llm = LiteLLMProvider(
model=self.config.model,
api_key=self.config.api_key,
api_base=self.config.api_base,
)
tool_executor = self._tool_registry.get_executor()
tools = list(self._tool_registry.get_tools().values())
self._graph = self._build_graph()
checkpoint_config = CheckpointConfig(
enabled=True,
checkpoint_on_node_start=False,
checkpoint_on_node_complete=True,
checkpoint_max_age_days=7,
async_checkpoint=True,
)
entry_point_specs = [
EntryPointSpec(
id="default",
name="Default",
entry_node=self.entry_node,
trigger_type="manual",
isolation_level="shared",
)
]
self._agent_runtime = create_agent_runtime(
graph=self._graph,
goal=self.goal,
storage_path=self._storage_path,
entry_points=entry_point_specs,
llm=llm,
tools=tools,
tool_executor=tool_executor,
checkpoint_config=checkpoint_config,
)
async def start(self, mock_mode=False) -> None:
"""Set up and start the agent runtime."""
if self._agent_runtime is None:
self._setup(mock_mode=mock_mode)
if not self._agent_runtime.is_running:
await self._agent_runtime.start()
async def stop(self) -> None:
"""Stop the agent runtime and clean up."""
if self._agent_runtime and self._agent_runtime.is_running:
await self._agent_runtime.stop()
self._agent_runtime = None
async def trigger_and_wait(
self,
entry_point: str = "default",
input_data: dict | None = None,
timeout: float | None = None,
session_state: dict | None = None,
) -> ExecutionResult | None:
"""Execute the graph and wait for completion."""
if self._agent_runtime is None:
raise RuntimeError("Agent not started. Call start() first.")
return await self._agent_runtime.trigger_and_wait(
entry_point_id=entry_point,
input_data=input_data or {},
session_state=session_state,
)
async def run(
self, context: dict, mock_mode=False, session_state=None
) -> ExecutionResult:
"""Run the agent (convenience method for single execution)."""
await self.start(mock_mode=mock_mode)
try:
result = await self.trigger_and_wait(
"default", context, session_state=session_state
)
return result or ExecutionResult(success=False, error="Execution timeout")
finally:
await self.stop()
def info(self):
"""Get agent information."""
return {
"name": metadata.name,
"version": metadata.version,
"description": metadata.description,
"goal": {
"name": self.goal.name,
"description": self.goal.description,
},
"nodes": [n.id for n in self.nodes],
"edges": [e.id for e in self.edges],
"entry_node": self.entry_node,
"entry_points": self.entry_points,
"pause_nodes": self.pause_nodes,
"terminal_nodes": self.terminal_nodes,
"client_facing_nodes": [n.id for n in self.nodes if n.client_facing],
}
def validate(self):
"""Validate agent structure."""
errors = []
warnings = []
node_ids = {node.id for node in self.nodes}
for edge in self.edges:
if edge.source not in node_ids:
errors.append(f"Edge {edge.id}: source '{edge.source}' not found")
if edge.target not in node_ids:
errors.append(f"Edge {edge.id}: target '{edge.target}' not found")
if self.entry_node not in node_ids:
errors.append(f"Entry node '{self.entry_node}' not found")
for terminal in self.terminal_nodes:
if terminal not in node_ids:
errors.append(f"Terminal node '{terminal}' not found")
for ep_id, node_id in self.entry_points.items():
if node_id not in node_ids:
errors.append(
f"Entry point '{ep_id}' references unknown node '{node_id}'"
)
return {
"valid": len(errors) == 0,
"errors": errors,
"warnings": warnings,
}
# Create default instance
default_agent = DeepResearchAgent()
@@ -1,26 +0,0 @@
"""Runtime configuration."""
from dataclasses import dataclass
from framework.config import RuntimeConfig
default_config = RuntimeConfig()
@dataclass
class AgentMetadata:
name: str = "Deep Research Agent"
version: str = "1.0.0"
description: str = (
"Interactive research agent that rigorously investigates topics through "
"multi-source search, quality evaluation, and synthesis - with TUI conversation "
"at key checkpoints for user guidance and feedback."
)
intro_message: str = (
"Hi! I'm your deep research assistant. Tell me a topic and I'll investigate it "
"thoroughly — searching multiple sources, evaluating quality, and synthesizing "
"a comprehensive report. What would you like me to research?"
)
metadata = AgentMetadata()
@@ -1,204 +0,0 @@
"""Node definitions for Deep Research Agent."""
from framework.graph import NodeSpec
# Node 1: Intake (client-facing)
# Brief conversation to clarify what the user wants researched.
intake_node = NodeSpec(
id="intake",
name="Research Intake",
description="Discuss the research topic with the user, clarify scope, and confirm direction",
node_type="event_loop",
client_facing=True,
max_node_visits=0,
input_keys=["topic"],
output_keys=["research_brief"],
success_criteria=(
"The research brief is specific and actionable: it states the topic, "
"the key questions to answer, the desired scope, and depth."
),
system_prompt="""\
You are a research intake specialist. The user wants to research a topic.
Have a brief conversation to clarify what they need.
**STEP 1 — Read and respond (text only, NO tool calls):**
1. Read the topic provided
2. If it's vague, ask 1-2 clarifying questions (scope, angle, depth)
3. If it's already clear, confirm your understanding and ask the user to confirm
Keep it short. Don't over-ask.
**STEP 2 — After the user confirms, call set_output:**
- set_output("research_brief", "A clear paragraph describing exactly what to research, \
what questions to answer, what scope to cover, and how deep to go.")
""",
tools=[],
)
# Node 2: Research
# The workhorse — searches the web, fetches content, analyzes sources.
# One node with both tools avoids the context-passing overhead of 5 separate nodes.
research_node = NodeSpec(
id="research",
name="Research",
description="Search the web, fetch source content, and compile findings",
node_type="event_loop",
max_node_visits=0,
input_keys=["research_brief", "feedback"],
output_keys=["findings", "sources", "gaps"],
nullable_output_keys=["feedback"],
success_criteria=(
"Findings reference at least 3 distinct sources with URLs. "
"Key claims are substantiated by fetched content, not generated."
),
system_prompt="""\
You are a research agent. Given a research brief, find and analyze sources.
If feedback is provided, this is a follow-up round — focus on the gaps identified.
Work in phases:
1. **Search**: Use web_search with 3-5 diverse queries covering different angles.
Prioritize authoritative sources (.edu, .gov, established publications).
2. **Fetch**: Use web_scrape on the most promising URLs (aim for 5-8 sources).
Skip URLs that fail. Extract the substantive content.
3. **Analyze**: Review what you've collected. Identify key findings, themes,
and any contradictions between sources.
Important:
- Work in batches of 3-4 tool calls at a time — never more than 10 per turn
- After each batch, assess whether you have enough material
- Prefer quality over quantity — 5 good sources beat 15 thin ones
- Track which URL each finding comes from (you'll need citations later)
- Call set_output for each key in a SEPARATE turn (not in the same turn as other tool calls)
When done, use set_output (one key at a time, separate turns):
- set_output("findings", "Structured summary: key findings with source URLs for each claim. \
Include themes, contradictions, and confidence levels.")
- set_output("sources", [{"url": "...", "title": "...", "summary": "..."}])
- set_output("gaps", "What aspects of the research brief are NOT well-covered yet, if any.")
""",
tools=[
"web_search",
"web_scrape",
"load_data",
"save_data",
"append_data",
"list_data_files",
],
)
# Node 3: Review (client-facing)
# Shows the user what was found and asks whether to dig deeper or proceed.
review_node = NodeSpec(
id="review",
name="Review Findings",
description="Present findings to user and decide whether to research more or write the report",
node_type="event_loop",
client_facing=True,
max_node_visits=0,
input_keys=["findings", "sources", "gaps", "research_brief"],
output_keys=["needs_more_research", "feedback"],
success_criteria=(
"The user has been presented with findings and has explicitly indicated "
"whether they want more research or are ready for the report."
),
system_prompt="""\
Present the research findings to the user clearly and concisely.
**STEP 1 — Present (your first message, text only, NO tool calls):**
1. **Summary** (2-3 sentences of what was found)
2. **Key Findings** (bulleted, with confidence levels)
3. **Sources Used** (count and quality assessment)
4. **Gaps** (what's still unclear or under-covered)
End by asking: Are they satisfied, or do they want deeper research? \
Should we proceed to writing the final report?
**STEP 2 — After the user responds, call set_output:**
- set_output("needs_more_research", "true") — if they want more
- set_output("needs_more_research", "false") — if they're satisfied
- set_output("feedback", "What the user wants explored further, or empty string")
""",
tools=[],
)
# Node 4: Report (client-facing)
# Writes an HTML report, serves the link to the user, and answers follow-ups.
report_node = NodeSpec(
id="report",
name="Write & Deliver Report",
description="Write a cited HTML report from the findings and present it to the user",
node_type="event_loop",
client_facing=True,
max_node_visits=0,
input_keys=["findings", "sources", "research_brief"],
output_keys=["delivery_status", "next_action"],
success_criteria=(
"An HTML report has been saved, the file link has been presented to the user, "
"and the user has indicated what they want to do next."
),
system_prompt="""\
Write a research report as an HTML file and present it to the user.
IMPORTANT: save_data requires TWO separate arguments: filename and data.
Call it like: save_data(filename="report.html", data="<html>...</html>")
Do NOT use _raw, do NOT nest arguments inside a JSON string.
**STEP 1 — Write and save the HTML report (tool calls, NO text to user yet):**
Build a clean HTML document. Keep the HTML concise — aim for clarity over length.
Use minimal embedded CSS (a few lines of style, not a full framework).
Report structure:
- Title & date
- Executive Summary (2-3 paragraphs)
- Key Findings (organized by theme, with [n] citation links)
- Analysis (synthesis, implications)
- Conclusion (key takeaways)
- References (numbered list with clickable URLs)
Requirements:
- Every factual claim must cite its source with [n] notation
- Be objective — present multiple viewpoints where sources disagree
- Answer the original research questions from the brief
Save the HTML:
save_data(filename="report.html", data="<html>...</html>")
Then get the clickable link:
serve_file_to_user(filename="report.html", label="Research Report")
If save_data fails, simplify and shorten the HTML, then retry.
**STEP 2 — Present the link to the user (text only, NO tool calls):**
Tell the user the report is ready and include the file:// URI from
serve_file_to_user so they can click it to open. Give a brief summary
of what the report covers. Ask if they have questions or want to continue.
**STEP 3 — After the user responds:**
- Answer any follow-up questions from the research material
- When the user is ready to move on, ask what they'd like to do next:
- Research a new topic?
- Dig deeper into the current topic?
- Then call set_output:
- set_output("delivery_status", "completed")
- set_output("next_action", "new_topic") — if they want a new topic
- set_output("next_action", "more_research") — if they want deeper research
""",
tools=[
"save_data",
"append_data",
"edit_data",
"serve_file_to_user",
"load_data",
"list_data_files",
],
)
__all__ = [
"intake_node",
"research_node",
"review_node",
"report_node",
]
-640
View File
@@ -1,640 +0,0 @@
---
name: hive-credentials
description: Set up and install credentials for an agent. Detects missing credentials from agent config, collects them from the user, and stores them securely in the local encrypted store at ~/.hive/credentials.
license: Apache-2.0
metadata:
author: hive
version: "2.3"
type: utility
---
# Setup Credentials
Interactive credential setup for agents with multiple authentication options. Detects what's missing, offers auth method choices, validates with health checks, and stores credentials securely.
## When to Use
- Before running or testing an agent for the first time
- When `AgentRunner.run()` fails with "missing required credentials"
- When a user asks to configure credentials for an agent
- After building a new agent that uses tools requiring API keys
## Workflow
### Step 1: Identify the Agent
Determine which agent needs credentials. The user will either:
- Name the agent directly (e.g., "set up credentials for hubspot-agent")
- Have an agent directory open (check `exports/` for agent dirs)
- Be working on an agent in the current session
Locate the agent's directory under `exports/{agent_name}/`.
### Step 2: Detect Missing Credentials
Use the `check_missing_credentials` MCP tool to detect what the agent needs and what's already configured. This tool loads the agent, inspects its required tools and node types, maps them to credentials via `CREDENTIAL_SPECS`, and checks both the encrypted store and environment variables.
```
check_missing_credentials(agent_path="exports/{agent_name}")
```
The tool returns a JSON response:
```json
{
"agent": "exports/{agent_name}",
"missing": [
{
"credential_name": "brave_search",
"env_var": "BRAVE_SEARCH_API_KEY",
"description": "Brave Search API key for web search",
"help_url": "https://brave.com/search/api/",
"tools": ["web_search"]
}
],
"available": [
{
"credential_name": "anthropic",
"env_var": "ANTHROPIC_API_KEY",
"source": "encrypted_store"
}
],
"total_missing": 1,
"ready": false
}
```
**If `ready` is true (nothing missing):** Report all credentials as configured and skip Steps 3-5. Example:
```
All required credentials are already configured:
✓ anthropic (ANTHROPIC_API_KEY)
✓ brave_search (BRAVE_SEARCH_API_KEY)
Your agent is ready to run!
```
**If credentials are missing:** Continue to Step 3 with the `missing` list.
### Step 3: Present Auth Options for Each Missing Credential
For each missing credential, check what authentication methods are available:
```python
from aden_tools.credentials import CREDENTIAL_SPECS
spec = CREDENTIAL_SPECS.get("hubspot")
if spec:
# Determine available auth options
auth_options = []
if spec.aden_supported:
auth_options.append("aden")
if spec.direct_api_key_supported:
auth_options.append("direct")
auth_options.append("custom") # Always available
# Get setup info
setup_info = {
"env_var": spec.env_var,
"description": spec.description,
"help_url": spec.help_url,
"api_key_instructions": spec.api_key_instructions,
}
```
Present the available options using AskUserQuestion:
```
Choose how to configure HUBSPOT_ACCESS_TOKEN:
1) Aden Platform (OAuth) (Recommended)
Secure OAuth2 flow via hive.adenhq.com
- Quick setup with automatic token refresh
- No need to manage API keys manually
2) Direct API Key
Enter your own API key manually
- Requires creating a HubSpot Private App
- Full control over scopes and permissions
3) Local Credential Setup (Advanced)
Programmatic configuration for CI/CD
- For automated deployments
- Requires manual API calls
```
### Step 4: Execute Auth Flow Based on User Choice
#### Prerequisite: Ensure HIVE_CREDENTIAL_KEY Is Available
Before storing any credentials, verify `HIVE_CREDENTIAL_KEY` is set (needed to encrypt/decrypt the local store). Check both the current session and shell config:
```bash
# Check current session
printenv HIVE_CREDENTIAL_KEY > /dev/null 2>&1 && echo "session: set" || echo "session: not set"
# Check shell config files
for f in ~/.zshrc ~/.bashrc ~/.profile; do [ -f "$f" ] && grep -q 'HIVE_CREDENTIAL_KEY' "$f" && echo "$f"; done
```
- **In current session** — proceed to store credentials
- **In shell config but NOT in current session** — run `source ~/.zshrc` (or `~/.bashrc`) first, then proceed
- **Not set anywhere** — `EncryptedFileStorage` will auto-generate one. After storing, tell the user to persist it: `export HIVE_CREDENTIAL_KEY="{generated_key}"` in their shell profile
> **⚠️ IMPORTANT: After adding `HIVE_CREDENTIAL_KEY` to the user's shell config, always display:**
> ```
> ⚠️ Environment variables were added to your shell config.
> Open a NEW TERMINAL for them to take effect outside this session.
> ```
#### Option 1: Aden Platform (OAuth)
This is the recommended flow for supported integrations (HubSpot, etc.).
**How Aden OAuth Works:**
The ADEN_API_KEY represents a user who has already completed OAuth authorization on Aden's platform. When users sign up and connect integrations on Aden, those OAuth tokens are stored server-side. Having an ADEN_API_KEY means:
1. User has an Aden account
2. User has already authorized integrations (HubSpot, etc.) via OAuth on Aden
3. We just need to sync those credentials down to the local credential store
**4.1a. Check for ADEN_API_KEY**
```python
import os
aden_key = os.environ.get("ADEN_API_KEY")
```
If not set, guide user to get one from Aden (this is where they do OAuth):
```python
from aden_tools.credentials import open_browser, get_aden_setup_url
# Open browser to Aden - user will sign up and connect integrations there
url = get_aden_setup_url() # https://hive.adenhq.com
success, msg = open_browser(url)
print("Please sign in to Aden and connect your integrations (HubSpot, etc.).")
print("Once done, copy your API key and return here.")
```
Ask user to provide the ADEN_API_KEY they received.
**4.1b. Save ADEN_API_KEY to Shell Config**
With user approval, persist ADEN_API_KEY to their shell config:
```python
from aden_tools.credentials import (
detect_shell,
add_env_var_to_shell_config,
get_shell_source_command,
)
shell_type = detect_shell() # 'bash', 'zsh', or 'unknown'
# Ask user for approval before modifying shell config
# If approved:
success, config_path = add_env_var_to_shell_config(
"ADEN_API_KEY",
user_provided_key,
comment="Aden Platform (OAuth) API key"
)
if success:
source_cmd = get_shell_source_command()
print(f"Saved to {config_path}")
print(f"Run: {source_cmd}")
```
> **⚠️ IMPORTANT: After adding `ADEN_API_KEY` to the user's shell config, always display:**
> ```
> ⚠️ Environment variables were added to your shell config.
> Open a NEW TERMINAL for them to take effect outside this session.
> ```
Also save to `~/.hive/configuration.json` for the framework:
```python
import json
from pathlib import Path
config_path = Path.home() / ".hive" / "configuration.json"
config = json.loads(config_path.read_text()) if config_path.exists() else {}
config["aden"] = {
"api_key_configured": True,
"api_url": "https://api.adenhq.com"
}
config_path.parent.mkdir(parents=True, exist_ok=True)
config_path.write_text(json.dumps(config, indent=2))
```
**4.1c. Sync Credentials from Aden Server**
Since the user has already authorized integrations on Aden, use the one-liner factory method:
```python
from core.framework.credentials import CredentialStore
# This single call handles everything:
# - Creates encrypted local storage at ~/.hive/credentials
# - Configures Aden client from ADEN_API_KEY env var
# - Syncs all credentials from Aden server automatically
store = CredentialStore.with_aden_sync(
base_url="https://api.adenhq.com",
auto_sync=True, # Syncs on creation
)
# Check what was synced
synced = store.list_credentials()
print(f"Synced credentials: {synced}")
# If the required credential wasn't synced, the user hasn't authorized it on Aden yet
if "hubspot" not in synced:
print("HubSpot not found in your Aden account.")
print("Please visit https://hive.adenhq.com to connect HubSpot, then try again.")
```
For more control over the sync process:
```python
from core.framework.credentials import CredentialStore
from core.framework.credentials.aden import (
AdenCredentialClient,
AdenClientConfig,
AdenSyncProvider,
)
# Create client (API key loaded from ADEN_API_KEY env var)
client = AdenCredentialClient(AdenClientConfig(
base_url="https://api.adenhq.com",
))
# Create provider and store
provider = AdenSyncProvider(client=client)
store = CredentialStore.with_encrypted_storage()
# Manual sync
synced_count = provider.sync_all(store)
print(f"Synced {synced_count} credentials from Aden")
```
**4.1d. Run Health Check**
```python
from aden_tools.credentials import check_credential_health
# Get the token from the store
cred = store.get_credential("hubspot")
token = cred.keys["access_token"].value.get_secret_value()
result = check_credential_health("hubspot", token)
if result.valid:
print("HubSpot credentials validated successfully!")
else:
print(f"Validation failed: {result.message}")
# Offer to retry the OAuth flow
```
#### Option 2: Direct API Key
For users who prefer manual API key management.
**4.2a. Show Setup Instructions**
```python
from aden_tools.credentials import CREDENTIAL_SPECS
spec = CREDENTIAL_SPECS.get("hubspot")
if spec and spec.api_key_instructions:
print(spec.api_key_instructions)
# Output:
# To get a HubSpot Private App token:
# 1. Go to HubSpot Settings > Integrations > Private Apps
# 2. Click "Create a private app"
# 3. Name your app (e.g., "Hive Agent")
# ...
if spec and spec.help_url:
print(f"More info: {spec.help_url}")
```
**4.2b. Collect API Key from User**
Use AskUserQuestion to securely collect the API key:
```
Please provide your HubSpot access token:
(This will be stored securely in ~/.hive/credentials)
```
**4.2c. Run Health Check Before Storing**
```python
from aden_tools.credentials import check_credential_health
result = check_credential_health("hubspot", user_provided_token)
if not result.valid:
print(f"Warning: {result.message}")
# Ask user if they want to:
# 1. Try a different token
# 2. Continue anyway (not recommended)
```
**4.2d. Store in Local Encrypted Store**
```python
from core.framework.credentials import CredentialStore, CredentialObject, CredentialKey
from pydantic import SecretStr
store = CredentialStore.with_encrypted_storage()
cred = CredentialObject(
id="hubspot",
name="HubSpot Access Token",
keys={
"access_token": CredentialKey(
name="access_token",
value=SecretStr(user_provided_token),
)
},
)
store.save_credential(cred)
```
**4.2e. Export to Current Session**
```bash
export HUBSPOT_ACCESS_TOKEN="the-value"
```
#### Option 3: Local Credential Setup (Advanced)
For programmatic/CI/CD setups.
**4.3a. Show Documentation**
```
For advanced credential management, you can use the CredentialStore API directly:
from core.framework.credentials import CredentialStore, CredentialObject, CredentialKey
from pydantic import SecretStr
store = CredentialStore.with_encrypted_storage()
cred = CredentialObject(
id="hubspot",
name="HubSpot Access Token",
keys={"access_token": CredentialKey(name="access_token", value=SecretStr("..."))}
)
store.save_credential(cred)
For CI/CD environments:
- Set HIVE_CREDENTIAL_KEY for encryption
- Pre-populate ~/.hive/credentials programmatically
- Or use environment variables directly (HUBSPOT_ACCESS_TOKEN)
Documentation: See core/framework/credentials/README.md
```
### Step 5: Record Configuration Method
Track which auth method was used for each credential in `~/.hive/configuration.json`:
```python
import json
from pathlib import Path
from datetime import datetime
config_path = Path.home() / ".hive" / "configuration.json"
config = json.loads(config_path.read_text()) if config_path.exists() else {}
if "credential_methods" not in config:
config["credential_methods"] = {}
config["credential_methods"]["hubspot"] = {
"method": "aden", # or "direct" or "custom"
"configured_at": datetime.now().isoformat(),
}
config_path.write_text(json.dumps(config, indent=2))
```
### Step 6: Verify All Credentials
Use the `verify_credentials` MCP tool to confirm everything is properly configured:
```
verify_credentials(agent_path="exports/{agent_name}")
```
The tool returns:
```json
{
"agent": "exports/{agent_name}",
"ready": true,
"missing_credentials": [],
"warnings": [],
"errors": []
}
```
If `ready` is true, report success. If `missing_credentials` is non-empty, identify what failed and loop back to Step 3 for the remaining credentials.
## Health Check Reference
Health checks validate credentials by making lightweight API calls:
| Credential | Endpoint | What It Checks |
| --------------- | --------------------------------------- | --------------------------------- |
| `anthropic` | `POST /v1/messages` | API key validity |
| `brave_search` | `GET /res/v1/web/search?q=test&count=1` | API key validity |
| `google_search` | `GET /customsearch/v1?q=test&num=1` | API key + CSE ID validity |
| `github` | `GET /user` | Token validity, user identity |
| `hubspot` | `GET /crm/v3/objects/contacts?limit=1` | Bearer token validity, CRM scopes |
| `resend` | `GET /domains` | API key validity |
```python
from aden_tools.credentials import check_credential_health, HealthCheckResult
result: HealthCheckResult = check_credential_health("hubspot", token_value)
# result.valid: bool
# result.message: str
# result.details: dict (status_code, rate_limited, etc.)
```
## Encryption Key (HIVE_CREDENTIAL_KEY)
The local encrypted store requires `HIVE_CREDENTIAL_KEY` to encrypt/decrypt credentials.
- If the user doesn't have one, `EncryptedFileStorage` will auto-generate one and log it
- The user MUST persist this key (e.g., in `~/.bashrc`/`~/.zshrc` or a secrets manager)
- Without this key, stored credentials cannot be decrypted
**Shell config rule:** Only TWO keys belong in shell config (`~/.zshrc`/`~/.bashrc`):
- `HIVE_CREDENTIAL_KEY` — encryption key for the credential store
- `ADEN_API_KEY` — Aden platform auth key (needed before the store can sync)
All other API keys (Brave, Google, HubSpot, etc.) must go in the encrypted store only. **Never offer to add them to shell config.**
If `HIVE_CREDENTIAL_KEY` is not set:
1. Let the store generate one
2. Tell the user to save it: `export HIVE_CREDENTIAL_KEY="{generated_key}"`
3. Recommend adding it to `~/.bashrc` or their shell profile
## Security Rules
- **NEVER** log, print, or echo credential values in tool output
- **NEVER** store credentials in plaintext files, git-tracked files, or agent configs
- **NEVER** hardcode credentials in source code
- **NEVER** offer to save API keys to shell config (`~/.zshrc`/`~/.bashrc`) — the **only** keys that belong in shell config are `HIVE_CREDENTIAL_KEY` and `ADEN_API_KEY`. All other credentials (Brave, Google, HubSpot, GitHub, Resend, etc.) go in the encrypted store only.
- **ALWAYS** use `SecretStr` from Pydantic when handling credential values in Python
- **ALWAYS** use the local encrypted store (`~/.hive/credentials`) for persistence
- **ALWAYS** run health checks before storing credentials (when possible)
- **ALWAYS** verify credentials were stored by re-running validation, not by reading them back
- When modifying `~/.bashrc` or `~/.zshrc`, confirm with the user first
## Credential Sources Reference
All credential specs are defined in `tools/src/aden_tools/credentials/`:
| File | Category | Credentials | Aden Supported |
| ----------------- | ------------- | --------------------------------------------- | -------------- |
| `llm.py` | LLM Providers | `anthropic` | No |
| `search.py` | Search Tools | `brave_search`, `google_search`, `google_cse` | No |
| `email.py` | Email | `resend` | No |
| `integrations.py` | Integrations | `github`, `hubspot`, `google_calendar_oauth` | No / Yes |
**Note:** Additional LLM providers (Cerebras, Groq, OpenAI) are handled by LiteLLM via environment
variables (`CEREBRAS_API_KEY`, `GROQ_API_KEY`, `OPENAI_API_KEY`) but are not yet in CREDENTIAL_SPECS.
Add them to `llm.py` as needed.
To check what's registered:
```python
from aden_tools.credentials import CREDENTIAL_SPECS
for name, spec in CREDENTIAL_SPECS.items():
print(f"{name}: aden={spec.aden_supported}, direct={spec.direct_api_key_supported}")
```
## Migration: CredentialManager → CredentialStore
**CredentialManager is deprecated.** Use CredentialStore instead.
| Old (Deprecated) | New (Recommended) |
| ----------------------------------------- | -------------------------------------------------------------------- |
| `CredentialManager()` | `CredentialStore.with_encrypted_storage()` |
| `creds.get("hubspot")` | `store.get("hubspot")` or `store.get_key("hubspot", "access_token")` |
| `creds.validate_for_tools(tools)` | Use `store.is_available(cred_id)` per credential |
| `creds.get_auth_options("hubspot")` | Check `CREDENTIAL_SPECS["hubspot"].aden_supported` |
| `creds.get_setup_instructions("hubspot")` | Access `CREDENTIAL_SPECS["hubspot"]` directly |
**Why migrate?**
- **CredentialStore** supports encrypted storage, multi-key credentials, template resolution, and automatic token refresh
- **CredentialManager** only reads from environment variables and .env files (no encryption, no refresh)
- **CredentialStoreAdapter** exists for backward compatibility during migration
```python
# Old way (deprecated)
from aden_tools.credentials import CredentialManager
creds = CredentialManager()
token = creds.get("hubspot")
# New way (recommended)
from core.framework.credentials import CredentialStore
store = CredentialStore.with_encrypted_storage()
token = store.get("hubspot")
# With Aden sync (recommended for OAuth integrations)
store = CredentialStore.with_aden_sync()
token = store.get_key("hubspot", "access_token")
```
## Example Session
```
User: /hive-credentials for my research-agent
Agent: Let me check what credentials your research-agent needs.
[Calls check_missing_credentials(agent_path="exports/research-agent")]
→ Returns:
available: anthropic (encrypted_store), brave_search (encrypted_store)
missing: google_search (GOOGLE_API_KEY), google_cse (GOOGLE_CSE_ID)
ready: false
Agent: 2 of 4 required credentials are already configured. Only Google Custom
Search needs setup (2 values).
--- Setting up Google Custom Search (google_search + google_cse) ---
This requires two values that work together.
[Checks HIVE_CREDENTIAL_KEY before storing]
$ printenv HIVE_CREDENTIAL_KEY > /dev/null 2>&1 && echo "set" || echo "not set"
set
First, the Google API Key:
1. Go to https://console.cloud.google.com/apis/credentials
2. Create a new project (or select an existing one)
3. Enable the "Custom Search API" from the API Library
4. Go to Credentials > Create Credentials > API Key
5. Copy the generated API key
[AskUserQuestion: "Please provide your Google API key:"]
[User provides key]
Now, the Custom Search Engine ID:
1. Go to https://programmablesearchengine.google.com/controlpanel/all
2. Click "Add" to create a new search engine
3. Under "What to search", select "Search the entire web"
4. Give your search engine a name
5. Click "Create"
6. Copy the Search Engine ID (cx value)
[AskUserQuestion: "Please provide your Google CSE ID:"]
[User provides ID]
[Runs health check with both values - GET /customsearch/v1?q=test&num=1 → 200 OK]
[Stores both in local encrypted store, exports to env]
✓ Google Custom Search credentials valid
[Calls verify_credentials(agent_path="exports/research-agent")]
→ Returns: ready: true, missing_credentials: []
All credentials are now configured:
✓ anthropic (ANTHROPIC_API_KEY) — already in encrypted store
✓ brave_search (BRAVE_SEARCH_API_KEY) — already in encrypted store
✓ google_search (GOOGLE_API_KEY) — stored in encrypted store
✓ google_cse (GOOGLE_CSE_ID) — stored in encrypted store
┌─────────────────────────────────────────────────────────────────────────────┐
│ ✅ CREDENTIALS CONFIGURED │
├─────────────────────────────────────────────────────────────────────────────┤
│ │
│ OPEN A NEW TERMINAL before running commands below. │
│ Environment variables were saved to your shell config but │
│ only take effect in new terminal sessions. │
│ │
│ NEXT STEPS: │
│ │
│ 1. RUN YOUR AGENT: │
│ │
│ hive tui │
│ │
│ 2. IF YOU ENCOUNTER ISSUES, USE THE DEBUGGER: │
│ │
│ /hive-debugger │
│ │
│ The debugger analyzes runtime logs, identifies retry loops, tool │
│ failures, stalled execution, and provides actionable fix suggestions. │
│ │
└─────────────────────────────────────────────────────────────────────────────┘
```
File diff suppressed because it is too large Load Diff
-385
View File
@@ -1,385 +0,0 @@
---
name: hive-patterns
description: Best practices, patterns, and examples for building goal-driven agents. Includes client-facing interaction, feedback edges, judge patterns, fan-out/fan-in, context management, and anti-patterns.
license: Apache-2.0
metadata:
author: hive
version: "2.0"
type: reference
part_of: hive
---
# Building Agents - Patterns & Best Practices
Design patterns, examples, and best practices for building robust goal-driven agents.
**Prerequisites:** Complete agent structure using `hive-create`.
## Practical Example: Hybrid Workflow
How to build a node using both direct file writes and optional MCP validation:
```python
# 1. WRITE TO FILE FIRST (Primary - makes it visible)
node_code = '''
search_node = NodeSpec(
id="search-web",
node_type="event_loop",
input_keys=["query"],
output_keys=["search_results"],
system_prompt="Search the web for: {query}. Use web_search, then call set_output to store results.",
tools=["web_search"],
)
'''
Edit(
file_path="exports/research_agent/nodes/__init__.py",
old_string="# Nodes will be added here",
new_string=node_code
)
# 2. OPTIONALLY VALIDATE WITH MCP (Secondary - bookkeeping)
validation = mcp__agent-builder__test_node(
node_id="search-web",
test_input='{"query": "python tutorials"}',
mock_llm_response='{"search_results": [...mock results...]}'
)
```
**User experience:**
- Immediately sees node in their editor (from step 1)
- Gets validation feedback (from step 2)
- Can edit the file directly if needed
## Multi-Turn Interaction Patterns
For agents needing multi-turn conversations with users, use `client_facing=True` on event_loop nodes.
### Client-Facing Nodes
A client-facing node streams LLM output to the user and blocks for user input between conversational turns. This replaces the old pause/resume pattern.
```python
# Client-facing node with STEP 1/STEP 2 prompt pattern
intake_node = NodeSpec(
id="intake",
name="Intake",
description="Gather requirements from the user",
node_type="event_loop",
client_facing=True,
input_keys=["topic"],
output_keys=["research_brief"],
system_prompt="""\
You are an intake specialist.
**STEP 1 — Read and respond (text only, NO tool calls):**
1. Read the topic provided
2. If it's vague, ask 1-2 clarifying questions
3. If it's clear, confirm your understanding
**STEP 2 — After the user confirms, call set_output:**
- set_output("research_brief", "Clear description of what to research")
""",
)
# Internal node runs without user interaction
research_node = NodeSpec(
id="research",
name="Research",
description="Search and analyze sources",
node_type="event_loop",
input_keys=["research_brief"],
output_keys=["findings", "sources"],
system_prompt="Research the topic using web_search and web_scrape...",
tools=["web_search", "web_scrape", "load_data", "save_data"],
)
```
**How it works:**
- Client-facing nodes stream LLM text to the user and block for input after each response
- User input is injected via `node.inject_event(text)`
- When the LLM calls `set_output` to produce structured outputs, the judge evaluates and ACCEPTs
- Internal nodes (non-client-facing) run their entire loop without blocking
- `set_output` is a synthetic tool — a turn with only `set_output` calls (no real tools) triggers user input blocking
**STEP 1/STEP 2 pattern:** Always structure client-facing prompts with explicit phases. STEP 1 is text-only conversation. STEP 2 calls `set_output` after user confirmation. This prevents the LLM from calling `set_output` prematurely before the user responds.
### When to Use client_facing
| Scenario | client_facing | Why |
| ----------------------------------- | :-----------: | ---------------------- |
| Gathering user requirements | Yes | Need user input |
| Human review/approval checkpoint | Yes | Need human decision |
| Data processing (scanning, scoring) | No | Runs autonomously |
| Report generation | No | No user input needed |
| Final confirmation before action | Yes | Need explicit approval |
> **Legacy Note:** The `pause_nodes` / `entry_points` pattern still works for backward compatibility but `client_facing=True` is preferred for new agents.
## Edge-Based Routing and Feedback Loops
### Conditional Edge Routing
Multiple conditional edges from the same source replace the old `router` node type. Each edge checks a condition on the node's output.
```python
# Node with mutually exclusive outputs
review_node = NodeSpec(
id="review",
name="Review",
node_type="event_loop",
client_facing=True,
output_keys=["approved_contacts", "redo_extraction"],
nullable_output_keys=["approved_contacts", "redo_extraction"],
max_node_visits=3,
system_prompt="Present the contact list to the operator. If they approve, call set_output('approved_contacts', ...). If they want changes, call set_output('redo_extraction', 'true').",
)
# Forward edge (positive priority, evaluated first)
EdgeSpec(
id="review-to-campaign",
source="review",
target="campaign-builder",
condition=EdgeCondition.CONDITIONAL,
condition_expr="output.get('approved_contacts') is not None",
priority=1,
)
# Feedback edge (negative priority, evaluated after forward edges)
EdgeSpec(
id="review-feedback",
source="review",
target="extractor",
condition=EdgeCondition.CONDITIONAL,
condition_expr="output.get('redo_extraction') is not None",
priority=-1,
)
```
**Key concepts:**
- `nullable_output_keys`: Lists output keys that may remain unset. The node sets exactly one of the mutually exclusive keys per execution.
- `max_node_visits`: Must be >1 on the feedback target (extractor) so it can re-execute. Default is 1.
- `priority`: Positive = forward edge (evaluated first). Negative = feedback edge. The executor tries forward edges first; if none match, falls back to feedback edges.
### Routing Decision Table
| Pattern | Old Approach | New Approach |
| ---------------------- | ----------------------- | --------------------------------------------- |
| Conditional branching | `router` node | Conditional edges with `condition_expr` |
| Binary approve/reject | `pause_nodes` + resume | `client_facing=True` + `nullable_output_keys` |
| Loop-back on rejection | Manual entry_points | Feedback edge with `priority=-1` |
| Multi-way routing | Router with routes dict | Multiple conditional edges with priorities |
## Judge Patterns
**Core Principle: The judge is the SOLE mechanism for acceptance decisions.** Never add ad-hoc framework gating to compensate for LLM behavior. If the LLM calls `set_output` prematurely, fix the system prompt or use a custom judge. Anti-patterns to avoid:
- Output rollback logic
- `_user_has_responded` flags
- Premature set_output rejection
- Interaction protocol injection into system prompts
Judges control when an event_loop node's loop exits. Choose based on validation needs.
### Implicit Judge (Default)
When no judge is configured, the implicit judge ACCEPTs when:
- The LLM finishes its response with no tool calls
- All required output keys have been set via `set_output`
Best for simple nodes where "all outputs set" is sufficient validation.
### SchemaJudge
Validates outputs against a Pydantic model. Use when you need structural validation.
```python
from pydantic import BaseModel
class ScannerOutput(BaseModel):
github_users: list[dict] # Must be a list of user objects
class SchemaJudge:
def __init__(self, output_model: type[BaseModel]):
self._model = output_model
async def evaluate(self, context: dict) -> JudgeVerdict:
missing = context.get("missing_keys", [])
if missing:
return JudgeVerdict(
action="RETRY",
feedback=f"Missing output keys: {missing}. Use set_output to provide them.",
)
try:
self._model.model_validate(context["output_accumulator"])
return JudgeVerdict(action="ACCEPT")
except ValidationError as e:
return JudgeVerdict(action="RETRY", feedback=str(e))
```
### When to Use Which Judge
| Judge | Use When | Example |
| --------------- | ------------------------------------- | ---------------------- |
| Implicit (None) | Output keys are sufficient validation | Simple data extraction |
| SchemaJudge | Need structural validation of outputs | API response parsing |
| Custom | Domain-specific validation logic | Score must be 0.0-1.0 |
## Fan-Out / Fan-In (Parallel Execution)
Multiple ON_SUCCESS edges from the same source trigger parallel execution. All branches run concurrently via `asyncio.gather()`.
```python
# Scanner fans out to Profiler and Scorer in parallel
EdgeSpec(id="scanner-to-profiler", source="scanner", target="profiler",
condition=EdgeCondition.ON_SUCCESS)
EdgeSpec(id="scanner-to-scorer", source="scanner", target="scorer",
condition=EdgeCondition.ON_SUCCESS)
# Both fan in to Extractor
EdgeSpec(id="profiler-to-extractor", source="profiler", target="extractor",
condition=EdgeCondition.ON_SUCCESS)
EdgeSpec(id="scorer-to-extractor", source="scorer", target="extractor",
condition=EdgeCondition.ON_SUCCESS)
```
**Requirements:**
- Parallel event_loop nodes must have **disjoint output_keys** (no key written by both)
- Only one parallel branch may contain a `client_facing` node
- Fan-in node receives outputs from all completed branches in shared memory
## Context Management Patterns
### Tiered Compaction
EventLoopNode automatically manages context window usage with tiered compaction:
1. **Pruning** — Old tool results replaced with compact placeholders (zero-cost, no LLM call)
2. **Normal compaction** — LLM summarizes older messages
3. **Aggressive compaction** — Keeps only recent messages + summary
4. **Emergency** — Hard reset with tool history preservation
### Spillover Pattern
The framework automatically truncates large tool results and saves full content to a spillover directory. The LLM receives a truncation message with instructions to use `load_data` to read the full result.
For explicit data management, use the data tools (real MCP tools, not synthetic):
```python
# save_data, load_data, list_data_files, serve_file_to_user are real MCP tools
# data_dir is auto-injected by the framework — the LLM never sees it
# Saving large results
save_data(filename="sources.json", data=large_json_string)
# Reading with pagination (line-based offset/limit)
load_data(filename="sources.json", offset=0, limit=50)
# Listing available files
list_data_files()
# Serving a file to the user as a clickable link
serve_file_to_user(filename="report.html", label="Research Report")
```
Add data tools to nodes that handle large tool results:
```python
research_node = NodeSpec(
...
tools=["web_search", "web_scrape", "load_data", "save_data", "list_data_files"],
)
```
`data_dir` is a framework context parameter — auto-injected at call time. `GraphExecutor.execute()` sets it per-execution via `ToolRegistry.set_execution_context(data_dir=...)` (using `contextvars` for concurrency safety), ensuring it matches the session-scoped spillover directory.
## Anti-Patterns
### What NOT to Do
- **Don't rely on `export_graph`** — Write files immediately, not at end
- **Don't hide code in session** — Write to files as components are approved
- **Don't wait to write files** — Agent visible from first step
- **Don't batch everything** — Write incrementally, one component at a time
- **Don't create too many thin nodes** — Prefer fewer, richer nodes (see below)
- **Don't add framework gating for LLM behavior** — Fix prompts or use judges instead
### Fewer, Richer Nodes
A common mistake is splitting work into too many small single-purpose nodes. Each node boundary requires serializing outputs, losing in-context information, and adding edge complexity.
| Bad (8 thin nodes) | Good (4 rich nodes) |
| ------------------- | ----------------------------------- |
| parse-query | intake (client-facing) |
| search-sources | research (search + fetch + analyze) |
| fetch-content | review (client-facing) |
| evaluate-sources | report (write + deliver) |
| synthesize-findings | |
| write-report | |
| quality-check | |
| save-report | |
**Why fewer nodes are better:**
- The LLM retains full context of its work within a single node
- A research node that searches, fetches, and analyzes keeps all source material in its conversation history
- Fewer edges means simpler graph and fewer failure points
- Data tools (`save_data`/`load_data`) handle context window limits within a single node
### MCP Tools - Correct Usage
**MCP tools OK for:**
- `test_node` — Validate node configuration with mock inputs
- `validate_graph` — Check graph structure
- `configure_loop` — Set event loop parameters
- `create_session` — Track session state for bookkeeping
**Just don't:** Use MCP as the primary construction method or rely on export_graph
## Error Handling Patterns
### Graceful Failure with Fallback
```python
edges = [
# Success path
EdgeSpec(id="api-success", source="api-call", target="process-results",
condition=EdgeCondition.ON_SUCCESS),
# Fallback on failure
EdgeSpec(id="api-to-fallback", source="api-call", target="fallback-cache",
condition=EdgeCondition.ON_FAILURE, priority=1),
# Report if fallback also fails
EdgeSpec(id="fallback-to-error", source="fallback-cache", target="report-error",
condition=EdgeCondition.ON_FAILURE, priority=1),
]
```
## Handoff to Testing
When agent is complete, transition to testing phase:
### Pre-Testing Checklist
- [ ] Agent structure validates: `uv run python -m agent_name validate`
- [ ] All nodes defined in nodes/**init**.py
- [ ] All edges connect valid nodes with correct priorities
- [ ] Feedback edge targets have `max_node_visits > 1`
- [ ] Client-facing nodes have meaningful system prompts
- [ ] Agent can be imported: `from exports.agent_name import default_agent`
## Related Skills
- **hive-concepts** — Fundamental concepts (node types, edges, event loop architecture)
- **hive-create** — Step-by-step building process
- **hive-test** — Test and validate agents
- **hive** — Complete workflow orchestrator
---
**Remember: Agent is actively constructed, visible the whole time. No hidden state. No surprise exports. Just transparent, incremental file building.**
-940
View File
@@ -1,940 +0,0 @@
---
name: hive-test
description: Iterative agent testing with session recovery. Execute, analyze, fix, resume from checkpoints. Use when testing an agent, debugging test failures, or verifying fixes without re-running from scratch.
---
# Agent Testing
Test agents iteratively: execute, analyze failures, fix, resume from checkpoint, repeat.
## When to Use
- Testing a newly built agent against its goal
- Debugging a failing agent iteratively
- Verifying fixes without re-running expensive early nodes
- Running final regression tests before deployment
## Prerequisites
1. Agent package at `exports/{agent_name}/` (built with `/hive-create`)
2. Credentials configured (`/hive-credentials`)
3. `ANTHROPIC_API_KEY` set (or appropriate LLM provider key)
**Path distinction** (critical — don't confuse these):
- `exports/{agent_name}/` — agent source code (edit here)
- `~/.hive/agents/{agent_name}/` — runtime data: sessions, checkpoints, logs (read here)
---
## The Iterative Test Loop
This is the core workflow. Don't re-run the entire agent when a late node fails — analyze, fix, and resume from the last clean checkpoint.
```
┌──────────────────────────────────────┐
│ PHASE 1: Generate Test Scenarios │
│ Goal → synthetic test inputs + tests │
└──────────────┬───────────────────────┘
┌──────────────────────────────────────┐
│ PHASE 2: Execute │◄────────────────┐
│ Run agent (CLI or pytest) │ │
└──────────────┬───────────────────────┘ │
↓ │
Pass? ──yes──► PHASE 6: Final Verification │
│ │
no │
↓ │
┌──────────────────────────────────────┐ │
│ PHASE 3: Analyze │ │
│ Session + runtime logs + checkpoints │ │
└──────────────┬───────────────────────┘ │
↓ │
┌──────────────────────────────────────┐ │
│ PHASE 4: Fix │ │
│ Prompt / code / graph / goal │ │
└──────────────┬───────────────────────┘ │
↓ │
┌──────────────────────────────────────┐ │
│ PHASE 5: Recover & Resume │─────────────────┘
│ Checkpoint resume OR fresh re-run │
└──────────────────────────────────────┘
```
---
### Phase 1: Generate Test Scenarios
Create synthetic tests from the agent's goal, constraints, and success criteria.
#### Step 1a: Read the goal
```python
# Read goal from agent.py
Read(file_path="exports/{agent_name}/agent.py")
# Extract the Goal definition and convert to JSON string
```
#### Step 1b: Get test guidelines
```python
# Get constraint test guidelines
generate_constraint_tests(
goal_id="your-goal-id",
goal_json='{"id": "...", "constraints": [...]}',
agent_path="exports/{agent_name}"
)
# Get success criteria test guidelines
generate_success_tests(
goal_id="your-goal-id",
goal_json='{"id": "...", "success_criteria": [...]}',
node_names="intake,research,review,report",
tool_names="web_search,web_scrape",
agent_path="exports/{agent_name}"
)
```
These return `file_header`, `test_template`, `constraints_formatted`/`success_criteria_formatted`, and `test_guidelines`. They do NOT generate test code — you write the tests.
#### Step 1c: Write tests
```python
Write(
file_path=result["output_file"],
content=result["file_header"] + "\n\n" + your_test_code
)
```
#### Test writing rules
- Every test MUST be `async` with `@pytest.mark.asyncio`
- Every test MUST accept `runner, auto_responder, mock_mode` fixtures
- Use `await auto_responder.start()` before running, `await auto_responder.stop()` in `finally`
- Use `await runner.run(input_dict)` — this goes through AgentRunner → AgentRuntime → ExecutionStream
- Access output via `result.output.get("key")` — NEVER `result.output["key"]`
- `result.success=True` means no exception, NOT goal achieved — always check output
- Write 8-15 tests total, not 30+
- Each real test costs ~3 seconds + LLM tokens
- NEVER use `default_agent.run()` — it bypasses the runtime (no sessions, no logs, client-facing nodes hang)
#### Step 1d: Check existing tests
Before generating, check if tests already exist:
```python
list_tests(
goal_id="your-goal-id",
agent_path="exports/{agent_name}"
)
```
---
### Phase 2: Execute
Two execution paths, use the right one for your situation.
#### Iterative debugging (for complex agents)
Run the agent via CLI. This creates sessions with checkpoints at `~/.hive/agents/{agent_name}/sessions/`:
```bash
uv run hive run exports/{agent_name} --input '{"query": "test topic"}'
```
Sessions and checkpoints are saved automatically.
**Client-facing nodes**: Agents with `client_facing=True` nodes (interactive conversation) work in headless mode when run from a real terminal — the agent streams output to stdout and reads user input from stdin via a `>>> ` prompt. In non-interactive shells (like Claude Code's Bash tool), client-facing nodes will hang because there is no stdin. For testing interactive agents from Claude Code, use `run_tests` with mock mode or have the user run the agent manually in their terminal.
#### Automated regression (for CI or final verification)
Use the `run_tests` MCP tool to run all pytest tests:
```python
run_tests(
goal_id="your-goal-id",
agent_path="exports/{agent_name}"
)
```
Returns structured results:
```json
{
"overall_passed": false,
"summary": {"total": 12, "passed": 10, "failed": 2, "pass_rate": "83.3%"},
"test_results": [{"test_name": "test_success_source_diversity", "status": "failed"}],
"failures": [{"test_name": "test_success_source_diversity", "details": "..."}]
}
```
**Options:**
```python
# Run only constraint tests
run_tests(goal_id, agent_path, test_types='["constraint"]')
# Stop on first failure
run_tests(goal_id, agent_path, fail_fast=True)
# Parallel execution
run_tests(goal_id, agent_path, parallel=4)
```
**Note:** `run_tests` uses `AgentRunner` with `tmp_path` storage, so sessions are isolated per test run. For checkpoint-based recovery with persistent sessions, use CLI execution. Use `run_tests` for quick regression checks and final verification.
---
### Phase 3: Analyze Failures
When a test fails, drill down systematically. Don't guess — use the tools.
#### Step 3a: Get error category
```python
debug_test(
goal_id="your-goal-id",
test_name="test_success_source_diversity",
agent_path="exports/{agent_name}"
)
```
Returns error category (`IMPLEMENTATION_ERROR`, `ASSERTION_FAILURE`, `TIMEOUT`, `IMPORT_ERROR`, `API_ERROR`) plus full traceback and suggestions.
#### Step 3b: Find the failed session
```python
list_agent_sessions(
agent_work_dir="~/.hive/agents/{agent_name}",
status="failed",
limit=5
)
```
Returns session list with IDs, timestamps, current_node (where it failed), execution_quality.
#### Step 3c: Inspect session state
```python
get_agent_session_state(
agent_work_dir="~/.hive/agents/{agent_name}",
session_id="session_20260209_143022_abc12345"
)
```
Returns execution path, which node was current, step count, timestamps — but excludes memory values (to avoid context bloat). Shows `memory_keys` and `memory_size` instead.
#### Step 3d: Examine runtime logs (L2/L3)
```python
# L2: Per-node success/failure, retry counts
query_runtime_log_details(
agent_work_dir="~/.hive/agents/{agent_name}",
run_id="session_20260209_143022_abc12345",
needs_attention_only=True
)
# L3: Exact LLM responses, tool call inputs/outputs
query_runtime_log_raw(
agent_work_dir="~/.hive/agents/{agent_name}",
run_id="session_20260209_143022_abc12345",
node_id="research"
)
```
#### Step 3e: Inspect memory data
```python
# See what data a node actually produced
get_agent_session_memory(
agent_work_dir="~/.hive/agents/{agent_name}",
session_id="session_20260209_143022_abc12345",
key="research_results"
)
```
#### Step 3f: Find recovery points
```python
list_agent_checkpoints(
agent_work_dir="~/.hive/agents/{agent_name}",
session_id="session_20260209_143022_abc12345",
is_clean="true"
)
```
Returns checkpoint summaries with IDs, types (`node_start`, `node_complete`), which node, and `is_clean` flag. Clean checkpoints are safe resume points.
#### Step 3g: Compare checkpoints (optional)
To understand what changed between two points in execution:
```python
compare_agent_checkpoints(
agent_work_dir="~/.hive/agents/{agent_name}",
session_id="session_20260209_143022_abc12345",
checkpoint_id_before="cp_node_complete_research_143030",
checkpoint_id_after="cp_node_complete_review_143115"
)
```
Returns memory diff (added/removed/changed keys) and execution path diff.
---
### Phase 4: Fix Based on Root Cause
Use the analysis from Phase 3 to determine what to fix and where.
| Root Cause | What to Fix | Where to Edit |
|------------|------------|---------------|
| **Prompt issue** — LLM produces wrong output format, misses instructions | Node `system_prompt` | `exports/{agent}/nodes/__init__.py` |
| **Code bug** — TypeError, KeyError, logic error in Python | Agent code | `exports/{agent}/agent.py`, `nodes/__init__.py` |
| **Graph issue** — wrong routing, missing edge, bad condition_expr | Edges, node config | `exports/{agent}/agent.py` |
| **Tool issue** — MCP tool fails, wrong config, missing credential | Tool config | `exports/{agent}/mcp_servers.json`, `/hive-credentials` |
| **Goal issue** — success criteria too strict/vague, wrong constraints | Goal definition | `exports/{agent}/agent.py` (goal section) |
| **Test issue** — test expectations don't match actual agent behavior | Test code | `exports/{agent}/tests/test_*.py` |
#### Fix strategies by error category
**IMPLEMENTATION_ERROR** (TypeError, AttributeError, KeyError):
```python
# Read the failing code
Read(file_path="exports/{agent_name}/nodes/__init__.py")
# Fix the bug
Edit(
file_path="exports/{agent_name}/nodes/__init__.py",
old_string="results.get('videos')",
new_string="(results or {}).get('videos', [])"
)
```
**ASSERTION_FAILURE** (test assertions fail but agent ran successfully):
- Check if the agent's output is actually wrong → fix the prompt
- Check if the test's expectations are unrealistic → fix the test
- Use `get_agent_session_memory` to see what the agent actually produced
**TIMEOUT / STALL** (agent runs too long):
- Check `node_visit_counts` for feedback loops hitting max_node_visits
- Check L3 logs for tool calls that hang
- Reduce `max_iterations` in loop_config or fix the prompt to converge faster
**API_ERROR** (connection, rate limit, auth):
- Verify credentials with `/hive-credentials`
- Check MCP server configuration
---
### Phase 5: Recover & Resume
After fixing the agent, decide whether to resume or re-run.
#### When to resume from checkpoint
Resume when ALL of these are true:
- The fix is to a node that comes AFTER existing clean checkpoints
- Clean checkpoints exist (from a CLI execution with checkpointing)
- The early nodes are expensive (web scraping, API calls, long LLM chains)
```bash
# Resume from the last clean checkpoint before the failing node
uv run hive run exports/{agent_name} \
--resume-session session_20260209_143022_abc12345 \
--checkpoint cp_node_complete_research_143030
```
This skips all nodes before the checkpoint and only re-runs the fixed node onward.
#### When to re-run from scratch
Re-run when ANY of these are true:
- The fix is to the entry node or an early node
- No checkpoints exist (e.g., agent was run via `run_tests`)
- The agent is fast (2-3 nodes, completes in seconds)
- You changed the graph structure (added/removed nodes/edges)
```bash
uv run hive run exports/{agent_name} --input '{"query": "test topic"}'
```
#### Inspecting a checkpoint before resuming
```python
get_agent_checkpoint(
agent_work_dir="~/.hive/agents/{agent_name}",
session_id="session_20260209_143022_abc12345",
checkpoint_id="cp_node_complete_research_143030"
)
```
Returns the full checkpoint: shared_memory snapshot, execution_path, current_node, next_node, is_clean.
#### Loop back to Phase 2
After resuming or re-running, check if the fix worked. If not, go back to Phase 3.
---
### Phase 6: Final Verification
Once the iterative fix loop converges (the agent produces correct output), run the full automated test suite:
```python
run_tests(
goal_id="your-goal-id",
agent_path="exports/{agent_name}"
)
```
All tests should pass. If not, repeat the loop for remaining failures.
---
## Credential Requirements
**CRITICAL: Testing requires ALL credentials the agent depends on.** This includes both the LLM API key AND any tool-specific credentials (HubSpot, Brave Search, etc.).
### Prerequisites
Before running agent tests, you MUST collect ALL required credentials from the user.
**Step 1: LLM API Key (always required)**
```bash
export ANTHROPIC_API_KEY="your-key-here"
```
**Step 2: Tool-specific credentials (depends on agent's tools)**
Inspect the agent's `mcp_servers.json` and tool configuration to determine which tools the agent uses, then check for all required credentials:
```python
from aden_tools.credentials import CredentialManager, CREDENTIAL_SPECS
creds = CredentialManager()
# Determine which tools the agent uses (from agent.json or mcp_servers.json)
agent_tools = [...] # e.g., ["hubspot_search_contacts", "web_search", ...]
# Find all missing credentials for those tools
missing = creds.get_missing_for_tools(agent_tools)
```
Common tool credentials:
| Tool | Env Var | Help URL |
|------|---------|----------|
| HubSpot CRM | `HUBSPOT_ACCESS_TOKEN` | https://developers.hubspot.com/docs/api/private-apps |
| Brave Search | `BRAVE_SEARCH_API_KEY` | https://brave.com/search/api/ |
| Google Search | `GOOGLE_SEARCH_API_KEY` + `GOOGLE_SEARCH_CX` | https://developers.google.com/custom-search |
**Why ALL credentials are required:**
- Tests need to execute the agent's LLM nodes to validate behavior
- Tools with missing credentials will return error dicts instead of real data
- Mock mode bypasses everything, providing no confidence in real-world performance
### Mock Mode Limitations
Mock mode (`--mock` flag or `MOCK_MODE=1`) is **ONLY for structure validation**:
- Validates graph structure (nodes, edges, connections)
- Validates that `AgentRunner.load()` succeeds and the agent is importable
- Does NOT execute event_loop agents — MockLLMProvider never calls `set_output`, so event_loop nodes loop forever
- Does NOT test LLM reasoning, content quality, or constraint validation
- Does NOT test real API integrations or tool use
**Bottom line:** If you're testing whether an agent achieves its goal, you MUST use real credentials.
### Enforcing Credentials in Tests
When writing tests, **ALWAYS include credential checks**:
```python
import os
import pytest
from aden_tools.credentials import CredentialManager
pytestmark = pytest.mark.skipif(
not CredentialManager().is_available("anthropic") and not os.environ.get("MOCK_MODE"),
reason="API key required for real testing. Set ANTHROPIC_API_KEY or use MOCK_MODE=1."
)
@pytest.fixture(scope="session", autouse=True)
def check_credentials():
"""Ensure ALL required credentials are set for real testing."""
creds = CredentialManager()
mock_mode = os.environ.get("MOCK_MODE")
if not creds.is_available("anthropic"):
if mock_mode:
print("\nRunning in MOCK MODE - structure validation only")
else:
pytest.fail(
"\nANTHROPIC_API_KEY not set!\n"
"Set API key: export ANTHROPIC_API_KEY='your-key-here'\n"
"Or run structure validation: MOCK_MODE=1 pytest exports/{agent}/tests/"
)
if not mock_mode:
agent_tools = [] # Update per agent
missing = creds.get_missing_for_tools(agent_tools)
if missing:
lines = ["\nMissing tool credentials!"]
for name in missing:
spec = creds.specs.get(name)
if spec:
lines.append(f" {spec.env_var} - {spec.description}")
pytest.fail("\n".join(lines))
```
### User Communication
When the user asks to test an agent, **ALWAYS check for ALL credentials first**:
1. **Identify the agent's tools** from `mcp_servers.json`
2. **Check ALL required credentials** using `CredentialManager`
3. **Ask the user to provide any missing credentials** before proceeding
4. Collect ALL missing credentials in a single prompt — not one at a time
---
## Safe Test Patterns
### OutputCleaner
The framework automatically validates and cleans node outputs using a fast LLM at edge traversal time. Tests should still use safe patterns because OutputCleaner may not catch all issues.
### Safe Access (REQUIRED)
```python
# UNSAFE - will crash on missing keys
approval = result.output["approval_decision"]
category = result.output["analysis"]["category"]
# SAFE - use .get() with defaults
output = result.output or {}
approval = output.get("approval_decision", "UNKNOWN")
# SAFE - type check before operations
analysis = output.get("analysis", {})
if isinstance(analysis, dict):
category = analysis.get("category", "unknown")
# SAFE - handle JSON parsing trap (LLM response as string)
import json
recommendation = output.get("recommendation", "{}")
if isinstance(recommendation, str):
try:
parsed = json.loads(recommendation)
if isinstance(parsed, dict):
approval = parsed.get("approval_decision", "UNKNOWN")
except json.JSONDecodeError:
approval = "UNKNOWN"
elif isinstance(recommendation, dict):
approval = recommendation.get("approval_decision", "UNKNOWN")
# SAFE - type check before iteration
items = output.get("items", [])
if isinstance(items, list):
for item in items:
...
```
### Helper Functions for conftest.py
```python
import json
import re
def _parse_json_from_output(result, key):
"""Parse JSON from agent output (framework may store full LLM response as string)."""
response_text = result.output.get(key, "")
json_text = re.sub(r'```json\s*|\s*```', '', response_text).strip()
try:
return json.loads(json_text)
except (json.JSONDecodeError, AttributeError, TypeError):
return result.output.get(key)
def safe_get_nested(result, key_path, default=None):
"""Safely get nested value from result.output."""
output = result.output or {}
current = output
for key in key_path:
if isinstance(current, dict):
current = current.get(key)
elif isinstance(current, str):
try:
json_text = re.sub(r'```json\s*|\s*```', '', current).strip()
parsed = json.loads(json_text)
if isinstance(parsed, dict):
current = parsed.get(key)
else:
return default
except json.JSONDecodeError:
return default
else:
return default
return current if current is not None else default
# Make available in tests
pytest.parse_json_from_output = _parse_json_from_output
pytest.safe_get_nested = safe_get_nested
```
### ExecutionResult Fields
**`result.success=True` means NO exception, NOT goal achieved**
```python
# WRONG
assert result.success
# RIGHT
assert result.success, f"Agent failed: {result.error}"
output = result.output or {}
approval = output.get("approval_decision")
assert approval == "APPROVED", f"Expected APPROVED, got {approval}"
```
All fields:
- `success: bool` — Completed without exception (NOT goal achieved!)
- `output: dict` — Complete memory snapshot (may contain raw strings)
- `error: str | None` — Error message if failed
- `steps_executed: int` — Number of nodes executed
- `total_tokens: int` — Cumulative token usage
- `total_latency_ms: int` — Total execution time
- `path: list[str]` — Node IDs traversed (may repeat in feedback loops)
- `paused_at: str | None` — Node ID if paused
- `session_state: dict` — State for resuming
- `node_visit_counts: dict[str, int]` — Visit counts per node (feedback loop testing)
- `execution_quality: str` — "clean", "degraded", or "failed"
### Test Count Guidance
**Write 8-15 tests, not 30+**
- 2-3 tests per success criterion
- 1 happy path test
- 1 boundary/edge case test
- 1 error handling test (optional)
Each real test costs ~3 seconds + LLM tokens. 12 tests = ~36 seconds, $0.12.
---
## Test Patterns
### Happy Path
```python
@pytest.mark.asyncio
async def test_happy_path(runner, auto_responder, mock_mode):
"""Test normal successful execution."""
await auto_responder.start()
try:
result = await runner.run({"query": "python tutorials"})
finally:
await auto_responder.stop()
assert result.success, f"Agent failed: {result.error}"
output = result.output or {}
assert output.get("report"), "No report produced"
```
### Boundary Condition
```python
@pytest.mark.asyncio
async def test_minimum_sources(runner, auto_responder, mock_mode):
"""Test at minimum source threshold."""
await auto_responder.start()
try:
result = await runner.run({"query": "niche topic"})
finally:
await auto_responder.stop()
assert result.success, f"Agent failed: {result.error}"
output = result.output or {}
sources = output.get("sources", [])
if isinstance(sources, list):
assert len(sources) >= 3, f"Expected >= 3 sources, got {len(sources)}"
```
### Error Handling
```python
@pytest.mark.asyncio
async def test_empty_input(runner, auto_responder, mock_mode):
"""Test graceful handling of empty input."""
await auto_responder.start()
try:
result = await runner.run({"query": ""})
finally:
await auto_responder.stop()
# Agent should either fail gracefully or produce an error message
output = result.output or {}
assert not result.success or output.get("error"), "Should handle empty input"
```
### Feedback Loop
```python
@pytest.mark.asyncio
async def test_feedback_loop_terminates(runner, auto_responder, mock_mode):
"""Test that feedback loops don't run forever."""
await auto_responder.start()
try:
result = await runner.run({"query": "test"})
finally:
await auto_responder.stop()
visits = result.node_visit_counts or {}
for node_id, count in visits.items():
assert count <= 5, f"Node {node_id} visited {count} times — possible infinite loop"
```
---
## MCP Tool Reference
### Phase 1: Test Generation
```python
# Check existing tests
list_tests(goal_id, agent_path)
# Get constraint test guidelines (returns templates, NOT generated tests)
generate_constraint_tests(goal_id, goal_json, agent_path)
# Returns: output_file, file_header, test_template, constraints_formatted, test_guidelines
# Get success criteria test guidelines
generate_success_tests(goal_id, goal_json, node_names, tool_names, agent_path)
# Returns: output_file, file_header, test_template, success_criteria_formatted, test_guidelines
```
### Phase 2: Execution
```python
# Automated regression (no checkpoints, fresh runs)
run_tests(goal_id, agent_path, test_types='["all"]', parallel=-1, fail_fast=False)
# Run only specific test types
run_tests(goal_id, agent_path, test_types='["constraint"]')
run_tests(goal_id, agent_path, test_types='["success"]')
```
```bash
# Iterative debugging with checkpoints (via CLI)
uv run hive run exports/{agent_name} --input '{"query": "test"}'
```
### Phase 3: Analysis
```python
# Debug a specific failed test
debug_test(goal_id, test_name, agent_path)
# Find failed sessions
list_agent_sessions(agent_work_dir, status="failed", limit=5)
# Inspect session state (excludes memory values)
get_agent_session_state(agent_work_dir, session_id)
# Inspect memory data
get_agent_session_memory(agent_work_dir, session_id, key="research_results")
# Runtime logs: L1 summaries
query_runtime_logs(agent_work_dir, status="needs_attention")
# Runtime logs: L2 per-node details
query_runtime_log_details(agent_work_dir, run_id, needs_attention_only=True)
# Runtime logs: L3 tool/LLM raw data
query_runtime_log_raw(agent_work_dir, run_id, node_id="research")
# Find clean checkpoints
list_agent_checkpoints(agent_work_dir, session_id, is_clean="true")
# Compare checkpoints (memory diff)
compare_agent_checkpoints(agent_work_dir, session_id, cp_before, cp_after)
```
### Phase 5: Recovery
```python
# Inspect checkpoint before resuming
get_agent_checkpoint(agent_work_dir, session_id, checkpoint_id)
# Empty checkpoint_id = latest checkpoint
```
```bash
# Resume from checkpoint via CLI (headless)
uv run hive run exports/{agent_name} \
--resume-session {session_id} --checkpoint {checkpoint_id}
```
---
## Anti-Patterns
| Don't | Do Instead |
|-------|-----------|
| Use `default_agent.run()` in tests | Use `runner.run()` with `auto_responder` fixtures (goes through AgentRuntime) |
| Re-run entire agent when a late node fails | Resume from last clean checkpoint |
| Treat `result.success` as goal achieved | Check `result.output` for actual criteria |
| Access `result.output["key"]` directly | Use `result.output.get("key")` |
| Fix random things hoping tests pass | Analyze L2/L3 logs to find root cause first |
| Write 30+ tests | Write 8-15 focused tests |
| Skip credential check | Use `/hive-credentials` before testing |
| Confuse `exports/` with `~/.hive/agents/` | Code in `exports/`, runtime data in `~/.hive/` |
| Use `run_tests` for iterative debugging | Use headless CLI with checkpoints for iterative debugging |
| Use headless CLI for final regression | Use `run_tests` for automated regression |
| Use `--tui` from Claude Code | Use headless `run` command — TUI hangs in non-interactive shells |
| Test client-facing nodes from Claude Code | Use mock mode, or have the user run the agent in their terminal |
| Run tests without reading goal first | Always understand the goal before writing tests |
| Skip Phase 3 analysis and guess | Use session + log tools to identify root cause |
---
## Example Walkthrough: Deep Research Agent
A complete iteration showing the test loop for an agent with nodes: `intake → research → review → report`.
### Phase 1: Generate tests
```python
# Read the goal
Read(file_path="exports/deep_research_agent/agent.py")
# Get success criteria test guidelines
result = generate_success_tests(
goal_id="rigorous-interactive-research",
goal_json='{"id": "rigorous-interactive-research", "success_criteria": [{"id": "source-diversity", "target": ">=5"}, {"id": "citation-coverage", "target": "100%"}, {"id": "report-completeness", "target": "90%"}]}',
node_names="intake,research,review,report",
tool_names="web_search,web_scrape",
agent_path="exports/deep_research_agent"
)
# Write tests
Write(
file_path=result["output_file"],
content=result["file_header"] + "\n\n" + test_code
)
```
### Phase 2: First execution
```python
run_tests(
goal_id="rigorous-interactive-research",
agent_path="exports/deep_research_agent",
fail_fast=True
)
```
Result: `test_success_source_diversity` fails — agent only found 2 sources instead of 5.
### Phase 3: Analyze
```python
# Debug the failing test
debug_test(
goal_id="rigorous-interactive-research",
test_name="test_success_source_diversity",
agent_path="exports/deep_research_agent"
)
# → ASSERTION_FAILURE: Expected >= 5 sources, got 2
# Find the session
list_agent_sessions(
agent_work_dir="~/.hive/agents/deep_research_agent",
status="completed",
limit=1
)
# → session_20260209_150000_abc12345
# See what the research node produced
get_agent_session_memory(
agent_work_dir="~/.hive/agents/deep_research_agent",
session_id="session_20260209_150000_abc12345",
key="research_results"
)
# → Only 2 web_search calls made, each returned 1 source
# Check the LLM's behavior in the research node
query_runtime_log_raw(
agent_work_dir="~/.hive/agents/deep_research_agent",
run_id="session_20260209_150000_abc12345",
node_id="research"
)
# → LLM called web_search only twice, then called set_output
```
Root cause: The research node's prompt doesn't tell the LLM to search for at least 5 diverse sources. It stops after the first couple of searches.
### Phase 4: Fix the prompt
```python
Read(file_path="exports/deep_research_agent/nodes/__init__.py")
Edit(
file_path="exports/deep_research_agent/nodes/__init__.py",
old_string='system_prompt="Search for information on the user\'s topic."',
new_string='system_prompt="Search for information on the user\'s topic. You MUST find at least 5 diverse, authoritative sources. Use multiple different search queries to ensure source diversity. Do not stop searching until you have at least 5 distinct sources."'
)
```
### Phase 5: Resume from checkpoint
For this example, the fix is to the `research` node. If we had run via CLI with checkpointing, we could resume from the checkpoint after `intake` to skip re-running intake:
```bash
# Check if clean checkpoint exists after intake
list_agent_checkpoints(
agent_work_dir="~/.hive/agents/deep_research_agent",
session_id="session_20260209_150000_abc12345",
is_clean="true"
)
# → cp_node_complete_intake_150005
# Resume from after intake, re-run research with fixed prompt
uv run hive run exports/deep_research_agent \
--resume-session session_20260209_150000_abc12345 \
--checkpoint cp_node_complete_intake_150005
```
Or for this simple case (intake is fast), just re-run:
```bash
uv run hive run exports/deep_research_agent --input '{"topic": "test"}'
```
### Phase 6: Final verification
```python
run_tests(
goal_id="rigorous-interactive-research",
agent_path="exports/deep_research_agent"
)
# → All 12 tests pass
```
---
## Test File Structure
```
exports/{agent_name}/
├── agent.py ← Agent to test (goal, nodes, edges)
├── nodes/__init__.py ← Node implementations (prompts, config)
├── config.py ← Agent configuration
├── mcp_servers.json ← Tool server config
└── tests/
├── conftest.py ← Shared fixtures + safe access helpers
├── test_constraints.py ← Constraint tests
├── test_success_criteria.py ← Success criteria tests
└── test_edge_cases.py ← Edge case tests
```
## Integration with Other Skills
| Scenario | From | To | Action |
|----------|------|----|--------|
| Agent built, ready to test | `/hive-create` | `/hive-test` | Generate tests, start loop |
| Prompt fix needed | `/hive-test` Phase 4 | Direct edit | Edit `nodes/__init__.py`, resume |
| Goal definition wrong | `/hive-test` Phase 4 | `/hive-create` | Update goal, may need rebuild |
| Missing credentials | `/hive-test` Phase 3 | `/hive-credentials` | Set up credentials |
| Complex runtime failure | `/hive-test` Phase 3 | `/hive-debugger` | Deep L1/L2/L3 analysis |
| All tests pass | `/hive-test` Phase 6 | Done | Agent validated |
@@ -1,333 +0,0 @@
# Example: Iterative Testing of a Research Agent
This example walks through the full iterative test loop for a research agent that searches the web, reviews findings, and produces a cited report.
## Agent Structure
```
exports/deep_research_agent/
├── agent.py # Goal + graph: intake → research → review → report
├── nodes/__init__.py # Node definitions (system_prompt, input/output keys)
├── config.py # Model config
├── mcp_servers.json # Tools: web_search, web_scrape
└── tests/ # Test files (we'll create these)
```
**Goal:** "Rigorous Interactive Research" — find 5+ diverse sources, cite every claim, produce a complete report.
---
## Phase 1: Generate Tests
### Read the goal
```python
Read(file_path="exports/deep_research_agent/agent.py")
# Extract: goal_id="rigorous-interactive-research"
# success_criteria: source-diversity (>=5), citation-coverage (100%), report-completeness (90%)
# constraints: no-hallucination, source-attribution
```
### Get test guidelines
```python
result = generate_success_tests(
goal_id="rigorous-interactive-research",
goal_json='{"id": "rigorous-interactive-research", "success_criteria": [{"id": "source-diversity", "description": "Use multiple diverse sources", "target": ">=5"}, {"id": "citation-coverage", "description": "Every claim cites its source", "target": "100%"}, {"id": "report-completeness", "description": "Report answers the research questions", "target": "90%"}]}',
node_names="intake,research,review,report",
tool_names="web_search,web_scrape",
agent_path="exports/deep_research_agent"
)
```
### Write tests
```python
Write(
file_path="exports/deep_research_agent/tests/test_success_criteria.py",
content=result["file_header"] + '''
@pytest.mark.asyncio
async def test_success_source_diversity(runner, auto_responder, mock_mode):
"""At least 5 diverse sources are found."""
await auto_responder.start()
try:
result = await runner.run({"query": "impact of remote work on productivity"})
finally:
await auto_responder.stop()
assert result.success, f"Agent failed: {result.error}"
output = result.output or {}
sources = output.get("sources", [])
if isinstance(sources, list):
assert len(sources) >= 5, f"Expected >= 5 sources, got {len(sources)}"
@pytest.mark.asyncio
async def test_success_citation_coverage(runner, auto_responder, mock_mode):
"""Every factual claim in the report cites its source."""
await auto_responder.start()
try:
result = await runner.run({"query": "climate change effects on agriculture"})
finally:
await auto_responder.stop()
assert result.success, f"Agent failed: {result.error}"
output = result.output or {}
report = output.get("report", "")
# Check that report contains numbered references
assert "[1]" in str(report) or "[source" in str(report).lower(), "Report lacks citations"
@pytest.mark.asyncio
async def test_success_report_completeness(runner, auto_responder, mock_mode):
"""Report addresses the original research question."""
query = "pros and cons of nuclear energy"
await auto_responder.start()
try:
result = await runner.run({"query": query})
finally:
await auto_responder.stop()
assert result.success, f"Agent failed: {result.error}"
output = result.output or {}
report = output.get("report", "")
assert len(str(report)) > 200, f"Report too short: {len(str(report))} chars"
@pytest.mark.asyncio
async def test_empty_query_handling(runner, auto_responder, mock_mode):
"""Agent handles empty input gracefully."""
await auto_responder.start()
try:
result = await runner.run({"query": ""})
finally:
await auto_responder.stop()
output = result.output or {}
assert not result.success or output.get("error"), "Should handle empty query"
@pytest.mark.asyncio
async def test_feedback_loop_terminates(runner, auto_responder, mock_mode):
"""Feedback loop between review and research terminates."""
await auto_responder.start()
try:
result = await runner.run({"query": "quantum computing basics"})
finally:
await auto_responder.stop()
visits = result.node_visit_counts or {}
for node_id, count in visits.items():
assert count <= 5, f"Node {node_id} visited {count} times"
'''
)
```
---
## Phase 2: First Execution
```python
run_tests(
goal_id="rigorous-interactive-research",
agent_path="exports/deep_research_agent",
fail_fast=True
)
```
**Result:**
```json
{
"overall_passed": false,
"summary": {"total": 5, "passed": 3, "failed": 2, "pass_rate": "60.0%"},
"failures": [
{"test_name": "test_success_source_diversity", "details": "AssertionError: Expected >= 5 sources, got 2"},
{"test_name": "test_success_citation_coverage", "details": "AssertionError: Report lacks citations"}
]
}
```
---
## Phase 3: Analyze (Iteration 1)
### Debug the first failure
```python
debug_test(
goal_id="rigorous-interactive-research",
test_name="test_success_source_diversity",
agent_path="exports/deep_research_agent"
)
# Category: ASSERTION_FAILURE — Expected >= 5 sources, got 2
```
### Find the session and inspect memory
```python
list_agent_sessions(
agent_work_dir="~/.hive/agents/deep_research_agent",
status="completed",
limit=1
)
# → session_20260209_150000_abc12345
get_agent_session_memory(
agent_work_dir="~/.hive/agents/deep_research_agent",
session_id="session_20260209_150000_abc12345",
key="research_results"
)
# → Only 2 sources found. LLM stopped searching after 2 queries.
```
### Check LLM behavior in the research node
```python
query_runtime_log_raw(
agent_work_dir="~/.hive/agents/deep_research_agent",
run_id="session_20260209_150000_abc12345",
node_id="research"
)
# → LLM called web_search twice, got results, immediately called set_output.
# → Prompt doesn't instruct it to find at least 5 sources.
```
**Root cause:** The research node's system_prompt doesn't specify minimum source requirements.
---
## Phase 4: Fix (Iteration 1)
```python
Read(file_path="exports/deep_research_agent/nodes/__init__.py")
# Fix the research node prompt
Edit(
file_path="exports/deep_research_agent/nodes/__init__.py",
old_string='system_prompt="Search for information on the user\'s topic using web search."',
new_string='system_prompt="Search for information on the user\'s topic using web search. You MUST find at least 5 diverse, authoritative sources. Use multiple different search queries with varied keywords. Do NOT call set_output until you have gathered at least 5 distinct sources from different domains."'
)
```
---
## Phase 5: Recover & Resume (Iteration 1)
The fix is to the `research` node. Since this was a `run_tests` execution (no checkpoints), we re-run from scratch:
```python
run_tests(
goal_id="rigorous-interactive-research",
agent_path="exports/deep_research_agent",
fail_fast=True
)
```
**Result:**
```json
{
"overall_passed": false,
"summary": {"total": 5, "passed": 4, "failed": 1, "pass_rate": "80.0%"},
"failures": [
{"test_name": "test_success_citation_coverage", "details": "AssertionError: Report lacks citations"}
]
}
```
Source diversity now passes. Citation coverage still fails.
---
## Phase 3: Analyze (Iteration 2)
```python
debug_test(
goal_id="rigorous-interactive-research",
test_name="test_success_citation_coverage",
agent_path="exports/deep_research_agent"
)
# Category: ASSERTION_FAILURE — Report lacks citations
# Check what the report node produced
list_agent_sessions(
agent_work_dir="~/.hive/agents/deep_research_agent",
status="completed",
limit=1
)
# → session_20260209_151500_def67890
get_agent_session_memory(
agent_work_dir="~/.hive/agents/deep_research_agent",
session_id="session_20260209_151500_def67890",
key="report"
)
# → Report text exists but uses no numbered references.
# → Sources are in memory but report node doesn't cite them.
```
**Root cause:** The report node's prompt doesn't instruct the LLM to include numbered citations.
---
## Phase 4: Fix (Iteration 2)
```python
Edit(
file_path="exports/deep_research_agent/nodes/__init__.py",
old_string='system_prompt="Write a comprehensive report based on the research findings."',
new_string='system_prompt="Write a comprehensive report based on the research findings. You MUST include numbered citations [1], [2], etc. for every factual claim. At the end, include a References section listing all sources with their URLs. Every claim must be traceable to a specific source."'
)
```
---
## Phase 5: Resume (Iteration 2)
The fix is to the `report` node (the last node). To demonstrate checkpoint recovery, run via CLI:
```bash
# Run via CLI to get checkpoints
uv run hive run exports/deep_research_agent --input '{"topic": "climate change effects"}'
# After it runs, find the clean checkpoint before report
list_agent_checkpoints(
agent_work_dir="~/.hive/agents/deep_research_agent",
session_id="session_20260209_152000_ghi34567",
is_clean="true"
)
# → cp_node_complete_review_152100 (after review, before report)
# Resume — skips intake, research, review entirely
uv run hive run exports/deep_research_agent \
--resume-session session_20260209_152000_ghi34567 \
--checkpoint cp_node_complete_review_152100
```
Only the `report` node re-runs with the fixed prompt, using research data from the checkpoint.
---
## Phase 6: Final Verification
```python
run_tests(
goal_id="rigorous-interactive-research",
agent_path="exports/deep_research_agent"
)
```
**Result:**
```json
{
"overall_passed": true,
"summary": {"total": 5, "passed": 5, "failed": 0, "pass_rate": "100.0%"}
}
```
All tests pass.
---
## Summary
| Iteration | Failure | Root Cause | Fix | Recovery |
|-----------|---------|------------|-----|----------|
| 1 | Source diversity (2 < 5) | Research prompt too vague | Added "at least 5 sources" to prompt | Re-run (no checkpoints) |
| 2 | No citations in report | Report prompt lacks citation instructions | Added citation requirements | Checkpoint resume (skipped 3 nodes) |
**Key takeaways:**
- Phase 3 analysis (session memory + L3 logs) identified root causes without guessing
- Checkpoint recovery in iteration 2 saved time by skipping 3 expensive nodes
- Final `run_tests` confirms all scenarios pass end-to-end
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,351 @@
# Example: Testing a YouTube Research Agent
This example walks through testing a YouTube research agent that finds relevant videos based on a topic.
## Prerequisites
- Agent built with building-agents skill at `exports/youtube-research/`
- Goal defined with success criteria and constraints
## Step 1: Load the Goal
First, load the goal that was defined during the Goal stage:
```json
{
"id": "youtube-research",
"name": "YouTube Research Agent",
"description": "Find relevant YouTube videos on a given topic",
"success_criteria": [
{
"id": "find_videos",
"description": "Find 3-5 relevant videos",
"metric": "video_count",
"target": "3-5",
"weight": 1.0
},
{
"id": "relevance",
"description": "Videos must be relevant to the topic",
"metric": "relevance_score",
"target": ">0.8",
"weight": 0.8
}
],
"constraints": [
{
"id": "api_limits",
"description": "Must not exceed YouTube API rate limits",
"constraint_type": "hard",
"category": "technical"
},
{
"id": "content_safety",
"description": "Must filter out inappropriate content",
"constraint_type": "hard",
"category": "safety"
}
]
}
```
## Step 2: Get Constraint Test Guidelines
During the Goal stage (or early Eval), get test guidelines for constraints:
```python
result = generate_constraint_tests(
goal_id="youtube-research",
goal_json='<goal JSON above>',
agent_path="exports/youtube-research"
)
```
**The result contains guidelines (not generated tests):**
- `output_file`: Where to write tests
- `file_header`: Imports and fixtures to use
- `test_template`: Format for test functions
- `constraints_formatted`: The constraints to test
- `test_guidelines`: Rules for writing tests
## Step 3: Write Constraint Tests
Using the guidelines, write tests directly with the Write tool:
```python
# Write constraint tests using the provided file_header and guidelines
Write(
file_path="exports/youtube-research/tests/test_constraints.py",
content='''
"""Constraint tests for youtube-research agent."""
import os
import pytest
from exports.youtube_research import default_agent
pytestmark = pytest.mark.skipif(
not os.environ.get("ANTHROPIC_API_KEY") and not os.environ.get("MOCK_MODE"),
reason="API key required for real testing."
)
@pytest.mark.asyncio
async def test_constraint_api_limits_respected():
"""Verify API rate limits are not exceeded."""
import time
mock_mode = bool(os.environ.get("MOCK_MODE"))
for i in range(10):
result = await default_agent.run({"topic": f"test_{i}"}, mock_mode=mock_mode)
time.sleep(0.1)
# Should complete without rate limit errors
assert "rate limit" not in str(result).lower()
@pytest.mark.asyncio
async def test_constraint_content_safety_filter():
"""Verify inappropriate content is filtered."""
mock_mode = bool(os.environ.get("MOCK_MODE"))
result = await default_agent.run({"topic": "general topic"}, mock_mode=mock_mode)
for video in result.videos:
assert video.safe_for_work is True
assert video.age_restricted is False
'''
)
```
## Step 4: Get Success Criteria Test Guidelines
After the agent is built, get success criteria test guidelines:
```python
result = generate_success_tests(
goal_id="youtube-research",
goal_json='<goal JSON>',
node_names="search_node,filter_node,rank_node,format_node",
tool_names="youtube_search,video_details,channel_info",
agent_path="exports/youtube-research"
)
```
## Step 5: Write Success Criteria Tests
Using the guidelines, write success criteria tests:
```python
Write(
file_path="exports/youtube-research/tests/test_success_criteria.py",
content='''
"""Success criteria tests for youtube-research agent."""
import os
import pytest
from exports.youtube_research import default_agent
pytestmark = pytest.mark.skipif(
not os.environ.get("ANTHROPIC_API_KEY") and not os.environ.get("MOCK_MODE"),
reason="API key required for real testing."
)
@pytest.mark.asyncio
async def test_find_videos_happy_path():
"""Test finding videos for a common topic."""
mock_mode = bool(os.environ.get("MOCK_MODE"))
result = await default_agent.run({"topic": "machine learning"}, mock_mode=mock_mode)
assert result.success
assert 3 <= len(result.videos) <= 5
assert all(v.title for v in result.videos)
assert all(v.video_id for v in result.videos)
@pytest.mark.asyncio
async def test_find_videos_minimum_boundary():
"""Test at minimum threshold (3 videos)."""
mock_mode = bool(os.environ.get("MOCK_MODE"))
result = await default_agent.run({"topic": "niche topic xyz"}, mock_mode=mock_mode)
assert len(result.videos) >= 3
@pytest.mark.asyncio
async def test_relevance_score_threshold():
"""Test relevance scoring meets threshold."""
mock_mode = bool(os.environ.get("MOCK_MODE"))
result = await default_agent.run({"topic": "python programming"}, mock_mode=mock_mode)
for video in result.videos:
assert video.relevance_score > 0.8
@pytest.mark.asyncio
async def test_find_videos_no_results_graceful():
"""Test graceful handling of no results."""
mock_mode = bool(os.environ.get("MOCK_MODE"))
result = await default_agent.run({"topic": "xyznonexistent123"}, mock_mode=mock_mode)
# Should not crash, return empty or message
assert result.videos == [] or result.message
'''
)
```
## Step 6: Run All Tests
Execute all tests:
```python
result = run_tests(
goal_id="youtube-research",
agent_path="exports/youtube-research",
test_types='["all"]',
parallel=4
)
```
**Results:**
```json
{
"goal_id": "youtube-research",
"overall_passed": false,
"summary": {
"total": 6,
"passed": 5,
"failed": 1,
"pass_rate": "83.3%"
},
"duration_ms": 4521,
"results": [
{"test_id": "test_constraint_api_001", "passed": true, "duration_ms": 1234},
{"test_id": "test_constraint_content_001", "passed": true, "duration_ms": 456},
{"test_id": "test_success_001", "passed": true, "duration_ms": 789},
{"test_id": "test_success_002", "passed": true, "duration_ms": 654},
{"test_id": "test_success_003", "passed": true, "duration_ms": 543},
{"test_id": "test_success_004", "passed": false, "duration_ms": 845,
"error_category": "IMPLEMENTATION_ERROR",
"error_message": "TypeError: 'NoneType' object has no attribute 'videos'"}
]
}
```
## Step 7: Debug the Failed Test
```python
result = debug_test(
goal_id="youtube-research",
test_name="test_find_videos_no_results_graceful",
agent_path="exports/youtube-research"
)
```
**Debug Output:**
```json
{
"test_id": "test_success_004",
"test_name": "test_find_videos_no_results_graceful",
"input": {"topic": "xyznonexistent123"},
"expected": "Empty list or message",
"actual": {"error": "TypeError: 'NoneType' object has no attribute 'videos'"},
"passed": false,
"error_message": "TypeError: 'NoneType' object has no attribute 'videos'",
"error_category": "IMPLEMENTATION_ERROR",
"stack_trace": "Traceback (most recent call last):\n File \"filter_node.py\", line 42\n for video in result.videos:\nTypeError: 'NoneType' object has no attribute 'videos'",
"logs": [
{"timestamp": "2026-01-20T10:00:01", "node": "search_node", "level": "INFO", "msg": "Searching for: xyznonexistent123"},
{"timestamp": "2026-01-20T10:00:02", "node": "search_node", "level": "WARNING", "msg": "No results found"},
{"timestamp": "2026-01-20T10:00:02", "node": "filter_node", "level": "ERROR", "msg": "NoneType error"}
],
"runtime_data": {
"execution_path": ["start", "search_node", "filter_node"],
"node_outputs": {
"search_node": null
}
},
"suggested_fix": "Add null check in filter_node before accessing .videos attribute",
"iteration_guidance": {
"stage": "Agent",
"action": "Fix the code in nodes/edges",
"restart_required": false,
"description": "The goal is correct, but filter_node doesn't handle null results from search_node."
}
}
```
## Step 8: Iterate Based on Category
Since this is an **IMPLEMENTATION_ERROR**, we:
1. **Don't restart** the Goal → Agent → Eval flow
2. **Fix the agent** using building-agents skill:
- Modify `filter_node` to handle null results
3. **Re-run Eval** (tests only)
### Fix in building-agents:
```python
# Update the filter_node to handle null
add_node(
node_id="filter_node",
name="Filter Node",
description="Filter and rank videos",
node_type="function",
input_keys=["search_results"],
output_keys=["filtered_videos"],
system_prompt="""
Filter videos by relevance.
IMPORTANT: Handle case where search_results is None or empty.
Return empty list if no results.
"""
)
```
### Re-export and re-test:
```python
# Re-export the fixed agent
export_graph(path="exports/youtube-research")
# Re-run tests
result = run_tests(
goal_id="youtube-research",
agent_path="exports/youtube-research",
test_types='["all"]'
)
```
**Updated Results:**
```json
{
"goal_id": "youtube-research",
"overall_passed": true,
"summary": {
"total": 6,
"passed": 6,
"failed": 0,
"pass_rate": "100.0%"
}
}
```
## Summary
1. **Got guidelines** for constraint tests during Goal stage
2. **Wrote** constraint tests using Write tool
3. **Got guidelines** for success criteria tests during Eval stage
4. **Wrote** success criteria tests using Write tool
5. **Ran** tests in parallel
6. **Debugged** the one failure
7. **Categorized** as IMPLEMENTATION_ERROR
8. **Fixed** the agent (not the goal)
9. **Re-ran** Eval only (didn't restart full flow)
10. **Passed** all tests
The agent is now validated and ready for production use.
-145
View File
@@ -1,145 +0,0 @@
# Triage Issue Skill
Analyze a GitHub issue, verify claims against the codebase, and close invalid issues with a technical response.
## Trigger
User provides a GitHub issue URL or number, e.g.:
- `/triage-issue 1970`
- `/triage-issue https://github.com/adenhq/hive/issues/1970`
## Workflow
### Step 1: Fetch Issue Details
```bash
gh issue view <number> --repo adenhq/hive --json title,body,state,labels,author
```
Extract:
- Title
- Body (the claim/bug report)
- Current state
- Labels
- Author
If issue is already closed, inform user and stop.
### Step 2: Analyze the Claim
Read the issue body and identify:
1. **The core claim** - What is the user asserting?
2. **Technical specifics** - File paths, function names, code snippets mentioned
3. **Expected behavior** - What do they think should happen?
4. **Severity claimed** - Security issue? Bug? Feature request?
### Step 3: Investigate the Codebase
For each technical claim:
1. Find the referenced code using Grep/Glob/Read
2. Understand the actual implementation
3. Check if the claim accurately describes the behavior
4. Look for related tests, documentation, or design decisions
### Step 4: Evaluate Validity
Categorize the issue as one of:
| Category | Action |
|----------|--------|
| **Valid Bug** | Do NOT close. Inform user this is a real issue. |
| **Valid Feature Request** | Do NOT close. Suggest labeling appropriately. |
| **Misunderstanding** | Prepare technical explanation for why behavior is correct. |
| **Fundamentally Flawed** | Prepare critique explaining the technical impossibility or design rationale. |
| **Duplicate** | Find the original issue and prepare duplicate notice. |
| **Incomplete** | Prepare request for more information. |
### Step 5: Draft Response
For issues to be closed, draft a response that:
1. **Acknowledges the concern** - Don't be dismissive
2. **Explains the actual behavior** - With code references
3. **Provides technical rationale** - Why it works this way
4. **References industry standards** - If applicable
5. **Offers alternatives** - If there's a better approach for the user
Use this template:
```markdown
## Analysis
[Brief summary of what was investigated]
## Technical Details
[Explanation with code references]
## Why This Is Working As Designed
[Rationale]
## Recommendation
[What the user should do instead, if applicable]
---
*This issue was reviewed and closed by the maintainers.*
```
### Step 6: User Review
Present the draft to the user with:
```
## Issue #<number>: <title>
**Claim:** <summary of claim>
**Finding:** <valid/invalid/misunderstanding/etc>
**Draft Response:**
<the markdown response>
---
Do you want me to post this comment and close the issue?
```
Use AskUserQuestion with options:
- "Post and close" - Post comment, close issue
- "Edit response" - Let user modify the response
- "Skip" - Don't take action
### Step 7: Execute Action
If user approves:
```bash
# Post comment
gh issue comment <number> --repo adenhq/hive --body "<response>"
# Close issue
gh issue close <number> --repo adenhq/hive --reason "not planned"
```
Report success with link to the issue.
## Important Guidelines
1. **Never close valid issues** - If there's any merit to the claim, don't close it
2. **Be respectful** - The reporter took time to file the issue
3. **Be technical** - Provide code references and evidence
4. **Be educational** - Help them understand, don't just dismiss
5. **Check twice** - Make sure you understand the code before declaring something invalid
6. **Consider edge cases** - Maybe their environment reveals a real issue
## Example Critiques
### Security Misunderstanding
> "The claim that secrets are exposed in plaintext misunderstands the encryption architecture. While `SecretStr` is used for logging protection, actual encryption is provided by Fernet (AES-128-CBC) at the storage layer. The code path is: serialize → encrypt → write. Only encrypted bytes touch disk."
### Impossible Request
> "The requested feature would require [X] which violates [fundamental constraint]. This is not a limitation of our implementation but a fundamental property of [technology/protocol]."
### Already Handled
> "This scenario is already handled by [code reference]. The reporter may be using an older version or misconfigured environment."
-7
View File
@@ -1,7 +0,0 @@
# Project-level Codex config for Hive.
# Keep this file minimal: MCP connectivity + skill discovery.
[mcp_servers.agent-builder]
command = "uv"
args = ["run", "--directory", "core", "-m", "framework.mcp.agent_builder_server"]
cwd = "."
+1
View File
@@ -0,0 +1 @@
{}
-20
View File
@@ -1,20 +0,0 @@
{
"mcpServers": {
"agent-builder": {
"command": "python",
"args": ["-m", "framework.mcp.agent_builder_server"],
"cwd": "core",
"env": {
"PYTHONPATH": "../tools/src"
}
},
"tools": {
"command": "python",
"args": ["mcp_server.py", "--stdio"],
"cwd": "tools",
"env": {
"PYTHONPATH": "src"
}
}
}
}
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-concepts
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-create
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-credentials
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-patterns
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-test
-18
View File
@@ -1,18 +0,0 @@
This project uses ruff for Python linting and formatting.
Rules:
- Line length: 100 characters
- Python target: 3.11+
- Use double quotes for strings
- Sort imports with isort (ruff I rules): stdlib, third-party, first-party (framework), local
- Combine as-imports
- Use type hints on all function signatures
- Use `from __future__ import annotations` for modern type syntax
- Raise exceptions with `from` in except blocks (B904)
- No unused imports (F401), no unused variables (F841)
- Prefer list/dict/set comprehensions over map/filter (C4)
Run `make lint` to auto-fix, `make check` to verify without modifying files.
Run `make format` to apply ruff formatting.
The ruff config lives in core/pyproject.toml under [tool.ruff].
-3
View File
@@ -11,9 +11,6 @@ indent_size = 2
insert_final_newline = true
trim_trailing_whitespace = true
[*.py]
indent_size = 4
[*.md]
trim_trailing_whitespace = false
-124
View File
@@ -1,124 +0,0 @@
# Normalize line endings for all text files
* text=auto
# Source code
*.py text diff=python
*.js text
*.ts text
*.jsx text
*.tsx text
*.json text
*.yaml text
*.yml text
*.toml text
*.ini text
*.cfg text
# Shell scripts (must use LF)
*.sh text eol=lf
quickstart.sh text eol=lf
# PowerShell scripts (Windows-friendly)
*.ps1 text eol=lf
*.psm1 text eol=lf
# Windows batch files (must use CRLF)
*.bat text eol=crlf
*.cmd text eol=crlf
# Documentation
*.md text
*.txt text
*.rst text
*.tex text
# Configuration files
.gitignore text
.gitattributes text
.editorconfig text
Dockerfile text
docker-compose.yml text
requirements*.txt text
pyproject.toml text
setup.py text
setup.cfg text
MANIFEST.in text
LICENSE text
README* text
CHANGELOG* text
CONTRIBUTING* text
CODE_OF_CONDUCT* text
# Web files
*.html text
*.css text
*.scss text
*.sass text
# Data files
*.xml text
*.csv text
*.sql text
# Graphics (binary)
*.png binary
*.jpg binary
*.jpeg binary
*.gif binary
*.ico binary
*.svg binary
*.eps binary
*.bmp binary
*.tif binary
*.tiff binary
# Archives (binary)
*.zip binary
*.tar binary
*.gz binary
*.bz2 binary
*.7z binary
*.rar binary
# Python compiled (binary)
*.pyc binary
*.pyo binary
*.pyd binary
*.whl binary
*.egg binary
# System libraries (binary)
*.so binary
*.dll binary
*.dylib binary
*.lib binary
*.a binary
# Documents (binary)
*.pdf binary
*.doc binary
*.docx binary
*.ppt binary
*.pptx binary
*.xls binary
*.xlsx binary
# Fonts (binary)
*.ttf binary
*.otf binary
*.woff binary
*.woff2 binary
*.eot binary
# Audio/Video (binary)
*.mp3 binary
*.mp4 binary
*.wav binary
*.avi binary
*.mov binary
*.flv binary
# Database files (binary)
*.db binary
*.sqlite binary
*.sqlite3 binary
+1
View File
@@ -8,6 +8,7 @@
/hive/ @adenhq/maintainers
# Infrastructure
/docker-compose*.yml @adenhq/maintainers
/.github/ @adenhq/maintainers
# Documentation
+2 -3
View File
@@ -1,10 +1,9 @@
---
name: Bug Report
about: Report a bug to help us improve
title: "[Bug]: "
labels: bug, enhancement
title: '[Bug]: '
labels: bug
assignees: ''
---
## Describe the Bug
+1 -2
View File
@@ -1,10 +1,9 @@
---
name: Feature Request
about: Suggest a new feature or enhancement
title: "[Feature]: "
title: '[Feature]: '
labels: enhancement
assignees: ''
---
## Problem Statement
@@ -1,71 +0,0 @@
---
name: Integration Request
about: Suggest a new integration
title: "[Integration]:"
labels: ''
assignees: ''
---
## Service
Name and brief description of the service and what it enables agents to do.
**Description:** [e.g., "API key for Slack Bot" — short one-liner for the credential spec]
## Credential Identity
- **credential_id:** [e.g., `slack`]
- **env_var:** [e.g., `SLACK_BOT_TOKEN`]
- **credential_key:** [e.g., `access_token`, `api_key`, `bot_token`]
## Tools
Tool function names that require this credential:
- [e.g., `slack_send_message`]
- [e.g., `slack_list_channels`]
## Auth Methods
- **Direct API key supported:** Yes / No
- **Aden OAuth supported:** Yes / No
If Aden OAuth is supported, describe the OAuth scopes/permissions required.
## How to Get the Credential
Link where users obtain the key/token:
[e.g., https://api.slack.com/apps]
Step-by-step instructions:
1. Go to ...
2. Create a ...
3. Select scopes/permissions: ...
4. Copy the key/token
## Health Check
A lightweight API call to validate the credential (no writes, no charges).
- **Endpoint:** [e.g., `https://slack.com/api/auth.test`]
- **Method:** [e.g., `GET` or `POST`]
- **Auth header:** [e.g., `Authorization: Bearer {token}` or `X-Api-Key: {key}`]
- **Parameters (if any):** [e.g., `?limit=1`]
- **200 means:** [e.g., key is valid]
- **401 means:** [e.g., invalid or expired]
- **429 means:** [e.g., rate limited but key is valid]
## Credential Group
Does this require multiple credentials configured together? (e.g., Google Custom Search needs
both an API key and a CSE ID)
- [ ] No, single credential
- [ ] Yes — list the other credential IDs in the group:
## Additional Context
Links to API docs, rate limits, free tier availability, or anything else relevant.
@@ -1,34 +0,0 @@
name: Auto-close duplicate issues
description: Auto-closes issues that are duplicates of existing issues
on:
schedule:
- cron: "0 */6 * * *"
workflow_dispatch:
jobs:
auto-close-duplicates:
runs-on: ubuntu-latest
timeout-minutes: 10
permissions:
contents: read
issues: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup Bun
uses: oven-sh/setup-bun@v2
with:
bun-version: latest
- name: Run auto-close-duplicates tests
run: bun test scripts/auto-close-duplicates
- name: Auto-close duplicate issues
run: bun run scripts/auto-close-duplicates.ts
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_REPOSITORY_OWNER: ${{ github.repository_owner }}
GITHUB_REPOSITORY_NAME: ${{ github.event.repository.name }}
STATSIG_API_KEY: ${{ secrets.STATSIG_API_KEY }}
+23 -70
View File
@@ -21,48 +21,21 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Install uv
uses: astral-sh/setup-uv@v4
cache: 'pip'
- name: Install dependencies
run: uv sync --project core --group dev
- name: Ruff lint
run: |
uv run --project core ruff check core/
uv run --project core ruff check tools/
cd core
pip install -e .
pip install -r requirements-dev.txt
- name: Ruff format
- name: Run ruff
run: |
uv run --project core ruff format --check core/
uv run --project core ruff format --check tools/
cd core
ruff check .
test:
name: Test Python Framework
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, windows-latest]
steps:
- uses: actions/checkout@v4
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Install uv
uses: astral-sh/setup-uv@v4
- name: Install dependencies and run tests
run: |
cd core
uv sync
uv run pytest tests/ -v
test-tools:
name: Test Tools
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
@@ -71,20 +44,23 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: '3.11'
cache: 'pip'
- name: Install uv
uses: astral-sh/setup-uv@v4
- name: Install dependencies and run tests
- name: Install dependencies
run: |
cd tools
uv sync --extra dev
uv run pytest tests/ -v
cd core
pip install -e .
pip install -r requirements-dev.txt
- name: Run tests
run: |
cd core
pytest tests/ -v
validate:
name: Validate Agent Exports
runs-on: ubuntu-latest
needs: [lint, test, test-tools]
needs: [lint, test]
steps:
- uses: actions/checkout@v4
@@ -92,43 +68,20 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Install uv
uses: astral-sh/setup-uv@v4
cache: 'pip'
- name: Install dependencies
run: |
cd core
uv sync
pip install -e .
pip install -r requirements-dev.txt
- name: Validate exported agents
run: |
# Check that agent exports have valid structure
if [ ! -d "exports" ]; then
echo "No exports/ directory found, skipping validation"
exit 0
fi
shopt -s nullglob
agent_dirs=(exports/*/)
shopt -u nullglob
if [ ${#agent_dirs[@]} -eq 0 ]; then
echo "No agent directories in exports/, skipping validation"
exit 0
fi
validated=0
for agent_dir in "${agent_dirs[@]}"; do
for agent_dir in exports/*/; do
if [ -f "$agent_dir/agent.json" ]; then
echo "Validating $agent_dir"
uv run python -c "import json; json.load(open('$agent_dir/agent.json'))"
validated=$((validated + 1))
python -c "import json; json.load(open('$agent_dir/agent.json'))"
fi
done
if [ "$validated" -eq 0 ]; then
echo "No agent.json files found in exports/, skipping validation"
else
echo "Validated $validated agent(s)"
fi
+34 -70
View File
@@ -1,4 +1,4 @@
name: Issue Triage
name: Claude Issue Triage
on:
issues:
@@ -7,11 +7,9 @@ on:
jobs:
triage:
runs-on: ubuntu-latest
timeout-minutes: 10
permissions:
contents: read
issues: write
id-token: write
steps:
- name: Checkout repository
@@ -19,85 +17,51 @@ jobs:
with:
fetch-depth: 1
- name: Triage and check for duplicates
- name: Run Claude Issue Triage
id: claude-triage
uses: anthropics/claude-code-action@v1
with:
anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
github_token: ${{ secrets.GITHUB_TOKEN }}
allowed_non_write_users: "*"
prompt: |
Analyze this new issue and perform triage tasks.
REPO: ${{ github.repository }}
ISSUE NUMBER: ${{ github.event.issue.number }}
TITLE: ${{ github.event.issue.title }}
BODY: ${{ github.event.issue.body }}
AUTHOR: ${{ github.event.issue.user.login }}
Issue: #${{ github.event.issue.number }}
Repository: ${{ github.repository }}
Analyze this new issue and perform the following:
## Your Tasks:
1. **Categorize the issue type using ONLY these labels:**
- bug: Something isn't working
- enhancement: New feature or request
- question: Further information is requested
- documentation: Improvements or additions to documentation
- good first issue: Good for newcomers (if issue is well-defined and small scope)
- help wanted: Extra attention is needed (if issue needs community input)
- backlog: Tracked for the future, but not currently planned or prioritized
### 1. Get issue details
Use mcp__github__get_issue to get the full details of issue #${{ github.event.issue.number }}
2. **Check for duplicates:**
Search for similar existing issues using:
`gh issue list --state all --search "<key terms from title/body>"`
### 2. Check for duplicates
Search for similar existing issues using mcp__github__search_issues with relevant keywords from the issue title and body.
If a duplicate exists:
- Add the "duplicate" label
- Comment mentioning the original issue number
Criteria for duplicates:
- Same bug or error being reported
- Same feature request (even if worded differently)
- Same question being asked
- Issues describing the same root problem
3. **Check for invalid issues:**
If the issue lacks sufficient information, is spam, or doesn't make sense:
- Add the "invalid" label
- Comment asking for clarification or explaining why it's invalid
If you find a duplicate:
- Add a comment using EXACTLY this format (required for auto-close to work):
"Found a possible duplicate of #<issue_number>: <brief explanation of why it's a duplicate>"
- Do NOT apply the "duplicate" label yet (the auto-close script will add it after 12 hours if no objections)
- Suggest the user react with a thumbs-down if they disagree
4. **Apply labels:**
Based on your analysis, add appropriate labels using:
`gh issue edit ${{ github.event.issue.number }} --add-label "label1,label2"`
### 3. Check for Low-Quality / AI Spam
Analyze the issue quality. We are receiving many low-effort, AI-generated spam issues.
Flag the issue as INVALID if it matches these criteria:
- **Vague/Generic**: Title is "Fix bug" or "Error" without specific context.
- **Hallucinated**: Refers to files or features that do not exist in this repo.
- **Template Filler**: Body contains "Insert description here" or unrelated gibberish.
- **Low Effort**: No reproduction steps, no logs, only 1-2 sentences.
You may apply multiple labels if appropriate (e.g., "bug,help wanted").
If identified as spam/low-quality:
- Add the "invalid" label.
- Add a comment:
"This issue has been automatically flagged as low-quality or potentially AI-generated spam. It lacks specific details (logs, reproduction steps, file references) required for us to help. Please open a new issue following the template exactly if this is a legitimate request."
- Do NOT proceed to other steps.
### 4. Check for invalid issues (General)
If the issue is not spam but still lacks information:
- Add the "invalid" label
- Comment asking for clarification
### 5. Categorize with labels (if NOT a duplicate or spam)
Apply appropriate labels based on the issue content. Use ONLY these labels:
- bug: Something isn't working
- enhancement: New feature or request
- question: Further information is requested
- documentation: Improvements or additions to documentation
- good first issue: Good for newcomers (if issue is well-defined and small scope)
- help wanted: Extra attention is needed (if issue needs community input)
- backlog: Tracked for the future, but not currently planned or prioritized
### 6. Estimate size (if NOT a duplicate, spam, or invalid)
Apply exactly ONE size label to help contributors match their capacity to the task:
- "size: small": Docs, typos, single-file fixes, config changes
- "size: medium": Bug fixes with tests, adding a single tool, changes within one package
- "size: large": Cross-package changes (core + tools), new modules, complex logic, architectural refactors
You may apply multiple labels if appropriate (e.g., "bug", "size: small", and "good first issue").
## Tools Available:
- mcp__github__get_issue: Get issue details
- mcp__github__search_issues: Search for similar issues
- mcp__github__list_issues: List recent issues if needed
- mcp__github__add_issue_comment: Add a comment
- mcp__github__update_issue: Add labels
- mcp__github__get_issue_comments: Get existing comments
Be thorough but efficient. Focus on accurate categorization and finding true duplicates.
5. **Add a brief comment** summarizing your triage decision to help maintainers.
claude_args: |
--model claude-haiku-4-5-20251001
--allowedTools "mcp__github__get_issue,mcp__github__search_issues,mcp__github__list_issues,mcp__github__add_issue_comment,mcp__github__update_issue,mcp__github__get_issue_comments"
--model claude-3-5-haiku-20241022
--allowedTools "Bash(gh issue:*),Bash(gh search:*)"
-204
View File
@@ -1,204 +0,0 @@
name: PR Check Command
on:
issue_comment:
types: [created]
jobs:
check-pr:
# Only run on PR comments that start with /check
if: github.event.issue.pull_request && startsWith(github.event.comment.body, '/check')
runs-on: ubuntu-latest
permissions:
pull-requests: write
issues: write
checks: write
statuses: write
steps:
- name: Check PR requirements
uses: actions/github-script@v7
with:
script: |
const prNumber = context.payload.issue.number;
console.log(`Triggered by /check comment on PR #${prNumber}`);
// Fetch PR data
const { data: pr } = await github.rest.pulls.get({
owner: context.repo.owner,
repo: context.repo.repo,
pull_number: prNumber,
});
const prBody = pr.body || '';
const prTitle = pr.title || '';
const prAuthor = pr.user.login;
const headSha = pr.head.sha;
// Create a check run in progress
const { data: checkRun } = await github.rest.checks.create({
owner: context.repo.owner,
repo: context.repo.repo,
name: 'check-requirements',
head_sha: headSha,
status: 'in_progress',
started_at: new Date().toISOString(),
});
// Extract issue numbers
const issuePattern = /(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)?\s*#(\d+)/gi;
const allText = `${prTitle} ${prBody}`;
const matches = [...allText.matchAll(issuePattern)];
const issueNumbers = [...new Set(matches.map(m => parseInt(m[1], 10)))];
console.log(`PR #${prNumber}:`);
console.log(` Author: ${prAuthor}`);
console.log(` Found issue references: ${issueNumbers.length > 0 ? issueNumbers.join(', ') : 'none'}`);
if (issueNumbers.length === 0) {
const message = `## PR Closed - Requirements Not Met
This PR has been automatically closed because it doesn't meet the requirements.
**Missing:** No linked issue found.
**To fix:**
1. Create or find an existing issue for this work
2. Assign yourself to the issue
3. Re-open this PR and add \`Fixes #123\` in the description
**Why is this required?** See #472 for details.`;
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: prNumber,
body: message,
});
await github.rest.pulls.update({
owner: context.repo.owner,
repo: context.repo.repo,
pull_number: prNumber,
state: 'closed',
});
// Update check run to failure
await github.rest.checks.update({
owner: context.repo.owner,
repo: context.repo.repo,
check_run_id: checkRun.id,
status: 'completed',
conclusion: 'failure',
completed_at: new Date().toISOString(),
output: {
title: 'Missing linked issue',
summary: 'PR must reference an issue (e.g., `Fixes #123`)',
},
});
core.setFailed('PR must reference an issue');
return;
}
// Check if PR author is assigned to any linked issue
let issueWithAuthorAssigned = null;
let issuesWithoutAuthor = [];
for (const issueNum of issueNumbers) {
try {
const { data: issue } = await github.rest.issues.get({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issueNum,
});
const assigneeLogins = (issue.assignees || []).map(a => a.login);
if (assigneeLogins.includes(prAuthor)) {
issueWithAuthorAssigned = issueNum;
console.log(` Issue #${issueNum} has PR author ${prAuthor} as assignee`);
break;
} else {
issuesWithoutAuthor.push({
number: issueNum,
assignees: assigneeLogins
});
console.log(` Issue #${issueNum} assignees: ${assigneeLogins.length > 0 ? assigneeLogins.join(', ') : 'none'}`);
}
} catch (error) {
console.log(` Issue #${issueNum} not found`);
}
}
if (!issueWithAuthorAssigned) {
const issueList = issuesWithoutAuthor.map(i =>
`#${i.number} (assignees: ${i.assignees.length > 0 ? i.assignees.join(', ') : 'none'})`
).join(', ');
const message = `## PR Closed - Requirements Not Met
This PR has been automatically closed because it doesn't meet the requirements.
**PR Author:** @${prAuthor}
**Found issues:** ${issueList}
**Problem:** The PR author must be assigned to the linked issue.
**To fix:**
1. Assign yourself (@${prAuthor}) to one of the linked issues
2. Re-open this PR
**Why is this required?** See #472 for details.`;
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: prNumber,
body: message,
});
await github.rest.pulls.update({
owner: context.repo.owner,
repo: context.repo.repo,
pull_number: prNumber,
state: 'closed',
});
// Update check run to failure
await github.rest.checks.update({
owner: context.repo.owner,
repo: context.repo.repo,
check_run_id: checkRun.id,
status: 'completed',
conclusion: 'failure',
completed_at: new Date().toISOString(),
output: {
title: 'PR author not assigned to issue',
summary: `PR author @${prAuthor} must be assigned to one of the linked issues: ${issueList}`,
},
});
core.setFailed('PR author must be assigned to the linked issue');
} else {
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: prNumber,
body: `✅ PR requirements met! Issue #${issueWithAuthorAssigned} has @${prAuthor} as assignee.`,
});
// Update check run to success
await github.rest.checks.update({
owner: context.repo.owner,
repo: context.repo.repo,
check_run_id: checkRun.id,
status: 'completed',
conclusion: 'success',
completed_at: new Date().toISOString(),
output: {
title: 'Requirements met',
summary: `Issue #${issueWithAuthorAssigned} has @${prAuthor} as assignee.`,
},
});
console.log(`PR requirements met!`);
}
@@ -1,138 +0,0 @@
name: PR Requirements Backfill
on:
workflow_dispatch:
jobs:
check-all-open-prs:
runs-on: ubuntu-latest
permissions:
pull-requests: write
issues: write
steps:
- name: Check all open PRs
uses: actions/github-script@v7
with:
script: |
const { data: pullRequests } = await github.rest.pulls.list({
owner: context.repo.owner,
repo: context.repo.repo,
state: 'open',
per_page: 100,
});
console.log(`Found ${pullRequests.length} open PRs`);
for (const pr of pullRequests) {
const prNumber = pr.number;
const prBody = pr.body || '';
const prTitle = pr.title || '';
const prAuthor = pr.user.login;
console.log(`\nChecking PR #${prNumber}: ${prTitle}`);
// Extract issue numbers from body and title
const issuePattern = /(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)?\s*#(\d+)/gi;
const allText = `${prTitle} ${prBody}`;
const matches = [...allText.matchAll(issuePattern)];
const issueNumbers = [...new Set(matches.map(m => parseInt(m[1], 10)))];
console.log(` Found issue references: ${issueNumbers.length > 0 ? issueNumbers.join(', ') : 'none'}`);
if (issueNumbers.length === 0) {
console.log(` ❌ No linked issue - closing PR`);
const message = `## PR Closed - Requirements Not Met
This PR has been automatically closed because it doesn't meet the requirements.
**Missing:** No linked issue found.
**To fix:**
1. Create or find an existing issue for this work
2. Assign yourself to the issue
3. Re-open this PR and add \`Fixes #123\` in the description`;
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: prNumber,
body: message,
});
await github.rest.pulls.update({
owner: context.repo.owner,
repo: context.repo.repo,
pull_number: prNumber,
state: 'closed',
});
continue;
}
// Check if any linked issue has the PR author as assignee
let issueWithAuthorAssigned = null;
let issuesWithoutAuthor = [];
for (const issueNum of issueNumbers) {
try {
const { data: issue } = await github.rest.issues.get({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issueNum,
});
const assigneeLogins = (issue.assignees || []).map(a => a.login);
if (assigneeLogins.includes(prAuthor)) {
issueWithAuthorAssigned = issueNum;
break;
} else {
issuesWithoutAuthor.push({
number: issueNum,
assignees: assigneeLogins
});
}
} catch (error) {
console.log(` Issue #${issueNum} not found or inaccessible`);
}
}
if (!issueWithAuthorAssigned) {
const issueList = issuesWithoutAuthor.map(i =>
`#${i.number} (assignees: ${i.assignees.length > 0 ? i.assignees.join(', ') : 'none'})`
).join(', ');
console.log(` ❌ PR author not assigned to any linked issue - closing PR`);
const message = `## PR Closed - Requirements Not Met
This PR has been automatically closed because it doesn't meet the requirements.
**PR Author:** @${prAuthor}
**Found issues:** ${issueList}
**Problem:** The PR author must be assigned to the linked issue.
**To fix:**
1. Assign yourself (@${prAuthor}) to one of the linked issues
2. Re-open this PR`;
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: prNumber,
body: message,
});
await github.rest.pulls.update({
owner: context.repo.owner,
repo: context.repo.repo,
pull_number: prNumber,
state: 'closed',
});
} else {
console.log(` ✅ PR requirements met! Issue #${issueWithAuthorAssigned} has ${prAuthor} as assignee.`);
}
}
console.log('\nBackfill complete!');
-189
View File
@@ -1,189 +0,0 @@
name: PR Requirements Check
on:
pull_request_target:
types: [opened, reopened, edited, synchronize]
jobs:
check-requirements:
runs-on: ubuntu-latest
permissions:
pull-requests: write
issues: write
steps:
- name: Check PR has linked issue with assignee
uses: actions/github-script@v7
with:
script: |
const pr = context.payload.pull_request;
const prNumber = pr.number;
const prBody = pr.body || '';
const prTitle = pr.title || '';
const prLabels = (pr.labels || []).map(l => l.name);
// Allow micro-fix and documentation PRs without a linked issue
const isMicroFix = prLabels.includes('micro-fix') || /micro-fix/i.test(prTitle);
const isDocumentation = prLabels.includes('documentation') || /\bdocs?\b/i.test(prTitle);
if (isMicroFix || isDocumentation) {
const reason = isMicroFix ? 'micro-fix' : 'documentation';
console.log(`PR #${prNumber} is a ${reason}, skipping issue requirement.`);
return;
}
// Extract issue numbers from body and title
// Matches: fixes #123, closes #123, resolves #123, or plain #123
const issuePattern = /(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)?\s*#(\d+)/gi;
const allText = `${prTitle} ${prBody}`;
const matches = [...allText.matchAll(issuePattern)];
const issueNumbers = [...new Set(matches.map(m => parseInt(m[1], 10)))];
console.log(`PR #${prNumber}:`);
console.log(` Found issue references: ${issueNumbers.length > 0 ? issueNumbers.join(', ') : 'none'}`);
if (issueNumbers.length === 0) {
const message = `## PR Closed - Requirements Not Met
This PR has been automatically closed because it doesn't meet the requirements.
**Missing:** No linked issue found.
**To fix:**
1. Create or find an existing issue for this work
2. Assign yourself to the issue
3. Re-open this PR and add \`Fixes #123\` in the description
**Exception:** To bypass this requirement, you can:
- Add the \`micro-fix\` label or include \`micro-fix\` in your PR title for trivial fixes
- Add the \`documentation\` label or include \`doc\`/\`docs\` in your PR title for documentation changes
**Micro-fix requirements** (must meet ALL):
| Qualifies | Disqualifies |
|-----------|--------------|
| < 20 lines changed | Any functional bug fix |
| Typos & Documentation & Linting | Refactoring for "clean code" |
| No logic/API/DB changes | New features (even tiny ones) |
**Why is this required?** See #472 for details.`;
const comments = await github.rest.issues.listComments({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: prNumber,
});
const botComment = comments.data.find(
(c) => c.user.type === 'Bot' && c.body.includes('PR Closed - Requirements Not Met')
);
if (!botComment) {
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: prNumber,
body: message,
});
}
await github.rest.pulls.update({
owner: context.repo.owner,
repo: context.repo.repo,
pull_number: prNumber,
state: 'closed',
});
core.setFailed('PR must reference an issue');
return;
}
// Check if any linked issue has the PR author as assignee
const prAuthor = pr.user.login;
let issueWithAuthorAssigned = null;
let issuesWithoutAuthor = [];
for (const issueNum of issueNumbers) {
try {
const { data: issue } = await github.rest.issues.get({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issueNum,
});
const assigneeLogins = (issue.assignees || []).map(a => a.login);
if (assigneeLogins.includes(prAuthor)) {
issueWithAuthorAssigned = issueNum;
console.log(` Issue #${issueNum} has PR author ${prAuthor} as assignee`);
break;
} else {
issuesWithoutAuthor.push({
number: issueNum,
assignees: assigneeLogins
});
console.log(` Issue #${issueNum} assignees: ${assigneeLogins.length > 0 ? assigneeLogins.join(', ') : 'none'} (PR author: ${prAuthor})`);
}
} catch (error) {
console.log(` Issue #${issueNum} not found or inaccessible`);
}
}
if (!issueWithAuthorAssigned) {
const issueList = issuesWithoutAuthor.map(i =>
`#${i.number} (assignees: ${i.assignees.length > 0 ? i.assignees.join(', ') : 'none'})`
).join(', ');
const message = `## PR Closed - Requirements Not Met
This PR has been automatically closed because it doesn't meet the requirements.
**PR Author:** @${prAuthor}
**Found issues:** ${issueList}
**Problem:** The PR author must be assigned to the linked issue.
**To fix:**
1. Assign yourself (@${prAuthor}) to one of the linked issues
2. Re-open this PR
**Exception:** To bypass this requirement, you can:
- Add the \`micro-fix\` label or include \`micro-fix\` in your PR title for trivial fixes
- Add the \`documentation\` label or include \`doc\`/\`docs\` in your PR title for documentation changes
**Micro-fix requirements** (must meet ALL):
| Qualifies | Disqualifies |
|-----------|--------------|
| < 20 lines changed | Any functional bug fix |
| Typos & Documentation & Linting | Refactoring for "clean code" |
| No logic/API/DB changes | New features (even tiny ones) |
**Why is this required?** See #472 for details.`;
const comments = await github.rest.issues.listComments({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: prNumber,
});
const botComment = comments.data.find(
(c) => c.user.type === 'Bot' && c.body.includes('PR Closed - Requirements Not Met')
);
if (!botComment) {
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: prNumber,
body: message,
});
}
await github.rest.pulls.update({
owner: context.repo.owner,
repo: context.repo.repo,
pull_number: prNumber,
state: 'closed',
});
core.setFailed('PR author must be assigned to the linked issue');
} else {
console.log(`PR requirements met! Issue #${issueWithAuthorAssigned} has ${prAuthor} as assignee.`);
}
+4 -5
View File
@@ -21,19 +21,18 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Install uv
uses: astral-sh/setup-uv@v4
cache: 'pip'
- name: Install dependencies
run: |
cd core
uv sync
pip install -e .
pip install -r requirements-dev.txt
- name: Run tests
run: |
cd core
uv run pytest tests/ -v
pytest tests/ -v
- name: Generate changelog
id: changelog
+1 -9
View File
@@ -46,7 +46,6 @@ coverage/
# TypeScript
*.tsbuildinfo
vite.config.d.ts
# Python
__pycache__/
@@ -55,6 +54,7 @@ __pycache__/
*.egg-info/
.eggs/
*.egg
uv.lock
# Generated runtime data
core/data/
@@ -69,12 +69,4 @@ exports/*
.agent-builder-sessions/*
.claude/settings.local.json
.venv
docs/github-issues/*
core/tests/*dumps/*
screenshots/*
+14 -3
View File
@@ -1,9 +1,20 @@
{
"mcpServers": {
"agent-builder": {
"command": "uv",
"args": ["run", "-m", "framework.mcp.agent_builder_server"],
"cwd": "core"
"command": "python",
"args": ["-m", "framework.mcp.agent_builder_server"],
"cwd": "core",
"env": {
"PYTHONPATH": "../tools/src"
}
},
"tools": {
"command": "python",
"args": ["mcp_server.py", "--stdio"],
"cwd": "tools",
"env": {
"PYTHONPATH": "src"
}
}
}
}
-30
View File
@@ -1,30 +0,0 @@
{
"mcpServers": {
"agent-builder": {
"command": "uv",
"args": [
"run",
"python",
"-m",
"framework.mcp.agent_builder_server"
],
"cwd": "core",
"env": {
"PYTHONPATH": "../tools/src"
}
},
"tools": {
"command": "uv",
"args": [
"run",
"python",
"mcp_server.py",
"--stdio"
],
"cwd": "tools",
"env": {
"PYTHONPATH": "src"
}
}
}
}
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-concepts
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-create
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-credentials
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-debugger
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-patterns
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-test
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/triage-issue
-18
View File
@@ -1,18 +0,0 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.15.0
hooks:
- id: ruff
name: ruff lint (core)
args: [--fix]
files: ^core/
- id: ruff
name: ruff lint (tools)
args: [--fix]
files: ^tools/
- id: ruff-format
name: ruff format (core)
files: ^core/
- id: ruff-format
name: ruff format (tools)
files: ^tools/
-1
View File
@@ -1 +0,0 @@
3.11
-7
View File
@@ -1,7 +0,0 @@
{
"recommendations": [
"charliermarsh.ruff",
"editorconfig.editorconfig",
"ms-python.python"
]
}
+28 -195
View File
@@ -1,207 +1,40 @@
# Release Notes
# Changelog
**Release Date:** February 18, 2026
**Tag:** v0.5.1
All notable changes to this project will be documented in this file.
## The Hive Gets a Brain
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
v0.5.1 is our most ambitious release yet. Hive agents can now **build other agents** -- the new Hive Coder meta-agent writes, tests, and fixes agent packages from natural language. The runtime grows multi-graph support so one session can orchestrate multiple agents simultaneously. The TUI gets a complete overhaul with an in-app agent picker, live streaming, and seamless escalation to the Coder. And we're now provider-agnostic: Claude Code subscriptions, OpenAI-compatible endpoints, and any LiteLLM-supported model work out of the box.
## [Unreleased]
---
### Added
- Initial project structure
- React frontend (honeycomb) with Vite and TypeScript
- Node.js backend (hive) with Express and TypeScript
- Docker Compose configuration for local development
- Configuration system via `config.yaml`
- GitHub Actions CI/CD workflows
- Comprehensive documentation
## Highlights
### Changed
- N/A
### Hive Coder -- The Agent That Builds Agents
### Deprecated
- N/A
A native meta-agent that lives inside the framework at `core/framework/agents/hive_coder/`. Give it a natural-language specification and it produces a complete agent package -- goal definition, node prompts, edge routing, MCP tool wiring, tests, and all boilerplate files.
### Removed
- N/A
```bash
# Launch the Coder directly
hive code
### Fixed
- N/A
# Or escalate from any running agent (TUI)
Ctrl+E # or /coder in chat
```
### Security
- N/A
The Coder ships with:
## [0.1.0] - 2025-01-13
- **Reference documentation** -- anti-patterns, construction guide, and design patterns baked into its system prompt
- **Guardian watchdog** -- an event-driven monitor that catches agent failures and triggers automatic remediation
- **Coder Tools MCP server** -- file I/O, fuzzy-match editing, git snapshots, and sandboxed shell execution (`tools/coder_tools_server.py`)
- **Test generation** -- structural tests for forever-alive agents that don't hang on `runner.run()`
### Added
- Initial release
### Multi-Graph Agent Runtime
`AgentRuntime` now supports loading, managing, and switching between multiple agent graphs within a single session. Six new lifecycle tools give agents (and the TUI) full control:
```python
# Load a second agent into the runtime
await runtime.add_graph("exports/deep_research_agent")
# Tools available to agents:
# load_agent, unload_agent, start_agent, restart_agent, list_agents, get_user_presence
```
The Hive Coder uses multi-graph internally -- when you escalate from a worker agent, the Coder loads as a separate graph while the worker stays alive in the background.
### TUI Revamp
The Terminal UI gets a ground-up rebuild with five major additions:
- **Agent Picker** (Ctrl+A) -- tabbed modal screen for browsing Your Agents, Framework agents, and Examples with metadata badges (node count, tool count, session count, tags)
- **Runtime-optional startup** -- TUI launches without a pre-loaded agent, showing the picker on first open
- **Live streaming pane** -- dedicated RichLog widget shows LLM tokens as they arrive, replacing the old one-token-per-line display
- **PDF attachments** -- `/attach` and `/detach` commands with native OS file dialog (macOS, Linux, Windows)
- **Multi-graph commands** -- `/graphs`, `/graph <id>`, `/load <path>`, `/unload <id>` for managing agent graphs in-session
### Provider-Agnostic LLM Support
Hive is no longer Anthropic-only. v0.5.1 adds first-class support for:
- **Claude Code subscriptions** -- `use_claude_code_subscription: true` in `~/.hive/configuration.json` reads OAuth tokens from `~/.claude/.credentials.json` with automatic refresh
- **OpenAI-compatible endpoints** -- `api_base` config routes traffic through any compatible API (Azure OpenAI, vLLM, Ollama, etc.)
- **Any LiteLLM model** -- `RuntimeConfig` now passes `api_key`, `api_base`, and `extra_kwargs` through to LiteLLM
The quickstart script auto-detects Claude Code subscriptions and ZAI Code installations.
---
## What's New
### Architecture & Runtime
- **Hive Coder meta-agent** -- Natural-language agent builder with reference docs, guardian watchdog, and `hive code` CLI command. (@TimothyZhang7)
- **Multi-graph agent sessions** -- `add_graph`/`remove_graph` on AgentRuntime with 6 lifecycle tools (`load_agent`, `unload_agent`, `start_agent`, `restart_agent`, `list_agents`, `get_user_presence`). (@TimothyZhang7)
- **Claude Code subscription support** -- OAuth token refresh via `use_claude_code_subscription` config, auto-detection in quickstart, LiteLLM header patching. (@TimothyZhang7)
- **OpenAI-compatible endpoint support** -- `api_base` and `extra_kwargs` in `RuntimeConfig` for any OpenAI-compatible API. (@TimothyZhang7)
- **Remove deprecated node types** -- Delete `FlexibleGraphExecutor`, `WorkerNode`, `HybridJudge`, `CodeSandbox`, `Plan`, `FunctionNode`, `LLMNode`, `RouterNode`. Deprecated types (`llm_tool_use`, `llm_generate`, `function`, `router`, `human_input`) now raise `RuntimeError` with migration guidance. (@TimothyZhang7)
- **Interactive credential setup** -- Guided `CredentialSetupSession` with health checks and encrypted storage, accessible via `hive setup-credentials` or automatic prompting on credential errors. (@RichardTang-Aden)
- **Pre-start confirmation prompt** -- Interactive prompt before agent execution allowing credential updates or abort. (@RichardTang-Aden)
- **Event bus multi-graph support** -- `graph_id` on events, `filter_graph` on subscriptions, `ESCALATION_REQUESTED` event type, `exclude_own_graph` filter. (@TimothyZhang7)
### TUI Improvements
- **In-app agent picker** (Ctrl+A) -- Tabbed modal for browsing agents with metadata badges (nodes, tools, sessions, tags). (@TimothyZhang7)
- **Runtime-optional TUI startup** -- Launches without a pre-loaded agent, shows agent picker on startup. (@TimothyZhang7)
- **Hive Coder escalation** (Ctrl+E) -- Escalate to Hive Coder and return; also available via `/coder` and `/back` chat commands. (@TimothyZhang7)
- **PDF attachment support** -- `/attach` and `/detach` commands with native OS file dialog. (@TimothyZhang7)
- **Streaming output pane** -- Dedicated RichLog widget for live LLM token streaming. (@TimothyZhang7)
- **Multi-graph TUI commands** -- `/graphs`, `/graph <id>`, `/load <path>`, `/unload <id>`. (@TimothyZhang7)
- **Agent Guardian watchdog** -- Event-driven monitor that catches secondary agent failures and triggers automatic remediation, with `--no-guardian` CLI flag. (@TimothyZhang7)
### New Tool Integrations
| Tool | Description | Contributor |
| ---------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------ |
| **Discord** | 4 MCP tools (`discord_list_guilds`, `discord_list_channels`, `discord_send_message`, `discord_get_messages`) with rate-limit retry and channel filtering | @mishrapravin114 |
| **Exa Search API** | 4 AI-powered search tools (`exa_search`, `exa_find_similar`, `exa_get_contents`, `exa_answer`) with neural/keyword search, domain filters, and citation-backed answers | @JeetKaria06 |
| **Razorpay** | 6 payment processing tools for payments, invoices, payment links, and refunds with HTTP Basic Auth | @shivamshahi07 |
| **Google Docs** | Document creation, reading, and editing with OAuth credential support | @haliaeetusvocifer |
| **Gmail enhancements** | Expanded mail operations for inbox management | @bryanadenhq |
### Infrastructure
- **Default node type → `event_loop`** -- `NodeSpec.node_type` defaults to `"event_loop"` instead of `"llm_tool_use"`. (@TimothyZhang7)
- **Default `max_node_visits` → 0 (unlimited)** -- Nodes default to unlimited visits, reducing friction for feedback loops and forever-alive agents. (@TimothyZhang7)
- **Remove `function` field from NodeSpec** -- Follows deprecation of `FunctionNode`. (@TimothyZhang7)
- **LiteLLM OAuth patch** -- Correct header construction for OAuth tokens (remove `x-api-key` when Bearer token is present). (@TimothyZhang7)
- **Orchestrator config centralization** -- Reads `api_key`, `api_base`, `extra_kwargs` from centralized `~/.hive/configuration.json`. (@TimothyZhang7)
- **System prompt datetime injection** -- All system prompts now include current date/time for time-aware agent behavior. (@TimothyZhang7)
- **Utils module exports** -- Proper `__init__.py` exports for the utils module. (@Siddharth2624)
- **Increased default max_tokens** -- Opus 4.6 defaults to 32768, Sonnet 4.5 to 16384 (up from 8192). (@TimothyZhang7)
---
## Bug Fixes
- Flush WIP accumulator outputs on cancel/failure so edge conditions see correct values on resume
- Stall detection state preserved across resume (no more resets on checkpoint restore)
- Skip client-facing blocking for event-triggered executions (timer/webhook)
- Executor retry override scoped to actual EventLoopNode instances only
- Add `_awaiting_input` flag to EventLoopNode to prevent input injection race conditions
- Fix TUI streaming display (tokens no longer appear one-per-line)
- Fix `_return_from_escalation` crash when ChatRepl widgets not yet mounted
- Fix tools registration problems for Google Docs credentials (@RichardTang-Aden)
- Fix email agent version conflicts (@RichardTang-Aden)
- Fix coder tool timeouts (120s for tests, 300s cap for commands)
## Documentation
- Clarify installation and prevent root pip install misuse (@paarths-collab)
---
## Agent Updates
- **Email Inbox Management** -- Consolidate `gmail_inbox_guardian` and `inbox_management` into a single unified agent with updated prompts and config. (@RichardTang-Aden, @bryanadenhq)
- **Job Hunter** -- Updated node prompts, config, and agent metadata; added PDF resume selection. (@bryanadenhq)
- **Deep Research Agent** -- Revised node implementations with updated prompts and output handling.
- **Tech News Reporter** -- Revised node prompts for improved output quality.
- **Vulnerability Assessment** -- Expanded prompts with more detailed assessment instructions. (@bryanadenhq)
---
## Breaking Changes
- **Deprecated node types raise `RuntimeError`** -- `llm_tool_use`, `llm_generate`, `function`, `router`, `human_input` now fail instead of warning. Migrate to `event_loop`.
- **`NodeSpec.node_type` defaults to `"event_loop"`** (was `"llm_tool_use"`)
- **`NodeSpec.max_node_visits` defaults to `0` / unlimited** (was `1`)
- **`NodeSpec.function` field removed** -- `FunctionNode` is deleted; use event_loop nodes with tools instead.
---
## Community Contributors
A huge thank you to everyone who contributed to this release:
- **Richard Tang** (@RichardTang-Aden) -- Interactive credential setup, pre-start confirmation, email agent consolidation, tool registration fixes, lint and formatting
- **Pravin Mishra** (@mishrapravin114) -- Discord integration with 4 MCP tools
- **Jeet Karia** (@JeetKaria06) -- Exa Search API integration with 4 AI-powered search tools
- **Shivam Shahi** (@shivamshahi07) -- Razorpay payment processing integration
- **Siddharth Varshney** (@Siddharth2624) -- Utils module exports
- **@haliaeetusvocifer** -- Google Docs integration with OAuth support
- **Bryan** (@bryanadenhq) -- PDF selection, inbox agent fixes, Job Hunter and Vulnerability Assessment updates
- **@paarths-collab** -- Documentation improvements
---
## Upgrading
```bash
git pull origin main
uv sync
```
### Migration Guide
If your agents use deprecated node types, update them:
```python
# Before (v0.5.0) -- these now raise RuntimeError
NodeSpec(node_type="llm_tool_use", ...)
NodeSpec(node_type="function", function=my_func, ...)
# After (v0.5.1) -- use event_loop for everything
NodeSpec(node_type="event_loop", ...) # or just omit node_type (it's the default now)
```
If your agents set `max_node_visits=1` explicitly, they'll still work. The only change is the _default_ -- new agents without an explicit value now get unlimited visits.
To try the new Hive Coder:
```bash
# Launch Coder directly
hive code
# Or from TUI -- press Ctrl+E to escalate
hive tui
```
---
## What's Next
- **Agent-to-agent communication** -- one agent's output triggers another agent's entry point
- **Cost visibility** -- detailed runtime log of LLM costs per node and per session
- **Persistent webhook subscriptions** -- survive agent restarts without re-registering
- **Remote agent deployment** -- run agents as long-lived services with HTTP APIs
[Unreleased]: https://github.com/adenhq/hive/compare/v0.1.0...HEAD
[0.1.0]: https://github.com/adenhq/hive/releases/tag/v0.1.0
+24 -80
View File
@@ -1,70 +1,34 @@
# Contributing to Aden Agent Framework
Thank you for your interest in contributing to the Aden Agent Framework! This document provides guidelines and information for contributors. Were especially looking for help building tools, integrations ([check #2805](https://github.com/adenhq/hive/issues/2805)), and example agents for the framework. If youre interested in extending its functionality, this is the perfect place to start.
Thank you for your interest in contributing to the Aden Agent Framework! This document provides guidelines and information for contributors.
## Code of Conduct
By participating in this project, you agree to abide by our [Code of Conduct](docs/CODE_OF_CONDUCT.md).
## Issue Assignment Policy
To prevent duplicate work and respect contributors' time, we require issue assignment before submitting PRs.
### How to Claim an Issue
1. **Find an Issue:** Browse existing issues or create a new one
2. **Claim It:** Leave a comment (e.g., *"I'd like to work on this!"*)
3. **Wait for Assignment:** A maintainer will assign you within 24 hours. Issues with reproducible steps or proposals are prioritized.
4. **Submit Your PR:** Once assigned, you're ready to contribute
> **Note:** PRs for unassigned issues may be delayed or closed if someone else was already assigned.
### Exceptions (No Assignment Needed)
You may submit PRs without prior assignment for:
- **Documentation:** Fixing typos or clarifying instructions — add the `documentation` label or include `doc`/`docs` in your PR title to bypass the linked issue requirement
- **Micro-fixes:** Add the `micro-fix` label or include `micro-fix` in your PR title to bypass the linked issue requirement. Micro-fixes must meet **all** qualification criteria:
| Qualifies | Disqualifies |
|-----------|--------------|
| < 20 lines changed | Any functional bug fix |
| Typos & Documentation & Linting | Refactoring for "clean code" |
| No logic/API/DB changes | New features (even tiny ones) |
By participating in this project, you agree to abide by our [Code of Conduct](CODE_OF_CONDUCT.md).
## Getting Started
1. Fork the repository
2. Clone your fork: `git clone https://github.com/YOUR_USERNAME/hive.git`
3. Add the upstream repository: `git remote add upstream https://github.com/adenhq/hive.git`
4. Sync with upstream to ensure you're starting from the latest code:
```bash
git fetch upstream
git checkout main
git merge upstream/main
```
5. Create a feature branch: `git checkout -b feature/your-feature-name`
6. Make your changes
7. Run checks and tests:
```bash
make check # Lint and format checks (ruff check + ruff format --check on core/ and tools/)
make test # Core tests (cd core && pytest tests/ -v)
```
8. Commit your changes following our commit conventions
9. Push to your fork and submit a Pull Request
3. Create a feature branch: `git checkout -b feature/your-feature-name`
4. Make your changes
5. Run tests: `PYTHONPATH=core:exports python -m pytest`
6. Commit your changes following our commit conventions
7. Push to your fork and submit a Pull Request
## Development Setup
```bash
# Install Python packages and verify setup
# Install Python packages
./scripts/setup-python.sh
# Verify installation
python -c "import framework; import aden_tools; print('✓ Setup complete')"
# Install Claude Code skills (optional)
./quickstart.sh
```
> **Windows Users:**
> If you are on native Windows, it is recommended to use **WSL (Windows Subsystem for Linux)**.
> Alternatively, make sure to run PowerShell or Git Bash with Python 3.11+ installed, and disable "App Execution Aliases" in Windows settings.
> **Tip:** Installing Claude Code skills is optional for running existing agents, but required if you plan to **build new agents**.
## Commit Convention
We follow [Conventional Commits](https://www.conventionalcommits.org/):
@@ -95,10 +59,10 @@ docs(readme): update installation instructions
## Pull Request Process
1. **Get assigned to the issue first** (see [Issue Assignment Policy](#issue-assignment-policy))
2. Update documentation if needed
3. Add tests for new functionality
4. Ensure `make check` and `make test` pass
1. Update documentation if needed
2. Add tests for new functionality
3. Ensure all tests pass
4. Update the CHANGELOG.md if applicable
5. Request review from maintainers
### PR Title Format
@@ -111,7 +75,7 @@ feat(component): add new feature description
## Project Structure
- `core/` - Core framework (agent runtime, graph executor, protocols)
- `tools/` - MCP Tools Package (tools for agent capabilities)
- `tools/` - MCP Tools Package (19 tools for agent capabilities)
- `exports/` - Agent packages and examples
- `docs/` - Documentation
- `scripts/` - Build and utility scripts
@@ -126,39 +90,19 @@ feat(component): add new feature description
- Use meaningful variable and function names
- Keep functions focused and small
For linting and formatting (Ruff, pre-commit hooks), see [Linting & Formatting Setup](docs/contributing-lint-setup.md).
## Testing
> **Note:** When testing agents in `exports/`, always set PYTHONPATH:
>
> ```bash
> PYTHONPATH=exports uv run python -m agent_name test
> ```
```bash
# Run lint and format checks (mirrors CI lint job)
make check
# Run all tests for the framework
cd core && python -m pytest
# Run core framework tests (mirrors CI test job)
make test
# Or run tests directly
cd core && pytest tests/ -v
# Run tools package tests (when contributing to tools/)
cd tools && uv run pytest tests/ -v
# Run all tests for tools
cd tools && python -m pytest
# Run tests for a specific agent
PYTHONPATH=exports uv run python -m agent_name test
PYTHONPATH=core:exports python -m agent_name test
```
> **CI also validates** that all exported agent JSON files (`exports/*/agent.json`) are well-formed JSON. Ensure your agent exports are valid before submitting.
## Contributor License Agreement
By submitting a Pull Request, you agree that your contributions will be licensed under the Aden Agent Framework license.
## Questions?
Feel free to open an issue for questions or join our [Discord community](https://discord.com/invite/MXE49hrKDk).
+210 -139
View File
@@ -20,13 +20,12 @@ This guide covers everything you need to know to develop with the Aden Agent Fra
Aden Agent Framework is a Python-based system for building goal-driven, self-improving AI agents.
| Package | Directory | Description | Tech Stack |
| ------------- | ---------- | ----------------------------------------- | ------------ |
| **framework** | `/core` | Core runtime, graph executor, protocols | Python 3.11+ |
| **tools** | `/tools` | MCP tools for agent capabilities | Python 3.11+ |
| **exports** | `/exports` | Agent packages (user-created, gitignored) | Python 3.11+ |
| **skills** | `.claude`, `.agents`, `.agent` | Shared skills for Claude/Codex/other coding agents | Markdown |
| **codex** | `.codex` | Codex CLI project configuration (MCP servers) | TOML |
| Package | Directory | Description | Tech Stack |
| ------------- | ---------- | --------------------------------------- | ------------ |
| **framework** | `/core` | Core runtime, graph executor, protocols | Python 3.11+ |
| **tools** | `/tools` | 19 MCP tools for agent capabilities | Python 3.11+ |
| **exports** | `/exports` | Agent packages and examples | Python 3.11+ |
| **skills** | `.claude` | Claude Code skills for building/testing | Markdown |
### Key Principles
@@ -45,16 +44,15 @@ Aden Agent Framework is a Python-based system for building goal-driven, self-imp
Ensure you have installed:
- **Python 3.11+** - [Download](https://www.python.org/downloads/) (3.12 or 3.13 recommended)
- **uv** - Python package manager ([Install](https://docs.astral.sh/uv/getting-started/installation/))
- **pip** - Package installer for Python (comes with Python)
- **git** - Version control
- **Claude Code** - [Install](https://docs.anthropic.com/claude/docs/claude-code) (optional)
- **Codex CLI** - [Install](https://github.com/openai/codex) (optional)
- **Claude Code** - [Install](https://docs.anthropic.com/claude/docs/claude-code) (optional, for using building skills)
Verify installation:
```bash
python --version # Should be 3.11+
uv --version # Should be latest
pip --version # Should be latest
git --version # Any recent version
```
@@ -65,8 +63,8 @@ git --version # Any recent version
git clone https://github.com/adenhq/hive.git
cd hive
# 2. Run automated setup
./quickstart.sh
# 2. Run automated Python setup
./scripts/setup-python.sh
```
The setup script performs these actions:
@@ -101,60 +99,21 @@ Get API keys:
./quickstart.sh
```
This installs agent-related Claude Code skills:
This installs:
- `/hive` - Complete workflow for building agents
- `/hive-create` - Step-by-step agent building
- `/hive-concepts` - Fundamental agent concepts
- `/hive-patterns` - Best practices and design patterns
- `/hive-test` - Test and validate agents
### Cursor IDE Support
Skills are also available in Cursor. To enable:
1. Open Command Palette (`Cmd+Shift+P` / `Ctrl+Shift+P`)
2. Run `MCP: Enable` to enable MCP servers
3. Restart Cursor to load the MCP servers from `.cursor/mcp.json`
4. Type `/` in Agent chat and search for skills (e.g., `/hive-create`)
### Codex CLI Support
Hive supports [OpenAI Codex CLI](https://github.com/openai/codex) (v0.101.0+).
Configuration files are tracked in git:
- `.codex/config.toml` — MCP server config (`agent-builder`)
- `.agents/skills/` — Symlinks to Hive skills
To use Codex with Hive:
1. Run `codex` in the repo root
2. Type `use hive` to start the agent workflow
Example:
```
codex> use hive
```
### Opencode Support
To enable Opencode integration:
1. Create/Ensure `.opencode/` directory exists
2. Configure MCP servers in `.opencode/mcp.json`
3. Restart Opencode to load the MCP servers
4. Switch to the Hive agent
* **Tools:** Accesses `agent-builder` and standard `tools` via standard MCP protocols over stdio.
- `/building-agents` - Build new goal-driven agents
- `/testing-agent` - Test agents with evaluation framework
### Verify Setup
```bash
# Verify package imports
uv run python -c "import framework; print('✓ framework OK')"
uv run python -c "import aden_tools; print('✓ aden_tools OK')"
uv run python -c "import litellm; print('✓ litellm OK')"
python -c "import framework; print('✓ framework OK')"
python -c "import aden_tools; print('✓ aden_tools OK')"
python -c "import litellm; print('✓ litellm OK')"
# Run an agent (after building one via /hive-create)
PYTHONPATH=exports uv run python -m your_agent_name validate
# Run an example agent
PYTHONPATH=core:exports python -m support_ticket_agent validate
```
---
@@ -166,84 +125,78 @@ hive/ # Repository root
├── .github/ # GitHub configuration
│ ├── workflows/
│ │ ├── ci.yml # Lint, test, validate on every PR
│ │ ── release.yml # Runs on tags
│ │ ├── pr-requirements.yml # PR requirement checks
│ │ ├── pr-check-command.yml # PR check commands
│ │ ├── claude-issue-triage.yml # Automated issue triage
│ │ └── auto-close-duplicates.yml # Close duplicate issues
│ │ ├── ci.yml # Runs on every PR
│ │ ── release.yml # Runs on tags
│ ├── ISSUE_TEMPLATE/ # Bug report & feature request templates
│ ├── PULL_REQUEST_TEMPLATE.md # PR description template
│ └── CODEOWNERS # Auto-assign reviewers
├── .claude/ # Claude Code Skills
│ └── skills/ # Skills for building
│ ├── hive/ # Complete workflow
├── hive-create/ # Step-by-step build guide
├── hive-concepts/ # Fundamental concepts
├── hive-patterns/ # Best practices
└── hive-test/ # Test and validate agents
├── .codex/ # Codex CLI project config
└── config.toml # Codex MCP server definitions
── .agents/ # Shared skill mountpoint
│ └── skills/ # Symlinks to Hive skills
│ └── skills/
│ ├── building-agents/ # Skills for building agents
│ ├── SKILL.md # Main skill definition
│ ├── building-agents-core/
│ ├── building-agents-patterns/
│ └── building-agents-construction/
│ ├── testing-agent/ # Skills for testing agents
└── SKILL.md
│ └── agent-workflow/ # Complete workflow orchestration
├── core/ # CORE FRAMEWORK PACKAGE
│ ├── framework/ # Main package code
│ │ ├── builder/ # Agent builder utilities
│ │ ├── credentials/ # Credential management
│ │ ├── graph/ # GraphExecutor - executes node graphs
│ │ ├── llm/ # LLM provider integrations (Anthropic, OpenAI, etc.)
│ │ ├── mcp/ # MCP server integration
│ │ ├── runner/ # AgentRunner - loads and runs agents
| | ├── observability/ # Structured logging - human-readable and machine-parseable tracing
│ │ ├── runtime/ # Runtime environment
│ │ ├── schemas/ # Data schemas
│ │ ├── storage/ # File-based persistence
│ │ ├── testing/ # Testing utilities
│ │ ├── tui/ # Terminal UI dashboard
├── executor/ # GraphExecutor - executes node graphs
│ │ ├── protocols/ # Standard protocols (hooks, tracing, etc.)
│ │ ├── llm/ # LLM provider integrations (Anthropic, OpenAI, etc.)
│ │ ├── memory/ # Memory systems (STM, LTM/RLM)
│ │ ├── tools/ # Tool registry and management
│ │ └── __init__.py
│ ├── pyproject.toml # Package metadata and dependencies
│ ├── requirements.txt # Python dependencies
│ ├── README.md # Framework documentation
│ ├── MCP_INTEGRATION_GUIDE.md # MCP server integration guide
│ └── docs/ # Protocol documentation
├── tools/ # TOOLS PACKAGE (MCP tools)
├── tools/ # TOOLS PACKAGE (19 MCP tools)
│ ├── src/
│ │ └── aden_tools/
│ │ ├── tools/ # Individual tool implementations
│ │ │ ├── web_search_tool/
│ │ │ ├── web_scrape_tool/
│ │ │ ├── file_system_toolkits/
│ │ │ └── ... # Additional tools
│ │ │ └── ... # 19 tools total
│ │ ├── mcp_server.py # HTTP MCP server
│ │ └── __init__.py
│ ├── pyproject.toml # Package metadata
│ ├── requirements.txt # Python dependencies
│ └── README.md # Tools documentation
├── exports/ # AGENT PACKAGES (user-created, gitignored)
── your_agent_name/ # Created via /hive-create
├── examples/ # Example agents
── templates/ # Pre-built template agents
├── exports/ # AGENT PACKAGES
── support_ticket_agent/ # Example: Support ticket handler
├── market_research_agent/ # Example: Market research
│ ├── outbound_sales_agent/ # Example: Sales outreach
── personal_assistant_agent/ # Example: Personal assistant
│ └── ... # More agent examples
├── docs/ # Documentation
│ ├── getting-started.md # Quick start guide
│ ├── configuration.md # Configuration reference
│ ├── architecture/ # System architecture
── articles/ # Technical articles
│ ├── quizzes/ # Developer quizzes
│ └── i18n/ # Translations
│ ├── architecture.md # System architecture
── articles/ # Technical articles
├── scripts/ # Utility scripts
── auto-close-duplicates.ts # GitHub duplicate issue closer
├── scripts/ # Build & utility scripts
── setup-python.sh # Python environment setup
│ └── setup.sh # Legacy setup script
├── .agent/ # Antigravity IDE: mcp_config.json + skills (symlinks)
├── quickstart.sh # Interactive setup wizard
├── quickstart.sh # Install Claude Code skills
├── ENVIRONMENT_SETUP.md # Complete Python setup guide
├── README.md # Project overview
├── DEVELOPER.md # This file
├── CONTRIBUTING.md # Contribution guidelines
├── CHANGELOG.md # Version history
├── ROADMAP.md # Product roadmap
├── LICENSE # Apache 2.0 License
├── docs/CODE_OF_CONDUCT.md # Community guidelines
├── CODE_OF_CONDUCT.md # Community guidelines
└── SECURITY.md # Security policy
```
@@ -260,10 +213,10 @@ The fastest way to build agents is using the Claude Code skills:
./quickstart.sh
# Build a new agent
claude> /hive
claude> /building-agents
# Test the agent
claude> /hive-test
claude> /testing-agent
```
### Agent Development Workflow
@@ -271,14 +224,14 @@ claude> /hive-test
1. **Define Your Goal**
```
claude> /hive
claude> /building-agents
Enter goal: "Build an agent that processes customer support tickets"
```
2. **Design the Workflow**
- The skill guides you through defining nodes
- Each node is a unit of work (LLM call with event_loop)
- Each node is a unit of work (LLM call, function, router)
- Edges define how execution flows
3. **Generate the Agent**
@@ -289,12 +242,12 @@ claude> /hive-test
4. **Validate the Agent**
```bash
PYTHONPATH=exports uv run python -m your_agent_name validate
PYTHONPATH=core:exports python -m your_agent_name validate
```
5. **Test the Agent**
```
claude> /hive-test
claude> /testing-agent
```
### Manual Agent Development
@@ -314,7 +267,7 @@ If you prefer to build agents manually:
{
"node_id": "analyze",
"name": "Analyze Ticket",
"node_type": "event_loop",
"node_type": "llm_generate",
"system_prompt": "Analyze this support ticket...",
"input_keys": ["ticket_content"],
"output_keys": ["category", "priority"]
@@ -334,19 +287,22 @@ If you prefer to build agents manually:
### Running Agents
```bash
# Browse and run agents interactively (Recommended)
hive tui
# Validate agent structure
PYTHONPATH=core:exports python -m agent_name validate
# Run a specific agent
hive run exports/my_agent --input '{"ticket_content": "My login is broken", "customer_id": "CUST-123"}'
# Show agent information
PYTHONPATH=core:exports python -m agent_name info
# Run with TUI dashboard
hive run exports/my_agent --tui
# Run agent with input
PYTHONPATH=core:exports python -m agent_name run --input '{
"ticket_content": "My login is broken",
"customer_id": "CUST-123"
}'
# Run in mock mode (no LLM calls)
PYTHONPATH=core:exports python -m agent_name run --mock --input '{...}'
```
> **Using Python directly:** `PYTHONPATH=exports uv run python -m agent_name run --input '{...}'`
---
## Testing Agents
@@ -355,7 +311,7 @@ hive run exports/my_agent --tui
```bash
# Run tests for an agent
claude> /hive-test
claude> /testing-agent
```
This generates and runs:
@@ -368,17 +324,17 @@ This generates and runs:
```bash
# Run all tests for an agent
PYTHONPATH=exports uv run python -m agent_name test
PYTHONPATH=core:exports python -m agent_name test
# Run specific test type
PYTHONPATH=exports uv run python -m agent_name test --type constraint
PYTHONPATH=exports uv run python -m agent_name test --type success
PYTHONPATH=core:exports python -m agent_name test --type constraint
PYTHONPATH=core:exports python -m agent_name test --type success
# Run with parallel execution
PYTHONPATH=exports uv run python -m agent_name test --parallel 4
PYTHONPATH=core:exports python -m agent_name test --parallel 4
# Fail fast (stop on first failure)
PYTHONPATH=exports uv run python -m agent_name test --fail-fast
PYTHONPATH=core:exports python -m agent_name test --fail-fast
```
### Writing Custom Tests
@@ -409,7 +365,7 @@ def test_ticket_categorization():
- **PEP 8** - Follow Python style guide
- **Type hints** - Use for function signatures and class attributes
- **Docstrings** - Document classes and public functions
- **Ruff** - Linter and formatter (run with `make check`)
- **Black** - Code formatter (run with `black .`)
```python
# Good
@@ -543,8 +499,8 @@ chore(deps): update React to 18.2.0
1. Create a feature branch from `main`
2. Make your changes with clear commits
3. Run tests locally: `make test`
4. Run linting: `make check`
3. Run tests locally: `npm run test`
4. Run linting: `npm run lint`
5. Push and create a PR
6. Fill out the PR template
7. Request review from CODEOWNERS
@@ -553,6 +509,66 @@ chore(deps): update React to 18.2.0
---
## Debugging
### Frontend Debugging
**React Developer Tools:**
1. Install the [React DevTools browser extension](https://react.dev/learn/react-developer-tools)
2. Open browser DevTools → React tab
3. Inspect component tree, props, state, and hooks
**VS Code Debugging:**
1. Add Chrome debug configuration to `.vscode/launch.json`:
```json
{
"type": "chrome",
"request": "launch",
"name": "Debug Frontend",
"url": "http://localhost:3000",
"webRoot": "${workspaceFolder}/honeycomb/src"
}
```
2. Start the dev server: `npm run dev -w honeycomb`
3. Press F5 in VS Code
### Backend Debugging
**VS Code Debugging:**
1. Add Node debug configuration:
```json
{
"type": "node",
"request": "launch",
"name": "Debug Backend",
"runtimeExecutable": "npm",
"runtimeArgs": ["run", "dev"],
"cwd": "${workspaceFolder}/hive",
"console": "integratedTerminal"
}
```
2. Set breakpoints in your code
3. Press F5 to start debugging
**Logging:**
```typescript
import { logger } from "../utils/logger";
// Add debug logs
logger.debug("Processing request", {
userId: req.user.id,
body: req.body,
});
```
---
## Common Tasks
@@ -562,18 +578,23 @@ chore(deps): update React to 18.2.0
```bash
# Add to core framework
cd core
uv add <package>
pip install <package>
# Then add to requirements.txt or pyproject.toml
# Add to tools package
cd tools
uv add <package>
pip install <package>
# Then add to requirements.txt or pyproject.toml
# Reinstall in editable mode
pip install -e .
```
### Creating a New Agent
```bash
# Option 1: Use Claude Code skill (recommended)
claude> /hive
claude> /building-agents
# Option 2: Create manually
# Note: exports/ is initially empty (gitignored). Create your agent directory:
@@ -610,7 +631,7 @@ def my_custom_tool(param1: str, param2: int) -> Dict[str, Any]:
"nodes": [
{
"node_id": "use_tool",
"node_type": "event_loop",
"node_type": "function",
"tools": ["my_custom_tool"],
...
}
@@ -659,10 +680,16 @@ echo 'ANTHROPIC_API_KEY=your-key-here' >> .env
### Debugging Agent Execution
```bash
# Run with verbose output
hive run exports/my_agent --verbose --input '{"task": "..."}'
```python
# Add debug logging to your agent
import logging
logging.basicConfig(level=logging.DEBUG)
# Run with verbose output
PYTHONPATH=core:exports python -m agent_name run --input '{...}' --verbose
# Use mock mode to test without LLM calls
PYTHONPATH=core:exports python -m agent_name run --mock --input '{...}'
```
---
@@ -679,19 +706,63 @@ lsof -i :4000
# Kill process
kill -9 <PID>
# Or change ports in config.yaml and regenerate
```
### Node Modules Issues
```bash
# Clean everything and reinstall
npm run clean
rm -rf node_modules package-lock.json
npm install
```
### Docker Issues
```bash
# Reset Docker state
docker compose down -v
docker system prune -f
docker compose build --no-cache
docker compose up
```
### TypeScript Errors After Pull
```bash
# Rebuild TypeScript
npm run build
# Or restart TS server in VS Code
# Cmd/Ctrl + Shift + P → "TypeScript: Restart TS Server"
```
### Environment Variables Not Loading
```bash
# Verify .env file exists at project root
# Regenerate from config.yaml
npm run generate:env
# Verify files exist
cat .env
cat honeycomb/.env
cat hive/.env
# Or check shell environment
echo $ANTHROPIC_API_KEY
# Restart dev servers after changing env
```
# Create .env if needed
# Then add your API keys
### Tests Failing
```bash
# Run with verbose output
npm run test -w honeycomb -- --reporter=verbose
# Run single test file
npm run test -w honeycomb -- src/components/Button.test.tsx
# Clear test cache
npm run test -w honeycomb -- --clearCache
```
---
+346
View File
@@ -0,0 +1,346 @@
# Agent Development Environment Setup
Complete setup guide for building and running goal-driven agents with the Aden Agent Framework.
## Quick Setup
```bash
# Run the automated setup script
./scripts/setup-python.sh
```
This will:
- Check Python version (requires 3.11+)
- Install the core framework package (`framework`)
- Install the tools package (`aden_tools`)
- Fix package compatibility issues (openai + litellm)
- Verify all installations
## Manual Setup (Alternative)
If you prefer to set up manually or the script fails:
### 1. Install Core Framework
```bash
cd core
pip install -e .
```
### 2. Install Tools Package
```bash
cd tools
pip install -e .
```
### 3. Upgrade OpenAI Package
```bash
# litellm requires openai >= 1.0.0
pip install --upgrade "openai>=1.0.0"
```
### 4. Verify Installation
```bash
python -c "import framework; print('✓ framework OK')"
python -c "import aden_tools; print('✓ aden_tools OK')"
python -c "import litellm; print('✓ litellm OK')"
```
## Requirements
### Python Version
- **Minimum:** Python 3.11
- **Recommended:** Python 3.11 or 3.12
- **Tested on:** Python 3.11, 3.12, 3.13
### System Requirements
- pip (latest version)
- 2GB+ RAM
- Internet connection (for LLM API calls)
### API Keys (Optional)
For running agents with real LLMs:
```bash
export ANTHROPIC_API_KEY="your-key-here"
```
## Running Agents
All agent commands must be run from the project root with `PYTHONPATH` set:
```bash
# From /hive/ directory
PYTHONPATH=core:exports python -m agent_name COMMAND
```
### Example: Support Ticket Agent
```bash
# Validate agent structure
PYTHONPATH=core:exports python -m support_ticket_agent validate
# Show agent information
PYTHONPATH=core:exports python -m support_ticket_agent info
# Run agent with input
PYTHONPATH=core:exports python -m support_ticket_agent run --input '{
"ticket_content": "My login is broken. Error 401.",
"customer_id": "CUST-123",
"ticket_id": "TKT-456"
}'
# Run in mock mode (no LLM calls)
PYTHONPATH=core:exports python -m support_ticket_agent run --mock --input '{...}'
```
### Example: Other Agents
```bash
# Market Research Agent
PYTHONPATH=core:exports python -m market_research_agent info
# Outbound Sales Agent
PYTHONPATH=core:exports python -m outbound_sales_agent validate
# Personal Assistant Agent
PYTHONPATH=core:exports python -m personal_assistant_agent run --input '{...}'
```
## Building New Agents
Use Claude Code CLI with the agent building skills:
### 1. Install Skills (One-time)
```bash
./quickstart.sh
```
This installs:
- `/building-agents` - Build new agents
- `/testing-agent` - Test agents
### 2. Build an Agent
```
claude> /building-agents
```
Follow the prompts to:
1. Define your agent's goal
2. Design the workflow nodes
3. Connect edges
4. Generate the agent package
### 3. Test Your Agent
```
claude> /testing-agent
```
Creates comprehensive test suites for your agent.
## Troubleshooting
### "ModuleNotFoundError: No module named 'framework'"
**Solution:** Install the core package:
```bash
cd core && pip install -e .
```
### "ModuleNotFoundError: No module named 'aden_tools'"
**Solution:** Install the tools package:
```bash
cd tools && pip install -e .
```
Or run the setup script:
```bash
./scripts/setup-python.sh
```
### "ModuleNotFoundError: No module named 'openai.\_models'"
**Cause:** Outdated `openai` package (0.27.x) incompatible with `litellm`
**Solution:** Upgrade openai:
```bash
pip install --upgrade "openai>=1.0.0"
```
### "No module named 'support_ticket_agent'"
**Cause:** Not running from project root or missing PYTHONPATH
**Solution:** Ensure you're in `/home/timothy/oss/hive/` and use:
```bash
PYTHONPATH=core:exports python -m support_ticket_agent validate
```
### Agent imports fail with "broken installation"
**Symptom:** `pip list` shows packages pointing to non-existent directories
**Solution:** Reinstall packages properly:
```bash
# Remove broken installations
pip uninstall -y framework tools
# Reinstall correctly
./scripts/setup-python.sh
```
## Package Structure
The Hive framework consists of three Python packages:
```
hive/
├── core/ # Core framework (runtime, graph executor, LLM providers)
│ ├── framework/
│ ├── pyproject.toml
│ └── requirements.txt
├── tools/ # Tools and MCP servers
│ ├── src/
│ │ └── aden_tools/ # Actual package location
│ ├── pyproject.toml
│ └── README.md
└── exports/ # Agent packages (your agents go here)
├── support_ticket_agent/
├── market_research_agent/
├── outbound_sales_agent/
└── personal_assistant_agent/
```
### Why PYTHONPATH is Required
The packages are installed in **editable mode** (`pip install -e`), which means:
- `framework` and `aden_tools` are globally importable (no PYTHONPATH needed)
- `exports` is NOT installed as a package (PYTHONPATH required)
This design allows agents in `exports/` to be:
- Developed independently
- Version controlled separately
- Deployed as standalone packages
## Development Workflow
### 1. Setup (Once)
```bash
./scripts/setup-python.sh
```
### 2. Build Agent (Claude Code)
```
claude> /building-agents
Enter goal: "Build an agent that processes customer support tickets"
```
### 3. Validate Agent
```bash
PYTHONPATH=core:exports python -m support_ticket_agent validate
```
### 4. Test Agent
```
claude> /testing-agent
```
### 5. Run Agent
```bash
PYTHONPATH=core:exports python -m support_ticket_agent run --input '{...}'
```
## IDE Setup
### VSCode
Add to `.vscode/settings.json`:
```json
{
"python.analysis.extraPaths": [
"${workspaceFolder}/core",
"${workspaceFolder}/exports"
],
"python.autoComplete.extraPaths": [
"${workspaceFolder}/core",
"${workspaceFolder}/exports"
]
}
```
### PyCharm
1. Open Project Settings → Project Structure
2. Mark `core` as Sources Root
3. Mark `exports` as Sources Root
## Environment Variables
### Required for LLM Operations
```bash
export ANTHROPIC_API_KEY="sk-ant-..."
```
### Optional Configuration
```bash
# Credentials storage location (default: ~/.aden/credentials)
export ADEN_CREDENTIALS_PATH="/custom/path"
# Agent storage location (default: /tmp)
export AGENT_STORAGE_PATH="/custom/storage"
```
## Additional Resources
- **Framework Documentation:** [core/README.md](core/README.md)
- **Tools Documentation:** [tools/README.md](tools/README.md)
- **Example Agents:** [exports/](exports/)
- **Agent Building Guide:** [.claude/skills/building-agents-construction/SKILL.md](.claude/skills/building-agents-construction/SKILL.md)
- **Testing Guide:** [.claude/skills/testing-agent/SKILL.md](.claude/skills/testing-agent/SKILL.md)
## Contributing
When contributing agent packages:
1. Place agents in `exports/agent_name/`
2. Follow the standard agent structure (see existing agents)
3. Include README.md with usage instructions
4. Add tests if using `/testing-agent`
5. Document required environment variables
## Support
- **Issues:** https://github.com/adenhq/hive/issues
- **Discord:** https://discord.com/invite/MXE49hrKDk
- **Documentation:** https://docs.adenhq.com/
-34
View File
@@ -1,34 +0,0 @@
.PHONY: lint format check test install-hooks help frontend-dev frontend-build
help: ## Show this help
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \
awk 'BEGIN {FS = ":.*?## "}; {printf " \033[36m%-15s\033[0m %s\n", $$1, $$2}'
lint: ## Run ruff linter and formatter (with auto-fix)
cd core && ruff check --fix .
cd tools && ruff check --fix .
cd core && ruff format .
cd tools && ruff format .
format: ## Run ruff formatter
cd core && ruff format .
cd tools && ruff format .
check: ## Run all checks without modifying files (CI-safe)
cd core && ruff check .
cd tools && ruff check .
cd core && ruff format --check .
cd tools && ruff format --check .
test: ## Run all tests
cd core && uv run python -m pytest tests/ -v
install-hooks: ## Install pre-commit hooks
uv pip install pre-commit
pre-commit install
frontend-dev: ## Start frontend dev server
cd core/frontend && npm run dev
frontend-build: ## Build frontend for production
cd core/frontend && npm run build
+41 -40
View File
@@ -3,17 +3,18 @@
</p>
<p align="center">
<a href="../../README.md">English</a> |
<a href="zh-CN.md">简体中文</a> |
<a href="es.md">Español</a> |
<a href="pt.md">Português</a> |
<a href="ja.md">日本語</a> |
<a href="ru.md">Русский</a> |
<a href="ko.md">한국어</a>
<a href="README.md">English</a> |
<a href="README.zh-CN.md">简体中文</a> |
<a href="README.es.md">Español</a> |
<a href="README.pt.md">Português</a> |
<a href="README.ja.md">日本語</a> |
<a href="README.ru.md">Русский</a> |
<a href="README.ko.md">한국어</a>
</p>
[![Apache 2.0 License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/adenhq/hive/blob/main/LICENSE)
[![Y Combinator](https://img.shields.io/badge/Y%20Combinator-Aden-orange)](https://www.ycombinator.com/companies/aden)
[![Docker Pulls](https://img.shields.io/docker/pulls/adenhq/hive?logo=Docker&labelColor=%23528bff)](https://hub.docker.com/u/adenhq)
[![Discord](https://img.shields.io/discord/1172610340073242735?logo=discord&labelColor=%235462eb&logoColor=%23f5f5f5&color=%235462eb)](https://discord.com/invite/MXE49hrKDk)
[![Twitter Follow](https://img.shields.io/twitter/follow/teamaden?logo=X&color=%23f5f5f5)](https://x.com/aden_hq)
[![LinkedIn](https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff)](https://www.linkedin.com/company/teamaden/)
@@ -29,7 +30,7 @@
<img src="https://img.shields.io/badge/OpenAI-supported-412991?style=flat-square&logo=openai" alt="OpenAI" />
<img src="https://img.shields.io/badge/Anthropic-supported-d4a574?style=flat-square" alt="Anthropic" />
<img src="https://img.shields.io/badge/Google_Gemini-supported-4285F4?style=flat-square&logo=google" alt="Gemini" />
<img src="https://img.shields.io/badge/MCP-Tools-00ADD8?style=flat-square" alt="MCP" />
<img src="https://img.shields.io/badge/MCP-19_Tools-00ADD8?style=flat-square" alt="MCP" />
</p>
## Descripción General
@@ -41,7 +42,7 @@ Visita [adenhq.com](https://adenhq.com) para documentación completa, ejemplos y
## ¿Qué es Aden?
<p align="center">
<img width="100%" alt="Aden Architecture" src="../assets/aden-architecture-diagram.jpg" />
<img width="100%" alt="Aden Architecture" src="docs/assets/aden-architecture-diagram.jpg" />
</p>
Aden es una plataforma para construir, desplegar, operar y adaptar agentes de IA:
@@ -65,6 +66,7 @@ Aden es una plataforma para construir, desplegar, operar y adaptar agentes de IA
### Prerrequisitos
- [Python 3.11+](https://www.python.org/downloads/) - Para desarrollo de agentes
- [Docker](https://docs.docker.com/get-docker/) (v20.10+) - Opcional, para herramientas en contenedores
### Instalación
@@ -74,11 +76,10 @@ git clone https://github.com/adenhq/hive.git
cd hive
# Ejecutar configuración del entorno Python
./quickstart.sh
./scripts/setup-python.sh
```
Esto instala:
- **framework** - Runtime del agente principal y ejecutor de grafos
- **aden_tools** - 19 herramientas MCP para capacidades de agentes
- Todas las dependencias requeridas
@@ -90,16 +91,16 @@ Esto instala:
./quickstart.sh
# Construir un agente usando Claude Code
claude> /hive
claude> /building-agents
# Probar tu agente
claude> /hive-test
claude> /testing-agent
# Ejecutar tu agente
PYTHONPATH=exports uv run python -m your_agent_name run --input '{...}'
PYTHONPATH=core:exports python -m your_agent_name run --input '{...}'
```
**[📖 Guía de Configuración Completa](../environment-setup.md)** - Instrucciones detalladas para desarrollo de agentes
**[📖 Guía de Configuración Completa](ENVIRONMENT_SETUP.md)** - Instrucciones detalladas para desarrollo de agentes
## Características
@@ -119,7 +120,7 @@ Los frameworks de agentes tradicionales requieren que diseñes manualmente flujo
```mermaid
flowchart LR
subgraph BUILD["🏗️ BUILD"]
GOAL["Define Goal<br/>+ Success Criteria"] --> NODES["Add Nodes<br/>Event Loop"]
GOAL["Define Goal<br/>+ Success Criteria"] --> NODES["Add Nodes<br/>LLM/Router/Function"]
NODES --> EDGES["Connect Edges<br/>on_success/failure/conditional"]
EDGES --> TEST["Test & Validate"] --> APPROVE["Approve & Export"]
end
@@ -163,14 +164,14 @@ flowchart LR
### La Ventaja de Aden
| Frameworks Tradicionales | Aden |
| ----------------------------------------- | -------------------------------------------- |
| Codificar flujos de trabajo de agentes | Describir objetivos en lenguaje natural |
| Definición manual de grafos | Grafos de agentes auto-generados |
| Manejo reactivo de errores | Auto-evolución proactiva |
| Configuraciones de herramientas estáticas | Nodos dinámicos envueltos en SDK |
| Configuración de monitoreo separada | Observabilidad en tiempo real integrada |
| Gestión de presupuesto DIY | Controles de costos y degradación integrados |
| Frameworks Tradicionales | Aden |
|--------------------------|------|
| Codificar flujos de trabajo de agentes | Describir objetivos en lenguaje natural |
| Definición manual de grafos | Grafos de agentes auto-generados |
| Manejo reactivo de errores | Auto-evolución proactiva |
| Configuraciones de herramientas estáticas | Nodos dinámicos envueltos en SDK |
| Configuración de monitoreo separada | Observabilidad en tiempo real integrada |
| Gestión de presupuesto DIY | Controles de costos y degradación integrados |
### Cómo Funciona
@@ -214,7 +215,10 @@ hive/
├── docs/ # Documentación y guías
├── scripts/ # Scripts de construcción y utilidades
├── .claude/ # Habilidades de Claude Code para construir agentes
├── ENVIRONMENT_SETUP.md # Guía de configuración de Python para desarrollo de agentes
├── DEVELOPER.md # Guía del desarrollador
├── CONTRIBUTING.md # Directrices de contribución
└── ROADMAP.md # Hoja de ruta del producto
```
## Desarrollo
@@ -225,7 +229,7 @@ Para construir y ejecutar agentes orientados a objetivos con el framework:
```bash
# Configuración única
./quickstart.sh
./scripts/setup-python.sh
# Esto instala:
# - paquete framework (runtime principal)
@@ -233,29 +237,29 @@ Para construir y ejecutar agentes orientados a objetivos con el framework:
# - Todas las dependencias
# Construir nuevos agentes usando habilidades de Claude Code
claude> /hive
claude> /building-agents
# Probar agentes
claude> /hive-test
claude> /testing-agent
# Ejecutar agentes
PYTHONPATH=exports uv run python -m agent_name run --input '{...}'
PYTHONPATH=core:exports python -m agent_name run --input '{...}'
```
Consulta [environment-setup.md](../environment-setup.md) para instrucciones de configuración completas.
Consulta [ENVIRONMENT_SETUP.md](ENVIRONMENT_SETUP.md) para instrucciones de configuración completas.
## Documentación
- **[Guía del Desarrollador](../developer-guide.md)** - Guía completa para desarrolladores
- **[Guía del Desarrollador](DEVELOPER.md)** - Guía completa para desarrolladores
- [Primeros Pasos](docs/getting-started.md) - Instrucciones de configuración rápida
- [Guía de Configuración](docs/configuration.md) - Todas las opciones de configuración
- [Visión General de Arquitectura](docs/architecture/README.md) - Diseño y estructura del sistema
- [Visión General de Arquitectura](docs/architecture.md) - Diseño y estructura del sistema
## Hoja de Ruta
El Framework de Agentes Aden tiene como objetivo ayudar a los desarrolladores a construir agentes auto-adaptativos orientados a resultados. Encuentra nuestra hoja de ruta aquí
[roadmap.md](../roadmap.md)
[ROADMAP.md](ROADMAP.md)
```mermaid
timeline
@@ -285,14 +289,11 @@ Usamos [Discord](https://discord.com/invite/MXE49hrKDk) para soporte, solicitude
¡Damos la bienvenida a las contribuciones! Por favor consulta [CONTRIBUTING.md](CONTRIBUTING.md) para las directrices.
**Importante:** Por favor, solicita que se te asigne un issue antes de enviar un PR. Comenta en el issue para reclamarlo y un mantenedor te lo asignará en 24 horas. Esto ayuda a evitar trabajo duplicado.
1. Encuentra o crea un issue y solicita asignación
2. Haz fork del repositorio
3. Crea tu rama de funcionalidad (`git checkout -b feature/amazing-feature`)
4. Haz commit de tus cambios (`git commit -m 'Add amazing feature'`)
5. Haz push a la rama (`git push origin feature/amazing-feature`)
6. Abre un Pull Request
1. Haz fork del repositorio
2. Crea tu rama de funcionalidad (`git checkout -b feature/amazing-feature`)
3. Haz commit de tus cambios (`git commit -m 'Add amazing feature'`)
4. Haz push a la rama (`git push origin feature/amazing-feature`)
5. Abre un Pull Request
## Únete a Nuestro Equipo
+72 -73
View File
@@ -3,13 +3,13 @@
</p>
<p align="center">
<a href="../../README.md">English</a> |
<a href="zh-CN.md">简体中文</a> |
<a href="es.md">Español</a> |
<a href="pt.md">Português</a> |
<a href="ja.md">日本語</a> |
<a href="ru.md">Русский</a> |
<a href="ko.md">한국어</a>
<a href="README.md">English</a> |
<a href="README.zh-CN.md">简体中文</a> |
<a href="README.es.md">Español</a> |
<a href="README.pt.md">Português</a> |
<a href="README.ja.md">日本語</a> |
<a href="README.ru.md">Русский</a> |
<a href="README.ko.md">한국어</a>
</p>
[![Apache 2.0 License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/adenhq/hive/blob/main/LICENSE)
@@ -30,33 +30,33 @@
<img src="https://img.shields.io/badge/OpenAI-supported-412991?style=flat-square&logo=openai" alt="OpenAI" />
<img src="https://img.shields.io/badge/Anthropic-supported-d4a574?style=flat-square" alt="Anthropic" />
<img src="https://img.shields.io/badge/Google_Gemini-supported-4285F4?style=flat-square&logo=google" alt="Gemini" />
<img src="https://img.shields.io/badge/MCP-Tools-00ADD8?style=flat-square" alt="MCP" />
<img src="https://img.shields.io/badge/MCP-19_Tools-00ADD8?style=flat-square" alt="MCP" />
</p>
## 概要
ワークフローをハードコーディングせずに、信頼性の高い自己改善型 AI エージェントを構築できます。コーディングエージェントとの会話を通じて目標を定義すると、フレームワークが動的に作成された接続コードを持つノードグラフを生成します。問題が発生すると、フレームワークは障害データをキャプチャし、コーディングエージェントを通じてエージェントを進化させ、再デプロイします。組み込みのヒューマンインザループノード、認証情報管理、リアルタイムモニタリングにより、適応性を損なうことなく制御を維持できます。
ワークフローをハードコーディングせずに、信頼性の高い自己改善型AIエージェントを構築できます。コーディングエージェントとの会話を通じて目標を定義すると、フレームワークが動的に作成された接続コードを持つノードグラフを生成します。問題が発生すると、フレームワークは障害データをキャプチャし、コーディングエージェントを通じてエージェントを進化させ、再デプロイします。組み込みのヒューマンインザループノード、認証情報管理、リアルタイムモニタリングにより、適応性を損なうことなく制御を維持できます。
完全なドキュメント、例、ガイドについては [adenhq.com](https://adenhq.com) をご覧ください。
## Aden とは
## Adenとは
<p align="center">
<img width="100%" alt="Aden Architecture" src="../assets/aden-architecture-diagram.jpg" />
<img width="100%" alt="Aden Architecture" src="docs/assets/aden-architecture-diagram.jpg" />
</p>
Aden は、AI エージェントの構築、デプロイ、運用、適応のためのプラットフォームです:
Adenは、AIエージェントの構築、デプロイ、運用、適応のためのプラットフォームです:
- **構築** - コーディングエージェントが自然言語の目標から専門的なワーカーエージェント(セールス、マーケティング、オペレーション)を生成
- **デプロイ** - CI/CD 統合と完全な API ライフサイクル管理を備えたヘッドレスデプロイメント
- **デプロイ** - CI/CD統合と完全なAPIライフサイクル管理を備えたヘッドレスデプロイメント
- **運用** - リアルタイムモニタリング、可観測性、ランタイムガードレールがエージェントの信頼性を維持
- **適応** - 継続的な評価、監督、適応により、エージェントは時間とともに改善
- **インフラ** - 共有メモリ、LLM 統合、ツール、スキルがすべてのエージェントを支援
- **インフラ** - 共有メモリ、LLM統合、ツール、スキルがすべてのエージェントを支援
## クイックリンク
- **[ドキュメント](https://docs.adenhq.com/)** - 完全なガイドと API リファレンス
- **[セルフホスティングガイド](https://docs.adenhq.com/getting-started/quickstart)** - インフラストラクチャへの Hive デプロイ
- **[ドキュメント](https://docs.adenhq.com/)** - 完全なガイドとAPIリファレンス
- **[セルフホスティングガイド](https://docs.adenhq.com/getting-started/quickstart)** - インフラストラクチャへのHiveデプロイ
- **[変更履歴](https://github.com/adenhq/hive/releases)** - 最新の更新とリリース
<!-- - **[ロードマップ](https://adenhq.com/roadmap)** - 今後の機能と計画 -->
- **[問題を報告](https://github.com/adenhq/hive/issues)** - バグレポートと機能リクエスト
@@ -76,13 +76,12 @@ git clone https://github.com/adenhq/hive.git
cd hive
# Python環境セットアップを実行
./quickstart.sh
./scripts/setup-python.sh
```
これにより以下がインストールされます:
- **framework** - コアエージェントランタイムとグラフエグゼキュータ
- **aden_tools** - エージェント機能のための 19 個の MCP ツール
- **aden_tools** - エージェント機能のための19個のMCPツール
- すべての必要な依存関係
### 最初のエージェントを構築
@@ -92,36 +91,36 @@ cd hive
./quickstart.sh
# Claude Codeを使用してエージェントを構築
claude> /hive
claude> /building-agents
# エージェントをテスト
claude> /hive-test
claude> /testing-agent
# エージェントを実行
PYTHONPATH=exports uv run python -m your_agent_name run --input '{...}'
PYTHONPATH=core:exports python -m your_agent_name run --input '{...}'
```
**[📖 完全セットアップガイド](../environment-setup.md)** - エージェント開発の詳細な手順
**[📖 完全セットアップガイド](ENVIRONMENT_SETUP.md)** - エージェント開発の詳細な手順
## 機能
- **目標駆動開発** - 自然言語で目標を定義;コーディングエージェントがそれを達成するためのエージェントグラフと接続コードを生成
- **自己適応エージェント** - フレームワークが障害をキャプチャし、目標を更新し、エージェントグラフを更新
- **動的ノード接続** - 事前定義されたエッジなし;接続コードは目標に基づいて任意の対応 LLM によって生成
- **SDK ラップノード** - すべてのノードが共有メモリ、ローカル RLM メモリ、モニタリング、ツール、LLM アクセスを標準装備
- **動的ノード接続** - 事前定義されたエッジなし;接続コードは目標に基づいて任意の対応LLMによって生成
- **SDKラップノード** - すべてのノードが共有メモリ、ローカルRLMメモリ、モニタリング、ツール、LLMアクセスを標準装備
- **ヒューマンインザループ** - 設定可能なタイムアウトとエスカレーションを備えた、人間の入力のために実行を一時停止する介入ノード
- **リアルタイム可観測性** - エージェント実行、決定、ノード間通信のライブモニタリングのための WebSocket ストリーミング
- **リアルタイム可観測性** - エージェント実行、決定、ノード間通信のライブモニタリングのためのWebSocketストリーミング
- **コストと予算管理** - 支出制限、スロットル、自動モデル劣化ポリシーを設定
- **本番環境対応** - セルフホスト可能、スケールと信頼性のために構築
## なぜ Aden
## なぜAdenか
従来のエージェントフレームワークでは、ワークフローを手動で設計し、エージェントの相互作用を定義し、障害を事後的に処理する必要があります。Aden はこのパラダイムを逆転させます—**結果を記述すれば、システムが自ら構築します**。
従来のエージェントフレームワークでは、ワークフローを手動で設計し、エージェントの相互作用を定義し、障害を事後的に処理する必要があります。Adenはこのパラダイムを逆転させます—**結果を記述すれば、システムが自ら構築します**。
```mermaid
flowchart LR
subgraph BUILD["🏗️ BUILD"]
GOAL["Define Goal<br/>+ Success Criteria"] --> NODES["Add Nodes<br/>Event Loop"]
GOAL["Define Goal<br/>+ Success Criteria"] --> NODES["Add Nodes<br/>LLM/Router/Function"]
NODES --> EDGES["Connect Edges<br/>on_success/failure/conditional"]
EDGES --> TEST["Test & Validate"] --> APPROVE["Approve & Export"]
end
@@ -163,34 +162,34 @@ flowchart LR
style STORE fill:#ed8c00,stroke:#cc5d00,stroke-width:2px,color:#fff
```
### Aden の優位性
### Adenの優位性
| 従来のフレームワーク | Aden |
| -------------------------------------- | -------------------------------- |
| エージェントワークフローをハードコード | 自然言語で目標を記述 |
| 手動でグラフを定義 | 自動生成されるエージェントグラフ |
| 事後的なエラー処理 | プロアクティブな自己進化 |
| 静的なツール設定 | 動的な SDK ラップノード |
| 別途モニタリング設定 | 組み込みのリアルタイム可観測性 |
| DIY 予算管理 | 統合されたコスト制御と劣化 |
| 従来のフレームワーク | Aden |
|----------------------|------|
| エージェントワークフローをハードコード | 自然言語で目標を記述 |
| 手動でグラフを定義 | 自動生成されるエージェントグラフ |
| 事後的なエラー処理 | プロアクティブな自己進化 |
| 静的なツール設定 | 動的なSDKラップノード |
| 別途モニタリング設定 | 組み込みのリアルタイム可観測性 |
| DIY予算管理 | 統合されたコスト制御と劣化 |
### 仕組み
1. **目標を定義** → 達成したいことを平易な言葉で記述
2. **コーディングエージェントが生成** → エージェントグラフ、接続コード、テストケースを作成
3. **ワーカーが実行** → SDK ラップノードが完全な可観測性とツールアクセスで実行
3. **ワーカーが実行** → SDKラップノードが完全な可観測性とツールアクセスで実行
4. **コントロールプレーンが監視** → リアルタイムメトリクス、予算執行、ポリシー管理
5. **自己改善** → 障害時、システムがグラフを進化させ自動的に再デプロイ
## Aden の比較
## Adenの比較
Aden はエージェント開発に根本的に異なるアプローチを採用しています。ほとんどのフレームワークがワークフローをハードコードするか、エージェントグラフを手動で定義することを要求するのに対し、Aden は**コーディングエージェントを使用して自然言語の目標からエージェントシステム全体を生成**します。エージェントが失敗した場合、フレームワークは単にエラーをログに記録するだけでなく—**自動的にエージェントグラフを進化させ**、再デプロイします。
Adenはエージェント開発に根本的に異なるアプローチを採用しています。ほとんどのフレームワークがワークフローをハードコードするか、エージェントグラフを手動で定義することを要求するのに対し、Adenは**コーディングエージェントを使用して自然言語の目標からエージェントシステム全体を生成**します。エージェントが失敗した場合、フレームワークは単にエラーをログに記録するだけでなく—**自動的にエージェントグラフを進化させ**、再デプロイします。
> **注意:** 詳細なフレームワーク比較表とよくある質問については、英語の[README.md](README.md)を参照してください。
### Aden を選ぶべきとき
### Adenを選ぶべきとき
Aden を選択する場合:
Adenを選択する場合:
- 手動介入なしに**失敗から自己改善する**エージェントが必要
- ワークフローではなく結果を記述する**目標駆動開発**が必要
@@ -201,7 +200,7 @@ Aden を選択する場合:
他のフレームワークを選択する場合:
- **型安全で予測可能なワークフロー**PydanticAI、Mastra
- **RAG とドキュメント処理**LlamaIndex、Haystack
- **RAGとドキュメント処理**LlamaIndex、Haystack
- **エージェント創発の研究**CAMEL
- **リアルタイム音声/マルチモーダル**TEN Framework
- **シンプルなコンポーネント連鎖**LangChain、Swarm
@@ -216,18 +215,21 @@ hive/
├── docs/ # ドキュメントとガイド
├── scripts/ # ビルドとユーティリティスクリプト
├── .claude/ # エージェント構築用のClaude Codeスキル
├── ENVIRONMENT_SETUP.md # エージェント開発用のPythonセットアップガイド
├── DEVELOPER.md # 開発者ガイド
├── CONTRIBUTING.md # 貢献ガイドライン
└── ROADMAP.md # プロダクトロードマップ
```
## 開発
### Python エージェント開発
### Pythonエージェント開発
フレームワークで目標駆動エージェントを構築および実行するには:
```bash
# 1回限りのセットアップ
./quickstart.sh
./scripts/setup-python.sh
# これにより以下がインストールされます:
# - frameworkパッケージ(コアランタイム)
@@ -235,29 +237,29 @@ hive/
# - すべての依存関係
# Claude Codeスキルを使用して新しいエージェントを構築
claude> /hive
claude> /building-agents
# エージェントをテスト
claude> /hive-test
claude> /testing-agent
# エージェントを実行
PYTHONPATH=exports uv run python -m agent_name run --input '{...}'
PYTHONPATH=core:exports python -m agent_name run --input '{...}'
```
完全なセットアップ手順については、[environment-setup.md](../environment-setup.md)を参照してください。
完全なセットアップ手順については、[ENVIRONMENT_SETUP.md](ENVIRONMENT_SETUP.md)を参照してください。
## ドキュメント
- **[開発者ガイド](../developer-guide.md)** - 開発者向け総合ガイド
- **[開発者ガイド](DEVELOPER.md)** - 開発者向け総合ガイド
- [はじめに](docs/getting-started.md) - クイックセットアップ手順
- [設定ガイド](docs/configuration.md) - すべての設定オプション
- [アーキテクチャ概要](docs/architecture/README.md) - システム設計と構造
- [アーキテクチャ概要](docs/architecture.md) - システム設計と構造
## ロードマップ
Aden エージェントフレームワークは、開発者が結果志向で自己適応するエージェントを構築できるよう支援することを目指しています。ロードマップはこちらをご覧ください
Adenエージェントフレームワークは、開発者が結果志向で自己適応するエージェントを構築できるよう支援することを目指しています。ロードマップはこちらをご覧ください
[roadmap.md](../roadmap.md)
[ROADMAP.md](ROADMAP.md)
```mermaid
timeline
@@ -287,14 +289,11 @@ timeline
貢献を歓迎します!ガイドラインについては[CONTRIBUTING.md](CONTRIBUTING.md)をご覧ください。
**重要:** PR を提出する前に、まず Issue にアサインされてください。Issue にコメントして担当を申請すると、メンテナーが 24 時間以内にアサインします。これにより重複作業を防ぐことができます。
1. Issue を見つけるか作成し、アサインを受ける
2. リポジトリをフォーク
3. 機能ブランチを作成 (`git checkout -b feature/amazing-feature`)
4. 変更をコミット (`git commit -m 'Add amazing feature'`)
5. ブランチにプッシュ (`git push origin feature/amazing-feature`)
6. プルリクエストを開く
1. リポジトリをフォーク
2. 機能ブランチを作成 (`git checkout -b feature/amazing-feature`)
3. 変更をコミット (`git commit -m 'Add amazing feature'`)
4. ブランチにプッシュ (`git push origin feature/amazing-feature`)
5. プルリクエストを開く
## チームに参加
@@ -308,31 +307,31 @@ timeline
## ライセンス
このプロジェクトは Apache License 2.0 の下でライセンスされています - 詳細は[LICENSE](LICENSE)ファイルをご覧ください。
このプロジェクトはApache License 2.0の下でライセンスされています - 詳細は[LICENSE](LICENSE)ファイルをご覧ください。
## よくある質問 (FAQ)
> **注意:** よくある質問の完全版については、英語の[README.md](README.md)を参照してください。
**Q: AdenLangChain や他のエージェントフレームワークに依存していますか?**
**Q: AdenLangChainや他のエージェントフレームワークに依存していますか?**
いいえ。AdenLangChain、CrewAI、その他のエージェントフレームワークに依存せずにゼロから構築されています。フレームワークは軽量で柔軟に設計されており、事前定義されたコンポーネントに依存するのではなく、エージェントグラフを動的に生成します。
いいえ。AdenLangChain、CrewAI、その他のエージェントフレームワークに依存せずにゼロから構築されています。フレームワークは軽量で柔軟に設計されており、事前定義されたコンポーネントに依存するのではなく、エージェントグラフを動的に生成します。
**Q: Aden はどの LLM プロバイダーをサポートしていますか?**
**Q: AdenはどのLLMプロバイダーをサポートしていますか?**
AdenLiteLLM 統合を通じて 100 以上の LLM プロバイダーをサポートしており、OpenAIGPT-4、GPT-4o)、AnthropicClaude モデル)、Google Gemini、Mistral、Groq などが含まれます。適切な API キー環境変数を設定し、モデル名を指定するだけです。
AdenLiteLLM統合を通じて100以上のLLMプロバイダーをサポートしており、OpenAIGPT-4、GPT-4o)、AnthropicClaudeモデル)、Google Gemini、Mistral、Groqなどが含まれます。適切なAPIキー環境変数を設定し、モデル名を指定するだけです。
**Q: Aden はオープンソースですか?**
**Q: Adenはオープンソースですか?**
はい、AdenApache License 2.0 の下で完全にオープンソースです。コミュニティの貢献とコラボレーションを積極的に奨励しています。
はい、AdenApache License 2.0の下で完全にオープンソースです。コミュニティの貢献とコラボレーションを積極的に奨励しています。
**Q: Aden は他のエージェントフレームワークと何が違いますか?**
**Q: Adenは他のエージェントフレームワークと何が違いますか?**
Aden はコーディングエージェントを使用して自然言語の目標からエージェントシステム全体を生成します—ワークフローをハードコードしたり、グラフを手動で定義したりする必要はありません。エージェントが失敗すると、フレームワークは自動的に障害データをキャプチャし、エージェントグラフを進化させ、再デプロイします。この自己改善ループは Aden 独自のものです。
Adenはコーディングエージェントを使用して自然言語の目標からエージェントシステム全体を生成します—ワークフローをハードコードしたり、グラフを手動で定義したりする必要はありません。エージェントが失敗すると、フレームワークは自動的に障害データをキャプチャし、エージェントグラフを進化させ、再デプロイします。この自己改善ループはAden独自のものです。
**Q: Aden はヒューマンインザループワークフローをサポートしていますか?**
**Q: Adenはヒューマンインザループワークフローをサポートしていますか?**
はい、Aden は人間の入力のために実行を一時停止する介入ノードを通じて、ヒューマンインザループワークフローを完全にサポートしています。設定可能なタイムアウトとエスカレーションポリシーが含まれており、人間の専門家と AI エージェントのシームレスなコラボレーションを可能にします。
はい、Adenは人間の入力のために実行を一時停止する介入ノードを通じて、ヒューマンインザループワークフローを完全にサポートしています。設定可能なタイムアウトとエスカレーションポリシーが含まれており、人間の専門家とAIエージェントのシームレスなコラボレーションを可能にします。
---
+33 -33
View File
@@ -3,13 +3,13 @@
</p>
<p align="center">
<a href="../../README.md">English</a> |
<a href="zh-CN.md">简体中文</a> |
<a href="es.md">Español</a> |
<a href="pt.md">Português</a> |
<a href="ja.md">日本語</a> |
<a href="ru.md">Русский</a> |
<a href="ko.md">한국어</a>
<a href="README.md">English</a> |
<a href="README.zh-CN.md">简体中文</a> |
<a href="README.es.md">Español</a> |
<a href="README.pt.md">Português</a> |
<a href="README.ja.md">日本語</a> |
<a href="README.ru.md">Русский</a> |
<a href="README.ko.md">한국어</a>
</p>
[![Apache 2.0 License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/adenhq/hive/blob/main/LICENSE)
@@ -30,7 +30,7 @@
<img src="https://img.shields.io/badge/OpenAI-supported-412991?style=flat-square&logo=openai" alt="OpenAI" />
<img src="https://img.shields.io/badge/Anthropic-supported-d4a574?style=flat-square" alt="Anthropic" />
<img src="https://img.shields.io/badge/Google_Gemini-supported-4285F4?style=flat-square&logo=google" alt="Gemini" />
<img src="https://img.shields.io/badge/MCP-Tools-00ADD8?style=flat-square" alt="MCP" />
<img src="https://img.shields.io/badge/MCP-19_Tools-00ADD8?style=flat-square" alt="MCP" />
</p>
## 개요
@@ -42,7 +42,7 @@
## Aden이란 무엇인가
<p align="center">
<img width="100%" alt="Aden Architecture" src="../assets/aden-architecture-diagram.jpg" />
<img width="100%" alt="Aden Architecture" src="docs/assets/aden-architecture-diagram.jpg" />
</p>
Aden은 AI 에이전트를 구축, 배포, 운영, 적응시키기 위한 플랫폼입니다:
@@ -76,7 +76,7 @@ git clone https://github.com/adenhq/hive.git
cd hive
# Python 환경 설정 실행
./quickstart.sh
./scripts/setup-python.sh
```
다음 요소들이 설치됩니다:
@@ -91,16 +91,16 @@ cd hive
./quickstart.sh
# Claude Code를 사용해 에이전트 빌드
claude> /hive
claude> /building-agents
# 에이전트 테스트
claude> /hive-test
claude> /testing-agent
# 에이전트 실행
PYTHONPATH=exports uv run python -m your_agent_name run --input '{...}'
PYTHONPATH=core:exports python -m your_agent_name run --input '{...}'
```
**[📖 전체 설정 가이드](../environment-setup.md)** - 에이전트 개발을 위한 상세한 설명
**[📖 전체 설정 가이드](ENVIRONMENT_SETUP.md)** - 에이전트 개발을 위한 상세한 설명
## 주요 기능
@@ -120,7 +120,7 @@ PYTHONPATH=exports uv run python -m your_agent_name run --input '{...}'
```mermaid
flowchart LR
subgraph BUILD["🏗️ BUILD"]
GOAL["Define Goal<br/>+ Success Criteria"] --> NODES["Add Nodes<br/>Event Loop"]
GOAL["Define Goal<br/>+ Success Criteria"] --> NODES["Add Nodes<br/>LLM/Router/Function"]
NODES --> EDGES["Connect Edges<br/>on_success/failure/conditional"]
EDGES --> TEST["Test & Validate"] --> APPROVE["Approve & Export"]
end
@@ -226,7 +226,10 @@ hive/
├── docs/ # 문서 및 가이드
├── scripts/ # 빌드 및 유틸리티 스크립트
├── .claude/ # 에이전트 생성을 위한 Claude Code 스킬
├── ENVIRONMENT_SETUP.md # 에이전트 개발을 위한 Python 환경 설정 가이드
├── DEVELOPER.md # 개발자 가이드
├── CONTRIBUTING.md # 기여 가이드라인
└── ROADMAP.md # 제품 로드맵
```
## 개발
@@ -237,7 +240,7 @@ hive/
```bash
# 최초 1회 설정
./quickstart.sh
./scripts/setup-python.sh
# 다음 항목들이 설치됨:
# - framework 패키지 (핵심 런타임)
@@ -245,30 +248,30 @@ hive/
# - 모든 의존성
# Claude Code 스킬을 사용해 새 에이전트 생성
claude> /hive
claude> /building-agents
# 에이전트 테스트
claude> /hive-test
claude> /testing-agent
# 에이전트 실행
PYTHONPATH=exports uv run python -m agent_name run --input '{...}'
PYTHONPATH=core:exports python -m agent_name run --input '{...}'
```
전체 설정 방법은 [environment-setup.md](../environment-setup.md) 를 참고하세요.
전체 설정 방법은 [ENVIRONMENT_SETUP.md](ENVIRONMENT_SETUP.md) 를 참고하세요.
## 문서
- **[개발자 가이드](../developer-guide.md)** - 개발자를 위한 종합 가이드
- **[개발자 가이드](DEVELOPER.md)** - 개발자를 위한 종합 가이드
- [시작하기](docs/getting-started.md) - 빠른 설정 방법
- [설정 가이드](docs/configuration.md) - 모든 설정 옵션 안내
- [아키텍처 개요](docs/architecture/README.md) - 시스템 설계 및 구조
- [아키텍처 개요](docs/architecture.md) - 시스템 설계 및 구조
## 로드맵
Aden Agent Framework는 개발자가 결과 중심(outcome-oriented) 이며 자기 적응형(self-adaptive) 에이전트를 구축할 수 있도록 돕는 것을 목표로 합니다.
자세한 로드맵은 아래 문서에서 확인할 수 있습니다.
[roadmap.md](../roadmap.md)
[ROADMAP.md](ROADMAP.md)
```mermaid
timeline
@@ -298,14 +301,11 @@ Aden은 지원, 기능 요청, 커뮤니티 토론을 위해 [Discord](https://d
기여를 환영합니다. 기여 가이드라인은 [CONTRIBUTING.md](CONTRIBUTING.md)를 참고해 주세요.
**중요:** PR을 제출하기 전에 먼저 Issue에 할당받으세요. Issue에 댓글을 달아 담당을 요청하면 유지관리자가 24시간 내에 할당해 드립니다. 이는 중복 작업을 방지하는 데 도움이 됩니다.
1. Issue를 찾거나 생성하고 할당받습니다
2. 저장소를 포크합니다
3. 기능 브랜치를 생성합니다 (`git checkout -b feature/amazing-feature`)
4. 변경 사항을 커밋합니다 (`git commit -m 'Add amazing feature'`)
5. 브랜치에 푸시합니다 (`git push origin feature/amazing-feature`)
6. Pull Request를 생성합니다
1. 저장소를 포크합니다
2. 기능 브랜치를 생성합니다 (`git checkout -b feature/amazing-feature`)
3. 변경 사항을 커밋합니다 (`git commit -m 'Add amazing feature'`)
4. 브랜치에 푸시합니다 (`git push origin feature/amazing-feature`)
5. Pull Request를 생성합니다
## 팀에 합류하세요
@@ -349,7 +349,7 @@ Aden은 모니터링과 관측성을 위해 토큰 사용량, 지연 시간 메
**Q: Aden은 어떤 배포 방식을 지원하나요?**
Aden은 Python 패키지를 통한 셀프 호스팅 배포를 지원합니다. 설치 방법은 [환경 설정 가이드](../environment-setup.md)를 참조하세요. 클라우드 배포 옵션과 Kubernetes 대응 설정은 로드맵에 포함되어 있습니다.
Aden은 기본적으로 Docker Compose 배포를 지원하며, 프로덕션 및 개발 환경 설정을 모두 제공합니다. Docker를 지원하는 모든 인프라에서 셀프 호스팅이 가능합니다. 클라우드 배포 옵션과 Kubernetes 대응 설정은 로드맵에 포함되어 있습니다.
**Q: Aden은 복잡한 프로덕션 규모의 사용 사례도 처리할 수 있나요?**
@@ -377,7 +377,7 @@ Aden은 지출 한도, 호출 제한, 자동 모델 다운그레이드 정책
**Q: 예제와 문서는 어디에서 확인할 수 있나요?**
전체 가이드, API 레퍼런스, 시작 튜토리얼은 [docs.adenhq.com](https://docs.adenhq.com/) 에서 확인하실 수 있습니다. 또한 저장소의 `docs/` 디렉터리와 종합적인 [developer-guide.md](../developer-guide.md) 가이드도 함께 제공됩니다.
전체 가이드, API 레퍼런스, 시작 튜토리얼은 [docs.adenhq.com](https://docs.adenhq.com/) 에서 확인하실 수 있습니다. 또한 저장소의 `docs/` 디렉터리와 종합적인 [DEVELOPER.md](DEVELOPER.md) 가이드도 함께 제공됩니다.
**Q: Aden에 기여하려면 어떻게 해야 하나요?**
+226 -326
View File
@@ -1,31 +1,28 @@
<p align="center">
<img width="100%" alt="Hive Banner" src="https://github.com/user-attachments/assets/a027429b-5d3c-4d34-88e4-0feaeaabbab3" />
<img width="100%" alt="Hive Banner" src="https://storage.googleapis.com/aden-prod-assets/website/aden-title-card.png" />
</p>
<p align="center">
<a href="README.md">English</a> |
<a href="docs/i18n/zh-CN.md">简体中文</a> |
<a href="docs/i18n/es.md">Español</a> |
<a href="docs/i18n/hi.md">हिन्दी</a> |
<a href="docs/i18n/pt.md">Português</a> |
<a href="docs/i18n/ja.md">日本語</a> |
<a href="docs/i18n/ru.md">Русский</a> |
<a href="docs/i18n/ko.md">한국어</a>
<a href="README.zh-CN.md">简体中文</a> |
<a href="README.es.md">Español</a> |
<a href="README.pt.md">Português</a> |
<a href="README.ja.md">日本語</a> |
<a href="README.ru.md">Русский</a> |
<a href="README.ko.md">한국어</a>
</p>
<p align="center">
<a href="https://github.com/adenhq/hive/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-Apache%202.0-blue.svg" alt="Apache 2.0 License" /></a>
<a href="https://www.ycombinator.com/companies/aden"><img src="https://img.shields.io/badge/Y%20Combinator-Aden-orange" alt="Y Combinator" /></a>
<a href="https://discord.com/invite/MXE49hrKDk"><img src="https://img.shields.io/discord/1172610340073242735?logo=discord&labelColor=%235462eb&logoColor=%23f5f5f5&color=%235462eb" alt="Discord" /></a>
<a href="https://x.com/aden_hq"><img src="https://img.shields.io/twitter/follow/teamaden?logo=X&color=%23f5f5f5" alt="Twitter Follow" /></a>
<a href="https://www.linkedin.com/company/teamaden/"><img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff" alt="LinkedIn" /></a>
<img src="https://img.shields.io/badge/MCP-102_Tools-00ADD8?style=flat-square" alt="MCP" />
</p>
[![Apache 2.0 License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/adenhq/hive/blob/main/LICENSE)
[![Y Combinator](https://img.shields.io/badge/Y%20Combinator-Aden-orange)](https://www.ycombinator.com/companies/aden)
[![Docker Pulls](https://img.shields.io/docker/pulls/adenhq/hive?logo=Docker&labelColor=%23528bff)](https://hub.docker.com/u/adenhq)
[![Discord](https://img.shields.io/discord/1172610340073242735?logo=discord&labelColor=%235462eb&logoColor=%23f5f5f5&color=%235462eb)](https://discord.com/invite/MXE49hrKDk)
[![Twitter Follow](https://img.shields.io/twitter/follow/teamaden?logo=X&color=%23f5f5f5)](https://x.com/aden_hq)
[![LinkedIn](https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff)](https://www.linkedin.com/company/teamaden/)
<p align="center">
<img src="https://img.shields.io/badge/AI_Agents-Self--Improving-brightgreen?style=flat-square" alt="AI Agents" />
<img src="https://img.shields.io/badge/Multi--Agent-Systems-blue?style=flat-square" alt="Multi-Agent" />
<img src="https://img.shields.io/badge/Headless-Development-purple?style=flat-square" alt="Headless" />
<img src="https://img.shields.io/badge/Goal--Driven-Development-purple?style=flat-square" alt="Goal-Driven" />
<img src="https://img.shields.io/badge/Human--in--the--Loop-orange?style=flat-square" alt="HITL" />
<img src="https://img.shields.io/badge/Production--Ready-red?style=flat-square" alt="Production" />
</p>
@@ -33,383 +30,264 @@
<img src="https://img.shields.io/badge/OpenAI-supported-412991?style=flat-square&logo=openai" alt="OpenAI" />
<img src="https://img.shields.io/badge/Anthropic-supported-d4a574?style=flat-square" alt="Anthropic" />
<img src="https://img.shields.io/badge/Google_Gemini-supported-4285F4?style=flat-square&logo=google" alt="Gemini" />
<img src="https://img.shields.io/badge/MCP-19_Tools-00ADD8?style=flat-square" alt="MCP" />
</p>
## Overview
Build autonomous, reliable, self-improving AI agents without hardcoding workflows. Define your goal through conversation with a coding agent, and the framework generates a node graph with dynamically created connection code. When things break, the framework captures failure data, evolves the agent through the coding agent, and redeploys. Built-in human-in-the-loop nodes, credential management, and real-time monitoring give you control without sacrificing adaptability.
Build reliable, self-improving AI agents without hardcoding workflows. Define your goal through conversation with a coding agent, and the framework generates a node graph with dynamically created connection code. When things break, the framework captures failure data, evolves the agent through the coding agent, and redeploys. Built-in human-in-the-loop nodes, credential management, and real-time monitoring give you control without sacrificing adaptability.
Visit [adenhq.com](https://adenhq.com) for complete documentation, examples, and guides.
https://github.com/user-attachments/assets/846c0cc7-ffd6-47fa-b4b7-495494857a55
## What is Aden
## Who Is Hive For?
<p align="center">
<img width="100%" alt="Aden Architecture" src="docs/assets/aden-architecture-diagram.jpg" />
</p>
Hive is designed for developers and teams who want to build **production-grade AI agents** without manually wiring complex workflows.
Aden is a platform for building, deploying, operating, and adapting AI agents:
Hive is a good fit if you:
- Want AI agents that **execute real business processes**, not demos
- Prefer **goal-driven development** over hardcoded workflows
- Need **self-healing and adaptive agents** that improve over time
- Require **human-in-the-loop control**, observability, and cost limits
- Plan to run agents in **production environments**
Hive may not be the best fit if youre only experimenting with simple agent chains or one-off scripts.
## When Should You Use Hive?
Use Hive when you need:
- Long-running, autonomous agents
- Strong guardrails, process, and controls
- Continuous improvement based on failures
- Multi-agent coordination
- A framework that evolves with your goals
- **Build** - A Coding Agent generates specialized Worker Agents (Sales, Marketing, Ops) from natural language goals
- **Deploy** - Headless deployment with CI/CD integration and full API lifecycle management
- **Operate** - Real-time monitoring, observability, and runtime guardrails keep agents reliable
- **Adapt** - Continuous evaluation, supervision, and adaptation ensure agents improve over time
- **Infra** - Shared memory, LLM integrations, tools, and skills power every agent
## Quick Links
- **[Documentation](https://docs.adenhq.com/)** - Complete guides and API reference
- **[Self-Hosting Guide](https://docs.adenhq.com/getting-started/quickstart)** - Deploy Hive on your infrastructure
- **[Changelog](https://github.com/adenhq/hive/releases)** - Latest updates and releases
- **[Roadmap](docs/roadmap.md)** - Upcoming features and plans
<!-- - **[Roadmap](https://adenhq.com/roadmap)** - Upcoming features and plans -->
- **[Report Issues](https://github.com/adenhq/hive/issues)** - Bug reports and feature requests
- **[Contributing](CONTRIBUTING.md)** - How to contribute and submit PRs
## Quick Start
### Prerequisites
- Python 3.11+ for agent development
- Claude Code, Codex CLI, or Cursor for utilizing agent skills
> **Note for Windows Users:** It is strongly recommended to use **WSL (Windows Subsystem for Linux)** or **Git Bash** to run this framework. Some core automation scripts may not execute correctly in standard Command Prompt or PowerShell.
- [Python 3.11+](https://www.python.org/downloads/) for agent development
- [Docker](https://docs.docker.com/get-docker/) (v20.10+) - Optional, for containerized tools
### Installation
> **Note**
> Hive uses a `uv` workspace layout and is not installed with `pip install`.
> Running `pip install -e .` from the repository root will create a placeholder package and Hive will not function correctly.
> Please use the quickstart script below to set up the environment.
```bash
# Clone the repository
git clone https://github.com/adenhq/hive.git
cd hive
# Run quickstart setup
./quickstart.sh
# Run Python environment setup
./scripts/setup-python.sh
```
This sets up:
- **framework** - Core agent runtime and graph executor (in `core/.venv`)
- **aden_tools** - MCP tools for agent capabilities (in `tools/.venv`)
- **credential store** - Encrypted API key storage (`~/.hive/credentials`)
- **LLM provider** - Interactive default model configuration
- All required Python dependencies with `uv`
This installs:
- **framework** - Core agent runtime and graph executor
- **aden_tools** - 19 MCP tools for agent capabilities
- All required dependencies
### Build Your First Agent
```bash
# Install Claude Code skills (one-time)
./quickstart.sh
# Build an agent using Claude Code
claude> /hive
claude> /building-agents
# Test your agent
claude> /hive-debugger
claude> /testing-agent
# (at separate terminal) Launch the interactive dashboard
hive tui
# Or run directly
hive run exports/your_agent_name --input '{"key": "value"}'
# Run your agent
PYTHONPATH=core:exports python -m your_agent_name run --input '{...}'
```
## Coding Agent Support
### Codex CLI
Hive includes native support for [OpenAI Codex CLI](https://github.com/openai/codex) (v0.101.0+).
1. **Config:** `.codex/config.toml` with `agent-builder` MCP server (tracked in git)
2. **Skills:** `.agents/skills/` symlinks to Hive skills (tracked in git)
3. **Launch:** Run `codex` in the repo root, then type `use hive`
Example:
```
codex> use hive
```
### Opencode
Hive includes native support for [Opencode](https://github.com/opencode-ai/opencode).
1. **Setup:** Run the quickstart script
2. **Launch:** Open Opencode in the project root.
3. **Activate:** Type `/hive` in the chat to switch to the Hive Agent.
4. **Verify:** Ask the agent _"List your tools"_ to confirm the connection.
The agent has access to all Hive skills and can scaffold agents, add tools, and debug workflows directly from the chat.
**[📖 Complete Setup Guide](docs/environment-setup.md)** - Detailed instructions for agent development
### Antigravity IDE Support
Skills and MCP servers are also available in [Antigravity IDE](https://antigravity.google/) (Google's AI-powered IDE). **Easiest:** open a terminal in the hive repo folder and run (use `./` — the script is inside the repo):
```bash
./scripts/setup-antigravity-mcp.sh
```
**Important:** Always restart/refresh Antigravity IDE after running the setup script—MCP servers only load on startup. After restart, **agent-builder** and **tools** MCP servers should connect. Skills are under `.agent/skills/` (symlinks to `.claude/skills/`). See [docs/antigravity-setup.md](docs/antigravity-setup.md) for manual setup and troubleshooting.
**[📖 Complete Setup Guide](ENVIRONMENT_SETUP.md)** - Detailed instructions for agent development
## Features
- **[Goal-Driven Development](docs/key_concepts/goals_outcome.md)** - Define objectives in natural language; the coding agent generates the agent graph and connection code to achieve them
- **[Adaptiveness](docs/key_concepts/evolution.md)** - Framework captures failures, calibrates according to the objectives, and evolves the agent graph
- **[Dynamic Node Connections](docs/key_concepts/graph.md)** - No predefined edges; connection code is generated by any capable LLM based on your goals
- **Goal-Driven Development** - Define objectives in natural language; the coding agent generates the agent graph and connection code to achieve them
- **Self-Adapting Agents** - Framework captures failures, updates objectives and updates the agent graph
- **Dynamic Node Connections** - No predefined edges; connection code is generated by any capable LLM based on your goals
- **SDK-Wrapped Nodes** - Every node gets shared memory, local RLM memory, monitoring, tools, and LLM access out of the box
- **[Human-in-the-Loop](docs/key_concepts/graph.md#human-in-the-loop)** - Intervention nodes that pause execution for human input with configurable timeouts and escalation
- **Human-in-the-Loop** - Intervention nodes that pause execution for human input with configurable timeouts and escalation
- **Real-time Observability** - WebSocket streaming for live monitoring of agent execution, decisions, and node-to-node communication
- **Interactive TUI Dashboard** - Terminal-based dashboard with live graph view, event log, and chat interface for agent interaction
- **Cost & Budget Control** - Set spending limits, throttles, and automatic model degradation policies
- **Production-Ready** - Self-hostable, built for scale and reliability
## Integration
<a href="https://github.com/adenhq/hive/tree/main/tools/src/aden_tools/tools"><img width="100%" alt="Integration" src="https://github.com/user-attachments/assets/a1573f93-cf02-4bb8-b3d5-b305b05b1e51" /></a>
Hive is built to be model-agnostic and system-agnostic.
- **LLM flexibility** - Hive Framework is designed to support various types of LLMs, including hosted and local models through LiteLLM-compatible providers.
- **Business system connectivity** - Hive Framework is designed to connect to all kinds of business systems as tools, such as CRM, support, messaging, data, file, and internal APIs via MCP.
## Why Aden
Hive focuses on generating agents that run real business processes rather than generic agents. Instead of requiring you to manually design workflows, define agent interactions, and handle failures reactively, Hive flips the paradigm: **you describe outcomes, and the system builds itself**—delivering an outcome-driven, adaptive experience with an easy-to-use set of tools and integrations.
Traditional agent frameworks require you to manually design workflows, define agent interactions, and handle failures reactively. Aden flips this paradigm**you describe outcomes, and the system builds itself**.
```mermaid
flowchart LR
GOAL["Define Goal"] --> GEN["Auto-Generate Graph"]
GEN --> EXEC["Execute Agents"]
EXEC --> MON["Monitor & Observe"]
MON --> CHECK{{"Pass?"}}
CHECK -- "Yes" --> DONE["Deliver Result"]
CHECK -- "No" --> EVOLVE["Evolve Graph"]
EVOLVE --> EXEC
subgraph BUILD["🏗️ BUILD"]
GOAL["Define Goal<br/>+ Success Criteria"] --> NODES["Add Nodes<br/>LLM/Router/Function"]
NODES --> EDGES["Connect Edges<br/>on_success/failure/conditional"]
EDGES --> TEST["Test & Validate"] --> APPROVE["Approve & Export"]
end
GOAL -.- V1["Natural Language"]
GEN -.- V2["Instant Architecture"]
EXEC -.- V3["Easy Integrations"]
MON -.- V4["Full visibility"]
EVOLVE -.- V5["Adaptability"]
DONE -.- V6["Reliable outcomes"]
subgraph EXPORT["📦 EXPORT"]
direction TB
JSON["agent.json<br/>(GraphSpec)"]
TOOLS["tools.py<br/>(Functions)"]
MCP["mcp_servers.json<br/>(Integrations)"]
end
style GOAL fill:#ffbe42,stroke:#cc5d00,stroke-width:2px,color:#333
style GEN fill:#ffb100,stroke:#cc5d00,stroke-width:2px,color:#333
style EXEC fill:#ff9800,stroke:#cc5d00,stroke-width:2px,color:#fff
style MON fill:#ff9800,stroke:#cc5d00,stroke-width:2px,color:#fff
style CHECK fill:#fff59d,stroke:#ed8c00,stroke-width:2px,color:#333
style DONE fill:#4caf50,stroke:#2e7d32,stroke-width:2px,color:#fff
style EVOLVE fill:#e8763d,stroke:#cc5d00,stroke-width:2px,color:#fff
style V1 fill:#fff,stroke:#ed8c00,stroke-width:1px,color:#cc5d00
style V2 fill:#fff,stroke:#ed8c00,stroke-width:1px,color:#cc5d00
style V3 fill:#fff,stroke:#ed8c00,stroke-width:1px,color:#cc5d00
style V4 fill:#fff,stroke:#ed8c00,stroke-width:1px,color:#cc5d00
style V5 fill:#fff,stroke:#ed8c00,stroke-width:1px,color:#cc5d00
style V6 fill:#fff,stroke:#ed8c00,stroke-width:1px,color:#cc5d00
subgraph RUN["🚀 RUNTIME"]
LOAD["AgentRunner<br/>Load + Parse"] --> SETUP["Setup Runtime<br/>+ ToolRegistry"]
SETUP --> EXEC["GraphExecutor<br/>Execute Nodes"]
subgraph DECISION["Decision Recording"]
DEC1["runtime.decide()<br/>intent → options → choice"]
DEC2["runtime.record_outcome()<br/>success, result, metrics"]
end
end
subgraph INFRA["⚙️ INFRASTRUCTURE"]
CTX["NodeContext<br/>memory • llm • tools"]
STORE[("FileStorage<br/>Runs & Decisions")]
end
APPROVE --> EXPORT
EXPORT --> LOAD
EXEC --> DECISION
EXEC --> CTX
DECISION --> STORE
STORE -.->|"Analyze & Improve"| NODES
style BUILD fill:#ffbe42,stroke:#cc5d00,stroke-width:3px,color:#333
style EXPORT fill:#fff59d,stroke:#ed8c00,stroke-width:2px,color:#333
style RUN fill:#ffb100,stroke:#cc5d00,stroke-width:3px,color:#333
style DECISION fill:#ffcc80,stroke:#ed8c00,stroke-width:2px,color:#333
style INFRA fill:#e8763d,stroke:#cc5d00,stroke-width:3px,color:#fff
style STORE fill:#ed8c00,stroke:#cc5d00,stroke-width:2px,color:#fff
```
### The Hive Advantage
### The Aden Advantage
| Traditional Frameworks | Hive |
| Traditional Frameworks | Aden |
| -------------------------- | -------------------------------------- |
| Hardcode agent workflows | Describe goals in natural language |
| Manual graph definition | Auto-generated agent graphs |
| Reactive error handling | Outcome-evaluation and adaptiveness |
| Reactive error handling | Proactive self-evolution |
| Static tool configurations | Dynamic SDK-wrapped nodes |
| Separate monitoring setup | Built-in real-time observability |
| DIY budget management | Integrated cost controls & degradation |
### How It Works
1. **[Define Your Goal](docs/key_concepts/goals_outcome.md)** → Describe what you want to achieve in plain English
2. **Coding Agent Generates** → Creates the [agent graph](docs/key_concepts/graph.md), connection code, and test cases
3. **[Workers Execute](docs/key_concepts/worker_agent.md)** → SDK-wrapped nodes run with full observability and tool access
1. **Define Your Goal** → Describe what you want to achieve in plain English
2. **Coding Agent Generates** → Creates the agent graph, connection code, and test cases
3. **Workers Execute** → SDK-wrapped nodes run with full observability and tool access
4. **Control Plane Monitors** → Real-time metrics, budget enforcement, policy management
5. **[Adaptiveness](docs/key_concepts/evolution.md)** → On failure, the system evolves the graph and redeploys automatically
5. **Self-Improve** → On failure, the system evolves the graph and redeploys automatically
## Run Agents
## How Aden Compares
The `hive` CLI is the primary interface for running agents.
Aden takes a fundamentally different approach to agent development. While most frameworks require you to hardcode workflows or manually define agent graphs, Aden uses a **coding agent to generate your entire agent system** from natural language goals. When agents fail, the framework doesn't just log errors—it **automatically evolves the agent graph** and redeploys.
```bash
# Browse and run agents interactively (Recommended)
hive tui
### Comparison Table
# Run a specific agent directly
hive run exports/my_agent --input '{"task": "Your input here"}'
| Framework | Category | Approach | Aden Difference |
| ----------------------------------- | ------------------------- | --------------------------------------------------------------- | --------------------------------------------------------- |
| **LangChain, LlamaIndex, Haystack** | Component Libraries | Predefined components for RAG/LLM apps; manual connection logic | Generates entire graph and connection code upfront |
| **CrewAI, AutoGen, Swarm** | Multi-Agent Orchestration | Role-based agents with predefined collaboration patterns | Dynamically creates agents/connections; adapts on failure |
| **PydanticAI, Mastra, Agno** | Type-Safe Frameworks | Structured outputs and validation for known workflows | Evolving workflows; structure emerges through iteration |
| **Agent Zero, Letta** | Personal AI Assistants | Memory and learning; OS-as-tool or stateful memory focus | Production multi-agent systems with self-healing |
| **CAMEL** | Research Framework | Emergent behavior in large-scale simulations (up to 1M agents) | Production-oriented with reliable execution and recovery |
| **TEN Framework, Genkit** | Infrastructure Frameworks | Real-time multimodal (TEN) or full-stack AI (Genkit) | Higher abstraction—generates and evolves agent logic |
| **GPT Engineer, Motia** | Code Generation | Code from specs (GPT Engineer) or "Step" primitive (Motia) | Self-adapting graphs with automatic failure recovery |
| **Trading Agents** | Domain-Specific | Hardcoded trading firm roles on LangGraph | Domain-agnostic; generates structures for any use case |
# Run a specific agent with the TUI dashboard
hive run exports/my_agent --tui
### When to Choose Aden
# Interactive REPL
hive shell
Choose Aden when you need:
- Agents that **self-improve from failures** without manual intervention
- **Goal-driven development** where you describe outcomes, not workflows
- **Production reliability** with automatic recovery and redeployment
- **Rapid iteration** on agent architectures without rewriting code
- **Full observability** with real-time monitoring and human oversight
Choose other frameworks when you need:
- **Type-safe, predictable workflows** (PydanticAI, Mastra)
- **RAG and document processing** (LlamaIndex, Haystack)
- **Research on agent emergence** (CAMEL)
- **Real-time voice/multimodal** (TEN Framework)
- **Simple component chaining** (LangChain, Swarm)
## Project Structure
```
hive/
├── core/ # Core framework - Agent runtime, graph executor, protocols
├── tools/ # MCP Tools Package - 19 tools for agent capabilities
├── exports/ # Agent packages - Pre-built agents and examples
├── docs/ # Documentation and guides
├── scripts/ # Build and utility scripts
├── .claude/ # Claude Code skills for building agents
├── ENVIRONMENT_SETUP.md # Python setup guide for agent development
├── DEVELOPER.md # Developer guide
├── CONTRIBUTING.md # Contribution guidelines
└── ROADMAP.md # Product roadmap
```
The TUI scans both `exports/` and `examples/templates/` for available agents.
## Development
> **Using Python directly (alternative):** You can also run agents with `PYTHONPATH=exports uv run python -m agent_name run --input '{...}'`
### Python Agent Development
See [environment-setup.md](docs/environment-setup.md) for complete setup instructions.
For building and running goal-driven agents with the framework:
```bash
# One-time setup
./scripts/setup-python.sh
# This installs:
# - framework package (core runtime)
# - aden_tools package (19 MCP tools)
# - All dependencies
# Build new agents using Claude Code skills
claude> /building-agents
# Test agents
claude> /testing-agent
# Run agents
PYTHONPATH=core:exports python -m agent_name run --input '{...}'
```
See [ENVIRONMENT_SETUP.md](ENVIRONMENT_SETUP.md) for complete setup instructions.
## Documentation
- **[Developer Guide](docs/developer-guide.md)** - Comprehensive guide for developers
- **[Developer Guide](DEVELOPER.md)** - Comprehensive guide for developers
- [Getting Started](docs/getting-started.md) - Quick setup instructions
- [TUI Guide](docs/tui-selection-guide.md) - Interactive dashboard usage
- [Configuration Guide](docs/configuration.md) - All configuration options
- [Architecture Overview](docs/architecture/README.md) - System design and structure
- [Architecture Overview](docs/architecture.md) - System design and structure
## Roadmap
Aden Hive Agent Framework aims to help developers build outcome-oriented, self-adaptive agents. See [roadmap.md](docs/roadmap.md) for details.
Aden Agent Framework aims to help developers build outcome oriented, self-adaptive agents. Please find our roadmap here
[ROADMAP.md](ROADMAP.md)
```mermaid
flowchart TB
%% Main Entity
User([User])
%% =========================================
%% EXTERNAL EVENT SOURCES
%% =========================================
subgraph ExtEventSource [External Event Source]
E_Sch["Schedulers"]
E_WH["Webhook"]
E_SSE["SSE"]
end
%% =========================================
%% SYSTEM NODES
%% =========================================
subgraph WorkerBees [Worker Bees]
WB_C["Conversation"]
WB_SP["System prompt"]
subgraph Graph [Graph]
direction TB
N1["Node"] --> N2["Node"] --> N3["Node"]
N1 -.-> AN["Active Node"]
N2 -.-> AN
N3 -.-> AN
%% Nested Event Loop Node
subgraph EventLoopNode [Event Loop Node]
ELN_L["listener"]
ELN_SP["System Prompt<br/>(Task)"]
ELN_EL["Event loop"]
ELN_C["Conversation"]
end
end
end
subgraph JudgeNode [Judge]
J_C["Criteria"]
J_P["Principles"]
J_EL["Event loop"] <--> J_S["Scheduler"]
end
subgraph QueenBee [Queen Bee]
QB_SP["System prompt"]
QB_EL["Event loop"]
QB_C["Conversation"]
end
subgraph Infra [Infra]
SA["Sub Agent"]
TR["Tool Registry"]
WTM["Write through Conversation Memory<br/>(Logs/RAM/Harddrive)"]
SM["Shared Memory<br/>(State/Harddrive)"]
EB["Event Bus<br/>(RAM)"]
CS["Credential Store<br/>(Harddrive/Cloud)"]
end
subgraph PC [PC]
B["Browser"]
CB["Codebase<br/>v 0.0.x ... v n.n.n"]
end
%% =========================================
%% CONNECTIONS & DATA FLOW
%% =========================================
%% External Event Routing
E_Sch --> ELN_L
E_WH --> ELN_L
E_SSE --> ELN_L
ELN_L -->|"triggers"| ELN_EL
%% User Interactions
User -->|"Talk"| WB_C
User -->|"Talk"| QB_C
User -->|"Read/Write Access"| CS
%% Inter-System Logic
ELN_C <-->|"Mirror"| WB_C
WB_C -->|"Focus"| AN
WorkerBees -->|"Inquire"| JudgeNode
JudgeNode -->|"Approve"| WorkerBees
%% Judge Alignments
J_C <-.->|"aligns"| WB_SP
J_P <-.->|"aligns"| QB_SP
%% Escalate path
J_EL -->|"Report (Escalate)"| QB_EL
%% Pub/Sub Logic
AN -->|"publish"| EB
EB -->|"subscribe"| QB_C
%% Infra and Process Spawning
ELN_EL -->|"Spawn"| SA
SA -->|"Inform"| ELN_EL
SA -->|"Starts"| B
B -->|"Report"| ELN_EL
TR -->|"Assigned"| ELN_EL
CB -->|"Modify Worker Bee"| WB_C
%% =========================================
%% SHARED MEMORY & LOGS ACCESS
%% =========================================
%% Worker Bees Access (link to node inside Graph subgraph)
AN <-->|"Read/Write"| WTM
AN <-->|"Read/Write"| SM
%% Queen Bee Access
QB_C <-->|"Read/Write"| WTM
QB_EL <-->|"Read/Write"| SM
%% Credentials Access
CS -->|"Read Access"| QB_C
timeline
title Aden Agent Framework Roadmap
section Foundation
Architecture : Node-Based Architecture : Python SDK : LLM Integration (OpenAI, Anthropic, Google) : Communication Protocol
Coding Agent : Goal Creation Session : Worker Agent Creation : MCP Tools Integration
Worker Agent : Human-in-the-Loop : Callback Handlers : Intervention Points : Streaming Interface
Tools : File Use : Memory (STM/LTM) : Web Search : Web Scraper : Audit Trail
Core : Eval System : Pydantic Validation : Docker Deployment : Documentation : Sample Agents
section Expansion
Intelligence : Guardrails : Streaming Mode : Semantic Search
Platform : JavaScript SDK : Custom Tool Integrator : Credential Store
Deployment : Self-Hosted : Cloud Services : CI/CD Pipeline
Templates : Sales Agent : Marketing Agent : Analytics Agent : Training Agent : Smart Form Agent
```
## Contributing
We welcome contributions from the community! Were especially looking for help building tools, integrations, and example agents for the framework ([check #2805](https://github.com/adenhq/hive/issues/2805)). If youre interested in extending its functionality, this is the perfect place to start. Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
**Important:** Please get assigned to an issue before submitting a PR. Comment on an issue to claim it, and a maintainer will assign you. Issues with reproducible steps and proposals are prioritized. This helps prevent duplicate work.
1. Find or create an issue and get assigned
2. Fork the repository
3. Create your feature branch (`git checkout -b feature/amazing-feature`)
4. Commit your changes (`git commit -m 'Add amazing feature'`)
5. Push to the branch (`git push origin feature/amazing-feature`)
6. Open a Pull Request
## Community & Support
We use [Discord](https://discord.com/invite/MXE49hrKDk) for support, feature requests, and community discussions.
@@ -418,6 +296,16 @@ We use [Discord](https://discord.com/invite/MXE49hrKDk) for support, feature req
- Twitter/X - [@adenhq](https://x.com/aden_hq)
- LinkedIn - [Company Page](https://www.linkedin.com/company/teamaden/)
## Contributing
We welcome contributions! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
1. Fork the repository
2. Create your feature branch (`git checkout -b feature/amazing-feature`)
3. Commit your changes (`git commit -m 'Add amazing feature'`)
4. Push to the branch (`git push origin feature/amazing-feature`)
5. Open a Pull Request
## Join Our Team
**We're hiring!** Join us in engineering, research, and go-to-market roles.
@@ -434,57 +322,69 @@ This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENS
## Frequently Asked Questions (FAQ)
**Q: What LLM providers does Hive support?**
**Q: Does Aden depend on LangChain or other agent frameworks?**
Hive supports 100+ LLM providers through LiteLLM integration, including OpenAI (GPT-4, GPT-4o), Anthropic (Claude models), Google Gemini, DeepSeek, Mistral, Groq, and many more. Simply set the appropriate API key environment variable and specify the model name.
No. Aden is built from the ground up with no dependencies on LangChain, CrewAI, or other agent frameworks. The framework is designed to be lean and flexible, generating agent graphs dynamically rather than relying on predefined components.
**Q: Can I use Hive with local AI models like Ollama?**
**Q: What LLM providers does Aden support?**
Yes! Hive supports local models through LiteLLM. Simply use the model name format `ollama/model-name` (e.g., `ollama/llama3`, `ollama/mistral`) and ensure Ollama is running locally.
Aden supports 100+ LLM providers through LiteLLM integration, including OpenAI (GPT-4, GPT-4o), Anthropic (Claude models), Google Gemini, Mistral, Groq, and many more. Simply set the appropriate API key environment variable and specify the model name.
**Q: What makes Hive different from other agent frameworks?**
**Q: Can I use Aden with local AI models like Ollama?**
Hive generates your entire agent system from natural language goals using a coding agent—you don't hardcode workflows or manually define graphs. When agents fail, the framework automatically captures failure data, [evolves the agent graph](docs/key_concepts/evolution.md), and redeploys. This self-improving loop is unique to Aden.
Yes! Aden supports local models through LiteLLM. Simply use the model name format `ollama/model-name` (e.g., `ollama/llama3`, `ollama/mistral`) and ensure Ollama is running locally.
**Q: Is Hive open-source?**
**Q: What makes Aden different from other agent frameworks?**
Yes, Hive is fully open-source under the Apache License 2.0. We actively encourage community contributions and collaboration.
Aden generates your entire agent system from natural language goals using a coding agent—you don't hardcode workflows or manually define graphs. When agents fail, the framework automatically captures failure data, evolves the agent graph, and redeploys. This self-improving loop is unique to Aden.
**Q: Can Hive handle complex, production-scale use cases?**
**Q: Is Aden open-source?**
Yes. Hive is explicitly designed for production environments with features like automatic failure recovery, real-time observability, cost controls, and horizontal scaling support. The framework handles both simple automations and complex multi-agent workflows.
Yes, Aden is fully open-source under the Apache License 2.0. We actively encourage community contributions and collaboration.
**Q: Does Hive support human-in-the-loop workflows?**
**Q: Does Aden collect data from users?**
Yes, Hive fully supports [human-in-the-loop](docs/key_concepts/graph.md#human-in-the-loop) workflows through intervention nodes that pause execution for human input. These include configurable timeouts and escalation policies, allowing seamless collaboration between human experts and AI agents.
Aden collects telemetry data for monitoring and observability purposes, including token usage, latency metrics, and cost tracking. Content capture (prompts and responses) is configurable and stored with team-scoped data isolation. All data stays within your infrastructure when self-hosted.
**Q: What programming languages does Hive support?**
**Q: What deployment options does Aden support?**
The Hive framework is built in Python. A JavaScript/TypeScript SDK is on the roadmap.
Aden supports Docker Compose deployment out of the box, with both production and development configurations. Self-hosted deployments work on any infrastructure supporting Docker. Cloud deployment options and Kubernetes-ready configurations are on the roadmap.
**Q: Can Hive agents interact with external tools and APIs?**
**Q: Can Aden handle complex, production-scale use cases?**
Yes. Aden is explicitly designed for production environments with features like automatic failure recovery, real-time observability, cost controls, and horizontal scaling support. The framework handles both simple automations and complex multi-agent workflows.
**Q: Does Aden support human-in-the-loop workflows?**
Yes, Aden fully supports human-in-the-loop workflows through intervention nodes that pause execution for human input. These include configurable timeouts and escalation policies, allowing seamless collaboration between human experts and AI agents.
**Q: What monitoring and debugging tools does Aden provide?**
Aden includes comprehensive observability features: real-time WebSocket streaming for live agent execution monitoring, TimescaleDB-powered analytics for cost and performance metrics, health check endpoints for Kubernetes integration, and 19 MCP tools for budget management, agent status, and policy control.
**Q: What programming languages does Aden support?**
Aden provides SDKs for both Python and JavaScript/TypeScript. The Python SDK includes integration templates for LangGraph, LangFlow, and LiveKit. The backend is Node.js/TypeScript, and the frontend is React/TypeScript.
**Q: Can Aden agents interact with external tools and APIs?**
Yes. Aden's SDK-wrapped nodes provide built-in tool access, and the framework supports flexible tool ecosystems. Agents can integrate with external APIs, databases, and services through the node architecture.
**Q: How does cost control work in Hive?**
**Q: How does cost control work in Aden?**
Hive provides granular budget controls including spending limits, throttles, and automatic model degradation policies. You can set budgets at the team, agent, or workflow level, with real-time cost tracking and alerts.
Aden provides granular budget controls including spending limits, throttles, and automatic model degradation policies. You can set budgets at the team, agent, or workflow level, with real-time cost tracking and alerts.
**Q: Where can I find examples and documentation?**
Visit [docs.adenhq.com](https://docs.adenhq.com/) for complete guides, API reference, and getting started tutorials. The repository also includes documentation in the `docs/` folder and a comprehensive [developer guide](docs/developer-guide.md).
Visit [docs.adenhq.com](https://docs.adenhq.com/) for complete guides, API reference, and getting started tutorials. The repository also includes documentation in the `docs/` folder and a comprehensive [DEVELOPER.md](DEVELOPER.md) guide.
**Q: How can I contribute to Aden?**
Contributions are welcome! Fork the repository, create your feature branch, implement your changes, and submit a pull request. See [CONTRIBUTING.md](CONTRIBUTING.md) for detailed guidelines.
**Q: When will my team start seeing results from Aden's adaptive agents?**
**Q: Does Aden offer enterprise support?**
Aden's adaptation loop begins working from the first execution. When an agent fails, the framework captures the failure data, helping developers evolve the agent graph through the coding agent. How quickly this translates to measurable results depends on the complexity of your use case, the quality of your goal definitions, and the volume of executions generating feedback.
**Q: How does Hive compare to other agent frameworks?**
Hive focuses on generating agents that run real business processes, rather than generic agents. This vision emphasizes outcome-driven design, adaptability, and an easy-to-use set of tools and integrations.
For enterprise inquiries, contact the Aden team through [adenhq.com](https://adenhq.com) or join our [Discord community](https://discord.com/invite/MXE49hrKDk) for support and discussions.
---
+39 -40
View File
@@ -3,13 +3,13 @@
</p>
<p align="center">
<a href="../../README.md">English</a> |
<a href="zh-CN.md">简体中文</a> |
<a href="es.md">Español</a> |
<a href="pt.md">Português</a> |
<a href="ja.md">日本語</a> |
<a href="ru.md">Русский</a> |
<a href="ko.md">한국어</a>
<a href="README.md">English</a> |
<a href="README.zh-CN.md">简体中文</a> |
<a href="README.es.md">Español</a> |
<a href="README.pt.md">Português</a> |
<a href="README.ja.md">日本語</a> |
<a href="README.ru.md">Русский</a> |
<a href="README.ko.md">한국어</a>
</p>
[![Apache 2.0 License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/adenhq/hive/blob/main/LICENSE)
@@ -30,7 +30,7 @@
<img src="https://img.shields.io/badge/OpenAI-supported-412991?style=flat-square&logo=openai" alt="OpenAI" />
<img src="https://img.shields.io/badge/Anthropic-supported-d4a574?style=flat-square" alt="Anthropic" />
<img src="https://img.shields.io/badge/Google_Gemini-supported-4285F4?style=flat-square&logo=google" alt="Gemini" />
<img src="https://img.shields.io/badge/MCP-Tools-00ADD8?style=flat-square" alt="MCP" />
<img src="https://img.shields.io/badge/MCP-19_Tools-00ADD8?style=flat-square" alt="MCP" />
</p>
## Visão Geral
@@ -42,7 +42,7 @@ Visite [adenhq.com](https://adenhq.com) para documentação completa, exemplos e
## O que é Aden
<p align="center">
<img width="100%" alt="Aden Architecture" src="../assets/aden-architecture-diagram.jpg" />
<img width="100%" alt="Aden Architecture" src="docs/assets/aden-architecture-diagram.jpg" />
</p>
Aden é uma plataforma para construir, implantar, operar e adaptar agentes de IA:
@@ -76,11 +76,10 @@ git clone https://github.com/adenhq/hive.git
cd hive
# Executar configuração do ambiente Python
./quickstart.sh
./scripts/setup-python.sh
```
Isto instala:
- **framework** - Runtime do agente principal e executor de grafos
- **aden_tools** - 19 ferramentas MCP para capacidades de agentes
- Todas as dependências necessárias
@@ -92,16 +91,16 @@ Isto instala:
./quickstart.sh
# Construir um agente usando Claude Code
claude> /hive
claude> /building-agents
# Testar seu agente
claude> /hive-test
claude> /testing-agent
# Executar seu agente
PYTHONPATH=exports uv run python -m your_agent_name run --input '{...}'
PYTHONPATH=core:exports python -m your_agent_name run --input '{...}'
```
**[📖 Guia Completo de Configuração](../environment-setup.md)** - Instruções detalhadas para desenvolvimento de agentes
**[📖 Guia Completo de Configuração](ENVIRONMENT_SETUP.md)** - Instruções detalhadas para desenvolvimento de agentes
## Funcionalidades
@@ -121,7 +120,7 @@ Frameworks de agentes tradicionais exigem que você projete manualmente fluxos d
```mermaid
flowchart LR
subgraph BUILD["🏗️ BUILD"]
GOAL["Define Goal<br/>+ Success Criteria"] --> NODES["Add Nodes<br/>Event Loop"]
GOAL["Define Goal<br/>+ Success Criteria"] --> NODES["Add Nodes<br/>LLM/Router/Function"]
NODES --> EDGES["Connect Edges<br/>on_success/failure/conditional"]
EDGES --> TEST["Test & Validate"] --> APPROVE["Approve & Export"]
end
@@ -165,14 +164,14 @@ flowchart LR
### A Vantagem Aden
| Frameworks Tradicionais | Aden |
| --------------------------------------- | ------------------------------------------ |
| Codificar fluxos de trabalho de agentes | Descrever objetivos em linguagem natural |
| Definição manual de grafos | Grafos de agentes auto-gerados |
| Tratamento reativo de erros | Auto-evolução proativa |
| Configurações de ferramentas estáticas | Nós dinâmicos envolvidos em SDK |
| Configuração de monitoramento separada | Observabilidade em tempo real integrada |
| Gerenciamento de orçamento DIY | Controles de custo e degradação integrados |
| Frameworks Tradicionais | Aden |
|-------------------------|------|
| Codificar fluxos de trabalho de agentes | Descrever objetivos em linguagem natural |
| Definição manual de grafos | Grafos de agentes auto-gerados |
| Tratamento reativo de erros | Auto-evolução proativa |
| Configurações de ferramentas estáticas | Nós dinâmicos envolvidos em SDK |
| Configuração de monitoramento separada | Observabilidade em tempo real integrada |
| Gerenciamento de orçamento DIY | Controles de custo e degradação integrados |
### Como Funciona
@@ -216,7 +215,10 @@ hive/
├── docs/ # Documentação e guias
├── scripts/ # Scripts de build e utilitários
├── .claude/ # Habilidades Claude Code para construir agentes
├── ENVIRONMENT_SETUP.md # Guia de configuração Python para desenvolvimento de agentes
├── DEVELOPER.md # Guia do desenvolvedor
├── CONTRIBUTING.md # Diretrizes de contribuição
└── ROADMAP.md # Roadmap do produto
```
## Desenvolvimento
@@ -227,7 +229,7 @@ Para construir e executar agentes orientados a objetivos com o framework:
```bash
# Configuração única
./quickstart.sh
./scripts/setup-python.sh
# Isto instala:
# - pacote framework (runtime principal)
@@ -235,29 +237,29 @@ Para construir e executar agentes orientados a objetivos com o framework:
# - Todas as dependências
# Construir novos agentes usando habilidades Claude Code
claude> /hive
claude> /building-agents
# Testar agentes
claude> /hive-test
claude> /testing-agent
# Executar agentes
PYTHONPATH=exports uv run python -m agent_name run --input '{...}'
PYTHONPATH=core:exports python -m agent_name run --input '{...}'
```
Consulte [environment-setup.md](../environment-setup.md) para instruções completas de configuração.
Consulte [ENVIRONMENT_SETUP.md](ENVIRONMENT_SETUP.md) para instruções completas de configuração.
## Documentação
- **[Guia do Desenvolvedor](../developer-guide.md)** - Guia abrangente para desenvolvedores
- **[Guia do Desenvolvedor](DEVELOPER.md)** - Guia abrangente para desenvolvedores
- [Começando](docs/getting-started.md) - Instruções de configuração rápida
- [Guia de Configuração](docs/configuration.md) - Todas as opções de configuração
- [Visão Geral da Arquitetura](docs/architecture/README.md) - Design e estrutura do sistema
- [Visão Geral da Arquitetura](docs/architecture.md) - Design e estrutura do sistema
## Roadmap
O Aden Agent Framework visa ajudar desenvolvedores a construir agentes auto-adaptativos orientados a resultados. Encontre nosso roadmap aqui
[roadmap.md](../roadmap.md)
[ROADMAP.md](ROADMAP.md)
```mermaid
timeline
@@ -287,14 +289,11 @@ Usamos [Discord](https://discord.com/invite/MXE49hrKDk) para suporte, solicitaç
Aceitamos contribuições! Por favor, consulte [CONTRIBUTING.md](CONTRIBUTING.md) para diretrizes.
**Importante:** Por favor, seja atribuído a uma issue antes de enviar um PR. Comente na issue para reivindicá-la e um mantenedor irá atribuí-la a você em 24 horas. Isso ajuda a evitar trabalho duplicado.
1. Encontre ou crie uma issue e seja atribuído
2. Faça fork do repositório
3. Crie sua branch de funcionalidade (`git checkout -b feature/amazing-feature`)
4. Faça commit das suas alterações (`git commit -m 'Add amazing feature'`)
5. Faça push para a branch (`git push origin feature/amazing-feature`)
6. Abra um Pull Request
1. Faça fork do repositório
2. Crie sua branch de funcionalidade (`git checkout -b feature/amazing-feature`)
3. Faça commit das suas alterações (`git commit -m 'Add amazing feature'`)
4. Faça push para a branch (`git push origin feature/amazing-feature`)
5. Abra um Pull Request
## Junte-se ao Nosso Time
+39 -40
View File
@@ -3,13 +3,13 @@
</p>
<p align="center">
<a href="../../README.md">English</a> |
<a href="zh-CN.md">简体中文</a> |
<a href="es.md">Español</a> |
<a href="pt.md">Português</a> |
<a href="ja.md">日本語</a> |
<a href="ru.md">Русский</a> |
<a href="ko.md">한국어</a>
<a href="README.md">English</a> |
<a href="README.zh-CN.md">简体中文</a> |
<a href="README.es.md">Español</a> |
<a href="README.pt.md">Português</a> |
<a href="README.ja.md">日本語</a> |
<a href="README.ru.md">Русский</a> |
<a href="README.ko.md">한국어</a>
</p>
[![Apache 2.0 License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/adenhq/hive/blob/main/LICENSE)
@@ -30,7 +30,7 @@
<img src="https://img.shields.io/badge/OpenAI-supported-412991?style=flat-square&logo=openai" alt="OpenAI" />
<img src="https://img.shields.io/badge/Anthropic-supported-d4a574?style=flat-square" alt="Anthropic" />
<img src="https://img.shields.io/badge/Google_Gemini-supported-4285F4?style=flat-square&logo=google" alt="Gemini" />
<img src="https://img.shields.io/badge/MCP-Tools-00ADD8?style=flat-square" alt="MCP" />
<img src="https://img.shields.io/badge/MCP-19_Tools-00ADD8?style=flat-square" alt="MCP" />
</p>
## Обзор
@@ -42,7 +42,7 @@
## Что такое Aden
<p align="center">
<img width="100%" alt="Aden Architecture" src="../assets/aden-architecture-diagram.jpg" />
<img width="100%" alt="Aden Architecture" src="docs/assets/aden-architecture-diagram.jpg" />
</p>
Aden — это платформа для создания, развёртывания, эксплуатации и адаптации ИИ-агентов:
@@ -76,11 +76,10 @@ git clone https://github.com/adenhq/hive.git
cd hive
# Запустить настройку окружения Python
./quickstart.sh
./scripts/setup-python.sh
```
Это установит:
- **framework** - Основная среда выполнения агентов и исполнитель графов
- **aden_tools** - 19 инструментов MCP для возможностей агентов
- Все необходимые зависимости
@@ -92,16 +91,16 @@ cd hive
./quickstart.sh
# Создать агента с помощью Claude Code
claude> /hive
claude> /building-agents
# Протестировать агента
claude> /hive-test
claude> /testing-agent
# Запустить агента
PYTHONPATH=exports uv run python -m your_agent_name run --input '{...}'
PYTHONPATH=core:exports python -m your_agent_name run --input '{...}'
```
**[📖 Полное руководство по настройке](../environment-setup.md)** - Подробные инструкции для разработки агентов
**[📖 Полное руководство по настройке](ENVIRONMENT_SETUP.md)** - Подробные инструкции для разработки агентов
## Функции
@@ -121,7 +120,7 @@ PYTHONPATH=exports uv run python -m your_agent_name run --input '{...}'
```mermaid
flowchart LR
subgraph BUILD["🏗️ BUILD"]
GOAL["Define Goal<br/>+ Success Criteria"] --> NODES["Add Nodes<br/>Event Loop"]
GOAL["Define Goal<br/>+ Success Criteria"] --> NODES["Add Nodes<br/>LLM/Router/Function"]
NODES --> EDGES["Connect Edges<br/>on_success/failure/conditional"]
EDGES --> TEST["Test & Validate"] --> APPROVE["Approve & Export"]
end
@@ -165,14 +164,14 @@ flowchart LR
### Преимущество Aden
| Традиционные фреймворки | Aden |
| ------------------------------------- | -------------------------------------------- |
| Жёсткое кодирование рабочих процессов | Описание целей на естественном языке |
| Ручное определение графов | Автоматически генерируемые графы агентов |
| Реактивная обработка ошибок | Проактивная самоэволюция |
| Статические конфигурации инструментов | Динамические узлы, обёрнутые SDK |
| Отдельная настройка мониторинга | Встроенная наблюдаемость в реальном времени |
| DIY управление бюджетом | Интегрированный контроль затрат и деградация |
| Традиционные фреймворки | Aden |
|-------------------------|------|
| Жёсткое кодирование рабочих процессов | Описание целей на естественном языке |
| Ручное определение графов | Автоматически генерируемые графы агентов |
| Реактивная обработка ошибок | Проактивная самоэволюция |
| Статические конфигурации инструментов | Динамические узлы, обёрнутые SDK |
| Отдельная настройка мониторинга | Встроенная наблюдаемость в реальном времени |
| DIY управление бюджетом | Интегрированный контроль затрат и деградация |
### Как это работает
@@ -216,7 +215,10 @@ hive/
├── docs/ # Документация и руководства
├── scripts/ # Скрипты сборки и утилиты
├── .claude/ # Навыки Claude Code для создания агентов
├── ENVIRONMENT_SETUP.md # Руководство по настройке Python для разработки агентов
├── DEVELOPER.md # Руководство разработчика
├── CONTRIBUTING.md # Руководство по участию
└── ROADMAP.md # Дорожная карта продукта
```
## Разработка
@@ -227,7 +229,7 @@ hive/
```bash
# Одноразовая настройка
./quickstart.sh
./scripts/setup-python.sh
# Это установит:
# - пакет framework (основная среда выполнения)
@@ -235,29 +237,29 @@ hive/
# - Все зависимости
# Создать новых агентов с помощью навыков Claude Code
claude> /hive
claude> /building-agents
# Протестировать агентов
claude> /hive-test
claude> /testing-agent
# Запустить агентов
PYTHONPATH=exports uv run python -m agent_name run --input '{...}'
PYTHONPATH=core:exports python -m agent_name run --input '{...}'
```
Обратитесь к [environment-setup.md](../environment-setup.md) для полных инструкций по настройке.
Обратитесь к [ENVIRONMENT_SETUP.md](ENVIRONMENT_SETUP.md) для полных инструкций по настройке.
## Документация
- **[Руководство разработчика](../developer-guide.md)** - Полное руководство для разработчиков
- **[Руководство разработчика](DEVELOPER.md)** - Полное руководство для разработчиков
- [Начало работы](docs/getting-started.md) - Инструкции по быстрой настройке
- [Руководство по конфигурации](docs/configuration.md) - Все опции конфигурации
- [Обзор архитектуры](docs/architecture/README.md) - Дизайн и структура системы
- [Обзор архитектуры](docs/architecture.md) - Дизайн и структура системы
## Дорожная карта
Aden Agent Framework призван помочь разработчикам создавать самоадаптирующихся агентов, ориентированных на результат. Найдите нашу дорожную карту здесь
[roadmap.md](../roadmap.md)
[ROADMAP.md](ROADMAP.md)
```mermaid
timeline
@@ -287,14 +289,11 @@ timeline
Мы приветствуем вклад! Пожалуйста, ознакомьтесь с [CONTRIBUTING.md](CONTRIBUTING.md) для руководств.
**Важно:** Пожалуйста, получите назначение на issue перед отправкой PR. Оставьте комментарий в issue, чтобы заявить о своём желании работать над ним, и мейнтейнер назначит вас в течение 24 часов. Это помогает избежать дублирования работы.
1. Найдите или создайте issue и получите назначение
2. Сделайте форк репозитория
3. Создайте ветку функции (`git checkout -b feature/amazing-feature`)
4. Зафиксируйте изменения (`git commit -m 'Add amazing feature'`)
5. Отправьте в ветку (`git push origin feature/amazing-feature`)
6. Откройте Pull Request
1. Сделайте форк репозитория
2. Создайте ветку функции (`git checkout -b feature/amazing-feature`)
3. Зафиксируйте изменения (`git commit -m 'Add amazing feature'`)
4. Отправьте в ветку (`git push origin feature/amazing-feature`)
5. Откройте Pull Request
## Присоединяйтесь к команде
+340
View File
@@ -0,0 +1,340 @@
<p align="center">
<img width="100%" alt="Hive Banner" src="https://storage.googleapis.com/aden-prod-assets/website/aden-title-card.png" />
</p>
<p align="center">
<a href="README.md">English</a> |
<a href="README.zh-CN.md">简体中文</a> |
<a href="README.es.md">Español</a> |
<a href="README.pt.md">Português</a> |
<a href="README.ja.md">日本語</a> |
<a href="README.ru.md">Русский</a> |
<a href="README.ko.md">한국어</a>
</p>
[![Apache 2.0 License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/adenhq/hive/blob/main/LICENSE)
[![Y Combinator](https://img.shields.io/badge/Y%20Combinator-Aden-orange)](https://www.ycombinator.com/companies/aden)
[![Docker Pulls](https://img.shields.io/docker/pulls/adenhq/hive?logo=Docker&labelColor=%23528bff)](https://hub.docker.com/u/adenhq)
[![Discord](https://img.shields.io/discord/1172610340073242735?logo=discord&labelColor=%235462eb&logoColor=%23f5f5f5&color=%235462eb)](https://discord.com/invite/MXE49hrKDk)
[![Twitter Follow](https://img.shields.io/twitter/follow/teamaden?logo=X&color=%23f5f5f5)](https://x.com/aden_hq)
[![LinkedIn](https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff)](https://www.linkedin.com/company/teamaden/)
<p align="center">
<img src="https://img.shields.io/badge/AI_Agents-Self--Improving-brightgreen?style=flat-square" alt="AI Agents" />
<img src="https://img.shields.io/badge/Multi--Agent-Systems-blue?style=flat-square" alt="Multi-Agent" />
<img src="https://img.shields.io/badge/Goal--Driven-Development-purple?style=flat-square" alt="Goal-Driven" />
<img src="https://img.shields.io/badge/Human--in--the--Loop-orange?style=flat-square" alt="HITL" />
<img src="https://img.shields.io/badge/Production--Ready-red?style=flat-square" alt="Production" />
</p>
<p align="center">
<img src="https://img.shields.io/badge/OpenAI-supported-412991?style=flat-square&logo=openai" alt="OpenAI" />
<img src="https://img.shields.io/badge/Anthropic-supported-d4a574?style=flat-square" alt="Anthropic" />
<img src="https://img.shields.io/badge/Google_Gemini-supported-4285F4?style=flat-square&logo=google" alt="Gemini" />
<img src="https://img.shields.io/badge/MCP-19_Tools-00ADD8?style=flat-square" alt="MCP" />
</p>
## 概述
构建可靠的、自我改进的 AI 智能体,无需硬编码工作流。通过与编码智能体对话来定义目标,框架会生成带有动态创建连接代码的节点图。当出现问题时,框架会捕获故障数据,通过编码智能体进化智能体,并重新部署。内置的人机协作节点、凭证管理和实时监控让您在保持适应性的同时拥有完全控制权。
访问 [adenhq.com](https://adenhq.com) 获取完整文档、示例和指南。
## 什么是 Aden
<p align="center">
<img width="100%" alt="Aden Architecture" src="docs/assets/aden-architecture-diagram.jpg" />
</p>
Aden 是一个用于构建、部署、运营和适应 AI 智能体的平台:
- **构建** - 编码智能体根据自然语言目标生成专业的工作智能体(销售、营销、运营)
- **部署** - 无头部署,支持 CI/CD 集成和完整的 API 生命周期管理
- **运营** - 实时监控、可观测性和运行时护栏确保智能体可靠运行
- **适应** - 持续评估、监督和适应确保智能体随时间改进
- **基础设施** - 共享内存、LLM 集成、工具和技能为每个智能体提供支持
## 快速链接
- **[文档](https://docs.adenhq.com/)** - 完整指南和 API 参考
- **[自托管指南](https://docs.adenhq.com/getting-started/quickstart)** - 在您的基础设施上部署 Hive
- **[更新日志](https://github.com/adenhq/hive/releases)** - 最新更新和版本
<!-- - **[路线图](https://adenhq.com/roadmap)** - 即将推出的功能和计划 -->
- **[报告问题](https://github.com/adenhq/hive/issues)** - Bug 报告和功能请求
## 快速开始
### 前置要求
- [Python 3.11+](https://www.python.org/downloads/) - 用于智能体开发
- [Docker](https://docs.docker.com/get-docker/) (v20.10+) - 可选,用于容器化工具
### 安装
```bash
# 克隆仓库
git clone https://github.com/adenhq/hive.git
cd hive
# 运行 Python 环境设置
./scripts/setup-python.sh
```
这将安装:
- **framework** - 核心智能体运行时和图执行器
- **aden_tools** - 19 个 MCP 工具提供智能体能力
- 所有必需的依赖项
### 构建您的第一个智能体
```bash
# 安装 Claude Code 技能(一次性)
./quickstart.sh
# 使用 Claude Code 构建智能体
claude> /building-agents
# 测试您的智能体
claude> /testing-agent
# 运行您的智能体
PYTHONPATH=core:exports python -m your_agent_name run --input '{...}'
```
**[📖 完整设置指南](ENVIRONMENT_SETUP.md)** - 智能体开发的详细说明
## 功能特性
- **目标驱动开发** - 用自然语言定义目标;编码智能体生成智能体图和连接代码来实现它们
- **自适应智能体** - 框架捕获故障,更新目标并更新智能体图
- **动态节点连接** - 没有预定义边;连接代码由任何有能力的 LLM 根据您的目标生成
- **SDK 封装节点** - 每个节点开箱即用地获得共享内存、本地 RLM 内存、监控、工具和 LLM 访问
- **人机协作** - 干预节点暂停执行以等待人工输入,支持可配置的超时和升级
- **实时可观测性** - WebSocket 流式传输用于实时监控智能体执行、决策和节点间通信
- **成本与预算控制** - 设置支出限制、节流和自动模型降级策略
- **生产就绪** - 可自托管,为规模和可靠性而构建
## 为什么选择 Aden
传统智能体框架要求您手动设计工作流、定义智能体交互并被动处理故障。Aden 颠覆了这一范式——**您描述结果,系统自动构建自己**。
```mermaid
flowchart LR
subgraph BUILD["🏗️ BUILD"]
GOAL["Define Goal<br/>+ Success Criteria"] --> NODES["Add Nodes<br/>LLM/Router/Function"]
NODES --> EDGES["Connect Edges<br/>on_success/failure/conditional"]
EDGES --> TEST["Test & Validate"] --> APPROVE["Approve & Export"]
end
subgraph EXPORT["📦 EXPORT"]
direction TB
JSON["agent.json<br/>(GraphSpec)"]
TOOLS["tools.py<br/>(Functions)"]
MCP["mcp_servers.json<br/>(Integrations)"]
end
subgraph RUN["🚀 RUNTIME"]
LOAD["AgentRunner<br/>Load + Parse"] --> SETUP["Setup Runtime<br/>+ ToolRegistry"]
SETUP --> EXEC["GraphExecutor<br/>Execute Nodes"]
subgraph DECISION["Decision Recording"]
DEC1["runtime.decide()<br/>intent → options → choice"]
DEC2["runtime.record_outcome()<br/>success, result, metrics"]
end
end
subgraph INFRA["⚙️ INFRASTRUCTURE"]
CTX["NodeContext<br/>memory • llm • tools"]
STORE[("FileStorage<br/>Runs & Decisions")]
end
APPROVE --> EXPORT
EXPORT --> LOAD
EXEC --> DECISION
EXEC --> CTX
DECISION --> STORE
STORE -.->|"Analyze & Improve"| NODES
style BUILD fill:#ffbe42,stroke:#cc5d00,stroke-width:3px,color:#333
style EXPORT fill:#fff59d,stroke:#ed8c00,stroke-width:2px,color:#333
style RUN fill:#ffb100,stroke:#cc5d00,stroke-width:3px,color:#333
style DECISION fill:#ffcc80,stroke:#ed8c00,stroke-width:2px,color:#333
style INFRA fill:#e8763d,stroke:#cc5d00,stroke-width:3px,color:#fff
style STORE fill:#ed8c00,stroke:#cc5d00,stroke-width:2px,color:#fff
```
### Aden 的优势
| 传统框架 | Aden |
|----------|------|
| 硬编码智能体工作流 | 用自然语言描述目标 |
| 手动图定义 | 自动生成智能体图 |
| 被动错误处理 | 主动自我进化 |
| 静态工具配置 | 动态 SDK 封装节点 |
| 单独设置监控 | 内置实时可观测性 |
| DIY 预算管理 | 集成成本控制和降级 |
### 工作原理
1. **定义目标** → 用简单英语描述您想要实现的目标
2. **编码智能体生成** → 创建智能体图、连接代码和测试用例
3. **工作节点执行** → SDK 封装节点以完全可观测性和工具访问运行
4. **控制平面监控** → 实时指标、预算执行、策略管理
5. **自我改进** → 失败时,系统进化图并自动重新部署
## Aden 与其他框架的比较
Aden 在智能体开发方面采取了根本不同的方法。虽然大多数框架要求您硬编码工作流或手动定义智能体图,但 Aden 使用**编码智能体从自然语言目标生成整个智能体系统**。当智能体失败时,框架不仅记录错误——它会**自动进化智能体图**并重新部署。
> **注意:** 详细的框架比较表和常见问题解答,请参阅英文版 [README.md](README.md)。
### 何时选择 Aden
选择 Aden 当您需要:
- 智能体从失败中**自我改进**而无需人工干预
- **目标驱动的开发**,您描述结果而非工作流
- 具有自动恢复和重新部署的**生产可靠性**
- 无需重写代码即可**快速迭代**智能体架构
- 具有实时监控和人工监督的**完整可观测性**
选择其他框架当您需要:
- **类型安全、可预测的工作流**PydanticAI、Mastra
- **RAG 和文档处理**LlamaIndex、Haystack
- **智能体涌现的研究**(CAMEL)
- **实时语音/多模态**TEN Framework
- **简单的组件链接**LangChain、Swarm
## 项目结构
```
hive/
├── core/ # 核心框架 - 智能体运行时、图执行器、协议
├── tools/ # MCP 工具包 - 19 个工具提供智能体能力
├── exports/ # 智能体包 - 预构建的智能体和示例
├── docs/ # 文档和指南
├── scripts/ # 构建和实用脚本
├── .claude/ # Claude Code 技能用于构建智能体
├── ENVIRONMENT_SETUP.md # 智能体开发的 Python 设置指南
├── DEVELOPER.md # 开发者指南
├── CONTRIBUTING.md # 贡献指南
└── ROADMAP.md # 产品路线图
```
## 开发
### Python 智能体开发
使用框架构建和运行目标驱动的智能体:
```bash
# 一次性设置
./scripts/setup-python.sh
# 这将安装:
# - framework 包(核心运行时)
# - aden_tools 包(19 个 MCP 工具)
# - 所有依赖项
# 使用 Claude Code 技能构建新智能体
claude> /building-agents
# 测试智能体
claude> /testing-agent
# 运行智能体
PYTHONPATH=core:exports python -m agent_name run --input '{...}'
```
完整设置说明请参阅 [ENVIRONMENT_SETUP.md](ENVIRONMENT_SETUP.md)。
## 文档
- **[开发者指南](DEVELOPER.md)** - 开发者综合指南
- [入门指南](docs/getting-started.md) - 快速设置说明
- [配置指南](docs/configuration.md) - 所有配置选项
- [架构概述](docs/architecture.md) - 系统设计和结构
## 路线图
Aden 智能体框架旨在帮助开发者构建面向结果的、自适应的智能体。请在此查看我们的路线图
[ROADMAP.md](ROADMAP.md)
```mermaid
timeline
title Aden Agent Framework Roadmap
section Foundation
Architecture : Node-Based Architecture : Python SDK : LLM Integration (OpenAI, Anthropic, Google) : Communication Protocol
Coding Agent : Goal Creation Session : Worker Agent Creation : MCP Tools Integration
Worker Agent : Human-in-the-Loop : Callback Handlers : Intervention Points : Streaming Interface
Tools : File Use : Memory (STM/LTM) : Web Search : Web Scraper : Audit Trail
Core : Eval System : Pydantic Validation : Docker Deployment : Documentation : Sample Agents
section Expansion
Intelligence : Guardrails : Streaming Mode : Semantic Search
Platform : JavaScript SDK : Custom Tool Integrator : Credential Store
Deployment : Self-Hosted : Cloud Services : CI/CD Pipeline
Templates : Sales Agent : Marketing Agent : Analytics Agent : Training Agent : Smart Form Agent
```
## 社区与支持
我们使用 [Discord](https://discord.com/invite/MXE49hrKDk) 进行支持、功能请求和社区讨论。
- Discord - [加入我们的社区](https://discord.com/invite/MXE49hrKDk)
- Twitter/X - [@adenhq](https://x.com/aden_hq)
- LinkedIn - [公司主页](https://www.linkedin.com/company/teamaden/)
## 贡献
我们欢迎贡献!请参阅 [CONTRIBUTING.md](CONTRIBUTING.md) 了解指南。
1. Fork 仓库
2. 创建功能分支 (`git checkout -b feature/amazing-feature`)
3. 提交更改 (`git commit -m 'Add amazing feature'`)
4. 推送到分支 (`git push origin feature/amazing-feature`)
5. 创建 Pull Request
## 加入我们的团队
**我们正在招聘!** 加入我们的工程、研究和市场推广团队。
[查看开放职位](https://jobs.adenhq.com/a8cec478-cdbc-473c-bbd4-f4b7027ec193/applicant)
## 安全
有关安全问题,请参阅 [SECURITY.md](SECURITY.md)。
## 许可证
本项目采用 Apache License 2.0 许可证 - 详情请参阅 [LICENSE](LICENSE) 文件。
## 常见问题 (FAQ)
> **注意:** 完整的常见问题解答,请参阅英文版 [README.md](README.md)。
**问:Aden 是否依赖 LangChain 或其他智能体框架?**
不。Aden 从头开始构建,不依赖 LangChain、CrewAI 或其他智能体框架。该框架设计精简灵活,动态生成智能体图而非依赖预定义组件。
**问:Aden 支持哪些 LLM 提供商?**
Aden 通过 LiteLLM 集成支持 100 多个 LLM 提供商,包括 OpenAIGPT-4、GPT-4o)、AnthropicClaude 模型)、Google Gemini、Mistral、Groq 等。只需设置适当的 API 密钥环境变量并指定模型名称即可。
**问:Aden 是开源的吗?**
是的,Aden 在 Apache License 2.0 下完全开源。我们积极鼓励社区贡献和协作。
**问:Aden 与其他智能体框架有何不同?**
Aden 使用编码智能体从自然语言目标生成整个智能体系统——您无需硬编码工作流或手动定义图。当智能体失败时,框架会自动捕获故障数据、进化智能体图并重新部署。这种自我改进循环是 Aden 独有的。
**问:Aden 支持人机协作工作流吗?**
是的,Aden 通过干预节点完全支持人机协作工作流,这些节点会暂停执行以等待人工输入。包括可配置的超时和升级策略,实现人类专家与 AI 智能体的无缝协作。
---
<p align="center">
用 🔥 热情打造于旧金山
</p>
+150
View File
@@ -0,0 +1,150 @@
Product Roadmap
Aden Agent Framework aims to help developers build outcome oriented, self-adaptive agents. Please find our roadmap here
```mermaid
timeline
title Aden Agent Framework Roadmap
section Foundation
Architecture : Node-Based Architecture : Python SDK : LLM Integration (OpenAI, Anthropic, Google) : Communication Protocol
Coding Agent : Goal Creation Session : Worker Agent Creation : MCP Tools Integration
Worker Agent : Human-in-the-Loop : Callback Handlers : Intervention Points : Streaming Interface
Tools : File Use : Memory (STM/LTM) : Web Search : Web Scraper : Audit Trail
Core : Eval System : Pydantic Validation : Docker Deployment : Documentation : Sample Agents
section Expansion
Intelligence : Guardrails : Streaming Mode : Semantic Search
Platform : JavaScript SDK : Custom Tool Integrator : Credential Store
Deployment : Self-Hosted : Cloud Services : CI/CD Pipeline
Templates : Sales Agent : Marketing Agent : Analytics Agent : Training Agent : Smart Form Agent
```
---
## Phase 1: Foundation
### Backbone Architecture
- [ ] **Node-Based Architecture (Agent as a node)**
- [x] Object schema definition
- [x] Node wrapper SDK
- [ ] Shared memory access
- [ ] Default monitoring hooks
- [ ] Tool access layer
- [x] LLM integration layer (Natively supports all mainstream LLMs through LiteLLM)
- [x] Anthropic
- [x] OpenAI
- [x] Google
- [ ] **Communication protocol between nodes**
- [ ] **[Coding Agent] Goal Creation Session** (separate from coding session)
- [ ] Instruction back and forth
- [x] Goal Object schema definition
- [ ] Being able to generate the test cases
- [ ] Test case validation for worker agent (Outcome driven)
- [ ] **[Coding Agent] Worker Agent Creation**
- [x] Coding Agent tools
- [ ] Use Template Agent as a start
- [x] Use our MCP tools
- [ ] **[Worker Agent] Human-in-the-Loop**
- [x] Worker Agents request with questions and options
- [x] Callback Handler System to receive events throughout execution
- [ ] Tool-Based Intervention Points (tool to pause execution and request human input)
- [x] Multiple entrypoint for different event source (e.g. Human input, webhook)
- [ ] Streaming Interface for Real-time Monitoring
- [ ] Request State Management
### Essential Tools
- [x] **File Use Tool Kit**
- [ ] **Memory Tools**
- [x] STM Layer Tool (state-based short-term memory)
- [x] LTM Layer Tool (RLM - long-term memory)
- [ ] **Infrastructure Tools**
- [x] Runtime Log Tool (logs for coding agent)
- [ ] Audit Trail Tool (decision timeline generation)
- [ ] Web Search
- [ ] Web Scraper
- [ ] Recipe for "Add your own tools"
### Memory & File System
- [x] DB for long-term persistent memory (Filesystem as durable scratchpad pattern)
- [x] Session Local memory isolation
### Eval System (Basic)
- [x] Test Driven - Run test case for all agent iteration
- [ ] Failure recording mechanism
- [ ] SDK for defining failure conditions
- [ ] Basic observability hooks
- [ ] User-driven log analysis (OSS approach)
### Data Validation
- [ ] Natively Support data validation of LLMs output with Pydantic
### Developer Experience
- [ ] **Debugging mode**
- [ ] **Documentation**
- [ ] Quick start guide
- [ ] Goal creation guide
- [ ] Agent creation guide
- [ ] GitHub Page setup
- [ ] README with examples
- [ ] Contributing guidelines
- [ ] **Distribution**
- [ ] PyPI package
- [ ] Docker image on Docker Hub
### Sample Agents
- [ ] Knowledge Agent
- [ ] Blog Writer Agent
- [ ] SDR Agent
---
## Phase 2: Expansion
### Basic Guardrails
- [ ] Support Basic Monitoring from Agent node SDK
- [ ] SDK guardrail implementation (in node)
- [ ] Guardrail type support (Determined Condition as Guardrails)
### Agent Capability
- [ ] Streaming mode support
### Cross-Platform
- [ ] JavaScript / TypeScript Version SDK
### File System Enhancement
- [ ] Semantic Search integration
- [ ] Interactive File System in product (frontend integration)
### More Worker Tools
- [ ] Custom Tool Integrator
- [ ] Integration as a tool (Credential Store & Support)
- [ ] **Core Agent Tools**
- [ ] Node Discovery Tool (find other agents in the graph)
- [ ] HITL Tool (pause execution for human approval)
- [ ] Wake-up Tool (resume agent tasks)
### Deployment (Self-Hosted)
- [ ] Docker container standardization
- [ ] Headless backend execution
- [ ] Exposed API for frontend attachment
- [ ] Local monitoring & observability
- [ ] Basic lifecycle APIs (Start, Stop, Pause, Resume)
### Deployment (Cloud)
- [ ] Cloud Service Options
- [ ] Support deployment to 3rd-party platforms
- [ ] Self-deploy + orchestrator connection
- [ ] **CI/CD Pipeline**
- [ ] Automated test execution
- [ ] Agent version control
- [ ] All tests must pass for deployment
### Developer Experience Enhancement
- [ ] Tool usage documentation
- [ ] Discord Support Channel
### More Agent Templates
- [ ] GTM Sales Agent (workflow)
- [ ] GTM Marketing Agent (workflow)
- [ ] Analytics Agent
- [ ] Training Agent
- [ ] Smart Entry / Form Agent (self-evolution emphasis)

Some files were not shown because too many files have changed in this diff Show More