Compare commits
142 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4046e4e379 | |||
| 34782a6b85 | |||
| d25d94e71b | |||
| 51f1b449cd | |||
| 804e47dde4 | |||
| 582c810d15 | |||
| cede629718 | |||
| 10941dc7fc | |||
| c1c16878e4 | |||
| 80a41b434b | |||
| 9a8e117f1d | |||
| 878603033a | |||
| 1c6f17e8db | |||
| 8f32ef8064 | |||
| e12bc96e21 | |||
| 2355d3d729 | |||
| a093a59cb0 | |||
| d7917988c3 | |||
| ae566a2027 | |||
| b15473d3f3 | |||
| 265bf885ec | |||
| e318281989 | |||
| 3e2a11d60d | |||
| 4b9f73310e | |||
| b17c26116d | |||
| 3114af75e4 | |||
| 7a6d10639b | |||
| 6ff29ea6aa | |||
| a23f01973a | |||
| 0aaa3a3eca | |||
| 82f05d1102 | |||
| 8ff6d9c8bd | |||
| a2e102fe15 | |||
| 119280da1a | |||
| 4d49f74d5a | |||
| 6a42b9c66b | |||
| fc4a39480a | |||
| b98afb01c8 | |||
| ccd6bb7656 | |||
| ea30e5c631 | |||
| d16a3c3b22 | |||
| a03bd78c2e | |||
| 3cca41aab1 | |||
| d19aaed946 | |||
| 9a7db8cf94 | |||
| f50630c551 | |||
| 0ef2e64733 | |||
| 3a8e121d43 | |||
| 23e249144d | |||
| 25014bfa89 | |||
| 78ea585779 | |||
| ac13c11f89 | |||
| 51d341b88c | |||
| 7dd70b8e31 | |||
| 84b332d989 | |||
| fd1826a267 | |||
| bcc6848275 | |||
| 75dd053a40 | |||
| 20f2aa09f2 | |||
| fb8c810b3d | |||
| b99b6c5cd3 | |||
| ad21cf4243 | |||
| 1e45cfff67 | |||
| 0280600a47 | |||
| 571ad518dc | |||
| fe37a25cf1 | |||
| e06138628c | |||
| 1ed0edd158 | |||
| 49dbc46082 | |||
| a16a4adc09 | |||
| b4ab1cbd56 | |||
| 6faa63f0d0 | |||
| f4737dcfe7 | |||
| 2b44af427f | |||
| 11f7401bc2 | |||
| db7b5180dd | |||
| 5b4e56252c | |||
| c69bc24598 | |||
| 0cf17e1c63 | |||
| feac803491 | |||
| 4aacec30d8 | |||
| b459a2f7a9 | |||
| ca7f6d3514 | |||
| ca8ede65f0 | |||
| b033c56ae5 | |||
| 9a177c46e1 | |||
| d49e858d32 | |||
| 20bea9cd7f | |||
| d7afa5dcf2 | |||
| 22e816bf86 | |||
| a7709d489c | |||
| 3240616808 | |||
| 18dfc997b8 | |||
| 92d0b6addf | |||
| b9f83d4d61 | |||
| 694feaffd2 | |||
| 9c16826ad3 | |||
| eb68e2143b | |||
| f305745295 | |||
| df4d0ad3fd | |||
| 9034d1dc71 | |||
| 537172d8ce | |||
| 20b2e4b3dd | |||
| fc22586752 | |||
| 646440eba3 | |||
| 53e5579326 | |||
| 29a1630d0f | |||
| 171f4ab2ae | |||
| a86043a2ec | |||
| 3947da2cf1 | |||
| 17caab6563 | |||
| a5ae071a03 | |||
| 9c33da7b8d | |||
| 94d31743b0 | |||
| 70db618c6e | |||
| 960a4549ef | |||
| 363a650dfa | |||
| b6e2634537 | |||
| 23146c8dae | |||
| 9f424f2fc0 | |||
| 25989d9f90 | |||
| 684da96a83 | |||
| abae7979cb | |||
| 49bce57fcf | |||
| 63d017fc21 | |||
| c52ce6bb49 | |||
| bcddd4ce77 | |||
| 017872f71b | |||
| 7e670ce0a8 | |||
| d32308b6d2 | |||
| 604d16e353 | |||
| db577785d6 | |||
| c9ae3a0541 | |||
| ed95dab9f3 | |||
| a6536cef94 | |||
| 3ccc81e81c | |||
| 94197cbcb9 | |||
| 3ee6d98905 | |||
| a96cd546c8 | |||
| eb33d4f1c2 | |||
| 4253956326 | |||
| d6b05bf337 |
@@ -1,46 +0,0 @@
|
||||
{
|
||||
"permissions": {
|
||||
"allow": [
|
||||
"Bash(npm install:*)",
|
||||
"Bash(npm test:*)",
|
||||
"Skill(building-agents-construction)",
|
||||
"Skill(building-agents-construction:*)",
|
||||
"Bash(PYTHONPATH=core:exports pytest:*)",
|
||||
"mcp__agent-builder__create_session",
|
||||
"mcp__agent-builder__get_session_status",
|
||||
"mcp__agent-builder__set_goal",
|
||||
"mcp__agent-builder__list_mcp_servers",
|
||||
"mcp__agent-builder__test_node",
|
||||
"mcp__agent-builder__add_node",
|
||||
"mcp__agent-builder__add_edge",
|
||||
"mcp__agent-builder__validate_graph",
|
||||
"Bash(ruff check:*)",
|
||||
"Bash(PYTHONPATH=core:exports python:*)",
|
||||
"mcp__agent-builder__list_tests",
|
||||
"mcp__agent-builder__generate_constraint_tests",
|
||||
"Bash(python -m agent:*)",
|
||||
"Bash(python agent.py:*)",
|
||||
"Bash(python -c:*)",
|
||||
"Bash(done)",
|
||||
"Bash(xargs cat:*)",
|
||||
"mcp__agent-builder__list_mcp_tools",
|
||||
"mcp__agent-builder__add_mcp_server",
|
||||
"Bash(gh issue list:*)",
|
||||
"WebFetch(domain:github.com)",
|
||||
"Bash(pip install:*)",
|
||||
"Bash(python -m pytest:*)",
|
||||
"Bash(git checkout:*)",
|
||||
"Bash(git add:*)",
|
||||
"Bash(git commit -m \"$\\(cat <<''EOF''\nfeat\\(tools\\): Add Excel tool for spreadsheet operations\n\nAdds a new Excel tool for reading and manipulating .xlsx/.xlsm files:\n- excel_read: Read Excel files with pagination and sheet selection\n- excel_write: Create new Excel files with data\n- excel_append: Append rows to existing files\n- excel_info: Get metadata about Excel files \\(sheets, columns, row counts\\)\n- excel_sheet_list: List all sheets in a workbook\n\nIncludes comprehensive test coverage \\(37 tests\\) and documentation.\n\nReferences #2805\n\nCo-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>\nEOF\n\\)\")",
|
||||
"Bash(git push:*)",
|
||||
"Bash(git pull:*)",
|
||||
"Bash(git stash:*)",
|
||||
"Bash(git merge:*)"
|
||||
]
|
||||
},
|
||||
"enableAllProjectMcpServers": true,
|
||||
"enabledMcpjsonServers": [
|
||||
"agent-builder",
|
||||
"tools"
|
||||
]
|
||||
}
|
||||
@@ -46,6 +46,7 @@ Use this meta-skill when:
|
||||
"Need to understand agent concepts" → building-agents-core
|
||||
"Build a new agent" → building-agents-construction
|
||||
"Optimize my agent design" → building-agents-patterns
|
||||
"Need client-facing nodes or feedback loops" → building-agents-patterns
|
||||
"Set up API keys for my agent" → setup-credentials
|
||||
"Test my agent" → testing-agent
|
||||
"Not sure what I need" → Read phases below, then decide
|
||||
@@ -63,12 +64,12 @@ Use this meta-skill when:
|
||||
- First time building an agent
|
||||
- Need to understand node types, edges, goals
|
||||
- Want to validate tool availability
|
||||
- Learning about pause/resume architecture
|
||||
- Learning about event loop architecture and client-facing nodes
|
||||
|
||||
### What This Phase Provides
|
||||
|
||||
- Architecture overview (Python packages, not JSON)
|
||||
- Core concepts (Goal, Node, Edge, Pause/Resume)
|
||||
- Core concepts (Goal, Node, Edge, Event Loop, Judges)
|
||||
- Tool discovery and validation procedures
|
||||
- Workflow overview
|
||||
|
||||
@@ -106,7 +107,7 @@ Creates the complete agent architecture:
|
||||
- ✅ 1-5 constraints defined
|
||||
- ✅ 5-10 nodes specified in nodes/__init__.py
|
||||
- ✅ 8-15 edges connecting workflow
|
||||
- ✅ Validated structure (passes `python -m agent_name validate`)
|
||||
- ✅ Validated structure (passes `uv run python -m agent_name validate`)
|
||||
- ✅ README.md with usage instructions
|
||||
- ✅ CLI commands (info, validate, run, shell)
|
||||
|
||||
@@ -153,19 +154,20 @@ exports/agent_name/
|
||||
|
||||
### When to Use
|
||||
|
||||
- Want to add pause/resume functionality
|
||||
- Want to add client-facing blocking or feedback edges
|
||||
- Need judge patterns for output validation
|
||||
- Want fan-out/fan-in (parallel execution)
|
||||
- Need error handling patterns
|
||||
- Want to optimize performance
|
||||
- Need examples of complex routing
|
||||
- Want best practices guidance
|
||||
|
||||
### What This Phase Provides
|
||||
|
||||
- Practical examples and patterns
|
||||
- Pause/resume architecture
|
||||
- Error handling strategies
|
||||
- Client-facing interaction patterns
|
||||
- Feedback edge routing with nullable output keys
|
||||
- Judge patterns (implicit, SchemaJudge)
|
||||
- Fan-out/fan-in parallel execution
|
||||
- Context management and spillover patterns
|
||||
- Anti-patterns to avoid
|
||||
- Performance optimization techniques
|
||||
|
||||
**Skip this phase** if your agent design is straightforward.
|
||||
|
||||
@@ -291,15 +293,15 @@ User: "Build an agent"
|
||||
→ Done: Working agent
|
||||
```
|
||||
|
||||
### Pattern 4: Complex Agent with Patterns
|
||||
### Pattern 4: Agent with Review Loops and HITL Checkpoints
|
||||
|
||||
```
|
||||
User: "Build an agent with multi-turn conversations"
|
||||
→ Use /building-agents-core (learn pause/resume)
|
||||
→ Use /building-agents-construction (build structure)
|
||||
→ Use /building-agents-patterns (implement pause/resume pattern)
|
||||
→ Use /testing-agent (validate conversation flows)
|
||||
→ Done: Complex conversational agent
|
||||
User: "Build an agent with human review and feedback loops"
|
||||
→ Use /building-agents-core (learn event loop, client-facing nodes)
|
||||
→ Use /building-agents-construction (build structure with feedback edges)
|
||||
→ Use /building-agents-patterns (implement client-facing + feedback patterns)
|
||||
→ Use /testing-agent (validate review flows and edge routing)
|
||||
→ Done: Agent with HITL checkpoints and review loops
|
||||
```
|
||||
|
||||
## Skill Dependencies
|
||||
@@ -308,25 +310,26 @@ User: "Build an agent with multi-turn conversations"
|
||||
agent-workflow (meta-skill)
|
||||
│
|
||||
├── building-agents-core (foundational)
|
||||
│ ├── Architecture concepts
|
||||
│ ├── Node/Edge/Goal definitions
|
||||
│ ├── Architecture concepts (event loop, judges)
|
||||
│ ├── Node types (event_loop, function)
|
||||
│ ├── Edge routing and priority
|
||||
│ ├── Tool discovery procedures
|
||||
│ └── Workflow overview
|
||||
│
|
||||
├── building-agents-construction (procedural)
|
||||
│ ├── Creates package structure
|
||||
│ ├── Defines goal
|
||||
│ ├── Adds nodes incrementally
|
||||
│ ├── Connects edges
|
||||
│ ├── Adds nodes (event_loop, function)
|
||||
│ ├── Connects edges with priority routing
|
||||
│ ├── Finalizes agent class
|
||||
│ └── Requires: building-agents-core
|
||||
│
|
||||
├── building-agents-patterns (reference)
|
||||
│ ├── Best practices
|
||||
│ ├── Pause/resume patterns
|
||||
│ ├── Error handling
|
||||
│ ├── Anti-patterns
|
||||
│ └── Performance optimization
|
||||
│ ├── Client-facing interaction patterns
|
||||
│ ├── Feedback edges and review loops
|
||||
│ ├── Judge patterns (implicit, SchemaJudge)
|
||||
│ ├── Fan-out/fan-in parallel execution
|
||||
│ └── Context management and anti-patterns
|
||||
│
|
||||
└── testing-agent
|
||||
├── Reads agent goal
|
||||
@@ -342,7 +345,7 @@ agent-workflow (meta-skill)
|
||||
- Check node IDs match between nodes/__init__.py and agent.py
|
||||
- Verify all edges reference valid node IDs
|
||||
- Ensure entry_node exists in nodes list
|
||||
- Run: `PYTHONPATH=core:exports python -m agent_name validate`
|
||||
- Run: `PYTHONPATH=exports uv run python -m agent_name validate`
|
||||
|
||||
### "Agent has structure but won't run"
|
||||
|
||||
@@ -368,7 +371,7 @@ Run these checks:
|
||||
ls exports/my_agent/agent.py
|
||||
|
||||
# Check if it validates
|
||||
PYTHONPATH=core:exports python -m my_agent validate
|
||||
PYTHONPATH=exports uv run python -m my_agent validate
|
||||
|
||||
# Check if tests exist
|
||||
ls exports/my_agent/tests/
|
||||
@@ -439,9 +442,9 @@ The workflow is **flexible** - skip phases as needed, iterate freely, and adapt
|
||||
|
||||
**Choose building-agents-core when:**
|
||||
- First time building agents
|
||||
- Need to understand architecture
|
||||
- Need to understand event loop architecture
|
||||
- Validating tool availability
|
||||
- Learning about node types and edges
|
||||
- Learning about node types, edges, and judges
|
||||
|
||||
**Choose building-agents-construction when:**
|
||||
- Actually building an agent
|
||||
@@ -451,13 +454,13 @@ The workflow is **flexible** - skip phases as needed, iterate freely, and adapt
|
||||
|
||||
**Choose building-agents-patterns when:**
|
||||
- Agent structure complete
|
||||
- Need advanced patterns
|
||||
- Implementing pause/resume
|
||||
- Optimizing performance
|
||||
- Need client-facing nodes or feedback edges
|
||||
- Implementing review loops or fan-out/fan-in
|
||||
- Want judge patterns or context management
|
||||
- Want best practices
|
||||
|
||||
**Choose testing-agent when:**
|
||||
- Agent structure complete
|
||||
- Ready to validate functionality
|
||||
- Need comprehensive test coverage
|
||||
- Debugging agent behavior
|
||||
- Testing feedback loops, output keys, or fan-out
|
||||
|
||||
@@ -75,10 +75,10 @@ initialize → list → identify → check
|
||||
### Step 5: Finalize
|
||||
|
||||
```bash
|
||||
$ PYTHONPATH=core:exports python -m file_monitor_agent validate
|
||||
$ PYTHONPATH=exports uv run python -m file_monitor_agent validate
|
||||
✓ Agent is valid
|
||||
|
||||
$ PYTHONPATH=core:exports python -m file_monitor_agent info
|
||||
$ PYTHONPATH=exports uv run python -m file_monitor_agent info
|
||||
Agent: File Monitor & Copy Agent
|
||||
Nodes: 7
|
||||
Edges: 8
|
||||
@@ -131,7 +131,7 @@ Tests approved incrementally by user.
|
||||
### Step 3: Run Tests
|
||||
|
||||
```bash
|
||||
$ PYTHONPATH=core:exports pytest exports/file_monitor_agent/tests/
|
||||
$ PYTHONPATH=exports uv run pytest exports/file_monitor_agent/tests/
|
||||
|
||||
test_constraints.py::test_preserves_originals PASSED
|
||||
test_constraints.py::test_handles_errors PASSED
|
||||
@@ -162,7 +162,7 @@ test_edge_cases.py::test_large_files PASSED
|
||||
./RUN_AGENT.sh
|
||||
|
||||
# Or manually
|
||||
PYTHONPATH=core:exports:tools/src python -m file_monitor_agent run
|
||||
PYTHONPATH=exports uv run python -m file_monitor_agent run
|
||||
```
|
||||
|
||||
**Capabilities:**
|
||||
|
||||
@@ -124,11 +124,14 @@ AskUserQuestion(questions=[{
|
||||
- node_id (kebab-case)
|
||||
- name
|
||||
- description
|
||||
- node_type: `"llm_generate"` (no tools) or `"llm_tool_use"` (uses tools)
|
||||
- node_type: `"event_loop"` (recommended for all LLM work) or `"function"` (deterministic, no LLM)
|
||||
- input_keys (what data this node receives)
|
||||
- output_keys (what data this node produces)
|
||||
- tools (ONLY tools that exist - empty list for llm_generate)
|
||||
- system_prompt
|
||||
- tools (ONLY tools that exist - empty list if no tools needed)
|
||||
- system_prompt (should mention `set_output` for producing structured outputs)
|
||||
- client_facing: True if this node interacts with the user
|
||||
- nullable_output_keys (for mutually exclusive outputs)
|
||||
- max_node_visits (>1 if this node is a feedback loop target)
|
||||
|
||||
**PRESENT the workflow to the user:**
|
||||
|
||||
@@ -136,7 +139,7 @@ AskUserQuestion(questions=[{
|
||||
>
|
||||
> 1. **[node-id]** - [description]
|
||||
>
|
||||
> - Type: [llm_generate/llm_tool_use]
|
||||
> - Type: event_loop [client-facing] / function
|
||||
> - Input: [keys]
|
||||
> - Output: [keys]
|
||||
> - Tools: [tools or "none"]
|
||||
@@ -211,8 +214,8 @@ mcp__agent-builder__get_session_status()
|
||||
- source (node that outputs)
|
||||
- target (node that receives)
|
||||
- condition: `"on_success"`, `"always"`, `"on_failure"`, or `"conditional"`
|
||||
- condition_expr (Python expression, only if conditional)
|
||||
- priority (integer, lower = higher priority)
|
||||
- condition_expr (Python expression using `output.get(...)`, only if conditional)
|
||||
- priority (positive = forward edge evaluated first, negative = feedback edge)
|
||||
|
||||
**FOR EACH edge, call:**
|
||||
|
||||
@@ -264,7 +267,7 @@ This returns JSON with all the goal, nodes, edges, and MCP server configurations
|
||||
- NOT: `{"first-node-id": ["input_keys"]}` (WRONG)
|
||||
- NOT: `{"first-node-id"}` (WRONG - this is a set)
|
||||
|
||||
**Use the example agent** at `.claude/skills/building-agents-construction/examples/online_research_agent/` as a template for file structure and patterns.
|
||||
**Use the example agent** at `.claude/skills/building-agents-construction/examples/deep_research_agent/` as a template for file structure and patterns. It demonstrates: STEP 1/STEP 2 prompts, client-facing nodes, feedback loops, nullable_output_keys, and data tools.
|
||||
|
||||
**AFTER writing all files, tell the user:**
|
||||
|
||||
@@ -284,8 +287,8 @@ This returns JSON with all the goal, nodes, edges, and MCP server configurations
|
||||
>
|
||||
> ```bash
|
||||
> cd /home/timothy/oss/hive
|
||||
> PYTHONPATH=core:exports python -m AGENT_NAME validate
|
||||
> PYTHONPATH=core:exports python -m AGENT_NAME info
|
||||
> PYTHONPATH=exports uv run python -m AGENT_NAME validate
|
||||
> PYTHONPATH=exports uv run python -m AGENT_NAME info
|
||||
> ```
|
||||
|
||||
---
|
||||
@@ -295,7 +298,7 @@ This returns JSON with all the goal, nodes, edges, and MCP server configurations
|
||||
**RUN validation:**
|
||||
|
||||
```bash
|
||||
cd /home/timothy/oss/hive && PYTHONPATH=core:exports python -m AGENT_NAME validate
|
||||
cd /home/timothy/oss/hive && PYTHONPATH=exports uv run python -m AGENT_NAME validate
|
||||
```
|
||||
|
||||
- If valid: Agent is complete!
|
||||
@@ -317,39 +320,86 @@ mcp__agent-builder__get_session_status()
|
||||
|
||||
## REFERENCE: Node Types
|
||||
|
||||
| Type | tools param | Use when |
|
||||
| -------------- | ---------------------- | ---------------------------------------------- |
|
||||
| `llm_generate` | `'[]'` | Pure reasoning, JSON output, no external calls |
|
||||
| `llm_tool_use` | `'["tool1", "tool2"]'` | Needs to call MCP tools |
|
||||
| Type | tools param | Use when |
|
||||
|------|-------------|----------|
|
||||
| `event_loop` | `'["tool1"]'` or `'[]'` | LLM-powered work with or without tools |
|
||||
| `function` | N/A | Deterministic Python operations, no LLM |
|
||||
|
||||
---
|
||||
|
||||
## REFERENCE: Edge Conditions
|
||||
## REFERENCE: NodeSpec New Fields
|
||||
|
||||
| Condition | When edge is followed |
|
||||
| ------------- | ------------------------------------- |
|
||||
| `on_success` | Source node completed successfully |
|
||||
| `on_failure` | Source node failed |
|
||||
| `always` | Always, regardless of success/failure |
|
||||
| Field | Default | Description |
|
||||
|-------|---------|-------------|
|
||||
| `client_facing` | `False` | Streams output to user, blocks for input between turns |
|
||||
| `nullable_output_keys` | `[]` | Output keys that may remain unset (mutually exclusive outputs) |
|
||||
| `max_node_visits` | `1` | Max executions per run. Set >1 for feedback loop targets. 0=unlimited |
|
||||
|
||||
---
|
||||
|
||||
## REFERENCE: Edge Conditions & Priority
|
||||
|
||||
| Condition | When edge is followed |
|
||||
|-----------|--------------------------------------|
|
||||
| `on_success` | Source node completed successfully |
|
||||
| `on_failure` | Source node failed |
|
||||
| `always` | Always, regardless of success/failure |
|
||||
| `conditional` | When condition_expr evaluates to True |
|
||||
|
||||
**Priority:** Positive = forward edge (evaluated first). Negative = feedback edge (loops back to earlier node). Multiple ON_SUCCESS edges from same source = parallel execution (fan-out).
|
||||
|
||||
---
|
||||
|
||||
## REFERENCE: System Prompt Best Practice
|
||||
|
||||
For nodes with JSON output, include this in the system_prompt:
|
||||
For **internal** event_loop nodes (not client-facing), instruct the LLM to use `set_output`:
|
||||
|
||||
```
|
||||
CRITICAL: Return ONLY raw JSON. NO markdown, NO code blocks.
|
||||
Just the JSON object starting with { and ending with }.
|
||||
Use set_output(key, value) to store your results. For example:
|
||||
- set_output("search_results", <your results as a JSON string>)
|
||||
|
||||
Return this exact structure:
|
||||
{
|
||||
"key1": "...",
|
||||
"key2": "..."
|
||||
}
|
||||
Do NOT return raw JSON. Use the set_output tool to produce outputs.
|
||||
```
|
||||
|
||||
For **client-facing** event_loop nodes, use the STEP 1/STEP 2 pattern:
|
||||
|
||||
```
|
||||
**STEP 1 — Respond to the user (text only, NO tool calls):**
|
||||
[Present information, ask questions, etc.]
|
||||
|
||||
**STEP 2 — After the user responds, call set_output:**
|
||||
- set_output("key", "value based on user's response")
|
||||
```
|
||||
|
||||
This prevents the LLM from calling `set_output` before the user has had a chance to respond. The "NO tool calls" instruction in STEP 1 ensures the node blocks for user input before proceeding.
|
||||
|
||||
---
|
||||
|
||||
## EventLoopNode Runtime
|
||||
|
||||
EventLoopNodes are **auto-created** by `GraphExecutor` at runtime. Both direct `GraphExecutor` and `AgentRuntime` / `create_agent_runtime()` handle event_loop nodes automatically. No manual `node_registry` setup is needed.
|
||||
|
||||
```python
|
||||
# Direct execution
|
||||
from framework.graph.executor import GraphExecutor
|
||||
from framework.runtime.core import Runtime
|
||||
|
||||
storage_path = Path.home() / ".hive" / "my_agent"
|
||||
storage_path.mkdir(parents=True, exist_ok=True)
|
||||
runtime = Runtime(storage_path)
|
||||
|
||||
executor = GraphExecutor(
|
||||
runtime=runtime,
|
||||
llm=llm,
|
||||
tools=tools,
|
||||
tool_executor=tool_executor,
|
||||
storage_path=storage_path,
|
||||
)
|
||||
result = await executor.execute(graph=graph, goal=goal, input_data=input_data)
|
||||
```
|
||||
|
||||
**DO NOT pass `runtime=None` to `GraphExecutor`** — it will crash with `'NoneType' object has no attribute 'start_run'`.
|
||||
|
||||
---
|
||||
|
||||
## COMMON MISTAKES TO AVOID
|
||||
@@ -359,3 +409,7 @@ Return this exact structure:
|
||||
3. **Skipping validation** - Always validate nodes and graph before proceeding
|
||||
4. **Not waiting for approval** - Always ask user before major steps
|
||||
5. **Displaying this file** - Execute the steps, don't show documentation
|
||||
6. **Too many thin nodes** - Prefer fewer, richer nodes (4 nodes > 8 nodes)
|
||||
7. **Missing STEP 1/STEP 2 in client-facing prompts** - Client-facing nodes need explicit phases to prevent premature set_output
|
||||
8. **Forgetting nullable_output_keys** - Mark input_keys that only arrive on certain edges (e.g., feedback) as nullable on the receiving node
|
||||
9. **Adding framework gating for LLM behavior** - Fix prompts or use judges, not ad-hoc code
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
"""
|
||||
Deep Research Agent - Interactive, rigorous research with TUI conversation.
|
||||
|
||||
Research any topic through multi-source web search, quality evaluation,
|
||||
and synthesis. Features client-facing TUI interaction at key checkpoints
|
||||
for user guidance and iterative deepening.
|
||||
"""
|
||||
|
||||
from .agent import DeepResearchAgent, default_agent, goal, nodes, edges
|
||||
from .config import RuntimeConfig, AgentMetadata, default_config, metadata
|
||||
|
||||
__version__ = "1.0.0"
|
||||
|
||||
__all__ = [
|
||||
"DeepResearchAgent",
|
||||
"default_agent",
|
||||
"goal",
|
||||
"nodes",
|
||||
"edges",
|
||||
"RuntimeConfig",
|
||||
"AgentMetadata",
|
||||
"default_config",
|
||||
"metadata",
|
||||
]
|
||||
+96
-18
@@ -1,5 +1,5 @@
|
||||
"""
|
||||
CLI entry point for Online Research Agent.
|
||||
CLI entry point for Deep Research Agent.
|
||||
|
||||
Uses AgentRuntime for multi-entrypoint support with HITL pause/resume.
|
||||
"""
|
||||
@@ -10,7 +10,7 @@ import logging
|
||||
import sys
|
||||
import click
|
||||
|
||||
from .agent import default_agent, OnlineResearchAgent
|
||||
from .agent import default_agent, DeepResearchAgent
|
||||
|
||||
|
||||
def setup_logging(verbose=False, debug=False):
|
||||
@@ -28,7 +28,7 @@ def setup_logging(verbose=False, debug=False):
|
||||
@click.group()
|
||||
@click.version_option(version="1.0.0")
|
||||
def cli():
|
||||
"""Online Research Agent - Deep-dive research with narrative reports."""
|
||||
"""Deep Research Agent - Interactive, rigorous research with TUI conversation."""
|
||||
pass
|
||||
|
||||
|
||||
@@ -59,6 +59,83 @@ def run(topic, mock, quiet, verbose, debug):
|
||||
sys.exit(0 if result.success else 1)
|
||||
|
||||
|
||||
@cli.command()
|
||||
@click.option("--mock", is_flag=True, help="Run in mock mode")
|
||||
@click.option("--verbose", "-v", is_flag=True, help="Show execution details")
|
||||
@click.option("--debug", is_flag=True, help="Show debug logging")
|
||||
def tui(mock, verbose, debug):
|
||||
"""Launch the TUI dashboard for interactive research."""
|
||||
setup_logging(verbose=verbose, debug=debug)
|
||||
|
||||
try:
|
||||
from framework.tui.app import AdenTUI
|
||||
except ImportError:
|
||||
click.echo("TUI requires the 'textual' package. Install with: pip install textual")
|
||||
sys.exit(1)
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from framework.llm import LiteLLMProvider
|
||||
from framework.runner.tool_registry import ToolRegistry
|
||||
from framework.runtime.agent_runtime import create_agent_runtime
|
||||
from framework.runtime.event_bus import EventBus
|
||||
from framework.runtime.execution_stream import EntryPointSpec
|
||||
|
||||
async def run_with_tui():
|
||||
agent = DeepResearchAgent()
|
||||
|
||||
# Build graph and tools
|
||||
agent._event_bus = EventBus()
|
||||
agent._tool_registry = ToolRegistry()
|
||||
|
||||
mcp_config_path = Path(__file__).parent / "mcp_servers.json"
|
||||
if mcp_config_path.exists():
|
||||
agent._tool_registry.load_mcp_config(mcp_config_path)
|
||||
|
||||
llm = None
|
||||
if not mock:
|
||||
llm = LiteLLMProvider(
|
||||
model=agent.config.model,
|
||||
api_key=agent.config.api_key,
|
||||
api_base=agent.config.api_base,
|
||||
)
|
||||
|
||||
tools = list(agent._tool_registry.get_tools().values())
|
||||
tool_executor = agent._tool_registry.get_executor()
|
||||
graph = agent._build_graph()
|
||||
|
||||
storage_path = Path.home() / ".hive" / "deep_research_agent"
|
||||
storage_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
runtime = create_agent_runtime(
|
||||
graph=graph,
|
||||
goal=agent.goal,
|
||||
storage_path=storage_path,
|
||||
entry_points=[
|
||||
EntryPointSpec(
|
||||
id="start",
|
||||
name="Start Research",
|
||||
entry_node="intake",
|
||||
trigger_type="manual",
|
||||
isolation_level="isolated",
|
||||
),
|
||||
],
|
||||
llm=llm,
|
||||
tools=tools,
|
||||
tool_executor=tool_executor,
|
||||
)
|
||||
|
||||
await runtime.start()
|
||||
|
||||
try:
|
||||
app = AdenTUI(runtime)
|
||||
await app.run_async()
|
||||
finally:
|
||||
await runtime.stop()
|
||||
|
||||
asyncio.run(run_with_tui())
|
||||
|
||||
|
||||
@cli.command()
|
||||
@click.option("--json", "output_json", is_flag=True)
|
||||
def info(output_json):
|
||||
@@ -71,6 +148,7 @@ def info(output_json):
|
||||
click.echo(f"Version: {info_data['version']}")
|
||||
click.echo(f"Description: {info_data['description']}")
|
||||
click.echo(f"\nNodes: {', '.join(info_data['nodes'])}")
|
||||
click.echo(f"Client-facing: {', '.join(info_data['client_facing_nodes'])}")
|
||||
click.echo(f"Entry: {info_data['entry_node']}")
|
||||
click.echo(f"Terminal: {', '.join(info_data['terminal_nodes'])}")
|
||||
|
||||
@@ -81,6 +159,9 @@ def validate():
|
||||
validation = default_agent.validate()
|
||||
if validation["valid"]:
|
||||
click.echo("Agent is valid")
|
||||
if validation["warnings"]:
|
||||
for warning in validation["warnings"]:
|
||||
click.echo(f" WARNING: {warning}")
|
||||
else:
|
||||
click.echo("Agent has errors:")
|
||||
for error in validation["errors"]:
|
||||
@@ -91,7 +172,7 @@ def validate():
|
||||
@cli.command()
|
||||
@click.option("--verbose", "-v", is_flag=True)
|
||||
def shell(verbose):
|
||||
"""Interactive research session."""
|
||||
"""Interactive research session (CLI, no TUI)."""
|
||||
asyncio.run(_interactive_shell(verbose))
|
||||
|
||||
|
||||
@@ -99,10 +180,10 @@ async def _interactive_shell(verbose=False):
|
||||
"""Async interactive shell."""
|
||||
setup_logging(verbose=verbose)
|
||||
|
||||
click.echo("=== Online Research Agent ===")
|
||||
click.echo("=== Deep Research Agent ===")
|
||||
click.echo("Enter a topic to research (or 'quit' to exit):\n")
|
||||
|
||||
agent = OnlineResearchAgent()
|
||||
agent = DeepResearchAgent()
|
||||
await agent.start()
|
||||
|
||||
try:
|
||||
@@ -118,7 +199,7 @@ async def _interactive_shell(verbose=False):
|
||||
if not topic.strip():
|
||||
continue
|
||||
|
||||
click.echo("\nResearching... (this may take a few minutes)\n")
|
||||
click.echo("\nResearching...\n")
|
||||
|
||||
result = await agent.trigger_and_wait("start", {"topic": topic})
|
||||
|
||||
@@ -128,16 +209,14 @@ async def _interactive_shell(verbose=False):
|
||||
|
||||
if result.success:
|
||||
output = result.output
|
||||
if "file_path" in output:
|
||||
click.echo(f"\nReport saved to: {output['file_path']}\n")
|
||||
if "final_report" in output:
|
||||
click.echo("\n--- Report Preview ---\n")
|
||||
preview = (
|
||||
output["final_report"][:500] + "..."
|
||||
if len(output.get("final_report", "")) > 500
|
||||
else output.get("final_report", "")
|
||||
)
|
||||
click.echo(preview)
|
||||
if "report_content" in output:
|
||||
click.echo("\n--- Report ---\n")
|
||||
click.echo(output["report_content"])
|
||||
click.echo("\n")
|
||||
if "references" in output:
|
||||
click.echo("--- References ---\n")
|
||||
for ref in output.get("references", []):
|
||||
click.echo(f" [{ref.get('number', '?')}] {ref.get('title', '')} - {ref.get('url', '')}")
|
||||
click.echo("\n")
|
||||
else:
|
||||
click.echo(f"\nResearch failed: {result.error}\n")
|
||||
@@ -148,7 +227,6 @@ async def _interactive_shell(verbose=False):
|
||||
except Exception as e:
|
||||
click.echo(f"Error: {e}", err=True)
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
finally:
|
||||
await agent.stop()
|
||||
@@ -0,0 +1,305 @@
|
||||
"""Agent graph construction for Deep Research Agent."""
|
||||
|
||||
from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
|
||||
from framework.graph.edge import GraphSpec
|
||||
from framework.graph.executor import ExecutionResult, GraphExecutor
|
||||
from framework.runtime.event_bus import EventBus
|
||||
from framework.runtime.core import Runtime
|
||||
from framework.llm import LiteLLMProvider
|
||||
from framework.runner.tool_registry import ToolRegistry
|
||||
|
||||
from .config import default_config, metadata
|
||||
from .nodes import (
|
||||
intake_node,
|
||||
research_node,
|
||||
review_node,
|
||||
report_node,
|
||||
)
|
||||
|
||||
# Goal definition
|
||||
goal = Goal(
|
||||
id="rigorous-interactive-research",
|
||||
name="Rigorous Interactive Research",
|
||||
description=(
|
||||
"Research any topic by searching diverse sources, analyzing findings, "
|
||||
"and producing a cited report — with user checkpoints to guide direction."
|
||||
),
|
||||
success_criteria=[
|
||||
SuccessCriterion(
|
||||
id="source-diversity",
|
||||
description="Use multiple diverse, authoritative sources",
|
||||
metric="source_count",
|
||||
target=">=5",
|
||||
weight=0.25,
|
||||
),
|
||||
SuccessCriterion(
|
||||
id="citation-coverage",
|
||||
description="Every factual claim in the report cites its source",
|
||||
metric="citation_coverage",
|
||||
target="100%",
|
||||
weight=0.25,
|
||||
),
|
||||
SuccessCriterion(
|
||||
id="user-satisfaction",
|
||||
description="User reviews findings before report generation",
|
||||
metric="user_approval",
|
||||
target="true",
|
||||
weight=0.25,
|
||||
),
|
||||
SuccessCriterion(
|
||||
id="report-completeness",
|
||||
description="Final report answers the original research questions",
|
||||
metric="question_coverage",
|
||||
target="90%",
|
||||
weight=0.25,
|
||||
),
|
||||
],
|
||||
constraints=[
|
||||
Constraint(
|
||||
id="no-hallucination",
|
||||
description="Only include information found in fetched sources",
|
||||
constraint_type="quality",
|
||||
category="accuracy",
|
||||
),
|
||||
Constraint(
|
||||
id="source-attribution",
|
||||
description="Every claim must cite its source with a numbered reference",
|
||||
constraint_type="quality",
|
||||
category="accuracy",
|
||||
),
|
||||
Constraint(
|
||||
id="user-checkpoint",
|
||||
description="Present findings to the user before writing the final report",
|
||||
constraint_type="functional",
|
||||
category="interaction",
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
# Node list
|
||||
nodes = [
|
||||
intake_node,
|
||||
research_node,
|
||||
review_node,
|
||||
report_node,
|
||||
]
|
||||
|
||||
# Edge definitions
|
||||
edges = [
|
||||
# intake -> research
|
||||
EdgeSpec(
|
||||
id="intake-to-research",
|
||||
source="intake",
|
||||
target="research",
|
||||
condition=EdgeCondition.ON_SUCCESS,
|
||||
priority=1,
|
||||
),
|
||||
# research -> review
|
||||
EdgeSpec(
|
||||
id="research-to-review",
|
||||
source="research",
|
||||
target="review",
|
||||
condition=EdgeCondition.ON_SUCCESS,
|
||||
priority=1,
|
||||
),
|
||||
# review -> research (feedback loop)
|
||||
EdgeSpec(
|
||||
id="review-to-research-feedback",
|
||||
source="review",
|
||||
target="research",
|
||||
condition=EdgeCondition.CONDITIONAL,
|
||||
condition_expr="needs_more_research == True",
|
||||
priority=1,
|
||||
),
|
||||
# review -> report (user satisfied)
|
||||
EdgeSpec(
|
||||
id="review-to-report",
|
||||
source="review",
|
||||
target="report",
|
||||
condition=EdgeCondition.CONDITIONAL,
|
||||
condition_expr="needs_more_research == False",
|
||||
priority=2,
|
||||
),
|
||||
]
|
||||
|
||||
# Graph configuration
|
||||
entry_node = "intake"
|
||||
entry_points = {"start": "intake"}
|
||||
pause_nodes = []
|
||||
terminal_nodes = ["report"]
|
||||
|
||||
|
||||
class DeepResearchAgent:
|
||||
"""
|
||||
Deep Research Agent — 4-node pipeline with user checkpoints.
|
||||
|
||||
Flow: intake -> research -> review -> report
|
||||
^ |
|
||||
+-- feedback loop (if user wants more)
|
||||
"""
|
||||
|
||||
def __init__(self, config=None):
|
||||
self.config = config or default_config
|
||||
self.goal = goal
|
||||
self.nodes = nodes
|
||||
self.edges = edges
|
||||
self.entry_node = entry_node
|
||||
self.entry_points = entry_points
|
||||
self.pause_nodes = pause_nodes
|
||||
self.terminal_nodes = terminal_nodes
|
||||
self._executor: GraphExecutor | None = None
|
||||
self._graph: GraphSpec | None = None
|
||||
self._event_bus: EventBus | None = None
|
||||
self._tool_registry: ToolRegistry | None = None
|
||||
|
||||
def _build_graph(self) -> GraphSpec:
|
||||
"""Build the GraphSpec."""
|
||||
return GraphSpec(
|
||||
id="deep-research-agent-graph",
|
||||
goal_id=self.goal.id,
|
||||
version="1.0.0",
|
||||
entry_node=self.entry_node,
|
||||
entry_points=self.entry_points,
|
||||
terminal_nodes=self.terminal_nodes,
|
||||
pause_nodes=self.pause_nodes,
|
||||
nodes=self.nodes,
|
||||
edges=self.edges,
|
||||
default_model=self.config.model,
|
||||
max_tokens=self.config.max_tokens,
|
||||
)
|
||||
|
||||
def _setup(self, mock_mode=False) -> GraphExecutor:
|
||||
"""Set up the executor with all components."""
|
||||
from pathlib import Path
|
||||
|
||||
storage_path = Path.home() / ".hive" / "deep_research_agent"
|
||||
storage_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
self._event_bus = EventBus()
|
||||
self._tool_registry = ToolRegistry()
|
||||
|
||||
mcp_config_path = Path(__file__).parent / "mcp_servers.json"
|
||||
if mcp_config_path.exists():
|
||||
self._tool_registry.load_mcp_config(mcp_config_path)
|
||||
|
||||
llm = None
|
||||
if not mock_mode:
|
||||
llm = LiteLLMProvider(
|
||||
model=self.config.model,
|
||||
api_key=self.config.api_key,
|
||||
api_base=self.config.api_base,
|
||||
)
|
||||
|
||||
tool_executor = self._tool_registry.get_executor()
|
||||
tools = list(self._tool_registry.get_tools().values())
|
||||
|
||||
self._graph = self._build_graph()
|
||||
runtime = Runtime(storage_path)
|
||||
|
||||
self._executor = GraphExecutor(
|
||||
runtime=runtime,
|
||||
llm=llm,
|
||||
tools=tools,
|
||||
tool_executor=tool_executor,
|
||||
event_bus=self._event_bus,
|
||||
storage_path=storage_path,
|
||||
)
|
||||
|
||||
return self._executor
|
||||
|
||||
async def start(self, mock_mode=False) -> None:
|
||||
"""Set up the agent (initialize executor and tools)."""
|
||||
if self._executor is None:
|
||||
self._setup(mock_mode=mock_mode)
|
||||
|
||||
async def stop(self) -> None:
|
||||
"""Clean up resources."""
|
||||
self._executor = None
|
||||
self._event_bus = None
|
||||
|
||||
async def trigger_and_wait(
|
||||
self,
|
||||
entry_point: str,
|
||||
input_data: dict,
|
||||
timeout: float | None = None,
|
||||
session_state: dict | None = None,
|
||||
) -> ExecutionResult | None:
|
||||
"""Execute the graph and wait for completion."""
|
||||
if self._executor is None:
|
||||
raise RuntimeError("Agent not started. Call start() first.")
|
||||
if self._graph is None:
|
||||
raise RuntimeError("Graph not built. Call start() first.")
|
||||
|
||||
return await self._executor.execute(
|
||||
graph=self._graph,
|
||||
goal=self.goal,
|
||||
input_data=input_data,
|
||||
session_state=session_state,
|
||||
)
|
||||
|
||||
async def run(
|
||||
self, context: dict, mock_mode=False, session_state=None
|
||||
) -> ExecutionResult:
|
||||
"""Run the agent (convenience method for single execution)."""
|
||||
await self.start(mock_mode=mock_mode)
|
||||
try:
|
||||
result = await self.trigger_and_wait(
|
||||
"start", context, session_state=session_state
|
||||
)
|
||||
return result or ExecutionResult(success=False, error="Execution timeout")
|
||||
finally:
|
||||
await self.stop()
|
||||
|
||||
def info(self):
|
||||
"""Get agent information."""
|
||||
return {
|
||||
"name": metadata.name,
|
||||
"version": metadata.version,
|
||||
"description": metadata.description,
|
||||
"goal": {
|
||||
"name": self.goal.name,
|
||||
"description": self.goal.description,
|
||||
},
|
||||
"nodes": [n.id for n in self.nodes],
|
||||
"edges": [e.id for e in self.edges],
|
||||
"entry_node": self.entry_node,
|
||||
"entry_points": self.entry_points,
|
||||
"pause_nodes": self.pause_nodes,
|
||||
"terminal_nodes": self.terminal_nodes,
|
||||
"client_facing_nodes": [n.id for n in self.nodes if n.client_facing],
|
||||
}
|
||||
|
||||
def validate(self):
|
||||
"""Validate agent structure."""
|
||||
errors = []
|
||||
warnings = []
|
||||
|
||||
node_ids = {node.id for node in self.nodes}
|
||||
for edge in self.edges:
|
||||
if edge.source not in node_ids:
|
||||
errors.append(f"Edge {edge.id}: source '{edge.source}' not found")
|
||||
if edge.target not in node_ids:
|
||||
errors.append(f"Edge {edge.id}: target '{edge.target}' not found")
|
||||
|
||||
if self.entry_node not in node_ids:
|
||||
errors.append(f"Entry node '{self.entry_node}' not found")
|
||||
|
||||
for terminal in self.terminal_nodes:
|
||||
if terminal not in node_ids:
|
||||
errors.append(f"Terminal node '{terminal}' not found")
|
||||
|
||||
for ep_id, node_id in self.entry_points.items():
|
||||
if node_id not in node_ids:
|
||||
errors.append(
|
||||
f"Entry point '{ep_id}' references unknown node '{node_id}'"
|
||||
)
|
||||
|
||||
return {
|
||||
"valid": len(errors) == 0,
|
||||
"errors": errors,
|
||||
"warnings": warnings,
|
||||
}
|
||||
|
||||
|
||||
# Create default instance
|
||||
default_agent = DeepResearchAgent()
|
||||
+6
-3
@@ -32,12 +32,15 @@ class RuntimeConfig:
|
||||
default_config = RuntimeConfig()
|
||||
|
||||
|
||||
# Agent metadata
|
||||
@dataclass
|
||||
class AgentMetadata:
|
||||
name: str = "Online Research Agent"
|
||||
name: str = "Deep Research Agent"
|
||||
version: str = "1.0.0"
|
||||
description: str = "Research any topic by searching multiple sources, synthesizing information, and producing a well-structured narrative report with citations."
|
||||
description: str = (
|
||||
"Interactive research agent that rigorously investigates topics through "
|
||||
"multi-source search, quality evaluation, and synthesis - with TUI conversation "
|
||||
"at key checkpoints for user guidance and feedback."
|
||||
)
|
||||
|
||||
|
||||
metadata = AgentMetadata()
|
||||
+147
@@ -0,0 +1,147 @@
|
||||
"""Node definitions for Deep Research Agent."""
|
||||
|
||||
from framework.graph import NodeSpec
|
||||
|
||||
# Node 1: Intake (client-facing)
|
||||
# Brief conversation to clarify what the user wants researched.
|
||||
intake_node = NodeSpec(
|
||||
id="intake",
|
||||
name="Research Intake",
|
||||
description="Discuss the research topic with the user, clarify scope, and confirm direction",
|
||||
node_type="event_loop",
|
||||
client_facing=True,
|
||||
input_keys=["topic"],
|
||||
output_keys=["research_brief"],
|
||||
system_prompt="""\
|
||||
You are a research intake specialist. The user wants to research a topic.
|
||||
Have a brief conversation to clarify what they need.
|
||||
|
||||
**STEP 1 — Read and respond (text only, NO tool calls):**
|
||||
1. Read the topic provided
|
||||
2. If it's vague, ask 1-2 clarifying questions (scope, angle, depth)
|
||||
3. If it's already clear, confirm your understanding and ask the user to confirm
|
||||
|
||||
Keep it short. Don't over-ask.
|
||||
|
||||
**STEP 2 — After the user confirms, call set_output:**
|
||||
- set_output("research_brief", "A clear paragraph describing exactly what to research, \
|
||||
what questions to answer, what scope to cover, and how deep to go.")
|
||||
""",
|
||||
tools=[],
|
||||
)
|
||||
|
||||
# Node 2: Research
|
||||
# The workhorse — searches the web, fetches content, analyzes sources.
|
||||
# One node with both tools avoids the context-passing overhead of 5 separate nodes.
|
||||
research_node = NodeSpec(
|
||||
id="research",
|
||||
name="Research",
|
||||
description="Search the web, fetch source content, and compile findings",
|
||||
node_type="event_loop",
|
||||
max_node_visits=3,
|
||||
input_keys=["research_brief", "feedback"],
|
||||
output_keys=["findings", "sources", "gaps"],
|
||||
nullable_output_keys=["feedback"],
|
||||
system_prompt="""\
|
||||
You are a research agent. Given a research brief, find and analyze sources.
|
||||
|
||||
If feedback is provided, this is a follow-up round — focus on the gaps identified.
|
||||
|
||||
Work in phases:
|
||||
1. **Search**: Use web_search with 3-5 diverse queries covering different angles.
|
||||
Prioritize authoritative sources (.edu, .gov, established publications).
|
||||
2. **Fetch**: Use web_scrape on the most promising URLs (aim for 5-8 sources).
|
||||
Skip URLs that fail. Extract the substantive content.
|
||||
3. **Analyze**: Review what you've collected. Identify key findings, themes,
|
||||
and any contradictions between sources.
|
||||
|
||||
Important:
|
||||
- Work in batches of 3-4 tool calls at a time to manage context
|
||||
- After each batch, assess whether you have enough material
|
||||
- Prefer quality over quantity — 5 good sources beat 15 thin ones
|
||||
- Track which URL each finding comes from (you'll need citations later)
|
||||
|
||||
When done, use set_output:
|
||||
- set_output("findings", "Structured summary: key findings with source URLs for each claim. \
|
||||
Include themes, contradictions, and confidence levels.")
|
||||
- set_output("sources", [{"url": "...", "title": "...", "summary": "..."}])
|
||||
- set_output("gaps", "What aspects of the research brief are NOT well-covered yet, if any.")
|
||||
""",
|
||||
tools=["web_search", "web_scrape", "load_data", "save_data", "list_data_files"],
|
||||
)
|
||||
|
||||
# Node 3: Review (client-facing)
|
||||
# Shows the user what was found and asks whether to dig deeper or proceed.
|
||||
review_node = NodeSpec(
|
||||
id="review",
|
||||
name="Review Findings",
|
||||
description="Present findings to user and decide whether to research more or write the report",
|
||||
node_type="event_loop",
|
||||
client_facing=True,
|
||||
max_node_visits=3,
|
||||
input_keys=["findings", "sources", "gaps", "research_brief"],
|
||||
output_keys=["needs_more_research", "feedback"],
|
||||
system_prompt="""\
|
||||
Present the research findings to the user clearly and concisely.
|
||||
|
||||
**STEP 1 — Present (your first message, text only, NO tool calls):**
|
||||
1. **Summary** (2-3 sentences of what was found)
|
||||
2. **Key Findings** (bulleted, with confidence levels)
|
||||
3. **Sources Used** (count and quality assessment)
|
||||
4. **Gaps** (what's still unclear or under-covered)
|
||||
|
||||
End by asking: Are they satisfied, or do they want deeper research? \
|
||||
Should we proceed to writing the final report?
|
||||
|
||||
**STEP 2 — After the user responds, call set_output:**
|
||||
- set_output("needs_more_research", "true") — if they want more
|
||||
- set_output("needs_more_research", "false") — if they're satisfied
|
||||
- set_output("feedback", "What the user wants explored further, or empty string")
|
||||
""",
|
||||
tools=[],
|
||||
)
|
||||
|
||||
# Node 4: Report (client-facing)
|
||||
# Writes the final report and presents it to the user.
|
||||
report_node = NodeSpec(
|
||||
id="report",
|
||||
name="Write & Deliver Report",
|
||||
description="Write a cited report from the findings and present it to the user",
|
||||
node_type="event_loop",
|
||||
client_facing=True,
|
||||
input_keys=["findings", "sources", "research_brief"],
|
||||
output_keys=["delivery_status"],
|
||||
system_prompt="""\
|
||||
Write a comprehensive research report and present it to the user.
|
||||
|
||||
**STEP 1 — Write and present the report (text only, NO tool calls):**
|
||||
|
||||
Report structure:
|
||||
1. **Executive Summary** (2-3 paragraphs)
|
||||
2. **Findings** (organized by theme, with [n] citations)
|
||||
3. **Analysis** (synthesis, implications, areas of debate)
|
||||
4. **Conclusion** (key takeaways, confidence assessment)
|
||||
5. **References** (numbered list of sources cited)
|
||||
|
||||
Requirements:
|
||||
- Every factual claim must cite its source with [n] notation
|
||||
- Be objective — present multiple viewpoints where sources disagree
|
||||
- Distinguish well-supported conclusions from speculation
|
||||
- Answer the original research questions from the brief
|
||||
|
||||
End by asking the user if they have questions or want to save the report.
|
||||
|
||||
**STEP 2 — After the user responds:**
|
||||
- Answer follow-up questions from the research material
|
||||
- If they want to save, use write_to_file tool
|
||||
- When the user is satisfied: set_output("delivery_status", "completed")
|
||||
""",
|
||||
tools=["write_to_file"],
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"intake_node",
|
||||
"research_node",
|
||||
"review_node",
|
||||
"report_node",
|
||||
]
|
||||
@@ -1,80 +0,0 @@
|
||||
# Online Research Agent
|
||||
|
||||
Deep-dive research agent that searches 10+ sources and produces comprehensive narrative reports with citations.
|
||||
|
||||
## Features
|
||||
|
||||
- Generates multiple search queries from a topic
|
||||
- Searches and fetches 15+ web sources
|
||||
- Evaluates and ranks sources by relevance
|
||||
- Synthesizes findings into themes
|
||||
- Writes narrative report with numbered citations
|
||||
- Quality checks for uncited claims
|
||||
- Saves report to local markdown file
|
||||
|
||||
## Usage
|
||||
|
||||
### CLI
|
||||
|
||||
```bash
|
||||
# Show agent info
|
||||
python -m online_research_agent info
|
||||
|
||||
# Validate structure
|
||||
python -m online_research_agent validate
|
||||
|
||||
# Run research on a topic
|
||||
python -m online_research_agent run --topic "impact of AI on healthcare"
|
||||
|
||||
# Interactive shell
|
||||
python -m online_research_agent shell
|
||||
```
|
||||
|
||||
### Python API
|
||||
|
||||
```python
|
||||
from online_research_agent import default_agent
|
||||
|
||||
# Simple usage
|
||||
result = await default_agent.run({"topic": "climate change solutions"})
|
||||
|
||||
# Check output
|
||||
if result.success:
|
||||
print(f"Report saved to: {result.output['file_path']}")
|
||||
print(result.output['final_report'])
|
||||
```
|
||||
|
||||
## Workflow
|
||||
|
||||
```
|
||||
parse-query → search-sources → fetch-content → evaluate-sources
|
||||
↓
|
||||
write-report ← synthesize-findings
|
||||
↓
|
||||
quality-check → save-report
|
||||
```
|
||||
|
||||
## Output
|
||||
|
||||
Reports are saved to `./research_reports/` as markdown files with:
|
||||
|
||||
1. Executive Summary
|
||||
2. Introduction
|
||||
3. Key Findings (by theme)
|
||||
4. Analysis
|
||||
5. Conclusion
|
||||
6. References
|
||||
|
||||
## Requirements
|
||||
|
||||
- Python 3.11+
|
||||
- LLM provider API key (Groq, Cerebras, etc.)
|
||||
- Internet access for web search/fetch
|
||||
|
||||
## Configuration
|
||||
|
||||
Edit `config.py` to change:
|
||||
|
||||
- `model`: LLM model (default: groq/moonshotai/kimi-k2-instruct-0905)
|
||||
- `temperature`: Generation temperature (default: 0.7)
|
||||
- `max_tokens`: Max tokens per response (default: 16384)
|
||||
-23
@@ -1,23 +0,0 @@
|
||||
"""
|
||||
Online Research Agent - Deep-dive research with narrative reports.
|
||||
|
||||
Research any topic by searching multiple sources, synthesizing information,
|
||||
and producing a well-structured narrative report with citations.
|
||||
"""
|
||||
|
||||
from .agent import OnlineResearchAgent, default_agent, goal, nodes, edges
|
||||
from .config import RuntimeConfig, AgentMetadata, default_config, metadata
|
||||
|
||||
__version__ = "1.0.0"
|
||||
|
||||
__all__ = [
|
||||
"OnlineResearchAgent",
|
||||
"default_agent",
|
||||
"goal",
|
||||
"nodes",
|
||||
"edges",
|
||||
"RuntimeConfig",
|
||||
"AgentMetadata",
|
||||
"default_config",
|
||||
"metadata",
|
||||
]
|
||||
@@ -1,429 +0,0 @@
|
||||
"""Agent graph construction for Online Research Agent."""
|
||||
|
||||
from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
|
||||
from framework.graph.edge import GraphSpec
|
||||
from framework.graph.executor import ExecutionResult
|
||||
from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
|
||||
from framework.runtime.execution_stream import EntryPointSpec
|
||||
from framework.llm import LiteLLMProvider
|
||||
from framework.runner.tool_registry import ToolRegistry
|
||||
|
||||
from .config import default_config, metadata
|
||||
from .nodes import (
|
||||
parse_query_node,
|
||||
search_sources_node,
|
||||
fetch_content_node,
|
||||
evaluate_sources_node,
|
||||
synthesize_findings_node,
|
||||
write_report_node,
|
||||
quality_check_node,
|
||||
save_report_node,
|
||||
)
|
||||
|
||||
# Goal definition
|
||||
goal = Goal(
|
||||
id="comprehensive-online-research",
|
||||
name="Comprehensive Online Research",
|
||||
description="Research any topic by searching multiple sources, synthesizing information, and producing a well-structured narrative report with citations.",
|
||||
success_criteria=[
|
||||
SuccessCriterion(
|
||||
id="source-coverage",
|
||||
description="Query 10+ diverse sources",
|
||||
metric="source_count",
|
||||
target=">=10",
|
||||
weight=0.20,
|
||||
),
|
||||
SuccessCriterion(
|
||||
id="relevance",
|
||||
description="All sources directly address the query",
|
||||
metric="relevance_score",
|
||||
target="90%",
|
||||
weight=0.25,
|
||||
),
|
||||
SuccessCriterion(
|
||||
id="synthesis",
|
||||
description="Synthesize findings into coherent narrative",
|
||||
metric="coherence_score",
|
||||
target="85%",
|
||||
weight=0.25,
|
||||
),
|
||||
SuccessCriterion(
|
||||
id="citations",
|
||||
description="Include citations for all claims",
|
||||
metric="citation_coverage",
|
||||
target="100%",
|
||||
weight=0.15,
|
||||
),
|
||||
SuccessCriterion(
|
||||
id="actionable",
|
||||
description="Report answers the user's question",
|
||||
metric="answer_completeness",
|
||||
target="90%",
|
||||
weight=0.15,
|
||||
),
|
||||
],
|
||||
constraints=[
|
||||
Constraint(
|
||||
id="no-hallucination",
|
||||
description="Only include information found in sources",
|
||||
constraint_type="quality",
|
||||
category="accuracy",
|
||||
),
|
||||
Constraint(
|
||||
id="source-attribution",
|
||||
description="Every factual claim must cite its source",
|
||||
constraint_type="quality",
|
||||
category="accuracy",
|
||||
),
|
||||
Constraint(
|
||||
id="recency-preference",
|
||||
description="Prefer recent sources when relevant",
|
||||
constraint_type="quality",
|
||||
category="relevance",
|
||||
),
|
||||
Constraint(
|
||||
id="no-paywalled",
|
||||
description="Avoid sources that require payment to access",
|
||||
constraint_type="functional",
|
||||
category="accessibility",
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
# Node list
|
||||
nodes = [
|
||||
parse_query_node,
|
||||
search_sources_node,
|
||||
fetch_content_node,
|
||||
evaluate_sources_node,
|
||||
synthesize_findings_node,
|
||||
write_report_node,
|
||||
quality_check_node,
|
||||
save_report_node,
|
||||
]
|
||||
|
||||
# Edge definitions
|
||||
edges = [
|
||||
EdgeSpec(
|
||||
id="parse-to-search",
|
||||
source="parse-query",
|
||||
target="search-sources",
|
||||
condition=EdgeCondition.ON_SUCCESS,
|
||||
priority=1,
|
||||
),
|
||||
EdgeSpec(
|
||||
id="search-to-fetch",
|
||||
source="search-sources",
|
||||
target="fetch-content",
|
||||
condition=EdgeCondition.ON_SUCCESS,
|
||||
priority=1,
|
||||
),
|
||||
EdgeSpec(
|
||||
id="fetch-to-evaluate",
|
||||
source="fetch-content",
|
||||
target="evaluate-sources",
|
||||
condition=EdgeCondition.ON_SUCCESS,
|
||||
priority=1,
|
||||
),
|
||||
EdgeSpec(
|
||||
id="evaluate-to-synthesize",
|
||||
source="evaluate-sources",
|
||||
target="synthesize-findings",
|
||||
condition=EdgeCondition.ON_SUCCESS,
|
||||
priority=1,
|
||||
),
|
||||
EdgeSpec(
|
||||
id="synthesize-to-write",
|
||||
source="synthesize-findings",
|
||||
target="write-report",
|
||||
condition=EdgeCondition.ON_SUCCESS,
|
||||
priority=1,
|
||||
),
|
||||
EdgeSpec(
|
||||
id="write-to-quality",
|
||||
source="write-report",
|
||||
target="quality-check",
|
||||
condition=EdgeCondition.ON_SUCCESS,
|
||||
priority=1,
|
||||
),
|
||||
EdgeSpec(
|
||||
id="quality-to-save",
|
||||
source="quality-check",
|
||||
target="save-report",
|
||||
condition=EdgeCondition.ON_SUCCESS,
|
||||
priority=1,
|
||||
),
|
||||
]
|
||||
|
||||
# Graph configuration
|
||||
entry_node = "parse-query"
|
||||
entry_points = {"start": "parse-query"}
|
||||
pause_nodes = []
|
||||
terminal_nodes = ["save-report"]
|
||||
|
||||
|
||||
class OnlineResearchAgent:
|
||||
"""
|
||||
Online Research Agent - Deep-dive research with narrative reports.
|
||||
|
||||
Uses AgentRuntime for multi-entrypoint support with HITL pause/resume.
|
||||
"""
|
||||
|
||||
def __init__(self, config=None):
|
||||
self.config = config or default_config
|
||||
self.goal = goal
|
||||
self.nodes = nodes
|
||||
self.edges = edges
|
||||
self.entry_node = entry_node
|
||||
self.entry_points = entry_points
|
||||
self.pause_nodes = pause_nodes
|
||||
self.terminal_nodes = terminal_nodes
|
||||
self._runtime: AgentRuntime | None = None
|
||||
self._graph: GraphSpec | None = None
|
||||
|
||||
def _build_entry_point_specs(self) -> list[EntryPointSpec]:
|
||||
"""Convert entry_points dict to EntryPointSpec list."""
|
||||
specs = []
|
||||
for ep_id, node_id in self.entry_points.items():
|
||||
if ep_id == "start":
|
||||
trigger_type = "manual"
|
||||
name = "Start"
|
||||
elif "_resume" in ep_id:
|
||||
trigger_type = "resume"
|
||||
name = f"Resume from {ep_id.replace('_resume', '')}"
|
||||
else:
|
||||
trigger_type = "manual"
|
||||
name = ep_id.replace("-", " ").title()
|
||||
|
||||
specs.append(
|
||||
EntryPointSpec(
|
||||
id=ep_id,
|
||||
name=name,
|
||||
entry_node=node_id,
|
||||
trigger_type=trigger_type,
|
||||
isolation_level="shared",
|
||||
)
|
||||
)
|
||||
return specs
|
||||
|
||||
def _create_runtime(self, mock_mode=False) -> AgentRuntime:
|
||||
"""Create AgentRuntime instance."""
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
# Persistent storage in ~/.hive for telemetry and run history
|
||||
storage_path = Path.home() / ".hive" / "online_research_agent"
|
||||
storage_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
tool_registry = ToolRegistry()
|
||||
|
||||
# Load MCP servers (always load, needed for tool validation)
|
||||
agent_dir = Path(__file__).parent
|
||||
mcp_config_path = agent_dir / "mcp_servers.json"
|
||||
|
||||
if mcp_config_path.exists():
|
||||
with open(mcp_config_path) as f:
|
||||
mcp_servers = json.load(f)
|
||||
|
||||
for server_config in mcp_servers.get("servers", []):
|
||||
# Resolve relative cwd paths
|
||||
cwd = server_config.get("cwd")
|
||||
if cwd and not Path(cwd).is_absolute():
|
||||
server_config["cwd"] = str(agent_dir / cwd)
|
||||
tool_registry.register_mcp_server(server_config)
|
||||
|
||||
llm = None
|
||||
if not mock_mode:
|
||||
# LiteLLMProvider uses environment variables for API keys
|
||||
llm = LiteLLMProvider(
|
||||
model=self.config.model,
|
||||
api_key=self.config.api_key,
|
||||
api_base=self.config.api_base,
|
||||
)
|
||||
|
||||
self._graph = GraphSpec(
|
||||
id="online-research-agent-graph",
|
||||
goal_id=self.goal.id,
|
||||
version="1.0.0",
|
||||
entry_node=self.entry_node,
|
||||
entry_points=self.entry_points,
|
||||
terminal_nodes=self.terminal_nodes,
|
||||
pause_nodes=self.pause_nodes,
|
||||
nodes=self.nodes,
|
||||
edges=self.edges,
|
||||
default_model=self.config.model,
|
||||
max_tokens=self.config.max_tokens,
|
||||
)
|
||||
|
||||
# Create AgentRuntime with all entry points
|
||||
self._runtime = create_agent_runtime(
|
||||
graph=self._graph,
|
||||
goal=self.goal,
|
||||
storage_path=storage_path,
|
||||
entry_points=self._build_entry_point_specs(),
|
||||
llm=llm,
|
||||
tools=list(tool_registry.get_tools().values()),
|
||||
tool_executor=tool_registry.get_executor(),
|
||||
)
|
||||
|
||||
return self._runtime
|
||||
|
||||
async def start(self, mock_mode=False) -> None:
|
||||
"""Start the agent runtime."""
|
||||
if self._runtime is None:
|
||||
self._create_runtime(mock_mode=mock_mode)
|
||||
await self._runtime.start()
|
||||
|
||||
async def stop(self) -> None:
|
||||
"""Stop the agent runtime."""
|
||||
if self._runtime is not None:
|
||||
await self._runtime.stop()
|
||||
|
||||
async def trigger(
|
||||
self,
|
||||
entry_point: str,
|
||||
input_data: dict,
|
||||
correlation_id: str | None = None,
|
||||
session_state: dict | None = None,
|
||||
) -> str:
|
||||
"""
|
||||
Trigger execution at a specific entry point (non-blocking).
|
||||
|
||||
Args:
|
||||
entry_point: Entry point ID (e.g., "start", "pause-node_resume")
|
||||
input_data: Input data for the execution
|
||||
correlation_id: Optional ID to correlate related executions
|
||||
session_state: Optional session state to resume from (with paused_at, memory)
|
||||
|
||||
Returns:
|
||||
Execution ID for tracking
|
||||
"""
|
||||
if self._runtime is None or not self._runtime.is_running:
|
||||
raise RuntimeError("Agent runtime not started. Call start() first.")
|
||||
return await self._runtime.trigger(
|
||||
entry_point, input_data, correlation_id, session_state=session_state
|
||||
)
|
||||
|
||||
async def trigger_and_wait(
|
||||
self,
|
||||
entry_point: str,
|
||||
input_data: dict,
|
||||
timeout: float | None = None,
|
||||
session_state: dict | None = None,
|
||||
) -> ExecutionResult | None:
|
||||
"""
|
||||
Trigger execution and wait for completion.
|
||||
|
||||
Args:
|
||||
entry_point: Entry point ID
|
||||
input_data: Input data for the execution
|
||||
timeout: Maximum time to wait (seconds)
|
||||
session_state: Optional session state to resume from (with paused_at, memory)
|
||||
|
||||
Returns:
|
||||
ExecutionResult or None if timeout
|
||||
"""
|
||||
if self._runtime is None or not self._runtime.is_running:
|
||||
raise RuntimeError("Agent runtime not started. Call start() first.")
|
||||
return await self._runtime.trigger_and_wait(
|
||||
entry_point, input_data, timeout, session_state=session_state
|
||||
)
|
||||
|
||||
async def run(
|
||||
self, context: dict, mock_mode=False, session_state=None
|
||||
) -> ExecutionResult:
|
||||
"""
|
||||
Run the agent (convenience method for simple single execution).
|
||||
|
||||
For more control, use start() + trigger_and_wait() + stop().
|
||||
"""
|
||||
await self.start(mock_mode=mock_mode)
|
||||
try:
|
||||
# Determine entry point based on session_state
|
||||
if session_state and "paused_at" in session_state:
|
||||
paused_node = session_state["paused_at"]
|
||||
resume_key = f"{paused_node}_resume"
|
||||
if resume_key in self.entry_points:
|
||||
entry_point = resume_key
|
||||
else:
|
||||
entry_point = "start"
|
||||
else:
|
||||
entry_point = "start"
|
||||
|
||||
result = await self.trigger_and_wait(
|
||||
entry_point, context, session_state=session_state
|
||||
)
|
||||
return result or ExecutionResult(success=False, error="Execution timeout")
|
||||
finally:
|
||||
await self.stop()
|
||||
|
||||
async def get_goal_progress(self) -> dict:
|
||||
"""Get goal progress across all executions."""
|
||||
if self._runtime is None:
|
||||
raise RuntimeError("Agent runtime not started")
|
||||
return await self._runtime.get_goal_progress()
|
||||
|
||||
def get_stats(self) -> dict:
|
||||
"""Get runtime statistics."""
|
||||
if self._runtime is None:
|
||||
return {"running": False}
|
||||
return self._runtime.get_stats()
|
||||
|
||||
def info(self):
|
||||
"""Get agent information."""
|
||||
return {
|
||||
"name": metadata.name,
|
||||
"version": metadata.version,
|
||||
"description": metadata.description,
|
||||
"goal": {
|
||||
"name": self.goal.name,
|
||||
"description": self.goal.description,
|
||||
},
|
||||
"nodes": [n.id for n in self.nodes],
|
||||
"edges": [e.id for e in self.edges],
|
||||
"entry_node": self.entry_node,
|
||||
"entry_points": self.entry_points,
|
||||
"pause_nodes": self.pause_nodes,
|
||||
"terminal_nodes": self.terminal_nodes,
|
||||
"multi_entrypoint": True,
|
||||
}
|
||||
|
||||
def validate(self):
|
||||
"""Validate agent structure."""
|
||||
errors = []
|
||||
warnings = []
|
||||
|
||||
node_ids = {node.id for node in self.nodes}
|
||||
for edge in self.edges:
|
||||
if edge.source not in node_ids:
|
||||
errors.append(f"Edge {edge.id}: source '{edge.source}' not found")
|
||||
if edge.target not in node_ids:
|
||||
errors.append(f"Edge {edge.id}: target '{edge.target}' not found")
|
||||
|
||||
if self.entry_node not in node_ids:
|
||||
errors.append(f"Entry node '{self.entry_node}' not found")
|
||||
|
||||
for terminal in self.terminal_nodes:
|
||||
if terminal not in node_ids:
|
||||
errors.append(f"Terminal node '{terminal}' not found")
|
||||
|
||||
for pause in self.pause_nodes:
|
||||
if pause not in node_ids:
|
||||
errors.append(f"Pause node '{pause}' not found")
|
||||
|
||||
# Validate entry points
|
||||
for ep_id, node_id in self.entry_points.items():
|
||||
if node_id not in node_ids:
|
||||
errors.append(
|
||||
f"Entry point '{ep_id}' references unknown node '{node_id}'"
|
||||
)
|
||||
|
||||
return {
|
||||
"valid": len(errors) == 0,
|
||||
"errors": errors,
|
||||
"warnings": warnings,
|
||||
}
|
||||
|
||||
|
||||
# Create default instance
|
||||
default_agent = OnlineResearchAgent()
|
||||
-396
@@ -1,396 +0,0 @@
|
||||
"""Node definitions for Online Research Agent."""
|
||||
|
||||
from framework.graph import NodeSpec
|
||||
|
||||
# Node 1: Parse Query
|
||||
parse_query_node = NodeSpec(
|
||||
id="parse-query",
|
||||
name="Parse Query",
|
||||
description="Analyze the research topic and generate 3-5 diverse search queries to cover different aspects",
|
||||
node_type="llm_generate",
|
||||
input_keys=["topic"],
|
||||
output_keys=["search_queries", "research_focus", "key_aspects"],
|
||||
output_schema={
|
||||
"research_focus": {
|
||||
"type": "string",
|
||||
"required": True,
|
||||
"description": "Brief statement of what we're researching",
|
||||
},
|
||||
"key_aspects": {
|
||||
"type": "array",
|
||||
"required": True,
|
||||
"description": "List of 3-5 key aspects to investigate",
|
||||
},
|
||||
"search_queries": {
|
||||
"type": "array",
|
||||
"required": True,
|
||||
"description": "List of 3-5 search queries",
|
||||
},
|
||||
},
|
||||
system_prompt="""\
|
||||
You are a research query strategist. Given a research topic, analyze it and generate search queries.
|
||||
|
||||
Your task:
|
||||
1. Understand the core research question
|
||||
2. Identify 3-5 key aspects to investigate
|
||||
3. Generate 3-5 diverse search queries that will find comprehensive information
|
||||
|
||||
CRITICAL: Return ONLY raw JSON. NO markdown, NO code blocks.
|
||||
|
||||
Return this JSON structure:
|
||||
{
|
||||
"research_focus": "Brief statement of what we're researching",
|
||||
"key_aspects": ["aspect1", "aspect2", "aspect3"],
|
||||
"search_queries": [
|
||||
"query 1 - broad overview",
|
||||
"query 2 - specific angle",
|
||||
"query 3 - recent developments",
|
||||
"query 4 - expert opinions",
|
||||
"query 5 - data/statistics"
|
||||
]
|
||||
}
|
||||
""",
|
||||
tools=[],
|
||||
max_retries=3,
|
||||
)
|
||||
|
||||
# Node 2: Search Sources
|
||||
search_sources_node = NodeSpec(
|
||||
id="search-sources",
|
||||
name="Search Sources",
|
||||
description="Execute web searches using the generated queries to find 15+ source URLs",
|
||||
node_type="llm_tool_use",
|
||||
input_keys=["search_queries", "research_focus"],
|
||||
output_keys=["source_urls", "search_results_summary"],
|
||||
output_schema={
|
||||
"source_urls": {
|
||||
"type": "array",
|
||||
"required": True,
|
||||
"description": "List of source URLs found",
|
||||
},
|
||||
"search_results_summary": {
|
||||
"type": "string",
|
||||
"required": True,
|
||||
"description": "Brief summary of what was found",
|
||||
},
|
||||
},
|
||||
system_prompt="""\
|
||||
You are a research assistant executing web searches. Use the web_search tool to find sources.
|
||||
|
||||
Your task:
|
||||
1. Execute each search query using web_search tool
|
||||
2. Collect URLs from search results
|
||||
3. Aim for 15+ diverse sources
|
||||
|
||||
After searching, return JSON with found sources:
|
||||
{
|
||||
"source_urls": ["url1", "url2", ...],
|
||||
"search_results_summary": "Brief summary of what was found"
|
||||
}
|
||||
""",
|
||||
tools=["web_search"],
|
||||
max_retries=3,
|
||||
)
|
||||
|
||||
# Node 3: Fetch Content
|
||||
fetch_content_node = NodeSpec(
|
||||
id="fetch-content",
|
||||
name="Fetch Content",
|
||||
description="Fetch and extract content from the discovered source URLs",
|
||||
node_type="llm_tool_use",
|
||||
input_keys=["source_urls", "research_focus"],
|
||||
output_keys=["fetched_sources", "fetch_errors"],
|
||||
output_schema={
|
||||
"fetched_sources": {
|
||||
"type": "array",
|
||||
"required": True,
|
||||
"description": "List of fetched source objects with url, title, content",
|
||||
},
|
||||
"fetch_errors": {
|
||||
"type": "array",
|
||||
"required": True,
|
||||
"description": "List of URLs that failed to fetch",
|
||||
},
|
||||
},
|
||||
system_prompt="""\
|
||||
You are a content fetcher. Use web_scrape tool to retrieve content from URLs.
|
||||
|
||||
Your task:
|
||||
1. Fetch content from each source URL using web_scrape tool
|
||||
2. Extract the main content relevant to the research focus
|
||||
3. Track any URLs that failed to fetch
|
||||
|
||||
After fetching, return JSON:
|
||||
{
|
||||
"fetched_sources": [
|
||||
{"url": "...", "title": "...", "content": "extracted text..."},
|
||||
...
|
||||
],
|
||||
"fetch_errors": ["url that failed", ...]
|
||||
}
|
||||
""",
|
||||
tools=["web_scrape"],
|
||||
max_retries=3,
|
||||
)
|
||||
|
||||
# Node 4: Evaluate Sources
|
||||
evaluate_sources_node = NodeSpec(
|
||||
id="evaluate-sources",
|
||||
name="Evaluate Sources",
|
||||
description="Score sources for relevance and quality, filter to top 10",
|
||||
node_type="llm_generate",
|
||||
input_keys=["fetched_sources", "research_focus", "key_aspects"],
|
||||
output_keys=["ranked_sources", "source_analysis"],
|
||||
output_schema={
|
||||
"ranked_sources": {
|
||||
"type": "array",
|
||||
"required": True,
|
||||
"description": "List of ranked sources with scores",
|
||||
},
|
||||
"source_analysis": {
|
||||
"type": "string",
|
||||
"required": True,
|
||||
"description": "Overview of source quality and coverage",
|
||||
},
|
||||
},
|
||||
system_prompt="""\
|
||||
You are a source evaluator. Assess each source for quality and relevance.
|
||||
|
||||
Scoring criteria:
|
||||
- Relevance to research focus (1-10)
|
||||
- Source credibility (1-10)
|
||||
- Information depth (1-10)
|
||||
- Recency if relevant (1-10)
|
||||
|
||||
Your task:
|
||||
1. Score each source
|
||||
2. Rank by combined score
|
||||
3. Select top 10 sources
|
||||
4. Note what each source uniquely contributes
|
||||
|
||||
Return JSON:
|
||||
{
|
||||
"ranked_sources": [
|
||||
{"url": "...", "title": "...", "content": "...", "score": 8.5, "unique_value": "..."},
|
||||
...
|
||||
],
|
||||
"source_analysis": "Overview of source quality and coverage"
|
||||
}
|
||||
""",
|
||||
tools=[],
|
||||
max_retries=3,
|
||||
)
|
||||
|
||||
# Node 5: Synthesize Findings
|
||||
synthesize_findings_node = NodeSpec(
|
||||
id="synthesize-findings",
|
||||
name="Synthesize Findings",
|
||||
description="Extract key facts from sources and identify common themes",
|
||||
node_type="llm_generate",
|
||||
input_keys=["ranked_sources", "research_focus", "key_aspects"],
|
||||
output_keys=["key_findings", "themes", "source_citations"],
|
||||
output_schema={
|
||||
"key_findings": {
|
||||
"type": "array",
|
||||
"required": True,
|
||||
"description": "List of key findings with sources and confidence",
|
||||
},
|
||||
"themes": {
|
||||
"type": "array",
|
||||
"required": True,
|
||||
"description": "List of themes with descriptions and supporting sources",
|
||||
},
|
||||
"source_citations": {
|
||||
"type": "object",
|
||||
"required": True,
|
||||
"description": "Map of facts to supporting URLs",
|
||||
},
|
||||
},
|
||||
system_prompt="""\
|
||||
You are a research synthesizer. Analyze multiple sources to extract insights.
|
||||
|
||||
Your task:
|
||||
1. Identify key facts from each source
|
||||
2. Find common themes across sources
|
||||
3. Note contradictions or debates
|
||||
4. Build a citation map (fact -> source URL)
|
||||
|
||||
Return JSON:
|
||||
{
|
||||
"key_findings": [
|
||||
{"finding": "...", "sources": ["url1", "url2"], "confidence": "high/medium/low"},
|
||||
...
|
||||
],
|
||||
"themes": [
|
||||
{"theme": "...", "description": "...", "supporting_sources": ["url1", ...]},
|
||||
...
|
||||
],
|
||||
"source_citations": {
|
||||
"fact or claim": ["supporting url1", "url2"],
|
||||
...
|
||||
}
|
||||
}
|
||||
""",
|
||||
tools=[],
|
||||
max_retries=3,
|
||||
)
|
||||
|
||||
# Node 6: Write Report
|
||||
write_report_node = NodeSpec(
|
||||
id="write-report",
|
||||
name="Write Report",
|
||||
description="Generate a narrative report with proper citations",
|
||||
node_type="llm_generate",
|
||||
input_keys=[
|
||||
"key_findings",
|
||||
"themes",
|
||||
"source_citations",
|
||||
"research_focus",
|
||||
"ranked_sources",
|
||||
],
|
||||
output_keys=["report_content", "references"],
|
||||
output_schema={
|
||||
"report_content": {
|
||||
"type": "string",
|
||||
"required": True,
|
||||
"description": "Full markdown report text with citations",
|
||||
},
|
||||
"references": {
|
||||
"type": "array",
|
||||
"required": True,
|
||||
"description": "List of reference objects with number, url, title",
|
||||
},
|
||||
},
|
||||
system_prompt="""\
|
||||
You are a research report writer. Create a well-structured narrative report.
|
||||
|
||||
Report structure:
|
||||
1. Executive Summary (2-3 paragraphs)
|
||||
2. Introduction (context and scope)
|
||||
3. Key Findings (organized by theme)
|
||||
4. Analysis (synthesis and implications)
|
||||
5. Conclusion
|
||||
6. References (numbered list of all sources)
|
||||
|
||||
Citation format: Use numbered citations like [1], [2] that correspond to the References section.
|
||||
|
||||
IMPORTANT:
|
||||
- Every factual claim MUST have a citation
|
||||
- Write in clear, professional prose
|
||||
- Be objective and balanced
|
||||
- Highlight areas of consensus and debate
|
||||
|
||||
Return JSON:
|
||||
{
|
||||
"report_content": "Full markdown report text with citations...",
|
||||
"references": [
|
||||
{"number": 1, "url": "...", "title": "..."},
|
||||
...
|
||||
]
|
||||
}
|
||||
""",
|
||||
tools=[],
|
||||
max_retries=3,
|
||||
)
|
||||
|
||||
# Node 7: Quality Check
|
||||
quality_check_node = NodeSpec(
|
||||
id="quality-check",
|
||||
name="Quality Check",
|
||||
description="Verify all claims have citations and report is coherent",
|
||||
node_type="llm_generate",
|
||||
input_keys=["report_content", "references", "source_citations"],
|
||||
output_keys=["quality_score", "issues", "final_report"],
|
||||
output_schema={
|
||||
"quality_score": {
|
||||
"type": "number",
|
||||
"required": True,
|
||||
"description": "Quality score 0-1",
|
||||
},
|
||||
"issues": {
|
||||
"type": "array",
|
||||
"required": True,
|
||||
"description": "List of issues found and fixed",
|
||||
},
|
||||
"final_report": {
|
||||
"type": "string",
|
||||
"required": True,
|
||||
"description": "Corrected full report",
|
||||
},
|
||||
},
|
||||
system_prompt="""\
|
||||
You are a quality assurance reviewer. Check the research report for issues.
|
||||
|
||||
Check for:
|
||||
1. Uncited claims (factual statements without [n] citation)
|
||||
2. Broken citations (references to non-existent numbers)
|
||||
3. Coherence (logical flow between sections)
|
||||
4. Completeness (all key aspects covered)
|
||||
5. Accuracy (claims match source content)
|
||||
|
||||
If issues found, fix them in the final report.
|
||||
|
||||
Return JSON:
|
||||
{
|
||||
"quality_score": 0.95,
|
||||
"issues": [
|
||||
{"type": "uncited_claim", "location": "paragraph 3", "fixed": true},
|
||||
...
|
||||
],
|
||||
"final_report": "Corrected full report with all issues fixed..."
|
||||
}
|
||||
""",
|
||||
tools=[],
|
||||
max_retries=3,
|
||||
)
|
||||
|
||||
# Node 8: Save Report
|
||||
save_report_node = NodeSpec(
|
||||
id="save-report",
|
||||
name="Save Report",
|
||||
description="Write the final report to a local markdown file",
|
||||
node_type="llm_tool_use",
|
||||
input_keys=["final_report", "references", "research_focus"],
|
||||
output_keys=["file_path", "save_status"],
|
||||
output_schema={
|
||||
"file_path": {
|
||||
"type": "string",
|
||||
"required": True,
|
||||
"description": "Path where report was saved",
|
||||
},
|
||||
"save_status": {
|
||||
"type": "string",
|
||||
"required": True,
|
||||
"description": "Status of save operation",
|
||||
},
|
||||
},
|
||||
system_prompt="""\
|
||||
You are a file manager. Save the research report to disk.
|
||||
|
||||
Your task:
|
||||
1. Generate a filename from the research focus (slugified, with date)
|
||||
2. Use the write_to_file tool to save the report as markdown
|
||||
3. Save to the ./research_reports/ directory
|
||||
|
||||
Filename format: research_YYYY-MM-DD_topic-slug.md
|
||||
|
||||
Return JSON:
|
||||
{
|
||||
"file_path": "research_reports/research_2026-01-23_topic-name.md",
|
||||
"save_status": "success"
|
||||
}
|
||||
""",
|
||||
tools=["write_to_file"],
|
||||
max_retries=3,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"parse_query_node",
|
||||
"search_sources_node",
|
||||
"fetch_content_node",
|
||||
"evaluate_sources_node",
|
||||
"synthesize_findings_node",
|
||||
"write_report_node",
|
||||
"quality_check_node",
|
||||
"save_report_node",
|
||||
]
|
||||
@@ -1,10 +1,10 @@
|
||||
---
|
||||
name: building-agents-core
|
||||
description: Core concepts for goal-driven agents - architecture, node types, tool discovery, and workflow overview. Use when starting agent development or need to understand agent fundamentals.
|
||||
description: Core concepts for goal-driven agents - architecture, node types (event_loop, function), tool discovery, and workflow overview. Use when starting agent development or need to understand agent fundamentals.
|
||||
license: Apache-2.0
|
||||
metadata:
|
||||
author: hive
|
||||
version: "1.0"
|
||||
version: "2.0"
|
||||
type: foundational
|
||||
part_of: building-agents
|
||||
---
|
||||
@@ -29,10 +29,10 @@ exports/my_agent/
|
||||
|
||||
**Key Principle: Agent is visible and editable during build**
|
||||
|
||||
- ✅ Files created immediately as components are approved
|
||||
- ✅ User can watch files grow in their editor
|
||||
- ✅ No session state - just direct file writes
|
||||
- ✅ No "export" step - agent is ready when build completes
|
||||
- Files created immediately as components are approved
|
||||
- User can watch files grow in their editor
|
||||
- No session state - just direct file writes
|
||||
- No "export" step - agent is ready when build completes
|
||||
|
||||
## Core Concepts
|
||||
|
||||
@@ -73,62 +73,212 @@ Unit of work (written to nodes/__init__.py)
|
||||
|
||||
**Node Types:**
|
||||
|
||||
- `llm_generate` - Text generation, parsing
|
||||
- `llm_tool_use` - Actions requiring tools
|
||||
- `router` - Conditional branching
|
||||
- `function` - Deterministic operations
|
||||
- `event_loop` — Multi-turn streaming loop with tool execution and judge-based evaluation. Works with or without tools.
|
||||
- `function` — Deterministic Python operations. No LLM involved.
|
||||
|
||||
```python
|
||||
search_node = NodeSpec(
|
||||
id="search-web",
|
||||
name="Search Web",
|
||||
description="Search for information online",
|
||||
node_type="llm_tool_use",
|
||||
description="Search for information and extract results",
|
||||
node_type="event_loop",
|
||||
input_keys=["query"],
|
||||
output_keys=["search_results"],
|
||||
system_prompt="Search the web for: {query}",
|
||||
system_prompt="Search the web for: {query}. Use the web_search tool to find results, then call set_output to store them.",
|
||||
tools=["web_search"],
|
||||
max_retries=3,
|
||||
)
|
||||
```
|
||||
|
||||
**NodeSpec Fields for Event Loop Nodes:**
|
||||
|
||||
| Field | Default | Description |
|
||||
|-------|---------|-------------|
|
||||
| `client_facing` | `False` | If True, streams output to user and blocks for input between turns |
|
||||
| `nullable_output_keys` | `[]` | Output keys that may remain unset (for mutually exclusive outputs) |
|
||||
| `max_node_visits` | `1` | Max times this node executes per run. Set >1 for feedback loop targets |
|
||||
|
||||
### Edge
|
||||
|
||||
Connection between nodes (written to agent.py)
|
||||
|
||||
**Edge Conditions:**
|
||||
|
||||
- `on_success` - Proceed if node succeeds
|
||||
- `on_failure` - Handle errors
|
||||
- `always` - Always proceed
|
||||
- `conditional` - Based on expression
|
||||
- `on_success` — Proceed if node succeeds (most common)
|
||||
- `on_failure` — Handle errors
|
||||
- `always` — Always proceed
|
||||
- `conditional` — Based on expression evaluating node output
|
||||
|
||||
**Edge Priority:**
|
||||
|
||||
Priority controls evaluation order when multiple edges leave the same node. Higher priority edges are evaluated first. Use negative priority for feedback edges (edges that loop back to earlier nodes).
|
||||
|
||||
```python
|
||||
# Forward edge (evaluated first)
|
||||
EdgeSpec(
|
||||
id="search-to-analyze",
|
||||
source="search-web",
|
||||
target="analyze-results",
|
||||
condition=EdgeCondition.ON_SUCCESS,
|
||||
id="review-to-campaign",
|
||||
source="review",
|
||||
target="campaign-builder",
|
||||
condition=EdgeCondition.CONDITIONAL,
|
||||
condition_expr="output.get('approved_contacts') is not None",
|
||||
priority=1,
|
||||
)
|
||||
|
||||
# Feedback edge (evaluated after forward edges)
|
||||
EdgeSpec(
|
||||
id="review-feedback",
|
||||
source="review",
|
||||
target="extractor",
|
||||
condition=EdgeCondition.CONDITIONAL,
|
||||
condition_expr="output.get('redo_extraction') is not None",
|
||||
priority=-1,
|
||||
)
|
||||
```
|
||||
|
||||
### Pause/Resume
|
||||
### Client-Facing Nodes
|
||||
|
||||
Multi-turn conversations
|
||||
|
||||
- **Pause nodes** - Stop execution, wait for user input
|
||||
- **Resume entry points** - Continue from pause with user's response
|
||||
For multi-turn conversations with the user, set `client_facing=True` on a node. The node will:
|
||||
- Stream its LLM output directly to the end user
|
||||
- Block for user input between conversational turns
|
||||
- Resume when new input is injected via `inject_event()`
|
||||
|
||||
```python
|
||||
# Example pause/resume configuration
|
||||
pause_nodes = ["request-clarification"]
|
||||
entry_points = {
|
||||
"start": "analyze-request",
|
||||
"request-clarification_resume": "process-clarification"
|
||||
}
|
||||
intake_node = NodeSpec(
|
||||
id="intake",
|
||||
name="Intake",
|
||||
description="Gather requirements from the user",
|
||||
node_type="event_loop",
|
||||
client_facing=True,
|
||||
input_keys=[],
|
||||
output_keys=["repo_url", "project_url"],
|
||||
system_prompt="You are the intake agent. Ask the user for the repo URL and project URL.",
|
||||
)
|
||||
```
|
||||
|
||||
> **Legacy Note:** The old `pause_nodes` / `entry_points` pattern still works but `client_facing=True` is preferred for new agents.
|
||||
|
||||
**STEP 1 / STEP 2 Prompt Pattern:** For client-facing nodes, structure the system prompt with two explicit phases:
|
||||
|
||||
```python
|
||||
system_prompt="""\
|
||||
**STEP 1 — Respond to the user (text only, NO tool calls):**
|
||||
[Present information, ask questions, etc.]
|
||||
|
||||
**STEP 2 — After the user responds, call set_output:**
|
||||
[Call set_output with the structured outputs]
|
||||
"""
|
||||
```
|
||||
|
||||
This prevents the LLM from calling `set_output` prematurely before the user has had a chance to respond.
|
||||
|
||||
### Node Design: Fewer, Richer Nodes
|
||||
|
||||
Prefer fewer nodes that do more work over many thin single-purpose nodes:
|
||||
|
||||
- **Bad**: 8 thin nodes (parse query → search → fetch → evaluate → synthesize → write → check → save)
|
||||
- **Good**: 4 rich nodes (intake → research → review → report)
|
||||
|
||||
Why: Each node boundary requires serializing outputs and passing context. Fewer nodes means the LLM retains full context of its work within the node. A research node that searches, fetches, and analyzes keeps all the source material in its conversation history.
|
||||
|
||||
### nullable_output_keys for Cross-Edge Inputs
|
||||
|
||||
When a node receives inputs that only arrive on certain edges (e.g., `feedback` only comes from a review → research feedback loop, not from intake → research), mark those keys as `nullable_output_keys`:
|
||||
|
||||
```python
|
||||
research_node = NodeSpec(
|
||||
id="research",
|
||||
input_keys=["research_brief", "feedback"],
|
||||
nullable_output_keys=["feedback"], # Not present on first visit
|
||||
max_node_visits=3,
|
||||
...
|
||||
)
|
||||
```
|
||||
|
||||
## Event Loop Architecture Concepts
|
||||
|
||||
### How EventLoopNode Works
|
||||
|
||||
An event loop node runs a multi-turn loop:
|
||||
1. LLM receives system prompt + conversation history
|
||||
2. LLM responds (text and/or tool calls)
|
||||
3. Tool calls are executed, results added to conversation
|
||||
4. Judge evaluates: ACCEPT (exit loop), RETRY (loop again), or ESCALATE
|
||||
5. Repeat until judge ACCEPTs or max_iterations reached
|
||||
|
||||
### EventLoopNode Runtime
|
||||
|
||||
EventLoopNodes are **auto-created** by `GraphExecutor` at runtime. You do NOT need to manually register them. Both `GraphExecutor` (direct) and `AgentRuntime` / `create_agent_runtime()` handle event_loop nodes automatically.
|
||||
|
||||
```python
|
||||
# Direct execution — executor auto-creates EventLoopNodes
|
||||
from framework.graph.executor import GraphExecutor
|
||||
from framework.runtime.core import Runtime
|
||||
|
||||
runtime = Runtime(storage_path)
|
||||
executor = GraphExecutor(
|
||||
runtime=runtime,
|
||||
llm=llm,
|
||||
tools=tools,
|
||||
tool_executor=tool_executor,
|
||||
storage_path=storage_path,
|
||||
)
|
||||
result = await executor.execute(graph=graph, goal=goal, input_data=input_data)
|
||||
|
||||
# TUI execution — AgentRuntime also works
|
||||
from framework.runtime.agent_runtime import create_agent_runtime
|
||||
runtime = create_agent_runtime(
|
||||
graph=graph, goal=goal, storage_path=storage_path,
|
||||
entry_points=[...], llm=llm, tools=tools, tool_executor=tool_executor,
|
||||
)
|
||||
```
|
||||
|
||||
### set_output
|
||||
|
||||
Nodes produce structured outputs by calling `set_output(key, value)` — a synthetic tool injected by the framework. When the LLM calls `set_output`, the value is stored in the output accumulator and made available to downstream nodes via shared memory.
|
||||
|
||||
`set_output` is NOT a real tool — it is excluded from `real_tool_results`. For client-facing nodes, this means a turn where the LLM only calls `set_output` (no other tools) is treated as a conversational boundary and will block for user input.
|
||||
|
||||
### JudgeProtocol
|
||||
|
||||
**The judge is the SOLE mechanism for acceptance decisions.** Do not add ad-hoc framework gating, output rollback, or premature rejection logic. If the LLM calls `set_output` too early, fix it with better prompts or a custom judge — not framework-level guards.
|
||||
|
||||
The judge controls when a node's loop exits:
|
||||
- **Implicit judge** (default, no judge configured): ACCEPTs when the LLM finishes with no tool calls and all required output keys are set
|
||||
- **SchemaJudge**: Validates outputs against a Pydantic model
|
||||
- **Custom judges**: Implement `evaluate(context) -> JudgeVerdict`
|
||||
|
||||
### LoopConfig
|
||||
|
||||
Controls loop behavior:
|
||||
- `max_iterations` (default 50) — prevents infinite loops
|
||||
- `max_tool_calls_per_turn` (default 10) — limits tool calls per LLM response
|
||||
- `stall_detection_threshold` (default 3) — detects repeated identical responses
|
||||
- `max_history_tokens` (default 32000) — triggers conversation compaction
|
||||
|
||||
### Data Tools (Spillover Management)
|
||||
|
||||
When tool results exceed the context window, the framework automatically saves them to a spillover directory and truncates with a hint. Nodes that produce or consume large data should include the data tools:
|
||||
|
||||
- `save_data(filename, data, data_dir)` — Write data to a file in the data directory
|
||||
- `load_data(filename, data_dir, offset=0, limit=50)` — Read data with line-based pagination
|
||||
- `list_data_files(data_dir)` — List available data files
|
||||
|
||||
These are real MCP tools (not synthetic). Add them to nodes that handle large tool results:
|
||||
|
||||
```python
|
||||
research_node = NodeSpec(
|
||||
...
|
||||
tools=["web_search", "web_scrape", "load_data", "save_data", "list_data_files"],
|
||||
)
|
||||
```
|
||||
|
||||
### Fan-Out / Fan-In
|
||||
|
||||
Multiple ON_SUCCESS edges from the same source create parallel execution. All branches run concurrently via `asyncio.gather()`. Parallel event_loop nodes must have disjoint `output_keys`.
|
||||
|
||||
### max_node_visits
|
||||
|
||||
Controls how many times a node can execute in one graph run. Default is 1. Set higher for nodes that are targets of feedback edges (review-reject loops). Set 0 for unlimited (guarded by max_steps).
|
||||
|
||||
## Tool Discovery & Validation
|
||||
|
||||
**CRITICAL:** Before adding a node with tools, you MUST verify the tools exist.
|
||||
@@ -157,29 +307,6 @@ mcp__agent-builder__list_mcp_tools()
|
||||
mcp__agent-builder__list_mcp_tools(server_name="tools")
|
||||
```
|
||||
|
||||
This returns available tools with their descriptions and parameters:
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"tools_by_server": {
|
||||
"tools": [
|
||||
{
|
||||
"name": "web_search",
|
||||
"description": "Search the web...",
|
||||
"parameters": ["query"]
|
||||
},
|
||||
{
|
||||
"name": "web_scrape",
|
||||
"description": "Scrape a URL...",
|
||||
"parameters": ["url"]
|
||||
}
|
||||
]
|
||||
},
|
||||
"total_tools": 14
|
||||
}
|
||||
```
|
||||
|
||||
### Step 3: Validate Before Adding Nodes
|
||||
|
||||
Before writing a node with `tools=[...]`:
|
||||
@@ -193,27 +320,10 @@ Before writing a node with `tools=[...]`:
|
||||
|
||||
### Tool Validation Anti-Patterns
|
||||
|
||||
❌ **Never assume a tool exists** - always call `list_mcp_tools()` first
|
||||
❌ **Never write a node with unverified tools** - validate before writing
|
||||
❌ **Never silently drop tools** - if a tool doesn't exist, inform the user
|
||||
❌ **Never guess tool names** - use exact names from discovery response
|
||||
|
||||
### Example Validation Flow
|
||||
|
||||
```python
|
||||
# 1. User requests: "Add a node that searches the web"
|
||||
# 2. Discover available tools
|
||||
tools_response = mcp__agent-builder__list_mcp_tools()
|
||||
|
||||
# 3. Check if web_search exists
|
||||
available = [t["name"] for tools in tools_response["tools_by_server"].values() for t in tools]
|
||||
if "web_search" not in available:
|
||||
# Inform user and ask how to proceed
|
||||
print("❌ 'web_search' not available. Available tools:", available)
|
||||
else:
|
||||
# Proceed with node creation
|
||||
# ...
|
||||
```
|
||||
- **Never assume a tool exists** - always call `list_mcp_tools()` first
|
||||
- **Never write a node with unverified tools** - validate before writing
|
||||
- **Never silently drop tools** - if a tool doesn't exist, inform the user
|
||||
- **Never guess tool names** - use exact names from discovery response
|
||||
|
||||
## Workflow Overview: Incremental File Construction
|
||||
|
||||
@@ -221,42 +331,19 @@ else:
|
||||
1. CREATE PACKAGE → mkdir + write skeletons
|
||||
2. DEFINE GOAL → Write to agent.py + config.py
|
||||
3. FOR EACH NODE:
|
||||
- Propose design
|
||||
- Propose design (event_loop for LLM work, function for deterministic)
|
||||
- User approves
|
||||
- Write to nodes/__init__.py IMMEDIATELY ← FILE WRITTEN
|
||||
- (Optional) Validate with test_node ← MCP VALIDATION
|
||||
- User can open file and see it
|
||||
4. CONNECT EDGES → Update agent.py ← FILE WRITTEN
|
||||
- (Optional) Validate with validate_graph ← MCP VALIDATION
|
||||
5. FINALIZE → Write agent class to agent.py ← FILE WRITTEN
|
||||
- Write to nodes/__init__.py IMMEDIATELY
|
||||
- (Optional) Validate with test_node
|
||||
4. CONNECT EDGES → Update agent.py
|
||||
- Use priority for feedback edges (negative priority)
|
||||
- (Optional) Validate with validate_graph
|
||||
5. FINALIZE → Write agent class to agent.py
|
||||
6. DONE - Agent ready at exports/my_agent/
|
||||
```
|
||||
|
||||
**Files written immediately. MCP tools optional for validation/testing bookkeeping.**
|
||||
|
||||
### The Key Difference
|
||||
|
||||
**OLD (Bad):**
|
||||
|
||||
```
|
||||
MCP add_node → Session State → MCP add_node → Session State → ...
|
||||
↓
|
||||
MCP export_graph
|
||||
↓
|
||||
Files appear
|
||||
```
|
||||
|
||||
**NEW (Good):**
|
||||
|
||||
```
|
||||
Write node to file → (Optional: MCP test_node) → Write node to file → ...
|
||||
↓ ↓
|
||||
File visible File visible
|
||||
immediately immediately
|
||||
```
|
||||
|
||||
**Bottom line:** Use Write/Edit for construction, MCP for validation if needed.
|
||||
|
||||
## When to Use This Skill
|
||||
|
||||
Use building-agents-core when:
|
||||
@@ -285,12 +372,17 @@ mcp__agent-builder__test_node(
|
||||
**validate_graph** - Check graph structure
|
||||
```python
|
||||
mcp__agent-builder__validate_graph()
|
||||
# Returns: unreachable nodes, missing connections, etc.
|
||||
# Returns: unreachable nodes, missing connections, event_loop validation, etc.
|
||||
```
|
||||
|
||||
**create_session** - Track session state for bookkeeping
|
||||
**configure_loop** - Set event loop parameters
|
||||
```python
|
||||
mcp__agent-builder__create_session(session_name="my-build")
|
||||
mcp__agent-builder__configure_loop(
|
||||
max_iterations=50,
|
||||
max_tool_calls_per_turn=10,
|
||||
stall_detection_threshold=3,
|
||||
max_history_tokens=32000
|
||||
)
|
||||
```
|
||||
|
||||
**Key Point:** Files are written FIRST. MCP tools are for validation only.
|
||||
@@ -298,6 +390,6 @@ mcp__agent-builder__create_session(session_name="my-build")
|
||||
## Related Skills
|
||||
|
||||
- **building-agents-construction** - Step-by-step building process
|
||||
- **building-agents-patterns** - Best practices and examples
|
||||
- **building-agents-patterns** - Best practices: judges, feedback edges, fan-out, context management
|
||||
- **agent-workflow** - Complete workflow orchestrator
|
||||
- **testing-agent** - Test and validate completed agents
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
---
|
||||
name: building-agents-patterns
|
||||
description: Best practices, patterns, and examples for building goal-driven agents. Includes pause/resume architecture, hybrid workflows, anti-patterns, and handoff to testing. Use when optimizing agent design.
|
||||
description: Best practices, patterns, and examples for building goal-driven agents. Includes client-facing interaction, feedback edges, judge patterns, fan-out/fan-in, context management, and anti-patterns.
|
||||
license: Apache-2.0
|
||||
metadata:
|
||||
author: hive
|
||||
version: "1.0"
|
||||
version: "2.0"
|
||||
type: reference
|
||||
part_of: building-agents
|
||||
---
|
||||
@@ -24,10 +24,10 @@ How to build a node using both direct file writes and optional MCP validation:
|
||||
node_code = '''
|
||||
search_node = NodeSpec(
|
||||
id="search-web",
|
||||
node_type="llm_tool_use",
|
||||
node_type="event_loop",
|
||||
input_keys=["query"],
|
||||
output_keys=["search_results"],
|
||||
system_prompt="Search the web for: {query}",
|
||||
system_prompt="Search the web for: {query}. Use web_search, then call set_output to store results.",
|
||||
tools=["web_search"],
|
||||
)
|
||||
'''
|
||||
@@ -38,17 +38,12 @@ Edit(
|
||||
new_string=node_code
|
||||
)
|
||||
|
||||
print("✅ Added search_node to nodes/__init__.py")
|
||||
print("📁 Open exports/research_agent/nodes/__init__.py to see it!")
|
||||
|
||||
# 2. OPTIONALLY VALIDATE WITH MCP (Secondary - bookkeeping)
|
||||
validation = mcp__agent-builder__test_node(
|
||||
node_id="search-web",
|
||||
test_input='{"query": "python tutorials"}',
|
||||
mock_llm_response='{"search_results": [...mock results...]}'
|
||||
)
|
||||
|
||||
print(f"✓ Validation: {validation['success']}")
|
||||
```
|
||||
|
||||
**User experience:**
|
||||
@@ -57,401 +52,300 @@ print(f"✓ Validation: {validation['success']}")
|
||||
- Gets validation feedback (from step 2)
|
||||
- Can edit the file directly if needed
|
||||
|
||||
This combines visibility (files) with validation (MCP tools).
|
||||
## Multi-Turn Interaction Patterns
|
||||
|
||||
## Pause/Resume Architecture
|
||||
For agents needing multi-turn conversations with users, use `client_facing=True` on event_loop nodes.
|
||||
|
||||
For agents needing multi-turn conversations with user interaction:
|
||||
### Client-Facing Nodes
|
||||
|
||||
### Basic Pause/Resume Flow
|
||||
A client-facing node streams LLM output to the user and blocks for user input between conversational turns. This replaces the old pause/resume pattern.
|
||||
|
||||
```python
|
||||
# Define pause nodes - execution stops at these nodes
|
||||
pause_nodes = ["request-clarification", "await-approval"]
|
||||
# Client-facing node with STEP 1/STEP 2 prompt pattern
|
||||
intake_node = NodeSpec(
|
||||
id="intake",
|
||||
name="Intake",
|
||||
description="Gather requirements from the user",
|
||||
node_type="event_loop",
|
||||
client_facing=True,
|
||||
input_keys=["topic"],
|
||||
output_keys=["research_brief"],
|
||||
system_prompt="""\
|
||||
You are an intake specialist.
|
||||
|
||||
# Define entry points - where to resume from each pause
|
||||
entry_points = {
|
||||
"start": "analyze-request", # Initial entry
|
||||
"request-clarification_resume": "process-clarification", # Resume from clarification
|
||||
"await-approval_resume": "execute-action", # Resume from approval
|
||||
}
|
||||
```
|
||||
**STEP 1 — Read and respond (text only, NO tool calls):**
|
||||
1. Read the topic provided
|
||||
2. If it's vague, ask 1-2 clarifying questions
|
||||
3. If it's clear, confirm your understanding
|
||||
|
||||
### Example: Multi-Turn Research Agent
|
||||
|
||||
```python
|
||||
# Nodes
|
||||
nodes = [
|
||||
NodeSpec(id="analyze-request", ...),
|
||||
NodeSpec(id="request-clarification", ...), # PAUSE NODE
|
||||
NodeSpec(id="process-clarification", ...),
|
||||
NodeSpec(id="generate-results", ...),
|
||||
NodeSpec(id="await-approval", ...), # PAUSE NODE
|
||||
NodeSpec(id="execute-action", ...),
|
||||
]
|
||||
|
||||
# Edges with resume flows
|
||||
edges = [
|
||||
EdgeSpec(
|
||||
id="analyze-to-clarify",
|
||||
source="analyze-request",
|
||||
target="request-clarification",
|
||||
condition=EdgeCondition.CONDITIONAL,
|
||||
condition_expr="needs_clarification == true",
|
||||
),
|
||||
# When resumed, goes to process-clarification
|
||||
EdgeSpec(
|
||||
id="clarify-to-process",
|
||||
source="request-clarification",
|
||||
target="process-clarification",
|
||||
condition=EdgeCondition.ALWAYS,
|
||||
),
|
||||
EdgeSpec(
|
||||
id="results-to-approval",
|
||||
source="generate-results",
|
||||
target="await-approval",
|
||||
condition=EdgeCondition.ALWAYS,
|
||||
),
|
||||
# When resumed, goes to execute-action
|
||||
EdgeSpec(
|
||||
id="approval-to-execute",
|
||||
source="await-approval",
|
||||
target="execute-action",
|
||||
condition=EdgeCondition.ALWAYS,
|
||||
),
|
||||
]
|
||||
|
||||
# Configuration
|
||||
pause_nodes = ["request-clarification", "await-approval"]
|
||||
entry_points = {
|
||||
"start": "analyze-request",
|
||||
"request-clarification_resume": "process-clarification",
|
||||
"await-approval_resume": "execute-action",
|
||||
}
|
||||
```
|
||||
|
||||
### Running Pause/Resume Agents
|
||||
|
||||
```python
|
||||
# Initial run - will pause at first pause node
|
||||
result1 = await agent.run(
|
||||
context={"query": "research topic"},
|
||||
session_state=None
|
||||
**STEP 2 — After the user confirms, call set_output:**
|
||||
- set_output("research_brief", "Clear description of what to research")
|
||||
""",
|
||||
)
|
||||
|
||||
# Check if paused
|
||||
if result1.paused_at:
|
||||
print(f"Paused at: {result1.paused_at}")
|
||||
|
||||
# Resume with user input
|
||||
result2 = await agent.run(
|
||||
context={"user_response": "clarification details"},
|
||||
session_state=result1.session_state # Pass previous state
|
||||
)
|
||||
# Internal node runs without user interaction
|
||||
research_node = NodeSpec(
|
||||
id="research",
|
||||
name="Research",
|
||||
description="Search and analyze sources",
|
||||
node_type="event_loop",
|
||||
input_keys=["research_brief"],
|
||||
output_keys=["findings", "sources"],
|
||||
system_prompt="Research the topic using web_search and web_scrape...",
|
||||
tools=["web_search", "web_scrape", "load_data", "save_data"],
|
||||
)
|
||||
```
|
||||
|
||||
**How it works:**
|
||||
- Client-facing nodes stream LLM text to the user and block for input after each response
|
||||
- User input is injected via `node.inject_event(text)`
|
||||
- When the LLM calls `set_output` to produce structured outputs, the judge evaluates and ACCEPTs
|
||||
- Internal nodes (non-client-facing) run their entire loop without blocking
|
||||
- `set_output` is a synthetic tool — a turn with only `set_output` calls (no real tools) triggers user input blocking
|
||||
|
||||
**STEP 1/STEP 2 pattern:** Always structure client-facing prompts with explicit phases. STEP 1 is text-only conversation. STEP 2 calls `set_output` after user confirmation. This prevents the LLM from calling `set_output` prematurely before the user responds.
|
||||
|
||||
### When to Use client_facing
|
||||
|
||||
| Scenario | client_facing | Why |
|
||||
|----------|:---:|-----|
|
||||
| Gathering user requirements | Yes | Need user input |
|
||||
| Human review/approval checkpoint | Yes | Need human decision |
|
||||
| Data processing (scanning, scoring) | No | Runs autonomously |
|
||||
| Report generation | No | No user input needed |
|
||||
| Final confirmation before action | Yes | Need explicit approval |
|
||||
|
||||
> **Legacy Note:** The `pause_nodes` / `entry_points` pattern still works for backward compatibility but `client_facing=True` is preferred for new agents.
|
||||
|
||||
## Edge-Based Routing and Feedback Loops
|
||||
|
||||
### Conditional Edge Routing
|
||||
|
||||
Multiple conditional edges from the same source replace the old `router` node type. Each edge checks a condition on the node's output.
|
||||
|
||||
```python
|
||||
# Node with mutually exclusive outputs
|
||||
review_node = NodeSpec(
|
||||
id="review",
|
||||
name="Review",
|
||||
node_type="event_loop",
|
||||
client_facing=True,
|
||||
output_keys=["approved_contacts", "redo_extraction"],
|
||||
nullable_output_keys=["approved_contacts", "redo_extraction"],
|
||||
max_node_visits=3,
|
||||
system_prompt="Present the contact list to the operator. If they approve, call set_output('approved_contacts', ...). If they want changes, call set_output('redo_extraction', 'true').",
|
||||
)
|
||||
|
||||
# Forward edge (positive priority, evaluated first)
|
||||
EdgeSpec(
|
||||
id="review-to-campaign",
|
||||
source="review",
|
||||
target="campaign-builder",
|
||||
condition=EdgeCondition.CONDITIONAL,
|
||||
condition_expr="output.get('approved_contacts') is not None",
|
||||
priority=1,
|
||||
)
|
||||
|
||||
# Feedback edge (negative priority, evaluated after forward edges)
|
||||
EdgeSpec(
|
||||
id="review-feedback",
|
||||
source="review",
|
||||
target="extractor",
|
||||
condition=EdgeCondition.CONDITIONAL,
|
||||
condition_expr="output.get('redo_extraction') is not None",
|
||||
priority=-1,
|
||||
)
|
||||
```
|
||||
|
||||
**Key concepts:**
|
||||
- `nullable_output_keys`: Lists output keys that may remain unset. The node sets exactly one of the mutually exclusive keys per execution.
|
||||
- `max_node_visits`: Must be >1 on the feedback target (extractor) so it can re-execute. Default is 1.
|
||||
- `priority`: Positive = forward edge (evaluated first). Negative = feedback edge. The executor tries forward edges first; if none match, falls back to feedback edges.
|
||||
|
||||
### Routing Decision Table
|
||||
|
||||
| Pattern | Old Approach | New Approach |
|
||||
|---------|-------------|--------------|
|
||||
| Conditional branching | `router` node | Conditional edges with `condition_expr` |
|
||||
| Binary approve/reject | `pause_nodes` + resume | `client_facing=True` + `nullable_output_keys` |
|
||||
| Loop-back on rejection | Manual entry_points | Feedback edge with `priority=-1` |
|
||||
| Multi-way routing | Router with routes dict | Multiple conditional edges with priorities |
|
||||
|
||||
## Judge Patterns
|
||||
|
||||
**Core Principle: The judge is the SOLE mechanism for acceptance decisions.** Never add ad-hoc framework gating to compensate for LLM behavior. If the LLM calls `set_output` prematurely, fix the system prompt or use a custom judge. Anti-patterns to avoid:
|
||||
- Output rollback logic
|
||||
- `_user_has_responded` flags
|
||||
- Premature set_output rejection
|
||||
- Interaction protocol injection into system prompts
|
||||
|
||||
Judges control when an event_loop node's loop exits. Choose based on validation needs.
|
||||
|
||||
### Implicit Judge (Default)
|
||||
|
||||
When no judge is configured, the implicit judge ACCEPTs when:
|
||||
- The LLM finishes its response with no tool calls
|
||||
- All required output keys have been set via `set_output`
|
||||
|
||||
Best for simple nodes where "all outputs set" is sufficient validation.
|
||||
|
||||
### SchemaJudge
|
||||
|
||||
Validates outputs against a Pydantic model. Use when you need structural validation.
|
||||
|
||||
```python
|
||||
from pydantic import BaseModel
|
||||
|
||||
class ScannerOutput(BaseModel):
|
||||
github_users: list[dict] # Must be a list of user objects
|
||||
|
||||
class SchemaJudge:
|
||||
def __init__(self, output_model: type[BaseModel]):
|
||||
self._model = output_model
|
||||
|
||||
async def evaluate(self, context: dict) -> JudgeVerdict:
|
||||
missing = context.get("missing_keys", [])
|
||||
if missing:
|
||||
return JudgeVerdict(
|
||||
action="RETRY",
|
||||
feedback=f"Missing output keys: {missing}. Use set_output to provide them.",
|
||||
)
|
||||
try:
|
||||
self._model.model_validate(context["output_accumulator"])
|
||||
return JudgeVerdict(action="ACCEPT")
|
||||
except ValidationError as e:
|
||||
return JudgeVerdict(action="RETRY", feedback=str(e))
|
||||
```
|
||||
|
||||
### When to Use Which Judge
|
||||
|
||||
| Judge | Use When | Example |
|
||||
|-------|----------|---------|
|
||||
| Implicit (None) | Output keys are sufficient validation | Simple data extraction |
|
||||
| SchemaJudge | Need structural validation of outputs | API response parsing |
|
||||
| Custom | Domain-specific validation logic | Score must be 0.0-1.0 |
|
||||
|
||||
## Fan-Out / Fan-In (Parallel Execution)
|
||||
|
||||
Multiple ON_SUCCESS edges from the same source trigger parallel execution. All branches run concurrently via `asyncio.gather()`.
|
||||
|
||||
```python
|
||||
# Scanner fans out to Profiler and Scorer in parallel
|
||||
EdgeSpec(id="scanner-to-profiler", source="scanner", target="profiler",
|
||||
condition=EdgeCondition.ON_SUCCESS)
|
||||
EdgeSpec(id="scanner-to-scorer", source="scanner", target="scorer",
|
||||
condition=EdgeCondition.ON_SUCCESS)
|
||||
|
||||
# Both fan in to Extractor
|
||||
EdgeSpec(id="profiler-to-extractor", source="profiler", target="extractor",
|
||||
condition=EdgeCondition.ON_SUCCESS)
|
||||
EdgeSpec(id="scorer-to-extractor", source="scorer", target="extractor",
|
||||
condition=EdgeCondition.ON_SUCCESS)
|
||||
```
|
||||
|
||||
**Requirements:**
|
||||
- Parallel event_loop nodes must have **disjoint output_keys** (no key written by both)
|
||||
- Only one parallel branch may contain a `client_facing` node
|
||||
- Fan-in node receives outputs from all completed branches in shared memory
|
||||
|
||||
## Context Management Patterns
|
||||
|
||||
### Tiered Compaction
|
||||
|
||||
EventLoopNode automatically manages context window usage with tiered compaction:
|
||||
1. **Pruning** — Old tool results replaced with compact placeholders (zero-cost, no LLM call)
|
||||
2. **Normal compaction** — LLM summarizes older messages
|
||||
3. **Aggressive compaction** — Keeps only recent messages + summary
|
||||
4. **Emergency** — Hard reset with tool history preservation
|
||||
|
||||
### Spillover Pattern
|
||||
|
||||
The framework automatically truncates large tool results and saves full content to a spillover directory. The LLM receives a truncation message with instructions to use `load_data` to read the full result.
|
||||
|
||||
For explicit data management, use the data tools (real MCP tools, not synthetic):
|
||||
|
||||
```python
|
||||
# save_data, load_data, list_data_files are real MCP tools
|
||||
# Each takes a data_dir parameter since the MCP server is shared
|
||||
|
||||
# Saving large results
|
||||
save_data(filename="sources.json", data=large_json_string, data_dir="/path/to/spillover")
|
||||
|
||||
# Reading with pagination (line-based offset/limit)
|
||||
load_data(filename="sources.json", data_dir="/path/to/spillover", offset=0, limit=50)
|
||||
|
||||
# Listing available files
|
||||
list_data_files(data_dir="/path/to/spillover")
|
||||
```
|
||||
|
||||
Add data tools to nodes that handle large tool results:
|
||||
|
||||
```python
|
||||
research_node = NodeSpec(
|
||||
...
|
||||
tools=["web_search", "web_scrape", "load_data", "save_data", "list_data_files"],
|
||||
)
|
||||
```
|
||||
|
||||
The `data_dir` is passed by the framework (from the node's spillover directory). The LLM sees `data_dir` in truncation messages and uses it when calling `load_data`.
|
||||
|
||||
## Anti-Patterns
|
||||
|
||||
### What NOT to Do
|
||||
|
||||
❌ **Don't rely on `export_graph`** - Write files immediately, not at end
|
||||
```python
|
||||
# BAD: Building in session state, exporting at end
|
||||
mcp__agent-builder__add_node(...)
|
||||
mcp__agent-builder__add_node(...)
|
||||
mcp__agent-builder__export_graph() # Files appear only now
|
||||
- **Don't rely on `export_graph`** — Write files immediately, not at end
|
||||
- **Don't hide code in session** — Write to files as components are approved
|
||||
- **Don't wait to write files** — Agent visible from first step
|
||||
- **Don't batch everything** — Write incrementally, one component at a time
|
||||
- **Don't create too many thin nodes** — Prefer fewer, richer nodes (see below)
|
||||
- **Don't add framework gating for LLM behavior** — Fix prompts or use judges instead
|
||||
|
||||
# GOOD: Writing files immediately
|
||||
Write(file_path="...", content=node_code) # File visible now
|
||||
Write(file_path="...", content=node_code) # File visible now
|
||||
```
|
||||
### Fewer, Richer Nodes
|
||||
|
||||
❌ **Don't hide code in session** - Write to files as components approved
|
||||
```python
|
||||
# BAD: Accumulating changes invisibly
|
||||
session.add_component(component1)
|
||||
session.add_component(component2)
|
||||
# User can't see anything yet
|
||||
A common mistake is splitting work into too many small single-purpose nodes. Each node boundary requires serializing outputs, losing in-context information, and adding edge complexity.
|
||||
|
||||
# GOOD: Incremental visibility
|
||||
Edit(file_path="...", ...) # User sees change 1
|
||||
Edit(file_path="...", ...) # User sees change 2
|
||||
```
|
||||
| Bad (8 thin nodes) | Good (4 rich nodes) |
|
||||
|---------------------|---------------------|
|
||||
| parse-query | intake (client-facing) |
|
||||
| search-sources | research (search + fetch + analyze) |
|
||||
| fetch-content | review (client-facing) |
|
||||
| evaluate-sources | report (write + deliver) |
|
||||
| synthesize-findings | |
|
||||
| write-report | |
|
||||
| quality-check | |
|
||||
| save-report | |
|
||||
|
||||
❌ **Don't wait to write files** - Agent visible from first step
|
||||
```python
|
||||
# BAD: Building everything before writing
|
||||
design_all_nodes()
|
||||
design_all_edges()
|
||||
write_everything_at_once()
|
||||
|
||||
# GOOD: Write as you go
|
||||
write_package_structure() # Visible
|
||||
write_goal() # Visible
|
||||
write_node_1() # Visible
|
||||
write_node_2() # Visible
|
||||
```
|
||||
|
||||
❌ **Don't batch everything** - Write incrementally
|
||||
```python
|
||||
# BAD: Batching all nodes
|
||||
nodes = [design_node_1(), design_node_2(), ...]
|
||||
write_all_nodes(nodes)
|
||||
|
||||
# GOOD: One at a time with user feedback
|
||||
write_node_1() # User approves
|
||||
write_node_2() # User approves
|
||||
write_node_3() # User approves
|
||||
```
|
||||
**Why fewer nodes are better:**
|
||||
- The LLM retains full context of its work within a single node
|
||||
- A research node that searches, fetches, and analyzes keeps all source material in its conversation history
|
||||
- Fewer edges means simpler graph and fewer failure points
|
||||
- Data tools (`save_data`/`load_data`) handle context window limits within a single node
|
||||
|
||||
### MCP Tools - Correct Usage
|
||||
|
||||
**MCP tools OK for:**
|
||||
✅ `test_node` - Validate node configuration with mock inputs
|
||||
✅ `validate_graph` - Check graph structure
|
||||
✅ `create_session` - Track session state for bookkeeping
|
||||
✅ Other validation tools
|
||||
- `test_node` — Validate node configuration with mock inputs
|
||||
- `validate_graph` — Check graph structure
|
||||
- `configure_loop` — Set event loop parameters
|
||||
- `create_session` — Track session state for bookkeeping
|
||||
|
||||
**Just don't:** Use MCP as the primary construction method or rely on export_graph
|
||||
|
||||
## Best Practices
|
||||
|
||||
### 1. Show Progress After Each Write
|
||||
|
||||
```python
|
||||
# After writing a node
|
||||
print("✅ Added analyze_request_node to nodes/__init__.py")
|
||||
print("📊 Progress: 1/6 nodes added")
|
||||
print("📁 Open exports/my_agent/nodes/__init__.py to see it!")
|
||||
```
|
||||
|
||||
### 2. Let User Open Files During Build
|
||||
|
||||
```python
|
||||
# Encourage file inspection
|
||||
print("✅ Goal written to agent.py")
|
||||
print("")
|
||||
print("💡 Tip: Open exports/my_agent/agent.py in your editor to see the goal!")
|
||||
```
|
||||
|
||||
### 3. Write Incrementally - One Component at a Time
|
||||
|
||||
```python
|
||||
# Good flow
|
||||
write_package_structure()
|
||||
show_user("Package created")
|
||||
|
||||
write_goal()
|
||||
show_user("Goal written")
|
||||
|
||||
for node in nodes:
|
||||
get_approval(node)
|
||||
write_node(node)
|
||||
show_user(f"Node {node.id} written")
|
||||
```
|
||||
|
||||
### 4. Test As You Build
|
||||
|
||||
```python
|
||||
# After adding several nodes
|
||||
print("💡 You can test current state with:")
|
||||
print(" PYTHONPATH=core:exports python -m my_agent validate")
|
||||
print(" PYTHONPATH=core:exports python -m my_agent info")
|
||||
```
|
||||
|
||||
### 5. Keep User Informed
|
||||
|
||||
```python
|
||||
# Clear status updates
|
||||
print("🔨 Creating package structure...")
|
||||
print("✅ Package created: exports/my_agent/")
|
||||
print("")
|
||||
print("📝 Next: Define agent goal")
|
||||
```
|
||||
|
||||
## Continuous Monitoring Agents
|
||||
|
||||
For agents that run continuously without terminal nodes:
|
||||
|
||||
```python
|
||||
# No terminal nodes - loops forever
|
||||
terminal_nodes = []
|
||||
|
||||
# Workflow loops back to start
|
||||
edges = [
|
||||
EdgeSpec(id="monitor-to-check", source="monitor", target="check-condition"),
|
||||
EdgeSpec(id="check-to-wait", source="check-condition", target="wait"),
|
||||
EdgeSpec(id="wait-to-monitor", source="wait", target="monitor"), # Loop
|
||||
]
|
||||
|
||||
# Entry node only
|
||||
entry_node = "monitor"
|
||||
entry_points = {"start": "monitor"}
|
||||
pause_nodes = []
|
||||
```
|
||||
|
||||
**Example: File Monitor**
|
||||
|
||||
```python
|
||||
nodes = [
|
||||
NodeSpec(id="list-files", ...),
|
||||
NodeSpec(id="check-new-files", node_type="router", ...),
|
||||
NodeSpec(id="process-files", ...),
|
||||
NodeSpec(id="wait-interval", node_type="function", ...),
|
||||
]
|
||||
|
||||
edges = [
|
||||
EdgeSpec(id="list-to-check", source="list-files", target="check-new-files"),
|
||||
EdgeSpec(
|
||||
id="check-to-process",
|
||||
source="check-new-files",
|
||||
target="process-files",
|
||||
condition=EdgeCondition.CONDITIONAL,
|
||||
condition_expr="new_files_count > 0",
|
||||
),
|
||||
EdgeSpec(
|
||||
id="check-to-wait",
|
||||
source="check-new-files",
|
||||
target="wait-interval",
|
||||
condition=EdgeCondition.CONDITIONAL,
|
||||
condition_expr="new_files_count == 0",
|
||||
),
|
||||
EdgeSpec(id="process-to-wait", source="process-files", target="wait-interval"),
|
||||
EdgeSpec(id="wait-to-list", source="wait-interval", target="list-files"), # Loop back
|
||||
]
|
||||
|
||||
terminal_nodes = [] # No terminal - runs forever
|
||||
```
|
||||
|
||||
## Complex Routing Patterns
|
||||
|
||||
### Multi-Condition Router
|
||||
|
||||
```python
|
||||
router_node = NodeSpec(
|
||||
id="decision-router",
|
||||
node_type="router",
|
||||
input_keys=["analysis_result"],
|
||||
output_keys=["decision"],
|
||||
system_prompt="""
|
||||
Based on the analysis result, decide the next action:
|
||||
- If confidence > 0.9: route to "execute"
|
||||
- If 0.5 <= confidence <= 0.9: route to "review"
|
||||
- If confidence < 0.5: route to "clarify"
|
||||
|
||||
Return: {"decision": "execute|review|clarify"}
|
||||
""",
|
||||
)
|
||||
|
||||
# Edges for each route
|
||||
edges = [
|
||||
EdgeSpec(
|
||||
id="router-to-execute",
|
||||
source="decision-router",
|
||||
target="execute-action",
|
||||
condition=EdgeCondition.CONDITIONAL,
|
||||
condition_expr="decision == 'execute'",
|
||||
priority=1,
|
||||
),
|
||||
EdgeSpec(
|
||||
id="router-to-review",
|
||||
source="decision-router",
|
||||
target="human-review",
|
||||
condition=EdgeCondition.CONDITIONAL,
|
||||
condition_expr="decision == 'review'",
|
||||
priority=2,
|
||||
),
|
||||
EdgeSpec(
|
||||
id="router-to-clarify",
|
||||
source="decision-router",
|
||||
target="request-clarification",
|
||||
condition=EdgeCondition.CONDITIONAL,
|
||||
condition_expr="decision == 'clarify'",
|
||||
priority=3,
|
||||
),
|
||||
]
|
||||
```
|
||||
|
||||
## Error Handling Patterns
|
||||
|
||||
### Graceful Failure with Fallback
|
||||
|
||||
```python
|
||||
# Primary node with error handling
|
||||
nodes = [
|
||||
NodeSpec(id="api-call", max_retries=3, ...),
|
||||
NodeSpec(id="fallback-cache", ...),
|
||||
NodeSpec(id="report-error", ...),
|
||||
]
|
||||
|
||||
edges = [
|
||||
# Success path
|
||||
EdgeSpec(
|
||||
id="api-success",
|
||||
source="api-call",
|
||||
target="process-results",
|
||||
condition=EdgeCondition.ON_SUCCESS,
|
||||
),
|
||||
EdgeSpec(id="api-success", source="api-call", target="process-results",
|
||||
condition=EdgeCondition.ON_SUCCESS),
|
||||
# Fallback on failure
|
||||
EdgeSpec(
|
||||
id="api-to-fallback",
|
||||
source="api-call",
|
||||
target="fallback-cache",
|
||||
condition=EdgeCondition.ON_FAILURE,
|
||||
priority=1,
|
||||
),
|
||||
EdgeSpec(id="api-to-fallback", source="api-call", target="fallback-cache",
|
||||
condition=EdgeCondition.ON_FAILURE, priority=1),
|
||||
# Report if fallback also fails
|
||||
EdgeSpec(
|
||||
id="fallback-to-error",
|
||||
source="fallback-cache",
|
||||
target="report-error",
|
||||
condition=EdgeCondition.ON_FAILURE,
|
||||
priority=1,
|
||||
),
|
||||
]
|
||||
```
|
||||
|
||||
## Performance Optimization
|
||||
|
||||
### Parallel Node Execution
|
||||
|
||||
```python
|
||||
# Use multiple edges from same source for parallel execution
|
||||
edges = [
|
||||
EdgeSpec(
|
||||
id="start-to-search1",
|
||||
source="start",
|
||||
target="search-source-1",
|
||||
condition=EdgeCondition.ALWAYS,
|
||||
),
|
||||
EdgeSpec(
|
||||
id="start-to-search2",
|
||||
source="start",
|
||||
target="search-source-2",
|
||||
condition=EdgeCondition.ALWAYS,
|
||||
),
|
||||
EdgeSpec(
|
||||
id="start-to-search3",
|
||||
source="start",
|
||||
target="search-source-3",
|
||||
condition=EdgeCondition.ALWAYS,
|
||||
),
|
||||
# Converge results
|
||||
EdgeSpec(
|
||||
id="search1-to-merge",
|
||||
source="search-source-1",
|
||||
target="merge-results",
|
||||
),
|
||||
EdgeSpec(
|
||||
id="search2-to-merge",
|
||||
source="search-source-2",
|
||||
target="merge-results",
|
||||
),
|
||||
EdgeSpec(
|
||||
id="search3-to-merge",
|
||||
source="search-source-3",
|
||||
target="merge-results",
|
||||
),
|
||||
EdgeSpec(id="fallback-to-error", source="fallback-cache", target="report-error",
|
||||
condition=EdgeCondition.ON_FAILURE, priority=1),
|
||||
]
|
||||
```
|
||||
|
||||
@@ -459,38 +353,21 @@ edges = [
|
||||
|
||||
When agent is complete, transition to testing phase:
|
||||
|
||||
```python
|
||||
print("""
|
||||
✅ Agent complete: exports/my_agent/
|
||||
|
||||
Next steps:
|
||||
1. Switch to testing-agent skill
|
||||
2. Generate and approve tests
|
||||
3. Run evaluation
|
||||
4. Debug any failures
|
||||
|
||||
Command: "Test the agent at exports/my_agent/"
|
||||
""")
|
||||
```
|
||||
|
||||
### Pre-Testing Checklist
|
||||
|
||||
Before handing off to testing-agent:
|
||||
|
||||
- [ ] Agent structure validates: `python -m agent_name validate`
|
||||
- [ ] Agent structure validates: `uv run python -m agent_name validate`
|
||||
- [ ] All nodes defined in nodes/__init__.py
|
||||
- [ ] All edges connect valid nodes
|
||||
- [ ] Entry node specified
|
||||
- [ ] All edges connect valid nodes with correct priorities
|
||||
- [ ] Feedback edge targets have `max_node_visits > 1`
|
||||
- [ ] Client-facing nodes have meaningful system prompts
|
||||
- [ ] Agent can be imported: `from exports.agent_name import default_agent`
|
||||
- [ ] README.md with usage instructions
|
||||
- [ ] CLI commands work (info, validate)
|
||||
|
||||
## Related Skills
|
||||
|
||||
- **building-agents-core** - Fundamental concepts
|
||||
- **building-agents-construction** - Step-by-step building
|
||||
- **testing-agent** - Test and validate agents
|
||||
- **agent-workflow** - Complete workflow orchestrator
|
||||
- **building-agents-core** — Fundamental concepts (node types, edges, event loop architecture)
|
||||
- **building-agents-construction** — Step-by-step building process
|
||||
- **testing-agent** — Test and validate agents
|
||||
- **agent-workflow** — Complete workflow orchestrator
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
---
|
||||
name: setup-credentials
|
||||
description: Set up and install credentials for an agent. Detects missing credentials from agent config, collects them from the user, and stores them securely in the encrypted credential store at ~/.hive/credentials.
|
||||
description: Set up and install credentials for an agent. Detects missing credentials from agent config, collects them from the user, and stores them securely in the local encrypted store at ~/.hive/credentials.
|
||||
license: Apache-2.0
|
||||
metadata:
|
||||
author: hive
|
||||
version: "2.1"
|
||||
version: "2.2"
|
||||
type: utility
|
||||
---
|
||||
|
||||
@@ -31,48 +31,96 @@ Determine which agent needs credentials. The user will either:
|
||||
|
||||
Locate the agent's directory under `exports/{agent_name}/`.
|
||||
|
||||
### Step 2: Detect Required Credentials
|
||||
### Step 2: Detect Required Credentials (Bash-First)
|
||||
|
||||
Read the agent's configuration to determine which tools and node types it uses:
|
||||
Use bash commands to determine what the agent needs and what's already configured. This avoids Python import issues and works even when `HIVE_CREDENTIAL_KEY` is not set.
|
||||
|
||||
```python
|
||||
from core.framework.runner import AgentRunner
|
||||
#### Step 2a: Read Agent Requirements
|
||||
|
||||
runner = AgentRunner.load("exports/{agent_name}")
|
||||
validation = runner.validate()
|
||||
Extract `required_tools` and node types from the agent config:
|
||||
|
||||
# validation.missing_credentials contains env var names
|
||||
# validation.warnings contains detailed messages with help URLs
|
||||
```bash
|
||||
# Get required tools
|
||||
jq -r '.required_tools[]?' exports/{agent_name}/agent.json 2>/dev/null
|
||||
|
||||
# Get node types from graph nodes
|
||||
jq -r '.graph.nodes[]?.node_type' exports/{agent_name}/agent.json 2>/dev/null | sort -u
|
||||
```
|
||||
|
||||
Alternatively, check the credential store directly:
|
||||
Map the extracted tools and node types to credentials by reading the spec files directly:
|
||||
|
||||
```python
|
||||
from core.framework.credentials import CredentialStore
|
||||
|
||||
# Use encrypted storage (default: ~/.hive/credentials)
|
||||
store = CredentialStore.with_encrypted_storage()
|
||||
|
||||
# Check what's available
|
||||
available = store.list_credentials()
|
||||
print(f"Available credentials: {available}")
|
||||
|
||||
# Check if specific credential exists
|
||||
if store.is_available("hubspot"):
|
||||
print("HubSpot credential found")
|
||||
else:
|
||||
print("HubSpot credential missing")
|
||||
```bash
|
||||
# Read all credential specs — each file defines tools, node_types, env_var, and credential_id
|
||||
cat tools/src/aden_tools/credentials/llm.py tools/src/aden_tools/credentials/search.py tools/src/aden_tools/credentials/email.py tools/src/aden_tools/credentials/integrations.py
|
||||
```
|
||||
|
||||
To see all known credential specs (for help URLs and setup instructions):
|
||||
For each `CredentialSpec`, match its `tools` and `node_types` lists against the agent's required tools and node types. Extract the `env_var`, `credential_id`, and `credential_group` for every match. This is the list of needed credentials.
|
||||
|
||||
```python
|
||||
from aden_tools.credentials import CREDENTIAL_SPECS
|
||||
#### Step 2b: Check Existing Credential Sources
|
||||
|
||||
for name, spec in CREDENTIAL_SPECS.items():
|
||||
print(f"{name}: env_var={spec.env_var}, aden={spec.aden_supported}")
|
||||
For each needed credential, check three sources. A credential is "found" if it exists in ANY of them:
|
||||
|
||||
**1. Encrypted store metadata index** (unencrypted JSON — no decryption key needed):
|
||||
|
||||
```bash
|
||||
cat ~/.hive/credentials/metadata/index.json 2>/dev/null | jq -r '.credentials | keys[]'
|
||||
```
|
||||
|
||||
If a credential ID appears in this list, it is stored in the encrypted store.
|
||||
|
||||
**2. Environment variables:**
|
||||
|
||||
```bash
|
||||
# Check each needed env var, e.g.:
|
||||
printenv ANTHROPIC_API_KEY > /dev/null 2>&1 && echo "ANTHROPIC_API_KEY: set" || echo "ANTHROPIC_API_KEY: not set"
|
||||
printenv BRAVE_SEARCH_API_KEY > /dev/null 2>&1 && echo "BRAVE_SEARCH_API_KEY: set" || echo "BRAVE_SEARCH_API_KEY: not set"
|
||||
```
|
||||
|
||||
**3. Project `.env` file:**
|
||||
|
||||
```bash
|
||||
# Check each needed env var, e.g.:
|
||||
grep -q '^ANTHROPIC_API_KEY=' .env 2>/dev/null && echo "ANTHROPIC_API_KEY: in .env" || echo "ANTHROPIC_API_KEY: not in .env"
|
||||
grep -q '^BRAVE_SEARCH_API_KEY=' .env 2>/dev/null && echo "BRAVE_SEARCH_API_KEY: in .env" || echo "BRAVE_SEARCH_API_KEY: not in .env"
|
||||
```
|
||||
|
||||
#### Step 2c: HIVE_CREDENTIAL_KEY Check
|
||||
|
||||
If any credentials were found in the encrypted store metadata index, verify the encryption key is available. The key is typically persisted to shell config by a previous setup-credentials run.
|
||||
|
||||
Check both the current session AND shell config files:
|
||||
|
||||
```bash
|
||||
# Check 1: Current session
|
||||
printenv HIVE_CREDENTIAL_KEY > /dev/null 2>&1 && echo "session: set" || echo "session: not set"
|
||||
|
||||
# Check 2: Shell config files (where setup-credentials persists it)
|
||||
# Note: check each file individually to avoid non-zero exit when one doesn't exist
|
||||
for f in ~/.zshrc ~/.bashrc ~/.profile; do [ -f "$f" ] && grep -q 'HIVE_CREDENTIAL_KEY' "$f" && echo "$f"; done
|
||||
```
|
||||
|
||||
Decision logic:
|
||||
- **In current session** — no action needed, credentials in the store are usable
|
||||
- **In shell config but NOT in current session** — the key is persisted but this shell hasn't sourced it. Run `source ~/.zshrc` (or `~/.bashrc`), then re-check. Credentials in the store are usable after sourcing.
|
||||
- **Not in session AND not in shell config** — the key was never persisted. Warn the user that credentials in the store cannot be decrypted. Help fix the key situation (recover/re-persist), do NOT re-collect credential values that are already stored.
|
||||
|
||||
#### Step 2d: Compute Missing & Group
|
||||
|
||||
Diff the "needed" credentials against the "found" credentials to get the truly missing list.
|
||||
|
||||
Group related credentials by their `credential_group` field from the spec files. Credentials that share the same non-empty `credential_group` value should be presented as a single setup step rather than asking for each one individually.
|
||||
|
||||
**If nothing is missing and there's no HIVE_CREDENTIAL_KEY issue:** Report all credentials as configured and skip Steps 3-5. Example:
|
||||
|
||||
```
|
||||
All required credentials are already configured:
|
||||
✓ anthropic (ANTHROPIC_API_KEY) — found in encrypted store
|
||||
✓ brave_search (BRAVE_SEARCH_API_KEY) — found in environment
|
||||
Your agent is ready to run!
|
||||
```
|
||||
|
||||
**If credentials are missing:** Continue to Step 3 with only the missing ones.
|
||||
|
||||
### Step 3: Present Auth Options for Each Missing Credential
|
||||
|
||||
For each missing credential, check what authentication methods are available:
|
||||
@@ -104,7 +152,7 @@ Present the available options using AskUserQuestion:
|
||||
```
|
||||
Choose how to configure HUBSPOT_ACCESS_TOKEN:
|
||||
|
||||
1) Aden Authorization Server (Recommended)
|
||||
1) Aden Platform (OAuth) (Recommended)
|
||||
Secure OAuth2 flow via integration.adenhq.com
|
||||
- Quick setup with automatic token refresh
|
||||
- No need to manage API keys manually
|
||||
@@ -114,7 +162,7 @@ Choose how to configure HUBSPOT_ACCESS_TOKEN:
|
||||
- Requires creating a HubSpot Private App
|
||||
- Full control over scopes and permissions
|
||||
|
||||
3) Custom Credential Store (Advanced)
|
||||
3) Local Credential Setup (Advanced)
|
||||
Programmatic configuration for CI/CD
|
||||
- For automated deployments
|
||||
- Requires manual API calls
|
||||
@@ -122,7 +170,7 @@ Choose how to configure HUBSPOT_ACCESS_TOKEN:
|
||||
|
||||
### Step 4: Execute Auth Flow Based on User Choice
|
||||
|
||||
#### Option 1: Aden Authorization Server
|
||||
#### Option 1: Aden Platform (OAuth)
|
||||
|
||||
This is the recommended flow for supported integrations (HubSpot, etc.).
|
||||
|
||||
@@ -174,7 +222,7 @@ shell_type = detect_shell() # 'bash', 'zsh', or 'unknown'
|
||||
success, config_path = add_env_var_to_shell_config(
|
||||
"ADEN_API_KEY",
|
||||
user_provided_key,
|
||||
comment="Aden authorization server API key"
|
||||
comment="Aden Platform (OAuth) API key"
|
||||
)
|
||||
|
||||
if success:
|
||||
@@ -313,7 +361,7 @@ if not result.valid:
|
||||
# 2. Continue anyway (not recommended)
|
||||
```
|
||||
|
||||
**4.2d. Store in Encrypted Credential Store**
|
||||
**4.2d. Store in Local Encrypted Store**
|
||||
|
||||
```python
|
||||
from core.framework.credentials import CredentialStore, CredentialObject, CredentialKey
|
||||
@@ -340,7 +388,7 @@ store.save_credential(cred)
|
||||
export HUBSPOT_ACCESS_TOKEN="the-value"
|
||||
```
|
||||
|
||||
#### Option 3: Custom Credential Store (Advanced)
|
||||
#### Option 3: Local Credential Setup (Advanced)
|
||||
|
||||
For programmatic/CI/CD setups.
|
||||
|
||||
@@ -408,10 +456,14 @@ Report the result to the user.
|
||||
|
||||
Health checks validate credentials by making lightweight API calls:
|
||||
|
||||
| Credential | Endpoint | What It Checks |
|
||||
| -------------- | --------------------------------------- | --------------------------------- |
|
||||
| `hubspot` | `GET /crm/v3/objects/contacts?limit=1` | Bearer token validity, CRM scopes |
|
||||
| `brave_search` | `GET /res/v1/web/search?q=test&count=1` | API key validity |
|
||||
| Credential | Endpoint | What It Checks |
|
||||
| --------------- | --------------------------------------- | ---------------------------------- |
|
||||
| `anthropic` | `POST /v1/messages` | API key validity |
|
||||
| `brave_search` | `GET /res/v1/web/search?q=test&count=1` | API key validity |
|
||||
| `google_search` | `GET /customsearch/v1?q=test&num=1` | API key + CSE ID validity |
|
||||
| `github` | `GET /user` | Token validity, user identity |
|
||||
| `hubspot` | `GET /crm/v3/objects/contacts?limit=1` | Bearer token validity, CRM scopes |
|
||||
| `resend` | `GET /domains` | API key validity |
|
||||
|
||||
```python
|
||||
from aden_tools.credentials import check_credential_health, HealthCheckResult
|
||||
@@ -424,7 +476,7 @@ result: HealthCheckResult = check_credential_health("hubspot", token_value)
|
||||
|
||||
## Encryption Key (HIVE_CREDENTIAL_KEY)
|
||||
|
||||
The encrypted credential store requires `HIVE_CREDENTIAL_KEY` to encrypt/decrypt credentials.
|
||||
The local encrypted store requires `HIVE_CREDENTIAL_KEY` to encrypt/decrypt credentials.
|
||||
|
||||
- If the user doesn't have one, `EncryptedFileStorage` will auto-generate one and log it
|
||||
- The user MUST persist this key (e.g., in `~/.bashrc` or a secrets manager)
|
||||
@@ -443,7 +495,7 @@ If `HIVE_CREDENTIAL_KEY` is not set:
|
||||
- **NEVER** store credentials in plaintext files, git-tracked files, or agent configs
|
||||
- **NEVER** hardcode credentials in source code
|
||||
- **ALWAYS** use `SecretStr` from Pydantic when handling credential values in Python
|
||||
- **ALWAYS** use the encrypted credential store (`~/.hive/credentials`) for persistence
|
||||
- **ALWAYS** use the local encrypted store (`~/.hive/credentials`) for persistence
|
||||
- **ALWAYS** run health checks before storing credentials (when possible)
|
||||
- **ALWAYS** verify credentials were stored by re-running validation, not by reading them back
|
||||
- When modifying `~/.bashrc` or `~/.zshrc`, confirm with the user first
|
||||
@@ -456,7 +508,8 @@ All credential specs are defined in `tools/src/aden_tools/credentials/`:
|
||||
| ----------------- | ------------- | --------------------------------------------- | -------------- |
|
||||
| `llm.py` | LLM Providers | `anthropic` | No |
|
||||
| `search.py` | Search Tools | `brave_search`, `google_search`, `google_cse` | No |
|
||||
| `integrations.py` | Integrations | `hubspot` | Yes |
|
||||
| `email.py` | Email | `resend` | No |
|
||||
| `integrations.py` | Integrations | `github`, `hubspot` | No / Yes |
|
||||
|
||||
**Note:** Additional LLM providers (Cerebras, Groq, OpenAI) are handled by LiteLLM via environment
|
||||
variables (`CEREBRAS_API_KEY`, `GROQ_API_KEY`, `OPENAI_API_KEY`) but are not yet in CREDENTIAL_SPECS.
|
||||
@@ -507,66 +560,90 @@ token = store.get_key("hubspot", "access_token")
|
||||
## Example Session
|
||||
|
||||
```
|
||||
User: /setup-credentials for my hubspot-agent
|
||||
User: /setup-credentials for my research-agent
|
||||
|
||||
Agent: Let me check what credentials your hubspot-agent needs.
|
||||
Agent: Let me check what credentials your research-agent needs.
|
||||
|
||||
[Runs validation, finds ANTHROPIC_API_KEY and HUBSPOT_ACCESS_TOKEN missing]
|
||||
[Reads agent config]
|
||||
$ jq -r '.required_tools[]?' exports/research-agent/agent.json
|
||||
web_search
|
||||
google_search
|
||||
|
||||
Agent: Your hubspot-agent requires 2 credentials:
|
||||
$ jq -r '.graph.nodes[]?.node_type' exports/research-agent/agent.json | sort -u
|
||||
event_loop
|
||||
|
||||
1. ANTHROPIC_API_KEY - Anthropic API key for LLM calls
|
||||
2. HUBSPOT_ACCESS_TOKEN - HubSpot access token
|
||||
[Maps tools/nodes to credentials using lookup table]
|
||||
Needed: anthropic, brave_search, google_search, google_cse
|
||||
|
||||
Let's set these up one at a time.
|
||||
[Checks encrypted store metadata index]
|
||||
$ cat ~/.hive/credentials/metadata/index.json | jq -r '.credentials | keys[]'
|
||||
anthropic
|
||||
brave_search
|
||||
|
||||
--- Setting up ANTHROPIC_API_KEY ---
|
||||
[Checks environment variables]
|
||||
$ printenv ANTHROPIC_API_KEY > /dev/null 2>&1 && echo "set" || echo "not set"
|
||||
not set
|
||||
$ printenv BRAVE_SEARCH_API_KEY > /dev/null 2>&1 && echo "set" || echo "not set"
|
||||
not set
|
||||
$ printenv GOOGLE_API_KEY > /dev/null 2>&1 && echo "set" || echo "not set"
|
||||
not set
|
||||
$ printenv GOOGLE_CSE_ID > /dev/null 2>&1 && echo "set" || echo "not set"
|
||||
not set
|
||||
|
||||
This credential only supports direct API key entry.
|
||||
[Checks HIVE_CREDENTIAL_KEY since credentials found in store]
|
||||
$ printenv HIVE_CREDENTIAL_KEY > /dev/null 2>&1 && echo "session: set" || echo "session: not set"
|
||||
session: not set
|
||||
$ for f in ~/.zshrc ~/.bashrc ~/.profile; do [ -f "$f" ] && grep -q 'HIVE_CREDENTIAL_KEY' "$f" && echo "$f"; done
|
||||
/Users/user/.zshrc
|
||||
|
||||
[Shows instructions for getting Anthropic API key]
|
||||
[AskUserQuestion: "Please provide your Anthropic API key:"]
|
||||
[User provides key]
|
||||
[Stores in credential store, exports to env]
|
||||
[Key is in shell config but not current session — sourcing it]
|
||||
$ source ~/.zshrc
|
||||
|
||||
--- Setting up HUBSPOT_ACCESS_TOKEN ---
|
||||
[Computes missing credentials]
|
||||
Found:
|
||||
✓ anthropic (ANTHROPIC_API_KEY) — in encrypted store
|
||||
✓ brave_search (BRAVE_SEARCH_API_KEY) — in encrypted store
|
||||
Missing:
|
||||
✗ google_search (GOOGLE_API_KEY)
|
||||
✗ google_cse (GOOGLE_CSE_ID)
|
||||
|
||||
[AskUserQuestion with 3 options]
|
||||
Agent: 2 of 4 required credentials are already configured. Only Google Custom
|
||||
Search needs setup (2 values as a single group).
|
||||
|
||||
Choose how to configure HUBSPOT_ACCESS_TOKEN:
|
||||
1) Aden Authorization Server (Recommended)
|
||||
2) Direct API Key
|
||||
3) Custom Credential Store (Advanced)
|
||||
--- Setting up Google Custom Search (google_search + google_cse) ---
|
||||
|
||||
User: 1 (Aden)
|
||||
This requires two values that work together.
|
||||
|
||||
Agent: Great! Let me check if you have an Aden API key configured...
|
||||
First, the Google API Key:
|
||||
1. Go to https://console.cloud.google.com/apis/credentials
|
||||
2. Create a new project (or select an existing one)
|
||||
3. Enable the "Custom Search API" from the API Library
|
||||
4. Go to Credentials > Create Credentials > API Key
|
||||
5. Copy the generated API key
|
||||
|
||||
[Checks for ADEN_API_KEY, not found]
|
||||
[Opens browser to integration.adenhq.com/setup]
|
||||
|
||||
Agent: I've opened Aden's setup page. Please:
|
||||
1. Sign in or create an account
|
||||
2. Connect your HubSpot account (OAuth happens on Aden's site)
|
||||
3. Copy the API key shown after setup
|
||||
|
||||
[AskUserQuestion: "Please provide your Aden API key:"]
|
||||
[AskUserQuestion: "Please provide your Google API key:"]
|
||||
[User provides key]
|
||||
|
||||
Agent: Would you like me to save this to your shell config (~/.zshrc) for future sessions?
|
||||
Now, the Custom Search Engine ID:
|
||||
1. Go to https://programmablesearchengine.google.com/controlpanel/all
|
||||
2. Click "Add" to create a new search engine
|
||||
3. Under "What to search", select "Search the entire web"
|
||||
4. Give your search engine a name
|
||||
5. Click "Create"
|
||||
6. Copy the Search Engine ID (cx value)
|
||||
|
||||
[User approves]
|
||||
[Saves to ~/.zshrc and ~/.hive/configuration.json]
|
||||
[AskUserQuestion: "Please provide your Google CSE ID:"]
|
||||
[User provides ID]
|
||||
|
||||
Agent: Let me sync your credentials from Aden...
|
||||
[Runs health check with both values - GET /customsearch/v1?q=test&num=1 → 200 OK]
|
||||
[Stores both in local encrypted store, exports to env]
|
||||
|
||||
[Syncs credentials from Aden server - OAuth already done on Aden's side]
|
||||
[Runs health check]
|
||||
|
||||
Agent: HubSpot credentials validated successfully!
|
||||
✓ Google Custom Search credentials valid
|
||||
|
||||
All credentials are now configured:
|
||||
- ANTHROPIC_API_KEY: Stored in encrypted credential store
|
||||
- HUBSPOT_ACCESS_TOKEN: Synced from Aden (OAuth completed on Aden)
|
||||
- Validation passed - your agent is ready to run!
|
||||
✓ anthropic (ANTHROPIC_API_KEY) — already in encrypted store
|
||||
✓ brave_search (BRAVE_SEARCH_API_KEY) — already in encrypted store
|
||||
✓ google_search (GOOGLE_API_KEY) — stored in encrypted store
|
||||
✓ google_cse (GOOGLE_CSE_ID) — stored in encrypted store
|
||||
Your agent is ready to run!
|
||||
```
|
||||
|
||||
@@ -930,9 +930,10 @@ assert approval == "APPROVED", f"Expected APPROVED, got {approval}"
|
||||
- `steps_executed: int` - Number of nodes executed
|
||||
- `total_tokens: int` - Cumulative token usage
|
||||
- `total_latency_ms: int` - Total execution time
|
||||
- `path: list[str]` - Node IDs traversed
|
||||
- `path: list[str]` - Node IDs traversed (may contain repeated IDs from feedback loops)
|
||||
- `paused_at: str | None` - Node ID if HITL pause occurred
|
||||
- `session_state: dict` - State for resuming
|
||||
- `node_visit_counts: dict[str, int]` - How many times each node executed (useful for feedback loop testing)
|
||||
|
||||
### Happy Path Test
|
||||
```python
|
||||
@@ -975,6 +976,57 @@ async def test_performance_latency(mock_mode):
|
||||
assert duration < 5.0, f"Took {{duration}}s, expected <5s"
|
||||
```
|
||||
|
||||
### Testing Event Loop Nodes
|
||||
|
||||
Event loop nodes run multi-turn loops internally. Tests should verify:
|
||||
|
||||
**Output Keys Test** — All required keys are set via `set_output`:
|
||||
```python
|
||||
@pytest.mark.asyncio
|
||||
async def test_all_output_keys_set(mock_mode):
|
||||
"""Test that event_loop nodes set all required output keys."""
|
||||
result = await default_agent.run({{"query": "test"}}, mock_mode=mock_mode)
|
||||
assert result.success, f"Agent failed: {{result.error}}"
|
||||
output = result.output or {{}}
|
||||
for key in ["expected_key_1", "expected_key_2"]:
|
||||
assert key in output, f"Output key '{{key}}' not set by event_loop node"
|
||||
```
|
||||
|
||||
**Feedback Loop Test** — Verify feedback loops terminate:
|
||||
```python
|
||||
@pytest.mark.asyncio
|
||||
async def test_feedback_loop_respects_max_visits(mock_mode):
|
||||
"""Test that feedback loops terminate at max_node_visits."""
|
||||
result = await default_agent.run({{"input": "trigger_rejection"}}, mock_mode=mock_mode)
|
||||
assert result.success or result.error is not None
|
||||
visits = getattr(result, "node_visit_counts", {{}}) or {{}}
|
||||
for node_id, count in visits.items():
|
||||
assert count <= 5, f"Node {{node_id}} visited {{count}} times"
|
||||
```
|
||||
|
||||
**Fan-Out Test** — Verify parallel branches both complete:
|
||||
```python
|
||||
@pytest.mark.asyncio
|
||||
async def test_parallel_branches_complete(mock_mode):
|
||||
"""Test that fan-out branches all complete and produce outputs."""
|
||||
result = await default_agent.run({{"query": "test"}}, mock_mode=mock_mode)
|
||||
assert result.success
|
||||
output = result.output or {{}}
|
||||
# Check outputs from both parallel branches
|
||||
assert "branch_a_output" in output, "Branch A output missing"
|
||||
assert "branch_b_output" in output, "Branch B output missing"
|
||||
```
|
||||
|
||||
**Client-Facing Node Test** — In mock mode, client-facing nodes may not block:
|
||||
```python
|
||||
@pytest.mark.asyncio
|
||||
async def test_client_facing_node(mock_mode):
|
||||
"""Test that client-facing nodes produce output."""
|
||||
result = await default_agent.run({{"query": "test"}}, mock_mode=mock_mode)
|
||||
# In mock mode, client-facing blocking is typically bypassed
|
||||
assert result.success or result.paused_at is not None
|
||||
```
|
||||
|
||||
## Integration with building-agents
|
||||
|
||||
### Handoff Points
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
---
|
||||
name: Bug Report
|
||||
about: Report a bug to help us improve
|
||||
title: '[Bug]: '
|
||||
labels: bug
|
||||
title: "[Bug]: "
|
||||
labels: bug, enhancement
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
## Describe the Bug
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
---
|
||||
name: Feature Request
|
||||
about: Suggest a new feature or enhancement
|
||||
title: '[Feature]: '
|
||||
title: "[Feature]: "
|
||||
labels: enhancement
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
## Problem Statement
|
||||
|
||||
@@ -0,0 +1,71 @@
|
||||
---
|
||||
name: Integration Request
|
||||
about: Suggest a new integration
|
||||
title: "[Integration]:"
|
||||
labels: ''
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
## Service
|
||||
|
||||
Name and brief description of the service and what it enables agents to do.
|
||||
|
||||
**Description:** [e.g., "API key for Slack Bot" — short one-liner for the credential spec]
|
||||
|
||||
## Credential Identity
|
||||
|
||||
- **credential_id:** [e.g., `slack`]
|
||||
- **env_var:** [e.g., `SLACK_BOT_TOKEN`]
|
||||
- **credential_key:** [e.g., `access_token`, `api_key`, `bot_token`]
|
||||
|
||||
## Tools
|
||||
|
||||
Tool function names that require this credential:
|
||||
|
||||
- [e.g., `slack_send_message`]
|
||||
- [e.g., `slack_list_channels`]
|
||||
|
||||
## Auth Methods
|
||||
|
||||
- **Direct API key supported:** Yes / No
|
||||
- **Aden OAuth supported:** Yes / No
|
||||
|
||||
If Aden OAuth is supported, describe the OAuth scopes/permissions required.
|
||||
|
||||
## How to Get the Credential
|
||||
|
||||
Link where users obtain the key/token:
|
||||
|
||||
[e.g., https://api.slack.com/apps]
|
||||
|
||||
Step-by-step instructions:
|
||||
|
||||
1. Go to ...
|
||||
2. Create a ...
|
||||
3. Select scopes/permissions: ...
|
||||
4. Copy the key/token
|
||||
|
||||
## Health Check
|
||||
|
||||
A lightweight API call to validate the credential (no writes, no charges).
|
||||
|
||||
- **Endpoint:** [e.g., `https://slack.com/api/auth.test`]
|
||||
- **Method:** [e.g., `GET` or `POST`]
|
||||
- **Auth header:** [e.g., `Authorization: Bearer {token}` or `X-Api-Key: {key}`]
|
||||
- **Parameters (if any):** [e.g., `?limit=1`]
|
||||
- **200 means:** [e.g., key is valid]
|
||||
- **401 means:** [e.g., invalid or expired]
|
||||
- **429 means:** [e.g., rate limited but key is valid]
|
||||
|
||||
## Credential Group
|
||||
|
||||
Does this require multiple credentials configured together? (e.g., Google Custom Search needs
|
||||
both an API key and a CSE ID)
|
||||
|
||||
- [ ] No, single credential
|
||||
- [ ] Yes — list the other credential IDs in the group:
|
||||
|
||||
## Additional Context
|
||||
|
||||
Links to API docs, rate limits, free tier availability, or anything else relevant.
|
||||
+42
-25
@@ -21,23 +21,22 @@ jobs:
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v4
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
cd core
|
||||
pip install -e .
|
||||
pip install -r requirements-dev.txt
|
||||
run: uv sync --project core --group dev
|
||||
|
||||
- name: Ruff lint
|
||||
run: |
|
||||
ruff check core/
|
||||
ruff check tools/
|
||||
uv run --project core ruff check core/
|
||||
uv run --project core ruff check tools/
|
||||
|
||||
- name: Ruff format
|
||||
run: |
|
||||
ruff format --check core/
|
||||
ruff format --check tools/
|
||||
uv run --project core ruff format --check core/
|
||||
uv run --project core ruff format --check tools/
|
||||
|
||||
test:
|
||||
name: Test Python Framework
|
||||
@@ -52,23 +51,19 @@ jobs:
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install dependencies
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v4
|
||||
|
||||
- name: Install dependencies and run tests
|
||||
run: |
|
||||
cd core
|
||||
pip install -e .
|
||||
pip install -r requirements-dev.txt
|
||||
uv sync
|
||||
uv run pytest tests/ -v
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
cd core
|
||||
pytest tests/ -v
|
||||
|
||||
validate:
|
||||
name: Validate Agent Exports
|
||||
test-tools:
|
||||
name: Test Tools
|
||||
runs-on: ubuntu-latest
|
||||
needs: [lint, test]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
@@ -76,13 +71,35 @@ jobs:
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v4
|
||||
|
||||
- name: Install dependencies and run tests
|
||||
run: |
|
||||
cd tools
|
||||
uv sync --extra dev
|
||||
uv run pytest tests/ -v
|
||||
|
||||
validate:
|
||||
name: Validate Agent Exports
|
||||
runs-on: ubuntu-latest
|
||||
needs: [lint, test, test-tools]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v4
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
cd core
|
||||
pip install -e .
|
||||
pip install -r requirements-dev.txt
|
||||
uv sync
|
||||
|
||||
- name: Validate exported agents
|
||||
run: |
|
||||
@@ -105,7 +122,7 @@ jobs:
|
||||
for agent_dir in "${agent_dirs[@]}"; do
|
||||
if [ -f "$agent_dir/agent.json" ]; then
|
||||
echo "Validating $agent_dir"
|
||||
python -c "import json; json.load(open('$agent_dir/agent.json'))"
|
||||
uv run python -c "import json; json.load(open('$agent_dir/agent.json'))"
|
||||
validated=$((validated + 1))
|
||||
fi
|
||||
done
|
||||
|
||||
@@ -80,7 +80,13 @@ jobs:
|
||||
- help wanted: Extra attention is needed (if issue needs community input)
|
||||
- backlog: Tracked for the future, but not currently planned or prioritized
|
||||
|
||||
You may apply multiple labels if appropriate (e.g., "bug" and "help wanted").
|
||||
### 6. Estimate size (if NOT a duplicate, spam, or invalid)
|
||||
Apply exactly ONE size label to help contributors match their capacity to the task:
|
||||
- "size: small": Docs, typos, single-file fixes, config changes
|
||||
- "size: medium": Bug fixes with tests, adding a single tool, changes within one package
|
||||
- "size: large": Cross-package changes (core + tools), new modules, complex logic, architectural refactors
|
||||
|
||||
You may apply multiple labels if appropriate (e.g., "bug", "size: small", and "good first issue").
|
||||
|
||||
## Tools Available:
|
||||
- mcp__github__get_issue: Get issue details
|
||||
|
||||
@@ -21,18 +21,19 @@ jobs:
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v4
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
cd core
|
||||
pip install -e .
|
||||
pip install -r requirements-dev.txt
|
||||
uv sync
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
cd core
|
||||
pytest tests/ -v
|
||||
uv run pytest tests/ -v
|
||||
|
||||
- name: Generate changelog
|
||||
id: changelog
|
||||
|
||||
+6
-2
@@ -54,7 +54,6 @@ __pycache__/
|
||||
*.egg-info/
|
||||
.eggs/
|
||||
*.egg
|
||||
uv.lock
|
||||
|
||||
# Generated runtime data
|
||||
core/data/
|
||||
@@ -69,4 +68,9 @@ exports/*
|
||||
|
||||
.agent-builder-sessions/*
|
||||
|
||||
.venv
|
||||
.claude/settings.local.json
|
||||
|
||||
.venv
|
||||
|
||||
docs/github-issues/*
|
||||
core/tests/*dumps/*
|
||||
|
||||
@@ -1,20 +1,14 @@
|
||||
{
|
||||
"mcpServers": {
|
||||
"agent-builder": {
|
||||
"command": ".venv/bin/python",
|
||||
"args": ["-m", "framework.mcp.agent_builder_server"],
|
||||
"cwd": "core",
|
||||
"env": {
|
||||
"PYTHONPATH": "../tools/src"
|
||||
}
|
||||
"command": "uv",
|
||||
"args": ["run", "-m", "framework.mcp.agent_builder_server"],
|
||||
"cwd": "core"
|
||||
},
|
||||
"tools": {
|
||||
"command": ".venv/bin/python",
|
||||
"args": ["mcp_server.py", "--stdio"],
|
||||
"cwd": "tools",
|
||||
"env": {
|
||||
"PYTHONPATH": "src:../core"
|
||||
}
|
||||
"command": "uv",
|
||||
"args": ["run", "mcp_server.py", "--stdio"],
|
||||
"cwd": "tools"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
repos:
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.8.6
|
||||
rev: v0.15.0
|
||||
hooks:
|
||||
- id: ruff
|
||||
name: ruff lint (core)
|
||||
|
||||
+12
-5
@@ -35,9 +35,16 @@ You may submit PRs without prior assignment for:
|
||||
|
||||
1. Fork the repository
|
||||
2. Clone your fork: `git clone https://github.com/YOUR_USERNAME/hive.git`
|
||||
3. Create a feature branch: `git checkout -b feature/your-feature-name`
|
||||
4. Make your changes
|
||||
5. Run checks and tests:
|
||||
3. Add the upstream repository: `git remote add upstream https://github.com/adenhq/hive.git`
|
||||
4. Sync with upstream to ensure you're starting from the latest code:
|
||||
```bash
|
||||
git fetch upstream
|
||||
git checkout main
|
||||
git merge upstream/main
|
||||
```
|
||||
5. Create a feature branch: `git checkout -b feature/your-feature-name`
|
||||
6. Make your changes
|
||||
7. Run checks and tests:
|
||||
```bash
|
||||
make check # Lint and format checks (ruff check + ruff format --check on core/ and tools/)
|
||||
make test # Core tests (cd core && pytest tests/ -v)
|
||||
@@ -125,7 +132,7 @@ feat(component): add new feature description
|
||||
> **Note:** When testing agents in `exports/`, always set PYTHONPATH:
|
||||
>
|
||||
> ```bash
|
||||
> PYTHONPATH=core:exports python -m agent_name test
|
||||
> PYTHONPATH=exports uv run python -m agent_name test
|
||||
> ```
|
||||
|
||||
```bash
|
||||
@@ -139,7 +146,7 @@ make test
|
||||
cd core && pytest tests/ -v
|
||||
|
||||
# Run tests for a specific agent
|
||||
PYTHONPATH=core:exports python -m agent_name test
|
||||
PYTHONPATH=exports uv run python -m agent_name test
|
||||
```
|
||||
|
||||
> **CI also validates** that all exported agent JSON files (`exports/*/agent.json`) are well-formed JSON. Ensure your agent exports are valid before submitting.
|
||||
|
||||
+38
-41
@@ -44,7 +44,7 @@ Aden Agent Framework is a Python-based system for building goal-driven, self-imp
|
||||
Ensure you have installed:
|
||||
|
||||
- **Python 3.11+** - [Download](https://www.python.org/downloads/) (3.12 or 3.13 recommended)
|
||||
- **pip** - Package installer for Python (comes with Python)
|
||||
- **uv** - Python package manager ([Install](https://docs.astral.sh/uv/getting-started/installation/))
|
||||
- **git** - Version control
|
||||
- **Claude Code** - [Install](https://docs.anthropic.com/claude/docs/claude-code) (optional, for using building skills)
|
||||
|
||||
@@ -52,7 +52,7 @@ Verify installation:
|
||||
|
||||
```bash
|
||||
python --version # Should be 3.11+
|
||||
pip --version # Should be latest
|
||||
uv --version # Should be latest
|
||||
git --version # Any recent version
|
||||
```
|
||||
|
||||
@@ -111,12 +111,12 @@ This installs agent-related Claude Code skills:
|
||||
|
||||
```bash
|
||||
# Verify package imports
|
||||
python -c "import framework; print('✓ framework OK')"
|
||||
python -c "import aden_tools; print('✓ aden_tools OK')"
|
||||
python -c "import litellm; print('✓ litellm OK')"
|
||||
uv run python -c "import framework; print('✓ framework OK')"
|
||||
uv run python -c "import aden_tools; print('✓ aden_tools OK')"
|
||||
uv run python -c "import litellm; print('✓ litellm OK')"
|
||||
|
||||
# Run an agent (after building one via /building-agents-construction)
|
||||
PYTHONPATH=core:exports python -m your_agent_name validate
|
||||
PYTHONPATH=exports uv run python -m your_agent_name validate
|
||||
```
|
||||
|
||||
---
|
||||
@@ -128,8 +128,12 @@ hive/ # Repository root
|
||||
│
|
||||
├── .github/ # GitHub configuration
|
||||
│ ├── workflows/
|
||||
│ │ ├── ci.yml # Runs on every PR
|
||||
│ │ └── release.yml # Runs on tags
|
||||
│ │ ├── ci.yml # Lint, test, validate on every PR
|
||||
│ │ ├── release.yml # Runs on tags
|
||||
│ │ ├── pr-requirements.yml # PR requirement checks
|
||||
│ │ ├── pr-check-command.yml # PR check commands
|
||||
│ │ ├── claude-issue-triage.yml # Automated issue triage
|
||||
│ │ └── auto-close-duplicates.yml # Close duplicate issues
|
||||
│ ├── ISSUE_TEMPLATE/ # Bug report & feature request templates
|
||||
│ ├── PULL_REQUEST_TEMPLATE.md # PR description template
|
||||
│ └── CODEOWNERS # Auto-assign reviewers
|
||||
@@ -166,7 +170,6 @@ hive/ # Repository root
|
||||
│ │ ├── testing/ # Testing utilities
|
||||
│ │ └── __init__.py
|
||||
│ ├── pyproject.toml # Package metadata and dependencies
|
||||
│ ├── requirements.txt # Python dependencies
|
||||
│ ├── README.md # Framework documentation
|
||||
│ ├── MCP_INTEGRATION_GUIDE.md # MCP server integration guide
|
||||
│ └── docs/ # Protocol documentation
|
||||
@@ -182,7 +185,6 @@ hive/ # Repository root
|
||||
│ │ ├── mcp_server.py # HTTP MCP server
|
||||
│ │ └── __init__.py
|
||||
│ ├── pyproject.toml # Package metadata
|
||||
│ ├── requirements.txt # Python dependencies
|
||||
│ └── README.md # Tools documentation
|
||||
│
|
||||
├── exports/ # AGENT PACKAGES (user-created, gitignored)
|
||||
@@ -191,14 +193,15 @@ hive/ # Repository root
|
||||
├── docs/ # Documentation
|
||||
│ ├── getting-started.md # Quick start guide
|
||||
│ ├── configuration.md # Configuration reference
|
||||
│ ├── architecture.md # System architecture
|
||||
│ └── articles/ # Technical articles
|
||||
│ ├── architecture/ # System architecture
|
||||
│ ├── articles/ # Technical articles
|
||||
│ ├── quizzes/ # Developer quizzes
|
||||
│ └── i18n/ # Translations
|
||||
│
|
||||
├── scripts/ # Build & utility scripts
|
||||
│ ├── setup-python.sh # Python environment setup
|
||||
│ └── setup.sh # Legacy setup script
|
||||
├── scripts/ # Utility scripts
|
||||
│ └── auto-close-duplicates.ts # GitHub duplicate issue closer
|
||||
│
|
||||
├── quickstart.sh # Install Claude Code skills
|
||||
├── quickstart.sh # Interactive setup wizard
|
||||
├── ENVIRONMENT_SETUP.md # Complete Python setup guide
|
||||
├── README.md # Project overview
|
||||
├── DEVELOPER.md # This file
|
||||
@@ -252,7 +255,7 @@ claude> /testing-agent
|
||||
4. **Validate the Agent**
|
||||
|
||||
```bash
|
||||
PYTHONPATH=core:exports python -m your_agent_name validate
|
||||
PYTHONPATH=exports uv run python -m your_agent_name validate
|
||||
```
|
||||
|
||||
5. **Test the Agent**
|
||||
@@ -298,19 +301,19 @@ If you prefer to build agents manually:
|
||||
|
||||
```bash
|
||||
# Validate agent structure
|
||||
PYTHONPATH=core:exports python -m agent_name validate
|
||||
PYTHONPATH=exports uv run python -m agent_name validate
|
||||
|
||||
# Show agent information
|
||||
PYTHONPATH=core:exports python -m agent_name info
|
||||
PYTHONPATH=exports uv run python -m agent_name info
|
||||
|
||||
# Run agent with input
|
||||
PYTHONPATH=core:exports python -m agent_name run --input '{
|
||||
PYTHONPATH=exports uv run python -m agent_name run --input '{
|
||||
"ticket_content": "My login is broken",
|
||||
"customer_id": "CUST-123"
|
||||
}'
|
||||
|
||||
# Run in mock mode (no LLM calls)
|
||||
PYTHONPATH=core:exports python -m agent_name run --mock --input '{...}'
|
||||
PYTHONPATH=exports uv run python -m agent_name run --mock --input '{...}'
|
||||
```
|
||||
|
||||
---
|
||||
@@ -334,17 +337,17 @@ This generates and runs:
|
||||
|
||||
```bash
|
||||
# Run all tests for an agent
|
||||
PYTHONPATH=core:exports python -m agent_name test
|
||||
PYTHONPATH=exports uv run python -m agent_name test
|
||||
|
||||
# Run specific test type
|
||||
PYTHONPATH=core:exports python -m agent_name test --type constraint
|
||||
PYTHONPATH=core:exports python -m agent_name test --type success
|
||||
PYTHONPATH=exports uv run python -m agent_name test --type constraint
|
||||
PYTHONPATH=exports uv run python -m agent_name test --type success
|
||||
|
||||
# Run with parallel execution
|
||||
PYTHONPATH=core:exports python -m agent_name test --parallel 4
|
||||
PYTHONPATH=exports uv run python -m agent_name test --parallel 4
|
||||
|
||||
# Fail fast (stop on first failure)
|
||||
PYTHONPATH=core:exports python -m agent_name test --fail-fast
|
||||
PYTHONPATH=exports uv run python -m agent_name test --fail-fast
|
||||
```
|
||||
|
||||
### Writing Custom Tests
|
||||
@@ -375,7 +378,7 @@ def test_ticket_categorization():
|
||||
- **PEP 8** - Follow Python style guide
|
||||
- **Type hints** - Use for function signatures and class attributes
|
||||
- **Docstrings** - Document classes and public functions
|
||||
- **Black** - Code formatter (run with `black .`)
|
||||
- **Ruff** - Linter and formatter (run with `make check`)
|
||||
|
||||
```python
|
||||
# Good
|
||||
@@ -509,8 +512,8 @@ chore(deps): update React to 18.2.0
|
||||
|
||||
1. Create a feature branch from `main`
|
||||
2. Make your changes with clear commits
|
||||
3. Run tests locally: `PYTHONPATH=core:exports python -m pytest`
|
||||
4. Run linting: `black --check .`
|
||||
3. Run tests locally: `make test`
|
||||
4. Run linting: `make check`
|
||||
5. Push and create a PR
|
||||
6. Fill out the PR template
|
||||
7. Request review from CODEOWNERS
|
||||
@@ -528,16 +531,11 @@ chore(deps): update React to 18.2.0
|
||||
```bash
|
||||
# Add to core framework
|
||||
cd core
|
||||
pip install <package>
|
||||
# Then add to requirements.txt or pyproject.toml
|
||||
uv add <package>
|
||||
|
||||
# Add to tools package
|
||||
cd tools
|
||||
pip install <package>
|
||||
# Then add to requirements.txt or pyproject.toml
|
||||
|
||||
# Reinstall in editable mode
|
||||
pip install -e .
|
||||
uv add <package>
|
||||
```
|
||||
|
||||
### Creating a New Agent
|
||||
@@ -636,10 +634,10 @@ import logging
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
|
||||
# Run with verbose output
|
||||
PYTHONPATH=core:exports python -m agent_name run --input '{...}' --verbose
|
||||
PYTHONPATH=exports uv run python -m agent_name run --input '{...}' --verbose
|
||||
|
||||
# Use mock mode to test without LLM calls
|
||||
PYTHONPATH=core:exports python -m agent_name run --mock --input '{...}'
|
||||
PYTHONPATH=exports uv run python -m agent_name run --mock --input '{...}'
|
||||
```
|
||||
|
||||
---
|
||||
@@ -670,9 +668,8 @@ cat .env
|
||||
# Or check shell environment
|
||||
echo $ANTHROPIC_API_KEY
|
||||
|
||||
# Copy from .env.example if needed
|
||||
cp .env.example .env
|
||||
# Then edit .env with your API keys
|
||||
# Create .env if needed
|
||||
# Then add your API keys
|
||||
```
|
||||
|
||||
|
||||
|
||||
+71
-30
@@ -21,6 +21,19 @@ This will:
|
||||
- Fix package compatibility issues (openai + litellm)
|
||||
- Verify all installations
|
||||
|
||||
## Windows Setup
|
||||
|
||||
Windows users should use **WSL (Windows Subsystem for Linux)** to set up and run agents.
|
||||
|
||||
1. [Install WSL 2](https://learn.microsoft.com/en-us/windows/wsl/install) if you haven't already:
|
||||
```powershell
|
||||
wsl --install
|
||||
```
|
||||
2. Open your WSL terminal, clone the repo, and run the quickstart script:
|
||||
```bash
|
||||
./quickstart.sh
|
||||
```
|
||||
|
||||
## Alpine Linux Setup
|
||||
|
||||
If you are using Alpine Linux (e.g., inside a Docker container), you must install system dependencies and use a virtual environment before running the setup script:
|
||||
@@ -32,9 +45,9 @@ apk add bash git python3 py3-pip nodejs npm curl build-base python3-dev linux-he
|
||||
```
|
||||
2. Set up Virtual Environment (Required for Python 3.12+):
|
||||
```
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install --upgrade pip setuptools wheel
|
||||
uv venv
|
||||
source .venv/bin/activate
|
||||
# uv handles pip/setuptools/wheel automatically
|
||||
```
|
||||
3. Run the Quickstart Script:
|
||||
```
|
||||
@@ -49,29 +62,29 @@ If you prefer to set up manually or the script fails:
|
||||
|
||||
```bash
|
||||
cd core
|
||||
pip install -e .
|
||||
uv pip install -e .
|
||||
```
|
||||
|
||||
### 2. Install Tools Package
|
||||
|
||||
```bash
|
||||
cd tools
|
||||
pip install -e .
|
||||
uv pip install -e .
|
||||
```
|
||||
|
||||
### 3. Upgrade OpenAI Package
|
||||
|
||||
```bash
|
||||
# litellm requires openai >= 1.0.0
|
||||
pip install --upgrade "openai>=1.0.0"
|
||||
uv pip install --upgrade "openai>=1.0.0"
|
||||
```
|
||||
|
||||
### 4. Verify Installation
|
||||
|
||||
```bash
|
||||
python -c "import framework; print('✓ framework OK')"
|
||||
python -c "import aden_tools; print('✓ aden_tools OK')"
|
||||
python -c "import litellm; print('✓ litellm OK')"
|
||||
uv run python -c "import framework; print('✓ framework OK')"
|
||||
uv run python -c "import aden_tools; print('✓ aden_tools OK')"
|
||||
uv run python -c "import litellm; print('✓ litellm OK')"
|
||||
```
|
||||
|
||||
> **Windows Tip:**
|
||||
@@ -100,33 +113,44 @@ For running agents with real LLMs:
|
||||
export ANTHROPIC_API_KEY="your-key-here"
|
||||
```
|
||||
|
||||
Windows (PowerShell):
|
||||
|
||||
```powershell
|
||||
$env:ANTHROPIC_API_KEY="your-key-here"
|
||||
```
|
||||
|
||||
## Running Agents
|
||||
|
||||
All agent commands must be run from the project root with `PYTHONPATH` set:
|
||||
|
||||
```bash
|
||||
# From /hive/ directory
|
||||
PYTHONPATH=core:exports python -m agent_name COMMAND
|
||||
PYTHONPATH=exports uv run python -m agent_name COMMAND
|
||||
```
|
||||
|
||||
### Example Commands
|
||||
Windows (PowerShell):
|
||||
|
||||
After building an agent via `/building-agents-construction`, use these commands:
|
||||
```powershell
|
||||
$env:PYTHONPATH="core;exports"
|
||||
python -m agent_name COMMAND
|
||||
```
|
||||
|
||||
### Example: Support Ticket Agent
|
||||
|
||||
```bash
|
||||
# Validate agent structure
|
||||
PYTHONPATH=core:exports python -m your_agent_name validate
|
||||
PYTHONPATH=exports uv run python -m your_agent_name validate
|
||||
|
||||
# Show agent information
|
||||
PYTHONPATH=core:exports python -m your_agent_name info
|
||||
PYTHONPATH=exports uv run python -m your_agent_name info
|
||||
|
||||
# Run agent with input
|
||||
PYTHONPATH=core:exports python -m your_agent_name run --input '{
|
||||
PYTHONPATH=exports uv run python -m your_agent_name run --input '{
|
||||
"task": "Your input here"
|
||||
}'
|
||||
|
||||
# Run in mock mode (no LLM calls)
|
||||
PYTHONPATH=core:exports python -m your_agent_name run --mock --input '{...}'
|
||||
PYTHONPATH=exports uv run python -m your_agent_name run --mock --input '{...}'
|
||||
```
|
||||
|
||||
## Building New Agents and Run Flow
|
||||
@@ -231,7 +255,7 @@ This workflow orchestrates all agent-building skills to take you from idea → p
|
||||
|
||||
```bash
|
||||
# Create virtual environment
|
||||
python3 -m venv .venv
|
||||
uv venv
|
||||
|
||||
# Activate it
|
||||
source .venv/bin/activate # macOS/Linux
|
||||
@@ -245,7 +269,15 @@ Always activate the venv before running agents:
|
||||
|
||||
```bash
|
||||
source .venv/bin/activate
|
||||
PYTHONPATH=core:exports python -m your_agent_name demo
|
||||
PYTHONPATH=exports uv run python -m your_agent_name demo
|
||||
```
|
||||
|
||||
### PowerShell: “running scripts is disabled on this system”
|
||||
|
||||
Run once per session:
|
||||
|
||||
```powershell
|
||||
Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass
|
||||
```
|
||||
|
||||
### "ModuleNotFoundError: No module named 'framework'"
|
||||
@@ -253,7 +285,7 @@ PYTHONPATH=core:exports python -m your_agent_name demo
|
||||
**Solution:** Install the core package:
|
||||
|
||||
```bash
|
||||
cd core && pip install -e .
|
||||
cd core && uv pip install -e .
|
||||
```
|
||||
|
||||
### "ModuleNotFoundError: No module named 'aden_tools'"
|
||||
@@ -261,7 +293,7 @@ cd core && pip install -e .
|
||||
**Solution:** Install the tools package:
|
||||
|
||||
```bash
|
||||
cd tools && pip install -e .
|
||||
cd tools && uv pip install -e .
|
||||
```
|
||||
|
||||
Or run the setup script:
|
||||
@@ -277,17 +309,26 @@ Or run the setup script:
|
||||
**Solution:** Upgrade openai:
|
||||
|
||||
```bash
|
||||
pip install --upgrade "openai>=1.0.0"
|
||||
uv pip install --upgrade "openai>=1.0.0"
|
||||
```
|
||||
|
||||
### "No module named 'your_agent_name'"
|
||||
|
||||
**Cause:** Not running from project root, missing PYTHONPATH, or agent not yet created
|
||||
|
||||
**Solution:** Ensure you're in the project root directory, have built an agent, and use:
|
||||
**Solution:** Ensure you're in `/hive/` and use:
|
||||
|
||||
Linux/macOS:
|
||||
|
||||
```bash
|
||||
PYTHONPATH=core:exports python -m your_agent_name validate
|
||||
PYTHONPATH=exports uv run python -m your_agent_name validate
|
||||
```
|
||||
|
||||
Windows:
|
||||
|
||||
```powershell
|
||||
$env:PYTHONPATH="core;exports"
|
||||
python -m support_ticket_agent validate
|
||||
```
|
||||
|
||||
### Agent imports fail with "broken installation"
|
||||
@@ -298,7 +339,7 @@ PYTHONPATH=core:exports python -m your_agent_name validate
|
||||
|
||||
```bash
|
||||
# Remove broken installations
|
||||
pip uninstall -y framework tools
|
||||
uv pip uninstall framework tools
|
||||
|
||||
# Reinstall correctly
|
||||
./quickstart.sh
|
||||
@@ -352,12 +393,12 @@ If you need to use both packages in a single script (e.g., for testing), you hav
|
||||
|
||||
```bash
|
||||
# Option 1: Install both in a shared environment
|
||||
python -m venv .venv
|
||||
uv venv
|
||||
source .venv/bin/activate
|
||||
pip install -e core/ -e tools/
|
||||
uv pip install -e core/ -e tools/
|
||||
|
||||
# Option 2: Use PYTHONPATH (for quick testing)
|
||||
PYTHONPATH=core:tools/src python your_script.py
|
||||
PYTHONPATH=tools/src uv run python your_script.py
|
||||
```
|
||||
|
||||
### MCP Server Configuration
|
||||
@@ -383,7 +424,7 @@ This ensures each MCP server runs with its correct dependencies.
|
||||
|
||||
### Why PYTHONPATH is Required
|
||||
|
||||
The packages are installed in **editable mode** (`pip install -e`), which means:
|
||||
The packages are installed in **editable mode** (`uv pip install -e`), which means:
|
||||
|
||||
- `framework` and `aden_tools` are globally importable (no PYTHONPATH needed)
|
||||
- `exports` is NOT installed as a package (PYTHONPATH required)
|
||||
@@ -412,7 +453,7 @@ Enter goal: "Build an agent that processes customer support tickets"
|
||||
### 3. Validate Agent
|
||||
|
||||
```bash
|
||||
PYTHONPATH=core:exports python -m your_agent_name validate
|
||||
PYTHONPATH=exports uv run python -m your_agent_name validate
|
||||
```
|
||||
|
||||
### 4. Test Agent
|
||||
@@ -424,7 +465,7 @@ claude> /testing-agent
|
||||
### 5. Run Agent
|
||||
|
||||
```bash
|
||||
PYTHONPATH=core:exports python -m your_agent_name run --input '{...}'
|
||||
PYTHONPATH=exports uv run python -m your_agent_name run --input '{...}'
|
||||
```
|
||||
|
||||
## IDE Setup
|
||||
|
||||
@@ -4,9 +4,11 @@ help: ## Show this help
|
||||
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \
|
||||
awk 'BEGIN {FS = ":.*?## "}; {printf " \033[36m%-15s\033[0m %s\n", $$1, $$2}'
|
||||
|
||||
lint: ## Run ruff linter (with auto-fix)
|
||||
lint: ## Run ruff linter and formatter (with auto-fix)
|
||||
cd core && ruff check --fix .
|
||||
cd tools && ruff check --fix .
|
||||
cd core && ruff format .
|
||||
cd tools && ruff format .
|
||||
|
||||
format: ## Run ruff formatter
|
||||
cd core && ruff format .
|
||||
@@ -19,8 +21,8 @@ check: ## Run all checks without modifying files (CI-safe)
|
||||
cd tools && ruff format --check .
|
||||
|
||||
test: ## Run all tests
|
||||
cd core && python -m pytest tests/ -v
|
||||
cd core && uv run python -m pytest tests/ -v
|
||||
|
||||
install-hooks: ## Install pre-commit hooks
|
||||
pip install pre-commit
|
||||
uv pip install pre-commit
|
||||
pre-commit install
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
- **Added empty response retry logic** — LLM provider now detects empty responses (e.g. Gemini returning 200 with no content on rate limit) and retries with exponential backoff, preventing hallucinated output from the cleanup LLM
|
||||
- **Added context-aware input compaction** — LLM nodes now estimate input token count before calling the model and progressively truncate the largest values if they exceed the context window budget
|
||||
- **Increased rate limit retries to 10** with verbose `[retry]` and `[compaction]` logging that includes model name, finish reason, and attempt count
|
||||
- **Updated setup scripts** — `scripts/setup-python.sh` now installs Playwright Chromium browser automatically for web scraping support
|
||||
- **Interactive quickstart onboarding** — `quickstart.sh` rewritten as bee-themed interactive wizard that detects existing API keys (including Claude Code subscription), lets user pick ONE default LLM provider, and saves configuration to `~/.hive/configuration.json`
|
||||
- **Fixed lint errors** across `hubspot_tool.py` (line length) and `agent_builder_server.py` (unused variable)
|
||||
|
||||
@@ -24,8 +23,6 @@
|
||||
- `tools/src/aden_tools/tools/web_scrape_tool/README.md` — Updated docs
|
||||
- `tools/pyproject.toml` — Added `playwright`, `playwright-stealth` deps
|
||||
- `tools/Dockerfile` — Added `playwright install chromium --with-deps`
|
||||
- `scripts/setup-python.sh` — Added Playwright Chromium browser install step
|
||||
|
||||
### LLM Reliability
|
||||
- `core/framework/llm/litellm.py` — Empty response retry + max retries 10 + verbose logging
|
||||
- `core/framework/graph/node.py` — Input compaction via `_compact_inputs()`, `_estimate_tokens()`, `_get_context_limit()`
|
||||
@@ -41,7 +38,6 @@
|
||||
## Test plan
|
||||
- [ ] Run `make lint` — passes clean
|
||||
- [ ] Run `./quickstart.sh` and verify interactive flow works, config saved to `~/.hive/configuration.json`
|
||||
- [ ] Run `./scripts/setup-python.sh` and verify Playwright Chromium installs
|
||||
- [ ] Run `pytest tests/tools/test_web_scrape_tool.py -v`
|
||||
- [ ] Run agent against a JS-heavy site and verify `web_scrape` returns rendered content
|
||||
- [ ] Set `HUBSPOT_ACCESS_TOKEN` and verify HubSpot tool CRUD operations work
|
||||
|
||||
@@ -15,7 +15,6 @@
|
||||
|
||||
[](https://github.com/adenhq/hive/blob/main/LICENSE)
|
||||
[](https://www.ycombinator.com/companies/aden)
|
||||
[](https://hub.docker.com/u/adenhq)
|
||||
[](https://discord.com/invite/MXE49hrKDk)
|
||||
[](https://x.com/aden_hq)
|
||||
[](https://www.linkedin.com/company/teamaden/)
|
||||
@@ -40,6 +39,31 @@ Build reliable, self-improving AI agents without hardcoding workflows. Define yo
|
||||
|
||||
Visit [adenhq.com](https://adenhq.com) for complete documentation, examples, and guides.
|
||||
|
||||
## Who Is Hive For?
|
||||
|
||||
Hive is designed for developers and teams who want to build **production-grade AI agents** without manually wiring complex workflows.
|
||||
|
||||
Hive is a good fit if you:
|
||||
|
||||
- Want AI agents that **execute real business processes**, not demos
|
||||
- Prefer **goal-driven development** over hardcoded workflows
|
||||
- Need **self-healing and adaptive agents** that improve over time
|
||||
- Require **human-in-the-loop control**, observability, and cost limits
|
||||
- Plan to run agents in **production environments**
|
||||
|
||||
Hive may not be the best fit if you’re only experimenting with simple agent chains or one-off scripts.
|
||||
|
||||
## When Should You Use Hive?
|
||||
|
||||
Use Hive when you need:
|
||||
|
||||
- Long-running, autonomous agents
|
||||
- Multi-agent coordination
|
||||
- Continuous improvement based on failures
|
||||
- Strong monitoring, safety, and budget controls
|
||||
- A framework that evolves with your goals
|
||||
|
||||
|
||||
## What is Aden
|
||||
|
||||
<p align="center">
|
||||
@@ -64,11 +88,13 @@ Aden is a platform for building, deploying, operating, and adapting AI agents:
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Prerequisites
|
||||
## Prerequisites
|
||||
|
||||
- [Python 3.11+](https://www.python.org/downloads/) for agent development
|
||||
- Python 3.11+ for agent development
|
||||
- Claude Code or Cursor for utilizing agent skills
|
||||
|
||||
> **Note for Windows Users:** It is strongly recommended to use **WSL (Windows Subsystem for Linux)** or **Git Bash** to run this framework. Some core automation scripts may not execute correctly in standard Command Prompt or PowerShell.
|
||||
|
||||
### Installation
|
||||
|
||||
```bash
|
||||
@@ -95,7 +121,7 @@ claude> /building-agents-construction
|
||||
claude> /testing-agent
|
||||
|
||||
# Run your agent
|
||||
PYTHONPATH=core:exports python -m your_agent_name run --input '{...}'
|
||||
PYTHONPATH=exports uv run python -m your_agent_name run --input '{...}'
|
||||
```
|
||||
|
||||
**[📖 Complete Setup Guide](ENVIRONMENT_SETUP.md)** - Detailed instructions for agent development
|
||||
@@ -181,7 +207,7 @@ flowchart LR
|
||||
Aden Hive provides a list of featured agents that you can use and build on top of.
|
||||
|
||||
### Run an agent shared by others
|
||||
Put the agent in `exports/` and run `PYTHONPATH=core:exports python -m your_agent_name run --input '{...}'`
|
||||
Put the agent in `exports/` and run `PYTHONPATH=exports uv run python -m your_agent_name run --input '{...}'`
|
||||
|
||||
|
||||
For building and running goal-driven agents with the framework:
|
||||
@@ -202,7 +228,7 @@ claude> /building-agents-construction
|
||||
claude> /testing-agent
|
||||
|
||||
# Run agents
|
||||
PYTHONPATH=core:exports python -m agent_name run --input '{...}'
|
||||
PYTHONPATH=exports uv run python -m agent_name run --input '{...}'
|
||||
```
|
||||
|
||||
See [ENVIRONMENT_SETUP.md](ENVIRONMENT_SETUP.md) for complete setup instructions.
|
||||
|
||||
@@ -0,0 +1,112 @@
|
||||
# 🚀 Release v0.4.0
|
||||
|
||||
**79 commits since v0.3.2** | **Target: `main` @ `80a41b4`**
|
||||
|
||||
---
|
||||
|
||||
## ✨ Highlights
|
||||
|
||||
This is a major release introducing the **Event Loop Node architecture**, an **interactive TUI dashboard**, **ClientIO gateway** for client-facing agents, a **GitHub tool**, **Slack tool integration** (45+ tools), and a full **migration from pip to uv** for package management.
|
||||
|
||||
---
|
||||
|
||||
## 🆕 Features
|
||||
|
||||
### 🔄 Event Loop Node Architecture
|
||||
- Implement event loop node framework (WP1-4, WP8, WP9, WP10, WP12) — a new node type that supports iterative, multi-turn execution with tool calls, judge-based acceptance, and client-facing interaction
|
||||
- Emit bus events for runtime observability
|
||||
- Add graph validation for client-facing nodes
|
||||
- Soft-fail on schema mismatch during context handoff (no more hard failures)
|
||||
|
||||
### 🖥️ Interactive TUI Dashboard
|
||||
- Add interactive TUI dashboard for agent execution with 3-pane layout (logs/graph + chat)
|
||||
- Implement selectable logging, interactive ChatREPL, and thread-safe event handling
|
||||
- Screenshot feature, header polish, keybinding updates
|
||||
- Lazy widget loading, Horizontal/Vertical layout fixes
|
||||
- Integrate agent builder with TUI
|
||||
|
||||
### 💬 ClientIO Gateway
|
||||
- Implement ClientIO gateway for client-facing node I/O routing
|
||||
- Client-facing nodes can now request and receive user input at runtime
|
||||
|
||||
### 🐙 GitHub Tool
|
||||
- Add GitHub tool for repository and issue management
|
||||
- Security and integration fixes from PR feedback
|
||||
|
||||
### 💼 Slack Tool Integration
|
||||
- Add Slack bot integration with 45+ tools for multipurpose integration
|
||||
- Includes CRM support capabilities
|
||||
|
||||
### 🔑 Credential Store
|
||||
- Provider-based credential store (`aden provider credential store by provider`)
|
||||
- Support non-OAuth key setup in credential workflows
|
||||
- Quickstart credential store integration
|
||||
|
||||
### 📦 Migration to uv
|
||||
- Migrate from pip to uv for package management
|
||||
- Consolidate workspace to uv monorepo
|
||||
- Migrate all CI jobs from pip to uv
|
||||
- Check for litellm import in both `CORE_PYTHON` and `TOOLS_PYTHON` environments
|
||||
|
||||
### 🛠️ Other Features
|
||||
- Tool truncation for handling large tool outputs
|
||||
- Inject runtime datetime into LLM system prompts
|
||||
- Add sample agent folder structure and examples
|
||||
- Add message when LLM key is not available
|
||||
- Edit bot prompt to decide on technical size of issues
|
||||
- Update skills and agent builder tools; bump pinned ruff version
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Bug Fixes
|
||||
|
||||
- **ON_FAILURE edge routing**: Follow ON_FAILURE edges when a node fails after max retries
|
||||
- **Malformed JSON tool arguments**: Handle malformed JSON tool arguments safely in LiteLLMProvider
|
||||
- **Quickstart compatibility**: Fix quickstart.sh compatibility and provider selection issues
|
||||
- **Silent exit fix**: Resolve silent exit when selecting non-Anthropic LLM provider
|
||||
- **Robust compaction logic**: Fix conversation compaction edge cases
|
||||
- **Loop prevention**: Prevent infinite loops in feedback edges
|
||||
- **Tool pruning logic**: Fix incorrect tool pruning behavior
|
||||
- **Text delta granularity**: Fix text delta granularity and tool limit problems
|
||||
- **Tool call results**: Fix formulation of tool call results
|
||||
- **Max retry reset**: Reset max retry counter to 0 for event loop nodes
|
||||
- **Graph validation**: Fix graph validation logic
|
||||
- **MCP exports directory**: Handle missing exports directory in test generation tools
|
||||
- **Bash version support**: Fix bash version compatibility
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ Chores & CI
|
||||
|
||||
- Consolidate workspace to uv monorepo
|
||||
- Migrate remaining CI jobs from pip to uv
|
||||
- Clean up use of `setup-python` in CI
|
||||
- Windows lint fixes
|
||||
- Various lint and formatting fixes
|
||||
- Update `.gitignore` and remove local claude settings
|
||||
- Update issue templates
|
||||
|
||||
---
|
||||
|
||||
## 📖 Documentation
|
||||
|
||||
- Add Windows compatibility warning
|
||||
- Update architecture diagram source path in README
|
||||
|
||||
---
|
||||
|
||||
## 👏 Contributors
|
||||
|
||||
Thanks to all contributors for this release:
|
||||
|
||||
- **@mubarakar95** — Interactive TUI dashboard (3-pane layout, ChatREPL, selectable logging, screenshot feature, lazy widget loading)
|
||||
- **@levxn** — Slack bot integration with 45+ tools including CRM support
|
||||
- **@lakshitaa-chellaramani** — GitHub tool for repository and issue management
|
||||
- **@Acid-OP** — ON_FAILURE edge routing fix after max retries
|
||||
- **@Siddharth2624** — Malformed JSON tool argument handling in LiteLLMProvider
|
||||
- **@Antiarin** — Runtime datetime injection into LLM system prompts
|
||||
- **@kuldeepgaur02** — Fix silent exit when selecting non-Anthropic LLM provider
|
||||
- **@Anjali Yadav** — Fix missing exports directory in MCP test generation tools
|
||||
- **@Hundao** — Migrate remaining CI jobs from pip to uv
|
||||
- **@ranjithkumar9343** — Windows compatibility warning documentation
|
||||
- **@Yogesh Sakharam Diwate** — Architecture diagram path update in README
|
||||
+1
-1
@@ -268,7 +268,7 @@ classDef done fill:#9e9e9e,color:#fff,stroke:#757575
|
||||
- [ ] Wake-up Tool (resume agent tasks)
|
||||
|
||||
### Deployment (Self-Hosted)
|
||||
- [ ] Docker container standardization
|
||||
- [ ] Workder agent docker container standardization
|
||||
- [ ] Headless backend execution
|
||||
- [ ] Exposed API for frontend attachment
|
||||
- [ ] Local monitoring & observability
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
# TUI Text Selection and Copy Guide
|
||||
|
||||
## Keybindings
|
||||
|
||||
| Key | Action |
|
||||
|---------------|-----------------------|
|
||||
| `Tab` | Next panel |
|
||||
| `Shift+Tab` | Previous panel |
|
||||
| `Ctrl+S` | Save SVG screenshot |
|
||||
| `Ctrl+O` | Command palette |
|
||||
| `Q` | Quit |
|
||||
|
||||
## Panel Cycle Order
|
||||
|
||||
`Tab` cycles: **Log Pane → Graph View → Chat Input**
|
||||
|
||||
## Text Selection
|
||||
|
||||
Textual apps capture the mouse, so normal click-drag selection won't work by default. To select and copy text from any pane:
|
||||
|
||||
1. **Hold `Shift`** while clicking and dragging — this bypasses Textual's mouse capture and lets your terminal handle selection natively.
|
||||
2. Copy with your terminal's shortcut (`Cmd+C` on macOS, `Ctrl+Shift+C` on most Linux terminals).
|
||||
|
||||
## Log Pane Scrolling
|
||||
|
||||
The log pane uses `auto_scroll=False`. New output only scrolls to the bottom when you are already at the bottom of the log. If you've scrolled up to read earlier output, it stays in place.
|
||||
|
||||
## Screenshots
|
||||
|
||||
`Ctrl+S` saves an SVG screenshot to the `screenshots/` directory with a timestamped filename. Open the SVG in any browser to view it.
|
||||
+10
-10
@@ -14,7 +14,7 @@ Framework provides a runtime framework that captures **decisions**, not just act
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
pip install -e .
|
||||
uv pip install -e .
|
||||
```
|
||||
|
||||
## MCP Server Setup
|
||||
@@ -45,13 +45,13 @@ If you prefer manual setup:
|
||||
|
||||
```bash
|
||||
# Install framework
|
||||
pip install -e .
|
||||
uv pip install -e .
|
||||
|
||||
# Install MCP dependencies
|
||||
pip install mcp fastmcp
|
||||
uv pip install mcp fastmcp
|
||||
|
||||
# Test the server
|
||||
python -m framework.mcp.agent_builder_server
|
||||
uv run python -m framework.mcp.agent_builder_server
|
||||
```
|
||||
|
||||
### Using with MCP Clients
|
||||
@@ -86,13 +86,13 @@ Run an LLM-powered calculator:
|
||||
|
||||
```bash
|
||||
# Single calculation
|
||||
python -m framework calculate "2 + 3 * 4"
|
||||
uv run python -m framework calculate "2 + 3 * 4"
|
||||
|
||||
# Interactive mode
|
||||
python -m framework interactive
|
||||
uv run python -m framework interactive
|
||||
|
||||
# Analyze runs with Builder
|
||||
python -m framework analyze calculator
|
||||
uv run python -m framework analyze calculator
|
||||
```
|
||||
|
||||
### Using the Runtime
|
||||
@@ -136,13 +136,13 @@ Tests are generated using MCP tools (`generate_constraint_tests`, `generate_succ
|
||||
|
||||
```bash
|
||||
# Run tests against an agent
|
||||
python -m framework test-run <agent_path> --goal <goal_id> --parallel 4
|
||||
uv run python -m framework test-run <agent_path> --goal <goal_id> --parallel 4
|
||||
|
||||
# Debug failed tests
|
||||
python -m framework test-debug <agent_path> <test_name>
|
||||
uv run python -m framework test-debug <agent_path> <test_name>
|
||||
|
||||
# List tests for a goal
|
||||
python -m framework test-list <goal_id>
|
||||
uv run python -m framework test-list <goal_id>
|
||||
```
|
||||
|
||||
For detailed testing workflows, see the [testing-agent skill](../.claude/skills/testing-agent/SKILL.md).
|
||||
|
||||
@@ -0,0 +1,740 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
EventLoopNode WebSocket Demo
|
||||
|
||||
Real LLM, real FileConversationStore, real EventBus.
|
||||
Streams EventLoopNode execution to a browser via WebSocket.
|
||||
|
||||
Usage:
|
||||
cd /home/timothy/oss/hive/core
|
||||
python demos/event_loop_wss_demo.py
|
||||
|
||||
Then open http://localhost:8765 in your browser.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
import tempfile
|
||||
from http import HTTPStatus
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
import websockets
|
||||
from bs4 import BeautifulSoup
|
||||
from websockets.http11 import Request, Response
|
||||
|
||||
# Add core, tools, and hive root to path
|
||||
_CORE_DIR = Path(__file__).resolve().parent.parent
|
||||
_HIVE_DIR = _CORE_DIR.parent
|
||||
sys.path.insert(0, str(_CORE_DIR)) # framework.*
|
||||
sys.path.insert(0, str(_HIVE_DIR / "tools" / "src")) # aden_tools.*
|
||||
sys.path.insert(0, str(_HIVE_DIR)) # core.framework.* (for aden_tools imports)
|
||||
|
||||
import os # noqa: E402
|
||||
|
||||
from aden_tools.credentials import CREDENTIAL_SPECS, CredentialStoreAdapter # noqa: E402
|
||||
from core.framework.credentials import CredentialStore # noqa: E402
|
||||
|
||||
from framework.credentials.storage import ( # noqa: E402
|
||||
CompositeStorage,
|
||||
EncryptedFileStorage,
|
||||
EnvVarStorage,
|
||||
)
|
||||
from framework.graph.event_loop_node import EventLoopNode, LoopConfig # noqa: E402
|
||||
from framework.graph.node import NodeContext, NodeSpec, SharedMemory # noqa: E402
|
||||
from framework.llm.litellm import LiteLLMProvider # noqa: E402
|
||||
from framework.llm.provider import Tool # noqa: E402
|
||||
from framework.runner.tool_registry import ToolRegistry # noqa: E402
|
||||
from framework.runtime.core import Runtime # noqa: E402
|
||||
from framework.runtime.event_bus import EventBus, EventType # noqa: E402
|
||||
from framework.storage.conversation_store import FileConversationStore # noqa: E402
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(message)s")
|
||||
logger = logging.getLogger("demo")
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Persistent state (shared across WebSocket connections)
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
STORE_DIR = Path(tempfile.mkdtemp(prefix="hive_demo_"))
|
||||
STORE = FileConversationStore(STORE_DIR / "conversation")
|
||||
RUNTIME = Runtime(STORE_DIR / "runtime")
|
||||
LLM = LiteLLMProvider(model="claude-sonnet-4-5-20250929")
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Tool Registry — real tools via ToolRegistry (same pattern as GraphExecutor)
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
TOOL_REGISTRY = ToolRegistry()
|
||||
|
||||
# Credential store: Aden sync (OAuth2 tokens) + encrypted files + env var fallback
|
||||
_env_mapping = {name: spec.env_var for name, spec in CREDENTIAL_SPECS.items()}
|
||||
_local_storage = CompositeStorage(
|
||||
primary=EncryptedFileStorage(),
|
||||
fallbacks=[EnvVarStorage(env_mapping=_env_mapping)],
|
||||
)
|
||||
|
||||
if os.environ.get("ADEN_API_KEY"):
|
||||
try:
|
||||
from framework.credentials.aden import ( # noqa: E402
|
||||
AdenCachedStorage,
|
||||
AdenClientConfig,
|
||||
AdenCredentialClient,
|
||||
AdenSyncProvider,
|
||||
)
|
||||
|
||||
_client = AdenCredentialClient(AdenClientConfig(base_url="https://api.adenhq.com"))
|
||||
_provider = AdenSyncProvider(client=_client)
|
||||
_storage = AdenCachedStorage(
|
||||
local_storage=_local_storage,
|
||||
aden_provider=_provider,
|
||||
)
|
||||
_cred_store = CredentialStore(storage=_storage, providers=[_provider], auto_refresh=True)
|
||||
_synced = _provider.sync_all(_cred_store)
|
||||
logger.info("Synced %d credentials from Aden", _synced)
|
||||
except Exception as e:
|
||||
logger.warning("Aden sync unavailable: %s", e)
|
||||
_cred_store = CredentialStore(storage=_local_storage)
|
||||
else:
|
||||
logger.info("ADEN_API_KEY not set, using local credential storage")
|
||||
_cred_store = CredentialStore(storage=_local_storage)
|
||||
|
||||
CREDENTIALS = CredentialStoreAdapter(_cred_store)
|
||||
|
||||
# Debug: log which credentials resolved
|
||||
for _name in ["brave_search", "hubspot", "anthropic"]:
|
||||
_val = CREDENTIALS.get(_name)
|
||||
if _val:
|
||||
logger.debug("credential %s: OK (len=%d)", _name, len(_val))
|
||||
else:
|
||||
logger.debug("credential %s: not found", _name)
|
||||
|
||||
# --- web_search (Brave Search API) ---
|
||||
|
||||
TOOL_REGISTRY.register(
|
||||
name="web_search",
|
||||
tool=Tool(
|
||||
name="web_search",
|
||||
description=(
|
||||
"Search the web for current information. "
|
||||
"Returns titles, URLs, and snippets from search results."
|
||||
),
|
||||
parameters={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": "The search query (1-500 characters)",
|
||||
},
|
||||
"num_results": {
|
||||
"type": "integer",
|
||||
"description": "Number of results to return (1-20, default 10)",
|
||||
},
|
||||
},
|
||||
"required": ["query"],
|
||||
},
|
||||
),
|
||||
executor=lambda inputs: _exec_web_search(inputs),
|
||||
)
|
||||
|
||||
|
||||
def _exec_web_search(inputs: dict) -> dict:
    """Execute the web_search tool against the Brave Search API.

    Args:
        inputs: Tool inputs. Expects "query" (str, required) and optionally
            "num_results" (int, clamped to 1-20, default 10).

    Returns:
        A dict with "query", "results" (list of title/url/snippet dicts)
        and "total" on success, or a dict with an "error" key on failure.
        Never raises: network errors are reported in-band, consistent with
        the other tool executors (_exec_web_scrape, _exec_hubspot_search).
    """
    api_key = CREDENTIALS.get("brave_search")
    if not api_key:
        return {"error": "brave_search credential not configured"}
    query = inputs.get("query", "")
    # Clamp to the API's supported 1-20 window (default 10); previously a
    # zero/negative request was passed through unclamped.
    num_results = max(1, min(inputs.get("num_results", 10), 20))
    try:
        resp = httpx.get(
            "https://api.search.brave.com/res/v1/web/search",
            params={"q": query, "count": num_results},
            headers={"X-Subscription-Token": api_key, "Accept": "application/json"},
            timeout=30.0,
        )
    except httpx.TimeoutException:
        return {"error": "Request timed out"}
    except Exception as e:
        # Tool executors must not raise into the event loop; report in-band.
        return {"error": f"Search failed: {e}"}
    if resp.status_code != 200:
        return {"error": f"Brave API HTTP {resp.status_code}"}
    data = resp.json()
    results = [
        {
            "title": item.get("title", ""),
            "url": item.get("url", ""),
            "snippet": item.get("description", ""),
        }
        for item in data.get("web", {}).get("results", [])[:num_results]
    ]
    return {"query": query, "results": results, "total": len(results)}
|
||||
|
||||
|
||||
# --- web_scrape (httpx + BeautifulSoup, no playwright for sync compat) ---
|
||||
|
||||
# Register web_scrape. As with web_search, the executor lambda defers name
# resolution because _exec_web_scrape is defined below this call.
TOOL_REGISTRY.register(
    name="web_scrape",
    tool=Tool(
        name="web_scrape",
        description=(
            "Scrape and extract text content from a webpage URL. "
            "Returns the page title and main text content."
        ),
        parameters={
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "URL of the webpage to scrape",
                },
                "max_length": {
                    "type": "integer",
                    "description": "Maximum text length (default 50000)",
                },
            },
            "required": ["url"],
        },
    ),
    executor=lambda inputs: _exec_web_scrape(inputs),
)
|
||||
|
||||
# Browser-like request headers for scraping; many sites reject or throttle
# clients that present a default HTTP-library User-Agent.
_SCRAPE_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/131.0.0.0 Safari/537.36"
    ),
    "Accept": "text/html,application/xhtml+xml",
}
|
||||
|
||||
|
||||
def _exec_web_scrape(inputs: dict) -> dict:
    """Fetch a webpage and extract its title and main text content.

    Reads "url" (str) and optionally "max_length" (int, clamped to the
    1000-500000 range, default 50000) from *inputs*. Returns a dict with
    "url", "title", "content" and "length", or an "error" key on failure.
    """
    url = inputs.get("url", "")
    limit = max(1000, min(inputs.get("max_length", 50000), 500000))
    # Default to HTTPS when the caller omitted the scheme.
    if not url.startswith(("http://", "https://")):
        url = "https://" + url
    try:
        resp = httpx.get(url, timeout=30.0, follow_redirects=True, headers=_SCRAPE_HEADERS)
        if resp.status_code != 200:
            return {"error": f"HTTP {resp.status_code}"}
        soup = BeautifulSoup(resp.text, "html.parser")
        # Drop non-content elements before extracting text.
        for junk in soup(["script", "style", "nav", "footer", "header", "aside", "noscript"]):
            junk.decompose()
        title = soup.title.get_text(strip=True) if soup.title else ""
        # Prefer semantic content containers, falling back to the whole body.
        container = (
            soup.find("article")
            or soup.find("main")
            or soup.find(attrs={"role": "main"})
            or soup.find("body")
        )
        text = container.get_text(separator=" ", strip=True) if container else ""
        # Collapse every run of whitespace to a single space.
        text = " ".join(text.split())
        if len(text) > limit:
            text = text[:limit] + "..."
        return {"url": url, "title": title, "content": text, "length": len(text)}
    except httpx.TimeoutException:
        return {"error": "Request timed out"}
    except Exception as e:
        return {"error": f"Scrape failed: {e}"}
|
||||
|
||||
|
||||
# --- HubSpot CRM tools (optional, requires HUBSPOT_ACCESS_TOKEN) ---
|
||||
|
||||
# Base URL for all HubSpot CRM v3 REST calls.
_HUBSPOT_API = "https://api.hubapi.com"


def _hubspot_headers() -> dict | None:
    """Build authorization headers for HubSpot API requests.

    Returns:
        A header dict with bearer auth and JSON content negotiation, or
        None when the "hubspot" credential is not configured.
    """
    token = CREDENTIALS.get("hubspot")
    # Single guard replaces the previous redundant double check
    # (an if/else log block followed by a second `if not token`).
    if not token:
        logger.debug("HubSpot token: not found")
        return None
    # Log only a masked prefix/suffix so the full secret never lands in logs.
    logger.debug("HubSpot token: %s...%s (len=%d)", token[:8], token[-4:], len(token))
    return {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
        "Accept": "application/json",
    }
|
||||
|
||||
|
||||
def _exec_hubspot_search(inputs: dict) -> dict:
    """Search HubSpot CRM objects (contacts, companies, or deals).

    Reads "object_type" (default "contacts"), optional "query", and
    "limit" (capped at 100, default 10) from *inputs*. Returns the raw
    HubSpot search response on success, or a dict with an "error" key.
    """
    headers = _hubspot_headers()
    if not headers:
        return {"error": "HUBSPOT_ACCESS_TOKEN not set"}
    object_type = inputs.get("object_type", "contacts")
    search_text = inputs.get("query", "")
    payload: dict = {"limit": min(inputs.get("limit", 10), 100)}
    # Only include the free-text query when the caller supplied one.
    if search_text:
        payload["query"] = search_text
    try:
        resp = httpx.post(
            f"{_HUBSPOT_API}/crm/v3/objects/{object_type}/search",
            headers=headers,
            json=payload,
            timeout=30.0,
        )
        if resp.status_code != 200:
            return {"error": f"HubSpot API HTTP {resp.status_code}: {resp.text[:200]}"}
        return resp.json()
    except httpx.TimeoutException:
        return {"error": "Request timed out"}
    except Exception as e:
        return {"error": f"HubSpot error: {e}"}
|
||||
|
||||
|
||||
# Register hubspot_search; availability is effectively gated at call time
# by _hubspot_headers() returning None when no token is configured.
TOOL_REGISTRY.register(
    name="hubspot_search",
    tool=Tool(
        name="hubspot_search",
        description=(
            "Search HubSpot CRM objects (contacts, companies, or deals). "
            "Returns matching records with their properties."
        ),
        parameters={
            "type": "object",
            "properties": {
                "object_type": {
                    "type": "string",
                    "description": "CRM object type: 'contacts', 'companies', or 'deals'",
                },
                "query": {
                    "type": "string",
                    "description": "Search query (name, email, domain, etc.)",
                },
                "limit": {
                    "type": "integer",
                    "description": "Max results (1-100, default 10)",
                },
            },
            "required": ["object_type"],
        },
    ),
    executor=lambda inputs: _exec_hubspot_search(inputs),
)
|
||||
|
||||
logger.info(
|
||||
"ToolRegistry loaded: %s",
|
||||
", ".join(TOOL_REGISTRY.get_registered_names()),
|
||||
)
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# HTML page (embedded)
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
HTML_PAGE = ( # noqa: E501
|
||||
"""<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>EventLoopNode Live Demo</title>
|
||||
<style>
|
||||
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
body {
|
||||
font-family: 'SF Mono', 'Fira Code', monospace;
|
||||
background: #0d1117; color: #c9d1d9;
|
||||
height: 100vh; display: flex; flex-direction: column;
|
||||
}
|
||||
header {
|
||||
background: #161b22; padding: 12px 20px;
|
||||
border-bottom: 1px solid #30363d;
|
||||
display: flex; align-items: center; gap: 16px;
|
||||
}
|
||||
header h1 { font-size: 16px; color: #58a6ff; font-weight: 600; }
|
||||
.status {
|
||||
font-size: 12px; padding: 3px 10px; border-radius: 12px;
|
||||
background: #21262d; color: #8b949e;
|
||||
}
|
||||
.status.running { background: #1a4b2e; color: #3fb950; }
|
||||
.status.done { background: #1a3a5c; color: #58a6ff; }
|
||||
.status.error { background: #4b1a1a; color: #f85149; }
|
||||
.chat { flex: 1; overflow-y: auto; padding: 16px; }
|
||||
.msg {
|
||||
margin: 8px 0; padding: 10px 14px; border-radius: 8px;
|
||||
line-height: 1.6; white-space: pre-wrap; word-wrap: break-word;
|
||||
}
|
||||
.msg.user { background: #1a3a5c; color: #58a6ff; }
|
||||
.msg.assistant { background: #161b22; color: #c9d1d9; }
|
||||
.msg.event {
|
||||
background: transparent; color: #8b949e; font-size: 11px;
|
||||
padding: 4px 14px; border-left: 3px solid #30363d;
|
||||
}
|
||||
.msg.event.loop { border-left-color: #58a6ff; }
|
||||
.msg.event.tool { border-left-color: #d29922; }
|
||||
.msg.event.stall { border-left-color: #f85149; }
|
||||
.input-bar {
|
||||
padding: 12px 16px; background: #161b22;
|
||||
border-top: 1px solid #30363d; display: flex; gap: 8px;
|
||||
}
|
||||
.input-bar input {
|
||||
flex: 1; background: #0d1117; border: 1px solid #30363d;
|
||||
color: #c9d1d9; padding: 8px 12px; border-radius: 6px;
|
||||
font-family: inherit; font-size: 14px; outline: none;
|
||||
}
|
||||
.input-bar input:focus { border-color: #58a6ff; }
|
||||
.input-bar button {
|
||||
background: #238636; color: #fff; border: none;
|
||||
padding: 8px 20px; border-radius: 6px; cursor: pointer;
|
||||
font-family: inherit; font-weight: 600;
|
||||
}
|
||||
.input-bar button:hover { background: #2ea043; }
|
||||
.input-bar button:disabled {
|
||||
background: #21262d; color: #484f58; cursor: not-allowed;
|
||||
}
|
||||
.input-bar button.clear { background: #da3633; }
|
||||
.input-bar button.clear:hover { background: #f85149; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<h1>EventLoopNode Live</h1>
|
||||
<span id="status" class="status">Idle</span>
|
||||
<span id="iter" class="status" style="display:none">Step 0</span>
|
||||
</header>
|
||||
<div id="chat" class="chat"></div>
|
||||
<div class="input-bar">
|
||||
<input id="input" type="text"
|
||||
placeholder="Ask anything..." autofocus />
|
||||
<button id="go" onclick="run()">Send</button>
|
||||
<button class="clear"
|
||||
onclick="clearConversation()">Clear</button>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
let ws = null;
|
||||
let currentAssistantEl = null;
|
||||
let iterCount = 0;
|
||||
const chat = document.getElementById('chat');
|
||||
const status = document.getElementById('status');
|
||||
const iterEl = document.getElementById('iter');
|
||||
const goBtn = document.getElementById('go');
|
||||
const inputEl = document.getElementById('input');
|
||||
|
||||
inputEl.addEventListener('keydown', e => {
|
||||
if (e.key === 'Enter') run();
|
||||
});
|
||||
|
||||
function setStatus(text, cls) {
|
||||
status.textContent = text;
|
||||
status.className = 'status ' + cls;
|
||||
}
|
||||
|
||||
function addMsg(text, cls) {
|
||||
const el = document.createElement('div');
|
||||
el.className = 'msg ' + cls;
|
||||
el.textContent = text;
|
||||
chat.appendChild(el);
|
||||
chat.scrollTop = chat.scrollHeight;
|
||||
return el;
|
||||
}
|
||||
|
||||
function connect() {
|
||||
ws = new WebSocket('ws://' + location.host + '/ws');
|
||||
ws.onopen = () => {
|
||||
setStatus('Ready', 'done');
|
||||
goBtn.disabled = false;
|
||||
};
|
||||
ws.onmessage = handleEvent;
|
||||
ws.onerror = () => { setStatus('Error', 'error'); };
|
||||
ws.onclose = () => {
|
||||
setStatus('Reconnecting...', '');
|
||||
goBtn.disabled = true;
|
||||
setTimeout(connect, 2000);
|
||||
};
|
||||
}
|
||||
|
||||
function handleEvent(msg) {
|
||||
const evt = JSON.parse(msg.data);
|
||||
|
||||
if (evt.type === 'llm_text_delta') {
|
||||
if (currentAssistantEl) {
|
||||
currentAssistantEl.textContent += evt.content;
|
||||
chat.scrollTop = chat.scrollHeight;
|
||||
}
|
||||
}
|
||||
else if (evt.type === 'ready') {
|
||||
setStatus('Ready', 'done');
|
||||
if (currentAssistantEl && !currentAssistantEl.textContent)
|
||||
currentAssistantEl.remove();
|
||||
goBtn.disabled = false;
|
||||
}
|
||||
else if (evt.type === 'node_loop_iteration') {
|
||||
iterCount = evt.iteration || (iterCount + 1);
|
||||
iterEl.textContent = 'Step ' + iterCount;
|
||||
iterEl.style.display = '';
|
||||
}
|
||||
else if (evt.type === 'tool_call_started') {
|
||||
var info = evt.tool_name + '('
|
||||
+ JSON.stringify(evt.tool_input).slice(0, 120) + ')';
|
||||
addMsg('TOOL ' + info, 'event tool');
|
||||
}
|
||||
else if (evt.type === 'tool_call_completed') {
|
||||
var preview = (evt.result || '').slice(0, 200);
|
||||
var cls = evt.is_error ? 'stall' : 'tool';
|
||||
addMsg('RESULT ' + evt.tool_name + ': ' + preview,
|
||||
'event ' + cls);
|
||||
currentAssistantEl = addMsg('', 'assistant');
|
||||
}
|
||||
else if (evt.type === 'result') {
|
||||
setStatus('Session ended', evt.success ? 'done' : 'error');
|
||||
if (evt.error) addMsg('ERROR ' + evt.error, 'event stall');
|
||||
if (currentAssistantEl && !currentAssistantEl.textContent)
|
||||
currentAssistantEl.remove();
|
||||
goBtn.disabled = false;
|
||||
}
|
||||
else if (evt.type === 'node_stalled') {
|
||||
addMsg('STALLED ' + evt.reason, 'event stall');
|
||||
}
|
||||
else if (evt.type === 'cleared') {
|
||||
chat.innerHTML = '';
|
||||
iterCount = 0;
|
||||
iterEl.textContent = 'Step 0';
|
||||
iterEl.style.display = 'none';
|
||||
setStatus('Ready', 'done');
|
||||
goBtn.disabled = false;
|
||||
}
|
||||
}
|
||||
|
||||
function run() {
|
||||
const text = inputEl.value.trim();
|
||||
if (!text || !ws || ws.readyState !== 1) return;
|
||||
addMsg(text, 'user');
|
||||
currentAssistantEl = addMsg('', 'assistant');
|
||||
inputEl.value = '';
|
||||
setStatus('Running', 'running');
|
||||
goBtn.disabled = true;
|
||||
ws.send(JSON.stringify({ topic: text }));
|
||||
}
|
||||
|
||||
function clearConversation() {
|
||||
if (ws && ws.readyState === 1) {
|
||||
ws.send(JSON.stringify({ command: 'clear' }));
|
||||
}
|
||||
}
|
||||
|
||||
connect();
|
||||
</script>
|
||||
</body>
|
||||
</html>"""
|
||||
)
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# WebSocket handler
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def handle_ws(websocket):
    """Persistent WebSocket: long-lived EventLoopNode with client_facing blocking.

    One connection owns one EventLoopNode. The first "topic" message spins
    up the node as a background task; subsequent messages are injected into
    the running loop. A {"command": "clear"} message stops the loop and
    resets the conversation store. The node is shut down when the socket
    closes.
    """
    global STORE  # reassigned below when the client sends the "clear" command

    # -- Event forwarding (WebSocket ← EventBus) ----------------------------
    bus = EventBus()

    async def forward_event(event):
        # Relay runtime events to the browser as JSON. Send failures are
        # swallowed so a dropped socket never breaks the node loop.
        try:
            payload = {"type": event.type.value, **event.data}
            if event.node_id:
                payload["node_id"] = event.node_id
            await websocket.send(json.dumps(payload))
        except Exception:
            pass

    bus.subscribe(
        event_types=[
            EventType.NODE_LOOP_STARTED,
            EventType.NODE_LOOP_ITERATION,
            EventType.NODE_LOOP_COMPLETED,
            EventType.LLM_TEXT_DELTA,
            EventType.TOOL_CALL_STARTED,
            EventType.TOOL_CALL_COMPLETED,
            EventType.NODE_STALLED,
        ],
        handler=forward_event,
    )

    # -- Per-connection state -----------------------------------------------
    # node/loop_task are mutated by start_loop()/stop_loop() via `nonlocal`.
    node = None
    loop_task = None

    tools = list(TOOL_REGISTRY.get_tools().values())
    tool_executor = TOOL_REGISTRY.get_executor()

    node_spec = NodeSpec(
        id="assistant",
        name="Chat Assistant",
        description="A conversational assistant that remembers context across messages",
        node_type="event_loop",
        # client_facing=True makes the node block waiting for user input
        # instead of terminating when it has nothing to do.
        client_facing=True,
        system_prompt=(
            "You are a helpful assistant with access to tools. "
            "You can search the web, scrape webpages, and query HubSpot CRM. "
            "Use tools when the user asks for current information or external data. "
            "You have full conversation history, so you can reference previous messages."
        ),
    )

    # -- Ready callback: subscribe to CLIENT_INPUT_REQUESTED on the bus ---
    async def on_input_requested(event):
        # Tell the browser the node is idle and waiting for the next message.
        try:
            await websocket.send(json.dumps({"type": "ready"}))
        except Exception:
            pass

    bus.subscribe(
        event_types=[EventType.CLIENT_INPUT_REQUESTED],
        handler=on_input_requested,
    )

    async def start_loop(first_message: str):
        """Create an EventLoopNode and run it as a background task."""
        nonlocal node, loop_task

        memory = SharedMemory()
        ctx = NodeContext(
            runtime=RUNTIME,
            node_id="assistant",
            node_spec=node_spec,
            memory=memory,
            input_data={},
            llm=LLM,
            available_tools=tools,
        )
        node = EventLoopNode(
            event_bus=bus,
            config=LoopConfig(max_iterations=10_000, max_history_tokens=32_000),
            conversation_store=STORE,
            tool_executor=tool_executor,
        )
        # Queue the first user message before the loop starts executing.
        await node.inject_event(first_message)

        async def _run():
            # Background task: run the node to completion and report the
            # final result (or error) back to the browser.
            try:
                result = await node.execute(ctx)
                try:
                    await websocket.send(
                        json.dumps(
                            {
                                "type": "result",
                                "success": result.success,
                                "output": result.output,
                                "error": result.error,
                                "tokens": result.tokens_used,
                            }
                        )
                    )
                except Exception:
                    pass
                logger.info(f"Loop ended: success={result.success}, tokens={result.tokens_used}")
            except websockets.exceptions.ConnectionClosed:
                logger.info("Loop stopped: WebSocket closed")
            except Exception as e:
                logger.exception("Loop error")
                try:
                    await websocket.send(
                        json.dumps(
                            {
                                "type": "result",
                                "success": False,
                                "error": str(e),
                                "output": {},
                            }
                        )
                    )
                except Exception:
                    pass

        loop_task = asyncio.create_task(_run())

    async def stop_loop():
        """Signal the node and wait for the loop task to finish."""
        nonlocal node, loop_task
        if loop_task and not loop_task.done():
            if node:
                node.signal_shutdown()
            try:
                # Give the loop a grace period to drain; hard-cancel after 5s.
                await asyncio.wait_for(loop_task, timeout=5.0)
            except (TimeoutError, asyncio.CancelledError):
                loop_task.cancel()
        node = None
        loop_task = None

    # -- Message loop (runs for the lifetime of this WebSocket) -------------
    try:
        async for raw in websocket:
            try:
                msg = json.loads(raw)
            except Exception:
                # Ignore malformed frames rather than killing the connection.
                continue

            # Clear command
            if msg.get("command") == "clear":
                import shutil

                await stop_loop()
                await STORE.close()
                conv_dir = STORE_DIR / "conversation"
                if conv_dir.exists():
                    shutil.rmtree(conv_dir)
                # Fresh store so the next message starts a clean conversation.
                STORE = FileConversationStore(conv_dir)
                await websocket.send(json.dumps({"type": "cleared"}))
                logger.info("Conversation cleared")
                continue

            topic = msg.get("topic", "")
            if not topic:
                continue

            if node is None:
                # First message — spin up the loop
                logger.info(f"Starting persistent loop: {topic}")
                await start_loop(topic)
            else:
                # Subsequent message — inject into the running loop
                logger.info(f"Injecting message: {topic}")
                await node.inject_event(topic)

    except websockets.exceptions.ConnectionClosed:
        pass
    finally:
        # Always tear the node down, regardless of how the socket ended.
        await stop_loop()
        logger.info("WebSocket closed, loop stopped")
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# HTTP handler for serving the HTML page
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def process_request(connection, request: Request):
    """Route HTTP requests: WebSocket upgrade on /ws, the demo HTML elsewhere."""
    # Returning None tells the websockets server to continue the upgrade.
    if request.path == "/ws":
        return None
    # Every other path gets the embedded single-page app.
    headers = websockets.Headers({"Content-Type": "text/html; charset=utf-8"})
    return Response(HTTPStatus.OK, "OK", headers, HTML_PAGE.encode())
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Main
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def main():
    """Start the combined HTTP + WebSocket server and serve forever."""
    port = 8765
    server = websockets.serve(
        handle_ws,
        "0.0.0.0",
        port,
        process_request=process_request,
    )
    async with server:
        logger.info(f"Demo running at http://localhost:{port}")
        logger.info("Open in your browser and enter a topic to research.")
        # Block indefinitely; the server runs until the process is killed.
        await asyncio.Future()


if __name__ == "__main__":
    asyncio.run(main())
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,930 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Two-Node ContextHandoff Demo
|
||||
|
||||
Demonstrates ContextHandoff between two EventLoopNode instances:
|
||||
Node A (Researcher) → ContextHandoff → Node B (Analyst)
|
||||
|
||||
Real LLM, real FileConversationStore, real EventBus.
|
||||
Streams both nodes to a browser via WebSocket.
|
||||
|
||||
Usage:
|
||||
cd /home/timothy/oss/hive/core
|
||||
python demos/handoff_demo.py
|
||||
|
||||
Then open http://localhost:8766 in your browser.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
import tempfile
|
||||
from http import HTTPStatus
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
import websockets
|
||||
from bs4 import BeautifulSoup
|
||||
from websockets.http11 import Request, Response
|
||||
|
||||
# Add core, tools, and hive root to path
|
||||
_CORE_DIR = Path(__file__).resolve().parent.parent
|
||||
_HIVE_DIR = _CORE_DIR.parent
|
||||
sys.path.insert(0, str(_CORE_DIR)) # framework.*
|
||||
sys.path.insert(0, str(_HIVE_DIR / "tools" / "src")) # aden_tools.*
|
||||
sys.path.insert(0, str(_HIVE_DIR)) # core.framework.* (for aden_tools imports)
|
||||
|
||||
from aden_tools.credentials import CREDENTIAL_SPECS, CredentialStoreAdapter # noqa: E402
|
||||
from core.framework.credentials import CredentialStore # noqa: E402
|
||||
|
||||
from framework.credentials.storage import ( # noqa: E402
|
||||
CompositeStorage,
|
||||
EncryptedFileStorage,
|
||||
EnvVarStorage,
|
||||
)
|
||||
from framework.graph.context_handoff import ContextHandoff # noqa: E402
|
||||
from framework.graph.conversation import NodeConversation # noqa: E402
|
||||
from framework.graph.event_loop_node import EventLoopNode, LoopConfig # noqa: E402
|
||||
from framework.graph.node import NodeContext, NodeSpec, SharedMemory # noqa: E402
|
||||
from framework.llm.litellm import LiteLLMProvider # noqa: E402
|
||||
from framework.llm.provider import Tool # noqa: E402
|
||||
from framework.runner.tool_registry import ToolRegistry # noqa: E402
|
||||
from framework.runtime.core import Runtime # noqa: E402
|
||||
from framework.runtime.event_bus import EventBus, EventType # noqa: E402
|
||||
from framework.storage.conversation_store import FileConversationStore # noqa: E402
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(message)s")
|
||||
logger = logging.getLogger("handoff_demo")
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Persistent state
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
STORE_DIR = Path(tempfile.mkdtemp(prefix="hive_handoff_"))
|
||||
RUNTIME = Runtime(STORE_DIR / "runtime")
|
||||
LLM = LiteLLMProvider(model="claude-sonnet-4-5-20250929")
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Credentials
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
# Composite credential store: encrypted files (primary) + env vars (fallback)
|
||||
# Map each known credential name to its environment-variable fallback.
_env_mapping = {name: spec.env_var for name, spec in CREDENTIAL_SPECS.items()}
_composite = CompositeStorage(
    primary=EncryptedFileStorage(),
    fallbacks=[EnvVarStorage(env_mapping=_env_mapping)],
)
# Read-only facade used by the tool executors below.
CREDENTIALS = CredentialStoreAdapter(CredentialStore(storage=_composite))

# Debug: log which credentials resolved (length only, never the secret).
for _name in ["brave_search", "hubspot"]:
    _val = CREDENTIALS.get(_name)
    if _val:
        logger.debug("credential %s: OK (len=%d)", _name, len(_val))
    else:
        logger.debug("credential %s: not found", _name)
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Tool Registry — web_search + web_scrape for Node A (Researcher)
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
TOOL_REGISTRY = ToolRegistry()
|
||||
|
||||
|
||||
def _exec_web_search(inputs: dict) -> dict:
    """Execute the web_search tool against the Brave Search API.

    Args:
        inputs: Tool inputs. Expects "query" (str, required) and optionally
            "num_results" (int, clamped to 1-20, default 10).

    Returns:
        A dict with "query", "results" (list of title/url/snippet dicts)
        and "total" on success, or a dict with an "error" key on failure.
        Never raises: network errors are reported in-band, consistent with
        _exec_web_scrape.
    """
    api_key = CREDENTIALS.get("brave_search")
    if not api_key:
        return {"error": "brave_search credential not configured"}
    query = inputs.get("query", "")
    # Clamp to the API's supported 1-20 window (default 10); previously a
    # zero/negative request was passed through unclamped.
    num_results = max(1, min(inputs.get("num_results", 10), 20))
    try:
        resp = httpx.get(
            "https://api.search.brave.com/res/v1/web/search",
            params={"q": query, "count": num_results},
            headers={
                "X-Subscription-Token": api_key,
                "Accept": "application/json",
            },
            timeout=30.0,
        )
    except httpx.TimeoutException:
        return {"error": "Request timed out"}
    except Exception as e:
        # Tool executors must not raise into the event loop; report in-band.
        return {"error": f"Search failed: {e}"}
    if resp.status_code != 200:
        return {"error": f"Brave API HTTP {resp.status_code}"}
    data = resp.json()
    results = [
        {
            "title": item.get("title", ""),
            "url": item.get("url", ""),
            "snippet": item.get("description", ""),
        }
        for item in data.get("web", {}).get("results", [])[:num_results]
    ]
    return {"query": query, "results": results, "total": len(results)}
|
||||
|
||||
|
||||
TOOL_REGISTRY.register(
|
||||
name="web_search",
|
||||
tool=Tool(
|
||||
name="web_search",
|
||||
description=(
|
||||
"Search the web for current information. "
|
||||
"Returns titles, URLs, and snippets from search results."
|
||||
),
|
||||
parameters={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": "The search query (1-500 characters)",
|
||||
},
|
||||
"num_results": {
|
||||
"type": "integer",
|
||||
"description": "Number of results (1-20, default 10)",
|
||||
},
|
||||
},
|
||||
"required": ["query"],
|
||||
},
|
||||
),
|
||||
executor=lambda inputs: _exec_web_search(inputs),
|
||||
)
|
||||
|
||||
_SCRAPE_HEADERS = {
|
||||
"User-Agent": (
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||
"Chrome/131.0.0.0 Safari/537.36"
|
||||
),
|
||||
"Accept": "text/html,application/xhtml+xml",
|
||||
}
|
||||
|
||||
|
||||
def _exec_web_scrape(inputs: dict) -> dict:
    """Fetch a webpage and return its title and cleaned main text.

    Reads "url" (str) and optionally "max_length" (int, clamped to
    1000-500000, default 50000) from *inputs*. Returns a dict with "url",
    "title", "content" and "length", or an "error" key on failure.
    """
    url = inputs.get("url", "")
    # Clamp the requested length to a sane window.
    max_length = max(1000, min(inputs.get("max_length", 50000), 500000))
    # Default to HTTPS when the caller omitted the scheme.
    if not url.startswith(("http://", "https://")):
        url = "https://" + url
    try:
        resp = httpx.get(
            url,
            timeout=30.0,
            follow_redirects=True,
            headers=_SCRAPE_HEADERS,
        )
        if resp.status_code != 200:
            return {"error": f"HTTP {resp.status_code}"}
        soup = BeautifulSoup(resp.text, "html.parser")
        # Drop non-content elements before extracting text.
        for tag in soup(["script", "style", "nav", "footer", "header", "aside", "noscript"]):
            tag.decompose()
        title = soup.title.get_text(strip=True) if soup.title else ""
        # Prefer semantic content containers, falling back to the whole body.
        main = (
            soup.find("article")
            or soup.find("main")
            or soup.find(attrs={"role": "main"})
            or soup.find("body")
        )
        text = main.get_text(separator=" ", strip=True) if main else ""
        # Collapse every run of whitespace to a single space.
        text = " ".join(text.split())
        if len(text) > max_length:
            text = text[:max_length] + "..."
        return {
            "url": url,
            "title": title,
            "content": text,
            "length": len(text),
        }
    except httpx.TimeoutException:
        return {"error": "Request timed out"}
    except Exception as e:
        return {"error": f"Scrape failed: {e}"}
|
||||
|
||||
|
||||
TOOL_REGISTRY.register(
|
||||
name="web_scrape",
|
||||
tool=Tool(
|
||||
name="web_scrape",
|
||||
description=(
|
||||
"Scrape and extract text content from a webpage URL. "
|
||||
"Returns the page title and main text content."
|
||||
),
|
||||
parameters={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"url": {
|
||||
"type": "string",
|
||||
"description": "URL of the webpage to scrape",
|
||||
},
|
||||
"max_length": {
|
||||
"type": "integer",
|
||||
"description": "Maximum text length (default 50000)",
|
||||
},
|
||||
},
|
||||
"required": ["url"],
|
||||
},
|
||||
),
|
||||
executor=lambda inputs: _exec_web_scrape(inputs),
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"ToolRegistry loaded: %s",
|
||||
", ".join(TOOL_REGISTRY.get_registered_names()),
|
||||
)
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Node Specs
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
# Node A of the pipeline: gathers material with web_search/web_scrape and
# writes its findings to the "research_summary" output key, which the
# ContextHandoff passes on to the Analyst node.
RESEARCHER_SPEC = NodeSpec(
    id="researcher",
    name="Researcher",
    description="Researches a topic using web search and scraping tools",
    node_type="event_loop",
    input_keys=["topic"],
    output_keys=["research_summary"],
    system_prompt=(
        "You are a thorough research assistant. Your job is to research "
        "the given topic using the web_search and web_scrape tools.\n\n"
        "1. Search for relevant information on the topic\n"
        "2. Scrape 1-2 of the most promising URLs for details\n"
        "3. Synthesize your findings into a comprehensive summary\n"
        "4. Use set_output with key='research_summary' to save your "
        "findings\n\n"
        "Be thorough but efficient. Aim for 2-4 search/scrape calls, "
        "then summarize and set_output."
    ),
)
|
||||
|
||||
# Node B of the pipeline: consumes the researcher's handed-off context
# ("context" input key) and produces the "analysis" output key.
ANALYST_SPEC = NodeSpec(
    id="analyst",
    name="Analyst",
    description="Analyzes research findings and provides insights",
    node_type="event_loop",
    input_keys=["context"],
    output_keys=["analysis"],
    system_prompt=(
        "You are a strategic analyst. You receive research findings from "
        "a previous researcher and must:\n\n"
        "1. Identify key themes and patterns\n"
        "2. Assess the reliability and significance of the findings\n"
        "3. Provide actionable insights and recommendations\n"
        "4. Use set_output with key='analysis' to save your analysis\n\n"
        "Be concise but insightful. Focus on what matters most."
    ),
)
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# HTML page
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
HTML_PAGE = ( # noqa: E501
|
||||
"""<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>ContextHandoff Demo</title>
|
||||
<style>
|
||||
* {
|
||||
box-sizing: border-box;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
body {
|
||||
font-family: 'SF Mono', 'Fira Code', monospace;
|
||||
background: #0d1117;
|
||||
color: #c9d1d9;
|
||||
height: 100vh;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
}
|
||||
header {
|
||||
background: #161b22;
|
||||
padding: 12px 20px;
|
||||
border-bottom: 1px solid #30363d;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 16px;
|
||||
}
|
||||
header h1 {
|
||||
font-size: 16px;
|
||||
color: #58a6ff;
|
||||
font-weight: 600;
|
||||
}
|
||||
.badge {
|
||||
font-size: 12px;
|
||||
padding: 3px 10px;
|
||||
border-radius: 12px;
|
||||
background: #21262d;
|
||||
color: #8b949e;
|
||||
}
|
||||
.badge.researcher {
|
||||
background: #1a3a5c;
|
||||
color: #58a6ff;
|
||||
}
|
||||
.badge.analyst {
|
||||
background: #1a4b2e;
|
||||
color: #3fb950;
|
||||
}
|
||||
.badge.handoff {
|
||||
background: #3d1f00;
|
||||
color: #d29922;
|
||||
}
|
||||
.badge.done {
|
||||
background: #21262d;
|
||||
color: #8b949e;
|
||||
}
|
||||
.badge.error {
|
||||
background: #4b1a1a;
|
||||
color: #f85149;
|
||||
}
|
||||
.chat {
|
||||
flex: 1;
|
||||
overflow-y: auto;
|
||||
padding: 16px;
|
||||
}
|
||||
.msg {
|
||||
margin: 8px 0;
|
||||
padding: 10px 14px;
|
||||
border-radius: 8px;
|
||||
line-height: 1.6;
|
||||
white-space: pre-wrap;
|
||||
word-wrap: break-word;
|
||||
}
|
||||
.msg.user {
|
||||
background: #1a3a5c;
|
||||
color: #58a6ff;
|
||||
}
|
||||
.msg.assistant {
|
||||
background: #161b22;
|
||||
color: #c9d1d9;
|
||||
}
|
||||
.msg.assistant.analyst-msg {
|
||||
border-left: 3px solid #3fb950;
|
||||
}
|
||||
.msg.event {
|
||||
background: transparent;
|
||||
color: #8b949e;
|
||||
font-size: 11px;
|
||||
padding: 4px 14px;
|
||||
border-left: 3px solid #30363d;
|
||||
}
|
||||
.msg.event.loop {
|
||||
border-left-color: #58a6ff;
|
||||
}
|
||||
.msg.event.tool {
|
||||
border-left-color: #d29922;
|
||||
}
|
||||
.msg.event.stall {
|
||||
border-left-color: #f85149;
|
||||
}
|
||||
.handoff-banner {
|
||||
margin: 16px 0;
|
||||
padding: 16px;
|
||||
background: #1c1200;
|
||||
border: 1px solid #d29922;
|
||||
border-radius: 8px;
|
||||
text-align: center;
|
||||
}
|
||||
.handoff-banner h3 {
|
||||
color: #d29922;
|
||||
font-size: 14px;
|
||||
margin-bottom: 8px;
|
||||
}
|
||||
.handoff-banner p, .result-banner p {
|
||||
color: #8b949e;
|
||||
font-size: 12px;
|
||||
line-height: 1.5;
|
||||
max-height: 200px;
|
||||
overflow-y: auto;
|
||||
white-space: pre-wrap;
|
||||
text-align: left;
|
||||
}
|
||||
.result-banner {
|
||||
margin: 16px 0;
|
||||
padding: 16px;
|
||||
background: #0a2614;
|
||||
border: 1px solid #3fb950;
|
||||
border-radius: 8px;
|
||||
}
|
||||
.result-banner h3 {
|
||||
color: #3fb950;
|
||||
font-size: 14px;
|
||||
margin-bottom: 8px;
|
||||
text-align: center;
|
||||
}
|
||||
.result-banner .label {
|
||||
color: #58a6ff;
|
||||
font-size: 11px;
|
||||
font-weight: 600;
|
||||
margin-top: 10px;
|
||||
margin-bottom: 2px;
|
||||
}
|
||||
.result-banner .tokens {
|
||||
color: #484f58;
|
||||
font-size: 11px;
|
||||
text-align: center;
|
||||
margin-top: 10px;
|
||||
}
|
||||
.input-bar {
|
||||
padding: 12px 16px;
|
||||
background: #161b22;
|
||||
border-top: 1px solid #30363d;
|
||||
display: flex;
|
||||
gap: 8px;
|
||||
}
|
||||
.input-bar input {
|
||||
flex: 1;
|
||||
background: #0d1117;
|
||||
border: 1px solid #30363d;
|
||||
color: #c9d1d9;
|
||||
padding: 8px 12px;
|
||||
border-radius: 6px;
|
||||
font-family: inherit;
|
||||
font-size: 14px;
|
||||
outline: none;
|
||||
}
|
||||
.input-bar input:focus {
|
||||
border-color: #58a6ff;
|
||||
}
|
||||
.input-bar button {
|
||||
background: #238636;
|
||||
color: #fff;
|
||||
border: none;
|
||||
padding: 8px 20px;
|
||||
border-radius: 6px;
|
||||
cursor: pointer;
|
||||
font-family: inherit;
|
||||
font-weight: 600;
|
||||
}
|
||||
.input-bar button:hover {
|
||||
background: #2ea043;
|
||||
}
|
||||
.input-bar button:disabled {
|
||||
background: #21262d;
|
||||
color: #484f58;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<h1>ContextHandoff Demo</h1>
|
||||
<span id="phase" class="badge">Idle</span>
|
||||
<span id="iter" class="badge" style="display:none">Step 0</span>
|
||||
</header>
|
||||
<div id="chat" class="chat"></div>
|
||||
<div class="input-bar">
|
||||
<input id="input" type="text"
|
||||
placeholder="Enter a research topic..." autofocus />
|
||||
<button id="go" onclick="run()">Research</button>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
let ws = null;
|
||||
let currentAssistantEl = null;
|
||||
let iterCount = 0;
|
||||
let currentPhase = 'idle';
|
||||
const chat = document.getElementById('chat');
|
||||
const phase = document.getElementById('phase');
|
||||
const iterEl = document.getElementById('iter');
|
||||
const goBtn = document.getElementById('go');
|
||||
const inputEl = document.getElementById('input');
|
||||
|
||||
inputEl.addEventListener('keydown', e => {
|
||||
if (e.key === 'Enter') run();
|
||||
});
|
||||
|
||||
function setPhase(text, cls) {
|
||||
phase.textContent = text;
|
||||
phase.className = 'badge ' + cls;
|
||||
currentPhase = cls;
|
||||
}
|
||||
|
||||
function addMsg(text, cls) {
|
||||
const el = document.createElement('div');
|
||||
el.className = 'msg ' + cls;
|
||||
el.textContent = text;
|
||||
chat.appendChild(el);
|
||||
chat.scrollTop = chat.scrollHeight;
|
||||
return el;
|
||||
}
|
||||
|
||||
function addHandoffBanner(summary) {
|
||||
const banner = document.createElement('div');
|
||||
banner.className = 'handoff-banner';
|
||||
const h3 = document.createElement('h3');
|
||||
h3.textContent = 'Context Handoff: Researcher -> Analyst';
|
||||
const p = document.createElement('p');
|
||||
p.textContent = summary || 'Passing research context...';
|
||||
banner.appendChild(h3);
|
||||
banner.appendChild(p);
|
||||
chat.appendChild(banner);
|
||||
chat.scrollTop = chat.scrollHeight;
|
||||
}
|
||||
|
||||
function addResultBanner(researcher, analyst, tokens) {
|
||||
const banner = document.createElement('div');
|
||||
banner.className = 'result-banner';
|
||||
const h3 = document.createElement('h3');
|
||||
h3.textContent = 'Pipeline Complete';
|
||||
banner.appendChild(h3);
|
||||
|
||||
if (researcher && researcher.research_summary) {
|
||||
const lbl = document.createElement('div');
|
||||
lbl.className = 'label';
|
||||
lbl.textContent = 'RESEARCH SUMMARY';
|
||||
banner.appendChild(lbl);
|
||||
const p = document.createElement('p');
|
||||
p.textContent = researcher.research_summary;
|
||||
banner.appendChild(p);
|
||||
}
|
||||
|
||||
if (analyst && analyst.analysis) {
|
||||
const lbl = document.createElement('div');
|
||||
lbl.className = 'label';
|
||||
lbl.textContent = 'ANALYSIS';
|
||||
lbl.style.color = '#3fb950';
|
||||
banner.appendChild(lbl);
|
||||
const p = document.createElement('p');
|
||||
p.textContent = analyst.analysis;
|
||||
banner.appendChild(p);
|
||||
}
|
||||
|
||||
if (tokens) {
|
||||
const t = document.createElement('div');
|
||||
t.className = 'tokens';
|
||||
t.textContent = 'Total tokens: ' + tokens.toLocaleString();
|
||||
banner.appendChild(t);
|
||||
}
|
||||
|
||||
chat.appendChild(banner);
|
||||
chat.scrollTop = chat.scrollHeight;
|
||||
}
|
||||
|
||||
function connect() {
|
||||
ws = new WebSocket('ws://' + location.host + '/ws');
|
||||
ws.onopen = () => {
|
||||
setPhase('Ready', 'done');
|
||||
goBtn.disabled = false;
|
||||
};
|
||||
ws.onmessage = handleEvent;
|
||||
ws.onerror = () => { setPhase('Error', 'error'); };
|
||||
ws.onclose = () => {
|
||||
setPhase('Reconnecting...', '');
|
||||
goBtn.disabled = true;
|
||||
setTimeout(connect, 2000);
|
||||
};
|
||||
}
|
||||
|
||||
function handleEvent(msg) {
|
||||
const evt = JSON.parse(msg.data);
|
||||
|
||||
if (evt.type === 'phase') {
|
||||
if (evt.phase === 'researcher') {
|
||||
setPhase('Researcher', 'researcher');
|
||||
} else if (evt.phase === 'handoff') {
|
||||
setPhase('Handoff', 'handoff');
|
||||
} else if (evt.phase === 'analyst') {
|
||||
setPhase('Analyst', 'analyst');
|
||||
}
|
||||
iterCount = 0;
|
||||
iterEl.style.display = 'none';
|
||||
}
|
||||
else if (evt.type === 'llm_text_delta') {
|
||||
if (currentAssistantEl) {
|
||||
currentAssistantEl.textContent += evt.content;
|
||||
chat.scrollTop = chat.scrollHeight;
|
||||
}
|
||||
}
|
||||
else if (evt.type === 'node_loop_iteration') {
|
||||
iterCount = evt.iteration || (iterCount + 1);
|
||||
iterEl.textContent = 'Step ' + iterCount;
|
||||
iterEl.style.display = '';
|
||||
}
|
||||
else if (evt.type === 'tool_call_started') {
|
||||
var info = evt.tool_name + '('
|
||||
+ JSON.stringify(evt.tool_input).slice(0, 120) + ')';
|
||||
addMsg('TOOL ' + info, 'event tool');
|
||||
}
|
||||
else if (evt.type === 'tool_call_completed') {
|
||||
var preview = (evt.result || '').slice(0, 200);
|
||||
var cls = evt.is_error ? 'stall' : 'tool';
|
||||
addMsg(
|
||||
'RESULT ' + evt.tool_name + ': ' + preview,
|
||||
'event ' + cls
|
||||
);
|
||||
var assistCls = currentPhase === 'analyst'
|
||||
? 'assistant analyst-msg' : 'assistant';
|
||||
currentAssistantEl = addMsg('', assistCls);
|
||||
}
|
||||
else if (evt.type === 'handoff_context') {
|
||||
addHandoffBanner(evt.summary);
|
||||
var assistCls = 'assistant analyst-msg';
|
||||
currentAssistantEl = addMsg('', assistCls);
|
||||
}
|
||||
else if (evt.type === 'node_result') {
|
||||
if (evt.node_id === 'researcher') {
|
||||
if (currentAssistantEl
|
||||
&& !currentAssistantEl.textContent) {
|
||||
currentAssistantEl.remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (evt.type === 'done') {
|
||||
setPhase('Done', 'done');
|
||||
iterEl.style.display = 'none';
|
||||
if (currentAssistantEl
|
||||
&& !currentAssistantEl.textContent) {
|
||||
currentAssistantEl.remove();
|
||||
}
|
||||
currentAssistantEl = null;
|
||||
addResultBanner(
|
||||
evt.researcher, evt.analyst, evt.total_tokens
|
||||
);
|
||||
goBtn.disabled = false;
|
||||
inputEl.placeholder = 'Enter another topic...';
|
||||
}
|
||||
else if (evt.type === 'error') {
|
||||
setPhase('Error', 'error');
|
||||
addMsg('ERROR ' + evt.message, 'event stall');
|
||||
goBtn.disabled = false;
|
||||
}
|
||||
else if (evt.type === 'node_stalled') {
|
||||
addMsg('STALLED ' + evt.reason, 'event stall');
|
||||
}
|
||||
}
|
||||
|
||||
function run() {
|
||||
const text = inputEl.value.trim();
|
||||
if (!text || !ws || ws.readyState !== 1) return;
|
||||
chat.innerHTML = '';
|
||||
addMsg(text, 'user');
|
||||
currentAssistantEl = addMsg('', 'assistant');
|
||||
inputEl.value = '';
|
||||
goBtn.disabled = true;
|
||||
ws.send(JSON.stringify({ topic: text }));
|
||||
}
|
||||
|
||||
connect();
|
||||
</script>
|
||||
</body>
|
||||
</html>"""
|
||||
)
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# WebSocket handler — sequential Node A → Handoff → Node B
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def handle_ws(websocket):
    """Run the two-node handoff pipeline per user message.

    Listens for JSON frames of the form ``{"topic": ...}`` and launches
    one research -> analysis pipeline per topic. Frames that are not
    valid JSON, or that carry an empty topic, are skipped. A closed
    connection ends the handler quietly.
    """
    try:
        async for frame in websocket:
            try:
                message = json.loads(frame)
            except Exception:
                # Ignore frames that are not valid JSON.
                continue

            topic = message.get("topic", "")
            if not topic:
                continue

            logger.info(f"Starting handoff pipeline for: {topic}")

            try:
                await _run_pipeline(websocket, topic)
            except websockets.exceptions.ConnectionClosed:
                logger.info("WebSocket closed during pipeline")
                return
            except Exception as exc:
                logger.exception("Pipeline error")
                # Best-effort error report; the socket may already be gone.
                try:
                    await websocket.send(json.dumps({"type": "error", "message": str(exc)}))
                except Exception:
                    pass

    except websockets.exceptions.ConnectionClosed:
        # Client disconnected between pipelines — nothing to do.
        pass
|
||||
|
||||
|
||||
async def _run_pipeline(websocket, topic: str):
    """Execute: Node A (research) → ContextHandoff → Node B (analysis).

    Streams runtime events to the browser over ``websocket`` while the
    two nodes run. Fresh, temporary conversation stores are created for
    each run and removed when the pipeline finishes — including on early
    returns and exceptions (previously the cleanup only ran on the happy
    path, leaking one temp directory per failed run).

    Args:
        websocket: Open WebSocket connection to the browser client.
        topic: Research topic entered by the user.
    """
    import shutil

    # Fresh stores for each run
    run_dir = Path(tempfile.mkdtemp(prefix="hive_run_", dir=STORE_DIR))
    try:
        store_a = FileConversationStore(run_dir / "node_a")
        store_b = FileConversationStore(run_dir / "node_b")

        # Shared event bus
        bus = EventBus()

        async def forward_event(event):
            # Relay runtime events to the browser; swallow send failures
            # since the socket may close mid-pipeline.
            try:
                payload = {"type": event.type.value, **event.data}
                if event.node_id:
                    payload["node_id"] = event.node_id
                await websocket.send(json.dumps(payload))
            except Exception:
                pass

        bus.subscribe(
            event_types=[
                EventType.NODE_LOOP_STARTED,
                EventType.NODE_LOOP_ITERATION,
                EventType.NODE_LOOP_COMPLETED,
                EventType.LLM_TEXT_DELTA,
                EventType.TOOL_CALL_STARTED,
                EventType.TOOL_CALL_COMPLETED,
                EventType.NODE_STALLED,
            ],
            handler=forward_event,
        )

        tools = list(TOOL_REGISTRY.get_tools().values())
        tool_executor = TOOL_REGISTRY.get_executor()

        # ---- Phase 1: Researcher --------------------------------------------
        await websocket.send(json.dumps({"type": "phase", "phase": "researcher"}))

        node_a = EventLoopNode(
            event_bus=bus,
            judge=None,  # implicit judge: accept when output_keys filled
            config=LoopConfig(
                max_iterations=20,
                max_tool_calls_per_turn=10,
                max_history_tokens=32_000,
            ),
            conversation_store=store_a,
            tool_executor=tool_executor,
        )

        ctx_a = NodeContext(
            runtime=RUNTIME,
            node_id="researcher",
            node_spec=RESEARCHER_SPEC,
            memory=SharedMemory(),
            input_data={"topic": topic},
            llm=LLM,
            available_tools=tools,
        )

        result_a = await node_a.execute(ctx_a)
        logger.info(
            "Researcher done: success=%s, tokens=%s",
            result_a.success,
            result_a.tokens_used,
        )

        await websocket.send(
            json.dumps(
                {
                    "type": "node_result",
                    "node_id": "researcher",
                    "success": result_a.success,
                    "output": result_a.output,
                }
            )
        )

        if not result_a.success:
            await websocket.send(
                json.dumps(
                    {
                        "type": "error",
                        "message": f"Researcher failed: {result_a.error}",
                    }
                )
            )
            return

        # ---- Phase 2: Context Handoff ---------------------------------------
        await websocket.send(json.dumps({"type": "phase", "phase": "handoff"}))

        # Restore the researcher's conversation from store
        conversation_a = await NodeConversation.restore(store_a)
        if conversation_a is None:
            await websocket.send(
                json.dumps(
                    {
                        "type": "error",
                        "message": "Failed to restore researcher conversation",
                    }
                )
            )
            return

        handoff_engine = ContextHandoff(llm=LLM)
        handoff_context = handoff_engine.summarize_conversation(
            conversation=conversation_a,
            node_id="researcher",
            output_keys=["research_summary"],
        )

        formatted_handoff = ContextHandoff.format_as_input(handoff_context)
        logger.info(
            "Handoff: %d turns, ~%d tokens, keys=%s",
            handoff_context.turn_count,
            handoff_context.total_tokens_used,
            list(handoff_context.key_outputs.keys()),
        )

        # Send handoff context to browser
        await websocket.send(
            json.dumps(
                {
                    "type": "handoff_context",
                    "summary": handoff_context.summary[:500],
                    "turn_count": handoff_context.turn_count,
                    "tokens": handoff_context.total_tokens_used,
                    "key_outputs": handoff_context.key_outputs,
                }
            )
        )

        # ---- Phase 3: Analyst -----------------------------------------------
        await websocket.send(json.dumps({"type": "phase", "phase": "analyst"}))

        node_b = EventLoopNode(
            event_bus=bus,
            judge=None,  # implicit judge
            config=LoopConfig(
                max_iterations=10,
                max_tool_calls_per_turn=5,
                max_history_tokens=32_000,
            ),
            conversation_store=store_b,
        )

        ctx_b = NodeContext(
            runtime=RUNTIME,
            node_id="analyst",
            node_spec=ANALYST_SPEC,
            memory=SharedMemory(),
            input_data={"context": formatted_handoff},
            llm=LLM,
            available_tools=[],
        )

        result_b = await node_b.execute(ctx_b)
        logger.info(
            "Analyst done: success=%s, tokens=%s",
            result_b.success,
            result_b.tokens_used,
        )

        # ---- Done -----------------------------------------------------------
        await websocket.send(
            json.dumps(
                {
                    "type": "done",
                    "researcher": result_a.output,
                    "analyst": result_b.output,
                    "total_tokens": ((result_a.tokens_used or 0) + (result_b.tokens_used or 0)),
                }
            )
        )
    finally:
        # Clean up temp stores even on early return or error.
        try:
            shutil.rmtree(run_dir)
        except Exception:
            pass
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# HTTP handler
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def process_request(connection, request: Request):
    """Serve HTML on GET /, upgrade to WebSocket on /ws."""
    if request.path != "/ws":
        headers = websockets.Headers({"Content-Type": "text/html; charset=utf-8"})
        return Response(HTTPStatus.OK, "OK", headers, HTML_PAGE.encode())
    # Returning None lets the websockets library perform the upgrade.
    return None
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Main
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def main():
    """Start the combined HTTP/WebSocket demo server and run forever."""
    listen_port = 8766
    async with websockets.serve(
        handle_ws,
        "0.0.0.0",
        listen_port,
        process_request=process_request,
    ):
        logger.info(f"Handoff demo at http://localhost:{listen_port}")
        logger.info("Enter a research topic to start the pipeline.")
        # Block forever; the server runs until the process is interrupted.
        await asyncio.Future()


if __name__ == "__main__":
    asyncio.run(main())
|
||||
File diff suppressed because it is too large
Load Diff
@@ -9,7 +9,7 @@ for understanding the core runtime loop:
|
||||
Setup -> Graph definition -> Execution -> Result
|
||||
|
||||
Run with:
|
||||
PYTHONPATH=core python core/examples/manual_agent.py
|
||||
uv run python core/examples/manual_agent.py
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
|
||||
@@ -15,7 +15,7 @@ You cannot skip steps or bypass validation.
|
||||
|
||||
from collections.abc import Callable
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
@@ -26,7 +26,7 @@ from framework.graph.goal import Goal
|
||||
from framework.graph.node import NodeSpec
|
||||
|
||||
|
||||
class BuildPhase(str, Enum):
|
||||
class BuildPhase(StrEnum):
|
||||
"""Current phase of the build process."""
|
||||
|
||||
INIT = "init" # Just started
|
||||
|
||||
@@ -64,6 +64,8 @@ class AdenCachedStorage(CredentialStorage):
|
||||
- **Reads**: Try local cache first, fallback to Aden if stale/missing
|
||||
- **Writes**: Always write to local cache
|
||||
- **Offline resilience**: Uses cached credentials when Aden is unreachable
|
||||
- **Provider-based lookup**: Match credentials by provider name (e.g., "hubspot")
|
||||
when direct ID lookup fails, since Aden uses hash-based IDs internally.
|
||||
|
||||
The cache TTL determines how long to trust local credentials before
|
||||
checking with the Aden server for updates. This balances:
|
||||
@@ -85,6 +87,7 @@ class AdenCachedStorage(CredentialStorage):
|
||||
|
||||
# First access fetches from Aden
|
||||
# Subsequent accesses use cache until TTL expires
|
||||
# Can look up by provider name OR credential ID
|
||||
token = store.get_key("hubspot", "access_token")
|
||||
"""
|
||||
|
||||
@@ -111,21 +114,24 @@ class AdenCachedStorage(CredentialStorage):
|
||||
self._cache_ttl = timedelta(seconds=cache_ttl_seconds)
|
||||
self._prefer_local = prefer_local
|
||||
self._cache_timestamps: dict[str, datetime] = {}
|
||||
# Index: provider name (e.g., "hubspot") -> credential hash ID
|
||||
self._provider_index: dict[str, str] = {}
|
||||
|
||||
def save(self, credential: CredentialObject) -> None:
|
||||
"""
|
||||
Save credential to local cache.
|
||||
Save credential to local cache and update provider index.
|
||||
|
||||
Args:
|
||||
credential: The credential to save.
|
||||
"""
|
||||
self._local.save(credential)
|
||||
self._cache_timestamps[credential.id] = datetime.now(UTC)
|
||||
self._index_provider(credential)
|
||||
logger.debug(f"Cached credential '{credential.id}'")
|
||||
|
||||
def load(self, credential_id: str) -> CredentialObject | None:
|
||||
"""
|
||||
Load credential from cache, with Aden fallback.
|
||||
Load credential from cache, with Aden fallback and provider-based lookup.
|
||||
|
||||
The loading strategy depends on the `prefer_local` setting:
|
||||
|
||||
@@ -141,8 +147,37 @@ class AdenCachedStorage(CredentialStorage):
|
||||
2. Update local cache with response
|
||||
3. Fall back to local cache only if Aden fails
|
||||
|
||||
Provider-based lookup:
|
||||
When a provider index mapping exists for the credential_id (e.g.,
|
||||
"hubspot" → hash ID), the Aden-synced credential is loaded first.
|
||||
This ensures fresh OAuth tokens from Aden take priority over stale
|
||||
local credentials (env vars, old encrypted files).
|
||||
|
||||
Args:
|
||||
credential_id: The credential identifier.
|
||||
credential_id: The credential identifier or provider name.
|
||||
|
||||
Returns:
|
||||
CredentialObject if found, None otherwise.
|
||||
"""
|
||||
# Check provider index first — Aden-synced credentials take priority
|
||||
resolved_id = self._provider_index.get(credential_id)
|
||||
if resolved_id and resolved_id != credential_id:
|
||||
result = self._load_by_id(resolved_id)
|
||||
if result is not None:
|
||||
logger.info(
|
||||
f"Loaded credential '{credential_id}' via provider index (id='{resolved_id}')"
|
||||
)
|
||||
return result
|
||||
|
||||
# Direct lookup (exact credential_id match)
|
||||
return self._load_by_id(credential_id)
|
||||
|
||||
def _load_by_id(self, credential_id: str) -> CredentialObject | None:
|
||||
"""
|
||||
Load credential by exact ID from cache, with Aden fallback.
|
||||
|
||||
Args:
|
||||
credential_id: The exact credential identifier.
|
||||
|
||||
Returns:
|
||||
CredentialObject if found, None otherwise.
|
||||
@@ -200,15 +235,21 @@ class AdenCachedStorage(CredentialStorage):
|
||||
|
||||
def exists(self, credential_id: str) -> bool:
|
||||
"""
|
||||
Check if credential exists in local cache.
|
||||
Check if credential exists in local cache (by ID or provider name).
|
||||
|
||||
Args:
|
||||
credential_id: The credential identifier.
|
||||
credential_id: The credential identifier or provider name.
|
||||
|
||||
Returns:
|
||||
True if credential exists locally.
|
||||
"""
|
||||
return self._local.exists(credential_id)
|
||||
if self._local.exists(credential_id):
|
||||
return True
|
||||
# Check provider index
|
||||
resolved_id = self._provider_index.get(credential_id)
|
||||
if resolved_id and resolved_id != credential_id:
|
||||
return self._local.exists(resolved_id)
|
||||
return False
|
||||
|
||||
def _is_cache_fresh(self, credential_id: str) -> bool:
|
||||
"""
|
||||
@@ -242,6 +283,47 @@ class AdenCachedStorage(CredentialStorage):
|
||||
self._cache_timestamps.clear()
|
||||
logger.debug("Invalidated all cache entries")
|
||||
|
||||
def _index_provider(self, credential: CredentialObject) -> None:
|
||||
"""
|
||||
Index a credential by its provider/integration type.
|
||||
|
||||
Aden credentials carry an ``_integration_type`` key whose value is
|
||||
the provider name (e.g., ``hubspot``). This method maps that
|
||||
provider name to the credential's hash ID so that subsequent
|
||||
``load("hubspot")`` calls resolve to the correct credential.
|
||||
|
||||
Args:
|
||||
credential: The credential to index.
|
||||
"""
|
||||
integration_type_key = credential.keys.get("_integration_type")
|
||||
if integration_type_key is None:
|
||||
return
|
||||
provider_name = integration_type_key.value.get_secret_value()
|
||||
if provider_name:
|
||||
self._provider_index[provider_name] = credential.id
|
||||
logger.debug(f"Indexed provider '{provider_name}' -> '{credential.id}'")
|
||||
|
||||
def rebuild_provider_index(self) -> int:
|
||||
"""
|
||||
Rebuild the provider index from all locally cached credentials.
|
||||
|
||||
Useful after loading from disk when the in-memory index is empty.
|
||||
|
||||
Returns:
|
||||
Number of provider mappings indexed.
|
||||
"""
|
||||
self._provider_index.clear()
|
||||
indexed = 0
|
||||
for cred_id in self._local.list_all():
|
||||
cred = self._local.load(cred_id)
|
||||
if cred:
|
||||
before = len(self._provider_index)
|
||||
self._index_provider(cred)
|
||||
if len(self._provider_index) > before:
|
||||
indexed += 1
|
||||
logger.debug(f"Rebuilt provider index with {indexed} mappings")
|
||||
return indexed
|
||||
|
||||
def sync_all_from_aden(self) -> int:
|
||||
"""
|
||||
Sync all credentials from Aden server to local cache.
|
||||
|
||||
@@ -589,6 +589,149 @@ class TestAdenCachedStorage:
|
||||
assert info["stale"]["is_fresh"] is False
|
||||
assert info["stale"]["ttl_remaining_seconds"] == 0
|
||||
|
||||
def test_save_indexes_provider(self, cached_storage):
|
||||
"""Test save builds the provider index from _integration_type key."""
|
||||
cred = CredentialObject(
|
||||
id="aHVic3BvdDp0ZXN0OjEzNjExOjExNTI1",
|
||||
credential_type=CredentialType.OAUTH2,
|
||||
keys={
|
||||
"access_token": CredentialKey(
|
||||
name="access_token",
|
||||
value=SecretStr("token-value"),
|
||||
),
|
||||
"_integration_type": CredentialKey(
|
||||
name="_integration_type",
|
||||
value=SecretStr("hubspot"),
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
cached_storage.save(cred)
|
||||
|
||||
assert cached_storage._provider_index["hubspot"] == "aHVic3BvdDp0ZXN0OjEzNjExOjExNTI1"
|
||||
|
||||
def test_load_by_provider_name(self, cached_storage):
|
||||
"""Test load resolves provider name to hash-based credential ID."""
|
||||
hash_id = "aHVic3BvdDp0ZXN0OjEzNjExOjExNTI1"
|
||||
cred = CredentialObject(
|
||||
id=hash_id,
|
||||
credential_type=CredentialType.OAUTH2,
|
||||
keys={
|
||||
"access_token": CredentialKey(
|
||||
name="access_token",
|
||||
value=SecretStr("hubspot-token"),
|
||||
),
|
||||
"_integration_type": CredentialKey(
|
||||
name="_integration_type",
|
||||
value=SecretStr("hubspot"),
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
# Save builds the index
|
||||
cached_storage.save(cred)
|
||||
|
||||
# Load by provider name should resolve to the hash ID
|
||||
loaded = cached_storage.load("hubspot")
|
||||
|
||||
assert loaded is not None
|
||||
assert loaded.id == hash_id
|
||||
assert loaded.keys["access_token"].value.get_secret_value() == "hubspot-token"
|
||||
|
||||
def test_load_by_direct_id_still_works(self, cached_storage):
|
||||
"""Test load by direct hash ID still works as before."""
|
||||
hash_id = "aHVic3BvdDp0ZXN0OjEzNjExOjExNTI1"
|
||||
cred = CredentialObject(
|
||||
id=hash_id,
|
||||
credential_type=CredentialType.OAUTH2,
|
||||
keys={
|
||||
"access_token": CredentialKey(
|
||||
name="access_token",
|
||||
value=SecretStr("token"),
|
||||
),
|
||||
"_integration_type": CredentialKey(
|
||||
name="_integration_type",
|
||||
value=SecretStr("hubspot"),
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
cached_storage.save(cred)
|
||||
|
||||
# Direct ID lookup should still work
|
||||
loaded = cached_storage.load(hash_id)
|
||||
|
||||
assert loaded is not None
|
||||
assert loaded.id == hash_id
|
||||
|
||||
def test_exists_by_provider_name(self, cached_storage):
|
||||
"""Test exists resolves provider name to hash-based credential ID."""
|
||||
hash_id = "c2xhY2s6dGVzdDo5OTk="
|
||||
cred = CredentialObject(
|
||||
id=hash_id,
|
||||
credential_type=CredentialType.OAUTH2,
|
||||
keys={
|
||||
"access_token": CredentialKey(
|
||||
name="access_token",
|
||||
value=SecretStr("slack-token"),
|
||||
),
|
||||
"_integration_type": CredentialKey(
|
||||
name="_integration_type",
|
||||
value=SecretStr("slack"),
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
cached_storage.save(cred)
|
||||
|
||||
assert cached_storage.exists("slack") is True
|
||||
assert cached_storage.exists(hash_id) is True
|
||||
assert cached_storage.exists("nonexistent") is False
|
||||
|
||||
def test_rebuild_provider_index(self, cached_storage, local_storage):
|
||||
"""Test rebuild_provider_index reconstructs from local storage."""
|
||||
# Manually save credentials to local storage (bypassing cached_storage.save)
|
||||
for provider_name, hash_id in [("hubspot", "hash_hub"), ("slack", "hash_slack")]:
|
||||
cred = CredentialObject(
|
||||
id=hash_id,
|
||||
credential_type=CredentialType.OAUTH2,
|
||||
keys={
|
||||
"_integration_type": CredentialKey(
|
||||
name="_integration_type",
|
||||
value=SecretStr(provider_name),
|
||||
),
|
||||
},
|
||||
)
|
||||
local_storage.save(cred)
|
||||
|
||||
# Index should be empty (we bypassed save)
|
||||
assert len(cached_storage._provider_index) == 0
|
||||
|
||||
# Rebuild
|
||||
indexed = cached_storage.rebuild_provider_index()
|
||||
|
||||
assert indexed == 2
|
||||
assert cached_storage._provider_index["hubspot"] == "hash_hub"
|
||||
assert cached_storage._provider_index["slack"] == "hash_slack"
|
||||
|
||||
def test_save_without_integration_type_no_index(self, cached_storage):
|
||||
"""Test save does not index credentials without _integration_type key."""
|
||||
cred = CredentialObject(
|
||||
id="plain-cred",
|
||||
credential_type=CredentialType.API_KEY,
|
||||
keys={
|
||||
"api_key": CredentialKey(
|
||||
name="api_key",
|
||||
value=SecretStr("key-value"),
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
cached_storage.save(cred)
|
||||
|
||||
assert "plain-cred" not in cached_storage._provider_index
|
||||
assert len(cached_storage._provider_index) == 0
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Integration Tests
|
||||
|
||||
@@ -8,7 +8,7 @@ containing one or more keys (e.g., api_key, access_token, refresh_token).
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import UTC, datetime
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field, SecretStr
|
||||
@@ -19,7 +19,7 @@ def _utc_now() -> datetime:
|
||||
return datetime.now(UTC)
|
||||
|
||||
|
||||
class CredentialType(str, Enum):
|
||||
class CredentialType(StrEnum):
|
||||
"""Types of credentials the store can manage."""
|
||||
|
||||
API_KEY = "api_key"
|
||||
|
||||
@@ -96,7 +96,7 @@ class BaseOAuth2Provider(CredentialProvider):
|
||||
self._client = httpx.Client(timeout=self.config.request_timeout)
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"OAuth2 provider requires 'httpx'. Install with: pip install httpx"
|
||||
"OAuth2 provider requires 'httpx'. Install with: uv pip install httpx"
|
||||
) from e
|
||||
return self._client
|
||||
|
||||
|
||||
@@ -11,11 +11,11 @@ from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
|
||||
class TokenPlacement(str, Enum):
|
||||
class TokenPlacement(StrEnum):
|
||||
"""Where to place the access token in HTTP requests."""
|
||||
|
||||
HEADER_BEARER = "header_bearer"
|
||||
|
||||
@@ -136,7 +136,8 @@ class EncryptedFileStorage(CredentialStorage):
|
||||
from cryptography.fernet import Fernet
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"Encrypted storage requires 'cryptography'. Install with: pip install cryptography"
|
||||
"Encrypted storage requires 'cryptography'. "
|
||||
"Install with: uv pip install cryptography"
|
||||
) from e
|
||||
|
||||
self.base_path = Path(base_path or self.DEFAULT_PATH).expanduser()
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
HashiCorp Vault storage adapter.
|
||||
|
||||
Provides integration with HashiCorp Vault for enterprise secret management.
|
||||
Requires the 'hvac' package: pip install hvac
|
||||
Requires the 'hvac' package: uv pip install hvac
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -66,7 +66,7 @@ class HashiCorpVaultStorage(CredentialStorage):
|
||||
- AWS IAM auth method
|
||||
|
||||
Requirements:
|
||||
pip install hvac
|
||||
uv pip install hvac
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -97,7 +97,7 @@ class HashiCorpVaultStorage(CredentialStorage):
|
||||
import hvac
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"HashiCorp Vault support requires 'hvac'. Install with: pip install hvac"
|
||||
"HashiCorp Vault support requires 'hvac'. Install with: uv pip install hvac"
|
||||
) from e
|
||||
|
||||
self._url = url
|
||||
|
||||
@@ -1,8 +1,22 @@
|
||||
"""Graph structures: Goals, Nodes, Edges, and Flexible Execution."""
|
||||
|
||||
from framework.graph.client_io import (
|
||||
ActiveNodeClientIO,
|
||||
ClientIOGateway,
|
||||
InertNodeClientIO,
|
||||
NodeClientIO,
|
||||
)
|
||||
from framework.graph.code_sandbox import CodeSandbox, safe_eval, safe_exec
|
||||
from framework.graph.context_handoff import ContextHandoff, HandoffContext
|
||||
from framework.graph.conversation import ConversationStore, Message, NodeConversation
|
||||
from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
|
||||
from framework.graph.event_loop_node import (
|
||||
EventLoopNode,
|
||||
JudgeProtocol,
|
||||
JudgeVerdict,
|
||||
LoopConfig,
|
||||
OutputAccumulator,
|
||||
)
|
||||
from framework.graph.executor import GraphExecutor
|
||||
from framework.graph.flexible_executor import ExecutorConfig, FlexibleGraphExecutor
|
||||
from framework.graph.goal import Constraint, Goal, GoalStatus, SuccessCriterion
|
||||
@@ -77,4 +91,18 @@ __all__ = [
|
||||
"NodeConversation",
|
||||
"ConversationStore",
|
||||
"Message",
|
||||
# Event Loop
|
||||
"EventLoopNode",
|
||||
"LoopConfig",
|
||||
"OutputAccumulator",
|
||||
"JudgeProtocol",
|
||||
"JudgeVerdict",
|
||||
# Context Handoff
|
||||
"ContextHandoff",
|
||||
"HandoffContext",
|
||||
# Client I/O
|
||||
"NodeClientIO",
|
||||
"ActiveNodeClientIO",
|
||||
"InertNodeClientIO",
|
||||
"ClientIOGateway",
|
||||
]
|
||||
|
||||
@@ -0,0 +1,170 @@
|
||||
"""
|
||||
Client I/O gateway for graph nodes.
|
||||
|
||||
Provides the bridge between node code and external clients:
|
||||
- ActiveNodeClientIO: for client_facing=True nodes (streams output, accepts input)
|
||||
- InertNodeClientIO: for client_facing=False nodes (logs internally, redirects input)
|
||||
- ClientIOGateway: factory that creates the right variant per node
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import AsyncIterator
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from framework.runtime.event_bus import EventBus
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class NodeClientIO(ABC):
|
||||
"""Abstract base for node client I/O."""
|
||||
|
||||
@abstractmethod
|
||||
async def emit_output(self, content: str, is_final: bool = False) -> None:
|
||||
"""Emit output content. If is_final=True, signal end of stream."""
|
||||
|
||||
@abstractmethod
|
||||
async def request_input(self, prompt: str = "", timeout: float | None = None) -> str:
|
||||
"""Request input. Behavior depends on whether the node is client-facing."""
|
||||
|
||||
|
||||
class ActiveNodeClientIO(NodeClientIO):
|
||||
"""
|
||||
Client I/O for client_facing=True nodes.
|
||||
|
||||
- emit_output() queues content and publishes CLIENT_OUTPUT_DELTA.
|
||||
- request_input() publishes CLIENT_INPUT_REQUESTED, then awaits provide_input().
|
||||
- output_stream() yields queued content until the final sentinel.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
node_id: str,
|
||||
event_bus: EventBus | None = None,
|
||||
) -> None:
|
||||
self.node_id = node_id
|
||||
self._event_bus = event_bus
|
||||
|
||||
self._output_queue: asyncio.Queue[str | None] = asyncio.Queue()
|
||||
self._output_snapshot = ""
|
||||
|
||||
self._input_event: asyncio.Event | None = None
|
||||
self._input_result: str | None = None
|
||||
|
||||
async def emit_output(self, content: str, is_final: bool = False) -> None:
|
||||
self._output_snapshot += content
|
||||
await self._output_queue.put(content)
|
||||
|
||||
if self._event_bus is not None:
|
||||
await self._event_bus.emit_client_output_delta(
|
||||
stream_id=self.node_id,
|
||||
node_id=self.node_id,
|
||||
content=content,
|
||||
snapshot=self._output_snapshot,
|
||||
)
|
||||
|
||||
if is_final:
|
||||
await self._output_queue.put(None)
|
||||
|
||||
async def request_input(self, prompt: str = "", timeout: float | None = None) -> str:
|
||||
if self._input_event is not None:
|
||||
raise RuntimeError("request_input already pending for this node")
|
||||
|
||||
self._input_event = asyncio.Event()
|
||||
self._input_result = None
|
||||
|
||||
if self._event_bus is not None:
|
||||
await self._event_bus.emit_client_input_requested(
|
||||
stream_id=self.node_id,
|
||||
node_id=self.node_id,
|
||||
prompt=prompt,
|
||||
)
|
||||
|
||||
try:
|
||||
if timeout is not None:
|
||||
await asyncio.wait_for(self._input_event.wait(), timeout=timeout)
|
||||
else:
|
||||
await self._input_event.wait()
|
||||
finally:
|
||||
self._input_event = None
|
||||
|
||||
if self._input_result is None:
|
||||
raise RuntimeError("input event was set but no input was provided")
|
||||
result = self._input_result
|
||||
self._input_result = None
|
||||
return result
|
||||
|
||||
async def provide_input(self, content: str) -> None:
|
||||
"""Called externally to fulfill a pending request_input()."""
|
||||
if self._input_event is None:
|
||||
raise RuntimeError("no pending request_input to fulfill")
|
||||
self._input_result = content
|
||||
self._input_event.set()
|
||||
|
||||
async def output_stream(self) -> AsyncIterator[str]:
|
||||
"""Async iterator that yields output chunks until the final sentinel."""
|
||||
while True:
|
||||
chunk = await self._output_queue.get()
|
||||
if chunk is None:
|
||||
break
|
||||
yield chunk
|
||||
|
||||
|
||||
class InertNodeClientIO(NodeClientIO):
|
||||
"""
|
||||
Client I/O for client_facing=False nodes.
|
||||
|
||||
- emit_output() publishes NODE_INTERNAL_OUTPUT (content is not discarded).
|
||||
- request_input() publishes NODE_INPUT_BLOCKED and returns a redirect string.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
node_id: str,
|
||||
event_bus: EventBus | None = None,
|
||||
) -> None:
|
||||
self.node_id = node_id
|
||||
self._event_bus = event_bus
|
||||
|
||||
async def emit_output(self, content: str, is_final: bool = False) -> None:
|
||||
if self._event_bus is not None:
|
||||
await self._event_bus.emit_node_internal_output(
|
||||
stream_id=self.node_id,
|
||||
node_id=self.node_id,
|
||||
content=content,
|
||||
)
|
||||
|
||||
async def request_input(self, prompt: str = "", timeout: float | None = None) -> str:
|
||||
if self._event_bus is not None:
|
||||
await self._event_bus.emit_node_input_blocked(
|
||||
stream_id=self.node_id,
|
||||
node_id=self.node_id,
|
||||
prompt=prompt,
|
||||
)
|
||||
return (
|
||||
"You are an internal processing node. There is no user to interact with."
|
||||
" Work with the data provided in your inputs to complete your task."
|
||||
)
|
||||
|
||||
|
||||
class ClientIOGateway:
|
||||
"""Factory that creates the appropriate NodeClientIO for a node."""
|
||||
|
||||
def __init__(self, event_bus: EventBus | None = None) -> None:
|
||||
self._event_bus = event_bus
|
||||
|
||||
def create_io(self, node_id: str, client_facing: bool) -> NodeClientIO:
|
||||
if client_facing:
|
||||
return ActiveNodeClientIO(
|
||||
node_id=node_id,
|
||||
event_bus=self._event_bus,
|
||||
)
|
||||
return InertNodeClientIO(
|
||||
node_id=node_id,
|
||||
event_bus=self._event_bus,
|
||||
)
|
||||
@@ -0,0 +1,191 @@
|
||||
"""Context handoff: summarize a completed NodeConversation for the next graph node."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from framework.graph.conversation import _try_extract_key
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from framework.graph.conversation import NodeConversation
|
||||
from framework.llm.provider import LLMProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TRUNCATE_CHARS = 500
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Data
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class HandoffContext:
    """Structured summary of a completed node conversation.

    Carries everything the next node needs from its predecessor:
    the summary text, extracted key outputs, and basic size stats.
    """

    source_node_id: str          # node whose conversation was summarized
    summary: str                 # abstractive (LLM) or extractive summary text
    key_outputs: dict[str, Any]  # values extracted for requested output keys
    turn_count: int              # number of turns in the source conversation
    total_tokens_used: int       # best-available token estimate
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ContextHandoff
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class ContextHandoff:
    """Summarize a completed NodeConversation into a HandoffContext.

    Parameters
    ----------
    llm : LLMProvider | None
        Optional LLM provider for abstractive summarization.
        When *None*, all summarization uses the extractive fallback.
    """

    def __init__(self, llm: LLMProvider | None = None) -> None:
        self.llm = llm

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def summarize_conversation(
        self,
        conversation: NodeConversation,
        node_id: str,
        output_keys: list[str] | None = None,
    ) -> HandoffContext:
        """Produce a HandoffContext from *conversation*.

        1. Extracts turn_count & total_tokens_used (sync properties).
        2. Extracts key_outputs by scanning assistant messages most-recent-first.
        3. Builds a summary via the LLM (if available) or extractive fallback.
        """
        turn_count = conversation.turn_count
        total_tokens_used = conversation.estimate_tokens()
        messages = conversation.messages  # defensive copy

        # --- key outputs ---------------------------------------------------
        # Newest-first scan so each key picks up its most recent value.
        key_outputs: dict[str, Any] = {}
        if output_keys:
            remaining = set(output_keys)
            for msg in reversed(messages):
                if not remaining:
                    # [FIX] Every key has been found — stop scanning instead
                    # of iterating (and skipping) all older messages.
                    break
                if msg.role != "assistant":
                    continue
                for key in list(remaining):
                    value = _try_extract_key(msg.content, key)
                    if value is not None:
                        key_outputs[key] = value
                        remaining.discard(key)

        # --- summary -------------------------------------------------------
        if self.llm is not None:
            try:
                summary = self._llm_summary(messages, output_keys or [])
            except Exception:
                # Best-effort: a summarization failure must not break handoff.
                logger.warning(
                    "LLM summarization failed; falling back to extractive.",
                    exc_info=True,
                )
                summary = self._extractive_summary(messages)
        else:
            summary = self._extractive_summary(messages)

        return HandoffContext(
            source_node_id=node_id,
            summary=summary,
            key_outputs=key_outputs,
            turn_count=turn_count,
            total_tokens_used=total_tokens_used,
        )

    @staticmethod
    def format_as_input(handoff: HandoffContext) -> str:
        """Render *handoff* as structured plain text for the next node's input."""
        header = (
            f"--- CONTEXT FROM: {handoff.source_node_id} "
            f"({handoff.turn_count} turns, ~{handoff.total_tokens_used} tokens) ---"
        )

        sections: list[str] = [header, ""]

        if handoff.key_outputs:
            sections.append("KEY OUTPUTS:")
            for k, v in handoff.key_outputs.items():
                sections.append(f"- {k}: {v}")
            sections.append("")

        summary_text = handoff.summary or "No summary available."
        sections.append("SUMMARY:")
        sections.append(summary_text)
        sections.append("")
        sections.append("--- END CONTEXT ---")

        return "\n".join(sections)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _extractive_summary(messages: list) -> str:
        """Build a summary from key assistant messages without an LLM.

        Strategy:
        - Include the first assistant message (initial assessment).
        - Include the last assistant message (final conclusion).
        - Truncate each to ~500 chars (_TRUNCATE_CHARS).
        """
        if not messages:
            return "Empty conversation."

        assistant_msgs = [m for m in messages if m.role == "assistant"]
        if not assistant_msgs:
            return "No assistant responses."

        parts: list[str] = []

        first = assistant_msgs[0].content
        parts.append(first[:_TRUNCATE_CHARS])

        if len(assistant_msgs) > 1:
            last = assistant_msgs[-1].content
            parts.append(last[:_TRUNCATE_CHARS])

        return "\n\n".join(parts)

    def _llm_summary(self, messages: list, output_keys: list[str]) -> str:
        """Produce a summary by calling the LLM provider.

        Raises ValueError when no provider is configured (programming error:
        callers must check ``self.llm`` first).
        """
        if self.llm is None:
            raise ValueError("_llm_summary called without an LLM provider")

        conversation_text = "\n".join(f"[{m.role}]: {m.content}" for m in messages)

        key_hint = ""
        if output_keys:
            key_hint = (
                "\nThe following output keys are especially important: "
                + ", ".join(output_keys)
                + ".\n"
            )

        system_prompt = (
            "You are a concise summarizer. Given the conversation below, "
            "produce a brief summary (at most ~500 tokens) that captures the "
            "key decisions, findings, and outcomes. Focus on what was concluded "
            "rather than the back-and-forth process." + key_hint
        )

        response = self.llm.complete(
            messages=[{"role": "user", "content": conversation_text}],
            system=system_prompt,
            max_tokens=500,
        )

        return response.content.strip()
|
||||
@@ -75,6 +75,16 @@ class Message:
|
||||
)
|
||||
|
||||
|
||||
def _extract_spillover_filename(content: str) -> str | None:
|
||||
"""Extract spillover filename from a truncated tool result.
|
||||
|
||||
Matches the pattern produced by EventLoopNode._truncate_tool_result():
|
||||
"saved to 'tool_github_list_stargazers_abc123.txt'"
|
||||
"""
|
||||
match = re.search(r"saved to '([^']+)'", content)
|
||||
return match.group(1) if match else None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ConversationStore protocol (Phase 2)
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -108,6 +118,50 @@ class ConversationStore(Protocol):
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _try_extract_key(content: str, key: str) -> str | None:
|
||||
"""Try 4 strategies to extract a *key*'s value from message content.
|
||||
|
||||
Strategies (in order):
|
||||
1. Whole message is JSON — ``json.loads``, check for key.
|
||||
2. Embedded JSON via ``find_json_object`` helper.
|
||||
3. Colon format: ``key: value``.
|
||||
4. Equals format: ``key = value``.
|
||||
"""
|
||||
from framework.graph.node import find_json_object
|
||||
|
||||
# 1. Whole message is JSON
|
||||
try:
|
||||
parsed = json.loads(content)
|
||||
if isinstance(parsed, dict) and key in parsed:
|
||||
val = parsed[key]
|
||||
return json.dumps(val) if not isinstance(val, str) else val
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
|
||||
# 2. Embedded JSON via find_json_object
|
||||
json_str = find_json_object(content)
|
||||
if json_str:
|
||||
try:
|
||||
parsed = json.loads(json_str)
|
||||
if isinstance(parsed, dict) and key in parsed:
|
||||
val = parsed[key]
|
||||
return json.dumps(val) if not isinstance(val, str) else val
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
|
||||
# 3. Colon format: key: value
|
||||
match = re.search(rf"\b{re.escape(key)}\s*:\s*(.+)", content)
|
||||
if match:
|
||||
return match.group(1).strip()
|
||||
|
||||
# 4. Equals format: key = value
|
||||
match = re.search(rf"\b{re.escape(key)}\s*=\s*(.+)", content)
|
||||
if match:
|
||||
return match.group(1).strip()
|
||||
|
||||
return None
|
||||
|
||||
|
||||
class NodeConversation:
|
||||
"""Message history for a graph node with optional write-through persistence.
|
||||
|
||||
@@ -133,6 +187,7 @@ class NodeConversation:
|
||||
self._messages: list[Message] = []
|
||||
self._next_seq: int = 0
|
||||
self._meta_persisted: bool = False
|
||||
self._last_api_input_tokens: int | None = None
|
||||
|
||||
# --- Properties --------------------------------------------------------
|
||||
|
||||
@@ -205,14 +260,78 @@ class NodeConversation:
|
||||
# --- Query -------------------------------------------------------------
|
||||
|
||||
def to_llm_messages(self) -> list[dict[str, Any]]:
    """Return messages as OpenAI-format dicts (system prompt excluded).

    Automatically repairs orphaned tool_use blocks (assistant messages
    with tool_calls that lack corresponding tool-result messages). This
    can happen when a loop is cancelled mid-tool-execution.
    """
    raw = [message.to_llm_dict() for message in self._messages]
    return self._repair_orphaned_tool_calls(raw)
|
||||
|
||||
@staticmethod
def _repair_orphaned_tool_calls(
    msgs: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Ensure every tool_call has a matching tool-result message.

    Unanswered calls get a synthetic error tool message so the history
    stays valid for the LLM API.
    """
    repaired: list[dict[str, Any]] = []
    for idx, message in enumerate(msgs):
        repaired.append(message)
        calls = message.get("tool_calls")
        if not calls or message.get("role") != "assistant":
            continue

        # IDs answered by the run of tool messages directly after this one.
        answered: set[str] = set()
        for follower in msgs[idx + 1:]:
            if follower.get("role") != "tool":
                break  # stop at first non-tool message
            follower_id = follower.get("tool_call_id")
            if follower_id:
                answered.add(follower_id)

        # Synthesize an error result for every unanswered call.
        for call in calls:
            call_id = call.get("id")
            if call_id and call_id not in answered:
                repaired.append(
                    {
                        "role": "tool",
                        "tool_call_id": call_id,
                        "content": "ERROR: Tool execution was interrupted.",
                    }
                )
    return repaired
|
||||
|
||||
def estimate_tokens(self) -> int:
    """Best available token estimate.

    Uses actual API input token count when available (set via
    :meth:`update_token_count`), otherwise falls back to the rough
    ``total_chars / 4`` heuristic.
    """
    actual = self._last_api_input_tokens
    if actual is not None:
        return actual
    return sum(len(m.content) for m in self._messages) // 4
|
||||
|
||||
def update_token_count(self, actual_input_tokens: int) -> None:
    """Store actual API input token count for more accurate compaction.

    Called by EventLoopNode after each LLM call with the ``input_tokens``
    value from the API response. This value includes system prompt and
    tool definitions, so it may be higher than a message-only estimate.
    """
    # Consumed by estimate_tokens(), which prefers this over the chars/4 heuristic.
    self._last_api_input_tokens = actual_input_tokens
|
||||
|
||||
def usage_ratio(self) -> float:
    """Current token usage as a fraction of *max_history_tokens*.

    Returns 0.0 when ``max_history_tokens`` is zero (unlimited).
    """
    limit = self._max_history_tokens
    if limit <= 0:
        return 0.0
    return self.estimate_tokens() / limit
|
||||
|
||||
def needs_compaction(self) -> bool:
    """True once the token estimate crosses the compaction threshold of the budget."""
    budget = self._max_history_tokens * self._compaction_threshold
    return self.estimate_tokens() >= budget
|
||||
|
||||
@@ -244,42 +363,89 @@ class NodeConversation:
|
||||
|
||||
def _try_extract_key(self, content: str, key: str) -> str | None:
    """Try 4 strategies to extract a key's value from message content.

    Thin wrapper retained for backward compatibility; the real logic lives
    in the module-level ``_try_extract_key`` function.
    """
    return _try_extract_key(content, key)
|
||||
|
||||
# --- Lifecycle ---------------------------------------------------------
|
||||
|
||||
async def prune_old_tool_results(
    self,
    protect_tokens: int = 5000,
    min_prune_tokens: int = 2000,
) -> int:
    """Replace old tool result content with compact placeholders.

    Walks backward through messages. Recent tool results (within
    *protect_tokens*) are kept intact. Older tool results have their
    content replaced with a ~100-char placeholder that preserves the
    spillover filename reference (if any). Message structure (role,
    seq, tool_use_id) stays valid for the LLM API.

    Error tool results are never pruned — they prevent re-calling
    failing tools.

    Returns the number of messages pruned (0 if nothing was pruned).
    """
    if not self._messages:
        return 0

    # Phase 1: Walk backward, classify tool results as protected vs pruneable
    protected_tokens = 0
    pruneable: list[int] = []  # indices into self._messages
    pruneable_tokens = 0

    for i in range(len(self._messages) - 1, -1, -1):
        msg = self._messages[i]
        if msg.role != "tool":
            continue
        if msg.is_error:
            continue  # never prune errors
        if msg.content.startswith("[Pruned tool result"):
            continue  # already pruned

        # Same chars/4 heuristic used by estimate_tokens().
        est = len(msg.content) // 4
        if protected_tokens < protect_tokens:
            protected_tokens += est
        else:
            pruneable.append(i)
            pruneable_tokens += est

    # Phase 2: Only prune if enough to be worthwhile
    if pruneable_tokens < min_prune_tokens:
        return 0

    # Phase 3: Replace content with compact placeholder
    count = 0
    for i in pruneable:
        msg = self._messages[i]
        orig_len = len(msg.content)
        spillover = _extract_spillover_filename(msg.content)

        if spillover:
            placeholder = (
                f"[Pruned tool result: {orig_len} chars. "
                f"Full data in '{spillover}'. "
                f"Use load_data('{spillover}') to retrieve.]"
            )
        else:
            placeholder = f"[Pruned tool result: {orig_len} chars cleared from context.]"

        # Rebuild the message so role/seq/tool_use_id stay intact for the API.
        self._messages[i] = Message(
            seq=msg.seq,
            role=msg.role,
            content=placeholder,
            tool_use_id=msg.tool_use_id,
            tool_calls=msg.tool_calls,
            is_error=msg.is_error,
        )
        count += 1

        if self._store:
            # Write-through: persist the pruned placeholder immediately.
            await self._store.write_part(msg.seq, self._messages[i].to_storage_dict())

    # Reset token estimate — content lengths changed
    self._last_api_input_tokens = None
    return count
|
||||
|
||||
async def compact(self, summary: str, keep_recent: int = 2) -> None:
|
||||
"""Replace old messages with a summary, optionally keeping recent ones.
|
||||
|
||||
@@ -294,12 +460,18 @@ class NodeConversation:
|
||||
# Clamp: must discard at least 1 message
|
||||
keep_recent = max(0, min(keep_recent, len(self._messages) - 1))
|
||||
|
||||
if keep_recent > 0:
|
||||
old_messages = self._messages[:-keep_recent]
|
||||
recent_messages = self._messages[-keep_recent:]
|
||||
else:
|
||||
old_messages = self._messages
|
||||
recent_messages = []
|
||||
total = len(self._messages)
|
||||
split = total - keep_recent if keep_recent > 0 else total
|
||||
|
||||
# Advance split past orphaned tool results at the boundary.
|
||||
# Tool-role messages reference a tool_use from the preceding
|
||||
# assistant message; if that assistant message falls into the
|
||||
# compacted (old) portion the tool_result becomes invalid.
|
||||
while split < total and self._messages[split].role == "tool":
|
||||
split += 1
|
||||
|
||||
old_messages = list(self._messages[:split])
|
||||
recent_messages = list(self._messages[split:])
|
||||
|
||||
# Extract protected values from messages being discarded
|
||||
if self._output_keys:
|
||||
@@ -330,6 +502,7 @@ class NodeConversation:
|
||||
await self._store.write_cursor({"next_seq": self._next_seq})
|
||||
|
||||
self._messages = [summary_msg] + recent_messages
|
||||
self._last_api_input_tokens = None # reset; next LLM call will recalibrate
|
||||
|
||||
async def clear(self) -> None:
|
||||
"""Remove all messages, keep system prompt, preserve ``_next_seq``."""
|
||||
@@ -337,6 +510,7 @@ class NodeConversation:
|
||||
await self._store.delete_parts_before(self._next_seq)
|
||||
await self._store.write_cursor({"next_seq": self._next_seq})
|
||||
self._messages.clear()
|
||||
self._last_api_input_tokens = None
|
||||
|
||||
def export_summary(self) -> str:
|
||||
"""Structured summary with [STATS], [CONFIG], [RECENT_MESSAGES] sections."""
|
||||
|
||||
@@ -11,7 +11,6 @@ our edges can be created dynamically by a Builder agent based on the goal.
|
||||
|
||||
Edge Types:
|
||||
- always: Always traverse after source completes
|
||||
- always: Always traverse after source completes
|
||||
- on_success: Traverse only if source succeeds
|
||||
- on_failure: Traverse only if source fails
|
||||
- conditional: Traverse based on expression evaluation (SAFE SUBSET ONLY)
|
||||
@@ -22,7 +21,7 @@ allowing the LLM to evaluate whether proceeding along an edge makes sense
|
||||
given the current goal, context, and execution state.
|
||||
"""
|
||||
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
@@ -30,7 +29,7 @@ from pydantic import BaseModel, Field
|
||||
from framework.graph.safe_eval import safe_eval
|
||||
|
||||
|
||||
class EdgeCondition(str, Enum):
|
||||
class EdgeCondition(StrEnum):
|
||||
"""When an edge should be traversed."""
|
||||
|
||||
ALWAYS = "always" # Always after source completes
|
||||
@@ -609,4 +608,40 @@ class GraphSpec(BaseModel):
|
||||
continue
|
||||
errors.append(f"Node '{node.id}' is unreachable from entry")
|
||||
|
||||
# Client-facing fan-out validation
|
||||
fan_outs = self.detect_fan_out_nodes()
|
||||
for source_id, targets in fan_outs.items():
|
||||
client_facing_targets = [
|
||||
t
|
||||
for t in targets
|
||||
if self.get_node(t) and getattr(self.get_node(t), "client_facing", False)
|
||||
]
|
||||
if len(client_facing_targets) > 1:
|
||||
errors.append(
|
||||
f"Fan-out from '{source_id}' has multiple client-facing nodes: "
|
||||
f"{client_facing_targets}. Only one branch may be client-facing."
|
||||
)
|
||||
|
||||
# Output key overlap on parallel event_loop nodes
|
||||
for source_id, targets in fan_outs.items():
|
||||
event_loop_targets = [
|
||||
t
|
||||
for t in targets
|
||||
if self.get_node(t) and getattr(self.get_node(t), "node_type", "") == "event_loop"
|
||||
]
|
||||
if len(event_loop_targets) > 1:
|
||||
seen_keys: dict[str, str] = {}
|
||||
for node_id in event_loop_targets:
|
||||
node = self.get_node(node_id)
|
||||
for key in getattr(node, "output_keys", []):
|
||||
if key in seen_keys:
|
||||
errors.append(
|
||||
f"Fan-out from '{source_id}': event_loop nodes "
|
||||
f"'{seen_keys[key]}' and '{node_id}' both write to "
|
||||
f"output_key '{key}'. Parallel event_loop nodes must "
|
||||
f"have disjoint output_keys to prevent last-wins data loss."
|
||||
)
|
||||
else:
|
||||
seen_keys[key] = node_id
|
||||
|
||||
return errors
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -11,11 +11,13 @@ The executor:
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import warnings
|
||||
from collections.abc import Callable
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from framework.graph.edge import EdgeSpec, GraphSpec
|
||||
from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
|
||||
from framework.graph.goal import Goal
|
||||
from framework.graph.node import (
|
||||
FunctionNode,
|
||||
@@ -54,6 +56,9 @@ class ExecutionResult:
|
||||
had_partial_failures: bool = False # True if any node failed but eventually succeeded
|
||||
execution_quality: str = "clean" # "clean", "degraded", or "failed"
|
||||
|
||||
# Visit tracking (for feedback/callback edges)
|
||||
node_visit_counts: dict[str, int] = field(default_factory=dict) # {node_id: visit_count}
|
||||
|
||||
@property
|
||||
def is_clean_success(self) -> bool:
|
||||
"""True only if execution succeeded with no retries or failures."""
|
||||
@@ -124,6 +129,9 @@ class GraphExecutor:
|
||||
cleansing_config: CleansingConfig | None = None,
|
||||
enable_parallel_execution: bool = True,
|
||||
parallel_config: ParallelExecutionConfig | None = None,
|
||||
event_bus: Any | None = None,
|
||||
stream_id: str = "",
|
||||
storage_path: str | Path | None = None,
|
||||
):
|
||||
"""
|
||||
Initialize the executor.
|
||||
@@ -138,6 +146,9 @@ class GraphExecutor:
|
||||
cleansing_config: Optional output cleansing configuration
|
||||
enable_parallel_execution: Enable parallel fan-out execution (default True)
|
||||
parallel_config: Configuration for parallel execution behavior
|
||||
event_bus: Optional event bus for emitting node lifecycle events
|
||||
stream_id: Stream ID for event correlation
|
||||
storage_path: Optional base path for conversation persistence
|
||||
"""
|
||||
self.runtime = runtime
|
||||
self.llm = llm
|
||||
@@ -147,6 +158,9 @@ class GraphExecutor:
|
||||
self.approval_callback = approval_callback
|
||||
self.validator = OutputValidator()
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self._event_bus = event_bus
|
||||
self._stream_id = stream_id
|
||||
self._storage_path = Path(storage_path) if storage_path else None
|
||||
|
||||
# Initialize output cleaner
|
||||
self.cleansing_config = cleansing_config or CleansingConfig()
|
||||
@@ -250,6 +264,8 @@ class GraphExecutor:
|
||||
total_tokens = 0
|
||||
total_latency = 0
|
||||
node_retry_counts: dict[str, int] = {} # Track retries per node
|
||||
node_visit_counts: dict[str, int] = {} # Track visits for feedback loops
|
||||
_is_retry = False # True when looping back for a retry (not a new visit)
|
||||
|
||||
# Determine entry point (may differ if resuming)
|
||||
current_node_id = graph.get_entry_point(session_state)
|
||||
@@ -278,6 +294,34 @@ class GraphExecutor:
|
||||
if node_spec is None:
|
||||
raise RuntimeError(f"Node not found: {current_node_id}")
|
||||
|
||||
# Enforce max_node_visits (feedback/callback edge support)
|
||||
# Don't increment visit count on retries — retries are not new visits
|
||||
if not _is_retry:
|
||||
cnt = node_visit_counts.get(current_node_id, 0) + 1
|
||||
node_visit_counts[current_node_id] = cnt
|
||||
_is_retry = False
|
||||
max_visits = getattr(node_spec, "max_node_visits", 1)
|
||||
if max_visits > 0 and node_visit_counts[current_node_id] > max_visits:
|
||||
self.logger.warning(
|
||||
f" ⊘ Node '{node_spec.name}' visit limit reached "
|
||||
f"({node_visit_counts[current_node_id]}/{max_visits}), skipping"
|
||||
)
|
||||
# Skip execution — follow outgoing edges using current memory
|
||||
skip_result = NodeResult(success=True, output=memory.read_all())
|
||||
next_node = self._follow_edges(
|
||||
graph=graph,
|
||||
goal=goal,
|
||||
current_node_id=current_node_id,
|
||||
current_node_spec=node_spec,
|
||||
result=skip_result,
|
||||
memory=memory,
|
||||
)
|
||||
if next_node is None:
|
||||
self.logger.info(" → No more edges after visit limit, ending")
|
||||
break
|
||||
current_node_id = next_node
|
||||
continue
|
||||
|
||||
path.append(current_node_id)
|
||||
|
||||
# Check if pause (HITL) before execution
|
||||
@@ -323,13 +367,33 @@ class GraphExecutor:
|
||||
description=f"Validation errors for {current_node_id}: {validation_errors}",
|
||||
)
|
||||
|
||||
# Emit node-started event (skip event_loop nodes — they emit their own)
|
||||
if self._event_bus and node_spec.node_type != "event_loop":
|
||||
await self._event_bus.emit_node_loop_started(
|
||||
stream_id=self._stream_id, node_id=current_node_id
|
||||
)
|
||||
|
||||
# Execute node
|
||||
self.logger.info(" Executing...")
|
||||
result = await node_impl.execute(ctx)
|
||||
|
||||
# Emit node-completed event (skip event_loop nodes)
|
||||
if self._event_bus and node_spec.node_type != "event_loop":
|
||||
await self._event_bus.emit_node_loop_completed(
|
||||
stream_id=self._stream_id, node_id=current_node_id, iterations=1
|
||||
)
|
||||
|
||||
if result.success:
|
||||
# Validate output before accepting it
|
||||
if result.output and node_spec.output_keys:
|
||||
# Validate output before accepting it.
|
||||
# Skip for event_loop nodes — their judge system is
|
||||
# the sole acceptance mechanism (see WP-8). Empty
|
||||
# strings and other flexible outputs are legitimate
|
||||
# for LLM-driven nodes that already passed the judge.
|
||||
if (
|
||||
result.output
|
||||
and node_spec.output_keys
|
||||
and node_spec.node_type != "event_loop"
|
||||
):
|
||||
validation = self.validator.validate_all(
|
||||
output=result.output,
|
||||
expected_keys=node_spec.output_keys,
|
||||
@@ -380,6 +444,15 @@ class GraphExecutor:
|
||||
# [CORRECTED] Use node_spec.max_retries instead of hardcoded 3
|
||||
max_retries = getattr(node_spec, "max_retries", 3)
|
||||
|
||||
# Event loop nodes handle retry internally via judge —
|
||||
# executor retry is catastrophic (retry multiplication)
|
||||
if node_spec.node_type == "event_loop" and max_retries > 0:
|
||||
self.logger.warning(
|
||||
f"EventLoopNode '{node_spec.id}' has max_retries={max_retries}. "
|
||||
"Overriding to 0 — event loop nodes handle retry internally via judge."
|
||||
)
|
||||
max_retries = 0
|
||||
|
||||
if node_retry_counts[current_node_id] < max_retries:
|
||||
# Retry - don't increment steps for retries
|
||||
steps -= 1
|
||||
@@ -395,49 +468,69 @@ class GraphExecutor:
|
||||
self.logger.info(
|
||||
f" ↻ Retrying ({node_retry_counts[current_node_id]}/{max_retries})..."
|
||||
)
|
||||
_is_retry = True
|
||||
continue
|
||||
else:
|
||||
# Max retries exceeded - fail the execution
|
||||
# Max retries exceeded - check for failure handlers
|
||||
self.logger.error(
|
||||
f" ✗ Max retries ({max_retries}) exceeded for node {current_node_id}"
|
||||
)
|
||||
self.runtime.report_problem(
|
||||
severity="critical",
|
||||
description=(
|
||||
f"Node {current_node_id} failed after "
|
||||
f"{max_retries} attempts: {result.error}"
|
||||
),
|
||||
)
|
||||
self.runtime.end_run(
|
||||
success=False,
|
||||
output_data=memory.read_all(),
|
||||
narrative=(
|
||||
f"Failed at {node_spec.name} after "
|
||||
f"{max_retries} retries: {result.error}"
|
||||
),
|
||||
|
||||
# Check if there's an ON_FAILURE edge to follow
|
||||
next_node = self._follow_edges(
|
||||
graph=graph,
|
||||
goal=goal,
|
||||
current_node_id=current_node_id,
|
||||
current_node_spec=node_spec,
|
||||
result=result, # result.success=False triggers ON_FAILURE
|
||||
memory=memory,
|
||||
)
|
||||
|
||||
# Calculate quality metrics
|
||||
total_retries_count = sum(node_retry_counts.values())
|
||||
nodes_failed = list(node_retry_counts.keys())
|
||||
if next_node:
|
||||
# Found a failure handler - route to it
|
||||
self.logger.info(f" → Routing to failure handler: {next_node}")
|
||||
current_node_id = next_node
|
||||
continue # Continue execution with handler
|
||||
else:
|
||||
# No failure handler - terminate execution
|
||||
self.runtime.report_problem(
|
||||
severity="critical",
|
||||
description=(
|
||||
f"Node {current_node_id} failed after "
|
||||
f"{max_retries} attempts: {result.error}"
|
||||
),
|
||||
)
|
||||
self.runtime.end_run(
|
||||
success=False,
|
||||
output_data=memory.read_all(),
|
||||
narrative=(
|
||||
f"Failed at {node_spec.name} after "
|
||||
f"{max_retries} retries: {result.error}"
|
||||
),
|
||||
)
|
||||
|
||||
return ExecutionResult(
|
||||
success=False,
|
||||
error=(
|
||||
f"Node '{node_spec.name}' failed after "
|
||||
f"{max_retries} attempts: {result.error}"
|
||||
),
|
||||
output=memory.read_all(),
|
||||
steps_executed=steps,
|
||||
total_tokens=total_tokens,
|
||||
total_latency_ms=total_latency,
|
||||
path=path,
|
||||
total_retries=total_retries_count,
|
||||
nodes_with_failures=nodes_failed,
|
||||
retry_details=dict(node_retry_counts),
|
||||
had_partial_failures=len(nodes_failed) > 0,
|
||||
execution_quality="failed",
|
||||
)
|
||||
# Calculate quality metrics
|
||||
total_retries_count = sum(node_retry_counts.values())
|
||||
nodes_failed = list(node_retry_counts.keys())
|
||||
|
||||
return ExecutionResult(
|
||||
success=False,
|
||||
error=(
|
||||
f"Node '{node_spec.name}' failed after "
|
||||
f"{max_retries} attempts: {result.error}"
|
||||
),
|
||||
output=memory.read_all(),
|
||||
steps_executed=steps,
|
||||
total_tokens=total_tokens,
|
||||
total_latency_ms=total_latency,
|
||||
path=path,
|
||||
total_retries=total_retries_count,
|
||||
nodes_with_failures=nodes_failed,
|
||||
retry_details=dict(node_retry_counts),
|
||||
had_partial_failures=len(nodes_failed) > 0,
|
||||
execution_quality="failed",
|
||||
node_visit_counts=dict(node_visit_counts),
|
||||
)
|
||||
|
||||
# Check if we just executed a pause node - if so, save state and return
|
||||
# This must happen BEFORE determining next node, since pause nodes may have no edges
|
||||
@@ -476,6 +569,7 @@ class GraphExecutor:
|
||||
retry_details=dict(node_retry_counts),
|
||||
had_partial_failures=len(nodes_failed) > 0,
|
||||
execution_quality=exec_quality,
|
||||
node_visit_counts=dict(node_visit_counts),
|
||||
)
|
||||
|
||||
# Check if this is a terminal node - if so, we're done
|
||||
@@ -596,6 +690,7 @@ class GraphExecutor:
|
||||
retry_details=dict(node_retry_counts),
|
||||
had_partial_failures=len(nodes_failed) > 0,
|
||||
execution_quality=exec_quality,
|
||||
node_visit_counts=dict(node_visit_counts),
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
@@ -622,6 +717,7 @@ class GraphExecutor:
|
||||
retry_details=dict(node_retry_counts),
|
||||
had_partial_failures=len(nodes_failed) > 0,
|
||||
execution_quality="failed",
|
||||
node_visit_counts=dict(node_visit_counts),
|
||||
)
|
||||
|
||||
def _build_context(
|
||||
@@ -658,7 +754,15 @@ class GraphExecutor:
|
||||
)
|
||||
|
||||
# Valid node types - no ambiguous "llm" type allowed
|
||||
VALID_NODE_TYPES = {"llm_tool_use", "llm_generate", "router", "function", "human_input"}
|
||||
VALID_NODE_TYPES = {
|
||||
"llm_tool_use",
|
||||
"llm_generate",
|
||||
"router",
|
||||
"function",
|
||||
"human_input",
|
||||
"event_loop",
|
||||
}
|
||||
DEPRECATED_NODE_TYPES = {"llm_tool_use": "event_loop", "llm_generate": "event_loop"}
|
||||
|
||||
def _get_node_implementation(
|
||||
self, node_spec: NodeSpec, cleanup_llm_model: str | None = None
|
||||
@@ -676,6 +780,17 @@ class GraphExecutor:
|
||||
f"Use 'llm_tool_use' for nodes that call tools, 'llm_generate' for text generation."
|
||||
)
|
||||
|
||||
# Warn on deprecated node types
|
||||
if node_spec.node_type in self.DEPRECATED_NODE_TYPES:
|
||||
replacement = self.DEPRECATED_NODE_TYPES[node_spec.node_type]
|
||||
warnings.warn(
|
||||
f"Node type '{node_spec.node_type}' is deprecated. "
|
||||
f"Use '{replacement}' instead. "
|
||||
f"Node: '{node_spec.id}'",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
# Create based on type
|
||||
if node_spec.node_type == "llm_tool_use":
|
||||
if not node_spec.tools:
|
||||
@@ -713,6 +828,45 @@ class GraphExecutor:
|
||||
cleanup_llm_model=cleanup_llm_model,
|
||||
)
|
||||
|
||||
if node_spec.node_type == "event_loop":
|
||||
# Auto-create EventLoopNode with sensible defaults.
|
||||
# Custom configs can still be pre-registered via node_registry.
|
||||
from framework.graph.event_loop_node import EventLoopNode, LoopConfig
|
||||
|
||||
# Create a FileConversationStore if a storage path is available
|
||||
conv_store = None
|
||||
if self._storage_path:
|
||||
from framework.storage.conversation_store import FileConversationStore
|
||||
|
||||
store_path = self._storage_path / "conversations" / node_spec.id
|
||||
conv_store = FileConversationStore(base_path=store_path)
|
||||
|
||||
# Auto-configure spillover directory for large tool results.
|
||||
# When a tool result exceeds max_tool_result_chars, the full
|
||||
# content is written to spillover_dir and the agent gets a
|
||||
# truncated preview with instructions to use load_data().
|
||||
spillover = None
|
||||
if self._storage_path:
|
||||
spillover = str(self._storage_path / "data")
|
||||
|
||||
node = EventLoopNode(
|
||||
event_bus=self._event_bus,
|
||||
judge=None, # implicit judge: accept when output_keys are filled
|
||||
config=LoopConfig(
|
||||
max_iterations=100 if node_spec.client_facing else 50,
|
||||
max_tool_calls_per_turn=10,
|
||||
stall_detection_threshold=3,
|
||||
max_history_tokens=32000,
|
||||
max_tool_result_chars=3_000,
|
||||
spillover_dir=spillover,
|
||||
),
|
||||
tool_executor=self.tool_executor,
|
||||
conversation_store=conv_store,
|
||||
)
|
||||
# Cache so inject_event() is reachable for client-facing input
|
||||
self.node_registry[node_spec.id] = node
|
||||
return node
|
||||
|
||||
# Should never reach here due to validation above
|
||||
raise RuntimeError(f"Unhandled node type: {node_spec.node_type}")
|
||||
|
||||
@@ -740,9 +894,12 @@ class GraphExecutor:
|
||||
source_node_name=current_node_spec.name if current_node_spec else current_node_id,
|
||||
target_node_name=target_node_spec.name if target_node_spec else edge.target,
|
||||
):
|
||||
# Validate and clean output before mapping inputs
|
||||
# Validate and clean output before mapping inputs.
|
||||
# Use full memory state (not just result.output) because
|
||||
# target input_keys may come from earlier nodes in the
|
||||
# graph, not only from the immediate source node.
|
||||
if self.cleansing_config.enabled and target_node_spec:
|
||||
output_to_validate = result.output
|
||||
output_to_validate = memory.read_all()
|
||||
|
||||
validation = self.output_cleaner.validate_output(
|
||||
output=output_to_validate,
|
||||
@@ -823,6 +980,21 @@ class GraphExecutor:
|
||||
):
|
||||
traversable.append(edge)
|
||||
|
||||
# Priority filtering for CONDITIONAL edges:
|
||||
# When multiple CONDITIONAL edges match, keep only the highest-priority
|
||||
# group. This prevents mutually-exclusive conditional branches (e.g.
|
||||
# forward vs. feedback) from incorrectly triggering fan-out.
|
||||
# ON_SUCCESS / other edge types are unaffected.
|
||||
if len(traversable) > 1:
|
||||
conditionals = [e for e in traversable if e.condition == EdgeCondition.CONDITIONAL]
|
||||
if len(conditionals) > 1:
|
||||
max_prio = max(e.priority for e in conditionals)
|
||||
traversable = [
|
||||
e
|
||||
for e in traversable
|
||||
if e.condition != EdgeCondition.CONDITIONAL or e.priority == max_prio
|
||||
]
|
||||
|
||||
return traversable
|
||||
|
||||
def _find_convergence_node(
|
||||
@@ -909,13 +1081,27 @@ class GraphExecutor:
|
||||
branch.status = "failed"
|
||||
branch.error = f"Node {branch.node_id} not found in graph"
|
||||
return branch, RuntimeError(branch.error)
|
||||
|
||||
effective_max_retries = node_spec.max_retries
|
||||
if node_spec.node_type == "event_loop":
|
||||
if effective_max_retries > 1:
|
||||
self.logger.warning(
|
||||
f"EventLoopNode '{node_spec.id}' has "
|
||||
f"max_retries={effective_max_retries}. Overriding "
|
||||
"to 1 — event loop nodes handle retry internally."
|
||||
)
|
||||
effective_max_retries = 1
|
||||
|
||||
branch.status = "running"
|
||||
|
||||
try:
|
||||
# Validate and clean output before mapping inputs (same as _follow_edges)
|
||||
# Validate and clean output before mapping inputs (same as _follow_edges).
|
||||
# Use full memory state since target input_keys may come
|
||||
# from earlier nodes, not just the immediate source.
|
||||
if self.cleansing_config.enabled and node_spec:
|
||||
mem_snapshot = memory.read_all()
|
||||
validation = self.output_cleaner.validate_output(
|
||||
output=source_result.output,
|
||||
output=mem_snapshot,
|
||||
source_node_id=source_node_spec.id if source_node_spec else "unknown",
|
||||
target_node_spec=node_spec,
|
||||
)
|
||||
@@ -926,7 +1112,7 @@ class GraphExecutor:
|
||||
f"{branch.node_id}: {validation.errors}"
|
||||
)
|
||||
cleaned_output = self.output_cleaner.clean_output(
|
||||
output=source_result.output,
|
||||
output=mem_snapshot,
|
||||
source_node_id=source_node_spec.id if source_node_spec else "unknown",
|
||||
target_node_spec=node_spec,
|
||||
validation_errors=validation.errors,
|
||||
@@ -942,19 +1128,31 @@ class GraphExecutor:
|
||||
|
||||
# Execute with retries
|
||||
last_result = None
|
||||
for attempt in range(node_spec.max_retries):
|
||||
for attempt in range(effective_max_retries):
|
||||
branch.retry_count = attempt
|
||||
|
||||
# Build context for this branch
|
||||
ctx = self._build_context(node_spec, memory, goal, mapped, graph.max_tokens)
|
||||
node_impl = self._get_node_implementation(node_spec, graph.cleanup_llm_model)
|
||||
|
||||
# Emit node-started event (skip event_loop nodes)
|
||||
if self._event_bus and node_spec.node_type != "event_loop":
|
||||
await self._event_bus.emit_node_loop_started(
|
||||
stream_id=self._stream_id, node_id=branch.node_id
|
||||
)
|
||||
|
||||
self.logger.info(
|
||||
f" ▶ Branch {node_spec.name}: executing (attempt {attempt + 1})"
|
||||
)
|
||||
result = await node_impl.execute(ctx)
|
||||
last_result = result
|
||||
|
||||
# Emit node-completed event (skip event_loop nodes)
|
||||
if self._event_bus and node_spec.node_type != "event_loop":
|
||||
await self._event_bus.emit_node_loop_completed(
|
||||
stream_id=self._stream_id, node_id=branch.node_id, iterations=1
|
||||
)
|
||||
|
||||
if result.success:
|
||||
# Write outputs to shared memory using async write
|
||||
for key, value in result.output.items():
|
||||
@@ -970,7 +1168,7 @@ class GraphExecutor:
|
||||
|
||||
self.logger.warning(
|
||||
f" ↻ Branch {node_spec.name}: "
|
||||
f"retry {attempt + 1}/{node_spec.max_retries}"
|
||||
f"retry {attempt + 1}/{effective_max_retries}"
|
||||
)
|
||||
|
||||
# All retries exhausted
|
||||
@@ -979,7 +1177,7 @@ class GraphExecutor:
|
||||
branch.result = last_result
|
||||
self.logger.error(
|
||||
f" ✗ Branch {node_spec.name}: "
|
||||
f"failed after {node_spec.max_retries} attempts"
|
||||
f"failed after {effective_max_retries} attempts"
|
||||
)
|
||||
return branch, last_result
|
||||
|
||||
|
||||
@@ -12,13 +12,13 @@ Goals are:
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class GoalStatus(str, Enum):
|
||||
class GoalStatus(StrEnum):
|
||||
"""Lifecycle status of a goal."""
|
||||
|
||||
DRAFT = "draft" # Being defined
|
||||
|
||||
@@ -6,11 +6,11 @@ where agents need to gather input from humans.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
|
||||
class HITLInputType(str, Enum):
|
||||
class HITLInputType(StrEnum):
|
||||
"""Type of input expected from human."""
|
||||
|
||||
FREE_TEXT = "free_text" # Open-ended text response
|
||||
|
||||
@@ -16,10 +16,12 @@ Protocol:
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import inspect
|
||||
import logging
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import Callable
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import UTC
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
@@ -153,7 +155,10 @@ class NodeSpec(BaseModel):
|
||||
# Node behavior type
|
||||
node_type: str = Field(
|
||||
default="llm_tool_use",
|
||||
description="Type: 'llm_tool_use', 'llm_generate', 'function', 'router', 'human_input'",
|
||||
description=(
|
||||
"Type: 'event_loop', 'function', 'router', 'human_input'. "
|
||||
"Deprecated: 'llm_tool_use', 'llm_generate' (use 'event_loop' instead)."
|
||||
),
|
||||
)
|
||||
|
||||
# Data flow
|
||||
@@ -205,6 +210,15 @@ class NodeSpec(BaseModel):
|
||||
max_retries: int = Field(default=3)
|
||||
retry_on: list[str] = Field(default_factory=list, description="Error types to retry on")
|
||||
|
||||
# Visit limits (for feedback/callback edges)
|
||||
max_node_visits: int = Field(
|
||||
default=1,
|
||||
description=(
|
||||
"Max times this node executes in one graph run. "
|
||||
"Set >1 for feedback loops. 0 = unlimited (max_steps guards)."
|
||||
),
|
||||
)
|
||||
|
||||
# Pydantic model for output validation
|
||||
output_model: type[BaseModel] | None = Field(
|
||||
default=None,
|
||||
@@ -218,6 +232,12 @@ class NodeSpec(BaseModel):
|
||||
description="Maximum retries when Pydantic validation fails (with feedback to LLM)",
|
||||
)
|
||||
|
||||
# Client-facing behavior
|
||||
client_facing: bool = Field(
|
||||
default=False,
|
||||
description="If True, this node streams output to the end user and can request input.",
|
||||
)
|
||||
|
||||
model_config = {"extra": "allow", "arbitrary_types_allowed": True}
|
||||
|
||||
|
||||
@@ -1348,7 +1368,9 @@ Expected output keys: {output_keys}
|
||||
LLM Response:
|
||||
{raw_response}
|
||||
|
||||
Output ONLY the JSON object, nothing else."""
|
||||
Output ONLY the JSON object, nothing else.
|
||||
If no valid JSON object exists in the response, output exactly: {{"error": "NO_JSON_FOUND"}}
|
||||
Do NOT fabricate data or return empty objects."""
|
||||
|
||||
try:
|
||||
result = cleaner_llm.complete(
|
||||
@@ -1395,6 +1417,14 @@ Output ONLY the JSON object, nothing else."""
|
||||
parsed = json.loads(cleaned)
|
||||
except json.JSONDecodeError:
|
||||
parsed = json.loads(_fix_unescaped_newlines_in_json(cleaned))
|
||||
|
||||
# Validate LLM didn't return empty or fabricated data
|
||||
if parsed.get("error") == "NO_JSON_FOUND":
|
||||
raise ValueError("Cannot parse JSON from response")
|
||||
if not parsed or parsed == {}:
|
||||
raise ValueError("Cannot parse JSON from response")
|
||||
if all(v is None for v in parsed.values()):
|
||||
raise ValueError("Cannot parse JSON from response")
|
||||
logger.info(" ✓ LLM cleaned JSON output")
|
||||
return parsed
|
||||
|
||||
@@ -1504,6 +1534,8 @@ Output ONLY the JSON object, nothing else."""
|
||||
|
||||
def _build_system_prompt(self, ctx: NodeContext) -> str:
|
||||
"""Build the system prompt."""
|
||||
from datetime import datetime
|
||||
|
||||
parts = []
|
||||
|
||||
if ctx.node_spec.system_prompt:
|
||||
@@ -1526,6 +1558,15 @@ Output ONLY the JSON object, nothing else."""
|
||||
|
||||
parts.append(prompt)
|
||||
|
||||
# Inject current datetime so LLM knows "now"
|
||||
utc_dt = datetime.now(UTC)
|
||||
local_dt = datetime.now().astimezone()
|
||||
local_tz_name = local_dt.tzname() or "Unknown"
|
||||
parts.append("\n## Runtime Context")
|
||||
parts.append(f"- Current Date/Time (UTC): {utc_dt.isoformat()}")
|
||||
parts.append(f"- Local Timezone: {local_tz_name}")
|
||||
parts.append(f"- Current Date/Time (Local): {local_dt.isoformat()}")
|
||||
|
||||
if ctx.goal_context:
|
||||
parts.append("\n# Goal Context")
|
||||
parts.append(ctx.goal_context)
|
||||
@@ -1727,8 +1768,19 @@ class FunctionNode(NodeProtocol):
|
||||
start = time.time()
|
||||
|
||||
try:
|
||||
# Call the function
|
||||
result = self.func(**ctx.input_data)
|
||||
# Filter input_data to only declared input_keys to prevent
|
||||
# leaking extra memory keys from upstream nodes.
|
||||
if ctx.node_spec.input_keys:
|
||||
filtered = {
|
||||
k: v for k, v in ctx.input_data.items() if k in ctx.node_spec.input_keys
|
||||
}
|
||||
else:
|
||||
filtered = ctx.input_data
|
||||
|
||||
# Call the function (supports both sync and async)
|
||||
result = self.func(**filtered)
|
||||
if inspect.isawaitable(result):
|
||||
result = await result
|
||||
|
||||
latency_ms = int((time.time() - start) * 1000)
|
||||
|
||||
|
||||
@@ -144,8 +144,11 @@ class OutputCleaner:
|
||||
errors = []
|
||||
warnings = []
|
||||
|
||||
# Check 1: Required input keys present
|
||||
# Check 1: Required input keys present (skip nullable keys)
|
||||
nullable = set(getattr(target_node_spec, "nullable_output_keys", None) or [])
|
||||
for key in target_node_spec.input_keys:
|
||||
if key in nullable:
|
||||
continue
|
||||
if key not in output:
|
||||
errors.append(f"Missing required key: '{key}'")
|
||||
continue
|
||||
|
||||
@@ -11,13 +11,13 @@ The Plan is the contract between the external planner and the executor:
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ActionType(str, Enum):
|
||||
class ActionType(StrEnum):
|
||||
"""Types of actions a PlanStep can perform."""
|
||||
|
||||
LLM_CALL = "llm_call" # Call LLM for generation
|
||||
@@ -27,7 +27,7 @@ class ActionType(str, Enum):
|
||||
CODE_EXECUTION = "code_execution" # Execute dynamic code (sandboxed)
|
||||
|
||||
|
||||
class StepStatus(str, Enum):
|
||||
class StepStatus(StrEnum):
|
||||
"""Status of a plan step."""
|
||||
|
||||
PENDING = "pending"
|
||||
@@ -56,7 +56,7 @@ class StepStatus(str, Enum):
|
||||
return self == StepStatus.COMPLETED
|
||||
|
||||
|
||||
class ApprovalDecision(str, Enum):
|
||||
class ApprovalDecision(StrEnum):
|
||||
"""Human decision on a step requiring approval."""
|
||||
|
||||
APPROVE = "approve" # Execute as planned
|
||||
@@ -91,7 +91,7 @@ class ApprovalResult(BaseModel):
|
||||
model_config = {"extra": "allow"}
|
||||
|
||||
|
||||
class JudgmentAction(str, Enum):
|
||||
class JudgmentAction(StrEnum):
|
||||
"""Actions the judge can take after evaluating a step."""
|
||||
|
||||
ACCEPT = "accept" # Step completed successfully, continue
|
||||
@@ -423,7 +423,7 @@ class Plan(BaseModel):
|
||||
}
|
||||
|
||||
|
||||
class ExecutionStatus(str, Enum):
|
||||
class ExecutionStatus(StrEnum):
|
||||
"""Status of plan execution."""
|
||||
|
||||
COMPLETED = "completed"
|
||||
|
||||
@@ -75,16 +75,6 @@ class SafeEvalVisitor(ast.NodeVisitor):
|
||||
def visit_Constant(self, node: ast.Constant) -> Any:
|
||||
return node.value
|
||||
|
||||
# --- Number/String/Bytes/NameConstant (Python < 3.8 compat if needed) ---
|
||||
def visit_Num(self, node: ast.Num) -> Any:
|
||||
return node.n
|
||||
|
||||
def visit_Str(self, node: ast.Str) -> Any:
|
||||
return node.s
|
||||
|
||||
def visit_NameConstant(self, node: ast.NameConstant) -> Any:
|
||||
return node.value
|
||||
|
||||
# --- Data Structures ---
|
||||
def visit_List(self, node: ast.List) -> list:
|
||||
return [self.visit(elt) for elt in node.elts]
|
||||
|
||||
@@ -126,14 +126,16 @@ class OutputValidator:
|
||||
|
||||
for key in expected_keys:
|
||||
if key not in output:
|
||||
errors.append(f"Missing required output key: '{key}'")
|
||||
if key not in nullable_keys:
|
||||
errors.append(f"Missing required output key: '{key}'")
|
||||
elif not allow_empty:
|
||||
value = output[key]
|
||||
if value is None:
|
||||
if key not in nullable_keys:
|
||||
errors.append(f"Output key '{key}' is None")
|
||||
elif isinstance(value, str) and len(value.strip()) == 0:
|
||||
errors.append(f"Output key '{key}' is empty string")
|
||||
if key not in nullable_keys:
|
||||
errors.append(f"Output key '{key}' is empty string")
|
||||
|
||||
return ValidationResult(success=len(errors) == 0, errors=errors)
|
||||
|
||||
@@ -205,7 +207,7 @@ class OutputValidator:
|
||||
def validate_no_hallucination(
|
||||
self,
|
||||
output: dict[str, Any],
|
||||
max_length: int = 10000,
|
||||
max_length: int = 50000,
|
||||
) -> ValidationResult:
|
||||
"""
|
||||
Check for signs of LLM hallucination in output values.
|
||||
|
||||
@@ -1,8 +1,31 @@
|
||||
"""LLM provider abstraction."""
|
||||
|
||||
from framework.llm.provider import LLMProvider, LLMResponse
|
||||
from framework.llm.stream_events import (
|
||||
FinishEvent,
|
||||
ReasoningDeltaEvent,
|
||||
ReasoningStartEvent,
|
||||
StreamErrorEvent,
|
||||
StreamEvent,
|
||||
TextDeltaEvent,
|
||||
TextEndEvent,
|
||||
ToolCallEvent,
|
||||
ToolResultEvent,
|
||||
)
|
||||
|
||||
__all__ = ["LLMProvider", "LLMResponse"]
|
||||
__all__ = [
|
||||
"LLMProvider",
|
||||
"LLMResponse",
|
||||
"StreamEvent",
|
||||
"TextDeltaEvent",
|
||||
"TextEndEvent",
|
||||
"ToolCallEvent",
|
||||
"ToolResultEvent",
|
||||
"ReasoningStartEvent",
|
||||
"ReasoningDeltaEvent",
|
||||
"FinishEvent",
|
||||
"StreamErrorEvent",
|
||||
]
|
||||
|
||||
try:
|
||||
from framework.llm.anthropic import AnthropicProvider # noqa: F401
|
||||
|
||||
@@ -18,7 +18,7 @@ def _get_api_key_from_credential_store() -> str | None:
|
||||
try:
|
||||
from aden_tools.credentials import CredentialStoreAdapter
|
||||
|
||||
creds = CredentialStoreAdapter.with_env_storage()
|
||||
creds = CredentialStoreAdapter.default()
|
||||
if creds.is_available("anthropic"):
|
||||
return creds.get("anthropic")
|
||||
except ImportError:
|
||||
|
||||
@@ -7,10 +7,11 @@ Groq, and local models.
|
||||
See: https://docs.litellm.ai/docs/providers
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from collections.abc import Callable
|
||||
from collections.abc import AsyncIterator, Callable
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
@@ -23,6 +24,7 @@ except ImportError:
|
||||
RateLimitError = Exception # type: ignore[assignment, misc]
|
||||
|
||||
from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolResult, ToolUse
|
||||
from framework.llm.stream_events import StreamEvent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -145,7 +147,7 @@ class LiteLLMProvider(LLMProvider):
|
||||
|
||||
if litellm is None:
|
||||
raise ImportError(
|
||||
"LiteLLM is not installed. Please install it with: pip install litellm"
|
||||
"LiteLLM is not installed. Please install it with: uv pip install litellm"
|
||||
)
|
||||
|
||||
def _completion_with_rate_limit_retry(self, **kwargs: Any) -> Any:
|
||||
@@ -161,11 +163,24 @@ class LiteLLMProvider(LLMProvider):
|
||||
content = response.choices[0].message.content if response.choices else None
|
||||
has_tool_calls = bool(response.choices and response.choices[0].message.tool_calls)
|
||||
if not content and not has_tool_calls:
|
||||
# If the conversation ends with an assistant message,
|
||||
# an empty response is expected — don't retry.
|
||||
messages = kwargs.get("messages", [])
|
||||
last_role = next(
|
||||
(m["role"] for m in reversed(messages) if m.get("role") != "system"),
|
||||
None,
|
||||
)
|
||||
if last_role == "assistant":
|
||||
logger.debug(
|
||||
"[retry] Empty response after assistant message — "
|
||||
"expected, not retrying."
|
||||
)
|
||||
return response
|
||||
|
||||
finish_reason = (
|
||||
response.choices[0].finish_reason if response.choices else "unknown"
|
||||
)
|
||||
# Dump full request to file for debugging
|
||||
messages = kwargs.get("messages", [])
|
||||
token_count, token_method = _estimate_tokens(model, messages)
|
||||
dump_path = _dump_failed_request(
|
||||
model=model,
|
||||
@@ -378,11 +393,18 @@ class LiteLLMProvider(LLMProvider):
|
||||
|
||||
# Execute tools and add results.
|
||||
for tool_call in message.tool_calls:
|
||||
# Parse arguments
|
||||
try:
|
||||
args = json.loads(tool_call.function.arguments)
|
||||
except json.JSONDecodeError:
|
||||
args = {}
|
||||
# Surface error to LLM and skip tool execution
|
||||
current_messages.append(
|
||||
{
|
||||
"role": "tool",
|
||||
"tool_call_id": tool_call.id,
|
||||
"content": "Invalid JSON arguments provided to tool.",
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
tool_use = ToolUse(
|
||||
id=tool_call.id,
|
||||
@@ -425,3 +447,189 @@ class LiteLLMProvider(LLMProvider):
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
async def stream(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
system: str = "",
|
||||
tools: list[Tool] | None = None,
|
||||
max_tokens: int = 4096,
|
||||
) -> AsyncIterator[StreamEvent]:
|
||||
"""Stream a completion via litellm.acompletion(stream=True).
|
||||
|
||||
Yields StreamEvent objects as chunks arrive from the provider.
|
||||
Tool call arguments are accumulated across chunks and yielded as
|
||||
a single ToolCallEvent with fully parsed JSON when complete.
|
||||
|
||||
Empty responses (e.g. Gemini stealth rate-limits that return 200
|
||||
with no content) are retried with exponential backoff, mirroring
|
||||
the retry behaviour of ``_completion_with_rate_limit_retry``.
|
||||
"""
|
||||
from framework.llm.stream_events import (
|
||||
FinishEvent,
|
||||
StreamErrorEvent,
|
||||
TextDeltaEvent,
|
||||
TextEndEvent,
|
||||
ToolCallEvent,
|
||||
)
|
||||
|
||||
full_messages: list[dict[str, Any]] = []
|
||||
if system:
|
||||
full_messages.append({"role": "system", "content": system})
|
||||
full_messages.extend(messages)
|
||||
|
||||
kwargs: dict[str, Any] = {
|
||||
"model": self.model,
|
||||
"messages": full_messages,
|
||||
"max_tokens": max_tokens,
|
||||
"stream": True,
|
||||
"stream_options": {"include_usage": True},
|
||||
**self.extra_kwargs,
|
||||
}
|
||||
if self.api_key:
|
||||
kwargs["api_key"] = self.api_key
|
||||
if self.api_base:
|
||||
kwargs["api_base"] = self.api_base
|
||||
if tools:
|
||||
kwargs["tools"] = [self._tool_to_openai_format(t) for t in tools]
|
||||
|
||||
for attempt in range(RATE_LIMIT_MAX_RETRIES + 1):
|
||||
# Post-stream events (ToolCall, TextEnd, Finish) are buffered
|
||||
# because they depend on the full stream. TextDeltaEvents are
|
||||
# yielded immediately so callers see tokens in real time.
|
||||
tail_events: list[StreamEvent] = []
|
||||
accumulated_text = ""
|
||||
tool_calls_acc: dict[int, dict[str, str]] = {}
|
||||
input_tokens = 0
|
||||
output_tokens = 0
|
||||
|
||||
try:
|
||||
response = await litellm.acompletion(**kwargs) # type: ignore[union-attr]
|
||||
|
||||
async for chunk in response:
|
||||
choice = chunk.choices[0] if chunk.choices else None
|
||||
if not choice:
|
||||
continue
|
||||
|
||||
delta = choice.delta
|
||||
|
||||
# --- Text content — yield immediately for real-time streaming ---
|
||||
if delta and delta.content:
|
||||
accumulated_text += delta.content
|
||||
yield TextDeltaEvent(
|
||||
content=delta.content,
|
||||
snapshot=accumulated_text,
|
||||
)
|
||||
|
||||
# --- Tool calls (accumulate across chunks) ---
|
||||
if delta and delta.tool_calls:
|
||||
for tc in delta.tool_calls:
|
||||
idx = tc.index if hasattr(tc, "index") and tc.index is not None else 0
|
||||
if idx not in tool_calls_acc:
|
||||
tool_calls_acc[idx] = {"id": "", "name": "", "arguments": ""}
|
||||
if tc.id:
|
||||
tool_calls_acc[idx]["id"] = tc.id
|
||||
if tc.function:
|
||||
if tc.function.name:
|
||||
tool_calls_acc[idx]["name"] = tc.function.name
|
||||
if tc.function.arguments:
|
||||
tool_calls_acc[idx]["arguments"] += tc.function.arguments
|
||||
|
||||
# --- Finish ---
|
||||
if choice.finish_reason:
|
||||
for _idx, tc_data in sorted(tool_calls_acc.items()):
|
||||
try:
|
||||
parsed_args = json.loads(tc_data["arguments"])
|
||||
except (json.JSONDecodeError, KeyError):
|
||||
parsed_args = {"_raw": tc_data.get("arguments", "")}
|
||||
tail_events.append(
|
||||
ToolCallEvent(
|
||||
tool_use_id=tc_data["id"],
|
||||
tool_name=tc_data["name"],
|
||||
tool_input=parsed_args,
|
||||
)
|
||||
)
|
||||
|
||||
if accumulated_text:
|
||||
tail_events.append(TextEndEvent(full_text=accumulated_text))
|
||||
|
||||
usage = getattr(chunk, "usage", None)
|
||||
if usage:
|
||||
input_tokens = getattr(usage, "prompt_tokens", 0) or 0
|
||||
output_tokens = getattr(usage, "completion_tokens", 0) or 0
|
||||
|
||||
tail_events.append(
|
||||
FinishEvent(
|
||||
stop_reason=choice.finish_reason,
|
||||
input_tokens=input_tokens,
|
||||
output_tokens=output_tokens,
|
||||
model=self.model,
|
||||
)
|
||||
)
|
||||
|
||||
# Check whether the stream produced any real content.
|
||||
# (If text deltas were yielded above, has_content is True
|
||||
# and we skip the retry path — nothing was yielded in vain.)
|
||||
has_content = accumulated_text or tool_calls_acc
|
||||
if not has_content and attempt < RATE_LIMIT_MAX_RETRIES:
|
||||
# If the conversation ends with an assistant or tool
|
||||
# message, an empty stream is expected — the LLM has
|
||||
# nothing new to say. Don't burn retries on this;
|
||||
# let the caller (EventLoopNode) decide what to do.
|
||||
# Typical case: client_facing node where the LLM set
|
||||
# all outputs via set_output tool calls, and the tool
|
||||
# results are the last messages.
|
||||
last_role = next(
|
||||
(m["role"] for m in reversed(full_messages) if m.get("role") != "system"),
|
||||
None,
|
||||
)
|
||||
if last_role in ("assistant", "tool"):
|
||||
logger.debug(
|
||||
"[stream] Empty response after %s message — expected, not retrying.",
|
||||
last_role,
|
||||
)
|
||||
for event in tail_events:
|
||||
yield event
|
||||
return
|
||||
wait = RATE_LIMIT_BACKOFF_BASE * (2**attempt)
|
||||
token_count, token_method = _estimate_tokens(
|
||||
self.model,
|
||||
full_messages,
|
||||
)
|
||||
dump_path = _dump_failed_request(
|
||||
model=self.model,
|
||||
kwargs=kwargs,
|
||||
error_type="empty_stream",
|
||||
attempt=attempt,
|
||||
)
|
||||
logger.warning(
|
||||
f"[stream-retry] {self.model} returned empty stream — "
|
||||
f"~{token_count} tokens ({token_method}). "
|
||||
f"Request dumped to: {dump_path}. "
|
||||
f"Retrying in {wait}s "
|
||||
f"(attempt {attempt + 1}/{RATE_LIMIT_MAX_RETRIES})"
|
||||
)
|
||||
await asyncio.sleep(wait)
|
||||
continue
|
||||
|
||||
# Success (or final attempt) — flush remaining events.
|
||||
for event in tail_events:
|
||||
yield event
|
||||
return
|
||||
|
||||
except RateLimitError as e:
|
||||
if attempt < RATE_LIMIT_MAX_RETRIES:
|
||||
wait = RATE_LIMIT_BACKOFF_BASE * (2**attempt)
|
||||
logger.warning(
|
||||
f"[stream-retry] {self.model} rate limited (429): {e!s}. "
|
||||
f"Retrying in {wait}s "
|
||||
f"(attempt {attempt + 1}/{RATE_LIMIT_MAX_RETRIES})"
|
||||
)
|
||||
await asyncio.sleep(wait)
|
||||
continue
|
||||
yield StreamErrorEvent(error=str(e), recoverable=False)
|
||||
return
|
||||
|
||||
except Exception as e:
|
||||
yield StreamErrorEvent(error=str(e), recoverable=False)
|
||||
return
|
||||
|
||||
@@ -2,10 +2,16 @@
|
||||
|
||||
import json
|
||||
import re
|
||||
from collections.abc import Callable
|
||||
from collections.abc import AsyncIterator, Callable
|
||||
from typing import Any
|
||||
|
||||
from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolResult, ToolUse
|
||||
from framework.llm.stream_events import (
|
||||
FinishEvent,
|
||||
StreamEvent,
|
||||
TextDeltaEvent,
|
||||
TextEndEvent,
|
||||
)
|
||||
|
||||
|
||||
class MockLLMProvider(LLMProvider):
|
||||
@@ -175,3 +181,28 @@ class MockLLMProvider(LLMProvider):
|
||||
output_tokens=0,
|
||||
stop_reason="mock_complete",
|
||||
)
|
||||
|
||||
async def stream(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
system: str = "",
|
||||
tools: list[Tool] | None = None,
|
||||
max_tokens: int = 4096,
|
||||
) -> AsyncIterator[StreamEvent]:
|
||||
"""Stream a mock completion as word-level TextDeltaEvents.
|
||||
|
||||
Splits the mock response into words and yields each as a separate
|
||||
TextDeltaEvent with an accumulating snapshot, exercising the full
|
||||
streaming pipeline without any API calls.
|
||||
"""
|
||||
content = self._generate_mock_response(system=system, json_mode=False)
|
||||
words = content.split(" ")
|
||||
accumulated = ""
|
||||
|
||||
for i, word in enumerate(words):
|
||||
chunk = word if i == 0 else " " + word
|
||||
accumulated += chunk
|
||||
yield TextDeltaEvent(content=chunk, snapshot=accumulated)
|
||||
|
||||
yield TextEndEvent(full_text=accumulated)
|
||||
yield FinishEvent(stop_reason="mock_complete", model=self.model)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""LLM Provider abstraction for pluggable LLM backends."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import Callable
|
||||
from collections.abc import AsyncIterator, Callable
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
@@ -108,3 +108,45 @@ class LLMProvider(ABC):
|
||||
Final LLMResponse after tool use completes
|
||||
"""
|
||||
pass
|
||||
|
||||
async def stream(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
system: str = "",
|
||||
tools: list[Tool] | None = None,
|
||||
max_tokens: int = 4096,
|
||||
) -> AsyncIterator["StreamEvent"]:
|
||||
"""
|
||||
Stream a completion as an async iterator of StreamEvents.
|
||||
|
||||
Default implementation wraps complete() with synthetic events.
|
||||
Subclasses SHOULD override for true streaming.
|
||||
|
||||
Tool orchestration is the CALLER's responsibility:
|
||||
- Caller detects ToolCallEvent, executes tool, adds result
|
||||
to messages, calls stream() again.
|
||||
"""
|
||||
from framework.llm.stream_events import (
|
||||
FinishEvent,
|
||||
TextDeltaEvent,
|
||||
TextEndEvent,
|
||||
)
|
||||
|
||||
response = self.complete(
|
||||
messages=messages,
|
||||
system=system,
|
||||
tools=tools,
|
||||
max_tokens=max_tokens,
|
||||
)
|
||||
yield TextDeltaEvent(content=response.content, snapshot=response.content)
|
||||
yield TextEndEvent(full_text=response.content)
|
||||
yield FinishEvent(
|
||||
stop_reason=response.stop_reason,
|
||||
input_tokens=response.input_tokens,
|
||||
output_tokens=response.output_tokens,
|
||||
model=response.model,
|
||||
)
|
||||
|
||||
|
||||
# Deferred import target for type annotation
|
||||
from framework.llm.stream_events import StreamEvent as StreamEvent # noqa: E402, F401
|
||||
|
||||
@@ -0,0 +1,96 @@
|
||||
"""Stream event types for LLM streaming responses.
|
||||
|
||||
Defines a discriminated union of frozen dataclasses representing every event
|
||||
a streaming LLM call can produce. These types form the contract between the
|
||||
LLM provider layer, EventLoopNode, event bus, persistence, and monitoring.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Literal
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class TextDeltaEvent:
|
||||
"""A chunk of text produced by the LLM."""
|
||||
|
||||
type: Literal["text_delta"] = "text_delta"
|
||||
content: str = "" # this chunk's text
|
||||
snapshot: str = "" # accumulated text so far
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class TextEndEvent:
|
||||
"""Signals that text generation is complete."""
|
||||
|
||||
type: Literal["text_end"] = "text_end"
|
||||
full_text: str = ""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ToolCallEvent:
|
||||
"""The LLM has requested a tool call."""
|
||||
|
||||
type: Literal["tool_call"] = "tool_call"
|
||||
tool_use_id: str = ""
|
||||
tool_name: str = ""
|
||||
tool_input: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ToolResultEvent:
|
||||
"""Result of executing a tool call."""
|
||||
|
||||
type: Literal["tool_result"] = "tool_result"
|
||||
tool_use_id: str = ""
|
||||
content: str = ""
|
||||
is_error: bool = False
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ReasoningStartEvent:
|
||||
"""The LLM has started a reasoning/thinking block."""
|
||||
|
||||
type: Literal["reasoning_start"] = "reasoning_start"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ReasoningDeltaEvent:
|
||||
"""A chunk of reasoning/thinking content."""
|
||||
|
||||
type: Literal["reasoning_delta"] = "reasoning_delta"
|
||||
content: str = ""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class FinishEvent:
|
||||
"""The LLM has finished generating."""
|
||||
|
||||
type: Literal["finish"] = "finish"
|
||||
stop_reason: str = ""
|
||||
input_tokens: int = 0
|
||||
output_tokens: int = 0
|
||||
model: str = ""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class StreamErrorEvent:
|
||||
"""An error occurred during streaming."""
|
||||
|
||||
type: Literal["error"] = "error"
|
||||
error: str = ""
|
||||
recoverable: bool = False
|
||||
|
||||
|
||||
# Discriminated union of all stream event types
|
||||
StreamEvent = (
|
||||
TextDeltaEvent
|
||||
| TextEndEvent
|
||||
| ToolCallEvent
|
||||
| ToolResultEvent
|
||||
| ReasoningStartEvent
|
||||
| ReasoningDeltaEvent
|
||||
| FinishEvent
|
||||
| StreamErrorEvent
|
||||
)
|
||||
@@ -4,7 +4,7 @@ MCP Server for Agent Building Tools
|
||||
Exposes tools for building goal-driven agents via the Model Context Protocol.
|
||||
|
||||
Usage:
|
||||
python -m framework.mcp.agent_builder_server
|
||||
uv run python -m framework.mcp.agent_builder_server
|
||||
"""
|
||||
|
||||
import json
|
||||
@@ -44,6 +44,7 @@ class BuildSession:
|
||||
self.nodes: list[NodeSpec] = []
|
||||
self.edges: list[EdgeSpec] = []
|
||||
self.mcp_servers: list[dict] = [] # MCP server configurations
|
||||
self.loop_config: dict = {} # LoopConfig parameters for EventLoopNodes
|
||||
self.created_at = datetime.now().isoformat()
|
||||
self.last_modified = datetime.now().isoformat()
|
||||
|
||||
@@ -56,6 +57,7 @@ class BuildSession:
|
||||
"nodes": [n.model_dump() for n in self.nodes],
|
||||
"edges": [e.model_dump() for e in self.edges],
|
||||
"mcp_servers": self.mcp_servers,
|
||||
"loop_config": self.loop_config,
|
||||
"created_at": self.created_at,
|
||||
"last_modified": self.last_modified,
|
||||
}
|
||||
@@ -102,6 +104,9 @@ class BuildSession:
|
||||
# Restore MCP servers
|
||||
session.mcp_servers = data.get("mcp_servers", [])
|
||||
|
||||
# Restore loop config
|
||||
session.loop_config = data.get("loop_config", {})
|
||||
|
||||
return session
|
||||
|
||||
|
||||
@@ -516,19 +521,63 @@ def _validate_tool_credentials(tools_list: list[str]) -> dict | None:
|
||||
return None
|
||||
|
||||
|
||||
def _validate_agent_path(agent_path: str) -> tuple[Path | None, str | None]:
|
||||
"""
|
||||
Validate and normalize agent_path.
|
||||
|
||||
Returns:
|
||||
(Path, None) if valid
|
||||
(None, error_json) if invalid
|
||||
"""
|
||||
if not agent_path:
|
||||
return None, json.dumps(
|
||||
{
|
||||
"success": False,
|
||||
"error": "agent_path is required (e.g., 'exports/my_agent')",
|
||||
}
|
||||
)
|
||||
|
||||
path = Path(agent_path)
|
||||
|
||||
if not path.exists():
|
||||
return None, json.dumps(
|
||||
{
|
||||
"success": False,
|
||||
"error": f"Agent path not found: {path}",
|
||||
"hint": "Run export_graph to create an agent in exports/ first",
|
||||
}
|
||||
)
|
||||
|
||||
return path, None
|
||||
|
||||
|
||||
@mcp.tool()
|
||||
def add_node(
|
||||
node_id: Annotated[str, "Unique identifier for the node"],
|
||||
name: Annotated[str, "Human-readable name"],
|
||||
description: Annotated[str, "What this node does"],
|
||||
node_type: Annotated[str, "Type: llm_generate, llm_tool_use, router, or function"],
|
||||
node_type: Annotated[
|
||||
str,
|
||||
"Type: event_loop (recommended), function, router. "
|
||||
"Deprecated: llm_generate, llm_tool_use (use event_loop instead)",
|
||||
],
|
||||
input_keys: Annotated[str, "JSON array of keys this node reads from shared memory"],
|
||||
output_keys: Annotated[str, "JSON array of keys this node writes to shared memory"],
|
||||
system_prompt: Annotated[str, "Instructions for LLM nodes"] = "",
|
||||
tools: Annotated[str, "JSON array of tool names for llm_tool_use nodes"] = "[]",
|
||||
tools: Annotated[str, "JSON array of tool names for event_loop or llm_tool_use nodes"] = "[]",
|
||||
routes: Annotated[
|
||||
str, "JSON object mapping conditions to target node IDs for router nodes"
|
||||
] = "{}",
|
||||
client_facing: Annotated[
|
||||
bool, "If True, node streams output to user and blocks for input between turns"
|
||||
] = False,
|
||||
nullable_output_keys: Annotated[
|
||||
str, "JSON array of output keys that may remain unset (for mutually exclusive outputs)"
|
||||
] = "[]",
|
||||
max_node_visits: Annotated[
|
||||
int,
|
||||
"Max times this node executes per graph run. Set >1 for feedback loop targets. 0=unlimited",
|
||||
] = 1,
|
||||
) -> str:
|
||||
"""Add a node to the agent graph. Nodes process inputs and produce outputs."""
|
||||
session = get_session()
|
||||
@@ -539,6 +588,7 @@ def add_node(
|
||||
output_keys_list = json.loads(output_keys)
|
||||
tools_list = json.loads(tools)
|
||||
routes_dict = json.loads(routes)
|
||||
nullable_output_keys_list = json.loads(nullable_output_keys)
|
||||
except json.JSONDecodeError as e:
|
||||
return json.dumps(
|
||||
{
|
||||
@@ -567,6 +617,9 @@ def add_node(
|
||||
system_prompt=system_prompt or None,
|
||||
tools=tools_list,
|
||||
routes=routes_dict,
|
||||
client_facing=client_facing,
|
||||
nullable_output_keys=nullable_output_keys_list,
|
||||
max_node_visits=max_node_visits,
|
||||
)
|
||||
|
||||
session.nodes.append(node)
|
||||
@@ -586,6 +639,26 @@ def add_node(
|
||||
if node_type in ("llm_generate", "llm_tool_use") and not system_prompt:
|
||||
warnings.append(f"LLM node '{node_id}' should have a system_prompt")
|
||||
|
||||
# EventLoopNode validation
|
||||
if node_type == "event_loop" and not system_prompt:
|
||||
warnings.append(f"Event loop node '{node_id}' should have a system_prompt")
|
||||
|
||||
# Deprecated type warnings
|
||||
if node_type in ("llm_generate", "llm_tool_use"):
|
||||
warnings.append(
|
||||
f"Node type '{node_type}' is deprecated. Use 'event_loop' instead. "
|
||||
"EventLoopNode supports tool use, streaming, and judge-based evaluation."
|
||||
)
|
||||
|
||||
# nullable_output_keys must be a subset of output_keys
|
||||
if nullable_output_keys_list:
|
||||
invalid_nullable = [k for k in nullable_output_keys_list if k not in output_keys_list]
|
||||
if invalid_nullable:
|
||||
errors.append(
|
||||
f"nullable_output_keys {invalid_nullable} must be a subset of "
|
||||
f"output_keys {output_keys_list}"
|
||||
)
|
||||
|
||||
_save_session(session) # Auto-save
|
||||
|
||||
return json.dumps(
|
||||
@@ -662,6 +735,7 @@ def add_edge(
|
||||
|
||||
# Validate
|
||||
errors = []
|
||||
warnings = []
|
||||
|
||||
if not any(n.id == source for n in session.nodes):
|
||||
errors.append(f"Source node '{source}' not found")
|
||||
@@ -670,12 +744,24 @@ def add_edge(
|
||||
if edge_condition == EdgeCondition.CONDITIONAL and not condition_expr:
|
||||
errors.append(f"Conditional edge '{edge_id}' needs condition_expr")
|
||||
|
||||
# Feedback edge validation
|
||||
if priority < 0:
|
||||
target_node = next((n for n in session.nodes if n.id == target), None)
|
||||
if target_node and target_node.max_node_visits <= 1:
|
||||
warnings.append(
|
||||
f"Edge '{edge_id}' has negative priority (feedback edge) "
|
||||
f"targeting '{target}', but node '{target}' has "
|
||||
f"max_node_visits={target_node.max_node_visits}. "
|
||||
"Consider increasing max_node_visits on the target node."
|
||||
)
|
||||
|
||||
_save_session(session) # Auto-save
|
||||
|
||||
return json.dumps(
|
||||
{
|
||||
"valid": len(errors) == 0,
|
||||
"errors": errors,
|
||||
"warnings": warnings,
|
||||
"edge": edge.model_dump(),
|
||||
"total_edges": len(session.edges),
|
||||
"approval_required": True,
|
||||
@@ -709,12 +795,23 @@ def update_node(
|
||||
node_id: Annotated[str, "ID of the node to update"],
|
||||
name: Annotated[str, "Updated human-readable name"] = "",
|
||||
description: Annotated[str, "Updated description"] = "",
|
||||
node_type: Annotated[str, "Updated type: llm_generate, llm_tool_use, router, or function"] = "",
|
||||
node_type: Annotated[
|
||||
str,
|
||||
"Updated type: event_loop (recommended), function, router. "
|
||||
"Deprecated: llm_generate, llm_tool_use",
|
||||
] = "",
|
||||
input_keys: Annotated[str, "Updated JSON array of input keys"] = "",
|
||||
output_keys: Annotated[str, "Updated JSON array of output keys"] = "",
|
||||
system_prompt: Annotated[str, "Updated instructions for LLM nodes"] = "",
|
||||
tools: Annotated[str, "Updated JSON array of tool names"] = "",
|
||||
routes: Annotated[str, "Updated JSON object mapping conditions to target node IDs"] = "",
|
||||
client_facing: Annotated[
|
||||
str, "Updated client-facing flag ('true'/'false', empty=no change)"
|
||||
] = "",
|
||||
nullable_output_keys: Annotated[
|
||||
str, "Updated JSON array of nullable output keys (empty=no change)"
|
||||
] = "",
|
||||
max_node_visits: Annotated[int, "Updated max node visits per graph run. 0=no change"] = 0,
|
||||
) -> str:
|
||||
"""Update an existing node in the agent graph. Only provided fields will be updated."""
|
||||
session = get_session()
|
||||
@@ -735,6 +832,9 @@ def update_node(
|
||||
output_keys_list = json.loads(output_keys) if output_keys else None
|
||||
tools_list = json.loads(tools) if tools else None
|
||||
routes_dict = json.loads(routes) if routes else None
|
||||
nullable_output_keys_list = (
|
||||
json.loads(nullable_output_keys) if nullable_output_keys else None
|
||||
)
|
||||
except json.JSONDecodeError as e:
|
||||
return json.dumps(
|
||||
{
|
||||
@@ -767,6 +867,12 @@ def update_node(
|
||||
node.tools = tools_list
|
||||
if routes_dict is not None:
|
||||
node.routes = routes_dict
|
||||
if client_facing:
|
||||
node.client_facing = client_facing.lower() == "true"
|
||||
if nullable_output_keys_list is not None:
|
||||
node.nullable_output_keys = nullable_output_keys_list
|
||||
if max_node_visits > 0:
|
||||
node.max_node_visits = max_node_visits
|
||||
|
||||
# Validate
|
||||
errors = []
|
||||
@@ -779,6 +885,26 @@ def update_node(
|
||||
if node.node_type in ("llm_generate", "llm_tool_use") and not node.system_prompt:
|
||||
warnings.append(f"LLM node '{node_id}' should have a system_prompt")
|
||||
|
||||
# EventLoopNode validation
|
||||
if node.node_type == "event_loop" and not node.system_prompt:
|
||||
warnings.append(f"Event loop node '{node_id}' should have a system_prompt")
|
||||
|
||||
# Deprecated type warnings
|
||||
if node.node_type in ("llm_generate", "llm_tool_use"):
|
||||
warnings.append(
|
||||
f"Node type '{node.node_type}' is deprecated. Use 'event_loop' instead. "
|
||||
"EventLoopNode supports tool use, streaming, and judge-based evaluation."
|
||||
)
|
||||
|
||||
# nullable_output_keys must be a subset of output_keys
|
||||
if node.nullable_output_keys:
|
||||
invalid_nullable = [k for k in node.nullable_output_keys if k not in node.output_keys]
|
||||
if invalid_nullable:
|
||||
errors.append(
|
||||
f"nullable_output_keys {invalid_nullable} must be a subset of "
|
||||
f"output_keys {node.output_keys}"
|
||||
)
|
||||
|
||||
_save_session(session) # Auto-save
|
||||
|
||||
return json.dumps(
|
||||
@@ -979,17 +1105,30 @@ def validate_graph() -> str:
|
||||
errors.append(f"Unreachable nodes: {unreachable}")
|
||||
|
||||
# === CONTEXT FLOW VALIDATION ===
|
||||
# Build dependency map (node_id -> list of nodes it depends on)
|
||||
# Build dependency maps — separate forward edges from feedback edges.
|
||||
# Feedback edges (priority < 0) create cycles; they must not block the
|
||||
# topological sort. Context they carry arrives on *revisits*, not on
|
||||
# the first execution of a node.
|
||||
feedback_edge_ids = {e.id for e in session.edges if e.priority < 0}
|
||||
forward_dependencies: dict[str, list[str]] = {node.id: [] for node in session.nodes}
|
||||
feedback_sources: dict[str, list[str]] = {node.id: [] for node in session.nodes}
|
||||
# Combined map kept for error-message generation (all deps)
|
||||
dependencies: dict[str, list[str]] = {node.id: [] for node in session.nodes}
|
||||
|
||||
for edge in session.edges:
|
||||
if edge.target in dependencies:
|
||||
dependencies[edge.target].append(edge.source)
|
||||
if edge.target not in forward_dependencies:
|
||||
continue
|
||||
dependencies[edge.target].append(edge.source)
|
||||
if edge.id in feedback_edge_ids:
|
||||
feedback_sources[edge.target].append(edge.source)
|
||||
else:
|
||||
forward_dependencies[edge.target].append(edge.source)
|
||||
|
||||
# Build output map (node_id -> keys it produces)
|
||||
node_outputs: dict[str, set[str]] = {node.id: set(node.output_keys) for node in session.nodes}
|
||||
|
||||
# Compute available context for each node (what keys it can read)
|
||||
# Using topological order
|
||||
# Using topological order on the forward-edge DAG
|
||||
available_context: dict[str, set[str]] = {}
|
||||
computed = set()
|
||||
nodes_by_id = {n.id: n for n in session.nodes}
|
||||
@@ -999,7 +1138,8 @@ def validate_graph() -> str:
|
||||
# Entry nodes can only read from initial context
|
||||
initial_context_keys: set[str] = set()
|
||||
|
||||
# Compute in topological order
|
||||
# Compute in topological order (forward edges only — feedback edges
|
||||
# don't block, since their context arrives on revisits)
|
||||
remaining = {n.id for n in session.nodes}
|
||||
max_iterations = len(session.nodes) * 2
|
||||
|
||||
@@ -1008,18 +1148,23 @@ def validate_graph() -> str:
|
||||
break
|
||||
|
||||
for node_id in list(remaining):
|
||||
deps = dependencies.get(node_id, [])
|
||||
fwd_deps = forward_dependencies.get(node_id, [])
|
||||
|
||||
# Can compute if all dependencies are computed (or no dependencies)
|
||||
if all(d in computed for d in deps):
|
||||
# Collect outputs from all dependencies
|
||||
# Can compute if all FORWARD dependencies are computed
|
||||
if all(d in computed for d in fwd_deps):
|
||||
# Collect outputs from all forward dependencies
|
||||
available = set(initial_context_keys)
|
||||
for dep_id in deps:
|
||||
# Add outputs from dependency
|
||||
for dep_id in fwd_deps:
|
||||
available.update(node_outputs.get(dep_id, set()))
|
||||
# Also add what was available to the dependency (transitive)
|
||||
available.update(available_context.get(dep_id, set()))
|
||||
|
||||
# Also include context from already-computed feedback
|
||||
# sources (bonus, not blocking)
|
||||
for fb_src in feedback_sources.get(node_id, []):
|
||||
if fb_src in computed:
|
||||
available.update(node_outputs.get(fb_src, set()))
|
||||
available.update(available_context.get(fb_src, set()))
|
||||
|
||||
available_context[node_id] = available
|
||||
computed.add(node_id)
|
||||
remaining.remove(node_id)
|
||||
@@ -1029,15 +1174,37 @@ def validate_graph() -> str:
|
||||
context_errors = []
|
||||
context_warnings = []
|
||||
missing_inputs: dict[str, list[str]] = {}
|
||||
feedback_only_inputs: dict[str, list[str]] = {}
|
||||
|
||||
for node in session.nodes:
|
||||
available = available_context.get(node.id, set())
|
||||
|
||||
for input_key in node.input_keys:
|
||||
if input_key not in available:
|
||||
if node.id not in missing_inputs:
|
||||
missing_inputs[node.id] = []
|
||||
missing_inputs[node.id].append(input_key)
|
||||
# Check if this input is provided by a feedback source
|
||||
fb_provides = set()
|
||||
for fb_src in feedback_sources.get(node.id, []):
|
||||
fb_provides.update(node_outputs.get(fb_src, set()))
|
||||
fb_provides.update(available_context.get(fb_src, set()))
|
||||
|
||||
if input_key in fb_provides:
|
||||
# Input arrives via feedback edge — warn, don't error
|
||||
if node.id not in feedback_only_inputs:
|
||||
feedback_only_inputs[node.id] = []
|
||||
feedback_only_inputs[node.id].append(input_key)
|
||||
else:
|
||||
if node.id not in missing_inputs:
|
||||
missing_inputs[node.id] = []
|
||||
missing_inputs[node.id].append(input_key)
|
||||
|
||||
# Warn about feedback-only inputs (available on revisits, not first run)
|
||||
for node_id, fb_keys in feedback_only_inputs.items():
|
||||
fb_srcs = feedback_sources.get(node_id, [])
|
||||
context_warnings.append(
|
||||
f"Node '{node_id}' input(s) {fb_keys} are only provided via "
|
||||
f"feedback edge(s) from {fb_srcs}. These will be available on "
|
||||
f"revisits but not on the first execution."
|
||||
)
|
||||
|
||||
# Generate helpful error messages
|
||||
for node_id, missing in missing_inputs.items():
|
||||
@@ -1117,6 +1284,87 @@ def validate_graph() -> str:
|
||||
errors.extend(context_errors)
|
||||
warnings.extend(context_warnings)
|
||||
|
||||
# === EventLoopNode-specific validation ===
|
||||
from collections import defaultdict
|
||||
|
||||
# Detect fan-out: multiple ON_SUCCESS edges from same source
|
||||
outgoing_success: dict[str, list[str]] = defaultdict(list)
|
||||
for edge in session.edges:
|
||||
cond = edge.condition.value if hasattr(edge.condition, "value") else edge.condition
|
||||
if cond == "on_success":
|
||||
outgoing_success[edge.source].append(edge.target)
|
||||
|
||||
for source_id, targets in outgoing_success.items():
|
||||
if len(targets) > 1:
|
||||
# Client-facing fan-out: cannot target multiple client_facing nodes
|
||||
cf_targets = [
|
||||
t for t in targets if any(n.id == t and n.client_facing for n in session.nodes)
|
||||
]
|
||||
if len(cf_targets) > 1:
|
||||
errors.append(
|
||||
f"Fan-out from '{source_id}' targets multiple client_facing "
|
||||
f"nodes: {cf_targets}. Only one branch may be client-facing."
|
||||
)
|
||||
|
||||
# Output key overlap on parallel event_loop nodes
|
||||
el_targets = [
|
||||
t
|
||||
for t in targets
|
||||
if any(n.id == t and n.node_type == "event_loop" for n in session.nodes)
|
||||
]
|
||||
if len(el_targets) > 1:
|
||||
seen_keys: dict[str, str] = {}
|
||||
for nid in el_targets:
|
||||
node_obj = next((n for n in session.nodes if n.id == nid), None)
|
||||
if node_obj:
|
||||
for key in node_obj.output_keys:
|
||||
if key in seen_keys:
|
||||
errors.append(
|
||||
f"Fan-out from '{source_id}': event_loop "
|
||||
f"nodes '{seen_keys[key]}' and '{nid}' both "
|
||||
f"write to output_key '{key}'. Parallel "
|
||||
"nodes must have disjoint output_keys."
|
||||
)
|
||||
else:
|
||||
seen_keys[key] = nid
|
||||
|
||||
# Feedback loop validation: targets should allow re-visits
|
||||
for edge in session.edges:
|
||||
if edge.priority < 0:
|
||||
target_node = next((n for n in session.nodes if n.id == edge.target), None)
|
||||
if target_node and target_node.max_node_visits <= 1:
|
||||
warnings.append(
|
||||
f"Feedback edge '{edge.id}' targets '{edge.target}' "
|
||||
f"which has max_node_visits={target_node.max_node_visits}. "
|
||||
"Consider setting max_node_visits > 1."
|
||||
)
|
||||
|
||||
# nullable_output_keys must be subset of output_keys
|
||||
for node in session.nodes:
|
||||
if node.nullable_output_keys:
|
||||
invalid = [k for k in node.nullable_output_keys if k not in node.output_keys]
|
||||
if invalid:
|
||||
errors.append(
|
||||
f"Node '{node.id}': nullable_output_keys {invalid} "
|
||||
f"must be a subset of output_keys {node.output_keys}"
|
||||
)
|
||||
|
||||
# Deprecated node type warnings
|
||||
deprecated_nodes = [
|
||||
{"node_id": n.id, "type": n.node_type, "replacement": "event_loop"}
|
||||
for n in session.nodes
|
||||
if n.node_type in ("llm_generate", "llm_tool_use")
|
||||
]
|
||||
for dn in deprecated_nodes:
|
||||
warnings.append(
|
||||
f"Node '{dn['node_id']}' uses deprecated type '{dn['type']}'. Use 'event_loop' instead."
|
||||
)
|
||||
|
||||
# Collect summary info
|
||||
event_loop_nodes = [n.id for n in session.nodes if n.node_type == "event_loop"]
|
||||
client_facing_nodes = [n.id for n in session.nodes if n.client_facing]
|
||||
feedback_edges = [e.id for e in session.edges if e.priority < 0]
|
||||
|
||||
return json.dumps(
|
||||
{
|
||||
"valid": len(errors) == 0,
|
||||
@@ -1133,6 +1381,10 @@ def validate_graph() -> str:
|
||||
"context_flow": {node_id: list(keys) for node_id, keys in available_context.items()}
|
||||
if available_context
|
||||
else None,
|
||||
"event_loop_nodes": event_loop_nodes,
|
||||
"client_facing_nodes": client_facing_nodes,
|
||||
"feedback_edges": feedback_edges,
|
||||
"deprecated_node_types": deprecated_nodes,
|
||||
}
|
||||
)
|
||||
|
||||
@@ -1183,6 +1435,12 @@ def _generate_readme(session: BuildSession, export_data: dict, all_tools: set) -
|
||||
if node.routes:
|
||||
routes_str = ", ".join([f"{k}→{v}" for k, v in node.routes.items()])
|
||||
node_info.append(f" - Routes: {routes_str}")
|
||||
if node.client_facing:
|
||||
node_info.append(" - Client-facing: Yes (blocks for user input)")
|
||||
if node.nullable_output_keys:
|
||||
node_info.append(f" - Nullable outputs: `{', '.join(node.nullable_output_keys)}`")
|
||||
if node.max_node_visits > 1:
|
||||
node_info.append(f" - Max visits: {node.max_node_visits}")
|
||||
nodes_section.append("\n".join(node_info))
|
||||
|
||||
# Build success criteria section
|
||||
@@ -1236,7 +1494,12 @@ def _generate_readme(session: BuildSession, export_data: dict, all_tools: set) -
|
||||
|
||||
for edge in edges:
|
||||
cond = edge.condition.value if hasattr(edge.condition, "value") else edge.condition
|
||||
readme += f"- `{edge.source}` → `{edge.target}` (condition: {cond})\n"
|
||||
priority_note = f", priority={edge.priority}" if edge.priority != 0 else ""
|
||||
feedback_note = " **[FEEDBACK]**" if edge.priority < 0 else ""
|
||||
readme += (
|
||||
f"- `{edge.source}` → `{edge.target}` "
|
||||
f"(condition: {cond}{priority_note}){feedback_note}\n"
|
||||
)
|
||||
|
||||
readme += f"""
|
||||
|
||||
@@ -1451,6 +1714,10 @@ def export_graph() -> str:
|
||||
"created_at": datetime.now().isoformat(),
|
||||
}
|
||||
|
||||
# Include loop config if configured
|
||||
if session.loop_config:
|
||||
graph_spec["loop_config"] = session.loop_config
|
||||
|
||||
# Collect all tools referenced by nodes
|
||||
all_tools = set()
|
||||
for node in session.nodes:
|
||||
@@ -1566,6 +1833,50 @@ def get_session_status() -> str:
|
||||
"nodes": [n.id for n in session.nodes],
|
||||
"edges": [(e.source, e.target) for e in session.edges],
|
||||
"mcp_servers": [s["name"] for s in session.mcp_servers],
|
||||
"event_loop_nodes": [n.id for n in session.nodes if n.node_type == "event_loop"],
|
||||
"client_facing_nodes": [n.id for n in session.nodes if n.client_facing],
|
||||
"deprecated_nodes": [
|
||||
n.id for n in session.nodes if n.node_type in ("llm_generate", "llm_tool_use")
|
||||
],
|
||||
"feedback_edges": [e.id for e in session.edges if e.priority < 0],
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@mcp.tool()
|
||||
def configure_loop(
|
||||
max_iterations: Annotated[int, "Maximum loop iterations per node execution (default 50)"] = 50,
|
||||
max_tool_calls_per_turn: Annotated[int, "Maximum tool calls per LLM turn (default 10)"] = 10,
|
||||
stall_detection_threshold: Annotated[
|
||||
int, "Consecutive identical responses before stall detection triggers (default 3)"
|
||||
] = 3,
|
||||
max_history_tokens: Annotated[
|
||||
int, "Maximum conversation history tokens before compaction (default 32000)"
|
||||
] = 32000,
|
||||
) -> str:
|
||||
"""Configure event loop parameters for EventLoopNode execution.
|
||||
|
||||
These settings control how EventLoopNodes behave at runtime:
|
||||
- max_iterations: prevents infinite loops
|
||||
- max_tool_calls_per_turn: limits tool calls per LLM response
|
||||
- stall_detection_threshold: detects when LLM repeats itself
|
||||
- max_history_tokens: triggers conversation compaction
|
||||
"""
|
||||
session = get_session()
|
||||
|
||||
session.loop_config = {
|
||||
"max_iterations": max_iterations,
|
||||
"max_tool_calls_per_turn": max_tool_calls_per_turn,
|
||||
"stall_detection_threshold": stall_detection_threshold,
|
||||
"max_history_tokens": max_history_tokens,
|
||||
}
|
||||
|
||||
_save_session(session)
|
||||
|
||||
return json.dumps(
|
||||
{
|
||||
"success": True,
|
||||
"loop_config": session.loop_config,
|
||||
}
|
||||
)
|
||||
|
||||
@@ -1861,10 +2172,41 @@ def test_node(
|
||||
result["routing_options"] = node_spec.routes
|
||||
result["simulation"] = "Router would evaluate routes based on input and select target node"
|
||||
|
||||
elif node_spec.node_type in ("llm_generate", "llm_tool_use"):
|
||||
# Show what prompt would be sent
|
||||
elif node_spec.node_type == "event_loop":
|
||||
# EventLoopNode simulation
|
||||
result["system_prompt"] = node_spec.system_prompt
|
||||
result["available_tools"] = node_spec.tools
|
||||
result["client_facing"] = node_spec.client_facing
|
||||
result["nullable_output_keys"] = node_spec.nullable_output_keys
|
||||
result["max_node_visits"] = node_spec.max_node_visits
|
||||
|
||||
if mock_llm_response:
|
||||
result["mock_response"] = mock_llm_response
|
||||
result["simulation"] = (
|
||||
"EventLoopNode would run a multi-turn streaming loop. "
|
||||
"Each iteration: LLM call -> tool execution -> judge evaluation. "
|
||||
"Loop continues until judge ACCEPTs or max_iterations reached."
|
||||
)
|
||||
else:
|
||||
cf_note = (
|
||||
"Node is client-facing: will block for user input between turns. "
|
||||
if node_spec.client_facing
|
||||
else ""
|
||||
)
|
||||
result["simulation"] = (
|
||||
"EventLoopNode would stream LLM responses, execute tool calls, "
|
||||
"and use judge evaluation to decide when to stop. "
|
||||
+ cf_note
|
||||
+ f"Max visits per graph run: {node_spec.max_node_visits}."
|
||||
)
|
||||
|
||||
elif node_spec.node_type in ("llm_generate", "llm_tool_use"):
|
||||
# Legacy LLM node types
|
||||
result["system_prompt"] = node_spec.system_prompt
|
||||
result["available_tools"] = node_spec.tools
|
||||
result["deprecation_warning"] = (
|
||||
f"Node type '{node_spec.node_type}' is deprecated. Use 'event_loop' instead."
|
||||
)
|
||||
|
||||
if mock_llm_response:
|
||||
result["mock_response"] = mock_llm_response
|
||||
@@ -1879,6 +2221,7 @@ def test_node(
|
||||
result["expected_memory_state"] = {
|
||||
"inputs_available": {k: input_data.get(k, "<not provided>") for k in node_spec.input_keys},
|
||||
"outputs_to_write": node_spec.output_keys,
|
||||
"nullable_outputs": node_spec.nullable_output_keys or [],
|
||||
}
|
||||
|
||||
return json.dumps(
|
||||
@@ -1967,13 +2310,19 @@ def test_graph(
|
||||
"writes": current_node.output_keys,
|
||||
}
|
||||
|
||||
if current_node.node_type in ("llm_generate", "llm_tool_use"):
|
||||
if current_node.node_type in ("llm_generate", "llm_tool_use", "event_loop"):
|
||||
step_info["prompt_preview"] = (
|
||||
current_node.system_prompt[:200] + "..."
|
||||
if current_node.system_prompt and len(current_node.system_prompt) > 200
|
||||
else current_node.system_prompt
|
||||
)
|
||||
step_info["tools_available"] = current_node.tools
|
||||
if current_node.node_type == "event_loop":
|
||||
step_info["event_loop_config"] = {
|
||||
"client_facing": current_node.client_facing,
|
||||
"max_node_visits": current_node.max_node_visits,
|
||||
"nullable_output_keys": current_node.nullable_output_keys,
|
||||
}
|
||||
|
||||
execution_trace.append(step_info)
|
||||
|
||||
@@ -1982,16 +2331,32 @@ def test_graph(
|
||||
step_info["is_terminal"] = True
|
||||
break
|
||||
|
||||
# Find next node via edges
|
||||
# Find next node via edges (sorted by priority, highest first)
|
||||
outgoing = sorted(
|
||||
[e for e in session.edges if e.source == current_node_id],
|
||||
key=lambda e: -e.priority,
|
||||
)
|
||||
next_node = None
|
||||
for edge in session.edges:
|
||||
if edge.source == current_node_id:
|
||||
# In dry run, assume success path
|
||||
if edge.condition.value in ("always", "on_success"):
|
||||
next_node = edge.target
|
||||
step_info["next_node"] = next_node
|
||||
step_info["edge_condition"] = edge.condition.value
|
||||
break
|
||||
for edge in outgoing:
|
||||
# In dry run, follow success/always edges (highest priority first)
|
||||
if edge.condition.value in ("always", "on_success"):
|
||||
next_node = edge.target
|
||||
step_info["next_node"] = next_node
|
||||
step_info["edge_condition"] = edge.condition.value
|
||||
step_info["edge_priority"] = edge.priority
|
||||
break
|
||||
|
||||
# Note any feedback edges from this node
|
||||
feedback = [e for e in outgoing if e.priority < 0]
|
||||
if feedback:
|
||||
step_info["feedback_edges"] = [
|
||||
{
|
||||
"target": e.target,
|
||||
"condition_expr": e.condition_expr,
|
||||
"priority": e.priority,
|
||||
}
|
||||
for e in feedback
|
||||
]
|
||||
|
||||
if next_node is None:
|
||||
step_info["note"] = "No outgoing edge found (end of path)"
|
||||
@@ -2597,10 +2962,11 @@ def generate_constraint_tests(
|
||||
if not agent_path and _session:
|
||||
agent_path = f"exports/{_session.name}"
|
||||
|
||||
if not agent_path:
|
||||
return json.dumps({"error": "agent_path required (e.g., 'exports/my_agent')"})
|
||||
path, err = _validate_agent_path(agent_path)
|
||||
if err:
|
||||
return err
|
||||
|
||||
agent_module = _get_agent_module_from_path(agent_path)
|
||||
agent_module = _get_agent_module_from_path(path)
|
||||
|
||||
# Format constraints for display
|
||||
constraints_formatted = (
|
||||
@@ -2619,9 +2985,9 @@ def generate_constraint_tests(
|
||||
return json.dumps(
|
||||
{
|
||||
"goal_id": goal_id,
|
||||
"agent_path": agent_path,
|
||||
"agent_path": str(path),
|
||||
"agent_module": agent_module,
|
||||
"output_file": f"{agent_path}/tests/test_constraints.py",
|
||||
"output_file": f"{str(path)}/tests/test_constraints.py",
|
||||
"constraints": [c.model_dump() for c in goal.constraints] if goal.constraints else [],
|
||||
"constraints_formatted": constraints_formatted,
|
||||
"test_guidelines": {
|
||||
@@ -2677,10 +3043,11 @@ def generate_success_tests(
|
||||
if not agent_path and _session:
|
||||
agent_path = f"exports/{_session.name}"
|
||||
|
||||
if not agent_path:
|
||||
return json.dumps({"error": "agent_path required (e.g., 'exports/my_agent')"})
|
||||
path, err = _validate_agent_path(agent_path)
|
||||
if err:
|
||||
return err
|
||||
|
||||
agent_module = _get_agent_module_from_path(agent_path)
|
||||
agent_module = _get_agent_module_from_path(path)
|
||||
|
||||
# Parse node/tool names for context
|
||||
nodes = [n.strip() for n in node_names.split(",") if n.strip()]
|
||||
@@ -2705,9 +3072,9 @@ def generate_success_tests(
|
||||
return json.dumps(
|
||||
{
|
||||
"goal_id": goal_id,
|
||||
"agent_path": agent_path,
|
||||
"agent_path": str(path),
|
||||
"agent_module": agent_module,
|
||||
"output_file": f"{agent_path}/tests/test_success_criteria.py",
|
||||
"output_file": f"{str(path)}/tests/test_success_criteria.py",
|
||||
"success_criteria": [c.model_dump() for c in goal.success_criteria]
|
||||
if goal.success_criteria
|
||||
else [],
|
||||
@@ -2766,7 +3133,11 @@ def run_tests(
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
tests_dir = Path(agent_path) / "tests"
|
||||
path, err = _validate_agent_path(agent_path)
|
||||
if err:
|
||||
return err
|
||||
|
||||
tests_dir = path / "tests"
|
||||
|
||||
if not tests_dir.exists():
|
||||
return json.dumps(
|
||||
@@ -2957,10 +3328,11 @@ def debug_test(
|
||||
if not agent_path and _session:
|
||||
agent_path = f"exports/{_session.name}"
|
||||
|
||||
if not agent_path:
|
||||
return json.dumps({"error": "agent_path required (e.g., 'exports/my_agent')"})
|
||||
path, err = _validate_agent_path(agent_path)
|
||||
if err:
|
||||
return err
|
||||
|
||||
tests_dir = Path(agent_path) / "tests"
|
||||
tests_dir = path / "tests"
|
||||
|
||||
if not tests_dir.exists():
|
||||
return json.dumps(
|
||||
@@ -3101,10 +3473,11 @@ def list_tests(
|
||||
if not agent_path and _session:
|
||||
agent_path = f"exports/{_session.name}"
|
||||
|
||||
if not agent_path:
|
||||
return json.dumps({"error": "agent_path required (e.g., 'exports/my_agent')"})
|
||||
path, err = _validate_agent_path(agent_path)
|
||||
if err:
|
||||
return err
|
||||
|
||||
tests_dir = Path(agent_path) / "tests"
|
||||
tests_dir = path / "tests"
|
||||
|
||||
if not tests_dir.exists():
|
||||
return json.dumps(
|
||||
@@ -3379,7 +3752,7 @@ def store_credential(
|
||||
display_name: Annotated[str, "Human-readable name (e.g., 'HubSpot Access Token')"] = "",
|
||||
) -> str:
|
||||
"""
|
||||
Store a credential securely in the encrypted credential store at ~/.hive/credentials.
|
||||
Store a credential securely in the local encrypted store at ~/.hive/credentials.
|
||||
|
||||
Uses Fernet encryption (AES-128-CBC + HMAC). Requires HIVE_CREDENTIAL_KEY env var.
|
||||
"""
|
||||
@@ -3421,7 +3794,7 @@ def store_credential(
|
||||
@mcp.tool()
|
||||
def list_stored_credentials() -> str:
|
||||
"""
|
||||
List all credentials currently stored in the encrypted credential store.
|
||||
List all credentials currently stored in the local encrypted store.
|
||||
|
||||
Returns credential IDs and metadata (never returns secret values).
|
||||
"""
|
||||
@@ -3461,7 +3834,7 @@ def delete_stored_credential(
|
||||
credential_name: Annotated[str, "Logical credential name to delete (e.g., 'hubspot')"],
|
||||
) -> str:
|
||||
"""
|
||||
Delete a credential from the encrypted credential store.
|
||||
Delete a credential from the local encrypted store.
|
||||
"""
|
||||
try:
|
||||
store = _get_credential_store()
|
||||
|
||||
@@ -56,6 +56,18 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
|
||||
action="store_true",
|
||||
help="Show detailed execution logs (steps, LLM calls, etc.)",
|
||||
)
|
||||
run_parser.add_argument(
|
||||
"--tui",
|
||||
action="store_true",
|
||||
help="Launch interactive terminal dashboard",
|
||||
)
|
||||
run_parser.add_argument(
|
||||
"--model",
|
||||
"-m",
|
||||
type=str,
|
||||
default=None,
|
||||
help="LLM model to use (any LiteLLM-compatible name)",
|
||||
)
|
||||
run_parser.set_defaults(func=cmd_run)
|
||||
|
||||
# info command
|
||||
@@ -205,38 +217,83 @@ def cmd_run(args: argparse.Namespace) -> int:
|
||||
print(f"Error reading input file: {e}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Load and run agent
|
||||
try:
|
||||
runner = AgentRunner.load(
|
||||
args.agent_path,
|
||||
mock_mode=args.mock,
|
||||
model=getattr(args, "model", "claude-haiku-4-5-20251001"),
|
||||
)
|
||||
except FileNotFoundError as e:
|
||||
print(f"Error: {e}", file=sys.stderr)
|
||||
return 1
|
||||
# Run the agent (with TUI or standard)
|
||||
if getattr(args, "tui", False):
|
||||
from framework.tui.app import AdenTUI
|
||||
|
||||
# Auto-inject user_id if the agent expects it but it's not provided
|
||||
entry_input_keys = runner.graph.nodes[0].input_keys if runner.graph.nodes else []
|
||||
if "user_id" in entry_input_keys and context.get("user_id") is None:
|
||||
import os
|
||||
async def run_with_tui():
|
||||
try:
|
||||
# Load runner inside the async loop to ensure strict loop affinity
|
||||
# (only one load — avoids spawning duplicate MCP subprocesses)
|
||||
try:
|
||||
runner = AgentRunner.load(
|
||||
args.agent_path,
|
||||
mock_mode=args.mock,
|
||||
model=args.model,
|
||||
enable_tui=True,
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"Error loading agent: {e}")
|
||||
return
|
||||
|
||||
context["user_id"] = os.environ.get("USER", "default_user")
|
||||
# Force setup inside the loop
|
||||
if runner._agent_runtime is None:
|
||||
runner._setup()
|
||||
|
||||
if not args.quiet:
|
||||
info = runner.info()
|
||||
print(f"Agent: {info.name}")
|
||||
print(f"Goal: {info.goal_name}")
|
||||
print(f"Steps: {info.node_count}")
|
||||
print(f"Input: {json.dumps(context)}")
|
||||
print()
|
||||
print("=" * 60)
|
||||
print("Executing agent...")
|
||||
print("=" * 60)
|
||||
print()
|
||||
# Start runtime before TUI so it's ready for user input
|
||||
if runner._agent_runtime and not runner._agent_runtime.is_running:
|
||||
await runner._agent_runtime.start()
|
||||
|
||||
# Run the agent
|
||||
result = asyncio.run(runner.run(context))
|
||||
app = AdenTUI(runner._agent_runtime)
|
||||
|
||||
# TUI handles execution via ChatRepl — user submits input,
|
||||
# ChatRepl calls runtime.trigger_and_wait(). No auto-launch.
|
||||
await app.run_async()
|
||||
except Exception as e:
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
print(f"TUI error: {e}")
|
||||
|
||||
await runner.cleanup_async()
|
||||
return None
|
||||
|
||||
asyncio.run(run_with_tui())
|
||||
print("TUI session ended.")
|
||||
return 0
|
||||
else:
|
||||
# Standard execution — load runner here (not shared with TUI path)
|
||||
try:
|
||||
runner = AgentRunner.load(
|
||||
args.agent_path,
|
||||
mock_mode=args.mock,
|
||||
model=args.model,
|
||||
enable_tui=False,
|
||||
)
|
||||
except FileNotFoundError as e:
|
||||
print(f"Error: {e}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Auto-inject user_id if the agent expects it but it's not provided
|
||||
entry_input_keys = runner.graph.nodes[0].input_keys if runner.graph.nodes else []
|
||||
if "user_id" in entry_input_keys and context.get("user_id") is None:
|
||||
import os
|
||||
|
||||
context["user_id"] = os.environ.get("USER", "default_user")
|
||||
|
||||
if not args.quiet:
|
||||
info = runner.info()
|
||||
print(f"Agent: {info.name}")
|
||||
print(f"Goal: {info.goal_name}")
|
||||
print(f"Steps: {info.node_count}")
|
||||
print(f"Input: {json.dumps(context)}")
|
||||
print()
|
||||
print("=" * 60)
|
||||
print("Executing agent...")
|
||||
print("=" * 60)
|
||||
print()
|
||||
|
||||
result = asyncio.run(runner.run(context))
|
||||
|
||||
# Format output
|
||||
output = {
|
||||
|
||||
@@ -362,6 +362,15 @@ class MCPClient:
|
||||
# Call tool using persistent session
|
||||
result = await self._session.call_tool(tool_name, arguments=arguments)
|
||||
|
||||
# Check for server-side errors (validation failures, tool exceptions, etc.)
|
||||
if getattr(result, "isError", False):
|
||||
error_text = ""
|
||||
if result.content:
|
||||
content_item = result.content[0]
|
||||
if hasattr(content_item, "text"):
|
||||
error_text = content_item.text
|
||||
raise RuntimeError(f"MCP tool '{tool_name}' failed: {error_text}")
|
||||
|
||||
# Extract content
|
||||
if result.content:
|
||||
# MCP returns content as a list of content items
|
||||
|
||||
+214
-40
@@ -28,6 +28,33 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
# Configuration paths
|
||||
HIVE_CONFIG_FILE = Path.home() / ".hive" / "configuration.json"
|
||||
|
||||
|
||||
def _ensure_credential_key_env() -> None:
|
||||
"""Load HIVE_CREDENTIAL_KEY from shell config if not already in environment.
|
||||
|
||||
The setup-credentials skill writes the encryption key to ~/.zshrc or ~/.bashrc.
|
||||
If the user hasn't sourced their config in the current shell, this reads it
|
||||
directly so the runner (and any MCP subprocesses it spawns) can unlock the
|
||||
encrypted credential store.
|
||||
|
||||
Only HIVE_CREDENTIAL_KEY is loaded this way — all other secrets (API keys, etc.)
|
||||
come from the credential store itself.
|
||||
"""
|
||||
if os.environ.get("HIVE_CREDENTIAL_KEY"):
|
||||
return
|
||||
|
||||
try:
|
||||
from aden_tools.credentials.shell_config import check_env_var_in_shell_config
|
||||
|
||||
found, value = check_env_var_in_shell_config("HIVE_CREDENTIAL_KEY")
|
||||
if found and value:
|
||||
os.environ["HIVE_CREDENTIAL_KEY"] = value
|
||||
logger.debug("Loaded HIVE_CREDENTIAL_KEY from shell config")
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
CLAUDE_CREDENTIALS_FILE = Path.home() / ".claude" / ".credentials.json"
|
||||
|
||||
|
||||
@@ -236,6 +263,15 @@ class AgentRunner:
|
||||
result = await runner.run({"lead_id": "123"})
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def _resolve_default_model() -> str:
|
||||
"""Resolve the default model from ~/.hive/configuration.json."""
|
||||
config = get_hive_config()
|
||||
llm = config.get("llm", {})
|
||||
if llm.get("provider") and llm.get("model"):
|
||||
return f"{llm['provider']}/{llm['model']}"
|
||||
return "anthropic/claude-sonnet-4-20250514"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
agent_path: Path,
|
||||
@@ -243,7 +279,8 @@ class AgentRunner:
|
||||
goal: Goal,
|
||||
mock_mode: bool = False,
|
||||
storage_path: Path | None = None,
|
||||
model: str = "cerebras/zai-glm-4.7",
|
||||
model: str | None = None,
|
||||
enable_tui: bool = False,
|
||||
):
|
||||
"""
|
||||
Initialize the runner (use AgentRunner.load() instead).
|
||||
@@ -254,14 +291,15 @@ class AgentRunner:
|
||||
goal: Loaded Goal object
|
||||
mock_mode: If True, use mock LLM responses
|
||||
storage_path: Path for runtime storage (defaults to temp)
|
||||
model: Model to use - any LiteLLM-compatible model name
|
||||
(e.g., "claude-sonnet-4-20250514", "gpt-4o-mini", "gemini/gemini-pro")
|
||||
model: Model to use (reads from agent config or ~/.hive/configuration.json if None)
|
||||
enable_tui: If True, forces use of AgentRuntime with EventBus
|
||||
"""
|
||||
self.agent_path = agent_path
|
||||
self.graph = graph
|
||||
self.goal = goal
|
||||
self.mock_mode = mock_mode
|
||||
self.model = model
|
||||
self.model = model or self._resolve_default_model()
|
||||
self.enable_tui = enable_tui
|
||||
|
||||
# Set up storage
|
||||
if storage_path:
|
||||
@@ -275,6 +313,10 @@ class AgentRunner:
|
||||
self._storage_path = default_storage
|
||||
self._temp_dir = None
|
||||
|
||||
# Load HIVE_CREDENTIAL_KEY from shell config if not in env.
|
||||
# Must happen before MCP subprocesses are spawned so they inherit it.
|
||||
_ensure_credential_key_env()
|
||||
|
||||
# Initialize components
|
||||
self._tool_registry = ToolRegistry()
|
||||
self._runtime: Runtime | None = None
|
||||
@@ -296,32 +338,121 @@ class AgentRunner:
|
||||
if mcp_config_path.exists():
|
||||
self._load_mcp_servers_from_config(mcp_config_path)
|
||||
|
||||
@staticmethod
|
||||
def _import_agent_module(agent_path: Path):
|
||||
"""Import an agent package from its directory path.
|
||||
|
||||
Tries package import first (works when exports/ is on sys.path,
|
||||
which cli.py:_configure_paths() ensures). Falls back to direct
|
||||
file import of agent.py via importlib.util.
|
||||
"""
|
||||
import importlib
|
||||
|
||||
package_name = agent_path.name
|
||||
|
||||
# Try importing as a package (works when exports/ is on sys.path)
|
||||
try:
|
||||
return importlib.import_module(package_name)
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
# Fallback: import agent.py directly via file path
|
||||
import importlib.util
|
||||
|
||||
agent_py = agent_path / "agent.py"
|
||||
if not agent_py.exists():
|
||||
raise FileNotFoundError(
|
||||
f"No importable agent found at {agent_path}. "
|
||||
f"Expected a Python package with agent.py."
|
||||
)
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
f"{package_name}.agent",
|
||||
agent_py,
|
||||
submodule_search_locations=[str(agent_path)],
|
||||
)
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(module)
|
||||
return module
|
||||
|
||||
@classmethod
|
||||
def load(
|
||||
cls,
|
||||
agent_path: str | Path,
|
||||
mock_mode: bool = False,
|
||||
storage_path: Path | None = None,
|
||||
model: str = "cerebras/zai-glm-4.7",
|
||||
model: str | None = None,
|
||||
enable_tui: bool = False,
|
||||
) -> "AgentRunner":
|
||||
"""
|
||||
Load an agent from an export folder.
|
||||
|
||||
Imports the agent's Python package and reads module-level variables
|
||||
(goal, nodes, edges, etc.) to build a GraphSpec. Falls back to
|
||||
agent.json if no Python module is found.
|
||||
|
||||
Args:
|
||||
agent_path: Path to agent folder (containing agent.json)
|
||||
agent_path: Path to agent folder
|
||||
mock_mode: If True, use mock LLM responses
|
||||
storage_path: Path for runtime storage (defaults to temp)
|
||||
model: LLM model to use (any LiteLLM-compatible model name)
|
||||
storage_path: Path for runtime storage (defaults to ~/.hive/storage/{name})
|
||||
model: LLM model to use (reads from agent's default_config if None)
|
||||
enable_tui: If True, forces use of AgentRuntime with EventBus
|
||||
|
||||
Returns:
|
||||
AgentRunner instance ready to run
|
||||
"""
|
||||
agent_path = Path(agent_path)
|
||||
|
||||
# Load agent.json
|
||||
# Try loading from Python module first (code-based agents)
|
||||
agent_py = agent_path / "agent.py"
|
||||
if agent_py.exists():
|
||||
agent_module = cls._import_agent_module(agent_path)
|
||||
|
||||
goal = getattr(agent_module, "goal", None)
|
||||
nodes = getattr(agent_module, "nodes", None)
|
||||
edges = getattr(agent_module, "edges", None)
|
||||
|
||||
if goal is None or nodes is None or edges is None:
|
||||
raise ValueError(
|
||||
f"Agent at {agent_path} must define 'goal', 'nodes', and 'edges' "
|
||||
f"in agent.py (or __init__.py)"
|
||||
)
|
||||
|
||||
# Read model and max_tokens from agent's config if not explicitly provided
|
||||
agent_config = getattr(agent_module, "default_config", None)
|
||||
if model is None:
|
||||
if agent_config and hasattr(agent_config, "model"):
|
||||
model = agent_config.model
|
||||
|
||||
max_tokens = getattr(agent_config, "max_tokens", 1024) if agent_config else 1024
|
||||
|
||||
# Build GraphSpec from module-level variables
|
||||
graph = GraphSpec(
|
||||
id=f"{agent_path.name}-graph",
|
||||
goal_id=goal.id,
|
||||
version="1.0.0",
|
||||
entry_node=getattr(agent_module, "entry_node", nodes[0].id),
|
||||
entry_points=getattr(agent_module, "entry_points", {}),
|
||||
terminal_nodes=getattr(agent_module, "terminal_nodes", []),
|
||||
pause_nodes=getattr(agent_module, "pause_nodes", []),
|
||||
nodes=nodes,
|
||||
edges=edges,
|
||||
max_tokens=max_tokens,
|
||||
)
|
||||
|
||||
return cls(
|
||||
agent_path=agent_path,
|
||||
graph=graph,
|
||||
goal=goal,
|
||||
mock_mode=mock_mode,
|
||||
storage_path=storage_path,
|
||||
model=model,
|
||||
enable_tui=enable_tui,
|
||||
)
|
||||
|
||||
# Fallback: load from agent.json (legacy JSON-based agents)
|
||||
agent_json_path = agent_path / "agent.json"
|
||||
if not agent_json_path.exists():
|
||||
raise FileNotFoundError(f"agent.json not found in {agent_path}")
|
||||
raise FileNotFoundError(f"No agent.py or agent.json found in {agent_path}")
|
||||
|
||||
with open(agent_json_path) as f:
|
||||
graph, goal = load_agent_export(f.read())
|
||||
@@ -333,6 +464,7 @@ class AgentRunner:
|
||||
mock_mode=mock_mode,
|
||||
storage_path=storage_path,
|
||||
model=model,
|
||||
enable_tui=enable_tui,
|
||||
)
|
||||
|
||||
def register_tool(
|
||||
@@ -411,25 +543,8 @@ class AgentRunner:
|
||||
return self._tool_registry.register_mcp_server(server_config)
|
||||
|
||||
def _load_mcp_servers_from_config(self, config_path: Path) -> None:
|
||||
"""
|
||||
Load and register MCP servers from a configuration file.
|
||||
|
||||
Args:
|
||||
config_path: Path to mcp_servers.json file
|
||||
"""
|
||||
try:
|
||||
with open(config_path) as f:
|
||||
config = json.load(f)
|
||||
|
||||
servers = config.get("servers", [])
|
||||
for server_config in servers:
|
||||
try:
|
||||
self._tool_registry.register_mcp_server(server_config)
|
||||
except Exception as e:
|
||||
server_name = server_config.get("name", "unknown")
|
||||
logger.warning(f"Failed to register MCP server '{server_name}': {e}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to load MCP servers config from {config_path}: {e}")
|
||||
"""Load and register MCP servers from a configuration file."""
|
||||
self._tool_registry.load_mcp_config(config_path)
|
||||
|
||||
def set_approval_callback(self, callback: Callable) -> None:
|
||||
"""
|
||||
@@ -488,16 +603,25 @@ class AgentRunner:
|
||||
api_key_env = self._get_api_key_env_var(self.model)
|
||||
if api_key_env and os.environ.get(api_key_env):
|
||||
self._llm = LiteLLMProvider(model=self.model)
|
||||
elif api_key_env:
|
||||
print(f"Warning: {api_key_env} not set. LLM calls will fail.")
|
||||
print(f"Set it with: export {api_key_env}=your-api-key")
|
||||
else:
|
||||
# Fall back to credential store
|
||||
api_key = self._get_api_key_from_credential_store()
|
||||
if api_key:
|
||||
self._llm = LiteLLMProvider(model=self.model, api_key=api_key)
|
||||
# Set env var so downstream code (e.g. cleanup LLM in
|
||||
# node._extract_json) can also find it
|
||||
if api_key_env:
|
||||
os.environ[api_key_env] = api_key
|
||||
elif api_key_env:
|
||||
print(f"Warning: {api_key_env} not set. LLM calls will fail.")
|
||||
print(f"Set it with: export {api_key_env}=your-api-key")
|
||||
|
||||
# Get tools for executor/runtime
|
||||
tools = list(self._tool_registry.get_tools().values())
|
||||
tool_executor = self._tool_registry.get_executor()
|
||||
|
||||
if self._uses_async_entry_points:
|
||||
# Multi-entry-point mode: use AgentRuntime
|
||||
if self._uses_async_entry_points or self.enable_tui:
|
||||
# Multi-entry-point mode or TUI mode: use AgentRuntime
|
||||
self._setup_agent_runtime(tools, tool_executor)
|
||||
else:
|
||||
# Single-entry-point mode: use legacy GraphExecutor
|
||||
@@ -535,6 +659,33 @@ class AgentRunner:
|
||||
# Default: assume OpenAI-compatible
|
||||
return "OPENAI_API_KEY"
|
||||
|
||||
def _get_api_key_from_credential_store(self) -> str | None:
|
||||
"""Get the LLM API key from the encrypted credential store.
|
||||
|
||||
Maps model name to credential store ID (e.g. "anthropic/..." -> "anthropic")
|
||||
and retrieves the key via CredentialStore.get().
|
||||
"""
|
||||
if not os.environ.get("HIVE_CREDENTIAL_KEY"):
|
||||
return None
|
||||
|
||||
# Map model prefix to credential store ID
|
||||
model_lower = self.model.lower()
|
||||
cred_id = None
|
||||
if model_lower.startswith("anthropic/") or model_lower.startswith("claude"):
|
||||
cred_id = "anthropic"
|
||||
# Add more mappings as providers are added to LLM_CREDENTIALS
|
||||
|
||||
if cred_id is None:
|
||||
return None
|
||||
|
||||
try:
|
||||
from framework.credentials import CredentialStore
|
||||
|
||||
store = CredentialStore.with_encrypted_storage()
|
||||
return store.get(cred_id)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def _setup_legacy_executor(self, tools: list, tool_executor: Callable | None) -> None:
|
||||
"""Set up legacy single-entry-point execution using GraphExecutor."""
|
||||
# Create runtime
|
||||
@@ -566,6 +717,19 @@ class AgentRunner:
|
||||
)
|
||||
entry_points.append(ep)
|
||||
|
||||
# If TUI enabled but no entry points (single-entry agent), create default
|
||||
if not entry_points and self.enable_tui and self.graph.entry_node:
|
||||
logger.info("Creating default entry point for TUI")
|
||||
entry_points.append(
|
||||
EntryPointSpec(
|
||||
id="default",
|
||||
name="Default",
|
||||
entry_node=self.graph.entry_node,
|
||||
trigger_type="manual",
|
||||
isolation_level="shared",
|
||||
)
|
||||
)
|
||||
|
||||
# Create AgentRuntime with all entry points
|
||||
self._agent_runtime = create_agent_runtime(
|
||||
graph=self.graph,
|
||||
@@ -616,7 +780,7 @@ class AgentRunner:
|
||||
error=error_msg,
|
||||
)
|
||||
|
||||
if self._uses_async_entry_points:
|
||||
if self._uses_async_entry_points or self.enable_tui:
|
||||
# Multi-entry-point mode: use AgentRuntime
|
||||
return await self._run_with_agent_runtime(
|
||||
input_data=input_data or {},
|
||||
@@ -908,15 +1072,25 @@ class AgentRunner:
|
||||
EnvVarStorage,
|
||||
)
|
||||
|
||||
# Build env mapping for fallback
|
||||
# Build env mapping for credential lookup
|
||||
env_mapping = {
|
||||
(spec.credential_id or name): spec.env_var
|
||||
for name, spec in CREDENTIAL_SPECS.items()
|
||||
}
|
||||
storage = CompositeStorage(
|
||||
primary=EncryptedFileStorage(),
|
||||
fallbacks=[EnvVarStorage(env_mapping=env_mapping)],
|
||||
)
|
||||
|
||||
# Only use EncryptedFileStorage if the encryption key is configured;
|
||||
# otherwise just check env vars (avoids generating a throwaway key)
|
||||
storages: list = [EnvVarStorage(env_mapping=env_mapping)]
|
||||
if os.environ.get("HIVE_CREDENTIAL_KEY"):
|
||||
storages.insert(0, EncryptedFileStorage())
|
||||
|
||||
if len(storages) == 1:
|
||||
storage = storages[0]
|
||||
else:
|
||||
storage = CompositeStorage(
|
||||
primary=storages[0],
|
||||
fallbacks=storages[1:],
|
||||
)
|
||||
store = CredentialStore(storage=storage)
|
||||
|
||||
# Build reverse mappings
|
||||
|
||||
@@ -33,6 +33,11 @@ class ToolRegistry:
|
||||
4. Manually registered tools
|
||||
"""
|
||||
|
||||
# Framework-internal context keys injected into tool calls.
|
||||
# Stripped from LLM-facing schemas (the LLM doesn't know these values)
|
||||
# and auto-injected at call time for tools that accept them.
|
||||
CONTEXT_PARAMS = frozenset({"workspace_id", "agent_id", "session_id"})
|
||||
|
||||
def __init__(self):
|
||||
self._tools: dict[str, RegisteredTool] = {}
|
||||
self._mcp_clients: list[Any] = [] # List of MCPClient instances
|
||||
@@ -257,6 +262,43 @@ class ToolRegistry:
|
||||
"""
|
||||
self._session_context.update(context)
|
||||
|
||||
def load_mcp_config(self, config_path: Path) -> None:
|
||||
"""
|
||||
Load and register MCP servers from a config file.
|
||||
|
||||
Resolves relative ``cwd`` paths against the config file's parent
|
||||
directory so callers never need to handle path resolution themselves.
|
||||
|
||||
Args:
|
||||
config_path: Path to an ``mcp_servers.json`` file.
|
||||
"""
|
||||
try:
|
||||
with open(config_path) as f:
|
||||
config = json.load(f)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to load MCP config from {config_path}: {e}")
|
||||
return
|
||||
|
||||
base_dir = config_path.parent
|
||||
|
||||
# Support both formats:
|
||||
# {"servers": [{"name": "x", ...}]} (list format)
|
||||
# {"server-name": {"transport": ...}, ...} (dict format)
|
||||
server_list = config.get("servers", [])
|
||||
if not server_list and "servers" not in config:
|
||||
# Treat top-level keys as server names
|
||||
server_list = [{"name": name, **cfg} for name, cfg in config.items()]
|
||||
|
||||
for server_config in server_list:
|
||||
cwd = server_config.get("cwd")
|
||||
if cwd and not Path(cwd).is_absolute():
|
||||
server_config["cwd"] = str((base_dir / cwd).resolve())
|
||||
try:
|
||||
self.register_mcp_server(server_config)
|
||||
except Exception as e:
|
||||
name = server_config.get("name", "unknown")
|
||||
logger.warning(f"Failed to register MCP server '{name}': {e}")
|
||||
|
||||
def register_mcp_server(
|
||||
self,
|
||||
server_config: dict[str, Any],
|
||||
@@ -305,15 +347,25 @@ class ToolRegistry:
|
||||
# Register each tool
|
||||
count = 0
|
||||
for mcp_tool in client.list_tools():
|
||||
# Convert MCP tool to framework Tool
|
||||
# Convert MCP tool to framework Tool (strips context params from LLM schema)
|
||||
tool = self._convert_mcp_tool_to_framework_tool(mcp_tool)
|
||||
|
||||
# Create executor that calls the MCP server
|
||||
def make_mcp_executor(client_ref: MCPClient, tool_name: str, registry_ref):
|
||||
def make_mcp_executor(
|
||||
client_ref: MCPClient,
|
||||
tool_name: str,
|
||||
registry_ref,
|
||||
tool_params: set[str],
|
||||
):
|
||||
def executor(inputs: dict) -> Any:
|
||||
try:
|
||||
# Inject session context for tools that need it
|
||||
merged_inputs = {**registry_ref._session_context, **inputs}
|
||||
# Only inject session context params the tool accepts
|
||||
filtered_context = {
|
||||
k: v
|
||||
for k, v in registry_ref._session_context.items()
|
||||
if k in tool_params
|
||||
}
|
||||
merged_inputs = {**filtered_context, **inputs}
|
||||
result = client_ref.call_tool(tool_name, merged_inputs)
|
||||
# MCP tools return content array, extract the result
|
||||
if isinstance(result, list) and len(result) > 0:
|
||||
@@ -327,10 +379,11 @@ class ToolRegistry:
|
||||
|
||||
return executor
|
||||
|
||||
tool_params = set(mcp_tool.input_schema.get("properties", {}).keys())
|
||||
self.register(
|
||||
mcp_tool.name,
|
||||
tool,
|
||||
make_mcp_executor(client, mcp_tool.name, self),
|
||||
make_mcp_executor(client, mcp_tool.name, self, tool_params),
|
||||
)
|
||||
count += 1
|
||||
|
||||
@@ -356,6 +409,11 @@ class ToolRegistry:
|
||||
properties = input_schema.get("properties", {})
|
||||
required = input_schema.get("required", [])
|
||||
|
||||
# Strip framework-internal context params from LLM-facing schema.
|
||||
# The LLM can't know these values; they're auto-injected at call time.
|
||||
properties = {k: v for k, v in properties.items() if k not in self.CONTEXT_PARAMS}
|
||||
required = [r for r in required if r not in self.CONTEXT_PARAMS]
|
||||
|
||||
# Convert to framework Tool format
|
||||
tool = Tool(
|
||||
name=mcp_tool.name,
|
||||
|
||||
@@ -296,6 +296,25 @@ class AgentRuntime:
|
||||
raise ValueError(f"Entry point '{entry_point_id}' not found")
|
||||
return await stream.wait_for_completion(exec_id, timeout)
|
||||
|
||||
async def inject_input(self, node_id: str, content: str) -> bool:
|
||||
"""Inject user input into a running client-facing node.
|
||||
|
||||
Routes input to the EventLoopNode identified by ``node_id``
|
||||
across all active streams. Used by the TUI ChatRepl to deliver
|
||||
user responses during client-facing node execution.
|
||||
|
||||
Args:
|
||||
node_id: The node currently waiting for input
|
||||
content: The user's input text
|
||||
|
||||
Returns:
|
||||
True if input was delivered, False if no matching node found
|
||||
"""
|
||||
for stream in self._streams.values():
|
||||
if await stream.inject_input(node_id, content):
|
||||
return True
|
||||
return False
|
||||
|
||||
async def get_goal_progress(self) -> dict[str, Any]:
|
||||
"""
|
||||
Evaluate goal progress across all streams.
|
||||
|
||||
@@ -12,13 +12,13 @@ import logging
|
||||
from collections.abc import Awaitable, Callable
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EventType(str, Enum):
|
||||
class EventType(StrEnum):
|
||||
"""Types of events that can be published."""
|
||||
|
||||
# Execution lifecycle
|
||||
@@ -41,6 +41,28 @@ class EventType(str, Enum):
|
||||
STREAM_STARTED = "stream_started"
|
||||
STREAM_STOPPED = "stream_stopped"
|
||||
|
||||
# Node event-loop lifecycle
|
||||
NODE_LOOP_STARTED = "node_loop_started"
|
||||
NODE_LOOP_ITERATION = "node_loop_iteration"
|
||||
NODE_LOOP_COMPLETED = "node_loop_completed"
|
||||
|
||||
# LLM streaming observability
|
||||
LLM_TEXT_DELTA = "llm_text_delta"
|
||||
LLM_REASONING_DELTA = "llm_reasoning_delta"
|
||||
|
||||
# Tool lifecycle
|
||||
TOOL_CALL_STARTED = "tool_call_started"
|
||||
TOOL_CALL_COMPLETED = "tool_call_completed"
|
||||
|
||||
# Client I/O (client_facing=True nodes only)
|
||||
CLIENT_OUTPUT_DELTA = "client_output_delta"
|
||||
CLIENT_INPUT_REQUESTED = "client_input_requested"
|
||||
|
||||
# Internal node observability (client_facing=False nodes)
|
||||
NODE_INTERNAL_OUTPUT = "node_internal_output"
|
||||
NODE_INPUT_BLOCKED = "node_input_blocked"
|
||||
NODE_STALLED = "node_stalled"
|
||||
|
||||
# Custom events
|
||||
CUSTOM = "custom"
|
||||
|
||||
@@ -51,6 +73,7 @@ class AgentEvent:
|
||||
|
||||
type: EventType
|
||||
stream_id: str
|
||||
node_id: str | None = None # Which node emitted this event
|
||||
execution_id: str | None = None
|
||||
data: dict[str, Any] = field(default_factory=dict)
|
||||
timestamp: datetime = field(default_factory=datetime.now)
|
||||
@@ -61,6 +84,7 @@ class AgentEvent:
|
||||
return {
|
||||
"type": self.type.value,
|
||||
"stream_id": self.stream_id,
|
||||
"node_id": self.node_id,
|
||||
"execution_id": self.execution_id,
|
||||
"data": self.data,
|
||||
"timestamp": self.timestamp.isoformat(),
|
||||
@@ -80,6 +104,7 @@ class Subscription:
|
||||
event_types: set[EventType]
|
||||
handler: EventHandler
|
||||
filter_stream: str | None = None # Only receive events from this stream
|
||||
filter_node: str | None = None # Only receive events from this node
|
||||
filter_execution: str | None = None # Only receive events from this execution
|
||||
|
||||
|
||||
@@ -138,6 +163,7 @@ class EventBus:
|
||||
event_types: list[EventType],
|
||||
handler: EventHandler,
|
||||
filter_stream: str | None = None,
|
||||
filter_node: str | None = None,
|
||||
filter_execution: str | None = None,
|
||||
) -> str:
|
||||
"""
|
||||
@@ -147,6 +173,7 @@ class EventBus:
|
||||
event_types: Types of events to receive
|
||||
handler: Async function to call when event occurs
|
||||
filter_stream: Only receive events from this stream
|
||||
filter_node: Only receive events from this node
|
||||
filter_execution: Only receive events from this execution
|
||||
|
||||
Returns:
|
||||
@@ -160,6 +187,7 @@ class EventBus:
|
||||
event_types=set(event_types),
|
||||
handler=handler,
|
||||
filter_stream=filter_stream,
|
||||
filter_node=filter_node,
|
||||
filter_execution=filter_execution,
|
||||
)
|
||||
|
||||
@@ -218,6 +246,10 @@ class EventBus:
|
||||
if subscription.filter_stream and subscription.filter_stream != event.stream_id:
|
||||
return False
|
||||
|
||||
# Check node filter
|
||||
if subscription.filter_node and subscription.filter_node != event.node_id:
|
||||
return False
|
||||
|
||||
# Check execution filter
|
||||
if subscription.filter_execution and subscription.filter_execution != event.execution_id:
|
||||
return False
|
||||
@@ -359,6 +391,248 @@ class EventBus:
|
||||
)
|
||||
)
|
||||
|
||||
# === NODE EVENT-LOOP PUBLISHERS ===
|
||||
|
||||
async def emit_node_loop_started(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
execution_id: str | None = None,
|
||||
max_iterations: int | None = None,
|
||||
) -> None:
|
||||
"""Emit node loop started event."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.NODE_LOOP_STARTED,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={"max_iterations": max_iterations},
|
||||
)
|
||||
)
|
||||
|
||||
async def emit_node_loop_iteration(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
iteration: int,
|
||||
execution_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emit node loop iteration event."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.NODE_LOOP_ITERATION,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={"iteration": iteration},
|
||||
)
|
||||
)
|
||||
|
||||
async def emit_node_loop_completed(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
iterations: int,
|
||||
execution_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emit node loop completed event."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.NODE_LOOP_COMPLETED,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={"iterations": iterations},
|
||||
)
|
||||
)
|
||||
|
||||
# === LLM STREAMING PUBLISHERS ===
|
||||
|
||||
async def emit_llm_text_delta(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
content: str,
|
||||
snapshot: str,
|
||||
execution_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emit LLM text delta event."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.LLM_TEXT_DELTA,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={"content": content, "snapshot": snapshot},
|
||||
)
|
||||
)
|
||||
|
||||
async def emit_llm_reasoning_delta(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
content: str,
|
||||
execution_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emit LLM reasoning delta event."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.LLM_REASONING_DELTA,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={"content": content},
|
||||
)
|
||||
)
|
||||
|
||||
# === TOOL LIFECYCLE PUBLISHERS ===
|
||||
|
||||
async def emit_tool_call_started(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
tool_use_id: str,
|
||||
tool_name: str,
|
||||
tool_input: dict[str, Any] | None = None,
|
||||
execution_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emit tool call started event."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.TOOL_CALL_STARTED,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={
|
||||
"tool_use_id": tool_use_id,
|
||||
"tool_name": tool_name,
|
||||
"tool_input": tool_input or {},
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
async def emit_tool_call_completed(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
tool_use_id: str,
|
||||
tool_name: str,
|
||||
result: str = "",
|
||||
is_error: bool = False,
|
||||
execution_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emit tool call completed event."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.TOOL_CALL_COMPLETED,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={
|
||||
"tool_use_id": tool_use_id,
|
||||
"tool_name": tool_name,
|
||||
"result": result,
|
||||
"is_error": is_error,
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
# === CLIENT I/O PUBLISHERS ===
|
||||
|
||||
async def emit_client_output_delta(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
content: str,
|
||||
snapshot: str,
|
||||
execution_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emit client output delta event (client_facing=True nodes)."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.CLIENT_OUTPUT_DELTA,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={"content": content, "snapshot": snapshot},
|
||||
)
|
||||
)
|
||||
|
||||
async def emit_client_input_requested(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
prompt: str = "",
|
||||
execution_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emit client input requested event (client_facing=True nodes)."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.CLIENT_INPUT_REQUESTED,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={"prompt": prompt},
|
||||
)
|
||||
)
|
||||
|
||||
# === INTERNAL NODE PUBLISHERS ===
|
||||
|
||||
async def emit_node_internal_output(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
content: str,
|
||||
execution_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emit node internal output event (client_facing=False nodes)."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.NODE_INTERNAL_OUTPUT,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={"content": content},
|
||||
)
|
||||
)
|
||||
|
||||
async def emit_node_stalled(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
reason: str = "",
|
||||
execution_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emit node stalled event."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.NODE_STALLED,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={"reason": reason},
|
||||
)
|
||||
)
|
||||
|
||||
async def emit_node_input_blocked(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
prompt: str = "",
|
||||
execution_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emit node input blocked event."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.NODE_INPUT_BLOCKED,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={"prompt": prompt},
|
||||
)
|
||||
)
|
||||
|
||||
# === QUERY OPERATIONS ===
|
||||
|
||||
def get_history(
|
||||
@@ -410,6 +684,7 @@ class EventBus:
|
||||
self,
|
||||
event_type: EventType,
|
||||
stream_id: str | None = None,
|
||||
node_id: str | None = None,
|
||||
execution_id: str | None = None,
|
||||
timeout: float | None = None,
|
||||
) -> AgentEvent | None:
|
||||
@@ -419,6 +694,7 @@ class EventBus:
|
||||
Args:
|
||||
event_type: Type of event to wait for
|
||||
stream_id: Filter by stream
|
||||
node_id: Filter by node
|
||||
execution_id: Filter by execution
|
||||
timeout: Maximum time to wait (seconds)
|
||||
|
||||
@@ -438,6 +714,7 @@ class EventBus:
|
||||
event_types=[event_type],
|
||||
handler=handler,
|
||||
filter_stream=stream_id,
|
||||
filter_node=node_id,
|
||||
filter_execution=execution_id,
|
||||
)
|
||||
|
||||
|
||||
@@ -153,6 +153,7 @@ class ExecutionStream:
|
||||
# Execution tracking
|
||||
self._active_executions: dict[str, ExecutionContext] = {}
|
||||
self._execution_tasks: dict[str, asyncio.Task] = {}
|
||||
self._active_executors: dict[str, GraphExecutor] = {}
|
||||
self._execution_results: OrderedDict[str, ExecutionResult] = OrderedDict()
|
||||
self._execution_result_times: dict[str, float] = {}
|
||||
self._completion_events: dict[str, asyncio.Event] = {}
|
||||
@@ -237,6 +238,21 @@ class ExecutionStream:
|
||||
)
|
||||
)
|
||||
|
||||
async def inject_input(self, node_id: str, content: str) -> bool:
|
||||
"""Inject user input into a running client-facing EventLoopNode.
|
||||
|
||||
Searches active executors for a node matching ``node_id`` and calls
|
||||
its ``inject_event()`` method to unblock ``_await_user_input()``.
|
||||
|
||||
Returns True if input was delivered, False otherwise.
|
||||
"""
|
||||
for executor in self._active_executors.values():
|
||||
node = executor.node_registry.get(node_id)
|
||||
if node is not None and hasattr(node, "inject_event"):
|
||||
await node.inject_event(content)
|
||||
return True
|
||||
return False
|
||||
|
||||
async def execute(
|
||||
self,
|
||||
input_data: dict[str, Any],
|
||||
@@ -314,13 +330,21 @@ class ExecutionStream:
|
||||
# Create runtime adapter for this execution
|
||||
runtime_adapter = StreamRuntimeAdapter(self._runtime, execution_id)
|
||||
|
||||
# Create executor for this execution
|
||||
# Create executor for this execution.
|
||||
# Scope storage by execution_id so each execution gets
|
||||
# fresh conversations and spillover directories.
|
||||
exec_storage = self._storage.base_path / "sessions" / execution_id
|
||||
executor = GraphExecutor(
|
||||
runtime=runtime_adapter,
|
||||
llm=self._llm,
|
||||
tools=self._tools,
|
||||
tool_executor=self._tool_executor,
|
||||
event_bus=self._event_bus,
|
||||
stream_id=self.stream_id,
|
||||
storage_path=exec_storage,
|
||||
)
|
||||
# Track executor so inject_input() can reach EventLoopNode instances
|
||||
self._active_executors[execution_id] = executor
|
||||
|
||||
# Create modified graph with entry point
|
||||
# We need to override the entry_node to use our entry point
|
||||
@@ -334,6 +358,9 @@ class ExecutionStream:
|
||||
session_state=ctx.session_state,
|
||||
)
|
||||
|
||||
# Clean up executor reference
|
||||
self._active_executors.pop(execution_id, None)
|
||||
|
||||
# Store result with retention
|
||||
self._record_execution_result(execution_id, result)
|
||||
|
||||
|
||||
@@ -11,13 +11,13 @@ import asyncio
|
||||
import logging
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class IsolationLevel(str, Enum):
|
||||
class IsolationLevel(StrEnum):
|
||||
"""State isolation level for concurrent executions."""
|
||||
|
||||
ISOLATED = "isolated" # Private state per execution
|
||||
@@ -25,7 +25,7 @@ class IsolationLevel(str, Enum):
|
||||
SYNCHRONIZED = "synchronized" # Shared with write locks (strong consistency)
|
||||
|
||||
|
||||
class StateScope(str, Enum):
|
||||
class StateScope(StrEnum):
|
||||
"""Scope for state operations."""
|
||||
|
||||
EXECUTION = "execution" # Local to a single execution
|
||||
|
||||
@@ -10,13 +10,13 @@ This is MORE important than actions because:
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field, computed_field
|
||||
|
||||
|
||||
class DecisionType(str, Enum):
|
||||
class DecisionType(StrEnum):
|
||||
"""Types of decisions an agent can make."""
|
||||
|
||||
TOOL_SELECTION = "tool_selection" # Which tool to use
|
||||
|
||||
@@ -6,7 +6,7 @@ summaries and metrics that Builder needs to understand what happened.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field, computed_field
|
||||
@@ -14,7 +14,7 @@ from pydantic import BaseModel, Field, computed_field
|
||||
from framework.schemas.decision import Decision, Outcome
|
||||
|
||||
|
||||
class RunStatus(str, Enum):
|
||||
class RunStatus(StrEnum):
|
||||
"""Status of a run."""
|
||||
|
||||
RUNNING = "running"
|
||||
|
||||
@@ -167,14 +167,18 @@ class ConcurrentStorage:
|
||||
run: Run to save
|
||||
immediate: If True, save immediately (bypasses batching)
|
||||
"""
|
||||
# Invalidate summary cache since the run data is changing
|
||||
# This ensures load_summary() fetches fresh data after the save
|
||||
self._cache.pop(f"summary:{run.id}", None)
|
||||
|
||||
if immediate or not self._running:
|
||||
await self._save_run_locked(run)
|
||||
# Update cache only after successful immediate write
|
||||
self._cache[f"run:{run.id}"] = CacheEntry(run, time.time())
|
||||
else:
|
||||
# For batched writes, cache will be updated in _flush_batch after successful write
|
||||
await self._write_queue.put(("run", run))
|
||||
|
||||
# Update cache
|
||||
self._cache[f"run:{run.id}"] = CacheEntry(run, time.time())
|
||||
|
||||
async def _save_run_locked(self, run: Run) -> None:
|
||||
"""Save a run with file locking, including index locks."""
|
||||
lock_key = f"run:{run.id}"
|
||||
@@ -363,8 +367,12 @@ class ConcurrentStorage:
|
||||
try:
|
||||
if item_type == "run":
|
||||
await self._save_run_locked(item)
|
||||
# Update cache only after successful batched write
|
||||
# This fixes the race condition where cache was updated before write completed
|
||||
self._cache[f"run:{item.id}"] = CacheEntry(item, time.time())
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save {item_type}: {e}")
|
||||
# Cache is NOT updated on failure - prevents stale/inconsistent cache state
|
||||
|
||||
async def _flush_pending(self) -> None:
|
||||
"""Flush all pending writes."""
|
||||
|
||||
@@ -26,9 +26,9 @@ Testing tools are integrated into the main agent_builder_server.py:
|
||||
## CLI Commands
|
||||
|
||||
```bash
|
||||
python -m framework test-run <agent_path> --goal <goal_id>
|
||||
python -m framework test-debug <goal_id> <test_id>
|
||||
python -m framework test-list <agent_path> --goal <goal_id>
|
||||
uv run python -m framework test-run <agent_path> --goal <goal_id>
|
||||
uv run python -m framework test-debug <goal_id> <test_id>
|
||||
uv run python -m framework test-list <agent_path> --goal <goal_id>
|
||||
```
|
||||
"""
|
||||
|
||||
|
||||
@@ -6,13 +6,13 @@ programmatic/MCP-based approval.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ApprovalAction(str, Enum):
|
||||
class ApprovalAction(StrEnum):
|
||||
"""Actions a user can take on a generated test."""
|
||||
|
||||
APPROVE = "approve" # Accept as-is
|
||||
|
||||
@@ -24,7 +24,7 @@ def _get_api_key():
|
||||
# 1. Try CredentialStoreAdapter for Anthropic
|
||||
try:
|
||||
from aden_tools.credentials import CredentialStoreAdapter
|
||||
creds = CredentialStoreAdapter.with_env_storage()
|
||||
creds = CredentialStoreAdapter.default()
|
||||
if creds.is_available("anthropic"):
|
||||
return creds.get("anthropic")
|
||||
except (ImportError, KeyError):
|
||||
@@ -57,7 +57,7 @@ def _get_api_key():
|
||||
"""Get API key from CredentialStoreAdapter or environment."""
|
||||
try:
|
||||
from aden_tools.credentials import CredentialStoreAdapter
|
||||
creds = CredentialStoreAdapter.with_env_storage()
|
||||
creds = CredentialStoreAdapter.default()
|
||||
if creds.is_available("anthropic"):
|
||||
return creds.get("anthropic")
|
||||
except (ImportError, KeyError):
|
||||
|
||||
@@ -6,13 +6,13 @@ but require mandatory user approval before being stored.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ApprovalStatus(str, Enum):
|
||||
class ApprovalStatus(StrEnum):
|
||||
"""Status of user approval for a generated test."""
|
||||
|
||||
PENDING = "pending" # Awaiting user review
|
||||
@@ -21,7 +21,7 @@ class ApprovalStatus(str, Enum):
|
||||
REJECTED = "rejected" # User declined (with reason)
|
||||
|
||||
|
||||
class TestType(str, Enum):
|
||||
class TestType(StrEnum):
|
||||
"""Type of test based on what it validates."""
|
||||
|
||||
__test__ = False # Not a pytest test class
|
||||
|
||||
@@ -6,13 +6,13 @@ categorization for guiding iteration strategy.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ErrorCategory(str, Enum):
|
||||
class ErrorCategory(StrEnum):
|
||||
"""
|
||||
Category of test failure for guiding iteration.
|
||||
|
||||
|
||||
@@ -0,0 +1,518 @@
|
||||
import logging
|
||||
import time
|
||||
|
||||
from textual.app import App, ComposeResult
|
||||
from textual.binding import Binding
|
||||
from textual.containers import Container, Horizontal, Vertical
|
||||
from textual.widgets import Footer, Label
|
||||
|
||||
from framework.runtime.agent_runtime import AgentRuntime
|
||||
from framework.runtime.event_bus import AgentEvent, EventType
|
||||
from framework.tui.widgets.chat_repl import ChatRepl
|
||||
from framework.tui.widgets.graph_view import GraphOverview
|
||||
from framework.tui.widgets.log_pane import LogPane
|
||||
|
||||
|
||||
class StatusBar(Container):
|
||||
"""Live status bar showing agent execution state."""
|
||||
|
||||
DEFAULT_CSS = """
|
||||
StatusBar {
|
||||
dock: top;
|
||||
height: 1;
|
||||
background: $panel;
|
||||
color: $text;
|
||||
padding: 0 1;
|
||||
}
|
||||
StatusBar > Label {
|
||||
width: 100%;
|
||||
}
|
||||
"""
|
||||
|
||||
def __init__(self, graph_id: str = ""):
|
||||
super().__init__()
|
||||
self._graph_id = graph_id
|
||||
self._state = "idle"
|
||||
self._active_node: str | None = None
|
||||
self._node_detail: str = ""
|
||||
self._start_time: float | None = None
|
||||
self._final_elapsed: float | None = None
|
||||
|
||||
def compose(self) -> ComposeResult:
|
||||
yield Label(id="status-content")
|
||||
|
||||
def on_mount(self) -> None:
|
||||
self._refresh()
|
||||
self.set_interval(1.0, self._refresh)
|
||||
|
||||
def _format_elapsed(self, seconds: float) -> str:
|
||||
total = int(seconds)
|
||||
hours, remainder = divmod(total, 3600)
|
||||
mins, secs = divmod(remainder, 60)
|
||||
if hours:
|
||||
return f"{hours}:{mins:02d}:{secs:02d}"
|
||||
return f"{mins}:{secs:02d}"
|
||||
|
||||
def _refresh(self) -> None:
|
||||
parts: list[str] = []
|
||||
|
||||
if self._graph_id:
|
||||
parts.append(f"[bold]{self._graph_id}[/bold]")
|
||||
|
||||
if self._state == "idle":
|
||||
parts.append("[dim]○ idle[/dim]")
|
||||
elif self._state == "running":
|
||||
parts.append("[bold green]● running[/bold green]")
|
||||
elif self._state == "completed":
|
||||
parts.append("[green]✓ done[/green]")
|
||||
elif self._state == "failed":
|
||||
parts.append("[bold red]✗ failed[/bold red]")
|
||||
|
||||
if self._active_node:
|
||||
node_str = f"[cyan]{self._active_node}[/cyan]"
|
||||
if self._node_detail:
|
||||
node_str += f" [dim]({self._node_detail})[/dim]"
|
||||
parts.append(node_str)
|
||||
|
||||
if self._state == "running" and self._start_time:
|
||||
parts.append(f"[dim]{self._format_elapsed(time.time() - self._start_time)}[/dim]")
|
||||
elif self._final_elapsed is not None:
|
||||
parts.append(f"[dim]{self._format_elapsed(self._final_elapsed)}[/dim]")
|
||||
|
||||
try:
|
||||
label = self.query_one("#status-content", Label)
|
||||
label.update(" │ ".join(parts))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def set_graph_id(self, graph_id: str) -> None:
|
||||
self._graph_id = graph_id
|
||||
self._refresh()
|
||||
|
||||
def set_running(self, entry_node: str = "") -> None:
|
||||
self._state = "running"
|
||||
self._active_node = entry_node or None
|
||||
self._node_detail = ""
|
||||
self._start_time = time.time()
|
||||
self._final_elapsed = None
|
||||
self._refresh()
|
||||
|
||||
def set_completed(self) -> None:
|
||||
self._state = "completed"
|
||||
if self._start_time:
|
||||
self._final_elapsed = time.time() - self._start_time
|
||||
self._active_node = None
|
||||
self._node_detail = ""
|
||||
self._start_time = None
|
||||
self._refresh()
|
||||
|
||||
def set_failed(self, error: str = "") -> None:
|
||||
self._state = "failed"
|
||||
if self._start_time:
|
||||
self._final_elapsed = time.time() - self._start_time
|
||||
self._node_detail = error[:40] if error else ""
|
||||
self._start_time = None
|
||||
self._refresh()
|
||||
|
||||
def set_active_node(self, node_id: str, detail: str = "") -> None:
|
||||
self._active_node = node_id
|
||||
self._node_detail = detail
|
||||
self._refresh()
|
||||
|
||||
def set_node_detail(self, detail: str) -> None:
|
||||
self._node_detail = detail
|
||||
self._refresh()
|
||||
|
||||
|
||||
class AdenTUI(App):
|
||||
TITLE = "Aden TUI Dashboard"
|
||||
COMMAND_PALETTE_BINDING = "ctrl+o"
|
||||
CSS = """
|
||||
Screen {
|
||||
layout: vertical;
|
||||
background: $surface;
|
||||
}
|
||||
|
||||
#left-pane {
|
||||
width: 60%;
|
||||
height: 100%;
|
||||
layout: vertical;
|
||||
background: $surface;
|
||||
}
|
||||
|
||||
GraphOverview {
|
||||
height: 40%;
|
||||
background: $panel;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
LogPane {
|
||||
height: 60%;
|
||||
background: $surface;
|
||||
padding: 0;
|
||||
margin-bottom: 1;
|
||||
}
|
||||
|
||||
ChatRepl {
|
||||
width: 40%;
|
||||
height: 100%;
|
||||
background: $panel;
|
||||
border-left: tall $primary;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
#chat-history {
|
||||
height: 1fr;
|
||||
width: 100%;
|
||||
background: $surface;
|
||||
border: none;
|
||||
scrollbar-background: $panel;
|
||||
scrollbar-color: $primary;
|
||||
}
|
||||
|
||||
RichLog {
|
||||
background: $surface;
|
||||
border: none;
|
||||
scrollbar-background: $panel;
|
||||
scrollbar-color: $primary;
|
||||
}
|
||||
|
||||
Input {
|
||||
background: $surface;
|
||||
border: tall $primary;
|
||||
margin-top: 1;
|
||||
}
|
||||
|
||||
Input:focus {
|
||||
border: tall $accent;
|
||||
}
|
||||
|
||||
StatusBar {
|
||||
background: $panel;
|
||||
color: $text;
|
||||
height: 1;
|
||||
padding: 0 1;
|
||||
}
|
||||
|
||||
Footer {
|
||||
background: $panel;
|
||||
color: $text-muted;
|
||||
}
|
||||
"""
|
||||
|
||||
BINDINGS = [
|
||||
Binding("q", "quit", "Quit"),
|
||||
Binding("ctrl+s", "screenshot", "Screenshot (SVG)", show=True, priority=True),
|
||||
Binding("tab", "focus_next", "Next Panel", show=True),
|
||||
Binding("shift+tab", "focus_previous", "Previous Panel", show=False),
|
||||
]
|
||||
|
||||
def __init__(self, runtime: AgentRuntime):
|
||||
super().__init__()
|
||||
|
||||
self.runtime = runtime
|
||||
self.log_pane = LogPane()
|
||||
self.graph_view = GraphOverview(runtime)
|
||||
self.chat_repl = ChatRepl(runtime)
|
||||
self.status_bar = StatusBar(graph_id=runtime.graph.id)
|
||||
self.is_ready = False
|
||||
|
||||
def compose(self) -> ComposeResult:
|
||||
yield self.status_bar
|
||||
|
||||
yield Horizontal(
|
||||
Vertical(
|
||||
self.log_pane,
|
||||
self.graph_view,
|
||||
id="left-pane",
|
||||
),
|
||||
self.chat_repl,
|
||||
)
|
||||
|
||||
yield Footer()
|
||||
|
||||
async def on_mount(self) -> None:
|
||||
"""Called when app starts."""
|
||||
self.title = "Aden TUI Dashboard"
|
||||
|
||||
# Add logging setup
|
||||
self._setup_logging_queue()
|
||||
|
||||
# Set ready immediately so _poll_logs can process messages
|
||||
self.is_ready = True
|
||||
|
||||
# Add event subscription with delay to ensure TUI is fully initialized
|
||||
self.call_later(self._init_runtime_connection)
|
||||
|
||||
# Delay initial log messages until layout is fully rendered
|
||||
def write_initial_logs():
|
||||
logging.info("TUI Dashboard initialized successfully")
|
||||
logging.info("Waiting for agent execution to start...")
|
||||
|
||||
# Wait for layout to be fully rendered before writing logs
|
||||
self.set_timer(0.2, write_initial_logs)
|
||||
|
||||
def _setup_logging_queue(self) -> None:
|
||||
"""Setup a thread-safe queue for logs."""
|
||||
try:
|
||||
import queue
|
||||
from logging.handlers import QueueHandler
|
||||
|
||||
self.log_queue = queue.Queue()
|
||||
self.queue_handler = QueueHandler(self.log_queue)
|
||||
self.queue_handler.setLevel(logging.INFO)
|
||||
|
||||
# Get root logger
|
||||
root_logger = logging.getLogger()
|
||||
|
||||
# Remove ALL existing handlers to prevent stdout output
|
||||
# This is critical - StreamHandlers cause text to appear in header
|
||||
for handler in root_logger.handlers[:]:
|
||||
root_logger.removeHandler(handler)
|
||||
|
||||
# Add ONLY our queue handler
|
||||
root_logger.addHandler(self.queue_handler)
|
||||
root_logger.setLevel(logging.INFO)
|
||||
|
||||
# Suppress LiteLLM logging completely
|
||||
litellm_logger = logging.getLogger("LiteLLM")
|
||||
litellm_logger.setLevel(logging.CRITICAL) # Only show critical errors
|
||||
litellm_logger.propagate = False # Don't propagate to root logger
|
||||
|
||||
# Start polling
|
||||
self.set_interval(0.1, self._poll_logs)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _poll_logs(self) -> None:
|
||||
"""Poll the log queue and update UI."""
|
||||
if not self.is_ready:
|
||||
return
|
||||
|
||||
try:
|
||||
while not self.log_queue.empty():
|
||||
record = self.log_queue.get_nowait()
|
||||
# Filter out framework/library logs
|
||||
if record.name.startswith(("textual", "LiteLLM", "litellm")):
|
||||
continue
|
||||
|
||||
self.log_pane.write_python_log(record)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
_EVENT_TYPES = [
|
||||
EventType.LLM_TEXT_DELTA,
|
||||
EventType.CLIENT_OUTPUT_DELTA,
|
||||
EventType.TOOL_CALL_STARTED,
|
||||
EventType.TOOL_CALL_COMPLETED,
|
||||
EventType.EXECUTION_STARTED,
|
||||
EventType.EXECUTION_COMPLETED,
|
||||
EventType.EXECUTION_FAILED,
|
||||
EventType.NODE_LOOP_STARTED,
|
||||
EventType.NODE_LOOP_ITERATION,
|
||||
EventType.NODE_LOOP_COMPLETED,
|
||||
EventType.CLIENT_INPUT_REQUESTED,
|
||||
EventType.NODE_STALLED,
|
||||
EventType.GOAL_PROGRESS,
|
||||
EventType.GOAL_ACHIEVED,
|
||||
EventType.CONSTRAINT_VIOLATION,
|
||||
EventType.STATE_CHANGED,
|
||||
EventType.NODE_INPUT_BLOCKED,
|
||||
]
|
||||
|
||||
_LOG_PANE_EVENTS = frozenset(_EVENT_TYPES) - {
|
||||
EventType.LLM_TEXT_DELTA,
|
||||
EventType.CLIENT_OUTPUT_DELTA,
|
||||
}
|
||||
|
||||
async def _init_runtime_connection(self) -> None:
|
||||
"""Subscribe to runtime events with an async handler."""
|
||||
try:
|
||||
self._subscription_id = self.runtime.subscribe_to_events(
|
||||
event_types=self._EVENT_TYPES,
|
||||
handler=self._handle_event,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
async def _handle_event(self, event: AgentEvent) -> None:
|
||||
"""Called from the agent thread — bridge to Textual's main thread."""
|
||||
try:
|
||||
self.call_from_thread(self._route_event, event)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _route_event(self, event: AgentEvent) -> None:
|
||||
"""Route incoming events to widgets. Runs on Textual's main thread."""
|
||||
if not self.is_ready:
|
||||
return
|
||||
|
||||
try:
|
||||
et = event.type
|
||||
|
||||
# --- Chat REPL events ---
|
||||
if et in (EventType.LLM_TEXT_DELTA, EventType.CLIENT_OUTPUT_DELTA):
|
||||
self.chat_repl.handle_text_delta(
|
||||
event.data.get("content", ""),
|
||||
event.data.get("snapshot", ""),
|
||||
)
|
||||
elif et == EventType.TOOL_CALL_STARTED:
|
||||
self.chat_repl.handle_tool_started(
|
||||
event.data.get("tool_name", "unknown"),
|
||||
event.data.get("tool_input", {}),
|
||||
)
|
||||
elif et == EventType.TOOL_CALL_COMPLETED:
|
||||
self.chat_repl.handle_tool_completed(
|
||||
event.data.get("tool_name", "unknown"),
|
||||
event.data.get("result", ""),
|
||||
event.data.get("is_error", False),
|
||||
)
|
||||
elif et == EventType.EXECUTION_COMPLETED:
|
||||
self.chat_repl.handle_execution_completed(event.data.get("output", {}))
|
||||
elif et == EventType.EXECUTION_FAILED:
|
||||
self.chat_repl.handle_execution_failed(event.data.get("error", "Unknown error"))
|
||||
elif et == EventType.CLIENT_INPUT_REQUESTED:
|
||||
self.chat_repl.handle_input_requested(
|
||||
event.node_id or event.data.get("node_id", ""),
|
||||
)
|
||||
|
||||
# --- Graph view events ---
|
||||
if et in (
|
||||
EventType.EXECUTION_STARTED,
|
||||
EventType.EXECUTION_COMPLETED,
|
||||
EventType.EXECUTION_FAILED,
|
||||
):
|
||||
self.graph_view.update_execution(event)
|
||||
|
||||
if et == EventType.NODE_LOOP_STARTED:
|
||||
self.graph_view.handle_node_loop_started(event.node_id or "")
|
||||
elif et == EventType.NODE_LOOP_ITERATION:
|
||||
self.graph_view.handle_node_loop_iteration(
|
||||
event.node_id or "",
|
||||
event.data.get("iteration", 0),
|
||||
)
|
||||
elif et == EventType.NODE_LOOP_COMPLETED:
|
||||
self.graph_view.handle_node_loop_completed(event.node_id or "")
|
||||
elif et == EventType.NODE_STALLED:
|
||||
self.graph_view.handle_stalled(
|
||||
event.node_id or "",
|
||||
event.data.get("reason", ""),
|
||||
)
|
||||
|
||||
if et == EventType.TOOL_CALL_STARTED:
|
||||
self.graph_view.handle_tool_call(
|
||||
event.node_id or "",
|
||||
event.data.get("tool_name", "unknown"),
|
||||
started=True,
|
||||
)
|
||||
elif et == EventType.TOOL_CALL_COMPLETED:
|
||||
self.graph_view.handle_tool_call(
|
||||
event.node_id or "",
|
||||
event.data.get("tool_name", "unknown"),
|
||||
started=False,
|
||||
)
|
||||
|
||||
# --- Status bar events ---
|
||||
if et == EventType.EXECUTION_STARTED:
|
||||
entry_node = event.data.get("entry_node") or (
|
||||
self.runtime.graph.entry_node if self.runtime else ""
|
||||
)
|
||||
self.status_bar.set_running(entry_node)
|
||||
elif et == EventType.EXECUTION_COMPLETED:
|
||||
self.status_bar.set_completed()
|
||||
elif et == EventType.EXECUTION_FAILED:
|
||||
self.status_bar.set_failed(event.data.get("error", ""))
|
||||
elif et == EventType.NODE_LOOP_STARTED:
|
||||
self.status_bar.set_active_node(event.node_id or "", "thinking...")
|
||||
elif et == EventType.NODE_LOOP_ITERATION:
|
||||
self.status_bar.set_node_detail(f"step {event.data.get('iteration', '?')}")
|
||||
elif et == EventType.TOOL_CALL_STARTED:
|
||||
self.status_bar.set_node_detail(f"{event.data.get('tool_name', '')}...")
|
||||
elif et == EventType.TOOL_CALL_COMPLETED:
|
||||
self.status_bar.set_node_detail("thinking...")
|
||||
elif et == EventType.NODE_STALLED:
|
||||
self.status_bar.set_node_detail(f"stalled: {event.data.get('reason', '')}")
|
||||
|
||||
# --- Log pane events ---
|
||||
if et in self._LOG_PANE_EVENTS:
|
||||
self.log_pane.write_event(event)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def save_screenshot(self, filename: str | None = None) -> str:
    """Export the current screen to an SVG file (viewable in browsers).

    Args:
        filename: Optional output name; a timestamp-based default is
            generated when None. A ``.svg`` suffix is appended if missing.

    Returns:
        Path to the saved SVG file, as a string.
    """
    from datetime import datetime
    from pathlib import Path

    # All captures go into a local "screenshots" directory.
    out_dir = Path("screenshots")
    out_dir.mkdir(exist_ok=True)

    if filename is None:
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"tui_screenshot_{stamp}.svg"
    if not filename.endswith(".svg"):
        filename = f"{filename}.svg"

    target = out_dir / filename

    # Temporarily strip decorative borders so the capture looks clean.
    chat_widget = self.query_one(ChatRepl)
    saved_chat_border = chat_widget.styles.border_left
    chat_widget.styles.border_left = ("none", "transparent")

    inputs = list(self.query("Input"))
    saved_input_borders = [w.styles.border for w in inputs]
    for w in inputs:
        w.styles.border = ("none", "transparent")

    try:
        # Render the current screen to SVG and persist it.
        target.write_text(self.export_screenshot(), encoding="utf-8")
    finally:
        # Always restore the original styling, even if the export fails.
        chat_widget.styles.border_left = saved_chat_border
        for w, border in zip(inputs, saved_input_borders):
            w.styles.border = border

    return str(target)
|
||||
|
||||
def action_screenshot(self) -> None:
    """Take a screenshot (bound to Ctrl+S) and report the outcome."""
    try:
        saved_path = self.save_screenshot()
    except Exception as e:
        # Surface the failure to the user instead of crashing the app.
        self.notify(f"Screenshot failed: {e}", severity="error", timeout=5)
    else:
        self.notify(
            f"Screenshot saved: {saved_path} (SVG - open in browser)",
            severity="information",
            timeout=5,
        )
|
||||
|
||||
async def on_unmount(self) -> None:
    """Cleanup on app shutdown: stop event routing and detach the log handler."""
    self.is_ready = False

    # Teardown is best-effort: shutdown must never raise.
    try:
        sub_id = getattr(self, "_subscription_id", None)
        if sub_id is not None:
            self.runtime.unsubscribe_from_events(sub_id)
    except Exception:
        pass

    try:
        handler = getattr(self, "queue_handler", None)
        if handler is not None:
            logging.getLogger().removeHandler(handler)
    except Exception:
        pass
|
||||
@@ -0,0 +1,303 @@
|
||||
"""
|
||||
Chat / REPL Widget - Uses RichLog for append-only, selection-safe display.
|
||||
|
||||
Streaming display approach:
|
||||
- The processing-indicator Label is used as a live status bar during streaming
|
||||
(Label.update() replaces text in-place, unlike RichLog which is append-only).
|
||||
- On EXECUTION_COMPLETED, the final output is written to RichLog as permanent history.
|
||||
- Tool events are written directly to RichLog as discrete status lines.
|
||||
|
||||
Client-facing input:
|
||||
- When a client_facing=True EventLoopNode emits CLIENT_INPUT_REQUESTED, the
|
||||
ChatRepl transitions to "waiting for input" state: input is re-enabled and
|
||||
subsequent submissions are routed to runtime.inject_input() instead of
|
||||
starting a new execution.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import threading
|
||||
from typing import Any
|
||||
|
||||
from textual.app import ComposeResult
|
||||
from textual.containers import Vertical
|
||||
from textual.widgets import Input, Label, RichLog
|
||||
|
||||
from framework.runtime.agent_runtime import AgentRuntime
|
||||
|
||||
|
||||
class ChatRepl(Vertical):
    """Widget for interactive chat/REPL.

    The RichLog is the permanent, append-only chat history; the
    processing-indicator Label is a live status line updated in place
    during streaming. Agent execution runs on a dedicated event loop in
    a background thread so blocking runtime code never stalls the UI.
    """

    DEFAULT_CSS = """
    ChatRepl {
        width: 100%;
        height: 100%;
        layout: vertical;
    }

    ChatRepl > RichLog {
        width: 100%;
        height: 1fr;
        background: $surface;
        border: none;
        scrollbar-background: $panel;
        scrollbar-color: $primary;
    }

    ChatRepl > #processing-indicator {
        width: 100%;
        height: 1;
        background: $primary 20%;
        color: $text;
        text-style: bold;
        display: none;
    }

    ChatRepl > Input {
        width: 100%;
        height: auto;
        dock: bottom;
        background: $surface;
        border: tall $primary;
        margin-top: 1;
    }

    ChatRepl > Input:focus {
        border: tall $accent;
    }
    """

    def __init__(self, runtime: AgentRuntime):
        """Create the REPL bound to *runtime* and start the agent thread."""
        super().__init__()
        self.runtime = runtime
        # Exec id of the in-flight run; None when idle. Doubles as the
        # double-submit guard in on_input_submitted.
        self._current_exec_id: str | None = None
        # Accumulated streamed text; flushed to history on completion or
        # when a client-facing node requests input.
        self._streaming_snapshot: str = ""
        # True while a client_facing node is waiting for user input.
        self._waiting_for_input: bool = False
        # Node id that the next submission should be injected into.
        self._input_node_id: str | None = None

        # Dedicated event loop for agent execution.
        # Keeps blocking runtime code (LLM calls, MCP tools) off
        # the Textual event loop so the UI stays responsive.
        self._agent_loop = asyncio.new_event_loop()
        self._agent_thread = threading.Thread(
            target=self._agent_loop.run_forever,
            daemon=True,  # don't block interpreter shutdown
            name="agent-execution",
        )
        self._agent_thread.start()

    def compose(self) -> ComposeResult:
        """Yield the history log, live status indicator, and input box."""
        yield RichLog(id="chat-history", highlight=True, markup=True, auto_scroll=False, wrap=True)
        yield Label("Agent is processing...", id="processing-indicator")
        yield Input(placeholder="Enter input for agent...", id="chat-input")

    def _write_history(self, content: str) -> None:
        """Write to chat history, only auto-scrolling if user is at the bottom."""
        history = self.query_one("#chat-history", RichLog)
        # Capture position BEFORE writing: a write can move the scroll end.
        was_at_bottom = history.is_vertical_scroll_end
        history.write(content)
        if was_at_bottom:
            history.scroll_end(animate=False)

    def on_mount(self) -> None:
        """Add welcome message when widget mounts."""
        history = self.query_one("#chat-history", RichLog)
        history.write("[bold cyan]Chat REPL Ready[/bold cyan] — Type your input below\n")

    async def on_input_submitted(self, message: Input.Submitted) -> None:
        """Handle input submission — either start new execution or inject input.

        Two modes:
        - waiting-for-input: route the text to the blocked client-facing
          node via runtime.inject_input() and return.
        - idle: start a new execution via runtime.trigger().
        """
        user_input = message.value.strip()
        if not user_input:
            return

        # Client-facing input: route to the waiting node
        if self._waiting_for_input and self._input_node_id:
            self._write_history(f"[bold green]You:[/bold green] {user_input}")
            message.input.value = ""

            # Disable input while agent processes the response
            chat_input = self.query_one("#chat-input", Input)
            chat_input.disabled = True
            chat_input.placeholder = "Enter input for agent..."
            self._waiting_for_input = False

            indicator = self.query_one("#processing-indicator", Label)
            indicator.update("Thinking...")

            node_id = self._input_node_id
            self._input_node_id = None

            try:
                future = asyncio.run_coroutine_threadsafe(
                    self.runtime.inject_input(node_id, user_input),
                    self._agent_loop,
                )
                await asyncio.wrap_future(future)
            except Exception as e:
                self._write_history(f"[bold red]Error delivering input:[/bold red] {e}")
            # BUGFIX: return unconditionally. Previously only the except
            # branch returned, so a *successful* injection fell through to
            # the guard below and spuriously wrote "Agent is still running".
            return

        # Double-submit guard: reject input while an execution is in-flight
        if self._current_exec_id is not None:
            self._write_history("[dim]Agent is still running — please wait.[/dim]")
            return

        indicator = self.query_one("#processing-indicator", Label)

        # Append user message and clear input
        self._write_history(f"[bold green]You:[/bold green] {user_input}")
        message.input.value = ""

        try:
            # Get entry point
            entry_points = self.runtime.get_entry_points()
            if not entry_points:
                self._write_history("[bold red]Error:[/bold red] No entry points")
                return

            # Determine the input key from the entry node
            entry_point = entry_points[0]
            entry_node = self.runtime.graph.get_node(entry_point.entry_node)

            if entry_node and entry_node.input_keys:
                input_key = entry_node.input_keys[0]
            else:
                input_key = "input"

            # Reset streaming state
            self._streaming_snapshot = ""

            # Show processing indicator
            indicator.update("Thinking...")
            indicator.display = True

            # Disable input while the agent is working
            chat_input = self.query_one("#chat-input", Input)
            chat_input.disabled = True

            # Submit execution to the dedicated agent loop so blocking
            # runtime code (LLM, MCP tools) never touches Textual's loop.
            # trigger() returns immediately with an exec_id; the heavy
            # execution task runs entirely on the agent thread.
            future = asyncio.run_coroutine_threadsafe(
                self.runtime.trigger(
                    entry_point_id=entry_point.id,
                    input_data={input_key: user_input},
                ),
                self._agent_loop,
            )
            # wrap_future lets us await without blocking Textual's loop
            self._current_exec_id = await asyncio.wrap_future(future)

        except Exception as e:
            indicator.display = False
            self._current_exec_id = None
            # Re-enable input on error
            chat_input = self.query_one("#chat-input", Input)
            chat_input.disabled = False
            self._write_history(f"[bold red]Error:[/bold red] {e}")

    # -- Event handlers called by app.py _handle_event --

    def handle_text_delta(self, content: str, snapshot: str) -> None:
        """Handle a streaming text token from the LLM.

        Args:
            content: The new token (unused; the snapshot carries all text).
            snapshot: Full accumulated text so far.
        """
        self._streaming_snapshot = snapshot

        # Show a truncated live preview in the indicator label
        indicator = self.query_one("#processing-indicator", Label)
        preview = snapshot[-80:] if len(snapshot) > 80 else snapshot
        # Replace newlines for single-line display
        preview = preview.replace("\n", " ")
        indicator.update(
            f"Thinking: ...{preview}" if len(snapshot) > 80 else f"Thinking: {preview}"
        )

    def handle_tool_started(self, tool_name: str, tool_input: dict[str, Any]) -> None:
        """Handle a tool call starting."""
        # Update indicator to show tool activity
        indicator = self.query_one("#processing-indicator", Label)
        indicator.update(f"Using tool: {tool_name}...")

        # Write a discrete status line to history
        self._write_history(f"[dim]Tool: {tool_name}[/dim]")

    def handle_tool_completed(self, tool_name: str, result: str, is_error: bool) -> None:
        """Handle a tool call completing: log a result preview, resume 'Thinking'."""
        result_str = str(result)
        preview = result_str[:200] + "..." if len(result_str) > 200 else result_str
        preview = preview.replace("\n", " ")

        if is_error:
            self._write_history(f"[dim red]Tool {tool_name} error: {preview}[/dim red]")
        else:
            self._write_history(f"[dim]Tool {tool_name} result: {preview}[/dim]")

        # Restore thinking indicator
        indicator = self.query_one("#processing-indicator", Label)
        indicator.update("Thinking...")

    def handle_execution_completed(self, output: dict[str, Any]) -> None:
        """Handle execution finishing successfully: flush history, reset state."""
        indicator = self.query_one("#processing-indicator", Label)
        indicator.display = False

        # Write the final streaming snapshot to permanent history (if any)
        if self._streaming_snapshot:
            self._write_history(f"[bold blue]Agent:[/bold blue] {self._streaming_snapshot}")
        else:
            output_str = str(output.get("output_string", output))
            self._write_history(f"[bold blue]Agent:[/bold blue] {output_str}")
        self._write_history("")  # separator

        self._current_exec_id = None
        self._streaming_snapshot = ""
        self._waiting_for_input = False
        self._input_node_id = None

        # Re-enable input
        chat_input = self.query_one("#chat-input", Input)
        chat_input.disabled = False
        chat_input.placeholder = "Enter input for agent..."
        chat_input.focus()

    def handle_execution_failed(self, error: str) -> None:
        """Handle execution failing: show the error and reset state."""
        indicator = self.query_one("#processing-indicator", Label)
        indicator.display = False

        self._write_history(f"[bold red]Error:[/bold red] {error}")
        self._write_history("")  # separator

        self._current_exec_id = None
        self._streaming_snapshot = ""
        self._waiting_for_input = False
        self._input_node_id = None

        # Re-enable input
        chat_input = self.query_one("#chat-input", Input)
        chat_input.disabled = False
        chat_input.placeholder = "Enter input for agent..."
        chat_input.focus()

    def handle_input_requested(self, node_id: str) -> None:
        """Handle a client-facing node requesting user input.

        Transitions to 'waiting for input' state: flushes the current
        streaming snapshot to history, re-enables the input widget,
        and sets a flag so the next submission routes to inject_input().
        """
        # Flush accumulated streaming text as agent output
        if self._streaming_snapshot:
            self._write_history(f"[bold blue]Agent:[/bold blue] {self._streaming_snapshot}")
            self._streaming_snapshot = ""

        self._waiting_for_input = True
        self._input_node_id = node_id or None

        indicator = self.query_one("#processing-indicator", Label)
        indicator.update("Waiting for your input...")

        chat_input = self.query_one("#chat-input", Input)
        chat_input.disabled = False
        chat_input.placeholder = "Type your response..."
        chat_input.focus()
|
||||
@@ -0,0 +1,194 @@
|
||||
"""
|
||||
Graph/Tree Overview Widget - Displays real agent graph structure.
|
||||
"""
|
||||
|
||||
from textual.app import ComposeResult
|
||||
from textual.containers import Vertical
|
||||
from textual.widgets import RichLog
|
||||
|
||||
from framework.runtime.agent_runtime import AgentRuntime
|
||||
from framework.runtime.event_bus import EventType
|
||||
|
||||
|
||||
class GraphOverview(Vertical):
    """Widget to display Agent execution graph/tree with real data.

    Renders the runtime's graph as an ASCII DAG in a RichLog and keeps
    per-node status strings updated from execution events.
    """

    DEFAULT_CSS = """
    GraphOverview {
        width: 100%;
        height: 100%;
        background: $panel;
    }

    GraphOverview > RichLog {
        width: 100%;
        height: 100%;
        background: $panel;
        border: none;
        scrollbar-background: $surface;
        scrollbar-color: $primary;
    }
    """

    def __init__(self, runtime: AgentRuntime):
        """Bind the overview to *runtime*, whose graph it renders."""
        super().__init__()
        self.runtime = runtime
        # Node currently executing, or None when idle.
        self.active_node: str | None = None
        # Ordered list of nodes visited during the current execution.
        self.execution_path: list[str] = []
        # Per-node status strings shown next to the node in the graph display.
        # e.g. {"planner": "thinking...", "searcher": "web_search..."}
        self._node_status: dict[str, str] = {}

    def compose(self) -> ComposeResult:
        # Use RichLog for formatted output
        yield RichLog(id="graph-display", highlight=True, markup=True)

    def on_mount(self) -> None:
        """Display initial graph structure."""
        self._display_graph()

    def _topo_order(self) -> list[str]:
        """BFS from entry_node following edges; unreachable nodes appended last.

        Returns:
            Node ids in BFS order from the entry node, then orphans.
        """
        graph = self.runtime.graph
        visited: list[str] = []
        seen: set[str] = set()
        queue = [graph.entry_node]
        # Index cursor instead of list.pop(0): pop(0) shifts the whole
        # list each time, making the BFS accidentally O(n^2).
        idx = 0
        while idx < len(queue):
            nid = queue[idx]
            idx += 1
            if nid in seen:
                continue
            seen.add(nid)
            visited.append(nid)
            for edge in graph.get_outgoing_edges(nid):
                if edge.target not in seen:
                    queue.append(edge.target)
        # Append orphan nodes not reachable from entry
        for node in graph.nodes:
            if node.id not in seen:
                visited.append(node.id)
        return visited

    def _render_node_line(self, node_id: str) -> str:
        """Render a single node with status symbol and optional status text."""
        graph = self.runtime.graph
        is_terminal = node_id in (graph.terminal_nodes or [])
        is_active = node_id == self.active_node
        is_done = node_id in self.execution_path and not is_active
        status = self._node_status.get(node_id, "")

        # Symbol: active > done > terminal > plain.
        if is_active:
            sym = "[bold green]●[/bold green]"
        elif is_done:
            sym = "[dim]✓[/dim]"
        elif is_terminal:
            sym = "[yellow]■[/yellow]"
        else:
            sym = "○"

        if is_active:
            name = f"[bold green]{node_id}[/bold green]"
        elif is_done:
            name = f"[dim]{node_id}[/dim]"
        else:
            name = node_id

        suffix = f" [italic]{status}[/italic]" if status else ""
        return f" {sym} {name}{suffix}"

    def _render_edges(self, node_id: str) -> list[str]:
        """Render edge connectors from this node to its targets."""
        edges = self.runtime.graph.get_outgoing_edges(node_id)
        if not edges:
            return []
        if len(edges) == 1:
            return ["   │", "   ▼"]
        # Fan-out: show branches
        lines: list[str] = []
        for i, edge in enumerate(edges):
            connector = "└" if i == len(edges) - 1 else "├"
            cond = ""
            # Only annotate non-default conditions to keep the display terse.
            if edge.condition.value not in ("always", "on_success"):
                cond = f" [dim]({edge.condition.value})[/dim]"
            lines.append(f"   {connector}──▶ {edge.target}{cond}")
        return lines

    def _display_graph(self) -> None:
        """Display the graph as an ASCII DAG with edge connectors."""
        display = self.query_one("#graph-display", RichLog)
        display.clear()

        graph = self.runtime.graph
        display.write(f"[bold cyan]Agent Graph:[/bold cyan] {graph.id}\n")

        # Render each node in topological order with edges
        ordered = self._topo_order()
        for node_id in ordered:
            display.write(self._render_node_line(node_id))
            for edge_line in self._render_edges(node_id):
                display.write(edge_line)

        # Execution path footer
        if self.execution_path:
            display.write("")
            display.write(f"[dim]Path:[/dim] {' → '.join(self.execution_path[-5:])}")

    def update_active_node(self, node_id: str) -> None:
        """Update the currently active node and redraw."""
        self.active_node = node_id
        if node_id not in self.execution_path:
            self.execution_path.append(node_id)
        self._display_graph()

    def update_execution(self, event) -> None:
        """Update the displayed node status based on execution lifecycle events."""
        if event.type == EventType.EXECUTION_STARTED:
            # Fresh run: drop any state left over from a previous execution.
            self._node_status.clear()
            self.execution_path.clear()
            entry_node = event.data.get("entry_node") or (
                self.runtime.graph.entry_node if self.runtime else None
            )
            if entry_node:
                self.update_active_node(entry_node)

        elif event.type == EventType.EXECUTION_COMPLETED:
            self.active_node = None
            self._node_status.clear()
            self._display_graph()

        elif event.type == EventType.EXECUTION_FAILED:
            error = event.data.get("error", "Unknown error")
            # Pin the failure message on the node that was running.
            if self.active_node:
                self._node_status[self.active_node] = f"[red]FAILED: {error}[/red]"
            self.active_node = None
            self._display_graph()

    # -- Event handlers called by app.py _handle_event --

    def handle_node_loop_started(self, node_id: str) -> None:
        """A node's event loop has started."""
        self._node_status[node_id] = "thinking..."
        self.update_active_node(node_id)

    def handle_node_loop_iteration(self, node_id: str, iteration: int) -> None:
        """A node advanced to a new loop iteration."""
        self._node_status[node_id] = f"step {iteration}"
        self._display_graph()

    def handle_node_loop_completed(self, node_id: str) -> None:
        """A node's event loop completed."""
        self._node_status.pop(node_id, None)
        self._display_graph()

    def handle_tool_call(self, node_id: str, tool_name: str, *, started: bool) -> None:
        """Show tool activity next to the active node."""
        if started:
            self._node_status[node_id] = f"{tool_name}..."
        else:
            # Restore to generic thinking status after tool completes
            self._node_status[node_id] = "thinking..."
        self._display_graph()

    def handle_stalled(self, node_id: str, reason: str) -> None:
        """Highlight a stalled node."""
        self._node_status[node_id] = f"[red]stalled: {reason}[/red]"
        self._display_graph()
|
||||
@@ -0,0 +1,147 @@
|
||||
"""
|
||||
Log Pane Widget - Uses RichLog for reliable rendering.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from datetime import datetime
|
||||
|
||||
from textual.app import ComposeResult
|
||||
from textual.containers import Container
|
||||
from textual.widgets import RichLog
|
||||
|
||||
from framework.runtime.event_bus import AgentEvent, EventType
|
||||
|
||||
|
||||
class LogPane(Container):
|
||||
"""Widget to display logs with reliable rendering."""
|
||||
|
||||
_EVENT_FORMAT: dict[EventType, tuple[str, str]] = {
|
||||
EventType.EXECUTION_STARTED: (">>", "bold cyan"),
|
||||
EventType.EXECUTION_COMPLETED: ("<<", "bold green"),
|
||||
EventType.EXECUTION_FAILED: ("!!", "bold red"),
|
||||
EventType.TOOL_CALL_STARTED: ("->", "yellow"),
|
||||
EventType.TOOL_CALL_COMPLETED: ("<-", "green"),
|
||||
EventType.NODE_LOOP_STARTED: ("@@", "cyan"),
|
||||
EventType.NODE_LOOP_ITERATION: ("..", "dim"),
|
||||
EventType.NODE_LOOP_COMPLETED: ("@@", "dim"),
|
||||
EventType.NODE_STALLED: ("!!", "bold yellow"),
|
||||
EventType.NODE_INPUT_BLOCKED: ("!!", "yellow"),
|
||||
EventType.GOAL_PROGRESS: ("%%", "blue"),
|
||||
EventType.GOAL_ACHIEVED: ("**", "bold green"),
|
||||
EventType.CONSTRAINT_VIOLATION: ("!!", "bold red"),
|
||||
EventType.STATE_CHANGED: ("~~", "dim"),
|
||||
EventType.CLIENT_INPUT_REQUESTED: ("??", "magenta"),
|
||||
}
|
||||
|
||||
_LOG_LEVEL_COLORS = {
|
||||
logging.DEBUG: "dim",
|
||||
logging.INFO: "",
|
||||
logging.WARNING: "yellow",
|
||||
logging.ERROR: "red",
|
||||
logging.CRITICAL: "bold red",
|
||||
}
|
||||
|
||||
DEFAULT_CSS = """
|
||||
LogPane {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
}
|
||||
|
||||
LogPane > RichLog {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
background: $surface;
|
||||
border: none;
|
||||
scrollbar-background: $panel;
|
||||
scrollbar-color: $primary;
|
||||
}
|
||||
"""
|
||||
|
||||
def compose(self) -> ComposeResult:
|
||||
# RichLog is designed for log display and doesn't have TextArea's rendering issues
|
||||
yield RichLog(id="main-log", highlight=True, markup=True, auto_scroll=False)
|
||||
|
||||
def write_event(self, event: AgentEvent) -> None:
|
||||
"""Format an AgentEvent with timestamp + symbol and write to the log."""
|
||||
ts = event.timestamp.strftime("%H:%M:%S")
|
||||
symbol, color = self._EVENT_FORMAT.get(event.type, ("--", "dim"))
|
||||
text = self._extract_event_text(event)
|
||||
self.write_log(f"[dim]{ts}[/dim] [{color}]{symbol} {text}[/{color}]")
|
||||
|
||||
def _extract_event_text(self, event: AgentEvent) -> str:
|
||||
"""Extract human-readable text from an event's data dict."""
|
||||
et = event.type
|
||||
data = event.data
|
||||
|
||||
if et == EventType.EXECUTION_STARTED:
|
||||
return "Execution started"
|
||||
elif et == EventType.EXECUTION_COMPLETED:
|
||||
return "Execution completed"
|
||||
elif et == EventType.EXECUTION_FAILED:
|
||||
return f"Execution FAILED: {data.get('error', 'unknown')}"
|
||||
elif et == EventType.TOOL_CALL_STARTED:
|
||||
return f"Tool call: {data.get('tool_name', 'unknown')}"
|
||||
elif et == EventType.TOOL_CALL_COMPLETED:
|
||||
name = data.get("tool_name", "unknown")
|
||||
if data.get("is_error"):
|
||||
preview = str(data.get("result", ""))[:80]
|
||||
return f"Tool error: {name} - {preview}"
|
||||
return f"Tool done: {name}"
|
||||
elif et == EventType.NODE_LOOP_STARTED:
|
||||
return f"Node started: {event.node_id or 'unknown'}"
|
||||
elif et == EventType.NODE_LOOP_ITERATION:
|
||||
return f"{event.node_id or 'unknown'} iteration {data.get('iteration', '?')}"
|
||||
elif et == EventType.NODE_LOOP_COMPLETED:
|
||||
return f"Node done: {event.node_id or 'unknown'}"
|
||||
elif et == EventType.NODE_STALLED:
|
||||
reason = data.get("reason", "")
|
||||
node = event.node_id or "unknown"
|
||||
return f"Node stalled: {node} - {reason}" if reason else f"Node stalled: {node}"
|
||||
elif et == EventType.NODE_INPUT_BLOCKED:
|
||||
return f"Node input blocked: {event.node_id or 'unknown'}"
|
||||
elif et == EventType.GOAL_PROGRESS:
|
||||
return f"Goal progress: {data.get('progress', '?')}"
|
||||
elif et == EventType.GOAL_ACHIEVED:
|
||||
return "Goal achieved"
|
||||
elif et == EventType.CONSTRAINT_VIOLATION:
|
||||
return f"Constraint violated: {data.get('description', 'unknown')}"
|
||||
elif et == EventType.STATE_CHANGED:
|
||||
return f"State changed: {data.get('key', 'unknown')}"
|
||||
elif et == EventType.CLIENT_INPUT_REQUESTED:
|
||||
return "Waiting for user input"
|
||||
else:
|
||||
return f"{et.value}: {data}"
|
||||
|
||||
def write_python_log(self, record: logging.LogRecord) -> None:
|
||||
"""Format a Python log record with timestamp and severity color."""
|
||||
ts = datetime.fromtimestamp(record.created).strftime("%H:%M:%S")
|
||||
color = self._LOG_LEVEL_COLORS.get(record.levelno, "")
|
||||
msg = record.getMessage()
|
||||
if color:
|
||||
self.write_log(f"[dim]{ts}[/dim] [{color}]{record.levelname}[/{color}] {msg}")
|
||||
else:
|
||||
self.write_log(f"[dim]{ts}[/dim] {record.levelname} {msg}")
|
||||
|
||||
def write_log(self, message: str) -> None:
|
||||
"""Write a log message to the log pane."""
|
||||
try:
|
||||
# Check if widget is mounted
|
||||
if not self.is_mounted:
|
||||
return
|
||||
|
||||
log = self.query_one("#main-log", RichLog)
|
||||
|
||||
# Check if log is mounted
|
||||
if not log.is_mounted:
|
||||
return
|
||||
|
||||
# Only auto-scroll if user is already at the bottom
|
||||
was_at_bottom = log.is_vertical_scroll_end
|
||||
|
||||
log.write(message)
|
||||
|
||||
if was_at_bottom:
|
||||
log.scroll_end(animate=False)
|
||||
|
||||
except Exception:
|
||||
pass
|
||||
+8
-1
@@ -11,16 +11,22 @@ dependencies = [
|
||||
"litellm>=1.81.0",
|
||||
"mcp>=1.0.0",
|
||||
"fastmcp>=2.0.0",
|
||||
"textual>=1.0.0",
|
||||
"pytest>=8.0",
|
||||
"pytest-asyncio>=0.23",
|
||||
"pytest-xdist>=3.0",
|
||||
"tools",
|
||||
]
|
||||
|
||||
# [project.optional-dependencies]
|
||||
[project.optional-dependencies]
|
||||
tui = ["textual>=0.75.0"]
|
||||
|
||||
[project.scripts]
|
||||
hive = "framework.cli:main"
|
||||
|
||||
[tool.uv.sources]
|
||||
tools = { workspace = true }
|
||||
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
@@ -43,6 +49,7 @@ lint.select = [
|
||||
"W", # pycodestyle warnings
|
||||
]
|
||||
|
||||
lint.per-file-ignores."demos/*" = ["E501"]
|
||||
lint.isort.combine-as-imports = true
|
||||
lint.isort.known-first-party = ["framework"]
|
||||
lint.isort.section-order = [
|
||||
|
||||
@@ -1,10 +0,0 @@
|
||||
# Development dependencies
|
||||
-r requirements.txt
|
||||
|
||||
# Testing
|
||||
pytest>=8.0
|
||||
pytest-asyncio>=0.23
|
||||
|
||||
# Linting & type checking
|
||||
ruff>=0.1.0
|
||||
mypy>=1.0
|
||||
@@ -1,14 +0,0 @@
|
||||
# Core dependencies
|
||||
pydantic>=2.0
|
||||
anthropic>=0.40.0
|
||||
httpx>=0.27.0
|
||||
litellm>=1.81.0
|
||||
|
||||
# MCP server dependencies
|
||||
mcp
|
||||
fastmcp
|
||||
|
||||
# Testing (required for test framework)
|
||||
pytest>=8.0
|
||||
pytest-asyncio>=0.23
|
||||
pytest-xdist>=3.0
|
||||
+1
-1
@@ -143,7 +143,7 @@ def main():
|
||||
logger.info("The MCP server is now ready to use!")
|
||||
logger.info("")
|
||||
logger.info(f"{Colors.BLUE}To start the MCP server manually:{Colors.NC}")
|
||||
logger.info(" python -m framework.mcp.agent_builder_server")
|
||||
logger.info(" uv run python -m framework.mcp.agent_builder_server")
|
||||
logger.info("")
|
||||
logger.info(f"{Colors.BLUE}MCP Configuration location:{Colors.NC}")
|
||||
logger.info(f" {mcp_config_path}")
|
||||
|
||||
+4
-4
@@ -19,7 +19,7 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||
cd "$SCRIPT_DIR"
|
||||
|
||||
echo -e "${YELLOW}Step 1: Installing framework package...${NC}"
|
||||
pip install -e . || {
|
||||
uv pip install -e . || {
|
||||
echo -e "${RED}Failed to install framework package${NC}"
|
||||
exit 1
|
||||
}
|
||||
@@ -27,7 +27,7 @@ echo -e "${GREEN}✓ Framework package installed${NC}"
|
||||
echo ""
|
||||
|
||||
echo -e "${YELLOW}Step 2: Installing MCP dependencies...${NC}"
|
||||
pip install mcp fastmcp || {
|
||||
uv pip install mcp fastmcp || {
|
||||
echo -e "${RED}Failed to install MCP dependencies${NC}"
|
||||
exit 1
|
||||
}
|
||||
@@ -59,7 +59,7 @@ fi
|
||||
echo ""
|
||||
|
||||
echo -e "${YELLOW}Step 4: Testing MCP server...${NC}"
|
||||
python -c "from framework.mcp import agent_builder_server; print('✓ MCP server module loads successfully')" || {
|
||||
uv run python -c "from framework.mcp import agent_builder_server; print('✓ MCP server module loads successfully')" || {
|
||||
echo -e "${RED}Failed to import MCP server module${NC}"
|
||||
exit 1
|
||||
}
|
||||
@@ -71,7 +71,7 @@ echo ""
|
||||
echo "The MCP server is now ready to use!"
|
||||
echo ""
|
||||
echo "To start the MCP server manually:"
|
||||
echo " python -m framework.mcp.agent_builder_server"
|
||||
echo " uv run python -m framework.mcp.agent_builder_server"
|
||||
echo ""
|
||||
echo "MCP Configuration location:"
|
||||
echo " $SCRIPT_DIR/.mcp.json"
|
||||
|
||||
@@ -0,0 +1,237 @@
|
||||
"""
|
||||
Tests for client-facing fan-out and event_loop output_key overlap validation.
|
||||
|
||||
Validates two rules added to GraphSpec.validate():
|
||||
1. Fan-out must not have multiple client_facing=True targets.
|
||||
2. Parallel event_loop nodes must have disjoint output_keys.
|
||||
"""
|
||||
|
||||
from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
|
||||
from framework.graph.node import NodeSpec
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Rule 1: client_facing fan-out
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestClientFacingFanOut:
    """Fan-out to multiple client_facing=True targets must be rejected."""

    @staticmethod
    def _fan_out_graph(a_cf: bool, b_cf: bool) -> GraphSpec:
        """Build src -> {a, b} fan-out with the given client_facing flags."""
        return GraphSpec(
            id="g1",
            goal_id="goal1",
            entry_node="src",
            nodes=[
                NodeSpec(id="src", name="src", description="Source node"),
                NodeSpec(id="a", name="a", description="Node a", client_facing=a_cf),
                NodeSpec(id="b", name="b", description="Node b", client_facing=b_cf),
            ],
            edges=[
                EdgeSpec(id="src->a", source="src", target="a", condition=EdgeCondition.ON_SUCCESS),
                EdgeSpec(id="src->b", source="src", target="b", condition=EdgeCondition.ON_SUCCESS),
            ],
        )

    @staticmethod
    def _client_facing_errors(graph: GraphSpec) -> list[str]:
        """Validate the graph and keep only the fan-out rule's errors."""
        return [e for e in graph.validate() if "multiple client-facing" in e]

    def test_fan_out_two_client_facing_fails(self):
        """Two client-facing targets on the same fan-out -> error."""
        errors = self._client_facing_errors(self._fan_out_graph(True, True))
        assert len(errors) == 1
        assert "'src'" in errors[0]

    def test_fan_out_one_client_facing_passes(self):
        """Only one client-facing target -> no error."""
        assert self._client_facing_errors(self._fan_out_graph(True, False)) == []

    def test_fan_out_zero_client_facing_passes(self):
        """No client-facing targets at all -> no error."""
        assert self._client_facing_errors(self._fan_out_graph(False, False)) == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Rule 2: event_loop output_key overlap
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestEventLoopOutputKeyOverlap:
|
||||
"""Parallel event_loop nodes with overlapping output_keys must be rejected."""
|
||||
|
||||
def test_overlapping_output_keys_event_loop_fails(self):
|
||||
"""Two event_loop nodes sharing an output_key -> error."""
|
||||
graph = GraphSpec(
|
||||
id="g1",
|
||||
goal_id="goal1",
|
||||
entry_node="src",
|
||||
nodes=[
|
||||
NodeSpec(id="src", name="src", description="Source node"),
|
||||
NodeSpec(
|
||||
id="a",
|
||||
name="a",
|
||||
description="Node a",
|
||||
node_type="event_loop",
|
||||
output_keys=["status", "shared"],
|
||||
),
|
||||
NodeSpec(
|
||||
id="b",
|
||||
name="b",
|
||||
description="Node b",
|
||||
node_type="event_loop",
|
||||
output_keys=["result", "shared"],
|
||||
),
|
||||
],
|
||||
edges=[
|
||||
EdgeSpec(id="src->a", source="src", target="a", condition=EdgeCondition.ON_SUCCESS),
|
||||
EdgeSpec(id="src->b", source="src", target="b", condition=EdgeCondition.ON_SUCCESS),
|
||||
],
|
||||
)
|
||||
|
||||
errors = graph.validate()
|
||||
key_errors = [e for e in errors if "output_key" in e]
|
||||
assert len(key_errors) == 1
|
||||
assert "'shared'" in key_errors[0]
|
||||
|
||||
def test_disjoint_output_keys_event_loop_passes(self):
|
||||
"""Two event_loop nodes with disjoint output_keys -> no error."""
|
||||
graph = GraphSpec(
|
||||
id="g1",
|
||||
goal_id="goal1",
|
||||
entry_node="src",
|
||||
nodes=[
|
||||
NodeSpec(id="src", name="src", description="Source node"),
|
||||
NodeSpec(
|
||||
id="a",
|
||||
name="a",
|
||||
description="Node a",
|
||||
node_type="event_loop",
|
||||
output_keys=["status"],
|
||||
),
|
||||
NodeSpec(
|
||||
id="b",
|
||||
name="b",
|
||||
description="Node b",
|
||||
node_type="event_loop",
|
||||
output_keys=["result"],
|
||||
),
|
||||
],
|
||||
edges=[
|
||||
EdgeSpec(id="src->a", source="src", target="a", condition=EdgeCondition.ON_SUCCESS),
|
||||
EdgeSpec(id="src->b", source="src", target="b", condition=EdgeCondition.ON_SUCCESS),
|
||||
],
|
||||
)
|
||||
|
||||
errors = graph.validate()
|
||||
key_errors = [e for e in errors if "output_key" in e]
|
||||
assert len(key_errors) == 0
|
||||
|
||||
def test_overlapping_keys_non_event_loop_no_error(self):
|
||||
"""Non-event_loop nodes with overlapping keys -> no error (last-wins OK)."""
|
||||
graph = GraphSpec(
|
||||
id="g1",
|
||||
goal_id="goal1",
|
||||
entry_node="src",
|
||||
nodes=[
|
||||
NodeSpec(id="src", name="src", description="Source node"),
|
||||
NodeSpec(
|
||||
id="a",
|
||||
name="a",
|
||||
description="Node a",
|
||||
node_type="llm_generate",
|
||||
output_keys=["shared"],
|
||||
),
|
||||
NodeSpec(
|
||||
id="b",
|
||||
name="b",
|
||||
description="Node b",
|
||||
node_type="llm_generate",
|
||||
output_keys=["shared"],
|
||||
),
|
||||
],
|
||||
edges=[
|
||||
EdgeSpec(id="src->a", source="src", target="a", condition=EdgeCondition.ON_SUCCESS),
|
||||
EdgeSpec(id="src->b", source="src", target="b", condition=EdgeCondition.ON_SUCCESS),
|
||||
],
|
||||
)
|
||||
|
||||
errors = graph.validate()
|
||||
key_errors = [e for e in errors if "output_key" in e]
|
||||
assert len(key_errors) == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Baseline: no fan-out -> no errors from these rules
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestNoFanOutUnaffected:
|
||||
"""Linear graphs should not trigger either validation rule."""
|
||||
|
||||
def test_no_fan_out_unaffected(self):
|
||||
"""Linear chain with client_facing and event_loop nodes -> no errors."""
|
||||
graph = GraphSpec(
|
||||
id="g1",
|
||||
goal_id="goal1",
|
||||
entry_node="a",
|
||||
terminal_nodes=["c"],
|
||||
nodes=[
|
||||
NodeSpec(id="a", name="a", description="Node a", client_facing=True),
|
||||
NodeSpec(
|
||||
id="b",
|
||||
name="b",
|
||||
description="Node b",
|
||||
node_type="event_loop",
|
||||
output_keys=["x"],
|
||||
),
|
||||
NodeSpec(
|
||||
id="c",
|
||||
name="c",
|
||||
description="Node c",
|
||||
client_facing=True,
|
||||
node_type="event_loop",
|
||||
output_keys=["x"],
|
||||
),
|
||||
],
|
||||
edges=[
|
||||
EdgeSpec(id="a->b", source="a", target="b", condition=EdgeCondition.ON_SUCCESS),
|
||||
EdgeSpec(id="b->c", source="b", target="c", condition=EdgeCondition.ON_SUCCESS),
|
||||
],
|
||||
)
|
||||
|
||||
errors = graph.validate()
|
||||
cf_errors = [e for e in errors if "multiple client-facing" in e]
|
||||
key_errors = [e for e in errors if "output_key" in e]
|
||||
assert len(cf_errors) == 0
|
||||
assert len(key_errors) == 0
|
||||
@@ -0,0 +1,150 @@
|
||||
"""
|
||||
Tests for ClientIO gateway (WP-9).
|
||||
|
||||
Covers:
|
||||
- ActiveNodeClientIO: emit_output → output_stream round-trip, request_input, timeout
|
||||
- InertNodeClientIO: emit_output publishes NODE_INTERNAL_OUTPUT, request_input returns redirect
|
||||
- ClientIOGateway: factory creates correct variant
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
|
||||
import pytest
|
||||
|
||||
from framework.graph.client_io import (
|
||||
ActiveNodeClientIO,
|
||||
ClientIOGateway,
|
||||
InertNodeClientIO,
|
||||
NodeClientIO,
|
||||
)
|
||||
from framework.runtime.event_bus import AgentEvent, EventType
|
||||
|
||||
_AGENT_EVENT_FIELDS = {"stream_id", "node_id", "execution_id", "correlation_id"}
|
||||
|
||||
|
||||
class MockEventBus:
|
||||
"""Lightweight stand-in for EventBus that records published events."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.events: list[AgentEvent] = []
|
||||
|
||||
async def _record(self, event_type: EventType, **kwargs) -> None:
|
||||
agent_kwargs = {k: v for k, v in kwargs.items() if k in _AGENT_EVENT_FIELDS}
|
||||
data = {k: v for k, v in kwargs.items() if k not in _AGENT_EVENT_FIELDS}
|
||||
self.events.append(AgentEvent(type=event_type, **agent_kwargs, data=data))
|
||||
|
||||
async def emit_client_output_delta(self, **kwargs) -> None:
|
||||
await self._record(EventType.CLIENT_OUTPUT_DELTA, **kwargs)
|
||||
|
||||
async def emit_client_input_requested(self, **kwargs) -> None:
|
||||
await self._record(EventType.CLIENT_INPUT_REQUESTED, **kwargs)
|
||||
|
||||
async def emit_node_internal_output(self, **kwargs) -> None:
|
||||
await self._record(EventType.NODE_INTERNAL_OUTPUT, **kwargs)
|
||||
|
||||
async def emit_node_input_blocked(self, **kwargs) -> None:
|
||||
await self._record(EventType.NODE_INPUT_BLOCKED, **kwargs)
|
||||
|
||||
|
||||
# --- ActiveNodeClientIO tests ---
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_active_emit_and_consume():
|
||||
"""emit_output → output_stream round-trip works correctly."""
|
||||
bus = MockEventBus()
|
||||
io = ActiveNodeClientIO(node_id="n1", event_bus=bus)
|
||||
|
||||
await io.emit_output("Hello ")
|
||||
await io.emit_output("World", is_final=True)
|
||||
|
||||
chunks = []
|
||||
async for chunk in io.output_stream():
|
||||
chunks.append(chunk)
|
||||
|
||||
assert chunks == ["Hello ", "World"]
|
||||
assert len(bus.events) == 2
|
||||
assert all(e.type == EventType.CLIENT_OUTPUT_DELTA for e in bus.events)
|
||||
# Verify snapshot accumulates
|
||||
assert bus.events[0].data["snapshot"] == "Hello "
|
||||
assert bus.events[1].data["snapshot"] == "Hello World"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_active_request_input():
|
||||
"""request_input blocks until provide_input is called."""
|
||||
bus = MockEventBus()
|
||||
io = ActiveNodeClientIO(node_id="n1", event_bus=bus)
|
||||
|
||||
async def fulfill_later():
|
||||
await asyncio.sleep(0.01)
|
||||
await io.provide_input("user says hi")
|
||||
|
||||
task = asyncio.create_task(fulfill_later())
|
||||
result = await io.request_input(prompt="What?")
|
||||
await task
|
||||
|
||||
assert result == "user says hi"
|
||||
assert len(bus.events) == 1
|
||||
assert bus.events[0].type == EventType.CLIENT_INPUT_REQUESTED
|
||||
assert bus.events[0].data["prompt"] == "What?"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_active_request_input_timeout():
|
||||
"""request_input raises TimeoutError when timeout expires."""
|
||||
io = ActiveNodeClientIO(node_id="n1")
|
||||
|
||||
with pytest.raises(TimeoutError):
|
||||
await io.request_input(prompt="waiting", timeout=0.01)
|
||||
|
||||
|
||||
# --- InertNodeClientIO tests ---
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_inert_emit_publishes_internal():
|
||||
"""InertNodeClientIO.emit_output publishes NODE_INTERNAL_OUTPUT."""
|
||||
bus = MockEventBus()
|
||||
io = InertNodeClientIO(node_id="n2", event_bus=bus)
|
||||
|
||||
await io.emit_output("internal log")
|
||||
|
||||
assert len(bus.events) == 1
|
||||
assert bus.events[0].type == EventType.NODE_INTERNAL_OUTPUT
|
||||
assert bus.events[0].data["content"] == "internal log"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_inert_request_input_returns_redirect():
|
||||
"""request_input returns a redirect string and publishes NODE_INPUT_BLOCKED."""
|
||||
bus = MockEventBus()
|
||||
io = InertNodeClientIO(node_id="n2", event_bus=bus)
|
||||
|
||||
result = await io.request_input(prompt="need data")
|
||||
|
||||
assert "internal processing node" in result
|
||||
assert len(bus.events) == 1
|
||||
assert bus.events[0].type == EventType.NODE_INPUT_BLOCKED
|
||||
assert bus.events[0].data["prompt"] == "need data"
|
||||
|
||||
|
||||
# --- ClientIOGateway tests ---
|
||||
|
||||
|
||||
def test_gateway_creates_active_for_client_facing():
|
||||
"""ClientIOGateway.create_io returns ActiveNodeClientIO when client_facing=True."""
|
||||
gateway = ClientIOGateway()
|
||||
io = gateway.create_io(node_id="n1", client_facing=True)
|
||||
|
||||
assert isinstance(io, ActiveNodeClientIO)
|
||||
assert isinstance(io, NodeClientIO)
|
||||
|
||||
|
||||
def test_gateway_creates_inert_for_internal():
|
||||
"""ClientIOGateway.create_io returns InertNodeClientIO when client_facing=False."""
|
||||
gateway = ClientIOGateway()
|
||||
io = gateway.create_io(node_id="n2", client_facing=False)
|
||||
|
||||
assert isinstance(io, InertNodeClientIO)
|
||||
assert isinstance(io, NodeClientIO)
|
||||
@@ -0,0 +1,165 @@
|
||||
"""Tests for ConcurrentStorage race condition and cache invalidation fixes."""
|
||||
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from framework.schemas.run import Run, RunMetrics, RunStatus
|
||||
from framework.storage.concurrent import ConcurrentStorage
|
||||
|
||||
|
||||
def create_test_run(
|
||||
run_id: str, goal_id: str = "test-goal", status: RunStatus = RunStatus.RUNNING
|
||||
) -> Run:
|
||||
"""Create a minimal test Run object."""
|
||||
return Run(
|
||||
id=run_id,
|
||||
goal_id=goal_id,
|
||||
status=status,
|
||||
narrative="Test run",
|
||||
metrics=RunMetrics(
|
||||
nodes_executed=[],
|
||||
),
|
||||
decisions=[],
|
||||
problems=[],
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_cache_invalidation_on_save(tmp_path: Path):
|
||||
"""Test that summary cache is invalidated when a run is saved.
|
||||
|
||||
This tests the fix for the cache invalidation bug where load_summary()
|
||||
would return stale data after a run was updated.
|
||||
"""
|
||||
storage = ConcurrentStorage(tmp_path)
|
||||
await storage.start()
|
||||
|
||||
try:
|
||||
run_id = "test-run-1"
|
||||
|
||||
# Create and save initial run
|
||||
run = create_test_run(run_id, status=RunStatus.RUNNING)
|
||||
await storage.save_run(run, immediate=True)
|
||||
|
||||
# Load summary to populate the cache
|
||||
summary = await storage.load_summary(run_id)
|
||||
assert summary is not None
|
||||
assert summary.status == RunStatus.RUNNING
|
||||
|
||||
# Update run with new status
|
||||
run.status = RunStatus.COMPLETED
|
||||
await storage.save_run(run, immediate=True)
|
||||
|
||||
# Load summary again - should get fresh data, not cached stale data
|
||||
summary = await storage.load_summary(run_id)
|
||||
assert summary is not None
|
||||
assert summary.status == RunStatus.COMPLETED, (
|
||||
"Summary cache should be invalidated on save - got stale data"
|
||||
)
|
||||
finally:
|
||||
await storage.stop()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_batched_write_cache_consistency(tmp_path: Path):
|
||||
"""Test that cache is only updated after successful batched write.
|
||||
|
||||
This tests the fix for the race condition where cache was updated
|
||||
before the batched write completed.
|
||||
"""
|
||||
storage = ConcurrentStorage(tmp_path, batch_interval=0.05)
|
||||
await storage.start()
|
||||
|
||||
try:
|
||||
run_id = "test-run-2"
|
||||
|
||||
# Save via batching (immediate=False)
|
||||
run = create_test_run(run_id, status=RunStatus.RUNNING)
|
||||
await storage.save_run(run, immediate=False)
|
||||
|
||||
# Before batch flush, cache should NOT contain the run
|
||||
# (This is the fix - previously cache was updated immediately)
|
||||
cache_key = f"run:{run_id}"
|
||||
assert cache_key not in storage._cache, (
|
||||
"Cache should not be updated before batch is flushed"
|
||||
)
|
||||
|
||||
# Wait for batch to flush (poll instead of fixed sleep for CI reliability)
|
||||
for _ in range(500): # 500 * 0.01s = 5s max
|
||||
if cache_key in storage._cache:
|
||||
break
|
||||
await asyncio.sleep(0.01)
|
||||
|
||||
# After batch flush, cache should contain the run
|
||||
assert cache_key in storage._cache, "Cache should be updated after batch flush"
|
||||
|
||||
# Verify data on disk matches cache
|
||||
loaded_run = await storage.load_run(run_id, use_cache=False)
|
||||
assert loaded_run is not None
|
||||
assert loaded_run.id == run_id
|
||||
assert loaded_run.status == RunStatus.RUNNING
|
||||
finally:
|
||||
await storage.stop()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_immediate_write_updates_cache(tmp_path: Path):
|
||||
"""Test that immediate writes still update cache correctly."""
|
||||
storage = ConcurrentStorage(tmp_path)
|
||||
await storage.start()
|
||||
|
||||
try:
|
||||
run_id = "test-run-3"
|
||||
|
||||
# Save with immediate=True
|
||||
run = create_test_run(run_id, status=RunStatus.COMPLETED)
|
||||
await storage.save_run(run, immediate=True)
|
||||
|
||||
# Cache should be updated immediately for immediate writes
|
||||
cache_key = f"run:{run_id}"
|
||||
assert cache_key in storage._cache, "Cache should be updated after immediate write"
|
||||
|
||||
# Verify cached value is correct
|
||||
cached_run = storage._cache[cache_key].value
|
||||
assert cached_run.id == run_id
|
||||
assert cached_run.status == RunStatus.COMPLETED
|
||||
finally:
|
||||
await storage.stop()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_summary_cache_invalidated_on_multiple_saves(tmp_path: Path):
|
||||
"""Test that summary cache is invalidated on each save, not just the first."""
|
||||
storage = ConcurrentStorage(tmp_path)
|
||||
await storage.start()
|
||||
|
||||
try:
|
||||
run_id = "test-run-4"
|
||||
|
||||
# First save
|
||||
run = create_test_run(run_id, status=RunStatus.RUNNING)
|
||||
await storage.save_run(run, immediate=True)
|
||||
|
||||
# Load summary to cache it
|
||||
summary1 = await storage.load_summary(run_id)
|
||||
assert summary1.status == RunStatus.RUNNING
|
||||
|
||||
# Second save with new status
|
||||
run.status = RunStatus.RUNNING
|
||||
await storage.save_run(run, immediate=True)
|
||||
|
||||
# Load summary - should be fresh
|
||||
summary2 = await storage.load_summary(run_id)
|
||||
assert summary2.status == RunStatus.RUNNING
|
||||
|
||||
# Third save with final status
|
||||
run.status = RunStatus.COMPLETED
|
||||
await storage.save_run(run, immediate=True)
|
||||
|
||||
# Load summary - should be fresh again
|
||||
summary3 = await storage.load_summary(run_id)
|
||||
assert summary3.status == RunStatus.COMPLETED
|
||||
finally:
|
||||
await storage.stop()
|
||||
@@ -0,0 +1,326 @@
|
||||
"""Tests for ContextHandoff and HandoffContext."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
from framework.graph.context_handoff import ContextHandoff, HandoffContext
|
||||
from framework.graph.conversation import NodeConversation
|
||||
from framework.llm.mock import MockLLMProvider
|
||||
from framework.llm.provider import LLMProvider, LLMResponse
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class SpyLLMProvider(MockLLMProvider):
|
||||
"""MockLLMProvider that records whether complete() was called."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self.complete_called = False
|
||||
self.complete_call_args: dict[str, Any] | None = None
|
||||
|
||||
def complete(self, messages: list[dict[str, Any]], **kwargs: Any) -> LLMResponse:
|
||||
self.complete_called = True
|
||||
self.complete_call_args = {"messages": messages, **kwargs}
|
||||
return super().complete(messages, **kwargs)
|
||||
|
||||
|
||||
class FailingLLMProvider(LLMProvider):
|
||||
"""LLM provider that always raises."""
|
||||
|
||||
def complete(self, messages: list[dict[str, Any]], **kwargs: Any) -> LLMResponse:
|
||||
raise RuntimeError("LLM unavailable")
|
||||
|
||||
def complete_with_tools(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
system: str,
|
||||
tools: list,
|
||||
tool_executor: Any,
|
||||
max_iterations: int = 10,
|
||||
) -> LLMResponse:
|
||||
raise RuntimeError("LLM unavailable")
|
||||
|
||||
|
||||
async def _build_conversation(*pairs: tuple[str, str]) -> NodeConversation:
|
||||
"""Build a NodeConversation from (user, assistant) message pairs."""
|
||||
conv = NodeConversation()
|
||||
for user_msg, assistant_msg in pairs:
|
||||
await conv.add_user_message(user_msg)
|
||||
await conv.add_assistant_message(assistant_msg)
|
||||
return conv
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TestHandoffContext
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestHandoffContext:
|
||||
def test_instantiation(self) -> None:
|
||||
hc = HandoffContext(
|
||||
source_node_id="node_A",
|
||||
summary="Summary text",
|
||||
key_outputs={"result": "42"},
|
||||
turn_count=3,
|
||||
total_tokens_used=1200,
|
||||
)
|
||||
assert hc.source_node_id == "node_A"
|
||||
assert hc.summary == "Summary text"
|
||||
assert hc.key_outputs == {"result": "42"}
|
||||
assert hc.turn_count == 3
|
||||
assert hc.total_tokens_used == 1200
|
||||
|
||||
def test_field_access(self) -> None:
|
||||
hc = HandoffContext(
|
||||
source_node_id="n1",
|
||||
summary="s",
|
||||
key_outputs={},
|
||||
turn_count=0,
|
||||
total_tokens_used=0,
|
||||
)
|
||||
assert hc.key_outputs == {}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TestExtractiveSummary
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestExtractiveSummary:
|
||||
@pytest.mark.asyncio
|
||||
async def test_extractive_summary_includes_first_last(self) -> None:
|
||||
conv = await _build_conversation(
|
||||
("hello", "First response here."),
|
||||
("continue", "Middle response."),
|
||||
("finish", "Final conclusion."),
|
||||
)
|
||||
ch = ContextHandoff()
|
||||
hc = ch.summarize_conversation(conv, node_id="test_node")
|
||||
|
||||
assert "First response here." in hc.summary
|
||||
assert "Final conclusion." in hc.summary
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extractive_summary_metadata(self) -> None:
|
||||
conv = await _build_conversation(
|
||||
("hi", "hello"),
|
||||
("bye", "goodbye"),
|
||||
)
|
||||
ch = ContextHandoff()
|
||||
hc = ch.summarize_conversation(conv, node_id="node_42")
|
||||
|
||||
assert hc.source_node_id == "node_42"
|
||||
assert hc.turn_count == 2
|
||||
assert hc.total_tokens_used > 0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extractive_with_output_keys_colon(self) -> None:
|
||||
conv = await _build_conversation(
|
||||
("what is the answer?", "answer: 42"),
|
||||
)
|
||||
ch = ContextHandoff()
|
||||
hc = ch.summarize_conversation(conv, node_id="n", output_keys=["answer"])
|
||||
|
||||
assert hc.key_outputs["answer"] == "42"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extractive_with_output_keys_equals(self) -> None:
|
||||
conv = await _build_conversation(
|
||||
("compute", "result = success"),
|
||||
)
|
||||
ch = ContextHandoff()
|
||||
hc = ch.summarize_conversation(conv, node_id="n", output_keys=["result"])
|
||||
|
||||
assert hc.key_outputs["result"] == "success"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extractive_json_output_keys(self) -> None:
|
||||
conv = await _build_conversation(
|
||||
("give me json", '{"score": 95, "grade": "A"}'),
|
||||
)
|
||||
ch = ContextHandoff()
|
||||
hc = ch.summarize_conversation(conv, node_id="n", output_keys=["score", "grade"])
|
||||
|
||||
assert hc.key_outputs["score"] == "95"
|
||||
assert hc.key_outputs["grade"] == "A"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extractive_empty_conversation(self) -> None:
|
||||
conv = NodeConversation()
|
||||
ch = ContextHandoff()
|
||||
hc = ch.summarize_conversation(conv, node_id="empty")
|
||||
|
||||
assert hc.summary == "Empty conversation."
|
||||
assert hc.turn_count == 0
|
||||
assert hc.key_outputs == {}
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extractive_no_assistant_messages(self) -> None:
|
||||
conv = NodeConversation()
|
||||
await conv.add_user_message("hello?")
|
||||
await conv.add_user_message("anyone there?")
|
||||
|
||||
ch = ContextHandoff()
|
||||
hc = ch.summarize_conversation(conv, node_id="silent")
|
||||
|
||||
assert hc.summary == "No assistant responses."
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extractive_most_recent_wins(self) -> None:
|
||||
conv = await _build_conversation(
|
||||
("first", "status: old_value"),
|
||||
("second", "status: new_value"),
|
||||
)
|
||||
ch = ContextHandoff()
|
||||
hc = ch.summarize_conversation(conv, node_id="n", output_keys=["status"])
|
||||
|
||||
assert hc.key_outputs["status"] == "new_value"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extractive_truncation(self) -> None:
|
||||
long_text = "x" * 1000
|
||||
conv = await _build_conversation(
|
||||
("go", long_text),
|
||||
)
|
||||
ch = ContextHandoff()
|
||||
hc = ch.summarize_conversation(conv, node_id="n")
|
||||
|
||||
# Summary should be truncated to ~500 chars
|
||||
assert len(hc.summary) <= 500
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TestLLMSummary
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestLLMSummary:
|
||||
@pytest.mark.asyncio
|
||||
async def test_llm_summary_calls_provider(self) -> None:
|
||||
llm = SpyLLMProvider()
|
||||
conv = await _build_conversation(
|
||||
("hi", "hello back"),
|
||||
("what now?", "we are done"),
|
||||
)
|
||||
ch = ContextHandoff(llm=llm)
|
||||
hc = ch.summarize_conversation(conv, node_id="llm_node")
|
||||
|
||||
assert llm.complete_called, "LLM complete() was never invoked"
|
||||
assert hc.summary == "This is a mock response for testing purposes."
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_llm_summary_includes_output_key_hint(self) -> None:
|
||||
llm = SpyLLMProvider()
|
||||
conv = await _build_conversation(
|
||||
("compute", '{"score": 95}'),
|
||||
)
|
||||
ch = ContextHandoff(llm=llm)
|
||||
ch.summarize_conversation(conv, node_id="n", output_keys=["score", "grade"])
|
||||
|
||||
assert llm.complete_call_args is not None
|
||||
system = llm.complete_call_args.get("system", "")
|
||||
assert "score" in system
|
||||
assert "grade" in system
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_llm_fallback_on_error(self) -> None:
|
||||
llm = FailingLLMProvider()
|
||||
conv = await _build_conversation(
|
||||
("start", "First assistant message."),
|
||||
("end", "Last assistant message."),
|
||||
)
|
||||
ch = ContextHandoff(llm=llm)
|
||||
hc = ch.summarize_conversation(conv, node_id="fallback_node")
|
||||
|
||||
# Should fall back to extractive (first + last assistant messages)
|
||||
assert "First assistant message." in hc.summary
|
||||
assert "Last assistant message." in hc.summary
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TestFormatAsInput
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFormatAsInput:
|
||||
def test_format_structure(self) -> None:
|
||||
hc = HandoffContext(
|
||||
source_node_id="analyzer",
|
||||
summary="Analysis complete.",
|
||||
key_outputs={"score": "95"},
|
||||
turn_count=5,
|
||||
total_tokens_used=2000,
|
||||
)
|
||||
output = ContextHandoff.format_as_input(hc)
|
||||
|
||||
assert "--- CONTEXT FROM: analyzer" in output
|
||||
assert "KEY OUTPUTS:" in output
|
||||
assert "SUMMARY:" in output
|
||||
assert "--- END CONTEXT ---" in output
|
||||
|
||||
def test_format_no_key_outputs(self) -> None:
|
||||
hc = HandoffContext(
|
||||
source_node_id="simple",
|
||||
summary="Done.",
|
||||
key_outputs={},
|
||||
turn_count=1,
|
||||
total_tokens_used=100,
|
||||
)
|
||||
output = ContextHandoff.format_as_input(hc)
|
||||
|
||||
assert "KEY OUTPUTS:" not in output
|
||||
assert "SUMMARY:" in output
|
||||
|
||||
def test_format_content_values(self) -> None:
|
||||
hc = HandoffContext(
|
||||
source_node_id="node_X",
|
||||
summary="Found 3 bugs.",
|
||||
key_outputs={"bugs": "3", "severity": "high"},
|
||||
turn_count=7,
|
||||
total_tokens_used=5000,
|
||||
)
|
||||
output = ContextHandoff.format_as_input(hc)
|
||||
|
||||
assert "node_X" in output
|
||||
assert "7 turns" in output
|
||||
assert "~5000 tokens" in output
|
||||
assert "- bugs: 3" in output
|
||||
assert "- severity: high" in output
|
||||
assert "Found 3 bugs." in output
|
||||
|
||||
def test_format_empty_summary(self) -> None:
|
||||
hc = HandoffContext(
|
||||
source_node_id="n",
|
||||
summary="",
|
||||
key_outputs={},
|
||||
turn_count=0,
|
||||
total_tokens_used=0,
|
||||
)
|
||||
output = ContextHandoff.format_as_input(hc)
|
||||
|
||||
assert "No summary available." in output
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_format_as_input_usable_as_message(self) -> None:
|
||||
"""Formatted output can be fed into a NodeConversation as a user message."""
|
||||
hc = HandoffContext(
|
||||
source_node_id="prev_node",
|
||||
summary="Completed analysis.",
|
||||
key_outputs={"result": "42"},
|
||||
turn_count=3,
|
||||
total_tokens_used=900,
|
||||
)
|
||||
text = ContextHandoff.format_as_input(hc)
|
||||
|
||||
conv = NodeConversation()
|
||||
msg = await conv.add_user_message(text)
|
||||
|
||||
assert msg.role == "user"
|
||||
assert "CONTEXT FROM: prev_node" in msg.content
|
||||
assert conv.turn_count == 1
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user