Merge remote-tracking branch 'origin/main' into fix/first-success
@@ -0,0 +1,34 @@
{
  "permissions": {
    "allow": [
      "mcp__agent-builder__create_session",
      "mcp__agent-builder__set_goal",
      "mcp__agent-builder__add_node",
      "mcp__agent-builder__add_edge",
      "mcp__agent-builder__configure_loop",
      "mcp__agent-builder__add_mcp_server",
      "mcp__agent-builder__validate_graph",
      "mcp__agent-builder__export_graph",
      "mcp__agent-builder__load_session_by_id",
      "Bash(git status:*)",
      "Bash(gh run view:*)",
      "Bash(uv run:*)",
      "Bash(env:*)",
      "mcp__agent-builder__test_node",
      "mcp__agent-builder__list_mcp_tools",
      "Bash(python -m py_compile:*)",
      "Bash(python -m pytest:*)",
      "Bash(source:*)",
      "mcp__agent-builder__update_node",
      "mcp__agent-builder__check_missing_credentials",
      "mcp__agent-builder__list_stored_credentials",
      "Bash(find:*)",
      "mcp__agent-builder__run_tests",
      "Bash(PYTHONPATH=core:exports:tools/src uv run pytest:*)",
      "mcp__agent-builder__list_agent_sessions",
      "mcp__agent-builder__generate_constraint_tests",
      "mcp__agent-builder__generate_success_tests"
    ]
  },
  "enabledMcpjsonServers": ["agent-builder", "tools"]
}
@@ -562,15 +562,33 @@ PYTHONPATH=core:exports python -m {agent_name} --tui

### Find Available Checkpoints

Use MCP tools to programmatically find and inspect checkpoints:

```python
# List all sessions to find the failed one
list_agent_sessions(agent_work_dir="~/.hive/agents/{agent_name}", status="failed")

# Inspect session state
get_agent_session_state(agent_work_dir="~/.hive/agents/{agent_name}", session_id="{session_id}")

# Find clean checkpoints to resume from
list_agent_checkpoints(agent_work_dir="~/.hive/agents/{agent_name}", session_id="{session_id}", is_clean="true")

# Compare checkpoints to understand what changed
compare_agent_checkpoints(
    agent_work_dir="~/.hive/agents/{agent_name}",
    session_id="{session_id}",
    checkpoint_id_before="cp_node_complete_intake_143030",
    checkpoint_id_after="cp_node_complete_research_143115"
)

# Inspect memory at a specific checkpoint
get_agent_checkpoint(agent_work_dir="~/.hive/agents/{agent_name}", session_id="{session_id}", checkpoint_id="cp_node_complete_intake_143030")
```

Or in TUI:
```bash
/sessions {session_id}
```

This lists all checkpoints with timestamps:

```
Available Checkpoints: (3)
1. cp_node_complete_intake_143030
2. cp_node_complete_research_143115
3. cp_pause_research_143130
```

**Verification:**
@@ -717,6 +735,80 @@ Let me know when you've run it and I'll help check the logs!"
)
```

### Session & Checkpoint Tools

**list_agent_sessions** - Browse sessions with filtering
- **When to use:** Finding resumable sessions, identifying failed sessions, Stage 3 triage
- **Returns:** Session list with status, timestamps, is_resumable, current_node, quality
- **Example:**
```
list_agent_sessions(
    agent_work_dir="/home/user/.hive/agents/twitter_outreach",
    status="failed",
    limit=10
)
```

**get_agent_session_state** - Load full session state (excludes memory values)
- **When to use:** Inspecting session progress, checking is_resumable, examining path
- **Returns:** Full state with memory_keys/memory_size instead of memory values
- **Example:**
```
get_agent_session_state(
    agent_work_dir="/home/user/.hive/agents/twitter_outreach",
    session_id="session_20260208_143022_abc12345"
)
```

**get_agent_session_memory** - Get memory contents from a session
- **When to use:** Stage 5 root cause analysis, inspecting produced data
- **Returns:** All memory keys+values, or a single key's value
- **Example:**
```
get_agent_session_memory(
    agent_work_dir="/home/user/.hive/agents/twitter_outreach",
    session_id="session_20260208_143022_abc12345",
    key="twitter_handles"
)
```

**list_agent_checkpoints** - List checkpoints for a session
- **When to use:** Stage 6 recovery, finding clean checkpoints to resume from
- **Returns:** Checkpoint summaries with type, node, clean status
- **Example:**
```
list_agent_checkpoints(
    agent_work_dir="/home/user/.hive/agents/twitter_outreach",
    session_id="session_20260208_143022_abc12345",
    is_clean="true"
)
```

**get_agent_checkpoint** - Load a specific checkpoint with full state
- **When to use:** Inspecting exact state at a checkpoint, comparing to current state
- **Returns:** Full checkpoint: memory snapshot, execution path, metrics
- **Example:**
```
get_agent_checkpoint(
    agent_work_dir="/home/user/.hive/agents/twitter_outreach",
    session_id="session_20260208_143022_abc12345",
    checkpoint_id="cp_node_complete_intake_143030"
)
```

**compare_agent_checkpoints** - Diff memory between two checkpoints
- **When to use:** Understanding data flow, finding where state diverged
- **Returns:** Memory diff (added/removed/changed keys) + execution path diff
- **Example:**
```
compare_agent_checkpoints(
    agent_work_dir="/home/user/.hive/agents/twitter_outreach",
    session_id="session_20260208_143022_abc12345",
    checkpoint_id_before="cp_node_complete_intake_143030",
    checkpoint_id_after="cp_node_complete_research_143115"
)
```

### Query Patterns

**Pattern 1: Top-Down Investigation** (Most common)
@@ -739,6 +831,16 @@ Loop every 10 seconds:
2. If found: Alert and drill into L2
```

**Pattern 4: Session State + Checkpoint Recovery**
```
1. list_agent_sessions: Find failed/paused sessions
2. get_agent_session_state: Check is_resumable, see execution path
3. get_agent_session_memory: Inspect what data was produced
4. list_agent_checkpoints: Find clean checkpoints before failure
5. compare_agent_checkpoints: Understand what changed between checkpoints
6. Recommend resume command with specific checkpoint
```
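A minimal sketch of Pattern 4 as concrete tool calls (the work dir, session ID, and checkpoint IDs are illustrative placeholders):

```python
# 1-2. Find the failed session and confirm it is resumable
list_agent_sessions(agent_work_dir="~/.hive/agents/my_agent", status="failed", limit=5)
get_agent_session_state(agent_work_dir="~/.hive/agents/my_agent", session_id="session_20260208_143022_abc12345")

# 3-4. Inspect produced data and find a clean checkpoint before the failure
get_agent_session_memory(agent_work_dir="~/.hive/agents/my_agent", session_id="session_20260208_143022_abc12345")
list_agent_checkpoints(agent_work_dir="~/.hive/agents/my_agent", session_id="session_20260208_143022_abc12345", is_clean="true")

# 5. Diff two checkpoints to see where state diverged, then recommend
#    a resume command anchored at the last clean checkpoint
compare_agent_checkpoints(
    agent_work_dir="~/.hive/agents/my_agent",
    session_id="session_20260208_143022_abc12345",
    checkpoint_id_before="cp_node_complete_intake_143030",
    checkpoint_id_after="cp_node_complete_research_143115"
)
```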

---

## Complete Example Walkthrough

@@ -1,351 +1,333 @@
# Example: Iterative Testing of a Research Agent

This example walks through the full iterative test loop for a research agent that searches the web, reviews findings, and produces a cited report.

## Agent Structure

```
exports/deep_research_agent/
├── agent.py             # Goal + graph: intake → research → review → report
├── nodes/__init__.py    # Node definitions (system_prompt, input/output keys)
├── config.py            # Model config
├── mcp_servers.json     # Tools: web_search, web_scrape
└── tests/               # Test files (we'll create these)
```

**Goal:** "Rigorous Interactive Research" — find 5+ diverse sources, cite every claim, produce a complete report.
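
For orientation, here is a sketch of how the research node might have been registered during the build stage. The parameter names mirror the agent-builder `add_node` tool, and the values are illustrative assumptions, not the agent's actual definition:

```python
# Hypothetical add_node call (illustrative values; the real node lives in
# nodes/__init__.py of the exported agent)
add_node(
    node_id="research",
    name="Research",
    description="Search the web and gather diverse sources",
    node_type="function",
    input_keys=["query"],
    output_keys=["research_results"],
    system_prompt="Search for information on the user's topic using web search."
)
```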

---

## Phase 1: Generate Tests

### Read the goal

```python
Read(file_path="exports/deep_research_agent/agent.py")
# Extract: goal_id="rigorous-interactive-research"
# success_criteria: source-diversity (>=5), citation-coverage (100%), report-completeness (90%)
# constraints: no-hallucination, source-attribution
```

### Get test guidelines

```python
result = generate_success_tests(
    goal_id="rigorous-interactive-research",
    goal_json='{"id": "rigorous-interactive-research", "success_criteria": [{"id": "source-diversity", "description": "Use multiple diverse sources", "target": ">=5"}, {"id": "citation-coverage", "description": "Every claim cites its source", "target": "100%"}, {"id": "report-completeness", "description": "Report answers the research questions", "target": "90%"}]}',
    node_names="intake,research,review,report",
    tool_names="web_search,web_scrape",
    agent_path="exports/deep_research_agent"
)
```

The result contains guidelines (not generated tests): `output_file` (where to write tests), `file_header` (imports and fixtures to use), `test_template` (the format for test functions), and `test_guidelines` (rules for writing tests).

### Write tests

```python
Write(
    file_path="exports/deep_research_agent/tests/test_success_criteria.py",
    content=result["file_header"] + '''

@pytest.mark.asyncio
async def test_success_source_diversity(runner, auto_responder, mock_mode):
    """At least 5 diverse sources are found."""
    await auto_responder.start()
    try:
        result = await runner.run({"query": "impact of remote work on productivity"})
    finally:
        await auto_responder.stop()
    assert result.success, f"Agent failed: {result.error}"
    output = result.output or {}
    sources = output.get("sources", [])
    if isinstance(sources, list):
        assert len(sources) >= 5, f"Expected >= 5 sources, got {len(sources)}"


@pytest.mark.asyncio
async def test_success_citation_coverage(runner, auto_responder, mock_mode):
    """Every factual claim in the report cites its source."""
    await auto_responder.start()
    try:
        result = await runner.run({"query": "climate change effects on agriculture"})
    finally:
        await auto_responder.stop()
    assert result.success, f"Agent failed: {result.error}"
    output = result.output or {}
    report = output.get("report", "")
    # Check that report contains numbered references
    assert "[1]" in str(report) or "[source" in str(report).lower(), "Report lacks citations"


@pytest.mark.asyncio
async def test_success_report_completeness(runner, auto_responder, mock_mode):
    """Report addresses the original research question."""
    query = "pros and cons of nuclear energy"
    await auto_responder.start()
    try:
        result = await runner.run({"query": query})
    finally:
        await auto_responder.stop()
    assert result.success, f"Agent failed: {result.error}"
    output = result.output or {}
    report = output.get("report", "")
    assert len(str(report)) > 200, f"Report too short: {len(str(report))} chars"


@pytest.mark.asyncio
async def test_empty_query_handling(runner, auto_responder, mock_mode):
    """Agent handles empty input gracefully."""
    await auto_responder.start()
    try:
        result = await runner.run({"query": ""})
    finally:
        await auto_responder.stop()
    output = result.output or {}
    assert not result.success or output.get("error"), "Should handle empty query"


@pytest.mark.asyncio
async def test_feedback_loop_terminates(runner, auto_responder, mock_mode):
    """Feedback loop between review and research terminates."""
    await auto_responder.start()
    try:
        result = await runner.run({"query": "quantum computing basics"})
    finally:
        await auto_responder.stop()
    visits = result.node_visit_counts or {}
    for node_id, count in visits.items():
        assert count <= 5, f"Node {node_id} visited {count} times"
'''
)
```

---

## Phase 2: First Execution

```python
run_tests(
    goal_id="rigorous-interactive-research",
    agent_path="exports/deep_research_agent",
    fail_fast=True
)
```

**Result:**
```json
{
  "overall_passed": false,
  "summary": {"total": 5, "passed": 3, "failed": 2, "pass_rate": "60.0%"},
  "failures": [
    {"test_name": "test_success_source_diversity", "details": "AssertionError: Expected >= 5 sources, got 2"},
    {"test_name": "test_success_citation_coverage", "details": "AssertionError: Report lacks citations"}
  ]
}
```

---

## Phase 3: Analyze (Iteration 1)

### Debug the first failure

```python
debug_test(
    goal_id="rigorous-interactive-research",
    test_name="test_success_source_diversity",
    agent_path="exports/deep_research_agent"
)
# Category: ASSERTION_FAILURE — Expected >= 5 sources, got 2
```

### Find the session and inspect memory

```python
list_agent_sessions(
    agent_work_dir="~/.hive/agents/deep_research_agent",
    status="completed",
    limit=1
)
# → session_20260209_150000_abc12345

get_agent_session_memory(
    agent_work_dir="~/.hive/agents/deep_research_agent",
    session_id="session_20260209_150000_abc12345",
    key="research_results"
)
# → Only 2 sources found. LLM stopped searching after 2 queries.
```

### Check LLM behavior in the research node

```python
query_runtime_log_raw(
    agent_work_dir="~/.hive/agents/deep_research_agent",
    run_id="session_20260209_150000_abc12345",
    node_id="research"
)
# → LLM called web_search twice, got results, immediately called set_output.
# → Prompt doesn't instruct it to find at least 5 sources.
```

**Root cause:** The research node's system_prompt doesn't specify minimum source requirements.

---

## Phase 4: Fix (Iteration 1)

```python
Read(file_path="exports/deep_research_agent/nodes/__init__.py")

# Fix the research node prompt
Edit(
    file_path="exports/deep_research_agent/nodes/__init__.py",
    old_string='system_prompt="Search for information on the user\'s topic using web search."',
    new_string='system_prompt="Search for information on the user\'s topic using web search. You MUST find at least 5 diverse, authoritative sources. Use multiple different search queries with varied keywords. Do NOT call set_output until you have gathered at least 5 distinct sources from different domains."'
)
```

---

## Phase 5: Recover & Resume (Iteration 1)

The fix is to the `research` node. Since this was a `run_tests` execution (no checkpoints), we re-run from scratch:

```python
run_tests(
    goal_id="rigorous-interactive-research",
    agent_path="exports/deep_research_agent",
    fail_fast=True
)
```

**Result:**
```json
{
  "overall_passed": false,
  "summary": {"total": 5, "passed": 4, "failed": 1, "pass_rate": "80.0%"},
  "failures": [
    {"test_name": "test_success_citation_coverage", "details": "AssertionError: Report lacks citations"}
  ]
}
```

Source diversity now passes. Citation coverage still fails.

---

## Phase 3: Analyze (Iteration 2)

```python
debug_test(
    goal_id="rigorous-interactive-research",
    test_name="test_success_citation_coverage",
    agent_path="exports/deep_research_agent"
)
# Category: ASSERTION_FAILURE — Report lacks citations

# Check what the report node produced
list_agent_sessions(
    agent_work_dir="~/.hive/agents/deep_research_agent",
    status="completed",
    limit=1
)
# → session_20260209_151500_def67890

get_agent_session_memory(
    agent_work_dir="~/.hive/agents/deep_research_agent",
    session_id="session_20260209_151500_def67890",
    key="report"
)
# → Report text exists but uses no numbered references.
# → Sources are in memory but report node doesn't cite them.
```

**Root cause:** The report node's prompt doesn't instruct the LLM to include numbered citations.

---

## Phase 4: Fix (Iteration 2)

```python
Edit(
    file_path="exports/deep_research_agent/nodes/__init__.py",
    old_string='system_prompt="Write a comprehensive report based on the research findings."',
    new_string='system_prompt="Write a comprehensive report based on the research findings. You MUST include numbered citations [1], [2], etc. for every factual claim. At the end, include a References section listing all sources with their URLs. Every claim must be traceable to a specific source."'
)
```

---

## Phase 5: Resume (Iteration 2)

The fix is to the `report` node (the last node). To demonstrate checkpoint recovery, run via CLI:

```bash
# Run via CLI to get checkpoints
uv run hive run exports/deep_research_agent --input '{"topic": "climate change effects"}'

# After it runs, find the clean checkpoint before report
list_agent_checkpoints(
    agent_work_dir="~/.hive/agents/deep_research_agent",
    session_id="session_20260209_152000_ghi34567",
    is_clean="true"
)
# → cp_node_complete_review_152100 (after review, before report)

# Resume — skips intake, research, review entirely
uv run hive run exports/deep_research_agent \
    --resume-session session_20260209_152000_ghi34567 \
    --checkpoint cp_node_complete_review_152100
```

Only the `report` node re-runs with the fixed prompt, using research data from the checkpoint.
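
To confirm the resume actually reused prior work, you can inspect the checkpoint it started from (session and checkpoint IDs as above):

```python
get_agent_checkpoint(
    agent_work_dir="~/.hive/agents/deep_research_agent",
    session_id="session_20260209_152000_ghi34567",
    checkpoint_id="cp_node_complete_review_152100"
)
# → The memory snapshot should already hold research_results gathered
#   before the fix; only the report node executes after this checkpoint.
```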

---

## Phase 6: Final Verification

```python
run_tests(
    goal_id="rigorous-interactive-research",
    agent_path="exports/deep_research_agent"
)
```

**Result:**
```json
{
  "overall_passed": true,
  "summary": {"total": 5, "passed": 5, "failed": 0, "pass_rate": "100.0%"}
}
```

All tests pass.

---

## Summary

| Iteration | Failure | Root Cause | Fix | Recovery |
|-----------|---------|------------|-----|----------|
| 1 | Source diversity (2 < 5) | Research prompt too vague | Added "at least 5 sources" to prompt | Re-run (no checkpoints) |
| 2 | No citations in report | Report prompt lacks citation instructions | Added citation requirements | Checkpoint resume (skipped 3 nodes) |

The agent is now validated and ready for production use.

**Key takeaways:**
- Phase 3 analysis (session memory + L3 logs) identified root causes without guessing
- Checkpoint recovery in iteration 2 saved time by skipping 3 expensive nodes
- Final `run_tests` confirms all scenarios pass end-to-end

@@ -0,0 +1,20 @@
---
name: hive
description: Hive Agent Builder & Manager
mode: primary
tools:
  agent-builder: true
  tools: true
---

# Hive Agent
You are the Hive Agent Builder. Your goal is to help the user construct, configure, and deploy AI agents using the Hive framework.

## Capabilities
1. **Scaffold Agents:** Create new agent directories/configs.
2. **Manage Tools:** Add/remove tools via MCP.
3. **Debug:** Analyze agent workflows.

## Context
- You are an expert in the Hive framework architecture.
- Always use the `agent-builder` MCP server for filesystem operations.
@@ -0,0 +1,30 @@
{
  "mcpServers": {
    "agent-builder": {
      "command": "uv",
      "args": [
        "run",
        "python",
        "-m",
        "framework.mcp.agent_builder_server"
      ],
      "cwd": "core",
      "env": {
        "PYTHONPATH": "../tools/src"
      }
    },
    "tools": {
      "command": "uv",
      "args": [
        "run",
        "python",
        "mcp_server.py",
        "--stdio"
      ],
      "cwd": "tools",
      "env": {
        "PYTHONPATH": "src"
      }
    }
  }
}
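
For reference, these are the manual equivalents of the two server launches above (derived directly from this config; handy when debugging a server outside the MCP client):

```bash
# agent-builder server: run from core/ with tools/src on PYTHONPATH
cd core && PYTHONPATH=../tools/src uv run python -m framework.mcp.agent_builder_server

# tools server: run from tools/ over stdio
cd tools && PYTHONPATH=src uv run python mcp_server.py --stdio
```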

Symlink
+1
@@ -0,0 +1 @@
../../.claude/skills/hive

Symlink
+1
@@ -0,0 +1 @@
../../.claude/skills/hive-concepts

Symlink
+1
@@ -0,0 +1 @@
../../.claude/skills/hive-create

Symlink
+1
@@ -0,0 +1 @@
../../.claude/skills/hive-credentials

Symlink
+1
@@ -0,0 +1 @@
../../.claude/skills/hive-debugger

Symlink
+1
@@ -0,0 +1 @@
../../.claude/skills/hive-patterns

Symlink
+1
@@ -0,0 +1 @@
../../.claude/skills/hive-test

Symlink
+1
@@ -0,0 +1 @@
../../.claude/skills/triage-issue

+5
-2
@@ -49,8 +49,8 @@ You may submit PRs without prior assignment for:
make check   # Lint and format checks (ruff check + ruff format --check on core/ and tools/)
make test    # Core tests (cd core && pytest tests/ -v)
```
8. Commit your changes following our commit conventions
9. Push to your fork and submit a Pull Request

## Development Setup

@@ -145,6 +145,9 @@ make test
# Or run tests directly
cd core && pytest tests/ -v

# Run tools package tests (when contributing to tools/)
cd tools && uv run pytest tests/ -v

# Run tests for a specific agent
PYTHONPATH=exports uv run python -m agent_name test
```

@@ -120,6 +120,16 @@ hive tui
# Or run directly
hive run exports/your_agent_name --input '{"key": "value"}'
```

## Coding Agent Support

### Opencode

Hive includes native support for [Opencode](https://github.com/opencode-ai/opencode).

1. **Setup:** Run the quickstart script.
2. **Launch:** Open Opencode in the project root.
3. **Activate:** Type `/hive` in the chat to switch to the Hive Agent.
4. **Verify:** Ask the agent *"List your tools"* to confirm the connection.

The agent has access to all Hive skills and can scaffold agents, add tools, and debug workflows directly from the chat.

**[📖 Complete Setup Guide](docs/environment-setup.md)** - Detailed instructions for agent development

@@ -274,6 +274,7 @@ class EventLoopNode(NodeProtocol):

        # 5. Stall detection state
        recent_responses: list[str] = []
        user_interaction_count = 0  # tracks how many times this node blocked for user input

        # 6. Main loop
        for iteration in range(start_iteration, self._config.max_iterations):
@@ -485,13 +486,11 @@ class EventLoopNode(NodeProtocol):

            # 6h. Client-facing input blocking
            #
            # Block ONLY when the LLM explicitly calls ask_user().
            # Text-only turns and set_output-only turns flow through
            # without blocking, allowing progress updates and summaries
            # to stream freely. After user input arrives, fall through
            # to judge evaluation (6i) — the judge handles acceptance.
            if ctx.node_spec.client_facing and user_input_requested:
                if self._shutdown:
                    await self._publish_loop_completed(stream_id, node_id, iteration + 1)
@@ -578,6 +577,7 @@ class EventLoopNode(NodeProtocol):
                    latency_ms=latency_ms,
                )

                user_interaction_count += 1
                recent_responses.clear()
                # Fall through to judge evaluation (6i)

@@ -824,6 +824,12 @@ class EventLoopNode(NodeProtocol):

        Returns True if input arrived, False if shutdown was signaled.
        """
        # Clear BEFORE emitting so that synchronous handlers (e.g. the
        # headless stdin handler) can call inject_event() during the emit
        # and the signal won't be lost. TUI handlers return immediately
        # without injecting, so the wait still blocks until the user types.
        self._input_ready.clear()

        if self._event_bus:
            await self._event_bus.emit_client_input_requested(
                stream_id=ctx.node_id,
@@ -831,7 +837,6 @@ class EventLoopNode(NodeProtocol):
                prompt="",
            )

        await self._input_ready.wait()
        return not self._shutdown

@@ -989,7 +994,7 @@ class EventLoopNode(NodeProtocol):
                    is_error=result.is_error,
                )
                if not result.is_error:
                    value = tc.tool_input.get("value", "")
                    # Parse JSON strings into native types so downstream
                    # consumers get lists/dicts instead of serialised JSON,
                    # and the hallucination validator skips non-string values.
@@ -1000,8 +1005,9 @@ class EventLoopNode(NodeProtocol):
                        value = parsed
                    except (json.JSONDecodeError, TypeError):
                        pass
                    key = tc.tool_input.get("key", "")
                    await accumulator.set(key, value)
                    outputs_set_this_turn.append(key)
                    logged_tool_calls.append(
                        {
                            "tool_use_id": tc.tool_use_id,
@@ -1283,6 +1289,24 @@ class EventLoopNode(NodeProtocol):
            accumulator, ctx.node_spec.output_keys, ctx.node_spec.nullable_output_keys
        )
        if not missing:
            # Safety check: when ALL output keys are nullable and NONE
            # have been set, the node produced nothing useful. Retry
            # instead of accepting an empty result — this prevents
            # client-facing nodes from terminating before the user
            # ever interacts, and non-client-facing nodes from
            # short-circuiting without doing their work.
            output_keys = ctx.node_spec.output_keys or []
            nullable_keys = set(ctx.node_spec.nullable_output_keys or [])
            all_nullable = output_keys and nullable_keys >= set(output_keys)
            none_set = not any(accumulator.get(k) is not None for k in output_keys)
            if all_nullable and none_set:
                return JudgeVerdict(
                    action="RETRY",
                    feedback=(
                        f"No output keys have been set yet. "
                        f"Use set_output to set at least one of: {output_keys}"
                    ),
                )
            return JudgeVerdict(action="ACCEPT")
        else:
            return JudgeVerdict(

@@ -368,7 +368,7 @@ class GraphExecutor:
        # Check if resuming from paused_at (session state resume)
        paused_at = session_state.get("paused_at") if session_state else None
        node_ids = [n.id for n in graph.nodes]
        self.logger.debug(f"paused_at={paused_at}, available node IDs={node_ids}")

        if paused_at and graph.get_node(paused_at) is not None:
            # Resume from paused_at node directly (works for any node, not just pause_nodes)
@@ -505,6 +505,21 @@ class GraphExecutor:

            path.append(current_node_id)

            # Clear stale nullable outputs from previous visits.
            # When a node is re-visited (e.g. review → process-batch → review),
            # nullable outputs from the PREVIOUS visit linger in shared memory.
            # This causes stale edge conditions to fire (e.g. "feedback is not None"
            # from visit 1 triggers even when visit 2 sets "final_summary" instead).
            # Clearing them ensures only the CURRENT visit's outputs affect routing.
            if node_visit_counts.get(current_node_id, 0) > 1:
                nullable_keys = getattr(node_spec, "nullable_output_keys", None) or []
                for key in nullable_keys:
                    if memory.read(key) is not None:
                        memory.write(key, None, validate=False)
                        self.logger.info(
                            f"  🧹 Cleared stale nullable output '{key}' from previous visit"
                        )

            # Check if pause (HITL) before execution
            if current_node_id in graph.pause_nodes:
                self.logger.info(f"⏸ Paused at HITL node: {node_spec.name}")

@@ -1134,7 +1134,7 @@ Keep the same JSON structure but with shorter content values.
            decision_id=decision_id,
            success=True,
            result=response.content,
            tokens_used=total_input_tokens + total_output_tokens,
            latency_ms=latency_ms,
        )

@@ -1233,7 +1233,7 @@ Keep the same JSON structure but with shorter content values.
            success=False,
            error=_extraction_error,
            output={},
            tokens_used=total_input_tokens + total_output_tokens,
            latency_ms=latency_ms,
        )
        # JSON extraction failed completely - still strip code blocks
@@ -1275,7 +1275,7 @@ Keep the same JSON structure but with shorter content values.
        return NodeResult(
            success=True,
            output=output,
            tokens_used=total_input_tokens + total_output_tokens,
            latency_ms=latency_ms,
        )

@@ -14,13 +14,15 @@ from datetime import datetime
from pathlib import Path
from typing import Annotated

# Project root resolution. This file lives at core/framework/mcp/agent_builder_server.py,
# so the project root (where exports/ lives) is four parents up.
_PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent.parent

# Ensure exports/ is on sys.path so AgentRunner can import agent modules.
_exports_dir = _PROJECT_ROOT / "exports"
if _exports_dir.is_dir() and str(_exports_dir) not in sys.path:
    sys.path.insert(0, str(_exports_dir))
del _exports_dir

from mcp.server import FastMCP  # noqa: E402
from pydantic import ValidationError  # noqa: E402
@@ -542,6 +544,9 @@ def _validate_agent_path(agent_path: str) -> tuple[Path | None, str | None]:
    """
    Validate and normalize agent_path.

    Resolves relative paths against _PROJECT_ROOT since the MCP server's
    cwd (core/) differs from the user's cwd (project root).

    Returns:
        (Path, None) if valid
        (None, error_json) if invalid
@@ -556,6 +561,12 @@ def _validate_agent_path(agent_path: str) -> tuple[Path | None, str | None]:

    path = Path(agent_path)

    # Resolve relative paths against project root (not MCP server's cwd)
    if not path.is_absolute() and not path.exists():
        resolved = _PROJECT_ROOT / path
        if resolved.exists():
            path = resolved

    if not path.exists():
        return None, json.dumps(
            {
@@ -3019,18 +3030,15 @@ def _format_success_criteria(criteria: list[SuccessCriterion]) -> str:

# Test template for Claude to use when writing tests
CONSTRAINT_TEST_TEMPLATE = '''@pytest.mark.asyncio
async def test_constraint_{constraint_id}_{scenario}(runner, auto_responder, mock_mode):
    """Test: {description}"""
    await auto_responder.start()
    try:
        result = await runner.run({{"key": "value"}})
    finally:
        await auto_responder.stop()

    assert result.success, f"Agent failed: {{result.error}}"

    # Access output data via result.output
    output_data = result.output or {{}}

    # Add constraint-specific assertions here
@@ -3038,18 +3046,15 @@ async def test_constraint_{constraint_id}_{scenario}(mock_mode):
'''

SUCCESS_TEST_TEMPLATE = '''@pytest.mark.asyncio
async def test_success_{criteria_id}_{scenario}(runner, auto_responder, mock_mode):
    """Test: {description}"""
    await auto_responder.start()
    try:
        result = await runner.run({{"key": "value"}})
    finally:
        await auto_responder.stop()

    assert result.success, f"Agent failed: {{result.error}}"

    # Access output data via result.output
    output_data = result.output or {{}}

    # Add success criteria-specific assertions here
@@ -3105,7 +3110,6 @@ def generate_constraint_tests(
        test_type="Constraint",
        description=f"Tests for constraints defined in goal: {goal.name}",
        agent_module=agent_module,
    )

    # Return guidelines + data for Claude to write tests directly
@@ -3121,14 +3125,22 @@
        "max_tests": 5,
        "naming_convention": "test_constraint_<constraint_id>_<scenario>",
        "required_decorator": "@pytest.mark.asyncio",
        "required_fixtures": "runner, auto_responder, mock_mode",
        "agent_call_pattern": "await runner.run(input_dict)",
        "auto_responder_pattern": (
            "await auto_responder.start()\n"
            "try:\n"
            "    result = await runner.run(input_dict)\n"
            "finally:\n"
            "    await auto_responder.stop()"
        ),
        "result_type": "ExecutionResult with .success, .output (dict), .error",
        "critical_rules": [
            "Every test function MUST be async with @pytest.mark.asyncio",
            "Every test MUST accept runner, auto_responder, and mock_mode fixtures",
            "Use await runner.run(input) -- NOT default_agent.run()",
            "Start auto_responder before running, stop in finally block",
            "runner and auto_responder are from conftest.py -- do NOT import them",
            "NEVER call result.get() - use result.output.get() instead",
            "Always check result.success before accessing result.output",
        ],
@@ -3192,7 +3204,6 @@ def generate_success_tests(
        test_type="Success criteria",
        description=f"Tests for success criteria defined in goal: {goal.name}",
        agent_module=agent_module,
    )

    # Return guidelines + data for Claude to write tests directly
@@ -3214,14 +3225,22 @@
        "max_tests": 12,
        "naming_convention": "test_success_<criteria_id>_<scenario>",
        "required_decorator": "@pytest.mark.asyncio",
        "required_fixtures": "runner, auto_responder, mock_mode",
        "agent_call_pattern": "await runner.run(input_dict)",
        "auto_responder_pattern": (
            "await auto_responder.start()\n"
            "try:\n"
            "    result = await runner.run(input_dict)\n"
            "finally:\n"
            "    await auto_responder.stop()"
        ),
        "result_type": "ExecutionResult with .success, .output (dict), .error",
        "critical_rules": [
            "Every test function MUST be async with @pytest.mark.asyncio",
            "Every test MUST accept runner, auto_responder, and mock_mode fixtures",
            "Use await runner.run(input) -- NOT default_agent.run()",
            "Start auto_responder before running, stop in finally block",
            "runner and auto_responder are from conftest.py -- do NOT import them",
            "NEVER call result.get() - use result.output.get() instead",
            "Always check result.success before accessing result.output",
        ],
@@ -3318,11 +3337,13 @@ def run_tests(
    # Add short traceback and quiet summary
    cmd.append("--tb=short")

    # Set PYTHONPATH so framework and agent packages are importable
    env = os.environ.copy()
    pythonpath = env.get("PYTHONPATH", "")
    project_root = Path(__file__).parent.parent.parent.parent.resolve()
    core_path = project_root / "core"
    exports_path = project_root / "exports"
    env["PYTHONPATH"] = f"{core_path}:{exports_path}:{project_root}:{pythonpath}"

    # Run pytest
    try:
@@ -3792,7 +3813,11 @@ def check_missing_credentials(

    from framework.runner import AgentRunner

    path, err = _validate_agent_path(agent_path)
    if err:
        return err

    runner = AgentRunner.load(str(path))
    runner.validate()

    store = _get_credential_store()
@@ -3992,7 +4017,11 @@ def verify_credentials(
    try:
        from framework.runner import AgentRunner

        path, err = _validate_agent_path(agent_path)
        if err:
            return err

        runner = AgentRunner.load(str(path))
        validation = runner.validate()

        return json.dumps(
@@ -4009,6 +4038,382 @@
        return json.dumps({"error": str(e)})


# =============================================================================
# SESSION & CHECKPOINT TOOLS (read-only, no build session required)
# =============================================================================

_MAX_DIFF_VALUE_LEN = 500


def _read_session_json(path: Path) -> dict | None:
    """Read a JSON file, returning None on failure."""
    if not path.exists():
        return None
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, OSError):
        return None


def _scan_agent_sessions(agent_work_dir: Path) -> list[tuple[str, Path]]:
    """Find session directories with state.json, sorted most-recent-first."""
    sessions: list[tuple[str, Path]] = []
    sessions_dir = agent_work_dir / "sessions"
    if not sessions_dir.exists():
        return sessions
    for session_dir in sessions_dir.iterdir():
        if session_dir.is_dir() and session_dir.name.startswith("session_"):
            state_path = session_dir / "state.json"
            if state_path.exists():
                sessions.append((session_dir.name, state_path))
    sessions.sort(key=lambda t: t[0], reverse=True)
    return sessions


def _truncate_value(value: object, max_len: int = _MAX_DIFF_VALUE_LEN) -> object:
    """Truncate a value's JSON representation if too long."""
    s = json.dumps(value, default=str)
    if len(s) <= max_len:
        return value
    return {"_truncated": True, "_preview": s[:max_len] + "...", "_length": len(s)}


@mcp.tool()
def list_agent_sessions(
    agent_work_dir: Annotated[
        str,
        "Path to the agent's working directory (e.g., ~/.hive/agents/my_agent)",
    ],
    status: Annotated[
        str,
        "Filter by status: 'active', 'paused', 'completed', 'failed', 'cancelled'. Empty for all.",
    ] = "",
    limit: Annotated[int, "Maximum number of results (default 20)"] = 20,
    offset: Annotated[int, "Number of sessions to skip for pagination"] = 0,
) -> str:
    """
    List sessions for an agent with optional status filter.

    Use this to discover which sessions exist, find resumable sessions,
    or identify failed sessions for debugging. Combines well with
    query_runtime_logs for correlating session state with log data.
    """
    work_dir = Path(agent_work_dir)
    all_sessions = _scan_agent_sessions(work_dir)

    if not all_sessions:
        return json.dumps({"sessions": [], "total": 0, "offset": offset, "limit": limit})

    summaries = []
    for session_id, state_path in all_sessions:
        data = _read_session_json(state_path)
        if data is None:
            continue

        session_status = data.get("status", "")
        if status and session_status != status:
            continue

        timestamps = data.get("timestamps", {})
        progress = data.get("progress", {})
        checkpoint_dir = state_path.parent / "checkpoints"

        summaries.append(
            {
                "session_id": session_id,
                "status": session_status,
                "goal_id": data.get("goal_id", ""),
                "started_at": timestamps.get("started_at", ""),
                "updated_at": timestamps.get("updated_at", ""),
                "completed_at": timestamps.get("completed_at"),
                "is_resumable": data.get("is_resumable", False),
                "is_resumable_from_checkpoint": data.get("is_resumable_from_checkpoint", False),
                "current_node": progress.get("current_node"),
                "paused_at": progress.get("paused_at"),
                "steps_executed": progress.get("steps_executed", 0),
                "execution_quality": progress.get("execution_quality", ""),
                "has_checkpoints": checkpoint_dir.exists()
                and any(checkpoint_dir.glob("cp_*.json")),
            }
        )

    total = len(summaries)
    page = summaries[offset : offset + limit]
    return json.dumps(
        {"sessions": page, "total": total, "offset": offset, "limit": limit}, indent=2
    )


@mcp.tool()
def get_agent_session_state(
    agent_work_dir: Annotated[str, "Path to the agent's working directory"],
    session_id: Annotated[str, "The session ID (e.g., 'session_20260208_143022_abc12345')"],
) -> str:
    """
    Load full session state for a specific session.

    Returns complete session data including status, progress, result,
    metrics, and checkpoint info. Memory values are excluded to prevent
    context bloat -- use get_agent_session_memory to retrieve memory contents.
    """
    state_path = Path(agent_work_dir) / "sessions" / session_id / "state.json"
    data = _read_session_json(state_path)
    if data is None:
        return json.dumps({"error": f"Session not found: {session_id}"})

    memory = data.get("memory", {})
    data["memory_keys"] = list(memory.keys()) if isinstance(memory, dict) else []
    data["memory_size"] = len(memory) if isinstance(memory, dict) else 0
    data.pop("memory", None)

    return json.dumps(data, indent=2, default=str)


@mcp.tool()
def get_agent_session_memory(
    agent_work_dir: Annotated[str, "Path to the agent's working directory"],
    session_id: Annotated[str, "The session ID"],
    key: Annotated[str, "Specific memory key to retrieve. Empty for all."] = "",
) -> str:
    """
    Get memory contents from a session.

    Memory stores intermediate results passed between nodes. Use this
    to inspect what data was produced during execution.

    If key is provided, returns only that memory key's value.
    If key is empty, returns all memory keys and their values.
    """
    state_path = Path(agent_work_dir) / "sessions" / session_id / "state.json"
    data = _read_session_json(state_path)
    if data is None:
        return json.dumps({"error": f"Session not found: {session_id}"})

    memory = data.get("memory", {})
    if not isinstance(memory, dict):
        memory = {}

    if key:
        if key not in memory:
            return json.dumps(
                {
                    "error": f"Memory key not found: '{key}'",
                    "available_keys": list(memory.keys()),
                }
            )
        value = memory[key]
        return json.dumps(
            {
                "session_id": session_id,
                "key": key,
                "value": value,
                "value_type": type(value).__name__,
            },
            indent=2,
            default=str,
        )

    return json.dumps(
        {"session_id": session_id, "memory": memory, "total_keys": len(memory)},
        indent=2,
        default=str,
    )


@mcp.tool()
def list_agent_checkpoints(
    agent_work_dir: Annotated[str, "Path to the agent's working directory"],
    session_id: Annotated[str, "The session ID to list checkpoints for"],
    checkpoint_type: Annotated[
        str,
        "Filter by type: 'node_start', 'node_complete', 'loop_iteration'. Empty for all.",
    ] = "",
    is_clean: Annotated[str, "Filter by clean status: 'true', 'false', or empty for all."] = "",
) -> str:
    """
    List checkpoints for a specific session.

    Checkpoints capture execution state at node boundaries for
    crash recovery and resume. Use with get_agent_checkpoint for
    detailed checkpoint inspection.
    """
    session_dir = Path(agent_work_dir) / "sessions" / session_id
    checkpoint_dir = session_dir / "checkpoints"

    if not session_dir.exists():
        return json.dumps({"error": f"Session not found: {session_id}"})

    if not checkpoint_dir.exists():
        return json.dumps(
            {
                "session_id": session_id,
                "checkpoints": [],
                "total": 0,
                "latest_checkpoint_id": None,
            }
        )

    # Try index.json first
    index_data = _read_session_json(checkpoint_dir / "index.json")
    if index_data and "checkpoints" in index_data:
        checkpoints = index_data["checkpoints"]
    else:
        # Fallback: scan individual checkpoint files
        checkpoints = []
        for cp_file in sorted(checkpoint_dir.glob("cp_*.json")):
            cp_data = _read_session_json(cp_file)
            if cp_data:
                checkpoints.append(
                    {
                        "checkpoint_id": cp_data.get("checkpoint_id", cp_file.stem),
                        "checkpoint_type": cp_data.get("checkpoint_type", ""),
                        "created_at": cp_data.get("created_at", ""),
                        "current_node": cp_data.get("current_node"),
                        "next_node": cp_data.get("next_node"),
                        "is_clean": cp_data.get("is_clean", True),
                        "description": cp_data.get("description", ""),
                    }
                )

    # Apply filters
    if checkpoint_type:
        checkpoints = [c for c in checkpoints if c.get("checkpoint_type") == checkpoint_type]
    if is_clean:
        clean_val = is_clean.lower() == "true"
        checkpoints = [c for c in checkpoints if c.get("is_clean") == clean_val]

    latest_id = None
    if index_data:
        latest_id = index_data.get("latest_checkpoint_id")
    elif checkpoints:
        latest_id = checkpoints[-1].get("checkpoint_id")

    return json.dumps(
        {
            "session_id": session_id,
            "checkpoints": checkpoints,
            "total": len(checkpoints),
            "latest_checkpoint_id": latest_id,
        },
        indent=2,
    )


@mcp.tool()
def get_agent_checkpoint(
    agent_work_dir: Annotated[str, "Path to the agent's working directory"],
    session_id: Annotated[str, "The session ID"],
    checkpoint_id: Annotated[str, "Specific checkpoint ID, or empty for latest"] = "",
) -> str:
    """
    Load a specific checkpoint with full state data.

    Returns the complete checkpoint including shared memory snapshot,
    execution path, accumulated outputs, and metrics. If checkpoint_id
    is empty, loads the latest checkpoint.
    """
    session_dir = Path(agent_work_dir) / "sessions" / session_id
    checkpoint_dir = session_dir / "checkpoints"

    if not checkpoint_dir.exists():
        return json.dumps({"error": f"No checkpoints found for session: {session_id}"})

    if not checkpoint_id:
        index_data = _read_session_json(checkpoint_dir / "index.json")
        if index_data and index_data.get("latest_checkpoint_id"):
            checkpoint_id = index_data["latest_checkpoint_id"]
|
||||
else:
|
||||
cp_files = sorted(checkpoint_dir.glob("cp_*.json"))
|
||||
if not cp_files:
|
||||
return json.dumps({"error": f"No checkpoints found for session: {session_id}"})
|
||||
checkpoint_id = cp_files[-1].stem
|
||||
|
||||
cp_path = checkpoint_dir / f"{checkpoint_id}.json"
|
||||
data = _read_session_json(cp_path)
|
||||
if data is None:
|
||||
return json.dumps({"error": f"Checkpoint not found: {checkpoint_id}"})
|
||||
|
||||
return json.dumps(data, indent=2, default=str)
|
||||
|
||||
|
||||
@mcp.tool()
|
||||
def compare_agent_checkpoints(
|
||||
agent_work_dir: Annotated[str, "Path to the agent's working directory"],
|
||||
session_id: Annotated[str, "The session ID"],
|
||||
checkpoint_id_before: Annotated[str, "The earlier checkpoint ID"],
|
||||
checkpoint_id_after: Annotated[str, "The later checkpoint ID"],
|
||||
) -> str:
|
||||
"""
|
||||
Compare memory state between two checkpoints.
|
||||
|
||||
Shows what memory keys were added, removed, or changed between
|
||||
two points in execution. Useful for understanding how data flows
|
||||
through the agent graph.
|
||||
"""
|
||||
checkpoint_dir = Path(agent_work_dir) / "sessions" / session_id / "checkpoints"
|
||||
|
||||
before = _read_session_json(checkpoint_dir / f"{checkpoint_id_before}.json")
|
||||
if before is None:
|
||||
return json.dumps({"error": f"Checkpoint not found: {checkpoint_id_before}"})
|
||||
|
||||
after = _read_session_json(checkpoint_dir / f"{checkpoint_id_after}.json")
|
||||
if after is None:
|
||||
return json.dumps({"error": f"Checkpoint not found: {checkpoint_id_after}"})
|
||||
|
||||
mem_before = before.get("shared_memory", {})
|
||||
mem_after = after.get("shared_memory", {})
|
||||
|
||||
keys_before = set(mem_before.keys())
|
||||
keys_after = set(mem_after.keys())
|
||||
|
||||
added = {k: _truncate_value(mem_after[k]) for k in keys_after - keys_before}
|
||||
removed = list(keys_before - keys_after)
|
||||
unchanged = []
|
||||
changed = {}
|
||||
|
||||
for k in keys_before & keys_after:
|
||||
if mem_before[k] == mem_after[k]:
|
||||
unchanged.append(k)
|
||||
else:
|
||||
changed[k] = {
|
||||
"before": _truncate_value(mem_before[k]),
|
||||
"after": _truncate_value(mem_after[k]),
|
||||
}
|
||||
|
||||
path_before = before.get("execution_path", [])
|
||||
path_after = after.get("execution_path", [])
|
||||
new_nodes = path_after[len(path_before) :]
|
||||
|
||||
return json.dumps(
|
||||
{
|
||||
"session_id": session_id,
|
||||
"before": {
|
||||
"checkpoint_id": checkpoint_id_before,
|
||||
"current_node": before.get("current_node"),
|
||||
"created_at": before.get("created_at", ""),
|
||||
},
|
||||
"after": {
|
||||
"checkpoint_id": checkpoint_id_after,
|
||||
"current_node": after.get("current_node"),
|
||||
"created_at": after.get("created_at", ""),
|
||||
},
|
||||
"memory_diff": {
|
||||
"added": added,
|
||||
"removed": removed,
|
||||
"changed": changed,
|
||||
"unchanged": unchanged,
|
||||
},
|
||||
"execution_path_diff": {
|
||||
"new_nodes": new_nodes,
|
||||
"path_before": path_before,
|
||||
"path_after": path_after,
|
||||
},
|
||||
},
|
||||
indent=2,
|
||||
default=str,
|
||||
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# MAIN
|
||||
# =============================================================================
|
||||
|
||||
@@ -332,6 +332,60 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
    resume_parser.set_defaults(func=cmd_resume)


def _load_resume_state(
    agent_path: str, session_id: str, checkpoint_id: str | None = None
) -> dict | None:
    """Load session or checkpoint state for headless resume.

    Args:
        agent_path: Path to the agent folder (e.g., exports/my_agent)
        session_id: Session ID to resume from
        checkpoint_id: Optional checkpoint ID within the session

    Returns:
        session_state dict for executor, or None if not found
    """
    agent_name = Path(agent_path).name
    agent_work_dir = Path.home() / ".hive" / "agents" / agent_name
    session_dir = agent_work_dir / "sessions" / session_id

    if not session_dir.exists():
        return None

    if checkpoint_id:
        # Checkpoint-based resume: load checkpoint and extract state
        cp_path = session_dir / "checkpoints" / f"{checkpoint_id}.json"
        if not cp_path.exists():
            return None
        try:
            cp_data = json.loads(cp_path.read_text())
        except (json.JSONDecodeError, OSError):
            return None
        return {
            "memory": cp_data.get("shared_memory", {}),
            "paused_at": cp_data.get("next_node") or cp_data.get("current_node"),
            "execution_path": cp_data.get("execution_path", []),
            "node_visit_counts": {},
        }
    else:
        # Session state resume: load state.json
        state_path = session_dir / "state.json"
        if not state_path.exists():
            return None
        try:
            state_data = json.loads(state_path.read_text())
        except (json.JSONDecodeError, OSError):
            return None
        progress = state_data.get("progress", {})
        paused_at = progress.get("paused_at") or progress.get("resume_from")
        return {
            "memory": state_data.get("memory", {}),
            "paused_at": paused_at,
            "execution_path": progress.get("path", []),
            "node_visit_counts": progress.get("node_visit_counts", {}),
        }


def cmd_run(args: argparse.Namespace) -> int:
    """Run an exported agent."""
    import logging

@@ -375,7 +429,6 @@ def cmd_run(args: argparse.Namespace) -> int:
        runner = AgentRunner.load(
            args.agent_path,
            model=args.model,
            enable_tui=True,
        )
    except CredentialError as e:
        print(f"\n{e}", file=sys.stderr)

@@ -419,7 +472,6 @@ def cmd_run(args: argparse.Namespace) -> int:
        runner = AgentRunner.load(
            args.agent_path,
            model=args.model,
            enable_tui=False,
        )
    except CredentialError as e:
        print(f"\n{e}", file=sys.stderr)

@@ -428,6 +480,27 @@ def cmd_run(args: argparse.Namespace) -> int:
        print(f"Error: {e}", file=sys.stderr)
        return 1

    # Load session/checkpoint state for resume (headless mode)
    session_state = None
    resume_session = getattr(args, "resume_session", None)
    checkpoint = getattr(args, "checkpoint", None)
    if resume_session:
        session_state = _load_resume_state(args.agent_path, resume_session, checkpoint)
        if session_state is None:
            print(
                f"Error: Could not load session state for {resume_session}",
                file=sys.stderr,
            )
            return 1
        if not args.quiet:
            resume_node = session_state.get("paused_at", "unknown")
            if checkpoint:
                print(f"Resuming from checkpoint: {checkpoint}")
            else:
                print(f"Resuming session: {resume_session}")
            print(f"Resume point: {resume_node}")
            print()

    # Auto-inject user_id if the agent expects it but it's not provided
    entry_input_keys = runner.graph.nodes[0].input_keys if runner.graph.nodes else []
    if "user_id" in entry_input_keys and context.get("user_id") is None:

@@ -447,7 +520,7 @@ def cmd_run(args: argparse.Namespace) -> int:
    print("=" * 60)
    print()

    result = asyncio.run(runner.run(context))
    result = asyncio.run(runner.run(context, session_state=session_state))

    # Format output
    output = {

@@ -1205,7 +1278,6 @@ def cmd_tui(args: argparse.Namespace) -> int:
        runner = AgentRunner.load(
            agent_path,
            model=args.model,
            enable_tui=True,
        )
    except CredentialError as e:
        print(f"\n{e}", file=sys.stderr)

@@ -17,17 +17,13 @@ from framework.graph.edge import (
    EdgeSpec,
    GraphSpec,
)
from framework.graph.executor import ExecutionResult, GraphExecutor
from framework.graph.executor import ExecutionResult
from framework.graph.node import NodeSpec
from framework.llm.provider import LLMProvider, Tool
from framework.runner.tool_registry import ToolRegistry

# Multi-entry-point runtime imports
from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
from framework.runtime.core import Runtime
from framework.runtime.execution_stream import EntryPointSpec
from framework.runtime.runtime_log_store import RuntimeLogStore
from framework.runtime.runtime_logger import RuntimeLogger

if TYPE_CHECKING:
    from framework.runner.protocol import AgentMessage, CapabilityResponse

@@ -271,7 +267,6 @@ class AgentRunner:
        mock_mode: bool = False,
        storage_path: Path | None = None,
        model: str | None = None,
        enable_tui: bool = False,
        intro_message: str = "",
    ):
        """

@@ -284,7 +279,6 @@ class AgentRunner:
            mock_mode: If True, use mock LLM responses
            storage_path: Path for runtime storage (defaults to temp)
            model: Model to use (reads from agent config or ~/.hive/configuration.json if None)
            enable_tui: If True, forces use of AgentRuntime with EventBus
            intro_message: Optional greeting shown to user on TUI load
        """
        self.agent_path = agent_path

@@ -292,7 +286,6 @@ class AgentRunner:
        self.goal = goal
        self.mock_mode = mock_mode
        self.model = model or self._resolve_default_model()
        self.enable_tui = enable_tui
        self.intro_message = intro_message

        # Set up storage

@@ -313,12 +306,10 @@ class AgentRunner:

        # Initialize components
        self._tool_registry = ToolRegistry()
        self._runtime: Runtime | None = None
        self._llm: LLMProvider | None = None
        self._executor: GraphExecutor | None = None
        self._approval_callback: Callable | None = None

        # Multi-entry-point support (AgentRuntime)
        # AgentRuntime — unified execution path for all agents
        self._agent_runtime: AgentRuntime | None = None
        self._uses_async_entry_points = self.graph.has_async_entry_points()

@@ -466,7 +457,6 @@ class AgentRunner:
        mock_mode: bool = False,
        storage_path: Path | None = None,
        model: str | None = None,
        enable_tui: bool = False,
    ) -> "AgentRunner":
        """
        Load an agent from an export folder.

@@ -480,7 +470,6 @@ class AgentRunner:
            mock_mode: If True, use mock LLM responses
            storage_path: Path for runtime storage (defaults to ~/.hive/agents/{name})
            model: LLM model to use (reads from agent's default_config if None)
            enable_tui: If True, forces use of AgentRuntime with EventBus

        Returns:
            AgentRunner instance ready to run

@@ -541,7 +530,6 @@ class AgentRunner:
            mock_mode=mock_mode,
            storage_path=storage_path,
            model=model,
            enable_tui=enable_tui,
            intro_message=intro_message,
        )

@@ -560,7 +548,6 @@ class AgentRunner:
            mock_mode=mock_mode,
            storage_path=storage_path,
            model=model,
            enable_tui=enable_tui,
        )

    def register_tool(

@@ -650,9 +637,6 @@ class AgentRunner:
            callback: Function to call for approval (receives node info, returns bool)
        """
        self._approval_callback = callback
        # If executor already exists, update it
        if self._executor is not None:
            self._executor.approval_callback = callback

    def _setup(self) -> None:
        """Set up runtime, LLM, and executor."""

@@ -717,16 +701,11 @@ class AgentRunner:
            print(f"Warning: {api_key_env} not set. LLM calls will fail.")
            print(f"Set it with: export {api_key_env}=your-api-key")

        # Get tools for executor/runtime
        # Get tools for runtime
        tools = list(self._tool_registry.get_tools().values())
        tool_executor = self._tool_registry.get_executor()

        if self._uses_async_entry_points or self.enable_tui:
            # Multi-entry-point mode or TUI mode: use AgentRuntime
            self._setup_agent_runtime(tools, tool_executor)
        else:
            # Single-entry-point mode: use legacy GraphExecutor
            self._setup_legacy_executor(tools, tool_executor)

    def _get_api_key_env_var(self, model: str) -> str | None:
        """Get the environment variable name for the API key based on model name."""

@@ -741,7 +720,7 @@ class AgentRunner:
        elif model_lower.startswith("anthropic/") or model_lower.startswith("claude"):
            return "ANTHROPIC_API_KEY"
        elif model_lower.startswith("gemini/") or model_lower.startswith("google/"):
            return "GOOGLE_API_KEY"
            return "GEMINI_API_KEY"
        elif model_lower.startswith("mistral/"):
            return "MISTRAL_API_KEY"
        elif model_lower.startswith("groq/"):

@@ -787,26 +766,6 @@ class AgentRunner:
        except Exception:
            return None

    def _setup_legacy_executor(self, tools: list, tool_executor: Callable | None) -> None:
        """Set up legacy single-entry-point execution using GraphExecutor."""
        # Create runtime
        self._runtime = Runtime(storage_path=self._storage_path)

        # Create runtime logger
        log_store = RuntimeLogStore(base_path=self._storage_path / "runtime_logs")
        runtime_logger = RuntimeLogger(store=log_store, agent_id=self.graph.id)

        # Create executor
        self._executor = GraphExecutor(
            runtime=self._runtime,
            llm=self._llm,
            tools=tools,
            tool_executor=tool_executor,
            approval_callback=self._approval_callback,
            runtime_logger=runtime_logger,
            loop_config=self.graph.loop_config,
        )

    def _setup_agent_runtime(self, tools: list, tool_executor: Callable | None) -> None:
        """Set up multi-entry-point execution using AgentRuntime."""
        # Convert AsyncEntryPointSpec to EntryPointSpec for AgentRuntime

@@ -824,9 +783,9 @@ class AgentRunner:
            )
            entry_points.append(ep)

        # If TUI enabled but no entry points (single-entry agent), create default
        if not entry_points and self.enable_tui and self.graph.entry_node:
            logger.info("Creating default entry point for TUI")
        # Single-entry agent with no async entry points: create a default entry point
        if not entry_points and self.graph.entry_node:
            logger.info("Creating default entry point for single-entry agent")
            entry_points.append(
                EntryPointSpec(
                    id="default",

@@ -905,32 +864,9 @@ class AgentRunner:
                error=error_msg,
            )

        if self._uses_async_entry_points or self.enable_tui:
            # Multi-entry-point mode: use AgentRuntime
            return await self._run_with_agent_runtime(
                input_data=input_data or {},
                entry_point_id=entry_point_id,
            )
        else:
            # Legacy single-entry-point mode
            return await self._run_with_executor(
                input_data=input_data or {},
                session_state=session_state,
            )

    async def _run_with_executor(
        self,
        input_data: dict,
        session_state: dict | None = None,
    ) -> ExecutionResult:
        """Run using legacy GraphExecutor (single entry point)."""
        if self._executor is None:
            self._setup()

        return await self._executor.execute(
            graph=self.graph,
            goal=self.goal,
            input_data=input_data,
            session_state=session_state,
        )

@@ -938,8 +874,11 @@ class AgentRunner:
        self,
        input_data: dict,
        entry_point_id: str | None = None,
        session_state: dict | None = None,
    ) -> ExecutionResult:
        """Run using AgentRuntime (multi-entry-point)."""
        """Run using AgentRuntime."""
        import sys

        if self._agent_runtime is None:
            self._setup()

@@ -947,6 +886,52 @@ class AgentRunner:
        if not self._agent_runtime.is_running:
            await self._agent_runtime.start()

        # Set up stdin-based I/O for client-facing nodes in headless mode.
        # When a client_facing EventLoopNode calls ask_user(), it emits
        # CLIENT_INPUT_REQUESTED on the event bus and blocks. We subscribe
        # a handler that prints the prompt and reads from stdin, then injects
        # the user's response back into the node to unblock it.
        has_client_facing = any(n.client_facing for n in self.graph.nodes)
        sub_ids: list[str] = []

        if has_client_facing and sys.stdin.isatty():
            from framework.runtime.event_bus import EventType

            runtime = self._agent_runtime

            async def _handle_client_output(event):
                """Print agent output to stdout as it streams."""
                content = event.data.get("content", "")
                if content:
                    print(content, end="", flush=True)

            async def _handle_input_requested(event):
                """Read user input from stdin and inject it into the node."""
                import asyncio

                node_id = event.node_id
                try:
                    loop = asyncio.get_event_loop()
                    user_input = await loop.run_in_executor(None, input, "\n>>> ")
                except EOFError:
                    user_input = ""

                # Inject into the waiting EventLoopNode via runtime
                await runtime.inject_input(node_id, user_input)

            sub_ids.append(
                runtime.subscribe_to_events(
                    event_types=[EventType.CLIENT_OUTPUT_DELTA],
                    handler=_handle_client_output,
                )
            )
            sub_ids.append(
                runtime.subscribe_to_events(
                    event_types=[EventType.CLIENT_INPUT_REQUESTED],
                    handler=_handle_input_requested,
                )
            )

        # Determine entry point
        if entry_point_id is None:
            # Use first entry point or "default" if no entry points defined

@@ -956,10 +941,12 @@
        else:
            entry_point_id = "default"

        try:
            # Trigger and wait for result
            result = await self._agent_runtime.trigger_and_wait(
                entry_point_id=entry_point_id,
                input_data=input_data,
                session_state=session_state,
            )

            # Return result or create error result

@@ -970,30 +957,22 @@
                success=False,
                error="Execution timed out or failed to complete",
            )
        finally:
            # Clean up subscriptions
            for sub_id in sub_ids:
                self._agent_runtime.unsubscribe_from_events(sub_id)

    # === Multi-Entry-Point API (for agents with async_entry_points) ===
    # === Runtime API ===

    async def start(self) -> None:
        """
        Start the agent runtime (for multi-entry-point agents).

        This starts all registered entry points and allows concurrent execution.
        For single-entry-point agents, this is a no-op.
        """
        if not self._uses_async_entry_points:
            return

        """Start the agent runtime."""
        if self._agent_runtime is None:
            self._setup()

        await self._agent_runtime.start()

    async def stop(self) -> None:
        """
        Stop the agent runtime (for multi-entry-point agents).

        For single-entry-point agents, this is a no-op.
        """
        """Stop the agent runtime."""
        if self._agent_runtime is not None:
            await self._agent_runtime.stop()

@@ -1006,7 +985,7 @@
        """
        Trigger execution at a specific entry point (non-blocking).

        For multi-entry-point agents only. Returns execution ID for tracking.
        Returns execution ID for tracking.

        Args:
            entry_point_id: Which entry point to trigger

@@ -1015,16 +994,7 @@

        Returns:
            Execution ID for tracking

        Raises:
            RuntimeError: If agent doesn't use async entry points
        """
        if not self._uses_async_entry_points:
            raise RuntimeError(
                "trigger() is only available for multi-entry-point agents. "
                "Use run() for single-entry-point agents."
            )

        if self._agent_runtime is None:
            self._setup()

@@ -1041,19 +1011,9 @@
        """
        Get goal progress across all execution streams.

        For multi-entry-point agents only.

        Returns:
            Dict with overall_progress, criteria_status, constraint_violations, etc.

        Raises:
            RuntimeError: If agent doesn't use async entry points
        """
        if not self._uses_async_entry_points:
            raise RuntimeError(
                "get_goal_progress() is only available for multi-entry-point agents."
            )

        if self._agent_runtime is None:
            self._setup()

@@ -1061,14 +1021,11 @@

    def get_entry_points(self) -> list[EntryPointSpec]:
        """
        Get all registered entry points (for multi-entry-point agents).
        Get all registered entry points.

        Returns:
            List of EntryPointSpec objects
        """
        if not self._uses_async_entry_points:
            return []

        if self._agent_runtime is None:
            self._setup()

@@ -1492,7 +1449,7 @@ Respond with JSON only:
        self._temp_dir = None

    async def cleanup_async(self) -> None:
        """Clean up resources (asynchronous - for multi-entry-point agents)."""
        """Clean up resources (asynchronous)."""
        # Stop agent runtime if running
        if self._agent_runtime is not None and self._agent_runtime.is_running:
            await self._agent_runtime.stop()

@@ -1503,8 +1460,7 @@ Respond with JSON only:
    async def __aenter__(self) -> "AgentRunner":
        """Context manager entry."""
        self._setup()
        # Start runtime for multi-entry-point agents
        if self._uses_async_entry_points and self._agent_runtime is not None:
        if self._agent_runtime is not None:
            await self._agent_runtime.start()
        return self

@@ -0,0 +1,172 @@
# Agent Runtime

Unified execution system for all Hive agents. Every agent — single-entry or multi-entry, headless or TUI — runs through the same runtime stack.

## Topology

```
AgentRunner.load(agent_path)
            |
      AgentRunner
 (factory + public API)
            |
 _setup_agent_runtime()
            |
      AgentRuntime
(lifecycle + orchestration)
     /      |      \
Stream A  Stream B  Stream C   ← one per entry point
    |         |         |
GraphExecutor GraphExecutor GraphExecutor
    |         |         |
Node → Node → Node   (graph traversal)
```

Single-entry agents get a `"default"` entry point automatically. There is no separate code path.

## Components

| Component | File | Role |
|---|---|---|
| `AgentRunner` | `runner/runner.py` | Load agents, configure tools/LLM, expose high-level API |
| `AgentRuntime` | `runtime/agent_runtime.py` | Lifecycle management, entry point routing, event bus |
| `ExecutionStream` | `runtime/execution_stream.py` | Per-entry-point execution queue, session persistence |
| `GraphExecutor` | `graph/executor.py` | Node traversal, tool dispatch, checkpointing |
| `EventBus` | `runtime/event_bus.py` | Pub/sub for execution events (streaming, I/O) |
| `SharedStateManager` | `runtime/shared_state.py` | Cross-stream state with isolation levels |
| `OutcomeAggregator` | `runtime/outcome_aggregator.py` | Goal progress tracking across streams |
| `SessionStore` | `storage/session_store.py` | Session state persistence (`sessions/{id}/state.json`) |

## Programming Interface

### AgentRunner (high-level)

```python
from framework.runner import AgentRunner

# Load and run
runner = AgentRunner.load("exports/my_agent", model="anthropic/claude-sonnet-4-20250514")
result = await runner.run({"query": "hello"})

# Resume from paused session
result = await runner.run({"query": "continue"}, session_state=saved_state)

# Lifecycle
await runner.start()                           # Start the runtime
await runner.stop()                            # Stop the runtime
exec_id = await runner.trigger("default", {})  # Non-blocking trigger
progress = await runner.get_goal_progress()    # Goal evaluation
entry_points = runner.get_entry_points()       # List entry points

# Context manager
async with AgentRunner.load("exports/my_agent") as runner:
    result = await runner.run({"query": "hello"})

# Cleanup
runner.cleanup()              # Synchronous
await runner.cleanup_async()  # Asynchronous
```

### AgentRuntime (lower-level)

```python
from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
from framework.runtime.execution_stream import EntryPointSpec

# Create runtime with entry points
runtime = create_agent_runtime(
    graph=graph,
    goal=goal,
    storage_path=Path("~/.hive/agents/my_agent"),
    entry_points=[
        EntryPointSpec(id="default", name="Default", entry_node="start", trigger_type="manual"),
    ],
    llm=llm,
    tools=tools,
    tool_executor=tool_executor,
    checkpoint_config=checkpoint_config,
)

# Lifecycle
await runtime.start()
await runtime.stop()

# Execution
exec_id = await runtime.trigger("default", {"query": "hello"})                # Non-blocking
result = await runtime.trigger_and_wait("default", {"query": "hello"})        # Blocking
result = await runtime.trigger_and_wait("default", {}, session_state=state)   # Resume

# Client-facing node I/O
await runtime.inject_input(node_id="chat", content="user response")

# Events
sub_id = runtime.subscribe_to_events(
    event_types=[EventType.CLIENT_OUTPUT_DELTA],
    handler=my_handler,
)
runtime.unsubscribe_from_events(sub_id)

# Inspection
runtime.is_running     # bool
runtime.event_bus      # EventBus
runtime.state_manager  # SharedStateManager
runtime.get_stats()    # Runtime statistics
```

## Execution Flow

1. `AgentRunner.run()` calls `AgentRuntime.trigger_and_wait()`
2. `AgentRuntime` routes to the `ExecutionStream` for the entry point
3. `ExecutionStream` creates a `GraphExecutor` and calls `execute()`
4. `GraphExecutor` traverses nodes, dispatches tools, manages checkpoints
5. `ExecutionResult` flows back up through the stack
6. `ExecutionStream` writes session state to disk

## Session Resume

All execution paths support session resume:

```python
# First run (agent pauses at a client-facing node)
result = await runner.run({"query": "start task"})
# result.paused_at = "review-node"
# result.session_state = {"memory": {...}, "paused_at": "review-node", ...}

# Resume
result = await runner.run({"input": "approved"}, session_state=result.session_state)
```

Session state flows: `AgentRunner.run()` → `AgentRuntime.trigger_and_wait()` → `ExecutionStream.execute()` → `GraphExecutor.execute()`.

Checkpoints are saved at node boundaries (`sessions/{id}/checkpoints/`) for crash recovery.
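
On resume, these on-disk files are turned back into the `session_state` dict that `runner.run()` accepts. A minimal sketch of the checkpoint branch, mirroring the CLI's `_load_resume_state` shown earlier in this diff (the helper name is illustrative and error handling is omitted):

```python
import json
from pathlib import Path

def state_from_checkpoint(agent_name: str, session_id: str, checkpoint_id: str) -> dict:
    """Build a resume-ready session_state dict from a saved checkpoint file."""
    cp_path = (
        Path.home() / ".hive" / "agents" / agent_name
        / "sessions" / session_id / "checkpoints" / f"{checkpoint_id}.json"
    )
    cp = json.loads(cp_path.read_text())
    return {
        "memory": cp.get("shared_memory", {}),
        # Prefer the node the checkpoint was about to run; fall back to
        # the node it captured.
        "paused_at": cp.get("next_node") or cp.get("current_node"),
        "execution_path": cp.get("execution_path", []),
        "node_visit_counts": {},
    }
```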

## Event Bus

The `EventBus` provides real-time execution visibility:

| Event | When |
|---|---|
| `NODE_STARTED` | Node begins execution |
| `NODE_COMPLETED` | Node finishes |
| `TOOL_CALL_STARTED` | Tool invocation begins |
| `TOOL_CALL_COMPLETED` | Tool invocation finishes |
| `CLIENT_OUTPUT_DELTA` | Agent streams text to user |
| `CLIENT_INPUT_REQUESTED` | Agent needs user input |
| `EXECUTION_COMPLETED` | Full execution finishes |

In headless mode, `AgentRunner` subscribes to `CLIENT_OUTPUT_DELTA` and `CLIENT_INPUT_REQUESTED` to print output and read stdin. In TUI mode, `AdenTUI` subscribes to route events to UI widgets.
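
A minimal headless-style subscriber, assuming `runtime` is the `AgentRuntime` from the example above (the handler bodies are illustrative; the subscribe/inject calls are the ones shown in this document):

```python
from framework.runtime.event_bus import EventType

async def print_deltas(event):
    # CLIENT_OUTPUT_DELTA events carry streamed text in event.data["content"]
    print(event.data.get("content", ""), end="", flush=True)

async def answer_prompts(event):
    # Unblock a waiting client-facing node with a canned reply
    await runtime.inject_input(event.node_id, "yes, proceed")

sub_a = runtime.subscribe_to_events(
    event_types=[EventType.CLIENT_OUTPUT_DELTA], handler=print_deltas
)
sub_b = runtime.subscribe_to_events(
    event_types=[EventType.CLIENT_INPUT_REQUESTED], handler=answer_prompts
)
# ... run the agent ...
runtime.unsubscribe_from_events(sub_a)
runtime.unsubscribe_from_events(sub_b)
```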

## Storage Layout

```
~/.hive/agents/{agent_name}/
    sessions/
        session_YYYYMMDD_HHMMSS_{uuid}/
            state.json            # Session state (status, memory, progress)
            checkpoints/          # Node-boundary snapshots
            logs/
                summary.json      # Execution summary
                details.jsonl     # Detailed event log
                tool_logs.jsonl   # Tool call log
    runtime_logs/                 # Cross-session runtime logs
```
@@ -3,6 +3,10 @@ Pytest templates for test file generation.

These templates provide headers and fixtures for pytest-compatible async tests.
Tests are written to exports/{agent}/tests/ as Python files and run with pytest.

Tests use AgentRunner.load() — the canonical runtime path — which creates
AgentRuntime, ExecutionStream, and proper session/log storage. For agents
with client-facing nodes, an auto_responder fixture handles input injection.
"""

# Template for the test file header (imports and fixtures)

@@ -11,17 +15,19 @@ PYTEST_TEST_FILE_HEADER = '''"""

{description}

REQUIRES: API_KEY (OpenAI or Anthropic) for real testing.
REQUIRES: API_KEY for execution tests. Structure tests run without keys.
"""

import os
import pytest
from {agent_module} import default_agent
from pathlib import Path

# Agent path resolved from this test file's location
AGENT_PATH = Path(__file__).resolve().parents[1]


def _get_api_key():
    """Get API key from CredentialStoreAdapter or environment."""
    # 1. Try CredentialStoreAdapter for Anthropic
    try:
        from aden_tools.credentials import CredentialStoreAdapter
        creds = CredentialStoreAdapter.default()

@@ -29,28 +35,43 @@ def _get_api_key():
            return creds.get("anthropic")
    except (ImportError, KeyError):
        pass

    # 2. Fallback to standard environment variables for OpenAI and others
    return (
        os.environ.get("OPENAI_API_KEY") or
        os.environ.get("ANTHROPIC_API_KEY") or
        os.environ.get("CEREBRAS_API_KEY") or
        os.environ.get("GROQ_API_KEY")
        os.environ.get("GROQ_API_KEY") or
        os.environ.get("GEMINI_API_KEY")
    )


# Skip all tests if no API key and not in mock mode
pytestmark = pytest.mark.skipif(
    not _get_api_key() and not os.environ.get("MOCK_MODE"),
    reason="API key required. Please set OPENAI_API_KEY, ANTHROPIC_API_KEY, or use MOCK_MODE=1."
    reason="API key required. Set ANTHROPIC_API_KEY or use MOCK_MODE=1 for structure tests."
)
'''

# Template for conftest.py with shared fixtures
PYTEST_CONFTEST_TEMPLATE = '''"""Shared test fixtures for {agent_name} tests."""

import json
import os
import re
import sys
from pathlib import Path

# Add exports/ and core/ to sys.path so the agent package and framework are importable
_repo_root = Path(__file__).resolve().parents[3]
for _p in ["exports", "core"]:
    _path = str(_repo_root / _p)
    if _path not in sys.path:
        sys.path.insert(0, _path)

import pytest
from framework.runner.runner import AgentRunner
from framework.runtime.event_bus import EventType

AGENT_PATH = Path(__file__).resolve().parents[1]


def _get_api_key():

@@ -62,19 +83,80 @@ def _get_api_key():
            return creds.get("anthropic")
    except (ImportError, KeyError):
        pass

    return (
        os.environ.get("OPENAI_API_KEY") or
        os.environ.get("ANTHROPIC_API_KEY") or
        os.environ.get("CEREBRAS_API_KEY") or
        os.environ.get("GROQ_API_KEY")
        os.environ.get("GROQ_API_KEY") or
        os.environ.get("GEMINI_API_KEY")
    )


@pytest.fixture
@pytest.fixture(scope="session")
def mock_mode():
    """Check if running in mock mode."""
    return bool(os.environ.get("MOCK_MODE"))
    """Return True if running in mock mode (no API key or MOCK_MODE=1)."""
    if os.environ.get("MOCK_MODE"):
        return True
    return not bool(_get_api_key())


@pytest.fixture(scope="session")
async def runner(tmp_path_factory, mock_mode):
    """Create an AgentRunner using the canonical runtime path.

    Uses tmp_path_factory for storage so tests don't pollute ~/.hive/agents/.
    Goes through AgentRunner.load() -> _setup() -> AgentRuntime, the same
    path as ``hive run``.
    """
    storage = tmp_path_factory.mktemp("agent_storage")
    r = AgentRunner.load(
        AGENT_PATH,
        mock_mode=mock_mode,
        storage_path=storage,
    )
    r._setup()
    yield r
    await r.cleanup_async()


@pytest.fixture
def auto_responder(runner):
    """Auto-respond to client-facing node input requests.

    Subscribes to CLIENT_INPUT_REQUESTED events and injects a response
    to unblock the node. Customize the response before calling start():

        auto_responder.response = "approve the report"
        await auto_responder.start()
    """
    class AutoResponder:
        def __init__(self, runner_instance):
            self._runner = runner_instance
            self.response = "yes, proceed"
            self.interactions = []
            self._sub_id = None

        async def start(self):
            runtime = self._runner._agent_runtime
            if runtime is None:
                return

            async def _handle(event):
                self.interactions.append(event.node_id)
                await runtime.inject_input(event.node_id, self.response)

            self._sub_id = runtime.subscribe_to_events(
                event_types=[EventType.CLIENT_INPUT_REQUESTED],
                handler=_handle,
            )

        async def stop(self):
            runtime = self._runner._agent_runtime
            if self._sub_id and runtime:
                runtime.unsubscribe_from_events(self._sub_id)
                self._sub_id = None

    return AutoResponder(runner)


@pytest.fixture(scope="session", autouse=True)

@@ -82,19 +164,51 @@ def check_api_key():
    """Ensure API key is set for real testing."""
    if not _get_api_key():
        if os.environ.get("MOCK_MODE"):
            print("\\n⚠️  Running in MOCK MODE - structure validation only")
            print("   This does NOT test LLM behavior or agent quality")
            print("   Set OPENAI_API_KEY or ANTHROPIC_API_KEY for real testing\\n")
            print("\\n  Running in MOCK MODE - structure validation only")
            print("  Set ANTHROPIC_API_KEY for real testing\\n")
        else:
            pytest.fail(
                "\\n❌ No API key found!\\n\\n"
                "Real testing requires an API key. Choose one:\\n"
                "1. Set OpenAI key:\\n"
                "   export OPENAI_API_KEY='your-key-here'\\n"
                "2. Set Anthropic key:\\n"
                "   export ANTHROPIC_API_KEY='your-key-here'\\n"
                "3. Run structure validation only:\\n"
                "   MOCK_MODE=1 pytest exports/{agent_name}/tests/\\n\\n"
                "Note: Mock mode does NOT validate agent behavior or quality."
                "\\nNo API key found!\\n"
                "Set ANTHROPIC_API_KEY or use MOCK_MODE=1 for structure tests.\\n"
            )


def parse_json_from_output(result, key):
    """Parse JSON from agent output (framework may store full LLM response as string)."""
    val = result.output.get(key, "")
    if isinstance(val, (dict, list)):
        return val
    if isinstance(val, str):
        json_text = re.sub(r"```json\\s*|\\s*```", "", val).strip()
        try:
            return json.loads(json_text)
        except (json.JSONDecodeError, TypeError):
            return val
    return val


def safe_get_nested(result, key_path, default=None):
    """Safely get nested value from result.output."""
    output = result.output or {{}}
    current = output
    for key in key_path:
        if isinstance(current, dict):
            current = current.get(key)
        elif isinstance(current, str):
            try:
                json_text = re.sub(r"```json\\s*|\\s*```", "", current).strip()
                parsed = json.loads(json_text)
                if isinstance(parsed, dict):
                    current = parsed.get(key)
                else:
                    return default
            except json.JSONDecodeError:
                return default
        else:
            return default
    return current if current is not None else default


pytest.parse_json_from_output = parse_json_from_output
pytest.safe_get_nested = safe_get_nested
'''

@@ -951,7 +951,7 @@ async def test_client_facing_node_streams_output():
        config=LoopConfig(max_iterations=5),
    )

    # Text-only on client_facing no longer blocks (no ask_user called),
    # Text-only on client_facing does not block (no ask_user called),
    # so the node completes without needing a shutdown workaround.
    result = await node.execute(ctx)

|
||||
3. Restart Cursor to load the MCP servers from `.cursor/mcp.json`
|
||||
4. Type `/` in Agent chat and search for skills (e.g., `/hive-create`)
|
||||
|
||||
|
||||
### Opencode Support
|
||||
To enable Opencode integration:
|
||||
|
||||
1. Create/Ensure `.opencode/` directory exists
|
||||
2. Configure MCP servers in `.opencode/mcp.json`
|
||||
3. Restart Opencode to load the MCP servers
|
||||
4. Switch to the Hive agent
|
||||
* **Tools:** Accesses `agent-builder` and standard `tools` via standard MCP protocols over stdio.
|
||||
|
||||
### Verify Setup
|
||||
|
||||
```bash
|
||||
|
||||
@@ -65,28 +65,26 @@ source .venv/bin/activate

If you prefer to set up manually or the script fails:

### 1. Install Core Framework
### 1. Sync Workspace Dependencies

```bash
cd core
uv pip install -e .
# From repository root - this creates a single .venv at the root
uv sync
```

### 2. Install Tools Package
> **Note:** The `uv sync` command uses the workspace configuration in `pyproject.toml` to install both the `core` (framework) and `tools` (aden_tools) packages together. This is the recommended approach over individual `pip install -e` commands, which may fail due to circular dependencies.

### 2. Activate the Virtual Environment

```bash
cd tools
uv pip install -e .
# Linux/macOS
source .venv/bin/activate

# Windows (PowerShell)
.venv\Scripts\Activate.ps1
```

### 3. Upgrade OpenAI Package

```bash
# litellm requires openai >= 1.0.0
uv pip install --upgrade "openai>=1.0.0"
```

### 4. Verify Installation
### 3. Verify Installation

```bash
uv run python -c "import framework; print('✓ framework OK')"

@@ -281,18 +279,20 @@ Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass

### "ModuleNotFoundError: No module named 'framework'"

**Solution:** Install the core package:
**Solution:** Sync the workspace dependencies:

```bash
cd core && uv pip install -e .
# From repository root
uv sync
```

### "ModuleNotFoundError: No module named 'aden_tools'"

**Solution:** Install the tools package:
**Solution:** Sync the workspace dependencies:

```bash
cd tools && uv pip install -e .
# From repository root
uv sync
```

Or run the setup script:

@@ -350,15 +350,14 @@ The Hive framework consists of three Python packages:

```
hive/
├── .venv/                 # Single workspace venv (created by uv sync)
├── core/                  # Core framework (runtime, graph executor, LLM providers)
│   ├── framework/
│   ├── .venv/             # Created by quickstart.sh
│   └── pyproject.toml
│
├── tools/                 # Tools and MCP servers
│   ├── src/
│   │   └── aden_tools/    # Actual package location
│   ├── .venv/             # Created by quickstart.sh
│   └── pyproject.toml
│
├── exports/               # Agent packages (user-created, gitignored)

@@ -368,28 +367,29 @@ hive/
└── templates/             # Pre-built template agents
```

## Separate Virtual Environments
## Virtual Environment Setup

Hive primarily uses **uv** to create and manage separate virtual environments for `core` and `tools`.
Hive uses **uv workspaces** to manage dependencies. When you run `uv sync` from the repository root, a **single `.venv`** is created at the root containing both packages.

The project uses separate virtual environments to:
### Benefits of Workspace Mode

- Isolate dependencies and avoid conflicts
- Allow independent development and testing of each package
- Enable MCP servers to run with their specific dependencies
- **Single environment** - No need to switch between multiple venvs
- **Unified dependencies** - Consistent package versions across core and tools
- **Simpler development** - One activation, access to everything

### How It Works

When you run `./quickstart.sh`, `uv` sets up:
When you run `./quickstart.sh` or `uv sync`:

1. **core/.venv/** - Contains the `framework` package and its dependencies (anthropic, litellm, mcp, etc.)
2. **tools/.venv/** - Contains the `aden_tools` package and its dependencies (beautifulsoup4, pandas, etc.)
1. **/.venv/** - A single root virtual environment is created
2. Both `framework` (from core/) and `aden_tools` (from tools/) are installed
3. All dependencies (anthropic, litellm, beautifulsoup4, pandas, etc.) are resolved together

If you need to refresh environments manually, use `uv`:
If you need to refresh the environment:

```bash
cd core && uv sync
cd ../tools && uv sync
# From repository root
uv sync
```

### Cross-Package Imports

@@ -521,7 +521,15 @@ export ADEN_CREDENTIALS_PATH="/custom/path"

# Agent storage location (default: /tmp)
export AGENT_STORAGE_PATH="/custom/storage"
```

## Opencode Setup

[Opencode](https://github.com/opencode-ai/opencode) is fully supported as a coding agent.

### Automatic Setup

Run the quickstart script from the repository root:

```bash
./quickstart.sh
```

## Additional Resources

- **Framework Documentation:** [core/README.md](../core/README.md)

@@ -40,7 +40,7 @@ Welcome to the Aden Engineering Challenges! These quizzes are designed for stude
After completing challenges, submit your work by:

1. Creating a GitHub Gist with your answers
2. Emailing the link to `careers@adenhq.com` with subject: `[Engineering Challenge] Your Name - Track Name`
2. Emailing the link to `contact@adenhq.com` with subject: `[Engineering Challenge] Your Name - Track Name`
3. Include your GitHub username in the email

## Getting Help

@@ -0,0 +1,42 @@
# Why Conditional Edges Need Priority (Function Nodes)

## The problem

Function nodes return everything they computed. They don't pick one output key — they return all of them.

```python
def score_lead(inputs):
    score = compute_score(inputs["profile"])
    return {
        "score": score,
        "is_high_value": score > 80,
        "needs_enrichment": score > 50 and not inputs["profile"].get("company"),
    }
```

Lead comes in: score 92, no company on file. Output: `{"score": 92, "is_high_value": True, "needs_enrichment": True}`.

Two conditional edges leaving this node:

```
Edge A: needs_enrichment == True   → enrichment node
Edge B: is_high_value == True      → outreach node
```

Both are true. Without priority, the graph either fans out to both (wrong — you'd email someone while still enriching their data) or picks one randomly (wrong — non-deterministic).

## Priority fixes it

```
Edge A: needs_enrichment == True   priority=2   (higher = checked first)
Edge B: is_high_value == True      priority=1
Edge C: is_high_value == False     priority=0
```

Executor keeps only the highest-priority matching group. A wins. Lead gets enriched first, loops back, gets re-scored — now `needs_enrichment` is false, B wins, outreach happens.
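
A minimal sketch of that selection rule (`CondEdge`, `select_edges`, and the `archive` target are illustrative names for this note, not the framework's API):

```python
from dataclasses import dataclass

@dataclass
class CondEdge:
    key: str          # memory key the condition reads
    expected: object  # value that makes the edge match
    priority: int     # higher = preferred
    target: str       # node to route to

def select_edges(edges: list[CondEdge], output: dict) -> list[CondEdge]:
    """Keep only the highest-priority group of matching conditional edges."""
    matched = [e for e in edges if output.get(e.key) == e.expected]
    if not matched:
        return []
    top = max(e.priority for e in matched)
    return [e for e in matched if e.priority == top]

edges = [
    CondEdge("needs_enrichment", True, priority=2, target="enrichment"),
    CondEdge("is_high_value", True, priority=1, target="outreach"),
    CondEdge("is_high_value", False, priority=0, target="archive"),
]
out = {"score": 92, "is_high_value": True, "needs_enrichment": True}
assert [e.target for e in select_edges(edges, out)] == ["enrichment"]
```

Edges A and B both match here, but only the priority-2 group survives, so the lead is routed to enrichment alone.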

## Why event loop nodes don't need this

The LLM understands "if/else." You tell it in the prompt: "if needs enrichment, set `needs_enrichment`. Otherwise if high value, set `approved`." It picks one. Only one conditional edge matches.

A function just returns a dict. It doesn't do "otherwise." Priority is the "otherwise" for function nodes.
@@ -303,8 +303,8 @@ if [ "$USE_ASSOC_ARRAYS" = true ]; then
    )

    declare -A DEFAULT_MODELS=(
        ["anthropic"]="claude-opus-4-6"
        ["openai"]="gpt-5.2"
        ["anthropic"]="claude-haiku-4-5"
        ["openai"]="gpt-5-mini"
        ["gemini"]="gemini-3-flash-preview"
        ["groq"]="moonshotai/kimi-k2-instruct-0905"
        ["cerebras"]="zai-glm-4.7"

@@ -945,6 +945,16 @@ else
    echo -e "${YELLOW}--${NC}"
fi

echo -n "  ⬡ local settings... "
if [ -f "$SCRIPT_DIR/.claude/settings.local.json" ]; then
    echo -e "${GREEN}ok${NC}"
elif [ -f "$SCRIPT_DIR/.claude/settings.local.json.example" ]; then
    cp "$SCRIPT_DIR/.claude/settings.local.json.example" "$SCRIPT_DIR/.claude/settings.local.json"
    echo -e "${GREEN}copied from example${NC}"
else
    echo -e "${YELLOW}--${NC}"
fi

echo -n "  ⬡ credential store... "
if [ -n "$HIVE_CREDENTIAL_KEY" ] && [ -d "$HOME/.hive/credentials/credentials" ]; then
    echo -e "${GREEN}ok${NC}"