Merge branch 'adenhq:main' into feat/observability-trace-context

This commit is contained in:
Fernando Mano
2026-01-28 14:52:37 -03:00
committed by GitHub
22 changed files with 1696 additions and 75 deletions
+20
View File
@@ -0,0 +1,20 @@
{
"mcpServers": {
"agent-builder": {
"command": "python",
"args": ["-m", "framework.mcp.agent_builder_server"],
"cwd": "core",
"env": {
"PYTHONPATH": "../tools/src"
}
},
"tools": {
"command": "python",
"args": ["mcp_server.py", "--stdio"],
"cwd": "tools",
"env": {
"PYTHONPATH": "src"
}
}
}
}
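For reference, a minimal sketch of what this configuration encodes: the client launches each server as a stdio subprocess with the given `command`, `cwd`, and `env`. Illustrative only; MCP clients such as Claude Code or Cursor do this themselves.

```python
# Sketch: spawn the agent-builder server the way the config above describes.
# Illustrative only -- the MCP client normally owns this process lifecycle.
import os
import subprocess

env = {**os.environ, "PYTHONPATH": "../tools/src"}  # "env" block from the config
proc = subprocess.Popen(
    ["python", "-m", "framework.mcp.agent_builder_server"],  # command + args
    cwd="core",                                               # "cwd" from the config
    env=env,
    stdin=subprocess.PIPE,   # MCP speaks JSON-RPC over stdio
    stdout=subprocess.PIPE,
)
```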
+1
View File
@@ -0,0 +1 @@
../../.claude/skills/agent-workflow
+1
View File
@@ -0,0 +1 @@
../../.claude/skills/building-agents-construction
+1
View File
@@ -0,0 +1 @@
../../.claude/skills/building-agents-core
+1
View File
@@ -0,0 +1 @@
../../.claude/skills/building-agents-patterns
+1
View File
@@ -0,0 +1 @@
../../.claude/skills/testing-agent
+19 -5
View File
@@ -51,12 +51,26 @@ jobs:
- Do NOT apply the "duplicate" label yet (the auto-close script will add it after 12 hours if no objections)
- Suggest the user react with a thumbs-down if they disagree
### 3. Check for invalid issues
If the issue lacks sufficient information, is spam, or doesn't make sense:
- Add the "invalid" label
- Comment asking for clarification or explaining why it's invalid
### 3. Check for Low-Quality / AI Spam
Analyze the issue quality. We are receiving many low-effort, AI-generated spam issues.
Flag the issue as INVALID if it matches these criteria:
- **Vague/Generic**: Title is "Fix bug" or "Error" without specific context.
- **Hallucinated**: Refers to files or features that do not exist in this repo.
- **Template Filler**: Body contains "Insert description here" or unrelated gibberish.
- **Low Effort**: No reproduction steps, no logs, only 1-2 sentences.
### 4. Categorize with labels (if NOT a duplicate)
If identified as spam/low-quality:
- Add the "invalid" label.
- Add a comment:
"This issue has been automatically flagged as low-quality or potentially AI-generated spam. It lacks specific details (logs, reproduction steps, file references) required for us to help. Please open a new issue following the template exactly if this is a legitimate request."
- Do NOT proceed to other steps.
### 4. Check for invalid issues (General)
If the issue is not spam but still lacks information:
- Add the "invalid" label
- Comment asking for clarification
### 5. Categorize with labels (if NOT a duplicate or spam)
Apply appropriate labels based on the issue content. Use ONLY these labels:
- bug: Something isn't working
- enhancement: New feature or request
BIN
View File
Binary file not shown.
+21 -1
View File
@@ -21,6 +21,26 @@ This will:
- Fix package compatibility issues (openai + litellm)
- Verify all installations
## Alpine Linux Setup
If you are using Alpine Linux (e.g., inside a Docker container), you must install system dependencies and use a virtual environment before running the setup script:
1. Install System Dependencies:
```bash
apk update
apk add bash git python3 py3-pip nodejs npm curl build-base python3-dev linux-headers libffi-dev
```
2. Set up a Virtual Environment (required for Python 3.12+, which blocks system-wide pip installs):
```bash
python3 -m venv venv
source venv/bin/activate
pip install --upgrade pip setuptools wheel
```
3. Run the Quickstart Script:
```bash
./quickstart.sh
```
## Manual Setup (Alternative)
If you prefer to set up manually or the script fails:
@@ -432,4 +452,4 @@ When contributing agent packages:
- **Issues:** https://github.com/adenhq/hive/issues
- **Discord:** https://discord.com/invite/MXE49hrKDk
- **Documentation:** https://docs.adenhq.com/
- **Documentation:** https://docs.adenhq.com/
+10
View File
@@ -102,6 +102,15 @@ PYTHONPATH=core:exports python -m your_agent_name run --input '{...}'
**[📖 Complete Setup Guide](ENVIRONMENT_SETUP.md)** - Detailed instructions for agent development
### Cursor IDE Support
Skills are also available in Cursor. To enable:
1. Open Command Palette (`Cmd+Shift+P` / `Ctrl+Shift+P`)
2. Run `MCP: Enable` to enable MCP servers
3. Restart Cursor to load the MCP servers from `.cursor/mcp.json`
4. Type `/` in Agent chat and search for skills (e.g., `/building-agents-construction`)
## Features
- **Goal-Driven Development** - Define objectives in natural language; the coding agent generates the agent graph and connection code to achieve them
@@ -226,6 +235,7 @@ hive/
├── docs/ # Documentation and guides
├── scripts/ # Build and utility scripts
├── .claude/ # Claude Code skills for building agents
├── .cursor/ # Cursor IDE skills (symlinks to .claude/skills)
├── ENVIRONMENT_SETUP.md # Python setup guide for agent development
├── DEVELOPER.md # Developer guide
├── CONTRIBUTING.md # Contribution guidelines
+64 -7
View File
@@ -38,6 +38,23 @@ class StepStatus(str, Enum):
SKIPPED = "skipped"
REJECTED = "rejected" # Human rejected execution
def is_terminal(self) -> bool:
"""Check if this status represents a terminal (finished) state.
Terminal states are states in which the step will not execute further,
either because it completed successfully or because it failed, was
skipped, or was rejected.
"""
return self in (
StepStatus.COMPLETED,
StepStatus.FAILED,
StepStatus.SKIPPED,
StepStatus.REJECTED,
)
def is_successful(self) -> bool:
"""Check if this status represents successful completion."""
return self == StepStatus.COMPLETED
class ApprovalDecision(str, Enum):
"""Human decision on a step requiring approval."""
@@ -161,11 +178,23 @@ class PlanStep(BaseModel):
model_config = {"extra": "allow"}
def is_ready(self, completed_step_ids: set[str]) -> bool:
"""Check if this step is ready to execute (all dependencies met)."""
def is_ready(self, terminal_step_ids: set[str]) -> bool:
"""Check if this step is ready to execute (all dependencies finished).
A step is ready when:
1. Its status is PENDING (not yet started)
2. All its dependencies are in a terminal state (completed, failed, skipped, or rejected)
Note: This allows dependent steps to become "ready" even if their dependencies
failed. The executor should check if any dependencies failed and handle
accordingly (e.g., skip the step or mark it as blocked).
Args:
terminal_step_ids: Set of step IDs that are in a terminal state
"""
if self.status != StepStatus.PENDING:
return False
return all(dep in completed_step_ids for dep in self.dependencies)
return all(dep in terminal_step_ids for dep in self.dependencies)
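The executor-side handling that the `is_ready()` note calls for can be made concrete. A minimal sketch, assuming a hypothetical `run_step` callable that returns True on success; everything else uses only names from this diff:

```python
# Sketch of an executor loop that handles failed dependencies instead of
# hanging. run_step() is a hypothetical callable, not part of this change.
def drain(plan, run_step) -> None:
    by_id = {s.id: s for s in plan.steps}
    while not plan.is_complete():
        ready = plan.get_ready_steps()
        if not ready:
            break  # nothing runnable (e.g. a step awaiting approval)
        for step in ready:
            if any(not by_id[d].status.is_successful() for d in step.dependencies):
                # A dependency reached a terminal state without succeeding:
                # propagate as SKIPPED rather than executing the step.
                step.status = StepStatus.SKIPPED
            else:
                ok = run_step(step)
                step.status = StepStatus.COMPLETED if ok else StepStatus.FAILED
```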
class Judgment(BaseModel):
@@ -327,18 +356,46 @@ class Plan(BaseModel):
return None
def get_ready_steps(self) -> list[PlanStep]:
"""Get all steps that are ready to execute."""
completed_ids = {s.id for s in self.steps if s.status == StepStatus.COMPLETED}
return [s for s in self.steps if s.is_ready(completed_ids)]
"""Get all steps that are ready to execute.
A step is ready when all its dependencies are in terminal states
(completed, failed, skipped, or rejected).
"""
terminal_ids = {s.id for s in self.steps if s.status.is_terminal()}
return [s for s in self.steps if s.is_ready(terminal_ids)]
def get_completed_steps(self) -> list[PlanStep]:
"""Get all completed steps."""
return [s for s in self.steps if s.status == StepStatus.COMPLETED]
def is_complete(self) -> bool:
"""Check if all steps are completed."""
"""Check if all steps are in terminal states (finished executing).
Returns True when all steps have reached a terminal state, regardless
of whether they succeeded or failed. Use has_failed_steps() to check
if any steps failed.
"""
return all(s.status.is_terminal() for s in self.steps)
def is_successful(self) -> bool:
"""Check if all steps completed successfully."""
return all(s.status == StepStatus.COMPLETED for s in self.steps)
def has_failed_steps(self) -> bool:
"""Check if any steps failed, were skipped, or were rejected."""
return any(
s.status in (StepStatus.FAILED, StepStatus.SKIPPED, StepStatus.REJECTED)
for s in self.steps
)
def get_failed_steps(self) -> list[PlanStep]:
"""Get all steps that failed, were skipped, or were rejected."""
return [
s
for s in self.steps
if s.status in (StepStatus.FAILED, StepStatus.SKIPPED, StepStatus.REJECTED)
]
def to_feedback_context(self) -> dict[str, Any]:
"""Create context for replanning."""
return {
+3 -2
View File
@@ -417,8 +417,9 @@ def cmd_list(args: argparse.Namespace) -> int:
directory = Path(args.directory)
if not directory.exists():
print(f"Directory not found: {directory}", file=sys.stderr)
return 1
# FIX: Handle missing directory gracefully on fresh install
print(f"No agents found in {directory}")
return 0
agents = []
for path in directory.iterdir():
+39 -2
View File
@@ -46,6 +46,40 @@ class FileStorage:
for d in dirs:
d.mkdir(parents=True, exist_ok=True)
def _validate_key(self, key: str) -> None:
"""
Validate key to prevent path traversal attacks.
Args:
key: The key to validate
Raises:
ValueError: If key contains path traversal or dangerous patterns
"""
if not key or key.strip() == "":
raise ValueError("Key cannot be empty")
# Block path separators
if "/" in key or "\\" in key:
raise ValueError(f"Invalid key format: path separators not allowed in '{key}'")
# Block parent directory references
if ".." in key or key.startswith("."):
raise ValueError(f"Invalid key format: path traversal detected in '{key}'")
# Block absolute paths
if key.startswith("/") or (len(key) > 1 and key[1] == ":"):
raise ValueError(f"Invalid key format: absolute paths not allowed in '{key}'")
# Block null bytes (Unix path injection)
if "\x00" in key:
raise ValueError("Invalid key format: null bytes not allowed")
# Block other dangerous special characters
dangerous_chars = {"<", ">", "|", "&", "$", "`", "'", '"'}
if any(char in key for char in dangerous_chars):
raise ValueError(f"Invalid key format: contains dangerous characters in '{key}'")
# === RUN OPERATIONS ===
def save_run(self, run: Run) -> None:
@@ -140,6 +174,7 @@ class FileStorage:
def _get_index(self, index_type: str, key: str) -> list[str]:
"""Get values from an index."""
self._validate_key(key) # Prevent path traversal
index_path = self.base_path / "indexes" / index_type / f"{key}.json"
if not index_path.exists():
return []
@@ -148,8 +183,9 @@ class FileStorage:
def _add_to_index(self, index_type: str, key: str, value: str) -> None:
"""Add a value to an index."""
self._validate_key(key) # Prevent path traversal
index_path = self.base_path / "indexes" / index_type / f"{key}.json"
values = self._get_index(index_type, key)
values = self._get_index(index_type, key) # Already validated in _get_index
if value not in values:
values.append(value)
with open(index_path, "w") as f:
@@ -157,8 +193,9 @@ class FileStorage:
def _remove_from_index(self, index_type: str, key: str, value: str) -> None:
"""Remove a value from an index."""
self._validate_key(key) # Prevent path traversal
index_path = self.base_path / "indexes" / index_type / f"{key}.json"
values = self._get_index(index_type, key)
values = self._get_index(index_type, key) # Already validated in _get_index
if value in values:
values.remove(value)
with open(index_path, "w") as f:
+217
View File
@@ -0,0 +1,217 @@
"""
Tests for path traversal vulnerability fix in FileStorage.
Verifies that the _validate_key() method properly blocks path traversal attempts.
"""
import tempfile
from pathlib import Path
import pytest
from framework.storage.backend import FileStorage
class TestPathTraversalProtection:
"""Tests for path traversal vulnerability protection."""
@pytest.fixture
def storage(self):
"""Create a temporary storage instance for testing."""
with tempfile.TemporaryDirectory() as tmpdir:
yield FileStorage(tmpdir)
# === VALID KEYS (should pass validation) ===
def test_valid_alphanumeric_key(self, storage):
"""Alphanumeric keys should be allowed."""
# Should not raise
storage._validate_key("goal_123")
storage._validate_key("run_abc_def")
storage._validate_key("status_completed")
def test_valid_key_with_hyphens_underscores(self, storage):
"""Keys with hyphens and underscores should be allowed."""
storage._validate_key("goal-123")
storage._validate_key("run_id_456")
storage._validate_key("completed-nodes_list")
# === PATH TRAVERSAL ATTEMPTS (should raise ValueError) ===
def test_blocks_parent_directory_traversal(self, storage):
"""Block .. path traversal attempts."""
# These all have path separators which are blocked first
with pytest.raises(ValueError):
storage._validate_key("../../../etc/passwd")
with pytest.raises(ValueError):
storage._validate_key("..\\..\\windows\\system32")
with pytest.raises(ValueError):
storage._validate_key("goal/../../../.env")
def test_blocks_leading_dot(self, storage):
"""Block keys starting with dot."""
with pytest.raises(ValueError, match="path traversal detected"):
storage._validate_key(".env")
# This also has path separator which is caught first
with pytest.raises(ValueError):
storage._validate_key(".ssh/id_rsa")
def test_blocks_absolute_paths_unix(self, storage):
"""Block absolute paths (Unix)."""
# These have path separators which are blocked first
with pytest.raises(ValueError):
storage._validate_key("/etc/passwd")
with pytest.raises(ValueError):
storage._validate_key("/var/www/html/shell.php")
def test_blocks_absolute_paths_windows(self, storage):
"""Block absolute paths (Windows)."""
# These have path separators which are blocked first
with pytest.raises(ValueError):
storage._validate_key("C:\\Windows\\System32")
with pytest.raises(ValueError):
storage._validate_key("D:\\config\\database.yaml")
def test_blocks_path_separators(self, storage):
"""Block forward and backward slashes."""
with pytest.raises(ValueError, match="path separators not allowed"):
storage._validate_key("goal/subdir/id")
with pytest.raises(ValueError, match="path separators not allowed"):
storage._validate_key("goal\\subdir\\id")
with pytest.raises(ValueError, match="path separators not allowed"):
storage._validate_key("some/path/to/../../.env")
def test_blocks_null_bytes(self, storage):
"""Block null byte injection."""
with pytest.raises(ValueError, match="null bytes not allowed"):
storage._validate_key("goal\x00passwd")
def test_blocks_dangerous_shell_chars(self, storage):
"""Block dangerous shell characters."""
with pytest.raises(ValueError, match="dangerous characters"):
storage._validate_key("goal`whoami`")
with pytest.raises(ValueError, match="dangerous characters"):
storage._validate_key("goal$(cat)")
with pytest.raises(ValueError, match="dangerous characters"):
storage._validate_key("goal|nc")
with pytest.raises(ValueError, match="dangerous characters"):
storage._validate_key("goal&& rm")
def test_blocks_empty_key(self, storage):
"""Block empty keys."""
with pytest.raises(ValueError, match="empty"):
storage._validate_key("")
with pytest.raises(ValueError, match="empty"):
storage._validate_key(" ")
# === END-TO-END TESTS ===
def test_get_runs_by_goal_blocks_traversal(self, storage):
"""get_runs_by_goal() should block path traversal."""
with pytest.raises(ValueError):
storage.get_runs_by_goal("../../../.env")
def test_get_runs_by_node_blocks_traversal(self, storage):
"""get_runs_by_node() should block path traversal."""
with pytest.raises(ValueError):
storage.get_runs_by_node("/etc/passwd")
def test_get_runs_by_status_blocks_traversal(self, storage):
"""get_runs_by_status() should block path traversal."""
with pytest.raises(ValueError):
storage.get_runs_by_status("..\\..\\windows\\system32")
def test_valid_queries_still_work(self, storage):
"""Valid queries should work after fix."""
# These should return empty list, not raise errors
result = storage.get_runs_by_goal("legitimate_goal")
assert result == []
result = storage.get_runs_by_node("legitimate_node")
assert result == []
result = storage.get_runs_by_status("completed")
assert result == []
# === REAL-WORLD ATTACK SCENARIOS ===
def test_blocks_env_file_escape(self, storage):
"""Block attempts to access .env files."""
with pytest.raises(ValueError):
storage.get_runs_by_goal("../../../.env")
def test_blocks_config_file_escape(self, storage):
"""Block attempts to access config files."""
with pytest.raises(ValueError):
storage.get_runs_by_goal("../../../../etc/aden/database.yaml")
def test_blocks_web_shell_creation(self, storage):
"""Block attempts to create web shells."""
with pytest.raises(ValueError):
storage._add_to_index("by_goal", "../../var/www/html/shell", "malicious_code")
def test_blocks_cron_injection(self, storage):
"""Block attempts to create cron jobs."""
with pytest.raises(ValueError):
storage._add_to_index("by_node", "../../../etc/cron.d/backdoor", "reverse_shell")
def test_blocks_sudoers_modification(self, storage):
"""Block attempts to modify sudoers file."""
with pytest.raises(ValueError):
storage._add_to_index("by_status", "../../../../etc/sudoers", "ALL=(ALL) NOPASSWD:ALL")
class TestPathTraversalWithActualFiles:
"""Test path traversal protection with actual file operations."""
def test_cannot_escape_storage_directory(self):
"""Verify that even with path traversal, we can't escape storage dir."""
with tempfile.TemporaryDirectory() as tmpdir:
tmpdir_path = Path(tmpdir)
storage_dir = tmpdir_path / "storage"
storage_dir.mkdir()
# Create a secret file outside storage
secret_file = tmpdir_path / "secret.txt"
secret_file.write_text("SENSITIVE_DATA")
storage = FileStorage(storage_dir)
# Attempt to read the secret file via path traversal
with pytest.raises(ValueError):
storage.get_runs_by_goal("../secret")
# Verify the secret file was not accessed (still contains original data)
assert secret_file.read_text() == "SENSITIVE_DATA"
def test_cannot_write_outside_storage(self):
"""Verify that we can't write files outside storage directory."""
with tempfile.TemporaryDirectory() as tmpdir:
tmpdir_path = Path(tmpdir)
storage_dir = tmpdir_path / "storage"
storage_dir.mkdir()
storage = FileStorage(storage_dir)
# Attempt to write outside storage directory
with pytest.raises(ValueError):
storage._add_to_index("by_goal", "../../malicious", "payload")
# Verify no file was created outside storage
malicious_file = tmpdir_path / "malicious.json"
assert not malicious_file.exists()
if __name__ == "__main__":
pytest.main([__file__, "-v"])
@@ -0,0 +1,384 @@
"""
Tests for Plan dependency resolution with failed steps.
These tests verify that plan execution correctly handles failed dependencies
instead of hanging indefinitely.
"""
import pytest
from framework.graph.plan import (
ActionSpec,
ActionType,
Plan,
PlanStep,
StepStatus,
)
class TestStepStatusTerminal:
"""Tests for StepStatus.is_terminal() method."""
def test_completed_is_terminal(self):
"""COMPLETED status should be terminal."""
assert StepStatus.COMPLETED.is_terminal() is True
def test_failed_is_terminal(self):
"""FAILED status should be terminal."""
assert StepStatus.FAILED.is_terminal() is True
def test_skipped_is_terminal(self):
"""SKIPPED status should be terminal."""
assert StepStatus.SKIPPED.is_terminal() is True
def test_rejected_is_terminal(self):
"""REJECTED status should be terminal."""
assert StepStatus.REJECTED.is_terminal() is True
def test_pending_is_not_terminal(self):
"""PENDING status should not be terminal."""
assert StepStatus.PENDING.is_terminal() is False
def test_in_progress_is_not_terminal(self):
"""IN_PROGRESS status should not be terminal."""
assert StepStatus.IN_PROGRESS.is_terminal() is False
def test_awaiting_approval_is_not_terminal(self):
"""AWAITING_APPROVAL status should not be terminal."""
assert StepStatus.AWAITING_APPROVAL.is_terminal() is False
def test_completed_is_successful(self):
"""Only COMPLETED should be successful."""
assert StepStatus.COMPLETED.is_successful() is True
assert StepStatus.FAILED.is_successful() is False
assert StepStatus.SKIPPED.is_successful() is False
class TestPlanStepIsReady:
"""Tests for PlanStep.is_ready() with terminal states."""
def _make_step(self, id: str, deps: list[str] | None = None, status: StepStatus = StepStatus.PENDING):
"""Helper to create a step."""
return PlanStep(
id=id,
description=f"Step {id}",
action=ActionSpec(action_type=ActionType.FUNCTION, function_name="test"),
dependencies=deps or [],
status=status,
)
def test_step_ready_when_no_dependencies(self):
"""Step with no dependencies should be ready."""
step = self._make_step("step1")
assert step.is_ready(set()) is True
def test_step_ready_when_dependency_completed(self):
"""Step should be ready when dependency is completed."""
step = self._make_step("step2", deps=["step1"])
assert step.is_ready({"step1"}) is True
def test_step_ready_when_dependency_failed(self):
"""Step should be ready when dependency failed (terminal state)."""
step = self._make_step("step2", deps=["step1"])
# step1 is in terminal_step_ids because it failed
assert step.is_ready({"step1"}) is True
def test_step_not_ready_when_dependency_pending(self):
"""Step should not be ready when dependency is still pending."""
step = self._make_step("step2", deps=["step1"])
assert step.is_ready(set()) is False
def test_step_not_ready_when_already_completed(self):
"""Completed step should not be ready."""
step = self._make_step("step1", status=StepStatus.COMPLETED)
assert step.is_ready(set()) is False
def test_step_not_ready_when_in_progress(self):
"""In-progress step should not be ready."""
step = self._make_step("step1", status=StepStatus.IN_PROGRESS)
assert step.is_ready(set()) is False
def test_step_ready_with_multiple_dependencies_all_terminal(self):
"""Step should be ready when all dependencies are terminal."""
step = self._make_step("step3", deps=["step1", "step2"])
assert step.is_ready({"step1", "step2"}) is True
def test_step_not_ready_with_partial_dependencies(self):
"""Step should not be ready when only some dependencies are terminal."""
step = self._make_step("step3", deps=["step1", "step2"])
assert step.is_ready({"step1"}) is False
class TestPlanGetReadySteps:
"""Tests for Plan.get_ready_steps() with failed dependencies."""
def _make_plan(self, steps: list[PlanStep]) -> Plan:
"""Helper to create a plan."""
return Plan(
id="test_plan",
goal_id="test_goal",
description="Test plan",
steps=steps,
)
def _make_step(self, id: str, deps: list[str] | None = None, status: StepStatus = StepStatus.PENDING):
"""Helper to create a step."""
return PlanStep(
id=id,
description=f"Step {id}",
action=ActionSpec(action_type=ActionType.FUNCTION, function_name="test"),
dependencies=deps or [],
status=status,
)
def test_ready_steps_with_no_dependencies(self):
"""Steps with no dependencies should be ready."""
plan = self._make_plan(
[
self._make_step("step1"),
self._make_step("step2"),
]
)
ready = plan.get_ready_steps()
assert len(ready) == 2
assert {s.id for s in ready} == {"step1", "step2"}
def test_ready_steps_with_completed_dependency(self):
"""Dependent step should be ready when dependency is completed."""
plan = self._make_plan(
[
self._make_step("step1", status=StepStatus.COMPLETED),
self._make_step("step2", deps=["step1"]),
]
)
ready = plan.get_ready_steps()
assert len(ready) == 1
assert ready[0].id == "step2"
def test_ready_steps_with_failed_dependency(self):
"""Dependent step should be ready when dependency failed."""
plan = self._make_plan(
[
self._make_step("step1", status=StepStatus.FAILED),
self._make_step("step2", deps=["step1"]),
]
)
ready = plan.get_ready_steps()
assert len(ready) == 1
assert ready[0].id == "step2"
def test_ready_steps_with_skipped_dependency(self):
"""Dependent step should be ready when dependency was skipped."""
plan = self._make_plan(
[
self._make_step("step1", status=StepStatus.SKIPPED),
self._make_step("step2", deps=["step1"]),
]
)
ready = plan.get_ready_steps()
assert len(ready) == 1
assert ready[0].id == "step2"
def test_ready_steps_with_rejected_dependency(self):
"""Dependent step should be ready when dependency was rejected."""
plan = self._make_plan(
[
self._make_step("step1", status=StepStatus.REJECTED),
self._make_step("step2", deps=["step1"]),
]
)
ready = plan.get_ready_steps()
assert len(ready) == 1
assert ready[0].id == "step2"
def test_no_ready_steps_when_dependency_in_progress(self):
"""Dependent step should not be ready when dependency is in progress."""
plan = self._make_plan(
[
self._make_step("step1", status=StepStatus.IN_PROGRESS),
self._make_step("step2", deps=["step1"]),
]
)
ready = plan.get_ready_steps()
assert len(ready) == 0
class TestPlanCompletion:
"""Tests for Plan completion status methods."""
def _make_plan(self, steps: list[PlanStep]) -> Plan:
"""Helper to create a plan."""
return Plan(
id="test_plan",
goal_id="test_goal",
description="Test plan",
steps=steps,
)
def _make_step(self, id: str, status: StepStatus = StepStatus.PENDING):
"""Helper to create a step."""
return PlanStep(
id=id,
description=f"Step {id}",
action=ActionSpec(action_type=ActionType.FUNCTION, function_name="test"),
status=status,
)
def test_is_complete_when_all_completed(self):
"""Plan should be complete when all steps are completed."""
plan = self._make_plan(
[
self._make_step("step1", StepStatus.COMPLETED),
self._make_step("step2", StepStatus.COMPLETED),
]
)
assert plan.is_complete() is True
def test_is_complete_when_all_terminal_mixed(self):
"""Plan should be complete when all steps are in terminal states (mixed)."""
plan = self._make_plan(
[
self._make_step("step1", StepStatus.COMPLETED),
self._make_step("step2", StepStatus.FAILED),
self._make_step("step3", StepStatus.SKIPPED),
]
)
assert plan.is_complete() is True
def test_is_not_complete_when_pending(self):
"""Plan should not be complete when steps are pending."""
plan = self._make_plan(
[
self._make_step("step1", StepStatus.COMPLETED),
self._make_step("step2", StepStatus.PENDING),
]
)
assert plan.is_complete() is False
def test_is_not_complete_when_in_progress(self):
"""Plan should not be complete when steps are in progress."""
plan = self._make_plan(
[
self._make_step("step1", StepStatus.COMPLETED),
self._make_step("step2", StepStatus.IN_PROGRESS),
]
)
assert plan.is_complete() is False
def test_is_successful_when_all_completed(self):
"""Plan should be successful only when all steps completed."""
plan = self._make_plan(
[
self._make_step("step1", StepStatus.COMPLETED),
self._make_step("step2", StepStatus.COMPLETED),
]
)
assert plan.is_successful() is True
def test_is_not_successful_when_failed(self):
"""Plan should not be successful when any step failed."""
plan = self._make_plan(
[
self._make_step("step1", StepStatus.COMPLETED),
self._make_step("step2", StepStatus.FAILED),
]
)
assert plan.is_successful() is False
def test_has_failed_steps(self):
"""has_failed_steps should detect failed steps."""
plan = self._make_plan(
[
self._make_step("step1", StepStatus.COMPLETED),
self._make_step("step2", StepStatus.FAILED),
]
)
assert plan.has_failed_steps() is True
def test_has_no_failed_steps(self):
"""has_failed_steps should return False when all succeeded."""
plan = self._make_plan(
[
self._make_step("step1", StepStatus.COMPLETED),
self._make_step("step2", StepStatus.COMPLETED),
]
)
assert plan.has_failed_steps() is False
def test_get_failed_steps(self):
"""get_failed_steps should return all failed/skipped/rejected steps."""
plan = self._make_plan(
[
self._make_step("step1", StepStatus.COMPLETED),
self._make_step("step2", StepStatus.FAILED),
self._make_step("step3", StepStatus.SKIPPED),
self._make_step("step4", StepStatus.REJECTED),
]
)
failed = plan.get_failed_steps()
assert len(failed) == 3
assert {s.id for s in failed} == {"step2", "step3", "step4"}
class TestBugScenario:
"""Test the specific bug scenario that was fixed."""
def _make_step(self, id: str, deps: list[str] | None = None, status: StepStatus = StepStatus.PENDING):
"""Helper to create a step."""
return PlanStep(
id=id,
description=f"Step {id}",
action=ActionSpec(action_type=ActionType.FUNCTION, function_name="test"),
dependencies=deps or [],
status=status,
)
def test_dependent_step_becomes_ready_after_dependency_fails(self):
"""
BUG SCENARIO: When step1 fails, step2 (which depends on step1) should
become ready, allowing the executor to handle it appropriately.
Before fix: step2 would never become ready, causing infinite hang.
After fix: step2 becomes ready and executor can decide how to handle it.
"""
plan = Plan(
id="test_plan",
goal_id="test_goal",
description="Test plan with dependency",
steps=[
self._make_step("step1", status=StepStatus.PENDING),
self._make_step("step2", deps=["step1"], status=StepStatus.PENDING),
],
)
# Initially, only step1 is ready
ready = plan.get_ready_steps()
assert len(ready) == 1
assert ready[0].id == "step1"
# Simulate step1 failing
plan.steps[0].status = StepStatus.FAILED
# Now step2 should be ready (dependency is in terminal state)
ready = plan.get_ready_steps()
assert len(ready) == 1
assert ready[0].id == "step2"
# Plan should not be complete yet (step2 is still pending)
assert plan.is_complete() is False
# Simulate step2 also failing (or being skipped due to failed dependency)
plan.steps[1].status = StepStatus.SKIPPED
# Now plan should be complete (all steps in terminal states)
assert plan.is_complete() is True
# But not successful
assert plan.is_successful() is False
# And should have failed steps
assert plan.has_failed_steps() is True
if __name__ == "__main__":
pytest.main([__file__, "-v"])
+620
View File
@@ -0,0 +1,620 @@
"""Tests for the storage module - FileStorage and ConcurrentStorage backends."""
import json
import time
from pathlib import Path
import pytest
from framework.schemas.run import Run, RunMetrics, RunStatus
from framework.storage.backend import FileStorage
from framework.storage.concurrent import CacheEntry, ConcurrentStorage
# === HELPER FUNCTIONS ===
def create_test_run(
run_id: str = "test_run_1",
goal_id: str = "test_goal",
status: RunStatus = RunStatus.COMPLETED,
nodes_executed: list[str] | None = None,
) -> Run:
"""Create a test Run object with minimal required fields."""
metrics = RunMetrics(
total_decisions=1,
successful_decisions=1,
failed_decisions=0,
nodes_executed=nodes_executed or ["node_1"],
)
return Run(
id=run_id,
goal_id=goal_id,
status=status,
metrics=metrics,
narrative="Test run completed.",
)
# === FILESTORAGE TESTS ===
class TestFileStorageBasics:
"""Test basic FileStorage operations."""
def test_init_creates_directories(self, tmp_path: Path):
"""FileStorage should create the directory structure on init."""
FileStorage(tmp_path)
assert (tmp_path / "runs").exists()
assert (tmp_path / "summaries").exists()
assert (tmp_path / "indexes" / "by_goal").exists()
assert (tmp_path / "indexes" / "by_status").exists()
assert (tmp_path / "indexes" / "by_node").exists()
def test_init_with_string_path(self, tmp_path: Path):
"""FileStorage should accept string paths."""
storage = FileStorage(str(tmp_path))
assert storage.base_path == tmp_path
class TestFileStorageRunOperations:
"""Test FileStorage run CRUD operations."""
def test_save_and_load_run(self, tmp_path: Path):
"""Test saving and loading a run."""
storage = FileStorage(tmp_path)
run = create_test_run()
storage.save_run(run)
loaded = storage.load_run(run.id)
assert loaded is not None
assert loaded.id == run.id
assert loaded.goal_id == run.goal_id
assert loaded.status == run.status
def test_load_nonexistent_run_returns_none(self, tmp_path: Path):
"""Loading a nonexistent run should return None."""
storage = FileStorage(tmp_path)
result = storage.load_run("nonexistent_id")
assert result is None
def test_save_creates_json_file(self, tmp_path: Path):
"""Saving a run should create a JSON file."""
storage = FileStorage(tmp_path)
run = create_test_run(run_id="my_run")
storage.save_run(run)
run_file = tmp_path / "runs" / "my_run.json"
assert run_file.exists()
# Verify it's valid JSON
with open(run_file) as f:
data = json.load(f)
assert data["id"] == "my_run"
def test_save_creates_summary(self, tmp_path: Path):
"""Saving a run should also create a summary file."""
storage = FileStorage(tmp_path)
run = create_test_run(run_id="my_run")
storage.save_run(run)
summary_file = tmp_path / "summaries" / "my_run.json"
assert summary_file.exists()
def test_load_summary(self, tmp_path: Path):
"""Test loading a run summary."""
storage = FileStorage(tmp_path)
run = create_test_run()
storage.save_run(run)
summary = storage.load_summary(run.id)
assert summary is not None
assert summary.run_id == run.id
assert summary.goal_id == run.goal_id
assert summary.status == run.status
def test_load_summary_fallback_to_run(self, tmp_path: Path):
"""If summary file is missing, load_summary should compute from run."""
storage = FileStorage(tmp_path)
run = create_test_run()
storage.save_run(run)
# Delete the summary file
summary_file = tmp_path / "summaries" / f"{run.id}.json"
summary_file.unlink()
# Should still work by computing from run
summary = storage.load_summary(run.id)
assert summary is not None
assert summary.run_id == run.id
def test_delete_run(self, tmp_path: Path):
"""Test deleting a run."""
storage = FileStorage(tmp_path)
run = create_test_run()
storage.save_run(run)
assert storage.load_run(run.id) is not None
result = storage.delete_run(run.id)
assert result is True
assert storage.load_run(run.id) is None
def test_delete_nonexistent_run_returns_false(self, tmp_path: Path):
"""Deleting a nonexistent run should return False."""
storage = FileStorage(tmp_path)
result = storage.delete_run("nonexistent")
assert result is False
class TestFileStorageIndexing:
"""Test FileStorage index operations."""
def test_index_by_goal(self, tmp_path: Path):
"""Runs should be indexed by goal_id."""
storage = FileStorage(tmp_path)
run1 = create_test_run(run_id="run_1", goal_id="goal_a")
run2 = create_test_run(run_id="run_2", goal_id="goal_a")
run3 = create_test_run(run_id="run_3", goal_id="goal_b")
storage.save_run(run1)
storage.save_run(run2)
storage.save_run(run3)
goal_a_runs = storage.get_runs_by_goal("goal_a")
goal_b_runs = storage.get_runs_by_goal("goal_b")
assert len(goal_a_runs) == 2
assert "run_1" in goal_a_runs
assert "run_2" in goal_a_runs
assert len(goal_b_runs) == 1
assert "run_3" in goal_b_runs
def test_index_by_status(self, tmp_path: Path):
"""Runs should be indexed by status."""
storage = FileStorage(tmp_path)
run1 = create_test_run(run_id="run_1", status=RunStatus.COMPLETED)
run2 = create_test_run(run_id="run_2", status=RunStatus.FAILED)
run3 = create_test_run(run_id="run_3", status=RunStatus.COMPLETED)
storage.save_run(run1)
storage.save_run(run2)
storage.save_run(run3)
completed = storage.get_runs_by_status(RunStatus.COMPLETED)
failed = storage.get_runs_by_status(RunStatus.FAILED)
assert len(completed) == 2
assert len(failed) == 1
def test_index_by_status_string(self, tmp_path: Path):
"""get_runs_by_status should accept string status."""
storage = FileStorage(tmp_path)
run = create_test_run(status=RunStatus.RUNNING)
storage.save_run(run)
runs = storage.get_runs_by_status("running")
assert len(runs) == 1
def test_index_by_node(self, tmp_path: Path):
"""Runs should be indexed by executed nodes."""
storage = FileStorage(tmp_path)
run1 = create_test_run(run_id="run_1", nodes_executed=["node_a", "node_b"])
run2 = create_test_run(run_id="run_2", nodes_executed=["node_a", "node_c"])
storage.save_run(run1)
storage.save_run(run2)
node_a_runs = storage.get_runs_by_node("node_a")
node_b_runs = storage.get_runs_by_node("node_b")
node_c_runs = storage.get_runs_by_node("node_c")
assert len(node_a_runs) == 2
assert len(node_b_runs) == 1
assert len(node_c_runs) == 1
def test_delete_removes_from_indexes(self, tmp_path: Path):
"""Deleting a run should remove it from all indexes."""
storage = FileStorage(tmp_path)
run = create_test_run(
run_id="run_1",
goal_id="goal_a",
status=RunStatus.COMPLETED,
nodes_executed=["node_1"],
)
storage.save_run(run)
# Verify indexed
assert "run_1" in storage.get_runs_by_goal("goal_a")
assert "run_1" in storage.get_runs_by_status(RunStatus.COMPLETED)
assert "run_1" in storage.get_runs_by_node("node_1")
# Delete
storage.delete_run("run_1")
# Verify removed from indexes
assert "run_1" not in storage.get_runs_by_goal("goal_a")
assert "run_1" not in storage.get_runs_by_status(RunStatus.COMPLETED)
assert "run_1" not in storage.get_runs_by_node("node_1")
def test_empty_index_returns_empty_list(self, tmp_path: Path):
"""Querying an empty index should return empty list."""
storage = FileStorage(tmp_path)
assert storage.get_runs_by_goal("nonexistent") == []
assert storage.get_runs_by_status("nonexistent") == []
assert storage.get_runs_by_node("nonexistent") == []
class TestFileStorageListOperations:
"""Test FileStorage list operations."""
def test_list_all_runs(self, tmp_path: Path):
"""Test listing all run IDs."""
storage = FileStorage(tmp_path)
storage.save_run(create_test_run(run_id="run_1"))
storage.save_run(create_test_run(run_id="run_2"))
storage.save_run(create_test_run(run_id="run_3"))
all_runs = storage.list_all_runs()
assert len(all_runs) == 3
assert set(all_runs) == {"run_1", "run_2", "run_3"}
def test_list_all_goals(self, tmp_path: Path):
"""Test listing all goal IDs that have runs."""
storage = FileStorage(tmp_path)
storage.save_run(create_test_run(run_id="run_1", goal_id="goal_a"))
storage.save_run(create_test_run(run_id="run_2", goal_id="goal_b"))
storage.save_run(create_test_run(run_id="run_3", goal_id="goal_a"))
all_goals = storage.list_all_goals()
assert len(all_goals) == 2
assert set(all_goals) == {"goal_a", "goal_b"}
def test_get_stats(self, tmp_path: Path):
"""Test getting storage statistics."""
storage = FileStorage(tmp_path)
storage.save_run(create_test_run(run_id="run_1", goal_id="goal_a"))
storage.save_run(create_test_run(run_id="run_2", goal_id="goal_b"))
stats = storage.get_stats()
assert stats["total_runs"] == 2
assert stats["total_goals"] == 2
assert stats["storage_path"] == str(tmp_path)
# === CACHE ENTRY TESTS ===
class TestCacheEntry:
"""Test CacheEntry dataclass."""
def test_is_expired_false_when_fresh(self):
"""Cache entry should not be expired when fresh."""
entry = CacheEntry(value="test", timestamp=time.time())
assert entry.is_expired(ttl=60.0) is False
def test_is_expired_true_when_old(self):
"""Cache entry should be expired when older than TTL."""
old_timestamp = time.time() - 120 # 2 minutes ago
entry = CacheEntry(value="test", timestamp=old_timestamp)
assert entry.is_expired(ttl=60.0) is True
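For orientation, the behaviour these two tests pin down implies an `is_expired()` along these lines; a sketch of the assumed implementation, not the actual source:

```python
# Assumed shape of CacheEntry, inferred from the tests above -- not the
# actual framework.storage.concurrent source.
import time
from dataclasses import dataclass
from typing import Any

@dataclass
class CacheEntry:
    value: Any
    timestamp: float

    def is_expired(self, ttl: float) -> bool:
        # Expired once the entry is older than ttl seconds.
        return (time.time() - self.timestamp) > ttl
```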
# === CONCURRENTSTORAGE TESTS ===
class TestConcurrentStorageBasics:
"""Test basic ConcurrentStorage operations."""
def test_init(self, tmp_path: Path):
"""Test ConcurrentStorage initialization."""
storage = ConcurrentStorage(tmp_path)
assert storage.base_path == tmp_path
assert storage._running is False
@pytest.mark.asyncio
async def test_start_and_stop(self, tmp_path: Path):
"""Test starting and stopping the storage."""
storage = ConcurrentStorage(tmp_path)
await storage.start()
assert storage._running is True
assert storage._batch_task is not None
await storage.stop()
assert storage._running is False
@pytest.mark.asyncio
async def test_double_start_is_idempotent(self, tmp_path: Path):
"""Starting twice should be safe."""
storage = ConcurrentStorage(tmp_path)
await storage.start()
await storage.start() # Should not raise
assert storage._running is True
await storage.stop()
@pytest.mark.asyncio
async def test_double_stop_is_idempotent(self, tmp_path: Path):
"""Stopping twice should be safe."""
storage = ConcurrentStorage(tmp_path)
await storage.start()
await storage.stop()
await storage.stop() # Should not raise
assert storage._running is False
class TestConcurrentStorageRunOperations:
"""Test ConcurrentStorage run operations."""
@pytest.mark.asyncio
async def test_save_and_load_run(self, tmp_path: Path):
"""Test async save and load of a run."""
storage = ConcurrentStorage(tmp_path)
await storage.start()
try:
run = create_test_run()
await storage.save_run(run, immediate=True)
loaded = await storage.load_run(run.id)
assert loaded is not None
assert loaded.id == run.id
assert loaded.goal_id == run.goal_id
finally:
await storage.stop()
@pytest.mark.asyncio
async def test_load_run_uses_cache(self, tmp_path: Path):
"""Second load should use cached value."""
storage = ConcurrentStorage(tmp_path)
await storage.start()
try:
run = create_test_run()
await storage.save_run(run, immediate=True)
# First load
loaded1 = await storage.load_run(run.id)
# Second load (should use cache)
loaded2 = await storage.load_run(run.id, use_cache=True)
assert loaded1 is not None
assert loaded2 is not None
# Cache should return same object
assert loaded1 is loaded2
finally:
await storage.stop()
@pytest.mark.asyncio
async def test_load_run_bypass_cache(self, tmp_path: Path):
"""Load with use_cache=False should bypass cache."""
storage = ConcurrentStorage(tmp_path)
await storage.start()
try:
run = create_test_run()
await storage.save_run(run, immediate=True)
loaded1 = await storage.load_run(run.id)
loaded2 = await storage.load_run(run.id, use_cache=False)
assert loaded1 is not None
assert loaded2 is not None
# Fresh load should be different object
assert loaded1 is not loaded2
finally:
await storage.stop()
@pytest.mark.asyncio
async def test_delete_run(self, tmp_path: Path):
"""Test async delete of a run."""
storage = ConcurrentStorage(tmp_path)
await storage.start()
try:
run = create_test_run()
await storage.save_run(run, immediate=True)
result = await storage.delete_run(run.id)
assert result is True
loaded = await storage.load_run(run.id)
assert loaded is None
finally:
await storage.stop()
@pytest.mark.asyncio
async def test_delete_clears_cache(self, tmp_path: Path):
"""Deleting a run should clear it from cache."""
storage = ConcurrentStorage(tmp_path)
await storage.start()
try:
run = create_test_run()
await storage.save_run(run, immediate=True)
# Load to populate cache
await storage.load_run(run.id)
assert f"run:{run.id}" in storage._cache
# Delete
await storage.delete_run(run.id)
# Cache should be cleared
assert f"run:{run.id}" not in storage._cache
finally:
await storage.stop()
class TestConcurrentStorageQueryOperations:
"""Test ConcurrentStorage query operations."""
@pytest.mark.asyncio
async def test_get_runs_by_goal(self, tmp_path: Path):
"""Test async query by goal."""
storage = ConcurrentStorage(tmp_path)
await storage.start()
try:
run1 = create_test_run(run_id="run_1", goal_id="goal_a")
run2 = create_test_run(run_id="run_2", goal_id="goal_a")
await storage.save_run(run1, immediate=True)
await storage.save_run(run2, immediate=True)
runs = await storage.get_runs_by_goal("goal_a")
assert len(runs) == 2
finally:
await storage.stop()
@pytest.mark.asyncio
async def test_get_runs_by_status(self, tmp_path: Path):
"""Test async query by status."""
storage = ConcurrentStorage(tmp_path)
await storage.start()
try:
run = create_test_run(status=RunStatus.FAILED)
await storage.save_run(run, immediate=True)
runs = await storage.get_runs_by_status(RunStatus.FAILED)
assert len(runs) == 1
finally:
await storage.stop()
@pytest.mark.asyncio
async def test_list_all_runs(self, tmp_path: Path):
"""Test async list all runs."""
storage = ConcurrentStorage(tmp_path)
await storage.start()
try:
await storage.save_run(create_test_run(run_id="run_1"), immediate=True)
await storage.save_run(create_test_run(run_id="run_2"), immediate=True)
runs = await storage.list_all_runs()
assert len(runs) == 2
finally:
await storage.stop()
class TestConcurrentStorageCacheManagement:
"""Test ConcurrentStorage cache management."""
def test_clear_cache(self, tmp_path: Path):
"""Test clearing the cache."""
storage = ConcurrentStorage(tmp_path)
storage._cache["test_key"] = CacheEntry(value="test", timestamp=time.time())
storage.clear_cache()
assert len(storage._cache) == 0
def test_invalidate_cache(self, tmp_path: Path):
"""Test invalidating a specific cache entry."""
storage = ConcurrentStorage(tmp_path)
storage._cache["key1"] = CacheEntry(value="test1", timestamp=time.time())
storage._cache["key2"] = CacheEntry(value="test2", timestamp=time.time())
storage.invalidate_cache("key1")
assert "key1" not in storage._cache
assert "key2" in storage._cache
def test_get_cache_stats(self, tmp_path: Path):
"""Test getting cache statistics."""
storage = ConcurrentStorage(tmp_path, cache_ttl=60.0)
# Add fresh entry
storage._cache["fresh"] = CacheEntry(value="test", timestamp=time.time())
# Add expired entry
storage._cache["expired"] = CacheEntry(value="test", timestamp=time.time() - 120)
stats = storage.get_cache_stats()
assert stats["total_entries"] == 2
assert stats["expired_entries"] == 1
assert stats["valid_entries"] == 1
class TestConcurrentStorageSyncAPI:
"""Test ConcurrentStorage synchronous API for backward compatibility."""
def test_save_run_sync(self, tmp_path: Path):
"""Test synchronous save."""
storage = ConcurrentStorage(tmp_path)
run = create_test_run()
storage.save_run_sync(run)
# Verify saved
loaded = storage.load_run_sync(run.id)
assert loaded is not None
assert loaded.id == run.id
def test_load_run_sync(self, tmp_path: Path):
"""Test synchronous load."""
storage = ConcurrentStorage(tmp_path)
run = create_test_run()
storage.save_run_sync(run)
loaded = storage.load_run_sync(run.id)
assert loaded is not None
def test_load_run_sync_nonexistent(self, tmp_path: Path):
"""Synchronous load of nonexistent run returns None."""
storage = ConcurrentStorage(tmp_path)
loaded = storage.load_run_sync("nonexistent")
assert loaded is None
class TestConcurrentStorageStats:
"""Test ConcurrentStorage statistics."""
@pytest.mark.asyncio
async def test_get_stats(self, tmp_path: Path):
"""Test getting async storage stats."""
storage = ConcurrentStorage(tmp_path)
await storage.start()
try:
await storage.save_run(create_test_run(), immediate=True)
stats = await storage.get_stats()
assert stats["total_runs"] == 1
assert "cache" in stats
assert "pending_writes" in stats
assert stats["running"] is True
finally:
await storage.stop()
+31
View File
@@ -0,0 +1,31 @@
## Summary
Add Cursor IDE support for existing Claude Code skills and MCP servers.
## Changes
- Created `.cursor/skills/` directory with symlinks to all 5 existing skills:
- `agent-workflow`
- `building-agents-core`
- `building-agents-construction`
- `building-agents-patterns`
- `testing-agent`
- Added `.cursor/mcp.json` with MCP server configuration (same as `.mcp.json`)
## Why symlinks for skills?
- Single source of truth - updates to `.claude/skills/` are reflected in both IDEs
- No duplication or sync issues
- Cursor automatically loads skills from `.cursor/skills/`, `.claude/skills/`, and `.codex/skills/`
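The symlinks use relative targets (see the new `.cursor/skills/*` entries above), so the repo stays relocatable. A sketch of how they could be (re)created with the stdlib, for illustration only:

```python
# Sketch: recreate the .cursor/skills symlinks with relative targets matching
# the "../../.claude/skills/<name>" entries added in this PR. Illustrative only.
from pathlib import Path

SKILLS = [
    "agent-workflow",
    "building-agents-core",
    "building-agents-construction",
    "building-agents-patterns",
    "testing-agent",
]

cursor_skills = Path(".cursor/skills")
cursor_skills.mkdir(parents=True, exist_ok=True)
for name in SKILLS:
    link = cursor_skills / name
    if not link.is_symlink():
        link.symlink_to(Path("../../.claude/skills") / name)
```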
## MCP Configuration
Cursor requires `.cursor/mcp.json` for project-level MCP servers. This enables:
- `agent-builder` - Agent building MCP server
- `tools` - Hive tools MCP server
## Setup in Cursor
1. **Enable MCP**: Open Command Palette (`Cmd+Shift+P` / `Ctrl+Shift+P`) and run `MCP: Enable`
2. **Restart Cursor** to load the MCP servers from `.cursor/mcp.json`
3. **Skills**: Type `/` in Agent chat and search for the skill name
+6 -19
View File
@@ -173,12 +173,13 @@ async function autoCloseDuplicates(): Promise<void> {
`[DEBUG] Issue #${issue.number} has ${comments.length} comments`
);
const dupeComments = comments.filter(
(comment) =>
comment.body.includes("Found") &&
comment.body.includes("possible duplicate") &&
const dupeComments = comments.filter((comment) => {
const bodyLower = comment.body.toLowerCase();
return (
bodyLower.includes("possible duplicate") &&
comment.user.type === "Bot"
);
);
});
console.log(
`[DEBUG] Issue #${issue.number} has ${dupeComments.length} duplicate detection comments`
);
@@ -212,20 +213,6 @@ async function autoCloseDuplicates(): Promise<void> {
)} hours)`
);
const commentsAfterDupe = comments.filter(
(comment) => new Date(comment.created_at) > dupeCommentDate
);
console.log(
`[DEBUG] Issue #${issue.number} - ${commentsAfterDupe.length} comments after duplicate detection`
);
if (commentsAfterDupe.length > 0) {
console.log(
`[DEBUG] Issue #${issue.number} - has activity after duplicate comment, skipping`
);
continue;
}
console.log(
`[DEBUG] Issue #${issue.number} - checking reactions on duplicate comment...`
);
@@ -9,7 +9,7 @@ Respect robots.txt by default for ethical scraping.
from __future__ import annotations
from typing import Any
from urllib.parse import urlparse
from urllib.parse import urljoin, urlparse
from urllib.robotparser import RobotFileParser
import httpx
@@ -137,10 +137,7 @@ def register_tools(mcp: FastMCP) -> None:
}
# Validate max_length
if max_length < 1000:
max_length = 1000
elif max_length > 500000:
max_length = 500000
max_length = max(1000, min(max_length, 500000))
# Make request
response = httpx.get(
@@ -157,8 +154,7 @@ def register_tools(mcp: FastMCP) -> None:
if response.status_code != 200:
return {"error": f"HTTP {response.status_code}: Failed to fetch URL"}
# --- START FIX: Validate Content-Type ---
# Added validation to prevent parsing non-HTML content (like JSON, PDF, Images)
# Check content type
content_type = response.headers.get("content-type", "").lower()
if not any(t in content_type for t in ["text/html", "application/xhtml+xml"]):
return {
@@ -166,7 +162,6 @@ def register_tools(mcp: FastMCP) -> None:
"url": url,
"skipped": True,
}
# --- END FIX ---
# Parse HTML
soup = BeautifulSoup(response.text, "html.parser")
@@ -178,10 +173,7 @@ def register_tools(mcp: FastMCP) -> None:
tag.decompose()
# Get title and description
title = ""
title_tag = soup.find("title")
if title_tag:
title = title_tag.get_text(strip=True)
title = soup.title.get_text(strip=True) if soup.title else ""
description = ""
meta_desc = soup.find("meta", attrs={"name": "description"})
@@ -224,11 +216,14 @@ def register_tools(mcp: FastMCP) -> None:
# Extract links if requested
if include_links:
links: list[dict[str, str]] = []
base_url = str(response.url) # Use final URL after redirects
for a in soup.find_all("a", href=True)[:50]:
href = a["href"]
# Convert relative URLs to absolute URLs
absolute_href = urljoin(base_url, href)
link_text = a.get_text(strip=True)
if link_text and href:
links.append({"text": link_text, "href": href})
if link_text and absolute_href:
links.append({"text": link_text, "href": absolute_href})
result["links"] = links
return result
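The `urljoin` semantics the fix relies on, for quick reference; stdlib behaviour, matching the test expectations below:

```python
# urljoin resolution rules this fix depends on (urllib.parse, stdlib).
from urllib.parse import urljoin

base = "https://example.com/blog/post"
assert urljoin(base, "../home") == "https://example.com/home"               # parent-relative
assert urljoin(base, "page.html") == "https://example.com/blog/page.html"  # sibling
assert urljoin(base, "/about") == "https://example.com/about"              # root-relative
assert urljoin(base, "#s1") == "https://example.com/blog/post#s1"          # fragment
assert urljoin(base, "https://other.com") == "https://other.com"           # absolute wins
```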
+50 -18
View File
@@ -318,8 +318,8 @@ class TestCredentialSpecs:
assert spec.tools == []
assert "llm_generate" in spec.node_types
assert "llm_tool_use" in spec.node_types
assert spec.required is True
assert spec.startup_required is True
assert spec.required is False
assert spec.startup_required is False
assert "anthropic.com" in spec.help_url
@@ -328,22 +328,38 @@ class TestNodeTypeValidation:
def test_get_missing_for_node_types_returns_missing(self, monkeypatch, tmp_path):
"""get_missing_for_node_types() returns missing credentials."""
monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
monkeypatch.delenv("REQUIRED_KEY", raising=False)
creds = CredentialManager(dotenv_path=tmp_path / ".env")
missing = creds.get_missing_for_node_types(["llm_generate", "llm_tool_use"])
custom_specs = {
"required_cred": CredentialSpec(
env_var="REQUIRED_KEY",
node_types=["required_node"],
required=True,
)
}
creds = CredentialManager(specs=custom_specs, dotenv_path=tmp_path / ".env")
missing = creds.get_missing_for_node_types(["required_node"])
assert len(missing) == 1
cred_name, spec = missing[0]
assert cred_name == "anthropic"
assert spec.env_var == "ANTHROPIC_API_KEY"
assert cred_name == "required_cred"
assert spec.env_var == "REQUIRED_KEY"
def test_get_missing_for_node_types_returns_empty_when_present(self, monkeypatch):
"""get_missing_for_node_types() returns empty when credentials present."""
monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")
monkeypatch.setenv("REQUIRED_KEY", "test-key")
creds = CredentialManager()
missing = creds.get_missing_for_node_types(["llm_generate", "llm_tool_use"])
custom_specs = {
"required_cred": CredentialSpec(
env_var="REQUIRED_KEY",
node_types=["required_node"],
required=True,
)
}
creds = CredentialManager(specs=custom_specs)
missing = creds.get_missing_for_node_types(["required_node"])
assert missing == []
@@ -358,16 +374,24 @@ class TestNodeTypeValidation:
def test_validate_for_node_types_raises_for_missing(self, monkeypatch, tmp_path):
"""validate_for_node_types() raises CredentialError when missing."""
monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
monkeypatch.delenv("REQUIRED_KEY", raising=False)
creds = CredentialManager(dotenv_path=tmp_path / ".env")
custom_specs = {
"required_cred": CredentialSpec(
env_var="REQUIRED_KEY",
node_types=["required_node"],
required=True,
)
}
creds = CredentialManager(specs=custom_specs, dotenv_path=tmp_path / ".env")
with pytest.raises(CredentialError) as exc_info:
creds.validate_for_node_types(["llm_generate"])
creds.validate_for_node_types(["required_node"])
error_msg = str(exc_info.value)
assert "ANTHROPIC_API_KEY" in error_msg
assert "llm_generate" in error_msg
assert "REQUIRED_KEY" in error_msg
assert "required_node" in error_msg
def test_validate_for_node_types_passes_when_present(self, monkeypatch):
"""validate_for_node_types() passes when credentials present."""
@@ -384,15 +408,23 @@ class TestStartupValidation:
def test_validate_startup_raises_for_missing(self, monkeypatch, tmp_path):
"""validate_startup() raises CredentialError when startup creds missing."""
monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
monkeypatch.delenv("STARTUP_KEY", raising=False)
creds = CredentialManager(dotenv_path=tmp_path / ".env")
custom_specs = {
"startup_cred": CredentialSpec(
env_var="STARTUP_KEY",
startup_required=True,
required=True,
)
}
creds = CredentialManager(specs=custom_specs, dotenv_path=tmp_path / ".env")
with pytest.raises(CredentialError) as exc_info:
creds.validate_startup()
error_msg = str(exc_info.value)
assert "ANTHROPIC_API_KEY" in error_msg
assert "STARTUP_KEY" in error_msg
assert "Server startup failed" in error_msg
def test_validate_startup_passes_when_present(self, monkeypatch):
+4
View File
@@ -1,5 +1,6 @@
"""Tests for csv_tool - Read and manipulate CSV files."""
import importlib.util
from pathlib import Path
from unittest.mock import patch
@@ -8,6 +9,8 @@ from fastmcp import FastMCP
from aden_tools.tools.csv_tool.csv_tool import register_tools
duckdb_available = importlib.util.find_spec("duckdb") is not None
# Test IDs for sandbox
TEST_WORKSPACE_ID = "test-workspace"
TEST_AGENT_ID = "test-agent"
@@ -620,6 +623,7 @@ class TestCsvInfo:
assert ".csv" in result["error"].lower()
@pytest.mark.skipif(not duckdb_available, reason="duckdb not installed")
class TestCsvSql:
"""Tests for csv_sql function (requires duckdb)."""
+194 -7
View File
@@ -1,5 +1,7 @@
"""Tests for web_scrape tool (FastMCP)."""
from unittest.mock import MagicMock, patch
import pytest
from fastmcp import FastMCP
@@ -52,11 +54,196 @@ class TestWebScrapeTool:
result = web_scrape_fn(url="https://example.com", selector=".content")
assert isinstance(result, dict)
def test_non_html_content_rejected(self, web_scrape_fn):
"""Ensure non-HTML content types (like JSON) are rejected."""
# GitHub's Zen API returns text/plain, not html
result = web_scrape_fn(url="https://api.github.com/zen")
# We expect an error about skipping non-HTML
assert "error" in result
assert "Skipping non-HTML content" in result["error"]
class TestWebScrapeToolLinkConversion:
"""Tests for link URL conversion (relative to absolute)."""
def _mock_response(self, html_content, final_url="https://example.com/page"):
"""Create a mock httpx response object."""
mock_response = MagicMock()
mock_response.status_code = 200
mock_response.text = html_content
mock_response.url = final_url
return mock_response
@patch("aden_tools.tools.web_scrape_tool.web_scrape_tool.httpx.get")
def test_relative_links_converted_to_absolute(self, mock_get, web_scrape_fn):
"""Relative URLs like ../page are converted to absolute URLs."""
html = """
<html>
<body>
<a href="../home">Home</a>
<a href="page.html">Next Page</a>
</body>
</html>
"""
mock_get.return_value = self._mock_response(html, "https://example.com/blog/post")
result = web_scrape_fn(url="https://example.com/blog/post", include_links=True)
assert "error" not in result
assert "links" in result
links = result["links"]
hrefs = {link["text"]: link["href"] for link in links}
# Verify relative URLs are converted to absolute
assert "Home" in hrefs
assert hrefs["Home"] == "https://example.com/home", f"Got {hrefs['Home']}"
assert "Next Page" in hrefs
expected = "https://example.com/blog/page.html"
assert hrefs["Next Page"] == expected, f"Got {hrefs['Next Page']}"
@patch("aden_tools.tools.web_scrape_tool.web_scrape_tool.httpx.get")
def test_root_relative_links_converted(self, mock_get, web_scrape_fn):
"""Root-relative URLs like /about are converted to absolute URLs."""
html = """
<html>
<body>
<a href="/about">About</a>
<a href="/contact">Contact</a>
</body>
</html>
"""
mock_get.return_value = self._mock_response(html, "https://example.com/blog/post")
result = web_scrape_fn(url="https://example.com/blog/post", include_links=True)
assert "error" not in result
assert "links" in result
links = result["links"]
hrefs = {link["text"]: link["href"] for link in links}
# Root-relative URLs should resolve to domain root
assert hrefs["About"] == "https://example.com/about"
assert hrefs["Contact"] == "https://example.com/contact"
@patch("aden_tools.tools.web_scrape_tool.web_scrape_tool.httpx.get")
def test_absolute_links_unchanged(self, mock_get, web_scrape_fn):
"""Absolute URLs remain unchanged."""
html = """
<html>
<body>
<a href="https://other.com">Other Site</a>
<a href="https://example.com/page">Internal</a>
</body>
</html>
"""
mock_get.return_value = self._mock_response(html)
result = web_scrape_fn(url="https://example.com", include_links=True)
assert "error" not in result
assert "links" in result
links = result["links"]
hrefs = {link["text"]: link["href"] for link in links}
# Absolute URLs should remain unchanged
assert hrefs["Other Site"] == "https://other.com"
assert hrefs["Internal"] == "https://example.com/page"
@patch("aden_tools.tools.web_scrape_tool.web_scrape_tool.httpx.get")
def test_links_after_redirects(self, mock_get, web_scrape_fn):
"""Links are resolved relative to final URL after redirects."""
html = """
<html>
<body>
<a href="../prev">Previous</a>
<a href="next">Next</a>
</body>
</html>
"""
# Mock redirect: request to /old/url redirects to /new/location
mock_get.return_value = self._mock_response(
html,
final_url="https://example.com/new/location", # Final URL after redirect
)
result = web_scrape_fn(url="https://example.com/old/url", include_links=True)
assert "error" not in result
assert "links" in result
links = result["links"]
hrefs = {link["text"]: link["href"] for link in links}
# Links should be resolved relative to FINAL URL, not requested URL
assert hrefs["Previous"] == "https://example.com/prev", (
"Links should resolve relative to final URL after redirects"
)
assert hrefs["Next"] == "https://example.com/new/next"
@patch("aden_tools.tools.web_scrape_tool.web_scrape_tool.httpx.get")
def test_fragment_links_preserved(self, mock_get, web_scrape_fn):
"""Fragment links (anchors) are preserved."""
html = """
<html>
<body>
<a href="#section1">Section 1</a>
<a href="/page#section2">Page Section 2</a>
</body>
</html>
"""
mock_get.return_value = self._mock_response(html, "https://example.com/page")
result = web_scrape_fn(url="https://example.com/page", include_links=True)
assert "error" not in result
assert "links" in result
links = result["links"]
hrefs = {link["text"]: link["href"] for link in links}
# Fragment links should be converted correctly
assert hrefs["Section 1"] == "https://example.com/page#section1"
assert hrefs["Page Section 2"] == "https://example.com/page#section2"
@patch("aden_tools.tools.web_scrape_tool.web_scrape_tool.httpx.get")
def test_query_parameters_preserved(self, mock_get, web_scrape_fn):
"""Query parameters in URLs are preserved."""
html = """
<html>
<body>
<a href="page?id=123">View Item</a>
<a href="/search?q=test&sort=date">Search</a>
</body>
</html>
"""
mock_get.return_value = self._mock_response(html, "https://example.com/blog/post")
result = web_scrape_fn(url="https://example.com/blog/post", include_links=True)
assert "error" not in result
assert "links" in result
links = result["links"]
hrefs = {link["text"]: link["href"] for link in links}
# Query parameters should be preserved
assert "id=123" in hrefs["View Item"]
assert "q=test" in hrefs["Search"]
assert "sort=date" in hrefs["Search"]
@patch("aden_tools.tools.web_scrape_tool.web_scrape_tool.httpx.get")
def test_empty_href_skipped(self, mock_get, web_scrape_fn):
"""Links with empty or whitespace text are skipped."""
html = """
<html>
<body>
<a href="/valid">Valid Link</a>
<a href="/empty"></a>
<a href="/whitespace"> </a>
</body>
</html>
"""
mock_get.return_value = self._mock_response(html)
result = web_scrape_fn(url="https://example.com", include_links=True)
assert "error" not in result
assert "links" in result
links = result["links"]
texts = [link["text"] for link in links]
# Only valid links should be included
assert "Valid Link" in texts
# Empty and whitespace-only text should be filtered
assert "" not in texts
assert len([t for t in texts if not t.strip()]) == 0