613 lines
20 KiB
Python
613 lines
20 KiB
Python
"""
|
|
Unit tests for the goal-based testing framework.
|
|
|
|
Tests cover:
|
|
- Schema validation
|
|
- Storage CRUD operations
|
|
- Error categorization heuristics
|
|
- Parallel runner grouping logic
|
|
"""
|
|
|
|
import pytest
|
|
import tempfile
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
|
|
from framework.testing.test_case import (
|
|
Test,
|
|
TestType,
|
|
ApprovalStatus,
|
|
)
|
|
from framework.testing.test_result import (
|
|
TestResult,
|
|
TestSuiteResult,
|
|
ErrorCategory,
|
|
)
|
|
from framework.testing.test_storage import TestStorage
|
|
from framework.testing.categorizer import ErrorCategorizer
|
|
from framework.testing.parallel import ParallelTestRunner, ParallelConfig
|
|
from framework.testing.debug_tool import DebugTool
|
|
|
|
|
|
# ============================================================================
|
|
# Test Schema Tests
|
|
# ============================================================================
|
|
|
|
class TestTestCaseSchema:
|
|
"""Tests for Test schema."""
|
|
|
|
def test_create_test(self):
|
|
"""Test creating a basic test."""
|
|
test = Test(
|
|
id="test_001",
|
|
goal_id="goal_001",
|
|
parent_criteria_id="constraint_api_limits",
|
|
test_type=TestType.CONSTRAINT,
|
|
test_name="test_constraint_api_limits",
|
|
test_code="def test_constraint_api_limits(agent): pass",
|
|
description="Tests API rate limits",
|
|
input={"topic": "test"},
|
|
expected_output={"count": 5},
|
|
)
|
|
|
|
assert test.id == "test_001"
|
|
assert test.goal_id == "goal_001"
|
|
assert test.test_type == TestType.CONSTRAINT
|
|
assert test.approval_status == ApprovalStatus.PENDING
|
|
assert not test.is_approved
|
|
|
|
def test_approve_test(self):
|
|
"""Test approving a test."""
|
|
test = Test(
|
|
id="test_001",
|
|
goal_id="goal_001",
|
|
parent_criteria_id="constraint_001",
|
|
test_type=TestType.CONSTRAINT,
|
|
test_name="test_something",
|
|
test_code="pass",
|
|
description="test",
|
|
)
|
|
|
|
test.approve("test_user")
|
|
|
|
assert test.approval_status == ApprovalStatus.APPROVED
|
|
assert test.approved_by == "test_user"
|
|
assert test.approved_at is not None
|
|
assert test.is_approved
|
|
|
|
def test_modify_test(self):
|
|
"""Test modifying a test before approval."""
|
|
test = Test(
|
|
id="test_001",
|
|
goal_id="goal_001",
|
|
parent_criteria_id="constraint_001",
|
|
test_type=TestType.CONSTRAINT,
|
|
test_name="test_something",
|
|
test_code="original code",
|
|
description="test",
|
|
)
|
|
|
|
test.modify("modified code", "test_user")
|
|
|
|
assert test.approval_status == ApprovalStatus.MODIFIED
|
|
assert test.original_code == "original code"
|
|
assert test.test_code == "modified code"
|
|
assert test.is_approved
|
|
|
|
def test_reject_test(self):
|
|
"""Test rejecting a test."""
|
|
test = Test(
|
|
id="test_001",
|
|
goal_id="goal_001",
|
|
parent_criteria_id="constraint_001",
|
|
test_type=TestType.CONSTRAINT,
|
|
test_name="test_something",
|
|
test_code="pass",
|
|
description="test",
|
|
)
|
|
|
|
test.reject("Not a valid test case")
|
|
|
|
assert test.approval_status == ApprovalStatus.REJECTED
|
|
assert test.rejection_reason == "Not a valid test case"
|
|
assert not test.is_approved
|
|
|
|
def test_record_result(self):
|
|
"""Test recording test results."""
|
|
test = Test(
|
|
id="test_001",
|
|
goal_id="goal_001",
|
|
parent_criteria_id="constraint_001",
|
|
test_type=TestType.CONSTRAINT,
|
|
test_name="test_something",
|
|
test_code="pass",
|
|
description="test",
|
|
)
|
|
|
|
test.record_result(passed=True)
|
|
assert test.last_result == "passed"
|
|
assert test.run_count == 1
|
|
assert test.pass_count == 1
|
|
assert test.pass_rate == 1.0
|
|
|
|
test.record_result(passed=False)
|
|
assert test.last_result == "failed"
|
|
assert test.run_count == 2
|
|
assert test.pass_count == 1
|
|
assert test.fail_count == 1
|
|
assert test.pass_rate == 0.5
|
|
|
|
|
|
class TestTestResultSchema:
|
|
"""Tests for TestResult schema."""
|
|
|
|
def test_create_passed_result(self):
|
|
"""Test creating a passed result."""
|
|
result = TestResult(
|
|
test_id="test_001",
|
|
passed=True,
|
|
duration_ms=100,
|
|
actual_output={"status": "ok"},
|
|
expected_output={"status": "ok"},
|
|
)
|
|
|
|
assert result.passed
|
|
assert result.duration_ms == 100
|
|
assert result.error_category is None
|
|
|
|
def test_create_failed_result(self):
|
|
"""Test creating a failed result."""
|
|
result = TestResult(
|
|
test_id="test_001",
|
|
passed=False,
|
|
duration_ms=50,
|
|
error_message="Assertion failed",
|
|
error_category=ErrorCategory.IMPLEMENTATION_ERROR,
|
|
stack_trace="Traceback...",
|
|
)
|
|
|
|
assert not result.passed
|
|
assert result.error_category == ErrorCategory.IMPLEMENTATION_ERROR
|
|
|
|
def test_summary_dict(self):
|
|
"""Test summary dict generation."""
|
|
result = TestResult(
|
|
test_id="test_001",
|
|
passed=False,
|
|
duration_ms=50,
|
|
error_message="Very long error " * 20,
|
|
error_category=ErrorCategory.LOGIC_ERROR,
|
|
)
|
|
|
|
summary = result.summary_dict()
|
|
assert summary["test_id"] == "test_001"
|
|
assert summary["passed"] is False
|
|
assert summary["error_category"] == "logic_error"
|
|
assert len(summary["error_message"]) == 100 # Truncated
|
|
|
|
|
|
class TestTestSuiteResult:
|
|
"""Tests for TestSuiteResult schema."""
|
|
|
|
def test_suite_result_properties(self):
|
|
"""Test suite result calculation properties."""
|
|
results = [
|
|
TestResult(test_id="t1", passed=True, duration_ms=100),
|
|
TestResult(test_id="t2", passed=True, duration_ms=50),
|
|
TestResult(test_id="t3", passed=False, duration_ms=75,
|
|
error_category=ErrorCategory.IMPLEMENTATION_ERROR),
|
|
]
|
|
|
|
suite = TestSuiteResult(
|
|
goal_id="goal_001",
|
|
total=3,
|
|
passed=2,
|
|
failed=1,
|
|
results=results,
|
|
duration_ms=225,
|
|
)
|
|
|
|
assert not suite.all_passed
|
|
assert suite.pass_rate == pytest.approx(2/3)
|
|
assert len(suite.get_failed_results()) == 1
|
|
|
|
def test_get_results_by_category(self):
|
|
"""Test filtering results by error category."""
|
|
results = [
|
|
TestResult(test_id="t1", passed=False, duration_ms=100,
|
|
error_category=ErrorCategory.LOGIC_ERROR),
|
|
TestResult(test_id="t2", passed=False, duration_ms=50,
|
|
error_category=ErrorCategory.IMPLEMENTATION_ERROR),
|
|
TestResult(test_id="t3", passed=False, duration_ms=75,
|
|
error_category=ErrorCategory.IMPLEMENTATION_ERROR),
|
|
]
|
|
|
|
suite = TestSuiteResult(
|
|
goal_id="goal_001",
|
|
total=3,
|
|
passed=0,
|
|
failed=3,
|
|
results=results,
|
|
)
|
|
|
|
impl_errors = suite.get_results_by_category(ErrorCategory.IMPLEMENTATION_ERROR)
|
|
assert len(impl_errors) == 2
|
|
|
|
|
|
# ============================================================================
|
|
# Storage Tests
|
|
# ============================================================================
|
|
|
|
class TestTestStorage:
|
|
"""Tests for TestStorage."""
|
|
|
|
@pytest.fixture
|
|
def storage(self, tmp_path):
|
|
"""Create a temporary storage instance."""
|
|
return TestStorage(tmp_path)
|
|
|
|
def test_save_and_load_test(self, storage):
|
|
"""Test saving and loading a test."""
|
|
test = Test(
|
|
id="test_001",
|
|
goal_id="goal_001",
|
|
parent_criteria_id="constraint_001",
|
|
test_type=TestType.CONSTRAINT,
|
|
test_name="test_something",
|
|
test_code="def test_something(agent): pass",
|
|
description="A test",
|
|
)
|
|
|
|
storage.save_test(test)
|
|
|
|
loaded = storage.load_test("goal_001", "test_001")
|
|
assert loaded is not None
|
|
assert loaded.id == "test_001"
|
|
assert loaded.test_name == "test_something"
|
|
|
|
def test_delete_test(self, storage):
|
|
"""Test deleting a test."""
|
|
test = Test(
|
|
id="test_001",
|
|
goal_id="goal_001",
|
|
parent_criteria_id="constraint_001",
|
|
test_type=TestType.CONSTRAINT,
|
|
test_name="test_something",
|
|
test_code="pass",
|
|
description="test",
|
|
)
|
|
|
|
storage.save_test(test)
|
|
assert storage.load_test("goal_001", "test_001") is not None
|
|
|
|
storage.delete_test("goal_001", "test_001")
|
|
assert storage.load_test("goal_001", "test_001") is None
|
|
|
|
def test_get_tests_by_goal(self, storage):
|
|
"""Test querying tests by goal."""
|
|
for i in range(3):
|
|
test = Test(
|
|
id=f"test_{i}",
|
|
goal_id="goal_001",
|
|
parent_criteria_id=f"constraint_{i}",
|
|
test_type=TestType.CONSTRAINT,
|
|
test_name=f"test_{i}",
|
|
test_code="pass",
|
|
description="test",
|
|
)
|
|
storage.save_test(test)
|
|
|
|
tests = storage.get_tests_by_goal("goal_001")
|
|
assert len(tests) == 3
|
|
|
|
def test_get_approved_tests(self, storage):
|
|
"""Test querying approved tests."""
|
|
# Create tests with different approval statuses
|
|
test1 = Test(
|
|
id="test_001",
|
|
goal_id="goal_001",
|
|
parent_criteria_id="c1",
|
|
test_type=TestType.CONSTRAINT,
|
|
test_name="test_1",
|
|
test_code="pass",
|
|
description="test",
|
|
)
|
|
test1.approve()
|
|
storage.save_test(test1)
|
|
|
|
test2 = Test(
|
|
id="test_002",
|
|
goal_id="goal_001",
|
|
parent_criteria_id="c2",
|
|
test_type=TestType.CONSTRAINT,
|
|
test_name="test_2",
|
|
test_code="pass",
|
|
description="test",
|
|
)
|
|
# Leave pending
|
|
storage.save_test(test2)
|
|
|
|
test3 = Test(
|
|
id="test_003",
|
|
goal_id="goal_001",
|
|
parent_criteria_id="c3",
|
|
test_type=TestType.CONSTRAINT,
|
|
test_name="test_3",
|
|
test_code="pass",
|
|
description="test",
|
|
)
|
|
test3.modify("modified", "user")
|
|
storage.save_test(test3)
|
|
|
|
approved = storage.get_approved_tests("goal_001")
|
|
assert len(approved) == 2 # approved and modified
|
|
|
|
def test_save_and_load_result(self, storage):
|
|
"""Test saving and loading test results."""
|
|
result = TestResult(
|
|
test_id="test_001",
|
|
passed=True,
|
|
duration_ms=100,
|
|
)
|
|
|
|
storage.save_result("test_001", result)
|
|
|
|
loaded = storage.get_latest_result("test_001")
|
|
assert loaded is not None
|
|
assert loaded.passed is True
|
|
assert loaded.duration_ms == 100
|
|
|
|
def test_result_history(self, storage):
|
|
"""Test getting result history."""
|
|
# Save multiple results
|
|
for i in range(5):
|
|
result = TestResult(
|
|
test_id="test_001",
|
|
passed=(i % 2 == 0),
|
|
duration_ms=100 + i,
|
|
)
|
|
storage.save_result("test_001", result)
|
|
|
|
history = storage.get_result_history("test_001", limit=3)
|
|
assert len(history) <= 3
|
|
|
|
def test_get_stats(self, storage):
|
|
"""Test getting storage statistics."""
|
|
test = Test(
|
|
id="test_001",
|
|
goal_id="goal_001",
|
|
parent_criteria_id="c1",
|
|
test_type=TestType.CONSTRAINT,
|
|
test_name="test_1",
|
|
test_code="pass",
|
|
description="test",
|
|
)
|
|
test.approve()
|
|
storage.save_test(test)
|
|
|
|
stats = storage.get_stats()
|
|
assert stats["total_tests"] == 1
|
|
assert stats["by_approval"]["approved"] == 1
|
|
|
|
|
|
# ============================================================================
|
|
# Error Categorizer Tests
|
|
# ============================================================================
|
|
|
|
class TestErrorCategorizer:
|
|
"""Tests for ErrorCategorizer."""
|
|
|
|
@pytest.fixture
|
|
def categorizer(self):
|
|
return ErrorCategorizer()
|
|
|
|
def test_categorize_passed(self, categorizer):
|
|
"""Test that passed results return None."""
|
|
result = TestResult(test_id="t1", passed=True, duration_ms=100)
|
|
assert categorizer.categorize(result) is None
|
|
|
|
def test_categorize_logic_error(self, categorizer):
|
|
"""Test categorization of logic errors."""
|
|
result = TestResult(
|
|
test_id="t1",
|
|
passed=False,
|
|
duration_ms=100,
|
|
error_message="goal not achieved: expected success criteria was not met",
|
|
)
|
|
assert categorizer.categorize(result) == ErrorCategory.LOGIC_ERROR
|
|
|
|
def test_categorize_implementation_error(self, categorizer):
|
|
"""Test categorization of implementation errors."""
|
|
result = TestResult(
|
|
test_id="t1",
|
|
passed=False,
|
|
duration_ms=100,
|
|
error_message="TypeError: 'NoneType' object has no attribute 'get'",
|
|
)
|
|
assert categorizer.categorize(result) == ErrorCategory.IMPLEMENTATION_ERROR
|
|
|
|
def test_categorize_edge_case(self, categorizer):
|
|
"""Test categorization of edge cases."""
|
|
result = TestResult(
|
|
test_id="t1",
|
|
passed=False,
|
|
duration_ms=100,
|
|
error_message="timeout: request took longer than expected",
|
|
)
|
|
assert categorizer.categorize(result) == ErrorCategory.EDGE_CASE
|
|
|
|
def test_categorize_from_stack_trace(self, categorizer):
|
|
"""Test categorization from stack trace."""
|
|
result = TestResult(
|
|
test_id="t1",
|
|
passed=False,
|
|
duration_ms=100,
|
|
error_message="Error occurred",
|
|
stack_trace="KeyError: 'missing_key'\n at line 42",
|
|
)
|
|
assert categorizer.categorize(result) == ErrorCategory.IMPLEMENTATION_ERROR
|
|
|
|
def test_get_fix_suggestion(self, categorizer):
|
|
"""Test fix suggestions for each category."""
|
|
assert "Goal" in categorizer.get_fix_suggestion(ErrorCategory.LOGIC_ERROR)
|
|
assert "code" in categorizer.get_fix_suggestion(ErrorCategory.IMPLEMENTATION_ERROR).lower()
|
|
assert "test" in categorizer.get_fix_suggestion(ErrorCategory.EDGE_CASE).lower()
|
|
|
|
def test_get_iteration_guidance(self, categorizer):
|
|
"""Test iteration guidance."""
|
|
guidance = categorizer.get_iteration_guidance(ErrorCategory.LOGIC_ERROR)
|
|
assert guidance["stage"] == "Goal"
|
|
assert guidance["restart_required"] is True
|
|
|
|
guidance = categorizer.get_iteration_guidance(ErrorCategory.IMPLEMENTATION_ERROR)
|
|
assert guidance["stage"] == "Agent"
|
|
assert guidance["restart_required"] is False
|
|
|
|
|
|
# ============================================================================
|
|
# Parallel Runner Tests
|
|
# ============================================================================
|
|
|
|
class TestParallelRunner:
|
|
"""Tests for ParallelTestRunner."""
|
|
|
|
@pytest.fixture
|
|
def runner(self, tmp_path):
|
|
"""Create a test runner with temporary storage."""
|
|
storage = TestStorage(tmp_path)
|
|
config = ParallelConfig(num_workers=1) # Sequential for testing
|
|
return ParallelTestRunner(config, storage)
|
|
|
|
def test_create_suite_result(self, runner):
|
|
"""Test creating suite result from individual results."""
|
|
results = [
|
|
TestResult(test_id="t1", passed=True, duration_ms=100),
|
|
TestResult(test_id="t2", passed=False, duration_ms=50),
|
|
]
|
|
|
|
suite = runner._create_suite_result("goal_001", results)
|
|
|
|
assert suite.goal_id == "goal_001"
|
|
assert suite.total == 2
|
|
assert suite.passed == 1
|
|
assert suite.failed == 1
|
|
assert suite.duration_ms == 150
|
|
|
|
|
|
# ============================================================================
|
|
# Debug Tool Tests
|
|
# ============================================================================
|
|
|
|
class TestDebugTool:
|
|
"""Tests for DebugTool."""
|
|
|
|
@pytest.fixture
|
|
def debug_tool(self, tmp_path):
|
|
"""Create a debug tool with temporary storage."""
|
|
storage = TestStorage(tmp_path)
|
|
return DebugTool(storage)
|
|
|
|
def test_analyze_missing_test(self, debug_tool):
|
|
"""Test analyzing a non-existent test."""
|
|
info = debug_tool.analyze("goal_001", "nonexistent")
|
|
|
|
assert info.test_id == "nonexistent"
|
|
assert "not found" in info.error_message.lower()
|
|
|
|
def test_analyze_with_result(self, debug_tool, tmp_path):
|
|
"""Test analyzing a test with result."""
|
|
storage = TestStorage(tmp_path)
|
|
|
|
# Create and save test
|
|
test = Test(
|
|
id="test_001",
|
|
goal_id="goal_001",
|
|
parent_criteria_id="c1",
|
|
test_type=TestType.CONSTRAINT,
|
|
test_name="test_something",
|
|
test_code="pass",
|
|
description="A test",
|
|
input={"key": "value"},
|
|
expected_output={"result": "expected"},
|
|
)
|
|
storage.save_test(test)
|
|
|
|
# Create and save result
|
|
result = TestResult(
|
|
test_id="test_001",
|
|
passed=False,
|
|
duration_ms=100,
|
|
error_message="TypeError: something went wrong",
|
|
error_category=ErrorCategory.IMPLEMENTATION_ERROR,
|
|
)
|
|
storage.save_result("test_001", result)
|
|
|
|
# Create new debug tool with same storage
|
|
debug_tool = DebugTool(storage)
|
|
|
|
info = debug_tool.analyze("goal_001", "test_001")
|
|
|
|
assert info.test_id == "test_001"
|
|
assert info.test_name == "test_something"
|
|
assert not info.passed
|
|
assert info.error_category == "implementation_error"
|
|
assert info.suggested_fix is not None
|
|
|
|
|
|
# ============================================================================
|
|
# Integration Tests
|
|
# ============================================================================
|
|
|
|
class TestIntegration:
|
|
"""Integration tests for the testing framework."""
|
|
|
|
def test_full_workflow(self, tmp_path):
|
|
"""Test a simplified full workflow."""
|
|
storage = TestStorage(tmp_path)
|
|
|
|
# 1. Create tests (simulating generation)
|
|
tests = []
|
|
for i in range(3):
|
|
test = Test(
|
|
id=f"test_{i}",
|
|
goal_id="goal_001",
|
|
parent_criteria_id="constraint_001",
|
|
test_type=TestType.CONSTRAINT,
|
|
test_name=f"test_constraint_{i}",
|
|
test_code=f"def test_constraint_{i}(agent): assert True",
|
|
description=f"Test {i}",
|
|
)
|
|
tests.append(test)
|
|
|
|
# 2. Approve tests
|
|
for test in tests:
|
|
test.approve("user")
|
|
storage.save_test(test)
|
|
|
|
# 3. Verify storage
|
|
approved = storage.get_approved_tests("goal_001")
|
|
assert len(approved) == 3
|
|
|
|
# 4. Simulate running tests
|
|
config = ParallelConfig(num_workers=1)
|
|
runner = ParallelTestRunner(config, storage)
|
|
|
|
class MockAgent:
|
|
def run(self, input):
|
|
return {"success": True}
|
|
|
|
results = runner.run_tests(approved, MockAgent())
|
|
assert len(results) == 3
|
|
|
|
# 5. Save results
|
|
for result in results:
|
|
storage.save_result(result.test_id, result)
|
|
|
|
# 6. Check stats
|
|
stats = storage.get_stats()
|
|
assert stats["total_tests"] == 3
|
|
|
|
|
|
if __name__ == "__main__":
|
|
pytest.main([__file__, "-v"])
|