Files
hive/core/framework/testing/test_case.py
T
2026-01-20 16:28:21 -08:00

151 lines
4.8 KiB
Python

"""
Test case schema with approval tracking.
Tests are generated by LLM from Goal success_criteria and constraints,
but require mandatory user approval before being stored.
"""
from datetime import datetime
from enum import Enum
from typing import Any
from pydantic import BaseModel, Field
class ApprovalStatus(str, Enum):
    """Review state of a generated test with respect to user approval.

    Members are ``str`` subclasses, so each one compares equal to its
    lowercase string value.
    """

    # No decision yet: the test is waiting for user review.
    PENDING = "pending"

    # The user accepted the test exactly as it was generated.
    APPROVED = "approved"

    # The user edited the test before accepting it.
    MODIFIED = "modified"

    # The user declined the test (with a reason).
    REJECTED = "rejected"
class TestType(str, Enum):
    """Category of a test, determined by what the test validates.

    Members are ``str`` subclasses, so each one compares equal to its
    string value.
    """

    # Validates constraint boundaries.
    CONSTRAINT = "constraint"

    # Validates success criteria achievement.
    # NOTE: the serialized value is "outcome", not "success_criteria".
    SUCCESS_CRITERIA = "outcome"

    # Validates edge case handling.
    EDGE_CASE = "edge_case"
class Test(BaseModel):
    """
    A test case generated from Goal success_criteria or constraints.

    Tests are either:
    - Generated by LLM during Goal stage (constraints) or Eval stage (success criteria)
    - Created manually by human engineers

    All tests require approval before being added to the test suite.

    Review state is changed through ``approve()``, ``modify()`` and
    ``reject()``; run statistics are updated through ``record_result()``.
    Each of those methods also refreshes ``updated_at``. All timestamps are
    produced by ``datetime.now()`` and are therefore naive (no tzinfo).
    """

    id: str
    goal_id: str
    parent_criteria_id: str = Field(
        description="Links to success_criteria.id or constraint.id"
    )
    test_type: TestType

    # Test definition
    test_name: str = Field(
        description="Descriptive function name, e.g., test_constraint_api_limits_respected"
    )
    test_code: str = Field(
        description="Python test function code (pytest compatible)"
    )
    description: str = Field(
        description="Human-readable description of what the test validates"
    )
    input: dict[str, Any] = Field(
        default_factory=dict,
        description="Test input data"
    )
    expected_output: dict[str, Any] = Field(
        default_factory=dict,
        description="Expected output or assertions"
    )

    # LLM generation metadata
    generated_by: str = Field(
        default="llm",
        description="Who created the test: 'llm' or 'human'"
    )
    llm_confidence: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="LLM's confidence in the test quality (0-1)"
    )

    # Approval tracking (CRITICAL - tests are never used without approval)
    approval_status: ApprovalStatus = ApprovalStatus.PENDING
    approved_by: str | None = None
    approved_at: datetime | None = None
    rejection_reason: str | None = Field(
        default=None,
        description="Reason for rejection if status is REJECTED"
    )
    original_code: str | None = Field(
        default=None,
        description="Original LLM-generated code if user modified it"
    )

    # Execution tracking
    last_run: datetime | None = None
    last_result: str | None = Field(
        default=None,
        description="Result of last run: 'passed', 'failed', 'error'"
    )
    run_count: int = 0
    pass_count: int = 0
    fail_count: int = 0

    # Timestamps
    created_at: datetime = Field(default_factory=datetime.now)
    updated_at: datetime = Field(default_factory=datetime.now)

    model_config = {"extra": "allow"}

    def approve(self, approved_by: str = "user") -> None:
        """Mark test as approved without changing its code.

        Args:
            approved_by: Identifier of whoever approved the test.
        """
        # One timestamp per transition so approved_at and updated_at agree.
        now = datetime.now()
        self.approval_status = ApprovalStatus.APPROVED
        self.approved_by = approved_by
        self.approved_at = now
        # A reason left over from an earlier rejection no longer applies.
        self.rejection_reason = None
        self.updated_at = now

    def modify(self, new_code: str, approved_by: str = "user") -> None:
        """Approve test with modifications.

        Replaces ``test_code`` with ``new_code`` and marks the test MODIFIED.

        Args:
            new_code: The edited test code to store.
            approved_by: Identifier of whoever edited and approved the test.
        """
        now = datetime.now()
        # Snapshot the code only on the first modification; a later edit must
        # not overwrite the originally generated code with an earlier edit.
        if self.original_code is None:
            self.original_code = self.test_code
        self.test_code = new_code
        self.approval_status = ApprovalStatus.MODIFIED
        self.approved_by = approved_by
        self.approved_at = now
        # A reason left over from an earlier rejection no longer applies.
        self.rejection_reason = None
        self.updated_at = now

    def reject(self, reason: str) -> None:
        """Reject the test with a reason.

        Args:
            reason: Why the test was declined; stored in ``rejection_reason``.
        """
        self.approval_status = ApprovalStatus.REJECTED
        self.rejection_reason = reason
        # Drop approval metadata from an earlier approval so a rejected test
        # does not still look like it has an approver.
        self.approved_by = None
        self.approved_at = None
        self.updated_at = datetime.now()

    def record_result(self, passed: bool) -> None:
        """Record a test run result.

        Sets ``last_result`` to ``"passed"`` or ``"failed"`` and increments
        ``run_count`` plus the matching pass/fail counter.

        Args:
            passed: True if the run passed, False otherwise.
        """
        # One timestamp so last_run and updated_at agree.
        now = datetime.now()
        self.last_run = now
        self.last_result = "passed" if passed else "failed"
        self.run_count += 1
        if passed:
            self.pass_count += 1
        else:
            self.fail_count += 1
        self.updated_at = now

    @property
    def is_approved(self) -> bool:
        """Check if test has been approved (approved or modified)."""
        return self.approval_status in (ApprovalStatus.APPROVED, ApprovalStatus.MODIFIED)

    @property
    def pass_rate(self) -> float | None:
        """Calculate pass rate if test has been run.

        Returns:
            ``pass_count / run_count``, or ``None`` when ``run_count`` is 0.
        """
        if self.run_count == 0:
            return None
        return self.pass_count / self.run_count