"""
|
|
Test case schema with approval tracking.
|
|
|
|
Tests are generated by LLM from Goal success_criteria and constraints,
|
|
but require mandatory user approval before being stored.
|
|
"""
|
|
|
|
from datetime import datetime
from enum import Enum
from typing import Any

from pydantic import BaseModel, Field
|
|
|
|
|
|
class ApprovalStatus(str, Enum):
    """Lifecycle state of user review for a generated test case."""

    PENDING = "pending"    # not yet reviewed by the user
    APPROVED = "approved"  # accepted exactly as generated
    MODIFIED = "modified"  # accepted after user edits
    REJECTED = "rejected"  # declined by the user (with a recorded reason)
|
|
|
|
|
|
class TestType(str, Enum):
    """Categorizes a test by the kind of requirement it validates."""

    CONSTRAINT = "constraint"     # validates constraint boundaries
    SUCCESS_CRITERIA = "outcome"  # validates achievement of success criteria
    EDGE_CASE = "edge_case"       # validates edge-case handling
|
|
|
|
|
|
class Test(BaseModel):
    """
    A test case generated from Goal success_criteria or constraints.

    Tests are either:
    - Generated by LLM during Goal stage (constraints) or Eval stage (success criteria)
    - Created manually by human engineers

    All tests require approval before being added to the test suite.
    """

    id: str
    goal_id: str
    parent_criteria_id: str = Field(
        description="Links to success_criteria.id or constraint.id"
    )
    test_type: TestType

    # Test definition
    test_name: str = Field(
        description="Descriptive function name, e.g., test_constraint_api_limits_respected"
    )
    test_code: str = Field(
        description="Python test function code (pytest compatible)"
    )
    description: str = Field(
        description="Human-readable description of what the test validates"
    )
    input: dict[str, Any] = Field(
        default_factory=dict,
        description="Test input data"
    )
    expected_output: dict[str, Any] = Field(
        default_factory=dict,
        description="Expected output or assertions"
    )

    # LLM generation metadata
    generated_by: str = Field(
        default="llm",
        description="Who created the test: 'llm' or 'human'"
    )
    llm_confidence: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="LLM's confidence in the test quality (0-1)"
    )

    # Approval tracking (CRITICAL - tests are never used without approval)
    approval_status: ApprovalStatus = ApprovalStatus.PENDING
    approved_by: str | None = None
    approved_at: datetime | None = None
    rejection_reason: str | None = Field(
        default=None,
        description="Reason for rejection if status is REJECTED"
    )
    original_code: str | None = Field(
        default=None,
        description="Original LLM-generated code if user modified it"
    )

    # Execution tracking
    last_run: datetime | None = None
    last_result: str | None = Field(
        default=None,
        description="Result of last run: 'passed', 'failed', 'error'"
    )
    run_count: int = 0
    pass_count: int = 0
    fail_count: int = 0

    # Timestamps (naive local time via datetime.now — NOTE(review): consider
    # timezone-aware timestamps if records are compared across systems)
    created_at: datetime = Field(default_factory=datetime.now)
    updated_at: datetime = Field(default_factory=datetime.now)

    model_config = {"extra": "allow"}

    def approve(self, approved_by: str = "user") -> None:
        """Mark this test as approved as-is.

        Args:
            approved_by: Identifier of the approver (defaults to "user").
        """
        self.approval_status = ApprovalStatus.APPROVED
        self.approved_by = approved_by
        self.approved_at = datetime.now()
        # Clear any stale rejection reason left by a prior reject() so the
        # record stays internally consistent with APPROVED status.
        self.rejection_reason = None
        self.updated_at = datetime.now()

    def modify(self, new_code: str, approved_by: str = "user") -> None:
        """Approve the test with user modifications, preserving the original code.

        Args:
            new_code: The user-edited code that replaces test_code.
            approved_by: Identifier of the approver (defaults to "user").
        """
        # Capture the original only on the first modification; repeated calls
        # must not overwrite the pristine LLM-generated code with a prior edit.
        if self.original_code is None:
            self.original_code = self.test_code
        self.test_code = new_code
        self.approval_status = ApprovalStatus.MODIFIED
        self.approved_by = approved_by
        self.approved_at = datetime.now()
        # Clear any stale rejection reason left by a prior reject().
        self.rejection_reason = None
        self.updated_at = datetime.now()

    def reject(self, reason: str) -> None:
        """Reject the test, recording why.

        Args:
            reason: Human-readable explanation for the rejection.
        """
        self.approval_status = ApprovalStatus.REJECTED
        self.rejection_reason = reason
        # Clear approval metadata so a previously approved test does not carry
        # stale approver/timestamp data alongside REJECTED status.
        self.approved_by = None
        self.approved_at = None
        self.updated_at = datetime.now()

    def record_result(self, passed: bool) -> None:
        """Record the outcome of one test run and update run counters.

        Args:
            passed: True if the run passed, False if it failed.
        """
        self.last_run = datetime.now()
        self.last_result = "passed" if passed else "failed"
        self.run_count += 1
        if passed:
            self.pass_count += 1
        else:
            self.fail_count += 1
        self.updated_at = datetime.now()

    @property
    def is_approved(self) -> bool:
        """True if the test is usable (status APPROVED or MODIFIED)."""
        return self.approval_status in (ApprovalStatus.APPROVED, ApprovalStatus.MODIFIED)

    @property
    def pass_rate(self) -> float | None:
        """Fraction of runs that passed, or None if the test has never run."""
        if self.run_count == 0:
            return None
        return self.pass_count / self.run_count
|