hive/core/framework/testing/test_case.py

"""
Test case schema with approval tracking.

Tests are generated by LLM from Goal success_criteria and constraints,
but require mandatory user approval before being stored.
"""

from datetime import datetime
from enum import Enum
from typing import Any

from pydantic import BaseModel, Field


class ApprovalStatus(str, Enum):
    """
    Review state of a generated test.

    Members are also plain strings (``str`` mixin), so they compare equal to
    their lowercase values and can be looked up by value, e.g.
    ``ApprovalStatus("approved")``.
    """

    # Generated, but the user has not reviewed it yet.
    PENDING = "pending"

    # The user accepted the test exactly as generated.
    APPROVED = "approved"

    # The user edited the test and then accepted it.
    MODIFIED = "modified"

    # The user declined the test (a reason is recorded alongside).
    REJECTED = "rejected"


class TestType(str, Enum):
    """
    Category of a test, according to what it validates.

    Members are also plain strings (``str`` mixin) and compare equal to
    their values.
    """

    # Checks that constraint boundaries are respected.
    CONSTRAINT = "constraint"

    # Checks that success criteria are achieved.
    # NOTE: the stored value is "outcome", not "success_criteria".
    SUCCESS_CRITERIA = "outcome"

    # Checks edge case handling.
    EDGE_CASE = "edge_case"


class Test(BaseModel):
    """
    A test case generated from Goal success_criteria or constraints.

    Tests are either:
    - Generated by LLM during Goal stage (constraints) or Eval stage (success criteria)
    - Created manually by human engineers

    All tests require approval before being added to the test suite.

    The approval methods (``approve``, ``modify``, ``reject``) keep the
    approval-related fields mutually consistent: accepting a test clears any
    earlier rejection reason, and rejecting a test clears any earlier
    approver/approval time. All timestamps are produced by ``datetime.now()``
    (naive, local time).
    """
    id: str
    goal_id: str
    parent_criteria_id: str = Field(
        description="Links to success_criteria.id or constraint.id"
    )
    test_type: TestType

    # Test definition
    test_name: str = Field(
        description="Descriptive function name, e.g., test_constraint_api_limits_respected"
    )
    test_code: str = Field(
        description="Python test function code (pytest compatible)"
    )
    description: str = Field(
        description="Human-readable description of what the test validates"
    )
    input: dict[str, Any] = Field(
        default_factory=dict,
        description="Test input data"
    )
    expected_output: dict[str, Any] = Field(
        default_factory=dict,
        description="Expected output or assertions"
    )

    # LLM generation metadata
    generated_by: str = Field(
        default="llm",
        description="Who created the test: 'llm' or 'human'"
    )
    llm_confidence: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="LLM's confidence in the test quality (0-1)"
    )

    # Approval tracking (CRITICAL - tests are never used without approval)
    approval_status: ApprovalStatus = ApprovalStatus.PENDING
    approved_by: str | None = None
    approved_at: datetime | None = None
    rejection_reason: str | None = Field(
        default=None,
        description="Reason for rejection if status is REJECTED"
    )
    original_code: str | None = Field(
        default=None,
        description="Original LLM-generated code if user modified it"
    )

    # Execution tracking
    last_run: datetime | None = None
    last_result: str | None = Field(
        default=None,
        description="Result of last run: 'passed', 'failed', 'error'"
    )
    run_count: int = 0
    pass_count: int = 0
    fail_count: int = 0

    # Timestamps
    created_at: datetime = Field(default_factory=datetime.now)
    updated_at: datetime = Field(default_factory=datetime.now)

    # Unknown fields passed to the constructor are kept rather than rejected.
    model_config = {"extra": "allow"}

    def _record_acceptance(self, status: ApprovalStatus, approved_by: str) -> None:
        """Set the fields shared by every accepting transition.

        Args:
            status: The accepting status to store (APPROVED or MODIFIED).
            approved_by: Identifier of whoever accepted the test.
        """
        # One timestamp for the whole transition so approved_at and
        # updated_at are exactly equal for the same event.
        now = datetime.now()
        self.approval_status = status
        self.approved_by = approved_by
        self.approved_at = now
        # A rejection reason is only meaningful while the status is REJECTED.
        self.rejection_reason = None
        self.updated_at = now

    def approve(self, approved_by: str = "user") -> None:
        """Mark test as approved as-is.

        Args:
            approved_by: Identifier of the approver. Defaults to "user".
        """
        self._record_acceptance(ApprovalStatus.APPROVED, approved_by)

    def modify(self, new_code: str, approved_by: str = "user") -> None:
        """Approve test with modifications.

        Replaces ``test_code`` with ``new_code`` and marks the test MODIFIED.
        The code that was present before the *first* modification is kept in
        ``original_code``; later modifications do not overwrite it.

        Args:
            new_code: The edited test code to store.
            approved_by: Identifier of the approver. Defaults to "user".
        """
        # Only capture the original once; otherwise a second edit would
        # replace the real original with the first user edit.
        if self.original_code is None:
            self.original_code = self.test_code
        self.test_code = new_code
        self._record_acceptance(ApprovalStatus.MODIFIED, approved_by)

    def reject(self, reason: str) -> None:
        """Reject the test with a reason.

        Args:
            reason: Why the test was declined; stored in ``rejection_reason``.
        """
        self.approval_status = ApprovalStatus.REJECTED
        self.rejection_reason = reason
        # Drop approval metadata from any earlier acceptance so a rejected
        # test never appears to carry an approver.
        self.approved_by = None
        self.approved_at = None
        self.updated_at = datetime.now()

    def record_result(self, passed: bool) -> None:
        """Record a test run result.

        Increments ``run_count`` and either ``pass_count`` or ``fail_count``,
        and sets ``last_result`` to "passed" or "failed".

        Args:
            passed: True if the run passed, False otherwise.
        """
        # Single timestamp so last_run and updated_at agree exactly.
        now = datetime.now()
        self.last_run = now
        self.last_result = "passed" if passed else "failed"
        self.run_count += 1
        if passed:
            self.pass_count += 1
        else:
            self.fail_count += 1
        self.updated_at = now

    @property
    def is_approved(self) -> bool:
        """Check if test has been approved (approved or modified)."""
        return self.approval_status in (ApprovalStatus.APPROVED, ApprovalStatus.MODIFIED)

    @property
    def pass_rate(self) -> float | None:
        """Calculate pass rate if test has been run.

        Returns:
            ``pass_count / run_count``, or None when the test has never run.
        """
        if self.run_count == 0:
            return None
        return self.pass_count / self.run_count