"""
|
|
Constraint test generator.
|
|
|
|
Generates tests for Goal constraints using LLM.
|
|
Tests are returned with PENDING approval status.
|
|
"""
|
|
|
|
import uuid
from typing import TYPE_CHECKING

from framework.graph.goal import Constraint, Goal
from framework.llm.provider import Tool, ToolResult, ToolUse
from framework.testing.prompts import CONSTRAINT_TEST_PROMPT
from framework.testing.test_case import ApprovalStatus, Test, TestType

if TYPE_CHECKING:
    from framework.llm.provider import LLMProvider


# Tool for collecting generated tests - Claude handles JSON escaping automatically
SUBMIT_TEST_TOOL = Tool(
    name="submit_test",
    description="Submit a generated constraint test. Call once per test.",
    parameters={
        "properties": {
            "constraint_id": {
                "type": "string",
                "description": "ID of the constraint being tested",
            },
            "test_name": {
                "type": "string",
                "description": "pytest function name, e.g., test_constraint_api_limits_respected",
            },
            "test_code": {
                "type": "string",
                "description": "Complete Python test function code",
            },
            "description": {
                "type": "string",
                "description": "What the test validates",
            },
            "input": {
                "type": "object",
                "description": "Test input data",
            },
            "expected_output": {
                "type": "object",
                "description": "Expected output",
            },
            "confidence": {
                "type": "number",
                "description": "Confidence score 0-1",
            },
        },
        "required": ["constraint_id", "test_name", "test_code", "description", "confidence"],
    },
)
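
# For illustration only: the kind of arguments a model is expected to pass to
# submit_test. The values below are hypothetical, not produced by this module.
#
#     {
#         "constraint_id": "api_rate_limit",
#         "test_name": "test_constraint_api_rate_limit_respected",
#         "test_code": "def test_constraint_api_rate_limit_respected():\n    ...",
#         "description": "Checks the agent stays within the configured API rate limit",
#         "confidence": 0.9,
#     }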


class ConstraintTestGenerator:
    """
    Generate constraint tests from Goal constraints.

    Generated tests require user approval before being added to the test suite.
    """
    def __init__(self, llm: "LLMProvider"):
        """
        Initialize generator with LLM provider.

        Args:
            llm: LLM provider for test generation (e.g., AnthropicProvider)
        """
        self.llm = llm

    def generate(self, goal: Goal, agent_module: str = "my_agent") -> list[Test]:
        """
        Generate tests for all constraints in a goal.

        Args:
            goal: Goal with constraints to test
            agent_module: The agent module name (e.g., "web_research_agent").
                Used to generate import: from exports.{agent_module} import default_agent

        Returns:
            List of Test objects with approval_status=PENDING.
            These MUST be approved before being added to the test suite.
        """
        if not goal.constraints:
            return []

        # Format prompt
        prompt = CONSTRAINT_TEST_PROMPT.format(
            goal_name=goal.name,
            goal_description=goal.description,
            constraints_formatted=self._format_constraints(goal.constraints),
            agent_module=agent_module,
        )

        # Collect tests via tool calls - Claude handles JSON escaping automatically
        collected_tests: list[dict] = []

        def tool_executor(tool_use: ToolUse) -> ToolResult:
            if tool_use.name == "submit_test":
                collected_tests.append(tool_use.input)
                return ToolResult(tool_use_id=tool_use.id, content="Test recorded successfully")
            return ToolResult(tool_use_id=tool_use.id, content="Unknown tool", is_error=True)

        self.llm.complete_with_tools(
            messages=[{"role": "user", "content": prompt}],
            system=(
                "You are a test generation expert. "
                "For each constraint, call the submit_test tool with the test details."
            ),
            tools=[SUBMIT_TEST_TOOL],
            tool_executor=tool_executor,
            max_iterations=5,
        )

        tests = self._create_tests_from_collected(collected_tests, goal.id)
        # Filter out skeleton tests (drop any test with empty code or the default 0.5 confidence)
        tests = [t for t in tests if t.test_code.strip() and t.llm_confidence != 0.5]
        # Enforce max 5 tests total
        return tests[:5]

    def generate_for_constraint(self, goal: Goal, constraint: Constraint, agent_module: str = "my_agent") -> list[Test]:
        """
        Generate tests for a single constraint.

        Args:
            goal: Goal containing the constraint
            constraint: Specific constraint to test
            agent_module: The agent module name (e.g., "web_research_agent")

        Returns:
            List of Test objects for the constraint
        """
        # Format prompt with just this constraint
        prompt = CONSTRAINT_TEST_PROMPT.format(
            goal_name=goal.name,
            goal_description=goal.description,
            constraints_formatted=self._format_constraint(constraint),
            agent_module=agent_module,
        )

        # Collect tests via tool calls
        collected_tests: list[dict] = []

        def tool_executor(tool_use: ToolUse) -> ToolResult:
            if tool_use.name == "submit_test":
                collected_tests.append(tool_use.input)
                return ToolResult(tool_use_id=tool_use.id, content="Test recorded successfully")
            return ToolResult(tool_use_id=tool_use.id, content="Unknown tool", is_error=True)

        self.llm.complete_with_tools(
            messages=[{"role": "user", "content": prompt}],
            system="You are a test generation expert. Call the submit_test tool with the test details.",
            tools=[SUBMIT_TEST_TOOL],
            tool_executor=tool_executor,
            max_iterations=3,
        )

        return self._create_tests_from_collected(collected_tests, goal.id)

    def _format_constraints(self, constraints: list[Constraint]) -> str:
        """Format constraints for prompt."""
        lines = []
        for c in constraints:
            lines.append(self._format_constraint(c))
            lines.append("")
        return "\n".join(lines)

    def _format_constraint(self, constraint: Constraint) -> str:
        """Format a single constraint for prompt."""
        severity = "HARD" if constraint.constraint_type == "hard" else "SOFT"
        return f"""### Constraint: {constraint.id}
- Type: {severity} ({constraint.constraint_type})
- Category: {constraint.category}
- Description: {constraint.description}
- Check: {constraint.check}"""
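
    # For reference, _format_constraint yields a block like the following in the
    # prompt (field values are illustrative, not taken from a real constraint):
    #
    #     ### Constraint: api_rate_limit
    #     - Type: HARD (hard)
    #     - Category: performance
    #     - Description: The agent must not exceed 10 requests per minute
    #     - Check: request_rate <= 10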
    def _create_tests_from_collected(self, collected: list[dict], goal_id: str) -> list[Test]:
        """Create Test objects from tool call data."""
        tests = []
        for td in collected:
            test = Test(
                id=f"test_{uuid.uuid4().hex[:8]}",
                goal_id=goal_id,
                parent_criteria_id=td.get("constraint_id", "unknown"),
                test_type=TestType.CONSTRAINT,
                test_name=td.get("test_name", "unnamed_test"),
                test_code=td.get("test_code", ""),
                description=td.get("description", ""),
                input=td.get("input", {}),
                expected_output=td.get("expected_output", {}),
                generated_by="llm",
                llm_confidence=float(td.get("confidence", 0.5)),
                approval_status=ApprovalStatus.PENDING,
            )
            tests.append(test)
        return tests