""" Plan Data Structures for Flexible Execution. Plans are created externally (by Claude Code or another LLM agent) and executed internally by the FlexibleGraphExecutor with Worker-Judge loop. The Plan is the contract between the external planner and the executor: - Planner creates a Plan with PlanSteps - Executor runs steps and judges results - If replanning needed, returns feedback to external planner """ from datetime import datetime from enum import Enum from typing import Any from pydantic import BaseModel, Field class ActionType(str, Enum): """Types of actions a PlanStep can perform.""" LLM_CALL = "llm_call" # Call LLM for generation TOOL_USE = "tool_use" # Use a registered tool SUB_GRAPH = "sub_graph" # Execute a sub-graph FUNCTION = "function" # Call a Python function CODE_EXECUTION = "code_execution" # Execute dynamic code (sandboxed) class StepStatus(str, Enum): """Status of a plan step.""" PENDING = "pending" AWAITING_APPROVAL = "awaiting_approval" # Waiting for human approval IN_PROGRESS = "in_progress" COMPLETED = "completed" FAILED = "failed" SKIPPED = "skipped" REJECTED = "rejected" # Human rejected execution def is_terminal(self) -> bool: """Check if this status represents a terminal (finished) state. Terminal states are states where the step will not execute further, either because it completed successfully or failed/was skipped. """ return self in ( StepStatus.COMPLETED, StepStatus.FAILED, StepStatus.SKIPPED, StepStatus.REJECTED, ) def is_successful(self) -> bool: """Check if this status represents successful completion.""" return self == StepStatus.COMPLETED class ApprovalDecision(str, Enum): """Human decision on a step requiring approval.""" APPROVE = "approve" # Execute as planned REJECT = "reject" # Skip this step MODIFY = "modify" # Execute with modifications ABORT = "abort" # Stop entire execution class ApprovalRequest(BaseModel): """Request for human approval before executing a step.""" step_id: str step_description: str action_type: str action_details: dict[str, Any] = Field(default_factory=dict) context: dict[str, Any] = Field(default_factory=dict) approval_message: str | None = None # Preview of what will happen preview: str | None = None model_config = {"extra": "allow"} class ApprovalResult(BaseModel): """Result of human approval decision.""" decision: ApprovalDecision reason: str | None = None modifications: dict[str, Any] = Field(default_factory=dict) model_config = {"extra": "allow"} class JudgmentAction(str, Enum): """Actions the judge can take after evaluating a step.""" ACCEPT = "accept" # Step completed successfully, continue RETRY = "retry" # Retry the step with feedback REPLAN = "replan" # Return to external planner for new plan ESCALATE = "escalate" # Request human intervention class ActionSpec(BaseModel): """ Specification for an action to be executed. This is the "what to do" part of a PlanStep. """ action_type: ActionType # For LLM_CALL prompt: str | None = None system_prompt: str | None = None model: str | None = None # For TOOL_USE tool_name: str | None = None tool_args: dict[str, Any] = Field(default_factory=dict) # For SUB_GRAPH graph_id: str | None = None # For FUNCTION function_name: str | None = None function_args: dict[str, Any] = Field(default_factory=dict) # For CODE_EXECUTION code: str | None = None language: str = "python" model_config = {"extra": "allow"} class PlanStep(BaseModel): """ A single step in a plan. Created by external planner, executed by Worker, evaluated by Judge. """ id: str description: str action: ActionSpec # Data flow inputs: dict[str, Any] = Field( default_factory=dict, description="Input data for this step (can reference previous step outputs)", ) expected_outputs: list[str] = Field( default_factory=list, description="Keys this step should produce" ) # Dependencies dependencies: list[str] = Field( default_factory=list, description="IDs of steps that must complete before this one" ) # Human-in-the-loop (HITL) requires_approval: bool = Field( default=False, description="If True, requires human approval before execution" ) approval_message: str | None = Field( default=None, description="Message to show human when requesting approval" ) # Execution state status: StepStatus = StepStatus.PENDING result: Any | None = None error: str | None = None attempts: int = 0 max_retries: int = 3 # Metadata started_at: datetime | None = None completed_at: datetime | None = None model_config = {"extra": "allow"} def is_ready(self, terminal_step_ids: set[str]) -> bool: """Check if this step is ready to execute (all dependencies finished). A step is ready when: 1. Its status is PENDING (not yet started) 2. All its dependencies are in a terminal state (completed, failed, skipped, or rejected) Note: This allows dependent steps to become "ready" even if their dependencies failed. The executor should check if any dependencies failed and handle accordingly (e.g., skip the step or mark it as blocked). Args: terminal_step_ids: Set of step IDs that are in a terminal state """ if self.status != StepStatus.PENDING: return False return all(dep in terminal_step_ids for dep in self.dependencies) class Judgment(BaseModel): """ Result of judging a step execution. The Judge evaluates step results and decides what to do next. """ action: JudgmentAction reasoning: str feedback: str | None = None # For retry/replan - what went wrong # For rule-based judgments rule_matched: str | None = None # For LLM-based judgments confidence: float = 1.0 llm_used: bool = False # Context for replanning context: dict[str, Any] = Field(default_factory=dict) model_config = {"extra": "allow"} class EvaluationRule(BaseModel): """ A rule for the HybridJudge to evaluate step results. Rules are checked before falling back to LLM evaluation. """ id: str description: str # Condition (Python expression evaluated with result, step, goal context) condition: str # What to do if condition matches action: JudgmentAction feedback_template: str = "" # Can use {result}, {step}, etc. # Priority (higher = checked first) priority: int = 0 model_config = {"extra": "allow"} class Plan(BaseModel): """ A complete execution plan. Created by external planner (Claude Code, etc). Executed by FlexibleGraphExecutor. """ id: str goal_id: str description: str # Steps to execute steps: list[PlanStep] = Field(default_factory=list) # Execution state revision: int = 1 # Incremented on replan current_step_idx: int = 0 # Accumulated context from execution context: dict[str, Any] = Field(default_factory=dict) # Metadata created_at: datetime = Field(default_factory=datetime.now) created_by: str = "external" # Who created this plan # Previous attempt info (for replanning) previous_feedback: str | None = None model_config = {"extra": "allow"} @classmethod def from_json(cls, data: str | dict) -> "Plan": """ Load a Plan from exported JSON. This handles the output from export_graph() and properly converts action_type strings to ActionType enums. Args: data: JSON string or dict from export_graph() Returns: Plan object ready for FlexibleGraphExecutor Example: # Load from export_graph() output exported = export_graph() plan = Plan.from_json(exported) # Load from file with open("plan.json") as f: plan = Plan.from_json(json.load(f)) """ import json as json_module if isinstance(data, str): data = json_module.loads(data) # Handle nested "plan" key from export_graph output if "plan" in data: data = data["plan"] # Convert steps steps = [] for step_data in data.get("steps", []): action_data = step_data.get("action", {}) # Convert action_type string to enum action_type_str = action_data.get("action_type", "function") action_type = ActionType(action_type_str) action = ActionSpec( action_type=action_type, prompt=action_data.get("prompt"), system_prompt=action_data.get("system_prompt"), tool_name=action_data.get("tool_name"), tool_args=action_data.get("tool_args", {}), function_name=action_data.get("function_name"), function_args=action_data.get("function_args", {}), code=action_data.get("code"), ) step = PlanStep( id=step_data["id"], description=step_data.get("description", ""), action=action, inputs=step_data.get("inputs", {}), expected_outputs=step_data.get("expected_outputs", []), dependencies=step_data.get("dependencies", []), requires_approval=step_data.get("requires_approval", False), approval_message=step_data.get("approval_message"), ) steps.append(step) return cls( id=data.get("id", "plan"), goal_id=data.get("goal_id", ""), description=data.get("description", ""), steps=steps, context=data.get("context", {}), revision=data.get("revision", 1), ) def get_step(self, step_id: str) -> PlanStep | None: """Get a step by ID.""" for step in self.steps: if step.id == step_id: return step return None def get_ready_steps(self) -> list[PlanStep]: """Get all steps that are ready to execute. A step is ready when all its dependencies are in terminal states (completed, failed, skipped, or rejected). """ terminal_ids = {s.id for s in self.steps if s.status.is_terminal()} return [s for s in self.steps if s.is_ready(terminal_ids)] def get_completed_steps(self) -> list[PlanStep]: """Get all completed steps.""" return [s for s in self.steps if s.status == StepStatus.COMPLETED] def is_complete(self) -> bool: """Check if all steps are in terminal states (finished executing). Returns True when all steps have reached a terminal state, regardless of whether they succeeded or failed. Use has_failed_steps() to check if any steps failed. """ return all(s.status.is_terminal() for s in self.steps) def is_successful(self) -> bool: """Check if all steps completed successfully.""" return all(s.status == StepStatus.COMPLETED for s in self.steps) def has_failed_steps(self) -> bool: """Check if any steps failed, were skipped, or were rejected.""" return any( s.status in (StepStatus.FAILED, StepStatus.SKIPPED, StepStatus.REJECTED) for s in self.steps ) def get_failed_steps(self) -> list[PlanStep]: """Get all steps that failed, were skipped, or were rejected.""" return [ s for s in self.steps if s.status in (StepStatus.FAILED, StepStatus.SKIPPED, StepStatus.REJECTED) ] def to_feedback_context(self) -> dict[str, Any]: """Create context for replanning.""" return { "plan_id": self.id, "revision": self.revision, "completed_steps": [ { "id": s.id, "description": s.description, "result": s.result, } for s in self.get_completed_steps() ], "failed_steps": [ { "id": s.id, "description": s.description, "error": s.error, "attempts": s.attempts, } for s in self.steps if s.status == StepStatus.FAILED ], "context": self.context, } class ExecutionStatus(str, Enum): """Status of plan execution.""" COMPLETED = "completed" AWAITING_APPROVAL = "awaiting_approval" # Paused for human approval NEEDS_REPLAN = "needs_replan" NEEDS_ESCALATION = "needs_escalation" REJECTED = "rejected" # Human rejected a step ABORTED = "aborted" # Human aborted execution FAILED = "failed" class PlanExecutionResult(BaseModel): """ Result of executing a plan. Returned to external planner with status and feedback. """ status: ExecutionStatus # Results from completed steps results: dict[str, Any] = Field(default_factory=dict) # For needs_replan - what to tell the planner feedback: str | None = None feedback_context: dict[str, Any] = Field(default_factory=dict) # Steps that completed before stopping completed_steps: list[str] = Field(default_factory=list) # Metrics steps_executed: int = 0 total_tokens: int = 0 total_latency_ms: int = 0 # Error info (for failed status) error: str | None = None model_config = {"extra": "allow"} def load_export(data: str | dict) -> tuple["Plan", Any]: """ Load both Plan and Goal from export_graph() output. The export_graph() MCP tool returns both the plan and the goal that was defined and approved during the agent building process. This function loads both so you can use them with FlexibleGraphExecutor. Args: data: JSON string or dict from export_graph() Returns: Tuple of (Plan, Goal) ready for FlexibleGraphExecutor Example: # Load from export_graph() output exported = export_graph() plan, goal = load_export(exported) result = await executor.execute_plan(plan, goal, context) """ import json as json_module from framework.graph.goal import Goal if isinstance(data, str): data = json_module.loads(data) # Load plan plan = Plan.from_json(data) # Load goal goal_data = data.get("goal", {}) if goal_data: goal = Goal.model_validate(goal_data) else: # Fallback: create minimal goal from plan metadata goal = Goal( id=plan.goal_id, name=plan.goal_id, description=plan.description, success_criteria=[], constraints=[], ) return plan, goal