fix: incubating mode approval guidance injection

2026-04-29 18:43:26 -07:00
parent 6357597e88
commit d26e7f33d2
3 changed files with 39 additions and 79 deletions
@@ -1,9 +1,8 @@
 """One-shot LLM gate that decides if a queen DM is ready to fork a colony.

 The queen's ``start_incubating_colony`` tool calls :func:`evaluate` with
-the queen's recent conversation, a proposed ``colony_name``, and a
-one-paragraph ``intended_purpose``.  The evaluator returns a structured
-verdict:
+the queen's recent conversation and a proposed ``colony_name``.  The
+evaluator returns a structured verdict:

    {
        "ready": bool,
@@ -38,8 +37,8 @@ You gate whether a queen agent should commit to forking a persistent
 expensive: it ends the user's chat with this queen and the worker runs
 unattended afterward, so the spec must be settled before you approve.

-Read the conversation excerpt and the queen's proposed colony_name +
-intended_purpose, then decide.
+Read the conversation excerpt and the queen's proposed colony_name,
+then decide.

 APPROVE (ready=true) only when ALL of the following hold:
  1. The user has explicitly asked for work that needs to outlive this
@@ -128,11 +127,9 @@ def format_conversation_excerpt(messages: list[Message]) -> str:
 def _build_user_message(
    conversation_excerpt: str,
    colony_name: str,
-    intended_purpose: str,
 ) -> str:
    return (
        f"## Proposed colony name\n{colony_name}\n\n"
-        f"## Queen's intended_purpose\n{intended_purpose.strip()}\n\n"
        f"## Recent conversation (oldest → newest)\n{conversation_excerpt}\n\n"
        "Decide: should this queen be approved to enter INCUBATING phase?"
    )
@@ -189,7 +186,6 @@ async def evaluate(
    llm: Any,
    messages: list[Message],
    colony_name: str,
-    intended_purpose: str,
 ) -> dict[str, Any]:
    """Run the incubating evaluator against the queen's conversation.

@@ -200,14 +196,13 @@ async def evaluate(
        messages: The queen's conversation messages, oldest first.  The
            evaluator slices its own tail; pass the full list.
        colony_name: Validated colony slug.
-        intended_purpose: Queen's one-paragraph brief.

    Returns:
        ``{"ready": bool, "reasons": [str], "missing_prerequisites": [str]}``.
        Fail-closed on any error.
    """
    excerpt = format_conversation_excerpt(messages)
-    user_msg = _build_user_message(excerpt, colony_name, intended_purpose)
+    user_msg = _build_user_message(excerpt, colony_name)

    try:
        response = await llm.acomplete(
@@ -144,13 +144,12 @@ several entries when you have multiple clarifications. \
 When the user clearly wants persistent / recurring / headless work that \
 needs to outlive THIS chat (e.g. "every morning", "monitor X and alert \
 me", "set up a job that…"), call ``start_incubating_colony`` with a \
-proposed colony_name and a one-paragraph intended_purpose. A side \
-evaluator reads the conversation and decides if the spec is settled. If \
-it returns ``not_ready`` you keep talking with the user — sort out \
-whatever the evaluator said is missing, then retry. If it returns \
-``incubating`` your phase flips and a new prompt takes over. Do not \
-try to write SKILL.md, fork directories, or otherwise build the colony \
-yourself in this phase.\
+proposed colony_name. A side evaluator reads the conversation and \
+decides if the spec is settled. If it returns ``not_ready`` you keep \
+talking with the user — sort out whatever the evaluator said is \
+missing, then retry. If it returns ``incubating`` your phase flips and \
+a new prompt takes over. Do not try to write SKILL.md, fork \
+directories, or otherwise build the colony yourself in this phase.\
 """

 _queen_role_incubating = """\
@@ -260,14 +259,14 @@ search_files, run_command, undo_changes
 - MUST Follow the browser-automation skill protocol before using browser tools.

 ## Hand off to a colony
- start_incubating_colony(colony_name, intended_purpose) — Use this when \
-  the user wants persistent / recurring / headless work that needs to \
-  outlive THIS chat. It does NOT fork on its own; it spawns a one-shot \
-  evaluator that reads this conversation and decides whether the spec \
-  is settled enough to proceed. On approval your phase flips to \
-  INCUBATING and a new tool surface (including create_colony itself) \
-  unlocks. On rejection you stay here and keep the conversation going \
-  to fill the gaps the evaluator named.
+- start_incubating_colony(colony_name) — Use this when the user wants \
+  persistent / recurring / headless work that needs to outlive THIS \
+  chat. It does NOT fork on its own; it spawns a one-shot evaluator \
+  that reads this conversation and decides whether the spec is settled \
+  enough to proceed. On approval your phase flips to INCUBATING and a \
+  new tool surface (including create_colony itself) unlocks. On \
+  rejection you stay here and keep the conversation going to fill the \
+  gaps the evaluator named.
 """

 _queen_tools_incubating = """
@@ -69,15 +69,17 @@ logger = logging.getLogger(__name__)
 # Phrasing intentionally invites the queen's judgement; do NOT turn this
 # into a hard checklist.
 _INCUBATING_APPROVAL_GUIDANCE = (
-    "Approved to incubate colony '{colony_name}' for: {intended_purpose}\n\n"
+    "Approved to incubate colony '{colony_name}'.\n\n"
    "Your phase has flipped to INCUBATING. Before you call create_colony, "
-    "the worker will need operational details that are easy to lose in a "
+    "you'll need operational details that are easy to lose in a "
    "planning conversation. Take a moment to figure out what's still "
-    "ambiguous for THIS colony — for example: how many tasks should run "
-    "in parallel, what schedule fits (cron, interval, manual-only), what "
-    "should the worker write into progress.db so the user can review "
-    "results later, how to handle partial failures, what credentials or "
-    "MCP servers the worker needs that you haven't discussed. You don't "
+    "ambiguous for THIS colony — for example: how many worker processes "
+    "should run in parallel (e.g. 1 for a digest, 5 for a fan-out), what "
+    "schedule fits (cron, interval), what should the worker write into "
+    "progress tracking(progress.db) so the user "
+    "can review results later, how to handle partial failures, what "
+    "credentials or MCP servers the worker needs that you haven't "
+    "discussed. You don't "
    "need to cover every example — only the items that actually matter "
    "for this colony, and only the ones the user hasn't already implied. "
    "Use ask_user (batch several questions into one call when you have "
@@ -151,13 +153,11 @@ class QueenPhaseState:
    prompt_working: str = ""
    prompt_reviewing: str = ""

-    # Last-set incubation context (colony_name + intended_purpose), populated
-    # by start_incubating_colony when the evaluator approves. Read by
-    # get_current_prompt() to interpolate the colony name into the
-    # incubating role prompt so the queen sees the same name across turns
-    # without having to remember it from the tool result.
+    # Last-set incubation context, populated by start_incubating_colony when
+    # the evaluator approves. Read by get_current_prompt() to interpolate the
+    # colony name into the incubating role prompt so the queen sees the same
+    # name across turns without having to remember it from the tool result.
    incubating_colony_name: str | None = None
-    incubating_intended_purpose: str | None = None

    # Default skill operational protocols — appended to every phase prompt
    protocols_prompt: str = ""
@@ -421,7 +421,6 @@ class QueenPhaseState:
        self.phase = "independent"
        # Clear stale incubation context so a future incubation starts fresh.
        self.incubating_colony_name = None
-        self.incubating_intended_purpose = None
        tool_names = [t.name for t in self.independent_tools]
        logger.info("Queen phase → independent (source=%s, tools: %s)", source, tool_names)
        await self._emit_phase_event()
@@ -436,30 +435,25 @@ class QueenPhaseState:
        self,
        *,
        colony_name: str,
-        intended_purpose: str,
        source: str = "tool",
    ) -> None:
        """Switch to incubating phase — queen drafts the colony spec.

        Caller must already have validated colony_name. Stores the active
-        incubation context on self so get_current_prompt() can interpolate
-        it on every turn (the queen otherwise loses the colony_name after
-        the first tool result rolls past in the conversation history).
+        colony_name on self so get_current_prompt() can interpolate it on
+        every turn (the queen otherwise loses the colony_name after the
+        first tool result rolls past in the conversation history).

        Args:
            colony_name: Validated colony slug (lowercase alphanumeric + _).
-            intended_purpose: One-paragraph brief from the queen.
            source: "tool", "frontend", or "auto".
        """
        if self.phase == "incubating":
-            # Allow re-statement of context even when already incubating —
-            # the queen may have refined her intended_purpose mid-flight.
+            # Allow re-statement even when already incubating.
            self.incubating_colony_name = colony_name
-            self.incubating_intended_purpose = intended_purpose
            return
        self.phase = "incubating"
        self.incubating_colony_name = colony_name
-        self.incubating_intended_purpose = intended_purpose
        tool_names = [t.name for t in self.incubating_tools]
        logger.info(
            "Queen phase → incubating (source=%s, colony=%s, tools: %s)",
@@ -2211,7 +2205,6 @@ def register_queen_lifecycle_tools(
    async def start_incubating_colony(
        *,
        colony_name: str,
-        intended_purpose: str,
    ) -> str:
        """Gate the queen behind a one-shot readiness evaluator.

@@ -2233,18 +2226,6 @@ def register_queen_lifecycle_tools(
                {"error": ("colony_name must be lowercase alphanumeric with underscores (e.g. 'morning_hn_digest').")}
            )

-        purpose = (intended_purpose or "").strip()
-        if not purpose:
-            return json.dumps(
-                {
-                    "error": (
-                        "intended_purpose is required — describe in one "
-                        "paragraph what the colony will do, on what "
-                        "cadence, and why it must outlive this chat."
-                    )
-                }
-            )
-
        phase_state = getattr(session, "phase_state", None)
        if phase_state is None:
            return json.dumps({"error": "phase_state is not initialised on this session."})
@@ -2305,7 +2286,6 @@ def register_queen_lifecycle_tools(
            llm=llm,
            messages=messages,
            colony_name=cn,
-            intended_purpose=purpose,
        )

        if not verdict.get("ready"):
@@ -2323,10 +2303,9 @@ def register_queen_lifecycle_tools(

        # Approved — flip phase.  switch_to_incubating publishes
        # QUEEN_PHASE_CHANGED so the frontend badge updates and stores
-        # the colony_name + purpose for the role prompt to interpolate.
+        # the colony_name for the role prompt to interpolate.
        await phase_state.switch_to_incubating(
            colony_name=cn,
-            intended_purpose=purpose,
            source="tool",
        )

@@ -2334,11 +2313,7 @@ def register_queen_lifecycle_tools(
            {
                "status": "incubating",
                "colony_name": cn,
-                "intended_purpose": purpose,
-                "guidance": _INCUBATING_APPROVAL_GUIDANCE.format(
-                    colony_name=cn,
-                    intended_purpose=purpose,
-                ),
+                "guidance": _INCUBATING_APPROVAL_GUIDANCE.format(colony_name=cn),
            }
        )

@@ -2378,17 +2353,8 @@ def register_queen_lifecycle_tools(
                        "'inbox_monitor')."
                    ),
                },
-                "intended_purpose": {
-                    "type": "string",
-                    "description": (
-                        "One-paragraph brief: what the colony will do, "
-                        "on what cadence, why it must outlive this "
-                        "chat. Do NOT write the SKILL.md here — that "
-                        "happens in INCUBATING phase after approval."
-                    ),
            },
-            },
-            "required": ["colony_name", "intended_purpose"],
+            "required": ["colony_name"],
        },
    )
    registry.register(