fix: incubating mode approval guidance injection

This commit is contained in:
Richard Tang
2026-04-29 18:43:26 -07:00
parent 6357597e88
commit d26e7f33d2
3 changed files with 39 additions and 79 deletions
@@ -1,9 +1,8 @@
"""One-shot LLM gate that decides if a queen DM is ready to fork a colony.
The queen's ``start_incubating_colony`` tool calls :func:`evaluate` with
the queen's recent conversation, a proposed ``colony_name``, and a
one-paragraph ``intended_purpose``. The evaluator returns a structured
verdict:
the queen's recent conversation and a proposed ``colony_name``. The
evaluator returns a structured verdict:
{
"ready": bool,
@@ -38,8 +37,8 @@ You gate whether a queen agent should commit to forking a persistent
expensive: it ends the user's chat with this queen and the worker runs
unattended afterward, so the spec must be settled before you approve.
Read the conversation excerpt and the queen's proposed colony_name +
intended_purpose, then decide.
Read the conversation excerpt and the queen's proposed colony_name,
then decide.
APPROVE (ready=true) only when ALL of the following hold:
1. The user has explicitly asked for work that needs to outlive this
@@ -128,11 +127,9 @@ def format_conversation_excerpt(messages: list[Message]) -> str:
def _build_user_message(
conversation_excerpt: str,
colony_name: str,
intended_purpose: str,
) -> str:
return (
f"## Proposed colony name\n{colony_name}\n\n"
f"## Queen's intended_purpose\n{intended_purpose.strip()}\n\n"
f"## Recent conversation (oldest → newest)\n{conversation_excerpt}\n\n"
"Decide: should this queen be approved to enter INCUBATING phase?"
)
@@ -189,7 +186,6 @@ async def evaluate(
llm: Any,
messages: list[Message],
colony_name: str,
intended_purpose: str,
) -> dict[str, Any]:
"""Run the incubating evaluator against the queen's conversation.
@@ -200,14 +196,13 @@ async def evaluate(
messages: The queen's conversation messages, oldest first. The
evaluator slices its own tail; pass the full list.
colony_name: Validated colony slug.
intended_purpose: Queen's one-paragraph brief.
Returns:
``{"ready": bool, "reasons": [str], "missing_prerequisites": [str]}``.
Fail-closed on any error.
"""
excerpt = format_conversation_excerpt(messages)
user_msg = _build_user_message(excerpt, colony_name, intended_purpose)
user_msg = _build_user_message(excerpt, colony_name)
try:
response = await llm.acomplete(
+14 -15
View File
@@ -144,13 +144,12 @@ several entries when you have multiple clarifications. \
When the user clearly wants persistent / recurring / headless work that \
needs to outlive THIS chat (e.g. "every morning", "monitor X and alert \
me", "set up a job that"), call ``start_incubating_colony`` with a \
proposed colony_name and a one-paragraph intended_purpose. A side \
evaluator reads the conversation and decides if the spec is settled. If \
it returns ``not_ready`` you keep talking with the user sort out \
whatever the evaluator said is missing, then retry. If it returns \
``incubating`` your phase flips and a new prompt takes over. Do not \
try to write SKILL.md, fork directories, or otherwise build the colony \
yourself in this phase.\
proposed colony_name. A side evaluator reads the conversation and \
decides if the spec is settled. If it returns ``not_ready`` you keep \
talking with the user to sort out whatever the evaluator said is \
missing, then retry. If it returns ``incubating`` your phase flips and \
a new prompt takes over. Do not try to write SKILL.md, fork \
directories, or otherwise build the colony yourself in this phase.\
"""
_queen_role_incubating = """\
@@ -260,14 +259,14 @@ search_files, run_command, undo_changes
- MUST Follow the browser-automation skill protocol before using browser tools.
## Hand off to a colony
- start_incubating_colony(colony_name, intended_purpose) Use this when \
the user wants persistent / recurring / headless work that needs to \
outlive THIS chat. It does NOT fork on its own; it spawns a one-shot \
evaluator that reads this conversation and decides whether the spec \
is settled enough to proceed. On approval your phase flips to \
INCUBATING and a new tool surface (including create_colony itself) \
unlocks. On rejection you stay here and keep the conversation going \
to fill the gaps the evaluator named.
- start_incubating_colony(colony_name) Use this when the user wants \
persistent / recurring / headless work that needs to outlive THIS \
chat. It does NOT fork on its own; it spawns a one-shot evaluator \
that reads this conversation and decides whether the spec is settled \
enough to proceed. On approval your phase flips to INCUBATING and a \
new tool surface (including create_colony itself) unlocks. On \
rejection you stay here and keep the conversation going to fill the \
gaps the evaluator named.
"""
_queen_tools_incubating = """
+20 -54
View File
@@ -69,15 +69,17 @@ logger = logging.getLogger(__name__)
# Phrasing intentionally invites the queen's judgement; do NOT turn this
# into a hard checklist.
_INCUBATING_APPROVAL_GUIDANCE = (
"Approved to incubate colony '{colony_name}' for: {intended_purpose}\n\n"
"Approved to incubate colony '{colony_name}'.\n\n"
"Your phase has flipped to INCUBATING. Before you call create_colony, "
"the worker will need operational details that are easy to lose in a "
"you'll need operational details that are easy to lose in a "
"planning conversation. Take a moment to figure out what's still "
"ambiguous for THIS colony — for example: how many tasks should run "
"in parallel, what schedule fits (cron, interval, manual-only), what "
"should the worker write into progress.db so the user can review "
"results later, how to handle partial failures, what credentials or "
"MCP servers the worker needs that you haven't discussed. You don't "
"ambiguous for THIS colony — for example: how many worker processes "
"should run in parallel (e.g. 1 for a digest, 5 for a fan-out), what "
"schedule fits (cron, interval), what should the worker write into "
"progress tracking(progress.db) so the user "
"can review results later, how to handle partial failures, what "
"credentials or MCP servers the worker needs that you haven't "
"discussed. You don't "
"need to cover every example — only the items that actually matter "
"for this colony, and only the ones the user hasn't already implied. "
"Use ask_user (batch several questions into one call when you have "
@@ -151,13 +153,11 @@ class QueenPhaseState:
prompt_working: str = ""
prompt_reviewing: str = ""
# Last-set incubation context (colony_name + intended_purpose), populated
# by start_incubating_colony when the evaluator approves. Read by
# get_current_prompt() to interpolate the colony name into the
# incubating role prompt so the queen sees the same name across turns
# without having to remember it from the tool result.
# Last-set incubation context, populated by start_incubating_colony when
# the evaluator approves. Read by get_current_prompt() to interpolate the
# colony name into the incubating role prompt so the queen sees the same
# name across turns without having to remember it from the tool result.
incubating_colony_name: str | None = None
incubating_intended_purpose: str | None = None
# Default skill operational protocols — appended to every phase prompt
protocols_prompt: str = ""
@@ -421,7 +421,6 @@ class QueenPhaseState:
self.phase = "independent"
# Clear stale incubation context so a future incubation starts fresh.
self.incubating_colony_name = None
self.incubating_intended_purpose = None
tool_names = [t.name for t in self.independent_tools]
logger.info("Queen phase → independent (source=%s, tools: %s)", source, tool_names)
await self._emit_phase_event()
@@ -436,30 +435,25 @@ class QueenPhaseState:
self,
*,
colony_name: str,
intended_purpose: str,
source: str = "tool",
) -> None:
"""Switch to incubating phase — queen drafts the colony spec.
Caller must already have validated colony_name. Stores the active
incubation context on self so get_current_prompt() can interpolate
it on every turn (the queen otherwise loses the colony_name after
the first tool result rolls past in the conversation history).
colony_name on self so get_current_prompt() can interpolate it on
every turn (the queen otherwise loses the colony_name after the
first tool result rolls past in the conversation history).
Args:
colony_name: Validated colony slug (lowercase alphanumeric + _).
intended_purpose: One-paragraph brief from the queen.
source: "tool", "frontend", or "auto".
"""
if self.phase == "incubating":
# Allow re-statement of context even when already incubating
# the queen may have refined her intended_purpose mid-flight.
# Allow re-statement even when already incubating.
self.incubating_colony_name = colony_name
self.incubating_intended_purpose = intended_purpose
return
self.phase = "incubating"
self.incubating_colony_name = colony_name
self.incubating_intended_purpose = intended_purpose
tool_names = [t.name for t in self.incubating_tools]
logger.info(
"Queen phase → incubating (source=%s, colony=%s, tools: %s)",
@@ -2211,7 +2205,6 @@ def register_queen_lifecycle_tools(
async def start_incubating_colony(
*,
colony_name: str,
intended_purpose: str,
) -> str:
"""Gate the queen behind a one-shot readiness evaluator.
@@ -2233,18 +2226,6 @@ def register_queen_lifecycle_tools(
{"error": ("colony_name must be lowercase alphanumeric with underscores (e.g. 'morning_hn_digest').")}
)
purpose = (intended_purpose or "").strip()
if not purpose:
return json.dumps(
{
"error": (
"intended_purpose is required — describe in one "
"paragraph what the colony will do, on what "
"cadence, and why it must outlive this chat."
)
}
)
phase_state = getattr(session, "phase_state", None)
if phase_state is None:
return json.dumps({"error": "phase_state is not initialised on this session."})
@@ -2305,7 +2286,6 @@ def register_queen_lifecycle_tools(
llm=llm,
messages=messages,
colony_name=cn,
intended_purpose=purpose,
)
if not verdict.get("ready"):
@@ -2323,10 +2303,9 @@ def register_queen_lifecycle_tools(
# Approved — flip phase. switch_to_incubating publishes
# QUEEN_PHASE_CHANGED so the frontend badge updates and stores
# the colony_name + purpose for the role prompt to interpolate.
# the colony_name for the role prompt to interpolate.
await phase_state.switch_to_incubating(
colony_name=cn,
intended_purpose=purpose,
source="tool",
)
@@ -2334,11 +2313,7 @@ def register_queen_lifecycle_tools(
{
"status": "incubating",
"colony_name": cn,
"intended_purpose": purpose,
"guidance": _INCUBATING_APPROVAL_GUIDANCE.format(
colony_name=cn,
intended_purpose=purpose,
),
"guidance": _INCUBATING_APPROVAL_GUIDANCE.format(colony_name=cn),
}
)
@@ -2378,17 +2353,8 @@ def register_queen_lifecycle_tools(
"'inbox_monitor')."
),
},
"intended_purpose": {
"type": "string",
"description": (
"One-paragraph brief: what the colony will do, "
"on what cadence, why it must outlive this "
"chat. Do NOT write the SKILL.md here — that "
"happens in INCUBATING phase after approval."
),
},
},
"required": ["colony_name", "intended_purpose"],
"required": ["colony_name"],
},
)
registry.register(