fix: incubating mode approval guidance injection
This commit is contained in:
@@ -1,9 +1,8 @@
|
||||
"""One-shot LLM gate that decides if a queen DM is ready to fork a colony.
|
||||
|
||||
The queen's ``start_incubating_colony`` tool calls :func:`evaluate` with
|
||||
the queen's recent conversation, a proposed ``colony_name``, and a
|
||||
one-paragraph ``intended_purpose``. The evaluator returns a structured
|
||||
verdict:
|
||||
the queen's recent conversation and a proposed ``colony_name``. The
|
||||
evaluator returns a structured verdict:
|
||||
|
||||
{
|
||||
"ready": bool,
|
||||
@@ -38,8 +37,8 @@ You gate whether a queen agent should commit to forking a persistent
|
||||
expensive: it ends the user's chat with this queen and the worker runs
|
||||
unattended afterward, so the spec must be settled before you approve.
|
||||
|
||||
Read the conversation excerpt and the queen's proposed colony_name +
|
||||
intended_purpose, then decide.
|
||||
Read the conversation excerpt and the queen's proposed colony_name,
|
||||
then decide.
|
||||
|
||||
APPROVE (ready=true) only when ALL of the following hold:
|
||||
1. The user has explicitly asked for work that needs to outlive this
|
||||
@@ -128,11 +127,9 @@ def format_conversation_excerpt(messages: list[Message]) -> str:
|
||||
def _build_user_message(
|
||||
conversation_excerpt: str,
|
||||
colony_name: str,
|
||||
intended_purpose: str,
|
||||
) -> str:
|
||||
return (
|
||||
f"## Proposed colony name\n{colony_name}\n\n"
|
||||
f"## Queen's intended_purpose\n{intended_purpose.strip()}\n\n"
|
||||
f"## Recent conversation (oldest → newest)\n{conversation_excerpt}\n\n"
|
||||
"Decide: should this queen be approved to enter INCUBATING phase?"
|
||||
)
|
||||
@@ -189,7 +186,6 @@ async def evaluate(
|
||||
llm: Any,
|
||||
messages: list[Message],
|
||||
colony_name: str,
|
||||
intended_purpose: str,
|
||||
) -> dict[str, Any]:
|
||||
"""Run the incubating evaluator against the queen's conversation.
|
||||
|
||||
@@ -200,14 +196,13 @@ async def evaluate(
|
||||
messages: The queen's conversation messages, oldest first. The
|
||||
evaluator slices its own tail; pass the full list.
|
||||
colony_name: Validated colony slug.
|
||||
intended_purpose: Queen's one-paragraph brief.
|
||||
|
||||
Returns:
|
||||
``{"ready": bool, "reasons": [str], "missing_prerequisites": [str]}``.
|
||||
Fail-closed on any error.
|
||||
"""
|
||||
excerpt = format_conversation_excerpt(messages)
|
||||
user_msg = _build_user_message(excerpt, colony_name, intended_purpose)
|
||||
user_msg = _build_user_message(excerpt, colony_name)
|
||||
|
||||
try:
|
||||
response = await llm.acomplete(
|
||||
|
||||
@@ -144,13 +144,12 @@ several entries when you have multiple clarifications. \
|
||||
When the user clearly wants persistent / recurring / headless work that \
|
||||
needs to outlive THIS chat (e.g. "every morning", "monitor X and alert \
|
||||
me", "set up a job that…"), call ``start_incubating_colony`` with a \
|
||||
proposed colony_name and a one-paragraph intended_purpose. A side \
|
||||
evaluator reads the conversation and decides if the spec is settled. If \
|
||||
it returns ``not_ready`` you keep talking with the user — sort out \
|
||||
whatever the evaluator said is missing, then retry. If it returns \
|
||||
``incubating`` your phase flips and a new prompt takes over. Do not \
|
||||
try to write SKILL.md, fork directories, or otherwise build the colony \
|
||||
yourself in this phase.\
|
||||
proposed colony_name. A side evaluator reads the conversation and \
|
||||
decides if the spec is settled. If it returns ``not_ready`` you keep \
|
||||
talking with the user — sort out whatever the evaluator said is \
|
||||
missing, then retry. If it returns ``incubating`` your phase flips and \
|
||||
a new prompt takes over. Do not try to write SKILL.md, fork \
|
||||
directories, or otherwise build the colony yourself in this phase.\
|
||||
"""
|
||||
|
||||
_queen_role_incubating = """\
|
||||
@@ -260,14 +259,14 @@ search_files, run_command, undo_changes
|
||||
- MUST Follow the browser-automation skill protocol before using browser tools.
|
||||
|
||||
## Hand off to a colony
|
||||
- start_incubating_colony(colony_name, intended_purpose) — Use this when \
|
||||
the user wants persistent / recurring / headless work that needs to \
|
||||
outlive THIS chat. It does NOT fork on its own; it spawns a one-shot \
|
||||
evaluator that reads this conversation and decides whether the spec \
|
||||
is settled enough to proceed. On approval your phase flips to \
|
||||
INCUBATING and a new tool surface (including create_colony itself) \
|
||||
unlocks. On rejection you stay here and keep the conversation going \
|
||||
to fill the gaps the evaluator named.
|
||||
- start_incubating_colony(colony_name) — Use this when the user wants \
|
||||
persistent / recurring / headless work that needs to outlive THIS \
|
||||
chat. It does NOT fork on its own; it spawns a one-shot evaluator \
|
||||
that reads this conversation and decides whether the spec is settled \
|
||||
enough to proceed. On approval your phase flips to INCUBATING and a \
|
||||
new tool surface (including create_colony itself) unlocks. On \
|
||||
rejection you stay here and keep the conversation going to fill the \
|
||||
gaps the evaluator named.
|
||||
"""
|
||||
|
||||
_queen_tools_incubating = """
|
||||
|
||||
@@ -69,15 +69,17 @@ logger = logging.getLogger(__name__)
|
||||
# Phrasing intentionally invites the queen's judgement; do NOT turn this
|
||||
# into a hard checklist.
|
||||
_INCUBATING_APPROVAL_GUIDANCE = (
|
||||
"Approved to incubate colony '{colony_name}' for: {intended_purpose}\n\n"
|
||||
"Approved to incubate colony '{colony_name}'.\n\n"
|
||||
"Your phase has flipped to INCUBATING. Before you call create_colony, "
|
||||
"the worker will need operational details that are easy to lose in a "
|
||||
"you'll need operational details that are easy to lose in a "
|
||||
"planning conversation. Take a moment to figure out what's still "
|
||||
"ambiguous for THIS colony — for example: how many tasks should run "
|
||||
"in parallel, what schedule fits (cron, interval, manual-only), what "
|
||||
"should the worker write into progress.db so the user can review "
|
||||
"results later, how to handle partial failures, what credentials or "
|
||||
"MCP servers the worker needs that you haven't discussed. You don't "
|
||||
"ambiguous for THIS colony — for example: how many worker processes "
|
||||
"should run in parallel (e.g. 1 for a digest, 5 for a fan-out), what "
|
||||
"schedule fits (cron, interval), what should the worker write into "
|
||||
"progress tracking(progress.db) so the user "
|
||||
"can review results later, how to handle partial failures, what "
|
||||
"credentials or MCP servers the worker needs that you haven't "
|
||||
"discussed. You don't "
|
||||
"need to cover every example — only the items that actually matter "
|
||||
"for this colony, and only the ones the user hasn't already implied. "
|
||||
"Use ask_user (batch several questions into one call when you have "
|
||||
@@ -151,13 +153,11 @@ class QueenPhaseState:
|
||||
prompt_working: str = ""
|
||||
prompt_reviewing: str = ""
|
||||
|
||||
# Last-set incubation context (colony_name + intended_purpose), populated
|
||||
# by start_incubating_colony when the evaluator approves. Read by
|
||||
# get_current_prompt() to interpolate the colony name into the
|
||||
# incubating role prompt so the queen sees the same name across turns
|
||||
# without having to remember it from the tool result.
|
||||
# Last-set incubation context, populated by start_incubating_colony when
|
||||
# the evaluator approves. Read by get_current_prompt() to interpolate the
|
||||
# colony name into the incubating role prompt so the queen sees the same
|
||||
# name across turns without having to remember it from the tool result.
|
||||
incubating_colony_name: str | None = None
|
||||
incubating_intended_purpose: str | None = None
|
||||
|
||||
# Default skill operational protocols — appended to every phase prompt
|
||||
protocols_prompt: str = ""
|
||||
@@ -421,7 +421,6 @@ class QueenPhaseState:
|
||||
self.phase = "independent"
|
||||
# Clear stale incubation context so a future incubation starts fresh.
|
||||
self.incubating_colony_name = None
|
||||
self.incubating_intended_purpose = None
|
||||
tool_names = [t.name for t in self.independent_tools]
|
||||
logger.info("Queen phase → independent (source=%s, tools: %s)", source, tool_names)
|
||||
await self._emit_phase_event()
|
||||
@@ -436,30 +435,25 @@ class QueenPhaseState:
|
||||
self,
|
||||
*,
|
||||
colony_name: str,
|
||||
intended_purpose: str,
|
||||
source: str = "tool",
|
||||
) -> None:
|
||||
"""Switch to incubating phase — queen drafts the colony spec.
|
||||
|
||||
Caller must already have validated colony_name. Stores the active
|
||||
incubation context on self so get_current_prompt() can interpolate
|
||||
it on every turn (the queen otherwise loses the colony_name after
|
||||
the first tool result rolls past in the conversation history).
|
||||
colony_name on self so get_current_prompt() can interpolate it on
|
||||
every turn (the queen otherwise loses the colony_name after the
|
||||
first tool result rolls past in the conversation history).
|
||||
|
||||
Args:
|
||||
colony_name: Validated colony slug (lowercase alphanumeric + _).
|
||||
intended_purpose: One-paragraph brief from the queen.
|
||||
source: "tool", "frontend", or "auto".
|
||||
"""
|
||||
if self.phase == "incubating":
|
||||
# Allow re-statement of context even when already incubating —
|
||||
# the queen may have refined her intended_purpose mid-flight.
|
||||
# Allow re-statement even when already incubating.
|
||||
self.incubating_colony_name = colony_name
|
||||
self.incubating_intended_purpose = intended_purpose
|
||||
return
|
||||
self.phase = "incubating"
|
||||
self.incubating_colony_name = colony_name
|
||||
self.incubating_intended_purpose = intended_purpose
|
||||
tool_names = [t.name for t in self.incubating_tools]
|
||||
logger.info(
|
||||
"Queen phase → incubating (source=%s, colony=%s, tools: %s)",
|
||||
@@ -2211,7 +2205,6 @@ def register_queen_lifecycle_tools(
|
||||
async def start_incubating_colony(
|
||||
*,
|
||||
colony_name: str,
|
||||
intended_purpose: str,
|
||||
) -> str:
|
||||
"""Gate the queen behind a one-shot readiness evaluator.
|
||||
|
||||
@@ -2233,18 +2226,6 @@ def register_queen_lifecycle_tools(
|
||||
{"error": ("colony_name must be lowercase alphanumeric with underscores (e.g. 'morning_hn_digest').")}
|
||||
)
|
||||
|
||||
purpose = (intended_purpose or "").strip()
|
||||
if not purpose:
|
||||
return json.dumps(
|
||||
{
|
||||
"error": (
|
||||
"intended_purpose is required — describe in one "
|
||||
"paragraph what the colony will do, on what "
|
||||
"cadence, and why it must outlive this chat."
|
||||
)
|
||||
}
|
||||
)
|
||||
|
||||
phase_state = getattr(session, "phase_state", None)
|
||||
if phase_state is None:
|
||||
return json.dumps({"error": "phase_state is not initialised on this session."})
|
||||
@@ -2305,7 +2286,6 @@ def register_queen_lifecycle_tools(
|
||||
llm=llm,
|
||||
messages=messages,
|
||||
colony_name=cn,
|
||||
intended_purpose=purpose,
|
||||
)
|
||||
|
||||
if not verdict.get("ready"):
|
||||
@@ -2323,10 +2303,9 @@ def register_queen_lifecycle_tools(
|
||||
|
||||
# Approved — flip phase. switch_to_incubating publishes
|
||||
# QUEEN_PHASE_CHANGED so the frontend badge updates and stores
|
||||
# the colony_name + purpose for the role prompt to interpolate.
|
||||
# the colony_name for the role prompt to interpolate.
|
||||
await phase_state.switch_to_incubating(
|
||||
colony_name=cn,
|
||||
intended_purpose=purpose,
|
||||
source="tool",
|
||||
)
|
||||
|
||||
@@ -2334,11 +2313,7 @@ def register_queen_lifecycle_tools(
|
||||
{
|
||||
"status": "incubating",
|
||||
"colony_name": cn,
|
||||
"intended_purpose": purpose,
|
||||
"guidance": _INCUBATING_APPROVAL_GUIDANCE.format(
|
||||
colony_name=cn,
|
||||
intended_purpose=purpose,
|
||||
),
|
||||
"guidance": _INCUBATING_APPROVAL_GUIDANCE.format(colony_name=cn),
|
||||
}
|
||||
)
|
||||
|
||||
@@ -2378,17 +2353,8 @@ def register_queen_lifecycle_tools(
|
||||
"'inbox_monitor')."
|
||||
),
|
||||
},
|
||||
"intended_purpose": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"One-paragraph brief: what the colony will do, "
|
||||
"on what cadence, why it must outlive this "
|
||||
"chat. Do NOT write the SKILL.md here — that "
|
||||
"happens in INCUBATING phase after approval."
|
||||
),
|
||||
},
|
||||
},
|
||||
"required": ["colony_name", "intended_purpose"],
|
||||
"required": ["colony_name"],
|
||||
},
|
||||
)
|
||||
registry.register(
|
||||
|
||||
Reference in New Issue
Block a user