fix: incubating mode approval guidance injection
This commit is contained in:
@@ -1,9 +1,8 @@
|
|||||||
"""One-shot LLM gate that decides if a queen DM is ready to fork a colony.
|
"""One-shot LLM gate that decides if a queen DM is ready to fork a colony.
|
||||||
|
|
||||||
The queen's ``start_incubating_colony`` tool calls :func:`evaluate` with
|
The queen's ``start_incubating_colony`` tool calls :func:`evaluate` with
|
||||||
the queen's recent conversation, a proposed ``colony_name``, and a
|
the queen's recent conversation and a proposed ``colony_name``. The
|
||||||
one-paragraph ``intended_purpose``. The evaluator returns a structured
|
evaluator returns a structured verdict:
|
||||||
verdict:
|
|
||||||
|
|
||||||
{
|
{
|
||||||
"ready": bool,
|
"ready": bool,
|
||||||
@@ -38,8 +37,8 @@ You gate whether a queen agent should commit to forking a persistent
|
|||||||
expensive: it ends the user's chat with this queen and the worker runs
|
expensive: it ends the user's chat with this queen and the worker runs
|
||||||
unattended afterward, so the spec must be settled before you approve.
|
unattended afterward, so the spec must be settled before you approve.
|
||||||
|
|
||||||
Read the conversation excerpt and the queen's proposed colony_name +
|
Read the conversation excerpt and the queen's proposed colony_name,
|
||||||
intended_purpose, then decide.
|
then decide.
|
||||||
|
|
||||||
APPROVE (ready=true) only when ALL of the following hold:
|
APPROVE (ready=true) only when ALL of the following hold:
|
||||||
1. The user has explicitly asked for work that needs to outlive this
|
1. The user has explicitly asked for work that needs to outlive this
|
||||||
@@ -128,11 +127,9 @@ def format_conversation_excerpt(messages: list[Message]) -> str:
|
|||||||
def _build_user_message(
|
def _build_user_message(
|
||||||
conversation_excerpt: str,
|
conversation_excerpt: str,
|
||||||
colony_name: str,
|
colony_name: str,
|
||||||
intended_purpose: str,
|
|
||||||
) -> str:
|
) -> str:
|
||||||
return (
|
return (
|
||||||
f"## Proposed colony name\n{colony_name}\n\n"
|
f"## Proposed colony name\n{colony_name}\n\n"
|
||||||
f"## Queen's intended_purpose\n{intended_purpose.strip()}\n\n"
|
|
||||||
f"## Recent conversation (oldest → newest)\n{conversation_excerpt}\n\n"
|
f"## Recent conversation (oldest → newest)\n{conversation_excerpt}\n\n"
|
||||||
"Decide: should this queen be approved to enter INCUBATING phase?"
|
"Decide: should this queen be approved to enter INCUBATING phase?"
|
||||||
)
|
)
|
||||||
@@ -189,7 +186,6 @@ async def evaluate(
|
|||||||
llm: Any,
|
llm: Any,
|
||||||
messages: list[Message],
|
messages: list[Message],
|
||||||
colony_name: str,
|
colony_name: str,
|
||||||
intended_purpose: str,
|
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
"""Run the incubating evaluator against the queen's conversation.
|
"""Run the incubating evaluator against the queen's conversation.
|
||||||
|
|
||||||
@@ -200,14 +196,13 @@ async def evaluate(
|
|||||||
messages: The queen's conversation messages, oldest first. The
|
messages: The queen's conversation messages, oldest first. The
|
||||||
evaluator slices its own tail; pass the full list.
|
evaluator slices its own tail; pass the full list.
|
||||||
colony_name: Validated colony slug.
|
colony_name: Validated colony slug.
|
||||||
intended_purpose: Queen's one-paragraph brief.
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
``{"ready": bool, "reasons": [str], "missing_prerequisites": [str]}``.
|
``{"ready": bool, "reasons": [str], "missing_prerequisites": [str]}``.
|
||||||
Fail-closed on any error.
|
Fail-closed on any error.
|
||||||
"""
|
"""
|
||||||
excerpt = format_conversation_excerpt(messages)
|
excerpt = format_conversation_excerpt(messages)
|
||||||
user_msg = _build_user_message(excerpt, colony_name, intended_purpose)
|
user_msg = _build_user_message(excerpt, colony_name)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = await llm.acomplete(
|
response = await llm.acomplete(
|
||||||
|
|||||||
@@ -144,13 +144,12 @@ several entries when you have multiple clarifications. \
|
|||||||
When the user clearly wants persistent / recurring / headless work that \
|
When the user clearly wants persistent / recurring / headless work that \
|
||||||
needs to outlive THIS chat (e.g. "every morning", "monitor X and alert \
|
needs to outlive THIS chat (e.g. "every morning", "monitor X and alert \
|
||||||
me", "set up a job that…"), call ``start_incubating_colony`` with a \
|
me", "set up a job that…"), call ``start_incubating_colony`` with a \
|
||||||
proposed colony_name and a one-paragraph intended_purpose. A side \
|
proposed colony_name. A side evaluator reads the conversation and \
|
||||||
evaluator reads the conversation and decides if the spec is settled. If \
|
decides if the spec is settled. If it returns ``not_ready`` you keep \
|
||||||
it returns ``not_ready`` you keep talking with the user — sort out \
|
talking with the user — sort out whatever the evaluator said is \
|
||||||
whatever the evaluator said is missing, then retry. If it returns \
|
missing, then retry. If it returns ``incubating`` your phase flips and \
|
||||||
``incubating`` your phase flips and a new prompt takes over. Do not \
|
a new prompt takes over. Do not try to write SKILL.md, fork \
|
||||||
try to write SKILL.md, fork directories, or otherwise build the colony \
|
directories, or otherwise build the colony yourself in this phase.\
|
||||||
yourself in this phase.\
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_queen_role_incubating = """\
|
_queen_role_incubating = """\
|
||||||
@@ -260,14 +259,14 @@ search_files, run_command, undo_changes
|
|||||||
- MUST Follow the browser-automation skill protocol before using browser tools.
|
- MUST Follow the browser-automation skill protocol before using browser tools.
|
||||||
|
|
||||||
## Hand off to a colony
|
## Hand off to a colony
|
||||||
- start_incubating_colony(colony_name, intended_purpose) — Use this when \
|
- start_incubating_colony(colony_name) — Use this when the user wants \
|
||||||
the user wants persistent / recurring / headless work that needs to \
|
persistent / recurring / headless work that needs to outlive THIS \
|
||||||
outlive THIS chat. It does NOT fork on its own; it spawns a one-shot \
|
chat. It does NOT fork on its own; it spawns a one-shot evaluator \
|
||||||
evaluator that reads this conversation and decides whether the spec \
|
that reads this conversation and decides whether the spec is settled \
|
||||||
is settled enough to proceed. On approval your phase flips to \
|
enough to proceed. On approval your phase flips to INCUBATING and a \
|
||||||
INCUBATING and a new tool surface (including create_colony itself) \
|
new tool surface (including create_colony itself) unlocks. On \
|
||||||
unlocks. On rejection you stay here and keep the conversation going \
|
rejection you stay here and keep the conversation going to fill the \
|
||||||
to fill the gaps the evaluator named.
|
gaps the evaluator named.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_queen_tools_incubating = """
|
_queen_tools_incubating = """
|
||||||
|
|||||||
@@ -69,15 +69,17 @@ logger = logging.getLogger(__name__)
|
|||||||
# Phrasing intentionally invites the queen's judgement; do NOT turn this
|
# Phrasing intentionally invites the queen's judgement; do NOT turn this
|
||||||
# into a hard checklist.
|
# into a hard checklist.
|
||||||
_INCUBATING_APPROVAL_GUIDANCE = (
|
_INCUBATING_APPROVAL_GUIDANCE = (
|
||||||
"Approved to incubate colony '{colony_name}' for: {intended_purpose}\n\n"
|
"Approved to incubate colony '{colony_name}'.\n\n"
|
||||||
"Your phase has flipped to INCUBATING. Before you call create_colony, "
|
"Your phase has flipped to INCUBATING. Before you call create_colony, "
|
||||||
"the worker will need operational details that are easy to lose in a "
|
"you'll need operational details that are easy to lose in a "
|
||||||
"planning conversation. Take a moment to figure out what's still "
|
"planning conversation. Take a moment to figure out what's still "
|
||||||
"ambiguous for THIS colony — for example: how many tasks should run "
|
"ambiguous for THIS colony — for example: how many worker processes "
|
||||||
"in parallel, what schedule fits (cron, interval, manual-only), what "
|
"should run in parallel (e.g. 1 for a digest, 5 for a fan-out), what "
|
||||||
"should the worker write into progress.db so the user can review "
|
"schedule fits (cron, interval), what should the worker write into "
|
||||||
"results later, how to handle partial failures, what credentials or "
|
"progress tracking(progress.db) so the user "
|
||||||
"MCP servers the worker needs that you haven't discussed. You don't "
|
"can review results later, how to handle partial failures, what "
|
||||||
|
"credentials or MCP servers the worker needs that you haven't "
|
||||||
|
"discussed. You don't "
|
||||||
"need to cover every example — only the items that actually matter "
|
"need to cover every example — only the items that actually matter "
|
||||||
"for this colony, and only the ones the user hasn't already implied. "
|
"for this colony, and only the ones the user hasn't already implied. "
|
||||||
"Use ask_user (batch several questions into one call when you have "
|
"Use ask_user (batch several questions into one call when you have "
|
||||||
@@ -151,13 +153,11 @@ class QueenPhaseState:
|
|||||||
prompt_working: str = ""
|
prompt_working: str = ""
|
||||||
prompt_reviewing: str = ""
|
prompt_reviewing: str = ""
|
||||||
|
|
||||||
# Last-set incubation context (colony_name + intended_purpose), populated
|
# Last-set incubation context, populated by start_incubating_colony when
|
||||||
# by start_incubating_colony when the evaluator approves. Read by
|
# the evaluator approves. Read by get_current_prompt() to interpolate the
|
||||||
# get_current_prompt() to interpolate the colony name into the
|
# colony name into the incubating role prompt so the queen sees the same
|
||||||
# incubating role prompt so the queen sees the same name across turns
|
# name across turns without having to remember it from the tool result.
|
||||||
# without having to remember it from the tool result.
|
|
||||||
incubating_colony_name: str | None = None
|
incubating_colony_name: str | None = None
|
||||||
incubating_intended_purpose: str | None = None
|
|
||||||
|
|
||||||
# Default skill operational protocols — appended to every phase prompt
|
# Default skill operational protocols — appended to every phase prompt
|
||||||
protocols_prompt: str = ""
|
protocols_prompt: str = ""
|
||||||
@@ -421,7 +421,6 @@ class QueenPhaseState:
|
|||||||
self.phase = "independent"
|
self.phase = "independent"
|
||||||
# Clear stale incubation context so a future incubation starts fresh.
|
# Clear stale incubation context so a future incubation starts fresh.
|
||||||
self.incubating_colony_name = None
|
self.incubating_colony_name = None
|
||||||
self.incubating_intended_purpose = None
|
|
||||||
tool_names = [t.name for t in self.independent_tools]
|
tool_names = [t.name for t in self.independent_tools]
|
||||||
logger.info("Queen phase → independent (source=%s, tools: %s)", source, tool_names)
|
logger.info("Queen phase → independent (source=%s, tools: %s)", source, tool_names)
|
||||||
await self._emit_phase_event()
|
await self._emit_phase_event()
|
||||||
@@ -436,30 +435,25 @@ class QueenPhaseState:
|
|||||||
self,
|
self,
|
||||||
*,
|
*,
|
||||||
colony_name: str,
|
colony_name: str,
|
||||||
intended_purpose: str,
|
|
||||||
source: str = "tool",
|
source: str = "tool",
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Switch to incubating phase — queen drafts the colony spec.
|
"""Switch to incubating phase — queen drafts the colony spec.
|
||||||
|
|
||||||
Caller must already have validated colony_name. Stores the active
|
Caller must already have validated colony_name. Stores the active
|
||||||
incubation context on self so get_current_prompt() can interpolate
|
colony_name on self so get_current_prompt() can interpolate it on
|
||||||
it on every turn (the queen otherwise loses the colony_name after
|
every turn (the queen otherwise loses the colony_name after the
|
||||||
the first tool result rolls past in the conversation history).
|
first tool result rolls past in the conversation history).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
colony_name: Validated colony slug (lowercase alphanumeric + _).
|
colony_name: Validated colony slug (lowercase alphanumeric + _).
|
||||||
intended_purpose: One-paragraph brief from the queen.
|
|
||||||
source: "tool", "frontend", or "auto".
|
source: "tool", "frontend", or "auto".
|
||||||
"""
|
"""
|
||||||
if self.phase == "incubating":
|
if self.phase == "incubating":
|
||||||
# Allow re-statement of context even when already incubating —
|
# Allow re-statement even when already incubating.
|
||||||
# the queen may have refined her intended_purpose mid-flight.
|
|
||||||
self.incubating_colony_name = colony_name
|
self.incubating_colony_name = colony_name
|
||||||
self.incubating_intended_purpose = intended_purpose
|
|
||||||
return
|
return
|
||||||
self.phase = "incubating"
|
self.phase = "incubating"
|
||||||
self.incubating_colony_name = colony_name
|
self.incubating_colony_name = colony_name
|
||||||
self.incubating_intended_purpose = intended_purpose
|
|
||||||
tool_names = [t.name for t in self.incubating_tools]
|
tool_names = [t.name for t in self.incubating_tools]
|
||||||
logger.info(
|
logger.info(
|
||||||
"Queen phase → incubating (source=%s, colony=%s, tools: %s)",
|
"Queen phase → incubating (source=%s, colony=%s, tools: %s)",
|
||||||
@@ -2211,7 +2205,6 @@ def register_queen_lifecycle_tools(
|
|||||||
async def start_incubating_colony(
|
async def start_incubating_colony(
|
||||||
*,
|
*,
|
||||||
colony_name: str,
|
colony_name: str,
|
||||||
intended_purpose: str,
|
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Gate the queen behind a one-shot readiness evaluator.
|
"""Gate the queen behind a one-shot readiness evaluator.
|
||||||
|
|
||||||
@@ -2233,18 +2226,6 @@ def register_queen_lifecycle_tools(
|
|||||||
{"error": ("colony_name must be lowercase alphanumeric with underscores (e.g. 'morning_hn_digest').")}
|
{"error": ("colony_name must be lowercase alphanumeric with underscores (e.g. 'morning_hn_digest').")}
|
||||||
)
|
)
|
||||||
|
|
||||||
purpose = (intended_purpose or "").strip()
|
|
||||||
if not purpose:
|
|
||||||
return json.dumps(
|
|
||||||
{
|
|
||||||
"error": (
|
|
||||||
"intended_purpose is required — describe in one "
|
|
||||||
"paragraph what the colony will do, on what "
|
|
||||||
"cadence, and why it must outlive this chat."
|
|
||||||
)
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
phase_state = getattr(session, "phase_state", None)
|
phase_state = getattr(session, "phase_state", None)
|
||||||
if phase_state is None:
|
if phase_state is None:
|
||||||
return json.dumps({"error": "phase_state is not initialised on this session."})
|
return json.dumps({"error": "phase_state is not initialised on this session."})
|
||||||
@@ -2305,7 +2286,6 @@ def register_queen_lifecycle_tools(
|
|||||||
llm=llm,
|
llm=llm,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
colony_name=cn,
|
colony_name=cn,
|
||||||
intended_purpose=purpose,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if not verdict.get("ready"):
|
if not verdict.get("ready"):
|
||||||
@@ -2323,10 +2303,9 @@ def register_queen_lifecycle_tools(
|
|||||||
|
|
||||||
# Approved — flip phase. switch_to_incubating publishes
|
# Approved — flip phase. switch_to_incubating publishes
|
||||||
# QUEEN_PHASE_CHANGED so the frontend badge updates and stores
|
# QUEEN_PHASE_CHANGED so the frontend badge updates and stores
|
||||||
# the colony_name + purpose for the role prompt to interpolate.
|
# the colony_name for the role prompt to interpolate.
|
||||||
await phase_state.switch_to_incubating(
|
await phase_state.switch_to_incubating(
|
||||||
colony_name=cn,
|
colony_name=cn,
|
||||||
intended_purpose=purpose,
|
|
||||||
source="tool",
|
source="tool",
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -2334,11 +2313,7 @@ def register_queen_lifecycle_tools(
|
|||||||
{
|
{
|
||||||
"status": "incubating",
|
"status": "incubating",
|
||||||
"colony_name": cn,
|
"colony_name": cn,
|
||||||
"intended_purpose": purpose,
|
"guidance": _INCUBATING_APPROVAL_GUIDANCE.format(colony_name=cn),
|
||||||
"guidance": _INCUBATING_APPROVAL_GUIDANCE.format(
|
|
||||||
colony_name=cn,
|
|
||||||
intended_purpose=purpose,
|
|
||||||
),
|
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -2378,17 +2353,8 @@ def register_queen_lifecycle_tools(
|
|||||||
"'inbox_monitor')."
|
"'inbox_monitor')."
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
"intended_purpose": {
|
|
||||||
"type": "string",
|
|
||||||
"description": (
|
|
||||||
"One-paragraph brief: what the colony will do, "
|
|
||||||
"on what cadence, why it must outlive this "
|
|
||||||
"chat. Do NOT write the SKILL.md here — that "
|
|
||||||
"happens in INCUBATING phase after approval."
|
|
||||||
),
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
"required": ["colony_name", "intended_purpose"],
|
"required": ["colony_name"],
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
registry.register(
|
registry.register(
|
||||||
|
|||||||
Reference in New Issue
Block a user