diff --git a/core/framework/agent_loop/agent_loop.py b/core/framework/agent_loop/agent_loop.py index f9e932d6..feb0471f 100644 --- a/core/framework/agent_loop/agent_loop.py +++ b/core/framework/agent_loop/agent_loop.py @@ -71,6 +71,7 @@ from framework.agent_loop.internals.synthetic_tools import ( build_report_to_parent_tool, handle_report_to_parent, ) +from framework.agent_loop.internals.tool_input_coercer import coerce_tool_input from framework.agent_loop.internals.tool_result_handler import ( build_json_preview, execute_tool, @@ -2835,7 +2836,17 @@ class AgentLoop(AgentProtocol): # nudge on its next turn without losing the real execution output. replay_prefixes_by_id: dict[str, str] = {} + # Schema-driven coercion of tool arguments. Heals the small + # handful of drift patterns that non-frontier models emit + # (numbers-as-strings, array-of-{label} wrappers, arrays + # sent as JSON strings, singleton scalars). Runs once per + # tool call before dispatch; see tool_input_coercer module. + _tool_by_name = {t.name: t for t in tools} + for tc in tool_calls: + _tool_schema = _tool_by_name.get(tc.tool_name) + if _tool_schema is not None: + coerce_tool_input(_tool_schema, tc.tool_input) tool_call_count += 1 if hard_limit > 0 and tool_call_count > hard_limit: limit_hit = True @@ -2893,11 +2904,15 @@ class AgentLoop(AgentProtocol): user_input_requested = False continue - # Normalize + self-heal each question entry. Some - # model families cram options inside the prompt as a - # pseudo-XML blob like "What now?\n - # _OPTIONS: [\"A\", \"B\"]". sanitize_ask_user_inputs - # strips the tag and recovers the inline options. + # Normalize + self-heal each question entry. The + # generic tool_input_coercer has already handled + # schema-shape drift (array-of-string options, JSON + # strings, etc.), so here we only deal with + # prompt-style drift: some model families cram + # options inside the prompt as a pseudo-XML blob + # like "What now?\n_OPTIONS: [\"A\", \"B\"]". 
+ # sanitize_ask_user_inputs strips the tag and + # recovers the inline options as a fallback. questions: list[dict] = [] for i, q in enumerate(raw_questions): if not isinstance(q, dict): @@ -2906,21 +2921,12 @@ class AgentLoop(AgentProtocol): raw_prompt = q.get("prompt", q.get("question", "")) raw_opts = q.get("options", None) cleaned_prompt, recovered_opts = sanitize_ask_user_inputs(raw_prompt, raw_opts) - if recovered_opts is not None and raw_opts is None: - raw_opts = recovered_opts opts: list[str] | None = None - if isinstance(raw_opts, list): + if isinstance(raw_opts, list) and raw_opts: opts = [str(o) for o in raw_opts if o] - elif isinstance(raw_opts, str) and raw_opts.strip(): - # Defensive: smaller models sometimes send a - # JSON-encoded string instead of an array. - try: - parsed = json.loads(raw_opts) - if isinstance(parsed, list): - opts = [str(o) for o in parsed if o] - except (json.JSONDecodeError, TypeError): - pass + elif recovered_opts is not None: + opts = recovered_opts if opts is not None and len(opts) < 2: opts = None # fall back to free-text diff --git a/core/framework/agent_loop/internals/synthetic_tools.py b/core/framework/agent_loop/internals/synthetic_tools.py index 016cfe42..e41b2d9e 100644 --- a/core/framework/agent_loop/internals/synthetic_tools.py +++ b/core/framework/agent_loop/internals/synthetic_tools.py @@ -101,9 +101,9 @@ Use this tool when you need to ask the user questions during execution. Reach fo - You want post-task feedback, or to offer saving a skill or updating memory Usage notes: -- Users will always be able to select "Other" to provide custom text input -- Use multiSelect: true to allow multiple answers to be selected for a question -- If you recommend a specific option, make that the first option in the list and add "(Recommended)" at the end of the label +- Users will always be able to select "Other" to provide custom text input, so do not include catch-all options like "Other" or "Something else" yourself. 
+- Each option is a plain string. Do NOT wrap options in `{"label": "..."}` or `{"value": "..."}` objects — pass the raw choice text directly, e.g. `"Email"`, not `{"label": "Email"}`. +- If you recommend a specific option, make that the first option in the list and append " (Recommended)" to the end of its text. - Call this tool whenever you need the user's response. - The prompt field must be plain text only. - Do not include XML, pseudo-tags, or inline option lists inside prompt. @@ -159,8 +159,13 @@ def build_ask_user_tool() -> Tool: "type": "array", "items": {"type": "string"}, "description": ( - "2-3 predefined choices. The UI appends an " - "'Other' free-text input automatically. " + "2-3 predefined choices as plain strings " + '(e.g. ["Yes", "No", "Maybe"]). Do NOT ' + 'wrap items in {"label": "..."} or ' + '{"value": "..."} objects — pass the raw ' + "choice text directly. The UI appends an " + "'Other' free-text input automatically, " + "so don't include catch-all options. " "Omit only when the user must type a free-form answer." ), "minItems": 2, diff --git a/core/framework/agent_loop/internals/tool_input_coercer.py b/core/framework/agent_loop/internals/tool_input_coercer.py new file mode 100644 index 00000000..c153e163 --- /dev/null +++ b/core/framework/agent_loop/internals/tool_input_coercer.py @@ -0,0 +1,291 @@ +"""Generic coercion of LLM-emitted tool arguments to match each tool's JSON schema. + +Small/mid-size models drift from tool schemas in predictable, boring ways: + +- A number field comes back as a string (``"42"`` instead of ``42``). +- A boolean field comes back as a string (``"true"`` instead of ``True``). +- An array-of-string field comes back as an array of objects + (``[{"label": "A"}, ...]`` instead of ``["A", ...]``). +- An array/object field comes back as a JSON-encoded string + (``'["A","B"]'`` instead of ``["A", "B"]``). +- A lone scalar arrives where the schema expects an array. 
from __future__ import annotations

import json
import logging
import math
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    # ``Tool`` is referenced only in annotations, which are lazy under
    # ``from __future__ import annotations``; keeping the import out of
    # the runtime graph avoids a needless dependency (and import cycles).
    from framework.llm.provider import Tool

logger = logging.getLogger(__name__)

# When an array-of-string field arrives as an array of objects, look
# for a text-carrying field in preference order. Covers the wrappers
# small models tend to produce: ``[{"label": "A"}]``, ``[{"value": "A"}]``,
# ``[{"text": "A"}]``, etc.
_STRING_EXTRACT_KEYS: tuple[str, ...] = (
    "label",
    "value",
    "text",
    "name",
    "title",
    "display",
)


def coerce_tool_input(tool: Tool, raw_input: dict[str, Any] | None) -> dict[str, Any]:
    """Coerce *raw_input* in place to match *tool*'s JSON schema.

    Only top-level keys that have a dict schema under
    ``tool.parameters["properties"]`` are considered; everything else is
    left untouched. Each replaced top-level value is logged with the
    tool name, property and shape transition.

    Args:
        tool: Object exposing ``name`` and ``parameters`` (a JSON schema).
        raw_input: The arguments emitted by the model.

    Returns:
        The same dict object as *raw_input* (mutated). When *raw_input*
        is not a dict it is returned unchanged if truthy, else ``{}``.
    """
    if not isinstance(raw_input, dict):
        return raw_input or {}

    schema = tool.parameters
    # Guard against a malformed (non-dict) schema instead of crashing on .get.
    props = schema.get("properties") if isinstance(schema, dict) else None
    if not isinstance(props, dict):
        return raw_input

    # Snapshot the items: we assign into raw_input while walking it.
    for key, original in list(raw_input.items()):
        prop_schema = props.get(key)
        if not isinstance(prop_schema, dict):
            continue
        coerced = _coerce(original, prop_schema)
        if coerced is not original:
            logger.info(
                "coerced tool input tool=%s prop=%s from=%s to=%s",
                tool.name,
                key,
                _shape(original),
                _shape(coerced),
            )
            raw_input[key] = coerced

    return raw_input


def _matches_type(value: Any, type_name: Any) -> bool:
    """Return True when *value* already satisfies JSON-schema *type_name*."""
    if type_name == "null":
        return value is None
    if type_name == "boolean":
        return isinstance(value, bool)
    # bool is a subclass of int; JSON schema does not treat it as a number.
    if type_name == "integer":
        return isinstance(value, int) and not isinstance(value, bool)
    if type_name == "number":
        return isinstance(value, (int, float)) and not isinstance(value, bool)
    if type_name == "string":
        return isinstance(value, str)
    if type_name == "array":
        return isinstance(value, list)
    if type_name == "object":
        return isinstance(value, dict)
    return False


def _coerce(value: Any, schema: dict[str, Any]) -> Any:
    """Dispatch on the schema's ``type`` field.

    Returns the *same object* on passthrough so callers can detect
    no-ops via identity (``coerced is value``).
    """
    expected = schema.get("type")
    if not expected:
        return value

    if isinstance(expected, list):
        # Union type. A value that already satisfies one member must not
        # be reshaped by another (e.g. a valid string wrapped into an
        # array, or None turned into [None]). We still run that member's
        # coercer so nested items/properties of containers get healed.
        for member in expected:
            if _matches_type(value, member):
                return _coerce(value, {**schema, "type": member})
        # No member matches: take the first coercion that changes the value.
        for member in expected:
            coerced = _coerce(value, {**schema, "type": member})
            if coerced is not value:
                return coerced
        return value

    if expected == "integer":
        return _coerce_integer(value)
    if expected == "number":
        return _coerce_number(value)
    if expected == "boolean":
        return _coerce_boolean(value)
    if expected == "string":
        return _coerce_string(value)
    if expected == "array":
        return _coerce_array(value, schema)
    if expected == "object":
        return _coerce_object(value, schema)

    return value


def _coerce_integer(value: Any) -> Any:
    """Turn an integer-valued numeric string into ``int``; else passthrough."""
    # Only strings are healed; bools, ints and everything else pass through
    # (bool is an int subclass, so True is deliberately not rewritten to 1).
    if not isinstance(value, str):
        return value
    # Exact parse first: going through float would corrupt ints > 2**53.
    exact = _parse_int(value)
    if exact is not None:
        return exact
    parsed = _parse_number(value)
    if parsed is None or not parsed.is_integer():
        # Unparseable, or has a fractional part — never truncate.
        return value
    return int(parsed)


def _coerce_number(value: Any) -> Any:
    """Turn a numeric string into ``int`` (when integral) or ``float``."""
    if not isinstance(value, str):
        return value
    exact = _parse_int(value)
    if exact is not None:
        return exact
    parsed = _parse_number(value)
    if parsed is None:
        return value
    return int(parsed) if parsed.is_integer() else parsed


def _coerce_boolean(value: Any) -> Any:
    """Turn the strings ``"true"``/``"false"`` (any case) into bools."""
    if isinstance(value, str):
        lowered = value.strip().lower()
        if lowered == "true":
            return True
        if lowered == "false":
            return False
    return value


def _coerce_string(value: Any) -> Any:
    """Unwrap ``{"label": "..."}``-style objects into the plain string."""
    if isinstance(value, dict):
        extracted = _extract_string_from_object(value)
        if extracted is not None:
            return extracted
    return value


def _coerce_array(value: Any, schema: dict[str, Any]) -> Any:
    """Heal JSON-encoded arrays and lone scalars, then coerce each item."""
    if value is None:
        # An explicit null is "no value", not a one-element array;
        # wrapping it would hand the tool ``[None]``.
        return value
    if isinstance(value, str):
        # JSON-encoded array string → array; any other string is a
        # singleton scalar and gets wrapped.
        parsed = _try_parse_json(value)
        if not isinstance(parsed, list):
            return [value]
        value = parsed
    elif not isinstance(value, list):
        # Any other scalar (int, bool, dict, ...) — wrap.
        return [value]

    items_schema = schema.get("items")
    if not isinstance(items_schema, dict):
        return value

    coerced_items = [_coerce(item, items_schema) for item in value]
    # Preserve identity when nothing changed so callers see a no-op.
    if all(new is old for new, old in zip(coerced_items, value)):
        return value
    return coerced_items


def _coerce_object(value: Any, schema: dict[str, Any]) -> Any:
    """Heal JSON-encoded objects, then coerce known properties in place."""
    if isinstance(value, str):
        parsed = _try_parse_json(value)
        if not isinstance(parsed, dict):
            return value
        value = parsed
    if not isinstance(value, dict):
        return value

    sub_props = schema.get("properties")
    if not isinstance(sub_props, dict):
        return value

    for key, original in list(value.items()):
        sub_schema = sub_props.get(key)
        if not isinstance(sub_schema, dict):
            continue
        coerced = _coerce(original, sub_schema)
        if coerced is not original:
            # The dict is mutated in place (shared references see the
            # update), so the identity check upstream cannot notice this
            # change — log it here to keep every coercion visible.
            logger.info(
                "coerced nested tool input prop=%s from=%s to=%s",
                key,
                _shape(original),
                _shape(coerced),
            )
            value[key] = coerced
    return value


def _extract_string_from_object(obj: dict[str, Any]) -> str | None:
    """Pick a likely-text field out of a wrapper object.

    Tries the known keys first, falls back to the sole value if the
    object has exactly one entry. Returns None when nothing plausible
    is found — the caller keeps the original.
    """
    for key in _STRING_EXTRACT_KEYS:
        candidate = obj.get(key)
        if isinstance(candidate, str) and candidate:
            return candidate
    if len(obj) == 1:
        (only,) = obj.values()
        if isinstance(only, str) and only:
            return only
    return None


def _try_parse_json(raw: str) -> Any:
    """Return the decoded JSON value, or None when *raw* is not valid JSON."""
    try:
        return json.loads(raw)
    except (ValueError, TypeError):
        return None


def _parse_int(raw: str) -> int | None:
    """Parse *raw* as an exact base-10 integer, or return None."""
    try:
        return int(raw)
    except ValueError:
        return None


def _parse_number(raw: str) -> float | None:
    """Parse *raw* as a finite float, or return None."""
    try:
        parsed = float(raw)
    except (ValueError, OverflowError):
        return None
    # NaN and ±inf pass float() but aren't useful tool-argument values.
    return parsed if math.isfinite(parsed) else None


def _shape(value: Any) -> str:
    """Short type/shape description used in coercion log lines."""
    if value is None:
        return "None"
    if isinstance(value, bool):
        return "bool"
    if isinstance(value, int):
        return "int"
    if isinstance(value, float):
        return "float"
    if isinstance(value, str):
        return f"str[{len(value)}]"
    if isinstance(value, list):
        if not value:
            return "list[0]"
        return f"list[{len(value)}]<{_shape(value[0])}>"
    if isinstance(value, dict):
        # Stringify keys first: this runs on the logging path and must
        # never raise on non-string or mixed-type keys.
        keys = sorted(str(k) for k in value)[:3]
        suffix = ",…" if len(value) > 3 else ""
        return f"dict{{{','.join(keys)}{suffix}}}"
    return type(value).__name__
`Create a colony named \`${colony}\` for the following task:\n\n${task}` - : `Create a colony named \`${colony}\` from this session.`; + + const briefLines = [ + `Colony name: ${colony}`, + `Purpose: ${task || "Use the current conversation to propose the colony's purpose."}`, + ]; + if (cloneOutputs.trim()) { + briefLines.push(`Expected outputs: ${cloneOutputs.trim()}`); + } + if (cloneDataSources.trim()) { + briefLines.push(`Inputs, data sources, tools, or credentials: ${cloneDataSources.trim()}`); + } + if (showCloneSchedule && cloneSchedule.trim()) { + briefLines.push(`Schedule/triggers: ${cloneSchedule.trim()}`); + } + if (showCloneConcurrency && cloneConcurrency.trim()) { + briefLines.push(`Concurrency: ${cloneConcurrency.trim()}`); + } + + const message = [ + "I want to set up a persistent colony.", + "", + briefLines.join("\n"), + "", + "Please use start_incubating_colony if this is appropriate. Ask me for any missing details before calling create_colony, then generate the self-contained task, skill name, skill description, skill body, and any optional triggers or concurrency hint needed by the colony.", + ].join("\n"); + handleSend(message, "queen-dm"); setCloneDialogOpen(false); setCloneColonyName(""); setCloneTask(""); - }, [cloneColonyName, cloneTask, handleSend]); + setCloneOutputs(""); + setCloneDataSources(""); + setCloneSchedule(""); + setCloneConcurrency(""); + setShowCloneSchedule(false); + setShowCloneConcurrency(false); + }, [ + cloneColonyName, + cloneConcurrency, + cloneDataSources, + cloneOutputs, + cloneSchedule, + cloneTask, + handleSend, + showCloneConcurrency, + showCloneSchedule, + ]); const handleQuestionAnswer = useCallback( (answers: Record) => { @@ -747,7 +792,7 @@ export default function QueenDM() { className="flex items-center gap-1 px-2.5 py-1 rounded-md text-[11px] font-medium text-primary hover:bg-primary/10 transition-colors disabled:opacity-40" > - Create a Colony + Start Colony Setup } /> @@ -759,13 +804,13 @@ export default 
function QueenDM() { className="absolute inset-0 bg-black/40 backdrop-blur-sm" onClick={() => setCloneDialogOpen(false)} /> -
+

- Create a Colony + Set Up a Colony

- Create a new colony from this queen's session. The colony inherits - the queen's tools, context, and conversation history. + Share the brief. The queen will fill gaps, write the worker skill, + and create the colony when the setup is ready.

@@ -787,17 +832,94 @@ export default function QueenDM() {
- setCloneTask(e.target.value)} - placeholder="Continue the work from the queen's session" - className="w-full rounded-md border border-border/60 bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground/50 focus:outline-none focus:ring-1 focus:ring-primary" + placeholder="Monitor launches, process a backlog, prepare a report, or continue this session's work." + rows={3} + className="w-full resize-none rounded-md border border-border/60 bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground/50 focus:outline-none focus:ring-1 focus:ring-primary" />
+
+ +