feat(tool call): add format _coerce before execution
This commit is contained in:
@@ -71,6 +71,7 @@ from framework.agent_loop.internals.synthetic_tools import (
|
|||||||
build_report_to_parent_tool,
|
build_report_to_parent_tool,
|
||||||
handle_report_to_parent,
|
handle_report_to_parent,
|
||||||
)
|
)
|
||||||
|
from framework.agent_loop.internals.tool_input_coercer import coerce_tool_input
|
||||||
from framework.agent_loop.internals.tool_result_handler import (
|
from framework.agent_loop.internals.tool_result_handler import (
|
||||||
build_json_preview,
|
build_json_preview,
|
||||||
execute_tool,
|
execute_tool,
|
||||||
@@ -2835,7 +2836,17 @@ class AgentLoop(AgentProtocol):
|
|||||||
# nudge on its next turn without losing the real execution output.
|
# nudge on its next turn without losing the real execution output.
|
||||||
replay_prefixes_by_id: dict[str, str] = {}
|
replay_prefixes_by_id: dict[str, str] = {}
|
||||||
|
|
||||||
|
# Schema-driven coercion of tool arguments. Heals the small
|
||||||
|
# handful of drift patterns that non-frontier models emit
|
||||||
|
# (numbers-as-strings, array-of-{label} wrappers, arrays
|
||||||
|
# sent as JSON strings, singleton scalars). Runs once per
|
||||||
|
# tool call before dispatch; see tool_input_coercer module.
|
||||||
|
_tool_by_name = {t.name: t for t in tools}
|
||||||
|
|
||||||
for tc in tool_calls:
|
for tc in tool_calls:
|
||||||
|
_tool_schema = _tool_by_name.get(tc.tool_name)
|
||||||
|
if _tool_schema is not None:
|
||||||
|
coerce_tool_input(_tool_schema, tc.tool_input)
|
||||||
tool_call_count += 1
|
tool_call_count += 1
|
||||||
if hard_limit > 0 and tool_call_count > hard_limit:
|
if hard_limit > 0 and tool_call_count > hard_limit:
|
||||||
limit_hit = True
|
limit_hit = True
|
||||||
@@ -2893,11 +2904,15 @@ class AgentLoop(AgentProtocol):
|
|||||||
user_input_requested = False
|
user_input_requested = False
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Normalize + self-heal each question entry. Some
|
# Normalize + self-heal each question entry. The
|
||||||
# model families cram options inside the prompt as a
|
# generic tool_input_coercer has already handled
|
||||||
# pseudo-XML blob like "What now?</question>\n
|
# schema-shape drift (array-of-string options, JSON
|
||||||
# _OPTIONS: [\"A\", \"B\"]". sanitize_ask_user_inputs
|
# strings, etc.), so here we only deal with
|
||||||
# strips the tag and recovers the inline options.
|
# prompt-style drift: some model families cram
|
||||||
|
# options inside the prompt as a pseudo-XML blob
|
||||||
|
# like "What now?</question>\n_OPTIONS: [\"A\", \"B\"]".
|
||||||
|
# sanitize_ask_user_inputs strips the tag and
|
||||||
|
# recovers the inline options as a fallback.
|
||||||
questions: list[dict] = []
|
questions: list[dict] = []
|
||||||
for i, q in enumerate(raw_questions):
|
for i, q in enumerate(raw_questions):
|
||||||
if not isinstance(q, dict):
|
if not isinstance(q, dict):
|
||||||
@@ -2906,21 +2921,12 @@ class AgentLoop(AgentProtocol):
|
|||||||
raw_prompt = q.get("prompt", q.get("question", ""))
|
raw_prompt = q.get("prompt", q.get("question", ""))
|
||||||
raw_opts = q.get("options", None)
|
raw_opts = q.get("options", None)
|
||||||
cleaned_prompt, recovered_opts = sanitize_ask_user_inputs(raw_prompt, raw_opts)
|
cleaned_prompt, recovered_opts = sanitize_ask_user_inputs(raw_prompt, raw_opts)
|
||||||
if recovered_opts is not None and raw_opts is None:
|
|
||||||
raw_opts = recovered_opts
|
|
||||||
|
|
||||||
opts: list[str] | None = None
|
opts: list[str] | None = None
|
||||||
if isinstance(raw_opts, list):
|
if isinstance(raw_opts, list) and raw_opts:
|
||||||
opts = [str(o) for o in raw_opts if o]
|
opts = [str(o) for o in raw_opts if o]
|
||||||
elif isinstance(raw_opts, str) and raw_opts.strip():
|
elif recovered_opts is not None:
|
||||||
# Defensive: smaller models sometimes send a
|
opts = recovered_opts
|
||||||
# JSON-encoded string instead of an array.
|
|
||||||
try:
|
|
||||||
parsed = json.loads(raw_opts)
|
|
||||||
if isinstance(parsed, list):
|
|
||||||
opts = [str(o) for o in parsed if o]
|
|
||||||
except (json.JSONDecodeError, TypeError):
|
|
||||||
pass
|
|
||||||
if opts is not None and len(opts) < 2:
|
if opts is not None and len(opts) < 2:
|
||||||
opts = None # fall back to free-text
|
opts = None # fall back to free-text
|
||||||
|
|
||||||
|
|||||||
@@ -101,9 +101,9 @@ Use this tool when you need to ask the user questions during execution. Reach fo
|
|||||||
- You want post-task feedback, or to offer saving a skill or updating memory
|
- You want post-task feedback, or to offer saving a skill or updating memory
|
||||||
|
|
||||||
Usage notes:
|
Usage notes:
|
||||||
- Users will always be able to select "Other" to provide custom text input
|
- Users will always be able to select "Other" to provide custom text input, so do not include catch-all options like "Other" or "Something else" yourself.
|
||||||
- Use multiSelect: true to allow multiple answers to be selected for a question
|
- Each option is a plain string. Do NOT wrap options in `{"label": "..."}` or `{"value": "..."}` objects — pass the raw choice text directly, e.g. `"Email"`, not `{"label": "Email"}`.
|
||||||
- If you recommend a specific option, make that the first option in the list and add "(Recommended)" at the end of the label
|
- If you recommend a specific option, make that the first option in the list and append " (Recommended)" to the end of its text.
|
||||||
- Call this tool whenever you need the user's response.
|
- Call this tool whenever you need the user's response.
|
||||||
- The prompt field must be plain text only.
|
- The prompt field must be plain text only.
|
||||||
- Do not include XML, pseudo-tags, or inline option lists inside prompt.
|
- Do not include XML, pseudo-tags, or inline option lists inside prompt.
|
||||||
@@ -159,8 +159,13 @@ def build_ask_user_tool() -> Tool:
|
|||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {"type": "string"},
|
"items": {"type": "string"},
|
||||||
"description": (
|
"description": (
|
||||||
"2-3 predefined choices. The UI appends an "
|
"2-3 predefined choices as plain strings "
|
||||||
"'Other' free-text input automatically. "
|
'(e.g. ["Yes", "No", "Maybe"]). Do NOT '
|
||||||
|
'wrap items in {"label": "..."} or '
|
||||||
|
'{"value": "..."} objects — pass the raw '
|
||||||
|
"choice text directly. The UI appends an "
|
||||||
|
"'Other' free-text input automatically, "
|
||||||
|
"so don't include catch-all options. "
|
||||||
"Omit only when the user must type a free-form answer."
|
"Omit only when the user must type a free-form answer."
|
||||||
),
|
),
|
||||||
"minItems": 2,
|
"minItems": 2,
|
||||||
|
|||||||
@@ -0,0 +1,291 @@
|
|||||||
|
"""Generic coercion of LLM-emitted tool arguments to match each tool's JSON schema.
|
||||||
|
|
||||||
|
Small/mid-size models drift from tool schemas in predictable, boring ways:
|
||||||
|
|
||||||
|
- A number field comes back as a string (``"42"`` instead of ``42``).
|
||||||
|
- A boolean field comes back as a string (``"true"`` instead of ``True``).
|
||||||
|
- An array-of-string field comes back as an array of objects
|
||||||
|
(``[{"label": "A"}, ...]`` instead of ``["A", ...]``).
|
||||||
|
- An array/object field comes back as a JSON-encoded string
|
||||||
|
(``'["A","B"]'`` instead of ``["A", "B"]``).
|
||||||
|
- A lone scalar arrives where the schema expects an array.
|
||||||
|
|
||||||
|
This module centralizes the healing in one schema-driven pass that runs
|
||||||
|
on every tool call before dispatch. Coercion is conservative:
|
||||||
|
|
||||||
|
- Values that already match the expected type are untouched.
|
||||||
|
- Shapes we don't recognize are returned as-is, so real bugs surface
|
||||||
|
instead of getting silently munged into something plausible.
|
||||||
|
- Every actual coercion is logged with the tool, property, and shape
|
||||||
|
transition so we can see which models/tools are drifting.
|
||||||
|
|
||||||
|
Tool-specific prompt drift (e.g. ``</question>`` tags leaking into an
|
||||||
|
``ask_user`` prompt string) is NOT this module's job — that belongs in
|
||||||
|
per-tool sanitizers, because it's about prompt style, not schema shape.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from framework.llm.provider import Tool
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)

# Keys probed, in preference order, when a string was expected but an
# object wrapper arrived instead -- e.g. ``[{"label": "A"}]``,
# ``[{"value": "A"}]`` or ``[{"text": "A"}]`` where the schema wanted
# ``["A"]``.
_STRING_EXTRACT_KEYS: tuple[str, ...] = ("label", "value", "text", "name", "title", "display")
|
||||||
|
|
||||||
|
|
||||||
|
def coerce_tool_input(tool: Tool, raw_input: dict[str, Any] | None) -> dict[str, Any]:
    """Heal schema-shape drift in *raw_input* using *tool*'s JSON schema.

    The dict is rewritten in place and also returned, so callers may rely
    on either the mutation or the return value. Keys that the schema's
    ``properties`` does not describe are never touched. A non-dict input
    is returned unchanged when truthy, otherwise replaced by ``{}``.
    """
    if not isinstance(raw_input, dict):
        # Nothing to walk; falsy inputs (None, "", ...) normalize to {}.
        return raw_input or {}

    properties = (tool.parameters or {}).get("properties")
    if not isinstance(properties, dict):
        return raw_input

    # Snapshot the items so reassigning values cannot disturb iteration.
    for name, before in list(raw_input.items()):
        spec = properties.get(name)
        if not isinstance(spec, dict):
            continue
        after = _coerce(before, spec)
        if after is before:
            # Identity means passthrough: nothing to record or store.
            continue
        logger.info(
            "coerced tool input tool=%s prop=%s from=%s to=%s",
            tool.name,
            name,
            _shape(before),
            _shape(after),
        )
        raw_input[name] = after

    return raw_input
|
||||||
|
|
||||||
|
|
||||||
|
def _matches_json_type(value: Any, type_name: Any) -> bool:
    """Return True when *value* already satisfies JSON-schema type *type_name*.

    ``bool`` is deliberately excluded from ``integer`` and ``number``
    because Python treats ``True``/``False`` as ints. Unknown type names
    never match.
    """
    if type_name == "null":
        return value is None
    if type_name == "boolean":
        return isinstance(value, bool)
    if type_name == "integer":
        return isinstance(value, int) and not isinstance(value, bool)
    if type_name == "number":
        return isinstance(value, (int, float)) and not isinstance(value, bool)
    if type_name == "string":
        return isinstance(value, str)
    if type_name == "array":
        return isinstance(value, list)
    if type_name == "object":
        return isinstance(value, dict)
    return False


def _coerce(value: Any, schema: dict[str, Any]) -> Any:
    """Dispatch on the schema's ``type`` field.

    Returns the *same object* on passthrough so callers can detect
    no-ops via identity (``coerced is value``).

    For a union ``type`` (a list), a value that already satisfies one of
    the member types is never converted to a different member: scalars
    are returned as-is, and containers are only healed internally via
    the matching ``array``/``object`` coercer. Only when no member
    matches is each member tried in order, returning the first coercion
    that changes the value.
    """
    expected = schema.get("type")
    if not expected:
        return value

    if isinstance(expected, list):
        # Already valid for some member: do not let an earlier member
        # rewrite it (e.g. "42" under ["integer", "string"], or None
        # under ["array", "null"]).
        for member in expected:
            if _matches_json_type(value, member):
                if member in ("array", "object"):
                    return _coerce(value, {**schema, "type": member})
                return value
        # No member matches: first coercion that changes the value wins.
        for member in expected:
            coerced = _coerce(value, {**schema, "type": member})
            if coerced is not value:
                return coerced
        return value

    if expected == "integer":
        return _coerce_integer(value)
    if expected == "number":
        return _coerce_number(value)
    if expected == "boolean":
        return _coerce_boolean(value)
    if expected == "string":
        return _coerce_string(value)
    if expected == "array":
        return _coerce_array(value, schema)
    if expected == "object":
        return _coerce_object(value, schema)

    # Unrecognized type keyword: leave the value alone.
    return value
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_integer(value: Any) -> Any:
    """Coerce a numeric string to ``int``; pass everything else through.

    Non-string values (including ``bool``, which is an ``int`` subclass
    in Python and must not be mistaken for 0/1) are returned unchanged.
    Strings with a fractional part are returned unchanged rather than
    truncated.
    """
    if not isinstance(value, str):
        return value

    # Parse as an exact integer first: going through float would silently
    # corrupt values beyond 2**53 (e.g. large numeric IDs).
    try:
        return int(value)
    except ValueError:
        pass

    # Fall back to float syntax so forms like "42.0" or "1e3" still heal.
    parsed = _parse_number(value)
    if parsed is None or parsed != int(parsed):
        # Unparseable, or has a fractional part -- caller asked for int,
        # don't truncate.
        return value
    return int(parsed)
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_number(value: Any) -> Any:
    """Coerce a numeric string to ``int`` or ``float``; pass others through.

    Non-string values are returned unchanged. Whole-number strings
    collapse to ``int``; anything with a fractional part becomes
    ``float``. Strings that do not parse are returned unchanged.
    """
    if not isinstance(value, str):
        return value

    # Exact integer parse first, so large integral strings are not
    # rounded by a trip through float.
    try:
        return int(value)
    except ValueError:
        pass

    parsed = _parse_number(value)
    if parsed is None:
        return value
    if parsed == int(parsed):
        return int(parsed)
    return parsed
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_boolean(value: Any) -> Any:
    """Map the strings ``"true"``/``"false"`` to real booleans.

    Matching ignores case and surrounding whitespace. Every other value,
    including other strings, is returned unchanged.
    """
    if not isinstance(value, str):
        return value
    literals = {"true": True, "false": False}
    return literals.get(value.strip().lower(), value)
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_string(value: Any) -> Any:
    """Unwrap ``{"label": "..."}``-style objects into their text.

    Strings and any other non-dict values are returned unchanged. A dict
    is replaced by the text ``_extract_string_from_object`` finds in it,
    or kept as-is when nothing suitable is found.
    """
    if not isinstance(value, dict):
        return value
    text = _extract_string_from_object(value)
    return value if text is None else text
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_array(value: Any, schema: dict[str, Any]) -> Any:
    """Coerce *value* towards a list matching the array *schema*.

    Heals, in order:

    - a JSON-encoded array string -> the decoded list;
    - a lone scalar (string, number, dict, ...) -> a one-element list;
    - each element, via the schema's ``items`` sub-schema when present.

    ``None`` is returned unchanged rather than wrapped into ``[None]``.
    The original list object is returned when no element changed, so
    callers can detect a no-op by identity.
    """
    if value is None:
        # An explicit null is not a singleton element.
        return value

    if isinstance(value, str):
        parsed = _try_parse_json(value)
        # Either the decoded array, or the scalar string wrapped.
        value = parsed if isinstance(parsed, list) else [value]
    elif not isinstance(value, list):
        # Any other scalar (int, bool, dict, ...) -- wrap.
        value = [value]

    items_schema = schema.get("items")
    if not isinstance(items_schema, dict):
        return value

    # Wrapped scalars flow through here too, so "5" under array<integer>
    # ends up as [5] rather than ["5"].
    coerced_items = [_coerce(item, items_schema) for item in value]
    if any(new is not old for new, old in zip(coerced_items, value)):
        return coerced_items
    return value
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_object(value: Any, schema: dict[str, Any]) -> Any:
    """Coerce *value* towards a dict matching the object *schema*.

    A JSON-encoded object string is decoded first. Known properties are
    then coerced recursively and written back into the dict in place, so
    callers holding a reference to the same dict see the updates. Values
    that are neither a dict nor a string decoding to one are returned
    unchanged.
    """
    if isinstance(value, str):
        parsed = _try_parse_json(value)
        if not isinstance(parsed, dict):
            return value
        value = parsed
    elif not isinstance(value, dict):
        return value

    sub_props = schema.get("properties")
    if not isinstance(sub_props, dict):
        return value

    # Snapshot the items so in-place reassignment is safe while looping.
    for key, original in list(value.items()):
        sub_schema = sub_props.get(key)
        if not isinstance(sub_schema, dict):
            continue
        coerced = _coerce(original, sub_schema)
        if coerced is not original:
            value[key] = coerced

    return value
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_string_from_object(obj: dict[str, Any]) -> str | None:
    """Find the text a wrapper object is most likely carrying.

    The keys in ``_STRING_EXTRACT_KEYS`` are probed in order and the
    first non-empty string wins. Failing that, an object with exactly
    one entry yields that entry's value if it is a non-empty string.
    Otherwise ``None`` is returned.
    """
    for candidate in (obj.get(key) for key in _STRING_EXTRACT_KEYS):
        if isinstance(candidate, str) and candidate:
            return candidate

    if len(obj) != 1:
        return None
    sole = next(iter(obj.values()))
    if isinstance(sole, str) and sole:
        return sole
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _try_parse_json(raw: str) -> Any:
    """Decode *raw* as JSON, returning ``None`` when it cannot be decoded.

    Note that a valid JSON ``null`` also yields ``None``; callers here
    only check for list/dict results, so the two cases need no
    distinction.
    """
    try:
        return json.loads(raw)
    except (ValueError, TypeError, RecursionError):
        # RecursionError: pathologically nested model output must not
        # crash the tool-call path; treat it as "not JSON".
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_number(raw: str) -> float | None:
    """Parse *raw* as a finite float, or return ``None``.

    NaN and +/-infinity are accepted by ``float()`` but are rejected
    here because they aren't useful numeric values for tool arguments.
    """
    try:
        number = float(raw)
    except (ValueError, OverflowError):
        return None

    is_nan = number != number  # NaN is the only value unequal to itself
    is_infinite = number in (float("inf"), float("-inf"))
    return None if is_nan or is_infinite else number
|
||||||
|
|
||||||
|
|
||||||
|
def _shape(value: Any) -> str:
    """Short type/shape description used in coercion log lines.

    Examples: ``"int"``, ``"str[5]"``, ``"list[2]<str[1]>"``,
    ``"dict{a,b}"``. Lists are summarized by their length and the shape
    of the first element; dicts by up to three sorted keys.
    """
    if value is None:
        return "None"
    # bool must be tested before int: bool is an int subclass.
    if isinstance(value, bool):
        return "bool"
    if isinstance(value, int):
        return "int"
    if isinstance(value, float):
        return "float"
    if isinstance(value, str):
        return f"str[{len(value)}]"
    if isinstance(value, list):
        if not value:
            return "list[0]"
        return f"list[{len(value)}]<{_shape(value[0])}>"
    if isinstance(value, dict):
        # Stringify keys so non-string or mixed-type keys cannot raise
        # TypeError from sorted()/join() on the logging path.
        keys = sorted(str(k) for k in value)[:3]
        suffix = ",…" if len(value) > 3 else ""
        return f"dict{{{','.join(keys)}{suffix}}}"
    return type(value).__name__
|
||||||
@@ -55,6 +55,12 @@ export default function QueenDM() {
|
|||||||
const [cloneDialogOpen, setCloneDialogOpen] = useState(false);
|
const [cloneDialogOpen, setCloneDialogOpen] = useState(false);
|
||||||
const [cloneColonyName, setCloneColonyName] = useState("");
|
const [cloneColonyName, setCloneColonyName] = useState("");
|
||||||
const [cloneTask, setCloneTask] = useState("");
|
const [cloneTask, setCloneTask] = useState("");
|
||||||
|
const [cloneOutputs, setCloneOutputs] = useState("");
|
||||||
|
const [cloneDataSources, setCloneDataSources] = useState("");
|
||||||
|
const [cloneSchedule, setCloneSchedule] = useState("");
|
||||||
|
const [cloneConcurrency, setCloneConcurrency] = useState("");
|
||||||
|
const [showCloneSchedule, setShowCloneSchedule] = useState(false);
|
||||||
|
const [showCloneConcurrency, setShowCloneConcurrency] = useState(false);
|
||||||
// Colony-spawned lock state. Once a colony has been spawned from this DM
|
// Colony-spawned lock state. Once a colony has been spawned from this DM
|
||||||
// and the user clicked into it, /chat is rejected server-side and the
|
// and the user clicked into it, /chat is rejected server-side and the
|
||||||
// composer is replaced with a "compact + new session" button. Hydrated
|
// composer is replaced with a "compact + new session" button. Hydrated
|
||||||
@@ -652,14 +658,53 @@ export default function QueenDM() {
|
|||||||
const colony = cloneColonyName.trim();
|
const colony = cloneColonyName.trim();
|
||||||
if (!colony) return;
|
if (!colony) return;
|
||||||
const task = cloneTask.trim();
|
const task = cloneTask.trim();
|
||||||
const message = task
|
|
||||||
? `Create a colony named \`${colony}\` for the following task:\n\n${task}`
|
const briefLines = [
|
||||||
: `Create a colony named \`${colony}\` from this session.`;
|
`Colony name: ${colony}`,
|
||||||
|
`Purpose: ${task || "Use the current conversation to propose the colony's purpose."}`,
|
||||||
|
];
|
||||||
|
if (cloneOutputs.trim()) {
|
||||||
|
briefLines.push(`Expected outputs: ${cloneOutputs.trim()}`);
|
||||||
|
}
|
||||||
|
if (cloneDataSources.trim()) {
|
||||||
|
briefLines.push(`Inputs, data sources, tools, or credentials: ${cloneDataSources.trim()}`);
|
||||||
|
}
|
||||||
|
if (showCloneSchedule && cloneSchedule.trim()) {
|
||||||
|
briefLines.push(`Schedule/triggers: ${cloneSchedule.trim()}`);
|
||||||
|
}
|
||||||
|
if (showCloneConcurrency && cloneConcurrency.trim()) {
|
||||||
|
briefLines.push(`Concurrency: ${cloneConcurrency.trim()}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const message = [
|
||||||
|
"I want to set up a persistent colony.",
|
||||||
|
"",
|
||||||
|
briefLines.join("\n"),
|
||||||
|
"",
|
||||||
|
"Please use start_incubating_colony if this is appropriate. Ask me for any missing details before calling create_colony, then generate the self-contained task, skill name, skill description, skill body, and any optional triggers or concurrency hint needed by the colony.",
|
||||||
|
].join("\n");
|
||||||
|
|
||||||
handleSend(message, "queen-dm");
|
handleSend(message, "queen-dm");
|
||||||
setCloneDialogOpen(false);
|
setCloneDialogOpen(false);
|
||||||
setCloneColonyName("");
|
setCloneColonyName("");
|
||||||
setCloneTask("");
|
setCloneTask("");
|
||||||
}, [cloneColonyName, cloneTask, handleSend]);
|
setCloneOutputs("");
|
||||||
|
setCloneDataSources("");
|
||||||
|
setCloneSchedule("");
|
||||||
|
setCloneConcurrency("");
|
||||||
|
setShowCloneSchedule(false);
|
||||||
|
setShowCloneConcurrency(false);
|
||||||
|
}, [
|
||||||
|
cloneColonyName,
|
||||||
|
cloneConcurrency,
|
||||||
|
cloneDataSources,
|
||||||
|
cloneOutputs,
|
||||||
|
cloneSchedule,
|
||||||
|
cloneTask,
|
||||||
|
handleSend,
|
||||||
|
showCloneConcurrency,
|
||||||
|
showCloneSchedule,
|
||||||
|
]);
|
||||||
|
|
||||||
const handleQuestionAnswer = useCallback(
|
const handleQuestionAnswer = useCallback(
|
||||||
(answers: Record<string, string>) => {
|
(answers: Record<string, string>) => {
|
||||||
@@ -747,7 +792,7 @@ export default function QueenDM() {
|
|||||||
className="flex items-center gap-1 px-2.5 py-1 rounded-md text-[11px] font-medium text-primary hover:bg-primary/10 transition-colors disabled:opacity-40"
|
className="flex items-center gap-1 px-2.5 py-1 rounded-md text-[11px] font-medium text-primary hover:bg-primary/10 transition-colors disabled:opacity-40"
|
||||||
>
|
>
|
||||||
<Plus className="w-3 h-3" />
|
<Plus className="w-3 h-3" />
|
||||||
Create a Colony
|
Start Colony Setup
|
||||||
</button>
|
</button>
|
||||||
}
|
}
|
||||||
/>
|
/>
|
||||||
@@ -759,13 +804,13 @@ export default function QueenDM() {
|
|||||||
className="absolute inset-0 bg-black/40 backdrop-blur-sm"
|
className="absolute inset-0 bg-black/40 backdrop-blur-sm"
|
||||||
onClick={() => setCloneDialogOpen(false)}
|
onClick={() => setCloneDialogOpen(false)}
|
||||||
/>
|
/>
|
||||||
<div className="relative bg-card border border-border/60 rounded-xl shadow-2xl w-full max-w-md p-6 space-y-4">
|
<div className="relative bg-card border border-border/60 rounded-xl shadow-2xl w-full max-w-lg max-h-[90vh] overflow-y-auto p-6 space-y-4">
|
||||||
<h2 className="text-sm font-semibold text-foreground">
|
<h2 className="text-sm font-semibold text-foreground">
|
||||||
Create a Colony
|
Set Up a Colony
|
||||||
</h2>
|
</h2>
|
||||||
<p className="text-[11px] text-muted-foreground">
|
<p className="text-[11px] text-muted-foreground">
|
||||||
Create a new colony from this queen's session. The colony inherits
|
Share the brief. The queen will fill gaps, write the worker skill,
|
||||||
the queen's tools, context, and conversation history.
|
and create the colony when the setup is ready.
|
||||||
</p>
|
</p>
|
||||||
<div className="space-y-3">
|
<div className="space-y-3">
|
||||||
<div>
|
<div>
|
||||||
@@ -787,17 +832,94 @@ export default function QueenDM() {
|
|||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
<label className="block text-[11px] font-medium text-muted-foreground mb-1">
|
<label className="block text-[11px] font-medium text-muted-foreground mb-1">
|
||||||
Task{" "}
|
What should it do?
|
||||||
<span className="text-muted-foreground/40">(optional)</span>
|
|
||||||
</label>
|
</label>
|
||||||
<input
|
<textarea
|
||||||
type="text"
|
|
||||||
value={cloneTask}
|
value={cloneTask}
|
||||||
onChange={(e) => setCloneTask(e.target.value)}
|
onChange={(e) => setCloneTask(e.target.value)}
|
||||||
placeholder="Continue the work from the queen's session"
|
placeholder="Monitor launches, process a backlog, prepare a report, or continue this session's work."
|
||||||
className="w-full rounded-md border border-border/60 bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground/50 focus:outline-none focus:ring-1 focus:ring-primary"
|
rows={3}
|
||||||
|
className="w-full resize-none rounded-md border border-border/60 bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground/50 focus:outline-none focus:ring-1 focus:ring-primary"
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
|
<div>
|
||||||
|
<label className="block text-[11px] font-medium text-muted-foreground mb-1">
|
||||||
|
Expected output{" "}
|
||||||
|
<span className="text-muted-foreground/40">(optional)</span>
|
||||||
|
</label>
|
||||||
|
<textarea
|
||||||
|
value={cloneOutputs}
|
||||||
|
onChange={(e) => setCloneOutputs(e.target.value)}
|
||||||
|
placeholder="A digest, saved rows, alerts, files, or a final summary."
|
||||||
|
rows={2}
|
||||||
|
className="w-full resize-none rounded-md border border-border/60 bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground/50 focus:outline-none focus:ring-1 focus:ring-primary"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label className="block text-[11px] font-medium text-muted-foreground mb-1">
|
||||||
|
Inputs, tools, or credentials{" "}
|
||||||
|
<span className="text-muted-foreground/40">(optional)</span>
|
||||||
|
</label>
|
||||||
|
<textarea
|
||||||
|
value={cloneDataSources}
|
||||||
|
onChange={(e) => setCloneDataSources(e.target.value)}
|
||||||
|
placeholder="APIs, websites, files, accounts, OAuth tools, or credentials it will need."
|
||||||
|
rows={2}
|
||||||
|
className="w-full resize-none rounded-md border border-border/60 bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground/50 focus:outline-none focus:ring-1 focus:ring-primary"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div className="space-y-2 pt-1">
|
||||||
|
{!showCloneSchedule ? (
|
||||||
|
<button
|
||||||
|
type="button"
|
||||||
|
onClick={() => setShowCloneSchedule(true)}
|
||||||
|
className="inline-flex items-center gap-2 rounded-md border border-border/60 px-3 py-2 text-xs font-medium text-muted-foreground hover:text-foreground hover:bg-muted/40 transition-colors"
|
||||||
|
>
|
||||||
|
<Plus className="h-3.5 w-3.5" />
|
||||||
|
Schedule / triggers
|
||||||
|
</button>
|
||||||
|
) : (
|
||||||
|
<div>
|
||||||
|
<label className="block text-[11px] font-medium text-muted-foreground mb-1">
|
||||||
|
Schedule / triggers{" "}
|
||||||
|
<span className="text-muted-foreground/40">(optional)</span>
|
||||||
|
</label>
|
||||||
|
<textarea
|
||||||
|
value={cloneSchedule}
|
||||||
|
onChange={(e) => setCloneSchedule(e.target.value)}
|
||||||
|
placeholder="Manual only, every weekday at 9 AM, every 30 minutes, or webhook path."
|
||||||
|
rows={2}
|
||||||
|
className="w-full resize-none rounded-md border border-border/60 bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground/50 focus:outline-none focus:ring-1 focus:ring-primary"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{!showCloneConcurrency ? (
|
||||||
|
<button
|
||||||
|
type="button"
|
||||||
|
onClick={() => setShowCloneConcurrency(true)}
|
||||||
|
className="inline-flex items-center gap-2 rounded-md border border-border/60 px-3 py-2 text-xs font-medium text-muted-foreground hover:text-foreground hover:bg-muted/40 transition-colors"
|
||||||
|
>
|
||||||
|
<Plus className="h-3.5 w-3.5" />
|
||||||
|
Concurrency
|
||||||
|
</button>
|
||||||
|
) : (
|
||||||
|
<div>
|
||||||
|
<label className="block text-[11px] font-medium text-muted-foreground mb-1">
|
||||||
|
Concurrency{" "}
|
||||||
|
<span className="text-muted-foreground/40">(optional)</span>
|
||||||
|
</label>
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
value={cloneConcurrency}
|
||||||
|
onChange={(e) => setCloneConcurrency(e.target.value)}
|
||||||
|
placeholder="1 for a single worker, 5 for a parallel backlog, or any limit to respect."
|
||||||
|
className="w-full rounded-md border border-border/60 bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground/50 focus:outline-none focus:ring-1 focus:ring-primary"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div className="flex justify-end gap-2 pt-2">
|
<div className="flex justify-end gap-2 pt-2">
|
||||||
<button
|
<button
|
||||||
@@ -805,6 +927,12 @@ export default function QueenDM() {
|
|||||||
setCloneDialogOpen(false);
|
setCloneDialogOpen(false);
|
||||||
setCloneColonyName("");
|
setCloneColonyName("");
|
||||||
setCloneTask("");
|
setCloneTask("");
|
||||||
|
setCloneOutputs("");
|
||||||
|
setCloneDataSources("");
|
||||||
|
setCloneSchedule("");
|
||||||
|
setCloneConcurrency("");
|
||||||
|
setShowCloneSchedule(false);
|
||||||
|
setShowCloneConcurrency(false);
|
||||||
}}
|
}}
|
||||||
className="px-3 py-1.5 rounded-md text-xs font-medium text-muted-foreground hover:text-foreground hover:bg-muted/50 transition-colors"
|
className="px-3 py-1.5 rounded-md text-xs font-medium text-muted-foreground hover:text-foreground hover:bg-muted/50 transition-colors"
|
||||||
>
|
>
|
||||||
@@ -815,7 +943,7 @@ export default function QueenDM() {
|
|||||||
disabled={!cloneColonyName.trim()}
|
disabled={!cloneColonyName.trim()}
|
||||||
className="px-3 py-1.5 rounded-md text-xs font-medium bg-primary text-primary-foreground hover:bg-primary/90 transition-colors disabled:opacity-50"
|
className="px-3 py-1.5 rounded-md text-xs font-medium bg-primary text-primary-foreground hover:bg-primary/90 transition-colors disabled:opacity-50"
|
||||||
>
|
>
|
||||||
Create
|
Start setup
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -0,0 +1,252 @@
|
|||||||
|
"""Tests for ``coerce_tool_input``.
|
||||||
|
|
||||||
|
The coercer centralizes healing for the small handful of schema-shape
|
||||||
|
drift patterns that non-frontier models emit. These tests pin the
|
||||||
|
expected behavior for each pattern plus the passthrough / failure cases.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from framework.agent_loop.internals.tool_input_coercer import coerce_tool_input
|
||||||
|
from framework.llm.provider import Tool
|
||||||
|
|
||||||
|
|
||||||
|
def _tool(parameters: dict) -> Tool:
    """Build a throwaway ``Tool`` named ``t`` whose schema is *parameters*."""
    return Tool(parameters=parameters, description="test", name="t")
|
||||||
|
|
||||||
|
|
||||||
|
# ---- passthrough / no-op cases ---------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_empty_input_passes_through() -> None:
    """Empty and missing argument payloads both come back as ``{}``."""
    string_tool = _tool({"type": "object", "properties": {"x": {"type": "string"}}})
    for payload in ({}, None):
        assert coerce_tool_input(string_tool, payload) == {}
|
||||||
|
|
||||||
|
|
||||||
|
def test_missing_schema_is_noop() -> None:
    """A tool with an empty schema hands back the very same dict object."""
    payload = {"anything": 123}
    result = coerce_tool_input(_tool({}), payload)
    assert result is payload
|
||||||
|
|
||||||
|
|
||||||
|
def test_unknown_property_is_untouched() -> None:
    """Arguments the schema does not describe are never rewritten."""
    schema = {"type": "object", "properties": {"known": {"type": "integer"}}}
    payload = {"unknown": "42"}
    coerce_tool_input(_tool(schema), payload)
    assert payload == {"unknown": "42"}  # untouched
|
||||||
|
|
||||||
|
|
||||||
|
def test_type_already_matches_is_noop() -> None:
    """An integer supplied for an integer property is left alone."""
    schema = {"type": "object", "properties": {"n": {"type": "integer"}}}
    payload = {"n": 42}

    coerce_tool_input(_tool(schema), payload)

    expected = {"n": 42}
    assert payload == expected
|
||||||
|
|
||||||
|
|
||||||
|
# ---- primitive coercion (the reference implementation's scope) -------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_string_to_integer() -> None:
    """A digit string sent for an integer property is converted in place."""
    schema = {"type": "object", "properties": {"n": {"type": "integer"}}}
    payload = {"n": "42"}

    coerce_tool_input(_tool(schema), payload)

    expected = {"n": 42}
    assert payload == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_string_to_integer_rejects_fractional() -> None:
    """A fractional numeric string is not forced into an integer property."""
    schema = {"type": "object", "properties": {"n": {"type": "integer"}}}
    payload = {"n": "3.14"}

    coerce_tool_input(_tool(schema), payload)

    # The schema asks for an int, so the fractional string stays a string.
    expected = {"n": "3.14"}
    assert payload == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_string_to_number_float() -> None:
    """A fractional numeric string for a number property becomes a float."""
    schema = {"type": "object", "properties": {"n": {"type": "number"}}}
    payload = {"n": "3.14"}

    coerce_tool_input(_tool(schema), payload)

    expected = {"n": 3.14}
    assert payload == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_string_to_number_whole() -> None:
    """A whole-number string for a ``number`` property collapses to ``int``.

    Dict equality alone cannot pin this: ``42 == 42.0`` in Python, so a
    coercer that returned ``42.0`` would still satisfy ``args == {"n": 42}``.
    The explicit type check makes the "collapse to int" claim enforceable.
    """
    tool = _tool({"type": "object", "properties": {"n": {"type": "number"}}})
    args = {"n": "42"}
    coerce_tool_input(tool, args)
    assert args == {"n": 42}  # whole numbers collapse to int
    # Equality treats int and float alike; assert the concrete type as well.
    assert isinstance(args["n"], int)
|
||||||
|
|
||||||
|
|
||||||
|
def test_string_to_boolean() -> None:
    """Boolean-looking strings become real bools; anything else is kept.

    ``"true"`` and ``"False"`` (note the mixed casing) are expected to be
    converted, while ``"nope"`` is expected to be left as the original string.
    """
    tool = _tool(
        {
            "type": "object",
            "properties": {
                "a": {"type": "boolean"},
                "b": {"type": "boolean"},
                "c": {"type": "boolean"},
            },
        }
    )
    args = {"a": "true", "b": "False", "c": "nope"}
    coerce_tool_input(tool, args)
    assert args == {"a": True, "b": False, "c": "nope"}
    # ``True == 1`` and ``False == 0``, so the equality above would also accept
    # integers. Identity checks pin the results to the actual bool singletons.
    assert args["a"] is True
    assert args["b"] is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_union_type_first_match_wins() -> None:
    """For a ``["integer", "string"]`` union, a digit string ends up as an int."""
    union_property = {"type": ["integer", "string"]}
    schema = {
        "type": "object",
        "properties": {"x": union_property},
    }
    payload = {"x": "42"}

    coerce_tool_input(_tool(schema), payload)

    expected = {"x": 42}
    assert payload == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_nan_and_inf_rejected() -> None:
    """The string ``"inf"`` sent for a number property is kept unchanged.

    NOTE(review): despite the test name, only ``"inf"`` is exercised here;
    a ``"nan"`` case is not covered by this test.
    """
    schema = {"type": "object", "properties": {"n": {"type": "number"}}}
    payload = {"n": "inf"}

    coerce_tool_input(_tool(schema), payload)

    # Infinity is not a usable tool argument, so the original string survives.
    expected = {"n": "inf"}
    assert payload == expected
|
||||||
|
|
||||||
|
|
||||||
|
# ---- the ask_user bug: [{"label": "..."}] -> ["..."] ------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_array_of_label_objects_unwraps_to_strings() -> None:
    """``[{"label": ...}]`` wrappers in a string array are flattened to strings."""
    string_array = {"type": "array", "items": {"type": "string"}}
    schema = {
        "type": "object",
        "properties": {
            "options": string_array,
        },
    }
    wrapped = [{"label": "A"}, {"label": "B"}, {"label": "C"}]
    payload = {"options": wrapped}

    coerce_tool_input(_tool(schema), payload)

    expected = {"options": ["A", "B", "C"]}
    assert payload == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_array_of_value_objects_unwraps() -> None:
    """Wrappers keyed by ``value``, ``text`` or ``name`` are flattened too."""
    string_array = {"type": "array", "items": {"type": "string"}}
    schema = {"type": "object", "properties": {"xs": string_array}}
    payload = {"xs": [{"value": "A"}, {"text": "B"}, {"name": "C"}]}

    coerce_tool_input(_tool(schema), payload)

    expected = {"xs": ["A", "B", "C"]}
    assert payload == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_single_key_object_falls_back_to_sole_value() -> None:
    """A one-key wrapper with an unfamiliar key is replaced by its only value."""
    string_array = {"type": "array", "items": {"type": "string"}}
    schema = {"type": "object", "properties": {"xs": string_array}}
    payload = {"xs": [{"weirdkey": "A"}]}

    coerce_tool_input(_tool(schema), payload)

    expected = {"xs": ["A"]}
    assert payload == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_unrecognized_object_is_preserved() -> None:
    """A multi-key object with no familiar key is left exactly as sent."""
    string_array = {"type": "array", "items": {"type": "string"}}
    schema = {"type": "object", "properties": {"xs": string_array}}
    # Ambiguous on purpose: two keys, neither of them a known wrapper key.
    payload = {"xs": [{"a": "x", "b": "y"}]}

    coerce_tool_input(_tool(schema), payload)

    expected = {"xs": [{"a": "x", "b": "y"}]}
    assert payload == expected
|
||||||
|
|
||||||
|
|
||||||
|
# ---- JSON-encoded-string-as-array ------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_json_string_array_is_parsed() -> None:
    """A JSON-encoded array string for an array property is decoded to a list."""
    string_array = {"type": "array", "items": {"type": "string"}}
    schema = {"type": "object", "properties": {"xs": string_array}}
    payload = {"xs": '["A","B","C"]'}

    coerce_tool_input(_tool(schema), payload)

    expected = {"xs": ["A", "B", "C"]}
    assert payload == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_scalar_wraps_into_singleton_array() -> None:
    """A bare string for an array property is wrapped in a one-element list."""
    string_array = {"type": "array", "items": {"type": "string"}}
    schema = {"type": "object", "properties": {"xs": string_array}}
    payload = {"xs": "solo"}

    coerce_tool_input(_tool(schema), payload)

    expected = {"xs": ["solo"]}
    assert payload == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_invalid_json_string_wraps_as_singleton() -> None:
    """A string that is not valid JSON is wrapped whole into a one-element list."""
    string_array = {"type": "array", "items": {"type": "string"}}
    schema = {"type": "object", "properties": {"xs": string_array}}
    payload = {"xs": "not json [[]"}

    coerce_tool_input(_tool(schema), payload)

    expected = {"xs": ["not json [[]"]}
    assert payload == expected
|
||||||
|
|
||||||
|
|
||||||
|
# ---- nested: the actual ask_user schema shape -------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_nested_questions_array_with_wrapped_options() -> None:
    """Cover the real bug: ``questions[i].options`` arriving as ``[{label}]``."""
    options_schema = {
        "type": "array",
        "items": {"type": "string"},
    }
    question_schema = {
        "type": "object",
        "properties": {
            "id": {"type": "string"},
            "prompt": {"type": "string"},
            "options": options_schema,
        },
    }
    schema = {
        "type": "object",
        "properties": {
            "questions": {
                "type": "array",
                "items": question_schema,
            }
        },
    }

    with_options = {
        "id": "q1",
        "prompt": "Pick one",
        "options": [{"label": "Email (Recommended)"}, {"label": "Slack"}],
    }
    free_form = {"id": "q2", "prompt": "Free form"}
    payload = {"questions": [with_options, free_form]}

    coerce_tool_input(_tool(schema), payload)

    first, second = payload["questions"][0], payload["questions"][1]
    # Wrapped options are flattened inside the nested question object...
    assert first["options"] == ["Email (Recommended)", "Slack"]
    # ...while a question without options is left exactly as it was.
    assert second == {"id": "q2", "prompt": "Free form"}
|
||||||
|
|
||||||
|
|
||||||
|
def test_json_string_for_object_is_parsed() -> None:
    """A JSON string for an object property is decoded, then coerced inside."""
    cfg_schema = {
        "type": "object",
        "properties": {"n": {"type": "integer"}},
    }
    schema = {
        "type": "object",
        "properties": {
            "cfg": cfg_schema
        },
    }
    payload = {"cfg": '{"n": "42"}'}

    coerce_tool_input(_tool(schema), payload)

    # The outer string becomes a dict and the inner "42" becomes an integer.
    expected = {"cfg": {"n": 42}}
    assert payload == expected
|
||||||
|
|
||||||
|
|
||||||
|
# ---- string property receiving a {label} object -----------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_single_string_property_unwraps_label_object() -> None:
    """A ``{"label": ...}`` object sent for a string property is unwrapped."""
    schema = {"type": "object", "properties": {"choice": {"type": "string"}}}
    payload = {"choice": {"label": "Email"}}

    coerce_tool_input(_tool(schema), payload)

    expected = {"choice": "Email"}
    assert payload == expected
|
||||||
Reference in New Issue
Block a user