feat(tool call): add format _coerce before execution
This commit is contained in:
@@ -71,6 +71,7 @@ from framework.agent_loop.internals.synthetic_tools import (
|
||||
build_report_to_parent_tool,
|
||||
handle_report_to_parent,
|
||||
)
|
||||
from framework.agent_loop.internals.tool_input_coercer import coerce_tool_input
|
||||
from framework.agent_loop.internals.tool_result_handler import (
|
||||
build_json_preview,
|
||||
execute_tool,
|
||||
@@ -2835,7 +2836,17 @@ class AgentLoop(AgentProtocol):
|
||||
# nudge on its next turn without losing the real execution output.
|
||||
replay_prefixes_by_id: dict[str, str] = {}
|
||||
|
||||
# Schema-driven coercion of tool arguments. Heals the small
|
||||
# handful of drift patterns that non-frontier models emit
|
||||
# (numbers-as-strings, array-of-{label} wrappers, arrays
|
||||
# sent as JSON strings, singleton scalars). Runs once per
|
||||
# tool call before dispatch; see tool_input_coercer module.
|
||||
_tool_by_name = {t.name: t for t in tools}
|
||||
|
||||
for tc in tool_calls:
|
||||
_tool_schema = _tool_by_name.get(tc.tool_name)
|
||||
if _tool_schema is not None:
|
||||
coerce_tool_input(_tool_schema, tc.tool_input)
|
||||
tool_call_count += 1
|
||||
if hard_limit > 0 and tool_call_count > hard_limit:
|
||||
limit_hit = True
|
||||
@@ -2893,11 +2904,15 @@ class AgentLoop(AgentProtocol):
|
||||
user_input_requested = False
|
||||
continue
|
||||
|
||||
# Normalize + self-heal each question entry. Some
|
||||
# model families cram options inside the prompt as a
|
||||
# pseudo-XML blob like "What now?</question>\n
|
||||
# _OPTIONS: [\"A\", \"B\"]". sanitize_ask_user_inputs
|
||||
# strips the tag and recovers the inline options.
|
||||
# Normalize + self-heal each question entry. The
|
||||
# generic tool_input_coercer has already handled
|
||||
# schema-shape drift (array-of-string options, JSON
|
||||
# strings, etc.), so here we only deal with
|
||||
# prompt-style drift: some model families cram
|
||||
# options inside the prompt as a pseudo-XML blob
|
||||
# like "What now?</question>\n_OPTIONS: [\"A\", \"B\"]".
|
||||
# sanitize_ask_user_inputs strips the tag and
|
||||
# recovers the inline options as a fallback.
|
||||
questions: list[dict] = []
|
||||
for i, q in enumerate(raw_questions):
|
||||
if not isinstance(q, dict):
|
||||
@@ -2906,21 +2921,12 @@ class AgentLoop(AgentProtocol):
|
||||
raw_prompt = q.get("prompt", q.get("question", ""))
|
||||
raw_opts = q.get("options", None)
|
||||
cleaned_prompt, recovered_opts = sanitize_ask_user_inputs(raw_prompt, raw_opts)
|
||||
if recovered_opts is not None and raw_opts is None:
|
||||
raw_opts = recovered_opts
|
||||
|
||||
opts: list[str] | None = None
|
||||
if isinstance(raw_opts, list):
|
||||
if isinstance(raw_opts, list) and raw_opts:
|
||||
opts = [str(o) for o in raw_opts if o]
|
||||
elif isinstance(raw_opts, str) and raw_opts.strip():
|
||||
# Defensive: smaller models sometimes send a
|
||||
# JSON-encoded string instead of an array.
|
||||
try:
|
||||
parsed = json.loads(raw_opts)
|
||||
if isinstance(parsed, list):
|
||||
opts = [str(o) for o in parsed if o]
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
elif recovered_opts is not None:
|
||||
opts = recovered_opts
|
||||
if opts is not None and len(opts) < 2:
|
||||
opts = None # fall back to free-text
|
||||
|
||||
|
||||
@@ -101,9 +101,9 @@ Use this tool when you need to ask the user questions during execution. Reach fo
|
||||
- You want post-task feedback, or to offer saving a skill or updating memory
|
||||
|
||||
Usage notes:
|
||||
- Users will always be able to select "Other" to provide custom text input
|
||||
- Use multiSelect: true to allow multiple answers to be selected for a question
|
||||
- If you recommend a specific option, make that the first option in the list and add "(Recommended)" at the end of the label
|
||||
- Users will always be able to select "Other" to provide custom text input, so do not include catch-all options like "Other" or "Something else" yourself.
|
||||
- Each option is a plain string. Do NOT wrap options in `{"label": "..."}` or `{"value": "..."}` objects — pass the raw choice text directly, e.g. `"Email"`, not `{"label": "Email"}`.
|
||||
- If you recommend a specific option, make that the first option in the list and append " (Recommended)" to the end of its text.
|
||||
- Call this tool whenever you need the user's response.
|
||||
- The prompt field must be plain text only.
|
||||
- Do not include XML, pseudo-tags, or inline option lists inside prompt.
|
||||
@@ -159,8 +159,13 @@ def build_ask_user_tool() -> Tool:
|
||||
"type": "array",
|
||||
"items": {"type": "string"},
|
||||
"description": (
|
||||
"2-3 predefined choices. The UI appends an "
|
||||
"'Other' free-text input automatically. "
|
||||
"2-3 predefined choices as plain strings "
|
||||
'(e.g. ["Yes", "No", "Maybe"]). Do NOT '
|
||||
'wrap items in {"label": "..."} or '
|
||||
'{"value": "..."} objects — pass the raw '
|
||||
"choice text directly. The UI appends an "
|
||||
"'Other' free-text input automatically, "
|
||||
"so don't include catch-all options. "
|
||||
"Omit only when the user must type a free-form answer."
|
||||
),
|
||||
"minItems": 2,
|
||||
|
||||
@@ -0,0 +1,291 @@
|
||||
"""Generic coercion of LLM-emitted tool arguments to match each tool's JSON schema.
|
||||
|
||||
Small/mid-size models drift from tool schemas in predictable, boring ways:
|
||||
|
||||
- A number field comes back as a string (``"42"`` instead of ``42``).
|
||||
- A boolean field comes back as a string (``"true"`` instead of ``True``).
|
||||
- An array-of-string field comes back as an array of objects
|
||||
(``[{"label": "A"}, ...]`` instead of ``["A", ...]``).
|
||||
- An array/object field comes back as a JSON-encoded string
|
||||
(``'["A","B"]'`` instead of ``["A", "B"]``).
|
||||
- A lone scalar arrives where the schema expects an array.
|
||||
|
||||
This module centralizes the healing in one schema-driven pass that runs
|
||||
on every tool call before dispatch. Coercion is conservative:
|
||||
|
||||
- Values that already match the expected type are untouched.
|
||||
- Shapes we don't recognize are returned as-is, so real bugs surface
|
||||
instead of getting silently munged into something plausible.
|
||||
- Every actual coercion is logged with the tool, property, and shape
|
||||
transition so we can see which models/tools are drifting.
|
||||
|
||||
Tool-specific prompt drift (e.g. ``</question>`` tags leaking into an
|
||||
``ask_user`` prompt string) is NOT this module's job — that belongs in
|
||||
per-tool sanitizers, because it's about prompt style, not schema shape.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from framework.llm.provider import Tool
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# When an ``array<string>`` field arrives as an array of objects, look
|
||||
# for a text-carrying field in preference order. Covers the wrappers
|
||||
# small models tend to produce: ``[{"label": "A"}]``, ``[{"value": "A"}]``,
|
||||
# ``[{"text": "A"}]``, etc.
|
||||
_STRING_EXTRACT_KEYS: tuple[str, ...] = (
|
||||
"label",
|
||||
"value",
|
||||
"text",
|
||||
"name",
|
||||
"title",
|
||||
"display",
|
||||
)
|
||||
|
||||
|
||||
def coerce_tool_input(tool: Tool, raw_input: dict[str, Any] | None) -> dict[str, Any]:
    """Heal schema drift in *raw_input*, mutating the dict in place.

    Every top-level argument whose name appears in the ``properties`` of
    the tool's parameter schema is run through ``_coerce``. When that
    yields a different object, the change is logged and written back.
    Arguments the schema does not describe are skipped.

    Returns *raw_input* itself when it is a dict. A non-dict input is
    returned unchanged if truthy, otherwise a fresh empty dict is returned.
    """
    if not isinstance(raw_input, dict):
        return raw_input or {}

    properties = (tool.parameters or {}).get("properties")
    if not isinstance(properties, dict):
        return raw_input

    # Iterate over a snapshot so writing values back cannot disturb the loop.
    for name, before in list(raw_input.items()):
        spec = properties.get(name)
        if not isinstance(spec, dict):
            continue

        after = _coerce(before, spec)
        if after is before:
            # Identity means passthrough: nothing to log, nothing to store.
            continue

        logger.info(
            "coerced tool input tool=%s prop=%s from=%s to=%s",
            tool.name,
            name,
            _shape(before),
            _shape(after),
        )
        raw_input[name] = after

    return raw_input
|
||||
|
||||
|
||||
def _coerce(value: Any, schema: dict[str, Any]) -> Any:
|
||||
"""Dispatch on the schema's ``type`` field.
|
||||
|
||||
Returns the *same object* on passthrough so callers can detect
|
||||
no-ops via identity (``coerced is value``).
|
||||
"""
|
||||
expected = schema.get("type")
|
||||
if not expected:
|
||||
return value
|
||||
|
||||
# Union type: try each in order, return the first coercion that
|
||||
# actually changes the value. Falls back to the original.
|
||||
if isinstance(expected, list):
|
||||
for t in expected:
|
||||
sub_schema = {**schema, "type": t}
|
||||
coerced = _coerce(value, sub_schema)
|
||||
if coerced is not value:
|
||||
return coerced
|
||||
return value
|
||||
|
||||
if expected == "integer":
|
||||
return _coerce_integer(value)
|
||||
if expected == "number":
|
||||
return _coerce_number(value)
|
||||
if expected == "boolean":
|
||||
return _coerce_boolean(value)
|
||||
if expected == "string":
|
||||
return _coerce_string(value)
|
||||
if expected == "array":
|
||||
return _coerce_array(value, schema)
|
||||
if expected == "object":
|
||||
return _coerce_object(value, schema)
|
||||
|
||||
return value
|
||||
|
||||
|
||||
def _coerce_integer(value: Any) -> Any:
|
||||
# bool is a subclass of int in Python; don't mistake True for 1 here.
|
||||
if isinstance(value, bool):
|
||||
return value
|
||||
if isinstance(value, int):
|
||||
return value
|
||||
if isinstance(value, str):
|
||||
parsed = _parse_number(value)
|
||||
if parsed is None:
|
||||
return value
|
||||
if parsed != int(parsed):
|
||||
# Has a fractional part — caller asked for int, don't truncate.
|
||||
return value
|
||||
return int(parsed)
|
||||
return value
|
||||
|
||||
|
||||
def _coerce_number(value: Any) -> Any:
|
||||
if isinstance(value, bool):
|
||||
return value
|
||||
if isinstance(value, (int, float)):
|
||||
return value
|
||||
if isinstance(value, str):
|
||||
parsed = _parse_number(value)
|
||||
if parsed is None:
|
||||
return value
|
||||
if parsed == int(parsed):
|
||||
return int(parsed)
|
||||
return parsed
|
||||
return value
|
||||
|
||||
|
||||
def _coerce_boolean(value: Any) -> Any:
|
||||
if isinstance(value, bool):
|
||||
return value
|
||||
if isinstance(value, str):
|
||||
low = value.strip().lower()
|
||||
if low == "true":
|
||||
return True
|
||||
if low == "false":
|
||||
return False
|
||||
return value
|
||||
|
||||
|
||||
def _coerce_string(value: Any) -> Any:
|
||||
if isinstance(value, str):
|
||||
return value
|
||||
# Common drift: model sent ``{"label": "..."}`` when we wanted "...".
|
||||
if isinstance(value, dict):
|
||||
extracted = _extract_string_from_object(value)
|
||||
if extracted is not None:
|
||||
return extracted
|
||||
return value
|
||||
|
||||
|
||||
def _coerce_array(value: Any, schema: dict[str, Any]) -> Any:
|
||||
# Heal: JSON-encoded array string → array.
|
||||
if isinstance(value, str):
|
||||
parsed = _try_parse_json(value)
|
||||
if isinstance(parsed, list):
|
||||
value = parsed
|
||||
else:
|
||||
# Scalar string where an array is expected — wrap it.
|
||||
return [value]
|
||||
elif not isinstance(value, list):
|
||||
# Any other scalar (int, bool, dict, ...) — wrap.
|
||||
return [value]
|
||||
|
||||
items_schema = schema.get("items")
|
||||
if not isinstance(items_schema, dict):
|
||||
return value
|
||||
|
||||
coerced_items: list[Any] = []
|
||||
changed = False
|
||||
for item in value:
|
||||
c = _coerce(item, items_schema)
|
||||
if c is not item:
|
||||
changed = True
|
||||
coerced_items.append(c)
|
||||
return coerced_items if changed else value
|
||||
|
||||
|
||||
def _coerce_object(value: Any, schema: dict[str, Any]) -> Any:
|
||||
# Heal: JSON-encoded object string → object.
|
||||
if isinstance(value, str):
|
||||
parsed = _try_parse_json(value)
|
||||
if isinstance(parsed, dict):
|
||||
value = parsed
|
||||
else:
|
||||
return value
|
||||
if not isinstance(value, dict):
|
||||
return value
|
||||
|
||||
sub_props = schema.get("properties")
|
||||
if not isinstance(sub_props, dict):
|
||||
return value
|
||||
|
||||
changed = False
|
||||
for k in list(value.keys()):
|
||||
sub_schema = sub_props.get(k)
|
||||
if not isinstance(sub_schema, dict):
|
||||
continue
|
||||
original = value[k]
|
||||
coerced = _coerce(original, sub_schema)
|
||||
if coerced is not original:
|
||||
value[k] = coerced
|
||||
changed = True
|
||||
# Return the same dict on mutation so callers that passed a shared
|
||||
# reference see the updates. ``changed`` is only used to decide
|
||||
# whether we need to log at a coarser level upstream.
|
||||
return value if changed or not sub_props else value
|
||||
|
||||
|
||||
def _extract_string_from_object(obj: dict[str, Any]) -> str | None:
    """Return the most likely text payload of a wrapper object.

    The keys in ``_STRING_EXTRACT_KEYS`` are consulted in order and the
    first non-empty string wins. Failing that, a single-entry object
    yields its only value when that value is a non-empty string.
    ``None`` signals that nothing plausible was found.
    """
    preferred = (obj.get(key) for key in _STRING_EXTRACT_KEYS)
    for candidate in preferred:
        if isinstance(candidate, str) and candidate:
            return candidate

    if len(obj) != 1:
        return None

    sole = next(iter(obj.values()))
    return sole if isinstance(sole, str) and sole else None
|
||||
|
||||
|
||||
def _try_parse_json(raw: str) -> Any:
|
||||
try:
|
||||
return json.loads(raw)
|
||||
except (ValueError, TypeError):
|
||||
return None
|
||||
|
||||
|
||||
def _parse_number(raw: str) -> float | None:
|
||||
try:
|
||||
f = float(raw)
|
||||
except (ValueError, OverflowError):
|
||||
return None
|
||||
# Reject NaN and inf — they pass float() but aren't useful numeric
|
||||
# values for tool arguments.
|
||||
if f != f or f == float("inf") or f == float("-inf"):
|
||||
return None
|
||||
return f
|
||||
|
||||
|
||||
def _shape(value: Any) -> str:
|
||||
"""Short type/shape description used in coercion log lines."""
|
||||
if value is None:
|
||||
return "None"
|
||||
if isinstance(value, bool):
|
||||
return "bool"
|
||||
if isinstance(value, int):
|
||||
return "int"
|
||||
if isinstance(value, float):
|
||||
return "float"
|
||||
if isinstance(value, str):
|
||||
return f"str[{len(value)}]"
|
||||
if isinstance(value, list):
|
||||
if not value:
|
||||
return "list[0]"
|
||||
return f"list[{len(value)}]<{_shape(value[0])}>"
|
||||
if isinstance(value, dict):
|
||||
keys = sorted(value.keys())[:3]
|
||||
suffix = ",…" if len(value) > 3 else ""
|
||||
return f"dict{{{','.join(keys)}{suffix}}}"
|
||||
return type(value).__name__
|
||||
@@ -55,6 +55,12 @@ export default function QueenDM() {
|
||||
const [cloneDialogOpen, setCloneDialogOpen] = useState(false);
|
||||
const [cloneColonyName, setCloneColonyName] = useState("");
|
||||
const [cloneTask, setCloneTask] = useState("");
|
||||
const [cloneOutputs, setCloneOutputs] = useState("");
|
||||
const [cloneDataSources, setCloneDataSources] = useState("");
|
||||
const [cloneSchedule, setCloneSchedule] = useState("");
|
||||
const [cloneConcurrency, setCloneConcurrency] = useState("");
|
||||
const [showCloneSchedule, setShowCloneSchedule] = useState(false);
|
||||
const [showCloneConcurrency, setShowCloneConcurrency] = useState(false);
|
||||
// Colony-spawned lock state. Once a colony has been spawned from this DM
|
||||
// and the user clicked into it, /chat is rejected server-side and the
|
||||
// composer is replaced with a "compact + new session" button. Hydrated
|
||||
@@ -652,14 +658,53 @@ export default function QueenDM() {
|
||||
const colony = cloneColonyName.trim();
|
||||
if (!colony) return;
|
||||
const task = cloneTask.trim();
|
||||
const message = task
|
||||
? `Create a colony named \`${colony}\` for the following task:\n\n${task}`
|
||||
: `Create a colony named \`${colony}\` from this session.`;
|
||||
|
||||
const briefLines = [
|
||||
`Colony name: ${colony}`,
|
||||
`Purpose: ${task || "Use the current conversation to propose the colony's purpose."}`,
|
||||
];
|
||||
if (cloneOutputs.trim()) {
|
||||
briefLines.push(`Expected outputs: ${cloneOutputs.trim()}`);
|
||||
}
|
||||
if (cloneDataSources.trim()) {
|
||||
briefLines.push(`Inputs, data sources, tools, or credentials: ${cloneDataSources.trim()}`);
|
||||
}
|
||||
if (showCloneSchedule && cloneSchedule.trim()) {
|
||||
briefLines.push(`Schedule/triggers: ${cloneSchedule.trim()}`);
|
||||
}
|
||||
if (showCloneConcurrency && cloneConcurrency.trim()) {
|
||||
briefLines.push(`Concurrency: ${cloneConcurrency.trim()}`);
|
||||
}
|
||||
|
||||
const message = [
|
||||
"I want to set up a persistent colony.",
|
||||
"",
|
||||
briefLines.join("\n"),
|
||||
"",
|
||||
"Please use start_incubating_colony if this is appropriate. Ask me for any missing details before calling create_colony, then generate the self-contained task, skill name, skill description, skill body, and any optional triggers or concurrency hint needed by the colony.",
|
||||
].join("\n");
|
||||
|
||||
handleSend(message, "queen-dm");
|
||||
setCloneDialogOpen(false);
|
||||
setCloneColonyName("");
|
||||
setCloneTask("");
|
||||
}, [cloneColonyName, cloneTask, handleSend]);
|
||||
setCloneOutputs("");
|
||||
setCloneDataSources("");
|
||||
setCloneSchedule("");
|
||||
setCloneConcurrency("");
|
||||
setShowCloneSchedule(false);
|
||||
setShowCloneConcurrency(false);
|
||||
}, [
|
||||
cloneColonyName,
|
||||
cloneConcurrency,
|
||||
cloneDataSources,
|
||||
cloneOutputs,
|
||||
cloneSchedule,
|
||||
cloneTask,
|
||||
handleSend,
|
||||
showCloneConcurrency,
|
||||
showCloneSchedule,
|
||||
]);
|
||||
|
||||
const handleQuestionAnswer = useCallback(
|
||||
(answers: Record<string, string>) => {
|
||||
@@ -747,7 +792,7 @@ export default function QueenDM() {
|
||||
className="flex items-center gap-1 px-2.5 py-1 rounded-md text-[11px] font-medium text-primary hover:bg-primary/10 transition-colors disabled:opacity-40"
|
||||
>
|
||||
<Plus className="w-3 h-3" />
|
||||
Create a Colony
|
||||
Start Colony Setup
|
||||
</button>
|
||||
}
|
||||
/>
|
||||
@@ -759,13 +804,13 @@ export default function QueenDM() {
|
||||
className="absolute inset-0 bg-black/40 backdrop-blur-sm"
|
||||
onClick={() => setCloneDialogOpen(false)}
|
||||
/>
|
||||
<div className="relative bg-card border border-border/60 rounded-xl shadow-2xl w-full max-w-md p-6 space-y-4">
|
||||
<div className="relative bg-card border border-border/60 rounded-xl shadow-2xl w-full max-w-lg max-h-[90vh] overflow-y-auto p-6 space-y-4">
|
||||
<h2 className="text-sm font-semibold text-foreground">
|
||||
Create a Colony
|
||||
Set Up a Colony
|
||||
</h2>
|
||||
<p className="text-[11px] text-muted-foreground">
|
||||
Create a new colony from this queen's session. The colony inherits
|
||||
the queen's tools, context, and conversation history.
|
||||
Share the brief. The queen will fill gaps, write the worker skill,
|
||||
and create the colony when the setup is ready.
|
||||
</p>
|
||||
<div className="space-y-3">
|
||||
<div>
|
||||
@@ -787,17 +832,94 @@ export default function QueenDM() {
|
||||
</div>
|
||||
<div>
|
||||
<label className="block text-[11px] font-medium text-muted-foreground mb-1">
|
||||
Task{" "}
|
||||
<span className="text-muted-foreground/40">(optional)</span>
|
||||
What should it do?
|
||||
</label>
|
||||
<input
|
||||
type="text"
|
||||
<textarea
|
||||
value={cloneTask}
|
||||
onChange={(e) => setCloneTask(e.target.value)}
|
||||
placeholder="Continue the work from the queen's session"
|
||||
className="w-full rounded-md border border-border/60 bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground/50 focus:outline-none focus:ring-1 focus:ring-primary"
|
||||
placeholder="Monitor launches, process a backlog, prepare a report, or continue this session's work."
|
||||
rows={3}
|
||||
className="w-full resize-none rounded-md border border-border/60 bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground/50 focus:outline-none focus:ring-1 focus:ring-primary"
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<label className="block text-[11px] font-medium text-muted-foreground mb-1">
|
||||
Expected output{" "}
|
||||
<span className="text-muted-foreground/40">(optional)</span>
|
||||
</label>
|
||||
<textarea
|
||||
value={cloneOutputs}
|
||||
onChange={(e) => setCloneOutputs(e.target.value)}
|
||||
placeholder="A digest, saved rows, alerts, files, or a final summary."
|
||||
rows={2}
|
||||
className="w-full resize-none rounded-md border border-border/60 bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground/50 focus:outline-none focus:ring-1 focus:ring-primary"
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<label className="block text-[11px] font-medium text-muted-foreground mb-1">
|
||||
Inputs, tools, or credentials{" "}
|
||||
<span className="text-muted-foreground/40">(optional)</span>
|
||||
</label>
|
||||
<textarea
|
||||
value={cloneDataSources}
|
||||
onChange={(e) => setCloneDataSources(e.target.value)}
|
||||
placeholder="APIs, websites, files, accounts, OAuth tools, or credentials it will need."
|
||||
rows={2}
|
||||
className="w-full resize-none rounded-md border border-border/60 bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground/50 focus:outline-none focus:ring-1 focus:ring-primary"
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div className="space-y-2 pt-1">
|
||||
{!showCloneSchedule ? (
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => setShowCloneSchedule(true)}
|
||||
className="inline-flex items-center gap-2 rounded-md border border-border/60 px-3 py-2 text-xs font-medium text-muted-foreground hover:text-foreground hover:bg-muted/40 transition-colors"
|
||||
>
|
||||
<Plus className="h-3.5 w-3.5" />
|
||||
Schedule / triggers
|
||||
</button>
|
||||
) : (
|
||||
<div>
|
||||
<label className="block text-[11px] font-medium text-muted-foreground mb-1">
|
||||
Schedule / triggers{" "}
|
||||
<span className="text-muted-foreground/40">(optional)</span>
|
||||
</label>
|
||||
<textarea
|
||||
value={cloneSchedule}
|
||||
onChange={(e) => setCloneSchedule(e.target.value)}
|
||||
placeholder="Manual only, every weekday at 9 AM, every 30 minutes, or webhook path."
|
||||
rows={2}
|
||||
className="w-full resize-none rounded-md border border-border/60 bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground/50 focus:outline-none focus:ring-1 focus:ring-primary"
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{!showCloneConcurrency ? (
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => setShowCloneConcurrency(true)}
|
||||
className="inline-flex items-center gap-2 rounded-md border border-border/60 px-3 py-2 text-xs font-medium text-muted-foreground hover:text-foreground hover:bg-muted/40 transition-colors"
|
||||
>
|
||||
<Plus className="h-3.5 w-3.5" />
|
||||
Concurrency
|
||||
</button>
|
||||
) : (
|
||||
<div>
|
||||
<label className="block text-[11px] font-medium text-muted-foreground mb-1">
|
||||
Concurrency{" "}
|
||||
<span className="text-muted-foreground/40">(optional)</span>
|
||||
</label>
|
||||
<input
|
||||
type="text"
|
||||
value={cloneConcurrency}
|
||||
onChange={(e) => setCloneConcurrency(e.target.value)}
|
||||
placeholder="1 for a single worker, 5 for a parallel backlog, or any limit to respect."
|
||||
className="w-full rounded-md border border-border/60 bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground/50 focus:outline-none focus:ring-1 focus:ring-primary"
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex justify-end gap-2 pt-2">
|
||||
<button
|
||||
@@ -805,6 +927,12 @@ export default function QueenDM() {
|
||||
setCloneDialogOpen(false);
|
||||
setCloneColonyName("");
|
||||
setCloneTask("");
|
||||
setCloneOutputs("");
|
||||
setCloneDataSources("");
|
||||
setCloneSchedule("");
|
||||
setCloneConcurrency("");
|
||||
setShowCloneSchedule(false);
|
||||
setShowCloneConcurrency(false);
|
||||
}}
|
||||
className="px-3 py-1.5 rounded-md text-xs font-medium text-muted-foreground hover:text-foreground hover:bg-muted/50 transition-colors"
|
||||
>
|
||||
@@ -815,7 +943,7 @@ export default function QueenDM() {
|
||||
disabled={!cloneColonyName.trim()}
|
||||
className="px-3 py-1.5 rounded-md text-xs font-medium bg-primary text-primary-foreground hover:bg-primary/90 transition-colors disabled:opacity-50"
|
||||
>
|
||||
Create
|
||||
Start setup
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -0,0 +1,252 @@
|
||||
"""Tests for ``coerce_tool_input``.
|
||||
|
||||
The coercer centralizes healing for the small handful of schema-shape
|
||||
drift patterns that non-frontier models emit. These tests pin the
|
||||
expected behavior for each pattern plus the passthrough / failure cases.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from framework.agent_loop.internals.tool_input_coercer import coerce_tool_input
|
||||
from framework.llm.provider import Tool
|
||||
|
||||
|
||||
def _tool(parameters: dict) -> Tool:
    """Build a throwaway ``Tool`` named ``t`` whose schema is *parameters*."""
    return Tool(parameters=parameters, description="test", name="t")
|
||||
|
||||
|
||||
# ---- passthrough / no-op cases ---------------------------------------------
|
||||
|
||||
|
||||
def test_empty_input_passes_through() -> None:
    """Both an empty dict and ``None`` come back as an empty dict."""
    string_tool = _tool({"type": "object", "properties": {"x": {"type": "string"}}})
    for empty in ({}, None):
        assert coerce_tool_input(string_tool, empty) == {}
|
||||
|
||||
|
||||
def test_missing_schema_is_noop() -> None:
    """Without a schema the very same dict object is returned."""
    payload = {"anything": 123}
    result = coerce_tool_input(_tool({}), payload)
    assert result is payload
|
||||
|
||||
|
||||
def test_unknown_property_is_untouched() -> None:
    """Arguments the schema does not describe are left exactly as sent."""
    payload = {"unknown": "42"}
    coerce_tool_input(
        _tool({"type": "object", "properties": {"known": {"type": "integer"}}}),
        payload,
    )
    assert payload == {"unknown": "42"}  # untouched
|
||||
|
||||
|
||||
def test_type_already_matches_is_noop() -> None:
    """A value that already has the declared type is not modified."""
    payload = {"n": 42}
    coerce_tool_input(
        _tool({"type": "object", "properties": {"n": {"type": "integer"}}}),
        payload,
    )
    assert payload == {"n": 42}
|
||||
|
||||
|
||||
# ---- primitive coercion (the reference implementation's scope) -------------
|
||||
|
||||
|
||||
def test_string_to_integer() -> None:
    """A digit string sent for an ``integer`` property becomes an int."""
    payload = {"n": "42"}
    coerce_tool_input(
        _tool({"type": "object", "properties": {"n": {"type": "integer"}}}),
        payload,
    )
    assert payload == {"n": 42}
|
||||
|
||||
|
||||
def test_string_to_integer_rejects_fractional() -> None:
    """A fractional string is not truncated to fit an ``integer`` property."""
    payload = {"n": "3.14"}
    coerce_tool_input(
        _tool({"type": "object", "properties": {"n": {"type": "integer"}}}),
        payload,
    )
    assert payload == {"n": "3.14"}  # kept as string — schema says int
|
||||
|
||||
|
||||
def test_string_to_number_float() -> None:
    """A fractional string sent for a ``number`` property becomes a float."""
    payload = {"n": "3.14"}
    coerce_tool_input(
        _tool({"type": "object", "properties": {"n": {"type": "number"}}}),
        payload,
    )
    assert payload == {"n": 3.14}
|
||||
|
||||
|
||||
def test_string_to_number_whole() -> None:
    """A whole-number string sent for a ``number`` property collapses to int."""
    payload = {"n": "42"}
    coerce_tool_input(
        _tool({"type": "object", "properties": {"n": {"type": "number"}}}),
        payload,
    )
    assert payload == {"n": 42}  # whole numbers collapse to int
|
||||
|
||||
|
||||
def test_string_to_boolean() -> None:
    """``"true"`` / ``"False"`` become booleans; other strings are kept."""
    flag = {"type": "boolean"}
    bool_tool = _tool(
        {"type": "object", "properties": {"a": flag, "b": flag, "c": flag}}
    )
    payload = {"a": "true", "b": "False", "c": "nope"}
    coerce_tool_input(bool_tool, payload)
    assert payload == {"a": True, "b": False, "c": "nope"}
|
||||
|
||||
|
||||
def test_union_type_first_match_wins() -> None:
    """In a union, the first member type whose coercion changes the value wins."""
    union_schema = {"type": ["integer", "string"]}
    payload = {"x": "42"}
    coerce_tool_input(
        _tool({"type": "object", "properties": {"x": union_schema}}),
        payload,
    )
    assert payload == {"x": 42}
|
||||
|
||||
|
||||
def test_nan_and_inf_rejected() -> None:
    """Non-finite numeric strings stay strings for a ``number`` property.

    Covers both infinities and NaN, as the test name promises; none of
    them is a valid tool argument, so the original string must be kept.
    """
    tool = _tool({"type": "object", "properties": {"n": {"type": "number"}}})
    for literal in ("inf", "-inf", "nan"):
        args = {"n": literal}
        coerce_tool_input(tool, args)
        assert args == {"n": literal}
|
||||
|
||||
|
||||
# ---- the ask_user bug: [{"label": "..."}] -> ["..."] ------------------------
|
||||
|
||||
|
||||
def test_array_of_label_objects_unwraps_to_strings() -> None:
    """``[{"label": ...}]`` sent for an array of strings is flattened."""
    string_list = {"type": "array", "items": {"type": "string"}}
    payload = {"options": [{"label": "A"}, {"label": "B"}, {"label": "C"}]}
    coerce_tool_input(
        _tool({"type": "object", "properties": {"options": string_list}}),
        payload,
    )
    assert payload == {"options": ["A", "B", "C"]}
|
||||
|
||||
|
||||
def test_array_of_value_objects_unwraps() -> None:
    """The ``value``, ``text`` and ``name`` wrapper keys are unwrapped as well."""
    string_list = {"type": "array", "items": {"type": "string"}}
    payload = {"xs": [{"value": "A"}, {"text": "B"}, {"name": "C"}]}
    coerce_tool_input(
        _tool({"type": "object", "properties": {"xs": string_list}}),
        payload,
    )
    assert payload == {"xs": ["A", "B", "C"]}
|
||||
|
||||
|
||||
def test_single_key_object_falls_back_to_sole_value() -> None:
    """A one-entry object with an unknown key yields its only value."""
    string_list = {"type": "array", "items": {"type": "string"}}
    payload = {"xs": [{"weirdkey": "A"}]}
    coerce_tool_input(
        _tool({"type": "object", "properties": {"xs": string_list}}),
        payload,
    )
    assert payload == {"xs": ["A"]}
|
||||
|
||||
|
||||
def test_unrecognized_object_is_preserved() -> None:
    """An ambiguous object (no known key, several values) is not guessed at."""
    string_list = {"type": "array", "items": {"type": "string"}}
    payload = {"xs": [{"a": "x", "b": "y"}]}  # ambiguous — no known key, multi-value
    coerce_tool_input(
        _tool({"type": "object", "properties": {"xs": string_list}}),
        payload,
    )
    assert payload == {"xs": [{"a": "x", "b": "y"}]}  # untouched
|
||||
|
||||
|
||||
# ---- JSON-encoded-string-as-array ------------------------------------------
|
||||
|
||||
|
||||
def test_json_string_array_is_parsed() -> None:
    """An array sent as a JSON-encoded string is decoded into a list."""
    string_list = {"type": "array", "items": {"type": "string"}}
    payload = {"xs": '["A","B","C"]'}
    coerce_tool_input(
        _tool({"type": "object", "properties": {"xs": string_list}}),
        payload,
    )
    assert payload == {"xs": ["A", "B", "C"]}
|
||||
|
||||
|
||||
def test_scalar_wraps_into_singleton_array() -> None:
    """A lone string where an array is expected is wrapped in a list."""
    string_list = {"type": "array", "items": {"type": "string"}}
    payload = {"xs": "solo"}
    coerce_tool_input(
        _tool({"type": "object", "properties": {"xs": string_list}}),
        payload,
    )
    assert payload == {"xs": ["solo"]}
|
||||
|
||||
|
||||
def test_invalid_json_string_wraps_as_singleton() -> None:
    """A string that is not valid JSON is wrapped as-is, not dropped."""
    string_list = {"type": "array", "items": {"type": "string"}}
    payload = {"xs": "not json [[]"}
    coerce_tool_input(
        _tool({"type": "object", "properties": {"xs": string_list}}),
        payload,
    )
    assert payload == {"xs": ["not json [[]"]}
|
||||
|
||||
|
||||
# ---- nested: the actual ask_user schema shape -------------------------------
|
||||
|
||||
|
||||
def test_nested_questions_array_with_wrapped_options() -> None:
    """Exercises the real bug — questions[i].options arriving as [{label}]."""
    question_schema = {
        "type": "object",
        "properties": {
            "id": {"type": "string"},
            "prompt": {"type": "string"},
            "options": {"type": "array", "items": {"type": "string"}},
        },
    }
    ask_tool = _tool(
        {
            "type": "object",
            "properties": {
                "questions": {"type": "array", "items": question_schema},
            },
        }
    )
    wrapped_question = {
        "id": "q1",
        "prompt": "Pick one",
        "options": [{"label": "Email (Recommended)"}, {"label": "Slack"}],
    }
    free_form_question = {"id": "q2", "prompt": "Free form"}
    payload = {"questions": [wrapped_question, free_form_question]}

    coerce_tool_input(ask_tool, payload)

    first, second = payload["questions"][0], payload["questions"][1]
    assert first["options"] == ["Email (Recommended)", "Slack"]
    assert second == {"id": "q2", "prompt": "Free form"}
|
||||
|
||||
|
||||
def test_json_string_for_object_is_parsed() -> None:
    """An object sent as a JSON string is decoded and its fields coerced."""
    cfg_schema = {"type": "object", "properties": {"n": {"type": "integer"}}}
    payload = {"cfg": '{"n": "42"}'}
    coerce_tool_input(
        _tool({"type": "object", "properties": {"cfg": cfg_schema}}),
        payload,
    )
    assert payload == {"cfg": {"n": 42}}
|
||||
|
||||
|
||||
# ---- string property receiving a {label} object -----------------------------
|
||||
|
||||
|
||||
def test_single_string_property_unwraps_label_object() -> None:
    """A ``{"label": ...}`` object sent for a plain string property is unwrapped."""
    payload = {"choice": {"label": "Email"}}
    coerce_tool_input(
        _tool({"type": "object", "properties": {"choice": {"type": "string"}}}),
        payload,
    )
    assert payload == {"choice": "Email"}
|
||||
Reference in New Issue
Block a user