feat(tool call): add format _coerce before execution

2026-04-20 18:58:12 -07:00
parent 4e9d9bf1ea
commit 81731587ff
5 changed files with 720 additions and 38 deletions
@@ -71,6 +71,7 @@ from framework.agent_loop.internals.synthetic_tools import (
    build_report_to_parent_tool,
    handle_report_to_parent,
 )
+from framework.agent_loop.internals.tool_input_coercer import coerce_tool_input
 from framework.agent_loop.internals.tool_result_handler import (
    build_json_preview,
    execute_tool,
@@ -2835,7 +2836,17 @@ class AgentLoop(AgentProtocol):
            # nudge on its next turn without losing the real execution output.
            replay_prefixes_by_id: dict[str, str] = {}

+            # Schema-driven coercion of tool arguments. Heals the small
+            # handful of drift patterns that non-frontier models emit
+            # (numbers-as-strings, array-of-{label} wrappers, arrays
+            # sent as JSON strings, singleton scalars). Runs once per
+            # tool call before dispatch; see tool_input_coercer module.
+            _tool_by_name = {t.name: t for t in tools}
+
            for tc in tool_calls:
+                _tool_schema = _tool_by_name.get(tc.tool_name)
+                if _tool_schema is not None:
+                    coerce_tool_input(_tool_schema, tc.tool_input)
                tool_call_count += 1
                if hard_limit > 0 and tool_call_count > hard_limit:
                    limit_hit = True
@@ -2893,11 +2904,15 @@ class AgentLoop(AgentProtocol):
                        user_input_requested = False
                        continue

-                    # Normalize + self-heal each question entry. Some
-                    # model families cram options inside the prompt as a
-                    # pseudo-XML blob like "What now?</question>\n
-                    # _OPTIONS: [\"A\", \"B\"]". sanitize_ask_user_inputs
-                    # strips the tag and recovers the inline options.
+                    # Normalize + self-heal each question entry. The
+                    # generic tool_input_coercer has already handled
+                    # schema-shape drift ({"label": ...}-wrapped options, JSON
+                    # strings, etc.), so here we only deal with
+                    # prompt-style drift: some model families cram
+                    # options inside the prompt as a pseudo-XML blob
+                    # like "What now?</question>\n_OPTIONS: [\"A\", \"B\"]".
+                    # sanitize_ask_user_inputs strips the tag and
+                    # recovers the inline options as a fallback.
                    questions: list[dict] = []
                    for i, q in enumerate(raw_questions):
                        if not isinstance(q, dict):
@@ -2906,21 +2921,12 @@ class AgentLoop(AgentProtocol):
                        raw_prompt = q.get("prompt", q.get("question", ""))
                        raw_opts = q.get("options", None)
                        cleaned_prompt, recovered_opts = sanitize_ask_user_inputs(raw_prompt, raw_opts)
-                        if recovered_opts is not None and raw_opts is None:
-                            raw_opts = recovered_opts

                        opts: list[str] | None = None
-                        if isinstance(raw_opts, list):
+                        if isinstance(raw_opts, list) and raw_opts:
                            opts = [str(o) for o in raw_opts if o]
-                        elif isinstance(raw_opts, str) and raw_opts.strip():
-                            # Defensive: smaller models sometimes send a
-                            # JSON-encoded string instead of an array.
-                            try:
-                                parsed = json.loads(raw_opts)
-                                if isinstance(parsed, list):
-                                    opts = [str(o) for o in parsed if o]
-                            except (json.JSONDecodeError, TypeError):
-                                pass
+                        elif recovered_opts is not None:
+                            opts = recovered_opts
                        if opts is not None and len(opts) < 2:
                            opts = None  # fall back to free-text

@@ -101,9 +101,9 @@ Use this tool when you need to ask the user questions during execution. Reach fo
 - You want post-task feedback, or to offer saving a skill or updating memory

 Usage notes:
- Users will always be able to select "Other" to provide custom text input
- Use multiSelect: true to allow multiple answers to be selected for a question
- If you recommend a specific option, make that the first option in the list and add "(Recommended)" at the end of the label
+- Users will always be able to select "Other" to provide custom text input, so do not include catch-all options like "Other" or "Something else" yourself.
+- Each option is a plain string. Do NOT wrap options in `{"label": "..."}` or `{"value": "..."}` objects — pass the raw choice text directly, e.g. `"Email"`, not `{"label": "Email"}`.
+- If you recommend a specific option, make that the first option in the list and append " (Recommended)" to the end of its text.
 - Call this tool whenever you need the user's response.
 - The prompt field must be plain text only.
 - Do not include XML, pseudo-tags, or inline option lists inside prompt.
@@ -159,8 +159,13 @@ def build_ask_user_tool() -> Tool:
                                "type": "array",
                                "items": {"type": "string"},
                                "description": (
-                                    "2-3 predefined choices. The UI appends an "
-                                    "'Other' free-text input automatically. "
+                                    "2-3 predefined choices as plain strings "
+                                    '(e.g. ["Yes", "No", "Maybe"]). Do NOT '
+                                    'wrap items in {"label": "..."} or '
+                                    '{"value": "..."} objects — pass the raw '
+                                    "choice text directly. The UI appends an "
+                                    "'Other' free-text input automatically, "
+                                    "so don't include catch-all options. "
                                    "Omit only when the user must type a free-form answer."
                                ),
                                "minItems": 2,
@@ -0,0 +1,291 @@
+"""Generic coercion of LLM-emitted tool arguments to match each tool's JSON schema.
+
+Small/mid-size models drift from tool schemas in predictable, boring ways:
+
+- A number field comes back as a string (``"42"`` instead of ``42``).
+- A boolean field comes back as a string (``"true"`` instead of ``True``).
+- An array-of-string field comes back as an array of objects
+  (``[{"label": "A"}, ...]`` instead of ``["A", ...]``).
+- An array/object field comes back as a JSON-encoded string
+  (``'["A","B"]'`` instead of ``["A", "B"]``).
+- A lone scalar arrives where the schema expects an array.
+
+This module centralizes the healing in one schema-driven pass that runs
+on every tool call before dispatch. Coercion is conservative:
+
+- Values that already match the expected type are untouched.
+- Shapes we don't recognize are returned as-is, so real bugs surface
+  instead of getting silently munged into something plausible.
+- Every actual coercion is logged with the tool, property, and shape
+  transition so we can see which models/tools are drifting.
+
+Tool-specific prompt drift (e.g. ``</question>`` tags leaking into an
+``ask_user`` prompt string) is NOT this module's job — that belongs in
+per-tool sanitizers, because it's about prompt style, not schema shape.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from typing import Any
+
+from framework.llm.provider import Tool
+
+logger = logging.getLogger(__name__)
+
+# Keys probed, in preference order, when a wrapper object arrives where
+# a plain string is expected (a ``string`` property or an item of an
+# ``array<string>``). The first key holding a non-empty string wins.
+# Covers the wrappers small models tend to produce:
+# ``[{"label": "A"}]``, ``[{"value": "A"}]``, ``[{"text": "A"}]``, etc.
+_STRING_EXTRACT_KEYS: tuple[str, ...] = (
+    "label",
+    "value",
+    "text",
+    "name",
+    "title",
+    "display",
+)
+
+
+def coerce_tool_input(tool: Tool, raw_input: dict[str, Any] | None) -> dict[str, Any]:
+    """Heal schema-shape drift in *raw_input* against *tool*'s JSON schema.
+
+    Each top-level key that the schema's ``properties`` declares is run
+    through ``_coerce``. Whenever a different object comes back it is
+    stored under the same key of *raw_input* and the shape transition
+    is logged. Keys the schema does not declare are skipped.
+
+    Returns *raw_input* itself when it is a dict. A non-dict argument is
+    returned unchanged when truthy and replaced by ``{}`` otherwise, so
+    ``None`` yields an empty dict.
+    """
+    if not isinstance(raw_input, dict):
+        return raw_input or {}
+
+    declared = (tool.parameters or {}).get("properties")
+    if not isinstance(declared, dict):
+        return raw_input
+
+    # Walk a snapshot: entries are reassigned inside the loop.
+    for name, before in list(raw_input.items()):
+        prop_schema = declared.get(name)
+        if not isinstance(prop_schema, dict):
+            continue
+        after = _coerce(before, prop_schema)
+        if after is before:
+            continue
+        logger.info(
+            "coerced tool input tool=%s prop=%s from=%s to=%s",
+            tool.name,
+            name,
+            _shape(before),
+            _shape(after),
+        )
+        raw_input[name] = after
+
+    return raw_input
+
+
+def _coerce(value: Any, schema: dict[str, Any]) -> Any:
+    """Coerce *value* toward the JSON-schema ``type`` declared in *schema*.
+
+    Returns the *same object* on passthrough so callers can detect
+    no-ops via identity (``coerced is value``).
+
+    ``None`` is always passed through: a null argument is either valid
+    (nullable / optional field) or a real bug the tool should see, and
+    wrapping it into ``[None]`` for array fields only hides it.
+    """
+    expected = schema.get("type")
+    if not expected or value is None:
+        return value
+
+    # Union type: try each member in order and return the first coercion
+    # that actually changes the value. Falls back to the original.
+    if isinstance(expected, list):
+        for t in expected:
+            # Array coercion wraps any non-list scalar, which would mangle
+            # a value that is already valid for another member of the
+            # union (e.g. "hello" under ["string", "array"]). Skip it then.
+            if (
+                t == "array"
+                and not isinstance(value, list)
+                and any(_matches_json_type(value, other) for other in expected)
+            ):
+                continue
+            coerced = _coerce(value, {**schema, "type": t})
+            if coerced is not value:
+                return coerced
+        return value
+
+    if expected == "integer":
+        return _coerce_integer(value)
+    if expected == "number":
+        return _coerce_number(value)
+    if expected == "boolean":
+        return _coerce_boolean(value)
+    if expected == "string":
+        return _coerce_string(value)
+    if expected == "array":
+        return _coerce_array(value, schema)
+    if expected == "object":
+        return _coerce_object(value, schema)
+
+    # Unknown type keyword: leave the value alone.
+    return value
+
+
+def _matches_json_type(value: Any, type_name: Any) -> bool:
+    """Return True when *value* is already a native instance of *type_name*."""
+    if type_name == "string":
+        return isinstance(value, str)
+    if type_name == "boolean":
+        return isinstance(value, bool)
+    # bool is a subclass of int; it is not a JSON integer/number.
+    if type_name == "integer":
+        return isinstance(value, int) and not isinstance(value, bool)
+    if type_name == "number":
+        return isinstance(value, (int, float)) and not isinstance(value, bool)
+    if type_name == "object":
+        return isinstance(value, dict)
+    if type_name == "array":
+        return isinstance(value, list)
+    if type_name == "null":
+        return value is None
+    return False
+
+
+def _coerce_integer(value: Any) -> Any:
+    """Turn a string holding a whole number into an ``int``.
+
+    Non-string values (including ``bool``, which is an ``int`` subclass
+    in Python) are returned unchanged. Strings that are not numeric, or
+    that carry a fractional part, are returned unchanged as well — the
+    schema asked for an integer, so nothing is truncated.
+    """
+    if not isinstance(value, str):
+        return value
+    try:
+        # Parse digit strings exactly. Going through float() first
+        # silently rounds anything above 2**53 (e.g. large numeric IDs).
+        return int(value)
+    except ValueError:
+        pass
+    # Fall back to float parsing for spellings like "42.0" or "1e3".
+    parsed = _parse_number(value)
+    if parsed is None or not parsed.is_integer():
+        return value
+    return int(parsed)
+
+
+def _coerce_number(value: Any) -> Any:
+    """Turn a numeric string into an ``int`` or ``float``.
+
+    Non-string values are returned unchanged. Whole numbers collapse to
+    ``int``; anything with a fractional part becomes a ``float``.
+    Strings that do not parse as a finite number are returned unchanged.
+    """
+    if not isinstance(value, str):
+        return value
+    try:
+        # Exact path for plain integers: float() would round values
+        # above 2**53 before they are converted back to int.
+        return int(value)
+    except ValueError:
+        pass
+    parsed = _parse_number(value)
+    if parsed is None:
+        return value
+    return int(parsed) if parsed.is_integer() else parsed
+
+
+def _coerce_boolean(value: Any) -> Any:
+    """Map the strings ``"true"`` / ``"false"`` to real booleans.
+
+    Matching ignores case and surrounding whitespace. Every other value
+    (including anything that is not a string) is returned unchanged.
+    """
+    if not isinstance(value, str):
+        return value
+    token = value.strip().lower()
+    if token in ("true", "false"):
+        return token == "true"
+    return value
+
+
+def _coerce_string(value: Any) -> Any:
+    """Unwrap a ``{"label": "..."}``-style object into its text.
+
+    Only dicts are inspected; when no plausible text field is found the
+    dict itself is returned. Every other value passes through unchanged.
+    """
+    if not isinstance(value, dict):
+        return value
+    unwrapped = _extract_string_from_object(value)
+    return value if unwrapped is None else unwrapped
+
+
+def _coerce_array(value: Any, schema: dict[str, Any]) -> Any:
+    """Coerce *value* into a list and heal its items against ``items``.
+
+    - ``None`` is returned unchanged (a null argument is not a singleton).
+    - A string holding a JSON array is parsed; any other string, and any
+      other non-list value, is wrapped into a one-element list.
+    - When the schema declares an ``items`` schema, every element —
+      including a freshly wrapped scalar — is coerced against it.
+
+    Returns the original list object when nothing changed, so callers
+    can detect a no-op via identity.
+    """
+    if value is None:
+        return value
+
+    if isinstance(value, list):
+        items = value
+    elif isinstance(value, str):
+        # Heal: JSON-encoded array string → array; otherwise wrap.
+        parsed = _try_parse_json(value)
+        items = parsed if isinstance(parsed, list) else [value]
+    else:
+        # Any other scalar (int, bool, dict, ...) — wrap.
+        items = [value]
+
+    items_schema = schema.get("items")
+    if not isinstance(items_schema, dict):
+        return items
+
+    coerced_items = [_coerce(item, items_schema) for item in items]
+    if all(new is old for new, old in zip(coerced_items, items)):
+        return items
+    return coerced_items
+
+
+def _coerce_object(value: Any, schema: dict[str, Any]) -> Any:
+    """Coerce *value* into a dict and heal its declared properties.
+
+    A string holding a JSON object is parsed first. Values that are not
+    dicts (and strings that do not decode to one) are returned
+    unchanged. Properties listed in the schema's ``properties`` are
+    coerced recursively; undeclared keys are left alone.
+
+    The input dict is never mutated: when any property changes, a
+    shallow copy carrying the coerced values is returned. Callers detect
+    changes by identity (``coerced is not value``), so returning the
+    same object after an in-place edit would hide nested coercions from
+    them and from the coercion log.
+    """
+    source = value
+    if isinstance(value, str):
+        # Heal: JSON-encoded object string → object.
+        parsed = _try_parse_json(value)
+        if not isinstance(parsed, dict):
+            return value
+        source = parsed
+    elif not isinstance(value, dict):
+        return value
+
+    sub_props = schema.get("properties")
+    if not isinstance(sub_props, dict):
+        return source
+
+    result: dict[str, Any] | None = None
+    for k, original in source.items():
+        sub_schema = sub_props.get(k)
+        if not isinstance(sub_schema, dict):
+            continue
+        coerced = _coerce(original, sub_schema)
+        if coerced is not original:
+            if result is None:
+                # Copy lazily so the untouched case keeps its identity.
+                result = dict(source)
+            result[k] = coerced
+    return source if result is None else result
+
+
+def _extract_string_from_object(obj: dict[str, Any]) -> str | None:
+    """Return the most likely text payload of a wrapper object.
+
+    The known wrapper keys are checked first, in preference order; if
+    none holds a non-empty string and the object has exactly one entry,
+    that entry's value is used instead. ``None`` means nothing plausible
+    was found and the caller should keep the original object.
+    """
+
+    def _usable(candidate: Any) -> bool:
+        return isinstance(candidate, str) and bool(candidate)
+
+    known = next(
+        (obj[key] for key in _STRING_EXTRACT_KEYS if _usable(obj.get(key))),
+        None,
+    )
+    if known is not None:
+        return known
+
+    if len(obj) == 1:
+        sole = next(iter(obj.values()))
+        if _usable(sole):
+            return sole
+    return None
+
+
+def _try_parse_json(raw: str) -> Any:
+    """Decode *raw* as JSON, returning ``None`` when decoding fails."""
+    try:
+        decoded = json.loads(raw)
+    except (ValueError, TypeError):
+        decoded = None
+    return decoded
+
+
+def _parse_number(raw: str) -> float | None:
+    """Parse *raw* as a finite float, or return ``None``.
+
+    NaN and ±infinity are rejected: ``float()`` accepts their spellings,
+    but they are not useful numeric values for tool arguments.
+    """
+    try:
+        number = float(raw)
+    except (ValueError, OverflowError):
+        return None
+    is_nan = number != number
+    if is_nan or abs(number) == float("inf"):
+        return None
+    return number
+
+
+def _shape(value: Any) -> str:
+    """Describe *value*'s type/shape compactly for coercion log lines.
+
+    Strings report their length, lists their length plus the shape of
+    the first element, dicts up to three sorted keys.
+    """
+    if value is None:
+        return "None"
+    # bool must be probed before int: bool is an int subclass.
+    for kind, label in ((bool, "bool"), (int, "int"), (float, "float")):
+        if isinstance(value, kind):
+            return label
+    if isinstance(value, str):
+        return f"str[{len(value)}]"
+    if isinstance(value, list):
+        head = f"<{_shape(value[0])}>" if value else ""
+        return f"list[{len(value)}]{head}"
+    if isinstance(value, dict):
+        shown = ",".join(sorted(value)[:3])
+        more = ",…" if len(value) > 3 else ""
+        return "dict{" + shown + more + "}"
+    return type(value).__name__
@@ -55,6 +55,12 @@ export default function QueenDM() {
  const [cloneDialogOpen, setCloneDialogOpen] = useState(false);
  const [cloneColonyName, setCloneColonyName] = useState("");
  const [cloneTask, setCloneTask] = useState("");
+  const [cloneOutputs, setCloneOutputs] = useState("");
+  const [cloneDataSources, setCloneDataSources] = useState("");
+  const [cloneSchedule, setCloneSchedule] = useState("");
+  const [cloneConcurrency, setCloneConcurrency] = useState("");
+  const [showCloneSchedule, setShowCloneSchedule] = useState(false);
+  const [showCloneConcurrency, setShowCloneConcurrency] = useState(false);
  // Colony-spawned lock state. Once a colony has been spawned from this DM
  // and the user clicked into it, /chat is rejected server-side and the
  // composer is replaced with a "compact + new session" button. Hydrated
@@ -652,14 +658,53 @@ export default function QueenDM() {
    const colony = cloneColonyName.trim();
    if (!colony) return;
    const task = cloneTask.trim();
-    const message = task
-      ? `Create a colony named \`${colony}\` for the following task:\n\n${task}`
-      : `Create a colony named \`${colony}\` from this session.`;
+
+    const briefLines = [
+      `Colony name: ${colony}`,
+      `Purpose: ${task || "Use the current conversation to propose the colony's purpose."}`,
+    ];
+    if (cloneOutputs.trim()) {
+      briefLines.push(`Expected outputs: ${cloneOutputs.trim()}`);
+    }
+    if (cloneDataSources.trim()) {
+      briefLines.push(`Inputs, data sources, tools, or credentials: ${cloneDataSources.trim()}`);
+    }
+    if (showCloneSchedule && cloneSchedule.trim()) {
+      briefLines.push(`Schedule/triggers: ${cloneSchedule.trim()}`);
+    }
+    if (showCloneConcurrency && cloneConcurrency.trim()) {
+      briefLines.push(`Concurrency: ${cloneConcurrency.trim()}`);
+    }
+
+    const message = [
+      "I want to set up a persistent colony.",
+      "",
+      briefLines.join("\n"),
+      "",
+      "Please use start_incubating_colony if this is appropriate. Ask me for any missing details before calling create_colony, then generate the self-contained task, skill name, skill description, skill body, and any optional triggers or concurrency hint needed by the colony.",
+    ].join("\n");
+
    handleSend(message, "queen-dm");
    setCloneDialogOpen(false);
    setCloneColonyName("");
    setCloneTask("");
-  }, [cloneColonyName, cloneTask, handleSend]);
+    setCloneOutputs("");
+    setCloneDataSources("");
+    setCloneSchedule("");
+    setCloneConcurrency("");
+    setShowCloneSchedule(false);
+    setShowCloneConcurrency(false);
+  }, [
+    cloneColonyName,
+    cloneConcurrency,
+    cloneDataSources,
+    cloneOutputs,
+    cloneSchedule,
+    cloneTask,
+    handleSend,
+    showCloneConcurrency,
+    showCloneSchedule,
+  ]);

  const handleQuestionAnswer = useCallback(
    (answers: Record<string, string>) => {
@@ -747,7 +792,7 @@ export default function QueenDM() {
              className="flex items-center gap-1 px-2.5 py-1 rounded-md text-[11px] font-medium text-primary hover:bg-primary/10 transition-colors disabled:opacity-40"
            >
              <Plus className="w-3 h-3" />
-              Create a Colony
+              Start Colony Setup
            </button>
          }
        />
@@ -759,13 +804,13 @@ export default function QueenDM() {
            className="absolute inset-0 bg-black/40 backdrop-blur-sm"
            onClick={() => setCloneDialogOpen(false)}
          />
-          <div className="relative bg-card border border-border/60 rounded-xl shadow-2xl w-full max-w-md p-6 space-y-4">
+          <div className="relative bg-card border border-border/60 rounded-xl shadow-2xl w-full max-w-lg max-h-[90vh] overflow-y-auto p-6 space-y-4">
            <h2 className="text-sm font-semibold text-foreground">
-              Create a Colony
+              Set Up a Colony
            </h2>
            <p className="text-[11px] text-muted-foreground">
-              Create a new colony from this queen's session. The colony inherits
-              the queen's tools, context, and conversation history.
+              Share the brief. The queen will fill gaps, write the worker skill,
+              and create the colony when the setup is ready.
            </p>
            <div className="space-y-3">
              <div>
@@ -787,17 +832,94 @@ export default function QueenDM() {
              </div>
              <div>
                <label className="block text-[11px] font-medium text-muted-foreground mb-1">
-                  Task{" "}
+                  What should it do?
+                </label>
+                <textarea
+                  value={cloneTask}
+                  onChange={(e) => setCloneTask(e.target.value)}
+                  placeholder="Monitor launches, process a backlog, prepare a report, or continue this session's work."
+                  rows={3}
+                  className="w-full resize-none rounded-md border border-border/60 bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground/50 focus:outline-none focus:ring-1 focus:ring-primary"
+                />
+              </div>
+              <div>
+                <label className="block text-[11px] font-medium text-muted-foreground mb-1">
+                  Expected output{" "}
+                  <span className="text-muted-foreground/40">(optional)</span>
+                </label>
+                <textarea
+                  value={cloneOutputs}
+                  onChange={(e) => setCloneOutputs(e.target.value)}
+                  placeholder="A digest, saved rows, alerts, files, or a final summary."
+                  rows={2}
+                  className="w-full resize-none rounded-md border border-border/60 bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground/50 focus:outline-none focus:ring-1 focus:ring-primary"
+                />
+              </div>
+              <div>
+                <label className="block text-[11px] font-medium text-muted-foreground mb-1">
+                  Inputs, tools, or credentials{" "}
+                  <span className="text-muted-foreground/40">(optional)</span>
+                </label>
+                <textarea
+                  value={cloneDataSources}
+                  onChange={(e) => setCloneDataSources(e.target.value)}
+                  placeholder="APIs, websites, files, accounts, OAuth tools, or credentials it will need."
+                  rows={2}
+                  className="w-full resize-none rounded-md border border-border/60 bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground/50 focus:outline-none focus:ring-1 focus:ring-primary"
+                />
+              </div>
+
+              <div className="space-y-2 pt-1">
+                {!showCloneSchedule ? (
+                  <button
+                    type="button"
+                    onClick={() => setShowCloneSchedule(true)}
+                    className="inline-flex items-center gap-2 rounded-md border border-border/60 px-3 py-2 text-xs font-medium text-muted-foreground hover:text-foreground hover:bg-muted/40 transition-colors"
+                  >
+                    <Plus className="h-3.5 w-3.5" />
+                    Schedule / triggers
+                  </button>
+                ) : (
+                  <div>
+                    <label className="block text-[11px] font-medium text-muted-foreground mb-1">
+                      Schedule / triggers{" "}
+                      <span className="text-muted-foreground/40">(optional)</span>
+                    </label>
+                    <textarea
+                      value={cloneSchedule}
+                      onChange={(e) => setCloneSchedule(e.target.value)}
+                      placeholder="Manual only, every weekday at 9 AM, every 30 minutes, or webhook path."
+                      rows={2}
+                      className="w-full resize-none rounded-md border border-border/60 bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground/50 focus:outline-none focus:ring-1 focus:ring-primary"
+                    />
+                  </div>
+                )}
+
+                {!showCloneConcurrency ? (
+                  <button
+                    type="button"
+                    onClick={() => setShowCloneConcurrency(true)}
+                    className="inline-flex items-center gap-2 rounded-md border border-border/60 px-3 py-2 text-xs font-medium text-muted-foreground hover:text-foreground hover:bg-muted/40 transition-colors"
+                  >
+                    <Plus className="h-3.5 w-3.5" />
+                    Concurrency
+                  </button>
+                ) : (
+                  <div>
+                    <label className="block text-[11px] font-medium text-muted-foreground mb-1">
+                      Concurrency{" "}
                      <span className="text-muted-foreground/40">(optional)</span>
                    </label>
                    <input
                      type="text"
-                  value={cloneTask}
-                  onChange={(e) => setCloneTask(e.target.value)}
-                  placeholder="Continue the work from the queen's session"
+                      value={cloneConcurrency}
+                      onChange={(e) => setCloneConcurrency(e.target.value)}
+                      placeholder="1 for a single worker, 5 for a parallel backlog, or any limit to respect."
                      className="w-full rounded-md border border-border/60 bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground/50 focus:outline-none focus:ring-1 focus:ring-primary"
                    />
                  </div>
+                )}
+              </div>
            </div>
            <div className="flex justify-end gap-2 pt-2">
              <button
@@ -805,6 +927,12 @@ export default function QueenDM() {
                  setCloneDialogOpen(false);
                  setCloneColonyName("");
                  setCloneTask("");
+                  setCloneOutputs("");
+                  setCloneDataSources("");
+                  setCloneSchedule("");
+                  setCloneConcurrency("");
+                  setShowCloneSchedule(false);
+                  setShowCloneConcurrency(false);
                }}
                className="px-3 py-1.5 rounded-md text-xs font-medium text-muted-foreground hover:text-foreground hover:bg-muted/50 transition-colors"
              >
@@ -815,7 +943,7 @@ export default function QueenDM() {
                disabled={!cloneColonyName.trim()}
                className="px-3 py-1.5 rounded-md text-xs font-medium bg-primary text-primary-foreground hover:bg-primary/90 transition-colors disabled:opacity-50"
              >
-                Create
+                Start setup
              </button>
            </div>
          </div>
@@ -0,0 +1,252 @@
+"""Tests for ``coerce_tool_input``.
+
+The coercer centralizes healing for the small handful of schema-shape
+drift patterns that non-frontier models emit. These tests pin the
+expected behavior for each pattern plus the passthrough / failure cases.
+"""
+
+from __future__ import annotations
+
+from framework.agent_loop.internals.tool_input_coercer import coerce_tool_input
+from framework.llm.provider import Tool
+
+
+def _tool(parameters: dict) -> Tool:
+    """Build a throwaway ``Tool`` whose JSON schema is *parameters*."""
+    return Tool(parameters=parameters, name="t", description="test")
+
+
+# ---- passthrough / no-op cases ---------------------------------------------
+
+
+def test_empty_input_passes_through() -> None:
+    """An empty dict and ``None`` both come back as an empty dict."""
+    spec = _tool({"type": "object", "properties": {"x": {"type": "string"}}})
+    for empty in ({}, None):
+        assert coerce_tool_input(spec, empty) == {}
+
+
+def test_missing_schema_is_noop() -> None:
+    """A tool without a schema hands back the very same input object."""
+    payload = {"anything": 123}
+    returned = coerce_tool_input(_tool({}), payload)
+    assert returned is payload
+
+
+def test_unknown_property_is_untouched() -> None:
+    """Keys the schema does not declare are never coerced."""
+    spec = _tool({"type": "object", "properties": {"known": {"type": "integer"}}})
+    payload = {"unknown": "42"}
+    coerce_tool_input(spec, payload)
+    assert payload == {"unknown": "42"}  # untouched
+
+
+def test_type_already_matches_is_noop() -> None:
+    """A value that already has the declared type is left exactly as-is."""
+    tool = _tool({"type": "object", "properties": {"n": {"type": "integer"}}})
+    args = {"n": 42}
+    returned = coerce_tool_input(tool, args)
+    assert returned is args
+    assert args == {"n": 42}
+    # ``42 == 42.0`` in Python, so equality alone would not notice an
+    # int being rewritten as a float; pin the type too.
+    assert type(args["n"]) is int
+
+
+# ---- primitive coercion (the reference implementation's scope) -------------
+
+
+def test_string_to_integer() -> None:
+    """A digit string under an ``integer`` property becomes a real int."""
+    tool = _tool({"type": "object", "properties": {"n": {"type": "integer"}}})
+    args = {"n": "42"}
+    coerce_tool_input(tool, args)
+    assert args == {"n": 42}
+    # ``{"n": 42.0} == {"n": 42}`` is True, so also pin the type.
+    assert type(args["n"]) is int
+
+
+def test_string_to_integer_rejects_fractional() -> None:
+    """A fractional string under an ``integer`` property is not truncated."""
+    spec = _tool({"type": "object", "properties": {"n": {"type": "integer"}}})
+    payload = {"n": "3.14"}
+    coerce_tool_input(spec, payload)
+    assert payload == {"n": "3.14"}  # kept as string — schema says int
+
+
+def test_string_to_number_float() -> None:
+    """A fractional string under a ``number`` property becomes a float."""
+    spec = _tool({"type": "object", "properties": {"n": {"type": "number"}}})
+    payload = {"n": "3.14"}
+    coerce_tool_input(spec, payload)
+    assert payload == {"n": 3.14}
+
+
+def test_string_to_number_whole() -> None:
+    """A whole-number string under a ``number`` property collapses to int."""
+    tool = _tool({"type": "object", "properties": {"n": {"type": "number"}}})
+    args = {"n": "42"}
+    coerce_tool_input(tool, args)
+    assert args == {"n": 42}  # whole numbers collapse to int
+    # Equality cannot tell 42 from 42.0; the type check pins the collapse.
+    assert type(args["n"]) is int
+
+
+def test_string_to_boolean() -> None:
+    """``"true"`` / ``"False"`` become booleans; other strings are kept."""
+    tool = _tool(
+        {
+            "type": "object",
+            "properties": {
+                "a": {"type": "boolean"},
+                "b": {"type": "boolean"},
+                "c": {"type": "boolean"},
+            },
+        }
+    )
+    args = {"a": "true", "b": "False", "c": "nope"}
+    coerce_tool_input(tool, args)
+    assert args == {"a": True, "b": False, "c": "nope"}
+    # ``1 == True`` and ``0 == False`` in Python, so pin real booleans.
+    assert args["a"] is True
+    assert args["b"] is False
+
+
+def test_union_type_first_match_wins() -> None:
+    """With a union type, the first member that changes the value wins."""
+    union_schema = {"type": ["integer", "string"]}
+    spec = _tool({"type": "object", "properties": {"x": union_schema}})
+    payload = {"x": "42"}
+    coerce_tool_input(spec, payload)
+    assert payload == {"x": 42}
+
+
+def test_nan_and_inf_rejected() -> None:
+    """Non-finite spellings are never turned into floats."""
+    tool = _tool({"type": "object", "properties": {"n": {"type": "number"}}})
+    # ``float()`` accepts every one of these; none is a valid tool arg.
+    for literal in ("inf", "-inf", "nan", "Infinity", "NaN"):
+        args = {"n": literal}
+        coerce_tool_input(tool, args)
+        assert args == {"n": literal}  # keep original
+
+
+# ---- the ask_user bug: [{"label": "..."}] -> ["..."] ------------------------
+
+
+def test_array_of_label_objects_unwraps_to_strings() -> None:
+    """``[{"label": ...}]`` under ``array<string>`` unwraps to strings."""
+    options_schema = {"type": "array", "items": {"type": "string"}}
+    spec = _tool({"type": "object", "properties": {"options": options_schema}})
+    payload = {"options": [{"label": "A"}, {"label": "B"}, {"label": "C"}]}
+    coerce_tool_input(spec, payload)
+    assert payload == {"options": ["A", "B", "C"]}
+
+
+def test_array_of_value_objects_unwraps() -> None:
+    """Other known wrapper keys (value / text / name) unwrap as well."""
+    strings_schema = {"type": "array", "items": {"type": "string"}}
+    spec = _tool({"type": "object", "properties": {"xs": strings_schema}})
+    payload = {"xs": [{"value": "A"}, {"text": "B"}, {"name": "C"}]}
+    coerce_tool_input(spec, payload)
+    assert payload == {"xs": ["A", "B", "C"]}
+
+
+def test_single_key_object_falls_back_to_sole_value() -> None:
+    """A one-entry object with an unknown key yields its only value."""
+    strings_schema = {"type": "array", "items": {"type": "string"}}
+    spec = _tool({"type": "object", "properties": {"xs": strings_schema}})
+    payload = {"xs": [{"weirdkey": "A"}]}
+    coerce_tool_input(spec, payload)
+    assert payload == {"xs": ["A"]}
+
+
+def test_unrecognized_object_is_preserved() -> None:
+    """An ambiguous multi-key object with no known key is left alone."""
+    strings_schema = {"type": "array", "items": {"type": "string"}}
+    spec = _tool({"type": "object", "properties": {"xs": strings_schema}})
+    payload = {"xs": [{"a": "x", "b": "y"}]}  # ambiguous — no known key, multi-value
+    coerce_tool_input(spec, payload)
+    assert payload == {"xs": [{"a": "x", "b": "y"}]}  # untouched
+
+
+# ---- JSON-encoded-string-as-array ------------------------------------------
+
+
+def test_json_string_array_is_parsed() -> None:
+    """A JSON-encoded array string is decoded into a real list."""
+    strings_schema = {"type": "array", "items": {"type": "string"}}
+    spec = _tool({"type": "object", "properties": {"xs": strings_schema}})
+    payload = {"xs": '["A","B","C"]'}
+    coerce_tool_input(spec, payload)
+    assert payload == {"xs": ["A", "B", "C"]}
+
+
+def test_scalar_wraps_into_singleton_array() -> None:
+    """A lone string where an array is expected becomes a one-item list."""
+    strings_schema = {"type": "array", "items": {"type": "string"}}
+    spec = _tool({"type": "object", "properties": {"xs": strings_schema}})
+    payload = {"xs": "solo"}
+    coerce_tool_input(spec, payload)
+    assert payload == {"xs": ["solo"]}
+
+
+def test_invalid_json_string_wraps_as_singleton() -> None:
+    """A string that fails JSON decoding is wrapped, not dropped."""
+    strings_schema = {"type": "array", "items": {"type": "string"}}
+    spec = _tool({"type": "object", "properties": {"xs": strings_schema}})
+    payload = {"xs": "not json [[]"}
+    coerce_tool_input(spec, payload)
+    assert payload == {"xs": ["not json [[]"]}
+
+
+# ---- nested: the actual ask_user schema shape -------------------------------
+
+
+def test_nested_questions_array_with_wrapped_options() -> None:
+    """Exercises the real bug — questions[i].options arriving as [{label}]."""
+    question_schema = {
+        "type": "object",
+        "properties": {
+            "id": {"type": "string"},
+            "prompt": {"type": "string"},
+            "options": {"type": "array", "items": {"type": "string"}},
+        },
+    }
+    spec = _tool(
+        {
+            "type": "object",
+            "properties": {
+                "questions": {"type": "array", "items": question_schema},
+            },
+        }
+    )
+    wrapped = {
+        "id": "q1",
+        "prompt": "Pick one",
+        "options": [{"label": "Email (Recommended)"}, {"label": "Slack"}],
+    }
+    free_form = {"id": "q2", "prompt": "Free form"}
+    payload = {"questions": [wrapped, free_form]}
+    coerce_tool_input(spec, payload)
+    first, second = payload["questions"]
+    assert first["options"] == ["Email (Recommended)", "Slack"]
+    assert second == {"id": "q2", "prompt": "Free form"}
+
+
+def test_json_string_for_object_is_parsed() -> None:
+    """A JSON-encoded object string is decoded and its fields coerced."""
+    cfg_schema = {"type": "object", "properties": {"n": {"type": "integer"}}}
+    spec = _tool({"type": "object", "properties": {"cfg": cfg_schema}})
+    payload = {"cfg": '{"n": "42"}'}
+    coerce_tool_input(spec, payload)
+    assert payload == {"cfg": {"n": 42}}
+
+
+# ---- string property receiving a {label} object -----------------------------
+
+
+def test_single_string_property_unwraps_label_object() -> None:
+    """A ``{"label": ...}`` object under a plain string property unwraps."""
+    spec = _tool({"type": "object", "properties": {"choice": {"type": "string"}}})
+    payload = {"choice": {"label": "Email"}}
+    coerce_tool_input(spec, payload)
+    assert payload == {"choice": "Email"}