fix: shell split command

2026-04-30 15:34:54 -07:00
parent de4b2dc151
commit 725dd1f410
4 changed files with 120 additions and 10 deletions
@@ -107,13 +107,20 @@ The store has a 64 MB cap with LRU eviction. For huge outputs, prefer `shell_job

 ## Pipelines and complex commands

-For pipes, redirects, globs, and bash builtins, set `shell=True`:
+Pipes (`|`), redirects (`>`, `<`, `>>`), conditionals (`&&`, `||`, `;`), and globs (`*`, `?`, `[`) are detected automatically. You can pass them with the default `shell=False` and the runtime will transparently route through `/bin/bash -c` and surface `auto_shell: true` in the envelope:

 ```
-shell_exec("find . -type f -name '*.log' | xargs grep -l ERROR | head", shell=True)
+shell_exec("ps aux | sort -k3 -rn | head -40")
+  → { exit_code: 0, stdout: "...", auto_shell: true, ... }
 ```

-For simple `argv` commands, `shell=False` (the default) is faster and avoids quoting hazards. Naive whitespace splitting is fine for the common case; use `shell=True` when arguments contain spaces or quotes.
+For simple argv commands (no metacharacters) `shell=False` is faster and direct-execs the binary. For commands with shell features but no metacharacters that the detector catches (rare — exotic bash builtins, here-strings), pass `shell=True` explicitly:
+
+```
+shell_exec("set -e; complicated bash logic", shell=True)
+```
+
+Quoted strings work either way — the detector uses `shlex.split` which handles `"quoted args with spaces"` correctly.

 ## When to use what (cheat sheet)

@@ -47,6 +47,7 @@ def build_exec_envelope(
    max_output_kb: int = 256,
    auto_backgrounded: bool = False,
    job_id: str | None = None,
+    auto_shell: bool = False,
 ) -> dict:
    """Construct the standard exec envelope.

@@ -99,6 +100,7 @@ def build_exec_envelope(
        "warning": warning,
        "auto_backgrounded": bool(auto_backgrounded),
        "job_id": job_id,
+        "auto_shell": bool(auto_shell),
    }


@@ -25,6 +25,7 @@ Implementation notes:

 from __future__ import annotations

+import shlex
 import subprocess
 import threading
 import time
@@ -37,6 +38,18 @@ from shell_tools.common.limits import (
    make_preexec_fn,
    sanitized_env,
 )
+
+
+# Tokens that indicate the user passed a shell-syntax command (pipes,
+# redirects, conditional chains) rather than an argv list. When any of
+# these appear as standalone tokens in shlex.split(command), we silently
+# route the command through /bin/bash -c instead of trying to exec it
+# directly — the alternative is spawning the first program with the rest
+# of the line as junk argv, which either errors or returns fake success
+# (e.g. `echo "..." && ps ...` → echo prints the literal command).
+_SHELL_METACHARS: frozenset[str] = frozenset(
+    {"|", "&&", "||", ";", ">", "<", ">>", "<<", "&", "2>", "2>&1", "|&"}
+)
 from shell_tools.common.ring_buffer import RingBuffer
 from shell_tools.common.truncation import build_exec_envelope
 from shell_tools.jobs.manager import JobLimitExceeded, get_manager
@@ -89,31 +102,53 @@ def register_exec_tools(mcp: FastMCP) -> None:

        Returns the standard envelope: see `shell-tools-foundations` skill.
        """
+        # Auto-detect shell-syntax commands. If the agent passes
+        # ``shell=False`` (the default) but the command contains a pipe,
+        # redirect, ``&&``, etc., naive argv splitting silently mangles
+        # it — exec the first token with the rest as junk arguments.
+        # Detect that case and transparently route through bash -c, then
+        # surface an ``auto_shell=True`` flag in the envelope so the
+        # foundational skill / agent feedback loop can learn from it.
+        auto_shell = False
        try:
-            argv: list[str] | str
            if shell:
-                argv = command
+                # User opted in; trust them.
+                pass
            else:
-                argv = command.split()
-                if not argv:
+                try:
+                    tokens = shlex.split(command, posix=True)
+                except ValueError:
+                    # Unbalanced quotes — almost certainly meant for the shell.
+                    auto_shell = True
+                    tokens = []
+                if not auto_shell:
+                    if not tokens:
                        return _err_envelope(command, "command was empty")
+                    if any(t in _SHELL_METACHARS for t in tokens) or any(
+                        # globs that shlex left unexpanded (`*`, `?`, `[`)
+                        any(c in t for c in "*?[") and t != "[" for t in tokens
+                    ):
+                        auto_shell = True

            full_env = sanitized_env(env) if env is not None else None
            preexec = make_preexec_fn(coerce_limits(limits))
        except ZshRefused as e:
            return _err_envelope(command, str(e))

+        effective_shell: bool | str = True if auto_shell else shell
+
        # Resolve shell here so the same logic the JobManager uses applies
        # in both the inline + promoted paths.
        try:
-            resolved_shell = _resolve_shell(shell)
+            resolved_shell = _resolve_shell(effective_shell)
        except ZshRefused as e:
            return _err_envelope(command, str(e))

        if resolved_shell is not None:
            spawn_argv: list[str] = [resolved_shell, "-c", command]
        else:
-            spawn_argv = list(argv) if isinstance(argv, list) else [str(argv)]
+            # shell=False AND no metacharacters → safe to direct-exec.
+            spawn_argv = tokens

        start = time.monotonic()
        try:
@@ -208,6 +243,7 @@ def register_exec_tools(mcp: FastMCP) -> None:
                        max_output_kb=max_output_kb,
                        auto_backgrounded=True,
                        job_id=record.job_id,
+                        auto_shell=auto_shell,
                    )
                except JobLimitExceeded:
                    # Cap reached; treat as a hard timeout rather than spin.
@@ -242,6 +278,7 @@ def register_exec_tools(mcp: FastMCP) -> None:
            timed_out=timed_out,
            signaled=(exit_code is not None and exit_code < 0),
            max_output_kb=max_output_kb,
+            auto_shell=auto_shell,
        )


@@ -262,6 +299,7 @@ def _err_envelope(command: str, message: str) -> dict:
        "warning": None,
        "auto_backgrounded": False,
        "job_id": None,
+        "auto_shell": False,
        "error": message,
    }

@@ -137,6 +137,69 @@ def test_timed_out_marker(exec_tool):
    assert result["timed_out"] is True


+def test_auto_shell_for_pipelines(exec_tool):
+    """Regression for the queen_technology session 152038 silent-mangling bug.
+
+    The agent passed shell=False (default) with a pipeline command. The naive
+    command.split() spawned the first program with the rest as junk argv —
+    `ps aux | sort ...` produced "ps: error: garbage option", and `echo "..."
+    && ps ...` produced fake-success output where echo printed the entire
+    rest of the command verbatim. Fix: detect shell metacharacters and
+    transparently route through bash -c.
+    """
+    # Case 1: pipeline. Was: ps spawned with "aux | sort ..." as argv → garbage option.
+    result = exec_tool(command="ps aux | head -1")
+    assert result["exit_code"] == 0, result
+    assert result["auto_shell"] is True
+    assert "USER" in result["stdout"] or "PID" in result["stdout"]
+    assert "garbage option" not in (result.get("stderr") or "")
+
+    # Case 2: && + pipe + awk. Was: echo printed the whole rest of the line.
+    result = exec_tool(
+        command="echo HEADER && echo line1 | head -1",
+    )
+    assert result["exit_code"] == 0, result
+    assert result["auto_shell"] is True
+    assert "HEADER" in result["stdout"]
+    assert "line1" in result["stdout"]
+    # The literal "&&" must NOT appear in stdout — that would mean echo
+    # captured it as an argument again.
+    assert "&&" not in result["stdout"]
+
+    # Case 3: redirect + glob. Was: '*' passed as a literal arg to ls.
+    import os
+    import tempfile
+
+    with tempfile.TemporaryDirectory() as tmp:
+        with open(os.path.join(tmp, "a.txt"), "w") as f:
+            f.write("x")
+        with open(os.path.join(tmp, "b.txt"), "w") as f:
+            f.write("y")
+        result = exec_tool(command=f"ls {tmp}/*.txt")
+        assert result["exit_code"] == 0, result
+        assert result["auto_shell"] is True
+        assert "a.txt" in result["stdout"]
+        assert "b.txt" in result["stdout"]
+
+
+def test_no_auto_shell_for_argv_commands(exec_tool):
+    """Plain argv commands (no metacharacters) should NOT auto-route to bash.
+    Direct exec is faster and avoids quoting hazards."""
+    result = exec_tool(command="echo hello")
+    assert result["exit_code"] == 0
+    assert result["auto_shell"] is False
+    assert result["stdout"].strip() == "hello"
+
+
+def test_explicit_shell_true_unchanged(exec_tool):
+    """When the agent explicitly opts in via shell=True, auto_shell stays
+    False — auto-detection only fires for shell=False."""
+    result = exec_tool(command="echo a | tr a b", shell=True)
+    assert result["exit_code"] == 0
+    assert result["auto_shell"] is False
+    assert result["stdout"].strip() == "b"
+
+
 def test_auto_promotion(exec_tool, mcp):
    """Past auto_background_after_sec, the call returns auto_backgrounded=True."""
    from shell_tools.jobs.tools import register_job_tools