Files
hive/tools/tests/test_terminal_tools_exec.py
Hundao 8cb0531959 fix(ci): unblock main CI, sort imports + install Playwright Chromium (#7172)
* fix(lint): organize imports in queen_orchestrator.create_queen

Ruff I001 blocks CI on every PR against main. The deferred imports
inside create_queen were not in alphabetical order between the queen
package and the framework package; ruff auto-fix moves
framework.config below the framework.agents.queen.nodes block.

No behavior change.

* fix(ci): install Playwright Chromium before Test Tools job

The new chart_tools smoke tests added in feabf327 require a Chromium
build for ECharts/Mermaid rendering, but the test-tools workflow only
ran `uv sync` and went straight to pytest. Three tests
(test_render_echarts_bar_chart, test_render_echarts_accepts_string_spec,
test_render_mermaid_flowchart) crash on every PR with:

    BrowserType.launch: Executable doesn't exist at
    /home/runner/.cache/ms-playwright/chromium_headless_shell-1208/...

Split the install/run into separate steps and add `playwright install
chromium` before pytest. Use `--with-deps` on Linux to pull system
libraries; Windows runners only need the browser binary.

* fix(tests): adapt test_file_state_cache to new file_ops API

The file_ops rewrite in feabf327 dropped the standalone hashline_edit
tool (the file_system_toolkits/hashline_edit/ directory was removed)
and switched edit_file to a mode-first signature
(mode, path, old_string, new_string, ...).

The test fixture still tried to look up "hashline_edit" via the MCP
tool manager and crashed with KeyError before any test could run, and
the edit_file calls were positional in the old order so they hit
"unknown mode 'e.py'" once the fixture was fixed.

Drop the stale hashline_edit lookup and pass mode="replace" explicitly
to every edit_file call. All 11 tests pass locally.

* fix(tests): skip terminal_tools tests on Windows (POSIX-only)

The new terminal_tools package added in feabf327 imports the Unix-only
`resource` module in tools/src/terminal_tools/common/limits.py to set
RLIMIT_CPU / RLIMIT_AS / RLIMIT_FSIZE on subprocesses. Five of the
six terminal_tools test files therefore crash on windows-latest with
`ModuleNotFoundError: No module named 'resource'` once their fixtures
trigger the import chain.

test_terminal_tools_pty.py already has the right module-level skip
(PTY is POSIX-only). Apply the same `pytestmark = skipif(win32)` to
the other five so the whole suite skips cleanly on Windows. The
terminal-tools package is bash-only by design (zsh refused at the
shell-resolver level), so a Windows port is out of scope.
2026-05-05 00:32:59 +08:00

229 lines
8.3 KiB
Python

"""terminal_exec — envelope shape, semantic exits, warnings, auto-promotion."""
from __future__ import annotations
import sys
import time
import pytest
pytestmark = pytest.mark.skipif(sys.platform == "win32", reason="terminal_tools is POSIX-only (uses resource module)")
@pytest.fixture
def exec_tool(mcp):
    """Register the exec tools on the MCP fixture and return the underlying
    terminal_exec function (the .fn attribute, not the MCP-wrapped tool)."""
    from terminal_tools.exec import register_exec_tools

    register_exec_tools(mcp)
    registered = mcp._tool_manager._tools
    return registered["terminal_exec"].fn
def test_envelope_shape_simple_echo(exec_tool):
result = exec_tool(command="echo hello world")
assert result["exit_code"] == 0
assert result["stdout"].strip() == "hello world"
assert result["stderr"] == ""
assert result["semantic_status"] == "ok"
assert result["timed_out"] is False
assert result["auto_backgrounded"] is False
assert result["job_id"] is None
assert result["warning"] is None
assert result["pid"] is not None
def test_grep_no_matches_is_ok_not_error(exec_tool, tmp_path):
f = tmp_path / "haystack.txt"
f.write_text("apples\nbananas\n")
result = exec_tool(command=f"grep zzz {f}")
assert result["exit_code"] == 1
assert result["semantic_status"] == "ok"
assert "No matches found" in (result["semantic_message"] or "")
def test_diff_files_differ_is_ok_not_error(exec_tool, tmp_path):
a = tmp_path / "a.txt"
a.write_text("hi\n")
b = tmp_path / "b.txt"
b.write_text("bye\n")
result = exec_tool(command=f"diff {a} {b}")
assert result["exit_code"] == 1
assert result["semantic_status"] == "ok"
assert "differ" in (result["semantic_message"] or "")
def test_destructive_warning_for_rm_rf(exec_tool, tmp_path):
# Don't actually delete anything — point at a missing path so the
# command exits non-zero but the warning still fires from regex.
target = tmp_path / "definitely_missing_dir"
result = exec_tool(command=f"rm -rf {target}")
assert result["warning"] is not None
assert "force-remove" in result["warning"] or "recursively" in result["warning"]
def test_destructive_warning_drop_table(exec_tool):
# Run `true` so the test doesn't depend on echo behavior; pass the
# destructive text via stdin so the regex still matches the command.
result = exec_tool(command="echo 'DROP TABLE users;'", shell=True)
assert result["warning"] is not None
assert "drop" in result["warning"].lower() or "truncate" in result["warning"].lower()
def test_command_not_found(exec_tool):
result = exec_tool(command="this_command_does_not_exist_xyzzy")
assert result["exit_code"] is None or result["exit_code"] != 0
# Either pre-spawn FileNotFoundError or shell exit 127 — both are fine
# as long as semantic_status reflects an error or the error field is set.
assert (
result["semantic_status"] == "error"
or result.get("error")
or "not found" in (result["semantic_message"] or "").lower()
)
def test_zsh_refused(exec_tool):
result = exec_tool(command="echo hi", shell=True)
# shell=True (the bool) → /bin/bash → succeeds
assert result["exit_code"] == 0
def test_zsh_string_refused():
    """_resolve_shell raises ZshRefused for a zsh path, wherever installed."""
    from terminal_tools.common.limits import ZshRefused, _resolve_shell

    for zsh_path in ("/bin/zsh", "/usr/local/bin/zsh"):
        with pytest.raises(ZshRefused):
            _resolve_shell(zsh_path)
def test_truncation_via_handle(exec_tool):
"""Generate >256 KB of output, verify output_handle is returned."""
# Generate ~300 KB of output
result = exec_tool(
command="python3 -c 'import sys; sys.stdout.write(\"x\" * 300_000)'",
shell=True,
max_output_kb=128, # smaller cap to force truncation
)
assert result["exit_code"] == 0
assert result["stdout_truncated_bytes"] > 0
assert result["output_handle"] is not None
assert result["output_handle"].startswith("out_")
def test_output_handle_round_trip(exec_tool, mcp):
    """A handle from a truncated exec can be paged back via terminal_output_get."""
    from terminal_tools.output import register_output_tools

    register_output_tools(mcp)
    output_get = mcp._tool_manager._tools["terminal_output_get"].fn
    res = exec_tool(
        command="python3 -c 'import sys; sys.stdout.write(\"x\" * 300_000)'",
        shell=True,
        max_output_kb=64,
    )
    handle = res["output_handle"]
    assert handle is not None
    # A live handle yields a non-empty first page and a forward offset.
    first_page = output_get(output_handle=handle, since_offset=0, max_kb=64)
    assert first_page["expired"] is False
    assert len(first_page["data"]) > 0
    assert first_page["next_offset"] > 0
    # An unknown handle reports expired rather than raising.
    bogus = output_get(output_handle="out_doesnotexist", since_offset=0, max_kb=64)
    assert bogus["expired"] is True
def test_timed_out_marker(exec_tool):
result = exec_tool(command="sleep 5", timeout_sec=1, auto_background_after_sec=0)
assert result["timed_out"] is True
def test_auto_shell_for_pipelines(exec_tool):
"""Regression for the queen_technology session 152038 silent-mangling bug.
The agent passed shell=False (default) with a pipeline command. The naive
command.split() spawned the first program with the rest as junk argv —
`ps aux | sort ...` produced "ps: error: garbage option", and `echo "..."
&& ps ...` produced fake-success output where echo printed the entire
rest of the command verbatim. Fix: detect shell metacharacters and
transparently route through bash -c.
"""
# Case 1: pipeline. Was: ps spawned with "aux | sort ..." as argv → garbage option.
result = exec_tool(command="ps aux | head -1")
assert result["exit_code"] == 0, result
assert result["auto_shell"] is True
assert "USER" in result["stdout"] or "PID" in result["stdout"]
assert "garbage option" not in (result.get("stderr") or "")
# Case 2: && + pipe + awk. Was: echo printed the whole rest of the line.
result = exec_tool(
command="echo HEADER && echo line1 | head -1",
)
assert result["exit_code"] == 0, result
assert result["auto_shell"] is True
assert "HEADER" in result["stdout"]
assert "line1" in result["stdout"]
# The literal "&&" must NOT appear in stdout — that would mean echo
# captured it as an argument again.
assert "&&" not in result["stdout"]
# Case 3: redirect + glob. Was: '*' passed as a literal arg to ls.
import os
import tempfile
with tempfile.TemporaryDirectory() as tmp:
with open(os.path.join(tmp, "a.txt"), "w") as f:
f.write("x")
with open(os.path.join(tmp, "b.txt"), "w") as f:
f.write("y")
result = exec_tool(command=f"ls {tmp}/*.txt")
assert result["exit_code"] == 0, result
assert result["auto_shell"] is True
assert "a.txt" in result["stdout"]
assert "b.txt" in result["stdout"]
def test_no_auto_shell_for_argv_commands(exec_tool):
"""Plain argv commands (no metacharacters) should NOT auto-route to bash.
Direct exec is faster and avoids quoting hazards."""
result = exec_tool(command="echo hello")
assert result["exit_code"] == 0
assert result["auto_shell"] is False
assert result["stdout"].strip() == "hello"
def test_explicit_shell_true_unchanged(exec_tool):
"""When the agent explicitly opts in via shell=True, auto_shell stays
False — auto-detection only fires for shell=False."""
result = exec_tool(command="echo a | tr a b", shell=True)
assert result["exit_code"] == 0
assert result["auto_shell"] is False
assert result["stdout"].strip() == "b"
def test_auto_promotion(exec_tool, mcp):
    """Past auto_background_after_sec, the call returns auto_backgrounded=True."""
    from terminal_tools.jobs.tools import register_job_tools

    register_job_tools(mcp)
    # A 1s budget keeps the test fast; timeout_sec stays well above it so
    # only the promotion path can fire.
    started = time.monotonic()
    res = exec_tool(
        command="sleep 5",
        auto_background_after_sec=1,
        timeout_sec=10,
    )
    elapsed = time.monotonic() - started
    assert res["auto_backgrounded"] is True, res
    assert res["job_id"] is not None
    assert res["exit_code"] is None
    assert elapsed < 3, "auto-promotion should return quickly past the budget"
    # Take over the promoted job via terminal_job_logs and wait for exit.
    job_logs = mcp._tool_manager._tools["terminal_job_logs"].fn
    outcome = job_logs(job_id=res["job_id"], wait_until_exit=True, wait_timeout_sec=10)
    assert outcome["status"] == "exited"
    assert outcome["exit_code"] == 0