diff --git a/core/framework/agent_loop/agent_loop.py b/core/framework/agent_loop/agent_loop.py index c54c06b3..c968a95f 100644 --- a/core/framework/agent_loop/agent_loop.py +++ b/core/framework/agent_loop/agent_loop.py @@ -219,8 +219,20 @@ async def _captioning_chain( logger.warning("vision_fallback failed; retrying configured model") if result := await caption_tool_image(intent, image_content): return result - logger.warning("vision_fallback retry failed; trying gemini-3-flash-preview") - return await caption_tool_image(intent, image_content, model_override="gemini/gemini-3-flash-preview") + # Match the configured model's proxy prefix so the override is routed + # through the same endpoint with the same auth shape. Without this, + # a Hive subscriber's `hive/...` config would override to + # `gemini/...` — which sends Google's Gemini protocol to the + # Anthropic-compatible Hive proxy (404), not what we want. + configured = (get_vision_fallback_model() or "").lower() + if configured.startswith("hive/"): + override = "hive/gemini-3-flash-preview" + elif configured.startswith("kimi/"): + override = "kimi/gemini-3-flash-preview" + else: + override = "gemini/gemini-3-flash-preview" + logger.warning("vision_fallback retry failed; trying %s", override) + return await caption_tool_image(intent, image_content, model_override=override) # Pattern for detecting context-window-exceeded errors across LLM providers. diff --git a/core/framework/agent_loop/internals/vision_fallback.py b/core/framework/agent_loop/internals/vision_fallback.py index a9cbe212..aa8b1cf9 100644 --- a/core/framework/agent_loop/internals/vision_fallback.py +++ b/core/framework/agent_loop/internals/vision_fallback.py @@ -211,10 +211,12 @@ async def caption_tool_image( "max_tokens": 8192, "timeout": timeout_s, } - # Pass api_key directly only when there are no proxy-rewritten - # extra_headers carrying the auth (e.g. the gemini-3-flash override - # path goes direct to Gemini, not through the Hive proxy). - if api_key and not extra_headers: + # Always pass api_key when we have one, even alongside proxy-rewritten + # extra_headers. litellm's anthropic handler refuses to dispatch + # without an api_key (it sends it as x-api-key); the proxy itself + # authenticates via the Authorization: Bearer header in + # extra_headers. Both are needed — matches LiteLLMProvider's path. + if api_key: kwargs["api_key"] = api_key if rewritten_base: kwargs["api_base"] = rewritten_base diff --git a/core/framework/agents/queen/mcp_registry.json b/core/framework/agents/queen/mcp_registry.json index d825f937..6d57cb8d 100644 --- a/core/framework/agents/queen/mcp_registry.json +++ b/core/framework/agents/queen/mcp_registry.json @@ -1,3 +1,3 @@ { - "include": ["gcu-tools", "hive_tools"] + "include": ["gcu-tools", "hive_tools", "terminal-tools"] } diff --git a/core/framework/agents/queen/queen_tools_defaults.py b/core/framework/agents/queen/queen_tools_defaults.py index 739c93d1..50664601 100644 --- a/core/framework/agents/queen/queen_tools_defaults.py +++ b/core/framework/agents/queen/queen_tools_defaults.py @@ -51,10 +51,14 @@ _TOOL_CATEGORIES: dict[str, list[str]] = { "hashline_edit", ], # Shell + process control — engineering personas only. + # Includes the legacy coder-tools commands (run_command, bash_*) and + # the full terminal-tools MCP server (foreground exec with auto-promotion, + # background jobs, persistent PTY sessions, ripgrep/find). "shell": [ "execute_command_tool", "bash_kill", "bash_output", + "@server:terminal-tools", ], # Tabular data. 
CSV/Excel read/write + DuckDB SQL. "data": [ diff --git a/core/framework/loader/mcp_registry.py b/core/framework/loader/mcp_registry.py index e711706b..688a4c12 100644 --- a/core/framework/loader/mcp_registry.py +++ b/core/framework/loader/mcp_registry.py @@ -51,6 +51,10 @@ _DEFAULT_LOCAL_SERVERS: dict[str, dict[str, Any]] = { "description": "File I/O: read, write, edit, search, list, run commands", "args": ["run", "python", "files_server.py", "--stdio"], }, + "terminal-tools": { + "description": "Terminal capabilities: process exec, background jobs, PTY sessions, fs search. Bash-only on POSIX.", + "args": ["run", "python", "terminal_tools_server.py", "--stdio"], + }, } # Aliases that earlier versions of ensure_defaults wrote under the wrong name. @@ -58,6 +62,10 @@ _DEFAULT_LOCAL_SERVERS: dict[str, dict[str, Any]] = { # name so the active agents (queen, credential_tester) can find their tools. _STALE_DEFAULT_ALIASES: dict[str, str] = { "hive_tools": "hive-tools", + # 2026-04-30: shell-tools renamed to terminal-tools. Drop the stale name + # on next ensure_defaults() so the queen's allowlist (which now includes + # @server:terminal-tools) actually finds a server with the new name. + "terminal-tools": "shell-tools", } @@ -77,7 +85,30 @@ class MCPRegistry: # ── Initialization ────────────────────────────────────────────── def initialize(self) -> None: - """Create directory structure and default files if missing.""" + """Create directory structure, default files, and seed bundled servers. + + Every read path (queen orchestrator, pipeline stage, CLI, routes) + calls this — keeping the seeding here means a fresh ``HIVE_HOME`` + (e.g. the desktop's per-user dir under ``~/.config/Hive/users//`` + or ``~/Library/Application Support/Hive/users//``) is always + populated with ``hive_tools`` / ``gcu-tools`` / ``files-tools`` / + ``terminal-tools`` before any agent code reads ``installed.json``. + Without this, ``load_agent_selection()`` resolves an empty registry + and emits "Server X requested but not installed" warnings even + though the server is bundled. + + Idempotent — already-installed entries are left untouched. + """ + self._bootstrap_io() + self._seed_defaults() + + def _bootstrap_io(self) -> None: + """Create the registry directory + empty config/installed files. + + Split out from ``initialize()`` so ``_seed_defaults()`` can call it + without re-entering the seeding logic (which would recurse via + ``_read_installed()`` → ``initialize()``). + """ self._base.mkdir(parents=True, exist_ok=True) self._cache_dir.mkdir(parents=True, exist_ok=True) @@ -88,21 +119,26 @@ class MCPRegistry: self._write_json(self._installed_path, {"servers": {}}) def ensure_defaults(self) -> list[str]: - """Seed the built-in local MCP servers (hive-tools, gcu-tools, files-tools). - Idempotent — servers already present are left untouched. Skips seeding - entirely when the source-tree ``tools/`` directory cannot be located - (e.g. when Hive is installed from a wheel rather than a checkout). - - Returns the list of names that were newly registered. + """Public alias kept for the ``hive mcp init`` CLI command. + + Returns the list of newly-registered server names so the CLI can + print them. Same idempotent seeding logic as ``initialize()``. """ - self.initialize() + self._bootstrap_io() + return self._seed_defaults() + def _seed_defaults(self) -> list[str]: + """Idempotently register the bundled default local servers. + + Skips entirely when the source-tree ``tools/`` directory cannot + be located (e.g. wheel installs).
Returns the list of names that + were newly registered. + """ # parents: [0]=loader, [1]=framework, [2]=core, [3]=repo root tools_dir = Path(__file__).resolve().parents[3] / "tools" if not tools_dir.is_dir(): logger.debug( - "MCPRegistry.ensure_defaults: tools dir %s missing; skipping default seed", + "MCPRegistry._seed_defaults: tools dir %s missing; skipping default seed", tools_dir, ) return [] @@ -119,7 +155,7 @@ for canonical, stale in _STALE_DEFAULT_ALIASES.items(): if stale in existing and canonical not in existing: logger.info( - "MCPRegistry.ensure_defaults: removing stale alias '%s' (canonical: '%s')", + "MCPRegistry._seed_defaults: removing stale alias '%s' (canonical: '%s')", stale, canonical, ) @@ -142,7 +178,7 @@ ) added.append(name) except MCPError as exc: - logger.warning("MCPRegistry.ensure_defaults: failed to seed '%s': %s", name, exc) + logger.warning("MCPRegistry._seed_defaults: failed to seed '%s': %s", name, exc) if added: logger.info("MCPRegistry: seeded default local servers: %s", added) diff --git a/core/framework/pipeline/stages/mcp_registry.py b/core/framework/pipeline/stages/mcp_registry.py index 667dec23..386342c5 100644 --- a/core/framework/pipeline/stages/mcp_registry.py +++ b/core/framework/pipeline/stages/mcp_registry.py @@ -44,6 +44,9 @@ class McpRegistryStage(PipelineStage): from framework.loader.mcp_registry import MCPRegistry from framework.orchestrator.files import FILES_MCP_SERVER_NAME + # Bundled defaults (hive_tools / gcu-tools / files-tools / terminal-tools) + # are seeded inside MCPRegistry.initialize(); resolve_for_agent below + # will find them even on a fresh HIVE_HOME. registry = MCPRegistry() mcp_loaded = False diff --git a/core/framework/skills/_preset_skills/terminal-tools-foundations/SKILL.md b/core/framework/skills/_preset_skills/terminal-tools-foundations/SKILL.md new file mode 100644 index 00000000..346ae6e0 --- /dev/null +++ b/core/framework/skills/_preset_skills/terminal-tools-foundations/SKILL.md @@ -0,0 +1,139 @@ +--- name: hive.terminal-tools-foundations description: Required reading whenever any terminal_* tool is available. Teaches the foreground/background dichotomy (terminal_exec auto-promotes past 30s, returns a job_id you poll with terminal_job_logs), the standard envelope shape (exit_code, stdout, stdout_truncated_bytes, output_handle, semantic_status, warning, auto_backgrounded, job_id), output handle pagination via terminal_output_get, when to read semantic_status instead of raw exit_code (grep/rg/find/diff/test exit 1 is NOT an error), the destructive-warning surface (rm -rf, git push --force, DROP TABLE), tool preference (use files-tools / gcu-tools / hive_tools before raw shell), and the bash-only-on-macOS policy. Skipping this leads to "tool returned no output" surprises, orphaned jobs, and panic over benign grep exit codes. metadata: author: hive type: preset-skill version: "1.0" --- # terminal-tools — foundations These tools give you a real terminal: foreground exec with smart envelopes, background jobs with offset-based log streaming, persistent PTY shells, and filesystem search. Bash-only on POSIX. ## Tool preference (read first) Before reaching for terminal-tools, check whether a higher-level tool already covers the task. Shell is for system operations the other servers don't reach.
- **Reading files** → `files-tools.read_file` (handles size, paging, line-numbered output) — NOT `terminal_exec("cat ...")` - **Editing files** → `files-tools.edit_file` (atomic patch with diff verification) — NOT `terminal_exec("sed -i ...")` - **Writing files** → `files-tools.write_file` — NOT `terminal_exec("echo > ...")` - **In-project search** → `files-tools.search_files` (project-scoped, code-aware) — use `terminal_rg` only for raw paths outside the project (`/var/log`, `/etc`) - **Browser / web pages** → `gcu-tools.browser_*` for rendered pages — NOT `terminal_exec("curl ...")` - **Web search** → `hive_tools.web_search` — NOT scraping - **System operations** (process exec, jobs, PTYs, raw fs search) → terminal-tools. This is its territory. ## The standard envelope Every spawn-style call (`terminal_exec`, the auto-promoted job state) returns this shape: ```jsonc { "exit_code": 0, // null when auto-backgrounded or pre-spawn error "stdout": "...", // decoded, truncated to max_output_kb (default 256 KB) "stderr": "...", "stdout_truncated_bytes": 0, // > 0 means more is in output_handle "stderr_truncated_bytes": 0, "runtime_ms": 42, "pid": 12345, "output_handle": null, // "out_<id>" when truncated — paginate with terminal_output_get "timed_out": false, "semantic_status": "ok", // "ok" | "signal" | "error" — read THIS, not just exit_code "semantic_message": null, // e.g. "No matches found" for grep exit 1 "warning": null, // e.g. "may force-remove files" for rm -rf "auto_backgrounded": false, "job_id": null // set when auto_backgrounded=true } ``` ## Auto-promotion (the core mental model) `terminal_exec` runs commands in the foreground until the **auto-background budget** (default 30s) elapses. Past that point, the process is silently transferred to a background job and the call returns immediately with: ```jsonc { "auto_backgrounded": true, "exit_code": null, "job_id": "job_<id>", ... } ``` When you see `auto_backgrounded: true`, **pivot to polling**. The job is still running: ``` terminal_job_logs(job_id, since_offset=0, wait_until_exit=true, wait_timeout_sec=60) → blocks server-side until the job exits or the timeout, returns logs + status ``` You're not failing — you're freed up to do other work while the long task runs. To force pure-foreground (kill on `timeout_sec`), pass `auto_background_after_sec=0`. Use this when you genuinely don't want a background job (small commands where promotion would surprise you). ## Semantic exit codes — read `semantic_status`, not raw `exit_code` Several common commands use exit 1 for legitimate non-error states: | Command | exit 0 | exit 1 | |---|---|---| | `grep` / `rg` | matches found | **no matches** (not an error) | | `find` | success | **some dirs unreadable** (informational) | | `diff` | identical | **files differ** (informational) | | `test` / `[` | true | **false** (informational) | For these, `semantic_status` will be `"ok"` even when `exit_code == 1`, with `semantic_message` describing why ("No matches found"). For everything else, `semantic_status` defaults to `"ok"` on 0 and `"error"` on nonzero. **Rule**: always check `semantic_status` first. Only fall back to `exit_code` when you need the exact number (e.g. distinguishing `make` errors).
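To make the rule concrete, here is a minimal sketch in Python (not part of the server API — it assumes the envelope has been parsed into a plain dict with the fields shown above):

```python
def interpret_envelope(env: dict) -> str:
    """Summarize an exec envelope, checking semantic_status before exit_code."""
    if env.get("auto_backgrounded"):
        # Process is still running under a job_id — switch to polling.
        return f"backgrounded: poll terminal_job_logs with job_id={env['job_id']!r}"
    if env.get("semantic_status") == "ok":
        # Covers grep/rg/find/diff/test exit 1 as well as plain exit 0.
        return env.get("semantic_message") or "ok"
    return f"failed: exit_code={env['exit_code']} stderr={env.get('stderr', '')[:200]}"
```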
+ +## Destructive warnings — re-read your command + +The envelope's `warning` field is set when the command matches a known destructive pattern (`rm -rf`, `git push --force`, `git reset --hard`, `DROP TABLE`, `kubectl delete`, `terraform destroy`, etc.). The command **still ran** — the warning is informational. Use it as a "did I mean to do that?" prompt before trusting subsequent steps that depend on the side effect. + +If a `warning` appears unexpectedly, stop and verify: was the destructive action intended, or did a path/glob slip in? + +## Output handles — never lose output + +When `stdout_truncated_bytes > 0` or `stderr_truncated_bytes > 0`, the inline output was capped at `max_output_kb` (default 256 KB). The full bytes are stashed under `output_handle` for **5 minutes**. Paginate with: + +``` +terminal_output_get(output_handle, since_offset=0, max_kb=64) + → { data, offset, next_offset, eof, expired } +``` + +Track `next_offset` across calls. If `expired: true`, re-run the command (the handle's TTL has lapsed). + +The store has a 64 MB cap with LRU eviction. For huge outputs, prefer `terminal_job_start` + `terminal_job_logs` polling (4 MB ring buffer per stream, infinite total throughput). + +## Bash, not zsh — even on macOS + +`terminal_exec` and `terminal_pty_open` always invoke `/bin/bash`. The user's `$SHELL` is ignored. Explicit `shell="/bin/zsh"` is **rejected** with a clear error. This is a deliberate security stance, not aesthetic — zsh has command/builtin classes (`zmodload`, `=cmd` expansion, `zpty`, `ztcp`, `zf_*`) that bypass bash-shaped checks. The `terminal-tools-pty-sessions` skill explains the implications for PTY sessions specifically. + +`ZDOTDIR` and `ZSH_*` env vars are stripped before exec to prevent zsh dotfiles leaking in. Bash dotfiles still apply when invoked interactively (e.g. PTY sessions use `bash --norc --noprofile` to keep things predictable). + +## Pipelines and complex commands + +Pipes (`|`), redirects (`>`, `<`, `>>`), conditionals (`&&`, `||`, `;`), and globs (`*`, `?`, `[`) are detected automatically. You can pass them with the default `shell=False` and the runtime will transparently route through `/bin/bash -c` and surface `auto_shell: true` in the envelope: + +``` +terminal_exec("ps aux | sort -k3 -rn | head -40") + → { exit_code: 0, stdout: "...", auto_shell: true, ... } +``` + +For simple argv commands (no metacharacters) `shell=False` is faster and direct-execs the binary. For commands with shell features but no metacharacters that the detector catches (rare — exotic bash builtins, here-strings), pass `shell=True` explicitly: + +``` +terminal_exec("set -e; complicated bash logic", shell=True) +``` + +Quoted strings work either way — the detector uses `shlex.split` which handles `"quoted args with spaces"` correctly. + +## When to use what (cheat sheet) + +| Need | Tool | +|---|---| +| One-shot command, ≤30s | `terminal_exec` | +| One-shot command, might be longer | `terminal_exec` (auto-promotes) | +| Long-running job from the start | `terminal_job_start` | +| State across calls (cd, env, REPL) | `terminal_pty_open` + `terminal_pty_run` | +| Search file contents (raw paths) | `terminal_rg` | +| Find files by predicate | `terminal_find` | +| Retrieve truncated output | `terminal_output_get` | +| Tree / stat / du | `terminal_exec("ls -la"/"stat foo"/"du -sh path")` | +| HTTP / DNS / ping / archives | `terminal_exec("curl ..."/"dig ..."/"tar xzf ...")` | + +See `references/exit_codes.md` for the full POSIX + signal-induced + semantic catalog. 
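## Worked example: draining a truncated result

Tying the envelope and output-handle sections together — a sketch in pseudo-Python, where `terminal_exec` and `terminal_output_get` stand in for your client's actual tool-call mechanism:

```python
env = terminal_exec("git log --stat")  # large output may exceed max_output_kb
if env["stdout_truncated_bytes"] > 0:
    # The full bytes live under the handle — rebuild from offset 0.
    parts, offset = [], 0
    while True:
        page = terminal_output_get(env["output_handle"], since_offset=offset, max_kb=64)
        if page["expired"]:
            break  # 5-minute TTL lapsed — re-run the command instead
        parts.append(page["data"])
        offset = page["next_offset"]  # carry the cursor forward
        if page["eof"]:
            break
    stdout = "".join(parts)
else:
    stdout = env["stdout"]
```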
diff --git a/core/framework/skills/_preset_skills/terminal-tools-foundations/references/exit_codes.md b/core/framework/skills/_preset_skills/terminal-tools-foundations/references/exit_codes.md new file mode 100644 index 00000000..adcd8c1b --- /dev/null +++ b/core/framework/skills/_preset_skills/terminal-tools-foundations/references/exit_codes.md @@ -0,0 +1,50 @@ +# Exit code reference + +## POSIX conventions + +| Code | Meaning | +|---|---| +| 0 | Success | +| 1 | General error / catchall | +| 2 | Misuse of shell builtins, syntax error | +| 126 | Command found but not executable | +| 127 | Command not found | +| 128 | Invalid argument to `exit` | +| 128 + N | Killed by signal N | +| 130 | Killed by SIGINT (Ctrl-C) | +| 137 | Killed by SIGKILL | +| 143 | Killed by SIGTERM | +| 255 | Exit status out of range | + +When `exit_code < 0` in the envelope, the process was killed by a signal: `abs(exit_code)` is the signal number (subprocess uses negative codes for signaled exits, separate from the `128 + N` shell convention). + +## Semantic exits — when exit 1 is NOT an error + +terminal-tools encodes these in `semantic_status`. The agent should read `semantic_status` first. + +| Command | Code 0 | Code 1 | Code ≥2 | +|---|---|---|---| +| `grep` / `rg` / `ripgrep` | matches found | **no matches** (ok) | error | +| `find` | success | **some dirs unreadable** (ok) | error | +| `diff` | files identical | **files differ** (ok) | error | +| `test` / `[` | condition true | **condition false** (ok) | error | + +For any command not in this table, the default convention applies (0 = ok, nonzero = error). + +## When `exit_code` is `null` + +- `auto_backgrounded: true` — the process is still running under a `job_id`. Poll with `terminal_job_logs`. +- Pre-spawn error (command not found, exec failed) — see `error` field in the envelope. +- `timed_out: true` and the process refused to die — extremely rare; the kernel has the answer. + +## Common signal-induced exits + +| Signal | Number | Subprocess exit | Shell exit | Meaning | +|---|---|---|---|---| +| SIGHUP | 1 | -1 | 129 | Terminal hangup | +| SIGINT | 2 | -2 | 130 | Interrupt (Ctrl-C) | +| SIGQUIT | 3 | -3 | 131 | Quit (Ctrl-\\) | +| SIGKILL | 9 | -9 | 137 | Forced kill (uncatchable) | +| SIGTERM | 15 | -15 | 143 | Polite termination | +| SIGSEGV | 11 | -11 | 139 | Segmentation fault | +| SIGABRT | 6 | -6 | 134 | Abort (assertion failed, etc.) | diff --git a/core/framework/skills/_preset_skills/terminal-tools-fs-search/SKILL.md b/core/framework/skills/_preset_skills/terminal-tools-fs-search/SKILL.md new file mode 100644 index 00000000..e2585f4c --- /dev/null +++ b/core/framework/skills/_preset_skills/terminal-tools-fs-search/SKILL.md @@ -0,0 +1,96 @@ +--- +name: hive.terminal-tools-fs-search +description: Use terminal_rg / terminal_find when you need raw filesystem search outside the project tree — system configs, /var/log, /etc, archive contents — or when files-tools.search_files is too project-scoped. Teaches the rg vs find vs terminal_exec("ls/du/tree") split, common rg flag combos for code/logs/configs, find predicates for mtime/size/type queries, and the rule that for tree views or single-file stat info you should just use terminal_exec instead of inventing a tool. Read before reaching for raw shell to grep or find anything. +metadata: + author: hive + type: preset-skill + version: "1.0" +--- + +# Filesystem search + +terminal-tools provides two structured search tools: `terminal_rg` (ripgrep for content) and `terminal_find` (find for predicates). 
Everything else (tree, stat, du) is just `terminal_exec`. + +## When to use what + +| Task | Tool | +|---|---| +| Find code/text matching a pattern in your **project** | `files-tools.search_files` (project-aware, ranks by relevance) | +| Find code/text matching a pattern in `/var/log`, `/etc`, archives, system dirs | `terminal_rg` | +| Find files matching name/glob/predicate | `terminal_find` | +| List a directory | `terminal_exec("ls -la /path")` | +| Tree view | `terminal_exec("tree -L 2 /path")` | +| Single-path stat | `terminal_exec("stat /path")` | +| Disk usage | `terminal_exec("du -sh /path")` or `terminal_exec("du -h --max-depth=2 /")` | +| Count matches across files | `terminal_rg(pattern, count=True via extra_args=["-c"])` | + +## `terminal_rg` — content search + +ripgrep is fast, gitignore-aware, and has a deep flag surface. The structured wrapper exposes the most useful flags directly; `extra_args` covers the rest. + +### Common patterns + +``` +# All Python files containing "TODO" +terminal_rg(pattern="TODO", path=".", type_filter="py") + +# Case-insensitive, with context +terminal_rg(pattern="error", path="/var/log", ignore_case=True, context=2) + +# Search hidden files (rg ignores them by default) +terminal_rg(pattern="api_key", path="~", hidden=True) + +# Don't respect .gitignore (find files git would ignore) +terminal_rg(pattern="generated", path=".", no_ignore=True) + +# Multi-line pattern (e.g., function definitions spanning lines) +terminal_rg(pattern=r"def\s+\w+\(.*\n.*\n", path="src", extra_args=["--multiline"]) + +# Specific filename glob +terminal_rg(pattern="version", path=".", glob="*.toml") +``` + +### rg flag idioms + +| Flag | Effect | +|---|---| +| `-tpy` (`type_filter="py"`) | Only Python files | +| `-uu` | Don't respect any ignores (incl. `.git/`) | +| `--multiline` (`extra_args`) | Allow regex spanning lines | +| `--max-count` (`max_count`) | Stop after N matches per file | +| `--max-depth` (`max_depth`) | Limit recursion | +| `-w` (`extra_args`) | Whole word match | +| `-F` (`extra_args`) | Fixed string (no regex) | + +See `references/ripgrep_cheatsheet.md` for the long form. + +## `terminal_find` — predicate search + +`find` excels at "files matching N criteria". The wrapper surfaces the most common predicates; combine via the structured arguments. + +``` +# All .log files modified in the last 7 days, larger than 1MB +terminal_find(path="/var/log", iname="*.log", mtime_days=7, size_kb_min=1024) + +# All directories named ".git" (find Git repos under a tree) +terminal_find(path="~/projects", name=".git", type_filter="d") + +# Only the top three levels +terminal_find(path="/etc", max_depth=3, type_filter="f") + +# Symlinks +terminal_find(path=".", type_filter="l") +``` + +See `references/find_predicates.md` for combinations not directly exposed. + +## Output truncation + +Both tools return `truncated: true` when their output exceeded the inline cap. For `terminal_rg`, this means matches were dropped (refine the pattern or narrow the path); for `terminal_find`, results past `max_results` (default 1000) are dropped. Tighten predicates rather than raising the cap. + +## Anti-patterns + +- **Don't `terminal_rg` your project tree** — `files-tools.search_files` is project-aware and ranks results. +- **Don't reach for `terminal_find` to list one directory** — `terminal_exec("ls -la /path")` is shorter. +- **Don't use `terminal_exec("grep ...")`** when `terminal_rg` exists — rg is faster, gitignore-aware, and returns structured matches. 
- **Don't use `terminal_exec("find ...")`** to invent your own predicate combinations — use `terminal_find` and report missing capabilities. diff --git a/core/framework/skills/_preset_skills/terminal-tools-fs-search/references/find_predicates.md b/core/framework/skills/_preset_skills/terminal-tools-fs-search/references/find_predicates.md new file mode 100644 index 00000000..72322b6f --- /dev/null +++ b/core/framework/skills/_preset_skills/terminal-tools-fs-search/references/find_predicates.md @@ -0,0 +1,78 @@ +# find predicate reference The `terminal_find` wrapper exposes name/iname, type, mtime_days, size bounds, max_depth, max_results. For combinations beyond that, drop to `terminal_exec("find ...")`. ## Time predicates | Need | find predicate | |---|---| | Modified within N days | `-mtime -N` (wrapper: `mtime_days=N`) | | Modified more than N days ago | `-mtime +N` | | Modified exactly N days ago | `-mtime N` | | Accessed within N days | `-atime -N` | | Inode changed within N days | `-ctime -N` | | Modified in last N minutes | `-mmin -N` | | Newer than reference file | `-newer ref` | ## Size predicates | Need | find predicate | |---|---| | Bigger than N kilobytes | `-size +Nk` (wrapper: `size_kb_min`) | | Smaller than N kilobytes | `-size -Nk` (wrapper: `size_kb_max`) | | Exactly N kilobytes | `-size Nk` | | Bigger than N megabytes | `-size +NM` | | Empty files | `-empty` | ## Type predicates | Need | find predicate | |---|---| | Regular file | `-type f` (wrapper: `type_filter="f"`) | | Directory | `-type d` (wrapper: `type_filter="d"`) | | Symlink | `-type l` (wrapper: `type_filter="l"`) | | Block device | `-type b` | | Character device | `-type c` | | FIFO | `-type p` | | Socket | `-type s` | ## Permission predicates | Need | find predicate | |---|---| | Owned by user | `-user alice` | | Owned by group | `-group dev` | | Permission bits exact | `-perm 644` | | Has any of these bits | `-perm /u+x` | | Has all of these bits | `-perm -u+x` | | Readable by current user | `-readable` | | Writable | `-writable` | | Executable | `-executable` | ## Composing `find` evaluates predicates left-to-right with implicit AND. For OR, group the predicates with `\( ... \)` joined by `-o`, as in the first example below. ``` # .log OR .txt (drop to terminal_exec for OR) terminal_exec(r"find /path \( -name '*.log' -o -name '*.txt' \) -type f", shell=True) # NOT in a directory called node_modules terminal_exec("find . -path '*/node_modules' -prune -o -name '*.js' -print", shell=True) ``` ## Actions | Need | predicate | |---|---| | Print path (default) | (implicit `-print`) | | Print null-separated | `-print0` (for piping to xargs -0) | | Delete | `-delete` (DANGEROUS — use terminal_exec with explicit confirmation) | | Run command per match | `-exec cmd {} \;` (drop to terminal_exec) | | Run command, batched | `-exec cmd {} +` | ## When NOT to use find - **One directory listing**: `terminal_exec("ls -la /path")` - **Recursive grep**: `terminal_rg` - **Count files**: `terminal_exec("find /path -type f | wc -l")` diff --git a/core/framework/skills/_preset_skills/terminal-tools-fs-search/references/ripgrep_cheatsheet.md b/core/framework/skills/_preset_skills/terminal-tools-fs-search/references/ripgrep_cheatsheet.md new file mode 100644 index 00000000..71eac8fa --- /dev/null +++ b/core/framework/skills/_preset_skills/terminal-tools-fs-search/references/ripgrep_cheatsheet.md @@ -0,0 +1,70 @@ +# ripgrep cheatsheet For when the structured `terminal_rg` flags don't cover the case.
Pass via `extra_args=[...]`. ## Filtering | Need | Flag | |---|---| | Whole word | `-w` | | Fixed string (no regex) | `-F` | | Match files only (paths, not lines) | `-l` | | Count matches per file | `-c` | | Print only filenames with no matches | `--files-without-match` | | Exclude binary files | (default) | | Include binaries | `--binary` | | Search archives transparently | (rg doesn't — extract first) | ## Output shape | Need | Flag | |---|---| | Show only matched part | `-o` | | Show byte offset of match | `-b` | | No filename prefix | `-I` (`--no-filename`; `-N` suppresses line numbers, not filenames) | | Color always (for piping into a colorizer) | `--color=always` | | JSON output | (the wrapper already uses `--json` internally) | ## Boundaries | Need | Flag | |---|---| | Line-by-line (default) | (default) | | Multi-line regex | `--multiline` (or `-U`) | | Multi-line dotall (`.` matches `\n`) | `--multiline-dotall` | | CRLF line endings | `--crlf` | ## Path control | Need | Flag | |---|---| | Follow symlinks | `-L` | | Don't follow | (default) | | Search hidden | `-.` (also expressed as `hidden=True`) | | Don't respect any ignores | `-uuu` | | Glob include | `-g 'pattern'` (also `glob="..."`) | | Glob exclude | `-g '!pattern'` | ## Performance | Need | Flag | |---|---| | One thread | `-j 1` | | Smaller mmap chunks | `--mmap` (default behavior usually fine) | | Per-file match cap | `-m N` (also `max_count=N`) | ## Common composed queries ``` # Find unused imports in Python terminal_rg(pattern=r"^import\s+\w+$", path="src", type_filter="py") # All TODO/FIXME/XXX with file:line terminal_rg(pattern=r"\b(TODO|FIXME|XXX)\b", path=".", extra_args=["-n"]) # Functions defined at module top-level terminal_rg(pattern=r"^def\s+\w+", path=".", type_filter="py") # Lines that DON'T match a pattern # rg can't invert at line level; use terminal_exec with grep -v ``` diff --git a/core/framework/skills/_preset_skills/terminal-tools-job-control/SKILL.md b/core/framework/skills/_preset_skills/terminal-tools-job-control/SKILL.md new file mode 100644 index 00000000..d7928ee0 --- /dev/null +++ b/core/framework/skills/_preset_skills/terminal-tools-job-control/SKILL.md @@ -0,0 +1,110 @@ +--- name: hive.terminal-tools-job-control description: Use when launching anything that runs longer than a minute, anything that streams logs, anything you want to keep running while doing other work — or when terminal_exec auto-backgrounded on you and returned a job_id. Teaches the start→poll→wait pattern with terminal_job_logs offset bookkeeping, the `wait_until_exit=True` blocking-poll idiom, the truncated_bytes_dropped resumption signal, the merge_stderr decision, the SIGINT→SIGTERM→SIGKILL escalation ladder via terminal_job_manage, and the hard rule that jobs die when the terminal-tools server restarts. Read before calling terminal_job_start, or right after terminal_exec auto-backgrounded. metadata: author: hive type: preset-skill version: "1.0" --- # Background job control Background jobs are how you do things that take time without blocking your conversation. Three tools cover the surface: `terminal_job_start`, `terminal_job_logs`, `terminal_job_manage`.
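For orientation, the full start→poll→finish loop in one sketch (pseudo-Python; the bare calls stand in for your client's tool invocations — each piece is unpacked in the sections below):

```python
job = terminal_job_start("pytest -q")       # kick off the long-running task
offset = 0
while True:
    logs = terminal_job_logs(job["job_id"], since_offset=offset,
                             wait_until_exit=True, wait_timeout_sec=30)
    print(logs["data"], end="")             # stream output as it arrives
    offset = logs["next_offset"]            # ALWAYS carry the offset forward
    if logs["status"] == "exited":
        break
print("exit_code:", logs["exit_code"])
```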
## When to use a job - Builds, deploys, long tests - Processes you want to monitor (streaming a log file, a dev server) - Anything that auto-backgrounded from `terminal_exec` (you have a `job_id`; pivot to this skill's idioms) For one-shot work expected to finish quickly, `terminal_exec` is simpler. The auto-promotion mechanic in `terminal_exec` is your safety net — start with `terminal_exec`, take over with this skill if needed. ## Lifecycle ``` terminal_job_start(command, ...) → { job_id, pid, started_at } terminal_job_logs(job_id, since_offset=0, max_bytes=64000) → { data, offset, next_offset, status: "running"|"exited", exit_code, ... } # Repeat with since_offset = previous next_offset until status == "exited" # Or block once with wait_until_exit=True: terminal_job_logs(job_id, since_offset=N, wait_until_exit=True, wait_timeout_sec=60) → blocks server-side until exit or timeout ``` After exit, the job is retained for inspection (`terminal_job_manage(action="list")`) until evicted by FIFO (50 most recent exits kept). ## Offset bookkeeping — the only rule that matters The job's output lives in a 4 MB ring buffer per stream. Each call to `terminal_job_logs` returns: - `data` — bytes between `since_offset` and `next_offset` - `next_offset` — pass this as `since_offset` on your next call - `truncated_bytes_dropped` — non-zero when your `since_offset` was older than the ring's floor (you fell behind) **Always carry `next_offset` forward.** Don't replay from 0 — that's an offset reset, you'll see the same data twice and miss the part that fell off. When `truncated_bytes_dropped > 0`, the buffer evicted N bytes between your last call and now. Treat it as a signal that the job is producing output faster than you're consuming. Either poll more often or accept the gap and read from `next_offset` going forward. ## merge_stderr — interleaved or separate ``` merge_stderr=False → two streams, request "stdout" or "stderr" by name merge_stderr=True → one stream ("merged"), order preserved ``` Pick `merge_stderr=True` when: - The job's logs are designed to be read together (most servers, build tools) - You don't need to distinguish "this was stderr" Pick `merge_stderr=False` when: - stderr is genuinely error-only and stdout is data - You'll process them differently ## Signal escalation ``` terminal_job_manage(action="signal_int", job_id=...) # graceful (Ctrl-C-equivalent) terminal_job_manage(action="signal_term", job_id=...) # polite kill (SIGTERM) terminal_job_manage(action="signal_kill", job_id=...) # forced kill (SIGKILL, uncatchable) ``` The idiom: `signal_int` → wait 2-5s → `signal_term` → wait 2-5s → `signal_kill`. Most well-behaved processes handle SIGINT (graceful) and SIGTERM (cleanup, then exit). SIGKILL bypasses cleanup — use only when the process is truly unresponsive. After signaling, check exit with `terminal_job_logs(job_id, wait_until_exit=True, wait_timeout_sec=2)`. ## Stdin ``` terminal_job_manage(action="stdin", job_id=..., data="some input\n") terminal_job_manage(action="close_stdin", job_id=...) ``` For tools that read stdin to EOF, `close_stdin` after writing flushes them. For interactive tools that read line-by-line, just write each line. ## Take-over: when terminal_exec auto-backgrounds When `terminal_exec` returned `auto_backgrounded: true, job_id: <id>`, the process is **already** in the JobManager with its output flowing into the ring buffer.
Your transition is seamless: ``` # Already saw the start of output in terminal_exec's stdout/stderr. # Pick up reading where the envelope left off — use the byte count of the # initial stdout as your since_offset, OR just request tail output: terminal_job_logs(job_id="job_xxx", tail=True, max_bytes=64000) ``` Or block until exit and grab everything: ``` terminal_job_logs(job_id="job_xxx", since_offset=0, wait_until_exit=True, wait_timeout_sec=120) ``` ## Hard rules - **Jobs die when the server restarts.** The desktop runtime restarts terminal-tools when Hive restarts. There's no re-attach. If you need durability, use `nohup` + `terminal_exec` to detach into the system's process tree and track the PID yourself. - **Server-wide hard cap on concurrent jobs** (`TERMINAL_TOOLS_MAX_JOBS`, default 32). Past the cap, `terminal_job_start` returns an error. Wait for jobs to exit or kill old ones. - **No cross-restart output.** Output handles and ring buffers are in-memory only. See `references/signals.md` for the full signal catalog. diff --git a/core/framework/skills/_preset_skills/terminal-tools-job-control/references/signals.md b/core/framework/skills/_preset_skills/terminal-tools-job-control/references/signals.md new file mode 100644 index 00000000..ffa63b41 --- /dev/null +++ b/core/framework/skills/_preset_skills/terminal-tools-job-control/references/signals.md @@ -0,0 +1,41 @@ +# Signal reference terminal_job_manage exposes six signals via the action name. | Action | Signal | Number | Purpose | Catchable? | |---|---|---|---|---| | `signal_int` | SIGINT | 2 | Interrupt — Ctrl-C equivalent. Most CLIs treat as "stop gracefully". | Yes | | `signal_term` | SIGTERM | 15 | Polite termination request. Default for `kill`. | Yes | | `signal_kill` | SIGKILL | 9 | Forced kill. Process can't catch, clean up, or finalize. Use sparingly. | **No** | | `signal_hup` | SIGHUP | 1 | Hangup. Many daemons reload config on this. | Yes | | `signal_usr1` | SIGUSR1 | 10 | User-defined #1. Common: dump state, rotate logs (nginx, etc). | Yes | | `signal_usr2` | SIGUSR2 | 12 | User-defined #2. Common: graceful binary upgrade (unicorn, etc). | Yes | ## Escalation idiom ``` 1. signal_int (Ctrl-C — graceful) 2. wait 2-5s, check status with terminal_job_logs(wait_until_exit=True, wait_timeout_sec=3) 3. if still running: signal_term (cleanup-then-exit) 4. wait 2-5s 5. if still running: signal_kill (forced) ``` The waits matter: SIGTERM handlers do real work (flush logs, close DBs, release locks) and need time. Skipping straight to SIGKILL leaks resources. ## When to use SIGUSR1 / SIGUSR2 These are application-defined. Read the target's docs first. Common: - **nginx**: SIGUSR1 → reopen log files (for log rotation) - **unicorn / puma**: SIGUSR2 → fork a new master with the latest binary (graceful restart) - **rsync**: SIGUSR1 → print stats so far ## Reading exit codes after a signal When a job exits via signal, `terminal_job_logs` returns `exit_code: -N` (subprocess convention) where `abs(N)` is the signal number. The shell convention `128 + N` doesn't apply to the JobManager — that's for shell-spawned children.
+ +| exit_code | Means | +|---|---| +| -2 | Killed by SIGINT | +| -9 | Killed by SIGKILL | +| -15 | Killed by SIGTERM | diff --git a/core/framework/skills/_preset_skills/terminal-tools-pty-sessions/SKILL.md b/core/framework/skills/_preset_skills/terminal-tools-pty-sessions/SKILL.md new file mode 100644 index 00000000..a59fa2c3 --- /dev/null +++ b/core/framework/skills/_preset_skills/terminal-tools-pty-sessions/SKILL.md @@ -0,0 +1,127 @@ +--- +name: hive.terminal-tools-pty-sessions +description: Use when you need state across calls — building env vars, navigating with cd, driving REPLs (python -i, mysql, psql, node), or responding to interactive prompts (sudo password, ssh host-key confirmation, mysql connection). Teaches the prompt-sentinel exec pattern (default mode), raw I/O for REPLs (raw_send=True then read_only=True), the one-in-flight-per-session rule, and the close-or-leak-against-the-cap discipline. Bash on macOS — never zsh; explicit shell=/bin/zsh is rejected. Read before calling terminal_pty_open. +metadata: + author: hive + type: preset-skill + version: "1.0" +--- + +# Persistent PTY sessions + +PTY sessions are how you talk to interactive programs — programs that detect a terminal (`isatty()`) and behave differently when they don't see one. Use a session when: + +- You need state to persist across calls (`cd`, env vars, sourced scripts) +- You're driving a REPL (`python -i`, `mysql`, `psql`, `node`, `irb`) +- A program demands an interactive prompt (`sudo`, `ssh`, `npm login`, `gh auth login`) + +For everything else, `terminal_exec` is simpler. Sessions cost more (per-session bash process, ring buffer, idle-reaping bookkeeping) and have a hard cap (`TERMINAL_TOOLS_MAX_PTY`, default 8). + +## Why PTY (and not subprocess pipes) + +Subprocess pipes break on every interactive program. The moment a program calls `isatty()` and sees False, it disables prompts, color, line-editing, password masking, progress bars — sometimes refuses to start. PTY makes us look like a real terminal so these programs work the same as in your shell. + +The cost: PTY output includes terminal escape codes (cursor moves, color codes). The session captures them as-is; if you need clean text, strip ANSI escapes in your processing layer. + +## Bash on macOS — by deliberate policy + +`terminal_pty_open` always invokes `/bin/bash`, regardless of the user's `$SHELL`. macOS users: yes, even when zsh is your interactive default. This is the **terminal-tools-foundations** policy applied to PTYs. + +Reasons: +- zsh has command/builtin classes (`zmodload`, `=cmd` expansion, `zpty`, `ztcp`) that bypass bash-shaped security checks +- One shell behavior across platforms eliminates "works on Linux, breaks on macOS" surprises +- Bash is universal: any shell you've used will accept the bash subset + +The bash invocation uses `--norc --noprofile` so user dotfiles don't leak in. PS1 is set to a unique sentinel for prompt detection. PS2 is empty. PROMPT_COMMAND is empty. + +## Three modes of `terminal_pty_run` + +### 1. Default: send command, wait for prompt sentinel + +``` +terminal_pty_run(session_id, command="ls -la") + → { output, prompt_after: True, ... } +``` + +The session writes `ls -la\n`, waits for the sentinel that its custom PS1 emits, returns the slice between submission and prompt. **One in-flight call per session** — a concurrent call returns a `"session busy"` error. + +### 2. 
raw_send: send raw input, no waiting ``` terminal_pty_run(session_id, command="print('hi')\n", raw_send=True) → { bytes_sent: 12 } ``` For REPLs, vim keystrokes, password prompts. The session writes the bytes and returns immediately — it doesn't wait for a prompt (REPLs don't print bash's prompt; they print their own). After a `raw_send`, you typically follow with: ### 3. read_only: drain currently-buffered output ``` terminal_pty_run(session_id, read_only=True, timeout_sec=2) → { output: "hi\n", more: False, ... } ``` Reads whatever the session has accumulated since the last drain, with a brief settle window. Use after raw_send to capture the REPL's response. ## Custom prompt detection (`expect`) When the command launches a program with its own prompt (Python REPL's `>>> `, mysql's `mysql> `, sudo's password prompt), the bash sentinel won't appear until the program exits. Override: ``` terminal_pty_run(session_id, command="python3", expect=r">>>\s*$", timeout_sec=10) → output up to and including ">>>", then control returns ``` For sudo: ``` terminal_pty_run(session_id, command="sudo -k && sudo whoami", expect=r"[Pp]assword:") terminal_pty_run(session_id, command="<password>\n", raw_send=True) terminal_pty_run(session_id, read_only=True, timeout_sec=5) ``` (Treat passwords carefully — they end up in the ring buffer.) ## Always close ``` terminal_pty_close(session_id) ``` Leaked sessions count against `TERMINAL_TOOLS_MAX_PTY` (default 8). Idle reaping happens lazily on every `_open` call (sessions inactive longer than `idle_timeout_sec`, default 1800s, are dropped) — but don't rely on it. Close when you're done. For unresponsive sessions, `force=True` skips the graceful "exit" attempt and goes straight to SIGTERM/SIGKILL. ## Common patterns ### Stateful navigation ``` sid = terminal_pty_open(cwd="/") terminal_pty_run(sid, command="cd /var/log") terminal_pty_run(sid, command="ls -la *.log | head") terminal_pty_close(sid) ``` ### Python REPL ``` sid = terminal_pty_open() terminal_pty_run(sid, command="python3", expect=r">>>\s*$") terminal_pty_run(sid, command="x = 42\n", raw_send=True) terminal_pty_run(sid, command="print(x*x)\n", raw_send=True) result = terminal_pty_run(sid, read_only=True) # → "1764\n>>> " terminal_pty_run(sid, command="exit()\n", raw_send=True) terminal_pty_close(sid) ``` ### ssh with host-key prompt ``` sid = terminal_pty_open() terminal_pty_run(sid, command="ssh user@new-host", expect=r"\(yes/no.*\)\?") terminal_pty_run(sid, command="yes\n", raw_send=True) terminal_pty_run(sid, read_only=True, timeout_sec=10) # password prompt or login ``` diff --git a/core/framework/skills/_preset_skills/terminal-tools-troubleshooting/SKILL.md b/core/framework/skills/_preset_skills/terminal-tools-troubleshooting/SKILL.md new file mode 100644 index 00000000..867e97a7 --- /dev/null +++ b/core/framework/skills/_preset_skills/terminal-tools-troubleshooting/SKILL.md @@ -0,0 +1,92 @@ +--- name: hive.terminal-tools-troubleshooting description: Read when a terminal-tools call returned something surprising — empty stdout despite no error, exit_code is null, output_handle came back expired, "too many jobs" / "session busy" / "too many PTYs", warning was set unexpectedly, semantic_status disagrees with exit_code. Diagnostic recipes only — load on demand. Don't preload; the foundational skill covers the happy path.
metadata: author: hive type: preset-skill version: "1.0" --- # Troubleshooting terminal-tools Recipes for surprising results. Match the symptom to the section. ## Empty `stdout` despite the command "should have" produced output Possible causes: 1. Output went to **stderr** instead. Check `stderr` in the envelope (or use `merge_stderr=True` for jobs). 2. Output was **fully truncated** because `max_output_kb` is too small. Check `stdout_truncated_bytes > 0`. Bump `max_output_kb` or paginate via `output_handle`. 3. Command produced no output (correct, just unexpected — `silent` flags, no matches). 4. Pipeline issue: the last stage of a pipe ran but stdout went elsewhere (`> /dev/null`, redirected via `2>&1`). 5. Process is buffering its output and didn't flush before exit. Add `stdbuf -oL` (line-buffered) or `unbuffer` to the command. ## `exit_code: null` | Cause | Other field | |---|---| | Auto-backgrounded | `auto_backgrounded: true, job_id: <id>` | | Hard timeout, process killed | `timed_out: true` | | Pre-spawn failure (command not found) | `error: ...` set, `pid: null` | | Still running (in `terminal_job_logs`) | `status: "running"` | ## `output_handle` returned `expired: true` 5-minute TTL. Either (a) you waited too long, or (b) the store evicted it under memory pressure (64 MB total cap, LRU eviction). Re-run the command. To reduce risk: paginate the handle as soon as you receive it, or use `terminal_job_*` for huge outputs (4 MB ring buffer with offsets — no expiry). ## "too many jobs" / `JobLimitExceeded` `TERMINAL_TOOLS_MAX_JOBS` (default 32) hit. Either: - Wait for jobs to exit (poll with `terminal_job_logs(wait_until_exit=True)`) - Kill old jobs: `terminal_job_manage(action="list")` to see what's running, then `signal_term` the abandoned ones - Raise the cap via env (rare) ## "session busy" A `terminal_pty_run` was issued while another `_run` is in flight on the same session. PTY sessions are single-threaded conversations. Wait for the prior call to return, or open a second session. ## "PTY cap reached" `TERMINAL_TOOLS_MAX_PTY` (default 8) hit. Close idle sessions (`terminal_pty_close`). Idle reaping is lazy and won't rescue you here — opening a new session throws once the cap is hit, so close sessions manually. ## `warning` is set, the command worked Informational only. The pattern matched (e.g. `rm -rf` literally appears, or `git push --force` was used). The command ran. The warning is your "did I mean to do that?" prompt — verify the side effect was intended before continuing. ## `semantic_status: "ok"` but `exit_code: 1` Working as designed. Some commands use exit 1 for legitimate non-error states: - `grep` / `rg` exit 1 when **no matches** found - `find` exit 1 when **some directories were unreadable** (typical on `/proc`, etc.) - `diff` exit 1 when **files differ** - `test` / `[` exit 1 when **condition is false** The `semantic_message` field explains. Trust `semantic_status`, not raw `exit_code`. ## `semantic_status: "error"` but `exit_code: 0` Shouldn't happen. If it does, file a bug. ## `truncated_bytes_dropped > 0` in `terminal_job_logs` Your `since_offset` was older than the ring buffer's floor — bytes evicted before you could read them.
Either: - Poll faster (lower latency between calls) - Use `merge_stderr=True` (single 4 MB ring instead of 4 MB × 2) - Accept the gap and move forward from `next_offset` ## `terminal_pty_open` succeeds but the first `_run` times out The session may not have produced its first prompt sentinel within the 2-second startup window. Try: - A `terminal_pty_run(sid, read_only=True, timeout_sec=2)` to drain whatever's accumulated - A noop command (`terminal_pty_run(sid, command="true")`) to force a prompt cycle Could also indicate the bash process died at startup — `terminal_pty_run(sid, ...)` would then return `"session has exited"`. ## `shell="/bin/zsh"` returned an error By design. terminal-tools is bash-only on POSIX. Use `shell=True` (default `/bin/bash`) or omit `shell=` to exec directly. ## A command in `shell=True` is interpreted differently than expected Bash, not zsh, semantics. `**/*` doesn't recurse without `shopt -s globstar`; `=cmd` expansion doesn't work; array indexing needs `${arr[idx]}` and is 0-based, where zsh also accepts `$arr[idx]` and indexes from 1. When in doubt, the foundational skill's "bash, not zsh" section is the canonical statement. diff --git a/core/framework/skills/tool_gating.py b/core/framework/skills/tool_gating.py index 426d30b4..14d8751d 100644 --- a/core/framework/skills/tool_gating.py +++ b/core/framework/skills/tool_gating.py @@ -33,6 +33,7 @@ _BUNDLED_DIRS: tuple[Path, ...] = ( # (tool-name prefix, skill directory name, display name) _TOOL_GATED_SKILLS: list[tuple[str, str, str]] = [ ("browser_", "browser-automation", "hive.browser-automation"), + ("terminal_", "terminal-tools-foundations", "hive.terminal-tools-foundations"), ] _BODY_CACHE: dict[str, str] = {} diff --git a/tools/src/terminal_tools/__init__.py b/tools/src/terminal_tools/__init__.py new file mode 100644 index 00000000..e4e4708a --- /dev/null +++ b/tools/src/terminal_tools/__init__.py @@ -0,0 +1,43 @@ +"""terminal-tools — Terminal capabilities MCP server. Exposes ten tools (prefix ``terminal_*``) covering: - Foreground exec with auto-promotion to background (``terminal_exec``) - Background job lifecycle (``terminal_job_*``) - Persistent PTY-backed bash sessions (``terminal_pty_*``) - Filesystem search (``terminal_rg``, ``terminal_find``) - Truncation handle retrieval (``terminal_output_get``) Bash-only on POSIX. zsh is rejected at the shell-resolver level. See ``common/limits.py:_resolve_shell`` for the single enforcement point. """ from __future__ import annotations from typing import TYPE_CHECKING if TYPE_CHECKING: from fastmcp import FastMCP def register_terminal_tools(mcp: FastMCP) -> list[str]: """Register all ten terminal-tools with the FastMCP server. Returns the list of registered tool names so the caller can log / smoke-test how many landed.
+ """ + from terminal_tools.exec import register_exec_tools + from terminal_tools.jobs.tools import register_job_tools + from terminal_tools.output import register_output_tools + from terminal_tools.pty.tools import register_pty_tools + from terminal_tools.search.tools import register_search_tools + + register_exec_tools(mcp) + register_job_tools(mcp) + register_pty_tools(mcp) + register_search_tools(mcp) + register_output_tools(mcp) + + return [name for name in mcp._tool_manager._tools.keys() if name.startswith("terminal_")] + + +__all__ = ["register_terminal_tools"] diff --git a/tools/src/terminal_tools/common/__init__.py b/tools/src/terminal_tools/common/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tools/src/terminal_tools/common/destructive_warning.py b/tools/src/terminal_tools/common/destructive_warning.py new file mode 100644 index 00000000..1afdeec2 --- /dev/null +++ b/tools/src/terminal_tools/common/destructive_warning.py @@ -0,0 +1,72 @@ +"""Detect potentially destructive commands and surface a warning string. + +Informational only — the warning is included in the exec envelope, not +used to block execution. Lets the agent re-read its command before +trusting the result of an irreversible action. Catalog ported from +claudecode's BashTool/destructiveCommandWarning.ts. +""" + +from __future__ import annotations + +import re +from collections.abc import Sequence + +_PATTERNS: tuple[tuple[re.Pattern[str], str], ...] = ( + # Git — data loss / hard to reverse + (re.compile(r"\bgit\s+reset\s+--hard\b"), "may discard uncommitted changes"), + ( + re.compile(r"\bgit\s+push\b[^;&|\n]*[ \t](--force|--force-with-lease|-f)\b"), + "may overwrite remote history", + ), + ( + re.compile(r"\bgit\s+clean\b(?![^;&|\n]*(?:-[a-zA-Z]*n|--dry-run))[^;&|\n]*-[a-zA-Z]*f"), + "may permanently delete untracked files", + ), + (re.compile(r"\bgit\s+checkout\s+(--\s+)?\.[ \t]*($|[;&|\n])"), "may discard all working tree changes"), + (re.compile(r"\bgit\s+restore\s+(--\s+)?\.[ \t]*($|[;&|\n])"), "may discard all working tree changes"), + (re.compile(r"\bgit\s+stash[ \t]+(drop|clear)\b"), "may permanently remove stashed changes"), + ( + re.compile(r"\bgit\s+branch\s+(-D[ \t]|--delete\s+--force|--force\s+--delete)\b"), + "may force-delete a branch", + ), + # Git — safety bypass + (re.compile(r"\bgit\s+(commit|push|merge)\b[^;&|\n]*--no-verify\b"), "may skip safety hooks"), + (re.compile(r"\bgit\s+commit\b[^;&|\n]*--amend\b"), "may rewrite the last commit"), + # File deletion — most specific patterns first so the warning is descriptive + ( + re.compile(r"(^|[;&|\n]\s*)rm\s+-[a-zA-Z]*[rR][a-zA-Z]*f|(^|[;&|\n]\s*)rm\s+-[a-zA-Z]*f[a-zA-Z]*[rR]"), + "may recursively force-remove files", + ), + (re.compile(r"(^|[;&|\n]\s*)rm\s+-[a-zA-Z]*[rR]"), "may recursively remove files"), + (re.compile(r"(^|[;&|\n]\s*)rm\s+-[a-zA-Z]*f"), "may force-remove files"), + # Database + ( + re.compile(r"\b(DROP|TRUNCATE)\s+(TABLE|DATABASE|SCHEMA)\b", re.IGNORECASE), + "may drop or truncate database objects", + ), + (re.compile(r"\bDELETE\s+FROM\s+\w+[ \t]*(;|\"|'|\n|$)", re.IGNORECASE), "may delete rows from a database table"), + # Infrastructure + (re.compile(r"\bkubectl\s+delete\b"), "may delete Kubernetes resources"), + (re.compile(r"\bterraform\s+destroy\b"), "may destroy Terraform infrastructure"), +) + + +def get_warning(command: str | Sequence[str]) -> str | None: + """Return a warning string if the command matches a destructive pattern. 
+ + For argv-style invocations (``command=["rm", "-rf", "/tmp/x"]``), we + join with spaces so the same regex catalog applies. Returns None + when nothing matches. + """ + if isinstance(command, (list, tuple)): + text = " ".join(str(c) for c in command) + else: + text = command + + for pattern, message in _PATTERNS: + if pattern.search(text): + return message + return None + + +__all__ = ["get_warning"] diff --git a/tools/src/terminal_tools/common/limits.py b/tools/src/terminal_tools/common/limits.py new file mode 100644 index 00000000..f97b3df6 --- /dev/null +++ b/tools/src/terminal_tools/common/limits.py @@ -0,0 +1,153 @@ +"""Shell resolution + resource limits. + +The single place that decides which shell binary we invoke and how to +strip zsh-specific environment leakage. Per the terminal-tools security +stance (see ``destructive_warning.py`` neighbours), zsh constructs +(``zmodload``, ``=cmd``, ``zpty``, ``ztcp``) bypass bash-shaped +checks — refusing zsh isn't aesthetic, it's a deliberate hardening +choice. +""" + +from __future__ import annotations + +import os +import resource +from collections.abc import Callable +from typing import Any + +# Env vars that influence zsh startup. Strip these before exec so a +# user with zsh dotfiles can't accidentally jam zsh behaviour into +# the bash subprocess. +_ZSH_ENV_PREFIXES: tuple[str, ...] = ("ZDOTDIR", "ZSH_") + + +class ZshRefused(ValueError): + """Raised when an explicit zsh shell is requested.""" + + +def _resolve_shell(shell: bool | str) -> str | None: + """Return the shell executable to use, or None for direct exec. + + - ``shell=False`` → None (caller should exec command directly) + - ``shell=True`` → ``/bin/bash`` always (ignores ``$SHELL``) + - ``shell="/bin/bash"`` or any path containing ``bash`` → that path + - ``shell="/bin/zsh"`` or any zsh-containing path → raises ZshRefused + + Caller is expected to invoke as ``[shell_path, "-c", command]``. + """ + if shell is False or shell is None: + return None + + if shell is True: + return "/bin/bash" + + if not isinstance(shell, str): + raise TypeError(f"shell must be bool or str, got {type(shell).__name__}") + + lower = shell.lower() + if "zsh" in lower: + raise ZshRefused( + f"shell={shell!r} rejected: terminal-tools is bash-only on POSIX. " + "Use shell=True (bash) or omit the shell parameter to exec directly. " + "This is a deliberate security stance — zsh has command/builtin " + "classes (zmodload, =cmd, zpty, ztcp) that bypass bash-shaped checks." + ) + + return shell + + +def sanitized_env(extra: dict[str, str] | None = None) -> dict[str, str]: + """Return os.environ with zsh-related vars stripped, plus optional overrides. + + Stripping ``ZDOTDIR`` and ``ZSH_*`` ensures zsh dotfiles don't leak + into the bash subprocess's startup. Bash dotfiles still apply when + the shell is invoked interactively. + """ + env = {k: v for k, v in os.environ.items() if not k.startswith(_ZSH_ENV_PREFIXES)} + if extra: + env.update(extra) + return env + + +# ── Resource limits ─────────────────────────────────────────────────── + + +# Maps the public limit name to its (resource constant, multiplier) +# tuple. Multipliers convert the agent-friendly unit (seconds, MB) to +# the kernel unit (seconds, bytes). 
+_LIMIT_MAP: dict[str, tuple[int, int]] = { + "cpu_sec": (resource.RLIMIT_CPU, 1), + "rss_mb": (resource.RLIMIT_AS, 1024 * 1024), + "fsize_mb": (resource.RLIMIT_FSIZE, 1024 * 1024), + "nofile": (resource.RLIMIT_NOFILE, 1), +} + + +def make_preexec_fn(limits: dict[str, int] | None) -> Callable[[], None] | None: + """Build a preexec_fn that applies setrlimit before exec. + + Returns None if no limits are configured (so subprocess.Popen can + skip the fork hook entirely). Unknown keys are ignored — agents + pass arbitrary dicts and we don't want a typo to crash exec. + """ + if not limits: + return None + + def _apply() -> None: + for key, value in limits.items(): + spec = _LIMIT_MAP.get(key) + if spec is None or value is None: + continue + rlimit_const, multiplier = spec + limit = int(value) * multiplier + try: + resource.setrlimit(rlimit_const, (limit, limit)) + except (OSError, ValueError): + # Hard limit may exceed the current ceiling. Best-effort: + # set just the soft limit to whatever we can. + try: + soft, hard = resource.getrlimit(rlimit_const) + resource.setrlimit(rlimit_const, (min(limit, hard), hard)) + except Exception: + pass + + return _apply + + +def coerce_limits(limits: Any) -> dict[str, int] | None: + """Validate and normalize a user-supplied limits dict. + + Accepts the four supported keys (``cpu_sec``, ``rss_mb``, + ``fsize_mb``, ``nofile``); silently drops unknown keys; returns + None when the result is empty. Negative or non-int values are + dropped too — invalid limits are better as no-ops than as errors, + since the agent didn't ask for enforcement of a *specific* + failure mode. + """ + if not limits: + return None + if not isinstance(limits, dict): + return None + + out: dict[str, int] = {} + for key in _LIMIT_MAP: + value = limits.get(key) + if value is None: + continue + try: + ivalue = int(value) + except (TypeError, ValueError): + continue + if ivalue <= 0: + continue + out[key] = ivalue + return out or None + + +__all__ = [ + "ZshRefused", + "_resolve_shell", + "coerce_limits", + "make_preexec_fn", + "sanitized_env", +] diff --git a/tools/src/terminal_tools/common/output_store.py b/tools/src/terminal_tools/common/output_store.py new file mode 100644 index 00000000..63fb1895 --- /dev/null +++ b/tools/src/terminal_tools/common/output_store.py @@ -0,0 +1,121 @@ +"""TTL-bounded output handle store. + +When an exec produces more output than the inline cap (default 256 KB), +the surplus is kept here under a short-lived handle. The agent passes +the handle to ``terminal_output_get`` to paginate the rest. Handles +expire after 5 minutes; total store size is capped at 64 MB with LRU +eviction so the server can't be DoS'd by a chatty subprocess. + +Thread-safe — exec/job code paths populate; the MCP request thread +drains. 
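+
+Sketch of the intended flow (``overflow_bytes`` is a stand-in)::
+
+    handle = get_store().put(overflow_bytes)        # producer side
+    page = get_store().get(handle, since_offset=0)  # consumer side
+    while not page["eof"] and not page["expired"]:
+        page = get_store().get(handle, since_offset=page["next_offset"])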
+""" + +from __future__ import annotations + +import secrets +import threading +import time +from dataclasses import dataclass, field + +_DEFAULT_TTL_SEC = 300 +_DEFAULT_TOTAL_CAP_BYTES = 64 * 1024 * 1024 + + +@dataclass(slots=True) +class _Entry: + data: bytes + created_at: float + last_accessed_at: float = field(default_factory=time.monotonic) + + +class OutputStore: + """LRU-with-TTL byte store keyed by opaque handle.""" + + def __init__( + self, + ttl_sec: int = _DEFAULT_TTL_SEC, + total_cap_bytes: int = _DEFAULT_TOTAL_CAP_BYTES, + ): + self._ttl = ttl_sec + self._cap = total_cap_bytes + self._entries: dict[str, _Entry] = {} + self._total_bytes = 0 + self._lock = threading.Lock() + + def put(self, data: bytes) -> str: + """Store ``data``, return a fresh handle. Evicts older entries + if the total cap would be exceeded.""" + if not data: + # Empty payloads don't need a handle. + return "" + handle = "out_" + secrets.token_hex(8) + now = time.monotonic() + with self._lock: + self._evict_locked(now) + # Reserve room for new entry; evict LRU until it fits. + while self._total_bytes + len(data) > self._cap and self._entries: + self._pop_lru_locked() + self._entries[handle] = _Entry(data=data, created_at=now, last_accessed_at=now) + self._total_bytes += len(data) + return handle + + def get(self, handle: str, since_offset: int = 0, max_bytes: int = 64 * 1024) -> dict: + """Retrieve a slice of stored data. + + Returns ``{data, offset, next_offset, eof, expired}`` so the + agent can paginate without separate calls. ``expired=True`` + when the handle is unknown or the TTL has lapsed. + """ + now = time.monotonic() + with self._lock: + self._evict_locked(now) + entry = self._entries.get(handle) + if entry is None: + return { + "data": "", + "offset": int(since_offset), + "next_offset": int(since_offset), + "eof": True, + "expired": True, + } + entry.last_accessed_at = now + buf = entry.data + + since = max(0, int(since_offset)) + end = min(len(buf), since + max(0, int(max_bytes))) + data_slice = buf[since:end] + return { + "data": data_slice.decode("utf-8", errors="replace"), + "offset": since, + "next_offset": end, + "eof": end >= len(buf), + "expired": False, + } + + # ── Eviction ────────────────────────────────────────────────── + + def _evict_locked(self, now: float) -> None: + # TTL eviction — anything past TTL goes. + stale = [h for h, e in self._entries.items() if now - e.created_at > self._ttl] + for h in stale: + entry = self._entries.pop(h, None) + if entry is not None: + self._total_bytes -= len(entry.data) + + def _pop_lru_locked(self) -> None: + if not self._entries: + return + oldest_handle = min(self._entries, key=lambda h: self._entries[h].last_accessed_at) + entry = self._entries.pop(oldest_handle) + self._total_bytes -= len(entry.data) + + +# Module-level singleton; the server has one instance per process. +_STORE = OutputStore() + + +def get_store() -> OutputStore: + return _STORE + + +__all__ = ["OutputStore", "get_store"] diff --git a/tools/src/terminal_tools/common/ring_buffer.py b/tools/src/terminal_tools/common/ring_buffer.py new file mode 100644 index 00000000..5bede88e --- /dev/null +++ b/tools/src/terminal_tools/common/ring_buffer.py @@ -0,0 +1,155 @@ +"""Bounded byte ring buffer with absolute monotonic offsets. + +The streaming primitive shared by jobs and PTY sessions. Writers push +bytes; readers ask for ``[since_offset, since_offset + N)`` and the +buffer either returns the data (if still in window) or signals how +many bytes were dropped from the floor. 
This lets the agent resume
+after a missed poll without silent loss.
+
+Thread-safe via a single lock — readers and writers can come from
+different threads (a pump thread fills it, the MCP request thread
+drains it).
+"""
+
+from __future__ import annotations
+
+import threading
+from collections import deque
+from dataclasses import dataclass
+
+
+@dataclass(slots=True)
+class ReadResult:
+    data: bytes
+    offset: int
+    next_offset: int
+    truncated_bytes_dropped: int  # bytes lost between since_offset and the buffer floor
+
+
+class RingBuffer:
+    """Capacity-bounded byte ring with absolute offsets.
+
+    The total written count never resets; each call sees absolute
+    offsets growing monotonically. The in-memory window slides forward
+    once total_written exceeds capacity_bytes.
+    """
+
+    def __init__(self, capacity_bytes: int = 4 * 1024 * 1024):
+        if capacity_bytes <= 0:
+            raise ValueError("capacity_bytes must be positive")
+        self._capacity = capacity_bytes
+        self._chunks: deque[bytes] = deque()
+        self._buffered_bytes = 0
+        self._floor = 0  # absolute offset of the oldest byte still in buffer
+        self._total_written = 0
+        self._eof = False
+        self._lock = threading.Lock()
+
+    # ── Writer side ───────────────────────────────────────────────
+
+    def write(self, data: bytes) -> None:
+        if not data:
+            return
+        with self._lock:
+            self._chunks.append(data)
+            self._buffered_bytes += len(data)
+            self._total_written += len(data)
+            self._evict_locked()
+
+    def close(self) -> None:
+        """Mark the stream as ended. Subsequent reads will see eof=True
+        once they catch up to total_written."""
+        with self._lock:
+            self._eof = True
+
+    def _evict_locked(self) -> None:
+        while self._buffered_bytes > self._capacity and self._chunks:
+            head = self._chunks[0]
+            overshoot = self._buffered_bytes - self._capacity
+            if len(head) <= overshoot:
+                self._chunks.popleft()
+                self._buffered_bytes -= len(head)
+                self._floor += len(head)
+            else:
+                self._chunks[0] = head[overshoot:]
+                self._buffered_bytes -= overshoot
+                self._floor += overshoot
+
+    # ── Reader side ───────────────────────────────────────────────
+
+    @property
+    def total_written(self) -> int:
+        with self._lock:
+            return self._total_written
+
+    @property
+    def floor(self) -> int:
+        with self._lock:
+            return self._floor
+
+    @property
+    def eof(self) -> bool:
+        with self._lock:
+            return self._eof
+
+    def read(self, since_offset: int, max_bytes: int) -> ReadResult:
+        """Read up to ``max_bytes`` starting at ``since_offset``.
+
+        - If ``since_offset`` is past total_written, returns empty data
+          (and ``next_offset == since_offset``, signaling caller to wait).
+        - If ``since_offset`` is below the buffer floor, the missed
+          bytes are reported as ``truncated_bytes_dropped`` and reading
+          starts from the floor.
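+
+        Worked example (illustrative)::
+
+            >>> rb = RingBuffer(capacity_bytes=8)
+            >>> rb.write(b"0123456789")
+            >>> r = rb.read(0, 4)
+            >>> (r.data, r.truncated_bytes_dropped, r.next_offset)
+            (b'2345', 2, 6)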
+ """ + max_bytes = max(0, int(max_bytes)) + with self._lock: + since = max(0, int(since_offset)) + dropped = 0 + if since < self._floor: + dropped = self._floor - since + since = self._floor + + available = self._total_written - since + if available <= 0 or max_bytes == 0: + return ReadResult( + data=b"", + offset=since, + next_offset=since, + truncated_bytes_dropped=dropped, + ) + + to_take = min(available, max_bytes) + # Walk chunks to assemble [since, since+to_take) + cursor = self._floor + collected: list[bytes] = [] + remaining = to_take + for chunk in self._chunks: + chunk_end = cursor + len(chunk) + if chunk_end <= since: + cursor = chunk_end + continue + start_in_chunk = max(0, since - cursor) + end_in_chunk = min(len(chunk), start_in_chunk + remaining) + slice_ = chunk[start_in_chunk:end_in_chunk] + collected.append(slice_) + remaining -= len(slice_) + cursor = chunk_end + if remaining <= 0: + break + + data = b"".join(collected) + return ReadResult( + data=data, + offset=since, + next_offset=since + len(data), + truncated_bytes_dropped=dropped, + ) + + def tail(self, max_bytes: int) -> ReadResult: + """Read the last ``max_bytes`` (or as much as is buffered).""" + with self._lock: + start = max(self._floor, self._total_written - max(0, int(max_bytes))) + return self.read(start, max_bytes) + + +__all__ = ["RingBuffer", "ReadResult"] diff --git a/tools/src/terminal_tools/common/semantic_exit.py b/tools/src/terminal_tools/common/semantic_exit.py new file mode 100644 index 00000000..d24bc6c1 --- /dev/null +++ b/tools/src/terminal_tools/common/semantic_exit.py @@ -0,0 +1,103 @@ +"""Per-command exit-code semantics. + +Many commands use exit codes to convey information other than just +success/failure. ``grep`` returns 1 when no matches are found, which +is not an error. Encoding this lookup means the agent reads +``semantic_status`` instead of having to memorize per-command quirks. + +Catalog ported from claudecode's BashTool/commandSemantics.ts. We +inspect only the *final* command in a piped chain (its exit code is +what the shell propagates), and only when the command is run with +``shell=False`` (i.e. we know the argv). For ``shell=True`` we fall +back to default semantics — the heuristic of parsing a bash command +string for "the last command" is fragile and the upstream tool +already documents the issue. +""" + +from __future__ import annotations + +from collections.abc import Sequence + +SemanticStatus = str # "ok" | "signal" | "error" + + +# Maps base command name → (exit_code → semantic). Returning +# (status, message) — message may be None for the success cases. +_SEMANTICS: dict[str, dict[int, tuple[SemanticStatus, str | None]]] = { + # grep: 0=matches, 1=no matches (not an error), 2+=error + "grep": {0: ("ok", None), 1: ("ok", "No matches found")}, + "rg": {0: ("ok", None), 1: ("ok", "No matches found")}, + "ripgrep": {0: ("ok", None), 1: ("ok", "No matches found")}, + # find: 0=success, 1=partial (some dirs unreadable), 2+=error + "find": {0: ("ok", None), 1: ("ok", "Some directories were inaccessible")}, + # diff: 0=identical, 1=differ (informational), 2+=error + "diff": {0: ("ok", None), 1: ("ok", "Files differ")}, + # test / [: 0=true, 1=false, 2+=error + "test": {0: ("ok", None), 1: ("ok", "Condition is false")}, + "[": {0: ("ok", None), 1: ("ok", "Condition is false")}, +} + + +def _base_command(command: str | Sequence[str]) -> str: + """Extract the base command (first word) from argv or a string. 
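+
+    Illustrative, per the heuristic below::
+
+        >>> _base_command(["rg", "-n", "TODO"])
+        'rg'
+        >>> _base_command("cat err.log | /usr/bin/grep -c WARN")
+        'grep'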
+ + For shell=True strings, picks the *last* command in a pipeline since + that determines the propagated exit code. Heuristic and intentionally + not security-critical — only used to label the exit-code semantics. + """ + if isinstance(command, (list, tuple)): + return command[0] if command else "" + + if not isinstance(command, str): + return "" + + # Take the segment after the last unquoted pipe/&&/||/; — best-effort. + text = command + for sep in ("||", "&&", "|", ";"): + # Crude split — fine for the heuristic. + if sep in text: + text = text.split(sep)[-1] + + text = text.strip() + if not text: + return "" + first = text.split()[0] + # Strip a leading path: /usr/bin/grep → grep + return first.rsplit("/", 1)[-1] + + +def classify( + command: str | Sequence[str], + exit_code: int | None, + *, + timed_out: bool = False, + signaled: bool = False, +) -> tuple[SemanticStatus, str | None]: + """Classify an exit code with command-specific semantics. + + Returns (status, message) where status is one of "ok"/"signal"/"error" + and message is a short explanation when the status would otherwise + surprise the agent (e.g. ``grep`` exiting 1). + """ + if timed_out: + return ("error", "Command timed out") + if signaled: + return ("signal", f"Killed by signal (exit {exit_code})") + if exit_code is None: + return ("ok", "Still running") # auto-backgrounded case + + base = _base_command(command) + table = _SEMANTICS.get(base) + if table is not None: + if exit_code in table: + return table[exit_code] + # Beyond the catalog's known codes for this command, treat as error. + return ("error", f"Command failed with exit code {exit_code}") + + # Default: zero is success, nonzero is error. + if exit_code == 0: + return ("ok", None) + return ("error", f"Command failed with exit code {exit_code}") + + +__all__ = ["classify"] diff --git a/tools/src/terminal_tools/common/truncation.py b/tools/src/terminal_tools/common/truncation.py new file mode 100644 index 00000000..214e3642 --- /dev/null +++ b/tools/src/terminal_tools/common/truncation.py @@ -0,0 +1,107 @@ +"""Helpers to build the standard exec/job envelope with truncation. + +The envelope shape is documented in the foundational skill — keep +this module's output stable so skill updates don't have to chase +field renames. Callers pass raw bytes; we decode and trim. +""" + +from __future__ import annotations + +from collections.abc import Sequence + +from terminal_tools.common.destructive_warning import get_warning +from terminal_tools.common.output_store import get_store +from terminal_tools.common.semantic_exit import classify + + +def _truncate_bytes(buf: bytes, max_bytes: int) -> tuple[str, int, str]: + """Trim ``buf`` to ``max_bytes`` (decoded). Returns + ``(decoded_text, dropped_bytes, full_for_handle)``. We always store + the *original* bytes in the handle so the agent gets exactly what + the process emitted, even when truncation point split a multi-byte + char. 
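+
+    Doctest-style illustration (counts are bytes, not characters)::
+
+        >>> _truncate_bytes(b"abcdef", 4)
+        ('abcd', 2, 'abcdef')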
+
+    """
+    if max_bytes < 0:
+        max_bytes = 0
+    if len(buf) <= max_bytes:
+        return buf.decode("utf-8", errors="replace"), 0, buf.decode("utf-8", errors="replace")
+
+    head = buf[:max_bytes]
+    return (
+        head.decode("utf-8", errors="replace"),
+        len(buf) - max_bytes,
+        buf.decode("utf-8", errors="replace"),
+    )
+
+
+def build_exec_envelope(
+    *,
+    command: str | Sequence[str],
+    exit_code: int | None,
+    stdout_bytes: bytes,
+    stderr_bytes: bytes,
+    runtime_ms: int,
+    pid: int | None,
+    timed_out: bool,
+    signaled: bool = False,
+    max_output_kb: int = 256,
+    auto_backgrounded: bool = False,
+    job_id: str | None = None,
+    auto_shell: bool = False,
+) -> dict:
+    """Construct the standard exec envelope.
+
+    See ``terminal-tools-foundations`` SKILL for the field semantics. The
+    inline ``stdout``/``stderr`` are decoded and trimmed; if either
+    overflows ``max_output_kb`` the *full* bytes are stashed in the
+    output store under ``output_handle`` for retrieval via
+    ``terminal_output_get``. Both streams go into one combined handle,
+    joined with the ``--- stdout ---`` / ``--- stderr ---`` separators
+    that ``terminal_output_get`` documents.
+    """
+    max_bytes = max(1024, max_output_kb * 1024)
+
+    stdout_text, stdout_dropped, stdout_full = _truncate_bytes(stdout_bytes, max_bytes)
+    stderr_text, stderr_dropped, stderr_full = _truncate_bytes(stderr_bytes, max_bytes)
+
+    output_handle: str | None = None
+    if stdout_dropped > 0 or stderr_dropped > 0:
+        store = get_store()
+        # Stash whichever overflowed (or both, joined with a separator
+        # the foundational skill documents). For simplicity we always
+        # store both when either overflows so the agent can fetch the
+        # other stream in full too if it wants.
+        combined = (
+            b"--- stdout ---\n"
+            + stdout_bytes
+            + b"\n--- stderr ---\n"
+            + stderr_bytes
+        )
+        output_handle = store.put(combined)
+
+    semantic_status, semantic_message = classify(
+        command, exit_code, timed_out=timed_out, signaled=signaled
+    )
+
+    warning = get_warning(command)
+
+    return {
+        "exit_code": exit_code,
+        "stdout": stdout_text,
+        "stderr": stderr_text,
+        "stdout_truncated_bytes": stdout_dropped,
+        "stderr_truncated_bytes": stderr_dropped,
+        "runtime_ms": int(runtime_ms),
+        "pid": int(pid) if pid is not None else None,
+        "output_handle": output_handle,
+        "timed_out": bool(timed_out),
+        "semantic_status": semantic_status,
+        "semantic_message": semantic_message,
+        "warning": warning,
+        "auto_backgrounded": bool(auto_backgrounded),
+        "job_id": job_id,
+        "auto_shell": bool(auto_shell),
+    }
+
+
+__all__ = ["build_exec_envelope"]
diff --git a/tools/src/terminal_tools/exec.py b/tools/src/terminal_tools/exec.py
new file mode 100644
index 00000000..396a4004
--- /dev/null
+++ b/tools/src/terminal_tools/exec.py
@@ -0,0 +1,307 @@
+"""``terminal_exec`` — foreground exec with auto-promotion to background.
+
+The flagship tool. Most agent terminal interactions go through here:
+fast commands (<30s) return inline with the standard envelope; longer
+commands silently transition into the JobManager and surface a
+``job_id`` so the agent can poll. The "should I background this?"
+decision is removed — the answer is always yes-if-needed.
+
+Implementation notes:
+  - We spawn the process the same way JobManager does, then wait with
+    ``proc.wait(timeout=auto_background_after_sec)``. The inline path
+    drains pipes via background pump threads into ring buffers to
+    avoid pipe-fill deadlocks.
+  - Auto-promotion: when the timeout fires while the process is still
+    running, we already have its stdin/stdout/stderr file objects.
+    We hand them to JobManager which spawns pump threads to fill
+    ring buffers from that point on. The agent sees an envelope with
+    ``auto_backgrounded=True, exit_code=None, job_id=<…>`` and
+    transitions to ``terminal_job_logs``. **There's no early-output loss**
+    because the pumps start before we return from the tool call.
+  - For pure-foreground use (``auto_background_after_sec=0``), the
+    wait budget becomes ``timeout_sec`` itself: same pumps, no
+    promotion, hard kill on expiry.
+"""
+
+from __future__ import annotations
+
+import shlex
+import subprocess
+import threading
+import time
+from typing import TYPE_CHECKING
+
+from terminal_tools.common.limits import (
+    ZshRefused,
+    _resolve_shell,
+    coerce_limits,
+    make_preexec_fn,
+    sanitized_env,
+)
+from terminal_tools.common.ring_buffer import RingBuffer
+from terminal_tools.common.truncation import build_exec_envelope
+from terminal_tools.jobs.manager import JobLimitExceeded, get_manager
+
+if TYPE_CHECKING:
+    from fastmcp import FastMCP
+
+
+# Tokens that indicate the user passed a shell-syntax command (pipes,
+# redirects, conditional chains) rather than an argv list. When any of
+# these appear as standalone tokens in shlex.split(command), we silently
+# route the command through /bin/bash -c instead of trying to exec it
+# directly — the alternative is spawning the first program with the rest
+# of the line as junk argv, which either errors or returns fake success
+# (e.g. `echo "..." && ps ...` → echo prints the literal command).
+_SHELL_METACHARS: frozenset[str] = frozenset(
+    {"|", "&&", "||", ";", ">", "<", ">>", "<<", "&", "2>", "2>&1", "|&"}
+)
+
+
+def register_exec_tools(mcp: FastMCP) -> None:
+    @mcp.tool()
+    def terminal_exec(
+        command: str,
+        cwd: str | None = None,
+        env: dict[str, str] | None = None,
+        timeout_sec: int = 60,
+        auto_background_after_sec: int = 30,
+        shell: bool = False,
+        stdin: str | None = None,
+        limits: dict[str, int] | None = None,
+        max_output_kb: int = 256,
+    ) -> dict:
+        """Run a shell command and capture its output.
+
+        Past auto_background_after_sec, the call auto-promotes to a background
+        job and returns immediately with `auto_backgrounded=True, job_id=...`
+        — poll with terminal_job_logs(job_id, since_offset=...) to read the rest.
+        Set auto_background_after_sec=0 to force pure foreground (kill on
+        timeout_sec).
+
+        Bash-only on POSIX. Passing shell="/bin/zsh" raises an error — this is
+        a deliberate security stance.
+
+        Args:
+            command: The command. With shell=False we split with shlex;
+                pipes / redirects / globs are auto-detected and routed
+                through bash -c (surfaced as auto_shell=True in the
+                envelope). Pass shell=True for explicit shell syntax.
+            cwd: Working directory.
+            env: Environment override (when provided, merged into a
+                sanitized base — zsh dotfile vars are stripped).
+            timeout_sec: Hard kill deadline. Past this, the process is
+                terminated and `timed_out=True` is returned. Should be ≥
+                auto_background_after_sec for the auto-promote path to work.
+            auto_background_after_sec: Inline budget. Past this, promote to
+                a background job and return. 0 disables auto-promotion.
+            shell: True for `/bin/bash -c <command>`. zsh refused.
+            stdin: Optional stdin payload (string).
+            limits: Optional setrlimit caps. Keys: cpu_sec, rss_mb,
+                fsize_mb, nofile.
+            max_output_kb: Inline output cap. Overflow stashes to an
+                output_handle for retrieval via terminal_output_get.
+
+        Returns the standard envelope: see `terminal-tools-foundations` skill.
+        """
+        # Auto-detect shell-syntax commands.
If the agent passes + # ``shell=False`` (the default) but the command contains a pipe, + # redirect, ``&&``, etc., naive argv splitting silently mangles + # it — exec the first token with the rest as junk arguments. + # Detect that case and transparently route through bash -c, then + # surface an ``auto_shell=True`` flag in the envelope so the + # foundational skill / agent feedback loop can learn from it. + auto_shell = False + try: + if shell: + # User opted in; trust them. + pass + else: + try: + tokens = shlex.split(command, posix=True) + except ValueError: + # Unbalanced quotes — almost certainly meant for the shell. + auto_shell = True + tokens = [] + if not auto_shell: + if not tokens: + return _err_envelope(command, "command was empty") + if any(t in _SHELL_METACHARS for t in tokens) or any( + # globs that shlex left unexpanded (`*`, `?`, `[`) + any(c in t for c in "*?[") and t != "[" for t in tokens + ): + auto_shell = True + + full_env = sanitized_env(env) if env is not None else None + preexec = make_preexec_fn(coerce_limits(limits)) + except ZshRefused as e: + return _err_envelope(command, str(e)) + + effective_shell: bool | str = True if auto_shell else shell + + # Resolve shell here so the same logic the JobManager uses applies + # in both the inline + promoted paths. + try: + resolved_shell = _resolve_shell(effective_shell) + except ZshRefused as e: + return _err_envelope(command, str(e)) + + if resolved_shell is not None: + spawn_argv: list[str] = [resolved_shell, "-c", command] + else: + # shell=False AND no metacharacters → safe to direct-exec. + spawn_argv = tokens + + start = time.monotonic() + try: + proc = subprocess.Popen( + spawn_argv, + cwd=cwd, + env=full_env, + stdin=subprocess.PIPE if stdin is not None else None, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + preexec_fn=preexec, + close_fds=True, + bufsize=0, + ) + except FileNotFoundError as e: + return _err_envelope(command, f"command not found: {e}") + except OSError as e: + return _err_envelope(command, f"spawn failed: {e}") + + # Push stdin without blocking on the process draining it. For + # large stdin payloads this would deadlock; for typical agent + # use (small payloads or None) it's fine. + if stdin is not None and proc.stdin is not None: + try: + proc.stdin.write(stdin.encode("utf-8")) + proc.stdin.close() + except (BrokenPipeError, OSError): + pass + + # Pump stdout/stderr into ring buffers so we don't deadlock on + # full pipes during the wait. These same buffers become the + # job's buffers if we auto-promote. + stdout_buf = RingBuffer() + stderr_buf = RingBuffer() + pumps: list[threading.Thread] = [] + + def _pump(stream, ring: RingBuffer) -> None: + try: + while True: + chunk = stream.read(4096) + if not chunk: + break + ring.write(chunk) + except (OSError, ValueError): + pass + finally: + try: + stream.close() + except Exception: + pass + ring.close() + + if proc.stdout is not None: + t = threading.Thread(target=_pump, args=(proc.stdout, stdout_buf), daemon=True) + t.start() + pumps.append(t) + if proc.stderr is not None: + t = threading.Thread(target=_pump, args=(proc.stderr, stderr_buf), daemon=True) + t.start() + pumps.append(t) + + # Wait for either: auto-bg budget, hard timeout, or natural exit. 
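+        # (Illustrative: auto_background_after_sec=30, timeout_sec=60 →
+        # wait up to 30s then promote; auto_background_after_sec=0 →
+        # wait the full 60s and hard-kill on expiry.)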
+ promoted = False + timed_out = False + budget = auto_background_after_sec if auto_background_after_sec > 0 else timeout_sec + budget = min(budget, timeout_sec) if timeout_sec > 0 else budget + + try: + proc.wait(timeout=budget if budget > 0 else None) + except subprocess.TimeoutExpired: + if auto_background_after_sec > 0: + # Promote: the process keeps running, we hand its + # already-pumping buffers to the JobManager. + try: + record = get_manager().adopt_running( + proc, + spawn_argv if resolved_shell is None else command, + merged=False, + existing_stdout_buf=stdout_buf, + existing_stderr_buf=stderr_buf, + existing_pumps=pumps, + ) + promoted = True + return build_exec_envelope( + command=command, + exit_code=None, + stdout_bytes=stdout_buf.tail(64 * 1024).data, + stderr_bytes=stderr_buf.tail(64 * 1024).data, + runtime_ms=int((time.monotonic() - start) * 1000), + pid=proc.pid, + timed_out=False, + max_output_kb=max_output_kb, + auto_backgrounded=True, + job_id=record.job_id, + auto_shell=auto_shell, + ) + except JobLimitExceeded: + # Cap reached; treat as a hard timeout rather than spin. + pass + # Fall through to hard-kill path. + try: + proc.terminate() + proc.wait(timeout=2.0) + except subprocess.TimeoutExpired: + proc.kill() + proc.wait() + timed_out = True + + # Inline path: drain pump threads. + for t in pumps: + t.join(timeout=2.0) + + runtime_ms = int((time.monotonic() - start) * 1000) + exit_code = proc.returncode if not promoted else None + + # The whole stream is in the ring; read from offset 0 to grab everything. + stdout_full = stdout_buf.read(0, stdout_buf.total_written).data + stderr_full = stderr_buf.read(0, stderr_buf.total_written).data + + return build_exec_envelope( + command=command, + exit_code=exit_code, + stdout_bytes=stdout_full, + stderr_bytes=stderr_full, + runtime_ms=runtime_ms, + pid=proc.pid, + timed_out=timed_out, + signaled=(exit_code is not None and exit_code < 0), + max_output_kb=max_output_kb, + auto_shell=auto_shell, + ) + + +def _err_envelope(command: str, message: str) -> dict: + """Construct an envelope-shaped error reply for pre-spawn failures.""" + return { + "exit_code": None, + "stdout": "", + "stderr": message, + "stdout_truncated_bytes": 0, + "stderr_truncated_bytes": 0, + "runtime_ms": 0, + "pid": None, + "output_handle": None, + "timed_out": False, + "semantic_status": "error", + "semantic_message": message, + "warning": None, + "auto_backgrounded": False, + "job_id": None, + "auto_shell": False, + "error": message, + } + + +__all__ = ["register_exec_tools"] diff --git a/tools/src/terminal_tools/jobs/__init__.py b/tools/src/terminal_tools/jobs/__init__.py new file mode 100644 index 00000000..406967f6 --- /dev/null +++ b/tools/src/terminal_tools/jobs/__init__.py @@ -0,0 +1,6 @@ +"""Background job management for terminal-tools.""" + +from terminal_tools.jobs.manager import JobManager, JobRecord, get_manager +from terminal_tools.jobs.tools import register_job_tools + +__all__ = ["JobManager", "JobRecord", "get_manager", "register_job_tools"] diff --git a/tools/src/terminal_tools/jobs/manager.py b/tools/src/terminal_tools/jobs/manager.py new file mode 100644 index 00000000..62f4b0a9 --- /dev/null +++ b/tools/src/terminal_tools/jobs/manager.py @@ -0,0 +1,424 @@ +"""Background job manager. + +Owns the long-lived ``Popen`` instances backing ``terminal_job_*`` and +``terminal_exec`` auto-promotion. Each job has up to two ring buffers +(stdout / stderr, or one merged) fed by background pump threads. + +Design notes: + - We don't use asyncio here. 
FastMCP's tool handlers run in a worker + thread; subprocess + threads compose more naturally with that + model than asyncio Subprocess (which would need its own loop). + - ``terminal_exec`` "promotes" by adopting an already-running Popen + into the manager — it doesn't re-spawn. The pump threads were + already filling buffers in the exec path. + - Hard concurrency cap (env: ``TERMINAL_TOOLS_MAX_JOBS``, default 32). + The cap is the only non-bypassable safety pin per the soft- + guardrails design. + - On server shutdown the lifespan hook calls ``shutdown_all()`` + which TERMs every child, waits 2s, then KILLs. Eliminates + orphans. +""" + +from __future__ import annotations + +import os +import secrets +import signal +import subprocess +import threading +import time +from collections.abc import Sequence +from dataclasses import dataclass, field +from typing import Any + +from terminal_tools.common.ring_buffer import RingBuffer + +_MAX_JOBS_DEFAULT = 32 +_DEFAULT_RING_BYTES = 4 * 1024 * 1024 +_RECENT_EXIT_KEEP = 50 # exited jobs we still surface to ``terminal_job_manage(action="list")`` + + +@dataclass(slots=True) +class JobRecord: + job_id: str + pid: int + name: str + command: str | list[str] + started_at: float + proc: subprocess.Popen[bytes] + stdout_buf: RingBuffer | None + stderr_buf: RingBuffer | None + merged: bool + pumps: list[threading.Thread] = field(default_factory=list) + exited_at: float | None = None + exit_code: int | None = None + signaled: bool = False + # Adopted=True when the job started life as a foreground terminal_exec + # and was promoted past the auto-background budget. + adopted: bool = False + + @property + def status(self) -> str: + return "exited" if self.exited_at is not None else "running" + + def runtime_ms(self) -> int: + end = self.exited_at if self.exited_at is not None else time.monotonic() + return int((end - self.started_at) * 1000) + + def to_summary(self) -> dict[str, Any]: + return { + "job_id": self.job_id, + "pid": self.pid, + "name": self.name, + "command": self.command, + "started_at": self.started_at, + "status": self.status, + "exit_code": self.exit_code, + "runtime_ms": self.runtime_ms(), + "merged": self.merged, + "stdout_bytes": (self.stdout_buf.total_written if self.stdout_buf else 0), + "stderr_bytes": (self.stderr_buf.total_written if self.stderr_buf else 0), + "adopted": self.adopted, + } + + +class JobLimitExceeded(RuntimeError): + """Raised when the per-server concurrent-job cap would be exceeded.""" + + +class JobManager: + def __init__(self, max_jobs: int | None = None, ring_bytes: int = _DEFAULT_RING_BYTES): + self._max_jobs = max_jobs or int(os.getenv("TERMINAL_TOOLS_MAX_JOBS", str(_MAX_JOBS_DEFAULT))) + self._ring_bytes = ring_bytes + self._jobs: dict[str, JobRecord] = {} + # FIFO of recently-exited job_ids so list/inspect can still + # find them for a while after exit. 
+ self._exited_order: list[str] = [] + self._lock = threading.Lock() + + # ── Public API ──────────────────────────────────────────────── + + def active_count(self) -> int: + with self._lock: + return sum(1 for j in self._jobs.values() if j.exited_at is None) + + def start( + self, + command: str | Sequence[str], + *, + cwd: str | None = None, + env: dict[str, str] | None = None, + shell: bool | str = False, + merge_stderr: bool = False, + name: str | None = None, + preexec_fn=None, + ) -> JobRecord: + """Spawn a process and start pumping its output into ring buffers.""" + if self.active_count() >= self._max_jobs: + raise JobLimitExceeded( + f"terminal-tools job cap reached ({self._max_jobs}). " + "Wait for a job to finish or raise TERMINAL_TOOLS_MAX_JOBS." + ) + + proc = self._spawn(command, cwd=cwd, env=env, shell=shell, merge_stderr=merge_stderr, preexec_fn=preexec_fn) + record = self._adopt(proc, command, name=name, merged=merge_stderr) + return record + + def adopt_running( + self, + proc: subprocess.Popen[bytes], + command: str | Sequence[str], + *, + name: str | None = None, + merged: bool = False, + existing_stdout_buf: RingBuffer | None = None, + existing_stderr_buf: RingBuffer | None = None, + existing_pumps: list[threading.Thread] | None = None, + ) -> JobRecord: + """Adopt a Popen that's already running with pumps in flight. + + Used by ``terminal_exec`` for auto-promotion: the foreground path + had already started pump threads filling its own ring buffers. + We hand the buffers + pumps over to the manager so the agent + can resume reading via ``terminal_job_logs``. + """ + if self.active_count() >= self._max_jobs: + # Mid-call cap exceeded — kill and report. + try: + proc.terminate() + except Exception: + pass + raise JobLimitExceeded( + f"terminal-tools job cap reached ({self._max_jobs}); foreground exec was killed during auto-promotion." + ) + record = self._wrap( + proc, + command, + name=name, + merged=merged, + stdout_buf=existing_stdout_buf, + stderr_buf=existing_stderr_buf, + pumps=existing_pumps, + adopted=True, + ) + with self._lock: + self._jobs[record.job_id] = record + # Watcher only — pumps already running. 
+            threading.Thread(target=self._watch_for_exit, args=(record,), daemon=True).start()
+        return record
+
+    def get(self, job_id: str) -> JobRecord | None:
+        with self._lock:
+            return self._jobs.get(job_id)
+
+    def list(self) -> list[dict]:
+        with self._lock:
+            jobs = list(self._jobs.values())
+        # Recent first — running, then exited by exit time descending
+        jobs.sort(
+            key=lambda j: (j.exited_at is not None, -(j.exited_at or j.started_at)),
+        )
+        return [j.to_summary() for j in jobs]
+
+    def signal(self, job_id: str, signum: int) -> bool:
+        record = self.get(job_id)
+        if record is None or record.exited_at is not None:
+            return False
+        try:
+            record.proc.send_signal(signum)
+            return True
+        except (ProcessLookupError, OSError):
+            return False
+
+    def write_stdin(self, job_id: str, data: bytes, *, close_after: bool = False) -> int:
+        record = self.get(job_id)
+        if record is None or record.proc.stdin is None or record.exited_at is not None:
+            return 0
+        try:
+            n = record.proc.stdin.write(data)
+            record.proc.stdin.flush()
+            if close_after:
+                record.proc.stdin.close()
+            return int(n or len(data))
+        except (BrokenPipeError, OSError):
+            return 0
+
+    def close_stdin(self, job_id: str) -> bool:
+        record = self.get(job_id)
+        if record is None or record.proc.stdin is None:
+            return False
+        try:
+            record.proc.stdin.close()
+            return True
+        except OSError:
+            return False
+
+    def wait(self, job_id: str, timeout_sec: float | None = None) -> JobRecord | None:
+        """Block until the job exits or ``timeout_sec`` elapses. Returns
+        the (possibly still-running) record so callers can read final state."""
+        record = self.get(job_id)
+        if record is None:
+            return None
+        try:
+            record.proc.wait(timeout=timeout_sec)
+        except subprocess.TimeoutExpired:
+            pass
+        return record
+
+    def shutdown_all(self, grace_sec: float = 2.0) -> None:
+        """SIGTERM every running job, wait ``grace_sec``, then SIGKILL.
+        Called from the FastMCP lifespan hook. Idempotent."""
+        with self._lock:
+            running = [j for j in self._jobs.values() if j.exited_at is None]
+        for record in running:
+            try:
+                record.proc.terminate()
+            except Exception:
+                pass
+        deadline = time.monotonic() + grace_sec
+        while time.monotonic() < deadline and any(j.proc.poll() is None for j in running):
+            time.sleep(0.05)
+        for record in running:
+            if record.proc.poll() is None:
+                try:
+                    record.proc.kill()
+                except Exception:
+                    pass
+
+    # ── Internals ───────────────────────────────────────────────
+
+    def _spawn(
+        self,
+        command: str | Sequence[str],
+        *,
+        cwd: str | None,
+        env: dict[str, str] | None,
+        shell: bool | str,
+        merge_stderr: bool,
+        preexec_fn,
+    ) -> subprocess.Popen[bytes]:
+        # Resolve shell: a string shell is coerced to ``[<shell>, "-c", command]``,
+        # bool=True means /bin/bash with the same shape.
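+        # (Illustrative: shell="/bin/bash", command=["ls", "-la"] →
+        # argv ["/bin/bash", "-c", "ls -la"]; shell=False leaves the
+        # caller's argv untouched.)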
+ from terminal_tools.common.limits import _resolve_shell + + resolved = _resolve_shell(shell) + if resolved is not None: + if isinstance(command, (list, tuple)): + command_str = " ".join(str(c) for c in command) + else: + command_str = str(command) + argv: list[str] = [resolved, "-c", command_str] + shell_arg = False + else: + argv = list(command) if isinstance(command, (list, tuple)) else command # type: ignore[assignment] + shell_arg = False + + return subprocess.Popen( + argv, + cwd=cwd, + env=env, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=(subprocess.STDOUT if merge_stderr else subprocess.PIPE), + shell=shell_arg, + preexec_fn=preexec_fn, + close_fds=True, + bufsize=0, + ) + + def _adopt( + self, + proc: subprocess.Popen[bytes], + command: str | Sequence[str], + *, + name: str | None, + merged: bool, + ) -> JobRecord: + stdout_buf = RingBuffer(self._ring_bytes) + stderr_buf = None if merged else RingBuffer(self._ring_bytes) + + record = self._wrap(proc, command, name=name, merged=merged, stdout_buf=stdout_buf, stderr_buf=stderr_buf) + with self._lock: + self._jobs[record.job_id] = record + + # Start pumps + watcher + if proc.stdout is not None: + t = threading.Thread( + target=_pump_stream, + args=(proc.stdout, stdout_buf), + daemon=True, + name=f"shell-job-stdout-{record.job_id}", + ) + t.start() + record.pumps.append(t) + if not merged and proc.stderr is not None and stderr_buf is not None: + t = threading.Thread( + target=_pump_stream, + args=(proc.stderr, stderr_buf), + daemon=True, + name=f"shell-job-stderr-{record.job_id}", + ) + t.start() + record.pumps.append(t) + threading.Thread(target=self._watch_for_exit, args=(record,), daemon=True).start() + return record + + def _wrap( + self, + proc: subprocess.Popen[bytes], + command: str | Sequence[str], + *, + name: str | None, + merged: bool, + stdout_buf: RingBuffer | None = None, + stderr_buf: RingBuffer | None = None, + pumps: list[threading.Thread] | None = None, + adopted: bool = False, + ) -> JobRecord: + return JobRecord( + job_id="job_" + secrets.token_hex(6), + pid=proc.pid, + name=name or _default_name(command), + command=list(command) if isinstance(command, (list, tuple)) else str(command), + started_at=time.monotonic(), + proc=proc, + stdout_buf=stdout_buf, + stderr_buf=stderr_buf, + merged=merged, + pumps=pumps or [], + adopted=adopted, + ) + + def _watch_for_exit(self, record: JobRecord) -> None: + rc = record.proc.wait() + # Drain any final bytes — pump threads exit on EOF, so this is + # mostly a join; we don't need to actively pull. 
+        for pump in record.pumps:
+            pump.join(timeout=2.0)
+        if record.stdout_buf is not None:
+            record.stdout_buf.close()
+        if record.stderr_buf is not None:
+            record.stderr_buf.close()
+        with self._lock:
+            record.exited_at = time.monotonic()
+            record.exit_code = rc
+            # Negative rc means killed-by-signal for a direct child;
+            # bash -c children report signal death as 128 + signum.
+            record.signaled = rc < 0 or (rc > 128 and (rc - 128) in _SIGNAL_NUMBERS)
+            self._exited_order.append(record.job_id)
+            self._evict_old_exits_locked()
+
+    def _evict_old_exits_locked(self) -> None:
+        while len(self._exited_order) > _RECENT_EXIT_KEEP:
+            old_id = self._exited_order.pop(0)
+            self._jobs.pop(old_id, None)
+
+
+def _pump_stream(stream, ring: RingBuffer) -> None:
+    """Read bytes from ``stream`` until EOF; push into ``ring``."""
+    try:
+        while True:
+            chunk = stream.read(4096)
+            if not chunk:
+                break
+            ring.write(chunk)
+    except (OSError, ValueError):
+        pass
+    finally:
+        try:
+            stream.close()
+        except Exception:
+            pass
+        ring.close()
+
+
+def _default_name(command: str | Sequence[str]) -> str:
+    if isinstance(command, (list, tuple)):
+        return command[0] if command else "job"
+    text = str(command).strip().split()
+    return text[0] if text else "job"
+
+
+_SIGNAL_NUMBERS = {
+    signal.SIGINT,
+    signal.SIGTERM,
+    signal.SIGKILL,
+    signal.SIGHUP,
+    signal.SIGUSR1,
+    signal.SIGUSR2,
+}
+
+
+# Module-level singleton.
+_MANAGER: JobManager | None = None
+_MANAGER_LOCK = threading.Lock()
+
+
+def get_manager() -> JobManager:
+    global _MANAGER
+    if _MANAGER is None:
+        with _MANAGER_LOCK:
+            if _MANAGER is None:
+                _MANAGER = JobManager()
+    return _MANAGER
+
+
+__all__ = ["JobManager", "JobRecord", "JobLimitExceeded", "get_manager"]
diff --git a/tools/src/terminal_tools/jobs/tools.py b/tools/src/terminal_tools/jobs/tools.py
new file mode 100644
index 00000000..ac0d9daa
--- /dev/null
+++ b/tools/src/terminal_tools/jobs/tools.py
@@ -0,0 +1,221 @@
+"""Job-control MCP tools: ``terminal_job_start``, ``terminal_job_logs``,
+``terminal_job_manage``.
+
+Three tools, not seven: ``_logs`` rolls in status + wait, ``_manage``
+covers list + signals + stdin so the agent has fewer tool names to
+remember. The tradeoff: multi-action ``_manage`` is slightly less
+self-documenting; the foundational skill compensates.
+"""
+
+from __future__ import annotations
+
+import signal
+from typing import TYPE_CHECKING, Any
+
+from terminal_tools.common.limits import coerce_limits, make_preexec_fn, sanitized_env
+from terminal_tools.jobs.manager import JobLimitExceeded, get_manager
+
+if TYPE_CHECKING:
+    from fastmcp import FastMCP
+
+
+_SIGNAL_ALIASES = {
+    "signal_term": signal.SIGTERM,
+    "signal_kill": signal.SIGKILL,
+    "signal_int": signal.SIGINT,
+    "signal_hup": signal.SIGHUP,
+    "signal_usr1": signal.SIGUSR1,
+    "signal_usr2": signal.SIGUSR2,
+}
+
+
+def register_job_tools(mcp: FastMCP) -> None:
+    manager = get_manager()
+
+    @mcp.tool()
+    def terminal_job_start(
+        command: str,
+        cwd: str | None = None,
+        env: dict[str, str] | None = None,
+        merge_stderr: bool = False,
+        shell: bool = False,
+        name: str | None = None,
+        limits: dict[str, int] | None = None,
+    ) -> dict:
+        """Spawn a background process. Returns a job_id you poll with terminal_job_logs.
+
+        Use this when work might run >1 minute, when you want to keep doing
+        other things while it runs, or when you need to stream logs as they
+        arrive. Jobs die when the terminal-tools server restarts — they are NOT
+        persistent across reboots.
+
+        Args:
+            command: Shell command to run. With shell=False, pass argv via the
+                command string and we'll split on whitespace; for complex
+                quoting use shell=True.
+ cwd: Working directory. Default: server's cwd. + env: Environment override. Merged into a sanitized base env (with + zsh dotfile vars stripped). + merge_stderr: When True, stderr is interleaved into stdout in a + single ring buffer. Convenient for log-shaped output where + ordering matters. + shell: True to invoke /bin/bash -c. Refuses zsh. + name: Optional human label surfaced in terminal_job_manage(action="list"). + limits: Optional resource caps applied via setrlimit before exec. + Keys: cpu_sec, rss_mb, fsize_mb, nofile. + + Returns: {job_id, pid, started_at} + """ + try: + # Build argv: for shell=False, naive split is fine for the common case; + # the foundational skill steers complex commands toward shell=True. + argv: list[str] | str + if shell: + argv = command + else: + argv = command.split() + if not argv: + return {"error": "command was empty"} + + full_env = sanitized_env(env) if env is not None else None + preexec = make_preexec_fn(coerce_limits(limits)) + record = manager.start( + argv, + cwd=cwd, + env=full_env, + shell=shell, + merge_stderr=merge_stderr, + name=name, + preexec_fn=preexec, + ) + return { + "job_id": record.job_id, + "pid": record.pid, + "started_at": record.started_at, + "name": record.name, + "merged": merge_stderr, + } + except JobLimitExceeded as e: + return {"error": str(e)} + except Exception as e: + return {"error": f"{type(e).__name__}: {e}"} + + @mcp.tool() + def terminal_job_logs( + job_id: str, + stream: str = "stdout", + since_offset: int = 0, + max_bytes: int = 64000, + wait_until_exit: bool = False, + wait_timeout_sec: float = 30.0, + tail: bool = False, + ) -> dict: + """Read job output at an offset. Combined read + status + wait primitive. + + Track next_offset across calls to avoid replaying data. When + wait_until_exit=True, blocks server-side until the job exits or + wait_timeout_sec elapses, then returns logs and final status. + + Args: + job_id: From terminal_job_start (or auto-promoted from terminal_exec). + stream: "stdout" | "stderr" | "merged". Use "merged" only when the + job was started with merge_stderr=True. + since_offset: Absolute byte offset to start reading from. Pass 0 + on first call; pass next_offset on subsequent calls. + max_bytes: Max bytes of decoded output to return inline. + wait_until_exit: When True, blocks until the job exits before reading. + wait_timeout_sec: Cap on the wait. Returns whatever's accumulated. + tail: When True, ignores since_offset and returns the last max_bytes. 
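+
+        Typical polling loop (illustrative; add your own pacing)::
+
+            r = terminal_job_logs(job_id, since_offset=0)
+            while r["status"] == "running" or not r["eof"]:
+                r = terminal_job_logs(job_id, since_offset=r["next_offset"])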
+ + Returns: {data, offset, next_offset, status, exit_code, eof, truncated_bytes_dropped} + """ + record = manager.get(job_id) + if record is None: + return {"error": f"unknown job_id: {job_id}"} + + if wait_until_exit: + manager.wait(job_id, timeout_sec=wait_timeout_sec) + record = manager.get(job_id) or record + + if stream == "merged": + # Merged jobs always read from stdout_buf (which received both) + buf = record.stdout_buf + elif stream == "stderr": + buf = record.stderr_buf + else: + buf = record.stdout_buf + + if buf is None: + return { + "error": f"stream={stream!r} not available (merge_stderr={record.merged})", + } + + result = buf.tail(max_bytes) if tail else buf.read(since_offset, max_bytes) + return { + "data": result.data.decode("utf-8", errors="replace"), + "offset": result.offset, + "next_offset": result.next_offset, + "truncated_bytes_dropped": result.truncated_bytes_dropped, + "eof": buf.eof and result.next_offset >= buf.total_written, + "status": record.status, + "exit_code": record.exit_code, + "runtime_ms": record.runtime_ms(), + } + + @mcp.tool() + def terminal_job_manage( + action: str, + job_id: str | None = None, + data: str | None = None, + ) -> dict: + """List jobs, send signals, or write to job stdin. + + Single tool covering job-control side effects. The action argument + picks the operation: + + - "list": list active + recently-exited jobs. job_id ignored. + - "signal_term" | "signal_kill" | "signal_int" | "signal_hup" + | "signal_usr1" | "signal_usr2": send the named signal. Requires job_id. + - "stdin": write `data` to the job's stdin. Requires job_id and data. + - "close_stdin": close the job's stdin pipe (e.g. to flush a tool that + reads until EOF). Requires job_id. + + Signal escalation idiom (foundational skill teaches this): try + signal_int first (graceful), then signal_term after a few seconds, then + signal_kill as a last resort. The OS may take a moment to deliver. + + Returns vary by action. List → {jobs: [...]}. Signals → {ok, signal}. + Stdin → {bytes_written}. + """ + if action == "list": + return {"jobs": manager.list()} + + if not job_id: + return {"error": f"action={action!r} requires job_id"} + + if action in _SIGNAL_ALIASES: + ok = manager.signal(job_id, _SIGNAL_ALIASES[action]) + return {"ok": ok, "signal": action.removeprefix("signal_").upper()} + + if action == "stdin": + if data is None: + return {"error": "action=stdin requires data"} + n = manager.write_stdin(job_id, data.encode("utf-8")) + return {"bytes_written": n} + + if action == "close_stdin": + return {"ok": manager.close_stdin(job_id)} + + return {"error": f"unknown action: {action!r}"} + + # Expose a non-tool reference so the lifespan hook can shutdown_all(). + register_job_tools.manager = manager # type: ignore[attr-defined] + + +def get_registered_manager() -> Any: + """Return the JobManager registered for the most recent FastMCP setup. 
+    Used by the server lifespan to reap on shutdown."""
+    return get_manager()
+
+
+__all__ = ["register_job_tools", "get_registered_manager"]
diff --git a/tools/src/terminal_tools/output.py b/tools/src/terminal_tools/output.py
new file mode 100644
index 00000000..be80f514
--- /dev/null
+++ b/tools/src/terminal_tools/output.py
@@ -0,0 +1,41 @@
+"""``terminal_output_get`` — retrieve truncated output via handle."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from terminal_tools.common.output_store import get_store
+
+if TYPE_CHECKING:
+    from fastmcp import FastMCP
+
+
+def register_output_tools(mcp: FastMCP) -> None:
+    @mcp.tool()
+    def terminal_output_get(
+        output_handle: str,
+        since_offset: int = 0,
+        max_kb: int = 64,
+    ) -> dict:
+        """Retrieve a slice of truncated output by handle.
+
+        When terminal_exec returns more output than fits inline, you'll
+        see `output_handle: "out_<hex>"`. Pass it here with successive
+        offsets to paginate. The full output is preserved (combined stdout+stderr
+        with `--- stdout ---` / `--- stderr ---` separators) for 5 minutes.
+
+        Args:
+            output_handle: From a prior tool's envelope.
+            since_offset: Pass 0 first, then next_offset from the previous call.
+            max_kb: Max KB to return per call.
+
+        Returns: {data, offset, next_offset, eof, expired}
+        """
+        return get_store().get(
+            output_handle,
+            since_offset=since_offset,
+            max_bytes=max_kb * 1024,
+        )
+
+
+__all__ = ["register_output_tools"]
diff --git a/tools/src/terminal_tools/pty/__init__.py b/tools/src/terminal_tools/pty/__init__.py
new file mode 100644
index 00000000..ebb9fe54
--- /dev/null
+++ b/tools/src/terminal_tools/pty/__init__.py
@@ -0,0 +1,5 @@
+"""Persistent PTY-backed shell sessions."""
+
+from terminal_tools.pty.tools import register_pty_tools
+
+__all__ = ["register_pty_tools"]
diff --git a/tools/src/terminal_tools/pty/session.py b/tools/src/terminal_tools/pty/session.py
new file mode 100644
index 00000000..9d4b8552
--- /dev/null
+++ b/tools/src/terminal_tools/pty/session.py
@@ -0,0 +1,367 @@
+"""Persistent PTY-backed bash sessions.
+
+Built on stdlib ``pty.fork()``. A reader thread
+fills a ring buffer; the public API exposes three modes:
+
+  - ``run(command, timeout_sec)``: write the command, wait for the
+    unique prompt sentinel (or an ``expect=`` regex override), return
+    everything in between.
+  - ``send_raw(data)``: write bytes, no waiting. For REPLs / vim /
+    sudo-prompt-style flows.
+  - ``drain(timeout_sec)``: read whatever's currently buffered (after
+    a raw send).
+
+A unique ``PS1`` sentinel is set at session start so ``run()`` can
+unambiguously detect command completion. Per-session concurrency is
+serialized: a busy session refuses concurrent ``run()`` calls.
+
+POSIX-only: imports stdlib ``pty`` which doesn't exist on Windows.
+"""
+
+from __future__ import annotations
+
+import errno
+import fcntl
+import os
+import pty
+import re
+import select
+import signal
+import struct
+import termios
+import threading
+import time
+import uuid
+
+from terminal_tools.common.limits import _resolve_shell, sanitized_env
+from terminal_tools.common.ring_buffer import RingBuffer
+
+_BUF_BYTES = 2 * 1024 * 1024
+
+
+class SessionBusy(RuntimeError):
+    """Raised when a concurrent run() attempts to use a session that's already executing."""
+
+
+class PtySession:
+    """One persistent bash session bound to a PTY.
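+
+    Sketch of the three modes (illustrative)::
+
+        s = PtySession(cwd="/tmp")
+        r = s.run("echo hi", timeout_sec=10)  # waits for the PS1 sentinel
+        s.send_raw("python3\n")               # enter a REPL, no waiting
+        out = s.drain(timeout_sec=2.0)        # read whatever arrived
+        s.close()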
+ + Thread-safe for the disjoint-mode operations: ``run`` serializes via + ``_busy_lock``, ``send_raw`` and ``drain`` use the ring's own lock. + """ + + def __init__( + self, + *, + cwd: str | None = None, + env: dict[str, str] | None = None, + shell: bool | str = True, + cols: int = 120, + rows: int = 40, + idle_timeout_sec: int = 1800, + ): + self.session_id = "pty_" + uuid.uuid4().hex[:10] + self.shell_path = _resolve_shell(shell) or "/bin/bash" + self._sentinel_token = uuid.uuid4().hex + self._sentinel = f"__TERMINALTOOLS_PROMPT_{self._sentinel_token}__" + self._sentinel_re = re.compile(re.escape(self._sentinel)) + + # Build env: zsh leakage stripped, prompt set to our sentinel. + merged_env = sanitized_env(env) + merged_env["PS1"] = f"{self._sentinel}\n$ " + merged_env["PS2"] = "" + merged_env["PROMPT_COMMAND"] = "" # don't let user dotfiles override PS1 + merged_env["TERM"] = merged_env.get("TERM", "xterm-256color") + + self._created_at = time.monotonic() + self._last_activity = self._created_at + self.idle_timeout_sec = idle_timeout_sec + + self._pid, self._fd = pty.fork() + if self._pid == 0: + # Child — exec bash. --norc --noprofile keeps things + # predictable; the foundational skill teaches that the + # session runs vanilla bash, not the user's interactive + # shell. + try: + if cwd: + os.chdir(cwd) + argv = [self.shell_path, "--norc", "--noprofile", "-i"] + os.execve(self.shell_path, argv, merged_env) + except Exception as e: # pragma: no cover — child exec + os.write(2, f"terminal-tools pty: exec failed: {e}\n".encode()) + os._exit(127) + + # Parent + _set_pty_size(self._fd, rows, cols) + _set_nonblocking(self._fd) + + self._buf = RingBuffer(_BUF_BYTES) + self._busy_lock = threading.Lock() + self._closed = threading.Event() + + self._reader = threading.Thread(target=self._read_loop, daemon=True, name=f"pty-reader-{self.session_id}") + self._reader.start() + + # Wait for the first prompt so the session is "ready" before we return. + # If bash --norc somehow doesn't print one, give up after 2 seconds — + # the session is still usable, it just won't have a prompt-aligned + # initial offset. + self._wait_for_sentinel(timeout_sec=2.0, since_offset=0) + + # ── Public API ──────────────────────────────────────────────── + + @property + def pid(self) -> int: + return self._pid + + def is_alive(self) -> bool: + if self._closed.is_set(): + return False + try: + pid, _ = os.waitpid(self._pid, os.WNOHANG) + return pid == 0 + except ChildProcessError: + return False + + def run(self, command: str, *, expect: str | None = None, timeout_sec: float = 60.0) -> dict: + """Send ``command`` + newline, wait for the prompt sentinel + (or ``expect`` regex override), return the slice in between.""" + if not self._busy_lock.acquire(blocking=False): + raise SessionBusy(f"session {self.session_id} is busy") + try: + start_offset = self._buf.total_written + self._write(command.encode("utf-8") + b"\n") + self._last_activity = time.monotonic() + return self._wait_for_sentinel( + timeout_sec=timeout_sec, + since_offset=start_offset, + expect_pattern=expect, + ) + finally: + self._busy_lock.release() + + def send_raw(self, data: str, *, add_newline: bool = False) -> int: + """Write bytes without waiting for prompt. 
For REPLs/vim/sudo prompts.""" + payload = data.encode("utf-8") + if add_newline: + payload += b"\n" + n = self._write(payload) + self._last_activity = time.monotonic() + return n + + def drain(self, *, timeout_sec: float = 2.0, max_bytes: int = 64000) -> dict: + """Read whatever's currently buffered. Used after send_raw to capture + REPL / interactive-program output.""" + deadline = time.monotonic() + timeout_sec + last_total = self._buf.total_written + # Wait for activity to settle for a brief window — gives the + # process a chance to finish its current line. + while time.monotonic() < deadline: + time.sleep(0.05) + current = self._buf.total_written + if current == last_total: + break + last_total = current + + result = self._buf.tail(max_bytes) + return { + "output": result.data.decode("utf-8", errors="replace"), + "more": result.next_offset < self._buf.total_written, + "offset": result.offset, + "next_offset": result.next_offset, + "timed_out": False, + } + + def close(self, *, force: bool = False, grace_sec: float = 1.0) -> dict: + """Terminate the session. Returns final output.""" + if self._closed.is_set(): + return {"exit_code": None, "final_output": "", "already_closed": True} + + # Flush an exit if not forcing. + if not force: + try: + self._write(b"exit\n") + except OSError: + pass + + deadline = time.monotonic() + grace_sec + while time.monotonic() < deadline: + try: + pid, status = os.waitpid(self._pid, os.WNOHANG) + if pid != 0: + break + except ChildProcessError: + break + time.sleep(0.05) + + try: + os.kill(self._pid, signal.SIGTERM) + except (ProcessLookupError, PermissionError): + pass + try: + os.waitpid(self._pid, os.WNOHANG) + except ChildProcessError: + pass + + if self.is_alive(): + try: + os.kill(self._pid, signal.SIGKILL) + except (ProcessLookupError, PermissionError): + pass + + self._closed.set() + try: + os.close(self._fd) + except OSError: + pass + + # Final output = whatever's still in the ring. + result = self._buf.tail(64 * 1024) + try: + _pid, status = os.waitpid(self._pid, os.WNOHANG) + exit_code = os.WEXITSTATUS(status) if os.WIFEXITED(status) else None + except ChildProcessError: + exit_code = None + return { + "exit_code": exit_code, + "final_output": result.data.decode("utf-8", errors="replace"), + "already_closed": False, + } + + def to_summary(self) -> dict: + return { + "session_id": self.session_id, + "pid": self._pid, + "shell": self.shell_path, + "alive": self.is_alive(), + "idle_sec": int(time.monotonic() - self._last_activity), + "created_at": self._created_at, + } + + # ── Internals ───────────────────────────────────────────────── + + def _write(self, data: bytes) -> int: + if self._closed.is_set(): + raise OSError("session is closed") + try: + return os.write(self._fd, data) + except OSError as e: + if e.errno == errno.EAGAIN: + # PTY is full — retry briefly. + deadline = time.monotonic() + 1.0 + while time.monotonic() < deadline: + time.sleep(0.01) + try: + return os.write(self._fd, data) + except OSError: + continue + raise + + def _read_loop(self) -> None: + while not self._closed.is_set(): + try: + ready, _, _ = select.select([self._fd], [], [], 0.5) + except (OSError, ValueError): + break + if not ready: + # Periodically check for child death even when no data. 
+ try: + pid, _ = os.waitpid(self._pid, os.WNOHANG) + if pid != 0: + break + except ChildProcessError: + break + continue + try: + chunk = os.read(self._fd, 4096) + except OSError: + break + if not chunk: + break + self._buf.write(chunk) + self._buf.close() + self._closed.set() + + def _wait_for_sentinel( + self, + *, + timeout_sec: float, + since_offset: int, + expect_pattern: str | None = None, + ) -> dict: + """Poll the buffer until we see the sentinel (or expect pattern).""" + deadline = time.monotonic() + timeout_sec + pattern: re.Pattern[str] | None = None + if expect_pattern is not None: + pattern = re.compile(expect_pattern) + + prompt_offset = since_offset + while time.monotonic() < deadline: + slice_ = self._buf.read(since_offset, self._buf.total_written - since_offset) + text = slice_.data.decode("utf-8", errors="replace") + if pattern is not None: + m = pattern.search(text) + if m is not None: + output = text[: m.start()] + prompt_offset = since_offset + len(text[: m.end()].encode("utf-8", errors="replace")) + return { + "output": output, + "prompt_after": True, + "matched_expect": True, + "next_offset": prompt_offset, + "timed_out": False, + } + else: + m = self._sentinel_re.search(text) + if m is not None: + output = text[: m.start()] + # Strip the trailing echoed command/newline above the sentinel + output = _strip_command_echo(output) + return { + "output": output, + "prompt_after": True, + "matched_expect": False, + "next_offset": since_offset + len(text[: m.end()].encode("utf-8", errors="replace")), + "timed_out": False, + } + time.sleep(0.05) + if self._closed.is_set(): + break + + # Timed out — return whatever we have. + slice_ = self._buf.read(since_offset, self._buf.total_written - since_offset) + return { + "output": slice_.data.decode("utf-8", errors="replace"), + "prompt_after": False, + "matched_expect": False, + "next_offset": slice_.next_offset, + "timed_out": True, + } + + +def _set_pty_size(fd: int, rows: int, cols: int) -> None: + try: + fcntl.ioctl(fd, termios.TIOCSWINSZ, struct.pack("HHHH", rows, cols, 0, 0)) + except OSError: + pass + + +def _set_nonblocking(fd: int) -> None: + flags = fcntl.fcntl(fd, fcntl.F_GETFL) + fcntl.fcntl(fd, fcntl.F_SETFL, flags | os.O_NONBLOCK) + + +def _strip_command_echo(text: str) -> str: + """Drop the first line if it looks like the echoed command. PTYs in + canonical mode echo the user's input back; we want only the program's + output. Best-effort heuristic — leaves the text alone if uncertain.""" + if "\n" in text: + first, rest = text.split("\n", 1) + # Keep only the rest if the first line is short (likely the echo). + if len(first) < 4096: + return rest + return text + + +__all__ = ["PtySession", "SessionBusy"] diff --git a/tools/src/terminal_tools/pty/tools.py b/tools/src/terminal_tools/pty/tools.py new file mode 100644 index 00000000..67836933 --- /dev/null +++ b/tools/src/terminal_tools/pty/tools.py @@ -0,0 +1,243 @@ +"""Three PTY tools: ``terminal_pty_open``, ``terminal_pty_run``, ``terminal_pty_close``. + +Per-server hard cap on concurrent sessions (env: ``TERMINAL_TOOLS_MAX_PTY``, +default 8) prevents PTY exhaustion. Idle sessions older than +``idle_timeout_sec`` are reaped lazily on every ``_open`` so an +abandoned session can't leak a bash forever. 
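+
+For example, a host that routinely drives several REPLs at once can raise
+the cap before the server starts (illustrative shell line):
+
+    TERMINAL_TOOLS_MAX_PTY=16 uv run python terminal_tools_server.py --stdio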
+""" + +from __future__ import annotations + +import os +import sys +import threading +import time +from typing import TYPE_CHECKING + +from terminal_tools.common.limits import ZshRefused + +if TYPE_CHECKING: + from fastmcp import FastMCP + + +_MAX_PTY_DEFAULT = 8 + + +class _PtyRegistry: + def __init__(self): + self._sessions: dict[str, PtySession] = {} # noqa: F821 + self._lock = threading.Lock() + self._max = int(os.getenv("TERMINAL_TOOLS_MAX_PTY", str(_MAX_PTY_DEFAULT))) + + def reap_idle(self) -> None: + """Drop sessions whose idle time exceeded their idle_timeout_sec.""" + with self._lock: + now = time.monotonic() + stale = [ + sid + for sid, sess in self._sessions.items() + if not sess.is_alive() or (now - sess._last_activity) > sess.idle_timeout_sec + ] + for sid in stale: + sess = self._sessions.pop(sid, None) + if sess is not None: + try: + sess.close(force=True, grace_sec=0.5) + except Exception: + pass + + def count(self) -> int: + with self._lock: + return len(self._sessions) + + def add(self, sess) -> None: + with self._lock: + if len(self._sessions) >= self._max: + # Caller should have reaped first; treat as cap. + raise RuntimeError( + f"terminal-tools PTY cap reached ({self._max}). " + "Close idle sessions or raise TERMINAL_TOOLS_MAX_PTY." + ) + self._sessions[sess.session_id] = sess + + def get(self, sid: str): + with self._lock: + return self._sessions.get(sid) + + def remove(self, sid: str) -> None: + with self._lock: + self._sessions.pop(sid, None) + + def list(self) -> list[dict]: + with self._lock: + return [s.to_summary() for s in self._sessions.values()] + + def shutdown_all(self) -> None: + with self._lock: + sessions = list(self._sessions.values()) + self._sessions.clear() + for sess in sessions: + try: + sess.close(force=True, grace_sec=0.5) + except Exception: + pass + + +_REGISTRY = _PtyRegistry() + + +def get_registry() -> _PtyRegistry: + return _REGISTRY + + +def register_pty_tools(mcp: FastMCP) -> None: + if sys.platform == "win32": + # Register stub tools that report unsupported; keeps the tool + # surface uniform across platforms even when PTY is unavailable. + @mcp.tool() + def terminal_pty_open(*args, **kwargs) -> dict: + """Persistent PTY-backed bash session. POSIX-only. + + Windows is not supported in v1 — use terminal_exec / terminal_job_* + for non-interactive work. The PTY tools require stdlib pty, + which exists only on Linux + macOS. + """ + return {"error": "terminal_pty_* tools are POSIX-only; not supported on Windows"} + + @mcp.tool() + def terminal_pty_run(*args, **kwargs) -> dict: # noqa: D401 + """Persistent PTY-backed bash session. POSIX-only.""" + return {"error": "terminal_pty_* tools are POSIX-only; not supported on Windows"} + + @mcp.tool() + def terminal_pty_close(*args, **kwargs) -> dict: # noqa: D401 + """Persistent PTY-backed bash session. POSIX-only.""" + return {"error": "terminal_pty_* tools are POSIX-only; not supported on Windows"} + + return + + from terminal_tools.pty.session import PtySession, SessionBusy + + @mcp.tool() + def terminal_pty_open( + cwd: str | None = None, + env: dict[str, str] | None = None, + cols: int = 120, + rows: int = 40, + idle_timeout_sec: int = 1800, + ) -> dict: + """Open a persistent /bin/bash session in a PTY. + + Use a session when you need state across calls — building env vars, + navigating with cd, driving REPLs, or responding to interactive + prompts (sudo, ssh, mysql). For one-shot work, use terminal_exec + instead. 
+ + The session runs vanilla bash (--norc --noprofile) so dotfiles + don't surprise you. A unique PS1 sentinel is set so terminal_pty_run + can unambiguously detect command completion. macOS users: this + is /bin/bash, not zsh, by deliberate policy — explicit + shell="/bin/zsh" overrides are rejected. + + Args: + cwd: Initial working directory. + env: Environment override (zsh dotfile vars are stripped). + cols, rows: Terminal size. + idle_timeout_sec: Drop the session after this many seconds idle. + + Returns: {session_id, pid, shell} + """ + _REGISTRY.reap_idle() + try: + sess = PtySession(cwd=cwd, env=env, cols=cols, rows=rows, idle_timeout_sec=idle_timeout_sec) + except ZshRefused as e: + return {"error": str(e)} + except Exception as e: + return {"error": f"failed to open session: {type(e).__name__}: {e}"} + try: + _REGISTRY.add(sess) + except RuntimeError as e: + sess.close(force=True, grace_sec=0.2) + return {"error": str(e)} + return { + "session_id": sess.session_id, + "pid": sess.pid, + "shell": sess.shell_path, + } + + @mcp.tool() + def terminal_pty_run( + session_id: str, + command: str | None = None, + expect: str | None = None, + raw_send: bool = False, + read_only: bool = False, + timeout_sec: float = 60.0, + ) -> dict: + """Run a command in a session, send raw input, or drain output. + + Three modes: + - Default: pass a command. The session sends it, waits for the + unique prompt sentinel (or `expect` regex if provided), and + returns the output between submission and prompt. + - raw_send=True: pass a command. The text is written without + waiting for prompt. Use for REPL input ("p('hi')\\n"), for + password prompts (sudo), or for vim keystrokes. + - read_only=True: drains whatever's currently buffered. + Typically follows raw_send. + + Args: + session_id: From terminal_pty_open. + command: The text to send. None when read_only=True. + expect: Regex to wait for INSTEAD of the default prompt sentinel. + Useful when the command launches a REPL with its own prompt. + raw_send: Don't wait for prompt; just write. + read_only: Don't send anything; drain the buffer. + timeout_sec: Max wait. On timeout, returns whatever's buffered + with timed_out=True (the command may still be running — + check with another _run call). + + Returns: {output, prompt_after, timed_out, ...} + """ + sess = _REGISTRY.get(session_id) + if sess is None: + return {"error": f"unknown session_id: {session_id}"} + if not sess.is_alive(): + _REGISTRY.remove(session_id) + return {"error": f"session {session_id} has exited"} + + if read_only: + return sess.drain(timeout_sec=timeout_sec) + + if command is None: + return {"error": "command is required unless read_only=True"} + + if raw_send: + n = sess.send_raw(command, add_newline=False) + return {"bytes_sent": n} + + try: + return sess.run(command, expect=expect, timeout_sec=timeout_sec) + except SessionBusy as e: + return {"error": str(e)} + + @mcp.tool() + def terminal_pty_close(session_id: str, force: bool = False) -> dict: + """Terminate a PTY session. Always do this when you're done — leaked + sessions count against the per-server PTY cap. + + Args: + session_id: From terminal_pty_open. + force: Skip the graceful "exit\\n" attempt and SIGTERM/SIGKILL. 
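+
+        Prefer force=True when a raw_send-driven program (vim, a wedged
+        REPL) still owns the terminal; the graceful "exit\n" only works
+        while bash itself is at a prompt.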
+ + Returns: {exit_code, final_output, already_closed} + """ + sess = _REGISTRY.get(session_id) + if sess is None: + return {"error": f"unknown session_id: {session_id}"} + result = sess.close(force=force) + _REGISTRY.remove(session_id) + return result + + +__all__ = ["register_pty_tools", "get_registry"] diff --git a/tools/src/terminal_tools/search/__init__.py b/tools/src/terminal_tools/search/__init__.py new file mode 100644 index 00000000..1ec476c8 --- /dev/null +++ b/tools/src/terminal_tools/search/__init__.py @@ -0,0 +1,5 @@ +"""Filesystem search tools (rg + find).""" + +from terminal_tools.search.tools import register_search_tools + +__all__ = ["register_search_tools"] diff --git a/tools/src/terminal_tools/search/tools.py b/tools/src/terminal_tools/search/tools.py new file mode 100644 index 00000000..7be14d78 --- /dev/null +++ b/tools/src/terminal_tools/search/tools.py @@ -0,0 +1,204 @@ +"""``terminal_rg`` and ``terminal_find`` — structured wrappers over ripgrep / find. + +Distinct from ``files-tools.search_files`` (project-relative, +code-editor-tuned) — these accept arbitrary paths and surface the +underlying tool's full feature set. The foundational skill steers +agents to ``files-tools`` for in-project work and these tools for +``/var/log``, ``/etc``, archive contents, etc. +""" + +from __future__ import annotations + +import shutil +import subprocess +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from fastmcp import FastMCP + + +_DEFAULT_TIMEOUT_SEC = 30 +_MAX_OUTPUT_BYTES = 256 * 1024 + + +def register_search_tools(mcp: FastMCP) -> None: + @mcp.tool() + def terminal_rg( + pattern: str, + path: str = ".", + glob: str | None = None, + type_filter: str | None = None, + ignore_case: bool = False, + context: int = 0, + max_count: int | None = None, + max_depth: int | None = None, + hidden: bool = False, + no_ignore: bool = False, + extra_args: list[str] | None = None, + ) -> dict: + """Run ripgrep on `path` for `pattern`. + + For project-scoped code search use files-tools.search_files instead; + this tool is for raw paths (system configs, /var/log, archive contents) + and exposes the full rg flag surface. + + Args: + pattern: Regex pattern. + path: Directory or file to search. Default: current dir. + glob: Filename glob (e.g. "*.py"). + type_filter: rg filetype shortcut (e.g. "py", "rust", "md"). + ignore_case: Case-insensitive search. + context: Lines of context above and below each match. + max_count: Stop after N matches per file. + max_depth: Limit directory recursion depth. + hidden: Include hidden files (rg ignores them by default). + no_ignore: Don't respect .gitignore. + extra_args: Raw flags to append (use sparingly — most needs are covered above). 
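+
+        Illustrative call (pattern and paths made up):
+
+            terminal_rg(pattern=r"ERROR \d+", path="/var/log",
+                        glob="*.log", context=2, max_count=5)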
+ + Returns: {matches: [...], total, truncated, command} + """ + if not shutil.which("rg"): + return {"error": "ripgrep (rg) is not installed on this host"} + + argv = ["rg", "--json", "--no-heading"] + if ignore_case: + argv.append("-i") + if context > 0: + argv.extend(["-C", str(context)]) + if max_count is not None: + argv.extend(["-m", str(max_count)]) + if max_depth is not None: + argv.extend(["--max-depth", str(max_depth)]) + if hidden: + argv.append("--hidden") + if no_ignore: + argv.append("--no-ignore") + if type_filter: + argv.extend(["-t", type_filter]) + if glob: + argv.extend(["-g", glob]) + if extra_args: + argv.extend(str(a) for a in extra_args) + argv.extend(["--", pattern, path]) + + try: + proc = subprocess.run( + argv, + capture_output=True, + timeout=_DEFAULT_TIMEOUT_SEC, + check=False, + ) + except subprocess.TimeoutExpired: + return {"error": "ripgrep timed out", "command": argv} + except FileNotFoundError: + return {"error": "ripgrep (rg) is not installed on this host"} + + # Parse JSON-line output: only "match" events are interesting for the + # default surface. Errors land in stderr. + import json + + matches: list[dict] = [] + truncated = False + bytes_seen = 0 + for line in proc.stdout.splitlines(): + if not line: + continue + bytes_seen += len(line) + if bytes_seen > _MAX_OUTPUT_BYTES: + truncated = True + break + try: + evt = json.loads(line) + except json.JSONDecodeError: + continue + if evt.get("type") != "match": + continue + data = evt.get("data", {}) + path_data = (data.get("path") or {}).get("text") or "" + line_no = data.get("line_number") + text = (data.get("lines") or {}).get("text") or "" + matches.append({"path": path_data, "line": line_no, "text": text.rstrip("\n")}) + + return { + "matches": matches, + "total": len(matches), + "truncated": truncated, + "exit_code": proc.returncode, + "stderr": proc.stderr.decode("utf-8", errors="replace")[-2000:] if proc.stderr else "", + } + + @mcp.tool() + def terminal_find( + path: str, + name: str | None = None, + iname: str | None = None, + type_filter: str | None = None, + mtime_days: int | None = None, + size_kb_min: int | None = None, + size_kb_max: int | None = None, + max_depth: int | None = None, + max_results: int = 1000, + ) -> dict: + """Run `find` with structured predicates. + + For tree views or stat-like info on a single path, use terminal_exec + ("ls -la", "tree -L 2", "stat foo"). This tool is for predicate-driven + searches (find me .log files modified in the last 7 days bigger than 1MB). + + Args: + path: Directory to search under. + name: Glob match (case-sensitive). e.g. "*.log". + iname: Glob match (case-insensitive). + type_filter: "f" file, "d" dir, "l" symlink. + mtime_days: Modified within the last N days (negative or 0 means + exact-day; we use -N for "within"). + size_kb_min, size_kb_max: Size bounds in KB. + max_depth: Limit directory recursion. + max_results: Cap on returned paths. 
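+
+        The scenario above, spelled out (illustrative):
+
+            terminal_find(path="/var/log", name="*.log", type_filter="f",
+                          mtime_days=7, size_kb_min=1024)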
+ + Returns: {paths: [...], count, truncated, command} + """ + if not shutil.which("find"): + return {"error": "find is not installed on this host"} + + argv = ["find", path] + if max_depth is not None: + argv.extend(["-maxdepth", str(max_depth)]) + if type_filter in {"f", "d", "l"}: + argv.extend(["-type", type_filter]) + if name: + argv.extend(["-name", name]) + if iname: + argv.extend(["-iname", iname]) + if mtime_days is not None: + argv.extend(["-mtime", f"-{abs(mtime_days)}"]) + if size_kb_min is not None: + argv.extend(["-size", f"+{int(size_kb_min)}k"]) + if size_kb_max is not None: + argv.extend(["-size", f"-{int(size_kb_max)}k"]) + + try: + proc = subprocess.run( + argv, + capture_output=True, + timeout=_DEFAULT_TIMEOUT_SEC, + check=False, + ) + except subprocess.TimeoutExpired: + return {"error": "find timed out", "command": argv} + + all_paths = proc.stdout.decode("utf-8", errors="replace").splitlines() + truncated = len(all_paths) > max_results + paths = all_paths[:max_results] + return { + "paths": paths, + "count": len(paths), + "truncated": truncated, + "total_seen": len(all_paths), + "exit_code": proc.returncode, + "stderr": proc.stderr.decode("utf-8", errors="replace")[-2000:] if proc.stderr else "", + "command": argv, + } + + +__all__ = ["register_search_tools"] diff --git a/tools/src/terminal_tools/server.py b/tools/src/terminal_tools/server.py new file mode 100644 index 00000000..38179d2e --- /dev/null +++ b/tools/src/terminal_tools/server.py @@ -0,0 +1,145 @@ +"""terminal-tools FastMCP server — entry module. + +Run via: + uv run python -m terminal_tools.server --stdio + uv run python terminal_tools_server.py --stdio (preferred, see _DEFAULT_LOCAL_SERVERS) +""" + +from __future__ import annotations + +import argparse +import asyncio +import atexit +import logging +import os +import sys +from collections.abc import AsyncIterator +from contextlib import asynccontextmanager + +logger = logging.getLogger(__name__) + + +def setup_logger() -> None: + if not logger.handlers: + stream = sys.stderr if "--stdio" in sys.argv else sys.stdout + handler = logging.StreamHandler(stream) + handler.setFormatter(logging.Formatter("[terminal-tools] %(message)s")) + logger.addHandler(handler) + logger.setLevel(logging.INFO) + + +setup_logger() + +# Suppress FastMCP banner in STDIO mode (mirrors gcu/server.py). +if "--stdio" in sys.argv: + import rich.console + + _orig_console_init = rich.console.Console.__init__ + + def _patched_console_init(self, *args, **kwargs): + kwargs["file"] = sys.stderr + _orig_console_init(self, *args, **kwargs) + + rich.console.Console.__init__ = _patched_console_init + + +from fastmcp import FastMCP # noqa: E402 + +from terminal_tools import register_terminal_tools # noqa: E402 +from terminal_tools.jobs.manager import get_manager # noqa: E402 +from terminal_tools.pty.tools import get_registry as get_pty_registry # noqa: E402 + + +@asynccontextmanager +async def _lifespan(_server: FastMCP) -> AsyncIterator[dict]: + """Reap children on shutdown so we don't orphan jobs/PTYs. + + Mirrors the gcu-tools lifespan pattern. Runs in the FastMCP event + loop on graceful shutdown; the atexit hook below catches abrupt + exits (SIGTERM, etc.) where lifespan teardown may not complete. 
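+
+    Net effect: three overlapping cleanup layers. Lifespan teardown handles
+    graceful stops, the parent watchdog handles a dead desktop, and atexit
+    catches everything else; each is intended to be safe to run more than
+    once.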
+ """ + parent_pid_env = os.getenv("HIVE_DESKTOP_PARENT_PID") + if parent_pid_env: + try: + parent_pid = int(parent_pid_env) + asyncio.create_task(_parent_watchdog(parent_pid)) + logger.info("Parent watchdog armed for PID %d", parent_pid) + except ValueError: + logger.warning("Invalid HIVE_DESKTOP_PARENT_PID=%r", parent_pid_env) + + yield {} + + logger.info("Shutting down — reaping jobs and PTY sessions...") + try: + get_manager().shutdown_all(grace_sec=2.0) + except Exception as e: + logger.warning("JobManager shutdown error: %s", e) + try: + get_pty_registry().shutdown_all() + except Exception as e: + logger.warning("PTY registry shutdown error: %s", e) + + +def _is_alive(pid: int) -> bool: + try: + os.kill(pid, 0) + return True + except (ProcessLookupError, PermissionError): + return False + + +async def _parent_watchdog(parent_pid: int) -> None: + """Self-destruct when the desktop parent dies.""" + while True: + await asyncio.sleep(2.0) + if not _is_alive(parent_pid): + logger.warning("Parent PID %d gone — terminal-tools exiting", parent_pid) + try: + get_manager().shutdown_all(grace_sec=1.0) + except Exception: + pass + try: + get_pty_registry().shutdown_all() + except Exception: + pass + os._exit(0) + + +def _atexit_reap() -> None: + """Last-ditch reaping if lifespan didn't run.""" + try: + get_manager().shutdown_all(grace_sec=1.0) + except Exception: + pass + try: + get_pty_registry().shutdown_all() + except Exception: + pass + + +atexit.register(_atexit_reap) + +mcp = FastMCP("terminal-tools", lifespan=_lifespan) + + +def main() -> None: + parser = argparse.ArgumentParser(description="terminal-tools MCP server") + parser.add_argument("--port", type=int, default=int(os.getenv("TERMINAL_TOOLS_PORT", "4004"))) + parser.add_argument("--host", default="0.0.0.0") + parser.add_argument("--stdio", action="store_true") + args = parser.parse_args() + + tools = register_terminal_tools(mcp) + + if not args.stdio: + logger.info("Registered %d terminal-tools: %s", len(tools), tools) + + if args.stdio: + mcp.run(transport="stdio") + else: + logger.info("Starting terminal-tools on %s:%d", args.host, args.port) + asyncio.run(mcp.run_async(transport="http", host=args.host, port=args.port)) + + +if __name__ == "__main__": + main() diff --git a/tools/terminal_tools_server.py b/tools/terminal_tools_server.py new file mode 100644 index 00000000..e43736df --- /dev/null +++ b/tools/terminal_tools_server.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 +"""terminal-tools MCP server entry point. + +Wired into _DEFAULT_LOCAL_SERVERS in core/framework/loader/mcp_registry.py +so that running ``uv run python terminal_tools_server.py --stdio`` from this +directory starts the server. The cwd of ``tools/`` puts ``src/terminal_tools`` +on the import path via uv's workspace setup. 
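+
+Both invocations below end up in terminal_tools.server:main; this wrapper
+exists only so the registry can launch the server by file path from tools/.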
+
+Usage:
+    uv run python terminal_tools_server.py --stdio       # for agent integration
+    uv run python terminal_tools_server.py --port 4004   # HTTP for inspection
+"""
+
+from __future__ import annotations
+
+from terminal_tools.server import main
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/tests/test_terminal_tools_exec.py b/tools/tests/test_terminal_tools_exec.py
new file mode 100644
index 00000000..44526815
--- /dev/null
+++ b/tools/tests/test_terminal_tools_exec.py
@@ -0,0 +1,225 @@
+"""terminal_exec — envelope shape, semantic exits, warnings, auto-promotion."""
+
+from __future__ import annotations
+
+import time
+
+import pytest
+
+
+@pytest.fixture
+def exec_tool(mcp):
+    from terminal_tools.exec import register_exec_tools
+
+    register_exec_tools(mcp)
+    return mcp._tool_manager._tools["terminal_exec"].fn
+
+
+def test_envelope_shape_simple_echo(exec_tool):
+    result = exec_tool(command="echo hello world")
+    assert result["exit_code"] == 0
+    assert result["stdout"].strip() == "hello world"
+    assert result["stderr"] == ""
+    assert result["semantic_status"] == "ok"
+    assert result["timed_out"] is False
+    assert result["auto_backgrounded"] is False
+    assert result["job_id"] is None
+    assert result["warning"] is None
+    assert result["pid"] is not None
+
+
+def test_grep_no_matches_is_ok_not_error(exec_tool, tmp_path):
+    f = tmp_path / "haystack.txt"
+    f.write_text("apples\nbananas\n")
+    result = exec_tool(command=f"grep zzz {f}")
+    assert result["exit_code"] == 1
+    assert result["semantic_status"] == "ok"
+    assert "No matches found" in (result["semantic_message"] or "")
+
+
+def test_diff_files_differ_is_ok_not_error(exec_tool, tmp_path):
+    a = tmp_path / "a.txt"
+    a.write_text("hi\n")
+    b = tmp_path / "b.txt"
+    b.write_text("bye\n")
+    result = exec_tool(command=f"diff {a} {b}")
+    assert result["exit_code"] == 1
+    assert result["semantic_status"] == "ok"
+    assert "differ" in (result["semantic_message"] or "")
+
+
+def test_destructive_warning_for_rm_rf(exec_tool, tmp_path):
+    # Don't actually delete anything — point at a missing path so the
+    # command exits non-zero but the warning still fires from regex.
+    target = tmp_path / "definitely_missing_dir"
+    result = exec_tool(command=f"rm -rf {target}")
+    assert result["warning"] is not None
+    assert "force-remove" in result["warning"] or "recursively" in result["warning"]
+
+
+def test_destructive_warning_drop_table(exec_tool):
+    # The command only echoes the destructive text; the warning regex
+    # matches on the command string itself, not on what it actually does.
+    result = exec_tool(command="echo 'DROP TABLE users;'", shell=True)
+    assert result["warning"] is not None
+    assert "drop" in result["warning"].lower() or "truncate" in result["warning"].lower()
+
+
+def test_command_not_found(exec_tool):
+    result = exec_tool(command="this_command_does_not_exist_xyzzy")
+    assert result["exit_code"] is None or result["exit_code"] != 0
+    # Either pre-spawn FileNotFoundError or shell exit 127 — both are fine
+    # as long as semantic_status reflects an error or the error field is set.
+ assert ( + result["semantic_status"] == "error" + or result.get("error") + or "not found" in (result["semantic_message"] or "").lower() + ) + + +def test_zsh_refused(exec_tool): + result = exec_tool(command="echo hi", shell=True) + # shell=True (the bool) → /bin/bash → succeeds + assert result["exit_code"] == 0 + + +def test_zsh_string_refused(): + """Calling _resolve_shell with zsh path raises ZshRefused.""" + from terminal_tools.common.limits import ZshRefused, _resolve_shell + + with pytest.raises(ZshRefused): + _resolve_shell("/bin/zsh") + with pytest.raises(ZshRefused): + _resolve_shell("/usr/local/bin/zsh") + + +def test_truncation_via_handle(exec_tool): + """Generate >256 KB of output, verify output_handle is returned.""" + # Generate ~300 KB of output + result = exec_tool( + command="python3 -c 'import sys; sys.stdout.write(\"x\" * 300_000)'", + shell=True, + max_output_kb=128, # smaller cap to force truncation + ) + assert result["exit_code"] == 0 + assert result["stdout_truncated_bytes"] > 0 + assert result["output_handle"] is not None + assert result["output_handle"].startswith("out_") + + +def test_output_handle_round_trip(exec_tool, mcp): + from terminal_tools.output import register_output_tools + + register_output_tools(mcp) + output_get = mcp._tool_manager._tools["terminal_output_get"].fn + + result = exec_tool( + command="python3 -c 'import sys; sys.stdout.write(\"x\" * 300_000)'", + shell=True, + max_output_kb=64, + ) + handle = result["output_handle"] + assert handle is not None + + # First page + page = output_get(output_handle=handle, since_offset=0, max_kb=64) + assert page["expired"] is False + assert len(page["data"]) > 0 + assert page["next_offset"] > 0 + + # Bogus handle + bogus = output_get(output_handle="out_doesnotexist", since_offset=0, max_kb=64) + assert bogus["expired"] is True + + +def test_timed_out_marker(exec_tool): + result = exec_tool(command="sleep 5", timeout_sec=1, auto_background_after_sec=0) + assert result["timed_out"] is True + + +def test_auto_shell_for_pipelines(exec_tool): + """Regression for the queen_technology session 152038 silent-mangling bug. + + The agent passed shell=False (default) with a pipeline command. The naive + command.split() spawned the first program with the rest as junk argv — + `ps aux | sort ...` produced "ps: error: garbage option", and `echo "..." + && ps ...` produced fake-success output where echo printed the entire + rest of the command verbatim. Fix: detect shell metacharacters and + transparently route through bash -c. + """ + # Case 1: pipeline. Was: ps spawned with "aux | sort ..." as argv → garbage option. + result = exec_tool(command="ps aux | head -1") + assert result["exit_code"] == 0, result + assert result["auto_shell"] is True + assert "USER" in result["stdout"] or "PID" in result["stdout"] + assert "garbage option" not in (result.get("stderr") or "") + + # Case 2: && + pipe + awk. Was: echo printed the whole rest of the line. + result = exec_tool( + command="echo HEADER && echo line1 | head -1", + ) + assert result["exit_code"] == 0, result + assert result["auto_shell"] is True + assert "HEADER" in result["stdout"] + assert "line1" in result["stdout"] + # The literal "&&" must NOT appear in stdout — that would mean echo + # captured it as an argument again. + assert "&&" not in result["stdout"] + + # Case 3: redirect + glob. Was: '*' passed as a literal arg to ls. 
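+    # (No redirect is actually exercised below; the glob alone is a shell
+    # metacharacter, which is enough to trigger the bash -c routing.)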
+ import os + import tempfile + + with tempfile.TemporaryDirectory() as tmp: + with open(os.path.join(tmp, "a.txt"), "w") as f: + f.write("x") + with open(os.path.join(tmp, "b.txt"), "w") as f: + f.write("y") + result = exec_tool(command=f"ls {tmp}/*.txt") + assert result["exit_code"] == 0, result + assert result["auto_shell"] is True + assert "a.txt" in result["stdout"] + assert "b.txt" in result["stdout"] + + +def test_no_auto_shell_for_argv_commands(exec_tool): + """Plain argv commands (no metacharacters) should NOT auto-route to bash. + Direct exec is faster and avoids quoting hazards.""" + result = exec_tool(command="echo hello") + assert result["exit_code"] == 0 + assert result["auto_shell"] is False + assert result["stdout"].strip() == "hello" + + +def test_explicit_shell_true_unchanged(exec_tool): + """When the agent explicitly opts in via shell=True, auto_shell stays + False — auto-detection only fires for shell=False.""" + result = exec_tool(command="echo a | tr a b", shell=True) + assert result["exit_code"] == 0 + assert result["auto_shell"] is False + assert result["stdout"].strip() == "b" + + +def test_auto_promotion(exec_tool, mcp): + """Past auto_background_after_sec, the call returns auto_backgrounded=True.""" + from terminal_tools.jobs.tools import register_job_tools + + register_job_tools(mcp) + # Use a 1s budget so the test runs quickly. + start = time.monotonic() + result = exec_tool( + command="sleep 5", + auto_background_after_sec=1, + timeout_sec=10, + ) + elapsed = time.monotonic() - start + assert result["auto_backgrounded"] is True, result + assert result["job_id"] is not None + assert result["exit_code"] is None + assert elapsed < 3, "auto-promotion should return quickly past the budget" + + # Take over via terminal_job_logs + job_logs = mcp._tool_manager._tools["terminal_job_logs"].fn + log_result = job_logs(job_id=result["job_id"], wait_until_exit=True, wait_timeout_sec=10) + assert log_result["status"] == "exited" + assert log_result["exit_code"] == 0 diff --git a/tools/tests/test_terminal_tools_jobs.py b/tools/tests/test_terminal_tools_jobs.py new file mode 100644 index 00000000..29d707db --- /dev/null +++ b/tools/tests/test_terminal_tools_jobs.py @@ -0,0 +1,97 @@ +"""Job lifecycle: ring buffer offsets, signals, stdin.""" + +from __future__ import annotations + +import time + +import pytest + + +@pytest.fixture +def job_tools(mcp): + from terminal_tools.jobs.tools import register_job_tools + + register_job_tools(mcp) + return { + "start": mcp._tool_manager._tools["terminal_job_start"].fn, + "logs": mcp._tool_manager._tools["terminal_job_logs"].fn, + "manage": mcp._tool_manager._tools["terminal_job_manage"].fn, + } + + +def test_start_logs_wait_basic(job_tools): + started = job_tools["start"](command="echo first; echo second; echo third", shell=True) + assert "job_id" in started + job_id = started["job_id"] + + # Wait for completion via logs + result = job_tools["logs"](job_id=job_id, wait_until_exit=True, wait_timeout_sec=5) + assert result["status"] == "exited" + assert result["exit_code"] == 0 + assert "first" in result["data"] and "third" in result["data"] + + +def test_offset_bookkeeping(job_tools): + started = job_tools["start"]( + command="for i in 1 2 3 4 5; do echo line$i; sleep 0.1; done", + shell=True, + ) + job_id = started["job_id"] + + # Read a couple times with offset bookkeeping + seen = "" + offset = 0 + for _ in range(20): + result = job_tools["logs"](job_id=job_id, since_offset=offset, max_bytes=4096) + seen += result["data"] + offset = 
result["next_offset"] + if result["status"] == "exited": + # Drain anything left + tail = job_tools["logs"](job_id=job_id, since_offset=offset, max_bytes=4096) + seen += tail["data"] + break + time.sleep(0.1) + + for n in range(1, 6): + assert f"line{n}" in seen, f"missing line{n} from {seen!r}" + + +def test_merge_stderr(job_tools): + started = job_tools["start"]( + command="echo stdout1; echo stderr1 1>&2; echo stdout2", + shell=True, + merge_stderr=True, + ) + job_id = started["job_id"] + result = job_tools["logs"]( + job_id=job_id, stream="merged", wait_until_exit=True, wait_timeout_sec=5 + ) + assert "stdout1" in result["data"] + assert "stderr1" in result["data"] + + +def test_signal_term(job_tools): + started = job_tools["start"](command="sleep 30") + job_id = started["job_id"] + + # Give it a moment to actually start + time.sleep(0.2) + + result = job_tools["manage"](action="signal_term", job_id=job_id) + assert result["ok"] is True + + final = job_tools["logs"](job_id=job_id, wait_until_exit=True, wait_timeout_sec=3) + assert final["status"] == "exited" + # On SIGTERM, exit_code is -15 (subprocess convention) + assert final["exit_code"] == -15 + + +def test_list_action(job_tools): + started = job_tools["start"](command="sleep 1") + listing = job_tools["manage"](action="list") + assert any(j["job_id"] == started["job_id"] for j in listing["jobs"]) + + +def test_unknown_job_id(job_tools): + result = job_tools["logs"](job_id="job_doesnotexist", wait_until_exit=False) + assert "error" in result diff --git a/tools/tests/test_terminal_tools_pty.py b/tools/tests/test_terminal_tools_pty.py new file mode 100644 index 00000000..cc6ee0f9 --- /dev/null +++ b/tools/tests/test_terminal_tools_pty.py @@ -0,0 +1,109 @@ +"""PTY sessions: bash-on-macOS, prompt sentinel, raw I/O, zsh refusal.""" + +from __future__ import annotations + +import sys +import time + +import pytest + +pytestmark = pytest.mark.skipif(sys.platform == "win32", reason="PTY is POSIX-only") + + +@pytest.fixture +def pty_tools(mcp): + from terminal_tools.pty.tools import register_pty_tools + + register_pty_tools(mcp) + return { + "open": mcp._tool_manager._tools["terminal_pty_open"].fn, + "run": mcp._tool_manager._tools["terminal_pty_run"].fn, + "close": mcp._tool_manager._tools["terminal_pty_close"].fn, + } + + +def test_open_close_basic(pty_tools): + opened = pty_tools["open"]() + assert "session_id" in opened + assert opened["shell"] == "/bin/bash", "terminal-tools must default to bash, not zsh" + closed = pty_tools["close"](session_id=opened["session_id"]) + assert closed.get("already_closed") in (False, None) + + +def test_bash_on_darwin(): + """Even on macOS, the resolved shell is /bin/bash, not /bin/zsh.""" + from terminal_tools.common.limits import _resolve_shell + + assert _resolve_shell(True) == "/bin/bash" + + +def test_pty_run_command(pty_tools): + opened = pty_tools["open"]() + sid = opened["session_id"] + try: + result = pty_tools["run"](session_id=sid, command="echo hello-pty", timeout_sec=5) + assert result.get("timed_out") is False + assert "hello-pty" in result["output"] + assert result["prompt_after"] is True + finally: + pty_tools["close"](session_id=sid) + + +def test_pty_state_persists(pty_tools): + opened = pty_tools["open"]() + sid = opened["session_id"] + try: + pty_tools["run"](session_id=sid, command="MY_VAR=42") + result = pty_tools["run"](session_id=sid, command="echo $MY_VAR", timeout_sec=3) + assert "42" in result["output"] + finally: + pty_tools["close"](session_id=sid) + + +def 
test_raw_send_then_read_only(pty_tools): + """Drive the python REPL via raw_send + read_only.""" + opened = pty_tools["open"]() + sid = opened["session_id"] + try: + # Launch python with our own prompt regex + pty_tools["run"]( + session_id=sid, + command="python3 -q", + expect=r">>>\s*$", + timeout_sec=10, + ) + pty_tools["run"](session_id=sid, command="x = 7\n", raw_send=True) + pty_tools["run"](session_id=sid, command="print(x*x)\n", raw_send=True) + time.sleep(0.5) + drained = pty_tools["run"](session_id=sid, read_only=True, timeout_sec=2) + assert "49" in drained["output"] + finally: + pty_tools["close"](session_id=sid, force=True) + + +def test_session_busy(pty_tools): + """Concurrent run() calls on the same session return 'session busy'.""" + import threading + + opened = pty_tools["open"]() + sid = opened["session_id"] + try: + results = [] + + def run_long(): + results.append(pty_tools["run"](session_id=sid, command="sleep 2", timeout_sec=5)) + + t = threading.Thread(target=run_long) + t.start() + time.sleep(0.2) + # Concurrent call should fail + result = pty_tools["run"](session_id=sid, command="echo nope", timeout_sec=1) + assert "error" in result and "busy" in result["error"].lower() + t.join(timeout=10) + finally: + pty_tools["close"](session_id=sid, force=True) + + +def test_unknown_session(pty_tools): + result = pty_tools["run"](session_id="pty_doesnotexist", command="ls") + assert "error" in result diff --git a/tools/tests/test_terminal_tools_search.py b/tools/tests/test_terminal_tools_search.py new file mode 100644 index 00000000..c8336991 --- /dev/null +++ b/tools/tests/test_terminal_tools_search.py @@ -0,0 +1,58 @@ +"""terminal_rg + terminal_find — basic functionality, structured output.""" + +from __future__ import annotations + +import shutil + +import pytest + + +@pytest.fixture +def search_tools(mcp): + from terminal_tools.search.tools import register_search_tools + + register_search_tools(mcp) + return { + "rg": mcp._tool_manager._tools["terminal_rg"].fn, + "find": mcp._tool_manager._tools["terminal_find"].fn, + } + + +@pytest.mark.skipif(not shutil.which("rg"), reason="ripgrep not installed") +def test_rg_finds_pattern(search_tools, tmp_path): + (tmp_path / "a.txt").write_text("hello\nworld\nfoo\n") + (tmp_path / "b.txt").write_text("bar\nworld\n") + + result = search_tools["rg"](pattern="world", path=str(tmp_path)) + assert result["total"] >= 2 + paths = {m["path"] for m in result["matches"]} + assert any("a.txt" in p for p in paths) + + +@pytest.mark.skipif(not shutil.which("rg"), reason="ripgrep not installed") +def test_rg_no_matches(search_tools, tmp_path): + (tmp_path / "a.txt").write_text("hello\n") + result = search_tools["rg"](pattern="zzz_no_match_zzz", path=str(tmp_path)) + assert result["total"] == 0 + assert result["matches"] == [] + + +def test_find_by_name(search_tools, tmp_path): + (tmp_path / "alpha.log").write_text("a") + (tmp_path / "beta.log").write_text("b") + (tmp_path / "ignore.txt").write_text("c") + + result = search_tools["find"](path=str(tmp_path), name="*.log") + assert result["count"] == 2 + assert all(p.endswith(".log") for p in result["paths"]) + + +def test_find_by_type_dir(search_tools, tmp_path): + (tmp_path / "sub").mkdir() + (tmp_path / "file.txt").write_text("x") + + result = search_tools["find"](path=str(tmp_path), type_filter="d") + paths = result["paths"] + # tmp_path itself + sub + assert any(p.endswith("sub") for p in paths) + assert not any(p.endswith("file.txt") for p in paths) diff --git 
a/tools/tests/test_terminal_tools_security.py b/tools/tests/test_terminal_tools_security.py new file mode 100644 index 00000000..5401d109 --- /dev/null +++ b/tools/tests/test_terminal_tools_security.py @@ -0,0 +1,102 @@ +"""Security/policy tests: zsh refusal, env stripping, destructive catalog.""" + +from __future__ import annotations + +import pytest + + +def test_resolve_shell_rejects_zsh(): + from terminal_tools.common.limits import ZshRefused, _resolve_shell + + for path in ("/bin/zsh", "/usr/bin/zsh", "/usr/local/bin/zsh", "ZSH"): + with pytest.raises(ZshRefused): + _resolve_shell(path) + + +def test_resolve_shell_accepts_bash(): + from terminal_tools.common.limits import _resolve_shell + + assert _resolve_shell(True) == "/bin/bash" + assert _resolve_shell("/bin/bash") == "/bin/bash" + assert _resolve_shell(False) is None + + +def test_sanitized_env_strips_zsh_vars(monkeypatch): + from terminal_tools.common.limits import sanitized_env + + monkeypatch.setenv("ZDOTDIR", "/some/path") + monkeypatch.setenv("ZSH_VERSION", "5.9") + monkeypatch.setenv("ZSH_NAME", "zsh") + monkeypatch.setenv("PATH", "/usr/bin:/bin") + + env = sanitized_env() + assert "ZDOTDIR" not in env + assert "ZSH_VERSION" not in env + assert "ZSH_NAME" not in env + # Non-zsh vars survive + assert env["PATH"] == "/usr/bin:/bin" + + +def test_destructive_warning_catalog(): + from terminal_tools.common.destructive_warning import get_warning + + cases = [ + ("rm -rf /tmp/foo", "force-remove"), + ("rm -r /tmp/foo", "recursively remove"), + ("git reset --hard HEAD~1", "discard"), + ("git push --force origin main", "remote history"), + ("git push -f origin main", "remote history"), + ("git commit --amend -m 'x'", "rewrite"), + ("DROP TABLE users;", "drop or truncate"), + ("DELETE FROM users;", "delete rows"), + ("kubectl delete pod foo", "Kubernetes"), + ("terraform destroy", "Terraform"), + ] + for cmd, expected in cases: + warning = get_warning(cmd) + assert warning is not None, f"expected warning for {cmd!r}" + assert expected in warning, f"warning {warning!r} should mention {expected!r}" + + +def test_destructive_warning_clean_commands(): + from terminal_tools.common.destructive_warning import get_warning + + for cmd in ["ls -la", "echo hi", "git status", "git commit -m 'x'"]: + assert get_warning(cmd) is None, f"unexpected warning for {cmd!r}" + + +def test_semantic_exit_grep(): + from terminal_tools.common.semantic_exit import classify + + status, msg = classify("grep foo /tmp/x", 0) + assert status == "ok" + status, msg = classify("grep foo /tmp/x", 1) + assert status == "ok" + assert "No matches" in msg + status, msg = classify("grep foo /tmp/x", 2) + assert status == "error" + + +def test_semantic_exit_default(): + from terminal_tools.common.semantic_exit import classify + + status, msg = classify("ls", 0) + assert status == "ok" + assert msg is None + status, msg = classify("ls", 1) + assert status == "error" + + +def test_semantic_exit_signaled(): + from terminal_tools.common.semantic_exit import classify + + status, msg = classify("sleep 999", -15, signaled=True) + assert status == "signal" + + +def test_semantic_exit_timed_out(): + from terminal_tools.common.semantic_exit import classify + + status, msg = classify("sleep 999", None, timed_out=True) + assert status == "error" + assert "timed out" in msg.lower() diff --git a/tools/tests/test_terminal_tools_smoke.py b/tools/tests/test_terminal_tools_smoke.py new file mode 100644 index 00000000..4abaddce --- /dev/null +++ b/tools/tests/test_terminal_tools_smoke.py @@ 
-0,0 +1,33 @@ +"""Smoke test: load the server module, register tools, assert all 10 land.""" + +from __future__ import annotations + +EXPECTED_TOOLS = { + "terminal_exec", + "terminal_job_start", + "terminal_job_logs", + "terminal_job_manage", + "terminal_pty_open", + "terminal_pty_run", + "terminal_pty_close", + "terminal_rg", + "terminal_find", + "terminal_output_get", +} + + +def test_register_terminal_tools_lands_all_ten(mcp): + from terminal_tools import register_terminal_tools + + names = register_terminal_tools(mcp) + assert set(names) == EXPECTED_TOOLS, ( + f"missing: {EXPECTED_TOOLS - set(names)}, extra: {set(names) - EXPECTED_TOOLS}" + ) + + +def test_all_tools_have_terminal_prefix(mcp): + from terminal_tools import register_terminal_tools + + names = register_terminal_tools(mcp) + for n in names: + assert n.startswith("terminal_"), f"tool {n!r} missing terminal_ prefix"
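+
+
+# Every module in tools/tests relies on an `mcp` fixture from conftest.py,
+# which is not part of this diff. Assumed shape, illustrative only (the
+# real fixture may differ):
+#
+#     import pytest
+#     from fastmcp import FastMCP
+#
+#     @pytest.fixture
+#     def mcp() -> FastMCP:
+#         return FastMCP("terminal-tools-test")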