Merge remote-tracking branch 'origin/feature/sync-20260430' into feat/file-ops

This commit is contained in:
Richard Tang
2026-05-01 07:42:20 -07:00
42 changed files with 4244 additions and 18 deletions
+14 -2
@@ -219,8 +219,20 @@ async def _captioning_chain(
logger.warning("vision_fallback failed; retrying configured model")
if result := await caption_tool_image(intent, image_content):
return result
logger.warning("vision_fallback retry failed; trying gemini-3-flash-preview")
return await caption_tool_image(intent, image_content, model_override="gemini/gemini-3-flash-preview")
# Match the configured model's proxy prefix so the override is routed
# through the same endpoint with the same auth shape. Without this,
# a Hive subscriber's `hive/...` config would override to
# `gemini/...` — which sends Google's Gemini protocol to the
# Anthropic-compatible Hive proxy (404), not what we want.
configured = (get_vision_fallback_model() or "").lower()
if configured.startswith("hive/"):
override = "hive/gemini-3-flash-preview"
elif configured.startswith("kimi/"):
override = "kimi/gemini-3-flash-preview"
else:
override = "gemini/gemini-3-flash-preview"
logger.warning("vision_fallback retry failed; trying %s", override)
return await caption_tool_image(intent, image_content, model_override=override)
# Pattern for detecting context-window-exceeded errors across LLM providers.
@@ -211,10 +211,12 @@ async def caption_tool_image(
"max_tokens": 8192,
"timeout": timeout_s,
}
# Pass api_key directly only when there are no proxy-rewritten
# extra_headers carrying the auth (e.g. the gemini-3-flash override
# path goes direct to Gemini, not through the Hive proxy).
if api_key and not extra_headers:
# Always pass api_key when we have one, even alongside proxy-rewritten
# extra_headers. litellm's anthropic handler refuses to dispatch
# without an api_key (it sends it as x-api-key); the proxy itself
# authenticates via the Authorization: Bearer header in
# extra_headers. Both are needed — matches LiteLLMProvider's path.
if api_key:
kwargs["api_key"] = api_key
if rewritten_base:
kwargs["api_base"] = rewritten_base
@@ -1,3 +1,3 @@
{
"include": ["gcu-tools", "hive_tools"]
"include": ["gcu-tools", "hive_tools", "terminal-tools"]
}
@@ -51,10 +51,14 @@ _TOOL_CATEGORIES: dict[str, list[str]] = {
"hashline_edit",
],
# Shell + process control — engineering personas only.
# Includes the legacy coder-tools commands (run_command, bash_*) and
# the full terminal-tools MCP server (foreground exec with auto-promotion,
# background jobs, persistent PTY sessions, ripgrep/find).
"shell": [
"execute_command_tool",
"bash_kill",
"bash_output",
"@server:terminal-tools",
],
# Tabular data. CSV/Excel read/write + DuckDB SQL.
"data": [
+47 -11
@@ -51,6 +51,10 @@ _DEFAULT_LOCAL_SERVERS: dict[str, dict[str, Any]] = {
"description": "File I/O: read, write, edit, search, list, run commands",
"args": ["run", "python", "files_server.py", "--stdio"],
},
"terminal-tools": {
"description": "Terminal capabilities: process exec, background jobs, PTY sessions, fs search. Bash-only on POSIX.",
"args": ["run", "python", "terminal_tools_server.py", "--stdio"],
},
}
# Aliases that earlier versions of ensure_defaults wrote under the wrong name.
@@ -58,6 +62,10 @@ _DEFAULT_LOCAL_SERVERS: dict[str, dict[str, Any]] = {
# name so the active agents (queen, credential_tester) can find their tools.
_STALE_DEFAULT_ALIASES: dict[str, str] = {
"hive_tools": "hive-tools",
# 2026-04-30: shell-tools renamed to terminal-tools. Drop the stale name
# on next ensure_defaults() so the queen's allowlist (which now includes
# @server:terminal-tools) actually finds a server with the new name.
"terminal-tools": "shell-tools",
}
@@ -77,7 +85,30 @@ class MCPRegistry:
# ── Initialization ──────────────────────────────────────────────
def initialize(self) -> None:
"""Create directory structure and default files if missing."""
"""Create directory structure, default files, and seed bundled servers.
Every read path (queen orchestrator, pipeline stage, CLI, routes)
calls this; keeping the seeding here means a fresh ``HIVE_HOME``
(e.g. the desktop's per-user dir under ``~/.config/Hive/users/<hash>/``
or ``~/Library/Application Support/Hive/users/<hash>/``) is always
populated with ``hive_tools`` / ``gcu-tools`` / ``files-tools`` /
``terminal-tools`` before any agent code reads ``installed.json``.
Without this, ``load_agent_selection()`` resolves an empty registry
and emits "Server X requested but not installed" warnings even
though the server is bundled.
Idempotent: already-installed entries are left untouched.
"""
self._bootstrap_io()
self._seed_defaults()
def _bootstrap_io(self) -> None:
"""Create the registry directory + empty config/installed files.
Split out from ``initialize()`` so ``_seed_defaults()`` can call it
without re-entering the seeding logic (which would recurse via
``_read_installed()`` → ``initialize()``).
"""
self._base.mkdir(parents=True, exist_ok=True)
self._cache_dir.mkdir(parents=True, exist_ok=True)
@@ -88,21 +119,26 @@ class MCPRegistry:
self._write_json(self._installed_path, {"servers": {}})
def ensure_defaults(self) -> list[str]:
"""Seed the built-in local MCP servers (hive-tools, gcu-tools, files-tools).
"""Public alias kept for the ``hive mcp init`` CLI command.
Idempotent: servers already present are left untouched. Skips seeding
entirely when the source-tree ``tools/`` directory cannot be located
(e.g. when Hive is installed from a wheel rather than a checkout).
Returns the list of names that were newly registered.
Returns the list of newly-registered server names so the CLI can
print them. Same idempotent seeding logic as ``initialize()``.
"""
self.initialize()
self._bootstrap_io()
return self._seed_defaults()
def _seed_defaults(self) -> list[str]:
"""Idempotently register the bundled default local servers.
Skips entirely when the source-tree ``tools/`` directory cannot
be located (e.g. wheel installs). Returns the list of names that
were newly registered.
"""
# parents: [0]=loader, [1]=framework, [2]=core, [3]=repo root
tools_dir = Path(__file__).resolve().parents[3] / "tools"
if not tools_dir.is_dir():
logger.debug(
"MCPRegistry.ensure_defaults: tools dir %s missing; skipping default seed",
"MCPRegistry._seed_defaults: tools dir %s missing; skipping default seed",
tools_dir,
)
return []
@@ -119,7 +155,7 @@ class MCPRegistry:
for canonical, stale in _STALE_DEFAULT_ALIASES.items():
if stale in existing and canonical not in existing:
logger.info(
"MCPRegistry.ensure_defaults: removing stale alias '%s' (canonical: '%s')",
"MCPRegistry._seed_defaults: removing stale alias '%s' (canonical: '%s')",
stale,
canonical,
)
@@ -142,7 +178,7 @@ class MCPRegistry:
)
added.append(name)
except MCPError as exc:
logger.warning("MCPRegistry.ensure_defaults: failed to seed '%s': %s", name, exc)
logger.warning("MCPRegistry._seed_defaults: failed to seed '%s': %s", name, exc)
if added:
logger.info("MCPRegistry: seeded default local servers: %s", added)
@@ -44,6 +44,9 @@ class McpRegistryStage(PipelineStage):
from framework.loader.mcp_registry import MCPRegistry
from framework.orchestrator.files import FILES_MCP_SERVER_NAME
# Bundled defaults (hive_tools / gcu-tools / files-tools / terminal-tools)
# are seeded inside MCPRegistry.initialize(); resolve_for_agent below
# will find them even on a fresh HIVE_HOME.
registry = MCPRegistry()
mcp_loaded = False
@@ -0,0 +1,139 @@
---
name: hive.terminal-tools-foundations
description: Required reading whenever any terminal_* tool is available. Teaches the foreground/background dichotomy (terminal_exec auto-promotes past 30s, returns a job_id you poll with terminal_job_logs), the standard envelope shape (exit_code, stdout, stdout_truncated_bytes, output_handle, semantic_status, warning, auto_backgrounded, job_id), output handle pagination via terminal_output_get, when to read semantic_status instead of raw exit_code (grep/rg/find/diff/test exit 1 is NOT an error), the destructive-warning surface (rm -rf, git push --force, DROP TABLE), tool preference (use files-tools / gcu-tools / hive_tools before raw shell), and the bash-only-on-macOS policy. Skipping this leads to "tool returned no output" surprises, orphaned jobs, and panic over benign grep exit codes.
metadata:
author: hive
type: preset-skill
version: "1.0"
---
# terminal-tools — foundations
These tools give you a real terminal: foreground exec with smart envelopes, background jobs with offset-based log streaming, persistent PTY shells, and filesystem search. Bash-only on POSIX.
## Tool preference (read first)
Before reaching for terminal-tools, check whether a higher-level tool already covers the task. Shell is for system operations the other servers don't reach.
- **Reading files** → `files-tools.read_file` (handles size, paging, line-numbered output) — NOT `terminal_exec("cat ...")`
- **Editing files** → `files-tools.edit_file` (atomic patch with diff verification) — NOT `terminal_exec("sed -i ...")`
- **Writing files** → `files-tools.write_file` — NOT `terminal_exec("echo > ...")`
- **In-project search** → `files-tools.search_files` (project-scoped, code-aware) — use `terminal_rg` only for raw paths outside the project (`/var/log`, `/etc`)
- **Browser / web pages** → `gcu-tools.browser_*` for rendered pages — NOT `terminal_exec("curl ...")`
- **Web search** → `hive_tools.web_search` — NOT scraping
- **System operations** (process exec, jobs, PTYs, raw fs search) → terminal-tools. This is its territory.
## The standard envelope
Every spawn-style call (`terminal_exec`, the auto-promoted job state) returns this shape:
```jsonc
{
"exit_code": 0, // null when auto-backgrounded or pre-spawn error
"stdout": "...", // decoded, truncated to max_output_kb (default 256 KB)
"stderr": "...",
"stdout_truncated_bytes": 0, // > 0 means more is in output_handle
"stderr_truncated_bytes": 0,
"runtime_ms": 42,
"pid": 12345,
"output_handle": null, // "out_<hex>" when truncated — paginate with terminal_output_get
"timed_out": false,
"semantic_status": "ok", // "ok" | "signal" | "error" — read THIS, not just exit_code
"semantic_message": null, // e.g. "No matches found" for grep exit 1
"warning": null, // e.g. "may force-remove files" for rm -rf
"auto_backgrounded": false,
"job_id": null // set when auto_backgrounded=true
}
```
## Auto-promotion (the core mental model)
`terminal_exec` runs commands in the foreground until the **auto-background budget** (default 30s) elapses. Past that point, the process is silently transferred to a background job and the call returns immediately with:
```jsonc
{ "auto_backgrounded": true, "exit_code": null, "job_id": "job_<hex>", ... }
```
When you see `auto_backgrounded: true`, **pivot to polling**. The job is still running:
```
terminal_job_logs(job_id, since_offset=0, wait_until_exit=true, wait_timeout_sec=60)
→ blocks server-side until the job exits or the timeout, returns logs + status
```
You're not failing — you're freed up to do other work while the long task runs.
To force pure-foreground (kill on `timeout_sec`), pass `auto_background_after_sec=0`. Use this when you genuinely don't want a background job (small commands where promotion would surprise you).
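A minimal end-to-end sketch of the pivot (assuming the envelope fields and call shapes shown above; the command is illustrative):
```
res = terminal_exec("make -j8 all")              # may auto-promote past 30s
if res["auto_backgrounded"]:
    offset = 0
    status = "running"
    while status == "running":
        page = terminal_job_logs(res["job_id"], since_offset=offset,
                                 wait_until_exit=True, wait_timeout_sec=60)
        offset = page["next_offset"]             # always carry forward
        status = page["status"]
```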
## Semantic exit codes — read `semantic_status`, not raw `exit_code`
Several common commands use exit 1 for legitimate non-error states:
| Command | exit 0 | exit 1 |
|---|---|---|
| `grep` / `rg` | matches found | **no matches** (not an error) |
| `find` | success | **some dirs unreadable** (informational) |
| `diff` | identical | **files differ** (informational) |
| `test` / `[` | true | **false** (informational) |
For these, `semantic_status` will be `"ok"` even when `exit_code == 1`, with `semantic_message` describing why ("No matches found"). For everything else, `semantic_status` defaults to `"ok"` on 0 and `"error"` on nonzero.
**Rule**: always check `semantic_status` first. Only fall back to `exit_code` when you need the exact number (e.g. distinguishing `make` errors).
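A hedged sketch of that check order (field names as documented above):
```
res = terminal_exec("rg 'TODO' src/")
if res["semantic_status"] == "error":
    raise RuntimeError(res["stderr"])            # real failure
if res["exit_code"] == 1:
    pass  # semantic_status == "ok": rg found no matches, not an error
```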
## Destructive warnings — re-read your command
The envelope's `warning` field is set when the command matches a known destructive pattern (`rm -rf`, `git push --force`, `git reset --hard`, `DROP TABLE`, `kubectl delete`, `terraform destroy`, etc.). The command **still ran** — the warning is informational. Use it as a "did I mean to do that?" prompt before trusting subsequent steps that depend on the side effect.
If a `warning` appears unexpectedly, stop and verify: was the destructive action intended, or did a path/glob slip in?
## Output handles — never lose output
When `stdout_truncated_bytes > 0` or `stderr_truncated_bytes > 0`, the inline output was capped at `max_output_kb` (default 256 KB). The full bytes are stashed under `output_handle` for **5 minutes**. Paginate with:
```
terminal_output_get(output_handle, since_offset=0, max_kb=64)
→ { data, offset, next_offset, eof, expired }
```
Track `next_offset` across calls. If `expired: true`, re-run the command (the handle's TTL has lapsed).
The store has a 64 MB cap with LRU eviction. For huge outputs, prefer `terminal_job_start` + `terminal_job_logs` polling (4 MB ring buffer per stream, infinite total throughput).
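A pagination sketch (assumes the `terminal_output_get` response shape above):
```
parts, offset = [], 0
while True:
    page = terminal_output_get(handle, since_offset=offset, max_kb=64)
    if page["expired"]:
        break                                    # TTL lapsed: re-run the command
    parts.append(page["data"])
    offset = page["next_offset"]
    if page["eof"]:
        break
full_output = "".join(parts)
```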
## Bash, not zsh — even on macOS
`terminal_exec` and `terminal_pty_open` always invoke `/bin/bash`. The user's `$SHELL` is ignored. Explicit `shell="/bin/zsh"` is **rejected** with a clear error. This is a deliberate security stance, not aesthetic — zsh has command/builtin classes (`zmodload`, `=cmd` expansion, `zpty`, `ztcp`, `zf_*`) that bypass bash-shaped checks. The `terminal-tools-pty-sessions` skill explains the implications for PTY sessions specifically.
`ZDOTDIR` and `ZSH_*` env vars are stripped before exec to prevent zsh dotfiles leaking in. Bash dotfiles would normally apply in interactive invocations, but PTY sessions launch `bash --norc --noprofile` to keep startup predictable.
## Pipelines and complex commands
Pipes (`|`), redirects (`>`, `<`, `>>`), conditionals (`&&`, `||`, `;`), and globs (`*`, `?`, `[`) are detected automatically. You can pass them with the default `shell=False` and the runtime will transparently route through `/bin/bash -c` and surface `auto_shell: true` in the envelope:
```
terminal_exec("ps aux | sort -k3 -rn | head -40")
→ { exit_code: 0, stdout: "...", auto_shell: true, ... }
```
For simple argv commands (no metacharacters) `shell=False` is faster and direct-execs the binary. For commands that need shell features the detector doesn't catch (rare: exotic bash builtins, here-strings), pass `shell=True` explicitly:
```
terminal_exec("set -e; complicated bash logic", shell=True)
```
Quoted strings work either way — the detector uses `shlex.split` which handles `"quoted args with spaces"` correctly.
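As a rough illustration only (the actual detector is not shown here), metacharacter routing can be as simple as:
```
_SHELL_META = "|&;<>*?[]$`"

def needs_shell(command: str) -> bool:
    # Hypothetical sketch: any metacharacter routes via /bin/bash -c.
    # The real detector additionally uses shlex.split to respect quoting.
    return any(ch in command for ch in _SHELL_META)
```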
## When to use what (cheat sheet)
| Need | Tool |
|---|---|
| One-shot command, ≤30s | `terminal_exec` |
| One-shot command, might be longer | `terminal_exec` (auto-promotes) |
| Long-running job from the start | `terminal_job_start` |
| State across calls (cd, env, REPL) | `terminal_pty_open` + `terminal_pty_run` |
| Search file contents (raw paths) | `terminal_rg` |
| Find files by predicate | `terminal_find` |
| Retrieve truncated output | `terminal_output_get` |
| Tree / stat / du | `terminal_exec("ls -la"/"stat foo"/"du -sh path")` |
| HTTP / DNS / ping / archives | `terminal_exec("curl ..."/"dig ..."/"tar xzf ...")` |
See `references/exit_codes.md` for the full POSIX + signal-induced + semantic catalog.
@@ -0,0 +1,50 @@
# Exit code reference
## POSIX conventions
| Code | Meaning |
|---|---|
| 0 | Success |
| 1 | General error / catchall |
| 2 | Misuse of shell builtins, syntax error |
| 126 | Command found but not executable |
| 127 | Command not found |
| 128 | Invalid argument to `exit` |
| 128 + N | Killed by signal N |
| 130 | Killed by SIGINT (Ctrl-C) |
| 137 | Killed by SIGKILL |
| 143 | Killed by SIGTERM |
| 255 | Exit status out of range |
When `exit_code < 0` in the envelope, the process was killed by a signal: `abs(exit_code)` is the signal number (subprocess uses negative codes for signaled exits, separate from the `128 + N` shell convention).
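A one-line decode of that convention (envelope field as documented):
```
code = envelope["exit_code"]
if code is not None and code < 0:
    signal_number = -code   # e.g. -15: killed by SIGTERM
```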
## Semantic exits — when exit 1 is NOT an error
terminal-tools encodes these in `semantic_status`. The agent should read `semantic_status` first.
| Command | Code 0 | Code 1 | Code ≥2 |
|---|---|---|---|
| `grep` / `rg` / `ripgrep` | matches found | **no matches** (ok) | error |
| `find` | success | **some dirs unreadable** (ok) | error |
| `diff` | files identical | **files differ** (ok) | error |
| `test` / `[` | condition true | **condition false** (ok) | error |
For any command not in this table, the default convention applies (0 = ok, nonzero = error).
## When `exit_code` is `null`
- `auto_backgrounded: true` — the process is still running under a `job_id`. Poll with `terminal_job_logs`.
- Pre-spawn error (command not found, exec failed) — see `error` field in the envelope.
- `timed_out: true` and the process refused to die — extremely rare; the kernel has the answer.
## Common signal-induced exits
| Signal | Number | Subprocess exit | Shell exit | Meaning |
|---|---|---|---|---|
| SIGHUP | 1 | -1 | 129 | Terminal hangup |
| SIGINT | 2 | -2 | 130 | Interrupt (Ctrl-C) |
| SIGQUIT | 3 | -3 | 131 | Quit (Ctrl-\\) |
| SIGKILL | 9 | -9 | 137 | Forced kill (uncatchable) |
| SIGTERM | 15 | -15 | 143 | Polite termination |
| SIGSEGV | 11 | -11 | 139 | Segmentation fault |
| SIGABRT | 6 | -6 | 134 | Abort (assertion failed, etc.) |
@@ -0,0 +1,96 @@
---
name: hive.terminal-tools-fs-search
description: Use terminal_rg / terminal_find when you need raw filesystem search outside the project tree — system configs, /var/log, /etc, archive contents — or when files-tools.search_files is too project-scoped. Teaches the rg vs find vs terminal_exec("ls/du/tree") split, common rg flag combos for code/logs/configs, find predicates for mtime/size/type queries, and the rule that for tree views or single-file stat info you should just use terminal_exec instead of inventing a tool. Read before reaching for raw shell to grep or find anything.
metadata:
author: hive
type: preset-skill
version: "1.0"
---
# Filesystem search
terminal-tools provides two structured search tools: `terminal_rg` (ripgrep for content) and `terminal_find` (find for predicates). Everything else (tree, stat, du) is just `terminal_exec`.
## When to use what
| Task | Tool |
|---|---|
| Find code/text matching a pattern in your **project** | `files-tools.search_files` (project-aware, ranks by relevance) |
| Find code/text matching a pattern in `/var/log`, `/etc`, archives, system dirs | `terminal_rg` |
| Find files matching name/glob/predicate | `terminal_find` |
| List a directory | `terminal_exec("ls -la /path")` |
| Tree view | `terminal_exec("tree -L 2 /path")` |
| Single-path stat | `terminal_exec("stat /path")` |
| Disk usage | `terminal_exec("du -sh /path")` or `terminal_exec("du -h --max-depth=2 /")` |
| Count matches across files | `terminal_rg(pattern, extra_args=["-c"])` |
## `terminal_rg` — content search
ripgrep is fast, gitignore-aware, and has a deep flag surface. The structured wrapper exposes the most useful flags directly; `extra_args` covers the rest.
### Common patterns
```
# All Python files containing "TODO"
terminal_rg(pattern="TODO", path=".", type_filter="py")
# Case-insensitive, with context
terminal_rg(pattern="error", path="/var/log", ignore_case=True, context=2)
# Search hidden files (rg ignores them by default)
terminal_rg(pattern="api_key", path="~", hidden=True)
# Don't respect .gitignore (find files git would ignore)
terminal_rg(pattern="generated", path=".", no_ignore=True)
# Multi-line pattern (e.g., function definitions spanning lines)
terminal_rg(pattern=r"def\s+\w+\(.*\n.*\n", path="src", extra_args=["--multiline"])
# Specific filename glob
terminal_rg(pattern="version", path=".", glob="*.toml")
```
### rg flag idioms
| Flag | Effect |
|---|---|
| `-tpy` (`type_filter="py"`) | Only Python files |
| `-uu` | Don't respect any ignores (incl. `.git/`) |
| `--multiline` (`extra_args`) | Allow regex spanning lines |
| `--max-count` (`max_count`) | Stop after N matches per file |
| `--max-depth` (`max_depth`) | Limit recursion |
| `-w` (`extra_args`) | Whole word match |
| `-F` (`extra_args`) | Fixed string (no regex) |
See `references/ripgrep_cheatsheet.md` for the long form.
## `terminal_find` — predicate search
`find` excels at "files matching N criteria". The wrapper surfaces the most common predicates; combine via the structured arguments.
```
# All .log files modified in the last 7 days, larger than 1MB
terminal_find(path="/var/log", iname="*.log", mtime_days=7, size_kb_min=1024)
# All directories named ".git" (find Git repos under a tree)
terminal_find(path="~/projects", name=".git", type_filter="d")
# Only the top three levels
terminal_find(path="/etc", max_depth=3, type_filter="f")
# Symlinks
terminal_find(path=".", type_filter="l")
```
See `references/find_predicates.md` for combinations not directly exposed.
## Output truncation
Both tools return `truncated: true` when their output exceeded the inline cap. For `terminal_rg`, this means matches were dropped (refine the pattern or narrow the path); for `terminal_find`, results past `max_results` (default 1000) are dropped. Tighten predicates rather than raising the cap.
## Anti-patterns
- **Don't `terminal_rg` your project tree** — `files-tools.search_files` is project-aware and ranks results.
- **Don't reach for `terminal_find` to list one directory** — `terminal_exec("ls -la /path")` is shorter.
- **Don't use `terminal_exec("grep ...")`** when `terminal_rg` exists — rg is faster, gitignore-aware, and returns structured matches.
- **Don't use `terminal_exec("find ...")`** to invent your own predicate combinations — use `terminal_find` and report missing capabilities.
@@ -0,0 +1,78 @@
# find predicate reference
The `terminal_find` wrapper exposes name/iname, type, mtime_days, size bounds, max_depth, max_results. For combinations beyond that, drop to `terminal_exec("find ...")`.
## Time predicates
| Need | find predicate |
|---|---|
| Modified within N days | `-mtime -N` (wrapper: `mtime_days=N`) |
| Modified more than N days ago | `-mtime +N` |
| Modified exactly N days ago | `-mtime N` |
| Accessed within N days | `-atime -N` |
| Inode changed within N days | `-ctime -N` |
| Modified in last N minutes | `-mmin -N` |
| Newer than reference file | `-newer ref` |
## Size predicates
| Need | find predicate |
|---|---|
| Bigger than N kilobytes | `-size +Nk` (wrapper: `size_kb_min`) |
| Smaller than N kilobytes | `-size -Nk` (wrapper: `size_kb_max`) |
| Exactly N kilobytes | `-size Nk` |
| Bigger than N megabytes | `-size +NM` |
| Empty files | `-empty` |
## Type predicates
| Need | find predicate |
|---|---|
| Regular file | `-type f` (wrapper: `type_filter="f"`) |
| Directory | `-type d` (wrapper: `type_filter="d"`) |
| Symlink | `-type l` (wrapper: `type_filter="l"`) |
| Block device | `-type b` |
| Character device | `-type c` |
| FIFO | `-type p` |
| Socket | `-type s` |
## Permission predicates
| Need | find predicate |
|---|---|
| Owned by user | `-user alice` |
| Owned by group | `-group dev` |
| Permission bits exact | `-perm 644` |
| Has any of these bits | `-perm /u+x` |
| Has all of these bits | `-perm -u+x` |
| Readable by current user | `-readable` |
| Writable | `-writable` |
| Executable | `-executable` |
## Composing
`find` evaluates predicates left-to-right with implicit AND. For OR, group predicates with `\( ... -o ... \)`:
```
# .log OR .txt (drop to terminal_exec for OR)
terminal_exec(r"find /path \( -name '*.log' -o -name '*.txt' \) -type f", shell=True)
# NOT in a directory called node_modules
terminal_exec("find . -path '*/node_modules' -prune -o -name '*.js' -print", shell=True)
```
## Actions
| Need | predicate |
|---|---|
| Print path (default) | (implicit `-print`) |
| Print null-separated | `-print0` (for piping to xargs -0) |
| Delete | `-delete` (DANGEROUS — use terminal_exec with explicit confirmation) |
| Run command per match | `-exec cmd {} \;` (drop to terminal_exec) |
| Run command, batched | `-exec cmd {} +` |
## When NOT to use find
- **One directory listing**: `terminal_exec("ls -la /path")`
- **Recursive grep**: `terminal_rg`
- **Count files**: `terminal_exec("find /path -type f | wc -l")`
@@ -0,0 +1,70 @@
# ripgrep cheatsheet
For when the structured `terminal_rg` flags don't cover the case. Pass via `extra_args=[...]`.
## Filtering
| Need | Flag |
|---|---|
| Whole word | `-w` |
| Fixed string (no regex) | `-F` |
| Match files only (paths, not lines) | `-l` |
| Count matches per file | `-c` |
| Print only filenames with no matches | `--files-without-match` |
| Exclude binary files | (default) |
| Include binaries | `--binary` |
| Search archives transparently | (rg doesn't — extract first) |
## Output shape
| Need | Flag |
|---|---|
| Show only matched part | `-o` |
| Show byte offset of match | `-b` |
| No filename prefix | `-I` (`--no-filename`) |
| Color always (for piping into a colorizer) | `--color=always` |
| JSON output | (the wrapper already uses `--json` internally) |
## Boundaries
| Need | Flag |
|---|---|
| Line-by-line (default) | (default) |
| Multi-line regex | `--multiline` (or `-U`) |
| Multi-line dotall (`.` matches `\n`) | `--multiline-dotall` |
| CRLF line endings | `--crlf` |
## Path control
| Need | Flag |
|---|---|
| Follow symlinks | `-L` |
| Don't follow | (default) |
| Search hidden | `-.` (also expressed as `hidden=True`) |
| Don't respect any ignores | `-uuu` |
| Glob include | `-g 'pattern'` (also `glob="..."`) |
| Glob exclude | `-g '!pattern'` |
## Performance
| Need | Flag |
|---|---|
| One thread | `-j 1` |
| Force memory-mapped search | `--mmap` (the default heuristics are usually fine) |
| Per-file match cap | `-m N` (also `max_count=N`) |
## Common composed queries
```
# Bare `import x` lines in Python (a starting point for unused-import hunts)
terminal_rg(pattern=r"^import\s+\w+$", path="src", type_filter="py")
# All TODO/FIXME/XXX with file:line
terminal_rg(pattern=r"\b(TODO|FIXME|XXX)\b", path=".", extra_args=["-n"])
# Functions defined at module top-level
terminal_rg(pattern=r"^def\s+\w+", path=".", type_filter="py")
# Lines that DON'T match a pattern: rg inverts at line level with -v
terminal_rg(pattern=r"debug", path=".", extra_args=["-v"])
```
@@ -0,0 +1,110 @@
---
name: hive.terminal-tools-job-control
description: Use when launching anything that runs longer than a minute, anything that streams logs, anything you want to keep running while doing other work — or when terminal_exec auto-backgrounded on you and returned a job_id. Teaches the start→poll→wait pattern with terminal_job_logs offset bookkeeping, the `wait_until_exit=True` blocking-poll idiom, the truncated_bytes_dropped resumption signal, the merge_stderr decision, the SIGINT→SIGTERM→SIGKILL escalation ladder via terminal_job_manage, and the hard rule that jobs die when the terminal-tools server restarts. Read before calling terminal_job_start, or right after terminal_exec auto-backgrounded.
metadata:
author: hive
type: preset-skill
version: "1.0"
---
# Background job control
Background jobs are how you do things that take time without blocking your conversation. Three tools cover the surface: `terminal_job_start`, `terminal_job_logs`, `terminal_job_manage`.
## When to use a job
- Builds, deploys, long tests
- Processes you want to monitor (streaming a log file, a dev server)
- Anything that auto-backgrounded from `terminal_exec` (you have a `job_id`; pivot to this skill's idioms)
For one-shot work expected to finish quickly, `terminal_exec` is simpler. The auto-promotion mechanic in `terminal_exec` is your safety net — start with `terminal_exec`, take over with this skill if needed.
## Lifecycle
```
terminal_job_start(command, ...)
→ { job_id, pid, started_at }
terminal_job_logs(job_id, since_offset=0, max_bytes=64000)
→ { data, offset, next_offset, status: "running"|"exited", exit_code, ... }
# Repeat with since_offset = previous next_offset until status == "exited"
# Or block once with wait_until_exit=True:
terminal_job_logs(job_id, since_offset=N, wait_until_exit=True, wait_timeout_sec=60)
→ blocks server-side until exit or timeout
```
After exit, the job is retained for inspection (`terminal_job_manage(action="list")`) until evicted by FIFO (50 most recent exits kept).
## Offset bookkeeping — the only rule that matters
The job's output lives in a 4 MB ring buffer per stream. Each call to `terminal_job_logs` returns:
- `data` — bytes between `since_offset` and `next_offset`
- `next_offset` — pass this as `since_offset` on your next call
- `truncated_bytes_dropped` — non-zero when your `since_offset` was older than the ring's floor (you fell behind)
**Always carry `next_offset` forward.** Don't replay from 0 — that's an offset reset, you'll see the same data twice and miss the part that fell off.
When `truncated_bytes_dropped > 0`, the buffer evicted N bytes between your last call and now. Treat it as a signal that the job is producing output faster than you're consuming. Either poll more often or accept the gap and read from `next_offset` going forward.
## merge_stderr — interleaved or separate
```
merge_stderr=False → two streams, request "stdout" or "stderr" by name
merge_stderr=True → one stream ("merged"), order preserved
```
Pick `merge_stderr=True` when:
- The job's logs are designed to be read together (most servers, build tools)
- You don't need to distinguish "this was stderr"
Pick `merge_stderr=False` when:
- stderr is genuinely error-only and stdout is data
- You'll process them differently
## Signal escalation
```
terminal_job_manage(action="signal_int", job_id=...) # graceful (Ctrl-C-equivalent)
terminal_job_manage(action="signal_term", job_id=...) # polite kill (SIGTERM)
terminal_job_manage(action="signal_kill", job_id=...) # forced kill (SIGKILL, uncatchable)
```
The idiom: `signal_int` → wait 2-5s → `signal_term` → wait 2-5s → `signal_kill`. Most well-behaved processes handle SIGINT (graceful) and SIGTERM (cleanup, then exit). SIGKILL bypasses cleanup — use only when the process is truly unresponsive.
After signaling, check exit with `terminal_job_logs(job_id, wait_until_exit=True, wait_timeout_sec=2)`.
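Sketched as a loop (tool names per this skill; the 5s wait is illustrative):
```
for action in ("signal_int", "signal_term", "signal_kill"):
    terminal_job_manage(action=action, job_id=jid)
    st = terminal_job_logs(jid, wait_until_exit=True, wait_timeout_sec=5)
    if st["status"] == "exited":
        break
```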
## Stdin
```
terminal_job_manage(action="stdin", job_id=..., data="some input\n")
terminal_job_manage(action="close_stdin", job_id=...)
```
For tools that read stdin to EOF, `close_stdin` after writing flushes them. For interactive tools that read line-by-line, just write each line.
## Take-over: when terminal_exec auto-backgrounds
When `terminal_exec` returned `auto_backgrounded: true, job_id: <X>`, the process is **already** in the JobManager with its output flowing into the ring buffer. Your transition is seamless:
```
# Already saw the start of output in terminal_exec's stdout/stderr.
# Pick up reading where the envelope left off — use the byte count of the
# initial stdout as your since_offset, OR just request tail output:
terminal_job_logs(job_id="job_xxx", tail=True, max_bytes=64000)
```
Or block until exit and grab everything:
```
terminal_job_logs(job_id="job_xxx", since_offset=0, wait_until_exit=True, wait_timeout_sec=120)
```
## Hard rules
- **Jobs die when the server restarts.** The desktop runtime restarts terminal-tools when Hive restarts. There's no re-attach. If you need durability, use `nohup` + `terminal_exec` to detach into the system's process tree and track the PID yourself.
- **Server-wide hard cap on concurrent jobs** (`TERMINAL_TOOLS_MAX_JOBS`, default 32). Past the cap, `terminal_job_start` returns an error. Wait for jobs to exit or kill old ones.
- **No cross-restart output.** Output handles and ring buffers are in-memory only.
See `references/signals.md` for the full signal catalog.
@@ -0,0 +1,41 @@
# Signal reference
terminal_job_manage exposes six signals via the action name.
| Action | Signal | Number | Purpose | Catchable? |
|---|---|---|---|---|
| `signal_int` | SIGINT | 2 | Interrupt — Ctrl-C equivalent. Most CLIs treat as "stop gracefully". | Yes |
| `signal_term` | SIGTERM | 15 | Polite termination request. Default for `kill`. | Yes |
| `signal_kill` | SIGKILL | 9 | Forced kill. Process can't catch, clean up, or finalize. Use sparingly. | **No** |
| `signal_hup` | SIGHUP | 1 | Hangup. Many daemons reload config on this. | Yes |
| `signal_usr1` | SIGUSR1 | 10 | User-defined #1. Common: dump state, rotate logs (nginx, etc). | Yes |
| `signal_usr2` | SIGUSR2 | 12 | User-defined #2. Common: graceful binary upgrade (unicorn, etc). | Yes |
Signal numbers follow the Linux convention; on macOS/BSD, SIGUSR1 is 30 and SIGUSR2 is 31.
## Escalation idiom
```
1. signal_int (Ctrl-C — graceful)
2. wait 2-5s, check status with terminal_job_logs(wait_until_exit=True, wait_timeout_sec=3)
3. if still running: signal_term (cleanup-then-exit)
4. wait 2-5s
5. if still running: signal_kill (forced)
```
The waits matter: SIGTERM handlers do real work (flush logs, close DBs, release locks) and need time. Skipping straight to SIGKILL leaks resources.
## When to use SIGUSR1 / SIGUSR2
These are application-defined. Read the target's docs first. Common:
- **nginx**: SIGUSR1 → reopen log files (for log rotation)
- **unicorn / puma**: SIGUSR2 → fork a new master with the latest binary (graceful restart)
- **rsync**: SIGUSR1 → print stats so far
## Reading exit codes after a signal
When a job exits via signal, `terminal_job_logs` returns `exit_code: -N` (subprocess convention) where `abs(N)` is the signal number. The shell convention `128 + N` doesn't apply to the JobManager — that's for shell-spawned children.
| exit_code | Means |
|---|---|
| -2 | Killed by SIGINT |
| -9 | Killed by SIGKILL |
| -15 | Killed by SIGTERM |
@@ -0,0 +1,127 @@
---
name: hive.terminal-tools-pty-sessions
description: Use when you need state across calls — building env vars, navigating with cd, driving REPLs (python -i, mysql, psql, node), or responding to interactive prompts (sudo password, ssh host-key confirmation, mysql connection). Teaches the prompt-sentinel exec pattern (default mode), raw I/O for REPLs (raw_send=True then read_only=True), the one-in-flight-per-session rule, and the close-or-leak-against-the-cap discipline. Bash on macOS — never zsh; explicit shell=/bin/zsh is rejected. Read before calling terminal_pty_open.
metadata:
author: hive
type: preset-skill
version: "1.0"
---
# Persistent PTY sessions
PTY sessions are how you talk to interactive programs — programs that detect a terminal (`isatty()`) and behave differently when they don't see one. Use a session when:
- You need state to persist across calls (`cd`, env vars, sourced scripts)
- You're driving a REPL (`python -i`, `mysql`, `psql`, `node`, `irb`)
- A program demands an interactive prompt (`sudo`, `ssh`, `npm login`, `gh auth login`)
For everything else, `terminal_exec` is simpler. Sessions cost more (per-session bash process, ring buffer, idle-reaping bookkeeping) and have a hard cap (`TERMINAL_TOOLS_MAX_PTY`, default 8).
## Why PTY (and not subprocess pipes)
Subprocess pipes break on every interactive program. The moment a program calls `isatty()` and sees False, it disables prompts, color, line-editing, password masking, progress bars — sometimes refuses to start. PTY makes us look like a real terminal so these programs work the same as in your shell.
The cost: PTY output includes terminal escape codes (cursor moves, color codes). The session captures them as-is; if you need clean text, strip ANSI escapes in your processing layer.
## Bash on macOS — by deliberate policy
`terminal_pty_open` always invokes `/bin/bash`, regardless of the user's `$SHELL`. macOS users: yes, even when zsh is your interactive default. This is the **terminal-tools-foundations** policy applied to PTYs.
Reasons:
- zsh has command/builtin classes (`zmodload`, `=cmd` expansion, `zpty`, `ztcp`) that bypass bash-shaped security checks
- One shell behavior across platforms eliminates "works on Linux, breaks on macOS" surprises
- Bash is universal: any shell you've used will accept the bash subset
The bash invocation uses `--norc --noprofile` so user dotfiles don't leak in. PS1 is set to a unique sentinel for prompt detection. PS2 is empty. PROMPT_COMMAND is empty.
## Three modes of `terminal_pty_run`
### 1. Default: send command, wait for prompt sentinel
```
terminal_pty_run(session_id, command="ls -la")
→ { output, prompt_after: True, ... }
```
The session writes `ls -la\n`, waits for the sentinel that its custom PS1 emits, returns the slice between submission and prompt. **One in-flight call per session** — a concurrent call returns a `"session busy"` error.
### 2. raw_send: send raw input, no waiting
```
terminal_pty_run(session_id, command="print('hi')\n", raw_send=True)
→ { bytes_sent: 12 }
```
For REPLs, vim keystrokes, password prompts. The session writes the bytes and returns immediately — it doesn't wait for a prompt (REPLs don't print bash's prompt; they print their own).
After a `raw_send`, you typically follow with:
### 3. read_only: drain currently-buffered output
```
terminal_pty_run(session_id, read_only=True, timeout_sec=2)
→ { output: "hi\n", more: False, ... }
```
Reads whatever the session has accumulated since the last drain, with a brief settle window. Use after raw_send to capture the REPL's response.
## Custom prompt detection (`expect`)
When the command launches a program with its own prompt (Python REPL's `>>> `, mysql's `mysql> `, sudo's password prompt), the bash sentinel won't appear until the program exits. Override:
```
terminal_pty_run(session_id, command="python3", expect=r">>>\s*$", timeout_sec=10)
→ output up to and including ">>>", then control returns
```
For sudo:
```
terminal_pty_run(session_id, command="sudo -k && sudo whoami", expect=r"[Pp]assword:")
terminal_pty_run(session_id, command="<password>", raw_send=True, command="<password>\n")
terminal_pty_run(session_id, read_only=True, timeout_sec=5)
```
(Treat passwords carefully — they end up in the ring buffer.)
## Always close
```
terminal_pty_close(session_id)
```
Leaked sessions count against `TERMINAL_TOOLS_MAX_PTY` (default 8). Idle reaping happens lazily on every `_open` call (sessions inactive longer than `idle_timeout_sec`, default 1800s, are dropped) — but don't rely on it. Close when you're done.
For unresponsive sessions, `force=True` skips the graceful "exit" attempt and goes straight to SIGTERM/SIGKILL.
## Common patterns
### Stateful navigation
```
sid = terminal_pty_open(cwd="/")
terminal_pty_run(sid, command="cd /var/log")
terminal_pty_run(sid, command="ls -la *.log | head")
terminal_pty_close(sid)
```
### Python REPL
```
sid = terminal_pty_open()
terminal_pty_run(sid, command="python3", expect=r">>>\s*$")
terminal_pty_run(sid, command="x = 42", raw_send=True)
terminal_pty_run(sid, command="print(x*x)\n", raw_send=True)
result = terminal_pty_run(sid, read_only=True) # → "1764\n>>> "
terminal_pty_run(sid, command="exit()", raw_send=True)
terminal_pty_close(sid)
```
### ssh with host-key prompt
```
sid = terminal_pty_open()
terminal_pty_run(sid, command="ssh user@new-host", expect=r"\(yes/no.*\)\?")
terminal_pty_run(sid, command="yes\n", raw_send=True)
terminal_pty_run(sid, read_only=True, timeout_sec=10) # password prompt or login
```
@@ -0,0 +1,92 @@
---
name: hive.terminal-tools-troubleshooting
description: Read when a terminal-tools call returned something surprising — empty stdout despite no error, exit_code is null, output_handle came back expired, "too many jobs" / "session busy" / "too many PTYs", warning was set unexpectedly, semantic_status disagrees with exit_code. Diagnostic recipes only — load on demand. Don't preload; the foundational skill covers the happy path.
metadata:
author: hive
type: preset-skill
version: "1.0"
---
# Troubleshooting terminal-tools
Recipes for surprising results. Match the symptom to the section.
## Empty `stdout` despite the command "should have" produced output
Possible causes:
1. Output went to **stderr** instead. Check `stderr` in the envelope (or use `merge_stderr=True` for jobs).
2. Output was **fully truncated** because `max_output_kb` is too small. Check `stdout_truncated_bytes > 0`. Bump `max_output_kb` or paginate via `output_handle`.
3. Command produced no output (correct, just unexpected — `silent` flags, no matches).
4. Pipeline issue: the last stage of a pipe ran but stdout went elsewhere (`> /dev/null`, redirected via `2>&1`).
5. Process is buffering its output and didn't flush before exit. Add `stdbuf -oL` (line-buffered) or `unbuffer` to the command.
## `exit_code: null`
| Cause | Other field |
|---|---|
| Auto-backgrounded | `auto_backgrounded: true, job_id: <X>` |
| Hard timeout, process killed | `timed_out: true` |
| Pre-spawn failure (command not found) | `error: ...` set, `pid: null` |
| Still running (in `terminal_job_logs`) | `status: "running"` |
## `output_handle` returned `expired: true`
5-minute TTL. Either (a) you waited too long, or (b) the store evicted it under memory pressure (64 MB total cap, LRU eviction). Re-run the command.
To reduce risk: paginate the handle as soon as you receive it, or use `terminal_job_*` for huge outputs (4 MB ring buffer with offsets — no expiry).
## "too many jobs" / `JobLimitExceeded`
`TERMINAL_TOOLS_MAX_JOBS` (default 32) hit. Either:
- Wait for jobs to exit (poll with `terminal_job_logs(wait_until_exit=True)`)
- Kill old jobs: `terminal_job_manage(action="list")` to see what's running, then `signal_term` the abandoned ones
- Raise the cap via env (rare)
## "session busy"
A `terminal_pty_run` was issued while another `_run` is in flight on the same session. PTY sessions are single-threaded conversations. Wait for the prior call to return, or open a second session.
## "PTY cap reached"
`TERMINAL_TOOLS_MAX_PTY` (default 8) hit. Close idle sessions (`terminal_pty_close`). Idle reaping is lazy and won't rescue you here: opening a new session throws once the cap is hit, so close sessions manually.
## `warning` is set, the command worked
Informational only. The pattern matched (e.g. `rm -rf` literally appears, or `git push --force` was used). The command ran. The warning is your "did I mean to do that?" prompt — verify the side effect was intended before continuing.
## `semantic_status: "ok"` but `exit_code: 1`
Working as designed. Some commands use exit 1 for legitimate non-error states:
- `grep` / `rg` exit 1 when **no matches** found
- `find` exit 1 when **some directories were unreadable** (typical on `/proc`, etc.)
- `diff` exit 1 when **files differ**
- `test` / `[` exit 1 when **condition is false**
The `semantic_message` field explains. Trust `semantic_status`, not raw `exit_code`.
## `semantic_status: "error"` but `exit_code: 0`
Shouldn't happen. If it does, file a bug.
## `truncated_bytes_dropped > 0` in `terminal_job_logs`
Your `since_offset` was older than the ring buffer's floor — bytes evicted before you could read them. Either:
- Poll faster (lower latency between calls)
- Use `merge_stderr=True` (single 4 MB ring instead of 4 MB × 2)
- Accept the gap and move forward from `next_offset`
## `terminal_pty_open` succeeds but the first `_run` times out
The session may not have produced its first prompt sentinel within the 2-second startup window. Try:
- A `terminal_pty_run(sid, read_only=True, timeout_sec=2)` to drain whatever's accumulated
- A noop command (`terminal_pty_run(sid, command="true")`) to force a prompt cycle
Could also indicate the bash process died at startup — `terminal_pty_run(sid, ...)` would then return `"session has exited"`.
## `shell="/bin/zsh"` returned an error
By design. terminal-tools is bash-only on POSIX. Use `shell=True` (default `/bin/bash`) or omit `shell=` to exec directly.
## A command in `shell=True` is interpreted differently than expected
Bash, not zsh, semantics. `**/*` doesn't recurse without `shopt -s globstar`; `=cmd` expansion doesn't work; arrays are 0-indexed and require `${arr[idx]}` (zsh is 1-indexed and accepts `$arr[idx]`). When in doubt, the foundational skill's "bash, not zsh" section is the canonical statement.
+1
@@ -33,6 +33,7 @@ _BUNDLED_DIRS: tuple[Path, ...] = (
# (tool-name prefix, skill directory name, display name)
_TOOL_GATED_SKILLS: list[tuple[str, str, str]] = [
("browser_", "browser-automation", "hive.browser-automation"),
("terminal_", "terminal-tools-foundations", "hive.terminal-tools-foundations"),
]
_BODY_CACHE: dict[str, str] = {}
+43
@@ -0,0 +1,43 @@
"""terminal-tools — Terminal capabilities MCP server.
Exposes ten tools (prefix ``terminal_*``) covering:
- Foreground exec with auto-promotion to background (``terminal_exec``)
- Background job lifecycle (``terminal_job_*``)
- Persistent PTY-backed bash sessions (``terminal_pty_*``)
- Filesystem search (``terminal_rg``, ``terminal_find``)
- Truncation handle retrieval (``terminal_output_get``)
Bash-only on POSIX. zsh is rejected at the shell-resolver level. See
``common/limits.py:_resolve_shell`` for the single enforcement point.
"""
from __future__ import annotations
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from fastmcp import FastMCP
def register_terminal_tools(mcp: FastMCP) -> list[str]:
"""Register all ten terminal-tools with the FastMCP server.
Returns the list of registered tool names so the caller can log /
smoke-test how many landed.
"""
from terminal_tools.exec import register_exec_tools
from terminal_tools.jobs.tools import register_job_tools
from terminal_tools.output import register_output_tools
from terminal_tools.pty.tools import register_pty_tools
from terminal_tools.search.tools import register_search_tools
register_exec_tools(mcp)
register_job_tools(mcp)
register_pty_tools(mcp)
register_search_tools(mcp)
register_output_tools(mcp)
return [name for name in mcp._tool_manager._tools.keys() if name.startswith("terminal_")]
__all__ = ["register_terminal_tools"]
@@ -0,0 +1,72 @@
"""Detect potentially destructive commands and surface a warning string.
Informational only: the warning is included in the exec envelope, not
used to block execution. Lets the agent re-read its command before
trusting the result of an irreversible action. Catalog ported from
claudecode's BashTool/destructiveCommandWarning.ts.
"""
from __future__ import annotations
import re
from collections.abc import Sequence
_PATTERNS: tuple[tuple[re.Pattern[str], str], ...] = (
# Git — data loss / hard to reverse
(re.compile(r"\bgit\s+reset\s+--hard\b"), "may discard uncommitted changes"),
(
re.compile(r"\bgit\s+push\b[^;&|\n]*[ \t](--force|--force-with-lease|-f)\b"),
"may overwrite remote history",
),
(
re.compile(r"\bgit\s+clean\b(?![^;&|\n]*(?:-[a-zA-Z]*n|--dry-run))[^;&|\n]*-[a-zA-Z]*f"),
"may permanently delete untracked files",
),
(re.compile(r"\bgit\s+checkout\s+(--\s+)?\.[ \t]*($|[;&|\n])"), "may discard all working tree changes"),
(re.compile(r"\bgit\s+restore\s+(--\s+)?\.[ \t]*($|[;&|\n])"), "may discard all working tree changes"),
(re.compile(r"\bgit\s+stash[ \t]+(drop|clear)\b"), "may permanently remove stashed changes"),
(
re.compile(r"\bgit\s+branch\s+(-D[ \t]|--delete\s+--force|--force\s+--delete)\b"),
"may force-delete a branch",
),
# Git — safety bypass
(re.compile(r"\bgit\s+(commit|push|merge)\b[^;&|\n]*--no-verify\b"), "may skip safety hooks"),
(re.compile(r"\bgit\s+commit\b[^;&|\n]*--amend\b"), "may rewrite the last commit"),
# File deletion — most specific patterns first so the warning is descriptive
(
re.compile(r"(^|[;&|\n]\s*)rm\s+-[a-zA-Z]*[rR][a-zA-Z]*f|(^|[;&|\n]\s*)rm\s+-[a-zA-Z]*f[a-zA-Z]*[rR]"),
"may recursively force-remove files",
),
(re.compile(r"(^|[;&|\n]\s*)rm\s+-[a-zA-Z]*[rR]"), "may recursively remove files"),
(re.compile(r"(^|[;&|\n]\s*)rm\s+-[a-zA-Z]*f"), "may force-remove files"),
# Database
(
re.compile(r"\b(DROP|TRUNCATE)\s+(TABLE|DATABASE|SCHEMA)\b", re.IGNORECASE),
"may drop or truncate database objects",
),
(re.compile(r"\bDELETE\s+FROM\s+\w+[ \t]*(;|\"|'|\n|$)", re.IGNORECASE), "may delete rows from a database table"),
# Infrastructure
(re.compile(r"\bkubectl\s+delete\b"), "may delete Kubernetes resources"),
(re.compile(r"\bterraform\s+destroy\b"), "may destroy Terraform infrastructure"),
)
def get_warning(command: str | Sequence[str]) -> str | None:
"""Return a warning string if the command matches a destructive pattern.
For argv-style invocations (``command=["rm", "-rf", "/tmp/x"]``), we
join with spaces so the same regex catalog applies. Returns None
when nothing matches.
"""
if isinstance(command, (list, tuple)):
text = " ".join(str(c) for c in command)
else:
text = command
for pattern, message in _PATTERNS:
if pattern.search(text):
return message
return None
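# Example (illustrative, not from the test suite): argv and string forms
# hit the same catalog.
#   get_warning(["rm", "-rf", "/tmp/scratch"])   -> "may recursively force-remove files"
#   get_warning("git push --force origin main")  -> "may overwrite remote history"
#   get_warning("ls -la")                        -> None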
__all__ = ["get_warning"]
+153
@@ -0,0 +1,153 @@
"""Shell resolution + resource limits.
The single place that decides which shell binary we invoke and how to
strip zsh-specific environment leakage. Per the terminal-tools security
stance (see ``destructive_warning.py`` neighbours), zsh constructs
(``zmodload``, ``=cmd``, ``zpty``, ``ztcp``) bypass bash-shaped
checks; refusing zsh isn't aesthetic, it's a deliberate hardening
choice.
"""
from __future__ import annotations
import os
import resource
from collections.abc import Callable
from typing import Any
# Env vars that influence zsh startup. Strip these before exec so a
# user with zsh dotfiles can't accidentally jam zsh behaviour into
# the bash subprocess.
_ZSH_ENV_PREFIXES: tuple[str, ...] = ("ZDOTDIR", "ZSH_")
class ZshRefused(ValueError):
"""Raised when an explicit zsh shell is requested."""
def _resolve_shell(shell: bool | str) -> str | None:
"""Return the shell executable to use, or None for direct exec.
- ``shell=False`` → None (caller should exec command directly)
- ``shell=True`` → ``/bin/bash`` always (ignores ``$SHELL``)
- ``shell="/bin/bash"`` or any path containing ``bash`` → that path
- ``shell="/bin/zsh"`` or any zsh-containing path → raises ZshRefused
Caller is expected to invoke as ``[shell_path, "-c", command]``.
"""
if shell is False or shell is None:
return None
if shell is True:
return "/bin/bash"
if not isinstance(shell, str):
raise TypeError(f"shell must be bool or str, got {type(shell).__name__}")
lower = shell.lower()
if "zsh" in lower:
raise ZshRefused(
f"shell={shell!r} rejected: terminal-tools is bash-only on POSIX. "
"Use shell=True (bash) or omit the shell parameter to exec directly. "
"This is a deliberate security stance — zsh has command/builtin "
"classes (zmodload, =cmd, zpty, ztcp) that bypass bash-shaped checks."
)
return shell
def sanitized_env(extra: dict[str, str] | None = None) -> dict[str, str]:
"""Return os.environ with zsh-related vars stripped, plus optional overrides.
Stripping ``ZDOTDIR`` and ``ZSH_*`` ensures zsh dotfiles don't leak
into the bash subprocess's startup. Bash dotfiles still apply when
the shell is invoked interactively.
"""
env = {k: v for k, v in os.environ.items() if not k.startswith(_ZSH_ENV_PREFIXES)}
if extra:
env.update(extra)
return env
# ── Resource limits ───────────────────────────────────────────────────
# Maps the public limit name to its (resource constant, multiplier)
# tuple. Multipliers convert the agent-friendly unit (seconds, MB) to
# the kernel unit (seconds, bytes).
_LIMIT_MAP: dict[str, tuple[int, int]] = {
"cpu_sec": (resource.RLIMIT_CPU, 1),
"rss_mb": (resource.RLIMIT_AS, 1024 * 1024),
"fsize_mb": (resource.RLIMIT_FSIZE, 1024 * 1024),
"nofile": (resource.RLIMIT_NOFILE, 1),
}
def make_preexec_fn(limits: dict[str, int] | None) -> Callable[[], None] | None:
"""Build a preexec_fn that applies setrlimit before exec.
Returns None if no limits are configured (so subprocess.Popen can
skip the fork hook entirely). Unknown keys are ignored; agents
pass arbitrary dicts and we don't want a typo to crash exec.
"""
if not limits:
return None
def _apply() -> None:
for key, value in limits.items():
spec = _LIMIT_MAP.get(key)
if spec is None or value is None:
continue
rlimit_const, multiplier = spec
limit = int(value) * multiplier
try:
resource.setrlimit(rlimit_const, (limit, limit))
except (OSError, ValueError):
# Hard limit may exceed the current ceiling. Best-effort:
# set just the soft limit to whatever we can.
try:
soft, hard = resource.getrlimit(rlimit_const)
resource.setrlimit(rlimit_const, (min(limit, hard), hard))
except Exception:
pass
return _apply
def coerce_limits(limits: Any) -> dict[str, int] | None:
"""Validate and normalize a user-supplied limits dict.
Accepts the four supported keys (``cpu_sec``, ``rss_mb``,
``fsize_mb``, ``nofile``); silently drops unknown keys; returns
None when the result is empty. Negative or non-int values are
dropped too; invalid limits are better as no-ops than as errors,
since the agent didn't ask for enforcement of a *specific*
failure mode.
"""
if not limits:
return None
if not isinstance(limits, dict):
return None
out: dict[str, int] = {}
for key in _LIMIT_MAP:
value = limits.get(key)
if value is None:
continue
try:
ivalue = int(value)
except (TypeError, ValueError):
continue
if ivalue <= 0:
continue
out[key] = ivalue
return out or None
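# Example wiring (illustrative; subprocess import assumed by the caller):
#   limits = coerce_limits({"cpu_sec": 10, "rss_mb": 512, "typo": "x"})
#   subprocess.Popen([_resolve_shell(True), "-c", cmd],
#                    env=sanitized_env(), preexec_fn=make_preexec_fn(limits))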
__all__ = [
"ZshRefused",
"_resolve_shell",
"coerce_limits",
"make_preexec_fn",
"sanitized_env",
]
@@ -0,0 +1,121 @@
"""TTL-bounded output handle store.
When an exec produces more output than the inline cap (default 256 KB),
the surplus is kept here under a short-lived handle. The agent passes
the handle to ``terminal_output_get`` to paginate the rest. Handles
expire after 5 minutes; total store size is capped at 64 MB with LRU
eviction so the server can't be DoS'd by a chatty subprocess.
Thread-safe: exec/job code paths populate; the MCP request thread
drains.
"""
from __future__ import annotations
import secrets
import threading
import time
from dataclasses import dataclass, field
_DEFAULT_TTL_SEC = 300
_DEFAULT_TOTAL_CAP_BYTES = 64 * 1024 * 1024
@dataclass(slots=True)
class _Entry:
data: bytes
created_at: float
last_accessed_at: float = field(default_factory=time.monotonic)
class OutputStore:
"""LRU-with-TTL byte store keyed by opaque handle."""
def __init__(
self,
ttl_sec: int = _DEFAULT_TTL_SEC,
total_cap_bytes: int = _DEFAULT_TOTAL_CAP_BYTES,
):
self._ttl = ttl_sec
self._cap = total_cap_bytes
self._entries: dict[str, _Entry] = {}
self._total_bytes = 0
self._lock = threading.Lock()
def put(self, data: bytes) -> str:
"""Store ``data``, return a fresh handle. Evicts older entries
if the total cap would be exceeded."""
if not data:
# Empty payloads don't need a handle.
return ""
handle = "out_" + secrets.token_hex(8)
now = time.monotonic()
with self._lock:
self._evict_locked(now)
# Reserve room for new entry; evict LRU until it fits.
while self._total_bytes + len(data) > self._cap and self._entries:
self._pop_lru_locked()
self._entries[handle] = _Entry(data=data, created_at=now, last_accessed_at=now)
self._total_bytes += len(data)
return handle
def get(self, handle: str, since_offset: int = 0, max_bytes: int = 64 * 1024) -> dict:
"""Retrieve a slice of stored data.
Returns ``{data, offset, next_offset, eof, expired}`` so the
agent can paginate without separate calls. ``expired=True``
when the handle is unknown or the TTL has lapsed.
"""
now = time.monotonic()
with self._lock:
self._evict_locked(now)
entry = self._entries.get(handle)
if entry is None:
return {
"data": "",
"offset": int(since_offset),
"next_offset": int(since_offset),
"eof": True,
"expired": True,
}
entry.last_accessed_at = now
buf = entry.data
since = max(0, int(since_offset))
end = min(len(buf), since + max(0, int(max_bytes)))
data_slice = buf[since:end]
return {
"data": data_slice.decode("utf-8", errors="replace"),
"offset": since,
"next_offset": end,
"eof": end >= len(buf),
"expired": False,
}
# ── Eviction ──────────────────────────────────────────────────
def _evict_locked(self, now: float) -> None:
# TTL eviction — anything past TTL goes.
stale = [h for h, e in self._entries.items() if now - e.created_at > self._ttl]
for h in stale:
entry = self._entries.pop(h, None)
if entry is not None:
self._total_bytes -= len(entry.data)
def _pop_lru_locked(self) -> None:
if not self._entries:
return
oldest_handle = min(self._entries, key=lambda h: self._entries[h].last_accessed_at)
entry = self._entries.pop(oldest_handle)
self._total_bytes -= len(entry.data)
# Module-level singleton; the server has one instance per process.
_STORE = OutputStore()
def get_store() -> OutputStore:
return _STORE
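# Example (illustrative): stash overflow bytes, then paginate.
#   handle = get_store().put(overflow)                       # "" if overflow is empty
#   page = get_store().get(handle, since_offset=0, max_bytes=64 * 1024)
#   # feed page["next_offset"] back in until page["eof"] or page["expired"]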
__all__ = ["OutputStore", "get_store"]
@@ -0,0 +1,155 @@
"""Bounded byte ring buffer with absolute monotonic offsets.
The streaming primitive shared by jobs and PTY sessions. Writers push
bytes; readers ask for ``[since_offset, since_offset + N)`` and the
buffer either returns the data (if still in window) or signals how
many bytes were dropped from the floor. This lets the agent resume
after a missed poll without silent loss.
Thread-safe via a single lock; readers and writers can come from
different threads (a pump thread fills it, the MCP request thread
drains it).
"""
from __future__ import annotations
import threading
from collections import deque
from dataclasses import dataclass
@dataclass(slots=True)
class ReadResult:
data: bytes
offset: int
next_offset: int
truncated_bytes_dropped: int # bytes lost between since_offset and the buffer floor
class RingBuffer:
"""Capacity-bounded byte ring with absolute offsets.
The total written count never resets; each call sees absolute
offsets growing monotonically. The in-memory window slides forward
once total_written exceeds capacity_bytes.
"""
def __init__(self, capacity_bytes: int = 4 * 1024 * 1024):
if capacity_bytes <= 0:
raise ValueError("capacity_bytes must be positive")
self._capacity = capacity_bytes
self._chunks: deque[bytes] = deque()
self._buffered_bytes = 0
self._floor = 0 # absolute offset of the oldest byte still in buffer
self._total_written = 0
self._eof = False
self._lock = threading.Lock()
# ── Writer side ───────────────────────────────────────────────
def write(self, data: bytes) -> None:
if not data:
return
with self._lock:
self._chunks.append(data)
self._buffered_bytes += len(data)
self._total_written += len(data)
self._evict_locked()
def close(self) -> None:
"""Mark the stream as ended. Subsequent reads will see eof=True
once they catch up to total_written."""
with self._lock:
self._eof = True
def _evict_locked(self) -> None:
while self._buffered_bytes > self._capacity and self._chunks:
head = self._chunks[0]
overshoot = self._buffered_bytes - self._capacity
if len(head) <= overshoot:
self._chunks.popleft()
self._buffered_bytes -= len(head)
self._floor += len(head)
else:
self._chunks[0] = head[overshoot:]
self._buffered_bytes -= overshoot
self._floor += overshoot
# ── Reader side ───────────────────────────────────────────────
@property
def total_written(self) -> int:
with self._lock:
return self._total_written
@property
def floor(self) -> int:
with self._lock:
return self._floor
@property
def eof(self) -> bool:
with self._lock:
return self._eof
def read(self, since_offset: int, max_bytes: int) -> ReadResult:
"""Read up to ``max_bytes`` starting at ``since_offset``.
- If ``since_offset`` is past total_written, returns empty data
(and ``next_offset == since_offset``, signaling caller to wait).
- If ``since_offset`` is below the buffer floor, the missed
bytes are reported as ``truncated_bytes_dropped`` and reading
starts from the floor.
"""
max_bytes = max(0, int(max_bytes))
with self._lock:
since = max(0, int(since_offset))
dropped = 0
if since < self._floor:
dropped = self._floor - since
since = self._floor
available = self._total_written - since
if available <= 0 or max_bytes == 0:
return ReadResult(
data=b"",
offset=since,
next_offset=since,
truncated_bytes_dropped=dropped,
)
to_take = min(available, max_bytes)
# Walk chunks to assemble [since, since+to_take)
cursor = self._floor
collected: list[bytes] = []
remaining = to_take
for chunk in self._chunks:
chunk_end = cursor + len(chunk)
if chunk_end <= since:
cursor = chunk_end
continue
start_in_chunk = max(0, since - cursor)
end_in_chunk = min(len(chunk), start_in_chunk + remaining)
slice_ = chunk[start_in_chunk:end_in_chunk]
collected.append(slice_)
remaining -= len(slice_)
cursor = chunk_end
if remaining <= 0:
break
data = b"".join(collected)
return ReadResult(
data=data,
offset=since,
next_offset=since + len(data),
truncated_bytes_dropped=dropped,
)
    def tail(self, max_bytes: int) -> ReadResult:
        """Read the last ``max_bytes`` (or as much as is buffered)."""
        with self._lock:
            start = max(self._floor, self._total_written - max(0, int(max_bytes)))
        # read() re-acquires the non-reentrant lock, so it must run
        # outside the with-block above.
        return self.read(start, max_bytes)
__all__ = ["RingBuffer", "ReadResult"]
@@ -0,0 +1,103 @@
"""Per-command exit-code semantics.
Many commands use exit codes to convey information other than just
success/failure. ``grep`` returns 1 when no matches are found, which
is not an error. Encoding this lookup means the agent reads
``semantic_status`` instead of having to memorize per-command quirks.
Catalog ported from claudecode's BashTool/commandSemantics.ts. We
inspect only the *final* command in a piped chain, since its exit code
is what the shell propagates. With ``shell=False`` we know the argv
exactly; for ``shell=True`` strings we take the segment after the last
pipe/chain operator as a best-effort guess. Parsing a bash command
string for "the last command" is inherently fragile, and the upstream
tool documents the same caveat.
"""
from __future__ import annotations
from collections.abc import Sequence
SemanticStatus = str # "ok" | "signal" | "error"
# Maps base command name → (exit_code → semantic). Returning
# (status, message) — message may be None for the success cases.
_SEMANTICS: dict[str, dict[int, tuple[SemanticStatus, str | None]]] = {
# grep: 0=matches, 1=no matches (not an error), 2+=error
"grep": {0: ("ok", None), 1: ("ok", "No matches found")},
"rg": {0: ("ok", None), 1: ("ok", "No matches found")},
"ripgrep": {0: ("ok", None), 1: ("ok", "No matches found")},
# find: 0=success, 1=partial (some dirs unreadable), 2+=error
"find": {0: ("ok", None), 1: ("ok", "Some directories were inaccessible")},
# diff: 0=identical, 1=differ (informational), 2+=error
"diff": {0: ("ok", None), 1: ("ok", "Files differ")},
# test / [: 0=true, 1=false, 2+=error
"test": {0: ("ok", None), 1: ("ok", "Condition is false")},
"[": {0: ("ok", None), 1: ("ok", "Condition is false")},
}
def _base_command(command: str | Sequence[str]) -> str:
"""Extract the base command (first word) from argv or a string.
For shell=True strings, picks the *last* command in a pipeline since
that determines the propagated exit code. Heuristic and intentionally
not security-critical; it is only used to label the exit-code semantics.
"""
if isinstance(command, (list, tuple)):
return command[0] if command else ""
if not isinstance(command, str):
return ""
# Take the segment after the last unquoted pipe/&&/||/; — best-effort.
text = command
for sep in ("||", "&&", "|", ";"):
# Crude split — fine for the heuristic.
if sep in text:
text = text.split(sep)[-1]
text = text.strip()
if not text:
return ""
first = text.split()[0]
# Strip a leading path: /usr/bin/grep → grep
return first.rsplit("/", 1)[-1]
def classify(
command: str | Sequence[str],
exit_code: int | None,
*,
timed_out: bool = False,
signaled: bool = False,
) -> tuple[SemanticStatus, str | None]:
"""Classify an exit code with command-specific semantics.
Returns (status, message) where status is one of "ok"/"signal"/"error"
and message is a short explanation when the status would otherwise
surprise the agent (e.g. ``grep`` exiting 1).
"""
if timed_out:
return ("error", "Command timed out")
if signaled:
return ("signal", f"Killed by signal (exit {exit_code})")
if exit_code is None:
return ("ok", "Still running") # auto-backgrounded case
base = _base_command(command)
table = _SEMANTICS.get(base)
if table is not None:
if exit_code in table:
return table[exit_code]
# Beyond the catalog's known codes for this command, treat as error.
return ("error", f"Command failed with exit code {exit_code}")
# Default: zero is success, nonzero is error.
if exit_code == 0:
return ("ok", None)
return ("error", f"Command failed with exit code {exit_code}")
__all__ = ["classify"]
@@ -0,0 +1,107 @@
"""Helpers to build the standard exec/job envelope with truncation.
The envelope shape is documented in the foundational skill; keep
this module's output stable so skill updates don't have to chase
field renames. Callers pass raw bytes; we decode and trim.
"""
from __future__ import annotations
from collections.abc import Sequence
from terminal_tools.common.destructive_warning import get_warning
from terminal_tools.common.output_store import get_store
from terminal_tools.common.semantic_exit import classify
def _truncate_bytes(buf: bytes, max_bytes: int) -> tuple[str, int, str]:
"""Trim ``buf`` to ``max_bytes`` (decoded). Returns
``(decoded_text, dropped_bytes, full_for_handle)``. We always store
the *original* bytes in the handle so the agent gets exactly what
the process emitted, even when the truncation point splits a
multi-byte char.
"""
if max_bytes < 0:
max_bytes = 0
if len(buf) <= max_bytes:
return buf.decode("utf-8", errors="replace"), 0, buf.decode("utf-8", errors="replace")
head = buf[:max_bytes]
return (
head.decode("utf-8", errors="replace"),
len(buf) - max_bytes,
buf.decode("utf-8", errors="replace"),
)
def build_exec_envelope(
*,
command: str | Sequence[str],
exit_code: int | None,
stdout_bytes: bytes,
stderr_bytes: bytes,
runtime_ms: int,
pid: int | None,
timed_out: bool,
signaled: bool = False,
max_output_kb: int = 256,
auto_backgrounded: bool = False,
job_id: str | None = None,
auto_shell: bool = False,
) -> dict:
"""Construct the standard exec envelope.
See ``terminal-tools-foundations`` SKILL for the field semantics. The
inline ``stdout``/``stderr`` are decoded and trimmed; if either
overflows ``max_output_kb`` the *full* bytes are stashed in the
output store under ``output_handle`` for retrieval via
``terminal_output_get``. Both streams are stored under the one handle,
joined with ``--- stdout ---`` / ``--- stderr ---`` separators, so the
agent can recover either stream in full.
"""
max_bytes = max(1024, max_output_kb * 1024)
stdout_text, stdout_dropped, stdout_full = _truncate_bytes(stdout_bytes, max_bytes)
stderr_text, stderr_dropped, stderr_full = _truncate_bytes(stderr_bytes, max_bytes)
output_handle: str | None = None
if stdout_dropped > 0 or stderr_dropped > 0:
store = get_store()
        # Store both streams whenever either overflows, joined with the
        # separators the foundational skill documents, so the agent can
        # fetch the other stream in full too if it wants.
combined = (
b"--- stdout ---\n"
+ stdout_bytes
+ b"\n--- stderr ---\n"
+ stderr_bytes
)
output_handle = store.put(combined)
semantic_status, semantic_message = classify(
command, exit_code, timed_out=timed_out, signaled=signaled
)
warning = get_warning(command)
return {
"exit_code": exit_code,
"stdout": stdout_text,
"stderr": stderr_text,
"stdout_truncated_bytes": stdout_dropped,
"stderr_truncated_bytes": stderr_dropped,
"runtime_ms": int(runtime_ms),
"pid": int(pid) if pid is not None else None,
"output_handle": output_handle,
"timed_out": bool(timed_out),
"semantic_status": semantic_status,
"semantic_message": semantic_message,
"warning": warning,
"auto_backgrounded": bool(auto_backgrounded),
"job_id": job_id,
"auto_shell": bool(auto_shell),
}
__all__ = ["build_exec_envelope"]
+307
View File
@@ -0,0 +1,307 @@
"""``terminal_exec`` — foreground exec with auto-promotion to background.
The flagship tool. Most agent terminal interactions go through here:
fast commands (<30s) return inline with the standard envelope; longer
commands silently transition into the JobManager and surface a
``job_id`` so the agent can poll. The "should I background this?"
decision is removed; the answer is always yes-if-needed.
Implementation notes:
- We spawn the process the same way JobManager does, then wait with
``proc.wait(timeout=auto_background_after_sec)``. Inline path
drains pipes via ``proc.communicate()`` to avoid pipe-fill
deadlocks.
- Auto-promotion: when the timeout fires while the process is still
running, we already have its stdin/stdout/stderr file objects.
We hand them to JobManager which spawns pump threads to fill ring
buffers from that point on. The agent sees an envelope with
``auto_backgrounded=True, exit_code=None, job_id=<>`` and
transitions to ``terminal_job_logs``. **There's no early-output loss**
because the pumps start before we return from the tool call.
- For pure-foreground use (``auto_background_after_sec=0``), we
fall back to ``proc.communicate(timeout=timeout_sec)`` which has
the simpler "kill on overall timeout" semantics.
"""
from __future__ import annotations
import shlex
import subprocess
import threading
import time
from typing import TYPE_CHECKING
from terminal_tools.common.limits import (
ZshRefused,
_resolve_shell,
coerce_limits,
make_preexec_fn,
sanitized_env,
)
from terminal_tools.common.ring_buffer import RingBuffer
from terminal_tools.common.truncation import build_exec_envelope
from terminal_tools.jobs.manager import JobLimitExceeded, get_manager
if TYPE_CHECKING:
from fastmcp import FastMCP
# Tokens that indicate the user passed a shell-syntax command (pipes,
# redirects, conditional chains) rather than an argv list. When any of
# these appear as standalone tokens in shlex.split(command), we silently
# route the command through /bin/bash -c instead of trying to exec it
# directly — the alternative is spawning the first program with the rest
# of the line as junk argv, which either errors or returns fake success
# (e.g. `echo "..." && ps ...` → echo prints the literal command).
_SHELL_METACHARS: frozenset[str] = frozenset(
{"|", "&&", "||", ";", ">", "<", ">>", "<<", "&", "2>", "2>&1", "|&"}
)
def register_exec_tools(mcp: FastMCP) -> None:
@mcp.tool()
def terminal_exec(
command: str,
cwd: str | None = None,
env: dict[str, str] | None = None,
timeout_sec: int = 60,
auto_background_after_sec: int = 30,
shell: bool = False,
stdin: str | None = None,
limits: dict[str, int] | None = None,
max_output_kb: int = 256,
) -> dict:
"""Run a shell command and capture its output.
Past auto_background_after_sec, the call auto-promotes to a background
job and returns immediately with `auto_backgrounded=True, job_id=...`;
poll with terminal_job_logs(job_id, since_offset=...) to read the rest.
Set auto_background_after_sec=0 to force pure foreground (kill on
timeout_sec).
Bash-only on POSIX. Passing shell="/bin/zsh" raises an error; this is
a deliberate security stance.
Args:
command: The command. With shell=False we split with shlex;
    pipes / redirects / globs are auto-detected and routed
    through bash -c (surfaced as auto_shell=True).
cwd: Working directory.
env: Environment override (merged into a sanitized base; zsh
dotfile vars are stripped).
timeout_sec: Hard kill deadline. Past this, the process is
terminated and `timed_out=True` is returned. Should be at least
auto_background_after_sec for the auto-promote path to work.
auto_background_after_sec: Inline budget. Past this, promote to
a background job and return. 0 disables auto-promotion.
shell: True for `/bin/bash -c <command>`. zsh refused.
stdin: Optional stdin payload (string).
limits: Optional setrlimit caps. Keys: cpu_sec, rss_mb,
fsize_mb, nofile.
max_output_kb: Inline output cap. Overflow stashes to an
output_handle for retrieval via terminal_output_get.
Returns the standard envelope: see `terminal-tools-foundations` skill.
"""
# Auto-detect shell-syntax commands. If the agent passes
# ``shell=False`` (the default) but the command contains a pipe,
# redirect, ``&&``, etc., naive argv splitting silently mangles
it — we'd exec the first token with the rest as junk arguments.
# Detect that case and transparently route through bash -c, then
# surface an ``auto_shell=True`` flag in the envelope so the
# foundational skill / agent feedback loop can learn from it.
auto_shell = False
try:
if shell:
# User opted in; trust them.
pass
else:
try:
tokens = shlex.split(command, posix=True)
except ValueError:
# Unbalanced quotes — almost certainly meant for the shell.
auto_shell = True
tokens = []
if not auto_shell:
if not tokens:
return _err_envelope(command, "command was empty")
if any(t in _SHELL_METACHARS for t in tokens) or any(
# globs that shlex left unexpanded (`*`, `?`, `[`)
any(c in t for c in "*?[") and t != "[" for t in tokens
):
auto_shell = True
full_env = sanitized_env(env) if env is not None else None
preexec = make_preexec_fn(coerce_limits(limits))
except ZshRefused as e:
return _err_envelope(command, str(e))
effective_shell: bool | str = True if auto_shell else shell
# Resolve shell here so the same logic the JobManager uses applies
# in both the inline + promoted paths.
try:
resolved_shell = _resolve_shell(effective_shell)
except ZshRefused as e:
return _err_envelope(command, str(e))
if resolved_shell is not None:
spawn_argv: list[str] = [resolved_shell, "-c", command]
else:
# shell=False AND no metacharacters → safe to direct-exec.
spawn_argv = tokens
start = time.monotonic()
try:
proc = subprocess.Popen(
spawn_argv,
cwd=cwd,
env=full_env,
stdin=subprocess.PIPE if stdin is not None else None,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
preexec_fn=preexec,
close_fds=True,
bufsize=0,
)
except FileNotFoundError as e:
return _err_envelope(command, f"command not found: {e}")
except OSError as e:
return _err_envelope(command, f"spawn failed: {e}")
# Push stdin without blocking on the process draining it. For
# large stdin payloads this would deadlock; for typical agent
# use (small payloads or None) it's fine.
if stdin is not None and proc.stdin is not None:
try:
proc.stdin.write(stdin.encode("utf-8"))
proc.stdin.close()
except (BrokenPipeError, OSError):
pass
# Pump stdout/stderr into ring buffers so we don't deadlock on
# full pipes during the wait. These same buffers become the
# job's buffers if we auto-promote.
stdout_buf = RingBuffer()
stderr_buf = RingBuffer()
pumps: list[threading.Thread] = []
def _pump(stream, ring: RingBuffer) -> None:
try:
while True:
chunk = stream.read(4096)
if not chunk:
break
ring.write(chunk)
except (OSError, ValueError):
pass
finally:
try:
stream.close()
except Exception:
pass
ring.close()
if proc.stdout is not None:
t = threading.Thread(target=_pump, args=(proc.stdout, stdout_buf), daemon=True)
t.start()
pumps.append(t)
if proc.stderr is not None:
t = threading.Thread(target=_pump, args=(proc.stderr, stderr_buf), daemon=True)
t.start()
pumps.append(t)
# Wait for either: auto-bg budget, hard timeout, or natural exit.
promoted = False
timed_out = False
budget = auto_background_after_sec if auto_background_after_sec > 0 else timeout_sec
budget = min(budget, timeout_sec) if timeout_sec > 0 else budget
try:
proc.wait(timeout=budget if budget > 0 else None)
except subprocess.TimeoutExpired:
if auto_background_after_sec > 0:
# Promote: the process keeps running, we hand its
# already-pumping buffers to the JobManager.
try:
record = get_manager().adopt_running(
proc,
spawn_argv if resolved_shell is None else command,
merged=False,
existing_stdout_buf=stdout_buf,
existing_stderr_buf=stderr_buf,
existing_pumps=pumps,
)
promoted = True
return build_exec_envelope(
command=command,
exit_code=None,
stdout_bytes=stdout_buf.tail(64 * 1024).data,
stderr_bytes=stderr_buf.tail(64 * 1024).data,
runtime_ms=int((time.monotonic() - start) * 1000),
pid=proc.pid,
timed_out=False,
max_output_kb=max_output_kb,
auto_backgrounded=True,
job_id=record.job_id,
auto_shell=auto_shell,
)
except JobLimitExceeded:
# Cap reached; treat as a hard timeout rather than spin.
pass
# Fall through to hard-kill path.
try:
proc.terminate()
proc.wait(timeout=2.0)
except subprocess.TimeoutExpired:
proc.kill()
proc.wait()
timed_out = True
# Inline path: drain pump threads.
for t in pumps:
t.join(timeout=2.0)
runtime_ms = int((time.monotonic() - start) * 1000)
exit_code = proc.returncode if not promoted else None
        # Everything still retained is in the ring; reading from offset 0
        # grabs it all (bytes past the ring cap were dropped from the floor).
stdout_full = stdout_buf.read(0, stdout_buf.total_written).data
stderr_full = stderr_buf.read(0, stderr_buf.total_written).data
return build_exec_envelope(
command=command,
exit_code=exit_code,
stdout_bytes=stdout_full,
stderr_bytes=stderr_full,
runtime_ms=runtime_ms,
pid=proc.pid,
timed_out=timed_out,
signaled=(exit_code is not None and exit_code < 0),
max_output_kb=max_output_kb,
auto_shell=auto_shell,
)
def _err_envelope(command: str, message: str) -> dict:
"""Construct an envelope-shaped error reply for pre-spawn failures."""
return {
"exit_code": None,
"stdout": "",
"stderr": message,
"stdout_truncated_bytes": 0,
"stderr_truncated_bytes": 0,
"runtime_ms": 0,
"pid": None,
"output_handle": None,
"timed_out": False,
"semantic_status": "error",
"semantic_message": message,
"warning": None,
"auto_backgrounded": False,
"job_id": None,
"auto_shell": False,
"error": message,
}
__all__ = ["register_exec_tools"]
@@ -0,0 +1,6 @@
"""Background job management for terminal-tools."""
from terminal_tools.jobs.manager import JobManager, JobRecord, get_manager
from terminal_tools.jobs.tools import register_job_tools
__all__ = ["JobManager", "JobRecord", "get_manager", "register_job_tools"]
+424
View File
@@ -0,0 +1,424 @@
"""Background job manager.
Owns the long-lived ``Popen`` instances backing ``terminal_job_*`` and
``terminal_exec`` auto-promotion. Each job has up to two ring buffers
(stdout / stderr, or one merged) fed by background pump threads.
Design notes:
- We don't use asyncio here. FastMCP's tool handlers run in a worker
thread; subprocess + threads compose more naturally with that
model than asyncio Subprocess (which would need its own loop).
- ``terminal_exec`` "promotes" by adopting an already-running Popen
into the manager it doesn't re-spawn. The pump threads were
already filling buffers in the exec path.
- Hard concurrency cap (env: ``TERMINAL_TOOLS_MAX_JOBS``, default 32).
The cap is the only non-bypassable safety pin per the soft-
guardrails design.
- On server shutdown the lifespan hook calls ``shutdown_all()``
which TERMs every child, waits 2s, then KILLs. Eliminates
orphans.
"""
from __future__ import annotations
import os
import secrets
import signal
import subprocess
import threading
import time
from collections.abc import Sequence
from dataclasses import dataclass, field
from typing import Any
from terminal_tools.common.ring_buffer import RingBuffer
_MAX_JOBS_DEFAULT = 32
_DEFAULT_RING_BYTES = 4 * 1024 * 1024
_RECENT_EXIT_KEEP = 50 # exited jobs we still surface to ``terminal_job_manage(action="list")``
@dataclass(slots=True)
class JobRecord:
job_id: str
pid: int
name: str
command: str | list[str]
started_at: float
proc: subprocess.Popen[bytes]
stdout_buf: RingBuffer | None
stderr_buf: RingBuffer | None
merged: bool
pumps: list[threading.Thread] = field(default_factory=list)
exited_at: float | None = None
exit_code: int | None = None
signaled: bool = False
# Adopted=True when the job started life as a foreground terminal_exec
# and was promoted past the auto-background budget.
adopted: bool = False
@property
def status(self) -> str:
return "exited" if self.exited_at is not None else "running"
def runtime_ms(self) -> int:
end = self.exited_at if self.exited_at is not None else time.monotonic()
return int((end - self.started_at) * 1000)
def to_summary(self) -> dict[str, Any]:
return {
"job_id": self.job_id,
"pid": self.pid,
"name": self.name,
"command": self.command,
"started_at": self.started_at,
"status": self.status,
"exit_code": self.exit_code,
"runtime_ms": self.runtime_ms(),
"merged": self.merged,
"stdout_bytes": (self.stdout_buf.total_written if self.stdout_buf else 0),
"stderr_bytes": (self.stderr_buf.total_written if self.stderr_buf else 0),
"adopted": self.adopted,
}
class JobLimitExceeded(RuntimeError):
"""Raised when the per-server concurrent-job cap would be exceeded."""
class JobManager:
def __init__(self, max_jobs: int | None = None, ring_bytes: int = _DEFAULT_RING_BYTES):
self._max_jobs = max_jobs or int(os.getenv("TERMINAL_TOOLS_MAX_JOBS", str(_MAX_JOBS_DEFAULT)))
self._ring_bytes = ring_bytes
self._jobs: dict[str, JobRecord] = {}
# FIFO of recently-exited job_ids so list/inspect can still
# find them for a while after exit.
self._exited_order: list[str] = []
self._lock = threading.Lock()
# ── Public API ────────────────────────────────────────────────
def active_count(self) -> int:
with self._lock:
return sum(1 for j in self._jobs.values() if j.exited_at is None)
def start(
self,
command: str | Sequence[str],
*,
cwd: str | None = None,
env: dict[str, str] | None = None,
shell: bool | str = False,
merge_stderr: bool = False,
name: str | None = None,
preexec_fn=None,
) -> JobRecord:
"""Spawn a process and start pumping its output into ring buffers."""
if self.active_count() >= self._max_jobs:
raise JobLimitExceeded(
f"terminal-tools job cap reached ({self._max_jobs}). "
"Wait for a job to finish or raise TERMINAL_TOOLS_MAX_JOBS."
)
proc = self._spawn(command, cwd=cwd, env=env, shell=shell, merge_stderr=merge_stderr, preexec_fn=preexec_fn)
record = self._adopt(proc, command, name=name, merged=merge_stderr)
return record
def adopt_running(
self,
proc: subprocess.Popen[bytes],
command: str | Sequence[str],
*,
name: str | None = None,
merged: bool = False,
existing_stdout_buf: RingBuffer | None = None,
existing_stderr_buf: RingBuffer | None = None,
existing_pumps: list[threading.Thread] | None = None,
) -> JobRecord:
"""Adopt a Popen that's already running with pumps in flight.
Used by ``terminal_exec`` for auto-promotion: the foreground path
had already started pump threads filling its own ring buffers.
We hand the buffers + pumps over to the manager so the agent
can resume reading via ``terminal_job_logs``.
"""
if self.active_count() >= self._max_jobs:
# Mid-call cap exceeded — kill and report.
try:
proc.terminate()
except Exception:
pass
raise JobLimitExceeded(
f"terminal-tools job cap reached ({self._max_jobs}); foreground exec was killed during auto-promotion."
)
record = self._wrap(
proc,
command,
name=name,
merged=merged,
stdout_buf=existing_stdout_buf,
stderr_buf=existing_stderr_buf,
pumps=existing_pumps,
adopted=True,
)
with self._lock:
self._jobs[record.job_id] = record
# Watcher only — pumps already running.
threading.Thread(target=self._watch_for_exit, args=(record,), daemon=True).start()
return record
def get(self, job_id: str) -> JobRecord | None:
with self._lock:
return self._jobs.get(job_id)
def list(self) -> list[dict]:
with self._lock:
jobs = list(self._jobs.values())
# Recent first — running, then exited by exit time descending
jobs.sort(
key=lambda j: (j.exited_at is not None, -(j.exited_at or j.started_at)),
)
return [j.to_summary() for j in jobs]
def signal(self, job_id: str, signum: int) -> bool:
record = self.get(job_id)
if record is None or record.exited_at is not None:
return False
try:
record.proc.send_signal(signum)
return True
except (ProcessLookupError, OSError):
return False
def write_stdin(self, job_id: str, data: bytes, *, close_after: bool = False) -> int:
record = self.get(job_id)
if record is None or record.proc.stdin is None or record.exited_at is not None:
return 0
try:
n = record.proc.stdin.write(data)
record.proc.stdin.flush()
if close_after:
record.proc.stdin.close()
return int(n or len(data))
except (BrokenPipeError, OSError):
return 0
def close_stdin(self, job_id: str) -> bool:
record = self.get(job_id)
if record is None or record.proc.stdin is None:
return False
try:
record.proc.stdin.close()
return True
except OSError:
return False
def wait(self, job_id: str, timeout_sec: float | None = None) -> JobRecord | None:
"""Block until the job exits or ``timeout_sec`` elapses. Returns
the (possibly still-running) record so callers can read final state."""
record = self.get(job_id)
if record is None:
return None
try:
record.proc.wait(timeout=timeout_sec)
except subprocess.TimeoutExpired:
pass
return record
def shutdown_all(self, grace_sec: float = 2.0) -> None:
"""SIGTERM every running job, wait ``grace_sec``, then SIGKILL.
Called from the FastMCP lifespan hook. Idempotent."""
with self._lock:
running = [j for j in self._jobs.values() if j.exited_at is None]
for record in running:
try:
record.proc.terminate()
except Exception:
pass
deadline = time.monotonic() + grace_sec
while time.monotonic() < deadline and any(j.proc.poll() is None for j in running):
time.sleep(0.05)
for record in running:
if record.proc.poll() is None:
try:
record.proc.kill()
except Exception:
pass
# ── Internals ─────────────────────────────────────────────────
def _spawn(
self,
command: str | Sequence[str],
*,
cwd: str | None,
env: dict[str, str] | None,
shell: bool | str,
merge_stderr: bool,
preexec_fn,
) -> subprocess.Popen[bytes]:
# Resolve shell: a string shell is coerced to ``[<shell>, "-c", command]``,
# bool=True means /bin/bash with the same shape.
from terminal_tools.common.limits import _resolve_shell
resolved = _resolve_shell(shell)
if resolved is not None:
if isinstance(command, (list, tuple)):
command_str = " ".join(str(c) for c in command)
else:
command_str = str(command)
argv: list[str] = [resolved, "-c", command_str]
shell_arg = False
else:
argv = list(command) if isinstance(command, (list, tuple)) else command # type: ignore[assignment]
shell_arg = False
return subprocess.Popen(
argv,
cwd=cwd,
env=env,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=(subprocess.STDOUT if merge_stderr else subprocess.PIPE),
shell=shell_arg,
preexec_fn=preexec_fn,
close_fds=True,
bufsize=0,
)
def _adopt(
self,
proc: subprocess.Popen[bytes],
command: str | Sequence[str],
*,
name: str | None,
merged: bool,
) -> JobRecord:
stdout_buf = RingBuffer(self._ring_bytes)
stderr_buf = None if merged else RingBuffer(self._ring_bytes)
record = self._wrap(proc, command, name=name, merged=merged, stdout_buf=stdout_buf, stderr_buf=stderr_buf)
with self._lock:
self._jobs[record.job_id] = record
# Start pumps + watcher
if proc.stdout is not None:
t = threading.Thread(
target=_pump_stream,
args=(proc.stdout, stdout_buf),
daemon=True,
name=f"shell-job-stdout-{record.job_id}",
)
t.start()
record.pumps.append(t)
if not merged and proc.stderr is not None and stderr_buf is not None:
t = threading.Thread(
target=_pump_stream,
args=(proc.stderr, stderr_buf),
daemon=True,
name=f"shell-job-stderr-{record.job_id}",
)
t.start()
record.pumps.append(t)
threading.Thread(target=self._watch_for_exit, args=(record,), daemon=True).start()
return record
def _wrap(
self,
proc: subprocess.Popen[bytes],
command: str | Sequence[str],
*,
name: str | None,
merged: bool,
stdout_buf: RingBuffer | None = None,
stderr_buf: RingBuffer | None = None,
pumps: list[threading.Thread] | None = None,
adopted: bool = False,
) -> JobRecord:
return JobRecord(
job_id="job_" + secrets.token_hex(6),
pid=proc.pid,
name=name or _default_name(command),
command=list(command) if isinstance(command, (list, tuple)) else str(command),
started_at=time.monotonic(),
proc=proc,
stdout_buf=stdout_buf,
stderr_buf=stderr_buf,
merged=merged,
pumps=pumps or [],
adopted=adopted,
)
def _watch_for_exit(self, record: JobRecord) -> None:
rc = record.proc.wait()
# Drain any final bytes — pump threads exit on EOF, so this is
# mostly a join; we don't need to actively pull.
for pump in record.pumps:
pump.join(timeout=2.0)
if record.stdout_buf is not None:
record.stdout_buf.close()
if record.stderr_buf is not None:
record.stderr_buf.close()
with self._lock:
record.exited_at = time.monotonic()
record.exit_code = rc
            # Negative rc means killed-by-signal (Popen convention); 128+N
            # is the shell convention when bash -c ran the child. Matching
            # abs(rc) against signal numbers would misread exit codes 1/2/9/15.
            record.signaled = rc < 0 or (rc > 128 and (rc - 128) in _SIGNAL_NUMBERS)
self._exited_order.append(record.job_id)
self._evict_old_exits_locked()
def _evict_old_exits_locked(self) -> None:
while len(self._exited_order) > _RECENT_EXIT_KEEP:
old_id = self._exited_order.pop(0)
self._jobs.pop(old_id, None)
def _pump_stream(stream, ring: RingBuffer) -> None:
"""Read bytes from ``stream`` until EOF; push into ``ring``."""
try:
while True:
chunk = stream.read(4096)
if not chunk:
break
ring.write(chunk)
except (OSError, ValueError):
pass
finally:
try:
stream.close()
except Exception:
pass
ring.close()
def _default_name(command: str | Sequence[str]) -> str:
if isinstance(command, (list, tuple)):
return command[0] if command else "job"
text = str(command).strip().split()
return text[0] if text else "job"
_SIGNAL_NUMBERS = {
signal.SIGINT,
signal.SIGTERM,
signal.SIGKILL,
signal.SIGHUP,
signal.SIGUSR1,
signal.SIGUSR2,
}
# Module-level singleton.
_MANAGER: JobManager | None = None
_MANAGER_LOCK = threading.Lock()
def get_manager() -> JobManager:
global _MANAGER
if _MANAGER is None:
with _MANAGER_LOCK:
if _MANAGER is None:
_MANAGER = JobManager()
return _MANAGER
__all__ = ["JobManager", "JobRecord", "JobLimitExceeded", "get_manager"]
+221
View File
@@ -0,0 +1,221 @@
"""Job-control MCP tools: ``terminal_job_start``, ``terminal_job_logs``,
``terminal_job_manage``.
Three tools, not seven: ``_logs`` rolls in status + wait, ``_manage``
covers list + signals + stdin so the agent has fewer tool names to
remember. The tradeoff: a multi-action ``_manage`` is slightly less
self-documenting; the foundational skill compensates.
"""
from __future__ import annotations
import signal
from typing import TYPE_CHECKING, Any
from terminal_tools.common.limits import coerce_limits, make_preexec_fn, sanitized_env
from terminal_tools.jobs.manager import JobLimitExceeded, get_manager
if TYPE_CHECKING:
from fastmcp import FastMCP
_SIGNAL_ALIASES = {
"signal_term": signal.SIGTERM,
"signal_kill": signal.SIGKILL,
"signal_int": signal.SIGINT,
"signal_hup": signal.SIGHUP,
"signal_usr1": signal.SIGUSR1,
"signal_usr2": signal.SIGUSR2,
}
def register_job_tools(mcp: FastMCP) -> None:
manager = get_manager()
@mcp.tool()
def terminal_job_start(
command: str,
cwd: str | None = None,
env: dict[str, str] | None = None,
merge_stderr: bool = False,
shell: bool = False,
name: str | None = None,
limits: dict[str, int] | None = None,
) -> dict:
"""Spawn a background process. Returns a job_id you poll with terminal_job_logs.
Use this when work might run >1 minute, when you want to keep doing
other things while it runs, or when you need to stream logs as they
arrive. Jobs die when the terminal-tools server restarts; they are NOT
persistent across reboots.
Args:
command: Shell command to run. With shell=False, pass argv via the
command string and we'll split on whitespace; for complex
quoting use shell=True.
cwd: Working directory. Default: server's cwd.
env: Environment override. Merged into a sanitized base env (with
zsh dotfile vars stripped).
merge_stderr: When True, stderr is interleaved into stdout in a
single ring buffer. Convenient for log-shaped output where
ordering matters.
shell: True to invoke /bin/bash -c. Refuses zsh.
name: Optional human label surfaced in terminal_job_manage(action="list").
limits: Optional resource caps applied via setrlimit before exec.
Keys: cpu_sec, rss_mb, fsize_mb, nofile.
Returns: {job_id, pid, started_at}
"""
try:
# Build argv: for shell=False, naive split is fine for the common case;
# the foundational skill steers complex commands toward shell=True.
argv: list[str] | str
if shell:
argv = command
else:
argv = command.split()
if not argv:
return {"error": "command was empty"}
full_env = sanitized_env(env) if env is not None else None
preexec = make_preexec_fn(coerce_limits(limits))
record = manager.start(
argv,
cwd=cwd,
env=full_env,
shell=shell,
merge_stderr=merge_stderr,
name=name,
preexec_fn=preexec,
)
return {
"job_id": record.job_id,
"pid": record.pid,
"started_at": record.started_at,
"name": record.name,
"merged": merge_stderr,
}
except JobLimitExceeded as e:
return {"error": str(e)}
except Exception as e:
return {"error": f"{type(e).__name__}: {e}"}
@mcp.tool()
def terminal_job_logs(
job_id: str,
stream: str = "stdout",
since_offset: int = 0,
max_bytes: int = 64000,
wait_until_exit: bool = False,
wait_timeout_sec: float = 30.0,
tail: bool = False,
) -> dict:
"""Read job output at an offset. Combined read + status + wait primitive.
Track next_offset across calls to avoid replaying data. When
wait_until_exit=True, blocks server-side until the job exits or
wait_timeout_sec elapses, then returns logs and final status.
Args:
job_id: From terminal_job_start (or auto-promoted from terminal_exec).
stream: "stdout" | "stderr" | "merged". Use "merged" only when the
job was started with merge_stderr=True.
since_offset: Absolute byte offset to start reading from. Pass 0
on first call; pass next_offset on subsequent calls.
max_bytes: Max bytes of decoded output to return inline.
wait_until_exit: When True, blocks until the job exits before reading.
wait_timeout_sec: Cap on the wait. Returns whatever's accumulated.
tail: When True, ignores since_offset and returns the last max_bytes.
Returns: {data, offset, next_offset, status, exit_code, eof, truncated_bytes_dropped}
"""
record = manager.get(job_id)
if record is None:
return {"error": f"unknown job_id: {job_id}"}
if wait_until_exit:
manager.wait(job_id, timeout_sec=wait_timeout_sec)
record = manager.get(job_id) or record
if stream == "merged":
# Merged jobs always read from stdout_buf (which received both)
buf = record.stdout_buf
elif stream == "stderr":
buf = record.stderr_buf
else:
buf = record.stdout_buf
if buf is None:
return {
"error": f"stream={stream!r} not available (merge_stderr={record.merged})",
}
result = buf.tail(max_bytes) if tail else buf.read(since_offset, max_bytes)
return {
"data": result.data.decode("utf-8", errors="replace"),
"offset": result.offset,
"next_offset": result.next_offset,
"truncated_bytes_dropped": result.truncated_bytes_dropped,
"eof": buf.eof and result.next_offset >= buf.total_written,
"status": record.status,
"exit_code": record.exit_code,
"runtime_ms": record.runtime_ms(),
}
@mcp.tool()
def terminal_job_manage(
action: str,
job_id: str | None = None,
data: str | None = None,
) -> dict:
"""List jobs, send signals, or write to job stdin.
Single tool covering job-control side effects. The action argument
picks the operation:
- "list": list active + recently-exited jobs. job_id ignored.
- "signal_term" | "signal_kill" | "signal_int" | "signal_hup"
| "signal_usr1" | "signal_usr2": send the named signal. Requires job_id.
- "stdin": write `data` to the job's stdin. Requires job_id and data.
- "close_stdin": close the job's stdin pipe (e.g. to flush a tool that
reads until EOF). Requires job_id.
Signal escalation idiom (foundational skill teaches this): try
signal_int first (graceful), then signal_term after a few seconds, then
signal_kill as a last resort. The OS may take a moment to deliver.
Returns vary by action: list → {jobs: [...]}; signals → {ok, signal};
stdin → {bytes_written}.
"""
if action == "list":
return {"jobs": manager.list()}
if not job_id:
return {"error": f"action={action!r} requires job_id"}
if action in _SIGNAL_ALIASES:
ok = manager.signal(job_id, _SIGNAL_ALIASES[action])
return {"ok": ok, "signal": action.removeprefix("signal_").upper()}
if action == "stdin":
if data is None:
return {"error": "action=stdin requires data"}
n = manager.write_stdin(job_id, data.encode("utf-8"))
return {"bytes_written": n}
if action == "close_stdin":
return {"ok": manager.close_stdin(job_id)}
return {"error": f"unknown action: {action!r}"}
# Expose a non-tool reference so the lifespan hook can shutdown_all().
register_job_tools.manager = manager # type: ignore[attr-defined]
def get_registered_manager() -> Any:
"""Return the JobManager registered for the most recent FastMCP setup.
Used by the server lifespan to reap on shutdown."""
return get_manager()
__all__ = ["register_job_tools", "get_registered_manager"]
+41
View File
@@ -0,0 +1,41 @@
"""``terminal_output_get`` — retrieve truncated output via handle."""
from __future__ import annotations
from typing import TYPE_CHECKING
from terminal_tools.common.output_store import get_store
if TYPE_CHECKING:
from fastmcp import FastMCP
def register_output_tools(mcp: FastMCP) -> None:
@mcp.tool()
def terminal_output_get(
output_handle: str,
since_offset: int = 0,
max_kb: int = 64,
) -> dict:
"""Retrieve a slice of truncated output by handle.
When terminal_exec or terminal_job_logs returns more output than fits inline,
you'll see `output_handle: "out_<hex>"`. Pass it here with successive
offsets to paginate. The full output is preserved (combined stdout+stderr
with `--- stdout ---` / `--- stderr ---` separators) for 5 minutes.
Args:
output_handle: From a prior tool's envelope.
since_offset: Pass 0 first, then next_offset from the previous call.
max_kb: Max KB to return per call.
Returns: {data, offset, next_offset, eof, expired}
"""
return get_store().get(
output_handle,
since_offset=since_offset,
max_bytes=max_kb * 1024,
)
__all__ = ["register_output_tools"]
+5
View File
@@ -0,0 +1,5 @@
"""Persistent PTY-backed shell sessions."""
from terminal_tools.pty.tools import register_pty_tools
__all__ = ["register_pty_tools"]
+367
View File
@@ -0,0 +1,367 @@
"""Persistent PTY-backed bash sessions.
Built on stdlib ``pty.openpty()`` + ``os.fork()``. A reader thread
fills a ring buffer; the public API exposes three modes:
- ``run(command, timeout_sec)``: write the command, wait for the
unique prompt sentinel (or an ``expect=`` regex override), return
everything in between.
- ``send_raw(data)``: write bytes, no waiting. For REPLs / vim /
sudo-prompt-style flows.
- ``drain(timeout_sec)``: read whatever's currently buffered (after
a raw send).
A unique ``PS1`` sentinel is set at session start so ``run()`` can
unambiguously detect command completion. Per-session concurrency is
serialized: a busy session refuses concurrent ``run()`` calls.
POSIX-only: imports stdlib ``pty`` which doesn't exist on Windows.
"""
from __future__ import annotations
import errno
import fcntl
import os
import pty
import re
import select
import signal
import struct
import termios
import threading
import time
import uuid
from terminal_tools.common.limits import _resolve_shell, sanitized_env
from terminal_tools.common.ring_buffer import RingBuffer
_BUF_BYTES = 2 * 1024 * 1024
class SessionBusy(RuntimeError):
"""Raised when a concurrent run() attempts to use a session that's already executing."""
class PtySession:
"""One persistent bash session bound to a PTY.
Thread-safe for the disjoint-mode operations: ``run`` serializes via
``_busy_lock``, ``send_raw`` and ``drain`` use the ring's own lock.
"""
def __init__(
self,
*,
cwd: str | None = None,
env: dict[str, str] | None = None,
shell: bool | str = True,
cols: int = 120,
rows: int = 40,
idle_timeout_sec: int = 1800,
):
self.session_id = "pty_" + uuid.uuid4().hex[:10]
self.shell_path = _resolve_shell(shell) or "/bin/bash"
self._sentinel_token = uuid.uuid4().hex
self._sentinel = f"__TERMINALTOOLS_PROMPT_{self._sentinel_token}__"
self._sentinel_re = re.compile(re.escape(self._sentinel))
# Build env: zsh leakage stripped, prompt set to our sentinel.
merged_env = sanitized_env(env)
merged_env["PS1"] = f"{self._sentinel}\n$ "
merged_env["PS2"] = ""
merged_env["PROMPT_COMMAND"] = "" # don't let user dotfiles override PS1
merged_env["TERM"] = merged_env.get("TERM", "xterm-256color")
self._created_at = time.monotonic()
self._last_activity = self._created_at
self.idle_timeout_sec = idle_timeout_sec
self._pid, self._fd = pty.fork()
if self._pid == 0:
# Child — exec bash. --norc --noprofile keeps things
# predictable; the foundational skill teaches that the
# session runs vanilla bash, not the user's interactive
# shell.
try:
if cwd:
os.chdir(cwd)
argv = [self.shell_path, "--norc", "--noprofile", "-i"]
os.execve(self.shell_path, argv, merged_env)
except Exception as e: # pragma: no cover — child exec
os.write(2, f"terminal-tools pty: exec failed: {e}\n".encode())
os._exit(127)
# Parent
_set_pty_size(self._fd, rows, cols)
_set_nonblocking(self._fd)
self._buf = RingBuffer(_BUF_BYTES)
self._busy_lock = threading.Lock()
self._closed = threading.Event()
self._reader = threading.Thread(target=self._read_loop, daemon=True, name=f"pty-reader-{self.session_id}")
self._reader.start()
# Wait for the first prompt so the session is "ready" before we return.
# If bash --norc somehow doesn't print one, give up after 2 seconds —
# the session is still usable, it just won't have a prompt-aligned
# initial offset.
self._wait_for_sentinel(timeout_sec=2.0, since_offset=0)
# ── Public API ────────────────────────────────────────────────
@property
def pid(self) -> int:
return self._pid
def is_alive(self) -> bool:
if self._closed.is_set():
return False
try:
pid, _ = os.waitpid(self._pid, os.WNOHANG)
return pid == 0
except ChildProcessError:
return False
def run(self, command: str, *, expect: str | None = None, timeout_sec: float = 60.0) -> dict:
"""Send ``command`` + newline, wait for the prompt sentinel
(or ``expect`` regex override), return the slice in between."""
if not self._busy_lock.acquire(blocking=False):
raise SessionBusy(f"session {self.session_id} is busy")
try:
start_offset = self._buf.total_written
self._write(command.encode("utf-8") + b"\n")
self._last_activity = time.monotonic()
return self._wait_for_sentinel(
timeout_sec=timeout_sec,
since_offset=start_offset,
expect_pattern=expect,
)
finally:
self._busy_lock.release()
def send_raw(self, data: str, *, add_newline: bool = False) -> int:
"""Write bytes without waiting for prompt. For REPLs/vim/sudo prompts."""
payload = data.encode("utf-8")
if add_newline:
payload += b"\n"
n = self._write(payload)
self._last_activity = time.monotonic()
return n
def drain(self, *, timeout_sec: float = 2.0, max_bytes: int = 64000) -> dict:
"""Read whatever's currently buffered. Used after send_raw to capture
REPL / interactive-program output."""
deadline = time.monotonic() + timeout_sec
last_total = self._buf.total_written
# Wait for activity to settle for a brief window — gives the
# process a chance to finish its current line.
while time.monotonic() < deadline:
time.sleep(0.05)
current = self._buf.total_written
if current == last_total:
break
last_total = current
result = self._buf.tail(max_bytes)
return {
"output": result.data.decode("utf-8", errors="replace"),
"more": result.next_offset < self._buf.total_written,
"offset": result.offset,
"next_offset": result.next_offset,
"timed_out": False,
}
def close(self, *, force: bool = False, grace_sec: float = 1.0) -> dict:
"""Terminate the session. Returns final output."""
if self._closed.is_set():
return {"exit_code": None, "final_output": "", "already_closed": True}
# Flush an exit if not forcing.
if not force:
try:
self._write(b"exit\n")
except OSError:
pass
deadline = time.monotonic() + grace_sec
while time.monotonic() < deadline:
try:
pid, status = os.waitpid(self._pid, os.WNOHANG)
if pid != 0:
break
except ChildProcessError:
break
time.sleep(0.05)
try:
os.kill(self._pid, signal.SIGTERM)
except (ProcessLookupError, PermissionError):
pass
try:
os.waitpid(self._pid, os.WNOHANG)
except ChildProcessError:
pass
if self.is_alive():
try:
os.kill(self._pid, signal.SIGKILL)
except (ProcessLookupError, PermissionError):
pass
self._closed.set()
try:
os.close(self._fd)
except OSError:
pass
# Final output = whatever's still in the ring.
result = self._buf.tail(64 * 1024)
try:
_pid, status = os.waitpid(self._pid, os.WNOHANG)
exit_code = os.WEXITSTATUS(status) if os.WIFEXITED(status) else None
except ChildProcessError:
exit_code = None
return {
"exit_code": exit_code,
"final_output": result.data.decode("utf-8", errors="replace"),
"already_closed": False,
}
def to_summary(self) -> dict:
return {
"session_id": self.session_id,
"pid": self._pid,
"shell": self.shell_path,
"alive": self.is_alive(),
"idle_sec": int(time.monotonic() - self._last_activity),
"created_at": self._created_at,
}
# ── Internals ─────────────────────────────────────────────────
def _write(self, data: bytes) -> int:
if self._closed.is_set():
raise OSError("session is closed")
try:
return os.write(self._fd, data)
except OSError as e:
if e.errno == errno.EAGAIN:
# PTY is full — retry briefly.
deadline = time.monotonic() + 1.0
while time.monotonic() < deadline:
time.sleep(0.01)
try:
return os.write(self._fd, data)
except OSError:
continue
raise
def _read_loop(self) -> None:
while not self._closed.is_set():
try:
ready, _, _ = select.select([self._fd], [], [], 0.5)
except (OSError, ValueError):
break
if not ready:
# Periodically check for child death even when no data.
try:
pid, _ = os.waitpid(self._pid, os.WNOHANG)
if pid != 0:
break
except ChildProcessError:
break
continue
try:
chunk = os.read(self._fd, 4096)
except OSError:
break
if not chunk:
break
self._buf.write(chunk)
self._buf.close()
self._closed.set()
def _wait_for_sentinel(
self,
*,
timeout_sec: float,
since_offset: int,
expect_pattern: str | None = None,
) -> dict:
"""Poll the buffer until we see the sentinel (or expect pattern)."""
deadline = time.monotonic() + timeout_sec
pattern: re.Pattern[str] | None = None
if expect_pattern is not None:
pattern = re.compile(expect_pattern)
prompt_offset = since_offset
while time.monotonic() < deadline:
slice_ = self._buf.read(since_offset, self._buf.total_written - since_offset)
text = slice_.data.decode("utf-8", errors="replace")
if pattern is not None:
m = pattern.search(text)
if m is not None:
output = text[: m.start()]
prompt_offset = since_offset + len(text[: m.end()].encode("utf-8", errors="replace"))
return {
"output": output,
"prompt_after": True,
"matched_expect": True,
"next_offset": prompt_offset,
"timed_out": False,
}
else:
m = self._sentinel_re.search(text)
if m is not None:
output = text[: m.start()]
# Strip the trailing echoed command/newline above the sentinel
output = _strip_command_echo(output)
return {
"output": output,
"prompt_after": True,
"matched_expect": False,
"next_offset": since_offset + len(text[: m.end()].encode("utf-8", errors="replace")),
"timed_out": False,
}
time.sleep(0.05)
if self._closed.is_set():
break
# Timed out — return whatever we have.
slice_ = self._buf.read(since_offset, self._buf.total_written - since_offset)
return {
"output": slice_.data.decode("utf-8", errors="replace"),
"prompt_after": False,
"matched_expect": False,
"next_offset": slice_.next_offset,
"timed_out": True,
}
def _set_pty_size(fd: int, rows: int, cols: int) -> None:
try:
fcntl.ioctl(fd, termios.TIOCSWINSZ, struct.pack("HHHH", rows, cols, 0, 0))
except OSError:
pass
def _set_nonblocking(fd: int) -> None:
flags = fcntl.fcntl(fd, fcntl.F_GETFL)
fcntl.fcntl(fd, fcntl.F_SETFL, flags | os.O_NONBLOCK)
def _strip_command_echo(text: str) -> str:
"""Drop the first line if it looks like the echoed command. PTYs in
canonical mode echo the user's input back; we want only the program's
output. Best-effort heuristic leaves the text alone if uncertain."""
if "\n" in text:
first, rest = text.split("\n", 1)
# Keep only the rest if the first line is short (likely the echo).
if len(first) < 4096:
return rest
return text
__all__ = ["PtySession", "SessionBusy"]
+243
View File
@@ -0,0 +1,243 @@
"""Three PTY tools: ``terminal_pty_open``, ``terminal_pty_run``, ``terminal_pty_close``.
Per-server hard cap on concurrent sessions (env: ``TERMINAL_TOOLS_MAX_PTY``,
default 8) prevents PTY exhaustion. Idle sessions older than
``idle_timeout_sec`` are reaped lazily on every ``_open`` so an
abandoned session can't leak a bash forever.
"""
from __future__ import annotations
import os
import sys
import threading
import time
from typing import TYPE_CHECKING
from terminal_tools.common.limits import ZshRefused
if TYPE_CHECKING:
from fastmcp import FastMCP
_MAX_PTY_DEFAULT = 8
class _PtyRegistry:
def __init__(self):
self._sessions: dict[str, PtySession] = {} # noqa: F821
self._lock = threading.Lock()
self._max = int(os.getenv("TERMINAL_TOOLS_MAX_PTY", str(_MAX_PTY_DEFAULT)))
def reap_idle(self) -> None:
"""Drop sessions whose idle time exceeded their idle_timeout_sec."""
with self._lock:
now = time.monotonic()
stale = [
sid
for sid, sess in self._sessions.items()
if not sess.is_alive() or (now - sess._last_activity) > sess.idle_timeout_sec
]
for sid in stale:
sess = self._sessions.pop(sid, None)
if sess is not None:
try:
sess.close(force=True, grace_sec=0.5)
except Exception:
pass
def count(self) -> int:
with self._lock:
return len(self._sessions)
def add(self, sess) -> None:
with self._lock:
if len(self._sessions) >= self._max:
# Caller should have reaped first; treat as cap.
raise RuntimeError(
f"terminal-tools PTY cap reached ({self._max}). "
"Close idle sessions or raise TERMINAL_TOOLS_MAX_PTY."
)
self._sessions[sess.session_id] = sess
def get(self, sid: str):
with self._lock:
return self._sessions.get(sid)
def remove(self, sid: str) -> None:
with self._lock:
self._sessions.pop(sid, None)
def list(self) -> list[dict]:
with self._lock:
return [s.to_summary() for s in self._sessions.values()]
def shutdown_all(self) -> None:
with self._lock:
sessions = list(self._sessions.values())
self._sessions.clear()
for sess in sessions:
try:
sess.close(force=True, grace_sec=0.5)
except Exception:
pass
_REGISTRY = _PtyRegistry()
def get_registry() -> _PtyRegistry:
return _REGISTRY
def register_pty_tools(mcp: FastMCP) -> None:
if sys.platform == "win32":
# Register stub tools that report unsupported; keeps the tool
# surface uniform across platforms even when PTY is unavailable.
@mcp.tool()
def terminal_pty_open(*args, **kwargs) -> dict:
"""Persistent PTY-backed bash session. POSIX-only.
Windows is not supported in v1; use terminal_exec / terminal_job_*
for non-interactive work. The PTY tools require stdlib pty,
which exists only on Linux + macOS.
"""
return {"error": "terminal_pty_* tools are POSIX-only; not supported on Windows"}
@mcp.tool()
def terminal_pty_run(*args, **kwargs) -> dict: # noqa: D401
"""Persistent PTY-backed bash session. POSIX-only."""
return {"error": "terminal_pty_* tools are POSIX-only; not supported on Windows"}
@mcp.tool()
def terminal_pty_close(*args, **kwargs) -> dict: # noqa: D401
"""Persistent PTY-backed bash session. POSIX-only."""
return {"error": "terminal_pty_* tools are POSIX-only; not supported on Windows"}
return
from terminal_tools.pty.session import PtySession, SessionBusy
@mcp.tool()
def terminal_pty_open(
cwd: str | None = None,
env: dict[str, str] | None = None,
cols: int = 120,
rows: int = 40,
idle_timeout_sec: int = 1800,
) -> dict:
"""Open a persistent /bin/bash session in a PTY.
Use a session when you need state across calls: building env vars,
navigating with cd, driving REPLs, or responding to interactive
prompts (sudo, ssh, mysql). For one-shot work, use terminal_exec
instead.
The session runs vanilla bash (--norc --noprofile) so dotfiles
don't surprise you. A unique PS1 sentinel is set so terminal_pty_run
can unambiguously detect command completion. macOS users: this
is /bin/bash, not zsh, by deliberate policy; explicit
shell="/bin/zsh" overrides are rejected.
Args:
cwd: Initial working directory.
env: Environment override (zsh dotfile vars are stripped).
cols, rows: Terminal size.
idle_timeout_sec: Drop the session after this many seconds idle.
Returns: {session_id, pid, shell}
"""
_REGISTRY.reap_idle()
try:
sess = PtySession(cwd=cwd, env=env, cols=cols, rows=rows, idle_timeout_sec=idle_timeout_sec)
except ZshRefused as e:
return {"error": str(e)}
except Exception as e:
return {"error": f"failed to open session: {type(e).__name__}: {e}"}
try:
_REGISTRY.add(sess)
except RuntimeError as e:
sess.close(force=True, grace_sec=0.2)
return {"error": str(e)}
return {
"session_id": sess.session_id,
"pid": sess.pid,
"shell": sess.shell_path,
}
@mcp.tool()
def terminal_pty_run(
session_id: str,
command: str | None = None,
expect: str | None = None,
raw_send: bool = False,
read_only: bool = False,
timeout_sec: float = 60.0,
) -> dict:
"""Run a command in a session, send raw input, or drain output.
Three modes:
- Default: pass a command. The session sends it, waits for the
unique prompt sentinel (or `expect` regex if provided), and
returns the output between submission and prompt.
- raw_send=True: pass a command. The text is written without
waiting for prompt. Use for REPL input ("p('hi')\\n"), for
password prompts (sudo), or for vim keystrokes.
- read_only=True: drains whatever's currently buffered.
Typically follows raw_send.
Args:
session_id: From terminal_pty_open.
command: The text to send. None when read_only=True.
expect: Regex to wait for INSTEAD of the default prompt sentinel.
Useful when the command launches a REPL with its own prompt.
raw_send: Don't wait for prompt; just write.
read_only: Don't send anything; drain the buffer.
timeout_sec: Max wait. On timeout, returns whatever's buffered
with timed_out=True (the command may still be running;
check with another terminal_pty_run call).
Returns: {output, prompt_after, timed_out, ...}
"""
sess = _REGISTRY.get(session_id)
if sess is None:
return {"error": f"unknown session_id: {session_id}"}
if not sess.is_alive():
_REGISTRY.remove(session_id)
return {"error": f"session {session_id} has exited"}
if read_only:
return sess.drain(timeout_sec=timeout_sec)
if command is None:
return {"error": "command is required unless read_only=True"}
if raw_send:
n = sess.send_raw(command, add_newline=False)
return {"bytes_sent": n}
try:
return sess.run(command, expect=expect, timeout_sec=timeout_sec)
except SessionBusy as e:
return {"error": str(e)}
@mcp.tool()
def terminal_pty_close(session_id: str, force: bool = False) -> dict:
"""Terminate a PTY session. Always do this when you're done — leaked
sessions count against the per-server PTY cap.
Args:
session_id: From terminal_pty_open.
force: Skip the graceful "exit\\n" attempt and SIGTERM/SIGKILL.
Returns: {exit_code, final_output, already_closed}
"""
sess = _REGISTRY.get(session_id)
if sess is None:
return {"error": f"unknown session_id: {session_id}"}
result = sess.close(force=force)
_REGISTRY.remove(session_id)
return result
__all__ = ["register_pty_tools", "get_registry"]
@@ -0,0 +1,5 @@
"""Filesystem search tools (rg + find)."""
from terminal_tools.search.tools import register_search_tools
__all__ = ["register_search_tools"]
+204
View File
@@ -0,0 +1,204 @@
"""``terminal_rg`` and ``terminal_find`` — structured wrappers over ripgrep / find.
Distinct from ``files-tools.search_files`` (project-relative,
code-editor-tuned), these accept arbitrary paths and surface the
underlying tool's full feature set. The foundational skill steers
agents to ``files-tools`` for in-project work and these tools for
``/var/log``, ``/etc``, archive contents, etc.
"""
from __future__ import annotations
import json
import shutil
import subprocess
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from fastmcp import FastMCP
_DEFAULT_TIMEOUT_SEC = 30
_MAX_OUTPUT_BYTES = 256 * 1024
def register_search_tools(mcp: FastMCP) -> None:
@mcp.tool()
def terminal_rg(
pattern: str,
path: str = ".",
glob: str | None = None,
type_filter: str | None = None,
ignore_case: bool = False,
context: int = 0,
max_count: int | None = None,
max_depth: int | None = None,
hidden: bool = False,
no_ignore: bool = False,
extra_args: list[str] | None = None,
) -> dict:
"""Run ripgrep on `path` for `pattern`.
For project-scoped code search use files-tools.search_files instead;
this tool is for raw paths (system configs, /var/log, archive contents)
and exposes the full rg flag surface.
Args:
pattern: Regex pattern.
path: Directory or file to search. Default: current dir.
glob: Filename glob (e.g. "*.py").
type_filter: rg filetype shortcut (e.g. "py", "rust", "md").
ignore_case: Case-insensitive search.
context: Lines of context above and below each match.
max_count: Stop after N matches per file.
max_depth: Limit directory recursion depth.
hidden: Include hidden files (rg ignores them by default).
no_ignore: Don't respect .gitignore.
            extra_args: Raw flags to append (use sparingly; most needs are covered above).
Returns: {matches: [...], total, truncated, command}
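        Example (illustrative):
            terminal_rg(pattern="oom-killer", path="/var/log", glob="*.log",
                        ignore_case=True, context=2, max_count=5)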
"""
if not shutil.which("rg"):
return {"error": "ripgrep (rg) is not installed on this host"}
argv = ["rg", "--json", "--no-heading"]
if ignore_case:
argv.append("-i")
if context > 0:
argv.extend(["-C", str(context)])
if max_count is not None:
argv.extend(["-m", str(max_count)])
if max_depth is not None:
argv.extend(["--max-depth", str(max_depth)])
if hidden:
argv.append("--hidden")
if no_ignore:
argv.append("--no-ignore")
if type_filter:
argv.extend(["-t", type_filter])
if glob:
argv.extend(["-g", glob])
if extra_args:
argv.extend(str(a) for a in extra_args)
argv.extend(["--", pattern, path])
try:
proc = subprocess.run(
argv,
capture_output=True,
timeout=_DEFAULT_TIMEOUT_SEC,
check=False,
)
except subprocess.TimeoutExpired:
return {"error": "ripgrep timed out", "command": argv}
except FileNotFoundError:
return {"error": "ripgrep (rg) is not installed on this host"}
# Parse JSON-line output: only "match" events are interesting for the
# default surface. Errors land in stderr.
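        # Illustrative "match" event shape from `rg --json` (abridged; the
        # fields read below are path.text, line_number, and lines.text):
        #   {"type": "match", "data": {"path": {"text": "a.txt"},
        #    "line_number": 3, "lines": {"text": "hello\n"}, ...}}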
matches: list[dict] = []
truncated = False
bytes_seen = 0
for line in proc.stdout.splitlines():
if not line:
continue
bytes_seen += len(line)
if bytes_seen > _MAX_OUTPUT_BYTES:
truncated = True
break
try:
evt = json.loads(line)
except json.JSONDecodeError:
continue
if evt.get("type") != "match":
continue
data = evt.get("data", {})
path_data = (data.get("path") or {}).get("text") or ""
line_no = data.get("line_number")
text = (data.get("lines") or {}).get("text") or ""
matches.append({"path": path_data, "line": line_no, "text": text.rstrip("\n")})
        return {
            "matches": matches,
            "total": len(matches),
            "truncated": truncated,
            "exit_code": proc.returncode,
            "stderr": proc.stderr.decode("utf-8", errors="replace")[-2000:] if proc.stderr else "",
            "command": argv,
        }
@mcp.tool()
def terminal_find(
path: str,
name: str | None = None,
iname: str | None = None,
type_filter: str | None = None,
mtime_days: int | None = None,
size_kb_min: int | None = None,
size_kb_max: int | None = None,
max_depth: int | None = None,
max_results: int = 1000,
) -> dict:
"""Run `find` with structured predicates.
For tree views or stat-like info on a single path, use terminal_exec
("ls -la", "tree -L 2", "stat foo"). This tool is for predicate-driven
    searches (e.g. ".log files modified in the last 7 days, bigger than 1 MB").
Args:
path: Directory to search under.
name: Glob match (case-sensitive). e.g. "*.log".
iname: Glob match (case-insensitive).
type_filter: "f" file, "d" dir, "l" symlink.
            mtime_days: Modified within the last N days (mapped to find's
                -mtime -N; the sign of the input is ignored).
size_kb_min, size_kb_max: Size bounds in KB.
max_depth: Limit directory recursion.
max_results: Cap on returned paths.
Returns: {paths: [...], count, truncated, command}
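        Example (illustrative):
            terminal_find(path="/var/log", name="*.log", type_filter="f",
                          mtime_days=7, size_kb_min=1024)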
"""
if not shutil.which("find"):
return {"error": "find is not installed on this host"}
argv = ["find", path]
if max_depth is not None:
argv.extend(["-maxdepth", str(max_depth)])
if type_filter in {"f", "d", "l"}:
argv.extend(["-type", type_filter])
if name:
argv.extend(["-name", name])
if iname:
argv.extend(["-iname", iname])
if mtime_days is not None:
argv.extend(["-mtime", f"-{abs(mtime_days)}"])
if size_kb_min is not None:
argv.extend(["-size", f"+{int(size_kb_min)}k"])
if size_kb_max is not None:
argv.extend(["-size", f"-{int(size_kb_max)}k"])
try:
proc = subprocess.run(
argv,
capture_output=True,
timeout=_DEFAULT_TIMEOUT_SEC,
check=False,
)
except subprocess.TimeoutExpired:
return {"error": "find timed out", "command": argv}
all_paths = proc.stdout.decode("utf-8", errors="replace").splitlines()
truncated = len(all_paths) > max_results
paths = all_paths[:max_results]
return {
"paths": paths,
"count": len(paths),
"truncated": truncated,
"total_seen": len(all_paths),
"exit_code": proc.returncode,
"stderr": proc.stderr.decode("utf-8", errors="replace")[-2000:] if proc.stderr else "",
"command": argv,
}
__all__ = ["register_search_tools"]
+145
View File
@@ -0,0 +1,145 @@
"""terminal-tools FastMCP server — entry module.
Run via:
uv run python -m terminal_tools.server --stdio
uv run python terminal_tools_server.py --stdio (preferred, see _DEFAULT_LOCAL_SERVERS)
"""
from __future__ import annotations
import argparse
import asyncio
import atexit
import logging
import os
import sys
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager
logger = logging.getLogger(__name__)
def setup_logger() -> None:
if not logger.handlers:
stream = sys.stderr if "--stdio" in sys.argv else sys.stdout
handler = logging.StreamHandler(stream)
handler.setFormatter(logging.Formatter("[terminal-tools] %(message)s"))
logger.addHandler(handler)
logger.setLevel(logging.INFO)
setup_logger()
# Suppress FastMCP banner in STDIO mode (mirrors gcu/server.py).
if "--stdio" in sys.argv:
import rich.console
_orig_console_init = rich.console.Console.__init__
def _patched_console_init(self, *args, **kwargs):
kwargs["file"] = sys.stderr
_orig_console_init(self, *args, **kwargs)
rich.console.Console.__init__ = _patched_console_init
from fastmcp import FastMCP # noqa: E402
from terminal_tools import register_terminal_tools # noqa: E402
from terminal_tools.jobs.manager import get_manager # noqa: E402
from terminal_tools.pty.tools import get_registry as get_pty_registry # noqa: E402
@asynccontextmanager
async def _lifespan(_server: FastMCP) -> AsyncIterator[dict]:
"""Reap children on shutdown so we don't orphan jobs/PTYs.
Mirrors the gcu-tools lifespan pattern. Runs in the FastMCP event
loop on graceful shutdown; the atexit hook below catches abrupt
exits (SIGTERM, etc.) where lifespan teardown may not complete.
"""
parent_pid_env = os.getenv("HIVE_DESKTOP_PARENT_PID")
if parent_pid_env:
try:
parent_pid = int(parent_pid_env)
            # Hold a reference so the task isn't garbage-collected; the
            # lifespan frame keeps this local alive until shutdown.
            watchdog_task = asyncio.create_task(_parent_watchdog(parent_pid))
logger.info("Parent watchdog armed for PID %d", parent_pid)
except ValueError:
logger.warning("Invalid HIVE_DESKTOP_PARENT_PID=%r", parent_pid_env)
yield {}
logger.info("Shutting down — reaping jobs and PTY sessions...")
try:
get_manager().shutdown_all(grace_sec=2.0)
except Exception as e:
logger.warning("JobManager shutdown error: %s", e)
try:
get_pty_registry().shutdown_all()
except Exception as e:
logger.warning("PTY registry shutdown error: %s", e)
def _is_alive(pid: int) -> bool:
    try:
        os.kill(pid, 0)
    except ProcessLookupError:
        return False
    except PermissionError:
        # Signal 0 was refused, but the process exists.
        return True
    return True
async def _parent_watchdog(parent_pid: int) -> None:
"""Self-destruct when the desktop parent dies."""
while True:
await asyncio.sleep(2.0)
if not _is_alive(parent_pid):
logger.warning("Parent PID %d gone — terminal-tools exiting", parent_pid)
try:
get_manager().shutdown_all(grace_sec=1.0)
except Exception:
pass
try:
get_pty_registry().shutdown_all()
except Exception:
pass
os._exit(0)
def _atexit_reap() -> None:
"""Last-ditch reaping if lifespan didn't run."""
try:
get_manager().shutdown_all(grace_sec=1.0)
except Exception:
pass
try:
get_pty_registry().shutdown_all()
except Exception:
pass
atexit.register(_atexit_reap)
mcp = FastMCP("terminal-tools", lifespan=_lifespan)
def main() -> None:
parser = argparse.ArgumentParser(description="terminal-tools MCP server")
parser.add_argument("--port", type=int, default=int(os.getenv("TERMINAL_TOOLS_PORT", "4004")))
parser.add_argument("--host", default="0.0.0.0")
parser.add_argument("--stdio", action="store_true")
args = parser.parse_args()
tools = register_terminal_tools(mcp)
if not args.stdio:
logger.info("Registered %d terminal-tools: %s", len(tools), tools)
if args.stdio:
mcp.run(transport="stdio")
else:
logger.info("Starting terminal-tools on %s:%d", args.host, args.port)
asyncio.run(mcp.run_async(transport="http", host=args.host, port=args.port))
if __name__ == "__main__":
main()
+19
View File
@@ -0,0 +1,19 @@
#!/usr/bin/env python3
"""terminal-tools MCP server entry point.
Wired into _DEFAULT_LOCAL_SERVERS in core/framework/loader/mcp_registry.py
so that running ``uv run python terminal_tools_server.py --stdio`` from this
directory starts the server. Running with ``tools/`` as cwd puts ``src/terminal_tools``
on the import path via uv's workspace setup.
Usage:
uv run python terminal_tools_server.py --stdio # for agent integration
uv run python terminal_tools_server.py --port 4004 # HTTP for inspection
"""
from __future__ import annotations
from terminal_tools.server import main
if __name__ == "__main__":
main()
+225
View File
@@ -0,0 +1,225 @@
"""terminal_exec — envelope shape, semantic exits, warnings, auto-promotion."""
from __future__ import annotations
import time
import pytest
@pytest.fixture
def exec_tool(mcp):
from terminal_tools.exec import register_exec_tools
register_exec_tools(mcp)
return mcp._tool_manager._tools["terminal_exec"].fn
def test_envelope_shape_simple_echo(exec_tool):
result = exec_tool(command="echo hello world")
assert result["exit_code"] == 0
assert result["stdout"].strip() == "hello world"
assert result["stderr"] == ""
assert result["semantic_status"] == "ok"
assert result["timed_out"] is False
assert result["auto_backgrounded"] is False
assert result["job_id"] is None
assert result["warning"] is None
assert result["pid"] is not None
def test_grep_no_matches_is_ok_not_error(exec_tool, tmp_path):
f = tmp_path / "haystack.txt"
f.write_text("apples\nbananas\n")
result = exec_tool(command=f"grep zzz {f}")
assert result["exit_code"] == 1
assert result["semantic_status"] == "ok"
assert "No matches found" in (result["semantic_message"] or "")
def test_diff_files_differ_is_ok_not_error(exec_tool, tmp_path):
a = tmp_path / "a.txt"
a.write_text("hi\n")
b = tmp_path / "b.txt"
b.write_text("bye\n")
result = exec_tool(command=f"diff {a} {b}")
assert result["exit_code"] == 1
assert result["semantic_status"] == "ok"
assert "differ" in (result["semantic_message"] or "")
def test_destructive_warning_for_rm_rf(exec_tool, tmp_path):
    # Nothing is actually deleted: the target doesn't exist (and rm -f
    # exits 0 for missing paths). The warning fires from the command-text
    # regex regardless of the command's outcome.
target = tmp_path / "definitely_missing_dir"
result = exec_tool(command=f"rm -rf {target}")
assert result["warning"] is not None
assert "force-remove" in result["warning"] or "recursively" in result["warning"]
def test_destructive_warning_drop_table(exec_tool):
    # Echo the statement instead of running it against a database; the
    # warning regex matches the destructive text in the command itself.
result = exec_tool(command="echo 'DROP TABLE users;'", shell=True)
assert result["warning"] is not None
assert "drop" in result["warning"].lower() or "truncate" in result["warning"].lower()
def test_command_not_found(exec_tool):
result = exec_tool(command="this_command_does_not_exist_xyzzy")
assert result["exit_code"] is None or result["exit_code"] != 0
# Either pre-spawn FileNotFoundError or shell exit 127 — both are fine
# as long as semantic_status reflects an error or the error field is set.
assert (
result["semantic_status"] == "error"
or result.get("error")
or "not found" in (result["semantic_message"] or "").lower()
)
def test_shell_true_uses_bash(exec_tool):
result = exec_tool(command="echo hi", shell=True)
# shell=True (the bool) → /bin/bash → succeeds
assert result["exit_code"] == 0
def test_zsh_string_refused():
"""Calling _resolve_shell with zsh path raises ZshRefused."""
from terminal_tools.common.limits import ZshRefused, _resolve_shell
with pytest.raises(ZshRefused):
_resolve_shell("/bin/zsh")
with pytest.raises(ZshRefused):
_resolve_shell("/usr/local/bin/zsh")
def test_truncation_via_handle(exec_tool):
"""Generate >256 KB of output, verify output_handle is returned."""
# Generate ~300 KB of output
result = exec_tool(
command="python3 -c 'import sys; sys.stdout.write(\"x\" * 300_000)'",
shell=True,
max_output_kb=128, # smaller cap to force truncation
)
assert result["exit_code"] == 0
assert result["stdout_truncated_bytes"] > 0
assert result["output_handle"] is not None
assert result["output_handle"].startswith("out_")
def test_output_handle_round_trip(exec_tool, mcp):
from terminal_tools.output import register_output_tools
register_output_tools(mcp)
output_get = mcp._tool_manager._tools["terminal_output_get"].fn
result = exec_tool(
command="python3 -c 'import sys; sys.stdout.write(\"x\" * 300_000)'",
shell=True,
max_output_kb=64,
)
handle = result["output_handle"]
assert handle is not None
# First page
page = output_get(output_handle=handle, since_offset=0, max_kb=64)
assert page["expired"] is False
assert len(page["data"]) > 0
assert page["next_offset"] > 0
# Bogus handle
bogus = output_get(output_handle="out_doesnotexist", since_offset=0, max_kb=64)
assert bogus["expired"] is True
def test_timed_out_marker(exec_tool):
result = exec_tool(command="sleep 5", timeout_sec=1, auto_background_after_sec=0)
assert result["timed_out"] is True
def test_auto_shell_for_pipelines(exec_tool):
"""Regression for the queen_technology session 152038 silent-mangling bug.
The agent passed shell=False (default) with a pipeline command. The naive
command.split() spawned the first program with the rest as junk argv
`ps aux | sort ...` produced "ps: error: garbage option", and `echo "..."
&& ps ...` produced fake-success output where echo printed the entire
rest of the command verbatim. Fix: detect shell metacharacters and
transparently route through bash -c.
"""
# Case 1: pipeline. Was: ps spawned with "aux | sort ..." as argv → garbage option.
result = exec_tool(command="ps aux | head -1")
assert result["exit_code"] == 0, result
assert result["auto_shell"] is True
assert "USER" in result["stdout"] or "PID" in result["stdout"]
assert "garbage option" not in (result.get("stderr") or "")
# Case 2: && + pipe + awk. Was: echo printed the whole rest of the line.
result = exec_tool(
command="echo HEADER && echo line1 | head -1",
)
assert result["exit_code"] == 0, result
assert result["auto_shell"] is True
assert "HEADER" in result["stdout"]
assert "line1" in result["stdout"]
# The literal "&&" must NOT appear in stdout — that would mean echo
# captured it as an argument again.
assert "&&" not in result["stdout"]
# Case 3: redirect + glob. Was: '*' passed as a literal arg to ls.
import os
import tempfile
with tempfile.TemporaryDirectory() as tmp:
with open(os.path.join(tmp, "a.txt"), "w") as f:
f.write("x")
with open(os.path.join(tmp, "b.txt"), "w") as f:
f.write("y")
result = exec_tool(command=f"ls {tmp}/*.txt")
assert result["exit_code"] == 0, result
assert result["auto_shell"] is True
assert "a.txt" in result["stdout"]
assert "b.txt" in result["stdout"]
def test_no_auto_shell_for_argv_commands(exec_tool):
"""Plain argv commands (no metacharacters) should NOT auto-route to bash.
Direct exec is faster and avoids quoting hazards."""
result = exec_tool(command="echo hello")
assert result["exit_code"] == 0
assert result["auto_shell"] is False
assert result["stdout"].strip() == "hello"
def test_explicit_shell_true_unchanged(exec_tool):
"""When the agent explicitly opts in via shell=True, auto_shell stays
    False; auto-detection only fires for shell=False."""
result = exec_tool(command="echo a | tr a b", shell=True)
assert result["exit_code"] == 0
assert result["auto_shell"] is False
assert result["stdout"].strip() == "b"
def test_auto_promotion(exec_tool, mcp):
"""Past auto_background_after_sec, the call returns auto_backgrounded=True."""
from terminal_tools.jobs.tools import register_job_tools
register_job_tools(mcp)
# Use a 1s budget so the test runs quickly.
start = time.monotonic()
result = exec_tool(
command="sleep 5",
auto_background_after_sec=1,
timeout_sec=10,
)
elapsed = time.monotonic() - start
assert result["auto_backgrounded"] is True, result
assert result["job_id"] is not None
assert result["exit_code"] is None
assert elapsed < 3, "auto-promotion should return quickly past the budget"
# Take over via terminal_job_logs
job_logs = mcp._tool_manager._tools["terminal_job_logs"].fn
log_result = job_logs(job_id=result["job_id"], wait_until_exit=True, wait_timeout_sec=10)
assert log_result["status"] == "exited"
assert log_result["exit_code"] == 0
+97
View File
@@ -0,0 +1,97 @@
"""Job lifecycle: ring buffer offsets, signals, stdin."""
from __future__ import annotations
import time
import pytest
@pytest.fixture
def job_tools(mcp):
from terminal_tools.jobs.tools import register_job_tools
register_job_tools(mcp)
return {
"start": mcp._tool_manager._tools["terminal_job_start"].fn,
"logs": mcp._tool_manager._tools["terminal_job_logs"].fn,
"manage": mcp._tool_manager._tools["terminal_job_manage"].fn,
}
def test_start_logs_wait_basic(job_tools):
started = job_tools["start"](command="echo first; echo second; echo third", shell=True)
assert "job_id" in started
job_id = started["job_id"]
# Wait for completion via logs
result = job_tools["logs"](job_id=job_id, wait_until_exit=True, wait_timeout_sec=5)
assert result["status"] == "exited"
assert result["exit_code"] == 0
assert "first" in result["data"] and "third" in result["data"]
def test_offset_bookkeeping(job_tools):
started = job_tools["start"](
command="for i in 1 2 3 4 5; do echo line$i; sleep 0.1; done",
shell=True,
)
job_id = started["job_id"]
# Read a couple times with offset bookkeeping
seen = ""
offset = 0
for _ in range(20):
result = job_tools["logs"](job_id=job_id, since_offset=offset, max_bytes=4096)
seen += result["data"]
offset = result["next_offset"]
if result["status"] == "exited":
# Drain anything left
tail = job_tools["logs"](job_id=job_id, since_offset=offset, max_bytes=4096)
seen += tail["data"]
break
time.sleep(0.1)
for n in range(1, 6):
assert f"line{n}" in seen, f"missing line{n} from {seen!r}"
def test_merge_stderr(job_tools):
started = job_tools["start"](
command="echo stdout1; echo stderr1 1>&2; echo stdout2",
shell=True,
merge_stderr=True,
)
job_id = started["job_id"]
result = job_tools["logs"](
job_id=job_id, stream="merged", wait_until_exit=True, wait_timeout_sec=5
)
assert "stdout1" in result["data"]
assert "stderr1" in result["data"]
def test_signal_term(job_tools):
started = job_tools["start"](command="sleep 30")
job_id = started["job_id"]
# Give it a moment to actually start
time.sleep(0.2)
result = job_tools["manage"](action="signal_term", job_id=job_id)
assert result["ok"] is True
final = job_tools["logs"](job_id=job_id, wait_until_exit=True, wait_timeout_sec=3)
assert final["status"] == "exited"
# On SIGTERM, exit_code is -15 (subprocess convention)
assert final["exit_code"] == -15
def test_list_action(job_tools):
started = job_tools["start"](command="sleep 1")
listing = job_tools["manage"](action="list")
assert any(j["job_id"] == started["job_id"] for j in listing["jobs"])
def test_unknown_job_id(job_tools):
result = job_tools["logs"](job_id="job_doesnotexist", wait_until_exit=False)
assert "error" in result
+109
View File
@@ -0,0 +1,109 @@
"""PTY sessions: bash-on-macOS, prompt sentinel, raw I/O, zsh refusal."""
from __future__ import annotations
import sys
import time
import pytest
pytestmark = pytest.mark.skipif(sys.platform == "win32", reason="PTY is POSIX-only")
@pytest.fixture
def pty_tools(mcp):
from terminal_tools.pty.tools import register_pty_tools
register_pty_tools(mcp)
return {
"open": mcp._tool_manager._tools["terminal_pty_open"].fn,
"run": mcp._tool_manager._tools["terminal_pty_run"].fn,
"close": mcp._tool_manager._tools["terminal_pty_close"].fn,
}
def test_open_close_basic(pty_tools):
opened = pty_tools["open"]()
assert "session_id" in opened
assert opened["shell"] == "/bin/bash", "terminal-tools must default to bash, not zsh"
closed = pty_tools["close"](session_id=opened["session_id"])
assert closed.get("already_closed") in (False, None)
def test_bash_on_darwin():
"""Even on macOS, the resolved shell is /bin/bash, not /bin/zsh."""
from terminal_tools.common.limits import _resolve_shell
assert _resolve_shell(True) == "/bin/bash"
def test_pty_run_command(pty_tools):
opened = pty_tools["open"]()
sid = opened["session_id"]
try:
result = pty_tools["run"](session_id=sid, command="echo hello-pty", timeout_sec=5)
assert result.get("timed_out") is False
assert "hello-pty" in result["output"]
assert result["prompt_after"] is True
finally:
pty_tools["close"](session_id=sid)
def test_pty_state_persists(pty_tools):
opened = pty_tools["open"]()
sid = opened["session_id"]
try:
pty_tools["run"](session_id=sid, command="MY_VAR=42")
result = pty_tools["run"](session_id=sid, command="echo $MY_VAR", timeout_sec=3)
assert "42" in result["output"]
finally:
pty_tools["close"](session_id=sid)
def test_raw_send_then_read_only(pty_tools):
"""Drive the python REPL via raw_send + read_only."""
opened = pty_tools["open"]()
sid = opened["session_id"]
try:
# Launch python with our own prompt regex
pty_tools["run"](
session_id=sid,
command="python3 -q",
expect=r">>>\s*$",
timeout_sec=10,
)
pty_tools["run"](session_id=sid, command="x = 7\n", raw_send=True)
pty_tools["run"](session_id=sid, command="print(x*x)\n", raw_send=True)
time.sleep(0.5)
drained = pty_tools["run"](session_id=sid, read_only=True, timeout_sec=2)
assert "49" in drained["output"]
finally:
pty_tools["close"](session_id=sid, force=True)
def test_session_busy(pty_tools):
"""Concurrent run() calls on the same session return 'session busy'."""
import threading
opened = pty_tools["open"]()
sid = opened["session_id"]
try:
results = []
def run_long():
results.append(pty_tools["run"](session_id=sid, command="sleep 2", timeout_sec=5))
t = threading.Thread(target=run_long)
t.start()
time.sleep(0.2)
# Concurrent call should fail
result = pty_tools["run"](session_id=sid, command="echo nope", timeout_sec=1)
assert "error" in result and "busy" in result["error"].lower()
t.join(timeout=10)
finally:
pty_tools["close"](session_id=sid, force=True)
def test_unknown_session(pty_tools):
result = pty_tools["run"](session_id="pty_doesnotexist", command="ls")
assert "error" in result
+58
View File
@@ -0,0 +1,58 @@
"""terminal_rg + terminal_find — basic functionality, structured output."""
from __future__ import annotations
import shutil
import pytest
@pytest.fixture
def search_tools(mcp):
from terminal_tools.search.tools import register_search_tools
register_search_tools(mcp)
return {
"rg": mcp._tool_manager._tools["terminal_rg"].fn,
"find": mcp._tool_manager._tools["terminal_find"].fn,
}
@pytest.mark.skipif(not shutil.which("rg"), reason="ripgrep not installed")
def test_rg_finds_pattern(search_tools, tmp_path):
(tmp_path / "a.txt").write_text("hello\nworld\nfoo\n")
(tmp_path / "b.txt").write_text("bar\nworld\n")
result = search_tools["rg"](pattern="world", path=str(tmp_path))
assert result["total"] >= 2
paths = {m["path"] for m in result["matches"]}
assert any("a.txt" in p for p in paths)
@pytest.mark.skipif(not shutil.which("rg"), reason="ripgrep not installed")
def test_rg_no_matches(search_tools, tmp_path):
(tmp_path / "a.txt").write_text("hello\n")
result = search_tools["rg"](pattern="zzz_no_match_zzz", path=str(tmp_path))
assert result["total"] == 0
assert result["matches"] == []
def test_find_by_name(search_tools, tmp_path):
(tmp_path / "alpha.log").write_text("a")
(tmp_path / "beta.log").write_text("b")
(tmp_path / "ignore.txt").write_text("c")
result = search_tools["find"](path=str(tmp_path), name="*.log")
assert result["count"] == 2
assert all(p.endswith(".log") for p in result["paths"])
def test_find_by_type_dir(search_tools, tmp_path):
(tmp_path / "sub").mkdir()
(tmp_path / "file.txt").write_text("x")
result = search_tools["find"](path=str(tmp_path), type_filter="d")
paths = result["paths"]
# tmp_path itself + sub
assert any(p.endswith("sub") for p in paths)
assert not any(p.endswith("file.txt") for p in paths)
+102
View File
@@ -0,0 +1,102 @@
"""Security/policy tests: zsh refusal, env stripping, destructive catalog."""
from __future__ import annotations
import pytest
def test_resolve_shell_rejects_zsh():
from terminal_tools.common.limits import ZshRefused, _resolve_shell
for path in ("/bin/zsh", "/usr/bin/zsh", "/usr/local/bin/zsh", "ZSH"):
with pytest.raises(ZshRefused):
_resolve_shell(path)
def test_resolve_shell_accepts_bash():
from terminal_tools.common.limits import _resolve_shell
assert _resolve_shell(True) == "/bin/bash"
assert _resolve_shell("/bin/bash") == "/bin/bash"
assert _resolve_shell(False) is None
def test_sanitized_env_strips_zsh_vars(monkeypatch):
from terminal_tools.common.limits import sanitized_env
monkeypatch.setenv("ZDOTDIR", "/some/path")
monkeypatch.setenv("ZSH_VERSION", "5.9")
monkeypatch.setenv("ZSH_NAME", "zsh")
monkeypatch.setenv("PATH", "/usr/bin:/bin")
env = sanitized_env()
assert "ZDOTDIR" not in env
assert "ZSH_VERSION" not in env
assert "ZSH_NAME" not in env
# Non-zsh vars survive
assert env["PATH"] == "/usr/bin:/bin"
def test_destructive_warning_catalog():
from terminal_tools.common.destructive_warning import get_warning
cases = [
("rm -rf /tmp/foo", "force-remove"),
("rm -r /tmp/foo", "recursively remove"),
("git reset --hard HEAD~1", "discard"),
("git push --force origin main", "remote history"),
("git push -f origin main", "remote history"),
("git commit --amend -m 'x'", "rewrite"),
("DROP TABLE users;", "drop or truncate"),
("DELETE FROM users;", "delete rows"),
("kubectl delete pod foo", "Kubernetes"),
("terraform destroy", "Terraform"),
]
for cmd, expected in cases:
warning = get_warning(cmd)
assert warning is not None, f"expected warning for {cmd!r}"
assert expected in warning, f"warning {warning!r} should mention {expected!r}"
def test_destructive_warning_clean_commands():
from terminal_tools.common.destructive_warning import get_warning
for cmd in ["ls -la", "echo hi", "git status", "git commit -m 'x'"]:
assert get_warning(cmd) is None, f"unexpected warning for {cmd!r}"
def test_semantic_exit_grep():
from terminal_tools.common.semantic_exit import classify
status, msg = classify("grep foo /tmp/x", 0)
assert status == "ok"
status, msg = classify("grep foo /tmp/x", 1)
assert status == "ok"
assert "No matches" in msg
status, msg = classify("grep foo /tmp/x", 2)
assert status == "error"
def test_semantic_exit_default():
from terminal_tools.common.semantic_exit import classify
status, msg = classify("ls", 0)
assert status == "ok"
assert msg is None
status, msg = classify("ls", 1)
assert status == "error"
def test_semantic_exit_signaled():
from terminal_tools.common.semantic_exit import classify
status, msg = classify("sleep 999", -15, signaled=True)
assert status == "signal"
def test_semantic_exit_timed_out():
from terminal_tools.common.semantic_exit import classify
status, msg = classify("sleep 999", None, timed_out=True)
assert status == "error"
assert "timed out" in msg.lower()
+33
View File
@@ -0,0 +1,33 @@
"""Smoke test: load the server module, register tools, assert all 10 land."""
from __future__ import annotations
EXPECTED_TOOLS = {
"terminal_exec",
"terminal_job_start",
"terminal_job_logs",
"terminal_job_manage",
"terminal_pty_open",
"terminal_pty_run",
"terminal_pty_close",
"terminal_rg",
"terminal_find",
"terminal_output_get",
}
def test_register_terminal_tools_lands_all_ten(mcp):
from terminal_tools import register_terminal_tools
names = register_terminal_tools(mcp)
assert set(names) == EXPECTED_TOOLS, (
f"missing: {EXPECTED_TOOLS - set(names)}, extra: {set(names) - EXPECTED_TOOLS}"
)
def test_all_tools_have_terminal_prefix(mcp):
from terminal_tools import register_terminal_tools
names = register_terminal_tools(mcp)
for n in names:
assert n.startswith("terminal_"), f"tool {n!r} missing terminal_ prefix"