feat: consolidate search and list file tools
@@ -42,7 +42,7 @@ COMPACTABLE_TOOLS: frozenset[str] = frozenset(
         "write_file",
         "edit_file",
         "browser_screenshot",
-        "list_directory",
+        "search_files",
     }
 )
 
@@ -858,7 +858,7 @@ def build_emergency_summary(
         if not all_files:
             parts.append(
                 "NOTE: Large tool results may have been saved to files. "
-                "Use list_directory to check the data directory."
+                "Use search_files(target='files', path='.') to check the data directory."
             )
     except Exception:
         parts.append("NOTE: Large tool results were saved to files. Use read_file(path='<path>') to read them.")

@@ -41,7 +41,6 @@ _QUEEN_INDEPENDENT_TOOLS = [
     "write_file",
     "edit_file",
     "hashline_edit",
-    "list_directory",
     "search_files",
     "run_command",
     "undo_changes",
@@ -60,7 +59,6 @@ _QUEEN_INDEPENDENT_TOOLS = [
 # (e.g. inspect an existing skill) before committing.
 _QUEEN_INCUBATING_TOOLS = [
     "read_file",
-    "list_directory",
     "search_files",
     "run_command",
     # Schedule lives on the colony, not on the queen session — pass it
@@ -76,7 +74,6 @@ _QUEEN_INCUBATING_TOOLS = [
 _QUEEN_WORKING_TOOLS = [
     # Read-only
     "read_file",
-    "list_directory",
     "search_files",
     "run_command",
     # Monitoring + worker dialogue
@@ -95,7 +92,6 @@ _QUEEN_WORKING_TOOLS = [
 _QUEEN_REVIEWING_TOOLS = [
     # Read-only
     "read_file",
-    "list_directory",
     "search_files",
     "run_command",
     # Status + escalation replies
@@ -249,8 +245,11 @@ re-read state.
 See "Independent execution" for the per-step flow and granularity rule.
 
 ## File I/O (coder-tools MCP)
-- read_file, write_file, edit_file, hashline_edit, list_directory, \
-  search_files, run_command, undo_changes
+- read_file, write_file, edit_file, hashline_edit, search_files, \
+  run_command, undo_changes
+- search_files covers grep/find/ls in one tool: target='content' to \
+  search inside files, target='files' (with a glob like '*.py') to list \
+  or find files. Mtime-sorted in files mode.
 
 ## Browser Automation (gcu-tools MCP)
 - Use `browser_*` tools (browser_start, browser_navigate, browser_click, \
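For concreteness, the two modes described in the hunk above correspond to calls like the following sketch (the argument names come from the docstrings in this commit; the patterns and paths are only illustrative):

    # grep-style: regex search inside Python files
    search_files(pattern="build_emergency_summary", target="content", file_glob="*.py")
    # find/ls-style: list *.py files under src/, newest first
    search_files(pattern="*.py", target="files", path="src/")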
@@ -277,9 +276,10 @@ purpose — your job in this phase is to nail the spec, not keep doing \
 work. Available:
 
 ## Read-only inspection (coder-tools MCP)
-- read_file, list_directory, search_files, run_command — for confirming \
-  details before you commit (e.g. peek at an existing skill in \
-  ~/.hive/skills/, sanity-check an API URL).
+- read_file, search_files, run_command — for confirming details before \
+  you commit (e.g. peek at an existing skill in ~/.hive/skills/, sanity-check \
+  an API URL). search_files covers both grep (target='content') and ls/find \
+  (target='files', glob like '*.py').
 
 ## Approved → operational checklist (use your judgement, ask only what's missing)
 The conversation that got you here probably did NOT cover all of:
@@ -373,7 +373,8 @@ operational, not editorial.
 born from a fresh chat via start_incubating_colony.
 
 ## Read-only inspection
-- read_file, list_directory, search_files, run_command
+- read_file, search_files, run_command (search_files covers grep/find/ls \
+  via target='content' or target='files')
 
 When every worker has reported (success or failure), the phase \
 auto-moves to REVIEWING. You do not need to call a transition tool \
@@ -392,7 +393,7 @@ _queen_tools_reviewing = """
 # Tools (REVIEWING mode)
 
 Workers have finished. You have:
-- Read-only: read_file, list_directory, search_files, run_command
+- Read-only: read_file, search_files, run_command (search_files = grep+find+ls)
 - get_worker_status(focus?) — Pull the final status / per-worker reports
 - list_worker_questions() / reply_to_worker(request_id, reply) — Answer any \
   late escalations still in the inbox

@@ -37,11 +37,11 @@ logger = logging.getLogger(__name__)
 
 _TOOL_CATEGORIES: dict[str, list[str]] = {
     # Read-only file operations — safe baseline for every knowledge queen.
+    # search_files is unified: covers content grep AND directory listing
+    # via target='content' / target='files'. It replaces list_directory,
+    # list_dir, and list_files.
     "file_read": [
         "read_file",
-        "list_directory",
-        "list_dir",
-        "list_files",
         "search_files",
         "grep_search",
         "pdf_read",
@@ -80,23 +80,6 @@ _TOOL_CATEGORIES: dict[str, list[str]] = {
     ],
     # Browser automation — every tool from the gcu-tools MCP server.
     "browser": ["@server:gcu-tools"],
-    # External research / information-gathering.
-    "research": [
-        "search_papers",
-        "download_paper",
-        "search_wikipedia",
-        "web_scrape",
-    ],
-    # Security scanners — pentest-ish, only for engineering/security roles.
-    "security": [
-        "dns_security_scan",
-        "http_headers_scan",
-        "port_scan",
-        "ssl_tls_scan",
-        "subdomain_enumerate",
-        "tech_stack_detect",
-        "risk_score",
-    ],
     # Lightweight context helpers — good default for every queen.
     "time_context": [
         "get_current_time",

@@ -71,7 +71,7 @@ class ToolRegistry:
         {
             # File system reads
             "read_file",
-            "list_directory",
+            "search_files",
             "grep",
             "glob",
             # Web reads

@@ -29,7 +29,6 @@ _ALWAYS_AVAILABLE_TOOLS: frozenset[str] = frozenset(
         "read_file",
         "write_file",
         "edit_file",
-        "list_directory",
         "search_files",
         "hashline_edit",
         "set_output",

@@ -683,7 +683,7 @@ class Orchestrator:
         # Set per-execution data_dir and agent_id so data tools and
         # spillover files share the same session-scoped directory, and
         # so MCP tools whose server-side schemas mark agent_id as a
-        # required field (list_dir, hashline_edit, replace_file_content,
+        # required field (search_files, hashline_edit, replace_file_content,
         # execute_command_tool, …) get a valid value injected even on
         # registry instances where agent_loader.setup() didn't populate
         # the session_context. Without this, FastMCP rejects those

@@ -43,7 +43,6 @@ _WORKER_INHERITED_TOOLS: frozenset[str] = frozenset(
         "write_file",
         "edit_file",
         "hashline_edit",
-        "list_directory",
        "search_files",
        "undo_changes",
        # Shell

@@ -889,7 +889,7 @@ def test_concurrency_safe_allowlist_is_conservative():
     allowlist = ToolRegistry.CONCURRENCY_SAFE_TOOLS
 
     # Positive assertions: known-safe read operations are present.
-    for name in ("read_file", "grep", "glob", "list_directory", "web_search"):
+    for name in ("read_file", "grep", "glob", "search_files", "web_search"):
         assert name in allowlist, f"{name} should be concurrency-safe"
 
     # Negative assertions: nothing that mutates state is allowed in.

@@ -0,0 +1,66 @@
+def search_tool(pattern: str, target: str = "content", path: str = ".",
+                file_glob: str = None, limit: int = 50, offset: int = 0,
+                output_mode: str = "content", context: int = 0,
+                task_id: str = "default") -> str:
+    """Search for content or files."""
+    try:
+        # Track searches to detect *consecutive* repeated search loops.
+        # Include pagination args so users can page through truncated
+        # results without tripping the repeated-search guard.
+        search_key = (
+            "search",
+            pattern,
+            target,
+            str(path),
+            file_glob or "",
+            limit,
+            offset,
+        )
+        with _read_tracker_lock:
+            task_data = _read_tracker.setdefault(task_id, {
+                "last_key": None, "consecutive": 0, "read_history": set(),
+            })
+            if task_data["last_key"] == search_key:
+                task_data["consecutive"] += 1
+            else:
+                task_data["last_key"] = search_key
+                task_data["consecutive"] = 1
+            count = task_data["consecutive"]
+
+        if count >= 4:
+            return json.dumps({
+                "error": (
+                    f"BLOCKED: You have run this exact search {count} times in a row. "
+                    "The results have NOT changed. You already have this information. "
+                    "STOP re-searching and proceed with your task."
+                ),
+                "pattern": pattern,
+                "already_searched": count,
+            }, ensure_ascii=False)
+
+        file_ops = _get_file_ops(task_id)
+        result = file_ops.search(
+            pattern=pattern, path=path, target=target, file_glob=file_glob,
+            limit=limit, offset=offset, output_mode=output_mode, context=context
+        )
+        if hasattr(result, 'matches'):
+            for m in result.matches:
+                if hasattr(m, 'content') and m.content:
+                    m.content = redact_sensitive_text(m.content)
+        result_dict = result.to_dict()
+
+        if count >= 3:
+            result_dict["_warning"] = (
+                f"You have run this exact search {count} times consecutively. "
+                "The results have not changed. Use the information you already have."
+            )
+
+        result_json = json.dumps(result_dict, ensure_ascii=False)
+        # Hint when results were truncated — explicit next offset is clearer
+        # than relying on the model to infer it from total_count vs match count.
+        if result_dict.get("truncated"):
+            next_offset = offset + limit
+            result_json += f"\n\n[Hint: Results truncated. Use offset={next_offset} to see more, or narrow with a more specific pattern or file_glob.]"
+        return result_json
+    except Exception as e:
+        return tool_error(str(e))
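To make the guard's thresholds concrete, here is a hedged sketch of four identical calls to the search_tool defined above (the pattern is made up):

    for attempt in range(1, 5):
        out = search_tool(pattern="TODO", target="content", path=".")
        # attempts 1 and 2: normal JSON results
        # attempt 3: same results, plus a "_warning" field
        # attempt 4: a BLOCKED error payload instead of results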
@@ -2,8 +2,9 @@
 """
 File Tools MCP Server
 
-Minimal FastMCP server exposing 6 file tools (read_file, write_file, edit_file,
-list_directory, search_files, run_command) with no path sandboxing.
+Minimal FastMCP server exposing 5 file tools (read_file, write_file, edit_file,
+hashline_edit, search_files) with no path sandboxing. ``search_files`` is
+unified — covers grep, find, and ls via target='content' / target='files'.
 
 Usage:
     # Run with STDIO transport (for agent integration)
@@ -82,7 +83,7 @@ def main() -> None:
 
     if not args.stdio:
         logger.info(
-            "Registered 6 file tools: read_file, write_file, edit_file, list_directory, search_files, run_command"
+            "Registered 5 file tools: read_file, write_file, edit_file, hashline_edit, search_files"
         )
 
     if args.stdio:

@@ -1,8 +1,12 @@
 """
 Shared file operation tools for MCP servers.
 
-Provides 7 tools (read_file, write_file, edit_file, hashline_edit,
-list_directory, search_files, run_command) plus supporting helpers.
+Provides 5 tools (read_file, write_file, edit_file, hashline_edit,
+search_files) plus supporting helpers. ``search_files`` is unified —
+it covers both content grep (``target='content'``) and file listing
+(``target='files'``), replacing the older ``list_directory`` tool and
+the LLM's choice between grep/find/ls.
 
 Used by both files_server.py (unsandboxed) and coder_tools_server.py
 (project-root sandboxed with git snapshots).
 
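As a rough mental mapping for the consolidation described in this docstring (the shell commands are analogies only, not part of this commit):

    # ls src/              ->  search_files(pattern="*", target="files", path="src")
    # find . -name "*.py"  ->  search_files(pattern="*.py", target="files")
    # grep -rn "foo" .     ->  search_files(pattern="foo", target="content")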
@@ -108,6 +112,285 @@ BINARY_EXTENSIONS = frozenset(
     }
 )
 
+# ── search_files anti-loop tracker ────────────────────────────────────────
+#
+# Process-level memory of the most recent search_files call per task. When
+# the same query (target+pattern+path+glob+pagination+output) is repeated
+# back-to-back, we warn the model on the 3rd hit and block on the 4th.
+# Mirrors the Hermes design — see scripts/hermes_search_files.md.
+
+import threading as _threading
+
+_SEARCH_TRACKER_LOCK = _threading.Lock()
+_SEARCH_TRACKER: dict[str, dict] = {}
+
+# Skip set shared by both search targets — common build/cache dirs that are
+# almost never what the model wants to walk.
+_SEARCH_SKIP_DIRS = frozenset(
+    {".git", "__pycache__", "node_modules", ".venv", ".tox", ".mypy_cache", ".ruff_cache"}
+)
+
+
+def _relativize(path: str, root: str | None) -> str:
+    """Best-effort relative path; falls back to the original on cross-volume."""
+    if not root:
+        return path
+    try:
+        norm_path = os.path.normpath(path.replace("/", os.sep))
+        norm_root = os.path.normpath(root.replace("/", os.sep))
+        return os.path.relpath(norm_path, norm_root)
+    except ValueError:
+        return path
+
+
+def _do_search_files_target(
+    pattern: str,
+    resolved: str,
+    display_root: str,
+    limit: int,
+    offset: int,
+) -> str:
+    """target='files': enumerate files matching a glob, mtime-sorted (newest first)."""
+    if not os.path.isdir(resolved):
+        return f"Error: Directory not found: {resolved}"
+
+    glob = pattern or "*"
+    files: list[tuple[float, str]] = []
+
+    # Try ripgrep --files first; it respects .gitignore which is what we want.
+    try:
+        cmd = [
+            "rg",
+            "--files",
+            "--no-messages",
+            "--hidden",
+            "--glob=!.git/*",
+        ]
+        if glob and glob != "*":
+            cmd.extend(["--glob", glob])
+        cmd.append(resolved)
+        rg = subprocess.run(
+            cmd,
+            capture_output=True,
+            text=True,
+            timeout=30,
+            encoding="utf-8",
+            stdin=subprocess.DEVNULL,
+        )
+        if rg.returncode <= 1:
+            for raw in rg.stdout.splitlines():
+                raw = raw.strip()
+                if not raw:
+                    continue
+                try:
+                    files.append((os.path.getmtime(raw), raw))
+                except OSError:
+                    continue
+        else:
+            files = []
+    except FileNotFoundError:
+        # ripgrep absent — fall through to os.walk
+        files = []
+    except subprocess.TimeoutExpired:
+        return "Error: file listing timed out after 30 seconds"
+
+    # Python fallback (also runs when rg returned nothing on platforms where
+    # rg.returncode reports >1 for "no files in glob").
+    if not files:
+        for root, dirs, fnames in os.walk(resolved):
+            dirs[:] = [d for d in dirs if d not in _SEARCH_SKIP_DIRS and not d.startswith(".")]
+            for fname in fnames:
+                if fname.startswith("."):
+                    continue
+                if glob and glob != "*" and not fnmatch.fnmatch(fname, glob):
+                    continue
+                full = os.path.join(root, fname)
+                try:
+                    files.append((os.path.getmtime(full), full))
+                except OSError:
+                    continue
+
+    files.sort(reverse=True)
+    total = len(files)
+    page = files[offset : offset + max(0, int(limit))]
+    if not page:
+        return "No files found." if total == 0 else f"No files at offset {offset} (total: {total})."
+
+    lines = [_relativize(p, display_root) for _, p in page]
+    out = "\n".join(lines)
+    next_offset = offset + len(page)
+    if total > next_offset:
+        out += (
+            f"\n\n[Hint: showing {len(page)} of {total} files. "
+            f"Use offset={next_offset} for more, or narrow with a more specific glob.]"
+        )
+    return out
+
+
+def _do_search_content_target(
+    pattern: str,
+    resolved: str,
+    project_root: str | None,
+    file_glob: str,
+    limit: int,
+    offset: int,
+    output_mode: str,
+    context: int,
+    hashline: bool,
+) -> str:
+    """target='content': regex search across file contents (ripgrep + Python fallback)."""
+    display_root = project_root or (resolved if os.path.isdir(resolved) else os.path.dirname(resolved))
+    cap = max(1, int(limit))
+
+    # Try ripgrep first.
+    try:
+        cmd = ["rg", "-nH", "--no-messages", "--hidden", "--glob=!.git/*"]
+        if context and output_mode == "content":
+            cmd.extend(["-C", str(int(context))])
+        if file_glob:
+            cmd.extend(["--glob", file_glob])
+        if output_mode == "files_only":
+            cmd.append("-l")
+        elif output_mode == "count":
+            cmd.append("-c")
+        cmd.append(pattern)
+        cmd.append(resolved)
+
+        rg = subprocess.run(
+            cmd,
+            capture_output=True,
+            text=True,
+            timeout=30,
+            encoding="utf-8",
+            stdin=subprocess.DEVNULL,
+        )
+        if rg.returncode <= 1:
+            raw_lines = [ln for ln in rg.stdout.splitlines() if ln]
+            total = len(raw_lines)
+            page = raw_lines[offset : offset + cap]
+            if not page:
+                return "No matches found." if total == 0 else f"No matches at offset {offset} (total: {total})."
+
+            formatted: list[str] = []
+            for line in page:
+                # Relativize path prefix on every line.
+                m = re.match(r"^(.+?):(\d+):(.*)$", line) if output_mode == "content" else None
+                if m:
+                    fpath, lineno, rest = m.group(1), m.group(2), m.group(3)
+                    rel = _relativize(fpath, display_root)
+                    if hashline:
+                        h = compute_line_hash(rest)
+                        line = f"{rel}:{lineno}:{h}|{rest}"
+                    else:
+                        line = f"{rel}:{lineno}:{rest}"
+                else:
+                    # files_only/count: single path (or path:count) per line
+                    head, sep, tail = line.partition(":")
+                    if sep and tail.isdigit():
+                        line = f"{_relativize(head, display_root)}:{tail}"
+                    else:
+                        line = _relativize(line, display_root)
+                if len(line) > MAX_LINE_LENGTH:
+                    line = line[:MAX_LINE_LENGTH] + "..."
+                formatted.append(line)
+
+            out = "\n".join(formatted)
+            next_offset = offset + len(page)
+            if total > next_offset:
+                out += (
+                    f"\n\n[Hint: showing {len(page)} of {total} matches. "
+                    f"Use offset={next_offset} for more, or narrow with file_glob/pattern.]"
+                )
+            return out
+    except FileNotFoundError:
+        pass  # ripgrep missing — Python fallback below
+    except subprocess.TimeoutExpired:
+        return "Error: search timed out after 30 seconds"
+
+    # Python fallback (no ripgrep): regex over file contents.
+    try:
+        compiled = re.compile(pattern)
+    except re.error as e:
+        return f"Error: invalid regex: {e}"
+
+    if os.path.isfile(resolved):
+        candidates = [resolved]
+    else:
+        candidates = []
+        for root, dirs, fnames in os.walk(resolved):
+            dirs[:] = [d for d in dirs if d not in _SEARCH_SKIP_DIRS and not d.startswith(".")]
+            for fname in fnames:
+                if file_glob and not fnmatch.fnmatch(fname, file_glob):
+                    continue
+                candidates.append(os.path.join(root, fname))
+
+    # files_only / count modes need per-file aggregation.
+    if output_mode in ("files_only", "count"):
+        items: list[tuple[str, int]] = []
+        for fpath in candidates:
+            try:
+                with open(fpath, encoding="utf-8", errors="ignore") as f:
+                    n = sum(1 for line in f if compiled.search(line.rstrip()))
+            except OSError:
+                continue
+            if n:
+                items.append((fpath, n))
+        total = len(items)
+        page = items[offset : offset + cap]
+        if not page:
+            return "No matches found." if total == 0 else f"No matches at offset {offset} (total: {total})."
+        if output_mode == "files_only":
+            lines = [_relativize(p, display_root) for p, _ in page]
+        else:
+            lines = [f"{_relativize(p, display_root)}:{n}" for p, n in page]
+        out = "\n".join(lines)
+        next_offset = offset + len(page)
+        if total > next_offset:
+            out += f"\n\n[Hint: showing {len(page)} of {total}. Use offset={next_offset} for more.]"
+        return out
+
+    # output_mode == "content"
+    matches: list[str] = []
+    for fpath in candidates:
+        rel = _relativize(fpath, display_root)
+        try:
+            with open(fpath, encoding="utf-8", errors="ignore") as f:
+                buf = f.readlines()
+        except OSError:
+            continue
+        for i, raw in enumerate(buf, 1):
+            stripped = raw.rstrip()
+            if not compiled.search(stripped):
+                continue
+            if context > 0:
+                lo = max(0, i - 1 - context)
+                hi = min(len(buf), i + context)
+                ctx = []
+                for j in range(lo, hi):
+                    marker = ":" if (j + 1) == i else "-"
+                    ln = buf[j].rstrip()
+                    ctx.append(f"{rel}:{j + 1}{marker}{ln[:MAX_LINE_LENGTH]}")
+                matches.append("\n".join(ctx))
+            elif hashline:
+                h = compute_line_hash(stripped)
+                matches.append(f"{rel}:{i}:{h}|{stripped}")
+            else:
+                matches.append(f"{rel}:{i}:{stripped[:MAX_LINE_LENGTH]}")
+
+    total = len(matches)
+    page = matches[offset : offset + cap]
+    if not page:
+        return "No matches found." if total == 0 else f"No matches at offset {offset} (total: {total})."
+    out = "\n\n".join(page) if context > 0 else "\n".join(page)
+    next_offset = offset + len(page)
+    if total > next_offset:
+        out += (
+            f"\n\n[Hint: showing {len(page)} of {total} matches. "
+            f"Use offset={next_offset} for more, or narrow with file_glob/pattern.]"
+        )
+    return out
+
+
 # ── Context-aware sandboxing ─────────────────────────────────────────────────
 
 # Context variable for additional allowed paths (beyond base_root)
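Both helpers emit the same pagination hint format, so for example a content search with limit=50 against 120 total matches would end with a line like this (counts illustrative):

    [Hint: showing 50 of 120 matches. Use offset=50 for more, or narrow with file_glob/pattern.]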
@@ -603,180 +886,110 @@ def register_file_tools(
             return f"Error editing file: {e}"
 
     @mcp.tool()
-    def list_directory(path: str = ".", recursive: bool = False) -> str:
-        """List directory contents with type indicators.
+    def search_files(
+        pattern: str,
+        target: str = "content",
+        path: str = ".",
+        file_glob: str = "",
+        limit: int = 50,
+        offset: int = 0,
+        output_mode: str = "content",
+        context: int = 0,
+        hashline: bool = False,
+        task_id: str = "",
+    ) -> str:
+        """Search file contents or find files by name. Use this instead of grep, find, or ls.
 
-        Directories have a / suffix. Hidden files and common build directories
-        are skipped.
+        Two modes:
+          target='content' (default): Regex search inside files. Output modes:
+            'content' (lines+numbers, default), 'files_only' (paths only), 'count' (per-file counts).
+          target='files': Find files by glob pattern (e.g. '*.py', '*config*').
+            Also use this instead of ls — results sorted by modification time (newest first).
+
+        Pagination: limit/offset both apply; the response includes a hint with the
+        next offset when truncated. The same query repeated back-to-back is warned
+        at the 3rd call and blocked at the 4th — use the results you already have.
 
         Args:
-            path: Absolute directory path (default: current directory).
-            recursive: List recursively (default: false). Truncates at 500 entries.
+            pattern: Regex (content mode) or glob (files mode, e.g. '*.py'). For
+                an "ls"-style listing pass '*' or '*.<ext>'.
+            target: 'content' to grep inside files, 'files' to list/find files.
+                Legacy aliases: 'grep' -> 'content', 'find'/'ls' -> 'files'.
+            path: Directory (or, in content mode, a single file) to search.
+            file_glob: Restrict content search to filenames matching this glob.
+                Ignored in files mode (use ``pattern``).
+            limit: Max results to return (default 50).
+            offset: Skip first N results for pagination (default 0).
+            output_mode: Content-mode output shape — 'content' | 'files_only' | 'count'.
+            context: Lines of context before and after each match (content mode only).
+            hashline: Content mode: include N:hhhh hash anchors for hashline_edit.
+            task_id: Optional anti-loop scope key (defaults to a shared bucket).
         """
         resolved = _resolve(path)
-        if not os.path.isdir(resolved):
-            return f"Error: Directory not found: {path}"
+        # Legacy aliases — keep older prompts working.
+        if target in ("grep",):
+            target = "content"
+        elif target in ("find", "ls"):
+            target = "files"
 
-        try:
-            skip = {
-                ".git",
-                "__pycache__",
-                "node_modules",
-                ".venv",
-                ".tox",
-                ".mypy_cache",
-                ".ruff_cache",
-            }
-            entries: list[str] = []
-            if recursive:
-                for root, dirs, files in os.walk(resolved):
-                    dirs[:] = sorted(d for d in dirs if d not in skip and not d.startswith("."))
-                    rel_root = os.path.relpath(root, resolved)
-                    if rel_root == ".":
-                        rel_root = ""
-                    for f in sorted(files):
-                        if f.startswith("."):
-                            continue
-                        entries.append(os.path.join(rel_root, f) if rel_root else f)
-                    if len(entries) >= 500:
-                        entries.append("... (truncated at 500 entries)")
-                        return "\n".join(entries)
+        if target not in ("content", "files"):
+            return f"Error: invalid target '{target}'. Use 'content' or 'files'."
+        if output_mode not in ("content", "files_only", "count"):
+            return f"Error: invalid output_mode '{output_mode}'. Use 'content', 'files_only', or 'count'."
 
+        # Anti-loop guard. Key includes everything that would change results so
+        # paginating through the same query doesn't trip the alarm.
+        key = (target, pattern, str(path), file_glob, int(limit), int(offset), output_mode, int(context))
+        bucket = task_id or "_default"
+        with _SEARCH_TRACKER_LOCK:
+            td = _SEARCH_TRACKER.setdefault(bucket, {"last_key": None, "consecutive": 0})
+            if td["last_key"] == key:
+                td["consecutive"] += 1
             else:
-                for entry in sorted(os.listdir(resolved)):
-                    if entry.startswith(".") or entry in skip:
-                        continue
-                    full = os.path.join(resolved, entry)
-                    suffix = "/" if os.path.isdir(full) else ""
-                    entries.append(f"{entry}{suffix}")
+                td["last_key"] = key
+                td["consecutive"] = 1
+            consecutive = td["consecutive"]
 
-            return "\n".join(entries) if entries else "(empty directory)"
-        except Exception as e:
-            return f"Error listing directory: {e}"
-
-    @mcp.tool()
-    def search_files(pattern: str, path: str = ".", include: str = "", hashline: bool = False) -> str:
-        """Search file contents using regex. Uses ripgrep if available.
-
-        Results sorted by file with line numbers. Set hashline=True to include
-        content-hash anchors (N:hhhh) for use with hashline_edit.
-
-        Args:
-            pattern: Regex pattern to search for.
-            path: Absolute directory path to search (default: current directory).
-            include: File glob filter (e.g. '*.py').
-            hashline: If True, include hash anchors in results (default: False).
-        """
-        resolved = _resolve(path)
-        if not os.path.isdir(resolved):
-            return f"Error: Directory not found: {path}"
-
-        # Try ripgrep first
-        try:
-            cmd = [
-                "rg",
-                "-nH",
-                "--no-messages",
-                "--hidden",
-                "--max-count=20",
-                "--glob=!.git/*",
-                pattern,
-            ]
-            if include:
-                cmd.extend(["--glob", include])
-            cmd.append(resolved)
-
-            rg_result = subprocess.run(
-                cmd,
-                capture_output=True,
-                text=True,
-                timeout=30,
-                encoding="utf-8",
-                stdin=subprocess.DEVNULL,
+        if consecutive >= 4:
+            return (
+                f"BLOCKED: this exact search has run {consecutive} times in a row. "
+                "Results have NOT changed. Use the information you already have and proceed."
             )
-            if rg_result.returncode <= 1:
-                output = rg_result.stdout.strip()
-                if not output:
-                    return "No matches found."
-
-                lines = []
-                for line in output.split("\n")[:SEARCH_RESULT_LIMIT]:
-                    if project_root:
-                        line = line.replace(project_root + "/", "")
-                    if hashline:
-                        # Parse file:linenum:content and insert hash anchor
-                        parts = line.split(":", 2)
-                        if len(parts) >= 3:
-                            content = parts[2]
-                            h = compute_line_hash(content)
-                            line = f"{parts[0]}:{parts[1]}:{h}|{content}"
-                    else:
-                        # Platform-agnostic relativization: ripgrep may output
-                        # forward or backslash paths; normalize before relpath (Windows).
-                        match = re.match(r"^(.+):(\d+):", line)
-                        if match:
-                            path_part, line_num, rest = (
-                                match.group(1),
-                                match.group(2),
-                                line[match.end() :],
-                            )
-                            path_part = os.path.normpath(path_part.replace("/", os.sep))
-                            proj_norm = os.path.normpath(project_root.replace("/", os.sep))
-                            try:
-                                rel = os.path.relpath(path_part, proj_norm)
-                                line = f"{rel}:{line_num}:{rest}"
-                            except ValueError:
-                                pass
-                    if len(line) > MAX_LINE_LENGTH:
-                        line = line[:MAX_LINE_LENGTH] + "..."
-                    lines.append(line)
-                total = output.count("\n") + 1
-                result_str = "\n".join(lines)
-                if total > SEARCH_RESULT_LIMIT:
-                    result_str += f"\n\n... ({total} total matches, showing first {SEARCH_RESULT_LIMIT})"
-                return result_str
-        except FileNotFoundError:
-            pass  # ripgrep not installed — fall through to Python
-        except subprocess.TimeoutExpired:
-            return "Error: Search timed out after 30 seconds"
-
-        # Fallback: Python regex
-        try:
-            compiled = re.compile(pattern)
-            matches: list[str] = []
-            skip_dirs = {".git", "__pycache__", "node_modules", ".venv", ".tox"}
-            resolved = _resolve(path)
-        except Exception as e:
-            return f"Error: {e}"
-
-        for root, dirs, files in os.walk(resolved):
-            dirs[:] = [d for d in dirs if d not in skip_dirs]
-            for fname in files:
-                if include and not fnmatch.fnmatch(fname, include):
-                    continue
-                fpath = os.path.join(root, fname)
-                if project_root:
-                    proj_norm = os.path.normpath(project_root.replace("/", os.sep))
-                    try:
-                        display_path = os.path.relpath(fpath, proj_norm)
-                    except ValueError:
-                        display_path = fpath
-                else:
-                    display_path = fpath
-                try:
-                    with open(fpath, encoding="utf-8", errors="ignore") as f:
-                        for i, line in enumerate(f, 1):
-                            stripped = line.rstrip()
-                            if compiled.search(stripped):
-                                if hashline:
-                                    h = compute_line_hash(stripped)
-                                    matches.append(f"{display_path}:{i}:{h}|{stripped}")
-                                else:
-                                    matches.append(f"{display_path}:{i}:{stripped[:MAX_LINE_LENGTH]}")
-                            if len(matches) >= SEARCH_RESULT_LIMIT:
-                                return "\n".join(matches) + "\n... (truncated)"
-                except (OSError, UnicodeDecodeError):
-                    continue
+        if target == "files":
+            result = _do_search_files_target(
+                pattern=pattern,
+                resolved=resolved,
+                display_root=project_root or resolved,
+                limit=limit,
+                offset=offset,
+            )
+        else:
+            # content mode allows a single file as path; the target=files mode does not
+            if not os.path.isdir(resolved) and not os.path.isfile(resolved):
+                return f"Error: Path not found: {path}"
+            result = _do_search_content_target(
+                pattern=pattern,
+                resolved=resolved,
+                project_root=project_root,
+                file_glob=file_glob,
+                limit=limit,
+                offset=offset,
+                output_mode=output_mode,
+                context=context,
+                hashline=hashline,
+            )
 
-        return "\n".join(matches) if matches else "No matches found."
-        except re.error as e:
-            return f"Error: Invalid regex: {e}"
+        if consecutive == 3:
+            result += (
+                f"\n\n[Warning: this exact search has run {consecutive} times consecutively. "
+                "Results have not changed — use what you have instead of re-searching.]"
+            )
+        return result
 
     @mcp.tool()
     def hashline_edit(
 
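With hashline=True in content mode, each match carries the N:hhhh anchor described in the docstring above, e.g. (path and hash value made up for illustration):

    src/app.py:42:9f3a|def main() -> None: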
@@ -203,106 +203,100 @@ def register_tools(mcp: FastMCP) -> None:
         except Exception as e:
             return f"Error writing file: {e}"
 
     @mcp.tool()
-    def list_files(
-        path: str = ".",
-        recursive: bool = False,
-        data_dir: str = "",
-    ) -> str:
-        """List directory contents with type indicators.
-
-        Directories have a / suffix. Hidden files and common build directories
-        are skipped.
-
-        Args:
-            path: Directory path (default: data_dir).
-            recursive: List recursively (default: false).
-            data_dir: Auto-injected - the session's data directory.
-        """
-        try:
-            resolved = _resolve_path(path, data_dir)
-        except ValueError as e:
-            return f"Error: {e}"
-
-        if not os.path.isdir(resolved):
-            return f"Error: Directory not found: {path}"
-
-        try:
-            skip = {".git", "__pycache__", "node_modules", ".venv", ".tox"}
-            entries: list[str] = []
-
-            if recursive:
-                for root, dirs, files in os.walk(resolved):
-                    dirs[:] = sorted(d for d in dirs if d not in skip and not d.startswith("."))
-                    rel_root = os.path.relpath(root, resolved)
-                    if rel_root == ".":
-                        rel_root = ""
-                    for f in sorted(files):
-                        if f.startswith("."):
-                            continue
-                        entries.append(os.path.join(rel_root, f) if rel_root else f)
-                    if len(entries) >= 500:
-                        entries.append("... (truncated at 500 entries)")
-                        return "\n".join(entries)
-            else:
-                for entry in sorted(os.listdir(resolved)):
-                    if entry.startswith(".") or entry in skip:
-                        continue
-                    full = os.path.join(resolved, entry)
-                    suffix = "/" if os.path.isdir(full) else ""
-                    entries.append(f"{entry}{suffix}")
-
-            return "\n".join(entries) if entries else "(empty directory)"
-        except Exception as e:
-            return f"Error listing directory: {e}"
-
-    @mcp.tool()
     def search_files(
         pattern: str,
+        target: str = "content",
         path: str = ".",
+        file_glob: str = "",
+        limit: int = 50,
+        offset: int = 0,
+        output_mode: str = "content",
+        context: int = 0,
         data_dir: str = "",
+        agent_id: str = "",
     ) -> str:
-        """Search file contents using regex.
+        """Search file contents or find files by name. Use this instead of grep, find, or ls.
 
-        Results sorted by file with line numbers. Searches within
-        the session's data directory or ~/.hive/.
+        Sandboxed to the session's data directory and ~/.hive/.
 
-        Args:
-            pattern: Regex pattern to search for.
-            path: Directory path to search (default: data_dir).
-            data_dir: Auto-injected - the session's data directory.
+        Two modes:
+          target='content' (default): Regex search inside files.
+          target='files': Find files by glob pattern (e.g. '*.py'). Results
+            sorted by modification time (newest first) — also use this instead of ls.
+
+        See file_ops.search_files for the full parameter contract.
         """
-        import re
+        from aden_tools.file_ops import (
+            _do_search_content_target,
+            _do_search_files_target,
+            _SEARCH_TRACKER,
+            _SEARCH_TRACKER_LOCK,
+        )
+
+        # Legacy aliases
+        if target == "grep":
+            target = "content"
+        elif target in ("find", "ls"):
+            target = "files"
+
+        if target not in ("content", "files"):
+            return f"Error: invalid target '{target}'. Use 'content' or 'files'."
+        if output_mode not in ("content", "files_only", "count"):
+            return f"Error: invalid output_mode '{output_mode}'."
 
         try:
             resolved = _resolve_path(path, data_dir)
         except ValueError as e:
             return f"Error: {e}"
 
-        if not os.path.isdir(resolved):
-            return f"Error: Directory not found: {path}"
+        # Anti-loop guard scoped per agent_id (or shared bucket if absent).
+        bucket = agent_id or "_default"
+        key = (target, pattern, str(path), file_glob, int(limit), int(offset), output_mode, int(context))
+        with _SEARCH_TRACKER_LOCK:
+            td = _SEARCH_TRACKER.setdefault(bucket, {"last_key": None, "consecutive": 0})
+            if td["last_key"] == key:
+                td["consecutive"] += 1
+            else:
+                td["last_key"] = key
+                td["consecutive"] = 1
+            consecutive = td["consecutive"]
+        if consecutive >= 4:
+            return (
+                f"BLOCKED: this exact search has run {consecutive} times in a row. "
+                "Results have NOT changed. Use the information you already have and proceed."
+            )
 
-        try:
-            compiled = re.compile(pattern)
-            matches: list[str] = []
-            skip_dirs = {".git", "__pycache__", "node_modules", ".venv"}
+        # display_root: relativize against the data_dir (or the search root) so
+        # output paths read naturally inside the agent's workspace.
+        display_root = data_dir or resolved
 
-            for root, dirs, files in os.walk(resolved):
-                dirs[:] = [d for d in dirs if d not in skip_dirs]
-                for fname in files:
-                    fpath = os.path.join(root, fname)
-                    display_path = os.path.relpath(fpath, resolved)
-                    try:
-                        with open(fpath, encoding="utf-8", errors="ignore") as f:
-                            for i, line in enumerate(f, 1):
-                                stripped = line.rstrip()
-                                if compiled.search(stripped):
-                                    matches.append(f"{display_path}:{i}:{stripped[:2000]}")
-                                    if len(matches) >= 100:
-                                        return "\n".join(matches) + "\n... (truncated)"
-                    except (OSError, UnicodeDecodeError):
-                        continue
+        if target == "files":
+            result = _do_search_files_target(
+                pattern=pattern,
+                resolved=resolved,
+                display_root=display_root,
+                limit=limit,
+                offset=offset,
+            )
+        else:
+            if not os.path.isdir(resolved) and not os.path.isfile(resolved):
+                return f"Error: Path not found: {path}"
+            result = _do_search_content_target(
+                pattern=pattern,
+                resolved=resolved,
+                project_root=display_root,
+                file_glob=file_glob,
+                limit=limit,
+                offset=offset,
+                output_mode=output_mode,
+                context=context,
+                hashline=False,
+            )
 
-            return "\n".join(matches) if matches else "No matches found."
-        except re.error as e:
-            return f"Error: Invalid regex: {e}"
+        if consecutive == 3:
+            result += (
+                f"\n\n[Warning: this exact search has run {consecutive} times consecutively. "
+                "Results have not changed — use what you have instead of re-searching.]"
+            )
+        return result
 
@@ -1,3 +1,13 @@
+"""Agent-sandboxed search_files registration.
+
+This toolkit historically registered a separate ``list_dir`` tool that
+returned ``{name, type, size_bytes}`` dicts. It has been folded into
+``search_files`` — one tool covers grep, find, and ls. We keep this
+module as the registration site for the agent-sandboxed variant so
+toolkits scoped via ``get_sandboxed_path(path, agent_id)`` continue to
+expose file search through the same canonical name.
+"""
+
 import os
 
 from mcp.server.fastmcp import FastMCP
@@ -6,51 +16,108 @@ from ..security import get_sandboxed_path
 
 
 def register_tools(mcp: FastMCP) -> None:
-    """Register directory listing tools with the MCP server."""
+    """Register the agent-sandboxed search_files tool with the MCP server."""
 
     @mcp.tool()
-    def list_dir(path: str, agent_id: str) -> dict:
-        """
-        Purpose
-            List the contents of a directory within the agent sandbox.
+    def search_files(
+        pattern: str = "*",
+        target: str = "files",
+        path: str = ".",
+        file_glob: str = "",
+        limit: int = 50,
+        offset: int = 0,
+        output_mode: str = "content",
+        context: int = 0,
+        agent_id: str = "",
+    ) -> str:
+        """Search file contents or find files by name within the agent sandbox.
 
-        When to use
-            Explore directory structure and contents
-            Discover available files and subdirectories
-            Verify file existence before reading or writing
+        Use this instead of grep, find, or ls.
 
-        Rules & Constraints
-            Path must point to an existing directory
-            Returns file names, types, and sizes
-            Does not recurse into subdirectories
+        target='files' (default here): list/find files by glob — mtime-sorted.
+        target='content': regex search inside files.
 
         Args:
-            path: The directory path (relative to agent sandbox)
-            agent_id: The ID of the agent
-
-        Returns:
-            Dict with directory contents and metadata, or error dict
+            pattern: Glob (files mode) or regex (content mode). Defaults to ``*``
+                so a bare call lists every file in the sandbox.
+            target: 'files' (default) or 'content'. Legacy aliases: 'grep'/'find'/'ls'.
+            path: Directory or file relative to the agent sandbox.
+            file_glob: Restrict content search to files matching this glob.
+            limit: Max results (default 50).
+            offset: Pagination offset (default 0).
+            output_mode: Content-mode output — 'content' | 'files_only' | 'count'.
+            context: Lines of surrounding context for content matches.
+            agent_id: Auto-injected — sandbox owner.
         """
+        from aden_tools.file_ops import (
+            _do_search_content_target,
+            _do_search_files_target,
+            _SEARCH_TRACKER,
+            _SEARCH_TRACKER_LOCK,
+        )
+
+        if target == "grep":
+            target = "content"
+        elif target in ("find", "ls"):
+            target = "files"
+        if target not in ("content", "files"):
+            return f"Error: invalid target '{target}'. Use 'content' or 'files'."
+        if output_mode not in ("content", "files_only", "count"):
+            return f"Error: invalid output_mode '{output_mode}'."
 
         try:
-            secure_path = get_sandboxed_path(path, agent_id)
-            if not os.path.exists(secure_path):
-                return {"error": f"Path not found: {path}"}
-
-            if not os.path.isdir(secure_path):
-                return {"error": f"Path is not a directory: {path}"}
-
-            items = os.listdir(secure_path)
-            entries = []
-            for item in items:
-                full_path = os.path.join(secure_path, item)
-                is_dir = os.path.isdir(full_path)
-                entry = {
-                    "name": item,
-                    "type": "directory" if is_dir else "file",
-                    "size_bytes": os.path.getsize(full_path) if not is_dir else None,
-                }
-                entries.append(entry)
-
-            return {"success": True, "path": path, "entries": entries, "total_count": len(entries)}
+            resolved = get_sandboxed_path(path, agent_id)
         except Exception as e:
-            return {"error": f"Failed to list directory: {str(e)}"}
+            return f"Error: {e}"
+        if not os.path.exists(resolved):
+            return f"Error: Path not found: {path}"
+
+        bucket = agent_id or "_default"
+        key = (target, pattern, str(path), file_glob, int(limit), int(offset), output_mode, int(context))
+        with _SEARCH_TRACKER_LOCK:
+            td = _SEARCH_TRACKER.setdefault(bucket, {"last_key": None, "consecutive": 0})
+            if td["last_key"] == key:
+                td["consecutive"] += 1
+            else:
+                td["last_key"] = key
+                td["consecutive"] = 1
+            consecutive = td["consecutive"]
+        if consecutive >= 4:
+            return (
+                f"BLOCKED: this exact search has run {consecutive} times in a row. "
+                "Results have NOT changed. Use the information you already have and proceed."
+            )
+
+        # Display paths relative to the sandbox root, not the resolved absolute.
+        try:
+            sandbox_root = get_sandboxed_path(".", agent_id)
+        except Exception:
+            sandbox_root = resolved
+
+        if target == "files":
+            result = _do_search_files_target(
+                pattern=pattern,
+                resolved=resolved,
+                display_root=sandbox_root,
+                limit=limit,
+                offset=offset,
+            )
+        else:
+            result = _do_search_content_target(
+                pattern=pattern,
+                resolved=resolved,
+                project_root=sandbox_root,
+                file_glob=file_glob,
+                limit=limit,
+                offset=offset,
+                output_mode=output_mode,
+                context=context,
+                hashline=False,
+            )
+
+        if consecutive == 3:
+            result += (
+                f"\n\n[Warning: this exact search has run {consecutive} times consecutively. "
+                "Results have not changed — use what you have instead of re-searching.]"
+            )
+        return result
 
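Since this sandboxed variant defaults pattern to '*' and target to 'files', a bare call behaves like the old list_dir; a sketch (agent_id is auto-injected in practice):

    search_files()                   # every file in the sandbox, newest first
    search_files(pattern="*.json")   # the old list_dir plus a glob filter, in one call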
@@ -7,7 +7,7 @@ Provides file I/O capabilities so GCU subagents can read spillover files
 Adapted from coder_tools_server.py for the GCU context:
 - No project root restriction (accepts absolute paths)
 - No git snapshots
-- Focused on read_file, list_directory, search_files
+- Focused on read_file, search_files (search_files = grep+find+ls)
 """
 
 from fastmcp import FastMCP
 
@@ -86,76 +86,56 @@ def mock_secure_path(tmp_path):
     yield
 
 
-class TestListDirTool:
-    """Tests for list_dir tool."""
+class TestSandboxedSearchFiles:
+    """Tests for the agent-sandboxed search_files registration (formerly list_dir)."""
 
     @pytest.fixture
-    def list_dir_fn(self, mcp):
+    def search_files_fn(self, mcp):
         from aden_tools.tools.file_system_toolkits.list_dir import register_tools
 
         register_tools(mcp)
-        return mcp._tool_manager._tools["list_dir"].fn
+        return mcp._tool_manager._tools["search_files"].fn
 
-    def test_list_directory(self, list_dir_fn, mock_workspace, mock_secure_path, tmp_path):
-        """Listing a directory returns all entries."""
-        # Create test files and directories
+    def test_files_mode_lists_entries(self, search_files_fn, mock_workspace, mock_secure_path, tmp_path):
+        """target='files' returns every file in the sandbox, one per line."""
         (tmp_path / "file1.txt").write_text("content", encoding="utf-8")
         (tmp_path / "file2.txt").write_text("content", encoding="utf-8")
         (tmp_path / "subdir").mkdir()
+        (tmp_path / "subdir" / "nested.txt").write_text("x", encoding="utf-8")
 
-        result = list_dir_fn(path=".", **mock_workspace)
+        result = search_files_fn(pattern="*", target="files", path=".", **mock_workspace)
 
-        assert result["success"] is True
-        assert result["total_count"] == 3
-        assert len(result["entries"]) == 3
+        assert "file1.txt" in result
+        assert "file2.txt" in result
+        # rg --files / os.walk return files only, so subdir itself isn't listed,
+        # but its contents are.
+        assert "nested.txt" in result
 
-        # Check that entries have correct structure
-        for entry in result["entries"]:
-            assert "name" in entry
-            assert "type" in entry
-            assert entry["type"] in ["file", "directory"]
+    def test_files_mode_glob_filter(self, search_files_fn, mock_workspace, mock_secure_path, tmp_path):
+        """target='files' with a glob restricts the listing."""
+        (tmp_path / "a.py").write_text("x", encoding="utf-8")
+        (tmp_path / "b.txt").write_text("x", encoding="utf-8")
 
-    def test_list_empty_directory(self, list_dir_fn, mock_workspace, mock_secure_path, tmp_path):
-        """Listing an empty directory returns empty list."""
-        empty_dir = tmp_path / "empty"
-        empty_dir.mkdir()
+        result = search_files_fn(pattern="*.py", target="files", path=".", **mock_workspace)
+        assert "a.py" in result
+        assert "b.txt" not in result
 
-        result = list_dir_fn(path="empty", **mock_workspace)
+    def test_nonexistent_path_returns_error_string(self, search_files_fn, mock_workspace, mock_secure_path):
+        """Missing path returns an Error: string, not a dict."""
+        result = search_files_fn(pattern="*", target="files", path="nonexistent_dir", **mock_workspace)
+        assert isinstance(result, str)
+        assert "Error" in result
+        assert "not found" in result.lower()
 
-        assert result["success"] is True
-        assert result["total_count"] == 0
-        assert result["entries"] == []
+    def test_content_mode_finds_matches(self, search_files_fn, mock_workspace, mock_secure_path, tmp_path):
+        """target='content' searches inside files and returns rel-path matches."""
+        (tmp_path / "hello.txt").write_text("needle here\n", encoding="utf-8")
+        (tmp_path / "other.txt").write_text("nothing\n", encoding="utf-8")
 
-    def test_list_nonexistent_directory(self, list_dir_fn, mock_workspace, mock_secure_path):
-        """Listing a non-existent directory returns error."""
-        result = list_dir_fn(path="nonexistent_dir", **mock_workspace)
-
-        assert "error" in result
-        assert "not found" in result["error"].lower()
-
-    def test_list_directory_with_file_sizes(self, list_dir_fn, mock_workspace, mock_secure_path, tmp_path):
-        """Listing a directory returns file sizes for files."""
-        (tmp_path / "small.txt").write_text("hi", encoding="utf-8")
-        (tmp_path / "larger.txt").write_text("hello world", encoding="utf-8")
-        (tmp_path / "subdir").mkdir()
-
-        result = list_dir_fn(path=".", **mock_workspace)
-
-        assert result["success"] is True
-
-        # Find entries by name
-        entries_by_name = {e["name"]: e for e in result["entries"]}
-
-        # Files should have size_bytes
-        assert entries_by_name["small.txt"]["type"] == "file"
-        assert entries_by_name["small.txt"]["size_bytes"] == 2
-
-        assert entries_by_name["larger.txt"]["type"] == "file"
-        assert entries_by_name["larger.txt"]["size_bytes"] == 11
-
-        # Directories should have None for size_bytes
-        assert entries_by_name["subdir"]["type"] == "directory"
-        assert entries_by_name["subdir"]["size_bytes"] is None
+        result = search_files_fn(pattern="needle", target="content", path=".", **mock_workspace)
+        assert "hello.txt" in result
+        assert "needle" in result
+        assert "other.txt" not in result
 
 
 class TestReplaceFileContentTool: