From 8470c6a980ee2d7f2f4a8180566ee9022966e4f5 Mon Sep 17 00:00:00 2001 From: Fernando Mano Date: Wed, 4 Mar 2026 14:16:48 -0300 Subject: [PATCH] feature(WindowsFilesystemSupport): #5677 - Windows File System Support and Testing --- .github/workflows/ci.yml | 7 +- README.md | 1 + .../agents/hive_coder/nodes/__init__.py | 144 +++++----- core/framework/runner/mcp_client.py | 56 +++- core/framework/runner/runner.py | 10 + core/framework/runner/tool_registry.py | 119 +++++++- hive.ps1 | 3 +- tools/coder_tools_server.py | 263 +++++++++++++++--- tools/src/aden_tools/file_ops.py | 50 +++- tools/tests/tools/test_file_ops.py | 145 ++++++++++ .../tools/test_run_command_pythonpath.py | 118 ++++++++ 11 files changed, 767 insertions(+), 149 deletions(-) create mode 100644 tools/tests/tools/test_file_ops.py create mode 100644 tools/tests/tools/test_run_command_pythonpath.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5ce273fb..3322fcae 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -62,8 +62,11 @@ jobs: uv run pytest tests/ -v test-tools: - name: Test Tools - runs-on: ubuntu-latest + name: Test Tools (${{ matrix.os }}) + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, windows-latest] steps: - uses: actions/checkout@v4 diff --git a/README.md b/README.md index 47e628f5..81c469f3 100644 --- a/README.md +++ b/README.md @@ -82,6 +82,7 @@ Use Hive when you need: - Python 3.11+ for agent development - An LLM provider that powers the agents +- **ripgrep (optional, recommended on Windows):** The `search_files` tool uses ripgrep for faster file search. If not installed, a Python fallback is used. On Windows: `winget install BurntSushi.ripgrep` or `scoop install ripgrep` > **Note for Windows Users:** It is strongly recommended to use **WSL (Windows Subsystem for Linux)** or **Git Bash** to run this framework. Some core automation scripts may not execute correctly in standard Command Prompt or PowerShell. diff --git a/core/framework/agents/hive_coder/nodes/__init__.py b/core/framework/agents/hive_coder/nodes/__init__.py index 741d82e7..cc2d5a0d 100644 --- a/core/framework/agents/hive_coder/nodes/__init__.py +++ b/core/framework/agents/hive_coder/nodes/__init__.py @@ -131,13 +131,18 @@ errors yourself. Don't declare success until validation passes. # Tools +## Paths (MANDATORY) +**Always use RELATIVE paths** (e.g. `exports/agent_name/config.py`, `exports/agent_name/nodes/__init__.py`). +**Never use absolute paths** like `/mnt/data/...` or `/workspace/...` — they fail. The project root is implicit. + ## File I/O - read_file(path, offset?, limit?) — read with line numbers - write_file(path, content) — create/overwrite, auto-mkdir - edit_file(path, old_text, new_text, replace_all?) — fuzzy-match edit - list_directory(path, recursive?) — list contents - search_files(pattern, path?, include?) — regex search -- run_command(command, cwd?, timeout?) — shell execution +- run_command(command, cwd?, timeout?) — shell execution. Prefer \ +run_agent_tests for tests; run_command can timeout on long runs. - undo_changes(path?) — restore from git snapshot ## Meta-Agent @@ -177,10 +182,11 @@ for patterns: read_file("exports/{name}/nodes/__init__.py") ## Post-Build Testing -After writing agent code, validate structurally AND run tests: - run_command("python -c 'from {name} import default_agent; \\ - print(default_agent.validate())'") - run_agent_tests("{name}") +**Prefer dedicated tools** — run_command can timeout (MCP). Use: + validate_agent_tools("exports/{name}") # tool existence check + run_agent_tests("{name}") # run pytest +Avoid run_command for pytest or long validation. Structural checks via \ +run_command (validate, AgentRunner.load) are optional; they may timeout. ## Debugging Built Agents When a user says "my agent is failing" or "debug this agent": @@ -507,39 +513,40 @@ triggers, use `AsyncEntryPointSpec` (from framework.graph.edge) and \ ## 5. Verify -Run FOUR validation steps after writing. All must pass: +**Reliability note:** run_command can timeout (MCP/stdio limits). Do Steps 1 \ +and 2 first — they are reliable. Steps 3 and 4 use run_command and may timeout. -**Step A — Class validation** (checks graph structure): +**Step 1 — Tool validation** (REQUIRED; reliable): +``` +validate_agent_tools("exports/{name}") +``` +Checks that declared tools exist in the agent's MCP servers. If any are \ +missing: fix node definitions. Run list_agent_tools() to see what's available. + +**Step 2 — Run tests** (REQUIRED; reliable): +``` +run_agent_tests("{name}") +``` +Runs pytest with proper timeouts. **Do NOT use run_command with pytest** — \ +it times out. Use run_agent_tests only. + +**Step 3 — Class validation** (optional; may timeout): ``` run_command("python -c 'from {name} import default_agent; \\ print(default_agent.validate())'") ``` +Structural check. Skip if it times out; Steps 1–2 and load_built_agent suffice. -**Step B — Runner load test** (checks package export contract — \ -THIS IS THE SAME PATH THE TUI USES): +**Step 4 — Runner load test** (optional; may timeout): ``` run_command("python -c 'from framework.runner.runner import \\ AgentRunner; r = AgentRunner.load(\"exports/{name}\"); \\ print(\"AgentRunner.load: OK\")'") ``` -This catches missing __init__.py exports, bad conversation_mode, \ -invalid loop_config, and unreachable nodes. If Step A passes but \ -Step B fails, the problem is in __init__.py exports. +Catches __init__.py exports, conversation_mode, loop_config. Skip if timeout; \ +load_built_agent will surface load errors when you load the agent. -**Step C — Tool validation** (checks that declared tools actually exist \ -in the agent's MCP servers — catches hallucinated tool names): -``` -validate_agent_tools("exports/{name}") -``` -If any tools are missing: fix the node definitions to use only tools \ -that exist. Run list_agent_tools() to see what's available. - -**Step D — Run tests:** -``` -run_agent_tests("{name}") -``` - -If anything fails: read error, fix with edit_file, re-validate. Up to 3x. +If Steps 1 or 2 fail: read error, fix with edit_file, re-validate. Up to 3x. **CRITICAL: Testing forever-alive agents** Most agents use `terminal_nodes=[]` (forever-alive). This means \ @@ -598,54 +605,31 @@ start_agent("{name}") # triggers default entry point _queen_tools_docs = """ -## Operating Modes +## Worker Lifecycle +- start_worker(task) — Start the worker with a task description. The \ +worker runs autonomously until it finishes or asks the user a question. +- stop_worker() — Cancel the worker's current execution. +- get_worker_status() — Check if the worker is idle, running, or waiting \ +for user input. Returns execution details. +- inject_worker_message(content) — Send a message to the running worker. \ +Use this to relay user instructions or concerns. -You operate in one of three modes. Your available tools change based on the \ -mode. The system notifies you when a mode change occurs. +## Monitoring +- get_worker_health_summary() — Read the latest health data from the judge. +- notify_operator(ticket_id, analysis, urgency) — Alert the user about a \ +critical issue. Use sparingly. -### BUILDING mode (default) -You have full coding tools for building and modifying agents: -- File I/O: read_file, write_file, edit_file, list_directory, search_files, \ -run_command, undo_changes -- Meta-agent: list_agent_tools, validate_agent_tools, \ -list_agents, list_agent_sessions, get_agent_session_state, get_agent_session_memory, \ -list_agent_checkpoints, get_agent_checkpoint, run_agent_tests -- load_built_agent(agent_path) — Load the agent and switch to STAGING mode -- list_credentials(credential_id?) — List authorized credentials +## Agent Loading +- load_built_agent(agent_path) — Load a newly built agent as the worker in \ +this session. If a worker is already loaded, it is automatically unloaded \ +first. **Call in a separate turn** after write_file, validate_agent_tools, and \ +run_agent_tests have finished — never in the same batch, or files may \ +not exist yet. -When you finish building an agent, call load_built_agent(path) to stage it. - -### STAGING mode (agent loaded, not yet running) -The agent is loaded and ready to run. You can inspect it and launch it: -- Read-only: read_file, list_directory, search_files, run_command -- list_credentials(credential_id?) — Verify credentials are configured -- get_worker_status() — Check the loaded worker -- run_agent_with_input(task) — Start the worker and switch to RUNNING mode -- stop_worker_and_edit() — Go back to BUILDING mode - -In STAGING mode you do NOT have write tools. If you need to modify the agent, \ -call stop_worker_and_edit() to go back to BUILDING mode. - -### RUNNING mode (worker is executing) -The worker is running. You have monitoring and lifecycle tools: -- Read-only: read_file, list_directory, search_files, run_command -- get_worker_status() — Check worker status (idle, running, waiting) -- inject_worker_message(content) — Send a message to the running worker -- get_worker_health_summary() — Read the latest health data -- notify_operator(ticket_id, analysis, urgency) — Alert the user (use sparingly) -- stop_worker() — Stop the worker and return to STAGING mode, then ask the user what to do next -- stop_worker_and_edit() — Stop the worker and switch back to BUILDING mode - -In RUNNING mode you do NOT have write tools or agent construction tools. \ -If you need to modify the agent, call stop_worker_and_edit() to switch back \ -to BUILDING mode. To stop the worker and ask the user what to do next, call \ -stop_worker() to return to STAGING mode. - -### Mode transitions -- load_built_agent(path) → switches to STAGING mode -- run_agent_with_input(task) → starts worker, switches to RUNNING mode -- stop_worker() → stops worker, switches to STAGING mode (ask user: re-run or edit?) -- stop_worker_and_edit() → stops worker (if running), switches to BUILDING mode +## Credentials +- list_credentials(credential_id?) — List all authorized credentials in the \ +local store. Returns IDs, aliases, status, and identity metadata (never \ +secrets). Optionally filter by credential_id. """ _queen_behavior = """ @@ -781,16 +765,14 @@ building something new. When the user asks to change, modify, or update the loaded worker \ (e.g., "change the report node", "add a node", "delete node X"): -1. Call stop_worker_and_edit() — this stops the worker and gives you \ -coding tools (switches to BUILDING mode). -2. Use the **Path** from the Worker Profile to locate the agent files. -3. Read the relevant files (nodes/__init__.py, agent.py, etc.). -4. Make the requested changes using edit_file / write_file. -5. Run validation (default_agent.validate(), AgentRunner.load(), \ -validate_agent_tools()). -6. **Reload the modified worker**: call load_built_agent("{path}") \ -so the changes take effect immediately (switches to STAGING mode). \ -Then call run_agent_with_input(task) to restart execution. +1. Use the **Path** from the Worker Profile to locate the agent files. +2. Read the relevant files (nodes/__init__.py, agent.py, etc.). +3. Make the requested changes using edit_file / write_file. +4. Run validation: validate_agent_tools(path), run_agent_tests(name). \ +Avoid run_command for validation — it can timeout. +5. **Reload the modified worker**: call load_built_agent("{path}") \ +so the changes take effect immediately. If a worker is already loaded, \ +stop it first, then reload. Do NOT skip step 6 — without reloading, the user will still be \ interacting with the old version. diff --git a/core/framework/runner/mcp_client.py b/core/framework/runner/mcp_client.py index 7e9512db..478cec72 100644 --- a/core/framework/runner/mcp_client.py +++ b/core/framework/runner/mcp_client.py @@ -7,6 +7,9 @@ Supports both STDIO and HTTP transports using the official MCP Python SDK. import asyncio import logging import os +import sys +import threading +from concurrent.futures import TimeoutError as FuturesTimeoutError from dataclasses import dataclass, field from typing import Any, Literal @@ -73,6 +76,8 @@ class MCPClient: # Background event loop for persistent STDIO connection self._loop = None self._loop_thread = None + # Serialize STDIO tool calls (avoids races, helps on Windows) + self._stdio_call_lock = threading.Lock() def _run_async(self, coro): """ @@ -89,7 +94,20 @@ class MCPClient: # Check if loop is running AND not closed if self._loop.is_running() and not self._loop.is_closed(): future = asyncio.run_coroutine_threadsafe(coro, self._loop) - return future.result() + try: + return future.result(timeout=120) + except (TimeoutError, FuturesTimeoutError): + logger.error( + "MCP tool call timed out after 120s — connection may be dead." + ) + try: + self.disconnect() + except Exception as e: + logger.debug("Disconnect after timeout: %s", e) + raise RuntimeError( + "MCP tool call timed out. The server may have crashed or become unresponsive. " + "Try again — the connection will be re-established." + ) from None # else: fall through to the standard approach below # This handles the case when STDIO loop exists but is stopped/closed @@ -156,11 +174,19 @@ class MCPClient: # Create server parameters # Always inherit parent environment and merge with any custom env vars merged_env = {**os.environ, **(self.config.env or {})} + # On Windows, passing cwd can cause WinError 267 ("invalid directory name"). + # tool_registry passes cwd=None and uses absolute script paths when applicable. + cwd = self.config.cwd + if os.name == "nt" and cwd is not None: + # Avoid passing cwd on Windows; tool_registry should have set cwd=None + # and absolute script paths for tools-dir servers. If cwd is still set, + # pass None to prevent WinError 267 (caller should use absolute paths). + cwd = None server_params = StdioServerParameters( command=self.config.command, args=self.config.args, env=merged_env, - cwd=self.config.cwd, + cwd=cwd, ) # Store for later use @@ -184,10 +210,13 @@ class MCPClient: from mcp.client.stdio import stdio_client # Create persistent stdio client context. - # Redirect server stderr to devnull to prevent raw - # output from leaking behind the TUI. - devnull = open(os.devnull, "w") # noqa: SIM115 - self._stdio_context = stdio_client(server_params, errlog=devnull) + # On Windows, use stderr so subprocess startup errors are visible + # when debugging "Connection closed" / MCP registration failures. + if os.name == "nt": + errlog = sys.stderr + else: + errlog = open(os.devnull, "w") # noqa: SIM115 + self._stdio_context = stdio_client(server_params, errlog=errlog) ( self._read_stream, self._write_stream, @@ -353,7 +382,12 @@ class MCPClient: raise ValueError(f"Unknown tool: {tool_name}") if self.config.transport == "stdio": - return self._run_async(self._call_tool_stdio_async(tool_name, arguments)) + with self._stdio_call_lock: + if not self._session and self._connected: + self._connected = False + if not self._connected: + self.connect() + return self._run_async(self._call_tool_stdio_async(tool_name, arguments)) else: return self._call_tool_http(tool_name, arguments) @@ -448,11 +482,15 @@ class MCPClient: if self._stdio_context: await self._stdio_context.__aexit__(None, None, None) except asyncio.CancelledError: - logger.warning( + logger.debug( "STDIO context cleanup was cancelled; proceeding with best-effort shutdown" ) except Exception as e: - logger.warning(f"Error closing STDIO context: {e}") + msg = str(e).lower() + if "cancel scope" in msg or "different task" in msg: + logger.debug("STDIO context teardown (known anyio quirk): %s", e) + else: + logger.warning(f"Error closing STDIO context: {e}") finally: self._stdio_context = None diff --git a/core/framework/runner/runner.py b/core/framework/runner/runner.py index fbe554eb..eec8cc9f 100644 --- a/core/framework/runner/runner.py +++ b/core/framework/runner/runner.py @@ -1119,6 +1119,11 @@ class AgentRunner: gcu_config = dict(GCU_MCP_SERVER_CONFIG) _repo_root = Path(__file__).resolve().parent.parent.parent.parent gcu_config["cwd"] = str(_repo_root / "tools") + + #gcu_config = self._tool_registry._resolve_mcp_server_config( + # dict(GCU_MCP_SERVER_CONFIG), self.agent_path + #) + self._tool_registry.register_mcp_server(gcu_config) gcu_tool_names = self._tool_registry.get_server_tool_names(GCU_SERVER_NAME) @@ -1142,6 +1147,11 @@ class AgentRunner: files_config = dict(FILES_MCP_SERVER_CONFIG) _repo_root = Path(__file__).resolve().parent.parent.parent.parent files_config["cwd"] = str(_repo_root / "tools") + +# files_config = self._tool_registry._resolve_mcp_server_config( +# dict(FILES_MCP_SERVER_CONFIG), self.agent_path +# ) + self._tool_registry.register_mcp_server(files_config) files_tool_names = self._tool_registry.get_server_tool_names(FILES_MCP_SERVER_NAME) diff --git a/core/framework/runner/tool_registry.py b/core/framework/runner/tool_registry.py index 78f5c936..ae6b5e07 100644 --- a/core/framework/runner/tool_registry.py +++ b/core/framework/runner/tool_registry.py @@ -243,10 +243,12 @@ class ToolRegistry: def _wrap_result(tool_use_id: str, result: Any) -> ToolResult: if isinstance(result, ToolResult): return result + is_error = isinstance(result, dict) and "error" in result + content = json.dumps(result) if not isinstance(result, str) else result return ToolResult( tool_use_id=tool_use_id, - content=json.dumps(result) if not isinstance(result, str) else result, - is_error=False, + content=content, + is_error=is_error, ) def executor(tool_use: ToolUse) -> ToolResult: @@ -326,6 +328,105 @@ class ToolRegistry: """Restore execution context to its previous state.""" _execution_context.reset(token) + @staticmethod + def resolve_mcp_stdio_config( + server_config: dict[str, Any], base_dir: Path + ) -> dict[str, Any]: + """Resolve cwd and script paths for MCP stdio config (Windows compatibility). + + Use this when building MCPServerConfig from a config file (e.g. in + list_agent_tools, discover_mcp_tools) so hive-tools and other servers + work on Windows. Call with base_dir = directory containing the config. + """ + registry = ToolRegistry() + return registry._resolve_mcp_server_config(server_config, base_dir) + + def _resolve_mcp_server_config( + self, server_config: dict[str, Any], base_dir: Path + ) -> dict[str, Any]: + """Resolve cwd and script paths for MCP stdio servers (Windows compatibility). + + On Windows, passing cwd to subprocess can cause WinError 267. We use cwd=None + and absolute script paths when the server runs a .py script from the tools dir. + If the resolved cwd doesn't exist (e.g. config from ~/.hive/agents/), fall back + to Path.cwd() / "tools". + """ + config = dict(server_config) + if config.get("transport") != "stdio": + return config + + cwd = config.get("cwd") + args = list(config.get("args", [])) + if not cwd and not args: + return config + + # Resolve cwd relative to base_dir + resolved_cwd: Path | None = None + if cwd: + if Path(cwd).is_absolute(): + resolved_cwd = Path(cwd) + else: + resolved_cwd = (base_dir / cwd).resolve() + + # Find .py script in args (e.g. coder_tools_server.py, files_server.py) + script_name = None + for i, arg in enumerate(args): + if isinstance(arg, str) and arg.endswith(".py"): + script_name = arg + script_idx = i + break + + if resolved_cwd is None: + return config + + # If resolved cwd doesn't exist or (when we have a script) doesn't contain it, + # try fallback + tools_fallback = Path.cwd() / "tools" + need_fallback = not resolved_cwd.is_dir() + if script_name and not need_fallback: + need_fallback = not (resolved_cwd / script_name).exists() + if need_fallback: + fallback_ok = tools_fallback.is_dir() + if script_name: + fallback_ok = fallback_ok and (tools_fallback / script_name).exists() + else: + # No script (e.g. GCU); just need tools dir to exist + pass + if fallback_ok: + resolved_cwd = tools_fallback + logger.debug( + "MCP server '%s': using fallback tools dir %s", + config.get("name", "?"), + resolved_cwd, + ) + else: + config["cwd"] = str(resolved_cwd) + return config + + if not script_name: + # No .py script (e.g. GCU uses -m gcu.server); just set cwd + config["cwd"] = str(resolved_cwd) + return config + + # For coder_tools_server, inject --project-root so writes go to the expected workspace + if script_name and "coder_tools" in script_name: + project_root = str(resolved_cwd.parent.resolve()) + args = list(args) + if "--project-root" not in args: + args.extend(["--project-root", project_root]) + config["args"] = args + + if os.name == "nt": + # Windows: cwd=None avoids WinError 267; use absolute script path + config["cwd"] = None + abs_script = str((resolved_cwd / script_name).resolve()) + args = list(config["args"]) + args[script_idx] = abs_script + config["args"] = args + else: + config["cwd"] = str(resolved_cwd) + return config + def load_mcp_config(self, config_path: Path) -> None: """ Load and register MCP servers from a config file. @@ -340,7 +441,7 @@ class ToolRegistry: self._mcp_config_path = Path(config_path) try: - with open(config_path) as f: + with open(config_path, encoding="utf-8") as f: config = json.load(f) except Exception as e: logger.warning(f"Failed to load MCP config from {config_path}: {e}") @@ -357,9 +458,7 @@ class ToolRegistry: server_list = [{"name": name, **cfg} for name, cfg in config.items()] for server_config in server_list: - cwd = server_config.get("cwd") - if cwd and not Path(cwd).is_absolute(): - server_config["cwd"] = str((base_dir / cwd).resolve()) + server_config = self._resolve_mcp_server_config(server_config, base_dir) try: self.register_mcp_server(server_config) except Exception as e: @@ -479,7 +578,13 @@ class ToolRegistry: return count except Exception as e: - logger.error(f"Failed to register MCP server: {e}") + name = server_config.get("name", "unknown") + logger.error(f"Failed to register MCP server '{name}': {e}") + if "Connection closed" in str(e) and os.name == "nt": + logger.debug( + "On Windows, check that the MCP subprocess starts (e.g. uv in PATH, " + "script path correct). Worker config uses base_dir = mcp_servers.json parent." + ) return 0 def _convert_mcp_tool_to_framework_tool(self, mcp_tool: Any) -> Tool: diff --git a/hive.ps1 b/hive.ps1 index 920db035..8436ac6b 100644 --- a/hive.ps1 +++ b/hive.ps1 @@ -78,5 +78,6 @@ if (-not $env:HIVE_CREDENTIAL_KEY) { } # ── Run the Hive CLI ──────────────────────────────────────────────── - +# PYTHONUTF8=1: use UTF-8 for default encoding (fixes charmap decode errors on Windows) +$env:PYTHONUTF8 = "1" & uv run hive @args diff --git a/tools/coder_tools_server.py b/tools/coder_tools_server.py index 22e20160..86ec34f7 100644 --- a/tools/coder_tools_server.py +++ b/tools/coder_tools_server.py @@ -71,8 +71,40 @@ def _find_project_root() -> str: def _resolve_path(path: str) -> str: """Resolve path relative to PROJECT_ROOT. Raises ValueError if outside.""" + # Normalize slashes for cross-platform (e.g. exports/hi_agent from LLM) + path = path.replace("/", os.sep) if os.path.isabs(path): resolved = os.path.abspath(path) + try: + common = os.path.commonpath([resolved, PROJECT_ROOT]) + except ValueError: + common = "" + if common != PROJECT_ROOT: + # LLM may emit wrong-root paths (/mnt/data, /workspace, etc.). + # Strip known prefixes and treat the remainder as relative to PROJECT_ROOT. + path_norm = path.replace("\\", "/") + for prefix in ("/mnt/data/", "/mnt/data", "/workspace/", "/workspace", + "/repo/", "/repo"): + p = prefix.rstrip("/") + "/" + if path_norm.startswith(p) or (path_norm.startswith(prefix.rstrip("/")) and len(path_norm) > len(prefix)): + suffix = path_norm[len(prefix.rstrip("/")):].lstrip("/") + if suffix: + path = suffix.replace("/", os.sep) + resolved = os.path.abspath(os.path.join(PROJECT_ROOT, path)) + break + else: + # Try extracting exports/ or core/ subpath from the absolute path + parts = path.split(os.sep) + if "exports" in parts: + idx = parts.index("exports") + path = os.sep.join(parts[idx:]) + resolved = os.path.abspath(os.path.join(PROJECT_ROOT, path)) + elif "core" in parts: + idx = parts.index("core") + path = os.sep.join(parts[idx:]) + resolved = os.path.abspath(os.path.join(PROJECT_ROOT, path)) + else: + raise ValueError(f"Access denied: '{path}' is outside the project root.") else: resolved = os.path.abspath(os.path.join(PROJECT_ROOT, path)) try: @@ -122,7 +154,38 @@ def _take_snapshot() -> str: # ── Tool: run_command ───────────────────────────────────────────────────── -MAX_COMMAND_OUTPUT = 30_000 # chars before truncation +MAX_COMMAND_OUTPUT = 16_000 # chars before truncation (keep under stdio pipe buffer) + + +def _translate_command_for_windows(command: str) -> str: + """Translate common Unix commands to Windows equivalents.""" + if os.name != "nt": + return command + cmd = command.strip() + + # mkdir -p: Unix creates parents; Windows mkdir already does; -p becomes a dir name + if cmd.startswith("mkdir -p ") or cmd.startswith("mkdir -p\t"): + rest = cmd[9:].lstrip().replace("/", os.sep) + return "mkdir " + rest + + # ls / pwd: cmd.exe uses dir and cd + # Order matters: replace longer patterns first + for unix, win in [ + ("ls -la", "dir /a"), + ("ls -al", "dir /a"), + ("ls -l", "dir"), + ("ls -a", "dir /a"), + ("ls ", "dir "), + ("pwd", "cd"), + ]: + cmd = cmd.replace(unix, win) + # Standalone "ls" at end (e.g. "cd x && ls") + if cmd.endswith(" ls"): + cmd = cmd[:-3] + " dir" + elif cmd == "ls": + cmd = "dir" + + return cmd @mcp.tool() @@ -141,9 +204,12 @@ def run_command(command: str, cwd: str = "", timeout: int = 120) -> str: Combined stdout/stderr with exit code """ timeout = min(timeout, 300) + # Use lower internal timeout so we return before MCP client's 120s timeout + cmd_timeout = min(timeout, 90) work_dir = _resolve_path(cwd) if cwd else PROJECT_ROOT try: + command = _translate_command_for_windows(command) start = time.monotonic() result = subprocess.run( command, @@ -151,12 +217,15 @@ def run_command(command: str, cwd: str = "", timeout: int = 120) -> str: cwd=work_dir, capture_output=True, text=True, - timeout=timeout, + timeout=cmd_timeout, env={ **os.environ, - "PYTHONPATH": ( - f"{PROJECT_ROOT}/core:{PROJECT_ROOT}/exports" - f":{PROJECT_ROOT}/core/framework/agents" + "PYTHONPATH": os.pathsep.join( + [ + os.path.join(PROJECT_ROOT, "core"), + os.path.join(PROJECT_ROOT, "exports"), + os.path.join(PROJECT_ROOT, "core", "framework", "agents"), + ] ), }, ) @@ -181,7 +250,7 @@ def run_command(command: str, cwd: str = "", timeout: int = 120) -> str: return output except subprocess.TimeoutExpired: return ( - f"Error: Command timed out after {timeout}s. " + f"Error: Command timed out after {cmd_timeout}s. " "Consider breaking it into smaller operations." ) except Exception as e: @@ -254,9 +323,111 @@ def list_agent_tools( ) -> str: """Discover tools available for agent building, grouped by category. - Connects to each MCP server, lists tools, then disconnects. Use this - BEFORE designing an agent to know exactly which tools exist. Only use - tools from this list in node definitions — never guess or fabricate. + Connects to each MCP server, lists all tools with full schemas, then + disconnects. Use this to see what tools are available before designing + an agent — never rely on static documentation. + + Args: + server_config_path: Path to mcp_servers.json (relative to project root). + Default: the hive-tools server config at tools/mcp_servers.json. + Can also point to any agent's mcp_servers.json. + + Returns: + JSON listing of all tools with names, descriptions, and input schemas + """ + # Resolve config path + if not server_config_path: + # Default: look for the main hive-tools mcp_servers.json + candidates = [ + os.path.join(PROJECT_ROOT, "tools", "mcp_servers.json"), + os.path.join(PROJECT_ROOT, "mcp_servers.json"), + ] + config_path = None + for c in candidates: + if os.path.isfile(c): + config_path = c + break + if not config_path: + return "Error: No mcp_servers.json found. Provide server_config_path." + else: + config_path = _resolve_path(server_config_path) + if not os.path.isfile(config_path): + return f"Error: Config file not found: {server_config_path}" + + try: + with open(config_path, encoding="utf-8") as f: + servers_config = json.load(f) + except (json.JSONDecodeError, OSError) as e: + return f"Error reading config: {e}" + + # Import MCPClient (deferred — needs PYTHONPATH to include core/) + try: + from pathlib import Path + + from framework.runner.mcp_client import MCPClient, MCPServerConfig + from framework.runner.tool_registry import ToolRegistry + except ImportError: + return "Error: Cannot import MCPClient. Ensure PYTHONPATH includes the core/ directory." + + all_tools = [] + errors = [] + config_dir = Path(config_path).parent + + for server_name, server_conf in servers_config.items(): + # Use shared resolution (Windows cwd/script path handling) + resolved = ToolRegistry.resolve_mcp_stdio_config( + {"name": server_name, **server_conf}, config_dir + ) + try: + config = MCPServerConfig( + name=server_name, + transport=resolved.get("transport", "stdio"), + command=resolved.get("command"), + args=resolved.get("args", []), + env=resolved.get("env", {}), + cwd=resolved.get("cwd"), + url=resolved.get("url"), + headers=resolved.get("headers", {}), + ) + client = MCPClient(config) + client.connect() + tools = client.list_tools() + + for tool in tools: + all_tools.append( + { + "server": server_name, + "name": tool.name, + "description": tool.description, + "input_schema": tool.input_schema, + } + ) + + client.disconnect() + except Exception as e: + errors.append({"server": server_name, "error": str(e)}) + + result = { + "tools": all_tools, + "total": len(all_tools), + "servers_queried": len(servers_config), + } + if errors: + result["errors"] = errors + + return json.dumps(result, indent=2, default=str) + + +# ── Meta-agent: Agent tool catalog ──────────────────────────────────────── + + +@mcp.tool() +def list_agent_tools(server_config_path: str = "") -> str: + """List all tools available for agent building from the hive-tools MCP server. + + Returns tool names grouped by category. Use this BEFORE designing an agent + to know exactly which tools exist. Only use tools from this list in node + definitions — never guess or fabricate tool names. Args: server_config_path: Path to mcp_servers.json. Default: tools/mcp_servers.json @@ -300,28 +471,31 @@ def list_agent_tools( return json.dumps({"error": f"Failed to read config: {e}"}) try: + from pathlib import Path + from framework.runner.mcp_client import MCPClient, MCPServerConfig + from framework.runner.tool_registry import ToolRegistry except ImportError: return json.dumps({"error": "Cannot import MCPClient"}) all_tools: list[dict] = [] errors = [] - config_dir = os.path.dirname(config_path) + config_dir = Path(config_path).parent for server_name, server_conf in servers_config.items(): - cwd = server_conf.get("cwd", "") - if cwd and not os.path.isabs(cwd): - cwd = os.path.abspath(os.path.join(config_dir, cwd)) + resolved = ToolRegistry.resolve_mcp_stdio_config( + {"name": server_name, **server_conf}, config_dir + ) try: config = MCPServerConfig( name=server_name, - transport=server_conf.get("transport", "stdio"), - command=server_conf.get("command"), - args=server_conf.get("args", []), - env=server_conf.get("env", {}), - cwd=cwd or None, - url=server_conf.get("url"), - headers=server_conf.get("headers", {}), + transport=resolved.get("transport", "stdio"), + command=resolved.get("command"), + args=resolved.get("args", []), + env=resolved.get("env", {}), + cwd=resolved.get("cwd"), + url=resolved.get("url"), + headers=resolved.get("headers", {}), ) client = MCPClient(config) client.connect() @@ -410,19 +584,24 @@ def validate_agent_tools(agent_path: str) -> str: if not os.path.isdir(resolved): return json.dumps({"error": f"Agent directory not found: {agent_path}"}) + agent_dir = resolved # Keep path; 'resolved' is reused for MCP config in loop + # --- Discover available tools from agent's MCP servers --- - mcp_config_path = os.path.join(resolved, "mcp_servers.json") + mcp_config_path = os.path.join(agent_dir, "mcp_servers.json") if not os.path.isfile(mcp_config_path): return json.dumps({"error": f"No mcp_servers.json found in {agent_path}"}) try: + from pathlib import Path + from framework.runner.mcp_client import MCPClient, MCPServerConfig + from framework.runner.tool_registry import ToolRegistry except ImportError: return json.dumps({"error": "Cannot import MCPClient"}) available_tools: set[str] = set() discovery_errors = [] - config_dir = os.path.dirname(mcp_config_path) + config_dir = Path(mcp_config_path).parent try: with open(mcp_config_path, encoding="utf-8") as f: @@ -431,19 +610,19 @@ def validate_agent_tools(agent_path: str) -> str: return json.dumps({"error": f"Failed to read mcp_servers.json: {e}"}) for server_name, server_conf in servers_config.items(): - cwd = server_conf.get("cwd", "") - if cwd and not os.path.isabs(cwd): - cwd = os.path.abspath(os.path.join(config_dir, cwd)) + resolved = ToolRegistry.resolve_mcp_stdio_config( + {"name": server_name, **server_conf}, config_dir + ) try: config = MCPServerConfig( name=server_name, - transport=server_conf.get("transport", "stdio"), - command=server_conf.get("command"), - args=server_conf.get("args", []), - env=server_conf.get("env", {}), - cwd=cwd or None, - url=server_conf.get("url"), - headers=server_conf.get("headers", {}), + transport=resolved.get("transport", "stdio"), + command=resolved.get("command"), + args=resolved.get("args", []), + env=resolved.get("env", {}), + cwd=resolved.get("cwd"), + url=resolved.get("url"), + headers=resolved.get("headers", {}), ) client = MCPClient(config) client.connect() @@ -454,7 +633,7 @@ def validate_agent_tools(agent_path: str) -> str: discovery_errors.append({"server": server_name, "error": str(e)}) # --- Load agent nodes and extract declared tools --- - agent_py = os.path.join(resolved, "agent.py") + agent_py = os.path.join(agent_dir, "agent.py") if not os.path.isfile(agent_py): return json.dumps({"error": f"No agent.py found in {agent_path}"}) @@ -462,8 +641,8 @@ def validate_agent_tools(agent_path: str) -> str: import importlib.util import sys - package_name = os.path.basename(resolved) - parent_dir = os.path.dirname(os.path.abspath(resolved)) + package_name = os.path.basename(agent_dir) + parent_dir = os.path.dirname(os.path.abspath(agent_dir)) if parent_dir not in sys.path: sys.path.insert(0, parent_dir) @@ -1006,26 +1185,30 @@ def run_agent_tests( cmd.append("-x") cmd.append("--tb=short") - # Set PYTHONPATH + # Set PYTHONPATH (use pathsep for Windows) env = os.environ.copy() pythonpath = env.get("PYTHONPATH", "") core_path = os.path.join(PROJECT_ROOT, "core") exports_path = os.path.join(PROJECT_ROOT, "exports") fw_agents_path = os.path.join(PROJECT_ROOT, "core", "framework", "agents") - env["PYTHONPATH"] = f"{core_path}:{exports_path}:{fw_agents_path}:{PROJECT_ROOT}:{pythonpath}" + path_parts = [core_path, exports_path, fw_agents_path, PROJECT_ROOT] + if pythonpath: + path_parts.append(pythonpath) + env["PYTHONPATH"] = os.pathsep.join(path_parts) + # Use 90s so we return before MCP client's 120s timeout try: result = subprocess.run( cmd, capture_output=True, text=True, - timeout=120, + timeout=90, env=env, ) except subprocess.TimeoutExpired: return json.dumps( { - "error": "Tests timed out after 120 seconds. A test may be hanging " + "error": "Tests timed out after 90 seconds. A test may be hanging " "(e.g. a client-facing node waiting for stdin). Use mock mode " "or add timeouts to async tests.", "command": " ".join(cmd), @@ -1144,7 +1327,7 @@ def main() -> None: register_file_tools( mcp, resolve_path=_resolve_path, - before_write=_take_snapshot, + before_write=None, # Git snapshot causes stdio deadlock on Windows; undo_changes limited project_root=PROJECT_ROOT, ) diff --git a/tools/src/aden_tools/file_ops.py b/tools/src/aden_tools/file_ops.py index 6c188c4b..3a27e264 100644 --- a/tools/src/aden_tools/file_ops.py +++ b/tools/src/aden_tools/file_ops.py @@ -323,19 +323,29 @@ def register_file_tools( content: Complete file content to write. """ resolved = _resolve(path) + resolved_path = Path(resolved) try: + # Create parent dirs first (before git snapshot) so structure exists + resolved_path.parent.mkdir(parents=True, exist_ok=True) if before_write: - before_write() + try: + before_write() + except Exception: + # Don't block the write if git snapshot fails. Do NOT log here — + # logging writes to stderr and can deadlock the MCP stdio pipe. + pass - existed = os.path.isfile(resolved) - os.makedirs(os.path.dirname(resolved), exist_ok=True) - with open(resolved, "w", encoding="utf-8") as f: - f.write(content) + existed = resolved_path.is_file() + content_str = content if content is not None else "" + with open(resolved_path, "w", encoding="utf-8") as f: + f.write(content_str) + f.flush() + os.fsync(f.fileno()) - line_count = content.count("\n") + (1 if content and not content.endswith("\n") else 0) + line_count = content_str.count("\n") + (1 if content_str and not content_str.endswith("\n") else 0) action = "Updated" if existed else "Created" - return f"{action} {path} ({len(content):,} bytes, {line_count} lines)" + return f"{action} {path} ({len(content_str):,} bytes, {line_count} lines)" except Exception as e: return f"Error writing file: {e}" @@ -510,7 +520,22 @@ def register_file_tools( lines = [] for line in output.split("\n")[:SEARCH_RESULT_LIMIT]: if project_root: - line = line.replace(project_root + "/", "") + # Platform-agnostic relativization: ripgrep may output + # forward or backslash paths; normalize before relpath (Windows). + match = re.match(r"^(.+):(\d+):", line) + if match: + path_part, line_num, rest = ( + match.group(1), + match.group(2), + line[match.end() :], + ) + path_part = os.path.normpath(path_part.replace("/", os.sep)) + proj_norm = os.path.normpath(project_root.replace("/", os.sep)) + try: + rel = os.path.relpath(path_part, proj_norm) + line = f"{rel}:{line_num}:{rest}" + except ValueError: + pass if len(line) > MAX_LINE_LENGTH: line = line[:MAX_LINE_LENGTH] + "..." lines.append(line) @@ -538,7 +563,14 @@ def register_file_tools( if include and not fnmatch.fnmatch(fname, include): continue fpath = os.path.join(root, fname) - display_path = os.path.relpath(fpath, project_root) if project_root else fpath + if project_root: + proj_norm = os.path.normpath(project_root.replace("/", os.sep)) + try: + display_path = os.path.relpath(fpath, proj_norm) + except ValueError: + display_path = fpath + else: + display_path = fpath try: with open(fpath, encoding="utf-8", errors="ignore") as f: for i, line in enumerate(f, 1): diff --git a/tools/tests/tools/test_file_ops.py b/tools/tests/tools/test_file_ops.py new file mode 100644 index 00000000..e55d8536 --- /dev/null +++ b/tools/tests/tools/test_file_ops.py @@ -0,0 +1,145 @@ +"""Tests for aden_tools.file_ops (shared file tools). + +These tests cover Windows compatibility concerns: path relativization +in search_files (ripgrep and Python fallback) and cross-platform behavior. +""" + +import os +import sys +from pathlib import Path +from unittest.mock import patch + +import pytest +from fastmcp import FastMCP + +from aden_tools.file_ops import register_file_tools + + +@pytest.fixture +def file_ops_mcp(tmp_path): + """Create FastMCP with file_ops registered, sandboxed to tmp_path.""" + + def resolve_path(p: str) -> str: + if os.path.isabs(p): + return os.path.normpath(p) + return str((tmp_path / p).resolve()) + + mcp = FastMCP("test-file-ops") + register_file_tools( + mcp, + resolve_path=resolve_path, + project_root=str(tmp_path), + ) + return mcp + + +def _get_tool_fn(mcp, name): + """Extract the raw function for a registered tool.""" + return mcp._tool_manager._tools[name].fn + + +class TestSearchFilesPathRelativization: + """Tests for search_files path handling (Windows path separator fix).""" + + def test_ripgrep_output_with_backslash_relativized(self, file_ops_mcp, tmp_path): + """Ripgrep output using backslashes (Windows) should be relativized when project_root is set. + + Simulates: rg outputs 'C:\\Users\\...\\proj\\src\\foo.py:1:needle' + Expected: output should show 'src\\foo.py:1:needle' or 'src/foo.py:1:needle' + (relativized, not full path). + """ + # Create a file so the search has something to find + (tmp_path / "src").mkdir() + (tmp_path / "src" / "foo.py").write_text("needle\n") + project_root = str(tmp_path) + + # Ripgrep on Windows outputs backslash-separated paths + # Format: path:line_num:content + rg_output = f"{project_root}{os.sep}src{os.sep}foo.py:1:needle" + + search_fn = _get_tool_fn(file_ops_mcp, "search_files") + + with patch("aden_tools.file_ops.subprocess.run") as mock_run: + mock_run.return_value = type( + "Result", (), {"returncode": 0, "stdout": rg_output, "stderr": ""} + )() + + result = search_fn( + pattern="needle", + path=str(tmp_path), + ) + + # Output should be relativized (no full project_root in the line) + assert project_root not in result, ( + f"Output should not contain full project_root. Got: {result!r}" + ) + # Should contain the relative path part + assert "foo.py" in result + assert "1:" in result or ":1:" in result + + def test_ripgrep_output_with_forward_slash_relativized(self, file_ops_mcp, tmp_path): + """Ripgrep output using forward slashes (Unix/rg default) should be relativized.""" + (tmp_path / "src").mkdir() + (tmp_path / "src" / "bar.py").write_text("pattern_match\n") + project_root = str(tmp_path) + + # Some ripgrep builds output forward slashes even on Windows + rg_output = f"{project_root}/src/bar.py:1:pattern_match" + + search_fn = _get_tool_fn(file_ops_mcp, "search_files") + + with patch("aden_tools.file_ops.subprocess.run") as mock_run: + mock_run.return_value = type( + "Result", (), {"returncode": 0, "stdout": rg_output, "stderr": ""} + )() + + result = search_fn( + pattern="pattern_match", + path=str(tmp_path), + ) + + assert project_root not in result or "src/bar.py" in result + assert "bar.py" in result + + def test_python_fallback_relativizes_paths(self, file_ops_mcp, tmp_path): + """Python fallback (no ripgrep) uses os.path.relpath - should work on all platforms.""" + (tmp_path / "subdir").mkdir() + (tmp_path / "subdir" / "baz.txt").write_text("find_me\n") + + search_fn = _get_tool_fn(file_ops_mcp, "search_files") + + # Ensure ripgrep is not used + with patch("aden_tools.file_ops.subprocess.run", side_effect=FileNotFoundError()): + result = search_fn( + pattern="find_me", + path=str(tmp_path), + ) + + # Python fallback uses os.path.relpath - should produce relative path + project_root = str(tmp_path) + assert project_root not in result or "subdir" in result + assert "baz.txt" in result + assert "1:" in result or ":1:" in result + + +class TestSearchFilesBasic: + """Basic search_files behavior (no path mocking).""" + + def test_search_finds_content(self, file_ops_mcp, tmp_path): + """search_files finds matching content via Python fallback when rg absent.""" + (tmp_path / "hello.txt").write_text("world\n") + + search_fn = _get_tool_fn(file_ops_mcp, "search_files") + + with patch("aden_tools.file_ops.subprocess.run", side_effect=FileNotFoundError()): + result = search_fn(pattern="world", path=str(tmp_path)) + + assert "world" in result + assert "hello.txt" in result + + def test_search_nonexistent_dir_returns_error(self, file_ops_mcp, tmp_path): + """search_files on non-existent directory returns error.""" + search_fn = _get_tool_fn(file_ops_mcp, "search_files") + result = search_fn(pattern="x", path=str(tmp_path / "nonexistent")) + assert "Error" in result + assert "not found" in result.lower() diff --git a/tools/tests/tools/test_run_command_pythonpath.py b/tools/tests/tools/test_run_command_pythonpath.py new file mode 100644 index 00000000..2724ffeb --- /dev/null +++ b/tools/tests/tools/test_run_command_pythonpath.py @@ -0,0 +1,118 @@ +"""Tests for run_command PYTHONPATH handling (Windows compatibility). + +On Windows, PYTHONPATH must use semicolon (;) as separator, not colon (:). +These tests verify the correct behavior. They are Windows-only because +the bug only manifests on Windows. +""" + +import os +import subprocess +import sys +from pathlib import Path + +import pytest + +# Skip entire module on non-Windows (tests will pass when fixes are applied) +pytestmark = pytest.mark.skipif( + sys.platform != "win32", + reason="Windows-only: PYTHONPATH separator behavior", +) + + +def _build_pythonpath_buggy(project_root: str) -> str: + """Replicate current (buggy) PYTHONPATH construction in run_command.""" + return ( + f"{project_root}/core:{project_root}/exports" + f":{project_root}/core/framework/agents" + ) + + +def _build_pythonpath_fixed(project_root: str) -> str: + """Correct PYTHONPATH construction using os.pathsep.""" + return os.pathsep.join( + [ + os.path.join(project_root, "core"), + os.path.join(project_root, "exports"), + os.path.join(project_root, "core", "framework", "agents"), + ] + ) + + +class TestPythonpathSeparatorWindows: + """Verify PYTHONPATH uses correct separator on Windows.""" + + def test_pythonpath_with_semicolons_parses_multiple_paths(self, tmp_path): + """PYTHONPATH built with os.pathsep allows Python to find modules in multiple dirs.""" + # Create two dirs, each with a module + core_dir = tmp_path / "core" + core_dir.mkdir() + (core_dir / "mod_a.py").write_text("x = 1\n") + + exports_dir = tmp_path / "exports" + exports_dir.mkdir() + (exports_dir / "mod_b.py").write_text("y = 2\n") + + pythonpath = os.pathsep.join([str(core_dir), str(exports_dir)]) + env = {**os.environ, "PYTHONPATH": pythonpath} + + # Python should find both when we add them to path + result = subprocess.run( + [ + sys.executable, + "-c", + "import sys; " + "sys.path = [p for p in sys.path if 'mod_a' not in p and 'mod_b' not in p]; " + "import mod_a; import mod_b; print('ok')", + ], + env=env, + capture_output=True, + text=True, + cwd=str(tmp_path), + timeout=10, + ) + + assert result.returncode == 0, f"Stdout: {result.stdout} Stderr: {result.stderr}" + assert "ok" in result.stdout + + def test_pythonpath_with_colons_fails_on_windows(self, tmp_path): + """PYTHONPATH built with colons (Unix style) fails on Windows - single path parsed.""" + core_dir = tmp_path / "core" + core_dir.mkdir() + (core_dir / "mod_c.py").write_text("z = 3\n") + + exports_dir = tmp_path / "exports" + exports_dir.mkdir() + (exports_dir / "mod_d.py").write_text("w = 4\n") + + # Buggy: colon-separated (Unix style) + pythonpath = f"{tmp_path}/core:{tmp_path}/exports" + env = {**os.environ, "PYTHONPATH": pythonpath} + + # On Windows, Python splits by ; only. The colon string is one invalid path. + result = subprocess.run( + [ + sys.executable, + "-c", + "import sys; " + "pp = [p for p in sys.path if 'core' in p or 'exports' in p]; " + "import mod_c; import mod_d; print('ok')", + ], + env=env, + capture_output=True, + text=True, + cwd=str(tmp_path), + timeout=10, + ) + + # Should fail: Python won't parse multiple paths from colon-separated string + assert result.returncode != 0 or "ok" not in result.stdout + + def test_fixed_pythonpath_construction_uses_pathsep(self, tmp_path): + """The fix pattern (os.pathsep.join) produces valid multi-path PYTHONPATH.""" + project_root = str(tmp_path) + fixed = _build_pythonpath_fixed(project_root) + + # On Windows, os.pathsep is ';' + assert os.pathsep in fixed, "Fixed PYTHONPATH must use os.pathsep on Windows" + # Three paths => two separators + assert fixed.count(os.pathsep) == 2