From 8470c6a980ee2d7f2f4a8180566ee9022966e4f5 Mon Sep 17 00:00:00 2001
From: Fernando Mano <fermano@gmail.com>
Date: Wed, 4 Mar 2026 14:16:48 -0300
Subject: [PATCH] feature(WindowsFilesystemSupport): #5677 - Windows File
 System Support and Testing

---
 .github/workflows/ci.yml                      |   7 +-
 README.md                                     |   1 +
 .../agents/hive_coder/nodes/__init__.py       | 144 +++++-----
 core/framework/runner/mcp_client.py           |  56 +++-
 core/framework/runner/runner.py               |  10 +
 core/framework/runner/tool_registry.py        | 119 +++++++-
 hive.ps1                                      |   3 +-
 tools/coder_tools_server.py                   | 263 +++++++++++++++---
 tools/src/aden_tools/file_ops.py              |  50 +++-
 tools/tests/tools/test_file_ops.py            | 145 ++++++++++
 .../tools/test_run_command_pythonpath.py      | 118 ++++++++
 11 files changed, 767 insertions(+), 149 deletions(-)
 create mode 100644 tools/tests/tools/test_file_ops.py
 create mode 100644 tools/tests/tools/test_run_command_pythonpath.py

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 5ce273fb..3322fcae 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -62,8 +62,11 @@ jobs:
           uv run pytest tests/ -v
 
   test-tools:
-    name: Test Tools
-    runs-on: ubuntu-latest
+    name: Test Tools (${{ matrix.os }})
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-latest, windows-latest]
     steps:
       - uses: actions/checkout@v4
 
diff --git a/README.md b/README.md
index 47e628f5..81c469f3 100644
--- a/README.md
+++ b/README.md
@@ -82,6 +82,7 @@ Use Hive when you need:
 
 - Python 3.11+ for agent development
 - An LLM provider that powers the agents
+- **ripgrep (optional, recommended on Windows):** The `search_files` tool uses ripgrep for faster file search. If not installed, a Python fallback is used. On Windows: `winget install BurntSushi.ripgrep` or `scoop install ripgrep`
 
 > **Note for Windows Users:** It is strongly recommended to use **WSL (Windows Subsystem for Linux)** or **Git Bash** to run this framework. Some core automation scripts may not execute correctly in standard Command Prompt or PowerShell.
 
diff --git a/core/framework/agents/hive_coder/nodes/__init__.py b/core/framework/agents/hive_coder/nodes/__init__.py
index 741d82e7..cc2d5a0d 100644
--- a/core/framework/agents/hive_coder/nodes/__init__.py
+++ b/core/framework/agents/hive_coder/nodes/__init__.py
@@ -131,13 +131,18 @@ errors yourself. Don't declare success until validation passes.
 
 # Tools
 
+## Paths (MANDATORY)
+**Always use RELATIVE paths** (e.g. `exports/agent_name/config.py`, `exports/agent_name/nodes/__init__.py`).
+**Never use absolute paths** like `/mnt/data/...` or `/workspace/...` — they fail. The project root is implicit.
+
 ## File I/O
 - read_file(path, offset?, limit?) — read with line numbers
 - write_file(path, content) — create/overwrite, auto-mkdir
 - edit_file(path, old_text, new_text, replace_all?) — fuzzy-match edit
 - list_directory(path, recursive?) — list contents
 - search_files(pattern, path?, include?) — regex search
-- run_command(command, cwd?, timeout?) — shell execution
+- run_command(command, cwd?, timeout?) — shell execution. Prefer \
+run_agent_tests for tests; run_command can timeout on long runs.
 - undo_changes(path?) — restore from git snapshot
 
 ## Meta-Agent
@@ -177,10 +182,11 @@ for patterns:
   read_file("exports/{name}/nodes/__init__.py")
 
 ## Post-Build Testing
-After writing agent code, validate structurally AND run tests:
-  run_command("python -c 'from {name} import default_agent; \\
-    print(default_agent.validate())'")
-  run_agent_tests("{name}")
+**Prefer dedicated tools** — run_command can timeout (MCP). Use:
+  validate_agent_tools("exports/{name}")  # tool existence check
+  run_agent_tests("{name}")               # run pytest
+Avoid run_command for pytest or long validation. Structural checks via \
+run_command (validate, AgentRunner.load) are optional; they may timeout.
 
 ## Debugging Built Agents
 When a user says "my agent is failing" or "debug this agent":
@@ -507,39 +513,40 @@ triggers, use `AsyncEntryPointSpec` (from framework.graph.edge) and \
 
 ## 5. Verify
 
-Run FOUR validation steps after writing. All must pass:
+**Reliability note:** run_command can timeout (MCP/stdio limits). Do Steps 1 \
+and 2 first — they are reliable. Steps 3 and 4 use run_command and may timeout.
 
-**Step A — Class validation** (checks graph structure):
+**Step 1 — Tool validation** (REQUIRED; reliable):
+```
+validate_agent_tools("exports/{name}")
+```
+Checks that declared tools exist in the agent's MCP servers. If any are \
+missing: fix node definitions. Run list_agent_tools() to see what's available.
+
+**Step 2 — Run tests** (REQUIRED; reliable):
+```
+run_agent_tests("{name}")
+```
+Runs pytest with proper timeouts. **Do NOT use run_command with pytest** — \
+it times out. Use run_agent_tests only.
+
+**Step 3 — Class validation** (optional; may timeout):
 ```
 run_command("python -c 'from {name} import default_agent; \\
   print(default_agent.validate())'")
 ```
+Structural check. Skip if it times out; Steps 1–2 and load_built_agent suffice.
 
-**Step B — Runner load test** (checks package export contract — \
-THIS IS THE SAME PATH THE TUI USES):
+**Step 4 — Runner load test** (optional; may timeout):
 ```
 run_command("python -c 'from framework.runner.runner import \\
   AgentRunner; r = AgentRunner.load(\"exports/{name}\"); \\
   print(\"AgentRunner.load: OK\")'")
 ```
-This catches missing __init__.py exports, bad conversation_mode, \
-invalid loop_config, and unreachable nodes. If Step A passes but \
-Step B fails, the problem is in __init__.py exports.
+Catches __init__.py exports, conversation_mode, loop_config. Skip if timeout; \
+load_built_agent will surface load errors when you load the agent.
 
-**Step C — Tool validation** (checks that declared tools actually exist \
-in the agent's MCP servers — catches hallucinated tool names):
-```
-validate_agent_tools("exports/{name}")
-```
-If any tools are missing: fix the node definitions to use only tools \
-that exist. Run list_agent_tools() to see what's available.
-
-**Step D — Run tests:**
-```
-run_agent_tests("{name}")
-```
-
-If anything fails: read error, fix with edit_file, re-validate. Up to 3x.
+If Steps 1 or 2 fail: read error, fix with edit_file, re-validate. Up to 3x.
 
 **CRITICAL: Testing forever-alive agents**
 Most agents use `terminal_nodes=[]` (forever-alive). This means \
@@ -598,54 +605,31 @@ start_agent("{name}")           # triggers default entry point
 
 _queen_tools_docs = """
 
-## Operating Modes
+## Worker Lifecycle
+- start_worker(task) — Start the worker with a task description. The \
+worker runs autonomously until it finishes or asks the user a question.
+- stop_worker() — Cancel the worker's current execution.
+- get_worker_status() — Check if the worker is idle, running, or waiting \
+for user input. Returns execution details.
+- inject_worker_message(content) — Send a message to the running worker. \
+Use this to relay user instructions or concerns.
 
-You operate in one of three modes. Your available tools change based on the \
-mode. The system notifies you when a mode change occurs.
+## Monitoring
+- get_worker_health_summary() — Read the latest health data from the judge.
+- notify_operator(ticket_id, analysis, urgency) — Alert the user about a \
+critical issue. Use sparingly.
 
-### BUILDING mode (default)
-You have full coding tools for building and modifying agents:
-- File I/O: read_file, write_file, edit_file, list_directory, search_files, \
-run_command, undo_changes
-- Meta-agent: list_agent_tools, validate_agent_tools, \
-list_agents, list_agent_sessions, get_agent_session_state, get_agent_session_memory, \
-list_agent_checkpoints, get_agent_checkpoint, run_agent_tests
-- load_built_agent(agent_path) — Load the agent and switch to STAGING mode
-- list_credentials(credential_id?) — List authorized credentials
+## Agent Loading
+- load_built_agent(agent_path) — Load a newly built agent as the worker in \
+this session. If a worker is already loaded, it is automatically unloaded \
+first. **Call in a separate turn** after write_file, validate_agent_tools, and \
+run_agent_tests have finished — never in the same batch, or files may \
+not exist yet.
 
-When you finish building an agent, call load_built_agent(path) to stage it.
-
-### STAGING mode (agent loaded, not yet running)
-The agent is loaded and ready to run. You can inspect it and launch it:
-- Read-only: read_file, list_directory, search_files, run_command
-- list_credentials(credential_id?) — Verify credentials are configured
-- get_worker_status() — Check the loaded worker
-- run_agent_with_input(task) — Start the worker and switch to RUNNING mode
-- stop_worker_and_edit() — Go back to BUILDING mode
-
-In STAGING mode you do NOT have write tools. If you need to modify the agent, \
-call stop_worker_and_edit() to go back to BUILDING mode.
-
-### RUNNING mode (worker is executing)
-The worker is running. You have monitoring and lifecycle tools:
-- Read-only: read_file, list_directory, search_files, run_command
-- get_worker_status() — Check worker status (idle, running, waiting)
-- inject_worker_message(content) — Send a message to the running worker
-- get_worker_health_summary() — Read the latest health data
-- notify_operator(ticket_id, analysis, urgency) — Alert the user (use sparingly)
-- stop_worker() — Stop the worker and return to STAGING mode, then ask the user what to do next
-- stop_worker_and_edit() — Stop the worker and switch back to BUILDING mode
-
-In RUNNING mode you do NOT have write tools or agent construction tools. \
-If you need to modify the agent, call stop_worker_and_edit() to switch back \
-to BUILDING mode. To stop the worker and ask the user what to do next, call \
-stop_worker() to return to STAGING mode.
-
-### Mode transitions
-- load_built_agent(path) → switches to STAGING mode
-- run_agent_with_input(task) → starts worker, switches to RUNNING mode
-- stop_worker() → stops worker, switches to STAGING mode (ask user: re-run or edit?)
-- stop_worker_and_edit() → stops worker (if running), switches to BUILDING mode
+## Credentials
+- list_credentials(credential_id?) — List all authorized credentials in the \
+local store. Returns IDs, aliases, status, and identity metadata (never \
+secrets). Optionally filter by credential_id.
 """
 
 _queen_behavior = """
@@ -781,16 +765,14 @@ building something new.
 When the user asks to change, modify, or update the loaded worker \
 (e.g., "change the report node", "add a node", "delete node X"):
 
-1. Call stop_worker_and_edit() — this stops the worker and gives you \
-coding tools (switches to BUILDING mode).
-2. Use the **Path** from the Worker Profile to locate the agent files.
-3. Read the relevant files (nodes/__init__.py, agent.py, etc.).
-4. Make the requested changes using edit_file / write_file.
-5. Run validation (default_agent.validate(), AgentRunner.load(), \
-validate_agent_tools()).
-6. **Reload the modified worker**: call load_built_agent("{path}") \
-so the changes take effect immediately (switches to STAGING mode). \
-Then call run_agent_with_input(task) to restart execution.
+1. Use the **Path** from the Worker Profile to locate the agent files.
+2. Read the relevant files (nodes/__init__.py, agent.py, etc.).
+3. Make the requested changes using edit_file / write_file.
+4. Run validation: validate_agent_tools(path), run_agent_tests(name). \
+Avoid run_command for validation — it can timeout.
+5. **Reload the modified worker**: call load_built_agent("{path}") \
+so the changes take effect immediately. If a worker is already loaded, \
+stop it first, then reload.
 
 Do NOT skip step 6 — without reloading, the user will still be \
 interacting with the old version.
diff --git a/core/framework/runner/mcp_client.py b/core/framework/runner/mcp_client.py
index 7e9512db..478cec72 100644
--- a/core/framework/runner/mcp_client.py
+++ b/core/framework/runner/mcp_client.py
@@ -7,6 +7,9 @@ Supports both STDIO and HTTP transports using the official MCP Python SDK.
 import asyncio
 import logging
 import os
+import sys
+import threading
+from concurrent.futures import TimeoutError as FuturesTimeoutError
 from dataclasses import dataclass, field
 from typing import Any, Literal
 
@@ -73,6 +76,8 @@ class MCPClient:
         # Background event loop for persistent STDIO connection
         self._loop = None
         self._loop_thread = None
+        # Serialize STDIO tool calls (avoids races, helps on Windows)
+        self._stdio_call_lock = threading.Lock()
 
     def _run_async(self, coro):
         """
@@ -89,7 +94,20 @@ class MCPClient:
             # Check if loop is running AND not closed
             if self._loop.is_running() and not self._loop.is_closed():
                 future = asyncio.run_coroutine_threadsafe(coro, self._loop)
-                return future.result()
+                try:
+                    return future.result(timeout=120)
+                except (TimeoutError, FuturesTimeoutError):
+                    logger.error(
+                        "MCP tool call timed out after 120s — connection may be dead."
+                    )
+                    try:
+                        self.disconnect()
+                    except Exception as e:
+                        logger.debug("Disconnect after timeout: %s", e)
+                    raise RuntimeError(
+                        "MCP tool call timed out. The server may have crashed or become unresponsive. "
+                        "Try again — the connection will be re-established."
+                    ) from None
             # else: fall through to the standard approach below
             # This handles the case when STDIO loop exists but is stopped/closed
 
@@ -156,11 +174,19 @@ class MCPClient:
             # Create server parameters
             # Always inherit parent environment and merge with any custom env vars
             merged_env = {**os.environ, **(self.config.env or {})}
+            # On Windows, passing cwd can cause WinError 267 ("invalid directory name").
+            # tool_registry passes cwd=None and uses absolute script paths when applicable.
+            cwd = self.config.cwd
+            if os.name == "nt" and cwd is not None:
+                # Avoid passing cwd on Windows; tool_registry should have set cwd=None
+                # and absolute script paths for tools-dir servers. If cwd is still set,
+                # pass None to prevent WinError 267 (caller should use absolute paths).
+                cwd = None
             server_params = StdioServerParameters(
                 command=self.config.command,
                 args=self.config.args,
                 env=merged_env,
-                cwd=self.config.cwd,
+                cwd=cwd,
             )
 
             # Store for later use
@@ -184,10 +210,13 @@ class MCPClient:
                         from mcp.client.stdio import stdio_client
 
                         # Create persistent stdio client context.
-                        # Redirect server stderr to devnull to prevent raw
-                        # output from leaking behind the TUI.
-                        devnull = open(os.devnull, "w")  # noqa: SIM115
-                        self._stdio_context = stdio_client(server_params, errlog=devnull)
+                        # On Windows, use stderr so subprocess startup errors are visible
+                        # when debugging "Connection closed" / MCP registration failures.
+                        if os.name == "nt":
+                            errlog = sys.stderr
+                        else:
+                            errlog = open(os.devnull, "w")  # noqa: SIM115
+                        self._stdio_context = stdio_client(server_params, errlog=errlog)
                         (
                             self._read_stream,
                             self._write_stream,
@@ -353,7 +382,12 @@ class MCPClient:
             raise ValueError(f"Unknown tool: {tool_name}")
 
         if self.config.transport == "stdio":
-            return self._run_async(self._call_tool_stdio_async(tool_name, arguments))
+            with self._stdio_call_lock:
+                if not self._session and self._connected:
+                    self._connected = False
+                if not self._connected:
+                    self.connect()
+                return self._run_async(self._call_tool_stdio_async(tool_name, arguments))
         else:
             return self._call_tool_http(tool_name, arguments)
 
@@ -448,11 +482,15 @@ class MCPClient:
             if self._stdio_context:
                 await self._stdio_context.__aexit__(None, None, None)
         except asyncio.CancelledError:
-            logger.warning(
+            logger.debug(
                 "STDIO context cleanup was cancelled; proceeding with best-effort shutdown"
             )
         except Exception as e:
-            logger.warning(f"Error closing STDIO context: {e}")
+            msg = str(e).lower()
+            if "cancel scope" in msg or "different task" in msg:
+                logger.debug("STDIO context teardown (known anyio quirk): %s", e)
+            else:
+                logger.warning(f"Error closing STDIO context: {e}")
         finally:
             self._stdio_context = None
 
diff --git a/core/framework/runner/runner.py b/core/framework/runner/runner.py
index fbe554eb..eec8cc9f 100644
--- a/core/framework/runner/runner.py
+++ b/core/framework/runner/runner.py
@@ -1119,6 +1119,11 @@ class AgentRunner:
                 gcu_config = dict(GCU_MCP_SERVER_CONFIG)
                 _repo_root = Path(__file__).resolve().parent.parent.parent.parent
                 gcu_config["cwd"] = str(_repo_root / "tools")
+
+                #gcu_config = self._tool_registry._resolve_mcp_server_config(
+                #    dict(GCU_MCP_SERVER_CONFIG), self.agent_path
+                #)
+
                 self._tool_registry.register_mcp_server(gcu_config)
                 gcu_tool_names = self._tool_registry.get_server_tool_names(GCU_SERVER_NAME)
 
@@ -1142,6 +1147,11 @@ class AgentRunner:
                 files_config = dict(FILES_MCP_SERVER_CONFIG)
                 _repo_root = Path(__file__).resolve().parent.parent.parent.parent
                 files_config["cwd"] = str(_repo_root / "tools")
+
+#                files_config = self._tool_registry._resolve_mcp_server_config(
+#                    dict(FILES_MCP_SERVER_CONFIG), self.agent_path
+#                )
+
                 self._tool_registry.register_mcp_server(files_config)
                 files_tool_names = self._tool_registry.get_server_tool_names(FILES_MCP_SERVER_NAME)
 
diff --git a/core/framework/runner/tool_registry.py b/core/framework/runner/tool_registry.py
index 78f5c936..ae6b5e07 100644
--- a/core/framework/runner/tool_registry.py
+++ b/core/framework/runner/tool_registry.py
@@ -243,10 +243,12 @@ class ToolRegistry:
         def _wrap_result(tool_use_id: str, result: Any) -> ToolResult:
             if isinstance(result, ToolResult):
                 return result
+            is_error = isinstance(result, dict) and "error" in result
+            content = json.dumps(result) if not isinstance(result, str) else result
             return ToolResult(
                 tool_use_id=tool_use_id,
-                content=json.dumps(result) if not isinstance(result, str) else result,
-                is_error=False,
+                content=content,
+                is_error=is_error,
             )
 
         def executor(tool_use: ToolUse) -> ToolResult:
@@ -326,6 +328,105 @@ class ToolRegistry:
         """Restore execution context to its previous state."""
         _execution_context.reset(token)
 
+    @staticmethod
+    def resolve_mcp_stdio_config(
+        server_config: dict[str, Any], base_dir: Path
+    ) -> dict[str, Any]:
+        """Resolve cwd and script paths for MCP stdio config (Windows compatibility).
+
+        Use this when building MCPServerConfig from a config file (e.g. in
+        list_agent_tools, discover_mcp_tools) so hive-tools and other servers
+        work on Windows. Call with base_dir = directory containing the config.
+        """
+        registry = ToolRegistry()
+        return registry._resolve_mcp_server_config(server_config, base_dir)
+
+    def _resolve_mcp_server_config(
+        self, server_config: dict[str, Any], base_dir: Path
+    ) -> dict[str, Any]:
+        """Resolve cwd and script paths for MCP stdio servers (Windows compatibility).
+
+        On Windows, passing cwd to subprocess can cause WinError 267. We use cwd=None
+        and absolute script paths when the server runs a .py script from the tools dir.
+        If the resolved cwd doesn't exist (e.g. config from ~/.hive/agents/), fall back
+        to Path.cwd() / "tools".
+        """
+        config = dict(server_config)
+        if config.get("transport") != "stdio":
+            return config
+
+        cwd = config.get("cwd")
+        args = list(config.get("args", []))
+        if not cwd and not args:
+            return config
+
+        # Resolve cwd relative to base_dir
+        resolved_cwd: Path | None = None
+        if cwd:
+            if Path(cwd).is_absolute():
+                resolved_cwd = Path(cwd)
+            else:
+                resolved_cwd = (base_dir / cwd).resolve()
+
+        # Find .py script in args (e.g. coder_tools_server.py, files_server.py)
+        script_name = None
+        for i, arg in enumerate(args):
+            if isinstance(arg, str) and arg.endswith(".py"):
+                script_name = arg
+                script_idx = i
+                break
+
+        if resolved_cwd is None:
+            return config
+
+        # If resolved cwd doesn't exist or (when we have a script) doesn't contain it,
+        # try fallback
+        tools_fallback = Path.cwd() / "tools"
+        need_fallback = not resolved_cwd.is_dir()
+        if script_name and not need_fallback:
+            need_fallback = not (resolved_cwd / script_name).exists()
+        if need_fallback:
+            fallback_ok = tools_fallback.is_dir()
+            if script_name:
+                fallback_ok = fallback_ok and (tools_fallback / script_name).exists()
+            else:
+                # No script (e.g. GCU); just need tools dir to exist
+                pass
+            if fallback_ok:
+                resolved_cwd = tools_fallback
+                logger.debug(
+                    "MCP server '%s': using fallback tools dir %s",
+                    config.get("name", "?"),
+                    resolved_cwd,
+                )
+            else:
+                config["cwd"] = str(resolved_cwd)
+                return config
+
+        if not script_name:
+            # No .py script (e.g. GCU uses -m gcu.server); just set cwd
+            config["cwd"] = str(resolved_cwd)
+            return config
+
+        # For coder_tools_server, inject --project-root so writes go to the expected workspace
+        if script_name and "coder_tools" in script_name:
+            project_root = str(resolved_cwd.parent.resolve())
+            args = list(args)
+            if "--project-root" not in args:
+                args.extend(["--project-root", project_root])
+            config["args"] = args
+
+        if os.name == "nt":
+            # Windows: cwd=None avoids WinError 267; use absolute script path
+            config["cwd"] = None
+            abs_script = str((resolved_cwd / script_name).resolve())
+            args = list(config["args"])
+            args[script_idx] = abs_script
+            config["args"] = args
+        else:
+            config["cwd"] = str(resolved_cwd)
+        return config
+
     def load_mcp_config(self, config_path: Path) -> None:
         """
         Load and register MCP servers from a config file.
@@ -340,7 +441,7 @@ class ToolRegistry:
         self._mcp_config_path = Path(config_path)
 
         try:
-            with open(config_path) as f:
+            with open(config_path, encoding="utf-8") as f:
                 config = json.load(f)
         except Exception as e:
             logger.warning(f"Failed to load MCP config from {config_path}: {e}")
@@ -357,9 +458,7 @@ class ToolRegistry:
             server_list = [{"name": name, **cfg} for name, cfg in config.items()]
 
         for server_config in server_list:
-            cwd = server_config.get("cwd")
-            if cwd and not Path(cwd).is_absolute():
-                server_config["cwd"] = str((base_dir / cwd).resolve())
+            server_config = self._resolve_mcp_server_config(server_config, base_dir)
             try:
                 self.register_mcp_server(server_config)
             except Exception as e:
@@ -479,7 +578,13 @@ class ToolRegistry:
             return count
 
         except Exception as e:
-            logger.error(f"Failed to register MCP server: {e}")
+            name = server_config.get("name", "unknown")
+            logger.error(f"Failed to register MCP server '{name}': {e}")
+            if "Connection closed" in str(e) and os.name == "nt":
+                logger.debug(
+                    "On Windows, check that the MCP subprocess starts (e.g. uv in PATH, "
+                    "script path correct). Worker config uses base_dir = mcp_servers.json parent."
+                )
             return 0
 
     def _convert_mcp_tool_to_framework_tool(self, mcp_tool: Any) -> Tool:
diff --git a/hive.ps1 b/hive.ps1
index 920db035..8436ac6b 100644
--- a/hive.ps1
+++ b/hive.ps1
@@ -78,5 +78,6 @@ if (-not $env:HIVE_CREDENTIAL_KEY) {
 }
 
 # ── Run the Hive CLI ────────────────────────────────────────────────
-
+# PYTHONUTF8=1: use UTF-8 for default encoding (fixes charmap decode errors on Windows)
+$env:PYTHONUTF8 = "1"
 & uv run hive @args
diff --git a/tools/coder_tools_server.py b/tools/coder_tools_server.py
index 22e20160..86ec34f7 100644
--- a/tools/coder_tools_server.py
+++ b/tools/coder_tools_server.py
@@ -71,8 +71,40 @@ def _find_project_root() -> str:
 
 def _resolve_path(path: str) -> str:
     """Resolve path relative to PROJECT_ROOT. Raises ValueError if outside."""
+    # Normalize slashes for cross-platform (e.g. exports/hi_agent from LLM)
+    path = path.replace("/", os.sep)
     if os.path.isabs(path):
         resolved = os.path.abspath(path)
+        try:
+            common = os.path.commonpath([resolved, PROJECT_ROOT])
+        except ValueError:
+            common = ""
+        if common != PROJECT_ROOT:
+            # LLM may emit wrong-root paths (/mnt/data, /workspace, etc.).
+            # Strip known prefixes and treat the remainder as relative to PROJECT_ROOT.
+            path_norm = path.replace("\\", "/")
+            for prefix in ("/mnt/data/", "/mnt/data", "/workspace/", "/workspace",
+                          "/repo/", "/repo"):
+                p = prefix.rstrip("/") + "/"
+                if path_norm.startswith(p) or (path_norm.startswith(prefix.rstrip("/")) and len(path_norm) > len(prefix)):
+                    suffix = path_norm[len(prefix.rstrip("/")):].lstrip("/")
+                    if suffix:
+                        path = suffix.replace("/", os.sep)
+                        resolved = os.path.abspath(os.path.join(PROJECT_ROOT, path))
+                        break
+            else:
+                # Try extracting exports/ or core/ subpath from the absolute path
+                parts = path.split(os.sep)
+                if "exports" in parts:
+                    idx = parts.index("exports")
+                    path = os.sep.join(parts[idx:])
+                    resolved = os.path.abspath(os.path.join(PROJECT_ROOT, path))
+                elif "core" in parts:
+                    idx = parts.index("core")
+                    path = os.sep.join(parts[idx:])
+                    resolved = os.path.abspath(os.path.join(PROJECT_ROOT, path))
+                else:
+                    raise ValueError(f"Access denied: '{path}' is outside the project root.")
     else:
         resolved = os.path.abspath(os.path.join(PROJECT_ROOT, path))
     try:
@@ -122,7 +154,38 @@ def _take_snapshot() -> str:
 
 # ── Tool: run_command ─────────────────────────────────────────────────────
 
-MAX_COMMAND_OUTPUT = 30_000  # chars before truncation
+MAX_COMMAND_OUTPUT = 16_000  # chars before truncation (keep under stdio pipe buffer)
+
+
+def _translate_command_for_windows(command: str) -> str:
+    """Translate common Unix commands to Windows equivalents."""
+    if os.name != "nt":
+        return command
+    cmd = command.strip()
+
+    # mkdir -p: Unix creates parents; Windows mkdir already does; -p becomes a dir name
+    if cmd.startswith("mkdir -p ") or cmd.startswith("mkdir -p\t"):
+        rest = cmd[9:].lstrip().replace("/", os.sep)
+        return "mkdir " + rest
+
+    # ls / pwd: cmd.exe uses dir and cd
+    # Order matters: replace longer patterns first
+    for unix, win in [
+        ("ls -la", "dir /a"),
+        ("ls -al", "dir /a"),
+        ("ls -l", "dir"),
+        ("ls -a", "dir /a"),
+        ("ls ", "dir "),
+        ("pwd", "cd"),
+    ]:
+        cmd = cmd.replace(unix, win)
+    # Standalone "ls" at end (e.g. "cd x && ls")
+    if cmd.endswith(" ls"):
+        cmd = cmd[:-3] + " dir"
+    elif cmd == "ls":
+        cmd = "dir"
+
+    return cmd
 
 
 @mcp.tool()
@@ -141,9 +204,12 @@ def run_command(command: str, cwd: str = "", timeout: int = 120) -> str:
         Combined stdout/stderr with exit code
     """
     timeout = min(timeout, 300)
+    # Use lower internal timeout so we return before MCP client's 120s timeout
+    cmd_timeout = min(timeout, 90)
     work_dir = _resolve_path(cwd) if cwd else PROJECT_ROOT
 
     try:
+        command = _translate_command_for_windows(command)
         start = time.monotonic()
         result = subprocess.run(
             command,
@@ -151,12 +217,15 @@ def run_command(command: str, cwd: str = "", timeout: int = 120) -> str:
             cwd=work_dir,
             capture_output=True,
             text=True,
-            timeout=timeout,
+            timeout=cmd_timeout,
             env={
                 **os.environ,
-                "PYTHONPATH": (
-                    f"{PROJECT_ROOT}/core:{PROJECT_ROOT}/exports"
-                    f":{PROJECT_ROOT}/core/framework/agents"
+                "PYTHONPATH": os.pathsep.join(
+                    [
+                        os.path.join(PROJECT_ROOT, "core"),
+                        os.path.join(PROJECT_ROOT, "exports"),
+                        os.path.join(PROJECT_ROOT, "core", "framework", "agents"),
+                    ]
                 ),
             },
         )
@@ -181,7 +250,7 @@ def run_command(command: str, cwd: str = "", timeout: int = 120) -> str:
         return output
     except subprocess.TimeoutExpired:
         return (
-            f"Error: Command timed out after {timeout}s. "
+            f"Error: Command timed out after {cmd_timeout}s. "
             "Consider breaking it into smaller operations."
         )
     except Exception as e:
@@ -254,9 +323,111 @@ def list_agent_tools(
 ) -> str:
     """Discover tools available for agent building, grouped by category.
 
-    Connects to each MCP server, lists tools, then disconnects. Use this
-    BEFORE designing an agent to know exactly which tools exist. Only use
-    tools from this list in node definitions — never guess or fabricate.
+    Connects to each MCP server, lists all tools with full schemas, then
+    disconnects. Use this to see what tools are available before designing
+    an agent — never rely on static documentation.
+
+    Args:
+        server_config_path: Path to mcp_servers.json (relative to project root).
+            Default: the hive-tools server config at tools/mcp_servers.json.
+            Can also point to any agent's mcp_servers.json.
+
+    Returns:
+        JSON listing of all tools with names, descriptions, and input schemas
+    """
+    # Resolve config path
+    if not server_config_path:
+        # Default: look for the main hive-tools mcp_servers.json
+        candidates = [
+            os.path.join(PROJECT_ROOT, "tools", "mcp_servers.json"),
+            os.path.join(PROJECT_ROOT, "mcp_servers.json"),
+        ]
+        config_path = None
+        for c in candidates:
+            if os.path.isfile(c):
+                config_path = c
+                break
+        if not config_path:
+            return "Error: No mcp_servers.json found. Provide server_config_path."
+    else:
+        config_path = _resolve_path(server_config_path)
+        if not os.path.isfile(config_path):
+            return f"Error: Config file not found: {server_config_path}"
+
+    try:
+        with open(config_path, encoding="utf-8") as f:
+            servers_config = json.load(f)
+    except (json.JSONDecodeError, OSError) as e:
+        return f"Error reading config: {e}"
+
+    # Import MCPClient (deferred — needs PYTHONPATH to include core/)
+    try:
+        from pathlib import Path
+
+        from framework.runner.mcp_client import MCPClient, MCPServerConfig
+        from framework.runner.tool_registry import ToolRegistry
+    except ImportError:
+        return "Error: Cannot import MCPClient. Ensure PYTHONPATH includes the core/ directory."
+
+    all_tools = []
+    errors = []
+    config_dir = Path(config_path).parent
+
+    for server_name, server_conf in servers_config.items():
+        # Use shared resolution (Windows cwd/script path handling)
+        resolved = ToolRegistry.resolve_mcp_stdio_config(
+            {"name": server_name, **server_conf}, config_dir
+        )
+        try:
+            config = MCPServerConfig(
+                name=server_name,
+                transport=resolved.get("transport", "stdio"),
+                command=resolved.get("command"),
+                args=resolved.get("args", []),
+                env=resolved.get("env", {}),
+                cwd=resolved.get("cwd"),
+                url=resolved.get("url"),
+                headers=resolved.get("headers", {}),
+            )
+            client = MCPClient(config)
+            client.connect()
+            tools = client.list_tools()
+
+            for tool in tools:
+                all_tools.append(
+                    {
+                        "server": server_name,
+                        "name": tool.name,
+                        "description": tool.description,
+                        "input_schema": tool.input_schema,
+                    }
+                )
+
+            client.disconnect()
+        except Exception as e:
+            errors.append({"server": server_name, "error": str(e)})
+
+    result = {
+        "tools": all_tools,
+        "total": len(all_tools),
+        "servers_queried": len(servers_config),
+    }
+    if errors:
+        result["errors"] = errors
+
+    return json.dumps(result, indent=2, default=str)
+
+
+# ── Meta-agent: Agent tool catalog ────────────────────────────────────────
+
+
+@mcp.tool()
+def list_agent_tools(server_config_path: str = "") -> str:
+    """List all tools available for agent building from the hive-tools MCP server.
+
+    Returns tool names grouped by category. Use this BEFORE designing an agent
+    to know exactly which tools exist. Only use tools from this list in node
+    definitions — never guess or fabricate tool names.
 
     Args:
         server_config_path: Path to mcp_servers.json. Default: tools/mcp_servers.json
@@ -300,28 +471,31 @@ def list_agent_tools(
         return json.dumps({"error": f"Failed to read config: {e}"})
 
     try:
+        from pathlib import Path
+
         from framework.runner.mcp_client import MCPClient, MCPServerConfig
+        from framework.runner.tool_registry import ToolRegistry
     except ImportError:
         return json.dumps({"error": "Cannot import MCPClient"})
 
     all_tools: list[dict] = []
     errors = []
-    config_dir = os.path.dirname(config_path)
+    config_dir = Path(config_path).parent
 
     for server_name, server_conf in servers_config.items():
-        cwd = server_conf.get("cwd", "")
-        if cwd and not os.path.isabs(cwd):
-            cwd = os.path.abspath(os.path.join(config_dir, cwd))
+        resolved = ToolRegistry.resolve_mcp_stdio_config(
+            {"name": server_name, **server_conf}, config_dir
+        )
         try:
             config = MCPServerConfig(
                 name=server_name,
-                transport=server_conf.get("transport", "stdio"),
-                command=server_conf.get("command"),
-                args=server_conf.get("args", []),
-                env=server_conf.get("env", {}),
-                cwd=cwd or None,
-                url=server_conf.get("url"),
-                headers=server_conf.get("headers", {}),
+                transport=resolved.get("transport", "stdio"),
+                command=resolved.get("command"),
+                args=resolved.get("args", []),
+                env=resolved.get("env", {}),
+                cwd=resolved.get("cwd"),
+                url=resolved.get("url"),
+                headers=resolved.get("headers", {}),
             )
             client = MCPClient(config)
             client.connect()
@@ -410,19 +584,24 @@ def validate_agent_tools(agent_path: str) -> str:
     if not os.path.isdir(resolved):
         return json.dumps({"error": f"Agent directory not found: {agent_path}"})
 
+    agent_dir = resolved  # Keep path; 'resolved' is reused for MCP config in loop
+
     # --- Discover available tools from agent's MCP servers ---
-    mcp_config_path = os.path.join(resolved, "mcp_servers.json")
+    mcp_config_path = os.path.join(agent_dir, "mcp_servers.json")
     if not os.path.isfile(mcp_config_path):
         return json.dumps({"error": f"No mcp_servers.json found in {agent_path}"})
 
     try:
+        from pathlib import Path
+
         from framework.runner.mcp_client import MCPClient, MCPServerConfig
+        from framework.runner.tool_registry import ToolRegistry
     except ImportError:
         return json.dumps({"error": "Cannot import MCPClient"})
 
     available_tools: set[str] = set()
     discovery_errors = []
-    config_dir = os.path.dirname(mcp_config_path)
+    config_dir = Path(mcp_config_path).parent
 
     try:
         with open(mcp_config_path, encoding="utf-8") as f:
@@ -431,19 +610,19 @@ def validate_agent_tools(agent_path: str) -> str:
         return json.dumps({"error": f"Failed to read mcp_servers.json: {e}"})
 
     for server_name, server_conf in servers_config.items():
-        cwd = server_conf.get("cwd", "")
-        if cwd and not os.path.isabs(cwd):
-            cwd = os.path.abspath(os.path.join(config_dir, cwd))
+        resolved = ToolRegistry.resolve_mcp_stdio_config(
+            {"name": server_name, **server_conf}, config_dir
+        )
         try:
             config = MCPServerConfig(
                 name=server_name,
-                transport=server_conf.get("transport", "stdio"),
-                command=server_conf.get("command"),
-                args=server_conf.get("args", []),
-                env=server_conf.get("env", {}),
-                cwd=cwd or None,
-                url=server_conf.get("url"),
-                headers=server_conf.get("headers", {}),
+                transport=resolved.get("transport", "stdio"),
+                command=resolved.get("command"),
+                args=resolved.get("args", []),
+                env=resolved.get("env", {}),
+                cwd=resolved.get("cwd"),
+                url=resolved.get("url"),
+                headers=resolved.get("headers", {}),
             )
             client = MCPClient(config)
             client.connect()
@@ -454,7 +633,7 @@ def validate_agent_tools(agent_path: str) -> str:
             discovery_errors.append({"server": server_name, "error": str(e)})
 
     # --- Load agent nodes and extract declared tools ---
-    agent_py = os.path.join(resolved, "agent.py")
+    agent_py = os.path.join(agent_dir, "agent.py")
     if not os.path.isfile(agent_py):
         return json.dumps({"error": f"No agent.py found in {agent_path}"})
 
@@ -462,8 +641,8 @@ def validate_agent_tools(agent_path: str) -> str:
     import importlib.util
     import sys
 
-    package_name = os.path.basename(resolved)
-    parent_dir = os.path.dirname(os.path.abspath(resolved))
+    package_name = os.path.basename(agent_dir)
+    parent_dir = os.path.dirname(os.path.abspath(agent_dir))
     if parent_dir not in sys.path:
         sys.path.insert(0, parent_dir)
 
@@ -1006,26 +1185,30 @@ def run_agent_tests(
         cmd.append("-x")
     cmd.append("--tb=short")
 
-    # Set PYTHONPATH
+    # Set PYTHONPATH (use pathsep for Windows)
     env = os.environ.copy()
     pythonpath = env.get("PYTHONPATH", "")
     core_path = os.path.join(PROJECT_ROOT, "core")
     exports_path = os.path.join(PROJECT_ROOT, "exports")
     fw_agents_path = os.path.join(PROJECT_ROOT, "core", "framework", "agents")
-    env["PYTHONPATH"] = f"{core_path}:{exports_path}:{fw_agents_path}:{PROJECT_ROOT}:{pythonpath}"
+    path_parts = [core_path, exports_path, fw_agents_path, PROJECT_ROOT]
+    if pythonpath:
+        path_parts.append(pythonpath)
+    env["PYTHONPATH"] = os.pathsep.join(path_parts)
 
+    # Use 90s so we return before MCP client's 120s timeout
     try:
         result = subprocess.run(
             cmd,
             capture_output=True,
             text=True,
-            timeout=120,
+            timeout=90,
             env=env,
         )
     except subprocess.TimeoutExpired:
         return json.dumps(
             {
-                "error": "Tests timed out after 120 seconds. A test may be hanging "
+                "error": "Tests timed out after 90 seconds. A test may be hanging "
                 "(e.g. a client-facing node waiting for stdin). Use mock mode "
                 "or add timeouts to async tests.",
                 "command": " ".join(cmd),
@@ -1144,7 +1327,7 @@ def main() -> None:
     register_file_tools(
         mcp,
         resolve_path=_resolve_path,
-        before_write=_take_snapshot,
+        before_write=None,  # Git snapshot causes stdio deadlock on Windows; undo_changes limited
         project_root=PROJECT_ROOT,
     )
 
diff --git a/tools/src/aden_tools/file_ops.py b/tools/src/aden_tools/file_ops.py
index 6c188c4b..3a27e264 100644
--- a/tools/src/aden_tools/file_ops.py
+++ b/tools/src/aden_tools/file_ops.py
@@ -323,19 +323,29 @@ def register_file_tools(
             content: Complete file content to write.
         """
         resolved = _resolve(path)
+        resolved_path = Path(resolved)
 
         try:
+            # Create parent dirs first (before git snapshot) so structure exists
+            resolved_path.parent.mkdir(parents=True, exist_ok=True)
             if before_write:
-                before_write()
+                try:
+                    before_write()
+                except Exception:
+                    # Don't block the write if git snapshot fails. Do NOT log here —
+                    # logging writes to stderr and can deadlock the MCP stdio pipe.
+                    pass
 
-            existed = os.path.isfile(resolved)
-            os.makedirs(os.path.dirname(resolved), exist_ok=True)
-            with open(resolved, "w", encoding="utf-8") as f:
-                f.write(content)
+            existed = resolved_path.is_file()
+            content_str = content if content is not None else ""
+            with open(resolved_path, "w", encoding="utf-8") as f:
+                f.write(content_str)
+                f.flush()
+                os.fsync(f.fileno())
 
-            line_count = content.count("\n") + (1 if content and not content.endswith("\n") else 0)
+            line_count = content_str.count("\n") + (1 if content_str and not content_str.endswith("\n") else 0)
             action = "Updated" if existed else "Created"
-            return f"{action} {path} ({len(content):,} bytes, {line_count} lines)"
+            return f"{action} {path} ({len(content_str):,} bytes, {line_count} lines)"
         except Exception as e:
             return f"Error writing file: {e}"
 
@@ -510,7 +520,22 @@ def register_file_tools(
                 lines = []
                 for line in output.split("\n")[:SEARCH_RESULT_LIMIT]:
                     if project_root:
-                        line = line.replace(project_root + "/", "")
+                        # Platform-agnostic relativization: ripgrep may output
+                        # forward or backslash paths; normalize before relpath (Windows).
+                        match = re.match(r"^(.+):(\d+):", line)
+                        if match:
+                            path_part, line_num, rest = (
+                                match.group(1),
+                                match.group(2),
+                                line[match.end() :],
+                            )
+                            path_part = os.path.normpath(path_part.replace("/", os.sep))
+                            proj_norm = os.path.normpath(project_root.replace("/", os.sep))
+                            try:
+                                rel = os.path.relpath(path_part, proj_norm)
+                                line = f"{rel}:{line_num}:{rest}"
+                            except ValueError:
+                                pass
                     if len(line) > MAX_LINE_LENGTH:
                         line = line[:MAX_LINE_LENGTH] + "..."
                     lines.append(line)
@@ -538,7 +563,14 @@ def register_file_tools(
                     if include and not fnmatch.fnmatch(fname, include):
                         continue
                     fpath = os.path.join(root, fname)
-                    display_path = os.path.relpath(fpath, project_root) if project_root else fpath
+                    if project_root:
+                        proj_norm = os.path.normpath(project_root.replace("/", os.sep))
+                        try:
+                            display_path = os.path.relpath(fpath, proj_norm)
+                        except ValueError:
+                            display_path = fpath
+                    else:
+                        display_path = fpath
                     try:
                         with open(fpath, encoding="utf-8", errors="ignore") as f:
                             for i, line in enumerate(f, 1):
diff --git a/tools/tests/tools/test_file_ops.py b/tools/tests/tools/test_file_ops.py
new file mode 100644
index 00000000..e55d8536
--- /dev/null
+++ b/tools/tests/tools/test_file_ops.py
@@ -0,0 +1,145 @@
+"""Tests for aden_tools.file_ops (shared file tools).
+
+These tests cover Windows compatibility concerns: path relativization
+in search_files (ripgrep and Python fallback) and cross-platform behavior.
+"""
+
+import os
+import sys
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+from fastmcp import FastMCP
+
+from aden_tools.file_ops import register_file_tools
+
+
+@pytest.fixture
+def file_ops_mcp(tmp_path):
+    """Create FastMCP with file_ops registered, sandboxed to tmp_path."""
+
+    def resolve_path(p: str) -> str:
+        if os.path.isabs(p):
+            return os.path.normpath(p)
+        return str((tmp_path / p).resolve())
+
+    mcp = FastMCP("test-file-ops")
+    register_file_tools(
+        mcp,
+        resolve_path=resolve_path,
+        project_root=str(tmp_path),
+    )
+    return mcp
+
+
+def _get_tool_fn(mcp, name):
+    """Extract the raw function for a registered tool."""
+    return mcp._tool_manager._tools[name].fn
+
+
+class TestSearchFilesPathRelativization:
+    """Tests for search_files path handling (Windows path separator fix)."""
+
+    def test_ripgrep_output_with_backslash_relativized(self, file_ops_mcp, tmp_path):
+        """Ripgrep output using backslashes (Windows) should be relativized when project_root is set.
+
+        Simulates: rg outputs 'C:\\Users\\...\\proj\\src\\foo.py:1:needle'
+        Expected: output should show 'src\\foo.py:1:needle' or 'src/foo.py:1:needle'
+        (relativized, not full path).
+        """
+        # Create a file so the search has something to find
+        (tmp_path / "src").mkdir()
+        (tmp_path / "src" / "foo.py").write_text("needle\n")
+        project_root = str(tmp_path)
+
+        # Ripgrep on Windows outputs backslash-separated paths
+        # Format: path:line_num:content
+        rg_output = f"{project_root}{os.sep}src{os.sep}foo.py:1:needle"
+
+        search_fn = _get_tool_fn(file_ops_mcp, "search_files")
+
+        with patch("aden_tools.file_ops.subprocess.run") as mock_run:
+            mock_run.return_value = type(
+                "Result", (), {"returncode": 0, "stdout": rg_output, "stderr": ""}
+            )()
+
+            result = search_fn(
+                pattern="needle",
+                path=str(tmp_path),
+            )
+
+        # Output should be relativized (no full project_root in the line)
+        assert project_root not in result, (
+            f"Output should not contain full project_root. Got: {result!r}"
+        )
+        # Should contain the relative path part
+        assert "foo.py" in result
+        assert "1:" in result or ":1:" in result
+
+    def test_ripgrep_output_with_forward_slash_relativized(self, file_ops_mcp, tmp_path):
+        """Ripgrep output using forward slashes (Unix/rg default) should be relativized."""
+        (tmp_path / "src").mkdir()
+        (tmp_path / "src" / "bar.py").write_text("pattern_match\n")
+        project_root = str(tmp_path)
+
+        # Some ripgrep builds output forward slashes even on Windows
+        rg_output = f"{project_root}/src/bar.py:1:pattern_match"
+
+        search_fn = _get_tool_fn(file_ops_mcp, "search_files")
+
+        with patch("aden_tools.file_ops.subprocess.run") as mock_run:
+            mock_run.return_value = type(
+                "Result", (), {"returncode": 0, "stdout": rg_output, "stderr": ""}
+            )()
+
+            result = search_fn(
+                pattern="pattern_match",
+                path=str(tmp_path),
+            )
+
+        assert project_root not in result or "src/bar.py" in result
+        assert "bar.py" in result
+
+    def test_python_fallback_relativizes_paths(self, file_ops_mcp, tmp_path):
+        """Python fallback (no ripgrep) uses os.path.relpath - should work on all platforms."""
+        (tmp_path / "subdir").mkdir()
+        (tmp_path / "subdir" / "baz.txt").write_text("find_me\n")
+
+        search_fn = _get_tool_fn(file_ops_mcp, "search_files")
+
+        # Ensure ripgrep is not used
+        with patch("aden_tools.file_ops.subprocess.run", side_effect=FileNotFoundError()):
+            result = search_fn(
+                pattern="find_me",
+                path=str(tmp_path),
+            )
+
+        # Python fallback uses os.path.relpath - should produce relative path
+        project_root = str(tmp_path)
+        assert project_root not in result or "subdir" in result
+        assert "baz.txt" in result
+        assert "1:" in result or ":1:" in result
+
+
+class TestSearchFilesBasic:
+    """Basic search_files behavior (no path mocking)."""
+
+    def test_search_finds_content(self, file_ops_mcp, tmp_path):
+        """search_files finds matching content via Python fallback when rg absent."""
+        (tmp_path / "hello.txt").write_text("world\n")
+
+        search_fn = _get_tool_fn(file_ops_mcp, "search_files")
+
+        with patch("aden_tools.file_ops.subprocess.run", side_effect=FileNotFoundError()):
+            result = search_fn(pattern="world", path=str(tmp_path))
+
+        assert "world" in result
+        assert "hello.txt" in result
+
+    def test_search_nonexistent_dir_returns_error(self, file_ops_mcp, tmp_path):
+        """search_files on non-existent directory returns error."""
+        search_fn = _get_tool_fn(file_ops_mcp, "search_files")
+        result = search_fn(pattern="x", path=str(tmp_path / "nonexistent"))
+        assert "Error" in result
+        assert "not found" in result.lower()
diff --git a/tools/tests/tools/test_run_command_pythonpath.py b/tools/tests/tools/test_run_command_pythonpath.py
new file mode 100644
index 00000000..2724ffeb
--- /dev/null
+++ b/tools/tests/tools/test_run_command_pythonpath.py
@@ -0,0 +1,118 @@
+"""Tests for run_command PYTHONPATH handling (Windows compatibility).
+
+On Windows, PYTHONPATH must use semicolon (;) as separator, not colon (:).
+These tests verify the correct behavior. They are Windows-only because
+the bug only manifests on Windows.
+"""
+
+import os
+import subprocess
+import sys
+from pathlib import Path
+
+import pytest
+
+# Skip entire module on non-Windows (tests will pass when fixes are applied)
+pytestmark = pytest.mark.skipif(
+    sys.platform != "win32",
+    reason="Windows-only: PYTHONPATH separator behavior",
+)
+
+
+def _build_pythonpath_buggy(project_root: str) -> str:
+    """Replicate current (buggy) PYTHONPATH construction in run_command."""
+    return (
+        f"{project_root}/core:{project_root}/exports"
+        f":{project_root}/core/framework/agents"
+    )
+
+
+def _build_pythonpath_fixed(project_root: str) -> str:
+    """Correct PYTHONPATH construction using os.pathsep."""
+    return os.pathsep.join(
+        [
+            os.path.join(project_root, "core"),
+            os.path.join(project_root, "exports"),
+            os.path.join(project_root, "core", "framework", "agents"),
+        ]
+    )
+
+
+class TestPythonpathSeparatorWindows:
+    """Verify PYTHONPATH uses correct separator on Windows."""
+
+    def test_pythonpath_with_semicolons_parses_multiple_paths(self, tmp_path):
+        """PYTHONPATH built with os.pathsep allows Python to find modules in multiple dirs."""
+        # Create two dirs, each with a module
+        core_dir = tmp_path / "core"
+        core_dir.mkdir()
+        (core_dir / "mod_a.py").write_text("x = 1\n")
+
+        exports_dir = tmp_path / "exports"
+        exports_dir.mkdir()
+        (exports_dir / "mod_b.py").write_text("y = 2\n")
+
+        pythonpath = os.pathsep.join([str(core_dir), str(exports_dir)])
+        env = {**os.environ, "PYTHONPATH": pythonpath}
+
+        # Python should find both when we add them to path
+        result = subprocess.run(
+            [
+                sys.executable,
+                "-c",
+                "import sys; "
+                "sys.path = [p for p in sys.path if 'mod_a' not in p and 'mod_b' not in p]; "
+                "import mod_a; import mod_b; print('ok')",
+            ],
+            env=env,
+            capture_output=True,
+            text=True,
+            cwd=str(tmp_path),
+            timeout=10,
+        )
+
+        assert result.returncode == 0, f"Stdout: {result.stdout} Stderr: {result.stderr}"
+        assert "ok" in result.stdout
+
+    def test_pythonpath_with_colons_fails_on_windows(self, tmp_path):
+        """PYTHONPATH built with colons (Unix style) fails on Windows - single path parsed."""
+        core_dir = tmp_path / "core"
+        core_dir.mkdir()
+        (core_dir / "mod_c.py").write_text("z = 3\n")
+
+        exports_dir = tmp_path / "exports"
+        exports_dir.mkdir()
+        (exports_dir / "mod_d.py").write_text("w = 4\n")
+
+        # Buggy: colon-separated (Unix style)
+        pythonpath = f"{tmp_path}/core:{tmp_path}/exports"
+        env = {**os.environ, "PYTHONPATH": pythonpath}
+
+        # On Windows, Python splits by ; only. The colon string is one invalid path.
+        result = subprocess.run(
+            [
+                sys.executable,
+                "-c",
+                "import sys; "
+                "pp = [p for p in sys.path if 'core' in p or 'exports' in p]; "
+                "import mod_c; import mod_d; print('ok')",
+            ],
+            env=env,
+            capture_output=True,
+            text=True,
+            cwd=str(tmp_path),
+            timeout=10,
+        )
+
+        # Should fail: Python won't parse multiple paths from colon-separated string
+        assert result.returncode != 0 or "ok" not in result.stdout
+
+    def test_fixed_pythonpath_construction_uses_pathsep(self, tmp_path):
+        """The fix pattern (os.pathsep.join) produces valid multi-path PYTHONPATH."""
+        project_root = str(tmp_path)
+        fixed = _build_pythonpath_fixed(project_root)
+
+        # On Windows, os.pathsep is ';'
+        assert os.pathsep in fixed, "Fixed PYTHONPATH must use os.pathsep on Windows"
+        # Three paths => two separators
+        assert fixed.count(os.pathsep) == 2