feature(WindowsFilesystemSupport): #5677 - Windows File System Support and Testing -- remove testing codeand prepare for PR

2026-03-04 14:41:42 -03:00
parent 8470c6a980
commit bc349e8fde
4 changed files with 36 additions and 67 deletions
@@ -141,8 +141,7 @@ errors yourself. Don't declare success until validation passes.
 - edit_file(path, old_text, new_text, replace_all?) — fuzzy-match edit
 - list_directory(path, recursive?) — list contents
 - search_files(pattern, path?, include?) — regex search
- run_command(command, cwd?, timeout?) — shell execution. Prefer \
-run_agent_tests for tests; run_command can timeout on long runs.
+- run_command(command, cwd?, timeout?) — shell execution.
 - undo_changes(path?) — restore from git snapshot

 ## Meta-Agent
@@ -182,11 +181,10 @@ for patterns:
  read_file("exports/{name}/nodes/__init__.py")

 ## Post-Build Testing
-**Prefer dedicated tools** — run_command can timeout (MCP). Use:
-  validate_agent_tools("exports/{name}")  # tool existence check
-  run_agent_tests("{name}")               # run pytest
-Avoid run_command for pytest or long validation. Structural checks via \
-run_command (validate, AgentRunner.load) are optional; they may timeout.
+After writing agent code, validate structurally AND run tests:
+  run_command("python -c 'from {name} import default_agent; \\
+    print(default_agent.validate())'")
+  run_agent_tests("{name}")

 ## Debugging Built Agents
 When a user says "my agent is failing" or "debug this agent":
@@ -513,40 +511,39 @@ triggers, use `AsyncEntryPointSpec` (from framework.graph.edge) and \

 ## 5. Verify

-**Reliability note:** run_command can timeout (MCP/stdio limits). Do Steps 1 \
-and 2 first — they are reliable. Steps 3 and 4 use run_command and may timeout.
+Run FOUR validation steps after writing. All must pass:

-**Step 1 — Tool validation** (REQUIRED; reliable):
-```
-validate_agent_tools("exports/{name}")
-```
-Checks that declared tools exist in the agent's MCP servers. If any are \
-missing: fix node definitions. Run list_agent_tools() to see what's available.
-
-**Step 2 — Run tests** (REQUIRED; reliable):
-```
-run_agent_tests("{name}")
-```
-Runs pytest with proper timeouts. **Do NOT use run_command with pytest** — \
-it times out. Use run_agent_tests only.
-
-**Step 3 — Class validation** (optional; may timeout):
+**Step A — Class validation** (checks graph structure):
 ```
 run_command("python -c 'from {name} import default_agent; \\
  print(default_agent.validate())'")
 ```
-Structural check. Skip if it times out; Steps 1–2 and load_built_agent suffice.

-**Step 4 — Runner load test** (optional; may timeout):
+**Step B — Runner load test** (checks package export contract — \
+THIS IS THE SAME PATH THE TUI USES):
 ```
 run_command("python -c 'from framework.runner.runner import \\
  AgentRunner; r = AgentRunner.load(\"exports/{name}\"); \\
  print(\"AgentRunner.load: OK\")'")
 ```
-Catches __init__.py exports, conversation_mode, loop_config. Skip if timeout; \
-load_built_agent will surface load errors when you load the agent.
+This catches missing __init__.py exports, bad conversation_mode, \
+invalid loop_config, and unreachable nodes. If Step A passes but \
+Step B fails, the problem is in __init__.py exports.

-If Steps 1 or 2 fail: read error, fix with edit_file, re-validate. Up to 3x.
+**Step C — Tool validation** (checks that declared tools actually exist \
+in the agent's MCP servers — catches hallucinated tool names):
+```
+validate_agent_tools("exports/{name}")
+```
+If any tools are missing: fix the node definitions to use only tools \
+that exist. Run list_agent_tools() to see what's available.
+
+**Step D — Run tests:**
+```
+run_agent_tests("{name}")
+```
+
+If anything fails: read error, fix with edit_file, re-validate. Up to 3x.

 **CRITICAL: Testing forever-alive agents**
 Most agents use `terminal_nodes=[]` (forever-alive). This means \
@@ -622,8 +619,8 @@ critical issue. Use sparingly.
 ## Agent Loading
 - load_built_agent(agent_path) — Load a newly built agent as the worker in \
 this session. If a worker is already loaded, it is automatically unloaded \
-first. **Call in a separate turn** after write_file, validate_agent_tools, and \
-run_agent_tests have finished — never in the same batch, or files may \
+first. **Call in a separate turn** after write_file, run_command, and \
+validate_agent_tools have finished — never in the same batch, or files may \
 not exist yet.

 ## Credentials
@@ -768,8 +765,8 @@ When the user asks to change, modify, or update the loaded worker \
 1. Use the **Path** from the Worker Profile to locate the agent files.
 2. Read the relevant files (nodes/__init__.py, agent.py, etc.).
 3. Make the requested changes using edit_file / write_file.
-4. Run validation: validate_agent_tools(path), run_agent_tests(name). \
-Avoid run_command for validation — it can timeout.
+4. Run validation (default_agent.validate(), AgentRunner.load(), \
+validate_agent_tools()).
 5. **Reload the modified worker**: call load_built_agent("{path}") \
 so the changes take effect immediately. If a worker is already loaded, \
 stop it first, then reload.
@@ -9,7 +9,6 @@ import logging
 import os
 import sys
 import threading
-from concurrent.futures import TimeoutError as FuturesTimeoutError
 from dataclasses import dataclass, field
 from typing import Any, Literal

@@ -94,20 +93,7 @@ class MCPClient:
            # Check if loop is running AND not closed
            if self._loop.is_running() and not self._loop.is_closed():
                future = asyncio.run_coroutine_threadsafe(coro, self._loop)
-                try:
-                    return future.result(timeout=120)
-                except (TimeoutError, FuturesTimeoutError):
-                    logger.error(
-                        "MCP tool call timed out after 120s — connection may be dead."
-                    )
-                    try:
-                        self.disconnect()
-                    except Exception as e:
-                        logger.debug("Disconnect after timeout: %s", e)
-                    raise RuntimeError(
-                        "MCP tool call timed out. The server may have crashed or become unresponsive. "
-                        "Try again — the connection will be re-established."
-                    ) from None
+                return future.result()
            # else: fall through to the standard approach below
            # This handles the case when STDIO loop exists but is stopped/closed

@@ -210,8 +196,7 @@ class MCPClient:
                        from mcp.client.stdio import stdio_client

                        # Create persistent stdio client context.
-                        # On Windows, use stderr so subprocess startup errors are visible
-                        # when debugging "Connection closed" / MCP registration failures.
+                        # On Windows, use stderr so subprocess startup errors are visible.
                        if os.name == "nt":
                            errlog = sys.stderr
                        else:
@@ -1119,11 +1119,6 @@ class AgentRunner:
                gcu_config = dict(GCU_MCP_SERVER_CONFIG)
                _repo_root = Path(__file__).resolve().parent.parent.parent.parent
                gcu_config["cwd"] = str(_repo_root / "tools")
-
-                #gcu_config = self._tool_registry._resolve_mcp_server_config(
-                #    dict(GCU_MCP_SERVER_CONFIG), self.agent_path
-                #)
-
                self._tool_registry.register_mcp_server(gcu_config)
                gcu_tool_names = self._tool_registry.get_server_tool_names(GCU_SERVER_NAME)

@@ -1147,11 +1142,6 @@ class AgentRunner:
                files_config = dict(FILES_MCP_SERVER_CONFIG)
                _repo_root = Path(__file__).resolve().parent.parent.parent.parent
                files_config["cwd"] = str(_repo_root / "tools")
-
-#                files_config = self._tool_registry._resolve_mcp_server_config(
-#                    dict(FILES_MCP_SERVER_CONFIG), self.agent_path
-#                )
-
                self._tool_registry.register_mcp_server(files_config)
                files_tool_names = self._tool_registry.get_server_tool_names(FILES_MCP_SERVER_NAME)

@@ -204,8 +204,6 @@ def run_command(command: str, cwd: str = "", timeout: int = 120) -> str:
        Combined stdout/stderr with exit code
    """
    timeout = min(timeout, 300)
-    # Use lower internal timeout so we return before MCP client's 120s timeout
-    cmd_timeout = min(timeout, 90)
    work_dir = _resolve_path(cwd) if cwd else PROJECT_ROOT

    try:
@@ -217,7 +215,7 @@ def run_command(command: str, cwd: str = "", timeout: int = 120) -> str:
            cwd=work_dir,
            capture_output=True,
            text=True,
-            timeout=cmd_timeout,
+            timeout=timeout,
            env={
                **os.environ,
                "PYTHONPATH": os.pathsep.join(
@@ -250,7 +248,7 @@ def run_command(command: str, cwd: str = "", timeout: int = 120) -> str:
        return output
    except subprocess.TimeoutExpired:
        return (
-            f"Error: Command timed out after {cmd_timeout}s. "
+            f"Error: Command timed out after {timeout}s. "
            "Consider breaking it into smaller operations."
        )
    except Exception as e:
@@ -1196,19 +1194,18 @@ def run_agent_tests(
        path_parts.append(pythonpath)
    env["PYTHONPATH"] = os.pathsep.join(path_parts)

-    # Use 90s so we return before MCP client's 120s timeout
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
-            timeout=90,
+            timeout=120,
            env=env,
        )
    except subprocess.TimeoutExpired:
        return json.dumps(
            {
-                "error": "Tests timed out after 90 seconds. A test may be hanging "
+                "error": "Tests timed out after 120 seconds. A test may be hanging "
                "(e.g. a client-facing node waiting for stdin). Use mock mode "
                "or add timeouts to async tests.",
                "command": " ".join(cmd),