From 8cb0531959e47dc3954dcc33872adc84ee883926 Mon Sep 17 00:00:00 2001
From: Hundao <38208494+Hundao@users.noreply.github.com>
Date: Tue, 5 May 2026 00:32:59 +0800
Subject: [PATCH] fix(ci): unblock main CI, sort imports + install Playwright
 Chromium (#7172)

* fix(lint): organize imports in queen_orchestrator.create_queen

Ruff I001 blocks CI on every PR against main. The deferred imports
inside create_queen were not sorted: framework.config was imported
ahead of framework.agents.queen.nodes. Ruff's auto-fix moves
framework.config below the framework.agents.queen.nodes block.

No behavior change.

* fix(ci): install Playwright Chromium before Test Tools job

The new chart_tools smoke tests added in feabf327 require a Chromium
build for ECharts/Mermaid rendering, but the test-tools workflow only
ran `uv sync` and went straight to pytest. Three tests
(test_render_echarts_bar_chart, test_render_echarts_accepts_string_spec,
test_render_mermaid_flowchart) crash on every PR with:

    BrowserType.launch: Executable doesn't exist at
    /home/runner/.cache/ms-playwright/chromium_headless_shell-1208/...

Split the install/run into separate steps and add `playwright install
chromium` before pytest. Use `--with-deps` on Linux to pull system
libraries; Windows runners only need the browser binary.

* fix(tests): adapt test_file_state_cache to new file_ops API

The file_ops rewrite in feabf327 dropped the standalone hashline_edit
tool (the file_system_toolkits/hashline_edit/ directory was removed)
and switched edit_file to a mode-first signature
(mode, path, old_string, new_string, ...).

The test fixture still tried to look up "hashline_edit" via the MCP
tool manager and crashed with KeyError before any test could run, and
the edit_file calls were positional in the old order so they hit
"unknown mode 'e.py'" once the fixture was fixed.

Drop the stale hashline_edit lookup and pass "replace" as the leading
(positional) mode argument to every edit_file call. All 11 tests pass
locally.

* fix(tests): skip terminal_tools tests on Windows (POSIX-only)

The new terminal_tools package added in feabf327 imports the Unix-only
`resource` module in tools/src/terminal_tools/common/limits.py to set
RLIMIT_CPU / RLIMIT_AS / RLIMIT_FSIZE on subprocesses. Five of the
six terminal_tools test files therefore crash on windows-latest with
`ModuleNotFoundError: No module named 'resource'` once their fixtures
trigger the import chain.

test_terminal_tools_pty.py already has the right module-level skip
(PTY is POSIX-only). Apply the same module-level
`pytestmark = pytest.mark.skipif(sys.platform == "win32", ...)` to
the other five so the whole suite skips cleanly on Windows. The
terminal-tools package is bash-only by design (zsh refused at the
shell-resolver level), so a Windows port is out of scope.
---
 .github/workflows/ci.yml                    | 20 ++++++++++++++++----
 core/framework/server/queen_orchestrator.py |  2 +-
 tools/tests/test_file_state_cache.py        | 15 +++++++--------
 tools/tests/test_terminal_tools_exec.py     |  3 +++
 tools/tests/test_terminal_tools_jobs.py     |  3 +++
 tools/tests/test_terminal_tools_search.py   |  3 +++
 tools/tests/test_terminal_tools_security.py |  4 ++++
 tools/tests/test_terminal_tools_smoke.py    |  6 ++++++
 8 files changed, 43 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ff81b9b0..1ed8529d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -84,11 +84,23 @@ jobs:
         with:
           enable-cache: true
 
-      - name: Install dependencies and run tests
+      - name: Install dependencies
         working-directory: tools
-        run: |
-          uv sync --extra dev
-          uv run pytest tests/ -v
+        run: uv sync --extra dev
+
+      - name: Install Playwright Chromium (Linux)
+        if: runner.os == 'Linux'
+        working-directory: tools
+        run: uv run playwright install --with-deps chromium
+
+      - name: Install Playwright Chromium (Windows)
+        if: runner.os == 'Windows'
+        working-directory: tools
+        run: uv run playwright install chromium
+
+      - name: Run tests
+        working-directory: tools
+        run: uv run pytest tests/ -v
 
   validate:
     name: Validate Agent Exports
diff --git a/core/framework/server/queen_orchestrator.py b/core/framework/server/queen_orchestrator.py
index 57301b2e..a8da97d5 100644
--- a/core/framework/server/queen_orchestrator.py
+++ b/core/framework/server/queen_orchestrator.py
@@ -359,7 +359,6 @@ async def create_queen(
         queen_goal,
         queen_loop_config as _base_loop_config,
     )
-    from framework.config import get_max_tokens as _get_max_tokens
     from framework.agents.queen.nodes import (
         _QUEEN_INCUBATING_TOOLS,
         _QUEEN_INDEPENDENT_TOOLS,
@@ -378,6 +377,7 @@ async def create_queen(
         _queen_tools_working,
         finalize_queen_prompt,
     )
+    from framework.config import get_max_tokens as _get_max_tokens
     from framework.host.event_bus import AgentEvent, EventType
     from framework.llm.capabilities import supports_image_tool_results
     from framework.loader.mcp_registry import MCPRegistry
diff --git a/tools/tests/test_file_state_cache.py b/tools/tests/test_file_state_cache.py
index 2f4e1d78..d1bc5ddc 100644
--- a/tools/tests/test_file_state_cache.py
+++ b/tools/tests/test_file_state_cache.py
@@ -2,8 +2,8 @@
 
 These tests cover the stale-edit guard added for Gap 4:
 - read_file records a per-file hash snapshot
-- edit_file / write_file / hashline_edit refuse to run when the on-disk
-  file has diverged from the last recorded read
+- edit_file / write_file refuse to run when the on-disk file has
+  diverged from the last recorded read
 - write_file is allowed without a prior read when the target doesn't
   exist yet (brand-new file, nothing to clobber)
 - re-recording after a successful write keeps chained edits working
@@ -52,7 +52,6 @@ def tools(sandbox: Path):
         "read_file": _find_tool(mcp, "read_file"),
         "write_file": _find_tool(mcp, "write_file"),
         "edit_file": _find_tool(mcp, "edit_file"),
-        "hashline_edit": _find_tool(mcp, "hashline_edit"),
     }
 
 
@@ -129,7 +128,7 @@ def test_edit_file_refuses_without_prior_read(sandbox: Path, tools):
     # Clear the cache first so there's definitely no recorded read.
     file_state_cache.reset_all()
 
-    result = tools["edit_file"]("e.py", "hello", "world")
+    result = tools["edit_file"]("replace", "e.py", "hello", "world")
     assert "Refusing to edit" in result
     assert "read_file" in result
 
@@ -140,7 +139,7 @@ def test_edit_file_proceeds_after_read(sandbox: Path, tools):
     file_state_cache.reset_all()
 
     tools["read_file"]("f.py")
-    result = tools["edit_file"]("f.py", "hello", "world")
+    result = tools["edit_file"]("replace", "f.py", "hello", "world")
     assert "Replaced" in result
     assert target.read_text() == "print('world')\n"
 
@@ -157,7 +156,7 @@ def test_edit_file_refuses_when_file_changed_between_read_and_edit(sandbox: Path
     target.write_text("print('bye')\n")
     os.utime(str(target), None)
 
-    result = tools["edit_file"]("g.py", "hello", "world")
+    result = tools["edit_file"]("replace", "g.py", "hello", "world")
     assert "Refusing to edit" in result
     assert "Re-read" in result
 
@@ -185,10 +184,10 @@ def test_chained_edits_in_same_turn_do_not_self_invalidate(sandbox: Path, tools)
     file_state_cache.reset_all()
 
     tools["read_file"]("chained.py")
-    r1 = tools["edit_file"]("chained.py", "a", "A")
+    r1 = tools["edit_file"]("replace", "chained.py", "a", "A")
     assert "Replaced" in r1
     # Immediate second edit must NOT trip the stale guard because
     # edit_file re-records the post-write state.
-    r2 = tools["edit_file"]("chained.py", "b", "B")
+    r2 = tools["edit_file"]("replace", "chained.py", "b", "B")
     assert "Replaced" in r2
     assert target.read_text() == "print('A')\nprint('B')\n"
diff --git a/tools/tests/test_terminal_tools_exec.py b/tools/tests/test_terminal_tools_exec.py
index 44526815..12ea33fc 100644
--- a/tools/tests/test_terminal_tools_exec.py
+++ b/tools/tests/test_terminal_tools_exec.py
@@ -2,10 +2,13 @@
 
 from __future__ import annotations
 
+import sys
 import time
 
 import pytest
 
+pytestmark = pytest.mark.skipif(sys.platform == "win32", reason="terminal_tools is POSIX-only (uses resource module)")
+
 
 @pytest.fixture
 def exec_tool(mcp):
diff --git a/tools/tests/test_terminal_tools_jobs.py b/tools/tests/test_terminal_tools_jobs.py
index f654eeaf..c5048910 100644
--- a/tools/tests/test_terminal_tools_jobs.py
+++ b/tools/tests/test_terminal_tools_jobs.py
@@ -2,10 +2,13 @@
 
 from __future__ import annotations
 
+import sys
 import time
 
 import pytest
 
+pytestmark = pytest.mark.skipif(sys.platform == "win32", reason="terminal_tools is POSIX-only (uses resource module)")
+
 
 @pytest.fixture
 def job_tools(mcp):
diff --git a/tools/tests/test_terminal_tools_search.py b/tools/tests/test_terminal_tools_search.py
index c8336991..40201395 100644
--- a/tools/tests/test_terminal_tools_search.py
+++ b/tools/tests/test_terminal_tools_search.py
@@ -3,9 +3,12 @@
 from __future__ import annotations
 
 import shutil
+import sys
 
 import pytest
 
+pytestmark = pytest.mark.skipif(sys.platform == "win32", reason="terminal_tools is POSIX-only (uses resource module)")
+
 
 @pytest.fixture
 def search_tools(mcp):
diff --git a/tools/tests/test_terminal_tools_security.py b/tools/tests/test_terminal_tools_security.py
index 5401d109..a4d6d9d0 100644
--- a/tools/tests/test_terminal_tools_security.py
+++ b/tools/tests/test_terminal_tools_security.py
@@ -2,8 +2,12 @@
 
 from __future__ import annotations
 
+import sys
+
 import pytest
 
+pytestmark = pytest.mark.skipif(sys.platform == "win32", reason="terminal_tools is POSIX-only (uses resource module)")
+
 
 def test_resolve_shell_rejects_zsh():
     from terminal_tools.common.limits import ZshRefused, _resolve_shell
diff --git a/tools/tests/test_terminal_tools_smoke.py b/tools/tests/test_terminal_tools_smoke.py
index 7282aae0..23354bce 100644
--- a/tools/tests/test_terminal_tools_smoke.py
+++ b/tools/tests/test_terminal_tools_smoke.py
@@ -2,6 +2,12 @@
 
 from __future__ import annotations
 
+import sys
+
+import pytest
+
+pytestmark = pytest.mark.skipif(sys.platform == "win32", reason="terminal_tools is POSIX-only (uses resource module)")
+
 EXPECTED_TOOLS = {
     "terminal_exec",
     "terminal_job_start",