fix(ci): unblock main CI, sort imports + install Playwright Chromium (#7172)
* fix(lint): organize imports in queen_orchestrator.create_queen Ruff I001 blocks CI on every PR against main. The deferred imports inside create_queen were not in alphabetical order between the queen package and the framework package; ruff auto-fix moves framework.config below the framework.agents.queen.nodes block. No behavior change. * fix(ci): install Playwright Chromium before Test Tools job The new chart_tools smoke tests added in feabf327 require a Chromium build for ECharts/Mermaid rendering, but the test-tools workflow only ran `uv sync` and went straight to pytest. Three tests (test_render_echarts_bar_chart, test_render_echarts_accepts_string_spec, test_render_mermaid_flowchart) crash on every PR with: BrowserType.launch: Executable doesn't exist at /home/runner/.cache/ms-playwright/chromium_headless_shell-1208/... Split the install/run into separate steps and add `playwright install chromium` before pytest. Use `--with-deps` on Linux to pull system libraries; Windows runners only need the browser binary. * fix(tests): adapt test_file_state_cache to new file_ops API The file_ops rewrite in feabf327 dropped the standalone hashline_edit tool (the file_system_toolkits/hashline_edit/ directory was removed) and switched edit_file to a mode-first signature (mode, path, old_string, new_string, ...). The test fixture still tried to look up "hashline_edit" via the MCP tool manager and crashed with KeyError before any test could run, and the edit_file calls were positional in the old order so they hit "unknown mode 'e.py'" once the fixture was fixed. Drop the stale hashline_edit lookup and pass mode="replace" explicitly to every edit_file call. All 11 tests pass locally. * fix(tests): skip terminal_tools tests on Windows (POSIX-only) The new terminal_tools package added in feabf327 imports the Unix-only `resource` module in tools/src/terminal_tools/common/limits.py to set RLIMIT_CPU / RLIMIT_AS / RLIMIT_FSIZE on subprocesses. 
Five of the six terminal_tools test files therefore crash on windows-latest with `ModuleNotFoundError: No module named 'resource'` once their fixtures trigger the import chain. test_terminal_tools_pty.py already has the right module-level skip (PTY is POSIX-only). Apply the same `pytestmark = skipif(win32)` to the other five so the whole suite skips cleanly on Windows. The terminal-tools package is bash-only by design (zsh refused at the shell-resolver level), so a Windows port is out of scope.
This commit is contained in:
@@ -84,11 +84,23 @@ jobs:
|
||||
with:
|
||||
enable-cache: true
|
||||
|
||||
- name: Install dependencies and run tests
|
||||
- name: Install dependencies
|
||||
working-directory: tools
|
||||
run: |
|
||||
uv sync --extra dev
|
||||
uv run pytest tests/ -v
|
||||
run: uv sync --extra dev
|
||||
|
||||
- name: Install Playwright Chromium (Linux)
|
||||
if: runner.os == 'Linux'
|
||||
working-directory: tools
|
||||
run: uv run playwright install --with-deps chromium
|
||||
|
||||
- name: Install Playwright Chromium (Windows)
|
||||
if: runner.os == 'Windows'
|
||||
working-directory: tools
|
||||
run: uv run playwright install chromium
|
||||
|
||||
- name: Run tests
|
||||
working-directory: tools
|
||||
run: uv run pytest tests/ -v
|
||||
|
||||
validate:
|
||||
name: Validate Agent Exports
|
||||
|
||||
@@ -359,7 +359,6 @@ async def create_queen(
|
||||
queen_goal,
|
||||
queen_loop_config as _base_loop_config,
|
||||
)
|
||||
from framework.config import get_max_tokens as _get_max_tokens
|
||||
from framework.agents.queen.nodes import (
|
||||
_QUEEN_INCUBATING_TOOLS,
|
||||
_QUEEN_INDEPENDENT_TOOLS,
|
||||
@@ -378,6 +377,7 @@ async def create_queen(
|
||||
_queen_tools_working,
|
||||
finalize_queen_prompt,
|
||||
)
|
||||
from framework.config import get_max_tokens as _get_max_tokens
|
||||
from framework.host.event_bus import AgentEvent, EventType
|
||||
from framework.llm.capabilities import supports_image_tool_results
|
||||
from framework.loader.mcp_registry import MCPRegistry
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
|
||||
These tests cover the stale-edit guard added for Gap 4:
|
||||
- read_file records a per-file hash snapshot
|
||||
- edit_file / write_file / hashline_edit refuse to run when the on-disk
|
||||
file has diverged from the last recorded read
|
||||
- edit_file / write_file refuse to run when the on-disk file has
|
||||
diverged from the last recorded read
|
||||
- write_file is allowed without a prior read when the target doesn't
|
||||
exist yet (brand-new file, nothing to clobber)
|
||||
- re-recording after a successful write keeps chained edits working
|
||||
@@ -52,7 +52,6 @@ def tools(sandbox: Path):
|
||||
"read_file": _find_tool(mcp, "read_file"),
|
||||
"write_file": _find_tool(mcp, "write_file"),
|
||||
"edit_file": _find_tool(mcp, "edit_file"),
|
||||
"hashline_edit": _find_tool(mcp, "hashline_edit"),
|
||||
}
|
||||
|
||||
|
||||
@@ -129,7 +128,7 @@ def test_edit_file_refuses_without_prior_read(sandbox: Path, tools):
|
||||
# Clear the cache first so there's definitely no recorded read.
|
||||
file_state_cache.reset_all()
|
||||
|
||||
result = tools["edit_file"]("e.py", "hello", "world")
|
||||
result = tools["edit_file"]("replace", "e.py", "hello", "world")
|
||||
assert "Refusing to edit" in result
|
||||
assert "read_file" in result
|
||||
|
||||
@@ -140,7 +139,7 @@ def test_edit_file_proceeds_after_read(sandbox: Path, tools):
|
||||
file_state_cache.reset_all()
|
||||
|
||||
tools["read_file"]("f.py")
|
||||
result = tools["edit_file"]("f.py", "hello", "world")
|
||||
result = tools["edit_file"]("replace", "f.py", "hello", "world")
|
||||
assert "Replaced" in result
|
||||
assert target.read_text() == "print('world')\n"
|
||||
|
||||
@@ -157,7 +156,7 @@ def test_edit_file_refuses_when_file_changed_between_read_and_edit(sandbox: Path
|
||||
target.write_text("print('bye')\n")
|
||||
os.utime(str(target), None)
|
||||
|
||||
result = tools["edit_file"]("g.py", "hello", "world")
|
||||
result = tools["edit_file"]("replace", "g.py", "hello", "world")
|
||||
assert "Refusing to edit" in result
|
||||
assert "Re-read" in result
|
||||
|
||||
@@ -185,10 +184,10 @@ def test_chained_edits_in_same_turn_do_not_self_invalidate(sandbox: Path, tools)
|
||||
file_state_cache.reset_all()
|
||||
|
||||
tools["read_file"]("chained.py")
|
||||
r1 = tools["edit_file"]("chained.py", "a", "A")
|
||||
r1 = tools["edit_file"]("replace", "chained.py", "a", "A")
|
||||
assert "Replaced" in r1
|
||||
# Immediate second edit must NOT trip the stale guard because
|
||||
# edit_file re-records the post-write state.
|
||||
r2 = tools["edit_file"]("chained.py", "b", "B")
|
||||
r2 = tools["edit_file"]("replace", "chained.py", "b", "B")
|
||||
assert "Replaced" in r2
|
||||
assert target.read_text() == "print('A')\nprint('B')\n"
|
||||
|
||||
@@ -2,10 +2,13 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
pytestmark = pytest.mark.skipif(sys.platform == "win32", reason="terminal_tools is POSIX-only (uses resource module)")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def exec_tool(mcp):
|
||||
|
||||
@@ -2,10 +2,13 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
pytestmark = pytest.mark.skipif(sys.platform == "win32", reason="terminal_tools is POSIX-only (uses resource module)")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def job_tools(mcp):
|
||||
|
||||
@@ -3,9 +3,12 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
pytestmark = pytest.mark.skipif(sys.platform == "win32", reason="terminal_tools is POSIX-only (uses resource module)")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def search_tools(mcp):
|
||||
|
||||
@@ -2,8 +2,12 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
pytestmark = pytest.mark.skipif(sys.platform == "win32", reason="terminal_tools is POSIX-only (uses resource module)")
|
||||
|
||||
|
||||
def test_resolve_shell_rejects_zsh():
|
||||
from terminal_tools.common.limits import ZshRefused, _resolve_shell
|
||||
|
||||
@@ -2,6 +2,12 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
pytestmark = pytest.mark.skipif(sys.platform == "win32", reason="terminal_tools is POSIX-only (uses resource module)")
|
||||
|
||||
EXPECTED_TOOLS = {
|
||||
"terminal_exec",
|
||||
"terminal_job_start",
|
||||
|
||||
Reference in New Issue
Block a user