diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ff81b9b0..1ed8529d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -84,11 +84,23 @@ jobs: with: enable-cache: true - - name: Install dependencies and run tests + - name: Install dependencies working-directory: tools - run: | - uv sync --extra dev - uv run pytest tests/ -v + run: uv sync --extra dev + + - name: Install Playwright Chromium (Linux) + if: runner.os == 'Linux' + working-directory: tools + run: uv run playwright install --with-deps chromium + + - name: Install Playwright Chromium (Windows) + if: runner.os == 'Windows' + working-directory: tools + run: uv run playwright install chromium + + - name: Run tests + working-directory: tools + run: uv run pytest tests/ -v validate: name: Validate Agent Exports diff --git a/core/framework/server/queen_orchestrator.py b/core/framework/server/queen_orchestrator.py index 57301b2e..a8da97d5 100644 --- a/core/framework/server/queen_orchestrator.py +++ b/core/framework/server/queen_orchestrator.py @@ -359,7 +359,6 @@ async def create_queen( queen_goal, queen_loop_config as _base_loop_config, ) - from framework.config import get_max_tokens as _get_max_tokens from framework.agents.queen.nodes import ( _QUEEN_INCUBATING_TOOLS, _QUEEN_INDEPENDENT_TOOLS, @@ -378,6 +377,7 @@ async def create_queen( _queen_tools_working, finalize_queen_prompt, ) + from framework.config import get_max_tokens as _get_max_tokens from framework.host.event_bus import AgentEvent, EventType from framework.llm.capabilities import supports_image_tool_results from framework.loader.mcp_registry import MCPRegistry diff --git a/tools/tests/test_file_state_cache.py b/tools/tests/test_file_state_cache.py index 2f4e1d78..d1bc5ddc 100644 --- a/tools/tests/test_file_state_cache.py +++ b/tools/tests/test_file_state_cache.py @@ -2,8 +2,8 @@ These tests cover the stale-edit guard added for Gap 4: - read_file records a per-file hash snapshot -- edit_file / write_file / hashline_edit refuse to run when the on-disk - file has diverged from the last recorded read +- edit_file / write_file refuse to run when the on-disk file has + diverged from the last recorded read - write_file is allowed without a prior read when the target doesn't exist yet (brand-new file, nothing to clobber) - re-recording after a successful write keeps chained edits working @@ -52,7 +52,6 @@ def tools(sandbox: Path): "read_file": _find_tool(mcp, "read_file"), "write_file": _find_tool(mcp, "write_file"), "edit_file": _find_tool(mcp, "edit_file"), - "hashline_edit": _find_tool(mcp, "hashline_edit"), } @@ -129,7 +128,7 @@ def test_edit_file_refuses_without_prior_read(sandbox: Path, tools): # Clear the cache first so there's definitely no recorded read. file_state_cache.reset_all() - result = tools["edit_file"]("e.py", "hello", "world") + result = tools["edit_file"]("replace", "e.py", "hello", "world") assert "Refusing to edit" in result assert "read_file" in result @@ -140,7 +139,7 @@ def test_edit_file_proceeds_after_read(sandbox: Path, tools): file_state_cache.reset_all() tools["read_file"]("f.py") - result = tools["edit_file"]("f.py", "hello", "world") + result = tools["edit_file"]("replace", "f.py", "hello", "world") assert "Replaced" in result assert target.read_text() == "print('world')\n" @@ -157,7 +156,7 @@ def test_edit_file_refuses_when_file_changed_between_read_and_edit(sandbox: Path target.write_text("print('bye')\n") os.utime(str(target), None) - result = tools["edit_file"]("g.py", "hello", "world") + result = tools["edit_file"]("replace", "g.py", "hello", "world") assert "Refusing to edit" in result assert "Re-read" in result @@ -185,10 +184,10 @@ def test_chained_edits_in_same_turn_do_not_self_invalidate(sandbox: Path, tools) file_state_cache.reset_all() tools["read_file"]("chained.py") - r1 = tools["edit_file"]("chained.py", "a", "A") + r1 = tools["edit_file"]("replace", "chained.py", "a", "A") assert "Replaced" in r1 # Immediate second edit must NOT trip the stale guard because # edit_file re-records the post-write state. - r2 = tools["edit_file"]("chained.py", "b", "B") + r2 = tools["edit_file"]("replace", "chained.py", "b", "B") assert "Replaced" in r2 assert target.read_text() == "print('A')\nprint('B')\n" diff --git a/tools/tests/test_terminal_tools_exec.py b/tools/tests/test_terminal_tools_exec.py index 44526815..12ea33fc 100644 --- a/tools/tests/test_terminal_tools_exec.py +++ b/tools/tests/test_terminal_tools_exec.py @@ -2,10 +2,13 @@ from __future__ import annotations +import sys import time import pytest +pytestmark = pytest.mark.skipif(sys.platform == "win32", reason="terminal_tools is POSIX-only (uses resource module)") + @pytest.fixture def exec_tool(mcp): diff --git a/tools/tests/test_terminal_tools_jobs.py b/tools/tests/test_terminal_tools_jobs.py index f654eeaf..c5048910 100644 --- a/tools/tests/test_terminal_tools_jobs.py +++ b/tools/tests/test_terminal_tools_jobs.py @@ -2,10 +2,13 @@ from __future__ import annotations +import sys import time import pytest +pytestmark = pytest.mark.skipif(sys.platform == "win32", reason="terminal_tools is POSIX-only (uses resource module)") + @pytest.fixture def job_tools(mcp): diff --git a/tools/tests/test_terminal_tools_search.py b/tools/tests/test_terminal_tools_search.py index c8336991..40201395 100644 --- a/tools/tests/test_terminal_tools_search.py +++ b/tools/tests/test_terminal_tools_search.py @@ -3,9 +3,12 @@ from __future__ import annotations import shutil +import sys import pytest +pytestmark = pytest.mark.skipif(sys.platform == "win32", reason="terminal_tools is POSIX-only (uses resource module)") + @pytest.fixture def search_tools(mcp): diff --git a/tools/tests/test_terminal_tools_security.py b/tools/tests/test_terminal_tools_security.py index 5401d109..a4d6d9d0 100644 --- a/tools/tests/test_terminal_tools_security.py +++ b/tools/tests/test_terminal_tools_security.py @@ -2,8 +2,12 @@ from __future__ import annotations +import sys + import pytest +pytestmark = pytest.mark.skipif(sys.platform == "win32", reason="terminal_tools is POSIX-only (uses resource module)") + def test_resolve_shell_rejects_zsh(): from terminal_tools.common.limits import ZshRefused, _resolve_shell diff --git a/tools/tests/test_terminal_tools_smoke.py b/tools/tests/test_terminal_tools_smoke.py index 7282aae0..23354bce 100644 --- a/tools/tests/test_terminal_tools_smoke.py +++ b/tools/tests/test_terminal_tools_smoke.py @@ -2,6 +2,12 @@ from __future__ import annotations +import sys + +import pytest + +pytestmark = pytest.mark.skipif(sys.platform == "win32", reason="terminal_tools is POSIX-only (uses resource module)") + EXPECTED_TOOLS = { "terminal_exec", "terminal_job_start",