fix: systematically enforce UTF-8 encoding across tools and core to fix Windows charmap decode errors

2026-03-04 16:04:53 +02:00
parent e1a490756e
commit 49f8fae0b4
24 changed files with 86 additions and 44 deletions
@@ -78,3 +78,4 @@ core/tests/*dumps/*

 screenshots/*

+.gemini/*
@@ -10,7 +10,7 @@ def _load_preferred_model() -> str:
    config_path = Path.home() / ".hive" / "configuration.json"
    if config_path.exists():
        try:
-            with open(config_path) as f:
+            with open(config_path, encoding="utf-8") as f:
                config = json.load(f)
            llm = config.get("llm", {})
            if llm.get("provider") and llm.get("model"):
@@ -7,11 +7,11 @@ from framework.graph import NodeSpec
 # Load reference docs at import time so they're always in the system prompt.
 # No voluntary read_file() calls needed — the LLM gets everything upfront.
 _ref_dir = Path(__file__).parent.parent / "reference"
-_framework_guide = (_ref_dir / "framework_guide.md").read_text()
-_file_templates = (_ref_dir / "file_templates.md").read_text()
-_anti_patterns = (_ref_dir / "anti_patterns.md").read_text()
+_framework_guide = (_ref_dir / "framework_guide.md").read_text(encoding="utf-8")
+_file_templates = (_ref_dir / "file_templates.md").read_text(encoding="utf-8")
+_anti_patterns = (_ref_dir / "anti_patterns.md").read_text(encoding="utf-8")
 _gcu_guide_path = _ref_dir / "gcu_guide.md"
-_gcu_guide = _gcu_guide_path.read_text() if _gcu_guide_path.exists() else ""
+_gcu_guide = _gcu_guide_path.read_text(encoding="utf-8") if _gcu_guide_path.exists() else ""


 def _is_gcu_enabled() -> bool:
@@ -660,7 +660,7 @@ class GraphBuilder:
        # Generate Python code
        code = self._generate_code(graph)

-        Path(path).write_text(code)
+        Path(path).write_text(code, encoding="utf-8")
        self.session.phase = BuildPhase.EXPORTED
        self._save_session()

@@ -754,7 +754,7 @@ class GraphBuilder:
        """Save session to disk."""
        self.session.updated_at = datetime.now()
        path = self.storage_path / f"{self.session.id}.json"
-        path.write_text(self.session.model_dump_json(indent=2))
+        path.write_text(self.session.model_dump_json(indent=2), encoding="utf-8")

    def _load_session(self, session_id: str) -> BuildSession:
        """Load session from disk."""
@@ -69,7 +69,7 @@ def save_credential_key(key: str) -> Path:
    # Restrict the secrets directory itself
    path.parent.chmod(stat.S_IRWXU)  # 0o700

-    path.write_text(key)
+    path.write_text(key, encoding="utf-8")
    path.chmod(stat.S_IRUSR | stat.S_IWUSR)  # 0o600

    os.environ[CREDENTIAL_KEY_ENV_VAR] = key
@@ -568,7 +568,7 @@ def _load_nodes_from_python_agent(agent_path: Path) -> list:
 def _load_nodes_from_json_agent(agent_json: Path) -> list:
    """Load nodes from a JSON-based agent."""
    try:
-        with open(agent_json) as f:
+        with open(agent_json, encoding="utf-8") as f:
            data = json.load(f)

        from framework.graph import NodeSpec
@@ -227,7 +227,7 @@ class EncryptedFileStorage(CredentialStorage):
        index_path = self.base_path / "metadata" / "index.json"
        if not index_path.exists():
            return []
-        with open(index_path) as f:
+        with open(index_path, encoding="utf-8") as f:
            index = json.load(f)
        return list(index.get("credentials", {}).keys())

@@ -268,7 +268,7 @@ class EncryptedFileStorage(CredentialStorage):
        index_path = self.base_path / "metadata" / "index.json"

        if index_path.exists():
-            with open(index_path) as f:
+            with open(index_path, encoding="utf-8") as f:
                index = json.load(f)
        else:
            index = {"credentials": {}, "version": "1.0"}
@@ -283,7 +283,7 @@ class EncryptedFileStorage(CredentialStorage):

        index["last_modified"] = datetime.now(UTC).isoformat()

-        with open(index_path, "w") as f:
+        with open(index_path, "w", encoding="utf-8") as f:
            json.dump(index, f, indent=2)


@@ -170,7 +170,7 @@ def _dump_failed_request(
        "temperature": kwargs.get("temperature"),
    }

-    with open(filepath, "w") as f:
+    with open(filepath, "w", encoding="utf-8") as f:
        json.dump(dump_data, f, indent=2, default=str)

    return str(filepath)
@@ -2894,6 +2894,7 @@ def run_tests(
    try:
        result = subprocess.run(
            cmd,
+            encoding="utf-8",
            capture_output=True,
            text=True,
            timeout=600,  # 10 minute timeout
@@ -3085,6 +3086,7 @@ def debug_test(
    try:
        result = subprocess.run(
            cmd,
+            encoding="utf-8",
            capture_output=True,
            text=True,
            timeout=120,  # 2 minute timeout for single test
@@ -1932,10 +1932,18 @@ def _open_browser(url: str) -> None:

    try:
        if sys.platform == "darwin":
-            subprocess.Popen(["open", url], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+            subprocess.Popen(
+                ["open", url],
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+                encoding="utf-8",
+            )
        elif sys.platform == "linux":
            subprocess.Popen(
-                ["xdg-open", url], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
+                ["xdg-open", url],
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+                encoding="utf-8",
            )
    except Exception:
        pass  # Best-effort — don't crash if browser can't open
@@ -1980,12 +1988,14 @@ def _build_frontend() -> bool:
        # Ensure deps are installed
        subprocess.run(
            ["npm", "install", "--no-fund", "--no-audit"],
+            encoding="utf-8",
            cwd=frontend_dir,
            check=True,
            capture_output=True,
        )
        subprocess.run(
            ["npm", "run", "build"],
+            encoding="utf-8",
            cwd=frontend_dir,
            check=True,
            capture_output=True,
@@ -96,7 +96,7 @@ def _save_refreshed_credentials(token_data: dict) -> None:
        return

    try:
-        with open(CLAUDE_CREDENTIALS_FILE) as f:
+        with open(CLAUDE_CREDENTIALS_FILE, encoding="utf-8") as f:
            creds = json.load(f)

        oauth = creds.get("claudeAiOauth", {})
@@ -212,7 +212,7 @@ def _read_codex_keychain() -> dict | None:
                "-w",
            ],
            capture_output=True,
-            text=True,
+            encoding="utf-8",
            timeout=5,
        )
        if result.returncode != 0:
@@ -270,10 +270,10 @@ def _edit_test_code(code: str) -> str:

    try:
        # Open editor
-        subprocess.run([editor, temp_path], check=True)
+        subprocess.run([editor, temp_path], check=True, encoding="utf-8")

        # Read edited code
-        with open(temp_path) as f:
+        with open(temp_path, encoding="utf-8") as f:
            return f.read()
    except subprocess.CalledProcessError:
        print("Editor failed, keeping original code")
@@ -190,6 +190,7 @@ def cmd_test_run(args: argparse.Namespace) -> int:
    try:
        result = subprocess.run(
            cmd,
+            encoding="utf-8",
            env=env,
            timeout=600,  # 10 minute timeout
        )
@@ -248,6 +249,7 @@ def cmd_test_debug(args: argparse.Namespace) -> int:
    try:
        result = subprocess.run(
            cmd,
+            encoding="utf-8",
            env=env,
            timeout=120,  # 2 minute timeout for single test
        )
@@ -256,7 +256,7 @@ class AdenTUI(App):
        """Override to use native `open` for file:// URLs on macOS."""
        if url.startswith("file://") and platform.system() == "Darwin":
            path = url.removeprefix("file://")
-            subprocess.Popen(["open", path])
+            subprocess.Popen(["open", path], encoding="utf-8")
        else:
            super().open_url(url, new_tab=new_tab)

@@ -488,7 +488,7 @@ class ChatRepl(Vertical):
                if not state_file.exists():
                    continue

-                with open(state_file) as f:
+                with open(state_file, encoding="utf-8") as f:
                    state = json.load(f)

                status = state.get("status", "").lower()
@@ -547,7 +547,7 @@ class ChatRepl(Vertical):

            # Read session state
            try:
-                with open(state_file) as f:
+                with open(state_file, encoding="utf-8") as f:
                    state = json.load(f)

                # Track this session for /resume <number> lookup
@@ -599,7 +599,7 @@ class ChatRepl(Vertical):
        try:
            import json

-            with open(state_file) as f:
+            with open(state_file, encoding="utf-8") as f:
                state = json.load(f)

            # Basic info
@@ -640,7 +640,7 @@ class ChatRepl(Vertical):
                    # Load and show checkpoints
                    for i, cp_file in enumerate(checkpoint_files[-5:], 1):  # Last 5
                        try:
-                            with open(cp_file) as f:
+                            with open(cp_file, encoding="utf-8") as f:
                                cp_data = json.load(f)

                            cp_id = cp_data.get("checkpoint_id", cp_file.stem)
@@ -687,7 +687,7 @@ class ChatRepl(Vertical):

            import json

-            with open(state_file) as f:
+            with open(state_file, encoding="utf-8") as f:
                state = json.load(f)

            # Resume from session state (not checkpoint)
@@ -1112,7 +1112,7 @@ class ChatRepl(Vertical):
                    continue

                try:
-                    with open(state_file) as f:
+                    with open(state_file, encoding="utf-8") as f:
                        state = json.load(f)

                    status = state.get("status", "").lower()
@@ -38,6 +38,7 @@ def _linux_file_dialog() -> subprocess.CompletedProcess | None:
                "--title=Select a PDF file",
                "--file-filter=PDF files (*.pdf)|*.pdf",
            ],
+            encoding="utf-8",
            capture_output=True,
            text=True,
            timeout=300,
@@ -54,6 +55,7 @@ def _linux_file_dialog() -> subprocess.CompletedProcess | None:
                ".",
                "PDF files (*.pdf)",
            ],
+            encoding="utf-8",
            capture_output=True,
            text=True,
            timeout=300,
@@ -79,6 +81,7 @@ def _pick_pdf_subprocess() -> Path | None:
                    'POSIX path of (choose file of type {"com.adobe.pdf"} '
                    'with prompt "Select a PDF file")',
                ],
+                encoding="utf-8",
                capture_output=True,
                text=True,
                timeout=300,
@@ -93,6 +96,7 @@ def _pick_pdf_subprocess() -> Path | None:
            )
            result = subprocess.run(
                ["powershell", "-NoProfile", "-Command", ps_script],
+                encoding="utf-8",
                capture_output=True,
                text=True,
                timeout=300,
@@ -199,10 +199,11 @@ def _copy_to_clipboard(text: str) -> None:
    """Copy text to system clipboard using platform-native tools."""
    try:
        if sys.platform == "darwin":
-            subprocess.run(["pbcopy"], input=text.encode(), check=True, timeout=5)
+            subprocess.run(["pbcopy"], encoding="utf-8", input=text.encode(), check=True, timeout=5)
        elif sys.platform == "win32":
            subprocess.run(
                ["clip.exe"],
+                encoding="utf-8",
                input=text.encode("utf-16le"),
                check=True,
                timeout=5,
@@ -211,6 +212,7 @@ def _copy_to_clipboard(text: str) -> None:
            try:
                subprocess.run(
                    ["xclip", "-selection", "clipboard"],
+                    encoding="utf-8",
                    input=text.encode(),
                    check=True,
                    timeout=5,
@@ -218,6 +220,7 @@ def _copy_to_clipboard(text: str) -> None:
            except (subprocess.SubprocessError, FileNotFoundError):
                subprocess.run(
                    ["xsel", "--clipboard", "--input"],
+                    encoding="utf-8",
                    input=text.encode(),
                    check=True,
                    timeout=5,
@@ -53,7 +53,13 @@ def log_error(message: str):
 def run_command(cmd: list, error_msg: str) -> bool:
    """Run a command and return success status."""
    try:
-        subprocess.run(cmd, check=True, capture_output=True, text=True)
+        subprocess.run(
+            cmd,
+            check=True,
+            capture_output=True,
+            text=True,
+            encoding="utf-8",
+        )
        return True
    except subprocess.CalledProcessError as e:
        log_error(error_msg)
@@ -97,7 +103,7 @@ def main():
    if mcp_config_path.exists():
        log_success("MCP configuration found at .mcp.json")
        logger.info("Configuration:")
-        with open(mcp_config_path) as f:
+        with open(mcp_config_path, encoding="utf-8") as f:
            config = json.load(f)
            logger.info(json.dumps(config, indent=2))
    else:
@@ -114,7 +120,7 @@ def main():
            }
        }

-        with open(mcp_config_path, "w") as f:
+        with open(mcp_config_path, "w", encoding="utf-8") as f:
            json.dump(config, f, indent=2)

        log_success("Created .mcp.json")
@@ -129,6 +135,7 @@ def main():
            check=True,
            capture_output=True,
            text=True,
+            encoding="utf-8",
        )
        log_success("MCP server module verified")
    except subprocess.CalledProcessError as e:
@@ -68,6 +68,7 @@ class TestFrameworkModule:
            [sys.executable, "-m", "framework", "--help"],
            capture_output=True,
            text=True,
+            encoding="utf-8",
            cwd=str(project_root / "core"),
        )
        assert result.returncode == 0
@@ -79,6 +80,7 @@ class TestFrameworkModule:
            [sys.executable, "-m", "framework", "list", "--help"],
            capture_output=True,
            text=True,
+            encoding="utf-8",
            cwd=str(project_root / "core"),
        )
        assert result.returncode == 0
@@ -104,6 +106,7 @@ class TestHiveEntryPoint:
            ["hive", "--help"],
            capture_output=True,
            text=True,
+            encoding="utf-8",
        )
        assert result.returncode == 0
        assert "run" in result.stdout.lower()
@@ -115,6 +118,7 @@ class TestHiveEntryPoint:
            ["hive", "list", "--help"],
            capture_output=True,
            text=True,
+            encoding="utf-8",
        )
        assert result.returncode == 0

@@ -124,5 +128,6 @@ class TestHiveEntryPoint:
            ["hive", "run", "nonexistent_agent_xyz"],
            capture_output=True,
            text=True,
+            encoding="utf-8",
        )
        assert result.returncode != 0
@@ -232,7 +232,7 @@ async def test_shared_session_reuses_directory_and_memory(tmp_path):
    # Verify primary session's state.json exists and has the primary entry_point
    primary_state_path = tmp_path / "sessions" / primary_exec_id / "state.json"
    assert primary_state_path.exists()
-    primary_state = json.loads(primary_state_path.read_text())
+    primary_state = json.loads(primary_state_path.read_text(encoding="utf-8"))
    assert primary_state["entry_point"] == "primary"

    # Async stream — simulates a webhook entry point sharing the session
@@ -275,7 +275,7 @@ async def test_shared_session_reuses_directory_and_memory(tmp_path):

    # State.json should NOT have been overwritten by the async execution
    # (it should still show the primary entry point)
-    final_state = json.loads(primary_state_path.read_text())
+    final_state = json.loads(primary_state_path.read_text(encoding="utf-8"))
    assert final_state["entry_point"] == "primary"

    # Verify only ONE session directory exists (not two)
@@ -184,7 +184,7 @@ class TestPathTraversalWithActualFiles:

            # Create a secret file outside storage
            secret_file = tmpdir_path / "secret.txt"
-            secret_file.write_text("SENSITIVE_DATA")
+            secret_file.write_text("SENSITIVE_DATA", encoding="utf-8")

            storage = FileStorage(storage_dir)

@@ -193,7 +193,7 @@ class TestPathTraversalWithActualFiles:
                storage.get_runs_by_goal("../secret")

            # Verify the secret file was not accessed (still contains original data)
-            assert secret_file.read_text() == "SENSITIVE_DATA"
+            assert secret_file.read_text(encoding="utf-8") == "SENSITIVE_DATA"

    def test_cannot_write_outside_storage(self):
        """Verify that we can't write files outside storage directory."""
@@ -353,7 +353,7 @@ class TestRuntimeLogger:
        # Verify the file exists and has one line
        jsonl_path = tmp_path / "logs" / "sessions" / run_id / "logs" / "tool_logs.jsonl"
        assert jsonl_path.exists()
-        lines = [line for line in jsonl_path.read_text().strip().split("\n") if line]
+        lines = [line for line in jsonl_path.read_text(encoding="utf-8").strip().split("\n") if line]
        assert len(lines) == 1

        data = json.loads(lines[0])
@@ -376,7 +376,7 @@ class TestRuntimeLogger:

        jsonl_path = tmp_path / "logs" / "sessions" / run_id / "logs" / "details.jsonl"
        assert jsonl_path.exists()
-        lines = [line for line in jsonl_path.read_text().strip().split("\n") if line]
+        lines = [line for line in jsonl_path.read_text(encoding="utf-8").strip().split("\n") if line]
        assert len(lines) == 1

        data = json.loads(lines[0])
@@ -98,7 +98,7 @@ class TestFileStorageRunOperations:
        assert run_file.exists()

        # Verify it's valid JSON
-        with open(run_file) as f:
+        with open(run_file, encoding="utf-8") as f:
            data = json.load(f)
        assert data["id"] == "my_run"

@@ -800,9 +800,6 @@ dependencies = [
    { name = "litellm" },
    { name = "mcp" },
    { name = "pydantic" },
-    { name = "pytest" },
-    { name = "pytest-asyncio" },
-    { name = "pytest-xdist" },
    { name = "textual" },
    { name = "tools" },
 ]
@@ -811,6 +808,11 @@ dependencies = [
 server = [
    { name = "aiohttp" },
 ]
+testing = [
+    { name = "pytest" },
+    { name = "pytest-asyncio" },
+    { name = "pytest-xdist" },
+]
 tui = [
    { name = "textual" },
 ]
@@ -820,6 +822,9 @@ webhook = [

 [package.dev-dependencies]
 dev = [
+    { name = "pytest" },
+    { name = "pytest-asyncio" },
+    { name = "pytest-xdist" },
    { name = "ruff" },
    { name = "ty" },
 ]
@@ -834,17 +839,20 @@ requires-dist = [
    { name = "litellm", specifier = ">=1.81.0" },
    { name = "mcp", specifier = ">=1.0.0" },
    { name = "pydantic", specifier = ">=2.0" },
-    { name = "pytest", specifier = ">=8.0" },
-    { name = "pytest-asyncio", specifier = ">=0.23" },
-    { name = "pytest-xdist", specifier = ">=3.0" },
+    { name = "pytest", marker = "extra == 'testing'", specifier = ">=8.0" },
+    { name = "pytest-asyncio", marker = "extra == 'testing'", specifier = ">=0.23" },
+    { name = "pytest-xdist", marker = "extra == 'testing'", specifier = ">=3.0" },
    { name = "textual", specifier = ">=1.0.0" },
    { name = "textual", marker = "extra == 'tui'", specifier = ">=0.75.0" },
    { name = "tools", editable = "tools" },
 ]
-provides-extras = ["tui", "webhook", "server"]
+provides-extras = ["tui", "webhook", "server", "testing"]

 [package.metadata.requires-dev]
 dev = [
+    { name = "pytest", specifier = ">=8.0" },
+    { name = "pytest-asyncio", specifier = ">=0.23" },
+    { name = "pytest-xdist", specifier = ">=3.0" },
    { name = "ruff", specifier = ">=0.14.14" },
    { name = "ty", specifier = ">=0.0.13" },
 ]