refactor: switch from Playwright browser to system Chrome via CDP

This commit is contained in:
bryan
2026-03-12 08:39:43 -07:00
parent d87dfca1ab
commit df71834e4b
6 changed files with 401 additions and 98 deletions
+8 -16
View File
@@ -6,7 +6,7 @@
.DESCRIPTION
An interactive setup wizard that:
1. Installs Python dependencies via uv
2. Installs Playwright browser for web scraping
2. Checks for Chrome/Edge browser for web automation
3. Helps configure LLM API keys
4. Verifies everything works
@@ -518,22 +518,14 @@ try {
exit 1
}
# Install Playwright browser
Write-Host " Installing Playwright browser... " -NoNewline
$null = & uv run python -c "import playwright" 2>&1
$importExitCode = $LASTEXITCODE
if ($importExitCode -eq 0) {
$null = & uv run python -m playwright install chromium 2>&1
$playwrightExitCode = $LASTEXITCODE
if ($playwrightExitCode -eq 0) {
Write-Ok "ok"
} else {
Write-Warn "skipped (install manually: uv run python -m playwright install chromium)"
}
# Check for Chrome/Edge (required for GCU browser tools)
Write-Host " Checking for Chrome/Edge browser... " -NoNewline
$null = & uv run python -c "from gcu.browser.chrome_finder import find_chrome; assert find_chrome()" 2>&1
$chromeCheckExit = $LASTEXITCODE
if ($chromeCheckExit -eq 0) {
Write-Ok "ok"
} else {
Write-Warn "skipped"
Write-Warn "not found - install Chrome or Edge for browser tools"
}
} finally {
Pop-Location
+6 -10
View File
@@ -4,7 +4,7 @@
#
# An interactive setup wizard that:
# 1. Installs Python dependencies
# 2. Installs Playwright browser for web scraping
# 2. Checks for Chrome/Edge browser for web automation
# 3. Helps configure LLM API keys
# 4. Verifies everything works
#
@@ -253,16 +253,12 @@ else
exit 1
fi
# Install Playwright browser
echo -n " Installing Playwright browser... "
if uv run python -c "import playwright" > /dev/null 2>&1; then
if uv run python -m playwright install chromium > /dev/null 2>&1; then
echo -e "${GREEN}ok${NC}"
else
echo -e "${YELLOW}${NC}"
fi
# Check for Chrome/Edge (required for GCU browser tools)
echo -n " Checking for Chrome/Edge browser... "
if uv run python -c "from gcu.browser.chrome_finder import find_chrome; assert find_chrome()" > /dev/null 2>&1; then
echo -e "${GREEN}ok${NC}"
else
echo -e "${YELLOW}${NC}"
echo -e "${YELLOW}not found — install Chrome or Edge for browser tools${NC}"
fi
cd "$SCRIPT_DIR"
+8 -2
View File
@@ -14,8 +14,14 @@ COPY mcp_server.py ./
# Install package with all dependencies
RUN pip install --no-cache-dir -e .
# Install Playwright Chromium browser and system dependencies
RUN playwright install chromium --with-deps
# Install Google Chrome (stable) — used by GCU browser tools via CDP.
# The signing key is written to a dedicated keyring file and referenced via
# signed-by in the source entry. Both the key and the repository are fetched
# over HTTPS (ca-certificates is installed explicitly for that).
# NOTE: the source entry is pinned to arch=amd64, so this step only resolves
# google-chrome-stable on amd64 builds.
RUN apt-get update && apt-get install -y wget gnupg ca-certificates \
    && mkdir -p /etc/apt/keyrings \
    && wget -q -O /etc/apt/keyrings/google-chrome.asc https://dl.google.com/linux/linux_signing_key.pub \
    && echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/google-chrome.asc] https://dl.google.com/linux/chrome/deb/ stable main" \
        > /etc/apt/sources.list.d/google-chrome.list \
    && apt-get update && apt-get install -y google-chrome-stable \
    && apt-get clean && rm -rf /var/lib/apt/lists/*
# Create non-root user for security
RUN useradd -m -u 1001 appuser
+106
View File
@@ -0,0 +1,106 @@
"""
Detect system-installed Chrome or Edge browsers.
Searches platform-specific well-known paths to find a Chromium-based browser
executable. Used by chrome_launcher to avoid bundling Playwright's Chromium.
"""
from __future__ import annotations
import os
import shutil
import sys
from pathlib import Path
# Candidate browsers per platform. Each list is ordered by preference: the
# finder functions below walk it front to back and return the first match.

# macOS: absolute paths to the binaries inside the standard /Applications
# bundles (Chrome, Chrome Canary, Edge, Chromium — in that order).
_MACOS_CANDIDATES = [
    "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
    "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
    "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
    "/Applications/Chromium.app/Contents/MacOS/Chromium",
]

# Linux: executable names looked up on PATH via shutil.which()
# (Chrome, then Chromium, then Edge).
_LINUX_WHICH_NAMES = [
    "google-chrome",
    "google-chrome-stable",
    "chromium-browser",
    "chromium",
    "microsoft-edge",
    "microsoft-edge-stable",
]

# Windows: paths relative to each base directory taken from the
# PROGRAMFILES, PROGRAMFILES(X86) and LOCALAPPDATA environment variables
# (Chrome, then Edge).
_WINDOWS_CANDIDATES = [
    r"Google\Chrome\Application\chrome.exe",
    r"Microsoft\Edge\Application\msedge.exe",
]
def find_chrome() -> str | None:
    """Locate a Chrome/Edge executable installed on this machine.

    Lookup order:
        1. The ``CHROME_PATH`` environment variable, when it is set and
           points at something executable (explicit override).
        2. The well-known install locations for the current platform.

    Returns:
        The path of the browser executable, or ``None`` if none was found.
    """
    override = os.environ.get("CHROME_PATH")
    if override and _is_executable(override):
        return override

    # Pick the platform-specific search routine, then run it.
    if sys.platform == "darwin":
        platform_search = _find_macos
    elif sys.platform == "win32":
        platform_search = _find_windows
    else:
        platform_search = _find_linux
    return platform_search()
def require_chrome() -> str:
    """Return the browser path from :func:`find_chrome` or fail loudly.

    Returns:
        The path of a Chrome/Edge executable.

    Raises:
        RuntimeError: No browser was found; the message lists the ways to
            fix that (install Chrome or set ``CHROME_PATH``).
    """
    browser = find_chrome()
    if browser is not None:
        return browser

    message = (
        "No Chrome or Edge browser found. GCU browser tools require a "
        "Chromium-based browser.\n\n"
        "Options:\n"
        " 1. Install Google Chrome: https://www.google.com/chrome/\n"
        " 2. Set the CHROME_PATH environment variable to your browser executable\n"
    )
    raise RuntimeError(message)
def _is_executable(path: str) -> bool:
    """Return True when *path* is an existing regular file we may execute.

    Directories are rejected explicitly: they normally carry the execute
    (search) permission bit, so ``os.access(..., os.X_OK)`` on its own would
    accept a value such as ``CHROME_PATH=/usr/bin`` as a browser binary.

    Args:
        path: Filesystem path to test.

    Returns:
        ``True`` if *path* is a file (symlinks are followed) and the current
        process has execute permission on it, ``False`` otherwise.
    """
    candidate = Path(path)
    return candidate.is_file() and os.access(candidate, os.X_OK)
def _find_macos() -> str | None:
    """Return the first entry of ``_MACOS_CANDIDATES`` that is executable.

    Returns:
        The matching path, or ``None`` when no candidate is executable.
    """
    return next(
        (app_binary for app_binary in _MACOS_CANDIDATES if _is_executable(app_binary)),
        None,
    )
def _find_linux() -> str | None:
    """Return the first ``_LINUX_WHICH_NAMES`` entry that resolves on PATH.

    Returns:
        The path reported by ``shutil.which`` for the first name that
        resolves, or ``None`` when none of them do.
    """
    # map() is lazy, so names are still resolved one at a time and the
    # search stops at the first hit.
    for resolved in map(shutil.which, _LINUX_WHICH_NAMES):
        if resolved:
            return resolved
    return None
def _find_windows() -> str | None:
    """Search the Windows program directories for a known browser binary.

    Base directories come from the ``PROGRAMFILES``, ``PROGRAMFILES(X86)``
    and ``LOCALAPPDATA`` environment variables (unset or empty ones are
    skipped). Every ``_WINDOWS_CANDIDATES`` entry is tried under one base
    directory before moving on to the next.

    Returns:
        The first candidate path that is an existing file, or ``None``.
    """
    env_names = ("PROGRAMFILES", "PROGRAMFILES(X86)", "LOCALAPPDATA")
    roots = [root for root in map(os.environ.get, env_names) if root]

    for root in roots:
        for relative_exe in _WINDOWS_CANDIDATES:
            exe_path = os.path.join(root, relative_exe)
            if os.path.isfile(exe_path):
                return exe_path
    return None
+192
View File
@@ -0,0 +1,192 @@
"""
Launch and manage a system Chrome/Edge process for CDP connections.
Starts the browser as a subprocess with ``--remote-debugging-port`` and waits
until the CDP endpoint is ready. Used by ``session.py`` to replace
Playwright's ``chromium.launch()`` with a system-installed browser.
"""
from __future__ import annotations
import asyncio
import logging
import subprocess
import tempfile
import time
from dataclasses import dataclass, field
from pathlib import Path
from .chrome_finder import require_chrome
logger = logging.getLogger(__name__)
# Chrome flags shared with session.py — keep in sync with _CHROME_ARGS there
# These are appended to the command line of every launch_chrome() call,
# after the port / profile / window-size / language flags.
_CHROME_ARGS = [
    "--no-sandbox",
    "--disable-setuid-sandbox",
    "--disable-dev-shm-usage",
    "--disable-blink-features=AutomationControlled",
    "--no-first-run",
    "--no-default-browser-check",
]

# CDP readiness polling
# Seconds _wait_for_cdp() sleeps between two probes of the CDP endpoint.
_CDP_POLL_INTERVAL_S = 0.1
# Default overall deadline (seconds) for _wait_for_cdp() before it gives up.
_CDP_MAX_WAIT_S = 10.0
@dataclass
class ChromeProcess:
    """Handle to a running Chrome subprocess launched for CDP access.

    Instances are created by ``launch_chrome``; callers use :meth:`is_alive`
    to check on the process and :meth:`kill` to shut it down.
    """

    # The Chrome subprocess itself.
    process: subprocess.Popen[bytes]
    # Port Chrome was started with via --remote-debugging-port.
    cdp_port: int
    # Base URL of the CDP endpoint that clients connect to.
    cdp_url: str
    # Profile directory Chrome was started with.
    user_data_dir: Path
    # Set only for ephemeral sessions: the temporary directory backing
    # user_data_dir, removed again by kill().
    _temp_dir: tempfile.TemporaryDirectory[str] | None = field(default=None, repr=False)

    def is_alive(self) -> bool:
        """Return True while the Chrome subprocess has not exited."""
        return self.process.poll() is None

    async def kill(self) -> None:
        """Terminate the Chrome process and clean up resources.

        A still-running process is asked to terminate and given 5 seconds to
        exit before it is killed outright. Afterwards the stderr pipe (if
        any) is closed and the temporary profile directory of an ephemeral
        session is removed. Safe to call more than once.
        """
        if self.process.poll() is None:
            self.process.terminate()
            loop = asyncio.get_running_loop()
            try:
                # Popen.wait() blocks, so run it in the default executor.
                await asyncio.wait_for(
                    loop.run_in_executor(None, self.process.wait),
                    timeout=5.0,
                )
            except asyncio.TimeoutError:
                # asyncio.TimeoutError is only an alias of the builtin
                # TimeoutError from Python 3.11 on; naming the asyncio class
                # keeps this escalation working on older interpreters too.
                self.process.kill()
                self.process.wait()
            logger.info(f"Chrome process (port {self.cdp_port}) terminated")

        # Release our end of the stderr pipe so the file descriptor does not
        # linger until garbage collection.
        stderr_pipe = self.process.stderr
        if stderr_pipe is not None and not stderr_pipe.closed:
            try:
                stderr_pipe.close()
            except OSError:
                logger.debug("Failed to close Chrome stderr pipe", exc_info=True)

        # Clean up temp directory for ephemeral sessions (best effort).
        if self._temp_dir is not None:
            try:
                self._temp_dir.cleanup()
            except Exception:
                logger.debug(
                    "Failed to remove temporary Chrome profile directory",
                    exc_info=True,
                )
            self._temp_dir = None
async def launch_chrome(
    cdp_port: int,
    user_data_dir: Path | None = None,
    headless: bool = True,
    extra_args: list[str] | None = None,
) -> ChromeProcess:
    """Launch system Chrome and wait for CDP to become ready.

    Args:
        cdp_port: Port for ``--remote-debugging-port``.
        user_data_dir: Profile directory. If *None*, a temporary directory is
            created and cleaned up when the process is killed (ephemeral mode).
        headless: Use Chrome's headless mode (``--headless=new``).
        extra_args: Additional Chrome CLI flags.

    Returns:
        A :class:`ChromeProcess` handle.

    Raises:
        RuntimeError: If Chrome is not found, exits before CDP is ready, or
            CDP does not become ready within the timeout.
        OSError: If the browser executable cannot be spawned.
    """
    chrome_path = require_chrome()

    temp_dir: tempfile.TemporaryDirectory[str] | None = None
    if user_data_dir is None:
        temp_dir = tempfile.TemporaryDirectory(prefix="hive-browser-")
        user_data_dir = Path(temp_dir.name)

    args = [
        chrome_path,
        f"--remote-debugging-port={cdp_port}",
        f"--user-data-dir={user_data_dir}",
        "--window-size=1920,1080",
        "--lang=en-US",
        *_CHROME_ARGS,
        *(extra_args or []),
    ]
    if headless:
        args.append("--headless=new")

    logger.info(f"Launching Chrome: port={cdp_port}, user_data_dir={user_data_dir}")

    process: subprocess.Popen[bytes] | None = None
    try:
        # NOTE(review): stderr is a pipe that is only read if Chrome dies
        # during startup. A long-lived Chrome that logs heavily to stderr
        # could eventually fill it — consider draining or redirecting it.
        process = subprocess.Popen(
            args,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
        )
        # Wait for CDP to become ready
        await _wait_for_cdp(cdp_port, process)
    except BaseException:
        # BaseException rather than Exception: asyncio.CancelledError must
        # also trigger cleanup, otherwise a cancelled launch leaves an
        # orphaned Chrome process and temp profile behind. Spawning happens
        # inside the try so a failed Popen releases the temp dir as well.
        if process is not None:
            process.kill()
            process.wait()
            if process.stderr is not None:
                process.stderr.close()
        if temp_dir is not None:
            temp_dir.cleanup()
        raise

    return ChromeProcess(
        process=process,
        cdp_port=cdp_port,
        cdp_url=f"http://127.0.0.1:{cdp_port}",
        user_data_dir=user_data_dir,
        _temp_dir=temp_dir,
    )
async def _wait_for_cdp(
    port: int,
    process: subprocess.Popen[bytes],
    timeout: float = _CDP_MAX_WAIT_S,
) -> None:
    """Poll ``/json/version`` until Chrome's CDP endpoint is ready.

    Args:
        port: Local port Chrome was started with.
        process: The Chrome subprocess; it is checked on every iteration so
            a crash is reported immediately instead of after the timeout.
        timeout: Overall deadline in seconds.

    Raises:
        RuntimeError: If Chrome exits before the endpoint answers (the
            message carries the first 500 characters of its stderr), or the
            endpoint does not answer with HTTP 200 before the deadline.
    """
    import http.client
    import urllib.error
    import urllib.request

    url = f"http://127.0.0.1:{port}/json/version"
    deadline = time.monotonic() + timeout
    loop = asyncio.get_running_loop()

    def _probe() -> bool:
        """Return True if a single blocking GET of the endpoint yields 200."""
        try:
            req = urllib.request.Request(url, method="GET")
            with urllib.request.urlopen(req, timeout=1) as resp:
                return resp.status == 200
        except (urllib.error.URLError, OSError, http.client.HTTPException):
            # Connection refused/reset, or a malformed reply from a
            # half-started endpoint: simply "not ready yet".
            return False

    while time.monotonic() < deadline:
        # Check the process hasn't crashed
        if process.poll() is not None:
            stderr = ""
            if process.stderr:
                stderr = process.stderr.read().decode(errors="replace")
            raise RuntimeError(
                f"Chrome exited with code {process.returncode} before CDP "
                f"was ready.\nstderr: {stderr[:500]}"
            )

        try:
            # The probe blocks, so it runs in the default executor.
            ready = await asyncio.wait_for(
                loop.run_in_executor(None, _probe),
                timeout=2.0,
            )
            if ready:
                elapsed = timeout - (deadline - time.monotonic())
                logger.info(f"CDP ready on port {port} after {elapsed:.1f}s")
                return
        except asyncio.TimeoutError:
            # asyncio.TimeoutError is only an alias of the builtin
            # TimeoutError from Python 3.11 on; naming the asyncio class
            # keeps a slow probe from aborting the wait on older versions.
            pass

        await asyncio.sleep(_CDP_POLL_INTERVAL_S)

    raise RuntimeError(f"Chrome CDP endpoint did not become ready within {timeout}s on port {port}")
+81 -70
View File
@@ -1,8 +1,9 @@
"""
Browser session management.
Manages Playwright browser instances with support for multiple profiles,
each with independent browser context and multiple tabs.
Connects to system-installed Chrome/Edge via CDP for browser automation.
Each session launches a Chrome subprocess with ``--remote-debugging-port``
and connects Playwright as a CDP client.
Supports three session types:
- Standard: Single browser with ephemeral or persistent context
@@ -165,43 +166,45 @@ VALID_WAIT_UNTIL = {"commit", "domcontentloaded", "load", "networkidle"}
# ---------------------------------------------------------------------------
# Shared browser for agent contexts
# ---------------------------------------------------------------------------
# All agent sessions share this single browser process. Created via
# chromium.launch() (not persistent context) so we can call
# browser.new_context() multiple times with different storage states.
# All agent sessions share this single Chrome process + CDP connection.
# We can call browser.new_context() multiple times with different storage states.
_shared_browser: Browser | None = None
_shared_playwright: Any = None
_shared_chrome_process: Any = None # ChromeProcess | None (avoid circular import)
_shared_cdp_port: int | None = None
# Chrome flags shared between all browser launches
_CHROME_ARGS = [
"--no-sandbox",
"--disable-setuid-sandbox",
"--disable-dev-shm-usage",
"--disable-blink-features=AutomationControlled",
"--no-first-run",
"--no-default-browser-check",
]
_DEFAULT_VIEWPORT = {"width": 1920, "height": 1080}
async def get_shared_browser(headless: bool = True) -> Browser:
"""Get or create the shared browser instance for agent contexts."""
global _shared_browser, _shared_playwright
global _shared_browser, _shared_playwright, _shared_chrome_process, _shared_cdp_port
if _shared_browser and _shared_browser.is_connected():
return _shared_browser
_shared_playwright = await async_playwright().start()
_shared_browser = await _shared_playwright.chromium.launch(
from .chrome_launcher import launch_chrome
from .port_manager import allocate_port
cdp_port = allocate_port("__shared__")
_shared_cdp_port = cdp_port
_shared_chrome_process = await launch_chrome(
cdp_port=cdp_port,
user_data_dir=None, # ephemeral
headless=headless,
args=_CHROME_ARGS,
)
logger.info("Started shared browser for agent contexts")
_shared_playwright = await async_playwright().start()
_shared_browser = await _shared_playwright.chromium.connect_over_cdp(
_shared_chrome_process.cdp_url
)
logger.info("Started shared browser for agent contexts (system Chrome)")
return _shared_browser
async def close_shared_browser() -> None:
"""Close the shared browser and clean up all agent contexts."""
global _shared_browser, _shared_playwright
global _shared_browser, _shared_playwright, _shared_chrome_process, _shared_cdp_port
if _shared_browser:
await _shared_browser.close()
@@ -212,6 +215,16 @@ async def close_shared_browser() -> None:
await _shared_playwright.stop()
_shared_playwright = None
if _shared_chrome_process:
await _shared_chrome_process.kill()
_shared_chrome_process = None
if _shared_cdp_port is not None:
from .port_manager import release_port
release_port(_shared_cdp_port)
_shared_cdp_port = None
@dataclass
class BrowserSession:
@@ -245,6 +258,9 @@ class BrowserSession:
# Session type: "standard" (default) or "agent" (ephemeral context from shared browser)
session_type: str = "standard"
# Chrome subprocess handle (standard sessions only)
_chrome_process: Any = None # ChromeProcess | None
def _is_running(self) -> bool:
"""Check if browser is currently running."""
if self.session_type == "agent":
@@ -254,9 +270,7 @@ class BrowserSession:
and self.browser is not None
and self.browser.is_connected()
)
if self.persistent:
# Persistent context doesn't have a separate browser object
return self.context is not None
# Both persistent and ephemeral now have a browser object via CDP
return self.browser is not None and self.browser.is_connected()
async def _health_check(self) -> None:
@@ -316,6 +330,13 @@ class BrowserSession:
pass
self._playwright = None
if self._chrome_process:
try:
await self._chrome_process.kill()
except Exception:
pass
self._chrome_process = None
self.pages.clear()
self.active_page_id = None
self.console_messages.clear()
@@ -343,19 +364,12 @@ class BrowserSession:
"cdp_port": self.cdp_port,
}
from .chrome_launcher import launch_chrome
from .port_manager import allocate_port
self._playwright = await async_playwright().start()
self.persistent = persistent
# Common Chrome flags
chrome_args = [
"--no-sandbox",
"--disable-setuid-sandbox",
"--disable-dev-shm-usage",
"--disable-blink-features=AutomationControlled",
"--no-first-run",
"--no-default-browser-check",
]
if persistent:
# Get storage path from environment (set by AgentRunner)
storage_path_str = os.environ.get("HIVE_STORAGE_PATH")
@@ -370,33 +384,40 @@ class BrowserSession:
)
self.user_data_dir.mkdir(parents=True, exist_ok=True)
else:
self.user_data_dir = None # chrome_launcher creates a temp dir
# Allocate CDP port
from .port_manager import allocate_port
# Allocate CDP port for system Chrome
self.cdp_port = allocate_port(self.profile)
self.cdp_port = allocate_port(self.profile)
chrome_args.append(f"--remote-debugging-port={self.cdp_port}")
logger.info(
f"Starting {'persistent' if persistent else 'ephemeral'} browser: "
f"profile={self.profile}, user_data_dir={self.user_data_dir}, "
f"cdp_port={self.cdp_port}"
)
logger.info(
f"Starting persistent browser: profile={self.profile}, "
f"user_data_dir={self.user_data_dir}, cdp_port={self.cdp_port}"
)
# Launch system Chrome and connect via CDP
self._chrome_process = await launch_chrome(
cdp_port=self.cdp_port,
user_data_dir=self.user_data_dir,
headless=headless,
extra_args=[f"--user-agent={BROWSER_USER_AGENT}"],
)
self.browser = await self._playwright.chromium.connect_over_cdp(
self._chrome_process.cdp_url
)
self.context = self.browser.contexts[0]
# Use launch_persistent_context for true Chrome profile persistence
# Note: Returns BrowserContext directly, no separate Browser object
self.context = await self._playwright.chromium.launch_persistent_context(
user_data_dir=str(self.user_data_dir),
headless=headless,
viewport={"width": 1920, "height": 1080},
user_agent=BROWSER_USER_AGENT,
locale="en-US",
args=chrome_args,
)
self.browser = None # No separate browser object with persistent context
# Inject stealth script to hide automation detection
await self.context.add_init_script(STEALTH_SCRIPT)
# Inject stealth script to hide automation detection
await self.context.add_init_script(STEALTH_SCRIPT)
# Set viewport on existing pages (CDP default context doesn't
# inherit viewport settings like launch_persistent_context did)
viewport = _DEFAULT_VIEWPORT
for page in self.context.pages:
await page.set_viewport_size(viewport)
if persistent:
# Register existing pages from restored session
for page in self.context.pages:
target_id = f"tab_{id(page)}"
@@ -413,21 +434,6 @@ class BrowserSession:
# Only set branded content if it's a blank/new tab page
if url in ("", "about:blank", "chrome://newtab/"):
await first_page.set_content(HIVE_START_PAGE)
else:
# Ephemeral mode - original behavior
logger.info(f"Starting ephemeral browser: profile={self.profile}")
self.browser = await self._playwright.chromium.launch(
headless=headless,
args=chrome_args,
)
self.context = await self.browser.new_context(
viewport={"width": 1920, "height": 1080},
user_agent=BROWSER_USER_AGENT,
locale="en-US",
)
# Inject stealth script to hide automation detection
await self.context.add_init_script(STEALTH_SCRIPT)
# Health check: confirm the browser is actually responsive
try:
@@ -474,6 +480,11 @@ class BrowserSession:
if self._playwright:
await self._playwright.stop()
self._playwright = None
# Kill the Chrome subprocess
if self._chrome_process:
await self._chrome_process.kill()
self._chrome_process = None
else:
self.browser = None # Drop reference to shared browser
@@ -518,7 +529,7 @@ class BrowserSession:
# Create an isolated context stamped with the snapshot
context = await browser.new_context(
storage_state=storage_state,
viewport={"width": 1920, "height": 1080},
viewport=_DEFAULT_VIEWPORT,
user_agent=BROWSER_USER_AGENT,
locale="en-US",
)