Compare commits

...

2 Commits

Author SHA1 Message Date
Timothy 2cb54595c9 fix: browser timeout 2026-03-18 12:04:49 -07:00
Timothy 284079d18b fix: add timeout to browser tools 2026-03-18 10:51:53 -07:00
4 changed files with 204 additions and 25 deletions
+169
View File
@@ -3980,6 +3980,68 @@ class EventLoopNode(NodeProtocol):
ratio_before = conversation.usage_ratio()
phase_grad = getattr(ctx, "continuous_mode", False)
# Debug snapshot helper
def _snap(name: str, **extra: Any) -> dict[str, Any]:
    """Capture a labelled snapshot of the conversation's current size stats.

    Closure over ``conversation`` and ``self._config`` from the enclosing
    compaction routine; any ``extra`` keyword args are merged into the
    returned snapshot dict (e.g. ``messages_pruned=...``).
    """
    # Tally message counts per role for the debug report.
    roles: dict[str, int] = {}
    for m in conversation.messages:
        roles[m.role] = roles.get(m.role, 0) + 1
    return {
        "name": name,
        "message_count": conversation.message_count,
        "estimated_tokens": conversation.estimate_tokens(),
        # Rendered as a percentage string for readability in the log.
        "usage_ratio": f"{conversation.usage_ratio():.2%}",
        "max_context_tokens": self._config.max_context_tokens,
        "messages_by_role": roles,
        **extra,
    }
initial = _snap("initial")
# When over budget, attach a full message inventory so the log
# shows exactly what is consuming the context window.
if ratio_before >= 1.0:
inventory: list[dict[str, Any]] = []
for m in conversation.messages:
content_chars = len(m.content)
tc_chars = 0
tool_name = None
if m.tool_calls:
for tc in m.tool_calls:
args = tc.get("function", {}).get("arguments", "")
tc_chars += len(args) if isinstance(args, str) else len(json.dumps(args))
names = [tc.get("function", {}).get("name", "?") for tc in m.tool_calls]
tool_name = ", ".join(names)
elif m.role == "tool" and m.tool_use_id:
# Try to find the tool name from the preceding assistant message
for prev in conversation.messages:
if prev.tool_calls:
for tc in prev.tool_calls:
if tc.get("id") == m.tool_use_id:
tool_name = tc.get("function", {}).get("name", "?")
break
if tool_name:
break
entry: dict[str, Any] = {
"seq": m.seq,
"role": m.role,
"content_chars": content_chars,
}
if tc_chars:
entry["tool_call_args_chars"] = tc_chars
if tool_name:
entry["tool"] = tool_name
if m.is_error:
entry["is_error"] = True
if m.phase_id:
entry["phase"] = m.phase_id
# Content preview for the biggest messages
if content_chars > 2000:
entry["preview"] = m.content[:200] + "…"
inventory.append(entry)
initial["message_inventory"] = inventory
debug_steps: list[dict[str, Any]] = [initial]
# --- Step 1: Prune old tool results (free, no LLM) ---
protect = max(2000, self._config.max_context_tokens // 12)
pruned = await conversation.prune_old_tool_results(
@@ -3993,8 +4055,10 @@ class EventLoopNode(NodeProtocol):
ratio_before * 100,
conversation.usage_ratio() * 100,
)
debug_steps.append(_snap("after_prune", messages_pruned=pruned))
if not conversation.needs_compaction():
await self._log_compaction(ctx, conversation, ratio_before)
self._write_compaction_debug_log(ctx, debug_steps)
return
# --- Step 2: Standard structure-preserving compaction (free, no LLM) ---
@@ -4006,8 +4070,14 @@ class EventLoopNode(NodeProtocol):
keep_recent=4,
phase_graduated=phase_grad,
)
debug_steps.append(_snap(
"after_structural",
spillover_dir=spill_dir,
keep_recent=4,
))
if not conversation.needs_compaction():
await self._log_compaction(ctx, conversation, ratio_before)
self._write_compaction_debug_log(ctx, debug_steps)
return
# --- Step 3: LLM summary compaction ---
@@ -4030,11 +4100,20 @@ class EventLoopNode(NodeProtocol):
keep_recent=2,
phase_graduated=phase_grad,
)
debug_steps.append(_snap(
"after_llm_compact",
summary_chars=len(summary),
))
except Exception as e:
logger.warning("LLM compaction failed: %s", e)
debug_steps.append(_snap(
"llm_compact_failed",
error=str(e),
))
if not conversation.needs_compaction():
await self._log_compaction(ctx, conversation, ratio_before)
self._write_compaction_debug_log(ctx, debug_steps)
return
# --- Step 4: Emergency deterministic summary (LLM failed/unavailable) ---
@@ -4048,7 +4127,12 @@ class EventLoopNode(NodeProtocol):
keep_recent=1,
phase_graduated=phase_grad,
)
debug_steps.append(_snap(
"after_emergency",
summary_chars=len(summary),
))
await self._log_compaction(ctx, conversation, ratio_before)
self._write_compaction_debug_log(ctx, debug_steps)
# --- LLM compaction with binary-search splitting ----------------------
@@ -4262,6 +4346,91 @@ class EventLoopNode(NodeProtocol):
)
)
@staticmethod
def _write_compaction_debug_log(
    ctx: NodeContext,
    steps: list[dict[str, Any]],
) -> None:
    """Write detailed compaction analysis to ~/.hive/compaction_log/.

    Only runs when HIVE_COMPACTION_DEBUG is set in the environment.
    Each compaction produces a timestamped markdown file.

    ``steps`` is the list of snapshot dicts produced by ``_snap`` during
    the compaction pipeline; special keys (``messages_by_role``,
    ``message_inventory``, ``discarded_messages``) get structured
    rendering, everything else is emitted as a scalar bullet.
    """
    # Local import keeps this debug-only dependency out of the hot path.
    import os
    # Opt-in gate: without the env var this is a no-op.
    if not os.environ.get("HIVE_COMPACTION_DEBUG"):
        return
    log_dir = Path.home() / ".hive" / "compaction_log"
    log_dir.mkdir(parents=True, exist_ok=True)
    # Microsecond-resolution UTC timestamp keeps concurrent writes from
    # colliding on the same filename.
    ts = datetime.now(UTC).strftime("%Y%m%dT%H%M%S_%f")
    # Node ids may contain "/" which is illegal in a filename component.
    node_label = ctx.node_id.replace("/", "_")
    log_path = log_dir / f"{ts}_{node_label}.md"
    lines: list[str] = []
    lines.append(f"# Compaction Debug — {ctx.node_id}")
    lines.append(f"**Time:** {datetime.now(UTC).isoformat()}")
    lines.append(f"**Node:** {ctx.node_spec.name} (`{ctx.node_id}`)")
    if ctx.stream_id:
        lines.append(f"**Stream:** {ctx.stream_id}")
    lines.append("")
    for step in steps:
        name = step.get("name", "unknown")
        lines.append(f"## Step: {name}")
        for key, val in step.items():
            if key == "name":
                continue  # already rendered in the section heading
            if key == "messages_by_role":
                # Nested bullet list of role -> count.
                lines.append(f"- **{key}:**")
                for role, count in val.items():
                    lines.append(f" - {role}: {count}")
            elif key == "message_inventory":
                # Full per-message size table, ranked largest-first, so
                # the log shows exactly what consumed the context window.
                total_chars = sum(e.get("content_chars", 0) + e.get("tool_call_args_chars", 0) for e in val)
                lines.append(f"### Message Inventory ({len(val)} messages, {total_chars:,} total chars)")
                lines.append("")
                # Sort descending by size for the table
                ranked = sorted(val, key=lambda e: e.get("content_chars", 0) + e.get("tool_call_args_chars", 0), reverse=True)
                lines.append("| # | seq | role | tool | chars | % of total | flags |")
                lines.append("|---|-----|------|------|------:|------------|-------|")
                for i, entry in enumerate(ranked, 1):
                    chars = entry.get("content_chars", 0) + entry.get("tool_call_args_chars", 0)
                    # Guard against division by zero when every entry is empty.
                    pct = (chars / total_chars * 100) if total_chars else 0
                    tool = entry.get("tool", "")
                    flags = []
                    if entry.get("is_error"):
                        flags.append("error")
                    if entry.get("phase"):
                        flags.append(f"phase={entry['phase']}")
                    lines.append(
                        f"| {i} | {entry['seq']} | {entry['role']} | {tool} "
                        f"| {chars:,} | {pct:.1f}% | {', '.join(flags)} |"
                    )
                # Previews for large messages
                large = [e for e in ranked if e.get("preview")]
                if large:
                    lines.append("")
                    lines.append("#### Large message previews")
                    for entry in large:
                        lines.append(f"\n**seq={entry['seq']}** ({entry['role']}, {entry.get('tool', '')}):")
                        lines.append(f"```\n{entry['preview']}\n```")
            elif key == "discarded_messages":
                lines.append(f"- **{key}:** ({len(val)} messages)")
                for msg_info in val[:50]:  # cap at 50
                    lines.append(f" - seq={msg_info['seq']} role={msg_info['role']} chars={msg_info['chars']}")
                if len(val) > 50:
                    lines.append(f" - ... and {len(val) - 50} more")
            else:
                # Any other snapshot value renders as a simple bullet.
                lines.append(f"- **{key}:** {val}")
        lines.append("")
    # Best-effort write: a debug log failure must never break compaction,
    # so OSError is downgraded to a debug message.
    try:
        log_path.write_text("\n".join(lines), encoding="utf-8")
        logger.debug("Compaction debug log written to %s", log_path)
    except OSError:
        logger.debug("Failed to write compaction debug log to %s", log_path)
def _build_emergency_summary(
self,
ctx: NodeContext,
+10 -10
View File
@@ -210,6 +210,16 @@ def configure_logging(
# printed on every single completion call). Warnings and errors still show.
logging.getLogger("LiteLLM").setLevel(logging.WARNING)
# Suppress the "Provider List: ..." banner litellm prints to stdout via
# print() on every completion call. This is independent of log format.
try:
import litellm as _litellm
if hasattr(_litellm, "suppress_debug_info"):
_litellm.suppress_debug_info = True # type: ignore[attr-defined]
except (ImportError, AttributeError):
pass
# When in JSON mode, configure known third-party loggers to use JSON formatter
# This ensures libraries like LiteLLM, httpcore also output clean JSON
if format == "json":
@@ -232,16 +242,6 @@ def _disable_third_party_colors() -> None:
os.environ["NO_COLOR"] = "1"
os.environ["FORCE_COLOR"] = "0"
# Disable LiteLLM debug/verbose output colors if available
try:
import litellm
# LiteLLM respects NO_COLOR, but we can also suppress debug info
if hasattr(litellm, "suppress_debug_info"):
litellm.suppress_debug_info = True # type: ignore[attr-defined]
except (ImportError, AttributeError):
pass
def set_trace_context(**kwargs: Any) -> None:
"""
-8
View File
@@ -60,7 +60,6 @@
"integrity": "sha512-CGOfOJqWjg2qW/Mb6zNsDm+u5vFQ8DxXfbM09z69p5Z6+mE1ikP2jUXw+j42Pf1XTYED2Rni5f95npYeuwMDQA==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@babel/code-frame": "^7.29.0",
"@babel/generator": "^7.29.0",
@@ -1557,7 +1556,6 @@
"integrity": "sha512-4K3bqJpXpqfg2XKGK9bpDTc6xO/xoUP/RBWS7AtRMug6zZFaRekiLzjVtAoZMquxoAbzBvy5nxQ7veS5eYzf8A==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"undici-types": "~7.18.0"
}
@@ -1573,7 +1571,6 @@
"resolved": "https://registry.npmjs.org/@types/react/-/react-18.3.28.tgz",
"integrity": "sha512-z9VXpC7MWrhfWipitjNdgCauoMLRdIILQsAEV+ZesIzBq/oUlxk0m3ApZuMFCXdnS4U7KrI+l3WRUEGQ8K1QKw==",
"license": "MIT",
"peer": true,
"dependencies": {
"@types/prop-types": "*",
"csstype": "^3.2.2"
@@ -1786,7 +1783,6 @@
}
],
"license": "MIT",
"peer": true,
"dependencies": {
"baseline-browser-mapping": "^2.9.0",
"caniuse-lite": "^1.0.30001759",
@@ -3564,7 +3560,6 @@
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
"dev": true,
"license": "MIT",
"peer": true,
"engines": {
"node": ">=12"
},
@@ -3616,7 +3611,6 @@
"resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz",
"integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==",
"license": "MIT",
"peer": true,
"dependencies": {
"loose-envify": "^1.1.0"
},
@@ -3629,7 +3623,6 @@
"resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz",
"integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==",
"license": "MIT",
"peer": true,
"dependencies": {
"loose-envify": "^1.1.0",
"scheduler": "^0.23.2"
@@ -4190,7 +4183,6 @@
"integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"esbuild": "^0.25.0",
"fdir": "^6.4.4",
+25 -7
View File
@@ -409,6 +409,8 @@ class BrowserSession:
We're already inside ``self._lock`` so we can't call ``stop()``.
This mirrors the teardown logic without re-acquiring the lock.
"""
_CLOSE_TIMEOUT = 10.0 # seconds
if self.cdp_port:
from .port_manager import release_port
@@ -417,21 +419,21 @@ class BrowserSession:
if self.context:
try:
await self.context.close()
await asyncio.wait_for(self.context.close(), timeout=_CLOSE_TIMEOUT)
except Exception:
pass
self.context = None
if self.browser:
try:
await self.browser.close()
await asyncio.wait_for(self.browser.close(), timeout=_CLOSE_TIMEOUT)
except Exception:
pass
self.browser = None
if self._playwright:
try:
await self._playwright.stop()
await asyncio.wait_for(self._playwright.stop(), timeout=_CLOSE_TIMEOUT)
except Exception:
pass
self._playwright = None
@@ -588,6 +590,10 @@ class BrowserSession:
async def stop(self) -> dict:
"""Stop the browser and clean up resources."""
# Timeout for each Playwright teardown call — prevents hanging when
# the browser process is crashed or unresponsive.
_CLOSE_TIMEOUT = 10.0 # seconds
async with self._lock:
# Release CDP port if allocated
if self.cdp_port:
@@ -598,23 +604,35 @@ class BrowserSession:
# Close context (works for both persistent and ephemeral)
if self.context:
await self.context.close()
try:
await asyncio.wait_for(self.context.close(), timeout=_CLOSE_TIMEOUT)
except Exception as exc:
logger.warning("context.close() failed for profile %r: %s", self.profile, exc)
self.context = None
# Agent sessions share a browser — don't close it (other agents depend on it).
# Only standard sessions own their browser and playwright instances.
if self.session_type != "agent":
if self.browser:
await self.browser.close()
try:
await asyncio.wait_for(self.browser.close(), timeout=_CLOSE_TIMEOUT)
except Exception as exc:
logger.warning("browser.close() failed for profile %r: %s", self.profile, exc)
self.browser = None
if self._playwright:
await self._playwright.stop()
try:
await asyncio.wait_for(self._playwright.stop(), timeout=_CLOSE_TIMEOUT)
except Exception as exc:
logger.warning("playwright.stop() failed for profile %r: %s", self.profile, exc)
self._playwright = None
# Kill the Chrome subprocess
if self._chrome_process:
await self._chrome_process.kill()
try:
await self._chrome_process.kill()
except Exception as exc:
logger.warning("chrome_process.kill() failed for profile %r: %s", self.profile, exc)
self._chrome_process = None
else:
self.browser = None # Drop reference to shared browser