feat: hive debugger

This commit is contained in:
Timothy
2026-03-18 16:26:55 -07:00
parent 1da28644a6
commit 6acceed288
6 changed files with 216 additions and 48 deletions
+5
View File
@@ -94,6 +94,11 @@ def main():
register_skill_commands(subparsers)
# Register debugger commands (debugger)
from framework.debugger.cli import register_debugger_commands
register_debugger_commands(subparsers)
args = parser.parse_args()
if hasattr(args, "func"):
View File
+78
View File
@@ -0,0 +1,78 @@
"""CLI command for the LLM debug log viewer."""
import argparse
import subprocess
import sys
from pathlib import Path
# Absolute path of the standalone visualizer script that ``cmd_debugger``
# runs in a child interpreter. ``parents[3]`` is the third ancestor above the
# directory containing this file.
# NOTE(review): presumably that ancestor is the repository root, which would
# mean the path only resolves in a source checkout — confirm for installs.
_SCRIPT = (
Path(__file__).resolve().parents[3] / "scripts" / "llm_debug_log_visualizer.py"
)
def register_debugger_commands(subparsers: argparse._SubParsersAction) -> None:
    """Attach the ``debugger`` subcommand and its options to *subparsers*.

    The subcommand dispatches to ``cmd_debugger`` through the ``func``
    default set on its parser.
    """
    description = (
        "Start a local server that lets you browse LLM debug sessions "
        "recorded in ~/.hive/llm_logs. Sessions are loaded on demand so "
        "the browser stays responsive."
    )
    debugger = subparsers.add_parser(
        "debugger",
        help="Open the LLM debug log viewer",
        description=description,
    )

    # (flag, add_argument keyword arguments), in the order shown by --help.
    option_table = (
        ("--session", {"help": "Execution ID to select initially."}),
        (
            "--port",
            {
                "type": int,
                "default": 0,
                "help": "Port for the local server (0 = auto-pick a free port).",
            },
        ),
        (
            "--logs-dir",
            {
                "help": (
                    "Directory containing JSONL log files "
                    "(default: ~/.hive/llm_logs)."
                ),
            },
        ),
        (
            "--limit-files",
            {
                "type": int,
                "default": None,
                "help": "Maximum number of newest log files to scan (default: 200).",
            },
        ),
        (
            "--output",
            {"help": "Write a static HTML file instead of starting a server."},
        ),
        (
            "--no-open",
            {
                "action": "store_true",
                "help": "Start the server but do not open a browser.",
            },
        ),
        (
            "--include-tests",
            {
                "action": "store_true",
                "help": "Show test/mock sessions (hidden by default).",
            },
        ),
    )
    for flag, options in option_table:
        debugger.add_argument(flag, **options)

    debugger.set_defaults(func=cmd_debugger)
def cmd_debugger(args: argparse.Namespace) -> int:
    """Launch the LLM debug log visualizer script in a child interpreter.

    The parsed ``hive debugger`` options are translated back into the
    script's own command line. Options the user did not set are not
    forwarded, so the script applies its own defaults for them.

    Args:
        args: Namespace produced by the ``debugger`` subparser.

    Returns:
        The script's exit status, ``1`` when the script file cannot be
        found, or ``0`` when the viewer is stopped with Ctrl+C.
    """
    if not _SCRIPT.is_file():
        # Fail with an actionable message instead of the interpreter's
        # generic "can't open file" error.
        print(f"debugger script not found: {_SCRIPT}", file=sys.stderr)
        return 1

    cmd: list[str] = [sys.executable, str(_SCRIPT)]
    if args.session:
        cmd += ["--session", args.session]
    if args.port:
        # Port 0 means "auto-pick", which is what the script does when the
        # flag is absent, so it never needs forwarding.
        cmd += ["--port", str(args.port)]
    if args.logs_dir:
        cmd += ["--logs-dir", args.logs_dir]
    if args.limit_files is not None:
        cmd += ["--limit-files", str(args.limit_files)]
    if args.output:
        cmd += ["--output", args.output]
    if args.no_open:
        cmd.append("--no-open")
    if args.include_tests:
        cmd.append("--include-tests")

    try:
        return subprocess.call(cmd)
    except KeyboardInterrupt:
        # Ctrl+C reaches this process as well as the child, and
        # subprocess.call re-raises it after the child is reaped. Stopping
        # the viewer this way is the normal exit path, so report success
        # rather than letting a traceback escape.
        return 0
@@ -8,6 +8,7 @@ write. Errors are silently swallowed — this must never break the agent.
import json
import logging
import os
from datetime import datetime
from pathlib import Path
from typing import IO, Any
@@ -47,6 +48,11 @@ def log_llm_turn(
Never raises.
"""
try:
# Skip logging during test runs to avoid polluting real logs.
if os.environ.get("PYTEST_CURRENT_TEST") or os.environ.get(
"HIVE_DISABLE_LLM_LOGS"
):
return
global _log_file, _log_ready # noqa: PLW0603
if not _log_ready:
_log_file = _open_log()
-8
View File
@@ -60,7 +60,6 @@
"integrity": "sha512-CGOfOJqWjg2qW/Mb6zNsDm+u5vFQ8DxXfbM09z69p5Z6+mE1ikP2jUXw+j42Pf1XTYED2Rni5f95npYeuwMDQA==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@babel/code-frame": "^7.29.0",
"@babel/generator": "^7.29.0",
@@ -1557,7 +1556,6 @@
"integrity": "sha512-4K3bqJpXpqfg2XKGK9bpDTc6xO/xoUP/RBWS7AtRMug6zZFaRekiLzjVtAoZMquxoAbzBvy5nxQ7veS5eYzf8A==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"undici-types": "~7.18.0"
}
@@ -1573,7 +1571,6 @@
"resolved": "https://registry.npmjs.org/@types/react/-/react-18.3.28.tgz",
"integrity": "sha512-z9VXpC7MWrhfWipitjNdgCauoMLRdIILQsAEV+ZesIzBq/oUlxk0m3ApZuMFCXdnS4U7KrI+l3WRUEGQ8K1QKw==",
"license": "MIT",
"peer": true,
"dependencies": {
"@types/prop-types": "*",
"csstype": "^3.2.2"
@@ -1786,7 +1783,6 @@
}
],
"license": "MIT",
"peer": true,
"dependencies": {
"baseline-browser-mapping": "^2.9.0",
"caniuse-lite": "^1.0.30001759",
@@ -3564,7 +3560,6 @@
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
"dev": true,
"license": "MIT",
"peer": true,
"engines": {
"node": ">=12"
},
@@ -3616,7 +3611,6 @@
"resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz",
"integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==",
"license": "MIT",
"peer": true,
"dependencies": {
"loose-envify": "^1.1.0"
},
@@ -3629,7 +3623,6 @@
"resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz",
"integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==",
"license": "MIT",
"peer": true,
"dependencies": {
"loose-envify": "^1.1.0",
"scheduler": "^0.23.2"
@@ -4190,7 +4183,6 @@
"integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"esbuild": "^0.25.0",
"fdir": "^6.4.4",
+127 -40
View File
@@ -1,17 +1,21 @@
#!/usr/bin/env python3
"""Open a browser-based viewer for Hive LLM debug JSONL sessions.
Starts a local HTTP server and loads session data on demand (one at a time).
Usage:
uv run --no-project scripts/llm_debug_log_visualizer.py
uv run --no-project scripts/llm_debug_log_visualizer.py --no-open
uv run --no-project scripts/llm_debug_log_visualizer.py --session <execution_id>
uv run --no-project scripts/llm_debug_log_visualizer.py --port 8080
uv run --no-project scripts/llm_debug_log_visualizer.py --output debug.html
"""
from __future__ import annotations
import argparse
import http.server
import json
import tempfile
import urllib.parse
import webbrowser
from collections import defaultdict
from dataclasses import dataclass
@@ -55,10 +59,21 @@ def _parse_args() -> argparse.Namespace:
default=200,
help="Maximum number of newest log files to scan.",
)
parser.add_argument(
"--port",
type=int,
default=0,
help="Port for the local server (0 = auto-pick a free port).",
)
parser.add_argument(
"--no-open",
action="store_true",
help="Generate the HTML but do not open a browser.",
help="Start the server but do not open a browser.",
)
parser.add_argument(
"--include-tests",
action="store_true",
help="Show test/mock sessions (hidden by default).",
)
return parser.parse_args()
@@ -117,8 +132,29 @@ def _format_timestamp(raw: str) -> str:
return raw
def _is_test_session(execution_id: str, records: list[dict[str, Any]]) -> bool:
"""Return True for sessions that look like test artifacts."""
if execution_id.startswith("<MagicMock"):
return True
models = {
str(r.get("token_counts", {}).get("model", ""))
for r in records
if isinstance(r.get("token_counts"), dict)
}
models.discard("")
# Sessions that only used the mock LLM provider.
if models and models <= {"mock"}:
return True
# Sessions with no real model at all (empty string or missing).
if not models:
return True
return False
def _group_sessions(
records: list[dict[str, Any]],
*,
include_tests: bool = False,
) -> tuple[list[SessionSummary], dict[str, list[dict[str, Any]]]]:
by_session: dict[str, list[dict[str, Any]]] = defaultdict(list)
for record in records:
@@ -126,6 +162,13 @@ def _group_sessions(
if execution_id:
by_session[execution_id].append(record)
if not include_tests:
by_session = {
eid: recs
for eid, recs in by_session.items()
if not _is_test_session(eid, recs)
}
summaries: list[SessionSummary] = []
for execution_id, session_records in by_session.items():
session_records.sort(
@@ -174,7 +217,6 @@ def _group_sessions(
def _render_html(
summaries: list[SessionSummary],
sessions: dict[str, list[dict[str, Any]]],
initial_session_id: str,
) -> str:
summaries_data = [
@@ -193,16 +235,6 @@ def _render_html(
for summary in summaries
]
sessions_data = {
execution_id: sorted(
records,
key=lambda record: (
str(record.get("timestamp", "")),
record.get("iteration", 0),
),
)
for execution_id, records in sessions.items()
}
initial = initial_session_id or (summaries[0].execution_id if summaries else "")
return f"""<!DOCTYPE html>
<html lang="en">
@@ -579,10 +611,9 @@ def _render_html(
</div>
<script id="session-summaries" type="application/json">{json.dumps(summaries_data, ensure_ascii=False)}</script>
<script id="session-records" type="application/json">{json.dumps(sessions_data, ensure_ascii=False)}</script>
<script>
const summaries = JSON.parse(document.getElementById("session-summaries").textContent);
const recordsBySession = JSON.parse(document.getElementById("session-records").textContent);
const recordCache = {{}};
const initialSessionId = {json.dumps(initial, ensure_ascii=False)};
const sessionSearch = document.getElementById("sessionSearch");
@@ -746,10 +777,18 @@ def _render_html(
`;
}}
function renderSession(sessionId) {{
async function fetchSession(sessionId) {{
if (recordCache[sessionId]) return recordCache[sessionId];
const resp = await fetch(`/api/session/${{encodeURIComponent(sessionId)}}`);
if (!resp.ok) return [];
const data = await resp.json();
recordCache[sessionId] = data;
return data;
}}
async function renderSession(sessionId) {{
activeSessionId = sessionId;
const summary = summaries.find((entry) => entry.execution_id === sessionId);
const records = recordsBySession[sessionId] || [];
renderSessionChooser();
@@ -773,6 +812,9 @@ def _render_html(
renderMetaCard("Source file", summary.log_file),
].join("");
turnsEl.innerHTML = '<div class="empty">Loading session\u2026</div>';
const records = await fetchSession(sessionId);
if (activeSessionId !== sessionId) return;
turnsEl.innerHTML = records.length
? records.map((record) => renderTurn(record)).join("")
: '<div class="empty">This session has no turn records.</div>';
@@ -804,7 +846,8 @@ def _render_html(
}});
const hashSession = decodeURIComponent(window.location.hash.replace(/^#/, ""));
const bootSession = recordsBySession[hashSession] ? hashSession : activeSessionId;
const knownIds = new Set(summaries.map((s) => s.execution_id));
const bootSession = knownIds.has(hashSession) ? hashSession : activeSessionId;
renderSessionChooser();
renderSession(bootSession);
</script>
@@ -813,28 +856,70 @@ def _render_html(
"""
def _write_report(html_report: str, output: Path | None) -> Path:
if output is not None:
output.parent.mkdir(parents=True, exist_ok=True)
output.write_text(html_report, encoding="utf-8")
return output
def _sort_records(records: list[dict[str, Any]]) -> list[dict[str, Any]]:
return sorted(
records,
key=lambda r: (str(r.get("timestamp", "")), r.get("iteration", 0)),
)
with tempfile.NamedTemporaryFile(
mode="w",
encoding="utf-8",
prefix="hive_llm_debug_",
suffix=".html",
delete=False,
dir="/tmp",
) as handle:
handle.write(html_report)
return Path(handle.name)
def _run_server(
    html: str,
    sessions: dict[str, list[dict[str, Any]]],
    port: int,
    no_open: bool,
) -> None:
    """Serve the viewer page and per-session records until interrupted.

    Two routes are exposed on ``127.0.0.1``:

    * ``/`` returns *html*;
    * ``/api/session/<id>`` returns that session's records as a JSON array,
      sorted with ``_sort_records``, or ``[]`` with status 404 when the id is
      unknown.

    Any other path gets a plain 404. The call blocks in ``serve_forever`` and
    returns after Ctrl+C.

    Args:
        html: Fully rendered viewer page.
        sessions: Records keyed by execution id.
        port: Port to bind; ``0`` lets the OS choose, and the chosen port is
            the one printed and opened.
        no_open: When true, do not open the URL in a browser.
    """
    html_bytes = html.encode("utf-8")
    api_prefix = "/api/session/"

    class Handler(http.server.BaseHTTPRequestHandler):
        def do_GET(self) -> None:
            # self.path is the raw request target and may carry a query
            # string. Route on the path component only, so "/?x=1" still
            # serves the page and a query suffix never leaks into the id.
            route = self.path.partition("?")[0]
            if route == "/":
                self._respond(200, "text/html; charset=utf-8", html_bytes)
            elif route.startswith(api_prefix):
                sid = urllib.parse.unquote(route[len(api_prefix):])
                records = sessions.get(sid)
                if records is None:
                    self._respond(404, "application/json", b"[]")
                else:
                    body = json.dumps(
                        _sort_records(records), ensure_ascii=False
                    ).encode("utf-8")
                    self._respond(200, "application/json", body)
            else:
                self.send_error(404)

        def _respond(self, code: int, content_type: str, body: bytes) -> None:
            """Send a complete response with an explicit Content-Length."""
            self.send_response(code)
            self.send_header("Content-Type", content_type)
            self.send_header("Content-Length", str(len(body)))
            self.end_headers()
            self.wfile.write(body)

        def log_message(self, format: str, *args: object) -> None:
            pass  # silence per-request logs

    server = http.server.HTTPServer(("127.0.0.1", port), Handler)
    # Read the port back from the socket: with port=0 the OS picked it.
    actual_port = server.server_address[1]
    url = f"http://127.0.0.1:{actual_port}"
    print(f"Serving at {url}  (Ctrl+C to stop)")

    if not no_open:
        webbrowser.open(url)

    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("\nStopped.")
    finally:
        server.server_close()
def main() -> int:
args = _parse_args()
records = _discover_records(args.logs_dir.expanduser(), args.limit_files)
summaries, sessions = _group_sessions(records)
summaries, sessions = _group_sessions(
records, include_tests=args.include_tests
)
initial_session_id = args.session or (
summaries[0].execution_id if summaries else ""
@@ -843,13 +928,15 @@ def main() -> int:
print(f"session not found: {initial_session_id}")
return 1
html_report = _render_html(summaries, sessions, initial_session_id)
output_path = _write_report(html_report, args.output)
print(output_path)
html_report = _render_html(summaries, initial_session_id)
if not args.no_open:
webbrowser.open(output_path.resolve().as_uri())
if args.output:
args.output.parent.mkdir(parents=True, exist_ok=True)
args.output.write_text(html_report, encoding="utf-8")
print(args.output)
return 0
_run_server(html_report, sessions, args.port, args.no_open)
return 0