feat: hive debugger
This commit is contained in:
@@ -94,6 +94,11 @@ def main():
|
||||
|
||||
register_skill_commands(subparsers)
|
||||
|
||||
# Register debugger commands (debugger)
|
||||
from framework.debugger.cli import register_debugger_commands
|
||||
|
||||
register_debugger_commands(subparsers)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if hasattr(args, "func"):
|
||||
|
||||
@@ -0,0 +1,78 @@
|
||||
"""CLI command for the LLM debug log viewer."""
|
||||
|
||||
import argparse
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Absolute path of the standalone visualizer script that ``cmd_debugger``
# runs in a child interpreter. ``parents[3]`` is the directory three levels
# above the one containing this module; the script is looked up in its
# ``scripts/`` sub-directory.
# NOTE(review): this presumably resolves to the repository root and therefore
# assumes a source-checkout layout — confirm it also holds for installed
# packages.
_SCRIPT = (
    Path(__file__).resolve().parents[3] / "scripts" / "llm_debug_log_visualizer.py"
)
|
||||
|
||||
|
||||
def register_debugger_commands(subparsers: argparse._SubParsersAction) -> None:
    """Attach the ``debugger`` sub-command and its options to *subparsers*.

    The created sub-parser stores ``cmd_debugger`` as its ``func`` default so
    the caller can dispatch on ``args.func`` after parsing.
    """
    description = (
        "Start a local server that lets you browse LLM debug sessions "
        "recorded in ~/.hive/llm_logs. Sessions are loaded on demand so "
        "the browser stays responsive."
    )
    debugger = subparsers.add_parser(
        "debugger",
        help="Open the LLM debug log viewer",
        description=description,
    )

    # Options are declared as data; they are registered in this order, which
    # is also the order in which argparse lists them in ``--help``.
    option_specs = (
        ("--session", {"help": "Execution ID to select initially."}),
        (
            "--port",
            {
                "type": int,
                "default": 0,
                "help": "Port for the local server (0 = auto-pick a free port).",
            },
        ),
        (
            "--logs-dir",
            {
                "help": (
                    "Directory containing JSONL log files "
                    "(default: ~/.hive/llm_logs)."
                ),
            },
        ),
        (
            "--limit-files",
            {
                "type": int,
                # None means "not given", so the flag is not forwarded.
                "default": None,
                "help": "Maximum number of newest log files to scan (default: 200).",
            },
        ),
        (
            "--output",
            {"help": "Write a static HTML file instead of starting a server."},
        ),
        (
            "--no-open",
            {
                "action": "store_true",
                "help": "Start the server but do not open a browser.",
            },
        ),
        (
            "--include-tests",
            {
                "action": "store_true",
                "help": "Show test/mock sessions (hidden by default).",
            },
        ),
    )
    for flag, settings in option_specs:
        debugger.add_argument(flag, **settings)

    debugger.set_defaults(func=cmd_debugger)
|
||||
|
||||
|
||||
def cmd_debugger(args: argparse.Namespace) -> int:
    """Launch the LLM debug log visualizer script in a child interpreter.

    Only options the user actually set are forwarded, so the script's own
    defaults apply to everything else (a ``--port`` of 0 and an unset
    ``--limit-files`` are therefore not passed on).

    Args:
        args: Parsed arguments of the ``debugger`` sub-command.

    Returns:
        The child process's exit code, or 1 when the visualizer script does
        not exist at the expected location.
    """
    if not _SCRIPT.is_file():
        # Fail with a clear message instead of the interpreter's generic
        # "can't open file" error from the child process.
        print(f"debugger script not found: {_SCRIPT}", file=sys.stderr)
        return 1

    cmd: list[str] = [sys.executable, str(_SCRIPT)]

    # Value options: forwarded only when set.
    if args.session:
        cmd += ["--session", args.session]
    if args.port:
        cmd += ["--port", str(args.port)]
    if args.logs_dir:
        cmd += ["--logs-dir", args.logs_dir]
    if args.limit_files is not None:
        cmd += ["--limit-files", str(args.limit_files)]
    if args.output:
        cmd += ["--output", args.output]

    # Boolean switches.
    if args.no_open:
        cmd.append("--no-open")
    if args.include_tests:
        cmd.append("--include-tests")

    with subprocess.Popen(cmd) as proc:
        try:
            return proc.wait()
        except KeyboardInterrupt:
            # Ctrl+C is the documented way to stop the viewer. The script
            # handles the interrupt itself, so give it a moment to shut down
            # and report its exit code rather than surfacing a traceback from
            # this wrapper.
            try:
                return proc.wait(timeout=5)
            except subprocess.TimeoutExpired:
                # The child did not exit on its own (e.g. it never received
                # the interrupt); stop it explicitly so we do not hang.
                proc.terminate()
                return proc.wait()
|
||||
@@ -8,6 +8,7 @@ write. Errors are silently swallowed — this must never break the agent.
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import IO, Any
|
||||
@@ -47,6 +48,11 @@ def log_llm_turn(
|
||||
Never raises.
|
||||
"""
|
||||
try:
|
||||
# Skip logging during test runs to avoid polluting real logs.
|
||||
if os.environ.get("PYTEST_CURRENT_TEST") or os.environ.get(
|
||||
"HIVE_DISABLE_LLM_LOGS"
|
||||
):
|
||||
return
|
||||
global _log_file, _log_ready # noqa: PLW0603
|
||||
if not _log_ready:
|
||||
_log_file = _open_log()
|
||||
|
||||
Generated
-8
@@ -60,7 +60,6 @@
|
||||
"integrity": "sha512-CGOfOJqWjg2qW/Mb6zNsDm+u5vFQ8DxXfbM09z69p5Z6+mE1ikP2jUXw+j42Pf1XTYED2Rni5f95npYeuwMDQA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@babel/code-frame": "^7.29.0",
|
||||
"@babel/generator": "^7.29.0",
|
||||
@@ -1557,7 +1556,6 @@
|
||||
"integrity": "sha512-4K3bqJpXpqfg2XKGK9bpDTc6xO/xoUP/RBWS7AtRMug6zZFaRekiLzjVtAoZMquxoAbzBvy5nxQ7veS5eYzf8A==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"undici-types": "~7.18.0"
|
||||
}
|
||||
@@ -1573,7 +1571,6 @@
|
||||
"resolved": "https://registry.npmjs.org/@types/react/-/react-18.3.28.tgz",
|
||||
"integrity": "sha512-z9VXpC7MWrhfWipitjNdgCauoMLRdIILQsAEV+ZesIzBq/oUlxk0m3ApZuMFCXdnS4U7KrI+l3WRUEGQ8K1QKw==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@types/prop-types": "*",
|
||||
"csstype": "^3.2.2"
|
||||
@@ -1786,7 +1783,6 @@
|
||||
}
|
||||
],
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"baseline-browser-mapping": "^2.9.0",
|
||||
"caniuse-lite": "^1.0.30001759",
|
||||
@@ -3564,7 +3560,6 @@
|
||||
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
@@ -3616,7 +3611,6 @@
|
||||
"resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz",
|
||||
"integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"loose-envify": "^1.1.0"
|
||||
},
|
||||
@@ -3629,7 +3623,6 @@
|
||||
"resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz",
|
||||
"integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"loose-envify": "^1.1.0",
|
||||
"scheduler": "^0.23.2"
|
||||
@@ -4190,7 +4183,6 @@
|
||||
"integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"esbuild": "^0.25.0",
|
||||
"fdir": "^6.4.4",
|
||||
|
||||
@@ -1,17 +1,21 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Open a browser-based viewer for Hive LLM debug JSONL sessions.
|
||||
|
||||
Starts a local HTTP server and loads session data on demand (one at a time).
|
||||
|
||||
Usage:
|
||||
uv run --no-project scripts/llm_debug_log_visualizer.py
|
||||
uv run --no-project scripts/llm_debug_log_visualizer.py --no-open
|
||||
uv run --no-project scripts/llm_debug_log_visualizer.py --session <execution_id>
|
||||
uv run --no-project scripts/llm_debug_log_visualizer.py --port 8080
|
||||
uv run --no-project scripts/llm_debug_log_visualizer.py --output debug.html
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import http.server
|
||||
import json
|
||||
import tempfile
|
||||
import urllib.parse
|
||||
import webbrowser
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
@@ -55,10 +59,21 @@ def _parse_args() -> argparse.Namespace:
|
||||
default=200,
|
||||
help="Maximum number of newest log files to scan.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--port",
|
||||
type=int,
|
||||
default=0,
|
||||
help="Port for the local server (0 = auto-pick a free port).",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-open",
|
||||
action="store_true",
|
||||
help="Generate the HTML but do not open a browser.",
|
||||
help="Start the server but do not open a browser.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--include-tests",
|
||||
action="store_true",
|
||||
help="Show test/mock sessions (hidden by default).",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
@@ -117,8 +132,29 @@ def _format_timestamp(raw: str) -> str:
|
||||
return raw
|
||||
|
||||
|
||||
def _is_test_session(execution_id: str, records: list[dict[str, Any]]) -> bool:
|
||||
"""Return True for sessions that look like test artifacts."""
|
||||
if execution_id.startswith("<MagicMock"):
|
||||
return True
|
||||
models = {
|
||||
str(r.get("token_counts", {}).get("model", ""))
|
||||
for r in records
|
||||
if isinstance(r.get("token_counts"), dict)
|
||||
}
|
||||
models.discard("")
|
||||
# Sessions that only used the mock LLM provider.
|
||||
if models and models <= {"mock"}:
|
||||
return True
|
||||
# Sessions with no real model at all (empty string or missing).
|
||||
if not models:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _group_sessions(
|
||||
records: list[dict[str, Any]],
|
||||
*,
|
||||
include_tests: bool = False,
|
||||
) -> tuple[list[SessionSummary], dict[str, list[dict[str, Any]]]]:
|
||||
by_session: dict[str, list[dict[str, Any]]] = defaultdict(list)
|
||||
for record in records:
|
||||
@@ -126,6 +162,13 @@ def _group_sessions(
|
||||
if execution_id:
|
||||
by_session[execution_id].append(record)
|
||||
|
||||
if not include_tests:
|
||||
by_session = {
|
||||
eid: recs
|
||||
for eid, recs in by_session.items()
|
||||
if not _is_test_session(eid, recs)
|
||||
}
|
||||
|
||||
summaries: list[SessionSummary] = []
|
||||
for execution_id, session_records in by_session.items():
|
||||
session_records.sort(
|
||||
@@ -174,7 +217,6 @@ def _group_sessions(
|
||||
|
||||
def _render_html(
|
||||
summaries: list[SessionSummary],
|
||||
sessions: dict[str, list[dict[str, Any]]],
|
||||
initial_session_id: str,
|
||||
) -> str:
|
||||
summaries_data = [
|
||||
@@ -193,16 +235,6 @@ def _render_html(
|
||||
for summary in summaries
|
||||
]
|
||||
|
||||
sessions_data = {
|
||||
execution_id: sorted(
|
||||
records,
|
||||
key=lambda record: (
|
||||
str(record.get("timestamp", "")),
|
||||
record.get("iteration", 0),
|
||||
),
|
||||
)
|
||||
for execution_id, records in sessions.items()
|
||||
}
|
||||
initial = initial_session_id or (summaries[0].execution_id if summaries else "")
|
||||
return f"""<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
@@ -579,10 +611,9 @@ def _render_html(
|
||||
</div>
|
||||
|
||||
<script id="session-summaries" type="application/json">{json.dumps(summaries_data, ensure_ascii=False)}</script>
|
||||
<script id="session-records" type="application/json">{json.dumps(sessions_data, ensure_ascii=False)}</script>
|
||||
<script>
|
||||
const summaries = JSON.parse(document.getElementById("session-summaries").textContent);
|
||||
const recordsBySession = JSON.parse(document.getElementById("session-records").textContent);
|
||||
const recordCache = {{}};
|
||||
const initialSessionId = {json.dumps(initial, ensure_ascii=False)};
|
||||
|
||||
const sessionSearch = document.getElementById("sessionSearch");
|
||||
@@ -746,10 +777,18 @@ def _render_html(
|
||||
`;
|
||||
}}
|
||||
|
||||
function renderSession(sessionId) {{
|
||||
async function fetchSession(sessionId) {{
|
||||
if (recordCache[sessionId]) return recordCache[sessionId];
|
||||
const resp = await fetch(`/api/session/${{encodeURIComponent(sessionId)}}`);
|
||||
if (!resp.ok) return [];
|
||||
const data = await resp.json();
|
||||
recordCache[sessionId] = data;
|
||||
return data;
|
||||
}}
|
||||
|
||||
async function renderSession(sessionId) {{
|
||||
activeSessionId = sessionId;
|
||||
const summary = summaries.find((entry) => entry.execution_id === sessionId);
|
||||
const records = recordsBySession[sessionId] || [];
|
||||
|
||||
renderSessionChooser();
|
||||
|
||||
@@ -773,6 +812,9 @@ def _render_html(
|
||||
renderMetaCard("Source file", summary.log_file),
|
||||
].join("");
|
||||
|
||||
turnsEl.innerHTML = '<div class="empty">Loading session\u2026</div>';
|
||||
const records = await fetchSession(sessionId);
|
||||
if (activeSessionId !== sessionId) return;
|
||||
turnsEl.innerHTML = records.length
|
||||
? records.map((record) => renderTurn(record)).join("")
|
||||
: '<div class="empty">This session has no turn records.</div>';
|
||||
@@ -804,7 +846,8 @@ def _render_html(
|
||||
}});
|
||||
|
||||
const hashSession = decodeURIComponent(window.location.hash.replace(/^#/, ""));
|
||||
const bootSession = recordsBySession[hashSession] ? hashSession : activeSessionId;
|
||||
const knownIds = new Set(summaries.map((s) => s.execution_id));
|
||||
const bootSession = knownIds.has(hashSession) ? hashSession : activeSessionId;
|
||||
renderSessionChooser();
|
||||
renderSession(bootSession);
|
||||
</script>
|
||||
@@ -813,28 +856,70 @@ def _render_html(
|
||||
"""
|
||||
|
||||
|
||||
def _write_report(html_report: str, output: Path | None) -> Path:
|
||||
if output is not None:
|
||||
output.parent.mkdir(parents=True, exist_ok=True)
|
||||
output.write_text(html_report, encoding="utf-8")
|
||||
return output
|
||||
def _sort_records(records: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
return sorted(
|
||||
records,
|
||||
key=lambda r: (str(r.get("timestamp", "")), r.get("iteration", 0)),
|
||||
)
|
||||
|
||||
with tempfile.NamedTemporaryFile(
|
||||
mode="w",
|
||||
encoding="utf-8",
|
||||
prefix="hive_llm_debug_",
|
||||
suffix=".html",
|
||||
delete=False,
|
||||
dir="/tmp",
|
||||
) as handle:
|
||||
handle.write(html_report)
|
||||
return Path(handle.name)
|
||||
|
||||
def _run_server(
    html: str,
    sessions: dict[str, list[dict[str, Any]]],
    port: int,
    no_open: bool,
) -> None:
    """Serve the viewer page and per-session records until interrupted.

    ``GET /`` returns *html*; ``GET /api/session/<id>`` returns the sorted
    records of that session as JSON (or ``[]`` with status 404 when the ID is
    unknown); every other path gets a plain 404. The server binds to
    127.0.0.1 on *port*, prints its URL, opens it in a browser unless
    *no_open* is set, and runs until ``KeyboardInterrupt``.
    """
    page = html.encode("utf-8")
    api_prefix = "/api/session/"
    json_type = "application/json"

    class Handler(http.server.BaseHTTPRequestHandler):
        def do_GET(self) -> None:
            path = self.path
            if path == "/":
                self._respond(200, "text/html; charset=utf-8", page)
                return
            if not path.startswith(api_prefix):
                self.send_error(404)
                return

            # The front-end percent-encodes the session ID; decode it before
            # looking the session up.
            session_id = urllib.parse.unquote(path[len(api_prefix):])
            found = sessions.get(session_id)
            if found is None:
                self._respond(404, json_type, b"[]")
                return

            payload = json.dumps(_sort_records(found), ensure_ascii=False)
            self._respond(200, json_type, payload.encode("utf-8"))

        def _respond(self, code: int, content_type: str, body: bytes) -> None:
            """Send a complete response with explicit type and length."""
            self.send_response(code)
            headers = (
                ("Content-Type", content_type),
                ("Content-Length", str(len(body))),
            )
            for header, value in headers:
                self.send_header(header, value)
            self.end_headers()
            self.wfile.write(body)

        def log_message(self, format: str, *args: object) -> None:
            """Suppress the default per-request log lines."""
            return None

    server = http.server.HTTPServer(("127.0.0.1", port), Handler)
    # Read the port back from the socket: with port 0 the OS picks one.
    bound_port = server.server_address[1]
    url = f"http://127.0.0.1:{bound_port}"
    print(f"Serving at {url} (Ctrl+C to stop)")

    if not no_open:
        webbrowser.open(url)

    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("\nStopped.")
    finally:
        server.server_close()
|
||||
|
||||
|
||||
def main() -> int:
|
||||
args = _parse_args()
|
||||
records = _discover_records(args.logs_dir.expanduser(), args.limit_files)
|
||||
summaries, sessions = _group_sessions(records)
|
||||
summaries, sessions = _group_sessions(
|
||||
records, include_tests=args.include_tests
|
||||
)
|
||||
|
||||
initial_session_id = args.session or (
|
||||
summaries[0].execution_id if summaries else ""
|
||||
@@ -843,13 +928,15 @@ def main() -> int:
|
||||
print(f"session not found: {initial_session_id}")
|
||||
return 1
|
||||
|
||||
html_report = _render_html(summaries, sessions, initial_session_id)
|
||||
output_path = _write_report(html_report, args.output)
|
||||
print(output_path)
|
||||
html_report = _render_html(summaries, initial_session_id)
|
||||
|
||||
if not args.no_open:
|
||||
webbrowser.open(output_path.resolve().as_uri())
|
||||
if args.output:
|
||||
args.output.parent.mkdir(parents=True, exist_ok=True)
|
||||
args.output.write_text(html_report, encoding="utf-8")
|
||||
print(args.output)
|
||||
return 0
|
||||
|
||||
_run_server(html_report, sessions, args.port, args.no_open)
|
||||
return 0
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user