feat: llm logging

Richard Tang
2026-05-04 17:57:20 -07:00
parent 07600c5ab5
commit 07c97e2e9b
3 changed files with 155 additions and 1099 deletions
+75 -13
@@ -1,11 +1,22 @@
"""Write every LLM turn to ~/.hive/llm_logs/<ts>.jsonl for replay/debugging.
Each line is a JSON object with the full LLM turn: the request payload
(system prompt + messages), assistant text, tool calls, tool results, and
token counts. The file is opened lazily on first call and flushed after every
write. Errors are silently swallowed this must never break the agent.
Two record kinds, distinguished by ``_kind``:
* ``session_header`` emitted on the first turn of an ``execution_id`` and
any time its ``system_prompt`` or ``tools`` change. Carries those large
fields once instead of per-turn.
* ``turn`` one per LLM call. Carries per-turn outputs plus a
content-addressed message delta: ``message_hashes`` is the full ordered
message sequence for this turn, ``new_messages`` is hash body for
messages we haven't emitted before for this ``execution_id``. The reader
reassembles full ``messages`` by accumulating ``new_messages`` across
prior turn records. Content-addressed (not positional) because the agent
prunes messages mid-session a tail-delta would be wrong.
Errors are silently swallowed this must never break the agent.
"""
import hashlib
import json
import logging
import os
@@ -28,6 +39,12 @@ def _llm_debug_dir() -> Path:
_log_file: IO[str] | None = None
_log_ready = False # lazy init guard
# Per-execution_id delta state. Reset implicitly on process restart — a fresh
# log file has no prior context, so re-emitting the header on first turn is
# correct.
_session_header_hash: dict[str, str] = {}
_session_seen_msgs: dict[str, set[str]] = {}
def _open_log() -> IO[str] | None:
"""Open the JSONL log file for this process."""
@@ -61,6 +78,17 @@ def _serialize_tools(tools: Any) -> list[dict[str, Any]]:
return out
def _content_hash(payload: Any) -> str:
raw = json.dumps(payload, default=str, sort_keys=True, ensure_ascii=False)
return hashlib.sha256(raw.encode("utf-8")).hexdigest()[:16]
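Because ``_content_hash`` serializes with ``sort_keys=True``, logically equal messages hash identically regardless of key order, which is what lets the delta logic dedupe them. A quick sanity check (values invented):

a = {"role": "user", "content": "hi"}
b = {"content": "hi", "role": "user"}
# sort_keys=True canonicalizes the JSON, so both dicts serialize to the
# same string and therefore the same 16-hex-char digest.
assert _content_hash(a) == _content_hash(b)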
def _write_line(record: dict[str, Any]) -> None:
assert _log_file is not None
_log_file.write(json.dumps(record, default=str) + "\n")
_log_file.flush()
def log_llm_turn(
*,
node_id: str,
@@ -75,7 +103,7 @@ def log_llm_turn(
token_counts: dict[str, Any],
tools: list[Any] | None = None,
) -> None:
"""Write one JSONL line capturing a complete LLM turn.
"""Write JSONL records capturing one LLM turn (header + turn delta).
Never raises.
"""
@@ -89,23 +117,57 @@ def log_llm_turn(
_log_ready = True
if _log_file is None:
return
# UTC + offset matches tool_call start_timestamp (agent_loop.py)
# so the viewer can render every event in one consistent local zone.
timestamp = datetime.now(UTC).isoformat()
serialized_tools = _serialize_tools(tools)
# Re-emit the header on first turn or whenever system/tools change.
# The Queen reflects different prompts across turns, so we can't
# assume strict immutability per execution_id.
header_hash = _content_hash({"system_prompt": system_prompt, "tools": serialized_tools})
if _session_header_hash.get(execution_id) != header_hash:
_write_line(
{
"_kind": "session_header",
"timestamp": timestamp,
"execution_id": execution_id,
"node_id": node_id,
"stream_id": stream_id,
"execution_id": execution_id,
"iteration": iteration,
"header_hash": header_hash,
"system_prompt": system_prompt,
"tools": _serialize_tools(tools),
"messages": messages,
"tools": serialized_tools,
}
)
_session_header_hash[execution_id] = header_hash
seen = _session_seen_msgs.setdefault(execution_id, set())
message_hashes: list[str] = []
new_messages: dict[str, dict[str, Any]] = {}
for msg in messages or []:
h = _content_hash(msg)
message_hashes.append(h)
if h not in seen:
seen.add(h)
new_messages[h] = msg
_write_line(
{
"_kind": "turn",
"timestamp": timestamp,
"execution_id": execution_id,
"node_id": node_id,
"stream_id": stream_id,
"iteration": iteration,
"header_hash": header_hash,
"message_hashes": message_hashes,
"new_messages": new_messages,
"assistant_text": assistant_text,
"tool_calls": tool_calls,
"tool_results": tool_results,
"token_counts": token_counts,
}
)
except Exception:
pass # never break the agent
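A minimal sketch of a call site, with keyword arguments inferred from the signature and body above (all values are invented; the real agent loop supplies its own):

log_llm_turn(
    node_id="node-1",
    stream_id="stream-1",
    execution_id="exec-42",
    iteration=0,
    system_prompt="You are a helpful agent.",
    messages=[{"role": "user", "content": "hello"}],
    assistant_text="hi there",
    tool_calls=[],
    tool_results=[],
    token_counts={"input_tokens": 12, "output_tokens": 3},
    tools=None,
)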
File diff suppressed because it is too large.
+64 -3
@@ -64,6 +64,54 @@ def _format_timestamp(raw: str) -> str:
return raw
def _reassemble_records(records: list[dict[str, Any]]) -> list[dict[str, Any]]:
"""Convert new-format (header + turn-delta) records into legacy-shape full turns.
Records lacking ``_kind`` are passed through unchanged. Inputs must be in
file order so headers precede the turns that reference them.
"""
headers: dict[str, dict[str, Any]] = {} # execution_id -> latest session_header
pools: dict[str, dict[str, dict[str, Any]]] = {} # execution_id -> hash -> message body
out: list[dict[str, Any]] = []
for rec in records:
kind = rec.get("_kind")
if kind == "session_header":
eid = str(rec.get("execution_id") or "")
headers[eid] = rec
pools.setdefault(eid, {})
continue
if kind == "turn":
eid = str(rec.get("execution_id") or "")
pool = pools.setdefault(eid, {})
new_msgs = rec.get("new_messages") or {}
if isinstance(new_msgs, dict):
pool.update(new_msgs)
hashes = rec.get("message_hashes") or []
messages = [pool[h] for h in hashes if h in pool]
header = headers.get(eid, {})
out.append(
{
"timestamp": rec.get("timestamp", ""),
"execution_id": eid,
"node_id": rec.get("node_id", ""),
"stream_id": rec.get("stream_id", ""),
"iteration": rec.get("iteration", 0),
"system_prompt": header.get("system_prompt", ""),
"tools": header.get("tools", []),
"messages": messages,
"assistant_text": rec.get("assistant_text", ""),
"tool_calls": rec.get("tool_calls", []),
"tool_results": rec.get("tool_results", []),
"token_counts": rec.get("token_counts", {}),
"_log_file": rec.get("_log_file", ""),
}
)
continue
out.append(rec)
return out
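To see the delta round-trip, a small hand-built input (hashes are fake but internally consistent) should reassemble into full turns:

records = [
    {"_kind": "session_header", "execution_id": "e1",
     "system_prompt": "sys", "tools": []},
    {"_kind": "turn", "execution_id": "e1", "iteration": 0,
     "message_hashes": ["h1"],
     "new_messages": {"h1": {"role": "user", "content": "hi"}}},
    {"_kind": "turn", "execution_id": "e1", "iteration": 1,
     "message_hashes": ["h1", "h2"],
     "new_messages": {"h2": {"role": "assistant", "content": "yo"}}},
]
turns = _reassemble_records(records)
# The second turn reuses h1 from the pool and adds only h2.
assert turns[1]["messages"] == [
    {"role": "user", "content": "hi"},
    {"role": "assistant", "content": "yo"},
]
assert turns[1]["system_prompt"] == "sys"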
def _is_test_session(execution_id: str, records: list[dict[str, Any]]) -> bool:
if execution_id.startswith("<MagicMock"):
return True
@@ -100,6 +148,9 @@ def _discover_session_summaries(logs_dir: Path, limit_files: int, include_tests:
payload = json.loads(line)
except json.JSONDecodeError:
continue
# session_header is metadata, not a turn — don't count it.
if payload.get("_kind") == "session_header":
continue
eid = str(payload.get("execution_id") or "").strip()
if not eid:
continue
@@ -157,6 +208,10 @@ def _load_session_data(logs_dir: Path, session_id: str, limit_files: int) -> lis
records: list[dict[str, Any]] = []
for path in files:
# Reassemble per-file: each file is self-contained because the writer
# re-emits the session_header on every process start, so we never need
# cross-file state to fill in messages/system_prompt/tools.
file_records: list[dict[str, Any]] = []
try:
with path.open(encoding="utf-8") as handle:
for line_number, raw_line in enumerate(handle, start=1):
@@ -166,17 +221,23 @@ def _load_session_data(logs_dir: Path, session_id: str, limit_files: int) -> lis
try:
payload = json.loads(line)
except json.JSONDecodeError:
records.append(
{
"timestamp": "",
"execution_id": "",
"_parse_error": f"{path.name}:{line_number}",
"_raw_line": line,
"_log_file": str(path),
}
)
continue
if str(payload.get("execution_id") or "").strip() != session_id:
continue
payload["_log_file"] = str(path)
file_records.append(payload)
except OSError:
continue
records.extend(_reassemble_records(file_records))
if not records:
return None