feat: llm logging

Richard Tang
2026-05-04 17:57:20 -07:00
parent 07600c5ab5
commit 07c97e2e9b
3 changed files with 155 additions and 1099 deletions
+85 -23
@@ -1,11 +1,22 @@
"""Write every LLM turn to ~/.hive/llm_logs/<ts>.jsonl for replay/debugging. """Write every LLM turn to ~/.hive/llm_logs/<ts>.jsonl for replay/debugging.
Each line is a JSON object with the full LLM turn: the request payload Two record kinds, distinguished by ``_kind``:
(system prompt + messages), assistant text, tool calls, tool results, and
token counts. The file is opened lazily on first call and flushed after every * ``session_header`` emitted on the first turn of an ``execution_id`` and
write. Errors are silently swallowed this must never break the agent. any time its ``system_prompt`` or ``tools`` change. Carries those large
fields once instead of per-turn.
* ``turn`` one per LLM call. Carries per-turn outputs plus a
content-addressed message delta: ``message_hashes`` is the full ordered
message sequence for this turn, ``new_messages`` is hash body for
messages we haven't emitted before for this ``execution_id``. The reader
reassembles full ``messages`` by accumulating ``new_messages`` across
prior turn records. Content-addressed (not positional) because the agent
prunes messages mid-session a tail-delta would be wrong.
Errors are silently swallowed this must never break the agent.
""" """
import hashlib
import json import json
import logging import logging
import os import os
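
(Aside, not part of the diff: with this format, a session's first turn produces two JSONL lines shaped roughly like the sketch below. The field names all come from the writer in this commit; the hashes, IDs, and message contents are invented for illustration.)

    {"_kind": "session_header", "timestamp": "2026-05-04T...", "execution_id": "exec-1",
     "node_id": "n1", "stream_id": "s1", "header_hash": "9f2ca...",
     "system_prompt": "...", "tools": [...]}
    {"_kind": "turn", "timestamp": "2026-05-04T...", "execution_id": "exec-1",
     "node_id": "n1", "stream_id": "s1", "iteration": 0, "header_hash": "9f2ca...",
     "message_hashes": ["a1b2...", "c3d4..."],
     "new_messages": {"a1b2...": {"role": "user", "content": "hi"},
                      "c3d4...": {"role": "assistant", "content": "hello"}},
     "assistant_text": "hello", "tool_calls": [], "tool_results": [],
     "token_counts": {}}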
@@ -28,6 +39,12 @@ def _llm_debug_dir() -> Path:
 _log_file: IO[str] | None = None
 _log_ready = False  # lazy init guard
 
+# Per-execution_id delta state. Reset implicitly on process restart: a fresh
+# log file has no prior context, so re-emitting the header on first turn is
+# correct.
+_session_header_hash: dict[str, str] = {}
+_session_seen_msgs: dict[str, set[str]] = {}
+
 
 def _open_log() -> IO[str] | None:
     """Open the JSONL log file for this process."""
@@ -61,6 +78,17 @@ def _serialize_tools(tools: Any) -> list[dict[str, Any]]:
     return out
 
 
+def _content_hash(payload: Any) -> str:
+    raw = json.dumps(payload, default=str, sort_keys=True, ensure_ascii=False)
+    return hashlib.sha256(raw.encode("utf-8")).hexdigest()[:16]
+
+
+def _write_line(record: dict[str, Any]) -> None:
+    assert _log_file is not None
+    _log_file.write(json.dumps(record, default=str) + "\n")
+    _log_file.flush()
+
+
 def log_llm_turn(
     *,
     node_id: str,
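
(Aside, not part of the diff: the delta only works if hashing is insensitive to dict key order, which is what ``sort_keys=True`` buys. A minimal check, assuming the ``_content_hash`` above:)

    # Same message, different key order: json.dumps(..., sort_keys=True)
    # canonicalizes both to the same string, so the hashes match and the
    # message is not re-emitted as "new" on later turns.
    a = {"role": "user", "content": "hi"}
    b = {"content": "hi", "role": "user"}
    assert _content_hash(a) == _content_hash(b)
    # default=str means unserializable values hash via their str() form
    # instead of raising, matching the never-break-the-agent rule.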
@@ -75,7 +103,7 @@ def log_llm_turn(
     token_counts: dict[str, Any],
     tools: list[Any] | None = None,
 ) -> None:
-    """Write one JSONL line capturing a complete LLM turn.
+    """Write JSONL records capturing one LLM turn (header + turn delta).
 
     Never raises.
     """
@@ -89,23 +117,57 @@ def log_llm_turn(
             _log_ready = True
         if _log_file is None:
             return
-        record = {
-            # UTC + offset matches tool_call start_timestamp (agent_loop.py)
-            # so the viewer can render every event in one consistent local zone.
-            "timestamp": datetime.now(UTC).isoformat(),
-            "node_id": node_id,
-            "stream_id": stream_id,
-            "execution_id": execution_id,
-            "iteration": iteration,
-            "system_prompt": system_prompt,
-            "tools": _serialize_tools(tools),
-            "messages": messages,
-            "assistant_text": assistant_text,
-            "tool_calls": tool_calls,
-            "tool_results": tool_results,
-            "token_counts": token_counts,
-        }
-        _log_file.write(json.dumps(record, default=str) + "\n")
-        _log_file.flush()
+        # UTC + offset matches tool_call start_timestamp (agent_loop.py)
+        # so the viewer can render every event in one consistent local zone.
+        timestamp = datetime.now(UTC).isoformat()
+        serialized_tools = _serialize_tools(tools)
+
+        # Re-emit the header on first turn or whenever system/tools change.
+        # The Queen reflects different prompts across turns, so we can't
+        # assume strict immutability per execution_id.
+        header_hash = _content_hash({"system_prompt": system_prompt, "tools": serialized_tools})
+        if _session_header_hash.get(execution_id) != header_hash:
+            _write_line(
+                {
+                    "_kind": "session_header",
+                    "timestamp": timestamp,
+                    "execution_id": execution_id,
+                    "node_id": node_id,
+                    "stream_id": stream_id,
+                    "header_hash": header_hash,
+                    "system_prompt": system_prompt,
+                    "tools": serialized_tools,
+                }
+            )
+            _session_header_hash[execution_id] = header_hash
+
+        seen = _session_seen_msgs.setdefault(execution_id, set())
+        message_hashes: list[str] = []
+        new_messages: dict[str, dict[str, Any]] = {}
+        for msg in messages or []:
+            h = _content_hash(msg)
+            message_hashes.append(h)
+            if h not in seen:
+                seen.add(h)
+                new_messages[h] = msg
+
+        _write_line(
+            {
+                "_kind": "turn",
+                "timestamp": timestamp,
+                "execution_id": execution_id,
+                "node_id": node_id,
+                "stream_id": stream_id,
+                "iteration": iteration,
+                "header_hash": header_hash,
+                "message_hashes": message_hashes,
+                "new_messages": new_messages,
+                "assistant_text": assistant_text,
+                "tool_calls": tool_calls,
+                "tool_results": tool_results,
+                "token_counts": token_counts,
+            }
+        )
     except Exception:
         pass  # never break the agent
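
(Aside, not part of the diff: a hypothetical walk-through of why the delta is content-addressed rather than positional. ``delta`` mirrors the writer's loop above; the messages are invented.)

    m1, m2, m3, m4 = ({"role": "user", "content": c} for c in "abcd")
    seen: set[str] = set()

    def delta(msgs):
        # Mirror of the writer's loop: hashes for every message, bodies only
        # for ones this execution_id has not emitted before.
        hashes, new = [], {}
        for m in msgs:
            h = _content_hash(m)
            hashes.append(h)
            if h not in seen:
                seen.add(h)
                new[h] = m
        return hashes, new

    delta([m1, m2, m3])  # turn 1: all three bodies emitted
    delta([m1, m3, m4])  # turn 2, after the agent prunes m2: only m4's body
                         # is new, and m2 is simply absent from message_hashes.
                         # A positional tail-delta would still replay m2.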
File diff suppressed because it is too large
+70 -9
@@ -64,6 +64,54 @@ def _format_timestamp(raw: str) -> str:
     return raw
 
 
+def _reassemble_records(records: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """Convert new-format (header + turn-delta) records into legacy-shape full turns.
+
+    Records lacking ``_kind`` are passed through unchanged. Inputs must be in
+    file order so headers precede the turns that reference them.
+    """
+    headers: dict[str, dict[str, Any]] = {}  # execution_id -> latest session_header
+    pools: dict[str, dict[str, dict[str, Any]]] = {}  # execution_id -> hash -> message body
+    out: list[dict[str, Any]] = []
+    for rec in records:
+        kind = rec.get("_kind")
+        if kind == "session_header":
+            eid = str(rec.get("execution_id") or "")
+            headers[eid] = rec
+            pools.setdefault(eid, {})
+            continue
+        if kind == "turn":
+            eid = str(rec.get("execution_id") or "")
+            pool = pools.setdefault(eid, {})
+            new_msgs = rec.get("new_messages") or {}
+            if isinstance(new_msgs, dict):
+                pool.update(new_msgs)
+            hashes = rec.get("message_hashes") or []
+            messages = [pool[h] for h in hashes if h in pool]
+            header = headers.get(eid, {})
+            out.append(
+                {
+                    "timestamp": rec.get("timestamp", ""),
+                    "execution_id": eid,
+                    "node_id": rec.get("node_id", ""),
+                    "stream_id": rec.get("stream_id", ""),
+                    "iteration": rec.get("iteration", 0),
+                    "system_prompt": header.get("system_prompt", ""),
+                    "tools": header.get("tools", []),
+                    "messages": messages,
+                    "assistant_text": rec.get("assistant_text", ""),
+                    "tool_calls": rec.get("tool_calls", []),
+                    "tool_results": rec.get("tool_results", []),
+                    "token_counts": rec.get("token_counts", {}),
+                    "_log_file": rec.get("_log_file", ""),
+                }
+            )
+            continue
+        out.append(rec)
+    return out
+
+
 def _is_test_session(execution_id: str, records: list[dict[str, Any]]) -> bool:
     if execution_id.startswith("<MagicMock"):
         return True
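
(Aside, not part of the diff: a minimal replay sketch built on the reader above. The log filename is hypothetical; a writer-produced file is read in order, as the docstring requires, then each reassembled turn is printed.)

    import json
    from pathlib import Path

    log = Path.home() / ".hive" / "llm_logs" / "20260504-175720.jsonl"  # hypothetical name
    raw = []
    with log.open(encoding="utf-8") as fh:
        for line in fh:
            if line.strip():
                raw.append(json.loads(line))

    for turn in _reassemble_records(raw):
        print(turn["iteration"], len(turn["messages"]), turn["assistant_text"][:60])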
@@ -100,6 +148,9 @@ def _discover_session_summaries(logs_dir: Path, limit_files: int, include_tests:
                         payload = json.loads(line)
                     except json.JSONDecodeError:
                         continue
+                    # session_header is metadata, not a turn; don't count it.
+                    if payload.get("_kind") == "session_header":
+                        continue
                     eid = str(payload.get("execution_id") or "").strip()
                     if not eid:
                         continue
@@ -157,6 +208,10 @@ def _load_session_data(logs_dir: Path, session_id: str, limit_files: int) -> lis
     records: list[dict[str, Any]] = []
     for path in files:
+        # Reassemble per-file: each file is self-contained because the writer
+        # re-emits the session_header on every process start, so we never need
+        # cross-file state to fill in messages/system_prompt/tools.
+        file_records: list[dict[str, Any]] = []
         try:
             with path.open(encoding="utf-8") as handle:
                 for line_number, raw_line in enumerate(handle, start=1):
@@ -166,17 +221,23 @@ def _load_session_data(logs_dir: Path, session_id: str, limit_files: int) -> lis
                     try:
                         payload = json.loads(line)
                     except json.JSONDecodeError:
-                        payload = {
-                            "timestamp": "",
-                            "execution_id": "",
-                            "_parse_error": f"{path.name}:{line_number}",
-                            "_raw_line": line,
-                        }
-                    if str(payload.get("execution_id") or "").strip() == session_id:
-                        payload["_log_file"] = str(path)
-                        records.append(payload)
+                        records.append(
+                            {
+                                "timestamp": "",
+                                "execution_id": "",
+                                "_parse_error": f"{path.name}:{line_number}",
+                                "_raw_line": line,
+                                "_log_file": str(path),
+                            }
+                        )
+                        continue
+                    if str(payload.get("execution_id") or "").strip() != session_id:
+                        continue
+                    payload["_log_file"] = str(path)
+                    file_records.append(payload)
         except OSError:
             continue
+        records.extend(_reassemble_records(file_records))
 
     if not records:
         return None