feat: llm logging
@@ -1,11 +1,22 @@
"""Write every LLM turn to ~/.hive/llm_logs/<ts>.jsonl for replay/debugging.

Each line is a JSON object with the full LLM turn: the request payload
(system prompt + messages), assistant text, tool calls, tool results, and
token counts. The file is opened lazily on first call and flushed after every
write. Errors are silently swallowed — this must never break the agent.

Two record kinds, distinguished by ``_kind``:

* ``session_header`` — emitted on the first turn of an ``execution_id`` and
  any time its ``system_prompt`` or ``tools`` change. Carries those large
  fields once instead of per-turn.
* ``turn`` — one per LLM call. Carries per-turn outputs plus a
  content-addressed message delta: ``message_hashes`` is the full ordered
  message sequence for this turn, ``new_messages`` is hash → body for
  messages we haven't emitted before for this ``execution_id``. The reader
  reassembles full ``messages`` by accumulating ``new_messages`` across
  prior turn records. Content-addressed (not positional) because the agent
  prunes messages mid-session — a tail-delta would be wrong.
"""

import hashlib
import json
import logging
import os
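For illustration, one session header followed by a later turn record might look roughly like this (a sketch; values are invented and heavily truncated, and real records also carry node_id, stream_id, iteration, and token_counts). The turn re-lists both message hashes but ships only the one unseen body:

{"_kind": "session_header", "execution_id": "e1", "header_hash": "ab12cd34ef56ab78", "system_prompt": "You are ...", "tools": []}
{"_kind": "turn", "execution_id": "e1", "header_hash": "ab12cd34ef56ab78", "message_hashes": ["h1", "h2"], "new_messages": {"h2": {"role": "assistant", "content": "hello"}}, "assistant_text": "hello"}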
@@ -28,6 +39,12 @@ def _llm_debug_dir() -> Path:
_log_file: IO[str] | None = None
_log_ready = False  # lazy init guard

# Per-execution_id delta state. Reset implicitly on process restart — a fresh
# log file has no prior context, so re-emitting the header on first turn is
# correct.
_session_header_hash: dict[str, str] = {}
_session_seen_msgs: dict[str, set[str]] = {}


def _open_log() -> IO[str] | None:
    """Open the JSONL log file for this process."""
@@ -61,6 +78,17 @@ def _serialize_tools(tools: Any) -> list[dict[str, Any]]:
    return out


def _content_hash(payload: Any) -> str:
    raw = json.dumps(payload, default=str, sort_keys=True, ensure_ascii=False)
    return hashlib.sha256(raw.encode("utf-8")).hexdigest()[:16]


def _write_line(record: dict[str, Any]) -> None:
    assert _log_file is not None
    _log_file.write(json.dumps(record, default=str) + "\n")
    _log_file.flush()
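A note on _content_hash: sort_keys=True makes the digest independent of dict key order, so the same message always hashes the same way, and truncating to 16 hex chars (64 bits) keeps records compact while making per-session collisions vanishingly unlikely. A quick illustrative check:

assert _content_hash({"role": "user", "content": "hi"}) == _content_hash({"content": "hi", "role": "user"})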


def log_llm_turn(
    *,
    node_id: str,
@@ -75,7 +103,7 @@ def log_llm_turn(
    token_counts: dict[str, Any],
    tools: list[Any] | None = None,
) -> None:
    """Write JSONL records capturing one LLM turn (header + turn delta).

    Never raises.
    """
@@ -89,23 +117,57 @@ def log_llm_turn(
            _log_ready = True
        if _log_file is None:
            return

        # UTC + offset matches tool_call start_timestamp (agent_loop.py)
        # so the viewer can render every event in one consistent local zone.
        timestamp = datetime.now(UTC).isoformat()
        serialized_tools = _serialize_tools(tools)

        # Re-emit the header on first turn or whenever system/tools change.
        # The Queen reflects different prompts across turns, so we can't
        # assume strict immutability per execution_id.
        header_hash = _content_hash({"system_prompt": system_prompt, "tools": serialized_tools})
        if _session_header_hash.get(execution_id) != header_hash:
            _write_line(
                {
                    "_kind": "session_header",
                    "timestamp": timestamp,
                    "execution_id": execution_id,
                    "node_id": node_id,
                    "stream_id": stream_id,
                    "iteration": iteration,
                    "header_hash": header_hash,
                    "system_prompt": system_prompt,
                    "tools": serialized_tools,
                }
            )
            _session_header_hash[execution_id] = header_hash

        seen = _session_seen_msgs.setdefault(execution_id, set())
        message_hashes: list[str] = []
        new_messages: dict[str, dict[str, Any]] = {}
        for msg in messages or []:
            h = _content_hash(msg)
            message_hashes.append(h)
            if h not in seen:
                seen.add(h)
                new_messages[h] = msg

        _write_line(
            {
                "_kind": "turn",
                "timestamp": timestamp,
                "execution_id": execution_id,
                "node_id": node_id,
                "stream_id": stream_id,
                "iteration": iteration,
                "header_hash": header_hash,
                "message_hashes": message_hashes,
                "new_messages": new_messages,
                "assistant_text": assistant_text,
                "tool_calls": tool_calls,
                "tool_results": tool_results,
                "token_counts": token_counts,
            }
        )
    except Exception:
        pass  # never break the agent
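To make the delta concrete: a turn that appends one message re-lists every hash but ships only the new body. A minimal standalone sketch of that bookkeeping (hypothetical helper names, mirroring the logic above):

import hashlib
import json

_seen: set[str] = set()

def _h(msg: dict) -> str:
    raw = json.dumps(msg, default=str, sort_keys=True, ensure_ascii=False)
    return hashlib.sha256(raw.encode("utf-8")).hexdigest()[:16]

def _delta(messages: list[dict]) -> tuple[list[str], dict[str, dict]]:
    hashes = [_h(m) for m in messages]
    new = {x: m for x, m in zip(hashes, messages) if x not in _seen}
    _seen.update(new)
    return hashes, new

a = {"role": "user", "content": "hi"}
b = {"role": "assistant", "content": "hello"}
_delta([a, b])     # turn 1: all hashes, both bodies in new_messages
_delta([a, b])     # turn 2 unchanged: same hashes, empty new_messages
_delta([a, b, {"role": "user", "content": "bye"}])  # turn 3: ships only the new body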
File diff suppressed because it is too large
@@ -64,6 +64,54 @@ def _format_timestamp(raw: str) -> str:
    return raw


def _reassemble_records(records: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Convert new-format (header + turn-delta) records into legacy-shape full turns.

    Records lacking ``_kind`` are passed through unchanged. Inputs must be in
    file order so headers precede the turns that reference them.
    """
    headers: dict[str, dict[str, Any]] = {}  # execution_id -> latest session_header
    pools: dict[str, dict[str, dict[str, Any]]] = {}  # execution_id -> hash -> message body

    out: list[dict[str, Any]] = []
    for rec in records:
        kind = rec.get("_kind")
        if kind == "session_header":
            eid = str(rec.get("execution_id") or "")
            headers[eid] = rec
            pools.setdefault(eid, {})
            continue
        if kind == "turn":
            eid = str(rec.get("execution_id") or "")
            pool = pools.setdefault(eid, {})
            new_msgs = rec.get("new_messages") or {}
            if isinstance(new_msgs, dict):
                pool.update(new_msgs)
            hashes = rec.get("message_hashes") or []
            messages = [pool[h] for h in hashes if h in pool]
            header = headers.get(eid, {})
            out.append(
                {
                    "timestamp": rec.get("timestamp", ""),
                    "execution_id": eid,
                    "node_id": rec.get("node_id", ""),
                    "stream_id": rec.get("stream_id", ""),
                    "iteration": rec.get("iteration", 0),
                    "system_prompt": header.get("system_prompt", ""),
                    "tools": header.get("tools", []),
                    "messages": messages,
                    "assistant_text": rec.get("assistant_text", ""),
                    "tool_calls": rec.get("tool_calls", []),
                    "tool_results": rec.get("tool_results", []),
                    "token_counts": rec.get("token_counts", {}),
                    "_log_file": rec.get("_log_file", ""),
                }
            )
            continue
        out.append(rec)
    return out
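For example, given one header and two turn deltas from a single file (hash values shortened for readability; records assumed to be in file order), the reader rebuilds full turns:

header = {"_kind": "session_header", "execution_id": "e1", "system_prompt": "sp", "tools": []}
turn1 = {"_kind": "turn", "execution_id": "e1", "message_hashes": ["h1"],
         "new_messages": {"h1": {"role": "user", "content": "hi"}}}
turn2 = {"_kind": "turn", "execution_id": "e1", "message_hashes": ["h1", "h2"],
         "new_messages": {"h2": {"role": "assistant", "content": "hello"}}}

full = _reassemble_records([header, turn1, turn2])
# full[0]["messages"] -> [the user "hi" message]
# full[1]["messages"] -> both messages; each turn carries system_prompt "sp" from the header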


def _is_test_session(execution_id: str, records: list[dict[str, Any]]) -> bool:
    if execution_id.startswith("<MagicMock"):
        return True
@@ -100,6 +148,9 @@ def _discover_session_summaries(logs_dir: Path, limit_files: int, include_tests:
                    payload = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # session_header is metadata, not a turn — don't count it.
                if payload.get("_kind") == "session_header":
                    continue
                eid = str(payload.get("execution_id") or "").strip()
                if not eid:
                    continue
@@ -157,6 +208,10 @@ def _load_session_data(logs_dir: Path, session_id: str, limit_files: int) -> lis

    records: list[dict[str, Any]] = []
    for path in files:
        # Reassemble per-file: each file is self-contained because the writer
        # re-emits the session_header on every process start, so we never need
        # cross-file state to fill in messages/system_prompt/tools.
        file_records: list[dict[str, Any]] = []
        try:
            with path.open(encoding="utf-8") as handle:
                for line_number, raw_line in enumerate(handle, start=1):
@@ -166,17 +221,23 @@ def _load_session_data(logs_dir: Path, session_id: str, limit_files: int) -> lis
                    try:
                        payload = json.loads(line)
                    except json.JSONDecodeError:
                        records.append(
                            {
                                "timestamp": "",
                                "execution_id": "",
                                "_parse_error": f"{path.name}:{line_number}",
                                "_raw_line": line,
                                "_log_file": str(path),
                            }
                        )
                        continue
                    if str(payload.get("execution_id") or "").strip() != session_id:
                        continue
                    payload["_log_file"] = str(path)
                    file_records.append(payload)
        except OSError:
            continue
        records.extend(_reassemble_records(file_records))

    if not records:
        return None
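Reading a log file end to end is then just JSONL parsing plus reassembly. A minimal standalone sketch (the file name is hypothetical; the real loader adds session filtering and parse-error records):

import json
from pathlib import Path

def read_log(path: Path) -> list[dict]:
    records = []
    with path.open(encoding="utf-8") as handle:
        for line in handle:
            line = line.strip()
            if line:
                records.append(json.loads(line))
    return _reassemble_records(records)  # reuses the reader helper defined above

turns = read_log(Path.home() / ".hive" / "llm_logs" / "example.jsonl")  # hypothetical file name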