feat: llm logging
This commit is contained in:
@@ -1,11 +1,22 @@
|
|||||||
"""Write every LLM turn to ~/.hive/llm_logs/<ts>.jsonl for replay/debugging.
|
"""Write every LLM turn to ~/.hive/llm_logs/<ts>.jsonl for replay/debugging.
|
||||||
|
|
||||||
Each line is a JSON object with the full LLM turn: the request payload
|
Two record kinds, distinguished by ``_kind``:
|
||||||
(system prompt + messages), assistant text, tool calls, tool results, and
|
|
||||||
token counts. The file is opened lazily on first call and flushed after every
|
* ``session_header`` — emitted on the first turn of an ``execution_id`` and
|
||||||
write. Errors are silently swallowed — this must never break the agent.
|
any time its ``system_prompt`` or ``tools`` change. Carries those large
|
||||||
|
fields once instead of per-turn.
|
||||||
|
* ``turn`` — one per LLM call. Carries per-turn outputs plus a
|
||||||
|
content-addressed message delta: ``message_hashes`` is the full ordered
|
||||||
|
message sequence for this turn, ``new_messages`` is hash → body for
|
||||||
|
messages we haven't emitted before for this ``execution_id``. The reader
|
||||||
|
reassembles full ``messages`` by accumulating ``new_messages`` across
|
||||||
|
prior turn records. Content-addressed (not positional) because the agent
|
||||||
|
prunes messages mid-session — a tail-delta would be wrong.
|
||||||
|
|
||||||
|
Errors are silently swallowed — this must never break the agent.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
@@ -28,6 +39,12 @@ def _llm_debug_dir() -> Path:
|
|||||||
_log_file: IO[str] | None = None
|
_log_file: IO[str] | None = None
|
||||||
_log_ready = False # lazy init guard
|
_log_ready = False # lazy init guard
|
||||||
|
|
||||||
|
# Per-execution_id delta state. Reset implicitly on process restart — a fresh
|
||||||
|
# log file has no prior context, so re-emitting the header on first turn is
|
||||||
|
# correct.
|
||||||
|
_session_header_hash: dict[str, str] = {}
|
||||||
|
_session_seen_msgs: dict[str, set[str]] = {}
|
||||||
|
|
||||||
|
|
||||||
def _open_log() -> IO[str] | None:
|
def _open_log() -> IO[str] | None:
|
||||||
"""Open the JSONL log file for this process."""
|
"""Open the JSONL log file for this process."""
|
||||||
@@ -61,6 +78,17 @@ def _serialize_tools(tools: Any) -> list[dict[str, Any]]:
|
|||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def _content_hash(payload: Any) -> str:
|
||||||
|
raw = json.dumps(payload, default=str, sort_keys=True, ensure_ascii=False)
|
||||||
|
return hashlib.sha256(raw.encode("utf-8")).hexdigest()[:16]
|
||||||
|
|
||||||
|
|
||||||
|
def _write_line(record: dict[str, Any]) -> None:
|
||||||
|
assert _log_file is not None
|
||||||
|
_log_file.write(json.dumps(record, default=str) + "\n")
|
||||||
|
_log_file.flush()
|
||||||
|
|
||||||
|
|
||||||
def log_llm_turn(
|
def log_llm_turn(
|
||||||
*,
|
*,
|
||||||
node_id: str,
|
node_id: str,
|
||||||
@@ -75,7 +103,7 @@ def log_llm_turn(
|
|||||||
token_counts: dict[str, Any],
|
token_counts: dict[str, Any],
|
||||||
tools: list[Any] | None = None,
|
tools: list[Any] | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Write one JSONL line capturing a complete LLM turn.
|
"""Write JSONL records capturing one LLM turn (header + turn delta).
|
||||||
|
|
||||||
Never raises.
|
Never raises.
|
||||||
"""
|
"""
|
||||||
@@ -89,23 +117,57 @@ def log_llm_turn(
|
|||||||
_log_ready = True
|
_log_ready = True
|
||||||
if _log_file is None:
|
if _log_file is None:
|
||||||
return
|
return
|
||||||
record = {
|
|
||||||
# UTC + offset matches tool_call start_timestamp (agent_loop.py)
|
# UTC + offset matches tool_call start_timestamp (agent_loop.py)
|
||||||
# so the viewer can render every event in one consistent local zone.
|
# so the viewer can render every event in one consistent local zone.
|
||||||
"timestamp": datetime.now(UTC).isoformat(),
|
timestamp = datetime.now(UTC).isoformat()
|
||||||
|
serialized_tools = _serialize_tools(tools)
|
||||||
|
|
||||||
|
# Re-emit the header on first turn or whenever system/tools change.
|
||||||
|
# The Queen reflects different prompts across turns, so we can't
|
||||||
|
# assume strict immutability per execution_id.
|
||||||
|
header_hash = _content_hash({"system_prompt": system_prompt, "tools": serialized_tools})
|
||||||
|
if _session_header_hash.get(execution_id) != header_hash:
|
||||||
|
_write_line(
|
||||||
|
{
|
||||||
|
"_kind": "session_header",
|
||||||
|
"timestamp": timestamp,
|
||||||
|
"execution_id": execution_id,
|
||||||
"node_id": node_id,
|
"node_id": node_id,
|
||||||
"stream_id": stream_id,
|
"stream_id": stream_id,
|
||||||
"execution_id": execution_id,
|
"header_hash": header_hash,
|
||||||
"iteration": iteration,
|
|
||||||
"system_prompt": system_prompt,
|
"system_prompt": system_prompt,
|
||||||
"tools": _serialize_tools(tools),
|
"tools": serialized_tools,
|
||||||
"messages": messages,
|
}
|
||||||
|
)
|
||||||
|
_session_header_hash[execution_id] = header_hash
|
||||||
|
|
||||||
|
seen = _session_seen_msgs.setdefault(execution_id, set())
|
||||||
|
message_hashes: list[str] = []
|
||||||
|
new_messages: dict[str, dict[str, Any]] = {}
|
||||||
|
for msg in messages or []:
|
||||||
|
h = _content_hash(msg)
|
||||||
|
message_hashes.append(h)
|
||||||
|
if h not in seen:
|
||||||
|
seen.add(h)
|
||||||
|
new_messages[h] = msg
|
||||||
|
|
||||||
|
_write_line(
|
||||||
|
{
|
||||||
|
"_kind": "turn",
|
||||||
|
"timestamp": timestamp,
|
||||||
|
"execution_id": execution_id,
|
||||||
|
"node_id": node_id,
|
||||||
|
"stream_id": stream_id,
|
||||||
|
"iteration": iteration,
|
||||||
|
"header_hash": header_hash,
|
||||||
|
"message_hashes": message_hashes,
|
||||||
|
"new_messages": new_messages,
|
||||||
"assistant_text": assistant_text,
|
"assistant_text": assistant_text,
|
||||||
"tool_calls": tool_calls,
|
"tool_calls": tool_calls,
|
||||||
"tool_results": tool_results,
|
"tool_results": tool_results,
|
||||||
"token_counts": token_counts,
|
"token_counts": token_counts,
|
||||||
}
|
}
|
||||||
_log_file.write(json.dumps(record, default=str) + "\n")
|
)
|
||||||
_log_file.flush()
|
|
||||||
except Exception:
|
except Exception:
|
||||||
pass # never break the agent
|
pass # never break the agent
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -64,6 +64,54 @@ def _format_timestamp(raw: str) -> str:
|
|||||||
return raw
|
return raw
|
||||||
|
|
||||||
|
|
||||||
|
def _reassemble_records(records: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||||
|
"""Convert new-format (header + turn-delta) records into legacy-shape full turns.
|
||||||
|
|
||||||
|
Records lacking ``_kind`` are passed through unchanged. Inputs must be in
|
||||||
|
file order so headers precede the turns that reference them.
|
||||||
|
"""
|
||||||
|
headers: dict[str, dict[str, Any]] = {} # execution_id -> latest session_header
|
||||||
|
pools: dict[str, dict[str, dict[str, Any]]] = {} # execution_id -> hash -> message body
|
||||||
|
|
||||||
|
out: list[dict[str, Any]] = []
|
||||||
|
for rec in records:
|
||||||
|
kind = rec.get("_kind")
|
||||||
|
if kind == "session_header":
|
||||||
|
eid = str(rec.get("execution_id") or "")
|
||||||
|
headers[eid] = rec
|
||||||
|
pools.setdefault(eid, {})
|
||||||
|
continue
|
||||||
|
if kind == "turn":
|
||||||
|
eid = str(rec.get("execution_id") or "")
|
||||||
|
pool = pools.setdefault(eid, {})
|
||||||
|
new_msgs = rec.get("new_messages") or {}
|
||||||
|
if isinstance(new_msgs, dict):
|
||||||
|
pool.update(new_msgs)
|
||||||
|
hashes = rec.get("message_hashes") or []
|
||||||
|
messages = [pool[h] for h in hashes if h in pool]
|
||||||
|
header = headers.get(eid, {})
|
||||||
|
out.append(
|
||||||
|
{
|
||||||
|
"timestamp": rec.get("timestamp", ""),
|
||||||
|
"execution_id": eid,
|
||||||
|
"node_id": rec.get("node_id", ""),
|
||||||
|
"stream_id": rec.get("stream_id", ""),
|
||||||
|
"iteration": rec.get("iteration", 0),
|
||||||
|
"system_prompt": header.get("system_prompt", ""),
|
||||||
|
"tools": header.get("tools", []),
|
||||||
|
"messages": messages,
|
||||||
|
"assistant_text": rec.get("assistant_text", ""),
|
||||||
|
"tool_calls": rec.get("tool_calls", []),
|
||||||
|
"tool_results": rec.get("tool_results", []),
|
||||||
|
"token_counts": rec.get("token_counts", {}),
|
||||||
|
"_log_file": rec.get("_log_file", ""),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
out.append(rec)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
def _is_test_session(execution_id: str, records: list[dict[str, Any]]) -> bool:
|
def _is_test_session(execution_id: str, records: list[dict[str, Any]]) -> bool:
|
||||||
if execution_id.startswith("<MagicMock"):
|
if execution_id.startswith("<MagicMock"):
|
||||||
return True
|
return True
|
||||||
@@ -100,6 +148,9 @@ def _discover_session_summaries(logs_dir: Path, limit_files: int, include_tests:
|
|||||||
payload = json.loads(line)
|
payload = json.loads(line)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
continue
|
continue
|
||||||
|
# session_header is metadata, not a turn — don't count it.
|
||||||
|
if payload.get("_kind") == "session_header":
|
||||||
|
continue
|
||||||
eid = str(payload.get("execution_id") or "").strip()
|
eid = str(payload.get("execution_id") or "").strip()
|
||||||
if not eid:
|
if not eid:
|
||||||
continue
|
continue
|
||||||
@@ -157,6 +208,10 @@ def _load_session_data(logs_dir: Path, session_id: str, limit_files: int) -> lis
|
|||||||
|
|
||||||
records: list[dict[str, Any]] = []
|
records: list[dict[str, Any]] = []
|
||||||
for path in files:
|
for path in files:
|
||||||
|
# Reassemble per-file: each file is self-contained because the writer
|
||||||
|
# re-emits the session_header on every process start, so we never need
|
||||||
|
# cross-file state to fill in messages/system_prompt/tools.
|
||||||
|
file_records: list[dict[str, Any]] = []
|
||||||
try:
|
try:
|
||||||
with path.open(encoding="utf-8") as handle:
|
with path.open(encoding="utf-8") as handle:
|
||||||
for line_number, raw_line in enumerate(handle, start=1):
|
for line_number, raw_line in enumerate(handle, start=1):
|
||||||
@@ -166,17 +221,23 @@ def _load_session_data(logs_dir: Path, session_id: str, limit_files: int) -> lis
|
|||||||
try:
|
try:
|
||||||
payload = json.loads(line)
|
payload = json.loads(line)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
payload = {
|
records.append(
|
||||||
|
{
|
||||||
"timestamp": "",
|
"timestamp": "",
|
||||||
"execution_id": "",
|
"execution_id": "",
|
||||||
"_parse_error": f"{path.name}:{line_number}",
|
"_parse_error": f"{path.name}:{line_number}",
|
||||||
"_raw_line": line,
|
"_raw_line": line,
|
||||||
|
"_log_file": str(path),
|
||||||
}
|
}
|
||||||
if str(payload.get("execution_id") or "").strip() == session_id:
|
)
|
||||||
|
continue
|
||||||
|
if str(payload.get("execution_id") or "").strip() != session_id:
|
||||||
|
continue
|
||||||
payload["_log_file"] = str(path)
|
payload["_log_file"] = str(path)
|
||||||
records.append(payload)
|
file_records.append(payload)
|
||||||
except OSError:
|
except OSError:
|
||||||
continue
|
continue
|
||||||
|
records.extend(_reassemble_records(file_records))
|
||||||
|
|
||||||
if not records:
|
if not records:
|
||||||
return None
|
return None
|
||||||
|
|||||||
Reference in New Issue
Block a user