"""Tests for RuntimeLogger and RuntimeLogStore.
|
|
|
|
Tests incremental JSONL writes (L2/L3), crash resilience, and L1
|
|
summary aggregation at end_run().
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from framework.observability import clear_trace_context, set_trace_context
|
|
from framework.runtime.runtime_log_schemas import (
|
|
NodeDetail,
|
|
NodeStepLog,
|
|
RunSummaryLog,
|
|
ToolCallLog,
|
|
)
|
|
from framework.runtime.runtime_log_store import RuntimeLogStore
|
|
from framework.runtime.runtime_logger import RuntimeLogger
|
|
|
|
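# On-disk layout assumed by these tests (inferred from the path assertions in
# the tests below; the store owns the authoritative layout):
#
#   <base>/sessions/<session_id>/logs/
#       summary.json      # L1: aggregate written once by end_run()
#       details.jsonl     # L2: one NodeDetail appended per node
#       tool_logs.jsonl   # L3: one NodeStepLog appended per step
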
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

_SESSION_PREFIX = "session_20250101_000000"


def _sid(suffix: str) -> str:
    """Build a deterministic session ID for tests."""
    return f"{_SESSION_PREFIX}_{suffix}"


# ---------------------------------------------------------------------------
# RuntimeLogStore tests
# ---------------------------------------------------------------------------


@pytest.fixture(autouse=True)
def _force_session_run_ids(monkeypatch):
    """Use unified session_* IDs in tests to avoid deprecated run path warnings."""

    original_start_run = RuntimeLogger.start_run
    counter = 0

    def _patched_start_run(self, goal_id: str = "", session_id: str = "") -> str:
        nonlocal counter
        if not session_id:
            counter += 1
            session_id = _sid(f"{counter:08x}")
        return original_start_run(self, goal_id=goal_id, session_id=session_id)

    monkeypatch.setattr(RuntimeLogger, "start_run", _patched_start_run)


class TestRuntimeLogStore:
    @pytest.mark.asyncio
    async def test_ensure_run_dir_creates_directory(self, tmp_path: Path):
        store = RuntimeLogStore(tmp_path / "logs")
        store.ensure_run_dir(_sid("test0001"))
        assert (tmp_path / "logs" / "sessions" / _sid("test0001") / "logs").is_dir()

    @pytest.mark.asyncio
    async def test_append_and_load_details(self, tmp_path: Path):
        store = RuntimeLogStore(tmp_path / "logs")
        store.ensure_run_dir(_sid("test0002"))

        detail1 = NodeDetail(
            node_id="node-1",
            node_name="Search Node",
            node_type="event_loop",
            success=True,
            total_steps=2,
            exit_status="success",
            accept_count=1,
            retry_count=1,
        )
        detail2 = NodeDetail(
            node_id="node-2",
            node_name="Process Node",
            node_type="event_loop",
            success=True,
            total_steps=1,
        )

        store.append_node_detail(_sid("test0002"), detail1)
        store.append_node_detail(_sid("test0002"), detail2)

        loaded = await store.load_details(_sid("test0002"))
        assert loaded is not None
        assert len(loaded.nodes) == 2
        assert loaded.nodes[0].node_id == "node-1"
        assert loaded.nodes[0].exit_status == "success"
        assert loaded.nodes[1].node_type == "event_loop"

    @pytest.mark.asyncio
    async def test_append_and_load_tool_logs(self, tmp_path: Path):
        store = RuntimeLogStore(tmp_path / "logs")
        store.ensure_run_dir(_sid("test0003"))

        step = NodeStepLog(
            node_id="node-1",
            node_type="event_loop",
            step_index=0,
            llm_text="I will search for the data.",
            tool_calls=[
                ToolCallLog(
                    tool_use_id="tc_1",
                    tool_name="web_search",
                    tool_input={"query": "test"},
                    result="Found 3 results",
                    is_error=False,
                )
            ],
            input_tokens=100,
            output_tokens=50,
            latency_ms=1200,
            verdict="CONTINUE",
        )

        store.append_step(_sid("test0003"), step)

        loaded = await store.load_tool_logs(_sid("test0003"))
        assert loaded is not None
        assert len(loaded.steps) == 1
        assert loaded.steps[0].tool_calls[0].tool_name == "web_search"
        assert loaded.steps[0].input_tokens == 100
        assert loaded.steps[0].node_id == "node-1"

    @pytest.mark.asyncio
    async def test_save_and_load_summary(self, tmp_path: Path):
        store = RuntimeLogStore(tmp_path / "logs")
        summary = RunSummaryLog(
            run_id=_sid("test0001"),
            agent_id="agent-a",
            goal_id="goal-1",
            status="success",
            total_nodes_executed=3,
            node_path=["node-1", "node-2", "node-3"],
            started_at="2025-01-01T00:00:00",
            duration_ms=5000,
            execution_quality="clean",
        )

        await store.save_summary(_sid("test0001"), summary)

        loaded = await store.load_summary(_sid("test0001"))
        assert loaded is not None
        assert loaded.run_id == _sid("test0001")
        assert loaded.status == "success"
        assert loaded.total_nodes_executed == 3
        assert loaded.goal_id == "goal-1"
        assert loaded.execution_quality == "clean"

    @pytest.mark.asyncio
    async def test_load_missing_run_returns_none(self, tmp_path: Path):
        store = RuntimeLogStore(tmp_path / "logs")
        assert await store.load_summary(_sid("missing00")) is None
        assert await store.load_details(_sid("missing00")) is None
        assert await store.load_tool_logs(_sid("missing00")) is None

    @pytest.mark.asyncio
    async def test_list_runs_empty(self, tmp_path: Path):
        store = RuntimeLogStore(tmp_path / "logs")
        runs = await store.list_runs()
        assert runs == []

    @pytest.mark.asyncio
    async def test_list_runs_with_filter(self, tmp_path: Path):
        store = RuntimeLogStore(tmp_path / "logs")

        # Save a success run
        store.ensure_run_dir(_sid("runok000"))
        await store.save_summary(
            _sid("runok000"),
            RunSummaryLog(
                run_id=_sid("runok000"),
                status="success",
                started_at="2025-01-01T00:00:01",
            ),
        )
        # Save a failure run
        store.ensure_run_dir(_sid("runfail0"))
        await store.save_summary(
            _sid("runfail0"),
            RunSummaryLog(
                run_id=_sid("runfail0"),
                status="failure",
                needs_attention=True,
                started_at="2025-01-01T00:00:02",
            ),
        )

        # All runs
        all_runs = await store.list_runs()
        assert len(all_runs) == 2

        # Filter by status
        success_runs = await store.list_runs(status="success")
        assert len(success_runs) == 1
        assert success_runs[0].run_id == _sid("runok000")

        # Filter by needs_attention
        attention_runs = await store.list_runs(status="needs_attention")
        assert len(attention_runs) == 1
        assert attention_runs[0].run_id == _sid("runfail0")

    @pytest.mark.asyncio
    async def test_list_runs_sorted_by_timestamp_desc(self, tmp_path: Path):
        store = RuntimeLogStore(tmp_path / "logs")

        for i in range(5):
            run_id = f"session_20250101_0000{i:02d}_run{i:04d}"
            store.ensure_run_dir(run_id)
            await store.save_summary(
                run_id,
                RunSummaryLog(
                    run_id=run_id,
                    status="success",
                    started_at=f"2025-01-01T00:00:{i:02d}",
                ),
            )

        runs = await store.list_runs()
        # Most recent first
        assert runs[0].run_id == "session_20250101_000004_run0004"
        assert runs[-1].run_id == "session_20250101_000000_run0000"

    @pytest.mark.asyncio
    async def test_list_runs_limit(self, tmp_path: Path):
        store = RuntimeLogStore(tmp_path / "logs")

        for i in range(10):
            run_id = f"session_20250101_0000{i:02d}_run{i:04d}"
            store.ensure_run_dir(run_id)
            await store.save_summary(
                run_id,
                RunSummaryLog(
                    run_id=run_id,
                    status="success",
                    started_at=f"2025-01-01T00:00:{i:02d}",
                ),
            )

        runs = await store.list_runs(limit=3)
        assert len(runs) == 3

    @pytest.mark.asyncio
    async def test_list_runs_includes_in_progress(self, tmp_path: Path):
        """Directories without summary.json appear as in_progress."""
        store = RuntimeLogStore(tmp_path / "logs")

        # Completed run with summary
        store.ensure_run_dir(_sid("rundone0"))
        await store.save_summary(
            _sid("rundone0"),
            RunSummaryLog(
                run_id=_sid("rundone0"),
                status="success",
                started_at="2025-01-01T00:00:01",
            ),
        )

        # In-progress run: directory exists but no summary.json
        store.ensure_run_dir(_sid("runactiv0"))

        all_runs = await store.list_runs()
        assert len(all_runs) == 2
        run_ids = {r.run_id for r in all_runs}
        assert _sid("rundone0") in run_ids
        assert _sid("runactiv0") in run_ids

        active = next(r for r in all_runs if r.run_id == _sid("runactiv0"))
        assert active.status == "in_progress"

    @pytest.mark.asyncio
    async def test_read_node_details_sync(self, tmp_path: Path):
        store = RuntimeLogStore(tmp_path / "logs")
        store.ensure_run_dir(_sid("testsync0"))

        store.append_node_detail(
            _sid("testsync0"),
            NodeDetail(
                node_id="n1", node_name="A", success=True, input_tokens=100, output_tokens=50
            ),
        )
        store.append_node_detail(
            _sid("testsync0"),
            NodeDetail(node_id="n2", node_name="B", success=False, error="oops"),
        )

        details = store.read_node_details_sync(_sid("testsync0"))
        assert len(details) == 2
        assert details[0].node_id == "n1"
        assert details[1].error == "oops"

    @pytest.mark.asyncio
    async def test_corrupt_jsonl_line_skipped(self, tmp_path: Path):
        """A corrupt JSONL line should be skipped without breaking reads."""
        store = RuntimeLogStore(tmp_path / "logs")
        store.ensure_run_dir(_sid("corrupt00"))

        # Write a valid line, a corrupt line, then another valid line
        jsonl_path = tmp_path / "logs" / "sessions" / _sid("corrupt00") / "logs" / "details.jsonl"
        valid1 = json.dumps(NodeDetail(node_id="n1", node_name="A", success=True).model_dump())
        valid2 = json.dumps(NodeDetail(node_id="n2", node_name="B", success=True).model_dump())
        jsonl_path.write_text(f"{valid1}\n{{corrupt line\n{valid2}\n")

        details = store.read_node_details_sync(_sid("corrupt00"))
        assert len(details) == 2
        assert details[0].node_id == "n1"
        assert details[1].node_id == "n2"


# ---------------------------------------------------------------------------
# RuntimeLogger tests
# ---------------------------------------------------------------------------
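#
# Lifecycle exercised by the tests below: start_run() opens the session
# directory, log_step() appends L3 rows and log_node_complete() appends L2
# rows immediately, and end_run() aggregates the L1 summary. A sketch of the
# call shape these tests assume (argument values illustrative):
#
#   rl = RuntimeLogger(store=RuntimeLogStore(base), agent_id="test-agent")
#   run_id = rl.start_run("goal-1")
#   rl.log_step(node_id="node-1", node_type="event_loop", step_index=0)
#   rl.log_node_complete(node_id="node-1", node_name="...", node_type="event_loop", success=True)
#   await rl.end_run(status="success", duration_ms=100, node_path=["node-1"])

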
class TestRuntimeLogger:
    @pytest.mark.asyncio
    async def test_start_run_returns_run_id(self, tmp_path: Path):
        store = RuntimeLogStore(tmp_path / "logs")
        rl = RuntimeLogger(store=store, agent_id="test-agent")
        run_id = rl.start_run("goal-1")
        assert run_id
        assert run_id.startswith("session_")

    @pytest.mark.asyncio
    async def test_start_run_creates_directory(self, tmp_path: Path):
        store = RuntimeLogStore(tmp_path / "logs")
        rl = RuntimeLogger(store=store, agent_id="test-agent")
        run_id = rl.start_run("goal-1")
        assert (tmp_path / "logs" / "sessions" / run_id / "logs").is_dir()

    @pytest.mark.asyncio
    async def test_log_step_writes_to_disk_immediately(self, tmp_path: Path):
        store = RuntimeLogStore(tmp_path / "logs")
        rl = RuntimeLogger(store=store, agent_id="test-agent")
        run_id = rl.start_run("goal-1")

        rl.log_step(
            node_id="node-1",
            node_type="event_loop",
            step_index=0,
            llm_text="Searching.",
            input_tokens=100,
            output_tokens=50,
        )

        # Verify the file exists and has one line
        jsonl_path = tmp_path / "logs" / "sessions" / run_id / "logs" / "tool_logs.jsonl"
        assert jsonl_path.exists()
        lines = [line for line in jsonl_path.read_text(encoding="utf-8").strip().split("\n") if line]
        assert len(lines) == 1

        data = json.loads(lines[0])
        assert data["node_id"] == "node-1"
        assert data["input_tokens"] == 100

    @pytest.mark.asyncio
    async def test_log_node_complete_writes_to_disk_immediately(self, tmp_path: Path):
        store = RuntimeLogStore(tmp_path / "logs")
        rl = RuntimeLogger(store=store, agent_id="test-agent")
        run_id = rl.start_run("goal-1")

        rl.log_node_complete(
            node_id="node-1",
            node_name="Search",
            node_type="event_loop",
            success=True,
            exit_status="success",
        )

        jsonl_path = tmp_path / "logs" / "sessions" / run_id / "logs" / "details.jsonl"
        assert jsonl_path.exists()
        lines = [line for line in jsonl_path.read_text(encoding="utf-8").strip().split("\n") if line]
        assert len(lines) == 1

        data = json.loads(lines[0])
        assert data["node_id"] == "node-1"
        assert data["exit_status"] == "success"

    @pytest.mark.asyncio
    async def test_full_lifecycle(self, tmp_path: Path):
        """Test start_run -> log_step (x3) -> log_node_complete -> end_run."""
        store = RuntimeLogStore(tmp_path / "logs")
        rt_logger = RuntimeLogger(store=store, agent_id="test-agent")
        run_id = rt_logger.start_run("goal-1")

        # Step 0: RETRY (event_loop iteration)
        rt_logger.log_step(
            node_id="node-1",
            node_type="event_loop",
            step_index=0,
            verdict="RETRY",
            verdict_feedback="Missing output keys: ['result']",
            tool_calls=[
                {
                    "tool_use_id": "tc_1",
                    "tool_name": "web_search",
                    "tool_input": {"query": "test"},
                    "content": "Found data",
                    "is_error": False,
                }
            ],
            llm_text="Let me search for that.",
            input_tokens=100,
            output_tokens=50,
            latency_ms=1000,
        )

        # Step 1: CONTINUE (unjudged)
        rt_logger.log_step(
            node_id="node-1",
            node_type="event_loop",
            step_index=1,
            verdict="CONTINUE",
            verdict_feedback="Unjudged",
            tool_calls=[],
            llm_text="Processing...",
            input_tokens=80,
            output_tokens=30,
            latency_ms=500,
        )

        # Step 2: ACCEPT
        rt_logger.log_step(
            node_id="node-1",
            node_type="event_loop",
            step_index=2,
            verdict="ACCEPT",
            verdict_feedback="All outputs set",
            tool_calls=[],
            llm_text="Here is your result.",
            input_tokens=90,
            output_tokens=40,
            latency_ms=800,
        )

        # Log node completion
        rt_logger.log_node_complete(
            node_id="node-1",
            node_name="Search Node",
            node_type="event_loop",
            success=True,
            total_steps=3,
            tokens_used=390,
            input_tokens=270,
            output_tokens=120,
            latency_ms=2300,
            exit_status="success",
            accept_count=1,
            retry_count=1,
            continue_count=1,
        )

        await rt_logger.end_run(
            status="success",
            duration_ms=2300,
            node_path=["node-1"],
            execution_quality="clean",
        )

        # Verify Level 1: Summary
        summary = await store.load_summary(run_id)
        assert summary is not None
        assert summary.status == "success"
        assert summary.total_nodes_executed == 1
        assert summary.total_input_tokens == 270
        assert summary.total_output_tokens == 120
        assert summary.needs_attention is False
        assert summary.duration_ms == 2300
        assert summary.execution_quality == "clean"
        assert summary.node_path == ["node-1"]

        # Verify Level 2: Details
        details = await store.load_details(run_id)
        assert details is not None
        assert len(details.nodes) == 1
        assert details.nodes[0].node_id == "node-1"
        assert details.nodes[0].exit_status == "success"
        assert details.nodes[0].accept_count == 1
        assert details.nodes[0].retry_count == 1

        # Verify Level 3: Tool logs
        tool_logs = await store.load_tool_logs(run_id)
        assert tool_logs is not None
        assert len(tool_logs.steps) == 3
        assert tool_logs.steps[0].tool_calls[0].tool_name == "web_search"
        assert tool_logs.steps[0].input_tokens == 100
        assert tool_logs.steps[0].verdict == "RETRY"
        assert tool_logs.steps[2].verdict == "ACCEPT"

    @pytest.mark.asyncio
    async def test_trace_context_populated_in_l1_l2_l3(self, tmp_path: Path):
        """With trace context set, L3/L2/L1 entries include trace_id, span_id, execution_id."""
        set_trace_context(
            trace_id="a1b2c3d4e5f6789012345678abcdef01",
            execution_id="b2c3d4e5f6789012345678abcdef0123",
        )
        try:
            store = RuntimeLogStore(tmp_path / "logs")
            rl = RuntimeLogger(store=store, agent_id="test-agent")
            run_id = rl.start_run("goal-1")

            rl.log_step(
                node_id="node-1",
                node_type="event_loop",
                step_index=0,
                llm_text="Step.",
                input_tokens=10,
                output_tokens=5,
            )
            rl.log_node_complete(
                node_id="node-1",
                node_name="Search",
                node_type="event_loop",
                success=True,
                exit_status="success",
            )
            await rl.end_run(
                status="success",
                duration_ms=100,
                node_path=["node-1"],
                execution_quality="clean",
            )

            # L3: tool_logs
            tool_logs = await store.load_tool_logs(run_id)
            assert tool_logs is not None
            assert len(tool_logs.steps) == 1
            step = tool_logs.steps[0]
            assert step.trace_id == "a1b2c3d4e5f6789012345678abcdef01"
            assert step.execution_id == "b2c3d4e5f6789012345678abcdef0123"
            assert len(step.span_id) == 16
            assert all(c in "0123456789abcdef" for c in step.span_id)

            # L2: details
            details = await store.load_details(run_id)
            assert details is not None
            assert len(details.nodes) == 1
            nd = details.nodes[0]
            assert nd.trace_id == "a1b2c3d4e5f6789012345678abcdef01"
            assert len(nd.span_id) == 16

            # L1: summary
            summary = await store.load_summary(run_id)
            assert summary is not None
            assert summary.trace_id == "a1b2c3d4e5f6789012345678abcdef01"
            assert summary.execution_id == "b2c3d4e5f6789012345678abcdef0123"
        finally:
            clear_trace_context()

    @pytest.mark.asyncio
    async def test_trace_context_empty_when_not_set(self, tmp_path: Path):
        """Without trace context, L3/L2/L1 trace_id and execution_id are empty."""
        clear_trace_context()
        store = RuntimeLogStore(tmp_path / "logs")
        rl = RuntimeLogger(store=store, agent_id="test-agent")
        run_id = rl.start_run("goal-1")

        rl.log_step(
            node_id="node-1",
            node_type="event_loop",
            step_index=0,
            llm_text="Step.",
            input_tokens=10,
            output_tokens=5,
        )
        rl.log_node_complete(
            node_id="node-1",
            node_name="Search",
            node_type="event_loop",
            success=True,
            exit_status="success",
        )
        await rl.end_run(
            status="success",
            duration_ms=100,
            node_path=["node-1"],
            execution_quality="clean",
        )

        # L3: trace_id and execution_id from context should be empty
        tool_logs = await store.load_tool_logs(run_id)
        assert tool_logs is not None
        assert len(tool_logs.steps) == 1
        assert tool_logs.steps[0].trace_id == ""
        assert tool_logs.steps[0].execution_id == ""

        # L2
        details = await store.load_details(run_id)
        assert details is not None
        assert details.nodes[0].trace_id == ""

        # L1
        summary = await store.load_summary(run_id)
        assert summary is not None
        assert summary.trace_id == ""
        assert summary.execution_id == ""

    @pytest.mark.asyncio
    async def test_multi_node_lifecycle(self, tmp_path: Path):
        """Test logging across multiple nodes in a graph run."""
        store = RuntimeLogStore(tmp_path / "logs")
        rt_logger = RuntimeLogger(store=store, agent_id="test-agent")
        run_id = rt_logger.start_run("goal-1")

        # Node 1: event_loop with an LLM step
        rt_logger.log_step(
            node_id="node-1",
            node_type="event_loop",
            step_index=0,
            verdict="ACCEPT",
            llm_text="Done.",
            input_tokens=100,
            output_tokens=50,
        )
        rt_logger.log_node_complete(
            node_id="node-1",
            node_name="Search",
            node_type="event_loop",
            success=True,
            total_steps=1,
            tokens_used=150,
            input_tokens=100,
            output_tokens=50,
            exit_status="success",
            accept_count=1,
        )

        # Node 2: event_loop with a latency-only step (no token usage)
        rt_logger.log_step(
            node_id="node-2",
            node_type="event_loop",
            step_index=0,
            latency_ms=50,
        )
        rt_logger.log_node_complete(
            node_id="node-2",
            node_name="Process",
            node_type="event_loop",
            success=True,
            total_steps=1,
            latency_ms=50,
        )

        await rt_logger.end_run(
            status="success",
            duration_ms=1000,
            node_path=["node-1", "node-2"],
            execution_quality="clean",
        )

        summary = await store.load_summary(run_id)
        assert summary is not None
        assert summary.total_nodes_executed == 2
        assert summary.node_path == ["node-1", "node-2"]
        assert summary.total_input_tokens == 100
        assert summary.total_output_tokens == 50

        details = await store.load_details(run_id)
        assert details is not None
        assert len(details.nodes) == 2

    @pytest.mark.asyncio
    async def test_failed_node_needs_attention(self, tmp_path: Path):
        store = RuntimeLogStore(tmp_path / "logs")
        rt_logger = RuntimeLogger(store=store, agent_id="test-agent")
        run_id = rt_logger.start_run("goal-1")

        rt_logger.log_step(
            node_id="node-1",
            node_type="event_loop",
            step_index=0,
            verdict="ESCALATE",
            verdict_feedback="Cannot proceed, need human input",
            tool_calls=[],
            llm_text="I'm stuck.",
            input_tokens=50,
            output_tokens=20,
            latency_ms=300,
        )

        rt_logger.log_node_complete(
            node_id="node-1",
            node_name="Search",
            node_type="event_loop",
            success=False,
            error="Judge escalated: Cannot proceed",
            total_steps=1,
            tokens_used=70,
            latency_ms=300,
            exit_status="escalated",
            escalate_count=1,
        )

        await rt_logger.end_run(
            status="failure",
            duration_ms=300,
            node_path=["node-1"],
            execution_quality="failed",
        )

        summary = await store.load_summary(run_id)
        assert summary is not None
        assert summary.needs_attention is True
        assert any(
            "failed" in r.lower() or "escalat" in r.lower() for r in summary.attention_reasons
        )

    @pytest.mark.asyncio
    async def test_ensure_node_logged_no_op_if_already_logged(self, tmp_path: Path):
        store = RuntimeLogStore(tmp_path / "logs")
        rt_logger = RuntimeLogger(store=store, agent_id="test-agent")
        run_id = rt_logger.start_run("goal-1")

        # Node logs itself
        rt_logger.log_node_complete(
            node_id="node-1",
            node_name="Search",
            node_type="event_loop",
            success=True,
            exit_status="success",
        )

        # Executor calls ensure_node_logged, which should be a no-op
        rt_logger.ensure_node_logged(
            node_id="node-1",
            node_name="Search",
            node_type="event_loop",
            success=True,
        )

        # Only one entry on disk
        details = store.read_node_details_sync(run_id)
        assert len(details) == 1

    @pytest.mark.asyncio
    async def test_ensure_node_logged_creates_entry_if_missing(self, tmp_path: Path):
        store = RuntimeLogStore(tmp_path / "logs")
        rt_logger = RuntimeLogger(store=store, agent_id="test-agent")
        run_id = rt_logger.start_run("goal-1")

        # Node didn't log itself, so the executor calls ensure_node_logged
        rt_logger.ensure_node_logged(
            node_id="node-1",
            node_name="Search",
            node_type="event_loop",
            success=False,
            error="Crashed",
        )

        details = store.read_node_details_sync(run_id)
        assert len(details) == 1
        assert details[0].error == "Crashed"
        assert details[0].needs_attention is True

    @pytest.mark.asyncio
    async def test_large_data_preserved(self, tmp_path: Path):
        """Large tool input/result/llm_text values should be stored in full."""
        store = RuntimeLogStore(tmp_path / "logs")
        rt_logger = RuntimeLogger(store=store, agent_id="test-agent")
        run_id = rt_logger.start_run("goal-1")

        long_value = "x" * 2000
        rt_logger.log_step(
            node_id="node-1",
            node_type="event_loop",
            step_index=0,
            verdict="ACCEPT",
            tool_calls=[
                {
                    "tool_use_id": "tc_1",
                    "tool_name": "write_file",
                    "tool_input": {"content": long_value},
                    "content": "y" * 5000,
                    "is_error": False,
                }
            ],
            llm_text="z" * 5000,
            input_tokens=100,
            output_tokens=50,
            latency_ms=500,
        )

        rt_logger.log_node_complete(
            node_id="node-1",
            node_name="Writer",
            node_type="event_loop",
            success=True,
            total_steps=1,
            exit_status="success",
        )

        await rt_logger.end_run(
            status="success",
            duration_ms=500,
            node_path=["node-1"],
        )

        tool_logs = await store.load_tool_logs(run_id)
        assert tool_logs is not None
        tc = tool_logs.steps[0].tool_calls[0]
        # Full values preserved
        assert len(tc.tool_input["content"]) == 2000
        assert len(tc.result) == 5000
        assert len(tool_logs.steps[0].llm_text) == 5000

    @pytest.mark.asyncio
    async def test_end_run_does_not_propagate_exceptions(self, tmp_path: Path):
        """end_run must catch all exceptions and never propagate."""
        store = RuntimeLogStore(tmp_path / "logs")
        rt_logger = RuntimeLogger(store=store, agent_id="test-agent")
        rt_logger.start_run("goal-1")

        # Make the run directory read-only so writing summary.json fails
        import os

        bad_path = tmp_path / "logs" / "sessions"
        bad_path.mkdir(parents=True, exist_ok=True)
        run_dir = bad_path / rt_logger._run_id / "logs"
        run_dir.mkdir(parents=True, exist_ok=True)
        blocker = run_dir / "summary.json"
        blocker.write_text("not json")
        os.chmod(str(run_dir), 0o444)

        try:
            # This should NOT raise, even though writing will fail
            await rt_logger.end_run("success", duration_ms=100)
        finally:
            # Restore permissions for cleanup
            os.chmod(str(run_dir), 0o755)

    @pytest.mark.asyncio
    async def test_crash_resilience_l2_l3_survive(self, tmp_path: Path):
        """L2 and L3 data survives even if end_run() is never called (crash)."""
        store = RuntimeLogStore(tmp_path / "logs")
        rt_logger = RuntimeLogger(store=store, agent_id="test-agent")
        run_id = rt_logger.start_run("goal-1")

        # Log some steps and a node
        rt_logger.log_step(
            node_id="node-1",
            node_type="event_loop",
            step_index=0,
            llm_text="Working...",
            input_tokens=100,
            output_tokens=50,
        )
        rt_logger.log_step(
            node_id="node-1",
            node_type="event_loop",
            step_index=1,
            llm_text="Still working...",
            input_tokens=80,
            output_tokens=30,
        )
        rt_logger.log_node_complete(
            node_id="node-1",
            node_name="Search",
            node_type="event_loop",
            success=True,
            total_steps=2,
            input_tokens=180,
            output_tokens=80,
        )

        # Simulate crash: do NOT call end_run()

        # Verify L2 and L3 are recoverable from disk
        details = await store.load_details(run_id)
        assert details is not None
        assert len(details.nodes) == 1
        assert details.nodes[0].node_id == "node-1"

        tool_logs = await store.load_tool_logs(run_id)
        assert tool_logs is not None
        assert len(tool_logs.steps) == 2

        # But no L1 summary exists
        summary = await store.load_summary(run_id)
        assert summary is None

    @pytest.mark.asyncio
    async def test_in_progress_run_visible_in_list(self, tmp_path: Path):
        """An in-progress run (no summary.json) appears in list_runs."""
        store = RuntimeLogStore(tmp_path / "logs")
        rt_logger = RuntimeLogger(store=store, agent_id="test-agent")
        run_id = rt_logger.start_run("goal-1")

        # Log a step but don't end the run
        rt_logger.log_step(
            node_id="node-1",
            node_type="event_loop",
            step_index=0,
            llm_text="Working...",
        )

        runs = await store.list_runs()
        assert len(runs) == 1
        assert runs[0].run_id == run_id
        assert runs[0].status == "in_progress"

    @pytest.mark.asyncio
    async def test_log_step_with_error_and_stacktrace(self, tmp_path: Path):
        """Test logging partial steps with errors and stack traces."""
        store = RuntimeLogStore(tmp_path / "logs")
        rt_logger = RuntimeLogger(store=store, agent_id="test-agent")
        run_id = rt_logger.start_run("goal-1")

        # Log a partial step with an error
        rt_logger.log_step(
            node_id="node-1",
            node_type="event_loop",
            step_index=0,
            error="LLM call failed: Connection timeout",
            stacktrace=(
                "Traceback (most recent call last):\n"
                " File test.py line 10\n"
                " raise TimeoutError()"
            ),
            is_partial=True,
        )

        # Verify the step was logged
        loaded = await store.load_tool_logs(run_id)
        assert loaded is not None
        assert len(loaded.steps) == 1
        step = loaded.steps[0]
        assert step.error == "LLM call failed: Connection timeout"
        assert "TimeoutError" in step.stacktrace
        assert step.is_partial is True

    @pytest.mark.asyncio
    async def test_log_node_complete_with_stacktrace(self, tmp_path: Path):
        """Test logging node completion with stack traces."""
        store = RuntimeLogStore(tmp_path / "logs")
        rt_logger = RuntimeLogger(store=store, agent_id="test-agent")
        run_id = rt_logger.start_run("goal-1")

        # Log node failure with stacktrace
        rt_logger.log_node_complete(
            node_id="node-1",
            node_name="Test Node",
            node_type="event_loop",
            success=False,
            error="Node crashed",
            stacktrace=(
                "Traceback (most recent call last):\n"
                " File node.py line 42\n"
                " raise RuntimeError('crash')"
            ),
        )

        # Verify the detail was logged with stacktrace
        loaded = await store.load_details(run_id)
        assert loaded is not None
        assert len(loaded.nodes) == 1
        node = loaded.nodes[0]
        assert node.error == "Node crashed"
        assert "RuntimeError" in node.stacktrace

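    # The four attention-flag tests below pin the thresholds implied by their
    # inputs (the authoritative values live in RuntimeLogger, not here):
    # retry_count > 3, latency_ms > 60000, tokens_used > 100000, total_steps > 20.
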
    @pytest.mark.asyncio
    async def test_attention_flags_excessive_retries(self, tmp_path: Path):
        """Test that excessive retries trigger attention flags."""
        store = RuntimeLogStore(tmp_path / "logs")
        rt_logger = RuntimeLogger(store=store, agent_id="test-agent")
        run_id = rt_logger.start_run("goal-1")

        # Log node with excessive retries
        rt_logger.log_node_complete(
            node_id="node-1",
            node_name="Retry Node",
            node_type="event_loop",
            success=True,
            retry_count=5,  # > 3 threshold
        )

        # Verify attention flag is set
        loaded = await store.load_details(run_id)
        assert loaded is not None
        node = loaded.nodes[0]
        assert node.needs_attention is True
        assert any("Excessive retries" in reason for reason in node.attention_reasons)

    @pytest.mark.asyncio
    async def test_attention_flags_high_latency(self, tmp_path: Path):
        """Test that high latency triggers attention flags."""
        store = RuntimeLogStore(tmp_path / "logs")
        rt_logger = RuntimeLogger(store=store, agent_id="test-agent")
        run_id = rt_logger.start_run("goal-1")

        # Log node with high latency
        rt_logger.log_node_complete(
            node_id="node-1",
            node_name="Slow Node",
            node_type="event_loop",
            success=True,
            latency_ms=65000,  # > 60000 threshold
        )

        # Verify attention flag is set
        loaded = await store.load_details(run_id)
        assert loaded is not None
        node = loaded.nodes[0]
        assert node.needs_attention is True
        assert any("High latency" in reason for reason in node.attention_reasons)

    @pytest.mark.asyncio
    async def test_attention_flags_high_token_usage(self, tmp_path: Path):
        """Test that high token usage triggers attention flags."""
        store = RuntimeLogStore(tmp_path / "logs")
        rt_logger = RuntimeLogger(store=store, agent_id="test-agent")
        run_id = rt_logger.start_run("goal-1")

        # Log node with high token usage
        rt_logger.log_node_complete(
            node_id="node-1",
            node_name="Token Heavy Node",
            node_type="event_loop",
            success=True,
            tokens_used=150000,  # > 100000 threshold
        )

        # Verify attention flag is set
        loaded = await store.load_details(run_id)
        assert loaded is not None
        node = loaded.nodes[0]
        assert node.needs_attention is True
        assert any("High token usage" in reason for reason in node.attention_reasons)

    @pytest.mark.asyncio
    async def test_attention_flags_many_iterations(self, tmp_path: Path):
        """Test that many iterations trigger attention flags."""
        store = RuntimeLogStore(tmp_path / "logs")
        rt_logger = RuntimeLogger(store=store, agent_id="test-agent")
        run_id = rt_logger.start_run("goal-1")

        # Log node with many iterations
        rt_logger.log_node_complete(
            node_id="node-1",
            node_name="Iterative Node",
            node_type="event_loop",
            success=True,
            total_steps=25,  # > 20 threshold
        )

        # Verify attention flag is set
        loaded = await store.load_details(run_id)
        assert loaded is not None
        node = loaded.nodes[0]
        assert node.needs_attention is True
        assert any("Many iterations" in reason for reason in node.attention_reasons)

    @pytest.mark.asyncio
    async def test_guard_failure_exit_status(self, tmp_path: Path):
        """Test that guard failures use the correct exit status."""
        store = RuntimeLogStore(tmp_path / "logs")
        rt_logger = RuntimeLogger(store=store, agent_id="test-agent")
        run_id = rt_logger.start_run("goal-1")

        # Log a guard failure
        rt_logger.log_node_complete(
            node_id="node-1",
            node_name="Guard Node",
            node_type="event_loop",
            success=False,
            error="LLM provider not available",
            exit_status="guard_failure",
        )

        # Verify exit status
        loaded = await store.load_details(run_id)
        assert loaded is not None
        node = loaded.nodes[0]
        assert node.exit_status == "guard_failure"
        assert node.success is False