# hive/core/framework/agent_loop/conversation.py
"""NodeConversation: Message history management for graph nodes."""
from __future__ import annotations
import json
import logging
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Literal, Protocol, runtime_checkable
LEGACY_RUN_ID = "__legacy_run__"
logger = logging.getLogger(__name__)
def is_legacy_run_id(run_id: str | None) -> bool:
"""True when run_id represents pre-migration (no run boundary) data."""
return run_id is None or run_id == LEGACY_RUN_ID


@dataclass
class Message:
    """A single message in a conversation.

    Attributes:
        seq: Monotonic sequence number.
        role: One of "user", "assistant", or "tool".
        content: Message text.
        tool_use_id: Internal tool-use identifier (output as ``tool_call_id`` in LLM dicts).
        tool_calls: OpenAI-format tool call list for assistant messages.
        is_error: When True and role is "tool", ``to_llm_dict`` prepends "ERROR: " to content.
    """

    seq: int
    role: Literal["user", "assistant", "tool"]
    content: str
    tool_use_id: str | None = None
    tool_calls: list[dict[str, Any]] | None = None
    is_error: bool = False
    # Phase-aware compaction metadata (continuous mode)
    phase_id: str | None = None
    is_transition_marker: bool = False
    # True when this message is real human input (from /chat), not a system prompt
    is_client_input: bool = False
    # Optional image content blocks (e.g. from browser_screenshot)
    image_content: list[dict[str, Any]] | None = None
    # True when message contains an activated skill body (AS-10: never prune)
    is_skill_content: bool = False
    # Logical worker run identifier for shared-session persistence
    run_id: str | None = None

    def to_llm_dict(self) -> dict[str, Any]:
        """Convert to OpenAI-format message dict."""
        if self.role == "user":
            if self.image_content:
                blocks: list[dict[str, Any]] = []
                if self.content:
                    blocks.append({"type": "text", "text": self.content})
                blocks.extend(self.image_content)
                return {"role": "user", "content": blocks}
            return {"role": "user", "content": self.content}
        if self.role == "assistant":
            d: dict[str, Any] = {"role": "assistant"}
            if self.tool_calls:
                d["tool_calls"] = self.tool_calls
                d["content"] = self.content if self.content else None
            else:
                d["content"] = self.content or ""
            return d
        # role == "tool"
        content = f"ERROR: {self.content}" if self.is_error else self.content
        if self.image_content:
            # Multimodal tool result: text + image content blocks
            blocks: list[dict[str, Any]] = [{"type": "text", "text": content}]
            blocks.extend(self.image_content)
            return {
                "role": "tool",
                "tool_call_id": self.tool_use_id,
                "content": blocks,
            }
        return {
            "role": "tool",
            "tool_call_id": self.tool_use_id,
            "content": content,
        }

    def to_storage_dict(self) -> dict[str, Any]:
        """Serialize all fields for persistence. Omits None/default-False fields."""
        d: dict[str, Any] = {
            "seq": self.seq,
            "role": self.role,
            "content": self.content,
        }
        if self.tool_use_id is not None:
            d["tool_use_id"] = self.tool_use_id
        if self.tool_calls is not None:
            d["tool_calls"] = self.tool_calls
        if self.is_error:
            d["is_error"] = self.is_error
        if self.phase_id is not None:
            d["phase_id"] = self.phase_id
        if self.is_transition_marker:
            d["is_transition_marker"] = self.is_transition_marker
        if self.is_client_input:
            d["is_client_input"] = self.is_client_input
        if self.image_content is not None:
            d["image_content"] = self.image_content
        # Persist the skill-content flag so AS-10 prune protection
        # survives a cold restore.
        if self.is_skill_content:
            d["is_skill_content"] = self.is_skill_content
        if self.run_id is not None:
            d["run_id"] = self.run_id
        return d

    @classmethod
    def from_storage_dict(cls, data: dict[str, Any]) -> Message:
        """Deserialize from a storage dict."""
        return cls(
            seq=data["seq"],
            role=data["role"],
            content=data["content"],
            tool_use_id=data.get("tool_use_id"),
            tool_calls=data.get("tool_calls"),
            is_error=data.get("is_error", False),
            phase_id=data.get("phase_id"),
            is_transition_marker=data.get("is_transition_marker", False),
            is_client_input=data.get("is_client_input", False),
            image_content=data.get("image_content"),
            is_skill_content=data.get("is_skill_content", False),
            run_id=data.get("run_id"),
        )
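

# Illustrative example (not executed; shapes follow to_llm_dict above). An
# assistant message carrying a tool call serializes with content=None, and the
# matching tool result echoes the internal id back as ``tool_call_id``:
#
#     Message(
#         seq=4, role="assistant", content="",
#         tool_calls=[{"id": "tc_1", "type": "function",
#                      "function": {"name": "read_file",
#                                   "arguments": '{"path": "notes.md"}'}}],
#     ).to_llm_dict()
#     # -> {"role": "assistant", "tool_calls": [...], "content": None}
#
#     Message(seq=5, role="tool", content="boom", tool_use_id="tc_1",
#             is_error=True).to_llm_dict()
#     # -> {"role": "tool", "tool_call_id": "tc_1", "content": "ERROR: boom"}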


def _normalize_cursor(cursor: dict[str, Any] | None) -> dict[str, Any]:
    """Normalize legacy and run-scoped cursor formats into one flat shape."""
    return dict(cursor) if cursor else {}


def get_cursor_next_seq(cursor: dict[str, Any] | None) -> int | None:
    """Return the cursor's ``next_seq`` when present and well-typed."""
    next_seq = (cursor or {}).get("next_seq")
    return next_seq if isinstance(next_seq, int) else None


def update_cursor_next_seq(cursor: dict[str, Any] | None, next_seq: int) -> dict[str, Any]:
    """Return a copy of *cursor* with ``next_seq`` set."""
    updated = dict(cursor or {})
    updated["next_seq"] = next_seq
    return updated


def get_run_cursor(cursor: dict[str, Any] | None, run_id: str | None) -> dict[str, Any] | None:
    """Return the flat cursor. *run_id* is accepted for API compatibility
    but ignored (run-scoped isolation has been reverted)."""
    return dict(cursor) if cursor else None


def update_run_cursor(
    cursor: dict[str, Any] | None,
    run_id: str | None,
    values: dict[str, Any],
) -> dict[str, Any]:
    """Merge *values* into the flat cursor. *run_id* is accepted for API
    compatibility but ignored (run-scoped isolation has been reverted)."""
    updated = dict(cursor or {})
    updated.update(values)
    return updated


def _extract_spillover_filename(content: str) -> str | None:
    """Extract spillover filename from a tool result annotation.

    Matches patterns produced by ``truncate_tool_result``:
    - New large-result header: "Full result saved at: /abs/path/file.txt"
    - Legacy bracketed trailer: "[Saved to 'file.txt']" (pre-2026-04-15,
      retained here so cold conversations still resolve)
    """
    # New prose format — ``saved at: <path>``; the path runs to the next
    # whitespace character.
    match = re.search(r"[Ss]aved at:\s*(\S+)", content)
    if match:
        return match.group(1)
    # Legacy format.
    match = re.search(r"[Ss]aved to '([^']+)'", content)
    return match.group(1) if match else None
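

# Worked example (illustrative inputs): both annotation styles resolve to a
# filename.
#
#     _extract_spillover_filename("12,345 chars omitted. Full result saved at: /tmp/out/r1.txt")
#     # -> "/tmp/out/r1.txt"
#     _extract_spillover_filename("[Saved to 'result_3.txt']")
#     # -> "result_3.txt"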


_TC_ARG_LIMIT = 200  # max chars per tool_call argument after compaction


def _compact_tool_calls(tool_calls: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Truncate tool_call arguments to save context tokens during compaction.

    Preserves ``id``, ``type``, and ``function.name`` exactly. When arguments
    exceed ``_TC_ARG_LIMIT``, replaces the full JSON string with a compact
    **valid** JSON summary. The Anthropic API parses tool_call arguments and
    rejects requests with malformed JSON (e.g. unterminated strings), so we
    must never produce broken JSON here.
    """
    compact = []
    for tc in tool_calls:
        func = tc.get("function", {})
        args = func.get("arguments", "")
        if len(args) > _TC_ARG_LIMIT:
            # Build a valid JSON summary instead of slicing mid-string.
            # Try to extract top-level keys for a meaningful preview.
            try:
                parsed = json.loads(args)
                if isinstance(parsed, dict):
                    # Preserve key names, truncate values
                    summary_parts = []
                    for k, v in parsed.items():
                        v_str = str(v)
                        if len(v_str) > 60:
                            v_str = v_str[:60] + "..."
                        summary_parts.append(f"{k}={v_str}")
                    summary = ", ".join(summary_parts)
                    if len(summary) > _TC_ARG_LIMIT:
                        summary = summary[:_TC_ARG_LIMIT] + "..."
                    args = json.dumps({"_compacted": summary})
                else:
                    args = json.dumps({"_compacted": str(parsed)[:_TC_ARG_LIMIT]})
            except (json.JSONDecodeError, TypeError):
                # Args were already invalid JSON — wrap the preview safely
                args = json.dumps({"_compacted": args[:_TC_ARG_LIMIT]})
        compact.append(
            {
                "id": tc.get("id", ""),
                "type": tc.get("type", "function"),
                "function": {
                    "name": func.get("name", ""),
                    "arguments": args,
                },
            }
        )
    return compact
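

# Before/after sketch (illustrative): a 1,000-char write_file argument string
# collapses to a short preview that is still valid JSON, so strict providers
# that parse tool_call arguments keep accepting the compacted history.
#
#     tc = {"id": "tc_9", "type": "function",
#           "function": {"name": "write_file",
#                        "arguments": json.dumps({"path": "a.txt",
#                                                 "content": "x" * 1000})}}
#     _compact_tool_calls([tc])[0]["function"]["arguments"]
#     # -> '{"_compacted": "path=a.txt, content=xxxxxxxx..."}'
#     # (value truncated at 60 chars; id/type/function.name untouched)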


def extract_tool_call_history(messages: list[Message], max_entries: int = 30) -> str:
    """Build a compact tool call history from a list of messages.

    Used in compaction summaries to prevent the LLM from re-calling
    tools it already called. Extracts tool call details, files saved,
    outputs set, and errors encountered.
    """
    tool_calls_detail: dict[str, list[str]] = {}
    files_saved: list[str] = []
    outputs_set: list[str] = []
    errors: list[str] = []

    def _summarize_input(name: str, args: dict) -> str:
        if name == "web_search":
            return args.get("query", "")
        if name == "web_scrape":
            return args.get("url", "")
        if name == "read_file":
            return args.get("path", "")
        return ""

    for msg in messages:
        if msg.role == "assistant" and msg.tool_calls:
            for tc in msg.tool_calls:
                func = tc.get("function", {})
                name = func.get("name", "unknown")
                try:
                    args = json.loads(func.get("arguments", "{}"))
                except (json.JSONDecodeError, TypeError):
                    args = {}
                summary = _summarize_input(name, args)
                tool_calls_detail.setdefault(name, []).append(summary)
                if name == "read_file" and args.get("path"):
                    files_saved.append(args["path"])
                if name == "set_output" and args.get("key"):
                    outputs_set.append(args["key"])
        if msg.role == "tool" and msg.is_error:
            preview = msg.content[:120].replace("\n", " ")
            errors.append(preview)

    parts: list[str] = []
    if tool_calls_detail:
        lines: list[str] = []
        for name, inputs in list(tool_calls_detail.items())[:max_entries]:
            count = len(inputs)
            non_empty = [s for s in inputs if s]
            if non_empty:
                detail_lines = [f" - {s[:120]}" for s in non_empty[:8]]
                lines.append(f" {name} ({count}x):\n" + "\n".join(detail_lines))
            else:
                lines.append(f" {name} ({count}x)")
        parts.append("TOOLS ALREADY CALLED:\n" + "\n".join(lines))
    if files_saved:
        unique = list(dict.fromkeys(files_saved))
        parts.append("FILES SAVED: " + ", ".join(unique))
    if outputs_set:
        unique = list(dict.fromkeys(outputs_set))
        parts.append("OUTPUTS SET: " + ", ".join(unique))
    if errors:
        parts.append("ERRORS (do NOT retry these):\n" + "\n".join(f" - {e}" for e in errors[:10]))
    return "\n\n".join(parts)


# ---------------------------------------------------------------------------
# ConversationStore protocol (Phase 2)
# ---------------------------------------------------------------------------


@runtime_checkable
class ConversationStore(Protocol):
    """Protocol for conversation persistence backends."""

    async def write_part(self, seq: int, data: dict[str, Any]) -> None: ...
    async def read_parts(self) -> list[dict[str, Any]]: ...
    async def write_meta(self, data: dict[str, Any]) -> None: ...
    async def read_meta(self) -> dict[str, Any] | None: ...
    async def write_cursor(self, data: dict[str, Any]) -> None: ...
    async def read_cursor(self) -> dict[str, Any] | None: ...
    async def delete_parts_before(self, seq: int, run_id: str | None = None) -> None: ...
    async def close(self) -> None: ...
    async def destroy(self) -> None: ...
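

# A minimal in-memory reference backend (hedged sketch — this class name is
# invented here and is not part of the framework). It shows the contract each
# protocol method is expected to honor: one dict per seq, a single meta blob,
# a single cursor blob, and delete_parts_before dropping seqs strictly below
# the boundary. Because ConversationStore is runtime_checkable, an instance
# passes ``isinstance(store, ConversationStore)``.
class _InMemoryConversationStore:
    def __init__(self) -> None:
        self._parts: dict[int, dict[str, Any]] = {}
        self._meta: dict[str, Any] | None = None
        self._cursor: dict[str, Any] | None = None

    async def write_part(self, seq: int, data: dict[str, Any]) -> None:
        self._parts[seq] = data

    async def read_parts(self) -> list[dict[str, Any]]:
        return [self._parts[s] for s in sorted(self._parts)]

    async def write_meta(self, data: dict[str, Any]) -> None:
        self._meta = data

    async def read_meta(self) -> dict[str, Any] | None:
        return self._meta

    async def write_cursor(self, data: dict[str, Any]) -> None:
        self._cursor = data

    async def read_cursor(self) -> dict[str, Any] | None:
        return self._cursor

    async def delete_parts_before(self, seq: int, run_id: str | None = None) -> None:
        self._parts = {s: p for s, p in self._parts.items() if s >= seq}

    async def close(self) -> None:
        pass

    async def destroy(self) -> None:
        self._parts.clear()
        self._meta = None
        self._cursor = None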


# ---------------------------------------------------------------------------
# NodeConversation
# ---------------------------------------------------------------------------


def _try_extract_key(content: str, key: str) -> str | None:
    """Try 4 strategies to extract a *key*'s value from message content.

    Strategies (in order):
    1. Whole message is JSON — ``json.loads``, check for key.
    2. Embedded JSON via ``find_json_object`` helper.
    3. Colon format: ``key: value``.
    4. Equals format: ``key = value``.
    """
    # Imported lazily, presumably to avoid a circular import with the
    # orchestrator package.
    from framework.orchestrator.node import find_json_object

    # 1. Whole message is JSON
    try:
        parsed = json.loads(content)
        if isinstance(parsed, dict) and key in parsed:
            val = parsed[key]
            return json.dumps(val) if not isinstance(val, str) else val
    except (json.JSONDecodeError, TypeError):
        pass
    # 2. Embedded JSON via find_json_object
    json_str = find_json_object(content)
    if json_str:
        try:
            parsed = json.loads(json_str)
            if isinstance(parsed, dict) and key in parsed:
                val = parsed[key]
                return json.dumps(val) if not isinstance(val, str) else val
        except (json.JSONDecodeError, TypeError):
            pass
    # 3. Colon format: key: value
    match = re.search(rf"\b{re.escape(key)}\s*:\s*(.+)", content)
    if match:
        return match.group(1).strip()
    # 4. Equals format: key = value
    match = re.search(rf"\b{re.escape(key)}\s*=\s*(.+)", content)
    if match:
        return match.group(1).strip()
    return None
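

# Strategy walkthrough (illustrative inputs): each format below yields the
# same value for the key "report_path".
#
#     _try_extract_key('{"report_path": "out/r.md"}', "report_path")        # 1. JSON
#     _try_extract_key('Done. {"report_path": "out/r.md"}', "report_path")  # 2. embedded
#     _try_extract_key("report_path: out/r.md", "report_path")              # 3. colon
#     _try_extract_key("report_path = out/r.md", "report_path")             # 4. equals
#     # each -> "out/r.md"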


class NodeConversation:
    """Message history for a graph node with optional write-through persistence.

    When *store* is ``None`` the conversation works purely in-memory.
    When a :class:`ConversationStore` is supplied every mutation is
    persisted via write-through (meta is lazily written on the first
    ``_persist`` call).
    """

    def __init__(
        self,
        system_prompt: str = "",
        max_context_tokens: int = 32000,
        compaction_threshold: float = 0.8,
        output_keys: list[str] | None = None,
        store: ConversationStore | None = None,
        run_id: str | None = None,
        compaction_buffer_tokens: int | None = None,
        compaction_warning_buffer_tokens: int | None = None,
    ) -> None:
        self._system_prompt = system_prompt
        self._max_context_tokens = max_context_tokens
        self._compaction_threshold = compaction_threshold
        # Buffer-based compaction trigger (Gap 7). When set, takes
        # precedence over the multiplicative compaction_threshold so the
        # loop reserves a fixed headroom for the next turn's input+output
        # instead of trying to get exactly X% of the way to the hard
        # limit. If left as None the legacy threshold-based rule is
        # used, keeping old call sites behaving identically.
        self._compaction_buffer_tokens = compaction_buffer_tokens
        self._compaction_warning_buffer_tokens = compaction_warning_buffer_tokens
        self._output_keys = output_keys
        self._store = store
        self._messages: list[Message] = []
        self._next_seq: int = 0
        self._meta_persisted: bool = False
        self._last_api_input_tokens: int | None = None
        self._current_phase: str | None = None
        self._run_id: str | None = run_id

    # --- Properties --------------------------------------------------------

    @property
    def system_prompt(self) -> str:
        return self._system_prompt

    def update_system_prompt(self, new_prompt: str) -> None:
        """Update the system prompt.

        Used in continuous conversation mode at phase transitions to swap
        Layer 3 (focus) while preserving the conversation history.
        """
        self._system_prompt = new_prompt
        self._meta_persisted = False  # re-persist with new prompt

    def set_current_phase(self, phase_id: str) -> None:
        """Set the current phase ID. Subsequent messages will be stamped with it."""
        self._current_phase = phase_id

    @property
    def current_phase(self) -> str | None:
        return self._current_phase

    @property
    def messages(self) -> list[Message]:
        """Return a defensive copy of the message list."""
        return list(self._messages)

    @property
    def turn_count(self) -> int:
        """Number of conversational turns (one turn = one user message)."""
        return sum(1 for m in self._messages if m.role == "user")

    @property
    def message_count(self) -> int:
        """Total number of messages (all roles)."""
        return len(self._messages)

    @property
    def next_seq(self) -> int:
        return self._next_seq

    # --- Add messages ------------------------------------------------------

    async def add_user_message(
        self,
        content: str,
        *,
        is_transition_marker: bool = False,
        is_client_input: bool = False,
        image_content: list[dict[str, Any]] | None = None,
    ) -> Message:
        msg = Message(
            seq=self._next_seq,
            role="user",
            content=content,
            phase_id=self._current_phase,
            run_id=self._run_id,
            is_transition_marker=is_transition_marker,
            is_client_input=is_client_input,
            image_content=image_content,
        )
        self._messages.append(msg)
        self._next_seq += 1
        # Invalidate stale API token count so estimate_tokens() uses
        # the char-based heuristic which reflects the new message.
        self._last_api_input_tokens = None
        await self._persist(msg)
        return msg

    async def add_assistant_message(
        self,
        content: str,
        tool_calls: list[dict[str, Any]] | None = None,
    ) -> Message:
        msg = Message(
            seq=self._next_seq,
            role="assistant",
            content=content,
            tool_calls=tool_calls,
            phase_id=self._current_phase,
            run_id=self._run_id,
        )
        self._messages.append(msg)
        self._next_seq += 1
        self._last_api_input_tokens = None
        await self._persist(msg)
        return msg

    async def add_tool_result(
        self,
        tool_use_id: str,
        content: str,
        is_error: bool = False,
        image_content: list[dict[str, Any]] | None = None,
        is_skill_content: bool = False,
    ) -> Message:
        # Dedup guard: reject a second tool_result for the same tool_use_id.
        # Anthropic's API only accepts one result per tool_call, and a duplicate
        # causes a hard 400 two turns later ("messages with role 'tool' must
        # be a response to a preceding message with 'tool_calls'"). Duplicates
        # can arise when a tool_call_timeout fires and records a placeholder
        # error, then the real executor thread eventually delivers the actual
        # result (the thread kept running inside run_in_executor — see
        # tool_result_handler.execute_tool). We keep the FIRST result to
        # preserve whatever state the agent already reasoned about.
        for existing in reversed(self._messages):
            if existing.role == "tool" and existing.tool_use_id == tool_use_id:
                logger.warning(
                    "add_tool_result: dropping duplicate result for tool_use_id=%s "
                    "(first result preserved, %d chars; new result ignored, %d chars)",
                    tool_use_id,
                    len(existing.content),
                    len(content),
                )
                return existing
        msg = Message(
            seq=self._next_seq,
            role="tool",
            content=content,
            tool_use_id=tool_use_id,
            is_error=is_error,
            phase_id=self._current_phase,
            image_content=image_content,
            is_skill_content=is_skill_content,
            run_id=self._run_id,
        )
        self._messages.append(msg)
        self._next_seq += 1
        self._last_api_input_tokens = None
        await self._persist(msg)
        return msg

    # --- Query -------------------------------------------------------------

    def to_llm_messages(self) -> list[dict[str, Any]]:
        """Return messages as OpenAI-format dicts (system prompt excluded).

        Automatically repairs orphaned tool_use blocks (assistant messages
        with tool_calls that lack corresponding tool-result messages). This
        can happen when a loop is cancelled mid-tool-execution.
        """
        msgs = [m.to_llm_dict() for m in self._messages]
        msgs = self._repair_orphaned_tool_calls(msgs)
        msgs = self._sanitize_for_api(msgs)
        return msgs

    @staticmethod
    def _sanitize_for_api(msgs: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Final pass: ensure message sequence is valid for strict APIs.

        Rules:
        1. No two consecutive user messages (merged when both contents are
           plain strings, otherwise the newer one is dropped)
        2. Tool messages must have a tool_call_id
        3. Assistant messages with tool_calls must have content=null, not ""
        4. First message must not be 'tool' or 'assistant' (without prior context)
        """
        cleaned: list[dict[str, Any]] = []
        for m in msgs:
            role = m.get("role")
            # Fix assistant content when tool_calls present
            if role == "assistant" and m.get("tool_calls"):
                if m.get("content") == "":
                    m["content"] = None
            # Drop tool messages without tool_call_id
            if role == "tool" and not m.get("tool_call_id"):
                continue
            # Consecutive user messages: merge string contents, else drop
            if cleaned and cleaned[-1].get("role") == role == "user":
                prev_content = cleaned[-1].get("content", "")
                curr_content = m.get("content", "")
                if isinstance(prev_content, str) and isinstance(curr_content, str):
                    cleaned[-1]["content"] = f"{prev_content}\n{curr_content}"
                continue
            cleaned.append(m)
        # Drop leading assistant/tool messages (no prior context)
        while cleaned and cleaned[0].get("role") in ("assistant", "tool"):
            cleaned.pop(0)
        return cleaned

    @staticmethod
    def _repair_orphaned_tool_calls(
        msgs: list[dict[str, Any]],
    ) -> list[dict[str, Any]]:
        """Ensure tool_call / tool_result pairs are consistent.

        1. **Orphaned tool results** (tool_result with no matching tool_use
           anywhere) are dropped. Happens after compaction removes the
           parent assistant message.
        2. **Positionally orphaned tool results** (tool_result separated
           from its parent by a non-tool message, e.g. a user injection)
           are dropped. The Anthropic API requires tool messages to
           follow immediately after the assistant message that issued
           the matching tool_call.
        3. **Duplicate tool results** (same tool_call_id appearing more
           than once) are dropped; only the first is kept.
        4. **Orphaned tool calls** (tool_use with no following tool_result)
           get a synthetic error result appended. Happens when the loop
           is cancelled mid-tool-execution.
        """
        # Pass 1: collect all tool_call IDs from assistant messages so we
        # can identify orphaned tool-result messages.
        all_tool_call_ids: set[str] = set()
        for m in msgs:
            if m.get("role") == "assistant":
                for tc in m.get("tool_calls") or []:
                    tc_id = tc.get("id")
                    if tc_id:
                        all_tool_call_ids.add(tc_id)
        # Pass 2: build repaired list — drop orphaned tool results, drop
        # positional orphans and duplicates, patch missing tool results.
        #
        # ``open_tool_calls`` holds the tool_call IDs we're still expecting
        # results for: it's populated when we emit an assistant-with-tool_calls
        # and drained as matching tool messages follow. Any tool message
        # whose id is not currently open is positionally invalid and gets
        # dropped — that closes the gap that caused the tool-after-user
        # 400 errors.
        repaired: list[dict[str, Any]] = []
        open_tool_calls: set[str] = set()
        seen_tool_ids: set[str] = set()
        for m in msgs:
            role = m.get("role")
            if role == "tool":
                tid = m.get("tool_call_id")
                # Drop tool results with no matching tool_use anywhere.
                if not tid or tid not in all_tool_call_ids:
                    continue
                # Drop duplicates (same id appearing twice) — keep first.
                if tid in seen_tool_ids:
                    continue
                # Drop positional orphans — tool messages whose parent
                # assistant isn't the still-open assistant block.
                if tid not in open_tool_calls:
                    continue
                open_tool_calls.discard(tid)
                seen_tool_ids.add(tid)
                repaired.append(m)
                continue
            # Any non-tool message closes the current assistant tool block.
            # If the previous assistant left tool_calls unanswered, patch
            # synthetic error results before emitting this message so the
            # API sees a complete pairing.
            if open_tool_calls:
                for stale_id in list(open_tool_calls):
                    repaired.append(
                        {
                            "role": "tool",
                            "tool_call_id": stale_id,
                            "content": "ERROR: Tool execution was interrupted.",
                        }
                    )
                    seen_tool_ids.add(stale_id)
                open_tool_calls.clear()
            repaired.append(m)
            if role == "assistant":
                for tc in m.get("tool_calls") or []:
                    tc_id = tc.get("id")
                    if tc_id and tc_id not in seen_tool_ids:
                        open_tool_calls.add(tc_id)
        # Tail: if the conversation ends with an assistant that issued
        # tool_calls and no results followed, patch them so the next
        # turn's first message can be a valid assistant/user response.
        if open_tool_calls:
            for stale_id in list(open_tool_calls):
                repaired.append(
                    {
                        "role": "tool",
                        "tool_call_id": stale_id,
                        "content": "ERROR: Tool execution was interrupted.",
                    }
                )
        return repaired
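
    # Repair walkthrough (illustrative): a user message injected between a
    # tool_call and its late-arriving result both patches the dangling call
    # and drops the now positionally-invalid result.
    #
    #     msgs = [
    #         {"role": "assistant", "content": None,
    #          "tool_calls": [{"id": "tc_1", "type": "function",
    #                          "function": {"name": "f", "arguments": "{}"}}]},
    #         {"role": "user", "content": "wait, change of plan"},
    #         {"role": "tool", "tool_call_id": "tc_1", "content": "late result"},
    #     ]
    #     NodeConversation._repair_orphaned_tool_calls(msgs)
    #     # -> assistant(tool_calls=[tc_1]),
    #     #    tool(tc_1, "ERROR: Tool execution was interrupted."),
    #     #    user("wait, change of plan")
    #     # The late tc_1 result is dropped: its id was already answered.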

    def estimate_tokens(self) -> int:
        """Best available token estimate.

        Uses actual API input token count when available (set via
        :meth:`update_token_count`), otherwise falls back to a
        character-based heuristic that includes message content, tool_call
        arguments, and image blocks. The heuristic applies a 4/3 safety
        margin to avoid under-counting (inspired by Claude Code's compact
        service).
        """
        if self._last_api_input_tokens is not None:
            return self._last_api_input_tokens
        total_chars = 0
        image_tokens = 0
        for m in self._messages:
            total_chars += len(m.content)
            if m.tool_calls:
                for tc in m.tool_calls:
                    func = tc.get("function", {})
                    total_chars += len(func.get("arguments", ""))
                    total_chars += len(func.get("name", ""))
            if m.image_content:
                # Images/documents have a fixed token cost per block
                image_tokens += len(m.image_content) * 2000
        # chars/4 tokens, times the 4/3 safety margin: (c/4) * (4/3) == c/3
        return total_chars // 3 + image_tokens

    def update_token_count(self, actual_input_tokens: int) -> None:
        """Store actual API input token count for more accurate compaction.

        Called by EventLoopNode after each LLM call with the ``input_tokens``
        value from the API response. This value includes system prompt and
        tool definitions, so it may be higher than a message-only estimate.
        """
        self._last_api_input_tokens = actual_input_tokens

    def usage_ratio(self) -> float:
        """Current token usage as a fraction of *max_context_tokens*.

        Returns 0.0 when ``max_context_tokens`` is zero (unlimited).
        """
        if self._max_context_tokens <= 0:
            return 0.0
        return self.estimate_tokens() / self._max_context_tokens

    def needs_compaction(self) -> bool:
        """True when the conversation should be compacted before the
        next LLM call.

        Buffer-based rule (Gap 7): trigger when the current estimate
        plus the configured buffer would exceed the hard context limit.
        This fires compaction while headroom remains, instead of only
        AFTER the limit is breached and a reactive binary-split pass
        is forced.

        When no buffer is configured, falls back to the multiplicative
        threshold the old callers were built around.
        """
        if self._max_context_tokens <= 0:
            return False
        if self._compaction_buffer_tokens is not None:
            budget = self._max_context_tokens - self._compaction_buffer_tokens
            return self.estimate_tokens() >= max(0, budget)
        return self.estimate_tokens() >= self._max_context_tokens * self._compaction_threshold

    def compaction_warning(self) -> bool:
        """True when the conversation has crossed the warning threshold
        but not yet the hard compaction trigger.

        Used by telemetry / UI to show a "context getting tight" hint
        before a compaction pass actually runs. Returns False when no
        warning buffer is configured (legacy behaviour).
        """
        if self._max_context_tokens <= 0 or self._compaction_warning_buffer_tokens is None:
            return False
        warn_at = self._max_context_tokens - self._compaction_warning_buffer_tokens
        return self.estimate_tokens() >= max(0, warn_at)
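
    # Trigger arithmetic (illustrative numbers): with max_context_tokens =
    # 200_000 and compaction_buffer_tokens = 40_000, compaction fires at an
    # estimate of 160_000 tokens. The legacy rule with threshold 0.8 fires
    # at the same point here — the buffer form just expresses the reserve in
    # absolute tokens (next turn's input + output) instead of as a fraction.
    #
    #     conv = NodeConversation(max_context_tokens=200_000,
    #                             compaction_buffer_tokens=40_000)
    #     conv.update_token_count(159_999)
    #     conv.needs_compaction()   # -> False
    #     conv.update_token_count(160_000)
    #     conv.needs_compaction()   # -> True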

    # --- Output-key extraction ---------------------------------------------

    def _extract_protected_values(self, messages: list[Message]) -> dict[str, str]:
        """Scan assistant messages for output_key values before compaction.

        Iterates most-recent-first. Once a key is found, it's skipped for
        older messages (latest value wins).
        """
        if not self._output_keys:
            return {}
        found: dict[str, str] = {}
        remaining_keys = set(self._output_keys)
        for msg in reversed(messages):
            if msg.role != "assistant" or not remaining_keys:
                continue
            for key in list(remaining_keys):
                value = self._try_extract_key(msg.content, key)
                if value is not None:
                    found[key] = value
                    remaining_keys.discard(key)
        return found

    def _try_extract_key(self, content: str, key: str) -> str | None:
        """Try 4 strategies to extract a key's value from message content."""
        return _try_extract_key(content, key)

    # --- Lifecycle ---------------------------------------------------------

    async def prune_old_tool_results(
        self,
        protect_tokens: int = 5000,
        min_prune_tokens: int = 2000,
    ) -> int:
        """Replace old tool result content with compact placeholders.

        Walks backward through messages. Recent tool results (within
        *protect_tokens*) are kept intact. Older tool results have their
        content replaced with a ~100-char placeholder that preserves the
        spillover filename reference (if any). Message structure (role,
        seq, tool_use_id) stays valid for the LLM API.

        Phase-aware behavior (continuous mode): when messages have ``phase_id``
        metadata, all messages in the current phase are protected regardless of
        token budget. Transition markers are never pruned. Older phases' tool
        results are pruned more aggressively.

        Error tool results are never pruned — they prevent re-calling
        failing tools.

        Returns the number of messages pruned (0 if nothing was pruned).
        """
        if not self._messages:
            return 0
        # Walk backward, classify tool results as protected vs pruneable
        protected_tokens = 0
        pruneable: list[int] = []  # indices into self._messages
        pruneable_tokens = 0
        for i in range(len(self._messages) - 1, -1, -1):
            msg = self._messages[i]
            # Transition markers are never pruned (any role)
            if msg.is_transition_marker:
                continue
            if msg.role != "tool":
                continue
            if msg.is_error:
                continue  # never prune errors
            if msg.is_skill_content:
                continue  # never prune activated skill instructions (AS-10)
            # Already pruned — match both the legacy bracketed placeholder
            # and the current prose placeholder written below.
            if msg.content.startswith(("[Pruned tool result", "Pruned tool result")):
                continue
            # Tiny results (set_output acks, confirmations) — pruning
            # saves negligible space but makes the LLM think the call
            # failed, causing costly retries.
            if len(msg.content) < 100:
                continue
            # Phase-aware: protect current phase messages
            if self._current_phase and msg.phase_id == self._current_phase:
                continue
            est = len(msg.content) // 4
            if protected_tokens < protect_tokens:
                protected_tokens += est
            else:
                pruneable.append(i)
                pruneable_tokens += est
        # Only prune if enough to be worthwhile
        if pruneable_tokens < min_prune_tokens:
            return 0
        # Replace content with compact placeholder
        count = 0
        for i in pruneable:
            msg = self._messages[i]
            orig_len = len(msg.content)
            spillover = _extract_spillover_filename(msg.content)
            if spillover:
                placeholder = (
                    f"Pruned tool result ({orig_len:,} chars) cleared from context. "
                    f"Full data saved at: {spillover}\n"
                    f"Read the complete data with read_file(path='{spillover}')."
                )
            else:
                placeholder = f"Pruned tool result ({orig_len:,} chars) cleared from context."
            self._messages[i] = Message(
                seq=msg.seq,
                role=msg.role,
                content=placeholder,
                tool_use_id=msg.tool_use_id,
                tool_calls=msg.tool_calls,
                is_error=msg.is_error,
                phase_id=msg.phase_id,
                is_transition_marker=msg.is_transition_marker,
                run_id=msg.run_id,
            )
            count += 1
            if self._store:
                await self._store.write_part(msg.seq, self._messages[i].to_storage_dict())
        # Reset token estimate — content lengths changed
        self._last_api_input_tokens = None
        return count

    async def evict_old_images(self, keep_latest: int = 2) -> int:
        """Strip ``image_content`` from older messages, keeping the most recent.

        Screenshots from ``browser_screenshot`` are inlined into the
        message's ``image_content`` as base64 data URLs. Each screenshot
        costs ~250k tokens when the provider counts the base64 as
        text — four screenshots push a conversation over Gemini's 1M
        context limit and trigger out-of-context garbage output (see
        ``session_20260415_104727_5c4ed7ff`` for the terminal case
        where the model emitted ``协日`` as its final text then stopped).

        This method walks backward through messages and keeps
        ``image_content`` intact on the most recent ``keep_latest``
        messages that have images. Older messages get their
        ``image_content`` nulled out — the text content (metadata
        like url, dimensions, scale hints) stays, but the raw bytes
        are dropped. Storage is updated too so cold-restore sees the
        same evicted state.

        Run this right after every tool result is recorded so image
        context stays bounded even within a single iteration (the
        compaction pipeline only fires at iteration boundaries, too
        late for a single turn that takes 4 screenshots).

        Returns the number of messages whose image_content was evicted.
        """
        if not self._messages or keep_latest < 0:
            return 0
        # Find messages carrying images, walking newest → oldest.
        image_indices: list[int] = []
        for i in range(len(self._messages) - 1, -1, -1):
            if self._messages[i].image_content:
                image_indices.append(i)
        # Nothing to evict if we have ≤ keep_latest images total.
        if len(image_indices) <= keep_latest:
            return 0
        # Evict everything past the first keep_latest (newest) entries.
        to_evict = image_indices[keep_latest:]
        evicted = 0
        for idx in to_evict:
            msg = self._messages[idx]
            self._messages[idx] = Message(
                seq=msg.seq,
                role=msg.role,
                content=msg.content,
                tool_use_id=msg.tool_use_id,
                tool_calls=msg.tool_calls,
                is_error=msg.is_error,
                phase_id=msg.phase_id,
                is_transition_marker=msg.is_transition_marker,
                is_client_input=msg.is_client_input,
                image_content=None,  # ← dropped
                is_skill_content=msg.is_skill_content,
                run_id=msg.run_id,
            )
            evicted += 1
            if self._store:
                await self._store.write_part(msg.seq, self._messages[idx].to_storage_dict())
        if evicted:
            # Reset token estimate — image blocks no longer contribute.
            self._last_api_input_tokens = None
            logger.info(
                "evict_old_images: dropped image_content from %d message(s), kept %d most recent",
                evicted,
                keep_latest,
            )
        return evicted

    async def compact(
        self,
        summary: str,
        keep_recent: int = 2,
        phase_graduated: bool = False,
    ) -> None:
        """Replace old messages with a summary, optionally keeping recent ones.

        Args:
            summary: Caller-provided summary text.
            keep_recent: Number of recent messages to preserve (default 2).
                Clamped to [0, len(messages) - 1].
            phase_graduated: When True and messages have phase_id metadata,
                split at phase boundaries instead of using keep_recent.
                Keeps current + previous phase intact; compacts older phases.
        """
        if not self._messages:
            return
        total = len(self._messages)
        # Phase-graduated: find the split point based on phase boundaries.
        # Keeps current phase + previous phase intact, compacts older phases.
        if phase_graduated and self._current_phase:
            split = self._find_phase_graduated_split()
        else:
            split = None
        if split is None:
            # Fallback: use keep_recent (non-phase or single-phase conversation)
            keep_recent = max(0, min(keep_recent, total - 1))
            split = total - keep_recent if keep_recent > 0 else total
        # Advance split past orphaned tool results at the boundary.
        # Tool-role messages reference a tool_use from the preceding
        # assistant message; if that assistant message falls into the
        # compacted (old) portion the tool_result becomes invalid.
        while split < total and self._messages[split].role == "tool":
            split += 1
        # Nothing to compact
        if split == 0:
            return
        old_messages = list(self._messages[:split])
        recent_messages = list(self._messages[split:])
        # Extract protected values from messages being discarded
        if self._output_keys:
            protected = self._extract_protected_values(old_messages)
            if protected:
                lines = ["PRESERVED VALUES (do not lose these):"]
                for k, v in protected.items():
                    lines.append(f"- {k}: {v}")
                lines.append("")
                lines.append("CONVERSATION SUMMARY:")
                lines.append(summary)
                summary = "\n".join(lines)
        # Determine summary seq
        if recent_messages:
            summary_seq = recent_messages[0].seq - 1
        else:
            summary_seq = self._next_seq
            self._next_seq += 1
        summary_msg = Message(seq=summary_seq, role="user", content=summary, run_id=self._run_id)
        # Persist
        if self._store:
            delete_before = recent_messages[0].seq if recent_messages else self._next_seq
            await self._store.delete_parts_before(delete_before)
            await self._store.write_part(summary_msg.seq, summary_msg.to_storage_dict())
            await self._write_next_seq()
        self._messages = [summary_msg] + recent_messages
        self._last_api_input_tokens = None  # reset; next LLM call will recalibrate
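
    # Seq bookkeeping walkthrough (illustrative): compacting ten messages
    # (seq 0-9) with keep_recent=2 keeps seq 8-9 and writes the summary at
    # seq 7 — the slot just before the first kept message — so the store ends
    # up with exactly three parts: 7 (summary), 8, 9.
    #
    #     await conv.compact("Earlier turns summarized: ...", keep_recent=2)
    #     [m.seq for m in conv.messages]   # -> [7, 8, 9]
    #     conv.messages[0].role            # -> "user" (the summary message)
    #
    # If the boundary message at seq 8 were a tool result, the split would
    # first advance past it so the assistant/tool pair is never severed.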

    async def compact_preserving_structure(
        self,
        spillover_dir: str,
        keep_recent: int = 4,
        phase_graduated: bool = False,
        aggressive: bool = False,
    ) -> None:
        """Structure-preserving compaction: save freeform text to file, keep tool messages.

        Unlike ``compact()`` which replaces ALL old messages with a single LLM
        summary, this method preserves the tool call structure (assistant
        messages with tool_calls + tool result messages) that are already tiny
        after pruning. Only freeform text exchanges (user messages,
        text-only assistant messages) are saved to a file and removed.

        When *aggressive* is True, non-essential tool call pairs are also
        collapsed into a compact summary instead of being kept individually.
        Only ``set_output`` calls and error results are preserved; all other
        old tool pairs are replaced by a tool-call history summary.

        The result: the agent retains exact knowledge of what tools it called,
        where each result is stored, and can load the conversation text if
        needed. No LLM summary call. No heuristics. Nothing lost.
        """
        if not self._messages:
            return
        total = len(self._messages)
        # Determine split point (same logic as compact)
        if phase_graduated and self._current_phase:
            split = self._find_phase_graduated_split()
        else:
            split = None
        if split is None:
            keep_recent = max(0, min(keep_recent, total - 1))
            split = total - keep_recent if keep_recent > 0 else total
        # Advance split past orphaned tool results at the boundary
        while split < total and self._messages[split].role == "tool":
            split += 1
        if split == 0:
            return
        old_messages = self._messages[:split]
        # Classify old messages: structural (keep) vs freeform (save to file)
        kept_structural: list[Message] = []
        freeform_lines: list[str] = []
        collapsed_msgs: list[Message] = []
        # Collect all tool_use IDs present in old messages so we can detect
        # orphaned tool results whose parent assistant message was already
        # compacted away (API invariant protection).
        old_tc_ids: set[str] = set()
        for msg in old_messages:
            if msg.tool_calls:
                for tc in msg.tool_calls:
                    old_tc_ids.add(tc.get("id", ""))
        if aggressive:
            # Aggressive: only keep set_output tool pairs and error results.
            # Everything else is collapsed into a tool-call history summary.
            # We need to track tool_call IDs to pair assistant messages with
            # their tool results.
            protected_tc_ids: set[str] = set()
            collapsible_tc_ids: set[str] = set()
            # First pass: classify assistant messages
            for msg in old_messages:
                if msg.role != "assistant" or not msg.tool_calls:
                    continue
                has_protected = any(
                    tc.get("function", {}).get("name") == "set_output" for tc in msg.tool_calls
                )
                tc_ids = {tc.get("id", "") for tc in msg.tool_calls}
                if has_protected:
                    protected_tc_ids |= tc_ids
                else:
                    collapsible_tc_ids |= tc_ids
            # Skill content and transition markers are always protected
            for msg in old_messages:
                if msg.role == "tool" and msg.is_skill_content and msg.tool_use_id:
                    protected_tc_ids.add(msg.tool_use_id)
            # Second pass: classify all messages
            for msg in old_messages:
                if msg.is_transition_marker:
                    # Transition markers are always kept (phase boundaries)
                    kept_structural.append(msg)
                elif msg.role == "tool":
                    tc_id = msg.tool_use_id or ""
                    if tc_id in protected_tc_ids:
                        kept_structural.append(msg)
                    elif msg.is_error:
                        # Error results are always protected
                        kept_structural.append(msg)
                        # Protect the parent assistant message too
                        protected_tc_ids.add(tc_id)
                    elif msg.is_skill_content:
                        kept_structural.append(msg)
                    elif tc_id and tc_id not in old_tc_ids:
                        # Orphaned tool result — parent tool_use not in old msgs.
                        # Keep it to maintain API invariants.
                        kept_structural.append(msg)
                    else:
                        collapsed_msgs.append(msg)
                elif msg.role == "assistant" and msg.tool_calls:
                    tc_ids = {tc.get("id", "") for tc in msg.tool_calls}
                    if tc_ids & protected_tc_ids:
                        # Has at least one protected tool call — keep entire msg
                        compact_tcs = _compact_tool_calls(msg.tool_calls)
                        kept_structural.append(
                            Message(
                                seq=msg.seq,
                                role=msg.role,
                                content="",
                                tool_calls=compact_tcs,
                                is_error=msg.is_error,
                                phase_id=msg.phase_id,
                                is_transition_marker=msg.is_transition_marker,
                                run_id=msg.run_id,
                            )
                        )
                    else:
                        collapsed_msgs.append(msg)
                else:
                    # Freeform text — save to file
                    role_label = msg.role
                    text = msg.content
                    if len(text) > 2000:
                        text = text[:2000] + "..."
                    freeform_lines.append(f"[{role_label}] (seq={msg.seq}): {text}")
        else:
            # Standard mode: keep all tool call pairs as structural
            for msg in old_messages:
                if msg.is_transition_marker:
                    # Transition markers are always kept (phase boundaries)
                    kept_structural.append(msg)
                elif msg.role == "tool":
                    kept_structural.append(msg)
                elif msg.role == "assistant" and msg.tool_calls:
                    compact_tcs = _compact_tool_calls(msg.tool_calls)
                    kept_structural.append(
                        Message(
                            seq=msg.seq,
                            role=msg.role,
                            content="",
                            tool_calls=compact_tcs,
                            is_error=msg.is_error,
                            phase_id=msg.phase_id,
                            is_transition_marker=msg.is_transition_marker,
                            run_id=msg.run_id,
                        )
                    )
                else:
                    role_label = msg.role
                    text = msg.content
                    if len(text) > 2000:
                        text = text[:2000] + "..."
                    freeform_lines.append(f"[{role_label}] (seq={msg.seq}): {text}")
        # Write freeform text to a numbered conversation file
        spill_path = Path(spillover_dir)
        spill_path.mkdir(parents=True, exist_ok=True)
        # Find next conversation file number
        existing = sorted(spill_path.glob("conversation_*.md"))
        next_n = len(existing) + 1
        conv_filename = f"conversation_{next_n}.md"
        if freeform_lines:
            header = f"## Compacted conversation (messages 1-{split})\n\n"
            conv_text = header + "\n\n".join(freeform_lines)
            (spill_path / conv_filename).write_text(conv_text, encoding="utf-8")
        else:
            # Nothing to save — skip file creation
            conv_filename = ""
        # Build reference message. Prose format (no brackets) — see the
        # poison-pattern note on truncate_tool_result. Frontier models
        # autocomplete `[...']` trailers into their own text turns.
        ref_parts: list[str] = []
        if conv_filename:
            full_path = str((spill_path / conv_filename).resolve())
            ref_parts.append(
                f"Previous conversation saved at: {full_path}\n"
                f"Read the full transcript with read_file('{conv_filename}')."
            )
        elif not collapsed_msgs:
            ref_parts.append("(Previous freeform messages compacted.)")
        # Aggressive: add collapsed tool-call history to the reference
        if collapsed_msgs:
            tool_history = extract_tool_call_history(collapsed_msgs)
            if tool_history:
                ref_parts.append(tool_history)
            elif not ref_parts:
                ref_parts.append("(Previous tool calls compacted.)")
        ref_content = "\n\n".join(ref_parts)
        # Use a seq just before the first kept message
        recent_messages = list(self._messages[split:])
        if kept_structural:
            ref_seq = kept_structural[0].seq - 1
        elif recent_messages:
            ref_seq = recent_messages[0].seq - 1
        else:
            ref_seq = self._next_seq
            self._next_seq += 1
        ref_msg = Message(seq=ref_seq, role="user", content=ref_content, run_id=self._run_id)
        # Persist: delete old messages from store, write reference + kept structural.
        # In aggressive mode, collapsed messages may be interspersed with kept
        # messages, so we delete everything before the recent boundary and
        # rewrite only what we want to keep.
        if self._store:
            recent_boundary = recent_messages[0].seq if recent_messages else self._next_seq
            await self._store.delete_parts_before(recent_boundary)
            # Write the reference message
            await self._store.write_part(ref_msg.seq, ref_msg.to_storage_dict())
            # Write kept structural messages (they may have been modified)
            for msg in kept_structural:
                await self._store.write_part(msg.seq, msg.to_storage_dict())
            await self._write_next_seq()
        # Reassemble: reference + kept structural (in original order) + recent
        self._messages = [ref_msg] + kept_structural + recent_messages
        self._last_api_input_tokens = None
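
    # Mode comparison (illustrative): standard mode keeps every old tool pair
    # (arguments shrunk via _compact_tool_calls) and spills only freeform
    # prose; aggressive mode additionally collapses non-essential pairs into
    # the extract_tool_call_history() summary, keeping only set_output calls,
    # error results, skill content, and transition markers.
    #
    #     await conv.compact_preserving_structure("/runs/r1/spill")
    #     # history: [reference msg] + [compact tool pairs] + [recent tail]
    #     await conv.compact_preserving_structure("/runs/r1/spill", aggressive=True)
    #     # history: [reference msg incl. tool summary] + [protected pairs] + [tail]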

    def _find_phase_graduated_split(self) -> int | None:
        """Find split point that preserves current + previous phase.

        Returns the index of the first message in the protected set,
        or None if phase graduation doesn't apply (< 3 phases).
        """
        # Collect distinct phases in order of first appearance
        phases_seen: list[str] = []
        for msg in self._messages:
            if msg.phase_id and msg.phase_id not in phases_seen:
                phases_seen.append(msg.phase_id)
        # Need at least 3 phases for graduation to be meaningful
        # (current + previous are protected, older get compacted)
        if len(phases_seen) < 3:
            return None
        # Protect: current phase + previous phase
        protected_phases = {phases_seen[-1], phases_seen[-2]}
        # Find split: first message belonging to a protected phase
        for i, msg in enumerate(self._messages):
            if msg.phase_id in protected_phases:
                return i
        return None

    async def clear(self) -> None:
        """Remove all messages, keep system prompt, preserve ``_next_seq``."""
        if self._store:
            await self._store.delete_parts_before(self._next_seq)
            await self._write_next_seq()
        self._messages.clear()
        self._last_api_input_tokens = None

    def export_summary(self) -> str:
        """Structured summary with [STATS], [CONFIG], [RECENT_MESSAGES] sections."""
        prompt_preview = (
            self._system_prompt[:80] + "..." if len(self._system_prompt) > 80 else self._system_prompt
        )
        lines = [
            "[STATS]",
            f"turns: {self.turn_count}",
            f"messages: {self.message_count}",
            f"estimated_tokens: {self.estimate_tokens()}",
            "",
            "[CONFIG]",
            f"system_prompt: {prompt_preview!r}",
        ]
        if self._output_keys:
            lines.append(f"output_keys: {', '.join(self._output_keys)}")
        lines.append("")
        lines.append("[RECENT_MESSAGES]")
        for m in self._messages[-5:]:
            preview = m.content[:60] + "..." if len(m.content) > 60 else m.content
            lines.append(f" [{m.role}] {preview}")
        return "\n".join(lines)

    # --- Persistence internals ---------------------------------------------

    async def _persist(self, message: Message) -> None:
        """Write-through a single message. No-op when store is None."""
        if self._store is None:
            return
        if not self._meta_persisted:
            await self._persist_meta()
        await self._store.write_part(message.seq, message.to_storage_dict())
        await self._write_next_seq()

    async def _persist_meta(self) -> None:
        """Lazily write conversation metadata to the store (called once).

        When ``self._run_id`` is set, metadata is written flat for backward
        compatibility (run-scoped isolation has been reverted).
        """
        if self._store is None:
            return
        run_meta = {
            "system_prompt": self._system_prompt,
            "max_context_tokens": self._max_context_tokens,
            "compaction_threshold": self._compaction_threshold,
            "compaction_buffer_tokens": self._compaction_buffer_tokens,
            "compaction_warning_buffer_tokens": self._compaction_warning_buffer_tokens,
            "output_keys": self._output_keys,
        }
        await self._store.write_meta(run_meta)
        self._meta_persisted = True

    async def _write_next_seq(self) -> None:
        if self._store is None:
            return
        cursor = await self._store.read_cursor() or {}
        cursor["next_seq"] = self._next_seq
        await self._store.write_cursor(cursor)

    # --- Restore -----------------------------------------------------------

    @classmethod
    async def restore(
        cls,
        store: ConversationStore,
        phase_id: str | None = None,
        run_id: str | None = None,
    ) -> NodeConversation | None:
        """Reconstruct a NodeConversation from a store.

        Args:
            store: The conversation store to read from.
            phase_id: If set, only load parts matching this phase_id.
                Used in isolated mode so a node only sees its own
                messages in the shared flat store. In continuous mode
                pass ``None`` to load all parts.
            run_id: If set, only load parts matching this run_id.
                Ensures intentional restarts (new run_id) start fresh
                while crash recovery (same run_id) resumes correctly.

        Returns ``None`` if the store contains no metadata (i.e. the
        conversation was never persisted).
        """
        meta = await store.read_meta()
        if meta is None:
            return None
        conv = cls(
            system_prompt=meta.get("system_prompt", ""),
            max_context_tokens=meta.get("max_context_tokens", 32000),
            compaction_threshold=meta.get("compaction_threshold", 0.8),
            output_keys=meta.get("output_keys"),
            store=store,
            run_id=run_id,
            compaction_buffer_tokens=meta.get("compaction_buffer_tokens"),
            compaction_warning_buffer_tokens=meta.get("compaction_warning_buffer_tokens"),
        )
        conv._meta_persisted = True
        parts = await store.read_parts()
        if phase_id:
            filtered_parts = [p for p in parts if p.get("phase_id") == phase_id]
            if filtered_parts:
                parts = filtered_parts
            elif parts and all(p.get("phase_id") is None for p in parts):
                # Backward compatibility: older isolated stores (including queen
                # sessions) persisted parts without phase_id. In that case, the
                # phase filter would incorrectly hide the entire conversation.
                logger.info(
                    "Restoring legacy unphased conversation without applying "
                    "phase filter (phase_id=%s, parts=%d)",
                    phase_id,
                    len(parts),
                )
            else:
                parts = filtered_parts
        # Filter by run_id so intentional restarts (new run_id) start fresh
        # while crash recovery (same run_id) loads prior parts.
        if run_id and not is_legacy_run_id(run_id):
            parts = [p for p in parts if p.get("run_id") == run_id]
        conv._messages = [Message.from_storage_dict(p) for p in parts]
        cursor = await store.read_cursor()
        next_seq = get_cursor_next_seq(cursor)
        if next_seq is not None:
            conv._next_seq = next_seq
        elif conv._messages:
            conv._next_seq = conv._messages[-1].seq + 1
        return conv
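

# End-to-end persistence round-trip (hedged sketch; uses the illustrative
# _InMemoryConversationStore defined above, which is not part of the
# framework). Demonstrates write-through on add_* and crash-style recovery
# via restore() with the same run_id.
#
#     import asyncio
#
#     async def _demo() -> None:
#         store = _InMemoryConversationStore()
#         conv = NodeConversation(system_prompt="You are a worker.",
#                                 store=store, run_id="run-1")
#         await conv.add_user_message("Summarize the report.")
#         await conv.add_assistant_message("Reading it now.")
#         # Same run_id -> crash recovery resumes with both messages;
#         # a new run_id would filter them out and start fresh.
#         resumed = await NodeConversation.restore(store, run_id="run-1")
#         assert resumed is not None and resumed.message_count == 2
#         assert resumed.next_seq == 2
#
#     asyncio.run(_demo())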