refactor: rename shared memory to data buffer

This commit is contained in:
Richard Tang
2026-04-02 11:02:30 -07:00
parent 5b08edb384
commit 318ecfd508
39 changed files with 558 additions and 558 deletions
+3 -3
View File
@@ -27,7 +27,7 @@ class GreeterNode(NodeProtocol):
async def execute(self, ctx: NodeContext) -> NodeResult:
name = ctx.input_data.get("name", "World")
greeting = f"Hello, {name}!"
ctx.memory.write("greeting", greeting)
ctx.buffer.write("greeting", greeting)
return NodeResult(success=True, output={"greeting": greeting})
@@ -35,9 +35,9 @@ class UppercaserNode(NodeProtocol):
"""Convert text to uppercase."""
async def execute(self, ctx: NodeContext) -> NodeResult:
greeting = ctx.input_data.get("greeting") or ctx.memory.read("greeting") or ""
greeting = ctx.input_data.get("greeting") or ctx.buffer.read("greeting") or ""
result = greeting.upper()
ctx.memory.write("final_greeting", result)
ctx.buffer.write("final_greeting", result)
return NodeResult(success=True, output={"final_greeting": result})
+20 -20
View File
@@ -108,7 +108,7 @@ class EdgeSpec(BaseModel):
self,
source_success: bool,
source_output: dict[str, Any],
memory: dict[str, Any],
buffer_data: dict[str, Any],
llm: Any | None = None,
goal: Any | None = None,
source_node_name: str | None = None,
@@ -120,7 +120,7 @@ class EdgeSpec(BaseModel):
Args:
source_success: Whether the source node succeeded
source_output: Output from the source node
memory: Current shared memory state
buffer_data: Current data buffer state
llm: LLM provider for LLM_DECIDE edges
goal: Goal object for LLM_DECIDE edges
source_node_name: Name of source node (for LLM context)
@@ -139,7 +139,7 @@ class EdgeSpec(BaseModel):
return not source_success
if self.condition == EdgeCondition.CONDITIONAL:
return self._evaluate_condition(source_output, memory)
return self._evaluate_condition(source_output, buffer_data)
if self.condition == EdgeCondition.LLM_DECIDE:
if llm is None or goal is None:
@@ -150,7 +150,7 @@ class EdgeSpec(BaseModel):
goal=goal,
source_success=source_success,
source_output=source_output,
memory=memory,
buffer_data=buffer_data,
source_node_name=source_node_name,
target_node_name=target_node_name,
)
@@ -160,7 +160,7 @@ class EdgeSpec(BaseModel):
def _evaluate_condition(
self,
output: dict[str, Any],
memory: dict[str, Any],
buffer_data: dict[str, Any],
) -> bool:
"""Evaluate a conditional expression."""
@@ -168,14 +168,14 @@ class EdgeSpec(BaseModel):
return True
# Build evaluation context
# Include memory keys directly for easier access in conditions
# Include buffer keys directly for easier access in conditions
context = {
"output": output,
"memory": memory,
"buffer": buffer_data,
"result": output.get("result"),
"true": True, # Allow lowercase true/false in conditions
"false": False,
**memory, # Unpack memory keys directly into context
**buffer_data, # Unpack buffer keys directly into context
}
try:
@@ -186,7 +186,7 @@ class EdgeSpec(BaseModel):
expr_vars = {
k: repr(context[k])
for k in context
if k not in ("output", "memory", "result", "true", "false")
if k not in ("output", "buffer", "result", "true", "false")
and k in self.condition_expr
}
logger.info(
@@ -209,7 +209,7 @@ class EdgeSpec(BaseModel):
goal: Any,
source_success: bool,
source_output: dict[str, Any],
memory: dict[str, Any],
buffer_data: dict[str, Any],
source_node_name: str | None,
target_node_name: str | None,
) -> bool:
@@ -234,8 +234,8 @@ class EdgeSpec(BaseModel):
Should we proceed to: {target_node_name or self.target}?
Edge description: {self.description or "No description"}
**Context from memory**:
{json.dumps({k: str(v)[:100] for k, v in list(memory.items())[:5]}, indent=2)}
**Context from data buffer**:
{json.dumps({k: str(v)[:100] for k, v in list(buffer_data.items())[:5]}, indent=2)}
Evaluate whether proceeding to this next node is the right step toward achieving the goal.
Consider:
@@ -276,14 +276,14 @@ Respond with ONLY a JSON object:
def map_inputs(
self,
source_output: dict[str, Any],
memory: dict[str, Any],
buffer_data: dict[str, Any],
) -> dict[str, Any]:
"""
Map source outputs to target inputs.
Args:
source_output: Output from source node
memory: Current shared memory
buffer_data: Current data buffer
Returns:
Input dict for target node
@@ -294,11 +294,11 @@ Respond with ONLY a JSON object:
result = {}
for target_key, source_key in self.input_mapping.items():
# Try source output first, then memory
# Try source output first, then buffer
if source_key in source_output:
result[target_key] = source_output[source_key]
elif source_key in memory:
result[target_key] = memory[source_key]
elif source_key in buffer_data:
result[target_key] = buffer_data[source_key]
return result
@@ -403,9 +403,9 @@ class GraphSpec(BaseModel):
)
edges: list[EdgeSpec] = Field(default_factory=list, description="All edge specifications")
# Shared memory keys
memory_keys: list[str] = Field(
default_factory=list, description="Keys available in shared memory"
# Data buffer keys
buffer_keys: list[str] = Field(
default_factory=list, description="Keys available in data buffer"
)
# Default LLM settings
@@ -551,7 +551,7 @@ def build_emergency_summary(
# 2. Inputs the node received
input_lines = []
for key in spec.input_keys:
value = ctx.input_data.get(key) or ctx.memory.read(key)
value = ctx.input_data.get(key) or ctx.buffer.read(key)
if value is not None:
# Truncate long values but keep them recognisable
v_str = str(value)
@@ -228,7 +228,7 @@ async def check_pause(
pause_requested = ctx.input_data.get("pause_requested", False)
if not pause_requested:
try:
pause_requested = ctx.memory.read("pause_requested") or False
pause_requested = ctx.buffer.read("pause_requested") or False
except (PermissionError, KeyError):
pause_requested = False
if pause_requested:
@@ -17,7 +17,7 @@ from typing import TYPE_CHECKING, Any
from framework.graph.conversation import ConversationStore
from framework.graph.event_loop.judge_pipeline import SubagentJudge
from framework.graph.event_loop.types import LoopConfig, OutputAccumulator
from framework.graph.node import NodeContext, SharedMemory
from framework.graph.node import DataBuffer, NodeContext
from framework.llm.provider import ToolResult, ToolUse
from framework.runtime.event_bus import EventBus
@@ -93,7 +93,7 @@ async def execute_subagent(
subagent_spec = ctx.node_registry[agent_id]
# 2. Create read-only memory snapshot
parent_data = ctx.memory.read_all()
parent_data = ctx.buffer.read_all()
# Merge in-flight outputs from the parent's accumulator.
if accumulator:
@@ -101,12 +101,12 @@ async def execute_subagent(
if key not in parent_data:
parent_data[key] = value
subagent_memory = SharedMemory()
subagent_buffer = DataBuffer()
for key, value in parent_data.items():
subagent_memory.write(key, value, validate=False)
subagent_buffer.write(key, value, validate=False)
read_keys = set(parent_data.keys()) | set(subagent_spec.input_keys or [])
scoped_memory = subagent_memory.with_permissions(
scoped_buffer = subagent_buffer.with_permissions(
read_keys=list(read_keys),
write_keys=[], # Read-only!
)
@@ -218,7 +218,7 @@ async def execute_subagent(
runtime=ctx.runtime,
node_id=sa_node_id,
node_spec=subagent_spec,
memory=scoped_memory,
buffer=scoped_buffer,
input_data={"task": task, **parent_data},
llm=ctx.llm,
available_tools=subagent_tools,
+9 -9
View File
@@ -1617,9 +1617,9 @@ class EventLoopNode(NodeProtocol):
continue
# Exit point 5: Judge ACCEPT — log step + log_node_complete
# Write outputs to shared memory
# Write outputs to data buffer
for key, value in accumulator.to_dict().items():
ctx.memory.write(key, value, validate=False)
ctx.buffer.write(key, value, validate=False)
await self._publish_loop_completed(stream_id, node_id, iteration + 1, execution_id)
latency_ms = int((time.time() - start_time) * 1000)
@@ -2857,11 +2857,11 @@ class EventLoopNode(NodeProtocol):
return extract_tool_call_history(conversation.messages, max_entries=max_entries)
def _build_initial_message(self, ctx: NodeContext) -> str:
"""Build the initial user message from input data and memory.
"""Build the initial user message from input data and buffer.
Includes ALL input_data (not just declared input_keys) so that
upstream handoff data flows through regardless of key naming.
Declared input_keys are also checked in shared memory as fallback.
Declared input_keys are also checked in data buffer as fallback.
"""
parts = []
seen: set[str] = set()
@@ -2870,10 +2870,10 @@ class EventLoopNode(NodeProtocol):
if value is not None:
parts.append(f"{key}: {value}")
seen.add(key)
# Fallback: check memory for declared input_keys not already covered
# Fallback: check data buffer for declared input_keys not already covered
for key in ctx.node_spec.input_keys:
if key not in seen:
value = ctx.memory.read(key)
value = ctx.buffer.read(key)
if value is not None:
parts.append(f"{key}: {value}")
if ctx.goal_context:
@@ -3460,17 +3460,17 @@ class EventLoopNode(NodeProtocol):
The subagent:
- Gets a fresh conversation with just the task
- Has read-only access to the parent's readable memory
- Has read-only access to the parent's readable data buffer
- Cannot delegate to its own subagents (prevents recursion)
- Returns its output in structured JSON format
Args:
ctx: Parent node's context (for memory, tools, LLM access).
ctx: Parent node's context (for data buffer, tools, LLM access).
agent_id: The node ID of the subagent to invoke.
task: The task description to give the subagent.
accumulator: Parent's OutputAccumulator — provides outputs that
have been set via ``set_output`` but not yet written to
shared memory (which only happens after the node completes).
data buffer (which only happens after the node completes).
Returns:
ToolResult with structured JSON output containing:
+106 -106
View File
@@ -3,7 +3,7 @@ Graph Executor - Runs agent graphs.
The executor:
1. Takes a GraphSpec and Goal
2. Initializes shared memory
2. Initializes data buffer
3. Executes nodes following edges
4. Records all decisions to Runtime
5. Returns the final result
@@ -24,7 +24,7 @@ from framework.graph.node import (
NodeProtocol,
NodeResult,
NodeSpec,
SharedMemory,
DataBuffer,
)
from framework.graph.validator import OutputValidator
from framework.llm.provider import LLMProvider, Tool, ToolUse
@@ -104,8 +104,8 @@ class ParallelExecutionConfig:
# "wait_all" waits for all and reports all failures
on_branch_failure: str = "fail_all"
# Memory conflict handling when branches write same key
memory_conflict_strategy: str = "last_wins" # "last_wins", "first_wins", "error"
# Buffer conflict handling when branches write same key
buffer_conflict_strategy: str = "last_wins" # "last_wins", "first_wins", "error"
# Timeout per branch in seconds
branch_timeout_seconds: float = 300.0
@@ -240,7 +240,7 @@ class GraphExecutor:
self,
current_node: str,
path: list[str],
memory: Any,
buffer: Any,
node_visit_counts: dict[str, int],
) -> None:
"""Update state.json with live progress at node transitions.
@@ -275,11 +275,11 @@ class GraphExecutor:
timestamps = state_data.setdefault("timestamps", {})
timestamps["updated_at"] = datetime.now().isoformat()
# Persist full memory so state.json is sufficient for resume
# Persist full buffer so state.json is sufficient for resume
# even if the process dies before the final write.
memory_snapshot = memory.read_all()
state_data["memory"] = memory_snapshot
state_data["memory_keys"] = list(memory_snapshot.keys())
buffer_snapshot = buffer.read_all()
state_data["data_buffer"] = buffer_snapshot
state_data["buffer_keys"] = list(buffer_snapshot.keys())
with atomic_write(state_path, encoding="utf-8") as f:
_json.dump(state_data, f, indent=2)
@@ -473,7 +473,7 @@ class GraphExecutor:
graph: The graph specification
goal: The goal driving execution
input_data: Initial input data
session_state: Optional session state to resume from (with paused_at, memory, etc.)
session_state: Optional session state to resume from (with paused_at, data_buffer, etc.)
validate_graph: If False, skip graph validation (for test graphs that
intentionally break rules)
@@ -507,7 +507,7 @@ class GraphExecutor:
)
# Initialize execution state
memory = SharedMemory()
buffer = DataBuffer()
# Continuous conversation mode state
is_continuous = getattr(graph, "conversation_mode", "isolated") == "continuous"
@@ -526,31 +526,31 @@ class GraphExecutor:
self.logger.info("✓ Checkpointing enabled")
# Restore session state if provided
if session_state and "memory" in session_state:
memory_data = session_state["memory"]
if session_state and ("data_buffer" in session_state or "memory" in session_state):
buffer_data = session_state.get("data_buffer", session_state.get("memory"))
# [RESTORED] Type safety check
if not isinstance(memory_data, dict):
if not isinstance(buffer_data, dict):
self.logger.warning(
f"⚠️ Invalid memory data type in session state: "
f"{type(memory_data).__name__}, expected dict"
f"⚠️ Invalid data buffer type in session state: "
f"{type(buffer_data).__name__}, expected dict"
)
else:
# Restore memory from previous session.
# Restore buffer from previous session.
# Skip validation — this data was already validated when
# originally written, and research text triggers false
# positives on the code-indicator heuristic.
for key, value in memory_data.items():
memory.write(key, value, validate=False)
self.logger.info(f"📥 Restored session state with {len(memory_data)} memory keys")
for key, value in buffer_data.items():
buffer.write(key, value, validate=False)
self.logger.info(f"📥 Restored session state with {len(buffer_data)} buffer keys")
# Write new input data to memory (each key individually).
# Skip when resuming from a paused session — restored memory already
# Write new input data to buffer (each key individually).
# Skip when resuming from a paused session — restored buffer already
# contains all state including the original input, and re-writing
# input_data would overwrite intermediate results with stale values.
_is_resuming = bool(session_state and session_state.get("paused_at"))
if input_data and not _is_resuming:
for key, value in input_data.items():
memory.write(key, value)
buffer.write(key, value)
# Detect event-triggered execution (timer/webhook) — no interactive user.
_event_triggered = bool(input_data and isinstance(input_data.get("event"), dict))
@@ -596,9 +596,9 @@ class GraphExecutor:
f"(node: {checkpoint.current_node})"
)
# Restore memory from checkpoint
for key, value in checkpoint.shared_memory.items():
memory.write(key, value, validate=False)
# Restore buffer from checkpoint
for key, value in checkpoint.data_buffer.items():
buffer.write(key, value, validate=False)
# Start from checkpoint's next node or current node
current_node_id = (
@@ -609,7 +609,7 @@ class GraphExecutor:
path.extend(checkpoint.execution_path)
self.logger.info(
f"📥 Restored memory with {len(checkpoint.shared_memory)} keys, "
f"📥 Restored buffer with {len(checkpoint.data_buffer)} keys, "
f"resuming at node: {current_node_id}"
)
else:
@@ -671,7 +671,7 @@ class GraphExecutor:
# Fresh shared-session execution: clear stale cursor so the entry
# node doesn't restore a filled OutputAccumulator from the previous
# webhook run (which would cause the judge to accept immediately).
# The conversation history is preserved (continuous memory).
# The conversation history is preserved (continuous buffer).
# Exclude cold restores — those need to continue the conversation
# naturally without a "start fresh" marker.
_is_fresh_shared = bool(
@@ -785,9 +785,9 @@ class GraphExecutor:
)
# Create session state for pause
saved_memory = memory.read_all()
saved_buffer = buffer.read_all()
pause_session_state: dict[str, Any] = {
"memory": saved_memory, # Include memory for resume
"data_buffer": saved_buffer, # Include buffer for resume
"execution_path": list(path),
"node_visit_counts": dict(node_visit_counts),
}
@@ -798,7 +798,7 @@ class GraphExecutor:
checkpoint_type="pause",
current_node=current_node_id,
execution_path=path,
memory=memory,
buffer=buffer,
next_node=current_node_id,
is_clean=True,
)
@@ -811,7 +811,7 @@ class GraphExecutor:
# Return with paused status
return ExecutionResult(
success=False,
output=saved_memory,
output=saved_buffer,
path=path,
paused_at=current_node_id,
error="Execution paused by user request",
@@ -836,15 +836,15 @@ class GraphExecutor:
f" ⊘ Node '{node_spec.name}' visit limit reached "
f"({node_visit_counts[current_node_id]}/{max_visits}), skipping"
)
# Skip execution — follow outgoing edges using current memory
skip_result = NodeResult(success=True, output=memory.read_all())
# Skip execution — follow outgoing edges using current buffer
skip_result = NodeResult(success=True, output=buffer.read_all())
next_node = await self._follow_edges(
graph=graph,
goal=goal,
current_node_id=current_node_id,
current_node_spec=node_spec,
result=skip_result,
memory=memory,
buffer=buffer,
)
if next_node is None:
self.logger.info(" → No more edges after visit limit, ending")
@@ -856,15 +856,15 @@ class GraphExecutor:
# Clear stale nullable outputs from previous visits.
# When a node is re-visited (e.g. review → process-batch → review),
# nullable outputs from the PREVIOUS visit linger in shared memory.
# nullable outputs from the PREVIOUS visit linger in the data buffer.
# This causes stale edge conditions to fire (e.g. "feedback is not None"
# from visit 1 triggers even when visit 2 sets "final_summary" instead).
# Clearing them ensures only the CURRENT visit's outputs affect routing.
if node_visit_counts.get(current_node_id, 0) > 1:
nullable_keys = getattr(node_spec, "nullable_output_keys", None) or []
for key in nullable_keys:
if memory.read(key) is not None:
memory.write(key, None, validate=False)
if buffer.read(key) is not None:
buffer.write(key, None, validate=False)
self.logger.info(
f" 🧹 Cleared stale nullable output '{key}' from previous visit"
)
@@ -899,12 +899,12 @@ class GraphExecutor:
if _is_resuming and path:
from framework.graph.prompt_composer import build_narrative
_resume_narrative = build_narrative(memory, path, graph)
_resume_narrative = build_narrative(buffer, path, graph)
# Build context for node
ctx = self._build_context(
node_spec=node_spec,
memory=memory,
buffer=buffer,
goal=goal,
input_data=input_data or {},
max_tokens=graph.max_tokens,
@@ -921,9 +921,9 @@ class GraphExecutor:
# Log actual input data being read
if node_spec.input_keys:
self.logger.info(" Reading from memory:")
self.logger.info(" Reading from data buffer:")
for key in node_spec.input_keys:
value = memory.read(key)
value = buffer.read(key)
if value is not None:
# Truncate long values for readability
value_str = str(value)
@@ -953,7 +953,7 @@ class GraphExecutor:
checkpoint_type="node_start",
current_node=node_spec.id,
execution_path=list(path),
memory=memory,
buffer=buffer,
is_clean=(sum(node_retry_counts.values()) == 0),
)
@@ -1061,21 +1061,21 @@ class GraphExecutor:
summary = result.to_summary(node_spec)
self.logger.info(f" 📝 Summary: {summary}")
# Log what was written to memory (detailed view)
# Log what was written to buffer (detailed view)
if result.output:
self.logger.info(" Written to memory:")
self.logger.info(" Written to data buffer:")
for key, value in result.output.items():
value_str = str(value)
if len(value_str) > 200:
value_str = value_str[:200] + "..."
self.logger.info(f" {key}: {value_str}")
# Write node outputs to memory BEFORE edge evaluation
# Write node outputs to buffer BEFORE edge evaluation
# This enables direct key access in conditional expressions (e.g., "score > 80")
# Without this, conditional edges can only use output['key'] syntax
if result.output:
for key, value in result.output.items():
memory.write(key, value, validate=False)
buffer.write(key, value, validate=False)
else:
self.logger.error(f" ✗ Failed: {result.error}")
@@ -1147,7 +1147,7 @@ class GraphExecutor:
current_node_id=current_node_id,
current_node_spec=node_spec,
result=result, # result.success=False triggers ON_FAILURE
memory=memory,
buffer=buffer,
)
if next_node:
@@ -1166,7 +1166,7 @@ class GraphExecutor:
)
self.runtime.end_run(
success=False,
output_data=memory.read_all(),
output_data=buffer.read_all(),
narrative=(
f"Failed at {node_spec.name} after "
f"{max_retries} retries: {result.error}"
@@ -1185,10 +1185,10 @@ class GraphExecutor:
execution_quality="failed",
)
# Save memory for potential resume
saved_memory = memory.read_all()
# Save buffer for potential resume
saved_buffer = buffer.read_all()
failure_session_state = {
"memory": saved_memory,
"data_buffer": saved_buffer,
"execution_path": list(path),
"node_visit_counts": dict(node_visit_counts),
"resume_from": current_node_id,
@@ -1200,7 +1200,7 @@ class GraphExecutor:
f"Node '{node_spec.name}' failed after "
f"{max_retries} attempts: {result.error}"
),
output=saved_memory,
output=saved_buffer,
steps_executed=steps,
total_tokens=total_tokens,
total_latency_ms=total_latency,
@@ -1228,11 +1228,11 @@ class GraphExecutor:
execution_id=self._execution_id,
)
saved_memory = memory.read_all()
saved_buffer = buffer.read_all()
session_state_out = {
"paused_at": node_spec.id,
"resume_from": f"{node_spec.id}_resume", # Resume key
"memory": saved_memory,
"data_buffer": saved_buffer,
"execution_path": list(path),
"node_visit_counts": dict(node_visit_counts),
"next_node": None, # Will resume from entry point
@@ -1240,7 +1240,7 @@ class GraphExecutor:
self.runtime.end_run(
success=True,
output_data=saved_memory,
output_data=saved_buffer,
narrative=f"Paused at {node_spec.name} after {steps} steps",
)
@@ -1259,7 +1259,7 @@ class GraphExecutor:
return ExecutionResult(
success=True,
output=saved_memory,
output=saved_buffer,
steps_executed=steps,
total_tokens=total_tokens,
total_latency_ms=total_latency,
@@ -1295,7 +1295,7 @@ class GraphExecutor:
)
current_node_id = result.next_node
self._write_progress(current_node_id, path, memory, node_visit_counts)
self._write_progress(current_node_id, path, buffer, node_visit_counts)
else:
# Get all traversable edges for fan-out detection
traversable_edges = await self._get_all_traversable_edges(
@@ -1304,7 +1304,7 @@ class GraphExecutor:
current_node_id=current_node_id,
current_node_spec=node_spec,
result=result,
memory=memory,
buffer=buffer,
)
if not traversable_edges:
@@ -1339,7 +1339,7 @@ class GraphExecutor:
graph=graph,
goal=goal,
edges=traversable_edges,
memory=memory,
buffer=buffer,
source_result=result,
source_node_spec=node_spec,
path=path,
@@ -1353,7 +1353,7 @@ class GraphExecutor:
if fan_in_node:
self.logger.info(f" ⑃ Fan-in: converging at {fan_in_node}")
current_node_id = fan_in_node
self._write_progress(current_node_id, path, memory, node_visit_counts)
self._write_progress(current_node_id, path, buffer, node_visit_counts)
else:
# No convergence point - branches are terminal
self.logger.info(" → Parallel branches completed (no convergence)")
@@ -1366,7 +1366,7 @@ class GraphExecutor:
current_node_id=current_node_id,
current_node_spec=node_spec,
result=result,
memory=memory,
buffer=buffer,
)
if next_node is None:
self.logger.info(" → No more edges, ending execution")
@@ -1393,7 +1393,7 @@ class GraphExecutor:
checkpoint_type="node_complete",
current_node=node_spec.id,
execution_path=list(path),
memory=memory,
buffer=buffer,
next_node=next_node,
is_clean=(sum(node_retry_counts.values()) == 0),
)
@@ -1418,7 +1418,7 @@ class GraphExecutor:
current_node_id = next_node
# Write progress snapshot at node transition
self._write_progress(current_node_id, path, memory, node_visit_counts)
self._write_progress(current_node_id, path, buffer, node_visit_counts)
# Continuous mode: thread conversation forward with transition marker
if is_continuous and result.conversation is not None:
@@ -1436,7 +1436,7 @@ class GraphExecutor:
)
# Build Layer 2 (narrative) from current state
narrative = build_narrative(memory, path, graph)
narrative = build_narrative(buffer, path, graph)
# Build per-node accounts prompt for the next node
_node_accounts = self.accounts_prompt or None
@@ -1469,7 +1469,7 @@ class GraphExecutor:
marker = build_transition_marker(
previous_node=node_spec,
next_node=next_spec,
memory=memory,
buffer=buffer,
cumulative_tool_names=sorted(cumulative_tool_names),
data_dir=data_dir,
)
@@ -1558,7 +1558,7 @@ class GraphExecutor:
input_data = result.output
# Collect output
output = memory.read_all()
output = buffer.read_all()
self.logger.info("\n✓ Execution complete!")
self.logger.info(f" Steps: {steps}")
@@ -1608,7 +1608,7 @@ class GraphExecutor:
execution_quality=exec_quality,
node_visit_counts=dict(node_visit_counts),
session_state={
"memory": output, # output IS memory.read_all()
"data_buffer": output, # output IS buffer.read_all()
"execution_path": list(path),
"node_visit_counts": dict(node_visit_counts),
},
@@ -1619,9 +1619,9 @@ class GraphExecutor:
self.logger.info("⏸ Execution cancelled - saving state for resume")
# Flush WIP accumulator outputs from the interrupted node's
# cursor.json into SharedMemory so they survive resume. The
# cursor.json into DataBuffer so they survive resume. The
# accumulator writes to cursor.json on every set() call, but
# only writes to SharedMemory when the judge ACCEPTs. Without
# only writes to DataBuffer when the judge ACCEPTs. Without
# this, edge conditions checking these keys see None on resume.
if current_node_id and self._storage_path:
try:
@@ -1633,10 +1633,10 @@ class GraphExecutor:
wip_outputs = cursor_data.get("outputs", {})
for key, value in wip_outputs.items():
if value is not None:
memory.write(key, value, validate=False)
buffer.write(key, value, validate=False)
if wip_outputs:
self.logger.info(
"Flushed %d WIP accumulator outputs to memory: %s",
"Flushed %d WIP accumulator outputs to buffer: %s",
len(wip_outputs),
list(wip_outputs.keys()),
)
@@ -1646,10 +1646,10 @@ class GraphExecutor:
exc_info=True,
)
# Save memory and state for resume
saved_memory = memory.read_all()
# Save buffer and state for resume
saved_buffer = buffer.read_all()
session_state_out: dict[str, Any] = {
"memory": saved_memory,
"data_buffer": saved_buffer,
"execution_path": list(path),
"node_visit_counts": dict(node_visit_counts),
}
@@ -1671,7 +1671,7 @@ class GraphExecutor:
return ExecutionResult(
success=False,
error="Execution cancelled",
output=saved_memory,
output=saved_buffer,
steps_executed=steps,
total_tokens=total_tokens,
total_latency_ms=total_latency,
@@ -1733,17 +1733,17 @@ class GraphExecutor:
cursor_data = _json.loads(cursor_path.read_text(encoding="utf-8"))
for key, value in cursor_data.get("outputs", {}).items():
if value is not None:
memory.write(key, value, validate=False)
buffer.write(key, value, validate=False)
except Exception:
self.logger.debug(
"Could not flush accumulator outputs from cursor",
exc_info=True,
)
# Save memory and state for potential resume
saved_memory = memory.read_all()
# Save buffer and state for potential resume
saved_buffer = buffer.read_all()
session_state_out: dict[str, Any] = {
"memory": saved_memory,
"data_buffer": saved_buffer,
"execution_path": list(path),
"node_visit_counts": dict(node_visit_counts),
"resume_from": current_node_id,
@@ -1774,7 +1774,7 @@ class GraphExecutor:
return ExecutionResult(
success=False,
error=str(e),
output=saved_memory,
output=saved_buffer,
steps_executed=steps,
path=path,
total_retries=total_retries_count,
@@ -1795,7 +1795,7 @@ class GraphExecutor:
def _build_context(
self,
node_spec: NodeSpec,
memory: SharedMemory,
buffer: DataBuffer,
goal: Goal,
input_data: dict[str, Any],
max_tokens: int = 4096,
@@ -1819,7 +1819,7 @@ class GraphExecutor:
if node_spec.tools:
available_tools = [t for t in self.tools if t.name in node_spec.tools]
# Create scoped memory view.
# Create scoped buffer view.
# When permissions are restricted (non-empty key lists), auto-include
# _-prefixed keys used by default skill protocols so agents can read/write
# operational state (e.g. _working_notes, _batch_ledger) regardless of
@@ -1831,9 +1831,9 @@ class GraphExecutor:
# Empty means "allow all" — adding keys would accidentally
# activate the permission check and block legitimate reads/writes.
if read_keys or write_keys:
from framework.skills.defaults import SHARED_MEMORY_KEYS as _skill_keys
from framework.skills.defaults import DATA_BUFFER_KEYS as _skill_keys
existing_underscore = [k for k in memory._data if k.startswith("_")]
existing_underscore = [k for k in buffer._data if k.startswith("_")]
extra_keys = set(_skill_keys) | set(existing_underscore)
# Only inject into read_keys when it was already non-empty — an empty
# read_keys means "allow all reads" and injecting skill keys would
@@ -1844,7 +1844,7 @@ class GraphExecutor:
if write_keys and k not in write_keys:
write_keys.append(k)
scoped_memory = memory.with_permissions(
scoped_buffer = buffer.with_permissions(
read_keys=read_keys,
write_keys=write_keys,
)
@@ -1866,7 +1866,7 @@ class GraphExecutor:
runtime=self.runtime,
node_id=node_spec.id,
node_spec=node_spec,
memory=scoped_memory,
buffer=scoped_buffer,
input_data=input_data,
llm=self.llm,
available_tools=available_tools,
@@ -1990,7 +1990,7 @@ class GraphExecutor:
current_node_id: str,
current_node_spec: Any,
result: NodeResult,
memory: SharedMemory,
buffer: DataBuffer,
) -> str | None:
"""Determine the next node by following edges."""
edges = graph.get_outgoing_edges(current_node_id)
@@ -2001,16 +2001,16 @@ class GraphExecutor:
if await edge.should_traverse(
source_success=result.success,
source_output=result.output,
memory=memory.read_all(),
buffer_data=buffer.read_all(),
llm=self.llm,
goal=goal,
source_node_name=current_node_spec.name if current_node_spec else current_node_id,
target_node_name=target_node_spec.name if target_node_spec else edge.target,
):
# Map inputs (skip validation for processed LLM output)
mapped = edge.map_inputs(result.output, memory.read_all())
mapped = edge.map_inputs(result.output, buffer.read_all())
for key, value in mapped.items():
memory.write(key, value, validate=False)
buffer.write(key, value, validate=False)
return edge.target
@@ -2023,7 +2023,7 @@ class GraphExecutor:
current_node_id: str,
current_node_spec: Any,
result: NodeResult,
memory: SharedMemory,
buffer: DataBuffer,
) -> list[EdgeSpec]:
"""
Get ALL edges that should be traversed (for fan-out detection).
@@ -2039,7 +2039,7 @@ class GraphExecutor:
if await edge.should_traverse(
source_success=result.success,
source_output=result.output,
memory=memory.read_all(),
buffer_data=buffer.read_all(),
llm=self.llm,
goal=goal,
source_node_name=current_node_spec.name if current_node_spec else current_node_id,
@@ -2103,7 +2103,7 @@ class GraphExecutor:
graph: GraphSpec,
goal: Goal,
edges: list[EdgeSpec],
memory: SharedMemory,
buffer: DataBuffer,
source_result: NodeResult,
source_node_spec: Any,
path: list[str],
@@ -2116,7 +2116,7 @@ class GraphExecutor:
graph: The graph specification
goal: The execution goal
edges: List of edges to follow in parallel
memory: Shared memory instance
buffer: DataBuffer instance
source_result: Result from the source node
source_node_spec: Spec of the source node
path: Execution path list to update
@@ -2135,7 +2135,7 @@ class GraphExecutor:
edge=edge,
)
# Track which branch wrote which key for memory conflict detection
# Track which branch wrote which key for buffer conflict detection
fanout_written_keys: dict[str, str] = {} # key -> branch_id that wrote it
fanout_keys_lock = asyncio.Lock()
@@ -2173,9 +2173,9 @@ class GraphExecutor:
try:
# Map inputs via edge
mapped = branch.edge.map_inputs(source_result.output, memory.read_all())
mapped = branch.edge.map_inputs(source_result.output, buffer.read_all())
for key, value in mapped.items():
await memory.write_async(key, value)
await buffer.write_async(key, value)
# Execute with retries
last_result = None
@@ -2185,7 +2185,7 @@ class GraphExecutor:
# Build context for this branch
ctx = self._build_context(
node_spec,
memory,
buffer,
goal,
mapped,
graph.max_tokens,
@@ -2230,15 +2230,15 @@ class GraphExecutor:
)
if result.success:
# Write outputs to shared memory with conflict detection
conflict_strategy = self._parallel_config.memory_conflict_strategy
# Write outputs to shared buffer with conflict detection
conflict_strategy = self._parallel_config.buffer_conflict_strategy
for key, value in result.output.items():
async with fanout_keys_lock:
prior_branch = fanout_written_keys.get(key)
if prior_branch and prior_branch != branch.branch_id:
if conflict_strategy == "error":
raise RuntimeError(
f"Memory conflict: key '{key}' already written "
f"Buffer conflict: key '{key}' already written "
f"by branch '{prior_branch}', "
f"conflicting write from '{branch.branch_id}'"
)
@@ -2255,7 +2255,7 @@ class GraphExecutor:
f"(last_wins: {prior_branch} -> {branch.branch_id})"
)
fanout_written_keys[key] = branch.branch_id
await memory.write_async(key, value)
await buffer.write_async(key, value)
branch.result = result
branch.status = "completed"
@@ -2378,7 +2378,7 @@ class GraphExecutor:
checkpoint_type: str,
current_node: str,
execution_path: list[str],
memory: SharedMemory,
buffer: DataBuffer,
next_node: str | None = None,
is_clean: bool = True,
) -> Checkpoint:
@@ -2389,7 +2389,7 @@ class GraphExecutor:
checkpoint_type: Type of checkpoint (node_start, node_complete)
current_node: Current node ID
execution_path: Nodes executed so far
memory: SharedMemory instance
buffer: DataBuffer instance
next_node: Next node to execute (for node_complete checkpoints)
is_clean: Whether execution was clean up to this point
@@ -2402,7 +2402,7 @@ class GraphExecutor:
session_id=self._storage_path.name if self._storage_path else "unknown",
current_node=current_node,
execution_path=execution_path,
shared_memory=memory.read_all(),
data_buffer=buffer.read_all(),
next_node=next_node,
is_clean=is_clean,
)
+5 -5
View File
@@ -401,13 +401,13 @@ class DataBuffer:
self,
read_keys: list[str],
write_keys: list[str],
) -> "SharedMemory":
) -> "DataBuffer":
"""Create a view with restricted permissions for a specific node.
The scoped view shares the same underlying data and locks,
enabling thread-safe parallel execution across scoped views.
"""
return SharedMemory(
return DataBuffer(
_data=self._data,
_allowed_read=set(read_keys) if read_keys else set(),
_allowed_write=set(write_keys) if write_keys else set(),
@@ -423,7 +423,7 @@ class NodeContext:
This is passed to every node and provides:
- Access to the runtime (for decision logging)
- Access to shared memory (for state)
- Access to the data buffer (for state)
- Access to LLM (for generation)
- Access to tools (for actions)
- The goal context (for guidance)
@@ -437,7 +437,7 @@ class NodeContext:
node_spec: NodeSpec
# State
memory: SharedMemory
buffer: DataBuffer
input_data: dict[str, Any] = field(default_factory=dict)
# LLM access (if applicable)
@@ -630,6 +630,6 @@ class NodeProtocol(ABC):
"""
errors = []
for key in ctx.node_spec.input_keys:
if key not in ctx.input_data and ctx.memory.read(key) is None:
if key not in ctx.input_data and ctx.buffer.read(key) is None:
errors.append(f"Missing required input: {key}")
return errors
+14 -14
View File
@@ -22,7 +22,7 @@ from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
from framework.graph.edge import GraphSpec
from framework.graph.node import NodeSpec, SharedMemory
from framework.graph.node import NodeSpec, DataBuffer
logger = logging.getLogger(__name__)
@@ -211,17 +211,17 @@ def compose_system_prompt(
def build_narrative(
memory: SharedMemory,
buffer: DataBuffer,
execution_path: list[str],
graph: GraphSpec,
) -> str:
"""Build Layer 2 (narrative) from structured state.
Deterministic — no LLM call. Reads SharedMemory and execution path
Deterministic — no LLM call. Reads data buffer and execution path
to describe what has happened so far. Cheap and fast.
Args:
memory: Current shared memory state.
buffer: Current data buffer state.
execution_path: List of node IDs visited so far.
graph: Graph spec (for node names/descriptions).
@@ -241,11 +241,11 @@ def build_narrative(
phase_descriptions.append(f"- {node_id}")
parts.append("Phases completed:\n" + "\n".join(phase_descriptions))
# Describe key memory values (skip very long values)
all_memory = memory.read_all()
if all_memory:
# Describe key buffer values (skip very long values)
all_buffer = buffer.read_all()
if all_buffer:
memory_lines: list[str] = []
for key, value in all_memory.items():
for key, value in all_buffer.items():
if value is None:
continue
val_str = str(value)
@@ -261,7 +261,7 @@ def build_narrative(
def build_transition_marker(
previous_node: NodeSpec,
next_node: NodeSpec,
memory: SharedMemory,
buffer: DataBuffer,
cumulative_tool_names: list[str],
data_dir: Path | str | None = None,
) -> str:
@@ -274,7 +274,7 @@ def build_transition_marker(
Args:
previous_node: NodeSpec of the phase just completed.
next_node: NodeSpec of the phase about to start.
memory: Current shared memory state.
buffer: Current data buffer state.
cumulative_tool_names: All tools available (cumulative set).
data_dir: Path to spillover data directory.
@@ -290,13 +290,13 @@ def build_transition_marker(
sections.append(f"\nCompleted: {previous_node.name}")
sections.append(f" {previous_node.description}")
# Outputs in memory — use file references for large values so the
# Outputs in buffer — use file references for large values so the
# next node loads full data from disk instead of seeing truncated
# inline previews that look deceptively complete.
all_memory = memory.read_all()
if all_memory:
all_buffer = buffer.read_all()
if all_buffer:
memory_lines: list[str] = []
for key, value in all_memory.items():
for key, value in all_buffer.items():
if value is None:
continue
val_str = str(value)
+2 -2
View File
@@ -278,7 +278,7 @@ def _load_resume_state(
return None
return {
"resume_session_id": session_id,
"memory": cp_data.get("shared_memory", {}),
"data_buffer": cp_data.get("data_buffer", cp_data.get("shared_memory", {})),
"paused_at": cp_data.get("next_node") or cp_data.get("current_node"),
"execution_path": cp_data.get("execution_path", []),
"node_visit_counts": {},
@@ -296,7 +296,7 @@ def _load_resume_state(
paused_at = progress.get("paused_at") or progress.get("resume_from")
return {
"resume_session_id": session_id,
"memory": state_data.get("memory", {}),
"data_buffer": state_data.get("data_buffer", state_data.get("memory", {})),
"paused_at": paused_at,
"execution_path": progress.get("path", []),
"node_visit_counts": progress.get("node_visit_counts", {}),
+2 -2
View File
@@ -33,7 +33,7 @@ Single-entry agents get a `"default"` entry point automatically. There is no sep
| `ExecutionStream` | `runtime/execution_stream.py` | Per-entry-point execution queue, session persistence |
| `GraphExecutor` | `graph/executor.py` | Node traversal, tool dispatch, checkpointing |
| `EventBus` | `runtime/event_bus.py` | Pub/sub for execution events (streaming, I/O) |
| `SharedStateManager` | `runtime/shared_state.py` | Cross-stream state with isolation levels |
| `SharedBufferManager` | `runtime/shared_state.py` | Cross-stream state with isolation levels |
| `OutcomeAggregator` | `runtime/outcome_aggregator.py` | Goal progress tracking across streams |
| `SessionStore` | `storage/session_store.py` | Session state persistence (`sessions/{id}/state.json`) |
@@ -108,7 +108,7 @@ runtime.unsubscribe_from_events(sub_id)
# Inspection
runtime.is_running # bool
runtime.event_bus # EventBus
runtime.state_manager # SharedStateManager
runtime.state_manager # SharedBufferManager
runtime.get_stats() # Runtime statistics
```
+5 -5
View File
@@ -22,7 +22,7 @@ from framework.runtime.event_bus import EventBus
from framework.runtime.execution_stream import EntryPointSpec, ExecutionStream
from framework.runtime.outcome_aggregator import OutcomeAggregator
from framework.runtime.runtime_log_store import RuntimeLogStore
from framework.runtime.shared_state import SharedStateManager
from framework.runtime.shared_state import SharedBufferManager
from framework.storage.concurrent import ConcurrentStorage
from framework.storage.session_store import SessionStore
@@ -229,7 +229,7 @@ class AgentRuntime:
self._session_store = SessionStore(storage_path_obj)
# Initialize shared components
self._state_manager = SharedStateManager()
self._state_manager = SharedBufferManager()
self._event_bus = event_bus or EventBus(max_history=self._config.max_history)
self._outcome_aggregator = OutcomeAggregator(goal, self._event_bus)
@@ -1505,7 +1505,7 @@ class AgentRuntime:
try:
if state_path.exists():
data = _json.loads(state_path.read_text(encoding="utf-8"))
full_memory = data.get("memory", {})
full_memory = data.get("data_buffer", data.get("memory", {}))
if not full_memory:
continue
# Filter to only input keys so stale outputs
@@ -1517,7 +1517,7 @@ class AgentRuntime:
if memory:
return {
"resume_session_id": exec_id,
"memory": memory,
"data_buffer": memory,
}
except Exception:
logger.debug(
@@ -1781,7 +1781,7 @@ class AgentRuntime:
# === PROPERTIES ===
@property
def state_manager(self) -> SharedStateManager:
def state_manager(self) -> SharedBufferManager:
"""Access the shared state manager."""
return self._state_manager
+4 -4
View File
@@ -21,7 +21,7 @@ from typing import TYPE_CHECKING, Any
from framework.graph.checkpoint_config import CheckpointConfig
from framework.graph.executor import ExecutionResult, GraphExecutor
from framework.runtime.event_bus import EventBus
from framework.runtime.shared_state import IsolationLevel, SharedStateManager
from framework.runtime.shared_state import IsolationLevel, SharedBufferManager
from framework.runtime.stream_runtime import StreamRuntime, StreamRuntimeAdapter
if TYPE_CHECKING:
@@ -170,7 +170,7 @@ class ExecutionStream:
entry_spec: EntryPointSpec,
graph: "GraphSpec",
goal: "Goal",
state_manager: SharedStateManager,
state_manager: SharedBufferManager,
storage: "ConcurrentStorage",
outcome_aggregator: "OutcomeAggregator",
event_bus: "EventBus | None" = None,
@@ -639,7 +639,7 @@ class ExecutionStream:
self._write_run_event(execution_id, ctx.run_id, "run_started")
# Create execution-scoped memory
self._state_manager.create_memory(
self._state_manager.create_buffer(
execution_id=execution_id,
stream_id=self.stream_id,
isolation=ctx.isolation_level,
@@ -1074,7 +1074,7 @@ class ExecutionStream:
updated_at=now,
),
progress=progress,
memory=ss.get("memory", {}),
memory=ss.get("data_buffer", ss.get("memory", {})),
input_data=ctx.input_data,
)
+20 -20
View File
@@ -46,7 +46,7 @@ class StateChange:
timestamp: float = field(default_factory=time.time)
class SharedStateManager:
class SharedBufferManager:
"""
Manages shared state across concurrent executions.
@@ -61,18 +61,18 @@ class SharedStateManager:
- SYNCHRONIZED: Like SHARED but with write locks
Example:
manager = SharedStateManager()
manager = SharedBufferManager()
# Create memory for an execution
memory = manager.create_memory(
# Create buffer for an execution
buf = manager.create_buffer(
execution_id="exec_123",
stream_id="webhook",
isolation=IsolationLevel.SHARED,
)
# Read/write through the memory
await memory.write("customer_id", "cust_456", scope=StateScope.STREAM)
value = await memory.read("customer_id")
# Read/write through the buffer
await buf.write("customer_id", "cust_456", scope=StateScope.STREAM)
value = await buf.read("customer_id")
"""
def __init__(self):
@@ -93,14 +93,14 @@ class SharedStateManager:
# Version tracking
self._version = 0
def create_memory(
def create_buffer(
self,
execution_id: str,
stream_id: str,
isolation: IsolationLevel,
) -> "StreamMemory":
) -> "StreamBuffer":
"""
Create a memory instance for an execution.
Create a buffer instance for an execution.
Args:
execution_id: Unique execution identifier
@@ -108,7 +108,7 @@ class SharedStateManager:
isolation: Isolation level for this execution
Returns:
StreamMemory instance for reading/writing state
StreamBuffer instance for reading/writing state
"""
# Initialize execution state
if execution_id not in self._execution_state:
@@ -119,7 +119,7 @@ class SharedStateManager:
self._stream_state[stream_id] = {}
self._stream_locks[stream_id] = asyncio.Lock()
return StreamMemory(
return StreamBuffer(
manager=self,
execution_id=execution_id,
stream_id=stream_id,
@@ -343,17 +343,17 @@ class SharedStateManager:
return self._change_history[-limit:]
class StreamMemory:
class StreamBuffer:
"""
Memory interface for a single execution.
Buffer interface for a single execution.
Provides scoped access to shared state with proper isolation.
Compatible with the existing SharedMemory interface where possible.
Compatible with the existing DataBuffer interface where possible.
"""
def __init__(
self,
manager: SharedStateManager,
manager: SharedBufferManager,
execution_id: str,
stream_id: str,
isolation: IsolationLevel,
@@ -371,13 +371,13 @@ class StreamMemory:
self,
read_keys: list[str],
write_keys: list[str],
) -> "StreamMemory":
) -> "StreamBuffer":
"""
Create a scoped view with read/write permissions.
Compatible with existing SharedMemory.with_permissions().
Compatible with existing DataBuffer.with_permissions().
"""
scoped = StreamMemory(
scoped = StreamBuffer(
manager=self._manager,
execution_id=self._execution_id,
stream_id=self._stream_id,
@@ -434,7 +434,7 @@ class StreamMemory:
return all_state
# === SYNC API (for backward compatibility with SharedMemory) ===
# === SYNC API (for backward compatibility with DataBuffer) ===
def read_sync(self, key: str) -> Any:
"""
@@ -5,7 +5,7 @@ Tests:
1. AgentRuntime creation and lifecycle
2. Entry point registration
3. Concurrent executions across streams
4. SharedStateManager isolation levels
4. SharedBufferManager isolation levels
5. OutcomeAggregator goal evaluation
6. EventBus pub/sub
"""
@@ -24,7 +24,7 @@ from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
from framework.runtime.event_bus import AgentEvent, EventBus, EventType
from framework.runtime.execution_stream import EntryPointSpec
from framework.runtime.outcome_aggregator import OutcomeAggregator
from framework.runtime.shared_state import IsolationLevel, SharedStateManager
from framework.runtime.shared_state import IsolationLevel, SharedBufferManager
# === Test Fixtures ===
@@ -121,45 +121,45 @@ def temp_storage():
yield Path(tmpdir)
# === SharedStateManager Tests ===
# === SharedBufferManager Tests ===
class TestSharedStateManager:
"""Tests for SharedStateManager."""
class TestSharedBufferManager:
"""Tests for SharedBufferManager."""
def test_create_memory(self):
"""Test creating execution-scoped memory."""
manager = SharedStateManager()
memory = manager.create_memory(
def test_create_buffer(self):
"""Test creating execution-scoped buffer."""
manager = SharedBufferManager()
buffer = manager.create_buffer(
execution_id="exec-1",
stream_id="webhook",
isolation=IsolationLevel.SHARED,
)
assert memory is not None
assert memory._execution_id == "exec-1"
assert memory._stream_id == "webhook"
assert buffer is not None
assert buffer._execution_id == "exec-1"
assert buffer._stream_id == "webhook"
@pytest.mark.asyncio
async def test_isolated_state(self):
"""Test isolated state doesn't leak between executions."""
manager = SharedStateManager()
manager = SharedBufferManager()
mem1 = manager.create_memory("exec-1", "stream-1", IsolationLevel.ISOLATED)
mem2 = manager.create_memory("exec-2", "stream-1", IsolationLevel.ISOLATED)
buf1 = manager.create_buffer("exec-1", "stream-1", IsolationLevel.ISOLATED)
buf2 = manager.create_buffer("exec-2", "stream-1", IsolationLevel.ISOLATED)
await mem1.write("key", "value1")
await mem2.write("key", "value2")
await buf1.write("key", "value1")
await buf2.write("key", "value2")
assert await mem1.read("key") == "value1"
assert await mem2.read("key") == "value2"
assert await buf1.read("key") == "value1"
assert await buf2.read("key") == "value2"
@pytest.mark.asyncio
async def test_shared_state(self):
"""Test shared state is visible across executions."""
manager = SharedStateManager()
manager = SharedBufferManager()
manager.create_memory("exec-1", "stream-1", IsolationLevel.SHARED)
manager.create_memory("exec-2", "stream-1", IsolationLevel.SHARED)
manager.create_buffer("exec-1", "stream-1", IsolationLevel.SHARED)
manager.create_buffer("exec-2", "stream-1", IsolationLevel.SHARED)
# Write to global scope
await manager.write(
@@ -180,8 +180,8 @@ class TestSharedStateManager:
def test_cleanup_execution(self):
"""Test execution cleanup removes state."""
manager = SharedStateManager()
manager.create_memory("exec-1", "stream-1", IsolationLevel.ISOLATED)
manager = SharedBufferManager()
manager.create_buffer("exec-1", "stream-1", IsolationLevel.ISOLATED)
assert "exec-1" in manager._execution_state
+4 -4
View File
@@ -33,7 +33,7 @@ class Checkpoint(BaseModel):
execution_path: list[str] = Field(default_factory=list) # Nodes executed so far
# State snapshots
shared_memory: dict[str, Any] = Field(default_factory=dict) # Full SharedMemory._data
data_buffer: dict[str, Any] = Field(default_factory=dict) # Full DataBuffer._data
accumulated_outputs: dict[str, Any] = Field(default_factory=dict) # Outputs accumulated so far
# Execution metrics (for resuming quality tracking)
@@ -52,7 +52,7 @@ class Checkpoint(BaseModel):
session_id: str,
current_node: str,
execution_path: list[str],
shared_memory: dict[str, Any],
data_buffer: dict[str, Any],
next_node: str | None = None,
accumulated_outputs: dict[str, Any] | None = None,
metrics_snapshot: dict[str, Any] | None = None,
@@ -67,7 +67,7 @@ class Checkpoint(BaseModel):
session_id: Session this checkpoint belongs to
current_node: Node ID at checkpoint time
execution_path: List of node IDs executed so far
shared_memory: Full memory state snapshot
data_buffer: Full data buffer state snapshot
next_node: Next node to execute (for node_complete checkpoints)
accumulated_outputs: Outputs accumulated so far
metrics_snapshot: Execution metrics at checkpoint time
@@ -91,7 +91,7 @@ class Checkpoint(BaseModel):
current_node=current_node,
next_node=next_node,
execution_path=execution_path,
shared_memory=shared_memory,
data_buffer=data_buffer,
accumulated_outputs=accumulated_outputs or {},
metrics_snapshot=metrics_snapshot or {},
is_clean=is_clean,
+2 -2
View File
@@ -243,7 +243,7 @@ class SessionState(BaseModel):
error=result.error,
output=result.output,
),
memory=result.session_state.get("memory", {}) if result.session_state else {},
memory=result.session_state.get("data_buffer", result.session_state.get("memory", {})) if result.session_state else {},
input_data=input_data or {},
)
@@ -303,7 +303,7 @@ class SessionState(BaseModel):
return {
"paused_at": resume_from,
"resume_from": resume_from,
"memory": self.memory,
"data_buffer": self.memory,
"execution_path": self.progress.path,
"node_visit_counts": self.progress.node_visit_counts,
}
+1 -1
View File
@@ -270,7 +270,7 @@ async def handle_resume(request: web.Request) -> web.Response:
paused_at = progress.get("paused_at") or progress.get("resume_from")
resume_session_state = {
"resume_session_id": worker_session_id,
"memory": state.get("memory", {}),
"data_buffer": state.get("data_buffer", state.get("memory", {})),
"execution_path": progress.get("path", []),
"node_visit_counts": progress.get("node_visit_counts", {}),
}
+1 -1
View File
@@ -225,7 +225,7 @@ def _write_sample_session(base: Path, session_id: str):
"started_at": "2026-02-20T12:00:00",
"completed_at": None,
"input_data": {"user_request": "test input"},
"memory": {"key1": "value1"},
"data_buffer": {"key1": "value1"},
"progress": {
"current_node": "node_b",
"paused_at": "node_b",
+1 -1
View File
@@ -80,7 +80,7 @@ SKILL_REGISTRY: dict[str, str] = {
}
# All data buffer keys used by default skills (for permission auto-inclusion)
SHARED_MEMORY_KEYS: list[str] = [
DATA_BUFFER_KEYS: list[str] = [
# note-taking
"_working_notes",
"_notes_updated_at",
@@ -2659,11 +2659,11 @@ def register_queen_lifecycle_tools(
return "No active execution found."
exec_id = exec_ids[0]
memory = runtime.state_manager.create_memory(exec_id, stream_id, IsolationLevel.SHARED)
state = await memory.read_all()
buf = runtime.state_manager.create_buffer(exec_id, stream_id, IsolationLevel.SHARED)
state = await buf.read_all()
if not state:
lines.append("Worker's shared memory is empty.")
lines.append("Worker's shared buffer is empty.")
else:
lines.append(f"Worker's shared memory ({len(state)} keys):")
for key, value in state.items():
+3 -3
View File
@@ -20,7 +20,7 @@ logging.getLogger("framework.llm.litellm").setLevel(logging.DEBUG)
from framework.config import RuntimeConfig # noqa: E402
from framework.graph.event_loop_node import EventLoopNode, LoopConfig # noqa: E402
from framework.graph.node import NodeContext, NodeResult, NodeSpec, SharedMemory # noqa: E402
from framework.graph.node import NodeContext, NodeResult, NodeSpec, DataBuffer # noqa: E402
from framework.llm.litellm import LiteLLMProvider # noqa: E402
@@ -68,13 +68,13 @@ def make_context(
runtime.record_outcome = MagicMock()
runtime.end_run = MagicMock()
memory = SharedMemory()
buffer = DataBuffer()
return NodeContext(
runtime=runtime,
node_id=node_id,
node_spec=spec,
memory=memory,
buffer=buffer,
input_data={},
llm=llm,
available_tools=[],
+3 -3
View File
@@ -16,7 +16,7 @@ logging.getLogger("framework.llm.litellm").setLevel(logging.DEBUG)
from framework.config import RuntimeConfig # noqa: E402
from framework.graph.event_loop_node import EventLoopNode, LoopConfig # noqa: E402
from framework.graph.node import NodeContext, NodeResult, NodeSpec, SharedMemory # noqa: E402
from framework.graph.node import NodeContext, NodeResult, NodeSpec, DataBuffer # noqa: E402
from framework.llm.litellm import LiteLLMProvider # noqa: E402
@@ -61,13 +61,13 @@ def make_context(
runtime.record_outcome = MagicMock()
runtime.end_run = MagicMock()
memory = SharedMemory()
buffer = DataBuffer()
return NodeContext(
runtime=runtime,
node_id=node_id,
node_spec=spec,
memory=memory,
buffer=buffer,
input_data={},
llm=llm,
available_tools=[],
+9 -9
View File
@@ -21,7 +21,7 @@ from framework.graph.conversation import NodeConversation
from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
from framework.graph.executor import GraphExecutor
from framework.graph.goal import Goal
from framework.graph.node import NodeResult, NodeSpec, SharedMemory
from framework.graph.node import NodeResult, NodeSpec, DataBuffer
from framework.graph.prompt_composer import (
build_narrative,
build_transition_marker,
@@ -160,8 +160,8 @@ class TestComposeSystemPrompt:
class TestBuildNarrative:
def test_with_execution_path(self):
memory = SharedMemory()
memory.write("findings", "some findings")
buffer = DataBuffer()
buffer.write("findings", "some findings")
node_a = NodeSpec(
id="a", name="Research", description="Research the topic", node_type="event_loop"
@@ -175,14 +175,14 @@ class TestBuildNarrative:
edges=[],
)
result = build_narrative(memory, ["a"], graph)
result = build_narrative(buffer, ["a"], graph)
assert "Research" in result
assert "findings" in result
def test_empty_state(self):
memory = SharedMemory()
buffer = DataBuffer()
graph = GraphSpec(id="g1", goal_id="g1", entry_node="a", nodes=[], edges=[])
result = build_narrative(memory, [], graph)
result = build_narrative(buffer, [], graph)
assert result == ""
@@ -194,13 +194,13 @@ class TestBuildTransitionMarker:
next_n = NodeSpec(
id="report", name="Report", description="Write report", node_type="event_loop"
)
memory = SharedMemory()
memory.write("findings", "important stuff")
buffer = DataBuffer()
buffer.write("findings", "important stuff")
marker = build_transition_marker(
previous_node=prev,
next_node=next_n,
memory=memory,
buffer=buffer,
cumulative_tool_names=["web_search", "save_data"],
)
+5 -5
View File
@@ -6,7 +6,7 @@ import pytest
from framework.skills.config import DefaultSkillConfig, SkillsConfig
from framework.skills.defaults import (
SHARED_MEMORY_KEYS,
DATA_BUFFER_KEYS,
SKILL_REGISTRY,
DefaultSkillManager,
is_batch_scenario,
@@ -51,10 +51,10 @@ class TestDefaultSkillFiles:
f"({total_chars} chars), exceeding the 2000 token budget"
)
def test_shared_memory_keys_all_prefixed(self):
"""All shared memory keys must start with underscore."""
for key in SHARED_MEMORY_KEYS:
assert key.startswith("_"), f"Shared memory key missing _ prefix: {key}"
def test_data_buffer_keys_all_prefixed(self):
"""All data buffer keys must start with underscore."""
for key in DATA_BUFFER_KEYS:
assert key.startswith("_"), f"Data buffer key missing _ prefix: {key}"
class TestDefaultSkillManager:
+6 -6
View File
@@ -29,7 +29,7 @@ from framework.graph.node import (
NodeProtocol,
NodeResult,
NodeSpec,
SharedMemory,
DataBuffer,
)
from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolResult, ToolUse
from framework.llm.stream_events import (
@@ -266,13 +266,13 @@ def make_ctx(
client_facing=client_facing,
)
memory = SharedMemory()
buffer = DataBuffer()
return NodeContext(
runtime=runtime,
node_id=node_id,
node_spec=spec,
memory=memory,
buffer=buffer,
input_data=input_data or {},
llm=llm,
available_tools=available_tools or [],
@@ -1001,14 +1001,14 @@ async def test_mixed_node_graph(runtime):
class LoadLeadsNode(NodeProtocol):
async def execute(self, ctx: NodeContext) -> NodeResult:
leads = ["lead_A", "lead_B", "lead_C"]
ctx.memory.write("leads", leads)
ctx.buffer.write("leads", leads)
return NodeResult(success=True, output={"leads": leads})
class FormatOutputNode(NodeProtocol):
async def execute(self, ctx: NodeContext) -> NodeResult:
summary = ctx.input_data.get("summary", ctx.memory.read("summary") or "no summary")
summary = ctx.input_data.get("summary", ctx.buffer.read("summary") or "no summary")
report = f"Report: {summary}"
ctx.memory.write("report", report)
ctx.buffer.write("report", report)
return NodeResult(success=True, output={"report": report})
# Event loop: process leads, produce summary
+127 -127
View File
@@ -21,7 +21,7 @@ from framework.graph.event_loop_node import (
LoopConfig,
OutputAccumulator,
)
from framework.graph.node import NodeContext, NodeProtocol, NodeSpec, SharedMemory
from framework.graph.node import NodeContext, NodeProtocol, NodeSpec, DataBuffer
from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolResult, ToolUse
from framework.llm.stream_events import (
FinishEvent,
@@ -134,14 +134,14 @@ def node_spec():
@pytest.fixture
def memory():
return SharedMemory()
def buffer():
return DataBuffer()
def build_ctx(
runtime,
node_spec,
memory,
buffer,
llm,
tools=None,
input_data=None,
@@ -153,7 +153,7 @@ def build_ctx(
runtime=runtime,
node_id=node_spec.id,
node_spec=node_spec,
memory=memory,
buffer=buffer,
input_data=input_data or {},
llm=llm,
available_tools=tools or [],
@@ -189,12 +189,12 @@ class TestNodeProtocolConformance:
class TestBasicLoop:
@pytest.mark.asyncio
async def test_basic_text_only_implicit_accept(self, runtime, node_spec, memory):
async def test_basic_text_only_implicit_accept(self, runtime, node_spec, buffer):
"""No tools, no judge. LLM produces text, implicit accept on stop."""
# Override to no output_keys so implicit judge accepts immediately
node_spec.output_keys = []
llm = MockStreamingLLM(scenarios=[text_scenario("Hello world")])
ctx = build_ctx(runtime, node_spec, memory, llm)
ctx = build_ctx(runtime, node_spec, buffer, llm)
node = EventLoopNode(config=LoopConfig(max_iterations=5))
result = await node.execute(ctx)
@@ -203,9 +203,9 @@ class TestBasicLoop:
assert result.tokens_used > 0
@pytest.mark.asyncio
async def test_no_llm_returns_failure(self, runtime, node_spec, memory):
async def test_no_llm_returns_failure(self, runtime, node_spec, buffer):
"""ctx.llm=None should return failure immediately."""
ctx = build_ctx(runtime, node_spec, memory, llm=None)
ctx = build_ctx(runtime, node_spec, buffer, llm=None)
node = EventLoopNode()
result = await node.execute(ctx)
@@ -214,12 +214,12 @@ class TestBasicLoop:
assert "LLM" in result.error
@pytest.mark.asyncio
async def test_max_iterations_failure(self, runtime, node_spec, memory):
async def test_max_iterations_failure(self, runtime, node_spec, buffer):
"""When max_iterations is reached without acceptance, should fail."""
# LLM always produces text but never calls set_output, so implicit
# judge retries asking for missing keys
llm = MockStreamingLLM(scenarios=[text_scenario("thinking...")])
ctx = build_ctx(runtime, node_spec, memory, llm)
ctx = build_ctx(runtime, node_spec, buffer, llm)
node = EventLoopNode(config=LoopConfig(max_iterations=2))
result = await node.execute(ctx)
@@ -235,7 +235,7 @@ class TestBasicLoop:
class TestJudgeIntegration:
@pytest.mark.asyncio
async def test_judge_accept(self, runtime, node_spec, memory):
async def test_judge_accept(self, runtime, node_spec, buffer):
"""Mock judge ACCEPT -> success."""
node_spec.output_keys = []
llm = MockStreamingLLM(scenarios=[text_scenario("Done!")])
@@ -243,7 +243,7 @@ class TestJudgeIntegration:
judge = AsyncMock(spec=JudgeProtocol)
judge.evaluate = AsyncMock(return_value=JudgeVerdict(action="ACCEPT"))
ctx = build_ctx(runtime, node_spec, memory, llm)
ctx = build_ctx(runtime, node_spec, buffer, llm)
node = EventLoopNode(judge=judge, config=LoopConfig(max_iterations=5))
result = await node.execute(ctx)
@@ -251,7 +251,7 @@ class TestJudgeIntegration:
judge.evaluate.assert_called_once()
@pytest.mark.asyncio
async def test_judge_escalate(self, runtime, node_spec, memory):
async def test_judge_escalate(self, runtime, node_spec, buffer):
"""Mock judge ESCALATE -> failure."""
node_spec.output_keys = []
llm = MockStreamingLLM(scenarios=[text_scenario("Attempt")])
@@ -261,7 +261,7 @@ class TestJudgeIntegration:
return_value=JudgeVerdict(action="ESCALATE", feedback="Tone violation")
)
ctx = build_ctx(runtime, node_spec, memory, llm)
ctx = build_ctx(runtime, node_spec, buffer, llm)
node = EventLoopNode(judge=judge, config=LoopConfig(max_iterations=5))
result = await node.execute(ctx)
@@ -270,7 +270,7 @@ class TestJudgeIntegration:
assert "Tone violation" in result.error
@pytest.mark.asyncio
async def test_judge_retry_then_accept(self, runtime, node_spec, memory):
async def test_judge_retry_then_accept(self, runtime, node_spec, buffer):
"""RETRY twice, then ACCEPT. Should run 3 iterations."""
node_spec.output_keys = []
llm = MockStreamingLLM(
@@ -293,7 +293,7 @@ class TestJudgeIntegration:
judge = AsyncMock(spec=JudgeProtocol)
judge.evaluate = AsyncMock(side_effect=evaluate_fn)
ctx = build_ctx(runtime, node_spec, memory, llm)
ctx = build_ctx(runtime, node_spec, buffer, llm)
node = EventLoopNode(judge=judge, config=LoopConfig(max_iterations=10))
result = await node.execute(ctx)
@@ -308,7 +308,7 @@ class TestJudgeIntegration:
class TestSetOutput:
@pytest.mark.asyncio
async def test_set_output_accumulates(self, runtime, node_spec, memory):
async def test_set_output_accumulates(self, runtime, node_spec, buffer):
"""LLM calls set_output -> values appear in NodeResult.output."""
llm = MockStreamingLLM(
scenarios=[
@@ -319,7 +319,7 @@ class TestSetOutput:
]
)
ctx = build_ctx(runtime, node_spec, memory, llm)
ctx = build_ctx(runtime, node_spec, buffer, llm)
node = EventLoopNode(config=LoopConfig(max_iterations=5))
result = await node.execute(ctx)
@@ -327,7 +327,7 @@ class TestSetOutput:
assert result.output["result"] == 42
@pytest.mark.asyncio
async def test_set_output_rejects_invalid_key(self, runtime, node_spec, memory):
async def test_set_output_rejects_invalid_key(self, runtime, node_spec, buffer):
"""set_output with key not in output_keys -> is_error=True."""
llm = MockStreamingLLM(
scenarios=[
@@ -340,7 +340,7 @@ class TestSetOutput:
]
)
ctx = build_ctx(runtime, node_spec, memory, llm)
ctx = build_ctx(runtime, node_spec, buffer, llm)
node = EventLoopNode(config=LoopConfig(max_iterations=5))
result = await node.execute(ctx)
@@ -349,7 +349,7 @@ class TestSetOutput:
assert "bad_key" not in result.output
@pytest.mark.asyncio
async def test_missing_keys_triggers_retry(self, runtime, node_spec, memory):
async def test_missing_keys_triggers_retry(self, runtime, node_spec, buffer):
"""Judge accepts but output keys are missing -> retry with hint."""
judge = AsyncMock(spec=JudgeProtocol)
judge.evaluate = AsyncMock(return_value=JudgeVerdict(action="ACCEPT"))
@@ -365,7 +365,7 @@ class TestSetOutput:
]
)
ctx = build_ctx(runtime, node_spec, memory, llm)
ctx = build_ctx(runtime, node_spec, buffer, llm)
node = EventLoopNode(judge=judge, config=LoopConfig(max_iterations=5))
result = await node.execute(ctx)
@@ -380,7 +380,7 @@ class TestSetOutput:
class TestStallDetection:
@pytest.mark.asyncio
async def test_stall_detection(self, runtime, node_spec, memory):
async def test_stall_detection(self, runtime, node_spec, buffer):
"""3 identical responses should trigger stall detection."""
node_spec.output_keys = [] # so implicit judge would accept
# But we need the judge to RETRY so we actually get 3 identical responses
@@ -389,7 +389,7 @@ class TestStallDetection:
llm = MockStreamingLLM(scenarios=[text_scenario("same answer")])
ctx = build_ctx(runtime, node_spec, memory, llm)
ctx = build_ctx(runtime, node_spec, buffer, llm)
node = EventLoopNode(
judge=judge,
config=LoopConfig(max_iterations=10, stall_detection_threshold=3),
@@ -407,7 +407,7 @@ class TestStallDetection:
class TestEventBusLifecycle:
@pytest.mark.asyncio
async def test_lifecycle_events_published(self, runtime, node_spec, memory):
async def test_lifecycle_events_published(self, runtime, node_spec, buffer):
"""NODE_LOOP_STARTED, NODE_LOOP_ITERATION, NODE_LOOP_COMPLETED should be published."""
node_spec.output_keys = []
llm = MockStreamingLLM(scenarios=[text_scenario("ok")])
@@ -423,7 +423,7 @@ class TestEventBusLifecycle:
handler=lambda e: received_events.append(e.type),
)
ctx = build_ctx(runtime, node_spec, memory, llm)
ctx = build_ctx(runtime, node_spec, buffer, llm)
node = EventLoopNode(event_bus=bus, config=LoopConfig(max_iterations=5))
result = await node.execute(ctx)
@@ -434,7 +434,7 @@ class TestEventBusLifecycle:
@pytest.mark.asyncio
@pytest.mark.skip(reason="Hangs in non-interactive shells (client-facing blocks on stdin)")
async def test_client_facing_uses_client_output_delta(self, runtime, memory):
async def test_client_facing_uses_client_output_delta(self, runtime, buffer):
"""client_facing=True should emit CLIENT_OUTPUT_DELTA instead of LLM_TEXT_DELTA."""
spec = NodeSpec(
id="ui_node",
@@ -453,7 +453,7 @@ class TestEventBusLifecycle:
handler=lambda e: received_types.append(e.type),
)
ctx = build_ctx(runtime, spec, memory, llm)
ctx = build_ctx(runtime, spec, buffer, llm)
node = EventLoopNode(event_bus=bus, config=LoopConfig(max_iterations=5))
# Text-only on client_facing no longer blocks (no ask_user), so
@@ -485,7 +485,7 @@ class TestClientFacingBlocking:
@pytest.mark.asyncio
@pytest.mark.skip(reason="Hangs in non-interactive shells (client-facing blocks on stdin)")
async def test_text_only_no_blocking(self, runtime, memory, client_spec):
async def test_text_only_no_blocking(self, runtime, buffer, client_spec):
"""client_facing + text-only (no ask_user) should NOT block."""
llm = MockStreamingLLM(
scenarios=[
@@ -494,7 +494,7 @@ class TestClientFacingBlocking:
)
bus = EventBus()
node = EventLoopNode(event_bus=bus, config=LoopConfig(max_iterations=5))
ctx = build_ctx(runtime, client_spec, memory, llm)
ctx = build_ctx(runtime, client_spec, buffer, llm)
# Should complete without blocking — no ask_user called, no output_keys required
result = await node.execute(ctx)
@@ -503,7 +503,7 @@ class TestClientFacingBlocking:
assert llm._call_index >= 1
@pytest.mark.asyncio
async def test_ask_user_triggers_blocking(self, runtime, memory, client_spec):
async def test_ask_user_triggers_blocking(self, runtime, buffer, client_spec):
"""client_facing + ask_user() blocks until inject_event."""
# Give the node an output key so the judge doesn't auto-accept
# after the user responds — it needs set_output first.
@@ -522,7 +522,7 @@ class TestClientFacingBlocking:
)
bus = EventBus()
node = EventLoopNode(event_bus=bus, config=LoopConfig(max_iterations=5))
ctx = build_ctx(runtime, client_spec, memory, llm)
ctx = build_ctx(runtime, client_spec, buffer, llm)
async def user_responds():
await asyncio.sleep(0.05)
@@ -538,7 +538,7 @@ class TestClientFacingBlocking:
assert result.output["answer"] == "help provided"
@pytest.mark.asyncio
async def test_client_facing_does_not_block_on_tools(self, runtime, memory):
async def test_client_facing_does_not_block_on_tools(self, runtime, buffer):
"""client_facing + tool calls (no ask_user) should NOT block."""
spec = NodeSpec(
id="chat",
@@ -558,7 +558,7 @@ class TestClientFacingBlocking:
]
)
node = EventLoopNode(config=LoopConfig(max_iterations=5))
ctx = build_ctx(runtime, spec, memory, llm)
ctx = build_ctx(runtime, spec, buffer, llm)
# Should complete without blocking — no ask_user called
result = await node.execute(ctx)
@@ -567,7 +567,7 @@ class TestClientFacingBlocking:
assert result.output["result"] == "done"
@pytest.mark.asyncio
async def test_non_client_facing_unchanged(self, runtime, memory):
async def test_non_client_facing_unchanged(self, runtime, buffer):
"""client_facing=False should not block — existing behavior."""
spec = NodeSpec(
id="internal",
@@ -578,14 +578,14 @@ class TestClientFacingBlocking:
)
llm = MockStreamingLLM(scenarios=[text_scenario("thinking...")])
node = EventLoopNode(config=LoopConfig(max_iterations=2))
ctx = build_ctx(runtime, spec, memory, llm)
ctx = build_ctx(runtime, spec, buffer, llm)
# Should complete without blocking (implicit judge ACCEPTs on no tools + no keys)
result = await node.execute(ctx)
assert result is not None
@pytest.mark.asyncio
async def test_signal_shutdown_unblocks(self, runtime, memory, client_spec):
async def test_signal_shutdown_unblocks(self, runtime, buffer, client_spec):
"""signal_shutdown should unblock a waiting client_facing node."""
llm = MockStreamingLLM(
scenarios=[
@@ -598,7 +598,7 @@ class TestClientFacingBlocking:
)
bus = EventBus()
node = EventLoopNode(event_bus=bus, config=LoopConfig(max_iterations=10))
ctx = build_ctx(runtime, client_spec, memory, llm)
ctx = build_ctx(runtime, client_spec, buffer, llm)
async def shutdown_after_delay():
await asyncio.sleep(0.05)
@@ -611,7 +611,7 @@ class TestClientFacingBlocking:
assert result.success is True
@pytest.mark.asyncio
async def test_client_input_requested_event_published(self, runtime, memory, client_spec):
async def test_client_input_requested_event_published(self, runtime, buffer, client_spec):
"""CLIENT_INPUT_REQUESTED should be published when ask_user blocks."""
llm = MockStreamingLLM(
scenarios=[
@@ -634,7 +634,7 @@ class TestClientFacingBlocking:
)
node = EventLoopNode(event_bus=bus, config=LoopConfig(max_iterations=5))
ctx = build_ctx(runtime, client_spec, memory, llm)
ctx = build_ctx(runtime, client_spec, buffer, llm)
async def shutdown():
await asyncio.sleep(0.05)
@@ -649,7 +649,7 @@ class TestClientFacingBlocking:
@pytest.mark.asyncio
@pytest.mark.skip(reason="Hangs in non-interactive shells (client-facing blocks on stdin)")
async def test_ask_user_with_real_tools(self, runtime, memory):
async def test_ask_user_with_real_tools(self, runtime, buffer):
"""ask_user alongside real tool calls still triggers blocking."""
spec = NodeSpec(
id="chat",
@@ -683,7 +683,7 @@ class TestClientFacingBlocking:
config=LoopConfig(max_iterations=5),
)
ctx = build_ctx(
runtime, spec, memory, llm, tools=[Tool(name="search", description="", parameters={})]
runtime, spec, buffer, llm, tools=[Tool(name="search", description="", parameters={})]
)
async def unblock():
@@ -698,7 +698,7 @@ class TestClientFacingBlocking:
assert llm._call_index >= 2
@pytest.mark.asyncio
async def test_ask_user_not_available_non_client_facing(self, runtime, memory):
async def test_ask_user_not_available_non_client_facing(self, runtime, buffer):
"""ask_user tool should NOT be injected for non-client-facing nodes."""
spec = NodeSpec(
id="internal",
@@ -709,7 +709,7 @@ class TestClientFacingBlocking:
)
llm = MockStreamingLLM(scenarios=[text_scenario("thinking...")])
node = EventLoopNode(config=LoopConfig(max_iterations=2))
ctx = build_ctx(runtime, spec, memory, llm)
ctx = build_ctx(runtime, spec, buffer, llm)
await node.execute(ctx)
@@ -720,7 +720,7 @@ class TestClientFacingBlocking:
assert "ask_user" not in tool_names
@pytest.mark.asyncio
async def test_escalate_available_for_worker_stream(self, runtime, memory):
async def test_escalate_available_for_worker_stream(self, runtime, buffer):
"""Workers should receive escalate synthetic tool."""
spec = NodeSpec(
id="internal",
@@ -731,7 +731,7 @@ class TestClientFacingBlocking:
)
llm = MockStreamingLLM(scenarios=[text_scenario("thinking...")])
node = EventLoopNode(config=LoopConfig(max_iterations=2))
ctx = build_ctx(runtime, spec, memory, llm, stream_id="worker")
ctx = build_ctx(runtime, spec, buffer, llm, stream_id="worker")
await node.execute(ctx)
@@ -740,7 +740,7 @@ class TestClientFacingBlocking:
assert "escalate" in tool_names
@pytest.mark.asyncio
async def test_escalate_not_available_for_queen_stream(self, runtime, memory):
async def test_escalate_not_available_for_queen_stream(self, runtime, buffer):
"""Queen stream should not receive escalate tool."""
spec = NodeSpec(
id="queen",
@@ -751,7 +751,7 @@ class TestClientFacingBlocking:
)
llm = MockStreamingLLM(scenarios=[text_scenario("monitoring...")])
node = EventLoopNode(config=LoopConfig(max_iterations=2))
ctx = build_ctx(runtime, spec, memory, llm, stream_id="queen")
ctx = build_ctx(runtime, spec, buffer, llm, stream_id="queen")
await node.execute(ctx)
@@ -762,7 +762,7 @@ class TestClientFacingBlocking:
class TestEscalate:
@pytest.mark.asyncio
async def test_escalate_emits_event(self, runtime, node_spec, memory):
async def test_escalate_emits_event(self, runtime, node_spec, buffer):
"""escalate() should publish ESCALATION_REQUESTED and block for queen guidance."""
node_spec.output_keys = []
llm = MockStreamingLLM(
@@ -786,7 +786,7 @@ class TestEscalate:
bus.subscribe(event_types=[EventType.ESCALATION_REQUESTED], handler=capture)
ctx = build_ctx(runtime, node_spec, memory, llm, stream_id="worker")
ctx = build_ctx(runtime, node_spec, buffer, llm, stream_id="worker")
node = EventLoopNode(event_bus=bus, config=LoopConfig(max_iterations=5))
async def queen_reply():
@@ -810,7 +810,7 @@ class TestEscalate:
assert "HTTP 401" in received[0].data["context"]
@pytest.mark.asyncio
async def test_escalate_handoff_reaches_queen(self, runtime, node_spec, memory):
async def test_escalate_handoff_reaches_queen(self, runtime, node_spec, buffer):
"""Worker escalation should be routed to queen via SessionManager handoff sub."""
node_spec.output_keys = []
llm = MockStreamingLLM(
@@ -836,7 +836,7 @@ class TestEscalate:
queen_executor.node_registry = {"queen": queen_node}
manager._subscribe_worker_handoffs(session, queen_executor)
ctx = build_ctx(runtime, node_spec, memory, llm, stream_id="worker")
ctx = build_ctx(runtime, node_spec, buffer, llm, stream_id="worker")
node = EventLoopNode(event_bus=bus, config=LoopConfig(max_iterations=5))
async def queen_reply():
@@ -859,7 +859,7 @@ class TestEscalate:
assert kwargs["is_client_input"] is False
@pytest.mark.asyncio
async def test_escalate_waits_for_queen_input_and_skips_judge(self, runtime, node_spec, memory):
async def test_escalate_waits_for_queen_input_and_skips_judge(self, runtime, node_spec, buffer):
"""escalate() should block for queen input before judge evaluation."""
node_spec.output_keys = ["result"]
llm = MockStreamingLLM(
@@ -891,7 +891,7 @@ class TestEscalate:
judge = AsyncMock(spec=JudgeProtocol)
judge.evaluate = AsyncMock(return_value=JudgeVerdict(action="ACCEPT"))
ctx = build_ctx(runtime, node_spec, memory, llm, stream_id="worker")
ctx = build_ctx(runtime, node_spec, buffer, llm, stream_id="worker")
node = EventLoopNode(judge=judge, event_bus=bus, config=LoopConfig(max_iterations=5))
async def queen_reply():
@@ -922,7 +922,7 @@ class TestClientFacingExpectingWork:
"""Tests for _cf_expecting_work state machine in client-facing nodes."""
@pytest.mark.asyncio
async def test_text_after_user_input_goes_to_judge(self, runtime, memory):
async def test_text_after_user_input_goes_to_judge(self, runtime, buffer):
"""After user responds, text-only with missing outputs gets judged (not auto-blocked).
Simulates: findings-review asks user, user says "generate report",
@@ -956,7 +956,7 @@ class TestClientFacingExpectingWork:
]
)
node = EventLoopNode(config=LoopConfig(max_iterations=10))
ctx = build_ctx(runtime, spec, memory, llm)
ctx = build_ctx(runtime, spec, buffer, llm)
async def user_responds():
await asyncio.sleep(0.05)
@@ -972,7 +972,7 @@ class TestClientFacingExpectingWork:
assert llm._call_index >= 3
@pytest.mark.asyncio
async def test_auto_block_without_missing_outputs(self, runtime, memory):
async def test_auto_block_without_missing_outputs(self, runtime, buffer):
"""Text-only with no missing outputs should still auto-block (queen monitoring).
Simulates: queen node with no required outputs outputs "monitoring..."
@@ -1000,7 +1000,7 @@ class TestClientFacingExpectingWork:
]
)
node = EventLoopNode(config=LoopConfig(max_iterations=10))
ctx = build_ctx(runtime, spec, memory, llm)
ctx = build_ctx(runtime, spec, buffer, llm)
async def user_then_shutdown():
await asyncio.sleep(0.05)
@@ -1020,7 +1020,7 @@ class TestClientFacingExpectingWork:
assert llm._call_index == 2
@pytest.mark.asyncio
async def test_tool_calls_reset_expecting_work(self, runtime, memory):
async def test_tool_calls_reset_expecting_work(self, runtime, buffer):
"""After LLM calls tools, next text-only turn should auto-block again.
Simulates: user gives input -> LLM calls tools (work) -> LLM presents
@@ -1071,7 +1071,7 @@ class TestClientFacingExpectingWork:
ctx = build_ctx(
runtime,
spec,
memory,
buffer,
llm,
tools=[Tool(name="save_data", description="save", parameters={})],
)
@@ -1092,7 +1092,7 @@ class TestClientFacingExpectingWork:
assert result.output["status"] == "complete"
@pytest.mark.asyncio
async def test_judge_retry_enables_expecting_work(self, runtime, memory):
async def test_judge_retry_enables_expecting_work(self, runtime, buffer):
"""After judge RETRY, text-only with missing outputs goes to judge again.
Simulates: LLM calls save_data but forgets set_output -> judge RETRY ->
@@ -1143,7 +1143,7 @@ class TestClientFacingExpectingWork:
ctx = build_ctx(
runtime,
spec,
memory,
buffer,
llm,
tools=[Tool(name="save_data", description="save", parameters={})],
)
@@ -1169,7 +1169,7 @@ class TestClientFacingExpectingWork:
class TestToolExecution:
@pytest.mark.asyncio
async def test_tool_execution_feedback(self, runtime, node_spec, memory):
async def test_tool_execution_feedback(self, runtime, node_spec, buffer):
"""Tool call -> result fed back to conversation via stream loop."""
node_spec.output_keys = []
@@ -1192,7 +1192,7 @@ class TestToolExecution:
ctx = build_ctx(
runtime,
node_spec,
memory,
buffer,
llm,
tools=[Tool(name="search", description="Search", parameters={})],
)
@@ -1214,13 +1214,13 @@ class TestToolExecution:
class TestWriteThroughPersistence:
@pytest.mark.asyncio
async def test_messages_written_to_store(self, tmp_path, runtime, node_spec, memory):
async def test_messages_written_to_store(self, tmp_path, runtime, node_spec, buffer):
"""Messages should be persisted immediately via write-through."""
store = FileConversationStore(tmp_path / "conv")
node_spec.output_keys = []
llm = MockStreamingLLM(scenarios=[text_scenario("Hello")])
ctx = build_ctx(runtime, node_spec, memory, llm)
ctx = build_ctx(runtime, node_spec, buffer, llm)
node = EventLoopNode(
conversation_store=store,
config=LoopConfig(max_iterations=5),
@@ -1234,7 +1234,7 @@ class TestWriteThroughPersistence:
assert len(parts) >= 2 # at least initial user msg + assistant msg
@pytest.mark.asyncio
async def test_output_accumulator_write_through(self, tmp_path, runtime, node_spec, memory):
async def test_output_accumulator_write_through(self, tmp_path, runtime, node_spec, buffer):
"""set_output values should be persisted in cursor immediately."""
store = FileConversationStore(tmp_path / "conv")
llm = MockStreamingLLM(
@@ -1244,7 +1244,7 @@ class TestWriteThroughPersistence:
]
)
ctx = build_ctx(runtime, node_spec, memory, llm)
ctx = build_ctx(runtime, node_spec, buffer, llm)
node = EventLoopNode(
conversation_store=store,
config=LoopConfig(max_iterations=5),
@@ -1267,7 +1267,7 @@ class TestWriteThroughPersistence:
class TestCrashRecovery:
@pytest.mark.asyncio
async def test_restore_from_checkpoint(self, tmp_path, runtime, node_spec, memory):
async def test_restore_from_checkpoint(self, tmp_path, runtime, node_spec, buffer):
"""Populate a store with state, then verify EventLoopNode restores from it."""
store = FileConversationStore(tmp_path / "conv")
@@ -1293,7 +1293,7 @@ class TestCrashRecovery:
node_spec.output_keys = [] # no required keys so implicit accept works
llm = MockStreamingLLM(scenarios=[text_scenario("Continuing...")])
ctx = build_ctx(runtime, node_spec, memory, llm)
ctx = build_ctx(runtime, node_spec, buffer, llm)
node = EventLoopNode(
conversation_store=store,
config=LoopConfig(max_iterations=5),
@@ -1312,7 +1312,7 @@ class TestCrashRecovery:
class TestEventInjection:
@pytest.mark.asyncio
async def test_inject_event(self, runtime, node_spec, memory):
async def test_inject_event(self, runtime, node_spec, buffer):
"""inject_event() content should appear as user message in next iteration."""
node_spec.output_keys = []
@@ -1334,7 +1334,7 @@ class TestEventInjection:
]
)
ctx = build_ctx(runtime, node_spec, memory, llm)
ctx = build_ctx(runtime, node_spec, buffer, llm)
node = EventLoopNode(
judge=judge,
config=LoopConfig(max_iterations=5),
@@ -1361,7 +1361,7 @@ class TestEventInjection:
class TestPauseResume:
@pytest.mark.asyncio
async def test_pause_returns_early(self, runtime, node_spec, memory):
async def test_pause_returns_early(self, runtime, node_spec, buffer):
"""pause_requested in input_data should trigger early return."""
node_spec.output_keys = []
llm = MockStreamingLLM(scenarios=[text_scenario("should not run")])
@@ -1369,7 +1369,7 @@ class TestPauseResume:
ctx = build_ctx(
runtime,
node_spec,
memory,
buffer,
llm,
input_data={"pause_requested": True},
)
@@ -1389,7 +1389,7 @@ class TestPauseResume:
class TestStreamErrors:
@pytest.mark.asyncio
async def test_non_recoverable_stream_error_raises(self, runtime, node_spec, memory):
async def test_non_recoverable_stream_error_raises(self, runtime, node_spec, buffer):
"""Non-recoverable StreamErrorEvent should raise RuntimeError."""
node_spec.output_keys = []
llm = MockStreamingLLM(
@@ -1398,7 +1398,7 @@ class TestStreamErrors:
]
)
ctx = build_ctx(runtime, node_spec, memory, llm)
ctx = build_ctx(runtime, node_spec, buffer, llm)
node = EventLoopNode(config=LoopConfig(max_iterations=5))
with pytest.raises(RuntimeError, match="Stream error"):
@@ -1489,7 +1489,7 @@ class TestTransientErrorRetry:
"""Test retry-with-backoff for transient LLM errors in EventLoopNode."""
@pytest.mark.asyncio
async def test_transient_error_retries_then_succeeds(self, runtime, node_spec, memory):
async def test_transient_error_retries_then_succeeds(self, runtime, node_spec, buffer):
"""A transient error on the first try should retry and succeed."""
node_spec.output_keys = []
llm = ErrorThenSuccessLLM(
@@ -1497,7 +1497,7 @@ class TestTransientErrorRetry:
fail_count=1,
success_scenario=text_scenario("success"),
)
ctx = build_ctx(runtime, node_spec, memory, llm)
ctx = build_ctx(runtime, node_spec, buffer, llm)
node = EventLoopNode(
config=LoopConfig(
max_iterations=5,
@@ -1510,7 +1510,7 @@ class TestTransientErrorRetry:
assert llm._call_index == 2 # 1 failure + 1 success
@pytest.mark.asyncio
async def test_permanent_error_no_retry(self, runtime, node_spec, memory):
async def test_permanent_error_no_retry(self, runtime, node_spec, buffer):
"""A permanent error (ValueError) should NOT be retried."""
node_spec.output_keys = []
llm = ErrorThenSuccessLLM(
@@ -1518,7 +1518,7 @@ class TestTransientErrorRetry:
fail_count=1,
success_scenario=text_scenario("success"),
)
ctx = build_ctx(runtime, node_spec, memory, llm)
ctx = build_ctx(runtime, node_spec, buffer, llm)
node = EventLoopNode(
config=LoopConfig(
max_iterations=5,
@@ -1532,7 +1532,7 @@ class TestTransientErrorRetry:
@pytest.mark.asyncio
async def test_client_facing_non_transient_error_does_not_crash(
self, runtime, node_spec, memory
self, runtime, node_spec, buffer
):
"""Client-facing non-transient errors should wait for input, not crash on token vars."""
node_spec.output_keys = []
@@ -1542,7 +1542,7 @@ class TestTransientErrorRetry:
fail_count=100, # always fails
success_scenario=text_scenario("unreachable"),
)
ctx = build_ctx(runtime, node_spec, memory, llm)
ctx = build_ctx(runtime, node_spec, buffer, llm)
node = EventLoopNode(
config=LoopConfig(
max_iterations=1,
@@ -1559,7 +1559,7 @@ class TestTransientErrorRetry:
node._await_user_input.assert_awaited_once()
@pytest.mark.asyncio
async def test_transient_error_exhausts_retries(self, runtime, node_spec, memory):
async def test_transient_error_exhausts_retries(self, runtime, node_spec, buffer):
"""Transient errors that exhaust retries should raise."""
node_spec.output_keys = []
llm = ErrorThenSuccessLLM(
@@ -1567,7 +1567,7 @@ class TestTransientErrorRetry:
fail_count=100, # always fails
success_scenario=text_scenario("unreachable"),
)
ctx = build_ctx(runtime, node_spec, memory, llm)
ctx = build_ctx(runtime, node_spec, buffer, llm)
node = EventLoopNode(
config=LoopConfig(
max_iterations=5,
@@ -1580,7 +1580,7 @@ class TestTransientErrorRetry:
assert llm._call_index == 3 # 1 initial + 2 retries
@pytest.mark.asyncio
async def test_stream_error_event_retried_as_runtime_error(self, runtime, node_spec, memory):
async def test_stream_error_event_retried_as_runtime_error(self, runtime, node_spec, buffer):
"""StreamErrorEvent(recoverable=False) raises RuntimeError caught by retry."""
node_spec.output_keys = []
@@ -1615,7 +1615,7 @@ class TestTransientErrorRetry:
)
llm = StreamErrorThenSuccessLLM()
ctx = build_ctx(runtime, node_spec, memory, llm)
ctx = build_ctx(runtime, node_spec, buffer, llm)
node = EventLoopNode(
config=LoopConfig(
max_iterations=5,
@@ -1628,7 +1628,7 @@ class TestTransientErrorRetry:
assert call_index == 2
@pytest.mark.asyncio
async def test_retry_emits_event_bus_event(self, runtime, node_spec, memory):
async def test_retry_emits_event_bus_event(self, runtime, node_spec, buffer):
"""Retry should emit NODE_RETRY event on the event bus."""
node_spec.output_keys = []
llm = ErrorThenSuccessLLM(
@@ -1643,7 +1643,7 @@ class TestTransientErrorRetry:
handler=lambda e: retry_events.append(e),
)
ctx = build_ctx(runtime, node_spec, memory, llm)
ctx = build_ctx(runtime, node_spec, buffer, llm)
node = EventLoopNode(
event_bus=bus,
config=LoopConfig(
@@ -1658,7 +1658,7 @@ class TestTransientErrorRetry:
assert retry_events[0].data["retry_count"] == 1
@pytest.mark.asyncio
async def test_recoverable_stream_error_retried_not_silent(self, runtime, node_spec, memory):
async def test_recoverable_stream_error_retried_not_silent(self, runtime, node_spec, buffer):
"""Recoverable StreamErrorEvent with empty response should raise ConnectionError.
Previously, recoverable stream errors were silently swallowed,
@@ -1697,7 +1697,7 @@ class TestTransientErrorRetry:
return LLMResponse(content="ok", model="mock", stop_reason="stop")
llm = RecoverableErrorThenSuccessLLM()
ctx = build_ctx(runtime, node_spec, memory, llm)
ctx = build_ctx(runtime, node_spec, buffer, llm)
node = EventLoopNode(
config=LoopConfig(
max_iterations=5,
@@ -1892,7 +1892,7 @@ class TestToolDoomLoopIntegration:
self,
runtime,
node_spec,
memory,
buffer,
):
"""3 identical tool call turns should inject a warning."""
node_spec.output_keys = []
@@ -1921,7 +1921,7 @@ class TestToolDoomLoopIntegration:
ctx = build_ctx(
runtime,
node_spec,
memory,
buffer,
llm,
tools=[Tool(name="search", description="s", parameters={})],
)
@@ -1942,7 +1942,7 @@ class TestToolDoomLoopIntegration:
self,
runtime,
node_spec,
memory,
buffer,
):
"""Doom loop should emit NODE_TOOL_DOOM_LOOP event."""
node_spec.output_keys = []
@@ -1976,7 +1976,7 @@ class TestToolDoomLoopIntegration:
ctx = build_ctx(
runtime,
node_spec,
memory,
buffer,
llm,
tools=[Tool(name="search", description="s", parameters={})],
)
@@ -1999,7 +1999,7 @@ class TestToolDoomLoopIntegration:
async def test_client_facing_worker_doom_loop_escalates_to_queen(
self,
runtime,
memory,
buffer,
):
"""Client-facing worker doom loops should escalate instead of blocking for user input."""
spec = NodeSpec(
@@ -2040,7 +2040,7 @@ class TestToolDoomLoopIntegration:
ctx = build_ctx(
runtime,
spec,
memory,
buffer,
llm,
tools=[Tool(name="search", description="s", parameters={})],
stream_id="worker",
@@ -2066,7 +2066,7 @@ class TestToolDoomLoopIntegration:
self,
runtime,
node_spec,
memory,
buffer,
):
"""Disabled doom loop should not trigger with identical calls."""
node_spec.output_keys = []
@@ -2094,7 +2094,7 @@ class TestToolDoomLoopIntegration:
ctx = build_ctx(
runtime,
node_spec,
memory,
buffer,
llm,
tools=[Tool(name="search", description="s", parameters={})],
)
@@ -2115,7 +2115,7 @@ class TestToolDoomLoopIntegration:
self,
runtime,
node_spec,
memory,
buffer,
):
"""Different tool args each turn should NOT trigger doom loop."""
node_spec.output_keys = []
@@ -2183,7 +2183,7 @@ class TestToolDoomLoopIntegration:
ctx = build_ctx(
runtime,
node_spec,
memory,
buffer,
llm,
tools=[Tool(name="search", description="s", parameters={})],
)
@@ -2204,7 +2204,7 @@ class TestToolDoomLoopIntegration:
self,
runtime,
node_spec,
memory,
buffer,
):
"""A tool that keeps failing with is_error=True should trigger doom loop.
@@ -2245,7 +2245,7 @@ class TestToolDoomLoopIntegration:
ctx = build_ctx(
runtime,
node_spec,
memory,
buffer,
llm,
tools=[Tool(name="failing_tool", description="s", parameters={})],
)
@@ -2274,21 +2274,21 @@ class TestToolDoomLoopIntegration:
class TestExecutionId:
"""Tests for execution_id on NodeContext and its wiring through the framework."""
def test_node_context_accepts_execution_id(self, runtime, node_spec, memory):
def test_node_context_accepts_execution_id(self, runtime, node_spec, buffer):
"""NodeContext stores execution_id when constructed with one."""
ctx = NodeContext(
runtime=runtime,
node_id=node_spec.id,
node_spec=node_spec,
memory=memory,
buffer=buffer,
execution_id="exec_abc",
)
assert ctx.execution_id == "exec_abc"
def test_node_context_execution_id_defaults_to_empty(self, runtime, node_spec, memory):
def test_node_context_execution_id_defaults_to_empty(self, runtime, node_spec, buffer):
"""build_ctx without execution_id gives ctx.execution_id == ''."""
llm = MockStreamingLLM()
ctx = build_ctx(runtime, node_spec, memory, llm)
ctx = build_ctx(runtime, node_spec, buffer, llm)
assert ctx.execution_id == ""
def test_stream_runtime_adapter_exposes_execution_id(self):
@@ -2313,7 +2313,7 @@ class TestExecutionId:
id="n1", name="n1", description="test", node_type="event_loop", output_keys=["r"]
)
ctx = executor._build_context(
node_spec=node_spec, memory=SharedMemory(), goal=goal, input_data={}
node_spec=node_spec, buffer=DataBuffer(), goal=goal, input_data={}
)
assert ctx.execution_id == "exec_123"
@@ -2331,27 +2331,27 @@ class TestExecutionId:
id="n1", name="n1", description="test", node_type="event_loop", output_keys=["r"]
)
ctx = executor._build_context(
node_spec=node_spec, memory=SharedMemory(), goal=goal, input_data={}
node_spec=node_spec, buffer=DataBuffer(), goal=goal, input_data={}
)
assert ctx.execution_id == ""
# ---------------------------------------------------------------------------
# Subagent memory snapshot includes accumulator outputs
# Subagent data buffer snapshot includes accumulator outputs
# ---------------------------------------------------------------------------
class TestSubagentAccumulatorMemory:
"""Verify that subagent memory construction merges accumulator outputs
"""Verify that subagent data buffer construction merges accumulator outputs
and includes the subagent's input_keys in read permissions."""
def test_accumulator_values_merged_into_parent_data(self):
"""Keys from OutputAccumulator should appear in subagent memory."""
"""Keys from OutputAccumulator should appear in subagent data buffer."""
# Simulate what _execute_subagent does internally:
# parent shared memory has user_request but NOT tweet_content
parent_memory = SharedMemory()
parent_memory.write("user_request", "post a joke")
parent_data = parent_memory.read_all() # {"user_request": "post a joke"}
# parent shared data buffer has user_request but NOT tweet_content
parent_buffer = DataBuffer()
parent_buffer.write("user_request", "post a joke")
parent_data = parent_buffer.read_all() # {"user_request": "post a joke"}
# Accumulator has tweet_content (set via set_output before delegation)
acc = OutputAccumulator(values={"tweet_content": "Hello world!"})
@@ -2361,14 +2361,14 @@ class TestSubagentAccumulatorMemory:
if key not in parent_data:
parent_data[key] = value
# Build subagent memory
subagent_memory = SharedMemory()
# Build subagent data buffer
subagent_buffer = DataBuffer()
for key, value in parent_data.items():
subagent_memory.write(key, value, validate=False)
subagent_buffer.write(key, value, validate=False)
subagent_input_keys = ["tweet_content"]
read_keys = set(parent_data.keys()) | set(subagent_input_keys)
scoped = subagent_memory.with_permissions(read_keys=list(read_keys), write_keys=[])
scoped = subagent_buffer.with_permissions(read_keys=list(read_keys), write_keys=[])
# This would have raised PermissionError before the fix
assert scoped.read("tweet_content") == "Hello world!"
@@ -2376,18 +2376,18 @@ class TestSubagentAccumulatorMemory:
def test_input_keys_allowed_even_if_not_in_data(self):
"""Subagent input_keys should be in read permissions even if the
key doesn't exist in memory (returns None instead of PermissionError)."""
parent_memory = SharedMemory()
parent_memory.write("user_request", "hi")
parent_data = parent_memory.read_all()
key doesn't exist in data buffer (returns None instead of PermissionError)."""
parent_buffer = DataBuffer()
parent_buffer.write("user_request", "hi")
parent_data = parent_buffer.read_all()
subagent_memory = SharedMemory()
subagent_buffer = DataBuffer()
for key, value in parent_data.items():
subagent_memory.write(key, value, validate=False)
subagent_buffer.write(key, value, validate=False)
# input_keys includes "tweet_content" which isn't in parent_data
read_keys = set(parent_data.keys()) | {"tweet_content"}
scoped = subagent_memory.with_permissions(read_keys=list(read_keys), write_keys=[])
scoped = subagent_buffer.with_permissions(read_keys=list(read_keys), write_keys=[])
# Should return None (not raise PermissionError)
assert scoped.read("tweet_content") is None
+5 -5
View File
@@ -13,7 +13,7 @@ from framework.llm.stream_events import FinishEvent, StreamEvent, TextDeltaEvent
from framework.runtime.event_bus import EventBus
from framework.runtime.execution_stream import EntryPointSpec, ExecutionStream
from framework.runtime.outcome_aggregator import OutcomeAggregator
from framework.runtime.shared_state import SharedStateManager
from framework.runtime.shared_state import SharedBufferManager
from framework.storage.concurrent import ConcurrentStorage
@@ -119,7 +119,7 @@ async def test_execution_stream_retention(tmp_path):
),
graph=graph,
goal=goal,
state_manager=SharedStateManager(),
state_manager=SharedBufferManager(),
storage=storage,
outcome_aggregator=OutcomeAggregator(goal, EventBus()),
event_bus=None,
@@ -211,7 +211,7 @@ async def test_shared_session_reuses_directory_and_memory(tmp_path):
),
graph=graph,
goal=goal,
state_manager=SharedStateManager(),
state_manager=SharedBufferManager(),
storage=storage,
outcome_aggregator=OutcomeAggregator(goal, EventBus()),
event_bus=None,
@@ -247,7 +247,7 @@ async def test_shared_session_reuses_directory_and_memory(tmp_path):
),
graph=graph,
goal=goal,
state_manager=SharedStateManager(),
state_manager=SharedBufferManager(),
storage=storage,
outcome_aggregator=OutcomeAggregator(goal, EventBus()),
event_bus=None,
@@ -262,7 +262,7 @@ async def test_shared_session_reuses_directory_and_memory(tmp_path):
# Run async execution with resume_session_id pointing to primary session
session_state = {
"resume_session_id": primary_exec_id,
"memory": {"rules": "star important emails"},
"data_buffer": {"rules": "star important emails"},
}
async_exec_id = await async_stream.execute({"event": "new_email"}, session_state=session_state)
+4 -4
View File
@@ -606,7 +606,7 @@ async def test_memory_conflict_last_wins(runtime, goal):
"""last_wins should allow both branches to write the same key without error."""
# Use distinct output_keys in spec (to pass graph validation) but have
# the node impl write a shared key at runtime — this is the scenario
# memory_conflict_strategy is designed to handle.
# buffer_conflict_strategy is designed to handle.
b1 = NodeSpec(
id="b1", name="B1", description="b1", node_type="event_loop", output_keys=["b1_out"]
)
@@ -616,7 +616,7 @@ async def test_memory_conflict_last_wins(runtime, goal):
graph = _make_fanout_graph([b1, b2])
config = ParallelExecutionConfig(memory_conflict_strategy="last_wins")
config = ParallelExecutionConfig(buffer_conflict_strategy="last_wins")
executor = GraphExecutor(
runtime=runtime, enable_parallel_execution=True, parallel_config=config
)
@@ -647,7 +647,7 @@ async def test_memory_conflict_first_wins(runtime, goal):
graph = _make_fanout_graph([b1, b2])
config = ParallelExecutionConfig(memory_conflict_strategy="first_wins")
config = ParallelExecutionConfig(buffer_conflict_strategy="first_wins")
executor = GraphExecutor(
runtime=runtime, enable_parallel_execution=True, parallel_config=config
)
@@ -675,7 +675,7 @@ async def test_memory_conflict_error_raises(runtime, goal):
graph = _make_fanout_graph([b1, b2])
config = ParallelExecutionConfig(memory_conflict_strategy="error")
config = ParallelExecutionConfig(buffer_conflict_strategy="error")
executor = GraphExecutor(
runtime=runtime, enable_parallel_execution=True, parallel_config=config
)
+4 -4
View File
@@ -277,7 +277,7 @@ def test_write_progress_uses_atomic_write_and_updates_state(tmp_path, monkeypatc
executor._write_progress(
current_node="node-b",
path=["node-a", "node-b"],
memory=memory,
buffer=memory,
node_visit_counts={"node-a": 1, "node-b": 1},
)
@@ -287,9 +287,9 @@ def test_write_progress_uses_atomic_write_and_updates_state(tmp_path, monkeypatc
assert state["progress"]["current_node"] == "node-b"
assert state["progress"]["path"] == ["node-a", "node-b"]
assert state["progress"]["node_visit_counts"] == {"node-a": 1, "node-b": 1}
assert state["data_buffer"] == {"foo": "bar"}
assert state["progress"]["steps_executed"] == 2
assert state["memory"] == {"foo": "bar"}
assert state["memory_keys"] == ["foo"]
assert state["buffer_keys"] == ["foo"]
assert "updated_at" in state["timestamps"]
@@ -309,7 +309,7 @@ def test_write_progress_logs_warning_on_atomic_write_failure(tmp_path, monkeypat
executor._write_progress(
current_node="node-b",
path=["node-a", "node-b"],
memory=memory,
buffer=memory,
node_visit_counts={"node-a": 1, "node-b": 1},
)
+50 -50
View File
@@ -1,5 +1,5 @@
"""
Test hallucination detection in SharedMemory and OutputValidator.
Test hallucination detection in DataBuffer and OutputValidator.
These tests verify that code detection works correctly across the entire
string content, not just the first 500 characters.
@@ -7,117 +7,117 @@ string content, not just the first 500 characters.
import pytest
from framework.graph.node import MemoryWriteError, SharedMemory
from framework.graph.node import DataBufferWriteError, DataBuffer
from framework.graph.validator import OutputValidator, ValidationResult
class TestSharedMemoryHallucinationDetection:
"""Test the SharedMemory hallucination detection."""
class TestDataBufferHallucinationDetection:
"""Test the DataBuffer hallucination detection."""
def test_detects_code_at_start(self):
"""Code at the start of the string should be detected."""
memory = SharedMemory()
buffer = DataBuffer()
code_content = "```python\nimport os\ndef hack(): pass\n```" + "A" * 6000
with pytest.raises(MemoryWriteError) as exc_info:
memory.write("output", code_content)
with pytest.raises(DataBufferWriteError) as exc_info:
buffer.write("output", code_content)
assert "hallucinated code" in str(exc_info.value)
def test_detects_code_in_middle(self):
"""Code in the middle of the string should be detected (was previously missed)."""
memory = SharedMemory()
buffer = DataBuffer()
# 600 chars of padding, then code, then more padding to exceed 5000 chars
padding_start = "A" * 600
code = "\n```python\nimport os\ndef malicious(): pass\n```\n"
padding_end = "B" * 5000
content = padding_start + code + padding_end
with pytest.raises(MemoryWriteError) as exc_info:
memory.write("output", content)
with pytest.raises(DataBufferWriteError) as exc_info:
buffer.write("output", content)
assert "hallucinated code" in str(exc_info.value)
def test_detects_code_at_end(self):
"""Code at the end of the string should be detected (was previously missed)."""
memory = SharedMemory()
buffer = DataBuffer()
padding = "A" * 5500
code = "\n```python\nclass Exploit:\n pass\n```"
content = padding + code
with pytest.raises(MemoryWriteError) as exc_info:
memory.write("output", content)
with pytest.raises(DataBufferWriteError) as exc_info:
buffer.write("output", content)
assert "hallucinated code" in str(exc_info.value)
def test_detects_javascript_code(self):
"""JavaScript code patterns should be detected."""
memory = SharedMemory()
buffer = DataBuffer()
padding = "A" * 600
code = "\nfunction malicious() { require('child_process'); }\n"
padding_end = "B" * 5000
content = padding + code + padding_end
with pytest.raises(MemoryWriteError) as exc_info:
memory.write("output", content)
with pytest.raises(DataBufferWriteError) as exc_info:
buffer.write("output", content)
assert "hallucinated code" in str(exc_info.value)
def test_detects_sql_injection(self):
"""SQL patterns should be detected."""
memory = SharedMemory()
buffer = DataBuffer()
padding = "A" * 600
code = "\nDROP TABLE users; SELECT * FROM passwords;\n"
padding_end = "B" * 5000
content = padding + code + padding_end
with pytest.raises(MemoryWriteError) as exc_info:
memory.write("output", content)
with pytest.raises(DataBufferWriteError) as exc_info:
buffer.write("output", content)
assert "hallucinated code" in str(exc_info.value)
def test_detects_script_injection(self):
"""HTML script injection should be detected."""
memory = SharedMemory()
buffer = DataBuffer()
padding = "A" * 600
code = "\n<script>alert('xss')</script>\n"
padding_end = "B" * 5000
content = padding + code + padding_end
with pytest.raises(MemoryWriteError) as exc_info:
memory.write("output", content)
with pytest.raises(DataBufferWriteError) as exc_info:
buffer.write("output", content)
assert "hallucinated code" in str(exc_info.value)
def test_allows_short_strings_without_validation(self):
"""Strings under 5000 chars should not trigger validation."""
memory = SharedMemory()
buffer = DataBuffer()
content = "def hello(): pass" # Contains code indicator but short
# Should not raise - too short to validate
memory.write("output", content)
assert memory.read("output") == content
buffer.write("output", content)
assert buffer.read("output") == content
def test_allows_long_strings_without_code(self):
"""Long strings without code indicators should be allowed."""
memory = SharedMemory()
buffer = DataBuffer()
content = "This is a long text document. " * 500 # ~15000 chars, no code
memory.write("output", content)
assert memory.read("output") == content
buffer.write("output", content)
assert buffer.read("output") == content
def test_validate_false_bypasses_check(self):
"""Using validate=False should bypass the check."""
memory = SharedMemory()
buffer = DataBuffer()
code_content = "```python\nimport os\n```" + "A" * 6000
# Should not raise when validate=False
memory.write("output", code_content, validate=False)
assert memory.read("output") == code_content
buffer.write("output", code_content, validate=False)
assert buffer.read("output") == code_content
def test_sampling_for_very_long_strings(self):
"""Very long strings (>10KB) should be sampled at multiple positions."""
memory = SharedMemory()
buffer = DataBuffer()
# Create a 50KB string with code at the 75% mark
size = 50000
code_position = int(size * 0.75)
@@ -125,8 +125,8 @@ class TestSharedMemoryHallucinationDetection:
"A" * code_position + "def hidden_code(): pass" + "B" * (size - code_position - 25)
)
with pytest.raises(MemoryWriteError) as exc_info:
memory.write("output", content)
with pytest.raises(DataBufferWriteError) as exc_info:
buffer.write("output", content)
assert "hallucinated code" in str(exc_info.value)
@@ -199,36 +199,36 @@ class TestEdgeCases:
def test_empty_string(self):
"""Empty strings should not cause errors."""
memory = SharedMemory()
memory.write("output", "")
assert memory.read("output") == ""
buffer = DataBuffer()
buffer.write("output", "")
assert buffer.read("output") == ""
def test_non_string_values(self):
"""Non-string values should not be validated for code."""
memory = SharedMemory()
buffer = DataBuffer()
# These should all work without validation
memory.write("number", 12345)
memory.write("list", [1, 2, 3])
memory.write("dict", {"key": "value"})
memory.write("bool", True)
buffer.write("number", 12345)
buffer.write("list", [1, 2, 3])
buffer.write("dict", {"key": "value"})
buffer.write("bool", True)
assert memory.read("number") == 12345
assert memory.read("list") == [1, 2, 3]
assert buffer.read("number") == 12345
assert buffer.read("list") == [1, 2, 3]
def test_exactly_5000_chars(self):
"""String of exactly 5000 chars should not trigger validation."""
memory = SharedMemory()
buffer = DataBuffer()
content = "def code(): pass" + "A" * (5000 - 16) # Exactly 5000 chars
# Should not raise - exactly at threshold, not over
memory.write("output", content)
assert len(memory.read("output")) == 5000
buffer.write("output", content)
assert len(buffer.read("output")) == 5000
def test_5001_chars_triggers_validation(self):
"""String of 5001 chars with code should trigger validation."""
memory = SharedMemory()
buffer = DataBuffer()
content = "def code(): pass" + "A" * (5001 - 16) # 5001 chars
with pytest.raises(MemoryWriteError):
memory.write("output", content)
with pytest.raises(DataBufferWriteError):
buffer.write("output", content)
+4 -4
View File
@@ -494,12 +494,12 @@ class TestEdgeConditionPatterns:
)
def test_success_flag_check(self):
ctx = {"output": {"success": True}, "memory": {"attempts": 2}}
ctx = {"output": {"success": True}, "buffer": {"attempts": 2}}
assert safe_eval("output.get('success') == True", ctx) is True
def test_memory_threshold(self):
ctx = {"memory": {"score": 0.85}}
assert safe_eval("memory.get('score', 0) >= 0.8", ctx) is True
def test_buffer_threshold(self):
ctx = {"buffer": {"score": 0.85}}
assert safe_eval("buffer.get('score', 0) >= 0.8", ctx) is True
def test_string_contains_check(self):
ctx = {"output": {"status": "completed_with_warnings"}}
+1 -1
View File
@@ -76,7 +76,7 @@ class TestSkillDirsPropagation:
runtime=MagicMock(),
node_id="n",
node_spec=MagicMock(),
memory={},
buffer={},
**kwargs,
)
+70 -70
View File
@@ -1,6 +1,6 @@
"""Tests for subagent capability in EventLoopNode.
Tests the delegate_to_sub_agent tool, subagent execution with read-only memory,
Tests the delegate_to_sub_agent tool, subagent execution with read-only data buffer,
prevention of nested subagent delegation, and report_to_parent one-way channel.
"""
@@ -19,7 +19,7 @@ from framework.graph.event_loop_node import (
LoopConfig,
SubagentJudge,
)
from framework.graph.node import NodeContext, NodeSpec, SharedMemory
from framework.graph.node import NodeContext, NodeSpec, DataBuffer
from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolResult, ToolUse
from framework.llm.stream_events import (
FinishEvent,
@@ -193,14 +193,14 @@ class TestSubagentExecution:
"""Should return error when subagent ID is not in registry."""
node = EventLoopNode(config=LoopConfig(max_iterations=5))
memory = SharedMemory()
memory.write("query", "test query")
buffer = DataBuffer()
buffer.write("query", "test query")
ctx = NodeContext(
runtime=runtime,
node_id="parent",
node_spec=parent_node_spec,
memory=memory,
buffer=buffer,
input_data={},
llm=MockStreamingLLM([]),
available_tools=[],
@@ -219,7 +219,7 @@ class TestSubagentExecution:
async def test_subagent_receives_readonly_memory(
self, runtime, parent_node_spec, subagent_node_spec
):
"""Subagent should have read-only access to memory."""
"""Subagent should have read-only access to data buffer."""
# Create LLM that will set output for the subagent
subagent_llm = MockStreamingLLM(
[
@@ -232,10 +232,10 @@ class TestSubagentExecution:
config=LoopConfig(max_iterations=5),
)
# Parent memory with some data
memory = SharedMemory()
memory.write("query", "research AI")
scoped_memory = memory.with_permissions(
# Parent data buffer with some data
buffer = DataBuffer()
buffer.write("query", "research AI")
scoped_buffer = buffer.with_permissions(
read_keys=["query"],
write_keys=["result"],
)
@@ -244,7 +244,7 @@ class TestSubagentExecution:
runtime=runtime,
node_id="parent",
node_spec=parent_node_spec,
memory=scoped_memory,
buffer=scoped_buffer,
input_data={"query": "research AI"},
llm=subagent_llm,
available_tools=[],
@@ -275,14 +275,14 @@ class TestSubagentExecution:
node = EventLoopNode(config=LoopConfig(max_iterations=5))
memory = SharedMemory()
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
buffer = DataBuffer()
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
ctx = NodeContext(
runtime=runtime,
node_id="parent",
node_spec=parent_node_spec,
memory=scoped,
buffer=scoped,
input_data={},
llm=subagent_llm,
available_tools=[],
@@ -332,14 +332,14 @@ class TestSubagentExecution:
browser_tool = Tool(name="browser_snapshot", description="Snapshot")
node = EventLoopNode(config=LoopConfig(max_iterations=5))
memory = SharedMemory()
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
buffer = DataBuffer()
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
ctx = NodeContext(
runtime=runtime,
node_id="parent",
node_spec=parent_spec,
memory=scoped,
buffer=scoped,
input_data={},
llm=spy_llm,
available_tools=[],
@@ -383,12 +383,12 @@ class TestNestedSubagentPrevention:
sub_agents=["another"], # This should be ignored in subagent mode
)
memory = SharedMemory()
buffer = DataBuffer()
ctx = NodeContext(
runtime=runtime,
node_id="nested",
node_spec=subagent_with_subagents,
memory=memory,
buffer=buffer,
input_data={},
llm=MockStreamingLLM([]),
available_tools=[],
@@ -459,9 +459,9 @@ class TestDelegationIntegration:
# For this test, let's just verify the parent can call delegate_to_sub_agent
# and the tool handling correctly queues and executes it
memory = SharedMemory()
memory.write("query", "What are AI trends?")
scoped = memory.with_permissions(
buffer = DataBuffer()
buffer.write("query", "What are AI trends?")
scoped = buffer.with_permissions(
read_keys=["query"],
write_keys=["result"],
)
@@ -475,7 +475,7 @@ class TestDelegationIntegration:
runtime=runtime,
node_id="parent",
node_spec=parent_node_spec,
memory=scoped,
buffer=scoped,
input_data={"query": "What are AI trends?"},
llm=parent_llm,
available_tools=[],
@@ -538,12 +538,12 @@ class TestBuildReportToParentTool:
node = EventLoopNode()
# Parent mode: no report_to_parent
memory = SharedMemory()
buffer = DataBuffer()
parent_ctx = NodeContext(
runtime=runtime,
node_id="parent",
node_spec=parent_node_spec,
memory=memory,
buffer=buffer,
input_data={},
llm=MockStreamingLLM([]),
available_tools=[],
@@ -567,7 +567,7 @@ class TestBuildReportToParentTool:
runtime=runtime,
node_id="sub",
node_spec=subagent_node_spec,
memory=memory,
buffer=buffer,
input_data={},
llm=MockStreamingLLM([]),
available_tools=[],
@@ -587,13 +587,13 @@ class TestBuildReportToParentTool:
def test_tool_not_visible_without_callback(self, runtime, subagent_node_spec):
"""report_to_parent should NOT appear when callback is None even in subagent mode."""
node = EventLoopNode()
memory = SharedMemory()
buffer = DataBuffer()
ctx = NodeContext(
runtime=runtime,
node_id="sub",
node_spec=subagent_node_spec,
memory=memory,
buffer=buffer,
input_data={},
llm=MockStreamingLLM([]),
available_tools=[],
@@ -630,14 +630,14 @@ class TestReportToParentExecution:
node = EventLoopNode(config=LoopConfig(max_iterations=10))
memory = SharedMemory()
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
buffer = DataBuffer()
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
ctx = NodeContext(
runtime=runtime,
node_id="parent",
node_spec=parent_node_spec,
memory=scoped,
buffer=scoped,
input_data={},
llm=subagent_llm,
available_tools=[],
@@ -689,14 +689,14 @@ class TestReportToParentExecution:
config=LoopConfig(max_iterations=10),
)
memory = SharedMemory()
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
buffer = DataBuffer()
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
ctx = NodeContext(
runtime=runtime,
node_id="parent",
node_spec=parent_node_spec,
memory=scoped,
buffer=scoped,
input_data={},
llm=subagent_llm,
available_tools=[],
@@ -744,14 +744,14 @@ class TestReportToParentExecution:
config=LoopConfig(max_iterations=10),
)
memory = SharedMemory()
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
buffer = DataBuffer()
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
ctx = NodeContext(
runtime=runtime,
node_id="parent",
node_spec=parent_node_spec,
memory=scoped,
buffer=scoped,
input_data={},
llm=subagent_llm,
available_tools=[],
@@ -790,14 +790,14 @@ class TestReportToParentExecution:
node = EventLoopNode(config=LoopConfig(max_iterations=10))
memory = SharedMemory()
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
buffer = DataBuffer()
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
ctx = NodeContext(
runtime=runtime,
node_id="parent",
node_spec=parent_node_spec,
memory=scoped,
buffer=scoped,
input_data={},
llm=subagent_llm,
available_tools=[],
@@ -831,14 +831,14 @@ class TestReportToParentExecution:
node = EventLoopNode(config=LoopConfig(max_iterations=10))
memory = SharedMemory()
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
buffer = DataBuffer()
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
ctx = NodeContext(
runtime=runtime,
node_id="parent",
node_spec=parent_node_spec,
memory=scoped,
buffer=scoped,
input_data={},
llm=subagent_llm,
available_tools=[],
@@ -967,14 +967,14 @@ class TestEscalationFlow:
config=LoopConfig(max_iterations=10),
)
memory = SharedMemory()
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
buffer = DataBuffer()
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
ctx = NodeContext(
runtime=runtime,
node_id="parent",
node_spec=parent_node_spec,
memory=scoped,
buffer=scoped,
input_data={},
llm=subagent_llm,
available_tools=[],
@@ -1045,14 +1045,14 @@ class TestEscalationFlow:
config=LoopConfig(max_iterations=10),
)
memory = SharedMemory()
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
buffer = DataBuffer()
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
ctx = NodeContext(
runtime=runtime,
node_id="parent",
node_spec=parent_node_spec,
memory=scoped,
buffer=scoped,
input_data={},
llm=subagent_llm,
available_tools=[],
@@ -1122,14 +1122,14 @@ class TestEscalationFlow:
config=LoopConfig(max_iterations=10),
)
memory = SharedMemory()
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
buffer = DataBuffer()
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
ctx = NodeContext(
runtime=runtime,
node_id="parent",
node_spec=parent_node_spec,
memory=scoped,
buffer=scoped,
input_data={},
llm=subagent_llm,
available_tools=[],
@@ -1182,14 +1182,14 @@ class TestEscalationFlow:
config=LoopConfig(max_iterations=10),
)
memory = SharedMemory()
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
buffer = DataBuffer()
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
ctx = NodeContext(
runtime=runtime,
node_id="parent",
node_spec=parent_node_spec,
memory=scoped,
buffer=scoped,
input_data={},
llm=subagent_llm,
available_tools=[],
@@ -1232,14 +1232,14 @@ class TestEscalationFlow:
config=LoopConfig(max_iterations=10),
)
memory = SharedMemory()
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
buffer = DataBuffer()
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
ctx = NodeContext(
runtime=runtime,
node_id="parent",
node_spec=parent_node_spec,
memory=scoped,
buffer=scoped,
input_data={},
llm=subagent_llm,
available_tools=[],
@@ -1368,15 +1368,15 @@ class TestSubagentJudge:
tool_executor=mock_tool_executor,
)
memory = SharedMemory()
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
buffer = DataBuffer()
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
task_text = "Check the profile at https://example.com/user789"
ctx = NodeContext(
runtime=runtime,
node_id="parent",
node_spec=parent_node_spec,
memory=scoped,
buffer=scoped,
input_data={},
llm=subagent_llm,
available_tools=[],
@@ -1463,14 +1463,14 @@ class TestMarkCompleteViaReport:
node = EventLoopNode(config=LoopConfig(max_iterations=10))
memory = SharedMemory()
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
buffer = DataBuffer()
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
ctx = NodeContext(
runtime=runtime,
node_id="parent",
node_spec=parent_node_spec,
memory=scoped,
buffer=scoped,
input_data={},
llm=subagent_llm,
available_tools=[],
@@ -1524,14 +1524,14 @@ class TestMarkCompleteViaReport:
node = EventLoopNode(config=LoopConfig(max_iterations=10))
memory = SharedMemory()
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
buffer = DataBuffer()
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
ctx = NodeContext(
runtime=runtime,
node_id="parent",
node_spec=parent_node_spec,
memory=scoped,
buffer=scoped,
input_data={},
llm=subagent_llm,
available_tools=[],
@@ -1594,14 +1594,14 @@ class TestMarkCompleteViaReport:
config=LoopConfig(max_iterations=5),
)
memory = SharedMemory()
scoped = memory.with_permissions(read_keys=[], write_keys=[])
buffer = DataBuffer()
scoped = buffer.with_permissions(read_keys=[], write_keys=[])
ctx = NodeContext(
runtime=runtime,
node_id="sub",
node_spec=subagent_node_spec,
memory=scoped,
buffer=scoped,
input_data={"task": "test task"},
llm=subagent_llm,
available_tools=[],
+4 -4
View File
@@ -29,7 +29,7 @@ from framework.llm.stream_events import (
from framework.runtime.event_bus import AgentEvent, EventBus, EventType
from framework.runtime.execution_stream import EntryPointSpec, ExecutionStream
from framework.runtime.outcome_aggregator import OutcomeAggregator
from framework.runtime.shared_state import SharedStateManager
from framework.runtime.shared_state import SharedBufferManager
from framework.storage.concurrent import ConcurrentStorage
# ---------------------------------------------------------------------------
@@ -268,7 +268,7 @@ async def test_escalation_e2e_through_execution_stream(tmp_path):
),
graph=graph,
goal=goal,
state_manager=SharedStateManager(),
state_manager=SharedBufferManager(),
storage=storage,
outcome_aggregator=OutcomeAggregator(goal, bus),
event_bus=bus,
@@ -479,7 +479,7 @@ async def test_escalation_cleanup_after_completion(tmp_path):
),
graph=graph,
goal=goal,
state_manager=SharedStateManager(),
state_manager=SharedBufferManager(),
storage=storage,
outcome_aggregator=OutcomeAggregator(goal, bus),
event_bus=bus,
@@ -649,7 +649,7 @@ async def test_mark_complete_e2e_through_execution_stream(tmp_path):
),
graph=graph,
goal=goal,
state_manager=SharedStateManager(),
state_manager=SharedBufferManager(),
storage=storage,
outcome_aggregator=OutcomeAggregator(goal, bus),
event_bus=bus,
+2 -2
View File
@@ -33,7 +33,7 @@ Single-entry agents get a `"default"` entry point automatically. There is no sep
| `ExecutionStream` | `runtime/execution_stream.py` | Per-entry-point execution queue, session persistence |
| `GraphExecutor` | `graph/executor.py` | Node traversal, tool dispatch, checkpointing |
| `EventBus` | `runtime/event_bus.py` | Pub/sub for execution events (streaming, I/O) |
| `SharedStateManager` | `runtime/shared_state.py` | Cross-stream state with isolation levels |
| `SharedBufferManager` | `runtime/shared_state.py` | Cross-stream state with isolation levels |
| `OutcomeAggregator` | `runtime/outcome_aggregator.py` | Goal progress tracking across streams |
| `SessionStore` | `storage/session_store.py` | Session state persistence (`sessions/{id}/state.json`) |
@@ -108,7 +108,7 @@ runtime.unsubscribe_from_events(sub_id)
# Inspection
runtime.is_running # bool
runtime.event_bus # EventBus
runtime.state_manager # SharedStateManager
runtime.state_manager # SharedBufferManager
runtime.get_stats() # Runtime statistics
```
+14 -14
View File
@@ -59,7 +59,7 @@ flowchart TB
subgraph Infra [Infra]
TR["Tool Registry"]
WTM["Write through Conversation Memory<br/>(Logs/RAM/Harddrive)"]
SM["Shared Memory<br/>(State/Harddrive)"]
SM["Data Buffer<br/>(State/Harddrive)"]
EB["Event Bus<br/>(RAM)"]
CS["Credential Store<br/>(Harddrive/Cloud)"]
@@ -132,7 +132,7 @@ flowchart TB
CB -->|"Modify Worker Bee"| WorkerBees
%% =========================================
%% SHARED MEMORY & LOGS ACCESS
%% DATA BUFFER & LOGS ACCESS
%% =========================================
%% Worker Bees Access
@@ -152,11 +152,11 @@ flowchart TB
| Subsystem | Role | Description |
| ----------------------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| **Event Loop Node** | Entry point | Listens for external events (schedulers, webhooks, SSE), triggers the event loop, and delegates to sub-agents. Its conversation mirrors the Worker Bees conversation for context continuity. |
| **Worker Bees** | Execution | A graph of nodes that execute the actual work. Each node in the graph can become the Active Node. Workers maintain their own conversation and system prompt, and read/write to shared memory. |
| **Worker Bees** | Execution | A graph of nodes that execute the actual work. Each node in the graph can become the Active Node. Workers maintain their own conversation and system prompt, and read/write to the data buffer. |
| **Judge** | Evaluation | Runs as an **isolated graph** alongside the worker on a 2-minute timer. Reads worker session logs via `get_worker_health_summary` and accumulates observations in a continuous conversation (its own memory) to assess worker health trends. Criteria and principles align with Worker/Queen system prompts at design-time. |
| **Queen Bee** | Oversight | The orchestration layer. Subscribes to Active Node events via the Event Bus and has read/write access to shared memory and credentials. Users can talk directly to the Queen Bee. |
| **Queen Bee** | Oversight | The orchestration layer. Subscribes to Active Node events via the Event Bus and has read/write access to the data buffer and credentials. Users can talk directly to the Queen Bee. |
| **Sub-Agent Framework** | Delegation | Enables parent nodes to delegate tasks to specialized sub-agents via `delegate_to_sub_agent`. Sub-agents run as independent EventLoopNodes with read-only data buffer snapshots, their own conversation, and a `SubagentJudge`. They report progress via `report_to_parent` and can escalate to users via `wait_for_response`. Multiple delegations execute in parallel. Nested delegation is prevented. |
| **Infra** | Services | Shared infrastructure: Tool Registry (assigned to Event Loop Nodes and Sub-Agents), Write-through Conversation Memory (logs across RAM and disk), Shared Memory (state on disk), Event Bus (pub/sub in RAM), and Credential Store (encrypted on disk or cloud). |
| **Infra** | Services | Shared infrastructure: Tool Registry (assigned to Event Loop Nodes and Sub-Agents), Write-through Conversation Memory (logs across RAM and disk), Data Buffer (state on disk), Event Bus (pub/sub in RAM), and Credential Store (encrypted on disk or cloud). |
### Data Flow Patterns
@@ -363,7 +363,7 @@ flowchart TB
%% =========================================
%% DATA BUFFER
%% =========================================
subgraph SharedMem [Shared Memory]
subgraph SharedMem [Data Buffer]
ExecState["Execution State<br/>(private)"]
StreamState["Stream State<br/>(shared within stream)"]
GlobalState["Global State<br/>(shared across all)"]
@@ -376,7 +376,7 @@ flowchart TB
%% =========================================
subgraph PromptOnion [System Prompt — 3-Layer Onion]
Layer1["Layer 1 — Identity<br/>(static, never changes)"]
Layer2["Layer 2 — Narrative<br/>(auto-built from<br/>SharedMemory +<br/>execution path)"]
Layer2["Layer 2 — Narrative<br/>(auto-built from<br/>DataBuffer +<br/>execution path)"]
Layer3["Layer 3 — Focus<br/>(current node's<br/>system_prompt)"]
end
@@ -410,11 +410,11 @@ flowchart TB
**2. Judge feedback becomes conversation memory.** When the judge issues a RETRY verdict with feedback, that feedback is injected as a `[Judge feedback]: ...` user message into the conversation. On the next LLM turn, the agent sees its prior attempt, the judge's critique, and can adjust. This is the core reflexion mechanism — in-context learning without model retraining.
**3. The three-layer prompt onion refreshes each turn.** Layer 1 (identity) is static. Layer 2 (narrative) is rebuilt deterministically from `SharedMemory.read_all()` and the execution path — listing completed phases and current state values. Layer 3 (focus) is the current node's `system_prompt`. At phase transitions in continuous mode, Layer 3 swaps while Layers 1-2 and the full conversation history carry forward.
**3. The three-layer prompt onion refreshes each turn.** Layer 1 (identity) is static. Layer 2 (narrative) is rebuilt deterministically from `DataBuffer.read_all()` and the execution path — listing completed phases and current state values. Layer 3 (focus) is the current node's `system_prompt`. At phase transitions in continuous mode, Layer 3 swaps while Layers 1-2 and the full conversation history carry forward.
**4. Phase transitions inject structured reflection.** When execution moves between nodes, a transition marker is inserted into the conversation containing: what phase completed, all outputs in the data buffer, available data files, available tools, and an explicit reflection prompt: *"Before proceeding, briefly reflect: what went well in the previous phase? Are there any gaps or surprises worth noting?"* This engineered metacognition surfaces issues before they compound.
**5. Shared memory connects phases.** On ACCEPT, the accumulator's outputs are written to `SharedMemory`. The narrative layer reads these values to describe progress. In continuous mode, subsequent nodes see both the conversation history (what was discussed) and the structured memory (what was decided). In isolated mode, a `ContextHandoff` summarizes the prior node's conversation for the next node's input.
**5. Data buffer connects phases.** On ACCEPT, the accumulator's outputs are written to `DataBuffer`. The narrative layer reads these values to describe progress. In continuous mode, subsequent nodes see both the conversation history (what was discussed) and the structured data buffer (what was decided). In isolated mode, a `ContextHandoff` summarizes the prior node's conversation for the next node's input.
### The Judge Evaluation Pipeline
@@ -773,8 +773,8 @@ The system architecture (see diagram above) maps onto four logical layers. The *
│ ┌─────────────────────────────────────────────────────────────┐ │
│ │ EXECUTION LAYER (Worker Bees) │ │
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │
│ │ │ Graph │───►│ Active │───►│ Shared │ │ │
│ │ │ Executor │ │ Node │ │ Memory │ │ │
│ │ │ Graph │───►│ Active │───►│ Data │ │ │
│ │ │ Executor │ │ Node │ │ Buffer │ │ │
│ │ └──────────┘ └──────────┘ └──────────┘ │ │
│ │ Event Loop Node delegates │ to Sub-Agents (parallel) │ │
│ │ Sub-Agents: read-only buffer │ SubagentJudge │ report_to_parent│ │
@@ -1057,8 +1057,8 @@ class SignalWeights:
| **Rule Generation** | Research | Transforming human decisions into deterministic rules (closing the loop) |
| **HybridJudge** | Engineering | Implementation of triangulation with priority-ordered evaluation |
| **Reflexion Loop** | Engineering | Worker-Judge architecture with RETRY/REPLAN/ESCALATE |
| **Memory Reflection** | Engineering | 3-layer prompt onion, judge feedback injection, shared memory |
| **Graph Execution** | Engineering | Node composition, shared memory, edge traversal, sub-agent delegation |
| **Memory Reflection** | Engineering | 3-layer prompt onion, judge feedback injection, data buffer |
| **Graph Execution** | Engineering | Node composition, data buffer, edge traversal, sub-agent delegation |
| **HITL Protocol** | Engineering | Pause/resume, approval workflows, escalation handling |
---
@@ -1075,7 +1075,7 @@ The Hive Agent Framework addresses the fundamental reliability crisis in agentic
4. **The Foundation**: Goal-driven architecture ensures we're optimizing for user intent, not metric gaming. The reflexion loop between Worker Bees and Judge enables learning from failure without expensive search.
5. **The Memory System**: Agents reflect through three mechanisms — the conversation history (carrying judge feedback as injected user messages), the three-layer prompt onion (identity → narrative → focus, rebuilt each turn from shared memory), and structured phase transition markers with explicit reflection prompts at node boundaries.
5. **The Memory System**: Agents reflect through three mechanisms — the conversation history (carrying judge feedback as injected user messages), the three-layer prompt onion (identity → narrative → focus, rebuilt each turn from the data buffer), and structured phase transition markers with explicit reflection prompts at node boundaries.
6. **The Learning Path**: Human escalations aren't just fallbacks—they're training signals. Confidence calibration tunes thresholds automatically. Rule generation transforms repeated human decisions into deterministic automation.
@@ -141,9 +141,9 @@ Compare to proper state management:
```python
# Isolated test - no external dependencies
memory = manager.create_memory("test-exec", "test-stream", IsolationLevel.ISOLATED)
await memory.write("key", "value")
assert await memory.read("key") == "value"
buf = manager.create_buffer("test-exec", "test-stream", IsolationLevel.ISOLATED)
await buf.write("key", "value")
assert await buf.read("key") == "value"
# Other tests unaffected
```
@@ -169,7 +169,7 @@ The new architecture introduces explicit state management with proper isolation:
│ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ │
│ └────────────────┼────────────────┘ │
│ ↓ │
│ SharedStateManager │
│ SharedBufferManager │
│ (Isolation Levels) │
│ │
│ OutcomeAggregator │
@@ -179,7 +179,7 @@ The new architecture introduces explicit state management with proper isolation:
### Key Components
#### 1. SharedStateManager with Isolation Levels
#### 1. SharedBufferManager with Isolation Levels
```python
class IsolationLevel(Enum):
@@ -272,11 +272,11 @@ The key distinction:
| Use Case | Correct Approach |
| ------------------------------------ | --------------------------------- |
| Coordinate between executions | SharedStateManager |
| Coordinate between executions | SharedBufferManager |
| Track decision outcomes | StreamRuntime + OutcomeAggregator |
| Call external API | Tool |
| Persist business data | Tool (to external storage) |
| Share scratch state during execution | StreamMemory |
| Share scratch state during execution | StreamBuffer |
| Publish events to other streams | EventBus |
---
@@ -332,6 +332,6 @@ The multi-entry-point architecture doesn't just enable concurrent execution—it
## References
- [core/framework/runtime/agent_runtime.py](../../core/framework/runtime/agent_runtime.py) - AgentRuntime implementation
- [core/framework/runtime/shared_state.py](../../core/framework/runtime/shared_state.py) - SharedStateManager
- [core/framework/runtime/shared_state.py](../../core/framework/runtime/shared_state.py) - SharedBufferManager
- [core/framework/runtime/outcome_aggregator.py](../../core/framework/runtime/outcome_aggregator.py) - Cross-stream goal evaluation
- [core/framework/runtime/tests/test_agent_runtime.py](../../core/framework/runtime/tests/test_agent_runtime.py) - Test examples
+5 -5
View File
@@ -228,7 +228,7 @@ AgentRuntime.__init__(...) (line 118)
├─ Initialize SessionStore for unified sessions [line 182]
├─ Initialize shared components:
│ ├─ SharedStateManager [line 185]
│ ├─ SharedBufferManager [line 185]
│ ├─ EventBus (or use shared one) [line 186]
│ └─ OutcomeAggregator [line 187]
@@ -411,8 +411,8 @@ await _run_execution(ctx) (line 538)
├─ Mark status as "running" [line 559]
├─ Create execution-scoped memory [line 572-576]
│ └─ self._state_manager.create_memory(execution_id, stream_id, isolation)
├─ Create execution-scoped buffer [line 572-576]
│ └─ self._state_manager.create_buffer(execution_id, stream_id, isolation)
├─ Start runtime adapter [line 579-586]
│ └─ runtime_adapter.start_run(goal_id, goal_description, input_data)
@@ -480,7 +480,7 @@ await executor.execute(graph, goal, input_data, session_state, checkpoint_config
├─ Validate tool availability [line 320-332]
├─ Initialize SharedMemory for session [line 335]
├─ Initialize DataBuffer for session [line 335]
├─ Restore session state if resuming [line 353-369]
│ └─ Load memory from previous session
@@ -576,7 +576,7 @@ Shared Component: LLM Provider
Memory Flow:
├─ Each execution has ExecutionContext with input_data
├─ SharedMemory created per execution (line 572-576 in execution_stream.py)
├─ DataBuffer created per execution (line 572-576 in execution_stream.py)
├─ Session state restored if resuming (line 354-369 in executor.py)
├─ Each node reads from the buffer via input_keys
├─ Each node writes to the buffer via output_keys