refactor: refactor shared memory to data buffer
This commit is contained in:
@@ -27,7 +27,7 @@ class GreeterNode(NodeProtocol):
|
||||
async def execute(self, ctx: NodeContext) -> NodeResult:
|
||||
name = ctx.input_data.get("name", "World")
|
||||
greeting = f"Hello, {name}!"
|
||||
ctx.memory.write("greeting", greeting)
|
||||
ctx.buffer.write("greeting", greeting)
|
||||
return NodeResult(success=True, output={"greeting": greeting})
|
||||
|
||||
|
||||
@@ -35,9 +35,9 @@ class UppercaserNode(NodeProtocol):
|
||||
"""Convert text to uppercase."""
|
||||
|
||||
async def execute(self, ctx: NodeContext) -> NodeResult:
|
||||
greeting = ctx.input_data.get("greeting") or ctx.memory.read("greeting") or ""
|
||||
greeting = ctx.input_data.get("greeting") or ctx.buffer.read("greeting") or ""
|
||||
result = greeting.upper()
|
||||
ctx.memory.write("final_greeting", result)
|
||||
ctx.buffer.write("final_greeting", result)
|
||||
return NodeResult(success=True, output={"final_greeting": result})
|
||||
|
||||
|
||||
|
||||
@@ -108,7 +108,7 @@ class EdgeSpec(BaseModel):
|
||||
self,
|
||||
source_success: bool,
|
||||
source_output: dict[str, Any],
|
||||
memory: dict[str, Any],
|
||||
buffer_data: dict[str, Any],
|
||||
llm: Any | None = None,
|
||||
goal: Any | None = None,
|
||||
source_node_name: str | None = None,
|
||||
@@ -120,7 +120,7 @@ class EdgeSpec(BaseModel):
|
||||
Args:
|
||||
source_success: Whether the source node succeeded
|
||||
source_output: Output from the source node
|
||||
memory: Current shared memory state
|
||||
buffer_data: Current data buffer state
|
||||
llm: LLM provider for LLM_DECIDE edges
|
||||
goal: Goal object for LLM_DECIDE edges
|
||||
source_node_name: Name of source node (for LLM context)
|
||||
@@ -139,7 +139,7 @@ class EdgeSpec(BaseModel):
|
||||
return not source_success
|
||||
|
||||
if self.condition == EdgeCondition.CONDITIONAL:
|
||||
return self._evaluate_condition(source_output, memory)
|
||||
return self._evaluate_condition(source_output, buffer_data)
|
||||
|
||||
if self.condition == EdgeCondition.LLM_DECIDE:
|
||||
if llm is None or goal is None:
|
||||
@@ -150,7 +150,7 @@ class EdgeSpec(BaseModel):
|
||||
goal=goal,
|
||||
source_success=source_success,
|
||||
source_output=source_output,
|
||||
memory=memory,
|
||||
buffer_data=buffer_data,
|
||||
source_node_name=source_node_name,
|
||||
target_node_name=target_node_name,
|
||||
)
|
||||
@@ -160,7 +160,7 @@ class EdgeSpec(BaseModel):
|
||||
def _evaluate_condition(
|
||||
self,
|
||||
output: dict[str, Any],
|
||||
memory: dict[str, Any],
|
||||
buffer_data: dict[str, Any],
|
||||
) -> bool:
|
||||
"""Evaluate a conditional expression."""
|
||||
|
||||
@@ -168,14 +168,14 @@ class EdgeSpec(BaseModel):
|
||||
return True
|
||||
|
||||
# Build evaluation context
|
||||
# Include memory keys directly for easier access in conditions
|
||||
# Include buffer keys directly for easier access in conditions
|
||||
context = {
|
||||
"output": output,
|
||||
"memory": memory,
|
||||
"buffer": buffer_data,
|
||||
"result": output.get("result"),
|
||||
"true": True, # Allow lowercase true/false in conditions
|
||||
"false": False,
|
||||
**memory, # Unpack memory keys directly into context
|
||||
**buffer_data, # Unpack buffer keys directly into context
|
||||
}
|
||||
|
||||
try:
|
||||
@@ -186,7 +186,7 @@ class EdgeSpec(BaseModel):
|
||||
expr_vars = {
|
||||
k: repr(context[k])
|
||||
for k in context
|
||||
if k not in ("output", "memory", "result", "true", "false")
|
||||
if k not in ("output", "buffer", "result", "true", "false")
|
||||
and k in self.condition_expr
|
||||
}
|
||||
logger.info(
|
||||
@@ -209,7 +209,7 @@ class EdgeSpec(BaseModel):
|
||||
goal: Any,
|
||||
source_success: bool,
|
||||
source_output: dict[str, Any],
|
||||
memory: dict[str, Any],
|
||||
buffer_data: dict[str, Any],
|
||||
source_node_name: str | None,
|
||||
target_node_name: str | None,
|
||||
) -> bool:
|
||||
@@ -234,8 +234,8 @@ class EdgeSpec(BaseModel):
|
||||
Should we proceed to: {target_node_name or self.target}?
|
||||
Edge description: {self.description or "No description"}
|
||||
|
||||
**Context from memory**:
|
||||
{json.dumps({k: str(v)[:100] for k, v in list(memory.items())[:5]}, indent=2)}
|
||||
**Context from data buffer**:
|
||||
{json.dumps({k: str(v)[:100] for k, v in list(buffer_data.items())[:5]}, indent=2)}
|
||||
|
||||
Evaluate whether proceeding to this next node is the right step toward achieving the goal.
|
||||
Consider:
|
||||
@@ -276,14 +276,14 @@ Respond with ONLY a JSON object:
|
||||
def map_inputs(
|
||||
self,
|
||||
source_output: dict[str, Any],
|
||||
memory: dict[str, Any],
|
||||
buffer_data: dict[str, Any],
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Map source outputs to target inputs.
|
||||
|
||||
Args:
|
||||
source_output: Output from source node
|
||||
memory: Current shared memory
|
||||
buffer_data: Current data buffer
|
||||
|
||||
Returns:
|
||||
Input dict for target node
|
||||
@@ -294,11 +294,11 @@ Respond with ONLY a JSON object:
|
||||
|
||||
result = {}
|
||||
for target_key, source_key in self.input_mapping.items():
|
||||
# Try source output first, then memory
|
||||
# Try source output first, then buffer
|
||||
if source_key in source_output:
|
||||
result[target_key] = source_output[source_key]
|
||||
elif source_key in memory:
|
||||
result[target_key] = memory[source_key]
|
||||
elif source_key in buffer_data:
|
||||
result[target_key] = buffer_data[source_key]
|
||||
|
||||
return result
|
||||
|
||||
@@ -403,9 +403,9 @@ class GraphSpec(BaseModel):
|
||||
)
|
||||
edges: list[EdgeSpec] = Field(default_factory=list, description="All edge specifications")
|
||||
|
||||
# Shared memory keys
|
||||
memory_keys: list[str] = Field(
|
||||
default_factory=list, description="Keys available in shared memory"
|
||||
# Data buffer keys
|
||||
buffer_keys: list[str] = Field(
|
||||
default_factory=list, description="Keys available in data buffer"
|
||||
)
|
||||
|
||||
# Default LLM settings
|
||||
|
||||
@@ -551,7 +551,7 @@ def build_emergency_summary(
|
||||
# 2. Inputs the node received
|
||||
input_lines = []
|
||||
for key in spec.input_keys:
|
||||
value = ctx.input_data.get(key) or ctx.memory.read(key)
|
||||
value = ctx.input_data.get(key) or ctx.buffer.read(key)
|
||||
if value is not None:
|
||||
# Truncate long values but keep them recognisable
|
||||
v_str = str(value)
|
||||
|
||||
@@ -228,7 +228,7 @@ async def check_pause(
|
||||
pause_requested = ctx.input_data.get("pause_requested", False)
|
||||
if not pause_requested:
|
||||
try:
|
||||
pause_requested = ctx.memory.read("pause_requested") or False
|
||||
pause_requested = ctx.buffer.read("pause_requested") or False
|
||||
except (PermissionError, KeyError):
|
||||
pause_requested = False
|
||||
if pause_requested:
|
||||
|
||||
@@ -17,7 +17,7 @@ from typing import TYPE_CHECKING, Any
|
||||
from framework.graph.conversation import ConversationStore
|
||||
from framework.graph.event_loop.judge_pipeline import SubagentJudge
|
||||
from framework.graph.event_loop.types import LoopConfig, OutputAccumulator
|
||||
from framework.graph.node import NodeContext, SharedMemory
|
||||
from framework.graph.node import DataBuffer, NodeContext
|
||||
from framework.llm.provider import ToolResult, ToolUse
|
||||
from framework.runtime.event_bus import EventBus
|
||||
|
||||
@@ -93,7 +93,7 @@ async def execute_subagent(
|
||||
subagent_spec = ctx.node_registry[agent_id]
|
||||
|
||||
# 2. Create read-only memory snapshot
|
||||
parent_data = ctx.memory.read_all()
|
||||
parent_data = ctx.buffer.read_all()
|
||||
|
||||
# Merge in-flight outputs from the parent's accumulator.
|
||||
if accumulator:
|
||||
@@ -101,12 +101,12 @@ async def execute_subagent(
|
||||
if key not in parent_data:
|
||||
parent_data[key] = value
|
||||
|
||||
subagent_memory = SharedMemory()
|
||||
subagent_buffer = DataBuffer()
|
||||
for key, value in parent_data.items():
|
||||
subagent_memory.write(key, value, validate=False)
|
||||
subagent_buffer.write(key, value, validate=False)
|
||||
|
||||
read_keys = set(parent_data.keys()) | set(subagent_spec.input_keys or [])
|
||||
scoped_memory = subagent_memory.with_permissions(
|
||||
scoped_buffer = subagent_buffer.with_permissions(
|
||||
read_keys=list(read_keys),
|
||||
write_keys=[], # Read-only!
|
||||
)
|
||||
@@ -218,7 +218,7 @@ async def execute_subagent(
|
||||
runtime=ctx.runtime,
|
||||
node_id=sa_node_id,
|
||||
node_spec=subagent_spec,
|
||||
memory=scoped_memory,
|
||||
buffer=scoped_buffer,
|
||||
input_data={"task": task, **parent_data},
|
||||
llm=ctx.llm,
|
||||
available_tools=subagent_tools,
|
||||
|
||||
@@ -1617,9 +1617,9 @@ class EventLoopNode(NodeProtocol):
|
||||
continue
|
||||
|
||||
# Exit point 5: Judge ACCEPT — log step + log_node_complete
|
||||
# Write outputs to shared memory
|
||||
# Write outputs to data buffer
|
||||
for key, value in accumulator.to_dict().items():
|
||||
ctx.memory.write(key, value, validate=False)
|
||||
ctx.buffer.write(key, value, validate=False)
|
||||
|
||||
await self._publish_loop_completed(stream_id, node_id, iteration + 1, execution_id)
|
||||
latency_ms = int((time.time() - start_time) * 1000)
|
||||
@@ -2857,11 +2857,11 @@ class EventLoopNode(NodeProtocol):
|
||||
return extract_tool_call_history(conversation.messages, max_entries=max_entries)
|
||||
|
||||
def _build_initial_message(self, ctx: NodeContext) -> str:
|
||||
"""Build the initial user message from input data and memory.
|
||||
"""Build the initial user message from input data and buffer.
|
||||
|
||||
Includes ALL input_data (not just declared input_keys) so that
|
||||
upstream handoff data flows through regardless of key naming.
|
||||
Declared input_keys are also checked in shared memory as fallback.
|
||||
Declared input_keys are also checked in data buffer as fallback.
|
||||
"""
|
||||
parts = []
|
||||
seen: set[str] = set()
|
||||
@@ -2870,10 +2870,10 @@ class EventLoopNode(NodeProtocol):
|
||||
if value is not None:
|
||||
parts.append(f"{key}: {value}")
|
||||
seen.add(key)
|
||||
# Fallback: check memory for declared input_keys not already covered
|
||||
# Fallback: check data buffer for declared input_keys not already covered
|
||||
for key in ctx.node_spec.input_keys:
|
||||
if key not in seen:
|
||||
value = ctx.memory.read(key)
|
||||
value = ctx.buffer.read(key)
|
||||
if value is not None:
|
||||
parts.append(f"{key}: {value}")
|
||||
if ctx.goal_context:
|
||||
@@ -3460,17 +3460,17 @@ class EventLoopNode(NodeProtocol):
|
||||
|
||||
The subagent:
|
||||
- Gets a fresh conversation with just the task
|
||||
- Has read-only access to the parent's readable memory
|
||||
- Has read-only access to the parent's readable data buffer
|
||||
- Cannot delegate to its own subagents (prevents recursion)
|
||||
- Returns its output in structured JSON format
|
||||
|
||||
Args:
|
||||
ctx: Parent node's context (for memory, tools, LLM access).
|
||||
ctx: Parent node's context (for data buffer, tools, LLM access).
|
||||
agent_id: The node ID of the subagent to invoke.
|
||||
task: The task description to give the subagent.
|
||||
accumulator: Parent's OutputAccumulator — provides outputs that
|
||||
have been set via ``set_output`` but not yet written to
|
||||
shared memory (which only happens after the node completes).
|
||||
data buffer (which only happens after the node completes).
|
||||
|
||||
Returns:
|
||||
ToolResult with structured JSON output containing:
|
||||
|
||||
+106
-106
@@ -3,7 +3,7 @@ Graph Executor - Runs agent graphs.
|
||||
|
||||
The executor:
|
||||
1. Takes a GraphSpec and Goal
|
||||
2. Initializes shared memory
|
||||
2. Initializes data buffer
|
||||
3. Executes nodes following edges
|
||||
4. Records all decisions to Runtime
|
||||
5. Returns the final result
|
||||
@@ -24,7 +24,7 @@ from framework.graph.node import (
|
||||
NodeProtocol,
|
||||
NodeResult,
|
||||
NodeSpec,
|
||||
SharedMemory,
|
||||
DataBuffer,
|
||||
)
|
||||
from framework.graph.validator import OutputValidator
|
||||
from framework.llm.provider import LLMProvider, Tool, ToolUse
|
||||
@@ -104,8 +104,8 @@ class ParallelExecutionConfig:
|
||||
# "wait_all" waits for all and reports all failures
|
||||
on_branch_failure: str = "fail_all"
|
||||
|
||||
# Memory conflict handling when branches write same key
|
||||
memory_conflict_strategy: str = "last_wins" # "last_wins", "first_wins", "error"
|
||||
# Buffer conflict handling when branches write same key
|
||||
buffer_conflict_strategy: str = "last_wins" # "last_wins", "first_wins", "error"
|
||||
|
||||
# Timeout per branch in seconds
|
||||
branch_timeout_seconds: float = 300.0
|
||||
@@ -240,7 +240,7 @@ class GraphExecutor:
|
||||
self,
|
||||
current_node: str,
|
||||
path: list[str],
|
||||
memory: Any,
|
||||
buffer: Any,
|
||||
node_visit_counts: dict[str, int],
|
||||
) -> None:
|
||||
"""Update state.json with live progress at node transitions.
|
||||
@@ -275,11 +275,11 @@ class GraphExecutor:
|
||||
timestamps = state_data.setdefault("timestamps", {})
|
||||
timestamps["updated_at"] = datetime.now().isoformat()
|
||||
|
||||
# Persist full memory so state.json is sufficient for resume
|
||||
# Persist full buffer so state.json is sufficient for resume
|
||||
# even if the process dies before the final write.
|
||||
memory_snapshot = memory.read_all()
|
||||
state_data["memory"] = memory_snapshot
|
||||
state_data["memory_keys"] = list(memory_snapshot.keys())
|
||||
buffer_snapshot = buffer.read_all()
|
||||
state_data["data_buffer"] = buffer_snapshot
|
||||
state_data["buffer_keys"] = list(buffer_snapshot.keys())
|
||||
|
||||
with atomic_write(state_path, encoding="utf-8") as f:
|
||||
_json.dump(state_data, f, indent=2)
|
||||
@@ -473,7 +473,7 @@ class GraphExecutor:
|
||||
graph: The graph specification
|
||||
goal: The goal driving execution
|
||||
input_data: Initial input data
|
||||
session_state: Optional session state to resume from (with paused_at, memory, etc.)
|
||||
session_state: Optional session state to resume from (with paused_at, data_buffer, etc.)
|
||||
validate_graph: If False, skip graph validation (for test graphs that
|
||||
intentionally break rules)
|
||||
|
||||
@@ -507,7 +507,7 @@ class GraphExecutor:
|
||||
)
|
||||
|
||||
# Initialize execution state
|
||||
memory = SharedMemory()
|
||||
buffer = DataBuffer()
|
||||
|
||||
# Continuous conversation mode state
|
||||
is_continuous = getattr(graph, "conversation_mode", "isolated") == "continuous"
|
||||
@@ -526,31 +526,31 @@ class GraphExecutor:
|
||||
self.logger.info("✓ Checkpointing enabled")
|
||||
|
||||
# Restore session state if provided
|
||||
if session_state and "memory" in session_state:
|
||||
memory_data = session_state["memory"]
|
||||
if session_state and ("data_buffer" in session_state or "memory" in session_state):
|
||||
buffer_data = session_state.get("data_buffer", session_state.get("memory"))
|
||||
# [RESTORED] Type safety check
|
||||
if not isinstance(memory_data, dict):
|
||||
if not isinstance(buffer_data, dict):
|
||||
self.logger.warning(
|
||||
f"⚠️ Invalid memory data type in session state: "
|
||||
f"{type(memory_data).__name__}, expected dict"
|
||||
f"⚠️ Invalid data buffer type in session state: "
|
||||
f"{type(buffer_data).__name__}, expected dict"
|
||||
)
|
||||
else:
|
||||
# Restore memory from previous session.
|
||||
# Restore buffer from previous session.
|
||||
# Skip validation — this data was already validated when
|
||||
# originally written, and research text triggers false
|
||||
# positives on the code-indicator heuristic.
|
||||
for key, value in memory_data.items():
|
||||
memory.write(key, value, validate=False)
|
||||
self.logger.info(f"📥 Restored session state with {len(memory_data)} memory keys")
|
||||
for key, value in buffer_data.items():
|
||||
buffer.write(key, value, validate=False)
|
||||
self.logger.info(f"📥 Restored session state with {len(buffer_data)} buffer keys")
|
||||
|
||||
# Write new input data to memory (each key individually).
|
||||
# Skip when resuming from a paused session — restored memory already
|
||||
# Write new input data to buffer (each key individually).
|
||||
# Skip when resuming from a paused session — restored buffer already
|
||||
# contains all state including the original input, and re-writing
|
||||
# input_data would overwrite intermediate results with stale values.
|
||||
_is_resuming = bool(session_state and session_state.get("paused_at"))
|
||||
if input_data and not _is_resuming:
|
||||
for key, value in input_data.items():
|
||||
memory.write(key, value)
|
||||
buffer.write(key, value)
|
||||
|
||||
# Detect event-triggered execution (timer/webhook) — no interactive user.
|
||||
_event_triggered = bool(input_data and isinstance(input_data.get("event"), dict))
|
||||
@@ -596,9 +596,9 @@ class GraphExecutor:
|
||||
f"(node: {checkpoint.current_node})"
|
||||
)
|
||||
|
||||
# Restore memory from checkpoint
|
||||
for key, value in checkpoint.shared_memory.items():
|
||||
memory.write(key, value, validate=False)
|
||||
# Restore buffer from checkpoint
|
||||
for key, value in checkpoint.data_buffer.items():
|
||||
buffer.write(key, value, validate=False)
|
||||
|
||||
# Start from checkpoint's next node or current node
|
||||
current_node_id = (
|
||||
@@ -609,7 +609,7 @@ class GraphExecutor:
|
||||
path.extend(checkpoint.execution_path)
|
||||
|
||||
self.logger.info(
|
||||
f"📥 Restored memory with {len(checkpoint.shared_memory)} keys, "
|
||||
f"📥 Restored buffer with {len(checkpoint.data_buffer)} keys, "
|
||||
f"resuming at node: {current_node_id}"
|
||||
)
|
||||
else:
|
||||
@@ -671,7 +671,7 @@ class GraphExecutor:
|
||||
# Fresh shared-session execution: clear stale cursor so the entry
|
||||
# node doesn't restore a filled OutputAccumulator from the previous
|
||||
# webhook run (which would cause the judge to accept immediately).
|
||||
# The conversation history is preserved (continuous memory).
|
||||
# The conversation history is preserved (continuous buffer).
|
||||
# Exclude cold restores — those need to continue the conversation
|
||||
# naturally without a "start fresh" marker.
|
||||
_is_fresh_shared = bool(
|
||||
@@ -785,9 +785,9 @@ class GraphExecutor:
|
||||
)
|
||||
|
||||
# Create session state for pause
|
||||
saved_memory = memory.read_all()
|
||||
saved_buffer = buffer.read_all()
|
||||
pause_session_state: dict[str, Any] = {
|
||||
"memory": saved_memory, # Include memory for resume
|
||||
"data_buffer": saved_buffer, # Include buffer for resume
|
||||
"execution_path": list(path),
|
||||
"node_visit_counts": dict(node_visit_counts),
|
||||
}
|
||||
@@ -798,7 +798,7 @@ class GraphExecutor:
|
||||
checkpoint_type="pause",
|
||||
current_node=current_node_id,
|
||||
execution_path=path,
|
||||
memory=memory,
|
||||
buffer=buffer,
|
||||
next_node=current_node_id,
|
||||
is_clean=True,
|
||||
)
|
||||
@@ -811,7 +811,7 @@ class GraphExecutor:
|
||||
# Return with paused status
|
||||
return ExecutionResult(
|
||||
success=False,
|
||||
output=saved_memory,
|
||||
output=saved_buffer,
|
||||
path=path,
|
||||
paused_at=current_node_id,
|
||||
error="Execution paused by user request",
|
||||
@@ -836,15 +836,15 @@ class GraphExecutor:
|
||||
f" ⊘ Node '{node_spec.name}' visit limit reached "
|
||||
f"({node_visit_counts[current_node_id]}/{max_visits}), skipping"
|
||||
)
|
||||
# Skip execution — follow outgoing edges using current memory
|
||||
skip_result = NodeResult(success=True, output=memory.read_all())
|
||||
# Skip execution — follow outgoing edges using current buffer
|
||||
skip_result = NodeResult(success=True, output=buffer.read_all())
|
||||
next_node = await self._follow_edges(
|
||||
graph=graph,
|
||||
goal=goal,
|
||||
current_node_id=current_node_id,
|
||||
current_node_spec=node_spec,
|
||||
result=skip_result,
|
||||
memory=memory,
|
||||
buffer=buffer,
|
||||
)
|
||||
if next_node is None:
|
||||
self.logger.info(" → No more edges after visit limit, ending")
|
||||
@@ -856,15 +856,15 @@ class GraphExecutor:
|
||||
|
||||
# Clear stale nullable outputs from previous visits.
|
||||
# When a node is re-visited (e.g. review → process-batch → review),
|
||||
# nullable outputs from the PREVIOUS visit linger in shared memory.
|
||||
# nullable outputs from the PREVIOUS visit linger in the data buffer.
|
||||
# This causes stale edge conditions to fire (e.g. "feedback is not None"
|
||||
# from visit 1 triggers even when visit 2 sets "final_summary" instead).
|
||||
# Clearing them ensures only the CURRENT visit's outputs affect routing.
|
||||
if node_visit_counts.get(current_node_id, 0) > 1:
|
||||
nullable_keys = getattr(node_spec, "nullable_output_keys", None) or []
|
||||
for key in nullable_keys:
|
||||
if memory.read(key) is not None:
|
||||
memory.write(key, None, validate=False)
|
||||
if buffer.read(key) is not None:
|
||||
buffer.write(key, None, validate=False)
|
||||
self.logger.info(
|
||||
f" 🧹 Cleared stale nullable output '{key}' from previous visit"
|
||||
)
|
||||
@@ -899,12 +899,12 @@ class GraphExecutor:
|
||||
if _is_resuming and path:
|
||||
from framework.graph.prompt_composer import build_narrative
|
||||
|
||||
_resume_narrative = build_narrative(memory, path, graph)
|
||||
_resume_narrative = build_narrative(buffer, path, graph)
|
||||
|
||||
# Build context for node
|
||||
ctx = self._build_context(
|
||||
node_spec=node_spec,
|
||||
memory=memory,
|
||||
buffer=buffer,
|
||||
goal=goal,
|
||||
input_data=input_data or {},
|
||||
max_tokens=graph.max_tokens,
|
||||
@@ -921,9 +921,9 @@ class GraphExecutor:
|
||||
|
||||
# Log actual input data being read
|
||||
if node_spec.input_keys:
|
||||
self.logger.info(" Reading from memory:")
|
||||
self.logger.info(" Reading from data buffer:")
|
||||
for key in node_spec.input_keys:
|
||||
value = memory.read(key)
|
||||
value = buffer.read(key)
|
||||
if value is not None:
|
||||
# Truncate long values for readability
|
||||
value_str = str(value)
|
||||
@@ -953,7 +953,7 @@ class GraphExecutor:
|
||||
checkpoint_type="node_start",
|
||||
current_node=node_spec.id,
|
||||
execution_path=list(path),
|
||||
memory=memory,
|
||||
buffer=buffer,
|
||||
is_clean=(sum(node_retry_counts.values()) == 0),
|
||||
)
|
||||
|
||||
@@ -1061,21 +1061,21 @@ class GraphExecutor:
|
||||
summary = result.to_summary(node_spec)
|
||||
self.logger.info(f" 📝 Summary: {summary}")
|
||||
|
||||
# Log what was written to memory (detailed view)
|
||||
# Log what was written to buffer (detailed view)
|
||||
if result.output:
|
||||
self.logger.info(" Written to memory:")
|
||||
self.logger.info(" Written to data buffer:")
|
||||
for key, value in result.output.items():
|
||||
value_str = str(value)
|
||||
if len(value_str) > 200:
|
||||
value_str = value_str[:200] + "..."
|
||||
self.logger.info(f" {key}: {value_str}")
|
||||
|
||||
# Write node outputs to memory BEFORE edge evaluation
|
||||
# Write node outputs to buffer BEFORE edge evaluation
|
||||
# This enables direct key access in conditional expressions (e.g., "score > 80")
|
||||
# Without this, conditional edges can only use output['key'] syntax
|
||||
if result.output:
|
||||
for key, value in result.output.items():
|
||||
memory.write(key, value, validate=False)
|
||||
buffer.write(key, value, validate=False)
|
||||
else:
|
||||
self.logger.error(f" ✗ Failed: {result.error}")
|
||||
|
||||
@@ -1147,7 +1147,7 @@ class GraphExecutor:
|
||||
current_node_id=current_node_id,
|
||||
current_node_spec=node_spec,
|
||||
result=result, # result.success=False triggers ON_FAILURE
|
||||
memory=memory,
|
||||
buffer=buffer,
|
||||
)
|
||||
|
||||
if next_node:
|
||||
@@ -1166,7 +1166,7 @@ class GraphExecutor:
|
||||
)
|
||||
self.runtime.end_run(
|
||||
success=False,
|
||||
output_data=memory.read_all(),
|
||||
output_data=buffer.read_all(),
|
||||
narrative=(
|
||||
f"Failed at {node_spec.name} after "
|
||||
f"{max_retries} retries: {result.error}"
|
||||
@@ -1185,10 +1185,10 @@ class GraphExecutor:
|
||||
execution_quality="failed",
|
||||
)
|
||||
|
||||
# Save memory for potential resume
|
||||
saved_memory = memory.read_all()
|
||||
# Save buffer for potential resume
|
||||
saved_buffer = buffer.read_all()
|
||||
failure_session_state = {
|
||||
"memory": saved_memory,
|
||||
"data_buffer": saved_buffer,
|
||||
"execution_path": list(path),
|
||||
"node_visit_counts": dict(node_visit_counts),
|
||||
"resume_from": current_node_id,
|
||||
@@ -1200,7 +1200,7 @@ class GraphExecutor:
|
||||
f"Node '{node_spec.name}' failed after "
|
||||
f"{max_retries} attempts: {result.error}"
|
||||
),
|
||||
output=saved_memory,
|
||||
output=saved_buffer,
|
||||
steps_executed=steps,
|
||||
total_tokens=total_tokens,
|
||||
total_latency_ms=total_latency,
|
||||
@@ -1228,11 +1228,11 @@ class GraphExecutor:
|
||||
execution_id=self._execution_id,
|
||||
)
|
||||
|
||||
saved_memory = memory.read_all()
|
||||
saved_buffer = buffer.read_all()
|
||||
session_state_out = {
|
||||
"paused_at": node_spec.id,
|
||||
"resume_from": f"{node_spec.id}_resume", # Resume key
|
||||
"memory": saved_memory,
|
||||
"data_buffer": saved_buffer,
|
||||
"execution_path": list(path),
|
||||
"node_visit_counts": dict(node_visit_counts),
|
||||
"next_node": None, # Will resume from entry point
|
||||
@@ -1240,7 +1240,7 @@ class GraphExecutor:
|
||||
|
||||
self.runtime.end_run(
|
||||
success=True,
|
||||
output_data=saved_memory,
|
||||
output_data=saved_buffer,
|
||||
narrative=f"Paused at {node_spec.name} after {steps} steps",
|
||||
)
|
||||
|
||||
@@ -1259,7 +1259,7 @@ class GraphExecutor:
|
||||
|
||||
return ExecutionResult(
|
||||
success=True,
|
||||
output=saved_memory,
|
||||
output=saved_buffer,
|
||||
steps_executed=steps,
|
||||
total_tokens=total_tokens,
|
||||
total_latency_ms=total_latency,
|
||||
@@ -1295,7 +1295,7 @@ class GraphExecutor:
|
||||
)
|
||||
|
||||
current_node_id = result.next_node
|
||||
self._write_progress(current_node_id, path, memory, node_visit_counts)
|
||||
self._write_progress(current_node_id, path, buffer, node_visit_counts)
|
||||
else:
|
||||
# Get all traversable edges for fan-out detection
|
||||
traversable_edges = await self._get_all_traversable_edges(
|
||||
@@ -1304,7 +1304,7 @@ class GraphExecutor:
|
||||
current_node_id=current_node_id,
|
||||
current_node_spec=node_spec,
|
||||
result=result,
|
||||
memory=memory,
|
||||
buffer=buffer,
|
||||
)
|
||||
|
||||
if not traversable_edges:
|
||||
@@ -1339,7 +1339,7 @@ class GraphExecutor:
|
||||
graph=graph,
|
||||
goal=goal,
|
||||
edges=traversable_edges,
|
||||
memory=memory,
|
||||
buffer=buffer,
|
||||
source_result=result,
|
||||
source_node_spec=node_spec,
|
||||
path=path,
|
||||
@@ -1353,7 +1353,7 @@ class GraphExecutor:
|
||||
if fan_in_node:
|
||||
self.logger.info(f" ⑃ Fan-in: converging at {fan_in_node}")
|
||||
current_node_id = fan_in_node
|
||||
self._write_progress(current_node_id, path, memory, node_visit_counts)
|
||||
self._write_progress(current_node_id, path, buffer, node_visit_counts)
|
||||
else:
|
||||
# No convergence point - branches are terminal
|
||||
self.logger.info(" → Parallel branches completed (no convergence)")
|
||||
@@ -1366,7 +1366,7 @@ class GraphExecutor:
|
||||
current_node_id=current_node_id,
|
||||
current_node_spec=node_spec,
|
||||
result=result,
|
||||
memory=memory,
|
||||
buffer=buffer,
|
||||
)
|
||||
if next_node is None:
|
||||
self.logger.info(" → No more edges, ending execution")
|
||||
@@ -1393,7 +1393,7 @@ class GraphExecutor:
|
||||
checkpoint_type="node_complete",
|
||||
current_node=node_spec.id,
|
||||
execution_path=list(path),
|
||||
memory=memory,
|
||||
buffer=buffer,
|
||||
next_node=next_node,
|
||||
is_clean=(sum(node_retry_counts.values()) == 0),
|
||||
)
|
||||
@@ -1418,7 +1418,7 @@ class GraphExecutor:
|
||||
current_node_id = next_node
|
||||
|
||||
# Write progress snapshot at node transition
|
||||
self._write_progress(current_node_id, path, memory, node_visit_counts)
|
||||
self._write_progress(current_node_id, path, buffer, node_visit_counts)
|
||||
|
||||
# Continuous mode: thread conversation forward with transition marker
|
||||
if is_continuous and result.conversation is not None:
|
||||
@@ -1436,7 +1436,7 @@ class GraphExecutor:
|
||||
)
|
||||
|
||||
# Build Layer 2 (narrative) from current state
|
||||
narrative = build_narrative(memory, path, graph)
|
||||
narrative = build_narrative(buffer, path, graph)
|
||||
|
||||
# Build per-node accounts prompt for the next node
|
||||
_node_accounts = self.accounts_prompt or None
|
||||
@@ -1469,7 +1469,7 @@ class GraphExecutor:
|
||||
marker = build_transition_marker(
|
||||
previous_node=node_spec,
|
||||
next_node=next_spec,
|
||||
memory=memory,
|
||||
buffer=buffer,
|
||||
cumulative_tool_names=sorted(cumulative_tool_names),
|
||||
data_dir=data_dir,
|
||||
)
|
||||
@@ -1558,7 +1558,7 @@ class GraphExecutor:
|
||||
input_data = result.output
|
||||
|
||||
# Collect output
|
||||
output = memory.read_all()
|
||||
output = buffer.read_all()
|
||||
|
||||
self.logger.info("\n✓ Execution complete!")
|
||||
self.logger.info(f" Steps: {steps}")
|
||||
@@ -1608,7 +1608,7 @@ class GraphExecutor:
|
||||
execution_quality=exec_quality,
|
||||
node_visit_counts=dict(node_visit_counts),
|
||||
session_state={
|
||||
"memory": output, # output IS memory.read_all()
|
||||
"data_buffer": output, # output IS buffer.read_all()
|
||||
"execution_path": list(path),
|
||||
"node_visit_counts": dict(node_visit_counts),
|
||||
},
|
||||
@@ -1619,9 +1619,9 @@ class GraphExecutor:
|
||||
self.logger.info("⏸ Execution cancelled - saving state for resume")
|
||||
|
||||
# Flush WIP accumulator outputs from the interrupted node's
|
||||
# cursor.json into SharedMemory so they survive resume. The
|
||||
# cursor.json into DataBuffer so they survive resume. The
|
||||
# accumulator writes to cursor.json on every set() call, but
|
||||
# only writes to SharedMemory when the judge ACCEPTs. Without
|
||||
# only writes to DataBuffer when the judge ACCEPTs. Without
|
||||
# this, edge conditions checking these keys see None on resume.
|
||||
if current_node_id and self._storage_path:
|
||||
try:
|
||||
@@ -1633,10 +1633,10 @@ class GraphExecutor:
|
||||
wip_outputs = cursor_data.get("outputs", {})
|
||||
for key, value in wip_outputs.items():
|
||||
if value is not None:
|
||||
memory.write(key, value, validate=False)
|
||||
buffer.write(key, value, validate=False)
|
||||
if wip_outputs:
|
||||
self.logger.info(
|
||||
"Flushed %d WIP accumulator outputs to memory: %s",
|
||||
"Flushed %d WIP accumulator outputs to buffer: %s",
|
||||
len(wip_outputs),
|
||||
list(wip_outputs.keys()),
|
||||
)
|
||||
@@ -1646,10 +1646,10 @@ class GraphExecutor:
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
# Save memory and state for resume
|
||||
saved_memory = memory.read_all()
|
||||
# Save buffer and state for resume
|
||||
saved_buffer = buffer.read_all()
|
||||
session_state_out: dict[str, Any] = {
|
||||
"memory": saved_memory,
|
||||
"data_buffer": saved_buffer,
|
||||
"execution_path": list(path),
|
||||
"node_visit_counts": dict(node_visit_counts),
|
||||
}
|
||||
@@ -1671,7 +1671,7 @@ class GraphExecutor:
|
||||
return ExecutionResult(
|
||||
success=False,
|
||||
error="Execution cancelled",
|
||||
output=saved_memory,
|
||||
output=saved_buffer,
|
||||
steps_executed=steps,
|
||||
total_tokens=total_tokens,
|
||||
total_latency_ms=total_latency,
|
||||
@@ -1733,17 +1733,17 @@ class GraphExecutor:
|
||||
cursor_data = _json.loads(cursor_path.read_text(encoding="utf-8"))
|
||||
for key, value in cursor_data.get("outputs", {}).items():
|
||||
if value is not None:
|
||||
memory.write(key, value, validate=False)
|
||||
buffer.write(key, value, validate=False)
|
||||
except Exception:
|
||||
self.logger.debug(
|
||||
"Could not flush accumulator outputs from cursor",
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
# Save memory and state for potential resume
|
||||
saved_memory = memory.read_all()
|
||||
# Save buffer and state for potential resume
|
||||
saved_buffer = buffer.read_all()
|
||||
session_state_out: dict[str, Any] = {
|
||||
"memory": saved_memory,
|
||||
"data_buffer": saved_buffer,
|
||||
"execution_path": list(path),
|
||||
"node_visit_counts": dict(node_visit_counts),
|
||||
"resume_from": current_node_id,
|
||||
@@ -1774,7 +1774,7 @@ class GraphExecutor:
|
||||
return ExecutionResult(
|
||||
success=False,
|
||||
error=str(e),
|
||||
output=saved_memory,
|
||||
output=saved_buffer,
|
||||
steps_executed=steps,
|
||||
path=path,
|
||||
total_retries=total_retries_count,
|
||||
@@ -1795,7 +1795,7 @@ class GraphExecutor:
|
||||
def _build_context(
|
||||
self,
|
||||
node_spec: NodeSpec,
|
||||
memory: SharedMemory,
|
||||
buffer: DataBuffer,
|
||||
goal: Goal,
|
||||
input_data: dict[str, Any],
|
||||
max_tokens: int = 4096,
|
||||
@@ -1819,7 +1819,7 @@ class GraphExecutor:
|
||||
if node_spec.tools:
|
||||
available_tools = [t for t in self.tools if t.name in node_spec.tools]
|
||||
|
||||
# Create scoped memory view.
|
||||
# Create scoped buffer view.
|
||||
# When permissions are restricted (non-empty key lists), auto-include
|
||||
# _-prefixed keys used by default skill protocols so agents can read/write
|
||||
# operational state (e.g. _working_notes, _batch_ledger) regardless of
|
||||
@@ -1831,9 +1831,9 @@ class GraphExecutor:
|
||||
# Empty means "allow all" — adding keys would accidentally
|
||||
# activate the permission check and block legitimate reads/writes.
|
||||
if read_keys or write_keys:
|
||||
from framework.skills.defaults import SHARED_MEMORY_KEYS as _skill_keys
|
||||
from framework.skills.defaults import DATA_BUFFER_KEYS as _skill_keys
|
||||
|
||||
existing_underscore = [k for k in memory._data if k.startswith("_")]
|
||||
existing_underscore = [k for k in buffer._data if k.startswith("_")]
|
||||
extra_keys = set(_skill_keys) | set(existing_underscore)
|
||||
# Only inject into read_keys when it was already non-empty — an empty
|
||||
# read_keys means "allow all reads" and injecting skill keys would
|
||||
@@ -1844,7 +1844,7 @@ class GraphExecutor:
|
||||
if write_keys and k not in write_keys:
|
||||
write_keys.append(k)
|
||||
|
||||
scoped_memory = memory.with_permissions(
|
||||
scoped_buffer = buffer.with_permissions(
|
||||
read_keys=read_keys,
|
||||
write_keys=write_keys,
|
||||
)
|
||||
@@ -1866,7 +1866,7 @@ class GraphExecutor:
|
||||
runtime=self.runtime,
|
||||
node_id=node_spec.id,
|
||||
node_spec=node_spec,
|
||||
memory=scoped_memory,
|
||||
buffer=scoped_buffer,
|
||||
input_data=input_data,
|
||||
llm=self.llm,
|
||||
available_tools=available_tools,
|
||||
@@ -1990,7 +1990,7 @@ class GraphExecutor:
|
||||
current_node_id: str,
|
||||
current_node_spec: Any,
|
||||
result: NodeResult,
|
||||
memory: SharedMemory,
|
||||
buffer: DataBuffer,
|
||||
) -> str | None:
|
||||
"""Determine the next node by following edges."""
|
||||
edges = graph.get_outgoing_edges(current_node_id)
|
||||
@@ -2001,16 +2001,16 @@ class GraphExecutor:
|
||||
if await edge.should_traverse(
|
||||
source_success=result.success,
|
||||
source_output=result.output,
|
||||
memory=memory.read_all(),
|
||||
buffer_data=buffer.read_all(),
|
||||
llm=self.llm,
|
||||
goal=goal,
|
||||
source_node_name=current_node_spec.name if current_node_spec else current_node_id,
|
||||
target_node_name=target_node_spec.name if target_node_spec else edge.target,
|
||||
):
|
||||
# Map inputs (skip validation for processed LLM output)
|
||||
mapped = edge.map_inputs(result.output, memory.read_all())
|
||||
mapped = edge.map_inputs(result.output, buffer.read_all())
|
||||
for key, value in mapped.items():
|
||||
memory.write(key, value, validate=False)
|
||||
buffer.write(key, value, validate=False)
|
||||
|
||||
return edge.target
|
||||
|
||||
@@ -2023,7 +2023,7 @@ class GraphExecutor:
|
||||
current_node_id: str,
|
||||
current_node_spec: Any,
|
||||
result: NodeResult,
|
||||
memory: SharedMemory,
|
||||
buffer: DataBuffer,
|
||||
) -> list[EdgeSpec]:
|
||||
"""
|
||||
Get ALL edges that should be traversed (for fan-out detection).
|
||||
@@ -2039,7 +2039,7 @@ class GraphExecutor:
|
||||
if await edge.should_traverse(
|
||||
source_success=result.success,
|
||||
source_output=result.output,
|
||||
memory=memory.read_all(),
|
||||
buffer_data=buffer.read_all(),
|
||||
llm=self.llm,
|
||||
goal=goal,
|
||||
source_node_name=current_node_spec.name if current_node_spec else current_node_id,
|
||||
@@ -2103,7 +2103,7 @@ class GraphExecutor:
|
||||
graph: GraphSpec,
|
||||
goal: Goal,
|
||||
edges: list[EdgeSpec],
|
||||
memory: SharedMemory,
|
||||
buffer: DataBuffer,
|
||||
source_result: NodeResult,
|
||||
source_node_spec: Any,
|
||||
path: list[str],
|
||||
@@ -2116,7 +2116,7 @@ class GraphExecutor:
|
||||
graph: The graph specification
|
||||
goal: The execution goal
|
||||
edges: List of edges to follow in parallel
|
||||
memory: Shared memory instance
|
||||
buffer: DataBuffer instance
|
||||
source_result: Result from the source node
|
||||
source_node_spec: Spec of the source node
|
||||
path: Execution path list to update
|
||||
@@ -2135,7 +2135,7 @@ class GraphExecutor:
|
||||
edge=edge,
|
||||
)
|
||||
|
||||
# Track which branch wrote which key for memory conflict detection
|
||||
# Track which branch wrote which key for buffer conflict detection
|
||||
fanout_written_keys: dict[str, str] = {} # key -> branch_id that wrote it
|
||||
fanout_keys_lock = asyncio.Lock()
|
||||
|
||||
@@ -2173,9 +2173,9 @@ class GraphExecutor:
|
||||
|
||||
try:
|
||||
# Map inputs via edge
|
||||
mapped = branch.edge.map_inputs(source_result.output, memory.read_all())
|
||||
mapped = branch.edge.map_inputs(source_result.output, buffer.read_all())
|
||||
for key, value in mapped.items():
|
||||
await memory.write_async(key, value)
|
||||
await buffer.write_async(key, value)
|
||||
|
||||
# Execute with retries
|
||||
last_result = None
|
||||
@@ -2185,7 +2185,7 @@ class GraphExecutor:
|
||||
# Build context for this branch
|
||||
ctx = self._build_context(
|
||||
node_spec,
|
||||
memory,
|
||||
buffer,
|
||||
goal,
|
||||
mapped,
|
||||
graph.max_tokens,
|
||||
@@ -2230,15 +2230,15 @@ class GraphExecutor:
|
||||
)
|
||||
|
||||
if result.success:
|
||||
# Write outputs to shared memory with conflict detection
|
||||
conflict_strategy = self._parallel_config.memory_conflict_strategy
|
||||
# Write outputs to shared buffer with conflict detection
|
||||
conflict_strategy = self._parallel_config.buffer_conflict_strategy
|
||||
for key, value in result.output.items():
|
||||
async with fanout_keys_lock:
|
||||
prior_branch = fanout_written_keys.get(key)
|
||||
if prior_branch and prior_branch != branch.branch_id:
|
||||
if conflict_strategy == "error":
|
||||
raise RuntimeError(
|
||||
f"Memory conflict: key '{key}' already written "
|
||||
f"Buffer conflict: key '{key}' already written "
|
||||
f"by branch '{prior_branch}', "
|
||||
f"conflicting write from '{branch.branch_id}'"
|
||||
)
|
||||
@@ -2255,7 +2255,7 @@ class GraphExecutor:
|
||||
f"(last_wins: {prior_branch} -> {branch.branch_id})"
|
||||
)
|
||||
fanout_written_keys[key] = branch.branch_id
|
||||
await memory.write_async(key, value)
|
||||
await buffer.write_async(key, value)
|
||||
|
||||
branch.result = result
|
||||
branch.status = "completed"
|
||||
@@ -2378,7 +2378,7 @@ class GraphExecutor:
|
||||
checkpoint_type: str,
|
||||
current_node: str,
|
||||
execution_path: list[str],
|
||||
memory: SharedMemory,
|
||||
buffer: DataBuffer,
|
||||
next_node: str | None = None,
|
||||
is_clean: bool = True,
|
||||
) -> Checkpoint:
|
||||
@@ -2389,7 +2389,7 @@ class GraphExecutor:
|
||||
checkpoint_type: Type of checkpoint (node_start, node_complete)
|
||||
current_node: Current node ID
|
||||
execution_path: Nodes executed so far
|
||||
memory: SharedMemory instance
|
||||
buffer: DataBuffer instance
|
||||
next_node: Next node to execute (for node_complete checkpoints)
|
||||
is_clean: Whether execution was clean up to this point
|
||||
|
||||
@@ -2402,7 +2402,7 @@ class GraphExecutor:
|
||||
session_id=self._storage_path.name if self._storage_path else "unknown",
|
||||
current_node=current_node,
|
||||
execution_path=execution_path,
|
||||
shared_memory=memory.read_all(),
|
||||
data_buffer=buffer.read_all(),
|
||||
next_node=next_node,
|
||||
is_clean=is_clean,
|
||||
)
|
||||
|
||||
@@ -401,13 +401,13 @@ class DataBuffer:
|
||||
self,
|
||||
read_keys: list[str],
|
||||
write_keys: list[str],
|
||||
) -> "SharedMemory":
|
||||
) -> "DataBuffer":
|
||||
"""Create a view with restricted permissions for a specific node.
|
||||
|
||||
The scoped view shares the same underlying data and locks,
|
||||
enabling thread-safe parallel execution across scoped views.
|
||||
"""
|
||||
return SharedMemory(
|
||||
return DataBuffer(
|
||||
_data=self._data,
|
||||
_allowed_read=set(read_keys) if read_keys else set(),
|
||||
_allowed_write=set(write_keys) if write_keys else set(),
|
||||
@@ -423,7 +423,7 @@ class NodeContext:
|
||||
|
||||
This is passed to every node and provides:
|
||||
- Access to the runtime (for decision logging)
|
||||
- Access to shared memory (for state)
|
||||
- Access to the data buffer (for state)
|
||||
- Access to LLM (for generation)
|
||||
- Access to tools (for actions)
|
||||
- The goal context (for guidance)
|
||||
@@ -437,7 +437,7 @@ class NodeContext:
|
||||
node_spec: NodeSpec
|
||||
|
||||
# State
|
||||
memory: SharedMemory
|
||||
buffer: DataBuffer
|
||||
input_data: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
# LLM access (if applicable)
|
||||
@@ -630,6 +630,6 @@ class NodeProtocol(ABC):
|
||||
"""
|
||||
errors = []
|
||||
for key in ctx.node_spec.input_keys:
|
||||
if key not in ctx.input_data and ctx.memory.read(key) is None:
|
||||
if key not in ctx.input_data and ctx.buffer.read(key) is None:
|
||||
errors.append(f"Missing required input: {key}")
|
||||
return errors
|
||||
|
||||
@@ -22,7 +22,7 @@ from typing import TYPE_CHECKING, Any
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from framework.graph.edge import GraphSpec
|
||||
from framework.graph.node import NodeSpec, SharedMemory
|
||||
from framework.graph.node import NodeSpec, DataBuffer
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -211,17 +211,17 @@ def compose_system_prompt(
|
||||
|
||||
|
||||
def build_narrative(
|
||||
memory: SharedMemory,
|
||||
buffer: DataBuffer,
|
||||
execution_path: list[str],
|
||||
graph: GraphSpec,
|
||||
) -> str:
|
||||
"""Build Layer 2 (narrative) from structured state.
|
||||
|
||||
Deterministic — no LLM call. Reads SharedMemory and execution path
|
||||
Deterministic — no LLM call. Reads data buffer and execution path
|
||||
to describe what has happened so far. Cheap and fast.
|
||||
|
||||
Args:
|
||||
memory: Current shared memory state.
|
||||
buffer: Current data buffer state.
|
||||
execution_path: List of node IDs visited so far.
|
||||
graph: Graph spec (for node names/descriptions).
|
||||
|
||||
@@ -241,11 +241,11 @@ def build_narrative(
|
||||
phase_descriptions.append(f"- {node_id}")
|
||||
parts.append("Phases completed:\n" + "\n".join(phase_descriptions))
|
||||
|
||||
# Describe key memory values (skip very long values)
|
||||
all_memory = memory.read_all()
|
||||
if all_memory:
|
||||
# Describe key buffer values (skip very long values)
|
||||
all_buffer = buffer.read_all()
|
||||
if all_buffer:
|
||||
memory_lines: list[str] = []
|
||||
for key, value in all_memory.items():
|
||||
for key, value in all_buffer.items():
|
||||
if value is None:
|
||||
continue
|
||||
val_str = str(value)
|
||||
@@ -261,7 +261,7 @@ def build_narrative(
|
||||
def build_transition_marker(
|
||||
previous_node: NodeSpec,
|
||||
next_node: NodeSpec,
|
||||
memory: SharedMemory,
|
||||
buffer: DataBuffer,
|
||||
cumulative_tool_names: list[str],
|
||||
data_dir: Path | str | None = None,
|
||||
) -> str:
|
||||
@@ -274,7 +274,7 @@ def build_transition_marker(
|
||||
Args:
|
||||
previous_node: NodeSpec of the phase just completed.
|
||||
next_node: NodeSpec of the phase about to start.
|
||||
memory: Current shared memory state.
|
||||
buffer: Current data buffer state.
|
||||
cumulative_tool_names: All tools available (cumulative set).
|
||||
data_dir: Path to spillover data directory.
|
||||
|
||||
@@ -290,13 +290,13 @@ def build_transition_marker(
|
||||
sections.append(f"\nCompleted: {previous_node.name}")
|
||||
sections.append(f" {previous_node.description}")
|
||||
|
||||
# Outputs in memory — use file references for large values so the
|
||||
# Outputs in buffer — use file references for large values so the
|
||||
# next node loads full data from disk instead of seeing truncated
|
||||
# inline previews that look deceptively complete.
|
||||
all_memory = memory.read_all()
|
||||
if all_memory:
|
||||
all_buffer = buffer.read_all()
|
||||
if all_buffer:
|
||||
memory_lines: list[str] = []
|
||||
for key, value in all_memory.items():
|
||||
for key, value in all_buffer.items():
|
||||
if value is None:
|
||||
continue
|
||||
val_str = str(value)
|
||||
|
||||
@@ -278,7 +278,7 @@ def _load_resume_state(
|
||||
return None
|
||||
return {
|
||||
"resume_session_id": session_id,
|
||||
"memory": cp_data.get("shared_memory", {}),
|
||||
"data_buffer": cp_data.get("data_buffer", cp_data.get("shared_memory", {})),
|
||||
"paused_at": cp_data.get("next_node") or cp_data.get("current_node"),
|
||||
"execution_path": cp_data.get("execution_path", []),
|
||||
"node_visit_counts": {},
|
||||
@@ -296,7 +296,7 @@ def _load_resume_state(
|
||||
paused_at = progress.get("paused_at") or progress.get("resume_from")
|
||||
return {
|
||||
"resume_session_id": session_id,
|
||||
"memory": state_data.get("memory", {}),
|
||||
"data_buffer": state_data.get("data_buffer", state_data.get("memory", {})),
|
||||
"paused_at": paused_at,
|
||||
"execution_path": progress.get("path", []),
|
||||
"node_visit_counts": progress.get("node_visit_counts", {}),
|
||||
|
||||
@@ -33,7 +33,7 @@ Single-entry agents get a `"default"` entry point automatically. There is no sep
|
||||
| `ExecutionStream` | `runtime/execution_stream.py` | Per-entry-point execution queue, session persistence |
|
||||
| `GraphExecutor` | `graph/executor.py` | Node traversal, tool dispatch, checkpointing |
|
||||
| `EventBus` | `runtime/event_bus.py` | Pub/sub for execution events (streaming, I/O) |
|
||||
| `SharedStateManager` | `runtime/shared_state.py` | Cross-stream state with isolation levels |
|
||||
| `SharedBufferManager` | `runtime/shared_state.py` | Cross-stream state with isolation levels |
|
||||
| `OutcomeAggregator` | `runtime/outcome_aggregator.py` | Goal progress tracking across streams |
|
||||
| `SessionStore` | `storage/session_store.py` | Session state persistence (`sessions/{id}/state.json`) |
|
||||
|
||||
@@ -108,7 +108,7 @@ runtime.unsubscribe_from_events(sub_id)
|
||||
# Inspection
|
||||
runtime.is_running # bool
|
||||
runtime.event_bus # EventBus
|
||||
runtime.state_manager # SharedStateManager
|
||||
runtime.state_manager # SharedBufferManager
|
||||
runtime.get_stats() # Runtime statistics
|
||||
```
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ from framework.runtime.event_bus import EventBus
|
||||
from framework.runtime.execution_stream import EntryPointSpec, ExecutionStream
|
||||
from framework.runtime.outcome_aggregator import OutcomeAggregator
|
||||
from framework.runtime.runtime_log_store import RuntimeLogStore
|
||||
from framework.runtime.shared_state import SharedStateManager
|
||||
from framework.runtime.shared_state import SharedBufferManager
|
||||
from framework.storage.concurrent import ConcurrentStorage
|
||||
from framework.storage.session_store import SessionStore
|
||||
|
||||
@@ -229,7 +229,7 @@ class AgentRuntime:
|
||||
self._session_store = SessionStore(storage_path_obj)
|
||||
|
||||
# Initialize shared components
|
||||
self._state_manager = SharedStateManager()
|
||||
self._state_manager = SharedBufferManager()
|
||||
self._event_bus = event_bus or EventBus(max_history=self._config.max_history)
|
||||
self._outcome_aggregator = OutcomeAggregator(goal, self._event_bus)
|
||||
|
||||
@@ -1505,7 +1505,7 @@ class AgentRuntime:
|
||||
try:
|
||||
if state_path.exists():
|
||||
data = _json.loads(state_path.read_text(encoding="utf-8"))
|
||||
full_memory = data.get("memory", {})
|
||||
full_memory = data.get("data_buffer", data.get("memory", {}))
|
||||
if not full_memory:
|
||||
continue
|
||||
# Filter to only input keys so stale outputs
|
||||
@@ -1517,7 +1517,7 @@ class AgentRuntime:
|
||||
if memory:
|
||||
return {
|
||||
"resume_session_id": exec_id,
|
||||
"memory": memory,
|
||||
"data_buffer": memory,
|
||||
}
|
||||
except Exception:
|
||||
logger.debug(
|
||||
@@ -1781,7 +1781,7 @@ class AgentRuntime:
|
||||
# === PROPERTIES ===
|
||||
|
||||
@property
|
||||
def state_manager(self) -> SharedStateManager:
|
||||
def state_manager(self) -> SharedBufferManager:
|
||||
"""Access the shared state manager."""
|
||||
return self._state_manager
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ from typing import TYPE_CHECKING, Any
|
||||
from framework.graph.checkpoint_config import CheckpointConfig
|
||||
from framework.graph.executor import ExecutionResult, GraphExecutor
|
||||
from framework.runtime.event_bus import EventBus
|
||||
from framework.runtime.shared_state import IsolationLevel, SharedStateManager
|
||||
from framework.runtime.shared_state import IsolationLevel, SharedBufferManager
|
||||
from framework.runtime.stream_runtime import StreamRuntime, StreamRuntimeAdapter
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -170,7 +170,7 @@ class ExecutionStream:
|
||||
entry_spec: EntryPointSpec,
|
||||
graph: "GraphSpec",
|
||||
goal: "Goal",
|
||||
state_manager: SharedStateManager,
|
||||
state_manager: SharedBufferManager,
|
||||
storage: "ConcurrentStorage",
|
||||
outcome_aggregator: "OutcomeAggregator",
|
||||
event_bus: "EventBus | None" = None,
|
||||
@@ -639,7 +639,7 @@ class ExecutionStream:
|
||||
self._write_run_event(execution_id, ctx.run_id, "run_started")
|
||||
|
||||
# Create execution-scoped memory
|
||||
self._state_manager.create_memory(
|
||||
self._state_manager.create_buffer(
|
||||
execution_id=execution_id,
|
||||
stream_id=self.stream_id,
|
||||
isolation=ctx.isolation_level,
|
||||
@@ -1074,7 +1074,7 @@ class ExecutionStream:
|
||||
updated_at=now,
|
||||
),
|
||||
progress=progress,
|
||||
memory=ss.get("memory", {}),
|
||||
memory=ss.get("data_buffer", ss.get("memory", {})),
|
||||
input_data=ctx.input_data,
|
||||
)
|
||||
|
||||
|
||||
@@ -46,7 +46,7 @@ class StateChange:
|
||||
timestamp: float = field(default_factory=time.time)
|
||||
|
||||
|
||||
class SharedStateManager:
|
||||
class SharedBufferManager:
|
||||
"""
|
||||
Manages shared state across concurrent executions.
|
||||
|
||||
@@ -61,18 +61,18 @@ class SharedStateManager:
|
||||
- SYNCHRONIZED: Like SHARED but with write locks
|
||||
|
||||
Example:
|
||||
manager = SharedStateManager()
|
||||
manager = SharedBufferManager()
|
||||
|
||||
# Create memory for an execution
|
||||
memory = manager.create_memory(
|
||||
# Create buffer for an execution
|
||||
buf = manager.create_buffer(
|
||||
execution_id="exec_123",
|
||||
stream_id="webhook",
|
||||
isolation=IsolationLevel.SHARED,
|
||||
)
|
||||
|
||||
# Read/write through the memory
|
||||
await memory.write("customer_id", "cust_456", scope=StateScope.STREAM)
|
||||
value = await memory.read("customer_id")
|
||||
# Read/write through the buffer
|
||||
await buf.write("customer_id", "cust_456", scope=StateScope.STREAM)
|
||||
value = await buf.read("customer_id")
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
@@ -93,14 +93,14 @@ class SharedStateManager:
|
||||
# Version tracking
|
||||
self._version = 0
|
||||
|
||||
def create_memory(
|
||||
def create_buffer(
|
||||
self,
|
||||
execution_id: str,
|
||||
stream_id: str,
|
||||
isolation: IsolationLevel,
|
||||
) -> "StreamMemory":
|
||||
) -> "StreamBuffer":
|
||||
"""
|
||||
Create a memory instance for an execution.
|
||||
Create a buffer instance for an execution.
|
||||
|
||||
Args:
|
||||
execution_id: Unique execution identifier
|
||||
@@ -108,7 +108,7 @@ class SharedStateManager:
|
||||
isolation: Isolation level for this execution
|
||||
|
||||
Returns:
|
||||
StreamMemory instance for reading/writing state
|
||||
StreamBuffer instance for reading/writing state
|
||||
"""
|
||||
# Initialize execution state
|
||||
if execution_id not in self._execution_state:
|
||||
@@ -119,7 +119,7 @@ class SharedStateManager:
|
||||
self._stream_state[stream_id] = {}
|
||||
self._stream_locks[stream_id] = asyncio.Lock()
|
||||
|
||||
return StreamMemory(
|
||||
return StreamBuffer(
|
||||
manager=self,
|
||||
execution_id=execution_id,
|
||||
stream_id=stream_id,
|
||||
@@ -343,17 +343,17 @@ class SharedStateManager:
|
||||
return self._change_history[-limit:]
|
||||
|
||||
|
||||
class StreamMemory:
|
||||
class StreamBuffer:
|
||||
"""
|
||||
Memory interface for a single execution.
|
||||
Buffer interface for a single execution.
|
||||
|
||||
Provides scoped access to shared state with proper isolation.
|
||||
Compatible with the existing SharedMemory interface where possible.
|
||||
Compatible with the existing DataBuffer interface where possible.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
manager: SharedStateManager,
|
||||
manager: SharedBufferManager,
|
||||
execution_id: str,
|
||||
stream_id: str,
|
||||
isolation: IsolationLevel,
|
||||
@@ -371,13 +371,13 @@ class StreamMemory:
|
||||
self,
|
||||
read_keys: list[str],
|
||||
write_keys: list[str],
|
||||
) -> "StreamMemory":
|
||||
) -> "StreamBuffer":
|
||||
"""
|
||||
Create a scoped view with read/write permissions.
|
||||
|
||||
Compatible with existing SharedMemory.with_permissions().
|
||||
Compatible with existing DataBuffer.with_permissions().
|
||||
"""
|
||||
scoped = StreamMemory(
|
||||
scoped = StreamBuffer(
|
||||
manager=self._manager,
|
||||
execution_id=self._execution_id,
|
||||
stream_id=self._stream_id,
|
||||
@@ -434,7 +434,7 @@ class StreamMemory:
|
||||
|
||||
return all_state
|
||||
|
||||
# === SYNC API (for backward compatibility with SharedMemory) ===
|
||||
# === SYNC API (for backward compatibility with DataBuffer) ===
|
||||
|
||||
def read_sync(self, key: str) -> Any:
|
||||
"""
|
||||
|
||||
@@ -5,7 +5,7 @@ Tests:
|
||||
1. AgentRuntime creation and lifecycle
|
||||
2. Entry point registration
|
||||
3. Concurrent executions across streams
|
||||
4. SharedStateManager isolation levels
|
||||
4. SharedBufferManager isolation levels
|
||||
5. OutcomeAggregator goal evaluation
|
||||
6. EventBus pub/sub
|
||||
"""
|
||||
@@ -24,7 +24,7 @@ from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
|
||||
from framework.runtime.event_bus import AgentEvent, EventBus, EventType
|
||||
from framework.runtime.execution_stream import EntryPointSpec
|
||||
from framework.runtime.outcome_aggregator import OutcomeAggregator
|
||||
from framework.runtime.shared_state import IsolationLevel, SharedStateManager
|
||||
from framework.runtime.shared_state import IsolationLevel, SharedBufferManager
|
||||
|
||||
# === Test Fixtures ===
|
||||
|
||||
@@ -121,45 +121,45 @@ def temp_storage():
|
||||
yield Path(tmpdir)
|
||||
|
||||
|
||||
# === SharedStateManager Tests ===
|
||||
# === SharedBufferManager Tests ===
|
||||
|
||||
|
||||
class TestSharedStateManager:
|
||||
"""Tests for SharedStateManager."""
|
||||
class TestSharedBufferManager:
|
||||
"""Tests for SharedBufferManager."""
|
||||
|
||||
def test_create_memory(self):
|
||||
"""Test creating execution-scoped memory."""
|
||||
manager = SharedStateManager()
|
||||
memory = manager.create_memory(
|
||||
def test_create_buffer(self):
|
||||
"""Test creating execution-scoped buffer."""
|
||||
manager = SharedBufferManager()
|
||||
buffer = manager.create_buffer(
|
||||
execution_id="exec-1",
|
||||
stream_id="webhook",
|
||||
isolation=IsolationLevel.SHARED,
|
||||
)
|
||||
assert memory is not None
|
||||
assert memory._execution_id == "exec-1"
|
||||
assert memory._stream_id == "webhook"
|
||||
assert buffer is not None
|
||||
assert buffer._execution_id == "exec-1"
|
||||
assert buffer._stream_id == "webhook"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_isolated_state(self):
|
||||
"""Test isolated state doesn't leak between executions."""
|
||||
manager = SharedStateManager()
|
||||
manager = SharedBufferManager()
|
||||
|
||||
mem1 = manager.create_memory("exec-1", "stream-1", IsolationLevel.ISOLATED)
|
||||
mem2 = manager.create_memory("exec-2", "stream-1", IsolationLevel.ISOLATED)
|
||||
buf1 = manager.create_buffer("exec-1", "stream-1", IsolationLevel.ISOLATED)
|
||||
buf2 = manager.create_buffer("exec-2", "stream-1", IsolationLevel.ISOLATED)
|
||||
|
||||
await mem1.write("key", "value1")
|
||||
await mem2.write("key", "value2")
|
||||
await buf1.write("key", "value1")
|
||||
await buf2.write("key", "value2")
|
||||
|
||||
assert await mem1.read("key") == "value1"
|
||||
assert await mem2.read("key") == "value2"
|
||||
assert await buf1.read("key") == "value1"
|
||||
assert await buf2.read("key") == "value2"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_shared_state(self):
|
||||
"""Test shared state is visible across executions."""
|
||||
manager = SharedStateManager()
|
||||
manager = SharedBufferManager()
|
||||
|
||||
manager.create_memory("exec-1", "stream-1", IsolationLevel.SHARED)
|
||||
manager.create_memory("exec-2", "stream-1", IsolationLevel.SHARED)
|
||||
manager.create_buffer("exec-1", "stream-1", IsolationLevel.SHARED)
|
||||
manager.create_buffer("exec-2", "stream-1", IsolationLevel.SHARED)
|
||||
|
||||
# Write to global scope
|
||||
await manager.write(
|
||||
@@ -180,8 +180,8 @@ class TestSharedStateManager:
|
||||
|
||||
def test_cleanup_execution(self):
|
||||
"""Test execution cleanup removes state."""
|
||||
manager = SharedStateManager()
|
||||
manager.create_memory("exec-1", "stream-1", IsolationLevel.ISOLATED)
|
||||
manager = SharedBufferManager()
|
||||
manager.create_buffer("exec-1", "stream-1", IsolationLevel.ISOLATED)
|
||||
|
||||
assert "exec-1" in manager._execution_state
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ class Checkpoint(BaseModel):
|
||||
execution_path: list[str] = Field(default_factory=list) # Nodes executed so far
|
||||
|
||||
# State snapshots
|
||||
shared_memory: dict[str, Any] = Field(default_factory=dict) # Full SharedMemory._data
|
||||
data_buffer: dict[str, Any] = Field(default_factory=dict) # Full DataBuffer._data
|
||||
accumulated_outputs: dict[str, Any] = Field(default_factory=dict) # Outputs accumulated so far
|
||||
|
||||
# Execution metrics (for resuming quality tracking)
|
||||
@@ -52,7 +52,7 @@ class Checkpoint(BaseModel):
|
||||
session_id: str,
|
||||
current_node: str,
|
||||
execution_path: list[str],
|
||||
shared_memory: dict[str, Any],
|
||||
data_buffer: dict[str, Any],
|
||||
next_node: str | None = None,
|
||||
accumulated_outputs: dict[str, Any] | None = None,
|
||||
metrics_snapshot: dict[str, Any] | None = None,
|
||||
@@ -67,7 +67,7 @@ class Checkpoint(BaseModel):
|
||||
session_id: Session this checkpoint belongs to
|
||||
current_node: Node ID at checkpoint time
|
||||
execution_path: List of node IDs executed so far
|
||||
shared_memory: Full memory state snapshot
|
||||
data_buffer: Full data buffer state snapshot
|
||||
next_node: Next node to execute (for node_complete checkpoints)
|
||||
accumulated_outputs: Outputs accumulated so far
|
||||
metrics_snapshot: Execution metrics at checkpoint time
|
||||
@@ -91,7 +91,7 @@ class Checkpoint(BaseModel):
|
||||
current_node=current_node,
|
||||
next_node=next_node,
|
||||
execution_path=execution_path,
|
||||
shared_memory=shared_memory,
|
||||
data_buffer=data_buffer,
|
||||
accumulated_outputs=accumulated_outputs or {},
|
||||
metrics_snapshot=metrics_snapshot or {},
|
||||
is_clean=is_clean,
|
||||
|
||||
@@ -243,7 +243,7 @@ class SessionState(BaseModel):
|
||||
error=result.error,
|
||||
output=result.output,
|
||||
),
|
||||
memory=result.session_state.get("memory", {}) if result.session_state else {},
|
||||
memory=result.session_state.get("data_buffer", result.session_state.get("memory", {})) if result.session_state else {},
|
||||
input_data=input_data or {},
|
||||
)
|
||||
|
||||
@@ -303,7 +303,7 @@ class SessionState(BaseModel):
|
||||
return {
|
||||
"paused_at": resume_from,
|
||||
"resume_from": resume_from,
|
||||
"memory": self.memory,
|
||||
"data_buffer": self.memory,
|
||||
"execution_path": self.progress.path,
|
||||
"node_visit_counts": self.progress.node_visit_counts,
|
||||
}
|
||||
|
||||
@@ -270,7 +270,7 @@ async def handle_resume(request: web.Request) -> web.Response:
|
||||
paused_at = progress.get("paused_at") or progress.get("resume_from")
|
||||
resume_session_state = {
|
||||
"resume_session_id": worker_session_id,
|
||||
"memory": state.get("memory", {}),
|
||||
"data_buffer": state.get("data_buffer", state.get("memory", {})),
|
||||
"execution_path": progress.get("path", []),
|
||||
"node_visit_counts": progress.get("node_visit_counts", {}),
|
||||
}
|
||||
|
||||
@@ -225,7 +225,7 @@ def _write_sample_session(base: Path, session_id: str):
|
||||
"started_at": "2026-02-20T12:00:00",
|
||||
"completed_at": None,
|
||||
"input_data": {"user_request": "test input"},
|
||||
"memory": {"key1": "value1"},
|
||||
"data_buffer": {"key1": "value1"},
|
||||
"progress": {
|
||||
"current_node": "node_b",
|
||||
"paused_at": "node_b",
|
||||
|
||||
@@ -80,7 +80,7 @@ SKILL_REGISTRY: dict[str, str] = {
|
||||
}
|
||||
|
||||
# All shared memory keys used by default skills (for permission auto-inclusion)
|
||||
SHARED_MEMORY_KEYS: list[str] = [
|
||||
DATA_BUFFER_KEYS: list[str] = [
|
||||
# note-taking
|
||||
"_working_notes",
|
||||
"_notes_updated_at",
|
||||
|
||||
@@ -2659,11 +2659,11 @@ def register_queen_lifecycle_tools(
|
||||
return "No active execution found."
|
||||
|
||||
exec_id = exec_ids[0]
|
||||
memory = runtime.state_manager.create_memory(exec_id, stream_id, IsolationLevel.SHARED)
|
||||
state = await memory.read_all()
|
||||
buf = runtime.state_manager.create_buffer(exec_id, stream_id, IsolationLevel.SHARED)
|
||||
state = await buf.read_all()
|
||||
|
||||
if not state:
|
||||
lines.append("Worker's shared memory is empty.")
|
||||
lines.append("Worker's shared buffer is empty.")
|
||||
else:
|
||||
lines.append(f"Worker's shared memory ({len(state)} keys):")
|
||||
for key, value in state.items():
|
||||
|
||||
@@ -20,7 +20,7 @@ logging.getLogger("framework.llm.litellm").setLevel(logging.DEBUG)
|
||||
|
||||
from framework.config import RuntimeConfig # noqa: E402
|
||||
from framework.graph.event_loop_node import EventLoopNode, LoopConfig # noqa: E402
|
||||
from framework.graph.node import NodeContext, NodeResult, NodeSpec, SharedMemory # noqa: E402
|
||||
from framework.graph.node import NodeContext, NodeResult, NodeSpec, DataBuffer # noqa: E402
|
||||
from framework.llm.litellm import LiteLLMProvider # noqa: E402
|
||||
|
||||
|
||||
@@ -68,13 +68,13 @@ def make_context(
|
||||
runtime.record_outcome = MagicMock()
|
||||
runtime.end_run = MagicMock()
|
||||
|
||||
memory = SharedMemory()
|
||||
buffer = DataBuffer()
|
||||
|
||||
return NodeContext(
|
||||
runtime=runtime,
|
||||
node_id=node_id,
|
||||
node_spec=spec,
|
||||
memory=memory,
|
||||
buffer=buffer,
|
||||
input_data={},
|
||||
llm=llm,
|
||||
available_tools=[],
|
||||
|
||||
@@ -16,7 +16,7 @@ logging.getLogger("framework.llm.litellm").setLevel(logging.DEBUG)
|
||||
|
||||
from framework.config import RuntimeConfig # noqa: E402
|
||||
from framework.graph.event_loop_node import EventLoopNode, LoopConfig # noqa: E402
|
||||
from framework.graph.node import NodeContext, NodeResult, NodeSpec, SharedMemory # noqa: E402
|
||||
from framework.graph.node import NodeContext, NodeResult, NodeSpec, DataBuffer # noqa: E402
|
||||
from framework.llm.litellm import LiteLLMProvider # noqa: E402
|
||||
|
||||
|
||||
@@ -61,13 +61,13 @@ def make_context(
|
||||
runtime.record_outcome = MagicMock()
|
||||
runtime.end_run = MagicMock()
|
||||
|
||||
memory = SharedMemory()
|
||||
buffer = DataBuffer()
|
||||
|
||||
return NodeContext(
|
||||
runtime=runtime,
|
||||
node_id=node_id,
|
||||
node_spec=spec,
|
||||
memory=memory,
|
||||
buffer=buffer,
|
||||
input_data={},
|
||||
llm=llm,
|
||||
available_tools=[],
|
||||
|
||||
@@ -21,7 +21,7 @@ from framework.graph.conversation import NodeConversation
|
||||
from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
|
||||
from framework.graph.executor import GraphExecutor
|
||||
from framework.graph.goal import Goal
|
||||
from framework.graph.node import NodeResult, NodeSpec, SharedMemory
|
||||
from framework.graph.node import NodeResult, NodeSpec, DataBuffer
|
||||
from framework.graph.prompt_composer import (
|
||||
build_narrative,
|
||||
build_transition_marker,
|
||||
@@ -160,8 +160,8 @@ class TestComposeSystemPrompt:
|
||||
|
||||
class TestBuildNarrative:
|
||||
def test_with_execution_path(self):
|
||||
memory = SharedMemory()
|
||||
memory.write("findings", "some findings")
|
||||
buffer = DataBuffer()
|
||||
buffer.write("findings", "some findings")
|
||||
|
||||
node_a = NodeSpec(
|
||||
id="a", name="Research", description="Research the topic", node_type="event_loop"
|
||||
@@ -175,14 +175,14 @@ class TestBuildNarrative:
|
||||
edges=[],
|
||||
)
|
||||
|
||||
result = build_narrative(memory, ["a"], graph)
|
||||
result = build_narrative(buffer, ["a"], graph)
|
||||
assert "Research" in result
|
||||
assert "findings" in result
|
||||
|
||||
def test_empty_state(self):
|
||||
memory = SharedMemory()
|
||||
buffer = DataBuffer()
|
||||
graph = GraphSpec(id="g1", goal_id="g1", entry_node="a", nodes=[], edges=[])
|
||||
result = build_narrative(memory, [], graph)
|
||||
result = build_narrative(buffer, [], graph)
|
||||
assert result == ""
|
||||
|
||||
|
||||
@@ -194,13 +194,13 @@ class TestBuildTransitionMarker:
|
||||
next_n = NodeSpec(
|
||||
id="report", name="Report", description="Write report", node_type="event_loop"
|
||||
)
|
||||
memory = SharedMemory()
|
||||
memory.write("findings", "important stuff")
|
||||
buffer = DataBuffer()
|
||||
buffer.write("findings", "important stuff")
|
||||
|
||||
marker = build_transition_marker(
|
||||
previous_node=prev,
|
||||
next_node=next_n,
|
||||
memory=memory,
|
||||
buffer=buffer,
|
||||
cumulative_tool_names=["web_search", "save_data"],
|
||||
)
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ import pytest
|
||||
|
||||
from framework.skills.config import DefaultSkillConfig, SkillsConfig
|
||||
from framework.skills.defaults import (
|
||||
SHARED_MEMORY_KEYS,
|
||||
DATA_BUFFER_KEYS,
|
||||
SKILL_REGISTRY,
|
||||
DefaultSkillManager,
|
||||
is_batch_scenario,
|
||||
@@ -51,10 +51,10 @@ class TestDefaultSkillFiles:
|
||||
f"({total_chars} chars), exceeding the 2000 token budget"
|
||||
)
|
||||
|
||||
def test_shared_memory_keys_all_prefixed(self):
|
||||
"""All shared memory keys must start with underscore."""
|
||||
for key in SHARED_MEMORY_KEYS:
|
||||
assert key.startswith("_"), f"Shared memory key missing _ prefix: {key}"
|
||||
def test_data_buffer_keys_all_prefixed(self):
|
||||
"""All data buffer keys must start with underscore."""
|
||||
for key in DATA_BUFFER_KEYS:
|
||||
assert key.startswith("_"), f"Data buffer key missing _ prefix: {key}"
|
||||
|
||||
|
||||
class TestDefaultSkillManager:
|
||||
|
||||
@@ -29,7 +29,7 @@ from framework.graph.node import (
|
||||
NodeProtocol,
|
||||
NodeResult,
|
||||
NodeSpec,
|
||||
SharedMemory,
|
||||
DataBuffer,
|
||||
)
|
||||
from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolResult, ToolUse
|
||||
from framework.llm.stream_events import (
|
||||
@@ -266,13 +266,13 @@ def make_ctx(
|
||||
client_facing=client_facing,
|
||||
)
|
||||
|
||||
memory = SharedMemory()
|
||||
buffer = DataBuffer()
|
||||
|
||||
return NodeContext(
|
||||
runtime=runtime,
|
||||
node_id=node_id,
|
||||
node_spec=spec,
|
||||
memory=memory,
|
||||
buffer=buffer,
|
||||
input_data=input_data or {},
|
||||
llm=llm,
|
||||
available_tools=available_tools or [],
|
||||
@@ -1001,14 +1001,14 @@ async def test_mixed_node_graph(runtime):
|
||||
class LoadLeadsNode(NodeProtocol):
|
||||
async def execute(self, ctx: NodeContext) -> NodeResult:
|
||||
leads = ["lead_A", "lead_B", "lead_C"]
|
||||
ctx.memory.write("leads", leads)
|
||||
ctx.buffer.write("leads", leads)
|
||||
return NodeResult(success=True, output={"leads": leads})
|
||||
|
||||
class FormatOutputNode(NodeProtocol):
|
||||
async def execute(self, ctx: NodeContext) -> NodeResult:
|
||||
summary = ctx.input_data.get("summary", ctx.memory.read("summary") or "no summary")
|
||||
summary = ctx.input_data.get("summary", ctx.buffer.read("summary") or "no summary")
|
||||
report = f"Report: {summary}"
|
||||
ctx.memory.write("report", report)
|
||||
ctx.buffer.write("report", report)
|
||||
return NodeResult(success=True, output={"report": report})
|
||||
|
||||
# Event loop: process leads, produce summary
|
||||
|
||||
+127
-127
@@ -21,7 +21,7 @@ from framework.graph.event_loop_node import (
|
||||
LoopConfig,
|
||||
OutputAccumulator,
|
||||
)
|
||||
from framework.graph.node import NodeContext, NodeProtocol, NodeSpec, SharedMemory
|
||||
from framework.graph.node import NodeContext, NodeProtocol, NodeSpec, DataBuffer
|
||||
from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolResult, ToolUse
|
||||
from framework.llm.stream_events import (
|
||||
FinishEvent,
|
||||
@@ -134,14 +134,14 @@ def node_spec():
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def memory():
|
||||
return SharedMemory()
|
||||
def buffer():
|
||||
return DataBuffer()
|
||||
|
||||
|
||||
def build_ctx(
|
||||
runtime,
|
||||
node_spec,
|
||||
memory,
|
||||
buffer,
|
||||
llm,
|
||||
tools=None,
|
||||
input_data=None,
|
||||
@@ -153,7 +153,7 @@ def build_ctx(
|
||||
runtime=runtime,
|
||||
node_id=node_spec.id,
|
||||
node_spec=node_spec,
|
||||
memory=memory,
|
||||
buffer=buffer,
|
||||
input_data=input_data or {},
|
||||
llm=llm,
|
||||
available_tools=tools or [],
|
||||
@@ -189,12 +189,12 @@ class TestNodeProtocolConformance:
|
||||
|
||||
class TestBasicLoop:
|
||||
@pytest.mark.asyncio
|
||||
async def test_basic_text_only_implicit_accept(self, runtime, node_spec, memory):
|
||||
async def test_basic_text_only_implicit_accept(self, runtime, node_spec, buffer):
|
||||
"""No tools, no judge. LLM produces text, implicit accept on stop."""
|
||||
# Override to no output_keys so implicit judge accepts immediately
|
||||
node_spec.output_keys = []
|
||||
llm = MockStreamingLLM(scenarios=[text_scenario("Hello world")])
|
||||
ctx = build_ctx(runtime, node_spec, memory, llm)
|
||||
ctx = build_ctx(runtime, node_spec, buffer, llm)
|
||||
|
||||
node = EventLoopNode(config=LoopConfig(max_iterations=5))
|
||||
result = await node.execute(ctx)
|
||||
@@ -203,9 +203,9 @@ class TestBasicLoop:
|
||||
assert result.tokens_used > 0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_llm_returns_failure(self, runtime, node_spec, memory):
|
||||
async def test_no_llm_returns_failure(self, runtime, node_spec, buffer):
|
||||
"""ctx.llm=None should return failure immediately."""
|
||||
ctx = build_ctx(runtime, node_spec, memory, llm=None)
|
||||
ctx = build_ctx(runtime, node_spec, buffer, llm=None)
|
||||
|
||||
node = EventLoopNode()
|
||||
result = await node.execute(ctx)
|
||||
@@ -214,12 +214,12 @@ class TestBasicLoop:
|
||||
assert "LLM" in result.error
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_max_iterations_failure(self, runtime, node_spec, memory):
|
||||
async def test_max_iterations_failure(self, runtime, node_spec, buffer):
|
||||
"""When max_iterations is reached without acceptance, should fail."""
|
||||
# LLM always produces text but never calls set_output, so implicit
|
||||
# judge retries asking for missing keys
|
||||
llm = MockStreamingLLM(scenarios=[text_scenario("thinking...")])
|
||||
ctx = build_ctx(runtime, node_spec, memory, llm)
|
||||
ctx = build_ctx(runtime, node_spec, buffer, llm)
|
||||
|
||||
node = EventLoopNode(config=LoopConfig(max_iterations=2))
|
||||
result = await node.execute(ctx)
|
||||
@@ -235,7 +235,7 @@ class TestBasicLoop:
|
||||
|
||||
class TestJudgeIntegration:
|
||||
@pytest.mark.asyncio
|
||||
async def test_judge_accept(self, runtime, node_spec, memory):
|
||||
async def test_judge_accept(self, runtime, node_spec, buffer):
|
||||
"""Mock judge ACCEPT -> success."""
|
||||
node_spec.output_keys = []
|
||||
llm = MockStreamingLLM(scenarios=[text_scenario("Done!")])
|
||||
@@ -243,7 +243,7 @@ class TestJudgeIntegration:
|
||||
judge = AsyncMock(spec=JudgeProtocol)
|
||||
judge.evaluate = AsyncMock(return_value=JudgeVerdict(action="ACCEPT"))
|
||||
|
||||
ctx = build_ctx(runtime, node_spec, memory, llm)
|
||||
ctx = build_ctx(runtime, node_spec, buffer, llm)
|
||||
node = EventLoopNode(judge=judge, config=LoopConfig(max_iterations=5))
|
||||
result = await node.execute(ctx)
|
||||
|
||||
@@ -251,7 +251,7 @@ class TestJudgeIntegration:
|
||||
judge.evaluate.assert_called_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_judge_escalate(self, runtime, node_spec, memory):
|
||||
async def test_judge_escalate(self, runtime, node_spec, buffer):
|
||||
"""Mock judge ESCALATE -> failure."""
|
||||
node_spec.output_keys = []
|
||||
llm = MockStreamingLLM(scenarios=[text_scenario("Attempt")])
|
||||
@@ -261,7 +261,7 @@ class TestJudgeIntegration:
|
||||
return_value=JudgeVerdict(action="ESCALATE", feedback="Tone violation")
|
||||
)
|
||||
|
||||
ctx = build_ctx(runtime, node_spec, memory, llm)
|
||||
ctx = build_ctx(runtime, node_spec, buffer, llm)
|
||||
node = EventLoopNode(judge=judge, config=LoopConfig(max_iterations=5))
|
||||
result = await node.execute(ctx)
|
||||
|
||||
@@ -270,7 +270,7 @@ class TestJudgeIntegration:
|
||||
assert "Tone violation" in result.error
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_judge_retry_then_accept(self, runtime, node_spec, memory):
|
||||
async def test_judge_retry_then_accept(self, runtime, node_spec, buffer):
|
||||
"""RETRY twice, then ACCEPT. Should run 3 iterations."""
|
||||
node_spec.output_keys = []
|
||||
llm = MockStreamingLLM(
|
||||
@@ -293,7 +293,7 @@ class TestJudgeIntegration:
|
||||
judge = AsyncMock(spec=JudgeProtocol)
|
||||
judge.evaluate = AsyncMock(side_effect=evaluate_fn)
|
||||
|
||||
ctx = build_ctx(runtime, node_spec, memory, llm)
|
||||
ctx = build_ctx(runtime, node_spec, buffer, llm)
|
||||
node = EventLoopNode(judge=judge, config=LoopConfig(max_iterations=10))
|
||||
result = await node.execute(ctx)
|
||||
|
||||
@@ -308,7 +308,7 @@ class TestJudgeIntegration:
|
||||
|
||||
class TestSetOutput:
|
||||
@pytest.mark.asyncio
|
||||
async def test_set_output_accumulates(self, runtime, node_spec, memory):
|
||||
async def test_set_output_accumulates(self, runtime, node_spec, buffer):
|
||||
"""LLM calls set_output -> values appear in NodeResult.output."""
|
||||
llm = MockStreamingLLM(
|
||||
scenarios=[
|
||||
@@ -319,7 +319,7 @@ class TestSetOutput:
|
||||
]
|
||||
)
|
||||
|
||||
ctx = build_ctx(runtime, node_spec, memory, llm)
|
||||
ctx = build_ctx(runtime, node_spec, buffer, llm)
|
||||
node = EventLoopNode(config=LoopConfig(max_iterations=5))
|
||||
result = await node.execute(ctx)
|
||||
|
||||
@@ -327,7 +327,7 @@ class TestSetOutput:
|
||||
assert result.output["result"] == 42
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_set_output_rejects_invalid_key(self, runtime, node_spec, memory):
|
||||
async def test_set_output_rejects_invalid_key(self, runtime, node_spec, buffer):
|
||||
"""set_output with key not in output_keys -> is_error=True."""
|
||||
llm = MockStreamingLLM(
|
||||
scenarios=[
|
||||
@@ -340,7 +340,7 @@ class TestSetOutput:
|
||||
]
|
||||
)
|
||||
|
||||
ctx = build_ctx(runtime, node_spec, memory, llm)
|
||||
ctx = build_ctx(runtime, node_spec, buffer, llm)
|
||||
node = EventLoopNode(config=LoopConfig(max_iterations=5))
|
||||
result = await node.execute(ctx)
|
||||
|
||||
@@ -349,7 +349,7 @@ class TestSetOutput:
|
||||
assert "bad_key" not in result.output
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_missing_keys_triggers_retry(self, runtime, node_spec, memory):
|
||||
async def test_missing_keys_triggers_retry(self, runtime, node_spec, buffer):
|
||||
"""Judge accepts but output keys are missing -> retry with hint."""
|
||||
judge = AsyncMock(spec=JudgeProtocol)
|
||||
judge.evaluate = AsyncMock(return_value=JudgeVerdict(action="ACCEPT"))
|
||||
@@ -365,7 +365,7 @@ class TestSetOutput:
|
||||
]
|
||||
)
|
||||
|
||||
ctx = build_ctx(runtime, node_spec, memory, llm)
|
||||
ctx = build_ctx(runtime, node_spec, buffer, llm)
|
||||
node = EventLoopNode(judge=judge, config=LoopConfig(max_iterations=5))
|
||||
result = await node.execute(ctx)
|
||||
|
||||
@@ -380,7 +380,7 @@ class TestSetOutput:
|
||||
|
||||
class TestStallDetection:
|
||||
@pytest.mark.asyncio
|
||||
async def test_stall_detection(self, runtime, node_spec, memory):
|
||||
async def test_stall_detection(self, runtime, node_spec, buffer):
|
||||
"""3 identical responses should trigger stall detection."""
|
||||
node_spec.output_keys = [] # so implicit judge would accept
|
||||
# But we need the judge to RETRY so we actually get 3 identical responses
|
||||
@@ -389,7 +389,7 @@ class TestStallDetection:
|
||||
|
||||
llm = MockStreamingLLM(scenarios=[text_scenario("same answer")])
|
||||
|
||||
ctx = build_ctx(runtime, node_spec, memory, llm)
|
||||
ctx = build_ctx(runtime, node_spec, buffer, llm)
|
||||
node = EventLoopNode(
|
||||
judge=judge,
|
||||
config=LoopConfig(max_iterations=10, stall_detection_threshold=3),
|
||||
@@ -407,7 +407,7 @@ class TestStallDetection:
|
||||
|
||||
class TestEventBusLifecycle:
|
||||
@pytest.mark.asyncio
|
||||
async def test_lifecycle_events_published(self, runtime, node_spec, memory):
|
||||
async def test_lifecycle_events_published(self, runtime, node_spec, buffer):
|
||||
"""NODE_LOOP_STARTED, NODE_LOOP_ITERATION, NODE_LOOP_COMPLETED should be published."""
|
||||
node_spec.output_keys = []
|
||||
llm = MockStreamingLLM(scenarios=[text_scenario("ok")])
|
||||
@@ -423,7 +423,7 @@ class TestEventBusLifecycle:
|
||||
handler=lambda e: received_events.append(e.type),
|
||||
)
|
||||
|
||||
ctx = build_ctx(runtime, node_spec, memory, llm)
|
||||
ctx = build_ctx(runtime, node_spec, buffer, llm)
|
||||
node = EventLoopNode(event_bus=bus, config=LoopConfig(max_iterations=5))
|
||||
result = await node.execute(ctx)
|
||||
|
||||
@@ -434,7 +434,7 @@ class TestEventBusLifecycle:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.skip(reason="Hangs in non-interactive shells (client-facing blocks on stdin)")
|
||||
async def test_client_facing_uses_client_output_delta(self, runtime, memory):
|
||||
async def test_client_facing_uses_client_output_delta(self, runtime, buffer):
|
||||
"""client_facing=True should emit CLIENT_OUTPUT_DELTA instead of LLM_TEXT_DELTA."""
|
||||
spec = NodeSpec(
|
||||
id="ui_node",
|
||||
@@ -453,7 +453,7 @@ class TestEventBusLifecycle:
|
||||
handler=lambda e: received_types.append(e.type),
|
||||
)
|
||||
|
||||
ctx = build_ctx(runtime, spec, memory, llm)
|
||||
ctx = build_ctx(runtime, spec, buffer, llm)
|
||||
node = EventLoopNode(event_bus=bus, config=LoopConfig(max_iterations=5))
|
||||
|
||||
# Text-only on client_facing no longer blocks (no ask_user), so
|
||||
@@ -485,7 +485,7 @@ class TestClientFacingBlocking:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.skip(reason="Hangs in non-interactive shells (client-facing blocks on stdin)")
|
||||
async def test_text_only_no_blocking(self, runtime, memory, client_spec):
|
||||
async def test_text_only_no_blocking(self, runtime, buffer, client_spec):
|
||||
"""client_facing + text-only (no ask_user) should NOT block."""
|
||||
llm = MockStreamingLLM(
|
||||
scenarios=[
|
||||
@@ -494,7 +494,7 @@ class TestClientFacingBlocking:
|
||||
)
|
||||
bus = EventBus()
|
||||
node = EventLoopNode(event_bus=bus, config=LoopConfig(max_iterations=5))
|
||||
ctx = build_ctx(runtime, client_spec, memory, llm)
|
||||
ctx = build_ctx(runtime, client_spec, buffer, llm)
|
||||
|
||||
# Should complete without blocking — no ask_user called, no output_keys required
|
||||
result = await node.execute(ctx)
|
||||
@@ -503,7 +503,7 @@ class TestClientFacingBlocking:
|
||||
assert llm._call_index >= 1
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_ask_user_triggers_blocking(self, runtime, memory, client_spec):
|
||||
async def test_ask_user_triggers_blocking(self, runtime, buffer, client_spec):
|
||||
"""client_facing + ask_user() blocks until inject_event."""
|
||||
# Give the node an output key so the judge doesn't auto-accept
|
||||
# after the user responds — it needs set_output first.
|
||||
@@ -522,7 +522,7 @@ class TestClientFacingBlocking:
|
||||
)
|
||||
bus = EventBus()
|
||||
node = EventLoopNode(event_bus=bus, config=LoopConfig(max_iterations=5))
|
||||
ctx = build_ctx(runtime, client_spec, memory, llm)
|
||||
ctx = build_ctx(runtime, client_spec, buffer, llm)
|
||||
|
||||
async def user_responds():
|
||||
await asyncio.sleep(0.05)
|
||||
@@ -538,7 +538,7 @@ class TestClientFacingBlocking:
|
||||
assert result.output["answer"] == "help provided"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_client_facing_does_not_block_on_tools(self, runtime, memory):
|
||||
async def test_client_facing_does_not_block_on_tools(self, runtime, buffer):
|
||||
"""client_facing + tool calls (no ask_user) should NOT block."""
|
||||
spec = NodeSpec(
|
||||
id="chat",
|
||||
@@ -558,7 +558,7 @@ class TestClientFacingBlocking:
|
||||
]
|
||||
)
|
||||
node = EventLoopNode(config=LoopConfig(max_iterations=5))
|
||||
ctx = build_ctx(runtime, spec, memory, llm)
|
||||
ctx = build_ctx(runtime, spec, buffer, llm)
|
||||
|
||||
# Should complete without blocking — no ask_user called
|
||||
result = await node.execute(ctx)
|
||||
@@ -567,7 +567,7 @@ class TestClientFacingBlocking:
|
||||
assert result.output["result"] == "done"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_non_client_facing_unchanged(self, runtime, memory):
|
||||
async def test_non_client_facing_unchanged(self, runtime, buffer):
|
||||
"""client_facing=False should not block — existing behavior."""
|
||||
spec = NodeSpec(
|
||||
id="internal",
|
||||
@@ -578,14 +578,14 @@ class TestClientFacingBlocking:
|
||||
)
|
||||
llm = MockStreamingLLM(scenarios=[text_scenario("thinking...")])
|
||||
node = EventLoopNode(config=LoopConfig(max_iterations=2))
|
||||
ctx = build_ctx(runtime, spec, memory, llm)
|
||||
ctx = build_ctx(runtime, spec, buffer, llm)
|
||||
|
||||
# Should complete without blocking (implicit judge ACCEPTs on no tools + no keys)
|
||||
result = await node.execute(ctx)
|
||||
assert result is not None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_signal_shutdown_unblocks(self, runtime, memory, client_spec):
|
||||
async def test_signal_shutdown_unblocks(self, runtime, buffer, client_spec):
|
||||
"""signal_shutdown should unblock a waiting client_facing node."""
|
||||
llm = MockStreamingLLM(
|
||||
scenarios=[
|
||||
@@ -598,7 +598,7 @@ class TestClientFacingBlocking:
|
||||
)
|
||||
bus = EventBus()
|
||||
node = EventLoopNode(event_bus=bus, config=LoopConfig(max_iterations=10))
|
||||
ctx = build_ctx(runtime, client_spec, memory, llm)
|
||||
ctx = build_ctx(runtime, client_spec, buffer, llm)
|
||||
|
||||
async def shutdown_after_delay():
|
||||
await asyncio.sleep(0.05)
|
||||
@@ -611,7 +611,7 @@ class TestClientFacingBlocking:
|
||||
assert result.success is True
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_client_input_requested_event_published(self, runtime, memory, client_spec):
|
||||
async def test_client_input_requested_event_published(self, runtime, buffer, client_spec):
|
||||
"""CLIENT_INPUT_REQUESTED should be published when ask_user blocks."""
|
||||
llm = MockStreamingLLM(
|
||||
scenarios=[
|
||||
@@ -634,7 +634,7 @@ class TestClientFacingBlocking:
|
||||
)
|
||||
|
||||
node = EventLoopNode(event_bus=bus, config=LoopConfig(max_iterations=5))
|
||||
ctx = build_ctx(runtime, client_spec, memory, llm)
|
||||
ctx = build_ctx(runtime, client_spec, buffer, llm)
|
||||
|
||||
async def shutdown():
|
||||
await asyncio.sleep(0.05)
|
||||
@@ -649,7 +649,7 @@ class TestClientFacingBlocking:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.skip(reason="Hangs in non-interactive shells (client-facing blocks on stdin)")
|
||||
async def test_ask_user_with_real_tools(self, runtime, memory):
|
||||
async def test_ask_user_with_real_tools(self, runtime, buffer):
|
||||
"""ask_user alongside real tool calls still triggers blocking."""
|
||||
spec = NodeSpec(
|
||||
id="chat",
|
||||
@@ -683,7 +683,7 @@ class TestClientFacingBlocking:
|
||||
config=LoopConfig(max_iterations=5),
|
||||
)
|
||||
ctx = build_ctx(
|
||||
runtime, spec, memory, llm, tools=[Tool(name="search", description="", parameters={})]
|
||||
runtime, spec, buffer, llm, tools=[Tool(name="search", description="", parameters={})]
|
||||
)
|
||||
|
||||
async def unblock():
|
||||
@@ -698,7 +698,7 @@ class TestClientFacingBlocking:
|
||||
assert llm._call_index >= 2
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_ask_user_not_available_non_client_facing(self, runtime, memory):
|
||||
async def test_ask_user_not_available_non_client_facing(self, runtime, buffer):
|
||||
"""ask_user tool should NOT be injected for non-client-facing nodes."""
|
||||
spec = NodeSpec(
|
||||
id="internal",
|
||||
@@ -709,7 +709,7 @@ class TestClientFacingBlocking:
|
||||
)
|
||||
llm = MockStreamingLLM(scenarios=[text_scenario("thinking...")])
|
||||
node = EventLoopNode(config=LoopConfig(max_iterations=2))
|
||||
ctx = build_ctx(runtime, spec, memory, llm)
|
||||
ctx = build_ctx(runtime, spec, buffer, llm)
|
||||
|
||||
await node.execute(ctx)
|
||||
|
||||
@@ -720,7 +720,7 @@ class TestClientFacingBlocking:
|
||||
assert "ask_user" not in tool_names
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_escalate_available_for_worker_stream(self, runtime, memory):
|
||||
async def test_escalate_available_for_worker_stream(self, runtime, buffer):
|
||||
"""Workers should receive escalate synthetic tool."""
|
||||
spec = NodeSpec(
|
||||
id="internal",
|
||||
@@ -731,7 +731,7 @@ class TestClientFacingBlocking:
|
||||
)
|
||||
llm = MockStreamingLLM(scenarios=[text_scenario("thinking...")])
|
||||
node = EventLoopNode(config=LoopConfig(max_iterations=2))
|
||||
ctx = build_ctx(runtime, spec, memory, llm, stream_id="worker")
|
||||
ctx = build_ctx(runtime, spec, buffer, llm, stream_id="worker")
|
||||
|
||||
await node.execute(ctx)
|
||||
|
||||
@@ -740,7 +740,7 @@ class TestClientFacingBlocking:
|
||||
assert "escalate" in tool_names
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_escalate_not_available_for_queen_stream(self, runtime, memory):
|
||||
async def test_escalate_not_available_for_queen_stream(self, runtime, buffer):
|
||||
"""Queen stream should not receive escalate tool."""
|
||||
spec = NodeSpec(
|
||||
id="queen",
|
||||
@@ -751,7 +751,7 @@ class TestClientFacingBlocking:
|
||||
)
|
||||
llm = MockStreamingLLM(scenarios=[text_scenario("monitoring...")])
|
||||
node = EventLoopNode(config=LoopConfig(max_iterations=2))
|
||||
ctx = build_ctx(runtime, spec, memory, llm, stream_id="queen")
|
||||
ctx = build_ctx(runtime, spec, buffer, llm, stream_id="queen")
|
||||
|
||||
await node.execute(ctx)
|
||||
|
||||
@@ -762,7 +762,7 @@ class TestClientFacingBlocking:
|
||||
|
||||
class TestEscalate:
|
||||
@pytest.mark.asyncio
|
||||
async def test_escalate_emits_event(self, runtime, node_spec, memory):
|
||||
async def test_escalate_emits_event(self, runtime, node_spec, buffer):
|
||||
"""escalate() should publish ESCALATION_REQUESTED and block for queen guidance."""
|
||||
node_spec.output_keys = []
|
||||
llm = MockStreamingLLM(
|
||||
@@ -786,7 +786,7 @@ class TestEscalate:
|
||||
|
||||
bus.subscribe(event_types=[EventType.ESCALATION_REQUESTED], handler=capture)
|
||||
|
||||
ctx = build_ctx(runtime, node_spec, memory, llm, stream_id="worker")
|
||||
ctx = build_ctx(runtime, node_spec, buffer, llm, stream_id="worker")
|
||||
node = EventLoopNode(event_bus=bus, config=LoopConfig(max_iterations=5))
|
||||
|
||||
async def queen_reply():
|
||||
@@ -810,7 +810,7 @@ class TestEscalate:
|
||||
assert "HTTP 401" in received[0].data["context"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_escalate_handoff_reaches_queen(self, runtime, node_spec, memory):
|
||||
async def test_escalate_handoff_reaches_queen(self, runtime, node_spec, buffer):
|
||||
"""Worker escalation should be routed to queen via SessionManager handoff sub."""
|
||||
node_spec.output_keys = []
|
||||
llm = MockStreamingLLM(
|
||||
@@ -836,7 +836,7 @@ class TestEscalate:
|
||||
queen_executor.node_registry = {"queen": queen_node}
|
||||
manager._subscribe_worker_handoffs(session, queen_executor)
|
||||
|
||||
ctx = build_ctx(runtime, node_spec, memory, llm, stream_id="worker")
|
||||
ctx = build_ctx(runtime, node_spec, buffer, llm, stream_id="worker")
|
||||
node = EventLoopNode(event_bus=bus, config=LoopConfig(max_iterations=5))
|
||||
|
||||
async def queen_reply():
|
||||
@@ -859,7 +859,7 @@ class TestEscalate:
|
||||
assert kwargs["is_client_input"] is False
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_escalate_waits_for_queen_input_and_skips_judge(self, runtime, node_spec, memory):
|
||||
async def test_escalate_waits_for_queen_input_and_skips_judge(self, runtime, node_spec, buffer):
|
||||
"""escalate() should block for queen input before judge evaluation."""
|
||||
node_spec.output_keys = ["result"]
|
||||
llm = MockStreamingLLM(
|
||||
@@ -891,7 +891,7 @@ class TestEscalate:
|
||||
judge = AsyncMock(spec=JudgeProtocol)
|
||||
judge.evaluate = AsyncMock(return_value=JudgeVerdict(action="ACCEPT"))
|
||||
|
||||
ctx = build_ctx(runtime, node_spec, memory, llm, stream_id="worker")
|
||||
ctx = build_ctx(runtime, node_spec, buffer, llm, stream_id="worker")
|
||||
node = EventLoopNode(judge=judge, event_bus=bus, config=LoopConfig(max_iterations=5))
|
||||
|
||||
async def queen_reply():
|
||||
@@ -922,7 +922,7 @@ class TestClientFacingExpectingWork:
|
||||
"""Tests for _cf_expecting_work state machine in client-facing nodes."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_text_after_user_input_goes_to_judge(self, runtime, memory):
|
||||
async def test_text_after_user_input_goes_to_judge(self, runtime, buffer):
|
||||
"""After user responds, text-only with missing outputs gets judged (not auto-blocked).
|
||||
|
||||
Simulates: findings-review asks user, user says "generate report",
|
||||
@@ -956,7 +956,7 @@ class TestClientFacingExpectingWork:
|
||||
]
|
||||
)
|
||||
node = EventLoopNode(config=LoopConfig(max_iterations=10))
|
||||
ctx = build_ctx(runtime, spec, memory, llm)
|
||||
ctx = build_ctx(runtime, spec, buffer, llm)
|
||||
|
||||
async def user_responds():
|
||||
await asyncio.sleep(0.05)
|
||||
@@ -972,7 +972,7 @@ class TestClientFacingExpectingWork:
|
||||
assert llm._call_index >= 3
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_auto_block_without_missing_outputs(self, runtime, memory):
|
||||
async def test_auto_block_without_missing_outputs(self, runtime, buffer):
|
||||
"""Text-only with no missing outputs should still auto-block (queen monitoring).
|
||||
|
||||
Simulates: queen node with no required outputs outputs "monitoring..."
|
||||
@@ -1000,7 +1000,7 @@ class TestClientFacingExpectingWork:
|
||||
]
|
||||
)
|
||||
node = EventLoopNode(config=LoopConfig(max_iterations=10))
|
||||
ctx = build_ctx(runtime, spec, memory, llm)
|
||||
ctx = build_ctx(runtime, spec, buffer, llm)
|
||||
|
||||
async def user_then_shutdown():
|
||||
await asyncio.sleep(0.05)
|
||||
@@ -1020,7 +1020,7 @@ class TestClientFacingExpectingWork:
|
||||
assert llm._call_index == 2
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_tool_calls_reset_expecting_work(self, runtime, memory):
|
||||
async def test_tool_calls_reset_expecting_work(self, runtime, buffer):
|
||||
"""After LLM calls tools, next text-only turn should auto-block again.
|
||||
|
||||
Simulates: user gives input -> LLM calls tools (work) -> LLM presents
|
||||
@@ -1071,7 +1071,7 @@ class TestClientFacingExpectingWork:
|
||||
ctx = build_ctx(
|
||||
runtime,
|
||||
spec,
|
||||
memory,
|
||||
buffer,
|
||||
llm,
|
||||
tools=[Tool(name="save_data", description="save", parameters={})],
|
||||
)
|
||||
@@ -1092,7 +1092,7 @@ class TestClientFacingExpectingWork:
|
||||
assert result.output["status"] == "complete"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_judge_retry_enables_expecting_work(self, runtime, memory):
|
||||
async def test_judge_retry_enables_expecting_work(self, runtime, buffer):
|
||||
"""After judge RETRY, text-only with missing outputs goes to judge again.
|
||||
|
||||
Simulates: LLM calls save_data but forgets set_output -> judge RETRY ->
|
||||
@@ -1143,7 +1143,7 @@ class TestClientFacingExpectingWork:
|
||||
ctx = build_ctx(
|
||||
runtime,
|
||||
spec,
|
||||
memory,
|
||||
buffer,
|
||||
llm,
|
||||
tools=[Tool(name="save_data", description="save", parameters={})],
|
||||
)
|
||||
@@ -1169,7 +1169,7 @@ class TestClientFacingExpectingWork:
|
||||
|
||||
class TestToolExecution:
|
||||
@pytest.mark.asyncio
|
||||
async def test_tool_execution_feedback(self, runtime, node_spec, memory):
|
||||
async def test_tool_execution_feedback(self, runtime, node_spec, buffer):
|
||||
"""Tool call -> result fed back to conversation via stream loop."""
|
||||
node_spec.output_keys = []
|
||||
|
||||
@@ -1192,7 +1192,7 @@ class TestToolExecution:
|
||||
ctx = build_ctx(
|
||||
runtime,
|
||||
node_spec,
|
||||
memory,
|
||||
buffer,
|
||||
llm,
|
||||
tools=[Tool(name="search", description="Search", parameters={})],
|
||||
)
|
||||
@@ -1214,13 +1214,13 @@ class TestToolExecution:
|
||||
|
||||
class TestWriteThroughPersistence:
|
||||
@pytest.mark.asyncio
|
||||
async def test_messages_written_to_store(self, tmp_path, runtime, node_spec, memory):
|
||||
async def test_messages_written_to_store(self, tmp_path, runtime, node_spec, buffer):
|
||||
"""Messages should be persisted immediately via write-through."""
|
||||
store = FileConversationStore(tmp_path / "conv")
|
||||
node_spec.output_keys = []
|
||||
llm = MockStreamingLLM(scenarios=[text_scenario("Hello")])
|
||||
|
||||
ctx = build_ctx(runtime, node_spec, memory, llm)
|
||||
ctx = build_ctx(runtime, node_spec, buffer, llm)
|
||||
node = EventLoopNode(
|
||||
conversation_store=store,
|
||||
config=LoopConfig(max_iterations=5),
|
||||
@@ -1234,7 +1234,7 @@ class TestWriteThroughPersistence:
|
||||
assert len(parts) >= 2 # at least initial user msg + assistant msg
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_output_accumulator_write_through(self, tmp_path, runtime, node_spec, memory):
|
||||
async def test_output_accumulator_write_through(self, tmp_path, runtime, node_spec, buffer):
|
||||
"""set_output values should be persisted in cursor immediately."""
|
||||
store = FileConversationStore(tmp_path / "conv")
|
||||
llm = MockStreamingLLM(
|
||||
@@ -1244,7 +1244,7 @@ class TestWriteThroughPersistence:
|
||||
]
|
||||
)
|
||||
|
||||
ctx = build_ctx(runtime, node_spec, memory, llm)
|
||||
ctx = build_ctx(runtime, node_spec, buffer, llm)
|
||||
node = EventLoopNode(
|
||||
conversation_store=store,
|
||||
config=LoopConfig(max_iterations=5),
|
||||
@@ -1267,7 +1267,7 @@ class TestWriteThroughPersistence:
|
||||
|
||||
class TestCrashRecovery:
|
||||
@pytest.mark.asyncio
|
||||
async def test_restore_from_checkpoint(self, tmp_path, runtime, node_spec, memory):
|
||||
async def test_restore_from_checkpoint(self, tmp_path, runtime, node_spec, buffer):
|
||||
"""Populate a store with state, then verify EventLoopNode restores from it."""
|
||||
store = FileConversationStore(tmp_path / "conv")
|
||||
|
||||
@@ -1293,7 +1293,7 @@ class TestCrashRecovery:
|
||||
node_spec.output_keys = [] # no required keys so implicit accept works
|
||||
llm = MockStreamingLLM(scenarios=[text_scenario("Continuing...")])
|
||||
|
||||
ctx = build_ctx(runtime, node_spec, memory, llm)
|
||||
ctx = build_ctx(runtime, node_spec, buffer, llm)
|
||||
node = EventLoopNode(
|
||||
conversation_store=store,
|
||||
config=LoopConfig(max_iterations=5),
|
||||
@@ -1312,7 +1312,7 @@ class TestCrashRecovery:
|
||||
|
||||
class TestEventInjection:
|
||||
@pytest.mark.asyncio
|
||||
async def test_inject_event(self, runtime, node_spec, memory):
|
||||
async def test_inject_event(self, runtime, node_spec, buffer):
|
||||
"""inject_event() content should appear as user message in next iteration."""
|
||||
node_spec.output_keys = []
|
||||
|
||||
@@ -1334,7 +1334,7 @@ class TestEventInjection:
|
||||
]
|
||||
)
|
||||
|
||||
ctx = build_ctx(runtime, node_spec, memory, llm)
|
||||
ctx = build_ctx(runtime, node_spec, buffer, llm)
|
||||
node = EventLoopNode(
|
||||
judge=judge,
|
||||
config=LoopConfig(max_iterations=5),
|
||||
@@ -1361,7 +1361,7 @@ class TestEventInjection:
|
||||
|
||||
class TestPauseResume:
|
||||
@pytest.mark.asyncio
|
||||
async def test_pause_returns_early(self, runtime, node_spec, memory):
|
||||
async def test_pause_returns_early(self, runtime, node_spec, buffer):
|
||||
"""pause_requested in input_data should trigger early return."""
|
||||
node_spec.output_keys = []
|
||||
llm = MockStreamingLLM(scenarios=[text_scenario("should not run")])
|
||||
@@ -1369,7 +1369,7 @@ class TestPauseResume:
|
||||
ctx = build_ctx(
|
||||
runtime,
|
||||
node_spec,
|
||||
memory,
|
||||
buffer,
|
||||
llm,
|
||||
input_data={"pause_requested": True},
|
||||
)
|
||||
@@ -1389,7 +1389,7 @@ class TestPauseResume:
|
||||
|
||||
class TestStreamErrors:
|
||||
@pytest.mark.asyncio
|
||||
async def test_non_recoverable_stream_error_raises(self, runtime, node_spec, memory):
|
||||
async def test_non_recoverable_stream_error_raises(self, runtime, node_spec, buffer):
|
||||
"""Non-recoverable StreamErrorEvent should raise RuntimeError."""
|
||||
node_spec.output_keys = []
|
||||
llm = MockStreamingLLM(
|
||||
@@ -1398,7 +1398,7 @@ class TestStreamErrors:
|
||||
]
|
||||
)
|
||||
|
||||
ctx = build_ctx(runtime, node_spec, memory, llm)
|
||||
ctx = build_ctx(runtime, node_spec, buffer, llm)
|
||||
node = EventLoopNode(config=LoopConfig(max_iterations=5))
|
||||
|
||||
with pytest.raises(RuntimeError, match="Stream error"):
|
||||
@@ -1489,7 +1489,7 @@ class TestTransientErrorRetry:
|
||||
"""Test retry-with-backoff for transient LLM errors in EventLoopNode."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_transient_error_retries_then_succeeds(self, runtime, node_spec, memory):
|
||||
async def test_transient_error_retries_then_succeeds(self, runtime, node_spec, buffer):
|
||||
"""A transient error on the first try should retry and succeed."""
|
||||
node_spec.output_keys = []
|
||||
llm = ErrorThenSuccessLLM(
|
||||
@@ -1497,7 +1497,7 @@ class TestTransientErrorRetry:
|
||||
fail_count=1,
|
||||
success_scenario=text_scenario("success"),
|
||||
)
|
||||
ctx = build_ctx(runtime, node_spec, memory, llm)
|
||||
ctx = build_ctx(runtime, node_spec, buffer, llm)
|
||||
node = EventLoopNode(
|
||||
config=LoopConfig(
|
||||
max_iterations=5,
|
||||
@@ -1510,7 +1510,7 @@ class TestTransientErrorRetry:
|
||||
assert llm._call_index == 2 # 1 failure + 1 success
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_permanent_error_no_retry(self, runtime, node_spec, memory):
|
||||
async def test_permanent_error_no_retry(self, runtime, node_spec, buffer):
|
||||
"""A permanent error (ValueError) should NOT be retried."""
|
||||
node_spec.output_keys = []
|
||||
llm = ErrorThenSuccessLLM(
|
||||
@@ -1518,7 +1518,7 @@ class TestTransientErrorRetry:
|
||||
fail_count=1,
|
||||
success_scenario=text_scenario("success"),
|
||||
)
|
||||
ctx = build_ctx(runtime, node_spec, memory, llm)
|
||||
ctx = build_ctx(runtime, node_spec, buffer, llm)
|
||||
node = EventLoopNode(
|
||||
config=LoopConfig(
|
||||
max_iterations=5,
|
||||
@@ -1532,7 +1532,7 @@ class TestTransientErrorRetry:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_client_facing_non_transient_error_does_not_crash(
|
||||
self, runtime, node_spec, memory
|
||||
self, runtime, node_spec, buffer
|
||||
):
|
||||
"""Client-facing non-transient errors should wait for input, not crash on token vars."""
|
||||
node_spec.output_keys = []
|
||||
@@ -1542,7 +1542,7 @@ class TestTransientErrorRetry:
|
||||
fail_count=100, # always fails
|
||||
success_scenario=text_scenario("unreachable"),
|
||||
)
|
||||
ctx = build_ctx(runtime, node_spec, memory, llm)
|
||||
ctx = build_ctx(runtime, node_spec, buffer, llm)
|
||||
node = EventLoopNode(
|
||||
config=LoopConfig(
|
||||
max_iterations=1,
|
||||
@@ -1559,7 +1559,7 @@ class TestTransientErrorRetry:
|
||||
node._await_user_input.assert_awaited_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_transient_error_exhausts_retries(self, runtime, node_spec, memory):
|
||||
async def test_transient_error_exhausts_retries(self, runtime, node_spec, buffer):
|
||||
"""Transient errors that exhaust retries should raise."""
|
||||
node_spec.output_keys = []
|
||||
llm = ErrorThenSuccessLLM(
|
||||
@@ -1567,7 +1567,7 @@ class TestTransientErrorRetry:
|
||||
fail_count=100, # always fails
|
||||
success_scenario=text_scenario("unreachable"),
|
||||
)
|
||||
ctx = build_ctx(runtime, node_spec, memory, llm)
|
||||
ctx = build_ctx(runtime, node_spec, buffer, llm)
|
||||
node = EventLoopNode(
|
||||
config=LoopConfig(
|
||||
max_iterations=5,
|
||||
@@ -1580,7 +1580,7 @@ class TestTransientErrorRetry:
|
||||
assert llm._call_index == 3 # 1 initial + 2 retries
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_stream_error_event_retried_as_runtime_error(self, runtime, node_spec, memory):
|
||||
async def test_stream_error_event_retried_as_runtime_error(self, runtime, node_spec, buffer):
|
||||
"""StreamErrorEvent(recoverable=False) raises RuntimeError caught by retry."""
|
||||
node_spec.output_keys = []
|
||||
|
||||
@@ -1615,7 +1615,7 @@ class TestTransientErrorRetry:
|
||||
)
|
||||
|
||||
llm = StreamErrorThenSuccessLLM()
|
||||
ctx = build_ctx(runtime, node_spec, memory, llm)
|
||||
ctx = build_ctx(runtime, node_spec, buffer, llm)
|
||||
node = EventLoopNode(
|
||||
config=LoopConfig(
|
||||
max_iterations=5,
|
||||
@@ -1628,7 +1628,7 @@ class TestTransientErrorRetry:
|
||||
assert call_index == 2
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_retry_emits_event_bus_event(self, runtime, node_spec, memory):
|
||||
async def test_retry_emits_event_bus_event(self, runtime, node_spec, buffer):
|
||||
"""Retry should emit NODE_RETRY event on the event bus."""
|
||||
node_spec.output_keys = []
|
||||
llm = ErrorThenSuccessLLM(
|
||||
@@ -1643,7 +1643,7 @@ class TestTransientErrorRetry:
|
||||
handler=lambda e: retry_events.append(e),
|
||||
)
|
||||
|
||||
ctx = build_ctx(runtime, node_spec, memory, llm)
|
||||
ctx = build_ctx(runtime, node_spec, buffer, llm)
|
||||
node = EventLoopNode(
|
||||
event_bus=bus,
|
||||
config=LoopConfig(
|
||||
@@ -1658,7 +1658,7 @@ class TestTransientErrorRetry:
|
||||
assert retry_events[0].data["retry_count"] == 1
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_recoverable_stream_error_retried_not_silent(self, runtime, node_spec, memory):
|
||||
async def test_recoverable_stream_error_retried_not_silent(self, runtime, node_spec, buffer):
|
||||
"""Recoverable StreamErrorEvent with empty response should raise ConnectionError.
|
||||
|
||||
Previously, recoverable stream errors were silently swallowed,
|
||||
@@ -1697,7 +1697,7 @@ class TestTransientErrorRetry:
|
||||
return LLMResponse(content="ok", model="mock", stop_reason="stop")
|
||||
|
||||
llm = RecoverableErrorThenSuccessLLM()
|
||||
ctx = build_ctx(runtime, node_spec, memory, llm)
|
||||
ctx = build_ctx(runtime, node_spec, buffer, llm)
|
||||
node = EventLoopNode(
|
||||
config=LoopConfig(
|
||||
max_iterations=5,
|
||||
@@ -1892,7 +1892,7 @@ class TestToolDoomLoopIntegration:
|
||||
self,
|
||||
runtime,
|
||||
node_spec,
|
||||
memory,
|
||||
buffer,
|
||||
):
|
||||
"""3 identical tool call turns should inject a warning."""
|
||||
node_spec.output_keys = []
|
||||
@@ -1921,7 +1921,7 @@ class TestToolDoomLoopIntegration:
|
||||
ctx = build_ctx(
|
||||
runtime,
|
||||
node_spec,
|
||||
memory,
|
||||
buffer,
|
||||
llm,
|
||||
tools=[Tool(name="search", description="s", parameters={})],
|
||||
)
|
||||
@@ -1942,7 +1942,7 @@ class TestToolDoomLoopIntegration:
|
||||
self,
|
||||
runtime,
|
||||
node_spec,
|
||||
memory,
|
||||
buffer,
|
||||
):
|
||||
"""Doom loop should emit NODE_TOOL_DOOM_LOOP event."""
|
||||
node_spec.output_keys = []
|
||||
@@ -1976,7 +1976,7 @@ class TestToolDoomLoopIntegration:
|
||||
ctx = build_ctx(
|
||||
runtime,
|
||||
node_spec,
|
||||
memory,
|
||||
buffer,
|
||||
llm,
|
||||
tools=[Tool(name="search", description="s", parameters={})],
|
||||
)
|
||||
@@ -1999,7 +1999,7 @@ class TestToolDoomLoopIntegration:
|
||||
async def test_client_facing_worker_doom_loop_escalates_to_queen(
|
||||
self,
|
||||
runtime,
|
||||
memory,
|
||||
buffer,
|
||||
):
|
||||
"""Client-facing worker doom loops should escalate instead of blocking for user input."""
|
||||
spec = NodeSpec(
|
||||
@@ -2040,7 +2040,7 @@ class TestToolDoomLoopIntegration:
|
||||
ctx = build_ctx(
|
||||
runtime,
|
||||
spec,
|
||||
memory,
|
||||
buffer,
|
||||
llm,
|
||||
tools=[Tool(name="search", description="s", parameters={})],
|
||||
stream_id="worker",
|
||||
@@ -2066,7 +2066,7 @@ class TestToolDoomLoopIntegration:
|
||||
self,
|
||||
runtime,
|
||||
node_spec,
|
||||
memory,
|
||||
buffer,
|
||||
):
|
||||
"""Disabled doom loop should not trigger with identical calls."""
|
||||
node_spec.output_keys = []
|
||||
@@ -2094,7 +2094,7 @@ class TestToolDoomLoopIntegration:
|
||||
ctx = build_ctx(
|
||||
runtime,
|
||||
node_spec,
|
||||
memory,
|
||||
buffer,
|
||||
llm,
|
||||
tools=[Tool(name="search", description="s", parameters={})],
|
||||
)
|
||||
@@ -2115,7 +2115,7 @@ class TestToolDoomLoopIntegration:
|
||||
self,
|
||||
runtime,
|
||||
node_spec,
|
||||
memory,
|
||||
buffer,
|
||||
):
|
||||
"""Different tool args each turn should NOT trigger doom loop."""
|
||||
node_spec.output_keys = []
|
||||
@@ -2183,7 +2183,7 @@ class TestToolDoomLoopIntegration:
|
||||
ctx = build_ctx(
|
||||
runtime,
|
||||
node_spec,
|
||||
memory,
|
||||
buffer,
|
||||
llm,
|
||||
tools=[Tool(name="search", description="s", parameters={})],
|
||||
)
|
||||
@@ -2204,7 +2204,7 @@ class TestToolDoomLoopIntegration:
|
||||
self,
|
||||
runtime,
|
||||
node_spec,
|
||||
memory,
|
||||
buffer,
|
||||
):
|
||||
"""A tool that keeps failing with is_error=True should trigger doom loop.
|
||||
|
||||
@@ -2245,7 +2245,7 @@ class TestToolDoomLoopIntegration:
|
||||
ctx = build_ctx(
|
||||
runtime,
|
||||
node_spec,
|
||||
memory,
|
||||
buffer,
|
||||
llm,
|
||||
tools=[Tool(name="failing_tool", description="s", parameters={})],
|
||||
)
|
||||
@@ -2274,21 +2274,21 @@ class TestToolDoomLoopIntegration:
|
||||
class TestExecutionId:
|
||||
"""Tests for execution_id on NodeContext and its wiring through the framework."""
|
||||
|
||||
def test_node_context_accepts_execution_id(self, runtime, node_spec, memory):
|
||||
def test_node_context_accepts_execution_id(self, runtime, node_spec, buffer):
|
||||
"""NodeContext stores execution_id when constructed with one."""
|
||||
ctx = NodeContext(
|
||||
runtime=runtime,
|
||||
node_id=node_spec.id,
|
||||
node_spec=node_spec,
|
||||
memory=memory,
|
||||
buffer=buffer,
|
||||
execution_id="exec_abc",
|
||||
)
|
||||
assert ctx.execution_id == "exec_abc"
|
||||
|
||||
def test_node_context_execution_id_defaults_to_empty(self, runtime, node_spec, memory):
|
||||
def test_node_context_execution_id_defaults_to_empty(self, runtime, node_spec, buffer):
|
||||
"""build_ctx without execution_id gives ctx.execution_id == ''."""
|
||||
llm = MockStreamingLLM()
|
||||
ctx = build_ctx(runtime, node_spec, memory, llm)
|
||||
ctx = build_ctx(runtime, node_spec, buffer, llm)
|
||||
assert ctx.execution_id == ""
|
||||
|
||||
def test_stream_runtime_adapter_exposes_execution_id(self):
|
||||
@@ -2313,7 +2313,7 @@ class TestExecutionId:
|
||||
id="n1", name="n1", description="test", node_type="event_loop", output_keys=["r"]
|
||||
)
|
||||
ctx = executor._build_context(
|
||||
node_spec=node_spec, memory=SharedMemory(), goal=goal, input_data={}
|
||||
node_spec=node_spec, buffer=DataBuffer(), goal=goal, input_data={}
|
||||
)
|
||||
assert ctx.execution_id == "exec_123"
|
||||
|
||||
@@ -2331,27 +2331,27 @@ class TestExecutionId:
|
||||
id="n1", name="n1", description="test", node_type="event_loop", output_keys=["r"]
|
||||
)
|
||||
ctx = executor._build_context(
|
||||
node_spec=node_spec, memory=SharedMemory(), goal=goal, input_data={}
|
||||
node_spec=node_spec, buffer=DataBuffer(), goal=goal, input_data={}
|
||||
)
|
||||
assert ctx.execution_id == ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Subagent memory snapshot includes accumulator outputs
|
||||
# Subagent data buffer snapshot includes accumulator outputs
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSubagentAccumulatorMemory:
|
||||
"""Verify that subagent memory construction merges accumulator outputs
|
||||
"""Verify that subagent data buffer construction merges accumulator outputs
|
||||
and includes the subagent's input_keys in read permissions."""
|
||||
|
||||
def test_accumulator_values_merged_into_parent_data(self):
|
||||
"""Keys from OutputAccumulator should appear in subagent memory."""
|
||||
"""Keys from OutputAccumulator should appear in subagent data buffer."""
|
||||
# Simulate what _execute_subagent does internally:
|
||||
# parent shared memory has user_request but NOT tweet_content
|
||||
parent_memory = SharedMemory()
|
||||
parent_memory.write("user_request", "post a joke")
|
||||
parent_data = parent_memory.read_all() # {"user_request": "post a joke"}
|
||||
# parent shared data buffer has user_request but NOT tweet_content
|
||||
parent_buffer = DataBuffer()
|
||||
parent_buffer.write("user_request", "post a joke")
|
||||
parent_data = parent_buffer.read_all() # {"user_request": "post a joke"}
|
||||
|
||||
# Accumulator has tweet_content (set via set_output before delegation)
|
||||
acc = OutputAccumulator(values={"tweet_content": "Hello world!"})
|
||||
@@ -2361,14 +2361,14 @@ class TestSubagentAccumulatorMemory:
|
||||
if key not in parent_data:
|
||||
parent_data[key] = value
|
||||
|
||||
# Build subagent memory
|
||||
subagent_memory = SharedMemory()
|
||||
# Build subagent data buffer
|
||||
subagent_buffer = DataBuffer()
|
||||
for key, value in parent_data.items():
|
||||
subagent_memory.write(key, value, validate=False)
|
||||
subagent_buffer.write(key, value, validate=False)
|
||||
|
||||
subagent_input_keys = ["tweet_content"]
|
||||
read_keys = set(parent_data.keys()) | set(subagent_input_keys)
|
||||
scoped = subagent_memory.with_permissions(read_keys=list(read_keys), write_keys=[])
|
||||
scoped = subagent_buffer.with_permissions(read_keys=list(read_keys), write_keys=[])
|
||||
|
||||
# This would have raised PermissionError before the fix
|
||||
assert scoped.read("tweet_content") == "Hello world!"
|
||||
@@ -2376,18 +2376,18 @@ class TestSubagentAccumulatorMemory:
|
||||
|
||||
def test_input_keys_allowed_even_if_not_in_data(self):
|
||||
"""Subagent input_keys should be in read permissions even if the
|
||||
key doesn't exist in memory (returns None instead of PermissionError)."""
|
||||
parent_memory = SharedMemory()
|
||||
parent_memory.write("user_request", "hi")
|
||||
parent_data = parent_memory.read_all()
|
||||
key doesn't exist in data buffer (returns None instead of PermissionError)."""
|
||||
parent_buffer = DataBuffer()
|
||||
parent_buffer.write("user_request", "hi")
|
||||
parent_data = parent_buffer.read_all()
|
||||
|
||||
subagent_memory = SharedMemory()
|
||||
subagent_buffer = DataBuffer()
|
||||
for key, value in parent_data.items():
|
||||
subagent_memory.write(key, value, validate=False)
|
||||
subagent_buffer.write(key, value, validate=False)
|
||||
|
||||
# input_keys includes "tweet_content" which isn't in parent_data
|
||||
read_keys = set(parent_data.keys()) | {"tweet_content"}
|
||||
scoped = subagent_memory.with_permissions(read_keys=list(read_keys), write_keys=[])
|
||||
scoped = subagent_buffer.with_permissions(read_keys=list(read_keys), write_keys=[])
|
||||
|
||||
# Should return None (not raise PermissionError)
|
||||
assert scoped.read("tweet_content") is None
|
||||
|
||||
@@ -13,7 +13,7 @@ from framework.llm.stream_events import FinishEvent, StreamEvent, TextDeltaEvent
|
||||
from framework.runtime.event_bus import EventBus
|
||||
from framework.runtime.execution_stream import EntryPointSpec, ExecutionStream
|
||||
from framework.runtime.outcome_aggregator import OutcomeAggregator
|
||||
from framework.runtime.shared_state import SharedStateManager
|
||||
from framework.runtime.shared_state import SharedBufferManager
|
||||
from framework.storage.concurrent import ConcurrentStorage
|
||||
|
||||
|
||||
@@ -119,7 +119,7 @@ async def test_execution_stream_retention(tmp_path):
|
||||
),
|
||||
graph=graph,
|
||||
goal=goal,
|
||||
state_manager=SharedStateManager(),
|
||||
state_manager=SharedBufferManager(),
|
||||
storage=storage,
|
||||
outcome_aggregator=OutcomeAggregator(goal, EventBus()),
|
||||
event_bus=None,
|
||||
@@ -211,7 +211,7 @@ async def test_shared_session_reuses_directory_and_memory(tmp_path):
|
||||
),
|
||||
graph=graph,
|
||||
goal=goal,
|
||||
state_manager=SharedStateManager(),
|
||||
state_manager=SharedBufferManager(),
|
||||
storage=storage,
|
||||
outcome_aggregator=OutcomeAggregator(goal, EventBus()),
|
||||
event_bus=None,
|
||||
@@ -247,7 +247,7 @@ async def test_shared_session_reuses_directory_and_memory(tmp_path):
|
||||
),
|
||||
graph=graph,
|
||||
goal=goal,
|
||||
state_manager=SharedStateManager(),
|
||||
state_manager=SharedBufferManager(),
|
||||
storage=storage,
|
||||
outcome_aggregator=OutcomeAggregator(goal, EventBus()),
|
||||
event_bus=None,
|
||||
@@ -262,7 +262,7 @@ async def test_shared_session_reuses_directory_and_memory(tmp_path):
|
||||
# Run async execution with resume_session_id pointing to primary session
|
||||
session_state = {
|
||||
"resume_session_id": primary_exec_id,
|
||||
"memory": {"rules": "star important emails"},
|
||||
"data_buffer": {"rules": "star important emails"},
|
||||
}
|
||||
async_exec_id = await async_stream.execute({"event": "new_email"}, session_state=session_state)
|
||||
|
||||
|
||||
@@ -606,7 +606,7 @@ async def test_memory_conflict_last_wins(runtime, goal):
|
||||
"""last_wins should allow both branches to write the same key without error."""
|
||||
# Use distinct output_keys in spec (to pass graph validation) but have
|
||||
# the node impl write a shared key at runtime — this is the scenario
|
||||
# memory_conflict_strategy is designed to handle.
|
||||
# buffer_conflict_strategy is designed to handle.
|
||||
b1 = NodeSpec(
|
||||
id="b1", name="B1", description="b1", node_type="event_loop", output_keys=["b1_out"]
|
||||
)
|
||||
@@ -616,7 +616,7 @@ async def test_memory_conflict_last_wins(runtime, goal):
|
||||
|
||||
graph = _make_fanout_graph([b1, b2])
|
||||
|
||||
config = ParallelExecutionConfig(memory_conflict_strategy="last_wins")
|
||||
config = ParallelExecutionConfig(buffer_conflict_strategy="last_wins")
|
||||
executor = GraphExecutor(
|
||||
runtime=runtime, enable_parallel_execution=True, parallel_config=config
|
||||
)
|
||||
@@ -647,7 +647,7 @@ async def test_memory_conflict_first_wins(runtime, goal):
|
||||
|
||||
graph = _make_fanout_graph([b1, b2])
|
||||
|
||||
config = ParallelExecutionConfig(memory_conflict_strategy="first_wins")
|
||||
config = ParallelExecutionConfig(buffer_conflict_strategy="first_wins")
|
||||
executor = GraphExecutor(
|
||||
runtime=runtime, enable_parallel_execution=True, parallel_config=config
|
||||
)
|
||||
@@ -675,7 +675,7 @@ async def test_memory_conflict_error_raises(runtime, goal):
|
||||
|
||||
graph = _make_fanout_graph([b1, b2])
|
||||
|
||||
config = ParallelExecutionConfig(memory_conflict_strategy="error")
|
||||
config = ParallelExecutionConfig(buffer_conflict_strategy="error")
|
||||
executor = GraphExecutor(
|
||||
runtime=runtime, enable_parallel_execution=True, parallel_config=config
|
||||
)
|
||||
|
||||
@@ -277,7 +277,7 @@ def test_write_progress_uses_atomic_write_and_updates_state(tmp_path, monkeypatc
|
||||
executor._write_progress(
|
||||
current_node="node-b",
|
||||
path=["node-a", "node-b"],
|
||||
memory=memory,
|
||||
buffer=memory,
|
||||
node_visit_counts={"node-a": 1, "node-b": 1},
|
||||
)
|
||||
|
||||
@@ -287,9 +287,9 @@ def test_write_progress_uses_atomic_write_and_updates_state(tmp_path, monkeypatc
|
||||
assert state["progress"]["current_node"] == "node-b"
|
||||
assert state["progress"]["path"] == ["node-a", "node-b"]
|
||||
assert state["progress"]["node_visit_counts"] == {"node-a": 1, "node-b": 1}
|
||||
assert state["data_buffer"] == {"foo": "bar"}
|
||||
assert state["progress"]["steps_executed"] == 2
|
||||
assert state["memory"] == {"foo": "bar"}
|
||||
assert state["memory_keys"] == ["foo"]
|
||||
assert state["buffer_keys"] == ["foo"]
|
||||
assert "updated_at" in state["timestamps"]
|
||||
|
||||
|
||||
@@ -309,7 +309,7 @@ def test_write_progress_logs_warning_on_atomic_write_failure(tmp_path, monkeypat
|
||||
executor._write_progress(
|
||||
current_node="node-b",
|
||||
path=["node-a", "node-b"],
|
||||
memory=memory,
|
||||
buffer=memory,
|
||||
node_visit_counts={"node-a": 1, "node-b": 1},
|
||||
)
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
"""
|
||||
Test hallucination detection in SharedMemory and OutputValidator.
|
||||
Test hallucination detection in DataBuffer and OutputValidator.
|
||||
|
||||
These tests verify that code detection works correctly across the entire
|
||||
string content, not just the first 500 characters.
|
||||
@@ -7,117 +7,117 @@ string content, not just the first 500 characters.
|
||||
|
||||
import pytest
|
||||
|
||||
from framework.graph.node import MemoryWriteError, SharedMemory
|
||||
from framework.graph.node import DataBufferWriteError, DataBuffer
|
||||
from framework.graph.validator import OutputValidator, ValidationResult
|
||||
|
||||
|
||||
class TestSharedMemoryHallucinationDetection:
|
||||
"""Test the SharedMemory hallucination detection."""
|
||||
class TestDataBufferHallucinationDetection:
|
||||
"""Test the DataBuffer hallucination detection."""
|
||||
|
||||
def test_detects_code_at_start(self):
|
||||
"""Code at the start of the string should be detected."""
|
||||
memory = SharedMemory()
|
||||
buffer = DataBuffer()
|
||||
code_content = "```python\nimport os\ndef hack(): pass\n```" + "A" * 6000
|
||||
|
||||
with pytest.raises(MemoryWriteError) as exc_info:
|
||||
memory.write("output", code_content)
|
||||
with pytest.raises(DataBufferWriteError) as exc_info:
|
||||
buffer.write("output", code_content)
|
||||
|
||||
assert "hallucinated code" in str(exc_info.value)
|
||||
|
||||
def test_detects_code_in_middle(self):
|
||||
"""Code in the middle of the string should be detected (was previously missed)."""
|
||||
memory = SharedMemory()
|
||||
buffer = DataBuffer()
|
||||
# 600 chars of padding, then code, then more padding to exceed 5000 chars
|
||||
padding_start = "A" * 600
|
||||
code = "\n```python\nimport os\ndef malicious(): pass\n```\n"
|
||||
padding_end = "B" * 5000
|
||||
content = padding_start + code + padding_end
|
||||
|
||||
with pytest.raises(MemoryWriteError) as exc_info:
|
||||
memory.write("output", content)
|
||||
with pytest.raises(DataBufferWriteError) as exc_info:
|
||||
buffer.write("output", content)
|
||||
|
||||
assert "hallucinated code" in str(exc_info.value)
|
||||
|
||||
def test_detects_code_at_end(self):
|
||||
"""Code at the end of the string should be detected (was previously missed)."""
|
||||
memory = SharedMemory()
|
||||
buffer = DataBuffer()
|
||||
padding = "A" * 5500
|
||||
code = "\n```python\nclass Exploit:\n pass\n```"
|
||||
content = padding + code
|
||||
|
||||
with pytest.raises(MemoryWriteError) as exc_info:
|
||||
memory.write("output", content)
|
||||
with pytest.raises(DataBufferWriteError) as exc_info:
|
||||
buffer.write("output", content)
|
||||
|
||||
assert "hallucinated code" in str(exc_info.value)
|
||||
|
||||
def test_detects_javascript_code(self):
|
||||
"""JavaScript code patterns should be detected."""
|
||||
memory = SharedMemory()
|
||||
buffer = DataBuffer()
|
||||
padding = "A" * 600
|
||||
code = "\nfunction malicious() { require('child_process'); }\n"
|
||||
padding_end = "B" * 5000
|
||||
content = padding + code + padding_end
|
||||
|
||||
with pytest.raises(MemoryWriteError) as exc_info:
|
||||
memory.write("output", content)
|
||||
with pytest.raises(DataBufferWriteError) as exc_info:
|
||||
buffer.write("output", content)
|
||||
|
||||
assert "hallucinated code" in str(exc_info.value)
|
||||
|
||||
def test_detects_sql_injection(self):
|
||||
"""SQL patterns should be detected."""
|
||||
memory = SharedMemory()
|
||||
buffer = DataBuffer()
|
||||
padding = "A" * 600
|
||||
code = "\nDROP TABLE users; SELECT * FROM passwords;\n"
|
||||
padding_end = "B" * 5000
|
||||
content = padding + code + padding_end
|
||||
|
||||
with pytest.raises(MemoryWriteError) as exc_info:
|
||||
memory.write("output", content)
|
||||
with pytest.raises(DataBufferWriteError) as exc_info:
|
||||
buffer.write("output", content)
|
||||
|
||||
assert "hallucinated code" in str(exc_info.value)
|
||||
|
||||
def test_detects_script_injection(self):
|
||||
"""HTML script injection should be detected."""
|
||||
memory = SharedMemory()
|
||||
buffer = DataBuffer()
|
||||
padding = "A" * 600
|
||||
code = "\n<script>alert('xss')</script>\n"
|
||||
padding_end = "B" * 5000
|
||||
content = padding + code + padding_end
|
||||
|
||||
with pytest.raises(MemoryWriteError) as exc_info:
|
||||
memory.write("output", content)
|
||||
with pytest.raises(DataBufferWriteError) as exc_info:
|
||||
buffer.write("output", content)
|
||||
|
||||
assert "hallucinated code" in str(exc_info.value)
|
||||
|
||||
def test_allows_short_strings_without_validation(self):
|
||||
"""Strings under 5000 chars should not trigger validation."""
|
||||
memory = SharedMemory()
|
||||
buffer = DataBuffer()
|
||||
content = "def hello(): pass" # Contains code indicator but short
|
||||
|
||||
# Should not raise - too short to validate
|
||||
memory.write("output", content)
|
||||
assert memory.read("output") == content
|
||||
buffer.write("output", content)
|
||||
assert buffer.read("output") == content
|
||||
|
||||
def test_allows_long_strings_without_code(self):
|
||||
"""Long strings without code indicators should be allowed."""
|
||||
memory = SharedMemory()
|
||||
buffer = DataBuffer()
|
||||
content = "This is a long text document. " * 500 # ~15000 chars, no code
|
||||
|
||||
memory.write("output", content)
|
||||
assert memory.read("output") == content
|
||||
buffer.write("output", content)
|
||||
assert buffer.read("output") == content
|
||||
|
||||
def test_validate_false_bypasses_check(self):
|
||||
"""Using validate=False should bypass the check."""
|
||||
memory = SharedMemory()
|
||||
buffer = DataBuffer()
|
||||
code_content = "```python\nimport os\n```" + "A" * 6000
|
||||
|
||||
# Should not raise when validate=False
|
||||
memory.write("output", code_content, validate=False)
|
||||
assert memory.read("output") == code_content
|
||||
buffer.write("output", code_content, validate=False)
|
||||
assert buffer.read("output") == code_content
|
||||
|
||||
def test_sampling_for_very_long_strings(self):
|
||||
"""Very long strings (>10KB) should be sampled at multiple positions."""
|
||||
memory = SharedMemory()
|
||||
buffer = DataBuffer()
|
||||
# Create a 50KB string with code at the 75% mark
|
||||
size = 50000
|
||||
code_position = int(size * 0.75)
|
||||
@@ -125,8 +125,8 @@ class TestSharedMemoryHallucinationDetection:
|
||||
"A" * code_position + "def hidden_code(): pass" + "B" * (size - code_position - 25)
|
||||
)
|
||||
|
||||
with pytest.raises(MemoryWriteError) as exc_info:
|
||||
memory.write("output", content)
|
||||
with pytest.raises(DataBufferWriteError) as exc_info:
|
||||
buffer.write("output", content)
|
||||
|
||||
assert "hallucinated code" in str(exc_info.value)
|
||||
|
||||
@@ -199,36 +199,36 @@ class TestEdgeCases:
|
||||
|
||||
def test_empty_string(self):
|
||||
"""Empty strings should not cause errors."""
|
||||
memory = SharedMemory()
|
||||
memory.write("output", "")
|
||||
assert memory.read("output") == ""
|
||||
buffer = DataBuffer()
|
||||
buffer.write("output", "")
|
||||
assert buffer.read("output") == ""
|
||||
|
||||
def test_non_string_values(self):
|
||||
"""Non-string values should not be validated for code."""
|
||||
memory = SharedMemory()
|
||||
buffer = DataBuffer()
|
||||
|
||||
# These should all work without validation
|
||||
memory.write("number", 12345)
|
||||
memory.write("list", [1, 2, 3])
|
||||
memory.write("dict", {"key": "value"})
|
||||
memory.write("bool", True)
|
||||
buffer.write("number", 12345)
|
||||
buffer.write("list", [1, 2, 3])
|
||||
buffer.write("dict", {"key": "value"})
|
||||
buffer.write("bool", True)
|
||||
|
||||
assert memory.read("number") == 12345
|
||||
assert memory.read("list") == [1, 2, 3]
|
||||
assert buffer.read("number") == 12345
|
||||
assert buffer.read("list") == [1, 2, 3]
|
||||
|
||||
def test_exactly_5000_chars(self):
|
||||
"""String of exactly 5000 chars should not trigger validation."""
|
||||
memory = SharedMemory()
|
||||
buffer = DataBuffer()
|
||||
content = "def code(): pass" + "A" * (5000 - 16) # Exactly 5000 chars
|
||||
|
||||
# Should not raise - exactly at threshold, not over
|
||||
memory.write("output", content)
|
||||
assert len(memory.read("output")) == 5000
|
||||
buffer.write("output", content)
|
||||
assert len(buffer.read("output")) == 5000
|
||||
|
||||
def test_5001_chars_triggers_validation(self):
|
||||
"""String of 5001 chars with code should trigger validation."""
|
||||
memory = SharedMemory()
|
||||
buffer = DataBuffer()
|
||||
content = "def code(): pass" + "A" * (5001 - 16) # 5001 chars
|
||||
|
||||
with pytest.raises(MemoryWriteError):
|
||||
memory.write("output", content)
|
||||
with pytest.raises(DataBufferWriteError):
|
||||
buffer.write("output", content)
|
||||
|
||||
@@ -494,12 +494,12 @@ class TestEdgeConditionPatterns:
|
||||
)
|
||||
|
||||
def test_success_flag_check(self):
|
||||
ctx = {"output": {"success": True}, "memory": {"attempts": 2}}
|
||||
ctx = {"output": {"success": True}, "buffer": {"attempts": 2}}
|
||||
assert safe_eval("output.get('success') == True", ctx) is True
|
||||
|
||||
def test_memory_threshold(self):
|
||||
ctx = {"memory": {"score": 0.85}}
|
||||
assert safe_eval("memory.get('score', 0) >= 0.8", ctx) is True
|
||||
def test_buffer_threshold(self):
|
||||
ctx = {"buffer": {"score": 0.85}}
|
||||
assert safe_eval("buffer.get('score', 0) >= 0.8", ctx) is True
|
||||
|
||||
def test_string_contains_check(self):
|
||||
ctx = {"output": {"status": "completed_with_warnings"}}
|
||||
|
||||
@@ -76,7 +76,7 @@ class TestSkillDirsPropagation:
|
||||
runtime=MagicMock(),
|
||||
node_id="n",
|
||||
node_spec=MagicMock(),
|
||||
memory={},
|
||||
buffer={},
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
+70
-70
@@ -1,6 +1,6 @@
|
||||
"""Tests for subagent capability in EventLoopNode.
|
||||
|
||||
Tests the delegate_to_sub_agent tool, subagent execution with read-only memory,
|
||||
Tests the delegate_to_sub_agent tool, subagent execution with read-only data buffer,
|
||||
prevention of nested subagent delegation, and report_to_parent one-way channel.
|
||||
"""
|
||||
|
||||
@@ -19,7 +19,7 @@ from framework.graph.event_loop_node import (
|
||||
LoopConfig,
|
||||
SubagentJudge,
|
||||
)
|
||||
from framework.graph.node import NodeContext, NodeSpec, SharedMemory
|
||||
from framework.graph.node import NodeContext, NodeSpec, DataBuffer
|
||||
from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolResult, ToolUse
|
||||
from framework.llm.stream_events import (
|
||||
FinishEvent,
|
||||
@@ -193,14 +193,14 @@ class TestSubagentExecution:
|
||||
"""Should return error when subagent ID is not in registry."""
|
||||
node = EventLoopNode(config=LoopConfig(max_iterations=5))
|
||||
|
||||
memory = SharedMemory()
|
||||
memory.write("query", "test query")
|
||||
buffer = DataBuffer()
|
||||
buffer.write("query", "test query")
|
||||
|
||||
ctx = NodeContext(
|
||||
runtime=runtime,
|
||||
node_id="parent",
|
||||
node_spec=parent_node_spec,
|
||||
memory=memory,
|
||||
buffer=buffer,
|
||||
input_data={},
|
||||
llm=MockStreamingLLM([]),
|
||||
available_tools=[],
|
||||
@@ -219,7 +219,7 @@ class TestSubagentExecution:
|
||||
async def test_subagent_receives_readonly_memory(
|
||||
self, runtime, parent_node_spec, subagent_node_spec
|
||||
):
|
||||
"""Subagent should have read-only access to memory."""
|
||||
"""Subagent should have read-only access to data buffer."""
|
||||
# Create LLM that will set output for the subagent
|
||||
subagent_llm = MockStreamingLLM(
|
||||
[
|
||||
@@ -232,10 +232,10 @@ class TestSubagentExecution:
|
||||
config=LoopConfig(max_iterations=5),
|
||||
)
|
||||
|
||||
# Parent memory with some data
|
||||
memory = SharedMemory()
|
||||
memory.write("query", "research AI")
|
||||
scoped_memory = memory.with_permissions(
|
||||
# Parent data buffer with some data
|
||||
buffer = DataBuffer()
|
||||
buffer.write("query", "research AI")
|
||||
scoped_buffer = buffer.with_permissions(
|
||||
read_keys=["query"],
|
||||
write_keys=["result"],
|
||||
)
|
||||
@@ -244,7 +244,7 @@ class TestSubagentExecution:
|
||||
runtime=runtime,
|
||||
node_id="parent",
|
||||
node_spec=parent_node_spec,
|
||||
memory=scoped_memory,
|
||||
buffer=scoped_buffer,
|
||||
input_data={"query": "research AI"},
|
||||
llm=subagent_llm,
|
||||
available_tools=[],
|
||||
@@ -275,14 +275,14 @@ class TestSubagentExecution:
|
||||
|
||||
node = EventLoopNode(config=LoopConfig(max_iterations=5))
|
||||
|
||||
memory = SharedMemory()
|
||||
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
|
||||
buffer = DataBuffer()
|
||||
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
|
||||
|
||||
ctx = NodeContext(
|
||||
runtime=runtime,
|
||||
node_id="parent",
|
||||
node_spec=parent_node_spec,
|
||||
memory=scoped,
|
||||
buffer=scoped,
|
||||
input_data={},
|
||||
llm=subagent_llm,
|
||||
available_tools=[],
|
||||
@@ -332,14 +332,14 @@ class TestSubagentExecution:
|
||||
browser_tool = Tool(name="browser_snapshot", description="Snapshot")
|
||||
|
||||
node = EventLoopNode(config=LoopConfig(max_iterations=5))
|
||||
memory = SharedMemory()
|
||||
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
|
||||
buffer = DataBuffer()
|
||||
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
|
||||
|
||||
ctx = NodeContext(
|
||||
runtime=runtime,
|
||||
node_id="parent",
|
||||
node_spec=parent_spec,
|
||||
memory=scoped,
|
||||
buffer=scoped,
|
||||
input_data={},
|
||||
llm=spy_llm,
|
||||
available_tools=[],
|
||||
@@ -383,12 +383,12 @@ class TestNestedSubagentPrevention:
|
||||
sub_agents=["another"], # This should be ignored in subagent mode
|
||||
)
|
||||
|
||||
memory = SharedMemory()
|
||||
buffer = DataBuffer()
|
||||
ctx = NodeContext(
|
||||
runtime=runtime,
|
||||
node_id="nested",
|
||||
node_spec=subagent_with_subagents,
|
||||
memory=memory,
|
||||
buffer=buffer,
|
||||
input_data={},
|
||||
llm=MockStreamingLLM([]),
|
||||
available_tools=[],
|
||||
@@ -459,9 +459,9 @@ class TestDelegationIntegration:
|
||||
# For this test, let's just verify the parent can call delegate_to_sub_agent
|
||||
# and the tool handling correctly queues and executes it
|
||||
|
||||
memory = SharedMemory()
|
||||
memory.write("query", "What are AI trends?")
|
||||
scoped = memory.with_permissions(
|
||||
buffer = DataBuffer()
|
||||
buffer.write("query", "What are AI trends?")
|
||||
scoped = buffer.with_permissions(
|
||||
read_keys=["query"],
|
||||
write_keys=["result"],
|
||||
)
|
||||
@@ -475,7 +475,7 @@ class TestDelegationIntegration:
|
||||
runtime=runtime,
|
||||
node_id="parent",
|
||||
node_spec=parent_node_spec,
|
||||
memory=scoped,
|
||||
buffer=scoped,
|
||||
input_data={"query": "What are AI trends?"},
|
||||
llm=parent_llm,
|
||||
available_tools=[],
|
||||
@@ -538,12 +538,12 @@ class TestBuildReportToParentTool:
|
||||
node = EventLoopNode()
|
||||
|
||||
# Parent mode: no report_to_parent
|
||||
memory = SharedMemory()
|
||||
buffer = DataBuffer()
|
||||
parent_ctx = NodeContext(
|
||||
runtime=runtime,
|
||||
node_id="parent",
|
||||
node_spec=parent_node_spec,
|
||||
memory=memory,
|
||||
buffer=buffer,
|
||||
input_data={},
|
||||
llm=MockStreamingLLM([]),
|
||||
available_tools=[],
|
||||
@@ -567,7 +567,7 @@ class TestBuildReportToParentTool:
|
||||
runtime=runtime,
|
||||
node_id="sub",
|
||||
node_spec=subagent_node_spec,
|
||||
memory=memory,
|
||||
buffer=buffer,
|
||||
input_data={},
|
||||
llm=MockStreamingLLM([]),
|
||||
available_tools=[],
|
||||
@@ -587,13 +587,13 @@ class TestBuildReportToParentTool:
|
||||
def test_tool_not_visible_without_callback(self, runtime, subagent_node_spec):
|
||||
"""report_to_parent should NOT appear when callback is None even in subagent mode."""
|
||||
node = EventLoopNode()
|
||||
memory = SharedMemory()
|
||||
buffer = DataBuffer()
|
||||
|
||||
ctx = NodeContext(
|
||||
runtime=runtime,
|
||||
node_id="sub",
|
||||
node_spec=subagent_node_spec,
|
||||
memory=memory,
|
||||
buffer=buffer,
|
||||
input_data={},
|
||||
llm=MockStreamingLLM([]),
|
||||
available_tools=[],
|
||||
@@ -630,14 +630,14 @@ class TestReportToParentExecution:
|
||||
|
||||
node = EventLoopNode(config=LoopConfig(max_iterations=10))
|
||||
|
||||
memory = SharedMemory()
|
||||
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
|
||||
buffer = DataBuffer()
|
||||
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
|
||||
|
||||
ctx = NodeContext(
|
||||
runtime=runtime,
|
||||
node_id="parent",
|
||||
node_spec=parent_node_spec,
|
||||
memory=scoped,
|
||||
buffer=scoped,
|
||||
input_data={},
|
||||
llm=subagent_llm,
|
||||
available_tools=[],
|
||||
@@ -689,14 +689,14 @@ class TestReportToParentExecution:
|
||||
config=LoopConfig(max_iterations=10),
|
||||
)
|
||||
|
||||
memory = SharedMemory()
|
||||
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
|
||||
buffer = DataBuffer()
|
||||
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
|
||||
|
||||
ctx = NodeContext(
|
||||
runtime=runtime,
|
||||
node_id="parent",
|
||||
node_spec=parent_node_spec,
|
||||
memory=scoped,
|
||||
buffer=scoped,
|
||||
input_data={},
|
||||
llm=subagent_llm,
|
||||
available_tools=[],
|
||||
@@ -744,14 +744,14 @@ class TestReportToParentExecution:
|
||||
config=LoopConfig(max_iterations=10),
|
||||
)
|
||||
|
||||
memory = SharedMemory()
|
||||
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
|
||||
buffer = DataBuffer()
|
||||
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
|
||||
|
||||
ctx = NodeContext(
|
||||
runtime=runtime,
|
||||
node_id="parent",
|
||||
node_spec=parent_node_spec,
|
||||
memory=scoped,
|
||||
buffer=scoped,
|
||||
input_data={},
|
||||
llm=subagent_llm,
|
||||
available_tools=[],
|
||||
@@ -790,14 +790,14 @@ class TestReportToParentExecution:
|
||||
|
||||
node = EventLoopNode(config=LoopConfig(max_iterations=10))
|
||||
|
||||
memory = SharedMemory()
|
||||
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
|
||||
buffer = DataBuffer()
|
||||
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
|
||||
|
||||
ctx = NodeContext(
|
||||
runtime=runtime,
|
||||
node_id="parent",
|
||||
node_spec=parent_node_spec,
|
||||
memory=scoped,
|
||||
buffer=scoped,
|
||||
input_data={},
|
||||
llm=subagent_llm,
|
||||
available_tools=[],
|
||||
@@ -831,14 +831,14 @@ class TestReportToParentExecution:
|
||||
|
||||
node = EventLoopNode(config=LoopConfig(max_iterations=10))
|
||||
|
||||
memory = SharedMemory()
|
||||
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
|
||||
buffer = DataBuffer()
|
||||
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
|
||||
|
||||
ctx = NodeContext(
|
||||
runtime=runtime,
|
||||
node_id="parent",
|
||||
node_spec=parent_node_spec,
|
||||
memory=scoped,
|
||||
buffer=scoped,
|
||||
input_data={},
|
||||
llm=subagent_llm,
|
||||
available_tools=[],
|
||||
@@ -967,14 +967,14 @@ class TestEscalationFlow:
|
||||
config=LoopConfig(max_iterations=10),
|
||||
)
|
||||
|
||||
memory = SharedMemory()
|
||||
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
|
||||
buffer = DataBuffer()
|
||||
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
|
||||
|
||||
ctx = NodeContext(
|
||||
runtime=runtime,
|
||||
node_id="parent",
|
||||
node_spec=parent_node_spec,
|
||||
memory=scoped,
|
||||
buffer=scoped,
|
||||
input_data={},
|
||||
llm=subagent_llm,
|
||||
available_tools=[],
|
||||
@@ -1045,14 +1045,14 @@ class TestEscalationFlow:
|
||||
config=LoopConfig(max_iterations=10),
|
||||
)
|
||||
|
||||
memory = SharedMemory()
|
||||
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
|
||||
buffer = DataBuffer()
|
||||
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
|
||||
|
||||
ctx = NodeContext(
|
||||
runtime=runtime,
|
||||
node_id="parent",
|
||||
node_spec=parent_node_spec,
|
||||
memory=scoped,
|
||||
buffer=scoped,
|
||||
input_data={},
|
||||
llm=subagent_llm,
|
||||
available_tools=[],
|
||||
@@ -1122,14 +1122,14 @@ class TestEscalationFlow:
|
||||
config=LoopConfig(max_iterations=10),
|
||||
)
|
||||
|
||||
memory = SharedMemory()
|
||||
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
|
||||
buffer = DataBuffer()
|
||||
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
|
||||
|
||||
ctx = NodeContext(
|
||||
runtime=runtime,
|
||||
node_id="parent",
|
||||
node_spec=parent_node_spec,
|
||||
memory=scoped,
|
||||
buffer=scoped,
|
||||
input_data={},
|
||||
llm=subagent_llm,
|
||||
available_tools=[],
|
||||
@@ -1182,14 +1182,14 @@ class TestEscalationFlow:
|
||||
config=LoopConfig(max_iterations=10),
|
||||
)
|
||||
|
||||
memory = SharedMemory()
|
||||
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
|
||||
buffer = DataBuffer()
|
||||
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
|
||||
|
||||
ctx = NodeContext(
|
||||
runtime=runtime,
|
||||
node_id="parent",
|
||||
node_spec=parent_node_spec,
|
||||
memory=scoped,
|
||||
buffer=scoped,
|
||||
input_data={},
|
||||
llm=subagent_llm,
|
||||
available_tools=[],
|
||||
@@ -1232,14 +1232,14 @@ class TestEscalationFlow:
|
||||
config=LoopConfig(max_iterations=10),
|
||||
)
|
||||
|
||||
memory = SharedMemory()
|
||||
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
|
||||
buffer = DataBuffer()
|
||||
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
|
||||
|
||||
ctx = NodeContext(
|
||||
runtime=runtime,
|
||||
node_id="parent",
|
||||
node_spec=parent_node_spec,
|
||||
memory=scoped,
|
||||
buffer=scoped,
|
||||
input_data={},
|
||||
llm=subagent_llm,
|
||||
available_tools=[],
|
||||
@@ -1368,15 +1368,15 @@ class TestSubagentJudge:
|
||||
tool_executor=mock_tool_executor,
|
||||
)
|
||||
|
||||
memory = SharedMemory()
|
||||
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
|
||||
buffer = DataBuffer()
|
||||
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
|
||||
|
||||
task_text = "Check the profile at https://example.com/user789"
|
||||
ctx = NodeContext(
|
||||
runtime=runtime,
|
||||
node_id="parent",
|
||||
node_spec=parent_node_spec,
|
||||
memory=scoped,
|
||||
buffer=scoped,
|
||||
input_data={},
|
||||
llm=subagent_llm,
|
||||
available_tools=[],
|
||||
@@ -1463,14 +1463,14 @@ class TestMarkCompleteViaReport:
|
||||
|
||||
node = EventLoopNode(config=LoopConfig(max_iterations=10))
|
||||
|
||||
memory = SharedMemory()
|
||||
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
|
||||
buffer = DataBuffer()
|
||||
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
|
||||
|
||||
ctx = NodeContext(
|
||||
runtime=runtime,
|
||||
node_id="parent",
|
||||
node_spec=parent_node_spec,
|
||||
memory=scoped,
|
||||
buffer=scoped,
|
||||
input_data={},
|
||||
llm=subagent_llm,
|
||||
available_tools=[],
|
||||
@@ -1524,14 +1524,14 @@ class TestMarkCompleteViaReport:
|
||||
|
||||
node = EventLoopNode(config=LoopConfig(max_iterations=10))
|
||||
|
||||
memory = SharedMemory()
|
||||
scoped = memory.with_permissions(read_keys=[], write_keys=["result"])
|
||||
buffer = DataBuffer()
|
||||
scoped = buffer.with_permissions(read_keys=[], write_keys=["result"])
|
||||
|
||||
ctx = NodeContext(
|
||||
runtime=runtime,
|
||||
node_id="parent",
|
||||
node_spec=parent_node_spec,
|
||||
memory=scoped,
|
||||
buffer=scoped,
|
||||
input_data={},
|
||||
llm=subagent_llm,
|
||||
available_tools=[],
|
||||
@@ -1594,14 +1594,14 @@ class TestMarkCompleteViaReport:
|
||||
config=LoopConfig(max_iterations=5),
|
||||
)
|
||||
|
||||
memory = SharedMemory()
|
||||
scoped = memory.with_permissions(read_keys=[], write_keys=[])
|
||||
buffer = DataBuffer()
|
||||
scoped = buffer.with_permissions(read_keys=[], write_keys=[])
|
||||
|
||||
ctx = NodeContext(
|
||||
runtime=runtime,
|
||||
node_id="sub",
|
||||
node_spec=subagent_node_spec,
|
||||
memory=scoped,
|
||||
buffer=scoped,
|
||||
input_data={"task": "test task"},
|
||||
llm=subagent_llm,
|
||||
available_tools=[],
|
||||
|
||||
@@ -29,7 +29,7 @@ from framework.llm.stream_events import (
|
||||
from framework.runtime.event_bus import AgentEvent, EventBus, EventType
|
||||
from framework.runtime.execution_stream import EntryPointSpec, ExecutionStream
|
||||
from framework.runtime.outcome_aggregator import OutcomeAggregator
|
||||
from framework.runtime.shared_state import SharedStateManager
|
||||
from framework.runtime.shared_state import SharedBufferManager
|
||||
from framework.storage.concurrent import ConcurrentStorage
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -268,7 +268,7 @@ async def test_escalation_e2e_through_execution_stream(tmp_path):
|
||||
),
|
||||
graph=graph,
|
||||
goal=goal,
|
||||
state_manager=SharedStateManager(),
|
||||
state_manager=SharedBufferManager(),
|
||||
storage=storage,
|
||||
outcome_aggregator=OutcomeAggregator(goal, bus),
|
||||
event_bus=bus,
|
||||
@@ -479,7 +479,7 @@ async def test_escalation_cleanup_after_completion(tmp_path):
|
||||
),
|
||||
graph=graph,
|
||||
goal=goal,
|
||||
state_manager=SharedStateManager(),
|
||||
state_manager=SharedBufferManager(),
|
||||
storage=storage,
|
||||
outcome_aggregator=OutcomeAggregator(goal, bus),
|
||||
event_bus=bus,
|
||||
@@ -649,7 +649,7 @@ async def test_mark_complete_e2e_through_execution_stream(tmp_path):
|
||||
),
|
||||
graph=graph,
|
||||
goal=goal,
|
||||
state_manager=SharedStateManager(),
|
||||
state_manager=SharedBufferManager(),
|
||||
storage=storage,
|
||||
outcome_aggregator=OutcomeAggregator(goal, bus),
|
||||
event_bus=bus,
|
||||
|
||||
@@ -33,7 +33,7 @@ Single-entry agents get a `"default"` entry point automatically. There is no sep
|
||||
| `ExecutionStream` | `runtime/execution_stream.py` | Per-entry-point execution queue, session persistence |
|
||||
| `GraphExecutor` | `graph/executor.py` | Node traversal, tool dispatch, checkpointing |
|
||||
| `EventBus` | `runtime/event_bus.py` | Pub/sub for execution events (streaming, I/O) |
|
||||
| `SharedStateManager` | `runtime/shared_state.py` | Cross-stream state with isolation levels |
|
||||
| `SharedBufferManager` | `runtime/shared_state.py` | Cross-stream state with isolation levels |
|
||||
| `OutcomeAggregator` | `runtime/outcome_aggregator.py` | Goal progress tracking across streams |
|
||||
| `SessionStore` | `storage/session_store.py` | Session state persistence (`sessions/{id}/state.json`) |
|
||||
|
||||
@@ -108,7 +108,7 @@ runtime.unsubscribe_from_events(sub_id)
|
||||
# Inspection
|
||||
runtime.is_running # bool
|
||||
runtime.event_bus # EventBus
|
||||
runtime.state_manager # SharedStateManager
|
||||
runtime.state_manager # SharedBufferManager
|
||||
runtime.get_stats() # Runtime statistics
|
||||
```
|
||||
|
||||
|
||||
+14
-14
@@ -59,7 +59,7 @@ flowchart TB
|
||||
subgraph Infra [Infra]
|
||||
TR["Tool Registry"]
|
||||
WTM["Write through Conversation Memory<br/>(Logs/RAM/Harddrive)"]
|
||||
SM["Shared Memory<br/>(State/Harddrive)"]
|
||||
SM["Data Buffer<br/>(State/Harddrive)"]
|
||||
EB["Event Bus<br/>(RAM)"]
|
||||
CS["Credential Store<br/>(Harddrive/Cloud)"]
|
||||
|
||||
@@ -132,7 +132,7 @@ flowchart TB
|
||||
CB -->|"Modify Worker Bee"| WorkerBees
|
||||
|
||||
%% =========================================
|
||||
%% SHARED MEMORY & LOGS ACCESS
|
||||
%% DATA BUFFER & LOGS ACCESS
|
||||
%% =========================================
|
||||
|
||||
%% Worker Bees Access
|
||||
@@ -152,11 +152,11 @@ flowchart TB
|
||||
| Subsystem | Role | Description |
|
||||
| ----------------------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| **Event Loop Node** | Entry point | Listens for external events (schedulers, webhooks, SSE), triggers the event loop, and delegates to sub-agents. Its conversation mirrors the Worker Bees conversation for context continuity. |
|
||||
| **Worker Bees** | Execution | A graph of nodes that execute the actual work. Each node in the graph can become the Active Node. Workers maintain their own conversation and system prompt, and read/write to shared memory. |
|
||||
| **Worker Bees** | Execution | A graph of nodes that execute the actual work. Each node in the graph can become the Active Node. Workers maintain their own conversation and system prompt, and read/write to the data buffer. |
|
||||
| **Judge** | Evaluation | Runs as an **isolated graph** alongside the worker on a 2-minute timer. Reads worker session logs via `get_worker_health_summary` and accumulates observations in a continuous conversation (its own memory) to assess worker health trends. Criteria and principles align with Worker/Queen system prompts at design-time. |
|
||||
| **Queen Bee** | Oversight | The orchestration layer. Subscribes to Active Node events via the Event Bus and has read/write access to shared memory and credentials. Users can talk directly to the Queen Bee. |
|
||||
| **Queen Bee** | Oversight | The orchestration layer. Subscribes to Active Node events via the Event Bus and has read/write access to the data buffer and credentials. Users can talk directly to the Queen Bee. |
|
||||
| **Sub-Agent Framework** | Delegation | Enables parent nodes to delegate tasks to specialized sub-agents via `delegate_to_sub_agent`. Sub-agents run as independent EventLoopNodes with read-only memory snapshots, their own conversation, and a `SubagentJudge`. They report progress via `report_to_parent` and can escalate to users via `wait_for_response`. Multiple delegations execute in parallel. Nested delegation is prevented. |
|
||||
| **Infra** | Services | Shared infrastructure: Tool Registry (assigned to Event Loop Nodes and Sub-Agents), Write-through Conversation Memory (logs across RAM and disk), Shared Memory (state on disk), Event Bus (pub/sub in RAM), and Credential Store (encrypted on disk or cloud). |
|
||||
| **Infra** | Services | Shared infrastructure: Tool Registry (assigned to Event Loop Nodes and Sub-Agents), Write-through Conversation Memory (logs across RAM and disk), Data Buffer (state on disk), Event Bus (pub/sub in RAM), and Credential Store (encrypted on disk or cloud). |
|
||||
|
||||
### Data Flow Patterns
|
||||
|
||||
@@ -363,7 +363,7 @@ flowchart TB
|
||||
%% =========================================
|
||||
%% SHARED MEMORY
|
||||
%% =========================================
|
||||
subgraph SharedMem [Shared Memory]
|
||||
subgraph SharedMem [Data Buffer]
|
||||
ExecState["Execution State<br/>(private)"]
|
||||
StreamState["Stream State<br/>(shared within stream)"]
|
||||
GlobalState["Global State<br/>(shared across all)"]
|
||||
@@ -376,7 +376,7 @@ flowchart TB
|
||||
%% =========================================
|
||||
subgraph PromptOnion [System Prompt — 3-Layer Onion]
|
||||
Layer1["Layer 1 — Identity<br/>(static, never changes)"]
|
||||
Layer2["Layer 2 — Narrative<br/>(auto-built from<br/>SharedMemory +<br/>execution path)"]
|
||||
Layer2["Layer 2 — Narrative<br/>(auto-built from<br/>DataBuffer +<br/>execution path)"]
|
||||
Layer3["Layer 3 — Focus<br/>(current node's<br/>system_prompt)"]
|
||||
end
|
||||
|
||||
@@ -410,11 +410,11 @@ flowchart TB
|
||||
|
||||
**2. Judge feedback becomes conversation memory.** When the judge issues a RETRY verdict with feedback, that feedback is injected as a `[Judge feedback]: ...` user message into the conversation. On the next LLM turn, the agent sees its prior attempt, the judge's critique, and can adjust. This is the core reflexion mechanism — in-context learning without model retraining.
|
||||
|
||||
**3. The three-layer prompt onion refreshes each turn.** Layer 1 (identity) is static. Layer 2 (narrative) is rebuilt deterministically from `SharedMemory.read_all()` and the execution path — listing completed phases and current state values. Layer 3 (focus) is the current node's `system_prompt`. At phase transitions in continuous mode, Layer 3 swaps while Layers 1-2 and the full conversation history carry forward.
|
||||
**3. The three-layer prompt onion refreshes each turn.** Layer 1 (identity) is static. Layer 2 (narrative) is rebuilt deterministically from `DataBuffer.read_all()` and the execution path — listing completed phases and current state values. Layer 3 (focus) is the current node's `system_prompt`. At phase transitions in continuous mode, Layer 3 swaps while Layers 1-2 and the full conversation history carry forward.
|
||||
|
||||
**4. Phase transitions inject structured reflection.** When execution moves between nodes, a transition marker is inserted into the conversation containing: what phase completed, all outputs in memory, available data files, available tools, and an explicit reflection prompt: *"Before proceeding, briefly reflect: what went well in the previous phase? Are there any gaps or surprises worth noting?"* This engineered metacognition surfaces issues before they compound.
|
||||
|
||||
**5. Shared memory connects phases.** On ACCEPT, the accumulator's outputs are written to `SharedMemory`. The narrative layer reads these values to describe progress. In continuous mode, subsequent nodes see both the conversation history (what was discussed) and the structured memory (what was decided). In isolated mode, a `ContextHandoff` summarizes the prior node's conversation for the next node's input.
|
||||
**5. Data buffer connects phases.** On ACCEPT, the accumulator's outputs are written to `DataBuffer`. The narrative layer reads these values to describe progress. In continuous mode, subsequent nodes see both the conversation history (what was discussed) and the structured memory (what was decided). In isolated mode, a `ContextHandoff` summarizes the prior node's conversation for the next node's input.
|
||||
|
||||
### The Judge Evaluation Pipeline
|
||||
|
||||
@@ -773,8 +773,8 @@ The system architecture (see diagram above) maps onto four logical layers. The *
|
||||
│ ┌─────────────────────────────────────────────────────────────┐ │
|
||||
│ │ EXECUTION LAYER (Worker Bees) │ │
|
||||
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │
|
||||
│ │ │ Graph │───►│ Active │───►│ Shared │ │ │
|
||||
│ │ │ Executor │ │ Node │ │ Memory │ │ │
|
||||
│ │ │ Graph │───►│ Active │───►│ Data │ │ │
|
||||
│ │ │ Executor │ │ Node │ │ Buffer │ │ │
|
||||
│ │ └──────────┘ └──────────┘ └──────────┘ │ │
|
||||
│ │ Event Loop Node delegates │ to Sub-Agents (parallel) │ │
|
||||
│ │ Sub-Agents: read-only memory │ SubagentJudge │ report_to_parent│ │
|
||||
@@ -1057,8 +1057,8 @@ class SignalWeights:
|
||||
| **Rule Generation** | Research | Transforming human decisions into deterministic rules (closing the loop) |
|
||||
| **HybridJudge** | Engineering | Implementation of triangulation with priority-ordered evaluation |
|
||||
| **Reflexion Loop** | Engineering | Worker-Judge architecture with RETRY/REPLAN/ESCALATE |
|
||||
| **Memory Reflection** | Engineering | 3-layer prompt onion, judge feedback injection, shared memory |
|
||||
| **Graph Execution** | Engineering | Node composition, shared memory, edge traversal, sub-agent delegation |
|
||||
| **Memory Reflection** | Engineering | 3-layer prompt onion, judge feedback injection, data buffer |
|
||||
| **Graph Execution** | Engineering | Node composition, data buffer, edge traversal, sub-agent delegation |
|
||||
| **HITL Protocol** | Engineering | Pause/resume, approval workflows, escalation handling |
|
||||
|
||||
---
|
||||
@@ -1075,7 +1075,7 @@ The Hive Agent Framework addresses the fundamental reliability crisis in agentic
|
||||
|
||||
4. **The Foundation**: Goal-driven architecture ensures we're optimizing for user intent, not metric gaming. The reflexion loop between Worker Bees and Judge enables learning from failure without expensive search.
|
||||
|
||||
5. **The Memory System**: Agents reflect through three mechanisms — the conversation history (carrying judge feedback as injected user messages), the three-layer prompt onion (identity → narrative → focus, rebuilt each turn from shared memory), and structured phase transition markers with explicit reflection prompts at node boundaries.
|
||||
5. **The Memory System**: Agents reflect through three mechanisms — the conversation history (carrying judge feedback as injected user messages), the three-layer prompt onion (identity → narrative → focus, rebuilt each turn from the data buffer), and structured phase transition markers with explicit reflection prompts at node boundaries.
|
||||
|
||||
6. **The Learning Path**: Human escalations aren't just fallbacks—they're training signals. Confidence calibration tunes thresholds automatically. Rule generation transforms repeated human decisions into deterministic automation.
|
||||
|
||||
|
||||
@@ -141,9 +141,9 @@ Compare to proper state management:
|
||||
|
||||
```python
|
||||
# Isolated test - no external dependencies
|
||||
memory = manager.create_memory("test-exec", "test-stream", IsolationLevel.ISOLATED)
|
||||
await memory.write("key", "value")
|
||||
assert await memory.read("key") == "value"
|
||||
buf = manager.create_buffer("test-exec", "test-stream", IsolationLevel.ISOLATED)
|
||||
await buf.write("key", "value")
|
||||
assert await buf.read("key") == "value"
|
||||
# Other tests unaffected
|
||||
```
|
||||
|
||||
@@ -169,7 +169,7 @@ The new architecture introduces explicit state management with proper isolation:
|
||||
│ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ │
|
||||
│ └────────────────┼────────────────┘ │
|
||||
│ ↓ │
|
||||
│ SharedStateManager │
|
||||
│ SharedBufferManager │
|
||||
│ (Isolation Levels) │
|
||||
│ │
|
||||
│ OutcomeAggregator │
|
||||
@@ -179,7 +179,7 @@ The new architecture introduces explicit state management with proper isolation:
|
||||
|
||||
### Key Components
|
||||
|
||||
#### 1. SharedStateManager with Isolation Levels
|
||||
#### 1. SharedBufferManager with Isolation Levels
|
||||
|
||||
```python
|
||||
class IsolationLevel(Enum):
|
||||
@@ -272,11 +272,11 @@ The key distinction:
|
||||
|
||||
| Use Case | Correct Approach |
|
||||
| ------------------------------------ | --------------------------------- |
|
||||
| Coordinate between executions | SharedStateManager |
|
||||
| Coordinate between executions | SharedBufferManager |
|
||||
| Track decision outcomes | StreamRuntime + OutcomeAggregator |
|
||||
| Call external API | Tool |
|
||||
| Persist business data | Tool (to external storage) |
|
||||
| Share scratch state during execution | StreamMemory |
|
||||
| Share scratch state during execution | StreamBuffer |
|
||||
| Publish events to other streams | EventBus |
|
||||
|
||||
---
|
||||
@@ -332,6 +332,6 @@ The multi-entry-point architecture doesn't just enable concurrent execution—it
|
||||
## References
|
||||
|
||||
- [core/framework/runtime/agent_runtime.py](../../core/framework/runtime/agent_runtime.py) - AgentRuntime implementation
|
||||
- [core/framework/runtime/shared_state.py](../../core/framework/runtime/shared_state.py) - SharedStateManager
|
||||
- [core/framework/runtime/shared_state.py](../../core/framework/runtime/shared_state.py) - SharedBufferManager
|
||||
- [core/framework/runtime/outcome_aggregator.py](../../core/framework/runtime/outcome_aggregator.py) - Cross-stream goal evaluation
|
||||
- [core/framework/runtime/tests/test_agent_runtime.py](../../core/framework/runtime/tests/test_agent_runtime.py) - Test examples
|
||||
|
||||
@@ -228,7 +228,7 @@ AgentRuntime.__init__(...) (line 118)
|
||||
├─ Initialize SessionStore for unified sessions [line 182]
|
||||
│
|
||||
├─ Initialize shared components:
|
||||
│ ├─ SharedStateManager [line 185]
|
||||
│ ├─ SharedBufferManager [line 185]
|
||||
│ ├─ EventBus (or use shared one) [line 186]
|
||||
│ └─ OutcomeAggregator [line 187]
|
||||
│
|
||||
@@ -411,8 +411,8 @@ await _run_execution(ctx) (line 538)
|
||||
│
|
||||
├─ Mark status as "running" [line 559]
|
||||
│
|
||||
├─ Create execution-scoped memory [line 572-576]
|
||||
│ └─ self._state_manager.create_memory(execution_id, stream_id, isolation)
|
||||
├─ Create execution-scoped buffer [line 572-576]
|
||||
│ └─ self._state_manager.create_buffer(execution_id, stream_id, isolation)
|
||||
│
|
||||
├─ Start runtime adapter [line 579-586]
|
||||
│ └─ runtime_adapter.start_run(goal_id, goal_description, input_data)
|
||||
@@ -480,7 +480,7 @@ await executor.execute(graph, goal, input_data, session_state, checkpoint_config
|
||||
│
|
||||
├─ Validate tool availability [line 320-332]
|
||||
│
|
||||
├─ Initialize SharedMemory for session [line 335]
|
||||
├─ Initialize DataBuffer for session [line 335]
|
||||
│
|
||||
├─ Restore session state if resuming [line 353-369]
|
||||
│ └─ Load memory from previous session
|
||||
@@ -576,7 +576,7 @@ Shared Component: LLM Provider
|
||||
|
||||
Memory Flow:
|
||||
├─ Each execution has ExecutionContext with input_data
|
||||
├─ SharedMemory created per execution (line 572-576 in execution_stream.py)
|
||||
├─ DataBuffer created per execution (line 572-576 in execution_stream.py)
|
||||
├─ Session state restored if resuming (line 354-369 in executor.py)
|
||||
├─ Each node reads from memory via input_keys
|
||||
├─ Each node writes to memory via output_keys
|
||||
|
||||
Reference in New Issue
Block a user