fix: isolate session loading

This commit is contained in:
Timothy
2026-02-24 11:02:58 -08:00
parent 28a71b70a8
commit 3963855d1d
6 changed files with 96 additions and 18910 deletions
+48
View File
@@ -328,6 +328,20 @@ class LiteLLMProvider(LLMProvider):
f"Full request dumped to: {dump_path}" f"Full request dumped to: {dump_path}"
) )
# finish_reason=length means the model exhausted max_tokens
# before producing content. Retrying with the same max_tokens
# will never help — return immediately instead of looping.
if finish_reason == "length":
max_tok = kwargs.get("max_tokens", "unset")
logger.error(
f"[retry] {model} returned empty content with "
f"finish_reason=length (max_tokens={max_tok}). "
f"The model exhausted its token budget before "
f"producing visible output. Increase max_tokens "
f"or use a different model. Not retrying."
)
return response
if attempt == retries: if attempt == retries:
logger.error( logger.error(
f"[retry] GAVE UP on {model} after {retries + 1} " f"[retry] GAVE UP on {model} after {retries + 1} "
@@ -621,6 +635,20 @@ class LiteLLMProvider(LLMProvider):
f"Full request dumped to: {dump_path}" f"Full request dumped to: {dump_path}"
) )
# finish_reason=length means the model exhausted max_tokens
# before producing content. Retrying with the same max_tokens
# will never help — return immediately instead of looping.
if finish_reason == "length":
max_tok = kwargs.get("max_tokens", "unset")
logger.error(
f"[async-retry] {model} returned empty content with "
f"finish_reason=length (max_tokens={max_tok}). "
f"The model exhausted its token budget before "
f"producing visible output. Increase max_tokens "
f"or use a different model. Not retrying."
)
return response
if attempt == retries: if attempt == retries:
logger.error( logger.error(
f"[async-retry] GAVE UP on {model} after {retries + 1} " f"[async-retry] GAVE UP on {model} after {retries + 1} "
@@ -903,6 +931,7 @@ class LiteLLMProvider(LLMProvider):
tool_calls_acc: dict[int, dict[str, str]] = {} tool_calls_acc: dict[int, dict[str, str]] = {}
input_tokens = 0 input_tokens = 0
output_tokens = 0 output_tokens = 0
stream_finish_reason: str | None = None
try: try:
response = await litellm.acompletion(**kwargs) # type: ignore[union-attr] response = await litellm.acompletion(**kwargs) # type: ignore[union-attr]
@@ -938,6 +967,7 @@ class LiteLLMProvider(LLMProvider):
# --- Finish --- # --- Finish ---
if choice.finish_reason: if choice.finish_reason:
stream_finish_reason = choice.finish_reason
for _idx, tc_data in sorted(tool_calls_acc.items()): for _idx, tc_data in sorted(tool_calls_acc.items()):
try: try:
parsed_args = json.loads(tc_data["arguments"]) parsed_args = json.loads(tc_data["arguments"])
@@ -992,6 +1022,24 @@ class LiteLLMProvider(LLMProvider):
for event in tail_events: for event in tail_events:
yield event yield event
return return
# finish_reason=length means the model exhausted
# max_tokens before producing content. Retrying with
# the same max_tokens will never help.
if stream_finish_reason == "length":
max_tok = kwargs.get("max_tokens", "unset")
logger.error(
f"[stream] {self.model} returned empty content "
f"with finish_reason=length "
f"(max_tokens={max_tok}). The model exhausted "
f"its token budget before producing visible "
f"output. Increase max_tokens or use a "
f"different model. Not retrying."
)
for event in tail_events:
yield event
return
wait = _compute_retry_delay(attempt) wait = _compute_retry_delay(attempt)
token_count, token_method = _estimate_tokens( token_count, token_method = _estimate_tokens(
self.model, self.model,
+19 -15
View File
@@ -61,7 +61,7 @@ async def _extract_subgraph_steps(nodes: list, llm: Any) -> None:
response = await llm.acomplete( response = await llm.acomplete(
messages=[{"role": "user", "content": prompt}], messages=[{"role": "user", "content": prompt}],
max_tokens=1000, max_tokens=4096,
json_mode=True, json_mode=True,
) )
@@ -172,13 +172,6 @@ class AgentManager:
if runner._agent_runtime is None: if runner._agent_runtime is None:
await loop.run_in_executor(None, runner._setup) await loop.run_in_executor(None, runner._setup)
# Extract subgraph steps for frontend visualization (non-critical)
if runner.graph and runner._llm:
try:
await _extract_subgraph_steps(runner.graph.nodes, runner._llm)
except Exception as e:
logger.warning(f"Subgraph extraction skipped: {e}")
runtime = runner._agent_runtime runtime = runner._agent_runtime
# Start runtime on event loop # Start runtime on event loop
@@ -224,6 +217,9 @@ class AgentManager:
- **Judge**: timer-driven background GraphExecutor (silent monitoring) - **Judge**: timer-driven background GraphExecutor (silent monitoring)
- **Worker**: the existing AgentRuntime (unchanged) - **Worker**: the existing AgentRuntime (unchanged)
""" """
import uuid
from datetime import datetime
from framework.graph.executor import GraphExecutor from framework.graph.executor import GraphExecutor
from framework.monitoring import judge_goal, judge_graph from framework.monitoring import judge_goal, judge_graph
from framework.runner.tool_registry import ToolRegistry from framework.runner.tool_registry import ToolRegistry
@@ -238,6 +234,12 @@ class AgentManager:
event_bus = runtime._event_bus event_bus = runtime._event_bus
llm = runtime._llm llm = runtime._llm
# Generate a shared session ID for queen, judge, and worker.
# All three use the same ID so conversations are scoped to this
# agent load and start fresh each time.
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
session_id = f"session_{ts}_{uuid.uuid4().hex[:8]}"
# 1. Monitoring tools — standalone registry, NOT merged into worker # 1. Monitoring tools — standalone registry, NOT merged into worker
monitoring_registry = ToolRegistry() monitoring_registry = ToolRegistry()
register_worker_monitoring_tools( register_worker_monitoring_tools(
@@ -247,14 +249,15 @@ class AgentManager:
worker_graph_id=runtime._graph_id, worker_graph_id=runtime._graph_id,
) )
# 2. Storage dirs # 2. Storage dirs — scoped by session_id so each agent load
judge_dir = storage_path / "graphs" / "worker_health_judge" / "session" # gets fresh queen/judge conversations.
judge_dir = storage_path / "graphs" / "judge" / "session" / session_id
judge_dir.mkdir(parents=True, exist_ok=True) judge_dir.mkdir(parents=True, exist_ok=True)
queen_dir = storage_path / "graphs" / "queen" / "session" queen_dir = storage_path / "graphs" / "queen" / "session" / session_id
queen_dir.mkdir(parents=True, exist_ok=True) queen_dir.mkdir(parents=True, exist_ok=True)
# 3. Health judge — background task, fires every 2 minutes # 3. Health judge — background task, fires every 2 minutes
judge_runtime = Runtime(storage_path / "graphs" / "worker_health_judge") judge_runtime = Runtime(storage_path / "graphs" / "judge")
monitoring_tools = list(monitoring_registry.get_tools().values()) monitoring_tools = list(monitoring_registry.get_tools().values())
monitoring_executor = monitoring_registry.get_executor() monitoring_executor = monitoring_registry.get_executor()
@@ -272,7 +275,7 @@ class AgentManager:
tools=monitoring_tools, tools=monitoring_tools,
tool_executor=monitoring_executor, tool_executor=monitoring_executor,
event_bus=event_bus, event_bus=event_bus,
stream_id="worker_health_judge", stream_id="judge",
storage_path=judge_dir, storage_path=judge_dir,
loop_config=judge_graph.loop_config, loop_config=judge_graph.loop_config,
) )
@@ -282,7 +285,7 @@ class AgentManager:
input_data={ input_data={
"event": {"source": "timer", "reason": "scheduled"}, "event": {"source": "timer", "reason": "scheduled"},
}, },
session_state={"resume_session_id": "persistent"}, session_state={"resume_session_id": session_id},
) )
except Exception: except Exception:
logger.error("Health judge tick failed", exc_info=True) logger.error("Health judge tick failed", exc_info=True)
@@ -300,6 +303,7 @@ class AgentManager:
worker_runtime=runtime, worker_runtime=runtime,
event_bus=event_bus, event_bus=event_bus,
storage_path=storage_path, storage_path=storage_path,
session_id=session_id,
) )
register_worker_monitoring_tools( register_worker_monitoring_tools(
queen_registry, queen_registry,
@@ -365,7 +369,7 @@ class AgentManager:
graph=queen_graph, graph=queen_graph,
goal=queen_goal, goal=queen_goal,
input_data={"greeting": "Session started."}, input_data={"greeting": "Session started."},
session_state={"resume_session_id": "persistent"}, session_state={"resume_session_id": session_id},
) )
logger.warning("Queen executor returned (should be forever-alive)") logger.warning("Queen executor returned (should be forever-alive)")
except Exception: except Exception:
+11 -1
View File
@@ -36,9 +36,14 @@ def register_queen_lifecycle_tools(
worker_runtime: AgentRuntime, worker_runtime: AgentRuntime,
event_bus: EventBus, event_bus: EventBus,
storage_path: Path | None = None, storage_path: Path | None = None,
session_id: str | None = None,
) -> int: ) -> int:
"""Register queen lifecycle tools bound to *worker_runtime*. """Register queen lifecycle tools bound to *worker_runtime*.
Args:
session_id: Shared session ID so the worker uses the same session
scope as the queen and judge.
Returns the number of tools registered. Returns the number of tools registered.
""" """
from framework.llm.provider import Tool from framework.llm.provider import Tool
@@ -55,7 +60,12 @@ def register_queen_lifecycle_tools(
""" """
try: try:
# Get session state from any prior execution for memory continuity # Get session state from any prior execution for memory continuity
session_state = worker_runtime._get_primary_session_state("default") session_state = worker_runtime._get_primary_session_state("default") or {}
# Use the shared session ID so queen, judge, and worker all
# scope their conversations to the same session.
if session_id:
session_state["resume_session_id"] = session_id
exec_id = await worker_runtime.trigger( exec_id = await worker_runtime.trigger(
entry_point_id="default", entry_point_id="default",
+13 -9
View File
@@ -468,6 +468,8 @@ class AdenTUI(App):
into the worker runtime. The worker is completely untouched. into the worker runtime. The worker is completely untouched.
""" """
import asyncio import asyncio
import uuid
from datetime import datetime
from pathlib import Path from pathlib import Path
from framework.graph.executor import GraphExecutor from framework.graph.executor import GraphExecutor
@@ -486,6 +488,10 @@ class AdenTUI(App):
llm = self.runtime._llm llm = self.runtime._llm
agent_loop = self.chat_repl._agent_loop agent_loop = self.chat_repl._agent_loop
# Generate a shared session ID for queen, judge, and worker.
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
session_id = f"session_{ts}_{uuid.uuid4().hex[:8]}"
# 1. Monitoring tools (health summary, emit ticket, notify operator). # 1. Monitoring tools (health summary, emit ticket, notify operator).
# Registered on a standalone registry — NOT merged into the worker. # Registered on a standalone registry — NOT merged into the worker.
monitoring_registry = ToolRegistry() monitoring_registry = ToolRegistry()
@@ -496,11 +502,11 @@ class AdenTUI(App):
worker_graph_id=self.runtime._graph_id, worker_graph_id=self.runtime._graph_id,
) )
# 2. Storage dirs — under worker's base path but completely owned # 2. Storage dirs — scoped by session_id so each agent load
# by the judge/queen. Worker never writes here. # gets fresh queen/judge conversations.
judge_dir = storage_path / "graphs" / "judge" / "session" judge_dir = storage_path / "graphs" / "judge" / "session" / session_id
judge_dir.mkdir(parents=True, exist_ok=True) judge_dir.mkdir(parents=True, exist_ok=True)
queen_dir = storage_path / "graphs" / "queen" / "session" queen_dir = storage_path / "graphs" / "queen" / "session" / session_id
queen_dir.mkdir(parents=True, exist_ok=True) queen_dir.mkdir(parents=True, exist_ok=True)
# --------------------------------------------------------------- # ---------------------------------------------------------------
@@ -542,7 +548,7 @@ class AdenTUI(App):
input_data={ input_data={
"event": {"source": "timer", "reason": "scheduled"}, "event": {"source": "timer", "reason": "scheduled"},
}, },
session_state={"resume_session_id": "persistent"}, session_state={"resume_session_id": session_id},
) )
except Exception: except Exception:
log.error("Health judge tick failed", exc_info=True) log.error("Health judge tick failed", exc_info=True)
@@ -584,6 +590,7 @@ class AdenTUI(App):
worker_runtime=self.runtime, worker_runtime=self.runtime,
event_bus=event_bus, event_bus=event_bus,
storage_path=storage_path, storage_path=storage_path,
session_id=session_id,
) )
register_worker_monitoring_tools( register_worker_monitoring_tools(
queen_registry, queen_registry,
@@ -596,9 +603,6 @@ class AdenTUI(App):
queen_tool_executor = queen_registry.get_executor() queen_tool_executor = queen_registry.get_executor()
# Build worker identity to inject into the queen's system prompt. # Build worker identity to inject into the queen's system prompt.
# This must be in the system prompt (not input_data) because
# persistent sessions restore the old conversation and skip
# _build_initial_message — the queen would lose context.
worker_graph_id = self.runtime._graph_id worker_graph_id = self.runtime._graph_id
worker_goal_name = getattr(self.runtime.goal, "name", worker_graph_id) worker_goal_name = getattr(self.runtime.goal, "name", worker_graph_id)
worker_goal_desc = getattr(self.runtime.goal, "description", "") worker_goal_desc = getattr(self.runtime.goal, "description", "")
@@ -657,7 +661,7 @@ class AdenTUI(App):
graph=queen_graph, graph=queen_graph,
goal=queen_goal, goal=queen_goal,
input_data={"greeting": "Session started."}, input_data={"greeting": "Session started."},
session_state={"resume_session_id": "persistent"}, session_state={"resume_session_id": session_id},
) )
# Should never reach here — queen is forever-alive. # Should never reach here — queen is forever-alive.
log.warning( log.warning(
+1 -1
View File
@@ -569,7 +569,7 @@ export default function Workspace() {
const streamId = event.stream_id; const streamId = event.stream_id;
// Suppress judge events (silent background monitoring) // Suppress judge events (silent background monitoring)
if (streamId === "worker_health_judge") return; if (streamId === "judge") return;
// Determine if this is a queen event // Determine if this is a queen event
const isQueen = streamId === "queen"; const isQueen = streamId === "queen";
+4 -18884
View File
File diff suppressed because it is too large Load Diff