fix: isolate session loading
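The change below introduces one shared, per-load session ID that scopes the queen/judge storage directories and replaces the hard-coded "persistent" resume ID. A minimal sketch of that scheme, for orientation only (the real wiring lives in the AgentManager and AdenTUI hunks below; make_session_scope is a hypothetical helper, not part of the codebase):

    import uuid
    from datetime import datetime
    from pathlib import Path

    def make_session_scope(storage_path: Path) -> tuple[str, Path, Path]:
        # One ID per agent load; queen, judge, and worker all share it, so
        # conversations are scoped to this load and start fresh each time.
        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
        session_id = f"session_{ts}_{uuid.uuid4().hex[:8]}"

        # Per-session storage dirs, so old sessions never bleed into a new load.
        judge_dir = storage_path / "graphs" / "judge" / "session" / session_id
        queen_dir = storage_path / "graphs" / "queen" / "session" / session_id
        judge_dir.mkdir(parents=True, exist_ok=True)
        queen_dir.mkdir(parents=True, exist_ok=True)
        return session_id, judge_dir, queen_dir

The executors are then started with session_state={"resume_session_id": session_id} instead of the old "persistent" value, as the hunks below show.
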
@@ -328,6 +328,20 @@ class LiteLLMProvider(LLMProvider):
                     f"Full request dumped to: {dump_path}"
                 )

+                # finish_reason=length means the model exhausted max_tokens
+                # before producing content. Retrying with the same max_tokens
+                # will never help — return immediately instead of looping.
+                if finish_reason == "length":
+                    max_tok = kwargs.get("max_tokens", "unset")
+                    logger.error(
+                        f"[retry] {model} returned empty content with "
+                        f"finish_reason=length (max_tokens={max_tok}). "
+                        f"The model exhausted its token budget before "
+                        f"producing visible output. Increase max_tokens "
+                        f"or use a different model. Not retrying."
+                    )
+                    return response
+
                 if attempt == retries:
                     logger.error(
                         f"[retry] GAVE UP on {model} after {retries + 1} "
@@ -621,6 +635,20 @@ class LiteLLMProvider(LLMProvider):
                     f"Full request dumped to: {dump_path}"
                 )

+                # finish_reason=length means the model exhausted max_tokens
+                # before producing content. Retrying with the same max_tokens
+                # will never help — return immediately instead of looping.
+                if finish_reason == "length":
+                    max_tok = kwargs.get("max_tokens", "unset")
+                    logger.error(
+                        f"[async-retry] {model} returned empty content with "
+                        f"finish_reason=length (max_tokens={max_tok}). "
+                        f"The model exhausted its token budget before "
+                        f"producing visible output. Increase max_tokens "
+                        f"or use a different model. Not retrying."
+                    )
+                    return response
+
                 if attempt == retries:
                     logger.error(
                         f"[async-retry] GAVE UP on {model} after {retries + 1} "
@@ -903,6 +931,7 @@ class LiteLLMProvider(LLMProvider):
             tool_calls_acc: dict[int, dict[str, str]] = {}
             input_tokens = 0
             output_tokens = 0
+            stream_finish_reason: str | None = None

             try:
                 response = await litellm.acompletion(**kwargs)  # type: ignore[union-attr]
@@ -938,6 +967,7 @@ class LiteLLMProvider(LLMProvider):

                 # --- Finish ---
                 if choice.finish_reason:
+                    stream_finish_reason = choice.finish_reason
                     for _idx, tc_data in sorted(tool_calls_acc.items()):
                         try:
                             parsed_args = json.loads(tc_data["arguments"])
@@ -992,6 +1022,24 @@ class LiteLLMProvider(LLMProvider):
                     for event in tail_events:
                         yield event
                     return
+
+                # finish_reason=length means the model exhausted
+                # max_tokens before producing content. Retrying with
+                # the same max_tokens will never help.
+                if stream_finish_reason == "length":
+                    max_tok = kwargs.get("max_tokens", "unset")
+                    logger.error(
+                        f"[stream] {self.model} returned empty content "
+                        f"with finish_reason=length "
+                        f"(max_tokens={max_tok}). The model exhausted "
+                        f"its token budget before producing visible "
+                        f"output. Increase max_tokens or use a "
+                        f"different model. Not retrying."
+                    )
+                    for event in tail_events:
+                        yield event
+                    return
+
                 wait = _compute_retry_delay(attempt)
                 token_count, token_method = _estimate_tokens(
                     self.model,

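The hunks above also add an early exit to the retry path: when a response comes back empty with finish_reason="length", the model spent its entire max_tokens budget before emitting visible output, so retrying with the same budget cannot succeed. A standalone sketch of that rule, assuming the caller separately decides whether a retry is otherwise allowed (should_retry_empty_response is illustrative, not the provider's actual API):

    import logging

    logger = logging.getLogger(__name__)

    def should_retry_empty_response(finish_reason: str | None, max_tokens: int | None) -> bool:
        # finish_reason="length" means the token budget ran out before any
        # visible content was produced; the same budget gives the same result.
        if finish_reason == "length":
            logger.error(
                "empty content with finish_reason=length (max_tokens=%s); "
                "increase max_tokens or switch models instead of retrying",
                max_tokens if max_tokens is not None else "unset",
            )
            return False
        # Other empty responses may be transient, so a retry can still help.
        return True
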
@@ -61,7 +61,7 @@ async def _extract_subgraph_steps(nodes: list, llm: Any) -> None:

         response = await llm.acomplete(
             messages=[{"role": "user", "content": prompt}],
-            max_tokens=1000,
+            max_tokens=4096,
             json_mode=True,
         )

@@ -172,13 +172,6 @@ class AgentManager:
         if runner._agent_runtime is None:
             await loop.run_in_executor(None, runner._setup)
-
-        # Extract subgraph steps for frontend visualization (non-critical)
-        if runner.graph and runner._llm:
-            try:
-                await _extract_subgraph_steps(runner.graph.nodes, runner._llm)
-            except Exception as e:
-                logger.warning(f"Subgraph extraction skipped: {e}")

         runtime = runner._agent_runtime

         # Start runtime on event loop
@@ -224,6 +217,9 @@ class AgentManager:
         - **Judge**: timer-driven background GraphExecutor (silent monitoring)
         - **Worker**: the existing AgentRuntime (unchanged)
         """
+        import uuid
+        from datetime import datetime
+
         from framework.graph.executor import GraphExecutor
         from framework.monitoring import judge_goal, judge_graph
         from framework.runner.tool_registry import ToolRegistry
@@ -238,6 +234,12 @@ class AgentManager:
         event_bus = runtime._event_bus
         llm = runtime._llm

+        # Generate a shared session ID for queen, judge, and worker.
+        # All three use the same ID so conversations are scoped to this
+        # agent load and start fresh each time.
+        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
+        session_id = f"session_{ts}_{uuid.uuid4().hex[:8]}"
+
         # 1. Monitoring tools — standalone registry, NOT merged into worker
         monitoring_registry = ToolRegistry()
         register_worker_monitoring_tools(
@@ -247,14 +249,15 @@ class AgentManager:
             worker_graph_id=runtime._graph_id,
         )

-        # 2. Storage dirs
-        judge_dir = storage_path / "graphs" / "worker_health_judge" / "session"
+        # 2. Storage dirs — scoped by session_id so each agent load
+        # gets fresh queen/judge conversations.
+        judge_dir = storage_path / "graphs" / "judge" / "session" / session_id
         judge_dir.mkdir(parents=True, exist_ok=True)
-        queen_dir = storage_path / "graphs" / "queen" / "session"
+        queen_dir = storage_path / "graphs" / "queen" / "session" / session_id
         queen_dir.mkdir(parents=True, exist_ok=True)

         # 3. Health judge — background task, fires every 2 minutes
-        judge_runtime = Runtime(storage_path / "graphs" / "worker_health_judge")
+        judge_runtime = Runtime(storage_path / "graphs" / "judge")
         monitoring_tools = list(monitoring_registry.get_tools().values())
         monitoring_executor = monitoring_registry.get_executor()

@@ -272,7 +275,7 @@ class AgentManager:
             tools=monitoring_tools,
             tool_executor=monitoring_executor,
             event_bus=event_bus,
-            stream_id="worker_health_judge",
+            stream_id="judge",
             storage_path=judge_dir,
             loop_config=judge_graph.loop_config,
         )
@@ -282,7 +285,7 @@ class AgentManager:
                     input_data={
                         "event": {"source": "timer", "reason": "scheduled"},
                     },
-                    session_state={"resume_session_id": "persistent"},
+                    session_state={"resume_session_id": session_id},
                 )
             except Exception:
                 logger.error("Health judge tick failed", exc_info=True)
@@ -300,6 +303,7 @@ class AgentManager:
             worker_runtime=runtime,
             event_bus=event_bus,
             storage_path=storage_path,
+            session_id=session_id,
         )
         register_worker_monitoring_tools(
             queen_registry,
@@ -365,7 +369,7 @@ class AgentManager:
                     graph=queen_graph,
                     goal=queen_goal,
                     input_data={"greeting": "Session started."},
-                    session_state={"resume_session_id": "persistent"},
+                    session_state={"resume_session_id": session_id},
                 )
                 logger.warning("Queen executor returned (should be forever-alive)")
             except Exception:

@@ -36,9 +36,14 @@ def register_queen_lifecycle_tools(
     worker_runtime: AgentRuntime,
     event_bus: EventBus,
     storage_path: Path | None = None,
+    session_id: str | None = None,
 ) -> int:
     """Register queen lifecycle tools bound to *worker_runtime*.

+    Args:
+        session_id: Shared session ID so the worker uses the same session
+            scope as the queen and judge.
+
     Returns the number of tools registered.
     """
     from framework.llm.provider import Tool
@@ -55,7 +60,12 @@ def register_queen_lifecycle_tools(
         """
         try:
             # Get session state from any prior execution for memory continuity
-            session_state = worker_runtime._get_primary_session_state("default")
+            session_state = worker_runtime._get_primary_session_state("default") or {}
+
+            # Use the shared session ID so queen, judge, and worker all
+            # scope their conversations to the same session.
+            if session_id:
+                session_state["resume_session_id"] = session_id

             exec_id = await worker_runtime.trigger(
                 entry_point_id="default",

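In the hunk above, the worker trigger first falls back to an empty dict when there is no prior session state, then points the worker at the shared session. A reduced sketch of that pattern (build_worker_session_state is a hypothetical helper; the real code works on worker_runtime's session state directly):

    from typing import Any

    def build_worker_session_state(
        prior_state: dict[str, Any] | None, session_id: str | None
    ) -> dict[str, Any]:
        # "or {}" guards the first trigger, when no prior session state
        # exists and the lookup returns None.
        session_state = prior_state or {}
        # Inject the shared ID so worker, queen, and judge land in one session.
        if session_id:
            session_state["resume_session_id"] = session_id
        return session_state
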
@@ -468,6 +468,8 @@ class AdenTUI(App):
         into the worker runtime. The worker is completely untouched.
         """
         import asyncio
+        import uuid
+        from datetime import datetime
         from pathlib import Path

         from framework.graph.executor import GraphExecutor
@@ -486,6 +488,10 @@ class AdenTUI(App):
         llm = self.runtime._llm
         agent_loop = self.chat_repl._agent_loop

+        # Generate a shared session ID for queen, judge, and worker.
+        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
+        session_id = f"session_{ts}_{uuid.uuid4().hex[:8]}"
+
         # 1. Monitoring tools (health summary, emit ticket, notify operator).
         # Registered on a standalone registry — NOT merged into the worker.
         monitoring_registry = ToolRegistry()
@@ -496,11 +502,11 @@ class AdenTUI(App):
             worker_graph_id=self.runtime._graph_id,
         )

-        # 2. Storage dirs — under worker's base path but completely owned
-        # by the judge/queen. Worker never writes here.
-        judge_dir = storage_path / "graphs" / "judge" / "session"
+        # 2. Storage dirs — scoped by session_id so each agent load
+        # gets fresh queen/judge conversations.
+        judge_dir = storage_path / "graphs" / "judge" / "session" / session_id
         judge_dir.mkdir(parents=True, exist_ok=True)
-        queen_dir = storage_path / "graphs" / "queen" / "session"
+        queen_dir = storage_path / "graphs" / "queen" / "session" / session_id
         queen_dir.mkdir(parents=True, exist_ok=True)

         # ---------------------------------------------------------------
@@ -542,7 +548,7 @@ class AdenTUI(App):
                     input_data={
                         "event": {"source": "timer", "reason": "scheduled"},
                     },
-                    session_state={"resume_session_id": "persistent"},
+                    session_state={"resume_session_id": session_id},
                 )
             except Exception:
                 log.error("Health judge tick failed", exc_info=True)
@@ -584,6 +590,7 @@ class AdenTUI(App):
             worker_runtime=self.runtime,
             event_bus=event_bus,
             storage_path=storage_path,
+            session_id=session_id,
         )
         register_worker_monitoring_tools(
             queen_registry,
@@ -596,9 +603,6 @@ class AdenTUI(App):
         queen_tool_executor = queen_registry.get_executor()

         # Build worker identity to inject into the queen's system prompt.
-        # This must be in the system prompt (not input_data) because
-        # persistent sessions restore the old conversation and skip
-        # _build_initial_message — the queen would lose context.
         worker_graph_id = self.runtime._graph_id
         worker_goal_name = getattr(self.runtime.goal, "name", worker_graph_id)
         worker_goal_desc = getattr(self.runtime.goal, "description", "")
@@ -657,7 +661,7 @@ class AdenTUI(App):
                     graph=queen_graph,
                     goal=queen_goal,
                     input_data={"greeting": "Session started."},
-                    session_state={"resume_session_id": "persistent"},
+                    session_state={"resume_session_id": session_id},
                 )
                 # Should never reach here — queen is forever-alive.
                 log.warning(

@@ -569,7 +569,7 @@ export default function Workspace() {
       const streamId = event.stream_id;

       // Suppress judge events (silent background monitoring)
-      if (streamId === "worker_health_judge") return;
+      if (streamId === "judge") return;

       // Determine if this is a queen event
       const isQueen = streamId === "queen";

Generated file: +4 -18884 (diff suppressed because it is too large).