fix: isolate session loading
This commit is contained in:
@@ -328,6 +328,20 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
f"Full request dumped to: {dump_path}"
|
f"Full request dumped to: {dump_path}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# finish_reason=length means the model exhausted max_tokens
|
||||||
|
# before producing content. Retrying with the same max_tokens
|
||||||
|
# will never help — return immediately instead of looping.
|
||||||
|
if finish_reason == "length":
|
||||||
|
max_tok = kwargs.get("max_tokens", "unset")
|
||||||
|
logger.error(
|
||||||
|
f"[retry] {model} returned empty content with "
|
||||||
|
f"finish_reason=length (max_tokens={max_tok}). "
|
||||||
|
f"The model exhausted its token budget before "
|
||||||
|
f"producing visible output. Increase max_tokens "
|
||||||
|
f"or use a different model. Not retrying."
|
||||||
|
)
|
||||||
|
return response
|
||||||
|
|
||||||
if attempt == retries:
|
if attempt == retries:
|
||||||
logger.error(
|
logger.error(
|
||||||
f"[retry] GAVE UP on {model} after {retries + 1} "
|
f"[retry] GAVE UP on {model} after {retries + 1} "
|
||||||
@@ -621,6 +635,20 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
f"Full request dumped to: {dump_path}"
|
f"Full request dumped to: {dump_path}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# finish_reason=length means the model exhausted max_tokens
|
||||||
|
# before producing content. Retrying with the same max_tokens
|
||||||
|
# will never help — return immediately instead of looping.
|
||||||
|
if finish_reason == "length":
|
||||||
|
max_tok = kwargs.get("max_tokens", "unset")
|
||||||
|
logger.error(
|
||||||
|
f"[async-retry] {model} returned empty content with "
|
||||||
|
f"finish_reason=length (max_tokens={max_tok}). "
|
||||||
|
f"The model exhausted its token budget before "
|
||||||
|
f"producing visible output. Increase max_tokens "
|
||||||
|
f"or use a different model. Not retrying."
|
||||||
|
)
|
||||||
|
return response
|
||||||
|
|
||||||
if attempt == retries:
|
if attempt == retries:
|
||||||
logger.error(
|
logger.error(
|
||||||
f"[async-retry] GAVE UP on {model} after {retries + 1} "
|
f"[async-retry] GAVE UP on {model} after {retries + 1} "
|
||||||
@@ -903,6 +931,7 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
tool_calls_acc: dict[int, dict[str, str]] = {}
|
tool_calls_acc: dict[int, dict[str, str]] = {}
|
||||||
input_tokens = 0
|
input_tokens = 0
|
||||||
output_tokens = 0
|
output_tokens = 0
|
||||||
|
stream_finish_reason: str | None = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = await litellm.acompletion(**kwargs) # type: ignore[union-attr]
|
response = await litellm.acompletion(**kwargs) # type: ignore[union-attr]
|
||||||
@@ -938,6 +967,7 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
|
|
||||||
# --- Finish ---
|
# --- Finish ---
|
||||||
if choice.finish_reason:
|
if choice.finish_reason:
|
||||||
|
stream_finish_reason = choice.finish_reason
|
||||||
for _idx, tc_data in sorted(tool_calls_acc.items()):
|
for _idx, tc_data in sorted(tool_calls_acc.items()):
|
||||||
try:
|
try:
|
||||||
parsed_args = json.loads(tc_data["arguments"])
|
parsed_args = json.loads(tc_data["arguments"])
|
||||||
@@ -992,6 +1022,24 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
for event in tail_events:
|
for event in tail_events:
|
||||||
yield event
|
yield event
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# finish_reason=length means the model exhausted
|
||||||
|
# max_tokens before producing content. Retrying with
|
||||||
|
# the same max_tokens will never help.
|
||||||
|
if stream_finish_reason == "length":
|
||||||
|
max_tok = kwargs.get("max_tokens", "unset")
|
||||||
|
logger.error(
|
||||||
|
f"[stream] {self.model} returned empty content "
|
||||||
|
f"with finish_reason=length "
|
||||||
|
f"(max_tokens={max_tok}). The model exhausted "
|
||||||
|
f"its token budget before producing visible "
|
||||||
|
f"output. Increase max_tokens or use a "
|
||||||
|
f"different model. Not retrying."
|
||||||
|
)
|
||||||
|
for event in tail_events:
|
||||||
|
yield event
|
||||||
|
return
|
||||||
|
|
||||||
wait = _compute_retry_delay(attempt)
|
wait = _compute_retry_delay(attempt)
|
||||||
token_count, token_method = _estimate_tokens(
|
token_count, token_method = _estimate_tokens(
|
||||||
self.model,
|
self.model,
|
||||||
|
|||||||
@@ -61,7 +61,7 @@ async def _extract_subgraph_steps(nodes: list, llm: Any) -> None:
|
|||||||
|
|
||||||
response = await llm.acomplete(
|
response = await llm.acomplete(
|
||||||
messages=[{"role": "user", "content": prompt}],
|
messages=[{"role": "user", "content": prompt}],
|
||||||
max_tokens=1000,
|
max_tokens=4096,
|
||||||
json_mode=True,
|
json_mode=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -172,13 +172,6 @@ class AgentManager:
|
|||||||
if runner._agent_runtime is None:
|
if runner._agent_runtime is None:
|
||||||
await loop.run_in_executor(None, runner._setup)
|
await loop.run_in_executor(None, runner._setup)
|
||||||
|
|
||||||
# Extract subgraph steps for frontend visualization (non-critical)
|
|
||||||
if runner.graph and runner._llm:
|
|
||||||
try:
|
|
||||||
await _extract_subgraph_steps(runner.graph.nodes, runner._llm)
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Subgraph extraction skipped: {e}")
|
|
||||||
|
|
||||||
runtime = runner._agent_runtime
|
runtime = runner._agent_runtime
|
||||||
|
|
||||||
# Start runtime on event loop
|
# Start runtime on event loop
|
||||||
@@ -224,6 +217,9 @@ class AgentManager:
|
|||||||
- **Judge**: timer-driven background GraphExecutor (silent monitoring)
|
- **Judge**: timer-driven background GraphExecutor (silent monitoring)
|
||||||
- **Worker**: the existing AgentRuntime (unchanged)
|
- **Worker**: the existing AgentRuntime (unchanged)
|
||||||
"""
|
"""
|
||||||
|
import uuid
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
from framework.graph.executor import GraphExecutor
|
from framework.graph.executor import GraphExecutor
|
||||||
from framework.monitoring import judge_goal, judge_graph
|
from framework.monitoring import judge_goal, judge_graph
|
||||||
from framework.runner.tool_registry import ToolRegistry
|
from framework.runner.tool_registry import ToolRegistry
|
||||||
@@ -238,6 +234,12 @@ class AgentManager:
|
|||||||
event_bus = runtime._event_bus
|
event_bus = runtime._event_bus
|
||||||
llm = runtime._llm
|
llm = runtime._llm
|
||||||
|
|
||||||
|
# Generate a shared session ID for queen, judge, and worker.
|
||||||
|
# All three use the same ID so conversations are scoped to this
|
||||||
|
# agent load and start fresh each time.
|
||||||
|
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||||
|
session_id = f"session_{ts}_{uuid.uuid4().hex[:8]}"
|
||||||
|
|
||||||
# 1. Monitoring tools — standalone registry, NOT merged into worker
|
# 1. Monitoring tools — standalone registry, NOT merged into worker
|
||||||
monitoring_registry = ToolRegistry()
|
monitoring_registry = ToolRegistry()
|
||||||
register_worker_monitoring_tools(
|
register_worker_monitoring_tools(
|
||||||
@@ -247,14 +249,15 @@ class AgentManager:
|
|||||||
worker_graph_id=runtime._graph_id,
|
worker_graph_id=runtime._graph_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
# 2. Storage dirs
|
# 2. Storage dirs — scoped by session_id so each agent load
|
||||||
judge_dir = storage_path / "graphs" / "worker_health_judge" / "session"
|
# gets fresh queen/judge conversations.
|
||||||
|
judge_dir = storage_path / "graphs" / "judge" / "session" / session_id
|
||||||
judge_dir.mkdir(parents=True, exist_ok=True)
|
judge_dir.mkdir(parents=True, exist_ok=True)
|
||||||
queen_dir = storage_path / "graphs" / "queen" / "session"
|
queen_dir = storage_path / "graphs" / "queen" / "session" / session_id
|
||||||
queen_dir.mkdir(parents=True, exist_ok=True)
|
queen_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
# 3. Health judge — background task, fires every 2 minutes
|
# 3. Health judge — background task, fires every 2 minutes
|
||||||
judge_runtime = Runtime(storage_path / "graphs" / "worker_health_judge")
|
judge_runtime = Runtime(storage_path / "graphs" / "judge")
|
||||||
monitoring_tools = list(monitoring_registry.get_tools().values())
|
monitoring_tools = list(monitoring_registry.get_tools().values())
|
||||||
monitoring_executor = monitoring_registry.get_executor()
|
monitoring_executor = monitoring_registry.get_executor()
|
||||||
|
|
||||||
@@ -272,7 +275,7 @@ class AgentManager:
|
|||||||
tools=monitoring_tools,
|
tools=monitoring_tools,
|
||||||
tool_executor=monitoring_executor,
|
tool_executor=monitoring_executor,
|
||||||
event_bus=event_bus,
|
event_bus=event_bus,
|
||||||
stream_id="worker_health_judge",
|
stream_id="judge",
|
||||||
storage_path=judge_dir,
|
storage_path=judge_dir,
|
||||||
loop_config=judge_graph.loop_config,
|
loop_config=judge_graph.loop_config,
|
||||||
)
|
)
|
||||||
@@ -282,7 +285,7 @@ class AgentManager:
|
|||||||
input_data={
|
input_data={
|
||||||
"event": {"source": "timer", "reason": "scheduled"},
|
"event": {"source": "timer", "reason": "scheduled"},
|
||||||
},
|
},
|
||||||
session_state={"resume_session_id": "persistent"},
|
session_state={"resume_session_id": session_id},
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.error("Health judge tick failed", exc_info=True)
|
logger.error("Health judge tick failed", exc_info=True)
|
||||||
@@ -300,6 +303,7 @@ class AgentManager:
|
|||||||
worker_runtime=runtime,
|
worker_runtime=runtime,
|
||||||
event_bus=event_bus,
|
event_bus=event_bus,
|
||||||
storage_path=storage_path,
|
storage_path=storage_path,
|
||||||
|
session_id=session_id,
|
||||||
)
|
)
|
||||||
register_worker_monitoring_tools(
|
register_worker_monitoring_tools(
|
||||||
queen_registry,
|
queen_registry,
|
||||||
@@ -365,7 +369,7 @@ class AgentManager:
|
|||||||
graph=queen_graph,
|
graph=queen_graph,
|
||||||
goal=queen_goal,
|
goal=queen_goal,
|
||||||
input_data={"greeting": "Session started."},
|
input_data={"greeting": "Session started."},
|
||||||
session_state={"resume_session_id": "persistent"},
|
session_state={"resume_session_id": session_id},
|
||||||
)
|
)
|
||||||
logger.warning("Queen executor returned (should be forever-alive)")
|
logger.warning("Queen executor returned (should be forever-alive)")
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|||||||
@@ -36,9 +36,14 @@ def register_queen_lifecycle_tools(
|
|||||||
worker_runtime: AgentRuntime,
|
worker_runtime: AgentRuntime,
|
||||||
event_bus: EventBus,
|
event_bus: EventBus,
|
||||||
storage_path: Path | None = None,
|
storage_path: Path | None = None,
|
||||||
|
session_id: str | None = None,
|
||||||
) -> int:
|
) -> int:
|
||||||
"""Register queen lifecycle tools bound to *worker_runtime*.
|
"""Register queen lifecycle tools bound to *worker_runtime*.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
session_id: Shared session ID so the worker uses the same session
|
||||||
|
scope as the queen and judge.
|
||||||
|
|
||||||
Returns the number of tools registered.
|
Returns the number of tools registered.
|
||||||
"""
|
"""
|
||||||
from framework.llm.provider import Tool
|
from framework.llm.provider import Tool
|
||||||
@@ -55,7 +60,12 @@ def register_queen_lifecycle_tools(
|
|||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Get session state from any prior execution for memory continuity
|
# Get session state from any prior execution for memory continuity
|
||||||
session_state = worker_runtime._get_primary_session_state("default")
|
session_state = worker_runtime._get_primary_session_state("default") or {}
|
||||||
|
|
||||||
|
# Use the shared session ID so queen, judge, and worker all
|
||||||
|
# scope their conversations to the same session.
|
||||||
|
if session_id:
|
||||||
|
session_state["resume_session_id"] = session_id
|
||||||
|
|
||||||
exec_id = await worker_runtime.trigger(
|
exec_id = await worker_runtime.trigger(
|
||||||
entry_point_id="default",
|
entry_point_id="default",
|
||||||
|
|||||||
@@ -468,6 +468,8 @@ class AdenTUI(App):
|
|||||||
into the worker runtime. The worker is completely untouched.
|
into the worker runtime. The worker is completely untouched.
|
||||||
"""
|
"""
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import uuid
|
||||||
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from framework.graph.executor import GraphExecutor
|
from framework.graph.executor import GraphExecutor
|
||||||
@@ -486,6 +488,10 @@ class AdenTUI(App):
|
|||||||
llm = self.runtime._llm
|
llm = self.runtime._llm
|
||||||
agent_loop = self.chat_repl._agent_loop
|
agent_loop = self.chat_repl._agent_loop
|
||||||
|
|
||||||
|
# Generate a shared session ID for queen, judge, and worker.
|
||||||
|
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||||
|
session_id = f"session_{ts}_{uuid.uuid4().hex[:8]}"
|
||||||
|
|
||||||
# 1. Monitoring tools (health summary, emit ticket, notify operator).
|
# 1. Monitoring tools (health summary, emit ticket, notify operator).
|
||||||
# Registered on a standalone registry — NOT merged into the worker.
|
# Registered on a standalone registry — NOT merged into the worker.
|
||||||
monitoring_registry = ToolRegistry()
|
monitoring_registry = ToolRegistry()
|
||||||
@@ -496,11 +502,11 @@ class AdenTUI(App):
|
|||||||
worker_graph_id=self.runtime._graph_id,
|
worker_graph_id=self.runtime._graph_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
# 2. Storage dirs — under worker's base path but completely owned
|
# 2. Storage dirs — scoped by session_id so each agent load
|
||||||
# by the judge/queen. Worker never writes here.
|
# gets fresh queen/judge conversations.
|
||||||
judge_dir = storage_path / "graphs" / "judge" / "session"
|
judge_dir = storage_path / "graphs" / "judge" / "session" / session_id
|
||||||
judge_dir.mkdir(parents=True, exist_ok=True)
|
judge_dir.mkdir(parents=True, exist_ok=True)
|
||||||
queen_dir = storage_path / "graphs" / "queen" / "session"
|
queen_dir = storage_path / "graphs" / "queen" / "session" / session_id
|
||||||
queen_dir.mkdir(parents=True, exist_ok=True)
|
queen_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
# ---------------------------------------------------------------
|
# ---------------------------------------------------------------
|
||||||
@@ -542,7 +548,7 @@ class AdenTUI(App):
|
|||||||
input_data={
|
input_data={
|
||||||
"event": {"source": "timer", "reason": "scheduled"},
|
"event": {"source": "timer", "reason": "scheduled"},
|
||||||
},
|
},
|
||||||
session_state={"resume_session_id": "persistent"},
|
session_state={"resume_session_id": session_id},
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
log.error("Health judge tick failed", exc_info=True)
|
log.error("Health judge tick failed", exc_info=True)
|
||||||
@@ -584,6 +590,7 @@ class AdenTUI(App):
|
|||||||
worker_runtime=self.runtime,
|
worker_runtime=self.runtime,
|
||||||
event_bus=event_bus,
|
event_bus=event_bus,
|
||||||
storage_path=storage_path,
|
storage_path=storage_path,
|
||||||
|
session_id=session_id,
|
||||||
)
|
)
|
||||||
register_worker_monitoring_tools(
|
register_worker_monitoring_tools(
|
||||||
queen_registry,
|
queen_registry,
|
||||||
@@ -596,9 +603,6 @@ class AdenTUI(App):
|
|||||||
queen_tool_executor = queen_registry.get_executor()
|
queen_tool_executor = queen_registry.get_executor()
|
||||||
|
|
||||||
# Build worker identity to inject into the queen's system prompt.
|
# Build worker identity to inject into the queen's system prompt.
|
||||||
# This must be in the system prompt (not input_data) because
|
|
||||||
# persistent sessions restore the old conversation and skip
|
|
||||||
# _build_initial_message — the queen would lose context.
|
|
||||||
worker_graph_id = self.runtime._graph_id
|
worker_graph_id = self.runtime._graph_id
|
||||||
worker_goal_name = getattr(self.runtime.goal, "name", worker_graph_id)
|
worker_goal_name = getattr(self.runtime.goal, "name", worker_graph_id)
|
||||||
worker_goal_desc = getattr(self.runtime.goal, "description", "")
|
worker_goal_desc = getattr(self.runtime.goal, "description", "")
|
||||||
@@ -657,7 +661,7 @@ class AdenTUI(App):
|
|||||||
graph=queen_graph,
|
graph=queen_graph,
|
||||||
goal=queen_goal,
|
goal=queen_goal,
|
||||||
input_data={"greeting": "Session started."},
|
input_data={"greeting": "Session started."},
|
||||||
session_state={"resume_session_id": "persistent"},
|
session_state={"resume_session_id": session_id},
|
||||||
)
|
)
|
||||||
# Should never reach here — queen is forever-alive.
|
# Should never reach here — queen is forever-alive.
|
||||||
log.warning(
|
log.warning(
|
||||||
|
|||||||
@@ -569,7 +569,7 @@ export default function Workspace() {
|
|||||||
const streamId = event.stream_id;
|
const streamId = event.stream_id;
|
||||||
|
|
||||||
// Suppress judge events (silent background monitoring)
|
// Suppress judge events (silent background monitoring)
|
||||||
if (streamId === "worker_health_judge") return;
|
if (streamId === "judge") return;
|
||||||
|
|
||||||
// Determine if this is a queen event
|
// Determine if this is a queen event
|
||||||
const isQueen = streamId === "queen";
|
const isQueen = streamId === "queen";
|
||||||
|
|||||||
Generated
+4
-18884
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user