fix: isolate session loading
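The change below introduces one shared, per-load session ID that scopes the queen/judge storage directories and replaces the hard-coded "persistent" resume ID. A minimal sketch of that scheme, for orientation only (the real wiring lives in the AgentManager and AdenTUI hunks below; make_session_scope is a hypothetical helper, not part of the codebase):

    import uuid
    from datetime import datetime
    from pathlib import Path

    def make_session_scope(storage_path: Path) -> tuple[str, Path, Path]:
        # One ID per agent load; queen, judge, and worker all share it, so
        # conversations are scoped to this load and start fresh each time.
        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
        session_id = f"session_{ts}_{uuid.uuid4().hex[:8]}"

        # Per-session storage dirs, so old sessions never bleed into a new load.
        judge_dir = storage_path / "graphs" / "judge" / "session" / session_id
        queen_dir = storage_path / "graphs" / "queen" / "session" / session_id
        judge_dir.mkdir(parents=True, exist_ok=True)
        queen_dir.mkdir(parents=True, exist_ok=True)
        return session_id, judge_dir, queen_dir

The executors are then started with session_state={"resume_session_id": session_id} instead of the old "persistent" value, as the hunks below show.
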
@@ -328,6 +328,20 @@ class LiteLLMProvider(LLMProvider):
                     f"Full request dumped to: {dump_path}"
                 )

+                # finish_reason=length means the model exhausted max_tokens
+                # before producing content. Retrying with the same max_tokens
+                # will never help — return immediately instead of looping.
+                if finish_reason == "length":
+                    max_tok = kwargs.get("max_tokens", "unset")
+                    logger.error(
+                        f"[retry] {model} returned empty content with "
+                        f"finish_reason=length (max_tokens={max_tok}). "
+                        f"The model exhausted its token budget before "
+                        f"producing visible output. Increase max_tokens "
+                        f"or use a different model. Not retrying."
+                    )
+                    return response
+
                 if attempt == retries:
                     logger.error(
                         f"[retry] GAVE UP on {model} after {retries + 1} "
@@ -621,6 +635,20 @@ class LiteLLMProvider(LLMProvider):
                     f"Full request dumped to: {dump_path}"
                 )

+                # finish_reason=length means the model exhausted max_tokens
+                # before producing content. Retrying with the same max_tokens
+                # will never help — return immediately instead of looping.
+                if finish_reason == "length":
+                    max_tok = kwargs.get("max_tokens", "unset")
+                    logger.error(
+                        f"[async-retry] {model} returned empty content with "
+                        f"finish_reason=length (max_tokens={max_tok}). "
+                        f"The model exhausted its token budget before "
+                        f"producing visible output. Increase max_tokens "
+                        f"or use a different model. Not retrying."
+                    )
+                    return response
+
                 if attempt == retries:
                     logger.error(
                         f"[async-retry] GAVE UP on {model} after {retries + 1} "
@@ -903,6 +931,7 @@ class LiteLLMProvider(LLMProvider):
             tool_calls_acc: dict[int, dict[str, str]] = {}
             input_tokens = 0
             output_tokens = 0
+            stream_finish_reason: str | None = None

             try:
                 response = await litellm.acompletion(**kwargs)  # type: ignore[union-attr]
@@ -938,6 +967,7 @@ class LiteLLMProvider(LLMProvider):

                 # --- Finish ---
                 if choice.finish_reason:
+                    stream_finish_reason = choice.finish_reason
                     for _idx, tc_data in sorted(tool_calls_acc.items()):
                         try:
                             parsed_args = json.loads(tc_data["arguments"])
@@ -992,6 +1022,24 @@ class LiteLLMProvider(LLMProvider):
                     for event in tail_events:
                         yield event
                     return
+
+                # finish_reason=length means the model exhausted
+                # max_tokens before producing content. Retrying with
+                # the same max_tokens will never help.
+                if stream_finish_reason == "length":
+                    max_tok = kwargs.get("max_tokens", "unset")
+                    logger.error(
+                        f"[stream] {self.model} returned empty content "
+                        f"with finish_reason=length "
+                        f"(max_tokens={max_tok}). The model exhausted "
+                        f"its token budget before producing visible "
+                        f"output. Increase max_tokens or use a "
+                        f"different model. Not retrying."
+                    )
+                    for event in tail_events:
+                        yield event
+                    return
+
                 wait = _compute_retry_delay(attempt)
                 token_count, token_method = _estimate_tokens(
                     self.model,

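The hunks above also add an early exit to the retry path: when a response comes back empty with finish_reason="length", the model spent its entire max_tokens budget before emitting visible output, so retrying with the same budget cannot succeed. A standalone sketch of that rule, assuming the caller separately decides whether a retry is otherwise allowed (should_retry_empty_response is illustrative, not the provider's actual API):

    import logging

    logger = logging.getLogger(__name__)

    def should_retry_empty_response(finish_reason: str | None, max_tokens: int | None) -> bool:
        # finish_reason="length" means the token budget ran out before any
        # visible content was produced; the same budget gives the same result.
        if finish_reason == "length":
            logger.error(
                "empty content with finish_reason=length (max_tokens=%s); "
                "increase max_tokens or switch models instead of retrying",
                max_tokens if max_tokens is not None else "unset",
            )
            return False
        # Other empty responses may be transient, so a retry can still help.
        return True
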
@@ -61,7 +61,7 @@ async def _extract_subgraph_steps(nodes: list, llm: Any) -> None:

         response = await llm.acomplete(
             messages=[{"role": "user", "content": prompt}],
-            max_tokens=1000,
+            max_tokens=4096,
             json_mode=True,
         )

@@ -172,13 +172,6 @@ class AgentManager:
         if runner._agent_runtime is None:
             await loop.run_in_executor(None, runner._setup)
-
-        # Extract subgraph steps for frontend visualization (non-critical)
-        if runner.graph and runner._llm:
-            try:
-                await _extract_subgraph_steps(runner.graph.nodes, runner._llm)
-            except Exception as e:
-                logger.warning(f"Subgraph extraction skipped: {e}")

         runtime = runner._agent_runtime

         # Start runtime on event loop
@@ -224,6 +217,9 @@ class AgentManager:
         - **Judge**: timer-driven background GraphExecutor (silent monitoring)
         - **Worker**: the existing AgentRuntime (unchanged)
         """
+        import uuid
+        from datetime import datetime
+
         from framework.graph.executor import GraphExecutor
         from framework.monitoring import judge_goal, judge_graph
         from framework.runner.tool_registry import ToolRegistry
@@ -238,6 +234,12 @@ class AgentManager:
         event_bus = runtime._event_bus
         llm = runtime._llm

+        # Generate a shared session ID for queen, judge, and worker.
+        # All three use the same ID so conversations are scoped to this
+        # agent load and start fresh each time.
+        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
+        session_id = f"session_{ts}_{uuid.uuid4().hex[:8]}"
+
         # 1. Monitoring tools — standalone registry, NOT merged into worker
         monitoring_registry = ToolRegistry()
         register_worker_monitoring_tools(
@@ -247,14 +249,15 @@ class AgentManager:
             worker_graph_id=runtime._graph_id,
         )

-        # 2. Storage dirs
-        judge_dir = storage_path / "graphs" / "worker_health_judge" / "session"
+        # 2. Storage dirs — scoped by session_id so each agent load
+        # gets fresh queen/judge conversations.
+        judge_dir = storage_path / "graphs" / "judge" / "session" / session_id
         judge_dir.mkdir(parents=True, exist_ok=True)
-        queen_dir = storage_path / "graphs" / "queen" / "session"
+        queen_dir = storage_path / "graphs" / "queen" / "session" / session_id
         queen_dir.mkdir(parents=True, exist_ok=True)

         # 3. Health judge — background task, fires every 2 minutes
-        judge_runtime = Runtime(storage_path / "graphs" / "worker_health_judge")
+        judge_runtime = Runtime(storage_path / "graphs" / "judge")
         monitoring_tools = list(monitoring_registry.get_tools().values())
         monitoring_executor = monitoring_registry.get_executor()

@@ -272,7 +275,7 @@ class AgentManager:
             tools=monitoring_tools,
             tool_executor=monitoring_executor,
             event_bus=event_bus,
-            stream_id="worker_health_judge",
+            stream_id="judge",
             storage_path=judge_dir,
             loop_config=judge_graph.loop_config,
         )
@@ -282,7 +285,7 @@ class AgentManager:
                     input_data={
                         "event": {"source": "timer", "reason": "scheduled"},
                     },
-                    session_state={"resume_session_id": "persistent"},
+                    session_state={"resume_session_id": session_id},
                 )
             except Exception:
                 logger.error("Health judge tick failed", exc_info=True)
@@ -300,6 +303,7 @@ class AgentManager:
             worker_runtime=runtime,
             event_bus=event_bus,
             storage_path=storage_path,
+            session_id=session_id,
         )
         register_worker_monitoring_tools(
             queen_registry,
@@ -365,7 +369,7 @@ class AgentManager:
                     graph=queen_graph,
                     goal=queen_goal,
                     input_data={"greeting": "Session started."},
-                    session_state={"resume_session_id": "persistent"},
+                    session_state={"resume_session_id": session_id},
                 )
                 logger.warning("Queen executor returned (should be forever-alive)")
             except Exception:

@@ -36,9 +36,14 @@ def register_queen_lifecycle_tools(
     worker_runtime: AgentRuntime,
     event_bus: EventBus,
     storage_path: Path | None = None,
+    session_id: str | None = None,
 ) -> int:
     """Register queen lifecycle tools bound to *worker_runtime*.

+    Args:
+        session_id: Shared session ID so the worker uses the same session
+            scope as the queen and judge.
+
     Returns the number of tools registered.
     """
     from framework.llm.provider import Tool
@@ -55,7 +60,12 @@ def register_queen_lifecycle_tools(
         """
         try:
             # Get session state from any prior execution for memory continuity
-            session_state = worker_runtime._get_primary_session_state("default")
+            session_state = worker_runtime._get_primary_session_state("default") or {}
+
+            # Use the shared session ID so queen, judge, and worker all
+            # scope their conversations to the same session.
+            if session_id:
+                session_state["resume_session_id"] = session_id

             exec_id = await worker_runtime.trigger(
                 entry_point_id="default",

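In the hunk above, the worker trigger first falls back to an empty dict when there is no prior session state, then points the worker at the shared session. A reduced sketch of that pattern (build_worker_session_state is a hypothetical helper; the real code works on worker_runtime's session state directly):

    from typing import Any

    def build_worker_session_state(
        prior_state: dict[str, Any] | None, session_id: str | None
    ) -> dict[str, Any]:
        # "or {}" guards the first trigger, when no prior session state
        # exists and the lookup returns None.
        session_state = prior_state or {}
        # Inject the shared ID so worker, queen, and judge land in one session.
        if session_id:
            session_state["resume_session_id"] = session_id
        return session_state
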
@@ -468,6 +468,8 @@ class AdenTUI(App):
         into the worker runtime. The worker is completely untouched.
         """
         import asyncio
+        import uuid
+        from datetime import datetime
         from pathlib import Path

         from framework.graph.executor import GraphExecutor
@@ -486,6 +488,10 @@ class AdenTUI(App):
         llm = self.runtime._llm
         agent_loop = self.chat_repl._agent_loop

+        # Generate a shared session ID for queen, judge, and worker.
+        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
+        session_id = f"session_{ts}_{uuid.uuid4().hex[:8]}"
+
         # 1. Monitoring tools (health summary, emit ticket, notify operator).
         # Registered on a standalone registry — NOT merged into the worker.
         monitoring_registry = ToolRegistry()
@@ -496,11 +502,11 @@ class AdenTUI(App):
             worker_graph_id=self.runtime._graph_id,
         )

-        # 2. Storage dirs — under worker's base path but completely owned
-        # by the judge/queen. Worker never writes here.
-        judge_dir = storage_path / "graphs" / "judge" / "session"
+        # 2. Storage dirs — scoped by session_id so each agent load
+        # gets fresh queen/judge conversations.
+        judge_dir = storage_path / "graphs" / "judge" / "session" / session_id
         judge_dir.mkdir(parents=True, exist_ok=True)
-        queen_dir = storage_path / "graphs" / "queen" / "session"
+        queen_dir = storage_path / "graphs" / "queen" / "session" / session_id
         queen_dir.mkdir(parents=True, exist_ok=True)

         # ---------------------------------------------------------------
@@ -542,7 +548,7 @@ class AdenTUI(App):
                     input_data={
                         "event": {"source": "timer", "reason": "scheduled"},
                     },
-                    session_state={"resume_session_id": "persistent"},
+                    session_state={"resume_session_id": session_id},
                 )
             except Exception:
                 log.error("Health judge tick failed", exc_info=True)
@@ -584,6 +590,7 @@ class AdenTUI(App):
             worker_runtime=self.runtime,
             event_bus=event_bus,
             storage_path=storage_path,
+            session_id=session_id,
         )
         register_worker_monitoring_tools(
             queen_registry,
@@ -596,9 +603,6 @@ class AdenTUI(App):
         queen_tool_executor = queen_registry.get_executor()

         # Build worker identity to inject into the queen's system prompt.
-        # This must be in the system prompt (not input_data) because
-        # persistent sessions restore the old conversation and skip
-        # _build_initial_message — the queen would lose context.
         worker_graph_id = self.runtime._graph_id
         worker_goal_name = getattr(self.runtime.goal, "name", worker_graph_id)
         worker_goal_desc = getattr(self.runtime.goal, "description", "")
@@ -657,7 +661,7 @@ class AdenTUI(App):
                     graph=queen_graph,
                     goal=queen_goal,
                     input_data={"greeting": "Session started."},
-                    session_state={"resume_session_id": "persistent"},
+                    session_state={"resume_session_id": session_id},
                 )
                 # Should never reach here — queen is forever-alive.
                 log.warning(

@@ -569,7 +569,7 @@ export default function Workspace() {
       const streamId = event.stream_id;

       // Suppress judge events (silent background monitoring)
-      if (streamId === "worker_health_judge") return;
+      if (streamId === "judge") return;

       // Determine if this is a queen event
       const isQueen = streamId === "queen";

Generated file: +4 -18884 (diff suppressed because it is too large).