Merge remote-tracking branch 'origin/feature/thinking-hook' into feat/queen-responsibility

This commit is contained in:
Richard Tang
2026-03-06 17:10:25 -08:00
6 changed files with 155 additions and 13 deletions
@@ -0,0 +1,80 @@
"""Queen thinking hook — HR persona classifier.
Fires once when the queen enters building mode at session start.
Makes a single non-streaming LLM call (acting as an HR Director) to select
the best-fit expert persona for the user's request, then returns a persona
prefix string that replaces the queen's default "Solution Architect" identity.
This is designed to activate the model's latent domain expertise — a CFO
persona on a financial question, a Lawyer on a legal question, etc.
"""
from __future__ import annotations
import json
import logging
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from framework.llm.provider import LLMProvider
logger = logging.getLogger(__name__)
_HR_SYSTEM_PROMPT = """\
You are an expert HR Director and talent consultant at a world-class firm.
A new request has arrived and you must identify which professional's expertise
would produce the highest-quality response.
Reply with ONLY a valid JSON object no markdown, no prose, no explanation:
{"role": "<job title>", "persona": "<2-3 sentence first-person identity statement>"}
Rules:
- Choose from any real professional role: CFO, CEO, CTO, Lawyer, Data Scientist,
Product Manager, Security Engineer, DevOps Engineer, Software Architect,
HR Director, Marketing Director, Business Analyst, UX Designer,
Financial Analyst, Operations Director, Legal Counsel, etc.
- The persona statement must be written in first person ("I am..." or "I have...").
- Select the role whose domain knowledge most directly applies to solving the request.
- If the request is clearly about coding or building software systems, pick Software Architect.
- "Queen" is your internal alias do not include it in the persona.
"""
async def select_expert_persona(user_message: str, llm: LLMProvider) -> str:
    """Classify the request with the HR prompt and build a persona prefix.

    Issues exactly one non-streaming ``acomplete()`` call against the session
    LLM, asking it (as an HR Director) to pick the best-fit expert role. Any
    failure — LLM error, malformed JSON, missing fields — yields "" so the
    caller falls back to the default "Solution Architect" identity.

    Args:
        user_message: The user's opening message for the session.
        llm: The session LLM provider.

    Returns:
        A persona prefix like "You are a CFO. I am a CFO with 20 years..."
        or "" on failure.
    """
    if not user_message.strip():
        # Nothing to classify — keep the default identity.
        return ""
    try:
        reply = await llm.acomplete(
            messages=[{"role": "user", "content": user_message}],
            system=_HR_SYSTEM_PROMPT,
            max_tokens=1024,
            json_mode=True,
        )
        payload = reply.content.strip()
        data = json.loads(payload)
        role = data.get("role", "").strip()
        persona = data.get("persona", "").strip()
        if role and persona:
            logger.info("Thinking hook: selected persona — %s", role)
            return f"You are a {role}. {persona}"
        logger.warning("Thinking hook: empty role/persona in response: %r", payload)
        return ""
    except Exception:
        # Deliberately broad: the hook is best-effort and must never
        # break session start. Log with traceback and fall back.
        logger.warning("Thinking hook: persona classification failed", exc_info=True)
        return ""
+4 -2
View File
@@ -898,9 +898,11 @@ class NodeConversation:
# Build reference message
ref_parts: list[str] = []
if conv_filename:
full_path = str((spill_path / conv_filename).resolve())
ref_parts.append(
f"[Previous conversation saved to '{conv_filename}'. "
f"Use load_data('{conv_filename}') to review if needed.]"
f"[Previous conversation saved to '{full_path}'. "
f"Use load_data('{conv_filename}'), read_file('{full_path}'), "
f"or run_command('cat \"{full_path}\"') to review if needed.]"
)
elif not collapsed_msgs:
ref_parts.append("[Previous freeform messages compacted.]")
+21 -6
View File
@@ -604,8 +604,10 @@ class EventLoopNode(NodeProtocol):
await self._publish_iteration(stream_id, node_id, iteration, execution_id)
# 6d. Pre-turn compaction check (tiered)
_compacted_this_iter = False
if conversation.needs_compaction():
await self._compact(ctx, conversation, accumulator)
_compacted_this_iter = True
# 6e. Run single LLM turn (with transient error retry)
logger.info(
@@ -809,8 +811,11 @@ class EventLoopNode(NodeProtocol):
if turn_input > 0:
conversation.update_token_count(turn_input)
# 6e''. Post-turn compaction check (catches tool-result bloat)
if conversation.needs_compaction():
# 6e''. Post-turn compaction check (catches tool-result bloat).
# Skip if pre-turn already compacted this iteration — two compactions
# in one iteration produce back-to-back spillover files and leave the
# agent disoriented on the very next turn.
if not _compacted_this_iter and conversation.needs_compaction():
await self._compact(ctx, conversation, accumulator)
# Reset auto-block grace streak when real work happens
@@ -3637,14 +3642,24 @@ class EventLoopNode(NodeProtocol):
data_files = [f for f in all_files if f not in conv_files]
if conv_files:
conv_list = "\n".join(f" - {f}" for f in conv_files)
conv_list = "\n".join(
f" - {f} (full path: {data_dir / f})" for f in conv_files
)
parts.append(
"CONVERSATION HISTORY (freeform messages saved during compaction — "
"use load_data to review earlier dialogue):\n" + conv_list
"use load_data('<filename>'), read_file('<full_path>'), "
"or run_command('cat \"<full_path>\"') to review earlier dialogue):\n"
+ conv_list
)
if data_files:
file_list = "\n".join(f" - {f}" for f in data_files[:30])
parts.append("DATA FILES (use load_data to read):\n" + file_list)
file_list = "\n".join(
f" - {f} (full path: {data_dir / f})" for f in data_files[:30]
)
parts.append(
"DATA FILES (use load_data('<filename>'), read_file('<full_path>'), "
"or run_command('cat \"<full_path>\"') to read):\n"
+ file_list
)
if not all_files:
parts.append(
"NOTE: Large tool results may have been saved to files. "
+3
View File
@@ -140,6 +140,9 @@ class EventType(StrEnum):
# Queen phase changes (building <-> staging <-> running)
QUEEN_PHASE_CHANGED = "queen_phase_changed"
# Queen thinking hook — persona selected for the current building session
QUEEN_PERSONA_SELECTED = "queen_persona_selected"
# Subagent reports (one-way progress updates from sub-agents)
SUBAGENT_REPORT = "subagent_report"
+17 -5
View File
@@ -605,10 +605,11 @@ class SessionManager:
"Handle all tasks directly using your coding tools."
)
# Compose phase-specific prompts
phase_state.prompt_building = (
_queen_identity_building
+ _queen_style
# Compose phase-specific prompts.
# The building prompt body is stored separately so the thinking hook
# can replace the identity prefix with a domain-matched expert persona.
_building_body = (
_queen_style
+ _queen_tools_building
+ _queen_behavior_always
+ _queen_behavior_building
@@ -617,6 +618,8 @@ class SessionManager:
+ _appendices
+ worker_identity
)
phase_state.base_prompt_building = _building_body
phase_state.prompt_building = _queen_identity_building + _building_body
phase_state.prompt_staging = (
_queen_identity_staging
+ _queen_style
@@ -634,7 +637,16 @@ class SessionManager:
+ worker_identity
)
# Use the initial phase prompt as the node's system_prompt
# Wire up the thinking hook: fires once at session start to select
# the best-fit expert persona for the user's opening message.
from framework.agents.hive_coder.nodes.thinking_hook import select_expert_persona
_session_llm = session.llm
phase_state.thinking_hook = lambda msg: select_expert_persona(msg, _session_llm)
if initial_prompt:
await phase_state.apply_thinking_hook(initial_prompt)
# Use the (potentially enriched) building prompt as the node's system_prompt
initial_prompt_text = phase_state.get_current_prompt()
registered_tool_names = set(queen_registry.get_tools().keys())
@@ -88,6 +88,11 @@ class QueenPhaseState:
prompt_staging: str = ""
prompt_running: str = ""
# Thinking hook — set by session_manager to enrich the building prompt
# with a domain-specific expert persona on session start.
thinking_hook: Any = None # async (str) -> str
base_prompt_building: str = "" # building prompt body without the identity prefix
def get_current_tools(self) -> list:
"""Return tools for the current phase."""
if self.phase == "running":
@@ -104,6 +109,31 @@ class QueenPhaseState:
return self.prompt_staging
return self.prompt_building
async def apply_thinking_hook(self, trigger_message: str) -> None:
"""Run the thinking hook and enrich prompt_building with an expert persona.
Called once at session start (initial_prompt as trigger). The selected
persona replaces the default "Solution Architect" identity prefix while
keeping all tool docs, behavior rules, and appendices intact.
No-ops silently if thinking_hook is not set, trigger_message is empty,
or the classifier returns an empty string (falls back to default identity).
"""
if not self.thinking_hook or not trigger_message.strip():
return
persona_prefix = await self.thinking_hook(trigger_message)
if not persona_prefix:
return
self.prompt_building = persona_prefix + "\n\n" + self.base_prompt_building
if self.event_bus is not None:
await self.event_bus.publish(
AgentEvent(
type=EventType.QUEEN_PERSONA_SELECTED,
stream_id="queen",
data={"persona": persona_prefix},
)
)
async def _emit_phase_event(self) -> None:
"""Publish a QUEEN_PHASE_CHANGED event so the frontend updates the tag."""
if self.event_bus is not None: