Merge pull request #5769 from aden-hive/queen-mode-separation
Release / Create Release (push) Waiting to run

Queen mode separation: building, staging, and running modes
This commit is contained in:
RichardTang-Aden
2026-03-03 21:31:23 -08:00
committed by GitHub
32 changed files with 2928 additions and 843 deletions
+1 -1
View File
@@ -69,7 +69,7 @@ goal = Goal(
id="dynamic-tool-discovery", id="dynamic-tool-discovery",
description=( description=(
"Always discover available tools dynamically via " "Always discover available tools dynamically via "
"discover_mcp_tools before referencing tools in agent designs" "list_agent_tools before referencing tools in agent designs"
), ),
constraint_type="hard", constraint_type="hard",
category="correctness", category="correctness",
+229 -128
View File
@@ -52,7 +52,6 @@ _SHARED_TOOLS = [
"undo_changes", "undo_changes",
# Meta-agent # Meta-agent
"list_agent_tools", "list_agent_tools",
"discover_mcp_tools",
"validate_agent_tools", "validate_agent_tools",
"list_agents", "list_agents",
"list_agent_sessions", "list_agent_sessions",
@@ -63,6 +62,47 @@ _SHARED_TOOLS = [
"run_agent_tests", "run_agent_tests",
] ]
# Queen mode-specific tool sets.
# Building mode: full coding + agent construction tools.
_QUEEN_BUILDING_TOOLS = _SHARED_TOOLS + [
"load_built_agent",
"list_credentials",
]
# Staging mode: agent loaded but not yet running — inspect, configure, launch.
_QUEEN_STAGING_TOOLS = [
# Read-only (inspect agent files, logs)
"read_file",
"list_directory",
"search_files",
"run_command",
# Agent inspection
"list_credentials",
"get_worker_status",
# Launch or go back
"run_agent_with_input",
"stop_worker_and_edit",
]
# Running mode: worker is executing — monitor and control.
_QUEEN_RUNNING_TOOLS = [
# Read-only coding (for inspecting logs, files)
"read_file",
"list_directory",
"search_files",
"run_command",
# Credentials
"list_credentials",
# Worker lifecycle
"stop_worker",
"stop_worker_and_edit",
"get_worker_status",
"inject_worker_message",
# Monitoring
"get_worker_health_summary",
"notify_operator",
]
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Shared agent-building knowledge: core mandates, tool docs, meta-agent # Shared agent-building knowledge: core mandates, tool docs, meta-agent
@@ -101,10 +141,10 @@ errors yourself. Don't declare success until validation passes.
- undo_changes(path?) restore from git snapshot - undo_changes(path?) restore from git snapshot
## Meta-Agent ## Meta-Agent
- list_agent_tools(server_config_path?) list all tool names available \ - list_agent_tools(server_config_path?, output_schema?, group?) discover \
for agent building, grouped by category. Call this FIRST before designing. available tools grouped by category. output_schema: "simple" (default) or \
- discover_mcp_tools(server_config_path?) connect to MCP servers \ "full" (includes input_schema). group: "all" (default) or a prefix like \
and list all available tools with full schemas. Use for parameter details. "gmail". Call FIRST before designing.
- validate_agent_tools(agent_path) validate that all tools declared \ - validate_agent_tools(agent_path) validate that all tools declared \
in an agent's nodes actually exist. Call after building. in an agent's nodes actually exist. Call after building.
- list_agents() list all agent packages in exports/ with session counts - list_agents() list all agent packages in exports/ with session counts
@@ -121,15 +161,14 @@ You are not just a file writer. You have deep integration with the \
Hive framework: Hive framework:
## Tool Discovery (MANDATORY before designing) ## Tool Discovery (MANDATORY before designing)
Before designing any agent, run list_agent_tools() to get all \ Before designing any agent, run list_agent_tools() to discover all \
available tool names. ONLY use tools from this list in your node \ available tools. ONLY use tools from this list in your node definitions. \
definitions. NEVER guess or fabricate tool names from memory. NEVER guess or fabricate tool names from memory.
For full parameter schemas when you need details: list_agent_tools() # names + descriptions
discover_mcp_tools() list_agent_tools(output_schema="full") # include input_schema
list_agent_tools(group="gmail") # only gmail_* tools
To check a specific agent's configured tools: list_agent_tools("exports/{agent_name}/mcp_servers.json") # specific agent
list_agent_tools("exports/{agent_name}/mcp_servers.json")
## Agent Awareness ## Agent Awareness
Run list_agents() to see what agents already exist. Read their code \ Run list_agents() to see what agents already exist. Read their code \
@@ -246,11 +285,12 @@ explicitly requests a one-shot/batch agent. Forever-alive agents loop \
continuously the user exits by closing the TUI. This is the standard \ continuously the user exits by closing the TUI. This is the standard \
pattern for all interactive agents. pattern for all interactive agents.
### Node Count Rules (HARD LIMITS) ### Node Design Rules
**2-4 nodes** for all agents. Never exceed 4 unless the user explicitly \ Each node boundary serializes outputs to shared memory \
requests more. Each node boundary serializes outputs to shared memory \ and DESTROYS all in-context information (tool results, reasoning, history). \
and DESTROYS all in-context information (tool results, reasoning, history). Use as many nodes as the use case requires, but don't create nodes without \
tools merge them into nodes that do real work.
**MERGE nodes when:** **MERGE nodes when:**
- Node has NO tools (pure LLM reasoning) merge into predecessor/successor - Node has NO tools (pure LLM reasoning) merge into predecessor/successor
@@ -264,10 +304,11 @@ and DESTROYS all in-context information (tool results, reasoning, history).
- Fundamentally different tool sets - Fundamentally different tool sets
- Fan-out parallelism (parallel branches MUST be separate) - Fan-out parallelism (parallel branches MUST be separate)
**Typical patterns:** **Typical patterns (queen manages intake NO client-facing intake node):**
- 2 nodes: `interact (client-facing) process (autonomous) interact` - 2 nodes: `process (autonomous) review (client-facing) process`
- 3 nodes: `intake (CF) process (auto) review (CF) intake` - 1 node: `process (autonomous)` simplest; queen handles all interaction
- WRONG: 7 nodes where half have no tools and just do LLM reasoning - WRONG: 7 nodes where half have no tools and just do LLM reasoning
- WRONG: Intake node that asks the user for requirements the queen does intake
Read reference agents before designing: Read reference agents before designing:
list_agents() list_agents()
@@ -280,20 +321,27 @@ use box-drawing characters and clear flow arrows:
``` ```
intake (client-facing)
tools: set_output
on_success
process (autonomous) process (autonomous)
in: user_request
tools: web_search, tools: web_search,
save_data save_data
on_success on_success
back to intake
review (client-facing)
tools: set_output
on_success
back to process
``` ```
The queen owns intake: she gathers user requirements, then calls \
`run_agent_with_input(task)` with a structured task description. \
When building the agent, design the entry node's `input_keys` to \
match what the queen will provide at run time. No client-facing \
intake node in the worker.
Follow the graph with a brief summary of each node's purpose. \ Follow the graph with a brief summary of each node's purpose. \
Get user approval before implementing. Get user approval before implementing.
@@ -356,8 +404,9 @@ from .agent import (
``` ```
**entry_points**: `{"start": "first-node-id"}` **entry_points**: `{"start": "first-node-id"}`
For agents with multiple entry points (e.g. a reminder trigger), \ The first node should be an autonomous processing node (NOT a \
add them: `{"start": "intake", "reminder": "reminder"}` client-facing intake). For agents with multiple entry points, \
add them: `{"start": "process", "reminder": "check"}`
**conversation_mode** ONLY two valid values: **conversation_mode** ONLY two valid values:
- `"continuous"` recommended for interactive agents (context carries \ - `"continuous"` recommended for interactive agents (context carries \
@@ -391,7 +440,8 @@ NO "mcpServers" wrapper. cwd "../../tools". command "uv".
**Storage**: `Path.home() / ".hive" / "agents" / "{name}"` **Storage**: `Path.home() / ".hive" / "agents" / "{name}"`
**Client-facing system prompts** STEP 1/STEP 2 pattern: **Client-facing system prompts** (review/approval nodes only, NOT intake) \
STEP 1/STEP 2 pattern:
``` ```
STEP 1 Present to user (text only, NO tool calls): STEP 1 Present to user (text only, NO tool calls):
[instructions] [instructions]
@@ -399,6 +449,9 @@ STEP 1 — Present to user (text only, NO tool calls):
STEP 2 After user responds, call set_output: STEP 2 After user responds, call set_output:
[set_output calls] [set_output calls]
``` ```
The queen manages intake. Workers should NOT have a client-facing node \
that asks for requirements. Use client_facing=True only for review or \
approval checkpoints mid-execution.
**Autonomous system prompts** set_output in SEPARATE turn. **Autonomous system prompts** set_output in SEPARATE turn.
@@ -408,7 +461,10 @@ If list_agent_tools() shows these don't exist, use alternatives \
(e.g. save_data/load_data for data persistence). (e.g. save_data/load_data for data persistence).
**Node rules**: **Node rules**:
- **2-4 nodes MAX.** Never exceed 4. Merge thin nodes aggressively. - **NO intake nodes.** The queen owns intake. She defines the entry \
node's input_keys at build time and fills them via \
`run_agent_with_input(task)` at run time.
- Don't abuse nodes without tools — merge them into a node that does work.
- A node with 0 tools is NOT a real node merge it. - A node with 0 tools is NOT a real node merge it.
- node_type "event_loop" for all regular graph nodes. Use "gcu" ONLY for - node_type "event_loop" for all regular graph nodes. Use "gcu" ONLY for
browser automation subagents (see GCU appendix). GCU nodes MUST be in a browser automation subagents (see GCU appendix). GCU nodes MUST be in a
@@ -542,50 +598,89 @@ start_agent("{name}") # triggers default entry point
_queen_tools_docs = """ _queen_tools_docs = """
## Worker Lifecycle ## Operating Modes
- start_worker(task) Start the worker with a task description. The \
worker runs autonomously until it finishes or asks the user a question.
- stop_worker() Cancel the worker's current execution.
- get_worker_status() Check if the worker is idle, running, or waiting \
for user input. Returns execution details.
- inject_worker_message(content) Send a message to the running worker. \
Use this to relay user instructions or concerns.
## Monitoring You operate in one of three modes. Your available tools change based on the \
- get_worker_health_summary() Read the latest health data from the judge. mode. The system notifies you when a mode change occurs.
- notify_operator(ticket_id, analysis, urgency) Alert the user about a \
critical issue. Use sparingly.
## Agent Loading ### BUILDING mode (default)
- load_built_agent(agent_path) Load a newly built agent as the worker in \ You have full coding tools for building and modifying agents:
this session. If a worker is already loaded, it is automatically unloaded \ - File I/O: read_file, write_file, edit_file, list_directory, search_files, \
first. Call after building and validating an agent to make it available \ run_command, undo_changes
immediately. - Meta-agent: list_agent_tools, validate_agent_tools, \
list_agents, list_agent_sessions, get_agent_session_state, get_agent_session_memory, \
list_agent_checkpoints, get_agent_checkpoint, run_agent_tests
- load_built_agent(agent_path) Load the agent and switch to STAGING mode
- list_credentials(credential_id?) List authorized credentials
## Credentials When you finish building an agent, call load_built_agent(path) to stage it.
- list_credentials(credential_id?) List all authorized credentials in the \
local store. Returns IDs, aliases, status, and identity metadata (never \ ### STAGING mode (agent loaded, not yet running)
secrets). Optionally filter by credential_id. The agent is loaded and ready to run. You can inspect it and launch it:
- Read-only: read_file, list_directory, search_files, run_command
- list_credentials(credential_id?) Verify credentials are configured
- get_worker_status() Check the loaded worker
- run_agent_with_input(task) Start the worker and switch to RUNNING mode
- stop_worker_and_edit() Go back to BUILDING mode
In STAGING mode you do NOT have write tools. If you need to modify the agent, \
call stop_worker_and_edit() to go back to BUILDING mode.
### RUNNING mode (worker is executing)
The worker is running. You have monitoring and lifecycle tools:
- Read-only: read_file, list_directory, search_files, run_command
- get_worker_status() Check worker status (idle, running, waiting)
- inject_worker_message(content) Send a message to the running worker
- get_worker_health_summary() Read the latest health data
- notify_operator(ticket_id, analysis, urgency) Alert the user (use sparingly)
- stop_worker() Stop the worker and return to STAGING mode, then ask the user what to do next
- stop_worker_and_edit() Stop the worker and switch back to BUILDING mode
In RUNNING mode you do NOT have write tools or agent construction tools. \
If you need to modify the agent, call stop_worker_and_edit() to switch back \
to BUILDING mode. To stop the worker and ask the user what to do next, call \
stop_worker() to return to STAGING mode.
### Mode transitions
- load_built_agent(path) switches to STAGING mode
- run_agent_with_input(task) starts worker, switches to RUNNING mode
- stop_worker() stops worker, switches to STAGING mode (ask user: re-run or edit?)
- stop_worker_and_edit() stops worker (if running), switches to BUILDING mode
""" """
_queen_behavior = """ _queen_behavior = """
# Behavior # Behavior
## CRITICAL RULE — ask_user tool
Every response that ends with a question, a prompt, or expects user \
input MUST finish with a call to ask_user(prompt, options). This is \
NON-NEGOTIABLE. The system CANNOT detect that you are waiting for \
input unless you call ask_user. You MUST call ask_user as the LAST \
action in your response.
NEVER end a response with a question in text without calling ask_user. \
NEVER rely on the user seeing your text and replying call ask_user.
Always provide 2-4 short options that cover the most likely answers. \
The user can always type a custom response.
Examples:
- ask_user("What do you need?",
["Build a new agent", "Run the loaded worker", "Help with code"])
- ask_user("Which pattern?",
["Simple 2-node", "Rich with feedback", "Custom"])
- ask_user("Ready to proceed?",
["Yes, go ahead", "Let me change something"])
## Greeting and identity ## Greeting and identity
When the user greets you ("hi", "hello") or asks what you can do / \ When the user greets you or asks what you can do, respond concisely \
what you are, respond concisely. DO NOT list internal processes \ (under 10 lines). DO NOT list internal processes. Focus on:
(validation steps, AgentRunner.load, tool discovery). Focus on \ 1. Direct capabilities: coding, agent building & debugging.
user-facing capabilities: 2. What the loaded worker does (one sentence from Worker Profile). \
If no worker is loaded, say so.
1. Direct capabilities: file operations, shell commands, coding, \ 3. THEN call ask_user to prompt them do NOT just write text.
agent building & debugging.
2. Delegation: describe what the loaded worker does in one sentence \
(read the Worker Profile at the end of this prompt). If no worker \
is loaded, say so.
3. End with a short prompt: "What do you need?"
Keep it under 10 lines. No bullet-point dumps of every tool you have.
## Direct coding ## Direct coding
You can do any coding task directly reading files, writing code, running \ You can do any coding task directly reading files, writing code, running \
@@ -596,7 +691,8 @@ The worker is a specialized agent (see Worker Profile at the end of this \
prompt). It can ONLY do what its goal and tools allow. prompt). It can ONLY do what its goal and tools allow.
**Decision rule read the Worker Profile first:** **Decision rule read the Worker Profile first:**
- The user's request directly matches the worker's goal start_worker(task) - The user's request directly matches the worker's goal use \
run_agent_with_input(task) (if in staging) or load then run (if in building)
- Anything else do it yourself. Do NOT reframe user requests into \ - Anything else do it yourself. Do NOT reframe user requests into \
subtasks to justify delegation. subtasks to justify delegation.
- Building, modifying, or configuring agents is ALWAYS your job. Never \ - Building, modifying, or configuring agents is ALWAYS your job. Never \
@@ -604,16 +700,30 @@ delegate agent construction to the worker, even as a "research" subtask.
## When the user says "run", "execute", or "start" (without specifics) ## When the user says "run", "execute", or "start" (without specifics)
The loaded worker is described in the Worker Profile below. Ask what \ The loaded worker is described in the Worker Profile below. You MUST \
task or topic they want do NOT call list_agents() or list directories. \ ask the user what task or input they want using ask_user do NOT \
The worker is already loaded. Just ask for the input the worker needs \ invent a task, do NOT call list_agents() or list directories. \
(e.g., a research topic, a target domain, a job description). The worker is already loaded. Just ask for the specific input the \
worker needs (e.g., a research topic, a target domain, a job description). \
NEVER call run_agent_with_input until the user has provided their input.
If NO worker is loaded, say so and offer to build one. If NO worker is loaded, say so and offer to build one.
## When in staging mode (agent loaded, not running):
- Tell the user the agent is loaded and ready.
- For tasks matching the worker's goal: ALWAYS ask the user for their \
specific input BEFORE calling run_agent_with_input(task). NEVER make up \
or assume what the user wants. Use ask_user to collect the task details \
(e.g., topic, target, requirements). Once you have the user's answer, \
compose a structured task description from their input and call \
run_agent_with_input(task). The worker has no intake node it receives \
your task and starts processing.
- If the user wants to modify the agent, call stop_worker_and_edit().
## When idle (worker not running): ## When idle (worker not running):
- Greet the user. Mention what the worker can do in one sentence. - Greet the user. Mention what the worker can do in one sentence.
- For tasks matching the worker's goal, call start_worker(task). - For tasks matching the worker's goal, use run_agent_with_input(task) \
(if in staging) or load the agent first (if in building).
- For everything else, do it directly. - For everything else, do it directly.
## When the user clicks Run (external event notification) ## When the user clicks Run (external event notification)
@@ -625,24 +735,37 @@ explain the problem clearly and help fix it. For credential errors, \
guide the user to set up the missing credentials. For structural \ guide the user to set up the missing credentials. For structural \
issues, offer to fix the agent graph directly. issues, offer to fix the agent graph directly.
## When worker is running: ## When worker is running — GO SILENT
- If the user asks about progress, call get_worker_status() ONCE and \
report the result. Do NOT poll in a loop.
- NEVER call get_worker_status() repeatedly without user input in between. \
The worker will surface results through client-facing nodes. You do not \
need to monitor it. One check per user request is enough.
- If the user has a concern or instruction for the worker, call \
inject_worker_message(content) to relay it.
- You can still do coding tasks directly while the worker runs.
- If an escalation ticket arrives from the judge, assess severity:
- Low/transient: acknowledge silently, do not disturb the user.
- High/critical: notify the user with a brief analysis and suggested action.
- After starting the worker or checking its status, WAIT for the user's \
next message. Do not take autonomous actions unless the user asks.
## When worker asks user a question: Once you call start_worker(), your job is DONE. Do NOT call ask_user, \
- The system will route the user's response directly to the worker. \ do NOT call get_worker_status(), do NOT emit any text. Just stop. \
You do not need to relay it. The user will come back to you after responding. The worker owns the conversation now it has its own client-facing \
nodes that talk to the user directly.
**After start_worker, your ENTIRE response should be ONE short \
confirmation sentence with NO tool calls.** Example: \
"Started the vulnerability assessment." that's it. No ask_user, \
no get_worker_status, no follow-up questions.
You only wake up again when:
- The user explicitly addresses you (not answering a worker question)
- A worker question is forwarded to you for relay
- An escalation ticket arrives from the judge
- The worker finishes
If the user explicitly asks about progress, call get_worker_status() \
ONCE and report. Do NOT poll or check proactively.
For escalation tickets: low/transient acknowledge silently. \
High/critical notify the user with a brief analysis.
## When the worker asks the user a question:
- The user's answer is routed to you with context: \
[Worker asked: "...", Options: ...] User answered: "...".
- If the user is answering the worker's question normally, relay it \
using inject_worker_message(answer_text). Then go silent again.
- If the user is rejecting the approach, asking to stop, or giving \
you an instruction, handle it yourself do NOT relay.
## Showing or describing the loaded worker ## Showing or describing the loaded worker
@@ -658,16 +781,18 @@ building something new.
When the user asks to change, modify, or update the loaded worker \ When the user asks to change, modify, or update the loaded worker \
(e.g., "change the report node", "add a node", "delete node X"): (e.g., "change the report node", "add a node", "delete node X"):
1. Use the **Path** from the Worker Profile to locate the agent files. 1. Call stop_worker_and_edit() this stops the worker and gives you \
2. Read the relevant files (nodes/__init__.py, agent.py, etc.). coding tools (switches to BUILDING mode).
3. Make the requested changes using edit_file / write_file. 2. Use the **Path** from the Worker Profile to locate the agent files.
4. Run validation (default_agent.validate(), AgentRunner.load(), \ 3. Read the relevant files (nodes/__init__.py, agent.py, etc.).
4. Make the requested changes using edit_file / write_file.
5. Run validation (default_agent.validate(), AgentRunner.load(), \
validate_agent_tools()). validate_agent_tools()).
5. **Reload the modified worker**: call load_built_agent("{path}") \ 6. **Reload the modified worker**: call load_built_agent("{path}") \
so the changes take effect immediately. If a worker is already loaded, \ so the changes take effect immediately (switches to STAGING mode). \
stop it first, then reload. Then call run_agent_with_input(task) to restart execution.
Do NOT skip step 5 without reloading, the user will still be \ Do NOT skip step 6 without reloading, the user will still be \
interacting with the old version. interacting with the old version.
""" """
@@ -676,9 +801,9 @@ _queen_phase_7 = """
After building and verifying, load the agent into the current session: After building and verifying, load the agent into the current session:
load_built_agent("exports/{name}") load_built_agent("exports/{name}")
This makes the agent available immediately the user sees its graph, \ This switches to STAGING mode the user sees the agent's graph and \
the tab name updates, and you can delegate to it via start_worker(). \ the tab name updates. Then call run_agent_with_input(task) to start it. \
Do NOT tell the user to run `python -m {name} run` load it here. Do NOT tell the user to run `python -m {name} run` load and run it here.
""" """
_queen_style = """ _queen_style = """
@@ -808,21 +933,7 @@ queen_node = NodeSpec(
"User's intent is understood, coding tasks are completed correctly, " "User's intent is understood, coding tasks are completed correctly, "
"and the worker is managed effectively when delegated to." "and the worker is managed effectively when delegated to."
), ),
tools=_SHARED_TOOLS tools=sorted(set(_QUEEN_BUILDING_TOOLS + _QUEEN_STAGING_TOOLS + _QUEEN_RUNNING_TOOLS)),
+ [
# Worker lifecycle
"start_worker",
"stop_worker",
"get_worker_status",
"inject_worker_message",
# Monitoring
"get_worker_health_summary",
"notify_operator",
# Agent loading
"load_built_agent",
# Credentials
"list_credentials",
],
system_prompt=( system_prompt=(
"You are the Queen — the user's primary interface. You are a coding agent " "You are the Queen — the user's primary interface. You are a coding agent "
"with the same capabilities as the Hive Coder worker, PLUS the ability to " "with the same capabilities as the Hive Coder worker, PLUS the ability to "
@@ -836,20 +947,7 @@ queen_node = NodeSpec(
), ),
) )
ALL_QUEEN_TOOLS = _SHARED_TOOLS + [ ALL_QUEEN_TOOLS = sorted(set(_QUEEN_BUILDING_TOOLS + _QUEEN_STAGING_TOOLS + _QUEEN_RUNNING_TOOLS))
# Worker lifecycle
"start_worker",
"stop_worker",
"get_worker_status",
"inject_worker_message",
# Monitoring
"get_worker_health_summary",
"notify_operator",
# Agent loading
"load_built_agent",
# Credentials
"list_credentials",
]
__all__ = [ __all__ = [
"coder_node", "coder_node",
@@ -857,4 +955,7 @@ __all__ = [
"queen_node", "queen_node",
"ALL_QUEEN_TRIAGE_TOOLS", "ALL_QUEEN_TRIAGE_TOOLS",
"ALL_QUEEN_TOOLS", "ALL_QUEEN_TOOLS",
"_QUEEN_BUILDING_TOOLS",
"_QUEEN_STAGING_TOOLS",
"_QUEEN_RUNNING_TOOLS",
] ]
@@ -48,11 +48,11 @@ profile_setup → daily_intake → update_tracker → analyze_progress → gener
``` ```
`analyze_progress` has no tools. `schedule_reminders` just sets one boolean. `report` just presents analysis. `update_tracker` and `generate_plan` are sequential autonomous work. `analyze_progress` has no tools. `schedule_reminders` just sets one boolean. `report` just presents analysis. `update_tracker` and `generate_plan` are sequential autonomous work.
**Good example** (3 nodes): **Good example** (2 nodes):
``` ```
intake (client-facing) → process (autonomous: track + analyze + plan) → intake (loop back) process (autonomous: track + analyze + plan) → review (client-facing) → process (loop back)
``` ```
One client-facing node handles ALL user interaction (setup, logging, reports). One autonomous node handles ALL backend work (CSV update, analysis, plan generation) with tools and context preserved. The queen handles intake (gathering requirements from the user) and passes the task via `run_agent_with_input(task)`. One autonomous node handles ALL backend work (CSV update, analysis, plan generation) with tools and context preserved. One client-facing node handles review/approval when needed.
12. **Adding framework gating for LLM behavior** — Don't add output rollback, premature rejection, or interaction protocol injection. Fix with better prompts or custom judges. 12. **Adding framework gating for LLM behavior** — Don't add output rollback, premature rejection, or interaction protocol injection. Fix with better prompts or custom judges.
@@ -109,3 +109,5 @@ def test_research_routes_back_to_interact(self):
25. **Manually wiring browser tools on event_loop nodes** — If the agent needs browser automation, use `node_type="gcu"` which auto-includes all browser tools and prepends best-practices guidance. Do NOT manually list browser tool names on event_loop nodes — they may not exist in the MCP server or may be incomplete. See the GCU Guide appendix. 25. **Manually wiring browser tools on event_loop nodes** — If the agent needs browser automation, use `node_type="gcu"` which auto-includes all browser tools and prepends best-practices guidance. Do NOT manually list browser tool names on event_loop nodes — they may not exist in the MCP server or may be incomplete. See the GCU Guide appendix.
26. **Using GCU nodes as regular graph nodes** — GCU nodes (`node_type="gcu"`) are exclusively subagents. They must ONLY appear in a parent node's `sub_agents=["gcu-node-id"]` list and be invoked via `delegate_to_sub_agent()`. They must NEVER be connected via edges, used as entry nodes, or used as terminal nodes. If a GCU node appears as an edge source or target, the graph will fail pre-load validation. 26. **Using GCU nodes as regular graph nodes** — GCU nodes (`node_type="gcu"`) are exclusively subagents. They must ONLY appear in a parent node's `sub_agents=["gcu-node-id"]` list and be invoked via `delegate_to_sub_agent()`. They must NEVER be connected via edges, used as entry nodes, or used as terminal nodes. If a GCU node appears as an edge source or target, the graph will fail pre-load validation.
27. **Adding a client-facing intake node to worker agents** — The queen owns intake. She defines the entry node's `input_keys` at build time and fills them via `run_agent_with_input(task)` at run time. Worker agents should start with an autonomous processing node, NOT a client-facing intake node that asks the user for requirements. Client-facing nodes in workers are for mid-execution review/approval only.
@@ -57,51 +57,28 @@ metadata = AgentMetadata()
from framework.graph import NodeSpec from framework.graph import NodeSpec
# Node 1: Intake (client-facing) # Node 1: Process (autonomous entry node)
intake_node = NodeSpec( # The queen handles intake and passes structured input via
id="intake", # run_agent_with_input(task). NO client-facing intake node.
name="Intake", # The queen defines input_keys at build time and fills them at run time.
description="Gather requirements from the user", process_node = NodeSpec(
id="process",
name="Process",
description="Execute the task using available tools",
node_type="event_loop", node_type="event_loop",
client_facing=True,
max_node_visits=0, # Unlimited for forever-alive max_node_visits=0, # Unlimited for forever-alive
input_keys=["topic"], input_keys=["user_request", "feedback"],
output_keys=["brief"],
success_criteria="The brief is specific and actionable.",
system_prompt="""\
You are an intake specialist.
**STEP 1 — Read and respond (text only, NO tool calls):**
1. Read the topic provided
2. If vague, ask 1-2 clarifying questions
3. If clear, confirm your understanding
**STEP 2 — After the user confirms, call set_output:**
- set_output("brief", "Clear description of what to do")
""",
tools=[],
)
# Node 2: Worker (autonomous)
worker_node = NodeSpec(
id="worker",
name="Worker",
description="Do the main work",
node_type="event_loop",
max_node_visits=0,
input_keys=["brief", "feedback"],
output_keys=["results"], output_keys=["results"],
nullable_output_keys=["feedback"], # Only on feedback edge nullable_output_keys=["feedback"], # Only on feedback edge
success_criteria="Results are complete and accurate.", success_criteria="Results are complete and accurate.",
system_prompt="""\ system_prompt="""\
You are a worker agent. Given a brief, do the work. You are a processing agent. Your task is in memory under "user_request". \
If "feedback" is present, this is a revision — address the feedback.
If feedback is provided, this is a follow-up — address the feedback.
Work in phases: Work in phases:
1. Use tools to gather/process data 1. Use tools to gather/process data
2. Analyze results 2. Analyze results
3. Call set_output for each key in a SEPARATE turn: 3. Call set_output in a SEPARATE turn:
- set_output("results", "structured results") - set_output("results", "structured results")
""", """,
tools=["web_search", "web_scrape", "save_data", "load_data", "list_data_files"], tools=["web_search", "web_scrape", "save_data", "load_data", "list_data_files"],
@@ -115,7 +92,7 @@ review_node = NodeSpec(
node_type="event_loop", node_type="event_loop",
client_facing=True, client_facing=True,
max_node_visits=0, max_node_visits=0,
input_keys=["results", "brief"], input_keys=["results", "user_request"],
output_keys=["next_action", "feedback"], output_keys=["next_action", "feedback"],
nullable_output_keys=["feedback"], nullable_output_keys=["feedback"],
success_criteria="User has reviewed and decided next steps.", success_criteria="User has reviewed and decided next steps.",
@@ -128,14 +105,14 @@ Present the results to the user.
3. Ask: satisfied, or want changes? 3. Ask: satisfied, or want changes?
**STEP 2 — After user responds, call set_output:** **STEP 2 — After user responds, call set_output:**
- set_output("next_action", "new_topic") — if starting fresh - set_output("next_action", "done") — if satisfied
- set_output("next_action", "revise") — if changes needed - set_output("next_action", "revise") — if changes needed
- set_output("feedback", "what to change") — only if revising - set_output("feedback", "what to change") — only if revising
""", """,
tools=[], tools=[],
) )
__all__ = ["intake_node", "worker_node", "review_node"] __all__ = ["process_node", "review_node"]
``` ```
## agent.py ## agent.py
@@ -155,7 +132,7 @@ from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
from framework.runtime.execution_stream import EntryPointSpec from framework.runtime.execution_stream import EntryPointSpec
from .config import default_config, metadata from .config import default_config, metadata
from .nodes import intake_node, worker_node, review_node from .nodes import process_node, review_node
# Goal definition # Goal definition
goal = Goal( goal = Goal(
@@ -172,27 +149,26 @@ goal = Goal(
) )
# Node list # Node list
nodes = [intake_node, worker_node, review_node] nodes = [process_node, review_node]
# Edge definitions # Edge definitions
edges = [ edges = [
EdgeSpec(id="intake-to-worker", source="intake", target="worker", EdgeSpec(id="process-to-review", source="process", target="review",
condition=EdgeCondition.ON_SUCCESS, priority=1), condition=EdgeCondition.ON_SUCCESS, priority=1),
EdgeSpec(id="worker-to-review", source="worker", target="review", # Feedback loop — revise results
condition=EdgeCondition.ON_SUCCESS, priority=1), EdgeSpec(id="review-to-process", source="review", target="process",
# Feedback loop
EdgeSpec(id="review-to-worker", source="review", target="worker",
condition=EdgeCondition.CONDITIONAL, condition=EdgeCondition.CONDITIONAL,
condition_expr="str(next_action).lower() == 'revise'", priority=2), condition_expr="str(next_action).lower() == 'revise'", priority=2),
# Loop back for new topic # Loop back for next task (queen sends new input)
EdgeSpec(id="review-to-intake", source="review", target="intake", EdgeSpec(id="review-done", source="review", target="process",
condition=EdgeCondition.CONDITIONAL, condition=EdgeCondition.CONDITIONAL,
condition_expr="str(next_action).lower() == 'new_topic'", priority=1), condition_expr="str(next_action).lower() == 'done'", priority=1),
] ]
# Graph configuration # Graph configuration — entry is the autonomous process node
entry_node = "intake" # The queen handles intake and passes the task via run_agent_with_input(task)
entry_points = {"start": "intake"} entry_node = "process"
entry_points = {"start": "process"}
pause_nodes = [] pause_nodes = []
terminal_nodes = [] # Forever-alive terminal_nodes = [] # Forever-alive
@@ -208,7 +184,7 @@ class MyAgent:
self.goal = goal self.goal = goal
self.nodes = nodes self.nodes = nodes
self.edges = edges self.edges = edges
self.entry_node = entry_node self.entry_node = entry_node # "process" — autonomous entry
self.entry_points = entry_points self.entry_points = entry_points
self.pause_nodes = pause_nodes self.pause_nodes = pause_nodes
self.terminal_nodes = terminal_nodes self.terminal_nodes = terminal_nodes
@@ -498,7 +474,7 @@ def tui():
llm = LiteLLMProvider(model=agent.config.model, api_key=agent.config.api_key, api_base=agent.config.api_base) llm = LiteLLMProvider(model=agent.config.model, api_key=agent.config.api_key, api_base=agent.config.api_base)
runtime = create_agent_runtime( runtime = create_agent_runtime(
graph=agent._build_graph(), goal=agent.goal, storage_path=storage, graph=agent._build_graph(), goal=agent.goal, storage_path=storage,
entry_points=[EntryPointSpec(id="start", name="Start", entry_node="intake", trigger_type="manual", isolation_level="isolated")], entry_points=[EntryPointSpec(id="start", name="Start", entry_node="process", trigger_type="manual", isolation_level="isolated")],
llm=llm, tools=list(agent._tool_registry.get_tools().values()), tool_executor=agent._tool_registry.get_executor()) llm=llm, tools=list(agent._tool_registry.get_tools().values()), tool_executor=agent._tool_registry.get_executor())
await runtime.start() await runtime.start()
try: try:
@@ -131,13 +131,19 @@ downstream node only sees the serialized summary string.
- A "report" node that presents analysis → merge into the client-facing node - A "report" node that presents analysis → merge into the client-facing node
- A "confirm" or "schedule" node that doesn't call any external service → remove - A "confirm" or "schedule" node that doesn't call any external service → remove
**Typical agent structure (3 nodes):** **Typical agent structure (2 nodes):**
``` ```
intake (client-facing) ←→ process (autonomous) ←→ review (client-facing) process (autonomous) ←→ review (client-facing)
``` ```
Or for simpler agents, just 2 nodes: The queen owns intake — she gathers requirements from the user, then
passes structured input via `run_agent_with_input(task)`. When building
the agent, design the entry node's `input_keys` to match what the queen
will provide at run time. Worker agents should NOT have a client-facing
intake node. Client-facing nodes are for mid-execution review/approval only.
For simpler agents, just 1 autonomous node:
``` ```
interact (client-facing) → process (autonomous) → interact (loop) process (autonomous) — loops back to itself
``` ```
### nullable_output_keys ### nullable_output_keys
@@ -397,7 +403,7 @@ from .agent import (
### Reference Agent ### Reference Agent
See `exports/gmail_inbox_guardian/agent.py` for a complete example with: See `exports/gmail_inbox_guardian/agent.py` for a complete example with:
- Primary client-facing intake node (user configures rules) - Primary client-facing node (user configures rules)
- Timer-based scheduled inbox checks (every 20 min) - Timer-based scheduled inbox checks (every 20 min)
- Webhook-triggered email event handling - Webhook-triggered email event handling
- Shared isolation for memory access across streams - Shared isolation for memory access across streams
@@ -413,13 +419,13 @@ See `exports/gmail_inbox_guardian/agent.py` for a complete example with:
## Tool Discovery ## Tool Discovery
Do NOT rely on a static tool list — it will be outdated. Always use Do NOT rely on a static tool list — it will be outdated. Always use
`list_agent_tools()` to get available tool names grouped by category. `list_agent_tools()` to discover available tools, grouped by category.
For full schemas with parameter details, use `discover_mcp_tools()`.
``` ```
list_agent_tools() # all available tools list_agent_tools() # names + descriptions, all groups
list_agent_tools("exports/my_agent/mcp_servers.json") # specific agent list_agent_tools(output_schema="full") # include input_schema
discover_mcp_tools() # full schemas with params list_agent_tools(group="gmail") # only gmail_* tools
list_agent_tools("exports/my_agent/mcp_servers.json") # specific agent's tools
``` ```
After building, validate tools exist: `validate_agent_tools("exports/{name}")` After building, validate tools exist: `validate_agent_tools("exports/{name}")`
@@ -21,7 +21,7 @@ Do NOT use GCU for:
- Same underlying `EventLoopNode` class — no new imports needed - Same underlying `EventLoopNode` class — no new imports needed
- `tools=[]` is correct — tools are auto-populated at runtime - `tools=[]` is correct — tools are auto-populated at runtime
## GCU Architecture Pattern ## GCU Architecture Pattern
GCU nodes are **subagents** — invoked via `delegate_to_sub_agent()`, not connected via edges. GCU nodes are **subagents** — invoked via `delegate_to_sub_agent()`, not connected via edges.
+204 -39
View File
@@ -152,6 +152,72 @@ def _compact_tool_calls(tool_calls: list[dict[str, Any]]) -> list[dict[str, Any]
return compact return compact
def extract_tool_call_history(messages: list[Message], max_entries: int = 30) -> str:
    """Render a compact summary of tool activity found in *messages*.

    Used in compaction summaries so the LLM is reminded of work it has
    already done: which tools it called (with representative inputs),
    which files it saved, which outputs it set, and which errors it hit
    (so it does not retry them).

    Args:
        messages: Conversation messages to scan (assistant + tool roles).
        max_entries: Cap on the number of distinct tool names listed.

    Returns:
        A newline-separated text block, empty string if nothing to report.
    """
    calls_by_tool: dict[str, list[str]] = {}
    saved_files: list[str] = []
    output_keys: list[str] = []
    error_previews: list[str] = []

    # Tools whose primary argument is worth echoing back verbatim; all
    # other tools get an empty input summary.
    _INPUT_FIELD = {
        "web_search": "query",
        "web_scrape": "url",
        "load_data": "filename",
        "save_data": "filename",
    }

    for message in messages:
        if message.role == "assistant" and message.tool_calls:
            for call in message.tool_calls:
                fn = call.get("function", {})
                tool_name = fn.get("name", "unknown")
                try:
                    parsed = json.loads(fn.get("arguments", "{}"))
                except (json.JSONDecodeError, TypeError):
                    parsed = {}
                field = _INPUT_FIELD.get(tool_name)
                brief = parsed.get(field, "") if field else ""
                calls_by_tool.setdefault(tool_name, []).append(brief)
                if tool_name == "save_data" and parsed.get("filename"):
                    saved_files.append(parsed["filename"])
                if tool_name == "set_output" and parsed.get("key"):
                    output_keys.append(parsed["key"])
        if message.role == "tool" and message.is_error:
            error_previews.append(message.content[:120].replace("\n", " "))

    sections: list[str] = []

    if calls_by_tool:
        rendered: list[str] = []
        for tool_name, briefs in list(calls_by_tool.items())[:max_entries]:
            count = len(briefs)
            shown = [b for b in briefs if b]
            if shown:
                bullets = "\n".join(f" - {s[:120]}" for s in shown[:8])
                rendered.append(f" {tool_name} ({count}x):\n" + bullets)
            else:
                rendered.append(f" {tool_name} ({count}x)")
        sections.append("TOOLS ALREADY CALLED:\n" + "\n".join(rendered))

    if saved_files:
        # dict.fromkeys de-duplicates while preserving first-seen order.
        sections.append("FILES SAVED: " + ", ".join(dict.fromkeys(saved_files)))

    if output_keys:
        sections.append("OUTPUTS SET: " + ", ".join(dict.fromkeys(output_keys)))

    if error_previews:
        sections.append(
            "ERRORS (do NOT retry these):\n"
            + "\n".join(f" - {e}" for e in error_previews[:10])
        )

    return "\n\n".join(sections)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# ConversationStore protocol (Phase 2) # ConversationStore protocol (Phase 2)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -373,9 +439,36 @@ class NodeConversation:
def _repair_orphaned_tool_calls( def _repair_orphaned_tool_calls(
msgs: list[dict[str, Any]], msgs: list[dict[str, Any]],
) -> list[dict[str, Any]]: ) -> list[dict[str, Any]]:
"""Ensure every tool_call has a matching tool-result message.""" """Ensure tool_call / tool_result pairs are consistent.
1. **Orphaned tool results** (tool_result with no preceding tool_use)
are dropped. This happens when compaction removes an assistant
message but leaves its tool-result messages behind.
2. **Orphaned tool calls** (tool_use with no following tool_result)
get a synthetic error result appended. This happens when a loop
is cancelled mid-tool-execution.
"""
# Pass 1: collect all tool_call IDs from assistant messages so we
# can identify orphaned tool-result messages.
all_tool_call_ids: set[str] = set()
for m in msgs:
if m.get("role") == "assistant":
for tc in m.get("tool_calls") or []:
tc_id = tc.get("id")
if tc_id:
all_tool_call_ids.add(tc_id)
# Pass 2: build repaired list — drop orphaned tool results, patch
# missing tool results.
repaired: list[dict[str, Any]] = [] repaired: list[dict[str, Any]] = []
for i, m in enumerate(msgs): for i, m in enumerate(msgs):
# Drop tool-result messages whose tool_call_id has no matching
# tool_use in any assistant message (orphaned by compaction).
if m.get("role") == "tool":
tid = m.get("tool_call_id")
if tid and tid not in all_tool_call_ids:
continue # skip orphaned result
repaired.append(m) repaired.append(m)
tool_calls = m.get("tool_calls") tool_calls = m.get("tool_calls")
if m.get("role") != "assistant" or not tool_calls: if m.get("role") != "assistant" or not tool_calls:
@@ -653,6 +746,7 @@ class NodeConversation:
spillover_dir: str, spillover_dir: str,
keep_recent: int = 4, keep_recent: int = 4,
phase_graduated: bool = False, phase_graduated: bool = False,
aggressive: bool = False,
) -> None: ) -> None:
"""Structure-preserving compaction: save freeform text to file, keep tool messages. """Structure-preserving compaction: save freeform text to file, keep tool messages.
@@ -662,6 +756,11 @@ class NodeConversation:
after pruning. Only freeform text exchanges (user messages, after pruning. Only freeform text exchanges (user messages,
text-only assistant messages) are saved to a file and removed. text-only assistant messages) are saved to a file and removed.
When *aggressive* is True, non-essential tool call pairs are also
collapsed into a compact summary instead of being kept individually.
Only ``set_output`` calls and error results are preserved; all other
old tool pairs are replaced by a tool-call history summary.
The result: the agent retains exact knowledge of what tools it called, The result: the agent retains exact knowledge of what tools it called,
where each result is stored, and can load the conversation text if where each result is stored, and can load the conversation text if
needed. No LLM summary call. No heuristics. Nothing lost. needed. No LLM summary call. No heuristics. Nothing lost.
@@ -693,35 +792,91 @@ class NodeConversation:
# Classify old messages: structural (keep) vs freeform (save to file) # Classify old messages: structural (keep) vs freeform (save to file)
kept_structural: list[Message] = [] kept_structural: list[Message] = []
freeform_lines: list[str] = [] freeform_lines: list[str] = []
collapsed_msgs: list[Message] = []
for msg in old_messages: if aggressive:
if msg.role == "tool": # Aggressive: only keep set_output tool pairs and error results.
# Tool results — already pruned to ~30 tokens (file reference). # Everything else is collapsed into a tool-call history summary.
# Keep in conversation. # We need to track tool_call IDs to pair assistant messages with
kept_structural.append(msg) # their tool results.
elif msg.role == "assistant" and msg.tool_calls: protected_tc_ids: set[str] = set()
# Assistant message with tool_calls — keep the tool_calls collapsible_tc_ids: set[str] = set()
# with truncated arguments, clear the freeform text content.
compact_tcs = _compact_tool_calls(msg.tool_calls) # First pass: classify assistant messages
kept_structural.append( for msg in old_messages:
Message( if msg.role != "assistant" or not msg.tool_calls:
seq=msg.seq, continue
role=msg.role, has_protected = any(
content="", tc.get("function", {}).get("name") == "set_output" for tc in msg.tool_calls
tool_calls=compact_tcs,
is_error=msg.is_error,
phase_id=msg.phase_id,
is_transition_marker=msg.is_transition_marker,
)
) )
else: tc_ids = {tc.get("id", "") for tc in msg.tool_calls}
# Freeform text (user messages, text-only assistant messages) if has_protected:
# — save to file and remove from conversation. protected_tc_ids |= tc_ids
role_label = msg.role else:
text = msg.content collapsible_tc_ids |= tc_ids
if len(text) > 2000:
text = text[:2000] + "" # Second pass: classify all messages
freeform_lines.append(f"[{role_label}] (seq={msg.seq}): {text}") for msg in old_messages:
if msg.role == "tool":
tc_id = msg.tool_use_id or ""
if tc_id in protected_tc_ids:
kept_structural.append(msg)
elif msg.is_error:
# Error results are always protected
kept_structural.append(msg)
# Protect the parent assistant message too
protected_tc_ids.add(tc_id)
else:
collapsed_msgs.append(msg)
elif msg.role == "assistant" and msg.tool_calls:
tc_ids = {tc.get("id", "") for tc in msg.tool_calls}
if tc_ids & protected_tc_ids:
# Has at least one protected tool call — keep entire msg
compact_tcs = _compact_tool_calls(msg.tool_calls)
kept_structural.append(
Message(
seq=msg.seq,
role=msg.role,
content="",
tool_calls=compact_tcs,
is_error=msg.is_error,
phase_id=msg.phase_id,
is_transition_marker=msg.is_transition_marker,
)
)
else:
collapsed_msgs.append(msg)
else:
# Freeform text — save to file
role_label = msg.role
text = msg.content
if len(text) > 2000:
text = text[:2000] + ""
freeform_lines.append(f"[{role_label}] (seq={msg.seq}): {text}")
else:
# Standard mode: keep all tool call pairs as structural
for msg in old_messages:
if msg.role == "tool":
kept_structural.append(msg)
elif msg.role == "assistant" and msg.tool_calls:
compact_tcs = _compact_tool_calls(msg.tool_calls)
kept_structural.append(
Message(
seq=msg.seq,
role=msg.role,
content="",
tool_calls=compact_tcs,
is_error=msg.is_error,
phase_id=msg.phase_id,
is_transition_marker=msg.is_transition_marker,
)
)
else:
role_label = msg.role
text = msg.content
if len(text) > 2000:
text = text[:2000] + ""
freeform_lines.append(f"[{role_label}] (seq={msg.seq}): {text}")
# Write freeform text to a numbered conversation file # Write freeform text to a numbered conversation file
spill_path = Path(spillover_dir) spill_path = Path(spillover_dir)
@@ -741,13 +896,25 @@ class NodeConversation:
conv_filename = "" conv_filename = ""
# Build reference message # Build reference message
ref_parts: list[str] = []
if conv_filename: if conv_filename:
ref_content = ( ref_parts.append(
f"[Previous conversation saved to '{conv_filename}'. " f"[Previous conversation saved to '{conv_filename}'. "
f"Use load_data('{conv_filename}') to review if needed.]" f"Use load_data('{conv_filename}') to review if needed.]"
) )
else: elif not collapsed_msgs:
ref_content = "[Previous freeform messages compacted.]" ref_parts.append("[Previous freeform messages compacted.]")
# Aggressive: add collapsed tool-call history to the reference
if collapsed_msgs:
tool_history = extract_tool_call_history(collapsed_msgs)
if tool_history:
ref_parts.append(tool_history)
elif not ref_parts:
ref_parts.append("[Previous tool calls compacted.]")
ref_content = "\n\n".join(ref_parts)
# Use a seq just before the first kept message # Use a seq just before the first kept message
recent_messages = list(self._messages[split:]) recent_messages = list(self._messages[split:])
if kept_structural: if kept_structural:
@@ -760,15 +927,13 @@ class NodeConversation:
ref_msg = Message(seq=ref_seq, role="user", content=ref_content) ref_msg = Message(seq=ref_seq, role="user", content=ref_content)
# Persist: delete old messages from store, write reference + kept structural # Persist: delete old messages from store, write reference + kept structural.
# In aggressive mode, collapsed messages may be interspersed with kept
# messages, so we delete everything before the recent boundary and
# rewrite only what we want to keep.
if self._store: if self._store:
first_kept_seq = ( recent_boundary = recent_messages[0].seq if recent_messages else self._next_seq
kept_structural[0].seq await self._store.delete_parts_before(recent_boundary)
if kept_structural
else (recent_messages[0].seq if recent_messages else self._next_seq)
)
# Delete everything before the first structural message we're keeping
await self._store.delete_parts_before(first_kept_seq)
# Write the reference message # Write the reference message
await self._store.write_part(ref_msg.seq, ref_msg.to_storage_dict()) await self._store.write_part(ref_msg.seq, ref_msg.to_storage_dict())
# Write kept structural messages (they may have been modified) # Write kept structural messages (they may have been modified)
File diff suppressed because it is too large Load Diff
+167 -21
View File
@@ -138,6 +138,7 @@ class GraphExecutor:
accounts_prompt: str = "", accounts_prompt: str = "",
accounts_data: list[dict] | None = None, accounts_data: list[dict] | None = None,
tool_provider_map: dict[str, str] | None = None, tool_provider_map: dict[str, str] | None = None,
dynamic_tools_provider: Callable | None = None,
): ):
""" """
Initialize the executor. Initialize the executor.
@@ -160,6 +161,8 @@ class GraphExecutor:
accounts_prompt: Connected accounts block for system prompt injection accounts_prompt: Connected accounts block for system prompt injection
accounts_data: Raw account data for per-node prompt generation accounts_data: Raw account data for per-node prompt generation
tool_provider_map: Tool name to provider name mapping for account routing tool_provider_map: Tool name to provider name mapping for account routing
dynamic_tools_provider: Optional callback returning current
tool list (for mode switching)
""" """
self.runtime = runtime self.runtime = runtime
self.llm = llm self.llm = llm
@@ -178,6 +181,7 @@ class GraphExecutor:
self.accounts_prompt = accounts_prompt self.accounts_prompt = accounts_prompt
self.accounts_data = accounts_data self.accounts_data = accounts_data
self.tool_provider_map = tool_provider_map self.tool_provider_map = tool_provider_map
self.dynamic_tools_provider = dynamic_tools_provider
# Initialize output cleaner # Initialize output cleaner
self.cleansing_config = cleansing_config or CleansingConfig() self.cleansing_config = cleansing_config or CleansingConfig()
@@ -286,6 +290,125 @@ class GraphExecutor:
return errors return errors
# Max chars of formatted messages before proactively splitting for LLM.
_PHASE_LLM_CHAR_LIMIT = 240_000
_PHASE_LLM_MAX_DEPTH = 10
    async def _phase_llm_compact(
        self,
        conversation: Any,
        next_spec: NodeSpec,
        messages: list,
        _depth: int = 0,
    ) -> str:
        """Summarise *messages* for phase-boundary compaction.

        Uses the same recursive binary-search splitting as EventLoopNode:
        if the formatted transcript exceeds ``_PHASE_LLM_CHAR_LIMIT``, or
        the LLM rejects the prompt as too large, the message list is halved
        and each half summarised recursively via
        ``_phase_llm_compact_split``.

        Args:
            conversation: Conversation being compacted. Only read here for
                its token budget via the private ``_max_history_tokens``
                attribute (defaults to 32000 if absent) — NOTE(review):
                private-attr access, confirm this stays stable.
            next_spec: Spec of the phase being entered; its name and
                description steer what the summary must preserve.
            messages: Messages to summarise.
            _depth: Internal recursion depth counter.

        Returns:
            Summary text. At the top level (depth 0) a tool-call history
            section is appended unless the summary already contains one.

        Raises:
            RuntimeError: If recursion exceeds ``_PHASE_LLM_MAX_DEPTH``.
        """
        # Local imports — presumably to avoid an import cycle with the
        # conversation / event-loop modules (confirm against module graph).
        from framework.graph.conversation import extract_tool_call_history
        from framework.graph.event_loop_node import _is_context_too_large_error

        if _depth > self._PHASE_LLM_MAX_DEPTH:
            raise RuntimeError("Phase LLM compaction recursion limit")

        # Format messages into a readable transcript: tool results are
        # truncated to 500 chars, assistant tool-call turns show the tool
        # names plus up to 200 chars of text, everything else verbatim.
        lines: list[str] = []
        for m in messages:
            if m.role == "tool":
                c = m.content[:500] + ("..." if len(m.content) > 500 else "")
                lines.append(f"[tool result]: {c}")
            elif m.role == "assistant" and m.tool_calls:
                names = [tc.get("function", {}).get("name", "?") for tc in m.tool_calls]
                lines.append(
                    f"[assistant (calls: {', '.join(names)})]: "
                    f"{m.content[:200] if m.content else ''}"
                )
            else:
                lines.append(f"[{m.role}]: {m.content}")
        formatted = "\n\n".join(lines)

        # Proactive split: don't even attempt an LLM call we know is too big.
        if len(formatted) > self._PHASE_LLM_CHAR_LIMIT and len(messages) > 1:
            summary = await self._phase_llm_compact_split(
                conversation,
                next_spec,
                messages,
                _depth,
            )
        else:
            # Target the summary at roughly half the conversation's token
            # budget (~4 chars per token heuristic for the char figure).
            max_tokens = getattr(conversation, "_max_history_tokens", 32000)
            target_tokens = max_tokens // 2
            target_chars = target_tokens * 4
            prompt = (
                "You are compacting an AI agent's conversation history "
                "at a phase boundary.\n\n"
                f"NEXT PHASE: {next_spec.name}\n"
            )
            if next_spec.description:
                prompt += f"NEXT PHASE PURPOSE: {next_spec.description}\n"
            prompt += (
                f"\nCONVERSATION MESSAGES:\n{formatted}\n\n"
                "INSTRUCTIONS:\n"
                f"Write a summary of approximately {target_chars} characters "
                f"(~{target_tokens} tokens).\n"
                "Preserve user-stated rules, constraints, and preferences "
                "verbatim. Preserve key decisions and results from earlier "
                "phases. Preserve context needed for the next phase.\n"
            )
            summary_budget = max(1024, max_tokens // 2)
            try:
                response = await self._llm.acomplete(
                    messages=[{"role": "user", "content": prompt}],
                    system=(
                        "You are a conversation compactor. Write a detailed "
                        "summary preserving context for the next phase."
                    ),
                    max_tokens=summary_budget,
                )
                summary = response.content
            except Exception as e:
                # Reactive split: the provider told us the context was too
                # large after all. Any other failure propagates unchanged.
                if _is_context_too_large_error(e) and len(messages) > 1:
                    summary = await self._phase_llm_compact_split(
                        conversation,
                        next_spec,
                        messages,
                        _depth,
                    )
                else:
                    raise
        # Append tool history at top level only (recursive halves would
        # otherwise duplicate it in each sub-summary).
        if _depth == 0:
            tool_history = extract_tool_call_history(messages)
            if tool_history and "TOOLS ALREADY CALLED" not in summary:
                summary += "\n\n" + tool_history
        return summary
async def _phase_llm_compact_split(
self,
conversation: Any,
next_spec: NodeSpec,
messages: list,
_depth: int,
) -> str:
"""Split messages in half and summarise each half."""
mid = max(1, len(messages) // 2)
s1 = await self._phase_llm_compact(
conversation,
next_spec,
messages[:mid],
_depth + 1,
)
s2 = await self._phase_llm_compact(
conversation,
next_spec,
messages[mid:],
_depth + 1,
)
return s1 + "\n\n" + s2
async def execute( async def execute(
self, self,
graph: GraphSpec, graph: GraphSpec,
@@ -1291,9 +1414,7 @@ class GraphExecutor:
# Set current phase for phase-aware compaction # Set current phase for phase-aware compaction
continuous_conversation.set_current_phase(next_spec.id) continuous_conversation.set_current_phase(next_spec.id)
# Opportunistic compaction at transition: # Phase-boundary compaction (same flow as EventLoopNode._compact)
# 1. Prune old tool results (free, no LLM call)
# 2. If still over 80%, do a phase-graduated compact
if continuous_conversation.usage_ratio() > 0.5: if continuous_conversation.usage_ratio() > 0.5:
await continuous_conversation.prune_old_tool_results( await continuous_conversation.prune_old_tool_results(
protect_tokens=2000, protect_tokens=2000,
@@ -1307,38 +1428,62 @@ class GraphExecutor:
_data_dir = ( _data_dir = (
str(self._storage_path / "data") if self._storage_path else None str(self._storage_path / "data") if self._storage_path else None
) )
# Step 1: Structural compaction (>=80%)
if _data_dir: if _data_dir:
_pre = continuous_conversation.usage_ratio()
await continuous_conversation.compact_preserving_structure( await continuous_conversation.compact_preserving_structure(
spillover_dir=_data_dir, spillover_dir=_data_dir,
keep_recent=4, keep_recent=4,
phase_graduated=True, phase_graduated=True,
) )
# Circuit breaker: if still over budget, fall back if continuous_conversation.usage_ratio() >= 0.9 * _pre:
_post_ratio = continuous_conversation.usage_ratio() await continuous_conversation.compact_preserving_structure(
if _post_ratio >= 0.9 * _phase_ratio: spillover_dir=_data_dir,
self.logger.warning(
" Structure-preserving compaction ineffective "
"(%.0f%% -> %.0f%%), falling back to summary",
_phase_ratio * 100,
_post_ratio * 100,
)
summary = (
f"Summary of earlier phases (before {next_spec.name}). "
"See transition markers for phase details."
)
await continuous_conversation.compact(
summary,
keep_recent=4, keep_recent=4,
phase_graduated=True, phase_graduated=True,
aggressive=True,
) )
else:
# Step 2: LLM compaction (>95%)
if (
continuous_conversation.usage_ratio() > 0.95
and self._llm is not None
):
self.logger.info(
" LLM phase-boundary compaction (%.0f%% usage)",
continuous_conversation.usage_ratio() * 100,
)
try:
_llm_summary = await self._phase_llm_compact(
continuous_conversation,
next_spec,
list(continuous_conversation.messages),
)
await continuous_conversation.compact(
_llm_summary,
keep_recent=2,
phase_graduated=True,
)
except Exception as e:
self.logger.warning(
" Phase LLM compaction failed: %s",
e,
)
# Step 3: Emergency (only if still over budget)
if continuous_conversation.needs_compaction():
self.logger.warning(
" Emergency phase compaction (%.0f%%)",
continuous_conversation.usage_ratio() * 100,
)
summary = ( summary = (
f"Summary of earlier phases (before {next_spec.name}). " f"Summary of earlier phases "
f"(before {next_spec.name}). "
"See transition markers for phase details." "See transition markers for phase details."
) )
await continuous_conversation.compact( await continuous_conversation.compact(
summary, summary,
keep_recent=4, keep_recent=1,
phase_graduated=True, phase_graduated=True,
) )
@@ -1651,6 +1796,7 @@ class GraphExecutor:
node_registry=node_registry or {}, node_registry=node_registry or {},
all_tools=list(self.tools), # Full catalog for subagent tool resolution all_tools=list(self.tools), # Full catalog for subagent tool resolution
shared_node_registry=self.node_registry, # For subagent escalation routing shared_node_registry=self.node_registry, # For subagent escalation routing
dynamic_tools_provider=self.dynamic_tools_provider,
) )
VALID_NODE_TYPES = { VALID_NODE_TYPES = {
+5
View File
@@ -544,6 +544,11 @@ class NodeContext:
# the inject_input() routing chain can find. # the inject_input() routing chain can find.
shared_node_registry: dict[str, Any] = field(default_factory=dict) shared_node_registry: dict[str, Any] = field(default_factory=dict)
# Dynamic tool provider — when set, EventLoopNode rebuilds the tool
# list from this callback at the start of each iteration. Used by
# the queen to switch between building-mode and running-mode tools.
dynamic_tools_provider: Any = None # Callable[[], list[Tool]] | None
@dataclass @dataclass
class NodeResult: class NodeResult:
+14 -2
View File
@@ -137,6 +137,9 @@ class EventType(StrEnum):
WORKER_LOADED = "worker_loaded" WORKER_LOADED = "worker_loaded"
CREDENTIALS_REQUIRED = "credentials_required" CREDENTIALS_REQUIRED = "credentials_required"
# Queen mode changes (building ↔ running)
QUEEN_MODE_CHANGED = "queen_mode_changed"
# Subagent reports (one-way progress updates from sub-agents) # Subagent reports (one-way progress updates from sub-agents)
SUBAGENT_REPORT = "subagent_report" SUBAGENT_REPORT = "subagent_report"
@@ -715,15 +718,24 @@ class EventBus:
node_id: str, node_id: str,
prompt: str = "", prompt: str = "",
execution_id: str | None = None, execution_id: str | None = None,
options: list[str] | None = None,
) -> None: ) -> None:
"""Emit client input requested event (client_facing=True nodes).""" """Emit client input requested event (client_facing=True nodes).
Args:
options: Optional predefined choices for the user (1-3 items).
The frontend appends an "Other" free-text option automatically.
"""
data: dict[str, Any] = {"prompt": prompt}
if options:
data["options"] = options
await self.publish( await self.publish(
AgentEvent( AgentEvent(
type=EventType.CLIENT_INPUT_REQUESTED, type=EventType.CLIENT_INPUT_REQUESTED,
stream_id=stream_id, stream_id=stream_id,
node_id=node_id, node_id=node_id,
execution_id=execution_id, execution_id=execution_id,
data={"prompt": prompt}, data=data,
) )
) )
+9 -2
View File
@@ -511,9 +511,11 @@ class ExecutionStream:
logger.debug(f"Queued execution {execution_id} for stream {self.stream_id}") logger.debug(f"Queued execution {execution_id} for stream {self.stream_id}")
return execution_id return execution_id
# Errors that indicate a fundamental configuration or environment problem. # Errors that indicate resurrection won't help — the same error will recur.
# Resurrecting after these is pointless — the same error will recur. # Includes both configuration/environment errors and deterministic node
# failures where the conversation/state hasn't changed.
_FATAL_ERROR_PATTERNS: tuple[str, ...] = ( _FATAL_ERROR_PATTERNS: tuple[str, ...] = (
# Configuration / environment
"credential", "credential",
"authentication", "authentication",
"unauthorized", "unauthorized",
@@ -525,6 +527,11 @@ class ExecutionStream:
"permission denied", "permission denied",
"invalid api", "invalid api",
"configuration error", "configuration error",
# Deterministic node failures — resurrecting at the same node with
# the same conversation produces the same result.
"node stalled",
"ghost empty stream",
"max iterations",
) )
@classmethod @classmethod
+25
View File
@@ -38,6 +38,7 @@ DEFAULT_EVENT_TYPES = [
EventType.WORKER_LOADED, EventType.WORKER_LOADED,
EventType.CREDENTIALS_REQUIRED, EventType.CREDENTIALS_REQUIRED,
EventType.SUBAGENT_REPORT, EventType.SUBAGENT_REPORT,
EventType.QUEEN_MODE_CHANGED,
] ]
# Keepalive interval in seconds # Keepalive interval in seconds
@@ -91,6 +92,7 @@ async def handle_events(request: web.Request) -> web.StreamResponse:
"node_loop_started", "node_loop_started",
"credentials_required", "credentials_required",
"worker_loaded", "worker_loaded",
"queen_mode_changed",
} }
client_disconnected = asyncio.Event() client_disconnected = asyncio.Event()
@@ -130,6 +132,29 @@ async def handle_events(request: web.Request) -> web.StreamResponse:
"SSE connected: session='%s', sub_id='%s', types=%d", session.id, sub_id, len(event_types) "SSE connected: session='%s', sub_id='%s', types=%d", session.id, sub_id, len(event_types)
) )
# Replay buffered events that were published before this SSE connected.
# The EventBus keeps a history ring-buffer; we replay the subset that
# produces visible chat messages so the frontend never misses early
# queen output. Lifecycle events are NOT replayed to avoid duplicate
# state transitions (turn counter increments, etc.).
_REPLAY_TYPES = {
EventType.CLIENT_OUTPUT_DELTA.value,
EventType.EXECUTION_STARTED.value,
EventType.CLIENT_INPUT_REQUESTED.value,
}
event_type_values = {et.value for et in event_types}
replay_types = _REPLAY_TYPES & event_type_values
replayed = 0
for past_event in event_bus._event_history:
if past_event.type.value in replay_types:
try:
queue.put_nowait(past_event.to_dict())
replayed += 1
except asyncio.QueueFull:
break
if replayed:
logger.info("SSE replayed %d buffered events for session='%s'", replayed, session.id)
event_count = 0 event_count = 0
close_reason = "unknown" close_reason = "unknown"
try: try:
+50
View File
@@ -64,6 +64,16 @@ async def handle_trigger(request: web.Request) -> web.Response:
session_state=session_state, session_state=session_state,
) )
# Cancel queen's in-progress LLM turn so it picks up the mode change cleanly
if session.queen_executor:
node = session.queen_executor.node_registry.get("queen")
if node and hasattr(node, "cancel_current_turn"):
node.cancel_current_turn()
# Switch queen to running mode (mirrors run_agent_with_input tool behavior)
if session.mode_state is not None:
await session.mode_state.switch_to_running(source="frontend")
return web.json_response({"execution_id": execution_id}) return web.json_response({"execution_id": execution_id})
@@ -124,6 +134,35 @@ async def handle_chat(request: web.Request) -> web.Response:
return web.json_response({"error": "Queen not available"}, status=503) return web.json_response({"error": "Queen not available"}, status=503)
async def handle_queen_context(request: web.Request) -> web.Response:
"""POST /api/sessions/{session_id}/queen-context — queue context for the queen.
Unlike /chat, this does NOT trigger an LLM response. The message is
queued in the queen's injection queue and will be drained on her next
natural iteration (prefixed with [External event]:).
Body: {"message": "..."}
"""
session, err = resolve_session(request)
if err:
return err
body = await request.json()
message = body.get("message", "")
if not message:
return web.json_response({"error": "message is required"}, status=400)
queen_executor = session.queen_executor
if queen_executor is not None:
node = queen_executor.node_registry.get("queen")
if node is not None and hasattr(node, "inject_event"):
await node.inject_event(message, is_client_input=False)
return web.json_response({"status": "queued", "delivered": True})
return web.json_response({"error": "Queen not available"}, status=503)
async def handle_worker_input(request: web.Request) -> web.Response: async def handle_worker_input(request: web.Request) -> web.Response:
"""POST /api/sessions/{session_id}/worker-input — send input to waiting worker node. """POST /api/sessions/{session_id}/worker-input — send input to waiting worker node.
@@ -282,6 +321,16 @@ async def handle_stop(request: web.Request) -> web.Response:
cancelled = await stream.cancel_execution(execution_id) cancelled = await stream.cancel_execution(execution_id)
if cancelled: if cancelled:
# Cancel queen's in-progress LLM turn
if session.queen_executor:
node = session.queen_executor.node_registry.get("queen")
if node and hasattr(node, "cancel_current_turn"):
node.cancel_current_turn()
# Switch to staging (agent still loaded, ready to re-run)
if session.mode_state is not None:
await session.mode_state.switch_to_staging(source="frontend")
return web.json_response( return web.json_response(
{ {
"stopped": True, "stopped": True,
@@ -365,6 +414,7 @@ def register_routes(app: web.Application) -> None:
app.router.add_post("/api/sessions/{session_id}/trigger", handle_trigger) app.router.add_post("/api/sessions/{session_id}/trigger", handle_trigger)
app.router.add_post("/api/sessions/{session_id}/inject", handle_inject) app.router.add_post("/api/sessions/{session_id}/inject", handle_inject)
app.router.add_post("/api/sessions/{session_id}/chat", handle_chat) app.router.add_post("/api/sessions/{session_id}/chat", handle_chat)
app.router.add_post("/api/sessions/{session_id}/queen-context", handle_queen_context)
app.router.add_post("/api/sessions/{session_id}/worker-input", handle_worker_input) app.router.add_post("/api/sessions/{session_id}/worker-input", handle_worker_input)
app.router.add_post("/api/sessions/{session_id}/pause", handle_stop) app.router.add_post("/api/sessions/{session_id}/pause", handle_stop)
app.router.add_post("/api/sessions/{session_id}/resume", handle_resume) app.router.add_post("/api/sessions/{session_id}/resume", handle_resume)
+2
View File
@@ -48,6 +48,7 @@ def _get_manager(request: web.Request) -> SessionManager:
def _session_to_live_dict(session) -> dict: def _session_to_live_dict(session) -> dict:
"""Serialize a live Session to the session-primary JSON shape.""" """Serialize a live Session to the session-primary JSON shape."""
info = session.worker_info info = session.worker_info
mode_state = getattr(session, "mode_state", None)
return { return {
"session_id": session.id, "session_id": session.id,
"worker_id": session.worker_id, "worker_id": session.worker_id,
@@ -60,6 +61,7 @@ def _session_to_live_dict(session) -> dict:
"loaded_at": session.loaded_at, "loaded_at": session.loaded_at,
"uptime_seconds": round(time.time() - session.loaded_at, 1), "uptime_seconds": round(time.time() - session.loaded_at, 1),
"intro_message": getattr(session.runner, "intro_message", "") or "", "intro_message": getattr(session.runner, "intro_message", "") or "",
"queen_mode": mode_state.mode if mode_state else "building",
} }
+68 -5
View File
@@ -40,6 +40,8 @@ class Session:
runner: Any | None = None # AgentRunner runner: Any | None = None # AgentRunner
worker_runtime: Any | None = None # AgentRuntime worker_runtime: Any | None = None # AgentRuntime
worker_info: Any | None = None # AgentInfo worker_info: Any | None = None # AgentInfo
# Queen mode state (building/staging/running)
mode_state: Any = None # QueenModeState
# Judge (active when worker is loaded) # Judge (active when worker is loaded)
judge_task: asyncio.Task | None = None judge_task: asyncio.Task | None = None
escalation_sub: str | None = None escalation_sub: str | None = None
@@ -425,16 +427,26 @@ class SessionManager:
except Exception: except Exception:
logger.warning("Queen: MCP config failed to load", exc_info=True) logger.warning("Queen: MCP config failed to load", exc_info=True)
# Mode state for building/running mode switching
from framework.tools.queen_lifecycle_tools import (
QueenModeState,
register_queen_lifecycle_tools,
)
# Start in staging when the caller provided an agent, building otherwise.
initial_mode = "staging" if worker_identity else "building"
mode_state = QueenModeState(mode=initial_mode, event_bus=session.event_bus)
session.mode_state = mode_state
# Always register lifecycle tools — they check session.worker_runtime # Always register lifecycle tools — they check session.worker_runtime
# at call time, so they work even if no worker is loaded yet. # at call time, so they work even if no worker is loaded yet.
from framework.tools.queen_lifecycle_tools import register_queen_lifecycle_tools
register_queen_lifecycle_tools( register_queen_lifecycle_tools(
queen_registry, queen_registry,
session=session, session=session,
session_id=session.id, session_id=session.id,
session_manager=self, session_manager=self,
manager_session_id=session.id, manager_session_id=session.id,
mode_state=mode_state,
) )
# Monitoring tools need concrete worker paths — only register when present # Monitoring tools need concrete worker paths — only register when present
@@ -452,6 +464,32 @@ class SessionManager:
queen_tools = list(queen_registry.get_tools().values()) queen_tools = list(queen_registry.get_tools().values())
queen_tool_executor = queen_registry.get_executor() queen_tool_executor = queen_registry.get_executor()
# Partition tools into mode-specific sets
from framework.agents.hive_coder.nodes import (
_QUEEN_BUILDING_TOOLS,
_QUEEN_RUNNING_TOOLS,
_QUEEN_STAGING_TOOLS,
)
building_names = set(_QUEEN_BUILDING_TOOLS)
staging_names = set(_QUEEN_STAGING_TOOLS)
running_names = set(_QUEEN_RUNNING_TOOLS)
registered_names = {t.name for t in queen_tools}
missing_building = building_names - registered_names
if missing_building:
logger.warning(
"Queen: %d/%d building tools NOT registered: %s",
len(missing_building),
len(building_names),
sorted(missing_building),
)
logger.info("Queen: registered tools: %s", sorted(registered_names))
mode_state.building_tools = [t for t in queen_tools if t.name in building_names]
mode_state.staging_tools = [t for t in queen_tools if t.name in staging_names]
mode_state.running_tools = [t for t in queen_tools if t.name in running_names]
# Build queen graph with adjusted prompt + tools # Build queen graph with adjusted prompt + tools
_orig_node = _queen_graph.nodes[0] _orig_node = _queen_graph.nodes[0]
base_prompt = _orig_node.system_prompt or "" base_prompt = _orig_node.system_prompt or ""
@@ -493,12 +531,37 @@ class SessionManager:
storage_path=queen_dir, storage_path=queen_dir,
loop_config=queen_graph.loop_config, loop_config=queen_graph.loop_config,
execution_id=session.id, execution_id=session.id,
dynamic_tools_provider=mode_state.get_current_tools,
) )
session.queen_executor = executor session.queen_executor = executor
# Wire inject_notification so mode switches notify the queen LLM
async def _inject_mode_notification(content: str) -> None:
node = executor.node_registry.get("queen")
if node is not None and hasattr(node, "inject_event"):
await node.inject_event(content)
mode_state.inject_notification = _inject_mode_notification
# Auto-switch to staging when worker execution finishes naturally
from framework.runtime.event_bus import EventType as _ET
async def _on_worker_done(event):
if event.stream_id == "queen":
return
if mode_state.mode == "running":
await mode_state.switch_to_staging(source="auto")
session.event_bus.subscribe(
event_types=[_ET.EXECUTION_COMPLETED, _ET.EXECUTION_FAILED],
handler=_on_worker_done,
)
logger.info( logger.info(
"Queen starting with %d tools: %s", "Queen starting in %s mode with %d tools: %s",
len(queen_tools), mode_state.mode,
[t.name for t in queen_tools], len(mode_state.get_current_tools()),
[t.name for t in mode_state.get_current_tools()],
) )
result = await executor.execute( result = await executor.execute(
graph=queen_graph, graph=queen_graph,
+329 -3
View File
@@ -36,7 +36,7 @@ from __future__ import annotations
import asyncio import asyncio
import json import json
import logging import logging
from dataclasses import dataclass from dataclasses import dataclass, field
from pathlib import Path from pathlib import Path
from typing import TYPE_CHECKING, Any from typing import TYPE_CHECKING, Any
@@ -66,6 +66,125 @@ class WorkerSessionAdapter:
worker_path: Path | None = None worker_path: Path | None = None
@dataclass
class QueenModeState:
"""Mutable state container for queen operating mode.
Three modes: building staging running.
Shared between the dynamic_tools_provider callback and tool handlers
that trigger mode transitions.
"""
mode: str = "building" # "building", "staging", or "running"
building_tools: list = field(default_factory=list) # list[Tool]
staging_tools: list = field(default_factory=list) # list[Tool]
running_tools: list = field(default_factory=list) # list[Tool]
inject_notification: Any = None # async (str) -> None
event_bus: Any = None # EventBus — for emitting QUEEN_MODE_CHANGED events
def get_current_tools(self) -> list:
"""Return tools for the current mode."""
if self.mode == "running":
return list(self.running_tools)
if self.mode == "staging":
return list(self.staging_tools)
return list(self.building_tools)
async def _emit_mode_event(self) -> None:
"""Publish a QUEEN_MODE_CHANGED event so the frontend updates the tag."""
if self.event_bus is not None:
await self.event_bus.publish(
AgentEvent(
type=EventType.QUEEN_MODE_CHANGED,
stream_id="queen",
data={"mode": self.mode},
)
)
async def switch_to_running(self, source: str = "tool") -> None:
"""Switch to running mode and notify the queen.
Args:
source: Who triggered the switch "tool" (queen LLM),
"frontend" (user clicked Run), or "auto" (system).
"""
if self.mode == "running":
return
self.mode = "running"
tool_names = [t.name for t in self.running_tools]
logger.info("Queen mode → running (source=%s, tools: %s)", source, tool_names)
await self._emit_mode_event()
if self.inject_notification:
if source == "frontend":
msg = (
"[MODE CHANGE] The user clicked Run in the UI. Switched to RUNNING mode. "
"Worker is now executing. You have monitoring/lifecycle tools: "
+ ", ".join(tool_names)
+ "."
)
else:
msg = (
"[MODE CHANGE] Switched to RUNNING mode. "
"Worker is executing. You now have monitoring/lifecycle tools: "
+ ", ".join(tool_names)
+ "."
)
await self.inject_notification(msg)
async def switch_to_staging(self, source: str = "tool") -> None:
"""Switch to staging mode and notify the queen.
Args:
source: Who triggered the switch "tool", "frontend", or "auto".
"""
if self.mode == "staging":
return
self.mode = "staging"
tool_names = [t.name for t in self.staging_tools]
logger.info("Queen mode → staging (source=%s, tools: %s)", source, tool_names)
await self._emit_mode_event()
if self.inject_notification:
if source == "frontend":
msg = (
"[MODE CHANGE] The user stopped the worker from the UI. "
"Switched to STAGING mode. Agent is still loaded. "
"Available tools: " + ", ".join(tool_names) + "."
)
elif source == "auto":
msg = (
"[MODE CHANGE] Worker execution completed. Switched to STAGING mode. "
"Agent is still loaded. Call run_agent_with_input(task) to run again. "
"Available tools: " + ", ".join(tool_names) + "."
)
else:
msg = (
"[MODE CHANGE] Switched to STAGING mode. "
"Agent loaded and ready. Call run_agent_with_input(task) to start, "
"or stop_worker_and_edit() to go back to building. "
"Available tools: " + ", ".join(tool_names) + "."
)
await self.inject_notification(msg)
async def switch_to_building(self, source: str = "tool") -> None:
"""Switch to building mode and notify the queen.
Args:
source: Who triggered the switch "tool", "frontend", or "auto".
"""
if self.mode == "building":
return
self.mode = "building"
tool_names = [t.name for t in self.building_tools]
logger.info("Queen mode → building (source=%s, tools: %s)", source, tool_names)
await self._emit_mode_event()
if self.inject_notification:
await self.inject_notification(
"[MODE CHANGE] Switched to BUILDING mode. "
"Lifecycle tools removed. Full coding tools restored. "
"Call load_built_agent(path) when ready to stage."
)
def build_worker_profile(runtime: AgentRuntime, agent_path: Path | str | None = None) -> str: def build_worker_profile(runtime: AgentRuntime, agent_path: Path | str | None = None) -> str:
"""Build a worker capability profile from its graph/goal definition. """Build a worker capability profile from its graph/goal definition.
@@ -120,6 +239,8 @@ def register_queen_lifecycle_tools(
# Server context — enables load_built_agent tool # Server context — enables load_built_agent tool
session_manager: Any = None, session_manager: Any = None,
manager_session_id: str | None = None, manager_session_id: str | None = None,
# Mode switching
mode_state: QueenModeState | None = None,
) -> int: ) -> int:
"""Register queen lifecycle tools. """Register queen lifecycle tools.
@@ -136,6 +257,9 @@ def register_queen_lifecycle_tools(
for ``load_built_agent`` to hot-load a worker. for ``load_built_agent`` to hot-load a worker.
manager_session_id: (Server only) The session's ID in the manager, manager_session_id: (Server only) The session's ID in the manager,
used with ``session_manager.load_worker()``. used with ``session_manager.load_worker()``.
mode_state: (Optional) Mutable mode state for building/running
mode switching. When provided, load_built_agent switches to
running mode and stop_worker_and_edit switches to building mode.
Returns the number of tools registered. Returns the number of tools registered.
""" """
@@ -343,6 +467,75 @@ def register_queen_lifecycle_tools(
registry.register("stop_worker", _stop_tool, lambda inputs: stop_worker()) registry.register("stop_worker", _stop_tool, lambda inputs: stop_worker())
tools_registered += 1 tools_registered += 1
# --- stop_worker_and_edit -------------------------------------------------
async def stop_worker_and_edit() -> str:
"""Stop the worker and switch to building mode for editing the agent."""
stop_result = await stop_worker()
# Switch to building mode
if mode_state is not None:
await mode_state.switch_to_building()
result = json.loads(stop_result)
result["mode"] = "building"
result["message"] = (
"Worker stopped. You are now in building mode. "
"Use your coding tools to modify the agent, then call "
"load_built_agent(path) to stage it again."
)
return json.dumps(result)
_stop_edit_tool = Tool(
name="stop_worker_and_edit",
description=(
"Stop the running worker and switch to building mode. "
"Use this when you need to modify the agent's code, nodes, or configuration. "
"After editing, call load_built_agent(path) to reload and run."
),
parameters={"type": "object", "properties": {}},
)
registry.register(
"stop_worker_and_edit", _stop_edit_tool, lambda inputs: stop_worker_and_edit()
)
tools_registered += 1
# --- stop_worker (Running → Staging) -------------------------------------
async def stop_worker_to_staging() -> str:
"""Stop the running worker and switch to staging mode.
After stopping, ask the user whether they want to:
1. Re-run the agent with new input call run_agent_with_input(task)
2. Edit the agent code call stop_worker_and_edit() to go to building mode
"""
stop_result = await stop_worker()
# Switch to staging mode
if mode_state is not None:
await mode_state.switch_to_staging()
result = json.loads(stop_result)
result["mode"] = "staging"
result["message"] = (
"Worker stopped. You are now in staging mode. "
"Ask the user: would they like to re-run with new input, "
"or edit the agent code?"
)
return json.dumps(result)
_stop_worker_tool = Tool(
name="stop_worker",
description=(
"Stop the running worker and switch to staging mode. "
"After stopping, ask the user whether they want to re-run "
"with new input or edit the agent code."
),
parameters={"type": "object", "properties": {}},
)
registry.register("stop_worker", _stop_worker_tool, lambda inputs: stop_worker_to_staging())
tools_registered += 1
# --- get_worker_status ---------------------------------------------------- # --- get_worker_status ----------------------------------------------------
def _get_event_bus(): def _get_event_bus():
@@ -648,7 +841,7 @@ def register_queen_lifecycle_tools(
injectable = stream.get_injectable_nodes() injectable = stream.get_injectable_nodes()
if injectable: if injectable:
target_node_id = injectable[0]["node_id"] target_node_id = injectable[0]["node_id"]
ok = await stream.inject_input(target_node_id, content) ok = await stream.inject_input(target_node_id, content, is_client_input=True)
if ok: if ok:
return json.dumps( return json.dumps(
{ {
@@ -818,11 +1011,24 @@ def register_queen_lifecycle_tools(
str(resolved_path), str(resolved_path),
) )
info = updated_session.worker_info info = updated_session.worker_info
# Switch to staging mode after successful load
if mode_state is not None:
await mode_state.switch_to_staging()
worker_name = info.name if info else updated_session.worker_id
return json.dumps( return json.dumps(
{ {
"status": "loaded", "status": "loaded",
"mode": "staging",
"message": (
f"Successfully loaded '{worker_name}'. "
"You are now in STAGING mode. "
"Call run_agent_with_input(task) to start the worker, "
"or stop_worker_and_edit() to go back to building."
),
"worker_id": updated_session.worker_id, "worker_id": updated_session.worker_id,
"worker_name": info.name if info else updated_session.worker_id, "worker_name": worker_name,
"goal": info.goal_name if info else "", "goal": info.goal_name if info else "",
"node_count": info.node_count if info else 0, "node_count": info.node_count if info else 0,
} }
@@ -857,5 +1063,125 @@ def register_queen_lifecycle_tools(
) )
tools_registered += 1 tools_registered += 1
# --- run_agent_with_input ------------------------------------------------
async def run_agent_with_input(task: str) -> str:
"""Run the loaded worker agent with the given task input.
Performs preflight checks (credentials, MCP resync), triggers the
worker's default entry point, and switches to running mode.
"""
runtime = _get_runtime()
if runtime is None:
return json.dumps({"error": "No worker loaded in this session."})
try:
# Pre-flight: validate credentials and resync MCP servers.
loop = asyncio.get_running_loop()
async def _preflight():
cred_error: CredentialError | None = None
try:
await loop.run_in_executor(
None,
lambda: validate_credentials(
runtime.graph.nodes,
interactive=False,
skip=False,
),
)
except CredentialError as e:
cred_error = e
runner = getattr(session, "runner", None)
if runner:
try:
await loop.run_in_executor(
None,
lambda: runner._tool_registry.resync_mcp_servers_if_needed(),
)
except Exception as e:
logger.warning("MCP resync failed: %s", e)
if cred_error is not None:
raise cred_error
try:
await asyncio.wait_for(_preflight(), timeout=_START_PREFLIGHT_TIMEOUT)
except TimeoutError:
logger.warning(
"run_agent_with_input preflight timed out after %ds — proceeding",
_START_PREFLIGHT_TIMEOUT,
)
except CredentialError:
raise # handled below
# Resume timers in case they were paused by a previous stop
runtime.resume_timers()
# Get session state from any prior execution for memory continuity
session_state = runtime._get_primary_session_state("default") or {}
if session_id:
session_state["resume_session_id"] = session_id
exec_id = await runtime.trigger(
entry_point_id="default",
input_data={"user_request": task},
session_state=session_state,
)
# Switch to running mode
if mode_state is not None:
await mode_state.switch_to_running()
return json.dumps(
{
"status": "started",
"mode": "running",
"execution_id": exec_id,
"task": task,
}
)
except CredentialError as e:
error_payload = credential_errors_to_json(e)
error_payload["agent_path"] = str(getattr(session, "worker_path", "") or "")
bus = getattr(session, "event_bus", None)
if bus is not None:
await bus.publish(
AgentEvent(
type=EventType.CREDENTIALS_REQUIRED,
stream_id="queen",
data=error_payload,
)
)
return json.dumps(error_payload)
except Exception as e:
return json.dumps({"error": f"Failed to start worker: {e}"})
_run_input_tool = Tool(
name="run_agent_with_input",
description=(
"Run the loaded worker agent with the given task. Validates credentials, "
"triggers the worker's default entry point, and switches to running mode. "
"Use this after loading an agent (staging mode) to start execution."
),
parameters={
"type": "object",
"properties": {
"task": {
"type": "string",
"description": "The task or input for the worker agent to execute",
},
},
"required": ["task"],
},
)
registry.register(
"run_agent_with_input", _run_input_tool, lambda inputs: run_agent_with_input(**inputs)
)
tools_registered += 1
logger.info("Registered %d queen lifecycle tools", tools_registered) logger.info("Registered %d queen lifecycle tools", tools_registered)
return tools_registered return tools_registered
+36 -4
View File
@@ -475,7 +475,10 @@ class AdenTUI(App):
from framework.graph.executor import GraphExecutor from framework.graph.executor import GraphExecutor
from framework.runner.tool_registry import ToolRegistry from framework.runner.tool_registry import ToolRegistry
from framework.runtime.core import Runtime from framework.runtime.core import Runtime
from framework.tools.queen_lifecycle_tools import register_queen_lifecycle_tools from framework.tools.queen_lifecycle_tools import (
QueenModeState,
register_queen_lifecycle_tools,
)
from framework.tools.worker_monitoring_tools import register_worker_monitoring_tools from framework.tools.worker_monitoring_tools import register_worker_monitoring_tools
log = logging.getLogger("tui.queen") log = logging.getLogger("tui.queen")
@@ -536,12 +539,16 @@ class AdenTUI(App):
except Exception: except Exception:
log.warning("Queen: MCP config failed to load", exc_info=True) log.warning("Queen: MCP config failed to load", exc_info=True)
# Worker is already loaded in TUI path → start in staging mode.
mode_state = QueenModeState(mode="staging", event_bus=event_bus)
register_queen_lifecycle_tools( register_queen_lifecycle_tools(
queen_registry, queen_registry,
worker_runtime=self.runtime, worker_runtime=self.runtime,
event_bus=event_bus, event_bus=event_bus,
storage_path=storage_path, storage_path=storage_path,
session_id=session_id, session_id=session_id,
mode_state=mode_state,
) )
register_worker_monitoring_tools( register_worker_monitoring_tools(
queen_registry, queen_registry,
@@ -553,6 +560,20 @@ class AdenTUI(App):
queen_tools = list(queen_registry.get_tools().values()) queen_tools = list(queen_registry.get_tools().values())
queen_tool_executor = queen_registry.get_executor() queen_tool_executor = queen_registry.get_executor()
# Partition tools into mode-specific sets
from framework.agents.hive_coder.nodes import (
_QUEEN_BUILDING_TOOLS,
_QUEEN_RUNNING_TOOLS,
_QUEEN_STAGING_TOOLS,
)
building_names = set(_QUEEN_BUILDING_TOOLS)
staging_names = set(_QUEEN_STAGING_TOOLS)
running_names = set(_QUEEN_RUNNING_TOOLS)
mode_state.building_tools = [t for t in queen_tools if t.name in building_names]
mode_state.staging_tools = [t for t in queen_tools if t.name in staging_names]
mode_state.running_tools = [t for t in queen_tools if t.name in running_names]
# Build worker profile for queen's system prompt. # Build worker profile for queen's system prompt.
from framework.tools.queen_lifecycle_tools import build_worker_profile from framework.tools.queen_lifecycle_tools import build_worker_profile
@@ -593,12 +614,23 @@ class AdenTUI(App):
stream_id="queen", stream_id="queen",
storage_path=queen_dir, storage_path=queen_dir,
loop_config=queen_graph.loop_config, loop_config=queen_graph.loop_config,
dynamic_tools_provider=mode_state.get_current_tools,
) )
self._queen_executor = executor self._queen_executor = executor
# Wire inject_notification so mode switches notify the queen LLM
async def _inject_mode_notification(content: str) -> None:
node = executor.node_registry.get("queen")
if node is not None and hasattr(node, "inject_event"):
await node.inject_event(content)
mode_state.inject_notification = _inject_mode_notification
log.info( log.info(
"Queen starting with %d tools: %s", "Queen starting in %s mode with %d tools: %s",
len(queen_tools), mode_state.mode,
[t.name for t in queen_tools], len(mode_state.get_current_tools()),
[t.name for t in mode_state.get_current_tools()],
) )
# The queen's event_loop node runs forever (continuous mode). # The queen's event_loop node runs forever (continuous mode).
# It blocks on _await_user_input() after each LLM turn, # It blocks on _await_user_input() after each LLM turn,
+4
View File
@@ -37,6 +37,10 @@ export const executionApi = {
chat: (sessionId: string, message: string) => chat: (sessionId: string, message: string) =>
api.post<ChatResult>(`/sessions/${sessionId}/chat`, { message }), api.post<ChatResult>(`/sessions/${sessionId}/chat`, { message }),
/** Queue context for the queen without triggering an LLM response. */
queenContext: (sessionId: string, message: string) =>
api.post<ChatResult>(`/sessions/${sessionId}/queen-context`, { message }),
workerInput: (sessionId: string, message: string) => workerInput: (sessionId: string, message: string) =>
api.post<ChatResult>(`/sessions/${sessionId}/worker-input`, { message }), api.post<ChatResult>(`/sessions/${sessionId}/worker-input`, { message }),
+3
View File
@@ -12,6 +12,8 @@ export interface LiveSession {
loaded_at: number; loaded_at: number;
uptime_seconds: number; uptime_seconds: number;
intro_message?: string; intro_message?: string;
/** Queen operating mode — "building", "staging", or "running" */
queen_mode?: "building" | "staging" | "running";
/** Present in 409 conflict responses when worker is still loading */ /** Present in 409 conflict responses when worker is still loading */
loading?: boolean; loading?: boolean;
} }
@@ -271,6 +273,7 @@ export type EventTypeName =
| "escalation_requested" | "escalation_requested"
| "worker_loaded" | "worker_loaded"
| "credentials_required" | "credentials_required"
| "queen_mode_changed"
| "subagent_report"; | "subagent_report";
export interface AgentEvent { export interface AgentEvent {
+3 -2
View File
@@ -31,6 +31,7 @@ interface AgentGraphProps {
version?: string; version?: string;
runState?: RunState; runState?: RunState;
building?: boolean; building?: boolean;
queenMode?: "building" | "staging" | "running";
} }
// --- Extracted RunButton so hover state survives parent re-renders --- // --- Extracted RunButton so hover state survives parent re-renders ---
@@ -145,7 +146,7 @@ function truncateLabel(label: string, availablePx: number, fontSize: number): st
return label.slice(0, Math.max(maxChars - 1, 1)) + "\u2026"; return label.slice(0, Math.max(maxChars - 1, 1)) + "\u2026";
} }
export default function AgentGraph({ nodes, title: _title, onNodeClick, onRun, onPause, version, runState: externalRunState, building }: AgentGraphProps) { export default function AgentGraph({ nodes, title: _title, onNodeClick, onRun, onPause, version, runState: externalRunState, building, queenMode }: AgentGraphProps) {
const [localRunState, setLocalRunState] = useState<RunState>("idle"); const [localRunState, setLocalRunState] = useState<RunState>("idle");
const runState = externalRunState ?? localRunState; const runState = externalRunState ?? localRunState;
const runBtnRef = useRef<HTMLButtonElement>(null); const runBtnRef = useRef<HTMLButtonElement>(null);
@@ -277,7 +278,7 @@ export default function AgentGraph({ nodes, title: _title, onNodeClick, onRun, o
</span> </span>
)} )}
</div> </div>
<RunButton runState={runState} disabled={nodes.length === 0} onRun={handleRun} onPause={onPause ?? (() => {})} btnRef={runBtnRef} /> <RunButton runState={runState} disabled={nodes.length === 0 || queenMode === "building"} onRun={handleRun} onPause={onPause ?? (() => {})} btnRef={runBtnRef} />
</div> </div>
<div className="flex-1 flex items-center justify-center px-5"> <div className="flex-1 flex items-center justify-center px-5">
{building ? ( {building ? (
+128 -142
View File
@@ -1,6 +1,7 @@
import { memo, useState, useRef, useEffect } from "react"; import { memo, useState, useRef, useEffect } from "react";
import { Send, Square, Crown, Cpu, Check, Loader2, Reply } from "lucide-react"; import { Send, Square, Crown, Cpu, Check, Loader2 } from "lucide-react";
import MarkdownContent from "@/components/MarkdownContent"; import MarkdownContent from "@/components/MarkdownContent";
import QuestionWidget from "@/components/QuestionWidget";
export interface ChatMessage { export interface ChatMessage {
id: string; id: string;
@@ -20,15 +21,25 @@ interface ChatPanelProps {
messages: ChatMessage[]; messages: ChatMessage[];
onSend: (message: string, thread: string) => void; onSend: (message: string, thread: string) => void;
isWaiting?: boolean; isWaiting?: boolean;
/** When true a worker is thinking (not yet streaming) */
isWorkerWaiting?: boolean;
/** When true the queen is busy (typing or streaming) — shows the stop button */
isBusy?: boolean;
activeThread: string; activeThread: string;
/** When true, the worker is waiting for user input — shows inline reply box */
workerAwaitingInput?: boolean;
/** When true, the input is disabled (e.g. during loading) */ /** When true, the input is disabled (e.g. during loading) */
disabled?: boolean; disabled?: boolean;
/** Called when user clicks the stop button to cancel the queen's current turn */ /** Called when user clicks the stop button to cancel the queen's current turn */
onCancel?: () => void; onCancel?: () => void;
/** Called when user submits a reply to the worker's input request */ /** Pending question from ask_user — replaces textarea when present */
onWorkerReply?: (message: string) => void; pendingQuestion?: string | null;
/** Options for the pending question */
pendingOptions?: string[] | null;
/** Called when user submits an answer to the pending question */
onQuestionSubmit?: (answer: string, isOther: boolean) => void;
/** Called when user dismisses the pending question without answering */
onQuestionDismiss?: () => void;
/** Queen operating mode — shown as a tag on queen messages */
queenMode?: "building" | "staging" | "running";
} }
const queenColor = "hsl(45,95%,58%)"; const queenColor = "hsl(45,95%,58%)";
@@ -133,76 +144,7 @@ function ToolActivityRow({ content }: { content: string }) {
); );
} }
/** Inline reply box that appears below a worker's input request in the chat thread. */ const MessageBubble = memo(function MessageBubble({ msg, queenMode }: { msg: ChatMessage; queenMode?: "building" | "staging" | "running" }) {
function WorkerInputReply({ onSubmit, disabled }: { onSubmit: (text: string) => void; disabled?: boolean }) {
const [value, setValue] = useState("");
const [sent, setSent] = useState(false);
const inputRef = useRef<HTMLTextAreaElement>(null);
useEffect(() => {
if (!disabled && !sent) inputRef.current?.focus();
}, [disabled, sent]);
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
if (!value.trim() || sent) return;
onSubmit(value.trim());
setSent(true);
};
if (sent) {
return (
<div className="ml-10 flex items-center gap-1.5 text-[11px] text-muted-foreground py-1">
<Check className="w-3 h-3 text-emerald-500" />
<span>Response sent</span>
</div>
);
}
return (
<form onSubmit={handleSubmit} className="ml-10 mt-1">
<div
className="flex items-center gap-2 rounded-xl px-3 py-2 border transition-colors"
style={{
backgroundColor: `${workerColor}08`,
borderColor: `${workerColor}30`,
}}
>
<Reply className="w-3.5 h-3.5 flex-shrink-0" style={{ color: workerColor }} />
<textarea
ref={inputRef}
rows={1}
value={value}
onChange={(e) => {
setValue(e.target.value);
const ta = e.target;
ta.style.height = "auto";
ta.style.height = `${Math.min(ta.scrollHeight, 120)}px`;
}}
onKeyDown={(e) => {
if (e.key === "Enter" && !e.shiftKey) {
e.preventDefault();
handleSubmit(e);
}
}}
placeholder="Reply to worker..."
disabled={disabled}
className="flex-1 bg-transparent text-sm text-foreground outline-none placeholder:text-muted-foreground disabled:opacity-50 resize-none overflow-y-auto"
/>
<button
type="submit"
disabled={!value.trim() || disabled}
className="p-1.5 rounded-lg transition-opacity disabled:opacity-30 hover:opacity-90"
style={{ backgroundColor: workerColor, color: "white" }}
>
<Send className="w-3.5 h-3.5" />
</button>
</div>
</form>
);
}
const MessageBubble = memo(function MessageBubble({ msg }: { msg: ChatMessage }) {
const isUser = msg.type === "user"; const isUser = msg.type === "user";
const isQueen = msg.role === "queen"; const isQueen = msg.role === "queen";
const color = getColor(msg.agent, msg.role); const color = getColor(msg.agent, msg.role);
@@ -257,7 +199,13 @@ const MessageBubble = memo(function MessageBubble({ msg }: { msg: ChatMessage })
isQueen ? "bg-primary/15 text-primary" : "bg-muted text-muted-foreground" isQueen ? "bg-primary/15 text-primary" : "bg-muted text-muted-foreground"
}`} }`}
> >
{isQueen ? "Queen" : "Worker"} {isQueen
? queenMode === "running"
? "running mode"
: queenMode === "staging"
? "staging mode"
: "building mode"
: "Worker"}
</span> </span>
</div> </div>
<div <div
@@ -270,12 +218,14 @@ const MessageBubble = memo(function MessageBubble({ msg }: { msg: ChatMessage })
</div> </div>
</div> </div>
); );
}, (prev, next) => prev.msg.id === next.msg.id && prev.msg.content === next.msg.content); }, (prev, next) => prev.msg.id === next.msg.id && prev.msg.content === next.msg.content && prev.queenMode === next.queenMode);
export default function ChatPanel({ messages, onSend, isWaiting, activeThread, workerAwaitingInput, disabled, onCancel, onWorkerReply }: ChatPanelProps) { export default function ChatPanel({ messages, onSend, isWaiting, isWorkerWaiting, isBusy, activeThread, disabled, onCancel, pendingQuestion, pendingOptions, onQuestionSubmit, onQuestionDismiss, queenMode }: ChatPanelProps) {
const [input, setInput] = useState(""); const [input, setInput] = useState("");
const [readMap, setReadMap] = useState<Record<string, number>>({}); const [readMap, setReadMap] = useState<Record<string, number>>({});
const bottomRef = useRef<HTMLDivElement>(null); const bottomRef = useRef<HTMLDivElement>(null);
const scrollRef = useRef<HTMLDivElement>(null);
const stickToBottom = useRef(true);
const textareaRef = useRef<HTMLTextAreaElement>(null); const textareaRef = useRef<HTMLTextAreaElement>(null);
const threadMessages = messages.filter((m) => { const threadMessages = messages.filter((m) => {
@@ -292,10 +242,24 @@ export default function ChatPanel({ messages, onSend, isWaiting, activeThread, w
// Suppress unused var // Suppress unused var
void readMap; void readMap;
const lastMsg = threadMessages[threadMessages.length - 1]; // Autoscroll: only when user is already near the bottom
const handleScroll = () => {
const el = scrollRef.current;
if (!el) return;
const distFromBottom = el.scrollHeight - el.scrollTop - el.clientHeight;
stickToBottom.current = distFromBottom < 80;
};
useEffect(() => { useEffect(() => {
bottomRef.current?.scrollIntoView({ behavior: "smooth" }); if (stickToBottom.current) {
}, [threadMessages.length, lastMsg?.content, workerAwaitingInput]); bottomRef.current?.scrollIntoView({ behavior: "smooth" });
}
}, [threadMessages, pendingQuestion, isWaiting, isWorkerWaiting]);
// Always start pinned to bottom when switching threads
useEffect(() => {
stickToBottom.current = true;
}, [activeThread]);
const handleSubmit = (e: React.FormEvent) => { const handleSubmit = (e: React.FormEvent) => {
e.preventDefault(); e.preventDefault();
@@ -305,17 +269,6 @@ export default function ChatPanel({ messages, onSend, isWaiting, activeThread, w
if (textareaRef.current) textareaRef.current.style.height = "auto"; if (textareaRef.current) textareaRef.current.style.height = "auto";
}; };
// Find the last worker message to attach the inline reply box below.
// For explicit ask_user, this will be the worker_input_request message.
// For auto-block, this will be the last client_output_delta streamed message.
const lastWorkerMsgIdx = workerAwaitingInput
? threadMessages.reduce(
(last, m, i) =>
m.role === "worker" && m.type !== "tool_status" && m.type !== "system" ? i : last,
-1,
)
: -1;
return ( return (
<div className="flex flex-col h-full min-w-0"> <div className="flex flex-col h-full min-w-0">
{/* Compact sub-header */} {/* Compact sub-header */}
@@ -324,20 +277,44 @@ export default function ChatPanel({ messages, onSend, isWaiting, activeThread, w
</div> </div>
{/* Messages */} {/* Messages */}
<div className="flex-1 overflow-auto px-5 py-4 space-y-3"> <div ref={scrollRef} onScroll={handleScroll} className="flex-1 overflow-auto px-5 py-4 space-y-3">
{threadMessages.map((msg, idx) => ( {threadMessages.map((msg) => (
<div key={msg.id}> <div key={msg.id}>
<MessageBubble msg={msg} /> <MessageBubble msg={msg} queenMode={queenMode} />
{idx === lastWorkerMsgIdx && onWorkerReply && (
<WorkerInputReply onSubmit={onWorkerReply} />
)}
</div> </div>
))} ))}
{isWaiting && ( {isWaiting && (
<div className="flex gap-3"> <div className="flex gap-3">
<div className="w-7 h-7 rounded-xl bg-muted flex items-center justify-center"> <div
<Cpu className="w-3.5 h-3.5 text-muted-foreground" /> className="flex-shrink-0 w-9 h-9 rounded-xl flex items-center justify-center"
style={{
backgroundColor: `${queenColor}18`,
border: `1.5px solid ${queenColor}35`,
boxShadow: `0 0 12px ${queenColor}20`,
}}
>
<Crown className="w-4 h-4" style={{ color: queenColor }} />
</div>
<div className="border border-primary/20 bg-primary/5 rounded-2xl rounded-tl-md px-4 py-3">
<div className="flex gap-1.5">
<span className="w-1.5 h-1.5 rounded-full bg-muted-foreground animate-bounce" style={{ animationDelay: "0ms" }} />
<span className="w-1.5 h-1.5 rounded-full bg-muted-foreground animate-bounce" style={{ animationDelay: "150ms" }} />
<span className="w-1.5 h-1.5 rounded-full bg-muted-foreground animate-bounce" style={{ animationDelay: "300ms" }} />
</div>
</div>
</div>
)}
{isWorkerWaiting && !isWaiting && (
<div className="flex gap-3">
<div
className="flex-shrink-0 w-7 h-7 rounded-xl flex items-center justify-center"
style={{
backgroundColor: `${workerColor}18`,
border: `1.5px solid ${workerColor}35`,
}}
>
<Cpu className="w-3.5 h-3.5" style={{ color: workerColor }} />
</div> </div>
<div className="bg-muted/60 rounded-2xl rounded-tl-md px-4 py-3"> <div className="bg-muted/60 rounded-2xl rounded-tl-md px-4 py-3">
<div className="flex gap-1.5"> <div className="flex gap-1.5">
@@ -351,48 +328,57 @@ export default function ChatPanel({ messages, onSend, isWaiting, activeThread, w
<div ref={bottomRef} /> <div ref={bottomRef} />
</div> </div>
{/* Input — always connected to Queen */} {/* Input area — question widget replaces textarea when a question is pending */}
<form onSubmit={handleSubmit} className="p-4 border-t border-border"> {pendingQuestion && pendingOptions && onQuestionSubmit ? (
<div className="flex items-center gap-3 bg-muted/40 rounded-xl px-4 py-2.5 border border-border focus-within:border-primary/40 transition-colors"> <QuestionWidget
<textarea question={pendingQuestion}
ref={textareaRef} options={pendingOptions}
rows={1} onSubmit={onQuestionSubmit}
value={input} onDismiss={onQuestionDismiss}
onChange={(e) => { />
setInput(e.target.value); ) : (
const ta = e.target; <form onSubmit={handleSubmit} className="p-4">
ta.style.height = "auto"; <div className="flex items-center gap-3 bg-muted/40 rounded-xl px-4 py-2.5 border border-border focus-within:border-primary/40 transition-colors">
ta.style.height = `${Math.min(ta.scrollHeight, 160)}px`; <textarea
}} ref={textareaRef}
onKeyDown={(e) => { rows={1}
if (e.key === "Enter" && !e.shiftKey) { value={input}
e.preventDefault(); onChange={(e) => {
handleSubmit(e); setInput(e.target.value);
} const ta = e.target;
}} ta.style.height = "auto";
placeholder={disabled ? "Connecting to agent..." : "Message Queen Bee..."} ta.style.height = `${Math.min(ta.scrollHeight, 160)}px`;
disabled={disabled} }}
className="flex-1 bg-transparent text-sm text-foreground outline-none placeholder:text-muted-foreground disabled:opacity-50 disabled:cursor-not-allowed resize-none overflow-y-auto" onKeyDown={(e) => {
/> if (e.key === "Enter" && !e.shiftKey) {
{isWaiting && onCancel ? ( e.preventDefault();
<button handleSubmit(e);
type="button" }
onClick={onCancel} }}
className="p-2 rounded-lg bg-destructive text-destructive-foreground hover:opacity-90 transition-opacity" placeholder={disabled ? "Connecting to agent..." : "Message Queen Bee..."}
> disabled={disabled}
<Square className="w-4 h-4" /> className="flex-1 bg-transparent text-sm text-foreground outline-none placeholder:text-muted-foreground disabled:opacity-50 disabled:cursor-not-allowed resize-none overflow-y-auto"
</button> />
) : ( {isBusy && onCancel ? (
<button <button
type="submit" type="button"
disabled={!input.trim() || disabled} onClick={onCancel}
className="p-2 rounded-lg bg-primary text-primary-foreground disabled:opacity-30 hover:opacity-90 transition-opacity" className="p-2 rounded-lg bg-amber-500/15 text-amber-400 border border-amber-500/40 hover:bg-amber-500/25 transition-colors"
> >
<Send className="w-4 h-4" /> <Square className="w-4 h-4" />
</button> </button>
)} ) : (
</div> <button
</form> type="submit"
disabled={!input.trim() || disabled}
className="p-2 rounded-lg bg-primary text-primary-foreground disabled:opacity-30 hover:opacity-90 transition-opacity"
>
<Send className="w-4 h-4" />
</button>
)}
</div>
</form>
)}
</div> </div>
); );
} }
@@ -0,0 +1,142 @@
import { useState, useRef, useEffect, useCallback } from "react";
import { Send, MessageCircleQuestion, X } from "lucide-react";
/**
 * Props for QuestionWidget — an inline multiple-choice prompt that replaces
 * the chat textarea while a structured `ask_user` question is pending.
 */
export interface QuestionWidgetProps {
  /** The question text shown to the user */
  question: string;
  /** 1-3 predefined options. The UI appends an "Other" free-text option. */
  options: string[];
  /** Called with the selected option label or custom text, and whether "Other" was chosen */
  onSubmit: (answer: string, isOther: boolean) => void;
  /** Called when user dismisses the question without answering */
  onDismiss?: () => void;
}
/**
 * Inline multiple-choice question widget.
 *
 * Renders the pending question, its predefined options, an "Other" free-text
 * option, and a submit button. Supports keyboard shortcuts: digit keys 1..N+1
 * select an option, Enter submits. Once submitted (or while `submitted` is
 * true) it renders nothing and ignores further keystrokes.
 */
export default function QuestionWidget({ question, options, onSubmit, onDismiss }: QuestionWidgetProps) {
  const [selected, setSelected] = useState<number | null>(null);
  const [customText, setCustomText] = useState("");
  const [submitted, setSubmitted] = useState(false);
  const inputRef = useRef<HTMLInputElement>(null);
  const containerRef = useRef<HTMLDivElement>(null);

  // "Other" is always the last option index (appended after the predefined options).
  const otherIndex = options.length;
  const isOtherSelected = selected === otherIndex;

  // Focus the free-text input as soon as "Other" becomes the selection.
  useEffect(() => {
    if (isOtherSelected) {
      inputRef.current?.focus();
    }
  }, [isOtherSelected]);

  // A predefined option is submittable as-is; "Other" additionally requires non-blank text.
  const canSubmit = selected !== null && (!isOtherSelected || customText.trim().length > 0);

  const handleSubmit = useCallback(() => {
    if (!canSubmit || submitted) return;
    setSubmitted(true);
    if (isOtherSelected) {
      onSubmit(customText.trim(), true);
    } else {
      // `selected` is non-null here — guaranteed by canSubmit — hence the assertion.
      onSubmit(options[selected!], false);
    }
  }, [canSubmit, submitted, isOtherSelected, customText, options, selected, onSubmit]);

  // Keyboard: Enter to submit, number keys to select.
  // The listener lives on `window`, so it must NOT hijack keystrokes aimed at
  // editable elements elsewhere on the page (previously, typing "1" or Enter
  // into any other input/textarea would change the selection or be swallowed
  // by preventDefault). Only the widget's own "Other" input takes part in
  // Enter-to-submit; digit selection is skipped while typing in it so numbers
  // can appear in the custom answer.
  useEffect(() => {
    const handleKeyDown = (e: KeyboardEvent) => {
      if (submitted) return;
      const target = e.target as HTMLElement | null;
      const inOwnInput = target === inputRef.current;
      // Bug fix: ignore keys typed into foreign inputs/textareas/contenteditables.
      const inForeignEditable =
        !inOwnInput &&
        !!target &&
        (target.tagName === "INPUT" ||
          target.tagName === "TEXTAREA" ||
          target.isContentEditable);
      if (inForeignEditable) return;
      if (e.key === "Enter" && !e.shiftKey) {
        e.preventDefault();
        handleSubmit();
        return;
      }
      // Number keys 1..(options.length + 1) select options; the last index is "Other".
      if (!inOwnInput) {
        const num = parseInt(e.key, 10);
        if (num >= 1 && num <= options.length + 1) {
          e.preventDefault();
          setSelected(num - 1);
        }
      }
    };
    window.addEventListener("keydown", handleKeyDown);
    return () => window.removeEventListener("keydown", handleKeyDown);
  }, [handleSubmit, submitted, options.length]);

  if (submitted) return null;

  return (
    <div ref={containerRef} className="p-4">
      <div className="bg-card border border-border rounded-xl shadow-sm overflow-hidden">
        {/* Header / Question */}
        <div className="px-5 pt-4 pb-3 flex items-start gap-3">
          <div className="w-7 h-7 rounded-lg bg-primary/10 border border-primary/20 flex items-center justify-center flex-shrink-0 mt-0.5">
            <MessageCircleQuestion className="w-3.5 h-3.5 text-primary" />
          </div>
          <p className="text-sm font-medium text-foreground leading-relaxed flex-1">{question}</p>
          {onDismiss && (
            <button
              onClick={onDismiss}
              className="p-1 rounded-md text-muted-foreground hover:text-foreground hover:bg-muted/60 transition-colors flex-shrink-0"
            >
              <X className="w-4 h-4" />
            </button>
          )}
        </div>
        {/* Options */}
        <div className="px-5 pb-3 space-y-1.5">
          {options.map((option, idx) => (
            <button
              key={idx}
              onClick={() => setSelected(idx)}
              className={`w-full text-left px-4 py-2.5 rounded-lg border text-sm transition-colors ${
                selected === idx
                  ? "border-primary bg-primary/10 text-foreground"
                  : "border-border/60 bg-muted/20 text-foreground hover:border-primary/40 hover:bg-muted/40"
              }`}
            >
              <span className="text-xs text-muted-foreground mr-2">{idx + 1}.</span>
              {option}
            </button>
          ))}
          {/* "Other" — inline text input that auto-selects on focus */}
          <input
            ref={inputRef}
            type="text"
            value={customText}
            onFocus={() => setSelected(otherIndex)}
            onChange={(e) => {
              setSelected(otherIndex);
              setCustomText(e.target.value);
            }}
            placeholder="Type a custom response..."
            className={`w-full px-4 py-2.5 rounded-lg border border-dashed text-sm transition-colors bg-transparent placeholder:text-muted-foreground focus:outline-none ${
              isOtherSelected
                ? "border-primary bg-primary/10 text-foreground"
                : "border-border text-muted-foreground hover:border-primary/40"
            }`}
          />
        </div>
        {/* Submit */}
        <div className="px-5 pb-4">
          <button
            onClick={handleSubmit}
            disabled={!canSubmit}
            className="w-full flex items-center justify-center gap-2 py-2.5 rounded-lg text-sm font-medium bg-primary text-primary-foreground hover:bg-primary/90 disabled:opacity-30 disabled:cursor-not-allowed transition-colors"
          >
            <Send className="w-3.5 h-3.5" />
            Submit
          </button>
        </div>
      </div>
    </div>
  );
}
+9
View File
@@ -167,3 +167,12 @@
.animate-in.slide-in-from-right { .animate-in.slide-in-from-right {
animation: slide-in-from-right 0.2s ease-out; animation: slide-in-from-right 0.2s ease-out;
} }
/* Slide-up entrance for the question widget: rises 16px while fading in. */
@keyframes slide-in-from-bottom {
  0% {
    opacity: 0;
    transform: translateY(16px);
  }
  100% {
    opacity: 1;
    transform: translateY(0);
  }
}
.animate-in.slide-in-from-bottom {
  animation: slide-in-from-bottom 0.25s ease-out;
}
+215 -27
View File
@@ -8,6 +8,7 @@ import TopBar from "@/components/TopBar";
import { TAB_STORAGE_KEY, loadPersistedTabs, savePersistedTabs, type PersistedTabState } from "@/lib/tab-persistence"; import { TAB_STORAGE_KEY, loadPersistedTabs, savePersistedTabs, type PersistedTabState } from "@/lib/tab-persistence";
import NodeDetailPanel from "@/components/NodeDetailPanel"; import NodeDetailPanel from "@/components/NodeDetailPanel";
import CredentialsModal, { type Credential, createFreshCredentials, cloneCredentials, allRequiredCredentialsMet, clearCredentialCache } from "@/components/CredentialsModal"; import CredentialsModal, { type Credential, createFreshCredentials, cloneCredentials, allRequiredCredentialsMet, clearCredentialCache } from "@/components/CredentialsModal";
import { agentsApi } from "@/api/agents"; import { agentsApi } from "@/api/agents";
import { executionApi } from "@/api/execution"; import { executionApi } from "@/api/execution";
import { graphsApi } from "@/api/graphs"; import { graphsApi } from "@/api/graphs";
@@ -240,6 +241,8 @@ interface AgentBackendState {
/** The message ID of the current worker input request (for inline reply box) */ /** The message ID of the current worker input request (for inline reply box) */
workerInputMessageId: string | null; workerInputMessageId: string | null;
queenBuilding: boolean; queenBuilding: boolean;
/** Queen operating mode — "building" (coding), "staging" (loaded), or "running" (executing) */
queenMode: "building" | "staging" | "running";
workerRunState: "idle" | "deploying" | "running"; workerRunState: "idle" | "deploying" | "running";
currentExecutionId: string | null; currentExecutionId: string | null;
nodeLogs: Record<string, string[]>; nodeLogs: Record<string, string[]>;
@@ -247,8 +250,18 @@ interface AgentBackendState {
subagentReports: { subagent_id: string; message: string; data?: Record<string, unknown>; timestamp: string }[]; subagentReports: { subagent_id: string; message: string; data?: Record<string, unknown>; timestamp: string }[];
isTyping: boolean; isTyping: boolean;
isStreaming: boolean; isStreaming: boolean;
/** True only when the queen's LLM is actively processing (not worker) */
queenIsTyping: boolean;
/** True only when a worker's LLM is actively processing (not queen) */
workerIsTyping: boolean;
llmSnapshots: Record<string, string>; llmSnapshots: Record<string, string>;
activeToolCalls: Record<string, { name: string; done: boolean; streamId: string }>; activeToolCalls: Record<string, { name: string; done: boolean; streamId: string }>;
/** Structured question text from ask_user with options */
pendingQuestion: string | null;
/** Predefined choices from ask_user (1-3 items); UI appends "Other" */
pendingOptions: string[] | null;
/** Whether the pending question came from queen or worker */
pendingQuestionSource: "queen" | "worker" | null;
} }
function defaultAgentState(): AgentBackendState { function defaultAgentState(): AgentBackendState {
@@ -264,6 +277,7 @@ function defaultAgentState(): AgentBackendState {
awaitingInput: false, awaitingInput: false,
workerInputMessageId: null, workerInputMessageId: null,
queenBuilding: false, queenBuilding: false,
queenMode: "building",
workerRunState: "idle", workerRunState: "idle",
currentExecutionId: null, currentExecutionId: null,
nodeLogs: {}, nodeLogs: {},
@@ -271,8 +285,13 @@ function defaultAgentState(): AgentBackendState {
subagentReports: [], subagentReports: [],
isTyping: false, isTyping: false,
isStreaming: false, isStreaming: false,
queenIsTyping: false,
workerIsTyping: false,
llmSnapshots: {}, llmSnapshots: {},
activeToolCalls: {}, activeToolCalls: {},
pendingQuestion: null,
pendingOptions: null,
pendingQuestionSource: null,
}; };
} }
@@ -352,8 +371,14 @@ export default function Workspace() {
if (persisted) { if (persisted) {
const restored = { ...persisted.activeSessionByAgent }; const restored = { ...persisted.activeSessionByAgent };
const urlSessions = sessionsByAgent[initialAgent]; const urlSessions = sessionsByAgent[initialAgent];
if (urlSessions?.length && !restored[initialAgent]) { if (urlSessions?.length) {
restored[initialAgent] = urlSessions[0].id; // When a prompt was submitted from home, activate the newly created
// session (last in array) instead of the previously active one.
if (initialPrompt && hasExplicitAgent) {
restored[initialAgent] = urlSessions[urlSessions.length - 1].id;
} else if (!restored[initialAgent]) {
restored[initialAgent] = urlSessions[0].id;
}
} }
return restored; return restored;
} }
@@ -632,7 +657,11 @@ export default function Workspace() {
const result = await sessionsApi.get(existingSessionId); const result = await sessionsApi.get(existingSessionId);
if (result.loading) continue; if (result.loading) continue;
return result as LiveSession; return result as LiveSession;
} catch { } catch (pollErr) {
// 404 = agent failed to load and was cleaned up — stop immediately
if (pollErr instanceof ApiError && pollErr.status === 404) {
throw new Error("Agent failed to load");
}
if (i === maxAttempts - 1) throw loadErr; if (i === maxAttempts - 1) throw loadErr;
} }
} }
@@ -648,7 +677,13 @@ export default function Workspace() {
// failed, the throw inside the catch exits the outer try block. // failed, the throw inside the catch exits the outer try block.
const session = liveSession!; const session = liveSession!;
const displayName = formatAgentDisplayName(session.worker_name || agentType); const displayName = formatAgentDisplayName(session.worker_name || agentType);
updateAgentState(agentType, { sessionId: session.session_id, displayName }); const initialMode = session.queen_mode || (session.has_worker ? "staging" : "building");
updateAgentState(agentType, {
sessionId: session.session_id,
displayName,
queenMode: initialMode,
queenBuilding: initialMode === "building",
});
// Update the session label // Update the session label
setSessionsByAgent((prev) => { setSessionsByAgent((prev) => {
@@ -921,7 +956,7 @@ export default function Workspace() {
} catch { } catch {
// Best-effort — queen may have already finished // Best-effort — queen may have already finished
} }
updateAgentState(activeWorker, { isTyping: false, isStreaming: false }); updateAgentState(activeWorker, { isTyping: false, isStreaming: false, queenIsTyping: false, workerIsTyping: false });
}, [agentStates, activeWorker, updateAgentState]); }, [agentStates, activeWorker, updateAgentState]);
// --- Node log helper (writes into agentStates) --- // --- Node log helper (writes into agentStates) ---
@@ -1004,7 +1039,7 @@ export default function Workspace() {
case "execution_started": case "execution_started":
if (isQueen) { if (isQueen) {
turnCounterRef.current[turnKey] = currentTurn + 1; turnCounterRef.current[turnKey] = currentTurn + 1;
updateAgentState(agentType, { isTyping: true }); updateAgentState(agentType, { isTyping: true, queenIsTyping: true });
} else { } else {
// Warn if prior LLM snapshots are being dropped (edge case: execution_completed never arrived) // Warn if prior LLM snapshots are being dropped (edge case: execution_completed never arrived)
const priorSnapshots = agentStates[agentType]?.llmSnapshots || {}; const priorSnapshots = agentStates[agentType]?.llmSnapshots || {};
@@ -1015,6 +1050,7 @@ export default function Workspace() {
updateAgentState(agentType, { updateAgentState(agentType, {
isTyping: true, isTyping: true,
isStreaming: false, isStreaming: false,
workerIsTyping: true,
awaitingInput: false, awaitingInput: false,
workerRunState: "running", workerRunState: "running",
currentExecutionId: event.execution_id || agentStates[agentType]?.currentExecutionId || null, currentExecutionId: event.execution_id || agentStates[agentType]?.currentExecutionId || null,
@@ -1022,6 +1058,9 @@ export default function Workspace() {
subagentReports: [], subagentReports: [],
llmSnapshots: {}, llmSnapshots: {},
activeToolCalls: {}, activeToolCalls: {},
pendingQuestion: null,
pendingOptions: null,
pendingQuestionSource: null,
}); });
markAllNodesAs(agentType, ["running", "looping", "complete", "error"], "pending"); markAllNodesAs(agentType, ["running", "looping", "complete", "error"], "pending");
} }
@@ -1029,7 +1068,7 @@ export default function Workspace() {
case "execution_completed": case "execution_completed":
if (isQueen) { if (isQueen) {
updateAgentState(agentType, { isTyping: false }); updateAgentState(agentType, { isTyping: false, queenIsTyping: false });
} else { } else {
// Flush any remaining LLM snapshots before clearing state // Flush any remaining LLM snapshots before clearing state
const completedSnapshots = agentStates[agentType]?.llmSnapshots || {}; const completedSnapshots = agentStates[agentType]?.llmSnapshots || {};
@@ -1041,11 +1080,15 @@ export default function Workspace() {
updateAgentState(agentType, { updateAgentState(agentType, {
isTyping: false, isTyping: false,
isStreaming: false, isStreaming: false,
workerIsTyping: false,
awaitingInput: false, awaitingInput: false,
workerInputMessageId: null, workerInputMessageId: null,
workerRunState: "idle", workerRunState: "idle",
currentExecutionId: null, currentExecutionId: null,
llmSnapshots: {}, llmSnapshots: {},
pendingQuestion: null,
pendingOptions: null,
pendingQuestionSource: null,
}); });
markAllNodesAs(agentType, ["running", "looping"], "complete"); markAllNodesAs(agentType, ["running", "looping"], "complete");
@@ -1070,7 +1113,7 @@ export default function Workspace() {
// Mark streaming when LLM text is actively arriving // Mark streaming when LLM text is actively arriving
if (event.type === "llm_text_delta" || event.type === "client_output_delta") { if (event.type === "llm_text_delta" || event.type === "client_output_delta") {
updateAgentState(agentType, { isStreaming: true }); updateAgentState(agentType, { isStreaming: true, ...(isQueen ? {} : { workerIsTyping: false }) });
} }
if (event.type === "llm_text_delta" && !isQueen && event.node_id) { if (event.type === "llm_text_delta" && !isQueen && event.node_id) {
@@ -1092,8 +1135,41 @@ export default function Workspace() {
if (event.type === "client_input_requested") { if (event.type === "client_input_requested") {
console.log('[CLIENT_INPUT_REQ] stream_id:', streamId, 'isQueen:', isQueen, 'node_id:', event.node_id, 'prompt:', (event.data?.prompt as string)?.slice(0, 80), 'agentType:', agentType); console.log('[CLIENT_INPUT_REQ] stream_id:', streamId, 'isQueen:', isQueen, 'node_id:', event.node_id, 'prompt:', (event.data?.prompt as string)?.slice(0, 80), 'agentType:', agentType);
const rawOptions = event.data?.options;
const options = Array.isArray(rawOptions) ? (rawOptions as string[]) : null;
if (isQueen) { if (isQueen) {
updateAgentState(agentType, { awaitingInput: true, isTyping: false, isStreaming: false, queenBuilding: false }); const prompt = (event.data?.prompt as string) || "";
const isAutoBlock = !prompt && !options;
// Queen auto-block (empty prompt, no options) should not
// overwrite a pending worker question — the worker's
// QuestionWidget must stay visible. Use the updater form
// to read the latest state and avoid stale-closure races
// when worker and queen events arrive in the same batch.
setAgentStates(prev => {
const cur = prev[agentType] || defaultAgentState();
const workerQuestionActive = cur.pendingQuestionSource === "worker";
if (isAutoBlock && workerQuestionActive) {
return { ...prev, [agentType]: {
...cur,
awaitingInput: true,
isTyping: false,
isStreaming: false,
queenIsTyping: false,
queenBuilding: false,
}};
}
return { ...prev, [agentType]: {
...cur,
awaitingInput: true,
isTyping: false,
isStreaming: false,
queenIsTyping: false,
queenBuilding: false,
pendingQuestion: prompt || null,
pendingOptions: options,
pendingQuestionSource: "queen",
}};
});
} else { } else {
// Worker input request. // Worker input request.
// If the prompt is non-empty (explicit ask_user), create a visible // If the prompt is non-empty (explicit ask_user), create a visible
@@ -1121,18 +1197,22 @@ export default function Workspace() {
awaitingInput: true, awaitingInput: true,
isTyping: false, isTyping: false,
isStreaming: false, isStreaming: false,
queenIsTyping: false,
pendingQuestion: prompt || null,
pendingOptions: options,
pendingQuestionSource: options ? "worker" : null,
}); });
} }
} }
if (event.type === "execution_paused") { if (event.type === "execution_paused") {
updateAgentState(agentType, { isTyping: false, isStreaming: false, awaitingInput: false, workerInputMessageId: null }); updateAgentState(agentType, { isTyping: false, isStreaming: false, queenIsTyping: false, workerIsTyping: false, awaitingInput: false, workerInputMessageId: null, pendingQuestion: null, pendingOptions: null, pendingQuestionSource: null });
if (!isQueen) { if (!isQueen) {
updateAgentState(agentType, { workerRunState: "idle", currentExecutionId: null }); updateAgentState(agentType, { workerRunState: "idle", currentExecutionId: null });
markAllNodesAs(agentType, ["running", "looping"], "pending"); markAllNodesAs(agentType, ["running", "looping"], "pending");
} }
} }
if (event.type === "execution_failed") { if (event.type === "execution_failed") {
updateAgentState(agentType, { isTyping: false, isStreaming: false, awaitingInput: false, workerInputMessageId: null }); updateAgentState(agentType, { isTyping: false, isStreaming: false, queenIsTyping: false, workerIsTyping: false, awaitingInput: false, workerInputMessageId: null, pendingQuestion: null, pendingOptions: null, pendingQuestionSource: null });
if (!isQueen) { if (!isQueen) {
updateAgentState(agentType, { workerRunState: "idle", currentExecutionId: null }); updateAgentState(agentType, { workerRunState: "idle", currentExecutionId: null });
if (event.node_id) { if (event.node_id) {
@@ -1164,7 +1244,11 @@ export default function Workspace() {
case "node_loop_iteration": case "node_loop_iteration":
turnCounterRef.current[turnKey] = currentTurn + 1; turnCounterRef.current[turnKey] = currentTurn + 1;
updateAgentState(agentType, { isStreaming: false, activeToolCalls: {}, awaitingInput: false }); if (isQueen) {
updateAgentState(agentType, { isStreaming: false, activeToolCalls: {}, awaitingInput: false, pendingQuestion: null, pendingOptions: null, pendingQuestionSource: null });
} else {
updateAgentState(agentType, { isStreaming: false, workerIsTyping: true, activeToolCalls: {}, awaitingInput: false, pendingQuestion: null, pendingOptions: null, pendingQuestionSource: null });
}
if (!isQueen && event.node_id) { if (!isQueen && event.node_id) {
const pendingText = agentStates[agentType]?.llmSnapshots[event.node_id]; const pendingText = agentStates[agentType]?.llmSnapshots[event.node_id];
if (pendingText?.trim()) { if (pendingText?.trim()) {
@@ -1212,13 +1296,7 @@ export default function Workspace() {
case "tool_call_started": { case "tool_call_started": {
console.log('[TOOL_PILL] tool_call_started received:', { isQueen, nodeId: event.node_id, streamId: event.stream_id, agentType, executionId: event.execution_id, toolName: event.data?.tool_name }); console.log('[TOOL_PILL] tool_call_started received:', { isQueen, nodeId: event.node_id, streamId: event.stream_id, agentType, executionId: event.execution_id, toolName: event.data?.tool_name });
// Detect queen building: when the queen starts writing/editing files, she's building an agent // queenBuilding is now driven by queen_mode_changed events
if (isQueen) {
const tn = (event.data?.tool_name as string) || "";
if (tn === "write_file" || tn === "edit_file") {
updateAgentState(agentType, { queenBuilding: true });
}
}
if (event.node_id) { if (event.node_id) {
if (!isQueen) { if (!isQueen) {
@@ -1453,6 +1531,19 @@ export default function Workspace() {
break; break;
} }
case "queen_mode_changed": {
const rawMode = event.data?.mode as string;
const newMode: "building" | "staging" | "running" =
rawMode === "running" ? "running" : rawMode === "staging" ? "staging" : "building";
updateAgentState(agentType, {
queenMode: newMode,
queenBuilding: newMode === "building",
// Sync workerRunState so the RunButton reflects the mode
workerRunState: newMode === "running" ? "running" : "idle",
});
break;
}
case "worker_loaded": { case "worker_loaded": {
const workerName = event.data?.worker_name as string | undefined; const workerName = event.data?.worker_name as string | undefined;
const agentPathFromEvent = event.data?.agent_path as string | undefined; const agentPathFromEvent = event.data?.agent_path as string | undefined;
@@ -1561,6 +1652,11 @@ export default function Workspace() {
return; return;
} }
// If queen has a pending question widget, dismiss it when user types directly
if (agentStates[activeWorker]?.pendingQuestionSource === "queen") {
updateAgentState(activeWorker, { pendingQuestion: null, pendingOptions: null, pendingQuestionSource: null });
}
const userMsg: ChatMessage = { const userMsg: ChatMessage = {
id: makeId(), agent: "You", agentColor: "", id: makeId(), agent: "You", agentColor: "",
content: text, timestamp: "", type: "user", thread, createdAt: Date.now(), content: text, timestamp: "", type: "user", thread, createdAt: Date.now(),
@@ -1571,7 +1667,7 @@ export default function Workspace() {
s.id === activeSession.id ? { ...s, messages: [...s.messages, userMsg] } : s s.id === activeSession.id ? { ...s, messages: [...s.messages, userMsg] } : s
), ),
})); }));
updateAgentState(activeWorker, { isTyping: true }); updateAgentState(activeWorker, { isTyping: true, queenIsTyping: true });
if (state?.sessionId && state?.ready) { if (state?.sessionId && state?.ready) {
executionApi.chat(state.sessionId, text).catch((err: unknown) => { executionApi.chat(state.sessionId, text).catch((err: unknown) => {
@@ -1587,7 +1683,7 @@ export default function Workspace() {
s.id === activeSession.id ? { ...s, messages: [...s.messages, errorChatMsg] } : s s.id === activeSession.id ? { ...s, messages: [...s.messages, errorChatMsg] } : s
), ),
})); }));
updateAgentState(activeWorker, { isTyping: false, isStreaming: false }); updateAgentState(activeWorker, { isTyping: false, isStreaming: false, queenIsTyping: false });
}); });
} else { } else {
const errorMsg: ChatMessage = { const errorMsg: ChatMessage = {
@@ -1624,7 +1720,7 @@ export default function Workspace() {
})); }));
// Clear awaiting state optimistically // Clear awaiting state optimistically
updateAgentState(activeWorker, { awaitingInput: false, workerInputMessageId: null, isTyping: true }); updateAgentState(activeWorker, { awaitingInput: false, workerInputMessageId: null, isTyping: true, pendingQuestion: null, pendingOptions: null, pendingQuestionSource: null });
executionApi.workerInput(state.sessionId, text).catch((err: unknown) => { executionApi.workerInput(state.sessionId, text).catch((err: unknown) => {
const errMsg = err instanceof Error ? err.message : String(err); const errMsg = err instanceof Error ? err.message : String(err);
@@ -1643,6 +1739,90 @@ export default function Workspace() {
}); });
}, [activeWorker, activeSession, agentStates, updateAgentState]); }, [activeWorker, activeSession, agentStates, updateAgentState]);
// --- handleWorkerQuestionAnswer: route predefined answers direct to worker, "Other" through queen ---
// `answer` is the user's reply to a worker-originated question widget; `isOther`
// is true when the user chose the free-text "Other" path instead of a listed option.
const handleWorkerQuestionAnswer = useCallback((answer: string, isOther: boolean) => {
  if (!activeSession) return;
  const state = agentStates[activeWorker];
  // Capture question/options before they are cleared from UI state below.
  const question = state?.pendingQuestion || "";
  const opts = state?.pendingOptions;
  if (isOther) {
    // "Other" free-text → route through queen for evaluation
    updateAgentState(activeWorker, { pendingQuestion: null, pendingOptions: null, pendingQuestionSource: null });
    if (question && opts && state?.sessionId && state?.ready) {
      // Wrap the raw answer with the original question/options so the queen
      // has full context for evaluating the free-text reply.
      const formatted = `[Worker asked: "${question}" | Options: ${opts.join(", ")}]\nUser answered: "${answer}"`;
      // Optimistically append the user's message to the active session thread.
      const userMsg: ChatMessage = {
        id: makeId(), agent: "You", agentColor: "",
        content: answer, timestamp: "", type: "user", thread: activeWorker, createdAt: Date.now(),
      };
      setSessionsByAgent(prev => ({
        ...prev,
        [activeWorker]: prev[activeWorker].map(s =>
          s.id === activeSession.id ? { ...s, messages: [...s.messages, userMsg] } : s
        ),
      }));
      updateAgentState(activeWorker, { isTyping: true, queenIsTyping: true });
      executionApi.chat(state.sessionId, formatted).catch((err: unknown) => {
        // Surface the send failure as a system message and reset typing flags.
        const errMsg = err instanceof Error ? err.message : String(err);
        const errorChatMsg: ChatMessage = {
          id: makeId(), agent: "System", agentColor: "",
          content: `Failed to send message: ${errMsg}`,
          timestamp: "", type: "system", thread: activeWorker, createdAt: Date.now(),
        };
        setSessionsByAgent(prev => ({
          ...prev,
          [activeWorker]: prev[activeWorker].map(s =>
            s.id === activeSession.id ? { ...s, messages: [...s.messages, errorChatMsg] } : s
          ),
        }));
        updateAgentState(activeWorker, { isTyping: false, isStreaming: false, queenIsTyping: false });
      });
    } else {
      // No question context or session not ready — fall back to a normal send.
      handleSend(answer, activeWorker);
    }
  } else {
    // Predefined option → send directly to worker
    handleWorkerReply(answer);
    // Queue context for queen (fire-and-forget, no LLM response triggered)
    if (question && state?.sessionId && state?.ready) {
      const notification = `[Worker asked: "${question}" | User selected: "${answer}"]`;
      executionApi.queenContext(state.sessionId, notification).catch(() => {});
    }
  }
}, [activeWorker, activeSession, agentStates, handleWorkerReply, handleSend, updateAgentState, setSessionsByAgent]);
// --- handleQueenQuestionAnswer: deliver the answer to a queen-originated question ---
// No context wrapping is needed: the queen posed the question herself, so the
// raw answer goes straight through the normal /chat path via handleSend.
const handleQueenQuestionAnswer = useCallback((answer: string, _isOther: boolean) => {
  // Tear down the question widget first, then forward the reply as a regular send.
  const clearedQuestion = { pendingQuestion: null, pendingOptions: null, pendingQuestionSource: null };
  updateAgentState(activeWorker, clearedQuestion);
  handleSend(answer, activeWorker);
}, [activeWorker, handleSend, updateAgentState]);
// --- handleQuestionDismiss: user closed the question widget without answering ---
// Sends a synthetic dismissal message so the node blocked on the question can resume.
const handleQuestionDismiss = useCallback(() => {
  const current = agentStates[activeWorker];
  if (!current?.sessionId) return;
  const askedBy = current.pendingQuestionSource;
  const dismissedQuestion = current.pendingQuestion || "";
  // Optimistically tear down the widget before notifying the backend.
  updateAgentState(activeWorker, {
    pendingQuestion: null,
    pendingOptions: null,
    pendingQuestionSource: null,
    awaitingInput: false,
  });
  // Route the dismiss signal to whichever side asked the question (errors ignored).
  const dismissMsg = `[User dismissed the question: "${dismissedQuestion}"]`;
  const deliver = askedBy === "worker"
    ? executionApi.workerInput(current.sessionId, dismissMsg)
    : executionApi.chat(current.sessionId, dismissMsg);
  deliver.catch(() => {});
}, [agentStates, activeWorker, updateAgentState]);
const handleLoadAgent = useCallback(async (agentPath: string) => { const handleLoadAgent = useCallback(async (agentPath: string) => {
const state = agentStates[activeWorker]; const state = agentStates[activeWorker];
if (!state?.sessionId) return; if (!state?.sessionId) return;
@@ -1795,6 +1975,7 @@ export default function Workspace() {
onPause={handlePause} onPause={handlePause}
runState={activeAgentState?.workerRunState ?? "idle"} runState={activeAgentState?.workerRunState ?? "idle"}
building={activeAgentState?.queenBuilding ?? false} building={activeAgentState?.queenBuilding ?? false}
queenMode={activeAgentState?.queenMode ?? "building"}
/> />
</div> </div>
</div> </div>
@@ -1856,16 +2037,23 @@ export default function Workspace() {
messages={activeSession.messages} messages={activeSession.messages}
onSend={handleSend} onSend={handleSend}
onCancel={handleCancelQueen} onCancel={handleCancelQueen}
onWorkerReply={handleWorkerReply}
activeThread={activeWorker} activeThread={activeWorker}
isWaiting={(activeAgentState?.isTyping && !activeAgentState?.isStreaming) ?? false} isWaiting={(activeAgentState?.queenIsTyping && !activeAgentState?.isStreaming) ?? false}
workerAwaitingInput={ isWorkerWaiting={(activeAgentState?.workerIsTyping && !activeAgentState?.isStreaming) ?? false}
(activeAgentState?.awaitingInput && activeAgentState?.workerRunState === "running") ?? false isBusy={activeAgentState?.queenIsTyping ?? false}
}
disabled={ disabled={
(activeAgentState?.loading ?? true) || (activeAgentState?.loading ?? true) ||
!(activeAgentState?.queenReady) !(activeAgentState?.queenReady)
} }
queenMode={activeAgentState?.queenMode ?? "building"}
pendingQuestion={activeAgentState?.awaitingInput ? activeAgentState.pendingQuestion : null}
pendingOptions={activeAgentState?.awaitingInput ? activeAgentState.pendingOptions : null}
onQuestionSubmit={
activeAgentState?.pendingQuestionSource === "queen"
? handleQueenQuestionAnswer
: handleWorkerQuestionAnswer
}
onQuestionDismiss={handleQuestionDismiss}
/> />
)} )}
</div> </div>
+69 -3
View File
@@ -578,7 +578,11 @@ class TestClientFacingBlocking:
"""signal_shutdown should unblock a waiting client_facing node.""" """signal_shutdown should unblock a waiting client_facing node."""
llm = MockStreamingLLM( llm = MockStreamingLLM(
scenarios=[ scenarios=[
tool_call_scenario("ask_user", {"question": "Waiting..."}, tool_use_id="ask_1"), tool_call_scenario(
"ask_user",
{"question": "Waiting...", "options": ["Continue", "Stop"]},
tool_use_id="ask_1",
),
] ]
) )
bus = EventBus() bus = EventBus()
@@ -600,7 +604,11 @@ class TestClientFacingBlocking:
"""CLIENT_INPUT_REQUESTED should be published when ask_user blocks.""" """CLIENT_INPUT_REQUESTED should be published when ask_user blocks."""
llm = MockStreamingLLM( llm = MockStreamingLLM(
scenarios=[ scenarios=[
tool_call_scenario("ask_user", {"question": "Hello!"}, tool_use_id="ask_1"), tool_call_scenario(
"ask_user",
{"question": "Hello!", "options": ["Yes", "No"]},
tool_use_id="ask_1",
),
] ]
) )
bus = EventBus() bus = EventBus()
@@ -796,7 +804,7 @@ class TestClientFacingExpectingWork:
async def user_then_shutdown(): async def user_then_shutdown():
await asyncio.sleep(0.05) await asyncio.sleep(0.05)
await node.inject_event("furwise.app") await node.inject_event("furwise.app", is_client_input=True)
# Node should auto-block on "Monitoring..." text. # Node should auto-block on "Monitoring..." text.
# Give it time to reach the block, then shutdown. # Give it time to reach the block, then shutdown.
await asyncio.sleep(0.1) await asyncio.sleep(0.1)
@@ -2027,3 +2035,61 @@ class TestExecutionId:
node_spec=node_spec, memory=SharedMemory(), goal=goal, input_data={} node_spec=node_spec, memory=SharedMemory(), goal=goal, input_data={}
) )
assert ctx.execution_id == "" assert ctx.execution_id == ""
# ---------------------------------------------------------------------------
# Subagent memory snapshot includes accumulator outputs
# ---------------------------------------------------------------------------
class TestSubagentAccumulatorMemory:
    """Verify that subagent memory construction merges accumulator outputs
    and includes the subagent's input_keys in read permissions."""

    def test_accumulator_values_merged_into_parent_data(self):
        """Keys from OutputAccumulator should appear in subagent memory."""
        # Mirror what _execute_subagent does internally: the parent's shared
        # memory holds user_request but NOT tweet_content.
        parent_memory = SharedMemory()
        parent_memory.write("user_request", "post a joke")

        # tweet_content lives only in the accumulator (set via set_output
        # before delegation).
        accumulator = OutputAccumulator(values={"tweet_content": "Hello world!"})

        # The fix: accumulator outputs are merged in, with parent values
        # winning on any key collision.
        merged = {**accumulator.to_dict(), **parent_memory.read_all()}

        # Build the subagent's memory from the merged view.
        child_memory = SharedMemory()
        for k, v in merged.items():
            child_memory.write(k, v, validate=False)

        input_keys = ["tweet_content"]
        allowed = set(merged) | set(input_keys)
        scoped = child_memory.with_permissions(read_keys=list(allowed), write_keys=[])

        # Both reads would have raised PermissionError before the fix.
        assert scoped.read("tweet_content") == "Hello world!"
        assert scoped.read("user_request") == "post a joke"

    def test_input_keys_allowed_even_if_not_in_data(self):
        """Subagent input_keys should be in read permissions even if the
        key doesn't exist in memory (returns None instead of PermissionError)."""
        parent_memory = SharedMemory()
        parent_memory.write("user_request", "hi")
        snapshot = parent_memory.read_all()

        child_memory = SharedMemory()
        for k, v in snapshot.items():
            child_memory.write(k, v, validate=False)

        # "tweet_content" is declared as an input key but absent from the data.
        allowed = set(snapshot) | {"tweet_content"}
        scoped = child_memory.with_permissions(read_keys=list(allowed), write_keys=[])

        # A missing-but-permitted key reads as None rather than raising.
        assert scoped.read("tweet_content") is None
        assert scoped.read("user_request") == "hi"
+599 -1
View File
@@ -2,11 +2,12 @@
from __future__ import annotations from __future__ import annotations
import json
from typing import Any from typing import Any
import pytest import pytest
from framework.graph.conversation import Message, NodeConversation from framework.graph.conversation import Message, NodeConversation, extract_tool_call_history
from framework.storage.conversation_store import FileConversationStore from framework.storage.conversation_store import FileConversationStore
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -930,3 +931,600 @@ class TestConversationIntegration:
assert restored.next_seq == 4 assert restored.next_seq == 4
assert restored.messages[0].content == "new msg" assert restored.messages[0].content == "new msg"
assert restored.messages[0].seq == 2 assert restored.messages[0].seq == 2
# ---------------------------------------------------------------------------
# Helpers for aggressive compaction tests
# ---------------------------------------------------------------------------
def _make_tool_call(call_id: str, name: str, args: dict) -> dict:
return {
"id": call_id,
"type": "function",
"function": {"name": name, "arguments": json.dumps(args)},
}
async def _build_tool_heavy_conversation(
    store: MockConversationStore | None = None,
) -> NodeConversation:
    """Build a conversation with many tool call pairs.

    Layout: user msg, then 5x (assistant with append_data tool_call + tool result),
    then 1x (assistant with set_output tool_call + tool result), then user msg + assistant msg.

    When *store* is given, every manually-appended message is also persisted
    via ``write_part`` so the store mirrors the in-memory state.
    """
    conv = NodeConversation(store=store)
    await conv.add_user_message("Process the data")  # seq 0

    # Five append_data pairs — the "non-essential" tool calls that aggressive
    # compaction is expected to collapse.
    for i in range(5):
        args = {"filename": "output.html", "content": "x" * 500}
        tc = [_make_tool_call(f"call_{i}", "append_data", args)]
        # Append directly to the private message list (bypassing the public
        # API) so seq numbers and tool_call wiring are controlled exactly.
        conv._messages.append(
            Message(
                seq=conv._next_seq,
                role="assistant",
                content=f"Appending part {i}",
                tool_calls=tc,
            )
        )
        if store:
            await store.write_part(conv._next_seq, conv._messages[-1].to_storage_dict())
        conv._next_seq += 1
        conv._messages.append(
            Message(
                seq=conv._next_seq,
                role="tool",
                content='{"success": true}',
                tool_use_id=f"call_{i}",
            )
        )
        if store:
            await store.write_part(conv._next_seq, conv._messages[-1].to_storage_dict())
        conv._next_seq += 1

    # set_output call — must be protected
    so_tc = [_make_tool_call("call_so", "set_output", {"key": "result", "value": "done"})]
    conv._messages.append(
        Message(seq=conv._next_seq, role="assistant", content="Setting output", tool_calls=so_tc)
    )
    if store:
        await store.write_part(conv._next_seq, conv._messages[-1].to_storage_dict())
    conv._next_seq += 1
    conv._messages.append(
        Message(
            seq=conv._next_seq,
            role="tool",
            content="Output 'result' set successfully.",
            tool_use_id="call_so",
        )
    )
    if store:
        await store.write_part(conv._next_seq, conv._messages[-1].to_storage_dict())
    conv._next_seq += 1

    # Recent messages
    await conv.add_user_message("Continue")
    await conv.add_assistant_message("Working on it")
    return conv
# ---------------------------------------------------------------------------
# Tests: aggressive structural compaction
# ---------------------------------------------------------------------------
class TestAggressiveStructuralCompaction:
    """Behavior of compact_preserving_structure in aggressive vs standard mode."""

    @pytest.mark.asyncio
    async def test_aggressive_collapses_tool_pairs(self, tmp_path):
        """Aggressive mode should collapse non-essential tool pairs into a summary."""
        conv = await _build_tool_heavy_conversation()
        await conv.compact_preserving_structure(
            spillover_dir=str(tmp_path),
            keep_recent=2,
            aggressive=True,
        )
        # Collapsed: the 5 append_data pairs (10 msgs) plus 1 freeform user msg.
        # Kept: reference msg + protected set_output pair + 2 recent = 5.
        assert conv.message_count == 5
        ref = conv.messages[0]
        assert ref.role == "user"  # the injected reference message
        assert "TOOLS ALREADY CALLED" in ref.content
        assert "append_data (5x)" in ref.content
        # The set_output pair survives intact.
        kept_assistant = conv.messages[1]
        assert kept_assistant.role == "assistant"
        assert kept_assistant.tool_calls is not None
        assert kept_assistant.tool_calls[0]["function"]["name"] == "set_output"
        assert conv.messages[2].role == "tool"
        # Recent tail untouched.
        assert conv.messages[3].content == "Continue"
        assert conv.messages[4].content == "Working on it"

    @pytest.mark.asyncio
    async def test_aggressive_preserves_set_output(self, tmp_path):
        """set_output tool calls are always protected in aggressive mode."""
        conv = await _build_tool_heavy_conversation()
        await conv.compact_preserving_structure(
            spillover_dir=str(tmp_path),
            keep_recent=2,
            aggressive=True,
        )
        # Collect every tool name still present after compaction.
        remaining_names = [
            tc["function"]["name"]
            for msg in conv.messages
            if msg.tool_calls
            for tc in msg.tool_calls
        ]
        assert "set_output" in remaining_names
        # The append_data calls were collapsed away.
        assert "append_data" not in remaining_names

    @pytest.mark.asyncio
    async def test_aggressive_preserves_errors(self, tmp_path):
        """Error tool results are always protected in aggressive mode."""
        conv = NodeConversation()
        await conv.add_user_message("Start")

        def _append(**msg_kwargs):
            # Raw append with an explicit seq, mirroring the builder helper.
            conv._messages.append(Message(seq=conv._next_seq, **msg_kwargs))
            conv._next_seq += 1

        # A successful tool pair (collapsible).
        _append(
            role="assistant",
            content="",
            tool_calls=[_make_tool_call("call_ok", "web_search", {"query": "test"})],
        )
        _append(role="tool", content="results", tool_use_id="call_ok")
        # A failing tool pair (must be preserved).
        _append(
            role="assistant",
            content="",
            tool_calls=[_make_tool_call("call_err", "web_scrape", {"url": "http://broken.com"})],
        )
        _append(
            role="tool",
            content="Connection timeout",
            tool_use_id="call_err",
            is_error=True,
        )

        await conv.add_user_message("Next")
        await conv.add_assistant_message("OK")

        await conv.compact_preserving_structure(
            spillover_dir=str(tmp_path),
            keep_recent=2,
            aggressive=True,
        )
        errors = [m for m in conv.messages if m.role == "tool" and m.is_error]
        assert len(errors) == 1
        assert errors[0].content == "Connection timeout"

    @pytest.mark.asyncio
    async def test_standard_mode_keeps_all_tool_pairs(self, tmp_path):
        """Non-aggressive mode should keep all tool pairs (existing behavior)."""
        conv = await _build_tool_heavy_conversation()
        await conv.compact_preserving_structure(
            spillover_dir=str(tmp_path),
            keep_recent=2,
            aggressive=False,
        )
        # All 6 tool pairs (12 msgs) remain structural; only the freeform user
        # msg is removed: ref + 12 + 2 recent = 15.
        assert conv.message_count == 15

    @pytest.mark.asyncio
    async def test_two_pass_sequence(self, tmp_path):
        """Standard pass then aggressive pass produces valid result."""
        conv = await _build_tool_heavy_conversation()
        spillover = str(tmp_path)

        # First pass: standard compaction keeps every structural pair.
        await conv.compact_preserving_structure(spillover_dir=spillover, keep_recent=2)
        count_after_standard = conv.message_count
        assert count_after_standard == 15  # all structural kept

        # Second pass: aggressive compaction shrinks it further.
        await conv.compact_preserving_structure(
            spillover_dir=spillover, keep_recent=2, aggressive=True
        )
        count_after_aggressive = conv.message_count
        assert count_after_aggressive < count_after_standard
        # ref + set_output pair + 2 recent = 5
        assert count_after_aggressive == 5

    @pytest.mark.asyncio
    async def test_aggressive_persists_correctly(self, tmp_path):
        """Aggressive compaction correctly updates the store."""
        store = MockConversationStore()
        conv = await _build_tool_heavy_conversation(store=store)
        await conv.compact_preserving_structure(
            spillover_dir=str(tmp_path),
            keep_recent=2,
            aggressive=True,
        )
        # The in-memory view and the persisted parts must agree afterwards.
        persisted = await store.read_parts()
        assert len(persisted) == conv.message_count
class TestExtractToolCallHistory:
    """extract_tool_call_history summarizes calls, saved files, and errors."""

    def test_basic_extraction(self):
        # Two successful tool pairs: a search and a file save.
        history = [
            Message(
                seq=0,
                role="assistant",
                content="",
                tool_calls=[_make_tool_call("c1", "web_search", {"query": "python async"})],
            ),
            Message(seq=1, role="tool", content="results", tool_use_id="c1"),
            Message(
                seq=2,
                role="assistant",
                content="",
                tool_calls=[
                    _make_tool_call(
                        "c2", "save_data", {"filename": "output.txt", "content": "data"}
                    )
                ],
            ),
            Message(seq=3, role="tool", content="saved", tool_use_id="c2"),
        ]
        summary = extract_tool_call_history(history)
        # Per-tool call counts and saved filenames are both reported.
        assert "web_search (1x)" in summary
        assert "save_data (1x)" in summary
        assert "FILES SAVED: output.txt" in summary

    def test_errors_included(self):
        failing = Message(
            seq=0,
            role="tool",
            content="Connection refused",
            is_error=True,
            tool_use_id="c1",
        )
        summary = extract_tool_call_history([failing])
        assert "ERRORS" in summary
        assert "Connection refused" in summary

    def test_empty_messages(self):
        # No messages → empty summary string.
        assert extract_tool_call_history([]) == ""
# ---------------------------------------------------------------------------
# Tests for _is_context_too_large_error
# ---------------------------------------------------------------------------
class TestIsContextTooLargeError:
    """Heuristic classification of context-window-overflow errors."""

    def test_context_window_class_name(self):
        from framework.graph.event_loop_node import _is_context_too_large_error

        # Matching on the exception class name alone is sufficient.
        class ContextWindowExceededError(Exception):
            pass

        assert _is_context_too_large_error(ContextWindowExceededError("x"))

    def test_openai_context_length(self):
        from framework.graph.event_loop_node import _is_context_too_large_error

        # OpenAI-style "maximum context length" message.
        exc = RuntimeError("This model's maximum context length is 128000 tokens")
        assert _is_context_too_large_error(exc)

    def test_anthropic_too_long(self):
        from framework.graph.event_loop_node import _is_context_too_large_error

        # Anthropic-style "prompt is too long" message.
        exc = RuntimeError("prompt is too long: 150000 tokens > 100000")
        assert _is_context_too_large_error(exc)

    def test_generic_exceeds_limit(self):
        from framework.graph.event_loop_node import _is_context_too_large_error

        exc = ValueError("Request exceeds token limit")
        assert _is_context_too_large_error(exc)

    def test_unrelated_error(self):
        from framework.graph.event_loop_node import _is_context_too_large_error

        # Ordinary failures must not be classified as context overflow.
        for unrelated in (ValueError("connection refused"), RuntimeError("timeout")):
            assert not _is_context_too_large_error(unrelated)
# ---------------------------------------------------------------------------
# Tests for _format_messages_for_summary
# ---------------------------------------------------------------------------
class TestFormatMessagesForSummary:
    """Rendering rules of EventLoopNode._format_messages_for_summary."""

    def test_user_assistant_messages(self):
        from framework.graph.event_loop_node import EventLoopNode

        rendered = EventLoopNode._format_messages_for_summary([
            Message(seq=0, role="user", content="Hello world"),
            Message(seq=1, role="assistant", content="Hi there"),
        ])
        assert "[user]: Hello world" in rendered
        assert "[assistant]: Hi there" in rendered

    def test_tool_result_truncated(self):
        from framework.graph.event_loop_node import EventLoopNode

        long_result = Message(seq=0, role="tool", content="x" * 1000, tool_use_id="c1")
        rendered = EventLoopNode._format_messages_for_summary([long_result])
        assert "[tool result]:" in rendered
        assert "..." in rendered
        # Body is clipped to 500 chars plus the ellipsis marker.
        assert len(rendered) < 600

    def test_assistant_with_tool_calls(self):
        from framework.graph.event_loop_node import EventLoopNode

        calling = Message(
            seq=0,
            role="assistant",
            content="Searching",
            tool_calls=[_make_tool_call("c1", "web_search", {"query": "test"})],
        )
        rendered = EventLoopNode._format_messages_for_summary([calling])
        assert "web_search" in rendered
        assert "[assistant (calls:" in rendered
# ---------------------------------------------------------------------------
# Tests for _llm_compact (recursive binary-search)
# ---------------------------------------------------------------------------
class TestLlmCompact:
    """Test the recursive LLM compaction with mock LLM."""

    def _make_node(self):
        """Create a minimal EventLoopNode for testing."""
        from framework.graph.event_loop_node import EventLoopNode, LoopConfig

        config = LoopConfig(max_history_tokens=32000)
        # __new__ bypasses __init__ (which would need a full runtime); only
        # the attributes _llm_compact touches are wired below.
        node = EventLoopNode.__new__(EventLoopNode)
        node._config = config
        node._event_bus = None
        node._judge = None
        node._approval_callback = None
        node._tool_executor = None
        node._adaptive_learner = None
        # Set class-level constants (already on class, but explicit)
        return node

    def _make_ctx(self, llm_responses=None, llm_error=None):
        """Create a mock NodeContext with controllable LLM.

        llm_error: exception raised by every acomplete call.
        llm_responses: list of response texts returned in order.
        Neither given: a single canned summary response.
        """
        from unittest.mock import AsyncMock, MagicMock
        from framework.graph.node import NodeSpec

        spec = NodeSpec(
            id="test",
            name="Test Node",
            description="A test node",
            node_type="event_loop",
            input_keys=[],
            output_keys=["result"],
        )
        ctx = MagicMock()
        ctx.node_spec = spec
        ctx.node_id = "test"
        ctx.stream_id = "test"
        ctx.continuous_mode = False
        ctx.runtime_logger = None
        mock_llm = AsyncMock()
        if llm_error:
            mock_llm.acomplete.side_effect = llm_error
        elif llm_responses:
            # side_effect with a list yields the responses one per call.
            responses = []
            for text in llm_responses:
                resp = MagicMock()
                resp.content = text
                responses.append(resp)
            mock_llm.acomplete.side_effect = responses
        else:
            resp = MagicMock()
            resp.content = "Summary of conversation."
            mock_llm.acomplete.return_value = resp
        ctx.llm = mock_llm
        return ctx

    @pytest.mark.asyncio
    async def test_single_call_success(self):
        node = self._make_node()
        ctx = self._make_ctx()
        msgs = [
            Message(seq=0, role="user", content="Do something"),
            Message(seq=1, role="assistant", content="Done"),
        ]
        result = await node._llm_compact(ctx, msgs, None)
        assert "Summary of conversation." in result
        # Small input → exactly one LLM call, no splitting.
        ctx.llm.acomplete.assert_called_once()

    @pytest.mark.asyncio
    async def test_context_too_large_triggers_split(self):
        """When LLM raises context error, should split and retry."""
        from unittest.mock import MagicMock

        node = self._make_node()
        call_count = 0

        async def mock_acomplete(**kwargs):
            nonlocal call_count
            call_count += 1
            # First call with full messages → fail
            # Subsequent calls with smaller chunks → succeed
            if call_count == 1:
                raise RuntimeError("This model's maximum context length is 128000 tokens")
            resp = MagicMock()
            resp.content = f"Summary part {call_count}"
            return resp

        ctx = self._make_ctx()
        ctx.llm.acomplete = mock_acomplete
        msgs = [Message(seq=i, role="user", content=f"Message {i}") for i in range(10)]
        result = await node._llm_compact(ctx, msgs, None)
        # Should have split and produced two summaries
        assert "Summary part" in result
        assert call_count >= 3  # 1 failure + 2 successful halves

    @pytest.mark.asyncio
    async def test_non_context_error_propagates(self):
        """Non-context errors should propagate, not trigger splitting."""
        node = self._make_node()
        ctx = self._make_ctx(llm_error=ValueError("API key invalid"))
        msgs = [
            Message(seq=0, role="user", content="Hello"),
            Message(seq=1, role="assistant", content="Hi"),
        ]
        with pytest.raises(ValueError, match="API key invalid"):
            await node._llm_compact(ctx, msgs, None)

    @pytest.mark.asyncio
    async def test_proactive_split_for_large_input(self):
        """Messages exceeding char limit should be split proactively."""
        node = self._make_node()
        # Lower the limit for testing (instance attribute shadows the class constant).
        node._LLM_COMPACT_CHAR_LIMIT = 100
        ctx = self._make_ctx(
            llm_responses=["Part 1 summary", "Part 2 summary"],
        )
        msgs = [
            Message(seq=0, role="user", content="x" * 80),
            Message(seq=1, role="user", content="y" * 80),
        ]
        result = await node._llm_compact(ctx, msgs, None)
        assert "Part 1 summary" in result
        assert "Part 2 summary" in result
        # LLM should have been called twice (no failure, proactive split)
        assert ctx.llm.acomplete.call_count == 2

    @pytest.mark.asyncio
    async def test_tool_history_appended_at_top_level(self):
        """Tool history should only be appended at depth 0."""
        node = self._make_node()
        ctx = self._make_ctx()
        tc = [_make_tool_call("c1", "web_search", {"query": "test"})]
        msgs = [
            Message(seq=0, role="assistant", content="", tool_calls=tc),
            Message(seq=1, role="tool", content="results", tool_use_id="c1"),
        ]
        result = await node._llm_compact(ctx, msgs, None)
        assert "TOOLS ALREADY CALLED" in result
        assert "web_search" in result
# ---------------------------------------------------------------------------
# Orphaned tool result repair
# ---------------------------------------------------------------------------
class TestRepairOrphanedToolCalls:
    """Test _repair_orphaned_tool_calls handles both directions."""

    def test_orphaned_tool_result_dropped(self):
        """Tool result with no matching tool_use should be dropped."""
        history = [
            # A tool result with no preceding assistant tool_use.
            {"role": "tool", "tool_call_id": "orphan_1", "content": "stale result"},
            {"role": "user", "content": "hello"},
            {"role": "assistant", "content": "hi"},
        ]
        fixed = NodeConversation._repair_orphaned_tool_calls(history)
        # The stale result vanishes; everything else survives in order.
        assert [m["role"] for m in fixed] == ["user", "assistant"]

    def test_valid_tool_pair_preserved(self):
        """Tool result with matching tool_use should be kept."""
        history = [
            {"role": "user", "content": "search"},
            {
                "role": "assistant",
                "content": "",
                "tool_calls": [{"id": "tc_1", "function": {"name": "search", "arguments": "{}"}}],
            },
            {"role": "tool", "tool_call_id": "tc_1", "content": "results"},
        ]
        fixed = NodeConversation._repair_orphaned_tool_calls(history)
        assert len(fixed) == 3
        assert fixed[2]["tool_call_id"] == "tc_1"

    def test_orphaned_tool_use_gets_stub(self):
        """Tool use with no following tool result gets a synthetic error stub."""
        history = [
            {"role": "user", "content": "search"},
            {
                "role": "assistant",
                "content": "",
                "tool_calls": [{"id": "tc_1", "function": {"name": "search", "arguments": "{}"}}],
            },
            # tc_1 has no result before the next user turn.
            {"role": "user", "content": "what happened?"},
        ]
        fixed = NodeConversation._repair_orphaned_tool_calls(history)
        # A synthetic tool result is inserted between assistant and user.
        assert len(fixed) == 4
        stub = fixed[2]
        assert stub["role"] == "tool"
        assert stub["tool_call_id"] == "tc_1"
        assert "interrupted" in stub["content"].lower()

    def test_mixed_orphans(self):
        """Both orphaned results and orphaned calls handled together."""
        history = [
            # Orphaned result (no matching tool_use).
            {"role": "tool", "tool_call_id": "gone_1", "content": "old result"},
            {"role": "user", "content": "try again"},
            {
                "role": "assistant",
                "content": "",
                "tool_calls": [{"id": "tc_2", "function": {"name": "fetch", "arguments": "{}"}}],
            },
            # Missing result for tc_2.
            {"role": "user", "content": "done?"},
        ]
        fixed = NodeConversation._repair_orphaned_tool_calls(history)
        # gone_1's result is dropped and tc_2 receives a stub.
        assert [m["role"] for m in fixed] == ["user", "assistant", "tool", "user"]
        assert fixed[2]["tool_call_id"] == "tc_2"
@@ -8,7 +8,7 @@ from framework.graph.executor import ExecutionResult
from framework.graph.checkpoint_config import CheckpointConfig from framework.graph.checkpoint_config import CheckpointConfig
from framework.llm import LiteLLMProvider from framework.llm import LiteLLMProvider
from framework.runner.tool_registry import ToolRegistry from framework.runner.tool_registry import ToolRegistry
from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime from framework.runtime.agent_runtime import create_agent_runtime
from framework.runtime.execution_stream import EntryPointSpec from framework.runtime.execution_stream import EntryPointSpec
from .config import default_config, metadata from .config import default_config, metadata
@@ -90,7 +90,7 @@ edges = [
source="confirm-draft", source="confirm-draft",
target="intake", target="intake",
condition=EdgeCondition.CONDITIONAL, condition=EdgeCondition.CONDITIONAL,
condition_expr="batch_complete == True and send_started == True and send_count >= 1 and sent_message_ids is not None and len(sent_message_ids) >= 1", condition_expr="batch_complete == True",
priority=1, priority=1,
), ),
] ]
@@ -251,9 +251,7 @@ class EmailReplyAgent:
errors.append(f"Terminal node '{t}' not found") errors.append(f"Terminal node '{t}' not found")
for ep_id, nid in self.entry_points.items(): for ep_id, nid in self.entry_points.items():
if nid not in node_ids: if nid not in node_ids:
errors.append( errors.append(f"Entry point '{ep_id}' references unknown node '{nid}'")
f"Entry point '{ep_id}' references unknown node '{nid}'"
)
return {"valid": len(errors) == 0, "errors": errors, "warnings": warnings} return {"valid": len(errors) == 0, "errors": errors, "warnings": warnings}
@@ -36,7 +36,9 @@ default_config = RuntimeConfig()
class AgentMetadata: class AgentMetadata:
name: str = "Email Reply Agent" name: str = "Email Reply Agent"
version: str = "1.0.0" version: str = "1.0.0"
description: str = "Filter unreplied emails, confirm recipients, send personalized replies." description: str = (
"Filter unreplied emails, confirm recipients, send personalized replies."
)
intro_message: str = "Tell me which emails you want to reply to (e.g., 'emails from @company.com in the last week')." intro_message: str = "Tell me which emails you want to reply to (e.g., 'emails from @company.com in the last week')."
@@ -83,8 +83,8 @@ confirm_draft_node = NodeSpec(
client_facing=True, client_facing=True,
max_node_visits=0, max_node_visits=0,
input_keys=["email_list", "filter_criteria"], input_keys=["email_list", "filter_criteria"],
output_keys=["batch_complete", "restart", "send_started", "send_count", "sent_message_ids", "send_failures"], output_keys=["batch_complete", "restart"],
nullable_output_keys=["batch_complete", "restart", "send_started", "send_count", "sent_message_ids", "send_failures"], nullable_output_keys=["batch_complete", "restart"],
success_criteria="User confirmed recipients and personalized replies sent for each.", success_criteria="User confirmed recipients and personalized replies sent for each.",
system_prompt="""\ system_prompt="""\
You are a Gmail reply assistant. Present emails for confirmation, then send personalized replies. You are a Gmail reply assistant. Present emails for confirmation, then send personalized replies.
@@ -99,22 +99,14 @@ You are a Gmail reply assistant. Present emails for confirmation, then send pers
**STEP 2 Handle user response:** **STEP 2 Handle user response:**
If user CONFIRMS (says yes, go ahead, sounds good, etc.): If user CONFIRMS (says yes, go ahead, sounds good, etc.):
1. Immediately call set_output("send_started", True) before any send tools. For EACH email in email_list:
2. For EACH email in email_list, call gmail_reply_email with: 1. Read the subject and snippet
2. Use tone_guidance from filter_criteria + any user-specified preferences
3. Call gmail_reply_email with:
- message_id: the email's message_id - message_id: the email's message_id
- html: personalized 2-4 sentence reply based on email context, using tone_guidance from filter_criteria and any new user preferences. - html: personalized 2-4 sentence reply based on email context
3. Track send results during this run: (The tool automatically handles recipient, subject, and threading)
- send_count: number of successful gmail_reply_email calls 4. After all replies sent, call: set_output("batch_complete", True)
- sent_message_ids: list of message_ids successfully replied to
- send_failures: list of {"message_id": "...", "error": "..."} for failed sends
4. REQUIRED completion gate:
- You MUST NOT set batch_complete=True unless send_started is True AND send_count >= 1 AND sent_message_ids is non-empty.
- If no sends succeeded, do NOT set batch_complete=True. Instead explain what failed and ask user whether to retry or restart.
5. After successful sends, call set_output in a separate turn:
- set_output("send_count", <int>)
- set_output("sent_message_ids", <list>)
- set_output("send_failures", <list>)
- set_output("batch_complete", True)
If user wants to CHANGE LOGIC/FILTER (says change filter, different criteria, not these emails, wrong emails, etc.): If user wants to CHANGE LOGIC/FILTER (says change filter, different criteria, not these emails, wrong emails, etc.):
1. Acknowledge their request 1. Acknowledge their request
@@ -1,7 +1,5 @@
"""Structural tests for Email Reply Agent.""" """Structural tests for Email Reply Agent."""
import pytest
class TestAgentStructure: class TestAgentStructure:
"""Test agent graph structure.""" """Test agent graph structure."""
+45 -113
View File
@@ -247,121 +247,34 @@ def undo_changes(path: str = "") -> str:
@mcp.tool() @mcp.tool()
def discover_mcp_tools(server_config_path: str = "") -> str: def list_agent_tools(
"""Discover available MCP tools by connecting to servers defined in a config file. server_config_path: str = "",
output_schema: str = "simple",
group: str = "all",
) -> str:
"""Discover tools available for agent building, grouped by category.
Connects to each MCP server, lists all tools with full schemas, then Connects to each MCP server, lists tools, then disconnects. Use this
disconnects. Use this to see what tools are available before designing BEFORE designing an agent to know exactly which tools exist. Only use
an agent never rely on static documentation. tools from this list in node definitions never guess or fabricate.
Args:
server_config_path: Path to mcp_servers.json (relative to project root).
Default: the hive-tools server config at tools/mcp_servers.json.
Can also point to any agent's mcp_servers.json.
Returns:
JSON listing of all tools with names, descriptions, and input schemas
"""
# Resolve config path
if not server_config_path:
# Default: look for the main hive-tools mcp_servers.json
candidates = [
os.path.join(PROJECT_ROOT, "tools", "mcp_servers.json"),
os.path.join(PROJECT_ROOT, "mcp_servers.json"),
]
config_path = None
for c in candidates:
if os.path.isfile(c):
config_path = c
break
if not config_path:
return "Error: No mcp_servers.json found. Provide server_config_path."
else:
config_path = _resolve_path(server_config_path)
if not os.path.isfile(config_path):
return f"Error: Config file not found: {server_config_path}"
try:
with open(config_path, encoding="utf-8") as f:
servers_config = json.load(f)
except (json.JSONDecodeError, OSError) as e:
return f"Error reading config: {e}"
# Import MCPClient (deferred — needs PYTHONPATH to include core/)
try:
from framework.runner.mcp_client import MCPClient, MCPServerConfig
except ImportError:
return "Error: Cannot import MCPClient. Ensure PYTHONPATH includes the core/ directory."
all_tools = []
errors = []
config_dir = os.path.dirname(config_path)
for server_name, server_conf in servers_config.items():
# Resolve cwd relative to config file location
cwd = server_conf.get("cwd", "")
if cwd and not os.path.isabs(cwd):
cwd = os.path.abspath(os.path.join(config_dir, cwd))
try:
config = MCPServerConfig(
name=server_name,
transport=server_conf.get("transport", "stdio"),
command=server_conf.get("command"),
args=server_conf.get("args", []),
env=server_conf.get("env", {}),
cwd=cwd or None,
url=server_conf.get("url"),
headers=server_conf.get("headers", {}),
)
client = MCPClient(config)
client.connect()
tools = client.list_tools()
for tool in tools:
all_tools.append(
{
"server": server_name,
"name": tool.name,
"description": tool.description,
"input_schema": tool.input_schema,
}
)
client.disconnect()
except Exception as e:
errors.append({"server": server_name, "error": str(e)})
result = {
"tools": all_tools,
"total": len(all_tools),
"servers_queried": len(servers_config),
}
if errors:
result["errors"] = errors
return json.dumps(result, indent=2, default=str)
# ── Meta-agent: Agent tool catalog ────────────────────────────────────────
@mcp.tool()
def list_agent_tools(server_config_path: str = "") -> str:
"""List all tools available for agent building from the hive-tools MCP server.
Returns tool names grouped by category. Use this BEFORE designing an agent
to know exactly which tools exist. Only use tools from this list in node
definitions never guess or fabricate tool names.
Args: Args:
server_config_path: Path to mcp_servers.json. Default: tools/mcp_servers.json server_config_path: Path to mcp_servers.json. Default: tools/mcp_servers.json
(the standard hive-tools server). Can also point to an agent's config (the standard hive-tools server). Can also point to an agent's config
to see what tools that specific agent has access to. to see what tools that specific agent has access to.
output_schema: "simple" (default) returns name and description per tool.
"full" also includes server and input_schema.
group: "all" (default) returns every category. A prefix like "gmail"
returns only that group's tools.
Returns: Returns:
JSON with tool names grouped by prefix (e.g. gmail_*, slack_*, etc.) JSON with tools grouped by prefix (e.g. gmail_*, slack_*).
""" """
if output_schema not in ("simple", "full"):
return json.dumps(
{"error": f"Invalid output_schema: {output_schema!r}. Use 'simple' or 'full'."}
)
# Resolve config path # Resolve config path
if not server_config_path: if not server_config_path:
candidates = [ candidates = [
@@ -413,27 +326,46 @@ def list_agent_tools(server_config_path: str = "") -> str:
client = MCPClient(config) client = MCPClient(config)
client.connect() client.connect()
for tool in client.list_tools(): for tool in client.list_tools():
all_tools.append({"name": tool.name, "description": tool.description}) all_tools.append(
{
"server": server_name,
"name": tool.name,
"description": tool.description,
"input_schema": tool.input_schema,
}
)
client.disconnect() client.disconnect()
except Exception as e: except Exception as e:
errors.append({"server": server_name, "error": str(e)}) errors.append({"server": server_name, "error": str(e)})
# Group by prefix (e.g., gmail_, slack_, stripe_) # Group by prefix (e.g., gmail_, slack_, stripe_)
groups: dict[str, list[str]] = {} groups: dict[str, list[dict]] = {}
for t in sorted(all_tools, key=lambda x: x["name"]): for t in sorted(all_tools, key=lambda x: x["name"]):
parts = t["name"].split("_", 1) parts = t["name"].split("_", 1)
prefix = parts[0] if len(parts) > 1 else "general" prefix = parts[0] if len(parts) > 1 else "general"
groups.setdefault(prefix, []).append(t["name"]) groups.setdefault(prefix, []).append(t)
# Filter to a specific group
if group != "all":
groups = {group: groups[group]} if group in groups else {}
# Apply output schema
if output_schema == "simple":
groups = {
prefix: [{"name": t["name"], "description": t["description"]} for t in tools]
for prefix, tools in groups.items()
}
all_names = sorted(t["name"] for tools in groups.values() for t in tools)
result: dict = { result: dict = {
"total": len(all_tools), "total": len(all_names),
"tools_by_category": groups, "tools_by_category": groups,
"all_tool_names": sorted(t["name"] for t in all_tools), "all_tool_names": all_names,
} }
if errors: if errors:
result["errors"] = errors result["errors"] = errors
return json.dumps(result, indent=2) return json.dumps(result, indent=2, default=str)
# ── Meta-agent: Agent tool validation ───────────────────────────────────── # ── Meta-agent: Agent tool validation ─────────────────────────────────────
@@ -564,7 +496,7 @@ def validate_agent_tools(agent_path: str) -> str:
result["missing_tools"] = missing_by_node result["missing_tools"] = missing_by_node
result["message"] = ( result["message"] = (
f"FAIL: {sum(len(v) for v in missing_by_node.values())} tool(s) declared " f"FAIL: {sum(len(v) for v in missing_by_node.values())} tool(s) declared "
f"in nodes do not exist. Run discover_mcp_tools() to see available tools " f"in nodes do not exist. Run list_agent_tools() to see available tools "
f"and fix the node definitions." f"and fix the node definitions."
) )
else: else: