Compare commits
12 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 22df99ef51 | |||
| edc3135797 | |||
| 27b15789fb | |||
| 5ba5933edc | |||
| 50eb4b0e8f | |||
| 3e4a4c9924 | |||
| c47987e73c | |||
| 8f5daf0569 | |||
| af5c72e785 | |||
| 5cdc01cb8c | |||
| 2f58cce781 | |||
| 9dc214cfd2 |
@@ -87,7 +87,7 @@ from framework.agent_loop.internals.types import (
|
||||
)
|
||||
from framework.agent_loop.types import AgentContext, AgentProtocol, AgentResult
|
||||
from framework.host.event_bus import EventBus
|
||||
from framework.llm.capabilities import supports_image_tool_results
|
||||
from framework.llm.capabilities import filter_tools_for_model, supports_image_tool_results
|
||||
from framework.llm.provider import Tool, ToolResult, ToolUse
|
||||
from framework.llm.stream_events import (
|
||||
FinishEvent,
|
||||
@@ -632,13 +632,20 @@ class AgentLoop(AgentProtocol):
|
||||
if isinstance(stream_id, str) and stream_id.startswith("worker:"):
|
||||
tools.append(build_report_to_parent_tool())
|
||||
|
||||
# Hide image-producing tools from text-only models so they never try
|
||||
# to call them. Avoids wasted turns + "screenshot failed" lessons
|
||||
# getting saved to memory. See framework.llm.capabilities.
|
||||
_llm_model = ctx.llm.model if ctx.llm else ""
|
||||
tools, _hidden_image_tools = filter_tools_for_model(tools, _llm_model)
|
||||
|
||||
logger.info(
|
||||
"[%s] Tools available (%d): %s | direct_user_io=%s | judge=%s",
|
||||
"[%s] Tools available (%d): %s | direct_user_io=%s | judge=%s | hidden_image_tools=%s",
|
||||
node_id,
|
||||
len(tools),
|
||||
[t.name for t in tools],
|
||||
ctx.supports_direct_user_io,
|
||||
type(self._judge).__name__ if self._judge else "None",
|
||||
_hidden_image_tools,
|
||||
)
|
||||
|
||||
# 4. Publish loop started
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
"""Node definitions for Queen agent."""
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
from framework.orchestrator import NodeSpec
|
||||
@@ -32,6 +33,29 @@ def _build_appendices() -> str:
|
||||
return parts
|
||||
|
||||
|
||||
# Wraps prompt sections that should only be shown to vision-capable models.
|
||||
# Content inside `<!-- vision-only -->...<!-- /vision-only -->` is kept for
|
||||
# vision models and stripped for text-only models. Applied once per session
|
||||
# in queen_orchestrator.create_queen.
|
||||
_VISION_ONLY_BLOCK_RE = re.compile(
    r"<!-- vision-only -->(.*?)<!-- /vision-only -->",
    flags=re.DOTALL,
)


def finalize_queen_prompt(text: str, has_vision: bool) -> str:
    """Return *text* with every vision-only block resolved for the model.

    A block is the span between ``<!-- vision-only -->`` and
    ``<!-- /vision-only -->``; it may cover several lines.

    Args:
        text: Prompt text that may contain vision-only blocks.
        has_vision: True when the target model can consume images.

    Returns:
        For vision-capable models, the text with only the two markers
        removed (the wrapped content stays in place). For text-only
        models, the text with the markers and everything between them
        removed. Text outside the blocks is never modified.
    """
    # "\1" re-inserts the captured inner content; "" drops the whole match.
    replacement = r"\1" if has_vision else ""
    return _VISION_ONLY_BLOCK_RE.sub(replacement, text)
|
||||
|
||||
|
||||
# Shared appendices — appended to every coding node's system prompt.
|
||||
_appendices = _build_appendices()
|
||||
|
||||
@@ -504,7 +528,7 @@ The queen writes final production-ready system prompts directly.
|
||||
|
||||
MCP servers are loaded from the global registry by name. Available servers:
|
||||
- `hive_tools` — web search, email, CRM, calendar, 100+ integrations
|
||||
- `gcu-tools` — browser automation (click, type, navigate, screenshot)
|
||||
- `gcu-tools` — browser automation (click, type, navigate<!-- vision-only -->, screenshot<!-- /vision-only -->)
|
||||
- `files-tools` — file I/O (read, write, edit, search, list)
|
||||
|
||||
**Template variables:** Add a `variables:` section at the top of agent.json \
|
||||
@@ -862,7 +886,7 @@ search_files, run_command, undo_changes
|
||||
|
||||
## Browser Automation (gcu-tools MCP)
|
||||
All browser tools are prefixed with `browser_` (browser_start, browser_navigate, \
|
||||
browser_click, browser_fill, browser_snapshot, browser_screenshot, browser_scroll, \
|
||||
browser_click, browser_fill, browser_snapshot, <!-- vision-only -->browser_screenshot, <!-- /vision-only -->browser_scroll, \
|
||||
browser_tabs, browser_close, browser_evaluate, etc.).
|
||||
Follow the browser-automation skill protocol — activate it before using browser tools.
|
||||
|
||||
|
||||
@@ -21,7 +21,9 @@ All tools are prefixed with `browser_`:
|
||||
- `browser_click`, `browser_click_coordinate`, `browser_fill`, `browser_type` — interact
|
||||
- `browser_press` (with optional `modifiers=["ctrl"]` etc.) — keyboard shortcuts
|
||||
- `browser_snapshot` — compact accessibility-tree read (structured)
|
||||
<!-- vision-only -->
|
||||
- `browser_screenshot` — visual capture (annotated PNG)
|
||||
<!-- /vision-only -->
|
||||
- `browser_shadow_query`, `browser_get_rect` — locate elements (shadow-piercing via `>>>`)
|
||||
- `browser_coords` — convert image pixels to CSS pixels (always use `css_x/y`, never `physical_x/y`)
|
||||
- `browser_scroll`, `browser_wait` — navigation helpers
|
||||
|
||||
@@ -12,6 +12,11 @@ Vision support rules are derived from official vendor documentation:
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from framework.llm.provider import Tool
|
||||
|
||||
|
||||
def _model_name(model: str) -> str:
|
||||
"""Return the bare model name after stripping any 'provider/' prefix."""
|
||||
@@ -104,3 +109,22 @@ def supports_image_tool_results(model: str) -> bool:
|
||||
# 5. Default: assume vision capable
|
||||
# Covers: OpenAI, Anthropic, Google, Mistral, Kimi, and other hosted providers
|
||||
return True
|
||||
|
||||
|
||||
def filter_tools_for_model(tools: list[Tool], model: str) -> tuple[list[Tool], list[str]]:
    """Split *tools* into those shown to *model* and the names withheld.

    Args:
        tools: Candidate tools; the input list is never mutated.
        model: Model identifier. An empty value skips the capability
            lookup entirely and nothing is hidden.

    Returns:
        ``(kept, hidden_names)``. When *model* is empty or
        ``supports_image_tool_results(model)`` is true, ``kept`` is a
        shallow copy of *tools* and ``hidden_names`` is empty. Otherwise
        every tool whose ``produces_image`` is truthy is left out of
        ``kept`` and its name is appended to ``hidden_names``; relative
        order is preserved in both lists.
    """
    if not model or supports_image_tool_results(model):
        return list(tools), []

    visible: list[Tool] = []
    withheld: list[str] = []
    for tool in tools:
        if tool.produces_image:
            withheld.append(tool.name)
        else:
            visible.append(tool)
    # If nothing was withheld, `visible` is already a full shallow copy.
    return visible, withheld
|
||||
|
||||
@@ -27,6 +27,9 @@ class Tool:
|
||||
name: str
|
||||
description: str
|
||||
parameters: dict[str, Any] = field(default_factory=dict)
|
||||
# If True, the tool may return ImageContent in its result. Text-only models
|
||||
# (e.g. glm-5, deepseek-chat) have this hidden from their schema entirely.
|
||||
produces_image: bool = False
|
||||
# If True, this tool performs no filesystem/process/network writes and is
|
||||
# safe to run concurrently with other safe-flagged tools inside the same
|
||||
# assistant turn. Unsafe tools (writes, shell, browser actions) are always
|
||||
|
||||
@@ -7,6 +7,7 @@ import inspect
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from collections.abc import Callable
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
@@ -18,6 +19,16 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
_INPUT_LOG_MAX_LEN = 500
|
||||
|
||||
# Tools whose names match this pattern are assumed to return ImageContent.
|
||||
# Matched against the bare tool name (case-insensitive). Used to mark MCP
|
||||
# tools with produces_image=True so they can be filtered out for text-only
|
||||
# models before the schema is ever shown to the LLM (avoids wasted calls
|
||||
# and "screenshot failed" entries polluting memory).
|
||||
_IMAGE_TOOL_NAME_RE = re.compile(
|
||||
r"(screenshot|screen_capture|capture_image|render_image|get_image|snapshot_image)",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
# Per-execution context overrides. Each asyncio task (and thus each
|
||||
# concurrent graph execution) gets its own copy, so there are no races
|
||||
# when multiple ExecutionStreams run in parallel.
|
||||
@@ -998,6 +1009,7 @@ class ToolRegistry:
|
||||
"properties": properties,
|
||||
"required": required,
|
||||
},
|
||||
produces_image=bool(_IMAGE_TOOL_NAME_RE.search(mcp_tool.name or "")),
|
||||
concurrency_safe=mcp_tool.name in self.CONCURRENCY_SAFE_TOOLS,
|
||||
)
|
||||
|
||||
|
||||
@@ -311,7 +311,9 @@ async def create_queen(
|
||||
_queen_tools_running,
|
||||
_queen_tools_staging,
|
||||
_shared_building_knowledge,
|
||||
finalize_queen_prompt,
|
||||
)
|
||||
from framework.llm.capabilities import supports_image_tool_results
|
||||
from framework.host.event_bus import AgentEvent, EventType
|
||||
from framework.loader.mcp_registry import MCPRegistry
|
||||
from framework.loader.tool_registry import ToolRegistry
|
||||
@@ -489,6 +491,13 @@ async def create_queen(
|
||||
"according to your current phase."
|
||||
)
|
||||
|
||||
# Resolve vision-only prompt sections based on the session's LLM.
|
||||
# session.llm is immutable for the session's lifetime, so this check
|
||||
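# is stable — prompts never need to be recomposed mid-session.
# NOTE(review): _hot_swap_sessions is documented as hot-swapping the LLM on
# running sessions; confirm a swap between a vision-capable and a text-only
# model cannot leave these finalized prompts stale.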
|
||||
_has_vision = bool(
|
||||
session.llm and supports_image_tool_results(getattr(session.llm, "model", ""))
|
||||
)
|
||||
|
||||
_planning_body = (
|
||||
_queen_character_core
|
||||
+ _queen_role_planning
|
||||
@@ -500,7 +509,7 @@ async def create_queen(
|
||||
+ _planning_knowledge
|
||||
+ worker_identity
|
||||
)
|
||||
phase_state.prompt_planning = _planning_body
|
||||
phase_state.prompt_planning = finalize_queen_prompt(_planning_body, _has_vision)
|
||||
|
||||
_building_body = (
|
||||
_queen_character_core
|
||||
@@ -515,40 +524,52 @@ async def create_queen(
|
||||
+ _appendices
|
||||
+ worker_identity
|
||||
)
|
||||
phase_state.prompt_building = _building_body
|
||||
phase_state.prompt_staging = (
|
||||
_queen_character_core
|
||||
+ _queen_role_staging
|
||||
+ _queen_style
|
||||
+ _queen_tools_staging
|
||||
+ _queen_behavior_always
|
||||
+ _queen_behavior_staging
|
||||
+ worker_identity
|
||||
phase_state.prompt_building = finalize_queen_prompt(_building_body, _has_vision)
|
||||
phase_state.prompt_staging = finalize_queen_prompt(
|
||||
(
|
||||
_queen_character_core
|
||||
+ _queen_role_staging
|
||||
+ _queen_style
|
||||
+ _queen_tools_staging
|
||||
+ _queen_behavior_always
|
||||
+ _queen_behavior_staging
|
||||
+ worker_identity
|
||||
),
|
||||
_has_vision,
|
||||
)
|
||||
phase_state.prompt_running = (
|
||||
_queen_character_core
|
||||
+ _queen_role_running
|
||||
+ _queen_style
|
||||
+ _queen_tools_running
|
||||
+ _queen_behavior_always
|
||||
+ _queen_behavior_running
|
||||
+ worker_identity
|
||||
phase_state.prompt_running = finalize_queen_prompt(
|
||||
(
|
||||
_queen_character_core
|
||||
+ _queen_role_running
|
||||
+ _queen_style
|
||||
+ _queen_tools_running
|
||||
+ _queen_behavior_always
|
||||
+ _queen_behavior_running
|
||||
+ worker_identity
|
||||
),
|
||||
_has_vision,
|
||||
)
|
||||
phase_state.prompt_editing = (
|
||||
_queen_identity_editing
|
||||
+ _queen_style
|
||||
+ _queen_tools_editing
|
||||
+ _queen_behavior_always
|
||||
+ _queen_behavior_editing
|
||||
+ worker_identity
|
||||
phase_state.prompt_editing = finalize_queen_prompt(
|
||||
(
|
||||
_queen_identity_editing
|
||||
+ _queen_style
|
||||
+ _queen_tools_editing
|
||||
+ _queen_behavior_always
|
||||
+ _queen_behavior_editing
|
||||
+ worker_identity
|
||||
),
|
||||
_has_vision,
|
||||
)
|
||||
phase_state.prompt_independent = (
|
||||
_queen_character_core
|
||||
+ _queen_role_independent
|
||||
+ _queen_style
|
||||
+ _queen_tools_independent
|
||||
+ _queen_behavior_always
|
||||
+ _queen_behavior_independent
|
||||
phase_state.prompt_independent = finalize_queen_prompt(
|
||||
(
|
||||
_queen_character_core
|
||||
+ _queen_role_independent
|
||||
+ _queen_style
|
||||
+ _queen_tools_independent
|
||||
+ _queen_behavior_always
|
||||
+ _queen_behavior_independent
|
||||
),
|
||||
_has_vision,
|
||||
)
|
||||
|
||||
# ---- Default skill protocols -------------------------------------
|
||||
|
||||
@@ -284,10 +284,16 @@ def _get_subscription_token(sub_id: str) -> str | None:
|
||||
def _hot_swap_sessions(
|
||||
request: web.Request, full_model: str, api_key: str | None, api_base: str | None
|
||||
) -> int:
|
||||
"""Hot-swap the LLM on all running sessions. Returns count of swapped sessions."""
|
||||
"""Hot-swap the LLM on all running sessions. Returns count of swapped sessions.
|
||||
|
||||
Also refreshes the SessionManager's default model so that subsequent
|
||||
one-shot LLM consumers (e.g. /messages/classify, new session bootstrap)
|
||||
pick up the new provider/model instead of the stale startup override.
|
||||
"""
|
||||
from framework.server.session_manager import SessionManager
|
||||
|
||||
manager: SessionManager = request.app["manager"]
|
||||
manager._model = full_model
|
||||
swapped = 0
|
||||
for session in manager.list_sessions():
|
||||
llm_provider = getattr(session, "llm", None)
|
||||
|
||||
@@ -14,13 +14,37 @@ from framework.skills.skill_errors import SkillErrorCode, log_skill_error
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_BEHAVIORAL_INSTRUCTION = (
|
||||
"The following skills provide specialized instructions for specific tasks.\n"
|
||||
"When a task matches a skill's description, read the SKILL.md at the listed\n"
|
||||
"location to load the full instructions before proceeding.\n"
|
||||
"When a skill references relative paths, resolve them against the skill's\n"
|
||||
"directory (the parent of SKILL.md) and use absolute paths in tool calls."
|
||||
)
|
||||
# Upper bound on the raw `<available_skills>` XML body, in characters.
|
||||
# When the full catalog (with <description> entries) exceeds this, we fall
|
||||
# back to the compact variant that drops descriptions but keeps every skill
|
||||
# visible. Preserving awareness of every skill beats truncating entries.
|
||||
_COMPACT_THRESHOLD_CHARS = 5000
|
||||
|
||||
_MANDATORY_HEADER_FULL = """## Skills (mandatory)
|
||||
Before replying: scan <available_skills> <description> entries.
|
||||
- If exactly one skill clearly applies: read its SKILL.md at <location> with `read_file`, then follow it.
|
||||
- If multiple could apply: choose the most specific one, then read/follow it.
|
||||
- If none clearly apply: do not read any SKILL.md.
|
||||
Constraints: never read more than one skill up front; only read after selecting.
|
||||
- When a skill drives external API writes (Gmail, Calendar, GitHub, etc.), assume rate limits: prefer fewer larger writes, avoid tight one-item loops, serialize bursts when possible, and respect 429/Retry-After.
|
||||
|
||||
|
||||
The following skills provide specialized instructions for specific tasks.
|
||||
Use `read_file` to load a skill's SKILL.md when the task matches its description.
|
||||
When a skill file references a relative path, resolve it against the skill directory (parent of SKILL.md) and use that absolute path in tool commands."""
|
||||
|
||||
_MANDATORY_HEADER_COMPACT = """## Skills (mandatory)
|
||||
Before replying: scan <available_skills> <name> entries.
|
||||
- If exactly one skill clearly applies: read its SKILL.md at <location> with `read_file`, then follow it.
|
||||
- If multiple could apply: choose the most specific one, then read/follow it.
|
||||
- If none clearly apply: do not read any SKILL.md.
|
||||
Constraints: never read more than one skill up front; only read after selecting.
|
||||
- When a skill drives external API writes (Gmail, Calendar, GitHub, etc.), assume rate limits: prefer fewer larger writes, avoid tight one-item loops, serialize bursts when possible, and respect 429/Retry-After.
|
||||
|
||||
|
||||
The following skills provide specialized instructions for specific tasks.
|
||||
Use `read_file` to load a skill's SKILL.md when the task matches its name.
|
||||
When a skill file references a relative path, resolve it against the skill directory (parent of SKILL.md) and use that absolute path in tool commands."""
|
||||
|
||||
|
||||
class SkillCatalog:
|
||||
@@ -61,27 +85,42 @@ class SkillCatalog:
|
||||
def to_prompt(self) -> str:
|
||||
"""Generate the catalog prompt for system prompt injection.
|
||||
|
||||
Returns empty string if no community/user skills are discovered
|
||||
(default skills are handled separately by DefaultSkillManager).
|
||||
"""
|
||||
# All skills go through the catalog for progressive disclosure.
|
||||
all_skills = list(self._skills.values())
|
||||
Returns empty string when no skills are present. Otherwise returns
|
||||
a mandatory pre-reply checklist + decision rules + rate-limit note,
|
||||
followed by the <available_skills> XML body.
|
||||
|
||||
When the full XML body exceeds ``_COMPACT_THRESHOLD_CHARS``, the
|
||||
compact variant is emitted instead: <description> elements are
|
||||
dropped so every skill stays visible before any gets truncated.
|
||||
"""
|
||||
all_skills = sorted(self._skills.values(), key=lambda s: s.name)
|
||||
if not all_skills:
|
||||
return ""
|
||||
|
||||
full_xml = self._render_xml(all_skills, compact=False)
|
||||
if len(full_xml) <= _COMPACT_THRESHOLD_CHARS:
|
||||
return f"{_MANDATORY_HEADER_FULL}\n\n{full_xml}"
|
||||
|
||||
compact_xml = self._render_xml(all_skills, compact=True)
|
||||
return f"{_MANDATORY_HEADER_COMPACT}\n\n{compact_xml}"
|
||||
|
||||
@staticmethod
|
||||
def _render_xml(skills: list[ParsedSkill], *, compact: bool) -> str:
|
||||
"""Render the `<available_skills>` block.
|
||||
|
||||
``compact=True`` drops `<description>` to preserve skill awareness
|
||||
when the catalog would otherwise blow the char budget.
|
||||
"""
|
||||
lines = ["<available_skills>"]
|
||||
for skill in sorted(all_skills, key=lambda s: s.name):
|
||||
for skill in skills:
|
||||
lines.append(" <skill>")
|
||||
lines.append(f" <name>{escape(skill.name)}</name>")
|
||||
lines.append(f" <description>{escape(skill.description)}</description>")
|
||||
if not compact:
|
||||
lines.append(f" <description>{escape(skill.description)}</description>")
|
||||
lines.append(f" <location>{escape(skill.location)}</location>")
|
||||
lines.append(f" <base_dir>{escape(skill.base_dir)}</base_dir>")
|
||||
lines.append(" </skill>")
|
||||
lines.append("</available_skills>")
|
||||
|
||||
xml_block = "\n".join(lines)
|
||||
return f"{_BEHAVIORAL_INSTRUCTION}\n\n{xml_block}"
|
||||
return "\n".join(lines)
|
||||
|
||||
def build_pre_activated_prompt(self, skill_names: list[str]) -> str:
|
||||
"""Build prompt content for pre-activated skills.
|
||||
|
||||
@@ -212,6 +212,211 @@ function ToolActivityRow({ content }: { content: string }) {
|
||||
);
|
||||
}
|
||||
|
||||
// --- Inline ask_user fallback ---------------------------------------------
|
||||
// Sometimes the model prints the ask_user / ask_user_multiple payload as
|
||||
// regular assistant text instead of invoking the tool. We detect that
|
||||
// payload here and render a QuestionWidget / MultiQuestionWidget inline so
|
||||
// the user still gets the nice button UI. Submissions are sent back as a
|
||||
// regular user message via onSend (there is no pending backend state to
|
||||
// fulfill, so we treat it like the user answering in chat).
|
||||
|
||||
type AskUserInlinePayload =
  | { kind: "single"; question: string; options: string[] }
  | {
      kind: "multi";
      questions: { id: string; prompt: string; options?: string[] }[];
    };

/**
 * Recognise an ask_user / ask_user_multiple payload that was printed as
 * plain assistant text.
 *
 * Accepted wrappers around the JSON object: surrounding whitespace, a single
 * ``` / ```json code fence, and one pair of double quotes that fully wraps
 * the object.
 *
 * Returns a "multi" payload when the object has a `questions` array of 1–8
 * entries, each with a non-empty string `prompt` (or `question`); a missing
 * or empty `id` defaults to `q<index>`, and `options` is kept only when it is
 * an array of strings. Returns a "single" payload when the object has a
 * non-empty string `question` and at least two string `options`. Returns
 * null for anything else, including text that is not valid JSON.
 */
function detectAskUserPayload(content: string): AskUserInlinePayload | null {
  if (!content) return null;
  let candidate = content.trim();
  if (!candidate) return null;

  // Unwrap an optional ```json ... ``` / ``` ... ``` code fence.
  const fenced = /^```(?:json|JSON)?\s*([\s\S]*?)\s*```$/.exec(candidate);
  if (fenced) candidate = fenced[1].trim();

  // Unwrap double quotes only when they fully enclose a JSON object.
  const isQuoted =
    candidate.length >= 2 &&
    candidate.startsWith('"') &&
    candidate.endsWith('"');
  if (isQuoted) {
    const unquoted = candidate.slice(1, -1).trim();
    if (unquoted.startsWith("{") && unquoted.endsWith("}")) {
      candidate = unquoted;
    }
  }

  if (!(candidate.startsWith("{") && candidate.endsWith("}"))) return null;

  let decoded: unknown;
  try {
    decoded = JSON.parse(candidate);
  } catch {
    return null;
  }
  if (!decoded || typeof decoded !== "object") return null;
  const record = decoded as Record<string, unknown>;

  const isStringArray = (value: unknown): value is string[] =>
    Array.isArray(value) && value.every((entry) => typeof entry === "string");

  // ask_user_multiple: { questions: [{ id, prompt, options? }, ...] }
  if (Array.isArray(record.questions)) {
    const items = record.questions as unknown[];
    if (items.length < 1 || items.length > 8) return null;

    const questions: { id: string; prompt: string; options?: string[] }[] = [];
    for (let index = 0; index < items.length; index++) {
      const item = items[index];
      if (!item || typeof item !== "object") return null;
      const fields = item as Record<string, unknown>;

      // `prompt` wins whenever it is a string (even an empty one, which is
      // then rejected); `question` is consulted only when `prompt` is not a
      // string at all.
      let prompt: string | null = null;
      if (typeof fields.prompt === "string") {
        prompt = fields.prompt;
      } else if (typeof fields.question === "string") {
        prompt = fields.question;
      }
      if (!prompt) return null;

      const id =
        typeof fields.id === "string" && fields.id ? fields.id : `q${index}`;
      const options = isStringArray(fields.options)
        ? fields.options
        : undefined;
      questions.push({ id, prompt, options });
    }
    return { kind: "multi", questions };
  }

  // ask_user: { question: string, options: string[] }
  const question = typeof record.question === "string" ? record.question : null;
  const options = isStringArray(record.options) ? record.options : null;
  if (!question || !options || options.length < 2) return null;
  return { kind: "single", question, options };
}
|
||||
|
||||
/**
 * Chat bubble that shows a detected inline ask_user payload as an interactive
 * question widget instead of raw text.
 *
 * - While pending, renders the agent avatar/header plus a QuestionWidget
 *   ("single" payload) or MultiQuestionWidget ("multi" payload), both in
 *   `inline` mode.
 * - On submit, the bubble renders nothing and the answer is forwarded through
 *   `onSend` on the message's thread (falling back to `activeThread`).
 * - On dismiss, the original message is rendered with MessageBubble.
 */
function InlineAskUserBubble({
  msg,
  payload,
  activeThread,
  onSend,
  queenPhase,
  showQueenPhaseBadge = true,
}: {
  msg: ChatMessage;
  payload: AskUserInlinePayload;
  activeThread: string;
  onSend: (
    message: string,
    thread: string,
    images?: ImageContent[],
  ) => void;
  queenPhase?: "planning" | "building" | "staging" | "running" | "independent";
  showQueenPhaseBadge?: boolean;
}) {
  const [status, setStatus] = useState<"pending" | "submitted" | "dismissed">(
    "pending",
  );

  // After a submit the reply shows up as a normal user message, so this
  // bubble renders nothing.
  if (status === "submitted") return null;

  // A dismissed widget falls back to the regular bubble so the model's text
  // stays visible.
  if (status === "dismissed") {
    return (
      <MessageBubble
        msg={msg}
        queenPhase={queenPhase}
        showQueenPhaseBadge={showQueenPhaseBadge}
      />
    );
  }

  const isQueen = msg.role === "queen";
  const color = getColor(msg.agent, msg.role);
  const thread = msg.thread || activeThread;

  // Badge text: workers always read "Worker"; a queen shows the message's own
  // phase (or the current queen phase), and anything other than the four
  // phases named below is shown as "building".
  const effectivePhase = msg.phase ?? queenPhase;
  let badgeText: string;
  if (!isQueen) {
    badgeText = "Worker";
  } else if (effectivePhase === "independent") {
    badgeText = "independent";
  } else if (effectivePhase === "running") {
    badgeText = "running";
  } else if (effectivePhase === "staging") {
    badgeText = "staging";
  } else if (effectivePhase === "planning") {
    badgeText = "planning";
  } else {
    badgeText = "building";
  }

  const dismiss = () => setStatus("dismissed");

  const submitSingle = (answer: string) => {
    setStatus("submitted");
    onSend(answer, thread);
  };

  const submitMulti = (answers: Record<string, string>) => {
    setStatus("submitted");
    if (payload.kind !== "multi") return;
    // Outgoing message: one numbered entry per question with its answer.
    const body = payload.questions
      .map((q, idx) => `${idx + 1}. ${q.prompt}\n ${answers[q.id] ?? ""}`)
      .join("\n");
    onSend(body, thread);
  };

  const avatarSize = isQueen ? "w-9 h-9" : "w-7 h-7";
  const bubbleWidth = isQueen ? "max-w-[85%]" : "max-w-[75%]";
  const nameSize = isQueen ? "text-sm" : "text-xs";
  const badgeTone = isQueen
    ? "bg-primary/15 text-primary"
    : "bg-muted text-muted-foreground";

  return (
    <div className="flex gap-3">
      <div
        className={`flex-shrink-0 ${avatarSize} rounded-xl flex items-center justify-center`}
        style={{
          backgroundColor: `${color}18`,
          border: `1.5px solid ${color}35`,
          boxShadow: isQueen ? `0 0 12px ${color}20` : undefined,
        }}
      >
        {isQueen ? (
          <Crown className="w-4 h-4" style={{ color }} />
        ) : (
          <Cpu className="w-3.5 h-3.5" style={{ color }} />
        )}
      </div>
      <div className={`flex-1 min-w-0 ${bubbleWidth}`}>
        <div className="flex items-center gap-2 mb-1">
          <span className={`font-medium ${nameSize}`} style={{ color }}>
            {msg.agent}
          </span>
          {(!isQueen || showQueenPhaseBadge) && (
            <span
              className={`text-[10px] font-medium px-1.5 py-0.5 rounded-md ${badgeTone}`}
            >
              {badgeText}
            </span>
          )}
        </div>
        {payload.kind === "single" ? (
          <QuestionWidget
            inline
            question={payload.question}
            options={payload.options}
            onSubmit={submitSingle}
            onDismiss={dismiss}
          />
        ) : (
          <MultiQuestionWidget
            inline
            questions={payload.questions}
            onSubmit={submitMulti}
            onDismiss={dismiss}
          />
        )}
      </div>
    </div>
  );
}
|
||||
|
||||
const MessageBubble = memo(
|
||||
function MessageBubble({
|
||||
msg,
|
||||
@@ -596,24 +801,51 @@ export default function ChatPanel({
|
||||
onScroll={handleScroll}
|
||||
className="flex-1 overflow-auto px-5 py-4 space-y-3"
|
||||
>
|
||||
{renderItems.map((item) =>
|
||||
item.kind === "parallel" ? (
|
||||
<div key={item.groupId}>
|
||||
<ParallelSubagentBubble
|
||||
groupId={item.groupId}
|
||||
groups={item.groups}
|
||||
/>
|
||||
</div>
|
||||
) : (
|
||||
<div key={item.msg.id}>
|
||||
{renderItems.map((item) => {
|
||||
if (item.kind === "parallel") {
|
||||
return (
|
||||
<div key={item.groupId}>
|
||||
<ParallelSubagentBubble
|
||||
groupId={item.groupId}
|
||||
groups={item.groups}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
const msg = item.msg;
|
||||
// Detect misformatted ask_user payloads emitted as plain text and
|
||||
// substitute the nicer widget-based bubble. Only inspect regular
|
||||
// agent messages — skip system rows, tool status, dividers, etc.
|
||||
const askPayload =
|
||||
(msg.role === "queen" || msg.role === "worker") &&
|
||||
!msg.type &&
|
||||
msg.content
|
||||
? detectAskUserPayload(msg.content)
|
||||
: null;
|
||||
if (askPayload) {
|
||||
return (
|
||||
<div key={msg.id}>
|
||||
<InlineAskUserBubble
|
||||
msg={msg}
|
||||
payload={askPayload}
|
||||
activeThread={activeThread}
|
||||
onSend={onSend}
|
||||
queenPhase={queenPhase}
|
||||
showQueenPhaseBadge={showQueenPhaseBadge}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
return (
|
||||
<div key={msg.id}>
|
||||
<MessageBubble
|
||||
msg={item.msg}
|
||||
msg={msg}
|
||||
queenPhase={queenPhase}
|
||||
showQueenPhaseBadge={showQueenPhaseBadge}
|
||||
/>
|
||||
</div>
|
||||
),
|
||||
)}
|
||||
);
|
||||
})}
|
||||
|
||||
{/* Show typing indicator while waiting for first queen response (disabled + empty chat) */}
|
||||
{(isWaiting || (disabled && threadMessages.length === 0)) && (
|
||||
|
||||
@@ -11,9 +11,15 @@ export interface MultiQuestionWidgetProps {
|
||||
questions: QuestionItem[];
|
||||
onSubmit: (answers: Record<string, string>) => void;
|
||||
onDismiss?: () => void;
|
||||
/**
|
||||
* When true, skip the global Enter-to-submit listener. Use this when rendering
|
||||
* the widget inline alongside other inputs (e.g. the chat textarea) so Enter
|
||||
* isn't hijacked from the surrounding UI.
|
||||
*/
|
||||
inline?: boolean;
|
||||
}
|
||||
|
||||
export default function MultiQuestionWidget({ questions, onSubmit, onDismiss }: MultiQuestionWidgetProps) {
|
||||
export default function MultiQuestionWidget({ questions, onSubmit, onDismiss, inline = false }: MultiQuestionWidgetProps) {
|
||||
// Per-question state: selected index (null = nothing, options.length = "Other")
|
||||
const [selections, setSelections] = useState<(number | null)[]>(
|
||||
() => questions.map(() => null),
|
||||
@@ -50,8 +56,10 @@ export default function MultiQuestionWidget({ questions, onSubmit, onDismiss }:
|
||||
onSubmit(answers);
|
||||
}, [canSubmit, submitted, questions, selections, customTexts, onSubmit]);
|
||||
|
||||
// Enter to submit (only when not focused on a text input)
|
||||
// Enter to submit (only when not focused on a text input).
|
||||
// Skipped in inline mode so the widget doesn't hijack keys from surrounding inputs.
|
||||
useEffect(() => {
|
||||
if (inline) return;
|
||||
const handleKeyDown = (e: KeyboardEvent) => {
|
||||
if (submitted) return;
|
||||
const target = e.target as HTMLElement;
|
||||
@@ -63,7 +71,7 @@ export default function MultiQuestionWidget({ questions, onSubmit, onDismiss }:
|
||||
};
|
||||
window.addEventListener("keydown", handleKeyDown);
|
||||
return () => window.removeEventListener("keydown", handleKeyDown);
|
||||
}, [handleSubmit, submitted]);
|
||||
}, [handleSubmit, submitted, inline]);
|
||||
|
||||
if (submitted) return null;
|
||||
|
||||
|
||||
@@ -10,9 +10,15 @@ export interface QuestionWidgetProps {
|
||||
onSubmit: (answer: string, isOther: boolean) => void;
|
||||
/** Called when user dismisses the question without answering */
|
||||
onDismiss?: () => void;
|
||||
/**
|
||||
* When true, the widget does not register a global keyboard listener. Set this
|
||||
* when rendering the widget inline alongside other inputs (e.g. a chat textarea)
|
||||
* so Enter / number keys do not get hijacked from the surrounding UI.
|
||||
*/
|
||||
inline?: boolean;
|
||||
}
|
||||
|
||||
export default function QuestionWidget({ question, options, onSubmit, onDismiss }: QuestionWidgetProps) {
|
||||
export default function QuestionWidget({ question, options, onSubmit, onDismiss, inline = false }: QuestionWidgetProps) {
|
||||
const [selected, setSelected] = useState<number | null>(null);
|
||||
const [customText, setCustomText] = useState("");
|
||||
const [submitted, setSubmitted] = useState(false);
|
||||
@@ -42,8 +48,10 @@ export default function QuestionWidget({ question, options, onSubmit, onDismiss
|
||||
}
|
||||
}, [canSubmit, submitted, isOtherSelected, customText, options, selected, onSubmit]);
|
||||
|
||||
// Keyboard: Enter to submit, number keys to select (only when text input is not focused)
|
||||
// Keyboard: Enter to submit, number keys to select (only when text input is not focused).
|
||||
// Skipped in inline mode so the widget doesn't hijack keys from surrounding inputs.
|
||||
useEffect(() => {
|
||||
if (inline) return;
|
||||
const handleKeyDown = (e: KeyboardEvent) => {
|
||||
if (submitted) return;
|
||||
const inTextInput = e.target === inputRef.current;
|
||||
@@ -66,7 +74,7 @@ export default function QuestionWidget({ question, options, onSubmit, onDismiss
|
||||
|
||||
window.addEventListener("keydown", handleKeyDown);
|
||||
return () => window.removeEventListener("keydown", handleKeyDown);
|
||||
}, [handleSubmit, submitted, options.length]);
|
||||
}, [handleSubmit, submitted, options.length, inline]);
|
||||
|
||||
if (submitted) return null;
|
||||
|
||||
|
||||
@@ -238,6 +238,12 @@ export default function ColonyChat() {
|
||||
agentStateRef.current = agentState;
|
||||
|
||||
const turnCounterRef = useRef<Record<string, number>>({});
|
||||
// Maps tool_use_id → the pill message ID and tool name that was created for it.
|
||||
// Survives turn counter resets so deferred completions (e.g. ask_user) can
|
||||
// find and update the correct pill even after the counter changes.
|
||||
const toolUseToPillRef = useRef<
|
||||
Record<string, { msgId: string; name: string }>
|
||||
>({});
|
||||
const queenPhaseRef = useRef<string>("planning");
|
||||
const queenIterTextRef = useRef<Record<string, Record<number, string>>>({});
|
||||
const suppressIntroRef = useRef(false);
|
||||
@@ -468,6 +474,7 @@ export default function ColonyChat() {
|
||||
setGraphNodes([]);
|
||||
setAgentState(defaultAgentState());
|
||||
turnCounterRef.current = {};
|
||||
toolUseToPillRef.current = {};
|
||||
queenPhaseRef.current = "planning";
|
||||
queenIterTextRef.current = {};
|
||||
suppressIntroRef.current = false;
|
||||
@@ -782,6 +789,12 @@ export default function ColonyChat() {
|
||||
const toolUseId = (event.data?.tool_use_id as string) || "";
|
||||
|
||||
const sid = event.stream_id;
|
||||
// Track which pill message this tool belongs to so deferred
|
||||
// completions (ask_user) can find it after the turn counter changes.
|
||||
toolUseToPillRef.current[toolUseId] = {
|
||||
msgId: `tool-pill-${sid}-${event.execution_id || "exec"}-${currentTurn}`,
|
||||
name: toolName,
|
||||
};
|
||||
setAgentState((prev) => {
|
||||
const newActive = {
|
||||
...prev.activeToolCalls,
|
||||
@@ -826,30 +839,73 @@ export default function ColonyChat() {
|
||||
appendNodeLog(event.node_id, `${ts} INFO ${toolName} done${resultStr}`);
|
||||
}
|
||||
|
||||
// Look up the original pill message this tool belongs to.
|
||||
// For deferred completions (ask_user), the turn counter and
|
||||
// activeToolCalls have already been reset, so we rely on the
|
||||
// ref recorded during tool_call_started.
|
||||
const tracked = toolUseToPillRef.current[toolUseId];
|
||||
delete toolUseToPillRef.current[toolUseId];
|
||||
|
||||
const sid = event.stream_id;
|
||||
|
||||
// Mark done in activeToolCalls if still present (normal case)
|
||||
setAgentState((prev) => {
|
||||
const updated = { ...prev.activeToolCalls };
|
||||
if (updated[toolUseId]) {
|
||||
updated[toolUseId] = { ...updated[toolUseId], done: true };
|
||||
if (!prev.activeToolCalls[toolUseId]) return prev;
|
||||
return {
|
||||
...prev,
|
||||
activeToolCalls: {
|
||||
...prev.activeToolCalls,
|
||||
[toolUseId]: {
|
||||
...prev.activeToolCalls[toolUseId],
|
||||
done: true,
|
||||
},
|
||||
},
|
||||
};
|
||||
});
|
||||
|
||||
// Determine the correct pill message ID
|
||||
const pillMsgId =
|
||||
tracked?.msgId ??
|
||||
`tool-pill-${sid}-${event.execution_id || "exec"}-${currentTurn}`;
|
||||
const trackedName = tracked?.name;
|
||||
|
||||
// Update the pill message content directly
|
||||
setMessages((prevMsgs) => {
|
||||
const idx = prevMsgs.findIndex((m) => m.id === pillMsgId);
|
||||
if (idx < 0) return prevMsgs;
|
||||
|
||||
try {
|
||||
const parsed = JSON.parse(prevMsgs[idx].content);
|
||||
const tools: { name: string; done: boolean }[] =
|
||||
parsed.tools || [];
|
||||
|
||||
if (trackedName) {
|
||||
let marked = false;
|
||||
for (let i = 0; i < tools.length; i++) {
|
||||
if (
|
||||
tools[i].name === trackedName &&
|
||||
!tools[i].done &&
|
||||
!marked
|
||||
) {
|
||||
tools[i] = { ...tools[i], done: true };
|
||||
marked = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const allDone =
|
||||
tools.length > 0 && tools.every((t) => t.done);
|
||||
return prevMsgs.map((m, i) =>
|
||||
i === idx
|
||||
? {
|
||||
...m,
|
||||
content: JSON.stringify({ tools, allDone }),
|
||||
}
|
||||
: m,
|
||||
);
|
||||
} catch {
|
||||
return prevMsgs;
|
||||
}
|
||||
const tools = Object.values(updated)
|
||||
.filter((t) => t.streamId === sid)
|
||||
.map((t) => ({ name: t.name, done: t.done }));
|
||||
const allDone = tools.length > 0 && tools.every((t) => t.done);
|
||||
upsertMessage({
|
||||
id: `tool-pill-${sid}-${event.execution_id || "exec"}-${currentTurn}`,
|
||||
agent: agentDisplayName || event.node_id || "Agent",
|
||||
agentColor: "",
|
||||
content: JSON.stringify({ tools, allDone }),
|
||||
timestamp: "",
|
||||
type: "tool_status",
|
||||
role,
|
||||
thread: agentPath,
|
||||
createdAt: eventCreatedAt,
|
||||
nodeId: event.node_id || undefined,
|
||||
executionId: event.execution_id || undefined,
|
||||
});
|
||||
return { ...prev, activeToolCalls: updated };
|
||||
});
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -58,6 +58,12 @@ export default function QueenDM() {
|
||||
const [cloneTask, setCloneTask] = useState("");
|
||||
|
||||
const turnCounterRef = useRef(0);
|
||||
// Maps tool_use_id → the pill message ID and tool name that was created for it.
|
||||
// Survives turn counter resets so deferred completions (e.g. ask_user) can
|
||||
// find and update the correct pill even after llm_turn_complete bumps the counter.
|
||||
const toolUseToPillRef = useRef<
|
||||
Record<string, { msgId: string; name: string }>
|
||||
>({});
|
||||
const queenIterTextRef = useRef<Record<string, Record<number, string>>>({});
|
||||
const [queenPhase, setQueenPhase] = useState<
|
||||
"planning" | "building" | "staging" | "running" | "independent"
|
||||
@@ -77,6 +83,7 @@ export default function QueenDM() {
|
||||
setQueenPhase("independent");
|
||||
setInitialDraft(null);
|
||||
turnCounterRef.current = 0;
|
||||
toolUseToPillRef.current = {};
|
||||
queenIterTextRef.current = {};
|
||||
}, []);
|
||||
|
||||
@@ -390,6 +397,7 @@ export default function QueenDM() {
|
||||
setIsTyping(true);
|
||||
setQueenReady(true);
|
||||
setActiveToolCalls({});
|
||||
toolUseToPillRef.current = {};
|
||||
// Clear queued flag on all user messages now that the queen is processing
|
||||
setMessages((prev) => {
|
||||
if (!prev.some((m) => m.queued)) return prev;
|
||||
@@ -560,6 +568,11 @@ export default function QueenDM() {
|
||||
? new Date(event.timestamp).getTime()
|
||||
: Date.now();
|
||||
|
||||
// Track which pill message this tool belongs to so deferred
|
||||
// completions (ask_user) can find it after the turn counter changes.
|
||||
const msgId = `tool-pill-${sid}-${execId}-${turnCounterRef.current}`;
|
||||
toolUseToPillRef.current[toolUseId] = { msgId, name: toolName };
|
||||
|
||||
setActiveToolCalls((prev) => {
|
||||
const newActive = {
|
||||
...prev,
|
||||
@@ -570,7 +583,6 @@ export default function QueenDM() {
|
||||
done: t.done,
|
||||
}));
|
||||
const allDone = tools.length > 0 && tools.every((t) => t.done);
|
||||
const msgId = `tool-pill-${sid}-${execId}-${turnCounterRef.current}`;
|
||||
const toolMsg: ChatMessage = {
|
||||
id: msgId,
|
||||
agent: queenName,
|
||||
@@ -611,57 +623,68 @@ export default function QueenDM() {
|
||||
|
||||
case "tool_call_completed": {
|
||||
const toolUseId = (event.data?.tool_use_id as string) || "";
|
||||
|
||||
// Look up the original pill message this tool belongs to.
|
||||
// For deferred completions (ask_user), the turn counter and
|
||||
// activeToolCalls have already been reset by llm_turn_complete,
|
||||
// so we rely on the ref recorded during tool_call_started.
|
||||
const tracked = toolUseToPillRef.current[toolUseId];
|
||||
delete toolUseToPillRef.current[toolUseId];
|
||||
|
||||
// Mark done in activeToolCalls if still present (normal case)
|
||||
setActiveToolCalls((prev) => {
|
||||
if (!prev[toolUseId]) return prev;
|
||||
return {
|
||||
...prev,
|
||||
[toolUseId]: { ...prev[toolUseId], done: true },
|
||||
};
|
||||
});
|
||||
|
||||
// Determine the correct pill message ID
|
||||
const sid = event.stream_id;
|
||||
const execId = event.execution_id || "exec";
|
||||
const eventCreatedAt = event.timestamp
|
||||
? new Date(event.timestamp).getTime()
|
||||
: Date.now();
|
||||
const pillMsgId =
|
||||
tracked?.msgId ??
|
||||
`tool-pill-${sid}-${execId}-${turnCounterRef.current}`;
|
||||
const toolName = tracked?.name;
|
||||
|
||||
setActiveToolCalls((prev) => {
|
||||
const updated = { ...prev };
|
||||
if (updated[toolUseId]) {
|
||||
updated[toolUseId] = { ...updated[toolUseId], done: true };
|
||||
// Update the pill message content directly
|
||||
setMessages((prevMsgs) => {
|
||||
const idx = prevMsgs.findIndex((m) => m.id === pillMsgId);
|
||||
if (idx < 0) return prevMsgs;
|
||||
|
||||
try {
|
||||
const parsed = JSON.parse(prevMsgs[idx].content);
|
||||
const tools: { name: string; done: boolean }[] =
|
||||
parsed.tools || [];
|
||||
|
||||
if (toolName) {
|
||||
let marked = false;
|
||||
for (let i = 0; i < tools.length; i++) {
|
||||
if (
|
||||
tools[i].name === toolName &&
|
||||
!tools[i].done &&
|
||||
!marked
|
||||
) {
|
||||
tools[i] = { ...tools[i], done: true };
|
||||
marked = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const allDone =
|
||||
tools.length > 0 && tools.every((t) => t.done);
|
||||
return prevMsgs.map((m, i) =>
|
||||
i === idx
|
||||
? {
|
||||
...m,
|
||||
content: JSON.stringify({ tools, allDone }),
|
||||
}
|
||||
: m,
|
||||
);
|
||||
} catch {
|
||||
return prevMsgs;
|
||||
}
|
||||
const tools = Object.entries(updated).map(([, t]) => ({
|
||||
name: t.name,
|
||||
done: t.done,
|
||||
}));
|
||||
const allDone = tools.length > 0 && tools.every((t) => t.done);
|
||||
const msgId = `tool-pill-${sid}-${execId}-${turnCounterRef.current}`;
|
||||
const toolMsg: ChatMessage = {
|
||||
id: msgId,
|
||||
agent: queenName,
|
||||
agentColor: "",
|
||||
content: JSON.stringify({ tools, allDone }),
|
||||
timestamp: "",
|
||||
type: "tool_status",
|
||||
role: "queen",
|
||||
thread: "queen-dm",
|
||||
createdAt: eventCreatedAt,
|
||||
nodeId: event.node_id || undefined,
|
||||
executionId: event.execution_id || undefined,
|
||||
};
|
||||
setMessages((prevMsgs) => {
|
||||
const idx = prevMsgs.findIndex((m) => m.id === msgId);
|
||||
if (idx >= 0) {
|
||||
return prevMsgs.map((m, i) =>
|
||||
i === idx ? { ...toolMsg, createdAt: m.createdAt ?? toolMsg.createdAt } : m,
|
||||
);
|
||||
}
|
||||
// Insert in sorted position by createdAt
|
||||
const ts = toolMsg.createdAt ?? Date.now();
|
||||
let insertIdx = prevMsgs.length - 1;
|
||||
while (insertIdx >= 0 && (prevMsgs[insertIdx].createdAt ?? 0) > ts) {
|
||||
insertIdx--;
|
||||
}
|
||||
if (insertIdx === -1 || insertIdx === prevMsgs.length - 1) {
|
||||
return [...prevMsgs, toolMsg];
|
||||
}
|
||||
const next = [...prevMsgs];
|
||||
next.splice(insertIdx + 1, 0, toolMsg);
|
||||
return next;
|
||||
});
|
||||
return updated;
|
||||
});
|
||||
break;
|
||||
}
|
||||
@@ -746,6 +769,7 @@ export default function QueenDM() {
|
||||
setIsTyping(false);
|
||||
setIsStreaming(false);
|
||||
setActiveToolCalls({});
|
||||
toolUseToPillRef.current = {};
|
||||
// Clear queued flags since the queen is now idle
|
||||
setMessages((prev) => {
|
||||
if (!prev.some((m) => m.queued)) return prev;
|
||||
|
||||
@@ -4,7 +4,8 @@ from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from framework.llm.capabilities import supports_image_tool_results
|
||||
from framework.llm.capabilities import filter_tools_for_model, supports_image_tool_results
|
||||
from framework.llm.provider import Tool
|
||||
|
||||
|
||||
class TestSupportsImageToolResults:
|
||||
@@ -56,3 +57,56 @@ class TestSupportsImageToolResults:
|
||||
assert supports_image_tool_results("DeepSeek/deepseek-chat") is False
|
||||
assert supports_image_tool_results("OLLAMA/llama3") is False
|
||||
assert supports_image_tool_results("GPT-4o") is True
|
||||
|
||||
|
||||
class TestFilterToolsForModel:
|
||||
"""Verify ``filter_tools_for_model`` — the real helper used by AgentLoop."""
|
||||
|
||||
def test_hides_image_tool_from_text_only_model(self):
|
||||
tools = [
|
||||
Tool(name="read_file", description="read a file"),
|
||||
Tool(name="browser_screenshot", description="take a screenshot", produces_image=True),
|
||||
Tool(name="browser_snapshot", description="get page content"),
|
||||
]
|
||||
filtered, hidden = filter_tools_for_model(tools, "glm-5")
|
||||
names = [t.name for t in filtered]
|
||||
assert "browser_screenshot" not in names
|
||||
assert "read_file" in names
|
||||
assert "browser_snapshot" in names
|
||||
assert hidden == ["browser_screenshot"]
|
||||
|
||||
def test_keeps_image_tool_for_vision_model(self):
|
||||
tools = [
|
||||
Tool(name="read_file", description="read a file"),
|
||||
Tool(name="browser_screenshot", description="take a screenshot", produces_image=True),
|
||||
]
|
||||
filtered, hidden = filter_tools_for_model(tools, "claude-sonnet-4-20250514")
|
||||
assert {t.name for t in filtered} == {"read_file", "browser_screenshot"}
|
||||
assert hidden == []
|
||||
|
||||
def test_default_tools_are_not_filtered(self):
|
||||
"""Tools without produces_image (default False) are kept for all models."""
|
||||
tools = [
|
||||
Tool(name="read_file", description="read a file"),
|
||||
Tool(name="web_search", description="search the web"),
|
||||
]
|
||||
text_only, text_hidden = filter_tools_for_model(tools, "glm-5")
|
||||
vision, vision_hidden = filter_tools_for_model(tools, "gpt-4o")
|
||||
assert len(text_only) == 2 and text_hidden == []
|
||||
assert len(vision) == 2 and vision_hidden == []
|
||||
|
||||
def test_empty_model_string_returns_tools_unchanged(self):
|
||||
"""Guards the ctx.llm-missing path where model is empty."""
|
||||
tools = [
|
||||
Tool(name="browser_screenshot", description="", produces_image=True),
|
||||
]
|
||||
filtered, hidden = filter_tools_for_model(tools, "")
|
||||
assert len(filtered) == 1
|
||||
assert hidden == []
|
||||
|
||||
def test_returned_list_is_a_copy(self):
|
||||
"""Caller should be free to mutate the filtered list without affecting input."""
|
||||
tools = [Tool(name="read_file", description="")]
|
||||
filtered, _ = filter_tools_for_model(tools, "gpt-4o")
|
||||
filtered.append(Tool(name="extra", description=""))
|
||||
assert len(tools) == 1
|
||||
|
||||
@@ -0,0 +1,65 @@
|
||||
"""Tests for vision-only prompt block stripping in Queen nodes.
|
||||
|
||||
Covers ``finalize_queen_prompt`` — the function that resolves
|
||||
``<!-- vision-only -->...<!-- /vision-only -->`` markers in Queen phase
|
||||
prompts before they reach the LLM. Vision-capable models see the inner
|
||||
content; text-only models see the block removed entirely.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from framework.agents.queen.nodes import finalize_queen_prompt
|
||||
|
||||
|
||||
class TestFinalizeQueenPrompt:
|
||||
def test_vision_model_keeps_inner_content_and_strips_markers(self):
|
||||
text = "before <!-- vision-only -->secret<!-- /vision-only --> after"
|
||||
result = finalize_queen_prompt(text, has_vision=True)
|
||||
assert result == "before secret after"
|
||||
|
||||
def test_text_only_model_removes_entire_block(self):
|
||||
text = "before <!-- vision-only -->secret<!-- /vision-only --> after"
|
||||
result = finalize_queen_prompt(text, has_vision=False)
|
||||
assert result == "before after"
|
||||
assert "secret" not in result
|
||||
assert "vision-only" not in result
|
||||
|
||||
def test_multiline_block_handled(self):
|
||||
"""Regex must use DOTALL so blocks can span newlines."""
|
||||
text = (
|
||||
"- item 1\n"
|
||||
"<!-- vision-only -->\n"
|
||||
"- item 2 (vision only)\n"
|
||||
"<!-- /vision-only -->\n"
|
||||
"- item 3\n"
|
||||
)
|
||||
vision = finalize_queen_prompt(text, has_vision=True)
|
||||
text_only = finalize_queen_prompt(text, has_vision=False)
|
||||
assert "- item 2 (vision only)" in vision
|
||||
assert "- item 2 (vision only)" not in text_only
|
||||
assert "- item 1" in text_only and "- item 3" in text_only
|
||||
|
||||
def test_multiple_blocks_in_same_text(self):
|
||||
text = (
|
||||
"A <!-- vision-only -->X<!-- /vision-only --> "
|
||||
"B <!-- vision-only -->Y<!-- /vision-only --> C"
|
||||
)
|
||||
assert finalize_queen_prompt(text, has_vision=True) == "A X B Y C"
|
||||
assert finalize_queen_prompt(text, has_vision=False) == "A B C"
|
||||
|
||||
def test_non_greedy_match_does_not_swallow_between_blocks(self):
|
||||
"""A naïve greedy regex would match from the first opening marker
|
||||
to the last closing marker and wipe out the middle section. Lock
|
||||
that down so a future refactor can't regress to greedy."""
|
||||
text = (
|
||||
"<!-- vision-only -->first<!-- /vision-only -->"
|
||||
"KEEP"
|
||||
"<!-- vision-only -->second<!-- /vision-only -->"
|
||||
)
|
||||
assert finalize_queen_prompt(text, has_vision=False) == "KEEP"
|
||||
assert finalize_queen_prompt(text, has_vision=True) == "firstKEEPsecond"
|
||||
|
||||
def test_text_without_markers_is_unchanged(self):
|
||||
text = "plain prompt with no markers at all"
|
||||
assert finalize_queen_prompt(text, has_vision=True) == text
|
||||
assert finalize_queen_prompt(text, has_vision=False) == text
|
||||
@@ -94,7 +94,10 @@ class TestSkillCatalog:
|
||||
assert "<name>beta</name>" in prompt
|
||||
assert "<description>Alpha skill</description>" in prompt
|
||||
assert "<location>/p/alpha/SKILL.md</location>" in prompt
|
||||
assert "<base_dir>/p/alpha</base_dir>" in prompt
|
||||
# <base_dir> is intentionally not emitted — the mandatory header
|
||||
# tells the model to resolve relative paths against the parent of
|
||||
# SKILL.md, so the redundant element was dropped.
|
||||
assert "<base_dir>" not in prompt
|
||||
|
||||
def test_to_prompt_sorted_by_name(self):
|
||||
skills = [
|
||||
@@ -130,13 +133,44 @@ class TestSkillCatalog:
|
||||
assert "<name>usr</name>" in prompt
|
||||
assert "<name>fw</name>" in prompt
|
||||
|
||||
def test_to_prompt_contains_behavioral_instruction(self):
|
||||
def test_to_prompt_contains_mandatory_header(self):
|
||||
"""The rendered catalog must carry the mandatory pre-reply checklist
|
||||
so soft guidance turns into a required step."""
|
||||
catalog = SkillCatalog([_make_skill(source_scope="project")])
|
||||
prompt = catalog.to_prompt()
|
||||
|
||||
assert "When a task matches a skill's description" in prompt
|
||||
assert "## Skills (mandatory)" in prompt
|
||||
assert "Before replying: scan <available_skills>" in prompt
|
||||
assert "never read more than one skill up front" in prompt
|
||||
assert "`read_file`" in prompt
|
||||
assert "SKILL.md" in prompt
|
||||
|
||||
def test_to_prompt_compact_fallback_drops_descriptions(self):
|
||||
"""When the full XML body exceeds the char threshold, the compact
|
||||
variant drops <description> but keeps every skill's <name>."""
|
||||
# Each skill contributes ~100+ chars with a long description.
|
||||
# 60 skills easily pushes the body past the threshold.
|
||||
skills = [
|
||||
_make_skill(
|
||||
name=f"skill-{i:03d}",
|
||||
description="A reasonably long description " * 4,
|
||||
location=f"/s/skill-{i:03d}/SKILL.md",
|
||||
base_dir=f"/s/skill-{i:03d}",
|
||||
)
|
||||
for i in range(60)
|
||||
]
|
||||
catalog = SkillCatalog(skills)
|
||||
prompt = catalog.to_prompt()
|
||||
|
||||
# Mandatory header still present but uses the compact variant wording.
|
||||
assert "## Skills (mandatory)" in prompt
|
||||
assert "scan <available_skills> <name>" in prompt
|
||||
# Every skill's name survives …
|
||||
for i in range(60):
|
||||
assert f"<name>skill-{i:03d}</name>" in prompt
|
||||
# … but no descriptions were rendered.
|
||||
assert "<description>" not in prompt
|
||||
|
||||
def test_build_pre_activated_prompt(self):
|
||||
skill = _make_skill("research", body="## Deep Research\nDo thorough research.")
|
||||
catalog = SkillCatalog([skill])
|
||||
|
||||
@@ -1,9 +1,14 @@
|
||||
"""Tests for AS-6 skill resource loading support.
|
||||
|
||||
Covers:
|
||||
- <base_dir> element in catalog XML
|
||||
- allowlisted_dirs property reflects trusted skill base directories
|
||||
- skill_dirs propagation to NodeContext
|
||||
|
||||
The catalog XML previously emitted a redundant <base_dir> element next to
|
||||
each <location>. That was dropped when the mandatory header took over the
|
||||
"resolve relative paths against the parent of SKILL.md" instruction, so
|
||||
there is no longer an XML-emission test for base_dir. Programmatic access
|
||||
via ``catalog.allowlisted_dirs`` is still covered below.
|
||||
"""
|
||||
|
||||
from framework.skills.catalog import SkillCatalog
|
||||
@@ -26,31 +31,6 @@ def _make_skill(
|
||||
|
||||
|
||||
class TestSkillResourceBaseDir:
|
||||
def test_base_dir_in_xml(self):
|
||||
"""Each community skill entry should expose its base_dir in the catalog XML."""
|
||||
skill = _make_skill("deploy", "/project/.hive/skills/deploy")
|
||||
catalog = SkillCatalog([skill])
|
||||
prompt = catalog.to_prompt()
|
||||
|
||||
assert "<base_dir>/project/.hive/skills/deploy</base_dir>" in prompt
|
||||
|
||||
def test_base_dir_xml_escaped(self):
|
||||
"""base_dir with XML-special chars should be escaped."""
|
||||
skill = _make_skill("s", "/path/with <&> chars")
|
||||
catalog = SkillCatalog([skill])
|
||||
prompt = catalog.to_prompt()
|
||||
|
||||
assert "<base_dir>/path/with &lt;&amp;&gt; chars</base_dir>" in prompt
|
||||
|
||||
def test_base_dir_present_for_framework_skills(self):
|
||||
"""Framework-scope skills now appear in the catalog like any other scope,
|
||||
and their base_dir is included in the XML."""
|
||||
skill = _make_skill("fw", "/hive/_default_skills/fw", source_scope="framework")
|
||||
catalog = SkillCatalog([skill])
|
||||
prompt = catalog.to_prompt()
|
||||
assert "<name>fw</name>" in prompt
|
||||
assert "<base_dir>/hive/_default_skills/fw</base_dir>" in prompt
|
||||
|
||||
def test_allowlisted_dirs_matches_skills(self):
|
||||
"""allowlisted_dirs returns all skill base_dirs including framework ones."""
|
||||
skills = [
|
||||
|
||||
@@ -799,6 +799,55 @@ def test_resync_returns_false_when_credentials_unchanged(tmp_path, monkeypatch):
|
||||
assert registry.resync_mcp_servers_if_needed() is False
|
||||
|
||||
|
||||
class TestMcpToolProducesImageFlag:
|
||||
"""Verify _convert_mcp_tool_to_framework_tool sets produces_image from the name.
|
||||
|
||||
This is the detection step that the filter in AgentLoop depends on —
|
||||
if the regex regresses, text-only models will start seeing screenshot
|
||||
tools they can't use.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def _mcp_tool(name: str):
|
||||
return SimpleNamespace(
|
||||
name=name,
|
||||
description=f"{name} description",
|
||||
input_schema={"type": "object", "properties": {}, "required": []},
|
||||
server_name="test",
|
||||
)
|
||||
|
||||
def test_screenshot_flagged(self):
|
||||
registry = ToolRegistry()
|
||||
mcp = self._mcp_tool("browser_screenshot")
|
||||
tool = registry._convert_mcp_tool_to_framework_tool(mcp) # noqa: SLF001
|
||||
assert tool.produces_image is True
|
||||
|
||||
def test_snapshot_not_flagged(self):
|
||||
"""browser_snapshot returns a DOM tree, not an image — must not match."""
|
||||
registry = ToolRegistry()
|
||||
mcp = self._mcp_tool("browser_snapshot")
|
||||
tool = registry._convert_mcp_tool_to_framework_tool(mcp) # noqa: SLF001
|
||||
assert tool.produces_image is False
|
||||
|
||||
def test_case_insensitive_match(self):
|
||||
registry = ToolRegistry()
|
||||
mcp = self._mcp_tool("TakeScreenshot")
|
||||
tool = registry._convert_mcp_tool_to_framework_tool(mcp) # noqa: SLF001
|
||||
assert tool.produces_image is True
|
||||
|
||||
def test_plain_tool_not_flagged(self):
|
||||
registry = ToolRegistry()
|
||||
mcp = self._mcp_tool("read_file")
|
||||
tool = registry._convert_mcp_tool_to_framework_tool(mcp) # noqa: SLF001
|
||||
assert tool.produces_image is False
|
||||
|
||||
def test_image_suffix_variants_flagged(self):
|
||||
registry = ToolRegistry()
|
||||
for name in ("capture_image", "render_image", "get_image", "snapshot_image"):
|
||||
tool = registry._convert_mcp_tool_to_framework_tool(self._mcp_tool(name)) # noqa: SLF001
|
||||
assert tool.produces_image is True, f"{name} should be flagged"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Concurrency-safe flag propagation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -0,0 +1,176 @@
|
||||
# 🐝 Hive Agent v0.10.0: The Colony
|
||||
|
||||
> ⚠️ **Breaking change.** This is a large architectural refactor of how agents work in Hive. **Old agents are no longer compatible.** Existing workspaces, custom agents, and saved sessions from pre-v0.10.0 builds will need to be recreated.
|
||||
|
||||
---
|
||||
|
||||
## ✨ Highlights
|
||||
|
||||
The **Colony** introduces a new way of working: a group of specialized workers operating together to run and scale your business.
|
||||
|
||||
The role of the **Queen** has evolved. Instead of only orchestrating, the Queen now **executes work first** to deliver immediate value, then **builds systems around that work** to create stable, repeatable business processes.
|
||||
|
||||
You now have a full leadership team of eight Queens, each with their own identity, expertise, and voice:
|
||||
|
||||
| Queen | Role |
|
||||
| --- | --- |
|
||||
| **Sophia** | Head of Brand & Design |
|
||||
| **Charlotte** | Head of Finance & Fundraising |
|
||||
| **Victoria** | Head of Growth |
|
||||
| **Eleanor** | Head of Legal |
|
||||
| **Rachel** | Head of Operations |
|
||||
| **Isabella** | Head of Product Strategy |
|
||||
| **Amelia** | Head of Talent |
|
||||
| **Alexandra** | Head of Technology |
|
||||
|
||||
Start automating your business processes with your Queens today.
|
||||
|
||||
---
|
||||
|
||||
## 🏛️ The Colony Architecture
|
||||
|
||||
### Queens as Identities, Not Just Orchestrators
|
||||
|
||||
- **Queen profiles** — each queen is a YAML-backed persona (`~/.hive/agents/queens/{queen_id}/profile.yaml`) with core traits, hidden background, psychological profile, behavior triggers, and skill sets. Profiles are injected into the system prompt at session start.
|
||||
- **CEO-style queen selection** — an LLM classifier routes every new user request to the best-matching queen based on the task at hand, with structured routing diagnostics (`QueenSelection`).
|
||||
- **Queen DMs** — direct-message pages for each queen with a dedicated session flow, session switcher, and prompt library integration.
|
||||
- **Independent / PM mode** — queens run in an independent mode for planning-phase work, with a "think out loud" internal monologue surfaced through internal tags.
|
||||
- **Queen memory v2** — simplified memory implementation with a reflection agent, cooldown-gated reflections, user identity, doppelganger wiring, and a recall selector for targeted retrieval.
|
||||
- **Queen lifecycle tools** — first-class tools for escalation, queen reply, and session handoff.
|
||||
|
||||
### Colony Runtime
|
||||
|
||||
- **Grand architecture revamp** — the framework, agent loop, runtime, graph, pipeline, executor, and node worker layers have been rewritten from the ground up. Deprecated shims and legacy orchestration paths have been removed.
|
||||
- **Colony creation flow** — colonies are created via a skill, with reliable event bus subscription, worker spawning, and post-creation list refresh.
|
||||
- **Scheduled triggers** — colonies can now be woken on a cron schedule, with triggers firing directly into the owning queen's session.
|
||||
- **Simple fork** for agents, stable credential states, and improved worker execution reliability.
|
||||
|
||||
---
|
||||
|
||||
## 🆕 What's New
|
||||
|
||||
### Colony & Queens
|
||||
|
||||
- 8 default queen personas (Alexandra, Victoria, Isabella, Charlotte, Eleanor, Sophia, Amelia, Rachel) with profile YAML, examples, and behavior triggers
|
||||
- LLM-based queen selector with reasoning output
|
||||
- Queen DM page, queen session switcher, and sidebar queen item
|
||||
- Queen scope memory, role examples, and identity loading
|
||||
- Reflection agent with cooldown and improved reflection runner
|
||||
- Queen orchestrator + `routes_queens` API
|
||||
- Natural chat replies and cleaner home-prompt bootstrap
|
||||
- Queen identity for new sessions
|
||||
- `ask_user` / `ask_user_multiple` tools available in queen prompt
|
||||
- Escalation and queen-reply tools
|
||||
|
||||
### Skills & Tools
|
||||
|
||||
- **Learned default skills** — skills the queen has learned become part of her baseline
|
||||
- **Tool-gated skill activation** — skills only activate when their required tools are present
|
||||
- **Skills for colonies** — per-colony skill registration and loading
|
||||
- **Text-only model filter** — image-producing tools and vision-only prompt blocks are hidden from text-only models
|
||||
- **Browser skills upgrade** — improved click reliability, screenshot capture, and credential filtering
|
||||
- **Deprecated-tool removal** and alignment of Hive tool names across the codebase
|
||||
- **Ask-user widget** with fallback rendering and preserved tool pill mapping across turn boundaries for deferred completions
|
||||
- **Improved tool-call reliability** across the board (tool limit removed, tool blacklist, tool credential filter)
|
||||
- **MCP** — efficient MCP loading at initialization, default MCP bootstrapping, registered available MCP tools, fixed MCP tool initialization and registry pipeline stage
|
||||
|
||||
### LLM & Credentials
|
||||
|
||||
- **Key pool** for credential management with stable credential states
|
||||
- **Aden credentials storage adapter** and subscription-based LLM config activation endpoint
|
||||
- **Consolidated model config** with unified model catalog
|
||||
- **New providers** — Kimi, Hive, and Aden added to the model catalog
|
||||
- **Model switcher** UI with runtime model switching API
|
||||
- **LLM key validation endpoint** with agent errors surfaced via SSE
|
||||
- **BYOK modal** import fixes for subscription token detection
|
||||
|
||||
### Frontend
|
||||
|
||||
- **Home redesign** — new home, credentials, and org chart pages
|
||||
- **Colony chat** and **queen DM** pages
|
||||
- **Sidebar + header** components and global app layout/routing
|
||||
- **Model switcher, settings modal, template card**
|
||||
- **Prompt library** with search, category filtering, and UI polish
|
||||
- **Side panel** fixes and sub-agent pane light-mode support
|
||||
- **Flowchart** light-mode support and normalized settings modal sizing
|
||||
- **User profile settings** and UI enhancements
|
||||
- **Sync user profile** to global memory as `user-profile.md`; queen profile API transformation
|
||||
- Removed the old workspace GUI and its dependencies
|
||||
|
||||
### Framework & Runtime
|
||||
|
||||
- Architecture revamp: new runtime config, simplified agent loading, and new infrastructure for the queen
|
||||
- Home hive directory structure refactor
|
||||
- Agent loading pipeline fixes, MCP registry pipeline stage fix
|
||||
- Session resume improvements: separate resume vs new-session flow for queen sessions, edge-case fix for message injection in resumed sessions
|
||||
- Strip internal tags from user-visible output
|
||||
- Colony event bus subscription fixes and shared event bus for parent visibility
|
||||
- Worker spawn and stop-worker fixes
|
||||
- Default log level and extra logging hooks
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Bug Fixes
|
||||
|
||||
- **Ask-user widget** — fallback when widget fails to mount
|
||||
- **Skill loading** for colonies and proper skill resolution across queen sessions
|
||||
- **Model switching** and new-chat flow no longer carry stale state
|
||||
- **Tool pill mapping** preserved across turn boundary for deferred `ask_user` completions
|
||||
- **Tool limit** removed (was capping legitimate long tool lists)
|
||||
- **Queen loading** stability fixes
|
||||
- **Side panel** rendering issues
|
||||
- **Deprecated graphs** removed from UI
|
||||
- **Home-page prompts** now reach the queen directly without waiting for the greeting to finish
|
||||
- **Colony creation** link, reframing, and post-creation refresh
|
||||
- **Build error** in colony creation path
|
||||
- **GCU system prompt** tuning
|
||||
- **Tool credential filter** correctness
|
||||
- **Screenshot** capture and browser click reliability
|
||||
- **Queen message injection** when resuming a session
|
||||
- **Internal-tag diction** fixes in surfaced output
|
||||
- **MCP tool initialization** on cold start
|
||||
- **Frontend DM** edge cases
|
||||
- **Prompt library** new-session handling for new chat
|
||||
- **Config validation** and unavailable Minimax model handling
|
||||
- **Queen identity** loading on cold boot
|
||||
- **Extra text** in queen selector JSON response parsed safely
|
||||
- **Outdated queen communication prompt** removed
|
||||
|
||||
---
|
||||
|
||||
## 🧹 Refactor & Cleanup
|
||||
|
||||
- **Shatter the Eld\*n ring** — top-to-bottom refactor of the runtime core
|
||||
- **Grand clean-up** of deprecated code paths
|
||||
- **Remove deprecated shims** and old session-status tools
|
||||
- **Big test cleanup** — integration tests and component tests rewritten around the new architecture
|
||||
- **Update references** for orchestrator / host / loader renames
|
||||
- **Consolidate tests** for queen state machine and verified outcomes
|
||||
- **Remove old workspace GUI** and its dependencies
|
||||
- **Remove old "new agent" button** and deprecated entry points
|
||||
- **Home hive directory** structure refactor
|
||||
|
||||
---
|
||||
|
||||
## ⚠️ Breaking Changes
|
||||
|
||||
- **Old agents are not compatible.** Custom agents authored against the pre-v0.10.0 framework will need to be re-authored against the new Queen/Colony runtime.
|
||||
- **Session format** — pre-v0.10.0 sessions cannot be resumed.
|
||||
- **Deprecated tools removed** and Hive tool names have been realigned; any external scripts referencing old tool names must be updated.
|
||||
- **Old session-status tools** removed in favor of the new queen lifecycle tools.
|
||||
- **Workspace GUI removed** — the legacy workspace UI is gone; use the new home, colony chat, and queen DM pages.
|
||||
- **MCP registry pipeline** — MCP configurations now load through the new registry; custom MCP setups may need to be re-registered.
|
||||
|
||||
---
|
||||
|
||||
## 🚀 Upgrading
|
||||
|
||||
Because this release rewrites the agent runtime, the recommended upgrade path is:
|
||||
|
||||
1. Back up `~/.hive/` if you have sessions or custom agents you want to reference.
|
||||
2. Pull `main` at the v0.10.0 tag.
|
||||
3. Let Hive initialize the new queen profiles under `~/.hive/agents/queens/`.
|
||||
4. Re-create any custom agents as colonies/queens against the new framework.
|
||||
5. Re-register any custom MCP servers through the new MCP registry.
|
||||
|
||||
Welcome to the Colony. 🐝
|
||||
@@ -255,9 +255,10 @@ def register_tools(mcp: FastMCP) -> None:
|
||||
# Clean up whitespace
|
||||
text = " ".join(text.split())
|
||||
|
||||
# Truncate if needed
|
||||
# Truncate if needed (reserve 3 chars for the ellipsis so the
|
||||
# final string stays within max_length)
|
||||
if len(text) > max_length:
|
||||
text = text[:max_length] + "..."
|
||||
text = text[: max_length - 3] + "..."
|
||||
|
||||
result: dict[str, Any] = {
|
||||
"url": url,
|
||||
|
||||
@@ -113,6 +113,24 @@ class TestWebScrapeTool:
|
||||
assert isinstance(result, dict)
|
||||
assert "error" not in result
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch(_STEALTH_PATH)
|
||||
@patch(_PW_PATH)
|
||||
async def test_truncation_respects_max_length(self, mock_pw, mock_stealth, web_scrape_fn):
|
||||
"""Truncated content (including the ellipsis) must not exceed max_length."""
|
||||
# max_length is clamped to >=1000, so build content larger than that
|
||||
long_text = "a" * 5000
|
||||
html = f"<html><body>{long_text}</body></html>"
|
||||
mock_cm, _, _ = _make_playwright_mocks(html, final_url="https://example.com")
|
||||
mock_pw.return_value = mock_cm
|
||||
mock_stealth.return_value.apply_stealth_async = AsyncMock()
|
||||
|
||||
result = await web_scrape_fn(url="https://example.com", max_length=1000)
|
||||
assert "error" not in result
|
||||
assert len(result["content"]) <= 1000
|
||||
assert result["content"].endswith("...")
|
||||
assert result["length"] == len(result["content"])
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch(_STEALTH_PATH)
|
||||
@patch(_PW_PATH)
|
||||
|
||||
Reference in New Issue
Block a user