fix: skills prompts
This commit is contained in:
@@ -14,13 +14,37 @@ from framework.skills.skill_errors import SkillErrorCode, log_skill_error
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_BEHAVIORAL_INSTRUCTION = (
|
||||
"The following skills provide specialized instructions for specific tasks.\n"
|
||||
"When a task matches a skill's description, read the SKILL.md at the listed\n"
|
||||
"location to load the full instructions before proceeding.\n"
|
||||
"When a skill references relative paths, resolve them against the skill's\n"
|
||||
"directory (the parent of SKILL.md) and use absolute paths in tool calls."
|
||||
)
|
||||
# Upper bound on the raw `<available_skills>` XML body, in characters.
|
||||
# When the full catalog (with <description> entries) exceeds this, we fall
|
||||
# back to the compact variant that drops descriptions but keeps every skill
|
||||
# visible. Preserving awareness of every skill beats truncating entries.
|
||||
_COMPACT_THRESHOLD_CHARS = 5000
|
||||
|
||||
_MANDATORY_HEADER_FULL = """## Skills (mandatory)
|
||||
Before replying: scan <available_skills> <description> entries.
|
||||
- If exactly one skill clearly applies: read its SKILL.md at <location> with `read_file`, then follow it.
|
||||
- If multiple could apply: choose the most specific one, then read/follow it.
|
||||
- If none clearly apply: do not read any SKILL.md.
|
||||
Constraints: never read more than one skill up front; only read after selecting.
|
||||
- When a skill drives external API writes (Gmail, Calendar, GitHub, etc.), assume rate limits: prefer fewer larger writes, avoid tight one-item loops, serialize bursts when possible, and respect 429/Retry-After.
|
||||
|
||||
|
||||
The following skills provide specialized instructions for specific tasks.
|
||||
Use `read_file` to load a skill's SKILL.md when the task matches its description.
|
||||
When a skill file references a relative path, resolve it against the skill directory (parent of SKILL.md) and use that absolute path in tool commands."""
|
||||
|
||||
_MANDATORY_HEADER_COMPACT = """## Skills (mandatory)
|
||||
Before replying: scan <available_skills> <name> entries.
|
||||
- If exactly one skill clearly applies: read its SKILL.md at <location> with `read_file`, then follow it.
|
||||
- If multiple could apply: choose the most specific one, then read/follow it.
|
||||
- If none clearly apply: do not read any SKILL.md.
|
||||
Constraints: never read more than one skill up front; only read after selecting.
|
||||
- When a skill drives external API writes (Gmail, Calendar, GitHub, etc.), assume rate limits: prefer fewer larger writes, avoid tight one-item loops, serialize bursts when possible, and respect 429/Retry-After.
|
||||
|
||||
|
||||
The following skills provide specialized instructions for specific tasks.
|
||||
Use `read_file` to load a skill's SKILL.md when the task matches its name.
|
||||
When a skill file references a relative path, resolve it against the skill directory (parent of SKILL.md) and use that absolute path in tool commands."""
|
||||
|
||||
|
||||
class SkillCatalog:
|
||||
@@ -61,27 +85,42 @@ class SkillCatalog:
|
||||
def to_prompt(self) -> str:
|
||||
"""Generate the catalog prompt for system prompt injection.
|
||||
|
||||
Returns empty string if no community/user skills are discovered
|
||||
(default skills are handled separately by DefaultSkillManager).
|
||||
"""
|
||||
# All skills go through the catalog for progressive disclosure.
|
||||
all_skills = list(self._skills.values())
|
||||
Returns empty string when no skills are present. Otherwise returns
|
||||
a mandatory pre-reply checklist + decision rules + rate-limit note,
|
||||
followed by the <available_skills> XML body.
|
||||
|
||||
When the full XML body exceeds ``_COMPACT_THRESHOLD_CHARS``, the
|
||||
compact variant is emitted instead: <description> elements are
|
||||
dropped so every skill stays visible before any gets truncated.
|
||||
"""
|
||||
all_skills = sorted(self._skills.values(), key=lambda s: s.name)
|
||||
if not all_skills:
|
||||
return ""
|
||||
|
||||
full_xml = self._render_xml(all_skills, compact=False)
|
||||
if len(full_xml) <= _COMPACT_THRESHOLD_CHARS:
|
||||
return f"{_MANDATORY_HEADER_FULL}\n\n{full_xml}"
|
||||
|
||||
compact_xml = self._render_xml(all_skills, compact=True)
|
||||
return f"{_MANDATORY_HEADER_COMPACT}\n\n{compact_xml}"
|
||||
|
||||
@staticmethod
|
||||
def _render_xml(skills: list[ParsedSkill], *, compact: bool) -> str:
|
||||
"""Render the `<available_skills>` block.
|
||||
|
||||
``compact=True`` drops `<description>` to preserve skill awareness
|
||||
when the catalog would otherwise blow the char budget.
|
||||
"""
|
||||
lines = ["<available_skills>"]
|
||||
for skill in sorted(all_skills, key=lambda s: s.name):
|
||||
for skill in skills:
|
||||
lines.append(" <skill>")
|
||||
lines.append(f" <name>{escape(skill.name)}</name>")
|
||||
if not compact:
|
||||
lines.append(f" <description>{escape(skill.description)}</description>")
|
||||
lines.append(f" <location>{escape(skill.location)}</location>")
|
||||
lines.append(f" <base_dir>{escape(skill.base_dir)}</base_dir>")
|
||||
lines.append(" </skill>")
|
||||
lines.append("</available_skills>")
|
||||
|
||||
xml_block = "\n".join(lines)
|
||||
return f"{_BEHAVIORAL_INSTRUCTION}\n\n{xml_block}"
|
||||
return "\n".join(lines)
|
||||
|
||||
def build_pre_activated_prompt(self, skill_names: list[str]) -> str:
|
||||
"""Build prompt content for pre-activated skills.
|
||||
|
||||
@@ -94,7 +94,10 @@ class TestSkillCatalog:
|
||||
assert "<name>beta</name>" in prompt
|
||||
assert "<description>Alpha skill</description>" in prompt
|
||||
assert "<location>/p/alpha/SKILL.md</location>" in prompt
|
||||
assert "<base_dir>/p/alpha</base_dir>" in prompt
|
||||
# <base_dir> is intentionally not emitted — the mandatory header
|
||||
# tells the model to resolve relative paths against the parent of
|
||||
# SKILL.md, so the redundant element was dropped.
|
||||
assert "<base_dir>" not in prompt
|
||||
|
||||
def test_to_prompt_sorted_by_name(self):
|
||||
skills = [
|
||||
@@ -130,13 +133,44 @@ class TestSkillCatalog:
|
||||
assert "<name>usr</name>" in prompt
|
||||
assert "<name>fw</name>" in prompt
|
||||
|
||||
def test_to_prompt_contains_behavioral_instruction(self):
|
||||
def test_to_prompt_contains_mandatory_header(self):
|
||||
"""The rendered catalog must carry the mandatory pre-reply checklist
|
||||
so soft guidance turns into a required step."""
|
||||
catalog = SkillCatalog([_make_skill(source_scope="project")])
|
||||
prompt = catalog.to_prompt()
|
||||
|
||||
assert "When a task matches a skill's description" in prompt
|
||||
assert "## Skills (mandatory)" in prompt
|
||||
assert "Before replying: scan <available_skills>" in prompt
|
||||
assert "never read more than one skill up front" in prompt
|
||||
assert "`read_file`" in prompt
|
||||
assert "SKILL.md" in prompt
|
||||
|
||||
def test_to_prompt_compact_fallback_drops_descriptions(self):
|
||||
"""When the full XML body exceeds the char threshold, the compact
|
||||
variant drops <description> but keeps every skill's <name>."""
|
||||
# Each skill contributes ~100+ chars with a long description.
|
||||
# 60 skills easily push the body past the threshold.
|
||||
skills = [
|
||||
_make_skill(
|
||||
name=f"skill-{i:03d}",
|
||||
description="A reasonably long description " * 4,
|
||||
location=f"/s/skill-{i:03d}/SKILL.md",
|
||||
base_dir=f"/s/skill-{i:03d}",
|
||||
)
|
||||
for i in range(60)
|
||||
]
|
||||
catalog = SkillCatalog(skills)
|
||||
prompt = catalog.to_prompt()
|
||||
|
||||
# Mandatory header still present but uses the compact variant wording.
|
||||
assert "## Skills (mandatory)" in prompt
|
||||
assert "scan <available_skills> <name>" in prompt
|
||||
# Every skill's name survives …
|
||||
for i in range(60):
|
||||
assert f"<name>skill-{i:03d}</name>" in prompt
|
||||
# … but no descriptions were rendered.
|
||||
assert "<description>" not in prompt
|
||||
|
||||
def test_build_pre_activated_prompt(self):
|
||||
skill = _make_skill("research", body="## Deep Research\nDo thorough research.")
|
||||
catalog = SkillCatalog([skill])
|
||||
|
||||
@@ -1,9 +1,14 @@
|
||||
"""Tests for AS-6 skill resource loading support.
|
||||
|
||||
Covers:
|
||||
- <base_dir> element in catalog XML
|
||||
- allowlisted_dirs property reflects trusted skill base directories
|
||||
- skill_dirs propagation to NodeContext
|
||||
|
||||
The catalog XML previously emitted a redundant <base_dir> element next to
|
||||
each <location>. That was dropped when the mandatory header took over the
|
||||
"resolve relative paths against the parent of SKILL.md" instruction, so
|
||||
there is no longer an XML-emission test for base_dir. Programmatic access
|
||||
via ``catalog.allowlisted_dirs`` is still covered below.
|
||||
"""
|
||||
|
||||
from framework.skills.catalog import SkillCatalog
|
||||
@@ -26,31 +31,6 @@ def _make_skill(
|
||||
|
||||
|
||||
class TestSkillResourceBaseDir:
|
||||
def test_base_dir_in_xml(self):
|
||||
"""Each community skill entry should expose its base_dir in the catalog XML."""
|
||||
skill = _make_skill("deploy", "/project/.hive/skills/deploy")
|
||||
catalog = SkillCatalog([skill])
|
||||
prompt = catalog.to_prompt()
|
||||
|
||||
assert "<base_dir>/project/.hive/skills/deploy</base_dir>" in prompt
|
||||
|
||||
def test_base_dir_xml_escaped(self):
|
||||
"""base_dir with XML-special chars should be escaped."""
|
||||
skill = _make_skill("s", "/path/with <&> chars")
|
||||
catalog = SkillCatalog([skill])
|
||||
prompt = catalog.to_prompt()
|
||||
|
||||
assert "<base_dir>/path/with &lt;&amp;&gt; chars</base_dir>" in prompt
|
||||
|
||||
def test_base_dir_present_for_framework_skills(self):
|
||||
"""Framework-scope skills now appear in the catalog like any other scope,
|
||||
and their base_dir is included in the XML."""
|
||||
skill = _make_skill("fw", "/hive/_default_skills/fw", source_scope="framework")
|
||||
catalog = SkillCatalog([skill])
|
||||
prompt = catalog.to_prompt()
|
||||
assert "<name>fw</name>" in prompt
|
||||
assert "<base_dir>/hive/_default_skills/fw</base_dir>" in prompt
|
||||
|
||||
def test_allowlisted_dirs_matches_skills(self):
|
||||
"""allowlisted_dirs returns all skill base_dirs including framework ones."""
|
||||
skills = [
|
||||
|
||||
@@ -0,0 +1,176 @@
|
||||
# 🐝 Hive Agent v0.10.0: The Colony
|
||||
|
||||
> ⚠️ **Breaking change.** This is a large architectural refactor of how agents work in Hive. **Old agents are no longer compatible.** Existing workspaces, custom agents, and saved sessions from pre-v0.10.0 builds will need to be recreated.
|
||||
|
||||
---
|
||||
|
||||
## ✨ Highlights
|
||||
|
||||
The **Colony** introduces a new way of working: a group of specialized workers operating together to run and scale your business.
|
||||
|
||||
The role of the **Queen** has evolved. Instead of only orchestrating, the Queen now **executes work first** to deliver immediate value, then **builds systems around that work** to create stable, repeatable business processes.
|
||||
|
||||
You now have a full leadership team of eight Queens, each with their own identity, expertise, and voice:
|
||||
|
||||
| Queen | Role |
|
||||
| --- | --- |
|
||||
| **Sophia** | Head of Brand & Design |
|
||||
| **Charlotte** | Head of Finance & Fundraising |
|
||||
| **Victoria** | Head of Growth |
|
||||
| **Eleanor** | Head of Legal |
|
||||
| **Rachel** | Head of Operations |
|
||||
| **Isabella** | Head of Product Strategy |
|
||||
| **Amelia** | Head of Talent |
|
||||
| **Alexandra** | Head of Technology |
|
||||
|
||||
Start automating your business processes with your Queens today.
|
||||
|
||||
---
|
||||
|
||||
## 🏛️ The Colony Architecture
|
||||
|
||||
### Queens as Identities, Not Just Orchestrators
|
||||
|
||||
- **Queen profiles** — each queen is a YAML-backed persona (`~/.hive/agents/queens/{queen_id}/profile.yaml`) with core traits, hidden background, psychological profile, behavior triggers, and skill sets. Profiles are injected into the system prompt at session start.
|
||||
- **CEO-style queen selection** — an LLM classifier routes every new user request to the best-matching queen based on the task at hand, with structured routing diagnostics (`QueenSelection`).
|
||||
- **Queen DMs** — direct-message pages for each queen with a dedicated session flow, session switcher, and prompt library integration.
|
||||
- **Independent / PM mode** — queens run in an independent mode for planning-phase work, with a "think out loud" internal monologue surfaced through internal tags.
|
||||
- **Queen memory v2** — simplified memory implementation with a reflection agent, cooldown-gated reflections, user identity, doppelganger wiring, and a recall selector for targeted retrieval.
|
||||
- **Queen lifecycle tools** — first-class tools for escalation, queen reply, and session handoff.
|
||||
|
||||
### Colony Runtime
|
||||
|
||||
- **Grand architecture revamp** — the framework, agent loop, runtime, graph, pipeline, executor, and node worker layers have been rewritten from the ground up. Deprecated shims and legacy orchestration paths have been removed.
|
||||
- **Colony creation flow** — colonies are created via skill, with reliable event bus subscription, worker spawning, and post-creation list refresh.
|
||||
- **Scheduled triggers** — colonies can now be woken on a cron schedule, with triggers firing directly into the owning queen's session.
|
||||
- **Simple fork** for agents, stable credential states, and improved worker execution reliability.
|
||||
|
||||
---
|
||||
|
||||
## 🆕 What's New
|
||||
|
||||
### Colony & Queens
|
||||
|
||||
- 8 default queen personas (Alexandra, Victoria, Isabella, Charlotte, Eleanor, Sophia, Amelia, Rachel) with profile YAML, examples, and behavior triggers
|
||||
- LLM-based queen selector with reasoning output
|
||||
- Queen DM page, queen session switcher, and sidebar queen item
|
||||
- Queen scope memory, role examples, and identity loading
|
||||
- Reflection agent with cooldown and improved reflection runner
|
||||
- Queen orchestrator + `routes_queens` API
|
||||
- Natural chat replies and cleaner home-prompt bootstrap
|
||||
- Queen identity for new sessions
|
||||
- `ask_user` / `ask_user_multiple` tools available in queen prompt
|
||||
- Escalation and queen-reply tools
|
||||
|
||||
### Skills & Tools
|
||||
|
||||
- **Learned default skills** — skills the queen has learned become part of her baseline
|
||||
- **Tool-gated skill activation** — skills only activate when their required tools are present
|
||||
- **Skills for colonies** — per-colony skill registration and loading
|
||||
- **Text-only model filter** — image-producing tools and vision-only prompt blocks are hidden from text-only models
|
||||
- **Browser skills upgrade** — improved click reliability, screenshot capture, and credential filtering
|
||||
- **Deprecated-tool removal** and alignment of Hive tool names across the codebase
|
||||
- **Ask-user widget** with fallback rendering and preserved tool pill mapping across turn boundaries for deferred completions
|
||||
- **Improved tool-call reliability** across the board (tool limit removed, tool blacklist, tool credential filter)
|
||||
- **MCP** — efficient MCP loading at initialization, default MCP bootstrapping, registered available MCP tools, fixed MCP tool initialization and registry pipeline stage
|
||||
|
||||
### LLM & Credentials
|
||||
|
||||
- **Key pool** for credential management with stable credential states
|
||||
- **Aden credentials storage adapter** and subscription-based LLM config activation endpoint
|
||||
- **Consolidated model config** with unified model catalog
|
||||
- **New providers** — Kimi, Hive, and Aden added to the model catalog
|
||||
- **Model switcher** UI with runtime model switching API
|
||||
- **LLM key validation endpoint** with agent errors surfaced via SSE
|
||||
- **BYOK modal** import fixes for subscription token detection
|
||||
|
||||
### Frontend
|
||||
|
||||
- **Home redesign** — new home, credentials, and org chart pages
|
||||
- **Colony chat** and **queen DM** pages
|
||||
- **Sidebar + header** components and global app layout/routing
|
||||
- **Model switcher, settings modal, template card**
|
||||
- **Prompt library** with search, category filtering, and UI polish
|
||||
- **Side panel** fixes and sub-agent pane light-mode support
|
||||
- **Flowchart** light-mode support and normalized settings modal sizing
|
||||
- **User profile settings** and UI enhancements
|
||||
- **Sync user profile** to global memory as `user-profile.md`; queen profile API transformation
|
||||
- Removed the old workspace GUI and its dependencies
|
||||
|
||||
### Framework & Runtime
|
||||
|
||||
- Architecture revamp: new runtime config, simplified agent loading, and new infrastructure for the queen
|
||||
- Home hive directory structure refactor
|
||||
- Agent loading pipeline fixes, MCP registry pipeline stage fix
|
||||
- Session resume improvements: separate resume vs new-session flow for queen sessions, edge-case fix for message injection in resumed sessions
|
||||
- Strip internal tags from user-visible output
|
||||
- Colony event bus subscription fixes and shared event bus for parent visibility
|
||||
- Worker spawn and stop-worker fixes
|
||||
- Default log level and extra logging hooks
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Bug Fixes
|
||||
|
||||
- **Ask-user widget** — fallback when widget fails to mount
|
||||
- **Skill loading** for colonies and proper skill resolution across queen sessions
|
||||
- **Model switching** and new-chat flow no longer carry stale state
|
||||
- **Tool pill mapping** preserved across turn boundary for deferred `ask_user` completions
|
||||
- **Tool limit** removed (was capping legitimate long tool lists)
|
||||
- **Queen loading** stability fixes
|
||||
- **Side panel** rendering issues
|
||||
- **Deprecated graphs** removed from UI
|
||||
- **Home-page prompts** now reach the queen directly without waiting for the greeting to finish
|
||||
- **Colony creation** link, reframing, and post-creation refresh
|
||||
- **Build error** in colony creation path
|
||||
- **GCU system prompt** tuning
|
||||
- **Tool credential filter** correctness
|
||||
- **Screenshot** capture and browser click reliability
|
||||
- **Queen message injection** when resuming a session
|
||||
- **Internal-tag diction** fixes in surfaced output
|
||||
- **MCP tool initialization** on cold start
|
||||
- **Frontend DM** edge cases
|
||||
- **Prompt library** new-session handling for new chat
|
||||
- **Config validation** and unavailable Minimax model handling
|
||||
- **Queen identity** loading on cold boot
|
||||
- **Extra text** around the queen selector's JSON response is now parsed safely
|
||||
- **Outdated queen communication prompt** removed
|
||||
|
||||
---
|
||||
|
||||
## 🧹 Refactor & Cleanup
|
||||
|
||||
- **Shatter the Eld\*n ring** — top-to-bottom refactor of the runtime core
|
||||
- **Grand clean-up** of deprecated code paths
|
||||
- **Remove deprecated shims** and old session-status tools
|
||||
- **Big test cleanup** — integration tests and component tests rewritten around the new architecture
|
||||
- **Update references** for orchestrator / host / loader renames
|
||||
- **Consolidate tests** for queen state machine and verified outcomes
|
||||
- **Remove old workspace GUI** and its dependencies
|
||||
- **Remove old "new agent" button** and deprecated entry points
|
||||
- **Home hive directory** structure refactor
|
||||
|
||||
---
|
||||
|
||||
## ⚠️ Breaking Changes
|
||||
|
||||
- **Old agents are not compatible.** Custom agents authored against the pre-v0.10.0 framework will need to be re-authored against the new Queen/Colony runtime.
|
||||
- **Session format** — pre-v0.10.0 sessions cannot be resumed.
|
||||
- **Deprecated tools removed** and Hive tool names have been realigned; any external scripts referencing old tool names must be updated.
|
||||
- **Old session-status tools** removed in favor of the new queen lifecycle tools.
|
||||
- **Workspace GUI removed** — the legacy workspace UI is gone; use the new home, colony chat, and queen DM pages.
|
||||
- **MCP registry pipeline** — MCP configurations now load through the new registry; custom MCP setups may need to be re-registered.
|
||||
|
||||
---
|
||||
|
||||
## 🚀 Upgrading
|
||||
|
||||
Because this release rewrites the agent runtime, the recommended upgrade path is:
|
||||
|
||||
1. Back up `~/.hive/` if you have sessions or custom agents you want to reference.
|
||||
2. Fetch the latest tags and check out the `v0.10.0` tag.
|
||||
3. Let Hive initialize the new queen profiles under `~/.hive/agents/queens/`.
|
||||
4. Re-create any custom agents as colonies/queens against the new framework.
|
||||
5. Re-register any custom MCP servers through the new MCP registry.
|
||||
|
||||
Welcome to the Colony. 🐝
|
||||
Reference in New Issue
Block a user