fix: skills prompts

This commit is contained in:
Richard Tang
2026-04-14 18:51:14 -07:00
parent 50eb4b0e8f
commit 27b15789fb
4 changed files with 276 additions and 47 deletions
+57 -18
View File
@@ -14,13 +14,37 @@ from framework.skills.skill_errors import SkillErrorCode, log_skill_error
logger = logging.getLogger(__name__)
_BEHAVIORAL_INSTRUCTION = (
"The following skills provide specialized instructions for specific tasks.\n"
"When a task matches a skill's description, read the SKILL.md at the listed\n"
"location to load the full instructions before proceeding.\n"
"When a skill references relative paths, resolve them against the skill's\n"
"directory (the parent of SKILL.md) and use absolute paths in tool calls."
)
# Upper bound on the raw `<available_skills>` XML body, in characters.
# When the full catalog (with <description> entries) exceeds this, we fall
# back to the compact variant that drops descriptions but keeps every skill
# visible. Preserving awareness of every skill beats truncating entries.
#
# The bound is inclusive: a full body of exactly this many characters is
# still rendered with descriptions. Only the XML body is measured; the
# mandatory header prepended by `to_prompt` is not counted, and the compact
# body itself is emitted without a further size check.
_COMPACT_THRESHOLD_CHARS = 5000
# Header placed before the <available_skills> block when the full XML body
# (including <description> elements) fits within _COMPACT_THRESHOLD_CHARS.
# It turns skill selection into a required pre-reply step: scan the
# descriptions, read at most one SKILL.md with `read_file`, then follow it.
# Keep the wording in sync with _MANDATORY_HEADER_COMPACT, which is the same
# text except that it points the model at <name> instead of <description>.
# The catalog tests assert on several phrases of this text verbatim, so
# rewording it requires updating those assertions.
_MANDATORY_HEADER_FULL = """## Skills (mandatory)
Before replying: scan <available_skills> <description> entries.
- If exactly one skill clearly applies: read its SKILL.md at <location> with `read_file`, then follow it.
- If multiple could apply: choose the most specific one, then read/follow it.
- If none clearly apply: do not read any SKILL.md.
Constraints: never read more than one skill up front; only read after selecting.
- When a skill drives external API writes (Gmail, Calendar, GitHub, etc.), assume rate limits: prefer fewer larger writes, avoid tight one-item loops, serialize bursts when possible, and respect 429/Retry-After.
The following skills provide specialized instructions for specific tasks.
Use `read_file` to load a skill's SKILL.md when the task matches its description.
When a skill file references a relative path, resolve it against the skill directory (parent of SKILL.md) and use that absolute path in tool commands."""
# Header placed before the <available_skills> block when the full XML body
# exceeds _COMPACT_THRESHOLD_CHARS and the compact rendering (no
# <description> elements) is emitted instead. Because descriptions are
# absent in that rendering, the model is told to match tasks against each
# skill's <name>. Keep the wording in sync with _MANDATORY_HEADER_FULL; the
# two texts differ only in the <description>/<name> references.
_MANDATORY_HEADER_COMPACT = """## Skills (mandatory)
Before replying: scan <available_skills> <name> entries.
- If exactly one skill clearly applies: read its SKILL.md at <location> with `read_file`, then follow it.
- If multiple could apply: choose the most specific one, then read/follow it.
- If none clearly apply: do not read any SKILL.md.
Constraints: never read more than one skill up front; only read after selecting.
- When a skill drives external API writes (Gmail, Calendar, GitHub, etc.), assume rate limits: prefer fewer larger writes, avoid tight one-item loops, serialize bursts when possible, and respect 429/Retry-After.
The following skills provide specialized instructions for specific tasks.
Use `read_file` to load a skill's SKILL.md when the task matches its name.
When a skill file references a relative path, resolve it against the skill directory (parent of SKILL.md) and use that absolute path in tool commands."""
class SkillCatalog:
@@ -61,27 +85,42 @@ class SkillCatalog:
def to_prompt(self) -> str:
"""Generate the catalog prompt for system prompt injection.
Returns empty string if no community/user skills are discovered
(default skills are handled separately by DefaultSkillManager).
"""
# All skills go through the catalog for progressive disclosure.
all_skills = list(self._skills.values())
Returns empty string when no skills are present. Otherwise returns
a mandatory pre-reply checklist + decision rules + rate-limit note,
followed by the <available_skills> XML body.
When the full XML body exceeds ``_COMPACT_THRESHOLD_CHARS``, the
compact variant is emitted instead: <description> elements are
dropped so every skill stays visible before any gets truncated.
"""
all_skills = sorted(self._skills.values(), key=lambda s: s.name)
if not all_skills:
return ""
full_xml = self._render_xml(all_skills, compact=False)
if len(full_xml) <= _COMPACT_THRESHOLD_CHARS:
return f"{_MANDATORY_HEADER_FULL}\n\n{full_xml}"
compact_xml = self._render_xml(all_skills, compact=True)
return f"{_MANDATORY_HEADER_COMPACT}\n\n{compact_xml}"
@staticmethod
def _render_xml(skills: list[ParsedSkill], *, compact: bool) -> str:
"""Render the `<available_skills>` block.
``compact=True`` drops `<description>` to preserve skill awareness
when the catalog would otherwise blow the char budget.
"""
lines = ["<available_skills>"]
for skill in sorted(all_skills, key=lambda s: s.name):
for skill in skills:
lines.append(" <skill>")
lines.append(f" <name>{escape(skill.name)}</name>")
lines.append(f" <description>{escape(skill.description)}</description>")
if not compact:
lines.append(f" <description>{escape(skill.description)}</description>")
lines.append(f" <location>{escape(skill.location)}</location>")
lines.append(f" <base_dir>{escape(skill.base_dir)}</base_dir>")
lines.append(" </skill>")
lines.append("</available_skills>")
xml_block = "\n".join(lines)
return f"{_BEHAVIORAL_INSTRUCTION}\n\n{xml_block}"
return "\n".join(lines)
def build_pre_activated_prompt(self, skill_names: list[str]) -> str:
"""Build prompt content for pre-activated skills.
+37 -3
View File
@@ -94,7 +94,10 @@ class TestSkillCatalog:
assert "<name>beta</name>" in prompt
assert "<description>Alpha skill</description>" in prompt
assert "<location>/p/alpha/SKILL.md</location>" in prompt
assert "<base_dir>/p/alpha</base_dir>" in prompt
# <base_dir> is intentionally not emitted — the mandatory header
# tells the model to resolve relative paths against the parent of
# SKILL.md, so the redundant element was dropped.
assert "<base_dir>" not in prompt
def test_to_prompt_sorted_by_name(self):
skills = [
@@ -130,13 +133,44 @@ class TestSkillCatalog:
assert "<name>usr</name>" in prompt
assert "<name>fw</name>" in prompt
def test_to_prompt_contains_behavioral_instruction(self):
def test_to_prompt_contains_mandatory_header(self):
"""The rendered catalog must carry the mandatory pre-reply checklist
so soft guidance turns into a required step."""
catalog = SkillCatalog([_make_skill(source_scope="project")])
prompt = catalog.to_prompt()
assert "When a task matches a skill's description" in prompt
assert "## Skills (mandatory)" in prompt
assert "Before replying: scan <available_skills>" in prompt
assert "never read more than one skill up front" in prompt
assert "`read_file`" in prompt
assert "SKILL.md" in prompt
def test_to_prompt_compact_fallback_drops_descriptions(self):
    """An oversized catalog falls back to the compact rendering.

    Once the full XML body is larger than the character threshold, the
    prompt must omit every <description> element while still listing the
    <name> of each skill.
    """
    # Each description alone is 120 characters, so 60 entries push the
    # full body well past the threshold.
    indices = range(60)
    oversized = []
    for i in indices:
        oversized.append(
            _make_skill(
                name=f"skill-{i:03d}",
                description="A reasonably long description " * 4,
                location=f"/s/skill-{i:03d}/SKILL.md",
                base_dir=f"/s/skill-{i:03d}",
            )
        )
    rendered = SkillCatalog(oversized).to_prompt()

    # The mandatory header is still there, in its compact wording.
    assert "## Skills (mandatory)" in rendered
    assert "scan <available_skills> <name>" in rendered

    # No skill was dropped from the listing ...
    for i in indices:
        assert f"<name>skill-{i:03d}</name>" in rendered

    # ... and no description element was emitted.
    assert "<description>" not in rendered
def test_build_pre_activated_prompt(self):
skill = _make_skill("research", body="## Deep Research\nDo thorough research.")
catalog = SkillCatalog([skill])
+6 -26
View File
@@ -1,9 +1,14 @@
"""Tests for AS-6 skill resource loading support.
Covers:
- <base_dir> element in catalog XML
- allowlisted_dirs property reflects trusted skill base directories
- skill_dirs propagation to NodeContext
The catalog XML previously emitted a redundant <base_dir> element next to
each <location>. That was dropped when the mandatory header took over the
"resolve relative paths against the parent of SKILL.md" instruction, so
there is no longer an XML-emission test for base_dir. Programmatic access
via ``catalog.allowlisted_dirs`` is still covered below.
"""
from framework.skills.catalog import SkillCatalog
@@ -26,31 +31,6 @@ def _make_skill(
class TestSkillResourceBaseDir:
def test_base_dir_in_xml(self):
"""Each community skill entry should expose its base_dir in the catalog XML."""
skill = _make_skill("deploy", "/project/.hive/skills/deploy")
catalog = SkillCatalog([skill])
prompt = catalog.to_prompt()
assert "<base_dir>/project/.hive/skills/deploy</base_dir>" in prompt
def test_base_dir_xml_escaped(self):
"""base_dir with XML-special chars should be escaped."""
skill = _make_skill("s", "/path/with <&> chars")
catalog = SkillCatalog([skill])
prompt = catalog.to_prompt()
assert "<base_dir>/path/with &lt;&amp;&gt; chars</base_dir>" in prompt
def test_base_dir_present_for_framework_skills(self):
"""Framework-scope skills now appear in the catalog like any other scope,
and their base_dir is included in the XML."""
skill = _make_skill("fw", "/hive/_default_skills/fw", source_scope="framework")
catalog = SkillCatalog([skill])
prompt = catalog.to_prompt()
assert "<name>fw</name>" in prompt
assert "<base_dir>/hive/_default_skills/fw</base_dir>" in prompt
def test_allowlisted_dirs_matches_skills(self):
"""allowlisted_dirs returns all skill base_dirs including framework ones."""
skills = [
+176
View File
@@ -0,0 +1,176 @@
# 🐝 Hive Agent v0.10.0: The Colony
> ⚠️ **Breaking change.** This is a large architectural refactor of how agents work in Hive. **Old agents are no longer compatible.** Existing workspaces, custom agents, and saved sessions from pre-v0.10.0 builds will need to be recreated.
---
## ✨ Highlights
The **Colony** introduces a new way of working: a group of specialized workers operating together to run and scale your business.
The role of the **Queen** has evolved. Instead of only orchestrating, the Queen now **executes work first** to deliver immediate value, then **builds systems around that work** to create stable, repeatable business processes.
You now have a full leadership team of eight Queens, each with their own identity, expertise, and voice:
| Queen | Role |
| --- | --- |
| **Sophia** | Head of Brand & Design |
| **Charlotte** | Head of Finance & Fundraising |
| **Victoria** | Head of Growth |
| **Eleanor** | Head of Legal |
| **Rachel** | Head of Operations |
| **Isabella** | Head of Product Strategy |
| **Amelia** | Head of Talent |
| **Alexandra** | Head of Technology |
Start automating your business processes with your Queens today.
---
## 🏛️ The Colony Architecture
### Queens as Identities, Not Just Orchestrators
- **Queen profiles** — each queen is a YAML-backed persona (`~/.hive/agents/queens/{queen_id}/profile.yaml`) with core traits, hidden background, psychological profile, behavior triggers, and skill sets. Profiles are injected into the system prompt at session start.
- **CEO-style queen selection** — an LLM classifier routes every new user request to the best-matching queen based on the task at hand, with structured routing diagnostics (`QueenSelection`).
- **Queen DMs** — direct-message pages for each queen with a dedicated session flow, session switcher, and prompt library integration.
- **Independent / PM mode** — queens run in an independent mode for planning-phase work, with a "think out loud" internal monologue surfaced through internal tags.
- **Queen memory v2** — a simplified memory implementation with a reflection agent, cooldown-gated reflections, user identity, doppelganger wiring, and a recall selector for targeted retrieval.
- **Queen lifecycle tools** — first-class tools for escalation, queen reply, and session handoff.
### Colony Runtime
- **Grand architecture revamp** — the framework, agent loop, runtime, graph, pipeline, executor, and node worker layers have been rewritten from the ground up. Deprecated shims and legacy orchestration paths have been removed.
- **Colony creation flow** — colonies are created via skill, with reliable event bus subscription, worker spawning, and post-creation list refresh.
- **Scheduled triggers** — colonies can now be woken on a cron schedule, with triggers firing directly into the owning queen's session.
- **Simple fork** for agents, stable credential states, and improved worker execution reliability.
---
## 🆕 What's New
### Colony & Queens
- 8 default queen personas (Alexandra, Victoria, Isabella, Charlotte, Eleanor, Sophia, Amelia, Rachel) with profile YAML, examples, and behavior triggers
- LLM-based queen selector with reasoning output
- Queen DM page, queen session switcher, and sidebar queen item
- Queen scope memory, role examples, and identity loading
- Reflection agent with cooldown and improved reflection runner
- Queen orchestrator + `routes_queens` API
- Natural chat replies and cleaner home-prompt bootstrap
- Queen identity for new sessions
- `ask_user` / `ask_user_multiple` tools available in queen prompt
- Escalation and queen-reply tools
### Skills & Tools
- **Learned default skills** — skills the queen has learned become part of her baseline
- **Tool-gated skill activation** — skills only activate when their required tools are present
- **Skills for colonies** — per-colony skill registration and loading
- **Text-only model filter** — image-producing tools and vision-only prompt blocks are hidden from text-only models
- **Browser skills upgrade** — improved click reliability, screenshot capture, and credential filtering
- **Deprecated-tool removal** and alignment of Hive tool names across the codebase
- **Ask-user widget** with fallback rendering and preserved tool pill mapping across turn boundaries for deferred completions
- **Improved tool-call reliability** across the board (tool limit removed, tool blacklist, tool credential filter)
- **MCP** — efficient MCP loading at initialization, default MCP bootstrapping, registration of available MCP tools, and fixes to MCP tool initialization and the registry pipeline stage
### LLM & Credentials
- **Key pool** for credential management with stable credential states
- **Aden credentials storage adapter** and subscription-based LLM config activation endpoint
- **Consolidated model config** with unified model catalog
- **New providers** — Kimi, Hive, and Aden added to the model catalog
- **Model switcher** UI with runtime model switching API
- **LLM key validation endpoint** with agent errors surfaced via SSE
- **BYOK modal** import fixes for subscription token detection
### Frontend
- **Home redesign** — new home, credentials, and org chart pages
- **Colony chat** and **queen DM** pages
- **Sidebar + header** components and global app layout/routing
- **Model switcher, settings modal, template card**
- **Prompt library** with search, category filtering, and UI polish
- **Side panel** fixes and sub-agent pane light-mode support
- **Flowchart** light-mode support and normalized settings modal sizing
- **User profile settings** and UI enhancements
- **Sync user profile** to global memory as `user-profile.md`; queen profile API transformation
- Removed the old workspace GUI and its dependencies
### Framework & Runtime
- Architecture revamp: new runtime config, simplified agent loading, new infra for queen
- Home hive directory structure refactor
- Agent loading pipeline fixes, MCP registry pipeline stage fix
- Session resume improvements: separate resume vs new-session flow for queen sessions, edge-case fix for message injection in resumed sessions
- Strip internal tags from user-visible output
- Colony event bus subscription fixes and shared event bus for parent visibility
- Worker spawn and stop-worker fixes
- Default log level and extra logging hooks
---
## 🐛 Bug Fixes
- **Ask-user widget** — fallback when widget fails to mount
- **Skill loading** for colonies and proper skill resolution across queen sessions
- **Model switching** and new-chat flow no longer carry stale state
- **Tool pill mapping** preserved across turn boundary for deferred `ask_user` completions
- **Tool limit** removed (was capping legitimate long tool lists)
- **Queen loading** stability fixes
- **Side panel** rendering issues
- **Deprecated graphs** removed from UI
- **Home-page prompts** now reach the queen directly without waiting for the greeting to finish
- **Colony creation** link, reframing, and post-creation refresh
- **Build error** in colony creation path
- **GCU system prompt** tuning
- **Tool credential filter** correctness
- **Screenshot** capture and browser click reliability
- **Queen message injection** when resuming a session
- **Internal-tag diction** fixes in surfaced output
- **MCP tool initialization** on cold start
- **Frontend DM** edge cases
- **Prompt library** new-session handling for new chat
- **Config validation** and unavailable Minimax model handling
- **Queen identity** loading on cold boot
- **Extra text** in queen selector JSON response parsed safely
- **Outdated queen communication prompt** removed
---
## 🧹 Refactor & Cleanup
- **Shatter the Eld\*n ring** — top-to-bottom refactor of the runtime core
- **Grand clean-up** of deprecated code paths
- **Remove deprecated shims** and old session-status tools
- **Big test cleanup** — integration tests and component tests rewritten around the new architecture
- **Update references** for orchestrator / host / loader renames
- **Consolidate tests** for queen state machine and verified outcomes
- **Remove old workspace GUI** and its dependencies
- **Remove old "new agent" button** and deprecated entry points
- **Home hive directory** structure refactor
---
## ⚠️ Breaking Changes
- **Old agents are not compatible.** Custom agents authored against the pre-v0.10.0 framework will need to be re-authored against the new Queen/Colony runtime.
- **Session format** — pre-v0.10.0 sessions cannot be resumed.
- **Deprecated tools removed** and Hive tool names have been realigned; any external scripts referencing old tool names must be updated.
- **Old session-status tools** removed in favor of the new queen lifecycle tools.
- **Workspace GUI removed** — the legacy workspace UI is gone; use the new home, colony chat, and queen DM pages.
- **MCP registry pipeline** — MCP configurations now load through the new registry; custom MCP setups may need to be re-registered.
---
## 🚀 Upgrading
Because this release rewrites the agent runtime, the recommended upgrade path is:
1. Back up `~/.hive/` if you have sessions or custom agents you want to reference.
2. Fetch the repository and check out the `v0.10.0` tag.
3. Let Hive initialize the new queen profiles under `~/.hive/agents/queens/`.
4. Re-create any custom agents as colonies/queens against the new framework.
5. Re-register any custom MCP servers through the new MCP registry.
Welcome to the Colony. 🐝