diff --git a/core/framework/skills/catalog.py b/core/framework/skills/catalog.py index 08d3285e..4fdd134f 100644 --- a/core/framework/skills/catalog.py +++ b/core/framework/skills/catalog.py @@ -14,13 +14,37 @@ from framework.skills.skill_errors import SkillErrorCode, log_skill_error logger = logging.getLogger(__name__) -_BEHAVIORAL_INSTRUCTION = ( - "The following skills provide specialized instructions for specific tasks.\n" - "When a task matches a skill's description, read the SKILL.md at the listed\n" - "location to load the full instructions before proceeding.\n" - "When a skill references relative paths, resolve them against the skill's\n" - "directory (the parent of SKILL.md) and use absolute paths in tool calls." -) +# Upper bound on the raw `` XML body, in characters. +# When the full catalog (with entries) exceeds this, we fall +# back to the compact variant that drops descriptions but keeps every skill +# visible. Preserving awareness of every skill beats truncating entries. +_COMPACT_THRESHOLD_CHARS = 5000 + +_MANDATORY_HEADER_FULL = """## Skills (mandatory) +Before replying: scan entries. +- If exactly one skill clearly applies: read its SKILL.md at with `read_file`, then follow it. +- If multiple could apply: choose the most specific one, then read/follow it. +- If none clearly apply: do not read any SKILL.md. +Constraints: never read more than one skill up front; only read after selecting. +- When a skill drives external API writes (Gmail, Calendar, GitHub, etc.), assume rate limits: prefer fewer larger writes, avoid tight one-item loops, serialize bursts when possible, and respect 429/Retry-After. + + +The following skills provide specialized instructions for specific tasks. +Use `read_file` to load a skill's SKILL.md when the task matches its description. 
+When a skill file references a relative path, resolve it against the skill directory (parent of SKILL.md) and use that absolute path in tool commands.""" + +_MANDATORY_HEADER_COMPACT = """## Skills (mandatory) +Before replying: scan entries. +- If exactly one skill clearly applies: read its SKILL.md at with `read_file`, then follow it. +- If multiple could apply: choose the most specific one, then read/follow it. +- If none clearly apply: do not read any SKILL.md. +Constraints: never read more than one skill up front; only read after selecting. +- When a skill drives external API writes (Gmail, Calendar, GitHub, etc.), assume rate limits: prefer fewer larger writes, avoid tight one-item loops, serialize bursts when possible, and respect 429/Retry-After. + + +The following skills provide specialized instructions for specific tasks. +Use `read_file` to load a skill's SKILL.md when the task matches its name. +When a skill file references a relative path, resolve it against the skill directory (parent of SKILL.md) and use that absolute path in tool commands.""" class SkillCatalog: @@ -61,27 +85,42 @@ class SkillCatalog: def to_prompt(self) -> str: """Generate the catalog prompt for system prompt injection. - Returns empty string if no community/user skills are discovered - (default skills are handled separately by DefaultSkillManager). - """ - # All skills go through the catalog for progressive disclosure. - all_skills = list(self._skills.values()) + Returns empty string when no skills are present. Otherwise returns + a mandatory pre-reply checklist + decision rules + rate-limit note, + followed by the XML body. + When the full XML body exceeds ``_COMPACT_THRESHOLD_CHARS``, the + compact variant is emitted instead: elements are + dropped so every skill stays visible before any gets truncated. 
+ """ + all_skills = sorted(self._skills.values(), key=lambda s: s.name) if not all_skills: return "" + full_xml = self._render_xml(all_skills, compact=False) + if len(full_xml) <= _COMPACT_THRESHOLD_CHARS: + return f"{_MANDATORY_HEADER_FULL}\n\n{full_xml}" + + compact_xml = self._render_xml(all_skills, compact=True) + return f"{_MANDATORY_HEADER_COMPACT}\n\n{compact_xml}" + + @staticmethod + def _render_xml(skills: list[ParsedSkill], *, compact: bool) -> str: + """Render the `` block. + + ``compact=True`` drops `` to preserve skill awareness + when the catalog would otherwise blow the char budget. + """ lines = [""] - for skill in sorted(all_skills, key=lambda s: s.name): + for skill in skills: lines.append(" ") lines.append(f" {escape(skill.name)}") - lines.append(f" {escape(skill.description)}") + if not compact: + lines.append(f" {escape(skill.description)}") lines.append(f" {escape(skill.location)}") - lines.append(f" {escape(skill.base_dir)}") lines.append(" ") lines.append("") - - xml_block = "\n".join(lines) - return f"{_BEHAVIORAL_INSTRUCTION}\n\n{xml_block}" + return "\n".join(lines) def build_pre_activated_prompt(self, skill_names: list[str]) -> str: """Build prompt content for pre-activated skills. diff --git a/core/tests/test_skill_catalog.py b/core/tests/test_skill_catalog.py index dc55f834..5501c70c 100644 --- a/core/tests/test_skill_catalog.py +++ b/core/tests/test_skill_catalog.py @@ -94,7 +94,10 @@ class TestSkillCatalog: assert "beta" in prompt assert "Alpha skill" in prompt assert "/p/alpha/SKILL.md" in prompt - assert "/p/alpha" in prompt + # is intentionally not emitted โ€” the mandatory header + # tells the model to resolve relative paths against the parent of + # SKILL.md, so the redundant element was dropped. 
+ assert "" not in prompt def test_to_prompt_sorted_by_name(self): skills = [ @@ -130,13 +133,44 @@ class TestSkillCatalog: assert "usr" in prompt assert "fw" in prompt - def test_to_prompt_contains_behavioral_instruction(self): + def test_to_prompt_contains_mandatory_header(self): + """The rendered catalog must carry the mandatory pre-reply checklist + so soft guidance turns into a required step.""" catalog = SkillCatalog([_make_skill(source_scope="project")]) prompt = catalog.to_prompt() - assert "When a task matches a skill's description" in prompt + assert "## Skills (mandatory)" in prompt + assert "Before replying: scan " in prompt + assert "never read more than one skill up front" in prompt + assert "`read_file`" in prompt assert "SKILL.md" in prompt + def test_to_prompt_compact_fallback_drops_descriptions(self): + """When the full XML body exceeds the char threshold, the compact + variant drops but keeps every skill's .""" + # Each skill contributes ~100+ chars with a long description. + # 60 skills easily pushes the body past the threshold. + skills = [ + _make_skill( + name=f"skill-{i:03d}", + description="A reasonably long description " * 4, + location=f"/s/skill-{i:03d}/SKILL.md", + base_dir=f"/s/skill-{i:03d}", + ) + for i in range(60) + ] + catalog = SkillCatalog(skills) + prompt = catalog.to_prompt() + + # Mandatory header still present but uses the compact variant wording. + assert "## Skills (mandatory)" in prompt + assert "scan " in prompt + # Every skill's name survives โ€ฆ + for i in range(60): + assert f"skill-{i:03d}" in prompt + # โ€ฆ but no descriptions were rendered. 
+ assert "" not in prompt + def test_build_pre_activated_prompt(self): skill = _make_skill("research", body="## Deep Research\nDo thorough research.") catalog = SkillCatalog([skill]) diff --git a/core/tests/test_skill_resources.py b/core/tests/test_skill_resources.py index 9b04b144..e7594625 100644 --- a/core/tests/test_skill_resources.py +++ b/core/tests/test_skill_resources.py @@ -1,9 +1,14 @@ """Tests for AS-6 skill resource loading support. Covers: -- element in catalog XML - allowlisted_dirs property reflects trusted skill base directories - skill_dirs propagation to NodeContext + +The catalog XML previously emitted a redundant element next to +each . That was dropped when the mandatory header took over the +"resolve relative paths against the parent of SKILL.md" instruction, so +there is no longer an XML-emission test for base_dir. Programmatic access +via ``catalog.allowlisted_dirs`` is still covered below. """ from framework.skills.catalog import SkillCatalog @@ -26,31 +31,6 @@ def _make_skill( class TestSkillResourceBaseDir: - def test_base_dir_in_xml(self): - """Each community skill entry should expose its base_dir in the catalog XML.""" - skill = _make_skill("deploy", "/project/.hive/skills/deploy") - catalog = SkillCatalog([skill]) - prompt = catalog.to_prompt() - - assert "/project/.hive/skills/deploy" in prompt - - def test_base_dir_xml_escaped(self): - """base_dir with XML-special chars should be escaped.""" - skill = _make_skill("s", "/path/with <&> chars") - catalog = SkillCatalog([skill]) - prompt = catalog.to_prompt() - - assert "/path/with <&> chars" in prompt - - def test_base_dir_present_for_framework_skills(self): - """Framework-scope skills now appear in the catalog like any other scope, - and their base_dir is included in the XML.""" - skill = _make_skill("fw", "/hive/_default_skills/fw", source_scope="framework") - catalog = SkillCatalog([skill]) - prompt = catalog.to_prompt() - assert "fw" in prompt - assert "/hive/_default_skills/fw" in 
prompt - def test_allowlisted_dirs_matches_skills(self): """allowlisted_dirs returns all skill base_dirs including framework ones.""" skills = [ diff --git a/docs/releases/v0.10.0.md b/docs/releases/v0.10.0.md new file mode 100644 index 00000000..ac3475da --- /dev/null +++ b/docs/releases/v0.10.0.md @@ -0,0 +1,176 @@ +# 🐝 Hive Agent v0.10.0: The Colony + +> ⚠️ **Breaking change.** This is a large architectural refactor of how agents work in Hive. **Old agents are no longer compatible.** Existing workspaces, custom agents, and saved sessions from pre-v0.10.0 builds will need to be recreated. + +--- + +## ✨ Highlights + +The **Colony** introduces a new way of working: a group of specialized workers operating together to run and scale your business. + +The role of the **Queen** has evolved. Instead of only orchestrating, the Queen now **executes work first** to deliver immediate value, then **builds systems around that work** to create stable, repeatable business processes. + +You now have a full leadership team of eight Queens, each with their own identity, expertise, and voice: + +| Queen | Role | +| --- | --- | +| **Sophia** | Head of Brand & Design | +| **Charlotte** | Head of Finance & Fundraising | +| **Victoria** | Head of Growth | +| **Eleanor** | Head of Legal | +| **Rachel** | Head of Operations | +| **Isabella** | Head of Product Strategy | +| **Amelia** | Head of Talent | +| **Alexandra** | Head of Technology | + +Start automating your business processes with your Queens today. + +--- + +## 🏛️ The Colony Architecture + +### Queens as Identities, Not Just Orchestrators + +- **Queen profiles** — each queen is a YAML-backed persona (`~/.hive/agents/queens/{queen_id}/profile.yaml`) with core traits, hidden background, psychological profile, behavior triggers, and skill sets. Profiles are injected into the system prompt at session start. 
+- **CEO-style queen selection** — an LLM classifier routes every new user request to the best-matching queen based on the task at hand, with structured routing diagnostics (`QueenSelection`). +- **Queen DMs** — direct-message pages for each queen with a dedicated session flow, session switcher, and prompt library integration. +- **Independent / PM mode** — queens run in an independent mode for planning-phase work, with a "think out loud" internal monologue surfaced through internal tags. +- **Queen memory v2** — simplified memory implementation with reflection agent, cooldown-gated reflections, user identity, doppelganger wiring, and recall-selector for targeted retrieval. +- **Queen lifecycle tools** — first-class tools for escalation, queen reply, and session handoff. + +### Colony Runtime + +- **Grand architecture revamp** — the framework, agent loop, runtime, graph, pipeline, executor, and node worker layers have been rewritten from the ground up. Deprecated shims and legacy orchestration paths have been removed. +- **Colony creation flow** — colonies are created via skill, with reliable event bus subscription, worker spawning, and post-creation list refresh. +- **Scheduled triggers** — colonies can now be woken on a cron schedule, with triggers firing directly into the owning queen's session. +- **Simple fork** for agents, stable credential states, and improved worker execution reliability. 
+ +--- + +## 🆕 What's New + +### Colony & Queens + +- 8 default queen personas (Alexandra, Victoria, Isabella, Charlotte, Eleanor, Sophia, Amelia, Rachel) with profile YAML, examples, and behavior triggers +- LLM-based queen selector with reasoning output +- Queen DM page, queen session switcher, and sidebar queen item +- Queen scope memory, role examples, and identity loading +- Reflection agent with cooldown and improved reflection runner +- Queen orchestrator + `routes_queens` API +- Natural chat replies and cleaner home-prompt bootstrap +- Queen identity for new sessions +- `ask_user` / `ask_user_multiple` tools available in queen prompt +- Escalation and queen-reply tools + +### Skills & Tools + +- **Learned default skills** — skills the queen has learned become part of her baseline +- **Tool-gated skill activation** — skills only activate when their required tools are present +- **Skills for colonies** — per-colony skill registration and loading +- **Text-only model filter** — image-producing tools and vision-only prompt blocks are hidden from text-only models +- **Browser skills upgrade** — improved click reliability, screenshot capture, and credential filtering +- **Deprecated-tool removal** and alignment of Hive tool names across the codebase +- **Ask-user widget** with fallback rendering and preserved tool pill mapping across turn boundaries for deferred completions +- **Improved tool-call reliability** across the board (tool limit removed, tool blacklist, tool credential filter) +- **MCP** — efficient MCP loading at initialization, default MCP bootstrapping, registered available MCP tools, fixed MCP tool initialization and registry pipeline stage + +### LLM & Credentials + +- **Key pool** for credential management with stable credential states +- **Aden credentials storage adapter** and subscription-based LLM config activation endpoint +- **Consolidated model config** with unified model catalog +- **New providers** — Kimi, Hive, and Aden 
added to the model catalog +- **Model switcher** UI with runtime model switching API +- **LLM key validation endpoint** with agent errors surfaced via SSE +- **BYOK modal** import fixes for subscription token detection + +### Frontend + +- **Home redesign** — new home, credentials, and org chart pages +- **Colony chat** and **queen DM** pages +- **Sidebar + header** components and global app layout/routing +- **Model switcher, settings modal, template card** +- **Prompt library** with search, category filtering, and UI polish +- **Side panel** fixes and sub-agent pane light-mode support +- **Flowchart** light-mode support and normalized settings modal sizing +- **User profile settings** and UI enhancements +- **Sync user profile** to global memory as `user-profile.md`; queen profile API transformation +- Removed the old workspace GUI and its dependencies + +### Framework & Runtime + +- Architecture revamp: new runtime config, simplified agent loading, new infra for queen +- Home hive directory structure refactor +- Agent loading pipeline fixes, MCP registry pipeline stage fix +- Session resume improvements: separate resume vs new-session flow for queen sessions, edge-case fix for message injection in resumed sessions +- Strip internal tags from user-visible output +- Colony event bus subscription fixes and shared event bus for parent visibility +- Worker spawn and stop-worker fixes +- Default log level and extra logging hooks + +--- + +## 🐛 Bug Fixes + +- **Ask-user widget** — fallback when widget fails to mount +- **Skill loading** for colonies and proper skill resolution across queen sessions +- **Model switching** and new-chat flow no longer carry stale state +- **Tool pill mapping** preserved across turn boundary for deferred `ask_user` completions +- **Tool limit** removed (was capping legitimate long tool lists) +- **Queen loading** stability fixes +- **Side panel** rendering issues +- **Deprecated graphs** removed from UI +- **Home-page prompts** now 
reach the queen directly without waiting for the greeting to finish +- **Colony creation** link, reframing, and post-creation refresh +- **Build error** in colony creation path +- **GCU system prompt** tuning +- **Tool credential filter** correctness +- **Screenshot** capture and browser click reliability +- **Queen message injection** when resuming a session +- **Internal-tag diction** fixes in surfaced output +- **MCP tool initialization** on cold start +- **Frontend DM** edge cases +- **Prompt library** new-session handling for new chat +- **Config validation** and unavailable Minimax model handling +- **Queen identity** loading on cold boot +- **Extra text** in queen selector JSON response parsed safely +- **Outdated queen communication prompt** removed + +--- + +## 🧹 Refactor & Cleanup + +- **Shatter the Eld\*n ring** — top-to-bottom refactor of the runtime core +- **Grand clean-up** of deprecated code paths +- **Remove deprecated shims** and old session-status tools +- **Big test cleanup** — integration tests and component tests rewritten around the new architecture +- **Update references** for orchestrator / host / loader renames +- **Consolidate tests** for queen state machine and verified outcomes +- **Remove old workspace GUI** and its dependencies +- **Remove old "new agent" button** and deprecated entry points +- **Home hive directory** structure refactor + +--- + +## ⚠️ Breaking Changes + +- **Old agents are not compatible.** Custom agents authored against the pre-v0.10.0 framework will need to be re-authored against the new Queen/Colony runtime. +- **Session format** — pre-v0.10.0 sessions cannot be resumed. +- **Deprecated tools removed** and Hive tool names have been realigned; any external scripts referencing old tool names must be updated. +- **Old session-status tools** removed in favor of the new queen lifecycle tools. 
+- **Workspace GUI removed** — the legacy workspace UI is gone; use the new home, colony chat, and queen DM pages. +- **MCP registry pipeline** — MCP configurations now load through the new registry; custom MCP setups may need to be re-registered. + +--- + +## 🚀 Upgrading + +Because this release rewrites the agent runtime, the recommended upgrade path is: + +1. Back up `~/.hive/` if you have sessions or custom agents you want to reference. +2. Pull `main` at the v0.10.0 tag. +3. Let Hive initialize the new queen profiles under `~/.hive/agents/queens/`. +4. Re-create any custom agents as colonies/queens against the new framework. +5. Re-register any custom MCP servers through the new MCP registry. + +Welcome to the Colony. 🐝