Merge branch 'feature/colony-sqlite' into feature/clean-context
This commit is contained in:
@@ -0,0 +1,11 @@
|
||||
"""One-shot maintenance script: append a reply record to the X rapid ledger."""
import json

LEDGER_PATH = '/home/timothy/aden/hive/x_rapid_ledger.json'

# Read-modify-write of the whole ledger file (single-writer assumption).
with open(LEDGER_PATH, 'r') as f:
    data = json.load(f)

data['replies'].append({
    'original_preview': 'Alright, I give in. Here’s my picture with the boss, courtesy of @johnkrausphotos. Oh, and hook ‘em!'
})

with open(LEDGER_PATH, 'w') as f:
    json.dump(data, f, indent=2)
|
||||
@@ -0,0 +1,11 @@
|
||||
"""One-shot check: print YES if argv[1] matches a ledger reply's original_preview."""
import json, sys

with open('/home/timothy/aden/hive/x_rapid_ledger.json', 'r') as f:
    ledger = json.load(f)

needle = sys.argv[1]
found = any(
    reply.get('original_preview') == needle for reply in ledger['replies']
)
print("YES" if found else "NO")
|
||||
@@ -184,8 +184,16 @@ _QUEEN_INDEPENDENT_TOOLS = [
|
||||
"search_files",
|
||||
"run_command",
|
||||
"undo_changes",
|
||||
# Parallel fan-out (Phase 4 unified ColonyRuntime)
|
||||
"run_parallel_workers",
|
||||
# NOTE (2026-04-16): ``run_parallel_workers`` was removed from the
|
||||
# independent phase. The queen's pure DM mode is for conversation
|
||||
# with the user; spawning workers from here puts their activity
|
||||
# into a chat surface that's supposed to stay queen↔user only.
|
||||
# Users who want to fan out parallel work should (a) use
|
||||
# ``create_colony`` to fork into a persistent colony (where
|
||||
# worker activity has its own page), or (b) load an agent via
|
||||
# build/stage and use ``run_parallel_workers`` in the running
|
||||
# phase where a worker context already exists.
|
||||
#
|
||||
# Fork this session into a persistent colony for headless /
|
||||
# recurring / background work that needs to keep running in
|
||||
# parallel to (or after) this chat.
|
||||
|
||||
@@ -631,6 +631,43 @@ class ColonyRuntime:
|
||||
spawn_tools = tools if tools is not None else self._tools
|
||||
spawn_executor = tool_executor or self._tool_executor
|
||||
|
||||
# Colony progress tracker: when the caller supplied a db_path
|
||||
# in input_data, this worker is part of a SQLite task queue
|
||||
# and must see the hive.colony-progress-tracker skill body in
|
||||
# its system prompt from turn 0. Rebuild the catalog with the
|
||||
# skill pre-activated; falls back to the colony default when
|
||||
# no db_path is present.
|
||||
_spawn_catalog = self.skills_catalog_prompt
|
||||
_spawn_skill_dirs = self.skill_dirs
|
||||
if isinstance(input_data, dict) and input_data.get("db_path"):
|
||||
try:
|
||||
from framework.skills.config import SkillsConfig
|
||||
from framework.skills.manager import SkillsManager, SkillsManagerConfig
|
||||
|
||||
_pre = SkillsManager(
|
||||
SkillsManagerConfig(
|
||||
skills_config=SkillsConfig.from_agent_vars(
|
||||
skills=["hive.colony-progress-tracker"],
|
||||
),
|
||||
)
|
||||
)
|
||||
_pre.load()
|
||||
_spawn_catalog = _pre.skills_catalog_prompt
|
||||
_spawn_skill_dirs = list(_pre.allowlisted_dirs) if hasattr(_pre, "allowlisted_dirs") else self.skill_dirs
|
||||
logger.info(
|
||||
"spawn: pre-activated hive.colony-progress-tracker "
|
||||
"(catalog %d → %d chars) for worker with db_path=%s",
|
||||
len(self.skills_catalog_prompt),
|
||||
len(_spawn_catalog),
|
||||
input_data.get("db_path"),
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"spawn: failed to pre-activate colony-progress-tracker "
|
||||
"skill, falling back to base catalog: %s",
|
||||
exc,
|
||||
)
|
||||
|
||||
# Resolve the SSE stream_id once. When the caller didn't supply
|
||||
# one we use the per-worker fan-out tag (filtered out by the
|
||||
# SSE handler). When the caller passed an explicit value we
|
||||
@@ -685,9 +722,9 @@ class ColonyRuntime:
|
||||
llm=self._llm,
|
||||
available_tools=list(spawn_tools),
|
||||
accounts_prompt=self._accounts_prompt,
|
||||
skills_catalog_prompt=self.skills_catalog_prompt,
|
||||
skills_catalog_prompt=_spawn_catalog,
|
||||
protocols_prompt=self.protocols_prompt,
|
||||
skill_dirs=self.skill_dirs,
|
||||
skill_dirs=_spawn_skill_dirs,
|
||||
execution_id=worker_id,
|
||||
stream_id=explicit_stream_id or f"worker:{worker_id}",
|
||||
)
|
||||
@@ -720,6 +757,8 @@ class ColonyRuntime:
|
||||
async def spawn_batch(
|
||||
self,
|
||||
tasks: list[dict[str, Any]],
|
||||
*,
|
||||
tools_override: list[Any] | None = None,
|
||||
) -> list[str]:
|
||||
"""Spawn a batch of parallel workers, one per task spec.
|
||||
|
||||
@@ -732,6 +771,12 @@ class ColonyRuntime:
|
||||
The overseer's ``run_parallel_workers`` tool is the usual
|
||||
caller; it pairs ``spawn_batch`` + ``wait_for_worker_reports``
|
||||
into a single fan-out/fan-in primitive.
|
||||
|
||||
When ``tools_override`` is supplied, every spawned worker
|
||||
receives that tool list instead of the colony's default. Used
|
||||
by ``run_parallel_workers`` to drop tools whose credentials
|
||||
failed the pre-flight check (so the spawned workers don't
|
||||
waste a startup trying to use them).
|
||||
"""
|
||||
worker_ids: list[str] = []
|
||||
for spec in tasks:
|
||||
@@ -743,6 +788,7 @@ class ColonyRuntime:
|
||||
task=task_text,
|
||||
count=1,
|
||||
input_data=task_data or {"task": task_text},
|
||||
tools=tools_override,
|
||||
)
|
||||
worker_ids.extend(ids)
|
||||
return worker_ids
|
||||
|
||||
@@ -0,0 +1,491 @@
|
||||
"""Per-colony SQLite task queue + progress ledger.
|
||||
|
||||
Every colony gets its own ``progress.db`` under ``~/.hive/colonies/{name}/data/``.
|
||||
The DB holds the colony's task queue plus per-task step and SOP checklist
|
||||
rows. Workers claim tasks atomically, write progress as they execute, and
|
||||
verify SOP gates before marking a task done. This gives cross-run memory
|
||||
that the existing per-iteration stall detectors don't have.
|
||||
|
||||
The DB is driven by agents via the ``sqlite3`` CLI through
|
||||
``execute_command_tool``. This module handles framework-side lifecycle:
|
||||
creation, migration, queen-side bulk seeding, stale-claim reclamation.
|
||||
|
||||
Concurrency model:
|
||||
- WAL mode on from day one so 100 concurrent workers don't serialize.
|
||||
- Workers hold NO long-running connection — they ``sqlite3`` per call,
|
||||
which naturally releases locks between LLM turns.
|
||||
- Atomic claim via ``BEGIN IMMEDIATE; UPDATE tasks SET status='claimed'
|
||||
WHERE id=(SELECT ... LIMIT 1)``. The subquery-form UPDATE runs inside
|
||||
the immediate transaction so racers either win the row or find zero
|
||||
affected rows.
|
||||
- Stale-claim reclaimer runs on host startup: claims older than
|
||||
``stale_after_minutes`` get returned to ``pending`` and the row's
|
||||
``retry_count`` increments. When ``retry_count >= max_retries`` the
|
||||
row is moved to ``failed`` instead.
|
||||
|
||||
All writes go through ``BEGIN IMMEDIATE`` so racing readers see
|
||||
consistent snapshots.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import sqlite3
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Bumped whenever the DDL below changes; ensure_progress_db compares it
# against the DB's ``PRAGMA user_version`` to decide whether the schema
# script still needs to run.
SCHEMA_VERSION = 1

# Version-1 DDL, applied in one ``executescript`` pass. Four tables:
#   tasks         — the colony task queue, with claim/retry bookkeeping inline
#   steps         — ordered per-task progress steps (UNIQUE on task_id+seq)
#   sop_checklist — per-task SOP gate items (UNIQUE on task_id+key)
#   colony_meta   — free-form key/value metadata (e.g. schema_version)
# plus the partial index backing the pending-claim query and supporting
# indexes for step ordering, open SOP gates, and status scans.
_SCHEMA_V1 = """
CREATE TABLE IF NOT EXISTS tasks (
    id TEXT PRIMARY KEY,
    seq INTEGER,
    priority INTEGER NOT NULL DEFAULT 0,
    goal TEXT NOT NULL,
    payload TEXT,
    status TEXT NOT NULL DEFAULT 'pending',
    worker_id TEXT,
    claim_token TEXT,
    claimed_at TEXT,
    started_at TEXT,
    completed_at TEXT,
    created_at TEXT NOT NULL,
    updated_at TEXT NOT NULL,
    retry_count INTEGER NOT NULL DEFAULT 0,
    max_retries INTEGER NOT NULL DEFAULT 3,
    last_error TEXT,
    parent_task_id TEXT REFERENCES tasks(id) ON DELETE SET NULL,
    source TEXT
);

CREATE TABLE IF NOT EXISTS steps (
    id TEXT PRIMARY KEY,
    task_id TEXT NOT NULL REFERENCES tasks(id) ON DELETE CASCADE,
    seq INTEGER NOT NULL,
    title TEXT NOT NULL,
    detail TEXT,
    status TEXT NOT NULL DEFAULT 'pending',
    evidence TEXT,
    worker_id TEXT,
    started_at TEXT,
    completed_at TEXT,
    UNIQUE (task_id, seq)
);

CREATE TABLE IF NOT EXISTS sop_checklist (
    id TEXT PRIMARY KEY,
    task_id TEXT NOT NULL REFERENCES tasks(id) ON DELETE CASCADE,
    key TEXT NOT NULL,
    description TEXT NOT NULL,
    required INTEGER NOT NULL DEFAULT 1,
    done_at TEXT,
    done_by TEXT,
    note TEXT,
    UNIQUE (task_id, key)
);

CREATE TABLE IF NOT EXISTS colony_meta (
    key TEXT PRIMARY KEY,
    value TEXT NOT NULL,
    updated_at TEXT NOT NULL
);

CREATE INDEX IF NOT EXISTS idx_tasks_claimable
    ON tasks(status, priority DESC, seq, created_at)
    WHERE status = 'pending';

CREATE INDEX IF NOT EXISTS idx_steps_task_seq
    ON steps(task_id, seq);

CREATE INDEX IF NOT EXISTS idx_sop_required_open
    ON sop_checklist(task_id, required, done_at);

CREATE INDEX IF NOT EXISTS idx_tasks_status
    ON tasks(status, updated_at);
"""

# Per-connection settings issued by _connect on every open. journal_mode
# WAL persists on the database file once set; the other three apply only
# to the connection at hand and must be re-issued each time.
_PRAGMAS = (
    "PRAGMA journal_mode = WAL;",
    "PRAGMA synchronous = NORMAL;",
    "PRAGMA foreign_keys = ON;",
    "PRAGMA busy_timeout = 5000;",
)
|
||||
|
||||
|
||||
def _now_iso() -> str:
|
||||
return datetime.now(timezone.utc).isoformat(timespec="seconds")
|
||||
|
||||
|
||||
def _new_id() -> str:
|
||||
return str(uuid.uuid4())
|
||||
|
||||
|
||||
def _connect(db_path: Path) -> sqlite3.Connection:
    """Return a connection to *db_path* with the standard pragmas applied.

    WAL journaling is sticky on the database file, so re-issuing it on
    every open is a cheap no-op; the remaining pragmas are scoped to the
    connection and therefore have to be repeated each time.
    """
    connection = sqlite3.connect(str(db_path), isolation_level=None, timeout=5.0)
    for statement in _PRAGMAS:
        connection.execute(statement)
    return connection
|
||||
|
||||
|
||||
def ensure_progress_db(colony_dir: Path) -> Path:
    """Create or migrate ``{colony_dir}/data/progress.db``; return its path.

    Idempotent — safe on an already-initialized DB. On every call this:

    1. makes sure the ``data/`` subdirectory exists,
    2. opens the DB (creating the file on first use) with WAL + pragmas,
    3. runs the schema script and bumps ``PRAGMA user_version`` when the
       stored version is behind ``SCHEMA_VERSION``,
    4. sweeps stale claims left over from previous runs, and
    5. patches every worker-config ``*.json`` in the colony dir so
       colonies forked before this feature landed pick up
       ``input_data.db_path`` / ``input_data.colony_id`` on next spawn.
    """
    data_dir = Path(colony_dir) / "data"
    data_dir.mkdir(parents=True, exist_ok=True)
    db_file = data_dir / "progress.db"

    connection = _connect(db_file)
    try:
        (stored_version,) = connection.execute("PRAGMA user_version").fetchone()
        if stored_version < SCHEMA_VERSION:
            connection.executescript(_SCHEMA_V1)
            connection.execute(f"PRAGMA user_version = {SCHEMA_VERSION}")
            # Mirror the version into colony_meta so agents on the
            # sqlite3 CLI can read it without PRAGMA access.
            connection.execute(
                "INSERT OR REPLACE INTO colony_meta(key, value, updated_at) VALUES (?, ?, ?)",
                ("schema_version", str(SCHEMA_VERSION), _now_iso()),
            )
            logger.info(
                "progress_db: initialized schema v%d at %s", SCHEMA_VERSION, db_file
            )

        stale_count = _reclaim_stale_inner(connection, stale_after_minutes=15)
        if stale_count:
            logger.info(
                "progress_db: reclaimed %d stale claims at startup (%s)",
                stale_count,
                db_file,
            )
    finally:
        connection.close()

    resolved = db_file.resolve()
    _patch_worker_configs(Path(colony_dir), resolved)
    return resolved
|
||||
|
||||
|
||||
def _patch_worker_configs(colony_dir: Path, db_path: Path) -> int:
|
||||
"""Inject ``input_data.db_path`` + ``input_data.colony_id`` into
|
||||
existing ``worker.json`` files in a colony directory.
|
||||
|
||||
Runs on every ``ensure_progress_db`` call so colonies that were
|
||||
forked before this feature landed get their worker spawn messages
|
||||
patched in place. Idempotent: if ``input_data`` already contains
|
||||
the correct ``db_path``, the file is not rewritten.
|
||||
|
||||
Returns the number of files that were actually modified (0 on
|
||||
the common case of already-patched colonies).
|
||||
"""
|
||||
colony_id = colony_dir.name
|
||||
abs_db = str(db_path)
|
||||
patched = 0
|
||||
|
||||
for worker_cfg in colony_dir.glob("*.json"):
|
||||
# Only patch files that look like worker configs (have the
|
||||
# worker_meta shape). ``metadata.json`` and ``triggers.json``
|
||||
# are colony-level and must not be touched.
|
||||
if worker_cfg.name in ("metadata.json", "triggers.json"):
|
||||
continue
|
||||
try:
|
||||
data = json.loads(worker_cfg.read_text(encoding="utf-8"))
|
||||
except (json.JSONDecodeError, OSError):
|
||||
continue
|
||||
if not isinstance(data, dict) or "system_prompt" not in data:
|
||||
# Not a worker config (lacks the worker_meta schema).
|
||||
continue
|
||||
|
||||
input_data = data.get("input_data")
|
||||
if not isinstance(input_data, dict):
|
||||
input_data = {}
|
||||
|
||||
if (
|
||||
input_data.get("db_path") == abs_db
|
||||
and input_data.get("colony_id") == colony_id
|
||||
):
|
||||
continue # already patched
|
||||
|
||||
input_data["db_path"] = abs_db
|
||||
input_data["colony_id"] = colony_id
|
||||
data["input_data"] = input_data
|
||||
|
||||
try:
|
||||
worker_cfg.write_text(
|
||||
json.dumps(data, indent=2, ensure_ascii=False), encoding="utf-8"
|
||||
)
|
||||
patched += 1
|
||||
except OSError as e:
|
||||
logger.warning(
|
||||
"progress_db: failed to patch worker config %s: %s", worker_cfg, e
|
||||
)
|
||||
|
||||
if patched:
|
||||
logger.info(
|
||||
"progress_db: patched %d worker config(s) in colony '%s' with db_path",
|
||||
patched,
|
||||
colony_id,
|
||||
)
|
||||
return patched
|
||||
|
||||
|
||||
def ensure_all_colony_dbs(colonies_root: Path | None = None) -> list[Path]:
    """Idempotently ensure every existing colony has a progress.db.

    Called on framework host startup to backfill older colonies; each
    ``ensure_progress_db`` call also runs the stale-claim reclaimer, so
    one pass sweeps all colonies. Returns the DB paths that were ensured.
    """
    root = (
        colonies_root
        if colonies_root is not None
        else Path.home() / ".hive" / "colonies"
    )
    if not root.is_dir():
        return []

    ensured: list[Path] = []
    for colony in sorted(root.iterdir()):
        if not colony.is_dir():
            continue
        try:
            ensured.append(ensure_progress_db(colony))
        except Exception as e:
            # One broken colony must not block the rest of the sweep.
            logger.warning(
                "progress_db: failed to ensure DB for colony '%s': %s", colony.name, e
            )
    return ensured
|
||||
|
||||
|
||||
def _insert_task_row(
    con: sqlite3.Connection,
    task: dict[str, Any],
    idx: int,
    now: str,
    source: str,
) -> str:
    """Insert one ``tasks`` row; return its id (caller-supplied or fresh UUID).

    Raises ValueError when the required 'goal' field is missing.
    """
    goal = task.get("goal")
    if not goal:
        raise ValueError(f"task[{idx}] missing required 'goal' field")

    task_id = task.get("id") or _new_id()
    payload = task.get("payload")
    # Non-string payloads are serialized to JSON text so agents reading
    # the queue through the sqlite3 CLI see plain text.
    if payload is not None and not isinstance(payload, str):
        payload = json.dumps(payload, ensure_ascii=False)

    con.execute(
        """
        INSERT INTO tasks (
            id, seq, priority, goal, payload, status,
            created_at, updated_at, max_retries, parent_task_id, source
        ) VALUES (?, ?, ?, ?, ?, 'pending', ?, ?, ?, ?, ?)
        """,
        (
            task_id,
            task.get("seq"),
            int(task.get("priority", 0)),
            goal,
            payload,
            now,
            now,
            int(task.get("max_retries", 3)),
            task.get("parent_task_id"),
            source,
        ),
    )
    return task_id


def _insert_step_rows(
    con: sqlite3.Connection, task_id: str, task: dict[str, Any], idx: int
) -> None:
    """Insert the optional nested ``steps`` rows for one task.

    Raises ValueError when a step is missing the required 'title'.
    """
    for step_seq, step in enumerate(task.get("steps") or [], start=1):
        if not step.get("title"):
            raise ValueError(
                f"task[{idx}].steps[{step_seq - 1}] missing required 'title'"
            )
        con.execute(
            """
            INSERT INTO steps (id, task_id, seq, title, detail, status)
            VALUES (?, ?, ?, ?, ?, 'pending')
            """,
            (
                _new_id(),
                task_id,
                # An explicit per-step "seq" wins over positional order.
                step.get("seq", step_seq),
                step["title"],
                step.get("detail"),
            ),
        )


def _insert_sop_rows(
    con: sqlite3.Connection, task_id: str, task: dict[str, Any], idx: int
) -> None:
    """Insert the optional nested ``sop_items`` checklist rows for one task.

    Raises ValueError when an item is missing 'key' or 'description'.
    """
    for sop in task.get("sop_items") or []:
        key = sop.get("key")
        description = sop.get("description")
        if not key or not description:
            raise ValueError(
                f"task[{idx}].sop_items missing 'key' or 'description'"
            )
        con.execute(
            """
            INSERT INTO sop_checklist
                (id, task_id, key, description, required, note)
            VALUES (?, ?, ?, ?, ?, ?)
            """,
            (
                _new_id(),
                task_id,
                key,
                description,
                1 if sop.get("required", True) else 0,
                sop.get("note"),
            ),
        )


def seed_tasks(
    db_path: Path,
    tasks: list[dict[str, Any]],
    *,
    source: str = "queen_create",
) -> list[str]:
    """Bulk-insert tasks (with optional nested steps + sop_items).

    Each task dict accepts:
      - goal: str (required)
      - seq: int (optional ordering hint)
      - priority: int (default 0)
      - payload: dict | str | None (stored as JSON text)
      - max_retries: int (default 3)
      - parent_task_id: str | None
      - steps: list[{"title": str, "detail"?: str}] (optional)
      - sop_items: list[{"key": str, "description": str, "required"?: bool, "note"?: str}] (optional)

    All rows are inserted in a single BEGIN IMMEDIATE transaction so
    10k-row seeds finish in one disk flush; any failure rolls the whole
    batch back. Returns the created task ids in the same order as input.

    Raises:
        ValueError: when a task lacks 'goal', a step lacks 'title', or a
            sop item lacks 'key'/'description' (the batch is rolled back).
    """
    if not tasks:
        return []

    created_ids: list[str] = []
    now = _now_iso()
    con = _connect(Path(db_path))
    try:
        con.execute("BEGIN IMMEDIATE")
        for idx, task in enumerate(tasks):
            task_id = _insert_task_row(con, task, idx, now, source)
            _insert_step_rows(con, task_id, task, idx)
            _insert_sop_rows(con, task_id, task, idx)
            created_ids.append(task_id)
        con.execute("COMMIT")
    except Exception:
        con.execute("ROLLBACK")
        raise
    finally:
        con.close()

    return created_ids
|
||||
|
||||
|
||||
def enqueue_task(
    db_path: Path,
    goal: str,
    *,
    steps: list[dict[str, Any]] | None = None,
    sop_items: list[dict[str, Any]] | None = None,
    payload: Any = None,
    priority: int = 0,
    parent_task_id: str | None = None,
    source: str = "enqueue_tool",
) -> str:
    """Append a single task to an existing queue. Thin wrapper over seed_tasks."""
    spec: dict[str, Any] = {
        "goal": goal,
        "steps": steps,
        "sop_items": sop_items,
        "payload": payload,
        "priority": priority,
        "parent_task_id": parent_task_id,
    }
    (task_id,) = seed_tasks(db_path, [spec], source=source)
    return task_id
|
||||
|
||||
|
||||
def _reclaim_stale_inner(
    con: sqlite3.Connection, *, stale_after_minutes: int
) -> int:
    """Reclaim stale claims. Runs inside an existing open connection.

    Two-step:
      1. Tasks past max_retries go to 'failed' with last_error populated.
      2. Remaining stale claims return to 'pending', retry_count++.

    Args:
        con: Already-open connection. This function manages its own
            BEGIN IMMEDIATE / COMMIT / ROLLBACK but never closes *con*.
        stale_after_minutes: Age at which a claim counts as stale.

    Returns:
        Number of rows returned to 'pending' in step 2. Rows moved to
        'failed' in step 1 are NOT included in the count.
    """
    # int() makes the f-string interpolation injection-safe; SQLite
    # evaluates the expression to UTC "YYYY-MM-DD HH:MM:SS" text.
    cutoff_expr = f"datetime('now', '-{int(stale_after_minutes)} minutes')"

    con.execute("BEGIN IMMEDIATE")
    try:
        # Step 1: retries exhausted -> terminal 'failed'. COALESCE keeps
        # any error message already recorded on the row.
        con.execute(
            f"""
            UPDATE tasks
            SET status = 'failed',
                last_error = COALESCE(last_error, 'exceeded max_retries after stale claim'),
                completed_at = datetime('now'),
                updated_at = datetime('now')
            WHERE status IN ('claimed', 'in_progress')
              AND claimed_at IS NOT NULL
              AND claimed_at < {cutoff_expr}
              AND retry_count >= max_retries
            """
        )

        # Step 2: stale claims with retries left go back to the queue
        # with all claim bookkeeping cleared.
        # NOTE(review): the cutoff comparison is a TEXT compare against
        # SQLite's space-separated datetime form; a claimed_at written
        # in ISO 'T' form (e.g. via _now_iso) would never sort below the
        # cutoff — confirm workers write claimed_at via datetime('now').
        cur = con.execute(
            f"""
            UPDATE tasks
            SET status = 'pending',
                worker_id = NULL,
                claim_token = NULL,
                claimed_at = NULL,
                started_at = NULL,
                retry_count = retry_count + 1,
                updated_at = datetime('now')
            WHERE status IN ('claimed', 'in_progress')
              AND claimed_at IS NOT NULL
              AND claimed_at < {cutoff_expr}
              AND retry_count < max_retries
            """
        )
        # rowcount of the UPDATE = number of rows flipped back to pending.
        reclaimed = cur.rowcount or 0
        con.execute("COMMIT")
        return reclaimed
    except Exception:
        con.execute("ROLLBACK")
        raise
|
||||
|
||||
|
||||
def reclaim_stale(db_path: Path, stale_after_minutes: int = 15) -> int:
    """Run the stale-claim sweep on its own short-lived connection."""
    connection = _connect(Path(db_path))
    try:
        return _reclaim_stale_inner(
            connection, stale_after_minutes=stale_after_minutes
        )
    finally:
        connection.close()
|
||||
|
||||
|
||||
# Public API of this module; connection/reclaim/patching helpers with a
# leading underscore stay unexported.
__all__ = [
    "SCHEMA_VERSION",
    "ensure_progress_db",
    "ensure_all_colony_dbs",
    "seed_tasks",
    "enqueue_task",
    "reclaim_stale",
]
|
||||
@@ -1404,7 +1404,18 @@ class AgentLoader:
|
||||
credential_store=credential_store,
|
||||
)
|
||||
runner._agent_default_skills = None
|
||||
runner._agent_skills = None
|
||||
# Colony workers attached to a SQLite task queue get the
|
||||
# colony-progress-tracker skill pre-activated so its full
|
||||
# claim / step / SOP-gate protocol lands in the system prompt
|
||||
# on turn 0, bypassing the progressive-disclosure catalog
|
||||
# lookup. Triggered by the presence of ``input_data.db_path``
|
||||
# in worker.json (written by fork_session_into_colony and
|
||||
# backfilled by ensure_progress_db for pre-existing colonies).
|
||||
_preactivate: list[str] = []
|
||||
_input_data = first_worker.get("input_data") or {}
|
||||
if isinstance(_input_data, dict) and _input_data.get("db_path"):
|
||||
_preactivate.append("hive.colony-progress-tracker")
|
||||
runner._agent_skills = _preactivate or None
|
||||
return runner
|
||||
|
||||
def register_tool(
|
||||
|
||||
@@ -497,12 +497,22 @@ class ToolRegistry:
|
||||
config["cwd"] = str(resolved_cwd)
|
||||
return config
|
||||
|
||||
# For coder_tools_server, inject --project-root so writes go to the expected workspace
|
||||
# For coder_tools_server, inject --project-root so reads land
|
||||
# in the expected workspace (hive repo, for framework skills
|
||||
# and docs), and inject --write-root so writes land under
|
||||
# ~/.hive/workspace/ instead of polluting the git checkout
|
||||
# with queen-authored skills, ledgers, and scripts. Without
|
||||
# the split, every ``write_file`` call from the queen landed
|
||||
# in the hive repo root.
|
||||
if script_name and "coder_tools" in script_name:
|
||||
project_root = str(resolved_cwd.parent.resolve())
|
||||
args = list(args)
|
||||
if "--project-root" not in args:
|
||||
args.extend(["--project-root", project_root])
|
||||
if "--write-root" not in args:
|
||||
_write_root = Path.home() / ".hive" / "workspace"
|
||||
_write_root.mkdir(parents=True, exist_ok=True)
|
||||
args.extend(["--write-root", str(_write_root)])
|
||||
config["args"] = args
|
||||
|
||||
if os.name == "nt":
|
||||
|
||||
@@ -51,13 +51,18 @@ DEFAULT_EVENT_TYPES = [
|
||||
# Keepalive interval in seconds
|
||||
KEEPALIVE_INTERVAL = 15.0
|
||||
|
||||
# Phase 5 SSE filter: parallel-worker streams (stream_id="worker:{uuid}")
|
||||
# publish high-frequency LLM deltas / tool calls that would flood the
|
||||
# user's queen DM chat. We let only this small allowlist of worker
|
||||
# events through to the queen-chat SSE so the frontend can render
|
||||
# fan-out lifecycle and structured fan-in reports without seeing the
|
||||
# raw worker chatter. Per-worker SSE panels (Phase 5b) bypass this
|
||||
# filter via a dedicated /workers/{worker_id}/events route.
|
||||
# Session-SSE worker filter: workers run outside the queen's DM
|
||||
# chat. Worker activity is observable via the dedicated
|
||||
# ``/api/workers/{worker_id}/events`` per-worker SSE route, not via
|
||||
# the session chat. This keeps the queen↔user conversation clean of
|
||||
# tool-call chatter regardless of whether the worker was spawned by
|
||||
# ``run_agent_with_input`` (stream_id="worker") or
|
||||
# ``run_parallel_workers`` (stream_id="worker:{uuid}").
|
||||
#
|
||||
# Lifecycle events the frontend needs for fan-in summaries
|
||||
# (SUBAGENT_REPORT, EXECUTION_COMPLETED, EXECUTION_FAILED) are still
|
||||
# allowed through so the queen can show "N workers done" surfaces
|
||||
# without exposing the per-turn chatter.
|
||||
_WORKER_EVENT_ALLOWLIST = {
|
||||
EventType.SUBAGENT_REPORT.value,
|
||||
EventType.EXECUTION_COMPLETED.value,
|
||||
@@ -66,9 +71,17 @@ _WORKER_EVENT_ALLOWLIST = {
|
||||
|
||||
|
||||
def _is_worker_noise(evt_dict: dict) -> bool:
    """True if the event belongs to a worker stream and should not
    surface in the queen DM chat.

    Matches any stream starting with ``worker`` — both the bare
    ``"worker"`` tag used by single-worker spawns and the
    ``"worker:{uuid}"`` tag used by parallel fan-outs. The allowlist
    carves out the terminal/lifecycle events the UI still needs to
    render fan-in summaries.
    """
    # The SOURCE span contained both the pre- and post-diff variants of
    # this check; only the broadened prefix match is kept, so bare
    # "worker" streams are filtered the same way as "worker:{uuid}".
    stream_id = evt_dict.get("stream_id") or ""
    if not stream_id.startswith("worker"):
        return False
    return evt_dict.get("type") not in _WORKER_EVENT_ALLOWLIST
|
||||
|
||||
|
||||
@@ -644,6 +644,7 @@ async def handle_colony_spawn(request: web.Request) -> web.Response:
|
||||
body = await request.json()
|
||||
colony_name = body.get("colony_name", "").strip()
|
||||
task = body.get("task", "").strip()
|
||||
tasks = body.get("tasks")
|
||||
|
||||
if not colony_name:
|
||||
return web.json_response({"error": "colony_name is required"}, status=400)
|
||||
@@ -661,6 +662,7 @@ async def handle_colony_spawn(request: web.Request) -> web.Response:
|
||||
session=session,
|
||||
colony_name=colony_name,
|
||||
task=task,
|
||||
tasks=tasks if isinstance(tasks, list) else None,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.exception("colony_spawn fork failed")
|
||||
@@ -674,6 +676,7 @@ async def fork_session_into_colony(
|
||||
session: Any,
|
||||
colony_name: str,
|
||||
task: str,
|
||||
tasks: list[dict] | None = None,
|
||||
) -> dict:
|
||||
"""Fork a queen session into a colony directory.
|
||||
|
||||
@@ -690,8 +693,14 @@ async def fork_session_into_colony(
|
||||
the colony resumes with the queen's entire conversation history.
|
||||
3. Multiple independent sessions can be created against the same colony,
|
||||
giving parallel execution capacity without separate worker configs.
|
||||
4. Initializes (or ensures) ``data/progress.db`` — the colony's SQLite
|
||||
task queue + progress ledger. When *tasks* is provided, the queen-
|
||||
authored task batch is seeded into the queue in one transaction.
|
||||
The absolute DB path is threaded into the worker's ``input_data``
|
||||
so spawned workers see it in their first user message.
|
||||
|
||||
Returns ``{"colony_path", "colony_name", "queen_session_id", "is_new"}``.
|
||||
Returns ``{"colony_path", "colony_name", "queen_session_id", "is_new",
|
||||
"db_path", "task_ids"}``.
|
||||
"""
|
||||
import asyncio
|
||||
import json
|
||||
@@ -700,7 +709,8 @@ async def fork_session_into_colony(
|
||||
from pathlib import Path
|
||||
|
||||
from framework.agent_loop.agent_loop import AgentLoop, LoopConfig
|
||||
from framework.agent_loop.types import AgentContext
|
||||
from framework.agent_loop.types import AgentContext, AgentSpec
|
||||
from framework.host.progress_db import ensure_progress_db, seed_tasks
|
||||
from framework.server.session_manager import _queen_session_dir
|
||||
|
||||
queen_loop: AgentLoop = session.queen_executor.node_registry["queen"]
|
||||
@@ -711,6 +721,49 @@ async def fork_session_into_colony(
|
||||
colony_dir.mkdir(parents=True, exist_ok=True)
|
||||
(colony_dir / "data").mkdir(exist_ok=True)
|
||||
|
||||
# ── 0. Ensure the colony's progress DB exists and seed tasks ──
|
||||
# Runs before worker.json is written so the DB path can be threaded
|
||||
# into input_data. Idempotent on reruns of the same colony name.
|
||||
db_path = await asyncio.to_thread(ensure_progress_db, colony_dir)
|
||||
seeded_task_ids: list[str] = []
|
||||
if tasks:
|
||||
seeded_task_ids = await asyncio.to_thread(
|
||||
seed_tasks, db_path, tasks, source="queen_create"
|
||||
)
|
||||
logger.info(
|
||||
"progress_db: seeded %d task(s) into colony '%s'",
|
||||
len(seeded_task_ids),
|
||||
colony_name,
|
||||
)
|
||||
elif task and task.strip():
|
||||
# Phase 2 auto-seed: when the queen uses the simple single-task
|
||||
# form of create_colony (no explicit ``tasks=[{...}]`` list),
|
||||
# insert exactly one row so the first worker spawned into this
|
||||
# colony has something to claim. Without this the queue is
|
||||
# empty and the worker falls back to executing from the chat
|
||||
# spawn message, defeating the cross-run durability the tracker
|
||||
# exists for.
|
||||
try:
|
||||
seeded_task_ids = await asyncio.to_thread(
|
||||
seed_tasks,
|
||||
db_path,
|
||||
[{"goal": task.strip()}],
|
||||
source="create_colony_auto",
|
||||
)
|
||||
logger.info(
|
||||
"progress_db: auto-seeded 1 task into colony '%s' "
|
||||
"(task_id=%s, from single-task create_colony form)",
|
||||
colony_name,
|
||||
seeded_task_ids[0] if seeded_task_ids else "?",
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"progress_db: auto-seed failed for colony '%s' (continuing "
|
||||
"without a pre-seeded row): %s",
|
||||
colony_name,
|
||||
exc,
|
||||
)
|
||||
|
||||
# Fixed worker name -- sessions are the unit of parallelism, not workers
|
||||
worker_name = "worker"
|
||||
|
||||
@@ -772,10 +825,26 @@ async def fork_session_into_colony(
|
||||
# worker is not Charlotte / Alexandra / etc., it is a task executor.
|
||||
# Inheriting the queen's persona made the worker greet the user in
|
||||
# first person with no memory of the task it was actually given.
|
||||
# Thread the first seeded task_id into input_data so the worker's
|
||||
# first claim pins to a specific row (skill's assigned-task-id
|
||||
# branch). When multiple tasks were seeded we only pin the first —
|
||||
# subsequent workers (via run_agent_with_input or parallel spawns)
|
||||
# get their own task_id assigned at spawn time.
|
||||
_worker_input_data: dict[str, Any] = {
|
||||
"db_path": str(db_path),
|
||||
"colony_id": colony_name,
|
||||
}
|
||||
if seeded_task_ids:
|
||||
_worker_input_data["task_id"] = seeded_task_ids[0]
|
||||
|
||||
worker_meta = {
|
||||
"name": worker_name,
|
||||
"version": "1.0.0",
|
||||
"description": f"Worker clone from queen session {session.id}",
|
||||
# Colony progress tracker: worker sees these in its first user
|
||||
# message via _format_spawn_task_message. The colony-progress-
|
||||
# tracker default skill teaches the worker how to use them.
|
||||
"input_data": _worker_input_data,
|
||||
"goal": {
|
||||
"description": worker_task,
|
||||
"success_criteria": [],
|
||||
@@ -907,6 +976,8 @@ async def fork_session_into_colony(
|
||||
"colony_name": colony_name,
|
||||
"queen_session_id": colony_session_id,
|
||||
"is_new": is_new,
|
||||
"db_path": str(db_path),
|
||||
"task_ids": seeded_task_ids,
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -686,6 +686,10 @@ async def handle_session_colonies(request: web.Request) -> web.Response:
|
||||
return web.json_response({"colonies": colonies})
|
||||
|
||||
|
||||
_EVENTS_HISTORY_DEFAULT_LIMIT = 2000
|
||||
_EVENTS_HISTORY_MAX_LIMIT = 10000
|
||||
|
||||
|
||||
async def handle_session_events_history(request: web.Request) -> web.Response:
|
||||
"""GET /api/sessions/{session_id}/events/history — persisted eventbus log.
|
||||
|
||||
@@ -693,17 +697,58 @@ async def handle_session_events_history(request: web.Request) -> web.Response:
|
||||
both live sessions and cold (post-server-restart) sessions. The frontend
|
||||
replays these events through ``sseEventToChatMessage`` to fully reconstruct
|
||||
the UI state on resume.
|
||||
|
||||
Query params:
|
||||
limit: maximum number of events to return (default 2000, max 10000).
|
||||
The TAIL of the file is returned — i.e. the most recent N events.
|
||||
Older events are dropped and ``truncated`` is set to True.
|
||||
|
||||
Response shape::
|
||||
|
||||
{
|
||||
"events": [...], # up to ``limit`` events, oldest-first
|
||||
"session_id": "...",
|
||||
"total": 12345, # total events in the file
|
||||
"returned": 2000, # len(events)
|
||||
"truncated": true, # total > returned
|
||||
"limit": 2000, # the effective limit used
|
||||
}
|
||||
|
||||
``events.jsonl`` is append-only chronological, so "last N lines" == "most
|
||||
recent N events". Long-running colonies have produced files with 50k+
|
||||
events; before this cap, restoring on page-mount shipped the whole thing
|
||||
down the wire and blocked the UI for seconds.
|
||||
"""
|
||||
session_id = request.match_info["session_id"]
|
||||
|
||||
try:
|
||||
limit = int(request.query.get("limit", str(_EVENTS_HISTORY_DEFAULT_LIMIT)))
|
||||
except ValueError:
|
||||
limit = _EVENTS_HISTORY_DEFAULT_LIMIT
|
||||
limit = max(1, min(limit, _EVENTS_HISTORY_MAX_LIMIT))
|
||||
|
||||
from framework.server.session_manager import _find_queen_session_dir
|
||||
|
||||
queen_dir = _find_queen_session_dir(session_id)
|
||||
events_path = queen_dir / "events.jsonl"
|
||||
if not events_path.exists():
|
||||
return web.json_response({"events": [], "session_id": session_id})
|
||||
return web.json_response(
|
||||
{
|
||||
"events": [],
|
||||
"session_id": session_id,
|
||||
"total": 0,
|
||||
"returned": 0,
|
||||
"truncated": False,
|
||||
"limit": limit,
|
||||
}
|
||||
)
|
||||
|
||||
events: list[dict] = []
|
||||
# Tail the file using a bounded deque — O(limit) memory regardless
|
||||
# of file size. No need to materialize the whole list only to slice it.
|
||||
from collections import deque
|
||||
|
||||
tail: deque[dict] = deque(maxlen=limit)
|
||||
total = 0
|
||||
try:
|
||||
with open(events_path, encoding="utf-8") as f:
|
||||
for line in f:
|
||||
@@ -711,13 +756,34 @@ async def handle_session_events_history(request: web.Request) -> web.Response:
|
||||
if not line:
|
||||
continue
|
||||
try:
|
||||
events.append(json.loads(line))
|
||||
evt = json.loads(line)
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
total += 1
|
||||
tail.append(evt)
|
||||
except OSError:
|
||||
return web.json_response({"events": [], "session_id": session_id})
|
||||
return web.json_response(
|
||||
{
|
||||
"events": [],
|
||||
"session_id": session_id,
|
||||
"total": 0,
|
||||
"returned": 0,
|
||||
"truncated": False,
|
||||
"limit": limit,
|
||||
}
|
||||
)
|
||||
|
||||
return web.json_response({"events": events, "session_id": session_id})
|
||||
events = list(tail)
|
||||
return web.json_response(
|
||||
{
|
||||
"events": events,
|
||||
"session_id": session_id,
|
||||
"total": total,
|
||||
"returned": len(events),
|
||||
"truncated": total > len(events),
|
||||
"limit": limit,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
async def handle_session_history(request: web.Request) -> web.Response:
|
||||
|
||||
@@ -139,6 +139,24 @@ class SessionManager:
|
||||
except Exception:
|
||||
logger.warning("v2 migration failed (non-fatal)", exc_info=True)
|
||||
|
||||
# Ensure every existing colony has an up-to-date progress.db
|
||||
# (schema v1, WAL mode) and reclaim any stale claims left behind
|
||||
# by crashed workers from the previous run. Idempotent and
|
||||
# fast; runs synchronously because the event loop hasn't
|
||||
# started yet at __init__ time.
|
||||
from framework.host.progress_db import ensure_all_colony_dbs
|
||||
|
||||
try:
|
||||
ensured = ensure_all_colony_dbs()
|
||||
if ensured:
|
||||
logger.info(
|
||||
"progress_db: ensured %d colony DB(s) at startup", len(ensured)
|
||||
)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"progress_db: backfill at startup failed (non-fatal)", exc_info=True
|
||||
)
|
||||
|
||||
def build_llm(self, model: str | None = None):
|
||||
"""Construct an LLM provider using the server's configured defaults."""
|
||||
from framework.config import RuntimeConfig, get_hive_config
|
||||
|
||||
@@ -1,24 +0,0 @@
|
||||
---
|
||||
name: hive.batch-ledger
|
||||
description: Track per-item status when processing collections to prevent skipped or duplicated items.
|
||||
metadata:
|
||||
author: hive
|
||||
type: default-skill
|
||||
---
|
||||
|
||||
## Operational Protocol: Batch Progress Ledger
|
||||
|
||||
When processing a collection of items, maintain a batch ledger in `_batch_ledger`.
|
||||
|
||||
Initialize when you identify the batch:
|
||||
- `_batch_total`: total item count
|
||||
- `_batch_ledger`: JSON with per-item status
|
||||
|
||||
Per-item statuses: pending → in_progress → completed|failed|skipped
|
||||
|
||||
- Set `in_progress` BEFORE processing
|
||||
- Set final status AFTER processing with 1-line result_summary
|
||||
- Include error reason for failed/skipped items
|
||||
- Update aggregate counts after each item
|
||||
- NEVER remove items from the ledger
|
||||
- If resuming, skip items already marked completed
|
||||
@@ -61,6 +61,7 @@ Whereas `wait_for_selector`, `browser_click(selector=...)`, `browser_type(select
|
||||
### Empirically verified (2026-04-11)
|
||||
|
||||
Tested against `https://www.reddit.com/r/programming/` whose search input lives at:
|
||||
|
||||
```
|
||||
document > reddit-search-large [shadow]
|
||||
> faceplate-search-input#search-input [shadow]
|
||||
@@ -95,13 +96,13 @@ All return real URLs and titles. On a fast page `navigate(wait_until="load")` re
|
||||
|
||||
### Timing expectations (measured against real sites)
|
||||
|
||||
| Site | Navigate load time |
|
||||
|---|---|
|
||||
| example.com | 100–400 ms |
|
||||
| wikipedia.org | 200–500 ms |
|
||||
| reddit.com | 1.5–2 s |
|
||||
| x.com/twitter | 1.2–1.6 s |
|
||||
| linkedin.com (logged in) | 4–5 s |
|
||||
| Site | Navigate load time |
|
||||
| ------------------------ | ------------------ |
|
||||
| example.com | 100–400 ms |
|
||||
| wikipedia.org | 200–500 ms |
|
||||
| reddit.com | 1.5–2 s |
|
||||
| x.com/twitter | 1.2–1.6 s |
|
||||
| linkedin.com (logged in) | 4–5 s |
|
||||
|
||||
For LinkedIn and other heavy SPAs, rely on `sleep()` after navigation to let the page hydrate.
|
||||
|
||||
@@ -124,7 +125,7 @@ Even after `wait_until="load"`, React/Vue SPAs often render their real chrome in
|
||||
|
||||
Why this is necessary:
|
||||
|
||||
- **React / Vue controlled components** don't trust JS-sourced `.focus()`. React uses event delegation and watches for *native* pointer/focus events — a `click` dispatched via CDP fires the real `pointerdown`/`pointerup`/`click`/`focus` sequence that React listens to, and updates its internal state. A JS-only `.focus()` sets `document.activeElement` but the framework's controlled state doesn't see it.
|
||||
- **React / Vue controlled components** don't trust JS-sourced `.focus()`. React uses event delegation and watches for _native_ pointer/focus events — a `click` dispatched via CDP fires the real `pointerdown`/`pointerup`/`click`/`focus` sequence that React listens to, and updates its internal state. A JS-only `.focus()` sets `document.activeElement` but the framework's controlled state doesn't see it.
|
||||
- **Draft.js** (X/Twitter compose) and **Lexical** (Gmail, LinkedIn DMs) use contenteditable divs with immutable editor state. They only enter "edit mode" after a real click on the editor surface. Typing at them without clicking routes keys to `document.body` or gets silently discarded.
|
||||
- **Send/submit buttons are bound to framework state**, not DOM state. They're typically `disabled={!hasRealContent}` where `hasRealContent` is computed from React/Vue/Svelte state. The input field can have characters in the DOM but the button stays disabled because the framework never saw a real input event.
|
||||
|
||||
@@ -171,16 +172,16 @@ Always include an equivalent cleanup block in any script that types into a compo
|
||||
|
||||
### Verified site-specific quirks
|
||||
|
||||
| Site | Editor | Workaround |
|
||||
|---|---|---|
|
||||
| **X / Twitter** compose | Draft.js | Click `[data-testid='tweetTextarea_0']` first, then type with `delay_ms=20`. First 1-2 chars may be eaten — accept truncation or prepend a throwaway char. Verify `[data-testid='tweetButton']` has `disabled: false` before clicking. |
|
||||
| **LinkedIn** messaging | contenteditable (inside `#interop-outlet` shadow root) | Use `browser_shadow_query` to find the rect, click-coordinate to focus, then `browser_type_focused(text=...)` (selector-based `browser_type` can't reach shadow). Send button is `.msg-form__send-button`. |
|
||||
| **LinkedIn** feed post composer | Quill/LinkedIn custom | Click the "Start a post" trigger first, wait 1s for modal, click the textarea, type. |
|
||||
| **Reddit** comment/post box | ProseMirror | Click the textarea, wait 0.5s for the toolbar to mount, then type. Submit is `button[slot="submit-button"]` inside a shreddit-composer. |
|
||||
| **Gmail** compose | Lexical | Click the body first. Gmail has a visible `div[contenteditable=true][aria-label*='Message Body']` after opening a compose window. |
|
||||
| **Slack** message box | contenteditable | Click first, then type. Send is a paper-plane button with `data-qa='texty_send_button'`. |
|
||||
| **Discord** | Slate | Click first. Discord's send is implicit on Enter (no button), so just press Enter after typing. |
|
||||
| **Monaco** editors (GitHub code review, CodeSandbox) | Monaco | Click first, type with `delay_ms=10`. Monaco listens for `textarea` input events on a hidden textarea — requires focus to be on that textarea. |
|
||||
| Site | Editor | Workaround |
|
||||
| ---------------------------------------------------- | ------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| **X / Twitter** compose | Draft.js | Click `[data-testid='tweetTextarea_0']` first, then type with `delay_ms=20`. First 1-2 chars may be eaten — accept truncation or prepend a throwaway char. Verify `[data-testid='tweetButton']` has `disabled: false` before clicking. |
|
||||
| **LinkedIn** messaging | contenteditable (inside `#interop-outlet` shadow root) | Use `browser_shadow_query` to find the rect, click-coordinate to focus, then `browser_type_focused(text=...)` (selector-based `browser_type` can't reach shadow). Send button is `.msg-form__send-button`. |
|
||||
| **LinkedIn** feed post composer | Quill/LinkedIn custom | Click the "Start a post" trigger first, wait 1s for modal, click the textarea, type. |
|
||||
| **Reddit** comment/post box | ProseMirror | Click the textarea, wait 0.5s for the toolbar to mount, then type. Submit is `button[slot="submit-button"]` inside a shreddit-composer. |
|
||||
| **Gmail** compose | Lexical | Click the body first. Gmail has a visible `div[contenteditable=true][aria-label*='Message Body']` after opening a compose window. |
|
||||
| **Slack** message box | contenteditable | Click first, then type. Send is a paper-plane button with `data-qa='texty_send_button'`. |
|
||||
| **Discord** | Slate | Click first. Discord's send is implicit on Enter (no button), so just press Enter after typing. |
|
||||
| **Monaco** editors (GitHub code review, CodeSandbox) | Monaco | Click first, type with `delay_ms=10`. Monaco listens for `textarea` input events on a hidden textarea — requires focus to be on that textarea. |
|
||||
|
||||
### Plain text into a real input
|
||||
|
||||
@@ -247,6 +248,7 @@ The highlight overlay stays visible on the page for **10 seconds** after each in
|
||||
- Popup appeared that you didn't need? Close it immediately
|
||||
|
||||
`browser_tabs` returns an `origin` field for each tab:
|
||||
|
||||
- `"agent"` — you opened it; you own it; close it when done
|
||||
- `"popup"` — opened by a link or script; close after extracting what you need
|
||||
- `"startup"` or `"user"` — leave these alone unless the task requires it
|
||||
@@ -259,22 +261,22 @@ The bridge automatically evicts per-tab state (`_cdp_attached`, `_interaction_hi
|
||||
|
||||
### LinkedIn
|
||||
|
||||
| Target | Selector |
|
||||
|---|---|
|
||||
| Global search input | `input[data-testid='typeahead-input']` |
|
||||
| Own profile link | `a[href*='linkedin.com/in/']` |
|
||||
| Messaging overlay | `#interop-outlet >>> [aria-label]` (use shadow_query) |
|
||||
| Target | Selector |
|
||||
| ------------------- | ----------------------------------------------------- |
|
||||
| Global search input | `input[data-testid='typeahead-input']` |
|
||||
| Own profile link | `a[href*='linkedin.com/in/']` |
|
||||
| Messaging overlay | `#interop-outlet >>> [aria-label]` (use shadow_query) |
|
||||
|
||||
LinkedIn enforces **strict Trusted Types CSP**. Any script you inject via `browser_evaluate` that uses `innerHTML = "<...>"` will be **silently dropped** — the wrapper element gets added but its content is empty, no console error. Always use `createElement` + `appendChild` + `setAttribute` for DOM injection on LinkedIn. `style.cssText`, `textContent`, and `.value` assignments are fine (they don't go through the Trusted Types sink).
|
||||
|
||||
### Reddit (new reddit / shreddit)
|
||||
|
||||
| Target | Selector |
|
||||
|---|---|
|
||||
| Target | Selector |
|
||||
| --------------------- | ---------------------------------------------------------------------------- |
|
||||
| Search input (shadow) | `reddit-search-large >>> #search-input` (rect only; type via click-to-focus) |
|
||||
| Reddit logo (home) | `#reddit-logo` |
|
||||
| Subreddit posts | `shreddit-post` custom elements |
|
||||
| Create post button | `a[href*='/submit']` |
|
||||
| Reddit logo (home) | `#reddit-logo` |
|
||||
| Subreddit posts | `shreddit-post` custom elements |
|
||||
| Create post button | `a[href*='/submit']` |
|
||||
|
||||
Reddit's search input lives **two shadow levels deep** inside `reddit-search-large > faceplate-search-input`. You cannot reach it with `browser_type(selector=)`. The working pattern:
|
||||
|
||||
@@ -285,15 +287,15 @@ Reddit's search input lives **two shadow levels deep** inside `reddit-search-lar
|
||||
|
||||
### X / Twitter
|
||||
|
||||
| Target | Selector |
|
||||
|---|---|
|
||||
| Main search input | `input[data-testid='SearchBox_Search_Input']` |
|
||||
| Home nav link | `a[data-testid='AppTabBar_Home_Link']` |
|
||||
| Post text area (compose) | `[data-testid='tweetTextarea_0']` |
|
||||
| Reply buttons on feed | `[data-testid='reply']` |
|
||||
| Post / Tweet submit button | `[data-testid='tweetButton']` |
|
||||
| Caret (⋯) menu on a post | `[data-testid='caret']` |
|
||||
| Confirmation sheet button | `[data-testid='confirmationSheetConfirm']` |
|
||||
| Target | Selector |
|
||||
| -------------------------- | --------------------------------------------- |
|
||||
| Main search input | `input[data-testid='SearchBox_Search_Input']` |
|
||||
| Home nav link | `a[data-testid='AppTabBar_Home_Link']` |
|
||||
| Post text area (compose) | `[data-testid='tweetTextarea_0']` |
|
||||
| Reply buttons on feed | `[data-testid='reply']` |
|
||||
| Post / Tweet submit button | `[data-testid='tweetButton']` |
|
||||
| Caret (⋯) menu on a post | `[data-testid='caret']` |
|
||||
| Confirmation sheet button | `[data-testid='confirmationSheetConfirm']` |
|
||||
|
||||
**X uses Draft.js for the compose text editor**, which does NOT accept synthetic input reliably. Working workaround: `browser_type(selector='[data-testid="tweetTextarea_0"]', text="...", delay_ms=20)`. The delay gives Draft.js time to process each keystroke. The first 1–2 characters may still get eaten — accept minor truncation or prepend a throwaway character. After typing, check `[data-testid="tweetButton"]` has `disabled: false` before clicking submit.
|
||||
|
||||
@@ -366,17 +368,35 @@ If Chrome detaches the debugger for its own reasons (tab closed, user opened Dev
|
||||
|
||||
If reattach also fails, you'll get the underlying CDP error string — that's a real problem, usually the tab is gone.
|
||||
|
||||
## When to reach for `browser_evaluate`
|
||||
## `browser_evaluate` is a last-resort escape hatch
|
||||
|
||||
Use it when:
|
||||
- You need to read state from inside a shadow root that `browser_get_rect` doesn't handle
|
||||
- You need a one-shot JS snippet to trigger a site-specific action (scroll a specific container, open a menu, set a form field value directly)
|
||||
- You need to walk an AX tree or measure layout that the standard tools don't expose
|
||||
**Before using `browser_evaluate`, try these first — in this order:**
|
||||
|
||||
Avoid it when:
|
||||
- A standard tool (`browser_click_coordinate`, `browser_type`, `browser_press`) already does what you need. Those go through CDP's native event pipeline, which real sites trust more than synthetic JS dispatch.
|
||||
- You're on a strict-CSP site and want to inject DOM — stick to `createElement` + `appendChild`, never `innerHTML`.
|
||||
- You need to trigger React / Vue / framework state changes — those frameworks watch for real browser events (`input`, `change`, `click`), not scripted `dispatchEvent` calls. Native-event tools are more reliable.
|
||||
1. **`browser_screenshot` + `browser_click_coordinate`** — works on every site regardless of shadow DOM, iframes, obfuscated classes. This is the default path for "click a thing you can see."
|
||||
2. **`browser_type(use_insert_text=True, text=...)`** — for typing into ANY input/contenteditable, including Lexical and Draft.js. Handles click-focus-insert with built-in retries. Do **not** call `document.execCommand('insertText')` via evaluate; this tool already does it correctly.
|
||||
3. **`browser_shadow_query`** or **`browser_get_rect(selector)`** with the `>>>` shadow-piercing syntax — for selector-based lookups across shadow roots.
|
||||
4. **`browser_get_text` / `browser_get_attribute`** — for reading element state by selector.
|
||||
5. **`browser_snapshot`** — for dumping the accessibility tree of the page.
|
||||
|
||||
If all five of those fit your goal, **do not use `browser_evaluate`.** Each evaluate call is a small LLM round-trip of ~30-100 tokens of JS plus a JSON response; five of them burn more context than a single screenshot-and-coordinate does, with less reliability.
|
||||
|
||||
### Anti-patterns — stop immediately if you catch yourself doing these
|
||||
|
||||
- **Trying multiple `querySelectorAll` variants when the first returned `[]`.** Different selectors on the same page rarely work if the first guess failed — modern SPAs obfuscate class names at build time. After one empty result, switch to `browser_screenshot` + `browser_click_coordinate`. Do not write `.artdeco-list__item`, then `[data-test-incoming-invitation-card]`, then `[class*="invitation"]` — you are already on the wrong path.
|
||||
- **Writing `walk(root)` recursive shadow-DOM traversal functions.** Use `browser_shadow_query` — it traverses at the CDP level (native C++), not by re-running a recursive JS function every call.
|
||||
- **Calling `document.execCommand('insertText', ...)` to type into a contenteditable.** Use `browser_type(use_insert_text=True, text='...')`. The high-level tool handles the exact same Lexical/Draft.js case but with click-focus-retry logic built in.
|
||||
- **Accessing `iframe.contentDocument`.** Rarely works (cross-origin, late hydration) and when it does, the code is brittle. Use `browser_screenshot` to see the iframe, then `browser_click_coordinate` to interact.
|
||||
- **Using `innerHTML = "<...>"` on a Trusted Types site (LinkedIn, GitHub).** The assignment is silently dropped. Use `createElement` + `appendChild` if you must inject DOM — but first, ask whether you really need to.
|
||||
- **Triggering React/Vue state via synthetic `dispatchEvent`.** Frameworks watch for real browser events. Use `browser_click_coordinate`, `browser_press`, or `browser_type` — all go through CDP's native event pipeline.
|
||||
|
||||
### Legitimate uses (when nothing semantic fits)
|
||||
|
||||
- Reading a computed style, `window.innerWidth/Height`, `document.scrollingElement.scrollTop`, or other layout values the tools don't expose.
|
||||
- Firing a one-shot site-specific API call (analytics beacon, feature-flag toggle).
|
||||
- Stripping `onbeforeunload` before navigating away from a page with an unsent draft (LinkedIn, Gmail).
|
||||
- Detecting whether a specific shadow-root host exists before a follow-up screenshot.
|
||||
|
||||
In all of these cases the script is SHORT (< 10 lines) and the result is CONSUMED (read, then acted on), not further probed.
|
||||
|
||||
## Login & auth walls
|
||||
|
||||
|
||||
@@ -0,0 +1,111 @@
|
||||
---
|
||||
name: hive.colony-progress-tracker
|
||||
description: Claim tasks, record step progress, and verify SOP gates in the colony SQLite queue. Applies when your spawn message includes a db_path field.
|
||||
metadata:
|
||||
author: hive
|
||||
type: default-skill
|
||||
---
|
||||
|
||||
## Operational Protocol: Colony Progress Tracker
|
||||
|
||||
**Applies when** your spawn message has `db_path:` and `colony_id:` fields. The DB is your durable working memory — tells you what's done, what to skip, which SOP gates you owe.
|
||||
|
||||
Access via `execute_command_tool` running `sqlite3 "<db_path>" "..."`. Tables: `tasks` (queue), `steps` (per-task decomposition), `sop_checklist` (hard gates).
|
||||
|
||||
### Claim: assigned task (check this FIRST)
|
||||
|
||||
If your spawn message includes a `task_id:` field, the queen pre-assigned a specific row to you. Claim that row by id — **do not** use the generic next-pending pattern below:
|
||||
|
||||
```bash
|
||||
sqlite3 "<db_path>" <<'SQL'
|
||||
UPDATE tasks SET status='claimed', worker_id='<worker-id>',
|
||||
claim_token=lower(hex(randomblob(8))),
|
||||
claimed_at=datetime('now'), updated_at=datetime('now')
|
||||
WHERE id='<task_id>' AND status='pending'
|
||||
RETURNING id, goal, payload;
|
||||
SQL
|
||||
```
|
||||
|
||||
Empty output → another worker raced you or the row is already done. Stop and report. Non-empty → that row is yours, proceed to "Load the plan".
|
||||
|
||||
### Claim: next pending (fallback when no task_id is assigned)
|
||||
|
||||
If your spawn message did NOT include `task_id:` — you are a generic fan-out worker racing on a shared queue. Use the generic next-pending claim:
|
||||
|
||||
```bash
|
||||
sqlite3 "<db_path>" <<'SQL'
|
||||
UPDATE tasks SET status='claimed', worker_id='<worker-id>',
|
||||
claim_token=lower(hex(randomblob(8))),
|
||||
claimed_at=datetime('now'), updated_at=datetime('now')
|
||||
WHERE id=(SELECT id FROM tasks WHERE status='pending'
|
||||
ORDER BY priority DESC, seq, created_at LIMIT 1)
|
||||
RETURNING id, goal, payload;
|
||||
SQL
|
||||
```
|
||||
|
||||
Empty output → queue drained, exit. Otherwise the returned `id` is yours. **Never SELECT-then-UPDATE** — races.
|
||||
|
||||
### Load the plan
|
||||
|
||||
```bash
|
||||
sqlite3 "<db_path>" "SELECT seq, id, title, status FROM steps WHERE task_id='<task-id>' ORDER BY seq;"
|
||||
sqlite3 "<db_path>" "SELECT key, description, required, done_at FROM sop_checklist WHERE task_id='<task-id>';"
|
||||
```
|
||||
|
||||
**Skip any step where status='done'.** That's the point — don't redo completed work.
|
||||
|
||||
### Execute a step
|
||||
|
||||
Before tool calls:
|
||||
```bash
|
||||
sqlite3 "<db_path>" "UPDATE steps SET status='in_progress', worker_id='<worker-id>', started_at=datetime('now') WHERE id='<step-id>';"
|
||||
```
|
||||
After success (one-line evidence: path, URL, key result):
|
||||
```bash
|
||||
sqlite3 "<db_path>" "UPDATE steps SET status='done', evidence='<what you did>', completed_at=datetime('now') WHERE id='<step-id>';"
|
||||
```
|
||||
|
||||
### MANDATORY: SOP gate check before marking task done
|
||||
|
||||
```bash
|
||||
sqlite3 "<db_path>" "SELECT key, description FROM sop_checklist WHERE task_id='<task-id>' AND required=1 AND done_at IS NULL;"
|
||||
```
|
||||
|
||||
- Empty → proceed to "Mark task done".
|
||||
- Non-empty → each row is work you still owe. Do it, then check it off:
|
||||
|
||||
```bash
|
||||
sqlite3 "<db_path>" "UPDATE sop_checklist SET done_at=datetime('now'), done_by='<worker-id>', note='<why>' WHERE task_id='<task-id>' AND key='<key>';"
|
||||
```
|
||||
|
||||
**Never mark a task done while this SELECT returns rows.** This gate exists specifically to stop you from declaring success while skipping required steps.
|
||||
|
||||
### Mark task done / failed
|
||||
|
||||
```bash
|
||||
# Success:
|
||||
sqlite3 "<db_path>" "UPDATE tasks SET status='done', completed_at=datetime('now'), updated_at=datetime('now') WHERE id='<task-id>' AND worker_id='<worker-id>';"
|
||||
|
||||
# Unrecoverable failure:
|
||||
sqlite3 "<db_path>" "UPDATE tasks SET status='failed', last_error='<one sentence>', completed_at=datetime('now'), updated_at=datetime('now') WHERE id='<task-id>' AND worker_id='<worker-id>';"
|
||||
```
|
||||
|
||||
The `AND worker_id=?` guard means a reclaimed row won't accept your write — treat zero rows affected as "your claim was revoked, stop."
|
||||
|
||||
### Loop
|
||||
|
||||
After done/failed → claim the next task. Exit only when claim returns empty.
|
||||
|
||||
### Errors + debug
|
||||
|
||||
- **"database is locked"**: retry with 100ms → 1s backoff, max 5 attempts. `busy_timeout=5000` handles most contention silently.
|
||||
- **Queue health**: `SELECT status, count(*) FROM tasks GROUP BY status;`
|
||||
- **Your in-flight work**: `SELECT id, goal, status FROM tasks WHERE worker_id='<worker-id>';`
|
||||
|
||||
### Anti-patterns (will break the queue)
|
||||
|
||||
- Don't DDL (CREATE/ALTER/DROP).
|
||||
- Don't DELETE — failed tasks stay as `failed` for audit.
|
||||
- Don't skip Protocol 4 (SOP gate) before marking done.
|
||||
- Don't hold a task >15min without updates — the stale-claim reclaimer revokes your claim.
|
||||
- Don't invent task IDs. Workers update existing rows; only the queen enqueues new ones.
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
name: hive.context-preservation
|
||||
description: Proactively preserve critical information before automatic context pruning destroys it.
|
||||
description: Proactively extract critical values from tool results into working notes before automatic context pruning destroys them.
|
||||
metadata:
|
||||
author: hive
|
||||
type: default-skill
|
||||
@@ -8,17 +8,16 @@ metadata:
|
||||
|
||||
## Operational Protocol: Context Preservation
|
||||
|
||||
You operate under a finite context window. Important information WILL be pruned.
|
||||
You operate under a finite context window. Older tool results WILL be pruned. Extract what you need while it's still in context.
|
||||
|
||||
Save-As-You-Go: After any tool call producing information you'll need later,
|
||||
immediately extract key data into `_working_notes` or `_preserved_data`.
|
||||
Do NOT rely on referring back to old tool results.
|
||||
**Save-as-you-go.** After any tool call producing information you'll need later, immediately extract the key data into `_working_notes` or `_preserved_data`. Do not rely on referring back to old tool results — once they're pruned they're gone.
|
||||
|
||||
What to extract: URLs and key snippets (not full pages), relevant API fields
|
||||
(not raw JSON), specific lines/values (not entire files), analysis results
|
||||
(not raw data).
|
||||
**What to extract:**
|
||||
- URLs and key snippets (not full pages)
|
||||
- Relevant API fields (not raw JSON blobs)
|
||||
- Specific lines, values, or IDs (not entire files)
|
||||
- Analysis conclusions (not raw data)
|
||||
|
||||
Before transitioning to the next phase/node, write a handoff summary to
|
||||
`_handoff_context` with everything the next phase needs to know.
|
||||
**Handoffs between tasks** happen through `progress.db`, not through shared-buffer handoff blobs. When you finish a task, any state the next worker needs goes into the task row itself (`steps.evidence`, `tasks.last_error`, `sop_checklist.note`) — see `hive.colony-progress-tracker`. Use `_working_notes` for things the DB schema doesn't cover.
|
||||
|
||||
You will receive an alert when context reaches {{warn_at_usage_ratio_pct}}% — preserve immediately.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
name: hive.error-recovery
|
||||
description: Follow a structured recovery protocol when tool calls fail instead of blindly retrying or giving up.
|
||||
description: Follow a structured recovery decision tree when tool calls fail instead of blindly retrying or giving up.
|
||||
metadata:
|
||||
author: hive
|
||||
type: default-skill
|
||||
@@ -10,9 +10,20 @@ metadata:
|
||||
|
||||
When a tool call fails:
|
||||
|
||||
1. Diagnose — record error in notes, classify as transient or structural
|
||||
2. Decide — transient: retry once. Structural fixable: fix and retry.
|
||||
Structural unfixable: record as failed, move to next item.
|
||||
Blocking all progress: record escalation note.
|
||||
3. Adapt — if same tool failed {{max_retries_per_tool}}+ times, stop using it and find alternative.
|
||||
Update plan in notes. Never silently drop the failed item.
|
||||
1. **Diagnose** — classify the failure as *transient* (network blip, rate limit, timeout) or *structural* (wrong selector, missing auth, invalid schema, permission denied).
|
||||
|
||||
2. **Decide:**
|
||||
- Transient → retry once.
|
||||
- Structural + fixable → fix the input and retry.
|
||||
- Structural + unfixable → record the failure and move to the next item.
|
||||
- Blocking all progress → escalate.
|
||||
|
||||
3. **Adapt** — if the same tool has failed {{max_retries_per_tool}}+ times in a row, stop using it and find an alternative approach.
|
||||
|
||||
**Never silently drop a failed item.** If the item is a task in the colony queue, write the failure to the DB instead of an in-memory buffer:
|
||||
|
||||
```bash
|
||||
sqlite3 "$DB_PATH" "UPDATE tasks SET status='failed', last_error='<one-sentence reason>', completed_at=datetime('now'), updated_at=datetime('now') WHERE id='<task-id>' AND worker_id='<your-worker-id>';"
|
||||
```
|
||||
|
||||
The `tasks.retry_count` column and the stale-claim reclaimer handle auto-retry for crashes; your job is the within-run decision tree above. See `hive.colony-progress-tracker` for the full queue protocol.
|
||||
|
||||
@@ -15,6 +15,28 @@ LinkedIn is the hardest mainstream site to automate because it combines **shadow
|
||||
|
||||
**Always activate `browser-automation` first.** This skill assumes you already know about CSS-px coordinates, `browser_type`/`browser_type_focused`, and `browser_shadow_query`. The guidance below is LinkedIn-specific; general browser rules are there.
|
||||
|
||||
## Rule #0: screenshot + coordinates, not selectors
|
||||
|
||||
LinkedIn changes class names aggressively and hides composers inside shadow roots AND iframes. **Selectors break constantly.** Your default strategy on every LinkedIn page should be:
|
||||
|
||||
1. `browser_screenshot()` — see the page visually
|
||||
2. Pick the target's position from the image
|
||||
3. `browser_coords(image_x, image_y)` → get CSS pixels
|
||||
4. `browser_click_coordinate(css_x, css_y)` — reaches shadow DOM, iframes, and React elements indifferently
|
||||
5. `browser_type(use_insert_text=True, text=...)` — types into whatever is focused, including Lexical composers
|
||||
|
||||
**If `browser_evaluate(...querySelectorAll...)` returns `[]` even once, do not try a different selector.** Stop, screenshot, and click. The "what if I try `.artdeco-list__item` next" instinct has burned ~50 tool calls in real sessions before the agent pivoted. Don't fall into that loop.
|
||||
|
||||
The selectors in the table below are **only** for when you already know the target is in the light DOM and you want a faster path than screenshot+coord. **When in doubt, default to coordinates.**
|
||||
|
||||
## Invitation manager — inline message button path is BROKEN
|
||||
|
||||
If the user asks to message a connection request **from the invitation manager page without accepting first**, the inline "Message" button opens a composer inside a nested **iframe overlay** (not a shadow root). The iframe's `contentDocument` is either cross-origin-blocked or not hydrated at access time. This path is **not reliably automatable today.**
|
||||
|
||||
**Redirect:** click the person's name/profile link on the card, go to the profile page, and use the standard Profile Message flow below. The profile flow is battle-tested; the inline-iframe flow isn't.
|
||||
|
||||
If you end up writing `document.activeElement.tagName === 'IFRAME'` inside a `browser_evaluate`, you've hit this trap. Stop and go to the profile page.
|
||||
|
||||
## Timing expectations
|
||||
|
||||
- `browser_navigate(wait_until="load")` — LinkedIn takes **4–5 seconds** to load the feed cold.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
name: hive.note-taking
|
||||
description: Maintain structured working notes throughout execution to prevent information loss during context pruning.
|
||||
description: Maintain a free-form scratchpad of decisions, extracted values, and open questions so context pruning doesn't lose anything you still need.
|
||||
metadata:
|
||||
author: hive
|
||||
type: default-skill
|
||||
@@ -8,20 +8,21 @@ metadata:
|
||||
|
||||
## Operational Protocol: Structured Note-Taking
|
||||
|
||||
Maintain structured working notes in shared buffer key `_working_notes`.
|
||||
Maintain free-form working notes in shared buffer key `_working_notes` for data that *you* need to remember but that isn't captured by the colony task queue.
|
||||
|
||||
**Do not duplicate the queue in here.** Per-task goal, ordered steps, and SOP gates live in `progress.db` — use `hive.colony-progress-tracker` for those. These notes are for things the DB schema doesn't cover.
|
||||
|
||||
Update at these checkpoints:
|
||||
|
||||
- After completing each discrete subtask or batch item
|
||||
- After receiving new information that changes your plan
|
||||
- Before any tool call that will produce substantial output
|
||||
- After receiving new information that changes how you plan to approach the current step
|
||||
- Before any tool call that will produce substantial output you'll need to reference later
|
||||
- When you make a non-obvious decision whose *why* would be lost if the tool call history gets pruned
|
||||
|
||||
Structure:
|
||||
|
||||
### Objective — restate the goal
|
||||
### Current Plan — numbered steps, mark completed with ✓
|
||||
### Key Decisions — decisions made and WHY
|
||||
### Working Data — intermediate results, extracted values
|
||||
### Open Questions — uncertainties to verify
|
||||
### Blockers — anything preventing progress
|
||||
### Working Data — intermediate results, extracted values (URLs, IDs, key snippets — not full pages)
|
||||
### Open Questions — uncertainties you plan to verify
|
||||
### Blockers — anything preventing progress that isn't already captured in `tasks.last_error`
|
||||
|
||||
Update incrementally — do not rewrite from scratch each time.
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
---
|
||||
name: hive.task-decomposition
|
||||
description: Decompose complex tasks into explicit subtasks before diving in.
|
||||
metadata:
|
||||
author: hive
|
||||
type: default-skill
|
||||
---
|
||||
|
||||
## Operational Protocol: Task Decomposition
|
||||
|
||||
Before starting a complex task:
|
||||
|
||||
1. Decompose — break into numbered subtasks in `_working_notes` Current Plan
|
||||
2. Estimate — relative effort per subtask (small/medium/large)
|
||||
3. Execute — work through in order, mark ✓ when complete
|
||||
4. Budget — if running low on iterations, prioritize by impact
|
||||
5. Verify — before declaring done, every subtask must be ✓, skipped (with reason), or blocked
|
||||
@@ -36,8 +36,8 @@ class SkillsConfig:
|
||||
# Default skill configuration
|
||||
default_skills = {
|
||||
"hive.note-taking": {"enabled": True},
|
||||
"hive.batch-ledger": {"enabled": True, "checkpoint_every_n": 10},
|
||||
"hive.quality-monitor": {"enabled": False},
|
||||
"hive.quality-monitor": {"enabled": False, "assessment_interval": 10},
|
||||
"hive.error-recovery": {"max_retries_per_tool": 5},
|
||||
}
|
||||
"""
|
||||
|
||||
|
||||
@@ -24,34 +24,21 @@ _SKILL_DEFAULTS: dict[str, dict[str, Any]] = {
|
||||
"hive.quality-monitor": {"assessment_interval": 5},
|
||||
"hive.error-recovery": {"max_retries_per_tool": 3},
|
||||
"hive.context-preservation": {"warn_at_usage_ratio_pct": 45},
|
||||
"hive.batch-ledger": {"checkpoint_every_n": 5},
|
||||
}
|
||||
|
||||
# Keywords that indicate a batch processing scenario (DS-12)
|
||||
_BATCH_KEYWORDS: tuple[str, ...] = (
|
||||
"list of",
|
||||
"collection of",
|
||||
"set of",
|
||||
"batch of",
|
||||
"each item",
|
||||
"for each",
|
||||
"process all",
|
||||
"records",
|
||||
"entries",
|
||||
"rows",
|
||||
"items",
|
||||
)
|
||||
|
||||
_BATCH_INIT_NUDGE = (
|
||||
"Note: your input appears to describe a batch operation. "
|
||||
"Initialize `_batch_ledger` with the total item count before processing."
|
||||
)
|
||||
|
||||
|
||||
def is_batch_scenario(text: str) -> bool:
|
||||
"""Return True if *text* contains batch-processing indicators (DS-12)."""
|
||||
lower = text.lower()
|
||||
return any(kw in lower for kw in _BATCH_KEYWORDS)
|
||||
"""Deprecated: batch auto-detection is no longer used.
|
||||
|
||||
Kept as a no-op so the agent_loop call site (which wraps it in an
|
||||
``if ctx.default_skill_batch_nudge:`` guard that's also now always
|
||||
empty) can stay unchanged until a broader cleanup. The old
|
||||
``_batch_ledger`` shared-buffer feature was replaced by the
|
||||
per-colony SQLite task queue (``hive.colony-progress-tracker``),
|
||||
which lives in ``progress.db`` and is authoritative for batch
|
||||
state across workers and runs.
|
||||
"""
|
||||
return False
|
||||
|
||||
|
||||
def _apply_overrides(skill_name: str, body: str, overrides: dict[str, Any]) -> str:
|
||||
@@ -67,40 +54,37 @@ def _apply_overrides(skill_name: str, body: str, overrides: dict[str, Any]) -> s
|
||||
return body
|
||||
|
||||
|
||||
# Ordered list of default skills (name → directory)
|
||||
# Ordered list of default skills (name → directory).
|
||||
#
|
||||
# Removed on 2026-04-15 as part of the colony-progress-tracker rollout:
|
||||
# - hive.task-decomposition — steps table in progress.db supersedes
|
||||
# in-memory ``_working_notes → Current Plan`` decomposition.
|
||||
# - hive.batch-ledger — tasks table in progress.db supersedes
|
||||
# the ``_batch_ledger`` dict-shaped queue with its pending →
|
||||
# in_progress → completed/failed/skipped state machine.
|
||||
# Both were duplicating state that belongs in SQLite.
|
||||
SKILL_REGISTRY: dict[str, str] = {
|
||||
"hive.note-taking": "note-taking",
|
||||
"hive.batch-ledger": "batch-ledger",
|
||||
"hive.context-preservation": "context-preservation",
|
||||
"hive.quality-monitor": "quality-monitor",
|
||||
"hive.error-recovery": "error-recovery",
|
||||
"hive.task-decomposition": "task-decomposition",
|
||||
"hive.colony-progress-tracker": "colony-progress-tracker",
|
||||
"hive.writing-hive-skills": "writing-hive-skills",
|
||||
}
|
||||
|
||||
# All shared buffer keys used by default skills (for permission auto-inclusion)
|
||||
# Shared buffer keys referenced by the remaining default skills (used
|
||||
# for permission auto-inclusion). The dead keys for batch-ledger,
|
||||
# task-decomposition, the handoff buffer, and the error-log buffers
|
||||
# were removed when those features migrated to progress.db.
|
||||
DATA_BUFFER_KEYS: list[str] = [
|
||||
# note-taking
|
||||
"_working_notes",
|
||||
"_notes_updated_at",
|
||||
# batch-ledger
|
||||
"_batch_ledger",
|
||||
"_batch_total",
|
||||
"_batch_completed",
|
||||
"_batch_failed",
|
||||
# context-preservation
|
||||
"_handoff_context",
|
||||
"_preserved_data",
|
||||
# quality-monitor
|
||||
"_quality_log",
|
||||
"_quality_degradation_count",
|
||||
# error-recovery
|
||||
"_error_log",
|
||||
"_failed_tools",
|
||||
"_escalation_needed",
|
||||
# task-decomposition
|
||||
"_subtasks",
|
||||
"_iteration_budget_remaining",
|
||||
]
|
||||
|
||||
|
||||
@@ -252,16 +236,15 @@ class DefaultSkillManager:
|
||||
|
||||
@property
|
||||
def batch_init_nudge(self) -> str | None:
|
||||
"""Nudge text to prepend to system prompt when batch input detected (DS-12).
|
||||
"""Deprecated: always returns None.
|
||||
|
||||
Returns None if ``hive.batch-ledger`` is disabled or auto_detect_batch is False.
|
||||
The ``hive.batch-ledger`` default skill was removed when batch
|
||||
tracking moved into ``progress.db`` (``hive.colony-progress-
|
||||
tracker``). Callers in agent_host, colony_runtime, and
|
||||
orchestrator still read this property; returning None keeps
|
||||
them functional with no system-prompt nudge.
|
||||
"""
|
||||
if "hive.batch-ledger" not in self._skills:
|
||||
return None
|
||||
overrides = self._config.get_default_overrides("hive.batch-ledger")
|
||||
if overrides.get("auto_detect_batch") is False:
|
||||
return None
|
||||
return _BATCH_INIT_NUDGE
|
||||
return None
|
||||
|
||||
@property
|
||||
def context_warn_ratio(self) -> float | None:
|
||||
|
||||
@@ -903,10 +903,76 @@ def register_queen_lifecycle_tools(
|
||||
# ``start_worker`` was removed in the Phase 4 unification — its
|
||||
# bare-bones spawn duplicated ``run_agent_with_input`` (which has
|
||||
# credential preflight, concurrency guard, and phase tracking on
|
||||
# top). The shared preflight timeout below is still used by
|
||||
# ``run_agent_with_input``.
|
||||
# top). The shared preflight timeout below is used by both
|
||||
# ``run_agent_with_input`` and ``run_parallel_workers``.
|
||||
_START_PREFLIGHT_TIMEOUT = 15 # seconds
|
||||
|
||||
async def _preflight_credentials(
|
||||
legacy: Any,
|
||||
*,
|
||||
tool_label: str,
|
||||
) -> set[str]:
|
||||
"""Compute tools whose credentials are missing and resync MCP servers.
|
||||
|
||||
Shared between ``run_agent_with_input`` (single spawn) and
|
||||
``run_parallel_workers`` (batch spawn). Returns the set of
|
||||
tool names whose credentials failed validation; the caller
|
||||
filters these out of the spawn's tool lists.
|
||||
|
||||
Exceptions (including validator bugs) are logged and treated
|
||||
as "no tools dropped" so a broken validator can't block a
|
||||
spawn. Wall-clock bound at ``_START_PREFLIGHT_TIMEOUT`` —
|
||||
slow credential HTTP health checks can't stall the LLM turn.
|
||||
"""
|
||||
unavailable: set[str] = set()
|
||||
|
||||
async def _run() -> None:
|
||||
nonlocal unavailable
|
||||
try:
|
||||
from framework.credentials.validation import compute_unavailable_tools
|
||||
|
||||
loop = asyncio.get_running_loop()
|
||||
drop, messages = await loop.run_in_executor(
|
||||
None,
|
||||
lambda: compute_unavailable_tools(legacy.graph.nodes),
|
||||
)
|
||||
unavailable = drop
|
||||
if drop:
|
||||
logger.warning(
|
||||
"%s: dropping %d tool(s) with unavailable credentials: %s",
|
||||
tool_label,
|
||||
len(drop),
|
||||
"; ".join(messages),
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"%s: compute_unavailable_tools raised, proceeding without "
|
||||
"credential-based tool filtering: %s",
|
||||
tool_label,
|
||||
exc,
|
||||
)
|
||||
|
||||
runner = getattr(session, "runner", None)
|
||||
if runner is not None:
|
||||
try:
|
||||
loop = asyncio.get_running_loop()
|
||||
await loop.run_in_executor(
|
||||
None,
|
||||
lambda: runner._tool_registry.resync_mcp_servers_if_needed(),
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("%s: MCP resync failed: %s", tool_label, exc)
|
||||
|
||||
try:
|
||||
await asyncio.wait_for(_run(), timeout=_START_PREFLIGHT_TIMEOUT)
|
||||
except TimeoutError:
|
||||
logger.warning(
|
||||
"%s: credential preflight timed out after %ds — proceeding",
|
||||
tool_label,
|
||||
_START_PREFLIGHT_TIMEOUT,
|
||||
)
|
||||
return unavailable
|
||||
|
||||
# --- stop_worker -----------------------------------------------------------
|
||||
|
||||
async def stop_worker(*, reason: str = "Stopped by queen") -> str:
|
||||
@@ -1078,6 +1144,105 @@ def register_queen_lifecycle_tools(
|
||||
}
|
||||
)
|
||||
|
||||
# Credential preflight — mirrors the one run_agent_with_input
|
||||
# performs. Without this, missing credentials (e.g. stale
|
||||
# GITHUB_TOKEN) fail once PER spawned worker, yielding N
|
||||
# duplicate error reports for a single fixable issue. Catch
|
||||
# once upfront, build a filtered tool list, and pass it to
|
||||
# every spawn via tools_override.
|
||||
legacy_for_preflight = _get_runtime()
|
||||
unavailable_tools_parallel: set[str] = set()
|
||||
tools_override_parallel: list[Any] | None = None
|
||||
if legacy_for_preflight is not None:
|
||||
try:
|
||||
unavailable_tools_parallel = await _preflight_credentials(
|
||||
legacy_for_preflight, tool_label="run_parallel_workers"
|
||||
)
|
||||
except CredentialError as e:
|
||||
# Structured credential failure: publish the
|
||||
# CREDENTIALS_REQUIRED event so the frontend's modal
|
||||
# can fire, and return the same shape the single-path
|
||||
# tool returns on the same failure.
|
||||
error_payload = credential_errors_to_json(e)
|
||||
error_payload["agent_path"] = str(getattr(session, "worker_path", "") or "")
|
||||
bus = getattr(session, "event_bus", None)
|
||||
if bus is not None:
|
||||
await bus.publish(
|
||||
AgentEvent(
|
||||
type=EventType.CREDENTIALS_REQUIRED,
|
||||
stream_id="queen",
|
||||
data=error_payload,
|
||||
)
|
||||
)
|
||||
return json.dumps(error_payload)
|
||||
|
||||
if unavailable_tools_parallel:
|
||||
colony_tools = list(getattr(colony, "_tools", []) or [])
|
||||
before = len(colony_tools)
|
||||
tools_override_parallel = [
|
||||
t
|
||||
for t in colony_tools
|
||||
if getattr(t, "name", None) not in unavailable_tools_parallel
|
||||
]
|
||||
logger.info(
|
||||
"run_parallel_workers: dropped %d tool object(s) from spawn_tools (unavailable credentials)",
|
||||
before - len(tools_override_parallel),
|
||||
)
|
||||
|
||||
# Colony progress tracker wiring: if the session's loaded
|
||||
# worker points at a colony directory that has a progress.db,
|
||||
# inject db_path + colony_id into every per-task ``data``
|
||||
# dict so each spawned worker sees them in its first user
|
||||
# message and can claim rows from the queue. ColonyRuntime.
|
||||
# spawn() detects db_path in input_data and pre-activates
|
||||
# hive.colony-progress-tracker into the catalog prompt.
|
||||
_colony_db_path: str | None = None
|
||||
_colony_id: str | None = None
|
||||
_worker_path = getattr(session, "worker_path", None)
|
||||
if _worker_path:
|
||||
from pathlib import Path as _Path
|
||||
|
||||
_wp = _Path(_worker_path)
|
||||
_pdb = _wp / "data" / "progress.db"
|
||||
if _pdb.exists():
|
||||
_colony_db_path = str(_pdb.resolve())
|
||||
_colony_id = _wp.name
|
||||
|
||||
# Phase 2: enqueue each task into progress.db BEFORE building
|
||||
# spawn specs so every parallel worker has a pre-assigned row
|
||||
# to claim. Without this the queue stays empty and each
|
||||
# worker's claim UPDATE affects zero rows, silently falling
|
||||
# back to executing from its spawn message.
|
||||
_enqueued_task_ids: list[str | None] = [None] * len(tasks)
|
||||
if _colony_db_path:
|
||||
from pathlib import Path as _PathP
|
||||
|
||||
from framework.host.progress_db import (
|
||||
enqueue_task as _enqueue_task_fn,
|
||||
)
|
||||
|
||||
_pdb_path_obj = _PathP(_colony_db_path)
|
||||
for _i, _spec in enumerate(tasks):
|
||||
if not isinstance(_spec, dict):
|
||||
continue
|
||||
_task_text_pre = str(_spec.get("task", "")).strip()
|
||||
if not _task_text_pre:
|
||||
continue
|
||||
try:
|
||||
_enqueued_task_ids[_i] = await asyncio.to_thread(
|
||||
_enqueue_task_fn,
|
||||
_pdb_path_obj,
|
||||
_task_text_pre,
|
||||
source="run_parallel_workers",
|
||||
)
|
||||
except Exception as _enqueue_exc:
|
||||
logger.warning(
|
||||
"run_parallel_workers: failed to enqueue tasks[%d] "
|
||||
"(spawn proceeding without pinned task_id): %s",
|
||||
_i,
|
||||
_enqueue_exc,
|
||||
)
|
||||
|
||||
# Normalise: each entry must have a non-empty "task" string.
|
||||
normalised: list[dict] = []
|
||||
for i, spec in enumerate(tasks):
|
||||
@@ -1086,18 +1251,58 @@ def register_queen_lifecycle_tools(
|
||||
task_text = str(spec.get("task", "")).strip()
|
||||
if not task_text:
|
||||
return json.dumps({"error": f"tasks[{i}].task is empty"})
|
||||
spec_data = spec.get("data") if isinstance(spec.get("data"), dict) else {}
|
||||
if _colony_db_path:
|
||||
spec_data = {
|
||||
**spec_data,
|
||||
"db_path": _colony_db_path,
|
||||
"colony_id": _colony_id,
|
||||
}
|
||||
if _enqueued_task_ids[i]:
|
||||
spec_data["task_id"] = _enqueued_task_ids[i]
|
||||
normalised.append(
|
||||
{
|
||||
"task": task_text,
|
||||
"data": spec.get("data") if isinstance(spec.get("data"), dict) else None,
|
||||
"data": spec_data or None,
|
||||
}
|
||||
)
|
||||
|
||||
if _colony_db_path:
|
||||
_pinned = sum(1 for tid in _enqueued_task_ids if tid)
|
||||
logger.info(
|
||||
"run_parallel_workers: attached progress_db context to "
|
||||
"%d spawn(s) (colony_id=%s, %d pinned task_ids)",
|
||||
len(normalised),
|
||||
_colony_id,
|
||||
_pinned,
|
||||
)
|
||||
|
||||
try:
|
||||
worker_ids = await colony.spawn_batch(normalised)
|
||||
worker_ids = await colony.spawn_batch(
|
||||
normalised,
|
||||
tools_override=tools_override_parallel,
|
||||
)
|
||||
except Exception as e:
|
||||
return json.dumps({"error": f"spawn_batch failed: {e}"})
|
||||
|
||||
# Phase transition — mirrors run_agent_with_input. With the
|
||||
# batch now spawned, the queen is semantically "running" until
|
||||
# wait_for_worker_reports returns, so phase-gated running
|
||||
# tools (inject_message, reply_to_worker, ...) should be
|
||||
# available. Without this change run_parallel_workers left
|
||||
# the queen in whatever phase she was in (typically staging).
|
||||
if phase_state is not None:
|
||||
try:
|
||||
await phase_state.switch_to_running()
|
||||
_update_meta_json(
|
||||
session_manager, manager_session_id, {"phase": "running"}
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"run_parallel_workers: phase transition to 'running' failed (non-fatal): %s",
|
||||
exc,
|
||||
)
|
||||
|
||||
try:
|
||||
reports = await colony.wait_for_worker_reports(
|
||||
worker_ids,
|
||||
@@ -1322,6 +1527,35 @@ def register_queen_lifecycle_tools(
|
||||
except OSError as e:
|
||||
return None, f"failed to install skill into {target}: {e}"
|
||||
|
||||
# Cleanup the source directory after a successful install so
|
||||
# the authored skill doesn't linger as debris in the agent
|
||||
# workspace (or — pre-sandbox-split — in the hive git
|
||||
# checkout). Only removes paths that are OUTSIDE
|
||||
# ``~/.hive/skills/`` so we never nuke the canonical install
|
||||
# target or user-owned skill dirs.
|
||||
try:
|
||||
src_resolved = src.resolve()
|
||||
skills_root_resolved = target_root.resolve()
|
||||
try:
|
||||
src_resolved.relative_to(skills_root_resolved)
|
||||
_under_skills_root = True
|
||||
except ValueError:
|
||||
_under_skills_root = False
|
||||
if not _under_skills_root:
|
||||
_shutil.rmtree(src_resolved)
|
||||
logger.info(
|
||||
"create_colony: cleaned up authored skill source at %s "
|
||||
"(installed to %s)",
|
||||
src_resolved,
|
||||
target,
|
||||
)
|
||||
except OSError as e:
|
||||
logger.warning(
|
||||
"create_colony: failed to clean up skill source at %s (non-fatal): %s",
|
||||
src,
|
||||
e,
|
||||
)
|
||||
|
||||
return target, None
|
||||
|
||||
async def create_colony(
|
||||
@@ -1329,6 +1563,7 @@ def register_queen_lifecycle_tools(
|
||||
colony_name: str,
|
||||
task: str,
|
||||
skill_path: str,
|
||||
tasks: list[dict] | None = None,
|
||||
) -> str:
|
||||
"""Create a colony after installing a pre-authored skill folder.
|
||||
|
||||
@@ -1338,6 +1573,13 @@ def register_queen_lifecycle_tools(
|
||||
they're ready to start the worker — at that point the worker
|
||||
reads the task from ``worker.json`` and the skill from
|
||||
``~/.hive/skills/`` and starts informed.
|
||||
|
||||
When *tasks* is provided, each entry is seeded into the
|
||||
colony's ``progress.db`` task queue in a single transaction.
|
||||
Workers then claim rows from the queue using the
|
||||
``hive.colony-progress-tracker`` default skill. Each task dict
|
||||
accepts: ``goal`` (required), optional ``steps``,
|
||||
``sop_items``, ``priority``, ``payload``, ``parent_task_id``.
|
||||
"""
|
||||
if session is None:
|
||||
return json.dumps({"error": "No session bound to this tool registry."})
|
||||
@@ -1392,6 +1634,7 @@ def register_queen_lifecycle_tools(
|
||||
session=session,
|
||||
colony_name=cn,
|
||||
task=(task or "").strip(),
|
||||
tasks=tasks if isinstance(tasks, list) else None,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.exception("create_colony: fork failed after installing skill")
|
||||
@@ -1444,6 +1687,8 @@ def register_queen_lifecycle_tools(
|
||||
"is_new": fork_result.get("is_new", True),
|
||||
"skill_installed": str(installed_skill),
|
||||
"skill_name": installed_skill.name if installed_skill else None,
|
||||
"db_path": fork_result.get("db_path"),
|
||||
"tasks_seeded": len(fork_result.get("task_ids") or []),
|
||||
}
|
||||
)
|
||||
|
||||
@@ -1541,6 +1786,57 @@ def register_queen_lifecycle_tools(
|
||||
"protocol'."
|
||||
),
|
||||
},
|
||||
"tasks": {
|
||||
"type": "array",
|
||||
"description": (
|
||||
"Optional pre-seeded task queue for the colony. "
|
||||
"When the colony is a fan-out of many similar "
|
||||
"units of work (e.g. 'process record #1234', "
|
||||
"'scrape profile X'), pass them here as an "
|
||||
"array and workers will claim rows atomically "
|
||||
"from the SQLite queue using the "
|
||||
"hive.colony-progress-tracker skill. Each task "
|
||||
"needs a 'goal' string; optionally include "
|
||||
"'steps' (ordered subtasks), 'sop_items' "
|
||||
"(required checklist gates), 'priority' "
|
||||
"(higher runs first), and 'payload' "
|
||||
"(task-specific parameters). Can be hundreds "
|
||||
"or thousands of entries — the bulk insert "
|
||||
"runs in a single transaction."
|
||||
),
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"goal": {"type": "string"},
|
||||
"priority": {"type": "integer"},
|
||||
"payload": {},
|
||||
"steps": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"title": {"type": "string"},
|
||||
"detail": {"type": "string"},
|
||||
},
|
||||
"required": ["title"],
|
||||
},
|
||||
},
|
||||
"sop_items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"key": {"type": "string"},
|
||||
"description": {"type": "string"},
|
||||
"required": {"type": "boolean"},
|
||||
},
|
||||
"required": ["key", "description"],
|
||||
},
|
||||
},
|
||||
},
|
||||
"required": ["goal"],
|
||||
},
|
||||
},
|
||||
},
|
||||
"required": ["colony_name", "task", "skill_path"],
|
||||
},
|
||||
@@ -1552,6 +1848,158 @@ def register_queen_lifecycle_tools(
|
||||
)
|
||||
tools_registered += 1
|
||||
|
||||
# --- enqueue_task ------------------------------------------------------------
|
||||
|
||||
async def enqueue_task_tool(
|
||||
*,
|
||||
colony_name: str,
|
||||
goal: str,
|
||||
steps: list[dict] | None = None,
|
||||
sop_items: list[dict] | None = None,
|
||||
payload: Any = None,
|
||||
priority: int = 0,
|
||||
parent_task_id: str | None = None,
|
||||
) -> str:
|
||||
"""Append a single task to an existing colony's progress.db queue.
|
||||
|
||||
Use this when the colony is already created and more work
|
||||
needs to be fanned out (webhook-driven, follow-up requests,
|
||||
worker-generated subtasks). The colony's workers pick it up
|
||||
on their next claim cycle.
|
||||
"""
|
||||
cn = (colony_name or "").strip()
|
||||
if not _COLONY_NAME_RE.match(cn):
|
||||
return json.dumps(
|
||||
{"error": "colony_name must be lowercase alphanumeric with underscores"}
|
||||
)
|
||||
|
||||
from pathlib import Path as _Path
|
||||
|
||||
from framework.host.progress_db import (
|
||||
enqueue_task as _enqueue_task,
|
||||
ensure_progress_db as _ensure_db,
|
||||
)
|
||||
|
||||
colony_dir = _Path.home() / ".hive" / "colonies" / cn
|
||||
if not colony_dir.is_dir():
|
||||
return json.dumps({"error": f"colony '{cn}' not found"})
|
||||
|
||||
try:
|
||||
db_path = await asyncio.to_thread(_ensure_db, colony_dir)
|
||||
task_id = await asyncio.to_thread(
|
||||
_enqueue_task,
|
||||
db_path,
|
||||
goal,
|
||||
steps=steps,
|
||||
sop_items=sop_items,
|
||||
payload=payload,
|
||||
priority=priority,
|
||||
parent_task_id=parent_task_id,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.exception("enqueue_task: failed to insert row")
|
||||
return json.dumps({"error": f"enqueue_task failed: {e}"})
|
||||
|
||||
return json.dumps(
|
||||
{
|
||||
"status": "enqueued",
|
||||
"colony_name": cn,
|
||||
"task_id": task_id,
|
||||
"db_path": str(db_path),
|
||||
}
|
||||
)
|
||||
|
||||
_enqueue_task_tool = Tool(
|
||||
name="enqueue_task",
|
||||
description=(
|
||||
"Append a single task to an existing colony's progress.db "
|
||||
"queue. Use this after create_colony when more work needs "
|
||||
"to be fanned out — e.g. a webhook fired, the user asked "
|
||||
"for a follow-up run, or a worker spawned a subtask. The "
|
||||
"colony's workers pick it up on their next claim cycle "
|
||||
"(atomic UPDATE … WHERE status='pending'). For bulk "
|
||||
"authoring at colony creation time, pass the 'tasks' "
|
||||
"array to create_colony instead."
|
||||
),
|
||||
parameters={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"colony_name": {
|
||||
"type": "string",
|
||||
"description": "Target colony name (lowercase + underscores).",
|
||||
},
|
||||
"goal": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"Human-readable task description. Self-contained — "
|
||||
"the worker has no context beyond this string plus "
|
||||
"any steps/sop_items/payload you attach."
|
||||
),
|
||||
},
|
||||
"steps": {
|
||||
"type": "array",
|
||||
"description": (
|
||||
"Optional ordered subtasks the worker should "
|
||||
"check off as it executes. Each step needs a "
|
||||
"'title'; optional 'detail' for longer "
|
||||
"instructions."
|
||||
),
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"title": {"type": "string"},
|
||||
"detail": {"type": "string"},
|
||||
},
|
||||
"required": ["title"],
|
||||
},
|
||||
},
|
||||
"sop_items": {
|
||||
"type": "array",
|
||||
"description": (
|
||||
"Optional hard-gate checklist items the worker "
|
||||
"MUST address before marking the task done. "
|
||||
"Each item needs a 'key' (slug) and "
|
||||
"'description'; 'required' defaults to true."
|
||||
),
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"key": {"type": "string"},
|
||||
"description": {"type": "string"},
|
||||
"required": {"type": "boolean"},
|
||||
},
|
||||
"required": ["key", "description"],
|
||||
},
|
||||
},
|
||||
"payload": {
|
||||
"description": (
|
||||
"Optional task-specific parameters. Stored as "
|
||||
"JSON in the 'payload' column."
|
||||
),
|
||||
},
|
||||
"priority": {
|
||||
"type": "integer",
|
||||
"description": "Higher values run first. Default 0.",
|
||||
},
|
||||
"parent_task_id": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"Optional reference to an existing task this "
|
||||
"one was spawned from (audit only; no blocking "
|
||||
"dependency resolver today)."
|
||||
),
|
||||
},
|
||||
},
|
||||
"required": ["colony_name", "goal"],
|
||||
},
|
||||
)
|
||||
registry.register(
|
||||
"enqueue_task",
|
||||
_enqueue_task_tool,
|
||||
lambda inputs: enqueue_task_tool(**inputs),
|
||||
)
|
||||
tools_registered += 1
|
||||
|
||||
# --- switch_to_reviewing ----------------------------------------------------
|
||||
|
||||
async def switch_to_reviewing_tool() -> str:
|
||||
@@ -2969,7 +3417,8 @@ def register_queen_lifecycle_tools(
|
||||
if preamble.get("pending_question"):
|
||||
result["pending_question"] = preamble["pending_question"]
|
||||
|
||||
result["agent_idle_seconds"] = round(runtime.agent_idle_seconds, 1)
|
||||
_idle = runtime.agent_idle_seconds
|
||||
result["agent_idle_seconds"] = round(_idle, 1) if _idle != float("inf") else -1
|
||||
|
||||
for key in ("current_node", "current_iteration"):
|
||||
if key in preamble:
|
||||
@@ -3713,6 +4162,33 @@ def register_queen_lifecycle_tools(
|
||||
task,
|
||||
)
|
||||
|
||||
# Concurrency budget check — mirrors run_parallel_workers so a
|
||||
# queen in a loop can't silently exceed max_concurrent_workers
|
||||
# by hammering run_agent_with_input. Per-call count is 1, so
|
||||
# the check is ``active + 1 > max_concurrent``.
|
||||
colony_cfg = getattr(colony, "_config", None) or getattr(colony, "config", None)
|
||||
max_concurrent = getattr(colony_cfg, "max_concurrent_workers", None)
|
||||
if max_concurrent and max_concurrent > 0:
|
||||
active = 0
|
||||
try:
|
||||
workers = getattr(colony, "_workers", {}) or {}
|
||||
for w in workers.values():
|
||||
handle = getattr(w, "_task_handle", None)
|
||||
if handle is not None and not handle.done():
|
||||
active += 1
|
||||
except Exception:
|
||||
active = 0
|
||||
if active + 1 > max_concurrent:
|
||||
return json.dumps(
|
||||
{
|
||||
"error": (
|
||||
f"run_agent_with_input would exceed max_concurrent_workers "
|
||||
f"({active} active + 1 new > {max_concurrent}). "
|
||||
"Wait for an existing worker to finish or stop one."
|
||||
)
|
||||
}
|
||||
)
|
||||
|
||||
try:
|
||||
# Pre-flight: compute the set of tools whose credentials are
|
||||
# NOT currently available, and resync MCP servers. We do NOT
|
||||
@@ -3723,58 +4199,9 @@ def register_queen_lifecycle_tools(
|
||||
# to block the whole spawn with a CredentialError; the fix
|
||||
# is to treat unset credentials as "drop these tools" rather
|
||||
# than "abort the worker".
|
||||
#
|
||||
# Note: the MCP admission gate (_build_mcp_admission_gate in
|
||||
# tool_registry.py) already filters MCP tools at registration
|
||||
# time. This preflight covers the non-MCP path — tools.py
|
||||
# discoveries via discover_from_module — which has no
|
||||
# credential gate of its own.
|
||||
loop = asyncio.get_running_loop()
|
||||
unavailable_tools: set[str] = set()
|
||||
|
||||
async def _preflight():
|
||||
nonlocal unavailable_tools
|
||||
try:
|
||||
from framework.credentials.validation import compute_unavailable_tools
|
||||
|
||||
drop, messages = await loop.run_in_executor(
|
||||
None,
|
||||
lambda: compute_unavailable_tools(legacy.graph.nodes),
|
||||
)
|
||||
unavailable_tools = drop
|
||||
if drop:
|
||||
logger.warning(
|
||||
"run_agent_with_input: dropping %d tool(s) with "
|
||||
"unavailable credentials from worker spawn: %s",
|
||||
len(drop),
|
||||
"; ".join(messages),
|
||||
)
|
||||
except Exception as exc:
|
||||
# Validation itself failing (not a credential failure —
|
||||
# a code error in the validator) should not block the
|
||||
# spawn. Log and proceed as if nothing was dropped.
|
||||
logger.warning(
|
||||
"compute_unavailable_tools raised, proceeding without credential-based tool filtering: %s",
|
||||
exc,
|
||||
)
|
||||
|
||||
runner = getattr(session, "runner", None)
|
||||
if runner:
|
||||
try:
|
||||
await loop.run_in_executor(
|
||||
None,
|
||||
lambda: runner._tool_registry.resync_mcp_servers_if_needed(),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("MCP resync failed: %s", e)
|
||||
|
||||
try:
|
||||
await asyncio.wait_for(_preflight(), timeout=_START_PREFLIGHT_TIMEOUT)
|
||||
except TimeoutError:
|
||||
logger.warning(
|
||||
"run_agent_with_input preflight timed out after %ds — proceeding",
|
||||
_START_PREFLIGHT_TIMEOUT,
|
||||
)
|
||||
unavailable_tools = await _preflight_credentials(
|
||||
legacy, tool_label="run_agent_with_input"
|
||||
)
|
||||
|
||||
# Build a per-spawn AgentSpec that mirrors the loaded
|
||||
# worker's entry-node identity. This is what makes the
|
||||
@@ -3848,10 +4275,66 @@ def register_queen_lifecycle_tools(
|
||||
dropped_count,
|
||||
)
|
||||
|
||||
# Colony progress tracker wiring: if the loaded worker
|
||||
# lives under ~/.hive/colonies/{name}/ and has a
|
||||
# progress.db, inject db_path + colony_id into input_data
|
||||
# so the spawned worker sees them in its first user
|
||||
# message and can use the hive.colony-progress-tracker
|
||||
# skill to claim tasks from the queue.
|
||||
_spawn_input_data: dict[str, Any] = {"user_request": task}
|
||||
_worker_path = getattr(session, "worker_path", None)
|
||||
if _worker_path:
|
||||
from pathlib import Path as _Path
|
||||
|
||||
_worker_path_p = _Path(_worker_path)
|
||||
_progress_db = _worker_path_p / "data" / "progress.db"
|
||||
if _progress_db.exists():
|
||||
_spawn_input_data["db_path"] = str(_progress_db.resolve())
|
||||
_spawn_input_data["colony_id"] = _worker_path_p.name
|
||||
logger.info(
|
||||
"run_agent_with_input: attached progress_db context "
|
||||
"(colony_id=%s, db_path=%s)",
|
||||
_worker_path_p.name,
|
||||
_progress_db,
|
||||
)
|
||||
|
||||
# Phase 2: enqueue the task into progress.db BEFORE
|
||||
# spawning so the worker has a concrete row to
|
||||
# claim. Without this the queue is empty and the
|
||||
# worker's claim UPDATE affects zero rows, so it
|
||||
# silently falls back to executing from the chat
|
||||
# spawn message. Any enqueue failure is logged and
|
||||
# the spawn proceeds without a pinned task_id
|
||||
# (degrades to the pre-Phase-2 behavior).
|
||||
try:
|
||||
from framework.host.progress_db import (
|
||||
enqueue_task as _enqueue_task_fn,
|
||||
)
|
||||
|
||||
_task_id = await asyncio.to_thread(
|
||||
_enqueue_task_fn,
|
||||
_progress_db,
|
||||
task,
|
||||
source="run_agent_with_input",
|
||||
)
|
||||
_spawn_input_data["task_id"] = _task_id
|
||||
logger.info(
|
||||
"run_agent_with_input: enqueued task %s into %s",
|
||||
_task_id,
|
||||
_progress_db,
|
||||
)
|
||||
except Exception as _enqueue_exc:
|
||||
logger.warning(
|
||||
"run_agent_with_input: failed to enqueue task "
|
||||
"into progress.db (spawn proceeding without "
|
||||
"pinned task_id): %s",
|
||||
_enqueue_exc,
|
||||
)
|
||||
|
||||
worker_ids = await colony.spawn(
|
||||
task=task,
|
||||
count=1,
|
||||
input_data={"user_request": task},
|
||||
input_data=_spawn_input_data,
|
||||
agent_spec=spawn_spec,
|
||||
tools=spawn_tools,
|
||||
tool_executor=spawn_tool_executor,
|
||||
|
||||
@@ -87,9 +87,25 @@ export const sessionsApi = {
|
||||
colonies: (sessionId: string) =>
|
||||
api.get<{ colonies: string[] }>(`/sessions/${sessionId}/colonies`),
|
||||
|
||||
/** Get persisted eventbus log for a session (works for cold sessions — used for full UI replay). */
|
||||
eventsHistory: (sessionId: string) =>
|
||||
api.get<{ events: AgentEvent[]; session_id: string }>(`/sessions/${sessionId}/events/history`),
|
||||
/** Get persisted eventbus log for a session (works for cold sessions — used for full UI replay).
|
||||
*
|
||||
* Returns the TAIL of the event log. Default limit 2000 (server
|
||||
* clamps to [1, 10000]); older events get dropped and
|
||||
* ``truncated: true`` is set so the UI can show an indicator.
|
||||
*/
|
||||
eventsHistory: (sessionId: string, limit?: number) =>
|
||||
api.get<{
|
||||
events: AgentEvent[];
|
||||
session_id: string;
|
||||
total: number;
|
||||
returned: number;
|
||||
truncated: boolean;
|
||||
limit: number;
|
||||
}>(
|
||||
`/sessions/${sessionId}/events/history${
|
||||
limit ? `?limit=${limit}` : ""
|
||||
}`,
|
||||
),
|
||||
|
||||
/** Open the session's data folder in the OS file manager. */
|
||||
revealFolder: (sessionId: string) =>
|
||||
|
||||
@@ -31,6 +31,15 @@ export default function AppHeader({ onOpenQueenProfile }: AppHeaderProps) {
|
||||
const colonyId = colonyMatch[1];
|
||||
const colony = colonies.find((c) => c.id === colonyId);
|
||||
title = colony?.name ?? colonyId;
|
||||
// Show queen profile button when the colony has a linked queen profile
|
||||
if (colony?.queenProfileId) {
|
||||
const profile = queenProfiles.find((q) => q.id === colony.queenProfileId);
|
||||
if (profile) {
|
||||
queenIdForProfile = profile.id;
|
||||
queenTitle = profile.title ?? null;
|
||||
icon = <Crown className="w-4 h-4 text-primary" />;
|
||||
}
|
||||
}
|
||||
} else if (queenMatch) {
|
||||
const queenId = queenMatch[1];
|
||||
const profile = queenProfiles.find((q) => q.id === queenId);
|
||||
|
||||
@@ -10,6 +10,8 @@ import {
|
||||
Paperclip,
|
||||
X,
|
||||
} from "lucide-react";
|
||||
import WorkerRunBubble from "@/components/WorkerRunBubble";
|
||||
import type { WorkerRunGroup } from "@/components/WorkerRunBubble";
|
||||
|
||||
export interface ImageContent {
|
||||
type: "image_url";
|
||||
@@ -25,6 +27,8 @@ export interface ContextUsageEntry {
|
||||
import MarkdownContent from "@/components/MarkdownContent";
|
||||
import QuestionWidget from "@/components/QuestionWidget";
|
||||
import MultiQuestionWidget from "@/components/MultiQuestionWidget";
|
||||
import { useColony } from "@/context/ColonyContext";
|
||||
import { useQueenProfile } from "@/context/QueenProfileContext";
|
||||
import ParallelSubagentBubble, {
|
||||
type SubagentGroup,
|
||||
} from "@/components/ParallelSubagentBubble";
|
||||
@@ -60,6 +64,12 @@ export interface ChatMessage {
|
||||
nodeId?: string;
|
||||
/** Backend execution_id for this message */
|
||||
executionId?: string;
|
||||
/** Backend stream_id — the per-worker identity used for grouping
|
||||
* parallel-spawn workers into their own stacked WorkerRunBubble.
|
||||
* "queen" for queen messages, "worker" for the single loaded
|
||||
* worker (run_agent_with_input), or "worker:{uuid}" for each
|
||||
* parallel worker spawned via run_parallel_workers. */
|
||||
streamId?: string;
|
||||
/** True when the message was sent while the queen was still processing */
|
||||
queued?: boolean;
|
||||
}
|
||||
@@ -124,14 +134,14 @@ const TOOL_HEX = [
|
||||
"#e5a820", // sunflower
|
||||
];
|
||||
|
||||
function toolHex(name: string): string {
|
||||
export function toolHex(name: string): string {
|
||||
let hash = 0;
|
||||
for (let i = 0; i < name.length; i++)
|
||||
hash = (hash * 31 + name.charCodeAt(i)) | 0;
|
||||
return TOOL_HEX[Math.abs(hash) % TOOL_HEX.length];
|
||||
}
|
||||
|
||||
function ToolActivityRow({ content }: { content: string }) {
|
||||
export function ToolActivityRow({ content }: { content: string }) {
|
||||
let tools: { name: string; done: boolean }[] = [];
|
||||
try {
|
||||
const parsed = JSON.parse(content);
|
||||
@@ -336,6 +346,15 @@ function InlineAskUserBubble({
|
||||
const color = getColor(msg.agent, msg.role);
|
||||
const thread = msg.thread || activeThread;
|
||||
|
||||
const { queenProfiles } = useColony();
|
||||
const { openQueenProfile } = useQueenProfile();
|
||||
const queenProfileId = isQueen
|
||||
? queenProfiles.find((q) => q.name === msg.agent)?.id ?? null
|
||||
: null;
|
||||
const handleQueenClick = queenProfileId
|
||||
? () => openQueenProfile(queenProfileId)
|
||||
: undefined;
|
||||
|
||||
const handleSingle = (answer: string) => {
|
||||
setState("submitted");
|
||||
onSend(answer, thread);
|
||||
@@ -355,12 +374,14 @@ function InlineAskUserBubble({
|
||||
return (
|
||||
<div className="flex gap-3">
|
||||
<div
|
||||
className={`flex-shrink-0 ${isQueen ? "w-9 h-9" : "w-7 h-7"} rounded-xl flex items-center justify-center`}
|
||||
className={`flex-shrink-0 ${isQueen ? "w-9 h-9" : "w-7 h-7"} rounded-xl flex items-center justify-center${handleQueenClick ? " cursor-pointer hover:opacity-80 transition-opacity" : ""}`}
|
||||
style={{
|
||||
backgroundColor: `${color}18`,
|
||||
border: `1.5px solid ${color}35`,
|
||||
boxShadow: isQueen ? `0 0 12px ${color}20` : undefined,
|
||||
}}
|
||||
onClick={handleQueenClick}
|
||||
title={handleQueenClick ? `View ${msg.agent}'s profile` : undefined}
|
||||
>
|
||||
{isQueen ? (
|
||||
<Crown className="w-4 h-4" style={{ color }} />
|
||||
@@ -373,8 +394,9 @@ function InlineAskUserBubble({
|
||||
>
|
||||
<div className="flex items-center gap-2 mb-1">
|
||||
<span
|
||||
className={`font-medium ${isQueen ? "text-sm" : "text-xs"}`}
|
||||
className={`font-medium ${isQueen ? "text-sm" : "text-xs"}${handleQueenClick ? " cursor-pointer hover:underline" : ""}`}
|
||||
style={{ color }}
|
||||
onClick={handleQueenClick}
|
||||
>
|
||||
{msg.agent}
|
||||
</span>
|
||||
@@ -435,6 +457,13 @@ const MessageBubble = memo(
|
||||
const isQueen = msg.role === "queen";
|
||||
const color = getColor(msg.agent, msg.role);
|
||||
|
||||
// Resolve queen profile ID so clicking avatar/name opens the profile panel
|
||||
const { queenProfiles } = useColony();
|
||||
const { openQueenProfile } = useQueenProfile();
|
||||
const queenProfileId = isQueen
|
||||
? queenProfiles.find((q) => q.name === msg.agent)?.id ?? null
|
||||
: null;
|
||||
|
||||
if (msg.type === "run_divider") {
|
||||
return (
|
||||
<div className="flex items-center gap-3 py-2 my-1">
|
||||
@@ -529,15 +558,21 @@ const MessageBubble = memo(
|
||||
);
|
||||
}
|
||||
|
||||
const handleQueenClick = queenProfileId
|
||||
? () => openQueenProfile(queenProfileId)
|
||||
: undefined;
|
||||
|
||||
return (
|
||||
<div className="flex gap-3">
|
||||
<div
|
||||
className={`flex-shrink-0 ${isQueen ? "w-9 h-9" : "w-7 h-7"} rounded-xl flex items-center justify-center`}
|
||||
className={`flex-shrink-0 ${isQueen ? "w-9 h-9" : "w-7 h-7"} rounded-xl flex items-center justify-center${handleQueenClick ? " cursor-pointer hover:opacity-80 transition-opacity" : ""}`}
|
||||
style={{
|
||||
backgroundColor: `${color}18`,
|
||||
border: `1.5px solid ${color}35`,
|
||||
boxShadow: isQueen ? `0 0 12px ${color}20` : undefined,
|
||||
}}
|
||||
onClick={handleQueenClick}
|
||||
title={handleQueenClick ? `View ${msg.agent}'s profile` : undefined}
|
||||
>
|
||||
{isQueen ? (
|
||||
<Crown className="w-4 h-4" style={{ color }} />
|
||||
@@ -550,8 +585,9 @@ const MessageBubble = memo(
|
||||
>
|
||||
<div className="flex items-center gap-2 mb-1">
|
||||
<span
|
||||
className={`font-medium ${isQueen ? "text-sm" : "text-xs"}`}
|
||||
className={`font-medium ${isQueen ? "text-sm" : "text-xs"}${handleQueenClick ? " cursor-pointer hover:underline" : ""}`}
|
||||
style={{ color }}
|
||||
onClick={handleQueenClick}
|
||||
>
|
||||
{msg.agent}
|
||||
</span>
|
||||
@@ -665,14 +701,157 @@ export default function ChatPanel({
|
||||
type RenderItem =
|
||||
| { kind: "message"; msg: ChatMessage }
|
||||
| { kind: "parallel"; groupId: string; groups: SubagentGroup[] }
|
||||
| {
|
||||
kind: "worker_run";
|
||||
runId: string;
|
||||
group: WorkerRunGroup;
|
||||
/** Optional short label shown next to the "Worker" badge.
|
||||
* Only set when there are multiple parallel workers in the
|
||||
* same run span (so users can tell them apart). */
|
||||
label?: string;
|
||||
}
|
||||
| { kind: "day_divider"; key: string; createdAt: number };
|
||||
|
||||
/** Derive a short label from a parallel-worker stream id.
|
||||
* `worker:abcdef12-3456-...` → `abcdef12` (first 8 chars of the
|
||||
* uuid after the `worker:` prefix). Falls back to the first
|
||||
* message's nodeId when the streamId isn't the expected shape. */
|
||||
function deriveWorkerLabel(
|
||||
streamKey: string,
|
||||
msgs: ChatMessage[],
|
||||
): string {
|
||||
if (streamKey.startsWith("worker:")) {
|
||||
const suffix = streamKey.slice("worker:".length);
|
||||
// sessions are `session_YYYYMMDD_HHMMSS_<8-hex>` — show the
|
||||
// trailing hex if present, else first 8 chars of the suffix.
|
||||
const tail = suffix.match(/_[0-9a-f]{6,}$/i)?.[0]?.slice(1);
|
||||
return tail ? tail.slice(0, 8) : suffix.slice(0, 8);
|
||||
}
|
||||
const nid = msgs.find((m) => m.nodeId)?.nodeId;
|
||||
return nid || streamKey;
|
||||
}
|
||||
|
||||
const renderItems = useMemo<RenderItem[]>(() => {
|
||||
const items: RenderItem[] = [];
|
||||
let i = 0;
|
||||
while (i < threadMessages.length) {
|
||||
const msg = threadMessages[i];
|
||||
const isSubagent = msg.nodeId?.includes(":subagent:");
|
||||
|
||||
// Worker run grouping: collect consecutive WORKER-role
|
||||
// messages (and worker tool_status pills) into a collapsible
|
||||
// card. Queen tool_status pills (``role === "queen"``) are
|
||||
// deliberately excluded — the queen's own tool calls are part
|
||||
// of the queen↔user conversation and should render inline as
|
||||
// ToolActivityRows, not fold into a "Worker" bubble. Without
|
||||
// this guard, every queen run_command / read_file / etc. shows
|
||||
// up under a misleading "Worker" label in the DM.
|
||||
const isWorkerCandidate =
|
||||
msg.role === "worker" ||
|
||||
(msg.type === "tool_status" && msg.role !== "queen");
|
||||
if (
|
||||
!isSubagent &&
|
||||
isWorkerCandidate &&
|
||||
msg.type !== "user" &&
|
||||
msg.type !== "run_divider"
|
||||
) {
|
||||
const workerMsgs: ChatMessage[] = [];
|
||||
const firstWorkerMsg = msg;
|
||||
|
||||
while (i < threadMessages.length) {
|
||||
const m = threadMessages[i];
|
||||
|
||||
// Hard boundary — stop the worker run group
|
||||
if (m.type === "user" || m.type === "run_divider") break;
|
||||
// Queen message with real text — boundary (queen is talking
|
||||
// to the user, not just emitting a tool)
|
||||
if (m.role === "queen" && m.content?.trim() && !m.type) break;
|
||||
// Queen tool_status — NOT a worker activity, don't bucket
|
||||
// it. Break so the grouping stops and the queen pill
|
||||
// renders inline.
|
||||
if (m.type === "tool_status" && m.role === "queen") break;
|
||||
// Subagent message — different group type, stop here
|
||||
if (m.nodeId?.includes(":subagent:")) break;
|
||||
|
||||
// Worker text messages and worker tool_status belong to the run
|
||||
if (
|
||||
m.role === "worker" ||
|
||||
(m.type === "tool_status" && m.role !== "queen")
|
||||
) {
|
||||
workerMsgs.push(m);
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// System message or other — include in the worker run
|
||||
// group to preserve ordering (they'll render inside the
|
||||
// expanded view)
|
||||
workerMsgs.push(m);
|
||||
i++;
|
||||
}
|
||||
|
||||
if (workerMsgs.length > 0) {
|
||||
// Parallel fan-out detection: if any message in this span
|
||||
// is tagged with a parallel-worker streamId (``worker:{uuid}``),
|
||||
// split the span by streamId and emit one ``worker_run``
|
||||
// per worker — they render as stacked independent
|
||||
// ``WorkerRunBubble``s. Un-tagged legacy messages and the
|
||||
// single-worker ``streamId="worker"`` case fall through to
|
||||
// the existing single-bubble behavior.
|
||||
const hasParallel = workerMsgs.some(
|
||||
(m) => !!m.streamId && /^worker:./.test(m.streamId),
|
||||
);
|
||||
|
||||
if (hasParallel) {
|
||||
const buckets = new Map<
|
||||
string,
|
||||
{ messages: ChatMessage[]; firstAt: number }
|
||||
>();
|
||||
// Messages with no streamId (system notes, orphans from
|
||||
// old restore) attach to the most-recent keyed message's
|
||||
// bucket so chronology is preserved.
|
||||
let currentKey: string | null = null;
|
||||
for (const m of workerMsgs) {
|
||||
const key =
|
||||
m.streamId && m.streamId.length > 0
|
||||
? m.streamId
|
||||
: currentKey;
|
||||
if (!key) continue;
|
||||
if (m.streamId && m.streamId.length > 0) currentKey = m.streamId;
|
||||
let bucket = buckets.get(key);
|
||||
if (!bucket) {
|
||||
bucket = { messages: [], firstAt: m.createdAt ?? 0 };
|
||||
buckets.set(key, bucket);
|
||||
}
|
||||
bucket.messages.push(m);
|
||||
bucket.firstAt = Math.min(
|
||||
bucket.firstAt,
|
||||
m.createdAt ?? Number.POSITIVE_INFINITY,
|
||||
);
|
||||
}
|
||||
|
||||
const sorted = Array.from(buckets.entries()).sort(
|
||||
([, a], [, b]) => a.firstAt - b.firstAt,
|
||||
);
|
||||
for (const [streamKey, { messages: bucketMsgs }] of sorted) {
|
||||
items.push({
|
||||
kind: "worker_run",
|
||||
runId: `wrun-${firstWorkerMsg.id}-${streamKey}`,
|
||||
group: { messages: bucketMsgs },
|
||||
label: deriveWorkerLabel(streamKey, bucketMsgs),
|
||||
});
|
||||
}
|
||||
} else {
|
||||
items.push({
|
||||
kind: "worker_run",
|
||||
runId: `wrun-${firstWorkerMsg.id}`,
|
||||
group: { messages: workerMsgs },
|
||||
});
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!isSubagent) {
|
||||
items.push({ kind: "message", msg });
|
||||
i++;
|
||||
@@ -872,6 +1051,17 @@ export default function ChatPanel({
|
||||
</div>
|
||||
);
|
||||
}
|
||||
if (item.kind === "worker_run") {
|
||||
return (
|
||||
<div key={item.runId}>
|
||||
<WorkerRunBubble
|
||||
runId={item.runId}
|
||||
group={item.group}
|
||||
label={item.label}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
const msg = item.msg;
|
||||
// Detect misformatted ask_user payloads emitted as plain text and
|
||||
// substitute the nicer widget-based bubble. Only inspect regular
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { useState, useEffect } from "react";
|
||||
import { useState, useEffect, useCallback, useRef } from "react";
|
||||
import { NavLink, useLocation, useNavigate } from "react-router-dom";
|
||||
import {
|
||||
X,
|
||||
@@ -46,8 +46,49 @@ export default function QueenProfilePanel({
|
||||
const name = profile?.name ?? summary?.name ?? "Queen";
|
||||
const title = profile?.title ?? summary?.title ?? "";
|
||||
|
||||
// ── Resizable width ──────────────────────────────────────────────────
|
||||
const MIN_WIDTH = 280;
|
||||
const MAX_WIDTH = 600;
|
||||
const [width, setWidth] = useState(340);
|
||||
const dragging = useRef(false);
|
||||
const startX = useRef(0);
|
||||
const startWidth = useRef(0);
|
||||
|
||||
const onDragStart = useCallback((e: React.MouseEvent) => {
|
||||
e.preventDefault();
|
||||
dragging.current = true;
|
||||
startX.current = e.clientX;
|
||||
startWidth.current = width;
|
||||
|
||||
const onMove = (ev: MouseEvent) => {
|
||||
if (!dragging.current) return;
|
||||
// Panel is on the right, so dragging left (negative delta) grows it
|
||||
const delta = startX.current - ev.clientX;
|
||||
setWidth(Math.min(MAX_WIDTH, Math.max(MIN_WIDTH, startWidth.current + delta)));
|
||||
};
|
||||
const onUp = () => {
|
||||
dragging.current = false;
|
||||
document.removeEventListener("mousemove", onMove);
|
||||
document.removeEventListener("mouseup", onUp);
|
||||
document.body.style.cursor = "";
|
||||
document.body.style.userSelect = "";
|
||||
};
|
||||
document.addEventListener("mousemove", onMove);
|
||||
document.addEventListener("mouseup", onUp);
|
||||
document.body.style.cursor = "col-resize";
|
||||
document.body.style.userSelect = "none";
|
||||
}, [width]);
|
||||
|
||||
return (
|
||||
<aside className="w-[340px] flex-shrink-0 border-l border-border/60 bg-card overflow-y-auto">
|
||||
<aside
|
||||
className="flex-shrink-0 border-l border-border/60 bg-card overflow-y-auto relative"
|
||||
style={{ width }}
|
||||
>
|
||||
{/* Drag handle */}
|
||||
<div
|
||||
onMouseDown={onDragStart}
|
||||
className="absolute top-0 left-0 w-1 h-full cursor-col-resize hover:bg-primary/30 active:bg-primary/50 transition-colors z-10"
|
||||
/>
|
||||
{/* Header */}
|
||||
<div className="flex items-center justify-between px-5 py-3.5 border-b border-border/60">
|
||||
<div className="flex items-center gap-2 text-sm font-semibold text-foreground">
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { useState } from "react";
|
||||
import { useState, useCallback, useRef } from "react";
|
||||
import { useNavigate } from "react-router-dom";
|
||||
import {
|
||||
ChevronLeft,
|
||||
@@ -22,6 +22,38 @@ export default function Sidebar() {
|
||||
const [coloniesExpanded, setColoniesExpanded] = useState(true);
|
||||
const [queensExpanded, setQueensExpanded] = useState(true);
|
||||
|
||||
// ── Resizable width ──────────────────────────────────────────────────
|
||||
const MIN_WIDTH = 180;
|
||||
const MAX_WIDTH = 400;
|
||||
const [width, setWidth] = useState(240);
|
||||
const dragging = useRef(false);
|
||||
const startX = useRef(0);
|
||||
const startWidth = useRef(0);
|
||||
|
||||
const onDragStart = useCallback((e: React.MouseEvent) => {
|
||||
e.preventDefault();
|
||||
dragging.current = true;
|
||||
startX.current = e.clientX;
|
||||
startWidth.current = width;
|
||||
|
||||
const onMove = (ev: MouseEvent) => {
|
||||
if (!dragging.current) return;
|
||||
const delta = ev.clientX - startX.current;
|
||||
setWidth(Math.min(MAX_WIDTH, Math.max(MIN_WIDTH, startWidth.current + delta)));
|
||||
};
|
||||
const onUp = () => {
|
||||
dragging.current = false;
|
||||
document.removeEventListener("mousemove", onMove);
|
||||
document.removeEventListener("mouseup", onUp);
|
||||
document.body.style.cursor = "";
|
||||
document.body.style.userSelect = "";
|
||||
};
|
||||
document.addEventListener("mousemove", onMove);
|
||||
document.addEventListener("mouseup", onUp);
|
||||
document.body.style.cursor = "col-resize";
|
||||
document.body.style.userSelect = "none";
|
||||
}, [width]);
|
||||
|
||||
if (sidebarCollapsed) {
|
||||
return (
|
||||
<aside className="w-[52px] flex-shrink-0 flex flex-col bg-sidebar-bg border-r border-sidebar-border h-full">
|
||||
@@ -50,7 +82,15 @@ export default function Sidebar() {
|
||||
}
|
||||
|
||||
return (
|
||||
<aside className="w-[240px] flex-shrink-0 flex flex-col bg-sidebar-bg border-r border-sidebar-border h-full">
|
||||
<aside
|
||||
className="flex-shrink-0 flex flex-col bg-sidebar-bg border-r border-sidebar-border h-full relative"
|
||||
style={{ width }}
|
||||
>
|
||||
{/* Drag handle on right edge */}
|
||||
<div
|
||||
onMouseDown={onDragStart}
|
||||
className="absolute top-0 right-0 w-1 h-full cursor-col-resize hover:bg-primary/30 active:bg-primary/50 transition-colors z-10"
|
||||
/>
|
||||
{/* Header */}
|
||||
<div className="h-12 flex items-center justify-between px-4 border-b border-border/60">
|
||||
<button
|
||||
|
||||
@@ -0,0 +1,297 @@
|
||||
import { memo, useState, useRef, useEffect } from "react";
|
||||
import { ChevronDown, ChevronUp, Cpu } from "lucide-react";
|
||||
import type { ChatMessage } from "@/components/ChatPanel";
|
||||
import { ToolActivityRow } from "@/components/ChatPanel";
|
||||
import MarkdownContent from "@/components/MarkdownContent";
|
||||
|
||||
const workerColor = "hsl(220,60%,55%)";
|
||||
|
||||
export interface WorkerRunGroup {
|
||||
messages: ChatMessage[];
|
||||
}
|
||||
|
||||
interface WorkerRunBubbleProps {
|
||||
runId: string;
|
||||
group: WorkerRunGroup;
|
||||
/** Short identifier shown next to the "Worker" badge. Populated
|
||||
* only when the parent grouping has multiple parallel workers
|
||||
* in the same run span, so N stacked bubbles can be told apart
|
||||
* at a glance. Omitted for single-worker runs. */
|
||||
label?: string;
|
||||
}
|
||||
|
||||
/** Parse a tool_status JSON blob into a list of tool entries. */
|
||||
function parseToolStatus(content: string): { name: string; done: boolean }[] {
|
||||
try {
|
||||
const parsed = JSON.parse(content);
|
||||
return parsed.tools || [];
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Strip markdown formatting so the collapsed preview is a single
|
||||
* readable line instead of a scatter of code pills.
|
||||
*
|
||||
* MarkdownContent turns every backtick-wrapped fragment into its own
|
||||
* visually-boxed inline-code pill. In a worker text message those
|
||||
* pills can be coordinates, UUIDs, selectors, tool names — the
|
||||
* collapsed preview ends up looking like confetti. We just want the
|
||||
* plain prose, one line, truncated.
|
||||
*/
|
||||
function stripMarkdownToPreview(s: string, maxLen = 160): string {
|
||||
const cleaned = s
|
||||
.replace(/```[\s\S]*?```/g, " [code] ") // fenced code blocks
|
||||
.replace(/`([^`]+)`/g, "$1") // inline code — keep the text, drop the backticks
|
||||
.replace(/\*\*([^*]+)\*\*/g, "$1") // bold
|
||||
.replace(/\*([^*]+)\*/g, "$1") // italic
|
||||
.replace(/~~([^~]+)~~/g, "$1") // strikethrough
|
||||
.replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") // links -> link text
|
||||
.replace(/^#{1,6}\s+/gm, "") // ATX headers
|
||||
.replace(/^[>\-*+]\s+/gm, "") // blockquote/list markers
|
||||
.replace(/\s+/g, " ") // collapse whitespace
|
||||
.trim();
|
||||
if (cleaned.length <= maxLen) return cleaned;
|
||||
return cleaned.slice(0, maxLen - 1).trimEnd() + "\u2026";
|
||||
}
|
||||
|
||||
/**
|
||||
* Collapsible card that groups all worker messages from a single run
|
||||
* (the span between the queen's `run_agent_with_input` call and the
|
||||
* worker's final `set_output`/`escalate`/idle).
|
||||
*
|
||||
* Collapsed (default): header bar with tool count + latest text snippet.
|
||||
* Expanded: scrollable list of every message and tool status in order.
|
||||
*/
|
||||
const WorkerRunBubble = memo(
|
||||
function WorkerRunBubble({ group, label }: WorkerRunBubbleProps) {
|
||||
const [expanded, setExpanded] = useState(false);
|
||||
const bodyRef = useRef<HTMLDivElement>(null);
|
||||
|
||||
// Separate text messages from tool status
|
||||
const textMsgs = group.messages.filter(
|
||||
(m) => m.type !== "tool_status" && m.content?.trim()
|
||||
);
|
||||
const toolStatusMsgs = group.messages.filter(
|
||||
(m) => m.type === "tool_status"
|
||||
);
|
||||
|
||||
// Count total tool calls from tool_status messages
|
||||
const allTools: { name: string; done: boolean }[] = [];
|
||||
for (const m of toolStatusMsgs) {
|
||||
for (const t of parseToolStatus(m.content)) {
|
||||
allTools.push(t);
|
||||
}
|
||||
}
|
||||
const toolCount = allTools.length;
|
||||
const doneCount = allTools.filter((t) => t.done).length;
|
||||
const isFinished = toolCount > 0 && doneCount === toolCount;
|
||||
|
||||
// Latest text from the worker (the last non-empty text message)
|
||||
const latestText = textMsgs.length > 0
|
||||
? textMsgs[textMsgs.length - 1].content
|
||||
: "";
|
||||
|
||||
// Status label. We prefer concrete states over the vague
|
||||
// "starting" fallback — if the worker has emitted any text or
|
||||
// any tool, it's past the startup phase.
|
||||
const statusLabel = isFinished
|
||||
? "done"
|
||||
: toolCount > 0
|
||||
? "running"
|
||||
: textMsgs.length > 0
|
||||
? "active"
|
||||
: "starting";
|
||||
|
||||
// Unique tool names for the summary (deduplicated, ordered by first appearance)
|
||||
const uniqueToolNames: string[] = [];
|
||||
const seen = new Set<string>();
|
||||
for (const t of allTools) {
|
||||
if (!seen.has(t.name)) {
|
||||
seen.add(t.name);
|
||||
uniqueToolNames.push(t.name);
|
||||
}
|
||||
}
|
||||
|
||||
// Auto-scroll body when expanded
|
||||
useEffect(() => {
|
||||
if (expanded && bodyRef.current) {
|
||||
bodyRef.current.scrollTop = bodyRef.current.scrollHeight;
|
||||
}
|
||||
}, [expanded, group.messages.length]);
|
||||
|
||||
return (
|
||||
<div className="flex gap-3">
|
||||
{/* Left icon */}
|
||||
<div
|
||||
className="flex-shrink-0 w-7 h-7 rounded-xl flex items-center justify-center mt-1"
|
||||
style={{
|
||||
backgroundColor: `${workerColor}18`,
|
||||
border: `1.5px solid ${workerColor}35`,
|
||||
}}
|
||||
>
|
||||
<Cpu className="w-3.5 h-3.5" style={{ color: workerColor }} />
|
||||
</div>
|
||||
|
||||
<div className="flex-1 min-w-0 max-w-[90%]">
|
||||
{/* Clickable header */}
|
||||
<button
|
||||
onClick={() => setExpanded((v) => !v)}
|
||||
className="w-full flex items-center gap-2 mb-1 text-left cursor-pointer group"
|
||||
>
|
||||
<span className="font-medium text-xs" style={{ color: workerColor }}>
|
||||
Worker
|
||||
</span>
|
||||
{label && (
|
||||
<span className="text-[10px] font-mono text-muted-foreground/80 tabular-nums">
|
||||
{label}
|
||||
</span>
|
||||
)}
|
||||
<span
|
||||
className={`text-[10px] font-medium px-1.5 py-0.5 rounded-md ${
|
||||
isFinished
|
||||
? "bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400"
|
||||
: "bg-muted text-muted-foreground"
|
||||
}`}
|
||||
>
|
||||
{statusLabel}
|
||||
</span>
|
||||
{toolCount > 0 && (
|
||||
<span className="text-[10px] text-muted-foreground tabular-nums">
|
||||
{doneCount}/{toolCount} tools
|
||||
</span>
|
||||
)}
|
||||
<span className="ml-auto text-muted-foreground/60 group-hover:text-muted-foreground transition-colors p-0.5 rounded">
|
||||
{expanded ? (
|
||||
<ChevronUp className="w-3.5 h-3.5" />
|
||||
) : (
|
||||
<ChevronDown className="w-3.5 h-3.5" />
|
||||
)}
|
||||
</span>
|
||||
</button>
|
||||
|
||||
{/* Card body — use Tailwind theme tokens so dark mode
|
||||
gets a proper dark background instead of a glaring
|
||||
near-white hardcoded hsl. Finished runs get a subtle
|
||||
green tint that also respects theme. */}
|
||||
<div
|
||||
className={`rounded-2xl rounded-tl-md overflow-hidden border ${
|
||||
isFinished
|
||||
? "border-green-300/50 bg-green-50/50 dark:border-green-900/40 dark:bg-green-950/20"
|
||||
: "border-border bg-muted/60"
|
||||
}`}
|
||||
>
|
||||
{/* Collapsed: single-line plain-text preview of the
|
||||
latest worker text, OR a tool-name chain when the
|
||||
worker hasn't emitted any prose yet. MarkdownContent
|
||||
is intentionally NOT used here — its inline-code
|
||||
rendering turns every backtick-wrapped fragment into
|
||||
a floating pill, which wrecks the preview. */}
|
||||
{!expanded && (
|
||||
<div className="px-4 py-2.5 text-sm text-muted-foreground">
|
||||
{latestText ? (
|
||||
<div className="truncate">
|
||||
{stripMarkdownToPreview(latestText)}
|
||||
</div>
|
||||
) : uniqueToolNames.length > 0 ? (
|
||||
<span className="text-xs font-mono truncate block">
|
||||
{uniqueToolNames.slice(0, 5).join(" \u2192 ")}
|
||||
{uniqueToolNames.length > 5 &&
|
||||
` + ${uniqueToolNames.length - 5} more`}
|
||||
</span>
|
||||
) : (
|
||||
<span className="text-xs text-muted-foreground/60 italic">
|
||||
{"waiting for first action\u2026"}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Expanded: chronological stream with tool bursts
|
||||
coalesced into a single ToolActivityRow each.
|
||||
Consecutive tool_status messages (no text between)
|
||||
collapse to the LATEST snapshot — each snapshot is
|
||||
cumulative within its turn, so the latest one tells
|
||||
the whole story for that burst. Text messages break
|
||||
the burst and render as markdown. */}
|
||||
{expanded && (
|
||||
<div
|
||||
ref={bodyRef}
|
||||
className="max-h-[400px] overflow-y-auto px-4 py-3 space-y-3"
|
||||
>
|
||||
{(() => {
|
||||
type RenderRow =
|
||||
| { kind: "tools"; content: string; key: string }
|
||||
| { kind: "text"; msg: ChatMessage; key: string };
|
||||
const rows: RenderRow[] = [];
|
||||
let pendingTool: { content: string; id: string } | null = null;
|
||||
const flushTool = () => {
|
||||
if (pendingTool) {
|
||||
rows.push({
|
||||
kind: "tools",
|
||||
content: pendingTool.content,
|
||||
key: `tools-${pendingTool.id}`,
|
||||
});
|
||||
pendingTool = null;
|
||||
}
|
||||
};
|
||||
for (let i = 0; i < group.messages.length; i++) {
|
||||
const m = group.messages[i];
|
||||
if (m.type === "tool_status") {
|
||||
// Overwrite — latest snapshot in the burst wins
|
||||
pendingTool = {
|
||||
content: m.content,
|
||||
id: m.id || `ts-${i}`,
|
||||
};
|
||||
continue;
|
||||
}
|
||||
if (m.content?.trim()) {
|
||||
flushTool();
|
||||
rows.push({
|
||||
kind: "text",
|
||||
msg: m,
|
||||
key: m.id || `txt-${i}`,
|
||||
});
|
||||
}
|
||||
}
|
||||
flushTool();
|
||||
|
||||
return rows.map((row) => {
|
||||
if (row.kind === "tools") {
|
||||
// ToolActivityRow groups by tool name (×N), shows
|
||||
// running pills (spinner) before done pills (check),
|
||||
// and uses the per-tool color hash that matches
|
||||
// the rest of the chat.
|
||||
return (
|
||||
<div key={row.key} className="-ml-10">
|
||||
<ToolActivityRow content={row.content} />
|
||||
</div>
|
||||
);
|
||||
}
|
||||
return (
|
||||
<div
|
||||
key={row.key}
|
||||
className="text-sm leading-relaxed"
|
||||
>
|
||||
<MarkdownContent content={row.msg.content} />
|
||||
</div>
|
||||
);
|
||||
});
|
||||
})()}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
},
|
||||
(prev, next) =>
|
||||
prev.runId === next.runId &&
|
||||
prev.label === next.label &&
|
||||
prev.group.messages.length === next.group.messages.length &&
|
||||
prev.group.messages[prev.group.messages.length - 1]?.content ===
|
||||
next.group.messages[next.group.messages.length - 1]?.content
|
||||
);
|
||||
|
||||
export default WorkerRunBubble;
|
||||
@@ -0,0 +1,31 @@
|
||||
import { createContext, useContext, useCallback, type ReactNode } from "react";
|
||||
|
||||
interface QueenProfileContextValue {
|
||||
openQueenProfile: (queenId: string) => void;
|
||||
}
|
||||
|
||||
const QueenProfileContext = createContext<QueenProfileContextValue | null>(null);
|
||||
|
||||
export function QueenProfileProvider({
  onOpen,
  children,
}: {
  onOpen: (queenId: string) => void;
  children: ReactNode;
}) {
  // Stable callback identity across renders (as long as `onOpen` is stable)
  // so consumers don't see a new function each time the provider renders.
  const openQueenProfile = useCallback(
    (queenId: string) => onOpen(queenId),
    [onOpen],
  );
  // Memoize the context value object itself. The previous inline
  // `value={{ openQueenProfile }}` allocated a fresh object on every
  // render, which invalidates every useContext(QueenProfileContext)
  // consumer even when nothing actually changed.
  const value = useMemo(() => ({ openQueenProfile }), [openQueenProfile]);
  return (
    <QueenProfileContext.Provider value={value}>
      {children}
    </QueenProfileContext.Provider>
  );
}
|
||||
|
||||
export function useQueenProfile() {
|
||||
const ctx = useContext(QueenProfileContext);
|
||||
if (!ctx) throw new Error("useQueenProfile must be used within QueenProfileProvider");
|
||||
return ctx;
|
||||
}
|
||||
@@ -1,10 +1,11 @@
|
||||
import { useEffect, useState } from "react";
|
||||
import { useEffect, useState, useCallback } from "react";
|
||||
import { Outlet, useLocation } from "react-router-dom";
|
||||
import Sidebar from "@/components/Sidebar";
|
||||
import AppHeader from "@/components/AppHeader";
|
||||
import QueenProfilePanel from "@/components/QueenProfilePanel";
|
||||
import { ColonyProvider, useColony } from "@/context/ColonyContext";
|
||||
import { HeaderActionsProvider } from "@/context/HeaderActionsContext";
|
||||
import { QueenProfileProvider } from "@/context/QueenProfileContext";
|
||||
|
||||
export default function AppLayout() {
|
||||
return (
|
||||
@@ -27,26 +28,33 @@ function AppLayoutInner() {
|
||||
setOpenQueenId(null);
|
||||
}, [location.pathname]);
|
||||
|
||||
const handleOpenQueenProfile = useCallback(
|
||||
(queenId: string) => setOpenQueenId((prev) => (prev === queenId ? null : queenId)),
|
||||
[],
|
||||
);
|
||||
|
||||
return (
|
||||
<div className="flex h-screen bg-background overflow-hidden">
|
||||
<Sidebar />
|
||||
<div className="flex-1 min-w-0 flex flex-col">
|
||||
<AppHeader onOpenQueenProfile={setOpenQueenId} />
|
||||
<div className="flex-1 min-h-0 flex">
|
||||
<main className="flex-1 min-w-0 flex flex-col">
|
||||
<Outlet />
|
||||
</main>
|
||||
{openQueenId && (
|
||||
<QueenProfilePanel
|
||||
queenId={openQueenId}
|
||||
colonies={colonies.filter(
|
||||
(c) => c.queenProfileId === openQueenId,
|
||||
)}
|
||||
onClose={() => setOpenQueenId(null)}
|
||||
/>
|
||||
)}
|
||||
<QueenProfileProvider onOpen={handleOpenQueenProfile}>
|
||||
<div className="flex h-screen bg-background overflow-hidden">
|
||||
<Sidebar />
|
||||
<div className="flex-1 min-w-0 flex flex-col">
|
||||
<AppHeader onOpenQueenProfile={handleOpenQueenProfile} />
|
||||
<div className="flex-1 min-h-0 flex">
|
||||
<main className="flex-1 min-w-0 flex flex-col">
|
||||
<Outlet />
|
||||
</main>
|
||||
{openQueenId && (
|
||||
<QueenProfilePanel
|
||||
queenId={openQueenId}
|
||||
colonies={colonies.filter(
|
||||
(c) => c.queenProfileId === openQueenId,
|
||||
)}
|
||||
onClose={() => setOpenQueenId(null)}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</QueenProfileProvider>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -119,6 +119,7 @@ export function sseEventToChatMessage(
|
||||
createdAt,
|
||||
nodeId: event.node_id || undefined,
|
||||
executionId: event.execution_id || undefined,
|
||||
streamId: event.stream_id || undefined,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -138,6 +139,7 @@ export function sseEventToChatMessage(
|
||||
type: "user",
|
||||
thread,
|
||||
createdAt,
|
||||
streamId: event.stream_id || undefined,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -158,6 +160,7 @@ export function sseEventToChatMessage(
|
||||
createdAt,
|
||||
nodeId: event.node_id || undefined,
|
||||
executionId: event.execution_id || undefined,
|
||||
streamId: event.stream_id || undefined,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -172,6 +175,7 @@ export function sseEventToChatMessage(
|
||||
type: "system",
|
||||
thread,
|
||||
createdAt,
|
||||
streamId: event.stream_id || undefined,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -186,6 +190,7 @@ export function sseEventToChatMessage(
|
||||
type: "system",
|
||||
thread,
|
||||
createdAt,
|
||||
streamId: event.stream_id || undefined,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -194,6 +199,188 @@ export function sseEventToChatMessage(
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Stateful event replay — produces tool_status pills + regular messages
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* State maintained while replaying an event stream. Tracks per-stream turn
|
||||
* counters, the set of active tool calls (so tool_status pill content
|
||||
* reflects "tool A done, tool B running" correctly), and a tool_use_id →
|
||||
* pill_msg_id map so deferred `tool_call_completed` events can find the
|
||||
* pill they belong to after the turn counter moves on.
|
||||
*/
|
||||
export interface ReplayState {
  // Per-stream turn counter keyed by stream_id; bumped on
  // execution_started and llm_turn_complete so pill/message ids built
  // from the counter stay unique per turn.
  turnCounters: Record<string, number>;
  // All tool calls currently tracked, keyed by tool_use_id. `done` flips
  // when the matching tool_call_completed arrives; `streamId` scopes each
  // call to the queen/worker stream that issued it.
  activeToolCalls: Record<
    string,
    { name: string; done: boolean; streamId: string }
  >;
  // tool_use_id → synthesized pill message id (+ tool name), so a
  // deferred tool_call_completed can find its pill after the turn
  // counter has moved on.
  toolUseToPill: Record<string, { msgId: string; name: string }>;
}
|
||||
|
||||
export function newReplayState(): ReplayState {
|
||||
return { turnCounters: {}, activeToolCalls: {}, toolUseToPill: {} };
|
||||
}
|
||||
|
||||
/**
|
||||
* Process a single event and emit zero or more ChatMessage upserts.
|
||||
*
|
||||
* Why this exists: `sseEventToChatMessage` is stateless — one event in, at
|
||||
* most one message out. But the chat's tool_status pill is a SYNTHESIZED
|
||||
* message: each tool_call_started adds to an accumulating pill, and each
|
||||
* tool_call_completed flips one of its tools from running to done. Live
|
||||
* SSE handlers in colony-chat and queen-dm already do this synthesis
|
||||
* against React refs. Cold-restore from events.jsonl used to skip
|
||||
* tool_call_* events entirely, so refreshed sessions looked completely
|
||||
* different from live ones — no tool activity visible, just prose.
|
||||
*
|
||||
* This function centralizes the synthesis so cold-restore and live paths
|
||||
* can use the exact same state machine. The caller treats the returned
|
||||
* messages as upserts (by id) — a later event in the same replay may
|
||||
* emit the same pill id with updated content, which should REPLACE the
|
||||
* earlier row in the caller's message list.
|
||||
*/
|
||||
export function replayEvent(
  state: ReplayState,
  event: AgentEvent,
  thread: string,
  agentDisplayName: string | undefined,
): ChatMessage[] {
  const streamId = event.stream_id;
  const isQueen = streamId === "queen";
  const role: "queen" | "worker" = isQueen ? "queen" : "worker";
  const turnKey = streamId;
  // Snapshot the counter BEFORE any increments below — pill ids for this
  // event's tool calls are built from the pre-increment value.
  const currentTurn = state.turnCounters[turnKey] ?? 0;
  const eventCreatedAt = event.timestamp
    ? new Date(event.timestamp).getTime()
    : Date.now();

  const out: ChatMessage[] = [];

  // Update state machine BEFORE the generic converter runs so the
  // regular message emitted for this event sees the post-update
  // counter (matches live handler ordering at colony-chat.tsx:525).
  switch (event.type) {
    case "execution_started":
      state.turnCounters[turnKey] = currentTurn + 1;
      // New execution for a worker resets its active tools, mirroring
      // the live handler's setAgentState at colony-chat.tsx:566.
      // Queen streams keep their tool state across executions.
      if (!isQueen) {
        const keepActive: typeof state.activeToolCalls = {};
        for (const [k, v] of Object.entries(state.activeToolCalls)) {
          if (v.streamId !== streamId) keepActive[k] = v;
        }
        state.activeToolCalls = keepActive;
      }
      break;
    case "llm_turn_complete":
      state.turnCounters[turnKey] = currentTurn + 1;
      break;
    case "tool_call_started": {
      // Events with no node_id don't belong to a renderable agent; skip.
      if (!event.node_id) break;
      const toolName = (event.data?.tool_name as string) || "unknown";
      const toolUseId = (event.data?.tool_use_id as string) || "";
      // NOTE(review): events missing tool_use_id all share the "" key
      // here, so a later anonymous call overwrites an earlier one —
      // confirm upstream always supplies tool_use_id.
      state.activeToolCalls[toolUseId] = {
        name: toolName,
        done: false,
        streamId,
      };
      // One pill per (stream, execution, turn): every started call in
      // the same turn re-emits the same id with accumulated content.
      const pillId = `tool-pill-${streamId}-${event.execution_id || "exec"}-${currentTurn}`;
      if (toolUseId) {
        state.toolUseToPill[toolUseId] = { msgId: pillId, name: toolName };
      }
      // Pill content = cumulative snapshot of THIS stream's tool calls.
      const tools = Object.values(state.activeToolCalls)
        .filter((t) => t.streamId === streamId)
        .map((t) => ({ name: t.name, done: t.done }));
      const allDone = tools.length > 0 && tools.every((t) => t.done);
      out.push({
        id: pillId,
        agent: agentDisplayName || event.node_id || "Agent",
        agentColor: "",
        content: JSON.stringify({ tools, allDone }),
        timestamp: "",
        type: "tool_status",
        role,
        thread,
        createdAt: eventCreatedAt,
        nodeId: event.node_id || undefined,
        executionId: event.execution_id || undefined,
        streamId: streamId || undefined,
      });
      break;
    }
    case "tool_call_completed": {
      if (!event.node_id) break;
      const toolUseId = (event.data?.tool_use_id as string) || "";
      // Look up the pill first, then clear the mapping — each completion
      // is consumed at most once.
      const tracked = state.toolUseToPill[toolUseId];
      if (toolUseId) delete state.toolUseToPill[toolUseId];
      if (toolUseId && state.activeToolCalls[toolUseId]) {
        state.activeToolCalls[toolUseId].done = true;
      }
      // Completion for a call we never saw start (or already consumed):
      // nothing to re-emit.
      if (!tracked) break;
      const tools = Object.values(state.activeToolCalls)
        .filter((t) => t.streamId === streamId)
        .map((t) => ({ name: t.name, done: t.done }));
      const allDone = tools.length > 0 && tools.every((t) => t.done);
      // Re-emit the SAME pill id with updated content. Caller upserts
      // by id, so this replaces the row from tool_call_started.
      out.push({
        id: tracked.msgId,
        agent: agentDisplayName || event.node_id || "Agent",
        agentColor: "",
        content: JSON.stringify({ tools, allDone }),
        timestamp: "",
        type: "tool_status",
        role,
        thread,
        createdAt: eventCreatedAt,
        nodeId: event.node_id || undefined,
        executionId: event.execution_id || undefined,
        streamId: streamId || undefined,
      });
      break;
    }
  }

  // Regular stateless conversion (prose, user input, system notes).
  // Deliberately reads the counter AFTER the switch above updated it.
  const msg = sseEventToChatMessage(
    event,
    thread,
    agentDisplayName,
    state.turnCounters[turnKey] ?? 0,
  );
  if (msg) {
    if (isQueen) msg.role = "queen";
    out.push(msg);
  }

  return out;
}
|
||||
|
||||
/**
|
||||
* Replay an entire event array and return a deduplicated, chronologically
|
||||
* sorted ChatMessage list. Used by cold-restore paths so refreshed
|
||||
* sessions match the live stream exactly.
|
||||
*/
|
||||
export function replayEventsToMessages(
|
||||
events: AgentEvent[],
|
||||
thread: string,
|
||||
agentDisplayName: string | undefined,
|
||||
): ChatMessage[] {
|
||||
const state = newReplayState();
|
||||
// Upsert by id — later emissions for the same pill replace earlier ones.
|
||||
const byId = new Map<string, ChatMessage>();
|
||||
for (const evt of events) {
|
||||
for (const m of replayEvent(state, evt, thread, agentDisplayName)) {
|
||||
byId.set(m.id, m);
|
||||
}
|
||||
}
|
||||
return Array.from(byId.values()).sort(
|
||||
(a, b) => (a.createdAt ?? 0) - (b.createdAt ?? 0),
|
||||
);
|
||||
}
|
||||
|
||||
// Lifecycle phases a queen session can report.
type QueenPhase = "planning" | "building" | "staging" | "running" | "independent";
// Runtime membership check mirroring the QueenPhase union, for validating
// untyped phase strings coming out of event payloads.
const VALID_PHASES = new Set<string>(["planning", "building", "staging", "running", "independent"]);
|
||||
|
||||
|
||||
@@ -13,7 +13,11 @@ import { executionApi } from "@/api/execution";
|
||||
import { sessionsApi } from "@/api/sessions";
|
||||
import { useMultiSSE } from "@/hooks/use-sse";
|
||||
import type { LiveSession, AgentEvent } from "@/api/types";
|
||||
import { sseEventToChatMessage, formatAgentDisplayName } from "@/lib/chat-helpers";
|
||||
import {
|
||||
sseEventToChatMessage,
|
||||
formatAgentDisplayName,
|
||||
replayEventsToMessages,
|
||||
} from "@/lib/chat-helpers";
|
||||
import { cronToLabel } from "@/lib/graphUtils";
|
||||
import { ApiError } from "@/api/client";
|
||||
import { useColony } from "@/context/ColonyContext";
|
||||
@@ -41,6 +45,8 @@ function truncate(s: string, max: number): string {
|
||||
type SessionRestoreResult = {
|
||||
messages: ChatMessage[];
|
||||
restoredPhase: "planning" | "building" | "staging" | "running" | "independent" | null;
|
||||
truncated: boolean;
|
||||
droppedCount: number;
|
||||
};
|
||||
|
||||
async function restoreSessionMessages(
|
||||
@@ -49,34 +55,67 @@ async function restoreSessionMessages(
|
||||
agentDisplayName: string,
|
||||
): Promise<SessionRestoreResult> {
|
||||
try {
|
||||
const { events } = await sessionsApi.eventsHistory(sessionId);
|
||||
const { events, truncated, total, returned } =
|
||||
await sessionsApi.eventsHistory(sessionId);
|
||||
if (events.length > 0) {
|
||||
const messages: ChatMessage[] = [];
|
||||
// Walk events twice:
|
||||
// 1. Extract the trailing queen phase (unchanged logic).
|
||||
// 2. Run the full state-machine replay so tool_status pills
|
||||
// are synthesized just like the live SSE handler does.
|
||||
// Without (2), refreshed sessions showed zero tool activity
|
||||
// because tool_call_started/completed events are ignored by
|
||||
// the stateless converter.
|
||||
let runningPhase: ChatMessage["phase"] = undefined;
|
||||
for (const evt of events) {
|
||||
const p =
|
||||
evt.type === "queen_phase_changed"
|
||||
? (evt.data?.phase as string)
|
||||
: evt.type === "node_loop_iteration"
|
||||
? (evt.data?.phase as string | undefined)
|
||||
: undefined;
|
||||
? (evt.data?.phase as string | undefined)
|
||||
: undefined;
|
||||
if (p && ["planning", "building", "staging", "running"].includes(p)) {
|
||||
runningPhase = p as ChatMessage["phase"];
|
||||
}
|
||||
const msg = sseEventToChatMessage(evt, thread, agentDisplayName);
|
||||
if (!msg) continue;
|
||||
if (evt.stream_id === "queen") {
|
||||
msg.role = "queen";
|
||||
msg.phase = runningPhase;
|
||||
}
|
||||
messages.push(msg);
|
||||
}
|
||||
return { messages, restoredPhase: runningPhase ?? null };
|
||||
|
||||
const messages = replayEventsToMessages(events, thread, agentDisplayName);
|
||||
// Stamp the latest phase on every queen message so the UI's
|
||||
// phase-badge rendering matches what the live path would have
|
||||
// displayed at the time of the refresh.
|
||||
if (runningPhase) {
|
||||
for (const m of messages) {
|
||||
if (m.role === "queen") m.phase = runningPhase;
|
||||
}
|
||||
}
|
||||
|
||||
// Prepend a run_divider banner when the server truncated older
|
||||
// events so the user knows how many are hidden.
|
||||
const droppedCount = Math.max(0, total - returned);
|
||||
if (truncated && droppedCount > 0) {
|
||||
const firstTs = events[0]?.timestamp;
|
||||
const bannerCreatedAt = firstTs ? new Date(firstTs).getTime() - 1 : 0;
|
||||
messages.unshift({
|
||||
id: `restore-truncated-${sessionId}`,
|
||||
agent: "System",
|
||||
agentColor: "",
|
||||
type: "run_divider",
|
||||
content: `${droppedCount.toLocaleString()} older event${droppedCount === 1 ? "" : "s"} not shown (showing last ${returned.toLocaleString()})`,
|
||||
timestamp: firstTs ?? new Date().toISOString(),
|
||||
thread,
|
||||
createdAt: bannerCreatedAt,
|
||||
});
|
||||
}
|
||||
return {
|
||||
messages,
|
||||
restoredPhase: runningPhase ?? null,
|
||||
truncated,
|
||||
droppedCount,
|
||||
};
|
||||
}
|
||||
} catch {
|
||||
// Event log not available
|
||||
}
|
||||
return { messages: [], restoredPhase: null };
|
||||
return { messages: [], restoredPhase: null, truncated: false, droppedCount: 0 };
|
||||
}
|
||||
|
||||
// ── Agent backend state ──────────────────────────────────────────────────────
|
||||
@@ -816,6 +855,7 @@ export default function ColonyChat() {
|
||||
createdAt: eventCreatedAt,
|
||||
nodeId: event.node_id || undefined,
|
||||
executionId: event.execution_id || undefined,
|
||||
streamId: sid || undefined,
|
||||
});
|
||||
return { ...prev, isStreaming: false, activeToolCalls: newActive };
|
||||
});
|
||||
|
||||
@@ -11,7 +11,10 @@ import { sessionsApi } from "@/api/sessions";
|
||||
import { queensApi } from "@/api/queens";
|
||||
import { useMultiSSE } from "@/hooks/use-sse";
|
||||
import type { AgentEvent, HistorySession } from "@/api/types";
|
||||
import { sseEventToChatMessage } from "@/lib/chat-helpers";
|
||||
import {
|
||||
sseEventToChatMessage,
|
||||
replayEventsToMessages,
|
||||
} from "@/lib/chat-helpers";
|
||||
import { useColony } from "@/context/ColonyContext";
|
||||
import { useHeaderActions } from "@/context/HeaderActionsContext";
|
||||
import { getQueenForAgent, slugToColonyId } from "@/lib/colony-registry";
|
||||
@@ -90,17 +93,34 @@ export default function QueenDM() {
|
||||
const restoreMessages = useCallback(
|
||||
async (sid: string, cancelled: () => boolean) => {
|
||||
try {
|
||||
const { events } = await sessionsApi.eventsHistory(sid);
|
||||
const { events, truncated, total, returned } =
|
||||
await sessionsApi.eventsHistory(sid);
|
||||
if (cancelled()) return;
|
||||
const restored: ChatMessage[] = [];
|
||||
for (const evt of events) {
|
||||
const msg = sseEventToChatMessage(evt, "queen-dm", queenName);
|
||||
if (!msg) continue;
|
||||
if (evt.stream_id === "queen") msg.role = "queen";
|
||||
restored.push(msg);
|
||||
|
||||
// Use the stateful replay so tool_status pills are synthesized
|
||||
// the same way the live SSE handler does — without this the
|
||||
// refreshed queen DM shows zero tool activity.
|
||||
const restored = replayEventsToMessages(events, "queen-dm", queenName);
|
||||
|
||||
// Show a banner if the server truncated older events.
|
||||
const droppedCount = Math.max(0, total - returned);
|
||||
if (truncated && droppedCount > 0) {
|
||||
const firstTs = events[0]?.timestamp;
|
||||
const bannerCreatedAt = firstTs
|
||||
? new Date(firstTs).getTime() - 1
|
||||
: 0;
|
||||
restored.unshift({
|
||||
id: `restore-truncated-${sid}`,
|
||||
agent: "System",
|
||||
agentColor: "",
|
||||
type: "run_divider",
|
||||
content: `${droppedCount.toLocaleString()} older event${droppedCount === 1 ? "" : "s"} not shown (showing last ${returned.toLocaleString()})`,
|
||||
timestamp: firstTs ?? new Date().toISOString(),
|
||||
thread: "queen-dm",
|
||||
createdAt: bannerCreatedAt,
|
||||
});
|
||||
}
|
||||
if (restored.length > 0 && !cancelled()) {
|
||||
restored.sort((a, b) => (a.createdAt ?? 0) - (b.createdAt ?? 0));
|
||||
setMessages(restored);
|
||||
// Only clear typing if the history contains a completed execution;
|
||||
// during bootstrap the queen is still processing.
|
||||
@@ -601,6 +621,7 @@ export default function QueenDM() {
|
||||
createdAt: eventCreatedAt,
|
||||
nodeId: event.node_id || undefined,
|
||||
executionId: event.execution_id || undefined,
|
||||
streamId: sid || undefined,
|
||||
};
|
||||
setMessages((prevMsgs) => {
|
||||
const idx = prevMsgs.findIndex((m) => m.id === msgId);
|
||||
|
||||
@@ -72,13 +72,28 @@ def patched_fork(monkeypatch):
|
||||
"""Stub out fork_session_into_colony so we don't need a real queen."""
|
||||
calls: list[dict] = []
|
||||
|
||||
async def _stub_fork(*, session: Any, colony_name: str, task: str) -> dict:
|
||||
calls.append({"session": session, "colony_name": colony_name, "task": task})
|
||||
async def _stub_fork(
|
||||
*,
|
||||
session: Any,
|
||||
colony_name: str,
|
||||
task: str,
|
||||
tasks: list[dict] | None = None,
|
||||
) -> dict:
|
||||
calls.append(
|
||||
{
|
||||
"session": session,
|
||||
"colony_name": colony_name,
|
||||
"task": task,
|
||||
"tasks": tasks,
|
||||
}
|
||||
)
|
||||
return {
|
||||
"colony_path": f"/tmp/fake_colonies/{colony_name}",
|
||||
"colony_name": colony_name,
|
||||
"queen_session_id": "session_fake_fork_id",
|
||||
"is_new": True,
|
||||
"db_path": f"/tmp/fake_colonies/{colony_name}/data/progress.db",
|
||||
"task_ids": [],
|
||||
}
|
||||
|
||||
monkeypatch.setattr(
|
||||
|
||||
@@ -17,10 +17,10 @@ _DEFAULT_SKILLS_DIR = Path(__file__).resolve().parent.parent / "framework" / "sk
|
||||
|
||||
|
||||
class TestDefaultSkillFiles:
|
||||
"""Verify all 7 built-in SKILL.md files parse correctly."""
|
||||
"""Verify all built-in SKILL.md files parse correctly."""
|
||||
|
||||
def test_all_seven_skills_exist(self):
|
||||
assert len(SKILL_REGISTRY) == 7
|
||||
def test_all_skills_exist(self):
|
||||
assert len(SKILL_REGISTRY) == 6
|
||||
|
||||
@pytest.mark.parametrize("skill_name,dir_name", list(SKILL_REGISTRY.items()))
|
||||
def test_skill_parses(self, skill_name, dir_name):
|
||||
@@ -35,7 +35,13 @@ class TestDefaultSkillFiles:
|
||||
assert parsed.source_scope == "framework"
|
||||
|
||||
def test_combined_token_budget(self):
|
||||
"""All default skill bodies combined should be under 3000 tokens (~12000 chars)."""
|
||||
"""All default skill bodies combined should stay within the protocols budget.
|
||||
|
||||
Ceiling is 5000 tokens (~20000 chars): the prompt-injection path
|
||||
appends every registered skill body to the system prompt, so
|
||||
uncontrolled growth would balloon every LLM call. 5000 gives
|
||||
headroom over today's ~3500 while still catching obvious bloat.
|
||||
"""
|
||||
total_chars = 0
|
||||
for dir_name in SKILL_REGISTRY.values():
|
||||
path = _DEFAULT_SKILLS_DIR / dir_name / "SKILL.md"
|
||||
@@ -44,9 +50,9 @@ class TestDefaultSkillFiles:
|
||||
total_chars += len(parsed.body)
|
||||
|
||||
approx_tokens = total_chars // 4
|
||||
assert approx_tokens < 3000, (
|
||||
assert approx_tokens < 5000, (
|
||||
f"Combined default skill bodies are ~{approx_tokens} tokens "
|
||||
f"({total_chars} chars), exceeding the 3000 token budget"
|
||||
f"({total_chars} chars), exceeding the 5000 token budget"
|
||||
)
|
||||
|
||||
def test_data_buffer_keys_all_prefixed(self):
|
||||
@@ -60,7 +66,7 @@ class TestDefaultSkillManager:
|
||||
manager = DefaultSkillManager()
|
||||
manager.load()
|
||||
|
||||
assert len(manager.active_skill_names) == 7
|
||||
assert len(manager.active_skill_names) == len(SKILL_REGISTRY)
|
||||
for name in SKILL_REGISTRY:
|
||||
assert name in manager.active_skill_names
|
||||
|
||||
@@ -97,7 +103,7 @@ class TestDefaultSkillManager:
|
||||
manager.load()
|
||||
|
||||
assert "hive.quality-monitor" not in manager.active_skill_names
|
||||
assert len(manager.active_skill_names) == 6
|
||||
assert len(manager.active_skill_names) == len(SKILL_REGISTRY) - 1
|
||||
|
||||
def test_disable_all_via_convention(self):
|
||||
config = SkillsConfig.from_agent_vars(default_skills={"_all": {"enabled": False}})
|
||||
@@ -136,7 +142,7 @@ class TestSkillsConfig:
|
||||
def test_explicit_disable(self):
|
||||
config = SkillsConfig(default_skills={"hive.note-taking": DefaultSkillConfig(enabled=False)})
|
||||
assert config.is_default_enabled("hive.note-taking") is False
|
||||
assert config.is_default_enabled("hive.batch-ledger") is True
|
||||
assert config.is_default_enabled("hive.quality-monitor") is True
|
||||
|
||||
def test_all_disabled_flag(self):
|
||||
config = SkillsConfig(all_defaults_disabled=True)
|
||||
@@ -166,11 +172,11 @@ class TestSkillsConfig:
|
||||
def test_get_default_overrides(self):
|
||||
config = SkillsConfig.from_agent_vars(
|
||||
default_skills={
|
||||
"hive.batch-ledger": {"enabled": True, "checkpoint_every_n": 10},
|
||||
"hive.quality-monitor": {"enabled": True, "assessment_interval": 10},
|
||||
}
|
||||
)
|
||||
overrides = config.get_default_overrides("hive.batch-ledger")
|
||||
assert overrides == {"checkpoint_every_n": 10}
|
||||
overrides = config.get_default_overrides("hive.quality-monitor")
|
||||
assert overrides == {"assessment_interval": 10}
|
||||
|
||||
def test_get_default_overrides_empty(self):
|
||||
config = SkillsConfig()
|
||||
@@ -244,40 +250,20 @@ class TestConfigOverrideSubstitution:
|
||||
assert "{{" not in cleaned
|
||||
|
||||
|
||||
class TestBatchAutoDetection:
|
||||
"""DS-12: is_batch_scenario() and batch_init_nudge property."""
|
||||
class TestBatchDeprecatedNoOps:
|
||||
"""batch-ledger skill was removed; is_batch_scenario() and batch_init_nudge
|
||||
are deprecated no-ops that return False / None unconditionally. They are
|
||||
kept in-tree to avoid touching every orchestrator/execution_manager call
|
||||
site that still reads the nudge through the config plumbing."""
|
||||
|
||||
def test_detects_list_of(self):
|
||||
assert is_batch_scenario("process a list of 100 leads") is True
|
||||
def test_is_batch_scenario_always_false(self):
|
||||
assert is_batch_scenario("process a list of 100 leads") is False
|
||||
assert is_batch_scenario("for each record, send an email") is False
|
||||
assert is_batch_scenario("write a summary") is False
|
||||
|
||||
def test_detects_collection_of(self):
|
||||
assert is_batch_scenario("a collection of invoices") is True
|
||||
|
||||
def test_detects_items(self):
|
||||
assert is_batch_scenario("go through all items in the spreadsheet") is True
|
||||
|
||||
def test_detects_for_each(self):
|
||||
assert is_batch_scenario("for each record, send an email") is True
|
||||
|
||||
def test_no_match_single_task(self):
|
||||
assert is_batch_scenario("write a summary of the quarterly report") is False
|
||||
|
||||
def test_batch_nudge_active_by_default(self):
|
||||
def test_batch_init_nudge_always_none(self):
|
||||
manager = DefaultSkillManager()
|
||||
manager.load()
|
||||
assert manager.batch_init_nudge is not None
|
||||
assert "_batch_ledger" in manager.batch_init_nudge
|
||||
|
||||
def test_batch_nudge_none_when_skill_disabled(self):
|
||||
config = SkillsConfig.from_agent_vars(default_skills={"hive.batch-ledger": {"enabled": False}})
|
||||
manager = DefaultSkillManager(config)
|
||||
manager.load()
|
||||
assert manager.batch_init_nudge is None
|
||||
|
||||
def test_batch_nudge_none_when_auto_detect_disabled(self):
|
||||
config = SkillsConfig.from_agent_vars(default_skills={"hive.batch-ledger": {"auto_detect_batch": False}})
|
||||
manager = DefaultSkillManager(config)
|
||||
manager.load()
|
||||
assert manager.batch_init_nudge is None
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,590 @@
|
||||
"""Tests for framework.host.progress_db — per-colony task queue."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sqlite3
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from framework.host.progress_db import (
|
||||
SCHEMA_VERSION,
|
||||
ensure_all_colony_dbs,
|
||||
ensure_progress_db,
|
||||
enqueue_task,
|
||||
reclaim_stale,
|
||||
seed_tasks,
|
||||
)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Schema / init
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_ensure_progress_db_fresh(tmp_path: Path) -> None:
    """A fresh colony dir gets data/progress.db with WAL mode, schema, and indexes."""
    colony_dir = tmp_path / "c"
    db_path = ensure_progress_db(colony_dir)

    # The DB is created at <colony>/data/progress.db.
    assert db_path.exists()
    assert db_path.parent.name == "data"
    assert db_path.name == "progress.db"

    conn = sqlite3.connect(str(db_path))
    try:
        # WAL journal mode and the stamped schema version.
        journal_mode = conn.execute("PRAGMA journal_mode").fetchone()[0]
        assert journal_mode.lower() == "wal"
        assert conn.execute("PRAGMA user_version").fetchone()[0] == SCHEMA_VERSION

        def object_names(kind):
            # Names of all schema objects of the given sqlite_master type.
            cur = conn.execute(
                "SELECT name FROM sqlite_master WHERE type=?", (kind,)
            )
            return {row[0] for row in cur}

        assert {"tasks", "steps", "sop_checklist", "colony_meta"} <= object_names("table")

        # Every named index we declared must be present.
        expected_indexes = {
            "idx_tasks_claimable",
            "idx_steps_task_seq",
            "idx_sop_required_open",
            "idx_tasks_status",
        }
        assert expected_indexes <= object_names("index")
    finally:
        conn.close()
|
||||
|
||||
|
||||
def test_ensure_progress_db_idempotent(tmp_path: Path) -> None:
    """Calling ensure_progress_db twice returns the same path and schema version."""
    colony_dir = tmp_path / "c"
    first = ensure_progress_db(colony_dir)
    second = ensure_progress_db(colony_dir)
    assert first == second

    conn = sqlite3.connect(str(first))
    try:
        # Re-initialization must not bump (or reset) the schema version.
        version = conn.execute("PRAGMA user_version").fetchone()[0]
        assert version == SCHEMA_VERSION
    finally:
        conn.close()
|
||||
|
||||
|
||||
def test_ensure_all_colony_dbs_backfill(tmp_path: Path) -> None:
    """Backfill initializes a DB for every colony directory and skips plain files."""
    colonies_root = tmp_path / "colonies"
    for colony_name in ("alpha", "beta"):
        (colonies_root / colony_name).mkdir(parents=True)
    (colonies_root / "gamma_not_dir").touch()  # plain file: must be ignored

    created = ensure_all_colony_dbs(colonies_root)

    # Each returned path is <colony>/data/progress.db, so the colony name
    # sits two levels above the DB file.
    assert {db.parent.parent.name for db in created} == {"alpha", "beta"}
    assert all(db.exists() for db in created)
|
||||
|
||||
|
||||
def test_ensure_all_colony_dbs_missing_root(tmp_path: Path) -> None:
    """A nonexistent colonies root yields an empty list rather than raising."""
    absent_root = tmp_path / "nonexistent"
    assert ensure_all_colony_dbs(absent_root) == []
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Seeding / enqueue
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_seed_tasks_basic(tmp_path: Path) -> None:
    """seed_tasks inserts tasks with defaults plus their steps and SOP items.

    Verifies the full fan-out of one rich row (priority, JSON payload,
    ordered steps, required/optional SOP items) alongside one minimal row,
    and that seeded rows start as status='pending' with source='queen_create'.
    """
    db = ensure_progress_db(tmp_path / "c")
    ids = seed_tasks(
        db,
        [
            {
                "goal": "task one",
                "priority": 5,
                "payload": {"url": "https://example.com"},
                "steps": [
                    {"title": "open page"},
                    {"title": "extract data", "detail": "selector .content"},
                ],
                "sop_items": [
                    {"key": "captcha_handled", "description": "Verify no CAPTCHA blocks"},
                    # required defaults to True; this one opts out explicitly.
                    {"key": "soft_hint", "description": "optional", "required": False},
                ],
            },
            {"goal": "task two"},
        ],
    )
    assert len(ids) == 2

    con = sqlite3.connect(str(db))
    try:
        # ORDER BY goal makes "task one" deterministically rows[0].
        rows = list(con.execute("SELECT id, goal, priority, status, source, payload FROM tasks ORDER BY goal"))
        assert len(rows) == 2
        assert rows[0][1] == "task one"
        assert rows[0][2] == 5
        assert rows[0][3] == "pending"
        assert rows[0][4] == "queen_create"
        # Payload is stored as serialized JSON text.
        assert '"url"' in rows[0][5]

        step_count = con.execute(
            "SELECT count(*) FROM steps WHERE task_id=?", (ids[0],)
        ).fetchone()[0]
        assert step_count == 2

        # SQLite stores the booleans as 1 (required) / 0 (optional).
        sop_rows = list(con.execute(
            "SELECT key, required FROM sop_checklist WHERE task_id=? ORDER BY key", (ids[0],)
        ))
        assert sop_rows == [("captcha_handled", 1), ("soft_hint", 0)]
    finally:
        con.close()
|
||||
|
||||
|
||||
def test_seed_tasks_rejects_missing_goal(tmp_path: Path) -> None:
    """A row lacking the mandatory 'goal' key must raise ValueError."""
    db = ensure_progress_db(tmp_path / "c")
    bad_rows = [{"priority": 1}]  # no "goal" key
    with pytest.raises(ValueError):
        seed_tasks(db, bad_rows)
|
||||
|
||||
|
||||
def test_seed_tasks_empty_is_noop(tmp_path: Path) -> None:
    """Seeding an empty batch returns an empty id list without touching the DB."""
    db = ensure_progress_db(tmp_path / "c")
    result = seed_tasks(db, [])
    assert result == []
|
||||
|
||||
|
||||
def test_seed_tasks_rollback_on_partial_failure(tmp_path: Path) -> None:
    """A bad row mid-batch must roll back the whole transaction."""
    db = ensure_progress_db(tmp_path / "c")
    batch = [
        {"goal": "good one"},
        {"priority": 1},  # missing goal -> boom
        {"goal": "never inserted"},
    ]
    with pytest.raises(ValueError):
        seed_tasks(db, batch)

    # Neither the row before nor the row after the bad one may survive.
    con = sqlite3.connect(str(db))
    try:
        total = con.execute("SELECT count(*) FROM tasks").fetchone()[0]
        assert total == 0
    finally:
        con.close()
|
||||
|
||||
|
||||
def test_enqueue_task(tmp_path: Path) -> None:
    """A single appended task records goal/priority/source plus its steps."""
    db = ensure_progress_db(tmp_path / "c")
    tid = enqueue_task(
        db,
        "appended",
        steps=[{"title": "s1"}],
        sop_items=[{"key": "k", "description": "d"}],
        priority=3,
    )
    assert tid

    con = sqlite3.connect(str(db))
    try:
        task_row = con.execute(
            "SELECT goal, priority, source FROM tasks WHERE id=?", (tid,)
        ).fetchone()
        assert task_row == ("appended", 3, "enqueue_tool")

        n_steps = con.execute(
            "SELECT count(*) FROM steps WHERE task_id=?", (tid,)
        ).fetchone()[0]
        assert n_steps == 1
    finally:
        con.close()
|
||||
|
||||
|
||||
def test_enqueue_task_custom_source(tmp_path: Path) -> None:
    """enqueue_task must accept a custom source value (e.g. run_agent_with_input).

    Phase 2 wiring adds source values: create_colony_auto,
    run_agent_with_input, run_parallel_workers. Verify the source
    column stores them verbatim.
    """
    db = ensure_progress_db(tmp_path / "c")
    tid = enqueue_task(db, "chat task", source="run_agent_with_input")

    con = sqlite3.connect(str(db))
    try:
        stored = con.execute(
            "SELECT goal, source FROM tasks WHERE id=?", (tid,)
        ).fetchone()
        assert stored == ("chat task", "run_agent_with_input")
    finally:
        con.close()
|
||||
|
||||
|
||||
def test_claim_by_assigned_id(tmp_path: Path) -> None:
    """Worker protocol: claim a specific row by id (not the generic next-pending).

    The Phase 2 fix threads ``task_id`` into ``input_data`` when the
    queen pre-assigns a row. The worker must be able to claim THAT
    row atomically with an ``UPDATE ... WHERE id=? AND status='pending'``
    pattern, and a second claim on the same id must return 0 rows.
    """
    db = ensure_progress_db(tmp_path / "c")
    [tid] = seed_tasks(db, [{"goal": "pinned task"}])

    # autocommit connection (isolation_level=None): each UPDATE commits
    # immediately, mirroring how a worker process would execute the claim.
    con = sqlite3.connect(str(db), isolation_level=None, timeout=5.0)
    try:
        # First claim: the status='pending' guard in the WHERE clause is
        # what makes this atomic — only one UPDATE can flip the row.
        cur = con.execute(
            """
            UPDATE tasks SET status='claimed', worker_id=?,
                claim_token=lower(hex(randomblob(8))),
                claimed_at=datetime('now'),
                updated_at=datetime('now')
            WHERE id=? AND status='pending'
            RETURNING id, goal
            """,
            ("w1", tid),
        )
        row = cur.fetchone()
        assert row == (tid, "pinned task"), f"expected one claim, got {row}"

        # Second attempt on the same id must affect zero rows.
        cur2 = con.execute(
            """
            UPDATE tasks SET status='claimed', worker_id=?,
                claim_token=lower(hex(randomblob(8))),
                claimed_at=datetime('now')
            WHERE id=? AND status='pending'
            RETURNING id
            """,
            ("w2", tid),
        )
        assert cur2.fetchone() is None, "second claim should affect zero rows"

        # Ensure worker_id on the row is still the first claimant.
        owner = con.execute(
            "SELECT worker_id, status FROM tasks WHERE id=?", (tid,)
        ).fetchone()
        assert owner == ("w1", "claimed")
    finally:
        con.close()
|
||||
|
||||
|
||||
def test_claim_by_id_does_not_steal_unrelated_rows(tmp_path: Path) -> None:
    """Claim-by-id must only touch the named row, not siblings."""
    db = ensure_progress_db(tmp_path / "c")
    ids = seed_tasks(db, [{"goal": "a"}, {"goal": "b"}, {"goal": "c"}])

    con = sqlite3.connect(str(db), isolation_level=None)
    try:
        # Claim only the middle row by its id.
        con.execute(
            "UPDATE tasks SET status='claimed', worker_id='w1', "
            "claimed_at=datetime('now') WHERE id=? AND status='pending'",
            (ids[1],),
        )
        status_by_goal = dict(con.execute("SELECT goal, status FROM tasks").fetchall())
        assert status_by_goal == {"a": "pending", "b": "claimed", "c": "pending"}
    finally:
        con.close()
|
||||
|
||||
|
||||
def test_seed_tasks_bulk_10k(tmp_path: Path) -> None:
    """Bulk-seed 10k rows in one transaction and bound the wall-clock cost.

    Typically well under a second on local disk; the assertion allows
    up to 3s to tolerate slow CI disks (see the comment below).
    """
    db = ensure_progress_db(tmp_path / "c")
    tasks = [{"goal": f"task {i}", "seq": i} for i in range(10_000)]
    start = time.perf_counter()
    ids = seed_tasks(db, tasks)
    elapsed = time.perf_counter() - start
    assert len(ids) == 10_000
    # Generous ceiling — on CI with slow disk we've seen ~300ms.
    assert elapsed < 3.0, f"bulk seed too slow: {elapsed:.2f}s"
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Atomic claim under concurrency
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
_CLAIM_SQL = """
|
||||
BEGIN IMMEDIATE;
|
||||
UPDATE tasks
|
||||
SET
|
||||
status = 'claimed',
|
||||
worker_id = ?,
|
||||
claim_token = lower(hex(randomblob(8))),
|
||||
claimed_at = datetime('now'),
|
||||
updated_at = datetime('now')
|
||||
WHERE id = (
|
||||
SELECT id FROM tasks
|
||||
WHERE status = 'pending'
|
||||
ORDER BY priority DESC, seq, created_at
|
||||
LIMIT 1
|
||||
);
|
||||
"""
|
||||
|
||||
|
||||
def _claim_one(db_path: Path, worker_id: str) -> str | None:
|
||||
"""Atomic single-shot claim using RETURNING (SQLite 3.35+).
|
||||
|
||||
The skill teaches agents the BEGIN IMMEDIATE + subquery UPDATE
|
||||
pattern; for an in-process test helper we use RETURNING so the
|
||||
claimed row id is returned from the same statement (no racing
|
||||
follow-up SELECT). Functionally equivalent: both approaches rely
|
||||
on the atomic subquery-UPDATE.
|
||||
"""
|
||||
con = sqlite3.connect(str(db_path), isolation_level=None, timeout=10.0)
|
||||
con.execute("PRAGMA busy_timeout = 10000")
|
||||
try:
|
||||
cur = con.execute(
|
||||
"""
|
||||
UPDATE tasks
|
||||
SET status = 'claimed',
|
||||
worker_id = ?,
|
||||
claim_token = lower(hex(randomblob(8))),
|
||||
claimed_at = datetime('now'),
|
||||
updated_at = datetime('now')
|
||||
WHERE id = (
|
||||
SELECT id FROM tasks
|
||||
WHERE status = 'pending'
|
||||
ORDER BY priority DESC, seq, created_at
|
||||
LIMIT 1
|
||||
)
|
||||
RETURNING id
|
||||
""",
|
||||
(worker_id,),
|
||||
)
|
||||
row = cur.fetchone()
|
||||
return row[0] if row else None
|
||||
finally:
|
||||
con.close()
|
||||
|
||||
|
||||
def test_claim_atomicity_under_concurrency(tmp_path: Path) -> None:
    """20 threads racing to drain 100 tasks — each task claimed exactly once.

    Guards the core invariant of the claim protocol: the subquery-UPDATE
    in ``_claim_one`` must hand every pending row to exactly one worker,
    with no duplicates and no lost rows, under real thread contention.
    """
    db = ensure_progress_db(tmp_path / "c")
    seed_tasks(db, [{"goal": f"task {i}", "seq": i} for i in range(100)])

    claims: list[tuple[str, str]] = []
    claims_lock = threading.Lock()

    def worker(worker_id: str) -> None:
        # Drain the queue: keep claiming until no pending row remains.
        while True:
            tid = _claim_one(db, worker_id)
            if tid is None:
                return
            with claims_lock:
                claims.append((worker_id, tid))

    threads = [threading.Thread(target=worker, args=(f"w{i}",)) for i in range(20)]
    for t in threads:
        t.start()
    for t in threads:
        t.join(timeout=30)
    # BUG FIX: ``Thread.join(timeout=...)`` returns silently on timeout,
    # so a hung worker previously surfaced only as a confusing
    # partial-claims assertion further down. Fail loudly and name the
    # stuck threads instead.
    stuck = [t.name for t in threads if t.is_alive()]
    assert not stuck, f"worker threads still alive after 30s join timeout: {stuck}"

    task_ids = [tid for _, tid in claims]
    assert len(task_ids) == 100, f"expected 100 claims, got {len(task_ids)}"
    assert len(set(task_ids)) == 100, "duplicate claims detected"

    con = sqlite3.connect(str(db))
    try:
        remaining = con.execute(
            "SELECT count(*) FROM tasks WHERE status='pending'"
        ).fetchone()[0]
        assert remaining == 0
        claimed = con.execute(
            "SELECT count(*) FROM tasks WHERE status='claimed'"
        ).fetchone()[0]
        assert claimed == 100
    finally:
        con.close()
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Stale-claim reclaimer
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_reclaim_stale_returns_to_pending(tmp_path: Path) -> None:
    """A claim older than the staleness window is released back to pending."""
    db = ensure_progress_db(tmp_path / "c")
    [tid] = seed_tasks(db, [{"goal": "stuck"}])

    # Backdate a claim to 20 minutes ago to make it stale.
    con = sqlite3.connect(str(db), isolation_level=None)
    try:
        con.execute(
            "UPDATE tasks SET status='claimed', worker_id='w1', "
            "claimed_at=datetime('now', '-20 minutes') WHERE id=?",
            (tid,),
        )
    finally:
        con.close()

    assert reclaim_stale(db, stale_after_minutes=15) == 1

    con = sqlite3.connect(str(db))
    try:
        state = con.execute(
            "SELECT status, worker_id, retry_count FROM tasks WHERE id=?", (tid,)
        ).fetchone()
        # Released: pending again, worker cleared, retry counted.
        assert state == ("pending", None, 1)
    finally:
        con.close()
|
||||
|
||||
|
||||
def test_reclaim_stale_fails_after_max_retries(tmp_path: Path) -> None:
    """A stale claim already at its retry limit is marked failed, not requeued."""
    db = ensure_progress_db(tmp_path / "c")
    [tid] = seed_tasks(db, [{"goal": "doomed", "max_retries": 2}])

    con = sqlite3.connect(str(db), isolation_level=None)
    try:
        # Stale for 20 minutes AND already at retry_count == max_retries.
        con.execute(
            "UPDATE tasks SET status='claimed', worker_id='w1', retry_count=2, "
            "claimed_at=datetime('now', '-20 minutes') WHERE id=?",
            (tid,),
        )
    finally:
        con.close()

    reclaim_stale(db, stale_after_minutes=15)

    con = sqlite3.connect(str(db))
    try:
        status, last_error = con.execute(
            "SELECT status, last_error FROM tasks WHERE id=?", (tid,)
        ).fetchone()
        assert status == "failed"
        assert last_error is not None and "max_retries" in last_error
    finally:
        con.close()
|
||||
|
||||
|
||||
def test_reclaim_stale_ignores_fresh_claims(tmp_path: Path) -> None:
    """A claim made just now must survive the reclaimer untouched."""
    db = ensure_progress_db(tmp_path / "c")
    [tid] = seed_tasks(db, [{"goal": "working"}])

    con = sqlite3.connect(str(db), isolation_level=None)
    try:
        # Fresh claim: claimed_at is "now", well inside the 15-minute window.
        con.execute(
            "UPDATE tasks SET status='claimed', worker_id='w1', "
            "claimed_at=datetime('now') WHERE id=?",
            (tid,),
        )
    finally:
        con.close()

    assert reclaim_stale(db, stale_after_minutes=15) == 0
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Foreign key cascade
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Worker config patching for pre-existing colonies
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
def _write_worker_cfg(path: Path, *, with_input_data: dict | None = None) -> None:
|
||||
"""Write a minimal worker.json that matches the shape ensure_progress_db patches."""
|
||||
import json as _json
|
||||
|
||||
cfg = {
|
||||
"name": "worker",
|
||||
"system_prompt": "You are a worker.",
|
||||
"goal": {"description": "do stuff", "success_criteria": [], "constraints": []},
|
||||
"tools": [],
|
||||
}
|
||||
if with_input_data is not None:
|
||||
cfg["input_data"] = with_input_data
|
||||
path.write_text(_json.dumps(cfg, indent=2))
|
||||
|
||||
|
||||
def test_ensure_progress_db_patches_existing_worker_json(tmp_path: Path) -> None:
    """Pre-existing worker.json without input_data gets db_path injected."""
    import json as _json

    colony = tmp_path / "legacy_colony"
    colony.mkdir()
    worker_path = colony / "worker.json"
    _write_worker_cfg(worker_path)

    # Sanity: the legacy config starts with no input_data section.
    assert "input_data" not in _json.loads(worker_path.read_text())

    db = ensure_progress_db(colony)

    patched = _json.loads(worker_path.read_text())
    assert patched["input_data"]["db_path"] == str(db)
    assert patched["input_data"]["colony_id"] == "legacy_colony"
    # Unrelated fields must come through untouched.
    assert patched["system_prompt"] == "You are a worker."
    assert patched["goal"]["description"] == "do stuff"
|
||||
|
||||
|
||||
def test_ensure_progress_db_patch_is_idempotent(tmp_path: Path) -> None:
    """Second call must not rewrite the file (mtime unchanged)."""
    import time as _time

    colony = tmp_path / "idem"
    colony.mkdir()
    worker_path = colony / "worker.json"
    _write_worker_cfg(worker_path)

    ensure_progress_db(colony)
    first_mtime = worker_path.stat().st_mtime

    _time.sleep(0.02)  # ensure any rewrite would bump mtime
    ensure_progress_db(colony)

    assert worker_path.stat().st_mtime == first_mtime, (
        "second ensure_progress_db must not rewrite worker.json"
    )
|
||||
|
||||
|
||||
def test_ensure_progress_db_preserves_existing_input_data_keys(tmp_path: Path) -> None:
    """Pre-existing input_data keys (other than db_path/colony_id) are preserved."""
    import json as _json

    colony = tmp_path / "preserved"
    colony.mkdir()
    _write_worker_cfg(
        colony / "worker.json",
        with_input_data={"custom_key": "hello", "db_path": "/stale/path.db"},
    )

    db = ensure_progress_db(colony)
    patched = _json.loads((colony / "worker.json").read_text())

    assert patched["input_data"]["custom_key"] == "hello"
    # The stale db_path is replaced with the freshly-ensured one.
    assert patched["input_data"]["db_path"] == str(db)
    assert patched["input_data"]["colony_id"] == "preserved"
|
||||
|
||||
|
||||
def test_ensure_progress_db_skips_metadata_and_triggers(tmp_path: Path) -> None:
    """metadata.json and triggers.json are not worker configs — must not be touched."""
    import json as _json

    colony = tmp_path / "guarded"
    colony.mkdir()
    (colony / "metadata.json").write_text(_json.dumps({"colony_name": "guarded"}))
    (colony / "triggers.json").write_text(_json.dumps([{"id": "t1"}]))
    _write_worker_cfg(colony / "worker.json")

    ensure_progress_db(colony)

    assert "input_data" not in _json.loads((colony / "metadata.json").read_text())
    assert _json.loads((colony / "triggers.json").read_text()) == [{"id": "t1"}]
    # Only the real worker config gets the injected section.
    assert "input_data" in _json.loads((colony / "worker.json").read_text())
|
||||
|
||||
|
||||
def test_task_delete_cascades_to_steps_and_sop(tmp_path: Path) -> None:
    """Deleting a task removes its steps and SOP rows via the FK cascade."""
    db = ensure_progress_db(tmp_path / "c")
    [tid] = seed_tasks(
        db,
        [
            {
                "goal": "cascade test",
                "steps": [{"title": "a"}, {"title": "b"}],
                "sop_items": [{"key": "k", "description": "d"}],
            }
        ],
    )

    con = sqlite3.connect(str(db), isolation_level=None)
    try:
        # FK enforcement is per-connection in SQLite; cascades need it on.
        con.execute("PRAGMA foreign_keys = ON")
        con.execute("DELETE FROM tasks WHERE id=?", (tid,))
        for table in ("steps", "sop_checklist"):
            orphans = con.execute(
                f"SELECT count(*) FROM {table} WHERE task_id=?", (tid,)
            ).fetchone()[0]
            assert orphans == 0, f"{table} rows survived the cascade"
    finally:
        con.close()
|
||||
@@ -141,7 +141,7 @@ class TestSkillDiscovery:
|
||||
framework_skills = [s for s in skills if s.source_scope == "framework"]
|
||||
names = {s.name for s in framework_skills}
|
||||
assert "hive.note-taking" in names
|
||||
assert "hive.batch-ledger" in names
|
||||
assert "hive.colony-progress-tracker" in names
|
||||
|
||||
def test_max_depth_limit(self, tmp_path):
|
||||
# Create a skill nested beyond max_depth
|
||||
|
||||
@@ -271,6 +271,48 @@ else
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check for sqlite3 CLI (required for colony progress tracking)
|
||||
echo -n " Checking for sqlite3... "
|
||||
if command -v sqlite3 &> /dev/null; then
|
||||
echo -e "${GREEN}ok${NC}"
|
||||
else
|
||||
echo -e "${YELLOW}not found${NC}"
|
||||
# Attempt auto-install on common package managers
|
||||
SQLITE_INSTALLED=false
|
||||
if command -v apt-get &> /dev/null; then
|
||||
echo -n " Installing sqlite3 via apt... "
|
||||
if sudo apt-get install -y sqlite3 > /dev/null 2>&1; then
|
||||
SQLITE_INSTALLED=true
|
||||
fi
|
||||
elif command -v brew &> /dev/null; then
|
||||
echo -n " Installing sqlite3 via brew... "
|
||||
if brew install sqlite > /dev/null 2>&1; then
|
||||
SQLITE_INSTALLED=true
|
||||
fi
|
||||
elif command -v apk &> /dev/null; then
|
||||
echo -n " Installing sqlite3 via apk... "
|
||||
if apk add sqlite > /dev/null 2>&1; then
|
||||
SQLITE_INSTALLED=true
|
||||
fi
|
||||
elif command -v dnf &> /dev/null; then
|
||||
echo -n " Installing sqlite3 via dnf... "
|
||||
if sudo dnf install -y sqlite > /dev/null 2>&1; then
|
||||
SQLITE_INSTALLED=true
|
||||
fi
|
||||
elif command -v pacman &> /dev/null; then
|
||||
echo -n " Installing sqlite3 via pacman... "
|
||||
if sudo pacman -S --noconfirm sqlite > /dev/null 2>&1; then
|
||||
SQLITE_INSTALLED=true
|
||||
fi
|
||||
fi
|
||||
if [ "$SQLITE_INSTALLED" = true ]; then
|
||||
echo -e "${GREEN}ok${NC}"
|
||||
else
|
||||
echo -e "${YELLOW} ⚠ Could not install sqlite3 automatically${NC}"
|
||||
echo -e "${DIM} Install manually: apt install sqlite3 / brew install sqlite / apk add sqlite${NC}"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check for Chrome/Edge (required for GCU browser tools)
|
||||
echo -n " Checking for Chrome/Edge browser... "
|
||||
# Check common browser locations
|
||||
|
||||
@@ -1,132 +0,0 @@
|
||||
---
|
||||
name: linkedin-connection-greeter
|
||||
description: Automates accepting LinkedIn connections and sending a welcome message about the HoneyComb prediction market. Handles shadow DOM and Lexical editors.
|
||||
---
|
||||
|
||||
# LinkedIn Connection Greeter
|
||||
|
||||
This skill outlines the exact flow to accept connection requests and send a specific welcome message without triggering spam filters.
|
||||
|
||||
## 1. Load Ledger
|
||||
Before starting, read `data/linkedin_contacts.json`. If it doesn't exist, initialize with `{"contacts": []}`. You will use this to skip people you've already messaged.
|
||||
|
||||
## 2. Scan Pending Connections
|
||||
Navigate to `https://www.linkedin.com/mynetwork/invitation-manager/received/`. Wait for the page load event, then sleep 4 seconds.
|
||||
Strip unload handlers:
|
||||
`browser_evaluate("(function(){window.onbeforeunload=null;})()")`
|
||||
|
||||
Extract cards using this specific snippet (handles changing classes and follow invites):
|
||||
```javascript
|
||||
(function(){
|
||||
const btns = Array.from(document.querySelectorAll('button')).filter(b => b.textContent.includes('Accept'));
|
||||
let results = [];
|
||||
for (let b of btns) {
|
||||
let card = b.closest('[role="listitem"]');
|
||||
if (!card) continue;
|
||||
let text = card.textContent.toLowerCase();
|
||||
if (text.includes('invited you to follow') || text.includes('invited you to subscribe')) continue;
|
||||
|
||||
let nameEls = Array.from(card.querySelectorAll('a[href*="/in/"]'));
|
||||
let nameEl = nameEls.find(el => el.textContent.trim().length > 0);
|
||||
|
||||
let r = b.getBoundingClientRect();
|
||||
results.push({
|
||||
first_name: nameEl ? nameEl.textContent.trim().split(/\s+/)[0] : 'there',
|
||||
profile_url: nameEl ? nameEl.href : '',
|
||||
cx: r.x + r.width/2,
|
||||
cy: r.y + r.height/2
|
||||
});
|
||||
}
|
||||
return results;
|
||||
})();
|
||||
```
|
||||
|
||||
## 3. Process Each Card (Max 10 per run)
|
||||
For each card, check if `profile_url` is already in the ledger. If not:
|
||||
1. `browser_click_coordinate(cx, cy)` to click the specific Accept button.
|
||||
2. `sleep(2)`
|
||||
3. `browser_navigate(profile_url, wait_until="load")`
|
||||
4. `sleep(4)`
|
||||
5. `browser_evaluate("(function(){window.onbeforeunload=null; window.addEventListener('beforeunload', e => e.stopImmediatePropagation(), true);})()")`
|
||||
|
||||
## 4. Message the User
|
||||
Click Message Button on their profile:
|
||||
```javascript
|
||||
(function(){
|
||||
const links = Array.from(document.querySelectorAll('a[href*="/messaging/compose/"]'));
|
||||
for (const a of links){
|
||||
if (!a.href.includes('NON_SELF_PROFILE_VIEW') || a.href.includes('body=')) continue;
|
||||
const r = a.getBoundingClientRect();
|
||||
if (r.width === 0 || r.x > 700) continue;
|
||||
return {cx: r.x + r.width / 2, cy: r.y + r.height / 2};
|
||||
}
|
||||
return null;
|
||||
})();
|
||||
```
|
||||
Click that coordinate, then `sleep(2.5)`.
|
||||
|
||||
Find Textarea (it is hidden inside shadow DOM):
|
||||
```javascript
|
||||
(function(){
|
||||
const vh = window.innerHeight, vw = window.innerWidth;
|
||||
const candidates = [];
|
||||
function walk(root){
|
||||
const els = root.querySelectorAll ? root.querySelectorAll('div.msg-form__contenteditable') : [];
|
||||
for (const el of els){
|
||||
const r = el.getBoundingClientRect();
|
||||
if (r.width > 0 && r.height > 0 && r.y >= 0 && r.y + r.height <= vh && r.x >= 0 && r.x + r.width <= vw) {
|
||||
candidates.push({cx: r.x + r.width/2, cy: r.y + r.height/2, area: r.width * r.height});
|
||||
}
|
||||
}
|
||||
const all = root.querySelectorAll ? root.querySelectorAll('*') : [];
|
||||
for (const host of all){ if (host.shadowRoot) walk(host.shadowRoot); }
|
||||
}
|
||||
walk(document);
|
||||
candidates.sort((a, b) => b.area - a.area);
|
||||
return candidates.length ? candidates[0] : null;
|
||||
})();
|
||||
```
|
||||
Click that coordinate, `sleep(1)`.
|
||||
|
||||
Type the message:
|
||||
Construct the message: `Hey {first_name}, thanks for the connection invite! I'm currently building a prediction market for jobs: https://honeycomb.open-hive.com/. If you could check it out and share some feedback, I'd really appreciate it.`
|
||||
|
||||
Use `browser_type_focused` — it dispatches CDP `Input.insertText` to the already-focused composer (document.activeElement), which works through shadow DOM without JSON-escaping issues:
|
||||
```
|
||||
browser_type_focused(text=message_text)
|
||||
sleep(1.0)
|
||||
```
|
||||
|
||||
Find Send button (also inside shadow DOM):
|
||||
```javascript
|
||||
(function(){
|
||||
const vh = window.innerHeight;
|
||||
function walk(root){
|
||||
const btns = root.querySelectorAll ? root.querySelectorAll('button') : [];
|
||||
for (const b of btns){
|
||||
const cls = (b.className || '').toString();
|
||||
if (!cls.includes('send-button') && b.textContent.trim() !== 'Send') continue;
|
||||
const r = b.getBoundingClientRect();
|
||||
if (r.width <= 0 || r.y + r.height > vh) continue;
|
||||
return { cx: r.x + r.width/2, cy: r.y + r.height/2, disabled: b.disabled || b.getAttribute('aria-disabled') === 'true' };
|
||||
}
|
||||
const all = root.querySelectorAll ? root.querySelectorAll('*') : [];
|
||||
for (const host of all){ if (host.shadowRoot) { const got = walk(host.shadowRoot); if (got) return got; } }
|
||||
return null;
|
||||
}
|
||||
return walk(document);
|
||||
})();
|
||||
```
|
||||
Click send coordinate, `sleep(2)`.
|
||||
|
||||
## 5. Update Ledger
|
||||
Append the user to `data/linkedin_contacts.json`.
|
||||
```json
|
||||
{
|
||||
"profile_url": "...",
|
||||
"name": "...",
|
||||
"action": "connection_accepted+message_sent",
|
||||
"timestamp": "2026-..."
|
||||
}
|
||||
```
|
||||
`sleep(5)` before moving to the next card to mimic human pacing.
|
||||
+121
-7
@@ -82,10 +82,29 @@ def _find_project_root() -> str:
|
||||
return os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
def _resolve_path(path: str) -> str:
|
||||
"""Resolve path relative to PROJECT_ROOT. Raises ValueError if outside.
|
||||
# When ``--write-root`` is passed on the CLI, ``WRITE_ROOT`` diverges
|
||||
# from ``PROJECT_ROOT``: reads stay permissive (so the queen can
|
||||
# reference framework skills, docs, and the hive repo), but writes
|
||||
# are confined to the write root plus the ``~/.hive/`` escape hatch.
|
||||
# Without this split, the coder-tools sandbox IS the hive git
|
||||
# checkout — every queen-authored skill/ledger/script lands there as
|
||||
# untracked debris, which was the 2026-04-15 incident
|
||||
# (``~/aden/hive/x-rapid-reply/`` and siblings).
|
||||
WRITE_ROOT: str = ""
|
||||
|
||||
Also allows access to ~/.hive/ directory for agent session data files.
|
||||
|
||||
def _resolve_read_path(path: str) -> str:
|
||||
"""Resolve path for READ operations.
|
||||
|
||||
Allowlist (in order):
|
||||
1. Paths under ``~/.hive/`` — agent session data, colonies, skills.
|
||||
2. Paths under ``PROJECT_ROOT`` — hive repo, for reading framework
|
||||
defaults, docs, examples, etc.
|
||||
3. Relative paths — joined against ``PROJECT_ROOT`` (read-side
|
||||
default; writes use ``WRITE_ROOT`` instead).
|
||||
|
||||
Raises ``ValueError`` when the resolved path falls outside all
|
||||
allowed roots.
|
||||
"""
|
||||
# Normalize slashes for cross-platform (e.g. exports/hi_agent from LLM)
|
||||
path = path.replace("/", os.sep)
|
||||
@@ -153,6 +172,88 @@ def _resolve_path(path: str) -> str:
|
||||
return resolved
|
||||
|
||||
|
||||
def _resolve_write_path(path: str) -> str:
|
||||
"""Resolve path for WRITE operations.
|
||||
|
||||
Stricter than the read resolver: only allows writes under:
|
||||
1. ``WRITE_ROOT`` — the agent workspace (default: ``~/.hive/workspace/``
|
||||
when ``--write-root`` is passed).
|
||||
2. ``~/.hive/`` — agent session data.
|
||||
|
||||
Writes to the hive repo (``PROJECT_ROOT``) are REJECTED to keep
|
||||
the git checkout clean of queen-authored debris. Relative paths
|
||||
resolve against ``WRITE_ROOT``, not ``PROJECT_ROOT``.
|
||||
|
||||
When ``WRITE_ROOT`` equals ``PROJECT_ROOT`` (no split configured),
|
||||
this function is semantically identical to ``_resolve_read_path``.
|
||||
"""
|
||||
# Normalize slashes + expand ~
|
||||
path = path.replace("/", os.sep)
|
||||
if path.startswith("~"):
|
||||
path = os.path.expanduser(path)
|
||||
|
||||
hive_dir = os.path.expanduser("~/.hive")
|
||||
|
||||
if os.path.isabs(path):
|
||||
resolved = os.path.abspath(path)
|
||||
|
||||
# Always allow writes under ~/.hive/
|
||||
try:
|
||||
if os.path.commonpath([resolved, hive_dir]) == hive_dir:
|
||||
return resolved
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
# Writes are ALSO allowed under WRITE_ROOT (the agent workspace).
|
||||
try:
|
||||
if os.path.commonpath([resolved, WRITE_ROOT]) == WRITE_ROOT:
|
||||
return resolved
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
# If WRITE_ROOT == PROJECT_ROOT (legacy behavior: no split),
|
||||
# fall through to the read-side resolver so existing callers
|
||||
# keep working unchanged.
|
||||
if WRITE_ROOT == PROJECT_ROOT:
|
||||
return _resolve_read_path(path)
|
||||
|
||||
# Split configured AND the path isn't under WRITE_ROOT or
|
||||
# ~/.hive/. Reject — this is the whole point of the split.
|
||||
raise ValueError(
|
||||
f"Access denied: writes must be under '{WRITE_ROOT}' or "
|
||||
f"'{hive_dir}'. Path '{path}' is outside both "
|
||||
"(use an absolute path under one of those roots, or a "
|
||||
"relative path which will resolve under the write root)."
|
||||
)
|
||||
else:
|
||||
# Relative path: resolve against WRITE_ROOT, not PROJECT_ROOT.
|
||||
resolved = os.path.abspath(os.path.join(WRITE_ROOT, path))
|
||||
|
||||
# Double-check the resolved absolute path is inside WRITE_ROOT or
|
||||
# ~/.hive/ (covers edge cases like "../../etc/passwd" that escape).
|
||||
try:
|
||||
wr_common = os.path.commonpath([resolved, WRITE_ROOT])
|
||||
except ValueError:
|
||||
wr_common = ""
|
||||
try:
|
||||
hv_common = os.path.commonpath([resolved, hive_dir])
|
||||
except ValueError:
|
||||
hv_common = ""
|
||||
if wr_common != WRITE_ROOT and hv_common != hive_dir:
|
||||
raise ValueError(
|
||||
f"Access denied: resolved write path '{resolved}' escaped the "
|
||||
f"allowed roots ('{WRITE_ROOT}', '{hive_dir}')."
|
||||
)
|
||||
return resolved
|
||||
|
||||
|
||||
# Back-compat alias: existing call sites in this module call
|
||||
# ``_resolve_path`` directly (e.g. for snapshot dirs, agent tool
|
||||
# introspection). Those are all non-user-driven paths; route them
|
||||
# through the read resolver.
|
||||
_resolve_path = _resolve_read_path
|
||||
|
||||
|
||||
# ── Git snapshot system (ported from opencode's shadow git) ───────────────
|
||||
|
||||
|
||||
@@ -1637,32 +1738,45 @@ def validate_agent_package(agent_name: str) -> str:
|
||||
|
||||
|
||||
def main() -> None:
|
||||
global PROJECT_ROOT, SNAPSHOT_DIR
|
||||
global PROJECT_ROOT, SNAPSHOT_DIR, WRITE_ROOT
|
||||
|
||||
from aden_tools.file_ops import register_file_tools
|
||||
|
||||
parser = argparse.ArgumentParser(description="Coder Tools MCP Server")
|
||||
parser.add_argument("--project-root", default="")
|
||||
# ``--write-root`` isolates file writes from the project root so
|
||||
# queen-authored skills, ledgers, and scripts don't land in the
|
||||
# hive git checkout. Reads remain permissive under PROJECT_ROOT
|
||||
# so framework skills, docs, and examples stay accessible.
|
||||
# Defaults to PROJECT_ROOT when empty (legacy behavior).
|
||||
parser.add_argument("--write-root", default="")
|
||||
parser.add_argument("--port", type=int, default=int(os.getenv("CODER_TOOLS_PORT", "4002")))
|
||||
parser.add_argument("--host", default="0.0.0.0")
|
||||
parser.add_argument("--stdio", action="store_true")
|
||||
args = parser.parse_args()
|
||||
|
||||
PROJECT_ROOT = os.path.abspath(args.project_root) if args.project_root else _find_project_root()
|
||||
if args.write_root:
|
||||
WRITE_ROOT = os.path.abspath(os.path.expanduser(args.write_root))
|
||||
os.makedirs(WRITE_ROOT, exist_ok=True)
|
||||
else:
|
||||
WRITE_ROOT = PROJECT_ROOT # legacy: no split
|
||||
SNAPSHOT_DIR = os.path.join(
|
||||
os.path.expanduser("~"),
|
||||
".hive",
|
||||
"snapshots",
|
||||
os.path.basename(PROJECT_ROOT),
|
||||
)
|
||||
logger.info(f"Project root: {PROJECT_ROOT}")
|
||||
logger.info(f"Project root (reads): {PROJECT_ROOT}")
|
||||
logger.info(f"Write root (writes): {WRITE_ROOT}")
|
||||
logger.info(f"Snapshot dir: {SNAPSHOT_DIR}")
|
||||
|
||||
register_file_tools(
|
||||
mcp,
|
||||
resolve_path=_resolve_path,
|
||||
resolve_path=_resolve_read_path,
|
||||
resolve_path_write=_resolve_write_path,
|
||||
before_write=None, # Git snapshot causes stdio deadlock on Windows; undo_changes limited
|
||||
project_root=PROJECT_ROOT,
|
||||
project_root=WRITE_ROOT,
|
||||
)
|
||||
|
||||
if args.stdio:
|
||||
|
||||
@@ -328,6 +328,7 @@ def register_file_tools(
|
||||
mcp: FastMCP,
|
||||
*,
|
||||
resolve_path: Callable[[str], str] | None = None,
|
||||
resolve_path_write: Callable[[str], str] | None = None,
|
||||
before_write: Callable[[], None] | None = None,
|
||||
project_root: str | None = None,
|
||||
) -> None:
|
||||
@@ -335,12 +336,18 @@ def register_file_tools(
|
||||
|
||||
Args:
|
||||
mcp: FastMCP instance to register tools on.
|
||||
resolve_path: Path resolver. Default: resolve to absolute path.
|
||||
Raise ValueError to reject paths (e.g. outside sandbox).
|
||||
resolve_path: Path resolver for READ operations. Default:
|
||||
resolve to absolute path. Raise ValueError to reject paths
|
||||
(e.g. outside sandbox).
|
||||
resolve_path_write: Path resolver for WRITE/EDIT operations.
|
||||
Defaults to ``resolve_path`` when not provided. Split
|
||||
resolvers let callers keep reads permissive (framework
|
||||
skills, docs) while confining writes to an agent workspace.
|
||||
before_write: Hook called before write/edit operations (e.g. git snapshot).
|
||||
project_root: If set, search_files relativizes output paths to this root.
|
||||
"""
|
||||
_resolve = resolve_path or _default_resolve_path
|
||||
_resolve_write = resolve_path_write or _resolve
|
||||
|
||||
@mcp.tool()
|
||||
def read_file(path: str, offset: int = 1, limit: int = 0, hashline: bool = False) -> str:
|
||||
@@ -440,7 +447,7 @@ def register_file_tools(
|
||||
path: Absolute file path to write.
|
||||
content: Complete file content to write.
|
||||
"""
|
||||
resolved = _resolve(path)
|
||||
resolved = _resolve_write(path)
|
||||
resolved_path = Path(resolved)
|
||||
|
||||
# Stale-edit guard: an existing file must have been read recently
|
||||
@@ -509,7 +516,7 @@ def register_file_tools(
|
||||
new_text: Replacement text.
|
||||
replace_all: Replace all occurrences (default: first only).
|
||||
"""
|
||||
resolved = _resolve(path)
|
||||
resolved = _resolve_write(path)
|
||||
if not os.path.isfile(resolved):
|
||||
return f"Error: File not found: {path}"
|
||||
|
||||
@@ -815,7 +822,7 @@ def register_file_tools(
|
||||
return "Error: Too many edits in one call (max 100). Split into multiple calls."
|
||||
|
||||
# 2. Read file
|
||||
resolved = _resolve(path)
|
||||
resolved = _resolve_write(path)
|
||||
if not os.path.isfile(resolved):
|
||||
return f"Error: File not found: {path}"
|
||||
|
||||
|
||||
@@ -96,15 +96,59 @@ def register_advanced_tools(mcp: FastMCP) -> None:
|
||||
profile: str | None = None,
|
||||
) -> dict:
|
||||
"""
|
||||
Execute JavaScript in the browser context.
|
||||
ESCAPE HATCH — execute raw JavaScript. USE ONLY as a last
|
||||
resort. 99% of browser automation does NOT need this tool.
|
||||
Before reaching for it, try a semantic tool first:
|
||||
|
||||
- browser_click / browser_click_coordinate → for clicks
|
||||
- browser_type(use_insert_text=True) → for text input
|
||||
- browser_screenshot + browser_get_rect → for locating elements
|
||||
- browser_shadow_query → for shadow-DOM selectors
|
||||
- browser_get_text / browser_get_attribute → for reading state
|
||||
|
||||
ANTI-PATTERNS — stop and switch tools if you notice yourself:
|
||||
|
||||
1. Calling browser_evaluate 2+ times in a row to guess at
|
||||
selectors. Each attempt costs ~30 tokens of JS + a full
|
||||
LLM round-trip. After 2 empty results, the selector
|
||||
strategy is wrong — pivot to browser_screenshot +
|
||||
browser_click_coordinate. The screenshot + coord path
|
||||
works on shadow DOM, iframes, and React-obfuscated
|
||||
class names indifferently.
|
||||
|
||||
2. Writing a walk(root) recursive shadow-DOM traversal
|
||||
function. Use browser_shadow_query — it does the
|
||||
traversal in C++ via CDP's querySelector, not in JS.
|
||||
|
||||
3. Calling document.execCommand('insertText', ...) to type
|
||||
into Lexical / contenteditable. Use
|
||||
browser_type(use_insert_text=True, text='...') instead.
|
||||
It handles the click-then-focus-then-insert sequence
|
||||
with built-in retries.
|
||||
|
||||
4. Trying to read a nested iframe's contentDocument. That
|
||||
usually fails (cross-origin or late hydration). Use
|
||||
browser_screenshot to see it, then browser_click_coordinate.
|
||||
|
||||
LEGITIMATE uses (when nothing semantic fits):
|
||||
|
||||
- Reading a computed style, window size, or scroll position
|
||||
that no tool exposes.
|
||||
- Firing a one-shot site-specific API call (e.g. an analytics
|
||||
beacon the test needs).
|
||||
- Stripping an onbeforeunload handler that blocks navigation.
|
||||
- Probing for shadow roots whose existence is conditional.
|
||||
|
||||
Args:
|
||||
script: JavaScript code to execute
|
||||
script: JavaScript code to execute. Keep it small. If you
|
||||
need to traverse the DOM, prefer browser_shadow_query.
|
||||
tab_id: Chrome tab ID (default: active tab)
|
||||
profile: Browser profile name (default: "default")
|
||||
|
||||
Returns:
|
||||
Dict with evaluation result
|
||||
Dict with evaluation result. On a "find X" script that
|
||||
returns [] or null: do NOT retry with a different
|
||||
selector — take a screenshot and switch to coordinates.
|
||||
"""
|
||||
bridge = get_bridge()
|
||||
if not bridge or not bridge.is_connected:
|
||||
|
||||
@@ -0,0 +1,15 @@
|
||||
import json
|
||||
|
||||
try:
|
||||
with open('data/linkedin_ledger.json', 'r') as f:
|
||||
data = json.load(f)
|
||||
|
||||
profiles = data.get('messaged_profiles', [])
|
||||
for p in profiles:
|
||||
if 'variant' not in p:
|
||||
p['variant'] = 'Control' # Retroactively label our first runs
|
||||
|
||||
with open('data/linkedin_ledger.json', 'w') as f:
|
||||
json.dump({"messaged_profiles": profiles}, f, indent=2)
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"replies": [
|
||||
{
|
||||
"original_preview": "NASA Ames@NASAAmes\u00b75hWe\u2019re just getting started\n\nDuring their historic journey around the Moon, Artemis II observed lunar targets to study color, text"
|
||||
},
|
||||
{
|
||||
"original_preview": "NASA Marshall@NASA_Marshall\u00b74h Enjoy these views of the Artemis II launch from cameras affixed to the rocket! On April 1, 2026, the SLS (Space Launch "
|
||||
},
|
||||
{
|
||||
"original_preview": "U.S. Navy@USNavy\u00b711hFirst contact. On April 10, U.S. Navy divers were the first on the scene as the Navy and NASA successfully recovered the Orion s"
|
||||
},
|
||||
{
|
||||
"original_preview": "Alright, I give in. Here\u2019s my picture with the boss, courtesy of @johnkrausphotos. Oh, and hook \u2018em!"
|
||||
}
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user