Compare commits


9 Commits

Author SHA1 Message Date
Timothy bb2e55f5fa fix: click response 2026-04-16 17:43:21 -07:00
Timothy 5d0106ac34 fix: remove coordinate conversion 2026-04-16 17:36:05 -07:00
Timothy 3a219e27ab fix: two dimensions 2026-04-16 17:08:40 -07:00
Timothy 7f62e7a2d0 fix: split image click vs coordinate click 2026-04-16 16:37:52 -07:00
Richard Tang 44d114f0d0 feat: default 1ms delay and prompt improvements 2026-04-16 16:19:38 -07:00
Richard Tang 9e71f16d15 Merge remote-tracking branch 'origin/fix/browser-behaviour-improvements' into fix/browser-behaviour-improvements 2026-04-16 16:14:43 -07:00
Richard Tang 28cad2376c feat: separate type focus tool 2026-04-16 16:08:43 -07:00
Timothy 8222cd306e fix: simplify canonical workflow 2026-04-16 16:02:37 -07:00
Richard Tang 916803889f feat: browser control tools improvement and debugger 2026-04-16 15:14:08 -07:00
55 changed files with 2218 additions and 3834 deletions
+1 -13
@@ -44,19 +44,7 @@
"WebFetch(domain:docs.litellm.ai)",
"Bash(cat /home/timothy/aden/hive/.venv/lib/python3.11/site-packages/litellm-*.dist-info/METADATA)",
"Bash(find \"/home/timothy/.hive/agents/queens/queen_brand_design/sessions/session_20260415_100751_d49f4c28/\" -type f -name \"*.json*\" -exec grep -l \"协日\" {} \\\\;)",
"Bash(grep -v ':0$')",
"Bash(find /home/timothy/aden/hive/core/framework/skills/_default_skills -name \"SKILL.md\" -exec grep -l \"curl\\\\|jq\\\\|bash\\\\|sh\\\\|CLI\\\\|command\" {} \\\\;)",
"Bash(python3 -c \"import sys, json; [print\\(json.loads\\(line\\).get\\('data', {}\\).get\\('iteration'\\)\\) for line in sys.stdin]\")",
"Bash(grep -l \"shell\\\\|bash\\\\|exec\\\\|subprocess\" /home/timothy/aden/hive/tools/src/gcu/files/*.py)",
"Bash(python3 -c \"import aden_tools.file_ops; print\\(aden_tools.file_ops.__file__\\)\")",
"Bash(find / -path /proc -prune -o -name \"file_ops*\" -print)",
"Bash(grep -l \"sqlite\\\\|sqlite3\" /home/timothy/aden/hive/tools/src/aden_tools/tools/*/*.py)",
"Bash(grep -iv \"_tool$\")",
"Bash(grep -n \"add_post.*sessions\\\\|add_post.*colonies\\\\|add_get.*sessions\" /home/timothy/aden/hive/core/framework/server/*.py)",
"Bash(python -c 'import json; d=json.load\\(open\\('\\\\''__TRACKED_VAR__/.hive/colonies/__TRACKED_VAR__/worker.json'\\\\''\\)\\); print\\('\\\\''input_data:'\\\\'', d.get\\('\\\\''input_data'\\\\'', '\\\\''MISSING'\\\\''\\)\\)')",
"Bash(python -c 'import json; d=json.load\\(open\\('\\\\''__TRACKED_VAR__/.hive/colonies/__TRACKED_VAR__/worker.json'\\\\''\\)\\); print\\('\\\\'' __TRACKED_VAR__:'\\\\'', '\\\\''input_data='\\\\'', d.get\\('\\\\''input_data'\\\\'', '\\\\''MISSING'\\\\''\\)\\)')",
"Bash(python -c 'import json; d=json.load\\(open\\('\\\\''__TRACKED_VAR__/.hive/colonies/__TRACKED_VAR__/worker.json'\\\\''\\)\\); print\\('\\\\'' __TRACKED_VAR__: input_data ='\\\\'', d.get\\('\\\\''input_data'\\\\''\\)\\)')",
"Bash(kill 2466637 2466632)"
"Bash(grep -v ':0$')"
],
"additionalDirectories": [
"/home/timothy/.hive/skills/writing-hive-skills",
-11
@@ -1,11 +0,0 @@
import json
with open('/home/timothy/aden/hive/x_rapid_ledger.json', 'r') as f:
data = json.load(f)
data['replies'].append({
'original_preview': 'Alright, I give in. Heres my picture with the boss, courtesy of @johnkrausphotos. Oh, and hook em!'
})
with open('/home/timothy/aden/hive/x_rapid_ledger.json', 'w') as f:
json.dump(data, f, indent=2)
-11
@@ -1,11 +0,0 @@
import json, sys
with open('/home/timothy/aden/hive/x_rapid_ledger.json', 'r') as f:
ledger = json.load(f)
text = sys.argv[1]
for r in ledger['replies']:
if r.get('original_preview') == text:
print("YES")
sys.exit(0)
print("NO")
+2 -10
@@ -184,16 +184,8 @@ _QUEEN_INDEPENDENT_TOOLS = [
"search_files",
"run_command",
"undo_changes",
# NOTE (2026-04-16): ``run_parallel_workers`` was removed from the
# independent phase. The queen's pure DM mode is for conversation
# with the user; spawning workers from here puts their activity
# into a chat surface that's supposed to stay queen↔user only.
# Users who want to fan out parallel work should (a) use
# ``create_colony`` to fork into a persistent colony (where
# worker activity has its own page), or (b) load an agent via
# build/stage and use ``run_parallel_workers`` in the running
# phase where a worker context already exists.
#
# Parallel fan-out (Phase 4 unified ColonyRuntime)
"run_parallel_workers",
# Fork this session into a persistent colony for headless /
# recurring / background work that needs to keep running in
# parallel to (or after) this chat.
@@ -25,7 +25,6 @@ All tools are prefixed with `browser_`:
- `browser_screenshot` — visual capture (annotated PNG)
<!-- /vision-only -->
- `browser_shadow_query`, `browser_get_rect` — locate elements (shadow-piercing via `>>>`)
- `browser_coords` — convert image pixels to CSS pixels (always use `css_x/y`, never `physical_x/y`)
- `browser_scroll`, `browser_wait` — navigation helpers
- `browser_evaluate` — run JavaScript
- `browser_close`, `browser_close_finished` — tab cleanup
@@ -38,9 +37,9 @@ All tools are prefixed with `browser_`:
Neither tool is "preferred" universally — they're for different jobs. Default to snapshot on text-heavy static pages, screenshot on SPAs and anything shadow-DOM-heavy. Activate the `browser-automation` skill for the full decision tree.
## Coordinate rule: always CSS pixels
## Coordinate rule
Chrome DevTools Protocol `Input.dispatchMouseEvent` takes **CSS pixels**, not physical pixels. After a screenshot, use `browser_coords(image_x, image_y)` and feed the returned `css_x/y` (NOT `physical_x/y`) to `browser_click_coordinate`, `browser_hover_coordinate`, `browser_press_at`. Feeding physical pixels on a HiDPI display (DPR=1.6, 2, or 3) overshoots by `DPR×` and clicks land in the wrong place. `getBoundingClientRect()` already returns CSS pixels; pass through unchanged, no DPR multiplication.
`browser_screenshot` delivers the image at the CSS viewport's own dimensions, so a pixel you read off the screenshot is the same coordinate `browser_click_coordinate`, `browser_hover_coordinate`, and `browser_press_at` expect — no conversion. `getBoundingClientRect()` likewise returns CSS pixels; pass through unchanged.
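A minimal sketch of the no-conversion rule (pixel values purely illustrative):
```
browser_screenshot()                 # image arrives at CSS-viewport size
browser_click_coordinate(412, 233)   # same pixel you read off the image
```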
## System prompt tips for browser nodes
+2 -48
@@ -631,43 +631,6 @@ class ColonyRuntime:
spawn_tools = tools if tools is not None else self._tools
spawn_executor = tool_executor or self._tool_executor
# Colony progress tracker: when the caller supplied a db_path
# in input_data, this worker is part of a SQLite task queue
# and must see the hive.colony-progress-tracker skill body in
# its system prompt from turn 0. Rebuild the catalog with the
# skill pre-activated; falls back to the colony default when
# no db_path is present.
_spawn_catalog = self.skills_catalog_prompt
_spawn_skill_dirs = self.skill_dirs
if isinstance(input_data, dict) and input_data.get("db_path"):
try:
from framework.skills.config import SkillsConfig
from framework.skills.manager import SkillsManager, SkillsManagerConfig
_pre = SkillsManager(
SkillsManagerConfig(
skills_config=SkillsConfig.from_agent_vars(
skills=["hive.colony-progress-tracker"],
),
)
)
_pre.load()
_spawn_catalog = _pre.skills_catalog_prompt
_spawn_skill_dirs = list(_pre.allowlisted_dirs) if hasattr(_pre, "allowlisted_dirs") else self.skill_dirs
logger.info(
"spawn: pre-activated hive.colony-progress-tracker "
"(catalog %d%d chars) for worker with db_path=%s",
len(self.skills_catalog_prompt),
len(_spawn_catalog),
input_data.get("db_path"),
)
except Exception as exc:
logger.warning(
"spawn: failed to pre-activate colony-progress-tracker "
"skill, falling back to base catalog: %s",
exc,
)
# Resolve the SSE stream_id once. When the caller didn't supply
# one we use the per-worker fan-out tag (filtered out by the
# SSE handler). When the caller passed an explicit value we
@@ -722,9 +685,9 @@ class ColonyRuntime:
llm=self._llm,
available_tools=list(spawn_tools),
accounts_prompt=self._accounts_prompt,
skills_catalog_prompt=_spawn_catalog,
skills_catalog_prompt=self.skills_catalog_prompt,
protocols_prompt=self.protocols_prompt,
skill_dirs=_spawn_skill_dirs,
skill_dirs=self.skill_dirs,
execution_id=worker_id,
stream_id=explicit_stream_id or f"worker:{worker_id}",
)
@@ -757,8 +720,6 @@ class ColonyRuntime:
async def spawn_batch(
self,
tasks: list[dict[str, Any]],
*,
tools_override: list[Any] | None = None,
) -> list[str]:
"""Spawn a batch of parallel workers, one per task spec.
@@ -771,12 +732,6 @@ class ColonyRuntime:
The overseer's ``run_parallel_workers`` tool is the usual
caller; it pairs ``spawn_batch`` + ``wait_for_worker_reports``
into a single fan-out/fan-in primitive.
When ``tools_override`` is supplied, every spawned worker
receives that tool list instead of the colony's default. Used
by ``run_parallel_workers`` to drop tools whose credentials
failed the pre-flight check (so the spawned workers don't
waste a startup trying to use them).
"""
worker_ids: list[str] = []
for spec in tasks:
@@ -788,7 +743,6 @@ class ColonyRuntime:
task=task_text,
count=1,
input_data=task_data or {"task": task_text},
tools=tools_override,
)
worker_ids.extend(ids)
return worker_ids
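A minimal fan-out/fan-in sketch of the simplified signature above. The task-spec keys mirror the loop body; `wait_for_worker_reports` is named in the docstring but its exact signature isn't shown in this diff, so that call is an assumption:
```python
# Hypothetical usage; runtime is a ColonyRuntime instance.
worker_ids = await runtime.spawn_batch([
    {"task": "audit repo A"},
    {"task": "audit repo B", "input_data": {"repo": "B"}},
])
reports = await runtime.wait_for_worker_reports(worker_ids)  # fan-in (assumed call)
```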
-491
@@ -1,491 +0,0 @@
"""Per-colony SQLite task queue + progress ledger.
Every colony gets its own ``progress.db`` under ``~/.hive/colonies/{name}/data/``.
The DB holds the colony's task queue plus per-task step and SOP checklist
rows. Workers claim tasks atomically, write progress as they execute, and
verify SOP gates before marking a task done. This gives cross-run memory
that the existing per-iteration stall detectors don't have.
The DB is driven by agents via the ``sqlite3`` CLI through
``execute_command_tool``. This module handles framework-side lifecycle:
creation, migration, queen-side bulk seeding, stale-claim reclamation.
Concurrency model:
- WAL mode on from day one so 100 concurrent workers don't serialize.
- Workers hold NO long-running connection; they ``sqlite3`` per call,
which naturally releases locks between LLM turns.
- Atomic claim via ``BEGIN IMMEDIATE; UPDATE tasks SET status='claimed'
WHERE id=(SELECT ... LIMIT 1)``. The subquery-form UPDATE runs inside
the immediate transaction so racers either win the row or find zero
affected rows.
- Stale-claim reclaimer runs on host startup: claims older than
``stale_after_minutes`` get returned to ``pending`` and the row's
``retry_count`` increments. When ``retry_count >= max_retries`` the
row is moved to ``failed`` instead.
All writes go through ``BEGIN IMMEDIATE`` so racing readers see
consistent snapshots.
"""
from __future__ import annotations
import json
import logging
import sqlite3
import uuid
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
logger = logging.getLogger(__name__)
SCHEMA_VERSION = 1
_SCHEMA_V1 = """
CREATE TABLE IF NOT EXISTS tasks (
id TEXT PRIMARY KEY,
seq INTEGER,
priority INTEGER NOT NULL DEFAULT 0,
goal TEXT NOT NULL,
payload TEXT,
status TEXT NOT NULL DEFAULT 'pending',
worker_id TEXT,
claim_token TEXT,
claimed_at TEXT,
started_at TEXT,
completed_at TEXT,
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL,
retry_count INTEGER NOT NULL DEFAULT 0,
max_retries INTEGER NOT NULL DEFAULT 3,
last_error TEXT,
parent_task_id TEXT REFERENCES tasks(id) ON DELETE SET NULL,
source TEXT
);
CREATE TABLE IF NOT EXISTS steps (
id TEXT PRIMARY KEY,
task_id TEXT NOT NULL REFERENCES tasks(id) ON DELETE CASCADE,
seq INTEGER NOT NULL,
title TEXT NOT NULL,
detail TEXT,
status TEXT NOT NULL DEFAULT 'pending',
evidence TEXT,
worker_id TEXT,
started_at TEXT,
completed_at TEXT,
UNIQUE (task_id, seq)
);
CREATE TABLE IF NOT EXISTS sop_checklist (
id TEXT PRIMARY KEY,
task_id TEXT NOT NULL REFERENCES tasks(id) ON DELETE CASCADE,
key TEXT NOT NULL,
description TEXT NOT NULL,
required INTEGER NOT NULL DEFAULT 1,
done_at TEXT,
done_by TEXT,
note TEXT,
UNIQUE (task_id, key)
);
CREATE TABLE IF NOT EXISTS colony_meta (
key TEXT PRIMARY KEY,
value TEXT NOT NULL,
updated_at TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_tasks_claimable
ON tasks(status, priority DESC, seq, created_at)
WHERE status = 'pending';
CREATE INDEX IF NOT EXISTS idx_steps_task_seq
ON steps(task_id, seq);
CREATE INDEX IF NOT EXISTS idx_sop_required_open
ON sop_checklist(task_id, required, done_at);
CREATE INDEX IF NOT EXISTS idx_tasks_status
ON tasks(status, updated_at);
"""
_PRAGMAS = (
"PRAGMA journal_mode = WAL;",
"PRAGMA synchronous = NORMAL;",
"PRAGMA foreign_keys = ON;",
"PRAGMA busy_timeout = 5000;",
)
def _now_iso() -> str:
return datetime.now(timezone.utc).isoformat(timespec="seconds")
def _new_id() -> str:
return str(uuid.uuid4())
def _connect(db_path: Path) -> sqlite3.Connection:
"""Open a connection with the standard pragmas applied.
WAL mode is sticky on the file once set, so re-applying on every
open is cheap. The other pragmas are per-connection and must be
set each time.
"""
con = sqlite3.connect(str(db_path), isolation_level=None, timeout=5.0)
for pragma in _PRAGMAS:
con.execute(pragma)
return con
def ensure_progress_db(colony_dir: Path) -> Path:
"""Create or migrate ``{colony_dir}/data/progress.db``.
Idempotent: safe to call on an already-initialized DB. Returns the
absolute path to the DB file.
Steps:
1. Ensure ``data/`` subdir exists.
2. Open the DB (creates the file if missing).
3. Apply WAL + pragmas.
4. Read ``PRAGMA user_version``; if < SCHEMA_VERSION, run the
schema block and bump user_version.
5. Reclaim any stale claims left from previous runs.
6. Patch every ``*.json`` worker config in the colony dir to
inject ``input_data.db_path`` and ``input_data.colony_id`` so
pre-existing colonies (forked before this feature landed) get
the tracker wiring on their next spawn.
"""
data_dir = Path(colony_dir) / "data"
data_dir.mkdir(parents=True, exist_ok=True)
db_path = data_dir / "progress.db"
con = _connect(db_path)
try:
current_version = con.execute("PRAGMA user_version").fetchone()[0]
if current_version < SCHEMA_VERSION:
con.executescript(_SCHEMA_V1)
con.execute(f"PRAGMA user_version = {SCHEMA_VERSION}")
con.execute(
"INSERT OR REPLACE INTO colony_meta(key, value, updated_at) "
"VALUES (?, ?, ?)",
("schema_version", str(SCHEMA_VERSION), _now_iso()),
)
logger.info(
"progress_db: initialized schema v%d at %s", SCHEMA_VERSION, db_path
)
reclaimed = _reclaim_stale_inner(con, stale_after_minutes=15)
if reclaimed:
logger.info(
"progress_db: reclaimed %d stale claims at startup (%s)",
reclaimed,
db_path,
)
finally:
con.close()
resolved_db_path = db_path.resolve()
_patch_worker_configs(Path(colony_dir), resolved_db_path)
return resolved_db_path
def _patch_worker_configs(colony_dir: Path, db_path: Path) -> int:
"""Inject ``input_data.db_path`` + ``input_data.colony_id`` into
existing ``worker.json`` files in a colony directory.
Runs on every ``ensure_progress_db`` call so colonies that were
forked before this feature landed get their worker spawn messages
patched in place. Idempotent: if ``input_data`` already contains
the correct ``db_path``, the file is not rewritten.
Returns the number of files that were actually modified (0 on
the common case of already-patched colonies).
"""
colony_id = colony_dir.name
abs_db = str(db_path)
patched = 0
for worker_cfg in colony_dir.glob("*.json"):
# Only patch files that look like worker configs (have the
# worker_meta shape). ``metadata.json`` and ``triggers.json``
# are colony-level and must not be touched.
if worker_cfg.name in ("metadata.json", "triggers.json"):
continue
try:
data = json.loads(worker_cfg.read_text(encoding="utf-8"))
except (json.JSONDecodeError, OSError):
continue
if not isinstance(data, dict) or "system_prompt" not in data:
# Not a worker config (lacks the worker_meta schema).
continue
input_data = data.get("input_data")
if not isinstance(input_data, dict):
input_data = {}
if (
input_data.get("db_path") == abs_db
and input_data.get("colony_id") == colony_id
):
continue # already patched
input_data["db_path"] = abs_db
input_data["colony_id"] = colony_id
data["input_data"] = input_data
try:
worker_cfg.write_text(
json.dumps(data, indent=2, ensure_ascii=False), encoding="utf-8"
)
patched += 1
except OSError as e:
logger.warning(
"progress_db: failed to patch worker config %s: %s", worker_cfg, e
)
if patched:
logger.info(
"progress_db: patched %d worker config(s) in colony '%s' with db_path",
patched,
colony_id,
)
return patched
def ensure_all_colony_dbs(colonies_root: Path | None = None) -> list[Path]:
"""Idempotently ensure every existing colony has a progress.db.
Called on framework host startup to backfill older colonies and
run the stale-claim reclaimer on all of them in one pass.
"""
if colonies_root is None:
colonies_root = Path.home() / ".hive" / "colonies"
if not colonies_root.is_dir():
return []
initialized: list[Path] = []
for entry in sorted(colonies_root.iterdir()):
if not entry.is_dir():
continue
try:
initialized.append(ensure_progress_db(entry))
except Exception as e:
logger.warning(
"progress_db: failed to ensure DB for colony '%s': %s", entry.name, e
)
return initialized
def seed_tasks(
db_path: Path,
tasks: list[dict[str, Any]],
*,
source: str = "queen_create",
) -> list[str]:
"""Bulk-insert tasks (with optional nested steps + sop_items).
Each task dict accepts:
- goal: str (required)
- seq: int (optional ordering hint)
- priority: int (default 0)
- payload: dict | str | None (stored as JSON text)
- max_retries: int (default 3)
- parent_task_id: str | None
- steps: list[{"title": str, "detail"?: str}] (optional)
- sop_items: list[{"key": str, "description": str, "required"?: bool, "note"?: str}] (optional)
All rows are inserted in a single BEGIN IMMEDIATE transaction so
10k-row seeds finish in one disk flush. Returns the created task ids
in the same order as input.
"""
if not tasks:
return []
created_ids: list[str] = []
now = _now_iso()
con = _connect(Path(db_path))
try:
con.execute("BEGIN IMMEDIATE")
for idx, task in enumerate(tasks):
goal = task.get("goal")
if not goal:
raise ValueError(f"task[{idx}] missing required 'goal' field")
task_id = task.get("id") or _new_id()
payload = task.get("payload")
if payload is not None and not isinstance(payload, str):
payload = json.dumps(payload, ensure_ascii=False)
con.execute(
"""
INSERT INTO tasks (
id, seq, priority, goal, payload, status,
created_at, updated_at, max_retries, parent_task_id, source
) VALUES (?, ?, ?, ?, ?, 'pending', ?, ?, ?, ?, ?)
""",
(
task_id,
task.get("seq"),
int(task.get("priority", 0)),
goal,
payload,
now,
now,
int(task.get("max_retries", 3)),
task.get("parent_task_id"),
source,
),
)
for step_seq, step in enumerate(task.get("steps") or [], start=1):
if not step.get("title"):
raise ValueError(
f"task[{idx}].steps[{step_seq - 1}] missing required 'title'"
)
con.execute(
"""
INSERT INTO steps (id, task_id, seq, title, detail, status)
VALUES (?, ?, ?, ?, ?, 'pending')
""",
(
_new_id(),
task_id,
step.get("seq", step_seq),
step["title"],
step.get("detail"),
),
)
for sop in task.get("sop_items") or []:
key = sop.get("key")
description = sop.get("description")
if not key or not description:
raise ValueError(
f"task[{idx}].sop_items missing 'key' or 'description'"
)
con.execute(
"""
INSERT INTO sop_checklist
(id, task_id, key, description, required, note)
VALUES (?, ?, ?, ?, ?, ?)
""",
(
_new_id(),
task_id,
key,
description,
1 if sop.get("required", True) else 0,
sop.get("note"),
),
)
created_ids.append(task_id)
con.execute("COMMIT")
except Exception:
con.execute("ROLLBACK")
raise
finally:
con.close()
return created_ids
def enqueue_task(
db_path: Path,
goal: str,
*,
steps: list[dict[str, Any]] | None = None,
sop_items: list[dict[str, Any]] | None = None,
payload: Any = None,
priority: int = 0,
parent_task_id: str | None = None,
source: str = "enqueue_tool",
) -> str:
"""Append a single task to an existing queue. Thin wrapper over seed_tasks."""
ids = seed_tasks(
db_path,
[
{
"goal": goal,
"steps": steps,
"sop_items": sop_items,
"payload": payload,
"priority": priority,
"parent_task_id": parent_task_id,
}
],
source=source,
)
return ids[0]
def _reclaim_stale_inner(
con: sqlite3.Connection, *, stale_after_minutes: int
) -> int:
"""Reclaim stale claims. Runs inside an existing open connection.
Two-step:
1. Tasks past max_retries go to 'failed' with last_error populated.
2. Remaining stale claims return to 'pending', retry_count++.
"""
cutoff_expr = f"datetime('now', '-{int(stale_after_minutes)} minutes')"
con.execute("BEGIN IMMEDIATE")
try:
con.execute(
f"""
UPDATE tasks
SET status = 'failed',
last_error = COALESCE(last_error, 'exceeded max_retries after stale claim'),
completed_at = datetime('now'),
updated_at = datetime('now')
WHERE status IN ('claimed', 'in_progress')
AND claimed_at IS NOT NULL
AND claimed_at < {cutoff_expr}
AND retry_count >= max_retries
"""
)
cur = con.execute(
f"""
UPDATE tasks
SET status = 'pending',
worker_id = NULL,
claim_token = NULL,
claimed_at = NULL,
started_at = NULL,
retry_count = retry_count + 1,
updated_at = datetime('now')
WHERE status IN ('claimed', 'in_progress')
AND claimed_at IS NOT NULL
AND claimed_at < {cutoff_expr}
AND retry_count < max_retries
"""
)
reclaimed = cur.rowcount or 0
con.execute("COMMIT")
return reclaimed
except Exception:
con.execute("ROLLBACK")
raise
def reclaim_stale(db_path: Path, stale_after_minutes: int = 15) -> int:
"""Public wrapper that opens its own connection."""
con = _connect(Path(db_path))
try:
return _reclaim_stale_inner(con, stale_after_minutes=stale_after_minutes)
finally:
con.close()
__all__ = [
"SCHEMA_VERSION",
"ensure_progress_db",
"ensure_all_colony_dbs",
"seed_tasks",
"enqueue_task",
"reclaim_stale",
]
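For orientation, a minimal sketch of the atomic-claim pattern the module docstring describes, against the ``_SCHEMA_V1`` tables above (requires SQLite 3.35+ for ``RETURNING``; not part of this module):
```python
import sqlite3

con = sqlite3.connect("progress.db", isolation_level=None, timeout=5.0)
con.execute("PRAGMA busy_timeout = 5000;")
con.execute("BEGIN IMMEDIATE")       # take the write lock up front
cur = con.execute(
    """
    UPDATE tasks SET status='claimed', claimed_at=datetime('now'),
                     updated_at=datetime('now')
    WHERE id=(SELECT id FROM tasks WHERE status='pending'
              ORDER BY priority DESC, seq, created_at LIMIT 1)
    RETURNING id, goal
    """
)
row = cur.fetchone()                 # None -> a racer won the row
con.execute("COMMIT")
```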
+1 -12
@@ -1404,18 +1404,7 @@ class AgentLoader:
credential_store=credential_store,
)
runner._agent_default_skills = None
# Colony workers attached to a SQLite task queue get the
# colony-progress-tracker skill pre-activated so its full
# claim / step / SOP-gate protocol lands in the system prompt
# on turn 0, bypassing the progressive-disclosure catalog
# lookup. Triggered by the presence of ``input_data.db_path``
# in worker.json (written by fork_session_into_colony and
# backfilled by ensure_progress_db for pre-existing colonies).
_preactivate: list[str] = []
_input_data = first_worker.get("input_data") or {}
if isinstance(_input_data, dict) and _input_data.get("db_path"):
_preactivate.append("hive.colony-progress-tracker")
runner._agent_skills = _preactivate or None
runner._agent_skills = None
return runner
def register_tool(
+1 -11
@@ -497,22 +497,12 @@ class ToolRegistry:
config["cwd"] = str(resolved_cwd)
return config
# For coder_tools_server, inject --project-root so reads land
# in the expected workspace (hive repo, for framework skills
# and docs), and inject --write-root so writes land under
# ~/.hive/workspace/ instead of polluting the git checkout
# with queen-authored skills, ledgers, and scripts. Without
# the split, every ``write_file`` call from the queen landed
# in the hive repo root.
# For coder_tools_server, inject --project-root so writes go to the expected workspace
if script_name and "coder_tools" in script_name:
project_root = str(resolved_cwd.parent.resolve())
args = list(args)
if "--project-root" not in args:
args.extend(["--project-root", project_root])
if "--write-root" not in args:
_write_root = Path.home() / ".hive" / "workspace"
_write_root.mkdir(parents=True, exist_ok=True)
args.extend(["--write-root", str(_write_root)])
config["args"] = args
if os.name == "nt":
+12 -21
@@ -42,22 +42,14 @@ after an interaction unless you need a fresh view.
Only fall back to `browser_get_text` for extracting small elements by
CSS selector.
## Coordinates: always CSS pixels
## Coordinates
Chrome DevTools Protocol `Input.dispatchMouseEvent` takes **CSS
pixels**, not physical pixels. This is critical and easy to get wrong:
| Tool | Unit |
|---|---|
| `browser_click_coordinate(x, y)` | **CSS pixels** |
| `browser_hover_coordinate(x, y)` | **CSS pixels** |
| `browser_press_at(x, y, key)` | **CSS pixels** |
| `getBoundingClientRect()` | already CSS pixels; pass straight through |
| `browser_coords(img_x, img_y)` | returns `css_x/y` (use this) and `physical_x/y` (debug only) |
**Always use `css_x/y`** from `browser_coords`. Feeding `physical_x/y`
on a HiDPI display overshoots by `DPR×`: clicks land DPR times too
far right and down. On a DPR=1.6 display that's 60% off.
`browser_screenshot` delivers the image at the CSS viewport's own
dimensions, so a pixel you read off the screenshot is the same number
you pass to `browser_click_coordinate` / `browser_hover_coordinate` /
`browser_press_at`. `browser_get_rect` and `browser_shadow_query` also
return CSS px; feed `rect.css.cx` / `rect.css.cy` straight through.
No scale factors to remember.
Never multiply `getBoundingClientRect()` by `devicePixelRatio`; it's
already in the right unit.
@@ -86,12 +78,11 @@ reach shadow elements transparently.
**Shadow-heavy site workflow:**
1. `browser_screenshot()` → visual image
2. Identify target visually → image coordinate
3. `browser_coords(x, y)` → CSS px
4. `browser_click_coordinate(css_x, css_y)` → lands via native hit
test; inputs get focused regardless of shadow depth
5. Type via `browser_type` or, if the selector path can't reach the
element, dispatch keys to the focused element
2. Identify target visually → pixel `(x, y)` read straight off the image
3. `browser_click_coordinate(x, y)` → lands via native hit test; inputs
get focused regardless of shadow depth
4. Type via `browser_type_focused` (no selector needed; types into the
already-focused element), or `browser_type` if you have a selector
For selector-style access when you know the shadow path:
`browser_shadow_query("#interop-outlet >>> #msg-overlay >>> p")`
+9 -22
@@ -51,18 +51,13 @@ DEFAULT_EVENT_TYPES = [
# Keepalive interval in seconds
KEEPALIVE_INTERVAL = 15.0
# Session-SSE worker filter: workers run outside the queen's DM
# chat. Worker activity is observable via the dedicated
# ``/api/workers/{worker_id}/events`` per-worker SSE route, not via
# the session chat. This keeps the queen↔user conversation clean of
# tool-call chatter regardless of whether the worker was spawned by
# ``run_agent_with_input`` (stream_id="worker") or
# ``run_parallel_workers`` (stream_id="worker:{uuid}").
#
# Lifecycle events the frontend needs for fan-in summaries
# (SUBAGENT_REPORT, EXECUTION_COMPLETED, EXECUTION_FAILED) are still
# allowed through so the queen can show "N workers done" surfaces
# without exposing the per-turn chatter.
# Phase 5 SSE filter: parallel-worker streams (stream_id="worker:{uuid}")
# publish high-frequency LLM deltas / tool calls that would flood the
# user's queen DM chat. We let only this small allowlist of worker
# events through to the queen-chat SSE so the frontend can render
# fan-out lifecycle and structured fan-in reports without seeing the
# raw worker chatter. Per-worker SSE panels (Phase 5b) bypass this
# filter via a dedicated /workers/{worker_id}/events route.
_WORKER_EVENT_ALLOWLIST = {
EventType.SUBAGENT_REPORT.value,
EventType.EXECUTION_COMPLETED.value,
@@ -71,17 +66,9 @@ _WORKER_EVENT_ALLOWLIST = {
def _is_worker_noise(evt_dict: dict) -> bool:
"""True if the event belongs to a worker stream and should not
surface in the queen DM chat.
Matches any stream starting with ``worker``: both the bare
``"worker"`` tag used by single-worker spawns and the
``"worker:{uuid}"`` tag used by parallel fan-outs. The allowlist
carves out the three terminal/lifecycle events the UI still
needs to render fan-in summaries.
"""
"""True if the event is a parallel-worker event we should drop."""
stream_id = evt_dict.get("stream_id") or ""
if not stream_id.startswith("worker"):
if not stream_id.startswith("worker:"):
return False
return evt_dict.get("type") not in _WORKER_EVENT_ALLOWLIST
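A sketch of how the filter slots into the publish path; ``sse_send`` and the event list are stand-ins, not names from this diff:
```python
for evt_dict in pending_events:
    if _is_worker_noise(evt_dict):
        continue                     # worker chatter stays off the queen chat
    sse_send(evt_dict)               # hypothetical publisher
```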
+2 -73
@@ -644,7 +644,6 @@ async def handle_colony_spawn(request: web.Request) -> web.Response:
body = await request.json()
colony_name = body.get("colony_name", "").strip()
task = body.get("task", "").strip()
tasks = body.get("tasks")
if not colony_name:
return web.json_response({"error": "colony_name is required"}, status=400)
@@ -662,7 +661,6 @@ async def handle_colony_spawn(request: web.Request) -> web.Response:
session=session,
colony_name=colony_name,
task=task,
tasks=tasks if isinstance(tasks, list) else None,
)
except Exception as e:
logger.exception("colony_spawn fork failed")
@@ -676,7 +674,6 @@ async def fork_session_into_colony(
session: Any,
colony_name: str,
task: str,
tasks: list[dict] | None = None,
) -> dict:
"""Fork a queen session into a colony directory.
@@ -693,14 +690,8 @@ async def fork_session_into_colony(
the colony resumes with the queen's entire conversation history.
3. Multiple independent sessions can be created against the same colony,
giving parallel execution capacity without separate worker configs.
4. Initializes (or ensures) ``data/progress.db`` the colony's SQLite
task queue + progress ledger. When *tasks* is provided, the queen-
authored task batch is seeded into the queue in one transaction.
The absolute DB path is threaded into the worker's ``input_data``
so spawned workers see it in their first user message.
Returns ``{"colony_path", "colony_name", "queen_session_id", "is_new",
"db_path", "task_ids"}``.
Returns ``{"colony_path", "colony_name", "queen_session_id", "is_new"}``.
"""
import asyncio
import json
@@ -709,8 +700,7 @@ async def fork_session_into_colony(
from pathlib import Path
from framework.agent_loop.agent_loop import AgentLoop, LoopConfig
from framework.agent_loop.types import AgentContext, AgentSpec
from framework.host.progress_db import ensure_progress_db, seed_tasks
from framework.agent_loop.types import AgentContext
from framework.server.session_manager import _queen_session_dir
queen_loop: AgentLoop = session.queen_executor.node_registry["queen"]
@@ -721,49 +711,6 @@ async def fork_session_into_colony(
colony_dir.mkdir(parents=True, exist_ok=True)
(colony_dir / "data").mkdir(exist_ok=True)
# ── 0. Ensure the colony's progress DB exists and seed tasks ──
# Runs before worker.json is written so the DB path can be threaded
# into input_data. Idempotent on reruns of the same colony name.
db_path = await asyncio.to_thread(ensure_progress_db, colony_dir)
seeded_task_ids: list[str] = []
if tasks:
seeded_task_ids = await asyncio.to_thread(
seed_tasks, db_path, tasks, source="queen_create"
)
logger.info(
"progress_db: seeded %d task(s) into colony '%s'",
len(seeded_task_ids),
colony_name,
)
elif task and task.strip():
# Phase 2 auto-seed: when the queen uses the simple single-task
# form of create_colony (no explicit ``tasks=[{...}]`` list),
# insert exactly one row so the first worker spawned into this
# colony has something to claim. Without this the queue is
# empty and the worker falls back to executing from the chat
# spawn message, defeating the cross-run durability the tracker
# exists for.
try:
seeded_task_ids = await asyncio.to_thread(
seed_tasks,
db_path,
[{"goal": task.strip()}],
source="create_colony_auto",
)
logger.info(
"progress_db: auto-seeded 1 task into colony '%s' "
"(task_id=%s, from single-task create_colony form)",
colony_name,
seeded_task_ids[0] if seeded_task_ids else "?",
)
except Exception as exc:
logger.warning(
"progress_db: auto-seed failed for colony '%s' (continuing "
"without a pre-seeded row): %s",
colony_name,
exc,
)
# Fixed worker name -- sessions are the unit of parallelism, not workers
worker_name = "worker"
@@ -825,26 +772,10 @@ async def fork_session_into_colony(
# worker is not Charlotte / Alexandra / etc., it is a task executor.
# Inheriting the queen's persona made the worker greet the user in
# first person with no memory of the task it was actually given.
# Thread the first seeded task_id into input_data so the worker's
# first claim pins to a specific row (skill's assigned-task-id
# branch). When multiple tasks were seeded we only pin the first —
# subsequent workers (via run_agent_with_input or parallel spawns)
# get their own task_id assigned at spawn time.
_worker_input_data: dict[str, Any] = {
"db_path": str(db_path),
"colony_id": colony_name,
}
if seeded_task_ids:
_worker_input_data["task_id"] = seeded_task_ids[0]
worker_meta = {
"name": worker_name,
"version": "1.0.0",
"description": f"Worker clone from queen session {session.id}",
# Colony progress tracker: worker sees these in its first user
# message via _format_spawn_task_message. The colony-progress-
# tracker default skill teaches the worker how to use them.
"input_data": _worker_input_data,
"goal": {
"description": worker_task,
"success_criteria": [],
@@ -976,8 +907,6 @@ async def fork_session_into_colony(
"colony_name": colony_name,
"queen_session_id": colony_session_id,
"is_new": is_new,
"db_path": str(db_path),
"task_ids": seeded_task_ids,
}
+5 -71
@@ -686,10 +686,6 @@ async def handle_session_colonies(request: web.Request) -> web.Response:
return web.json_response({"colonies": colonies})
_EVENTS_HISTORY_DEFAULT_LIMIT = 2000
_EVENTS_HISTORY_MAX_LIMIT = 10000
async def handle_session_events_history(request: web.Request) -> web.Response:
"""GET /api/sessions/{session_id}/events/history — persisted eventbus log.
@@ -697,58 +693,17 @@ async def handle_session_events_history(request: web.Request) -> web.Response:
both live sessions and cold (post-server-restart) sessions. The frontend
replays these events through ``sseEventToChatMessage`` to fully reconstruct
the UI state on resume.
Query params:
limit: maximum number of events to return (default 2000, max 10000).
The TAIL of the file is returned, i.e. the most recent N events.
Older events are dropped and ``truncated`` is set to True.
Response shape::
{
"events": [...], # up to ``limit`` events, oldest-first
"session_id": "...",
"total": 12345, # total events in the file
"returned": 2000, # len(events)
"truncated": true, # total > returned
"limit": 2000, # the effective limit used
}
``events.jsonl`` is append-only and chronological, so "last N lines" == "most
recent N events". Long-running colonies have produced files with 50k+
events; before this cap, restoring on page-mount shipped the whole thing
down the wire and blocked the UI for seconds.
"""
session_id = request.match_info["session_id"]
try:
limit = int(request.query.get("limit", str(_EVENTS_HISTORY_DEFAULT_LIMIT)))
except ValueError:
limit = _EVENTS_HISTORY_DEFAULT_LIMIT
limit = max(1, min(limit, _EVENTS_HISTORY_MAX_LIMIT))
from framework.server.session_manager import _find_queen_session_dir
queen_dir = _find_queen_session_dir(session_id)
events_path = queen_dir / "events.jsonl"
if not events_path.exists():
return web.json_response(
{
"events": [],
"session_id": session_id,
"total": 0,
"returned": 0,
"truncated": False,
"limit": limit,
}
)
return web.json_response({"events": [], "session_id": session_id})
# Tail the file using a bounded deque — O(limit) memory regardless
# of file size. No need to materialize the whole list only to slice it.
from collections import deque
tail: deque[dict] = deque(maxlen=limit)
total = 0
events: list[dict] = []
try:
with open(events_path, encoding="utf-8") as f:
for line in f:
@@ -756,34 +711,13 @@ async def handle_session_events_history(request: web.Request) -> web.Response:
if not line:
continue
try:
evt = json.loads(line)
events.append(json.loads(line))
except json.JSONDecodeError:
continue
total += 1
tail.append(evt)
except OSError:
return web.json_response(
{
"events": [],
"session_id": session_id,
"total": 0,
"returned": 0,
"truncated": False,
"limit": limit,
}
)
return web.json_response({"events": [], "session_id": session_id})
events = list(tail)
return web.json_response(
{
"events": events,
"session_id": session_id,
"total": total,
"returned": len(events),
"truncated": total > len(events),
"limit": limit,
}
)
return web.json_response({"events": events, "session_id": session_id})
async def handle_session_history(request: web.Request) -> web.Response:
-18
@@ -139,24 +139,6 @@ class SessionManager:
except Exception:
logger.warning("v2 migration failed (non-fatal)", exc_info=True)
# Ensure every existing colony has an up-to-date progress.db
# (schema v1, WAL mode) and reclaim any stale claims left behind
# by crashed workers from the previous run. Idempotent and
# fast; runs synchronously because the event loop hasn't
# started yet at __init__ time.
from framework.host.progress_db import ensure_all_colony_dbs
try:
ensured = ensure_all_colony_dbs()
if ensured:
logger.info(
"progress_db: ensured %d colony DB(s) at startup", len(ensured)
)
except Exception:
logger.warning(
"progress_db: backfill at startup failed (non-fatal)", exc_info=True
)
def build_llm(self, model: str | None = None):
"""Construct an LLM provider using the server's configured defaults."""
from framework.config import RuntimeConfig, get_hive_config
@@ -0,0 +1,24 @@
---
name: hive.batch-ledger
description: Track per-item status when processing collections to prevent skipped or duplicated items.
metadata:
author: hive
type: default-skill
---
## Operational Protocol: Batch Progress Ledger
When processing a collection of items, maintain a batch ledger in `_batch_ledger`.
Initialize when you identify the batch:
- `_batch_total`: total item count
- `_batch_ledger`: JSON with per-item status
Per-item statuses: pending → in_progress → completed|failed|skipped
- Set `in_progress` BEFORE processing
- Set final status AFTER processing with 1-line result_summary
- Include error reason for failed/skipped items
- Update aggregate counts after each item
- NEVER remove items from the ledger
- If resuming, skip items already marked completed
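A sketch of one plausible ledger shape; only the statuses and aggregate-count rule above are prescribed, the field names here are illustrative:
```
_batch_total = 3
_batch_ledger = {
  "items": {
    "item-1": {"status": "completed", "result_summary": "reply posted"},
    "item-2": {"status": "failed", "error": "profile fetch returned 404"},
    "item-3": {"status": "pending"}
  },
  "counts": {"completed": 1, "failed": 1, "skipped": 0, "pending": 1}
}
```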
@@ -1,6 +1,6 @@
---
name: hive.browser-automation
description: Required before any browser_* tool call. Teaches the screenshot + browser_click_coordinate workflow that reaches shadow-DOM inputs selectors can't see, the CSS-pixel coordinate rule (not physical px), rich-text editor quirks ("send button stays disabled" failures), and CSP gotchas. Covers Chrome via CDP through the GCU Beeline extension. Skipping this causes repeated failures on LinkedIn / Reddit / X. Verified against real production sites 2026-04-11.
description: Required before any browser_* tool call. Teaches the screenshot + browser_click_coordinate workflow that reaches shadow-DOM inputs selectors can't see, rich-text editor quirks ("send button stays disabled" failures), and CSP gotchas. Covers Chrome via CDP through the GCU Beeline extension. Skipping this causes repeated failures on LinkedIn / Reddit / X. Verified against real production sites 2026-04-11.
metadata:
author: hive
type: default-skill
@@ -12,40 +12,20 @@ metadata:
All GCU browser tools drive a real Chrome instance through the Beeline extension and Chrome DevTools Protocol (CDP). That means clicks, keystrokes, and screenshots are processed by the actual browser's native hit testing, focus, and layout engines — **not** a synthetic event layer. Understanding this unlocks strategies that make hard sites easy.
## Rule #0: screenshot + coordinates beats selectors
## Coordinates
When in doubt, the most reliable browser-automation primitive is **`browser_screenshot` → identify visually → `browser_click_coordinate` → `browser_type`** (with `use_insert_text=True` for rich-text editors).
This path works on every site regardless of:
- React class-name obfuscation (LinkedIn, X, most SPAs)
- Shadow-DOM boundaries (Reddit, LinkedIn `#interop-outlet`)
- Nested iframes (LinkedIn invitation-manager inline message, embedded composers)
- Trusted Types CSP (LinkedIn, GitHub)
- Lexical / Draft.js / contenteditable composers
If you catch yourself writing `document.querySelectorAll(...)` inside `browser_evaluate` and it returns `[]`, **stop immediately**. Do not try a different selector. Take a screenshot and use coordinates. This single rule would have prevented dozens of empty-selector probing loops in past sessions.
**`browser_evaluate` is an escape hatch, not a default.** See the "When to reach for `browser_evaluate`" section near the end — most browser automation should not need it.
## Coordinates: always CSS pixels
**Chrome DevTools Protocol `Input.dispatchMouseEvent` operates in CSS pixels, not physical pixels.**
When you call `browser_coords(image_x, image_y)` after a screenshot, the returned dict has both `css_x/y` and `physical_x/y`. **Always use `css_x/y` for clicks, hovers, and key presses.**
Screenshots are delivered at the CSS viewport's own dimensions. A pixel you see in the screenshot is the same coordinate `browser_click_coordinate` expects — no conversion, no scale factors.
```
browser_screenshot() → image (downscaled to 800/900 px wide)
browser_coords(img_x, img_y) → {css_x, css_y, physical_x, physical_y}
browser_click_coordinate(css_x, css_y) ← USE css_x/y
browser_hover_coordinate(css_x, css_y) ← USE css_x/y
browser_press_at(css_x, css_y, key) ← USE css_x/y
browser_screenshot() → image at CSS-viewport size (JPEG)
browser_click_coordinate(x, y) → same (x, y)
browser_hover_coordinate(x, y) → same (x, y)
browser_press_at(x, y, key) → same (x, y)
browser_get_rect(selector) → rect.css → pass rect.css.cx, rect.css.cy to any of the above
browser_shadow_query(...) → sq.css → same
```
Feeding `physical_x/y` on a HiDPI display overshoots by DPR× — on a DPR=1.6 laptop, clicks land 60% too far right and down. The ratio between `physicalScale` and `cssScale` tells you the effective DPR.
`getBoundingClientRect()` already returns CSS pixels — feed those values straight through to click/hover tools without any DPR multiplication.
**Exception for zoomed elements:** pages that use `zoom` or `transform: scale()` on a container (LinkedIn's `#interop-outlet`, some embedded iframes) render in a scaled local coordinate space. `getBoundingClientRect` there may not match CDP's hit space. Use `browser_shadow_query` which handles the math, or fall back to visually picking coordinates from a screenshot.
**Exception for zoomed elements:** pages that use `zoom` or `transform: scale()` on a container (LinkedIn's `#interop-outlet`, some embedded iframes) render in a scaled local coordinate space. `getBoundingClientRect` there may not match CDP's hit space. Use `browser_shadow_query` which handles the math, or visually pick coordinates from a screenshot.
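A sketch of the shadow-query fallback for that case (selector illustrative):
```
sq = browser_shadow_query("#interop-outlet >>> div.msg-form__contenteditable")
browser_click_coordinate(sq.css.cx, sq.css.cy)   # sq.css already has the scale math applied
```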
## Screenshot + coordinates is shadow-agnostic — prefer it on shadow-heavy sites
@@ -61,14 +41,28 @@ Whereas `wait_for_selector`, `browser_click(selector=...)`, `browser_type(select
### Recommended workflow on shadow-heavy sites
1. `browser_screenshot()` → visual image
2. Identify the target visually → image pixel `(x, y)` (eyeball from the screenshot)
3. `browser_coords(x, y)` → convert to CSS px
4. `browser_click_coordinate(css_x, css_y)` → lands on the element via native hit testing; inputs get focused
5. For typing:
- If the element was reachable via a selector → `browser_type(selector, text)`
- Otherwise → `browser_press(key)` per character (dispatches to focused element, no selector needed)
6. Verify by reading element state via a targeted `browser_evaluate` that walks the shadow tree
1. `browser_screenshot()` → visual image (delivered at the CSS-viewport's own dimensions).
2. Identify the target visually → pixel `(x, y)` read straight off the image.
3. `browser_click_coordinate(x, y)` → clicks there. **The response includes `focused_element: {tag, id, role, contenteditable, rect, ...}`** — use it to verify you actually focused what you intended.
4. `browser_type_focused(text="...")` → dispatches CDP `Input.insertText` to `document.activeElement`. Shadow roots, iframes, Lexical, Draft.js, ProseMirror all just work. Use `browser_type(selector, text)` only when you want to target a different element than the one you just focused.
5. Verify via `browser_screenshot` OR `browser_get_attribute` on a known-reachable marker (e.g. check that the Send button's `aria-disabled` flipped to `false`).
### The click→type loop (canonical pattern)
```
resp = browser_click_coordinate(x, y) # x, y read straight off the screenshot
fe = resp.get("focused_element")
if fe and (fe.get("contenteditable") or fe["tag"] in ("textarea", "input")):
browser_type_focused(text="...") # insertText to activeElement
else:
# you clicked something that isn't editable — refine the pixel and retry.
# Do NOT reach for browser_evaluate + execCommand('insertText', ...)
# or a walk(root) shadow traversal. The problem is your click, not
# the typing method.
...
```
`browser_click` (selector-based) also returns `focused_element`, so the same check works whether you clicked by selector or by coordinate.
### Empirically verified (2026-04-11)
@@ -154,7 +148,7 @@ The symptom is always the same: **you type, the characters appear visually, and
```
# 1. Focus the real element via a real click (not JS .focus()).
rect = browser_get_rect(selector) # or browser_shadow_query for shadow sites
browser_click_coordinate(rect.cx, rect.cy)
browser_click_coordinate(rect.css.cx, rect.css.cy) # rect.css.cx/cy — matched pair
sleep(0.5) # let the editor open / focus settle
# 2. Type. browser_type now uses CDP Input.insertText by default, which is
@@ -183,7 +177,7 @@ if not state['disabled']:
else:
# Recovery: sometimes a click-again + one extra keystroke nudges
# React into recomputing hasRealContent.
browser_click_coordinate(rect.cx, rect.cy)
browser_click_coordinate(rect.css.cx, rect.css.cy) # rect.css.cx/cy — matched pair
browser_press("End")
browser_press(" ")
browser_press("Backspace")
@@ -266,25 +260,15 @@ Recognized without modifiers: `Enter`, `Tab`, `Escape`, `Backspace`, `Delete`, `
## Screenshots
```
browser_screenshot() # viewport, 900 px wide by default
browser_screenshot() # viewport, CSS-sized JPEG
browser_screenshot(full_page=True) # full scrollable page
browser_screenshot(selector="#header") # clip to element's rect
```
Returns a PNG with automatic downscaling to a target width (default 900 px) plus a JSON metadata block containing `cssWidth`, `devicePixelRatio`, `physicalScale`, `cssScale`, and a `scaleHint` string. The image is also annotated with a highlight rectangle/dot showing the last interaction (click, hover, type) if one happened on this tab.
Returns a JPEG (quality 75, ~150-250 KB for a typical UI) at the CSS viewport's own dimensions, plus a JSON metadata block containing `cssWidth`, `devicePixelRatio`, `imageWidth` (= `cssWidth`), and a `scaleHint` confirming image-px == CSS-px. The image is annotated with a highlight rectangle/dot showing the last interaction (click, hover, type) if one happened on this tab.
The highlight overlay stays visible on the page for **10 seconds** after each interaction, then fades. If you want the highlight in a screenshot, make sure the click / hover / type happened <10 s before you capture.
### Anatomy of the scale fields
- `cssWidth` = `window.innerWidth` (CSS px)
- `devicePixelRatio` = `window.devicePixelRatio` (often 1.6, 2, or 3 on modern displays)
- `physicalScale = png_width / image_width` (how many physical-px per image-px)
- `cssScale = cssWidth / image_width` (how many CSS-px per image-px)
- Effective DPR = `physicalScale / cssScale` (should match `devicePixelRatio`)
When converting image coordinates for clicks, always use `cssScale`. The `physicalScale` field is there for debugging HiDPI displays, not for inputs.
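For orientation, a metadata block matching the new description might look like this (values illustrative; only the four named keys are documented):
```
{"cssWidth": 1280, "devicePixelRatio": 1.6, "imageWidth": 1280,
 "scaleHint": "image px == CSS px; pass coordinates through unchanged"}
```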
## Scrolling
- Use large scroll amounts (~2000) when loading more content — sites like Twitter and LinkedIn rely on lazy loading to page in content.
@@ -339,7 +323,7 @@ LinkedIn enforces **strict Trusted Types CSP**. Any script you inject via `brows
Reddit's search input lives **two shadow levels deep** inside `reddit-search-large > faceplate-search-input`. You cannot reach it with `browser_type(selector=)`. The working pattern:
1. `browser_shadow_query("reddit-search-large >>> #search-input")` → rect
2. `browser_click_coordinate(rect.cx, rect.cy)` → click lands on the real shadow input via native hit testing; input becomes focused
2. `browser_click_coordinate(rect.css.cx, rect.css.cy) # rect.css.cx/cy — matched pair` → click lands on the real shadow input via native hit testing; input becomes focused
3. `browser_press(c)` for each character → dispatches to focused element
4. Verify by reading `.value` via `browser_evaluate` walking the shadow path
@@ -409,7 +393,7 @@ Then pass the most specific selector that uniquely identifies the right input (e
- **Typing into a rich-text editor without clicking first → send button stays disabled.** Draft.js (X), Lexical (Gmail, LinkedIn DMs), ProseMirror (Reddit), and React-controlled `contenteditable` elements only register input as "real" when the element received a native focus event — JS-sourced `.focus()` is not enough. `browser_type` now does this automatically via a real CDP pointer click before inserting text, but always verify the submit button's `disabled` state before clicking send. See the "ALWAYS click before typing" section above.
- **Using per-character `keyDown` on Lexical / Draft.js editors → keys dispatch but text never appears.** Those editors intercept `beforeinput` and route insertion through their own state machine; raw keyDown events are silently dropped. `browser_type` now uses `Input.insertText` by default (the CDP IME-commit method) which these editors accept cleanly. Only set `use_insert_text=False` when you explicitly need per-keystroke dispatch.
- **Leaving a composer with text then trying to navigate → `beforeunload` dialog hangs the bridge.** LinkedIn and several other sites pop a native "unsent message" confirm. `browser_navigate` and `close_tab` both time out against this. Always strip `window.onbeforeunload = null` via `browser_evaluate` before any navigation after typing in a composer, or wrap your logic in a `try/finally` that runs the cleanup block.
- **Clicking at physical pixels.** CDP uses CSS px. `browser_coords` returns both for debugging, but always feed `css_x/y` to click tools.
- **Click landed in the wrong region (sidebar / header instead of target).** The `focused_element` in the click response shows what actually got focused (e.g. `className: "msg-conversation-listitem__link"` means you hit the messaging sidebar). Treat it as ground truth — if it isn't the target, adjust the pixel and retry. Screenshot pixels equal CSS pixels, so the number you passed is the number CDP clicked; a wrong result means you picked the wrong pixel, not that any conversion went sideways.
- **Calling `wait_for_selector` on a shadow element.** It'll always time out. Use `browser_shadow_query` or the screenshot + coordinate strategy.
- **Relying on `innerHTML` in injected scripts on LinkedIn.** Silently discarded. Use `createElement` + `appendChild`.
- **Not waiting for SPA hydration.** `wait_until="load"` fires before React/Vue rendering on many sites. Add a 2-3 s sleep before querying for chrome elements.
@@ -425,35 +409,17 @@ If Chrome detaches the debugger for its own reasons (tab closed, user opened Dev
If reattach also fails, you'll get the underlying CDP error string — that's a real problem, usually the tab is gone.
## `browser_evaluate` is a last-resort escape hatch
## When to reach for `browser_evaluate`
**Before using `browser_evaluate`, try these first — in this order:**
Use it when:
- You need to read state from inside a shadow root that `browser_get_rect` doesn't handle
- You need a one-shot JS snippet to trigger a site-specific action (scroll a specific container, open a menu, set a form field value directly)
- You need to walk an AX tree or measure layout that the standard tools don't expose
1. **`browser_screenshot` + `browser_click_coordinate`** — works on every site regardless of shadow DOM, iframes, obfuscated classes. This is the default path for "click a thing you can see."
2. **`browser_type(use_insert_text=True, text=...)`** — for typing into ANY input/contenteditable, including Lexical and Draft.js. Handles click-focus-insert with built-in retries. Do **not** call `document.execCommand('insertText')` via evaluate; this tool already does it correctly.
3. **`browser_shadow_query`** or **`browser_get_rect(selector)`** with the `>>>` shadow-piercing syntax — for selector-based lookups across shadow roots.
4. **`browser_get_text` / `browser_get_attribute`** — for reading element state by selector.
5. **`browser_snapshot`** — for dumping the accessibility tree of the page.
If any of those five fits your goal, **do not use `browser_evaluate`.** Each evaluate call is a small LLM round-trip of ~30-100 tokens of JS plus a JSON response; five of them burn more context than a single screenshot-and-coordinate does, with less reliability.
### Anti-patterns — stop immediately if you catch yourself doing these
- **Trying multiple `querySelectorAll` variants when the first returned `[]`.** Different selectors on the same page rarely work if the first guess failed — modern SPAs obfuscate class names at build time. After one empty result, switch to `browser_screenshot` + `browser_click_coordinate`. Do not write `.artdeco-list__item`, then `[data-test-incoming-invitation-card]`, then `[class*="invitation"]` — you are already on the wrong path.
- **Writing `walk(root)` recursive shadow-DOM traversal functions.** Use `browser_shadow_query` — it traverses at the CDP level (native C++), not by re-running a recursive JS function every call.
- **Calling `document.execCommand('insertText', ...)` to type into a contenteditable.** Use `browser_type(use_insert_text=True, text='...')`. The high-level tool handles the exact same Lexical/Draft.js case but with click-focus-retry logic built in.
- **Accessing `iframe.contentDocument`.** Rarely works (cross-origin, late hydration) and when it does, the code is brittle. Use `browser_screenshot` to see the iframe, then `browser_click_coordinate` to interact.
- **Using `innerHTML = "<...>"` on a Trusted Types site (LinkedIn, GitHub).** The assignment is silently dropped. Use `createElement` + `appendChild` if you must inject DOM — but first, ask whether you really need to.
- **Triggering React/Vue state via synthetic `dispatchEvent`.** Frameworks watch for real browser events. Use `browser_click_coordinate`, `browser_press`, or `browser_type` — all go through CDP's native event pipeline.
### Legitimate uses (when nothing semantic fits)
- Reading a computed style, `window.innerWidth/Height`, `document.scrollingElement.scrollTop`, or other layout values the tools don't expose.
- Firing a one-shot site-specific API call (analytics beacon, feature-flag toggle).
- Stripping `onbeforeunload` before navigating away from a page with an unsent draft (LinkedIn, Gmail).
- Detecting whether a specific shadow-root host exists before a follow-up screenshot.
In all of these cases the script is SHORT (< 10 lines) and the result is CONSUMED (read, then acted on), not further probed.
Avoid it when:
- A standard tool (`browser_click_coordinate`, `browser_type`, `browser_press`) already does what you need. Those go through CDP's native event pipeline, which real sites trust more than synthetic JS dispatch.
- You're on a strict-CSP site and want to inject DOM — stick to `createElement` + `appendChild`, never `innerHTML`.
- You need to trigger React / Vue / framework state changes — those frameworks watch for real browser events (`input`, `change`, `click`), not scripted `dispatchEvent` calls. Native-event tools are more reliable.
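The `onbeforeunload` strip called out above is the canonical short-script case; a sketch (the positional-string call form for `browser_evaluate` is an assumption):
```
browser_evaluate("window.onbeforeunload = null")   # clear the unsent-draft trap
browser_navigate("https://www.linkedin.com/feed/", wait_until="load")
```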
## Login & auth walls
@@ -479,7 +445,7 @@ browser_navigate("https://x.com/explore", wait_until="load")
sleep(3)
browser_wait_for_selector("input[data-testid='SearchBox_Search_Input']", timeout_ms=5000)
rect = browser_get_rect("input[data-testid='SearchBox_Search_Input']")
browser_click_coordinate(rect.cx, rect.cy)
browser_click_coordinate(rect.css.cx, rect.css.cy) # rect.css.cx/cy — matched pair
browser_type("input[data-testid='SearchBox_Search_Input']", "openai", clear_first=True)
# Screenshot now shows live search suggestions
browser_screenshot()
@@ -493,7 +459,7 @@ browser_navigate("https://www.reddit.com/r/programming/", wait_until="load")
sleep(2)
# Shadow-pierce the nested search input
sq = browser_shadow_query("reddit-search-large >>> #search-input")
browser_click_coordinate(sq.rect.cx, sq.rect.cy)
browser_click_coordinate(sq.css.cx, sq.css.cy) # sq.css.cx/cy — matched pair
# Typing can't use selector (shadow); focused input receives raw key presses
for c in "python":
browser_press(c)
@@ -508,7 +474,7 @@ browser_navigate("https://www.linkedin.com/feed/", wait_until="load", timeout_ms
sleep(3)
browser_wait_for_selector("input[data-testid='typeahead-input']", timeout_ms=5000)
rect = browser_get_rect("input[data-testid='typeahead-input']")
browser_click_coordinate(rect.cx, rect.cy)
browser_click_coordinate(rect.css.cx, rect.css.cy) # rect.css.cx/cy — matched pair
browser_type("input[data-testid='typeahead-input']", "anthropic", clear_first=True)
# Dropdown shows real live suggestions
browser_screenshot()
@@ -1,111 +0,0 @@
---
name: hive.colony-progress-tracker
description: Claim tasks, record step progress, and verify SOP gates in the colony SQLite queue. Applies when your spawn message includes a db_path field.
metadata:
author: hive
type: default-skill
---
## Operational Protocol: Colony Progress Tracker
**Applies when** your spawn message has `db_path:` and `colony_id:` fields. The DB is your durable working memory — it tells you what's done, what to skip, and which SOP gates you owe.
Access via `execute_command_tool` running `sqlite3 "<db_path>" "..."`. Tables: `tasks` (queue), `steps` (per-task decomposition), `sop_checklist` (hard gates).
### Claim: assigned task (check this FIRST)
If your spawn message includes a `task_id:` field, the queen pre-assigned a specific row to you. Claim that row by id — **do not** use the generic next-pending pattern below:
```bash
sqlite3 "<db_path>" <<'SQL'
UPDATE tasks SET status='claimed', worker_id='<worker-id>',
claim_token=lower(hex(randomblob(8))),
claimed_at=datetime('now'), updated_at=datetime('now')
WHERE id='<task_id>' AND status='pending'
RETURNING id, goal, payload;
SQL
```
Empty output → another worker raced you or the row is already done. Stop and report. Non-empty → that row is yours, proceed to "Load the plan".
### Claim: next pending (fallback when no task_id is assigned)
If your spawn message did NOT include `task_id:` — you are a generic fan-out worker racing on a shared queue. Use the generic next-pending claim:
```bash
sqlite3 "<db_path>" <<'SQL'
UPDATE tasks SET status='claimed', worker_id='<worker-id>',
claim_token=lower(hex(randomblob(8))),
claimed_at=datetime('now'), updated_at=datetime('now')
WHERE id=(SELECT id FROM tasks WHERE status='pending'
ORDER BY priority DESC, seq, created_at LIMIT 1)
RETURNING id, goal, payload;
SQL
```
Empty output → queue drained, exit. Otherwise the returned `id` is yours. **Never SELECT-then-UPDATE** — two workers can read the same pending row and both claim it.
### Load the plan
```bash
sqlite3 "<db_path>" "SELECT seq, id, title, status FROM steps WHERE task_id='<task-id>' ORDER BY seq;"
sqlite3 "<db_path>" "SELECT key, description, required, done_at FROM sop_checklist WHERE task_id='<task-id>';"
```
**Skip any step where status='done'.** That's the point — don't redo completed work.
### Execute a step
Before tool calls:
```bash
sqlite3 "<db_path>" "UPDATE steps SET status='in_progress', worker_id='<worker-id>', started_at=datetime('now') WHERE id='<step-id>';"
```
After success (one-line evidence: path, URL, key result):
```bash
sqlite3 "<db_path>" "UPDATE steps SET status='done', evidence='<what you did>', completed_at=datetime('now') WHERE id='<step-id>';"
```
### MANDATORY: SOP gate check before marking task done
```bash
sqlite3 "<db_path>" "SELECT key, description FROM sop_checklist WHERE task_id='<task-id>' AND required=1 AND done_at IS NULL;"
```
- Empty → proceed to "Mark task done".
- Non-empty → each row is work you still owe. Do it, then check it off:
```bash
sqlite3 "<db_path>" "UPDATE sop_checklist SET done_at=datetime('now'), done_by='<worker-id>', note='<why>' WHERE task_id='<task-id>' AND key='<key>';"
```
**Never mark a task done while this SELECT returns rows.** This gate exists specifically to stop you from declaring success while skipping required steps.
### Mark task done / failed
```bash
# Success:
sqlite3 "<db_path>" "UPDATE tasks SET status='done', completed_at=datetime('now'), updated_at=datetime('now') WHERE id='<task-id>' AND worker_id='<worker-id>';"
# Unrecoverable failure:
sqlite3 "<db_path>" "UPDATE tasks SET status='failed', last_error='<one sentence>', completed_at=datetime('now'), updated_at=datetime('now') WHERE id='<task-id>' AND worker_id='<worker-id>';"
```
The `AND worker_id=?` guard means a reclaimed row won't accept your write — treat zero rows affected as "your claim was revoked, stop."
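To make the zero-rows check concrete, here is a sketch using Python's stdlib `sqlite3` instead of the CLI; the path and ids are placeholders:
```python
import sqlite3

db_path, task_id, worker_id = "progress.db", "<task-id>", "<worker-id>"  # placeholders

conn = sqlite3.connect(db_path)
conn.execute("PRAGMA busy_timeout=5000")
cur = conn.execute(
    "UPDATE tasks SET status='done', completed_at=datetime('now'), "
    "updated_at=datetime('now') WHERE id=? AND worker_id=?",
    (task_id, worker_id),
)
conn.commit()
if cur.rowcount == 0:
    raise RuntimeError("claim revoked: stop work on this task")
```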
### Loop
After done/failed → claim the next task. Exit only when claim returns empty.
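As a sketch, the whole cycle might look like this in Python (stdlib `sqlite3`; `RETURNING` needs SQLite 3.35+, and the path and worker id are placeholders):
```python
import sqlite3

conn = sqlite3.connect("progress.db")     # placeholder path
conn.execute("PRAGMA busy_timeout=5000")

def claim_next(worker_id: str):
    # Same atomic claim as above: one UPDATE, no SELECT-then-UPDATE race.
    row = conn.execute(
        """UPDATE tasks SET status='claimed', worker_id=?,
               claim_token=lower(hex(randomblob(8))),
               claimed_at=datetime('now'), updated_at=datetime('now')
           WHERE id=(SELECT id FROM tasks WHERE status='pending'
                     ORDER BY priority DESC, seq, created_at LIMIT 1)
           RETURNING id, goal, payload""",
        (worker_id,),
    ).fetchone()
    conn.commit()
    return row                             # None means the queue is drained

while (task := claim_next("<worker-id>")) is not None:
    ...                                    # execute steps, SOP gate, mark done/failed
```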
### Errors + debug
- **"database is locked"**: retry with 100ms → 1s backoff, max 5 attempts. `busy_timeout=5000` handles most contention silently.
- **Queue health**: `SELECT status, count(*) FROM tasks GROUP BY status;`
- **Your in-flight work**: `SELECT id, goal, status FROM tasks WHERE worker_id='<worker-id>';`
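The backoff loop from the first bullet, sketched around the sqlite3 CLI (the wrapper name is hypothetical):
```python
import subprocess, time

def run_sql(db_path: str, sql: str) -> str:
    """Run one statement via the sqlite3 CLI, backing off on lock."""
    delay = 0.1                                    # 100ms, doubling toward 1s
    for attempt in range(5):                       # max 5 attempts
        proc = subprocess.run(["sqlite3", db_path, sql],
                              capture_output=True, text=True)
        if "database is locked" not in proc.stderr:
            return proc.stdout
        time.sleep(min(delay, 1.0))
        delay *= 2
    raise RuntimeError("database is locked after 5 attempts")
```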
### Anti-patterns (will break the queue)
- Don't DDL (CREATE/ALTER/DROP).
- Don't DELETE — failed tasks stay as `failed` for audit.
- Don't skip the mandatory SOP gate check before marking done.
- Don't hold a task >15min without updates — the stale-claim reclaimer revokes your claim.
- Don't invent task IDs. Workers update existing rows; only the queen enqueues new ones.
@@ -1,6 +1,6 @@
---
name: hive.context-preservation
description: Proactively extract critical values from tool results into working notes before automatic context pruning destroys them.
description: Proactively preserve critical information before automatic context pruning destroys it.
metadata:
author: hive
type: default-skill
@@ -8,16 +8,17 @@ metadata:
## Operational Protocol: Context Preservation
You operate under a finite context window. Older tool results WILL be pruned. Extract what you need while it's still in context.
You operate under a finite context window. Important information WILL be pruned.
**Save-as-you-go.** After any tool call producing information you'll need later, immediately extract the key data into `_working_notes` or `_preserved_data`. Do not rely on referring back to old tool results — once they're pruned they're gone.
Save-As-You-Go: After any tool call producing information you'll need later,
immediately extract key data into `_working_notes` or `_preserved_data`.
Do NOT rely on referring back to old tool results.
**What to extract:**
- URLs and key snippets (not full pages)
- Relevant API fields (not raw JSON blobs)
- Specific lines, values, or IDs (not entire files)
- Analysis conclusions (not raw data)
What to extract: URLs and key snippets (not full pages), relevant API fields
(not raw JSON), specific lines/values (not entire files), analysis results
(not raw data).
**Handoffs between tasks** happen through `progress.db`, not through shared-buffer handoff blobs. When you finish a task, any state the next worker needs goes into the task row itself (`steps.evidence`, `tasks.last_error`, `sop_checklist.note`) — see `hive.colony-progress-tracker`. Use `_working_notes` for things the DB schema doesn't cover.
Before transitioning to the next phase/node, write a handoff summary to
`_handoff_context` with everything the next phase needs to know.
You will receive an alert when context reaches {{warn_at_usage_ratio_pct}}% — preserve immediately.
@@ -1,6 +1,6 @@
---
name: hive.error-recovery
description: Follow a structured recovery decision tree when tool calls fail instead of blindly retrying or giving up.
description: Follow a structured recovery protocol when tool calls fail instead of blindly retrying or giving up.
metadata:
author: hive
type: default-skill
@@ -10,20 +10,9 @@ metadata:
When a tool call fails:
1. **Diagnose** — classify the failure as *transient* (network blip, rate limit, timeout) or *structural* (wrong selector, missing auth, invalid schema, permission denied).
2. **Decide:**
- Transient → retry once.
- Structural + fixable → fix the input and retry.
- Structural + unfixable → record the failure and move to the next item.
- Blocking all progress → escalate.
3. **Adapt** — if the same tool has failed {{max_retries_per_tool}}+ times in a row, stop using it and find an alternative approach.
**Never silently drop a failed item.** If the item is a task in the colony queue, write the failure to the DB instead of an in-memory buffer:
```bash
sqlite3 "$DB_PATH" "UPDATE tasks SET status='failed', last_error='<one-sentence reason>', completed_at=datetime('now'), updated_at=datetime('now') WHERE id='<task-id>' AND worker_id='<your-worker-id>';"
```
The `tasks.retry_count` column and the stale-claim reclaimer handle auto-retry for crashes; your job is the within-run decision tree above. See `hive.colony-progress-tracker` for the full queue protocol.
1. Diagnose — record error in notes, classify as transient or structural
2. Decide — transient: retry once. Structural fixable: fix and retry.
Structural unfixable: record as failed, move to next item.
Blocking all progress: record escalation note.
3. Adapt — if same tool failed {{max_retries_per_tool}}+ times, stop using it and find alternative.
Update plan in notes. Never silently drop the failed item.
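The classify-then-act step lends itself to a small sketch; the marker strings and action names are illustrative, not the skill's own code:
```python
TRANSIENT_MARKERS = ("timeout", "rate limit", "connection reset")  # illustrative

def decide(error_msg: str, consecutive_failures: int, max_retries: int = 3) -> str:
    """Map a tool failure to one of the actions above (sketch only)."""
    if consecutive_failures >= max_retries:
        return "find-alternative"             # adapt: stop using the tool
    if any(m in error_msg.lower() for m in TRANSIENT_MARKERS):
        return "retry-once"                   # transient
    return "fix-input-or-record-failure"      # structural
```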
@@ -15,28 +15,6 @@ LinkedIn is the hardest mainstream site to automate because it combines **shadow
**Always activate `browser-automation` first.** This skill assumes you already know about CSS-px coordinates, `browser_type`'s click-first behavior, and `browser_shadow_query`. The guidance below is LinkedIn-specific; general browser rules are there.
## Rule #0: screenshot + coordinates, not selectors
LinkedIn changes class names aggressively and hides composers inside shadow roots AND iframes. **Selectors break constantly.** Your default strategy on every LinkedIn page should be:
1. `browser_screenshot()` — see the page visually
2. Pick the target's position from the image
3. `browser_coords(image_x, image_y)` → get CSS pixels
4. `browser_click_coordinate(css_x, css_y)` — reaches shadow DOM, iframes, and React elements indifferently
5. `browser_type(use_insert_text=True, text=...)` — types into whatever is focused, including Lexical composers
**If `browser_evaluate(...querySelectorAll...)` returns `[]` even once, do not try a different selector.** Stop, screenshot, and click. The "what if I try `.artdeco-list__item` next" instinct has burned ~50 tool calls in real sessions before the agent pivoted. Don't fall into that loop.
The selectors in the table below are **only** for when you already know the target is in the light DOM and you want a faster path than screenshot+coord. **When in doubt, default to coordinates.**
## Invitation manager — inline message button path is BROKEN
If the user asks to message a connection request **from the invitation manager page without accepting first**, the inline "Message" button opens a composer inside a nested **iframe overlay** (not a shadow root). The iframe's `contentDocument` is either cross-origin-blocked or not hydrated at access time. This path is **not reliably automatable today.**
**Redirect:** click the person's name/profile link on the card, go to the profile page, and use the standard Profile Message flow below. The profile flow is battle-tested; the inline-iframe flow isn't.
If you end up writing `document.activeElement.tagName === 'IFRAME'` inside a `browser_evaluate`, you've hit this trap. Stop and go to the profile page.
## Timing expectations
- `browser_navigate(wait_until="load", timeout_ms=20000)` — LinkedIn takes **45 seconds** to load the feed cold. Default 30s timeout is fine; use 20s as a floor.
@@ -56,7 +34,7 @@ If you end up writing `document.activeElement.tagName === 'IFRAME'` inside a `br
| Pending connection card | `.invitation-card, .invitations-card, [data-test-incoming-invitation-card]` | Filter out "invited you to follow" / "subscribe" cards |
| Accept button | `button[aria-label*="Accept"]` within the card scope | Per-card scoping is critical — there are many Accept buttons on the page |
LinkedIn changes class names aggressively. If a class-based selector breaks, fall back to **`browser_screenshot` → visual identification → `browser_coords` → `browser_click_coordinate`**. The screenshot + coord path works regardless of class-name churn and regardless of shadow DOM.
LinkedIn changes class names aggressively. If a class-based selector breaks, fall back to **`browser_screenshot` → visual identification → `browser_click_coordinate`** with the pixel you read straight off the image (screenshots are CSS-sized, so no conversion). The screenshot + coord path works regardless of class-name churn and regardless of shadow DOM.
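A sketch of that fallback path, using the tool names from this skill (the coordinates are illustrative; read yours off the actual screenshot):
```python
browser_screenshot()                    # find the target visually
# Screenshots are CSS-sized, so image pixels ARE CSS pixels: no conversion.
browser_click_coordinate(412, 288)      # illustrative coordinates
browser_type_focused(text="hello")      # type into whatever received focus
```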
## Profile Message flow (verified end-to-end 2026-04-11)
@@ -120,19 +98,17 @@ textarea = browser_evaluate("""
browser_click_coordinate(textarea['cx'], textarea['cy'])
sleep(0.6)
# 6. Insert text via document.execCommand('insertText') through browser_evaluate.
# This is the ONLY reliable approach for LinkedIn's Lexical composer.
# See the "Lexical composer quirks" section below for why browser_type
# with a selector does NOT work here (the contenteditable lives inside
# the #interop-outlet shadow root which document.querySelector can't
# reach). The click in step 5 already put Lexical into edit mode, so
# execCommand injects straight into the focused editor's state.
browser_evaluate("""
(function(){
document.execCommand('insertText', false, %s);
return true;
})();
""" % json.dumps(message_text)) # json.dumps gives you a safely-escaped JS string literal
# 6. Insert text via browser_type WITHOUT a selector. This dispatches
# CDP Input.insertText to document.activeElement — the same underlying
# mechanism as execCommand('insertText') but with no JSON escaping,
# no browser_evaluate round trip, and built-in retry. The click in
# step 5 already focused Lexical, so insertText lands in the editor
# regardless of the shadow wrapping around #interop-outlet.
#
# Do NOT pass a selector here. Selector-based browser_type cannot see
# past the #interop-outlet shadow root. No-selector mode sidesteps
# that entirely by routing to activeElement.
browser_type_focused(text=message_text) # targets document.activeElement
sleep(1.0) # let Lexical commit state + enable Send button
# 7. Find the modal Send button (filter by in-viewport, reject pinned bar)
@@ -165,20 +141,21 @@ send = browser_evaluate("""
})();
""")
# 8. ONLY click Send if it's enabled — if disabled, the execCommand
# 8. ONLY click Send if it's enabled — if disabled, the insertText
# didn't land. DO NOT retry with a different tool; the fix is
# always: re-click the composer rect, re-run execCommand, re-check.
# The Send button's `disabled` state IS the ground truth — if
# Lexical registered your text, it enables the button. If it's
# always: re-click the composer rect, re-run browser_type_focused(text=...),
# re-check. The Send button's `disabled` state IS the ground truth —
# if Lexical registered your text, it enables the button. If it's
# still disabled, your text did not reach the editor, regardless
# of what any tool call claims.
if send['disabled']:
# The editor didn't receive your text. Do NOT click Send. Do NOT
# fall back to browser_type with a dummy selector (see anti-pattern
# in Common Pitfalls). Instead: re-click the textarea rect from
# step 4, wait a beat, re-run the execCommand insertText from step
# 6. If that still fails after 2 retries, bail and surface — the
# modal may have been reclaimed by a stale state or auth wall.
# fall back to browser_type with a selector (see anti-pattern in
# Common Pitfalls — selector-based type can't reach the shadow-DOM
# composer). Instead: re-click the textarea rect from step 4, wait
# a beat, re-run browser_type_focused(text=message_text) from
# step 6. If that still fails after 2 retries, bail and surface —
# the modal may have been reclaimed by a stale state or auth wall.
raise Exception("Send button disabled after insertText — editor did not receive input")
browser_click_coordinate(send['cx'], send['cy'])
@@ -346,9 +323,9 @@ If any of those show up, **stop the run, screenshot the state, and surface the i
## Common pitfalls
- **`innerHTML` injection is silently dropped** — LinkedIn's Trusted Types CSP discards any `innerHTML = "<...>"` from injected scripts, no console error. Always use `createElement` + `appendChild` + `setAttribute` for DOM injection. `textContent`, `style.cssText`, and `.value` assignments are fine.
- **Do NOT use `browser_type` on the message composer — use `document.execCommand('insertText', false, text)` via `browser_evaluate` instead.** The Lexical contenteditable lives inside the `#interop-outlet` shadow root which `document.querySelector` (what `browser_type` uses under the hood) cannot see. Attempts to work around this with `browser_shadow_query` fail because `browser_type` doesn't support the `>>>` shadow-pierce syntax. The ONLY reliable insert path is: (1) `browser_click_coordinate` on the composer rect (put Lexical in edit mode via a real CDP pointer click) → (2) `browser_evaluate` with `document.execCommand('insertText', false, <message>)` against the focused editor. This pattern is verified end-to-end across 15+ successful sends in session `session_20260414_113244_a98cfd66` (2026-04-14).
- **Per-char keyDown on the message composer produces empty text** — Lexical intercepts `beforeinput` and drops raw keys. Ignore `browser_type` entirely for LinkedIn DMs; use the `execCommand('insertText')` path above.
- **ANTI-PATTERN: "inject a dummy `<div id='dummy-target'>` and pass it as the `selector` arg to `browser_type`".** This looks tempting but fails compoundingly: `browser_type` clicks the **dummy div's** rect (not the editor's), the click lands on the Lexical wrapper's non-editable chrome, the contenteditable never receives focus, and `Input.insertText` fires against nothing. The bridge will still return `{"ok": true, "action": "type", "length": N}` because it has no way to verify the text actually landed. Symptom: Send button stays `disabled: true` forever. Fix: use `execCommand('insertText')` exactly as shown in the profile-message flow above. (See `session_20260414_114820_08bd3c4d` for the failed attempt.)
- **Do NOT use selector-based `browser_type` on the message composer — use `browser_type_focused(text=...)`.** The Lexical contenteditable lives inside the `#interop-outlet` iframe/shadow wrapper which `document.querySelector` cannot see. `browser_shadow_query` can find it but selector-based `browser_type` doesn't support the `>>>` shadow-pierce syntax. The reliable insert path is: (1) `browser_click_coordinate` on the composer rect — the response's `focused_element` (which recurses into same-origin iframes) confirms what actually received focus → (2) `browser_type_focused(text=message_text)` — CDP `Input.insertText` dispatches to `document.activeElement` regardless of shadow wrapping.
- **Per-char keyDown on the message composer produces empty text** — Lexical intercepts `beforeinput` and drops raw keys. Use `browser_type_focused(text=..., use_insert_text=True)` after `browser_click_coordinate` has focused the composer. The CDP `Input.insertText` method commits as if IME fired, which Lexical accepts cleanly.
- **ANTI-PATTERN: "inject a dummy `<div id='dummy-target'>` and pass it as the `selector` arg to `browser_type`".** This fails compoundingly: `browser_type` clicks the **dummy div's** rect (not the editor's), the click lands on the Lexical wrapper's non-editable chrome, the contenteditable never receives focus, and `Input.insertText` fires against nothing. The bridge will still return `{"ok": true, "action": "type", "length": N}` because it has no way to verify the text actually landed. Symptom: Send button stays `disabled: true` forever. Fix: `browser_click_coordinate` on the real composer rect, then `browser_type_focused(text=message_text)` — CDP `Input.insertText` dispatches to `document.activeElement`.
- **Multiple Send buttons on the page** — the pinned bottom-right messaging bar has its own `msg-form__send-button` that's usually below `innerHeight`. Filter by in-viewport before clicking.
- **`window.onbeforeunload` hangs navigation/close** — after typing in a composer, any `browser_navigate` or `close_tab` can pop a native "unsent message, leave?" confirm dialog that deadlocks the bridge. Always strip `onbeforeunload` before any navigation, and wrap composer flows in a `try/finally` that runs the cleanup block:
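The cleanup block itself sits outside this hunk's context; a minimal sketch of the shape it describes, reusing names from the flow above:
```python
try:
    browser_click_coordinate(textarea['cx'], textarea['cy'])   # from step 5
    browser_type_focused(text=message_text)                    # from step 6
finally:
    # Strip the handler so navigation/close can never pop the confirm dialog.
    browser_evaluate("window.onbeforeunload = null; true;")
```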
@@ -1,6 +1,6 @@
---
name: hive.note-taking
description: Maintain a free-form scratchpad of decisions, extracted values, and open questions so context pruning doesn't lose anything you still need.
description: Maintain structured working notes throughout execution to prevent information loss during context pruning.
metadata:
author: hive
type: default-skill
@@ -8,21 +8,20 @@ metadata:
## Operational Protocol: Structured Note-Taking
Maintain free-form working notes in shared buffer key `_working_notes` for data that *you* need to remember but that isn't captured by the colony task queue.
**Do not duplicate the queue in here.** Per-task goal, ordered steps, and SOP gates live in `progress.db` — use `hive.colony-progress-tracker` for those. These notes are for things the DB schema doesn't cover.
Maintain structured working notes in shared buffer key `_working_notes`.
Update at these checkpoints:
- After receiving new information that changes how you plan to approach the current step
- Before any tool call that will produce substantial output you'll need to reference later
- When you make a non-obvious decision whose *why* would be lost if the tool call history gets pruned
- After completing each discrete subtask or batch item
- After receiving new information that changes your plan
- Before any tool call that will produce substantial output
Structure:
### Objective — restate the goal
### Current Plan — numbered steps, mark completed with ✓
### Key Decisions — decisions made and WHY
### Working Data — intermediate results, extracted values (URLs, IDs, key snippets — not full pages)
### Open Questions — uncertainties you plan to verify
### Blockers — anything preventing progress that isn't already captured in `tasks.last_error`
### Working Data — intermediate results, extracted values
### Open Questions — uncertainties to verify
### Blockers — anything preventing progress
Update incrementally — do not rewrite from scratch each time.
@@ -0,0 +1,17 @@
---
name: hive.task-decomposition
description: Decompose complex tasks into explicit subtasks before diving in.
metadata:
author: hive
type: default-skill
---
## Operational Protocol: Task Decomposition
Before starting a complex task:
1. Decompose — break into numbered subtasks in `_working_notes` Current Plan
2. Estimate — relative effort per subtask (small/medium/large)
3. Execute — work through in order, mark ✓ when complete
4. Budget — if running low on iterations, prioritize by impact
5. Verify — before declaring done, every subtask must be ✓, skipped (with reason), or blocked
+2 -2
View File
@@ -36,8 +36,8 @@ class SkillsConfig:
# Default skill configuration
default_skills = {
"hive.note-taking": {"enabled": True},
"hive.quality-monitor": {"enabled": False, "assessment_interval": 10},
"hive.error-recovery": {"max_retries_per_tool": 5},
"hive.batch-ledger": {"enabled": True, "checkpoint_every_n": 10},
"hive.quality-monitor": {"enabled": False},
}
"""
+49 -32
View File
@@ -24,21 +24,34 @@ _SKILL_DEFAULTS: dict[str, dict[str, Any]] = {
"hive.quality-monitor": {"assessment_interval": 5},
"hive.error-recovery": {"max_retries_per_tool": 3},
"hive.context-preservation": {"warn_at_usage_ratio_pct": 45},
"hive.batch-ledger": {"checkpoint_every_n": 5},
}
# Keywords that indicate a batch processing scenario (DS-12)
_BATCH_KEYWORDS: tuple[str, ...] = (
"list of",
"collection of",
"set of",
"batch of",
"each item",
"for each",
"process all",
"records",
"entries",
"rows",
"items",
)
_BATCH_INIT_NUDGE = (
"Note: your input appears to describe a batch operation. "
"Initialize `_batch_ledger` with the total item count before processing."
)
def is_batch_scenario(text: str) -> bool:
"""Deprecated: batch auto-detection is no longer used.
Kept as a no-op so the agent_loop call site (which wraps it in an
``if ctx.default_skill_batch_nudge:`` guard that's also now always
empty) can stay unchanged until a broader cleanup. The old
``_batch_ledger`` shared-buffer feature was replaced by the
per-colony SQLite task queue (``hive.colony-progress-tracker``),
which lives in ``progress.db`` and is authoritative for batch
state across workers and runs.
"""
return False
"""Return True if *text* contains batch-processing indicators (DS-12)."""
lower = text.lower()
return any(kw in lower for kw in _BATCH_KEYWORDS)
def _apply_overrides(skill_name: str, body: str, overrides: dict[str, Any]) -> str:
@@ -54,37 +67,40 @@ def _apply_overrides(skill_name: str, body: str, overrides: dict[str, Any]) -> s
return body
# Ordered list of default skills (name → directory).
#
# Removed on 2026-04-15 as part of the colony-progress-tracker rollout:
# - hive.task-decomposition — steps table in progress.db supersedes
# in-memory ``_working_notes → Current Plan`` decomposition.
# - hive.batch-ledger — tasks table in progress.db supersedes
# the ``_batch_ledger`` dict-shaped queue with its pending →
# in_progress → completed/failed/skipped state machine.
# Both were duplicating state that belongs in SQLite.
# Ordered list of default skills (name → directory)
SKILL_REGISTRY: dict[str, str] = {
"hive.note-taking": "note-taking",
"hive.batch-ledger": "batch-ledger",
"hive.context-preservation": "context-preservation",
"hive.quality-monitor": "quality-monitor",
"hive.error-recovery": "error-recovery",
"hive.colony-progress-tracker": "colony-progress-tracker",
"hive.task-decomposition": "task-decomposition",
"hive.writing-hive-skills": "writing-hive-skills",
}
# Shared buffer keys referenced by the remaining default skills (used
# for permission auto-inclusion). The dead keys for batch-ledger,
# task-decomposition, the handoff buffer, and the error-log buffers
# were removed when those features migrated to progress.db.
# All shared buffer keys used by default skills (for permission auto-inclusion)
DATA_BUFFER_KEYS: list[str] = [
# note-taking
"_working_notes",
"_notes_updated_at",
# batch-ledger
"_batch_ledger",
"_batch_total",
"_batch_completed",
"_batch_failed",
# context-preservation
"_handoff_context",
"_preserved_data",
# quality-monitor
"_quality_log",
"_quality_degradation_count",
# error-recovery
"_error_log",
"_failed_tools",
"_escalation_needed",
# task-decomposition
"_subtasks",
"_iteration_budget_remaining",
]
@@ -236,15 +252,16 @@ class DefaultSkillManager:
@property
def batch_init_nudge(self) -> str | None:
"""Deprecated: always returns None.
"""Nudge text to prepend to system prompt when batch input detected (DS-12).
The ``hive.batch-ledger`` default skill was removed when batch
tracking moved into ``progress.db`` (``hive.colony-progress-
tracker``). Callers in agent_host, colony_runtime, and
orchestrator still read this property; returning None keeps
them functional with no system-prompt nudge.
Returns None if ``hive.batch-ledger`` is disabled or auto_detect_batch is False.
"""
return None
if "hive.batch-ledger" not in self._skills:
return None
overrides = self._config.get_default_overrides("hive.batch-ledger")
if overrides.get("auto_detect_batch") is False:
return None
return _BATCH_INIT_NUDGE
@property
def context_warn_ratio(self) -> float | None:
+58 -541
View File
@@ -903,76 +903,10 @@ def register_queen_lifecycle_tools(
# ``start_worker`` was removed in the Phase 4 unification — its
# bare-bones spawn duplicated ``run_agent_with_input`` (which has
# credential preflight, concurrency guard, and phase tracking on
# top). The shared preflight timeout below is used by both
# ``run_agent_with_input`` and ``run_parallel_workers``.
# top). The shared preflight timeout below is still used by
# ``run_agent_with_input``.
_START_PREFLIGHT_TIMEOUT = 15 # seconds
async def _preflight_credentials(
legacy: Any,
*,
tool_label: str,
) -> set[str]:
"""Compute tools whose credentials are missing and resync MCP servers.
Shared between ``run_agent_with_input`` (single spawn) and
``run_parallel_workers`` (batch spawn). Returns the set of
tool names whose credentials failed validation; the caller
filters these out of the spawn's tool lists.
Exceptions (including validator bugs) are logged and treated
as "no tools dropped" so a broken validator can't block a
spawn. Wall-clock bound at ``_START_PREFLIGHT_TIMEOUT``
so slow credential HTTP health checks can't stall the LLM turn.
"""
unavailable: set[str] = set()
async def _run() -> None:
nonlocal unavailable
try:
from framework.credentials.validation import compute_unavailable_tools
loop = asyncio.get_running_loop()
drop, messages = await loop.run_in_executor(
None,
lambda: compute_unavailable_tools(legacy.graph.nodes),
)
unavailable = drop
if drop:
logger.warning(
"%s: dropping %d tool(s) with unavailable credentials: %s",
tool_label,
len(drop),
"; ".join(messages),
)
except Exception as exc:
logger.warning(
"%s: compute_unavailable_tools raised, proceeding without "
"credential-based tool filtering: %s",
tool_label,
exc,
)
runner = getattr(session, "runner", None)
if runner is not None:
try:
loop = asyncio.get_running_loop()
await loop.run_in_executor(
None,
lambda: runner._tool_registry.resync_mcp_servers_if_needed(),
)
except Exception as exc:
logger.warning("%s: MCP resync failed: %s", tool_label, exc)
try:
await asyncio.wait_for(_run(), timeout=_START_PREFLIGHT_TIMEOUT)
except TimeoutError:
logger.warning(
"%s: credential preflight timed out after %ds — proceeding",
tool_label,
_START_PREFLIGHT_TIMEOUT,
)
return unavailable
# --- stop_worker -----------------------------------------------------------
async def stop_worker(*, reason: str = "Stopped by queen") -> str:
@@ -1144,105 +1078,6 @@ def register_queen_lifecycle_tools(
}
)
# Credential preflight — mirrors the one run_agent_with_input
# performs. Without this, missing credentials (e.g. stale
# GITHUB_TOKEN) fail once PER spawned worker, yielding N
# duplicate error reports for a single fixable issue. Catch
# once upfront, build a filtered tool list, and pass it to
# every spawn via tools_override.
legacy_for_preflight = _get_runtime()
unavailable_tools_parallel: set[str] = set()
tools_override_parallel: list[Any] | None = None
if legacy_for_preflight is not None:
try:
unavailable_tools_parallel = await _preflight_credentials(
legacy_for_preflight, tool_label="run_parallel_workers"
)
except CredentialError as e:
# Structured credential failure: publish the
# CREDENTIALS_REQUIRED event so the frontend's modal
# can fire, and return the same shape the single-path
# tool returns on the same failure.
error_payload = credential_errors_to_json(e)
error_payload["agent_path"] = str(getattr(session, "worker_path", "") or "")
bus = getattr(session, "event_bus", None)
if bus is not None:
await bus.publish(
AgentEvent(
type=EventType.CREDENTIALS_REQUIRED,
stream_id="queen",
data=error_payload,
)
)
return json.dumps(error_payload)
if unavailable_tools_parallel:
colony_tools = list(getattr(colony, "_tools", []) or [])
before = len(colony_tools)
tools_override_parallel = [
t
for t in colony_tools
if getattr(t, "name", None) not in unavailable_tools_parallel
]
logger.info(
"run_parallel_workers: dropped %d tool object(s) from spawn_tools (unavailable credentials)",
before - len(tools_override_parallel),
)
# Colony progress tracker wiring: if the session's loaded
# worker points at a colony directory that has a progress.db,
# inject db_path + colony_id into every per-task ``data``
# dict so each spawned worker sees them in its first user
# message and can claim rows from the queue. ColonyRuntime.
# spawn() detects db_path in input_data and pre-activates
# hive.colony-progress-tracker into the catalog prompt.
_colony_db_path: str | None = None
_colony_id: str | None = None
_worker_path = getattr(session, "worker_path", None)
if _worker_path:
from pathlib import Path as _Path
_wp = _Path(_worker_path)
_pdb = _wp / "data" / "progress.db"
if _pdb.exists():
_colony_db_path = str(_pdb.resolve())
_colony_id = _wp.name
# Phase 2: enqueue each task into progress.db BEFORE building
# spawn specs so every parallel worker has a pre-assigned row
# to claim. Without this the queue stays empty and each
# worker's claim UPDATE affects zero rows, silently falling
# back to executing from its spawn message.
_enqueued_task_ids: list[str | None] = [None] * len(tasks)
if _colony_db_path:
from pathlib import Path as _PathP
from framework.host.progress_db import (
enqueue_task as _enqueue_task_fn,
)
_pdb_path_obj = _PathP(_colony_db_path)
for _i, _spec in enumerate(tasks):
if not isinstance(_spec, dict):
continue
_task_text_pre = str(_spec.get("task", "")).strip()
if not _task_text_pre:
continue
try:
_enqueued_task_ids[_i] = await asyncio.to_thread(
_enqueue_task_fn,
_pdb_path_obj,
_task_text_pre,
source="run_parallel_workers",
)
except Exception as _enqueue_exc:
logger.warning(
"run_parallel_workers: failed to enqueue tasks[%d] "
"(spawn proceeding without pinned task_id): %s",
_i,
_enqueue_exc,
)
# Normalise: each entry must have a non-empty "task" string.
normalised: list[dict] = []
for i, spec in enumerate(tasks):
@@ -1251,58 +1086,18 @@ def register_queen_lifecycle_tools(
task_text = str(spec.get("task", "")).strip()
if not task_text:
return json.dumps({"error": f"tasks[{i}].task is empty"})
spec_data = spec.get("data") if isinstance(spec.get("data"), dict) else {}
if _colony_db_path:
spec_data = {
**spec_data,
"db_path": _colony_db_path,
"colony_id": _colony_id,
}
if _enqueued_task_ids[i]:
spec_data["task_id"] = _enqueued_task_ids[i]
normalised.append(
{
"task": task_text,
"data": spec_data or None,
"data": spec.get("data") if isinstance(spec.get("data"), dict) else None,
}
)
if _colony_db_path:
_pinned = sum(1 for tid in _enqueued_task_ids if tid)
logger.info(
"run_parallel_workers: attached progress_db context to "
"%d spawn(s) (colony_id=%s, %d pinned task_ids)",
len(normalised),
_colony_id,
_pinned,
)
try:
worker_ids = await colony.spawn_batch(
normalised,
tools_override=tools_override_parallel,
)
worker_ids = await colony.spawn_batch(normalised)
except Exception as e:
return json.dumps({"error": f"spawn_batch failed: {e}"})
# Phase transition — mirrors run_agent_with_input. With the
# batch now spawned, the queen is semantically "running" until
# wait_for_worker_reports returns, so phase-gated running
# tools (inject_message, reply_to_worker, ...) should be
# available. Without this change run_parallel_workers left
# the queen in whatever phase she was in (typically staging).
if phase_state is not None:
try:
await phase_state.switch_to_running()
_update_meta_json(
session_manager, manager_session_id, {"phase": "running"}
)
except Exception as exc:
logger.warning(
"run_parallel_workers: phase transition to 'running' failed (non-fatal): %s",
exc,
)
try:
reports = await colony.wait_for_worker_reports(
worker_ids,
@@ -1527,35 +1322,6 @@ def register_queen_lifecycle_tools(
except OSError as e:
return None, f"failed to install skill into {target}: {e}"
# Cleanup the source directory after a successful install so
# the authored skill doesn't linger as debris in the agent
# workspace (or — pre-sandbox-split — in the hive git
# checkout). Only removes paths that are OUTSIDE
# ``~/.hive/skills/`` so we never nuke the canonical install
# target or user-owned skill dirs.
try:
src_resolved = src.resolve()
skills_root_resolved = target_root.resolve()
try:
src_resolved.relative_to(skills_root_resolved)
_under_skills_root = True
except ValueError:
_under_skills_root = False
if not _under_skills_root:
_shutil.rmtree(src_resolved)
logger.info(
"create_colony: cleaned up authored skill source at %s "
"(installed to %s)",
src_resolved,
target,
)
except OSError as e:
logger.warning(
"create_colony: failed to clean up skill source at %s (non-fatal): %s",
src,
e,
)
return target, None
async def create_colony(
@@ -1563,7 +1329,6 @@ def register_queen_lifecycle_tools(
colony_name: str,
task: str,
skill_path: str,
tasks: list[dict] | None = None,
) -> str:
"""Create a colony after installing a pre-authored skill folder.
@@ -1573,13 +1338,6 @@ def register_queen_lifecycle_tools(
they're ready to start the worker — at that point the worker
reads the task from ``worker.json`` and the skill from
``~/.hive/skills/`` and starts informed.
When *tasks* is provided, each entry is seeded into the
colony's ``progress.db`` task queue in a single transaction.
Workers then claim rows from the queue using the
``hive.colony-progress-tracker`` default skill. Each task dict
accepts: ``goal`` (required), optional ``steps``,
``sop_items``, ``priority``, ``payload``, ``parent_task_id``.
"""
if session is None:
return json.dumps({"error": "No session bound to this tool registry."})
@@ -1634,7 +1392,6 @@ def register_queen_lifecycle_tools(
session=session,
colony_name=cn,
task=(task or "").strip(),
tasks=tasks if isinstance(tasks, list) else None,
)
except Exception as e:
logger.exception("create_colony: fork failed after installing skill")
@@ -1687,8 +1444,6 @@ def register_queen_lifecycle_tools(
"is_new": fork_result.get("is_new", True),
"skill_installed": str(installed_skill),
"skill_name": installed_skill.name if installed_skill else None,
"db_path": fork_result.get("db_path"),
"tasks_seeded": len(fork_result.get("task_ids") or []),
}
)
@@ -1786,57 +1541,6 @@ def register_queen_lifecycle_tools(
"protocol'."
),
},
"tasks": {
"type": "array",
"description": (
"Optional pre-seeded task queue for the colony. "
"When the colony is a fan-out of many similar "
"units of work (e.g. 'process record #1234', "
"'scrape profile X'), pass them here as an "
"array and workers will claim rows atomically "
"from the SQLite queue using the "
"hive.colony-progress-tracker skill. Each task "
"needs a 'goal' string; optionally include "
"'steps' (ordered subtasks), 'sop_items' "
"(required checklist gates), 'priority' "
"(higher runs first), and 'payload' "
"(task-specific parameters). Can be hundreds "
"or thousands of entries — the bulk insert "
"runs in a single transaction."
),
"items": {
"type": "object",
"properties": {
"goal": {"type": "string"},
"priority": {"type": "integer"},
"payload": {},
"steps": {
"type": "array",
"items": {
"type": "object",
"properties": {
"title": {"type": "string"},
"detail": {"type": "string"},
},
"required": ["title"],
},
},
"sop_items": {
"type": "array",
"items": {
"type": "object",
"properties": {
"key": {"type": "string"},
"description": {"type": "string"},
"required": {"type": "boolean"},
},
"required": ["key", "description"],
},
},
},
"required": ["goal"],
},
},
},
"required": ["colony_name", "task", "skill_path"],
},
@@ -1848,158 +1552,6 @@ def register_queen_lifecycle_tools(
)
tools_registered += 1
# --- enqueue_task ------------------------------------------------------------
async def enqueue_task_tool(
*,
colony_name: str,
goal: str,
steps: list[dict] | None = None,
sop_items: list[dict] | None = None,
payload: Any = None,
priority: int = 0,
parent_task_id: str | None = None,
) -> str:
"""Append a single task to an existing colony's progress.db queue.
Use this when the colony is already created and more work
needs to be fanned out (webhook-driven, follow-up requests,
worker-generated subtasks). The colony's workers pick it up
on their next claim cycle.
"""
cn = (colony_name or "").strip()
if not _COLONY_NAME_RE.match(cn):
return json.dumps(
{"error": "colony_name must be lowercase alphanumeric with underscores"}
)
from pathlib import Path as _Path
from framework.host.progress_db import (
enqueue_task as _enqueue_task,
ensure_progress_db as _ensure_db,
)
colony_dir = _Path.home() / ".hive" / "colonies" / cn
if not colony_dir.is_dir():
return json.dumps({"error": f"colony '{cn}' not found"})
try:
db_path = await asyncio.to_thread(_ensure_db, colony_dir)
task_id = await asyncio.to_thread(
_enqueue_task,
db_path,
goal,
steps=steps,
sop_items=sop_items,
payload=payload,
priority=priority,
parent_task_id=parent_task_id,
)
except Exception as e:
logger.exception("enqueue_task: failed to insert row")
return json.dumps({"error": f"enqueue_task failed: {e}"})
return json.dumps(
{
"status": "enqueued",
"colony_name": cn,
"task_id": task_id,
"db_path": str(db_path),
}
)
_enqueue_task_tool = Tool(
name="enqueue_task",
description=(
"Append a single task to an existing colony's progress.db "
"queue. Use this after create_colony when more work needs "
"to be fanned out — e.g. a webhook fired, the user asked "
"for a follow-up run, or a worker spawned a subtask. The "
"colony's workers pick it up on their next claim cycle "
"(atomic UPDATE … WHERE status='pending'). For bulk "
"authoring at colony creation time, pass the 'tasks' "
"array to create_colony instead."
),
parameters={
"type": "object",
"properties": {
"colony_name": {
"type": "string",
"description": "Target colony name (lowercase + underscores).",
},
"goal": {
"type": "string",
"description": (
"Human-readable task description. Self-contained — "
"the worker has no context beyond this string plus "
"any steps/sop_items/payload you attach."
),
},
"steps": {
"type": "array",
"description": (
"Optional ordered subtasks the worker should "
"check off as it executes. Each step needs a "
"'title'; optional 'detail' for longer "
"instructions."
),
"items": {
"type": "object",
"properties": {
"title": {"type": "string"},
"detail": {"type": "string"},
},
"required": ["title"],
},
},
"sop_items": {
"type": "array",
"description": (
"Optional hard-gate checklist items the worker "
"MUST address before marking the task done. "
"Each item needs a 'key' (slug) and "
"'description'; 'required' defaults to true."
),
"items": {
"type": "object",
"properties": {
"key": {"type": "string"},
"description": {"type": "string"},
"required": {"type": "boolean"},
},
"required": ["key", "description"],
},
},
"payload": {
"description": (
"Optional task-specific parameters. Stored as "
"JSON in the 'payload' column."
),
},
"priority": {
"type": "integer",
"description": "Higher values run first. Default 0.",
},
"parent_task_id": {
"type": "string",
"description": (
"Optional reference to an existing task this "
"one was spawned from (audit only; no blocking "
"dependency resolver today)."
),
},
},
"required": ["colony_name", "goal"],
},
)
registry.register(
"enqueue_task",
_enqueue_task_tool,
lambda inputs: enqueue_task_tool(**inputs),
)
tools_registered += 1
# --- switch_to_reviewing ----------------------------------------------------
async def switch_to_reviewing_tool() -> str:
@@ -3417,8 +2969,7 @@ def register_queen_lifecycle_tools(
if preamble.get("pending_question"):
result["pending_question"] = preamble["pending_question"]
_idle = runtime.agent_idle_seconds
result["agent_idle_seconds"] = round(_idle, 1) if _idle != float("inf") else -1
result["agent_idle_seconds"] = round(runtime.agent_idle_seconds, 1)
for key in ("current_node", "current_iteration"):
if key in preamble:
@@ -4162,33 +3713,6 @@ def register_queen_lifecycle_tools(
task,
)
# Concurrency budget check — mirrors run_parallel_workers so a
# queen in a loop can't silently exceed max_concurrent_workers
# by hammering run_agent_with_input. Per-call count is 1, so
# the check is ``active + 1 > max_concurrent``.
colony_cfg = getattr(colony, "_config", None) or getattr(colony, "config", None)
max_concurrent = getattr(colony_cfg, "max_concurrent_workers", None)
if max_concurrent and max_concurrent > 0:
active = 0
try:
workers = getattr(colony, "_workers", {}) or {}
for w in workers.values():
handle = getattr(w, "_task_handle", None)
if handle is not None and not handle.done():
active += 1
except Exception:
active = 0
if active + 1 > max_concurrent:
return json.dumps(
{
"error": (
f"run_agent_with_input would exceed max_concurrent_workers "
f"({active} active + 1 new > {max_concurrent}). "
"Wait for an existing worker to finish or stop one."
)
}
)
try:
# Pre-flight: compute the set of tools whose credentials are
# NOT currently available, and resync MCP servers. We do NOT
@@ -4199,9 +3723,58 @@ def register_queen_lifecycle_tools(
# to block the whole spawn with a CredentialError; the fix
# is to treat unset credentials as "drop these tools" rather
# than "abort the worker".
unavailable_tools = await _preflight_credentials(
legacy, tool_label="run_agent_with_input"
)
#
# Note: the MCP admission gate (_build_mcp_admission_gate in
# tool_registry.py) already filters MCP tools at registration
# time. This preflight covers the non-MCP path — tools.py
# discoveries via discover_from_module — which has no
# credential gate of its own.
loop = asyncio.get_running_loop()
unavailable_tools: set[str] = set()
async def _preflight():
nonlocal unavailable_tools
try:
from framework.credentials.validation import compute_unavailable_tools
drop, messages = await loop.run_in_executor(
None,
lambda: compute_unavailable_tools(legacy.graph.nodes),
)
unavailable_tools = drop
if drop:
logger.warning(
"run_agent_with_input: dropping %d tool(s) with "
"unavailable credentials from worker spawn: %s",
len(drop),
"; ".join(messages),
)
except Exception as exc:
# Validation itself failing (not a credential failure —
# a code error in the validator) should not block the
# spawn. Log and proceed as if nothing was dropped.
logger.warning(
"compute_unavailable_tools raised, proceeding without credential-based tool filtering: %s",
exc,
)
runner = getattr(session, "runner", None)
if runner:
try:
await loop.run_in_executor(
None,
lambda: runner._tool_registry.resync_mcp_servers_if_needed(),
)
except Exception as e:
logger.warning("MCP resync failed: %s", e)
try:
await asyncio.wait_for(_preflight(), timeout=_START_PREFLIGHT_TIMEOUT)
except TimeoutError:
logger.warning(
"run_agent_with_input preflight timed out after %ds — proceeding",
_START_PREFLIGHT_TIMEOUT,
)
# Build a per-spawn AgentSpec that mirrors the loaded
# worker's entry-node identity. This is what makes the
@@ -4275,66 +3848,10 @@ def register_queen_lifecycle_tools(
dropped_count,
)
# Colony progress tracker wiring: if the loaded worker
# lives under ~/.hive/colonies/{name}/ and has a
# progress.db, inject db_path + colony_id into input_data
# so the spawned worker sees them in its first user
# message and can use the hive.colony-progress-tracker
# skill to claim tasks from the queue.
_spawn_input_data: dict[str, Any] = {"user_request": task}
_worker_path = getattr(session, "worker_path", None)
if _worker_path:
from pathlib import Path as _Path
_worker_path_p = _Path(_worker_path)
_progress_db = _worker_path_p / "data" / "progress.db"
if _progress_db.exists():
_spawn_input_data["db_path"] = str(_progress_db.resolve())
_spawn_input_data["colony_id"] = _worker_path_p.name
logger.info(
"run_agent_with_input: attached progress_db context "
"(colony_id=%s, db_path=%s)",
_worker_path_p.name,
_progress_db,
)
# Phase 2: enqueue the task into progress.db BEFORE
# spawning so the worker has a concrete row to
# claim. Without this the queue is empty and the
# worker's claim UPDATE affects zero rows, so it
# silently falls back to executing from the chat
# spawn message. Any enqueue failure is logged and
# the spawn proceeds without a pinned task_id
# (degrades to the pre-Phase-2 behavior).
try:
from framework.host.progress_db import (
enqueue_task as _enqueue_task_fn,
)
_task_id = await asyncio.to_thread(
_enqueue_task_fn,
_progress_db,
task,
source="run_agent_with_input",
)
_spawn_input_data["task_id"] = _task_id
logger.info(
"run_agent_with_input: enqueued task %s into %s",
_task_id,
_progress_db,
)
except Exception as _enqueue_exc:
logger.warning(
"run_agent_with_input: failed to enqueue task "
"into progress.db (spawn proceeding without "
"pinned task_id): %s",
_enqueue_exc,
)
worker_ids = await colony.spawn(
task=task,
count=1,
input_data=_spawn_input_data,
input_data={"user_request": task},
agent_spec=spawn_spec,
tools=spawn_tools,
tool_executor=spawn_tool_executor,
+3 -19
View File
@@ -87,25 +87,9 @@ export const sessionsApi = {
colonies: (sessionId: string) =>
api.get<{ colonies: string[] }>(`/sessions/${sessionId}/colonies`),
/** Get persisted eventbus log for a session (works for cold sessions — used for full UI replay).
*
* Returns the TAIL of the event log. Default limit 2000 (server
* clamps to [1, 10000]); older events get dropped and
* ``truncated: true`` is set so the UI can show an indicator.
*/
eventsHistory: (sessionId: string, limit?: number) =>
api.get<{
events: AgentEvent[];
session_id: string;
total: number;
returned: number;
truncated: boolean;
limit: number;
}>(
`/sessions/${sessionId}/events/history${
limit ? `?limit=${limit}` : ""
}`,
),
/** Get persisted eventbus log for a session (works for cold sessions — used for full UI replay). */
eventsHistory: (sessionId: string) =>
api.get<{ events: AgentEvent[]; session_id: string }>(`/sessions/${sessionId}/events/history`),
/** Open the session's data folder in the OS file manager. */
revealFolder: (sessionId: string) =>
@@ -31,15 +31,6 @@ export default function AppHeader({ onOpenQueenProfile }: AppHeaderProps) {
const colonyId = colonyMatch[1];
const colony = colonies.find((c) => c.id === colonyId);
title = colony?.name ?? colonyId;
// Show queen profile button when the colony has a linked queen profile
if (colony?.queenProfileId) {
const profile = queenProfiles.find((q) => q.id === colony.queenProfileId);
if (profile) {
queenIdForProfile = profile.id;
queenTitle = profile.title ?? null;
icon = <Crown className="w-4 h-4 text-primary" />;
}
}
} else if (queenMatch) {
const queenId = queenMatch[1];
const profile = queenProfiles.find((q) => q.id === queenId);
+6 -196
View File
@@ -10,8 +10,6 @@ import {
Paperclip,
X,
} from "lucide-react";
import WorkerRunBubble from "@/components/WorkerRunBubble";
import type { WorkerRunGroup } from "@/components/WorkerRunBubble";
export interface ImageContent {
type: "image_url";
@@ -27,8 +25,6 @@ export interface ContextUsageEntry {
import MarkdownContent from "@/components/MarkdownContent";
import QuestionWidget from "@/components/QuestionWidget";
import MultiQuestionWidget from "@/components/MultiQuestionWidget";
import { useColony } from "@/context/ColonyContext";
import { useQueenProfile } from "@/context/QueenProfileContext";
import ParallelSubagentBubble, {
type SubagentGroup,
} from "@/components/ParallelSubagentBubble";
@@ -64,12 +60,6 @@ export interface ChatMessage {
nodeId?: string;
/** Backend execution_id for this message */
executionId?: string;
/** Backend stream_id — the per-worker identity used for grouping
* parallel-spawn workers into their own stacked WorkerRunBubble.
* "queen" for queen messages, "worker" for the single loaded
* worker (run_agent_with_input), or "worker:{uuid}" for each
* parallel worker spawned via run_parallel_workers. */
streamId?: string;
/** True when the message was sent while the queen was still processing */
queued?: boolean;
}
@@ -134,14 +124,14 @@ const TOOL_HEX = [
"#e5a820", // sunflower
];
export function toolHex(name: string): string {
function toolHex(name: string): string {
let hash = 0;
for (let i = 0; i < name.length; i++)
hash = (hash * 31 + name.charCodeAt(i)) | 0;
return TOOL_HEX[Math.abs(hash) % TOOL_HEX.length];
}
export function ToolActivityRow({ content }: { content: string }) {
function ToolActivityRow({ content }: { content: string }) {
let tools: { name: string; done: boolean }[] = [];
try {
const parsed = JSON.parse(content);
@@ -346,15 +336,6 @@ function InlineAskUserBubble({
const color = getColor(msg.agent, msg.role);
const thread = msg.thread || activeThread;
const { queenProfiles } = useColony();
const { openQueenProfile } = useQueenProfile();
const queenProfileId = isQueen
? queenProfiles.find((q) => q.name === msg.agent)?.id ?? null
: null;
const handleQueenClick = queenProfileId
? () => openQueenProfile(queenProfileId)
: undefined;
const handleSingle = (answer: string) => {
setState("submitted");
onSend(answer, thread);
@@ -374,14 +355,12 @@ function InlineAskUserBubble({
return (
<div className="flex gap-3">
<div
className={`flex-shrink-0 ${isQueen ? "w-9 h-9" : "w-7 h-7"} rounded-xl flex items-center justify-center${handleQueenClick ? " cursor-pointer hover:opacity-80 transition-opacity" : ""}`}
className={`flex-shrink-0 ${isQueen ? "w-9 h-9" : "w-7 h-7"} rounded-xl flex items-center justify-center`}
style={{
backgroundColor: `${color}18`,
border: `1.5px solid ${color}35`,
boxShadow: isQueen ? `0 0 12px ${color}20` : undefined,
}}
onClick={handleQueenClick}
title={handleQueenClick ? `View ${msg.agent}'s profile` : undefined}
>
{isQueen ? (
<Crown className="w-4 h-4" style={{ color }} />
@@ -394,9 +373,8 @@ function InlineAskUserBubble({
>
<div className="flex items-center gap-2 mb-1">
<span
className={`font-medium ${isQueen ? "text-sm" : "text-xs"}${handleQueenClick ? " cursor-pointer hover:underline" : ""}`}
className={`font-medium ${isQueen ? "text-sm" : "text-xs"}`}
style={{ color }}
onClick={handleQueenClick}
>
{msg.agent}
</span>
@@ -457,13 +435,6 @@ const MessageBubble = memo(
const isQueen = msg.role === "queen";
const color = getColor(msg.agent, msg.role);
// Resolve queen profile ID so clicking avatar/name opens the profile panel
const { queenProfiles } = useColony();
const { openQueenProfile } = useQueenProfile();
const queenProfileId = isQueen
? queenProfiles.find((q) => q.name === msg.agent)?.id ?? null
: null;
if (msg.type === "run_divider") {
return (
<div className="flex items-center gap-3 py-2 my-1">
@@ -558,21 +529,15 @@ const MessageBubble = memo(
);
}
const handleQueenClick = queenProfileId
? () => openQueenProfile(queenProfileId)
: undefined;
return (
<div className="flex gap-3">
<div
className={`flex-shrink-0 ${isQueen ? "w-9 h-9" : "w-7 h-7"} rounded-xl flex items-center justify-center${handleQueenClick ? " cursor-pointer hover:opacity-80 transition-opacity" : ""}`}
className={`flex-shrink-0 ${isQueen ? "w-9 h-9" : "w-7 h-7"} rounded-xl flex items-center justify-center`}
style={{
backgroundColor: `${color}18`,
border: `1.5px solid ${color}35`,
boxShadow: isQueen ? `0 0 12px ${color}20` : undefined,
}}
onClick={handleQueenClick}
title={handleQueenClick ? `View ${msg.agent}'s profile` : undefined}
>
{isQueen ? (
<Crown className="w-4 h-4" style={{ color }} />
@@ -585,9 +550,8 @@ const MessageBubble = memo(
>
<div className="flex items-center gap-2 mb-1">
<span
className={`font-medium ${isQueen ? "text-sm" : "text-xs"}${handleQueenClick ? " cursor-pointer hover:underline" : ""}`}
className={`font-medium ${isQueen ? "text-sm" : "text-xs"}`}
style={{ color }}
onClick={handleQueenClick}
>
{msg.agent}
</span>
@@ -701,157 +665,14 @@ export default function ChatPanel({
type RenderItem =
| { kind: "message"; msg: ChatMessage }
| { kind: "parallel"; groupId: string; groups: SubagentGroup[] }
| {
kind: "worker_run";
runId: string;
group: WorkerRunGroup;
/** Optional short label shown next to the "Worker" badge.
* Only set when there are multiple parallel workers in the
* same run span (so users can tell them apart). */
label?: string;
}
| { kind: "day_divider"; key: string; createdAt: number };
/** Derive a short label from a parallel-worker stream id.
* `worker:abcdef12-3456-...` → `abcdef12` (first 8 chars of the
* uuid after the `worker:` prefix). Falls back to the first
* message's nodeId when the streamId isn't the expected shape. */
function deriveWorkerLabel(
streamKey: string,
msgs: ChatMessage[],
): string {
if (streamKey.startsWith("worker:")) {
const suffix = streamKey.slice("worker:".length);
// sessions are `session_YYYYMMDD_HHMMSS_<8-hex>` — show the
// trailing hex if present, else first 8 chars of the suffix.
const tail = suffix.match(/_[0-9a-f]{6,}$/i)?.[0]?.slice(1);
return tail ? tail.slice(0, 8) : suffix.slice(0, 8);
}
const nid = msgs.find((m) => m.nodeId)?.nodeId;
return nid || streamKey;
}
const renderItems = useMemo<RenderItem[]>(() => {
const items: RenderItem[] = [];
let i = 0;
while (i < threadMessages.length) {
const msg = threadMessages[i];
const isSubagent = msg.nodeId?.includes(":subagent:");
// Worker run grouping: collect consecutive WORKER-role
// messages (and worker tool_status pills) into a collapsible
// card. Queen tool_status pills (``role === "queen"``) are
// deliberately excluded — the queen's own tool calls are part
// of the queen↔user conversation and should render inline as
// ToolActivityRows, not fold into a "Worker" bubble. Without
// this guard, every queen run_command / read_file / etc. shows
// up under a misleading "Worker" label in the DM.
const isWorkerCandidate =
msg.role === "worker" ||
(msg.type === "tool_status" && msg.role !== "queen");
if (
!isSubagent &&
isWorkerCandidate &&
msg.type !== "user" &&
msg.type !== "run_divider"
) {
const workerMsgs: ChatMessage[] = [];
const firstWorkerMsg = msg;
while (i < threadMessages.length) {
const m = threadMessages[i];
// Hard boundary — stop the worker run group
if (m.type === "user" || m.type === "run_divider") break;
// Queen message with real text — boundary (queen is talking
// to the user, not just emitting a tool)
if (m.role === "queen" && m.content?.trim() && !m.type) break;
// Queen tool_status — NOT a worker activity, don't bucket
// it. Break so the grouping stops and the queen pill
// renders inline.
if (m.type === "tool_status" && m.role === "queen") break;
// Subagent message — different group type, stop here
if (m.nodeId?.includes(":subagent:")) break;
// Worker text messages and worker tool_status belong to the run
if (
m.role === "worker" ||
(m.type === "tool_status" && m.role !== "queen")
) {
workerMsgs.push(m);
i++;
continue;
}
// System message or other — include in the worker run
// group to preserve ordering (they'll render inside the
// expanded view)
workerMsgs.push(m);
i++;
}
if (workerMsgs.length > 0) {
// Parallel fan-out detection: if any message in this span
// is tagged with a parallel-worker streamId (``worker:{uuid}``),
// split the span by streamId and emit one ``worker_run``
// per worker — they render as stacked independent
// ``WorkerRunBubble``s. Un-tagged legacy messages and the
// single-worker ``streamId="worker"`` case fall through to
// the existing single-bubble behavior.
const hasParallel = workerMsgs.some(
(m) => !!m.streamId && /^worker:./.test(m.streamId),
);
if (hasParallel) {
const buckets = new Map<
string,
{ messages: ChatMessage[]; firstAt: number }
>();
// Messages with no streamId (system notes, orphans from
// old restore) attach to the most-recent keyed message's
// bucket so chronology is preserved; leading keyless
// messages, with no bucket open yet, are skipped.
let currentKey: string | null = null;
for (const m of workerMsgs) {
const key =
m.streamId && m.streamId.length > 0
? m.streamId
: currentKey;
if (!key) continue;
if (m.streamId && m.streamId.length > 0) currentKey = m.streamId;
let bucket = buckets.get(key);
if (!bucket) {
bucket = { messages: [], firstAt: m.createdAt ?? 0 };
buckets.set(key, bucket);
}
bucket.messages.push(m);
bucket.firstAt = Math.min(
bucket.firstAt,
m.createdAt ?? Number.POSITIVE_INFINITY,
);
}
const sorted = Array.from(buckets.entries()).sort(
([, a], [, b]) => a.firstAt - b.firstAt,
);
for (const [streamKey, { messages: bucketMsgs }] of sorted) {
items.push({
kind: "worker_run",
runId: `wrun-${firstWorkerMsg.id}-${streamKey}`,
group: { messages: bucketMsgs },
label: deriveWorkerLabel(streamKey, bucketMsgs),
});
}
} else {
items.push({
kind: "worker_run",
runId: `wrun-${firstWorkerMsg.id}`,
group: { messages: workerMsgs },
});
}
}
continue;
}
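// Worked example (hypothetical span): given the consecutive messages
//   [worker text on "worker:aaa", tool_status on "worker:bbb",
//    worker text on "worker:aaa", user text]
// collection stops at the user message and the span splits into two
// streamId buckets, so two worker_run items are emitted and render
// as two stacked WorkerRunBubbles labeled "aaa" and "bbb".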
if (!isSubagent) {
items.push({ kind: "message", msg });
i++;
@@ -1051,17 +872,6 @@ export default function ChatPanel({
</div>
);
}
if (item.kind === "worker_run") {
return (
<div key={item.runId}>
<WorkerRunBubble
runId={item.runId}
group={item.group}
label={item.label}
/>
</div>
);
}
const msg = item.msg;
// Detect misformatted ask_user payloads emitted as plain text and
// substitute the nicer widget-based bubble. Only inspect regular
@@ -1,4 +1,4 @@
import { useState, useEffect, useCallback, useRef } from "react";
import { useState, useEffect } from "react";
import { NavLink, useLocation, useNavigate } from "react-router-dom";
import {
X,
@@ -46,49 +46,8 @@ export default function QueenProfilePanel({
const name = profile?.name ?? summary?.name ?? "Queen";
const title = profile?.title ?? summary?.title ?? "";
// ── Resizable width ──────────────────────────────────────────────────
const MIN_WIDTH = 280;
const MAX_WIDTH = 600;
const [width, setWidth] = useState(340);
const dragging = useRef(false);
const startX = useRef(0);
const startWidth = useRef(0);
const onDragStart = useCallback((e: React.MouseEvent) => {
e.preventDefault();
dragging.current = true;
startX.current = e.clientX;
startWidth.current = width;
const onMove = (ev: MouseEvent) => {
if (!dragging.current) return;
// Panel is on the right, so dragging left (clientX decreasing,
// making startX - clientX positive) grows it
const delta = startX.current - ev.clientX;
setWidth(Math.min(MAX_WIDTH, Math.max(MIN_WIDTH, startWidth.current + delta)));
};
const onUp = () => {
dragging.current = false;
document.removeEventListener("mousemove", onMove);
document.removeEventListener("mouseup", onUp);
document.body.style.cursor = "";
document.body.style.userSelect = "";
};
document.addEventListener("mousemove", onMove);
document.addEventListener("mouseup", onUp);
document.body.style.cursor = "col-resize";
document.body.style.userSelect = "none";
}, [width]);
return (
<aside
className="flex-shrink-0 border-l border-border/60 bg-card overflow-y-auto relative"
style={{ width }}
>
{/* Drag handle */}
<div
onMouseDown={onDragStart}
className="absolute top-0 left-0 w-1 h-full cursor-col-resize hover:bg-primary/30 active:bg-primary/50 transition-colors z-10"
/>
<aside className="w-[340px] flex-shrink-0 border-l border-border/60 bg-card overflow-y-auto">
{/* Header */}
<div className="flex items-center justify-between px-5 py-3.5 border-b border-border/60">
<div className="flex items-center gap-2 text-sm font-semibold text-foreground">
+2 -42
View File
@@ -1,4 +1,4 @@
import { useState, useCallback, useRef } from "react";
import { useState } from "react";
import { useNavigate } from "react-router-dom";
import {
ChevronLeft,
@@ -19,38 +19,6 @@ export default function Sidebar() {
const [coloniesExpanded, setColoniesExpanded] = useState(true);
const [queensExpanded, setQueensExpanded] = useState(true);
// ── Resizable width ──────────────────────────────────────────────────
const MIN_WIDTH = 180;
const MAX_WIDTH = 400;
const [width, setWidth] = useState(240);
const dragging = useRef(false);
const startX = useRef(0);
const startWidth = useRef(0);
const onDragStart = useCallback((e: React.MouseEvent) => {
e.preventDefault();
dragging.current = true;
startX.current = e.clientX;
startWidth.current = width;
const onMove = (ev: MouseEvent) => {
if (!dragging.current) return;
const delta = ev.clientX - startX.current;
setWidth(Math.min(MAX_WIDTH, Math.max(MIN_WIDTH, startWidth.current + delta)));
};
const onUp = () => {
dragging.current = false;
document.removeEventListener("mousemove", onMove);
document.removeEventListener("mouseup", onUp);
document.body.style.cursor = "";
document.body.style.userSelect = "";
};
document.addEventListener("mousemove", onMove);
document.addEventListener("mouseup", onUp);
document.body.style.cursor = "col-resize";
document.body.style.userSelect = "none";
}, [width]);
if (sidebarCollapsed) {
return (
<aside className="w-[52px] flex-shrink-0 flex flex-col bg-sidebar-bg border-r border-sidebar-border h-full">
@@ -79,15 +47,7 @@ export default function Sidebar() {
}
return (
<aside
className="flex-shrink-0 flex flex-col bg-sidebar-bg border-r border-sidebar-border h-full relative"
style={{ width }}
>
{/* Drag handle on right edge */}
<div
onMouseDown={onDragStart}
className="absolute top-0 right-0 w-1 h-full cursor-col-resize hover:bg-primary/30 active:bg-primary/50 transition-colors z-10"
/>
<aside className="w-[240px] flex-shrink-0 flex flex-col bg-sidebar-bg border-r border-sidebar-border h-full">
{/* Header */}
<div className="h-12 flex items-center justify-between px-4 border-b border-border/60">
<button
@@ -1,297 +0,0 @@
import { memo, useState, useRef, useEffect } from "react";
import { ChevronDown, ChevronUp, Cpu } from "lucide-react";
import type { ChatMessage } from "@/components/ChatPanel";
import { ToolActivityRow } from "@/components/ChatPanel";
import MarkdownContent from "@/components/MarkdownContent";
const workerColor = "hsl(220,60%,55%)";
export interface WorkerRunGroup {
messages: ChatMessage[];
}
interface WorkerRunBubbleProps {
runId: string;
group: WorkerRunGroup;
/** Short identifier shown next to the "Worker" badge. Populated
* only when the parent grouping has multiple parallel workers
* in the same run span, so N stacked bubbles can be told apart
* at a glance. Omitted for single-worker runs. */
label?: string;
}
/** Parse a tool_status JSON blob into a list of tool entries. */
function parseToolStatus(content: string): { name: string; done: boolean }[] {
try {
const parsed = JSON.parse(content);
return parsed.tools || [];
} catch {
return [];
}
}
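// Examples (content shape as produced by the tool_status pills):
//   parseToolStatus('{"tools":[{"name":"read_file","done":true}],"allDone":true}')
//     → [{ name: "read_file", done: true }]
//   parseToolStatus("not json") → []   (malformed content degrades gracefully)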
/**
* Strip markdown formatting so the collapsed preview is a single
* readable line instead of a scatter of code pills.
*
* MarkdownContent turns every backtick-wrapped fragment into its own
* visually-boxed inline-code pill. In a worker text message those
* pills can be coordinates, UUIDs, selectors, tool names — the
* collapsed preview ends up looking like confetti. We just want the
* plain prose, one line, truncated.
*/
function stripMarkdownToPreview(s: string, maxLen = 160): string {
const cleaned = s
.replace(/```[\s\S]*?```/g, " [code] ") // fenced code blocks
.replace(/`([^`]+)`/g, "$1") // inline code — keep the text, drop the backticks
.replace(/\*\*([^*]+)\*\*/g, "$1") // bold
.replace(/\*([^*]+)\*/g, "$1") // italic
.replace(/~~([^~]+)~~/g, "$1") // strikethrough
.replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") // links -> link text
.replace(/^#{1,6}\s+/gm, "") // ATX headers
.replace(/^[>\-*+]\s+/gm, "") // blockquote/list markers
.replace(/\s+/g, " ") // collapse whitespace
.trim();
if (cleaned.length <= maxLen) return cleaned;
return cleaned.slice(0, maxLen - 1).trimEnd() + "\u2026";
}
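// e.g. stripMarkdownToPreview("**Done.** Clicked `#login`, see [docs](https://x.test)")
//        → "Done. Clicked #login, see docs"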
/**
* Collapsible card that groups all worker messages from a single run
* (the span between the queen's `run_agent_with_input` call and the
* worker's final `set_output`/`escalate`/idle).
*
* Collapsed (default): header bar with tool count + latest text snippet.
* Expanded: scrollable list of every message and tool status in order.
*/
const WorkerRunBubble = memo(
function WorkerRunBubble({ group, label }: WorkerRunBubbleProps) {
const [expanded, setExpanded] = useState(false);
const bodyRef = useRef<HTMLDivElement>(null);
// Separate text messages from tool status
const textMsgs = group.messages.filter(
(m) => m.type !== "tool_status" && m.content?.trim()
);
const toolStatusMsgs = group.messages.filter(
(m) => m.type === "tool_status"
);
// Count total tool calls from tool_status messages
const allTools: { name: string; done: boolean }[] = [];
for (const m of toolStatusMsgs) {
for (const t of parseToolStatus(m.content)) {
allTools.push(t);
}
}
const toolCount = allTools.length;
const doneCount = allTools.filter((t) => t.done).length;
const isFinished = toolCount > 0 && doneCount === toolCount;
// Latest text from the worker (the last non-empty text message)
const latestText = textMsgs.length > 0
? textMsgs[textMsgs.length - 1].content
: "";
// Status label. We prefer concrete states over the vague
// "starting" fallback — if the worker has emitted any text or
// any tool, it's past the startup phase.
const statusLabel = isFinished
? "done"
: toolCount > 0
? "running"
: textMsgs.length > 0
? "active"
: "starting";
// Unique tool names for the summary (deduplicated, ordered by first appearance)
const uniqueToolNames: string[] = [];
const seen = new Set<string>();
for (const t of allTools) {
if (!seen.has(t.name)) {
seen.add(t.name);
uniqueToolNames.push(t.name);
}
}
// Auto-scroll body when expanded
useEffect(() => {
if (expanded && bodyRef.current) {
bodyRef.current.scrollTop = bodyRef.current.scrollHeight;
}
}, [expanded, group.messages.length]);
return (
<div className="flex gap-3">
{/* Left icon */}
<div
className="flex-shrink-0 w-7 h-7 rounded-xl flex items-center justify-center mt-1"
style={{
backgroundColor: `${workerColor}18`,
border: `1.5px solid ${workerColor}35`,
}}
>
<Cpu className="w-3.5 h-3.5" style={{ color: workerColor }} />
</div>
<div className="flex-1 min-w-0 max-w-[90%]">
{/* Clickable header */}
<button
onClick={() => setExpanded((v) => !v)}
className="w-full flex items-center gap-2 mb-1 text-left cursor-pointer group"
>
<span className="font-medium text-xs" style={{ color: workerColor }}>
Worker
</span>
{label && (
<span className="text-[10px] font-mono text-muted-foreground/80 tabular-nums">
{label}
</span>
)}
<span
className={`text-[10px] font-medium px-1.5 py-0.5 rounded-md ${
isFinished
? "bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400"
: "bg-muted text-muted-foreground"
}`}
>
{statusLabel}
</span>
{toolCount > 0 && (
<span className="text-[10px] text-muted-foreground tabular-nums">
{doneCount}/{toolCount} tools
</span>
)}
<span className="ml-auto text-muted-foreground/60 group-hover:text-muted-foreground transition-colors p-0.5 rounded">
{expanded ? (
<ChevronUp className="w-3.5 h-3.5" />
) : (
<ChevronDown className="w-3.5 h-3.5" />
)}
</span>
</button>
{/* Card body — use Tailwind theme tokens so dark mode
gets a proper dark background instead of a glaring
near-white hardcoded hsl. Finished runs get a subtle
green tint that also respects theme. */}
<div
className={`rounded-2xl rounded-tl-md overflow-hidden border ${
isFinished
? "border-green-300/50 bg-green-50/50 dark:border-green-900/40 dark:bg-green-950/20"
: "border-border bg-muted/60"
}`}
>
{/* Collapsed: single-line plain-text preview of the
latest worker text, OR a tool-name chain when the
worker hasn't emitted any prose yet. MarkdownContent
is intentionally NOT used here — its inline-code
rendering turns every backtick-wrapped fragment into
a floating pill, which wrecks the preview. */}
{!expanded && (
<div className="px-4 py-2.5 text-sm text-muted-foreground">
{latestText ? (
<div className="truncate">
{stripMarkdownToPreview(latestText)}
</div>
) : uniqueToolNames.length > 0 ? (
<span className="text-xs font-mono truncate block">
{uniqueToolNames.slice(0, 5).join(" \u2192 ")}
{uniqueToolNames.length > 5 &&
` + ${uniqueToolNames.length - 5} more`}
</span>
) : (
<span className="text-xs text-muted-foreground/60 italic">
{"waiting for first action\u2026"}
</span>
)}
</div>
)}
{/* Expanded: chronological stream with tool bursts
coalesced into a single ToolActivityRow each.
Consecutive tool_status messages (no text between)
collapse to the LATEST snapshot — each snapshot is
cumulative within its turn, so the latest one tells
the whole story for that burst. Text messages break
the burst and render as markdown. */}
{expanded && (
<div
ref={bodyRef}
className="max-h-[400px] overflow-y-auto px-4 py-3 space-y-3"
>
{(() => {
type RenderRow =
| { kind: "tools"; content: string; key: string }
| { kind: "text"; msg: ChatMessage; key: string };
const rows: RenderRow[] = [];
let pendingTool: { content: string; id: string } | null = null;
const flushTool = () => {
if (pendingTool) {
rows.push({
kind: "tools",
content: pendingTool.content,
key: `tools-${pendingTool.id}`,
});
pendingTool = null;
}
};
for (let i = 0; i < group.messages.length; i++) {
const m = group.messages[i];
if (m.type === "tool_status") {
// Overwrite — latest snapshot in the burst wins
pendingTool = {
content: m.content,
id: m.id || `ts-${i}`,
};
continue;
}
if (m.content?.trim()) {
flushTool();
rows.push({
kind: "text",
msg: m,
key: m.id || `txt-${i}`,
});
}
}
flushTool();
return rows.map((row) => {
if (row.kind === "tools") {
// ToolActivityRow groups by tool name (×N), shows
// running pills (spinner) before done pills (check),
// and uses the per-tool color hash that matches
// the rest of the chat.
return (
<div key={row.key} className="-ml-10">
<ToolActivityRow content={row.content} />
</div>
);
}
return (
<div
key={row.key}
className="text-sm leading-relaxed"
>
<MarkdownContent content={row.msg.content} />
</div>
);
});
})()}
</div>
)}
</div>
</div>
</div>
);
},
(prev, next) =>
prev.runId === next.runId &&
prev.label === next.label &&
prev.group.messages.length === next.group.messages.length &&
prev.group.messages[prev.group.messages.length - 1]?.content ===
next.group.messages[next.group.messages.length - 1]?.content
);
export default WorkerRunBubble;
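// Usage sketch (props as defined above; the message array is hypothetical):
//   <WorkerRunBubble runId="wrun-42-worker:aaa" label="aaa"
//                    group={{ messages: workerMsgs }} />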
@@ -1,31 +0,0 @@
import { createContext, useContext, useCallback, type ReactNode } from "react";
interface QueenProfileContextValue {
openQueenProfile: (queenId: string) => void;
}
const QueenProfileContext = createContext<QueenProfileContextValue | null>(null);
export function QueenProfileProvider({
onOpen,
children,
}: {
onOpen: (queenId: string) => void;
children: ReactNode;
}) {
const openQueenProfile = useCallback(
(queenId: string) => onOpen(queenId),
[onOpen],
);
return (
<QueenProfileContext.Provider value={{ openQueenProfile }}>
{children}
</QueenProfileContext.Provider>
);
}
export function useQueenProfile() {
const ctx = useContext(QueenProfileContext);
if (!ctx) throw new Error("useQueenProfile must be used within QueenProfileProvider");
return ctx;
}
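// Usage sketch inside any descendant of QueenProfileProvider (queenId hypothetical):
//   const { openQueenProfile } = useQueenProfile();
//   <button onClick={() => openQueenProfile("queen_brand_design")}>Profile</button>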
+19 -27
View File
@@ -1,11 +1,10 @@
import { useEffect, useState, useCallback } from "react";
import { useEffect, useState } from "react";
import { Outlet, useLocation } from "react-router-dom";
import Sidebar from "@/components/Sidebar";
import AppHeader from "@/components/AppHeader";
import QueenProfilePanel from "@/components/QueenProfilePanel";
import { ColonyProvider, useColony } from "@/context/ColonyContext";
import { HeaderActionsProvider } from "@/context/HeaderActionsContext";
import { QueenProfileProvider } from "@/context/QueenProfileContext";
export default function AppLayout() {
return (
@@ -28,33 +27,26 @@ function AppLayoutInner() {
setOpenQueenId(null);
}, [location.pathname]);
const handleOpenQueenProfile = useCallback(
(queenId: string) => setOpenQueenId((prev) => (prev === queenId ? null : queenId)),
[],
);
return (
<QueenProfileProvider onOpen={handleOpenQueenProfile}>
<div className="flex h-screen bg-background overflow-hidden">
<Sidebar />
<div className="flex-1 min-w-0 flex flex-col">
<AppHeader onOpenQueenProfile={handleOpenQueenProfile} />
<div className="flex-1 min-h-0 flex">
<main className="flex-1 min-w-0 flex flex-col">
<Outlet />
</main>
{openQueenId && (
<QueenProfilePanel
queenId={openQueenId}
colonies={colonies.filter(
(c) => c.queenProfileId === openQueenId,
)}
onClose={() => setOpenQueenId(null)}
/>
)}
</div>
<div className="flex h-screen bg-background overflow-hidden">
<Sidebar />
<div className="flex-1 min-w-0 flex flex-col">
<AppHeader onOpenQueenProfile={setOpenQueenId} />
<div className="flex-1 min-h-0 flex">
<main className="flex-1 min-w-0 flex flex-col">
<Outlet />
</main>
{openQueenId && (
<QueenProfilePanel
queenId={openQueenId}
colonies={colonies.filter(
(c) => c.queenProfileId === openQueenId,
)}
onClose={() => setOpenQueenId(null)}
/>
)}
</div>
</div>
</QueenProfileProvider>
</div>
);
}
-187
View File
@@ -119,7 +119,6 @@ export function sseEventToChatMessage(
createdAt,
nodeId: event.node_id || undefined,
executionId: event.execution_id || undefined,
streamId: event.stream_id || undefined,
};
}
@@ -139,7 +138,6 @@ export function sseEventToChatMessage(
type: "user",
thread,
createdAt,
streamId: event.stream_id || undefined,
};
}
@@ -160,7 +158,6 @@ export function sseEventToChatMessage(
createdAt,
nodeId: event.node_id || undefined,
executionId: event.execution_id || undefined,
streamId: event.stream_id || undefined,
};
}
@@ -175,7 +172,6 @@ export function sseEventToChatMessage(
type: "system",
thread,
createdAt,
streamId: event.stream_id || undefined,
};
}
@@ -190,7 +186,6 @@ export function sseEventToChatMessage(
type: "system",
thread,
createdAt,
streamId: event.stream_id || undefined,
};
}
@@ -199,188 +194,6 @@ export function sseEventToChatMessage(
}
}
// ---------------------------------------------------------------------------
// Stateful event replay — produces tool_status pills + regular messages
// ---------------------------------------------------------------------------
/**
* State maintained while replaying an event stream. Tracks per-stream turn
* counters, the set of active tool calls (so tool_status pill content
* reflects "tool A done, tool B running" correctly), and a tool_use_id →
* pill_msg_id map so deferred `tool_call_completed` events can find the
* pill they belong to after the turn counter moves on.
*/
export interface ReplayState {
turnCounters: Record<string, number>;
activeToolCalls: Record<
string,
{ name: string; done: boolean; streamId: string }
>;
toolUseToPill: Record<string, { msgId: string; name: string }>;
}
export function newReplayState(): ReplayState {
return { turnCounters: {}, activeToolCalls: {}, toolUseToPill: {} };
}
/**
* Process a single event and emit zero or more ChatMessage upserts.
*
* Why this exists: `sseEventToChatMessage` is stateless — one event in, at
* most one message out. But the chat's tool_status pill is a SYNTHESIZED
* message: each tool_call_started adds to an accumulating pill, and each
* tool_call_completed flips one of its tools from running to done. Live
* SSE handlers in colony-chat and queen-dm already do this synthesis
* against React refs. Cold-restore from events.jsonl used to skip
* tool_call_* events entirely, so refreshed sessions looked completely
* different from live ones — no tool activity visible, just prose.
*
* This function centralizes the synthesis so cold-restore and live paths
* can use the exact same state machine. The caller treats the returned
* messages as upserts (by id) — a later event in the same replay may
* emit the same pill id with updated content, which should REPLACE the
* earlier row in the caller's message list.
*/
export function replayEvent(
state: ReplayState,
event: AgentEvent,
thread: string,
agentDisplayName: string | undefined,
): ChatMessage[] {
const streamId = event.stream_id;
const isQueen = streamId === "queen";
const role: "queen" | "worker" = isQueen ? "queen" : "worker";
const turnKey = streamId;
const currentTurn = state.turnCounters[turnKey] ?? 0;
const eventCreatedAt = event.timestamp
? new Date(event.timestamp).getTime()
: Date.now();
const out: ChatMessage[] = [];
// Update state machine BEFORE the generic converter runs so the
// regular message emitted for this event sees the post-update
// counter (matches live handler ordering at colony-chat.tsx:525).
switch (event.type) {
case "execution_started":
state.turnCounters[turnKey] = currentTurn + 1;
// New execution for a worker resets its active tools, mirroring
// the live handler's setAgentState at colony-chat.tsx:566.
if (!isQueen) {
const keepActive: typeof state.activeToolCalls = {};
for (const [k, v] of Object.entries(state.activeToolCalls)) {
if (v.streamId !== streamId) keepActive[k] = v;
}
state.activeToolCalls = keepActive;
}
break;
case "llm_turn_complete":
state.turnCounters[turnKey] = currentTurn + 1;
break;
case "tool_call_started": {
if (!event.node_id) break;
const toolName = (event.data?.tool_name as string) || "unknown";
const toolUseId = (event.data?.tool_use_id as string) || "";
state.activeToolCalls[toolUseId] = {
name: toolName,
done: false,
streamId,
};
const pillId = `tool-pill-${streamId}-${event.execution_id || "exec"}-${currentTurn}`;
if (toolUseId) {
state.toolUseToPill[toolUseId] = { msgId: pillId, name: toolName };
}
const tools = Object.values(state.activeToolCalls)
.filter((t) => t.streamId === streamId)
.map((t) => ({ name: t.name, done: t.done }));
const allDone = tools.length > 0 && tools.every((t) => t.done);
out.push({
id: pillId,
agent: agentDisplayName || event.node_id || "Agent",
agentColor: "",
content: JSON.stringify({ tools, allDone }),
timestamp: "",
type: "tool_status",
role,
thread,
createdAt: eventCreatedAt,
nodeId: event.node_id || undefined,
executionId: event.execution_id || undefined,
streamId: streamId || undefined,
});
break;
}
case "tool_call_completed": {
if (!event.node_id) break;
const toolUseId = (event.data?.tool_use_id as string) || "";
const tracked = state.toolUseToPill[toolUseId];
if (toolUseId) delete state.toolUseToPill[toolUseId];
if (toolUseId && state.activeToolCalls[toolUseId]) {
state.activeToolCalls[toolUseId].done = true;
}
if (!tracked) break;
const tools = Object.values(state.activeToolCalls)
.filter((t) => t.streamId === streamId)
.map((t) => ({ name: t.name, done: t.done }));
const allDone = tools.length > 0 && tools.every((t) => t.done);
// Re-emit the SAME pill id with updated content. Caller upserts
// by id, so this replaces the row from tool_call_started.
out.push({
id: tracked.msgId,
agent: agentDisplayName || event.node_id || "Agent",
agentColor: "",
content: JSON.stringify({ tools, allDone }),
timestamp: "",
type: "tool_status",
role,
thread,
createdAt: eventCreatedAt,
nodeId: event.node_id || undefined,
executionId: event.execution_id || undefined,
streamId: streamId || undefined,
});
break;
}
}
// Regular stateless conversion (prose, user input, system notes).
const msg = sseEventToChatMessage(
event,
thread,
agentDisplayName,
state.turnCounters[turnKey] ?? 0,
);
if (msg) {
if (isQueen) msg.role = "queen";
out.push(msg);
}
return out;
}
/**
* Replay an entire event array and return a deduplicated, chronologically
* sorted ChatMessage list. Used by cold-restore paths so refreshed
* sessions match the live stream exactly.
*/
export function replayEventsToMessages(
events: AgentEvent[],
thread: string,
agentDisplayName: string | undefined,
): ChatMessage[] {
const state = newReplayState();
// Upsert by id — later emissions for the same pill replace earlier ones.
const byId = new Map<string, ChatMessage>();
for (const evt of events) {
for (const m of replayEvent(state, evt, thread, agentDisplayName)) {
byId.set(m.id, m);
}
}
return Array.from(byId.values()).sort(
(a, b) => (a.createdAt ?? 0) - (b.createdAt ?? 0),
);
}
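// Minimal sketch, assuming the AgentEvent fields used above
// (type, stream_id, node_id, execution_id, timestamp, data):
//   const demo = [
//     { type: "tool_call_started", stream_id: "worker:aaa", node_id: "n1",
//       execution_id: "e1", timestamp: "2026-04-16T00:00:00Z",
//       data: { tool_name: "read_file", tool_use_id: "tu1" } },
//     { type: "tool_call_completed", stream_id: "worker:aaa", node_id: "n1",
//       execution_id: "e1", timestamp: "2026-04-16T00:00:01Z",
//       data: { tool_use_id: "tu1" } },
//   ] as AgentEvent[];
//   replayEventsToMessages(demo, "queen-dm", "Queen")
// Both events target the same pill id and the stateless converter emits
// nothing extra for tool_call_* events, so the result is ONE tool_status
// message with content {"tools":[{"name":"read_file","done":true}],"allDone":true}.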
type QueenPhase = "planning" | "building" | "staging" | "running" | "independent";
const VALID_PHASES = new Set<string>(["planning", "building", "staging", "running", "independent"]);
+13 -53
View File
@@ -13,11 +13,7 @@ import { executionApi } from "@/api/execution";
import { sessionsApi } from "@/api/sessions";
import { useMultiSSE } from "@/hooks/use-sse";
import type { LiveSession, AgentEvent } from "@/api/types";
import {
sseEventToChatMessage,
formatAgentDisplayName,
replayEventsToMessages,
} from "@/lib/chat-helpers";
import { sseEventToChatMessage, formatAgentDisplayName } from "@/lib/chat-helpers";
import { cronToLabel } from "@/lib/graphUtils";
import { ApiError } from "@/api/client";
import { useColony } from "@/context/ColonyContext";
@@ -45,8 +41,6 @@ function truncate(s: string, max: number): string {
type SessionRestoreResult = {
messages: ChatMessage[];
restoredPhase: "planning" | "building" | "staging" | "running" | "independent" | null;
truncated: boolean;
droppedCount: number;
};
async function restoreSessionMessages(
@@ -55,67 +49,34 @@ async function restoreSessionMessages(
agentDisplayName: string,
): Promise<SessionRestoreResult> {
try {
const { events, truncated, total, returned } =
await sessionsApi.eventsHistory(sessionId);
const { events } = await sessionsApi.eventsHistory(sessionId);
if (events.length > 0) {
// Walk events twice:
// 1. Extract the trailing queen phase (unchanged logic).
// 2. Run the full state-machine replay so tool_status pills
// are synthesized just like the live SSE handler does.
// Without (2), refreshed sessions showed zero tool activity
// because tool_call_started/completed events are ignored by
// the stateless converter.
const messages: ChatMessage[] = [];
let runningPhase: ChatMessage["phase"] = undefined;
for (const evt of events) {
const p =
evt.type === "queen_phase_changed"
? (evt.data?.phase as string)
: evt.type === "node_loop_iteration"
? (evt.data?.phase as string | undefined)
: undefined;
? (evt.data?.phase as string | undefined)
: undefined;
if (p && ["planning", "building", "staging", "running"].includes(p)) {
runningPhase = p as ChatMessage["phase"];
}
}
const messages = replayEventsToMessages(events, thread, agentDisplayName);
// Stamp the latest phase on every queen message so the UI's
// phase-badge rendering matches what the live path would have
// displayed at the time of the refresh.
if (runningPhase) {
for (const m of messages) {
if (m.role === "queen") m.phase = runningPhase;
const msg = sseEventToChatMessage(evt, thread, agentDisplayName);
if (!msg) continue;
if (evt.stream_id === "queen") {
msg.role = "queen";
msg.phase = runningPhase;
}
messages.push(msg);
}
// Prepend a run_divider banner when the server truncated older
// events so the user knows how many are hidden.
const droppedCount = Math.max(0, total - returned);
if (truncated && droppedCount > 0) {
const firstTs = events[0]?.timestamp;
const bannerCreatedAt = firstTs ? new Date(firstTs).getTime() - 1 : 0;
messages.unshift({
id: `restore-truncated-${sessionId}`,
agent: "System",
agentColor: "",
type: "run_divider",
content: `${droppedCount.toLocaleString()} older event${droppedCount === 1 ? "" : "s"} not shown (showing last ${returned.toLocaleString()})`,
timestamp: firstTs ?? new Date().toISOString(),
thread,
createdAt: bannerCreatedAt,
});
}
return {
messages,
restoredPhase: runningPhase ?? null,
truncated,
droppedCount,
};
return { messages, restoredPhase: runningPhase ?? null };
}
} catch {
// Event log not available
}
return { messages: [], restoredPhase: null, truncated: false, droppedCount: 0 };
return { messages: [], restoredPhase: null };
}
// ── Agent backend state ──────────────────────────────────────────────────────
@@ -855,7 +816,6 @@ export default function ColonyChat() {
createdAt: eventCreatedAt,
nodeId: event.node_id || undefined,
executionId: event.execution_id || undefined,
streamId: sid || undefined,
});
return { ...prev, isStreaming: false, activeToolCalls: newActive };
});
+9 -30
View File
@@ -11,10 +11,7 @@ import { sessionsApi } from "@/api/sessions";
import { queensApi } from "@/api/queens";
import { useMultiSSE } from "@/hooks/use-sse";
import type { AgentEvent, HistorySession } from "@/api/types";
import {
sseEventToChatMessage,
replayEventsToMessages,
} from "@/lib/chat-helpers";
import { sseEventToChatMessage } from "@/lib/chat-helpers";
import { useColony } from "@/context/ColonyContext";
import { useHeaderActions } from "@/context/HeaderActionsContext";
import { getQueenForAgent, slugToColonyId } from "@/lib/colony-registry";
@@ -93,34 +90,17 @@ export default function QueenDM() {
const restoreMessages = useCallback(
async (sid: string, cancelled: () => boolean) => {
try {
const { events, truncated, total, returned } =
await sessionsApi.eventsHistory(sid);
const { events } = await sessionsApi.eventsHistory(sid);
if (cancelled()) return;
// Use the stateful replay so tool_status pills are synthesized
// the same way the live SSE handler does — without this the
// refreshed queen DM shows zero tool activity.
const restored = replayEventsToMessages(events, "queen-dm", queenName);
// Show a banner if the server truncated older events.
const droppedCount = Math.max(0, total - returned);
if (truncated && droppedCount > 0) {
const firstTs = events[0]?.timestamp;
const bannerCreatedAt = firstTs
? new Date(firstTs).getTime() - 1
: 0;
restored.unshift({
id: `restore-truncated-${sid}`,
agent: "System",
agentColor: "",
type: "run_divider",
content: `${droppedCount.toLocaleString()} older event${droppedCount === 1 ? "" : "s"} not shown (showing last ${returned.toLocaleString()})`,
timestamp: firstTs ?? new Date().toISOString(),
thread: "queen-dm",
createdAt: bannerCreatedAt,
});
const restored: ChatMessage[] = [];
for (const evt of events) {
const msg = sseEventToChatMessage(evt, "queen-dm", queenName);
if (!msg) continue;
if (evt.stream_id === "queen") msg.role = "queen";
restored.push(msg);
}
if (restored.length > 0 && !cancelled()) {
restored.sort((a, b) => (a.createdAt ?? 0) - (b.createdAt ?? 0));
setMessages(restored);
// Only clear typing if the history contains a completed execution;
// during bootstrap the queen is still processing.
@@ -621,7 +601,6 @@ export default function QueenDM() {
createdAt: eventCreatedAt,
nodeId: event.node_id || undefined,
executionId: event.execution_id || undefined,
streamId: sid || undefined,
};
setMessages((prevMsgs) => {
const idx = prevMsgs.findIndex((m) => m.id === msgId);
+2 -17
View File
@@ -72,28 +72,13 @@ def patched_fork(monkeypatch):
"""Stub out fork_session_into_colony so we don't need a real queen."""
calls: list[dict] = []
async def _stub_fork(
*,
session: Any,
colony_name: str,
task: str,
tasks: list[dict] | None = None,
) -> dict:
calls.append(
{
"session": session,
"colony_name": colony_name,
"task": task,
"tasks": tasks,
}
)
async def _stub_fork(*, session: Any, colony_name: str, task: str) -> dict:
calls.append({"session": session, "colony_name": colony_name, "task": task})
return {
"colony_path": f"/tmp/fake_colonies/{colony_name}",
"colony_name": colony_name,
"queen_session_id": "session_fake_fork_id",
"is_new": True,
"db_path": f"/tmp/fake_colonies/{colony_name}/data/progress.db",
"task_ids": [],
}
monkeypatch.setattr(
+42 -28
View File
@@ -17,10 +17,10 @@ _DEFAULT_SKILLS_DIR = Path(__file__).resolve().parent.parent / "framework" / "sk
class TestDefaultSkillFiles:
"""Verify all built-in SKILL.md files parse correctly."""
"""Verify all 7 built-in SKILL.md files parse correctly."""
def test_all_skills_exist(self):
assert len(SKILL_REGISTRY) == 6
def test_all_seven_skills_exist(self):
assert len(SKILL_REGISTRY) == 7
@pytest.mark.parametrize("skill_name,dir_name", list(SKILL_REGISTRY.items()))
def test_skill_parses(self, skill_name, dir_name):
@@ -35,13 +35,7 @@ class TestDefaultSkillFiles:
assert parsed.source_scope == "framework"
def test_combined_token_budget(self):
"""All default skill bodies combined should stay within the protocols budget.
Ceiling is 5000 tokens (~20000 chars): the prompt-injection path
appends every registered skill body to the system prompt, so
uncontrolled growth would balloon every LLM call. 5000 gives
headroom over today's ~3500 while still catching obvious bloat.
"""
"""All default skill bodies combined should be under 3000 tokens (~12000 chars)."""
total_chars = 0
for dir_name in SKILL_REGISTRY.values():
path = _DEFAULT_SKILLS_DIR / dir_name / "SKILL.md"
@@ -50,9 +44,9 @@ class TestDefaultSkillFiles:
total_chars += len(parsed.body)
approx_tokens = total_chars // 4
assert approx_tokens < 5000, (
assert approx_tokens < 3000, (
f"Combined default skill bodies are ~{approx_tokens} tokens "
f"({total_chars} chars), exceeding the 5000 token budget"
f"({total_chars} chars), exceeding the 3000 token budget"
)
def test_data_buffer_keys_all_prefixed(self):
@@ -66,7 +60,7 @@ class TestDefaultSkillManager:
manager = DefaultSkillManager()
manager.load()
assert len(manager.active_skill_names) == len(SKILL_REGISTRY)
assert len(manager.active_skill_names) == 7
for name in SKILL_REGISTRY:
assert name in manager.active_skill_names
@@ -103,7 +97,7 @@ class TestDefaultSkillManager:
manager.load()
assert "hive.quality-monitor" not in manager.active_skill_names
assert len(manager.active_skill_names) == len(SKILL_REGISTRY) - 1
assert len(manager.active_skill_names) == 6
def test_disable_all_via_convention(self):
config = SkillsConfig.from_agent_vars(default_skills={"_all": {"enabled": False}})
@@ -142,7 +136,7 @@ class TestSkillsConfig:
def test_explicit_disable(self):
config = SkillsConfig(default_skills={"hive.note-taking": DefaultSkillConfig(enabled=False)})
assert config.is_default_enabled("hive.note-taking") is False
assert config.is_default_enabled("hive.quality-monitor") is True
assert config.is_default_enabled("hive.batch-ledger") is True
def test_all_disabled_flag(self):
config = SkillsConfig(all_defaults_disabled=True)
@@ -172,11 +166,11 @@ class TestSkillsConfig:
def test_get_default_overrides(self):
config = SkillsConfig.from_agent_vars(
default_skills={
"hive.quality-monitor": {"enabled": True, "assessment_interval": 10},
"hive.batch-ledger": {"enabled": True, "checkpoint_every_n": 10},
}
)
overrides = config.get_default_overrides("hive.quality-monitor")
assert overrides == {"assessment_interval": 10}
overrides = config.get_default_overrides("hive.batch-ledger")
assert overrides == {"checkpoint_every_n": 10}
def test_get_default_overrides_empty(self):
config = SkillsConfig()
@@ -250,20 +244,40 @@ class TestConfigOverrideSubstitution:
assert "{{" not in cleaned
class TestBatchDeprecatedNoOps:
"""batch-ledger skill was removed; is_batch_scenario() and batch_init_nudge
are deprecated no-ops that return False / None unconditionally. They are
kept in-tree to avoid touching every orchestrator/execution_manager call
site that still reads the nudge through the config plumbing."""
class TestBatchAutoDetection:
"""DS-12: is_batch_scenario() and batch_init_nudge property."""
def test_is_batch_scenario_always_false(self):
assert is_batch_scenario("process a list of 100 leads") is False
assert is_batch_scenario("for each record, send an email") is False
assert is_batch_scenario("write a summary") is False
def test_detects_list_of(self):
assert is_batch_scenario("process a list of 100 leads") is True
def test_batch_init_nudge_always_none(self):
def test_detects_collection_of(self):
assert is_batch_scenario("a collection of invoices") is True
def test_detects_items(self):
assert is_batch_scenario("go through all items in the spreadsheet") is True
def test_detects_for_each(self):
assert is_batch_scenario("for each record, send an email") is True
def test_no_match_single_task(self):
assert is_batch_scenario("write a summary of the quarterly report") is False
def test_batch_nudge_active_by_default(self):
manager = DefaultSkillManager()
manager.load()
assert manager.batch_init_nudge is not None
assert "_batch_ledger" in manager.batch_init_nudge
def test_batch_nudge_none_when_skill_disabled(self):
config = SkillsConfig.from_agent_vars(default_skills={"hive.batch-ledger": {"enabled": False}})
manager = DefaultSkillManager(config)
manager.load()
assert manager.batch_init_nudge is None
def test_batch_nudge_none_when_auto_detect_disabled(self):
config = SkillsConfig.from_agent_vars(default_skills={"hive.batch-ledger": {"auto_detect_batch": False}})
manager = DefaultSkillManager(config)
manager.load()
assert manager.batch_init_nudge is None
-590
View File
@@ -1,590 +0,0 @@
"""Tests for framework.host.progress_db — per-colony task queue."""
from __future__ import annotations
import sqlite3
import threading
import time
from pathlib import Path
import pytest
from framework.host.progress_db import (
SCHEMA_VERSION,
ensure_all_colony_dbs,
ensure_progress_db,
enqueue_task,
reclaim_stale,
seed_tasks,
)
# ----------------------------------------------------------------------
# Schema / init
# ----------------------------------------------------------------------
def test_ensure_progress_db_fresh(tmp_path: Path) -> None:
colony = tmp_path / "c"
db_path = ensure_progress_db(colony)
assert db_path.exists()
assert db_path.name == "progress.db"
assert db_path.parent.name == "data"
con = sqlite3.connect(str(db_path))
try:
assert con.execute("PRAGMA journal_mode").fetchone()[0].lower() == "wal"
assert con.execute("PRAGMA user_version").fetchone()[0] == SCHEMA_VERSION
tables = {r[0] for r in con.execute("SELECT name FROM sqlite_master WHERE type='table'")}
assert {"tasks", "steps", "sop_checklist", "colony_meta"}.issubset(tables)
indexes = {r[0] for r in con.execute("SELECT name FROM sqlite_master WHERE type='index'")}
# Named indexes we declared
assert "idx_tasks_claimable" in indexes
assert "idx_steps_task_seq" in indexes
assert "idx_sop_required_open" in indexes
assert "idx_tasks_status" in indexes
finally:
con.close()
def test_ensure_progress_db_idempotent(tmp_path: Path) -> None:
colony = tmp_path / "c"
p1 = ensure_progress_db(colony)
p2 = ensure_progress_db(colony)
assert p1 == p2
con = sqlite3.connect(str(p1))
try:
assert con.execute("PRAGMA user_version").fetchone()[0] == SCHEMA_VERSION
finally:
con.close()
def test_ensure_all_colony_dbs_backfill(tmp_path: Path) -> None:
colonies_root = tmp_path / "colonies"
(colonies_root / "alpha").mkdir(parents=True)
(colonies_root / "beta").mkdir(parents=True)
(colonies_root / "gamma_not_dir").touch() # should be ignored
initialized = ensure_all_colony_dbs(colonies_root)
names = {p.parent.parent.name for p in initialized}
assert names == {"alpha", "beta"}
for p in initialized:
assert p.exists()
def test_ensure_all_colony_dbs_missing_root(tmp_path: Path) -> None:
missing = tmp_path / "nonexistent"
assert ensure_all_colony_dbs(missing) == []
# ----------------------------------------------------------------------
# Seeding / enqueue
# ----------------------------------------------------------------------
def test_seed_tasks_basic(tmp_path: Path) -> None:
db = ensure_progress_db(tmp_path / "c")
ids = seed_tasks(
db,
[
{
"goal": "task one",
"priority": 5,
"payload": {"url": "https://example.com"},
"steps": [
{"title": "open page"},
{"title": "extract data", "detail": "selector .content"},
],
"sop_items": [
{"key": "captcha_handled", "description": "Verify no CAPTCHA blocks"},
{"key": "soft_hint", "description": "optional", "required": False},
],
},
{"goal": "task two"},
],
)
assert len(ids) == 2
con = sqlite3.connect(str(db))
try:
rows = list(con.execute("SELECT id, goal, priority, status, source, payload FROM tasks ORDER BY goal"))
assert len(rows) == 2
assert rows[0][1] == "task one"
assert rows[0][2] == 5
assert rows[0][3] == "pending"
assert rows[0][4] == "queen_create"
assert '"url"' in rows[0][5]
step_count = con.execute(
"SELECT count(*) FROM steps WHERE task_id=?", (ids[0],)
).fetchone()[0]
assert step_count == 2
sop_rows = list(con.execute(
"SELECT key, required FROM sop_checklist WHERE task_id=? ORDER BY key", (ids[0],)
))
assert sop_rows == [("captcha_handled", 1), ("soft_hint", 0)]
finally:
con.close()
def test_seed_tasks_rejects_missing_goal(tmp_path: Path) -> None:
db = ensure_progress_db(tmp_path / "c")
with pytest.raises(ValueError):
seed_tasks(db, [{"priority": 1}])
def test_seed_tasks_empty_is_noop(tmp_path: Path) -> None:
db = ensure_progress_db(tmp_path / "c")
assert seed_tasks(db, []) == []
def test_seed_tasks_rollback_on_partial_failure(tmp_path: Path) -> None:
"""A bad row mid-batch must roll back the whole transaction."""
db = ensure_progress_db(tmp_path / "c")
with pytest.raises(ValueError):
seed_tasks(
db,
[
{"goal": "good one"},
{"priority": 1}, # missing goal -> boom
{"goal": "never inserted"},
],
)
con = sqlite3.connect(str(db))
try:
count = con.execute("SELECT count(*) FROM tasks").fetchone()[0]
assert count == 0
finally:
con.close()
def test_enqueue_task(tmp_path: Path) -> None:
db = ensure_progress_db(tmp_path / "c")
tid = enqueue_task(
db,
"appended",
steps=[{"title": "s1"}],
sop_items=[{"key": "k", "description": "d"}],
priority=3,
)
assert tid
con = sqlite3.connect(str(db))
try:
row = con.execute(
"SELECT goal, priority, source FROM tasks WHERE id=?", (tid,)
).fetchone()
assert row == ("appended", 3, "enqueue_tool")
assert con.execute(
"SELECT count(*) FROM steps WHERE task_id=?", (tid,)
).fetchone()[0] == 1
finally:
con.close()
def test_enqueue_task_custom_source(tmp_path: Path) -> None:
"""enqueue_task must accept a custom source value (e.g. run_agent_with_input).
Phase 2 wiring adds source values: create_colony_auto,
run_agent_with_input, run_parallel_workers. Verify the source
column stores them verbatim.
"""
db = ensure_progress_db(tmp_path / "c")
tid = enqueue_task(db, "chat task", source="run_agent_with_input")
con = sqlite3.connect(str(db))
try:
row = con.execute("SELECT goal, source FROM tasks WHERE id=?", (tid,)).fetchone()
assert row == ("chat task", "run_agent_with_input")
finally:
con.close()
def test_claim_by_assigned_id(tmp_path: Path) -> None:
"""Worker protocol: claim a specific row by id (not the generic next-pending).
The Phase 2 fix threads ``task_id`` into ``input_data`` when the
queen pre-assigns a row. The worker must be able to claim THAT
row atomically with an ``UPDATE ... WHERE id=? AND status='pending'``
pattern, and a second claim on the same id must return 0 rows.
"""
db = ensure_progress_db(tmp_path / "c")
[tid] = seed_tasks(db, [{"goal": "pinned task"}])
con = sqlite3.connect(str(db), isolation_level=None, timeout=5.0)
try:
cur = con.execute(
"""
UPDATE tasks SET status='claimed', worker_id=?,
claim_token=lower(hex(randomblob(8))),
claimed_at=datetime('now'),
updated_at=datetime('now')
WHERE id=? AND status='pending'
RETURNING id, goal
""",
("w1", tid),
)
row = cur.fetchone()
assert row == (tid, "pinned task"), f"expected one claim, got {row}"
# Second attempt on the same id must affect zero rows.
cur2 = con.execute(
"""
UPDATE tasks SET status='claimed', worker_id=?,
claim_token=lower(hex(randomblob(8))),
claimed_at=datetime('now')
WHERE id=? AND status='pending'
RETURNING id
""",
("w2", tid),
)
assert cur2.fetchone() is None, "second claim should affect zero rows"
# Ensure worker_id on the row is still the first claimant.
owner = con.execute(
"SELECT worker_id, status FROM tasks WHERE id=?", (tid,)
).fetchone()
assert owner == ("w1", "claimed")
finally:
con.close()
def test_claim_by_id_does_not_steal_unrelated_rows(tmp_path: Path) -> None:
"""Claim-by-id must only touch the named row, not siblings."""
db = ensure_progress_db(tmp_path / "c")
ids = seed_tasks(db, [{"goal": "a"}, {"goal": "b"}, {"goal": "c"}])
target = ids[1]
con = sqlite3.connect(str(db), isolation_level=None)
try:
con.execute(
"UPDATE tasks SET status='claimed', worker_id='w1', "
"claimed_at=datetime('now') WHERE id=? AND status='pending'",
(target,),
)
statuses = dict(con.execute("SELECT goal, status FROM tasks").fetchall())
assert statuses == {"a": "pending", "b": "claimed", "c": "pending"}
finally:
con.close()
def test_seed_tasks_bulk_10k(tmp_path: Path) -> None:
"""10k rows in one transaction should finish under a second on local disk."""
db = ensure_progress_db(tmp_path / "c")
tasks = [{"goal": f"task {i}", "seq": i} for i in range(10_000)]
start = time.perf_counter()
ids = seed_tasks(db, tasks)
elapsed = time.perf_counter() - start
assert len(ids) == 10_000
# Generous ceiling — on CI with slow disk we've seen ~300ms.
assert elapsed < 3.0, f"bulk seed too slow: {elapsed:.2f}s"
# ----------------------------------------------------------------------
# Atomic claim under concurrency
# ----------------------------------------------------------------------
_CLAIM_SQL = """
BEGIN IMMEDIATE;
UPDATE tasks
SET
status = 'claimed',
worker_id = ?,
claim_token = lower(hex(randomblob(8))),
claimed_at = datetime('now'),
updated_at = datetime('now')
WHERE id = (
SELECT id FROM tasks
WHERE status = 'pending'
ORDER BY priority DESC, seq, created_at
LIMIT 1
);
"""
def _claim_one(db_path: Path, worker_id: str) -> str | None:
"""Atomic single-shot claim using RETURNING (SQLite 3.35+).
The skill teaches agents the BEGIN IMMEDIATE + subquery UPDATE
pattern; for an in-process test helper we use RETURNING so the
claimed row id is returned from the same statement (no racing
follow-up SELECT). Functionally equivalent: both approaches rely
on the atomic subquery-UPDATE.
"""
con = sqlite3.connect(str(db_path), isolation_level=None, timeout=10.0)
con.execute("PRAGMA busy_timeout = 10000")
try:
cur = con.execute(
"""
UPDATE tasks
SET status = 'claimed',
worker_id = ?,
claim_token = lower(hex(randomblob(8))),
claimed_at = datetime('now'),
updated_at = datetime('now')
WHERE id = (
SELECT id FROM tasks
WHERE status = 'pending'
ORDER BY priority DESC, seq, created_at
LIMIT 1
)
RETURNING id
""",
(worker_id,),
)
row = cur.fetchone()
return row[0] if row else None
finally:
con.close()
def test_claim_atomicity_under_concurrency(tmp_path: Path) -> None:
"""20 threads racing to drain 100 tasks — each task claimed exactly once."""
db = ensure_progress_db(tmp_path / "c")
seed_tasks(db, [{"goal": f"task {i}", "seq": i} for i in range(100)])
claims: list[tuple[str, str]] = []
claims_lock = threading.Lock()
def worker(worker_id: str) -> None:
while True:
tid = _claim_one(db, worker_id)
if tid is None:
return
with claims_lock:
claims.append((worker_id, tid))
threads = [threading.Thread(target=worker, args=(f"w{i}",)) for i in range(20)]
for t in threads:
t.start()
for t in threads:
t.join(timeout=30)
task_ids = [tid for _, tid in claims]
assert len(task_ids) == 100, f"expected 100 claims, got {len(task_ids)}"
assert len(set(task_ids)) == 100, "duplicate claims detected"
con = sqlite3.connect(str(db))
try:
remaining = con.execute(
"SELECT count(*) FROM tasks WHERE status='pending'"
).fetchone()[0]
assert remaining == 0
claimed = con.execute(
"SELECT count(*) FROM tasks WHERE status='claimed'"
).fetchone()[0]
assert claimed == 100
finally:
con.close()
# ----------------------------------------------------------------------
# Stale-claim reclaimer
# ----------------------------------------------------------------------
def test_reclaim_stale_returns_to_pending(tmp_path: Path) -> None:
db = ensure_progress_db(tmp_path / "c")
[tid] = seed_tasks(db, [{"goal": "stuck"}])
# Simulate a claim made 20 minutes ago.
con = sqlite3.connect(str(db), isolation_level=None)
try:
con.execute(
"UPDATE tasks SET status='claimed', worker_id='w1', "
"claimed_at=datetime('now', '-20 minutes') WHERE id=?",
(tid,),
)
finally:
con.close()
reclaimed = reclaim_stale(db, stale_after_minutes=15)
assert reclaimed == 1
con = sqlite3.connect(str(db))
try:
row = con.execute(
"SELECT status, worker_id, retry_count FROM tasks WHERE id=?", (tid,)
).fetchone()
assert row == ("pending", None, 1)
finally:
con.close()
def test_reclaim_stale_fails_after_max_retries(tmp_path: Path) -> None:
db = ensure_progress_db(tmp_path / "c")
[tid] = seed_tasks(db, [{"goal": "doomed", "max_retries": 2}])
con = sqlite3.connect(str(db), isolation_level=None)
try:
con.execute(
"UPDATE tasks SET status='claimed', worker_id='w1', retry_count=2, "
"claimed_at=datetime('now', '-20 minutes') WHERE id=?",
(tid,),
)
finally:
con.close()
reclaim_stale(db, stale_after_minutes=15)
con = sqlite3.connect(str(db))
try:
row = con.execute(
"SELECT status, last_error FROM tasks WHERE id=?", (tid,)
).fetchone()
assert row[0] == "failed"
assert row[1] is not None and "max_retries" in row[1]
finally:
con.close()
def test_reclaim_stale_ignores_fresh_claims(tmp_path: Path) -> None:
db = ensure_progress_db(tmp_path / "c")
[tid] = seed_tasks(db, [{"goal": "working"}])
con = sqlite3.connect(str(db), isolation_level=None)
try:
con.execute(
"UPDATE tasks SET status='claimed', worker_id='w1', "
"claimed_at=datetime('now') WHERE id=?",
(tid,),
)
finally:
con.close()
reclaimed = reclaim_stale(db, stale_after_minutes=15)
assert reclaimed == 0
# ----------------------------------------------------------------------
# Foreign key cascade
# ----------------------------------------------------------------------
# ----------------------------------------------------------------------
# Worker config patching for pre-existing colonies
# ----------------------------------------------------------------------
def _write_worker_cfg(path: Path, *, with_input_data: dict | None = None) -> None:
"""Write a minimal worker.json that matches the shape ensure_progress_db patches."""
import json as _json
cfg = {
"name": "worker",
"system_prompt": "You are a worker.",
"goal": {"description": "do stuff", "success_criteria": [], "constraints": []},
"tools": [],
}
if with_input_data is not None:
cfg["input_data"] = with_input_data
path.write_text(_json.dumps(cfg, indent=2))
def test_ensure_progress_db_patches_existing_worker_json(tmp_path: Path) -> None:
"""Pre-existing worker.json without input_data gets db_path injected."""
import json as _json
colony = tmp_path / "legacy_colony"
colony.mkdir()
_write_worker_cfg(colony / "worker.json")
# Before: no input_data
before = _json.loads((colony / "worker.json").read_text())
assert "input_data" not in before
db = ensure_progress_db(colony)
after = _json.loads((colony / "worker.json").read_text())
assert after["input_data"]["db_path"] == str(db)
assert after["input_data"]["colony_id"] == "legacy_colony"
# Other fields untouched
assert after["system_prompt"] == "You are a worker."
assert after["goal"]["description"] == "do stuff"
def test_ensure_progress_db_patch_is_idempotent(tmp_path: Path) -> None:
"""Second call must not rewrite the file (mtime unchanged)."""
import time as _time
colony = tmp_path / "idem"
colony.mkdir()
_write_worker_cfg(colony / "worker.json")
ensure_progress_db(colony)
mtime1 = (colony / "worker.json").stat().st_mtime
_time.sleep(0.02) # ensure any rewrite would bump mtime
ensure_progress_db(colony)
mtime2 = (colony / "worker.json").stat().st_mtime
assert mtime1 == mtime2, "second ensure_progress_db must not rewrite worker.json"
def test_ensure_progress_db_preserves_existing_input_data_keys(tmp_path: Path) -> None:
"""Pre-existing input_data keys (other than db_path/colony_id) are preserved."""
import json as _json
colony = tmp_path / "preserved"
colony.mkdir()
_write_worker_cfg(
colony / "worker.json",
with_input_data={"custom_key": "hello", "db_path": "/stale/path.db"},
)
db = ensure_progress_db(colony)
after = _json.loads((colony / "worker.json").read_text())
assert after["input_data"]["custom_key"] == "hello"
assert after["input_data"]["db_path"] == str(db)
assert after["input_data"]["colony_id"] == "preserved"
def test_ensure_progress_db_skips_metadata_and_triggers(tmp_path: Path) -> None:
"""metadata.json and triggers.json are not worker configs — must not be touched."""
import json as _json
colony = tmp_path / "guarded"
colony.mkdir()
(colony / "metadata.json").write_text(_json.dumps({"colony_name": "guarded"}))
(colony / "triggers.json").write_text(_json.dumps([{"id": "t1"}]))
_write_worker_cfg(colony / "worker.json")
ensure_progress_db(colony)
meta = _json.loads((colony / "metadata.json").read_text())
trig = _json.loads((colony / "triggers.json").read_text())
assert "input_data" not in meta
assert trig == [{"id": "t1"}]
worker = _json.loads((colony / "worker.json").read_text())
assert "input_data" in worker
def test_task_delete_cascades_to_steps_and_sop(tmp_path: Path) -> None:
db = ensure_progress_db(tmp_path / "c")
[tid] = seed_tasks(
db,
[
{
"goal": "cascade test",
"steps": [{"title": "a"}, {"title": "b"}],
"sop_items": [{"key": "k", "description": "d"}],
}
],
)
con = sqlite3.connect(str(db), isolation_level=None)
try:
con.execute("PRAGMA foreign_keys = ON")
con.execute("DELETE FROM tasks WHERE id=?", (tid,))
assert con.execute(
"SELECT count(*) FROM steps WHERE task_id=?", (tid,)
).fetchone()[0] == 0
assert con.execute(
"SELECT count(*) FROM sop_checklist WHERE task_id=?", (tid,)
).fetchone()[0] == 0
finally:
con.close()
+1 -1
View File
@@ -141,7 +141,7 @@ class TestSkillDiscovery:
framework_skills = [s for s in skills if s.source_scope == "framework"]
names = {s.name for s in framework_skills}
assert "hive.note-taking" in names
assert "hive.colony-progress-tracker" in names
assert "hive.batch-ledger" in names
def test_max_depth_limit(self, tmp_path):
# Create a skill nested beyond max_depth
+1
View File
@@ -899,6 +899,7 @@ def test_concurrency_safe_allowlist_is_conservative():
"hashline_edit",
"browser_click",
"browser_type",
"browser_type_focused",
"browser_navigate",
):
assert forbidden not in allowlist, f"{forbidden} must not be concurrency-safe"
-42
View File
@@ -271,48 +271,6 @@ else
exit 1
fi
# Check for sqlite3 CLI (required for colony progress tracking)
echo -n " Checking for sqlite3... "
if command -v sqlite3 &> /dev/null; then
echo -e "${GREEN}ok${NC}"
else
echo -e "${YELLOW}not found${NC}"
# Attempt auto-install on common package managers
SQLITE_INSTALLED=false
if command -v apt-get &> /dev/null; then
echo -n " Installing sqlite3 via apt... "
if sudo apt-get install -y sqlite3 > /dev/null 2>&1; then
SQLITE_INSTALLED=true
fi
elif command -v brew &> /dev/null; then
echo -n " Installing sqlite3 via brew... "
if brew install sqlite > /dev/null 2>&1; then
SQLITE_INSTALLED=true
fi
elif command -v apk &> /dev/null; then
echo -n " Installing sqlite3 via apk... "
if apk add sqlite > /dev/null 2>&1; then
SQLITE_INSTALLED=true
fi
elif command -v dnf &> /dev/null; then
echo -n " Installing sqlite3 via dnf... "
if sudo dnf install -y sqlite > /dev/null 2>&1; then
SQLITE_INSTALLED=true
fi
elif command -v pacman &> /dev/null; then
echo -n " Installing sqlite3 via pacman... "
if sudo pacman -S --noconfirm sqlite > /dev/null 2>&1; then
SQLITE_INSTALLED=true
fi
fi
if [ "$SQLITE_INSTALLED" = true ]; then
echo -e "${GREEN}ok${NC}"
else
echo -e "${YELLOW} ⚠ Could not install sqlite3 automatically${NC}"
echo -e "${DIM} Install manually: apt install sqlite3 / brew install sqlite / apk add sqlite${NC}"
fi
fi
# Check for Chrome/Edge (required for GCU browser tools)
echo -n " Checking for Chrome/Edge browser... "
# Check common browser locations
+268
View File
@@ -0,0 +1,268 @@
"""
Browser Remote Control: acts as an agent to call browser tools via a UI.
Spawns its own GCU MCP server subprocess (same way a real agent does),
connects as an MCP client, and exposes the tools over HTTP for the web UI.
Usage:
uv run scripts/browser_remote.py # starts server + opens UI
uv run scripts/browser_remote.py --no-ui # API only, no browser open
Then use the UI at http://localhost:9250/ui or curl directly:
curl -X POST http://localhost:9250/browser_click \
-H 'Content-Type: application/json' \
-d '{"selector": "#login-btn"}'
"""
from __future__ import annotations
import argparse
import asyncio
import json
import logging
import os
import sys
import webbrowser
from pathlib import Path
from typing import Any
from aiohttp import web
# Add framework to path so we can use the existing MCPClient
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "core"))
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "tools", "src"))
from framework.loader.mcp_client import MCPClient, MCPServerConfig
logger = logging.getLogger("browser_remote")
DEFAULT_PORT = 9250
TOOLS_DIR = str((Path(__file__).parent.parent / "tools").resolve())
# ---------------------------------------------------------------------------
# MCP client — connects to GCU server exactly like an agent would
# ---------------------------------------------------------------------------
_mcp_client: MCPClient | None = None
def get_mcp_client() -> MCPClient:
"""Get or create the MCP client connected to the GCU server."""
global _mcp_client
if _mcp_client is None:
bridge_port = os.environ.get("HIVE_BRIDGE_PORT", "9229")
config = MCPServerConfig(
name="gcu-tools",
transport="stdio",
command="uv",
args=["run", "python", "-m", "gcu.server", "--stdio", "--capabilities", "browser"],
cwd=TOOLS_DIR,
env={"HIVE_BRIDGE_PORT": bridge_port},
)
_mcp_client = MCPClient(config)
_mcp_client.connect()
tools = _mcp_client.list_tools()
logger.info(
"Connected to GCU server, %d tools available: %s",
len(tools),
[t.name for t in tools],
)
return _mcp_client
# ---------------------------------------------------------------------------
# HTTP Handlers
# ---------------------------------------------------------------------------
async def handle_ui(request: web.Request) -> web.Response:
"""GET /ui — serve the web UI."""
ui_path = Path(__file__).parent / "browser_remote_ui.html"
return web.FileResponse(ui_path)
async def handle_index(request: web.Request) -> web.Response:
"""GET / — redirect to UI."""
raise web.HTTPFound("/ui")
async def handle_status(request: web.Request) -> web.Response:
"""GET /status — connection status."""
try:
client = get_mcp_client()
tools = client.list_tools()
return web.json_response({
"connected": True,
"tools_count": len(tools),
})
except Exception as e:
return web.json_response({"connected": False, "error": str(e)})
async def handle_tools(request: web.Request) -> web.Response:
"""GET /tools — list available tools with their schemas."""
try:
client = get_mcp_client()
tools = client.list_tools()
schemas = {}
for tool in tools:
props = tool.input_schema.get("properties", {})
required = tool.input_schema.get("required", [])
params = {}
for pname, pspec in props.items():
param_def: dict[str, Any] = {"type": pspec.get("type", "string")}
if pname in required:
param_def["required"] = True
if "default" in pspec:
param_def["default"] = pspec["default"]
if "enum" in pspec:
param_def["enum"] = pspec["enum"]
if pspec.get("type") == "array" and "items" in pspec:
param_def["items"] = pspec["items"].get("type", "string")
params[pname] = param_def
schemas[tool.name] = {
"description": tool.description.split("\n")[0].strip() if tool.description else "",
"params": params,
}
return web.json_response(schemas)
except Exception as e:
return web.json_response({"error": str(e)}, status=500)
async def handle_tool_call(request: web.Request) -> web.Response:
"""POST /<tool_name> — call a browser tool."""
tool_name = request.match_info["tool"]
try:
body = await request.read()
params = json.loads(body) if body.strip() else {}
except json.JSONDecodeError:
return web.json_response({"ok": False, "error": "Invalid JSON"}, status=400)
logger.info("=> %s %s", tool_name, json.dumps(params, default=str)[:200])
try:
client = get_mcp_client()
# call_tool is synchronous (blocks on the stdio subprocess)
# Run it in a thread so we don't block the event loop
loop = asyncio.get_running_loop()
result = await loop.run_in_executor(None, client.call_tool, tool_name, params)
# MCP returns a list of content blocks — extract text/image
response = _format_mcp_result(result)
logger.info("<= %s ok=%s", tool_name, response.get("ok", True))
return web.json_response(response)
except Exception as e:
logger.error("<= %s error: %s", tool_name, e)
return web.json_response({"ok": False, "error": str(e)}, status=500)
def _format_mcp_result(result: Any) -> dict:
"""Convert MCP tool result into a JSON-friendly dict."""
if result is None:
return {"ok": True}
# MCPClient.call_tool returns the raw result from the MCP SDK
# which could be a list of content blocks, a dict, or a string
if isinstance(result, dict):
return result
if isinstance(result, str):
try:
return json.loads(result)
except (json.JSONDecodeError, TypeError):
return {"ok": True, "text": result}
if isinstance(result, list):
# List of MCP content blocks (TextContent, ImageContent, etc.)
texts = []
images = []
for item in result:
if hasattr(item, "text"):
try:
parsed = json.loads(item.text)
if isinstance(parsed, dict):
return parsed # Tool returned structured JSON
except (json.JSONDecodeError, TypeError):
pass
texts.append(item.text)
elif hasattr(item, "data"):
images.append({"mime_type": getattr(item, "mime_type", "image/png"), "data": item.data})
response: dict[str, Any] = {"ok": True}
if texts:
response["text"] = "\n".join(texts)
if images:
response["images"] = images
return response
return {"ok": True, "result": str(result)}
# ---------------------------------------------------------------------------
# Server setup
# ---------------------------------------------------------------------------
@web.middleware
async def cors_middleware(request: web.Request, handler):
if request.method == "OPTIONS":
resp = web.Response()
else:
resp = await handler(request)
resp.headers["Access-Control-Allow-Origin"] = "*"
resp.headers["Access-Control-Allow-Methods"] = "GET, POST, OPTIONS"
resp.headers["Access-Control-Allow-Headers"] = "Content-Type"
return resp
def create_app() -> web.Application:
app = web.Application(middlewares=[cors_middleware])
app.router.add_get("/", handle_index)
app.router.add_get("/ui", handle_ui)
app.router.add_get("/tools", handle_tools)
app.router.add_get("/status", handle_status)
app.router.add_post("/{tool}", handle_tool_call)
return app
def main() -> None:
parser = argparse.ArgumentParser(description="Browser Remote Control")
parser.add_argument("--port", type=int, default=int(os.environ.get("BROWSER_REMOTE_PORT", DEFAULT_PORT)))
parser.add_argument("--no-ui", action="store_true", help="Don't auto-open the browser")
args = parser.parse_args()
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(levelname)s %(message)s")
# Connect to GCU server eagerly so we fail fast if something is wrong
try:
client = get_mcp_client()
except Exception as e:
logger.error("Failed to connect to GCU server: %s", e)
sys.exit(1)
# Auto-start browser context so tools work immediately
try:
result = client.call_tool("browser_start", {})
logger.info("browser_start: %s", result)
except Exception as e:
logger.warning("browser_start failed (may already be started): %s", e)
app = create_app()
async def on_startup(app: web.Application) -> None:
if not args.no_ui:
webbrowser.open(f"http://localhost:{args.port}/ui")
app.on_startup.append(on_startup)
print(f"Browser Remote Control on http://localhost:{args.port}")
print(f" UI: http://localhost:{args.port}/ui")
print(f" API: POST http://localhost:{args.port}/<tool>")
print()
web.run_app(app, host="127.0.0.1", port=args.port, print=None)
if __name__ == "__main__":
main()
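For reference, a minimal client-side sketch of driving this API from Python rather than the UI (assumes the server above is running on the default port; `requests` is the only extra dependency):

```python
import requests

BASE = "http://localhost:9250"

# Confirm the bridge to the GCU MCP subprocess is up.
status = requests.get(f"{BASE}/status").json()
assert status.get("connected"), status

# Any tool name maps to POST /<tool> with a JSON body of params.
requests.post(f"{BASE}/browser_navigate", json={"url": "https://example.com"})
result = requests.post(f"{BASE}/browser_click", json={"selector": "a"}).json()
print(result.get("ok"), result.get("focused_element"))
```

The `focused_element` key appears only when the click left something focused; see the bridge changes later in this diff.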
+838
View File
@@ -0,0 +1,838 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Browser Remote Control</title>
<style>
:root {
--bg: #0d1117;
--surface: #161b22;
--surface2: #21262d;
--border: #30363d;
--text: #e6edf3;
--text2: #8b949e;
--accent: #58a6ff;
--accent-dim: #1f6feb;
--green: #3fb950;
--red: #f85149;
--orange: #d29922;
--radius: 8px;
}
* { box-sizing: border-box; margin: 0; padding: 0; }
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif;
background: var(--bg);
color: var(--text);
line-height: 1.5;
padding: 0;
}
header {
background: var(--surface);
border-bottom: 1px solid var(--border);
padding: 16px 24px;
display: flex;
align-items: center;
justify-content: space-between;
position: sticky;
top: 0;
z-index: 100;
}
header h1 {
font-size: 18px;
font-weight: 600;
}
#status-badge {
font-size: 13px;
padding: 4px 12px;
border-radius: 20px;
font-weight: 500;
}
#status-badge.connected { background: rgba(63,185,80,0.15); color: var(--green); }
#status-badge.disconnected { background: rgba(248,81,73,0.15); color: var(--red); }
#status-badge.checking { background: rgba(210,153,34,0.15); color: var(--orange); }
.layout {
display: flex;
height: calc(100vh - 57px);
}
/* Sidebar */
.sidebar {
width: 240px;
min-width: 240px;
background: var(--surface);
border-right: 1px solid var(--border);
overflow-y: auto;
padding: 12px 0;
}
.sidebar-group {
margin-bottom: 8px;
}
.sidebar-group-label {
font-size: 11px;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.5px;
color: var(--text2);
padding: 8px 16px 4px;
}
.sidebar-item {
display: block;
width: 100%;
text-align: left;
background: none;
border: none;
color: var(--text2);
font-size: 13px;
padding: 6px 16px 6px 24px;
cursor: pointer;
font-family: 'SF Mono', 'Fira Code', monospace;
transition: background 0.1s, color 0.1s;
}
.sidebar-item:hover {
background: var(--surface2);
color: var(--text);
}
.sidebar-item.active {
background: rgba(88,166,255,0.1);
color: var(--accent);
border-right: 2px solid var(--accent);
}
/* Main content */
.main {
flex: 1;
overflow-y: auto;
padding: 24px 32px;
}
.tools-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(420px, 1fr));
gap: 16px;
}
.tool-card {
background: var(--surface);
border: 1px solid var(--border);
border-radius: var(--radius);
overflow: hidden;
transition: border-color 0.15s;
}
.tool-card:hover { border-color: var(--accent-dim); }
.tool-card.active { border-color: var(--accent); }
.tool-card-header {
display: flex;
align-items: center;
justify-content: space-between;
padding: 12px 16px;
border-bottom: 1px solid var(--border);
cursor: pointer;
user-select: none;
}
.tool-card-header:hover { background: var(--surface2); }
.tool-name {
font-family: 'SF Mono', 'Fira Code', monospace;
font-size: 13px;
font-weight: 600;
color: var(--accent);
}
.tool-desc {
font-size: 12px;
color: var(--text2);
margin-left: 8px;
}
.tool-card-body {
padding: 16px;
display: none;
}
.tool-card.open .tool-card-body { display: block; }
.chevron {
color: var(--text2);
transition: transform 0.2s;
font-size: 12px;
}
.tool-card.open .chevron { transform: rotate(90deg); }
/* Form fields */
.field {
margin-bottom: 12px;
}
.field:last-of-type { margin-bottom: 16px; }
.field label {
display: flex;
align-items: center;
gap: 6px;
font-size: 12px;
font-weight: 500;
color: var(--text2);
margin-bottom: 4px;
}
.field label .required {
color: var(--red);
font-size: 10px;
}
.field label .type-tag {
font-size: 10px;
padding: 1px 5px;
border-radius: 3px;
background: var(--surface2);
color: var(--text2);
font-family: 'SF Mono', 'Fira Code', monospace;
}
.field input, .field select, .field textarea {
width: 100%;
background: var(--bg);
border: 1px solid var(--border);
border-radius: 6px;
color: var(--text);
font-size: 13px;
padding: 8px 10px;
font-family: 'SF Mono', 'Fira Code', monospace;
outline: none;
transition: border-color 0.15s;
}
.field input:focus, .field select:focus, .field textarea:focus {
border-color: var(--accent);
}
.field textarea { min-height: 60px; resize: vertical; }
.field input[type="checkbox"] {
width: auto;
margin-right: 4px;
}
.checkbox-row {
display: flex;
align-items: center;
gap: 6px;
padding: 4px 0;
}
.checkbox-row label {
margin-bottom: 0;
cursor: pointer;
}
/* Buttons */
.btn-run {
display: inline-flex;
align-items: center;
gap: 6px;
background: var(--accent-dim);
color: #fff;
border: none;
border-radius: 6px;
padding: 8px 20px;
font-size: 13px;
font-weight: 600;
cursor: pointer;
transition: background 0.15s;
}
.btn-run:hover { background: var(--accent); }
.btn-run:disabled { opacity: 0.5; cursor: not-allowed; }
.btn-run.running { background: var(--orange); }
/* Result area */
.result-area {
margin-top: 12px;
display: none;
}
.result-area.visible { display: block; }
.result-header {
display: flex;
align-items: center;
gap: 8px;
margin-bottom: 6px;
}
.result-status {
font-size: 12px;
font-weight: 600;
padding: 2px 8px;
border-radius: 4px;
}
.result-status.ok { background: rgba(63,185,80,0.15); color: var(--green); }
.result-status.error { background: rgba(248,81,73,0.15); color: var(--red); }
.result-duration {
font-size: 11px;
color: var(--text2);
}
.result-json {
background: var(--bg);
border: 1px solid var(--border);
border-radius: 6px;
padding: 12px;
font-family: 'SF Mono', 'Fira Code', monospace;
font-size: 12px;
line-height: 1.6;
max-height: 300px;
overflow: auto;
white-space: pre-wrap;
word-break: break-word;
}
.result-screenshot {
max-width: 100%;
border: 1px solid var(--border);
border-radius: 6px;
margin-top: 8px;
}
/* History panel */
.history-panel {
width: 320px;
min-width: 320px;
background: var(--surface);
border-left: 1px solid var(--border);
overflow-y: auto;
padding: 12px;
}
.history-title {
font-size: 12px;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.5px;
color: var(--text2);
padding: 4px 4px 8px;
border-bottom: 1px solid var(--border);
margin-bottom: 8px;
}
.history-item {
padding: 8px;
border-radius: 6px;
margin-bottom: 4px;
cursor: pointer;
transition: background 0.1s;
border: 1px solid transparent;
}
.history-item:hover {
background: var(--surface2);
}
.history-item-tool {
font-family: 'SF Mono', 'Fira Code', monospace;
font-size: 12px;
font-weight: 600;
}
.history-item-tool.ok { color: var(--green); }
.history-item-tool.error { color: var(--red); }
.history-item-time {
font-size: 11px;
color: var(--text2);
}
.history-item-params {
font-size: 11px;
color: var(--text2);
font-family: 'SF Mono', 'Fira Code', monospace;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
max-width: 280px;
}
.history-empty {
color: var(--text2);
font-size: 13px;
text-align: center;
padding: 24px 0;
}
.clear-history {
background: none;
border: none;
color: var(--text2);
font-size: 11px;
cursor: pointer;
float: right;
padding: 0;
}
.clear-history:hover { color: var(--red); }
/* View mode toggle */
.view-toggle {
display: flex;
gap: 4px;
background: var(--surface2);
border-radius: 6px;
padding: 2px;
}
.view-toggle button {
background: none;
border: none;
color: var(--text2);
font-size: 12px;
padding: 4px 12px;
border-radius: 4px;
cursor: pointer;
}
.view-toggle button.active {
background: var(--accent-dim);
color: #fff;
}
/* Scrollbar */
::-webkit-scrollbar { width: 8px; height: 8px; }
::-webkit-scrollbar-track { background: transparent; }
::-webkit-scrollbar-thumb { background: var(--border); border-radius: 4px; }
::-webkit-scrollbar-thumb:hover { background: var(--text2); }
</style>
</head>
<body>
<header>
<div style="display:flex;align-items:center;gap:16px;">
<h1>Browser Remote Control</h1>
<div class="view-toggle">
<button class="active" onclick="setView('grid')">Grid</button>
<button onclick="setView('single')">Focus</button>
</div>
</div>
<div style="display:flex;align-items:center;gap:12px;">
<span id="context-info" style="font-size:12px;color:var(--text2)"></span>
<span id="status-badge" class="checking">checking...</span>
</div>
</header>
<div class="layout">
<nav class="sidebar" id="sidebar"></nav>
<main class="main" id="main-content"></main>
<aside class="history-panel" id="history-panel">
<div class="history-title">
History
<button class="clear-history" onclick="clearHistory()">clear</button>
</div>
<div id="history-list">
<div class="history-empty">No calls yet</div>
</div>
</aside>
</div>
<script>
const API_BASE = window.location.origin;
let toolSchemas = {};
let history = [];
let currentView = 'grid';
// Tool categories for sidebar grouping
const CATEGORIES = {
'Lifecycle': ['browser_start', 'browser_stop', 'browser_status'],
'Tabs': ['browser_tabs', 'browser_open', 'browser_close', 'browser_focus'],
'Navigation': ['browser_navigate', 'browser_go_back', 'browser_go_forward', 'browser_reload'],
'Interactions': ['browser_click', 'browser_click_coordinate', 'browser_type', 'browser_fill', 'browser_press', 'browser_press_at', 'browser_hover', 'browser_hover_coordinate', 'browser_select', 'browser_scroll'],
'Inspection': ['browser_screenshot', 'browser_snapshot', 'browser_console', 'browser_get_text', 'browser_evaluate', 'browser_wait'],
'Advanced': ['browser_resize', 'browser_upload', 'browser_dialog'],
};
async function init() {
await checkStatus();
await loadTools();
setInterval(checkStatus, 5000);
}
async function checkStatus() {
const badge = document.getElementById('status-badge');
const ctx = document.getElementById('context-info');
try {
const res = await fetch(`${API_BASE}/status`);
const data = await res.json();
if (data.connected) {
badge.textContent = 'connected';
badge.className = 'connected';
if (data.tools_count) {
ctx.textContent = `${data.tools_count} tools`;
} else if (data.contexts) {
const contexts = Object.entries(data.contexts);
ctx.textContent = contexts.length > 0
? contexts.map(([k,v]) => `${k}: tab ${v.activeTabId}`).join(', ')
: 'no active context';
} else {
ctx.textContent = '';
}
} else {
badge.textContent = 'disconnected';
badge.className = 'disconnected';
ctx.textContent = '';
}
} catch {
badge.textContent = 'unreachable';
badge.className = 'disconnected';
ctx.textContent = '';
}
}
async function loadTools() {
try {
const res = await fetch(`${API_BASE}/tools`);
toolSchemas = await res.json();
renderSidebar();
renderToolCards();
} catch (e) {
document.getElementById('main-content').innerHTML =
`<div style="color:var(--red);padding:40px;">Failed to load tools: ${e.message}</div>`;
}
}
function renderSidebar() {
const sidebar = document.getElementById('sidebar');
let html = '';
const categorized = new Set();
for (const [group, tools] of Object.entries(CATEGORIES)) {
const available = tools.filter(t => toolSchemas[t]);
if (available.length === 0) continue;
html += `<div class="sidebar-group"><div class="sidebar-group-label">${group}</div>`;
for (const tool of available) {
categorized.add(tool);
const shortName = tool.replace('browser_', '');
html += `<button class="sidebar-item" data-tool="${tool}" onclick="scrollToTool('${tool}')">${shortName}</button>`;
}
html += '</div>';
}
// Show any uncategorized tools from the server
const other = Object.keys(toolSchemas).filter(t => !categorized.has(t));
if (other.length > 0) {
html += `<div class="sidebar-group"><div class="sidebar-group-label">Other</div>`;
for (const tool of other) {
const shortName = tool.replace('browser_', '');
html += `<button class="sidebar-item" data-tool="${tool}" onclick="scrollToTool('${tool}')">${shortName}</button>`;
}
html += '</div>';
}
sidebar.innerHTML = html;
}
function renderToolCards() {
const main = document.getElementById('main-content');
let html = '<div class="tools-grid" id="tools-grid">';
for (const [tool, schema] of Object.entries(toolSchemas)) {
html += buildToolCard(tool, schema);
}
html += '</div>';
main.innerHTML = html;
}
function buildToolCard(tool, schema) {
const shortName = tool.replace('browser_', '');
let fieldsHtml = '';
for (const [param, spec] of Object.entries(schema.params)) {
fieldsHtml += buildField(tool, param, spec);
}
return `
<div class="tool-card" id="card-${tool}" data-tool="${tool}">
<div class="tool-card-header" onclick="toggleCard('${tool}')">
<div>
<span class="tool-name">${shortName}</span>
<span class="tool-desc">${schema.description}</span>
</div>
<span class="chevron">&#9654;</span>
</div>
<div class="tool-card-body">
<form id="form-${tool}" onsubmit="runTool(event, '${tool}')">
${fieldsHtml}
<button class="btn-run" type="submit" id="btn-${tool}">Run</button>
</form>
<div class="result-area" id="result-${tool}"></div>
</div>
</div>`;
}
function buildField(tool, param, spec) {
const id = `${tool}__${param}`;
const required = spec.required ? '<span class="required">*</span>' : '';
const typeTag = `<span class="type-tag">${spec.type}</span>`;
const defaultVal = spec.default !== undefined ? spec.default : '';
if (spec.type === 'boolean') {
return `
<div class="field">
<div class="checkbox-row">
<input type="checkbox" id="${id}" ${defaultVal ? 'checked' : ''}>
<label for="${id}">${param} ${typeTag} ${required}</label>
</div>
</div>`;
}
if (spec.enum) {
const opts = spec.enum.map(v => `<option value="${v}" ${v === defaultVal ? 'selected' : ''}>${v}</option>`).join('');
return `
<div class="field">
<label for="${id}">${param} ${typeTag} ${required}</label>
<select id="${id}">${opts}</select>
</div>`;
}
if (spec.type === 'array') {
return `
<div class="field">
<label for="${id}">${param} ${typeTag} ${required}
<span class="type-tag" style="margin-left:2px">JSON</span>
</label>
<input type="text" id="${id}" placeholder='["value1", "value2"]'>
</div>`;
}
// For expression / text that might be multiline
if (param === 'expression' || param === 'text') {
return `
<div class="field">
<label for="${id}">${param} ${typeTag} ${required}</label>
<textarea id="${id}" placeholder="${param}">${defaultVal}</textarea>
</div>`;
}
const inputType = (spec.type === 'integer' || spec.type === 'number') ? 'number' : 'text';
const step = spec.type === 'number' ? ' step="any"' : '';
return `
<div class="field">
<label for="${id}">${param} ${typeTag} ${required}</label>
<input type="${inputType}" id="${id}"${step} placeholder="${defaultVal !== '' ? defaultVal : param}" value="${defaultVal !== '' && spec.type !== 'string' ? defaultVal : ''}">
</div>`;
}
function toggleCard(tool) {
const card = document.getElementById(`card-${tool}`);
const wasOpen = card.classList.contains('open');
if (currentView === 'single') {
document.querySelectorAll('.tool-card.open').forEach(c => c.classList.remove('open'));
}
card.classList.toggle('open', !wasOpen);
// Update sidebar active state
document.querySelectorAll('.sidebar-item').forEach(s => s.classList.remove('active'));
if (!wasOpen) {
const sideItem = document.querySelector(`.sidebar-item[data-tool="${tool}"]`);
if (sideItem) sideItem.classList.add('active');
}
}
function scrollToTool(tool) {
const card = document.getElementById(`card-${tool}`);
if (!card) return;
// Open it
if (!card.classList.contains('open')) {
if (currentView === 'single') {
document.querySelectorAll('.tool-card.open').forEach(c => c.classList.remove('open'));
}
card.classList.add('open');
}
card.scrollIntoView({ behavior: 'smooth', block: 'start' });
document.querySelectorAll('.sidebar-item').forEach(s => s.classList.remove('active'));
const sideItem = document.querySelector(`.sidebar-item[data-tool="${tool}"]`);
if (sideItem) sideItem.classList.add('active');
}
function collectParams(tool) {
const schema = toolSchemas[tool];
const params = {};
for (const [param, spec] of Object.entries(schema.params)) {
const el = document.getElementById(`${tool}__${param}`);
if (!el) continue;
if (spec.type === 'boolean') {
params[param] = el.checked;
} else if (spec.type === 'array') {
const v = el.value.trim();
if (v) {
try { params[param] = JSON.parse(v); }
catch { params[param] = v.split(',').map(s => s.trim()); }
}
} else if (spec.type === 'integer') {
const v = el.value.trim();
if (v) params[param] = parseInt(v, 10);
} else if (spec.type === 'number') {
const v = el.value.trim();
if (v) params[param] = parseFloat(v);
} else {
const v = (el.value || '').trim();
if (v) params[param] = v;
}
}
return params;
}
async function runTool(event, tool) {
event.preventDefault();
const btn = document.getElementById(`btn-${tool}`);
const resultArea = document.getElementById(`result-${tool}`);
const params = collectParams(tool);
btn.textContent = 'Running...';
btn.classList.add('running');
btn.disabled = true;
const startTime = Date.now();
let result;
try {
const res = await fetch(`${API_BASE}/${tool}`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(params),
});
result = await res.json();
} catch (e) {
result = { ok: false, error: e.message };
}
const elapsed = Date.now() - startTime;
btn.textContent = 'Run';
btn.classList.remove('running');
btn.disabled = false;
// Render result
const isOk = result.ok !== false;
const statusClass = isOk ? 'ok' : 'error';
const statusText = isOk ? 'OK' : 'ERROR';
const duration = result._duration_ms ? `${result._duration_ms}ms` : `${elapsed}ms`;
let bodyHtml = '';
// Special handling for screenshot — show the image
if (tool === 'browser_screenshot' && result.data) {
bodyHtml = `<img class="result-screenshot" src="data:image/png;base64,${result.data}">`;
// Don't show the raw base64 in JSON
const display = { ...result };
display.data = `[${result.data.length} chars base64]`;
bodyHtml += `<pre class="result-json">${JSON.stringify(display, null, 2)}</pre>`;
} else {
bodyHtml = `<pre class="result-json">${JSON.stringify(result, null, 2)}</pre>`;
}
resultArea.innerHTML = `
<div class="result-header">
<span class="result-status ${statusClass}">${statusText}</span>
<span class="result-duration">${duration}</span>
</div>
${bodyHtml}`;
resultArea.classList.add('visible');
// Add to history
addHistory(tool, params, result, duration);
}
function addHistory(tool, params, result, duration) {
const entry = {
tool,
params,
result,
duration,
time: new Date().toLocaleTimeString(),
ok: result.ok !== false,
};
history.unshift(entry);
if (history.length > 50) history.pop();
renderHistory();
}
function renderHistory() {
const list = document.getElementById('history-list');
if (history.length === 0) {
list.innerHTML = '<div class="history-empty">No calls yet</div>';
return;
}
list.innerHTML = history.map((h, i) => {
const shortName = h.tool.replace('browser_', '');
const paramsStr = JSON.stringify(h.params);
const statusCls = h.ok ? 'ok' : 'error';
return `
<div class="history-item" onclick="replayHistory(${i})" title="Click to load params">
<div style="display:flex;justify-content:space-between;align-items:center;">
<span class="history-item-tool ${statusCls}">${shortName}</span>
<span class="history-item-time">${h.time} (${h.duration})</span>
</div>
<div class="history-item-params">${paramsStr}</div>
</div>`;
}).join('');
}
function replayHistory(idx) {
const h = history[idx];
const tool = h.tool;
// Open the card and scroll to it
scrollToTool(tool);
// Fill the form with saved params
const schema = toolSchemas[tool];
for (const [param, spec] of Object.entries(schema.params)) {
const el = document.getElementById(`${tool}__${param}`);
if (!el) continue;
const val = h.params[param];
if (val === undefined) continue;
if (spec.type === 'boolean') {
el.checked = !!val;
} else if (spec.type === 'array') {
el.value = JSON.stringify(val);
} else {
el.value = val;
}
}
}
function clearHistory() {
history = [];
renderHistory();
}
function setView(mode, evt) {
currentView = mode;
document.querySelectorAll('.view-toggle button').forEach(b => b.classList.remove('active'));
evt.target.classList.add('active');
const grid = document.getElementById('tools-grid');
if (mode === 'single') {
grid.style.gridTemplateColumns = '1fr';
} else {
grid.style.gridTemplateColumns = 'repeat(auto-fill, minmax(420px, 1fr))';
}
}
init();
</script>
</body>
</html>
+132
View File
@@ -0,0 +1,132 @@
---
name: linkedin-connection-greeter
description: Automates accepting LinkedIn connections and sending a welcome message about the HoneyComb prediction market. Handles shadow DOM and Lexical editors.
---
# LinkedIn Connection Greeter
This skill outlines the exact flow to accept connection requests and send a specific welcome message without triggering spam filters.
## 1. Load Ledger
Before starting, read `data/linkedin_contacts.json`. If it doesn't exist, initialize with `{"contacts": []}`. You will use this to skip people you've already messaged.
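A minimal sketch of the ledger bootstrap (ledger path as given above; `seen` is reused by step 3 to skip known contacts):

```python
import json
import os

LEDGER = "data/linkedin_contacts.json"

if os.path.exists(LEDGER):
    with open(LEDGER) as f:
        ledger = json.load(f)
else:
    ledger = {"contacts": []}

seen = {c.get("profile_url") for c in ledger["contacts"]}
```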
## 2. Scan Pending Connections
Navigate to `https://www.linkedin.com/mynetwork/invitation-manager/received/`. Wait until load + sleep 4s.
Strip unload handlers:
`browser_evaluate("(function(){window.onbeforeunload=null;})()")`
Extract cards using this specific snippet (handles changing classes and follow invites):
```javascript
(function(){
const btns = Array.from(document.querySelectorAll('button')).filter(b => b.textContent.includes('Accept'));
let results = [];
for (let b of btns) {
let card = b.closest('[role="listitem"]');
if (!card) continue;
let text = card.textContent.toLowerCase();
if (text.includes('invited you to follow') || text.includes('invited you to subscribe')) continue;
let nameEls = Array.from(card.querySelectorAll('a[href*="/in/"]'));
let nameEl = nameEls.find(el => el.textContent.trim().length > 0);
let r = b.getBoundingClientRect();
results.push({
first_name: nameEl ? nameEl.textContent.trim().split(/\s+/)[0] : 'there',
profile_url: nameEl ? nameEl.href : '',
cx: r.x + r.width/2,
cy: r.y + r.height/2
});
}
return results;
})();
```
## 3. Process Each Card (Max 10 per run)
For each card, check if `profile_url` is already in the ledger. If not:
1. `browser_click_coordinate(cx, cy)` to click the specific Accept button.
2. `sleep(2)`
3. `browser_navigate(profile_url, wait_until="load")`
4. `sleep(4)`
5. `browser_evaluate("(function(){window.onbeforeunload=null; window.addEventListener('beforeunload', e => e.stopImmediatePropagation(), true);})()")`
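A compact sketch of this loop (pseudo-calls mirroring the tool names; `cards` is the array from the step-2 snippet, `seen` is the set from the step-1 ledger sketch):

```python
for card in cards[:10]:  # max 10 per run
    if card["profile_url"] in seen:
        continue
    browser_click_coordinate(card["cx"], card["cy"])  # this card's Accept button
    sleep(2)
    browser_navigate(card["profile_url"], wait_until="load")
    sleep(4)
    browser_evaluate(
        "(function(){window.onbeforeunload=null;"
        "window.addEventListener('beforeunload',"
        " e => e.stopImmediatePropagation(), true);})()"
    )
```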
## 4. Message the User
Click Message Button on their profile:
```javascript
(function(){
const links = Array.from(document.querySelectorAll('a[href*="/messaging/compose/"]'));
for (const a of links){
if (!a.href.includes('NON_SELF_PROFILE_VIEW') || a.href.includes('body=')) continue;
const r = a.getBoundingClientRect();
if (r.width === 0 || r.x > 700) continue;
return {cx: r.x + r.width / 2, cy: r.y + r.height / 2};
}
return null;
})();
```
Click that coordinate, then `sleep(2.5)`.
Find Textarea (it is hidden inside shadow DOM):
```javascript
(function(){
const vh = window.innerHeight, vw = window.innerWidth;
const candidates = [];
function walk(root){
const els = root.querySelectorAll ? root.querySelectorAll('div.msg-form__contenteditable') : [];
for (const el of els){
const r = el.getBoundingClientRect();
if (r.width > 0 && r.height > 0 && r.y >= 0 && r.y + r.height <= vh && r.x >= 0 && r.x + r.width <= vw) {
candidates.push({cx: r.x + r.width/2, cy: r.y + r.height/2, area: r.width * r.height});
}
}
const all = root.querySelectorAll ? root.querySelectorAll('*') : [];
for (const host of all){ if (host.shadowRoot) walk(host.shadowRoot); }
}
walk(document);
candidates.sort((a, b) => b.area - a.area);
return candidates.length ? candidates[0] : null;
})();
```
Click that coordinate, `sleep(1)`.
Inject text and Send:
Construct the message: `Hey {first_name}, thanks for the connection invite! I'm currently building a prediction market for jobs: https://honeycomb.open-hive.com/. If you could check it out and share some feedback, I'd really appreciate it.`
Escape the string properly for JS injection, then run:
```javascript
// Replace MSG_TEXT with your actual string
browser_evaluate("(function(){ document.execCommand('insertText', false, `MSG_TEXT`); return true; })()")
```
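Because the message is interpolated into a backtick template literal, backslashes, backticks, and `${` must be escaped first. A minimal escaping sketch (the helper name is illustrative):

```python
def escape_for_template_literal(msg: str) -> str:
    # Escape backslashes first, then the template-literal metacharacters.
    msg = msg.replace("\\", "\\\\")
    msg = msg.replace("`", "\\`")
    msg = msg.replace("${", "\\${")
    return msg

safe = escape_for_template_literal(message)  # `message` built as described above
script = ("(function(){ document.execCommand('insertText', false, `"
          + safe + "`); return true; })()")
# Then run: browser_evaluate(script)
```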
Find Send button (also inside shadow DOM):
```javascript
(function(){
const vh = window.innerHeight;
function walk(root){
const btns = root.querySelectorAll ? root.querySelectorAll('button') : [];
for (const b of btns){
const cls = (b.className || '').toString();
if (!cls.includes('send-button') && b.textContent.trim() !== 'Send') continue;
const r = b.getBoundingClientRect();
if (r.width <= 0 || r.y + r.height > vh) continue;
return { cx: r.x + r.width/2, cy: r.y + r.height/2, disabled: b.disabled || b.getAttribute('aria-disabled') === 'true' };
}
const all = root.querySelectorAll ? root.querySelectorAll('*') : [];
for (const host of all){ if (host.shadowRoot) { const got = walk(host.shadowRoot); if (got) return got; } }
return null;
}
return walk(document);
})();
```
Click send coordinate, `sleep(2)`.
## 5. Update Ledger
Append the user to `data/linkedin_contacts.json`.
```json
{
"profile_url": "...",
"name": "...",
"action": "connection_accepted+message_sent",
"timestamp": "2026-..."
}
```
`sleep(5)` before moving to the next card to mimic human pacing.
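A minimal sketch of the append (field names from the JSON shape above; `card` carries the values extracted in step 2):

```python
import json
import time
from datetime import datetime, timezone

ledger["contacts"].append({
    "profile_url": card["profile_url"],
    "name": card["first_name"],
    "action": "connection_accepted+message_sent",
    "timestamp": datetime.now(timezone.utc).isoformat(),
})
with open(LEDGER, "w") as f:
    json.dump(ledger, f, indent=2)

time.sleep(5)  # human pacing before the next card
```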
+7 -121
View File
@@ -82,29 +82,10 @@ def _find_project_root() -> str:
return os.path.dirname(os.path.abspath(__file__))
# When ``--write-root`` is passed on the CLI, ``WRITE_ROOT`` diverges
# from ``PROJECT_ROOT``: reads stay permissive (so the queen can
# reference framework skills, docs, and the hive repo), but writes
# are confined to the write root plus the ``~/.hive/`` escape hatch.
# Without this split, the coder-tools sandbox IS the hive git
# checkout — every queen-authored skill/ledger/script lands there as
# untracked debris, which was the 2026-04-15 incident
# (``~/aden/hive/x-rapid-reply/`` and siblings).
WRITE_ROOT: str = ""
def _resolve_path(path: str) -> str:
"""Resolve path relative to PROJECT_ROOT. Raises ValueError if outside.
def _resolve_read_path(path: str) -> str:
"""Resolve path for READ operations.
Allowlist (in order):
1. Paths under ``~/.hive/``: agent session data, colonies, skills.
2. Paths under ``PROJECT_ROOT``: the hive repo, for reading framework
defaults, docs, examples, etc.
3. Relative paths joined against ``PROJECT_ROOT`` (read-side
default; writes use ``WRITE_ROOT`` instead).
Raises ``ValueError`` when the resolved path falls outside all
allowed roots.
Also allows access to ~/.hive/ directory for agent session data files.
"""
# Normalize slashes for cross-platform (e.g. exports/hi_agent from LLM)
path = path.replace("/", os.sep)
@@ -172,88 +153,6 @@ def _resolve_read_path(path: str) -> str:
return resolved
def _resolve_write_path(path: str) -> str:
"""Resolve path for WRITE operations.
Stricter than the read resolver: only allows writes under:
1. ``WRITE_ROOT``: the agent workspace (default: ``~/.hive/workspace/``
when ``--write-root`` is passed).
2. ``~/.hive/``: agent session data.
Writes to the hive repo (``PROJECT_ROOT``) are REJECTED to keep
the git checkout clean of queen-authored debris. Relative paths
resolve against ``WRITE_ROOT``, not ``PROJECT_ROOT``.
When ``WRITE_ROOT`` equals ``PROJECT_ROOT`` (no split configured),
this function is semantically identical to ``_resolve_read_path``.
"""
# Normalize slashes + expand ~
path = path.replace("/", os.sep)
if path.startswith("~"):
path = os.path.expanduser(path)
hive_dir = os.path.expanduser("~/.hive")
if os.path.isabs(path):
resolved = os.path.abspath(path)
# Always allow writes under ~/.hive/
try:
if os.path.commonpath([resolved, hive_dir]) == hive_dir:
return resolved
except ValueError:
pass
# Writes are ALSO allowed under WRITE_ROOT (the agent workspace).
try:
if os.path.commonpath([resolved, WRITE_ROOT]) == WRITE_ROOT:
return resolved
except ValueError:
pass
# If WRITE_ROOT == PROJECT_ROOT (legacy behavior: no split),
# fall through to the read-side resolver so existing callers
# keep working unchanged.
if WRITE_ROOT == PROJECT_ROOT:
return _resolve_read_path(path)
# Split configured AND the path isn't under WRITE_ROOT or
# ~/.hive/. Reject — this is the whole point of the split.
raise ValueError(
f"Access denied: writes must be under '{WRITE_ROOT}' or "
f"'{hive_dir}'. Path '{path}' is outside both "
"(use an absolute path under one of those roots, or a "
"relative path which will resolve under the write root)."
)
else:
# Relative path: resolve against WRITE_ROOT, not PROJECT_ROOT.
resolved = os.path.abspath(os.path.join(WRITE_ROOT, path))
# Double-check the resolved absolute path is inside WRITE_ROOT or
# ~/.hive/ (covers edge cases like "../../etc/passwd" that escape).
try:
wr_common = os.path.commonpath([resolved, WRITE_ROOT])
except ValueError:
wr_common = ""
try:
hv_common = os.path.commonpath([resolved, hive_dir])
except ValueError:
hv_common = ""
if wr_common != WRITE_ROOT and hv_common != hive_dir:
raise ValueError(
f"Access denied: resolved write path '{resolved}' escaped the "
f"allowed roots ('{WRITE_ROOT}', '{hive_dir}')."
)
return resolved
# Back-compat alias: existing call sites in this module call
# ``_resolve_path`` directly (e.g. for snapshot dirs, agent tool
# introspection). Those are all non-user-driven paths; route them
# through the read resolver.
_resolve_path = _resolve_read_path
# ── Git snapshot system (ported from opencode's shadow git) ───────────────
@@ -1738,45 +1637,32 @@ def validate_agent_package(agent_name: str) -> str:
def main() -> None:
global PROJECT_ROOT, SNAPSHOT_DIR, WRITE_ROOT
global PROJECT_ROOT, SNAPSHOT_DIR
from aden_tools.file_ops import register_file_tools
parser = argparse.ArgumentParser(description="Coder Tools MCP Server")
parser.add_argument("--project-root", default="")
# ``--write-root`` isolates file writes from the project root so
# queen-authored skills, ledgers, and scripts don't land in the
# hive git checkout. Reads remain permissive under PROJECT_ROOT
# so framework skills, docs, and examples stay accessible.
# Defaults to PROJECT_ROOT when empty (legacy behavior).
parser.add_argument("--write-root", default="")
parser.add_argument("--port", type=int, default=int(os.getenv("CODER_TOOLS_PORT", "4002")))
parser.add_argument("--host", default="0.0.0.0")
parser.add_argument("--stdio", action="store_true")
args = parser.parse_args()
PROJECT_ROOT = os.path.abspath(args.project_root) if args.project_root else _find_project_root()
if args.write_root:
WRITE_ROOT = os.path.abspath(os.path.expanduser(args.write_root))
os.makedirs(WRITE_ROOT, exist_ok=True)
else:
WRITE_ROOT = PROJECT_ROOT # legacy: no split
SNAPSHOT_DIR = os.path.join(
os.path.expanduser("~"),
".hive",
"snapshots",
os.path.basename(PROJECT_ROOT),
)
logger.info(f"Project root (reads): {PROJECT_ROOT}")
logger.info(f"Write root (writes): {WRITE_ROOT}")
logger.info(f"Project root: {PROJECT_ROOT}")
logger.info(f"Snapshot dir: {SNAPSHOT_DIR}")
register_file_tools(
mcp,
resolve_path=_resolve_read_path,
resolve_path_write=_resolve_write_path,
resolve_path=_resolve_path,
before_write=None, # Git snapshot causes stdio deadlock on Windows; undo_changes limited
project_root=WRITE_ROOT,
project_root=PROJECT_ROOT,
)
if args.stdio:
+5 -12
View File
@@ -328,7 +328,6 @@ def register_file_tools(
mcp: FastMCP,
*,
resolve_path: Callable[[str], str] | None = None,
resolve_path_write: Callable[[str], str] | None = None,
before_write: Callable[[], None] | None = None,
project_root: str | None = None,
) -> None:
@@ -336,18 +335,12 @@ def register_file_tools(
Args:
mcp: FastMCP instance to register tools on.
resolve_path: Path resolver for READ operations. Default:
resolve to absolute path. Raise ValueError to reject paths
(e.g. outside sandbox).
resolve_path_write: Path resolver for WRITE/EDIT operations.
Defaults to ``resolve_path`` when not provided. Split
resolvers let callers keep reads permissive (framework
skills, docs) while confining writes to an agent workspace.
resolve_path: Path resolver. Default: resolve to absolute path.
Raise ValueError to reject paths (e.g. outside sandbox).
before_write: Hook called before write/edit operations (e.g. git snapshot).
project_root: If set, search_files relativizes output paths to this root.
"""
_resolve = resolve_path or _default_resolve_path
_resolve_write = resolve_path_write or _resolve
@mcp.tool()
def read_file(path: str, offset: int = 1, limit: int = 0, hashline: bool = False) -> str:
@@ -447,7 +440,7 @@ def register_file_tools(
path: Absolute file path to write.
content: Complete file content to write.
"""
resolved = _resolve_write(path)
resolved = _resolve(path)
resolved_path = Path(resolved)
# Stale-edit guard: an existing file must have been read recently
@@ -516,7 +509,7 @@ def register_file_tools(
new_text: Replacement text.
replace_all: Replace all occurrences (default: first only).
"""
resolved = _resolve_write(path)
resolved = _resolve(path)
if not os.path.isfile(resolved):
return f"Error: File not found: {path}"
@@ -822,7 +815,7 @@ def register_file_tools(
return "Error: Too many edits in one call (max 100). Split into multiple calls."
# 2. Read file
resolved = _resolve_write(path)
resolved = _resolve(path)
if not os.path.isfile(resolved):
return f"Error: File not found: {path}"
+2 -2
View File
@@ -45,8 +45,8 @@ def register_tools(mcp: FastMCP) -> None:
- Tabs: browser_tabs, browser_open, browser_close, browser_focus
- Navigation: browser_navigate, browser_go_back, browser_go_forward, browser_reload
- Inspection: browser_screenshot, browser_snapshot, browser_console
- Interactions: browser_click, browser_click_coordinate, browser_type, browser_fill,
browser_press, browser_hover, browser_select, browser_scroll, browser_drag
- Interactions: browser_click, browser_click_coordinate, browser_type, browser_type_focused,
browser_fill, browser_press, browser_hover, browser_select, browser_scroll, browser_drag
- Advanced: browser_wait, browser_evaluate, browser_get_text, browser_get_attribute,
browser_resize, browser_upload, browser_dialog
"""
+195 -87
View File
@@ -80,6 +80,61 @@ async def _adaptive_poll_sleep(elapsed_s: float) -> None:
_interaction_highlights: dict[int, dict] = {}
# Compact descriptor of the focused element. Returned by both click()
# and click_coordinate() so the agent can verify it focused what it
# intended. When the outer document's activeElement is an <iframe>,
# we recurse into the iframe's document (same-origin only) so the
# response describes the real inner element — otherwise the agent
# always sees {tag: "iframe"} and can't tell whether it hit the
# composer or something else inside the frame (e.g. a sidebar item in
# LinkedIn's #interop-outlet messaging overlay).
_FOCUSED_ELEMENT_JS = """
(function() {
function describe(el) {
var rect = el.getBoundingClientRect();
var attrs = {};
for (var i = 0; i < el.attributes.length && i < 10; i++) {
attrs[el.attributes[i].name] = el.attributes[i].value.substring(0, 200);
}
return {
tag: el.tagName.toLowerCase(),
id: el.id || null,
className: el.className || null,
name: el.getAttribute('name') || null,
type: el.getAttribute('type') || null,
role: el.getAttribute('role') || null,
contenteditable: el.getAttribute('contenteditable') || null,
text: (el.innerText || '').substring(0, 200),
value: (el.value !== undefined ? String(el.value).substring(0, 200) : null),
attributes: attrs,
rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height }
};
}
var el = document.activeElement;
if (!el || el === document.body) return null;
// Descend into same-origin iframes. Capped at 5 levels of nesting
// to bound cost and prevent a pathological loop. If a frame is
// cross-origin, contentDocument throws; catch and report the
// outermost iframe instead.
var framePath = [];
var depth = 0;
while (el && (el.tagName === 'IFRAME' || el.tagName === 'FRAME') && depth < 5) {
framePath.push(el.id || el.getAttribute('data-testid') || el.tagName.toLowerCase());
var innerDoc = null;
try { innerDoc = el.contentDocument; } catch (e) { innerDoc = null; }
if (!innerDoc) break;
var innerActive = innerDoc.activeElement;
if (!innerActive || innerActive === innerDoc.body) break;
el = innerActive;
depth++;
}
var out = describe(el);
if (framePath.length) out.inFrame = framePath;
return out;
})()
"""
def _get_active_profile() -> str:
"""Get the current active profile from context variable."""
try:
@@ -763,7 +818,8 @@ class BeelineBridge:
rx = value.get("x", 0) - value.get("width", 0) / 2
ry = value.get("y", 0) - value.get("height", 0) / 2
await self.highlight_rect(tab_id, rx, ry, value.get("width", 0), value.get("height", 0), label=selector)
return {
focused_info = await self._read_focused_element(tab_id)
resp = {
"ok": True,
"action": "click",
"selector": selector,
@@ -771,6 +827,9 @@ class BeelineBridge:
"y": value.get("y", 0),
"method": "javascript",
}
if focused_info:
resp["focused_element"] = focused_info
return resp
# If JavaScript click failed, try CDP approach
if isinstance(value, dict) and value.get("error"):
@@ -883,7 +942,8 @@ class BeelineBridge:
w = bounds_value.get("width", 0)
h = bounds_value.get("height", 0)
await self.highlight_rect(tab_id, x - w / 2, y - h / 2, w, h, label=selector)
return {
focused_info = await self._read_focused_element(tab_id)
resp = {
"ok": True,
"action": "click",
"selector": selector,
@@ -891,10 +951,29 @@ class BeelineBridge:
"y": y,
"method": "cdp",
}
if focused_info:
resp["focused_element"] = focused_info
return resp
except Exception as e:
return {"ok": False, "error": f"Click failed: {e}"}
async def _read_focused_element(self, tab_id: int) -> dict | None:
"""Read document.activeElement and return a compact descriptor.
Returns None on any failure; never raises. Used by both click
paths (selector-based click() and click_coordinate()) so the
agent gets the same response shape regardless of which one was
called. The descriptor lets the agent answer "did my click land
on an editable?" without a second round-trip.
"""
try:
await self._try_enable_domain(tab_id, "Runtime")
result = await self.evaluate(tab_id, _FOCUSED_ELEMENT_JS)
return (result or {}).get("result")
except Exception:
return None
async def click_coordinate(self, tab_id: int, x: float, y: float, button: str = "left") -> dict:
"""Click at specific coordinates."""
await self.cdp_attach(tab_id)
@@ -904,18 +983,11 @@ class BeelineBridge:
button_map = {"left": "left", "right": "right", "middle": "middle"}
cdp_button = button_map.get(button, "left")
from .tools.inspection import _screenshot_css_scales, _screenshot_scales
phys_scale = _screenshot_scales.get(tab_id, "unset")
css_scale = _screenshot_css_scales.get(tab_id, "unset")
logger.info(
"click_coordinate tab=%d: x=%.1f, y=%.1f → CDP Input.dispatchMouseEvent. "
"stored_scales: physicalScale=%s, cssScale=%s",
"click_coordinate tab=%d: x=%.1f, y=%.1f → CDP Input.dispatchMouseEvent",
tab_id,
x,
y,
phys_scale,
css_scale,
)
await self._cdp(
@@ -930,15 +1002,20 @@ class BeelineBridge:
)
await self.highlight_point(tab_id, x, y, label=f"click ({x},{y})")
return {"ok": True, "action": "click_coordinate", "x": x, "y": y}
focused_info = await self._read_focused_element(tab_id)
resp = {"ok": True, "action": "click_coordinate", "x": x, "y": y}
if focused_info:
resp["focused_element"] = focused_info
return resp
async def type_text(
self,
tab_id: int,
selector: str,
selector: str | None,
text: str,
clear_first: bool = True,
delay_ms: int = 0,
delay_ms: int = 1,
timeout_ms: int = 30000,
use_insert_text: bool = True,
) -> dict:
@@ -974,79 +1051,98 @@ class BeelineBridge:
await self._try_enable_domain(tab_id, "Input")
await self._try_enable_domain(tab_id, "Runtime")
# Find + scroll + (optionally) clear via JS. We still need the
# rect, and clearing via `.value = ''` / `.textContent = ''`
# is the most reliable way to reset pre-existing content.
focus_script = f"""
(function() {{
const el = document.querySelector({json.dumps(selector)});
if (!el) return null;
if selector is not None:
# Find + scroll + (optionally) clear via JS. We still need the
# rect, and clearing via `.value = ''` / `.textContent = ''`
# is the most reliable way to reset pre-existing content.
focus_script = f"""
(function() {{
const el = document.querySelector({json.dumps(selector)});
if (!el) return null;
// Scroll into view so the click lands in-viewport.
el.scrollIntoView({{ block: 'center' }});
// Scroll into view so the click lands in-viewport.
el.scrollIntoView({{ block: 'center' }});
// Clear if requested.
if ({str(clear_first).lower()}) {{
if (el.value !== undefined) {{
el.value = '';
// Nudge React's onChange — the framework reads
// .value via a setter hook, and without firing
// an input event the component state remains
// stale after our value assignment.
el.dispatchEvent(new Event('input', {{bubbles: true}}));
}} else if (el.isContentEditable) {{
el.textContent = '';
el.dispatchEvent(new Event('input', {{bubbles: true}}));
// Clear if requested.
if ({str(clear_first).lower()}) {{
if (el.value !== undefined) {{
el.value = '';
// Nudge React's onChange — the framework reads
// .value via a setter hook, and without firing
// an input event the component state remains
// stale after our value assignment.
el.dispatchEvent(new Event('input', {{bubbles: true}}));
}} else if (el.isContentEditable) {{
el.textContent = '';
el.dispatchEvent(new Event('input', {{bubbles: true}}));
}}
}}
}}
const r = el.getBoundingClientRect();
return {{
x: r.left + r.width / 2,
y: r.top + r.height / 2,
w: r.width,
h: r.height,
}};
}})();
"""
const r = el.getBoundingClientRect();
return {{
x: r.left + r.width / 2,
y: r.top + r.height / 2,
w: r.width,
h: r.height,
}};
}})();
"""
focus_result = await self.evaluate(tab_id, focus_script)
rect = (focus_result or {}).get("result")
if not rect:
# Element not found — wait + retry until timeout.
deadline = asyncio.get_event_loop().time() + timeout_ms / 1000
while asyncio.get_event_loop().time() < deadline:
result = await self.evaluate(tab_id, focus_script)
rect = (result or {}).get("result") if result else None
if rect:
break
await asyncio.sleep(0.1)
focus_result = await self.evaluate(tab_id, focus_script)
rect = (focus_result or {}).get("result")
if not rect:
return {"ok": False, "error": f"Element not found: {selector}"}
# Element not found — wait + retry until timeout.
deadline = asyncio.get_event_loop().time() + timeout_ms / 1000
while asyncio.get_event_loop().time() < deadline:
result = await self.evaluate(tab_id, focus_script)
rect = (result or {}).get("result") if result else None
if rect:
break
await asyncio.sleep(0.1)
if not rect.get("w") or not rect.get("h"):
return {
"ok": False,
"error": f"Element has zero dimensions, can't click to focus: {selector}",
}
if not rect:
return {"ok": False, "error": f"Element not found: {selector}"}
# Fire a real CDP pointer click at the element's center. This is
# what unblocks rich-text editors — JS el.focus() is not enough.
click_x = rect["x"]
click_y = rect["y"]
await self._cdp(
tab_id,
"Input.dispatchMouseEvent",
{"type": "mousePressed", "x": click_x, "y": click_y, "button": "left", "clickCount": 1},
)
await self._cdp(
tab_id,
"Input.dispatchMouseEvent",
{"type": "mouseReleased", "x": click_x, "y": click_y, "button": "left", "clickCount": 1},
)
await asyncio.sleep(0.15) # Let focus / editor-init animations settle.
if not rect.get("w") or not rect.get("h"):
return {
"ok": False,
"error": f"Element has zero dimensions, can't click to focus: {selector}",
}
# Fire a real CDP pointer click at the element's center. This is
# what unblocks rich-text editors — JS el.focus() is not enough.
click_x = rect["x"]
click_y = rect["y"]
await self._cdp(
tab_id,
"Input.dispatchMouseEvent",
{"type": "mousePressed", "x": click_x, "y": click_y, "button": "left", "clickCount": 1},
)
await self._cdp(
tab_id,
"Input.dispatchMouseEvent",
{"type": "mouseReleased", "x": click_x, "y": click_y, "button": "left", "clickCount": 1},
)
await asyncio.sleep(0.15) # Let focus / editor-init animations settle.
else:
# No selector — assume the caller already focused the target
# element (e.g. via browser_click_coordinate). Just clear the
# active element if requested, then insert text directly.
if clear_first:
await self.evaluate(tab_id, """
(function() {
const el = document.activeElement;
if (!el) return;
if (el.value !== undefined) {
el.value = '';
el.dispatchEvent(new Event('input', {bubbles: true}));
} else if (el.isContentEditable) {
el.textContent = '';
el.dispatchEvent(new Event('input', {bubbles: true}));
}
})();
""")
if use_insert_text and delay_ms <= 0:
# CDP Input.insertText is the most reliable way to insert
@@ -1086,16 +1182,28 @@ class BeelineBridge:
await asyncio.sleep(delay_ms / 1000)
# Highlight the element that was typed into
rect_result = await self.evaluate(
tab_id,
f"(function(){{const el=document.querySelector("
f"{json.dumps(selector)});if(!el)return null;"
f"const r=el.getBoundingClientRect();"
f"return{{x:r.left,y:r.top,w:r.width,h:r.height}};}})()",
)
rect = (rect_result or {}).get("result")
if rect:
await self.highlight_rect(tab_id, rect["x"], rect["y"], rect["w"], rect["h"], label=selector)
if selector is not None:
rect_result = await self.evaluate(
tab_id,
f"(function(){{const el=document.querySelector("
f"{json.dumps(selector)});if(!el)return null;"
f"const r=el.getBoundingClientRect();"
f"return{{x:r.left,y:r.top,w:r.width,h:r.height}};}})()",
)
rect = (rect_result or {}).get("result")
if rect:
await self.highlight_rect(tab_id, rect["x"], rect["y"], rect["w"], rect["h"], label=selector)
else:
# Highlight the active element when no selector was provided
rect_result = await self.evaluate(
tab_id,
"(function(){const el=document.activeElement;if(!el)return null;"
"const r=el.getBoundingClientRect();"
"return{x:r.left,y:r.top,w:r.width,h:r.height};})()",
)
rect = (rect_result or {}).get("result")
if rect:
await self.highlight_rect(tab_id, rect["x"], rect["y"], rect["w"], rect["h"], label="active element")
return {"ok": True, "action": "type", "selector": selector, "length": len(text)}
# CDP Input.dispatchKeyEvent modifiers bitmask.
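Taken together, the selector-less branch enables a two-step flow for editors that reject synthetic focus: pointer-click the coordinates, check what got focused, then type. A hedged caller sketch (must run inside an async function; the `bridge` handle and coordinates are illustrative):

```python
# Pointer-click to focus the composer, then type into the active element.
click = await bridge.click_coordinate(tab_id, x=412, y=688)
focused = click.get("focused_element") or {}
if focused.get("contenteditable") == "true" or focused.get("tag") in ("input", "textarea"):
    await bridge.type_text(tab_id, selector=None, text="Hey, thanks for connecting!")
else:
    raise RuntimeError(f"click focused unexpected element: {focused.get('tag')}")
```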
+186
View File
@@ -0,0 +1,186 @@
"""Tool schemas for the bridge remote HTTP API (port 9230)."""
TOOL_SCHEMAS: dict[str, dict] = {
"browser_click": {
"description": "Click an element on the page.",
"params": {
"selector": {"type": "string", "required": True},
"tab_id": {"type": "integer"},
"profile": {"type": "string"},
"button": {"type": "string", "default": "left", "enum": ["left", "right", "middle"]},
"double_click": {"type": "boolean", "default": False},
"timeout_ms": {"type": "integer", "default": 5000},
},
},
"browser_click_coordinate": {
"description": "Click at specific viewport coordinates (CSS pixels).",
"params": {
"x": {"type": "number", "required": True},
"y": {"type": "number", "required": True},
"tab_id": {"type": "integer"},
"profile": {"type": "string"},
"button": {"type": "string", "default": "left"},
},
},
"browser_type": {
"description": "Type text into an input element.",
"params": {
"selector": {"type": "string", "required": True},
"text": {"type": "string", "required": True},
"tab_id": {"type": "integer"},
"profile": {"type": "string"},
"delay_ms": {"type": "integer", "default": 1},
"clear_first": {"type": "boolean", "default": True},
"timeout_ms": {"type": "integer", "default": 30000},
"use_insert_text": {"type": "boolean", "default": True},
},
},
"browser_fill": {
"description": "Fill an input element (clears existing content first).",
"params": {
"selector": {"type": "string", "required": True},
"value": {"type": "string", "required": True},
"tab_id": {"type": "integer"},
"profile": {"type": "string"},
"timeout_ms": {"type": "integer", "default": 30000},
},
},
"browser_type_focused": {
"description": "Type text into the already-focused element. Use after browser_click_coordinate has focused the target. Faster than browser_press for multi-character input.",
"params": {
"text": {"type": "string", "required": True},
"tab_id": {"type": "integer"},
"profile": {"type": "string"},
"delay_ms": {"type": "integer", "default": 1},
"clear_first": {"type": "boolean", "default": True},
"use_insert_text": {"type": "boolean", "default": True},
},
},
"browser_press": {
"description": "Press a keyboard key, optionally with modifiers.",
"params": {
"key": {"type": "string", "required": True},
"selector": {"type": "string"},
"tab_id": {"type": "integer"},
"profile": {"type": "string"},
"modifiers": {"type": "array", "items": "string"},
},
},
"browser_press_at": {
"description": "Move mouse to coordinates then press a key.",
"params": {
"x": {"type": "number", "required": True},
"y": {"type": "number", "required": True},
"key": {"type": "string", "required": True},
"tab_id": {"type": "integer"},
"profile": {"type": "string"},
},
},
"browser_navigate": {
"description": "Navigate a tab to a URL.",
"params": {
"url": {"type": "string", "required": True},
"tab_id": {"type": "integer"},
"profile": {"type": "string"},
"wait_until": {"type": "string", "default": "load"},
},
},
"browser_go_back": {
"description": "Navigate back in browser history.",
"params": {
"tab_id": {"type": "integer"},
"profile": {"type": "string"},
},
},
"browser_go_forward": {
"description": "Navigate forward in browser history.",
"params": {
"tab_id": {"type": "integer"},
"profile": {"type": "string"},
},
},
"browser_reload": {
"description": "Reload the current page.",
"params": {
"tab_id": {"type": "integer"},
"profile": {"type": "string"},
},
},
"browser_scroll": {
"description": "Scroll the page.",
"params": {
"direction": {"type": "string", "default": "down", "enum": ["up", "down", "left", "right"]},
"amount": {"type": "integer", "default": 500},
"tab_id": {"type": "integer"},
"profile": {"type": "string"},
},
},
"browser_hover": {
"description": "Hover over an element.",
"params": {
"selector": {"type": "string", "required": True},
"tab_id": {"type": "integer"},
"profile": {"type": "string"},
"timeout_ms": {"type": "integer", "default": 30000},
},
},
"browser_hover_coordinate": {
"description": "Hover at CSS pixel coordinates.",
"params": {
"x": {"type": "number", "required": True},
"y": {"type": "number", "required": True},
"tab_id": {"type": "integer"},
"profile": {"type": "string"},
},
},
"browser_select": {
"description": "Select option(s) in a dropdown.",
"params": {
"selector": {"type": "string", "required": True},
"values": {"type": "array", "required": True},
"tab_id": {"type": "integer"},
"profile": {"type": "string"},
},
},
"browser_screenshot": {
"description": "Take a screenshot of the page (returns base64 PNG).",
"params": {
"tab_id": {"type": "integer"},
"profile": {"type": "string"},
"full_page": {"type": "boolean", "default": False},
},
},
"browser_snapshot": {
"description": "Get the accessibility tree snapshot of the page.",
"params": {
"tab_id": {"type": "integer"},
"profile": {"type": "string"},
},
},
"browser_evaluate": {
"description": "Evaluate JavaScript in the page.",
"params": {
"expression": {"type": "string", "required": True},
"tab_id": {"type": "integer"},
"profile": {"type": "string"},
},
},
"browser_get_text": {
"description": "Get text content of an element.",
"params": {
"selector": {"type": "string", "required": True},
"tab_id": {"type": "integer"},
"profile": {"type": "string"},
},
},
"browser_wait": {
"description": "Wait for an element or text to appear on the page.",
"params": {
"selector": {"type": "string"},
"text": {"type": "string"},
"tab_id": {"type": "integer"},
"profile": {"type": "string"},
"timeout_ms": {"type": "integer", "default": 30000},
},
},
}
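Because every entry above carries ``required``, ``default``, and occasionally ``enum`` keys, the registry can drive generic argument validation before dispatch. A minimal sketch, assuming the dict is importable as ``TOOL_SCHEMAS`` (the helper below is illustrative, not part of the codebase):

def validate_call(tool: str, args: dict, schemas: dict) -> dict:
    # Fill defaults, enforce required params and enum membership.
    spec = schemas[tool]["params"]
    out = dict(args)
    for name, rules in spec.items():
        if rules.get("required") and name not in out:
            raise ValueError(f"{tool}: missing required param {name!r}")
        if name not in out and "default" in rules:
            out[name] = rules["default"]
        if name in out and "enum" in rules and out[name] not in rules["enum"]:
            raise ValueError(f"{tool}: {name}={out[name]!r} not in {rules['enum']}")
    return out

# validate_call("browser_scroll", {}, TOOL_SCHEMAS)
# -> {"direction": "down", "amount": 500}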
+3 -47
View File
@@ -96,59 +96,15 @@ def register_advanced_tools(mcp: FastMCP) -> None:
profile: str | None = None,
) -> dict:
"""
ESCAPE HATCH: execute raw JavaScript. USE ONLY as a last
resort. 99% of browser automation does NOT need this tool.
Before reaching for it, try a semantic tool first:
- browser_click / browser_click_coordinate for clicks
- browser_type(use_insert_text=True) for text input
- browser_screenshot + browser_get_rect for locating elements
- browser_shadow_query for shadow-DOM selectors
- browser_get_text / browser_get_attribute for reading state
ANTI-PATTERNS: stop and switch tools if you notice yourself:
1. Calling browser_evaluate 2+ times in a row to guess at
selectors. Each attempt costs ~30 tokens of JS + a full
LLM round-trip. After 2 empty results, the selector
strategy is wrong; pivot to browser_screenshot +
browser_click_coordinate. The screenshot + coord path
works on shadow DOM, iframes, and React-obfuscated
class names indifferently.
2. Writing a walk(root) recursive shadow-DOM traversal
function. Use browser_shadow_query; it does the
traversal in C++ via CDP's querySelector, not in JS.
3. Calling document.execCommand('insertText', ...) to type
into Lexical / contenteditable. Use
browser_type(use_insert_text=True, text='...') instead.
It handles the click-then-focus-then-insert sequence
with built-in retries.
4. Trying to read a nested iframe's contentDocument. That
usually fails (cross-origin or late hydration). Use
browser_screenshot to see it, then browser_click_coordinate.
LEGITIMATE uses (when nothing semantic fits):
- Reading a computed style, window size, or scroll position
that no tool exposes.
- Firing a one-shot site-specific API call (e.g. an analytics
beacon the test needs).
- Stripping an onbeforeunload handler that blocks navigation.
- Probing for shadow roots whose existence is conditional.
Execute JavaScript in the browser context.
Args:
script: JavaScript code to execute. Keep it small. If you
need to traverse the DOM, prefer browser_shadow_query.
script: JavaScript code to execute
tab_id: Chrome tab ID (default: active tab)
profile: Browser profile name (default: "default")
Returns:
Dict with evaluation result. On a "find X" script that
returns [] or null: do NOT retry with a different
selector; take a screenshot and switch to coordinates.
Dict with evaluation result
"""
bridge = get_bridge()
if not bridge or not bridge.is_connected:
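The anti-pattern list above amounts to a budget: two empty ``browser_evaluate`` results, then pivot. A sketch of that rule, with ``call_tool`` standing in for a hypothetical async MCP dispatcher and the result shape assumed:

async def find_or_pivot(call_tool, candidates: list[str]):
    for expr in candidates[:2]:  # at most two selector guesses
        r = await call_tool("browser_evaluate", {"expression": expr})
        if r.get("result"):  # assumed result key
            return r["result"]
    # Two empty results: stop guessing selectors. The screenshot +
    # coordinate path works on shadow DOM, iframes, and obfuscated
    # class names alike.
    return await call_tool("browser_screenshot", {})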
+84 -200
View File
@@ -12,7 +12,6 @@ import io
import json
import logging
import time
from typing import Literal
from fastmcp import FastMCP
from mcp.types import ImageContent, TextContent
@@ -23,32 +22,31 @@ from .tabs import _get_context
logger = logging.getLogger(__name__)
# Target width for normalized screenshots (px in the delivered image)
_SCREENSHOT_WIDTH = 600
# Maps tab_id -> physical scale: image_coord × scale = physical pixels (for CDP Input events)
_screenshot_scales: dict[int, float] = {}
# Maps tab_id -> CSS scale: image_coord × scale = CSS pixels (for DOM APIs / getBoundingClientRect)
_screenshot_css_scales: dict[int, float] = {}
def _resize_and_annotate(
data: str,
css_width: int,
dpr: float = 1.0,
highlights: list[dict] | None = None,
width: int = _SCREENSHOT_WIDTH,
) -> tuple[str, float, float]:
"""Resize a base64 PNG to _SCREENSHOT_WIDTH wide, annotate highlights.
) -> tuple[str, float]:
"""Resize a captured PNG so that image pixels == CSS pixels, then
re-encode as JPEG quality 75.
Returns (new_b64, physical_scale, css_scale) where:
physical_scale = physical_px_per_image_px (multiply image coords → physical px)
css_scale = css_px_per_image_px (multiply image coords → CSS px for DOM APIs)
Output is ``css_width × round(orig_h × css_width / orig_w)``. The
1:1 image↔CSS mapping means a coord the agent reads off the image
is the same coord CDP expects; no conversion, no scale factors to
remember. Highlight annotations are drawn directly in CSS px (which
equal image px after resize).
Highlights have x,y,w,h in CSS pixels (what getBoundingClientRect returns,
and what CDP Input.dispatchMouseEvent accepts).
Falls back to original data if Pillow unavailable or resize fails.
Returns ``(new_b64, physical_scale)`` where
``physical_scale = orig_png_w / css_width`` (= DPR). Kept for logs
and HiDPI debugging only.
"""
if not css_width or css_width <= 0:
# Capture path always supplies css_width; only reach here on a
# degraded bridge response. Return the raw image untouched.
return data, 1.0
try:
from PIL import Image, ImageDraw, ImageFont
except ImportError:
@@ -58,48 +56,39 @@ def _resize_and_annotate(
import struct
orig_w = struct.unpack(">I", raw[16:20])[0]
raw_size_bytes = len(raw)
physical_scale = orig_w / width if orig_w and width else 1.0
css_scale = (css_width / width) if css_width else (physical_scale / max(dpr, 1.0))
physical_scale = orig_w / css_width if orig_w else 1.0
logger.warning(
"PIL not available — screenshot resize SKIPPED (cannot downscale image). "
"raw_size=%d bytes, png_width=%d, css_width=%s, dpr=%s, target_width=%d. "
"Returning ORIGINAL image with computed scales: physicalScale=%.4f, cssScale=%.4f. "
"Agent must use browser_coords() to convert image positions before clicking.",
raw_size_bytes,
orig_w,
"PIL not available — screenshot resize+convert SKIPPED. "
"Returning original physical-px PNG. physicalScale=%.4f, "
"css_width=%d, dpr=%s. Clicks WILL be misaligned; install Pillow.",
physical_scale,
css_width,
dpr,
width,
physical_scale,
css_scale,
)
return data, round(physical_scale, 4), round(css_scale, 4)
return data, round(physical_scale, 4)
try:
raw = base64.b64decode(data)
img = Image.open(io.BytesIO(raw)).convert("RGBA")
orig_w, orig_h = img.size
physical_scale = orig_w / width
css_scale = (css_width / width) if css_width else (physical_scale / max(dpr, 1.0))
physical_scale = orig_w / css_width
new_w = css_width
new_h = round(orig_h * new_w / orig_w)
if (new_w, new_h) != img.size:
img = img.resize((new_w, new_h), Image.LANCZOS)
logger.info(
"Screenshot resize: orig=%dx%dtarget=%dx%d, css_width=%s, dpr=%s, physicalScale=%.4f, cssScale=%.4f",
"Screenshot: orig=%dx%dout=%dx%d (css_width=%d, dpr=%s), physicalScale=%.4f",
orig_w,
orig_h,
width,
round(orig_h * width / orig_w),
new_w,
new_h,
css_width,
dpr,
physical_scale,
css_scale,
)
new_w = width
new_h = round(orig_h * new_w / orig_w)
img = img.resize((new_w, new_h), Image.LANCZOS)
if highlights:
overlay = Image.new("RGBA", img.size, (0, 0, 0, 0))
draw = ImageDraw.Draw(overlay)
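Concretely, on a DPR=2 display with a 1280 px wide CSS viewport the capture is 2560 px wide; resizing to ``css_width`` yields a 1280 px image where every image pixel is a CSS pixel. A worked sketch of the arithmetic above, with illustrative numbers:

orig_w, orig_h = 2560, 1600              # physical-px capture, DPR = 2
css_width = 1280                         # CSS viewport width from the bridge
physical_scale = orig_w / css_width      # 2.0 == DPR, logs/debug only
new_w = css_width                        # 1280: image px == CSS px
new_h = round(orig_h * new_w / orig_w)   # 800, aspect ratio preserved
assert (new_w, new_h) == (1280, 800) and physical_scale == 2.0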
@@ -111,11 +100,11 @@ def _resize_and_annotate(
for h in highlights:
kind = h.get("kind", "rect")
label = h.get("label", "")
# Highlights are in CSS px → convert to image px
ix = h["x"] / css_scale
iy = h["y"] / css_scale
iw = h.get("w", 0) / css_scale
ih = h.get("h", 0) / css_scale
# Highlights are in CSS px. Image px == CSS px, no conversion.
ix = h["x"]
iy = h["y"]
iw = h.get("w", 0)
ih = h.get("h", 0)
if kind == "point":
cx, cy, r = ix, iy, 10
@@ -135,11 +124,9 @@ def _resize_and_annotate(
width=2,
)
# Label: show image pixel position so user knows where to look
img_coords = f"img:({round(ix)},{round(iy)})"
display_label = f"{img_coords} {label}" if label else img_coords
display_label = f"({round(ix)},{round(iy)}) {label}".strip()
lx, ly = ix, max(2, iy - 16)
lx = max(2, min(lx, width - 120))
lx = max(2, min(lx, new_w - 120))
bbox = draw.textbbox((lx, ly), display_label, font=font)
pad = 3
draw.rectangle(
@@ -153,22 +140,20 @@ def _resize_and_annotate(
img = img.convert("RGB")
buf = io.BytesIO()
img.save(buf, format="PNG", optimize=True)
img.save(buf, format="JPEG", quality=75, optimize=True)
return (
base64.b64encode(buf.getvalue()).decode(),
round(physical_scale, 4),
round(css_scale, 4),
)
except Exception:
logger.warning(
"Screenshot resize/annotate FAILED — returning original image with scale=1.0. "
"css_width=%s, dpr=%s, target_width=%d. Clicks will be misaligned.",
"Screenshot resize/annotate FAILED — returning original image. "
"css_width=%s, dpr=%s.",
css_width,
dpr,
width,
exc_info=True,
)
return data, 1.0, 1.0
return data, 1.0
def register_inspection_tools(mcp: FastMCP) -> None:
@@ -180,26 +165,24 @@ def register_inspection_tools(mcp: FastMCP) -> None:
profile: str | None = None,
full_page: bool = False,
selector: str | None = None,
image_type: Literal["png", "jpeg"] = "png",
annotate: bool = True,
width: int = _SCREENSHOT_WIDTH,
) -> list:
"""
Take a screenshot of the current page.
Returns a normalized image alongside text metadata (URL, size, scale
factors, etc.). Automatically annotates the last interaction (click,
hover, type) with a bounding box overlay.
The image is delivered at the CSS viewport's own dimensions, so
a pixel you see in the screenshot is the same coordinate you
pass to ``browser_click_coordinate`` / ``browser_hover_coordinate``
/ ``browser_press_at``. No conversion, no scale factors.
Output is JPEG quality 75 (~150-250 KB for a typical UI).
Args:
tab_id: Chrome tab ID (default: active tab)
profile: Browser profile name (default: "default")
full_page: Capture full scrollable page (default: False)
selector: CSS selector to screenshot a specific element (optional)
image_type: Image format - png or jpeg (default: png)
annotate: Draw bounding box of last interaction on image (default: True)
width: Output image width in pixels (default: 600). Use 800+ for fine
text, 400 for quick layout checks.
Returns:
List of content blocks: text metadata + image
@@ -252,7 +235,6 @@ def register_inspection_tools(mcp: FastMCP) -> None:
return [TextContent(type="text", text=json.dumps(screenshot_result))]
data = screenshot_result.get("data")
mime_type = screenshot_result.get("mimeType", "image/png")
css_width = screenshot_result.get("cssWidth", 0)
dpr = screenshot_result.get("devicePixelRatio", 1.0)
@@ -263,45 +245,38 @@ def register_inspection_tools(mcp: FastMCP) -> None:
if annotate and target_tab in _interaction_highlights:
highlights = [_interaction_highlights[target_tab]]
# Normalize to 800px wide and annotate. Offloaded to a
# thread because PIL Image.open/resize/ImageDraw/composite on
# a 2-megapixel PNG blocks for ~150-300ms of CPU — plenty to
# freeze the asyncio event loop and delay every concurrent
# tool call during a screenshot. The function is reentrant
# (fresh PIL Image per call, no shared state), so to_thread
# is safe.
data, physical_scale, css_scale = await asyncio.to_thread(
# Resize to CSS-viewport dimensions so image px == CSS px,
# and re-encode as the chosen lossy format. Offloaded to a
# thread because PIL Image.open/resize/ImageDraw/composite
on a 2-megapixel PNG blocks for ~150-300 ms of CPU —
# plenty to freeze the asyncio event loop. The function is
# reentrant (fresh PIL Image per call, no shared state), so
# to_thread is safe.
data, physical_scale = await asyncio.to_thread(
_resize_and_annotate,
data,
css_width,
dpr,
highlights,
width,
)
_screenshot_scales[target_tab] = physical_scale
_screenshot_css_scales[target_tab] = css_scale
meta = json.dumps(
{
"ok": True,
"tabId": target_tab,
"url": screenshot_result.get("url", ""),
"imageType": mime_type.split("/")[-1],
"imageType": "jpeg",
"size": len(base64.b64decode(data)) if data else 0,
"imageWidth": width,
"imageWidth": css_width,
"fullPage": full_page,
"devicePixelRatio": dpr,
"physicalScale": physical_scale,
"cssScale": css_scale,
"annotated": bool(highlights),
"scaleHint": (
f"image_coord × {css_scale} = CSS px "
f"→ feed to browser_click_coordinate, "
f"browser_hover_coordinate, browser_press_at "
f"(CDP Input events use CSS pixels). "
f"image_coord × {physical_scale} = physical px "
f"is debug-only on HiDPI displays and must NOT "
f"be used for clicks — it overshoots by DPR×."
"Image pixel = CSS pixel. Feed any coord you see "
"in this image directly to browser_click_coordinate "
"/ browser_hover_coordinate / browser_press_at "
"no conversion needed."
),
}
)
@@ -313,17 +288,15 @@ def register_inspection_tools(mcp: FastMCP) -> None:
"ok": True,
"size": len(base64.b64decode(data)) if data else 0,
"url": screenshot_result.get("url", ""),
"physicalScale": physical_scale,
"cssScale": css_scale,
"debug_cssWidth": css_width,
"debug_dpr": dpr,
"cssWidth": css_width,
"dpr": dpr,
},
duration_ms=(time.perf_counter() - start) * 1000,
)
return [
TextContent(type="text", text=meta),
ImageContent(type="image", data=data, mimeType=mime_type),
ImageContent(type="image", data=data, mimeType="image/jpeg"),
]
except Exception as e:
log_tool_call(
@@ -334,73 +307,6 @@ def register_inspection_tools(mcp: FastMCP) -> None:
)
return [TextContent(type="text", text=json.dumps({"ok": False, "error": str(e)}))]
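The ``to_thread`` comment above generalizes to any reentrant CPU-bound PIL step. A self-contained sketch of the offload pattern (names are illustrative, not from this codebase):

import asyncio
import base64
import io

def _reencode_jpeg(b64png: str) -> str:
    # Reentrant: fresh objects per call, no shared state, so running
    # it on a worker thread is safe.
    from PIL import Image
    img = Image.open(io.BytesIO(base64.b64decode(b64png))).convert("RGB")
    buf = io.BytesIO()
    img.save(buf, format="JPEG", quality=75, optimize=True)
    return base64.b64encode(buf.getvalue()).decode()

async def reencode(b64png: str) -> str:
    # Keeps ~150-300 ms of PIL work off the asyncio event loop.
    return await asyncio.to_thread(_reencode_jpeg, b64png)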
@mcp.tool()
def browser_coords(
x: float,
y: float,
tab_id: int | None = None,
profile: str | None = None,
) -> dict:
"""
Convert screenshot image coordinates to browser click coordinates.
After browser_screenshot returns a downscaled image, use this to
translate pixel positions you see in the image into the CSS pixel
coordinates that Chrome DevTools Protocol expects.
**CDP Input.dispatchMouseEvent uses CSS pixels**, so you want
``css_x`` / ``css_y`` for every click/hover tool. ``physical_x/y``
is kept in the return for debugging on HiDPI displays; do NOT
feed it to clicks; on a DPR=2 screen it lands 2× too far.
Edge case: pages using ``zoom`` or ``transform: scale()`` (e.g.
LinkedIn's ``#interop-outlet`` shadow DOM) render in a scaled
local coordinate space. For those, ``getBoundingClientRect()``
reports pre-zoom coordinates and you may still need to multiply
by the element's effective zoom. Use browser_shadow_query to
get the zoomed rect directly.
Args:
x: X pixel position in the screenshot image
y: Y pixel position in the screenshot image
tab_id: Chrome tab ID (default: active tab for profile)
profile: Browser profile name (default: "default")
Returns:
Dict with css_x, css_y (primary use these), physical_x,
physical_y (debug only), and scale factors.
"""
ctx = _get_context(profile)
target_tab = tab_id or (ctx.get("activeTabId") if ctx else None)
physical_scale = _screenshot_scales.get(target_tab, 1.0) if target_tab else 1.0
# css_scale stored in second slot via _screenshot_css_scales
css_scale = _screenshot_css_scales.get(target_tab, physical_scale) if target_tab else physical_scale
return {
"ok": True,
# Primary output: CSS pixels. Feed these to click/hover/press.
"css_x": round(x * css_scale, 1),
"css_y": round(y * css_scale, 1),
# Debug output: raw physical pixels. DO NOT feed to clicks on
# HiDPI displays — CDP Input events use CSS pixels, so sending
# physical coordinates lands the click at roughly DPR× the
# intended position.
"physical_x": round(x * physical_scale, 1),
"physical_y": round(y * physical_scale, 1),
"physicalScale": physical_scale,
"cssScale": css_scale,
"tabId": target_tab,
"note": (
"Use css_x/css_y with browser_click_coordinate, "
"browser_hover_coordinate, browser_press_at — "
"Chrome DevTools Protocol Input.dispatchMouseEvent "
"operates in CSS pixels. physical_x/y is for debugging "
"on HiDPI displays only; feeding it to clicks lands "
"them at DPR× the intended coordinate."
),
}
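The removal makes sense once the resize above guarantees a 1:1 mapping: the conversion this tool wrapped collapses to the identity. Illustrative numbers:

image_width, css_width = 600, 1280     # old pipeline: downscale to 600 px
css_scale = css_width / image_width    # ~2.133: every click needed this
assert round(150 * css_scale) == 320   # image x=150 -> CSS x=320

image_width = css_width                # new pipeline: 1:1 resize
assert css_width / image_width == 1.0  # image coord == CSS coord, no tool needed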
@mcp.tool()
async def browser_shadow_query(
selector: str,
@@ -412,7 +318,11 @@ def register_inspection_tools(mcp: FastMCP) -> None:
Traverses shadow roots to find elements inside closed/open shadow DOM,
overlays, and virtual-rendered components (e.g. LinkedIn's #interop-outlet).
Returns getBoundingClientRect in both CSS and physical pixels.
Returns the element's bounding rect in CSS pixels. Screenshot
pixels == CSS pixels, so the same numbers also match whatever
the agent sees in a browser_screenshot; feed ``css.cx/cy``
straight to browser_click_coordinate / hover_coordinate /
press_at.
Args:
selector: CSS selectors joined by ' >>> ' to pierce shadow roots.
@@ -421,7 +331,7 @@ def register_inspection_tools(mcp: FastMCP) -> None:
profile: Browser profile name (default: "default")
Returns:
Dict with rect (CSS px) and physical rect (CSS px × DPR) of the element
Dict with ``css`` block (x, y, w, h, cx, cy).
"""
bridge = get_bridge()
if not bridge or not bridge.is_connected:
@@ -438,10 +348,6 @@ def register_inspection_tools(mcp: FastMCP) -> None:
return result
rect = result["rect"]
physical_scale = _screenshot_scales.get(target_tab, 1.0)
css_scale = _screenshot_css_scales.get(target_tab, 1.0)
dpr = physical_scale / css_scale if css_scale else 1.0
return {
"ok": True,
"selector": selector,
@@ -454,20 +360,11 @@ def register_inspection_tools(mcp: FastMCP) -> None:
"cx": rect["cx"],
"cy": rect["cy"],
},
"physical": {
"x": round(rect["x"] * dpr, 1),
"y": round(rect["y"] * dpr, 1),
"w": round(rect["w"] * dpr, 1),
"h": round(rect["h"] * dpr, 1),
"cx": round(rect["cx"] * dpr, 1),
"cy": round(rect["cy"] * dpr, 1),
},
"note": (
"Use css.cx/cy with browser_click_coordinate, "
"browser_hover_coordinate, browser_press_at — "
"CDP Input events operate in CSS pixels. "
"physical.* is debug-only; feeding it to clicks "
"lands them DPR× too far on HiDPI displays."
"Pass css.cx/cy browser_click_coordinate / "
"hover_coordinate / press_at. Screenshot pixels == CSS "
"pixels, so these coords also match anything you see in "
"browser_screenshot."
),
}
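Hypothetical usage, treating the decorated tools as plain async callables and using an illustrative shadow-piercing selector:

rect = await browser_shadow_query("#interop-outlet >>> form >>> button.send")
if rect.get("ok"):
    css = rect["css"]
    await browser_click_coordinate(x=css["cx"], y=css["cy"])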
@@ -480,11 +377,11 @@ def register_inspection_tools(mcp: FastMCP) -> None:
"""
Get the bounding rect of an element by CSS selector.
Supports '>>>' shadow-piercing selectors for overlay/shadow DOM content.
Returns coordinates in CSS pixels (for clicks and DOM APIs); the
physical-pixel variant is returned for debugging on HiDPI displays
only it must not be fed to click/hover/press tools, which use
CSS pixels.
Supports '>>>' shadow-piercing selectors for overlay/shadow DOM
content. Returns the rect in CSS pixels. Screenshot pixels ==
CSS pixels, so the same numbers match anything visible in
browser_screenshot; feed ``css.cx/cy`` straight to
browser_click_coordinate / hover_coordinate / press_at.
Args:
selector: CSS selector, optionally with ' >>> ' to pierce shadow roots.
@@ -493,7 +390,7 @@ def register_inspection_tools(mcp: FastMCP) -> None:
profile: Browser profile name (default: "default")
Returns:
Dict with css and physical bounding rects
Dict with ``css`` block (x, y, w, h, cx, cy).
"""
bridge = get_bridge()
if not bridge or not bridge.is_connected:
@@ -510,10 +407,6 @@ def register_inspection_tools(mcp: FastMCP) -> None:
return result
rect = result["rect"]
physical_scale = _screenshot_scales.get(target_tab, 1.0)
css_scale = _screenshot_css_scales.get(target_tab, 1.0)
dpr = physical_scale / css_scale if css_scale else 1.0
return {
"ok": True,
"selector": selector,
@@ -526,20 +419,11 @@ def register_inspection_tools(mcp: FastMCP) -> None:
"cx": rect["cx"],
"cy": rect["cy"],
},
"physical": {
"x": round(rect["x"] * dpr, 1),
"y": round(rect["y"] * dpr, 1),
"w": round(rect["w"] * dpr, 1),
"h": round(rect["h"] * dpr, 1),
"cx": round(rect["cx"] * dpr, 1),
"cy": round(rect["cy"] * dpr, 1),
},
"note": (
"Use css.cx/cy with browser_click_coordinate, "
"browser_hover_coordinate, browser_press_at — "
"CDP Input events operate in CSS pixels. "
"physical.* is debug-only; feeding it to clicks "
"lands them DPR× too far on HiDPI displays."
"Pass css.cx/cy browser_click_coordinate / "
"hover_coordinate / press_at. Screenshot pixels == CSS "
"pixels, so these coords also match anything you see in "
"browser_screenshot."
),
}
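Same flow with a plain selector; the rect's CSS center feeds any coordinate tool directly (sketch, illustrative selector):

r = await browser_get_rect("button[type=submit]")
if r.get("ok"):
    await browser_press_at(x=r["css"]["cx"], y=r["css"]["cy"], key="Enter")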
+109 -51
View File
@@ -108,24 +108,24 @@ def register_interaction_tools(mcp: FastMCP) -> None:
button: Literal["left", "right", "middle"] = "left",
) -> dict:
"""
Click at specific viewport coordinates (CSS pixels).
Click at viewport coordinates.
Chrome DevTools Protocol's Input.dispatchMouseEvent operates in
**CSS pixels**, not physical pixels. If you have a screenshot
image coordinate, convert it with ``browser_coords(x, y)`` and
use the returned ``css_x`` / ``css_y`` not ``physical_x/y``.
On a DPR=2 display, feeding physical coordinates lands the click
at 2× the intended position.
Screenshots are delivered at the CSS viewport's own dimensions
(see ``browser_screenshot``), so a pixel you read off the image
is the same number you pass here no conversion, no scale
factors. ``browser_get_rect`` likewise returns coords you can
feed straight through.
Args:
x: X coordinate in CSS pixels (viewport space)
y: Y coordinate in CSS pixels (viewport space)
x: X coordinate in the screenshot / CSS viewport.
y: Y coordinate in the screenshot / CSS viewport.
tab_id: Chrome tab ID (default: active tab)
profile: Browser profile name (default: "default")
button: Mouse button to click (left, right, middle)
Returns:
Dict with click result
Dict with click result, including ``focused_element``
describing what the click focused.
"""
start = time.perf_counter()
params = {"x": x, "y": y, "tab_id": tab_id, "profile": profile, "button": button}
@@ -149,17 +149,11 @@ def register_interaction_tools(mcp: FastMCP) -> None:
return result
try:
from .inspection import _screenshot_css_scales, _screenshot_scales
click_result = await bridge.click_coordinate(target_tab, x, y, button=button)
log_tool_call(
"browser_click_coordinate",
params,
result={
**click_result,
"debug_stored_physicalScale": _screenshot_scales.get(target_tab, "unset"),
"debug_stored_cssScale": _screenshot_css_scales.get(target_tab, "unset"),
},
result=click_result,
duration_ms=(time.perf_counter() - start) * 1000,
)
return click_result
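A sketch of how the enriched response might be used; the exact shape of ``focused_element`` is not shown in this diff, so the keys below are assumptions:

res = await browser_click_coordinate(x=412, y=388)
focused = res.get("focused_element") or {}
if focused:  # the click landed on (and focused) something editable
    await browser_type_focused(text="Hello!")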
@@ -179,43 +173,34 @@ def register_interaction_tools(mcp: FastMCP) -> None:
text: str,
tab_id: int | None = None,
profile: str | None = None,
delay_ms: int = 0,
delay_ms: int = 1,
clear_first: bool = True,
timeout_ms: int = 30000,
use_insert_text: bool = True,
) -> dict:
"""
Type text into an input element.
Click a selector to focus it, then type text into it.
Automatically routes through a real CDP pointer click on the
element before inserting text so that rich-text editors like
Lexical (Gmail, LinkedIn DMs), Draft.js (X compose), and
ProseMirror (Reddit) see a native focus event and enable their
submit buttons. See the gcu-browser skill for the full "click-
then-type" pattern.
By default uses CDP Input.insertText which is the most reliable
way to insert text into rich editors. Set
``use_insert_text=False`` to fall back to per-character
keyDown/keyUp events (needed only for code editors that fire
on specific keystrokes, or when ``delay_ms`` typing animation
is required).
Uses CDP ``Input.insertText`` by default, which works for both
standard inputs and many rich-text editors. Use
``browser_type_focused`` when the target is already focused or
you cannot reliably address it with a selector.
Args:
selector: CSS selector for the input element
text: Text to type
tab_id: Chrome tab ID (default: active tab)
profile: Browser profile name (default: "default")
delay_ms: Delay between keystrokes in ms (default: 0).
Forces the per-keystroke fallback when > 0.
clear_first: Clear existing text before typing (default: True)
timeout_ms: Timeout waiting for element (default: 30000)
selector: CSS selector for the input element.
text: Text to type.
tab_id: Chrome tab ID (default: active tab).
profile: Browser profile name (default: "default").
delay_ms: Delay between keystrokes in ms (default: 1).
Forces the per-keystroke fallback when > 0.
clear_first: Clear existing text before typing (default: True).
timeout_ms: Timeout waiting for element (default: 30000).
use_insert_text: Use CDP Input.insertText (default: True) for
reliable insertion into rich-text editors.
Set False for per-keystroke dispatch.
reliable insertion into rich-text editors. Set False for
per-keystroke dispatch.
Returns:
Dict with type result
Dict with type result.
"""
start = time.perf_counter()
params = {"selector": selector, "text": text, "tab_id": tab_id, "profile": profile}
@@ -293,6 +278,77 @@ def register_interaction_tools(mcp: FastMCP) -> None:
timeout_ms=timeout_ms,
)
@mcp.tool()
async def browser_type_focused(
text: str,
tab_id: int | None = None,
profile: str | None = None,
delay_ms: int = 1,
clear_first: bool = True,
use_insert_text: bool = True,
) -> dict:
"""
Type text into the already-focused element.
Targets ``document.activeElement`` and is ideal after a
coordinate click, or when the editable cannot be reached
reliably with a selector. Faster than repeated
``browser_press`` calls for multi-character input.
Args:
text: Text to insert at the current cursor position.
tab_id: Chrome tab ID (default: active tab).
profile: Browser profile name (default: "default").
delay_ms: Delay between keystrokes in ms (default: 1).
Forces per-keystroke dispatch when > 0.
clear_first: Clear existing text before typing (default: True).
use_insert_text: Use CDP Input.insertText (default: True).
Returns:
Dict with type result.
"""
start = time.perf_counter()
params = {"text": text, "tab_id": tab_id, "profile": profile}
bridge = get_bridge()
if not bridge or not bridge.is_connected:
result = {"ok": False, "error": "Browser extension not connected"}
log_tool_call("browser_type_focused", params, result=result)
return result
ctx = _get_context(profile)
if not ctx:
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
log_tool_call("browser_type_focused", params, result=result)
return result
target_tab = tab_id or ctx.get("activeTabId")
if target_tab is None:
result = {"ok": False, "error": "No active tab"}
log_tool_call("browser_type_focused", params, result=result)
return result
try:
type_result = await bridge.type_text(
target_tab,
None,
text,
clear_first=clear_first,
delay_ms=delay_ms,
use_insert_text=use_insert_text,
)
log_tool_call(
"browser_type_focused",
params,
result=type_result,
duration_ms=(time.perf_counter() - start) * 1000,
)
return type_result
except Exception as e:
result = {"ok": False, "error": str(e)}
log_tool_call("browser_type_focused", params, error=e, duration_ms=(time.perf_counter() - start) * 1000)
return result
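Together with the tools above, this enables the canonical coordinate workflow (sketch; coordinates illustrative):

await browser_screenshot()                    # read coords off the image
await browser_click_coordinate(x=512, y=640)  # focus the editable
await browser_type_focused(text="On my way!")
await browser_press(key="Enter")              # submit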
@mcp.tool()
async def browser_press(
key: str,
@@ -422,15 +478,16 @@ def register_interaction_tools(mcp: FastMCP) -> None:
profile: str | None = None,
) -> dict:
"""
Hover at CSS pixel coordinates without needing a CSS selector.
Hover at viewport coordinates without needing a CSS selector.
Use this instead of browser_hover when the element is in an overlay,
shadow DOM, or virtual-rendered component that isn't in the regular DOM.
Pair with browser_coords to convert screenshot image positions to CSS pixels.
Screenshot pixels == CSS pixels, so any coord you read off a
browser_screenshot image can be fed straight through.
Args:
x: CSS pixel X coordinate
y: CSS pixel Y coordinate
x: X coordinate in the screenshot / CSS viewport.
y: Y coordinate in the screenshot / CSS viewport.
tab_id: Chrome tab ID (default: active tab)
profile: Browser profile name (default: "default")
@@ -486,16 +543,17 @@ def register_interaction_tools(mcp: FastMCP) -> None:
profile: str | None = None,
) -> dict:
"""
Move mouse to CSS pixel coordinates then press a key.
Move mouse to viewport coordinates then press a key.
Use this instead of browser_press when the focused element is in an overlay
or virtual-rendered component. Moving the mouse first routes the key event
through native browser hit-testing instead of the DOM focus chain.
Pair with browser_coords to convert screenshot image positions to CSS pixels.
Screenshot pixels == CSS pixels, so coords read off a
browser_screenshot image can be fed straight through.
Args:
x: CSS pixel X coordinate to position mouse
y: CSS pixel Y coordinate to position mouse
x: X coordinate in the screenshot / CSS viewport.
y: Y coordinate in the screenshot / CSS viewport.
key: Key to press (e.g. 'Enter', 'Space', 'Escape', 'ArrowDown')
tab_id: Chrome tab ID (default: active tab)
profile: Browser profile name (default: "default")
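A one-line sketch of when this beats ``browser_press``: an overlay that never takes DOM focus but does respond to hit-tested input (coordinates illustrative):

await browser_press_at(x=300, y=220, key="Enter")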
-15
View File
@@ -1,15 +0,0 @@
import json

try:
    with open('data/linkedin_ledger.json', 'r') as f:
        data = json.load(f)
    profiles = data.get('messaged_profiles', [])
    for p in profiles:
        if 'variant' not in p:
            p['variant'] = 'Control'  # Retroactively label our first runs
    with open('data/linkedin_ledger.json', 'w') as f:
        json.dump({"messaged_profiles": profiles}, f, indent=2)
except Exception as e:
    print(f"Error: {e}")
-16
View File
@@ -1,16 +0,0 @@
{
  "replies": [
    {
      "original_preview": "NASA Ames@NASAAmes\u00b75hWe\u2019re just getting started\n\nDuring their historic journey around the Moon, Artemis II observed lunar targets to study color, text"
    },
    {
      "original_preview": "NASA Marshall@NASA_Marshall\u00b74h Enjoy these views of the Artemis II launch from cameras affixed to the rocket! On April 1, 2026, the SLS (Space Launch "
    },
    {
      "original_preview": "U.S. Navy@USNavy\u00b711hFirst contact. On April 10, U.S. Navy divers were the first on the scene as the Navy and NASA successfully recovered the Orion s"
    },
    {
      "original_preview": "Alright, I give in. Here\u2019s my picture with the boss, courtesy of @johnkrausphotos. Oh, and hook \u2018em!"
    }
  ]
}