Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| bb2e55f5fa | |||
| 5d0106ac34 | |||
| 3a219e27ab | |||
| 7f62e7a2d0 | |||
| 44d114f0d0 | |||
| 9e71f16d15 | |||
| 28cad2376c | |||
| 8222cd306e | |||
| 916803889f |
+1
-13
@@ -44,19 +44,7 @@
|
||||
"WebFetch(domain:docs.litellm.ai)",
|
||||
"Bash(cat /home/timothy/aden/hive/.venv/lib/python3.11/site-packages/litellm-*.dist-info/METADATA)",
|
||||
"Bash(find \"/home/timothy/.hive/agents/queens/queen_brand_design/sessions/session_20260415_100751_d49f4c28/\" -type f -name \"*.json*\" -exec grep -l \"协日\" {} \\\\;)",
|
||||
"Bash(grep -v ':0$')",
|
||||
"Bash(find /home/timothy/aden/hive/core/framework/skills/_default_skills -name \"SKILL.md\" -exec grep -l \"curl\\\\|jq\\\\|bash\\\\|sh\\\\|CLI\\\\|command\" {} \\\\;)",
|
||||
"Bash(python3 -c \"import sys, json; [print\\(json.loads\\(line\\).get\\('data', {}\\).get\\('iteration'\\)\\) for line in sys.stdin]\")",
|
||||
"Bash(grep -l \"shell\\\\|bash\\\\|exec\\\\|subprocess\" /home/timothy/aden/hive/tools/src/gcu/files/*.py)",
|
||||
"Bash(python3 -c \"import aden_tools.file_ops; print\\(aden_tools.file_ops.__file__\\)\")",
|
||||
"Bash(find / -path /proc -prune -o -name \"file_ops*\" -print)",
|
||||
"Bash(grep -l \"sqlite\\\\|sqlite3\" /home/timothy/aden/hive/tools/src/aden_tools/tools/*/*.py)",
|
||||
"Bash(grep -iv \"_tool$\")",
|
||||
"Bash(grep -n \"add_post.*sessions\\\\|add_post.*colonies\\\\|add_get.*sessions\" /home/timothy/aden/hive/core/framework/server/*.py)",
|
||||
"Bash(python -c 'import json; d=json.load\\(open\\('\\\\''__TRACKED_VAR__/.hive/colonies/__TRACKED_VAR__/worker.json'\\\\''\\)\\); print\\('\\\\''input_data:'\\\\'', d.get\\('\\\\''input_data'\\\\'', '\\\\''MISSING'\\\\''\\)\\)')",
|
||||
"Bash(python -c 'import json; d=json.load\\(open\\('\\\\''__TRACKED_VAR__/.hive/colonies/__TRACKED_VAR__/worker.json'\\\\''\\)\\); print\\('\\\\'' __TRACKED_VAR__:'\\\\'', '\\\\''input_data='\\\\'', d.get\\('\\\\''input_data'\\\\'', '\\\\''MISSING'\\\\''\\)\\)')",
|
||||
"Bash(python -c 'import json; d=json.load\\(open\\('\\\\''__TRACKED_VAR__/.hive/colonies/__TRACKED_VAR__/worker.json'\\\\''\\)\\); print\\('\\\\'' __TRACKED_VAR__: input_data ='\\\\'', d.get\\('\\\\''input_data'\\\\''\\)\\)')",
|
||||
"Bash(kill 2466637 2466632)"
|
||||
"Bash(grep -v ':0$')"
|
||||
],
|
||||
"additionalDirectories": [
|
||||
"/home/timothy/.hive/skills/writing-hive-skills",
|
||||
|
||||
@@ -1,11 +0,0 @@
|
||||
import json
|
||||
|
||||
with open('/home/timothy/aden/hive/x_rapid_ledger.json', 'r') as f:
|
||||
data = json.load(f)
|
||||
|
||||
data['replies'].append({
|
||||
'original_preview': 'Alright, I give in. Here’s my picture with the boss, courtesy of @johnkrausphotos. Oh, and hook ‘em!'
|
||||
})
|
||||
|
||||
with open('/home/timothy/aden/hive/x_rapid_ledger.json', 'w') as f:
|
||||
json.dump(data, f, indent=2)
|
||||
@@ -1,11 +0,0 @@
|
||||
import json, sys
|
||||
|
||||
with open('/home/timothy/aden/hive/x_rapid_ledger.json', 'r') as f:
|
||||
ledger = json.load(f)
|
||||
|
||||
text = sys.argv[1]
|
||||
for r in ledger['replies']:
|
||||
if r.get('original_preview') == text:
|
||||
print("YES")
|
||||
sys.exit(0)
|
||||
print("NO")
|
||||
@@ -184,16 +184,8 @@ _QUEEN_INDEPENDENT_TOOLS = [
|
||||
"search_files",
|
||||
"run_command",
|
||||
"undo_changes",
|
||||
# NOTE (2026-04-16): ``run_parallel_workers`` was removed from the
|
||||
# independent phase. The queen's pure DM mode is for conversation
|
||||
# with the user; spawning workers from here puts their activity
|
||||
# into a chat surface that's supposed to stay queen↔user only.
|
||||
# Users who want to fan out parallel work should (a) use
|
||||
# ``create_colony`` to fork into a persistent colony (where
|
||||
# worker activity has its own page), or (b) load an agent via
|
||||
# build/stage and use ``run_parallel_workers`` in the running
|
||||
# phase where a worker context already exists.
|
||||
#
|
||||
# Parallel fan-out (Phase 4 unified ColonyRuntime)
|
||||
"run_parallel_workers",
|
||||
# Fork this session into a persistent colony for headless /
|
||||
# recurring / background work that needs to keep running in
|
||||
# parallel to (or after) this chat.
|
||||
|
||||
@@ -25,7 +25,6 @@ All tools are prefixed with `browser_`:
|
||||
- `browser_screenshot` — visual capture (annotated PNG)
|
||||
<!-- /vision-only -->
|
||||
- `browser_shadow_query`, `browser_get_rect` — locate elements (shadow-piercing via `>>>`)
|
||||
- `browser_coords` — convert image pixels to CSS pixels (always use `css_x/y`, never `physical_x/y`)
|
||||
- `browser_scroll`, `browser_wait` — navigation helpers
|
||||
- `browser_evaluate` — run JavaScript
|
||||
- `browser_close`, `browser_close_finished` — tab cleanup
|
||||
@@ -38,9 +37,9 @@ All tools are prefixed with `browser_`:
|
||||
|
||||
Neither tool is "preferred" universally — they're for different jobs. Default to snapshot on text-heavy static pages, screenshot on SPAs and anything shadow-DOM-heavy. Activate the `browser-automation` skill for the full decision tree.
|
||||
|
||||
## Coordinate rule: always CSS pixels
|
||||
## Coordinate rule
|
||||
|
||||
Chrome DevTools Protocol `Input.dispatchMouseEvent` takes **CSS pixels**, not physical pixels. After a screenshot, use `browser_coords(image_x, image_y)` and feed the returned `css_x/y` (NOT `physical_x/y`) to `browser_click_coordinate`, `browser_hover_coordinate`, `browser_press_at`. Feeding physical pixels on a HiDPI display (DPR=1.6, 2, or 3) overshoots by `DPR×` and clicks land in the wrong place. `getBoundingClientRect()` already returns CSS pixels — pass through unchanged, no DPR multiplication.
|
||||
`browser_screenshot` delivers the image at the CSS viewport's own dimensions, so a pixel you read off the screenshot is the same coordinate `browser_click_coordinate`, `browser_hover_coordinate`, and `browser_press_at` expect — no conversion. `getBoundingClientRect()` likewise returns CSS pixels; pass through unchanged.
|
||||
|
||||
## System prompt tips for browser nodes
|
||||
|
||||
|
||||
@@ -631,43 +631,6 @@ class ColonyRuntime:
|
||||
spawn_tools = tools if tools is not None else self._tools
|
||||
spawn_executor = tool_executor or self._tool_executor
|
||||
|
||||
# Colony progress tracker: when the caller supplied a db_path
|
||||
# in input_data, this worker is part of a SQLite task queue
|
||||
# and must see the hive.colony-progress-tracker skill body in
|
||||
# its system prompt from turn 0. Rebuild the catalog with the
|
||||
# skill pre-activated; falls back to the colony default when
|
||||
# no db_path is present.
|
||||
_spawn_catalog = self.skills_catalog_prompt
|
||||
_spawn_skill_dirs = self.skill_dirs
|
||||
if isinstance(input_data, dict) and input_data.get("db_path"):
|
||||
try:
|
||||
from framework.skills.config import SkillsConfig
|
||||
from framework.skills.manager import SkillsManager, SkillsManagerConfig
|
||||
|
||||
_pre = SkillsManager(
|
||||
SkillsManagerConfig(
|
||||
skills_config=SkillsConfig.from_agent_vars(
|
||||
skills=["hive.colony-progress-tracker"],
|
||||
),
|
||||
)
|
||||
)
|
||||
_pre.load()
|
||||
_spawn_catalog = _pre.skills_catalog_prompt
|
||||
_spawn_skill_dirs = list(_pre.allowlisted_dirs) if hasattr(_pre, "allowlisted_dirs") else self.skill_dirs
|
||||
logger.info(
|
||||
"spawn: pre-activated hive.colony-progress-tracker "
|
||||
"(catalog %d → %d chars) for worker with db_path=%s",
|
||||
len(self.skills_catalog_prompt),
|
||||
len(_spawn_catalog),
|
||||
input_data.get("db_path"),
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"spawn: failed to pre-activate colony-progress-tracker "
|
||||
"skill, falling back to base catalog: %s",
|
||||
exc,
|
||||
)
|
||||
|
||||
# Resolve the SSE stream_id once. When the caller didn't supply
|
||||
# one we use the per-worker fan-out tag (filtered out by the
|
||||
# SSE handler). When the caller passed an explicit value we
|
||||
@@ -722,9 +685,9 @@ class ColonyRuntime:
|
||||
llm=self._llm,
|
||||
available_tools=list(spawn_tools),
|
||||
accounts_prompt=self._accounts_prompt,
|
||||
skills_catalog_prompt=_spawn_catalog,
|
||||
skills_catalog_prompt=self.skills_catalog_prompt,
|
||||
protocols_prompt=self.protocols_prompt,
|
||||
skill_dirs=_spawn_skill_dirs,
|
||||
skill_dirs=self.skill_dirs,
|
||||
execution_id=worker_id,
|
||||
stream_id=explicit_stream_id or f"worker:{worker_id}",
|
||||
)
|
||||
@@ -757,8 +720,6 @@ class ColonyRuntime:
|
||||
async def spawn_batch(
|
||||
self,
|
||||
tasks: list[dict[str, Any]],
|
||||
*,
|
||||
tools_override: list[Any] | None = None,
|
||||
) -> list[str]:
|
||||
"""Spawn a batch of parallel workers, one per task spec.
|
||||
|
||||
@@ -771,12 +732,6 @@ class ColonyRuntime:
|
||||
The overseer's ``run_parallel_workers`` tool is the usual
|
||||
caller; it pairs ``spawn_batch`` + ``wait_for_worker_reports``
|
||||
into a single fan-out/fan-in primitive.
|
||||
|
||||
When ``tools_override`` is supplied, every spawned worker
|
||||
receives that tool list instead of the colony's default. Used
|
||||
by ``run_parallel_workers`` to drop tools whose credentials
|
||||
failed the pre-flight check (so the spawned workers don't
|
||||
waste a startup trying to use them).
|
||||
"""
|
||||
worker_ids: list[str] = []
|
||||
for spec in tasks:
|
||||
@@ -788,7 +743,6 @@ class ColonyRuntime:
|
||||
task=task_text,
|
||||
count=1,
|
||||
input_data=task_data or {"task": task_text},
|
||||
tools=tools_override,
|
||||
)
|
||||
worker_ids.extend(ids)
|
||||
return worker_ids
|
||||
|
||||
@@ -1,491 +0,0 @@
|
||||
"""Per-colony SQLite task queue + progress ledger.
|
||||
|
||||
Every colony gets its own ``progress.db`` under ``~/.hive/colonies/{name}/data/``.
|
||||
The DB holds the colony's task queue plus per-task step and SOP checklist
|
||||
rows. Workers claim tasks atomically, write progress as they execute, and
|
||||
verify SOP gates before marking a task done. This gives cross-run memory
|
||||
that the existing per-iteration stall detectors don't have.
|
||||
|
||||
The DB is driven by agents via the ``sqlite3`` CLI through
|
||||
``execute_command_tool``. This module handles framework-side lifecycle:
|
||||
creation, migration, queen-side bulk seeding, stale-claim reclamation.
|
||||
|
||||
Concurrency model:
|
||||
- WAL mode on from day one so 100 concurrent workers don't serialize.
|
||||
- Workers hold NO long-running connection — they ``sqlite3`` per call,
|
||||
which naturally releases locks between LLM turns.
|
||||
- Atomic claim via ``BEGIN IMMEDIATE; UPDATE tasks SET status='claimed'
|
||||
WHERE id=(SELECT ... LIMIT 1)``. The subquery-form UPDATE runs inside
|
||||
the immediate transaction so racers either win the row or find zero
|
||||
affected rows.
|
||||
- Stale-claim reclaimer runs on host startup: claims older than
|
||||
``stale_after_minutes`` get returned to ``pending`` and the row's
|
||||
``retry_count`` increments. When ``retry_count >= max_retries`` the
|
||||
row is moved to ``failed`` instead.
|
||||
|
||||
All writes go through ``BEGIN IMMEDIATE`` so racing readers see
|
||||
consistent snapshots.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import sqlite3
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Current schema generation. Persisted into ``PRAGMA user_version`` so
# ``ensure_progress_db`` can detect fresh/outdated files and migrate them.
SCHEMA_VERSION = 1

# Schema v1, executed as one script when ``user_version < SCHEMA_VERSION``.
# Tables:
#   tasks         — the colony task queue; claim/retry state machine
#                   (status: pending -> claimed/in_progress -> done/failed)
#   steps         — ordered per-task progress steps (UNIQUE task_id+seq)
#   sop_checklist — per-task SOP gate items with required flags + done stamps
#   colony_meta   — free-form key/value metadata (e.g. schema_version)
# The partial index ``idx_tasks_claimable`` serves the atomic-claim
# subquery (status/priority/seq ordering) without scanning closed rows.
_SCHEMA_V1 = """
CREATE TABLE IF NOT EXISTS tasks (
    id TEXT PRIMARY KEY,
    seq INTEGER,
    priority INTEGER NOT NULL DEFAULT 0,
    goal TEXT NOT NULL,
    payload TEXT,
    status TEXT NOT NULL DEFAULT 'pending',
    worker_id TEXT,
    claim_token TEXT,
    claimed_at TEXT,
    started_at TEXT,
    completed_at TEXT,
    created_at TEXT NOT NULL,
    updated_at TEXT NOT NULL,
    retry_count INTEGER NOT NULL DEFAULT 0,
    max_retries INTEGER NOT NULL DEFAULT 3,
    last_error TEXT,
    parent_task_id TEXT REFERENCES tasks(id) ON DELETE SET NULL,
    source TEXT
);

CREATE TABLE IF NOT EXISTS steps (
    id TEXT PRIMARY KEY,
    task_id TEXT NOT NULL REFERENCES tasks(id) ON DELETE CASCADE,
    seq INTEGER NOT NULL,
    title TEXT NOT NULL,
    detail TEXT,
    status TEXT NOT NULL DEFAULT 'pending',
    evidence TEXT,
    worker_id TEXT,
    started_at TEXT,
    completed_at TEXT,
    UNIQUE (task_id, seq)
);

CREATE TABLE IF NOT EXISTS sop_checklist (
    id TEXT PRIMARY KEY,
    task_id TEXT NOT NULL REFERENCES tasks(id) ON DELETE CASCADE,
    key TEXT NOT NULL,
    description TEXT NOT NULL,
    required INTEGER NOT NULL DEFAULT 1,
    done_at TEXT,
    done_by TEXT,
    note TEXT,
    UNIQUE (task_id, key)
);

CREATE TABLE IF NOT EXISTS colony_meta (
    key TEXT PRIMARY KEY,
    value TEXT NOT NULL,
    updated_at TEXT NOT NULL
);

CREATE INDEX IF NOT EXISTS idx_tasks_claimable
    ON tasks(status, priority DESC, seq, created_at)
    WHERE status = 'pending';

CREATE INDEX IF NOT EXISTS idx_steps_task_seq
    ON steps(task_id, seq);

CREATE INDEX IF NOT EXISTS idx_sop_required_open
    ON sop_checklist(task_id, required, done_at);

CREATE INDEX IF NOT EXISTS idx_tasks_status
    ON tasks(status, updated_at);
"""
|
||||
|
||||
_PRAGMAS = (
|
||||
"PRAGMA journal_mode = WAL;",
|
||||
"PRAGMA synchronous = NORMAL;",
|
||||
"PRAGMA foreign_keys = ON;",
|
||||
"PRAGMA busy_timeout = 5000;",
|
||||
)
|
||||
|
||||
|
||||
def _now_iso() -> str:
|
||||
return datetime.now(timezone.utc).isoformat(timespec="seconds")
|
||||
|
||||
|
||||
def _new_id() -> str:
|
||||
return str(uuid.uuid4())
|
||||
|
||||
|
||||
def _connect(db_path: Path) -> sqlite3.Connection:
|
||||
"""Open a connection with the standard pragmas applied.
|
||||
|
||||
WAL mode is sticky on the file once set, so re-applying on every
|
||||
open is cheap. The other pragmas are per-connection and must be
|
||||
set each time.
|
||||
"""
|
||||
con = sqlite3.connect(str(db_path), isolation_level=None, timeout=5.0)
|
||||
for pragma in _PRAGMAS:
|
||||
con.execute(pragma)
|
||||
return con
|
||||
|
||||
|
||||
def ensure_progress_db(colony_dir: Path) -> Path:
    """Create or migrate ``{colony_dir}/data/progress.db``.

    Idempotent: safe to call on an already-initialized DB. Returns the
    absolute path to the DB file.

    Steps:
      1. Ensure ``data/`` subdir exists.
      2. Open the DB (creates the file if missing).
      3. Apply WAL + pragmas.
      4. Read ``PRAGMA user_version``; if < SCHEMA_VERSION, run the
         schema block and bump user_version.
      5. Reclaim any stale claims left from previous runs.
      6. Patch every ``*.json`` worker config in the colony dir to
         inject ``input_data.db_path`` and ``input_data.colony_id`` so
         pre-existing colonies (forked before this feature landed) get
         the tracker wiring on their next spawn.
    """
    data_dir = Path(colony_dir) / "data"
    data_dir.mkdir(parents=True, exist_ok=True)
    db_path = data_dir / "progress.db"

    con = _connect(db_path)
    try:
        # user_version is 0 on a fresh file; anything below the current
        # SCHEMA_VERSION gets the (IF NOT EXISTS-guarded) schema script.
        current_version = con.execute("PRAGMA user_version").fetchone()[0]
        if current_version < SCHEMA_VERSION:
            con.executescript(_SCHEMA_V1)
            # PRAGMA does not support ? placeholders; SCHEMA_VERSION is a
            # module-level int constant, so the f-string is safe here.
            con.execute(f"PRAGMA user_version = {SCHEMA_VERSION}")
            con.execute(
                "INSERT OR REPLACE INTO colony_meta(key, value, updated_at) "
                "VALUES (?, ?, ?)",
                ("schema_version", str(SCHEMA_VERSION), _now_iso()),
            )
            logger.info(
                "progress_db: initialized schema v%d at %s", SCHEMA_VERSION, db_path
            )

        # Claims abandoned by crashed/hung workers from a previous run
        # are returned to 'pending' (or moved to 'failed') at startup.
        reclaimed = _reclaim_stale_inner(con, stale_after_minutes=15)
        if reclaimed:
            logger.info(
                "progress_db: reclaimed %d stale claims at startup (%s)",
                reclaimed,
                db_path,
            )
    finally:
        con.close()

    # Backfill worker configs after the DB is closed so workers spawned
    # from the patched configs always see a fully initialized file.
    resolved_db_path = db_path.resolve()
    _patch_worker_configs(Path(colony_dir), resolved_db_path)
    return resolved_db_path
|
||||
|
||||
|
||||
def _patch_worker_configs(colony_dir: Path, db_path: Path) -> int:
|
||||
"""Inject ``input_data.db_path`` + ``input_data.colony_id`` into
|
||||
existing ``worker.json`` files in a colony directory.
|
||||
|
||||
Runs on every ``ensure_progress_db`` call so colonies that were
|
||||
forked before this feature landed get their worker spawn messages
|
||||
patched in place. Idempotent: if ``input_data`` already contains
|
||||
the correct ``db_path``, the file is not rewritten.
|
||||
|
||||
Returns the number of files that were actually modified (0 on
|
||||
the common case of already-patched colonies).
|
||||
"""
|
||||
colony_id = colony_dir.name
|
||||
abs_db = str(db_path)
|
||||
patched = 0
|
||||
|
||||
for worker_cfg in colony_dir.glob("*.json"):
|
||||
# Only patch files that look like worker configs (have the
|
||||
# worker_meta shape). ``metadata.json`` and ``triggers.json``
|
||||
# are colony-level and must not be touched.
|
||||
if worker_cfg.name in ("metadata.json", "triggers.json"):
|
||||
continue
|
||||
try:
|
||||
data = json.loads(worker_cfg.read_text(encoding="utf-8"))
|
||||
except (json.JSONDecodeError, OSError):
|
||||
continue
|
||||
if not isinstance(data, dict) or "system_prompt" not in data:
|
||||
# Not a worker config (lacks the worker_meta schema).
|
||||
continue
|
||||
|
||||
input_data = data.get("input_data")
|
||||
if not isinstance(input_data, dict):
|
||||
input_data = {}
|
||||
|
||||
if (
|
||||
input_data.get("db_path") == abs_db
|
||||
and input_data.get("colony_id") == colony_id
|
||||
):
|
||||
continue # already patched
|
||||
|
||||
input_data["db_path"] = abs_db
|
||||
input_data["colony_id"] = colony_id
|
||||
data["input_data"] = input_data
|
||||
|
||||
try:
|
||||
worker_cfg.write_text(
|
||||
json.dumps(data, indent=2, ensure_ascii=False), encoding="utf-8"
|
||||
)
|
||||
patched += 1
|
||||
except OSError as e:
|
||||
logger.warning(
|
||||
"progress_db: failed to patch worker config %s: %s", worker_cfg, e
|
||||
)
|
||||
|
||||
if patched:
|
||||
logger.info(
|
||||
"progress_db: patched %d worker config(s) in colony '%s' with db_path",
|
||||
patched,
|
||||
colony_id,
|
||||
)
|
||||
return patched
|
||||
|
||||
|
||||
def ensure_all_colony_dbs(colonies_root: Path | None = None) -> list[Path]:
|
||||
"""Idempotently ensure every existing colony has a progress.db.
|
||||
|
||||
Called on framework host startup to backfill older colonies and
|
||||
run the stale-claim reclaimer on all of them in one pass.
|
||||
"""
|
||||
if colonies_root is None:
|
||||
colonies_root = Path.home() / ".hive" / "colonies"
|
||||
if not colonies_root.is_dir():
|
||||
return []
|
||||
|
||||
initialized: list[Path] = []
|
||||
for entry in sorted(colonies_root.iterdir()):
|
||||
if not entry.is_dir():
|
||||
continue
|
||||
try:
|
||||
initialized.append(ensure_progress_db(entry))
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"progress_db: failed to ensure DB for colony '%s': %s", entry.name, e
|
||||
)
|
||||
return initialized
|
||||
|
||||
|
||||
def seed_tasks(
    db_path: Path,
    tasks: list[dict[str, Any]],
    *,
    source: str = "queen_create",
) -> list[str]:
    """Bulk-insert tasks (with optional nested steps + sop_items).

    Each task dict accepts:
      - goal: str (required)
      - seq: int (optional ordering hint)
      - priority: int (default 0)
      - payload: dict | str | None (stored as JSON text)
      - max_retries: int (default 3)
      - parent_task_id: str | None
      - steps: list[{"title": str, "detail"?: str}] (optional)
      - sop_items: list[{"key": str, "description": str, "required"?: bool, "note"?: str}] (optional)

    All rows land in a single BEGIN IMMEDIATE transaction, so even a
    10k-row seed is one disk flush, and a validation error anywhere
    rolls back the whole batch. Returns the created task ids in input
    order.
    """
    if not tasks:
        return []

    stamp = _now_iso()
    new_ids: list[str] = []
    db = _connect(Path(db_path))
    try:
        db.execute("BEGIN IMMEDIATE")
        for pos, spec in enumerate(tasks):
            goal = spec.get("goal")
            if not goal:
                raise ValueError(f"task[{pos}] missing required 'goal' field")

            # Caller-supplied ids are honored; otherwise mint a UUID.
            task_id = spec.get("id") or _new_id()

            # Non-string payloads are serialized to JSON text.
            payload = spec.get("payload")
            if payload is not None and not isinstance(payload, str):
                payload = json.dumps(payload, ensure_ascii=False)

            db.execute(
                """
                INSERT INTO tasks (
                    id, seq, priority, goal, payload, status,
                    created_at, updated_at, max_retries, parent_task_id, source
                ) VALUES (?, ?, ?, ?, ?, 'pending', ?, ?, ?, ?, ?)
                """,
                (
                    task_id,
                    spec.get("seq"),
                    int(spec.get("priority", 0)),
                    goal,
                    payload,
                    stamp,
                    stamp,
                    int(spec.get("max_retries", 3)),
                    spec.get("parent_task_id"),
                    source,
                ),
            )

            # Steps are 1-based; an explicit per-step "seq" overrides
            # the positional default.
            for n, step_spec in enumerate(spec.get("steps") or [], start=1):
                if not step_spec.get("title"):
                    raise ValueError(
                        f"task[{pos}].steps[{n - 1}] missing required 'title'"
                    )
                db.execute(
                    """
                    INSERT INTO steps (id, task_id, seq, title, detail, status)
                    VALUES (?, ?, ?, ?, ?, 'pending')
                    """,
                    (
                        _new_id(),
                        task_id,
                        step_spec.get("seq", n),
                        step_spec["title"],
                        step_spec.get("detail"),
                    ),
                )

            for gate in spec.get("sop_items") or []:
                gate_key = gate.get("key")
                gate_desc = gate.get("description")
                if not gate_key or not gate_desc:
                    raise ValueError(
                        f"task[{pos}].sop_items missing 'key' or 'description'"
                    )
                db.execute(
                    """
                    INSERT INTO sop_checklist
                        (id, task_id, key, description, required, note)
                    VALUES (?, ?, ?, ?, ?, ?)
                    """,
                    (
                        _new_id(),
                        task_id,
                        gate_key,
                        gate_desc,
                        1 if gate.get("required", True) else 0,
                        gate.get("note"),
                    ),
                )

            new_ids.append(task_id)

        db.execute("COMMIT")
    except Exception:
        db.execute("ROLLBACK")
        raise
    finally:
        db.close()

    return new_ids
|
||||
|
||||
|
||||
def enqueue_task(
    db_path: Path,
    goal: str,
    *,
    steps: list[dict[str, Any]] | None = None,
    sop_items: list[dict[str, Any]] | None = None,
    payload: Any = None,
    priority: int = 0,
    parent_task_id: str | None = None,
    source: str = "enqueue_tool",
) -> str:
    """Append a single task to an existing queue. Thin wrapper over seed_tasks."""
    spec = {
        "goal": goal,
        "steps": steps,
        "sop_items": sop_items,
        "payload": payload,
        "priority": priority,
        "parent_task_id": parent_task_id,
    }
    created = seed_tasks(db_path, [spec], source=source)
    return created[0]
|
||||
|
||||
|
||||
def _reclaim_stale_inner(
|
||||
con: sqlite3.Connection, *, stale_after_minutes: int
|
||||
) -> int:
|
||||
"""Reclaim stale claims. Runs inside an existing open connection.
|
||||
|
||||
Two-step:
|
||||
1. Tasks past max_retries go to 'failed' with last_error populated.
|
||||
2. Remaining stale claims return to 'pending', retry_count++.
|
||||
"""
|
||||
cutoff_expr = f"datetime('now', '-{int(stale_after_minutes)} minutes')"
|
||||
|
||||
con.execute("BEGIN IMMEDIATE")
|
||||
try:
|
||||
con.execute(
|
||||
f"""
|
||||
UPDATE tasks
|
||||
SET status = 'failed',
|
||||
last_error = COALESCE(last_error, 'exceeded max_retries after stale claim'),
|
||||
completed_at = datetime('now'),
|
||||
updated_at = datetime('now')
|
||||
WHERE status IN ('claimed', 'in_progress')
|
||||
AND claimed_at IS NOT NULL
|
||||
AND claimed_at < {cutoff_expr}
|
||||
AND retry_count >= max_retries
|
||||
"""
|
||||
)
|
||||
|
||||
cur = con.execute(
|
||||
f"""
|
||||
UPDATE tasks
|
||||
SET status = 'pending',
|
||||
worker_id = NULL,
|
||||
claim_token = NULL,
|
||||
claimed_at = NULL,
|
||||
started_at = NULL,
|
||||
retry_count = retry_count + 1,
|
||||
updated_at = datetime('now')
|
||||
WHERE status IN ('claimed', 'in_progress')
|
||||
AND claimed_at IS NOT NULL
|
||||
AND claimed_at < {cutoff_expr}
|
||||
AND retry_count < max_retries
|
||||
"""
|
||||
)
|
||||
reclaimed = cur.rowcount or 0
|
||||
con.execute("COMMIT")
|
||||
return reclaimed
|
||||
except Exception:
|
||||
con.execute("ROLLBACK")
|
||||
raise
|
||||
|
||||
|
||||
def reclaim_stale(db_path: Path, stale_after_minutes: int = 15) -> int:
    """Public wrapper around the reclaimer that opens (and always
    closes) its own short-lived connection."""
    connection = _connect(Path(db_path))
    try:
        return _reclaim_stale_inner(
            connection, stale_after_minutes=stale_after_minutes
        )
    finally:
        connection.close()
|
||||
|
||||
|
||||
# Public API: framework-side lifecycle entry points only; the per-row
# claim/step/SOP writes are done by agents via the sqlite3 CLI, not here.
__all__ = [
    "SCHEMA_VERSION",
    "ensure_progress_db",
    "ensure_all_colony_dbs",
    "seed_tasks",
    "enqueue_task",
    "reclaim_stale",
]
|
||||
@@ -1404,18 +1404,7 @@ class AgentLoader:
|
||||
credential_store=credential_store,
|
||||
)
|
||||
runner._agent_default_skills = None
|
||||
# Colony workers attached to a SQLite task queue get the
|
||||
# colony-progress-tracker skill pre-activated so its full
|
||||
# claim / step / SOP-gate protocol lands in the system prompt
|
||||
# on turn 0, bypassing the progressive-disclosure catalog
|
||||
# lookup. Triggered by the presence of ``input_data.db_path``
|
||||
# in worker.json (written by fork_session_into_colony and
|
||||
# backfilled by ensure_progress_db for pre-existing colonies).
|
||||
_preactivate: list[str] = []
|
||||
_input_data = first_worker.get("input_data") or {}
|
||||
if isinstance(_input_data, dict) and _input_data.get("db_path"):
|
||||
_preactivate.append("hive.colony-progress-tracker")
|
||||
runner._agent_skills = _preactivate or None
|
||||
runner._agent_skills = None
|
||||
return runner
|
||||
|
||||
def register_tool(
|
||||
|
||||
@@ -497,22 +497,12 @@ class ToolRegistry:
|
||||
config["cwd"] = str(resolved_cwd)
|
||||
return config
|
||||
|
||||
# For coder_tools_server, inject --project-root so reads land
|
||||
# in the expected workspace (hive repo, for framework skills
|
||||
# and docs), and inject --write-root so writes land under
|
||||
# ~/.hive/workspace/ instead of polluting the git checkout
|
||||
# with queen-authored skills, ledgers, and scripts. Without
|
||||
# the split, every ``write_file`` call from the queen landed
|
||||
# in the hive repo root.
|
||||
# For coder_tools_server, inject --project-root so writes go to the expected workspace
|
||||
if script_name and "coder_tools" in script_name:
|
||||
project_root = str(resolved_cwd.parent.resolve())
|
||||
args = list(args)
|
||||
if "--project-root" not in args:
|
||||
args.extend(["--project-root", project_root])
|
||||
if "--write-root" not in args:
|
||||
_write_root = Path.home() / ".hive" / "workspace"
|
||||
_write_root.mkdir(parents=True, exist_ok=True)
|
||||
args.extend(["--write-root", str(_write_root)])
|
||||
config["args"] = args
|
||||
|
||||
if os.name == "nt":
|
||||
|
||||
@@ -42,22 +42,14 @@ after an interaction unless you need a fresh view.
|
||||
Only fall back to `browser_get_text` for extracting small elements by
|
||||
CSS selector.
|
||||
|
||||
## Coordinates: always CSS pixels
|
||||
## Coordinates
|
||||
|
||||
Chrome DevTools Protocol `Input.dispatchMouseEvent` takes **CSS
|
||||
pixels**, not physical pixels. This is critical and often done wrong:
|
||||
|
||||
| Tool | Unit |
|
||||
|---|---|
|
||||
| `browser_click_coordinate(x, y)` | **CSS pixels** |
|
||||
| `browser_hover_coordinate(x, y)` | **CSS pixels** |
|
||||
| `browser_press_at(x, y, key)` | **CSS pixels** |
|
||||
| `getBoundingClientRect()` | already CSS pixels — pass straight through |
|
||||
| `browser_coords(img_x, img_y)` | returns `css_x/y` (use this) and `physical_x/y` (debug only) |
|
||||
|
||||
**Always use `css_x/y`** from `browser_coords`. Feeding `physical_x/y`
|
||||
on a HiDPI display overshoots by `DPR×` — clicks land DPR times too
|
||||
far right and down. On a DPR=1.6 display that's 60% off.
|
||||
`browser_screenshot` delivers the image at the CSS viewport's own
|
||||
dimensions, so a pixel you read off the screenshot is the same number
|
||||
you pass to `browser_click_coordinate` / `browser_hover_coordinate` /
|
||||
`browser_press_at`. `browser_get_rect` and `browser_shadow_query` also
|
||||
return CSS px — feed `rect.css.cx` / `rect.css.cy` straight through.
|
||||
No scale factors to remember.
|
||||
|
||||
Never multiply `getBoundingClientRect()` by `devicePixelRatio` — it's
|
||||
already in the right unit.
|
||||
@@ -86,12 +78,11 @@ reach shadow elements transparently.
|
||||
|
||||
**Shadow-heavy site workflow:**
|
||||
1. `browser_screenshot()` → visual image
|
||||
2. Identify target visually → image coordinate
|
||||
3. `browser_coords(x, y)` → CSS px
|
||||
4. `browser_click_coordinate(css_x, css_y)` → lands via native hit
|
||||
test; inputs get focused regardless of shadow depth
|
||||
5. Type via `browser_type` or, if the selector path can't reach the
|
||||
element, dispatch keys to the focused element
|
||||
2. Identify target visually → pixel `(x, y)` read straight off the image
|
||||
3. `browser_click_coordinate(x, y)` → lands via native hit test; inputs
|
||||
get focused regardless of shadow depth
|
||||
4. Type via `browser_type_focused` (no selector needed — types into the
|
||||
already-focused element), or `browser_type` if you have a selector
|
||||
|
||||
For selector-style access when you know the shadow path:
|
||||
`browser_shadow_query("#interop-outlet >>> #msg-overlay >>> p")` —
|
||||
|
||||
@@ -51,18 +51,13 @@ DEFAULT_EVENT_TYPES = [
|
||||
# Keepalive interval in seconds
|
||||
KEEPALIVE_INTERVAL = 15.0
|
||||
|
||||
# Session-SSE worker filter: workers run outside the queen's DM
|
||||
# chat. Worker activity is observable via the dedicated
|
||||
# ``/api/workers/{worker_id}/events`` per-worker SSE route, not via
|
||||
# the session chat. This keeps the queen↔user conversation clean of
|
||||
# tool-call chatter regardless of whether the worker was spawned by
|
||||
# ``run_agent_with_input`` (stream_id="worker") or
|
||||
# ``run_parallel_workers`` (stream_id="worker:{uuid}").
|
||||
#
|
||||
# Lifecycle events the frontend needs for fan-in summaries
|
||||
# (SUBAGENT_REPORT, EXECUTION_COMPLETED, EXECUTION_FAILED) are still
|
||||
# allowed through so the queen can show "N workers done" surfaces
|
||||
# without exposing the per-turn chatter.
|
||||
# Phase 5 SSE filter: parallel-worker streams (stream_id="worker:{uuid}")
|
||||
# publish high-frequency LLM deltas / tool calls that would flood the
|
||||
# user's queen DM chat. We let only this small allowlist of worker
|
||||
# events through to the queen-chat SSE so the frontend can render
|
||||
# fan-out lifecycle and structured fan-in reports without seeing the
|
||||
# raw worker chatter. Per-worker SSE panels (Phase 5b) bypass this
|
||||
# filter via a dedicated /workers/{worker_id}/events route.
|
||||
_WORKER_EVENT_ALLOWLIST = {
|
||||
EventType.SUBAGENT_REPORT.value,
|
||||
EventType.EXECUTION_COMPLETED.value,
|
||||
@@ -71,17 +66,9 @@ _WORKER_EVENT_ALLOWLIST = {
|
||||
|
||||
|
||||
def _is_worker_noise(evt_dict: dict) -> bool:
|
||||
"""True if the event belongs to a worker stream and should not
|
||||
surface in the queen DM chat.
|
||||
|
||||
Matches any stream starting with ``worker`` — both the bare
|
||||
``"worker"`` tag used by single-worker spawns and the
|
||||
``"worker:{uuid}"`` tag used by parallel fan-outs. The allowlist
|
||||
carves out the three terminal/lifecycle events the UI still
|
||||
needs to render fan-in summaries.
|
||||
"""
|
||||
"""True if the event is a parallel-worker event we should drop."""
|
||||
stream_id = evt_dict.get("stream_id") or ""
|
||||
if not stream_id.startswith("worker"):
|
||||
if not stream_id.startswith("worker:"):
|
||||
return False
|
||||
return evt_dict.get("type") not in _WORKER_EVENT_ALLOWLIST
|
||||
|
||||
|
||||
@@ -644,7 +644,6 @@ async def handle_colony_spawn(request: web.Request) -> web.Response:
|
||||
body = await request.json()
|
||||
colony_name = body.get("colony_name", "").strip()
|
||||
task = body.get("task", "").strip()
|
||||
tasks = body.get("tasks")
|
||||
|
||||
if not colony_name:
|
||||
return web.json_response({"error": "colony_name is required"}, status=400)
|
||||
@@ -662,7 +661,6 @@ async def handle_colony_spawn(request: web.Request) -> web.Response:
|
||||
session=session,
|
||||
colony_name=colony_name,
|
||||
task=task,
|
||||
tasks=tasks if isinstance(tasks, list) else None,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.exception("colony_spawn fork failed")
|
||||
@@ -676,7 +674,6 @@ async def fork_session_into_colony(
|
||||
session: Any,
|
||||
colony_name: str,
|
||||
task: str,
|
||||
tasks: list[dict] | None = None,
|
||||
) -> dict:
|
||||
"""Fork a queen session into a colony directory.
|
||||
|
||||
@@ -693,14 +690,8 @@ async def fork_session_into_colony(
|
||||
the colony resumes with the queen's entire conversation history.
|
||||
3. Multiple independent sessions can be created against the same colony,
|
||||
giving parallel execution capacity without separate worker configs.
|
||||
4. Initializes (or ensures) ``data/progress.db`` — the colony's SQLite
|
||||
task queue + progress ledger. When *tasks* is provided, the queen-
|
||||
authored task batch is seeded into the queue in one transaction.
|
||||
The absolute DB path is threaded into the worker's ``input_data``
|
||||
so spawned workers see it in their first user message.
|
||||
|
||||
Returns ``{"colony_path", "colony_name", "queen_session_id", "is_new",
|
||||
"db_path", "task_ids"}``.
|
||||
Returns ``{"colony_path", "colony_name", "queen_session_id", "is_new"}``.
|
||||
"""
|
||||
import asyncio
|
||||
import json
|
||||
@@ -709,8 +700,7 @@ async def fork_session_into_colony(
|
||||
from pathlib import Path
|
||||
|
||||
from framework.agent_loop.agent_loop import AgentLoop, LoopConfig
|
||||
from framework.agent_loop.types import AgentContext, AgentSpec
|
||||
from framework.host.progress_db import ensure_progress_db, seed_tasks
|
||||
from framework.agent_loop.types import AgentContext
|
||||
from framework.server.session_manager import _queen_session_dir
|
||||
|
||||
queen_loop: AgentLoop = session.queen_executor.node_registry["queen"]
|
||||
@@ -721,49 +711,6 @@ async def fork_session_into_colony(
|
||||
colony_dir.mkdir(parents=True, exist_ok=True)
|
||||
(colony_dir / "data").mkdir(exist_ok=True)
|
||||
|
||||
# ── 0. Ensure the colony's progress DB exists and seed tasks ──
|
||||
# Runs before worker.json is written so the DB path can be threaded
|
||||
# into input_data. Idempotent on reruns of the same colony name.
|
||||
db_path = await asyncio.to_thread(ensure_progress_db, colony_dir)
|
||||
seeded_task_ids: list[str] = []
|
||||
if tasks:
|
||||
seeded_task_ids = await asyncio.to_thread(
|
||||
seed_tasks, db_path, tasks, source="queen_create"
|
||||
)
|
||||
logger.info(
|
||||
"progress_db: seeded %d task(s) into colony '%s'",
|
||||
len(seeded_task_ids),
|
||||
colony_name,
|
||||
)
|
||||
elif task and task.strip():
|
||||
# Phase 2 auto-seed: when the queen uses the simple single-task
|
||||
# form of create_colony (no explicit ``tasks=[{...}]`` list),
|
||||
# insert exactly one row so the first worker spawned into this
|
||||
# colony has something to claim. Without this the queue is
|
||||
# empty and the worker falls back to executing from the chat
|
||||
# spawn message, defeating the cross-run durability the tracker
|
||||
# exists for.
|
||||
try:
|
||||
seeded_task_ids = await asyncio.to_thread(
|
||||
seed_tasks,
|
||||
db_path,
|
||||
[{"goal": task.strip()}],
|
||||
source="create_colony_auto",
|
||||
)
|
||||
logger.info(
|
||||
"progress_db: auto-seeded 1 task into colony '%s' "
|
||||
"(task_id=%s, from single-task create_colony form)",
|
||||
colony_name,
|
||||
seeded_task_ids[0] if seeded_task_ids else "?",
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"progress_db: auto-seed failed for colony '%s' (continuing "
|
||||
"without a pre-seeded row): %s",
|
||||
colony_name,
|
||||
exc,
|
||||
)
|
||||
|
||||
# Fixed worker name -- sessions are the unit of parallelism, not workers
|
||||
worker_name = "worker"
|
||||
|
||||
@@ -825,26 +772,10 @@ async def fork_session_into_colony(
|
||||
# worker is not Charlotte / Alexandra / etc., it is a task executor.
|
||||
# Inheriting the queen's persona made the worker greet the user in
|
||||
# first person with no memory of the task it was actually given.
|
||||
# Thread the first seeded task_id into input_data so the worker's
|
||||
# first claim pins to a specific row (skill's assigned-task-id
|
||||
# branch). When multiple tasks were seeded we only pin the first —
|
||||
# subsequent workers (via run_agent_with_input or parallel spawns)
|
||||
# get their own task_id assigned at spawn time.
|
||||
_worker_input_data: dict[str, Any] = {
|
||||
"db_path": str(db_path),
|
||||
"colony_id": colony_name,
|
||||
}
|
||||
if seeded_task_ids:
|
||||
_worker_input_data["task_id"] = seeded_task_ids[0]
|
||||
|
||||
worker_meta = {
|
||||
"name": worker_name,
|
||||
"version": "1.0.0",
|
||||
"description": f"Worker clone from queen session {session.id}",
|
||||
# Colony progress tracker: worker sees these in its first user
|
||||
# message via _format_spawn_task_message. The colony-progress-
|
||||
# tracker default skill teaches the worker how to use them.
|
||||
"input_data": _worker_input_data,
|
||||
"goal": {
|
||||
"description": worker_task,
|
||||
"success_criteria": [],
|
||||
@@ -976,8 +907,6 @@ async def fork_session_into_colony(
|
||||
"colony_name": colony_name,
|
||||
"queen_session_id": colony_session_id,
|
||||
"is_new": is_new,
|
||||
"db_path": str(db_path),
|
||||
"task_ids": seeded_task_ids,
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -686,10 +686,6 @@ async def handle_session_colonies(request: web.Request) -> web.Response:
|
||||
return web.json_response({"colonies": colonies})
|
||||
|
||||
|
||||
_EVENTS_HISTORY_DEFAULT_LIMIT = 2000
|
||||
_EVENTS_HISTORY_MAX_LIMIT = 10000
|
||||
|
||||
|
||||
async def handle_session_events_history(request: web.Request) -> web.Response:
|
||||
"""GET /api/sessions/{session_id}/events/history — persisted eventbus log.
|
||||
|
||||
@@ -697,58 +693,17 @@ async def handle_session_events_history(request: web.Request) -> web.Response:
|
||||
both live sessions and cold (post-server-restart) sessions. The frontend
|
||||
replays these events through ``sseEventToChatMessage`` to fully reconstruct
|
||||
the UI state on resume.
|
||||
|
||||
Query params:
|
||||
limit: maximum number of events to return (default 2000, max 10000).
|
||||
The TAIL of the file is returned — i.e. the most recent N events.
|
||||
Older events are dropped and ``truncated`` is set to True.
|
||||
|
||||
Response shape::
|
||||
|
||||
{
|
||||
"events": [...], # up to ``limit`` events, oldest-first
|
||||
"session_id": "...",
|
||||
"total": 12345, # total events in the file
|
||||
"returned": 2000, # len(events)
|
||||
"truncated": true, # total > returned
|
||||
"limit": 2000, # the effective limit used
|
||||
}
|
||||
|
||||
``events.jsonl`` is append-only chronological, so "last N lines" == "most
|
||||
recent N events". Long-running colonies have produced files with 50k+
|
||||
events; before this cap, restoring on page-mount shipped the whole thing
|
||||
down the wire and blocked the UI for seconds.
|
||||
"""
|
||||
session_id = request.match_info["session_id"]
|
||||
|
||||
try:
|
||||
limit = int(request.query.get("limit", str(_EVENTS_HISTORY_DEFAULT_LIMIT)))
|
||||
except ValueError:
|
||||
limit = _EVENTS_HISTORY_DEFAULT_LIMIT
|
||||
limit = max(1, min(limit, _EVENTS_HISTORY_MAX_LIMIT))
|
||||
|
||||
from framework.server.session_manager import _find_queen_session_dir
|
||||
|
||||
queen_dir = _find_queen_session_dir(session_id)
|
||||
events_path = queen_dir / "events.jsonl"
|
||||
if not events_path.exists():
|
||||
return web.json_response(
|
||||
{
|
||||
"events": [],
|
||||
"session_id": session_id,
|
||||
"total": 0,
|
||||
"returned": 0,
|
||||
"truncated": False,
|
||||
"limit": limit,
|
||||
}
|
||||
)
|
||||
return web.json_response({"events": [], "session_id": session_id})
|
||||
|
||||
# Tail the file using a bounded deque — O(limit) memory regardless
|
||||
# of file size. No need to materialize the whole list only to slice it.
|
||||
from collections import deque
|
||||
|
||||
tail: deque[dict] = deque(maxlen=limit)
|
||||
total = 0
|
||||
events: list[dict] = []
|
||||
try:
|
||||
with open(events_path, encoding="utf-8") as f:
|
||||
for line in f:
|
||||
@@ -756,34 +711,13 @@ async def handle_session_events_history(request: web.Request) -> web.Response:
|
||||
if not line:
|
||||
continue
|
||||
try:
|
||||
evt = json.loads(line)
|
||||
events.append(json.loads(line))
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
total += 1
|
||||
tail.append(evt)
|
||||
except OSError:
|
||||
return web.json_response(
|
||||
{
|
||||
"events": [],
|
||||
"session_id": session_id,
|
||||
"total": 0,
|
||||
"returned": 0,
|
||||
"truncated": False,
|
||||
"limit": limit,
|
||||
}
|
||||
)
|
||||
return web.json_response({"events": [], "session_id": session_id})
|
||||
|
||||
events = list(tail)
|
||||
return web.json_response(
|
||||
{
|
||||
"events": events,
|
||||
"session_id": session_id,
|
||||
"total": total,
|
||||
"returned": len(events),
|
||||
"truncated": total > len(events),
|
||||
"limit": limit,
|
||||
}
|
||||
)
|
||||
return web.json_response({"events": events, "session_id": session_id})
|
||||
|
||||
|
||||
async def handle_session_history(request: web.Request) -> web.Response:
|
||||
|
||||
@@ -139,24 +139,6 @@ class SessionManager:
|
||||
except Exception:
|
||||
logger.warning("v2 migration failed (non-fatal)", exc_info=True)
|
||||
|
||||
# Ensure every existing colony has an up-to-date progress.db
|
||||
# (schema v1, WAL mode) and reclaim any stale claims left behind
|
||||
# by crashed workers from the previous run. Idempotent and
|
||||
# fast; runs synchronously because the event loop hasn't
|
||||
# started yet at __init__ time.
|
||||
from framework.host.progress_db import ensure_all_colony_dbs
|
||||
|
||||
try:
|
||||
ensured = ensure_all_colony_dbs()
|
||||
if ensured:
|
||||
logger.info(
|
||||
"progress_db: ensured %d colony DB(s) at startup", len(ensured)
|
||||
)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"progress_db: backfill at startup failed (non-fatal)", exc_info=True
|
||||
)
|
||||
|
||||
def build_llm(self, model: str | None = None):
|
||||
"""Construct an LLM provider using the server's configured defaults."""
|
||||
from framework.config import RuntimeConfig, get_hive_config
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
---
|
||||
name: hive.batch-ledger
|
||||
description: Track per-item status when processing collections to prevent skipped or duplicated items.
|
||||
metadata:
|
||||
author: hive
|
||||
type: default-skill
|
||||
---
|
||||
|
||||
## Operational Protocol: Batch Progress Ledger
|
||||
|
||||
When processing a collection of items, maintain a batch ledger in `_batch_ledger`.
|
||||
|
||||
Initialize when you identify the batch:
|
||||
- `_batch_total`: total item count
|
||||
- `_batch_ledger`: JSON with per-item status
|
||||
|
||||
Per-item statuses: pending → in_progress → completed|failed|skipped
|
||||
|
||||
- Set `in_progress` BEFORE processing
|
||||
- Set final status AFTER processing with 1-line result_summary
|
||||
- Include error reason for failed/skipped items
|
||||
- Update aggregate counts after each item
|
||||
- NEVER remove items from the ledger
|
||||
- If resuming, skip items already marked completed
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
name: hive.browser-automation
|
||||
description: Required before any browser_* tool call. Teaches the screenshot + browser_click_coordinate workflow that reaches shadow-DOM inputs selectors can't see, the CSS-pixel coordinate rule (not physical px), rich-text editor quirks ("send button stays disabled" failures), and CSP gotchas. Covers Chrome via CDP through the GCU Beeline extension. Skipping this causes repeated failures on LinkedIn / Reddit / X. Verified against real production sites 2026-04-11.
|
||||
description: Required before any browser_* tool call. Teaches the screenshot + browser_click_coordinate workflow that reaches shadow-DOM inputs selectors can't see, rich-text editor quirks ("send button stays disabled" failures), and CSP gotchas. Covers Chrome via CDP through the GCU Beeline extension. Skipping this causes repeated failures on LinkedIn / Reddit / X. Verified against real production sites 2026-04-11.
|
||||
metadata:
|
||||
author: hive
|
||||
type: default-skill
|
||||
@@ -12,40 +12,20 @@ metadata:
|
||||
|
||||
All GCU browser tools drive a real Chrome instance through the Beeline extension and Chrome DevTools Protocol (CDP). That means clicks, keystrokes, and screenshots are processed by the actual browser's native hit testing, focus, and layout engines — **not** a synthetic event layer. Understanding this unlocks strategies that make hard sites easy.
|
||||
|
||||
## Rule #0: screenshot + coordinates beats selectors
|
||||
## Coordinates
|
||||
|
||||
When in doubt, the most reliable browser-automation primitive is **`browser_screenshot` → identify visually → `browser_click_coordinate` → `browser_type`** (with `use_insert_text=True` for rich-text editors).
|
||||
|
||||
This path works on every site regardless of:
|
||||
- React class-name obfuscation (LinkedIn, X, most SPAs)
|
||||
- Shadow-DOM boundaries (Reddit, LinkedIn `#interop-outlet`)
|
||||
- Nested iframes (LinkedIn invitation-manager inline message, embedded composers)
|
||||
- Trusted Types CSP (LinkedIn, GitHub)
|
||||
- Lexical / Draft.js / contenteditable composers
|
||||
|
||||
If you catch yourself writing `document.querySelectorAll(...)` inside `browser_evaluate` and it returns `[]`, **stop immediately**. Do not try a different selector. Take a screenshot and use coordinates. This single rule would have prevented dozens of empty-selector probing loops in past sessions.
|
||||
|
||||
**`browser_evaluate` is an escape hatch, not a default.** See the "When to reach for `browser_evaluate`" section near the end — most browser automation should not need it.
|
||||
|
||||
## Coordinates: always CSS pixels
|
||||
|
||||
**Chrome DevTools Protocol `Input.dispatchMouseEvent` operates in CSS pixels, not physical pixels.**
|
||||
|
||||
When you call `browser_coords(image_x, image_y)` after a screenshot, the returned dict has both `css_x/y` and `physical_x/y`. **Always use `css_x/y` for clicks, hovers, and key presses.**
|
||||
Screenshots are delivered at the CSS viewport's own dimensions. A pixel you see in the screenshot is the same coordinate `browser_click_coordinate` expects — no conversion, no scale factors.
|
||||
|
||||
```
|
||||
browser_screenshot() → image (downscaled to 800/900 px wide)
|
||||
browser_coords(img_x, img_y) → {css_x, css_y, physical_x, physical_y}
|
||||
browser_click_coordinate(css_x, css_y) ← USE css_x/y
|
||||
browser_hover_coordinate(css_x, css_y) ← USE css_x/y
|
||||
browser_press_at(css_x, css_y, key) ← USE css_x/y
|
||||
browser_screenshot() → image at CSS-viewport size (JPEG)
|
||||
browser_click_coordinate(x, y) → same (x, y)
|
||||
browser_hover_coordinate(x, y) → same (x, y)
|
||||
browser_press_at(x, y, key) → same (x, y)
|
||||
browser_get_rect(selector) → rect.css → pass rect.css.cx, rect.css.cy to any of the above
|
||||
browser_shadow_query(...) → sq.css → same
|
||||
```
|
||||
|
||||
Feeding `physical_x/y` on a HiDPI display overshoots by DPR× — on a DPR=1.6 laptop, clicks land 60% too far right and down. The ratio between `physicalScale` and `cssScale` tells you the effective DPR.
|
||||
|
||||
`getBoundingClientRect()` already returns CSS pixels — feed those values straight through to click/hover tools without any DPR multiplication.
|
||||
|
||||
**Exception for zoomed elements:** pages that use `zoom` or `transform: scale()` on a container (LinkedIn's `#interop-outlet`, some embedded iframes) render in a scaled local coordinate space. `getBoundingClientRect` there may not match CDP's hit space. Use `browser_shadow_query` which handles the math, or fall back to visually picking coordinates from a screenshot.
|
||||
**Exception for zoomed elements:** pages that use `zoom` or `transform: scale()` on a container (LinkedIn's `#interop-outlet`, some embedded iframes) render in a scaled local coordinate space. `getBoundingClientRect` there may not match CDP's hit space. Use `browser_shadow_query` which handles the math, or visually pick coordinates from a screenshot.
|
||||
|
||||
## Screenshot + coordinates is shadow-agnostic — prefer it on shadow-heavy sites
|
||||
|
||||
@@ -61,14 +41,28 @@ Whereas `wait_for_selector`, `browser_click(selector=...)`, `browser_type(select
|
||||
|
||||
### Recommended workflow on shadow-heavy sites
|
||||
|
||||
1. `browser_screenshot()` → visual image
|
||||
2. Identify the target visually → image pixel `(x, y)` (eyeball from the screenshot)
|
||||
3. `browser_coords(x, y)` → convert to CSS px
|
||||
4. `browser_click_coordinate(css_x, css_y)` → lands on the element via native hit testing; inputs get focused
|
||||
5. For typing:
|
||||
- If the element was reachable via a selector → `browser_type(selector, text)`
|
||||
- Otherwise → `browser_press(key)` per character (dispatches to focused element, no selector needed)
|
||||
6. Verify by reading element state via a targeted `browser_evaluate` that walks the shadow tree
|
||||
1. `browser_screenshot()` → visual image (delivered at the CSS-viewport's own dimensions).
|
||||
2. Identify the target visually → pixel `(x, y)` read straight off the image.
|
||||
3. `browser_click_coordinate(x, y)` → clicks there. **The response includes `focused_element: {tag, id, role, contenteditable, rect, ...}`** — use it to verify you actually focused what you intended.
|
||||
4. `browser_type_focused(text="...")` → dispatches CDP `Input.insertText` to `document.activeElement`. Shadow roots, iframes, Lexical, Draft.js, ProseMirror all just work. Use `browser_type(selector, text)` only when you want to target a different element than the one you just focused.
|
||||
5. Verify via `browser_screenshot` OR `browser_get_attribute` on a known-reachable marker (e.g. check that the Send button's `aria-disabled` flipped to `false`).
|
||||
|
||||
### The click→type loop (canonical pattern)
|
||||
|
||||
```
|
||||
resp = browser_click_coordinate(x, y) # x, y read straight off the screenshot
|
||||
fe = resp.get("focused_element")
|
||||
if fe and (fe.get("contenteditable") or fe["tag"] in ("textarea", "input")):
|
||||
browser_type_focused(text="...") # insertText to activeElement
|
||||
else:
|
||||
# you clicked something that isn't editable — refine the pixel and retry.
|
||||
# Do NOT reach for browser_evaluate + execCommand('insertText', ...)
|
||||
# or a walk(root) shadow traversal. The problem is your click, not
|
||||
# the typing method.
|
||||
...
|
||||
```
|
||||
|
||||
`browser_click` (selector-based) also returns `focused_element`, so the same check works whether you clicked by selector or by coordinate.
|
||||
|
||||
### Empirically verified (2026-04-11)
|
||||
|
||||
@@ -154,7 +148,7 @@ The symptom is always the same: **you type, the characters appear visually, and
|
||||
```
|
||||
# 1. Focus the real element via a real click (not JS .focus()).
|
||||
rect = browser_get_rect(selector) # or browser_shadow_query for shadow sites
|
||||
browser_click_coordinate(rect.cx, rect.cy)
|
||||
browser_click_coordinate(rect.css.cx, rect.css.cy) # rect.css.cx/cy — matched pair
|
||||
sleep(0.5) # let the editor open / focus settle
|
||||
|
||||
# 2. Type. browser_type now uses CDP Input.insertText by default, which is
|
||||
@@ -183,7 +177,7 @@ if not state['disabled']:
|
||||
else:
|
||||
# Recovery: sometimes a click-again + one extra keystroke nudges
|
||||
# React into recomputing hasRealContent.
|
||||
browser_click_coordinate(rect.cx, rect.cy)
|
||||
browser_click_coordinate(rect.css.cx, rect.css.cy) # rect.css.cx/cy — matched pair
|
||||
browser_press("End")
|
||||
browser_press(" ")
|
||||
browser_press("Backspace")
|
||||
@@ -266,25 +260,15 @@ Recognized without modifiers: `Enter`, `Tab`, `Escape`, `Backspace`, `Delete`, `
|
||||
## Screenshots
|
||||
|
||||
```
|
||||
browser_screenshot() # viewport, 900 px wide by default
|
||||
browser_screenshot() # viewport, CSS-sized JPEG
|
||||
browser_screenshot(full_page=True) # full scrollable page
|
||||
browser_screenshot(selector="#header") # clip to element's rect
|
||||
```
|
||||
|
||||
Returns a PNG with automatic downscaling to a target width (default 900 px) plus a JSON metadata block containing `cssWidth`, `devicePixelRatio`, `physicalScale`, `cssScale`, and a `scaleHint` string. The image is also annotated with a highlight rectangle/dot showing the last interaction (click, hover, type) if one happened on this tab.
|
||||
Returns a JPEG (quality 75, ~150–250 KB for a typical UI) at the CSS viewport's own dimensions, plus a JSON metadata block containing `cssWidth`, `devicePixelRatio`, `imageWidth` (= `cssWidth`), and a `scaleHint` confirming image-px == CSS-px. The image is annotated with a highlight rectangle/dot showing the last interaction (click, hover, type) if one happened on this tab.
|
||||
|
||||
The highlight overlay stays visible on the page for **10 seconds** after each interaction, then fades. Before a screenshot is likely, make sure your click / hover / type happens <10 s before the screenshot.
|
||||
|
||||
### Anatomy of the scale fields
|
||||
|
||||
- `cssWidth` = `window.innerWidth` (CSS px)
|
||||
- `devicePixelRatio` = `window.devicePixelRatio` (often 1.6, 2, or 3 on modern displays)
|
||||
- `physicalScale = png_width / image_width` (how many physical-px per image-px)
|
||||
- `cssScale = cssWidth / image_width` (how many CSS-px per image-px)
|
||||
- Effective DPR = `physicalScale / cssScale` (should match `devicePixelRatio`)
|
||||
|
||||
When converting image coordinates for clicks, always use `cssScale`. The `physicalScale` field is there for debugging HiDPI displays, not for inputs.
|
||||
|
||||
## Scrolling
|
||||
|
||||
- Use large scroll amounts (~2000) when loading more content — sites like Twitter and LinkedIn have lazy loading for paging.
|
||||
@@ -339,7 +323,7 @@ LinkedIn enforces **strict Trusted Types CSP**. Any script you inject via `brows
|
||||
Reddit's search input lives **two shadow levels deep** inside `reddit-search-large > faceplate-search-input`. You cannot reach it with `browser_type(selector=)`. The working pattern:
|
||||
|
||||
1. `browser_shadow_query("reddit-search-large >>> #search-input")` → rect
|
||||
2. `browser_click_coordinate(rect.cx, rect.cy)` → click lands on the real shadow input via native hit testing; input becomes focused
|
||||
2. `browser_click_coordinate(rect.css.cx, rect.css.cy) # rect.css.cx/cy — matched pair` → click lands on the real shadow input via native hit testing; input becomes focused
|
||||
3. `browser_press(c)` for each character → dispatches to focused element
|
||||
4. Verify by reading `.value` via `browser_evaluate` walking the shadow path
|
||||
|
||||
@@ -409,7 +393,7 @@ Then pass the most specific selector that uniquely identifies the right input (e
|
||||
- **Typing into a rich-text editor without clicking first → send button stays disabled.** Draft.js (X), Lexical (Gmail, LinkedIn DMs), ProseMirror (Reddit), and React-controlled `contenteditable` elements only register input as "real" when the element received a native focus event — JS-sourced `.focus()` is not enough. `browser_type` now does this automatically via a real CDP pointer click before inserting text, but always verify the submit button's `disabled` state before clicking send. See the "ALWAYS click before typing" section above.
|
||||
- **Using per-character `keyDown` on Lexical / Draft.js editors → keys dispatch but text never appears.** Those editors intercept `beforeinput` and route insertion through their own state machine; raw keyDown events are silently dropped. `browser_type` now uses `Input.insertText` by default (the CDP IME-commit method) which these editors accept cleanly. Only set `use_insert_text=False` when you explicitly need per-keystroke dispatch.
|
||||
- **Leaving a composer with text then trying to navigate → `beforeunload` dialog hangs the bridge.** LinkedIn and several other sites pop a native "unsent message" confirm. `browser_navigate` and `close_tab` both time out against this. Always strip `window.onbeforeunload = null` via `browser_evaluate` before any navigation after typing in a composer, or wrap your logic in a `try/finally` that runs the cleanup block.
|
||||
- **Clicking at physical pixels.** CDP uses CSS px. `browser_coords` returns both for debugging, but always feed `css_x/y` to click tools.
|
||||
- **Click landed in the wrong region (sidebar / header instead of target).** The `focused_element` in the click response shows what actually got focused (e.g. `className: "msg-conversation-listitem__link"` means you hit the messaging sidebar). Treat it as ground truth — if it isn't the target, adjust the pixel and retry. Screenshot pixels equal CSS pixels, so the number you passed is the number CDP clicked; a wrong result means you picked the wrong pixel, not that any conversion went sideways.
|
||||
- **Calling `wait_for_selector` on a shadow element.** It'll always time out. Use `browser_shadow_query` or the screenshot + coordinate strategy.
|
||||
- **Relying on `innerHTML` in injected scripts on LinkedIn.** Silently discarded. Use `createElement` + `appendChild`.
|
||||
- **Not waiting for SPA hydration.** `wait_until="load"` fires before React/Vue rendering on many sites. Add a 2–3 s sleep before querying for chrome elements.
|
||||
@@ -425,35 +409,17 @@ If Chrome detaches the debugger for its own reasons (tab closed, user opened Dev
|
||||
|
||||
If reattach also fails, you'll get the underlying CDP error string — that's a real problem, usually the tab is gone.
|
||||
|
||||
## `browser_evaluate` is a last-resort escape hatch
|
||||
## When to reach for `browser_evaluate`
|
||||
|
||||
**Before using `browser_evaluate`, try these first — in this order:**
|
||||
Use it when:
|
||||
- You need to read state from inside a shadow root that `browser_get_rect` doesn't handle
|
||||
- You need a one-shot JS snippet to trigger a site-specific action (scroll a specific container, open a menu, set a form field value directly)
|
||||
- You need to walk an AX tree or measure layout that the standard tools don't expose
|
||||
|
||||
1. **`browser_screenshot` + `browser_click_coordinate`** — works on every site regardless of shadow DOM, iframes, obfuscated classes. This is the default path for "click a thing you can see."
|
||||
2. **`browser_type(use_insert_text=True, text=...)`** — for typing into ANY input/contenteditable, including Lexical and Draft.js. Handles click-focus-insert with built-in retries. Do **not** call `document.execCommand('insertText')` via evaluate; this tool already does it correctly.
|
||||
3. **`browser_shadow_query`** or **`browser_get_rect(selector)`** with the `>>>` shadow-piercing syntax — for selector-based lookups across shadow roots.
|
||||
4. **`browser_get_text` / `browser_get_attribute`** — for reading element state by selector.
|
||||
5. **`browser_snapshot`** — for dumping the accessibility tree of the page.
|
||||
|
||||
If all five of those fit your goal, **do not use `browser_evaluate`.** Each evaluate call is a small LLM round-trip of ~30-100 tokens of JS plus a JSON response; five of them burn more context than a single screenshot-and-coordinate does, with less reliability.
|
||||
|
||||
### Anti-patterns — stop immediately if you catch yourself doing these
|
||||
|
||||
- **Trying multiple `querySelectorAll` variants when the first returned `[]`.** Different selectors on the same page rarely work if the first guess failed — modern SPAs obfuscate class names at build time. After one empty result, switch to `browser_screenshot` + `browser_click_coordinate`. Do not write `.artdeco-list__item`, then `[data-test-incoming-invitation-card]`, then `[class*="invitation"]` — you are already on the wrong path.
|
||||
- **Writing `walk(root)` recursive shadow-DOM traversal functions.** Use `browser_shadow_query` — it traverses at the CDP level (native C++), not by re-running a recursive JS function every call.
|
||||
- **Calling `document.execCommand('insertText', ...)` to type into a contenteditable.** Use `browser_type(use_insert_text=True, text='...')`. The high-level tool handles the exact same Lexical/Draft.js case but with click-focus-retry logic built in.
|
||||
- **Accessing `iframe.contentDocument`.** Rarely works (cross-origin, late hydration) and when it does, the code is brittle. Use `browser_screenshot` to see the iframe, then `browser_click_coordinate` to interact.
|
||||
- **Using `innerHTML = "<...>"` on a Trusted Types site (LinkedIn, GitHub).** The assignment is silently dropped. Use `createElement` + `appendChild` if you must inject DOM — but first, ask whether you really need to.
|
||||
- **Triggering React/Vue state via synthetic `dispatchEvent`.** Frameworks watch for real browser events. Use `browser_click_coordinate`, `browser_press`, or `browser_type` — all go through CDP's native event pipeline.
|
||||
|
||||
### Legitimate uses (when nothing semantic fits)
|
||||
|
||||
- Reading a computed style, `window.innerWidth/Height`, `document.scrollingElement.scrollTop`, or other layout values the tools don't expose.
|
||||
- Firing a one-shot site-specific API call (analytics beacon, feature-flag toggle).
|
||||
- Stripping `onbeforeunload` before navigating away from a page with an unsent draft (LinkedIn, Gmail).
|
||||
- Detecting whether a specific shadow-root host exists before a follow-up screenshot.
|
||||
|
||||
In all of these cases the script is SHORT (< 10 lines) and the result is CONSUMED (read, then acted on), not further probed.
|
||||
Avoid it when:
|
||||
- A standard tool (`browser_click_coordinate`, `browser_type`, `browser_press`) already does what you need. Those go through CDP's native event pipeline, which real sites trust more than synthetic JS dispatch.
|
||||
- You're on a strict-CSP site and want to inject DOM — stick to `createElement` + `appendChild`, never `innerHTML`.
|
||||
- You need to trigger React / Vue / framework state changes — those frameworks watch for real browser events (`input`, `change`, `click`), not scripted `dispatchEvent` calls. Native-event tools are more reliable.
|
||||
|
||||
## Login & auth walls
|
||||
|
||||
@@ -479,7 +445,7 @@ browser_navigate("https://x.com/explore", wait_until="load")
|
||||
sleep(3)
|
||||
browser_wait_for_selector("input[data-testid='SearchBox_Search_Input']", timeout_ms=5000)
|
||||
rect = browser_get_rect("input[data-testid='SearchBox_Search_Input']")
|
||||
browser_click_coordinate(rect.cx, rect.cy)
|
||||
browser_click_coordinate(rect.css.cx, rect.css.cy) # rect.css.cx/cy — matched pair
|
||||
browser_type("input[data-testid='SearchBox_Search_Input']", "openai", clear_first=True)
|
||||
# Screenshot now shows live search suggestions
|
||||
browser_screenshot()
|
||||
@@ -493,7 +459,7 @@ browser_navigate("https://www.reddit.com/r/programming/", wait_until="load")
|
||||
sleep(2)
|
||||
# Shadow-pierce the nested search input
|
||||
sq = browser_shadow_query("reddit-search-large >>> #search-input")
|
||||
browser_click_coordinate(sq.rect.cx, sq.rect.cy)
|
||||
browser_click_coordinate(sq.css.cx, sq.css.cy) # sq.css.cx/cy — matched pair
|
||||
# Typing can't use selector (shadow); focused input receives raw key presses
|
||||
for c in "python":
|
||||
browser_press(c)
|
||||
@@ -508,7 +474,7 @@ browser_navigate("https://www.linkedin.com/feed/", wait_until="load", timeout_ms
|
||||
sleep(3)
|
||||
browser_wait_for_selector("input[data-testid='typeahead-input']", timeout_ms=5000)
|
||||
rect = browser_get_rect("input[data-testid='typeahead-input']")
|
||||
browser_click_coordinate(rect.cx, rect.cy)
|
||||
browser_click_coordinate(rect.css.cx, rect.css.cy) # rect.css.cx/cy — matched pair
|
||||
browser_type("input[data-testid='typeahead-input']", "anthropic", clear_first=True)
|
||||
# Dropdown shows real live suggestions
|
||||
browser_screenshot()
|
||||
|
||||
@@ -1,111 +0,0 @@
|
||||
---
|
||||
name: hive.colony-progress-tracker
|
||||
description: Claim tasks, record step progress, and verify SOP gates in the colony SQLite queue. Applies when your spawn message includes a db_path field.
|
||||
metadata:
|
||||
author: hive
|
||||
type: default-skill
|
||||
---
|
||||
|
||||
## Operational Protocol: Colony Progress Tracker
|
||||
|
||||
**Applies when** your spawn message has `db_path:` and `colony_id:` fields. The DB is your durable working memory — it tells you what's done, what to skip, and which SOP gates you owe.
|
||||
|
||||
Access via `execute_command_tool` running `sqlite3 "<db_path>" "..."`. Tables: `tasks` (queue), `steps` (per-task decomposition), `sop_checklist` (hard gates).
|
||||
|
||||
### Claim: assigned task (check this FIRST)
|
||||
|
||||
If your spawn message includes a `task_id:` field, the queen pre-assigned a specific row to you. Claim that row by id — **do not** use the generic next-pending pattern below:
|
||||
|
||||
```bash
|
||||
sqlite3 "<db_path>" <<'SQL'
|
||||
UPDATE tasks SET status='claimed', worker_id='<worker-id>',
|
||||
claim_token=lower(hex(randomblob(8))),
|
||||
claimed_at=datetime('now'), updated_at=datetime('now')
|
||||
WHERE id='<task_id>' AND status='pending'
|
||||
RETURNING id, goal, payload;
|
||||
SQL
|
||||
```
|
||||
|
||||
Empty output → another worker raced you or the row is already done. Stop and report. Non-empty → that row is yours, proceed to "Load the plan".
|
||||
|
||||
### Claim: next pending (fallback when no task_id is assigned)
|
||||
|
||||
If your spawn message did NOT include `task_id:` — you are a generic fan-out worker racing on a shared queue. Use the generic next-pending claim:
|
||||
|
||||
```bash
|
||||
sqlite3 "<db_path>" <<'SQL'
|
||||
UPDATE tasks SET status='claimed', worker_id='<worker-id>',
|
||||
claim_token=lower(hex(randomblob(8))),
|
||||
claimed_at=datetime('now'), updated_at=datetime('now')
|
||||
WHERE id=(SELECT id FROM tasks WHERE status='pending'
|
||||
ORDER BY priority DESC, seq, created_at LIMIT 1)
|
||||
RETURNING id, goal, payload;
|
||||
SQL
|
||||
```
|
||||
|
||||
Empty output → queue drained, exit. Otherwise the returned `id` is yours. **Never SELECT-then-UPDATE** — races.
|
||||
|
||||
### Load the plan
|
||||
|
||||
```bash
|
||||
sqlite3 "<db_path>" "SELECT seq, id, title, status FROM steps WHERE task_id='<task-id>' ORDER BY seq;"
|
||||
sqlite3 "<db_path>" "SELECT key, description, required, done_at FROM sop_checklist WHERE task_id='<task-id>';"
|
||||
```
|
||||
|
||||
**Skip any step where status='done'.** That's the point — don't redo completed work.
|
||||
|
||||
### Execute a step
|
||||
|
||||
Before tool calls:
|
||||
```bash
|
||||
sqlite3 "<db_path>" "UPDATE steps SET status='in_progress', worker_id='<worker-id>', started_at=datetime('now') WHERE id='<step-id>';"
|
||||
```
|
||||
After success (one-line evidence: path, URL, key result):
|
||||
```bash
|
||||
sqlite3 "<db_path>" "UPDATE steps SET status='done', evidence='<what you did>', completed_at=datetime('now') WHERE id='<step-id>';"
|
||||
```
|
||||
|
||||
### MANDATORY: SOP gate check before marking task done
|
||||
|
||||
```bash
|
||||
sqlite3 "<db_path>" "SELECT key, description FROM sop_checklist WHERE task_id='<task-id>' AND required=1 AND done_at IS NULL;"
|
||||
```
|
||||
|
||||
- Empty → proceed to "Mark task done".
|
||||
- Non-empty → each row is work you still owe. Do it, then check it off:
|
||||
|
||||
```bash
|
||||
sqlite3 "<db_path>" "UPDATE sop_checklist SET done_at=datetime('now'), done_by='<worker-id>', note='<why>' WHERE task_id='<task-id>' AND key='<key>';"
|
||||
```
|
||||
|
||||
**Never mark a task done while this SELECT returns rows.** This gate exists specifically to stop you from declaring success while skipping required steps.
|
||||
|
||||
### Mark task done / failed
|
||||
|
||||
```bash
|
||||
# Success:
|
||||
sqlite3 "<db_path>" "UPDATE tasks SET status='done', completed_at=datetime('now'), updated_at=datetime('now') WHERE id='<task-id>' AND worker_id='<worker-id>';"
|
||||
|
||||
# Unrecoverable failure:
|
||||
sqlite3 "<db_path>" "UPDATE tasks SET status='failed', last_error='<one sentence>', completed_at=datetime('now'), updated_at=datetime('now') WHERE id='<task-id>' AND worker_id='<worker-id>';"
|
||||
```
|
||||
|
||||
The `AND worker_id='<worker-id>'` guard means a reclaimed row won't accept your write — treat zero rows affected as "your claim was revoked, stop."
|
||||
|
||||
### Loop
|
||||
|
||||
After done/failed → claim the next task. Exit only when claim returns empty.
|
||||
|
||||
### Errors + debug
|
||||
|
||||
- **"database is locked"**: retry with 100ms → 1s backoff, max 5 attempts. `busy_timeout=5000` handles most contention silently.
|
||||
- **Queue health**: `SELECT status, count(*) FROM tasks GROUP BY status;`
|
||||
- **Your in-flight work**: `SELECT id, goal, status FROM tasks WHERE worker_id='<worker-id>';`
|
||||
|
||||
### Anti-patterns (will break the queue)
|
||||
|
||||
- Don't DDL (CREATE/ALTER/DROP).
|
||||
- Don't DELETE — failed tasks stay as `failed` for audit.
|
||||
- Don't skip the mandatory SOP gate check ("SOP gate check before marking task done" above) before marking done.
|
||||
- Don't hold a task >15min without updates — the stale-claim reclaimer revokes your claim.
|
||||
- Don't invent task IDs. Workers update existing rows; only the queen enqueues new ones.
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
name: hive.context-preservation
|
||||
description: Proactively extract critical values from tool results into working notes before automatic context pruning destroys them.
|
||||
description: Proactively preserve critical information before automatic context pruning destroys it.
|
||||
metadata:
|
||||
author: hive
|
||||
type: default-skill
|
||||
@@ -8,16 +8,17 @@ metadata:
|
||||
|
||||
## Operational Protocol: Context Preservation
|
||||
|
||||
You operate under a finite context window. Older tool results WILL be pruned. Extract what you need while it's still in context.
|
||||
You operate under a finite context window. Important information WILL be pruned.
|
||||
|
||||
**Save-as-you-go.** After any tool call producing information you'll need later, immediately extract the key data into `_working_notes` or `_preserved_data`. Do not rely on referring back to old tool results — once they're pruned they're gone.
|
||||
Save-As-You-Go: After any tool call producing information you'll need later,
|
||||
immediately extract key data into `_working_notes` or `_preserved_data`.
|
||||
Do NOT rely on referring back to old tool results.
|
||||
|
||||
**What to extract:**
|
||||
- URLs and key snippets (not full pages)
|
||||
- Relevant API fields (not raw JSON blobs)
|
||||
- Specific lines, values, or IDs (not entire files)
|
||||
- Analysis conclusions (not raw data)
|
||||
What to extract: URLs and key snippets (not full pages), relevant API fields
|
||||
(not raw JSON), specific lines/values (not entire files), analysis results
|
||||
(not raw data).
|
||||
|
||||
**Handoffs between tasks** happen through `progress.db`, not through shared-buffer handoff blobs. When you finish a task, any state the next worker needs goes into the task row itself (`steps.evidence`, `tasks.last_error`, `sop_checklist.note`) — see `hive.colony-progress-tracker`. Use `_working_notes` for things the DB schema doesn't cover.
|
||||
Before transitioning to the next phase/node, write a handoff summary to
|
||||
`_handoff_context` with everything the next phase needs to know.
|
||||
|
||||
You will receive an alert when context reaches {{warn_at_usage_ratio_pct}}% — preserve immediately.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
name: hive.error-recovery
|
||||
description: Follow a structured recovery decision tree when tool calls fail instead of blindly retrying or giving up.
|
||||
description: Follow a structured recovery protocol when tool calls fail instead of blindly retrying or giving up.
|
||||
metadata:
|
||||
author: hive
|
||||
type: default-skill
|
||||
@@ -10,20 +10,9 @@ metadata:
|
||||
|
||||
When a tool call fails:
|
||||
|
||||
1. **Diagnose** — classify the failure as *transient* (network blip, rate limit, timeout) or *structural* (wrong selector, missing auth, invalid schema, permission denied).
|
||||
|
||||
2. **Decide:**
|
||||
- Transient → retry once.
|
||||
- Structural + fixable → fix the input and retry.
|
||||
- Structural + unfixable → record the failure and move to the next item.
|
||||
- Blocking all progress → escalate.
|
||||
|
||||
3. **Adapt** — if the same tool has failed {{max_retries_per_tool}}+ times in a row, stop using it and find an alternative approach.
|
||||
|
||||
**Never silently drop a failed item.** If the item is a task in the colony queue, write the failure to the DB instead of an in-memory buffer:
|
||||
|
||||
```bash
|
||||
sqlite3 "$DB_PATH" "UPDATE tasks SET status='failed', last_error='<one-sentence reason>', completed_at=datetime('now'), updated_at=datetime('now') WHERE id='<task-id>' AND worker_id='<your-worker-id>';"
|
||||
```
|
||||
|
||||
The `tasks.retry_count` column and the stale-claim reclaimer handle auto-retry for crashes; your job is the within-run decision tree above. See `hive.colony-progress-tracker` for the full queue protocol.
|
||||
1. Diagnose — record error in notes, classify as transient or structural
|
||||
2. Decide — transient: retry once. Structural fixable: fix and retry.
|
||||
Structural unfixable: record as failed, move to next item.
|
||||
Blocking all progress: record escalation note.
|
||||
3. Adapt — if same tool failed {{max_retries_per_tool}}+ times, stop using it and find alternative.
|
||||
Update plan in notes. Never silently drop the failed item.
|
||||
|
||||
@@ -15,28 +15,6 @@ LinkedIn is the hardest mainstream site to automate because it combines **shadow
|
||||
|
||||
**Always activate `browser-automation` first.** This skill assumes you already know about CSS-px coordinates, `browser_type`'s click-first behavior, and `browser_shadow_query`. The guidance below is LinkedIn-specific; general browser rules are there.
|
||||
|
||||
## Rule #0: screenshot + coordinates, not selectors
|
||||
|
||||
LinkedIn changes class names aggressively and hides composers inside shadow roots AND iframes. **Selectors break constantly.** Your default strategy on every LinkedIn page should be:
|
||||
|
||||
1. `browser_screenshot()` — see the page visually
|
||||
2. Pick the target's position from the image
|
||||
3. `browser_coords(image_x, image_y)` → get CSS pixels
|
||||
4. `browser_click_coordinate(css_x, css_y)` — reaches shadow DOM, iframes, and React elements indifferently
|
||||
5. `browser_type(use_insert_text=True, text=...)` — types into whatever is focused, including Lexical composers
|
||||
|
||||
**If `browser_evaluate(...querySelectorAll...)` returns `[]` even once, do not try a different selector.** Stop, screenshot, and click. The "what if I try `.artdeco-list__item` next" instinct has burned ~50 tool calls in real sessions before the agent pivoted. Don't fall into that loop.
|
||||
|
||||
The selectors in the table below are **only** for when you already know the target is in the light DOM and you want a faster path than screenshot+coord. **When in doubt, default to coordinates.**
|
||||
|
||||
## Invitation manager — inline message button path is BROKEN
|
||||
|
||||
If the user asks to message a connection request **from the invitation manager page without accepting first**, the inline "Message" button opens a composer inside a nested **iframe overlay** (not a shadow root). The iframe's `contentDocument` is either cross-origin-blocked or not hydrated at access time. This path is **not reliably automatable today.**
|
||||
|
||||
**Redirect:** click the person's name/profile link on the card, go to the profile page, and use the standard Profile Message flow below. The profile flow is battle-tested; the inline-iframe flow isn't.
|
||||
|
||||
If you end up writing `document.activeElement.tagName === 'IFRAME'` inside a `browser_evaluate`, you've hit this trap. Stop and go to the profile page.
|
||||
|
||||
## Timing expectations
|
||||
|
||||
- `browser_navigate(wait_until="load", timeout_ms=20000)` — LinkedIn takes **4–5 seconds** to load the feed cold. Default 30s timeout is fine; use 20s as a floor.
|
||||
@@ -56,7 +34,7 @@ If you end up writing `document.activeElement.tagName === 'IFRAME'` inside a `br
|
||||
| Pending connection card | `.invitation-card, .invitations-card, [data-test-incoming-invitation-card]` | Filter out "invited you to follow" / "subscribe" cards |
|
||||
| Accept button | `button[aria-label*="Accept"]` within the card scope | Per-card scoping is critical — there are many Accept buttons on the page |
|
||||
|
||||
LinkedIn changes class names aggressively. If a class-based selector breaks, fall back to **`browser_screenshot` → visual identification → `browser_coords` → `browser_click_coordinate`**. The screenshot + coord path works regardless of class-name churn and regardless of shadow DOM.
|
||||
LinkedIn changes class names aggressively. If a class-based selector breaks, fall back to **`browser_screenshot` → visual identification → `browser_click_coordinate`** with the pixel you read straight off the image (screenshots are CSS-sized, so no conversion). The screenshot + coord path works regardless of class-name churn and regardless of shadow DOM.
|
||||
|
||||
## Profile Message flow (verified end-to-end 2026-04-11)
|
||||
|
||||
@@ -120,19 +98,17 @@ textarea = browser_evaluate("""
|
||||
browser_click_coordinate(textarea['cx'], textarea['cy'])
|
||||
sleep(0.6)
|
||||
|
||||
# 6. Insert text via document.execCommand('insertText') through browser_evaluate.
|
||||
# This is the ONLY reliable approach for LinkedIn's Lexical composer.
|
||||
# See the "Lexical composer quirks" section below for why browser_type
|
||||
# with a selector does NOT work here (the contenteditable lives inside
|
||||
# the #interop-outlet shadow root which document.querySelector can't
|
||||
# reach). The click in step 5 already put Lexical into edit mode, so
|
||||
# execCommand injects straight into the focused editor's state.
|
||||
browser_evaluate("""
|
||||
(function(){
|
||||
document.execCommand('insertText', false, %s);
|
||||
return true;
|
||||
})();
|
||||
""" % json.dumps(message_text)) # json.dumps gives you a safely-escaped JS string literal
|
||||
# 6. Insert text via browser_type_focused (no selector). This dispatches
|
||||
# CDP Input.insertText to document.activeElement — the same underlying
|
||||
# mechanism as execCommand('insertText') but with no JSON escaping,
|
||||
# no browser_evaluate round trip, and built-in retry. The click in
|
||||
# step 5 already focused Lexical, so insertText lands in the editor
|
||||
# regardless of the shadow wrapping around #interop-outlet.
|
||||
#
|
||||
# Do NOT pass a selector here. Selector-based browser_type cannot see
|
||||
# past the #interop-outlet shadow root. No-selector mode sidesteps
|
||||
# that entirely by routing to activeElement.
|
||||
browser_type_focused(text=message_text) # targets document.activeElement
|
||||
sleep(1.0) # let Lexical commit state + enable Send button
|
||||
|
||||
# 7. Find the modal Send button (filter by in-viewport, reject pinned bar)
|
||||
@@ -165,20 +141,21 @@ send = browser_evaluate("""
|
||||
})();
|
||||
""")
|
||||
|
||||
# 8. ONLY click Send if it's enabled — if disabled, the execCommand
|
||||
# 8. ONLY click Send if it's enabled — if disabled, the insertText
|
||||
# didn't land. DO NOT retry with a different tool; the fix is
|
||||
# always: re-click the composer rect, re-run execCommand, re-check.
|
||||
# The Send button's `disabled` state IS the ground truth — if
|
||||
# Lexical registered your text, it enables the button. If it's
|
||||
# always: re-click the composer rect, re-run browser_type_focused(text=...),
|
||||
# re-check. The Send button's `disabled` state IS the ground truth —
|
||||
# if Lexical registered your text, it enables the button. If it's
|
||||
# still disabled, your text did not reach the editor, regardless
|
||||
# of what any tool call claims.
|
||||
if send['disabled']:
|
||||
# The editor didn't receive your text. Do NOT click Send. Do NOT
|
||||
# fall back to browser_type with a dummy selector (see anti-pattern
|
||||
# in Common Pitfalls). Instead: re-click the textarea rect from
|
||||
# step 4, wait a beat, re-run the execCommand insertText from step
|
||||
# 6. If that still fails after 2 retries, bail and surface — the
|
||||
# modal may have been reclaimed by a stale state or auth wall.
|
||||
# fall back to browser_type with a selector (see anti-pattern in
|
||||
# Common Pitfalls — selector-based type can't reach the shadow-DOM
|
||||
# composer). Instead: re-click the textarea rect from step 4, wait
|
||||
# a beat, re-run browser_type_focused(text=message_text) from
|
||||
# step 6. If that still fails after 2 retries, bail and surface —
|
||||
# the modal may have been reclaimed by a stale state or auth wall.
|
||||
raise Exception("Send button disabled after insertText — editor did not receive input")
|
||||
|
||||
browser_click_coordinate(send['cx'], send['cy'])
|
||||
@@ -346,9 +323,9 @@ If any of those show up, **stop the run, screenshot the state, and surface the i
|
||||
## Common pitfalls
|
||||
|
||||
- **`innerHTML` injection is silently dropped** — LinkedIn's Trusted Types CSP discards any `innerHTML = "<...>"` from injected scripts, no console error. Always use `createElement` + `appendChild` + `setAttribute` for DOM injection. `textContent`, `style.cssText`, and `.value` assignments are fine.
|
||||
- **Do NOT use `browser_type` on the message composer — use `document.execCommand('insertText', false, text)` via `browser_evaluate` instead.** The Lexical contenteditable lives inside the `#interop-outlet` shadow root which `document.querySelector` (what `browser_type` uses under the hood) cannot see. Attempts to work around this with `browser_shadow_query` fail because `browser_type` doesn't support the `>>>` shadow-pierce syntax. The ONLY reliable insert path is: (1) `browser_click_coordinate` on the composer rect (put Lexical in edit mode via a real CDP pointer click) → (2) `browser_evaluate` with `document.execCommand('insertText', false, <message>)` against the focused editor. This pattern is verified end-to-end across 15+ successful sends in session `session_20260414_113244_a98cfd66` (2026-04-14).
|
||||
- **Per-char keyDown on the message composer produces empty text** — Lexical intercepts `beforeinput` and drops raw keys. Ignore `browser_type` entirely for LinkedIn DMs; use the `execCommand('insertText')` path above.
|
||||
- **ANTI-PATTERN: "inject a dummy `<div id='dummy-target'>` and pass it as the `selector` arg to `browser_type`".** This looks tempting but fails compoundingly: `browser_type` clicks the **dummy div's** rect (not the editor's), the click lands on the Lexical wrapper's non-editable chrome, the contenteditable never receives focus, and `Input.insertText` fires against nothing. The bridge will still return `{"ok": true, "action": "type", "length": N}` because it has no way to verify the text actually landed. Symptom: Send button stays `disabled: true` forever. Fix: use `execCommand('insertText')` exactly as shown in the profile-message flow above. (See `session_20260414_114820_08bd3c4d` for the failed attempt.)
|
||||
- **Do NOT use selector-based `browser_type` on the message composer — use `browser_type_focused(text=...)`.** The Lexical contenteditable lives inside the `#interop-outlet` iframe/shadow wrapper which `document.querySelector` cannot see. `browser_shadow_query` can find it but selector-based `browser_type` doesn't support the `>>>` shadow-pierce syntax. The reliable insert path is: (1) `browser_click_coordinate` on the composer rect — the response's `focused_element` (which recurses into same-origin iframes) confirms what actually received focus → (2) `browser_type_focused(text=message_text)` — CDP `Input.insertText` dispatches to `document.activeElement` regardless of shadow wrapping.
|
||||
- **Per-char keyDown on the message composer produces empty text** — Lexical intercepts `beforeinput` and drops raw keys. Use `browser_type_focused(text=..., use_insert_text=True)` after click-coordinate focused the composer. The CDP `Input.insertText` method commits as if IME fired, which Lexical accepts cleanly.
|
||||
- **ANTI-PATTERN: "inject a dummy `<div id='dummy-target'>` and pass it as the `selector` arg to `browser_type`".** This fails compoundingly: `browser_type` clicks the **dummy div's** rect (not the editor's), the click lands on the Lexical wrapper's non-editable chrome, the contenteditable never receives focus, and `Input.insertText` fires against nothing. The bridge will still return `{"ok": true, "action": "type", "length": N}` because it has no way to verify the text actually landed. Symptom: Send button stays `disabled: true` forever. Fix: `browser_click_coordinate` on the real composer rect, then `browser_type_focused(text=message_text)` — CDP `Input.insertText` dispatches to `document.activeElement`.
|
||||
- **Multiple Send buttons on the page** — the pinned bottom-right messaging bar has its own `msg-form__send-button` that's usually below `innerHeight`. Filter by in-viewport before clicking.
|
||||
- **`window.onbeforeunload` hangs navigation/close** — after typing in a composer, any `browser_navigate` or `close_tab` can pop a native "unsent message, leave?" confirm dialog that deadlocks the bridge. Always strip `onbeforeunload` before any navigation, and wrap composer flows in a `try/finally` that runs the cleanup block:
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
name: hive.note-taking
|
||||
description: Maintain a free-form scratchpad of decisions, extracted values, and open questions so context pruning doesn't lose anything you still need.
|
||||
description: Maintain structured working notes throughout execution to prevent information loss during context pruning.
|
||||
metadata:
|
||||
author: hive
|
||||
type: default-skill
|
||||
@@ -8,21 +8,20 @@ metadata:
|
||||
|
||||
## Operational Protocol: Structured Note-Taking
|
||||
|
||||
Maintain free-form working notes in shared buffer key `_working_notes` for data that *you* need to remember but that isn't captured by the colony task queue.
|
||||
|
||||
**Do not duplicate the queue in here.** Per-task goal, ordered steps, and SOP gates live in `progress.db` — use `hive.colony-progress-tracker` for those. These notes are for things the DB schema doesn't cover.
|
||||
|
||||
Maintain structured working notes in shared buffer key `_working_notes`.
|
||||
Update at these checkpoints:
|
||||
|
||||
- After receiving new information that changes how you plan to approach the current step
|
||||
- Before any tool call that will produce substantial output you'll need to reference later
|
||||
- When you make a non-obvious decision whose *why* would be lost if the tool call history gets pruned
|
||||
- After completing each discrete subtask or batch item
|
||||
- After receiving new information that changes your plan
|
||||
- Before any tool call that will produce substantial output
|
||||
|
||||
Structure:
|
||||
|
||||
### Objective — restate the goal
|
||||
### Current Plan — numbered steps, mark completed with ✓
|
||||
### Key Decisions — decisions made and WHY
|
||||
### Working Data — intermediate results, extracted values (URLs, IDs, key snippets — not full pages)
|
||||
### Open Questions — uncertainties you plan to verify
|
||||
### Blockers — anything preventing progress that isn't already captured in `tasks.last_error`
|
||||
### Working Data — intermediate results, extracted values
|
||||
### Open Questions — uncertainties to verify
|
||||
### Blockers — anything preventing progress
|
||||
|
||||
Update incrementally — do not rewrite from scratch each time.
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
---
|
||||
name: hive.task-decomposition
|
||||
description: Decompose complex tasks into explicit subtasks before diving in.
|
||||
metadata:
|
||||
author: hive
|
||||
type: default-skill
|
||||
---
|
||||
|
||||
## Operational Protocol: Task Decomposition
|
||||
|
||||
Before starting a complex task:
|
||||
|
||||
1. Decompose — break into numbered subtasks in `_working_notes` Current Plan
|
||||
2. Estimate — relative effort per subtask (small/medium/large)
|
||||
3. Execute — work through in order, mark ✓ when complete
|
||||
4. Budget — if running low on iterations, prioritize by impact
|
||||
5. Verify — before declaring done, every subtask must be ✓, skipped (with reason), or blocked
|
||||
@@ -36,8 +36,8 @@ class SkillsConfig:
|
||||
# Default skill configuration
|
||||
default_skills = {
|
||||
"hive.note-taking": {"enabled": True},
|
||||
"hive.quality-monitor": {"enabled": False, "assessment_interval": 10},
|
||||
"hive.error-recovery": {"max_retries_per_tool": 5},
|
||||
"hive.batch-ledger": {"enabled": True, "checkpoint_every_n": 10},
|
||||
"hive.quality-monitor": {"enabled": False},
|
||||
}
|
||||
"""
|
||||
|
||||
|
||||
@@ -24,21 +24,34 @@ _SKILL_DEFAULTS: dict[str, dict[str, Any]] = {
|
||||
"hive.quality-monitor": {"assessment_interval": 5},
|
||||
"hive.error-recovery": {"max_retries_per_tool": 3},
|
||||
"hive.context-preservation": {"warn_at_usage_ratio_pct": 45},
|
||||
"hive.batch-ledger": {"checkpoint_every_n": 5},
|
||||
}
|
||||
|
||||
# Keywords that indicate a batch processing scenario (DS-12)
|
||||
_BATCH_KEYWORDS: tuple[str, ...] = (
|
||||
"list of",
|
||||
"collection of",
|
||||
"set of",
|
||||
"batch of",
|
||||
"each item",
|
||||
"for each",
|
||||
"process all",
|
||||
"records",
|
||||
"entries",
|
||||
"rows",
|
||||
"items",
|
||||
)
|
||||
|
||||
_BATCH_INIT_NUDGE = (
|
||||
"Note: your input appears to describe a batch operation. "
|
||||
"Initialize `_batch_ledger` with the total item count before processing."
|
||||
)
|
||||
|
||||
|
||||
def is_batch_scenario(text: str) -> bool:
|
||||
"""Deprecated: batch auto-detection is no longer used.
|
||||
|
||||
Kept as a no-op so the agent_loop call site (which wraps it in an
|
||||
``if ctx.default_skill_batch_nudge:`` guard that's also now always
|
||||
empty) can stay unchanged until a broader cleanup. The old
|
||||
``_batch_ledger`` shared-buffer feature was replaced by the
|
||||
per-colony SQLite task queue (``hive.colony-progress-tracker``),
|
||||
which lives in ``progress.db`` and is authoritative for batch
|
||||
state across workers and runs.
|
||||
"""
|
||||
return False
|
||||
"""Return True if *text* contains batch-processing indicators (DS-12)."""
|
||||
lower = text.lower()
|
||||
return any(kw in lower for kw in _BATCH_KEYWORDS)
|
||||
|
||||
|
||||
def _apply_overrides(skill_name: str, body: str, overrides: dict[str, Any]) -> str:
|
||||
@@ -54,37 +67,40 @@ def _apply_overrides(skill_name: str, body: str, overrides: dict[str, Any]) -> s
|
||||
return body
|
||||
|
||||
|
||||
# Ordered list of default skills (name → directory).
|
||||
#
|
||||
# Removed on 2026-04-15 as part of the colony-progress-tracker rollout:
|
||||
# - hive.task-decomposition — steps table in progress.db supersedes
|
||||
# in-memory ``_working_notes → Current Plan`` decomposition.
|
||||
# - hive.batch-ledger — tasks table in progress.db supersedes
|
||||
# the ``_batch_ledger`` dict-shaped queue with its pending →
|
||||
# in_progress → completed/failed/skipped state machine.
|
||||
# Both were duplicating state that belongs in SQLite.
|
||||
# Ordered list of default skills (name → directory)
|
||||
SKILL_REGISTRY: dict[str, str] = {
|
||||
"hive.note-taking": "note-taking",
|
||||
"hive.batch-ledger": "batch-ledger",
|
||||
"hive.context-preservation": "context-preservation",
|
||||
"hive.quality-monitor": "quality-monitor",
|
||||
"hive.error-recovery": "error-recovery",
|
||||
"hive.colony-progress-tracker": "colony-progress-tracker",
|
||||
"hive.task-decomposition": "task-decomposition",
|
||||
"hive.writing-hive-skills": "writing-hive-skills",
|
||||
}
|
||||
|
||||
# Shared buffer keys referenced by the remaining default skills (used
|
||||
# for permission auto-inclusion). The dead keys for batch-ledger,
|
||||
# task-decomposition, the handoff buffer, and the error-log buffers
|
||||
# were removed when those features migrated to progress.db.
|
||||
# All shared buffer keys used by default skills (for permission auto-inclusion)
|
||||
DATA_BUFFER_KEYS: list[str] = [
|
||||
# note-taking
|
||||
"_working_notes",
|
||||
"_notes_updated_at",
|
||||
# batch-ledger
|
||||
"_batch_ledger",
|
||||
"_batch_total",
|
||||
"_batch_completed",
|
||||
"_batch_failed",
|
||||
# context-preservation
|
||||
"_handoff_context",
|
||||
"_preserved_data",
|
||||
# quality-monitor
|
||||
"_quality_log",
|
||||
"_quality_degradation_count",
|
||||
# error-recovery
|
||||
"_error_log",
|
||||
"_failed_tools",
|
||||
"_escalation_needed",
|
||||
# task-decomposition
|
||||
"_subtasks",
|
||||
"_iteration_budget_remaining",
|
||||
]
|
||||
|
||||
|
||||
@@ -236,15 +252,16 @@ class DefaultSkillManager:
|
||||
|
||||
@property
|
||||
def batch_init_nudge(self) -> str | None:
|
||||
"""Deprecated: always returns None.
|
||||
"""Nudge text to prepend to system prompt when batch input detected (DS-12).
|
||||
|
||||
The ``hive.batch-ledger`` default skill was removed when batch
|
||||
tracking moved into ``progress.db`` (``hive.colony-progress-
|
||||
tracker``). Callers in agent_host, colony_runtime, and
|
||||
orchestrator still read this property; returning None keeps
|
||||
them functional with no system-prompt nudge.
|
||||
Returns None if ``hive.batch-ledger`` is disabled or auto_detect_batch is False.
|
||||
"""
|
||||
return None
|
||||
if "hive.batch-ledger" not in self._skills:
|
||||
return None
|
||||
overrides = self._config.get_default_overrides("hive.batch-ledger")
|
||||
if overrides.get("auto_detect_batch") is False:
|
||||
return None
|
||||
return _BATCH_INIT_NUDGE
|
||||
|
||||
@property
|
||||
def context_warn_ratio(self) -> float | None:
|
||||
|
||||
@@ -903,76 +903,10 @@ def register_queen_lifecycle_tools(
|
||||
# ``start_worker`` was removed in the Phase 4 unification — its
|
||||
# bare-bones spawn duplicated ``run_agent_with_input`` (which has
|
||||
# credential preflight, concurrency guard, and phase tracking on
|
||||
# top). The shared preflight timeout below is used by both
|
||||
# ``run_agent_with_input`` and ``run_parallel_workers``.
|
||||
# top). The shared preflight timeout below is still used by
|
||||
# ``run_agent_with_input``.
|
||||
_START_PREFLIGHT_TIMEOUT = 15 # seconds
|
||||
|
||||
async def _preflight_credentials(
|
||||
legacy: Any,
|
||||
*,
|
||||
tool_label: str,
|
||||
) -> set[str]:
|
||||
"""Compute tools whose credentials are missing and resync MCP servers.
|
||||
|
||||
Shared between ``run_agent_with_input`` (single spawn) and
|
||||
``run_parallel_workers`` (batch spawn). Returns the set of
|
||||
tool names whose credentials failed validation; the caller
|
||||
filters these out of the spawn's tool lists.
|
||||
|
||||
Exceptions (including validator bugs) are logged and treated
|
||||
as "no tools dropped" so a broken validator can't block a
|
||||
spawn. Wall-clock bound at ``_START_PREFLIGHT_TIMEOUT`` —
|
||||
slow credential HTTP health checks can't stall the LLM turn.
|
||||
"""
|
||||
unavailable: set[str] = set()
|
||||
|
||||
async def _run() -> None:
|
||||
nonlocal unavailable
|
||||
try:
|
||||
from framework.credentials.validation import compute_unavailable_tools
|
||||
|
||||
loop = asyncio.get_running_loop()
|
||||
drop, messages = await loop.run_in_executor(
|
||||
None,
|
||||
lambda: compute_unavailable_tools(legacy.graph.nodes),
|
||||
)
|
||||
unavailable = drop
|
||||
if drop:
|
||||
logger.warning(
|
||||
"%s: dropping %d tool(s) with unavailable credentials: %s",
|
||||
tool_label,
|
||||
len(drop),
|
||||
"; ".join(messages),
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"%s: compute_unavailable_tools raised, proceeding without "
|
||||
"credential-based tool filtering: %s",
|
||||
tool_label,
|
||||
exc,
|
||||
)
|
||||
|
||||
runner = getattr(session, "runner", None)
|
||||
if runner is not None:
|
||||
try:
|
||||
loop = asyncio.get_running_loop()
|
||||
await loop.run_in_executor(
|
||||
None,
|
||||
lambda: runner._tool_registry.resync_mcp_servers_if_needed(),
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("%s: MCP resync failed: %s", tool_label, exc)
|
||||
|
||||
try:
|
||||
await asyncio.wait_for(_run(), timeout=_START_PREFLIGHT_TIMEOUT)
|
||||
except TimeoutError:
|
||||
logger.warning(
|
||||
"%s: credential preflight timed out after %ds — proceeding",
|
||||
tool_label,
|
||||
_START_PREFLIGHT_TIMEOUT,
|
||||
)
|
||||
return unavailable
|
||||
|
||||
# --- stop_worker -----------------------------------------------------------
|
||||
|
||||
async def stop_worker(*, reason: str = "Stopped by queen") -> str:
|
||||
@@ -1144,105 +1078,6 @@ def register_queen_lifecycle_tools(
|
||||
}
|
||||
)
|
||||
|
||||
# Credential preflight — mirrors the one run_agent_with_input
|
||||
# performs. Without this, missing credentials (e.g. stale
|
||||
# GITHUB_TOKEN) fail once PER spawned worker, yielding N
|
||||
# duplicate error reports for a single fixable issue. Catch
|
||||
# once upfront, build a filtered tool list, and pass it to
|
||||
# every spawn via tools_override.
|
||||
legacy_for_preflight = _get_runtime()
|
||||
unavailable_tools_parallel: set[str] = set()
|
||||
tools_override_parallel: list[Any] | None = None
|
||||
if legacy_for_preflight is not None:
|
||||
try:
|
||||
unavailable_tools_parallel = await _preflight_credentials(
|
||||
legacy_for_preflight, tool_label="run_parallel_workers"
|
||||
)
|
||||
except CredentialError as e:
|
||||
# Structured credential failure: publish the
|
||||
# CREDENTIALS_REQUIRED event so the frontend's modal
|
||||
# can fire, and return the same shape the single-path
|
||||
# tool returns on the same failure.
|
||||
error_payload = credential_errors_to_json(e)
|
||||
error_payload["agent_path"] = str(getattr(session, "worker_path", "") or "")
|
||||
bus = getattr(session, "event_bus", None)
|
||||
if bus is not None:
|
||||
await bus.publish(
|
||||
AgentEvent(
|
||||
type=EventType.CREDENTIALS_REQUIRED,
|
||||
stream_id="queen",
|
||||
data=error_payload,
|
||||
)
|
||||
)
|
||||
return json.dumps(error_payload)
|
||||
|
||||
if unavailable_tools_parallel:
|
||||
colony_tools = list(getattr(colony, "_tools", []) or [])
|
||||
before = len(colony_tools)
|
||||
tools_override_parallel = [
|
||||
t
|
||||
for t in colony_tools
|
||||
if getattr(t, "name", None) not in unavailable_tools_parallel
|
||||
]
|
||||
logger.info(
|
||||
"run_parallel_workers: dropped %d tool object(s) from spawn_tools (unavailable credentials)",
|
||||
before - len(tools_override_parallel),
|
||||
)
|
||||
|
||||
# Colony progress tracker wiring: if the session's loaded
|
||||
# worker points at a colony directory that has a progress.db,
|
||||
# inject db_path + colony_id into every per-task ``data``
|
||||
# dict so each spawned worker sees them in its first user
|
||||
# message and can claim rows from the queue. ColonyRuntime.
|
||||
# spawn() detects db_path in input_data and pre-activates
|
||||
# hive.colony-progress-tracker into the catalog prompt.
|
||||
_colony_db_path: str | None = None
|
||||
_colony_id: str | None = None
|
||||
_worker_path = getattr(session, "worker_path", None)
|
||||
if _worker_path:
|
||||
from pathlib import Path as _Path
|
||||
|
||||
_wp = _Path(_worker_path)
|
||||
_pdb = _wp / "data" / "progress.db"
|
||||
if _pdb.exists():
|
||||
_colony_db_path = str(_pdb.resolve())
|
||||
_colony_id = _wp.name
|
||||
|
||||
# Phase 2: enqueue each task into progress.db BEFORE building
|
||||
# spawn specs so every parallel worker has a pre-assigned row
|
||||
# to claim. Without this the queue stays empty and each
|
||||
# worker's claim UPDATE affects zero rows, silently falling
|
||||
# back to executing from its spawn message.
|
||||
_enqueued_task_ids: list[str | None] = [None] * len(tasks)
|
||||
if _colony_db_path:
|
||||
from pathlib import Path as _PathP
|
||||
|
||||
from framework.host.progress_db import (
|
||||
enqueue_task as _enqueue_task_fn,
|
||||
)
|
||||
|
||||
_pdb_path_obj = _PathP(_colony_db_path)
|
||||
for _i, _spec in enumerate(tasks):
|
||||
if not isinstance(_spec, dict):
|
||||
continue
|
||||
_task_text_pre = str(_spec.get("task", "")).strip()
|
||||
if not _task_text_pre:
|
||||
continue
|
||||
try:
|
||||
_enqueued_task_ids[_i] = await asyncio.to_thread(
|
||||
_enqueue_task_fn,
|
||||
_pdb_path_obj,
|
||||
_task_text_pre,
|
||||
source="run_parallel_workers",
|
||||
)
|
||||
except Exception as _enqueue_exc:
|
||||
logger.warning(
|
||||
"run_parallel_workers: failed to enqueue tasks[%d] "
|
||||
"(spawn proceeding without pinned task_id): %s",
|
||||
_i,
|
||||
_enqueue_exc,
|
||||
)
|
||||
|
||||
# Normalise: each entry must have a non-empty "task" string.
|
||||
normalised: list[dict] = []
|
||||
for i, spec in enumerate(tasks):
|
||||
@@ -1251,58 +1086,18 @@ def register_queen_lifecycle_tools(
|
||||
task_text = str(spec.get("task", "")).strip()
|
||||
if not task_text:
|
||||
return json.dumps({"error": f"tasks[{i}].task is empty"})
|
||||
spec_data = spec.get("data") if isinstance(spec.get("data"), dict) else {}
|
||||
if _colony_db_path:
|
||||
spec_data = {
|
||||
**spec_data,
|
||||
"db_path": _colony_db_path,
|
||||
"colony_id": _colony_id,
|
||||
}
|
||||
if _enqueued_task_ids[i]:
|
||||
spec_data["task_id"] = _enqueued_task_ids[i]
|
||||
normalised.append(
|
||||
{
|
||||
"task": task_text,
|
||||
"data": spec_data or None,
|
||||
"data": spec.get("data") if isinstance(spec.get("data"), dict) else None,
|
||||
}
|
||||
)
|
||||
|
||||
if _colony_db_path:
|
||||
_pinned = sum(1 for tid in _enqueued_task_ids if tid)
|
||||
logger.info(
|
||||
"run_parallel_workers: attached progress_db context to "
|
||||
"%d spawn(s) (colony_id=%s, %d pinned task_ids)",
|
||||
len(normalised),
|
||||
_colony_id,
|
||||
_pinned,
|
||||
)
|
||||
|
||||
try:
|
||||
worker_ids = await colony.spawn_batch(
|
||||
normalised,
|
||||
tools_override=tools_override_parallel,
|
||||
)
|
||||
worker_ids = await colony.spawn_batch(normalised)
|
||||
except Exception as e:
|
||||
return json.dumps({"error": f"spawn_batch failed: {e}"})
|
||||
|
||||
# Phase transition — mirrors run_agent_with_input. With the
|
||||
# batch now spawned, the queen is semantically "running" until
|
||||
# wait_for_worker_reports returns, so phase-gated running
|
||||
# tools (inject_message, reply_to_worker, ...) should be
|
||||
# available. Without this change run_parallel_workers left
|
||||
# the queen in whatever phase she was in (typically staging).
|
||||
if phase_state is not None:
|
||||
try:
|
||||
await phase_state.switch_to_running()
|
||||
_update_meta_json(
|
||||
session_manager, manager_session_id, {"phase": "running"}
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"run_parallel_workers: phase transition to 'running' failed (non-fatal): %s",
|
||||
exc,
|
||||
)
|
||||
|
||||
try:
|
||||
reports = await colony.wait_for_worker_reports(
|
||||
worker_ids,
|
||||
@@ -1527,35 +1322,6 @@ def register_queen_lifecycle_tools(
|
||||
except OSError as e:
|
||||
return None, f"failed to install skill into {target}: {e}"
|
||||
|
||||
# Cleanup the source directory after a successful install so
|
||||
# the authored skill doesn't linger as debris in the agent
|
||||
# workspace (or — pre-sandbox-split — in the hive git
|
||||
# checkout). Only removes paths that are OUTSIDE
|
||||
# ``~/.hive/skills/`` so we never nuke the canonical install
|
||||
# target or user-owned skill dirs.
|
||||
try:
|
||||
src_resolved = src.resolve()
|
||||
skills_root_resolved = target_root.resolve()
|
||||
try:
|
||||
src_resolved.relative_to(skills_root_resolved)
|
||||
_under_skills_root = True
|
||||
except ValueError:
|
||||
_under_skills_root = False
|
||||
if not _under_skills_root:
|
||||
_shutil.rmtree(src_resolved)
|
||||
logger.info(
|
||||
"create_colony: cleaned up authored skill source at %s "
|
||||
"(installed to %s)",
|
||||
src_resolved,
|
||||
target,
|
||||
)
|
||||
except OSError as e:
|
||||
logger.warning(
|
||||
"create_colony: failed to clean up skill source at %s (non-fatal): %s",
|
||||
src,
|
||||
e,
|
||||
)
|
||||
|
||||
return target, None
|
||||
|
||||
async def create_colony(
|
||||
@@ -1563,7 +1329,6 @@ def register_queen_lifecycle_tools(
|
||||
colony_name: str,
|
||||
task: str,
|
||||
skill_path: str,
|
||||
tasks: list[dict] | None = None,
|
||||
) -> str:
|
||||
"""Create a colony after installing a pre-authored skill folder.
|
||||
|
||||
@@ -1573,13 +1338,6 @@ def register_queen_lifecycle_tools(
|
||||
they're ready to start the worker — at that point the worker
|
||||
reads the task from ``worker.json`` and the skill from
|
||||
``~/.hive/skills/`` and starts informed.
|
||||
|
||||
When *tasks* is provided, each entry is seeded into the
|
||||
colony's ``progress.db`` task queue in a single transaction.
|
||||
Workers then claim rows from the queue using the
|
||||
``hive.colony-progress-tracker`` default skill. Each task dict
|
||||
accepts: ``goal`` (required), optional ``steps``,
|
||||
``sop_items``, ``priority``, ``payload``, ``parent_task_id``.
|
||||
"""
|
||||
if session is None:
|
||||
return json.dumps({"error": "No session bound to this tool registry."})
|
||||
@@ -1634,7 +1392,6 @@ def register_queen_lifecycle_tools(
|
||||
session=session,
|
||||
colony_name=cn,
|
||||
task=(task or "").strip(),
|
||||
tasks=tasks if isinstance(tasks, list) else None,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.exception("create_colony: fork failed after installing skill")
|
||||
@@ -1687,8 +1444,6 @@ def register_queen_lifecycle_tools(
|
||||
"is_new": fork_result.get("is_new", True),
|
||||
"skill_installed": str(installed_skill),
|
||||
"skill_name": installed_skill.name if installed_skill else None,
|
||||
"db_path": fork_result.get("db_path"),
|
||||
"tasks_seeded": len(fork_result.get("task_ids") or []),
|
||||
}
|
||||
)
|
||||
|
||||
@@ -1786,57 +1541,6 @@ def register_queen_lifecycle_tools(
|
||||
"protocol'."
|
||||
),
|
||||
},
|
||||
"tasks": {
|
||||
"type": "array",
|
||||
"description": (
|
||||
"Optional pre-seeded task queue for the colony. "
|
||||
"When the colony is a fan-out of many similar "
|
||||
"units of work (e.g. 'process record #1234', "
|
||||
"'scrape profile X'), pass them here as an "
|
||||
"array and workers will claim rows atomically "
|
||||
"from the SQLite queue using the "
|
||||
"hive.colony-progress-tracker skill. Each task "
|
||||
"needs a 'goal' string; optionally include "
|
||||
"'steps' (ordered subtasks), 'sop_items' "
|
||||
"(required checklist gates), 'priority' "
|
||||
"(higher runs first), and 'payload' "
|
||||
"(task-specific parameters). Can be hundreds "
|
||||
"or thousands of entries — the bulk insert "
|
||||
"runs in a single transaction."
|
||||
),
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"goal": {"type": "string"},
|
||||
"priority": {"type": "integer"},
|
||||
"payload": {},
|
||||
"steps": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"title": {"type": "string"},
|
||||
"detail": {"type": "string"},
|
||||
},
|
||||
"required": ["title"],
|
||||
},
|
||||
},
|
||||
"sop_items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"key": {"type": "string"},
|
||||
"description": {"type": "string"},
|
||||
"required": {"type": "boolean"},
|
||||
},
|
||||
"required": ["key", "description"],
|
||||
},
|
||||
},
|
||||
},
|
||||
"required": ["goal"],
|
||||
},
|
||||
},
|
||||
},
|
||||
"required": ["colony_name", "task", "skill_path"],
|
||||
},
|
||||
@@ -1848,158 +1552,6 @@ def register_queen_lifecycle_tools(
|
||||
)
|
||||
tools_registered += 1
|
||||
|
||||
# --- enqueue_task ------------------------------------------------------------
|
||||
|
||||
async def enqueue_task_tool(
|
||||
*,
|
||||
colony_name: str,
|
||||
goal: str,
|
||||
steps: list[dict] | None = None,
|
||||
sop_items: list[dict] | None = None,
|
||||
payload: Any = None,
|
||||
priority: int = 0,
|
||||
parent_task_id: str | None = None,
|
||||
) -> str:
|
||||
"""Append a single task to an existing colony's progress.db queue.
|
||||
|
||||
Use this when the colony is already created and more work
|
||||
needs to be fanned out (webhook-driven, follow-up requests,
|
||||
worker-generated subtasks). The colony's workers pick it up
|
||||
on their next claim cycle.
|
||||
"""
|
||||
cn = (colony_name or "").strip()
|
||||
if not _COLONY_NAME_RE.match(cn):
|
||||
return json.dumps(
|
||||
{"error": "colony_name must be lowercase alphanumeric with underscores"}
|
||||
)
|
||||
|
||||
from pathlib import Path as _Path
|
||||
|
||||
from framework.host.progress_db import (
|
||||
enqueue_task as _enqueue_task,
|
||||
ensure_progress_db as _ensure_db,
|
||||
)
|
||||
|
||||
colony_dir = _Path.home() / ".hive" / "colonies" / cn
|
||||
if not colony_dir.is_dir():
|
||||
return json.dumps({"error": f"colony '{cn}' not found"})
|
||||
|
||||
try:
|
||||
db_path = await asyncio.to_thread(_ensure_db, colony_dir)
|
||||
task_id = await asyncio.to_thread(
|
||||
_enqueue_task,
|
||||
db_path,
|
||||
goal,
|
||||
steps=steps,
|
||||
sop_items=sop_items,
|
||||
payload=payload,
|
||||
priority=priority,
|
||||
parent_task_id=parent_task_id,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.exception("enqueue_task: failed to insert row")
|
||||
return json.dumps({"error": f"enqueue_task failed: {e}"})
|
||||
|
||||
return json.dumps(
|
||||
{
|
||||
"status": "enqueued",
|
||||
"colony_name": cn,
|
||||
"task_id": task_id,
|
||||
"db_path": str(db_path),
|
||||
}
|
||||
)
|
||||
|
||||
_enqueue_task_tool = Tool(
|
||||
name="enqueue_task",
|
||||
description=(
|
||||
"Append a single task to an existing colony's progress.db "
|
||||
"queue. Use this after create_colony when more work needs "
|
||||
"to be fanned out — e.g. a webhook fired, the user asked "
|
||||
"for a follow-up run, or a worker spawned a subtask. The "
|
||||
"colony's workers pick it up on their next claim cycle "
|
||||
"(atomic UPDATE … WHERE status='pending'). For bulk "
|
||||
"authoring at colony creation time, pass the 'tasks' "
|
||||
"array to create_colony instead."
|
||||
),
|
||||
parameters={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"colony_name": {
|
||||
"type": "string",
|
||||
"description": "Target colony name (lowercase + underscores).",
|
||||
},
|
||||
"goal": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"Human-readable task description. Self-contained — "
|
||||
"the worker has no context beyond this string plus "
|
||||
"any steps/sop_items/payload you attach."
|
||||
),
|
||||
},
|
||||
"steps": {
|
||||
"type": "array",
|
||||
"description": (
|
||||
"Optional ordered subtasks the worker should "
|
||||
"check off as it executes. Each step needs a "
|
||||
"'title'; optional 'detail' for longer "
|
||||
"instructions."
|
||||
),
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"title": {"type": "string"},
|
||||
"detail": {"type": "string"},
|
||||
},
|
||||
"required": ["title"],
|
||||
},
|
||||
},
|
||||
"sop_items": {
|
||||
"type": "array",
|
||||
"description": (
|
||||
"Optional hard-gate checklist items the worker "
|
||||
"MUST address before marking the task done. "
|
||||
"Each item needs a 'key' (slug) and "
|
||||
"'description'; 'required' defaults to true."
|
||||
),
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"key": {"type": "string"},
|
||||
"description": {"type": "string"},
|
||||
"required": {"type": "boolean"},
|
||||
},
|
||||
"required": ["key", "description"],
|
||||
},
|
||||
},
|
||||
"payload": {
|
||||
"description": (
|
||||
"Optional task-specific parameters. Stored as "
|
||||
"JSON in the 'payload' column."
|
||||
),
|
||||
},
|
||||
"priority": {
|
||||
"type": "integer",
|
||||
"description": "Higher values run first. Default 0.",
|
||||
},
|
||||
"parent_task_id": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"Optional reference to an existing task this "
|
||||
"one was spawned from (audit only; no blocking "
|
||||
"dependency resolver today)."
|
||||
),
|
||||
},
|
||||
},
|
||||
"required": ["colony_name", "goal"],
|
||||
},
|
||||
)
|
||||
registry.register(
|
||||
"enqueue_task",
|
||||
_enqueue_task_tool,
|
||||
lambda inputs: enqueue_task_tool(**inputs),
|
||||
)
|
||||
tools_registered += 1
|
||||
|
||||
# --- switch_to_reviewing ----------------------------------------------------
|
||||
|
||||
async def switch_to_reviewing_tool() -> str:
|
||||
@@ -3417,8 +2969,7 @@ def register_queen_lifecycle_tools(
|
||||
if preamble.get("pending_question"):
|
||||
result["pending_question"] = preamble["pending_question"]
|
||||
|
||||
_idle = runtime.agent_idle_seconds
|
||||
result["agent_idle_seconds"] = round(_idle, 1) if _idle != float("inf") else -1
|
||||
result["agent_idle_seconds"] = round(runtime.agent_idle_seconds, 1)
|
||||
|
||||
for key in ("current_node", "current_iteration"):
|
||||
if key in preamble:
|
||||
@@ -4162,33 +3713,6 @@ def register_queen_lifecycle_tools(
|
||||
task,
|
||||
)
|
||||
|
||||
# Concurrency budget check — mirrors run_parallel_workers so a
|
||||
# queen in a loop can't silently exceed max_concurrent_workers
|
||||
# by hammering run_agent_with_input. Per-call count is 1, so
|
||||
# the check is ``active + 1 > max_concurrent``.
|
||||
colony_cfg = getattr(colony, "_config", None) or getattr(colony, "config", None)
|
||||
max_concurrent = getattr(colony_cfg, "max_concurrent_workers", None)
|
||||
if max_concurrent and max_concurrent > 0:
|
||||
active = 0
|
||||
try:
|
||||
workers = getattr(colony, "_workers", {}) or {}
|
||||
for w in workers.values():
|
||||
handle = getattr(w, "_task_handle", None)
|
||||
if handle is not None and not handle.done():
|
||||
active += 1
|
||||
except Exception:
|
||||
active = 0
|
||||
if active + 1 > max_concurrent:
|
||||
return json.dumps(
|
||||
{
|
||||
"error": (
|
||||
f"run_agent_with_input would exceed max_concurrent_workers "
|
||||
f"({active} active + 1 new > {max_concurrent}). "
|
||||
"Wait for an existing worker to finish or stop one."
|
||||
)
|
||||
}
|
||||
)
|
||||
|
||||
try:
|
||||
# Pre-flight: compute the set of tools whose credentials are
|
||||
# NOT currently available, and resync MCP servers. We do NOT
|
||||
@@ -4199,9 +3723,58 @@ def register_queen_lifecycle_tools(
|
||||
# to block the whole spawn with a CredentialError; the fix
|
||||
# is to treat unset credentials as "drop these tools" rather
|
||||
# than "abort the worker".
|
||||
unavailable_tools = await _preflight_credentials(
|
||||
legacy, tool_label="run_agent_with_input"
|
||||
)
|
||||
#
|
||||
# Note: the MCP admission gate (_build_mcp_admission_gate in
|
||||
# tool_registry.py) already filters MCP tools at registration
|
||||
# time. This preflight covers the non-MCP path — tools.py
|
||||
# discoveries via discover_from_module — which has no
|
||||
# credential gate of its own.
|
||||
loop = asyncio.get_running_loop()
|
||||
unavailable_tools: set[str] = set()
|
||||
|
||||
async def _preflight():
|
||||
nonlocal unavailable_tools
|
||||
try:
|
||||
from framework.credentials.validation import compute_unavailable_tools
|
||||
|
||||
drop, messages = await loop.run_in_executor(
|
||||
None,
|
||||
lambda: compute_unavailable_tools(legacy.graph.nodes),
|
||||
)
|
||||
unavailable_tools = drop
|
||||
if drop:
|
||||
logger.warning(
|
||||
"run_agent_with_input: dropping %d tool(s) with "
|
||||
"unavailable credentials from worker spawn: %s",
|
||||
len(drop),
|
||||
"; ".join(messages),
|
||||
)
|
||||
except Exception as exc:
|
||||
# Validation itself failing (not a credential failure —
|
||||
# a code error in the validator) should not block the
|
||||
# spawn. Log and proceed as if nothing was dropped.
|
||||
logger.warning(
|
||||
"compute_unavailable_tools raised, proceeding without credential-based tool filtering: %s",
|
||||
exc,
|
||||
)
|
||||
|
||||
runner = getattr(session, "runner", None)
|
||||
if runner:
|
||||
try:
|
||||
await loop.run_in_executor(
|
||||
None,
|
||||
lambda: runner._tool_registry.resync_mcp_servers_if_needed(),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("MCP resync failed: %s", e)
|
||||
|
||||
try:
|
||||
await asyncio.wait_for(_preflight(), timeout=_START_PREFLIGHT_TIMEOUT)
|
||||
except TimeoutError:
|
||||
logger.warning(
|
||||
"run_agent_with_input preflight timed out after %ds — proceeding",
|
||||
_START_PREFLIGHT_TIMEOUT,
|
||||
)
|
||||
|
||||
# Build a per-spawn AgentSpec that mirrors the loaded
|
||||
# worker's entry-node identity. This is what makes the
|
||||
@@ -4275,66 +3848,10 @@ def register_queen_lifecycle_tools(
|
||||
dropped_count,
|
||||
)
|
||||
|
||||
# Colony progress tracker wiring: if the loaded worker
|
||||
# lives under ~/.hive/colonies/{name}/ and has a
|
||||
# progress.db, inject db_path + colony_id into input_data
|
||||
# so the spawned worker sees them in its first user
|
||||
# message and can use the hive.colony-progress-tracker
|
||||
# skill to claim tasks from the queue.
|
||||
_spawn_input_data: dict[str, Any] = {"user_request": task}
|
||||
_worker_path = getattr(session, "worker_path", None)
|
||||
if _worker_path:
|
||||
from pathlib import Path as _Path
|
||||
|
||||
_worker_path_p = _Path(_worker_path)
|
||||
_progress_db = _worker_path_p / "data" / "progress.db"
|
||||
if _progress_db.exists():
|
||||
_spawn_input_data["db_path"] = str(_progress_db.resolve())
|
||||
_spawn_input_data["colony_id"] = _worker_path_p.name
|
||||
logger.info(
|
||||
"run_agent_with_input: attached progress_db context "
|
||||
"(colony_id=%s, db_path=%s)",
|
||||
_worker_path_p.name,
|
||||
_progress_db,
|
||||
)
|
||||
|
||||
# Phase 2: enqueue the task into progress.db BEFORE
|
||||
# spawning so the worker has a concrete row to
|
||||
# claim. Without this the queue is empty and the
|
||||
# worker's claim UPDATE affects zero rows, so it
|
||||
# silently falls back to executing from the chat
|
||||
# spawn message. Any enqueue failure is logged and
|
||||
# the spawn proceeds without a pinned task_id
|
||||
# (degrades to the pre-Phase-2 behavior).
|
||||
try:
|
||||
from framework.host.progress_db import (
|
||||
enqueue_task as _enqueue_task_fn,
|
||||
)
|
||||
|
||||
_task_id = await asyncio.to_thread(
|
||||
_enqueue_task_fn,
|
||||
_progress_db,
|
||||
task,
|
||||
source="run_agent_with_input",
|
||||
)
|
||||
_spawn_input_data["task_id"] = _task_id
|
||||
logger.info(
|
||||
"run_agent_with_input: enqueued task %s into %s",
|
||||
_task_id,
|
||||
_progress_db,
|
||||
)
|
||||
except Exception as _enqueue_exc:
|
||||
logger.warning(
|
||||
"run_agent_with_input: failed to enqueue task "
|
||||
"into progress.db (spawn proceeding without "
|
||||
"pinned task_id): %s",
|
||||
_enqueue_exc,
|
||||
)
|
||||
|
||||
worker_ids = await colony.spawn(
|
||||
task=task,
|
||||
count=1,
|
||||
input_data=_spawn_input_data,
|
||||
input_data={"user_request": task},
|
||||
agent_spec=spawn_spec,
|
||||
tools=spawn_tools,
|
||||
tool_executor=spawn_tool_executor,
|
||||
|
||||
@@ -87,25 +87,9 @@ export const sessionsApi = {
|
||||
colonies: (sessionId: string) =>
|
||||
api.get<{ colonies: string[] }>(`/sessions/${sessionId}/colonies`),
|
||||
|
||||
/** Get persisted eventbus log for a session (works for cold sessions — used for full UI replay).
|
||||
*
|
||||
* Returns the TAIL of the event log. Default limit 2000 (server
|
||||
* clamps to [1, 10000]); older events get dropped and
|
||||
* ``truncated: true`` is set so the UI can show an indicator.
|
||||
*/
|
||||
eventsHistory: (sessionId: string, limit?: number) =>
|
||||
api.get<{
|
||||
events: AgentEvent[];
|
||||
session_id: string;
|
||||
total: number;
|
||||
returned: number;
|
||||
truncated: boolean;
|
||||
limit: number;
|
||||
}>(
|
||||
`/sessions/${sessionId}/events/history${
|
||||
limit ? `?limit=${limit}` : ""
|
||||
}`,
|
||||
),
|
||||
/** Get persisted eventbus log for a session (works for cold sessions — used for full UI replay). */
|
||||
eventsHistory: (sessionId: string) =>
|
||||
api.get<{ events: AgentEvent[]; session_id: string }>(`/sessions/${sessionId}/events/history`),
|
||||
|
||||
/** Open the session's data folder in the OS file manager. */
|
||||
revealFolder: (sessionId: string) =>
|
||||
|
||||
@@ -31,15 +31,6 @@ export default function AppHeader({ onOpenQueenProfile }: AppHeaderProps) {
|
||||
const colonyId = colonyMatch[1];
|
||||
const colony = colonies.find((c) => c.id === colonyId);
|
||||
title = colony?.name ?? colonyId;
|
||||
// Show queen profile button when the colony has a linked queen profile
|
||||
if (colony?.queenProfileId) {
|
||||
const profile = queenProfiles.find((q) => q.id === colony.queenProfileId);
|
||||
if (profile) {
|
||||
queenIdForProfile = profile.id;
|
||||
queenTitle = profile.title ?? null;
|
||||
icon = <Crown className="w-4 h-4 text-primary" />;
|
||||
}
|
||||
}
|
||||
} else if (queenMatch) {
|
||||
const queenId = queenMatch[1];
|
||||
const profile = queenProfiles.find((q) => q.id === queenId);
|
||||
|
||||
@@ -10,8 +10,6 @@ import {
|
||||
Paperclip,
|
||||
X,
|
||||
} from "lucide-react";
|
||||
import WorkerRunBubble from "@/components/WorkerRunBubble";
|
||||
import type { WorkerRunGroup } from "@/components/WorkerRunBubble";
|
||||
|
||||
export interface ImageContent {
|
||||
type: "image_url";
|
||||
@@ -27,8 +25,6 @@ export interface ContextUsageEntry {
|
||||
import MarkdownContent from "@/components/MarkdownContent";
|
||||
import QuestionWidget from "@/components/QuestionWidget";
|
||||
import MultiQuestionWidget from "@/components/MultiQuestionWidget";
|
||||
import { useColony } from "@/context/ColonyContext";
|
||||
import { useQueenProfile } from "@/context/QueenProfileContext";
|
||||
import ParallelSubagentBubble, {
|
||||
type SubagentGroup,
|
||||
} from "@/components/ParallelSubagentBubble";
|
||||
@@ -64,12 +60,6 @@ export interface ChatMessage {
|
||||
nodeId?: string;
|
||||
/** Backend execution_id for this message */
|
||||
executionId?: string;
|
||||
/** Backend stream_id — the per-worker identity used for grouping
|
||||
* parallel-spawn workers into their own stacked WorkerRunBubble.
|
||||
* "queen" for queen messages, "worker" for the single loaded
|
||||
* worker (run_agent_with_input), or "worker:{uuid}" for each
|
||||
* parallel worker spawned via run_parallel_workers. */
|
||||
streamId?: string;
|
||||
/** True when the message was sent while the queen was still processing */
|
||||
queued?: boolean;
|
||||
}
|
||||
@@ -134,14 +124,14 @@ const TOOL_HEX = [
|
||||
"#e5a820", // sunflower
|
||||
];
|
||||
|
||||
export function toolHex(name: string): string {
|
||||
function toolHex(name: string): string {
|
||||
let hash = 0;
|
||||
for (let i = 0; i < name.length; i++)
|
||||
hash = (hash * 31 + name.charCodeAt(i)) | 0;
|
||||
return TOOL_HEX[Math.abs(hash) % TOOL_HEX.length];
|
||||
}
|
||||
|
||||
export function ToolActivityRow({ content }: { content: string }) {
|
||||
function ToolActivityRow({ content }: { content: string }) {
|
||||
let tools: { name: string; done: boolean }[] = [];
|
||||
try {
|
||||
const parsed = JSON.parse(content);
|
||||
@@ -346,15 +336,6 @@ function InlineAskUserBubble({
|
||||
const color = getColor(msg.agent, msg.role);
|
||||
const thread = msg.thread || activeThread;
|
||||
|
||||
const { queenProfiles } = useColony();
|
||||
const { openQueenProfile } = useQueenProfile();
|
||||
const queenProfileId = isQueen
|
||||
? queenProfiles.find((q) => q.name === msg.agent)?.id ?? null
|
||||
: null;
|
||||
const handleQueenClick = queenProfileId
|
||||
? () => openQueenProfile(queenProfileId)
|
||||
: undefined;
|
||||
|
||||
const handleSingle = (answer: string) => {
|
||||
setState("submitted");
|
||||
onSend(answer, thread);
|
||||
@@ -374,14 +355,12 @@ function InlineAskUserBubble({
|
||||
return (
|
||||
<div className="flex gap-3">
|
||||
<div
|
||||
className={`flex-shrink-0 ${isQueen ? "w-9 h-9" : "w-7 h-7"} rounded-xl flex items-center justify-center${handleQueenClick ? " cursor-pointer hover:opacity-80 transition-opacity" : ""}`}
|
||||
className={`flex-shrink-0 ${isQueen ? "w-9 h-9" : "w-7 h-7"} rounded-xl flex items-center justify-center`}
|
||||
style={{
|
||||
backgroundColor: `${color}18`,
|
||||
border: `1.5px solid ${color}35`,
|
||||
boxShadow: isQueen ? `0 0 12px ${color}20` : undefined,
|
||||
}}
|
||||
onClick={handleQueenClick}
|
||||
title={handleQueenClick ? `View ${msg.agent}'s profile` : undefined}
|
||||
>
|
||||
{isQueen ? (
|
||||
<Crown className="w-4 h-4" style={{ color }} />
|
||||
@@ -394,9 +373,8 @@ function InlineAskUserBubble({
|
||||
>
|
||||
<div className="flex items-center gap-2 mb-1">
|
||||
<span
|
||||
className={`font-medium ${isQueen ? "text-sm" : "text-xs"}${handleQueenClick ? " cursor-pointer hover:underline" : ""}`}
|
||||
className={`font-medium ${isQueen ? "text-sm" : "text-xs"}`}
|
||||
style={{ color }}
|
||||
onClick={handleQueenClick}
|
||||
>
|
||||
{msg.agent}
|
||||
</span>
|
||||
@@ -457,13 +435,6 @@ const MessageBubble = memo(
|
||||
const isQueen = msg.role === "queen";
|
||||
const color = getColor(msg.agent, msg.role);
|
||||
|
||||
// Resolve queen profile ID so clicking avatar/name opens the profile panel
|
||||
const { queenProfiles } = useColony();
|
||||
const { openQueenProfile } = useQueenProfile();
|
||||
const queenProfileId = isQueen
|
||||
? queenProfiles.find((q) => q.name === msg.agent)?.id ?? null
|
||||
: null;
|
||||
|
||||
if (msg.type === "run_divider") {
|
||||
return (
|
||||
<div className="flex items-center gap-3 py-2 my-1">
|
||||
@@ -558,21 +529,15 @@ const MessageBubble = memo(
|
||||
);
|
||||
}
|
||||
|
||||
const handleQueenClick = queenProfileId
|
||||
? () => openQueenProfile(queenProfileId)
|
||||
: undefined;
|
||||
|
||||
return (
|
||||
<div className="flex gap-3">
|
||||
<div
|
||||
className={`flex-shrink-0 ${isQueen ? "w-9 h-9" : "w-7 h-7"} rounded-xl flex items-center justify-center${handleQueenClick ? " cursor-pointer hover:opacity-80 transition-opacity" : ""}`}
|
||||
className={`flex-shrink-0 ${isQueen ? "w-9 h-9" : "w-7 h-7"} rounded-xl flex items-center justify-center`}
|
||||
style={{
|
||||
backgroundColor: `${color}18`,
|
||||
border: `1.5px solid ${color}35`,
|
||||
boxShadow: isQueen ? `0 0 12px ${color}20` : undefined,
|
||||
}}
|
||||
onClick={handleQueenClick}
|
||||
title={handleQueenClick ? `View ${msg.agent}'s profile` : undefined}
|
||||
>
|
||||
{isQueen ? (
|
||||
<Crown className="w-4 h-4" style={{ color }} />
|
||||
@@ -585,9 +550,8 @@ const MessageBubble = memo(
|
||||
>
|
||||
<div className="flex items-center gap-2 mb-1">
|
||||
<span
|
||||
className={`font-medium ${isQueen ? "text-sm" : "text-xs"}${handleQueenClick ? " cursor-pointer hover:underline" : ""}`}
|
||||
className={`font-medium ${isQueen ? "text-sm" : "text-xs"}`}
|
||||
style={{ color }}
|
||||
onClick={handleQueenClick}
|
||||
>
|
||||
{msg.agent}
|
||||
</span>
|
||||
@@ -701,157 +665,14 @@ export default function ChatPanel({
|
||||
type RenderItem =
|
||||
| { kind: "message"; msg: ChatMessage }
|
||||
| { kind: "parallel"; groupId: string; groups: SubagentGroup[] }
|
||||
| {
|
||||
kind: "worker_run";
|
||||
runId: string;
|
||||
group: WorkerRunGroup;
|
||||
/** Optional short label shown next to the "Worker" badge.
|
||||
* Only set when there are multiple parallel workers in the
|
||||
* same run span (so users can tell them apart). */
|
||||
label?: string;
|
||||
}
|
||||
| { kind: "day_divider"; key: string; createdAt: number };
|
||||
|
||||
/** Derive a short label from a parallel-worker stream id.
|
||||
* `worker:abcdef12-3456-...` → `abcdef12` (first 8 chars of the
|
||||
* uuid after the `worker:` prefix). Falls back to the first
|
||||
* message's nodeId when the streamId isn't the expected shape. */
|
||||
function deriveWorkerLabel(
|
||||
streamKey: string,
|
||||
msgs: ChatMessage[],
|
||||
): string {
|
||||
if (streamKey.startsWith("worker:")) {
|
||||
const suffix = streamKey.slice("worker:".length);
|
||||
// sessions are `session_YYYYMMDD_HHMMSS_<8-hex>` — show the
|
||||
// trailing hex if present, else first 8 chars of the suffix.
|
||||
const tail = suffix.match(/_[0-9a-f]{6,}$/i)?.[0]?.slice(1);
|
||||
return tail ? tail.slice(0, 8) : suffix.slice(0, 8);
|
||||
}
|
||||
const nid = msgs.find((m) => m.nodeId)?.nodeId;
|
||||
return nid || streamKey;
|
||||
}
|
||||
|
||||
const renderItems = useMemo<RenderItem[]>(() => {
|
||||
const items: RenderItem[] = [];
|
||||
let i = 0;
|
||||
while (i < threadMessages.length) {
|
||||
const msg = threadMessages[i];
|
||||
const isSubagent = msg.nodeId?.includes(":subagent:");
|
||||
|
||||
// Worker run grouping: collect consecutive WORKER-role
|
||||
// messages (and worker tool_status pills) into a collapsible
|
||||
// card. Queen tool_status pills (``role === "queen"``) are
|
||||
// deliberately excluded — the queen's own tool calls are part
|
||||
// of the queen↔user conversation and should render inline as
|
||||
// ToolActivityRows, not fold into a "Worker" bubble. Without
|
||||
// this guard, every queen run_command / read_file / etc. shows
|
||||
// up under a misleading "Worker" label in the DM.
|
||||
const isWorkerCandidate =
|
||||
msg.role === "worker" ||
|
||||
(msg.type === "tool_status" && msg.role !== "queen");
|
||||
if (
|
||||
!isSubagent &&
|
||||
isWorkerCandidate &&
|
||||
msg.type !== "user" &&
|
||||
msg.type !== "run_divider"
|
||||
) {
|
||||
const workerMsgs: ChatMessage[] = [];
|
||||
const firstWorkerMsg = msg;
|
||||
|
||||
while (i < threadMessages.length) {
|
||||
const m = threadMessages[i];
|
||||
|
||||
// Hard boundary — stop the worker run group
|
||||
if (m.type === "user" || m.type === "run_divider") break;
|
||||
// Queen message with real text — boundary (queen is talking
|
||||
// to the user, not just emitting a tool)
|
||||
if (m.role === "queen" && m.content?.trim() && !m.type) break;
|
||||
// Queen tool_status — NOT a worker activity, don't bucket
|
||||
// it. Break so the grouping stops and the queen pill
|
||||
// renders inline.
|
||||
if (m.type === "tool_status" && m.role === "queen") break;
|
||||
// Subagent message — different group type, stop here
|
||||
if (m.nodeId?.includes(":subagent:")) break;
|
||||
|
||||
// Worker text messages and worker tool_status belong to the run
|
||||
if (
|
||||
m.role === "worker" ||
|
||||
(m.type === "tool_status" && m.role !== "queen")
|
||||
) {
|
||||
workerMsgs.push(m);
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// System message or other — include in the worker run
|
||||
// group to preserve ordering (they'll render inside the
|
||||
// expanded view)
|
||||
workerMsgs.push(m);
|
||||
i++;
|
||||
}
|
||||
|
||||
if (workerMsgs.length > 0) {
|
||||
// Parallel fan-out detection: if any message in this span
|
||||
// is tagged with a parallel-worker streamId (``worker:{uuid}``),
|
||||
// split the span by streamId and emit one ``worker_run``
|
||||
// per worker — they render as stacked independent
|
||||
// ``WorkerRunBubble``s. Un-tagged legacy messages and the
|
||||
// single-worker ``streamId="worker"`` case fall through to
|
||||
// the existing single-bubble behavior.
|
||||
const hasParallel = workerMsgs.some(
|
||||
(m) => !!m.streamId && /^worker:./.test(m.streamId),
|
||||
);
|
||||
|
||||
if (hasParallel) {
|
||||
const buckets = new Map<
|
||||
string,
|
||||
{ messages: ChatMessage[]; firstAt: number }
|
||||
>();
|
||||
// Messages with no streamId (system notes, orphans from
|
||||
// old restore) attach to the most-recent keyed message's
|
||||
// bucket so chronology is preserved.
|
||||
let currentKey: string | null = null;
|
||||
for (const m of workerMsgs) {
|
||||
const key =
|
||||
m.streamId && m.streamId.length > 0
|
||||
? m.streamId
|
||||
: currentKey;
|
||||
if (!key) continue;
|
||||
if (m.streamId && m.streamId.length > 0) currentKey = m.streamId;
|
||||
let bucket = buckets.get(key);
|
||||
if (!bucket) {
|
||||
bucket = { messages: [], firstAt: m.createdAt ?? 0 };
|
||||
buckets.set(key, bucket);
|
||||
}
|
||||
bucket.messages.push(m);
|
||||
bucket.firstAt = Math.min(
|
||||
bucket.firstAt,
|
||||
m.createdAt ?? Number.POSITIVE_INFINITY,
|
||||
);
|
||||
}
|
||||
|
||||
const sorted = Array.from(buckets.entries()).sort(
|
||||
([, a], [, b]) => a.firstAt - b.firstAt,
|
||||
);
|
||||
for (const [streamKey, { messages: bucketMsgs }] of sorted) {
|
||||
items.push({
|
||||
kind: "worker_run",
|
||||
runId: `wrun-${firstWorkerMsg.id}-${streamKey}`,
|
||||
group: { messages: bucketMsgs },
|
||||
label: deriveWorkerLabel(streamKey, bucketMsgs),
|
||||
});
|
||||
}
|
||||
} else {
|
||||
items.push({
|
||||
kind: "worker_run",
|
||||
runId: `wrun-${firstWorkerMsg.id}`,
|
||||
group: { messages: workerMsgs },
|
||||
});
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!isSubagent) {
|
||||
items.push({ kind: "message", msg });
|
||||
i++;
|
||||
@@ -1051,17 +872,6 @@ export default function ChatPanel({
|
||||
</div>
|
||||
);
|
||||
}
|
||||
if (item.kind === "worker_run") {
|
||||
return (
|
||||
<div key={item.runId}>
|
||||
<WorkerRunBubble
|
||||
runId={item.runId}
|
||||
group={item.group}
|
||||
label={item.label}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
const msg = item.msg;
|
||||
// Detect misformatted ask_user payloads emitted as plain text and
|
||||
// substitute the nicer widget-based bubble. Only inspect regular
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { useState, useEffect, useCallback, useRef } from "react";
|
||||
import { useState, useEffect } from "react";
|
||||
import { NavLink, useLocation, useNavigate } from "react-router-dom";
|
||||
import {
|
||||
X,
|
||||
@@ -46,49 +46,8 @@ export default function QueenProfilePanel({
|
||||
const name = profile?.name ?? summary?.name ?? "Queen";
|
||||
const title = profile?.title ?? summary?.title ?? "";
|
||||
|
||||
// ── Resizable width ──────────────────────────────────────────────────
|
||||
const MIN_WIDTH = 280;
|
||||
const MAX_WIDTH = 600;
|
||||
const [width, setWidth] = useState(340);
|
||||
const dragging = useRef(false);
|
||||
const startX = useRef(0);
|
||||
const startWidth = useRef(0);
|
||||
|
||||
const onDragStart = useCallback((e: React.MouseEvent) => {
|
||||
e.preventDefault();
|
||||
dragging.current = true;
|
||||
startX.current = e.clientX;
|
||||
startWidth.current = width;
|
||||
|
||||
const onMove = (ev: MouseEvent) => {
|
||||
if (!dragging.current) return;
|
||||
// Panel is on the right, so dragging left (negative delta) grows it
|
||||
const delta = startX.current - ev.clientX;
|
||||
setWidth(Math.min(MAX_WIDTH, Math.max(MIN_WIDTH, startWidth.current + delta)));
|
||||
};
|
||||
const onUp = () => {
|
||||
dragging.current = false;
|
||||
document.removeEventListener("mousemove", onMove);
|
||||
document.removeEventListener("mouseup", onUp);
|
||||
document.body.style.cursor = "";
|
||||
document.body.style.userSelect = "";
|
||||
};
|
||||
document.addEventListener("mousemove", onMove);
|
||||
document.addEventListener("mouseup", onUp);
|
||||
document.body.style.cursor = "col-resize";
|
||||
document.body.style.userSelect = "none";
|
||||
}, [width]);
|
||||
|
||||
return (
|
||||
<aside
|
||||
className="flex-shrink-0 border-l border-border/60 bg-card overflow-y-auto relative"
|
||||
style={{ width }}
|
||||
>
|
||||
{/* Drag handle */}
|
||||
<div
|
||||
onMouseDown={onDragStart}
|
||||
className="absolute top-0 left-0 w-1 h-full cursor-col-resize hover:bg-primary/30 active:bg-primary/50 transition-colors z-10"
|
||||
/>
|
||||
<aside className="w-[340px] flex-shrink-0 border-l border-border/60 bg-card overflow-y-auto">
|
||||
{/* Header */}
|
||||
<div className="flex items-center justify-between px-5 py-3.5 border-b border-border/60">
|
||||
<div className="flex items-center gap-2 text-sm font-semibold text-foreground">
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { useState, useCallback, useRef } from "react";
|
||||
import { useState } from "react";
|
||||
import { useNavigate } from "react-router-dom";
|
||||
import {
|
||||
ChevronLeft,
|
||||
@@ -19,38 +19,6 @@ export default function Sidebar() {
|
||||
const [coloniesExpanded, setColoniesExpanded] = useState(true);
|
||||
const [queensExpanded, setQueensExpanded] = useState(true);
|
||||
|
||||
// ── Resizable width ──────────────────────────────────────────────────
|
||||
const MIN_WIDTH = 180;
|
||||
const MAX_WIDTH = 400;
|
||||
const [width, setWidth] = useState(240);
|
||||
const dragging = useRef(false);
|
||||
const startX = useRef(0);
|
||||
const startWidth = useRef(0);
|
||||
|
||||
const onDragStart = useCallback((e: React.MouseEvent) => {
|
||||
e.preventDefault();
|
||||
dragging.current = true;
|
||||
startX.current = e.clientX;
|
||||
startWidth.current = width;
|
||||
|
||||
const onMove = (ev: MouseEvent) => {
|
||||
if (!dragging.current) return;
|
||||
const delta = ev.clientX - startX.current;
|
||||
setWidth(Math.min(MAX_WIDTH, Math.max(MIN_WIDTH, startWidth.current + delta)));
|
||||
};
|
||||
const onUp = () => {
|
||||
dragging.current = false;
|
||||
document.removeEventListener("mousemove", onMove);
|
||||
document.removeEventListener("mouseup", onUp);
|
||||
document.body.style.cursor = "";
|
||||
document.body.style.userSelect = "";
|
||||
};
|
||||
document.addEventListener("mousemove", onMove);
|
||||
document.addEventListener("mouseup", onUp);
|
||||
document.body.style.cursor = "col-resize";
|
||||
document.body.style.userSelect = "none";
|
||||
}, [width]);
|
||||
|
||||
if (sidebarCollapsed) {
|
||||
return (
|
||||
<aside className="w-[52px] flex-shrink-0 flex flex-col bg-sidebar-bg border-r border-sidebar-border h-full">
|
||||
@@ -79,15 +47,7 @@ export default function Sidebar() {
|
||||
}
|
||||
|
||||
return (
|
||||
<aside
|
||||
className="flex-shrink-0 flex flex-col bg-sidebar-bg border-r border-sidebar-border h-full relative"
|
||||
style={{ width }}
|
||||
>
|
||||
{/* Drag handle on right edge */}
|
||||
<div
|
||||
onMouseDown={onDragStart}
|
||||
className="absolute top-0 right-0 w-1 h-full cursor-col-resize hover:bg-primary/30 active:bg-primary/50 transition-colors z-10"
|
||||
/>
|
||||
<aside className="w-[240px] flex-shrink-0 flex flex-col bg-sidebar-bg border-r border-sidebar-border h-full">
|
||||
{/* Header */}
|
||||
<div className="h-12 flex items-center justify-between px-4 border-b border-border/60">
|
||||
<button
|
||||
|
||||
@@ -1,297 +0,0 @@
|
||||
import { memo, useState, useRef, useEffect } from "react";
|
||||
import { ChevronDown, ChevronUp, Cpu } from "lucide-react";
|
||||
import type { ChatMessage } from "@/components/ChatPanel";
|
||||
import { ToolActivityRow } from "@/components/ChatPanel";
|
||||
import MarkdownContent from "@/components/MarkdownContent";
|
||||
|
||||
const workerColor = "hsl(220,60%,55%)";
|
||||
|
||||
export interface WorkerRunGroup {
|
||||
messages: ChatMessage[];
|
||||
}
|
||||
|
||||
interface WorkerRunBubbleProps {
|
||||
runId: string;
|
||||
group: WorkerRunGroup;
|
||||
/** Short identifier shown next to the "Worker" badge. Populated
|
||||
* only when the parent grouping has multiple parallel workers
|
||||
* in the same run span, so N stacked bubbles can be told apart
|
||||
* at a glance. Omitted for single-worker runs. */
|
||||
label?: string;
|
||||
}
|
||||
|
||||
/** Parse a tool_status JSON blob into a list of tool entries. */
|
||||
function parseToolStatus(content: string): { name: string; done: boolean }[] {
|
||||
try {
|
||||
const parsed = JSON.parse(content);
|
||||
return parsed.tools || [];
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Strip markdown formatting so the collapsed preview is a single
|
||||
* readable line instead of a scatter of code pills.
|
||||
*
|
||||
* MarkdownContent turns every backtick-wrapped fragment into its own
|
||||
* visually-boxed inline-code pill. In a worker text message those
|
||||
* pills can be coordinates, UUIDs, selectors, tool names — the
|
||||
* collapsed preview ends up looking like confetti. We just want the
|
||||
* plain prose, one line, truncated.
|
||||
*/
|
||||
function stripMarkdownToPreview(s: string, maxLen = 160): string {
|
||||
const cleaned = s
|
||||
.replace(/```[\s\S]*?```/g, " [code] ") // fenced code blocks
|
||||
.replace(/`([^`]+)`/g, "$1") // inline code — keep the text, drop the backticks
|
||||
.replace(/\*\*([^*]+)\*\*/g, "$1") // bold
|
||||
.replace(/\*([^*]+)\*/g, "$1") // italic
|
||||
.replace(/~~([^~]+)~~/g, "$1") // strikethrough
|
||||
.replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") // links -> link text
|
||||
.replace(/^#{1,6}\s+/gm, "") // ATX headers
|
||||
.replace(/^[>\-*+]\s+/gm, "") // blockquote/list markers
|
||||
.replace(/\s+/g, " ") // collapse whitespace
|
||||
.trim();
|
||||
if (cleaned.length <= maxLen) return cleaned;
|
||||
return cleaned.slice(0, maxLen - 1).trimEnd() + "\u2026";
|
||||
}
|
||||
|
||||
/**
|
||||
* Collapsible card that groups all worker messages from a single run
|
||||
* (the span between the queen's `run_agent_with_input` call and the
|
||||
* worker's final `set_output`/`escalate`/idle).
|
||||
*
|
||||
* Collapsed (default): header bar with tool count + latest text snippet.
|
||||
* Expanded: scrollable list of every message and tool status in order.
|
||||
*/
|
||||
const WorkerRunBubble = memo(
|
||||
function WorkerRunBubble({ group, label }: WorkerRunBubbleProps) {
|
||||
const [expanded, setExpanded] = useState(false);
|
||||
const bodyRef = useRef<HTMLDivElement>(null);
|
||||
|
||||
// Separate text messages from tool status
|
||||
const textMsgs = group.messages.filter(
|
||||
(m) => m.type !== "tool_status" && m.content?.trim()
|
||||
);
|
||||
const toolStatusMsgs = group.messages.filter(
|
||||
(m) => m.type === "tool_status"
|
||||
);
|
||||
|
||||
// Count total tool calls from tool_status messages
|
||||
const allTools: { name: string; done: boolean }[] = [];
|
||||
for (const m of toolStatusMsgs) {
|
||||
for (const t of parseToolStatus(m.content)) {
|
||||
allTools.push(t);
|
||||
}
|
||||
}
|
||||
const toolCount = allTools.length;
|
||||
const doneCount = allTools.filter((t) => t.done).length;
|
||||
const isFinished = toolCount > 0 && doneCount === toolCount;
|
||||
|
||||
// Latest text from the worker (the last non-empty text message)
|
||||
const latestText = textMsgs.length > 0
|
||||
? textMsgs[textMsgs.length - 1].content
|
||||
: "";
|
||||
|
||||
// Status label. We prefer concrete states over the vague
|
||||
// "starting" fallback — if the worker has emitted any text or
|
||||
// any tool, it's past the startup phase.
|
||||
const statusLabel = isFinished
|
||||
? "done"
|
||||
: toolCount > 0
|
||||
? "running"
|
||||
: textMsgs.length > 0
|
||||
? "active"
|
||||
: "starting";
|
||||
|
||||
// Unique tool names for the summary (deduplicated, ordered by first appearance)
|
||||
const uniqueToolNames: string[] = [];
|
||||
const seen = new Set<string>();
|
||||
for (const t of allTools) {
|
||||
if (!seen.has(t.name)) {
|
||||
seen.add(t.name);
|
||||
uniqueToolNames.push(t.name);
|
||||
}
|
||||
}
|
||||
|
||||
// Auto-scroll body when expanded
|
||||
useEffect(() => {
|
||||
if (expanded && bodyRef.current) {
|
||||
bodyRef.current.scrollTop = bodyRef.current.scrollHeight;
|
||||
}
|
||||
}, [expanded, group.messages.length]);
|
||||
|
||||
return (
|
||||
<div className="flex gap-3">
|
||||
{/* Left icon */}
|
||||
<div
|
||||
className="flex-shrink-0 w-7 h-7 rounded-xl flex items-center justify-center mt-1"
|
||||
style={{
|
||||
backgroundColor: `${workerColor}18`,
|
||||
border: `1.5px solid ${workerColor}35`,
|
||||
}}
|
||||
>
|
||||
<Cpu className="w-3.5 h-3.5" style={{ color: workerColor }} />
|
||||
</div>
|
||||
|
||||
<div className="flex-1 min-w-0 max-w-[90%]">
|
||||
{/* Clickable header */}
|
||||
<button
|
||||
onClick={() => setExpanded((v) => !v)}
|
||||
className="w-full flex items-center gap-2 mb-1 text-left cursor-pointer group"
|
||||
>
|
||||
<span className="font-medium text-xs" style={{ color: workerColor }}>
|
||||
Worker
|
||||
</span>
|
||||
{label && (
|
||||
<span className="text-[10px] font-mono text-muted-foreground/80 tabular-nums">
|
||||
{label}
|
||||
</span>
|
||||
)}
|
||||
<span
|
||||
className={`text-[10px] font-medium px-1.5 py-0.5 rounded-md ${
|
||||
isFinished
|
||||
? "bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400"
|
||||
: "bg-muted text-muted-foreground"
|
||||
}`}
|
||||
>
|
||||
{statusLabel}
|
||||
</span>
|
||||
{toolCount > 0 && (
|
||||
<span className="text-[10px] text-muted-foreground tabular-nums">
|
||||
{doneCount}/{toolCount} tools
|
||||
</span>
|
||||
)}
|
||||
<span className="ml-auto text-muted-foreground/60 group-hover:text-muted-foreground transition-colors p-0.5 rounded">
|
||||
{expanded ? (
|
||||
<ChevronUp className="w-3.5 h-3.5" />
|
||||
) : (
|
||||
<ChevronDown className="w-3.5 h-3.5" />
|
||||
)}
|
||||
</span>
|
||||
</button>
|
||||
|
||||
{/* Card body — use Tailwind theme tokens so dark mode
|
||||
gets a proper dark background instead of a glaring
|
||||
near-white hardcoded hsl. Finished runs get a subtle
|
||||
green tint that also respects theme. */}
|
||||
<div
|
||||
className={`rounded-2xl rounded-tl-md overflow-hidden border ${
|
||||
isFinished
|
||||
? "border-green-300/50 bg-green-50/50 dark:border-green-900/40 dark:bg-green-950/20"
|
||||
: "border-border bg-muted/60"
|
||||
}`}
|
||||
>
|
||||
{/* Collapsed: single-line plain-text preview of the
|
||||
latest worker text, OR a tool-name chain when the
|
||||
worker hasn't emitted any prose yet. MarkdownContent
|
||||
is intentionally NOT used here — its inline-code
|
||||
rendering turns every backtick-wrapped fragment into
|
||||
a floating pill, which wrecks the preview. */}
|
||||
{!expanded && (
|
||||
<div className="px-4 py-2.5 text-sm text-muted-foreground">
|
||||
{latestText ? (
|
||||
<div className="truncate">
|
||||
{stripMarkdownToPreview(latestText)}
|
||||
</div>
|
||||
) : uniqueToolNames.length > 0 ? (
|
||||
<span className="text-xs font-mono truncate block">
|
||||
{uniqueToolNames.slice(0, 5).join(" \u2192 ")}
|
||||
{uniqueToolNames.length > 5 &&
|
||||
` + ${uniqueToolNames.length - 5} more`}
|
||||
</span>
|
||||
) : (
|
||||
<span className="text-xs text-muted-foreground/60 italic">
|
||||
{"waiting for first action\u2026"}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Expanded: chronological stream with tool bursts
|
||||
coalesced into a single ToolActivityRow each.
|
||||
Consecutive tool_status messages (no text between)
|
||||
collapse to the LATEST snapshot — each snapshot is
|
||||
cumulative within its turn, so the latest one tells
|
||||
the whole story for that burst. Text messages break
|
||||
the burst and render as markdown. */}
|
||||
{expanded && (
|
||||
<div
|
||||
ref={bodyRef}
|
||||
className="max-h-[400px] overflow-y-auto px-4 py-3 space-y-3"
|
||||
>
|
||||
{(() => {
|
||||
type RenderRow =
|
||||
| { kind: "tools"; content: string; key: string }
|
||||
| { kind: "text"; msg: ChatMessage; key: string };
|
||||
const rows: RenderRow[] = [];
|
||||
let pendingTool: { content: string; id: string } | null = null;
|
||||
const flushTool = () => {
|
||||
if (pendingTool) {
|
||||
rows.push({
|
||||
kind: "tools",
|
||||
content: pendingTool.content,
|
||||
key: `tools-${pendingTool.id}`,
|
||||
});
|
||||
pendingTool = null;
|
||||
}
|
||||
};
|
||||
for (let i = 0; i < group.messages.length; i++) {
|
||||
const m = group.messages[i];
|
||||
if (m.type === "tool_status") {
|
||||
// Overwrite — latest snapshot in the burst wins
|
||||
pendingTool = {
|
||||
content: m.content,
|
||||
id: m.id || `ts-${i}`,
|
||||
};
|
||||
continue;
|
||||
}
|
||||
if (m.content?.trim()) {
|
||||
flushTool();
|
||||
rows.push({
|
||||
kind: "text",
|
||||
msg: m,
|
||||
key: m.id || `txt-${i}`,
|
||||
});
|
||||
}
|
||||
}
|
||||
flushTool();
|
||||
|
||||
return rows.map((row) => {
|
||||
if (row.kind === "tools") {
|
||||
// ToolActivityRow groups by tool name (×N), shows
|
||||
// running pills (spinner) before done pills (check),
|
||||
// and uses the per-tool color hash that matches
|
||||
// the rest of the chat.
|
||||
return (
|
||||
<div key={row.key} className="-ml-10">
|
||||
<ToolActivityRow content={row.content} />
|
||||
</div>
|
||||
);
|
||||
}
|
||||
return (
|
||||
<div
|
||||
key={row.key}
|
||||
className="text-sm leading-relaxed"
|
||||
>
|
||||
<MarkdownContent content={row.msg.content} />
|
||||
</div>
|
||||
);
|
||||
});
|
||||
})()}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
},
|
||||
(prev, next) =>
|
||||
prev.runId === next.runId &&
|
||||
prev.label === next.label &&
|
||||
prev.group.messages.length === next.group.messages.length &&
|
||||
prev.group.messages[prev.group.messages.length - 1]?.content ===
|
||||
next.group.messages[next.group.messages.length - 1]?.content
|
||||
);
|
||||
|
||||
export default WorkerRunBubble;
|
||||
@@ -1,31 +0,0 @@
|
||||
import { createContext, useContext, useCallback, type ReactNode } from "react";
|
||||
|
||||
interface QueenProfileContextValue {
|
||||
openQueenProfile: (queenId: string) => void;
|
||||
}
|
||||
|
||||
const QueenProfileContext = createContext<QueenProfileContextValue | null>(null);
|
||||
|
||||
export function QueenProfileProvider({
|
||||
onOpen,
|
||||
children,
|
||||
}: {
|
||||
onOpen: (queenId: string) => void;
|
||||
children: ReactNode;
|
||||
}) {
|
||||
const openQueenProfile = useCallback(
|
||||
(queenId: string) => onOpen(queenId),
|
||||
[onOpen],
|
||||
);
|
||||
return (
|
||||
<QueenProfileContext.Provider value={{ openQueenProfile }}>
|
||||
{children}
|
||||
</QueenProfileContext.Provider>
|
||||
);
|
||||
}
|
||||
|
||||
export function useQueenProfile() {
|
||||
const ctx = useContext(QueenProfileContext);
|
||||
if (!ctx) throw new Error("useQueenProfile must be used within QueenProfileProvider");
|
||||
return ctx;
|
||||
}
|
||||
@@ -1,11 +1,10 @@
|
||||
import { useEffect, useState, useCallback } from "react";
|
||||
import { useEffect, useState } from "react";
|
||||
import { Outlet, useLocation } from "react-router-dom";
|
||||
import Sidebar from "@/components/Sidebar";
|
||||
import AppHeader from "@/components/AppHeader";
|
||||
import QueenProfilePanel from "@/components/QueenProfilePanel";
|
||||
import { ColonyProvider, useColony } from "@/context/ColonyContext";
|
||||
import { HeaderActionsProvider } from "@/context/HeaderActionsContext";
|
||||
import { QueenProfileProvider } from "@/context/QueenProfileContext";
|
||||
|
||||
export default function AppLayout() {
|
||||
return (
|
||||
@@ -28,33 +27,26 @@ function AppLayoutInner() {
|
||||
setOpenQueenId(null);
|
||||
}, [location.pathname]);
|
||||
|
||||
const handleOpenQueenProfile = useCallback(
|
||||
(queenId: string) => setOpenQueenId((prev) => (prev === queenId ? null : queenId)),
|
||||
[],
|
||||
);
|
||||
|
||||
return (
|
||||
<QueenProfileProvider onOpen={handleOpenQueenProfile}>
|
||||
<div className="flex h-screen bg-background overflow-hidden">
|
||||
<Sidebar />
|
||||
<div className="flex-1 min-w-0 flex flex-col">
|
||||
<AppHeader onOpenQueenProfile={handleOpenQueenProfile} />
|
||||
<div className="flex-1 min-h-0 flex">
|
||||
<main className="flex-1 min-w-0 flex flex-col">
|
||||
<Outlet />
|
||||
</main>
|
||||
{openQueenId && (
|
||||
<QueenProfilePanel
|
||||
queenId={openQueenId}
|
||||
colonies={colonies.filter(
|
||||
(c) => c.queenProfileId === openQueenId,
|
||||
)}
|
||||
onClose={() => setOpenQueenId(null)}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
<div className="flex h-screen bg-background overflow-hidden">
|
||||
<Sidebar />
|
||||
<div className="flex-1 min-w-0 flex flex-col">
|
||||
<AppHeader onOpenQueenProfile={setOpenQueenId} />
|
||||
<div className="flex-1 min-h-0 flex">
|
||||
<main className="flex-1 min-w-0 flex flex-col">
|
||||
<Outlet />
|
||||
</main>
|
||||
{openQueenId && (
|
||||
<QueenProfilePanel
|
||||
queenId={openQueenId}
|
||||
colonies={colonies.filter(
|
||||
(c) => c.queenProfileId === openQueenId,
|
||||
)}
|
||||
onClose={() => setOpenQueenId(null)}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</QueenProfileProvider>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -119,7 +119,6 @@ export function sseEventToChatMessage(
|
||||
createdAt,
|
||||
nodeId: event.node_id || undefined,
|
||||
executionId: event.execution_id || undefined,
|
||||
streamId: event.stream_id || undefined,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -139,7 +138,6 @@ export function sseEventToChatMessage(
|
||||
type: "user",
|
||||
thread,
|
||||
createdAt,
|
||||
streamId: event.stream_id || undefined,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -160,7 +158,6 @@ export function sseEventToChatMessage(
|
||||
createdAt,
|
||||
nodeId: event.node_id || undefined,
|
||||
executionId: event.execution_id || undefined,
|
||||
streamId: event.stream_id || undefined,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -175,7 +172,6 @@ export function sseEventToChatMessage(
|
||||
type: "system",
|
||||
thread,
|
||||
createdAt,
|
||||
streamId: event.stream_id || undefined,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -190,7 +186,6 @@ export function sseEventToChatMessage(
|
||||
type: "system",
|
||||
thread,
|
||||
createdAt,
|
||||
streamId: event.stream_id || undefined,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -199,188 +194,6 @@ export function sseEventToChatMessage(
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Stateful event replay — produces tool_status pills + regular messages
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* State maintained while replaying an event stream. Tracks per-stream turn
|
||||
* counters, the set of active tool calls (so tool_status pill content
|
||||
* reflects "tool A done, tool B running" correctly), and a tool_use_id →
|
||||
* pill_msg_id map so deferred `tool_call_completed` events can find the
|
||||
* pill they belong to after the turn counter moves on.
|
||||
*/
|
||||
export interface ReplayState {
|
||||
turnCounters: Record<string, number>;
|
||||
activeToolCalls: Record<
|
||||
string,
|
||||
{ name: string; done: boolean; streamId: string }
|
||||
>;
|
||||
toolUseToPill: Record<string, { msgId: string; name: string }>;
|
||||
}
|
||||
|
||||
export function newReplayState(): ReplayState {
|
||||
return { turnCounters: {}, activeToolCalls: {}, toolUseToPill: {} };
|
||||
}
|
||||
|
||||
/**
|
||||
* Process a single event and emit zero or more ChatMessage upserts.
|
||||
*
|
||||
* Why this exists: `sseEventToChatMessage` is stateless — one event in, at
|
||||
* most one message out. But the chat's tool_status pill is a SYNTHESIZED
|
||||
* message: each tool_call_started adds to an accumulating pill, and each
|
||||
* tool_call_completed flips one of its tools from running to done. Live
|
||||
* SSE handlers in colony-chat and queen-dm already do this synthesis
|
||||
* against React refs. Cold-restore from events.jsonl used to skip
|
||||
* tool_call_* events entirely, so refreshed sessions looked completely
|
||||
* different from live ones — no tool activity visible, just prose.
|
||||
*
|
||||
* This function centralizes the synthesis so cold-restore and live paths
|
||||
* can use the exact same state machine. The caller treats the returned
|
||||
* messages as upserts (by id) — a later event in the same replay may
|
||||
* emit the same pill id with updated content, which should REPLACE the
|
||||
* earlier row in the caller's message list.
|
||||
*/
|
||||
export function replayEvent(
|
||||
state: ReplayState,
|
||||
event: AgentEvent,
|
||||
thread: string,
|
||||
agentDisplayName: string | undefined,
|
||||
): ChatMessage[] {
|
||||
const streamId = event.stream_id;
|
||||
const isQueen = streamId === "queen";
|
||||
const role: "queen" | "worker" = isQueen ? "queen" : "worker";
|
||||
const turnKey = streamId;
|
||||
const currentTurn = state.turnCounters[turnKey] ?? 0;
|
||||
const eventCreatedAt = event.timestamp
|
||||
? new Date(event.timestamp).getTime()
|
||||
: Date.now();
|
||||
|
||||
const out: ChatMessage[] = [];
|
||||
|
||||
// Update state machine BEFORE the generic converter runs so the
|
||||
// regular message emitted for this event sees the post-update
|
||||
// counter (matches live handler ordering at colony-chat.tsx:525).
|
||||
switch (event.type) {
|
||||
case "execution_started":
|
||||
state.turnCounters[turnKey] = currentTurn + 1;
|
||||
// New execution for a worker resets its active tools, mirroring
|
||||
// the live handler's setAgentState at colony-chat.tsx:566.
|
||||
if (!isQueen) {
|
||||
const keepActive: typeof state.activeToolCalls = {};
|
||||
for (const [k, v] of Object.entries(state.activeToolCalls)) {
|
||||
if (v.streamId !== streamId) keepActive[k] = v;
|
||||
}
|
||||
state.activeToolCalls = keepActive;
|
||||
}
|
||||
break;
|
||||
case "llm_turn_complete":
|
||||
state.turnCounters[turnKey] = currentTurn + 1;
|
||||
break;
|
||||
case "tool_call_started": {
|
||||
if (!event.node_id) break;
|
||||
const toolName = (event.data?.tool_name as string) || "unknown";
|
||||
const toolUseId = (event.data?.tool_use_id as string) || "";
|
||||
state.activeToolCalls[toolUseId] = {
|
||||
name: toolName,
|
||||
done: false,
|
||||
streamId,
|
||||
};
|
||||
const pillId = `tool-pill-${streamId}-${event.execution_id || "exec"}-${currentTurn}`;
|
||||
if (toolUseId) {
|
||||
state.toolUseToPill[toolUseId] = { msgId: pillId, name: toolName };
|
||||
}
|
||||
const tools = Object.values(state.activeToolCalls)
|
||||
.filter((t) => t.streamId === streamId)
|
||||
.map((t) => ({ name: t.name, done: t.done }));
|
||||
const allDone = tools.length > 0 && tools.every((t) => t.done);
|
||||
out.push({
|
||||
id: pillId,
|
||||
agent: agentDisplayName || event.node_id || "Agent",
|
||||
agentColor: "",
|
||||
content: JSON.stringify({ tools, allDone }),
|
||||
timestamp: "",
|
||||
type: "tool_status",
|
||||
role,
|
||||
thread,
|
||||
createdAt: eventCreatedAt,
|
||||
nodeId: event.node_id || undefined,
|
||||
executionId: event.execution_id || undefined,
|
||||
streamId: streamId || undefined,
|
||||
});
|
||||
break;
|
||||
}
|
||||
case "tool_call_completed": {
|
||||
if (!event.node_id) break;
|
||||
const toolUseId = (event.data?.tool_use_id as string) || "";
|
||||
const tracked = state.toolUseToPill[toolUseId];
|
||||
if (toolUseId) delete state.toolUseToPill[toolUseId];
|
||||
if (toolUseId && state.activeToolCalls[toolUseId]) {
|
||||
state.activeToolCalls[toolUseId].done = true;
|
||||
}
|
||||
if (!tracked) break;
|
||||
const tools = Object.values(state.activeToolCalls)
|
||||
.filter((t) => t.streamId === streamId)
|
||||
.map((t) => ({ name: t.name, done: t.done }));
|
||||
const allDone = tools.length > 0 && tools.every((t) => t.done);
|
||||
// Re-emit the SAME pill id with updated content. Caller upserts
|
||||
// by id, so this replaces the row from tool_call_started.
|
||||
out.push({
|
||||
id: tracked.msgId,
|
||||
agent: agentDisplayName || event.node_id || "Agent",
|
||||
agentColor: "",
|
||||
content: JSON.stringify({ tools, allDone }),
|
||||
timestamp: "",
|
||||
type: "tool_status",
|
||||
role,
|
||||
thread,
|
||||
createdAt: eventCreatedAt,
|
||||
nodeId: event.node_id || undefined,
|
||||
executionId: event.execution_id || undefined,
|
||||
streamId: streamId || undefined,
|
||||
});
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Regular stateless conversion (prose, user input, system notes).
|
||||
const msg = sseEventToChatMessage(
|
||||
event,
|
||||
thread,
|
||||
agentDisplayName,
|
||||
state.turnCounters[turnKey] ?? 0,
|
||||
);
|
||||
if (msg) {
|
||||
if (isQueen) msg.role = "queen";
|
||||
out.push(msg);
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
|
||||
* Replay an entire event array and return a deduplicated, chronologically
|
||||
* sorted ChatMessage list. Used by cold-restore paths so refreshed
|
||||
* sessions match the live stream exactly.
|
||||
*/
|
||||
export function replayEventsToMessages(
|
||||
events: AgentEvent[],
|
||||
thread: string,
|
||||
agentDisplayName: string | undefined,
|
||||
): ChatMessage[] {
|
||||
const state = newReplayState();
|
||||
// Upsert by id — later emissions for the same pill replace earlier ones.
|
||||
const byId = new Map<string, ChatMessage>();
|
||||
for (const evt of events) {
|
||||
for (const m of replayEvent(state, evt, thread, agentDisplayName)) {
|
||||
byId.set(m.id, m);
|
||||
}
|
||||
}
|
||||
return Array.from(byId.values()).sort(
|
||||
(a, b) => (a.createdAt ?? 0) - (b.createdAt ?? 0),
|
||||
);
|
||||
}
|
||||
|
||||
type QueenPhase = "planning" | "building" | "staging" | "running" | "independent";
|
||||
const VALID_PHASES = new Set<string>(["planning", "building", "staging", "running", "independent"]);
|
||||
|
||||
|
||||
@@ -13,11 +13,7 @@ import { executionApi } from "@/api/execution";
|
||||
import { sessionsApi } from "@/api/sessions";
|
||||
import { useMultiSSE } from "@/hooks/use-sse";
|
||||
import type { LiveSession, AgentEvent } from "@/api/types";
|
||||
import {
|
||||
sseEventToChatMessage,
|
||||
formatAgentDisplayName,
|
||||
replayEventsToMessages,
|
||||
} from "@/lib/chat-helpers";
|
||||
import { sseEventToChatMessage, formatAgentDisplayName } from "@/lib/chat-helpers";
|
||||
import { cronToLabel } from "@/lib/graphUtils";
|
||||
import { ApiError } from "@/api/client";
|
||||
import { useColony } from "@/context/ColonyContext";
|
||||
@@ -45,8 +41,6 @@ function truncate(s: string, max: number): string {
|
||||
type SessionRestoreResult = {
|
||||
messages: ChatMessage[];
|
||||
restoredPhase: "planning" | "building" | "staging" | "running" | "independent" | null;
|
||||
truncated: boolean;
|
||||
droppedCount: number;
|
||||
};
|
||||
|
||||
async function restoreSessionMessages(
|
||||
@@ -55,67 +49,34 @@ async function restoreSessionMessages(
|
||||
agentDisplayName: string,
|
||||
): Promise<SessionRestoreResult> {
|
||||
try {
|
||||
const { events, truncated, total, returned } =
|
||||
await sessionsApi.eventsHistory(sessionId);
|
||||
const { events } = await sessionsApi.eventsHistory(sessionId);
|
||||
if (events.length > 0) {
|
||||
// Walk events twice:
|
||||
// 1. Extract the trailing queen phase (unchanged logic).
|
||||
// 2. Run the full state-machine replay so tool_status pills
|
||||
// are synthesized just like the live SSE handler does.
|
||||
// Without (2), refreshed sessions showed zero tool activity
|
||||
// because tool_call_started/completed events are ignored by
|
||||
// the stateless converter.
|
||||
const messages: ChatMessage[] = [];
|
||||
let runningPhase: ChatMessage["phase"] = undefined;
|
||||
for (const evt of events) {
|
||||
const p =
|
||||
evt.type === "queen_phase_changed"
|
||||
? (evt.data?.phase as string)
|
||||
: evt.type === "node_loop_iteration"
|
||||
? (evt.data?.phase as string | undefined)
|
||||
: undefined;
|
||||
? (evt.data?.phase as string | undefined)
|
||||
: undefined;
|
||||
if (p && ["planning", "building", "staging", "running"].includes(p)) {
|
||||
runningPhase = p as ChatMessage["phase"];
|
||||
}
|
||||
}
|
||||
|
||||
const messages = replayEventsToMessages(events, thread, agentDisplayName);
|
||||
// Stamp the latest phase on every queen message so the UI's
|
||||
// phase-badge rendering matches what the live path would have
|
||||
// displayed at the time of the refresh.
|
||||
if (runningPhase) {
|
||||
for (const m of messages) {
|
||||
if (m.role === "queen") m.phase = runningPhase;
|
||||
const msg = sseEventToChatMessage(evt, thread, agentDisplayName);
|
||||
if (!msg) continue;
|
||||
if (evt.stream_id === "queen") {
|
||||
msg.role = "queen";
|
||||
msg.phase = runningPhase;
|
||||
}
|
||||
messages.push(msg);
|
||||
}
|
||||
|
||||
// Prepend a run_divider banner when the server truncated older
|
||||
// events so the user knows how many are hidden.
|
||||
const droppedCount = Math.max(0, total - returned);
|
||||
if (truncated && droppedCount > 0) {
|
||||
const firstTs = events[0]?.timestamp;
|
||||
const bannerCreatedAt = firstTs ? new Date(firstTs).getTime() - 1 : 0;
|
||||
messages.unshift({
|
||||
id: `restore-truncated-${sessionId}`,
|
||||
agent: "System",
|
||||
agentColor: "",
|
||||
type: "run_divider",
|
||||
content: `${droppedCount.toLocaleString()} older event${droppedCount === 1 ? "" : "s"} not shown (showing last ${returned.toLocaleString()})`,
|
||||
timestamp: firstTs ?? new Date().toISOString(),
|
||||
thread,
|
||||
createdAt: bannerCreatedAt,
|
||||
});
|
||||
}
|
||||
return {
|
||||
messages,
|
||||
restoredPhase: runningPhase ?? null,
|
||||
truncated,
|
||||
droppedCount,
|
||||
};
|
||||
return { messages, restoredPhase: runningPhase ?? null };
|
||||
}
|
||||
} catch {
|
||||
// Event log not available
|
||||
}
|
||||
return { messages: [], restoredPhase: null, truncated: false, droppedCount: 0 };
|
||||
return { messages: [], restoredPhase: null };
|
||||
}
|
||||
|
||||
// ── Agent backend state ──────────────────────────────────────────────────────
|
||||
@@ -855,7 +816,6 @@ export default function ColonyChat() {
|
||||
createdAt: eventCreatedAt,
|
||||
nodeId: event.node_id || undefined,
|
||||
executionId: event.execution_id || undefined,
|
||||
streamId: sid || undefined,
|
||||
});
|
||||
return { ...prev, isStreaming: false, activeToolCalls: newActive };
|
||||
});
|
||||
|
||||
@@ -11,10 +11,7 @@ import { sessionsApi } from "@/api/sessions";
|
||||
import { queensApi } from "@/api/queens";
|
||||
import { useMultiSSE } from "@/hooks/use-sse";
|
||||
import type { AgentEvent, HistorySession } from "@/api/types";
|
||||
import {
|
||||
sseEventToChatMessage,
|
||||
replayEventsToMessages,
|
||||
} from "@/lib/chat-helpers";
|
||||
import { sseEventToChatMessage } from "@/lib/chat-helpers";
|
||||
import { useColony } from "@/context/ColonyContext";
|
||||
import { useHeaderActions } from "@/context/HeaderActionsContext";
|
||||
import { getQueenForAgent, slugToColonyId } from "@/lib/colony-registry";
|
||||
@@ -93,34 +90,17 @@ export default function QueenDM() {
|
||||
const restoreMessages = useCallback(
|
||||
async (sid: string, cancelled: () => boolean) => {
|
||||
try {
|
||||
const { events, truncated, total, returned } =
|
||||
await sessionsApi.eventsHistory(sid);
|
||||
const { events } = await sessionsApi.eventsHistory(sid);
|
||||
if (cancelled()) return;
|
||||
|
||||
// Use the stateful replay so tool_status pills are synthesized
|
||||
// the same way the live SSE handler does — without this the
|
||||
// refreshed queen DM shows zero tool activity.
|
||||
const restored = replayEventsToMessages(events, "queen-dm", queenName);
|
||||
|
||||
// Show a banner if the server truncated older events.
|
||||
const droppedCount = Math.max(0, total - returned);
|
||||
if (truncated && droppedCount > 0) {
|
||||
const firstTs = events[0]?.timestamp;
|
||||
const bannerCreatedAt = firstTs
|
||||
? new Date(firstTs).getTime() - 1
|
||||
: 0;
|
||||
restored.unshift({
|
||||
id: `restore-truncated-${sid}`,
|
||||
agent: "System",
|
||||
agentColor: "",
|
||||
type: "run_divider",
|
||||
content: `${droppedCount.toLocaleString()} older event${droppedCount === 1 ? "" : "s"} not shown (showing last ${returned.toLocaleString()})`,
|
||||
timestamp: firstTs ?? new Date().toISOString(),
|
||||
thread: "queen-dm",
|
||||
createdAt: bannerCreatedAt,
|
||||
});
|
||||
const restored: ChatMessage[] = [];
|
||||
for (const evt of events) {
|
||||
const msg = sseEventToChatMessage(evt, "queen-dm", queenName);
|
||||
if (!msg) continue;
|
||||
if (evt.stream_id === "queen") msg.role = "queen";
|
||||
restored.push(msg);
|
||||
}
|
||||
if (restored.length > 0 && !cancelled()) {
|
||||
restored.sort((a, b) => (a.createdAt ?? 0) - (b.createdAt ?? 0));
|
||||
setMessages(restored);
|
||||
// Only clear typing if the history contains a completed execution;
|
||||
// during bootstrap the queen is still processing.
|
||||
@@ -621,7 +601,6 @@ export default function QueenDM() {
|
||||
createdAt: eventCreatedAt,
|
||||
nodeId: event.node_id || undefined,
|
||||
executionId: event.execution_id || undefined,
|
||||
streamId: sid || undefined,
|
||||
};
|
||||
setMessages((prevMsgs) => {
|
||||
const idx = prevMsgs.findIndex((m) => m.id === msgId);
|
||||
|
||||
@@ -72,28 +72,13 @@ def patched_fork(monkeypatch):
|
||||
"""Stub out fork_session_into_colony so we don't need a real queen."""
|
||||
calls: list[dict] = []
|
||||
|
||||
async def _stub_fork(
|
||||
*,
|
||||
session: Any,
|
||||
colony_name: str,
|
||||
task: str,
|
||||
tasks: list[dict] | None = None,
|
||||
) -> dict:
|
||||
calls.append(
|
||||
{
|
||||
"session": session,
|
||||
"colony_name": colony_name,
|
||||
"task": task,
|
||||
"tasks": tasks,
|
||||
}
|
||||
)
|
||||
async def _stub_fork(*, session: Any, colony_name: str, task: str) -> dict:
|
||||
calls.append({"session": session, "colony_name": colony_name, "task": task})
|
||||
return {
|
||||
"colony_path": f"/tmp/fake_colonies/{colony_name}",
|
||||
"colony_name": colony_name,
|
||||
"queen_session_id": "session_fake_fork_id",
|
||||
"is_new": True,
|
||||
"db_path": f"/tmp/fake_colonies/{colony_name}/data/progress.db",
|
||||
"task_ids": [],
|
||||
}
|
||||
|
||||
monkeypatch.setattr(
|
||||
|
||||
@@ -17,10 +17,10 @@ _DEFAULT_SKILLS_DIR = Path(__file__).resolve().parent.parent / "framework" / "sk
|
||||
|
||||
|
||||
class TestDefaultSkillFiles:
|
||||
"""Verify all built-in SKILL.md files parse correctly."""
|
||||
"""Verify all 7 built-in SKILL.md files parse correctly."""
|
||||
|
||||
def test_all_skills_exist(self):
|
||||
assert len(SKILL_REGISTRY) == 6
|
||||
def test_all_seven_skills_exist(self):
|
||||
assert len(SKILL_REGISTRY) == 7
|
||||
|
||||
@pytest.mark.parametrize("skill_name,dir_name", list(SKILL_REGISTRY.items()))
|
||||
def test_skill_parses(self, skill_name, dir_name):
|
||||
@@ -35,13 +35,7 @@ class TestDefaultSkillFiles:
|
||||
assert parsed.source_scope == "framework"
|
||||
|
||||
def test_combined_token_budget(self):
|
||||
"""All default skill bodies combined should stay within the protocols budget.
|
||||
|
||||
Ceiling is 5000 tokens (~20000 chars): the prompt-injection path
|
||||
appends every registered skill body to the system prompt, so
|
||||
uncontrolled growth would balloon every LLM call. 5000 gives
|
||||
headroom over today's ~3500 while still catching obvious bloat.
|
||||
"""
|
||||
"""All default skill bodies combined should be under 3000 tokens (~12000 chars)."""
|
||||
total_chars = 0
|
||||
for dir_name in SKILL_REGISTRY.values():
|
||||
path = _DEFAULT_SKILLS_DIR / dir_name / "SKILL.md"
|
||||
@@ -50,9 +44,9 @@ class TestDefaultSkillFiles:
|
||||
total_chars += len(parsed.body)
|
||||
|
||||
approx_tokens = total_chars // 4
|
||||
assert approx_tokens < 5000, (
|
||||
assert approx_tokens < 3000, (
|
||||
f"Combined default skill bodies are ~{approx_tokens} tokens "
|
||||
f"({total_chars} chars), exceeding the 5000 token budget"
|
||||
f"({total_chars} chars), exceeding the 3000 token budget"
|
||||
)
|
||||
|
||||
def test_data_buffer_keys_all_prefixed(self):
|
||||
@@ -66,7 +60,7 @@ class TestDefaultSkillManager:
|
||||
manager = DefaultSkillManager()
|
||||
manager.load()
|
||||
|
||||
assert len(manager.active_skill_names) == len(SKILL_REGISTRY)
|
||||
assert len(manager.active_skill_names) == 7
|
||||
for name in SKILL_REGISTRY:
|
||||
assert name in manager.active_skill_names
|
||||
|
||||
@@ -103,7 +97,7 @@ class TestDefaultSkillManager:
|
||||
manager.load()
|
||||
|
||||
assert "hive.quality-monitor" not in manager.active_skill_names
|
||||
assert len(manager.active_skill_names) == len(SKILL_REGISTRY) - 1
|
||||
assert len(manager.active_skill_names) == 6
|
||||
|
||||
def test_disable_all_via_convention(self):
|
||||
config = SkillsConfig.from_agent_vars(default_skills={"_all": {"enabled": False}})
|
||||
@@ -142,7 +136,7 @@ class TestSkillsConfig:
|
||||
def test_explicit_disable(self):
|
||||
config = SkillsConfig(default_skills={"hive.note-taking": DefaultSkillConfig(enabled=False)})
|
||||
assert config.is_default_enabled("hive.note-taking") is False
|
||||
assert config.is_default_enabled("hive.quality-monitor") is True
|
||||
assert config.is_default_enabled("hive.batch-ledger") is True
|
||||
|
||||
def test_all_disabled_flag(self):
|
||||
config = SkillsConfig(all_defaults_disabled=True)
|
||||
@@ -172,11 +166,11 @@ class TestSkillsConfig:
|
||||
def test_get_default_overrides(self):
|
||||
config = SkillsConfig.from_agent_vars(
|
||||
default_skills={
|
||||
"hive.quality-monitor": {"enabled": True, "assessment_interval": 10},
|
||||
"hive.batch-ledger": {"enabled": True, "checkpoint_every_n": 10},
|
||||
}
|
||||
)
|
||||
overrides = config.get_default_overrides("hive.quality-monitor")
|
||||
assert overrides == {"assessment_interval": 10}
|
||||
overrides = config.get_default_overrides("hive.batch-ledger")
|
||||
assert overrides == {"checkpoint_every_n": 10}
|
||||
|
||||
def test_get_default_overrides_empty(self):
|
||||
config = SkillsConfig()
|
||||
@@ -250,20 +244,40 @@ class TestConfigOverrideSubstitution:
|
||||
assert "{{" not in cleaned
|
||||
|
||||
|
||||
class TestBatchDeprecatedNoOps:
|
||||
"""batch-ledger skill was removed; is_batch_scenario() and batch_init_nudge
|
||||
are deprecated no-ops that return False / None unconditionally. They are
|
||||
kept in-tree to avoid touching every orchestrator/execution_manager call
|
||||
site that still reads the nudge through the config plumbing."""
|
||||
class TestBatchAutoDetection:
|
||||
"""DS-12: is_batch_scenario() and batch_init_nudge property."""
|
||||
|
||||
def test_is_batch_scenario_always_false(self):
|
||||
assert is_batch_scenario("process a list of 100 leads") is False
|
||||
assert is_batch_scenario("for each record, send an email") is False
|
||||
assert is_batch_scenario("write a summary") is False
|
||||
def test_detects_list_of(self):
|
||||
assert is_batch_scenario("process a list of 100 leads") is True
|
||||
|
||||
def test_batch_init_nudge_always_none(self):
|
||||
def test_detects_collection_of(self):
|
||||
assert is_batch_scenario("a collection of invoices") is True
|
||||
|
||||
def test_detects_items(self):
|
||||
assert is_batch_scenario("go through all items in the spreadsheet") is True
|
||||
|
||||
def test_detects_for_each(self):
|
||||
assert is_batch_scenario("for each record, send an email") is True
|
||||
|
||||
def test_no_match_single_task(self):
|
||||
assert is_batch_scenario("write a summary of the quarterly report") is False
|
||||
|
||||
def test_batch_nudge_active_by_default(self):
|
||||
manager = DefaultSkillManager()
|
||||
manager.load()
|
||||
assert manager.batch_init_nudge is not None
|
||||
assert "_batch_ledger" in manager.batch_init_nudge
|
||||
|
||||
def test_batch_nudge_none_when_skill_disabled(self):
|
||||
config = SkillsConfig.from_agent_vars(default_skills={"hive.batch-ledger": {"enabled": False}})
|
||||
manager = DefaultSkillManager(config)
|
||||
manager.load()
|
||||
assert manager.batch_init_nudge is None
|
||||
|
||||
def test_batch_nudge_none_when_auto_detect_disabled(self):
|
||||
config = SkillsConfig.from_agent_vars(default_skills={"hive.batch-ledger": {"auto_detect_batch": False}})
|
||||
manager = DefaultSkillManager(config)
|
||||
manager.load()
|
||||
assert manager.batch_init_nudge is None
|
||||
|
||||
|
||||
|
||||
@@ -1,590 +0,0 @@
|
||||
"""Tests for framework.host.progress_db — per-colony task queue."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sqlite3
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from framework.host.progress_db import (
|
||||
SCHEMA_VERSION,
|
||||
ensure_all_colony_dbs,
|
||||
ensure_progress_db,
|
||||
enqueue_task,
|
||||
reclaim_stale,
|
||||
seed_tasks,
|
||||
)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Schema / init
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_ensure_progress_db_fresh(tmp_path: Path) -> None:
|
||||
colony = tmp_path / "c"
|
||||
db_path = ensure_progress_db(colony)
|
||||
assert db_path.exists()
|
||||
assert db_path.name == "progress.db"
|
||||
assert db_path.parent.name == "data"
|
||||
|
||||
con = sqlite3.connect(str(db_path))
|
||||
try:
|
||||
assert con.execute("PRAGMA journal_mode").fetchone()[0].lower() == "wal"
|
||||
assert con.execute("PRAGMA user_version").fetchone()[0] == SCHEMA_VERSION
|
||||
tables = {r[0] for r in con.execute("SELECT name FROM sqlite_master WHERE type='table'")}
|
||||
assert {"tasks", "steps", "sop_checklist", "colony_meta"}.issubset(tables)
|
||||
|
||||
indexes = {r[0] for r in con.execute("SELECT name FROM sqlite_master WHERE type='index'")}
|
||||
# Named indexes we declared
|
||||
assert "idx_tasks_claimable" in indexes
|
||||
assert "idx_steps_task_seq" in indexes
|
||||
assert "idx_sop_required_open" in indexes
|
||||
assert "idx_tasks_status" in indexes
|
||||
finally:
|
||||
con.close()
|
||||
|
||||
|
||||
def test_ensure_progress_db_idempotent(tmp_path: Path) -> None:
|
||||
colony = tmp_path / "c"
|
||||
p1 = ensure_progress_db(colony)
|
||||
p2 = ensure_progress_db(colony)
|
||||
assert p1 == p2
|
||||
con = sqlite3.connect(str(p1))
|
||||
try:
|
||||
assert con.execute("PRAGMA user_version").fetchone()[0] == SCHEMA_VERSION
|
||||
finally:
|
||||
con.close()
|
||||
|
||||
|
||||
def test_ensure_all_colony_dbs_backfill(tmp_path: Path) -> None:
|
||||
colonies_root = tmp_path / "colonies"
|
||||
(colonies_root / "alpha").mkdir(parents=True)
|
||||
(colonies_root / "beta").mkdir(parents=True)
|
||||
(colonies_root / "gamma_not_dir").touch() # should be ignored
|
||||
|
||||
initialized = ensure_all_colony_dbs(colonies_root)
|
||||
names = {p.parent.parent.name for p in initialized}
|
||||
assert names == {"alpha", "beta"}
|
||||
for p in initialized:
|
||||
assert p.exists()
|
||||
|
||||
|
||||
def test_ensure_all_colony_dbs_missing_root(tmp_path: Path) -> None:
|
||||
missing = tmp_path / "nonexistent"
|
||||
assert ensure_all_colony_dbs(missing) == []
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Seeding / enqueue
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_seed_tasks_basic(tmp_path: Path) -> None:
|
||||
db = ensure_progress_db(tmp_path / "c")
|
||||
ids = seed_tasks(
|
||||
db,
|
||||
[
|
||||
{
|
||||
"goal": "task one",
|
||||
"priority": 5,
|
||||
"payload": {"url": "https://example.com"},
|
||||
"steps": [
|
||||
{"title": "open page"},
|
||||
{"title": "extract data", "detail": "selector .content"},
|
||||
],
|
||||
"sop_items": [
|
||||
{"key": "captcha_handled", "description": "Verify no CAPTCHA blocks"},
|
||||
{"key": "soft_hint", "description": "optional", "required": False},
|
||||
],
|
||||
},
|
||||
{"goal": "task two"},
|
||||
],
|
||||
)
|
||||
assert len(ids) == 2
|
||||
|
||||
con = sqlite3.connect(str(db))
|
||||
try:
|
||||
rows = list(con.execute("SELECT id, goal, priority, status, source, payload FROM tasks ORDER BY goal"))
|
||||
assert len(rows) == 2
|
||||
assert rows[0][1] == "task one"
|
||||
assert rows[0][2] == 5
|
||||
assert rows[0][3] == "pending"
|
||||
assert rows[0][4] == "queen_create"
|
||||
assert '"url"' in rows[0][5]
|
||||
|
||||
step_count = con.execute(
|
||||
"SELECT count(*) FROM steps WHERE task_id=?", (ids[0],)
|
||||
).fetchone()[0]
|
||||
assert step_count == 2
|
||||
|
||||
sop_rows = list(con.execute(
|
||||
"SELECT key, required FROM sop_checklist WHERE task_id=? ORDER BY key", (ids[0],)
|
||||
))
|
||||
assert sop_rows == [("captcha_handled", 1), ("soft_hint", 0)]
|
||||
finally:
|
||||
con.close()
|
||||
|
||||
|
||||
def test_seed_tasks_rejects_missing_goal(tmp_path: Path) -> None:
|
||||
db = ensure_progress_db(tmp_path / "c")
|
||||
with pytest.raises(ValueError):
|
||||
seed_tasks(db, [{"priority": 1}])
|
||||
|
||||
|
||||
def test_seed_tasks_empty_is_noop(tmp_path: Path) -> None:
|
||||
db = ensure_progress_db(tmp_path / "c")
|
||||
assert seed_tasks(db, []) == []
|
||||
|
||||
|
||||
def test_seed_tasks_rollback_on_partial_failure(tmp_path: Path) -> None:
|
||||
"""A bad row mid-batch must roll back the whole transaction."""
|
||||
db = ensure_progress_db(tmp_path / "c")
|
||||
with pytest.raises(ValueError):
|
||||
seed_tasks(
|
||||
db,
|
||||
[
|
||||
{"goal": "good one"},
|
||||
{"priority": 1}, # missing goal -> boom
|
||||
{"goal": "never inserted"},
|
||||
],
|
||||
)
|
||||
con = sqlite3.connect(str(db))
|
||||
try:
|
||||
count = con.execute("SELECT count(*) FROM tasks").fetchone()[0]
|
||||
assert count == 0
|
||||
finally:
|
||||
con.close()
|
||||
|
||||
|
||||
def test_enqueue_task(tmp_path: Path) -> None:
|
||||
db = ensure_progress_db(tmp_path / "c")
|
||||
tid = enqueue_task(
|
||||
db,
|
||||
"appended",
|
||||
steps=[{"title": "s1"}],
|
||||
sop_items=[{"key": "k", "description": "d"}],
|
||||
priority=3,
|
||||
)
|
||||
assert tid
|
||||
|
||||
con = sqlite3.connect(str(db))
|
||||
try:
|
||||
row = con.execute(
|
||||
"SELECT goal, priority, source FROM tasks WHERE id=?", (tid,)
|
||||
).fetchone()
|
||||
assert row == ("appended", 3, "enqueue_tool")
|
||||
assert con.execute(
|
||||
"SELECT count(*) FROM steps WHERE task_id=?", (tid,)
|
||||
).fetchone()[0] == 1
|
||||
finally:
|
||||
con.close()
|
||||
|
||||
|
||||
def test_enqueue_task_custom_source(tmp_path: Path) -> None:
|
||||
"""enqueue_task must accept a custom source value (e.g. run_agent_with_input).
|
||||
|
||||
Phase 2 wiring adds source values: create_colony_auto,
|
||||
run_agent_with_input, run_parallel_workers. Verify the source
|
||||
column stores them verbatim.
|
||||
"""
|
||||
db = ensure_progress_db(tmp_path / "c")
|
||||
tid = enqueue_task(db, "chat task", source="run_agent_with_input")
|
||||
con = sqlite3.connect(str(db))
|
||||
try:
|
||||
row = con.execute("SELECT goal, source FROM tasks WHERE id=?", (tid,)).fetchone()
|
||||
assert row == ("chat task", "run_agent_with_input")
|
||||
finally:
|
||||
con.close()
|
||||
|
||||
|
||||
def test_claim_by_assigned_id(tmp_path: Path) -> None:
|
||||
"""Worker protocol: claim a specific row by id (not the generic next-pending).
|
||||
|
||||
The Phase 2 fix threads ``task_id`` into ``input_data`` when the
|
||||
queen pre-assigns a row. The worker must be able to claim THAT
|
||||
row atomically with an ``UPDATE ... WHERE id=? AND status='pending'``
|
||||
pattern, and a second claim on the same id must return 0 rows.
|
||||
"""
|
||||
db = ensure_progress_db(tmp_path / "c")
|
||||
[tid] = seed_tasks(db, [{"goal": "pinned task"}])
|
||||
|
||||
con = sqlite3.connect(str(db), isolation_level=None, timeout=5.0)
|
||||
try:
|
||||
cur = con.execute(
|
||||
"""
|
||||
UPDATE tasks SET status='claimed', worker_id=?,
|
||||
claim_token=lower(hex(randomblob(8))),
|
||||
claimed_at=datetime('now'),
|
||||
updated_at=datetime('now')
|
||||
WHERE id=? AND status='pending'
|
||||
RETURNING id, goal
|
||||
""",
|
||||
("w1", tid),
|
||||
)
|
||||
row = cur.fetchone()
|
||||
assert row == (tid, "pinned task"), f"expected one claim, got {row}"
|
||||
|
||||
# Second attempt on the same id must affect zero rows.
|
||||
cur2 = con.execute(
|
||||
"""
|
||||
UPDATE tasks SET status='claimed', worker_id=?,
|
||||
claim_token=lower(hex(randomblob(8))),
|
||||
claimed_at=datetime('now')
|
||||
WHERE id=? AND status='pending'
|
||||
RETURNING id
|
||||
""",
|
||||
("w2", tid),
|
||||
)
|
||||
assert cur2.fetchone() is None, "second claim should affect zero rows"
|
||||
|
||||
# Ensure worker_id on the row is still the first claimant.
|
||||
owner = con.execute(
|
||||
"SELECT worker_id, status FROM tasks WHERE id=?", (tid,)
|
||||
).fetchone()
|
||||
assert owner == ("w1", "claimed")
|
||||
finally:
|
||||
con.close()
|
||||
|
||||
|
||||
def test_claim_by_id_does_not_steal_unrelated_rows(tmp_path: Path) -> None:
|
||||
"""Claim-by-id must only touch the named row, not siblings."""
|
||||
db = ensure_progress_db(tmp_path / "c")
|
||||
ids = seed_tasks(db, [{"goal": "a"}, {"goal": "b"}, {"goal": "c"}])
|
||||
target = ids[1]
|
||||
|
||||
con = sqlite3.connect(str(db), isolation_level=None)
|
||||
try:
|
||||
con.execute(
|
||||
"UPDATE tasks SET status='claimed', worker_id='w1', "
|
||||
"claimed_at=datetime('now') WHERE id=? AND status='pending'",
|
||||
(target,),
|
||||
)
|
||||
statuses = dict(con.execute("SELECT goal, status FROM tasks").fetchall())
|
||||
assert statuses == {"a": "pending", "b": "claimed", "c": "pending"}
|
||||
finally:
|
||||
con.close()
|
||||
|
||||
|
||||
def test_seed_tasks_bulk_10k(tmp_path: Path) -> None:
|
||||
"""10k rows in one transaction should finish under a second on local disk."""
|
||||
db = ensure_progress_db(tmp_path / "c")
|
||||
tasks = [{"goal": f"task {i}", "seq": i} for i in range(10_000)]
|
||||
start = time.perf_counter()
|
||||
ids = seed_tasks(db, tasks)
|
||||
elapsed = time.perf_counter() - start
|
||||
assert len(ids) == 10_000
|
||||
# Generous ceiling — on CI with slow disk we've seen ~300ms.
|
||||
assert elapsed < 3.0, f"bulk seed too slow: {elapsed:.2f}s"
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Atomic claim under concurrency
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
_CLAIM_SQL = """
|
||||
BEGIN IMMEDIATE;
|
||||
UPDATE tasks
|
||||
SET
|
||||
status = 'claimed',
|
||||
worker_id = ?,
|
||||
claim_token = lower(hex(randomblob(8))),
|
||||
claimed_at = datetime('now'),
|
||||
updated_at = datetime('now')
|
||||
WHERE id = (
|
||||
SELECT id FROM tasks
|
||||
WHERE status = 'pending'
|
||||
ORDER BY priority DESC, seq, created_at
|
||||
LIMIT 1
|
||||
);
|
||||
"""
|
||||
|
||||
|
||||
def _claim_one(db_path: Path, worker_id: str) -> str | None:
|
||||
"""Atomic single-shot claim using RETURNING (SQLite 3.35+).
|
||||
|
||||
The skill teaches agents the BEGIN IMMEDIATE + subquery UPDATE
|
||||
pattern; for an in-process test helper we use RETURNING so the
|
||||
claimed row id is returned from the same statement (no racing
|
||||
follow-up SELECT). Functionally equivalent: both approaches rely
|
||||
on the atomic subquery-UPDATE.
|
||||
"""
|
||||
con = sqlite3.connect(str(db_path), isolation_level=None, timeout=10.0)
|
||||
con.execute("PRAGMA busy_timeout = 10000")
|
||||
try:
|
||||
cur = con.execute(
|
||||
"""
|
||||
UPDATE tasks
|
||||
SET status = 'claimed',
|
||||
worker_id = ?,
|
||||
claim_token = lower(hex(randomblob(8))),
|
||||
claimed_at = datetime('now'),
|
||||
updated_at = datetime('now')
|
||||
WHERE id = (
|
||||
SELECT id FROM tasks
|
||||
WHERE status = 'pending'
|
||||
ORDER BY priority DESC, seq, created_at
|
||||
LIMIT 1
|
||||
)
|
||||
RETURNING id
|
||||
""",
|
||||
(worker_id,),
|
||||
)
|
||||
row = cur.fetchone()
|
||||
return row[0] if row else None
|
||||
finally:
|
||||
con.close()
|
||||
|
||||
|
||||
def test_claim_atomicity_under_concurrency(tmp_path: Path) -> None:
|
||||
"""20 threads racing to drain 100 tasks — each task claimed exactly once."""
|
||||
db = ensure_progress_db(tmp_path / "c")
|
||||
seed_tasks(db, [{"goal": f"task {i}", "seq": i} for i in range(100)])
|
||||
|
||||
claims: list[tuple[str, str]] = []
|
||||
claims_lock = threading.Lock()
|
||||
|
||||
def worker(worker_id: str) -> None:
|
||||
while True:
|
||||
tid = _claim_one(db, worker_id)
|
||||
if tid is None:
|
||||
return
|
||||
with claims_lock:
|
||||
claims.append((worker_id, tid))
|
||||
|
||||
threads = [threading.Thread(target=worker, args=(f"w{i}",)) for i in range(20)]
|
||||
for t in threads:
|
||||
t.start()
|
||||
for t in threads:
|
||||
t.join(timeout=30)
|
||||
|
||||
task_ids = [tid for _, tid in claims]
|
||||
assert len(task_ids) == 100, f"expected 100 claims, got {len(task_ids)}"
|
||||
assert len(set(task_ids)) == 100, "duplicate claims detected"
|
||||
|
||||
con = sqlite3.connect(str(db))
|
||||
try:
|
||||
remaining = con.execute(
|
||||
"SELECT count(*) FROM tasks WHERE status='pending'"
|
||||
).fetchone()[0]
|
||||
assert remaining == 0
|
||||
claimed = con.execute(
|
||||
"SELECT count(*) FROM tasks WHERE status='claimed'"
|
||||
).fetchone()[0]
|
||||
assert claimed == 100
|
||||
finally:
|
||||
con.close()
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Stale-claim reclaimer
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_reclaim_stale_returns_to_pending(tmp_path: Path) -> None:
|
||||
db = ensure_progress_db(tmp_path / "c")
|
||||
[tid] = seed_tasks(db, [{"goal": "stuck"}])
|
||||
|
||||
# Simulate a claim made 20 minutes ago.
|
||||
con = sqlite3.connect(str(db), isolation_level=None)
|
||||
try:
|
||||
con.execute(
|
||||
"UPDATE tasks SET status='claimed', worker_id='w1', "
|
||||
"claimed_at=datetime('now', '-20 minutes') WHERE id=?",
|
||||
(tid,),
|
||||
)
|
||||
finally:
|
||||
con.close()
|
||||
|
||||
reclaimed = reclaim_stale(db, stale_after_minutes=15)
|
||||
assert reclaimed == 1
|
||||
|
||||
con = sqlite3.connect(str(db))
|
||||
try:
|
||||
row = con.execute(
|
||||
"SELECT status, worker_id, retry_count FROM tasks WHERE id=?", (tid,)
|
||||
).fetchone()
|
||||
assert row == ("pending", None, 1)
|
||||
finally:
|
||||
con.close()
|
||||
|
||||
|
||||
def test_reclaim_stale_fails_after_max_retries(tmp_path: Path) -> None:
    """A stale claim whose retry budget is exhausted is marked failed, not requeued."""
    db = ensure_progress_db(tmp_path / "c")
    [tid] = seed_tasks(db, [{"goal": "doomed", "max_retries": 2}])

    # Fake a claim that already burned both retries and went stale 20 minutes ago.
    conn = sqlite3.connect(str(db), isolation_level=None)
    try:
        conn.execute(
            "UPDATE tasks SET status='claimed', worker_id='w1', retry_count=2, "
            "claimed_at=datetime('now', '-20 minutes') WHERE id=?",
            (tid,),
        )
    finally:
        conn.close()

    reclaim_stale(db, stale_after_minutes=15)

    conn = sqlite3.connect(str(db))
    try:
        status, last_error = conn.execute(
            "SELECT status, last_error FROM tasks WHERE id=?", (tid,)
        ).fetchone()
    finally:
        conn.close()

    assert status == "failed"
    # The terminal error should name the reason so operators can tell why it died.
    assert last_error is not None and "max_retries" in last_error
|
||||
|
||||
|
||||
def test_reclaim_stale_ignores_fresh_claims(tmp_path: Path) -> None:
    """A claim made just now must survive the reclaimer untouched."""
    db = ensure_progress_db(tmp_path / "c")
    [tid] = seed_tasks(db, [{"goal": "working"}])

    conn = sqlite3.connect(str(db), isolation_level=None)
    try:
        conn.execute(
            "UPDATE tasks SET status='claimed', worker_id='w1', "
            "claimed_at=datetime('now') WHERE id=?",
            (tid,),
        )
    finally:
        conn.close()

    # A fresh claim (age ~0 minutes) is well inside the 15-minute window.
    assert reclaim_stale(db, stale_after_minutes=15) == 0
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Foreign key cascade
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Worker config patching for pre-existing colonies
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
def _write_worker_cfg(path: Path, *, with_input_data: dict | None = None) -> None:
|
||||
"""Write a minimal worker.json that matches the shape ensure_progress_db patches."""
|
||||
import json as _json
|
||||
|
||||
cfg = {
|
||||
"name": "worker",
|
||||
"system_prompt": "You are a worker.",
|
||||
"goal": {"description": "do stuff", "success_criteria": [], "constraints": []},
|
||||
"tools": [],
|
||||
}
|
||||
if with_input_data is not None:
|
||||
cfg["input_data"] = with_input_data
|
||||
path.write_text(_json.dumps(cfg, indent=2))
|
||||
|
||||
|
||||
def test_ensure_progress_db_patches_existing_worker_json(tmp_path: Path) -> None:
    """Pre-existing worker.json without input_data gets db_path injected."""
    import json as _json

    colony = tmp_path / "legacy_colony"
    colony.mkdir()
    cfg_path = colony / "worker.json"
    _write_worker_cfg(cfg_path)

    # Sanity check: the legacy config really lacks input_data before the call.
    assert "input_data" not in _json.loads(cfg_path.read_text())

    db = ensure_progress_db(colony)

    patched = _json.loads(cfg_path.read_text())
    assert patched["input_data"]["db_path"] == str(db)
    assert patched["input_data"]["colony_id"] == "legacy_colony"
    # Everything else in the config must survive the patch unmodified.
    assert patched["system_prompt"] == "You are a worker."
    assert patched["goal"]["description"] == "do stuff"
|
||||
|
||||
|
||||
def test_ensure_progress_db_patch_is_idempotent(tmp_path: Path) -> None:
    """Second call must not rewrite the file (content and mtime unchanged).

    Uses ``st_mtime_ns`` instead of the float ``st_mtime``: on filesystems with
    coarse timestamp granularity the float value can collapse two distinct
    writes into the same number, letting a rewrite slip past the assertion.
    Comparing the raw bytes as well catches a rewrite regardless of timestamps.
    """
    import time as _time

    colony = tmp_path / "idem"
    colony.mkdir()
    cfg_path = colony / "worker.json"
    _write_worker_cfg(cfg_path)

    ensure_progress_db(colony)
    bytes1 = cfg_path.read_bytes()
    mtime1 = cfg_path.stat().st_mtime_ns

    _time.sleep(0.02)  # ensure any rewrite would bump mtime
    ensure_progress_db(colony)
    mtime2 = cfg_path.stat().st_mtime_ns

    assert cfg_path.read_bytes() == bytes1, "second call changed worker.json content"
    assert mtime1 == mtime2, "second ensure_progress_db must not rewrite worker.json"
|
||||
|
||||
|
||||
def test_ensure_progress_db_preserves_existing_input_data_keys(tmp_path: Path) -> None:
    """Pre-existing input_data keys (other than db_path/colony_id) are preserved."""
    import json as _json

    colony = tmp_path / "preserved"
    colony.mkdir()
    seed = {"custom_key": "hello", "db_path": "/stale/path.db"}
    _write_worker_cfg(colony / "worker.json", with_input_data=seed)

    db = ensure_progress_db(colony)

    patched = _json.loads((colony / "worker.json").read_text())["input_data"]
    # The caller-supplied key survives; the stale db_path is refreshed in place.
    assert patched["custom_key"] == "hello"
    assert patched["db_path"] == str(db)
    assert patched["colony_id"] == "preserved"
|
||||
|
||||
|
||||
def test_ensure_progress_db_skips_metadata_and_triggers(tmp_path: Path) -> None:
    """metadata.json and triggers.json are not worker configs — must not be touched."""
    import json as _json

    colony = tmp_path / "guarded"
    colony.mkdir()
    (colony / "metadata.json").write_text(_json.dumps({"colony_name": "guarded"}))
    (colony / "triggers.json").write_text(_json.dumps([{"id": "t1"}]))
    _write_worker_cfg(colony / "worker.json")

    ensure_progress_db(colony)

    # Only worker.json gains input_data; the sibling JSON files stay pristine.
    assert "input_data" not in _json.loads((colony / "metadata.json").read_text())
    assert _json.loads((colony / "triggers.json").read_text()) == [{"id": "t1"}]
    assert "input_data" in _json.loads((colony / "worker.json").read_text())
|
||||
|
||||
|
||||
def test_task_delete_cascades_to_steps_and_sop(tmp_path: Path) -> None:
    """Deleting a task removes its steps and SOP checklist rows via FK cascade."""
    db = ensure_progress_db(tmp_path / "c")
    [tid] = seed_tasks(
        db,
        [
            {
                "goal": "cascade test",
                "steps": [{"title": "a"}, {"title": "b"}],
                "sop_items": [{"key": "k", "description": "d"}],
            }
        ],
    )

    conn = sqlite3.connect(str(db), isolation_level=None)
    try:
        # SQLite only enforces FK cascades when the pragma is on per-connection.
        conn.execute("PRAGMA foreign_keys = ON")
        conn.execute("DELETE FROM tasks WHERE id=?", (tid,))
        for table in ("steps", "sop_checklist"):
            orphans = conn.execute(
                f"SELECT count(*) FROM {table} WHERE task_id=?", (tid,)
            ).fetchone()[0]
            assert orphans == 0, f"{table} rows survived the cascade"
    finally:
        conn.close()
|
||||
@@ -141,7 +141,7 @@ class TestSkillDiscovery:
|
||||
framework_skills = [s for s in skills if s.source_scope == "framework"]
|
||||
names = {s.name for s in framework_skills}
|
||||
assert "hive.note-taking" in names
|
||||
assert "hive.colony-progress-tracker" in names
|
||||
assert "hive.batch-ledger" in names
|
||||
|
||||
def test_max_depth_limit(self, tmp_path):
|
||||
# Create a skill nested beyond max_depth
|
||||
|
||||
@@ -899,6 +899,7 @@ def test_concurrency_safe_allowlist_is_conservative():
|
||||
"hashline_edit",
|
||||
"browser_click",
|
||||
"browser_type",
|
||||
"browser_type_focused",
|
||||
"browser_navigate",
|
||||
):
|
||||
assert forbidden not in allowlist, f"{forbidden} must not be concurrency-safe"
|
||||
|
||||
@@ -271,48 +271,6 @@ else
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check for sqlite3 CLI (required for colony progress tracking)
echo -n " Checking for sqlite3... "
if command -v sqlite3 &> /dev/null; then
    echo -e "${GREEN}ok${NC}"
else
    echo -e "${YELLOW}not found${NC}"
    # Attempt auto-install on common package managers.
    # NOTE: the Debian/Ubuntu package is named "sqlite3"; brew/apk/dnf/pacman
    # ship it as "sqlite". brew and apk run unprivileged; the others need sudo.
    SQLITE_INSTALLED=false
    if command -v apt-get &> /dev/null; then
        echo -n " Installing sqlite3 via apt... "
        if sudo apt-get install -y sqlite3 > /dev/null 2>&1; then
            SQLITE_INSTALLED=true
        fi
    elif command -v brew &> /dev/null; then
        echo -n " Installing sqlite3 via brew... "
        if brew install sqlite > /dev/null 2>&1; then
            SQLITE_INSTALLED=true
        fi
    elif command -v apk &> /dev/null; then
        echo -n " Installing sqlite3 via apk... "
        if apk add sqlite > /dev/null 2>&1; then
            SQLITE_INSTALLED=true
        fi
    elif command -v dnf &> /dev/null; then
        echo -n " Installing sqlite3 via dnf... "
        if sudo dnf install -y sqlite > /dev/null 2>&1; then
            SQLITE_INSTALLED=true
        fi
    elif command -v pacman &> /dev/null; then
        echo -n " Installing sqlite3 via pacman... "
        if sudo pacman -S --noconfirm sqlite > /dev/null 2>&1; then
            SQLITE_INSTALLED=true
        fi
    fi
    # Report the install result; failure is non-fatal — just print manual steps.
    if [ "$SQLITE_INSTALLED" = true ]; then
        echo -e "${GREEN}ok${NC}"
    else
        echo -e "${YELLOW} ⚠ Could not install sqlite3 automatically${NC}"
        echo -e "${DIM} Install manually: apt install sqlite3 / brew install sqlite / apk add sqlite${NC}"
    fi
fi
|
||||
|
||||
# Check for Chrome/Edge (required for GCU browser tools)
|
||||
echo -n " Checking for Chrome/Edge browser... "
|
||||
# Check common browser locations
|
||||
|
||||
@@ -0,0 +1,268 @@
|
||||
"""
|
||||
Browser Remote Control — act as an agent to call browser tools via a UI.
|
||||
|
||||
Spawns its own GCU MCP server subprocess (same way a real agent does),
|
||||
connects as an MCP client, and exposes the tools over HTTP for the web UI.
|
||||
|
||||
Usage:
|
||||
uv run scripts/browser_remote.py # starts server + opens UI
|
||||
uv run scripts/browser_remote.py --no-ui # API only, no browser open
|
||||
|
||||
Then use the UI at http://localhost:9250/ui or curl directly:
|
||||
curl -X POST http://localhost:9250/browser_click \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{"selector": "#login-btn"}'
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import webbrowser
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from aiohttp import web
|
||||
|
||||
# Add framework to path so we can use the existing MCPClient
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "core"))
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "tools", "src"))
|
||||
|
||||
from framework.loader.mcp_client import MCPClient, MCPServerConfig
|
||||
|
||||
logger = logging.getLogger("browser_remote")
|
||||
|
||||
DEFAULT_PORT = 9250
|
||||
TOOLS_DIR = str((Path(__file__).parent.parent / "tools").resolve())
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# MCP client — connects to GCU server exactly like an agent would
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_mcp_client: MCPClient | None = None
|
||||
|
||||
|
||||
def get_mcp_client() -> MCPClient:
    """Return the process-wide MCP client, creating and connecting it on first use."""
    global _mcp_client
    if _mcp_client is not None:
        return _mcp_client

    # Spawn the GCU server subprocess over stdio, exactly like a real agent does.
    bridge_port = os.environ.get("HIVE_BRIDGE_PORT", "9229")
    server_cfg = MCPServerConfig(
        name="gcu-tools",
        transport="stdio",
        command="uv",
        args=["run", "python", "-m", "gcu.server", "--stdio", "--capabilities", "browser"],
        cwd=TOOLS_DIR,
        env={"HIVE_BRIDGE_PORT": bridge_port},
    )
    client = MCPClient(server_cfg)
    client.connect()
    available = client.list_tools()
    logger.info(
        "Connected to GCU server, %d tools available: %s",
        len(available),
        [t.name for t in available],
    )
    _mcp_client = client
    return _mcp_client
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# HTTP Handlers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def handle_ui(request: web.Request) -> web.Response:
    """GET /ui — serve the web UI (static HTML next to this script)."""
    return web.FileResponse(Path(__file__).parent / "browser_remote_ui.html")
|
||||
|
||||
|
||||
async def handle_index(request: web.Request) -> web.Response:
    """GET / — redirect to UI."""
    # aiohttp converts the raised HTTPFound into a 302 redirect response.
    raise web.HTTPFound("/ui")
|
||||
|
||||
|
||||
async def handle_status(request: web.Request) -> web.Response:
    """GET /status — report whether the MCP connection is up and how many tools it has."""
    try:
        count = len(get_mcp_client().list_tools())
    except Exception as e:
        # Surface the failure as a JSON payload rather than an HTTP error,
        # so the UI badge can render "disconnected" with the reason.
        return web.json_response({"connected": False, "error": str(e)})
    return web.json_response({"connected": True, "tools_count": count})
|
||||
|
||||
|
||||
async def handle_tools(request: web.Request) -> web.Response:
    """GET /tools — list available tools with simplified parameter schemas.

    Flattens each tool's JSON-Schema ``input_schema`` into a small
    ``{description, params}`` dict the web UI can render as a form.
    """
    try:
        schemas: dict[str, Any] = {}
        for tool in get_mcp_client().list_tools():
            spec = tool.input_schema
            required = set(spec.get("required", []))
            params: dict[str, Any] = {}
            for pname, pspec in spec.get("properties", {}).items():
                entry: dict[str, Any] = {"type": pspec.get("type", "string")}
                if pname in required:
                    entry["required"] = True
                for key in ("default", "enum"):
                    if key in pspec:
                        entry[key] = pspec[key]
                # For arrays, surface only the element type — enough for the UI.
                if pspec.get("type") == "array" and "items" in pspec:
                    entry["items"] = pspec["items"].get("type", "string")
                params[pname] = entry
            summary = tool.description.split("\n")[0].strip() if tool.description else ""
            schemas[tool.name] = {"description": summary, "params": params}
        return web.json_response(schemas)
    except Exception as e:
        return web.json_response({"error": str(e)}, status=500)
|
||||
|
||||
|
||||
async def handle_tool_call(request: web.Request) -> web.Response:
    """POST /<tool_name> — call a browser tool.

    The request body is an optional JSON object of tool parameters. The MCP
    result is normalized to a JSON-friendly dict via ``_format_mcp_result``.
    Returns 400 on malformed JSON and 500 (with ``ok: False``) on tool errors.
    """
    tool_name = request.match_info["tool"]

    try:
        body = await request.read()
        params = json.loads(body) if body.strip() else {}
    except json.JSONDecodeError:
        return web.json_response({"ok": False, "error": "Invalid JSON"}, status=400)

    logger.info("=> %s %s", tool_name, json.dumps(params, default=str)[:200])

    try:
        client = get_mcp_client()
        # call_tool is synchronous (blocks on the stdio subprocess).
        # Run it in a thread so we don't block the event loop.
        # get_running_loop() is the supported spelling inside a coroutine;
        # get_event_loop() here has been deprecated since Python 3.10.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, client.call_tool, tool_name, params)

        # MCP returns a list of content blocks — extract text/image
        response = _format_mcp_result(result)
        logger.info("<= %s ok=%s", tool_name, response.get("ok", True))
        return web.json_response(response)
    except Exception as e:
        logger.error("<= %s error: %s", tool_name, e)
        return web.json_response({"ok": False, "error": str(e)}, status=500)
|
||||
|
||||
|
||||
def _format_mcp_result(result: Any) -> dict:
|
||||
"""Convert MCP tool result into a JSON-friendly dict."""
|
||||
if result is None:
|
||||
return {"ok": True}
|
||||
|
||||
# MCPClient.call_tool returns the raw result from the MCP SDK
|
||||
# which could be a list of content blocks, a dict, or a string
|
||||
if isinstance(result, dict):
|
||||
return result
|
||||
|
||||
if isinstance(result, str):
|
||||
try:
|
||||
return json.loads(result)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
return {"ok": True, "text": result}
|
||||
|
||||
if isinstance(result, list):
|
||||
# List of MCP content blocks (TextContent, ImageContent, etc.)
|
||||
texts = []
|
||||
images = []
|
||||
for item in result:
|
||||
if hasattr(item, "text"):
|
||||
try:
|
||||
parsed = json.loads(item.text)
|
||||
if isinstance(parsed, dict):
|
||||
return parsed # Tool returned structured JSON
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
texts.append(item.text)
|
||||
elif hasattr(item, "data"):
|
||||
images.append({"mime_type": getattr(item, "mime_type", "image/png"), "data": item.data})
|
||||
|
||||
response: dict[str, Any] = {"ok": True}
|
||||
if texts:
|
||||
response["text"] = "\n".join(texts)
|
||||
if images:
|
||||
response["images"] = images
|
||||
return response
|
||||
|
||||
return {"ok": True, "result": str(result)}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Server setup
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@web.middleware
async def cors_middleware(request: web.Request, handler):
    """Attach permissive CORS headers; answer preflight OPTIONS directly."""
    if request.method == "OPTIONS":
        # Preflight: skip the handler entirely, headers below are the answer.
        resp = web.Response()
    else:
        resp = await handler(request)
    resp.headers.update({
        "Access-Control-Allow-Origin": "*",
        "Access-Control-Allow-Methods": "GET, POST, OPTIONS",
        "Access-Control-Allow-Headers": "Content-Type",
    })
    return resp
|
||||
|
||||
|
||||
def create_app() -> web.Application:
    """Build the aiohttp application with CORS enabled on every route."""
    app = web.Application(middlewares=[cors_middleware])
    router = app.router
    router.add_get("/", handle_index)
    router.add_get("/ui", handle_ui)
    router.add_get("/tools", handle_tools)
    router.add_get("/status", handle_status)
    # Catch-all: any POSTed path segment is treated as a tool name.
    router.add_post("/{tool}", handle_tool_call)
    return app
|
||||
|
||||
|
||||
def main() -> None:
    """CLI entry point: connect to the GCU server, start a browser, serve HTTP.

    Startup order matters: we connect to the MCP server eagerly (fail fast),
    then best-effort start the browser context, and only then bind the HTTP
    server so the UI never sees a half-initialized backend.
    """
    parser = argparse.ArgumentParser(description="Browser Remote Control")
    parser.add_argument("--port", type=int, default=int(os.environ.get("BROWSER_REMOTE_PORT", DEFAULT_PORT)))
    parser.add_argument("--no-ui", action="store_true", help="Don't auto-open the browser")
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(levelname)s %(message)s")

    # Connect to GCU server eagerly so we fail fast if something is wrong
    try:
        client = get_mcp_client()
    except Exception as e:
        logger.error("Failed to connect to GCU server: %s", e)
        sys.exit(1)

    # Auto-start browser context so tools work immediately
    try:
        result = client.call_tool("browser_start", {})
        logger.info("browser_start: %s", result)
    except Exception as e:
        # Non-fatal: an already-running context raises here too.
        logger.warning("browser_start failed (may already be started): %s", e)

    app = create_app()

    async def on_startup(app: web.Application) -> None:
        # Open the UI tab only once the server is actually listening.
        if not args.no_ui:
            webbrowser.open(f"http://localhost:{args.port}/ui")

    app.on_startup.append(on_startup)

    print(f"Browser Remote Control on http://localhost:{args.port}")
    print(f" UI: http://localhost:{args.port}/ui")
    print(f" API: POST http://localhost:{args.port}/<tool>")
    print()
    # print=None suppresses aiohttp's own banner (we printed our own above).
    web.run_app(app, host="127.0.0.1", port=args.port, print=None)
|
||||
|
||||
|
||||
# Script entry point — only run when executed directly, not when imported.
if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,838 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Browser Remote Control</title>
|
||||
<style>
|
||||
:root {
|
||||
--bg: #0d1117;
|
||||
--surface: #161b22;
|
||||
--surface2: #21262d;
|
||||
--border: #30363d;
|
||||
--text: #e6edf3;
|
||||
--text2: #8b949e;
|
||||
--accent: #58a6ff;
|
||||
--accent-dim: #1f6feb;
|
||||
--green: #3fb950;
|
||||
--red: #f85149;
|
||||
--orange: #d29922;
|
||||
--radius: 8px;
|
||||
}
|
||||
|
||||
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
|
||||
body {
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif;
|
||||
background: var(--bg);
|
||||
color: var(--text);
|
||||
line-height: 1.5;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
header {
|
||||
background: var(--surface);
|
||||
border-bottom: 1px solid var(--border);
|
||||
padding: 16px 24px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
position: sticky;
|
||||
top: 0;
|
||||
z-index: 100;
|
||||
}
|
||||
|
||||
header h1 {
|
||||
font-size: 18px;
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
#status-badge {
|
||||
font-size: 13px;
|
||||
padding: 4px 12px;
|
||||
border-radius: 20px;
|
||||
font-weight: 500;
|
||||
}
|
||||
#status-badge.connected { background: rgba(63,185,80,0.15); color: var(--green); }
|
||||
#status-badge.disconnected { background: rgba(248,81,73,0.15); color: var(--red); }
|
||||
#status-badge.checking { background: rgba(210,153,34,0.15); color: var(--orange); }
|
||||
|
||||
.layout {
|
||||
display: flex;
|
||||
height: calc(100vh - 57px);
|
||||
}
|
||||
|
||||
/* Sidebar */
|
||||
.sidebar {
|
||||
width: 240px;
|
||||
min-width: 240px;
|
||||
background: var(--surface);
|
||||
border-right: 1px solid var(--border);
|
||||
overflow-y: auto;
|
||||
padding: 12px 0;
|
||||
}
|
||||
|
||||
.sidebar-group {
|
||||
margin-bottom: 8px;
|
||||
}
|
||||
|
||||
.sidebar-group-label {
|
||||
font-size: 11px;
|
||||
font-weight: 600;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.5px;
|
||||
color: var(--text2);
|
||||
padding: 8px 16px 4px;
|
||||
}
|
||||
|
||||
.sidebar-item {
|
||||
display: block;
|
||||
width: 100%;
|
||||
text-align: left;
|
||||
background: none;
|
||||
border: none;
|
||||
color: var(--text2);
|
||||
font-size: 13px;
|
||||
padding: 6px 16px 6px 24px;
|
||||
cursor: pointer;
|
||||
font-family: 'SF Mono', 'Fira Code', monospace;
|
||||
transition: background 0.1s, color 0.1s;
|
||||
}
|
||||
|
||||
.sidebar-item:hover {
|
||||
background: var(--surface2);
|
||||
color: var(--text);
|
||||
}
|
||||
|
||||
.sidebar-item.active {
|
||||
background: rgba(88,166,255,0.1);
|
||||
color: var(--accent);
|
||||
border-right: 2px solid var(--accent);
|
||||
}
|
||||
|
||||
/* Main content */
|
||||
.main {
|
||||
flex: 1;
|
||||
overflow-y: auto;
|
||||
padding: 24px 32px;
|
||||
}
|
||||
|
||||
.tools-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fill, minmax(420px, 1fr));
|
||||
gap: 16px;
|
||||
}
|
||||
|
||||
.tool-card {
|
||||
background: var(--surface);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: var(--radius);
|
||||
overflow: hidden;
|
||||
transition: border-color 0.15s;
|
||||
}
|
||||
|
||||
.tool-card:hover { border-color: var(--accent-dim); }
|
||||
.tool-card.active { border-color: var(--accent); }
|
||||
|
||||
.tool-card-header {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
padding: 12px 16px;
|
||||
border-bottom: 1px solid var(--border);
|
||||
cursor: pointer;
|
||||
user-select: none;
|
||||
}
|
||||
|
||||
.tool-card-header:hover { background: var(--surface2); }
|
||||
|
||||
.tool-name {
|
||||
font-family: 'SF Mono', 'Fira Code', monospace;
|
||||
font-size: 13px;
|
||||
font-weight: 600;
|
||||
color: var(--accent);
|
||||
}
|
||||
|
||||
.tool-desc {
|
||||
font-size: 12px;
|
||||
color: var(--text2);
|
||||
margin-left: 8px;
|
||||
}
|
||||
|
||||
.tool-card-body {
|
||||
padding: 16px;
|
||||
display: none;
|
||||
}
|
||||
|
||||
.tool-card.open .tool-card-body { display: block; }
|
||||
|
||||
.chevron {
|
||||
color: var(--text2);
|
||||
transition: transform 0.2s;
|
||||
font-size: 12px;
|
||||
}
|
||||
|
||||
.tool-card.open .chevron { transform: rotate(90deg); }
|
||||
|
||||
/* Form fields */
|
||||
.field {
|
||||
margin-bottom: 12px;
|
||||
}
|
||||
|
||||
.field:last-of-type { margin-bottom: 16px; }
|
||||
|
||||
.field label {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 6px;
|
||||
font-size: 12px;
|
||||
font-weight: 500;
|
||||
color: var(--text2);
|
||||
margin-bottom: 4px;
|
||||
}
|
||||
|
||||
.field label .required {
|
||||
color: var(--red);
|
||||
font-size: 10px;
|
||||
}
|
||||
|
||||
.field label .type-tag {
|
||||
font-size: 10px;
|
||||
padding: 1px 5px;
|
||||
border-radius: 3px;
|
||||
background: var(--surface2);
|
||||
color: var(--text2);
|
||||
font-family: 'SF Mono', 'Fira Code', monospace;
|
||||
}
|
||||
|
||||
.field input, .field select, .field textarea {
|
||||
width: 100%;
|
||||
background: var(--bg);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 6px;
|
||||
color: var(--text);
|
||||
font-size: 13px;
|
||||
padding: 8px 10px;
|
||||
font-family: 'SF Mono', 'Fira Code', monospace;
|
||||
outline: none;
|
||||
transition: border-color 0.15s;
|
||||
}
|
||||
|
||||
.field input:focus, .field select:focus, .field textarea:focus {
|
||||
border-color: var(--accent);
|
||||
}
|
||||
|
||||
.field textarea { min-height: 60px; resize: vertical; }
|
||||
|
||||
.field input[type="checkbox"] {
|
||||
width: auto;
|
||||
margin-right: 4px;
|
||||
}
|
||||
|
||||
.checkbox-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 6px;
|
||||
padding: 4px 0;
|
||||
}
|
||||
|
||||
.checkbox-row label {
|
||||
margin-bottom: 0;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
/* Buttons */
|
||||
.btn-run {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: 6px;
|
||||
background: var(--accent-dim);
|
||||
color: #fff;
|
||||
border: none;
|
||||
border-radius: 6px;
|
||||
padding: 8px 20px;
|
||||
font-size: 13px;
|
||||
font-weight: 600;
|
||||
cursor: pointer;
|
||||
transition: background 0.15s;
|
||||
}
|
||||
|
||||
.btn-run:hover { background: var(--accent); }
|
||||
.btn-run:disabled { opacity: 0.5; cursor: not-allowed; }
|
||||
.btn-run.running { background: var(--orange); }
|
||||
|
||||
/* Result area */
|
||||
.result-area {
|
||||
margin-top: 12px;
|
||||
display: none;
|
||||
}
|
||||
|
||||
.result-area.visible { display: block; }
|
||||
|
||||
.result-header {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 8px;
|
||||
margin-bottom: 6px;
|
||||
}
|
||||
|
||||
.result-status {
|
||||
font-size: 12px;
|
||||
font-weight: 600;
|
||||
padding: 2px 8px;
|
||||
border-radius: 4px;
|
||||
}
|
||||
|
||||
.result-status.ok { background: rgba(63,185,80,0.15); color: var(--green); }
|
||||
.result-status.error { background: rgba(248,81,73,0.15); color: var(--red); }
|
||||
|
||||
.result-duration {
|
||||
font-size: 11px;
|
||||
color: var(--text2);
|
||||
}
|
||||
|
||||
.result-json {
|
||||
background: var(--bg);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 6px;
|
||||
padding: 12px;
|
||||
font-family: 'SF Mono', 'Fira Code', monospace;
|
||||
font-size: 12px;
|
||||
line-height: 1.6;
|
||||
max-height: 300px;
|
||||
overflow: auto;
|
||||
white-space: pre-wrap;
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
.result-screenshot {
|
||||
max-width: 100%;
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 6px;
|
||||
margin-top: 8px;
|
||||
}
|
||||
|
||||
/* History panel */
|
||||
.history-panel {
|
||||
width: 320px;
|
||||
min-width: 320px;
|
||||
background: var(--surface);
|
||||
border-left: 1px solid var(--border);
|
||||
overflow-y: auto;
|
||||
padding: 12px;
|
||||
}
|
||||
|
||||
.history-title {
|
||||
font-size: 12px;
|
||||
font-weight: 600;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.5px;
|
||||
color: var(--text2);
|
||||
padding: 4px 4px 8px;
|
||||
border-bottom: 1px solid var(--border);
|
||||
margin-bottom: 8px;
|
||||
}
|
||||
|
||||
.history-item {
|
||||
padding: 8px;
|
||||
border-radius: 6px;
|
||||
margin-bottom: 4px;
|
||||
cursor: pointer;
|
||||
transition: background 0.1s;
|
||||
border: 1px solid transparent;
|
||||
}
|
||||
|
||||
.history-item:hover {
|
||||
background: var(--surface2);
|
||||
}
|
||||
|
||||
.history-item-tool {
|
||||
font-family: 'SF Mono', 'Fira Code', monospace;
|
||||
font-size: 12px;
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
.history-item-tool.ok { color: var(--green); }
|
||||
.history-item-tool.error { color: var(--red); }
|
||||
|
||||
.history-item-time {
|
||||
font-size: 11px;
|
||||
color: var(--text2);
|
||||
}
|
||||
|
||||
.history-item-params {
|
||||
font-size: 11px;
|
||||
color: var(--text2);
|
||||
font-family: 'SF Mono', 'Fira Code', monospace;
|
||||
white-space: nowrap;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
max-width: 280px;
|
||||
}
|
||||
|
||||
.history-empty {
|
||||
color: var(--text2);
|
||||
font-size: 13px;
|
||||
text-align: center;
|
||||
padding: 24px 0;
|
||||
}
|
||||
|
||||
.clear-history {
|
||||
background: none;
|
||||
border: none;
|
||||
color: var(--text2);
|
||||
font-size: 11px;
|
||||
cursor: pointer;
|
||||
float: right;
|
||||
padding: 0;
|
||||
}
|
||||
.clear-history:hover { color: var(--red); }
|
||||
|
||||
/* View mode toggle */
|
||||
.view-toggle {
|
||||
display: flex;
|
||||
gap: 4px;
|
||||
background: var(--surface2);
|
||||
border-radius: 6px;
|
||||
padding: 2px;
|
||||
}
|
||||
|
||||
.view-toggle button {
|
||||
background: none;
|
||||
border: none;
|
||||
color: var(--text2);
|
||||
font-size: 12px;
|
||||
padding: 4px 12px;
|
||||
border-radius: 4px;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.view-toggle button.active {
|
||||
background: var(--accent-dim);
|
||||
color: #fff;
|
||||
}
|
||||
|
||||
/* Scrollbar */
|
||||
::-webkit-scrollbar { width: 8px; height: 8px; }
|
||||
::-webkit-scrollbar-track { background: transparent; }
|
||||
::-webkit-scrollbar-thumb { background: var(--border); border-radius: 4px; }
|
||||
::-webkit-scrollbar-thumb:hover { background: var(--text2); }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<header>
|
||||
<div style="display:flex;align-items:center;gap:16px;">
|
||||
<h1>Browser Remote Control</h1>
|
||||
<div class="view-toggle">
|
||||
<button class="active" onclick="setView('grid')">Grid</button>
|
||||
<button onclick="setView('single')">Focus</button>
|
||||
</div>
|
||||
</div>
|
||||
<div style="display:flex;align-items:center;gap:12px;">
|
||||
<span id="context-info" style="font-size:12px;color:var(--text2)"></span>
|
||||
<span id="status-badge" class="checking">checking...</span>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
<div class="layout">
|
||||
<nav class="sidebar" id="sidebar"></nav>
|
||||
<main class="main" id="main-content"></main>
|
||||
<aside class="history-panel" id="history-panel">
|
||||
<div class="history-title">
|
||||
History
|
||||
<button class="clear-history" onclick="clearHistory()">clear</button>
|
||||
</div>
|
||||
<div id="history-list">
|
||||
<div class="history-empty">No calls yet</div>
|
||||
</div>
|
||||
</aside>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
// Module-level UI state.
const API_BASE = window.location.origin;   // same-origin API server
let toolSchemas = {};                      // tool name -> {description, params}, filled by loadTools()
let history = [];                          // past tool calls shown in the history panel
let currentView = 'grid';                  // 'grid' | 'single' layout mode

// Tool categories for sidebar grouping
const CATEGORIES = {
    'Lifecycle': ['browser_start', 'browser_stop', 'browser_status'],
    'Tabs': ['browser_tabs', 'browser_open', 'browser_close', 'browser_focus'],
    'Navigation': ['browser_navigate', 'browser_go_back', 'browser_go_forward', 'browser_reload'],
    'Interactions': ['browser_click', 'browser_click_coordinate', 'browser_type', 'browser_fill', 'browser_press', 'browser_press_at', 'browser_hover', 'browser_hover_coordinate', 'browser_select', 'browser_scroll'],
    'Inspection': ['browser_screenshot', 'browser_snapshot', 'browser_console', 'browser_get_text', 'browser_evaluate', 'browser_wait'],
    'Advanced': ['browser_resize', 'browser_upload', 'browser_dialog'],
};
|
||||
|
||||
async function init() {
    // Initial load of status badge and tool schemas, then poll status every 5 s.
    await checkStatus();
    await loadTools();
    setInterval(checkStatus, 5000);
}
|
||||
|
||||
async function checkStatus() {
    // Refresh the header badge and context line from GET /status.
    const badge = document.getElementById('status-badge');
    const ctx = document.getElementById('context-info');

    const show = (text, cls, info) => {
        badge.textContent = text;
        badge.className = cls;
        ctx.textContent = info;
    };

    let data;
    try {
        data = await (await fetch(`${API_BASE}/status`)).json();
    } catch {
        show('unreachable', 'disconnected', '');
        return;
    }

    if (!data.connected) {
        show('disconnected', 'disconnected', '');
        return;
    }

    // Prefer the simple tool count; fall back to per-context tab info.
    let info = '';
    if (data.tools_count) {
        info = `${data.tools_count} tools`;
    } else if (data.contexts) {
        const entries = Object.entries(data.contexts);
        info = entries.length > 0
            ? entries.map(([k,v]) => `${k}: tab ${v.activeTabId}`).join(', ')
            : 'no active context';
    }
    show('connected', 'connected', info);
}
|
||||
|
||||
// Fetch the tool schema map from GET /tools, then build the sidebar and
// the card grid. On failure the main panel shows an inline error.
async function loadTools() {
  try {
    const response = await fetch(`${API_BASE}/tools`);
    toolSchemas = await response.json();
    renderSidebar();
    renderToolCards();
  } catch (err) {
    const panel = document.getElementById('main-content');
    panel.innerHTML =
      `<div style="color:var(--red);padding:40px;">Failed to load tools: ${err.message}</div>`;
  }
}
|
||||
|
||||
// Build the grouped sidebar navigation from CATEGORIES (skipping tools
// the server didn't report), then append an "Other" group for any
// server-reported tools that have no category.
function renderSidebar() {
  const itemButton = (tool) => {
    const label = tool.replace('browser_', '');
    return `<button class="sidebar-item" data-tool="${tool}" onclick="scrollToTool('${tool}')">${label}</button>`;
  };

  const seen = new Set();
  const chunks = [];
  for (const [group, tools] of Object.entries(CATEGORIES)) {
    const present = tools.filter(name => toolSchemas[name]);
    if (present.length === 0) continue;
    chunks.push(`<div class="sidebar-group"><div class="sidebar-group-label">${group}</div>`);
    for (const name of present) {
      seen.add(name);
      chunks.push(itemButton(name));
    }
    chunks.push('</div>');
  }

  // Show any uncategorized tools from the server.
  const leftover = Object.keys(toolSchemas).filter(name => !seen.has(name));
  if (leftover.length > 0) {
    chunks.push(`<div class="sidebar-group"><div class="sidebar-group-label">Other</div>`);
    for (const name of leftover) chunks.push(itemButton(name));
    chunks.push('</div>');
  }

  document.getElementById('sidebar').innerHTML = chunks.join('');
}
|
||||
|
||||
// Rebuild the main panel: one collapsible card per tool schema, laid out
// in the #tools-grid container (whose columns setView() adjusts).
function renderToolCards() {
  const cards = Object.entries(toolSchemas)
    .map(([tool, schema]) => buildToolCard(tool, schema))
    .join('');
  document.getElementById('main-content').innerHTML =
    `<div class="tools-grid" id="tools-grid">${cards}</div>`;
}
|
||||
|
||||
// Render one collapsible tool card: a header (short name + description),
// a form with one input per schema param (see buildField), and an empty
// result area that runTool() fills in after a call.
// NOTE(review): schema.description is interpolated unescaped — fine while
// descriptions come from our own /tools endpoint, but HTML in one would
// be injected verbatim. Confirm descriptions stay server-controlled.
function buildToolCard(tool, schema) {
  const shortName = tool.replace('browser_', '');
  let fieldsHtml = '';
  for (const [param, spec] of Object.entries(schema.params)) {
    fieldsHtml += buildField(tool, param, spec);
  }

  return `
    <div class="tool-card" id="card-${tool}" data-tool="${tool}">
      <div class="tool-card-header" onclick="toggleCard('${tool}')">
        <div>
          <span class="tool-name">${shortName}</span>
          <span class="tool-desc">${schema.description}</span>
        </div>
        <span class="chevron">▶</span>
      </div>
      <div class="tool-card-body">
        <form id="form-${tool}" onsubmit="runTool(event, '${tool}')">
          ${fieldsHtml}
          <button class="btn-run" type="submit" id="btn-${tool}">Run</button>
        </form>
        <div class="result-area" id="result-${tool}"></div>
      </div>
    </div>`;
}
|
||||
|
||||
// Render the form control for a single parameter, choosing the widget by
// its schema: checkbox (boolean), <select> (enum), JSON text input
// (array), <textarea> (known-multiline params), or a plain text/number
// input. The element id is "<tool>__<param>" so collectParams() and
// replayHistory() can find it again.
function buildField(tool, param, spec) {
  const id = `${tool}__${param}`;
  const required = spec.required ? '<span class="required">*</span>' : '';
  const typeTag = `<span class="type-tag">${spec.type}</span>`;
  // '' doubles as the "no default" marker in the checks below.
  const defaultVal = spec.default !== undefined ? spec.default : '';

  if (spec.type === 'boolean') {
    return `
      <div class="field">
        <div class="checkbox-row">
          <input type="checkbox" id="${id}" ${defaultVal ? 'checked' : ''}>
          <label for="${id}">${param} ${typeTag} ${required}</label>
        </div>
      </div>`;
  }

  if (spec.enum) {
    const opts = spec.enum.map(v => `<option value="${v}" ${v === defaultVal ? 'selected' : ''}>${v}</option>`).join('');
    return `
      <div class="field">
        <label for="${id}">${param} ${typeTag} ${required}</label>
        <select id="${id}">${opts}</select>
      </div>`;
  }

  // Arrays are entered as raw JSON (collectParams also accepts a
  // comma-separated fallback).
  if (spec.type === 'array') {
    return `
      <div class="field">
        <label for="${id}">${param} ${typeTag} ${required}
          <span class="type-tag" style="margin-left:2px">JSON</span>
        </label>
        <input type="text" id="${id}" placeholder='["value1", "value2"]'>
      </div>`;
  }

  // For expression / text that might be multiline
  if (param === 'expression' || param === 'text') {
    return `
      <div class="field">
        <label for="${id}">${param} ${typeTag} ${required}</label>
        <textarea id="${id}" placeholder="${param}">${defaultVal}</textarea>
      </div>`;
  }

  const inputType = (spec.type === 'integer' || spec.type === 'number') ? 'number' : 'text';
  const step = spec.type === 'number' ? ' step="any"' : '';

  // String defaults are shown only as placeholder (so an untouched field
  // is omitted from the request); numeric defaults are prefilled as the
  // actual value.
  return `
    <div class="field">
      <label for="${id}">${param} ${typeTag} ${required}</label>
      <input type="${inputType}" id="${id}"${step} placeholder="${defaultVal !== '' ? defaultVal : param}" value="${defaultVal !== '' && spec.type !== 'string' ? defaultVal : ''}">
    </div>`;
}
|
||||
|
||||
// Expand or collapse a tool card. In 'single' view only one card may be
// open at a time. The sidebar highlight tracks whichever card is open.
function toggleCard(tool) {
  const card = document.getElementById(`card-${tool}`);
  const opening = !card.classList.contains('open');

  if (currentView === 'single') {
    document.querySelectorAll('.tool-card.open').forEach(el => el.classList.remove('open'));
  }
  card.classList.toggle('open', opening);

  // Re-point the sidebar active marker (cleared entirely on collapse).
  document.querySelectorAll('.sidebar-item').forEach(el => el.classList.remove('active'));
  if (opening) {
    const navItem = document.querySelector(`.sidebar-item[data-tool="${tool}"]`);
    if (navItem) navItem.classList.add('active');
  }
}
|
||||
|
||||
// Sidebar click handler: open the card (respecting single-view
// exclusivity), smooth-scroll it into view, and mark the sidebar entry.
function scrollToTool(tool) {
  const card = document.getElementById(`card-${tool}`);
  if (!card) return;

  // Ensure the card is open before scrolling.
  if (!card.classList.contains('open')) {
    if (currentView === 'single') {
      document.querySelectorAll('.tool-card.open').forEach(el => el.classList.remove('open'));
    }
    card.classList.add('open');
  }

  card.scrollIntoView({ behavior: 'smooth', block: 'start' });

  document.querySelectorAll('.sidebar-item').forEach(el => el.classList.remove('active'));
  const navItem = document.querySelector(`.sidebar-item[data-tool="${tool}"]`);
  if (navItem) navItem.classList.add('active');
}
|
||||
|
||||
// Gather the form values for a tool into a params object, coercing each
// by its schema type. Empty inputs are omitted entirely (booleans are
// always sent) so the server only sees what the user filled in.
function collectParams(tool) {
  const schema = toolSchemas[tool];
  const params = {};

  for (const [param, spec] of Object.entries(schema.params)) {
    const input = document.getElementById(`${tool}__${param}`);
    if (!input) continue;

    switch (spec.type) {
      case 'boolean':
        params[param] = input.checked;
        break;
      case 'array': {
        const raw = input.value.trim();
        if (!raw) break;
        // Prefer strict JSON; fall back to a comma-separated list.
        try { params[param] = JSON.parse(raw); }
        catch { params[param] = raw.split(',').map(part => part.trim()); }
        break;
      }
      case 'integer': {
        const raw = input.value.trim();
        if (raw) params[param] = parseInt(raw, 10);
        break;
      }
      case 'number': {
        const raw = input.value.trim();
        if (raw) params[param] = parseFloat(raw);
        break;
      }
      default: {
        const raw = (input.value || '').trim();
        if (raw) params[param] = raw;
      }
    }
  }
  return params;
}
|
||||
|
||||
// Form submit handler: POST the collected params to /<tool>, render the
// result (status pill + pretty JSON, inline <img> for screenshots), and
// log the call to the history panel.
async function runTool(event, tool) {
  event.preventDefault();
  const btn = document.getElementById(`btn-${tool}`);
  const resultArea = document.getElementById(`result-${tool}`);

  const params = collectParams(tool);
  btn.textContent = 'Running...';
  btn.classList.add('running');
  btn.disabled = true;

  const startTime = Date.now();
  let result;
  try {
    const res = await fetch(`${API_BASE}/${tool}`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(params),
    });
    result = await res.json();
  } catch (e) {
    // Network/parse failure — synthesize an error result so the render
    // and history paths below stay uniform.
    result = { ok: false, error: e.message };
  }
  const elapsed = Date.now() - startTime;

  btn.textContent = 'Run';
  btn.classList.remove('running');
  btn.disabled = false;

  // Render result. A missing "ok" field counts as success; only an
  // explicit ok:false is treated as an error.
  const isOk = result.ok !== false;
  const statusClass = isOk ? 'ok' : 'error';
  const statusText = isOk ? 'OK' : 'ERROR';
  // Prefer the server-measured duration when present; otherwise use the
  // round-trip time measured here.
  const duration = result._duration_ms ? `${result._duration_ms}ms` : `${elapsed}ms`;

  let bodyHtml = '';

  // Special handling for screenshot — show the image
  if (tool === 'browser_screenshot' && result.data) {
    bodyHtml = `<img class="result-screenshot" src="data:image/png;base64,${result.data}">`;
    // Don't show the raw base64 in JSON
    const display = { ...result };
    display.data = `[${result.data.length} chars base64]`;
    bodyHtml += `<pre class="result-json">${JSON.stringify(display, null, 2)}</pre>`;
  } else {
    bodyHtml = `<pre class="result-json">${JSON.stringify(result, null, 2)}</pre>`;
  }

  resultArea.innerHTML = `
    <div class="result-header">
      <span class="result-status ${statusClass}">${statusText}</span>
      <span class="result-duration">${duration}</span>
    </div>
    ${bodyHtml}`;
  resultArea.classList.add('visible');

  // Add to history
  addHistory(tool, params, result, duration);
}
|
||||
|
||||
// Prepend a call record to the in-memory history (newest first), trim to
// the 50 most recent entries, and refresh the panel.
function addHistory(tool, params, result, duration) {
  history.unshift({
    tool,
    params,
    result,
    duration,
    time: new Date().toLocaleTimeString(),
    // Mirror runTool's convention: only an explicit ok:false is an error.
    ok: result.ok !== false,
  });
  if (history.length > 50) history.pop();
  renderHistory();
}
|
||||
|
||||
// Render the history panel (newest first). Each entry is clickable to
// reload its params into the matching tool form via replayHistory().
function renderHistory() {
  const list = document.getElementById('history-list');
  if (history.length === 0) {
    list.innerHTML = '<div class="history-empty">No calls yet</div>';
    return;
  }

  // FIX: params are user-typed free text and were previously interpolated
  // into innerHTML unescaped — a value like "<img src=x onerror=...>"
  // (e.g. a selector being tested) would inject live markup into this
  // page. Escape everything user-influenced before rendering.
  const esc = (s) => String(s)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;');

  list.innerHTML = history.map((h, i) => {
    const shortName = esc(h.tool.replace('browser_', ''));
    const paramsStr = esc(JSON.stringify(h.params));
    const statusCls = h.ok ? 'ok' : 'error';
    return `
      <div class="history-item" onclick="replayHistory(${i})" title="Click to load params">
        <div style="display:flex;justify-content:space-between;align-items:center;">
          <span class="history-item-tool ${statusCls}">${shortName}</span>
          <span class="history-item-time">${h.time} (${h.duration})</span>
        </div>
        <div class="history-item-params">${paramsStr}</div>
      </div>`;
  }).join('');
}
|
||||
|
||||
// History click handler: jump to the tool's card and refill its form
// with the saved parameters from that call.
function replayHistory(idx) {
  const entry = history[idx];
  const tool = entry.tool;

  // Open the card and scroll to it.
  scrollToTool(tool);

  // Fill the form with saved params, widget by widget.
  const schema = toolSchemas[tool];
  for (const [param, spec] of Object.entries(schema.params)) {
    const input = document.getElementById(`${tool}__${param}`);
    if (!input) continue;
    const saved = entry.params[param];
    if (saved === undefined) continue;

    if (spec.type === 'boolean') {
      input.checked = Boolean(saved);
    } else if (spec.type === 'array') {
      input.value = JSON.stringify(saved);
    } else {
      input.value = saved;
    }
  }
}
|
||||
|
||||
// "clear" button handler: wipe the in-memory call history and show the
// empty-state placeholder again.
function clearHistory() {
  history = [];
  renderHistory();
}
|
||||
|
||||
// View-toggle handler: switch between 'grid' and 'single' layouts.
// FIX 1: the original read the bare identifier `event` (the non-standard
// implicit global) — now resolved explicitly and guarded, so the function
// no longer throws where window.event is unavailable.
// FIX 2: #tools-grid is null until loadTools() succeeds; the original
// would throw a TypeError on `grid.style` — now guarded.
function setView(mode) {
  currentView = mode;

  // Highlight the clicked toggle button. Callers invoke setView('grid')
  // from inline onclick without passing the event, so fall back to the
  // global window.event when available.
  const evt = typeof event !== 'undefined' ? event : window.event;
  document.querySelectorAll('.view-toggle button').forEach(b => b.classList.remove('active'));
  if (evt && evt.target) evt.target.classList.add('active');

  const grid = document.getElementById('tools-grid');
  if (!grid) return; // tools never loaded; nothing to re-layout

  grid.style.gridTemplateColumns = mode === 'single'
    ? '1fr'
    : 'repeat(auto-fill, minmax(420px, 1fr))';
}
|
||||
|
||||
init();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
@@ -0,0 +1,132 @@
|
||||
---
|
||||
name: linkedin-connection-greeter
|
||||
description: Automates accepting LinkedIn connections and sending a welcome message about the HoneyComb prediction market. Handles shadow DOM and Lexical editors.
|
||||
---
|
||||
|
||||
# LinkedIn Connection Greeter
|
||||
|
||||
This skill outlines the exact flow to accept connection requests and send a specific welcome message without triggering spam filters.
|
||||
|
||||
## 1. Load Ledger
|
||||
Before starting, read `data/linkedin_contacts.json`. If it doesn't exist, initialize with `{"contacts": []}`. You will use this to skip people you've already messaged.
|
||||
|
||||
## 2. Scan Pending Connections
|
||||
Navigate to `https://www.linkedin.com/mynetwork/invitation-manager/received/`. Wait for the page load event to fire, then sleep 4 seconds.
|
||||
Strip unload handlers:
|
||||
`browser_evaluate("(function(){window.onbeforeunload=null;})()")`
|
||||
|
||||
Extract cards using this specific snippet (handles changing classes and follow invites):
|
||||
```javascript
|
||||
(function(){
|
||||
const btns = Array.from(document.querySelectorAll('button')).filter(b => b.textContent.includes('Accept'));
|
||||
let results = [];
|
||||
for (let b of btns) {
|
||||
let card = b.closest('[role="listitem"]');
|
||||
if (!card) continue;
|
||||
let text = card.textContent.toLowerCase();
|
||||
if (text.includes('invited you to follow') || text.includes('invited you to subscribe')) continue;
|
||||
|
||||
let nameEls = Array.from(card.querySelectorAll('a[href*="/in/"]'));
|
||||
let nameEl = nameEls.find(el => el.textContent.trim().length > 0);
|
||||
|
||||
let r = b.getBoundingClientRect();
|
||||
results.push({
|
||||
first_name: nameEl ? nameEl.textContent.trim().split(/\s+/)[0] : 'there',
|
||||
profile_url: nameEl ? nameEl.href : '',
|
||||
cx: r.x + r.width/2,
|
||||
cy: r.y + r.height/2
|
||||
});
|
||||
}
|
||||
return results;
|
||||
})();
|
||||
```
|
||||
|
||||
## 3. Process Each Card (Max 10 per run)
|
||||
For each card, check if `profile_url` is already in the ledger. If not:
|
||||
1. `browser_click_coordinate(cx, cy)` to click the specific Accept button.
|
||||
2. `sleep(2)`
|
||||
3. `browser_navigate(profile_url, wait_until="load")`
|
||||
4. `sleep(4)`
|
||||
5. `browser_evaluate("(function(){window.onbeforeunload=null; window.addEventListener('beforeunload', e => e.stopImmediatePropagation(), true);})()")`
|
||||
|
||||
## 4. Message the User
|
||||
Click Message Button on their profile:
|
||||
```javascript
|
||||
(function(){
|
||||
const links = Array.from(document.querySelectorAll('a[href*="/messaging/compose/"]'));
|
||||
for (const a of links){
|
||||
if (!a.href.includes('NON_SELF_PROFILE_VIEW') || a.href.includes('body=')) continue;
|
||||
const r = a.getBoundingClientRect();
|
||||
if (r.width === 0 || r.x > 700) continue;
|
||||
return {cx: r.x + r.width / 2, cy: r.y + r.height / 2};
|
||||
}
|
||||
return null;
|
||||
})();
|
||||
```
|
||||
Click that coordinate, then `sleep(2.5)`.
|
||||
|
||||
Find Textarea (it is hidden inside shadow DOM):
|
||||
```javascript
|
||||
(function(){
|
||||
const vh = window.innerHeight, vw = window.innerWidth;
|
||||
const candidates = [];
|
||||
function walk(root){
|
||||
const els = root.querySelectorAll ? root.querySelectorAll('div.msg-form__contenteditable') : [];
|
||||
for (const el of els){
|
||||
const r = el.getBoundingClientRect();
|
||||
if (r.width > 0 && r.height > 0 && r.y >= 0 && r.y + r.height <= vh && r.x >= 0 && r.x + r.width <= vw) {
|
||||
candidates.push({cx: r.x + r.width/2, cy: r.y + r.height/2, area: r.width * r.height});
|
||||
}
|
||||
}
|
||||
const all = root.querySelectorAll ? root.querySelectorAll('*') : [];
|
||||
for (const host of all){ if (host.shadowRoot) walk(host.shadowRoot); }
|
||||
}
|
||||
walk(document);
|
||||
candidates.sort((a, b) => b.area - a.area);
|
||||
return candidates.length ? candidates[0] : null;
|
||||
})();
|
||||
```
|
||||
Click that coordinate, `sleep(1)`.
|
||||
|
||||
Inject text and Send:
|
||||
Construct the message: `Hey {first_name}, thanks for the connection invite! I'm currently building a prediction market for jobs: https://honeycomb.open-hive.com/. If you could check it out and share some feedback, I'd really appreciate it.`
|
||||
|
||||
Escape the string properly for JS injection, then run:
|
||||
```javascript
|
||||
// Replace MSG_TEXT with your actual string
|
||||
browser_evaluate("(function(){ document.execCommand('insertText', false, `MSG_TEXT`); return true; })()")
|
||||
```
|
||||
|
||||
Find Send button (also inside shadow DOM):
|
||||
```javascript
|
||||
(function(){
|
||||
const vh = window.innerHeight;
|
||||
function walk(root){
|
||||
const btns = root.querySelectorAll ? root.querySelectorAll('button') : [];
|
||||
for (const b of btns){
|
||||
const cls = (b.className || '').toString();
|
||||
if (!cls.includes('send-button') && b.textContent.trim() !== 'Send') continue;
|
||||
const r = b.getBoundingClientRect();
|
||||
if (r.width <= 0 || r.y + r.height > vh) continue;
|
||||
return { cx: r.x + r.width/2, cy: r.y + r.height/2, disabled: b.disabled || b.getAttribute('aria-disabled') === 'true' };
|
||||
}
|
||||
const all = root.querySelectorAll ? root.querySelectorAll('*') : [];
|
||||
for (const host of all){ if (host.shadowRoot) { const got = walk(host.shadowRoot); if (got) return got; } }
|
||||
return null;
|
||||
}
|
||||
return walk(document);
|
||||
})();
|
||||
```
|
||||
Click send coordinate, `sleep(2)`.
|
||||
|
||||
## 5. Update Ledger
|
||||
Append the user to `data/linkedin_contacts.json`.
|
||||
```json
|
||||
{
|
||||
"profile_url": "...",
|
||||
"name": "...",
|
||||
"action": "connection_accepted+message_sent",
|
||||
"timestamp": "2026-..."
|
||||
}
|
||||
```
|
||||
`sleep(5)` before moving to the next card to mimic human pacing.
|
||||
+7
-121
@@ -82,29 +82,10 @@ def _find_project_root() -> str:
|
||||
return os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
# When ``--write-root`` is passed on the CLI, ``WRITE_ROOT`` diverges
|
||||
# from ``PROJECT_ROOT``: reads stay permissive (so the queen can
|
||||
# reference framework skills, docs, and the hive repo), but writes
|
||||
# are confined to the write root plus the ``~/.hive/`` escape hatch.
|
||||
# Without this split, the coder-tools sandbox IS the hive git
|
||||
# checkout — every queen-authored skill/ledger/script lands there as
|
||||
# untracked debris, which was the 2026-04-15 incident
|
||||
# (``~/aden/hive/x-rapid-reply/`` and siblings).
|
||||
WRITE_ROOT: str = ""
|
||||
def _resolve_path(path: str) -> str:
|
||||
"""Resolve path relative to PROJECT_ROOT. Raises ValueError if outside.
|
||||
|
||||
|
||||
def _resolve_read_path(path: str) -> str:
|
||||
"""Resolve path for READ operations.
|
||||
|
||||
Allowlist (in order):
|
||||
1. Paths under ``~/.hive/`` — agent session data, colonies, skills.
|
||||
2. Paths under ``PROJECT_ROOT`` — hive repo, for reading framework
|
||||
defaults, docs, examples, etc.
|
||||
3. Relative paths — joined against ``PROJECT_ROOT`` (read-side
|
||||
default; writes use ``WRITE_ROOT`` instead).
|
||||
|
||||
Raises ``ValueError`` when the resolved path falls outside all
|
||||
allowed roots.
|
||||
Also allows access to ~/.hive/ directory for agent session data files.
|
||||
"""
|
||||
# Normalize slashes for cross-platform (e.g. exports/hi_agent from LLM)
|
||||
path = path.replace("/", os.sep)
|
||||
@@ -172,88 +153,6 @@ def _resolve_read_path(path: str) -> str:
|
||||
return resolved
|
||||
|
||||
|
||||
def _resolve_write_path(path: str) -> str:
    """Resolve path for WRITE operations.

    Stricter than the read resolver: only allows writes under:
    1. ``WRITE_ROOT`` — the agent workspace (default: ``~/.hive/workspace/``
       when ``--write-root`` is passed).
    2. ``~/.hive/`` — agent session data.

    Writes to the hive repo (``PROJECT_ROOT``) are REJECTED to keep
    the git checkout clean of queen-authored debris. Relative paths
    resolve against ``WRITE_ROOT``, not ``PROJECT_ROOT``.

    When ``WRITE_ROOT`` equals ``PROJECT_ROOT`` (no split configured),
    this function is semantically identical to ``_resolve_read_path``.

    Args:
        path: Absolute, ``~``-prefixed, or relative path as supplied by
            the caller (typically an LLM tool call, so it may use ``/``
            separators on any platform).

    Returns:
        The resolved absolute path, guaranteed to be under one of the
        allowed write roots (or whatever ``_resolve_read_path`` permits
        in the legacy no-split configuration).

    Raises:
        ValueError: When the resolved path falls outside every allowed
            write root.
    """
    # Normalize slashes + expand ~
    path = path.replace("/", os.sep)
    if path.startswith("~"):
        path = os.path.expanduser(path)

    hive_dir = os.path.expanduser("~/.hive")

    if os.path.isabs(path):
        resolved = os.path.abspath(path)

        # Always allow writes under ~/.hive/
        try:
            if os.path.commonpath([resolved, hive_dir]) == hive_dir:
                return resolved
        except ValueError:
            # commonpath raises when the paths are on different drives
            # (Windows) or mix absolute/relative — treat as "not under".
            pass

        # Writes are ALSO allowed under WRITE_ROOT (the agent workspace).
        try:
            if os.path.commonpath([resolved, WRITE_ROOT]) == WRITE_ROOT:
                return resolved
        except ValueError:
            pass

        # If WRITE_ROOT == PROJECT_ROOT (legacy behavior: no split),
        # fall through to the read-side resolver so existing callers
        # keep working unchanged.
        if WRITE_ROOT == PROJECT_ROOT:
            return _resolve_read_path(path)

        # Split configured AND the path isn't under WRITE_ROOT or
        # ~/.hive/. Reject — this is the whole point of the split.
        raise ValueError(
            f"Access denied: writes must be under '{WRITE_ROOT}' or "
            f"'{hive_dir}'. Path '{path}' is outside both "
            "(use an absolute path under one of those roots, or a "
            "relative path which will resolve under the write root)."
        )
    else:
        # Relative path: resolve against WRITE_ROOT, not PROJECT_ROOT.
        resolved = os.path.abspath(os.path.join(WRITE_ROOT, path))

        # Double-check the resolved absolute path is inside WRITE_ROOT or
        # ~/.hive/ (covers edge cases like "../../etc/passwd" that escape).
        try:
            wr_common = os.path.commonpath([resolved, WRITE_ROOT])
        except ValueError:
            wr_common = ""
        try:
            hv_common = os.path.commonpath([resolved, hive_dir])
        except ValueError:
            hv_common = ""
        if wr_common != WRITE_ROOT and hv_common != hive_dir:
            raise ValueError(
                f"Access denied: resolved write path '{resolved}' escaped the "
                f"allowed roots ('{WRITE_ROOT}', '{hive_dir}')."
            )
        return resolved
|
||||
|
||||
|
||||
# Back-compat alias: existing call sites in this module call
|
||||
# ``_resolve_path`` directly (e.g. for snapshot dirs, agent tool
|
||||
# introspection). Those are all non-user-driven paths; route them
|
||||
# through the read resolver.
|
||||
_resolve_path = _resolve_read_path
|
||||
|
||||
|
||||
# ── Git snapshot system (ported from opencode's shadow git) ───────────────
|
||||
|
||||
|
||||
@@ -1738,45 +1637,32 @@ def validate_agent_package(agent_name: str) -> str:
|
||||
|
||||
|
||||
def main() -> None:
|
||||
global PROJECT_ROOT, SNAPSHOT_DIR, WRITE_ROOT
|
||||
global PROJECT_ROOT, SNAPSHOT_DIR
|
||||
|
||||
from aden_tools.file_ops import register_file_tools
|
||||
|
||||
parser = argparse.ArgumentParser(description="Coder Tools MCP Server")
|
||||
parser.add_argument("--project-root", default="")
|
||||
# ``--write-root`` isolates file writes from the project root so
|
||||
# queen-authored skills, ledgers, and scripts don't land in the
|
||||
# hive git checkout. Reads remain permissive under PROJECT_ROOT
|
||||
# so framework skills, docs, and examples stay accessible.
|
||||
# Defaults to PROJECT_ROOT when empty (legacy behavior).
|
||||
parser.add_argument("--write-root", default="")
|
||||
parser.add_argument("--port", type=int, default=int(os.getenv("CODER_TOOLS_PORT", "4002")))
|
||||
parser.add_argument("--host", default="0.0.0.0")
|
||||
parser.add_argument("--stdio", action="store_true")
|
||||
args = parser.parse_args()
|
||||
|
||||
PROJECT_ROOT = os.path.abspath(args.project_root) if args.project_root else _find_project_root()
|
||||
if args.write_root:
|
||||
WRITE_ROOT = os.path.abspath(os.path.expanduser(args.write_root))
|
||||
os.makedirs(WRITE_ROOT, exist_ok=True)
|
||||
else:
|
||||
WRITE_ROOT = PROJECT_ROOT # legacy: no split
|
||||
SNAPSHOT_DIR = os.path.join(
|
||||
os.path.expanduser("~"),
|
||||
".hive",
|
||||
"snapshots",
|
||||
os.path.basename(PROJECT_ROOT),
|
||||
)
|
||||
logger.info(f"Project root (reads): {PROJECT_ROOT}")
|
||||
logger.info(f"Write root (writes): {WRITE_ROOT}")
|
||||
logger.info(f"Project root: {PROJECT_ROOT}")
|
||||
logger.info(f"Snapshot dir: {SNAPSHOT_DIR}")
|
||||
|
||||
register_file_tools(
|
||||
mcp,
|
||||
resolve_path=_resolve_read_path,
|
||||
resolve_path_write=_resolve_write_path,
|
||||
resolve_path=_resolve_path,
|
||||
before_write=None, # Git snapshot causes stdio deadlock on Windows; undo_changes limited
|
||||
project_root=WRITE_ROOT,
|
||||
project_root=PROJECT_ROOT,
|
||||
)
|
||||
|
||||
if args.stdio:
|
||||
|
||||
@@ -328,7 +328,6 @@ def register_file_tools(
|
||||
mcp: FastMCP,
|
||||
*,
|
||||
resolve_path: Callable[[str], str] | None = None,
|
||||
resolve_path_write: Callable[[str], str] | None = None,
|
||||
before_write: Callable[[], None] | None = None,
|
||||
project_root: str | None = None,
|
||||
) -> None:
|
||||
@@ -336,18 +335,12 @@ def register_file_tools(
|
||||
|
||||
Args:
|
||||
mcp: FastMCP instance to register tools on.
|
||||
resolve_path: Path resolver for READ operations. Default:
|
||||
resolve to absolute path. Raise ValueError to reject paths
|
||||
(e.g. outside sandbox).
|
||||
resolve_path_write: Path resolver for WRITE/EDIT operations.
|
||||
Defaults to ``resolve_path`` when not provided. Split
|
||||
resolvers let callers keep reads permissive (framework
|
||||
skills, docs) while confining writes to an agent workspace.
|
||||
resolve_path: Path resolver. Default: resolve to absolute path.
|
||||
Raise ValueError to reject paths (e.g. outside sandbox).
|
||||
before_write: Hook called before write/edit operations (e.g. git snapshot).
|
||||
project_root: If set, search_files relativizes output paths to this root.
|
||||
"""
|
||||
_resolve = resolve_path or _default_resolve_path
|
||||
_resolve_write = resolve_path_write or _resolve
|
||||
|
||||
@mcp.tool()
|
||||
def read_file(path: str, offset: int = 1, limit: int = 0, hashline: bool = False) -> str:
|
||||
@@ -447,7 +440,7 @@ def register_file_tools(
|
||||
path: Absolute file path to write.
|
||||
content: Complete file content to write.
|
||||
"""
|
||||
resolved = _resolve_write(path)
|
||||
resolved = _resolve(path)
|
||||
resolved_path = Path(resolved)
|
||||
|
||||
# Stale-edit guard: an existing file must have been read recently
|
||||
@@ -516,7 +509,7 @@ def register_file_tools(
|
||||
new_text: Replacement text.
|
||||
replace_all: Replace all occurrences (default: first only).
|
||||
"""
|
||||
resolved = _resolve_write(path)
|
||||
resolved = _resolve(path)
|
||||
if not os.path.isfile(resolved):
|
||||
return f"Error: File not found: {path}"
|
||||
|
||||
@@ -822,7 +815,7 @@ def register_file_tools(
|
||||
return "Error: Too many edits in one call (max 100). Split into multiple calls."
|
||||
|
||||
# 2. Read file
|
||||
resolved = _resolve_write(path)
|
||||
resolved = _resolve(path)
|
||||
if not os.path.isfile(resolved):
|
||||
return f"Error: File not found: {path}"
|
||||
|
||||
|
||||
@@ -45,8 +45,8 @@ def register_tools(mcp: FastMCP) -> None:
|
||||
- Tabs: browser_tabs, browser_open, browser_close, browser_focus
|
||||
- Navigation: browser_navigate, browser_go_back, browser_go_forward, browser_reload
|
||||
- Inspection: browser_screenshot, browser_snapshot, browser_console
|
||||
- Interactions: browser_click, browser_click_coordinate, browser_type, browser_fill,
|
||||
browser_press, browser_hover, browser_select, browser_scroll, browser_drag
|
||||
- Interactions: browser_click, browser_click_coordinate, browser_type, browser_type_focused,
|
||||
browser_fill, browser_press, browser_hover, browser_select, browser_scroll, browser_drag
|
||||
- Advanced: browser_wait, browser_evaluate, browser_get_text, browser_get_attribute,
|
||||
browser_resize, browser_upload, browser_dialog
|
||||
"""
|
||||
|
||||
+195
-87
@@ -80,6 +80,61 @@ async def _adaptive_poll_sleep(elapsed_s: float) -> None:
|
||||
_interaction_highlights: dict[int, dict] = {}
|
||||
|
||||
|
||||
# Compact descriptor of the focused element. Returned by both click()
|
||||
# and click_coordinate() so the agent can verify it focused what it
|
||||
# intended. When the outer document's activeElement is an <iframe>,
|
||||
# we recurse into the iframe's document (same-origin only) so the
|
||||
# response describes the real inner element — otherwise the agent
|
||||
# always sees {tag: "iframe"} and can't tell whether it hit the
|
||||
# composer or something else inside the frame (e.g. a sidebar item in
|
||||
# LinkedIn's #interop-outlet messaging overlay).
|
||||
_FOCUSED_ELEMENT_JS = """
|
||||
(function() {
|
||||
function describe(el) {
|
||||
var rect = el.getBoundingClientRect();
|
||||
var attrs = {};
|
||||
for (var i = 0; i < el.attributes.length && i < 10; i++) {
|
||||
attrs[el.attributes[i].name] = el.attributes[i].value.substring(0, 200);
|
||||
}
|
||||
return {
|
||||
tag: el.tagName.toLowerCase(),
|
||||
id: el.id || null,
|
||||
className: el.className || null,
|
||||
name: el.getAttribute('name') || null,
|
||||
type: el.getAttribute('type') || null,
|
||||
role: el.getAttribute('role') || null,
|
||||
contenteditable: el.getAttribute('contenteditable') || null,
|
||||
text: (el.innerText || '').substring(0, 200),
|
||||
value: (el.value !== undefined ? String(el.value).substring(0, 200) : null),
|
||||
attributes: attrs,
|
||||
rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height }
|
||||
};
|
||||
}
|
||||
var el = document.activeElement;
|
||||
if (!el || el === document.body) return null;
|
||||
// Descend into same-origin iframes. Capped at 5 levels of nesting
|
||||
// to bound cost and prevent a pathological loop. If a frame is
|
||||
// cross-origin, contentDocument throws; catch and report the
|
||||
// outermost iframe instead.
|
||||
var framePath = [];
|
||||
var depth = 0;
|
||||
while (el && (el.tagName === 'IFRAME' || el.tagName === 'FRAME') && depth < 5) {
|
||||
framePath.push(el.id || el.getAttribute('data-testid') || el.tagName.toLowerCase());
|
||||
var innerDoc = null;
|
||||
try { innerDoc = el.contentDocument; } catch (e) { innerDoc = null; }
|
||||
if (!innerDoc) break;
|
||||
var innerActive = innerDoc.activeElement;
|
||||
if (!innerActive || innerActive === innerDoc.body) break;
|
||||
el = innerActive;
|
||||
depth++;
|
||||
}
|
||||
var out = describe(el);
|
||||
if (framePath.length) out.inFrame = framePath;
|
||||
return out;
|
||||
})()
|
||||
"""
|
||||
|
||||
|
||||
def _get_active_profile() -> str:
|
||||
"""Get the current active profile from context variable."""
|
||||
try:
|
||||
@@ -763,7 +818,8 @@ class BeelineBridge:
|
||||
rx = value.get("x", 0) - value.get("width", 0) / 2
|
||||
ry = value.get("y", 0) - value.get("height", 0) / 2
|
||||
await self.highlight_rect(tab_id, rx, ry, value.get("width", 0), value.get("height", 0), label=selector)
|
||||
return {
|
||||
focused_info = await self._read_focused_element(tab_id)
|
||||
resp = {
|
||||
"ok": True,
|
||||
"action": "click",
|
||||
"selector": selector,
|
||||
@@ -771,6 +827,9 @@ class BeelineBridge:
|
||||
"y": value.get("y", 0),
|
||||
"method": "javascript",
|
||||
}
|
||||
if focused_info:
|
||||
resp["focused_element"] = focused_info
|
||||
return resp
|
||||
|
||||
# If JavaScript click failed, try CDP approach
|
||||
if isinstance(value, dict) and value.get("error"):
|
||||
@@ -883,7 +942,8 @@ class BeelineBridge:
|
||||
w = bounds_value.get("width", 0)
|
||||
h = bounds_value.get("height", 0)
|
||||
await self.highlight_rect(tab_id, x - w / 2, y - h / 2, w, h, label=selector)
|
||||
return {
|
||||
focused_info = await self._read_focused_element(tab_id)
|
||||
resp = {
|
||||
"ok": True,
|
||||
"action": "click",
|
||||
"selector": selector,
|
||||
@@ -891,10 +951,29 @@ class BeelineBridge:
|
||||
"y": y,
|
||||
"method": "cdp",
|
||||
}
|
||||
if focused_info:
|
||||
resp["focused_element"] = focused_info
|
||||
return resp
|
||||
|
||||
except Exception as e:
|
||||
return {"ok": False, "error": f"Click failed: {e}"}
|
||||
|
||||
async def _read_focused_element(self, tab_id: int) -> dict | None:
|
||||
"""Read document.activeElement and return a compact descriptor.
|
||||
|
||||
Returns None on any failure — never raises. Used by both click
|
||||
paths (selector-based click() and click_coordinate()) so the
|
||||
agent gets the same response shape regardless of which one was
|
||||
called. The descriptor lets the agent answer "did my click land
|
||||
on an editable?" without a second round-trip.
|
||||
"""
|
||||
try:
|
||||
await self._try_enable_domain(tab_id, "Runtime")
|
||||
result = await self.evaluate(tab_id, _FOCUSED_ELEMENT_JS)
|
||||
return (result or {}).get("result")
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
async def click_coordinate(self, tab_id: int, x: float, y: float, button: str = "left") -> dict:
|
||||
"""Click at specific coordinates."""
|
||||
await self.cdp_attach(tab_id)
|
||||
@@ -904,18 +983,11 @@ class BeelineBridge:
|
||||
button_map = {"left": "left", "right": "right", "middle": "middle"}
|
||||
cdp_button = button_map.get(button, "left")
|
||||
|
||||
from .tools.inspection import _screenshot_css_scales, _screenshot_scales
|
||||
|
||||
phys_scale = _screenshot_scales.get(tab_id, "unset")
|
||||
css_scale = _screenshot_css_scales.get(tab_id, "unset")
|
||||
logger.info(
|
||||
"click_coordinate tab=%d: x=%.1f, y=%.1f → CDP Input.dispatchMouseEvent. "
|
||||
"stored_scales: physicalScale=%s, cssScale=%s",
|
||||
"click_coordinate tab=%d: x=%.1f, y=%.1f → CDP Input.dispatchMouseEvent",
|
||||
tab_id,
|
||||
x,
|
||||
y,
|
||||
phys_scale,
|
||||
css_scale,
|
||||
)
|
||||
|
||||
await self._cdp(
|
||||
@@ -930,15 +1002,20 @@ class BeelineBridge:
|
||||
)
|
||||
|
||||
await self.highlight_point(tab_id, x, y, label=f"click ({x},{y})")
|
||||
return {"ok": True, "action": "click_coordinate", "x": x, "y": y}
|
||||
|
||||
focused_info = await self._read_focused_element(tab_id)
|
||||
resp = {"ok": True, "action": "click_coordinate", "x": x, "y": y}
|
||||
if focused_info:
|
||||
resp["focused_element"] = focused_info
|
||||
return resp
|
||||
|
||||
async def type_text(
|
||||
self,
|
||||
tab_id: int,
|
||||
selector: str,
|
||||
selector: str | None,
|
||||
text: str,
|
||||
clear_first: bool = True,
|
||||
delay_ms: int = 0,
|
||||
delay_ms: int = 1,
|
||||
timeout_ms: int = 30000,
|
||||
use_insert_text: bool = True,
|
||||
) -> dict:
|
||||
@@ -974,79 +1051,98 @@ class BeelineBridge:
|
||||
await self._try_enable_domain(tab_id, "Input")
|
||||
await self._try_enable_domain(tab_id, "Runtime")
|
||||
|
||||
# Find + scroll + (optionally) clear via JS. We still need the
|
||||
# rect, and clearing via `.value = ''` / `.textContent = ''`
|
||||
# is the most reliable way to reset pre-existing content.
|
||||
focus_script = f"""
|
||||
(function() {{
|
||||
const el = document.querySelector({json.dumps(selector)});
|
||||
if (!el) return null;
|
||||
if selector is not None:
|
||||
# Find + scroll + (optionally) clear via JS. We still need the
|
||||
# rect, and clearing via `.value = ''` / `.textContent = ''`
|
||||
# is the most reliable way to reset pre-existing content.
|
||||
focus_script = f"""
|
||||
(function() {{
|
||||
const el = document.querySelector({json.dumps(selector)});
|
||||
if (!el) return null;
|
||||
|
||||
// Scroll into view so the click lands in-viewport.
|
||||
el.scrollIntoView({{ block: 'center' }});
|
||||
// Scroll into view so the click lands in-viewport.
|
||||
el.scrollIntoView({{ block: 'center' }});
|
||||
|
||||
// Clear if requested.
|
||||
if ({str(clear_first).lower()}) {{
|
||||
if (el.value !== undefined) {{
|
||||
el.value = '';
|
||||
// Nudge React's onChange — the framework reads
|
||||
// .value via a setter hook, and without firing
|
||||
// an input event the component state remains
|
||||
// stale after our value assignment.
|
||||
el.dispatchEvent(new Event('input', {{bubbles: true}}));
|
||||
}} else if (el.isContentEditable) {{
|
||||
el.textContent = '';
|
||||
el.dispatchEvent(new Event('input', {{bubbles: true}}));
|
||||
// Clear if requested.
|
||||
if ({str(clear_first).lower()}) {{
|
||||
if (el.value !== undefined) {{
|
||||
el.value = '';
|
||||
// Nudge React's onChange — the framework reads
|
||||
// .value via a setter hook, and without firing
|
||||
// an input event the component state remains
|
||||
// stale after our value assignment.
|
||||
el.dispatchEvent(new Event('input', {{bubbles: true}}));
|
||||
}} else if (el.isContentEditable) {{
|
||||
el.textContent = '';
|
||||
el.dispatchEvent(new Event('input', {{bubbles: true}}));
|
||||
}}
|
||||
}}
|
||||
}}
|
||||
|
||||
const r = el.getBoundingClientRect();
|
||||
return {{
|
||||
x: r.left + r.width / 2,
|
||||
y: r.top + r.height / 2,
|
||||
w: r.width,
|
||||
h: r.height,
|
||||
}};
|
||||
}})();
|
||||
"""
|
||||
const r = el.getBoundingClientRect();
|
||||
return {{
|
||||
x: r.left + r.width / 2,
|
||||
y: r.top + r.height / 2,
|
||||
w: r.width,
|
||||
h: r.height,
|
||||
}};
|
||||
}})();
|
||||
"""
|
||||
|
||||
focus_result = await self.evaluate(tab_id, focus_script)
|
||||
rect = (focus_result or {}).get("result")
|
||||
|
||||
if not rect:
|
||||
# Element not found — wait + retry until timeout.
|
||||
deadline = asyncio.get_event_loop().time() + timeout_ms / 1000
|
||||
while asyncio.get_event_loop().time() < deadline:
|
||||
result = await self.evaluate(tab_id, focus_script)
|
||||
rect = (result or {}).get("result") if result else None
|
||||
if rect:
|
||||
break
|
||||
await asyncio.sleep(0.1)
|
||||
focus_result = await self.evaluate(tab_id, focus_script)
|
||||
rect = (focus_result or {}).get("result")
|
||||
|
||||
if not rect:
|
||||
return {"ok": False, "error": f"Element not found: {selector}"}
|
||||
# Element not found — wait + retry until timeout.
|
||||
deadline = asyncio.get_event_loop().time() + timeout_ms / 1000
|
||||
while asyncio.get_event_loop().time() < deadline:
|
||||
result = await self.evaluate(tab_id, focus_script)
|
||||
rect = (result or {}).get("result") if result else None
|
||||
if rect:
|
||||
break
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
if not rect.get("w") or not rect.get("h"):
|
||||
return {
|
||||
"ok": False,
|
||||
"error": f"Element has zero dimensions, can't click to focus: {selector}",
|
||||
}
|
||||
if not rect:
|
||||
return {"ok": False, "error": f"Element not found: {selector}"}
|
||||
|
||||
# Fire a real CDP pointer click at the element's center. This is
|
||||
# what unblocks rich-text editors — JS el.focus() is not enough.
|
||||
click_x = rect["x"]
|
||||
click_y = rect["y"]
|
||||
await self._cdp(
|
||||
tab_id,
|
||||
"Input.dispatchMouseEvent",
|
||||
{"type": "mousePressed", "x": click_x, "y": click_y, "button": "left", "clickCount": 1},
|
||||
)
|
||||
await self._cdp(
|
||||
tab_id,
|
||||
"Input.dispatchMouseEvent",
|
||||
{"type": "mouseReleased", "x": click_x, "y": click_y, "button": "left", "clickCount": 1},
|
||||
)
|
||||
await asyncio.sleep(0.15) # Let focus / editor-init animations settle.
|
||||
if not rect.get("w") or not rect.get("h"):
|
||||
return {
|
||||
"ok": False,
|
||||
"error": f"Element has zero dimensions, can't click to focus: {selector}",
|
||||
}
|
||||
|
||||
# Fire a real CDP pointer click at the element's center. This is
|
||||
# what unblocks rich-text editors — JS el.focus() is not enough.
|
||||
click_x = rect["x"]
|
||||
click_y = rect["y"]
|
||||
await self._cdp(
|
||||
tab_id,
|
||||
"Input.dispatchMouseEvent",
|
||||
{"type": "mousePressed", "x": click_x, "y": click_y, "button": "left", "clickCount": 1},
|
||||
)
|
||||
await self._cdp(
|
||||
tab_id,
|
||||
"Input.dispatchMouseEvent",
|
||||
{"type": "mouseReleased", "x": click_x, "y": click_y, "button": "left", "clickCount": 1},
|
||||
)
|
||||
await asyncio.sleep(0.15) # Let focus / editor-init animations settle.
|
||||
else:
|
||||
# No selector — assume the caller already focused the target
|
||||
# element (e.g. via browser_click_coordinate). Just clear the
|
||||
# active element if requested, then insert text directly.
|
||||
if clear_first:
|
||||
await self.evaluate(tab_id, """
|
||||
(function() {
|
||||
const el = document.activeElement;
|
||||
if (!el) return;
|
||||
if (el.value !== undefined) {
|
||||
el.value = '';
|
||||
el.dispatchEvent(new Event('input', {bubbles: true}));
|
||||
} else if (el.isContentEditable) {
|
||||
el.textContent = '';
|
||||
el.dispatchEvent(new Event('input', {bubbles: true}));
|
||||
}
|
||||
})();
|
||||
""")
|
||||
|
||||
if use_insert_text and delay_ms <= 0:
|
||||
# CDP Input.insertText is the most reliable way to insert
|
||||
@@ -1086,16 +1182,28 @@ class BeelineBridge:
|
||||
await asyncio.sleep(delay_ms / 1000)
|
||||
|
||||
# Highlight the element that was typed into
|
||||
rect_result = await self.evaluate(
|
||||
tab_id,
|
||||
f"(function(){{const el=document.querySelector("
|
||||
f"{json.dumps(selector)});if(!el)return null;"
|
||||
f"const r=el.getBoundingClientRect();"
|
||||
f"return{{x:r.left,y:r.top,w:r.width,h:r.height}};}})()",
|
||||
)
|
||||
rect = (rect_result or {}).get("result")
|
||||
if rect:
|
||||
await self.highlight_rect(tab_id, rect["x"], rect["y"], rect["w"], rect["h"], label=selector)
|
||||
if selector is not None:
|
||||
rect_result = await self.evaluate(
|
||||
tab_id,
|
||||
f"(function(){{const el=document.querySelector("
|
||||
f"{json.dumps(selector)});if(!el)return null;"
|
||||
f"const r=el.getBoundingClientRect();"
|
||||
f"return{{x:r.left,y:r.top,w:r.width,h:r.height}};}})()",
|
||||
)
|
||||
rect = (rect_result or {}).get("result")
|
||||
if rect:
|
||||
await self.highlight_rect(tab_id, rect["x"], rect["y"], rect["w"], rect["h"], label=selector)
|
||||
else:
|
||||
# Highlight the active element when no selector was provided
|
||||
rect_result = await self.evaluate(
|
||||
tab_id,
|
||||
"(function(){const el=document.activeElement;if(!el)return null;"
|
||||
"const r=el.getBoundingClientRect();"
|
||||
"return{x:r.left,y:r.top,w:r.width,h:r.height};})()",
|
||||
)
|
||||
rect = (rect_result or {}).get("result")
|
||||
if rect:
|
||||
await self.highlight_rect(tab_id, rect["x"], rect["y"], rect["w"], rect["h"], label="active element")
|
||||
return {"ok": True, "action": "type", "selector": selector, "length": len(text)}
|
||||
|
||||
# CDP Input.dispatchKeyEvent modifiers bitmask.
|
||||
|
||||
@@ -0,0 +1,186 @@
|
||||
"""Tool schemas for the bridge remote HTTP API (port 9230)."""
|
||||
|
||||
TOOL_SCHEMAS: dict[str, dict] = {
|
||||
"browser_click": {
|
||||
"description": "Click an element on the page.",
|
||||
"params": {
|
||||
"selector": {"type": "string", "required": True},
|
||||
"tab_id": {"type": "integer"},
|
||||
"profile": {"type": "string"},
|
||||
"button": {"type": "string", "default": "left", "enum": ["left", "right", "middle"]},
|
||||
"double_click": {"type": "boolean", "default": False},
|
||||
"timeout_ms": {"type": "integer", "default": 5000},
|
||||
},
|
||||
},
|
||||
"browser_click_coordinate": {
|
||||
"description": "Click at specific viewport coordinates (CSS pixels).",
|
||||
"params": {
|
||||
"x": {"type": "number", "required": True},
|
||||
"y": {"type": "number", "required": True},
|
||||
"tab_id": {"type": "integer"},
|
||||
"profile": {"type": "string"},
|
||||
"button": {"type": "string", "default": "left"},
|
||||
},
|
||||
},
|
||||
"browser_type": {
|
||||
"description": "Type text into an input element.",
|
||||
"params": {
|
||||
"selector": {"type": "string", "required": True},
|
||||
"text": {"type": "string", "required": True},
|
||||
"tab_id": {"type": "integer"},
|
||||
"profile": {"type": "string"},
|
||||
"delay_ms": {"type": "integer", "default": 1},
|
||||
"clear_first": {"type": "boolean", "default": True},
|
||||
"timeout_ms": {"type": "integer", "default": 30000},
|
||||
"use_insert_text": {"type": "boolean", "default": True},
|
||||
},
|
||||
},
|
||||
"browser_fill": {
|
||||
"description": "Fill an input element (clears existing content first).",
|
||||
"params": {
|
||||
"selector": {"type": "string", "required": True},
|
||||
"value": {"type": "string", "required": True},
|
||||
"tab_id": {"type": "integer"},
|
||||
"profile": {"type": "string"},
|
||||
"timeout_ms": {"type": "integer", "default": 30000},
|
||||
},
|
||||
},
|
||||
"browser_type_focused": {
|
||||
"description": "Type text into the already-focused element. Use after browser_click_coordinate has focused the target. Faster than browser_press for multi-character input.",
|
||||
"params": {
|
||||
"text": {"type": "string", "required": True},
|
||||
"tab_id": {"type": "integer"},
|
||||
"profile": {"type": "string"},
|
||||
"delay_ms": {"type": "integer", "default": 1},
|
||||
"clear_first": {"type": "boolean", "default": True},
|
||||
"use_insert_text": {"type": "boolean", "default": True},
|
||||
},
|
||||
},
|
||||
"browser_press": {
|
||||
"description": "Press a keyboard key, optionally with modifiers.",
|
||||
"params": {
|
||||
"key": {"type": "string", "required": True},
|
||||
"selector": {"type": "string"},
|
||||
"tab_id": {"type": "integer"},
|
||||
"profile": {"type": "string"},
|
||||
"modifiers": {"type": "array", "items": "string"},
|
||||
},
|
||||
},
|
||||
"browser_press_at": {
|
||||
"description": "Move mouse to coordinates then press a key.",
|
||||
"params": {
|
||||
"x": {"type": "number", "required": True},
|
||||
"y": {"type": "number", "required": True},
|
||||
"key": {"type": "string", "required": True},
|
||||
"tab_id": {"type": "integer"},
|
||||
"profile": {"type": "string"},
|
||||
},
|
||||
},
|
||||
"browser_navigate": {
|
||||
"description": "Navigate a tab to a URL.",
|
||||
"params": {
|
||||
"url": {"type": "string", "required": True},
|
||||
"tab_id": {"type": "integer"},
|
||||
"profile": {"type": "string"},
|
||||
"wait_until": {"type": "string", "default": "load"},
|
||||
},
|
||||
},
|
||||
"browser_go_back": {
|
||||
"description": "Navigate back in browser history.",
|
||||
"params": {
|
||||
"tab_id": {"type": "integer"},
|
||||
"profile": {"type": "string"},
|
||||
},
|
||||
},
|
||||
"browser_go_forward": {
|
||||
"description": "Navigate forward in browser history.",
|
||||
"params": {
|
||||
"tab_id": {"type": "integer"},
|
||||
"profile": {"type": "string"},
|
||||
},
|
||||
},
|
||||
"browser_reload": {
|
||||
"description": "Reload the current page.",
|
||||
"params": {
|
||||
"tab_id": {"type": "integer"},
|
||||
"profile": {"type": "string"},
|
||||
},
|
||||
},
|
||||
"browser_scroll": {
|
||||
"description": "Scroll the page.",
|
||||
"params": {
|
||||
"direction": {"type": "string", "default": "down", "enum": ["up", "down", "left", "right"]},
|
||||
"amount": {"type": "integer", "default": 500},
|
||||
"tab_id": {"type": "integer"},
|
||||
"profile": {"type": "string"},
|
||||
},
|
||||
},
|
||||
"browser_hover": {
|
||||
"description": "Hover over an element.",
|
||||
"params": {
|
||||
"selector": {"type": "string", "required": True},
|
||||
"tab_id": {"type": "integer"},
|
||||
"profile": {"type": "string"},
|
||||
"timeout_ms": {"type": "integer", "default": 30000},
|
||||
},
|
||||
},
|
||||
"browser_hover_coordinate": {
|
||||
"description": "Hover at CSS pixel coordinates.",
|
||||
"params": {
|
||||
"x": {"type": "number", "required": True},
|
||||
"y": {"type": "number", "required": True},
|
||||
"tab_id": {"type": "integer"},
|
||||
"profile": {"type": "string"},
|
||||
},
|
||||
},
|
||||
"browser_select": {
|
||||
"description": "Select option(s) in a dropdown.",
|
||||
"params": {
|
||||
"selector": {"type": "string", "required": True},
|
||||
"values": {"type": "array", "required": True},
|
||||
"tab_id": {"type": "integer"},
|
||||
"profile": {"type": "string"},
|
||||
},
|
||||
},
|
||||
"browser_screenshot": {
|
||||
"description": "Take a screenshot of the page (returns base64 PNG).",
|
||||
"params": {
|
||||
"tab_id": {"type": "integer"},
|
||||
"profile": {"type": "string"},
|
||||
"full_page": {"type": "boolean", "default": False},
|
||||
},
|
||||
},
|
||||
"browser_snapshot": {
|
||||
"description": "Get the accessibility tree snapshot of the page.",
|
||||
"params": {
|
||||
"tab_id": {"type": "integer"},
|
||||
"profile": {"type": "string"},
|
||||
},
|
||||
},
|
||||
"browser_evaluate": {
|
||||
"description": "Evaluate JavaScript in the page.",
|
||||
"params": {
|
||||
"expression": {"type": "string", "required": True},
|
||||
"tab_id": {"type": "integer"},
|
||||
"profile": {"type": "string"},
|
||||
},
|
||||
},
|
||||
"browser_get_text": {
|
||||
"description": "Get text content of an element.",
|
||||
"params": {
|
||||
"selector": {"type": "string", "required": True},
|
||||
"tab_id": {"type": "integer"},
|
||||
"profile": {"type": "string"},
|
||||
},
|
||||
},
|
||||
"browser_wait": {
|
||||
"description": "Wait for an element or text to appear on the page.",
|
||||
"params": {
|
||||
"selector": {"type": "string"},
|
||||
"text": {"type": "string"},
|
||||
"tab_id": {"type": "integer"},
|
||||
"profile": {"type": "string"},
|
||||
"timeout_ms": {"type": "integer", "default": 30000},
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -96,59 +96,15 @@ def register_advanced_tools(mcp: FastMCP) -> None:
|
||||
profile: str | None = None,
|
||||
) -> dict:
|
||||
"""
|
||||
ESCAPE HATCH — execute raw JavaScript. USE ONLY as a last
|
||||
resort. 99% of browser automation does NOT need this tool.
|
||||
Before reaching for it, try a semantic tool first:
|
||||
|
||||
- browser_click / browser_click_coordinate → for clicks
|
||||
- browser_type(use_insert_text=True) → for text input
|
||||
- browser_screenshot + browser_get_rect → for locating elements
|
||||
- browser_shadow_query → for shadow-DOM selectors
|
||||
- browser_get_text / browser_get_attribute → for reading state
|
||||
|
||||
ANTI-PATTERNS — stop and switch tools if you notice yourself:
|
||||
|
||||
1. Calling browser_evaluate 2+ times in a row to guess at
|
||||
selectors. Each attempt costs ~30 tokens of JS + a full
|
||||
LLM round-trip. After 2 empty results, the selector
|
||||
strategy is wrong — pivot to browser_screenshot +
|
||||
browser_click_coordinate. The screenshot + coord path
|
||||
works on shadow DOM, iframes, and React-obfuscated
|
||||
class names indifferently.
|
||||
|
||||
2. Writing a walk(root) recursive shadow-DOM traversal
|
||||
function. Use browser_shadow_query — it does the
|
||||
traversal in C++ via CDP's querySelector, not in JS.
|
||||
|
||||
3. Calling document.execCommand('insertText', ...) to type
|
||||
into Lexical / contenteditable. Use
|
||||
browser_type(use_insert_text=True, text='...') instead.
|
||||
It handles the click-then-focus-then-insert sequence
|
||||
with built-in retries.
|
||||
|
||||
4. Trying to read a nested iframe's contentDocument. That
|
||||
usually fails (cross-origin or late hydration). Use
|
||||
browser_screenshot to see it, then browser_click_coordinate.
|
||||
|
||||
LEGITIMATE uses (when nothing semantic fits):
|
||||
|
||||
- Reading a computed style, window size, or scroll position
|
||||
that no tool exposes.
|
||||
- Firing a one-shot site-specific API call (e.g. an analytics
|
||||
beacon the test needs).
|
||||
- Stripping an onbeforeunload handler that blocks navigation.
|
||||
- Probing for shadow roots whose existence is conditional.
|
||||
Execute JavaScript in the browser context.
|
||||
|
||||
Args:
|
||||
script: JavaScript code to execute. Keep it small. If you
|
||||
need to traverse the DOM, prefer browser_shadow_query.
|
||||
script: JavaScript code to execute
|
||||
tab_id: Chrome tab ID (default: active tab)
|
||||
profile: Browser profile name (default: "default")
|
||||
|
||||
Returns:
|
||||
Dict with evaluation result. On a "find X" script that
|
||||
returns [] or null: do NOT retry with a different
|
||||
selector — take a screenshot and switch to coordinates.
|
||||
Dict with evaluation result
|
||||
"""
|
||||
bridge = get_bridge()
|
||||
if not bridge or not bridge.is_connected:
|
||||
|
||||
@@ -12,7 +12,6 @@ import io
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from typing import Literal
|
||||
|
||||
from fastmcp import FastMCP
|
||||
from mcp.types import ImageContent, TextContent
|
||||
@@ -23,32 +22,31 @@ from .tabs import _get_context
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Target width for normalized screenshots (px in the delivered image)
|
||||
_SCREENSHOT_WIDTH = 600
|
||||
|
||||
# Maps tab_id -> physical scale: image_coord × scale = physical pixels (for CDP Input events)
|
||||
_screenshot_scales: dict[int, float] = {}
|
||||
# Maps tab_id -> CSS scale: image_coord × scale = CSS pixels (for DOM APIs / getBoundingClientRect)
|
||||
_screenshot_css_scales: dict[int, float] = {}
|
||||
|
||||
|
||||
def _resize_and_annotate(
|
||||
data: str,
|
||||
css_width: int,
|
||||
dpr: float = 1.0,
|
||||
highlights: list[dict] | None = None,
|
||||
width: int = _SCREENSHOT_WIDTH,
|
||||
) -> tuple[str, float, float]:
|
||||
"""Resize a base64 PNG to _SCREENSHOT_WIDTH wide, annotate highlights.
|
||||
) -> tuple[str, float]:
|
||||
"""Resize a captured PNG so that image pixels == CSS pixels, then
|
||||
re-encode as JPEG quality 75.
|
||||
|
||||
Returns (new_b64, physical_scale, css_scale) where:
|
||||
physical_scale = physical_px_per_image_px (multiply image coords → physical px)
|
||||
css_scale = css_px_per_image_px (multiply image coords → CSS px for DOM APIs)
|
||||
Output is ``css_width × round(orig_h × css_width / orig_w)``. The
|
||||
1:1 image↔CSS mapping means a coord the agent reads off the image
|
||||
is the same coord CDP expects — no conversion, no scale factors to
|
||||
remember. Highlight annotations are drawn directly in CSS px (which
|
||||
equal image px after resize).
|
||||
|
||||
Highlights have x,y,w,h in CSS pixels (what getBoundingClientRect returns,
|
||||
and what CDP Input.dispatchMouseEvent accepts).
|
||||
Falls back to original data if Pillow unavailable or resize fails.
|
||||
Returns ``(new_b64, physical_scale)`` where
|
||||
``physical_scale = orig_png_w / css_width`` (= DPR). Kept for logs
|
||||
and HiDPI debugging only.
|
||||
"""
|
||||
if not css_width or css_width <= 0:
|
||||
# Capture path always supplies css_width; only reach here on a
|
||||
# degraded bridge response. Return the raw image untouched.
|
||||
return data, 1.0
|
||||
|
||||
try:
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
except ImportError:
|
||||
@@ -58,48 +56,39 @@ def _resize_and_annotate(
|
||||
import struct
|
||||
|
||||
orig_w = struct.unpack(">I", raw[16:20])[0]
|
||||
raw_size_bytes = len(raw)
|
||||
physical_scale = orig_w / width if orig_w and width else 1.0
|
||||
css_scale = (css_width / width) if css_width else (physical_scale / max(dpr, 1.0))
|
||||
physical_scale = orig_w / css_width if orig_w else 1.0
|
||||
logger.warning(
|
||||
"PIL not available — screenshot resize SKIPPED (cannot downscale image). "
|
||||
"raw_size=%d bytes, png_width=%d, css_width=%s, dpr=%s, target_width=%d. "
|
||||
"Returning ORIGINAL image with computed scales: physicalScale=%.4f, cssScale=%.4f. "
|
||||
"Agent must use browser_coords() to convert image positions before clicking.",
|
||||
raw_size_bytes,
|
||||
orig_w,
|
||||
"PIL not available — screenshot resize+convert SKIPPED. "
|
||||
"Returning original physical-px PNG. physicalScale=%.4f, "
|
||||
"css_width=%d, dpr=%s. Clicks WILL be misaligned; install Pillow.",
|
||||
physical_scale,
|
||||
css_width,
|
||||
dpr,
|
||||
width,
|
||||
physical_scale,
|
||||
css_scale,
|
||||
)
|
||||
return data, round(physical_scale, 4), round(css_scale, 4)
|
||||
return data, round(physical_scale, 4)
|
||||
|
||||
try:
|
||||
raw = base64.b64decode(data)
|
||||
img = Image.open(io.BytesIO(raw)).convert("RGBA")
|
||||
orig_w, orig_h = img.size
|
||||
|
||||
physical_scale = orig_w / width
|
||||
css_scale = (css_width / width) if css_width else (physical_scale / max(dpr, 1.0))
|
||||
physical_scale = orig_w / css_width
|
||||
new_w = css_width
|
||||
new_h = round(orig_h * new_w / orig_w)
|
||||
if (new_w, new_h) != img.size:
|
||||
img = img.resize((new_w, new_h), Image.LANCZOS)
|
||||
|
||||
logger.info(
|
||||
"Screenshot resize: orig=%dx%d → target=%dx%d, css_width=%s, dpr=%s, physicalScale=%.4f, cssScale=%.4f",
|
||||
"Screenshot: orig=%dx%d → out=%dx%d (css_width=%d, dpr=%s), physicalScale=%.4f",
|
||||
orig_w,
|
||||
orig_h,
|
||||
width,
|
||||
round(orig_h * width / orig_w),
|
||||
new_w,
|
||||
new_h,
|
||||
css_width,
|
||||
dpr,
|
||||
physical_scale,
|
||||
css_scale,
|
||||
)
|
||||
|
||||
new_w = width
|
||||
new_h = round(orig_h * new_w / orig_w)
|
||||
img = img.resize((new_w, new_h), Image.LANCZOS)
|
||||
|
||||
if highlights:
|
||||
overlay = Image.new("RGBA", img.size, (0, 0, 0, 0))
|
||||
draw = ImageDraw.Draw(overlay)
|
||||
@@ -111,11 +100,11 @@ def _resize_and_annotate(
|
||||
for h in highlights:
|
||||
kind = h.get("kind", "rect")
|
||||
label = h.get("label", "")
|
||||
# Highlights are in CSS px → convert to image px
|
||||
ix = h["x"] / css_scale
|
||||
iy = h["y"] / css_scale
|
||||
iw = h.get("w", 0) / css_scale
|
||||
ih = h.get("h", 0) / css_scale
|
||||
# Highlights are in CSS px. Image px == CSS px, no conversion.
|
||||
ix = h["x"]
|
||||
iy = h["y"]
|
||||
iw = h.get("w", 0)
|
||||
ih = h.get("h", 0)
|
||||
|
||||
if kind == "point":
|
||||
cx, cy, r = ix, iy, 10
|
||||
@@ -135,11 +124,9 @@ def _resize_and_annotate(
|
||||
width=2,
|
||||
)
|
||||
|
||||
# Label: show image pixel position so user knows where to look
|
||||
img_coords = f"img:({round(ix)},{round(iy)})"
|
||||
display_label = f"{img_coords} {label}" if label else img_coords
|
||||
display_label = f"({round(ix)},{round(iy)}) {label}".strip()
|
||||
lx, ly = ix, max(2, iy - 16)
|
||||
lx = max(2, min(lx, width - 120))
|
||||
lx = max(2, min(lx, new_w - 120))
|
||||
bbox = draw.textbbox((lx, ly), display_label, font=font)
|
||||
pad = 3
|
||||
draw.rectangle(
|
||||
@@ -153,22 +140,20 @@ def _resize_and_annotate(
|
||||
img = img.convert("RGB")
|
||||
|
||||
buf = io.BytesIO()
|
||||
img.save(buf, format="PNG", optimize=True)
|
||||
img.save(buf, format="JPEG", quality=75, optimize=True)
|
||||
return (
|
||||
base64.b64encode(buf.getvalue()).decode(),
|
||||
round(physical_scale, 4),
|
||||
round(css_scale, 4),
|
||||
)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Screenshot resize/annotate FAILED — returning original image with scale=1.0. "
|
||||
"css_width=%s, dpr=%s, target_width=%d. Clicks will be misaligned.",
|
||||
"Screenshot resize/annotate FAILED — returning original image. "
|
||||
"css_width=%s, dpr=%s.",
|
||||
css_width,
|
||||
dpr,
|
||||
width,
|
||||
exc_info=True,
|
||||
)
|
||||
return data, 1.0, 1.0
|
||||
return data, 1.0
|
||||
|
||||
|
||||
def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
@@ -180,26 +165,24 @@ def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
profile: str | None = None,
|
||||
full_page: bool = False,
|
||||
selector: str | None = None,
|
||||
image_type: Literal["png", "jpeg"] = "png",
|
||||
annotate: bool = True,
|
||||
width: int = _SCREENSHOT_WIDTH,
|
||||
) -> list:
|
||||
"""
|
||||
Take a screenshot of the current page.
|
||||
|
||||
Returns a normalized image alongside text metadata (URL, size, scale
|
||||
factors, etc.). Automatically annotates the last interaction (click,
|
||||
hover, type) with a bounding box overlay.
|
||||
The image is delivered at the CSS viewport's own dimensions, so
|
||||
a pixel you see in the screenshot is the same coordinate you
|
||||
pass to ``browser_click_coordinate`` / ``browser_hover_coordinate``
|
||||
/ ``browser_press_at``. No conversion, no scale factors.
|
||||
|
||||
Output is JPEG quality 75 (~150–250 KB for a typical UI).
|
||||
|
||||
Args:
|
||||
tab_id: Chrome tab ID (default: active tab)
|
||||
profile: Browser profile name (default: "default")
|
||||
full_page: Capture full scrollable page (default: False)
|
||||
selector: CSS selector to screenshot a specific element (optional)
|
||||
image_type: Image format - png or jpeg (default: png)
|
||||
annotate: Draw bounding box of last interaction on image (default: True)
|
||||
width: Output image width in pixels (default: 600). Use 800+ for fine
|
||||
text, 400 for quick layout checks.
|
||||
|
||||
Returns:
|
||||
List of content blocks: text metadata + image
|
||||
@@ -252,7 +235,6 @@ def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
return [TextContent(type="text", text=json.dumps(screenshot_result))]
|
||||
|
||||
data = screenshot_result.get("data")
|
||||
mime_type = screenshot_result.get("mimeType", "image/png")
|
||||
css_width = screenshot_result.get("cssWidth", 0)
|
||||
dpr = screenshot_result.get("devicePixelRatio", 1.0)
|
||||
|
||||
@@ -263,45 +245,38 @@ def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
if annotate and target_tab in _interaction_highlights:
|
||||
highlights = [_interaction_highlights[target_tab]]
|
||||
|
||||
# Normalize to 800px wide and annotate. Offloaded to a
|
||||
# thread because PIL Image.open/resize/ImageDraw/composite on
|
||||
# a 2-megapixel PNG blocks for ~150-300ms of CPU — plenty to
|
||||
# freeze the asyncio event loop and delay every concurrent
|
||||
# tool call during a screenshot. The function is reentrant
|
||||
# (fresh PIL Image per call, no shared state), so to_thread
|
||||
# is safe.
|
||||
data, physical_scale, css_scale = await asyncio.to_thread(
|
||||
# Resize to CSS-viewport dimensions so image px == CSS px,
|
||||
# and re-encode as the chosen lossy format. Offloaded to a
|
||||
# thread because PIL Image.open/resize/ImageDraw/composite
|
||||
# on a 2-megapixel PNG blocks for ~150–300 ms of CPU —
|
||||
# plenty to freeze the asyncio event loop. The function is
|
||||
# reentrant (fresh PIL Image per call, no shared state), so
|
||||
# to_thread is safe.
|
||||
data, physical_scale = await asyncio.to_thread(
|
||||
_resize_and_annotate,
|
||||
data,
|
||||
css_width,
|
||||
dpr,
|
||||
highlights,
|
||||
width,
|
||||
)
|
||||
_screenshot_scales[target_tab] = physical_scale
|
||||
_screenshot_css_scales[target_tab] = css_scale
|
||||
|
||||
meta = json.dumps(
|
||||
{
|
||||
"ok": True,
|
||||
"tabId": target_tab,
|
||||
"url": screenshot_result.get("url", ""),
|
||||
"imageType": mime_type.split("/")[-1],
|
||||
"imageType": "jpeg",
|
||||
"size": len(base64.b64decode(data)) if data else 0,
|
||||
"imageWidth": width,
|
||||
"imageWidth": css_width,
|
||||
"fullPage": full_page,
|
||||
"devicePixelRatio": dpr,
|
||||
"physicalScale": physical_scale,
|
||||
"cssScale": css_scale,
|
||||
"annotated": bool(highlights),
|
||||
"scaleHint": (
|
||||
f"image_coord × {css_scale} = CSS px "
|
||||
f"→ feed to browser_click_coordinate, "
|
||||
f"browser_hover_coordinate, browser_press_at "
|
||||
f"(CDP Input events use CSS pixels). "
|
||||
f"image_coord × {physical_scale} = physical px "
|
||||
f"is debug-only on HiDPI displays and must NOT "
|
||||
f"be used for clicks — it overshoots by DPR×."
|
||||
"Image pixel = CSS pixel. Feed any coord you see "
|
||||
"in this image directly to browser_click_coordinate "
|
||||
"/ browser_hover_coordinate / browser_press_at — "
|
||||
"no conversion needed."
|
||||
),
|
||||
}
|
||||
)
|
||||
@@ -313,17 +288,15 @@ def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
"ok": True,
|
||||
"size": len(base64.b64decode(data)) if data else 0,
|
||||
"url": screenshot_result.get("url", ""),
|
||||
"physicalScale": physical_scale,
|
||||
"cssScale": css_scale,
|
||||
"debug_cssWidth": css_width,
|
||||
"debug_dpr": dpr,
|
||||
"cssWidth": css_width,
|
||||
"dpr": dpr,
|
||||
},
|
||||
duration_ms=(time.perf_counter() - start) * 1000,
|
||||
)
|
||||
|
||||
return [
|
||||
TextContent(type="text", text=meta),
|
||||
ImageContent(type="image", data=data, mimeType=mime_type),
|
||||
ImageContent(type="image", data=data, mimeType="image/jpeg"),
|
||||
]
|
||||
except Exception as e:
|
||||
log_tool_call(
|
||||
@@ -334,73 +307,6 @@ def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
)
|
||||
return [TextContent(type="text", text=json.dumps({"ok": False, "error": str(e)}))]
|
||||
|
||||
@mcp.tool()
|
||||
def browser_coords(
|
||||
x: float,
|
||||
y: float,
|
||||
tab_id: int | None = None,
|
||||
profile: str | None = None,
|
||||
) -> dict:
|
||||
"""
|
||||
Convert screenshot image coordinates to browser click coordinates.
|
||||
|
||||
After browser_screenshot returns a downscaled image, use this to
|
||||
translate pixel positions you see in the image into the CSS pixel
|
||||
coordinates that Chrome DevTools Protocol expects.
|
||||
|
||||
**CDP Input.dispatchMouseEvent uses CSS pixels**, so you want
|
||||
``css_x`` / ``css_y`` for every click/hover tool. ``physical_x/y``
|
||||
is kept in the return for debugging on HiDPI displays — do NOT
|
||||
feed it to clicks; on a DPR=2 screen it lands 2× too far.
|
||||
|
||||
Edge case: pages using ``zoom`` or ``transform: scale()`` (e.g.
|
||||
LinkedIn's ``#interop-outlet`` shadow DOM) render in a scaled
|
||||
local coordinate space. For those, ``getBoundingClientRect()``
|
||||
reports pre-zoom coordinates and you may still need to multiply
|
||||
by the element's effective zoom. Use browser_shadow_query to
|
||||
get the zoomed rect directly.
|
||||
|
||||
Args:
|
||||
x: X pixel position in the screenshot image
|
||||
y: Y pixel position in the screenshot image
|
||||
tab_id: Chrome tab ID (default: active tab for profile)
|
||||
profile: Browser profile name (default: "default")
|
||||
|
||||
Returns:
|
||||
Dict with css_x, css_y (primary — use these), physical_x,
|
||||
physical_y (debug only), and scale factors.
|
||||
"""
|
||||
ctx = _get_context(profile)
|
||||
target_tab = tab_id or (ctx.get("activeTabId") if ctx else None)
|
||||
|
||||
physical_scale = _screenshot_scales.get(target_tab, 1.0) if target_tab else 1.0
|
||||
# css_scale stored in second slot via _screenshot_css_scales
|
||||
css_scale = _screenshot_css_scales.get(target_tab, physical_scale) if target_tab else physical_scale
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
# Primary output: CSS pixels. Feed these to click/hover/press.
|
||||
"css_x": round(x * css_scale, 1),
|
||||
"css_y": round(y * css_scale, 1),
|
||||
# Debug output: raw physical pixels. DO NOT feed to clicks on
|
||||
# HiDPI displays — CDP Input events use CSS pixels, so sending
|
||||
# physical coordinates lands the click at roughly DPR× the
|
||||
# intended position.
|
||||
"physical_x": round(x * physical_scale, 1),
|
||||
"physical_y": round(y * physical_scale, 1),
|
||||
"physicalScale": physical_scale,
|
||||
"cssScale": css_scale,
|
||||
"tabId": target_tab,
|
||||
"note": (
|
||||
"Use css_x/css_y with browser_click_coordinate, "
|
||||
"browser_hover_coordinate, browser_press_at — "
|
||||
"Chrome DevTools Protocol Input.dispatchMouseEvent "
|
||||
"operates in CSS pixels. physical_x/y is for debugging "
|
||||
"on HiDPI displays only; feeding it to clicks lands "
|
||||
"them at DPR× the intended coordinate."
|
||||
),
|
||||
}
|
||||
|
||||
@mcp.tool()
|
||||
async def browser_shadow_query(
|
||||
selector: str,
|
||||
@@ -412,7 +318,11 @@ def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
|
||||
Traverses shadow roots to find elements inside closed/open shadow DOM,
|
||||
overlays, and virtual-rendered components (e.g. LinkedIn's #interop-outlet).
|
||||
Returns getBoundingClientRect in both CSS and physical pixels.
|
||||
Returns the element's bounding rect in CSS pixels. Screenshot
|
||||
pixels == CSS pixels, so the same numbers also match whatever
|
||||
the agent sees in a browser_screenshot — feed ``css.cx/cy``
|
||||
straight to browser_click_coordinate / hover_coordinate /
|
||||
press_at.
|
||||
|
||||
Args:
|
||||
selector: CSS selectors joined by ' >>> ' to pierce shadow roots.
|
||||
@@ -421,7 +331,7 @@ def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
profile: Browser profile name (default: "default")
|
||||
|
||||
Returns:
|
||||
Dict with rect (CSS px) and physical rect (CSS px × DPR) of the element
|
||||
Dict with ``css`` block (x, y, w, h, cx, cy).
|
||||
"""
|
||||
bridge = get_bridge()
|
||||
if not bridge or not bridge.is_connected:
|
||||
@@ -438,10 +348,6 @@ def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
return result
|
||||
|
||||
rect = result["rect"]
|
||||
physical_scale = _screenshot_scales.get(target_tab, 1.0)
|
||||
css_scale = _screenshot_css_scales.get(target_tab, 1.0)
|
||||
dpr = physical_scale / css_scale if css_scale else 1.0
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
"selector": selector,
|
||||
@@ -454,20 +360,11 @@ def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
"cx": rect["cx"],
|
||||
"cy": rect["cy"],
|
||||
},
|
||||
"physical": {
|
||||
"x": round(rect["x"] * dpr, 1),
|
||||
"y": round(rect["y"] * dpr, 1),
|
||||
"w": round(rect["w"] * dpr, 1),
|
||||
"h": round(rect["h"] * dpr, 1),
|
||||
"cx": round(rect["cx"] * dpr, 1),
|
||||
"cy": round(rect["cy"] * dpr, 1),
|
||||
},
|
||||
"note": (
|
||||
"Use css.cx/cy with browser_click_coordinate, "
|
||||
"browser_hover_coordinate, browser_press_at — "
|
||||
"CDP Input events operate in CSS pixels. "
|
||||
"physical.* is debug-only; feeding it to clicks "
|
||||
"lands them DPR× too far on HiDPI displays."
|
||||
"Pass css.cx/cy → browser_click_coordinate / "
|
||||
"hover_coordinate / press_at. Screenshot pixels == CSS "
|
||||
"pixels, so these coords also match anything you see in "
|
||||
"browser_screenshot."
|
||||
),
|
||||
}
|
||||
|
||||
@@ -480,11 +377,11 @@ def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
"""
|
||||
Get the bounding rect of an element by CSS selector.
|
||||
|
||||
Supports '>>>' shadow-piercing selectors for overlay/shadow DOM content.
|
||||
Returns coordinates in CSS pixels (for clicks and DOM APIs); the
|
||||
physical-pixel variant is returned for debugging on HiDPI displays
|
||||
only — it must not be fed to click/hover/press tools, which use
|
||||
CSS pixels.
|
||||
Supports '>>>' shadow-piercing selectors for overlay/shadow DOM
|
||||
content. Returns the rect in CSS pixels. Screenshot pixels ==
|
||||
CSS pixels, so the same numbers match anything visible in
|
||||
browser_screenshot — feed ``css.cx/cy`` straight to
|
||||
browser_click_coordinate / hover_coordinate / press_at.
|
||||
|
||||
Args:
|
||||
selector: CSS selector, optionally with ' >>> ' to pierce shadow roots.
|
||||
@@ -493,7 +390,7 @@ def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
profile: Browser profile name (default: "default")
|
||||
|
||||
Returns:
|
||||
Dict with css and physical bounding rects
|
||||
Dict with ``css`` block (x, y, w, h, cx, cy).
|
||||
"""
|
||||
bridge = get_bridge()
|
||||
if not bridge or not bridge.is_connected:
|
||||
@@ -510,10 +407,6 @@ def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
return result
|
||||
|
||||
rect = result["rect"]
|
||||
physical_scale = _screenshot_scales.get(target_tab, 1.0)
|
||||
css_scale = _screenshot_css_scales.get(target_tab, 1.0)
|
||||
dpr = physical_scale / css_scale if css_scale else 1.0
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
"selector": selector,
|
||||
@@ -526,20 +419,11 @@ def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
"cx": rect["cx"],
|
||||
"cy": rect["cy"],
|
||||
},
|
||||
"physical": {
|
||||
"x": round(rect["x"] * dpr, 1),
|
||||
"y": round(rect["y"] * dpr, 1),
|
||||
"w": round(rect["w"] * dpr, 1),
|
||||
"h": round(rect["h"] * dpr, 1),
|
||||
"cx": round(rect["cx"] * dpr, 1),
|
||||
"cy": round(rect["cy"] * dpr, 1),
|
||||
},
|
||||
"note": (
|
||||
"Use css.cx/cy with browser_click_coordinate, "
|
||||
"browser_hover_coordinate, browser_press_at — "
|
||||
"CDP Input events operate in CSS pixels. "
|
||||
"physical.* is debug-only; feeding it to clicks "
|
||||
"lands them DPR× too far on HiDPI displays."
|
||||
"Pass css.cx/cy → browser_click_coordinate / "
|
||||
"hover_coordinate / press_at. Screenshot pixels == CSS "
|
||||
"pixels, so these coords also match anything you see in "
|
||||
"browser_screenshot."
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@@ -108,24 +108,24 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
button: Literal["left", "right", "middle"] = "left",
|
||||
) -> dict:
|
||||
"""
|
||||
Click at specific viewport coordinates (CSS pixels).
|
||||
Click at viewport coordinates.
|
||||
|
||||
Chrome DevTools Protocol's Input.dispatchMouseEvent operates in
|
||||
**CSS pixels**, not physical pixels. If you have a screenshot
|
||||
image coordinate, convert it with ``browser_coords(x, y)`` and
|
||||
use the returned ``css_x`` / ``css_y`` — not ``physical_x/y``.
|
||||
On a DPR=2 display, feeding physical coordinates lands the click
|
||||
at 2× the intended position.
|
||||
Screenshots are delivered at the CSS viewport's own dimensions
|
||||
(see ``browser_screenshot``), so a pixel you read off the image
|
||||
is the same number you pass here — no conversion, no scale
|
||||
factors. ``browser_get_rect`` likewise returns coords you can
|
||||
feed straight through.
|
||||
|
||||
Args:
|
||||
x: X coordinate in CSS pixels (viewport space)
|
||||
y: Y coordinate in CSS pixels (viewport space)
|
||||
x: X coordinate in the screenshot / CSS viewport.
|
||||
y: Y coordinate in the screenshot / CSS viewport.
|
||||
tab_id: Chrome tab ID (default: active tab)
|
||||
profile: Browser profile name (default: "default")
|
||||
button: Mouse button to click (left, right, middle)
|
||||
|
||||
Returns:
|
||||
Dict with click result
|
||||
Dict with click result, including ``focused_element``
|
||||
describing what the click focused.
|
||||
"""
|
||||
start = time.perf_counter()
|
||||
params = {"x": x, "y": y, "tab_id": tab_id, "profile": profile, "button": button}
|
||||
@@ -149,17 +149,11 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
return result
|
||||
|
||||
try:
|
||||
from .inspection import _screenshot_css_scales, _screenshot_scales
|
||||
|
||||
click_result = await bridge.click_coordinate(target_tab, x, y, button=button)
|
||||
log_tool_call(
|
||||
"browser_click_coordinate",
|
||||
params,
|
||||
result={
|
||||
**click_result,
|
||||
"debug_stored_physicalScale": _screenshot_scales.get(target_tab, "unset"),
|
||||
"debug_stored_cssScale": _screenshot_css_scales.get(target_tab, "unset"),
|
||||
},
|
||||
result=click_result,
|
||||
duration_ms=(time.perf_counter() - start) * 1000,
|
||||
)
|
||||
return click_result
|
||||
@@ -179,43 +173,34 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
text: str,
|
||||
tab_id: int | None = None,
|
||||
profile: str | None = None,
|
||||
delay_ms: int = 0,
|
||||
delay_ms: int = 1,
|
||||
clear_first: bool = True,
|
||||
timeout_ms: int = 30000,
|
||||
use_insert_text: bool = True,
|
||||
) -> dict:
|
||||
"""
|
||||
Type text into an input element.
|
||||
Click a selector to focus it, then type text into it.
|
||||
|
||||
Automatically routes through a real CDP pointer click on the
|
||||
element before inserting text — so that rich-text editors like
|
||||
Lexical (Gmail, LinkedIn DMs), Draft.js (X compose), and
|
||||
ProseMirror (Reddit) see a native focus event and enable their
|
||||
submit buttons. See the gcu-browser skill for the full "click-
|
||||
then-type" pattern.
|
||||
|
||||
By default uses CDP Input.insertText which is the most reliable
|
||||
way to insert text into rich editors. Set
|
||||
``use_insert_text=False`` to fall back to per-character
|
||||
keyDown/keyUp events (needed only for code editors that fire
|
||||
on specific keystrokes, or when ``delay_ms`` typing animation
|
||||
is required).
|
||||
Uses CDP ``Input.insertText`` by default, which works for both
|
||||
standard inputs and many rich-text editors. Use
|
||||
``browser_type_focused`` when the target is already focused or
|
||||
you cannot reliably address it with a selector.
|
||||
|
||||
Args:
|
||||
selector: CSS selector for the input element
|
||||
text: Text to type
|
||||
tab_id: Chrome tab ID (default: active tab)
|
||||
profile: Browser profile name (default: "default")
|
||||
delay_ms: Delay between keystrokes in ms (default: 0).
|
||||
Forces the per-keystroke fallback when > 0.
|
||||
clear_first: Clear existing text before typing (default: True)
|
||||
timeout_ms: Timeout waiting for element (default: 30000)
|
||||
selector: CSS selector for the input element.
|
||||
text: Text to type.
|
||||
tab_id: Chrome tab ID (default: active tab).
|
||||
profile: Browser profile name (default: "default").
|
||||
delay_ms: Delay between keystrokes in ms (default: 1).
|
||||
Forces the per-keystroke fallback when > 0.
|
||||
clear_first: Clear existing text before typing (default: True).
|
||||
timeout_ms: Timeout waiting for element (default: 30000).
|
||||
use_insert_text: Use CDP Input.insertText (default: True) for
|
||||
reliable insertion into rich-text editors.
|
||||
Set False for per-keystroke dispatch.
|
||||
reliable insertion into rich-text editors. Set False for
|
||||
per-keystroke dispatch.
|
||||
|
||||
Returns:
|
||||
Dict with type result
|
||||
Dict with type result.
|
||||
"""
|
||||
start = time.perf_counter()
|
||||
params = {"selector": selector, "text": text, "tab_id": tab_id, "profile": profile}
|
||||
@@ -293,6 +278,77 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
timeout_ms=timeout_ms,
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
async def browser_type_focused(
|
||||
text: str,
|
||||
tab_id: int | None = None,
|
||||
profile: str | None = None,
|
||||
delay_ms: int = 1,
|
||||
clear_first: bool = True,
|
||||
use_insert_text: bool = True,
|
||||
) -> dict:
|
||||
"""
|
||||
Type text into the already-focused element.
|
||||
|
||||
Targets ``document.activeElement`` and is ideal after a
|
||||
coordinate click, or when the editable cannot be reached
|
||||
reliably with a selector. Faster than repeated
|
||||
``browser_press`` calls for multi-character input.
|
||||
|
||||
Args:
|
||||
text: Text to insert at the current cursor position.
|
||||
tab_id: Chrome tab ID (default: active tab).
|
||||
profile: Browser profile name (default: "default").
|
||||
delay_ms: Delay between keystrokes in ms (default: 1).
|
||||
Forces per-keystroke dispatch when > 0.
|
||||
clear_first: Clear existing text before typing (default: True).
|
||||
use_insert_text: Use CDP Input.insertText (default: True).
|
||||
|
||||
Returns:
|
||||
Dict with type result.
|
||||
"""
|
||||
start = time.perf_counter()
|
||||
params = {"text": text, "tab_id": tab_id, "profile": profile}
|
||||
|
||||
bridge = get_bridge()
|
||||
if not bridge or not bridge.is_connected:
|
||||
result = {"ok": False, "error": "Browser extension not connected"}
|
||||
log_tool_call("browser_type_focused", params, result=result)
|
||||
return result
|
||||
|
||||
ctx = _get_context(profile)
|
||||
if not ctx:
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
|
||||
log_tool_call("browser_type_focused", params, result=result)
|
||||
return result
|
||||
|
||||
target_tab = tab_id or ctx.get("activeTabId")
|
||||
if target_tab is None:
|
||||
result = {"ok": False, "error": "No active tab"}
|
||||
log_tool_call("browser_type_focused", params, result=result)
|
||||
return result
|
||||
|
||||
try:
|
||||
type_result = await bridge.type_text(
|
||||
target_tab,
|
||||
None,
|
||||
text,
|
||||
clear_first=clear_first,
|
||||
delay_ms=delay_ms,
|
||||
use_insert_text=use_insert_text,
|
||||
)
|
||||
log_tool_call(
|
||||
"browser_type_focused",
|
||||
params,
|
||||
result=type_result,
|
||||
duration_ms=(time.perf_counter() - start) * 1000,
|
||||
)
|
||||
return type_result
|
||||
except Exception as e:
|
||||
result = {"ok": False, "error": str(e)}
|
||||
log_tool_call("browser_type_focused", params, error=e, duration_ms=(time.perf_counter() - start) * 1000)
|
||||
return result
|
||||
|
||||
@mcp.tool()
|
||||
async def browser_press(
|
||||
key: str,
|
||||
@@ -422,15 +478,16 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
profile: str | None = None,
|
||||
) -> dict:
|
||||
"""
|
||||
Hover at CSS pixel coordinates without needing a CSS selector.
|
||||
Hover at viewport coordinates without needing a CSS selector.
|
||||
|
||||
Use this instead of browser_hover when the element is in an overlay,
|
||||
shadow DOM, or virtual-rendered component that isn't in the regular DOM.
|
||||
Pair with browser_coords to convert screenshot image positions to CSS pixels.
|
||||
Screenshot pixels == CSS pixels, so any coord you read off a
|
||||
browser_screenshot image can be fed straight through.
|
||||
|
||||
Args:
|
||||
x: CSS pixel X coordinate
|
||||
y: CSS pixel Y coordinate
|
||||
x: X coordinate in the screenshot / CSS viewport.
|
||||
y: Y coordinate in the screenshot / CSS viewport.
|
||||
tab_id: Chrome tab ID (default: active tab)
|
||||
profile: Browser profile name (default: "default")
|
||||
|
||||
@@ -486,16 +543,17 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
profile: str | None = None,
|
||||
) -> dict:
|
||||
"""
|
||||
Move mouse to CSS pixel coordinates then press a key.
|
||||
Move mouse to viewport coordinates then press a key.
|
||||
|
||||
Use this instead of browser_press when the focused element is in an overlay
|
||||
or virtual-rendered component. Moving the mouse first routes the key event
|
||||
through native browser hit-testing instead of the DOM focus chain.
|
||||
Pair with browser_coords to convert screenshot image positions to CSS pixels.
|
||||
Screenshot pixels == CSS pixels, so coords read off a
|
||||
browser_screenshot image can be fed straight through.
|
||||
|
||||
Args:
|
||||
x: CSS pixel X coordinate to position mouse
|
||||
y: CSS pixel Y coordinate to position mouse
|
||||
x: X coordinate in the screenshot / CSS viewport.
|
||||
y: Y coordinate in the screenshot / CSS viewport.
|
||||
key: Key to press (e.g. 'Enter', 'Space', 'Escape', 'ArrowDown')
|
||||
tab_id: Chrome tab ID (default: active tab)
|
||||
profile: Browser profile name (default: "default")
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
import json
|
||||
|
||||
try:
|
||||
with open('data/linkedin_ledger.json', 'r') as f:
|
||||
data = json.load(f)
|
||||
|
||||
profiles = data.get('messaged_profiles', [])
|
||||
for p in profiles:
|
||||
if 'variant' not in p:
|
||||
p['variant'] = 'Control' # Retroactively label our first runs
|
||||
|
||||
with open('data/linkedin_ledger.json', 'w') as f:
|
||||
json.dump({"messaged_profiles": profiles}, f, indent=2)
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"replies": [
|
||||
{
|
||||
"original_preview": "NASA Ames@NASAAmes\u00b75hWe\u2019re just getting started\n\nDuring their historic journey around the Moon, Artemis II observed lunar targets to study color, text"
|
||||
},
|
||||
{
|
||||
"original_preview": "NASA Marshall@NASA_Marshall\u00b74h Enjoy these views of the Artemis II launch from cameras affixed to the rocket! On April 1, 2026, the SLS (Space Launch "
|
||||
},
|
||||
{
|
||||
"original_preview": "U.S. Navy@USNavy\u00b711hFirst contact. On April 10, U.S. Navy divers were the first on the scene as the Navy and NASA successfully recovered the Orion s"
|
||||
},
|
||||
{
|
||||
"original_preview": "Alright, I give in. Here\u2019s my picture with the boss, courtesy of @johnkrausphotos. Oh, and hook \u2018em!"
|
||||
}
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user