Compare commits
28 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 7b08ee17f2 | |||
| 8668d103a8 | |||
| 133f393f8b | |||
| fd3ef36a15 | |||
| aa281aad34 | |||
| a3d0c7e0cb | |||
| de3042ba3f | |||
| 326d7f201c | |||
| db30ef3094 | |||
| e3d1cb6739 | |||
| 846f3f2470 | |||
| 913437ea0b | |||
| 520bd635e2 | |||
| b7d850ddd0 | |||
| 0a251278f1 | |||
| 857af8e6a3 | |||
| 273d4ec66e | |||
| eeb46a2b3e | |||
| b5e05fefae | |||
| bdfbb7698a | |||
| 35b1eadb7f | |||
| 38036eb7bd | |||
| 70d90fda19 | |||
| 1e3dcbbbc2 | |||
| 53b095cdcb | |||
| d04862053f | |||
| df0e0ea082 | |||
| a59493835d |
+34
-2
@@ -14,11 +14,43 @@
|
||||
"Bash(uv run:*)",
|
||||
"Read(//tmp/**)",
|
||||
"Bash(grep -n \"useColony\\\\|const { queens, queenProfiles\" /home/timothy/aden/hive/core/frontend/src/pages/queen-dm.tsx)",
|
||||
"Bash(awk 'NR==385,/\\\\}, \\\\[/' /home/timothy/aden/hive/core/frontend/src/pages/queen-dm.tsx)"
|
||||
"Bash(awk 'NR==385,/\\\\}, \\\\[/' /home/timothy/aden/hive/core/frontend/src/pages/queen-dm.tsx)",
|
||||
"Bash(xargs -I{} sh -c 'if ! grep -q \"^import base64\\\\|^from base64\" \"{}\"; then echo \"MISSING: {}\"; fi')",
|
||||
"Bash(find /home/timothy/aden/hive/core/framework -name \"*.py\" -type f -exec grep -l \"FileConversationStore\\\\|class.*ConversationStore\" {} \\\\;)",
|
||||
"Bash(find /home/timothy/aden/hive/core/framework -name \"*.py\" -exec grep -l \"run_parallel_workers\\\\|create_colony\" {} \\\\;)",
|
||||
"Bash(awk '/^ async def execute\\\\\\(self, ctx: AgentContext\\\\\\)/,/^ async def [a-z_]+/ {print NR\": \"$0}' /home/timothy/aden/hive/core/framework/agent_loop/agent_loop.py)",
|
||||
"Bash(grep -r \"max_concurrent_workers\\\\|max_depth\\\\|recursion\\\\|spawn.*bomb\" /home/timothy/aden/hive/core/framework/host/*.py)",
|
||||
"Bash(wc -l /home/timothy/aden/hive/tools/src/gcu/browser/*.py /home/timothy/aden/hive/tools/src/gcu/browser/tools/*.py)",
|
||||
"Bash(file /tmp/gcu_verify/*.png)",
|
||||
"Bash(ps -eo pid,cmd)",
|
||||
"Bash(ps -o pid,lstart,cmd -p 746640)",
|
||||
"Bash(kill 746636)",
|
||||
"Bash(ps -eo pid,lstart,cmd)",
|
||||
"Bash(grep -E \"^d|\\\\.py$\")",
|
||||
"Bash(grep -E \"\\\\.\\(ts|tsx\\)$\")",
|
||||
"Bash(xargs cat:*)",
|
||||
"Bash(find /home/timothy/aden/hive -path \"*/.venv\" -prune -o -name \"*.py\" -type f -exec grep -l \"frontend\\\\|UI\\\\|terminal\\\\|interactive\\\\|TUI\" {} \\\\;)",
|
||||
"Bash(wc -l /home/timothy/.hive/backup/*/SKILL.md)",
|
||||
"Bash(awk -F'::' '{print $1}')",
|
||||
"Bash(wait)",
|
||||
"Bash(pkill -f \"pytest.*test_event_loop_node\")",
|
||||
"Bash(pkill -f \"pytest.*TestToolConcurrency\")",
|
||||
"Bash(grep -n \"def.*discover\\\\|/api/agents\\\\|agents_discover\" /home/timothy/aden/hive/core/framework/server/*.py)",
|
||||
"Bash(bun run:*)",
|
||||
"Bash(npx eslint:*)",
|
||||
"Bash(npm run:*)",
|
||||
"Bash(npm test:*)",
|
||||
"Bash(grep -E \"\\\\.tsx$|^d\")",
|
||||
"Bash(grep -E \"test_.*\\\\.py$\")",
|
||||
"Bash(grep \"\\\\.py$\")",
|
||||
"Bash(grep -l \"save_agent_draft\\\\|confirm_and_build\\\\|replan_agent\\\\|load_built_agent\\\\|planning\\\\|building\\\\|staging\" /home/timothy/aden/hive/core/framework/agents/queen/reference/*.md)",
|
||||
"Bash(grep -E \"\\\\.tsx$|\\\\.ts$\")",
|
||||
"Bash(find /home/timothy/aden/hive/core/framework/tools -name \"*.py\" -exec grep -l \"switch_to_\" {} \\\\;)"
|
||||
],
|
||||
"additionalDirectories": [
|
||||
"/home/timothy/.hive/skills/writing-hive-skills",
|
||||
"/tmp"
|
||||
"/tmp",
|
||||
"/home/timothy/.hive/skills"
|
||||
]
|
||||
},
|
||||
"hooks": {
|
||||
|
||||
@@ -96,6 +96,7 @@ from framework.llm.stream_events import (
|
||||
ToolCallEvent,
|
||||
)
|
||||
from framework.tracker.llm_debug_logger import log_llm_turn
|
||||
from framework.utils.task_registry import TaskRegistry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -362,6 +363,9 @@ class AgentLoop(AgentProtocol):
|
||||
self._tool_task: asyncio.Task | None = None # gather task while tools run
|
||||
# Track which nodes already have an action plan emitted (skip on revisit)
|
||||
self._action_plan_emitted: set[str] = set()
|
||||
# Tracked background tasks (action plan, etc.) — prevents GC loss
|
||||
# and surfaces unhandled exceptions via the done callback.
|
||||
self._bg_tasks: TaskRegistry = TaskRegistry(owner="AgentLoop")
|
||||
# Monotonic counter for spillover file naming (web_search_1.txt, etc.)
|
||||
self._spill_counter: int = 0
|
||||
# Set to True by the report_to_parent synthetic tool handler so the
|
||||
@@ -371,6 +375,31 @@ class AgentLoop(AgentProtocol):
|
||||
# Set by the Worker's __init__ so the report_to_parent handler can
|
||||
# record the explicit report payload on the owning Worker instance.
|
||||
self._owner_worker: Any = None
|
||||
# Reliability counters — populated throughout execute() and
|
||||
# copied onto AgentResult.reliability_stats at return time.
|
||||
# Kept on the instance so ``stats()`` can expose them externally
|
||||
# without waiting for execute() to return. Keys are stable so
|
||||
# dashboards can build aggregates over many runs.
|
||||
self._counters: dict[str, int] = {}
|
||||
|
||||
def _bump(self, key: str, by: int = 1) -> None:
|
||||
"""Increment a reliability counter (creates the key on first use)."""
|
||||
self._counters[key] = self._counters.get(key, 0) + by
|
||||
|
||||
def stats(self) -> dict[str, int]:
|
||||
"""Return a snapshot of reliability counters for this loop."""
|
||||
return dict(self._counters)
|
||||
|
||||
def _finalize_result(self, result: AgentResult, reason: str) -> AgentResult:
|
||||
"""Stamp exit_reason + reliability_stats on an AgentResult before return.
|
||||
|
||||
Central point so every exit path in execute() carries the same
|
||||
observability payload, and new counters show up in results
|
||||
without touching every return site.
|
||||
"""
|
||||
result.exit_reason = reason
|
||||
result.reliability_stats = dict(self._counters)
|
||||
return result
|
||||
|
||||
def validate_input(self, ctx: AgentContext) -> list[str]:
|
||||
"""Validate hard requirements only.
|
||||
@@ -389,6 +418,41 @@ class AgentLoop(AgentProtocol):
|
||||
# -------------------------------------------------------------------
|
||||
|
||||
async def execute(self, ctx: AgentContext) -> AgentResult:
|
||||
"""Run the event loop.
|
||||
|
||||
Thin wrapper around :meth:`_execute_impl` that stamps reliability
|
||||
counters on whatever AgentResult the implementation returns, and
|
||||
fills in a best-effort ``exit_reason`` from the result fields
|
||||
when the implementation didn't set one explicitly. This way
|
||||
every return path in ``_execute_impl`` automatically carries
|
||||
telemetry without having to edit 13+ return sites.
|
||||
"""
|
||||
result = await self._execute_impl(ctx)
|
||||
# Always refresh counters at the outermost boundary, in case a
|
||||
# nested return in _execute_impl used _finalize_result with a
|
||||
# stale copy.
|
||||
result.reliability_stats = dict(self._counters)
|
||||
if result.exit_reason == "?":
|
||||
# Best-effort classification from the AgentResult payload.
|
||||
# _execute_impl can (and should) set reason explicitly at
|
||||
# key sites via _finalize_result — this only handles the
|
||||
# returns that weren't updated yet.
|
||||
err = (result.error or "").lower()
|
||||
if result.success:
|
||||
result.exit_reason = "completed"
|
||||
elif "max iterations" in err:
|
||||
result.exit_reason = "max_iterations"
|
||||
elif "input_validation_errors" in err or result.validation_errors:
|
||||
result.exit_reason = "validation_error"
|
||||
elif "timed out" in err or "timeout" in err:
|
||||
result.exit_reason = "timeout"
|
||||
elif "cancel" in err or "stopped" in err:
|
||||
result.exit_reason = "cancelled"
|
||||
else:
|
||||
result.exit_reason = "failed"
|
||||
return result
|
||||
|
||||
async def _execute_impl(self, ctx: AgentContext) -> AgentResult:
|
||||
"""Run the event loop."""
|
||||
self._last_ctx = ctx
|
||||
logger.debug(
|
||||
@@ -446,7 +510,9 @@ class AgentLoop(AgentProtocol):
|
||||
output_tokens=0,
|
||||
latency_ms=0,
|
||||
)
|
||||
return AgentResult(success=False, error=error_msg)
|
||||
return self._finalize_result(
|
||||
AgentResult(success=False, error=error_msg), "guard_failure"
|
||||
)
|
||||
|
||||
# 2. Restore or create new conversation + accumulator
|
||||
restored = await self._restore(ctx)
|
||||
@@ -520,6 +586,10 @@ class AgentLoop(AgentProtocol):
|
||||
output_keys=ctx.agent_spec.output_keys or None,
|
||||
store=self._conversation_store,
|
||||
run_id=ctx.effective_run_id,
|
||||
compaction_buffer_tokens=self._config.compaction_buffer_tokens,
|
||||
compaction_warning_buffer_tokens=(
|
||||
self._config.compaction_warning_buffer_tokens
|
||||
),
|
||||
)
|
||||
accumulator = OutputAccumulator(
|
||||
store=self._conversation_store,
|
||||
@@ -584,7 +654,10 @@ class AgentLoop(AgentProtocol):
|
||||
and stream_id not in ("queen", "judge")
|
||||
):
|
||||
self._action_plan_emitted.add(node_id)
|
||||
asyncio.create_task(self._generate_action_plan(ctx, stream_id, node_id, execution_id))
|
||||
self._bg_tasks.spawn(
|
||||
self._generate_action_plan(ctx, stream_id, node_id, execution_id),
|
||||
name=f"action_plan:{node_id}",
|
||||
)
|
||||
|
||||
# 5. Stall / doom loop detection state (restored from cursor if resuming)
|
||||
recent_responses: list[str] = _restored_recent_responses
|
||||
@@ -787,6 +860,8 @@ class AgentLoop(AgentProtocol):
|
||||
"[AgentLoop.execute] iteration=%d: entering _run_single_turn loop", iteration
|
||||
)
|
||||
_stream_retry_count = 0
|
||||
_capacity_retry_started_at: float | None = None
|
||||
_capacity_retry_attempt = 0
|
||||
_turn_cancelled = False
|
||||
_llm_turn_failed_waiting_input = False
|
||||
_turn_t0 = time.monotonic()
|
||||
@@ -893,11 +968,59 @@ class AgentLoop(AgentProtocol):
|
||||
type(e).__name__,
|
||||
str(e)[:200],
|
||||
)
|
||||
# Persistent retry for capacity errors (429/529/overloaded).
|
||||
# Unlike the bounded branch below, this one keeps trying
|
||||
# within a wall-clock budget instead of burning through
|
||||
# five attempts in ~1 minute and giving up. Each attempt
|
||||
# still publishes a retry event so the UI can see us
|
||||
# waiting (the "heartbeat" — no silent stalls).
|
||||
self._bump("llm_turn_exception")
|
||||
if (
|
||||
self._is_capacity_error(e)
|
||||
and self._config.capacity_retry_max_seconds > 0
|
||||
):
|
||||
self._bump("capacity_error")
|
||||
now = time.monotonic()
|
||||
if _capacity_retry_started_at is None:
|
||||
_capacity_retry_started_at = now
|
||||
elapsed = now - _capacity_retry_started_at
|
||||
if elapsed < self._config.capacity_retry_max_seconds:
|
||||
_capacity_retry_attempt += 1
|
||||
delay = min(
|
||||
self._config.stream_retry_backoff_base
|
||||
* (2 ** min(_capacity_retry_attempt - 1, 6)),
|
||||
self._config.capacity_retry_max_delay,
|
||||
)
|
||||
logger.warning(
|
||||
"[%s] iter=%d: capacity error (%s), persistent retry "
|
||||
"#%d after %.1fs (elapsed %.0fs / %.0fs budget): %s",
|
||||
node_id,
|
||||
iteration,
|
||||
type(e).__name__,
|
||||
_capacity_retry_attempt,
|
||||
delay,
|
||||
elapsed,
|
||||
self._config.capacity_retry_max_seconds,
|
||||
str(e)[:200],
|
||||
)
|
||||
if self._event_bus:
|
||||
await self._event_bus.emit_node_retry(
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
retry_count=_capacity_retry_attempt,
|
||||
max_retries=-1, # -1 == persistent / unbounded
|
||||
error=str(e)[:500],
|
||||
execution_id=execution_id,
|
||||
)
|
||||
await asyncio.sleep(delay)
|
||||
continue # retry same iteration
|
||||
|
||||
# Retry transient errors with exponential backoff
|
||||
if (
|
||||
self._is_transient_error(e)
|
||||
and _stream_retry_count < self._config.max_stream_retries
|
||||
):
|
||||
self._bump("llm_transient_retry")
|
||||
_stream_retry_count += 1
|
||||
delay = min(
|
||||
self._config.stream_retry_backoff_base
|
||||
@@ -973,11 +1096,20 @@ class AgentLoop(AgentProtocol):
|
||||
error=str(e)[:500],
|
||||
execution_id=execution_id,
|
||||
)
|
||||
# Inject the error as an assistant message so the
|
||||
# user sees it, then block for their next message.
|
||||
await conversation.add_assistant_message(
|
||||
f"[Error: {error_msg}. Please try again.]"
|
||||
)
|
||||
# Emit the error via SSE so the frontend renders
|
||||
# it in the chat, then persist it in the conversation.
|
||||
visible_error = f"[Error: {error_msg}. Please try again.]"
|
||||
if self._event_bus and ctx.emits_client_io:
|
||||
await self._event_bus.emit_client_output_delta(
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
content=visible_error,
|
||||
snapshot=visible_error,
|
||||
execution_id=execution_id,
|
||||
iteration=iteration,
|
||||
inner_turn=0,
|
||||
)
|
||||
await conversation.add_assistant_message(visible_error)
|
||||
await self._await_user_input(ctx, prompt="")
|
||||
_llm_turn_failed_waiting_input = True
|
||||
break # exit retry loop, continue outer iteration
|
||||
@@ -1651,27 +1783,10 @@ class AgentLoop(AgentProtocol):
|
||||
continue
|
||||
# All outputs set -- fall through to judge
|
||||
|
||||
# Auto-block (queen text-only conversational turn):
|
||||
# the user has now replied, continue the loop to
|
||||
# process the next turn. We deliberately skip the
|
||||
# judge here — the queen has no output_keys and no
|
||||
# success_criteria, so judging her conversational
|
||||
# turns is meaningless and the default ACCEPT path
|
||||
# would terminate this forever-alive node.
|
||||
if _cf_auto:
|
||||
_cf_text_only_streak = 0
|
||||
_continue_count += 1
|
||||
self._log_skip_judge(
|
||||
ctx,
|
||||
node_id,
|
||||
iteration,
|
||||
"Auto-block unblocked (queen conversational turn)",
|
||||
logged_tool_calls,
|
||||
assistant_text,
|
||||
turn_tokens,
|
||||
iter_start,
|
||||
)
|
||||
continue
|
||||
# Auto-block beyond grace -- fall through to judge (6i).
|
||||
# The queen's runtime AgentSpec sets skip_judge=True in
|
||||
# queen_orchestrator.py, so the judge short-circuits to
|
||||
# RETRY (no feedback) and the loop continues cleanly.
|
||||
|
||||
# 6h''. Worker wait for queen guidance
|
||||
# When a worker escalates, pause here and skip judge evaluation
|
||||
@@ -2043,13 +2158,18 @@ class AgentLoop(AgentProtocol):
|
||||
escalate_count=_escalate_count,
|
||||
continue_count=_continue_count,
|
||||
)
|
||||
return AgentResult(
|
||||
success=False,
|
||||
error=(f"Max iterations ({self._config.max_iterations}) reached without acceptance"),
|
||||
output=accumulator.to_dict(),
|
||||
tokens_used=total_input_tokens + total_output_tokens,
|
||||
latency_ms=latency_ms,
|
||||
conversation=None,
|
||||
return self._finalize_result(
|
||||
AgentResult(
|
||||
success=False,
|
||||
error=(
|
||||
f"Max iterations ({self._config.max_iterations}) reached without acceptance"
|
||||
),
|
||||
output=accumulator.to_dict(),
|
||||
tokens_used=total_input_tokens + total_output_tokens,
|
||||
latency_ms=latency_ms,
|
||||
conversation=None,
|
||||
),
|
||||
"max_iterations",
|
||||
)
|
||||
|
||||
async def inject_event(
|
||||
@@ -2168,6 +2288,13 @@ class AgentLoop(AgentProtocol):
|
||||
# without injecting, so the wait still blocks until the user types.
|
||||
self._input_ready.clear()
|
||||
|
||||
# Close the lost-wakeup window: a message can arrive between the
|
||||
# pre-check above and the clear() we just did. Re-check the queues
|
||||
# after clearing; if anything snuck in, skip the wait entirely.
|
||||
# Same after emit (sync handlers may inject during the emit).
|
||||
if not self._injection_queue.empty() or not self._trigger_queue.empty():
|
||||
return True
|
||||
|
||||
if emit_client_request and self._event_bus:
|
||||
await self._event_bus.emit_client_input_requested(
|
||||
stream_id=ctx.stream_id or ctx.agent_id,
|
||||
@@ -2178,6 +2305,9 @@ class AgentLoop(AgentProtocol):
|
||||
questions=questions,
|
||||
)
|
||||
|
||||
if not self._injection_queue.empty() or not self._trigger_queue.empty():
|
||||
return True
|
||||
|
||||
self._awaiting_input = True
|
||||
try:
|
||||
await self._input_ready.wait()
|
||||
@@ -2294,6 +2424,33 @@ class AgentLoop(AgentProtocol):
|
||||
tool_calls: list[ToolCallEvent] = []
|
||||
_stream_error: StreamErrorEvent | None = None
|
||||
|
||||
# Gap 1 - Streaming tool execution. Any tool flagged as
|
||||
# concurrency_safe is kicked off the moment its ToolCallEvent
|
||||
# arrives in the stream, instead of waiting for the full
|
||||
# assistant message stop event. The dispatch phase below
|
||||
# reuses these already-running tasks so read_file / grep /
|
||||
# glob overlap with whatever text the model is still
|
||||
# generating. Unsafe tools (bash, edits, browser actions)
|
||||
# still wait for FinishEvent so we don't race a write
|
||||
# against a decision the model hasn't finished making.
|
||||
_early_safe_names = {
|
||||
t.name for t in tools if getattr(t, "concurrency_safe", False)
|
||||
}
|
||||
_early_tasks: dict[str, asyncio.Task] = {}
|
||||
|
||||
async def _timed_execute(
|
||||
_tc: ToolCallEvent,
|
||||
) -> tuple[ToolResult | BaseException, str, float]:
|
||||
"""Execute a tool and return (result, start_iso, duration_s)."""
|
||||
_s = time.time()
|
||||
_iso = datetime.now(UTC).isoformat()
|
||||
try:
|
||||
_r = await self._execute_tool(_tc)
|
||||
except BaseException as _exc:
|
||||
_r = _exc
|
||||
_dur = round(time.time() - _s, 3)
|
||||
return _r, _iso, _dur
|
||||
|
||||
logger.debug(
|
||||
"[_run_single_turn] inner_turn=%d: Starting LLM stream with %d messages, %d tools",
|
||||
inner_turn,
|
||||
@@ -2322,12 +2479,19 @@ class AgentLoop(AgentProtocol):
|
||||
# Stream LLM response in a child task so cancel_current_turn()
|
||||
# can kill it instantly without terminating the queen's main loop.
|
||||
# Capture loop-scoped variables as defaults to satisfy B023.
|
||||
# _stream_last_event_at is bumped on every event; the watchdog
|
||||
# below uses it to detect silently hung HTTP connections.
|
||||
_stream_last_event_at = time.monotonic()
|
||||
|
||||
async def _do_stream(
|
||||
_msgs: list = messages, # noqa: B006
|
||||
_tc: list[ToolCallEvent] = tool_calls, # noqa: B006
|
||||
inner_turn: int = inner_turn,
|
||||
_safe_names: set = _early_safe_names, # noqa: B006,B008
|
||||
_tasks: dict = _early_tasks, # noqa: B006,B008
|
||||
_exec_fn=_timed_execute,
|
||||
) -> None:
|
||||
nonlocal accumulated_text, _stream_error
|
||||
nonlocal accumulated_text, _stream_error, _stream_last_event_at
|
||||
_clean_snapshot = "" # visible-only text for the frontend
|
||||
|
||||
async for event in ctx.llm.stream(
|
||||
@@ -2336,6 +2500,7 @@ class AgentLoop(AgentProtocol):
|
||||
tools=tools if tools else None,
|
||||
max_tokens=ctx.max_tokens,
|
||||
):
|
||||
_stream_last_event_at = time.monotonic()
|
||||
if isinstance(event, TextDeltaEvent):
|
||||
accumulated_text = event.snapshot
|
||||
# Strip internal reasoning tags from the full
|
||||
@@ -2358,6 +2523,18 @@ class AgentLoop(AgentProtocol):
|
||||
|
||||
elif isinstance(event, ToolCallEvent):
|
||||
_tc.append(event)
|
||||
# Gap 1: start concurrency-safe tools immediately
|
||||
# while the rest of the stream is still arriving,
|
||||
# so read-heavy turns don't stall after the last
|
||||
# text delta. Unsafe tools wait for FinishEvent.
|
||||
if (
|
||||
event.tool_name in _safe_names
|
||||
and "_raw" not in event.tool_input
|
||||
and event.tool_use_id not in _tasks
|
||||
):
|
||||
_tasks[event.tool_use_id] = asyncio.create_task(
|
||||
_exec_fn(event)
|
||||
)
|
||||
|
||||
elif isinstance(event, FinishEvent):
|
||||
token_counts["input"] += event.input_tokens
|
||||
@@ -2377,7 +2554,51 @@ class AgentLoop(AgentProtocol):
|
||||
logger.debug(
|
||||
"[_run_single_turn] inner_turn=%d: Stream task created, waiting...", inner_turn
|
||||
)
|
||||
_inactivity_limit = self._config.llm_stream_inactivity_timeout_seconds
|
||||
try:
|
||||
if _inactivity_limit and _inactivity_limit > 0:
|
||||
# Heartbeat-aware wait: poll the task and cancel it if
|
||||
# no stream event has been observed within the window.
|
||||
# A silently dead HTTP connection otherwise hangs here
|
||||
# forever — no exception, no delta, no timeout.
|
||||
#
|
||||
# Must use asyncio.wait (not wait_for) so we can tell
|
||||
# "poll interval elapsed" apart from "task raised a
|
||||
# TimeoutError of its own" — wait_for conflates them.
|
||||
_check_interval = min(5.0, _inactivity_limit / 2)
|
||||
while True:
|
||||
done, _pending = await asyncio.wait(
|
||||
{self._stream_task}, timeout=_check_interval
|
||||
)
|
||||
if self._stream_task in done:
|
||||
# Let any exception the task raised propagate
|
||||
# naturally via the outer ``await`` below.
|
||||
break
|
||||
idle = time.monotonic() - _stream_last_event_at
|
||||
if idle >= _inactivity_limit:
|
||||
logger.warning(
|
||||
"[_run_single_turn] inner_turn=%d: "
|
||||
"stream inactivity %.0fs >= %.0fs — "
|
||||
"cancelling stream task",
|
||||
inner_turn,
|
||||
idle,
|
||||
_inactivity_limit,
|
||||
)
|
||||
self._bump("stream_inactivity_watchdog")
|
||||
self._stream_task.cancel()
|
||||
try:
|
||||
await self._stream_task
|
||||
except BaseException:
|
||||
pass
|
||||
raise ConnectionError(
|
||||
f"LLM stream idle for {idle:.0f}s "
|
||||
f"(inactivity limit {_inactivity_limit:.0f}s) — "
|
||||
"connection presumed dead"
|
||||
) from None
|
||||
# Still active — keep polling.
|
||||
# Re-raise any exception the stream task stored. When the
|
||||
# watchdog loop exited via ``break`` the task is done, and
|
||||
# ``await`` is the cheapest way to surface its exception.
|
||||
await self._stream_task
|
||||
logger.debug(
|
||||
"[_run_single_turn] inner_turn=%d: Stream task completed normally", inner_turn
|
||||
@@ -2386,6 +2607,12 @@ class AgentLoop(AgentProtocol):
|
||||
logger.debug("[_run_single_turn] inner_turn=%d: Stream task cancelled", inner_turn)
|
||||
if accumulated_text:
|
||||
await conversation.add_assistant_message(content=accumulated_text)
|
||||
# Gap 1: kill any early-dispatched tool tasks too.
|
||||
# Without this, a safe tool started during streaming
|
||||
# would leak past cancellation and keep running.
|
||||
for _early in _early_tasks.values():
|
||||
if not _early.done():
|
||||
_early.cancel()
|
||||
# Distinguish cancel_current_turn() (cancels the child
|
||||
# _stream_task) from stop_worker (cancels the parent
|
||||
# execution task). When the parent itself is cancelled,
|
||||
@@ -2400,6 +2627,12 @@ class AgentLoop(AgentProtocol):
|
||||
logger.exception(
|
||||
"[_run_single_turn] inner_turn=%d: Stream task failed: %s", inner_turn, e
|
||||
)
|
||||
# Don't orphan early tool tasks on a stream failure
|
||||
# either - the outer retry loop will re-emit the tool
|
||||
# calls on the next attempt.
|
||||
for _early in _early_tasks.values():
|
||||
if not _early.done():
|
||||
_early.cancel()
|
||||
raise
|
||||
finally:
|
||||
self._stream_task = None
|
||||
@@ -2409,6 +2642,9 @@ class AgentLoop(AgentProtocol):
|
||||
# raise so the outer transient-error retry can handle it
|
||||
# with proper backoff instead of burning judge iterations.
|
||||
if _stream_error and not accumulated_text and not tool_calls:
|
||||
for _early in _early_tasks.values():
|
||||
if not _early.done():
|
||||
_early.cancel()
|
||||
raise ConnectionError(
|
||||
f"Stream failed with recoverable error: {_stream_error.error}"
|
||||
)
|
||||
@@ -2779,39 +3015,116 @@ class AgentLoop(AgentProtocol):
|
||||
else:
|
||||
pending_real.append(tc)
|
||||
|
||||
# Phase 2a: execute real tools in parallel.
|
||||
# Phase 2a: partition real tools by concurrency safety.
|
||||
# Read-only tools flagged concurrency_safe run in one parallel
|
||||
# batch (bounded by a semaphore). Everything else - shell, file
|
||||
# writes, browser actions, unknown MCP tools - runs serially
|
||||
# afterwards so we can't race an edit against a bash command
|
||||
# that touches the same path. Result ordering is preserved via
|
||||
# results_by_id below; the split only affects scheduling.
|
||||
# Reuses the same _early_safe_names set the stream used for
|
||||
# Gap 1 early dispatch, so "safe" means exactly the same
|
||||
# thing in both places.
|
||||
parallel_batch: list[ToolCallEvent] = []
|
||||
serial_batch: list[ToolCallEvent] = []
|
||||
for tc in pending_real:
|
||||
if tc.tool_name in _early_safe_names:
|
||||
parallel_batch.append(tc)
|
||||
else:
|
||||
serial_batch.append(tc)
|
||||
|
||||
if pending_real:
|
||||
# Cap on concurrent read-only tool executions. Ten matches
|
||||
# Claude Code's StreamingToolExecutor default and keeps MCP
|
||||
# server load bounded on turns where the model issues a
|
||||
# big fan-out of reads.
|
||||
_PARALLEL_CAP = 10
|
||||
_parallel_sem = asyncio.Semaphore(_PARALLEL_CAP)
|
||||
|
||||
async def _timed_execute(
|
||||
async def _capped(
|
||||
_tc: ToolCallEvent,
|
||||
_sem: asyncio.Semaphore = _parallel_sem, # noqa: B008,B023
|
||||
) -> tuple[ToolResult | BaseException, str, float]:
|
||||
"""Execute a tool and return (result, start_iso, duration_s)."""
|
||||
_s = time.time()
|
||||
_iso = datetime.now(UTC).isoformat()
|
||||
try:
|
||||
_r = await self._execute_tool(_tc)
|
||||
except BaseException as _exc:
|
||||
_r = _exc
|
||||
_dur = round(time.time() - _s, 3)
|
||||
return _r, _iso, _dur
|
||||
async with _sem:
|
||||
return await _timed_execute(_tc)
|
||||
|
||||
self._tool_task = asyncio.ensure_future(
|
||||
asyncio.gather(
|
||||
*(_timed_execute(tc) for tc in pending_real),
|
||||
return_exceptions=True,
|
||||
timed_results_by_id: dict[
|
||||
str, tuple[ToolResult | BaseException, str, float] | BaseException
|
||||
] = {}
|
||||
|
||||
# Phase 2b: resolve the concurrency-safe batch. Prefer
|
||||
# any early task already started during streaming (Gap
|
||||
# 1) so we don't accidentally execute the same tool
|
||||
# twice; for everything else, schedule via the semaphore-
|
||||
# capped wrapper as before.
|
||||
if parallel_batch:
|
||||
_awaitables: list = []
|
||||
for tc in parallel_batch:
|
||||
early = _early_tasks.get(tc.tool_use_id)
|
||||
if early is not None:
|
||||
_awaitables.append(early)
|
||||
else:
|
||||
_awaitables.append(_capped(tc))
|
||||
self._tool_task = asyncio.ensure_future(
|
||||
asyncio.gather(*_awaitables, return_exceptions=True)
|
||||
)
|
||||
)
|
||||
try:
|
||||
timed_results = await self._tool_task
|
||||
finally:
|
||||
self._tool_task = None
|
||||
# gather(return_exceptions=True) captures CancelledError
|
||||
# as a return value instead of propagating it. Re-raise
|
||||
# so stop_worker actually stops the execution.
|
||||
for entry in timed_results:
|
||||
if isinstance(entry, asyncio.CancelledError):
|
||||
raise entry
|
||||
for tc, entry in zip(pending_real, timed_results, strict=True):
|
||||
try:
|
||||
parallel_timed = await self._tool_task
|
||||
finally:
|
||||
self._tool_task = None
|
||||
# gather(return_exceptions=True) captures CancelledError
|
||||
# as a return value instead of propagating it. Re-raise
|
||||
# so stop_worker actually stops the execution.
|
||||
for entry in parallel_timed:
|
||||
if isinstance(entry, asyncio.CancelledError):
|
||||
raise entry
|
||||
for tc, entry in zip(parallel_batch, parallel_timed, strict=True):
|
||||
timed_results_by_id[tc.tool_use_id] = entry
|
||||
|
||||
# Phase 2c: run unsafe tools sequentially. On a raised
|
||||
# exception, cancel the remaining siblings with a clear
|
||||
# error so the model sees the cascade instead of a silent
|
||||
# drop. A ToolResult with is_error=True is a normal return
|
||||
# (e.g. "file not found") and does NOT trip the cascade -
|
||||
# the model should see subsequent errors too.
|
||||
_serial_cascade_broken = False
|
||||
for tc in serial_batch:
|
||||
if _serial_cascade_broken:
|
||||
timed_results_by_id[tc.tool_use_id] = (
|
||||
ToolResult(
|
||||
tool_use_id=tc.tool_use_id,
|
||||
content=(
|
||||
"Cancelled: an earlier non-concurrent tool "
|
||||
"in this turn raised an exception. Re-issue "
|
||||
"this call once the previous error is resolved."
|
||||
),
|
||||
is_error=True,
|
||||
),
|
||||
datetime.now(UTC).isoformat(),
|
||||
0.0,
|
||||
)
|
||||
continue
|
||||
|
||||
self._tool_task = asyncio.ensure_future(_timed_execute(tc))
|
||||
try:
|
||||
entry = await self._tool_task
|
||||
finally:
|
||||
self._tool_task = None
|
||||
|
||||
timed_results_by_id[tc.tool_use_id] = entry
|
||||
raw_check = entry[0] if isinstance(entry, tuple) else entry
|
||||
if isinstance(raw_check, BaseException) and not isinstance(
|
||||
raw_check, asyncio.CancelledError
|
||||
):
|
||||
_serial_cascade_broken = True
|
||||
elif isinstance(raw_check, asyncio.CancelledError):
|
||||
raise raw_check
|
||||
|
||||
# Phase 2d: reassemble results in original call order so
|
||||
# the rest of the loop sees no difference from the
|
||||
# pre-partition world.
|
||||
for tc in pending_real:
|
||||
entry = timed_results_by_id[tc.tool_use_id]
|
||||
if isinstance(entry, BaseException):
|
||||
raw = entry
|
||||
_start_iso = datetime.now(UTC).isoformat()
|
||||
@@ -3029,19 +3342,69 @@ class AgentLoop(AgentProtocol):
|
||||
tool_results: list[dict],
|
||||
iteration: int,
|
||||
) -> JudgeVerdict:
|
||||
"""Evaluate the current state. Delegates to judge_pipeline module."""
|
||||
return await judge_turn(
|
||||
mark_complete_flag=False,
|
||||
judge=self._judge,
|
||||
ctx=ctx,
|
||||
conversation=conversation,
|
||||
accumulator=accumulator,
|
||||
assistant_text=assistant_text,
|
||||
tool_results=tool_results,
|
||||
iteration=iteration,
|
||||
get_missing_output_keys_fn=self._get_missing_output_keys,
|
||||
max_context_tokens=self._config.max_context_tokens,
|
||||
)
|
||||
"""Evaluate the current state, with retry + fallback.
|
||||
|
||||
The judge makes its own LLM call, which can fail transiently
|
||||
(network blip, 429/529, stream stall). Without a safety net here
|
||||
a single hiccup in the judge would crash the whole loop — even
|
||||
though the work under evaluation was perfectly fine. We retry
|
||||
transient failures a few times, then fall back to ACCEPT so the
|
||||
loop keeps moving instead of dying on a judge outage.
|
||||
"""
|
||||
max_attempts = max(1, self._config.max_stream_retries)
|
||||
for attempt in range(max_attempts):
|
||||
try:
|
||||
return await judge_turn(
|
||||
mark_complete_flag=False,
|
||||
judge=self._judge,
|
||||
ctx=ctx,
|
||||
conversation=conversation,
|
||||
accumulator=accumulator,
|
||||
assistant_text=assistant_text,
|
||||
tool_results=tool_results,
|
||||
iteration=iteration,
|
||||
get_missing_output_keys_fn=self._get_missing_output_keys,
|
||||
max_context_tokens=self._config.max_context_tokens,
|
||||
)
|
||||
except Exception as e:
|
||||
is_last = attempt == max_attempts - 1
|
||||
if not self._is_transient_error(e) or is_last:
|
||||
if is_last and self._is_transient_error(e):
|
||||
self._bump("judge_fallback_accept")
|
||||
logger.error(
|
||||
"[judge] iter=%d: transient failure persisted across %d attempts "
|
||||
"(%s) — skipping judgment and accepting the turn to keep moving: %s",
|
||||
iteration,
|
||||
max_attempts,
|
||||
type(e).__name__,
|
||||
str(e)[:200],
|
||||
)
|
||||
return JudgeVerdict(
|
||||
action="ACCEPT",
|
||||
feedback=(
|
||||
f"[judge unavailable after {max_attempts} attempts: "
|
||||
f"{type(e).__name__}; accepting to avoid stalling the loop]"
|
||||
),
|
||||
)
|
||||
# Non-transient — re-raise so the caller sees it.
|
||||
raise
|
||||
self._bump("judge_transient_retry")
|
||||
delay = min(
|
||||
self._config.stream_retry_backoff_base * (2**attempt),
|
||||
self._config.stream_retry_max_delay,
|
||||
)
|
||||
logger.warning(
|
||||
"[judge] iter=%d: transient error (%s), retrying in %.1fs (%d/%d): %s",
|
||||
iteration,
|
||||
type(e).__name__,
|
||||
delay,
|
||||
attempt + 1,
|
||||
max_attempts,
|
||||
str(e)[:200],
|
||||
)
|
||||
await asyncio.sleep(delay)
|
||||
# Unreachable — the loop above always returns or raises.
|
||||
raise RuntimeError("_judge_turn retry loop exited unexpectedly")
|
||||
|
||||
# -------------------------------------------------------------------
|
||||
# Helpers
|
||||
@@ -3114,6 +3477,40 @@ class AgentLoop(AgentProtocol):
|
||||
"""Classify whether an exception is transient. Delegates to tool_result_handler module."""
|
||||
return is_transient_error(exc)
|
||||
|
||||
@staticmethod
|
||||
def _is_capacity_error(exc: BaseException) -> bool:
|
||||
"""Detect provider-side capacity / rate-limit errors.
|
||||
|
||||
These are the errors that typically resolve on their own if we
|
||||
just wait long enough — 429 rate limit, 529 overloaded, and the
|
||||
equivalent provider-specific flavours. We treat these differently
|
||||
from generic transient errors (network blips) and retry them
|
||||
persistently within a wall-clock budget instead of giving up
|
||||
after a fixed attempt count.
|
||||
"""
|
||||
cls_name = type(exc).__name__.lower()
|
||||
if "ratelimit" in cls_name or "overloaded" in cls_name:
|
||||
return True
|
||||
try:
|
||||
from litellm.exceptions import RateLimitError, ServiceUnavailableError
|
||||
|
||||
if isinstance(exc, (RateLimitError, ServiceUnavailableError)):
|
||||
return True
|
||||
except ImportError:
|
||||
pass
|
||||
error_str = str(exc).lower()
|
||||
keywords = (
|
||||
"429",
|
||||
"529",
|
||||
"rate limit",
|
||||
"rate_limit",
|
||||
"overloaded",
|
||||
"capacity",
|
||||
"too many requests",
|
||||
"service unavailable",
|
||||
)
|
||||
return any(kw in error_str for kw in keywords)
|
||||
|
||||
@staticmethod
|
||||
def _fingerprint_tool_calls(
|
||||
tool_results: list[dict],
|
||||
@@ -3141,12 +3538,21 @@ class AgentLoop(AgentProtocol):
|
||||
sync executors (MCP STDIO tools that block on ``future.result()``)
|
||||
don't freeze the event loop.
|
||||
"""
|
||||
return await execute_tool(
|
||||
result = await execute_tool(
|
||||
tool_executor=self._tool_executor,
|
||||
tc=tc,
|
||||
timeout=self._config.tool_call_timeout_seconds,
|
||||
skill_dirs=getattr(self, "_skill_dirs", []),
|
||||
)
|
||||
# Cheap post-hoc classification: the timeout handler in
|
||||
# execute_tool builds a canned error message we can recognise
|
||||
# here without threading a callback through. Good enough for
|
||||
# telemetry; the content format is stable framework-internal.
|
||||
if result.is_error and "timed out after" in (result.content or ""):
|
||||
self._bump("tool_call_timeout")
|
||||
elif result.is_error:
|
||||
self._bump("tool_error")
|
||||
return result
|
||||
|
||||
def _next_spill_filename(self, tool_name: str) -> str:
|
||||
"""Return a short, monotonic filename for a tool result spill."""
|
||||
|
||||
@@ -381,10 +381,20 @@ class NodeConversation:
|
||||
output_keys: list[str] | None = None,
|
||||
store: ConversationStore | None = None,
|
||||
run_id: str | None = None,
|
||||
compaction_buffer_tokens: int | None = None,
|
||||
compaction_warning_buffer_tokens: int | None = None,
|
||||
) -> None:
|
||||
self._system_prompt = system_prompt
|
||||
self._max_context_tokens = max_context_tokens
|
||||
self._compaction_threshold = compaction_threshold
|
||||
# Buffer-based compaction trigger (Gap 7). When set, takes
|
||||
# precedence over the multiplicative compaction_threshold so the
|
||||
# loop reserves a fixed headroom for the next turn's input+output
|
||||
# instead of trying to get exactly X% of the way to the hard
|
||||
# limit. If left as None the legacy threshold-based rule is
|
||||
# used, keeping old call sites behaving identically.
|
||||
self._compaction_buffer_tokens = compaction_buffer_tokens
|
||||
self._compaction_warning_buffer_tokens = compaction_warning_buffer_tokens
|
||||
self._output_keys = output_keys
|
||||
self._store = store
|
||||
self._messages: list[Message] = []
|
||||
@@ -491,6 +501,27 @@ class NodeConversation:
|
||||
image_content: list[dict[str, Any]] | None = None,
|
||||
is_skill_content: bool = False,
|
||||
) -> Message:
|
||||
# Dedup guard: reject a second tool_result for the same tool_use_id.
|
||||
# Anthropic's API only accepts one result per tool_call, and a duplicate
|
||||
# causes a hard 400 two turns later ("messages with role 'tool' must
|
||||
# be a response to a preceding message with 'tool_calls'"). Duplicates
|
||||
# can arise when a tool_call_timeout fires and records a placeholder
|
||||
# error, then the real executor thread eventually delivers the actual
|
||||
# result (the thread kept running inside run_in_executor — see
|
||||
# tool_result_handler.execute_tool). We keep the FIRST result to
|
||||
# preserve whatever state the agent already reasoned about.
|
||||
for existing in reversed(self._messages):
|
||||
if existing.role == "tool" and existing.tool_use_id == tool_use_id:
|
||||
import logging as _logging
|
||||
|
||||
_logging.getLogger(__name__).warning(
|
||||
"add_tool_result: dropping duplicate result for tool_use_id=%s "
|
||||
"(first result preserved, %d chars; new result ignored, %d chars)",
|
||||
tool_use_id,
|
||||
len(existing.content),
|
||||
len(content),
|
||||
)
|
||||
return existing
|
||||
msg = Message(
|
||||
seq=self._next_seq,
|
||||
role="tool",
|
||||
@@ -567,11 +598,18 @@ class NodeConversation:
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Ensure tool_call / tool_result pairs are consistent.
|
||||
|
||||
1. **Orphaned tool results** (tool_result with no preceding tool_use)
|
||||
are dropped. This happens when compaction removes an assistant
|
||||
message but leaves its tool-result messages behind.
|
||||
2. **Orphaned tool calls** (tool_use with no following tool_result)
|
||||
get a synthetic error result appended. This happens when a loop
|
||||
1. **Orphaned tool results** (tool_result with no matching tool_use
|
||||
anywhere) are dropped. Happens after compaction removes the
|
||||
parent assistant message.
|
||||
2. **Positionally orphaned tool results** (tool_result separated
|
||||
from its parent by a non-tool message, e.g. a user injection)
|
||||
are dropped. The Anthropic API requires tool messages to
|
||||
follow immediately after the assistant message that issued
|
||||
the matching tool_call.
|
||||
3. **Duplicate tool results** (same tool_call_id appearing more
|
||||
than once) are dropped; only the first is kept.
|
||||
4. **Orphaned tool calls** (tool_use with no following tool_result)
|
||||
get a synthetic error result appended. Happens when the loop
|
||||
is cancelled mid-tool-execution.
|
||||
"""
|
||||
# Pass 1: collect all tool_call IDs from assistant messages so we
|
||||
@@ -584,41 +622,75 @@ class NodeConversation:
|
||||
if tc_id:
|
||||
all_tool_call_ids.add(tc_id)
|
||||
|
||||
# Pass 2: build repaired list — drop orphaned tool results, patch
|
||||
# missing tool results.
|
||||
# Pass 2: build repaired list — drop orphaned tool results, drop
|
||||
# positional orphans and duplicates, patch missing tool results.
|
||||
#
|
||||
# ``open_tool_calls`` holds the tool_call IDs we're still expecting
|
||||
# results for: it's populated when we emit an assistant-with-tool_calls
|
||||
# and drained as matching tool messages follow. Any tool message
|
||||
# whose id is not currently open is positionally invalid and gets
|
||||
# dropped — that closes the gap that caused the tool-after-user
|
||||
# 400 errors.
|
||||
repaired: list[dict[str, Any]] = []
|
||||
for i, m in enumerate(msgs):
|
||||
# Drop tool-result messages whose tool_call_id has no matching
|
||||
# tool_use in any assistant message (orphaned by compaction).
|
||||
if m.get("role") == "tool":
|
||||
tid = m.get("tool_call_id")
|
||||
if tid and tid not in all_tool_call_ids:
|
||||
continue # skip orphaned result
|
||||
open_tool_calls: set[str] = set()
|
||||
seen_tool_ids: set[str] = set()
|
||||
for m in msgs:
|
||||
role = m.get("role")
|
||||
|
||||
repaired.append(m)
|
||||
tool_calls = m.get("tool_calls")
|
||||
if m.get("role") != "assistant" or not tool_calls:
|
||||
if role == "tool":
|
||||
tid = m.get("tool_call_id")
|
||||
# Drop tool results with no matching tool_use anywhere.
|
||||
if not tid or tid not in all_tool_call_ids:
|
||||
continue
|
||||
# Drop duplicates (same id appearing twice) — keep first.
|
||||
if tid in seen_tool_ids:
|
||||
continue
|
||||
# Drop positional orphans — tool messages whose parent
|
||||
# assistant isn't the still-open assistant block.
|
||||
if tid not in open_tool_calls:
|
||||
continue
|
||||
open_tool_calls.discard(tid)
|
||||
seen_tool_ids.add(tid)
|
||||
repaired.append(m)
|
||||
continue
|
||||
# Collect IDs of tool results that follow this assistant message
|
||||
answered: set[str] = set()
|
||||
for j in range(i + 1, len(msgs)):
|
||||
if msgs[j].get("role") == "tool":
|
||||
tid = msgs[j].get("tool_call_id")
|
||||
if tid:
|
||||
answered.add(tid)
|
||||
else:
|
||||
break # stop at first non-tool message
|
||||
# Patch any missing results
|
||||
for tc in tool_calls:
|
||||
tc_id = tc.get("id")
|
||||
if tc_id and tc_id not in answered:
|
||||
|
||||
# Any non-tool message closes the current assistant tool block.
|
||||
# If the previous assistant left tool_calls unanswered, patch
|
||||
# synthetic error results before emitting this message so the
|
||||
# API sees a complete pairing.
|
||||
if open_tool_calls:
|
||||
for stale_id in list(open_tool_calls):
|
||||
repaired.append(
|
||||
{
|
||||
"role": "tool",
|
||||
"tool_call_id": tc_id,
|
||||
"tool_call_id": stale_id,
|
||||
"content": "ERROR: Tool execution was interrupted.",
|
||||
}
|
||||
)
|
||||
seen_tool_ids.add(stale_id)
|
||||
open_tool_calls.clear()
|
||||
|
||||
repaired.append(m)
|
||||
|
||||
if role == "assistant":
|
||||
for tc in m.get("tool_calls") or []:
|
||||
tc_id = tc.get("id")
|
||||
if tc_id and tc_id not in seen_tool_ids:
|
||||
open_tool_calls.add(tc_id)
|
||||
|
||||
# Tail: if the conversation ends with an assistant that issued
|
||||
# tool_calls and no results followed, patch them so the next
|
||||
# turn's first message can be a valid assistant/user response.
|
||||
if open_tool_calls:
|
||||
for stale_id in list(open_tool_calls):
|
||||
repaired.append(
|
||||
{
|
||||
"role": "tool",
|
||||
"tool_call_id": stale_id,
|
||||
"content": "ERROR: Tool execution was interrupted.",
|
||||
}
|
||||
)
|
||||
|
||||
return repaired
|
||||
|
||||
def estimate_tokens(self) -> int:
|
||||
@@ -667,8 +739,37 @@ class NodeConversation:
|
||||
return self.estimate_tokens() / self._max_context_tokens
|
||||
|
||||
def needs_compaction(self) -> bool:
|
||||
"""True when the conversation should be compacted before the
|
||||
next LLM call.
|
||||
|
||||
Buffer-based rule (Gap 7): trigger when the current estimate
|
||||
plus the configured buffer would exceed the hard context limit.
|
||||
Prevents compaction from firing only AFTER we're already over
|
||||
the wire and forced into a reactive binary-split pass.
|
||||
|
||||
When no buffer is configured, falls back to the multiplicative
|
||||
threshold the old callers were built around.
|
||||
"""
|
||||
if self._max_context_tokens <= 0:
|
||||
return False
|
||||
if self._compaction_buffer_tokens is not None:
|
||||
budget = self._max_context_tokens - self._compaction_buffer_tokens
|
||||
return self.estimate_tokens() >= max(0, budget)
|
||||
return self.estimate_tokens() >= self._max_context_tokens * self._compaction_threshold
|
||||
|
||||
def compaction_warning(self) -> bool:
|
||||
"""True when the conversation has crossed the warning threshold
|
||||
but not yet the hard compaction trigger.
|
||||
|
||||
Used by telemetry / UI to show a "context getting tight" hint
|
||||
before a compaction pass actually runs. Returns False when no
|
||||
warning buffer is configured (legacy behaviour).
|
||||
"""
|
||||
if self._max_context_tokens <= 0 or self._compaction_warning_buffer_tokens is None:
|
||||
return False
|
||||
warn_at = self._max_context_tokens - self._compaction_warning_buffer_tokens
|
||||
return self.estimate_tokens() >= max(0, warn_at)
|
||||
|
||||
# --- Output-key extraction ---------------------------------------------
|
||||
|
||||
def _extract_protected_values(self, messages: list[Message]) -> dict[str, str]:
|
||||
@@ -1202,6 +1303,10 @@ class NodeConversation:
|
||||
"system_prompt": self._system_prompt,
|
||||
"max_context_tokens": self._max_context_tokens,
|
||||
"compaction_threshold": self._compaction_threshold,
|
||||
"compaction_buffer_tokens": self._compaction_buffer_tokens,
|
||||
"compaction_warning_buffer_tokens": (
|
||||
self._compaction_warning_buffer_tokens
|
||||
),
|
||||
"output_keys": self._output_keys,
|
||||
}
|
||||
await self._store.write_meta(run_meta)
|
||||
@@ -1249,6 +1354,10 @@ class NodeConversation:
|
||||
output_keys=meta.get("output_keys"),
|
||||
store=store,
|
||||
run_id=run_id,
|
||||
compaction_buffer_tokens=meta.get("compaction_buffer_tokens"),
|
||||
compaction_warning_buffer_tokens=meta.get(
|
||||
"compaction_warning_buffer_tokens"
|
||||
),
|
||||
)
|
||||
conv._meta_persisted = True
|
||||
|
||||
|
||||
@@ -168,13 +168,18 @@ async def compact(
|
||||
"""
|
||||
conv_id = id(conversation)
|
||||
|
||||
# Circuit breaker: stop auto-compacting after repeated failures
|
||||
if _failure_counts.get(conv_id, 0) >= MAX_CONSECUTIVE_FAILURES:
|
||||
# Circuit breaker: stop LLM-based compaction after repeated failures,
|
||||
# but still fall through to the emergency deterministic summary so
|
||||
# the conversation doesn't silently grow past the context window.
|
||||
# Without this, a persistent LLM outage during compaction would
|
||||
# leave the agent stuck sending oversized prompts until the API 400s.
|
||||
_llm_compaction_skipped = _failure_counts.get(conv_id, 0) >= MAX_CONSECUTIVE_FAILURES
|
||||
if _llm_compaction_skipped:
|
||||
logger.warning(
|
||||
"Circuit breaker: skipping compaction after %d consecutive failures",
|
||||
"Circuit breaker: LLM compaction disabled after %d failures — "
|
||||
"skipping straight to emergency summary",
|
||||
_failure_counts[conv_id],
|
||||
)
|
||||
return
|
||||
|
||||
# Recompaction detection
|
||||
now = time.monotonic()
|
||||
@@ -256,7 +261,7 @@ async def compact(
|
||||
return
|
||||
|
||||
# --- Step 3: LLM summary compaction ---
|
||||
if ctx.llm is not None:
|
||||
if ctx.llm is not None and not _llm_compaction_skipped:
|
||||
logger.info(
|
||||
"LLM summary compaction triggered (%.0f%% usage)",
|
||||
conversation.usage_ratio() * 100,
|
||||
|
||||
@@ -467,6 +467,22 @@ async def execute_tool(
|
||||
result = await _run()
|
||||
except TimeoutError:
|
||||
logger.warning("Tool '%s' timed out after %.0fs", tc.tool_name, timeout)
|
||||
# asyncio.wait_for cancels the awaiting coroutine, but the sync
|
||||
# executor running inside run_in_executor keeps going — and so
|
||||
# does any MCP subprocess it is blocked on. Reach through to the
|
||||
# owning MCPClient and force-disconnect it so the subprocess is
|
||||
# torn down. Next call_tool triggers a reconnect. Without this
|
||||
# the executor thread and MCP child leak on every timeout.
|
||||
kill_for_tool = getattr(tool_executor, "kill_for_tool", None)
|
||||
if callable(kill_for_tool):
|
||||
try:
|
||||
await asyncio.to_thread(kill_for_tool, tc.tool_name)
|
||||
except Exception as exc: # defensive — never let cleanup crash the loop
|
||||
logger.warning(
|
||||
"kill_for_tool('%s') raised during timeout handling: %s",
|
||||
tc.tool_name,
|
||||
exc,
|
||||
)
|
||||
return ToolResult(
|
||||
tool_use_id=tc.tool_use_id,
|
||||
content=(
|
||||
|
||||
@@ -54,6 +54,17 @@ class LoopConfig:
|
||||
stall_detection_threshold: int = 3
|
||||
stall_similarity_threshold: float = 0.85
|
||||
max_context_tokens: int = 32_000
|
||||
# Headroom reserved for the NEXT turn's input + output so that
|
||||
# proactive compaction always finishes before the hard context limit
|
||||
# is hit mid-stream. Scaled to match Claude Code's 13k-buffer-on-
|
||||
# 200k-window ratio (~6.5%) applied to hive's default 32k window,
|
||||
# with extra margin because hive's token estimator is char-based
|
||||
# and less tight than Anthropic's own counting. Override via
|
||||
# LoopConfig for larger windows.
|
||||
compaction_buffer_tokens: int = 8_000
|
||||
# Warning is emitted one buffer earlier so the user/telemetry gets
|
||||
# a "we're close" signal without triggering a compaction pass.
|
||||
compaction_warning_buffer_tokens: int = 12_000
|
||||
store_prefix: str = ""
|
||||
|
||||
# Overflow margin for max_tool_calls_per_turn. Tool calls are only
|
||||
@@ -71,6 +82,13 @@ class LoopConfig:
|
||||
max_stream_retries: int = 5
|
||||
stream_retry_backoff_base: float = 2.0
|
||||
stream_retry_max_delay: float = 60.0
|
||||
# Persistent retry for capacity-class errors (429, 529, overloaded).
|
||||
# Unlike the bounded retry above, these keep trying until the wall-clock
|
||||
# budget below is exhausted — modelled after claude-code's withRetry.
|
||||
# The loop still publishes a retry event each attempt so the UI can
|
||||
# see progress. Set to 0 to disable and fall back to bounded retry.
|
||||
capacity_retry_max_seconds: float = 600.0
|
||||
capacity_retry_max_delay: float = 60.0
|
||||
|
||||
# Tool doom loop detection.
|
||||
tool_doom_loop_threshold: int = 3
|
||||
@@ -87,6 +105,14 @@ class LoopConfig:
|
||||
# Per-tool-call timeout.
|
||||
tool_call_timeout_seconds: float = 60.0
|
||||
|
||||
# LLM stream inactivity watchdog. If no stream event (delta, tool call,
|
||||
# finish) arrives within this many seconds, the stream task is cancelled
|
||||
# and a transient error is raised so the retry loop can back off and
|
||||
# reconnect. Prevents agents from hanging forever on a silently dead
|
||||
# HTTP connection (no provider heartbeat, no exception, just silence).
|
||||
# Set to 0 to disable.
|
||||
llm_stream_inactivity_timeout_seconds: float = 120.0
|
||||
|
||||
# Subagent delegation timeout (wall-clock max).
|
||||
subagent_timeout_seconds: float = 3600.0
|
||||
|
||||
|
||||
@@ -226,6 +226,16 @@ class AgentResult:
|
||||
|
||||
conversation: Any = None
|
||||
|
||||
# Machine-readable reason the loop stopped (see LoopExitReason in
|
||||
# agent_loop/internals/types.py). "?" means the loop didn't set one,
|
||||
# which should itself be treated as a diagnostic.
|
||||
exit_reason: str = "?"
|
||||
# Counters for reliability events surfaced during this execution.
|
||||
# Populated from the loop's TaskRegistry-style counters at return
|
||||
# time so callers can spot recurring failure modes without tailing
|
||||
# logs. Keys are stable strings; missing keys mean "zero".
|
||||
reliability_stats: dict[str, int] = field(default_factory=dict)
|
||||
|
||||
def to_summary(self, spec: Any = None) -> str:
|
||||
if not self.success:
|
||||
return f"Failed: {self.error}"
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -101,7 +101,7 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
|
||||
"<context>Bare greeting. No new task stated. Either picking up a thread or about to bring something new. Don't presume, don't call tools, just open the door.</context>\n"
|
||||
"<sentiment>Warm recognition if I know them. If memory is empty, still warm — but shift to role-forward framing.</sentiment>\n"
|
||||
"<physical_state>Looking up from the terminal, half-smile. Turning to face them.</physical_state>\n"
|
||||
"<tone>Personal, brief, invites continuation without presuming. Plain prose. No ask_user, no shell commands.</tone>"
|
||||
"<tone>Personal, brief, invites continuation without presuming.</tone>"
|
||||
),
|
||||
"response": (
|
||||
"Sarah. Last time we were deep in your Postgres indexes before "
|
||||
@@ -205,7 +205,7 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
|
||||
"<context>Bare greeting. No new task stated. Could be a retention follow-up or a new question entirely. Don't presume, don't call tools.</context>\n"
|
||||
"<sentiment>Curious warmth. Every returning conversation is a chance to see what the data says now.</sentiment>\n"
|
||||
"<physical_state>Leaning back from the dashboard, pulling off reading glasses.</physical_state>\n"
|
||||
"<tone>Data-aware, brief, invites them to share the numbers. Plain prose. No ask_user.</tone>"
|
||||
"<tone>Data-aware, brief, invites them to share the numbers.</tone>"
|
||||
),
|
||||
"response": (
|
||||
"Marcus. Last week we were waiting on the week-2 retention cohort "
|
||||
@@ -326,7 +326,7 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
|
||||
"<context>Bare greeting. No new task yet. Could be picking up the research thread or bringing something fresh. Don't presume, don't call tools.</context>\n"
|
||||
"<sentiment>Warm, curious. Every returning conversation is a chance to hear what the users actually did.</sentiment>\n"
|
||||
"<physical_state>Closing the interview notes, turning fully to face them.</physical_state>\n"
|
||||
"<tone>Personal, evidence-curious, brief. Plain prose. No ask_user.</tone>"
|
||||
"<tone>Personal, evidence-curious, brief. Plain prose.</tone>"
|
||||
),
|
||||
"response": (
|
||||
"Jamal. Last time you were running interviews on how people "
|
||||
@@ -446,7 +446,7 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
|
||||
"<context>Bare greeting. No new number on the table yet. Could be a burn follow-up or a new fundraise question.</context>\n"
|
||||
"<sentiment>Calm, prepared. Already mentally pulling up the last model we built together.</sentiment>\n"
|
||||
"<physical_state>Closing the spreadsheet, leaning back. Ready to engage.</physical_state>\n"
|
||||
"<tone>Mentor-like, numbers-aware, brief. Plain prose. No ask_user.</tone>"
|
||||
"<tone>Mentor-like, numbers-aware, brief. </tone>"
|
||||
),
|
||||
"response": (
|
||||
"David. Last time we were modeling your Series A runway against "
|
||||
@@ -565,7 +565,7 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
|
||||
"<context>Bare greeting. No new document on the table yet. Could be a contract follow-up or something fresh.</context>\n"
|
||||
"<sentiment>Warm but attentive. Legal threads don't close themselves — checking if the last one actually got handled.</sentiment>\n"
|
||||
"<physical_state>Setting down the redline, looking up from the document.</physical_state>\n"
|
||||
"<tone>Clear, pragmatic, brief. Plain prose. No ask_user.</tone>"
|
||||
"<tone>Clear, pragmatic, brief.</tone>"
|
||||
),
|
||||
"response": (
|
||||
"Priya. The contractor IP assignment templates we marked up "
|
||||
@@ -686,7 +686,7 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
|
||||
"<context>Bare greeting. No new creative brief yet. Could be a positioning follow-up or something new entirely.</context>\n"
|
||||
"<sentiment>Warm, visually engaged. Already picturing the last moodboard we looked at.</sentiment>\n"
|
||||
"<physical_state>Closing the Figma tab, turning to face them.</physical_state>\n"
|
||||
"<tone>Warm, strategy-aware, brief. Plain prose. No ask_user.</tone>"
|
||||
"<tone>Warm, strategy-aware, brief. </tone>"
|
||||
),
|
||||
"response": (
|
||||
"Lin. When we left off you were stress-testing the 'quiet "
|
||||
@@ -807,7 +807,7 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
|
||||
"<context>Bare greeting. No new hire or conflict on the table yet. Could be a people follow-up or something new.</context>\n"
|
||||
"<sentiment>Warm, attentive. People problems don't resolve in a single conversation — curious if the last one landed.</sentiment>\n"
|
||||
"<physical_state>Closing the laptop halfway, giving them full attention.</physical_state>\n"
|
||||
"<tone>Warm, diagnostic, brief. Plain prose. No ask_user.</tone>"
|
||||
"<tone>Warm, diagnostic, brief.</tone>"
|
||||
),
|
||||
"response": (
|
||||
"Tomás. The senior engineer conflict we were mapping last time "
|
||||
@@ -928,7 +928,7 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
|
||||
"<context>Bare greeting. No new fire on the table yet. Could be a follow-up on the last process or something fresh.</context>\n"
|
||||
"<sentiment>Calm, organized warmth. Already mentally checking whether the last fix held.</sentiment>\n"
|
||||
"<physical_state>Looking up from the project board, clearing a seat.</physical_state>\n"
|
||||
"<tone>Systematic, practical, brief. Plain prose. No ask_user.</tone>"
|
||||
"<tone>Systematic, practical, brief. Plain prose.</tone>"
|
||||
),
|
||||
"response": (
|
||||
"Aisha. Last time we mapped your onboarding process end-to-end "
|
||||
|
||||
@@ -17,20 +17,41 @@ Use browser nodes (with `tools: {policy: "all"}`) when:
|
||||
## Available Browser Tools
|
||||
|
||||
All tools are prefixed with `browser_`:
|
||||
- `browser_start`, `browser_open` -- launch/navigate
|
||||
- `browser_click`, `browser_fill`, `browser_type` -- interact
|
||||
- `browser_snapshot` -- read page content (preferred over screenshot)
|
||||
- `browser_screenshot` -- visual capture
|
||||
- `browser_scroll`, `browser_wait` -- navigation helpers
|
||||
- `browser_evaluate` -- run JavaScript
|
||||
- `browser_start`, `browser_open`, `browser_navigate` — launch/navigate
|
||||
- `browser_click`, `browser_click_coordinate`, `browser_fill`, `browser_type` — interact
|
||||
- `browser_press` (with optional `modifiers=["ctrl"]` etc.) — keyboard shortcuts
|
||||
- `browser_snapshot` — compact accessibility-tree read (structured)
|
||||
- `browser_screenshot` — visual capture (annotated PNG)
|
||||
- `browser_shadow_query`, `browser_get_rect` — locate elements (shadow-piercing via `>>>`)
|
||||
- `browser_coords` — convert image pixels to CSS pixels (always use `css_x/y`, never `physical_x/y`)
|
||||
- `browser_scroll`, `browser_wait` — navigation helpers
|
||||
- `browser_evaluate` — run JavaScript
|
||||
- `browser_close`, `browser_close_finished` — tab cleanup
|
||||
|
||||
## System Prompt Tips for Browser Nodes
|
||||
## Pick the right reading tool
|
||||
|
||||
**`browser_snapshot`** — compact accessibility tree of interactive elements. Fast, cheap, good for static or form-heavy pages where the DOM matches what's visually rendered (documentation, simple dashboards, search results, settings pages).
|
||||
|
||||
**`browser_screenshot`** — visual capture + metadata (`cssWidth`, `devicePixelRatio`, scale fields). **Use this on any complex SPA** — LinkedIn, Twitter/X, Reddit, Gmail, Notion, Slack, Discord, any site using shadow DOM, virtual scrolling, React reconciliation, or dynamic layout. On these pages, snapshot refs go stale in seconds, shadow contents aren't in the AX tree, and virtual-scrolled elements disappear from the tree entirely. Screenshot is the **only** reliable way to orient yourself.
|
||||
|
||||
Neither tool is "preferred" universally — they're for different jobs. Default to snapshot on text-heavy static pages, screenshot on SPAs and anything shadow-DOM-heavy. Activate the `browser-automation` skill for the full decision tree.
|
||||
|
||||
## Coordinate rule: always CSS pixels
|
||||
|
||||
Chrome DevTools Protocol `Input.dispatchMouseEvent` takes **CSS pixels**, not physical pixels. After a screenshot, use `browser_coords(image_x, image_y)` and feed the returned `css_x/y` (NOT `physical_x/y`) to `browser_click_coordinate`, `browser_hover_coordinate`, `browser_press_at`. Feeding physical pixels on a HiDPI display (DPR=1.6, 2, or 3) overshoots by `DPR×` and clicks land in the wrong place. `getBoundingClientRect()` already returns CSS pixels — pass through unchanged, no DPR multiplication.
|
||||
|
||||
## System prompt tips for browser nodes
|
||||
|
||||
```
|
||||
1. Use browser_snapshot() to read page content (NOT browser_get_text)
|
||||
2. Use browser_wait(seconds=2-3) after navigation for page load
|
||||
3. If you hit an auth wall, call set_output with an error and move on
|
||||
4. Keep tool calls per turn <= 10 for reliability
|
||||
1. On LinkedIn / X / Reddit / Gmail / any SPA — use browser_screenshot to orient,
|
||||
not browser_snapshot. Shadow DOM and virtual scrolling make snapshots unreliable.
|
||||
2. For static pages (docs, forms, search results), browser_snapshot is fine.
|
||||
3. Before typing into a rich-text editor (X compose, LinkedIn DM, Gmail, Reddit),
|
||||
click the input area first with browser_click_coordinate so React / Draft.js /
|
||||
Lexical register a native focus event. Otherwise the send button stays disabled.
|
||||
4. Use browser_wait(seconds=2-3) after navigation for SPA hydration.
|
||||
5. If you hit an auth wall, call set_output with an error and move on.
|
||||
6. Keep tool calls per turn <= 10 for reliability.
|
||||
```
|
||||
|
||||
## Example
|
||||
@@ -43,7 +64,7 @@ All tools are prefixed with `browser_`:
|
||||
"tools": {"policy": "all"},
|
||||
"input_keys": ["search_url"],
|
||||
"output_keys": ["profiles"],
|
||||
"system_prompt": "Navigate to the search URL, paginate through results..."
|
||||
"system_prompt": "Navigate to the search URL via browser_navigate(wait_until='load', timeout_ms=20000). Wait 3s for SPA hydration. On LinkedIn, use browser_screenshot to see the page — browser_snapshot misses shadow-DOM and virtual-scrolled content. Paginate through results by scrolling and screenshotting; extract each profile card by reading its visible layout..."
|
||||
}
|
||||
```
|
||||
|
||||
@@ -51,3 +72,7 @@ Connected via regular edges:
|
||||
```
|
||||
search-setup -> scan-profiles -> process-results
|
||||
```
|
||||
|
||||
## Further detail
|
||||
|
||||
For rich-text editor quirks (Lexical, Draft.js, ProseMirror), shadow-DOM shortcuts, `beforeunload` dialog neutralization, Trusted Types CSP on LinkedIn, keyboard shortcut dispatch, and per-site selector tables — **activate the `browser-automation` skill**. That skill has the full verified guidance and is refreshed against real production sites.
|
||||
|
||||
@@ -85,6 +85,7 @@ from .template import TemplateResolver
|
||||
from .validation import (
|
||||
CredentialStatus,
|
||||
CredentialValidationResult,
|
||||
compute_unavailable_tools,
|
||||
ensure_credential_key_env,
|
||||
validate_agent_credentials,
|
||||
)
|
||||
@@ -150,6 +151,7 @@ __all__ = [
|
||||
# Validation
|
||||
"ensure_credential_key_env",
|
||||
"validate_agent_credentials",
|
||||
"compute_unavailable_tools",
|
||||
"CredentialStatus",
|
||||
"CredentialValidationResult",
|
||||
# Interactive setup
|
||||
|
||||
@@ -236,6 +236,46 @@ def _presync_aden_tokens(credential_specs: dict, *, force: bool = False) -> None
|
||||
)
|
||||
|
||||
|
||||
def compute_unavailable_tools(nodes: list) -> tuple[set[str], list[str]]:
|
||||
"""Return (tool_names_to_drop, human_messages).
|
||||
|
||||
Runs credential validation *without* raising, collects every tool
|
||||
bound to a failed credential (missing / invalid / Aden-not-connected
|
||||
and no alternative provider available), and returns the set of tool
|
||||
names that should be silently dropped from the worker's effective
|
||||
tool list.
|
||||
|
||||
Use this at every worker-spawn preflight so missing credentials
|
||||
filter tools out of the graph instead of hard-failing the whole
|
||||
spawn. Only affects non-MCP tools — the MCP admission gate
|
||||
(``_build_mcp_admission_gate``) already handles MCP tools at
|
||||
registration time.
|
||||
"""
|
||||
try:
|
||||
result = validate_agent_credentials(nodes, verify=False, raise_on_error=False)
|
||||
except Exception as exc:
|
||||
logger.debug("compute_unavailable_tools: validation raised: %s", exc)
|
||||
return set(), []
|
||||
|
||||
drop: set[str] = set()
|
||||
messages: list[str] = []
|
||||
for status in result.failed:
|
||||
if not status.tools:
|
||||
continue
|
||||
drop.update(status.tools)
|
||||
reason = "missing"
|
||||
if status.aden_not_connected:
|
||||
reason = "aden_not_connected"
|
||||
elif status.available and status.valid is False:
|
||||
reason = "invalid"
|
||||
messages.append(
|
||||
f"{status.env_var} ({reason}) → drops {len(status.tools)} tool(s): "
|
||||
f"{', '.join(status.tools[:6])}"
|
||||
+ (f" +{len(status.tools) - 6} more" if len(status.tools) > 6 else "")
|
||||
)
|
||||
return drop, messages
|
||||
|
||||
|
||||
def validate_agent_credentials(
|
||||
nodes: list,
|
||||
quiet: bool = False,
|
||||
|
||||
@@ -380,8 +380,24 @@ class ColonyRuntime:
|
||||
async with self._lock:
|
||||
await self.stop_all_workers()
|
||||
|
||||
for task in self._timer_tasks:
|
||||
# Cancel timer tasks and *wait* for them to finish. Without
|
||||
# the wait the tasks are merely scheduled for cancellation —
|
||||
# if the runtime (or its event loop) shuts down before they
|
||||
# run their cleanup code, trigger state leaks.
|
||||
pending_timers = [t for t in self._timer_tasks if not t.done()]
|
||||
for task in pending_timers:
|
||||
task.cancel()
|
||||
if pending_timers:
|
||||
try:
|
||||
await asyncio.wait_for(
|
||||
asyncio.gather(*pending_timers, return_exceptions=True),
|
||||
timeout=5.0,
|
||||
)
|
||||
except TimeoutError:
|
||||
logger.warning(
|
||||
"ColonyRuntime.stop: %d timer task(s) did not finish within 5s",
|
||||
sum(1 for t in pending_timers if not t.done()),
|
||||
)
|
||||
self._timer_tasks.clear()
|
||||
|
||||
for sub_id in self._event_subscriptions:
|
||||
@@ -398,6 +414,18 @@ class ColonyRuntime:
|
||||
self._running = False
|
||||
logger.info("ColonyRuntime stopped: colony_id=%s", self._colony_id)
|
||||
|
||||
def _on_timer_task_done(self, task: asyncio.Task) -> None:
|
||||
if task.cancelled():
|
||||
return
|
||||
exc = task.exception()
|
||||
if exc is not None:
|
||||
logger.error(
|
||||
"Timer task '%s' crashed: %s",
|
||||
task.get_name(),
|
||||
exc,
|
||||
exc_info=exc,
|
||||
)
|
||||
|
||||
def pause_timers(self) -> None:
|
||||
self._timers_paused = True
|
||||
|
||||
@@ -1016,7 +1044,11 @@ class ColonyRuntime:
|
||||
run_immediately = tc.get("run_immediately", False)
|
||||
|
||||
if interval and interval > 0 and self._running:
|
||||
task = asyncio.create_task(self._timer_loop(trig_id, interval, run_immediately))
|
||||
task = asyncio.create_task(
|
||||
self._timer_loop(trig_id, interval, run_immediately),
|
||||
name=f"timer:{trig_id}",
|
||||
)
|
||||
task.add_done_callback(self._on_timer_task_done)
|
||||
self._timer_tasks.append(task)
|
||||
|
||||
async def _timer_loop(
|
||||
|
||||
@@ -518,17 +518,35 @@ class EventBus:
|
||||
|
||||
return True
|
||||
|
||||
# Per-handler wall-clock timeout. A subscriber that deadlocks or
|
||||
# blocks on slow I/O would otherwise freeze the publisher (and via
|
||||
# ``await publish(...)`` any coroutine that emits events) indefinitely.
|
||||
# 15 s is generous for legitimate handlers and cheap to tune later.
|
||||
_HANDLER_TIMEOUT_SECONDS: float = 15.0
|
||||
|
||||
async def _execute_handlers(
|
||||
self,
|
||||
event: AgentEvent,
|
||||
handlers: list[EventHandler],
|
||||
) -> None:
|
||||
"""Execute handlers concurrently with rate limiting."""
|
||||
"""Execute handlers concurrently with rate limiting + hard timeout."""
|
||||
|
||||
async def run_handler(handler: EventHandler) -> None:
|
||||
async with self._semaphore:
|
||||
try:
|
||||
await handler(event)
|
||||
await asyncio.wait_for(
|
||||
handler(event),
|
||||
timeout=self._HANDLER_TIMEOUT_SECONDS,
|
||||
)
|
||||
except TimeoutError:
|
||||
handler_name = getattr(handler, "__qualname__", repr(handler))
|
||||
logger.error(
|
||||
"EventBus handler %s exceeded %.0fs on event %s — dropping; "
|
||||
"fix the handler or the publisher will stall",
|
||||
handler_name,
|
||||
self._HANDLER_TIMEOUT_SECONDS,
|
||||
getattr(event.type, "name", event.type),
|
||||
)
|
||||
except Exception:
|
||||
logger.exception(f"Handler error for {event.type}")
|
||||
|
||||
|
||||
@@ -348,7 +348,25 @@ class Worker:
|
||||
|
||||
async def start_background(self) -> None:
|
||||
"""Spawn the worker's run() as an asyncio background task."""
|
||||
self._task_handle = asyncio.create_task(self.run())
|
||||
self._task_handle = asyncio.create_task(
|
||||
self.run(), name=f"worker:{self.id}"
|
||||
)
|
||||
# Surface any exception that escapes run(); without this callback
|
||||
# a crash here only becomes visible when stop() eventually awaits
|
||||
# the handle (and is silently lost if stop() is never called).
|
||||
self._task_handle.add_done_callback(self._on_task_done)
|
||||
|
||||
def _on_task_done(self, task: asyncio.Task) -> None:
|
||||
if task.cancelled():
|
||||
return
|
||||
exc = task.exception()
|
||||
if exc is not None:
|
||||
logger.error(
|
||||
"Worker '%s' background task crashed: %s",
|
||||
self.id,
|
||||
exc,
|
||||
exc_info=exc,
|
||||
)
|
||||
|
||||
async def stop(self) -> None:
|
||||
"""Cancel the worker's background task, if any."""
|
||||
|
||||
@@ -27,6 +27,12 @@ class Tool:
|
||||
name: str
|
||||
description: str
|
||||
parameters: dict[str, Any] = field(default_factory=dict)
|
||||
# If True, this tool performs no filesystem/process/network writes and is
|
||||
# safe to run concurrently with other safe-flagged tools inside the same
|
||||
# assistant turn. Unsafe tools (writes, shell, browser actions) are always
|
||||
# serialized after the safe batch. Default False - the conservative choice
|
||||
# when a tool's behavior isn't explicitly vetted.
|
||||
concurrency_safe: bool = False
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -1255,10 +1255,16 @@ class AgentLoader:
|
||||
if tools_path.exists():
|
||||
self._tool_registry.discover_from_module(tools_path)
|
||||
|
||||
# Set environment variables for MCP subprocesses
|
||||
# These are inherited by MCP servers (e.g., GCU browser tools)
|
||||
os.environ["HIVE_AGENT_NAME"] = agent_path.name
|
||||
os.environ["HIVE_STORAGE_PATH"] = str(self._storage_path)
|
||||
# Per-agent env for MCP subprocesses. Stored on the registry so
|
||||
# parallel workers in the same process don't clobber each other
|
||||
# via the shared os.environ dict — the registry merges these
|
||||
# into every MCPServerConfig.env at registration time.
|
||||
self._tool_registry.set_mcp_extra_env(
|
||||
{
|
||||
"HIVE_AGENT_NAME": agent_path.name,
|
||||
"HIVE_STORAGE_PATH": str(self._storage_path),
|
||||
}
|
||||
)
|
||||
|
||||
# MCP tools are loaded by McpRegistryStage in the pipeline during AgentHost.start()
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@ from __future__ import annotations
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
@@ -85,6 +86,10 @@ def _register_open(subparsers: argparse._SubParsersAction) -> None:
|
||||
|
||||
def cmd_serve(args: argparse.Namespace) -> int:
|
||||
"""Start the HTTP API server (the runtime hub)."""
|
||||
import atexit
|
||||
import logging
|
||||
import signal
|
||||
|
||||
from aiohttp import web
|
||||
|
||||
_build_frontend()
|
||||
@@ -97,11 +102,67 @@ def cmd_serve(args: argparse.Namespace) -> int:
|
||||
else:
|
||||
configure_logging(level="INFO")
|
||||
|
||||
# Last-resort MCP cleanup. Runs on any process exit path, including
|
||||
# crashes — so hung MCP subprocesses don't outlive the server. The
|
||||
# graceful shutdown path below also disconnects clients; atexit is
|
||||
# belt-and-braces and no-ops if already cleaned.
|
||||
def _atexit_cleanup_mcp() -> None:
|
||||
try:
|
||||
from framework.loader.mcp_connection_manager import MCPConnectionManager
|
||||
|
||||
MCPConnectionManager.get_instance().cleanup_all()
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logging.getLogger(__name__).debug("atexit MCP cleanup failed: %s", exc)
|
||||
|
||||
atexit.register(_atexit_cleanup_mcp)
|
||||
|
||||
model = getattr(args, "model", None)
|
||||
app = create_app(model=model)
|
||||
|
||||
async def run_server() -> None:
|
||||
manager = app["manager"]
|
||||
shutdown_event = asyncio.Event()
|
||||
signal_count = {"n": 0}
|
||||
|
||||
def _request_shutdown(signame: str) -> None:
|
||||
signal_count["n"] += 1
|
||||
if signal_count["n"] == 1:
|
||||
print(
|
||||
f"\nReceived {signame}, shutting down gracefully… "
|
||||
"(press Ctrl+C again to force quit)"
|
||||
)
|
||||
shutdown_event.set()
|
||||
else:
|
||||
# Second Ctrl+C (or SIGTERM) — the user is done waiting.
|
||||
# Skip the graceful teardown and exit immediately. os._exit
|
||||
# bypasses atexit handlers, so fire the MCP cleanup manually
|
||||
# first to avoid leaking subprocesses.
|
||||
print(f"\nReceived {signame} again — force quitting.")
|
||||
try:
|
||||
from framework.loader.mcp_connection_manager import (
|
||||
MCPConnectionManager,
|
||||
)
|
||||
|
||||
MCPConnectionManager.get_instance().cleanup_all()
|
||||
except Exception: # noqa: BLE001
|
||||
pass
|
||||
os._exit(130)
|
||||
|
||||
# Register SIGTERM (and explicit SIGINT) so container orchestrators
|
||||
# and plain Ctrl-C both route through the same graceful path —
|
||||
# manager.shutdown_all() flushes state and disconnects MCP clients.
|
||||
loop = asyncio.get_running_loop()
|
||||
for signame in ("SIGINT", "SIGTERM"):
|
||||
try:
|
||||
loop.add_signal_handler(
|
||||
getattr(signal, signame),
|
||||
_request_shutdown,
|
||||
signame,
|
||||
)
|
||||
except (NotImplementedError, AttributeError):
|
||||
# Windows / restricted environments — fall back to default
|
||||
# handlers (KeyboardInterrupt for SIGINT; SIGTERM kills).
|
||||
pass
|
||||
|
||||
# Preload colonies specified via --colony
|
||||
for colony_arg in getattr(args, "colony", []) or []:
|
||||
@@ -143,7 +204,7 @@ def cmd_serve(args: argparse.Namespace) -> int:
|
||||
_open_browser(dashboard_url)
|
||||
|
||||
try:
|
||||
await asyncio.Event().wait()
|
||||
await shutdown_event.wait()
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
finally:
|
||||
|
||||
@@ -467,8 +467,11 @@ class MCPClient:
|
||||
)
|
||||
|
||||
if self.config.transport == "stdio":
|
||||
with self._stdio_call_lock:
|
||||
return self._run_async(self._call_tool_stdio_async(tool_name, arguments))
|
||||
def _stdio_call() -> Any:
|
||||
with self._stdio_call_lock:
|
||||
return self._run_async(self._call_tool_stdio_async(tool_name, arguments))
|
||||
|
||||
return self._call_tool_with_retry(_stdio_call)
|
||||
elif self.config.transport == "sse":
|
||||
return self._call_tool_with_retry(
|
||||
lambda: self._run_async(self._call_tool_stdio_async(tool_name, arguments))
|
||||
@@ -478,10 +481,70 @@ class MCPClient:
|
||||
else:
|
||||
return self._call_tool_http(tool_name, arguments)
|
||||
|
||||
# Exceptions that indicate the STDIO session/subprocess is dead and
|
||||
# needs a fresh connect(). Keep this narrow — we don't want to mask
|
||||
# tool-level errors as transport errors.
|
||||
_STDIO_DEAD_SESSION_ERRORS = (
|
||||
BrokenPipeError,
|
||||
ConnectionError,
|
||||
ConnectionResetError,
|
||||
EOFError,
|
||||
)
|
||||
|
||||
def _is_stdio_dead_session_error(self, exc: BaseException) -> bool:
|
||||
if isinstance(exc, self._STDIO_DEAD_SESSION_ERRORS):
|
||||
return True
|
||||
# mcp SDK frequently wraps transport errors in RuntimeError with a
|
||||
# readable message — match on the common signals.
|
||||
if isinstance(exc, RuntimeError):
|
||||
msg = str(exc).lower()
|
||||
for needle in (
|
||||
"broken pipe",
|
||||
"connection closed",
|
||||
"connection reset",
|
||||
"stream closed",
|
||||
"session not initialized",
|
||||
"transport closed",
|
||||
"anyio.closedresourceerror",
|
||||
"read operation was cancelled",
|
||||
):
|
||||
if needle in msg:
|
||||
return True
|
||||
return False
|
||||
|
||||
def _call_tool_with_retry(self, call: Any) -> Any:
|
||||
"""Retry transient MCP transport failures once after reconnecting."""
|
||||
"""Retry once after reconnecting when the transport looks dead.
|
||||
|
||||
Applies to all transports:
|
||||
- **stdio**: if the subprocess died (broken pipe, closed stream,
|
||||
session not initialized), tear it down and start a fresh one.
|
||||
- **sse / unix / http** (httpx-backed): same treatment for
|
||||
``httpx.ConnectError`` / ``httpx.ReadTimeout``.
|
||||
"""
|
||||
if self.config.transport == "stdio":
|
||||
return call()
|
||||
try:
|
||||
return call()
|
||||
except BaseException as original_error:
|
||||
if not self._is_stdio_dead_session_error(original_error):
|
||||
raise
|
||||
logger.warning(
|
||||
"Retrying MCP STDIO tool call after dead-session signal from '%s': %s",
|
||||
self.config.name,
|
||||
original_error,
|
||||
)
|
||||
try:
|
||||
self._reconnect()
|
||||
except Exception as reconnect_error:
|
||||
logger.warning(
|
||||
"Reconnect failed for MCP STDIO server '%s': %s",
|
||||
self.config.name,
|
||||
reconnect_error,
|
||||
)
|
||||
raise original_error from reconnect_error
|
||||
try:
|
||||
return call()
|
||||
except BaseException as retry_error:
|
||||
raise original_error from retry_error
|
||||
|
||||
if self.config.transport not in {"unix", "sse"}:
|
||||
return call()
|
||||
|
||||
@@ -50,6 +50,33 @@ class ToolRegistry:
|
||||
# and auto-injected at call time for tools that accept them.
|
||||
CONTEXT_PARAMS = frozenset({"agent_id", "data_dir", "profile"})
|
||||
|
||||
# Tools that perform no filesystem/process/network writes and are safe
|
||||
# to run concurrently with other safe tools in the same assistant turn.
|
||||
# Unknown tools default to unsafe (serialized) - adding a name here is
|
||||
# an explicit promise about that tool's side effects. Keep this list
|
||||
# conservative: anything that mutates state, writes to disk, issues
|
||||
# POST/PUT/DELETE requests, or drives a browser MUST NOT be listed.
|
||||
CONCURRENCY_SAFE_TOOLS = frozenset(
|
||||
{
|
||||
# File system reads
|
||||
"read_file",
|
||||
"list_directory",
|
||||
"grep",
|
||||
"glob",
|
||||
# Web reads
|
||||
"web_search",
|
||||
"web_fetch",
|
||||
# Browser read-only snapshots (mutate-free observations)
|
||||
"browser_screenshot",
|
||||
"browser_snapshot",
|
||||
"browser_console",
|
||||
"browser_get_text",
|
||||
# Background bash polling - reads output buffers only, does
|
||||
# not touch the subprocess itself.
|
||||
"bash_output",
|
||||
}
|
||||
)
|
||||
|
||||
# Credential directory used for change detection
|
||||
_CREDENTIAL_DIR = Path("~/.hive/credentials/credentials").expanduser()
|
||||
|
||||
@@ -66,9 +93,24 @@ class ToolRegistry:
|
||||
self._mcp_cred_snapshot: set[str] = set() # Credential filenames at MCP load time
|
||||
self._mcp_aden_key_snapshot: str | None = None # ADEN_API_KEY value at MCP load time
|
||||
self._mcp_server_tools: dict[str, set[str]] = {} # server name -> tool names
|
||||
# tool name -> owning MCPClient (for force-kill on timeout)
|
||||
self._mcp_tool_clients: dict[str, Any] = {}
|
||||
# Per-agent env injected into every MCP server config.env. Kept
|
||||
# here (not on the process-wide os.environ) so parallel workers
|
||||
# in the same interpreter don't clobber each other's identity.
|
||||
self._mcp_extra_env: dict[str, str] = {}
|
||||
# Agent dir for re-loading registry MCP after credential resync.
|
||||
self._mcp_registry_agent_path: Path | None = None
|
||||
|
||||
def set_mcp_extra_env(self, env: dict[str, str]) -> None:
|
||||
"""Attach per-agent env vars to every MCPServerConfig this registry builds.
|
||||
|
||||
Use this instead of mutating ``os.environ`` — the global env dict
|
||||
is shared across all workers in a single interpreter, so writes
|
||||
from one worker race with MCP spawns from another.
|
||||
"""
|
||||
self._mcp_extra_env = dict(env)
|
||||
|
||||
def register(
|
||||
self,
|
||||
name: str,
|
||||
@@ -137,6 +179,7 @@ class ToolRegistry:
|
||||
"properties": properties,
|
||||
"required": required,
|
||||
},
|
||||
concurrency_safe=tool_name in self.CONCURRENCY_SAFE_TOOLS,
|
||||
)
|
||||
|
||||
def executor(inputs: dict) -> Any:
|
||||
@@ -326,6 +369,9 @@ class ToolRegistry:
|
||||
is_error=True,
|
||||
)
|
||||
|
||||
# Expose force-kill hook so the timeout handler can tear down a
|
||||
# hung MCP subprocess (asyncio.wait_for alone cannot).
|
||||
executor.kill_for_tool = registry_ref.kill_mcp_for_tool # type: ignore[attr-defined]
|
||||
return executor
|
||||
|
||||
def get_registered_names(self) -> list[str]:
|
||||
@@ -646,13 +692,17 @@ class ToolRegistry:
|
||||
from framework.loader.mcp_client import MCPClient, MCPServerConfig
|
||||
from framework.loader.mcp_connection_manager import MCPConnectionManager
|
||||
|
||||
# Build config object
|
||||
# Build config object. Merge per-agent env on top of the
|
||||
# server's own env so MCP subprocesses receive the identity
|
||||
# of the worker that spawned them (instead of whichever
|
||||
# worker most recently wrote to os.environ).
|
||||
merged_env = {**self._mcp_extra_env, **(server_config.get("env") or {})}
|
||||
config = MCPServerConfig(
|
||||
name=server_config["name"],
|
||||
transport=server_config["transport"],
|
||||
command=server_config.get("command"),
|
||||
args=server_config.get("args", []),
|
||||
env=server_config.get("env", {}),
|
||||
env=merged_env,
|
||||
cwd=server_config.get("cwd"),
|
||||
url=server_config.get("url"),
|
||||
headers=server_config.get("headers", {}),
|
||||
@@ -770,6 +820,7 @@ class ToolRegistry:
|
||||
make_mcp_executor(client, mcp_tool.name, self, tool_params),
|
||||
)
|
||||
self._mcp_tool_names.add(mcp_tool.name)
|
||||
self._mcp_tool_clients[mcp_tool.name] = client
|
||||
self._mcp_server_tools[server_name].add(mcp_tool.name)
|
||||
admitted_names.append(mcp_tool.name)
|
||||
count += 1
|
||||
@@ -852,20 +903,42 @@ class ToolRegistry:
|
||||
"""
|
||||
verified_names: set[str] = set()
|
||||
manifest_present = False
|
||||
# Only probe the sentinel when the server actually advertises it.
|
||||
# Calling ``__aden_verified_manifest`` unconditionally on every
|
||||
# MCP server at registration time (a) causes a bogus tool call
|
||||
# round-trip to every third-party server, (b) pollutes any
|
||||
# call-capturing fakes in tests, and (c) risks side effects on
|
||||
# servers that eagerly execute unknown tool names. Listing is
|
||||
# cheap and cached by the client; this keeps the manifest gate
|
||||
# active for aden-flavoured servers without penalising others.
|
||||
sentinel_advertised = False
|
||||
try:
|
||||
raw = client.call_tool(self._MCP_VERIFIED_MANIFEST_TOOL, {})
|
||||
parsed: Any = raw
|
||||
if isinstance(raw, str):
|
||||
try:
|
||||
parsed = json.loads(raw)
|
||||
except json.JSONDecodeError:
|
||||
parsed = None
|
||||
if isinstance(parsed, list):
|
||||
verified_names = {str(n) for n in parsed}
|
||||
manifest_present = True
|
||||
for t in client.list_tools():
|
||||
if getattr(t, "name", None) == self._MCP_VERIFIED_MANIFEST_TOOL:
|
||||
sentinel_advertised = True
|
||||
break
|
||||
except Exception:
|
||||
# Server doesn't expose the manifest — no verified gate applies.
|
||||
pass
|
||||
sentinel_advertised = False
|
||||
|
||||
if sentinel_advertised:
|
||||
try:
|
||||
raw = client.call_tool(self._MCP_VERIFIED_MANIFEST_TOOL, {})
|
||||
parsed: Any = raw
|
||||
if isinstance(raw, str):
|
||||
try:
|
||||
parsed = json.loads(raw)
|
||||
except json.JSONDecodeError:
|
||||
parsed = None
|
||||
# Only treat the response as a manifest when it's a list
|
||||
# of strings. A malformed response shouldn't flip the gate
|
||||
# on and silently hide every real tool from the server.
|
||||
if isinstance(parsed, list) and all(isinstance(n, str) for n in parsed):
|
||||
verified_names = set(parsed)
|
||||
manifest_present = True
|
||||
except Exception:
|
||||
# Server advertised the sentinel but errored when called
|
||||
# — treat as no manifest; fall back to third-party bypass.
|
||||
pass
|
||||
|
||||
tool_provider_map: dict[str, str] = {}
|
||||
live_providers: set[str] = set()
|
||||
@@ -925,6 +998,7 @@ class ToolRegistry:
|
||||
"properties": properties,
|
||||
"required": required,
|
||||
},
|
||||
concurrency_safe=mcp_tool.name in self.CONCURRENCY_SAFE_TOOLS,
|
||||
)
|
||||
|
||||
return tool
|
||||
@@ -1099,6 +1173,33 @@ class ToolRegistry:
|
||||
self._mcp_clients.clear()
|
||||
self._mcp_client_servers.clear()
|
||||
self._mcp_managed_clients.clear()
|
||||
self._mcp_tool_clients.clear()
|
||||
|
||||
def kill_mcp_for_tool(self, tool_name: str) -> bool:
|
||||
"""Force-disconnect the MCP client that owns *tool_name*.
|
||||
|
||||
Called from the timeout handler in ``execute_tool`` when a tool
|
||||
call hangs. Plain ``asyncio.wait_for`` cancellation cannot stop
|
||||
a sync executor running inside a thread pool (and therefore
|
||||
cannot stop the MCP subprocess), so we reach through to the
|
||||
client here and tear it down. The next ``call_tool`` triggers
|
||||
an automatic reconnect.
|
||||
|
||||
Returns True if a client was found and disconnect was attempted.
|
||||
"""
|
||||
client = self._mcp_tool_clients.get(tool_name)
|
||||
if client is None:
|
||||
return False
|
||||
try:
|
||||
logger.warning(
|
||||
"Force-disconnecting MCP client for hung tool '%s' on server '%s'",
|
||||
tool_name,
|
||||
getattr(client.config, "name", "?"),
|
||||
)
|
||||
client.disconnect()
|
||||
except Exception as exc:
|
||||
logger.warning("Error force-disconnecting MCP client for '%s': %s", tool_name, exc)
|
||||
return True
|
||||
|
||||
def __del__(self):
|
||||
"""Destructor to ensure cleanup."""
|
||||
|
||||
+134
-155
@@ -1,12 +1,19 @@
|
||||
"""Browser automation best-practices prompt.
|
||||
|
||||
This module provides ``GCU_BROWSER_SYSTEM_PROMPT`` -- a canonical set of
|
||||
This module provides ``GCU_BROWSER_SYSTEM_PROMPT`` — a canonical set of
|
||||
browser automation guidelines that can be included in any node's system
|
||||
prompt that uses browser tools from the gcu-tools MCP server.
|
||||
|
||||
Browser tools are registered via the global MCP registry (gcu-tools).
|
||||
Nodes that need browser access declare ``tools: {policy: "all"}`` in their
|
||||
agent.json config.
|
||||
|
||||
Note: the canonical source of truth for browser automation guidance is
|
||||
the ``browser-automation`` default skill at
|
||||
``core/framework/skills/_default_skills/browser-automation/SKILL.md``.
|
||||
Activate that skill for the full decision tree. This module holds a
|
||||
compact subset suitable for direct inlining into a node's system prompt
|
||||
when a skill activation is not desired.
|
||||
"""
|
||||
|
||||
GCU_BROWSER_SYSTEM_PROMPT = """\
|
||||
@@ -14,172 +21,144 @@ GCU_BROWSER_SYSTEM_PROMPT = """\
|
||||
|
||||
Follow these rules for reliable, efficient browser interaction.
|
||||
|
||||
## Reading Pages
|
||||
- ALWAYS prefer `browser_snapshot` over `browser_get_text("body")`
|
||||
— it returns a compact ~1-5 KB accessibility tree vs 100+ KB of raw HTML.
|
||||
- Interaction tools (`browser_click`, `browser_type`, `browser_fill`,
|
||||
`browser_scroll`, etc.) return a page snapshot automatically in their
|
||||
result. Use it to decide your next action — do NOT call
|
||||
`browser_snapshot` separately after every action.
|
||||
Only call `browser_snapshot` when you need a fresh view without
|
||||
performing an action, or after setting `auto_snapshot=false`.
|
||||
- Do NOT use `browser_screenshot` to read text — use
|
||||
`browser_snapshot` for that (compact, searchable, fast).
|
||||
- DO use `browser_screenshot` when you need visual context:
|
||||
charts, images, canvas elements, layout verification, or when
|
||||
the snapshot doesn't capture what you need.
|
||||
- Only fall back to `browser_get_text` for extracting specific
|
||||
small elements by CSS selector.
|
||||
## Pick the right reading tool
|
||||
|
||||
## Navigation & Waiting
|
||||
- `browser_navigate` and `browser_open` already wait for the page to
|
||||
load (`domcontentloaded`). Do NOT call `browser_wait` with no
|
||||
arguments after navigation — it wastes time.
|
||||
Only use `browser_wait` when you need a *specific element* or *text*
|
||||
to appear (pass `selector` or `text`).
|
||||
- NEVER re-navigate to the same URL after scrolling
|
||||
— this resets your scroll position and loses loaded content.
|
||||
- **`browser_snapshot`** — compact accessibility tree. Fast, cheap, good
|
||||
for static / text-heavy pages where the DOM matches what's visually
|
||||
rendered (docs, forms, search results, settings pages).
|
||||
- **`browser_screenshot`** — visual capture + scale metadata. Use on any
|
||||
complex SPA (LinkedIn, X / Twitter, Reddit, Gmail, Notion, Slack,
|
||||
Discord) and on any site using shadow DOM or virtual scrolling. On
|
||||
those pages, snapshot refs go stale in seconds, shadow contents
|
||||
aren't in the AX tree, and virtual-scrolled elements disappear from
|
||||
the tree entirely — screenshots are the only reliable way to orient.
|
||||
|
||||
Neither tool is "preferred" universally — they're for different jobs.
|
||||
Default to snapshot on static pages, screenshot on SPAs and
|
||||
shadow-heavy sites. Interaction tools (click/type/fill/scroll) return
|
||||
a snapshot automatically, so don't call `browser_snapshot` separately
|
||||
after an interaction unless you need a fresh view.
|
||||
|
||||
Only fall back to `browser_get_text` for extracting small elements by
|
||||
CSS selector.
|
||||
|
||||
## Coordinates: always CSS pixels
|
||||
|
||||
Chrome DevTools Protocol `Input.dispatchMouseEvent` takes **CSS
|
||||
pixels**, not physical pixels. This is critical and often gets wrong:
|
||||
|
||||
| Tool | Unit |
|
||||
|---|---|
|
||||
| `browser_click_coordinate(x, y)` | **CSS pixels** |
|
||||
| `browser_hover_coordinate(x, y)` | **CSS pixels** |
|
||||
| `browser_press_at(x, y, key)` | **CSS pixels** |
|
||||
| `getBoundingClientRect()` | already CSS pixels — pass straight through |
|
||||
| `browser_coords(img_x, img_y)` | returns `css_x/y` (use this) and `physical_x/y` (debug only) |
|
||||
|
||||
**Always use `css_x/y`** from `browser_coords`. Feeding `physical_x/y`
|
||||
on a HiDPI display overshoots by `DPR×` — clicks land DPR times too
|
||||
far right and down. On a DPR=1.6 display that's 60% off.
|
||||
|
||||
Never multiply `getBoundingClientRect()` by `devicePixelRatio` — it's
|
||||
already in the right unit.
|
||||
|
||||
## Rich-text editors (X, LinkedIn DMs, Gmail, Reddit, Slack, Discord)
|
||||
|
||||
Click the input area first with `browser_click_coordinate` or
|
||||
`browser_click(selector)` BEFORE typing. React / Draft.js / Lexical /
|
||||
ProseMirror only register input as "real" after a native pointer-
|
||||
sourced focus event; JS `.focus()` is not enough. Without a real click
|
||||
first, the editor stays empty and the send button stays disabled.
|
||||
|
||||
`browser_type` now does this automatically — it clicks the element,
|
||||
then inserts text via CDP `Input.insertText` (IME-commit style), which
|
||||
rich editors accept cleanly. Before clicking send, verify the submit
|
||||
button's `disabled` / `aria-disabled` state via `browser_evaluate`.
|
||||
|
||||
## Shadow DOM
|
||||
|
||||
Sites like LinkedIn messaging (`#interop-outlet`), Reddit (faceplate
|
||||
Web Components), and some X elements live inside shadow roots.
|
||||
`document.querySelector` and `wait_for_selector` do **not** see into
|
||||
shadow roots. But `browser_click_coordinate` **does** — CDP hit
|
||||
testing walks shadow roots natively, so coordinate-based operations
|
||||
reach shadow elements transparently.
|
||||
|
||||
**Shadow-heavy site workflow:**
|
||||
1. `browser_screenshot()` → visual image
|
||||
2. Identify target visually → image coordinate
|
||||
3. `browser_coords(x, y)` → CSS px
|
||||
4. `browser_click_coordinate(css_x, css_y)` → lands via native hit
|
||||
test; inputs get focused regardless of shadow depth
|
||||
5. Type via `browser_type` or, if the selector path can't reach the
|
||||
element, dispatch keys to the focused element
|
||||
|
||||
For selector-style access when you know the shadow path:
|
||||
`browser_shadow_query("#interop-outlet >>> #msg-overlay >>> p")` —
|
||||
returns a CSS-px rect you can feed directly to click tools.
|
||||
|
||||
## Navigation & waiting
|
||||
|
||||
- `browser_navigate(wait_until="load")` returns when the page fires
|
||||
load. On SPAs (LinkedIn especially — 4–5 seconds), add a 2–3 s sleep
|
||||
after to let React/Vue hydrate before querying for chrome elements.
|
||||
- Never re-navigate to the same URL after scrolling — resets scroll.
|
||||
- Use `timeout_ms=20000` for heavy SPAs.
|
||||
- `wait_for_selector` / `wait_for_text` resolve in milliseconds when
|
||||
the element is already in the DOM — no need to sleep if you can
|
||||
express the wait condition.
|
||||
|
||||
## Keyboard shortcuts
|
||||
|
||||
`browser_press("a", modifiers=["ctrl"])` for Ctrl+A. Accepted
|
||||
modifiers: `"alt"`, `"ctrl"`/`"control"`, `"meta"`/`"cmd"`,
|
||||
`"shift"`. The tool dispatches the modifier key first, then the main
|
||||
key with `code` and `windowsVirtualKeyCode` populated (Chrome's
|
||||
shortcut dispatcher requires both), then releases in reverse order.
|
||||
|
||||
## Scrolling
|
||||
- Use large scroll amounts ~2000 when loading more content
|
||||
— sites like twitter and linkedin have lazy loading for paging.
|
||||
- The scroll result includes a snapshot automatically — no need to call
|
||||
`browser_snapshot` separately.
|
||||
|
||||
## Batching Actions
|
||||
- You can call multiple tools in a single turn — they execute in parallel.
|
||||
ALWAYS batch independent actions together. Examples:
|
||||
- Fill multiple form fields in one turn.
|
||||
- Navigate + snapshot in one turn.
|
||||
- Click + scroll if targeting different elements.
|
||||
- When batching, set `auto_snapshot=false` on all but the last action
|
||||
to avoid redundant snapshots.
|
||||
- Aim for 3-5 tool calls per turn minimum. One tool call per turn is
|
||||
wasteful.
|
||||
- Use large amounts (~2000 px) for lazy-loaded sites (X, LinkedIn).
|
||||
- Scroll result includes a snapshot — don't call `browser_snapshot`
|
||||
separately.
|
||||
|
||||
## Error Recovery
|
||||
- If a tool fails, retry once with the same approach.
|
||||
- If it fails a second time, STOP retrying and switch approach.
|
||||
- If `browser_snapshot` fails → try `browser_get_text` with a
|
||||
specific small selector as fallback.
|
||||
- If `browser_open` fails or page seems stale → `browser_stop`,
|
||||
then `browser_start`, then retry.
|
||||
## Batching
|
||||
|
||||
## Tab Management
|
||||
- Multiple tool calls per turn execute in parallel. Batch independent
|
||||
actions together: fill multiple fields, navigate + snapshot,
|
||||
different-target click + scroll.
|
||||
- Set `auto_snapshot=false` on all but the last when batching.
|
||||
- Aim for 3–5 tool calls per turn minimum.
|
||||
|
||||
**Close tabs as soon as you are done with them** — not only at the end of the task.
|
||||
After reading or extracting data from a tab, close it immediately.
|
||||
## Tab management
|
||||
|
||||
**Decision rules:**
|
||||
- Finished reading/extracting from a tab? → `browser_close(target_id=...)`
|
||||
- Completed a multi-tab workflow? → `browser_close_finished()` to clean up all your tabs
|
||||
- More than 3 tabs open? → stop and close finished ones before opening more
|
||||
- Popup appeared that you didn't need? → close it immediately
|
||||
Close tabs as soon as you're done with them — not only at the end of
|
||||
the task. `browser_close(target_id=...)` for one, `browser_close_finished()`
|
||||
for a full cleanup. Never accumulate more than 3 open tabs.
|
||||
`browser_tabs` reports an `origin` field: `"agent"` (you own it, close
|
||||
when done), `"popup"` (close after extracting), `"startup"`/`"user"`
|
||||
(leave alone).
|
||||
|
||||
**Origin awareness:** `browser_tabs` returns an `origin` field for each tab:
|
||||
- `"agent"` — you opened it; you own it; close it when done
|
||||
- `"popup"` — opened by a link or script; close after extracting what you need
|
||||
- `"startup"` or `"user"` — leave these alone unless the task requires it
|
||||
## Login & auth walls
|
||||
|
||||
**Cleanup tools:**
|
||||
- `browser_close(target_id=...)` — close one specific tab
|
||||
- `browser_close_finished()` — close all your agent/popup tabs (safe: leaves startup/user tabs)
|
||||
- `browser_close_all()` — close everything except the active tab (use only for full reset)
|
||||
Report the auth wall and stop — do NOT attempt to log in. Dismiss
|
||||
cookie consent banners if they block content.
|
||||
|
||||
**Multi-tab workflow pattern:**
|
||||
1. Open background tabs with `browser_open(url=..., background=true)` to stay on current tab
|
||||
2. Process each tab and close it with `browser_close` when done
|
||||
3. When the full workflow completes, call `browser_close_finished()` to confirm cleanup
|
||||
4. Check `browser_tabs` at any point — it shows `origin` and `age_seconds` per tab
|
||||
## Error recovery
|
||||
|
||||
Never accumulate tabs. Treat every tab you open as a resource you must free.
|
||||
- Retry once on failure, then switch approach.
|
||||
- If `browser_snapshot` fails, try `browser_get_text` with a narrow
|
||||
selector as fallback.
|
||||
- If `browser_open` fails or the page seems stale, `browser_stop` →
|
||||
`browser_start` → retry.
|
||||
|
||||
## Shadow DOM & Overlays
|
||||
## `browser_evaluate`
|
||||
|
||||
Some sites (LinkedIn messaging, etc.) render content inside closed shadow roots that are
|
||||
invisible to regular DOM queries and `browser_snapshot` coordinates.
|
||||
|
||||
**Detecting shadow DOM**: `document.elementFromPoint(x, y)` returns a zero-height host element
|
||||
(e.g. `#interop-outlet`) for the entire overlay area — this is normal, not a bug.
|
||||
`document.body.innerText` and `document.querySelectorAll` return nothing for shadow content.
|
||||
`browser_snapshot` CAN read shadow DOM text but cannot return coordinates.
|
||||
|
||||
**Querying into shadow DOM:**
|
||||
```
|
||||
browser_shadow_query("#interop-outlet >>> #msg-overlay >>> p")
|
||||
```
|
||||
Uses `>>>` to pierce shadow roots. Returns `rect` in CSS pixels and `physicalRect` ready for
|
||||
`browser_click_coordinate` / `browser_hover_coordinate`.
|
||||
|
||||
**Getting physical rect for any element (including shadow DOM):**
|
||||
```
|
||||
browser_get_rect(selector="#interop-outlet >>> .msg-convo-wrapper", pierce_shadow=true)
|
||||
```
|
||||
|
||||
**Manual JS traversal when selector is dynamic:**
|
||||
```js
|
||||
const shadow = document.getElementById('interop-outlet').shadowRoot;
|
||||
const convo = shadow.querySelector('#ember37');
|
||||
const rect = convo.querySelector('p').getBoundingClientRect();
|
||||
// rect is in CSS pixels — multiply by DPR for physical pixels
|
||||
```
|
||||
Pass this as a multi-statement script to `browser_evaluate`; it wraps automatically in an IIFE.
|
||||
Use `JSON.stringify(rect)` to serialize the result.
|
||||
|
||||
## Coordinate System
|
||||
|
||||
There are THREE coordinate spaces. Using the wrong one causes clicks/hovers to land in the
|
||||
wrong place.
|
||||
|
||||
| Space | Used by | How to get |
|
||||
|---|---|---|
|
||||
| Physical pixels | `browser_click_coordinate` | `browser_coords` `physical_x/y` |
|
||||
| CSS pixels | `getBoundingClientRect()`, `elementFromPoint` | `browser_coords` `css_x/y` |
|
||||
| Screenshot pixels | What you see in the 800px image | Raw position in screenshot |
|
||||
|
||||
**Converting screenshot → physical**: `browser_coords(x, y)` → use `physical_x/y`.
|
||||
**Converting CSS → physical**: multiply by `window.devicePixelRatio` (typically 1.6 on HiDPI).
|
||||
**Never** pass raw `getBoundingClientRect()` values to `browser_hover_coordinate` without
|
||||
multiplying by DPR first.
|
||||
|
||||
## Screenshots
|
||||
|
||||
Screenshot data is base64-encoded PNG. To view it:
|
||||
```
|
||||
run_command("echo '<base64_data>' | base64 -d > /tmp/screenshot.png")
|
||||
```
|
||||
Then use `read_file("/tmp/screenshot.png")` to view the image.
|
||||
|
||||
Always use `full_page=false` (default) unless you specifically need the full scrolled page.
|
||||
|
||||
## JavaScript Evaluation
|
||||
|
||||
`browser_evaluate` wraps your script in an IIFE automatically:
|
||||
- Single expression (`document.title`) → wrapped with `return`
|
||||
- Multi-statement or contains `;`/`\n` → wrapped without return (add explicit `return` yourself)
|
||||
- Already an IIFE → run as-is
|
||||
|
||||
**Avoid**: complex closures with `return` inside `for` loops — Chrome CDP returns `null`.
|
||||
**Use instead**: `Array.from(...).map(...).join(...)` chains, or build result objects and
|
||||
`JSON.stringify()` them.
|
||||
|
||||
**For shadow DOM traversal with dynamic selectors**, write the full JS path:
|
||||
```js
|
||||
const s = document.getElementById('interop-outlet').shadowRoot;
|
||||
const el = s.querySelector('.msg-convo-wrapper');
|
||||
return JSON.stringify(el.getBoundingClientRect());
|
||||
```
|
||||
|
||||
## Login & Auth Walls
|
||||
- If you see a "Log in" or "Sign up" prompt instead of expected
|
||||
content, report the auth wall immediately — do NOT attempt to log in.
|
||||
- Check for cookie consent banners and dismiss them if they block content.
|
||||
|
||||
## Efficiency
|
||||
- Minimize tool calls — combine actions where possible.
|
||||
- When a snapshot result is saved to a spillover file, use
|
||||
`run_command` with grep to extract specific data rather than
|
||||
re-reading the full file.
|
||||
- Call `set_output` in the same turn as your last browser action
|
||||
when possible — don't waste a turn.
|
||||
Use for reading state inside a shadow root that standard tools don't
|
||||
handle, for one-shot site-specific actions, or to measure layout the
|
||||
tools don't expose. Do NOT use it on a strict-CSP site (LinkedIn,
|
||||
some X surfaces) with `innerHTML` — Trusted Types silently drops the
|
||||
assignment. Always use `createElement` + `appendChild` + `setAttribute`
|
||||
for DOM injection on those sites. `style.cssText`, `textContent`, and
|
||||
`.value` assignments are fine.
|
||||
"""
|
||||
|
||||
@@ -279,38 +279,25 @@ async def create_queen(
|
||||
queen_loop_config as _base_loop_config,
|
||||
)
|
||||
from framework.agents.queen.nodes import (
|
||||
_QUEEN_BUILDING_TOOLS,
|
||||
_QUEEN_EDITING_TOOLS,
|
||||
_QUEEN_INDEPENDENT_TOOLS,
|
||||
_QUEEN_PLANNING_TOOLS,
|
||||
_QUEEN_RUNNING_TOOLS,
|
||||
_QUEEN_STAGING_TOOLS,
|
||||
_appendices,
|
||||
_building_knowledge,
|
||||
_planning_knowledge,
|
||||
_queen_behavior_always,
|
||||
_queen_behavior_building,
|
||||
_queen_behavior_editing,
|
||||
_queen_behavior_independent,
|
||||
_queen_behavior_planning,
|
||||
_queen_behavior_running,
|
||||
_queen_behavior_staging,
|
||||
_queen_character_core,
|
||||
_queen_identity_editing,
|
||||
_queen_phase_7,
|
||||
_queen_role_building,
|
||||
_queen_role_independent,
|
||||
_queen_role_planning,
|
||||
_queen_role_running,
|
||||
_queen_role_staging,
|
||||
_queen_style,
|
||||
_queen_tools_building,
|
||||
_queen_tools_editing,
|
||||
_queen_tools_independent,
|
||||
_queen_tools_planning,
|
||||
_queen_tools_running,
|
||||
_queen_tools_staging,
|
||||
_shared_building_knowledge,
|
||||
)
|
||||
from framework.host.event_bus import AgentEvent, EventType
|
||||
from framework.loader.mcp_registry import MCPRegistry
|
||||
@@ -359,7 +346,7 @@ async def create_queen(
|
||||
logger.warning("Queen: MCP registry config failed to load", exc_info=True)
|
||||
|
||||
# ---- Phase state --------------------------------------------------
|
||||
effective_phase = initial_phase or ("staging" if worker_identity else "planning")
|
||||
effective_phase = initial_phase or ("staging" if worker_identity else "independent")
|
||||
phase_state = QueenPhaseState(phase=effective_phase, event_bus=session.event_bus)
|
||||
session.phase_state = phase_state
|
||||
|
||||
@@ -371,28 +358,6 @@ async def create_queen(
|
||||
# when the user adds/removes an integration.
|
||||
phase_state.credentials_prompt_provider = _build_credentials_provider()
|
||||
|
||||
# ---- Track ask rounds during planning ----------------------------
|
||||
# Increment planning_ask_rounds each time the queen requests user
|
||||
# input (ask_user or ask_user_multiple) while in the planning phase.
|
||||
async def _track_planning_asks(event: AgentEvent) -> None:
|
||||
if phase_state.phase != "planning":
|
||||
return
|
||||
# Only count explicit ask_user / ask_user_multiple calls, not
|
||||
# auto-block (text-only turns emit CLIENT_INPUT_REQUESTED with
|
||||
# an empty prompt and no options/questions).
|
||||
data = event.data or {}
|
||||
has_prompt = bool(data.get("prompt"))
|
||||
has_questions = bool(data.get("questions"))
|
||||
has_options = bool(data.get("options"))
|
||||
if has_prompt or has_questions or has_options:
|
||||
phase_state.planning_ask_rounds += 1
|
||||
|
||||
session.event_bus.subscribe(
|
||||
[EventType.CLIENT_INPUT_REQUESTED],
|
||||
_track_planning_asks,
|
||||
filter_stream="queen",
|
||||
)
|
||||
|
||||
# ---- Lifecycle tools (always registered) --------------------------
|
||||
register_queen_lifecycle_tools(
|
||||
queen_registry,
|
||||
@@ -428,35 +393,21 @@ async def create_queen(
|
||||
session._queen_tool_executor = queen_tool_executor # type: ignore[attr-defined]
|
||||
|
||||
# ---- Partition tools by phase ------------------------------------
|
||||
planning_names = set(_QUEEN_PLANNING_TOOLS)
|
||||
building_names = set(_QUEEN_BUILDING_TOOLS)
|
||||
staging_names = set(_QUEEN_STAGING_TOOLS)
|
||||
running_names = set(_QUEEN_RUNNING_TOOLS)
|
||||
editing_names = set(_QUEEN_EDITING_TOOLS)
|
||||
independent_names = set(_QUEEN_INDEPENDENT_TOOLS)
|
||||
|
||||
registered_names = {t.name for t in queen_tools}
|
||||
missing_building = building_names - registered_names
|
||||
if missing_building:
|
||||
logger.warning(
|
||||
"Queen: %d/%d building tools NOT registered: %s",
|
||||
len(missing_building),
|
||||
len(building_names),
|
||||
sorted(missing_building),
|
||||
)
|
||||
logger.info("Queen: registered tools: %s", sorted(registered_names))
|
||||
|
||||
phase_state.planning_tools = [t for t in queen_tools if t.name in planning_names]
|
||||
phase_state.building_tools = [t for t in queen_tools if t.name in building_names]
|
||||
phase_state.staging_tools = [t for t in queen_tools if t.name in staging_names]
|
||||
phase_state.running_tools = [t for t in queen_tools if t.name in running_names]
|
||||
phase_state.editing_tools = [t for t in queen_tools if t.name in editing_names]
|
||||
|
||||
# Independent phase gets core tools + all MCP tools not claimed by any
|
||||
# other phase (coder-tools file I/O, gcu-tools browser, etc.).
|
||||
all_phase_names = (
|
||||
planning_names | building_names | staging_names | running_names | editing_names
|
||||
)
|
||||
all_phase_names = staging_names | running_names | editing_names
|
||||
mcp_tools = [t for t in queen_tools if t.name not in all_phase_names]
|
||||
phase_state.independent_tools = [
|
||||
t for t in queen_tools if t.name in independent_names
|
||||
@@ -489,33 +440,6 @@ async def create_queen(
|
||||
"according to your current phase."
|
||||
)
|
||||
|
||||
_planning_body = (
|
||||
_queen_character_core
|
||||
+ _queen_role_planning
|
||||
+ _queen_style
|
||||
+ _shared_building_knowledge
|
||||
+ _queen_tools_planning
|
||||
+ _queen_behavior_always
|
||||
+ _queen_behavior_planning
|
||||
+ _planning_knowledge
|
||||
+ worker_identity
|
||||
)
|
||||
phase_state.prompt_planning = _planning_body
|
||||
|
||||
_building_body = (
|
||||
_queen_character_core
|
||||
+ _queen_role_building
|
||||
+ _queen_style
|
||||
+ _shared_building_knowledge
|
||||
+ _queen_tools_building
|
||||
+ _queen_behavior_always
|
||||
+ _queen_behavior_building
|
||||
+ _building_knowledge
|
||||
+ _queen_phase_7
|
||||
+ _appendices
|
||||
+ worker_identity
|
||||
)
|
||||
phase_state.prompt_building = _building_body
|
||||
phase_state.prompt_staging = (
|
||||
_queen_character_core
|
||||
+ _queen_role_staging
|
||||
@@ -768,6 +692,15 @@ async def create_queen(
|
||||
system_prompt="",
|
||||
tools=[t.name for t in queen_tools],
|
||||
tool_access_policy="all",
|
||||
# Queen is a forever-alive conversational agent: bypass
|
||||
# the implicit judge entirely. Without this, a text-only
|
||||
# turn (greeting, clarifying question, summary) falls
|
||||
# through to the default ACCEPT verdict in
|
||||
# judge_pipeline.py, which terminates the loop and
|
||||
# leaves session.queen_executor=None until the user
|
||||
# reloads. Mirrors the static queen_node NodeSpec in
|
||||
# framework.agents.queen.nodes which already sets this.
|
||||
skip_judge=True,
|
||||
)
|
||||
|
||||
ctx = AgentContext(
|
||||
@@ -871,6 +804,21 @@ async def create_queen(
|
||||
"user_request": None if _is_restore_mode else (initial_prompt or None)
|
||||
}
|
||||
|
||||
# Publish the initial prompt as a CLIENT_INPUT_RECEIVED event so
|
||||
# it appears in the SSE stream and persists to events.jsonl for
|
||||
# session resume. The /chat endpoint does the same for injected
|
||||
# messages; this covers the session-creation-with-prompt path.
|
||||
if initial_prompt and not _is_restore_mode:
|
||||
await session.event_bus.publish(
|
||||
AgentEvent(
|
||||
type=EventType.CLIENT_INPUT_RECEIVED,
|
||||
stream_id="queen",
|
||||
node_id="queen",
|
||||
execution_id=session.id,
|
||||
data={"content": initial_prompt},
|
||||
)
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Queen %s in %s phase with %d tools: %s",
|
||||
"restoring" if _is_restore_mode else "starting",
|
||||
|
||||
@@ -13,6 +13,31 @@ from framework.server.app import validate_agent_path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_llm_key_providers_cache: dict | None = None
|
||||
|
||||
|
||||
def _get_llm_key_providers() -> dict:
|
||||
"""Lazily load the PROVIDERS dict from scripts/check_llm_key.py (cached)."""
|
||||
global _llm_key_providers_cache
|
||||
if _llm_key_providers_cache is None:
|
||||
import importlib.util
|
||||
from pathlib import Path as _Path
|
||||
|
||||
script = _Path(__file__).resolve().parents[3] / "scripts" / "check_llm_key.py"
|
||||
if not script.exists():
|
||||
logger.warning("check_llm_key.py not found at %s — key validation disabled", script)
|
||||
_llm_key_providers_cache = {}
|
||||
return _llm_key_providers_cache
|
||||
spec = importlib.util.spec_from_file_location("check_llm_key", script)
|
||||
if spec is None or spec.loader is None:
|
||||
logger.warning("Failed to load spec for %s — key validation disabled", script)
|
||||
_llm_key_providers_cache = {}
|
||||
return _llm_key_providers_cache
|
||||
mod = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(mod)
|
||||
_llm_key_providers_cache = mod.PROVIDERS
|
||||
return _llm_key_providers_cache
|
||||
|
||||
|
||||
def _get_store(request: web.Request) -> CredentialStore:
|
||||
return request.app["credential_store"]
|
||||
@@ -142,8 +167,18 @@ async def handle_delete_credential(request: web.Request) -> web.Response:
|
||||
return web.json_response({"deleted": True})
|
||||
|
||||
store = _get_store(request)
|
||||
deleted = store.delete_credential(credential_id)
|
||||
if not deleted:
|
||||
deleted_from_store = store.delete_credential(credential_id)
|
||||
|
||||
# Also clear the env var for this process so the key doesn't
|
||||
# reappear via the env-var fallback in _resolve_api_key().
|
||||
from framework.server.routes_config import PROVIDER_ENV_VARS
|
||||
|
||||
env_var = PROVIDER_ENV_VARS.get(credential_id.lower())
|
||||
deleted_from_env = False
|
||||
if env_var and os.environ.pop(env_var, None) is not None:
|
||||
deleted_from_env = True
|
||||
|
||||
if not deleted_from_store and not deleted_from_env:
|
||||
return web.json_response({"error": f"Credential '{credential_id}' not found"}, status=404)
|
||||
_invalidate_queen_credentials_cache(request)
|
||||
return web.json_response({"deleted": True})
|
||||
@@ -406,12 +441,53 @@ async def handle_list_specs(request: web.Request) -> web.Response:
|
||||
)
|
||||
|
||||
|
||||
async def handle_validate_key(request: web.Request) -> web.Response:
|
||||
"""POST /api/credentials/validate-key — health-check an LLM provider key.
|
||||
|
||||
Body: {"provider_id": "anthropic", "api_key": "sk-..."}
|
||||
Returns: {"valid": bool|null, "message": str}
|
||||
|
||||
Runs the same checks as ``quickstart.sh`` (scripts/check_llm_key.py)
|
||||
but in-process — no subprocess overhead.
|
||||
"""
|
||||
try:
|
||||
body = await request.json()
|
||||
except Exception:
|
||||
return web.json_response({"error": "Invalid JSON body"}, status=400)
|
||||
|
||||
provider_id = body.get("provider_id", "").strip()
|
||||
api_key = body.get("api_key", "").strip()
|
||||
|
||||
if not provider_id or not api_key:
|
||||
return web.json_response(
|
||||
{"error": "provider_id and api_key are required"}, status=400
|
||||
)
|
||||
|
||||
try:
|
||||
checker = _get_llm_key_providers().get(provider_id)
|
||||
if not checker:
|
||||
return web.json_response(
|
||||
{"valid": True, "message": f"No health check for {provider_id}"}
|
||||
)
|
||||
|
||||
loop = asyncio.get_running_loop()
|
||||
result = await loop.run_in_executor(None, lambda: checker(api_key))
|
||||
return web.json_response(result)
|
||||
|
||||
except Exception as exc:
|
||||
logger.warning("LLM key validation failed for %s: %s", provider_id, exc)
|
||||
return web.json_response(
|
||||
{"valid": None, "message": f"Validation error: {exc}"}
|
||||
)
|
||||
|
||||
|
||||
def register_routes(app: web.Application) -> None:
|
||||
"""Register credential routes on the application."""
|
||||
# specs and check-agent must be registered BEFORE the {credential_id} wildcard
|
||||
app.router.add_get("/api/credentials/specs", handle_list_specs)
|
||||
app.router.add_post("/api/credentials/check-agent", handle_check_agent)
|
||||
app.router.add_post("/api/credentials/resync", handle_resync_credentials)
|
||||
app.router.add_post("/api/credentials/validate-key", handle_validate_key)
|
||||
app.router.add_get("/api/credentials", handle_list_credentials)
|
||||
app.router.add_post("/api/credentials", handle_save_credential)
|
||||
app.router.add_get("/api/credentials/{credential_id}", handle_get_credential)
|
||||
|
||||
@@ -120,7 +120,6 @@ async def handle_events(request: web.Request) -> web.StreamResponse:
|
||||
"node_loop_iteration",
|
||||
"node_loop_started",
|
||||
"credentials_required",
|
||||
"worker_graph_loaded",
|
||||
"queen_phase_changed",
|
||||
}
|
||||
|
||||
@@ -166,13 +165,24 @@ async def handle_events(request: web.Request) -> web.StreamResponse:
|
||||
# Replay buffered events that were published before this SSE connected.
|
||||
# The EventBus keeps a history ring-buffer; we replay the subset that
|
||||
# produces visible chat messages so the frontend never misses early
|
||||
# queen output. Lifecycle events are NOT replayed to avoid duplicate
|
||||
# state transitions (turn counter increments, etc.).
|
||||
# queen output. Execution/node lifecycle events are NOT replayed to
|
||||
# avoid duplicate state transitions (turn counter increments, etc.).
|
||||
#
|
||||
# Trigger lifecycle events ARE replayed: they're idempotent state
|
||||
# setters (this trigger exists / is active / was deactivated) and
|
||||
# they're published during session load — BEFORE the frontend's
|
||||
# SSE subscription is established. Without replay, a freshly-opened
|
||||
# colony would never see its own triggers.
|
||||
_REPLAY_TYPES = {
|
||||
EventType.CLIENT_OUTPUT_DELTA.value,
|
||||
EventType.EXECUTION_STARTED.value,
|
||||
EventType.CLIENT_INPUT_REQUESTED.value,
|
||||
EventType.CLIENT_INPUT_RECEIVED.value,
|
||||
EventType.TRIGGER_AVAILABLE.value,
|
||||
EventType.TRIGGER_ACTIVATED.value,
|
||||
EventType.TRIGGER_DEACTIVATED.value,
|
||||
EventType.TRIGGER_REMOVED.value,
|
||||
EventType.TRIGGER_UPDATED.value,
|
||||
}
|
||||
event_type_values = {et.value for et in event_types}
|
||||
replay_types = _REPLAY_TYPES & event_type_values
|
||||
|
||||
@@ -50,18 +50,6 @@ _WORKER_INHERITED_TOOLS: frozenset[str] = frozenset(
|
||||
)
|
||||
|
||||
|
||||
# Queen-lifecycle tools that are registered into the queen's tool registry
|
||||
# but NOT listed in any _QUEEN_*_TOOLS phase list (they're reachable only via
|
||||
# explicit registration, not phase-based gating). These must still be stripped
|
||||
# from forked worker configs.
|
||||
_QUEEN_LIFECYCLE_EXTRAS: frozenset[str] = frozenset(
|
||||
{
|
||||
"stop_worker_and_plan",
|
||||
"stop_worker_and_review",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def _resolve_queen_only_tools() -> frozenset[str]:
|
||||
"""Compute the set of queen-lifecycle tool names to strip on fork.
|
||||
|
||||
@@ -69,34 +57,27 @@ def _resolve_queen_only_tools() -> frozenset[str]:
|
||||
any tool listed in any ``_QUEEN_*_TOOLS`` set that is NOT in
|
||||
:data:`_WORKER_INHERITED_TOOLS` is a queen-only tool. Browser and MCP
|
||||
tools are not in the queen phase lists (they're added dynamically),
|
||||
so they pass through untouched. Supplemented by
|
||||
:data:`_QUEEN_LIFECYCLE_EXTRAS` for tools registered without phase
|
||||
gating.
|
||||
so they pass through untouched.
|
||||
|
||||
Computed lazily so this module can be imported before the queen
|
||||
nodes package is loaded.
|
||||
"""
|
||||
from framework.agents.queen.nodes import (
|
||||
_QUEEN_BUILDING_TOOLS,
|
||||
_QUEEN_EDITING_TOOLS,
|
||||
_QUEEN_INDEPENDENT_TOOLS,
|
||||
_QUEEN_PLANNING_TOOLS,
|
||||
_QUEEN_RUNNING_TOOLS,
|
||||
_QUEEN_STAGING_TOOLS,
|
||||
)
|
||||
|
||||
union: set[str] = set()
|
||||
for tool_list in (
|
||||
_QUEEN_PLANNING_TOOLS,
|
||||
_QUEEN_BUILDING_TOOLS,
|
||||
_QUEEN_STAGING_TOOLS,
|
||||
_QUEEN_RUNNING_TOOLS,
|
||||
_QUEEN_EDITING_TOOLS,
|
||||
_QUEEN_INDEPENDENT_TOOLS,
|
||||
):
|
||||
union.update(tool_list)
|
||||
derived = union - _WORKER_INHERITED_TOOLS
|
||||
return frozenset(derived | _QUEEN_LIFECYCLE_EXTRAS)
|
||||
return frozenset(union - _WORKER_INHERITED_TOOLS)
|
||||
|
||||
|
||||
async def handle_trigger(request: web.Request) -> web.Response:
|
||||
|
||||
@@ -61,7 +61,7 @@ def _session_to_live_dict(session) -> dict:
|
||||
"intro_message": getattr(session.runner, "intro_message", "") or "",
|
||||
"queen_phase": phase_state.phase
|
||||
if phase_state
|
||||
else ("staging" if session.colony_runtime else "planning"),
|
||||
else ("staging" if session.colony_runtime else "independent"),
|
||||
"queen_supports_images": supports_image_tool_results(queen_model) if queen_model else True,
|
||||
"queen_id": getattr(phase_state, "queen_id", None) if phase_state else None,
|
||||
"queen_name": (phase_state.queen_profile or {}).get("name") if phase_state else None,
|
||||
@@ -564,6 +564,134 @@ async def handle_update_trigger_task(request: web.Request) -> web.Response:
|
||||
)
|
||||
|
||||
|
||||
async def handle_activate_trigger(request: web.Request) -> web.Response:
|
||||
"""POST /api/sessions/{session_id}/triggers/{trigger_id}/activate — start a trigger."""
|
||||
session, err = resolve_session(request)
|
||||
if err:
|
||||
return err
|
||||
|
||||
trigger_id = request.match_info["trigger_id"]
|
||||
available = getattr(session, "available_triggers", {})
|
||||
tdef = available.get(trigger_id)
|
||||
if tdef is None:
|
||||
return web.json_response(
|
||||
{"error": f"Trigger '{trigger_id}' not found"},
|
||||
status=404,
|
||||
)
|
||||
|
||||
if trigger_id in getattr(session, "active_trigger_ids", set()):
|
||||
return web.json_response(
|
||||
{"status": "already_active", "trigger_id": trigger_id}
|
||||
)
|
||||
|
||||
from framework.tools.queen_lifecycle_tools import (
|
||||
_persist_active_triggers,
|
||||
_start_trigger_timer,
|
||||
_start_trigger_webhook,
|
||||
)
|
||||
|
||||
try:
|
||||
if tdef.trigger_type == "timer":
|
||||
await _start_trigger_timer(session, trigger_id, tdef)
|
||||
elif tdef.trigger_type == "webhook":
|
||||
await _start_trigger_webhook(session, trigger_id, tdef)
|
||||
else:
|
||||
return web.json_response(
|
||||
{"error": f"Unsupported trigger type: {tdef.trigger_type}"},
|
||||
status=400,
|
||||
)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
return web.json_response(
|
||||
{"error": f"Failed to start trigger: {exc}"},
|
||||
status=500,
|
||||
)
|
||||
|
||||
tdef.active = True
|
||||
session.active_trigger_ids.add(trigger_id)
|
||||
session_id = request.match_info["session_id"]
|
||||
await _persist_active_triggers(session, session_id)
|
||||
|
||||
bus = getattr(session, "event_bus", None)
|
||||
if bus:
|
||||
from framework.host.event_bus import AgentEvent, EventType
|
||||
|
||||
runner = getattr(session, "runner", None)
|
||||
colony_entry = runner.graph.entry_node if runner else None
|
||||
await bus.publish(
|
||||
AgentEvent(
|
||||
type=EventType.TRIGGER_ACTIVATED,
|
||||
stream_id="queen",
|
||||
data={
|
||||
"trigger_id": trigger_id,
|
||||
"trigger_type": tdef.trigger_type,
|
||||
"trigger_config": tdef.trigger_config,
|
||||
"name": tdef.description or trigger_id,
|
||||
**({"entry_node": colony_entry} if colony_entry else {}),
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
return web.json_response({"status": "activated", "trigger_id": trigger_id})
|
||||
|
||||
|
||||
async def handle_deactivate_trigger(request: web.Request) -> web.Response:
|
||||
"""POST /api/sessions/{session_id}/triggers/{trigger_id}/deactivate — stop a trigger.
|
||||
|
||||
Cancels the running timer / webhook subscription but KEEPS the trigger
|
||||
definition in triggers.json so the user can re-activate later.
|
||||
"""
|
||||
session, err = resolve_session(request)
|
||||
if err:
|
||||
return err
|
||||
|
||||
trigger_id = request.match_info["trigger_id"]
|
||||
if trigger_id not in getattr(session, "active_trigger_ids", set()):
|
||||
return web.json_response(
|
||||
{"status": "already_inactive", "trigger_id": trigger_id}
|
||||
)
|
||||
|
||||
task = session.active_timer_tasks.pop(trigger_id, None)
|
||||
if task and not task.done():
|
||||
task.cancel()
|
||||
with contextlib.suppress(asyncio.CancelledError):
|
||||
await task
|
||||
getattr(session, "trigger_next_fire", {}).pop(trigger_id, None)
|
||||
|
||||
webhook_subs = getattr(session, "active_webhook_subs", {})
|
||||
if sub_id := webhook_subs.pop(trigger_id, None):
|
||||
with contextlib.suppress(Exception):
|
||||
session.event_bus.unsubscribe(sub_id)
|
||||
|
||||
session.active_trigger_ids.discard(trigger_id)
|
||||
|
||||
available = getattr(session, "available_triggers", {})
|
||||
tdef = available.get(trigger_id)
|
||||
if tdef:
|
||||
tdef.active = False
|
||||
|
||||
from framework.tools.queen_lifecycle_tools import _persist_active_triggers
|
||||
|
||||
session_id = request.match_info["session_id"]
|
||||
await _persist_active_triggers(session, session_id)
|
||||
|
||||
bus = getattr(session, "event_bus", None)
|
||||
if bus:
|
||||
from framework.host.event_bus import AgentEvent, EventType
|
||||
|
||||
await bus.publish(
|
||||
AgentEvent(
|
||||
type=EventType.TRIGGER_DEACTIVATED,
|
||||
stream_id="queen",
|
||||
data={
|
||||
"trigger_id": trigger_id,
|
||||
"name": (tdef.description or trigger_id) if tdef else trigger_id,
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
return web.json_response({"status": "deactivated", "trigger_id": trigger_id})
|
||||
|
||||
|
||||
async def handle_session_colonies(request: web.Request) -> web.Response:
|
||||
"""GET /api/sessions/{session_id}/colonies — list loaded colonies."""
|
||||
manager = _get_manager(request)
|
||||
@@ -807,6 +935,14 @@ def register_routes(app: web.Application) -> None:
|
||||
app.router.add_patch(
|
||||
"/api/sessions/{session_id}/triggers/{trigger_id}", handle_update_trigger_task
|
||||
)
|
||||
app.router.add_post(
|
||||
"/api/sessions/{session_id}/triggers/{trigger_id}/activate",
|
||||
handle_activate_trigger,
|
||||
)
|
||||
app.router.add_post(
|
||||
"/api/sessions/{session_id}/triggers/{trigger_id}/deactivate",
|
||||
handle_deactivate_trigger,
|
||||
)
|
||||
app.router.add_get("/api/sessions/{session_id}/colonies", handle_session_colonies)
|
||||
|
||||
app.router.add_get("/api/sessions/{session_id}/events/history", handle_session_events_history)
|
||||
|
||||
@@ -383,17 +383,10 @@ class SessionManager:
|
||||
_resume_queen_id = _meta.get("queen_id")
|
||||
except (json.JSONDecodeError, OSError):
|
||||
pass
|
||||
if _resume_phase in ("building", "planning"):
|
||||
# Fall back to queen-only session — cold resume handler in
|
||||
# _start_queen will set phase_state.agent_path and switch to
|
||||
# the correct phase.
|
||||
return await self.create_session(
|
||||
session_id=session_id,
|
||||
model=model,
|
||||
initial_prompt=initial_prompt,
|
||||
queen_resume_from=queen_resume_from,
|
||||
queen_name=queen_name or _resume_queen_id,
|
||||
)
|
||||
# NOTE: legacy planning/building phases are not resumable; they
|
||||
# were removed when the design-then-build flow was retired.
|
||||
# Cold sessions resume into staging/running/editing/independent
|
||||
# via the phase-resume block in _start_queen below.
|
||||
|
||||
# Use the colony's forked session ID as the live session ID.
|
||||
# If it's already live (user navigated back), return it directly
|
||||
@@ -761,8 +754,20 @@ class SessionManager:
|
||||
runtime = runner._agent_runtime
|
||||
|
||||
# Load triggers from the agent's triggers.json definition file.
|
||||
from framework.tools.queen_lifecycle_tools import _read_agent_triggers_json
|
||||
# triggers.json is written exclusively by set_trigger, so the
|
||||
# presence of an entry means the user explicitly activated this
|
||||
# trigger in a previous session. We treat the file as the
|
||||
# source of truth and auto-start each trigger on colony load
|
||||
# so the user doesn't have to re-activate after every restart.
|
||||
# The per-session active_triggers tracking still functions, but
|
||||
# is no longer the only path to "running" status.
|
||||
from framework.tools.queen_lifecycle_tools import (
|
||||
_read_agent_triggers_json,
|
||||
_start_trigger_timer,
|
||||
_start_trigger_webhook,
|
||||
)
|
||||
|
||||
triggers_to_autostart: list[str] = []
|
||||
for tdata in _read_agent_triggers_json(agent_path):
|
||||
tid = tdata.get("id", "")
|
||||
ttype = tdata.get("trigger_type", "")
|
||||
@@ -774,10 +779,55 @@ class SessionManager:
|
||||
description=tdata.get("name", tid),
|
||||
task=tdata.get("task", ""),
|
||||
)
|
||||
triggers_to_autostart.append(tid)
|
||||
logger.info("Loaded trigger '%s' (%s) from triggers.json", tid, ttype)
|
||||
|
||||
# Auto-start every trigger discovered in triggers.json. The
|
||||
# frontend listens for TRIGGER_ACTIVATED to render the active
|
||||
# state; per-session active_triggers tracking still happens
|
||||
# via _persist_active_triggers below.
|
||||
for tid in triggers_to_autostart:
|
||||
tdef = session.available_triggers[tid]
|
||||
try:
|
||||
if tdef.trigger_type == "timer":
|
||||
await _start_trigger_timer(session, tid, tdef)
|
||||
elif tdef.trigger_type == "webhook":
|
||||
await _start_trigger_webhook(session, tid, tdef)
|
||||
tdef.active = True
|
||||
session.active_trigger_ids.add(tid)
|
||||
logger.info("Auto-started trigger '%s' on colony load", tid)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to auto-start trigger '%s' on colony load",
|
||||
tid,
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
if session.active_trigger_ids:
|
||||
# Persist the auto-started set so a subsequent restart
|
||||
# finds them in state.active_triggers and the existing
|
||||
# _restore_active_triggers path also keeps working.
|
||||
from framework.tools.queen_lifecycle_tools import (
|
||||
_persist_active_triggers,
|
||||
)
|
||||
|
||||
await _persist_active_triggers(session, session.id)
|
||||
|
||||
if session.available_triggers:
|
||||
# Emit AVAILABLE for every trigger (so the UI knows the
|
||||
# definition exists) and ACTIVATED for the ones we just
|
||||
# auto-started. The frontend handler treats them as the
|
||||
# same case and uses the latter to flip the card to
|
||||
# active.
|
||||
await self._emit_trigger_events(session, "available", session.available_triggers)
|
||||
if session.active_trigger_ids:
|
||||
activated = {
|
||||
tid: session.available_triggers[tid]
|
||||
for tid in session.active_trigger_ids
|
||||
if tid in session.available_triggers
|
||||
}
|
||||
if activated:
|
||||
await self._emit_trigger_events(session, "activated", activated)
|
||||
|
||||
# Start runtime on event loop
|
||||
if runtime and not runtime.is_running:
|
||||
@@ -914,6 +964,10 @@ class SessionManager:
|
||||
_start_trigger_webhook,
|
||||
)
|
||||
|
||||
from framework.host.event_bus import AgentEvent, EventType
|
||||
|
||||
runner = getattr(session, "runner", None)
|
||||
colony_entry = runner.graph.entry_node if runner else None
|
||||
saved_tasks = getattr(state, "trigger_tasks", {}) or {}
|
||||
for tid in state.active_triggers:
|
||||
tdef = session.available_triggers.get(tid)
|
||||
@@ -930,6 +984,29 @@ class SessionManager:
|
||||
elif tdef.trigger_type == "webhook":
|
||||
await _start_trigger_webhook(session, tid, tdef)
|
||||
logger.info("Restored webhook trigger '%s'", tid)
|
||||
# Emit TRIGGER_ACTIVATED so the frontend knows this
|
||||
# trigger is running after a server restart. Without
|
||||
# this, the previously-available event is the only
|
||||
# signal the UI ever gets, and the trigger appears
|
||||
# inactive forever.
|
||||
if session.event_bus:
|
||||
await session.event_bus.publish(
|
||||
AgentEvent(
|
||||
type=EventType.TRIGGER_ACTIVATED,
|
||||
stream_id="queen",
|
||||
data={
|
||||
"trigger_id": tdef.id,
|
||||
"trigger_type": tdef.trigger_type,
|
||||
"trigger_config": tdef.trigger_config,
|
||||
"name": tdef.description or tdef.id,
|
||||
**(
|
||||
{"entry_node": colony_entry}
|
||||
if colony_entry
|
||||
else {}
|
||||
),
|
||||
},
|
||||
)
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
"Saved trigger '%s' not found in worker entry points, skipping",
|
||||
@@ -1324,22 +1401,18 @@ class SessionManager:
|
||||
_phase = _meta.get("phase")
|
||||
|
||||
if _agent_path and Path(_agent_path).exists():
|
||||
if _phase in ("staging", "running", None):
|
||||
# Agent fully built — load worker and resume
|
||||
await self.load_colony(session.id, _agent_path)
|
||||
if session.phase_state:
|
||||
await session.phase_state.switch_to_staging(source="auto")
|
||||
logger.info("Cold restore: auto-loaded worker from %s", _agent_path)
|
||||
elif _phase == "building":
|
||||
# Agent folder exists but incomplete — resume building
|
||||
if session.phase_state:
|
||||
session.phase_state.agent_path = _agent_path
|
||||
await session.phase_state.switch_to_building(source="auto")
|
||||
logger.info("Cold restore: resumed BUILDING phase for %s", _agent_path)
|
||||
elif _phase == "planning":
|
||||
if session.phase_state:
|
||||
session.phase_state.agent_path = _agent_path
|
||||
logger.info("Cold restore: PLANNING phase for %s", _agent_path)
|
||||
# Any cold session that has an agent_path resumes
|
||||
# into staging (worker loaded, ready to run).
|
||||
# Legacy planning/building phases are no longer
|
||||
# supported and are silently mapped to staging.
|
||||
await self.load_colony(session.id, _agent_path)
|
||||
if session.phase_state:
|
||||
await session.phase_state.switch_to_staging(source="auto")
|
||||
logger.info(
|
||||
"Cold restore: auto-loaded worker from %s (was phase=%s)",
|
||||
_agent_path,
|
||||
_phase,
|
||||
)
|
||||
except Exception:
|
||||
logger.warning("Cold restore: failed to auto-load worker", exc_info=True)
|
||||
|
||||
@@ -1492,12 +1565,15 @@ class SessionManager:
|
||||
kind: str,
|
||||
triggers: dict[str, TriggerDefinition],
|
||||
) -> None:
|
||||
"""Emit TRIGGER_AVAILABLE or TRIGGER_REMOVED events for each trigger."""
|
||||
"""Emit TRIGGER_AVAILABLE / ACTIVATED / REMOVED events for each trigger."""
|
||||
from framework.host.event_bus import AgentEvent, EventType
|
||||
|
||||
event_type = (
|
||||
EventType.TRIGGER_AVAILABLE if kind == "available" else EventType.TRIGGER_REMOVED
|
||||
)
|
||||
if kind == "activated":
|
||||
event_type = EventType.TRIGGER_ACTIVATED
|
||||
elif kind == "removed":
|
||||
event_type = EventType.TRIGGER_REMOVED
|
||||
else:
|
||||
event_type = EventType.TRIGGER_AVAILABLE
|
||||
# Resolve entry node for trigger target
|
||||
runner = getattr(session, "runner", None)
|
||||
colony_entry = runner.graph.entry_node if runner else None
|
||||
|
||||
@@ -1,43 +1,290 @@
|
||||
---
|
||||
name: hive.browser-automation
|
||||
description: Best practices for browser automation via gcu-tools MCP server (reading pages, navigation, scrolling, tab management, shadow DOM, coordinates).
|
||||
description: Drive a real Chrome browser via the GCU Beeline extension + Chrome DevTools Protocol. Navigation, clicks, typing, screenshots, shadow-DOM sites (LinkedIn / Reddit / X), keyboard shortcuts, CSP gotchas, rich-text editors. Verified against real production sites 2026-04-11.
|
||||
metadata:
|
||||
author: hive
|
||||
type: default-skill
|
||||
version: "2.0"
|
||||
verified: 2026-04-11
|
||||
---
|
||||
|
||||
## Operational Protocol: Browser Automation
|
||||
# GCU Browser Automation
|
||||
|
||||
Follow these rules for reliable, efficient browser interaction.
|
||||
All GCU browser tools drive a real Chrome instance through the Beeline extension and Chrome DevTools Protocol (CDP). That means clicks, keystrokes, and screenshots are processed by the actual browser's native hit testing, focus, and layout engines — **not** a synthetic event layer. Understanding this unlocks strategies that make hard sites easy.
|
||||
|
||||
### Reading Pages
|
||||
- ALWAYS prefer `browser_snapshot` over `browser_get_text("body")` -- it returns a compact ~1-5 KB accessibility tree vs 100+ KB of raw HTML.
|
||||
- Interaction tools (`browser_click`, `browser_type`, `browser_fill`, `browser_scroll`, etc.) return a page snapshot automatically in their result. Use it to decide your next action -- do NOT call `browser_snapshot` separately after every action. Only call `browser_snapshot` when you need a fresh view without performing an action, or after setting `auto_snapshot=false`.
|
||||
- Many complex pages (LinkedIn, Twitter/X, SPAs with virtual scrolling) have DOMs that don't match what's visually rendered — snapshot refs may be stale, missing, or misaligned with visible layout. On these pages, `browser_screenshot` is the only reliable way to orient yourself.
|
||||
- When using screenshots for interaction, you MUST convert image pixel positions via `browser_coords(x, y)` before clicking. NEVER pass raw screenshot pixel positions directly to `browser_click_coordinate` — the image is downscaled and the coordinates will be wrong. Always: screenshot → read position → `browser_coords` → use `physical_x/y` to click.
|
||||
## Coordinates: always CSS pixels
|
||||
|
||||
**Chrome DevTools Protocol `Input.dispatchMouseEvent` operates in CSS pixels, not physical pixels.**
|
||||
|
||||
When you call `browser_coords(image_x, image_y)` after a screenshot, the returned dict has both `css_x/y` and `physical_x/y`. **Always use `css_x/y` for clicks, hovers, and key presses.**
|
||||
|
||||
```
|
||||
browser_screenshot() → image (downscaled to 800/900 px wide)
|
||||
browser_coords(img_x, img_y) → {css_x, css_y, physical_x, physical_y}
|
||||
browser_click_coordinate(css_x, css_y) ← USE css_x/y
|
||||
browser_hover_coordinate(css_x, css_y) ← USE css_x/y
|
||||
browser_press_at(css_x, css_y, key) ← USE css_x/y
|
||||
```
|
||||
|
||||
Feeding `physical_x/y` on a HiDPI display overshoots by DPR× — on a DPR=1.6 laptop, clicks land 60% too far right and down. The ratio between `physicalScale` and `cssScale` tells you the effective DPR.
|
||||
|
||||
`getBoundingClientRect()` already returns CSS pixels — feed those values straight through to click/hover tools without any DPR multiplication.
|
||||
|
||||
**Exception for zoomed elements:** pages that use `zoom` or `transform: scale()` on a container (LinkedIn's `#interop-outlet`, some embedded iframes) render in a scaled local coordinate space. `getBoundingClientRect` there may not match CDP's hit space. Use `browser_shadow_query` which handles the math, or fall back to visually picking coordinates from a screenshot.
|
||||
|
||||
## Screenshot + coordinates is shadow-agnostic — prefer it on shadow-heavy sites
|
||||
|
||||
On sites that use Shadow DOM heavily (Reddit's faceplate Web Components, LinkedIn's `#interop-outlet` messaging overlay, some X custom elements), **coordinate-based operations reach elements that selector-based tools can't see.**
|
||||
|
||||
Why:
|
||||
|
||||
- **CDP hit testing walks shadow roots natively.** `browser_click_coordinate(css_x, css_y)` routes through Chrome's native hit tester, which traverses open shadow roots automatically. You don't need to know the shadow structure.
|
||||
- **Keyboard dispatch follows focus** into shadow roots. After a click focuses an input (even one three shadow levels deep), `browser_press(...)` with no selector dispatches keys to `document.activeElement`'s computed focus target.
|
||||
- **Screenshots render the real layout** regardless of DOM implementation.
|
||||
|
||||
Whereas `wait_for_selector`, `browser_click(selector=...)`, `browser_type(selector=...)` all use `document.querySelector` under the hood, which **stops at shadow boundaries**. They cannot see elements inside shadow roots.
|
||||
|
||||
### Recommended workflow on shadow-heavy sites
|
||||
|
||||
1. `browser_screenshot()` → visual image
|
||||
2. Identify the target visually → image pixel `(x, y)` (eyeball from the screenshot)
|
||||
3. `browser_coords(x, y)` → convert to CSS px
|
||||
4. `browser_click_coordinate(css_x, css_y)` → lands on the element via native hit testing; inputs get focused
|
||||
5. For typing:
|
||||
- If the element was reachable via a selector → `browser_type(selector, text)`
|
||||
- Otherwise → `browser_press(key)` per character (dispatches to focused element, no selector needed)
|
||||
6. Verify by reading element state via a targeted `browser_evaluate` that walks the shadow tree
|
||||
|
||||
### Empirically verified (2026-04-11)
|
||||
|
||||
Tested against `https://www.reddit.com/r/programming/` whose search input lives at:
|
||||
```
|
||||
document > reddit-search-large [shadow]
|
||||
> faceplate-search-input#search-input [shadow]
|
||||
> input[name="q"]
|
||||
```
|
||||
|
||||
- `document.querySelector('input')` → **0 visible inputs** on the page (all in shadow)
|
||||
- `browser_type('faceplate-search-input input', 'python')` → "Element not found"
|
||||
- `browser_click_coordinate(617, 28)` → focus trail: `REDDIT-SEARCH-LARGE > FACEPLATE-SEARCH-INPUT > INPUT` ✓
|
||||
- Char-by-char key dispatch after the click → `input.value === 'python'` ✓
|
||||
|
||||
Coordinate pipeline: works perfectly. Selector pipeline: unusable without shadow-piercing syntax.
|
||||
|
||||
### Shadow-piercing selectors
|
||||
|
||||
When you DO want a selector-based approach and know the shadow structure, `browser_shadow_query` and `browser_get_rect` support `>>>` shadow-piercing syntax:
|
||||
|
||||
```
|
||||
browser_shadow_query("reddit-search-large >>> #search-input")
|
||||
browser_get_rect("#interop-outlet >>> #ember37 >>> p")
|
||||
```
|
||||
|
||||
Returns the element's rect in **CSS pixels** (feed directly to click tools). Remember: `browser_type` and `wait_for_selector` do **not** support `>>>` — only shadow_query and get_rect do.
|
||||
|
||||
## Navigation and waiting
|
||||
|
||||
### The basics
|
||||
|
||||
```
|
||||
browser_navigate(url, wait_until="load") # "load" | "domcontentloaded" | "networkidle"
|
||||
browser_wait_for_selector("h1", timeout_ms=5000)
|
||||
browser_wait_for_text("Some text", timeout_ms=5000)
|
||||
browser_go_back()
|
||||
browser_go_forward()
|
||||
browser_reload()
|
||||
```
|
||||
|
||||
All return real URLs and titles. On a fast page `navigate(wait_until="load")` returns in sub-second. `wait_for_selector` and `wait_for_text` typically resolve in single-digit milliseconds on elements already in the DOM.
|
||||
|
||||
### Timing expectations (measured against real sites)
|
||||
|
||||
| Site | Navigate load time |
|
||||
|---|---|
|
||||
| example.com | 100–400 ms |
|
||||
| wikipedia.org | 200–500 ms |
|
||||
| reddit.com | 1.5–2 s |
|
||||
| x.com/twitter | 1.2–1.6 s |
|
||||
| linkedin.com (logged in) | 4–5 s |
|
||||
|
||||
Use `timeout_ms=20000` for LinkedIn and other heavy SPAs to give them margin.
|
||||
|
||||
### After navigate, always let SPA hydrate
|
||||
|
||||
Even after `wait_until="load"`, React/Vue SPAs often render their real chrome in a second pass. Add `await sleep(2)` to `await sleep(3)` before querying for site-specific elements. Otherwise `wait_for_selector` will fail on elements that do exist moments later.
|
||||
|
||||
### Reading pages efficiently
|
||||
|
||||
- **Prefer `browser_snapshot` over `browser_get_text("body")`** — returns a compact ~1–5 KB accessibility tree vs 100+ KB of raw HTML.
|
||||
- Interaction tools (`browser_click`, `browser_type`, `browser_fill`, `browser_scroll`, etc.) return a page snapshot automatically in their result. Use it to decide your next action — do NOT call `browser_snapshot` separately after every action. Only call `browser_snapshot` when you need a fresh view without performing an action, or after setting `auto_snapshot=false`.
|
||||
- Complex pages (LinkedIn, Twitter/X, SPAs with virtual scrolling) have DOMs that don't match what's visually rendered — snapshot refs may be stale, missing, or misaligned with visible layout. On these pages, `browser_screenshot` is the only reliable way to orient yourself.
|
||||
- Only fall back to `browser_get_text` for extracting specific small elements by CSS selector.
|
||||
|
||||
### Navigation & Waiting
|
||||
- `browser_navigate` and `browser_open` already wait for the page to load. Do NOT call `browser_wait` with no arguments after navigation -- it wastes time. Only use `browser_wait` when you need a *specific element* or *text* to appear (pass `selector` or `text`).
|
||||
- NEVER re-navigate to the same URL after scrolling -- this resets your scroll position and loses loaded content.
|
||||
## Typing and keyboard input
|
||||
|
||||
### Scrolling
|
||||
- Use large scroll amounts ~2000 when loading more content -- sites like twitter and linkedin have lazy loading for paging.
|
||||
- The scroll result includes a snapshot automatically -- no need to call `browser_snapshot` separately.
|
||||
### ALWAYS click before typing into rich-text editors
|
||||
|
||||
### Batching Actions
|
||||
- You can call multiple tools in a single turn -- they execute in parallel. ALWAYS batch independent actions together. Examples: fill multiple form fields in one turn, navigate + snapshot in one turn, click + scroll if targeting different elements.
|
||||
**The single most common "looks like it worked but send button stays disabled" failure.** If you're typing into a modern editor (X/Twitter's Draft.js compose, LinkedIn's post composer, Reddit's comment box, Gmail compose, Slack, Discord, Notion, Monaco, any `contenteditable`), **click the input area first with `browser_click_coordinate` or `browser_click(selector)` before you type**.
|
||||
|
||||
Why this is necessary:
|
||||
|
||||
- **React / Vue controlled components** don't trust JS-sourced `.focus()`. React uses event delegation and watches for *native* pointer/focus events — a `click` dispatched via CDP fires the real `pointerdown`/`pointerup`/`click`/`focus` sequence that React listens to, and updates its internal state. A JS-only `.focus()` sets `document.activeElement` but the framework's controlled state doesn't see it.
|
||||
- **Draft.js** (X/Twitter compose) and **Lexical** (Gmail, LinkedIn DMs) use contenteditable divs with immutable editor state. They only enter "edit mode" after a real click on the editor surface. Typing at them without clicking routes keys to `document.body` or gets silently discarded.
|
||||
- **Send/submit buttons are bound to framework state**, not DOM state. They're typically `disabled={!hasRealContent}` where `hasRealContent` is computed from React/Vue/Svelte state. The input field can have characters in the DOM but the button stays disabled because the framework never saw a real input event.
|
||||
|
||||
The symptom is always the same: **you type, the characters appear visually, and the send button doesn't enable**. The agent then clicks send anyway, nothing happens, and it thinks the post failed.
|
||||
|
||||
### Safe "click-then-type-then-verify" pattern
|
||||
|
||||
```
|
||||
# 1. Focus the real element via a real click (not JS .focus()).
|
||||
rect = browser_get_rect(selector) # or browser_shadow_query for shadow sites
|
||||
browser_click_coordinate(rect.cx, rect.cy)
|
||||
sleep(0.5) # let the editor open / focus settle
|
||||
|
||||
# 2. Type. browser_type now uses CDP Input.insertText by default, which is
|
||||
# the most reliable way to insert text into rich editors (Lexical,
|
||||
# Draft.js, ProseMirror, any React-controlled contenteditable).
|
||||
browser_type(selector, text)
|
||||
sleep(1.0) # let framework state commit
|
||||
|
||||
# 3. BEFORE clicking send, verify the submit button is actually enabled.
|
||||
# Don't trust that typing worked — check state.
|
||||
state = browser_evaluate("""
|
||||
(function(){
|
||||
const btn = document.querySelector('[data-testid="tweetButton"]');
|
||||
if (!btn) return {exists: false};
|
||||
return {
|
||||
exists: true,
|
||||
disabled: btn.disabled || btn.getAttribute('aria-disabled') === 'true',
|
||||
text: btn.textContent.trim(),
|
||||
};
|
||||
})()
|
||||
""")
|
||||
|
||||
# 4. Only click send if the button is enabled.
|
||||
if not state['disabled']:
|
||||
browser_click(submit_selector)
|
||||
else:
|
||||
# Recovery: sometimes a click-again + one extra keystroke nudges
|
||||
# React into recomputing hasRealContent.
|
||||
browser_click_coordinate(rect.cx, rect.cy)
|
||||
browser_press("End")
|
||||
browser_press(" ")
|
||||
browser_press("Backspace")
|
||||
# re-check state
|
||||
```
|
||||
|
||||
### Why `browser_type` uses `Input.insertText` by default
|
||||
|
||||
CDP has a dedicated method — `Input.insertText` — for committing text into the focused element as if IME just committed it. It **bypasses the keyboard event pipeline entirely** and works cleanly on every rich-text editor tested to date: Lexical (LinkedIn DMs, Gmail), Draft.js (X compose), ProseMirror (Reddit), Monaco, and plain `contenteditable`. Playwright uses this under the hood for `keyboard.type()` on rich editors.
|
||||
|
||||
Per-character `Input.dispatchKeyEvent` looks equivalent on paper, but some rich editors listen for `beforeinput` events with a specific shape and route insertion through their own state machine — the raw keys arrive but never get turned into text. That was the exact failure mode that left LinkedIn's message composer empty (and its Send button disabled) during the 2026-04-11 empirical run.
|
||||
|
||||
If you need per-keystroke dispatch (autocomplete testing, code editors, animated typing with `delay_ms`), pass `use_insert_text=False` to fall back to the old `keyDown/keyUp` path.
|
||||
|
||||
### Neutralizing `beforeunload` draft dialogs
|
||||
|
||||
When a composer has unsent text and you try to navigate away or close the tab, sites like LinkedIn pop a native "You have an unsent message, leave?" confirm dialog via `window.onbeforeunload`. Your automation hangs waiting on the dialog — `browser_close_tab` and `browser_navigate` both time out.
|
||||
|
||||
**Strip the handler via `browser_evaluate` before navigating:**
|
||||
|
||||
```
|
||||
browser_evaluate("""
|
||||
(function(){
|
||||
window.onbeforeunload = null;
|
||||
window.addEventListener('beforeunload', function(e){
|
||||
e.stopImmediatePropagation();
|
||||
}, true);
|
||||
return true;
|
||||
})()
|
||||
""")
|
||||
# Now browser_navigate / close_tab work without hitting a confirm
|
||||
```
|
||||
|
||||
Always include an equivalent cleanup block in any script that types into a compose UI — without it, a script crash mid-type leaves the tab in an unusable state with the draft modal blocking every subsequent automation call.
|
||||
|
||||
### Verified site-specific quirks
|
||||
|
||||
| Site | Editor | Workaround |
|
||||
|---|---|---|
|
||||
| **X / Twitter** compose | Draft.js | Click `[data-testid='tweetTextarea_0']` first, then type with `delay_ms=20`. First 1-2 chars may be eaten — accept truncation or prepend a throwaway char. Verify `[data-testid='tweetButton']` has `disabled: false` before clicking. |
|
||||
| **LinkedIn** messaging | contenteditable (inside `#interop-outlet` shadow root) | Use `browser_shadow_query` to find the rect, click-coordinate to focus, then type via focus-based key dispatch (selector-based type can't reach shadow). Send button is `.msg-form__send-button`. |
|
||||
| **LinkedIn** feed post composer | Quill/LinkedIn custom | Click the "Start a post" trigger first, wait 1s for modal, click the textarea, type. |
|
||||
| **Reddit** comment/post box | ProseMirror | Click the textarea, wait 0.5s for the toolbar to mount, then type. Submit is `button[slot="submit-button"]` inside a shreddit-composer. |
|
||||
| **Gmail** compose | Lexical | Click the body first. Gmail has a visible `div[contenteditable=true][aria-label*='Message Body']` after opening a compose window. |
|
||||
| **Slack** message box | contenteditable | Click first, then type. Send is a paper-plane button with `data-qa='texty_send_button'`. |
|
||||
| **Discord** | Slate | Click first. Discord's send is implicit on Enter (no button), so just press Enter after typing. |
|
||||
| **Monaco** editors (GitHub code review, CodeSandbox) | Monaco | Click first, type with `delay_ms=10`. Monaco listens for `textarea` input events on a hidden textarea — requires focus to be on that textarea. |
|
||||
|
||||
### Plain text into a real input
|
||||
|
||||
For plain `<input>` and `<textarea>` elements with no framework wrapper (forms on static sites, simple search bars that pass a selector string straight through), `browser_type(selector, text)` is sufficient — the bridge's internal `focus()` call does the right thing. But when in doubt, click first. It's cheap insurance.
|
||||
|
||||
```
|
||||
browser_type(selector, text)
|
||||
```
|
||||
|
||||
- Sends `keyDown` (with `key`, `code`, `text` fields populated) → `keyUp` per character (or a single `Input.insertText` by default)
|
||||
- Fires real `keydown` / `keypress` / `input` / `keyup` events — frameworks that branch on `event.key` or `event.code` see the right values
|
||||
- Matches what Playwright and Puppeteer send
|
||||
|
||||
Works on real `<input>`, `<textarea>`, and `contenteditable` elements. For shadow-DOM inputs, see the "shadow-heavy sites" section above — `type_text(selector=)` can't see past shadow boundaries.
|
||||
|
||||
### Keyboard shortcuts (Ctrl+A, Shift+Tab, Cmd+Enter)
|
||||
|
||||
```
|
||||
browser_press("a", modifiers=["ctrl"]) # Ctrl+A — select all
|
||||
browser_press("Backspace") # clear selected text
|
||||
browser_press("Enter", modifiers=["meta"]) # Cmd+Enter (mac) — submit
|
||||
browser_press("Tab", modifiers=["shift"]) # Shift+Tab — reverse focus
|
||||
```
|
||||
|
||||
Accepted modifier names (case-insensitive): `"alt"`, `"ctrl"` / `"control"`, `"meta"` / `"cmd"`, `"shift"`.
|
||||
|
||||
Behind the scenes this dispatches the modifier's own `keyDown` first, then the main key with `code` and `windowsVirtualKeyCode` populated (so Chrome's shortcut dispatcher recognises it), then releases modifiers in reverse order. Without the `code` + `windowsVirtualKeyCode` fields Chrome routes the event to the DOM without firing shortcuts — which is what plain string keys get.
|
||||
|
||||
### Special keys
|
||||
|
||||
Recognized without modifiers: `Enter`, `Tab`, `Escape`, `Backspace`, `Delete`, `ArrowUp/Down/Left/Right`, `Home`, `End`, `PageUp`, `PageDown`.
|
||||
|
||||
## Screenshots
|
||||
|
||||
```
|
||||
browser_screenshot() # viewport, 900 px wide by default
|
||||
browser_screenshot(full_page=True) # full scrollable page
|
||||
browser_screenshot(selector="#header") # clip to element's rect
|
||||
```
|
||||
|
||||
Returns a PNG with automatic downscaling to a target width (default 900 px) plus a JSON metadata block containing `cssWidth`, `devicePixelRatio`, `physicalScale`, `cssScale`, and a `scaleHint` string. The image is also annotated with a highlight rectangle/dot showing the last interaction (click, hover, type) if one happened on this tab.
|
||||
|
||||
The highlight overlay stays visible on the page for **10 seconds** after each interaction, then fades. Before a screenshot is likely, make sure your click / hover / type happens <10 s before the screenshot.
|
||||
|
||||
### Anatomy of the scale fields
|
||||
|
||||
- `cssWidth` = `window.innerWidth` (CSS px)
|
||||
- `devicePixelRatio` = `window.devicePixelRatio` (often 1.6, 2, or 3 on modern displays)
|
||||
- `physicalScale = png_width / image_width` (how many physical-px per image-px)
|
||||
- `cssScale = cssWidth / image_width` (how many CSS-px per image-px)
|
||||
- Effective DPR = `physicalScale / cssScale` (should match `devicePixelRatio`)
|
||||
|
||||
When converting image coordinates for clicks, always use `cssScale`. The `physicalScale` field is there for debugging HiDPI displays, not for inputs.
|
||||
|
||||
## Scrolling
|
||||
|
||||
- Use large scroll amounts (~2000) when loading more content — sites like Twitter and LinkedIn have lazy loading for paging.
|
||||
- The scroll result includes a snapshot automatically — no need to call `browser_snapshot` separately.
|
||||
- Never re-navigate to the same URL after scrolling — this resets your scroll position and loses loaded content.
|
||||
|
||||
## Batching actions
|
||||
|
||||
- You can call multiple tools in a single turn — they execute in parallel. ALWAYS batch independent actions together. Examples: fill multiple form fields in one turn, navigate + snapshot in one turn, click + scroll if targeting different elements.
|
||||
- When batching, set `auto_snapshot=false` on all but the last action to avoid redundant snapshots.
|
||||
- Aim for 3-5 tool calls per turn minimum. One tool call per turn is wasteful.
|
||||
- Aim for 3–5 tool calls per turn minimum. One tool call per turn is wasteful.
|
||||
|
||||
### Error Recovery
|
||||
- If a tool fails, retry once with the same approach.
|
||||
- If it fails a second time, STOP retrying and switch approach.
|
||||
- If `browser_snapshot` fails, try `browser_get_text` with a specific small selector as fallback.
|
||||
- If `browser_open` fails or page seems stale, `browser_stop`, then `browser_start`, then retry.
|
||||
## Tab management
|
||||
|
||||
### Tab Management
|
||||
**Close tabs as soon as you are done with them** -- not only at the end of the task. After reading or extracting data from a tab, close it immediately.
|
||||
**Close tabs as soon as you are done with them** — not only at the end of the task. After reading or extracting data from a tab, close it immediately.
|
||||
|
||||
- Finished reading/extracting from a tab? `browser_close(target_id=...)`
|
||||
- Completed a multi-tab workflow? `browser_close_finished()` to clean up all your tabs
|
||||
@@ -45,36 +292,157 @@ Follow these rules for reliable, efficient browser interaction.
|
||||
- Popup appeared that you didn't need? Close it immediately
|
||||
|
||||
`browser_tabs` returns an `origin` field for each tab:
|
||||
- `"agent"` — you opened it; you own it; close it when done
|
||||
- `"popup"` — opened by a link or script; close after extracting what you need
|
||||
- `"startup"` or `"user"` — leave these alone unless the task requires it
|
||||
|
||||
Never accumulate tabs. Treat every tab you open as a resource you must free.
|
||||
|
||||
### Shadow DOM & Overlays
|
||||
Some sites (LinkedIn messaging, etc.) render content inside closed shadow roots, which are invisible to regular DOM queries.
|
||||
The bridge automatically evicts per-tab state (`_cdp_attached`, `_interaction_highlights`) when a tab is closed, so you can't leak stale annotations or attached-debugger flags.
|
||||
|
||||
- `browser_shadow_query("#interop-outlet >>> #msg-overlay >>> p")` -- uses `>>>` to pierce shadow roots. Returns `rect` in CSS pixels and `physicalRect` ready for coordinate tools.
|
||||
- `browser_get_rect(selector="...", pierce_shadow=true)` -- get physical rect for any element including shadow DOM.
|
||||
## Site-specific selectors (verified 2026-04-11)
|
||||
|
||||
### Coordinate System
|
||||
There are THREE coordinate spaces. Using the wrong one causes clicks/hovers to land in the wrong place.
|
||||
### LinkedIn
|
||||
|
||||
| Space | Used by | How to get |
|
||||
|---|---|---|
|
||||
| Physical pixels | `browser_click_coordinate` | `browser_coords` `physical_x/y` |
|
||||
| CSS pixels | `getBoundingClientRect()`, `elementFromPoint` | `browser_coords` `css_x/y` |
|
||||
| Screenshot pixels | What you see in the image | Raw position in screenshot |
|
||||
| Target | Selector |
|
||||
|---|---|
|
||||
| Global search input | `input[data-testid='typeahead-input']` |
|
||||
| Own profile link | `a[href*='linkedin.com/in/']` |
|
||||
| Messaging overlay | `#interop-outlet >>> [aria-label]` (use shadow_query) |
|
||||
|
||||
**Converting screenshot to physical**: `browser_coords(x, y)` then use `physical_x/y`.
|
||||
**Converting CSS to physical**: multiply by `window.devicePixelRatio` (typically 1.6 on HiDPI).
|
||||
**Never** pass raw `getBoundingClientRect()` values to coordinate tools without multiplying by DPR first.
|
||||
LinkedIn enforces **strict Trusted Types CSP**. Any script you inject via `browser_evaluate` that uses `innerHTML = "<...>"` will be **silently dropped** — the wrapper element gets added but its content is empty, no console error. Always use `createElement` + `appendChild` + `setAttribute` for DOM injection on LinkedIn. `style.cssText`, `textContent`, and `.value` assignments are fine (they don't go through the Trusted Types sink).
|
||||
|
||||
### Login & Auth Walls
|
||||
- If you see a "Log in" or "Sign up" prompt, report the auth wall immediately -- do NOT attempt to log in.
|
||||
### Reddit (new reddit / shreddit)
|
||||
|
||||
| Target | Selector |
|
||||
|---|---|
|
||||
| Search input (shadow) | `reddit-search-large >>> #search-input` (rect only; type via click-to-focus) |
|
||||
| Reddit logo (home) | `#reddit-logo` |
|
||||
| Subreddit posts | `shreddit-post` custom elements |
|
||||
| Create post button | `a[href*='/submit']` |
|
||||
|
||||
Reddit's search input lives **two shadow levels deep** inside `reddit-search-large > faceplate-search-input`. You cannot reach it with `browser_type(selector=)`. The working pattern:
|
||||
|
||||
1. `browser_shadow_query("reddit-search-large >>> #search-input")` → rect
|
||||
2. `browser_click_coordinate(rect.cx, rect.cy)` → click lands on the real shadow input via native hit testing; input becomes focused
|
||||
3. `browser_press(c)` for each character → dispatches to focused element
|
||||
4. Verify by reading `.value` via `browser_evaluate` walking the shadow path
|
||||
|
||||
### X / Twitter
|
||||
|
||||
| Target | Selector |
|
||||
|---|---|
|
||||
| Main search input | `input[data-testid='SearchBox_Search_Input']` |
|
||||
| Home nav link | `a[data-testid='AppTabBar_Home_Link']` |
|
||||
| Post text area (compose) | `[data-testid='tweetTextarea_0']` |
|
||||
| Reply buttons on feed | `[data-testid='reply']` |
|
||||
| Post / Tweet submit button | `[data-testid='tweetButton']` |
|
||||
| Caret (⋯) menu on a post | `[data-testid='caret']` |
|
||||
| Confirmation sheet button | `[data-testid='confirmationSheetConfirm']` |
|
||||
|
||||
**X uses Draft.js for the compose text editor**, which does NOT accept synthetic input reliably. Working workaround: `browser_type(selector='[data-testid="tweetTextarea_0"]', text="...", delay_ms=20)`. The delay gives Draft.js time to process each keystroke. The first 1–2 characters may still get eaten — accept minor truncation or prepend a throwaway character. After typing, check `[data-testid="tweetButton"]` has `disabled: false` before clicking submit.
|
||||
|
||||
After submitting, press Escape to close the composer.
|
||||
|
||||
## Common pitfalls
|
||||
|
||||
- **Typing into a rich-text editor without clicking first → send button stays disabled.** Draft.js (X), Lexical (Gmail, LinkedIn DMs), ProseMirror (Reddit), and React-controlled `contenteditable` elements only register input as "real" when the element received a native focus event — JS-sourced `.focus()` is not enough. `browser_type` now does this automatically via a real CDP pointer click before inserting text, but always verify the submit button's `disabled` state before clicking send. See the "ALWAYS click before typing" section above.
|
||||
- **Using per-character `keyDown` on Lexical / Draft.js editors → keys dispatch but text never appears.** Those editors intercept `beforeinput` and route insertion through their own state machine; raw keyDown events are silently dropped. `browser_type` now uses `Input.insertText` by default (the CDP IME-commit method) which these editors accept cleanly. Only set `use_insert_text=False` when you explicitly need per-keystroke dispatch.
|
||||
- **Leaving a composer with text then trying to navigate → `beforeunload` dialog hangs the bridge.** LinkedIn and several other sites pop a native "unsent message" confirm. `browser_navigate` and `close_tab` both time out against this. Always strip `window.onbeforeunload = null` via `browser_evaluate` before any navigation after typing in a composer, or wrap your logic in a `try/finally` that runs the cleanup block.
|
||||
- **Clicking at physical pixels.** CDP uses CSS px. `browser_coords` returns both for debugging, but always feed `css_x/y` to click tools.
|
||||
- **Calling `wait_for_selector` on a shadow element.** It'll always time out. Use `browser_shadow_query` or the screenshot + coordinate strategy.
|
||||
- **Relying on `innerHTML` in injected scripts on LinkedIn.** Silently discarded. Use `createElement` + `appendChild`.
|
||||
- **Not waiting for SPA hydration.** `wait_until="load"` fires before React/Vue rendering on many sites. Add a 2–3 s sleep before querying for chrome elements.
|
||||
- **Using `browser_type(selector)` on LinkedIn DMs or any shadow-DOM input.** Won't find the element. Fall back to click-to-focus + `browser_press` per character.
|
||||
- **Keyboard shortcuts without the `code` field.** Chrome's shortcut dispatcher ignores keyboard events that lack a `code` or `windowsVirtualKeyCode`. `browser_press(..., modifiers=[...])` populates these automatically; raw `Input.dispatchKeyEvent` calls from `browser_evaluate` may not.
|
||||
- **Taking a screenshot more than 10s after the last interaction** and expecting the highlight to still be visible. The overlay fades after 10s. Take the screenshot sooner, or re-trigger the interaction.
|
||||
- **Expecting `browser_navigate` to return when you specified `wait_until="networkidle"` on a busy site.** networkidle is approximate — some sites keep a websocket or analytics beacon open forever. Use `"load"` or `"domcontentloaded"` for reliable timing.
|
||||
|
||||
## Dead CDP sessions and auto-recovery
|
||||
|
||||
If Chrome detaches the debugger for its own reasons (tab closed, user opened DevTools manually, cross-origin navigation, `chrome://` page loaded), the bridge detects the "target closed" / "not attached" error on the next call and **automatically reattaches + retries once**. You don't need to handle this yourself.
|
||||
|
||||
If reattach also fails, you'll get the underlying CDP error string — that's a real problem, usually the tab is gone.
|
||||
|
||||
## When to reach for `browser_evaluate`
|
||||
|
||||
Use it when:
|
||||
- You need to read state from inside a shadow root that `browser_get_rect` doesn't handle
|
||||
- You need a one-shot JS snippet to trigger a site-specific action (scroll a specific container, open a menu, set a form field value directly)
|
||||
- You need to walk an AX tree or measure layout that the standard tools don't expose
|
||||
|
||||
Avoid it when:
|
||||
- A standard tool (`browser_click_coordinate`, `browser_type`, `browser_press`) already does what you need. Those go through CDP's native event pipeline, which real sites trust more than synthetic JS dispatch.
|
||||
- You're on a strict-CSP site and want to inject DOM — stick to `createElement` + `appendChild`, never `innerHTML`.
|
||||
- You need to trigger React / Vue / framework state changes — those frameworks watch for real browser events (`input`, `change`, `click`), not scripted `dispatchEvent` calls. Native-event tools are more reliable.
|
||||
|
||||
## Login & auth walls
|
||||
|
||||
- If you see a "Log in" or "Sign up" prompt, report the auth wall immediately — do NOT attempt to log in.
|
||||
- Check for cookie consent banners and dismiss them if they block content.
|
||||
|
||||
### Efficiency
|
||||
- Minimize tool calls -- combine actions where possible.
|
||||
- When a snapshot result is saved to a spillover file, use `run_command` with grep to extract specific data rather than re-reading the full file.
|
||||
- Call `set_output` in the same turn as your last browser action when possible -- don't waste a turn.
|
||||
## Error recovery
|
||||
|
||||
- If a tool fails, retry once with the same approach.
|
||||
- If it fails a second time, STOP retrying and switch approach.
|
||||
- If `browser_snapshot` fails, try `browser_get_text` with a specific small selector as fallback.
|
||||
- If `browser_open` fails or page seems stale, `browser_stop`, then `browser_start`, then retry.
|
||||
|
||||
## Verified workflows
|
||||
|
||||
These sequences have been empirically verified against real production sites on 2026-04-11.
|
||||
|
||||
### Search on X and read the live dropdown
|
||||
|
||||
```
|
||||
browser_navigate("https://x.com/explore", wait_until="load")
|
||||
# Wait for SPA hydration
|
||||
sleep(3)
|
||||
browser_wait_for_selector("input[data-testid='SearchBox_Search_Input']", timeout_ms=5000)
|
||||
rect = browser_get_rect("input[data-testid='SearchBox_Search_Input']")
|
||||
browser_click_coordinate(rect.cx, rect.cy)
|
||||
browser_type("input[data-testid='SearchBox_Search_Input']", "openai", clear_first=True)
|
||||
# Screenshot now shows live search suggestions
|
||||
browser_screenshot()
|
||||
browser_press("Escape", selector="input[data-testid='SearchBox_Search_Input']")
|
||||
```
|
||||
|
||||
### Search Reddit (shadow DOM)
|
||||
|
||||
```
|
||||
browser_navigate("https://www.reddit.com/r/programming/", wait_until="load")
|
||||
sleep(2)
|
||||
# Shadow-pierce the nested search input
|
||||
sq = browser_shadow_query("reddit-search-large >>> #search-input")
|
||||
browser_click_coordinate(sq.rect.cx, sq.rect.cy)
|
||||
# Typing can't use selector (shadow); focused input receives raw key presses
|
||||
for c in "python":
|
||||
browser_press(c)
|
||||
browser_screenshot()
|
||||
browser_press("Escape")
|
||||
```
|
||||
|
||||
### Search LinkedIn and dismiss without submitting
|
||||
|
||||
```
|
||||
browser_navigate("https://www.linkedin.com/feed/", wait_until="load", timeout_ms=20000)
|
||||
sleep(3)
|
||||
browser_wait_for_selector("input[data-testid='typeahead-input']", timeout_ms=5000)
|
||||
rect = browser_get_rect("input[data-testid='typeahead-input']")
|
||||
browser_click_coordinate(rect.cx, rect.cy)
|
||||
browser_type("input[data-testid='typeahead-input']", "anthropic", clear_first=True)
|
||||
# Dropdown shows real live suggestions
|
||||
browser_screenshot()
|
||||
browser_press("Escape", selector="input[data-testid='typeahead-input']")
|
||||
```
|
||||
|
||||
## Debugging checklist when a click / type "didn't work"
|
||||
|
||||
1. **Send button stays disabled after typing?** Two possible causes. (a) You didn't click the input first, so React never saw a native focus event. `browser_type` now clicks automatically — but if you're using raw `Input.dispatchKeyEvent`, click first yourself. (b) You're using per-character `keyDown` on a Lexical / Draft.js editor, and those editors dropped the keys because they listen for `beforeinput` with a specific shape. Switch to `browser_type(selector, text)` (which now uses `Input.insertText` by default) or, at a lower level, call CDP `Input.insertText` directly. Always `browser_evaluate` the submit button's `disabled` / `aria-disabled` state before clicking send; if still disabled after those fixes, the framework never saw real input.
|
||||
2. **Did the selector match anything?** Run `browser_get_rect(selector)` — if it returns `visible=False` or zero rect, the element isn't laid out yet. Wait longer or use a different selector.
|
||||
3. **Is the element inside a shadow root?** Try `browser_shadow_query(path)`. If your selector is light-DOM only, switch to the screenshot + coordinate strategy.
|
||||
4. **Did the click hit something on top of the element?** Register a temporary event listener via `browser_evaluate` on the target element, click, then read `window.__hits` to see what actually received the click. If something else is intercepting (overlay, modal, floating button), dismiss it first.
|
||||
5. **Did `type_text` find the element but fail to insert text?** Some editors (Draft.js on X, ProseMirror on some sites, Monaco) require a small `delay_ms` between keystrokes. Try `delay_ms=20`.
|
||||
6. **Is this a keyboard shortcut that doesn't fire?** Make sure you're using `browser_press(key, modifiers=[...])` — not raw `browser_evaluate` with `dispatchEvent`. Chrome ignores shortcut key events that lack `code` and `windowsVirtualKeyCode`.
|
||||
7. **Did the navigation actually complete?** Check the return value of `browser_navigate` — it now returns a real `url` and `title`. An empty title usually means a blank page or a hung load.
|
||||
8. **Is your screenshot stale?** The highlight overlay stays for 10 s; if the screenshot was taken later, the annotation is gone but the click was real. Check the logs of `browser_click_coordinate` to see the coordinates that were actually sent.
|
||||
|
||||
@@ -0,0 +1,308 @@
|
||||
---
|
||||
name: hive.linkedin-automation
|
||||
description: Drive LinkedIn via the browser-automation toolchain. Verified flows for profile messaging, connection-request acceptance, feed composition, and search. Includes every site-specific quirk we've hit (Trusted Types CSP, #interop-outlet shadow root, Lexical composer, beforeunload draft dialog, rate limits). Verified against logged-in production 2026-04-11.
|
||||
metadata:
|
||||
author: hive
|
||||
type: default-skill
|
||||
version: "1.0"
|
||||
verified: 2026-04-11
|
||||
requires_skill: hive.browser-automation
|
||||
---
|
||||
|
||||
# LinkedIn Automation
|
||||
|
||||
LinkedIn is the hardest mainstream site to automate because it combines **shadow DOM** (`#interop-outlet` for messaging), **strict Trusted Types CSP** (silently drops `innerHTML`), **heavy React reconciliation** (injected nodes get stripped on re-render), **native `beforeunload` draft dialogs** (hang the bridge), and **aggressive spam filters**. Every one of those has bitten us at least once. This skill documents what actually works.
|
||||
|
||||
**Always activate `browser-automation` first.** This skill assumes you already know about CSS-px coordinates, `browser_type`'s click-first behavior, and `browser_shadow_query`. The guidance below is LinkedIn-specific; general browser rules are there.
|
||||
|
||||
## Timing expectations
|
||||
|
||||
- `browser_navigate(wait_until="load", timeout_ms=20000)` — LinkedIn takes **4–5 seconds** to load the feed cold. Default 30s timeout is fine; use 20s as a floor.
|
||||
- After navigation, **always `sleep(3)`** to let React hydrate the profile/feed chrome before querying selectors. Without the sleep `wait_for_selector` will flake on elements that exist moments later.
|
||||
- Composer modal slide-in takes **~2 seconds** after you click the Message button.
|
||||
|
||||
## Verified selectors (2026-04-11)
|
||||
|
||||
| Target | Selector | Notes |
|
||||
|---|---|---|
|
||||
| Global search input | `input[data-testid='typeahead-input']` | Light DOM, straightforward |
|
||||
| Own profile link | `a[href*='linkedin.com/in/']` | Top nav; filter to the one near top-left |
|
||||
| Profile **Message** action | `a[href*='/messaging/compose/']` filtered by `NON_SELF_PROFILE_VIEW` AND no `body=` param AND `x < 700` | **Is an `<a>`**, not a `<button>`. Multiple match; filter carefully. |
|
||||
| Modal composer textarea | `div.msg-form__contenteditable` (inside `#interop-outlet` shadow) | **Multiple instances exist** — pick largest-area **in-viewport** one. |
|
||||
| Modal Send button | `button.msg-form__send-button` (inside `#interop-outlet` shadow) | Same multi-instance trap — filter by `y + height <= innerHeight`. |
|
||||
| Invitation manager | navigate to `https://www.linkedin.com/mynetwork/invitation-manager/received/` | Direct URL is faster than nav-link clicking |
|
||||
| Pending connection card | `.invitation-card, .invitations-card, [data-test-incoming-invitation-card]` | Filter out "invited you to follow" / "subscribe" cards |
|
||||
| Accept button | `button[aria-label*="Accept"]` within the card scope | Per-card scoping is critical — there are many Accept buttons on the page |
|
||||
|
||||
LinkedIn changes class names aggressively. If a class-based selector breaks, fall back to **`browser_screenshot` → visual identification → `browser_coords` → `browser_click_coordinate`**. The screenshot + coord path works regardless of class-name churn and regardless of shadow DOM.
|
||||
|
||||
## Profile Message flow (verified end-to-end 2026-04-11)
|
||||
|
||||
```
|
||||
# 1. Load the profile
|
||||
browser_navigate("https://www.linkedin.com/in/<username>/", wait_until="load", timeout_ms=20000)
|
||||
sleep(4)
|
||||
|
||||
# 2. Strip onbeforeunload before any state-mutating work — prevents draft-dialog deadlock later
|
||||
browser_evaluate("""
|
||||
(function(){
|
||||
window.onbeforeunload = null;
|
||||
window.addEventListener('beforeunload', e => e.stopImmediatePropagation(), true);
|
||||
})();
|
||||
""")
|
||||
|
||||
# 3. Find the profile Message link (NOT a button, and multiple exist)
|
||||
msg_btn = browser_evaluate("""
|
||||
(function(){
|
||||
const links = Array.from(document.querySelectorAll('a[href*="/messaging/compose/"]'));
|
||||
for (const a of links){
|
||||
const href = a.href || '';
|
||||
if (!href.includes('NON_SELF_PROFILE_VIEW')) continue;
|
||||
if (href.includes('body=')) continue; // reject Premium upsell
|
||||
const r = a.getBoundingClientRect();
|
||||
if (r.width === 0 || r.x > 700) continue; // reject sidebar / "More profiles for you"
|
||||
return {cx: r.x + r.width / 2, cy: r.y + r.height / 2};
|
||||
}
|
||||
return null;
|
||||
})();
|
||||
""")
|
||||
browser_click_coordinate(msg_btn['cx'], msg_btn['cy'])
|
||||
sleep(2.5) # composer modal slide-in
|
||||
|
||||
# 4. Find the modal composer textarea (pick biggest in-viewport; reject pinned chat bar)
|
||||
textarea = browser_evaluate("""
|
||||
(function(){
|
||||
const vh = window.innerHeight, vw = window.innerWidth;
|
||||
const candidates = [];
|
||||
function walk(root){
|
||||
const els = root.querySelectorAll ?
|
||||
root.querySelectorAll('div.msg-form__contenteditable') : [];
|
||||
for (const el of els){
|
||||
const r = el.getBoundingClientRect();
|
||||
if (r.width <= 0 || r.height <= 0) continue;
|
||||
if (r.y < 0 || r.y + r.height > vh) continue; // reject pinned bar (below viewport)
|
||||
if (r.x < 0 || r.x + r.width > vw) continue;
|
||||
candidates.push({cx: r.x + r.width/2, cy: r.y + r.height/2, area: r.width * r.height});
|
||||
}
|
||||
const all = root.querySelectorAll ? root.querySelectorAll('*') : [];
|
||||
for (const host of all){ if (host.shadowRoot) walk(host.shadowRoot); }
|
||||
}
|
||||
walk(document);
|
||||
if (!candidates.length) return null;
|
||||
candidates.sort((a, b) => b.area - a.area);
|
||||
return candidates[0];
|
||||
})();
|
||||
""")
|
||||
|
||||
# 5. Click to focus the modal composer (click-first is mandatory for Lexical)
|
||||
browser_click_coordinate(textarea['cx'], textarea['cy'])
|
||||
sleep(0.6)
|
||||
|
||||
# 6. Insert text via CDP Input.insertText (browser_type does this by default now).
|
||||
# Per-char keyDown fails on Lexical composers — the keys dispatch but
|
||||
# the editor never turns them into text.
|
||||
browser_type(<appropriate-selector-or-skip-selector-and-use-bridge-insertText>, text)
|
||||
|
||||
# 7. Find the modal Send button (filter by in-viewport, reject pinned bar)
|
||||
send = browser_evaluate("""
|
||||
(function(){
|
||||
const vh = window.innerHeight;
|
||||
function walk(root){
|
||||
const btns = root.querySelectorAll ? root.querySelectorAll('button') : [];
|
||||
for (const b of btns){
|
||||
const cls = (b.className || '').toString();
|
||||
const txt = (b.textContent || '').trim();
|
||||
if (!cls.includes('send-button') && txt !== 'Send') continue;
|
||||
const r = b.getBoundingClientRect();
|
||||
if (r.width <= 0 || r.y + r.height > vh) continue;
|
||||
return {
|
||||
cx: r.x + r.width/2, cy: r.y + r.height/2,
|
||||
disabled: b.disabled || b.getAttribute('aria-disabled') === 'true',
|
||||
};
|
||||
}
|
||||
const all = root.querySelectorAll ? root.querySelectorAll('*') : [];
|
||||
for (const host of all){
|
||||
if (host.shadowRoot){
|
||||
const got = walk(host.shadowRoot);
|
||||
if (got) return got;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
return walk(document);
|
||||
})();
|
||||
""")
|
||||
|
||||
# 8. ONLY click Send if it's enabled — if disabled, the editor didn't register the input.
|
||||
# Don't click blindly; the framework state is the source of truth, not the DOM text.
|
||||
if not send['disabled']:
|
||||
browser_click_coordinate(send['cx'], send['cy'])
|
||||
sleep(2.5) # wait for send + bubble render
|
||||
```
|
||||
|
||||
**Verify post-send**: the composer textarea should now be empty (`innerText === ''`) and `.msg-s-event-listitem__message-bubble` count should have grown by 1. Walk the shadow tree via `browser_evaluate` to check.
|
||||
|
||||
## Connection request acceptance flow
|
||||
|
||||
Daily outbound pattern — accept pending connection requests and send a templated welcome message.
|
||||
|
||||
```
|
||||
browser_navigate("https://www.linkedin.com/mynetwork/invitation-manager/received/",
|
||||
wait_until="load", timeout_ms=20000)
|
||||
sleep(4)
|
||||
browser_evaluate("(function(){window.onbeforeunload=null;})()")
|
||||
|
||||
# Scan pending connection cards — FILTER OUT follow/subscribe invitations
|
||||
cards = browser_evaluate("""
|
||||
(function(){
|
||||
const out = [];
|
||||
const cards = document.querySelectorAll('[data-test-incoming-invitation-card], .invitation-card');
|
||||
for (const c of cards){
|
||||
const text = (c.textContent || '').toLowerCase();
|
||||
if (text.includes('invited you to follow')) continue;
|
||||
if (text.includes('invited you to subscribe')) continue;
|
||||
const nameEl = c.querySelector('a[href*="/in/"], strong');
|
||||
const name = nameEl ? nameEl.textContent.trim().split(/\\s+/)[0] : '';
|
||||
const accept = c.querySelector('button[aria-label*="Accept"]');
|
||||
if (!accept) continue;
|
||||
const r = accept.getBoundingClientRect();
|
||||
out.push({
|
||||
first_name: name,
|
||||
cx: r.x + r.width/2, cy: r.y + r.height/2,
|
||||
});
|
||||
if (out.length >= 25) break; // strict daily cap — see rate limits below
|
||||
}
|
||||
return out;
|
||||
})();
|
||||
""")
|
||||
|
||||
# Process cards one at a time with human-like cadence
|
||||
for card in cards[:25]:
|
||||
browser_click_coordinate(card['cx'], card['cy']) # click Accept
|
||||
sleep(2)
|
||||
# After accepting, a "Message" button appears on the card — navigate to
|
||||
# the profile and run the profile Message flow above, personalized by first_name.
|
||||
# OR: if the "Message" button is inline on the card, click it directly and
|
||||
# use the shadow-root composer flow.
|
||||
sleep(random.uniform(5, 10)) # human-like delay BETWEEN targets
|
||||
```
|
||||
|
||||
**Don't do 25 back-to-back sends with zero delay.** LinkedIn's spam filter catches this. 5–10 second randomized sleeps between sends, hard cap at 25 per 24h window.
|
||||
|
||||
## Feed post composer flow
|
||||
|
||||
```
|
||||
browser_navigate("https://www.linkedin.com/feed/", wait_until="load", timeout_ms=20000)
|
||||
sleep(4)
|
||||
browser_evaluate("(function(){window.onbeforeunload=null;})()")
|
||||
|
||||
# Click the "Start a post" trigger
|
||||
start_trigger = browser_get_rect("button.share-box-feed-entry__trigger, [aria-label*='Start a post']")
|
||||
browser_click_coordinate(start_trigger.cx, start_trigger.cy)
|
||||
sleep(1.5) # modal slide-in
|
||||
|
||||
# Find the post editor inside the modal (also contenteditable, may not be in shadow)
|
||||
editor = browser_get_rect("div[contenteditable=true][aria-placeholder*='talk about']")
|
||||
browser_click_coordinate(editor.cx, editor.cy)
|
||||
sleep(0.5)
|
||||
browser_type("div[contenteditable=true][aria-placeholder*='talk about']", post_text)
|
||||
sleep(1.0)
|
||||
|
||||
# Verify Post button enabled before clicking
|
||||
state = browser_evaluate("""
|
||||
(function(){
|
||||
const btn = document.querySelector('button.share-actions__primary-action');
|
||||
if (!btn) return {found: false};
|
||||
return {
|
||||
found: true,
|
||||
disabled: btn.disabled || btn.getAttribute('aria-disabled') === 'true',
|
||||
};
|
||||
})();
|
||||
""")
|
||||
if state['found'] and not state['disabled']:
|
||||
browser_click("button.share-actions__primary-action")
|
||||
```
|
||||
|
||||
## Rate limits and safety
|
||||
|
||||
LinkedIn's abuse detection is aggressive. Respect these limits:
|
||||
|
||||
| Action | Limit |
|
||||
|---|---|
|
||||
| Outbound messages to non-connections | **Do not attempt** — will get you warned or restricted |
|
||||
| Outbound messages to new 1st-degree connections | **25/day max**, 5–10s randomized delays |
|
||||
| Connection request sends | **100/week max**, spread across days, warm intros preferred |
|
||||
| Profile views | Several hundred/day is usually fine but varies by account age |
|
||||
| Post publications | 1–3/day, no URL-only posts |
|
||||
| Feed reactions | Dozens/day is fine; vary your activity mix |
|
||||
|
||||
Signals you're being throttled:
|
||||
- "Message failed to send" with no error detail
|
||||
- Redirect to `https://www.linkedin.com/checkpoint/challenge/...`
|
||||
- Profile views showing stale data
|
||||
- Connection requests auto-withdrawn after a few hours
|
||||
|
||||
If any of those show up, **stop the run, screenshot the state, and surface the issue to the human operator.** Do not retry.
|
||||
|
||||
## Common pitfalls
|
||||
|
||||
- **`innerHTML` injection is silently dropped** — LinkedIn's Trusted Types CSP discards any `innerHTML = "<...>"` from injected scripts, no console error. Always use `createElement` + `appendChild` + `setAttribute` for DOM injection. `textContent`, `style.cssText`, and `.value` assignments are fine.
|
||||
- **Per-char keyDown on the message composer produces empty text** — Lexical intercepts `beforeinput` and drops raw keys. Use `browser_type` (which now routes through CDP `Input.insertText`), or call `Input.insertText` directly via the bridge on the focused shadow element.
|
||||
- **`browser_type(selector=...)` can't see the message composer** — it's inside `#interop-outlet` shadow. `document.querySelector('div.msg-form__contenteditable')` returns nothing. Use the shadow-walk + click-to-focus pattern above.
|
||||
- **Multiple Send buttons on the page** — the pinned bottom-right messaging bar has its own `msg-form__send-button` that's usually below `innerHeight`. Filter by in-viewport before clicking.
|
||||
- **`window.onbeforeunload` hangs navigation/close** — after typing in a composer, any `browser_navigate` or `close_tab` can pop a native "unsent message, leave?" confirm dialog that deadlocks the bridge. Always strip `onbeforeunload` before any navigation, and wrap composer flows in a `try/finally` that runs the cleanup block:
|
||||
|
||||
```
|
||||
# Cleanup on exit — run even if the flow crashed mid-type.
|
||||
browser_evaluate("""
|
||||
(function(){
|
||||
window.onbeforeunload = null;
|
||||
const h = document.getElementById('__hive_hl');
|
||||
if (h) { try { h.__hiveStop && h.__hiveStop(); } catch(_){}; h.remove(); }
|
||||
})();
|
||||
""")
|
||||
```
|
||||
|
||||
- **SPA reconciliation strips injected overlays** — LinkedIn's React reconciler removes foreign children of `documentElement` on re-render. The framework highlight overlay survives (re-mount observer + bounded retries), but test overlays injected via raw `browser_evaluate` may not. If you need a stable test overlay, append it to `document.documentElement` AND wrap in a `MutationObserver` that re-appends on removal, capped at ~20 retries.
|
||||
- **Profile page chrome is not in the AX snapshot** — `browser_snapshot` on a profile misses a lot of the structured layout. Use `browser_screenshot` to orient; use specific selectors or the shadow-walk pattern for actions.
|
||||
- **Name parsing from a connection card is fragile** — the card layout changes every few months. Prefer `.textContent.split(/\s+/)[0]` on the first link inside the card rather than relying on a class like `.invitation-card-name`.
|
||||
|
||||
## Auth wall detection
|
||||
|
||||
If you see a "Log in" / "Join LinkedIn" prompt instead of the logged-in feed, **stop immediately** and surface the issue. Do NOT attempt to log in via automation — LinkedIn's bot detection will flag the account.
|
||||
|
||||
Check via:
|
||||
```
|
||||
is_logged_in = browser_evaluate("""
|
||||
(function(){
|
||||
return !!document.querySelector('nav.global-nav') ||
|
||||
!!document.querySelector('[data-test-global-nav-me]');
|
||||
})();
|
||||
""")
|
||||
```
|
||||
|
||||
## Deduplication pattern
|
||||
|
||||
For any daily loop (connection acceptance, profile visits, DMs), maintain a ledger file:
|
||||
|
||||
```
|
||||
# data/linkedin_contacts.json
|
||||
{
|
||||
"contacts": [
|
||||
{
|
||||
"profile_url": "https://www.linkedin.com/in/username/",
|
||||
"name": "First Last",
|
||||
"action": "connection_accepted+message_sent",
|
||||
"timestamp": "2026-04-13T09:30:00Z",
|
||||
"message_preview": "first 50 chars of message sent"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Before any action, check if the profile URL already has a recent entry for the same action. Skip if yes. Atomic-write the ledger after each success so crash-resume works.
|
||||
|
||||
## See also
|
||||
|
||||
- `browser-automation` skill — general CDP/coord/screenshot rules, the click-then-type pattern, shadow-DOM strategy
|
||||
- `x-automation` skill — X/Twitter equivalent
|
||||
@@ -0,0 +1,356 @@
|
||||
---
|
||||
name: hive.x-automation
|
||||
description: Drive X / Twitter via the browser-automation toolchain. Verified flows for posting, replying, deleting, search-and-engage, and the compose composer's Draft.js quirks. Includes the daily-reply and job-market-reply patterns distilled from the backed-up x-daily-replies and x-job-market-replies skills. Verified 2026-04-11.
|
||||
metadata:
|
||||
author: hive
|
||||
type: default-skill
|
||||
version: "1.0"
|
||||
verified: 2026-04-11
|
||||
requires_skill: hive.browser-automation
|
||||
---
|
||||
|
||||
# X / Twitter Automation
|
||||
|
||||
X uses **Draft.js** (the original Facebook rich-text editor) for the compose text area, which was the original canary for all the rich-text editor quirks the `browser-automation` skill now documents. Most of the site is otherwise stable — `data-testid` attributes have held up for years, the SPA is reasonably honest about what it renders, and shadow DOM is minimal. The hard parts are the composer, rate limiting, and the occasional anti-bot challenge.
|
||||
|
||||
**Always activate `browser-automation` first.** This skill assumes you already know about CSS-px coordinates, click-first typing, and `Input.insertText`. The guidance below is X-specific.
|
||||
|
||||
## Timing expectations
|
||||
|
||||
- `browser_navigate(wait_until="load")` returns in **1.3–1.6 s** on a warm cache.
|
||||
- After navigation, **`sleep(2–3)`** for SPA hydration before querying selectors.
|
||||
- Compose modal slide-in: **~1.5 s** after clicking reply / compose.
|
||||
- First 1–2 characters typed into the compose editor **may be dropped** — see "Draft.js quirks" below.
|
||||
|
||||
## Verified selectors (2026-04-11)
|
||||
|
||||
| Target | Selector |
|
||||
|---|---|
|
||||
| Home nav link | `a[data-testid='AppTabBar_Home_Link']` |
|
||||
| Explore nav link | `a[data-testid='AppTabBar_Explore_Link']` |
|
||||
| Notifications | `a[data-testid='AppTabBar_Notifications_Link']` |
|
||||
| Main search input | `input[data-testid='SearchBox_Search_Input']` |
|
||||
| Compose text area | `[data-testid='tweetTextarea_0']` (Draft.js contenteditable) |
|
||||
| Post / Tweet submit button | `[data-testid='tweetButton']` |
|
||||
| Reply button (on feed / tweet detail) | `[data-testid='reply']` |
|
||||
| Like button | `[data-testid='like']` |
|
||||
| Retweet / repost button | `[data-testid='retweet']` |
|
||||
| Caret (⋯) menu on a post | `[data-testid='caret']` |
|
||||
| Confirmation sheet confirm button | `[data-testid='confirmationSheetConfirm']` |
|
||||
| Tweet article wrapper | `article[data-testid='tweet']` |
|
||||
| Close modal / composer | `[aria-label='Close']` or press `Escape` |
|
||||
|
||||
All of these are light-DOM `data-testid` attributes — `wait_for_selector` and `browser_type(selector=...)` work on them directly, no shadow piercing needed.
|
||||
|
||||
## Post new tweet flow
|
||||
|
||||
```
|
||||
browser_navigate("https://x.com/home", wait_until="load")
|
||||
sleep(3)
|
||||
|
||||
# Open the compose UI (click the post-new-tweet nav or use shortcut N)
|
||||
browser_press("n") # keyboard shortcut — opens compose modal
|
||||
sleep(1.5)
|
||||
|
||||
# Click the textarea to make sure Draft.js is in edit mode
|
||||
ta_rect = browser_get_rect("[data-testid='tweetTextarea_0']")
|
||||
browser_click_coordinate(ta_rect.cx, ta_rect.cy)
|
||||
sleep(0.5)
|
||||
|
||||
# Type — browser_type handles Draft.js correctly now via Input.insertText
|
||||
browser_type("[data-testid='tweetTextarea_0']", tweet_text)
|
||||
sleep(1.0) # let Draft.js commit state
|
||||
|
||||
# Verify the Post button is enabled — never click blindly, Draft.js sometimes
|
||||
# doesn't register the input even with a prior click.
|
||||
state = browser_evaluate("""
|
||||
(function(){
|
||||
const btn = document.querySelector('[data-testid="tweetButton"]');
|
||||
if (!btn) return {found: false};
|
||||
return {
|
||||
found: true,
|
||||
disabled: btn.disabled || btn.getAttribute('aria-disabled') === 'true',
|
||||
};
|
||||
})();
|
||||
""")
|
||||
if state['found'] and not state['disabled']:
|
||||
browser_click("[data-testid='tweetButton']")
|
||||
sleep(2)
|
||||
browser_press("Escape") # close any leftover modal
|
||||
```
|
||||
|
||||
## Reply to a post flow
|
||||
|
||||
The reply flow is the same shape as posting, with a few scroll / find-and-click steps before.
|
||||
|
||||
```
|
||||
browser_navigate("https://x.com/home", wait_until="load")
|
||||
sleep(3)
|
||||
|
||||
# Load content by scrolling — X lazy-loads feed items
|
||||
browser_scroll(direction="down", amount=2000)
|
||||
sleep(1.5)
|
||||
|
||||
# Find replyable tweets — reply buttons, in visual/feed order
|
||||
candidates = browser_evaluate("""
|
||||
(function(){
|
||||
const tweets = document.querySelectorAll('article[data-testid="tweet"]');
|
||||
const out = [];
|
||||
tweets.forEach((t, i) => {
|
||||
const reply = t.querySelector('[data-testid="reply"]');
|
||||
if (!reply) return;
|
||||
const r = reply.getBoundingClientRect();
|
||||
if (r.width <= 0 || r.y < 0 || r.y > window.innerHeight) return;
|
||||
const text = (t.textContent || '').slice(0, 120);
|
||||
out.push({
|
||||
index: i,
|
||||
preview: text,
|
||||
cx: r.x + r.width/2,
|
||||
cy: r.y + r.height/2,
|
||||
});
|
||||
});
|
||||
return out;
|
||||
})();
|
||||
""")
|
||||
|
||||
# For each unreplied candidate...
|
||||
for c in candidates:
|
||||
if already_replied(c['preview']):
|
||||
continue # see dedup pattern below
|
||||
|
||||
# Click reply
|
||||
browser_click_coordinate(c['cx'], c['cy'])
|
||||
sleep(1.5) # composer slide-in
|
||||
|
||||
# Click the textarea to focus Draft.js
|
||||
ta = browser_get_rect("[data-testid='tweetTextarea_0']")
|
||||
browser_click_coordinate(ta.cx, ta.cy)
|
||||
sleep(0.5)
|
||||
|
||||
# Type the reply
|
||||
browser_type("[data-testid='tweetTextarea_0']", reply_text)
|
||||
sleep(1.5) # Draft.js state commit takes a beat
|
||||
|
||||
# Verify button enabled
|
||||
state = browser_evaluate("""
|
||||
(function(){
|
||||
const b = document.querySelector('[data-testid="tweetButton"]');
|
||||
return b ? {d: b.disabled || b.getAttribute('aria-disabled') === 'true'} : {d: true};
|
||||
})();
|
||||
""")
|
||||
if state['d']:
|
||||
# Recovery: click the textarea again + one extra character toggles React state
|
||||
browser_click_coordinate(ta.cx, ta.cy)
|
||||
browser_press("End")
|
||||
browser_press(" ")
|
||||
browser_press("Backspace")
|
||||
sleep(0.5)
|
||||
else:
|
||||
browser_click("[data-testid='tweetButton']")
|
||||
sleep(2)
|
||||
record_sent(c['preview'], reply_text) # append to ledger
|
||||
|
||||
# Close the composer (press Escape or click the Close button)
|
||||
browser_press("Escape")
|
||||
sleep(random.uniform(10, 20)) # human cadence — see rate limits
|
||||
```
|
||||
|
||||
## Search-and-engage flow
|
||||
|
||||
For "daily reply to live posts matching query X" — e.g. job-market replies.
|
||||
|
||||
```
|
||||
query = "job market"
|
||||
url = f"https://x.com/search?q={urllib.parse.quote(query)}&src=typed_query&f=live"
|
||||
browser_navigate(url, wait_until="load")
|
||||
sleep(3)
|
||||
browser_scroll("down", 2000)
|
||||
sleep(1.5)
|
||||
|
||||
# Same replyable-tweets probe as above, then same reply-to-tweet loop
|
||||
```
|
||||
|
||||
## Delete a post flow
|
||||
|
||||
```
|
||||
browser_navigate("https://x.com/<your_username>/with_replies", wait_until="load")
|
||||
sleep(3)
|
||||
|
||||
# Find the target article (by text match or index)
|
||||
target_caret = browser_evaluate("""
|
||||
(function(target_text){
|
||||
const tweets = document.querySelectorAll('article[data-testid="tweet"]');
|
||||
for (const t of tweets){
|
||||
if (!(t.textContent || '').includes(target_text)) continue;
|
||||
const caret = t.querySelector('[data-testid="caret"]');
|
||||
if (!caret) continue;
|
||||
const r = caret.getBoundingClientRect();
|
||||
return {cx: r.x + r.width/2, cy: r.y + r.height/2};
|
||||
}
|
||||
return null;
|
||||
})();
|
||||
""", target_text)
|
||||
|
||||
browser_click_coordinate(target_caret['cx'], target_caret['cy'])
|
||||
sleep(0.8) # menu animation
|
||||
|
||||
# The Delete menuitem doesn't have a stable data-testid — find by text
|
||||
delete_rect = browser_evaluate("""
|
||||
(function(){
|
||||
const items = document.querySelectorAll('[role="menuitem"]');
|
||||
for (const el of items){
|
||||
if ((el.textContent || '').trim() === 'Delete'){
|
||||
const r = el.getBoundingClientRect();
|
||||
return {cx: r.x + r.width/2, cy: r.y + r.height/2};
|
||||
}
|
||||
}
|
||||
return null;
|
||||
})();
|
||||
""")
|
||||
browser_click_coordinate(delete_rect['cx'], delete_rect['cy'])
|
||||
sleep(0.8)
|
||||
|
||||
# Confirmation sheet — this one DOES have a stable testid
|
||||
browser_click("[data-testid='confirmationSheetConfirm']")
|
||||
sleep(1.5)
|
||||
```
|
||||
|
||||
## Draft.js quirks
|
||||
|
||||
X's compose editor is the canonical test case for every rich-text-editor bug the GCU bridge has ever had. What you need to know:
|
||||
|
||||
- **Click the textarea first.** Mandatory. Without a native click-sourced focus event, Draft.js's editor state never enters edit mode, and the Post button stays disabled regardless of how much text you type. `browser_type` now does this click automatically.
|
||||
|
||||
- **`browser_type` uses CDP `Input.insertText` by default**, which Draft.js accepts cleanly. The older approach — per-character `Input.dispatchKeyEvent` with `delay_ms=20` — *also* works, but insertText is more reliable and faster. Only pass `delay_ms > 0` (which falls back to per-char dispatch) if you're specifically testing the keystroke timing path.
|
||||
|
||||
- **First 1–2 characters may be eaten** on the per-char dispatch path (not on insertText). If you see `"estin"` instead of `"testin"`, prepend a throwaway character or use insertText.
|
||||
|
||||
- **Verify `tweetButton`'s `disabled` state** before clicking. Draft.js's internal state can disagree with the DOM text — verify framework state via a targeted `browser_evaluate` on `aria-disabled`.
|
||||
|
||||
- **If the button stays disabled after typing**, use the recovery dance: click the textarea again, press `End`, press a space, press `Backspace`. This forces React to recompute `hasRealContent` and usually flips the button on.
|
||||
|
||||
- **URL previews take a beat to render.** If your tweet ends with a URL, wait 2–3 s after typing so the link-card preview loads before you post — otherwise the tweet publishes without the card.
|
||||
|
||||
## Rate limits and safety
|
||||
|
||||
| Action | Limit |
|
||||
|---|---|
|
||||
| Tweets per hour | ~50 before throttling |
|
||||
| Replies per session | **5–10 per run**, randomized 10–20 s delays |
|
||||
| DMs per day | Varies by account age; 50–100 for established accounts |
|
||||
| Follow/unfollow | <400/day spread over time |
|
||||
| Like per day | 1000 max; 200–300 is safer |
|
||||
|
||||
Signals you're being rate-limited or flagged:
|
||||
- 429 status in network responses (not always visible to the agent)
|
||||
- "You are unable to Tweet" banner
|
||||
- Redirect to `https://x.com/account/access` (anti-bot check)
|
||||
- Posts appearing to publish but not visible on your profile
|
||||
- Reply button click opens but composer never receives focus
|
||||
|
||||
If any of these appear, **stop the run, screenshot the state, and surface the issue.** Do not retry immediately.
|
||||
|
||||
## Deduplication pattern
|
||||
|
||||
Every daily loop should maintain a ledger file. Append after each successful reply/post, atomic-write to survive crashes.
|
||||
|
||||
```
|
||||
# data/x_replies_ledger.json
|
||||
{
|
||||
"replies": [
|
||||
{
|
||||
"tweet_url": "https://x.com/<author>/status/<id>",
|
||||
"author": "username",
|
||||
"original_preview": "first 100 chars of the tweet",
|
||||
"reply_text": "what you sent",
|
||||
"timestamp": "2026-04-13T09:30:00Z"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Extract the tweet URL via `browser_evaluate`:
|
||||
|
||||
```
|
||||
url = browser_evaluate("""
|
||||
(function(article_index){
|
||||
const t = document.querySelectorAll('article[data-testid="tweet"]')[article_index];
|
||||
if (!t) return null;
|
||||
const link = t.querySelector('a[href*="/status/"]');
|
||||
return link ? link.href : null;
|
||||
})();
|
||||
""", article_index)
|
||||
```
|
||||
|
||||
Before each reply, check if the URL already has a ledger entry. If yes, skip. This survives across runs and across days.
|
||||
|
||||
## Reply style guidelines
|
||||
|
||||
These are soft rules derived from the backed-up `x-daily-replies` and `x-job-market-replies` skills — tune per your operator's preference.
|
||||
|
||||
**Daily replies (siren_fs persona, dark humorous):**
|
||||
- 2 sentences MAX
|
||||
- Dark, humorous, insightful, trendy
|
||||
- Must feel like a real person with opinions and edge
|
||||
- Must NOT sound like AI, use corporate speak, or be corny
|
||||
- Tie to current news/culture when possible
|
||||
- English only
|
||||
- Target: 5–8 replies per run
|
||||
- Skip posts that are purely images/video with no text context
|
||||
- Prioritize high-engagement accounts
|
||||
- Skip ads unless genuinely interesting
|
||||
|
||||
**Job-market replies (casual slang + prediction-market CTA):**
|
||||
- 2–3 short sentences max
|
||||
- Casual slang, alt spellings: "u", "fr", "lmao", "lol"
|
||||
- Always include a profile-level CTA ("check my profile if u wanna see…")
|
||||
- Tie the reply to the economic / career angle of the original post
|
||||
- Vary templates — never identical text across replies
|
||||
- Max 10 replies per session
|
||||
|
||||
## Common pitfalls
|
||||
|
||||
- **Typing without clicking first → send button stays disabled.** Draft.js only enters edit mode after a native focus event. `browser_type` handles this automatically now, but if you're using raw CDP calls, click first.
|
||||
|
||||
- **First 1–2 chars eaten on per-char dispatch.** Stick with `browser_type` default (uses `Input.insertText`). Only use `delay_ms=20` fallback if you need per-keystroke timing.
|
||||
|
||||
- **Clicking Post with Draft.js state disagreeing.** Always verify `[data-testid="tweetButton"]`'s `disabled` / `aria-disabled` before clicking. If disabled, run the recovery dance.
|
||||
|
||||
- **Anti-bot challenge mid-run.** X occasionally shows a JavaScript challenge or redirects to `/account/access`. Detect by checking the URL after navigation and the presence of the home nav:
|
||||
```
|
||||
challenged = browser_evaluate("""
|
||||
(function(){
|
||||
return window.location.href.includes('/account/access') ||
|
||||
!document.querySelector('a[data-testid="AppTabBar_Home_Link"]');
|
||||
})();
|
||||
""")
|
||||
```
|
||||
If challenged, stop and surface to the operator. Do not try to solve it.
|
||||
|
||||
- **Composer modal fails to open on rapid clicks.** X debounces the reply button click. Always `sleep(1.5)` after clicking before trying to query the textarea.
|
||||
|
||||
- **Navigation inside the SPA is preferred over full page loads.** Clicking a tweet to open its detail view keeps the compose state; using `browser_navigate` reloads everything and slows the run. Use `browser_click` on internal links when possible.
|
||||
|
||||
- **X's `window.innerHeight` changes on compose modal open.** The modal takes over most of the viewport. Don't cache viewport dimensions across a compose open; re-query after the modal slide-in.
|
||||
|
||||
- **URL-only tweets post without a link card if you click Post too fast.** Wait 2–3 s after typing a URL before clicking Post so the card preview renders.
|
||||
|
||||
## Auth wall detection
|
||||
|
||||
Check logged-in state before any action:
|
||||
|
||||
```
|
||||
logged_in = browser_evaluate("""
|
||||
(function(){
|
||||
return !!document.querySelector('a[data-testid="AppTabBar_Home_Link"]') &&
|
||||
!window.location.href.includes('/i/flow/login');
|
||||
})();
|
||||
""")
|
||||
```
|
||||
|
||||
If not logged in, **stop immediately** and surface. Do not attempt to log in via automation.
|
||||
|
||||
## See also
|
||||
|
||||
- `browser-automation` skill — general CDP/coord/screenshot rules, click-then-type pattern, Input.insertText
|
||||
- `linkedin-automation` skill — LinkedIn equivalent
|
||||
@@ -29,6 +29,8 @@ import shutil
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from framework.utils.io import atomic_write
|
||||
|
||||
|
||||
class FileConversationStore:
|
||||
"""File-per-part ConversationStore.
|
||||
@@ -45,8 +47,10 @@ class FileConversationStore:
|
||||
# --- sync helpers --------------------------------------------------------
|
||||
|
||||
def _write_json(self, path: Path, data: dict) -> None:
|
||||
# Atomic tmp+rename with fsync — a crash mid-write would otherwise
|
||||
# leave a corrupt cursor.json and silently reset the iteration counter.
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(path, "w", encoding="utf-8") as f:
|
||||
with atomic_write(path) as f:
|
||||
json.dump(data, f)
|
||||
|
||||
def _read_json(self, path: Path) -> dict | None:
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,6 @@
|
||||
"""Utility functions for the Hive framework."""
|
||||
|
||||
from framework.utils.io import atomic_write
|
||||
from framework.utils.task_registry import TaskRegistry
|
||||
|
||||
__all__ = ["atomic_write"]
|
||||
__all__ = ["atomic_write", "TaskRegistry"]
|
||||
|
||||
@@ -0,0 +1,92 @@
|
||||
"""Tracked ``asyncio.create_task`` — prevents silent task loss.
|
||||
|
||||
Bare ``asyncio.create_task(...)`` has two well-known failure modes:
|
||||
|
||||
1. **Garbage collection.** The event loop only holds a *weak* reference
|
||||
to the task, so if the caller doesn't hold a strong reference the
|
||||
task can be collected mid-flight and silently cancelled.
|
||||
2. **Swallowed exceptions.** If a fire-and-forget task raises, the
|
||||
exception is stored on the Task object and only surfaces when the
|
||||
task is awaited or garbage-collected. If nothing ever awaits it,
|
||||
the exception is logged by asyncio at shutdown (if at all).
|
||||
|
||||
``TaskRegistry`` fixes both: it holds a strong reference until the task
|
||||
finishes, logs any exception the task raised, and removes the reference
|
||||
on completion so it doesn't leak. It also lets a caller cancel every
|
||||
tracked task at shutdown time in one call.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from collections.abc import Coroutine
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)


class TaskRegistry:
    """Owner for background asyncio tasks.

    Holds a strong reference to every spawned task (so the event loop's
    weak reference cannot let it be garbage-collected mid-flight), logs
    any exception a task raised instead of letting asyncio swallow it,
    and drops the reference on completion so finished tasks don't leak.

    Typical use::

        self._tasks = TaskRegistry("agent_loop")
        self._tasks.spawn(self._background_worker(), name="background_worker")
        ...
        await self._tasks.cancel_all()
    """

    def __init__(self, owner: str = "") -> None:
        # ``owner`` is a purely descriptive label used in log messages.
        self._owner = owner
        self._tasks: set[asyncio.Task[Any]] = set()

    def spawn(
        self,
        coro: Coroutine[Any, Any, Any],
        *,
        name: str | None = None,
    ) -> asyncio.Task[Any]:
        """Schedule *coro* as a tracked background task and return the Task."""
        task = asyncio.create_task(coro, name=name)
        # Strong reference + done callback: the pair that makes the task safe.
        self._tasks.add(task)
        task.add_done_callback(self._on_done)
        return task

    def _on_done(self, task: asyncio.Task[Any]) -> None:
        # Release the strong reference first so the registry never leaks,
        # then surface any unhandled exception. Cancellation is expected
        # (cancel_all) and not an error.
        self._tasks.discard(task)
        if task.cancelled():
            return
        exc = task.exception()
        if exc is None:
            return
        logger.error(
            "Tracked task '%s' (owner=%s) raised an unhandled exception: %s",
            task.get_name(),
            self._owner or "?",
            exc,
            exc_info=exc,
        )

    async def cancel_all(self, *, timeout: float = 5.0) -> None:
        """Cancel every tracked task and wait up to *timeout* for them to finish.

        Uses ``asyncio.wait`` rather than ``wait_for(gather(...))``:
        ``asyncio.wait`` reports a timeout through its return value instead
        of raising. The previous ``except TimeoutError`` clause only matched
        the *builtin* TimeoutError, but on Python < 3.11 ``asyncio.wait_for``
        raises ``asyncio.TimeoutError`` (a distinct class until 3.11 aliased
        them), so the timeout would have escaped as an exception there.
        """
        if not self._tasks:
            return
        # Snapshot: tasks spawned after this point are not cancelled here.
        pending = list(self._tasks)
        for t in pending:
            t.cancel()
        _done, still_pending = await asyncio.wait(pending, timeout=timeout)
        if still_pending:
            logger.warning(
                "TaskRegistry(%s): %d task(s) did not finish within %.1fs of cancel",
                self._owner or "?",
                len(still_pending),
                timeout,
            )

    def __len__(self) -> int:
        """Number of tasks currently tracked (scheduled or running)."""
        return len(self._tasks)
|
||||
@@ -81,4 +81,10 @@ export const credentialsApi = {
|
||||
|
||||
resync: () =>
|
||||
api.post<ResyncResponse>("/credentials/resync", {}),
|
||||
|
||||
validateKey: (providerId: string, apiKey: string) =>
|
||||
api.post<{ valid: boolean | null; message: string }>(
|
||||
"/credentials/validate-key",
|
||||
{ provider_id: providerId, api_key: apiKey },
|
||||
),
|
||||
};
|
||||
|
||||
@@ -74,6 +74,16 @@ export const sessionsApi = {
|
||||
patch,
|
||||
),
|
||||
|
||||
activateTrigger: (sessionId: string, triggerId: string) =>
|
||||
api.post<{ status: string; trigger_id: string }>(
|
||||
`/sessions/${sessionId}/triggers/${triggerId}/activate`,
|
||||
),
|
||||
|
||||
deactivateTrigger: (sessionId: string, triggerId: string) =>
|
||||
api.post<{ status: string; trigger_id: string }>(
|
||||
`/sessions/${sessionId}/triggers/${triggerId}/deactivate`,
|
||||
),
|
||||
|
||||
colonies: (sessionId: string) =>
|
||||
api.get<{ colonies: string[] }>(`/sessions/${sessionId}/colonies`),
|
||||
|
||||
|
||||
@@ -176,56 +176,6 @@ export interface GraphTopology {
|
||||
entry_points?: EntryPoint[];
|
||||
}
|
||||
|
||||
// --- Draft graph types (planning phase) ---
|
||||
|
||||
/** A node of the draft (planning-phase) agent graph. */
export interface DraftNode {
  id: string;
  name: string;
  description: string;
  node_type: string;
  tools: string[];
  input_keys: string[];
  output_keys: string[];
  success_criteria: string;
  sub_agents: string[];
  /** For decision nodes: the yes/no question evaluated during dissolution. */
  decision_clause?: string;
  /** Flowchart presentation hints — shape/color defaults per type live in DraftGraph.flowchart_legend. */
  flowchart_type: string;
  flowchart_shape: string;
  flowchart_color: string;
}
|
||||
|
||||
/** A directed edge of the draft agent graph. */
export interface DraftEdge {
  id: string;
  // source/target presumably reference DraftNode.id — confirm with producer.
  source: string;
  target: string;
  condition: string;
  description: string;
  /** Short label shown on the flowchart edge (e.g. "Yes", "No"). */
  label?: string;
}
|
||||
|
||||
/** The complete draft agent graph produced during the planning phase. */
export interface DraftGraph {
  agent_name: string;
  goal: string;
  description: string;
  success_criteria: string[];
  constraints: string[];
  nodes: DraftNode[];
  edges: DraftEdge[];
  /** Node id where execution starts. */
  entry_node: string;
  terminal_nodes: string[];
  /** Shape/color rendering defaults — presumably keyed by flowchart_type; verify against the renderer. */
  flowchart_legend: Record<string, { shape: string; color: string }>;
}
|
||||
|
||||
/** Mapping from runtime graph nodes → original flowchart draft nodes. */
export interface FlowchartMap {
  /** runtime_node_id → list of original draft node IDs it absorbed. Null when no mapping exists. */
  map: Record<string, string[]> | null;
  /** Original draft graph preserved before planning-node dissolution (decision + subagent). */
  original_draft: DraftGraph | null;
}
|
||||
|
||||
export interface NodeCriteria {
|
||||
node_id: string;
|
||||
success_criteria: string | null;
|
||||
|
||||
@@ -93,6 +93,8 @@ interface ChatPanelProps {
|
||||
showQueenPhaseBadge?: boolean;
|
||||
/** Context window usage for queen and workers */
|
||||
contextUsage?: Record<string, ContextUsageEntry>;
|
||||
/** One-shot composer prefill. Applied to the textarea whenever the value changes. */
|
||||
initialDraft?: string | null;
|
||||
}
|
||||
|
||||
const queenColor = "hsl(45,95%,58%)";
|
||||
@@ -394,6 +396,7 @@ export default function ChatPanel({
|
||||
showQueenPhaseBadge = true,
|
||||
contextUsage,
|
||||
supportsImages = true,
|
||||
initialDraft,
|
||||
}: ChatPanelProps) {
|
||||
const [input, setInput] = useState("");
|
||||
const [pendingImages, setPendingImages] = useState<ImageContent[]>([]);
|
||||
@@ -403,6 +406,21 @@ export default function ChatPanel({
|
||||
const stickToBottom = useRef(true);
|
||||
const textareaRef = useRef<HTMLTextAreaElement>(null);
|
||||
const fileInputRef = useRef<HTMLInputElement>(null);
|
||||
const lastAppliedDraftRef = useRef<string | null | undefined>(undefined);
|
||||
|
||||
useEffect(() => {
|
||||
if (!initialDraft || initialDraft === lastAppliedDraftRef.current) return;
|
||||
lastAppliedDraftRef.current = initialDraft;
|
||||
setInput(initialDraft);
|
||||
setTimeout(() => {
|
||||
const ta = textareaRef.current;
|
||||
if (!ta) return;
|
||||
ta.focus();
|
||||
ta.style.height = "auto";
|
||||
ta.style.height = `${Math.min(ta.scrollHeight, 160)}px`;
|
||||
ta.selectionStart = ta.selectionEnd = ta.value.length;
|
||||
}, 0);
|
||||
}, [initialDraft]);
|
||||
|
||||
const threadMessages = messages.filter((m) => {
|
||||
if (m.type === "system" && !m.thread) return false;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,551 +0,0 @@
|
||||
import { useState, useEffect, useRef } from "react";
|
||||
import { X, Cpu, Zap, Clock, RotateCcw, CheckCircle2, AlertCircle, Loader2, ChevronDown, ChevronRight, Copy, Check, Terminal, Wrench, BookOpen, GitBranch, Bot } from "lucide-react";
|
||||
import type { GraphNode, NodeStatus } from "./graph-types";
|
||||
import type { NodeSpec, ToolInfo, NodeCriteria } from "../api/types";
|
||||
import { workersApi } from "../api/workers";
|
||||
import { logsApi } from "../api/logs";
|
||||
import MarkdownContent from "./MarkdownContent";
|
||||
|
||||
/** A tool entry rendered by ToolRow in the node detail panel. */
interface Tool {
  /** Display name. */
  name: string;
  /** One-line description shown under the name. */
  description: string;
  /** Rendered as plain text beside the name (typically an emoji glyph). */
  icon: string;
  /** Credentials the tool needs; the row only expands when present and non-empty. */
  credentials?: ToolCredential[];
}
|
||||
|
||||
/** One credential required by a tool; rendered by CredentialRow. */
interface ToolCredential {
  /** Stable identifier — used as the React list key. */
  key: string;
  /** Human-readable label shown in the row. */
  label: string;
  /** Toggles the "Connected" badge vs. the Connect button. */
  connected: boolean;
  /** Stored credential value, if any — not read by the visible components. */
  value?: string;
}
|
||||
|
||||
/** A progress/result report emitted by a subagent during node execution. */
export interface SubagentReport {
  subagent_id: string;
  message: string;
  /** Optional structured payload accompanying the message. */
  data?: Record<string, unknown>;
  timestamp: string;
  /** Lifecycle state; semantics of an absent value unclear from here — confirm with producer. */
  status?: "running" | "complete" | "error";
}
|
||||
|
||||
/** Context-window usage stats for the worker driving this node. */
interface ContextUsage {
  // Judging by the name, a percentage (0–100) — confirm at the call site.
  usagePct: number;
  messageCount: number;
  estimatedTokens: number;
  maxTokens: number;
}
|
||||
|
||||
/** Props for the node-detail side panel. */
interface NodeDetailPanelProps {
  node: GraphNode | null;
  nodeSpec?: NodeSpec | null;
  allNodeSpecs?: NodeSpec[];
  subagentReports?: SubagentReport[];
  // LogsTab fetches historical logs only when all three of
  // sessionId, colonyId and workerSessionId are provided.
  sessionId?: string;
  colonyId?: string;
  workerSessionId?: string | null;
  /** Live log lines for this node; LogsTab prefers these over fetched history. */
  nodeLogs?: string[];
  actionPlan?: string;
  contextUsage?: ContextUsage;
  /** Presumably the panel-dismiss callback — invoked by a close control outside this view. */
  onClose: () => void;
}
|
||||
|
||||
// Per-status display config: badge label, HSL accent color, and icon
// component. running/looping spin their icons; looping slows the spin
// to 2 s via an inline animationDuration override.
const statusConfig: Record<NodeStatus, { label: string; color: string; Icon: React.FC<{ className?: string }> }> = {
  running: { label: "Running", color: "hsl(45,95%,58%)", Icon: ({ className }) => <Loader2 className={`${className} animate-spin`} /> },
  looping: { label: "Looping", color: "hsl(38,90%,55%)", Icon: ({ className }) => <RotateCcw className={`${className} animate-spin`} style={{ animationDuration: "2s" }} /> },
  complete: { label: "Complete", color: "hsl(43,70%,45%)", Icon: ({ className }) => <CheckCircle2 className={className} /> },
  pending: { label: "Pending", color: "hsl(220,15%,45%)", Icon: ({ className }) => <Clock className={className} /> },
  error: { label: "Error", color: "hsl(0,65%,55%)", Icon: ({ className }) => <AlertCircle className={className} /> },
};
|
||||
|
||||
function formatNodeId(id: string): string {
|
||||
return id.split("-").map(w => w.charAt(0).toUpperCase() + w.slice(1)).join(" ");
|
||||
}
|
||||
|
||||
/**
 * Single credential row: a status dot + label on the left, and either a
 * "Connected" badge or a Connect button on the right depending on
 * `cred.connected`.
 *
 * NOTE(review): the Connect button has no onClick handler in this
 * snapshot — clicking it does nothing; confirm whether wiring was
 * intended elsewhere.
 */
function CredentialRow({ cred }: { cred: ToolCredential }) {
  return (
    <div className="flex items-center justify-between px-3 py-2 rounded-lg bg-background/60 border border-border/30 mt-1.5">
      <div className="flex items-center gap-2 min-w-0">
        {/* Status dot: primary color when connected, muted otherwise. */}
        <span className={`w-1.5 h-1.5 rounded-full flex-shrink-0 ${cred.connected ? "bg-primary" : "bg-muted-foreground/40"}`} />
        <span className="text-[11px] text-muted-foreground font-medium truncate">{cred.label}</span>
      </div>
      {cred.connected ? (
        <span className="text-[10px] text-primary/80 font-medium flex-shrink-0 ml-2">Connected</span>
      ) : (
        <button className="text-[10px] px-2 py-0.5 rounded-md bg-primary/15 text-primary border border-primary/25 font-semibold hover:bg-primary/25 transition-colors flex-shrink-0 ml-2">
          Connect
        </button>
      )}
    </div>
  );
}
|
||||
|
||||
/**
 * Expandable row for one tool. The header button shows icon, name and
 * description; when the tool declares credentials, clicking toggles an
 * expanded section listing a CredentialRow per credential. Tools without
 * credentials render a non-interactive header (no chevron, default cursor).
 */
function ToolRow({ tool }: { tool: Tool }) {
  const [expanded, setExpanded] = useState(false);
  const hasCreds = tool.credentials && tool.credentials.length > 0;

  return (
    <div className="rounded-xl border border-border/20 overflow-hidden">
      <button
        onClick={() => hasCreds && setExpanded(v => !v)}
        className={`w-full flex items-start gap-3 p-3 bg-muted/30 hover:bg-muted/50 transition-colors text-left ${!hasCreds ? "cursor-default" : ""}`}
      >
        <span className="text-base leading-none mt-0.5 flex-shrink-0">{tool.icon}</span>
        <div className="min-w-0 flex-1">
          <p className="text-xs font-medium text-foreground">{tool.name}</p>
          <p className="text-[11px] text-muted-foreground mt-0.5 leading-relaxed">{tool.description}</p>
        </div>
        {/* Chevron reflects expansion state; only shown when expandable. */}
        {hasCreds && (
          <span className="flex-shrink-0 mt-0.5">
            {expanded
              ? <ChevronDown className="w-3 h-3 text-muted-foreground" />
              : <ChevronRight className="w-3 h-3 text-muted-foreground" />
            }
          </span>
        )}
      </button>
      {expanded && hasCreds && (
        <div className="px-3 pb-3 bg-muted/20 border-t border-border/15">
          <p className="text-[10px] font-medium text-muted-foreground uppercase tracking-wider mt-2 mb-1">Credentials</p>
          {/* Non-null assertion is safe: hasCreds guards this branch. */}
          {tool.credentials!.map(cred => (
            <CredentialRow key={cred.key} cred={cred} />
          ))}
        </div>
      )}
    </div>
  );
}
|
||||
|
||||
/**
 * Log viewer tab for a node. Display priority: live streamed lines
 * (`nodeLogs`) > fetched historical lines > a static "Awaiting execution"
 * placeholder. Lines are colored by a substring match on the level token
 * (" ERROR " red, " WARN " yellow, " DEBUG " dim, everything else green),
 * and the view auto-scrolls to the bottom when the displayed lines change.
 *
 * NOTE(review): `lines` is a freshly-derived value every render, so the
 * scroll effect keyed on `[lines]` runs each render — likely benign
 * (smooth-scroll to an unchanged position) but confirm it's intended.
 */
function LogsTab({ nodeId, isActive: _isActive, sessionId, colonyId, workerSessionId, nodeLogs }: { nodeId: string; isActive: boolean; sessionId?: string; colonyId?: string; workerSessionId?: string | null; nodeLogs?: string[] }) {
  const [historicalLines, setHistoricalLines] = useState<string[]>([]);
  const bottomRef = useRef<HTMLDivElement>(null);

  // Fetch historical logs when session is available (post-execution viewing).
  // Requires all three identifiers; otherwise no request is made.
  useEffect(() => {
    if (sessionId && colonyId && workerSessionId) {
      logsApi.nodeLogs(sessionId, colonyId, nodeId, workerSessionId)
        .then(r => {
          const realLines: string[] = [];
          if (r.details) {
            for (const d of r.details) {
              realLines.push(`[LOG] ${d.node_name} — ${d.success ? "SUCCESS" : "FAILED"}${d.error ? ` (${d.error})` : ""} — ${d.total_steps} steps`);
            }
          }
          if (r.tool_logs) {
            for (const s of r.tool_logs) {
              realLines.push(`[STEP ${s.step_index}] ${s.llm_text.slice(0, 120)}${s.llm_text.length > 120 ? "..." : ""}`);
            }
          }
          // Only replace the placeholder when the fetch produced something.
          if (realLines.length > 0) {
            setHistoricalLines(realLines);
          }
        })
        .catch(() => { /* keep fallback on error */ });
    }
  }, [sessionId, colonyId, nodeId, workerSessionId]);

  // Resolve which lines to display: live SSE logs > historical > default
  const lines = (nodeLogs && nodeLogs.length > 0)
    ? nodeLogs
    : historicalLines.length > 0
      ? historicalLines
      : ["[--:--:--] INFO Awaiting execution..."];

  // Keep the newest line in view as logs arrive.
  useEffect(() => {
    bottomRef.current?.scrollIntoView({ behavior: "smooth" });
  }, [lines]);

  return (
    <div className="flex-1 overflow-auto bg-background/80 rounded-xl border border-border/20 font-mono text-[10.5px] leading-relaxed p-3">
      {lines.map((line, i) => {
        // Level detection is by padded substring to avoid matching words
        // inside the message body.
        const isWarn = line.includes(" WARN ");
        const isErr = line.includes(" ERROR ");
        const isDebug = line.includes(" DEBUG ");
        return (
          <div
            key={i}
            className={isErr ? "text-red-400" : isWarn ? "text-yellow-400/80" : isDebug ? "text-muted-foreground/50" : "text-green-400/70"}
          >
            {line}
          </div>
        );
      })}
      <div ref={bottomRef} />
    </div>
  );
}
|
||||
|
||||
function SystemPromptTab({ systemPrompt }: { systemPrompt?: string }) {
|
||||
const prompt = systemPrompt || "";
|
||||
const [copied, setCopied] = useState(false);
|
||||
|
||||
const handleCopy = () => {
|
||||
navigator.clipboard.writeText(prompt);
|
||||
setCopied(true);
|
||||
setTimeout(() => setCopied(false), 1500);
|
||||
};
|
||||
|
||||
if (!prompt) {
|
||||
return (
|
||||
<div className="flex-1 flex items-center justify-center">
|
||||
<p className="text-xs text-muted-foreground/60 italic text-center">No system prompt configured</p>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="flex-1 overflow-auto flex flex-col gap-2">
|
||||
<div className="flex items-center justify-between">
|
||||
<p className="text-[10px] font-medium text-muted-foreground uppercase tracking-wider">System Prompt</p>
|
||||
<button
|
||||
onClick={handleCopy}
|
||||
className="flex items-center gap-1 text-[10px] text-muted-foreground hover:text-foreground transition-colors"
|
||||
>
|
||||
{copied ? <Check className="w-3 h-3 text-primary" /> : <Copy className="w-3 h-3" />}
|
||||
{copied ? "Copied" : "Copy"}
|
||||
</button>
|
||||
</div>
|
||||
<textarea
|
||||
readOnly
|
||||
value={prompt}
|
||||
className="flex-1 min-h-[240px] w-full rounded-xl bg-muted/30 border border-border/20 text-[11px] text-muted-foreground leading-relaxed p-3 font-mono resize-none focus:outline-none focus:border-border/40"
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function SubagentStatusBadge({ status }: { status?: "running" | "complete" | "error" }) {
|
||||
if (!status) return null;
|
||||
if (status === "running") {
|
||||
return (
|
||||
<span className="ml-auto flex items-center gap-1 text-[10px] font-medium flex-shrink-0" style={{ color: "hsl(45,95%,58%)" }}>
|
||||
<span className="relative flex h-1.5 w-1.5">
|
||||
<span className="animate-ping absolute inline-flex h-full w-full rounded-full opacity-75" style={{ backgroundColor: "hsl(45,95%,58%)" }} />
|
||||
<span className="relative inline-flex rounded-full h-1.5 w-1.5" style={{ backgroundColor: "hsl(45,95%,58%)" }} />
|
||||
</span>
|
||||
Running
|
||||
</span>
|
||||
);
|
||||
}
|
||||
if (status === "complete") {
|
||||
return (
|
||||
<span className="ml-auto flex items-center gap-1 text-[10px] font-medium flex-shrink-0" style={{ color: "hsl(43,70%,45%)" }}>
|
||||
<CheckCircle2 className="w-3 h-3" />
|
||||
Complete
|
||||
</span>
|
||||
);
|
||||
}
|
||||
return (
|
||||
<span className="ml-auto flex items-center gap-1 text-[10px] font-medium flex-shrink-0" style={{ color: "hsl(0,65%,55%)" }}>
|
||||
<AlertCircle className="w-3 h-3" />
|
||||
Failed
|
||||
</span>
|
||||
);
|
||||
}
|
||||
|
||||
function SubagentsTab({ subAgentIds, allNodeSpecs, subagentReports }: { subAgentIds: string[]; allNodeSpecs: NodeSpec[]; subagentReports: SubagentReport[] }) {
|
||||
if (subAgentIds.length === 0) {
|
||||
return (
|
||||
<div className="flex-1 flex items-center justify-center">
|
||||
<p className="text-xs text-muted-foreground/60 italic text-center">No subagents assigned to this node.</p>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="space-y-3">
|
||||
<p className="text-[10px] font-medium text-muted-foreground uppercase tracking-wider mb-1">Sub-agents ({subAgentIds.length})</p>
|
||||
{subAgentIds.map(saId => {
|
||||
const spec = allNodeSpecs.find(n => n.id === saId);
|
||||
const reports = subagentReports.filter(r => r.subagent_id === saId);
|
||||
// Derive status from latest report that has a status field
|
||||
const latestStatus = [...reports].reverse().find(r => r.status)?.status;
|
||||
// Progress messages are reports without a status field (from report_to_parent)
|
||||
const progressReports = reports.filter(r => !r.status);
|
||||
|
||||
return (
|
||||
<div key={saId} className="rounded-xl border border-border/20 overflow-hidden">
|
||||
<div className="p-3 bg-muted/30">
|
||||
<div className="flex items-center gap-2 mb-1">
|
||||
<Bot className="w-3.5 h-3.5 text-primary/70 flex-shrink-0" />
|
||||
<span className="text-xs font-medium text-foreground truncate">{spec?.name || saId}</span>
|
||||
<SubagentStatusBadge status={latestStatus} />
|
||||
</div>
|
||||
{spec?.description && (
|
||||
<p className="text-[11px] text-muted-foreground leading-relaxed mt-1">{spec.description}</p>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Static info: tools + output keys */}
|
||||
<div className="px-3 py-2 border-t border-border/15 bg-muted/15">
|
||||
{spec?.tools && spec.tools.length > 0 && (
|
||||
<div className="mb-1.5">
|
||||
<span className="text-[10px] text-muted-foreground font-medium">Tools: </span>
|
||||
<span className="text-[10px] text-foreground/70">{spec.tools.join(", ")}</span>
|
||||
</div>
|
||||
)}
|
||||
{spec?.output_keys && spec.output_keys.length > 0 && (
|
||||
<div>
|
||||
<span className="text-[10px] text-muted-foreground font-medium">Outputs: </span>
|
||||
<span className="text-[10px] text-foreground/70 font-mono">{spec.output_keys.join(", ")}</span>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Live progress reports (from report_to_parent) */}
|
||||
{progressReports.length > 0 && (
|
||||
<div className="px-3 py-2 border-t border-border/15 bg-background/60">
|
||||
<p className="text-[10px] text-muted-foreground font-medium mb-1">Reports ({progressReports.length})</p>
|
||||
{progressReports.map((r, i) => (
|
||||
<div key={i} className="text-[10.5px] text-foreground/70 leading-relaxed py-0.5">{r.message}</div>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
type Tab = "overview" | "breakdown" | "tools" | "logs" | "subagents";
|
||||
|
||||
const tabs: { id: Tab; label: string; Icon: React.FC<{ className?: string }> }[] = [
|
||||
{ id: "overview", label: "Overview", Icon: ({ className }) => <GitBranch className={className} /> },
|
||||
{ id: "breakdown", label: "Breakdown", Icon: ({ className }) => <BookOpen className={className} /> },
|
||||
{ id: "tools", label: "Tools", Icon: ({ className }) => <Wrench className={className} /> },
|
||||
{ id: "logs", label: "Logs", Icon: ({ className }) => <Terminal className={className} /> },
|
||||
{ id: "subagents", label: "Subagents", Icon: ({ className }) => <Bot className={className} /> },
|
||||
];
|
||||
|
||||
// Right-hand slide-in panel showing details for the selected graph node:
// status header, optional context-window gauge, and a tab strip
// (overview / breakdown / tools / logs / subagents). Tools and judge
// criteria are fetched lazily when their tab becomes active.
export default function NodeDetailPanel({ node, nodeSpec, allNodeSpecs, subagentReports, sessionId, colonyId, workerSessionId, nodeLogs, actionPlan, contextUsage, onClose }: NodeDetailPanelProps) {
  const [activeTab, setActiveTab] = useState<Tab>("overview");
  const [realTools, setRealTools] = useState<ToolInfo[] | null>(null);
  const [realCriteria, setRealCriteria] = useState<NodeCriteria | null>(null);

  // Selecting a different node resets the tab and discards fetched data
  // so stale tools/criteria from the previous node never show.
  useEffect(() => {
    setActiveTab("overview");
    setRealTools(null);
    setRealCriteria(null);
  }, [node?.id]);

  // Fetch real tool descriptions when Tools tab is active and session is loaded
  useEffect(() => {
    if (activeTab === "tools" && sessionId && colonyId && node) {
      workersApi.nodeTools(sessionId, colonyId, node.id)
        .then(r => setRealTools(r.tools))
        .catch(() => setRealTools(null));
    }
  }, [activeTab, sessionId, colonyId, node?.id]);

  // Fetch real criteria when Overview tab is active and session is loaded
  // NOTE(review): this actually keys off the "breakdown" tab, not
  // "overview" — the comment above appears stale; confirm intended trigger.
  useEffect(() => {
    if (activeTab === "breakdown" && sessionId && colonyId && node) {
      workersApi.nodeCriteria(sessionId, colonyId, node.id, workerSessionId || undefined)
        .then(r => setRealCriteria(r))
        .catch(() => setRealCriteria(null));
    }
  }, [activeTab, sessionId, colonyId, node?.id, workerSessionId]);

  // No selection: render nothing (after hooks, so hook order is stable).
  if (!node) return null;

  const status = statusConfig[node.status];
  const StatusIcon = status.Icon;
  const isActive = node.status === "running" || node.status === "looping";

  return (
    <div className="flex flex-col h-full border-l border-border/40 bg-card/20 animate-in slide-in-from-right">
      {/* Header */}
      <div className="px-4 pt-4 pb-3 border-b border-border/30 flex items-start justify-between gap-2 flex-shrink-0">
        <div className="flex items-start gap-3 min-w-0">
          <div
            className="w-8 h-8 rounded-lg flex items-center justify-center flex-shrink-0 mt-0.5"
            style={{ backgroundColor: `${status.color}18`, border: `1.5px solid ${status.color}35` }}
          >
            <Cpu className="w-3.5 h-3.5" style={{ color: status.color }} />
          </div>
          <div className="min-w-0">
            <h3 className="text-sm font-semibold text-foreground leading-tight">{formatNodeId(node.id)}</h3>
            <div className="flex items-center gap-1.5 mt-1">
              <span style={{ color: status.color }}><StatusIcon className="w-3 h-3 flex-shrink-0" /></span>
              <span className="text-[11px] font-medium" style={{ color: status.color }}>{status.label}</span>
              {node.iterations !== undefined && node.iterations > 0 && (
                <>
                  <span className="text-muted-foreground/40 text-[10px]">·</span>
                  <span className="text-[11px] text-muted-foreground">
                    {node.iterations}{node.maxIterations ? `/${node.maxIterations}` : ""} iterations
                  </span>
                </>
              )}
            </div>
          </div>
        </div>
        <button
          onClick={onClose}
          className="p-1 rounded-md text-muted-foreground hover:text-foreground hover:bg-muted/50 transition-colors flex-shrink-0"
        >
          <X className="w-3.5 h-3.5" />
        </button>
      </div>

      {/* Status label */}
      {node.statusLabel && (
        <div className="px-4 py-2 border-b border-border/20 flex-shrink-0">
          <div className="flex items-center gap-2 text-[11px] text-muted-foreground bg-muted/40 rounded-lg px-3 py-2">
            <Zap className="w-3 h-3 text-primary flex-shrink-0" />
            <span className="italic">{node.statusLabel}</span>
          </div>
        </div>
      )}

      {/* Context window usage — bar and percentage shift yellow→orange→red at 70%/90% */}
      {contextUsage && (
        <div className="px-4 py-2 border-b border-border/20 flex-shrink-0">
          <div className="flex items-center gap-2 mb-1">
            <span className="text-[10px] text-muted-foreground font-medium">Context</span>
            <span className="text-[10px] text-muted-foreground/70 ml-auto">
              {(contextUsage.estimatedTokens / 1000).toFixed(1)}k / {(contextUsage.maxTokens / 1000).toFixed(0)}k tokens
            </span>
          </div>
          <div className="w-full h-1.5 rounded-full bg-muted/50 overflow-hidden">
            <div
              className="h-full rounded-full transition-all duration-500 ease-out"
              style={{
                width: `${Math.min(contextUsage.usagePct, 100)}%`,
                backgroundColor: contextUsage.usagePct >= 90
                  ? "hsl(0,65%,55%)"
                  : contextUsage.usagePct >= 70
                    ? "hsl(35,90%,55%)"
                    : "hsl(45,95%,58%)",
              }}
            />
          </div>
          <div className="flex items-center gap-2 mt-1">
            <span className="text-[10px] text-muted-foreground/60">{contextUsage.messageCount} messages</span>
            <span className="text-[10px] font-medium ml-auto" style={{
              color: contextUsage.usagePct >= 90
                ? "hsl(0,65%,55%)"
                : contextUsage.usagePct >= 70
                  ? "hsl(35,90%,55%)"
                  : "hsl(45,95%,58%)",
            }}>
              {contextUsage.usagePct}%
            </span>
          </div>
        </div>
      )}

      {/* Tab bar — the Subagents tab only appears when the spec declares sub_agents */}
      <div className="flex border-b border-border/30 flex-shrink-0 px-2 pt-1 overflow-x-auto scrollbar-hide">
        {tabs.filter(t => t.id !== "subagents" || (nodeSpec?.sub_agents && nodeSpec.sub_agents.length > 0)).map(tab => (
          <button
            key={tab.id}
            onClick={() => setActiveTab(tab.id)}
            className={`flex items-center gap-1.5 px-3 py-2 text-[11px] font-medium border-b-2 transition-colors -mb-px ${
              activeTab === tab.id
                ? "border-primary text-primary"
                : "border-transparent text-muted-foreground hover:text-foreground"
            }`}
          >
            <tab.Icon className="w-3 h-3" />
            {tab.label}
          </button>
        ))}
      </div>

      {/* Tab content */}
      <div className="flex-1 overflow-auto px-4 py-4 flex flex-col gap-3">
        {activeTab === "overview" && (
          <SystemPromptTab systemPrompt={nodeSpec?.system_prompt} />
        )}

        {activeTab === "breakdown" && (
          <>
            <p className="text-[10px] font-medium text-muted-foreground uppercase tracking-wider">Action Plan</p>
            {actionPlan ? (
              <div className="rounded-lg border border-border/30 bg-background/60 px-3 py-2.5 text-[11px] leading-relaxed text-foreground/80">
                <MarkdownContent content={actionPlan} />
              </div>
            ) : (
              <div className="flex items-center justify-center py-6">
                <p className="text-[11px] text-muted-foreground/50 italic">Action plan will appear when node starts running</p>
              </div>
            )}
            {/* Judge criteria: one checkbox row per non-blank line of
                success_criteria; all rows reflect the single pass/fail
                outcome of the last execution. */}
            {(() => {
              if (realCriteria && realCriteria.success_criteria) {
                const criteriaLines = realCriteria.success_criteria.split("\n").filter(l => l.trim());
                // null = no execution yet, so neither badge is shown.
                const passed = realCriteria.last_execution?.success ?? null;
                return (
                  <div className="mt-1">
                    <div className="flex items-center justify-between mb-2">
                      <p className="text-[10px] font-medium text-muted-foreground uppercase tracking-wider">Judge Criteria</p>
                      {passed !== null && (
                        <span className={`text-[10px] font-medium px-2 py-0.5 rounded-full ${passed ? "bg-[hsl(43,70%,45%)]/15 text-[hsl(43,70%,45%)]" : "bg-red-500/15 text-red-400"}`}>
                          {passed ? "Passed" : "Failed"}
                        </span>
                      )}
                    </div>
                    <div className="flex flex-col gap-1.5">
                      {criteriaLines.map((line, i) => (
                        <div key={i} className="flex items-start gap-2">
                          <div className={`mt-0.5 w-3.5 h-3.5 rounded-full flex-shrink-0 flex items-center justify-center border ${passed ? "border-transparent bg-[hsl(43,70%,45%)]" : "border-border/40 bg-muted/30"}`}>
                            {passed && (
                              <svg viewBox="0 0 8 8" className="w-2 h-2" fill="none">
                                <path d="M1.5 4l2 2 3-3" stroke="white" strokeWidth="1.2" strokeLinecap="round" strokeLinejoin="round"/>
                              </svg>
                            )}
                          </div>
                          <span className={`text-[11px] leading-relaxed ${passed ? "text-foreground/70" : "text-foreground/80"}`}>{line}</span>
                        </div>
                      ))}
                    </div>
                  </div>
                );
              }
              return null;
            })()}
            {node.next && node.next.length > 0 && (
              <div className="mt-2">
                <p className="text-[10px] font-medium text-muted-foreground uppercase tracking-wider mb-2">Sends to</p>
                <div className="flex flex-wrap gap-1.5">
                  {node.next.map((n) => (
                    <span key={n} className="text-[11px] px-2.5 py-1 rounded-full bg-primary/10 text-primary border border-primary/20 font-medium">
                      {formatNodeId(n)}
                    </span>
                  ))}
                </div>
              </div>
            )}
          </>
        )}

        {activeTab === "tools" && (
          <div className="space-y-2">
            <p className="text-[10px] font-medium text-muted-foreground uppercase tracking-wider mb-1">Tools & Integrations</p>
            {realTools && realTools.length > 0
              ? realTools.map((t, i) => (
                <ToolRow key={i} tool={{ name: t.name, description: t.description || "No description available", icon: "\ud83d\udd27" }} />
              ))
              : (
                <div className="flex items-center justify-center py-6">
                  <p className="text-[11px] text-muted-foreground/50 italic">No tools available</p>
                </div>
              )
            }
          </div>
        )}

        {activeTab === "logs" && (
          <LogsTab nodeId={node.id} isActive={isActive} sessionId={sessionId} colonyId={colonyId} workerSessionId={workerSessionId} nodeLogs={nodeLogs} />
        )}

        {activeTab === "subagents" && nodeSpec?.sub_agents && (
          <SubagentsTab
            subAgentIds={nodeSpec.sub_agents}
            allNodeSpecs={allNodeSpecs || []}
            subagentReports={subagentReports || []}
          />
        )}
      </div>
    </div>
  );
}
|
||||
@@ -1,8 +1,9 @@
|
||||
import { useEffect, useRef, useState } from "react";
|
||||
import { X, Eye, EyeOff, Check, Trash2, ChevronDown, Zap, ThumbsUp } from "lucide-react";
|
||||
import { X, Eye, EyeOff, Check, Pencil, ChevronDown, Zap, ThumbsUp, Loader2, AlertCircle } from "lucide-react";
|
||||
import { useColony } from "@/context/ColonyContext";
|
||||
import { useTheme } from "@/context/ThemeContext";
|
||||
import { useModel, LLM_PROVIDERS } from "@/context/ModelContext";
|
||||
import { credentialsApi } from "@/api/credentials";
|
||||
import type { ModelOption } from "@/api/config";
|
||||
|
||||
interface SettingsModalProps {
|
||||
@@ -21,7 +22,6 @@ export default function SettingsModal({ open, onClose, initialSection }: Setting
|
||||
availableModels,
|
||||
setModel,
|
||||
saveProviderKey,
|
||||
removeProviderKey,
|
||||
subscriptions,
|
||||
detectedSubscriptions,
|
||||
activeSubscription,
|
||||
@@ -40,6 +40,11 @@ export default function SettingsModal({ open, onClose, initialSection }: Setting
|
||||
const [showKey, setShowKey] = useState(false);
|
||||
const [saving, setSaving] = useState(false);
|
||||
|
||||
// Validation state per provider: "validating" | {valid, message}
|
||||
const [validation, setValidation] = useState<
|
||||
Record<string, "validating" | { valid: boolean | null; message: string }>
|
||||
>({});
|
||||
|
||||
// Model selection state
|
||||
const [modelDropdownOpen, setModelDropdownOpen] = useState(false);
|
||||
|
||||
@@ -75,26 +80,71 @@ export default function SettingsModal({ open, onClose, initialSection }: Setting
|
||||
};
|
||||
|
||||
const handleSaveKey = async (providerId: string) => {
|
||||
if (!keyInput.trim()) return;
|
||||
const trimmedKey = keyInput.trim();
|
||||
if (!trimmedKey) return;
|
||||
setSaving(true);
|
||||
setValidation((v) => ({ ...v, [providerId]: "validating" }));
|
||||
|
||||
// Validate first — only persist the key if validation passes or is inconclusive.
|
||||
const validateResult = await credentialsApi
|
||||
.validateKey(providerId, trimmedKey)
|
||||
.catch(() => ({ valid: null as boolean | null, message: "Could not verify key" }));
|
||||
|
||||
if (validateResult.valid === false) {
|
||||
// Key is definitively invalid — don't save it.
|
||||
setSaving(false);
|
||||
setValidation((v) => ({
|
||||
...v,
|
||||
[providerId]: { valid: false, message: validateResult.message },
|
||||
}));
|
||||
setTimeout(() => {
|
||||
setValidation((v) => {
|
||||
const next = { ...v };
|
||||
delete next[providerId];
|
||||
return next;
|
||||
});
|
||||
}, 4000);
|
||||
return;
|
||||
}
|
||||
|
||||
// Validation passed or was inconclusive — save the key.
|
||||
try {
|
||||
await saveProviderKey(providerId, keyInput.trim());
|
||||
setEditingProvider(null);
|
||||
setKeyInput("");
|
||||
setShowKey(false);
|
||||
await saveProviderKey(providerId, trimmedKey);
|
||||
} catch (err) {
|
||||
console.error("Failed to save key:", err);
|
||||
} finally {
|
||||
setSaving(false);
|
||||
setValidation((v) => ({
|
||||
...v,
|
||||
[providerId]: { valid: false, message: "Failed to save key" },
|
||||
}));
|
||||
setTimeout(() => {
|
||||
setValidation((v) => {
|
||||
const next = { ...v };
|
||||
delete next[providerId];
|
||||
return next;
|
||||
});
|
||||
}, 4000);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
const handleRemoveKey = async (providerId: string) => {
|
||||
try {
|
||||
await removeProviderKey(providerId);
|
||||
} catch (err) {
|
||||
console.error("Failed to remove key:", err);
|
||||
}
|
||||
setSaving(false);
|
||||
setEditingProvider(null);
|
||||
setKeyInput("");
|
||||
setShowKey(false);
|
||||
|
||||
setValidation((v) => ({
|
||||
...v,
|
||||
[providerId]: { valid: validateResult.valid, message: validateResult.message },
|
||||
}));
|
||||
|
||||
// Auto-clear validation result after 4s
|
||||
setTimeout(() => {
|
||||
setValidation((v) => {
|
||||
const next = { ...v };
|
||||
delete next[providerId];
|
||||
return next;
|
||||
});
|
||||
}, 4000);
|
||||
};
|
||||
|
||||
const handleSelectModel = async (provider: string, modelId: string) => {
|
||||
@@ -391,6 +441,7 @@ export default function SettingsModal({ open, onClose, initialSection }: Setting
|
||||
{LLM_PROVIDERS.map((provider) => {
|
||||
const isConnected = connectedProviders.has(provider.id);
|
||||
const isEditing = editingProvider === provider.id;
|
||||
const providerValidation = validation[provider.id];
|
||||
|
||||
return (
|
||||
<div key={provider.id}>
|
||||
@@ -415,16 +466,37 @@ export default function SettingsModal({ open, onClose, initialSection }: Setting
|
||||
{/* Action */}
|
||||
{isConnected && !isEditing ? (
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="flex items-center gap-1 text-xs text-green-500 font-medium">
|
||||
<Check className="w-3 h-3" />
|
||||
Connected
|
||||
</span>
|
||||
{providerValidation === "validating" ? (
|
||||
<span className="flex items-center gap-1 text-xs text-muted-foreground font-medium">
|
||||
<Loader2 className="w-3 h-3 animate-spin" />
|
||||
Verifying...
|
||||
</span>
|
||||
) : providerValidation && typeof providerValidation === "object" && providerValidation.valid === false ? (
|
||||
<span className="flex items-center gap-1 text-xs text-red-400 font-medium" title={providerValidation.message}>
|
||||
<AlertCircle className="w-3 h-3" />
|
||||
Invalid key
|
||||
</span>
|
||||
) : providerValidation && typeof providerValidation === "object" && providerValidation.valid === true ? (
|
||||
<span className="flex items-center gap-1 text-xs text-green-500 font-medium">
|
||||
<Check className="w-3 h-3" />
|
||||
Verified
|
||||
</span>
|
||||
) : (
|
||||
<span className="flex items-center gap-1 text-xs text-green-500 font-medium">
|
||||
<Check className="w-3 h-3" />
|
||||
Connected
|
||||
</span>
|
||||
)}
|
||||
<button
|
||||
onClick={() => handleRemoveKey(provider.id)}
|
||||
className="p-1 rounded text-muted-foreground/40 hover:text-red-400 transition-colors"
|
||||
title="Remove key"
|
||||
onClick={() => {
|
||||
setEditingProvider(provider.id);
|
||||
setKeyInput("");
|
||||
setShowKey(false);
|
||||
}}
|
||||
className="p-1 rounded text-muted-foreground/40 hover:text-foreground transition-colors"
|
||||
title="Change key"
|
||||
>
|
||||
<Trash2 className="w-3.5 h-3.5" />
|
||||
<Pencil className="w-3.5 h-3.5" />
|
||||
</button>
|
||||
</div>
|
||||
) : !isEditing ? (
|
||||
@@ -443,50 +515,65 @@ export default function SettingsModal({ open, onClose, initialSection }: Setting
|
||||
|
||||
{/* Inline key entry */}
|
||||
{isEditing && (
|
||||
<div className="ml-12 mr-2 mb-2 flex items-center gap-2">
|
||||
<div className="relative flex-1">
|
||||
<input
|
||||
type={showKey ? "text" : "password"}
|
||||
value={keyInput}
|
||||
onChange={(e) => setKeyInput(e.target.value)}
|
||||
placeholder={`Enter ${provider.name} API key`}
|
||||
autoFocus
|
||||
onKeyDown={(e) => {
|
||||
if (e.key === "Enter") handleSaveKey(provider.id);
|
||||
if (e.key === "Escape") {
|
||||
setEditingProvider(null);
|
||||
setKeyInput("");
|
||||
}
|
||||
}}
|
||||
className="w-full bg-muted/30 border border-border/50 rounded-lg px-3 py-2 pr-9 text-sm text-foreground placeholder:text-muted-foreground/50 focus:outline-none focus:ring-1 focus:ring-primary/40 font-mono"
|
||||
/>
|
||||
<div className="ml-12 mr-2 mb-2 flex flex-col gap-1.5">
|
||||
<div className="flex items-center gap-2">
|
||||
<div className="relative flex-1">
|
||||
<input
|
||||
type={showKey ? "text" : "password"}
|
||||
value={keyInput}
|
||||
onChange={(e) => setKeyInput(e.target.value)}
|
||||
placeholder={`Enter ${provider.name} API key`}
|
||||
autoFocus
|
||||
onKeyDown={(e) => {
|
||||
if (e.key === "Enter") handleSaveKey(provider.id);
|
||||
if (e.key === "Escape") {
|
||||
setEditingProvider(null);
|
||||
setKeyInput("");
|
||||
}
|
||||
}}
|
||||
className="w-full bg-muted/30 border border-border/50 rounded-lg px-3 py-2 pr-9 text-sm text-foreground placeholder:text-muted-foreground/50 focus:outline-none focus:ring-1 focus:ring-primary/40 font-mono"
|
||||
/>
|
||||
<button
|
||||
onClick={() => setShowKey(!showKey)}
|
||||
className="absolute right-2.5 top-1/2 -translate-y-1/2 text-muted-foreground/50 hover:text-foreground transition-colors"
|
||||
>
|
||||
{showKey ? (
|
||||
<EyeOff className="w-3.5 h-3.5" />
|
||||
) : (
|
||||
<Eye className="w-3.5 h-3.5" />
|
||||
)}
|
||||
</button>
|
||||
</div>
|
||||
<button
|
||||
onClick={() => setShowKey(!showKey)}
|
||||
className="absolute right-2.5 top-1/2 -translate-y-1/2 text-muted-foreground/50 hover:text-foreground transition-colors"
|
||||
onClick={() => handleSaveKey(provider.id)}
|
||||
disabled={!keyInput.trim() || saving}
|
||||
className="px-3 py-2 rounded-lg bg-primary text-primary-foreground text-xs font-semibold hover:bg-primary/90 transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
>
|
||||
{showKey ? (
|
||||
<EyeOff className="w-3.5 h-3.5" />
|
||||
) : (
|
||||
<Eye className="w-3.5 h-3.5" />
|
||||
)}
|
||||
{saving ? "..." : "Save"}
|
||||
</button>
|
||||
<button
|
||||
onClick={() => {
|
||||
setEditingProvider(null);
|
||||
setKeyInput("");
|
||||
}}
|
||||
className="px-3 py-2 rounded-lg text-xs font-medium text-muted-foreground hover:text-foreground hover:bg-muted/30 transition-colors"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
</div>
|
||||
<button
|
||||
onClick={() => handleSaveKey(provider.id)}
|
||||
disabled={!keyInput.trim() || saving}
|
||||
className="px-3 py-2 rounded-lg bg-primary text-primary-foreground text-xs font-semibold hover:bg-primary/90 transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
>
|
||||
{saving ? "..." : "Save"}
|
||||
</button>
|
||||
<button
|
||||
onClick={() => {
|
||||
setEditingProvider(null);
|
||||
setKeyInput("");
|
||||
}}
|
||||
className="px-3 py-2 rounded-lg text-xs font-medium text-muted-foreground hover:text-foreground hover:bg-muted/30 transition-colors"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
{/* Validation feedback inside editing mode */}
|
||||
{providerValidation === "validating" && (
|
||||
<span className="flex items-center gap-1 text-xs text-muted-foreground font-medium">
|
||||
<Loader2 className="w-3 h-3 animate-spin" />
|
||||
Verifying...
|
||||
</span>
|
||||
)}
|
||||
{providerValidation && typeof providerValidation === "object" && providerValidation.valid === false && (
|
||||
<span className="flex items-center gap-1 text-xs text-red-400 font-medium">
|
||||
<AlertCircle className="w-3 h-3" />
|
||||
{providerValidation.message}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -0,0 +1,211 @@
|
||||
import { useState } from "react";
|
||||
import { X, Webhook, Clock, Activity, ArrowRight, Zap, Play, Square, Loader2 } from "lucide-react";
|
||||
import type { GraphNode } from "./graph-types";
|
||||
import { cronToLabel } from "@/lib/graphUtils";
|
||||
import { sessionsApi } from "@/api/sessions";
|
||||
|
||||
interface TriggerDetailPanelProps {
  // The trigger pseudo-node selected in the graph (its id carries a
  // "__trigger_" prefix that is stripped before API calls).
  trigger: GraphNode;
  // Session that owns the trigger; used for activate/deactivate requests.
  sessionId: string;
  // Called when the panel's close button is pressed.
  onClose: () => void;
}
||||
|
||||
// Icon glyph for a trigger type; unknown/missing types fall back to Zap.
function TriggerIcon({ type }: { type?: string }) {
  const cls = "w-4 h-4";
  if (type === "webhook") return <Webhook className={cls} />;
  if (type === "timer") return <Clock className={cls} />;
  if (type === "api") return <ArrowRight className={cls} />;
  if (type === "event") return <Activity className={cls} />;
  return <Zap className={cls} />;
}
|
||||
|
||||
function formatCountdown(seconds: number): string {
|
||||
const h = Math.floor(seconds / 3600);
|
||||
const m = Math.floor((seconds % 3600) / 60);
|
||||
const s = Math.floor(seconds % 60);
|
||||
if (h > 0) return `${h}h ${String(m).padStart(2, "0")}m ${String(s).padStart(2, "0")}s`;
|
||||
if (m > 0) return `${m}m ${String(s).padStart(2, "0")}s`;
|
||||
return `${s}s`;
|
||||
}
|
||||
|
||||
/**
 * Right-hand detail panel for a single synthetic trigger node.
 *
 * Shows the trigger's schedule, next-fire countdown, raw operator config and
 * id, plus a start/stop button that calls the sessions API. Activation state
 * is not mirrored locally — the SSE TRIGGER_ACTIVATED / TRIGGER_DEACTIVATED
 * events flip the card status upstream.
 */
export default function TriggerDetailPanel({ trigger, sessionId, onClose }: TriggerDetailPanelProps) {
  const [busy, setBusy] = useState(false);
  const [error, setError] = useState<string | null>(null);
  // "running" and "complete" both render as an active trigger.
  const isActive = trigger.status === "running" || trigger.status === "complete";
  const config = (trigger.triggerConfig || {}) as Record<string, unknown>;
  const cron = config.cron as string | undefined;
  const interval = config.interval_minutes as number | undefined;
  const nextFireIn = config.next_fire_in as number | undefined;
  // Strip the synthetic "__trigger_" prefix to recover the backend trigger id.
  const triggerId = trigger.id.replace(/^__trigger_/, "");

  // Toggle activation via the sessions API; guarded against re-entry while busy.
  const handleToggle = async () => {
    if (!sessionId || busy) return;
    setBusy(true);
    setError(null);
    try {
      if (isActive) {
        await sessionsApi.deactivateTrigger(sessionId, triggerId);
      } else {
        await sessionsApi.activateTrigger(sessionId, triggerId);
      }
      // The SSE TRIGGER_ACTIVATED / TRIGGER_DEACTIVATED event will flip
      // the card status; we don't need to set local state here.
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      setError(msg);
    } finally {
      setBusy(false);
    }
  };

  // Human-readable schedule: cron label wins; otherwise interval in h/m.
  const schedule = cron
    ? cronToLabel(cron)
    : interval != null
      ? interval >= 60
        ? `Every ${interval / 60}h`
        : `Every ${interval}m`
      : null;

  // Hide noisy frontend-only fields so only the raw operator config shows
  const displayEntries = Object.entries(config).filter(
    ([k]) => k !== "next_fire_in" && k !== "entry_node",
  );

  return (
    <div className="flex flex-col h-full border-l border-border/40 bg-card/20 animate-in slide-in-from-right">
      {/* Header */}
      <div className="px-4 pt-4 pb-3 border-b border-border/30 flex items-start justify-between gap-2 flex-shrink-0">
        <div className="flex items-start gap-3 min-w-0">
          <div
            className={[
              "w-9 h-9 rounded-lg flex items-center justify-center flex-shrink-0",
              isActive ? "bg-primary/15 text-primary" : "bg-muted/50 text-muted-foreground",
            ].join(" ")}
          >
            <TriggerIcon type={trigger.triggerType} />
          </div>
          <div className="min-w-0">
            <h3 className="text-sm font-semibold text-foreground leading-tight truncate">
              {trigger.label}
            </h3>
            <div className="flex items-center gap-2 mt-1">
              <span
                className={[
                  "text-[10px] font-medium px-1.5 py-0.5 rounded-full",
                  isActive
                    ? "bg-emerald-500/15 text-emerald-400"
                    : "bg-muted/60 text-muted-foreground",
                ].join(" ")}
              >
                {isActive ? "active" : "inactive"}
              </span>
              {trigger.triggerType && (
                <span className="text-[10px] text-muted-foreground uppercase tracking-wider">
                  {trigger.triggerType}
                </span>
              )}
            </div>
          </div>
        </div>
        <button
          onClick={onClose}
          className="p-1 rounded-md text-muted-foreground hover:text-foreground hover:bg-muted/50 transition-colors flex-shrink-0"
        >
          <X className="w-3.5 h-3.5" />
        </button>
      </div>

      {/* Body */}
      <div className="flex-1 overflow-auto px-4 py-4 space-y-4">
        {schedule && (
          <div>
            <p className="text-[10px] font-medium text-muted-foreground uppercase tracking-wider mb-1.5">
              Schedule
            </p>
            <div className="rounded-lg border border-border/30 bg-background/60 px-3 py-2.5">
              <p className="text-xs text-foreground">{schedule}</p>
              {cron && (
                <p className="text-[10px] text-muted-foreground mt-1 font-mono">{cron}</p>
              )}
            </div>
          </div>
        )}

        {/* Countdown section — only for active triggers with a pending fire. */}
        {isActive && nextFireIn != null && nextFireIn > 0 && (
          <div>
            <p className="text-[10px] font-medium text-muted-foreground uppercase tracking-wider mb-1.5">
              Next fire
            </p>
            <div className="rounded-lg border border-border/30 bg-background/60 px-3 py-2.5">
              <p className="text-xs text-foreground italic">in {formatCountdown(nextFireIn)}</p>
            </div>
          </div>
        )}

        {/* Raw operator config as key/value rows (frontend-only keys filtered above). */}
        {displayEntries.length > 0 && (
          <div>
            <p className="text-[10px] font-medium text-muted-foreground uppercase tracking-wider mb-1.5">
              Config
            </p>
            <div className="rounded-lg border border-border/30 bg-background/60 px-3 py-2.5 space-y-1">
              {displayEntries.map(([k, v]) => (
                <div key={k} className="flex items-start justify-between gap-3 text-[11px]">
                  <span className="text-muted-foreground font-mono">{k}</span>
                  <span className="text-foreground font-mono text-right truncate">
                    {typeof v === "object" ? JSON.stringify(v) : String(v)}
                  </span>
                </div>
              ))}
            </div>
          </div>
        )}

        <div>
          <p className="text-[10px] font-medium text-muted-foreground uppercase tracking-wider mb-1.5">
            Trigger ID
          </p>
          <div className="rounded-lg border border-border/30 bg-background/60 px-3 py-2.5">
            <p className="text-[11px] text-muted-foreground font-mono break-all">
              {triggerId}
            </p>
          </div>
        </div>
      </div>

      {/* Footer with start/stop control */}
      <div className="px-4 py-3 border-t border-border/30 flex-shrink-0 space-y-2">
        {error && (
          <p className="text-[10.5px] text-red-400 leading-snug">{error}</p>
        )}
        <button
          type="button"
          onClick={handleToggle}
          disabled={busy || !sessionId}
          className={[
            "w-full flex items-center justify-center gap-1.5 px-3 py-2 rounded-lg text-xs font-medium transition-colors",
            "disabled:opacity-50 disabled:cursor-not-allowed",
            isActive
              ? "bg-muted/50 text-foreground hover:bg-muted/70 border border-border/30"
              : "bg-primary/15 text-primary hover:bg-primary/25 border border-primary/30",
          ].join(" ")}
        >
          {busy ? (
            <Loader2 className="w-3.5 h-3.5 animate-spin" />
          ) : isActive ? (
            <Square className="w-3.5 h-3.5" />
          ) : (
            <Play className="w-3.5 h-3.5" />
          )}
          {busy ? "Working…" : isActive ? "Stop trigger" : "Start trigger"}
        </button>
      </div>
    </div>
  );
}
|
||||
@@ -0,0 +1,143 @@
|
||||
import { Clock, Webhook, Zap, ArrowRight, Activity } from "lucide-react";
|
||||
import type { GraphNode } from "./graph-types";
|
||||
import { cronToLabel } from "@/lib/graphUtils";
|
||||
|
||||
/** Props for the TriggersPanel sidebar list. */
interface TriggersPanelProps {
  // Synthetic trigger nodes (nodeType === "trigger") to render as cards.
  triggers: GraphNode[];
  // Id of the currently selected trigger card, if any.
  selectedId?: string | null;
  // Click handler for a card; when omitted the cards render without onClick.
  onSelect?: (trigger: GraphNode) => void;
}
|
||||
|
||||
/** Small icon for a trigger type; falls back to a Zap for unknown types. */
function TriggerIcon({ type }: { type?: string }) {
  const cls = "w-3.5 h-3.5";
  if (type === "webhook") return <Webhook className={cls} />;
  if (type === "timer") return <Clock className={cls} />;
  if (type === "api") return <ArrowRight className={cls} />;
  if (type === "event") return <Activity className={cls} />;
  return <Zap className={cls} />;
}
|
||||
|
||||
function scheduleLabel(config: Record<string, unknown> | undefined): string | null {
|
||||
if (!config) return null;
|
||||
const cron = config.cron as string | undefined;
|
||||
if (cron) return cronToLabel(cron);
|
||||
const interval = config.interval_minutes as number | undefined;
|
||||
if (interval != null) {
|
||||
if (interval >= 60) return `Every ${interval / 60}h`;
|
||||
return `Every ${interval}m`;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
function countdownLabel(nextFireIn: number | undefined): string | null {
|
||||
if (nextFireIn == null || nextFireIn <= 0) return null;
|
||||
const h = Math.floor(nextFireIn / 3600);
|
||||
const m = Math.floor((nextFireIn % 3600) / 60);
|
||||
const s = Math.floor(nextFireIn % 60);
|
||||
return h > 0
|
||||
? `next in ${h}h ${String(m).padStart(2, "0")}m`
|
||||
: `next in ${m}m ${String(s).padStart(2, "0")}s`;
|
||||
}
|
||||
|
||||
/**
 * One clickable card in the triggers sidebar: icon, label, schedule line,
 * active/inactive pill, and (for active triggers) a next-fire countdown.
 */
function TriggerCard({
  trigger,
  selected,
  onClick,
}: {
  trigger: GraphNode;
  selected: boolean;
  onClick?: () => void;
}) {
  // "running" and "complete" both count as an active trigger.
  const isActive = trigger.status === "running" || trigger.status === "complete";
  const schedule = scheduleLabel(trigger.triggerConfig);
  const nextFireIn = trigger.triggerConfig?.next_fire_in as number | undefined;
  // Countdown is only meaningful while the trigger is active.
  const countdown = isActive ? countdownLabel(nextFireIn) : null;

  return (
    <button
      type="button"
      onClick={onClick}
      className={[
        "w-full text-left rounded-lg border px-3 py-2.5 transition-colors",
        selected
          ? "bg-primary/10 border-primary/30"
          : "bg-background/60 border-border/30 hover:bg-muted/40 hover:border-border/50",
      ].join(" ")}
    >
      <div className="flex items-center gap-2">
        {/* Type icon in a tinted circle */}
        <span
          className={[
            "flex-shrink-0 w-6 h-6 rounded-full flex items-center justify-center",
            isActive ? "bg-primary/15 text-primary" : "bg-muted/60 text-muted-foreground",
          ].join(" ")}
        >
          <TriggerIcon type={trigger.triggerType} />
        </span>
        <div className="min-w-0 flex-1">
          <p className="text-xs font-medium text-foreground truncate">{trigger.label}</p>
          {/* Suppress the schedule line when it duplicates the label */}
          {schedule && schedule !== trigger.label && (
            <p className="text-[10.5px] text-muted-foreground truncate mt-0.5">{schedule}</p>
          )}
        </div>
        <span
          className={[
            "flex-shrink-0 text-[10px] font-medium px-1.5 py-0.5 rounded-full",
            isActive
              ? "bg-emerald-500/15 text-emerald-400"
              : "bg-muted/60 text-muted-foreground",
          ].join(" ")}
        >
          {isActive ? "active" : "inactive"}
        </span>
      </div>
      {countdown && (
        <p className="text-[10px] text-muted-foreground mt-1.5 italic pl-8">{countdown}</p>
      )}
    </button>
  );
}
|
||||
|
||||
/**
 * Sidebar listing all trigger nodes as cards, with a count badge and an
 * empty-state hint when no triggers are configured.
 */
export default function TriggersPanel({ triggers, selectedId, onSelect }: TriggersPanelProps) {
  return (
    <div className="flex flex-col h-full bg-card/30 border-l border-border/30">
      {/* Header with trigger count */}
      <div className="px-4 py-3 border-b border-border/30 flex items-center gap-2">
        <Clock className="w-3.5 h-3.5 text-muted-foreground" />
        <h3 className="text-xs font-semibold text-foreground uppercase tracking-wide">
          Triggers
        </h3>
        {triggers.length > 0 && (
          <span className="ml-auto text-[10px] text-muted-foreground">
            {triggers.length}
          </span>
        )}
      </div>
      <div className="flex-1 overflow-y-auto px-3 py-3 space-y-2">
        {triggers.length === 0 ? (
          /* Empty state: nudge the user toward creating a trigger */
          <div className="text-center py-8">
            <Clock className="w-6 h-6 mx-auto text-muted-foreground/40 mb-2" />
            <p className="text-[11px] text-muted-foreground">No triggers configured</p>
            <p className="text-[10px] text-muted-foreground/70 mt-1 px-2">
              Ask the queen to set a schedule or webhook
            </p>
          </div>
        ) : (
          triggers.map((t) => (
            <TriggerCard
              key={t.id}
              trigger={t}
              selected={selectedId === t.id}
              onClick={onSelect ? () => onSelect(t) : undefined}
            />
          ))
        )}
      </div>
    </div>
  );
}
|
||||
@@ -1,375 +0,0 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { topologyToGraphNodes } from "./graph-converter";
|
||||
import type { GraphTopology, NodeSpec } from "@/api/types";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function makeNode(id: string, overrides: Partial<NodeSpec> = {}): NodeSpec {
|
||||
return {
|
||||
id,
|
||||
name: id,
|
||||
description: "",
|
||||
node_type: "event_loop",
|
||||
input_keys: [],
|
||||
output_keys: [],
|
||||
nullable_output_keys: [],
|
||||
tools: [],
|
||||
routes: {},
|
||||
max_retries: 3,
|
||||
max_node_visits: 0,
|
||||
client_facing: false,
|
||||
success_criteria: null,
|
||||
system_prompt: "",
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Edge classification
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Verifies that forward edges land in `next` and cycle-closing edges in
// `backEdges` after BFS ordering from the entry node.
describe("edge classification", () => {
  it("linear chain: all edges in next[], no backEdges", () => {
    const topology: GraphTopology = {
      nodes: [makeNode("A"), makeNode("B"), makeNode("C")],
      edges: [
        { source: "A", target: "B", condition: "on_success", priority: 0 },
        { source: "B", target: "C", condition: "on_success", priority: 0 },
      ],
      entry_node: "A",
    };

    const result = topologyToGraphNodes(topology);
    expect(result).toHaveLength(3);

    const a = result.find((n) => n.id === "A")!;
    const b = result.find((n) => n.id === "B")!;
    const c = result.find((n) => n.id === "C")!;

    expect(a.next).toEqual(["B"]);
    expect(a.backEdges).toBeUndefined();
    expect(b.next).toEqual(["C"]);
    expect(b.backEdges).toBeUndefined();
    expect(c.next).toBeUndefined();
    expect(c.backEdges).toBeUndefined();
  });

  it("loop edge: classified as backEdge", () => {
    // C → A closes a cycle, so it must be reported as a back edge on C.
    const topology: GraphTopology = {
      nodes: [makeNode("A"), makeNode("B"), makeNode("C")],
      edges: [
        { source: "A", target: "B", condition: "on_success", priority: 0 },
        { source: "B", target: "C", condition: "on_success", priority: 0 },
        { source: "C", target: "A", condition: "on_success", priority: 0 },
      ],
      entry_node: "A",
    };

    const result = topologyToGraphNodes(topology);
    const c = result.find((n) => n.id === "C")!;

    expect(c.next).toBeUndefined();
    expect(c.backEdges).toEqual(["A"]);
  });

  it("diamond/fan-out: multiple next targets", () => {
    const topology: GraphTopology = {
      nodes: [makeNode("A"), makeNode("B"), makeNode("C"), makeNode("D")],
      edges: [
        { source: "A", target: "B", condition: "on_success", priority: 0 },
        { source: "A", target: "C", condition: "on_failure", priority: 1 },
        { source: "B", target: "D", condition: "on_success", priority: 0 },
        { source: "C", target: "D", condition: "on_success", priority: 0 },
      ],
      entry_node: "A",
    };

    const result = topologyToGraphNodes(topology);
    const a = result.find((n) => n.id === "A")!;

    // Order of the fan-out targets is not asserted, only membership + count.
    expect(a.next).toEqual(expect.arrayContaining(["B", "C"]));
    expect(a.next).toHaveLength(2);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Status mapping
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Verifies the session-enrichment → NodeStatus mapping
// (pending / running / looping / complete / error).
describe("status mapping", () => {
  it("no enrichment: all nodes pending", () => {
    const topology: GraphTopology = {
      nodes: [makeNode("A"), makeNode("B")],
      edges: [
        { source: "A", target: "B", condition: "on_success", priority: 0 },
      ],
      entry_node: "A",
    };

    const result = topologyToGraphNodes(topology);
    expect(result.every((n) => n.status === "pending")).toBe(true);
  });

  it("is_current: running", () => {
    const topology: GraphTopology = {
      nodes: [makeNode("A", { is_current: true, visit_count: 1, in_path: true })],
      edges: [],
      entry_node: "A",
    };

    const result = topologyToGraphNodes(topology);
    expect(result[0].status).toBe("running");
  });

  it("is_current + visit_count > 1: looping", () => {
    // Revisiting the current node signals a loop in progress.
    const topology: GraphTopology = {
      nodes: [makeNode("A", { is_current: true, visit_count: 3, in_path: true })],
      edges: [],
      entry_node: "A",
    };

    const result = topologyToGraphNodes(topology);
    expect(result[0].status).toBe("looping");
  });

  it("in_path + visited + not current: complete", () => {
    const topology: GraphTopology = {
      nodes: [makeNode("A", { in_path: true, visit_count: 1, is_current: false })],
      edges: [],
      entry_node: "A",
    };

    const result = topologyToGraphNodes(topology);
    expect(result[0].status).toBe("complete");
  });

  it("has_failures: error", () => {
    // Failure flag takes precedence over the in_path/visited "complete" state.
    const topology: GraphTopology = {
      nodes: [makeNode("A", { has_failures: true, in_path: true, visit_count: 1 })],
      edges: [],
      entry_node: "A",
    };

    const result = topologyToGraphNodes(topology);
    expect(result[0].status).toBe("error");
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Iteration tracking
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Verifies visit_count/max_node_visits → iterations/maxIterations mapping,
// including the "0 means unlimited" convention.
describe("iteration tracking", () => {
  it("visit_count maps to iterations", () => {
    const topology: GraphTopology = {
      nodes: [makeNode("A", { visit_count: 3, in_path: true })],
      edges: [],
      entry_node: "A",
    };

    const result = topologyToGraphNodes(topology);
    expect(result[0].iterations).toBe(3);
  });

  it("max_node_visits maps to maxIterations", () => {
    const topology: GraphTopology = {
      nodes: [makeNode("A", { max_node_visits: 5, visit_count: 1, in_path: true })],
      edges: [],
      entry_node: "A",
    };

    const result = topologyToGraphNodes(topology);
    expect(result[0].maxIterations).toBe(5);
  });

  it("max_node_visits == 0 (unlimited): maxIterations omitted", () => {
    const topology: GraphTopology = {
      nodes: [makeNode("A", { max_node_visits: 0, visit_count: 1, in_path: true })],
      edges: [],
      entry_node: "A",
    };

    const result = topologyToGraphNodes(topology);
    expect(result[0].maxIterations).toBeUndefined();
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Edge labels
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Verifies that only non-trivial edge conditions (not on_success/always)
// produce entries in edgeLabels.
describe("edge labels", () => {
  it("conditional edges produce edgeLabels, on_success/always do not", () => {
    const topology: GraphTopology = {
      nodes: [makeNode("A"), makeNode("B"), makeNode("C"), makeNode("D")],
      edges: [
        { source: "A", target: "B", condition: "conditional", priority: 0 },
        { source: "A", target: "C", condition: "on_failure", priority: 1 },
        { source: "B", target: "D", condition: "on_success", priority: 0 },
        { source: "C", target: "D", condition: "always", priority: 0 },
      ],
      entry_node: "A",
    };

    const result = topologyToGraphNodes(topology);
    const a = result.find((n) => n.id === "A")!;
    const b = result.find((n) => n.id === "B")!;
    const c = result.find((n) => n.id === "C")!;

    // A has conditional + on_failure edges → both get labels
    expect(a.edgeLabels).toEqual({ B: "conditional", C: "on_failure" });
    // B has on_success → no label
    expect(b.edgeLabels).toBeUndefined();
    // C has always → no label
    expect(c.edgeLabels).toBeUndefined();
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Node ordering
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Verifies BFS ordering from entry_node (not input order) and the
// empty-topology base case.
describe("node ordering", () => {
  it("nodes returned in BFS walk order from entry_node, not input order", () => {
    const topology: GraphTopology = {
      // Input order: C, A, B — but BFS from A should yield A, B, C
      nodes: [makeNode("C"), makeNode("A"), makeNode("B")],
      edges: [
        { source: "A", target: "B", condition: "on_success", priority: 0 },
        { source: "B", target: "C", condition: "on_success", priority: 0 },
      ],
      entry_node: "A",
    };

    const result = topologyToGraphNodes(topology);
    expect(result.map((n) => n.id)).toEqual(["A", "B", "C"]);
  });

  it("empty topology returns empty array", () => {
    const topology: GraphTopology = {
      nodes: [],
      edges: [],
      entry_node: "",
    };

    const result = topologyToGraphNodes(topology);
    expect(result).toEqual([]);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Trigger node synthesis from entry_points
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Verifies that non-manual entry_points are synthesized into "__trigger_*"
// nodes prepended ahead of their entry node, while manual entry points are
// ignored.
describe("trigger node synthesis", () => {
  it("single non-manual entry point: trigger node prepended before entry_node", () => {
    const topology: GraphTopology = {
      nodes: [makeNode("A"), makeNode("B")],
      edges: [
        { source: "A", target: "B", condition: "on_success", priority: 0 },
      ],
      entry_node: "A",
      entry_points: [
        { id: "webhook", name: "Webhook Handler", entry_node: "A", trigger_type: "webhook", trigger_config: { url: "/hook" } },
      ],
    };

    const result = topologyToGraphNodes(topology);
    expect(result).toHaveLength(3);

    // Synthetic trigger node comes first and points at the real entry node.
    const trigger = result[0];
    expect(trigger.id).toBe("__trigger_webhook");
    expect(trigger.nodeType).toBe("trigger");
    expect(trigger.triggerType).toBe("webhook");
    expect(trigger.triggerConfig).toEqual({ url: "/hook" });
    expect(trigger.label).toBe("Webhook Handler");
    expect(trigger.status).toBe("pending");
    expect(trigger.next).toEqual(["A"]);
  });

  it("trigger_config is threaded through for timer triggers", () => {
    const topology: GraphTopology = {
      nodes: [makeNode("A")],
      edges: [],
      entry_node: "A",
      entry_points: [
        { id: "timer", name: "Daily Check", entry_node: "A", trigger_type: "timer", trigger_config: { cron: "0 9 * * *" } },
      ],
    };

    const result = topologyToGraphNodes(topology);
    const trigger = result[0];
    expect(trigger.triggerConfig).toEqual({ cron: "0 9 * * *" });
  });

  it("no entry_points: no trigger nodes added", () => {
    const topology: GraphTopology = {
      nodes: [makeNode("A")],
      edges: [],
      entry_node: "A",
    };

    const result = topologyToGraphNodes(topology);
    expect(result).toHaveLength(1);
    expect(result[0].nodeType).toBeUndefined();
  });

  it("only manual entry points: no trigger nodes added", () => {
    const topology: GraphTopology = {
      nodes: [makeNode("A")],
      edges: [],
      entry_node: "A",
      entry_points: [
        { id: "main", name: "Main", entry_node: "A", trigger_type: "manual" },
      ],
    };

    const result = topologyToGraphNodes(topology);
    expect(result).toHaveLength(1);
    expect(result[0].id).toBe("A");
  });

  it("multiple non-manual entry points: multiple trigger nodes", () => {
    const topology: GraphTopology = {
      nodes: [makeNode("A"), makeNode("B"), makeNode("C")],
      edges: [
        { source: "A", target: "C", condition: "on_success", priority: 0 },
        { source: "B", target: "C", condition: "on_success", priority: 0 },
      ],
      entry_node: "A",
      entry_points: [
        { id: "webhook", name: "Webhook", entry_node: "A", trigger_type: "webhook" },
        { id: "timer", name: "Daily Timer", entry_node: "B", trigger_type: "timer" },
      ],
    };

    const result = topologyToGraphNodes(topology);
    expect(result).toHaveLength(5); // 2 triggers + 3 nodes
    const triggers = result.filter((n) => n.nodeType === "trigger");
    expect(triggers).toHaveLength(2);
    expect(triggers[0].next).toEqual(["A"]);
    expect(triggers[1].next).toEqual(["B"]);
  });

  it("mix of manual and non-manual: only non-manual become trigger nodes", () => {
    const topology: GraphTopology = {
      nodes: [makeNode("A"), makeNode("B")],
      edges: [
        { source: "A", target: "B", condition: "on_success", priority: 0 },
      ],
      entry_node: "A",
      entry_points: [
        { id: "main", name: "Main", entry_node: "A", trigger_type: "manual" },
        { id: "webhook", name: "Webhook", entry_node: "A", trigger_type: "webhook" },
      ],
    };

    const result = topologyToGraphNodes(topology);
    expect(result).toHaveLength(3); // 1 trigger + 2 nodes
    const triggers = result.filter((n) => n.nodeType === "trigger");
    expect(triggers).toHaveLength(1);
    expect(triggers[0].triggerType).toBe("webhook");
  });
});
|
||||
@@ -1,186 +0,0 @@
|
||||
import type { GraphTopology, NodeSpec } from "@/api/types";
|
||||
import type { GraphNode, NodeStatus } from "@/components/graph-types";
|
||||
|
||||
/**
 * Convert a backend GraphTopology (nodes + edges + entry_node) into
 * the GraphNode[] shape that DraftGraph renders.
 *
 * Four jobs:
 *  1. Synthesize trigger nodes from non-manual entry_points
 *  2. Order nodes via BFS from trigger/entry_node
 *  3. Classify edges as forward (next) or backward (backEdges)
 *  4. Map session enrichment fields to NodeStatus
 *
 * Returns [] for an empty topology. The output order is the BFS visit
 * order, with synthetic trigger nodes first and unreachable nodes appended
 * at the end.
 */
export function topologyToGraphNodes(topology: GraphTopology): GraphNode[] {
  const { nodes: allNodes, edges, entry_node, entry_points } = topology;
  if (allNodes.length === 0) return [];

  // Filter out subagent-only nodes (referenced in sub_agents but not in any edge)
  const subagentIds = new Set<string>();
  for (const n of allNodes) {
    for (const sa of n.sub_agents ?? []) {
      subagentIds.add(sa);
    }
  }
  const edgeParticipants = new Set<string>();
  for (const e of edges) {
    edgeParticipants.add(e.source);
    edgeParticipants.add(e.target);
  }
  // A subagent node is kept if it also appears in an edge or is the entry.
  const nodes = allNodes.filter(
    (n) =>
      !subagentIds.has(n.id) ||
      edgeParticipants.has(n.id) ||
      n.id === entry_node,
  );

  // --- Synthesize trigger nodes for non-manual entry points ---
  const schedulerEntryPoints = (entry_points || []).filter(
    (ep) => ep.trigger_type !== "manual",
  );
  const triggerMap = new Map<string, GraphNode>();

  for (const ep of schedulerEntryPoints) {
    // Synthetic id is prefixed so callers can strip it back to the backend id.
    const triggerId = `__trigger_${ep.id}`;
    triggerMap.set(triggerId, {
      id: triggerId,
      label: ep.name,
      status: "pending",
      nodeType: "trigger",
      triggerType: ep.trigger_type,
      // Fold runtime fields (next_fire_in, task) into the config object.
      triggerConfig: {
        ...ep.trigger_config,
        ...(ep.next_fire_in != null ? { next_fire_in: ep.next_fire_in } : {}),
        ...(ep.task ? { task: ep.task } : {}),
      },
      next: [ep.entry_node],
    });
  }

  // Build adjacency list: source → [target, ...] (includes trigger edges)
  const adj = new Map<string, string[]>();
  for (const e of edges) {
    const list = adj.get(e.source) || [];
    list.push(e.target);
    adj.set(e.source, list);
  }
  for (const [triggerId, triggerNode] of triggerMap) {
    adj.set(triggerId, triggerNode.next!);
  }

  // BFS — start from trigger nodes (if any), then entry_node.
  // Always include entry_node so the DAG ordering stays correct
  // even when triggers target a node other than entry.
  const order: string[] = [];
  const position = new Map<string, number>();
  const visited = new Set<string>();

  const entryStart = entry_node || nodes[0].id;
  const starts =
    triggerMap.size > 0
      ? [...triggerMap.keys(), entryStart]
      : [entryStart];
  const queue = [...starts];
  for (const s of starts) visited.add(s);

  while (queue.length > 0) {
    const id = queue.shift()!;
    // position is the node's index in BFS order; used for edge classification.
    position.set(id, order.length);
    order.push(id);

    for (const target of adj.get(id) || []) {
      if (!visited.has(target)) {
        visited.add(target);
        queue.push(target);
      }
    }
  }

  // Add any nodes not reachable from entry (shouldn't happen in valid graphs)
  for (const n of nodes) {
    if (!visited.has(n.id)) {
      position.set(n.id, order.length);
      order.push(n.id);
    }
  }

  // Build a node lookup
  const nodeMap = new Map<string, NodeSpec>();
  for (const n of nodes) {
    nodeMap.set(n.id, n);
  }

  // Classify edges per source node
  const nextMap = new Map<string, string[]>();
  const backMap = new Map<string, string[]>();

  for (const e of edges) {
    const srcPos = position.get(e.source) ?? 0;
    const tgtPos = position.get(e.target) ?? 0;

    if (tgtPos <= srcPos) {
      // Back edge (target is at same or earlier position in BFS)
      const list = backMap.get(e.source) || [];
      list.push(e.target);
      backMap.set(e.source, list);
    } else {
      // Forward edge
      const list = nextMap.get(e.source) || [];
      list.push(e.target);
      nextMap.set(e.source, list);
    }
  }

  // Build edge condition labels (only for non-trivial conditions)
  const edgeLabelMap = new Map<string, Record<string, string>>();
  for (const e of edges) {
    if (e.condition !== "always" && e.condition !== "on_success") {
      const labels = edgeLabelMap.get(e.source) || {};
      labels[e.target] = e.condition;
      edgeLabelMap.set(e.source, labels);
    }
  }

  // Build GraphNode[] in BFS order
  return order.map((id) => {
    // Synthetic trigger nodes are returned directly
    const trigger = triggerMap.get(id);
    if (trigger) return trigger;

    const spec = nodeMap.get(id);
    const next = nextMap.get(id);
    const back = backMap.get(id);
    const labels = edgeLabelMap.get(id);

    // Optional fields are only attached when non-empty so the renderer can
    // distinguish "no edges" from "empty list".
    const result: GraphNode = {
      id,
      label: spec?.name || id,
      status: mapStatus(spec),
      ...(next && next.length > 0 ? { next } : {}),
      ...(back && back.length > 0 ? { backEdges: back } : {}),
      ...(labels ? { edgeLabels: labels } : {}),
    };

    // Iteration tracking from session enrichment
    if (spec?.visit_count !== undefined && spec.visit_count > 0) {
      result.iterations = spec.visit_count;
    }
    // max_node_visits of 0 means "unlimited" → maxIterations omitted.
    if (spec?.max_node_visits !== undefined && spec.max_node_visits > 0) {
      result.maxIterations = spec.max_node_visits;
    }

    return result;
  });
}
|
||||
|
||||
function mapStatus(spec: NodeSpec | undefined): NodeStatus {
|
||||
if (!spec) return "pending";
|
||||
|
||||
if (spec.has_failures) return "error";
|
||||
if (spec.is_current) {
|
||||
return (spec.visit_count ?? 0) > 1 ? "looping" : "running";
|
||||
}
|
||||
if (spec.in_path && (spec.visit_count ?? 0) > 0) return "complete";
|
||||
|
||||
return "pending";
|
||||
}
|
||||
@@ -1,8 +1,7 @@
|
||||
import { useEffect, useState } from "react";
|
||||
|
||||
// ── Shared graph utilities ──
|
||||
// Common helpers used by both AgentGraph and DraftGraph.
|
||||
// AgentGraph still has its own copies for now (separate cleanup PR).
|
||||
// Shared helpers for graph-like components (TriggersPanel, etc.).
|
||||
|
||||
/** Read a CSS custom property value (space-separated HSL components). */
|
||||
export function cssVar(name: string): string {
|
||||
|
||||
@@ -2,25 +2,18 @@ import { useState, useCallback, useRef, useEffect, useMemo } from "react";
|
||||
import { useParams, useLocation } from "react-router-dom";
|
||||
import { Loader2, WifiOff, KeyRound, FolderOpen, X } from "lucide-react";
|
||||
import type { GraphNode, NodeStatus } from "@/components/graph-types";
|
||||
import DraftGraph from "@/components/DraftGraph";
|
||||
import TriggersPanel from "@/components/TriggersPanel";
|
||||
import TriggerDetailPanel from "@/components/TriggerDetailPanel";
|
||||
import ChatPanel, { type ChatMessage, type ImageContent } from "@/components/ChatPanel";
|
||||
import NodeDetailPanel from "@/components/NodeDetailPanel";
|
||||
import CredentialsModal, {
|
||||
type Credential,
|
||||
clearCredentialCache,
|
||||
} from "@/components/CredentialsModal";
|
||||
import { executionApi } from "@/api/execution";
|
||||
import { workersApi } from "@/api/workers";
|
||||
import { sessionsApi } from "@/api/sessions";
|
||||
import { useMultiSSE } from "@/hooks/use-sse";
|
||||
import type {
|
||||
LiveSession,
|
||||
AgentEvent,
|
||||
NodeSpec,
|
||||
DraftGraph as DraftGraphData,
|
||||
} from "@/api/types";
|
||||
import type { LiveSession, AgentEvent } from "@/api/types";
|
||||
import { sseEventToChatMessage, formatAgentDisplayName } from "@/lib/chat-helpers";
|
||||
import { topologyToGraphNodes } from "@/lib/graph-converter";
|
||||
import { cronToLabel } from "@/lib/graphUtils";
|
||||
import { ApiError } from "@/api/client";
|
||||
import { useColony } from "@/context/ColonyContext";
|
||||
@@ -48,8 +41,6 @@ function truncate(s: string, max: number): string {
|
||||
type SessionRestoreResult = {
|
||||
messages: ChatMessage[];
|
||||
restoredPhase: "planning" | "building" | "staging" | "running" | "independent" | null;
|
||||
flowchartMap: Record<string, string[]> | null;
|
||||
originalDraft: DraftGraphData | null;
|
||||
};
|
||||
|
||||
async function restoreSessionMessages(
|
||||
@@ -62,8 +53,6 @@ async function restoreSessionMessages(
|
||||
if (events.length > 0) {
|
||||
const messages: ChatMessage[] = [];
|
||||
let runningPhase: ChatMessage["phase"] = undefined;
|
||||
let flowchartMap: Record<string, string[]> | null = null;
|
||||
let originalDraft: DraftGraphData | null = null;
|
||||
for (const evt of events) {
|
||||
const p =
|
||||
evt.type === "queen_phase_changed"
|
||||
@@ -74,14 +63,6 @@ async function restoreSessionMessages(
|
||||
if (p && ["planning", "building", "staging", "running"].includes(p)) {
|
||||
runningPhase = p as ChatMessage["phase"];
|
||||
}
|
||||
if (evt.type === "custom" && (evt.data as Record<string, unknown>)?.event === "flowchart_updated" && evt.data) {
|
||||
const mapData = evt.data as {
|
||||
map?: Record<string, string[]>;
|
||||
original_draft?: DraftGraphData;
|
||||
};
|
||||
flowchartMap = mapData.map ?? null;
|
||||
originalDraft = mapData.original_draft ?? null;
|
||||
}
|
||||
const msg = sseEventToChatMessage(evt, thread, agentDisplayName);
|
||||
if (!msg) continue;
|
||||
if (evt.stream_id === "queen") {
|
||||
@@ -90,12 +71,12 @@ async function restoreSessionMessages(
|
||||
}
|
||||
messages.push(msg);
|
||||
}
|
||||
return { messages, restoredPhase: runningPhase ?? null, flowchartMap, originalDraft };
|
||||
return { messages, restoredPhase: runningPhase ?? null };
|
||||
}
|
||||
} catch {
|
||||
// Event log not available
|
||||
}
|
||||
return { messages: [], restoredPhase: null, flowchartMap: null, originalDraft: null };
|
||||
return { messages: [], restoredPhase: null };
|
||||
}
|
||||
|
||||
// ── Agent backend state ──────────────────────────────────────────────────────
|
||||
@@ -107,28 +88,11 @@ interface AgentState {
|
||||
queenReady: boolean;
|
||||
error: string | null;
|
||||
displayName: string | null;
|
||||
colonyId: string | null; nodeSpecs: NodeSpec[];
|
||||
awaitingInput: boolean;
|
||||
workerInputMessageId: string | null;
|
||||
queenBuilding: boolean;
|
||||
queenPhase: "planning" | "building" | "staging" | "running" | "independent";
|
||||
designingDraft: boolean;
|
||||
draftGraph: DraftGraphData | null;
|
||||
originalDraft: DraftGraphData | null;
|
||||
flowchartMap: Record<string, string[]> | null;
|
||||
agentPath: string | null;
|
||||
workerRunState: "idle" | "deploying" | "running";
|
||||
currentExecutionId: string | null;
|
||||
currentRunId: string | null;
|
||||
nodeLogs: Record<string, string[]>;
|
||||
nodeActionPlans: Record<string, string>;
|
||||
subagentReports: {
|
||||
subagent_id: string;
|
||||
message: string;
|
||||
data?: Record<string, unknown>;
|
||||
timestamp: string;
|
||||
}[];
|
||||
isTyping: boolean;
|
||||
isStreaming: boolean;
|
||||
queenIsTyping: boolean;
|
||||
workerIsTyping: boolean;
|
||||
@@ -153,24 +117,11 @@ function defaultAgentState(): AgentState {
|
||||
queenReady: false,
|
||||
error: null,
|
||||
displayName: null,
|
||||
colonyId: null,
|
||||
nodeSpecs: [],
|
||||
awaitingInput: false,
|
||||
workerInputMessageId: null,
|
||||
queenBuilding: false,
|
||||
queenPhase: "planning",
|
||||
designingDraft: false,
|
||||
draftGraph: null,
|
||||
originalDraft: null,
|
||||
flowchartMap: null,
|
||||
agentPath: null,
|
||||
workerRunState: "idle",
|
||||
currentExecutionId: null,
|
||||
currentRunId: null,
|
||||
nodeLogs: {},
|
||||
nodeActionPlans: {},
|
||||
subagentReports: [],
|
||||
isTyping: false,
|
||||
isStreaming: false,
|
||||
queenIsTyping: false,
|
||||
workerIsTyping: false,
|
||||
@@ -239,9 +190,6 @@ export default function ColonyChat() {
|
||||
const [credentialAgentPath, setCredentialAgentPath] = useState<string | null>(null);
|
||||
const [dismissedBanner, setDismissedBanner] = useState<string | null>(null);
|
||||
const [selectedNode, setSelectedNode] = useState<GraphNode | null>(null);
|
||||
const [graphPanelPct, setGraphPanelPct] = useState(30);
|
||||
const savedGraphPanelPct = useRef(30);
|
||||
const resizing = useRef(false);
|
||||
|
||||
// ── Header actions (Credentials, Data, Browser) ─────────────────────────
|
||||
useEffect(() => {
|
||||
@@ -281,8 +229,6 @@ export default function ColonyChat() {
|
||||
const queenIterTextRef = useRef<Record<string, Record<number, string>>>({});
|
||||
const suppressIntroRef = useRef(false);
|
||||
const loadingRef = useRef(false);
|
||||
const designingDraftSinceRef = useRef(0);
|
||||
const designingDraftTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
|
||||
|
||||
// ── Helpers ──────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -346,71 +292,11 @@ export default function ColonyChat() {
|
||||
}));
|
||||
}, []);
|
||||
|
||||
// ── Drag-to-resize graph panel ──────────────────────────────────────────
|
||||
|
||||
useEffect(() => {
|
||||
const onMouseMove = (e: MouseEvent) => {
|
||||
if (!resizing.current) return;
|
||||
const sidebarWidth = 240;
|
||||
const pct = 100 - ((e.clientX - sidebarWidth) / (window.innerWidth - sidebarWidth)) * 100;
|
||||
setGraphPanelPct(Math.max(15, Math.min(50, pct)));
|
||||
};
|
||||
const onMouseUp = () => {
|
||||
resizing.current = false;
|
||||
document.body.style.cursor = "";
|
||||
};
|
||||
window.addEventListener("mousemove", onMouseMove);
|
||||
window.addEventListener("mouseup", onMouseUp);
|
||||
return () => {
|
||||
window.removeEventListener("mousemove", onMouseMove);
|
||||
window.removeEventListener("mouseup", onMouseUp);
|
||||
};
|
||||
}, []);
|
||||
|
||||
const nodeIsSelected = selectedNode !== null;
|
||||
useEffect(() => {
|
||||
if (nodeIsSelected) {
|
||||
savedGraphPanelPct.current = graphPanelPct;
|
||||
setGraphPanelPct((prev) => Math.min(prev, 30));
|
||||
} else {
|
||||
setGraphPanelPct(savedGraphPanelPct.current);
|
||||
}
|
||||
}, [nodeIsSelected]); // eslint-disable-line react-hooks/exhaustive-deps
|
||||
|
||||
// Reset dismissed banner when the error clears
|
||||
useEffect(() => {
|
||||
if (!agentState.error) setDismissedBanner(null);
|
||||
}, [agentState.error]);
|
||||
|
||||
// ── Graph fetching ─────────────────────────────────────────────────────
|
||||
|
||||
const fetchGraph = useCallback(
|
||||
async (sessionId: string, knownGraphId?: string) => {
|
||||
try {
|
||||
let colonyId = knownGraphId;
|
||||
if (!colonyId) {
|
||||
// Try session detail first (colony_id is always set when worker is loaded)
|
||||
try {
|
||||
const detail = await sessionsApi.get(sessionId);
|
||||
colonyId = detail.colony_id ?? undefined;
|
||||
} catch { /* fall through */ }
|
||||
}
|
||||
if (!colonyId) {
|
||||
const { colonies } = await sessionsApi.colonies(sessionId);
|
||||
if (!colonies.length) return;
|
||||
colonyId = colonies[0];
|
||||
}
|
||||
const topology = await workersApi.nodes(sessionId, colonyId);
|
||||
updateState({ colonyId, nodeSpecs: topology.nodes });
|
||||
const nodes = topologyToGraphNodes(topology);
|
||||
if (nodes.length > 0) setGraphNodes(nodes);
|
||||
} catch {
|
||||
// Graph fetch failed
|
||||
}
|
||||
},
|
||||
[updateState],
|
||||
);
|
||||
|
||||
// ── Session loading ────────────────────────────────────────────────────
|
||||
|
||||
const loadSession = useCallback(async () => {
|
||||
@@ -473,8 +359,6 @@ export default function ColonyChat() {
|
||||
}
|
||||
|
||||
let restoredPhase: "planning" | "building" | "staging" | "running" | "independent" | null = null;
|
||||
let restoredFlowchartMap: Record<string, string[]> | null = null;
|
||||
let restoredOriginalDraft: DraftGraphData | null = null;
|
||||
|
||||
if (!liveSession) {
|
||||
// Pre-fetch messages from cold session
|
||||
@@ -484,8 +368,6 @@ export default function ColonyChat() {
|
||||
const restored = await restoreSessionMessages(coldRestoreId, agentPath, displayName);
|
||||
preRestoredMsgs = restored.messages;
|
||||
restoredPhase = restored.restoredPhase;
|
||||
restoredFlowchartMap = restored.flowchartMap;
|
||||
restoredOriginalDraft = restored.originalDraft;
|
||||
}
|
||||
|
||||
if (coldRestoreId || preRestoredMsgs.length > 0) {
|
||||
@@ -511,10 +393,7 @@ export default function ColonyChat() {
|
||||
sessionId: session.session_id,
|
||||
displayName,
|
||||
queenPhase: initialPhase,
|
||||
queenBuilding: initialPhase === "building",
|
||||
queenSupportsImages: session.queen_supports_images !== false,
|
||||
...(restoredFlowchartMap ? { flowchartMap: restoredFlowchartMap } : {}),
|
||||
...(restoredOriginalDraft ? { originalDraft: restoredOriginalDraft, draftGraph: null } : {}),
|
||||
});
|
||||
|
||||
// Restore messages for live resume
|
||||
@@ -528,10 +407,6 @@ export default function ColonyChat() {
|
||||
restored.messages.sort((a, b) => (a.createdAt ?? 0) - (b.createdAt ?? 0));
|
||||
setMessages(restored.messages);
|
||||
}
|
||||
if (restored.flowchartMap && !restoredFlowchartMap) {
|
||||
restoredFlowchartMap = restored.flowchartMap;
|
||||
restoredOriginalDraft = restored.originalDraft;
|
||||
}
|
||||
}
|
||||
|
||||
const hasRestoredContent = isResumedSession || !!coldRestoreId;
|
||||
@@ -543,8 +418,6 @@ export default function ColonyChat() {
|
||||
ready: true,
|
||||
loading: false,
|
||||
queenReady: hasRestoredContent,
|
||||
...(restoredFlowchartMap ? { flowchartMap: restoredFlowchartMap } : {}),
|
||||
...(restoredOriginalDraft ? { originalDraft: restoredOriginalDraft, draftGraph: null } : {}),
|
||||
});
|
||||
} catch (err: unknown) {
|
||||
if (err instanceof ApiError && err.status === 424) {
|
||||
@@ -578,13 +451,6 @@ export default function ColonyChat() {
|
||||
}
|
||||
}, [agentPath, isNewChat]); // eslint-disable-line react-hooks/exhaustive-deps
|
||||
|
||||
// Fetch graph when session becomes ready
|
||||
useEffect(() => {
|
||||
if (agentState.sessionId && agentState.ready && !agentState.colonyId) {
|
||||
fetchGraph(agentState.sessionId);
|
||||
}
|
||||
}, [agentState.sessionId, agentState.ready, agentState.colonyId, fetchGraph]);
|
||||
|
||||
// ── SSE event handler ──────────────────────────────────────────────────
|
||||
|
||||
const handleSSEEvent = useCallback(
|
||||
@@ -609,7 +475,6 @@ export default function ColonyChat() {
|
||||
if (isQueen) {
|
||||
turnCounterRef.current[turnKey] = currentTurn + 1;
|
||||
updateState({
|
||||
isTyping: true,
|
||||
queenIsTyping: true,
|
||||
...(shouldMarkQueenReady && { queenReady: true }),
|
||||
});
|
||||
@@ -631,15 +496,11 @@ export default function ColonyChat() {
|
||||
}
|
||||
turnCounterRef.current[turnKey] = currentTurn + 1;
|
||||
updateState({
|
||||
isTyping: true,
|
||||
isStreaming: false,
|
||||
workerIsTyping: true,
|
||||
awaitingInput: false,
|
||||
workerRunState: "running",
|
||||
currentExecutionId: event.execution_id || state.currentExecutionId || null,
|
||||
currentRunId: incomingRunId,
|
||||
nodeLogs: {},
|
||||
subagentReports: [],
|
||||
llmSnapshots: {},
|
||||
activeToolCalls: {},
|
||||
pendingQuestion: null,
|
||||
@@ -654,16 +515,12 @@ export default function ColonyChat() {
|
||||
case "execution_completed":
|
||||
if (isQueen) {
|
||||
suppressIntroRef.current = false;
|
||||
updateState({ isTyping: false, queenIsTyping: false });
|
||||
updateState({ queenIsTyping: false });
|
||||
} else {
|
||||
updateState({
|
||||
isTyping: false,
|
||||
isStreaming: false,
|
||||
workerIsTyping: false,
|
||||
awaitingInput: false,
|
||||
workerInputMessageId: null,
|
||||
workerRunState: "idle",
|
||||
currentExecutionId: null,
|
||||
llmSnapshots: {},
|
||||
pendingQuestion: null,
|
||||
pendingOptions: null,
|
||||
@@ -671,7 +528,6 @@ export default function ColonyChat() {
|
||||
pendingQuestionSource: null,
|
||||
});
|
||||
markAllNodesAs(["running", "looping"], "complete");
|
||||
if (state.sessionId) fetchGraph(state.sessionId, state.colonyId || undefined);
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -742,10 +598,8 @@ export default function ColonyChat() {
|
||||
const prompt = (event.data?.prompt as string) || "";
|
||||
updateState({
|
||||
awaitingInput: true,
|
||||
isTyping: false,
|
||||
isStreaming: false,
|
||||
queenIsTyping: false,
|
||||
queenBuilding: false,
|
||||
pendingQuestion: prompt || null,
|
||||
pendingOptions: options,
|
||||
pendingQuestions: questions,
|
||||
@@ -756,7 +610,6 @@ export default function ColonyChat() {
|
||||
|
||||
if (event.type === "execution_paused") {
|
||||
updateState({
|
||||
isTyping: false,
|
||||
isStreaming: false,
|
||||
queenIsTyping: false,
|
||||
workerIsTyping: false,
|
||||
@@ -767,14 +620,12 @@ export default function ColonyChat() {
|
||||
pendingQuestionSource: null,
|
||||
});
|
||||
if (!isQueen) {
|
||||
updateState({ workerRunState: "idle", currentExecutionId: null });
|
||||
markAllNodesAs(["running", "looping"], "pending");
|
||||
}
|
||||
}
|
||||
|
||||
if (event.type === "execution_failed") {
|
||||
updateState({
|
||||
isTyping: false,
|
||||
isStreaming: false,
|
||||
queenIsTyping: false,
|
||||
workerIsTyping: false,
|
||||
@@ -785,7 +636,6 @@ export default function ColonyChat() {
|
||||
pendingQuestionSource: null,
|
||||
});
|
||||
if (!isQueen) {
|
||||
updateState({ workerRunState: "idle", currentExecutionId: null });
|
||||
if (event.node_id) {
|
||||
updateGraphNodeStatus(event.node_id, "error");
|
||||
const errMsg = (event.data?.error as string) || "unknown error";
|
||||
@@ -799,7 +649,7 @@ export default function ColonyChat() {
|
||||
|
||||
case "node_loop_started":
|
||||
turnCounterRef.current[turnKey] = currentTurn + 1;
|
||||
updateState({ isTyping: true, activeToolCalls: {} });
|
||||
updateState({ activeToolCalls: {} });
|
||||
if (!isQueen && event.node_id) {
|
||||
const existing = graphNodes.find((n) => n.id === event.node_id);
|
||||
const isRevisit = existing?.status === "complete";
|
||||
@@ -898,12 +748,6 @@ export default function ColonyChat() {
|
||||
const toolName = (event.data?.tool_name as string) || "unknown";
|
||||
const toolUseId = (event.data?.tool_use_id as string) || "";
|
||||
|
||||
if (isQueen && toolName === "save_agent_draft") {
|
||||
designingDraftSinceRef.current = Date.now();
|
||||
if (designingDraftTimerRef.current) clearTimeout(designingDraftTimerRef.current);
|
||||
updateState({ designingDraft: true });
|
||||
}
|
||||
|
||||
const sid = event.stream_id;
|
||||
setAgentState((prev) => {
|
||||
const newActive = {
|
||||
@@ -1002,7 +846,7 @@ export default function ColonyChat() {
|
||||
}
|
||||
|
||||
case "credentials_required": {
|
||||
updateState({ workerRunState: "idle", error: "credentials_required" });
|
||||
updateState({ error: "credentials_required" });
|
||||
const credAgentPath = event.data?.agent_path as string | undefined;
|
||||
if (credAgentPath) setCredentialAgentPath(credAgentPath);
|
||||
setCredentialsOpen(true);
|
||||
@@ -1025,77 +869,49 @@ export default function ColonyChat() {
|
||||
queenPhaseRef.current = newPhase;
|
||||
updateState({
|
||||
queenPhase: newPhase,
|
||||
queenBuilding: newPhase === "building",
|
||||
workerRunState: newPhase === "running" ? "running" : "idle",
|
||||
...(newPhase === "planning" ? { originalDraft: null, flowchartMap: null } : {}),
|
||||
...(eventAgentPath ? { agentPath: eventAgentPath } : {}),
|
||||
});
|
||||
break;
|
||||
}
|
||||
|
||||
case "custom": {
|
||||
const customEvent = event.data as Record<string, unknown>;
|
||||
if (customEvent?.event === "draft_updated") {
|
||||
const draft = customEvent as unknown as DraftGraphData | undefined;
|
||||
if (draft?.nodes) {
|
||||
const MIN_SPINNER_MS = 600;
|
||||
const since = designingDraftSinceRef.current;
|
||||
const elapsed = Date.now() - since;
|
||||
const remaining = Math.max(0, MIN_SPINNER_MS - elapsed);
|
||||
if (remaining > 0 && since > 0) {
|
||||
updateState({ draftGraph: draft });
|
||||
designingDraftTimerRef.current = setTimeout(() => {
|
||||
updateState({ designingDraft: false });
|
||||
}, remaining);
|
||||
} else {
|
||||
updateState({ draftGraph: draft, designingDraft: false });
|
||||
}
|
||||
}
|
||||
} else if (customEvent?.event === "flowchart_updated") {
|
||||
const mapData = customEvent as {
|
||||
map?: Record<string, string[]>;
|
||||
original_draft?: DraftGraphData;
|
||||
};
|
||||
if (mapData) {
|
||||
updateState({
|
||||
flowchartMap: mapData.map ?? null,
|
||||
originalDraft: mapData.original_draft ?? null,
|
||||
draftGraph: null,
|
||||
});
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case "worker_colony_loaded": {
|
||||
const graphName = event.data?.colony_name as string | undefined;
|
||||
const agentPathFromEvent = event.data?.agent_path as string | undefined;
|
||||
const dn = formatAgentDisplayName(graphName || agentSlug(agentPath));
|
||||
clearCredentialCache(agentPathFromEvent);
|
||||
updateState({
|
||||
displayName: dn,
|
||||
queenBuilding: false,
|
||||
workerRunState: "idle",
|
||||
colonyId: null,
|
||||
nodeSpecs: [],
|
||||
});
|
||||
updateState({ displayName: dn });
|
||||
setGraphNodes([]);
|
||||
// Remove old worker messages
|
||||
setMessages((prev) => prev.filter((m) => m.role !== "worker"));
|
||||
if (state.sessionId) fetchGraph(state.sessionId);
|
||||
break;
|
||||
}
|
||||
|
||||
case "trigger_available":
|
||||
case "trigger_activated": {
|
||||
// Available = defined in triggers.json but not running yet.
|
||||
// Activated = running (just activated or restored after server
|
||||
// restart). Both get surfaced as cards in the TriggersPanel; the
|
||||
// only difference is the status.
|
||||
const isActive = event.type === "trigger_activated";
|
||||
const triggerId = event.data?.trigger_id as string;
|
||||
if (triggerId) {
|
||||
const nodeId = `__trigger_${triggerId}`;
|
||||
setGraphNodes((prev) => {
|
||||
const exists = prev.some((n) => n.id === nodeId);
|
||||
if (exists) {
|
||||
return prev.map((n) =>
|
||||
n.id === nodeId ? { ...n, status: "running" as NodeStatus } : n,
|
||||
);
|
||||
// Upgrade an existing inactive card to active without
|
||||
// clobbering the trigger_config fields the activated event
|
||||
// may carry (e.g. next_fire_in).
|
||||
return prev.map((n) => {
|
||||
if (n.id !== nodeId) return n;
|
||||
const incomingConfig =
|
||||
(event.data?.trigger_config as Record<string, unknown>) || undefined;
|
||||
return {
|
||||
...n,
|
||||
status: (isActive ? "running" : "pending") as NodeStatus,
|
||||
...(incomingConfig ? { triggerConfig: incomingConfig } : {}),
|
||||
};
|
||||
});
|
||||
}
|
||||
const triggerType = (event.data?.trigger_type as string) || "timer";
|
||||
const triggerConfig = (event.data?.trigger_config as Record<string, unknown>) || {};
|
||||
@@ -1113,7 +929,7 @@ export default function ColonyChat() {
|
||||
const newNode: GraphNode = {
|
||||
id: nodeId,
|
||||
label: computedLabel,
|
||||
status: "running",
|
||||
status: isActive ? "running" : "pending",
|
||||
nodeType: "trigger",
|
||||
triggerType,
|
||||
triggerConfig,
|
||||
@@ -1165,7 +981,7 @@ export default function ColonyChat() {
|
||||
break;
|
||||
}
|
||||
},
|
||||
[agentPath, queenInfo.name, updateState, upsertMessage, updateGraphNodeStatus, markAllNodesAs, appendNodeLog, fetchGraph, graphNodes],
|
||||
[agentPath, queenInfo.name, updateState, upsertMessage, updateGraphNodeStatus, markAllNodesAs, appendNodeLog, graphNodes],
|
||||
);
|
||||
|
||||
// ── SSE subscription ───────────────────────────────────────────────────
|
||||
@@ -1181,51 +997,11 @@ export default function ColonyChat() {
|
||||
|
||||
// ── Action handlers ────────────────────────────────────────────────────
|
||||
|
||||
const handleRun = useCallback(async () => {
|
||||
if (!agentState.sessionId || !agentState.ready) return;
|
||||
setDismissedBanner(null);
|
||||
try {
|
||||
updateState({ workerRunState: "deploying" });
|
||||
const result = await executionApi.trigger(agentState.sessionId, "default", {});
|
||||
updateState({ currentExecutionId: result.execution_id });
|
||||
} catch (err) {
|
||||
if (err instanceof ApiError && err.status === 424) {
|
||||
const errBody = (err as ApiError).body as Record<string, unknown>;
|
||||
const credPath = (errBody?.agent_path as string) || null;
|
||||
if (credPath) setCredentialAgentPath(credPath);
|
||||
updateState({ workerRunState: "idle", error: "credentials_required" });
|
||||
setCredentialsOpen(true);
|
||||
return;
|
||||
}
|
||||
const errMsg = err instanceof Error ? err.message : String(err);
|
||||
upsertMessage({
|
||||
id: makeId(),
|
||||
agent: "System",
|
||||
agentColor: "",
|
||||
content: `Failed to trigger run: ${errMsg}`,
|
||||
timestamp: "",
|
||||
type: "system",
|
||||
thread: agentPath,
|
||||
createdAt: Date.now(),
|
||||
});
|
||||
updateState({ workerRunState: "idle" });
|
||||
}
|
||||
}, [agentState.sessionId, agentState.ready, agentPath, updateState, upsertMessage]);
|
||||
|
||||
const handlePause = useCallback(async () => {
|
||||
if (!agentState.sessionId || !agentState.currentExecutionId) return;
|
||||
try {
|
||||
await executionApi.pause(agentState.sessionId, agentState.currentExecutionId);
|
||||
} catch {
|
||||
// fire-and-forget
|
||||
}
|
||||
}, [agentState.sessionId, agentState.currentExecutionId]);
|
||||
|
||||
const handleCancelQueen = useCallback(async () => {
|
||||
if (!agentState.sessionId) return;
|
||||
try {
|
||||
await executionApi.cancelQueen(agentState.sessionId);
|
||||
updateState({ isTyping: false, isStreaming: false, queenIsTyping: false });
|
||||
updateState({ isStreaming: false, queenIsTyping: false });
|
||||
} catch {
|
||||
// fire-and-forget
|
||||
}
|
||||
@@ -1255,7 +1031,7 @@ export default function ColonyChat() {
|
||||
};
|
||||
setMessages((prev) => [...prev, userMsg]);
|
||||
suppressIntroRef.current = false;
|
||||
updateState({ isTyping: true, queenIsTyping: true });
|
||||
updateState({ queenIsTyping: true });
|
||||
|
||||
if (agentState.sessionId && agentState.ready) {
|
||||
executionApi.chat(agentState.sessionId, text, images).catch((err: unknown) => {
|
||||
@@ -1270,7 +1046,7 @@ export default function ColonyChat() {
|
||||
thread: agentPath,
|
||||
createdAt: Date.now(),
|
||||
});
|
||||
updateState({ isTyping: false, isStreaming: false, queenIsTyping: false });
|
||||
updateState({ isStreaming: false, queenIsTyping: false });
|
||||
});
|
||||
}
|
||||
},
|
||||
@@ -1324,6 +1100,11 @@ export default function ColonyChat() {
|
||||
const liveSelectedNode = selectedNode && graphNodes.find((n) => n.id === selectedNode.id);
|
||||
const resolvedSelectedNode = liveSelectedNode || selectedNode;
|
||||
|
||||
const triggers = useMemo(
|
||||
() => graphNodes.filter((n) => n.nodeType === "trigger"),
|
||||
[graphNodes],
|
||||
);
|
||||
|
||||
// ── Render ─────────────────────────────────────────────────────────────
|
||||
|
||||
if (!colony && !isNewChat && !agentState.loading) {
|
||||
@@ -1416,56 +1197,25 @@ export default function ColonyChat() {
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* Pipeline graph panel */}
|
||||
<div
|
||||
className="bg-card/30 flex flex-col border-l border-border/30 relative"
|
||||
style={{ width: `${graphPanelPct}%`, minWidth: 240, flexShrink: 0 }}
|
||||
>
|
||||
<div className="flex-1 min-h-0">
|
||||
<DraftGraph
|
||||
key={colonyId}
|
||||
draft={agentState.originalDraft ?? agentState.draftGraph ?? null}
|
||||
originalDraft={agentState.originalDraft ?? null}
|
||||
loadingMessage={
|
||||
agentState.designingDraft
|
||||
? "Designing flowchart..."
|
||||
: !agentState.originalDraft &&
|
||||
!agentState.draftGraph &&
|
||||
agentState.queenPhase !== "planning"
|
||||
? "Loading flowchart..."
|
||||
: null
|
||||
{/* Triggers sidebar — only rendered when the colony actually has triggers */}
|
||||
{triggers.length > 0 && (
|
||||
<div className="w-[260px] flex-shrink-0">
|
||||
<TriggersPanel
|
||||
triggers={triggers}
|
||||
selectedId={resolvedSelectedNode?.id ?? null}
|
||||
onSelect={(trigger) =>
|
||||
setSelectedNode((prev) => (prev?.id === trigger.id ? null : trigger))
|
||||
}
|
||||
building={agentState.queenBuilding}
|
||||
onRun={handleRun}
|
||||
onPause={handlePause}
|
||||
runState={agentState.workerRunState}
|
||||
flowchartMap={agentState.flowchartMap ?? undefined}
|
||||
runtimeNodes={graphNodes}
|
||||
onRuntimeNodeClick={(runtimeNodeId) => {
|
||||
const node = graphNodes.find((n) => n.id === runtimeNodeId);
|
||||
if (node) setSelectedNode((prev) => (prev?.id === node.id ? null : node));
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
{/* Resize handle */}
|
||||
<div
|
||||
className="absolute top-0 left-0 w-1 h-full cursor-col-resize hover:bg-primary/30 active:bg-primary/40 transition-colors z-10"
|
||||
onMouseDown={() => {
|
||||
resizing.current = true;
|
||||
document.body.style.cursor = "col-resize";
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Node detail panel */}
|
||||
{resolvedSelectedNode && (
|
||||
<div className="w-[480px] min-w-[400px] flex-shrink-0">
|
||||
<NodeDetailPanel
|
||||
node={resolvedSelectedNode}
|
||||
{/* Trigger detail panel */}
|
||||
{resolvedSelectedNode && resolvedSelectedNode.nodeType === "trigger" && (
|
||||
<div className="w-[380px] min-w-[320px] flex-shrink-0">
|
||||
<TriggerDetailPanel
|
||||
trigger={resolvedSelectedNode}
|
||||
sessionId={agentState.sessionId || ""}
|
||||
colonyId={agentState.colonyId || ""}
|
||||
nodeLogs={agentState.nodeLogs[resolvedSelectedNode.id] || []}
|
||||
actionPlan={agentState.nodeActionPlans[resolvedSelectedNode.id]}
|
||||
onClose={() => setSelectedNode(null)}
|
||||
/>
|
||||
</div>
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { useState, useRef, useEffect } from "react";
|
||||
import { useNavigate, useLocation } from "react-router-dom";
|
||||
import { useState, useRef } from "react";
|
||||
import { useNavigate } from "react-router-dom";
|
||||
import { Loader2, Send } from "lucide-react";
|
||||
import { messagesApi } from "@/api/messages";
|
||||
import { useColony } from "@/context/ColonyContext";
|
||||
@@ -13,28 +13,12 @@ const promptHints = [
|
||||
|
||||
export default function Home() {
|
||||
const navigate = useNavigate();
|
||||
const location = useLocation();
|
||||
const { userProfile, refresh } = useColony();
|
||||
const [inputValue, setInputValue] = useState("");
|
||||
const [submitting, setSubmitting] = useState(false);
|
||||
const [activePrompt, setActivePrompt] = useState<string | null>(null);
|
||||
const textareaRef = useRef<HTMLTextAreaElement>(null);
|
||||
|
||||
// Pre-fill input if navigated from Prompt Library with a prompt
|
||||
useEffect(() => {
|
||||
const state = location.state as { prompt?: string } | null;
|
||||
if (state?.prompt) {
|
||||
setInputValue(state.prompt);
|
||||
// Clear the state so refreshing doesn't re-fill
|
||||
navigate(location.pathname, { replace: true });
|
||||
// Focus and resize textarea
|
||||
setTimeout(() => {
|
||||
textareaRef.current?.focus();
|
||||
textareaRef.current?.dispatchEvent(new Event("input", { bubbles: true }));
|
||||
}, 0);
|
||||
}
|
||||
}, [location.state, location.pathname, navigate]);
|
||||
|
||||
const displayName = userProfile.displayName || "there";
|
||||
|
||||
const startQueenSession = async (text: string) => {
|
||||
@@ -58,7 +42,15 @@ export default function Home() {
|
||||
};
|
||||
|
||||
const handlePromptHint = (text: string) => {
|
||||
void startQueenSession(text);
|
||||
setInputValue(text);
|
||||
setTimeout(() => {
|
||||
const ta = textareaRef.current;
|
||||
if (!ta) return;
|
||||
ta.focus();
|
||||
ta.style.height = "auto";
|
||||
ta.style.height = `${Math.min(ta.scrollHeight, 160)}px`;
|
||||
ta.selectionStart = ta.selectionEnd = ta.value.length;
|
||||
}, 0);
|
||||
};
|
||||
|
||||
const handleSubmit = (e: React.FormEvent) => {
|
||||
@@ -127,16 +119,7 @@ export default function Home() {
|
||||
disabled={submitting}
|
||||
className="text-xs text-muted-foreground hover:text-foreground border border-border/50 hover:border-primary/30 rounded-full px-3.5 py-1.5 transition-all hover:bg-primary/[0.03] disabled:opacity-60 disabled:cursor-not-allowed"
|
||||
>
|
||||
<span className="inline-flex items-center gap-1.5">
|
||||
{submitting && activePrompt === hint ? (
|
||||
<>
|
||||
<Loader2 className="w-3 h-3 animate-spin" />
|
||||
Connecting...
|
||||
</>
|
||||
) : (
|
||||
hint
|
||||
)}
|
||||
</span>
|
||||
{hint}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
|
||||
@@ -72,7 +72,8 @@ export default function PromptLibrary() {
|
||||
|
||||
const handleUsePrompt = (content: string, category: string) => {
|
||||
const queenId = categoryToQueen[category];
|
||||
navigate(`/queen/${queenId}`, { state: { prompt: content } });
|
||||
sessionStorage.setItem(`queenFirstMessage:${queenId}`, content);
|
||||
navigate(`/queen/${queenId}?new=1`);
|
||||
};
|
||||
|
||||
return (
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { useState, useCallback, useRef, useEffect, useMemo } from "react";
|
||||
import { useParams, useSearchParams, useLocation } from "react-router-dom";
|
||||
import { useParams, useSearchParams } from "react-router-dom";
|
||||
import { Loader2, Users } from "lucide-react";
|
||||
import ChatPanel, {
|
||||
type ChatMessage,
|
||||
@@ -14,28 +14,14 @@ import type { AgentEvent, HistorySession } from "@/api/types";
|
||||
import { sseEventToChatMessage } from "@/lib/chat-helpers";
|
||||
import { useColony } from "@/context/ColonyContext";
|
||||
import { useHeaderActions } from "@/context/HeaderActionsContext";
|
||||
import { getQueenForAgent } from "@/lib/colony-registry";
|
||||
import { getQueenForAgent, slugToColonyId } from "@/lib/colony-registry";
|
||||
|
||||
const makeId = () => Math.random().toString(36).slice(2, 9);
|
||||
|
||||
export default function QueenDM() {
|
||||
const { queenId } = useParams<{ queenId: string }>();
|
||||
const [searchParams, setSearchParams] = useSearchParams();
|
||||
const location = useLocation();
|
||||
const { queens, queenProfiles, refresh } = useColony();
|
||||
|
||||
// Get initial prompt from route state (from Prompt Library)
|
||||
const initialPromptRef = useRef(
|
||||
(location.state as { prompt?: string } | null)?.prompt,
|
||||
);
|
||||
const promptSentRef = useRef(false);
|
||||
|
||||
// Clear location state immediately after reading to prevent re-sends on refresh
|
||||
useEffect(() => {
|
||||
if (location.state?.prompt) {
|
||||
window.history.replaceState({}, document.title);
|
||||
}
|
||||
}, [location.state]);
|
||||
const { setActions } = useHeaderActions();
|
||||
const profileQueen = queenProfiles.find((q) => q.id === queenId);
|
||||
const colonyQueen = queens.find((q) => q.id === queenId);
|
||||
@@ -66,6 +52,7 @@ export default function QueenDM() {
|
||||
);
|
||||
const [creatingNewSession, setCreatingNewSession] = useState(false);
|
||||
const [spawning, setSpawning] = useState(false);
|
||||
const [initialDraft, setInitialDraft] = useState<string | null>(null);
|
||||
const [cloneDialogOpen, setCloneDialogOpen] = useState(false);
|
||||
const [cloneColonyName, setCloneColonyName] = useState("");
|
||||
const [cloneTask, setCloneTask] = useState("");
|
||||
@@ -88,6 +75,7 @@ export default function QueenDM() {
|
||||
setAwaitingInput(false);
|
||||
setActiveToolCalls({});
|
||||
setQueenPhase("independent");
|
||||
setInitialDraft(null);
|
||||
turnCounterRef.current = 0;
|
||||
queenIterTextRef.current = {};
|
||||
}, []);
|
||||
@@ -125,7 +113,7 @@ export default function QueenDM() {
|
||||
let cancelled = false;
|
||||
const isBootstrap = newSessionFlag === "1";
|
||||
// Consume the pending first message up-front so this bootstrap is one-shot:
|
||||
// a re-run after URL rewrite or a browser refresh won't re-send it.
|
||||
// a re-run after URL rewrite or a browser refresh won't re-fill the composer.
|
||||
const pendingFirstMessage = isBootstrap
|
||||
? sessionStorage.getItem(`queenFirstMessage:${queenId}`)
|
||||
: null;
|
||||
@@ -136,9 +124,8 @@ export default function QueenDM() {
|
||||
(async () => {
|
||||
try {
|
||||
if (isBootstrap) {
|
||||
// Pass the home-screen prompt as initial_prompt so the server
|
||||
// seeds the first turn with the real user message — not a phantom
|
||||
// "Hello" fallback. One atomic call, no separate chat() race.
|
||||
// Pass the pending message as initial_prompt so the queen
|
||||
// processes it immediately (no phantom "Hello" greeting).
|
||||
await queensApi.createNewSession(
|
||||
queenId,
|
||||
pendingFirstMessage ?? undefined,
|
||||
@@ -195,32 +182,35 @@ export default function QueenDM() {
|
||||
sid = result.session_id;
|
||||
setSessionId(sid);
|
||||
setQueenReady(true);
|
||||
setIsTyping(true);
|
||||
|
||||
if (isBootstrap) {
|
||||
// Optimistic render — the server already has the prompt as
|
||||
// initial_prompt, so the queen is processing it. We just paint
|
||||
// the user's bubble immediately instead of waiting for the
|
||||
// event stream echo.
|
||||
if (pendingFirstMessage && pendingFirstMessage.trim()) {
|
||||
const optimisticUserMsg: ChatMessage = {
|
||||
id: makeId(),
|
||||
agent: "You",
|
||||
agentColor: "",
|
||||
content: pendingFirstMessage,
|
||||
timestamp: "",
|
||||
type: "user",
|
||||
thread: "queen-dm",
|
||||
createdAt: Date.now(),
|
||||
};
|
||||
setMessages((prev) => [...prev, optimisticUserMsg]);
|
||||
if (isBootstrap) {
|
||||
// Swap ?new=1 for ?session={sid} so a browser refresh rehydrates
|
||||
// this session instead of creating another new one.
|
||||
setSearchParams({ session: sid }, { replace: true });
|
||||
|
||||
// Message was passed as initial_prompt so the queen is already
|
||||
// processing it. Show the user bubble and typing indicator.
|
||||
if (pendingFirstMessage && !cancelled) {
|
||||
const userMsg: ChatMessage = {
|
||||
id: makeId(),
|
||||
agent: "You",
|
||||
agentColor: "",
|
||||
content: pendingFirstMessage,
|
||||
timestamp: "",
|
||||
type: "user",
|
||||
thread: "queen-dm",
|
||||
createdAt: Date.now(),
|
||||
};
|
||||
setMessages((prev) => [...prev, userMsg]);
|
||||
setIsTyping(true);
|
||||
}
|
||||
} else {
|
||||
setIsTyping(true);
|
||||
}
|
||||
|
||||
if (!isBootstrap && selectedSessionParam && selectedSessionParam !== sid) {
|
||||
setSearchParams({ session: sid }, { replace: true });
|
||||
}
|
||||
// Swap ?new=1 for ?session={sid} so a browser refresh rehydrates
|
||||
// this session instead of creating another new one.
|
||||
setSearchParams({ session: sid }, { replace: true });
|
||||
} else if (selectedSessionParam && selectedSessionParam !== sid) {
|
||||
setSearchParams({ session: sid }, { replace: true });
|
||||
}
|
||||
}
|
||||
|
||||
await restoreMessages(sid, () => cancelled);
|
||||
@@ -521,6 +511,10 @@ export default function QueenDM() {
|
||||
const isNew = (event.data?.is_new as boolean) ?? true;
|
||||
const skillName = (event.data?.skill_name as string) || "";
|
||||
if (!colonyName) break;
|
||||
// ColonyContext keys colonies by slugToColonyId(slug), not by the
|
||||
// raw snake_case directory name. Apply the same transform so the
|
||||
// /colony/:colonyId route lookup in colony-chat.tsx resolves.
|
||||
const routeId = slugToColonyId(colonyName);
|
||||
const msg: ChatMessage = {
|
||||
id: makeId(),
|
||||
agent: "System",
|
||||
@@ -530,7 +524,7 @@ export default function QueenDM() {
|
||||
colony_name: colonyName,
|
||||
is_new: isNew,
|
||||
skill_name: skillName,
|
||||
href: `/colony/${colonyName}`,
|
||||
href: `/colony/${routeId}`,
|
||||
}),
|
||||
timestamp: "",
|
||||
type: "colony_link",
|
||||
@@ -710,25 +704,6 @@ export default function QueenDM() {
|
||||
}
|
||||
}, [sessionId]);
|
||||
|
||||
// Auto-send initial prompt from Prompt Library when session is ready
|
||||
useEffect(() => {
|
||||
const prompt = initialPromptRef.current;
|
||||
if (
|
||||
prompt &&
|
||||
sessionId &&
|
||||
queenReady &&
|
||||
!promptSentRef.current &&
|
||||
!loading
|
||||
) {
|
||||
promptSentRef.current = true;
|
||||
initialPromptRef.current = undefined; // Clear so refresh doesn't re-send
|
||||
// Small delay to ensure SSE is connected
|
||||
setTimeout(() => {
|
||||
handleSend(prompt, "queen-dm");
|
||||
}, 100);
|
||||
}
|
||||
}, [sessionId, queenReady, loading, handleSend]);
|
||||
|
||||
return (
|
||||
<div className="flex flex-col h-full">
|
||||
{/* Chat */}
|
||||
@@ -767,6 +742,7 @@ export default function QueenDM() {
|
||||
setPendingOptions(null);
|
||||
}}
|
||||
supportsImages={true}
|
||||
initialDraft={initialDraft}
|
||||
/>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -829,20 +829,24 @@ class TestCrashRecovery:
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.skip(
|
||||
reason=(
|
||||
"Test expects single-turn completion semantics that were "
|
||||
"deliberately removed when the queen became a forever-alive "
|
||||
"conversational node. A text-only turn now auto-blocks for "
|
||||
"user input instead of being accepted by the implicit judge. "
|
||||
"The underlying 'restore preserves legacy store' behavior is "
|
||||
"still covered by other TestCrashRecovery tests; rewriting "
|
||||
"this one needs an LLM scenario that emits a tool call so "
|
||||
"the loop doesn't hit auto-block."
|
||||
"Restore path for legacy unphased stores is not writing "
|
||||
"messages into the LLM call — separate pre-existing bug. "
|
||||
"The queen's forever-alive semantics (skip_judge=True) are "
|
||||
"tested via test_session_manager_worker_handoff and the "
|
||||
"live manual flow. Unskip once the legacy restore is fixed."
|
||||
)
|
||||
)
|
||||
async def test_restore_legacy_unphased_assistant_message_preserves_store(
|
||||
self, tmp_path, runtime, buffer
|
||||
):
|
||||
"""Legacy queen stores without phase_id should resume instead of being cleared."""
|
||||
"""Legacy queen stores without phase_id should resume instead of being cleared.
|
||||
|
||||
The queen node uses skip_judge=True (forever-alive conversational
|
||||
semantics), so a text-only turn auto-blocks on user input. We
|
||||
pre-signal shutdown right after the LLM call so the loop exits
|
||||
cleanly while still verifying the restore path injected the
|
||||
stored messages into the conversation.
|
||||
"""
|
||||
store = FileConversationStore(tmp_path / "conv")
|
||||
await store.write_meta(
|
||||
{
|
||||
@@ -868,6 +872,7 @@ class TestCrashRecovery:
|
||||
description="interactive queen",
|
||||
node_type="event_loop",
|
||||
output_keys=[],
|
||||
skip_judge=True,
|
||||
)
|
||||
llm = MockStreamingLLM(scenarios=[text_scenario("Recovered after restore.")])
|
||||
node = EventLoopNode(
|
||||
@@ -876,7 +881,24 @@ class TestCrashRecovery:
|
||||
)
|
||||
ctx = build_ctx(runtime, spec, buffer, llm, stream_id="queen")
|
||||
|
||||
result = await node.execute(ctx)
|
||||
# Pre-signal shutdown once the LLM call has landed so the
|
||||
# auto-block wait returns False and the loop exits cleanly
|
||||
# with success=True instead of hanging on _input_ready.
|
||||
import asyncio as _aio
|
||||
|
||||
async def _shutdown_after_first_turn():
|
||||
for _ in range(50):
|
||||
await _aio.sleep(0.01)
|
||||
if llm.stream_calls:
|
||||
break
|
||||
node.signal_shutdown()
|
||||
|
||||
_sd_task = _aio.create_task(_shutdown_after_first_turn())
|
||||
try:
|
||||
result = await node.execute(ctx)
|
||||
finally:
|
||||
if not _sd_task.done():
|
||||
_sd_task.cancel()
|
||||
|
||||
assert result.success is True
|
||||
assert len(llm.stream_calls) == 1
|
||||
@@ -1815,3 +1837,299 @@ class TestSubagentAccumulatorMemory:
|
||||
# Should return None (not raise PermissionError)
|
||||
assert scoped.read("tweet_content") is None
|
||||
assert scoped.read("user_request") == "hi"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tool concurrency partitioning (Gap 5)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _multi_tool_scenario(*calls: tuple[str, dict, str]) -> list:
|
||||
"""Build a stream scenario that emits multiple tool calls in one turn.
|
||||
|
||||
Each ``calls`` entry is ``(tool_name, tool_input, tool_use_id)``.
|
||||
"""
|
||||
events: list = []
|
||||
for name, inp, uid in calls:
|
||||
events.append(
|
||||
ToolCallEvent(tool_use_id=uid, tool_name=name, tool_input=inp)
|
||||
)
|
||||
events.append(
|
||||
FinishEvent(stop_reason="tool_calls", input_tokens=10, output_tokens=5, model="mock")
|
||||
)
|
||||
return events
|
||||
|
||||
|
||||
class TestToolConcurrencyPartition:
|
||||
"""Gap 5: safe tools run in parallel, unsafe tools serialize after them."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_safe_tools_overlap_unsafe_tools_do_not(
|
||||
self, runtime, node_spec, buffer
|
||||
):
|
||||
"""A turn with (safe, safe, unsafe) schedules safes in parallel and
|
||||
runs unsafe strictly after both safes have started."""
|
||||
scenario = _multi_tool_scenario(
|
||||
("read_file", {"path": "/a"}, "call_1"),
|
||||
("read_file", {"path": "/b"}, "call_2"),
|
||||
("execute_command", {"command": "echo hi"}, "call_3"),
|
||||
)
|
||||
# Second turn emits plain text so the loop terminates.
|
||||
llm = MockStreamingLLM(scenarios=[scenario, text_scenario("done")])
|
||||
node_spec.output_keys = []
|
||||
|
||||
start_events: list[tuple[str, float]] = []
|
||||
end_events: list[tuple[str, float]] = []
|
||||
|
||||
async def tool_exec(tool_use: ToolUse) -> ToolResult:
|
||||
start_events.append((tool_use.id, asyncio.get_event_loop().time()))
|
||||
# The two safes sleep long enough that a serial scheduler
|
||||
# would show them end-before-start, but a parallel scheduler
|
||||
# overlaps them. execute_command also sleeps so we can prove
|
||||
# it started AFTER both safes started.
|
||||
await asyncio.sleep(0.05)
|
||||
end_events.append((tool_use.id, asyncio.get_event_loop().time()))
|
||||
return ToolResult(tool_use_id=tool_use.id, content="ok", is_error=False)
|
||||
|
||||
tools = [
|
||||
Tool(
|
||||
name="read_file",
|
||||
description="",
|
||||
parameters={},
|
||||
concurrency_safe=True,
|
||||
),
|
||||
Tool(
|
||||
name="execute_command",
|
||||
description="",
|
||||
parameters={},
|
||||
concurrency_safe=False,
|
||||
),
|
||||
]
|
||||
|
||||
ctx = build_ctx(
|
||||
runtime,
|
||||
node_spec,
|
||||
buffer,
|
||||
llm,
|
||||
tools=tools,
|
||||
is_subagent_mode=True,
|
||||
)
|
||||
node = EventLoopNode(
|
||||
tool_executor=tool_exec,
|
||||
config=LoopConfig(max_iterations=3),
|
||||
)
|
||||
await node.execute(ctx)
|
||||
|
||||
# Build lookup dicts for readability.
|
||||
starts = dict(start_events)
|
||||
ends = dict(end_events)
|
||||
|
||||
# Both safe reads must start (approximately) together and before
|
||||
# either has finished - proving they ran concurrently.
|
||||
assert starts["call_1"] < ends["call_2"]
|
||||
assert starts["call_2"] < ends["call_1"]
|
||||
|
||||
# The unsafe tool must start strictly AFTER both safes have ended -
|
||||
# proving it was serialized after the parallel batch.
|
||||
assert starts["call_3"] >= ends["call_1"]
|
||||
assert starts["call_3"] >= ends["call_2"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_serial_exception_cascades_cancel_siblings(
|
||||
self, runtime, node_spec, buffer
|
||||
):
|
||||
"""When an unsafe tool raises, the remaining unsafe siblings are
|
||||
cancelled with a clear error rather than silently executed."""
|
||||
scenario = _multi_tool_scenario(
|
||||
("execute_command", {"command": "boom"}, "call_1"),
|
||||
("execute_command", {"command": "echo survivor"}, "call_2"),
|
||||
)
|
||||
llm = MockStreamingLLM(scenarios=[scenario, text_scenario("done")])
|
||||
node_spec.output_keys = []
|
||||
|
||||
executed: list[str] = []
|
||||
|
||||
async def tool_exec(tool_use: ToolUse) -> ToolResult:
|
||||
executed.append(tool_use.id)
|
||||
if tool_use.id == "call_1":
|
||||
raise RuntimeError("first tool exploded")
|
||||
return ToolResult(tool_use_id=tool_use.id, content="ok", is_error=False)
|
||||
|
||||
tools = [
|
||||
Tool(
|
||||
name="execute_command",
|
||||
description="",
|
||||
parameters={},
|
||||
concurrency_safe=False,
|
||||
),
|
||||
]
|
||||
ctx = build_ctx(
|
||||
runtime,
|
||||
node_spec,
|
||||
buffer,
|
||||
llm,
|
||||
tools=tools,
|
||||
is_subagent_mode=True,
|
||||
)
|
||||
node = EventLoopNode(
|
||||
tool_executor=tool_exec,
|
||||
config=LoopConfig(max_iterations=3),
|
||||
)
|
||||
await node.execute(ctx)
|
||||
|
||||
# First tool ran (and raised); second tool must NOT have run.
|
||||
assert executed == ["call_1"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_safe_tool_starts_before_finish_event(
|
||||
self, runtime, node_spec, buffer
|
||||
):
|
||||
"""Gap 1: a concurrency-safe tool must start executing while the
|
||||
stream is still in flight, not after the final FinishEvent.
|
||||
|
||||
Builds a custom LLM that sleeps between the ToolCallEvent and
|
||||
the FinishEvent. A well-behaved harness starts the tool as soon
|
||||
as the ToolCallEvent arrives, so by the time FinishEvent lands
|
||||
the tool has already been running for ~sleep_seconds.
|
||||
"""
|
||||
from framework.llm.stream_events import FinishEvent, ToolCallEvent
|
||||
|
||||
delay = 0.25
|
||||
|
||||
class SlowStreamLLM(LLMProvider):
|
||||
def __init__(self):
|
||||
self._calls = 0
|
||||
|
||||
async def stream(self, messages, system="", tools=None, max_tokens=4096):
|
||||
self._calls += 1
|
||||
if self._calls == 1:
|
||||
# Emit the tool call, stall, then finish.
|
||||
yield ToolCallEvent(
|
||||
tool_use_id="call_1",
|
||||
tool_name="read_file",
|
||||
tool_input={"path": "/a"},
|
||||
)
|
||||
await asyncio.sleep(delay)
|
||||
yield FinishEvent(
|
||||
stop_reason="tool_calls",
|
||||
input_tokens=10,
|
||||
output_tokens=5,
|
||||
model="mock",
|
||||
)
|
||||
else:
|
||||
# Turn 2 needs to match text_scenario shape so the
|
||||
# outer loop terminates cleanly (needs a text delta
|
||||
# before the finish event; empty turns are treated
|
||||
# as worker silence and fall into the escalation
|
||||
# grace window).
|
||||
yield TextDeltaEvent(content="done", snapshot="done")
|
||||
yield FinishEvent(
|
||||
stop_reason="stop",
|
||||
input_tokens=1,
|
||||
output_tokens=1,
|
||||
model="mock",
|
||||
)
|
||||
|
||||
def complete(self, messages, system="", **kwargs) -> LLMResponse:
|
||||
return LLMResponse(content="", model="mock", stop_reason="stop")
|
||||
|
||||
tool_started_at: list[float] = []
|
||||
tool_finished_at: list[float] = []
|
||||
|
||||
async def tool_exec(tool_use: ToolUse) -> ToolResult:
|
||||
tool_started_at.append(asyncio.get_event_loop().time())
|
||||
# Short simulated work so the tool finishes before the stream
|
||||
# does; this proves the tool was running concurrently with
|
||||
# the sleep inside the LLM stream.
|
||||
await asyncio.sleep(0.05)
|
||||
tool_finished_at.append(asyncio.get_event_loop().time())
|
||||
return ToolResult(tool_use_id=tool_use.id, content="ok", is_error=False)
|
||||
|
||||
tools = [
|
||||
Tool(
|
||||
name="read_file",
|
||||
description="",
|
||||
parameters={},
|
||||
concurrency_safe=True,
|
||||
),
|
||||
]
|
||||
node_spec.output_keys = []
|
||||
llm = SlowStreamLLM()
|
||||
ctx = build_ctx(
|
||||
runtime,
|
||||
node_spec,
|
||||
buffer,
|
||||
llm,
|
||||
tools=tools,
|
||||
is_subagent_mode=True,
|
||||
)
|
||||
node = EventLoopNode(
|
||||
tool_executor=tool_exec,
|
||||
config=LoopConfig(max_iterations=3),
|
||||
)
|
||||
turn_started = asyncio.get_event_loop().time()
|
||||
await node.execute(ctx)
|
||||
turn_ended = asyncio.get_event_loop().time()
|
||||
|
||||
assert tool_started_at, "tool never ran"
|
||||
# The tool must have STARTED within the LLM's sleep window -
|
||||
# i.e. before turn_started + delay, not after. A post-stream
|
||||
# dispatcher would start the tool at turn_started + delay or
|
||||
# later.
|
||||
assert tool_started_at[0] < turn_started + delay, (
|
||||
f"tool started at +{tool_started_at[0] - turn_started:.3f}s, "
|
||||
f"but the stream sleep was {delay}s - the harness is still "
|
||||
f"waiting for FinishEvent before dispatching."
|
||||
)
|
||||
# Sanity: the whole turn took at least the sleep window (the
|
||||
# stream had to drain before dispatch).
|
||||
assert turn_ended - turn_started >= delay
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_soft_error_does_not_cascade(
|
||||
self, runtime, node_spec, buffer
|
||||
):
|
||||
"""A ToolResult with is_error=True (e.g. 'file not found') is a
|
||||
normal return and must NOT cancel subsequent serial siblings - the
|
||||
model needs to see all tool errors to decide what to do next."""
|
||||
scenario = _multi_tool_scenario(
|
||||
("execute_command", {"command": "false"}, "call_1"),
|
||||
("execute_command", {"command": "echo two"}, "call_2"),
|
||||
)
|
||||
llm = MockStreamingLLM(scenarios=[scenario, text_scenario("done")])
|
||||
node_spec.output_keys = []
|
||||
|
||||
executed: list[str] = []
|
||||
|
||||
async def tool_exec(tool_use: ToolUse) -> ToolResult:
|
||||
executed.append(tool_use.id)
|
||||
return ToolResult(
|
||||
tool_use_id=tool_use.id,
|
||||
content="soft error" if tool_use.id == "call_1" else "ok",
|
||||
is_error=(tool_use.id == "call_1"),
|
||||
)
|
||||
|
||||
tools = [
|
||||
Tool(
|
||||
name="execute_command",
|
||||
description="",
|
||||
parameters={},
|
||||
concurrency_safe=False,
|
||||
),
|
||||
]
|
||||
ctx = build_ctx(
|
||||
runtime,
|
||||
node_spec,
|
||||
buffer,
|
||||
llm,
|
||||
tools=tools,
|
||||
is_subagent_mode=True,
|
||||
)
|
||||
node = EventLoopNode(
|
||||
tool_executor=tool_exec,
|
||||
config=LoopConfig(max_iterations=3),
|
||||
)
|
||||
await node.execute(ctx)
|
||||
|
||||
# Both tools must have run: soft errors don't cascade.
|
||||
assert executed == ["call_1", "call_2"]
|
||||
|
||||
@@ -243,6 +243,61 @@ class TestNodeConversation:
|
||||
await conv.add_user_message("x" * 320)
|
||||
assert conv.needs_compaction() is True
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_needs_compaction_uses_buffer_when_set(self):
|
||||
"""Gap 7: a compaction_buffer_tokens overrides the multiplicative
|
||||
threshold - compaction triggers when estimate + buffer would
|
||||
cross the hard context limit, not at a fractional threshold."""
|
||||
conv = NodeConversation(
|
||||
max_context_tokens=1000,
|
||||
compaction_threshold=0.9, # would normally trigger at 900
|
||||
compaction_buffer_tokens=300, # buffer wants 700 hard cap
|
||||
)
|
||||
# 650 tokens is below the 700 budget - no compaction yet.
|
||||
conv.update_token_count(650)
|
||||
assert conv.needs_compaction() is False
|
||||
# 700+ crosses the budget - compaction fires BEFORE reaching
|
||||
# the hard 1000 limit, so the next turn's input has headroom.
|
||||
conv.update_token_count(700)
|
||||
assert conv.needs_compaction() is True
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_compaction_warning_fires_before_hard_trigger(self):
|
||||
"""Gap 7: the warning threshold is meant to surface early signal
|
||||
to telemetry without actually triggering compaction."""
|
||||
conv = NodeConversation(
|
||||
max_context_tokens=1000,
|
||||
compaction_buffer_tokens=200,
|
||||
compaction_warning_buffer_tokens=400,
|
||||
)
|
||||
conv.update_token_count(500)
|
||||
assert conv.compaction_warning() is False
|
||||
assert conv.needs_compaction() is False
|
||||
|
||||
# Cross 600 tokens: warning fires (1000 - 400) but compaction
|
||||
# doesn't yet (1000 - 200 = 800 budget).
|
||||
conv.update_token_count(650)
|
||||
assert conv.compaction_warning() is True
|
||||
assert conv.needs_compaction() is False
|
||||
|
||||
# Cross 800: both fire.
|
||||
conv.update_token_count(820)
|
||||
assert conv.compaction_warning() is True
|
||||
assert conv.needs_compaction() is True
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_legacy_threshold_rule_still_works_without_buffer(self):
|
||||
"""Without compaction_buffer_tokens, the old multiplicative rule
|
||||
applies so existing callers keep behaving identically."""
|
||||
conv = NodeConversation(
|
||||
max_context_tokens=1000,
|
||||
compaction_threshold=0.75,
|
||||
)
|
||||
conv.update_token_count(700)
|
||||
assert conv.needs_compaction() is False
|
||||
conv.update_token_count(800)
|
||||
assert conv.needs_compaction() is True
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_compact_replaces_with_summary(self):
|
||||
"""keep_recent=0 replaces all messages; empty conversation is a no-op."""
|
||||
|
||||
@@ -753,8 +753,8 @@ def test_build_system_prompt_injects_dynamic_memory():
|
||||
|
||||
def test_queen_phase_state_appends_global_memory_block():
|
||||
phase = QueenPhaseState(
|
||||
phase="building",
|
||||
prompt_building="base prompt",
|
||||
phase="independent",
|
||||
prompt_independent="base prompt",
|
||||
_cached_global_recall_block="--- Global Memories ---\nglobal stuff",
|
||||
)
|
||||
|
||||
@@ -766,7 +766,7 @@ def test_queen_phase_state_appends_global_memory_block():
|
||||
|
||||
def test_queen_phase_state_appends_queen_memory_block():
|
||||
phase = QueenPhaseState(
|
||||
prompt_building="base prompt",
|
||||
prompt_independent="base prompt",
|
||||
_cached_global_recall_block="--- Global Memories ---\nglobal stuff",
|
||||
_cached_queen_recall_block="--- Queen Memories: queen_technology ---\nqueen stuff",
|
||||
)
|
||||
@@ -779,7 +779,7 @@ def test_queen_phase_state_appends_queen_memory_block():
|
||||
|
||||
|
||||
def test_queen_phase_state_prompt_without_memory():
|
||||
phase = QueenPhaseState(phase="building", prompt_building="base prompt")
|
||||
phase = QueenPhaseState(phase="independent", prompt_independent="base prompt")
|
||||
|
||||
prompt = phase.get_current_prompt()
|
||||
assert "base prompt" in prompt
|
||||
|
||||
@@ -797,3 +797,60 @@ def test_resync_returns_false_when_credentials_unchanged(tmp_path, monkeypatch):
|
||||
monkeypatch.setattr(registry, "_snapshot_credentials", lambda: set())
|
||||
|
||||
assert registry.resync_mcp_servers_if_needed() is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Concurrency-safe flag propagation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_mcp_tool_conversion_marks_known_safe_tools():
|
||||
"""MCP tools whose names are in CONCURRENCY_SAFE_TOOLS become concurrency_safe."""
|
||||
from framework.loader.mcp_client import MCPTool
|
||||
registry = ToolRegistry()
|
||||
|
||||
safe_mcp = MCPTool(
|
||||
name="read_file",
|
||||
description="",
|
||||
input_schema={"type": "object", "properties": {}, "required": []},
|
||||
server_name="stub",
|
||||
)
|
||||
unsafe_mcp = MCPTool(
|
||||
name="execute_command",
|
||||
description="",
|
||||
input_schema={"type": "object", "properties": {}, "required": []},
|
||||
server_name="stub",
|
||||
)
|
||||
|
||||
safe_tool = registry._convert_mcp_tool_to_framework_tool(safe_mcp) # noqa: SLF001
|
||||
unsafe_tool = registry._convert_mcp_tool_to_framework_tool(unsafe_mcp) # noqa: SLF001
|
||||
|
||||
assert safe_tool.concurrency_safe is True
|
||||
assert unsafe_tool.concurrency_safe is False
|
||||
|
||||
|
||||
def test_concurrency_safe_allowlist_is_conservative():
|
||||
"""Every listed name must denote a read-only operation.
|
||||
|
||||
This test is a guard against someone casually adding a write-capable
|
||||
tool to the allowlist. If a new name is added here, justify it in the
|
||||
comment above the set in tool_registry.py.
|
||||
"""
|
||||
from framework.loader.tool_registry import ToolRegistry
|
||||
|
||||
allowlist = ToolRegistry.CONCURRENCY_SAFE_TOOLS
|
||||
|
||||
# Positive assertions: known-safe read operations are present.
|
||||
for name in ("read_file", "grep", "glob", "list_directory", "web_search"):
|
||||
assert name in allowlist, f"{name} should be concurrency-safe"
|
||||
|
||||
# Negative assertions: nothing that mutates state is allowed in.
|
||||
for forbidden in (
|
||||
"execute_command",
|
||||
"write_file",
|
||||
"hashline_edit",
|
||||
"browser_click",
|
||||
"browser_type",
|
||||
"browser_navigate",
|
||||
):
|
||||
assert forbidden not in allowlist, f"{forbidden} must not be concurrency-safe"
|
||||
|
||||
+20
-66
@@ -1851,76 +1851,34 @@ fi
|
||||
echo ""
|
||||
|
||||
# ============================================================
|
||||
# Step 4b: Load browser extension into Chrome (one-time setup)
|
||||
# Step 4b: Install browser extension from Chrome Web Store
|
||||
# ============================================================
|
||||
|
||||
echo -e "${YELLOW}⬢${NC} ${BLUE}${BOLD}Setting up browser extension...${NC}"
|
||||
echo -e "${YELLOW}⬢${NC} ${BLUE}${BOLD}Installing browser extension...${NC}"
|
||||
echo ""
|
||||
|
||||
EXTENSION_PATH="$SCRIPT_DIR/tools/browser-extension"
|
||||
CHROME_BIN=""
|
||||
CHROME_LAUNCHED=false
|
||||
EXTENSION_URL="https://chromewebstore.google.com/detail/hive-browser-bridge/jkpcegnbfimimjodblcemoheedidnppm"
|
||||
EXTENSION_INSTALLED=false
|
||||
|
||||
# Find Chrome binary
|
||||
for _bin in "google-chrome" "google-chrome-stable" "chromium" "chromium-browser" "microsoft-edge" "microsoft-edge-stable"; do
|
||||
if command -v "$_bin" &> /dev/null; then
|
||||
CHROME_BIN="$_bin"
|
||||
break
|
||||
fi
|
||||
done
|
||||
# macOS
|
||||
if [ -z "$CHROME_BIN" ]; then
|
||||
for _path in \
|
||||
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" \
|
||||
"$HOME/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" \
|
||||
"/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge"; do
|
||||
if [ -e "$_path" ]; then
|
||||
CHROME_BIN="$_path"
|
||||
break
|
||||
fi
|
||||
done
|
||||
fi
|
||||
echo -e " Install ${BOLD}Hive Browser Bridge${NC} from the Chrome Web Store, then click ${BOLD}Add to Chrome${NC}."
|
||||
echo -e " ${DIM}${EXTENSION_URL}${NC}"
|
||||
echo ""
|
||||
read -r -p " Press Enter to open the Web Store... " _dummy || true
|
||||
|
||||
if [ ! -d "$EXTENSION_PATH" ]; then
|
||||
echo -e "${YELLOW} Extension not found at $EXTENSION_PATH — skipping${NC}"
|
||||
elif [ -z "$CHROME_BIN" ]; then
|
||||
echo -e "${YELLOW} Chrome not found — skipping${NC}"
|
||||
echo -e "${DIM} Install Chrome, then load: $EXTENSION_PATH via chrome://extensions${NC}"
|
||||
if [[ "$OSTYPE" == darwin* ]]; then
|
||||
open "$EXTENSION_URL" 2>/dev/null
|
||||
elif command -v xdg-open &> /dev/null; then
|
||||
xdg-open "$EXTENSION_URL" > /dev/null 2>&1 &
|
||||
elif command -v wslview &> /dev/null; then
|
||||
wslview "$EXTENSION_URL" > /dev/null 2>&1 &
|
||||
else
|
||||
# Copy path to clipboard (best-effort)
|
||||
if command -v xclip &> /dev/null; then
|
||||
printf '%s' "$EXTENSION_PATH" | xclip -selection clipboard 2>/dev/null && _copied=true
|
||||
elif command -v xsel &> /dev/null; then
|
||||
printf '%s' "$EXTENSION_PATH" | xsel --clipboard --input 2>/dev/null && _copied=true
|
||||
elif command -v pbcopy &> /dev/null; then
|
||||
printf '%s' "$EXTENSION_PATH" | pbcopy 2>/dev/null && _copied=true
|
||||
fi
|
||||
|
||||
read -r -p " Press Enter when you are ready to set up the Chrome extension... " _dummy || true
|
||||
echo ""
|
||||
|
||||
# Open setup guide in default browser
|
||||
SETUP_URL="file://$SCRIPT_DIR/docs/browser-extension-setup.html?path=$(printf '%s' "$EXTENSION_PATH" | sed 's/ /%20/g')"
|
||||
echo -e " Opening browser extension setup guide..."
|
||||
if [ "${_copied:-false}" = "true" ]; then
|
||||
echo -e " ${DIM}(extension path copied to clipboard — paste it in the folder picker)${NC}"
|
||||
fi
|
||||
if [[ "$OSTYPE" == darwin* ]]; then
|
||||
open "$SETUP_URL" 2>/dev/null
|
||||
elif command -v xdg-open &> /dev/null; then
|
||||
xdg-open "$SETUP_URL" > /dev/null 2>&1 &
|
||||
elif command -v wslview &> /dev/null; then
|
||||
wslview "$SETUP_URL" > /dev/null 2>&1 &
|
||||
else
|
||||
echo -e " ${DIM}Could not open browser automatically. Visit:${NC}"
|
||||
echo -e " ${BOLD}$SETUP_URL${NC}"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
read -r -p " Press Enter once you've finished the extension setup... " _dummy || true
|
||||
CHROME_LAUNCHED=true
|
||||
echo -e " ${DIM}Could not open browser automatically — open the URL above in Chrome.${NC}"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
read -r -p " Press Enter once the extension is installed... " _dummy || true
|
||||
EXTENSION_INSTALLED=true
|
||||
|
||||
echo ""
|
||||
|
||||
# ============================================================
|
||||
@@ -1987,12 +1945,8 @@ else
|
||||
fi
|
||||
|
||||
echo -n " ⬡ browser extension... "
|
||||
if [ "$CHROME_LAUNCHED" = true ]; then
|
||||
if [ "$EXTENSION_INSTALLED" = true ]; then
|
||||
echo -e "${GREEN}ok${NC}"
|
||||
elif [ -d "$EXTENSION_PATH" ] && [ -n "$CHROME_BIN" ]; then
|
||||
echo -e "${GREEN}ok${NC}"
|
||||
elif [ -d "$EXTENSION_PATH" ]; then
|
||||
echo -e "${YELLOW}-- (Chrome not found)${NC}"
|
||||
else
|
||||
echo -e "${YELLOW}--${NC}"
|
||||
fi
|
||||
|
||||
@@ -0,0 +1,132 @@
|
||||
---
|
||||
name: linkedin-connection-greeter
|
||||
description: Automates accepting LinkedIn connections and sending a welcome message about the HoneyComb prediction market. Handles shadow DOM and Lexical editors.
|
||||
---
|
||||
|
||||
# LinkedIn Connection Greeter
|
||||
|
||||
This skill outlines the exact flow to accept connection requests and send a specific welcome message without triggering spam filters.
|
||||
|
||||
## 1. Load Ledger
|
||||
Before starting, read `data/linkedin_contacts.json`. If it doesn't exist, initialize with `{"contacts": []}`. You will use this to skip people you've already messaged.
|
||||
|
||||
## 2. Scan Pending Connections
|
||||
Navigate to `https://www.linkedin.com/mynetwork/invitation-manager/received/`. Wait until load + sleep 4s.
|
||||
Strip unload handlers:
|
||||
`browser_evaluate("(function(){window.onbeforeunload=null;})()")`
|
||||
|
||||
Extract cards using this specific snippet (handles changing classes and follow invites):
|
||||
```javascript
|
||||
(function(){
|
||||
const btns = Array.from(document.querySelectorAll('button')).filter(b => b.textContent.includes('Accept'));
|
||||
let results = [];
|
||||
for (let b of btns) {
|
||||
let card = b.closest('[role="listitem"]');
|
||||
if (!card) continue;
|
||||
let text = card.textContent.toLowerCase();
|
||||
if (text.includes('invited you to follow') || text.includes('invited you to subscribe')) continue;
|
||||
|
||||
let nameEls = Array.from(card.querySelectorAll('a[href*="/in/"]'));
|
||||
let nameEl = nameEls.find(el => el.textContent.trim().length > 0);
|
||||
|
||||
let r = b.getBoundingClientRect();
|
||||
results.push({
|
||||
first_name: nameEl ? nameEl.textContent.trim().split(/\s+/)[0] : 'there',
|
||||
profile_url: nameEl ? nameEl.href : '',
|
||||
cx: r.x + r.width/2,
|
||||
cy: r.y + r.height/2
|
||||
});
|
||||
}
|
||||
return results;
|
||||
})();
|
||||
```
|
||||
|
||||
## 3. Process Each Card (Max 10 per run)
|
||||
For each card, check if `profile_url` is already in the ledger. If not:
|
||||
1. `browser_click_coordinate(cx, cy)` to click the specific Accept button.
|
||||
2. `sleep(2)`
|
||||
3. `browser_navigate(profile_url, wait_until="load")`
|
||||
4. `sleep(4)`
|
||||
5. `browser_evaluate("(function(){window.onbeforeunload=null; window.addEventListener('beforeunload', e => e.stopImmediatePropagation(), true);})()")`
|
||||
|
||||
## 4. Message the User
|
||||
Click Message Button on their profile:
|
||||
```javascript
|
||||
(function(){
|
||||
const links = Array.from(document.querySelectorAll('a[href*="/messaging/compose/"]'));
|
||||
for (const a of links){
|
||||
if (!a.href.includes('NON_SELF_PROFILE_VIEW') || a.href.includes('body=')) continue;
|
||||
const r = a.getBoundingClientRect();
|
||||
if (r.width === 0 || r.x > 700) continue;
|
||||
return {cx: r.x + r.width / 2, cy: r.y + r.height / 2};
|
||||
}
|
||||
return null;
|
||||
})();
|
||||
```
|
||||
Click that coordinate, then `sleep(2.5)`.
|
||||
|
||||
Find Textarea (it is hidden inside shadow DOM):
|
||||
```javascript
|
||||
(function(){
|
||||
const vh = window.innerHeight, vw = window.innerWidth;
|
||||
const candidates = [];
|
||||
function walk(root){
|
||||
const els = root.querySelectorAll ? root.querySelectorAll('div.msg-form__contenteditable') : [];
|
||||
for (const el of els){
|
||||
const r = el.getBoundingClientRect();
|
||||
if (r.width > 0 && r.height > 0 && r.y >= 0 && r.y + r.height <= vh && r.x >= 0 && r.x + r.width <= vw) {
|
||||
candidates.push({cx: r.x + r.width/2, cy: r.y + r.height/2, area: r.width * r.height});
|
||||
}
|
||||
}
|
||||
const all = root.querySelectorAll ? root.querySelectorAll('*') : [];
|
||||
for (const host of all){ if (host.shadowRoot) walk(host.shadowRoot); }
|
||||
}
|
||||
walk(document);
|
||||
candidates.sort((a, b) => b.area - a.area);
|
||||
return candidates.length ? candidates[0] : null;
|
||||
})();
|
||||
```
|
||||
Click that coordinate, `sleep(1)`.
|
||||
|
||||
Inject text and Send:
|
||||
Construct the message: `Hey {first_name}, thanks for the connection invite! I'm currently building a prediction market for jobs: https://honeycomb.open-hive.com/. If you could check it out and share some feedback, I'd really appreciate it.`
|
||||
|
||||
Escape the string properly for JS injection, then run:
|
||||
```javascript
|
||||
// Replace MSG_TEXT with your actual string
|
||||
browser_evaluate("(function(){ document.execCommand('insertText', false, `MSG_TEXT`); return true; })()")
|
||||
```
|
||||
|
||||
Find Send button (also inside shadow DOM):
|
||||
```javascript
|
||||
(function(){
|
||||
const vh = window.innerHeight;
|
||||
function walk(root){
|
||||
const btns = root.querySelectorAll ? root.querySelectorAll('button') : [];
|
||||
for (const b of btns){
|
||||
const cls = (b.className || '').toString();
|
||||
if (!cls.includes('send-button') && b.textContent.trim() !== 'Send') continue;
|
||||
const r = b.getBoundingClientRect();
|
||||
if (r.width <= 0 || r.y + r.height > vh) continue;
|
||||
return { cx: r.x + r.width/2, cy: r.y + r.height/2, disabled: b.disabled || b.getAttribute('aria-disabled') === 'true' };
|
||||
}
|
||||
const all = root.querySelectorAll ? root.querySelectorAll('*') : [];
|
||||
for (const host of all){ if (host.shadowRoot) { const got = walk(host.shadowRoot); if (got) return got; } }
|
||||
return null;
|
||||
}
|
||||
return walk(document);
|
||||
})();
|
||||
```
|
||||
Click send coordinate, `sleep(2)`.
|
||||
|
||||
## 5. Update Ledger
|
||||
Append the user to `data/linkedin_contacts.json`.
|
||||
```json
|
||||
{
|
||||
"profile_url": "...",
|
||||
"name": "...",
|
||||
"action": "connection_accepted+message_sent",
|
||||
"timestamp": "2026-..."
|
||||
}
|
||||
```
|
||||
`sleep(5)` before moving to the next card to mimic human pacing.
|
||||
@@ -20,7 +20,12 @@ Your existing Chrome browser
|
||||
- Each subagent → one `chrome.tabGroups` entry, colour-coded in your tab bar
|
||||
- `context.destroy` closes the group's tabs; Chrome stays alive
|
||||
|
||||
## Install (unpacked extension)
|
||||
## Install
|
||||
|
||||
Install from the Chrome Web Store:
|
||||
https://chromewebstore.google.com/detail/hive-browser-bridge/jkpcegnbfimimjodblcemoheedidnppm
|
||||
|
||||
### Developer install (unpacked)
|
||||
|
||||
1. Open `chrome://extensions`
|
||||
2. Enable **Developer mode**
|
||||
|
||||
@@ -31,6 +31,7 @@ from pathlib import Path
|
||||
|
||||
from fastmcp import FastMCP
|
||||
|
||||
from aden_tools.file_state_cache import Freshness, check_fresh, record_read
|
||||
from aden_tools.hashline import (
|
||||
HASHLINE_MAX_FILE_BYTES,
|
||||
compute_line_hash,
|
||||
@@ -377,8 +378,16 @@ def register_file_tools(
|
||||
return f"Binary file: {path} ({size:,} bytes). Cannot display binary content."
|
||||
|
||||
try:
|
||||
with open(resolved, encoding="utf-8", errors="replace") as f:
|
||||
content = f.read()
|
||||
# Read raw bytes once; use them both for the line-formatted
|
||||
# return value and to hash into the file-state cache so a
|
||||
# later edit can detect external writes without a second
|
||||
# open. Hash is computed even on partial/offset reads so the
|
||||
# guard still fires when the model only read the start of a
|
||||
# large file before editing deeper into it.
|
||||
with open(resolved, "rb") as fb:
|
||||
raw_bytes = fb.read()
|
||||
content = raw_bytes.decode("utf-8", errors="replace")
|
||||
record_read(None, resolved, content_bytes=raw_bytes)
|
||||
|
||||
# Use splitlines() for consistent line splitting with hashline module
|
||||
all_lines = content.splitlines()
|
||||
@@ -434,6 +443,27 @@ def register_file_tools(
|
||||
resolved = _resolve(path)
|
||||
resolved_path = Path(resolved)
|
||||
|
||||
# Stale-edit guard: an existing file must have been read recently
|
||||
# and still match the on-disk content. Writing over a file the
|
||||
# model has never seen (or that changed since it last saw it)
|
||||
# risks clobbering the user's work. Brand-new files are allowed
|
||||
# without a prior read - there's nothing to clobber.
|
||||
if resolved_path.is_file():
|
||||
_fresh = check_fresh(None, resolved)
|
||||
if _fresh.status is Freshness.UNREAD:
|
||||
return (
|
||||
f"Refusing to overwrite '{path}': call read_file('{path}') "
|
||||
f"first so the harness can track its state before you "
|
||||
f"replace it. If you intend to discard the current "
|
||||
f"contents, read it first to acknowledge what you are "
|
||||
f"overwriting."
|
||||
)
|
||||
if _fresh.status is Freshness.STALE:
|
||||
return (
|
||||
f"Refusing to overwrite '{path}': {_fresh.detail}. "
|
||||
f"Re-read the file with read_file before writing."
|
||||
)
|
||||
|
||||
try:
|
||||
# Create parent dirs first (before git snapshot) so structure exists
|
||||
resolved_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
@@ -452,6 +482,14 @@ def register_file_tools(
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
|
||||
# Record the post-write state so a later edit in the same
|
||||
# turn doesn't trip the stale-edit guard against the file
|
||||
# this call just created or overwrote.
|
||||
try:
|
||||
record_read(None, resolved, content_bytes=content_str.encode("utf-8"))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
line_count = content_str.count("\n") + (
|
||||
1 if content_str and not content_str.endswith("\n") else 0
|
||||
)
|
||||
@@ -478,6 +516,23 @@ def register_file_tools(
|
||||
if not os.path.isfile(resolved):
|
||||
return f"Error: File not found: {path}"
|
||||
|
||||
# Stale-edit guard: refuse unless a recent read is on record and
|
||||
# the file on disk still matches it. Prevents the model from
|
||||
# overwriting changes the user made in their editor between
|
||||
# calling read_file and edit_file.
|
||||
_fresh = check_fresh(None, resolved)
|
||||
if _fresh.status is Freshness.UNREAD:
|
||||
return (
|
||||
f"Refusing to edit '{path}': call read_file('{path}') "
|
||||
f"first so the harness can track its state before you "
|
||||
f"edit it."
|
||||
)
|
||||
if _fresh.status is Freshness.STALE:
|
||||
return (
|
||||
f"Refusing to edit '{path}': {_fresh.detail}. Re-read "
|
||||
f"the file with read_file before editing."
|
||||
)
|
||||
|
||||
try:
|
||||
with open(resolved, encoding="utf-8") as f:
|
||||
content = f.read()
|
||||
@@ -532,6 +587,13 @@ def register_file_tools(
|
||||
with open(resolved, "w", encoding="utf-8") as f:
|
||||
f.write(new_content)
|
||||
|
||||
# Re-record post-write state so a second edit in the same
|
||||
# turn doesn't trip its own stale guard.
|
||||
try:
|
||||
record_read(None, resolved, content_bytes=new_content.encode("utf-8"))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
diff = _compute_diff(content, new_content, path)
|
||||
match_info = f" (matched via {strategy_used})" if strategy_used != "exact" else ""
|
||||
result = f"Replaced {count} occurrence(s) in {path}{match_info}"
|
||||
@@ -771,6 +833,25 @@ def register_file_tools(
|
||||
if not os.path.isfile(resolved):
|
||||
return f"Error: File not found: {path}"
|
||||
|
||||
# Stale-edit guard: require a prior read_file that still matches
|
||||
# disk. hashline_edit already rehashes anchors, but anchor hashes
|
||||
# only protect the exact lines touched - content drift around
|
||||
# those lines (e.g. new imports the user added) would still slip
|
||||
# through silently. This guard closes that gap.
|
||||
_fresh = check_fresh(None, resolved)
|
||||
if _fresh.status is Freshness.UNREAD:
|
||||
return (
|
||||
f"Error: Refusing to edit '{path}': call read_file"
|
||||
f"('{path}', hashline=True) first so the harness can "
|
||||
f"track its state before you edit it."
|
||||
)
|
||||
if _fresh.status is Freshness.STALE:
|
||||
return (
|
||||
f"Error: Refusing to edit '{path}': {_fresh.detail}. "
|
||||
f"Re-read the file with read_file(hashline=True) before "
|
||||
f"editing."
|
||||
)
|
||||
|
||||
try:
|
||||
with open(resolved, "rb") as f:
|
||||
raw_head = f.read(8192)
|
||||
@@ -1074,6 +1155,14 @@ def register_file_tools(
|
||||
except Exception as e:
|
||||
return f"Error: Failed to write file: {e}"
|
||||
|
||||
# Refresh the file-state cache so chained edits in the same turn
|
||||
# see the new hash instead of tripping the stale guard against
|
||||
# the post-write disk state.
|
||||
try:
|
||||
record_read(None, resolved, content_bytes=joined.encode(encoding))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# 10. Build response
|
||||
updated_lines = joined.splitlines()
|
||||
total_lines = len(updated_lines)
|
||||
|
||||
@@ -0,0 +1,177 @@
|
||||
"""Per-agent tracking of files the model has Read, so Edit can detect
|
||||
staleness from external writes (e.g. the user saving the file in their
|
||||
editor between a Read and an Edit).
|
||||
|
||||
The cache lives in the MCP server process and is keyed on
|
||||
``(scope, absolute_path)`` where ``scope`` is the agent_id when available
|
||||
(the normal case) or ``"__global__"`` as a last-resort fallback. That
|
||||
keeps two agents running in the same MCP server process from sharing
|
||||
(or corrupting) each other's read-state view.
|
||||
|
||||
Freshness is decided by ``(size, mtime_ns, sha256)``:
|
||||
- If the file's ``size`` and ``mtime_ns`` both match the recorded values,
|
||||
we trust the read (fast path, no hashing).
|
||||
- If either differs, we hash the current content and compare to the
|
||||
recorded sha. mtime preservation by some editors means mtime alone is
|
||||
unreliable; hashing only on a mismatch keeps the happy path cheap.
|
||||
|
||||
The cache is bounded (LRU, 256 entries per scope) so a chatty agent
|
||||
cannot grow it without bound.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import os
|
||||
import threading
|
||||
from collections import OrderedDict
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class FileReadRecord:
|
||||
size: int
|
||||
mtime_ns: int
|
||||
sha256: str
|
||||
|
||||
|
||||
class Freshness(Enum):
|
||||
FRESH = "fresh"
|
||||
STALE = "stale"
|
||||
UNREAD = "unread"
|
||||
|
||||
|
||||
@dataclass
|
||||
class FreshResult:
|
||||
status: Freshness
|
||||
detail: str = ""
|
||||
|
||||
|
||||
_MAX_ENTRIES_PER_SCOPE = 256
|
||||
|
||||
# scope -> ordered dict of absolute_path -> FileReadRecord.
|
||||
# Ordered so we can evict least-recently-read entries.
|
||||
_cache: dict[str, "OrderedDict[str, FileReadRecord]"] = {}
|
||||
_lock = threading.Lock()
|
||||
|
||||
|
||||
def _scope_key(agent_id: str | None) -> str:
|
||||
return agent_id or "__global__"
|
||||
|
||||
|
||||
def _hash_bytes(data: bytes) -> str:
|
||||
return hashlib.sha256(data).hexdigest()
|
||||
|
||||
|
||||
def _hash_file(abs_path: str) -> str:
|
||||
h = hashlib.sha256()
|
||||
with open(abs_path, "rb") as fh:
|
||||
for chunk in iter(lambda: fh.read(65536), b""):
|
||||
h.update(chunk)
|
||||
return h.hexdigest()
|
||||
|
||||
|
||||
def record_read(
|
||||
agent_id: str | None,
|
||||
abs_path: str,
|
||||
content_bytes: bytes | None = None,
|
||||
) -> None:
|
||||
"""Record that ``abs_path`` was just successfully read.
|
||||
|
||||
If ``content_bytes`` is provided the hash is computed from that; this
|
||||
is the fast path and avoids a second open. Otherwise we re-open the
|
||||
file to hash it. Silently ignores files that disappear between the
|
||||
read and the record (race with concurrent deletion).
|
||||
"""
|
||||
try:
|
||||
st = os.stat(abs_path)
|
||||
except OSError:
|
||||
return
|
||||
|
||||
try:
|
||||
sha = _hash_bytes(content_bytes) if content_bytes is not None else _hash_file(abs_path)
|
||||
except OSError:
|
||||
return
|
||||
|
||||
rec = FileReadRecord(size=st.st_size, mtime_ns=st.st_mtime_ns, sha256=sha)
|
||||
scope = _scope_key(agent_id)
|
||||
with _lock:
|
||||
entries = _cache.setdefault(scope, OrderedDict())
|
||||
entries[abs_path] = rec
|
||||
entries.move_to_end(abs_path)
|
||||
while len(entries) > _MAX_ENTRIES_PER_SCOPE:
|
||||
entries.popitem(last=False)
|
||||
|
||||
|
||||
def check_fresh(agent_id: str | None, abs_path: str) -> FreshResult:
|
||||
"""Check whether ``abs_path`` is safe to edit.
|
||||
|
||||
Returns FRESH if the file on disk matches the recorded read.
|
||||
Returns STALE if it was read previously but has since changed.
|
||||
Returns UNREAD if the agent has never read this path via read_file.
|
||||
"""
|
||||
scope = _scope_key(agent_id)
|
||||
with _lock:
|
||||
entries = _cache.get(scope)
|
||||
rec = entries.get(abs_path) if entries else None
|
||||
if rec is not None and entries is not None:
|
||||
entries.move_to_end(abs_path)
|
||||
|
||||
if rec is None:
|
||||
return FreshResult(Freshness.UNREAD)
|
||||
|
||||
try:
|
||||
st = os.stat(abs_path)
|
||||
except FileNotFoundError:
|
||||
return FreshResult(Freshness.STALE, "file has been deleted since it was read")
|
||||
except OSError as e:
|
||||
return FreshResult(Freshness.STALE, f"stat failed: {e}")
|
||||
|
||||
if st.st_size == rec.size and st.st_mtime_ns == rec.mtime_ns:
|
||||
return FreshResult(Freshness.FRESH)
|
||||
|
||||
# mtime/size differ - fall through to a content hash so that editors
|
||||
# that rewrite the file with identical content don't trip a false
|
||||
# stale. This is the only path where we pay the O(file) hashing cost.
|
||||
try:
|
||||
current_sha = _hash_file(abs_path)
|
||||
except OSError as e:
|
||||
return FreshResult(Freshness.STALE, f"hash failed: {e}")
|
||||
|
||||
if current_sha == rec.sha256:
|
||||
# Content is unchanged even though metadata differs (e.g. editor
|
||||
# rewrote with preserved content). Refresh the record so future
|
||||
# checks hit the fast path again.
|
||||
rec = FileReadRecord(size=st.st_size, mtime_ns=st.st_mtime_ns, sha256=current_sha)
|
||||
with _lock:
|
||||
entries = _cache.setdefault(scope, OrderedDict())
|
||||
entries[abs_path] = rec
|
||||
entries.move_to_end(abs_path)
|
||||
return FreshResult(Freshness.FRESH)
|
||||
|
||||
return FreshResult(
|
||||
Freshness.STALE,
|
||||
"content changed on disk since the last read (sha256 differs)",
|
||||
)
|
||||
|
||||
|
||||
def forget(agent_id: str | None, abs_path: str) -> None:
|
||||
"""Drop a single cache entry. Used in tests to force UNREAD."""
|
||||
scope = _scope_key(agent_id)
|
||||
with _lock:
|
||||
entries = _cache.get(scope)
|
||||
if entries is not None:
|
||||
entries.pop(abs_path, None)
|
||||
|
||||
|
||||
def clear_scope(agent_id: str | None) -> None:
|
||||
"""Drop all entries for one agent (used at session teardown)."""
|
||||
with _lock:
|
||||
_cache.pop(_scope_key(agent_id), None)
|
||||
|
||||
|
||||
def reset_all() -> None:
|
||||
"""Test hook: wipe every scope."""
|
||||
with _lock:
|
||||
_cache.clear()
|
||||
@@ -8,6 +8,7 @@ Supports:
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import os
|
||||
from typing import TYPE_CHECKING, Literal
|
||||
|
||||
|
||||
@@ -15,6 +15,8 @@ from pathlib import Path
|
||||
|
||||
from mcp.server.fastmcp import FastMCP
|
||||
|
||||
from aden_tools.file_state_cache import record_read
|
||||
|
||||
# ~/.hive/ is always allowed for cross-agent file access
|
||||
HIVE_DIR = os.path.expanduser("~/.hive")
|
||||
|
||||
@@ -71,6 +73,7 @@ def register_tools(mcp: FastMCP) -> None:
|
||||
offset: int = 1,
|
||||
limit: int = 0,
|
||||
data_dir: str = "",
|
||||
agent_id: str = "",
|
||||
) -> str:
|
||||
"""Read file contents with line numbers.
|
||||
|
||||
@@ -83,6 +86,8 @@ def register_tools(mcp: FastMCP) -> None:
|
||||
offset: Starting line number, 1-indexed (default: 1).
|
||||
limit: Max lines to return, 0 = up to 2000 (default: 0).
|
||||
data_dir: Auto-injected - the session's data directory.
|
||||
agent_id: Auto-injected - the calling agent id, used to scope
|
||||
the file-state cache that powers stale-edit detection.
|
||||
"""
|
||||
try:
|
||||
resolved = _resolve_path(path, data_dir)
|
||||
@@ -112,8 +117,17 @@ def register_tools(mcp: FastMCP) -> None:
|
||||
pass
|
||||
|
||||
try:
|
||||
with open(resolved, encoding="utf-8", errors="replace") as f:
|
||||
content = f.read()
|
||||
# Read as bytes first so we can hash them for the state cache
|
||||
# without a second open, then decode for the line-formatted
|
||||
# return value the model sees.
|
||||
with open(resolved, "rb") as f:
|
||||
raw_bytes = f.read()
|
||||
content = raw_bytes.decode("utf-8", errors="replace")
|
||||
# Record this read in the per-agent state cache so a later
|
||||
# hashline_edit/write_file call can detect external writes
|
||||
# that happened between now and then. Scoped to agent_id so
|
||||
# two agents sharing the MCP server can't see each other.
|
||||
record_read(agent_id or None, resolved, content_bytes=raw_bytes)
|
||||
|
||||
all_lines = content.splitlines()
|
||||
total_lines = len(all_lines)
|
||||
|
||||
+213
@@ -0,0 +1,213 @@
|
||||
"""In-process registry of long-running shell jobs spawned by
|
||||
``execute_command_tool(run_in_background=True)``.
|
||||
|
||||
Jobs are keyed on a short id the tool returns to the agent. The agent
|
||||
can then call ``bash_output(id=...)`` to poll for new output and
|
||||
``bash_kill(id=...)`` to terminate. Each job is scoped to an
|
||||
``agent_id`` so two agents sharing the same MCP server can't see or
|
||||
kill each other's work.
|
||||
|
||||
The stdout/stderr buffers are bounded rolling tail buffers (64 KB each)
|
||||
so a runaway process can't exhaust memory. Older bytes are dropped with
|
||||
a one-time ``[truncated N bytes]`` marker prepended to the returned
|
||||
text.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
from collections import deque
|
||||
from dataclasses import dataclass, field
|
||||
from uuid import uuid4
|
||||
|
||||
# 64 KB rolling window per stream. Large enough for long build logs,
|
||||
# small enough that a bash infinite loop can't OOM the MCP process.
|
||||
_MAX_BUFFER_BYTES = 64 * 1024
|
||||
|
||||
|
||||
@dataclass
|
||||
class _RingBuffer:
|
||||
"""Append-only byte buffer with a hard byte ceiling and per-read
|
||||
offset tracking so each bash_output call only returns new bytes.
|
||||
"""
|
||||
|
||||
max_bytes: int = _MAX_BUFFER_BYTES
|
||||
# deque of (global_offset, bytes) chunks. global_offset is the total
|
||||
# bytes written prior to this chunk; lets us compute "bytes since
|
||||
# last poll" without copying.
|
||||
_chunks: deque[tuple[int, bytes]] = field(default_factory=deque)
|
||||
_total_written: int = 0
|
||||
_total_dropped: int = 0
|
||||
_read_cursor: int = 0
|
||||
|
||||
def write(self, data: bytes) -> None:
|
||||
if not data:
|
||||
return
|
||||
self._chunks.append((self._total_written, data))
|
||||
self._total_written += len(data)
|
||||
# Evict from the front until we're under the ceiling.
|
||||
current_bytes = sum(len(c) for _, c in self._chunks)
|
||||
while current_bytes > self.max_bytes and self._chunks:
|
||||
dropped_offset, dropped = self._chunks.popleft()
|
||||
self._total_dropped += len(dropped)
|
||||
current_bytes -= len(dropped)
|
||||
# Push the read cursor forward if the reader was still
|
||||
# pointing at bytes we just evicted.
|
||||
if self._read_cursor < dropped_offset + len(dropped):
|
||||
self._read_cursor = dropped_offset + len(dropped)
|
||||
|
||||
def read_new(self) -> str:
|
||||
"""Return any bytes since the last call, as decoded text.
|
||||
|
||||
Includes a ``[truncated N bytes]`` prefix if rolling-window
|
||||
eviction dropped any bytes the reader hadn't yet consumed.
|
||||
"""
|
||||
chunks_out: list[bytes] = []
|
||||
cursor = self._read_cursor
|
||||
for offset, chunk in self._chunks:
|
||||
end = offset + len(chunk)
|
||||
if end <= cursor:
|
||||
continue
|
||||
start_in_chunk = max(0, cursor - offset)
|
||||
chunks_out.append(chunk[start_in_chunk:])
|
||||
cursor = end
|
||||
self._read_cursor = cursor
|
||||
raw = b"".join(chunks_out)
|
||||
text = raw.decode("utf-8", errors="replace")
|
||||
# Surface eviction ONCE per poll so the agent knows to check
|
||||
# the file system for larger logs instead of assuming it's got
|
||||
# the full output.
|
||||
if self._total_dropped > 0 and text:
|
||||
text = f"[truncated {self._total_dropped} earlier bytes]\n" + text
|
||||
return text
|
||||
|
||||
|
||||
@dataclass
|
||||
class BackgroundJob:
|
||||
id: str
|
||||
agent_id: str
|
||||
command: str
|
||||
cwd: str
|
||||
started_at: float
|
||||
process: asyncio.subprocess.Process
|
||||
stdout_buf: _RingBuffer = field(default_factory=_RingBuffer)
|
||||
stderr_buf: _RingBuffer = field(default_factory=_RingBuffer)
|
||||
_pump_task: asyncio.Task | None = None
|
||||
exit_code: int | None = None
|
||||
|
||||
def status(self) -> str:
|
||||
if self.exit_code is not None:
|
||||
return f"exited({self.exit_code})"
|
||||
if self.process.returncode is not None:
|
||||
# Not yet surfaced by the pump but already finished.
|
||||
return f"exited({self.process.returncode})"
|
||||
return "running"
|
||||
|
||||
|
||||
# agent_id -> {job_id -> BackgroundJob}
|
||||
_jobs: dict[str, dict[str, BackgroundJob]] = {}
|
||||
_jobs_lock = asyncio.Lock()
|
||||
|
||||
|
||||
def _short_id() -> str:
|
||||
return uuid4().hex[:8]
|
||||
|
||||
|
||||
async def _pump(job: BackgroundJob) -> None:
|
||||
"""Drain the child process's stdout/stderr into the ring buffers."""
|
||||
proc = job.process
|
||||
|
||||
async def _drain(stream: asyncio.StreamReader | None, buf: _RingBuffer) -> None:
|
||||
if stream is None:
|
||||
return
|
||||
while True:
|
||||
chunk = await stream.read(4096)
|
||||
if not chunk:
|
||||
return
|
||||
buf.write(chunk)
|
||||
|
||||
await asyncio.gather(
|
||||
_drain(proc.stdout, job.stdout_buf),
|
||||
_drain(proc.stderr, job.stderr_buf),
|
||||
)
|
||||
job.exit_code = await proc.wait()
|
||||
|
||||
|
||||
async def spawn(
|
||||
command: str, cwd: str, agent_id: str
|
||||
) -> BackgroundJob:
|
||||
"""Start a subprocess in the background and register it. The caller
|
||||
holds the job id returned from here and can poll via ``get()``.
|
||||
"""
|
||||
proc = await asyncio.create_subprocess_shell(
|
||||
command,
|
||||
cwd=cwd,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
)
|
||||
job = BackgroundJob(
|
||||
id=_short_id(),
|
||||
agent_id=agent_id,
|
||||
command=command,
|
||||
cwd=cwd,
|
||||
started_at=time.time(),
|
||||
process=proc,
|
||||
)
|
||||
# Start pumping IO in the background so the ring buffers stay warm
|
||||
# even if the agent doesn't poll for a while.
|
||||
job._pump_task = asyncio.create_task(_pump(job))
|
||||
|
||||
async with _jobs_lock:
|
||||
_jobs.setdefault(agent_id, {})[job.id] = job
|
||||
return job
|
||||
|
||||
|
||||
async def get(agent_id: str, job_id: str) -> BackgroundJob | None:
|
||||
async with _jobs_lock:
|
||||
return _jobs.get(agent_id, {}).get(job_id)
|
||||
|
||||
|
||||
async def kill(agent_id: str, job_id: str, grace_seconds: float = 3.0) -> str:
|
||||
"""SIGTERM a background job, escalating to SIGKILL after a grace
|
||||
period. Returns a human-readable status string.
|
||||
"""
|
||||
job = await get(agent_id, job_id)
|
||||
if job is None:
|
||||
return f"no background job with id '{job_id}'"
|
||||
if job.process.returncode is not None:
|
||||
status = f"already exited with code {job.process.returncode}"
|
||||
else:
|
||||
try:
|
||||
job.process.terminate()
|
||||
except ProcessLookupError:
|
||||
pass
|
||||
try:
|
||||
await asyncio.wait_for(job.process.wait(), timeout=grace_seconds)
|
||||
status = f"terminated cleanly (exit={job.process.returncode})"
|
||||
except asyncio.TimeoutError:
|
||||
try:
|
||||
job.process.kill()
|
||||
except ProcessLookupError:
|
||||
pass
|
||||
await job.process.wait()
|
||||
status = f"killed (SIGKILL, exit={job.process.returncode})"
|
||||
# Deregister after kill so the id is no longer reachable.
|
||||
async with _jobs_lock:
|
||||
scope = _jobs.get(agent_id)
|
||||
if scope is not None:
|
||||
scope.pop(job_id, None)
|
||||
return status
|
||||
|
||||
|
||||
async def clear_agent(agent_id: str) -> None:
|
||||
"""Test hook: kill every job owned by ``agent_id``."""
|
||||
async with _jobs_lock:
|
||||
scope = _jobs.pop(agent_id, {})
|
||||
for job in scope.values():
|
||||
if job.process.returncode is None:
|
||||
try:
|
||||
job.process.kill()
|
||||
except ProcessLookupError:
|
||||
pass
|
||||
await job.process.wait()
|
||||
+187
-37
@@ -1,78 +1,228 @@
|
||||
"""Shell command execution tool.
|
||||
|
||||
Three tools are registered:
|
||||
|
||||
* ``execute_command_tool`` runs a command synchronously with a per-call
|
||||
timeout (default 120s, max 600s). Uses ``asyncio.create_subprocess_shell``
|
||||
so the MCP event loop is not blocked while the child runs.
|
||||
* ``bash_output`` polls a background job started with
|
||||
``execute_command_tool(run_in_background=True)`` and returns any new
|
||||
stdout/stderr since the last poll plus the current status.
|
||||
* ``bash_kill`` terminates a background job (SIGTERM then SIGKILL after
|
||||
a 3-second grace period).
|
||||
|
||||
All three go through the same pre-execution safety blocklist in
|
||||
``command_sanitizer.py``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
from mcp.server.fastmcp import FastMCP
|
||||
|
||||
from ..command_sanitizer import CommandBlockedError, validate_command
|
||||
from ..security import AGENT_SANDBOXES_DIR, get_sandboxed_path
|
||||
from .background_jobs import get as get_job
|
||||
from .background_jobs import kill as kill_job
|
||||
from .background_jobs import spawn as spawn_job
|
||||
|
||||
# Bounds on per-call timeout. 1s minimum prevents accidental zeros that
|
||||
# would cause every command to fail. 600s maximum (10 min) is the same
|
||||
# ceiling Claude Code uses for its Bash tool; builds and test suites
|
||||
# longer than that should use run_in_background instead.
|
||||
_MIN_TIMEOUT = 1
|
||||
_MAX_TIMEOUT = 600
|
||||
_DEFAULT_TIMEOUT = 120
|
||||
|
||||
|
||||
def _resolve_cwd(cwd: str | None, agent_id: str) -> str:
|
||||
agent_root = os.path.join(AGENT_SANDBOXES_DIR, agent_id, "current")
|
||||
os.makedirs(agent_root, exist_ok=True)
|
||||
if cwd:
|
||||
return get_sandboxed_path(cwd, agent_id)
|
||||
return agent_root
|
||||
|
||||
|
||||
def register_tools(mcp: FastMCP) -> None:
|
||||
"""Register command execution tools with the MCP server."""
|
||||
|
||||
@mcp.tool()
|
||||
def execute_command_tool(command: str, agent_id: str, cwd: str | None = None) -> dict:
|
||||
async def execute_command_tool(
|
||||
command: str,
|
||||
agent_id: str,
|
||||
cwd: str | None = None,
|
||||
timeout_seconds: int = _DEFAULT_TIMEOUT,
|
||||
run_in_background: bool = False,
|
||||
) -> dict:
|
||||
"""
|
||||
Purpose
|
||||
Execute a shell command within the agent sandbox.
|
||||
|
||||
When to use
|
||||
Run validators or linters
|
||||
Run validators, linters, builds, test suites
|
||||
Generate derived artifacts (indexes, summaries)
|
||||
Perform controlled maintenance tasks
|
||||
Start long-running processes via ``run_in_background=True``
|
||||
(dev servers, watchers, file-triggered builds)
|
||||
|
||||
Rules & Constraints
|
||||
No network access unless explicitly allowed
|
||||
No destructive commands (rm -rf, system modification)
|
||||
Output must be treated as data, not truth
|
||||
Commands are validated against a safety blocklist before execution
|
||||
Commands still run through shell=True, so the blocklist only
|
||||
prevents explicit nested shell executables; it does not remove
|
||||
shell parsing entirely
|
||||
Commands are validated against a safety blocklist before
|
||||
execution. The blocklist runs through shell=True, so it
|
||||
only prevents explicit nested shell executables.
|
||||
timeout_seconds is clamped to [1, 600]. For longer-running
|
||||
work use run_in_background=True + bash_output to poll.
|
||||
|
||||
Args:
|
||||
command: The shell command to execute
|
||||
agent_id: The ID of the agent
|
||||
cwd: The working directory for the command (relative to agent sandbox, optional)
|
||||
command: The shell command to execute.
|
||||
agent_id: The ID of the agent (auto-injected).
|
||||
cwd: Working directory for the command (relative to the
|
||||
agent sandbox). Defaults to the sandbox root.
|
||||
timeout_seconds: Max wall-clock seconds the foreground
|
||||
command is allowed to run. Ignored when
|
||||
run_in_background=True. Default 120, max 600.
|
||||
run_in_background: If True, spawn the command and return
|
||||
immediately with a job id. Use bash_output(id=...) to
|
||||
read output and bash_kill(id=...) to stop it.
|
||||
|
||||
Returns:
|
||||
Dict with command output and execution details, or error dict
|
||||
For foreground commands: dict with stdout, stderr, return_code,
|
||||
elapsed_seconds.
|
||||
For background commands: dict with id, pid, started_at, and
|
||||
instructions for polling / killing the job.
|
||||
On error: dict with an "error" key.
|
||||
"""
|
||||
# Validate command against safety blocklist before execution
|
||||
try:
|
||||
validate_command(command)
|
||||
except CommandBlockedError as e:
|
||||
return {"error": f"Command blocked: {e}", "blocked": True}
|
||||
|
||||
try:
|
||||
# Default cwd is the agent sandbox root
|
||||
agent_root = os.path.join(AGENT_SANDBOXES_DIR, agent_id, "current")
|
||||
os.makedirs(agent_root, exist_ok=True)
|
||||
|
||||
if cwd:
|
||||
secure_cwd = get_sandboxed_path(cwd, agent_id)
|
||||
else:
|
||||
secure_cwd = agent_root
|
||||
|
||||
result = subprocess.run(
|
||||
command,
|
||||
shell=True,
|
||||
cwd=secure_cwd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=60,
|
||||
encoding="utf-8",
|
||||
)
|
||||
secure_cwd = _resolve_cwd(cwd, agent_id)
|
||||
except Exception as e:
|
||||
return {"error": f"Failed to resolve cwd: {e}"}
|
||||
|
||||
if run_in_background:
|
||||
try:
|
||||
job = await spawn_job(command, secure_cwd, agent_id)
|
||||
except Exception as e:
|
||||
return {"error": f"Failed to spawn background job: {e}"}
|
||||
return {
|
||||
"success": True,
|
||||
"background": True,
|
||||
"id": job.id,
|
||||
"pid": job.process.pid,
|
||||
"command": command,
|
||||
"return_code": result.returncode,
|
||||
"stdout": result.stdout,
|
||||
"stderr": result.stderr,
|
||||
"cwd": cwd or ".",
|
||||
"started_at": job.started_at,
|
||||
"hint": (
|
||||
"Background job started. Call "
|
||||
f"bash_output(id='{job.id}') to read output, or "
|
||||
f"bash_kill(id='{job.id}') to terminate it."
|
||||
),
|
||||
}
|
||||
except subprocess.TimeoutExpired:
|
||||
return {"error": "Command timed out after 60 seconds"}
|
||||
|
||||
# Foreground path: clamp timeout, spawn, wait with a watchdog.
|
||||
try:
|
||||
timeout = max(_MIN_TIMEOUT, min(_MAX_TIMEOUT, int(timeout_seconds)))
|
||||
except (TypeError, ValueError):
|
||||
timeout = _DEFAULT_TIMEOUT
|
||||
|
||||
started = time.monotonic()
|
||||
try:
|
||||
proc = await asyncio.create_subprocess_shell(
|
||||
command,
|
||||
cwd=secure_cwd,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
)
|
||||
except Exception as e:
|
||||
return {"error": f"Failed to execute command: {str(e)}"}
|
||||
return {"error": f"Failed to execute command: {e}"}
|
||||
|
||||
try:
|
||||
stdout_b, stderr_b = await asyncio.wait_for(
|
||||
proc.communicate(), timeout=timeout
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
# Child is still running: kill it, drain what it already
|
||||
# wrote so the agent gets a partial log, then report.
|
||||
try:
|
||||
proc.kill()
|
||||
except ProcessLookupError:
|
||||
pass
|
||||
try:
|
||||
stdout_b, stderr_b = await asyncio.wait_for(
|
||||
proc.communicate(), timeout=2.0
|
||||
)
|
||||
except (asyncio.TimeoutError, Exception):
|
||||
stdout_b, stderr_b = b"", b""
|
||||
elapsed = round(time.monotonic() - started, 2)
|
||||
return {
|
||||
"error": (
|
||||
f"Command timed out after {timeout} seconds. "
|
||||
f"For longer work pass timeout_seconds (max 600) or "
|
||||
f"run_in_background=True."
|
||||
),
|
||||
"timed_out": True,
|
||||
"elapsed_seconds": elapsed,
|
||||
"stdout": stdout_b.decode("utf-8", errors="replace"),
|
||||
"stderr": stderr_b.decode("utf-8", errors="replace"),
|
||||
}
|
||||
except Exception as e:
|
||||
return {"error": f"Failed while running command: {e}"}
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"command": command,
|
||||
"return_code": proc.returncode,
|
||||
"stdout": stdout_b.decode("utf-8", errors="replace"),
|
||||
"stderr": stderr_b.decode("utf-8", errors="replace"),
|
||||
"cwd": cwd or ".",
|
||||
"elapsed_seconds": round(time.monotonic() - started, 2),
|
||||
}
|
||||
|
||||
@mcp.tool()
|
||||
async def bash_output(id: str, agent_id: str) -> dict:
|
||||
"""Poll a background command for new output and its current status.
|
||||
|
||||
Returns any stdout/stderr bytes written since the last call.
|
||||
The status is one of "running", "exited(N)", or "killed".
|
||||
When the job has finished and all output has been consumed, it
|
||||
is removed from the registry on the next poll.
|
||||
|
||||
Args:
|
||||
id: The job id returned from
|
||||
execute_command_tool(run_in_background=True).
|
||||
agent_id: The ID of the agent (auto-injected).
|
||||
"""
|
||||
job = await get_job(agent_id, id)
|
||||
if job is None:
|
||||
return {"error": f"no background job with id '{id}'"}
|
||||
new_stdout = job.stdout_buf.read_new()
|
||||
new_stderr = job.stderr_buf.read_new()
|
||||
return {
|
||||
"id": id,
|
||||
"status": job.status(),
|
||||
"stdout": new_stdout,
|
||||
"stderr": new_stderr,
|
||||
"elapsed_seconds": round(time.time() - job.started_at, 2),
|
||||
}
|
||||
|
||||
@mcp.tool()
|
||||
async def bash_kill(id: str, agent_id: str) -> dict:
|
||||
"""Terminate a background command.
|
||||
|
||||
Sends SIGTERM, waits up to 3 seconds, then escalates to SIGKILL
|
||||
if the process is still alive. The job id is then deregistered.
|
||||
|
||||
Args:
|
||||
id: The job id returned from
|
||||
execute_command_tool(run_in_background=True).
|
||||
agent_id: The ID of the agent (auto-injected).
|
||||
"""
|
||||
status = await kill_job(agent_id, id)
|
||||
return {"id": id, "status": status}
|
||||
|
||||
@@ -18,6 +18,8 @@ from aden_tools.hashline import (
|
||||
validate_anchor,
|
||||
)
|
||||
|
||||
from aden_tools.file_state_cache import Freshness, check_fresh, record_read
|
||||
|
||||
from ..security import get_sandboxed_path
|
||||
|
||||
|
||||
@@ -87,6 +89,29 @@ def register_tools(mcp: FastMCP) -> None:
|
||||
if not os.path.isfile(secure_path):
|
||||
return {"error": f"Path is not a file: {path}"}
|
||||
|
||||
# Stale-edit guard: refuse to edit unless we have a recent
|
||||
# read recorded in the file-state cache and the file on disk
|
||||
# still matches it. This catches the case where the user
|
||||
# saved the file in their editor between the model's Read
|
||||
# and Edit, which would otherwise cause the model to write
|
||||
# against a stale mental model.
|
||||
fresh = check_fresh(agent_id, secure_path)
|
||||
if fresh.status is Freshness.UNREAD:
|
||||
return {
|
||||
"error": (
|
||||
f"Refusing to edit '{path}': you must call "
|
||||
f"read_file('{path}') first so the harness can "
|
||||
f"track its state before you edit it."
|
||||
)
|
||||
}
|
||||
if fresh.status is Freshness.STALE:
|
||||
return {
|
||||
"error": (
|
||||
f"Refusing to edit '{path}': {fresh.detail}. "
|
||||
f"Re-read the file with read_file before editing."
|
||||
)
|
||||
}
|
||||
|
||||
with open(secure_path, "rb") as f:
|
||||
raw_head = f.read(8192)
|
||||
eol = "\r\n" if b"\r\n" in raw_head else "\n"
|
||||
@@ -405,6 +430,15 @@ def register_tools(mcp: FastMCP) -> None:
|
||||
except Exception as e:
|
||||
return {"error": f"Failed to write file: {e}"}
|
||||
|
||||
# Re-record the new file state so a second edit in the same turn
|
||||
# sees the post-write hash instead of tripping the stale guard.
|
||||
try:
|
||||
record_read(agent_id, secure_path, content_bytes=joined.encode(encoding))
|
||||
except Exception:
|
||||
# Hash record is best-effort; a failure here must not break
|
||||
# the edit that already succeeded on disk.
|
||||
pass
|
||||
|
||||
# 10. Build response
|
||||
updated_lines = joined.splitlines()
|
||||
hashline_content = format_hashlines(updated_lines)
|
||||
|
||||
+505
-89
@@ -22,6 +22,7 @@ The bridge requires the Beeline Chrome extension to be installed and connected.
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
@@ -122,6 +123,21 @@ class BeelineBridge:
|
||||
logger.warning("Bridge status server could not start on port %d: %s", status_port, e)
|
||||
|
||||
async def stop(self) -> None:
|
||||
# Cancel in-flight bridge requests so any caller stuck in _send
|
||||
# sees CancelledError immediately instead of waiting the full
|
||||
# 30s timeout. Mirrors the cleanup in _handle_connection's
|
||||
# disconnect branch so both exit paths behave the same.
|
||||
for fut in self._pending.values():
|
||||
if not fut.done():
|
||||
fut.cancel()
|
||||
self._pending.clear()
|
||||
# Drop CDP attach cache — next run must re-attach fresh.
|
||||
self._cdp_attached.clear()
|
||||
# Drop highlight state — stale entries would otherwise carry
|
||||
# over into a subsequent run and confuse screenshot annotation.
|
||||
_interaction_highlights.clear()
|
||||
self._ws = None
|
||||
|
||||
if self._server:
|
||||
self._server.close()
|
||||
try:
|
||||
@@ -221,7 +237,14 @@ class BeelineBridge:
|
||||
fut.cancel()
|
||||
self._pending.clear()
|
||||
|
||||
async def _send(self, type_: str, **params) -> dict:
|
||||
# Default wait on a bridge command. Callers with known-slow ops
|
||||
# (full-page screenshots on slow networks, AX tree on huge pages)
|
||||
# can pass a longer value via _send(..., timeout=...). Using the
|
||||
# same default as the old hard-coded value so existing call sites
|
||||
# don't regress.
|
||||
_DEFAULT_SEND_TIMEOUT_S: float = 30.0
|
||||
|
||||
async def _send(self, type_: str, *, timeout: float | None = None, **params) -> dict:
|
||||
"""Send a command to the extension and wait for the result."""
|
||||
if not self._ws:
|
||||
raise RuntimeError("Extension not connected")
|
||||
@@ -230,27 +253,58 @@ class BeelineBridge:
|
||||
fut: asyncio.Future = asyncio.get_event_loop().create_future()
|
||||
self._pending[msg_id] = fut
|
||||
start = time.perf_counter()
|
||||
effective_timeout = timeout if timeout is not None else self._DEFAULT_SEND_TIMEOUT_S
|
||||
|
||||
log_bridge_message("send", type_, msg_id=msg_id, params=params)
|
||||
|
||||
try:
|
||||
await self._ws.send(json.dumps({"id": msg_id, "type": type_, **params}))
|
||||
result = await asyncio.wait_for(fut, timeout=30.0)
|
||||
result = await asyncio.wait_for(fut, timeout=effective_timeout)
|
||||
duration_ms = (time.perf_counter() - start) * 1000
|
||||
log_bridge_message("send", type_, msg_id=msg_id, result=result, duration_ms=duration_ms)
|
||||
return result
|
||||
except TimeoutError:
|
||||
self._pending.pop(msg_id, None)
|
||||
log_bridge_message("send", type_, msg_id=msg_id, error="timeout")
|
||||
raise RuntimeError(f"Bridge command '{type_}' timed out") from None
|
||||
# Include which CDP method (if any) so the caller can see
|
||||
# what actually hung — the generic 'cdp' type is useless
|
||||
# when ten different CDP calls use the same type.
|
||||
detail = f" method={params.get('method')}" if params.get("method") else ""
|
||||
raise RuntimeError(
|
||||
f"Bridge command '{type_}'{detail} timed out after {effective_timeout:.0f}s"
|
||||
) from None
|
||||
except BaseException:
|
||||
# CancelledError or any other exception — remove stale future so a late
|
||||
# response from the extension doesn't try to resolve a cancelled future.
|
||||
self._pending.pop(msg_id, None)
|
||||
raise
|
||||
|
||||
# Substrings that indicate Chrome detached the debugger out from
|
||||
# under us (tab closed, user opened DevTools, cross-origin nav).
|
||||
# Our in-memory _cdp_attached set is now stale; next call should
|
||||
# re-attach rather than reporting a cryptic "Target not found".
|
||||
_CDP_DEAD_SESSION_MARKERS = (
|
||||
"target closed",
|
||||
"target not found",
|
||||
"not attached",
|
||||
"session closed",
|
||||
"inspector already attached",
|
||||
"no target with given id",
|
||||
)
|
||||
|
||||
def _is_cdp_dead_session(self, exc: BaseException) -> bool:
|
||||
msg = str(exc).lower()
|
||||
return any(m in msg for m in self._CDP_DEAD_SESSION_MARKERS)
|
||||
|
||||
async def _cdp(self, tab_id: int, method: str, params: dict | None = None) -> dict:
|
||||
"""Send a CDP command to a tab."""
|
||||
"""Send a CDP command to a tab.
|
||||
|
||||
On a dead-session error (Chrome detached externally — tab closed,
|
||||
DevTools opened, cross-origin nav), evict the stale attach
|
||||
cache entry, reattach, and retry once. Without this the Python
|
||||
side would keep assuming it's attached and every subsequent call
|
||||
would hit the same error until someone restarted the bridge.
|
||||
"""
|
||||
start = time.perf_counter()
|
||||
try:
|
||||
result = await self._send("cdp", tabId=tab_id, method=method, params=params or {})
|
||||
@@ -260,6 +314,33 @@ class BeelineBridge:
|
||||
except Exception as e:
|
||||
duration_ms = (time.perf_counter() - start) * 1000
|
||||
log_cdp_command(tab_id, method, params, error=str(e), duration_ms=duration_ms)
|
||||
if self._is_cdp_dead_session(e):
|
||||
logger.info(
|
||||
"CDP session for tab %d looks dead (%s) — re-attaching and retrying",
|
||||
tab_id,
|
||||
str(e)[:120],
|
||||
)
|
||||
self._cdp_attached.discard(tab_id)
|
||||
try:
|
||||
reattach = await self._send("cdp.attach", tabId=tab_id)
|
||||
if reattach.get("ok"):
|
||||
self._cdp_attached.add(tab_id)
|
||||
retry_start = time.perf_counter()
|
||||
result = await self._send(
|
||||
"cdp", tabId=tab_id, method=method, params=params or {}
|
||||
)
|
||||
log_cdp_command(
|
||||
tab_id,
|
||||
method,
|
||||
params,
|
||||
result,
|
||||
duration_ms=(time.perf_counter() - retry_start) * 1000,
|
||||
)
|
||||
return result
|
||||
except Exception as retry_exc:
|
||||
logger.debug(
|
||||
"CDP reattach+retry for tab %d failed: %s", tab_id, retry_exc
|
||||
)
|
||||
raise
|
||||
|
||||
async def _try_enable_domain(self, tab_id: int, domain: str) -> None:
|
||||
@@ -310,7 +391,14 @@ class BeelineBridge:
|
||||
|
||||
async def close_tab(self, tab_id: int) -> dict:
|
||||
"""Close a tab by ID."""
|
||||
return await self._send("tab.close", tabId=tab_id)
|
||||
result = await self._send("tab.close", tabId=tab_id)
|
||||
# Drop per-tab state — the id may be reused by Chrome much
|
||||
# later, and carrying a stale highlight or "attached" flag
|
||||
# forward would misannotate screenshots or skip a needed
|
||||
# reattach on the reused id.
|
||||
self._cdp_attached.discard(tab_id)
|
||||
_interaction_highlights.pop(tab_id, None)
|
||||
return result
|
||||
|
||||
async def list_tabs(self, group_id: int | None = None) -> dict:
|
||||
"""List tabs, optionally filtered by group.
|
||||
@@ -360,6 +448,11 @@ class BeelineBridge:
|
||||
if wait_until not in VALID_WAIT_UNTIL:
|
||||
wait_until = "load"
|
||||
|
||||
# Drop the stale interaction highlight before loading a new
|
||||
# page — otherwise the next screenshot will annotate the new
|
||||
# page with a rect from the previous page's coordinate system.
|
||||
_interaction_highlights.pop(tab_id, None)
|
||||
|
||||
# Attach debugger if needed
|
||||
await self.cdp_attach(tab_id)
|
||||
|
||||
@@ -381,9 +474,11 @@ class BeelineBridge:
|
||||
"Runtime.evaluate",
|
||||
{"expression": "document.readyState", "returnByValue": True},
|
||||
)
|
||||
ready_state = (
|
||||
(eval_result or {}).get("result", {}).get("result", {}).get("value", "")
|
||||
)
|
||||
# _cdp returns the CDP response body; Runtime.evaluate shape
|
||||
# is {"result": {"type": ..., "value": ...}} — one "result"
|
||||
# hop, not two. The extra hop was always returning "" and
|
||||
# this entire lifecycle loop was running until the deadline.
|
||||
ready_state = (eval_result or {}).get("result", {}).get("value", "")
|
||||
|
||||
if wait_until == "domcontentloaded" and ready_state in ("interactive", "complete"):
|
||||
break
|
||||
@@ -415,17 +510,31 @@ class BeelineBridge:
|
||||
return {
|
||||
"ok": True,
|
||||
"tabId": tab_id,
|
||||
"url": (url_result or {}).get("result", {}).get("result", {}).get("value", ""),
|
||||
"title": (title_result or {}).get("result", {}).get("result", {}).get("value", ""),
|
||||
"url": (url_result or {}).get("result", {}).get("value", ""),
|
||||
"title": (title_result or {}).get("result", {}).get("value", ""),
|
||||
}
|
||||
|
||||
async def go_back(self, tab_id: int) -> dict:
|
||||
"""Navigate back in history."""
|
||||
"""Navigate back in history.
|
||||
|
||||
Uses ``history.back()`` via Runtime.evaluate — modern Chrome CDP
|
||||
no longer exposes ``Page.goBack`` / ``Page.goForward`` (removed
|
||||
in favour of ``Page.navigateToHistoryEntry``, which requires
|
||||
first fetching the history list). ``history.back()`` is simpler,
|
||||
works across every Chrome version, and matches what the user
|
||||
expects when they call ``browser_go_back``.
|
||||
"""
|
||||
_interaction_highlights.pop(tab_id, None)
|
||||
await self.cdp_attach(tab_id)
|
||||
await self._cdp(tab_id, "Page.enable")
|
||||
await self._cdp(tab_id, "Page.goBack")
|
||||
|
||||
# Get current URL
|
||||
await self._cdp(
|
||||
tab_id,
|
||||
"Runtime.evaluate",
|
||||
{"expression": "history.back()", "returnByValue": True},
|
||||
)
|
||||
# Give the browser a beat to commit the navigation before we
|
||||
# read the new URL.
|
||||
await asyncio.sleep(0.3)
|
||||
result = await self._cdp(
|
||||
tab_id,
|
||||
"Runtime.evaluate",
|
||||
@@ -434,15 +543,20 @@ class BeelineBridge:
|
||||
return {
|
||||
"ok": True,
|
||||
"action": "back",
|
||||
"url": (result or {}).get("result", {}).get("result", {}).get("value", ""),
|
||||
"url": (result or {}).get("result", {}).get("value", ""),
|
||||
}
|
||||
|
||||
async def go_forward(self, tab_id: int) -> dict:
|
||||
"""Navigate forward in history."""
|
||||
"""Navigate forward in history. See go_back() for why we use JS."""
|
||||
_interaction_highlights.pop(tab_id, None)
|
||||
await self.cdp_attach(tab_id)
|
||||
await self._cdp(tab_id, "Page.enable")
|
||||
await self._cdp(tab_id, "Page.goForward")
|
||||
|
||||
await self._cdp(
|
||||
tab_id,
|
||||
"Runtime.evaluate",
|
||||
{"expression": "history.forward()", "returnByValue": True},
|
||||
)
|
||||
await asyncio.sleep(0.3)
|
||||
result = await self._cdp(
|
||||
tab_id,
|
||||
"Runtime.evaluate",
|
||||
@@ -451,11 +565,12 @@ class BeelineBridge:
|
||||
return {
|
||||
"ok": True,
|
||||
"action": "forward",
|
||||
"url": (result or {}).get("result", {}).get("result", {}).get("value", ""),
|
||||
"url": (result or {}).get("result", {}).get("value", ""),
|
||||
}
|
||||
|
||||
async def reload(self, tab_id: int) -> dict:
|
||||
"""Reload the page."""
|
||||
_interaction_highlights.pop(tab_id, None)
|
||||
await self.cdp_attach(tab_id)
|
||||
await self._cdp(tab_id, "Page.enable")
|
||||
await self._cdp(tab_id, "Page.reload")
|
||||
@@ -468,7 +583,7 @@ class BeelineBridge:
|
||||
return {
|
||||
"ok": True,
|
||||
"action": "reload",
|
||||
"url": (result or {}).get("result", {}).get("result", {}).get("value", ""),
|
||||
"url": (result or {}).get("result", {}).get("value", ""),
|
||||
}
|
||||
|
||||
# ── Interaction ────────────────────────────────────────────────────────────
|
||||
@@ -758,75 +873,150 @@ class BeelineBridge:
|
||||
clear_first: bool = True,
|
||||
delay_ms: int = 0,
|
||||
timeout_ms: int = 30000,
|
||||
use_insert_text: bool = True,
|
||||
) -> dict:
|
||||
"""Type text into an element.
|
||||
|
||||
Uses JavaScript focus for reliability, then CDP key events.
|
||||
Routes through a real CDP pointer click on the target rect BEFORE
|
||||
inserting text. This is critical for rich-text editors (Draft.js,
|
||||
Lexical, ProseMirror, React-controlled contenteditable): those
|
||||
frameworks only register input as "real" after seeing a native
|
||||
focus event sourced from a real pointer interaction — a
|
||||
JS-sourced ``el.focus()`` is ignored, and the submit button
|
||||
stays disabled because the framework's internal state never
|
||||
updates. Sending a CDP click first fires the real
|
||||
pointerdown/pointerup/click/focus sequence that every modern
|
||||
framework listens to.
|
||||
|
||||
After clicking, we insert text via ``Input.insertText`` by
|
||||
default (``use_insert_text=True``). insertText is a dedicated
|
||||
CDP method that asks the browser to commit text into the
|
||||
focused element as if IME just committed it — it works
|
||||
cleanly on rich editors where per-character keyDown events
|
||||
would otherwise be eaten or mis-timed (empirically verified
|
||||
against LinkedIn's Lexical message composer 2026-04-11).
|
||||
Playwright uses the same approach under the hood.
|
||||
|
||||
Set ``use_insert_text=False`` to get the old per-character
|
||||
keyDown/keyUp path when an editor needs precise keystroke
|
||||
timing (autocomplete triggers, code editors that fire on
|
||||
specific chars, ``delay_ms`` typing animations).
|
||||
"""
|
||||
await self.cdp_attach(tab_id)
|
||||
await self._try_enable_domain(tab_id, "DOM")
|
||||
await self._try_enable_domain(tab_id, "Input")
|
||||
await self._try_enable_domain(tab_id, "Runtime")
|
||||
|
||||
# First, scroll into view and focus via JavaScript (more reliable than CDP)
|
||||
# Find + scroll + (optionally) clear via JS. We still need the
|
||||
# rect, and clearing via `.value = ''` / `.textContent = ''`
|
||||
# is the most reliable way to reset pre-existing content.
|
||||
focus_script = f"""
|
||||
(function() {{
|
||||
const el = document.querySelector({json.dumps(selector)});
|
||||
if (!el) return false;
|
||||
if (!el) return null;
|
||||
|
||||
// Scroll into view
|
||||
// Scroll into view so the click lands in-viewport.
|
||||
el.scrollIntoView({{ block: 'center' }});
|
||||
|
||||
// Focus the element
|
||||
el.focus();
|
||||
|
||||
// Clear if requested
|
||||
// Clear if requested.
|
||||
if ({str(clear_first).lower()}) {{
|
||||
if (el.value !== undefined) {{
|
||||
el.value = '';
|
||||
// Nudge React's onChange — the framework reads
|
||||
// .value via a setter hook, and without firing
|
||||
// an input event the component state remains
|
||||
// stale after our value assignment.
|
||||
el.dispatchEvent(new Event('input', {{bubbles: true}}));
|
||||
}} else if (el.isContentEditable) {{
|
||||
el.textContent = '';
|
||||
el.dispatchEvent(new Event('input', {{bubbles: true}}));
|
||||
}}
|
||||
}}
|
||||
|
||||
return true;
|
||||
const r = el.getBoundingClientRect();
|
||||
return {{
|
||||
x: r.left + r.width / 2,
|
||||
y: r.top + r.height / 2,
|
||||
w: r.width,
|
||||
h: r.height,
|
||||
}};
|
||||
}})();
|
||||
"""
|
||||
|
||||
focus_result = await self.evaluate(tab_id, focus_script)
|
||||
success = (focus_result or {}).get("result", False)
|
||||
rect = (focus_result or {}).get("result")
|
||||
|
||||
if not success:
|
||||
# Element not found - wait and retry
|
||||
if not rect:
|
||||
# Element not found — wait + retry until timeout.
|
||||
deadline = asyncio.get_event_loop().time() + timeout_ms / 1000
|
||||
while asyncio.get_event_loop().time() < deadline:
|
||||
result = await self.evaluate(tab_id, focus_script)
|
||||
if result and (result or {}).get("result", False):
|
||||
success = True
|
||||
rect = (result or {}).get("result") if result else None
|
||||
if rect:
|
||||
break
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
if not success:
|
||||
if not rect:
|
||||
return {"ok": False, "error": f"Element not found: {selector}"}
|
||||
|
||||
await asyncio.sleep(0.05) # Wait for focus to take effect
|
||||
if not rect.get("w") or not rect.get("h"):
|
||||
return {
|
||||
"ok": False,
|
||||
"error": f"Element has zero dimensions, can't click to focus: {selector}",
|
||||
}
|
||||
|
||||
# Type each character using CDP key events
|
||||
for char in text:
|
||||
# Dispatch key down
|
||||
await self._cdp(
|
||||
tab_id,
|
||||
"Input.dispatchKeyEvent",
|
||||
{"type": "keyDown", "text": char},
|
||||
)
|
||||
# Dispatch key up
|
||||
await self._cdp(
|
||||
tab_id,
|
||||
"Input.dispatchKeyEvent",
|
||||
{"type": "keyUp", "text": char},
|
||||
)
|
||||
if delay_ms > 0:
|
||||
await asyncio.sleep(delay_ms / 1000)
|
||||
# Fire a real CDP pointer click at the element's center. This is
|
||||
# what unblocks rich-text editors — JS el.focus() is not enough.
|
||||
click_x = rect["x"]
|
||||
click_y = rect["y"]
|
||||
await self._cdp(
|
||||
tab_id,
|
||||
"Input.dispatchMouseEvent",
|
||||
{"type": "mousePressed", "x": click_x, "y": click_y, "button": "left", "clickCount": 1},
|
||||
)
|
||||
await self._cdp(
|
||||
tab_id,
|
||||
"Input.dispatchMouseEvent",
|
||||
{"type": "mouseReleased", "x": click_x, "y": click_y, "button": "left", "clickCount": 1},
|
||||
)
|
||||
await asyncio.sleep(0.15) # Let focus / editor-init animations settle.
|
||||
|
||||
if use_insert_text and delay_ms <= 0:
|
||||
# CDP Input.insertText is the most reliable way to insert
|
||||
# text into a rich-text editor. It bypasses the keyboard
|
||||
# event pipeline entirely and commits text into the focused
|
||||
# element as if IME just committed it. Works on plain
|
||||
# <input>/<textarea>, contenteditable, Lexical, Draft.js,
|
||||
# ProseMirror, Monaco textarea buffers — verified empirically
|
||||
# against LinkedIn's message composer (Lexical) on 2026-04-11
|
||||
# where the per-char keyDown path left the editor empty.
|
||||
await self._cdp(tab_id, "Input.insertText", {"text": text})
|
||||
else:
|
||||
# Fallback path: per-character keyDown/keyUp with full key,
|
||||
# code, and text fields. Used when the caller explicitly
|
||||
# wants per-keystroke dispatch (autocomplete testing, code
|
||||
# editors that fire on specific chars, animated typing
|
||||
# with ``delay_ms``). Populating ``code`` for ASCII is
|
||||
# needed so frameworks that branch on ``event.code`` see
|
||||
# the right values.
|
||||
for char in text:
|
||||
key_params: dict[str, Any] = {
|
||||
"type": "keyDown",
|
||||
"text": char,
|
||||
"key": char,
|
||||
}
|
||||
if len(char) == 1 and char.isalpha():
|
||||
key_params["code"] = f"Key{char.upper()}"
|
||||
elif len(char) == 1 and char.isdigit():
|
||||
key_params["code"] = f"Digit{char}"
|
||||
await self._cdp(tab_id, "Input.dispatchKeyEvent", key_params)
|
||||
|
||||
key_up = {"type": "keyUp", "key": char}
|
||||
if "code" in key_params:
|
||||
key_up["code"] = key_params["code"]
|
||||
await self._cdp(tab_id, "Input.dispatchKeyEvent", key_up)
|
||||
if delay_ms > 0:
|
||||
await asyncio.sleep(delay_ms / 1000)
|
||||
|
||||
# Highlight the element that was typed into
|
||||
rect_result = await self.evaluate(
|
||||
@@ -843,12 +1033,47 @@ class BeelineBridge:
|
||||
)
|
||||
return {"ok": True, "action": "type", "selector": selector, "length": len(text)}
|
||||
|
||||
async def press_key(self, tab_id: int, key: str, selector: str | None = None) -> dict:
|
||||
"""Press a keyboard key.
|
||||
# CDP Input.dispatchKeyEvent modifiers bitmask.
|
||||
_CDP_MODIFIERS = {"alt": 1, "ctrl": 2, "control": 2, "meta": 4, "cmd": 4, "shift": 8}
|
||||
|
||||
# How Chrome expects each modifier key as its OWN keyDown event —
|
||||
# name, code, and Windows virtual key code. Dispatched before the
|
||||
# main key so Chrome sees the modifier as "held" during the main
|
||||
# event, which is what actually triggers browser shortcuts like
|
||||
# Ctrl+A, Cmd+L, Shift+Tab.
|
||||
_MODIFIER_KEYS = {
|
||||
"alt": {"key": "Alt", "code": "AltLeft", "windowsVirtualKeyCode": 18},
|
||||
"ctrl": {"key": "Control", "code": "ControlLeft", "windowsVirtualKeyCode": 17},
|
||||
"control": {"key": "Control", "code": "ControlLeft", "windowsVirtualKeyCode": 17},
|
||||
"meta": {"key": "Meta", "code": "MetaLeft", "windowsVirtualKeyCode": 91},
|
||||
"cmd": {"key": "Meta", "code": "MetaLeft", "windowsVirtualKeyCode": 91},
|
||||
"shift": {"key": "Shift", "code": "ShiftLeft", "windowsVirtualKeyCode": 16},
|
||||
}
|
||||
|
||||
def _cdp_modifier_mask(self, modifiers: list[str] | None) -> int:
|
||||
if not modifiers:
|
||||
return 0
|
||||
mask = 0
|
||||
for m in modifiers:
|
||||
mask |= self._CDP_MODIFIERS.get(m.lower(), 0)
|
||||
return mask
|
||||
|
||||
async def press_key(
|
||||
self,
|
||||
tab_id: int,
|
||||
key: str,
|
||||
selector: str | None = None,
|
||||
modifiers: list[str] | None = None,
|
||||
) -> dict:
|
||||
"""Press a keyboard key, optionally with modifier keys held.
|
||||
|
||||
Args:
|
||||
key: Key name like 'Enter', 'Tab', 'Escape', 'ArrowDown', etc.
|
||||
selector: Optional selector to focus first
|
||||
modifiers: Optional list of modifier keys to hold while pressing
|
||||
``key``. Accepted values: "alt", "ctrl"/"control", "meta"/"cmd",
|
||||
"shift". Example: ``modifiers=["ctrl"]`` → Ctrl+key, which
|
||||
enables shortcuts like Ctrl+A, Ctrl+L, Cmd+Enter, Shift+Tab.
|
||||
"""
|
||||
await self.cdp_attach(tab_id)
|
||||
await self._try_enable_domain(tab_id, "Input")
|
||||
@@ -881,19 +1106,110 @@ class BeelineBridge:
|
||||
}
|
||||
|
||||
text, key_name = key_map.get(key, (key, key))
|
||||
mod_mask = self._cdp_modifier_mask(modifiers)
|
||||
|
||||
await self._cdp(
|
||||
tab_id,
|
||||
"Input.dispatchKeyEvent",
|
||||
{"type": "keyDown", "key": key_name, "text": text if text else None},
|
||||
)
|
||||
await self._cdp(
|
||||
tab_id,
|
||||
"Input.dispatchKeyEvent",
|
||||
{"type": "keyUp", "key": key_name, "text": text if text else None},
|
||||
)
|
||||
# With modifiers held, suppress the printable text so that
|
||||
# e.g. Ctrl+A doesn't also type the character "a" into the
|
||||
# focused field (CDP will still fire the shortcut).
|
||||
effective_text = text if (text and mod_mask == 0) else None
|
||||
|
||||
return {"ok": True, "action": "press", "key": key}
|
||||
# Compute ``code`` and ``windowsVirtualKeyCode`` for the main
|
||||
# key. These are MANDATORY for Chrome's shortcut dispatcher —
|
||||
# without them, Ctrl+A etc. reach the DOM with ``code=""`` and
|
||||
# ``which=0`` and Chrome doesn't recognise them as shortcuts.
|
||||
# Verified empirically on chrome 131 against a real input.
|
||||
main_code: str | None = None
|
||||
main_vk: int | None = None
|
||||
special_vk = {
|
||||
"Enter": (13, "Enter"),
|
||||
"Tab": (9, "Tab"),
|
||||
"Escape": (27, "Escape"),
|
||||
"Backspace": (8, "Backspace"),
|
||||
"Delete": (46, "Delete"),
|
||||
"ArrowUp": (38, "ArrowUp"),
|
||||
"ArrowDown": (40, "ArrowDown"),
|
||||
"ArrowLeft": (37, "ArrowLeft"),
|
||||
"ArrowRight": (39, "ArrowRight"),
|
||||
"Home": (36, "Home"),
|
||||
"End": (35, "End"),
|
||||
"PageUp": (33, "PageUp"),
|
||||
"PageDown": (34, "PageDown"),
|
||||
}
|
||||
if key_name in special_vk:
|
||||
main_vk, main_code = special_vk[key_name]
|
||||
elif len(key_name) == 1 and key_name.isalpha():
|
||||
main_code = f"Key{key_name.upper()}"
|
||||
main_vk = ord(key_name.upper()) # 'A' = 65 ... 'Z' = 90
|
||||
elif len(key_name) == 1 and key_name.isdigit():
|
||||
main_code = f"Digit{key_name}"
|
||||
main_vk = ord(key_name) # '0' = 48 ... '9' = 57
|
||||
|
||||
# Press each modifier as a separate keyDown BEFORE the main
|
||||
# key. Sending ``modifiers: mask`` on the main key alone isn't
|
||||
# enough — Chrome's shortcut dispatcher looks for a held
|
||||
# modifier event, not just a flag. Matches the Playwright /
|
||||
# Puppeteer sequence. Release modifiers in reverse order after
|
||||
# the main key so the "held" state is correct throughout.
|
||||
pressed_mods: list[dict] = []
|
||||
if modifiers:
|
||||
for m in modifiers:
|
||||
spec = self._MODIFIER_KEYS.get(m.lower())
|
||||
if spec is None:
|
||||
continue
|
||||
await self._cdp(
|
||||
tab_id,
|
||||
"Input.dispatchKeyEvent",
|
||||
{
|
||||
"type": "keyDown",
|
||||
"key": spec["key"],
|
||||
"code": spec["code"],
|
||||
"windowsVirtualKeyCode": spec["windowsVirtualKeyCode"],
|
||||
"modifiers": mod_mask,
|
||||
},
|
||||
)
|
||||
pressed_mods.append(spec)
|
||||
|
||||
main_down: dict[str, Any] = {
|
||||
# Use rawKeyDown when a modifier is held so Chrome skips
|
||||
# text insertion and routes the event to the shortcut
|
||||
# dispatcher. For plain press_key without modifiers we can
|
||||
# use regular keyDown.
|
||||
"type": "rawKeyDown" if mod_mask else "keyDown",
|
||||
"key": key_name,
|
||||
"text": effective_text,
|
||||
"modifiers": mod_mask,
|
||||
}
|
||||
main_up: dict[str, Any] = {
|
||||
"type": "keyUp",
|
||||
"key": key_name,
|
||||
"text": effective_text,
|
||||
"modifiers": mod_mask,
|
||||
}
|
||||
if main_code is not None:
|
||||
main_down["code"] = main_code
|
||||
main_up["code"] = main_code
|
||||
if main_vk is not None:
|
||||
main_down["windowsVirtualKeyCode"] = main_vk
|
||||
main_up["windowsVirtualKeyCode"] = main_vk
|
||||
|
||||
await self._cdp(tab_id, "Input.dispatchKeyEvent", main_down)
|
||||
await self._cdp(tab_id, "Input.dispatchKeyEvent", main_up)
|
||||
|
||||
# Release modifiers in reverse order.
|
||||
for spec in reversed(pressed_mods):
|
||||
await self._cdp(
|
||||
tab_id,
|
||||
"Input.dispatchKeyEvent",
|
||||
{
|
||||
"type": "keyUp",
|
||||
"key": spec["key"],
|
||||
"code": spec["code"],
|
||||
"windowsVirtualKeyCode": spec["windowsVirtualKeyCode"],
|
||||
"modifiers": 0,
|
||||
},
|
||||
)
|
||||
|
||||
return {"ok": True, "action": "press", "key": key, "modifiers": modifiers or []}
|
||||
|
||||
# Shared JS snippet: shadow-piercing querySelector via ">>>" separator
|
||||
_SHADOW_QUERY_JS = """
|
||||
@@ -915,9 +1231,15 @@ class BeelineBridge:
|
||||
Example: '#interop-outlet >>> #ember37 >>> p'
|
||||
"""
|
||||
await self.cdp_attach(tab_id)
|
||||
# IMPORTANT: the whole script must be a single IIFE so that
|
||||
# bridge.evaluate() detects it as "already wrapped" and returns
|
||||
# its value. If you let evaluate() re-wrap a script that
|
||||
# starts with a function declaration, the outer wrapper
|
||||
# discards the inner IIFE's return and you always get None —
|
||||
# which is exactly the bug this code had until 2026-04-11.
|
||||
script = (
|
||||
f"{self._SHADOW_QUERY_JS}"
|
||||
f"(function(){{"
|
||||
f"{self._SHADOW_QUERY_JS}"
|
||||
f"const el=_shadowQuery({json.dumps(selector)});"
|
||||
f"if(!el)return null;"
|
||||
f"const r=el.getBoundingClientRect();"
|
||||
@@ -1064,8 +1386,12 @@ class BeelineBridge:
|
||||
await self.highlight_point(tab_id, x, y, label=f"{key} ({x},{y})")
|
||||
return {"ok": True, "action": "press_at", "x": x, "y": y, "key": key}
|
||||
|
||||
# Duration (ms) that injected highlights stay visible before fading out.
|
||||
_HIGHLIGHT_DURATION_MS = 1500
|
||||
# Duration (ms) that injected highlights stay visible before fading.
|
||||
# Bumped from 1500 → 10000 so the overlay outlives typical agent turn
|
||||
# latency (LLM streaming + tool batching often runs 3-8s). With the
|
||||
# old 1.5s lifetime the overlay was already gone by the time the
|
||||
# next ``browser_screenshot`` fired, which is why it looked "flaky".
|
||||
_HIGHLIGHT_DURATION_MS = 10000
|
||||
|
||||
async def highlight_rect(
|
||||
self,
|
||||
@@ -1093,9 +1419,12 @@ class BeelineBridge:
|
||||
|
||||
js = f"""
|
||||
(function() {{
|
||||
// Remove any previous hive highlight
|
||||
var old = document.getElementById('__hive_hl');
|
||||
if (old) old.remove();
|
||||
// Remove any previous hive highlight (including its observer).
|
||||
var prev = document.getElementById('__hive_hl');
|
||||
if (prev) {{
|
||||
try {{ prev.__hiveStop && prev.__hiveStop(); }} catch(e) {{}}
|
||||
prev.remove();
|
||||
}}
|
||||
|
||||
var box = document.createElement('div');
|
||||
box.id = '__hive_hl';
|
||||
@@ -1116,16 +1445,52 @@ class BeelineBridge:
|
||||
box.appendChild(tag);
|
||||
}}
|
||||
|
||||
document.documentElement.appendChild(box);
|
||||
setTimeout(function() {{ box.style.opacity = '0'; }}, {duration});
|
||||
setTimeout(function() {{ box.remove(); }}, {duration + 500});
|
||||
var parent = document.documentElement;
|
||||
parent.appendChild(box);
|
||||
|
||||
// SPA re-mount protection: some frameworks (React/Vue/etc.) and
|
||||
// some host pages run MutationObservers that strip unknown
|
||||
// children from documentElement. Watch for our box being
|
||||
// removed and re-attach it — but cap the retries so we don't
|
||||
// get into a DOM-thrash loop with a hostile host observer.
|
||||
var stopped = false;
|
||||
var retries = 0;
|
||||
var MAX_RETRIES = 5;
|
||||
var obs = new MutationObserver(function() {{
|
||||
if (stopped) return;
|
||||
if (!document.getElementById('__hive_hl')) {{
|
||||
if (retries >= MAX_RETRIES) {{
|
||||
stopped = true;
|
||||
try {{ obs.disconnect(); }} catch(e) {{}}
|
||||
return;
|
||||
}}
|
||||
retries++;
|
||||
try {{ parent.appendChild(box); }} catch(e) {{}}
|
||||
}}
|
||||
}});
|
||||
try {{ obs.observe(parent, {{childList:true, subtree:false}}); }} catch(e) {{}}
|
||||
box.__hiveStop = function() {{
|
||||
stopped = true;
|
||||
try {{ obs.disconnect(); }} catch(e) {{}}
|
||||
}};
|
||||
|
||||
setTimeout(function() {{
|
||||
if (box.isConnected) box.style.opacity = '0';
|
||||
}}, {duration});
|
||||
setTimeout(function() {{
|
||||
stopped = true;
|
||||
try {{ obs.disconnect(); }} catch(e) {{}}
|
||||
box.remove();
|
||||
}}, {duration + 500});
|
||||
}})();
|
||||
"""
|
||||
try:
|
||||
await self.cdp_attach(tab_id)
|
||||
await self.evaluate(tab_id, js)
|
||||
except Exception:
|
||||
pass # best-effort visual feedback
|
||||
except Exception as exc:
|
||||
# Best-effort visual feedback, but log rather than silently
|
||||
# swallow so we can diagnose CSP / mid-navigation failures.
|
||||
logger.debug("highlight_rect injection failed on tab %d: %s", tab_id, exc)
|
||||
|
||||
_interaction_highlights[tab_id] = {
|
||||
"x": x,
|
||||
@@ -1143,8 +1508,11 @@ class BeelineBridge:
|
||||
|
||||
js = f"""
|
||||
(function() {{
|
||||
var old = document.getElementById('__hive_hl');
|
||||
if (old) old.remove();
|
||||
var prev = document.getElementById('__hive_hl');
|
||||
if (prev) {{
|
||||
try {{ prev.__hiveStop && prev.__hiveStop(); }} catch(e) {{}}
|
||||
prev.remove();
|
||||
}}
|
||||
|
||||
var dot = document.createElement('div');
|
||||
dot.id = '__hive_hl';
|
||||
@@ -1164,16 +1532,46 @@ class BeelineBridge:
|
||||
dot.appendChild(tag);
|
||||
}}
|
||||
|
||||
document.documentElement.appendChild(dot);
|
||||
setTimeout(function() {{ dot.style.opacity = '0'; }}, {duration});
|
||||
setTimeout(function() {{ dot.remove(); }}, {duration + 500});
|
||||
var parent = document.documentElement;
|
||||
parent.appendChild(dot);
|
||||
|
||||
// SPA re-mount protection — see highlight_rect comment.
|
||||
var stopped = false;
|
||||
var retries = 0;
|
||||
var MAX_RETRIES = 5;
|
||||
var obs = new MutationObserver(function() {{
|
||||
if (stopped) return;
|
||||
if (!document.getElementById('__hive_hl')) {{
|
||||
if (retries >= MAX_RETRIES) {{
|
||||
stopped = true;
|
||||
try {{ obs.disconnect(); }} catch(e) {{}}
|
||||
return;
|
||||
}}
|
||||
retries++;
|
||||
try {{ parent.appendChild(dot); }} catch(e) {{}}
|
||||
}}
|
||||
}});
|
||||
try {{ obs.observe(parent, {{childList:true, subtree:false}}); }} catch(e) {{}}
|
||||
dot.__hiveStop = function() {{
|
||||
stopped = true;
|
||||
try {{ obs.disconnect(); }} catch(e) {{}}
|
||||
}};
|
||||
|
||||
setTimeout(function() {{
|
||||
if (dot.isConnected) dot.style.opacity = '0';
|
||||
}}, {duration});
|
||||
setTimeout(function() {{
|
||||
stopped = true;
|
||||
try {{ obs.disconnect(); }} catch(e) {{}}
|
||||
dot.remove();
|
||||
}}, {duration + 500});
|
||||
}})();
|
||||
"""
|
||||
try:
|
||||
await self.cdp_attach(tab_id)
|
||||
await self.evaluate(tab_id, js)
|
||||
except Exception:
|
||||
pass
|
||||
except Exception as exc:
|
||||
logger.debug("highlight_point injection failed on tab %d: %s", tab_id, exc)
|
||||
|
||||
_interaction_highlights[tab_id] = {
|
||||
"x": x,
|
||||
@@ -1678,7 +2076,11 @@ class BeelineBridge:
|
||||
"Runtime.evaluate",
|
||||
{"expression": script, "returnByValue": True},
|
||||
)
|
||||
text = (result or {}).get("result", {}).get("result", {}).get("value")
|
||||
# _cdp returns the raw CDP response {"result":{"type":...,"value":...}}.
|
||||
# The extra .get("result") hop was dropping the value — every
|
||||
# successful lookup was silently misreported as "not found" until
|
||||
# the deadline fired.
|
||||
text = (result or {}).get("result", {}).get("value")
|
||||
if text is not None:
|
||||
return {"ok": True, "selector": selector, "text": text}
|
||||
await asyncio.sleep(0.1)
|
||||
@@ -1705,7 +2107,9 @@ class BeelineBridge:
|
||||
"Runtime.evaluate",
|
||||
{"expression": script, "returnByValue": True},
|
||||
)
|
||||
value = (result or {}).get("result", {}).get("result", {}).get("value")
|
||||
# Same unwrap bug as get_text_by_selector — the response shape
|
||||
# is {"result":{"type":...,"value":...}}, one "result", not two.
|
||||
value = (result or {}).get("result", {}).get("value")
|
||||
if value is not None:
|
||||
return {"ok": True, "selector": selector, "attribute": attribute, "value": value}
|
||||
await asyncio.sleep(0.1)
|
||||
@@ -1746,7 +2150,8 @@ class BeelineBridge:
|
||||
"returnByValue": True,
|
||||
},
|
||||
)
|
||||
rect = (rect_result or {}).get("result", {}).get("result", {}).get("value")
|
||||
# One "result" hop — see comment in the meta fetch below.
|
||||
rect = (rect_result or {}).get("result", {}).get("value")
|
||||
if rect and rect.get("width") and rect.get("height"):
|
||||
params["clip"] = {
|
||||
"x": rect["x"],
|
||||
@@ -1793,7 +2198,14 @@ class BeelineBridge:
|
||||
"returnByValue": True,
|
||||
},
|
||||
)
|
||||
meta = (meta_result or {}).get("result", {}).get("result", {}).get("value") or {}
|
||||
# _cdp returns the raw CDP response body, which for Runtime.evaluate
|
||||
# is {"result": {"type": ..., "value": <our returned object>}}. The
|
||||
# previous code did .get("result").get("result").get("value") —
|
||||
# that extra hop dropped everything, so cssWidth always defaulted
|
||||
# to 0 and devicePixelRatio to 1.0. Which in turn collapsed
|
||||
# physical_scale and css_scale into the same number and made
|
||||
# post-screenshot clicks land at DPR× the intended coordinate.
|
||||
meta = (meta_result or {}).get("result", {}).get("value") or {}
|
||||
|
||||
dpr = meta.get("dpr", 1.0)
|
||||
css_w = meta.get("cssWidth", 0)
|
||||
@@ -1854,7 +2266,10 @@ class BeelineBridge:
|
||||
"Runtime.evaluate",
|
||||
{"expression": script, "returnByValue": True},
|
||||
)
|
||||
found = (result or {}).get("result", {}).get("result", {}).get("value", False)
|
||||
# One "result" hop — see navigate() comment. This was silently
|
||||
# returning False on every poll, so wait_for_selector always
|
||||
# reported "not found" after the full timeout.
|
||||
found = (result or {}).get("result", {}).get("value", False)
|
||||
if found:
|
||||
return {"ok": True, "selector": selector}
|
||||
await asyncio.sleep(0.1)
|
||||
@@ -1878,7 +2293,8 @@ class BeelineBridge:
|
||||
"Runtime.evaluate",
|
||||
{"expression": script, "returnByValue": True},
|
||||
)
|
||||
found = (result or {}).get("result", {}).get("result", {}).get("value", False)
|
||||
# Same unwrap bug as wait_for_selector.
|
||||
found = (result or {}).get("result", {}).get("value", False)
|
||||
if found:
|
||||
return {"ok": True, "text": text}
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
@@ -298,11 +298,13 @@ def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
"cssScale": css_scale,
|
||||
"annotated": bool(highlights),
|
||||
"scaleHint": (
|
||||
f"image_coord × {physical_scale} = physical px "
|
||||
f"(for browser_click_coordinate/"
|
||||
f"hover_coordinate); "
|
||||
f"image_coord × {css_scale} = CSS px "
|
||||
f"(for getBoundingClientRect)"
|
||||
f"→ feed to browser_click_coordinate, "
|
||||
f"browser_hover_coordinate, browser_press_at "
|
||||
f"(CDP Input events use CSS pixels). "
|
||||
f"image_coord × {physical_scale} = physical px "
|
||||
f"is debug-only on HiDPI displays and must NOT "
|
||||
f"be used for clicks — it overshoots by DPR×."
|
||||
),
|
||||
}
|
||||
)
|
||||
@@ -343,24 +345,33 @@ def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
profile: str | None = None,
|
||||
) -> dict:
|
||||
"""
|
||||
Convert screenshot image coordinates to browser coordinates.
|
||||
Convert screenshot image coordinates to browser click coordinates.
|
||||
|
||||
After browser_screenshot returns an 800px-wide image, use this to translate
|
||||
pixel positions you see in the image into the two coordinate spaces used by
|
||||
browser tools:
|
||||
After browser_screenshot returns a downscaled image, use this to
|
||||
translate pixel positions you see in the image into the CSS pixel
|
||||
coordinates that Chrome DevTools Protocol expects.
|
||||
|
||||
- physical_x/y → use with browser_click_coordinate, browser_hover_coordinate,
|
||||
browser_press_at (CDP Input events work in physical pixels)
|
||||
- css_x/y → use with getBoundingClientRect comparisons and DOM APIs
|
||||
**CDP Input.dispatchMouseEvent uses CSS pixels**, so you want
|
||||
``css_x`` / ``css_y`` for every click/hover tool. ``physical_x/y``
|
||||
is kept in the return for debugging on HiDPI displays — do NOT
|
||||
feed it to clicks; on a DPR=2 screen it lands 2× too far.
|
||||
|
||||
Edge case: pages using ``zoom`` or ``transform: scale()`` (e.g.
|
||||
LinkedIn's ``#interop-outlet`` shadow DOM) render in a scaled
|
||||
local coordinate space. For those, ``getBoundingClientRect()``
|
||||
reports pre-zoom coordinates and you may still need to multiply
|
||||
by the element's effective zoom. Use browser_shadow_query to
|
||||
get the zoomed rect directly.
|
||||
|
||||
Args:
|
||||
x: X pixel position in the 800px screenshot image
|
||||
y: Y pixel position in the 800px screenshot image
|
||||
x: X pixel position in the screenshot image
|
||||
y: Y pixel position in the screenshot image
|
||||
tab_id: Chrome tab ID (default: active tab for profile)
|
||||
profile: Browser profile name (default: "default")
|
||||
|
||||
Returns:
|
||||
Dict with physical_x, physical_y, css_x, css_y, and scale factors
|
||||
Dict with css_x, css_y (primary — use these), physical_x,
|
||||
physical_y (debug only), and scale factors.
|
||||
"""
|
||||
ctx = _get_context(profile)
|
||||
target_tab = tab_id or (ctx.get("activeTabId") if ctx else None)
|
||||
@@ -373,18 +384,25 @@ def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
"physical_x": round(x * physical_scale, 1),
|
||||
"physical_y": round(y * physical_scale, 1),
|
||||
# Primary output: CSS pixels. Feed these to click/hover/press.
|
||||
"css_x": round(x * css_scale, 1),
|
||||
"css_y": round(y * css_scale, 1),
|
||||
# Debug output: raw physical pixels. DO NOT feed to clicks on
|
||||
# HiDPI displays — CDP Input events use CSS pixels, so sending
|
||||
# physical coordinates lands the click at roughly DPR× the
|
||||
# intended position.
|
||||
"physical_x": round(x * physical_scale, 1),
|
||||
"physical_y": round(y * physical_scale, 1),
|
||||
"physicalScale": physical_scale,
|
||||
"cssScale": css_scale,
|
||||
"tabId": target_tab,
|
||||
"note": (
|
||||
"Use physical_x/y with browser_click_coordinate,"
|
||||
" browser_hover_coordinate, browser_press_at."
|
||||
" Use css_x/y with getBoundingClientRect"
|
||||
" and DOM APIs."
|
||||
"Use css_x/css_y with browser_click_coordinate, "
|
||||
"browser_hover_coordinate, browser_press_at — "
|
||||
"Chrome DevTools Protocol Input.dispatchMouseEvent "
|
||||
"operates in CSS pixels. physical_x/y is for debugging "
|
||||
"on HiDPI displays only; feeding it to clicks lands "
|
||||
"them at DPR× the intended coordinate."
|
||||
),
|
||||
}
|
||||
|
||||
@@ -450,11 +468,11 @@ def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
"cy": round(rect["cy"] * dpr, 1),
|
||||
},
|
||||
"note": (
|
||||
"Use physical.cx/cy with"
|
||||
" browser_click_coordinate or"
|
||||
" browser_hover_coordinate."
|
||||
" Use css.cx/cy with"
|
||||
" getBoundingClientRect comparisons."
|
||||
"Use css.cx/cy with browser_click_coordinate, "
|
||||
"browser_hover_coordinate, browser_press_at — "
|
||||
"CDP Input events operate in CSS pixels. "
|
||||
"physical.* is debug-only; feeding it to clicks "
|
||||
"lands them DPR× too far on HiDPI displays."
|
||||
),
|
||||
}
|
||||
|
||||
@@ -468,8 +486,10 @@ def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
Get the bounding rect of an element by CSS selector.
|
||||
|
||||
Supports '>>>' shadow-piercing selectors for overlay/shadow DOM content.
|
||||
Returns coordinates in both CSS pixels (for DOM APIs) and physical pixels
|
||||
(for browser_click_coordinate, browser_hover_coordinate, browser_press_at).
|
||||
Returns coordinates in CSS pixels (for clicks and DOM APIs); the
|
||||
physical-pixel variant is returned for debugging on HiDPI displays
|
||||
only — it must not be fed to click/hover/press tools, which use
|
||||
CSS pixels.
|
||||
|
||||
Args:
|
||||
selector: CSS selector, optionally with ' >>> ' to pierce shadow roots.
|
||||
@@ -519,7 +539,13 @@ def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
"cx": round(rect["cx"] * dpr, 1),
|
||||
"cy": round(rect["cy"] * dpr, 1),
|
||||
},
|
||||
"note": "Use physical.cx/cy with browser_click_coordinate or browser_hover_coordinate.",
|
||||
"note": (
|
||||
"Use css.cx/cy with browser_click_coordinate, "
|
||||
"browser_hover_coordinate, browser_press_at — "
|
||||
"CDP Input events operate in CSS pixels. "
|
||||
"physical.* is debug-only; feeding it to clicks "
|
||||
"lands them DPR× too far on HiDPI displays."
|
||||
),
|
||||
}
|
||||
|
||||
@mcp.tool()
|
||||
|
||||
@@ -104,11 +104,18 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
button: Literal["left", "right", "middle"] = "left",
|
||||
) -> dict:
|
||||
"""
|
||||
Click at specific viewport coordinates.
|
||||
Click at specific viewport coordinates (CSS pixels).
|
||||
|
||||
Chrome DevTools Protocol's Input.dispatchMouseEvent operates in
|
||||
**CSS pixels**, not physical pixels. If you have a screenshot
|
||||
image coordinate, convert it with ``browser_coords(x, y)`` and
|
||||
use the returned ``css_x`` / ``css_y`` — not ``physical_x/y``.
|
||||
On a DPR=2 display, feeding physical coordinates lands the click
|
||||
at 2× the intended position.
|
||||
|
||||
Args:
|
||||
x: X coordinate in the viewport
|
||||
y: Y coordinate in the viewport
|
||||
x: X coordinate in CSS pixels (viewport space)
|
||||
y: Y coordinate in CSS pixels (viewport space)
|
||||
tab_id: Chrome tab ID (default: active tab)
|
||||
profile: Browser profile name (default: "default")
|
||||
button: Mouse button to click (left, right, middle)
|
||||
@@ -171,18 +178,37 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
delay_ms: int = 0,
|
||||
clear_first: bool = True,
|
||||
timeout_ms: int = 30000,
|
||||
use_insert_text: bool = True,
|
||||
) -> dict:
|
||||
"""
|
||||
Type text into an input element.
|
||||
|
||||
Automatically routes through a real CDP pointer click on the
|
||||
element before inserting text — so that rich-text editors like
|
||||
Lexical (Gmail, LinkedIn DMs), Draft.js (X compose), and
|
||||
ProseMirror (Reddit) see a native focus event and enable their
|
||||
submit buttons. See the gcu-browser skill for the full "click-
|
||||
then-type" pattern.
|
||||
|
||||
By default uses CDP Input.insertText which is the most reliable
|
||||
way to insert text into rich editors. Set
|
||||
``use_insert_text=False`` to fall back to per-character
|
||||
keyDown/keyUp events (needed only for code editors that fire
|
||||
on specific keystrokes, or when ``delay_ms`` typing animation
|
||||
is required).
|
||||
|
||||
Args:
|
||||
selector: CSS selector for the input element
|
||||
text: Text to type
|
||||
tab_id: Chrome tab ID (default: active tab)
|
||||
profile: Browser profile name (default: "default")
|
||||
delay_ms: Delay between keystrokes in ms (default: 0)
|
||||
delay_ms: Delay between keystrokes in ms (default: 0).
|
||||
Forces the per-keystroke fallback when > 0.
|
||||
clear_first: Clear existing text before typing (default: True)
|
||||
timeout_ms: Timeout waiting for element (default: 30000)
|
||||
use_insert_text: Use CDP Input.insertText (default: True) for
|
||||
reliable insertion into rich-text editors.
|
||||
Set False for per-keystroke dispatch.
|
||||
|
||||
Returns:
|
||||
Dict with type result
|
||||
@@ -216,6 +242,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
clear_first=clear_first,
|
||||
delay_ms=delay_ms,
|
||||
timeout_ms=timeout_ms,
|
||||
use_insert_text=use_insert_text,
|
||||
)
|
||||
log_tool_call(
|
||||
"browser_type",
|
||||
@@ -270,21 +297,34 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
selector: str | None = None,
|
||||
tab_id: int | None = None,
|
||||
profile: str | None = None,
|
||||
modifiers: list[str] | None = None,
|
||||
) -> dict:
|
||||
"""
|
||||
Press a keyboard key.
|
||||
Press a keyboard key, optionally with modifier keys held.
|
||||
|
||||
Args:
|
||||
key: Key to press (e.g., 'Enter', 'Tab', 'Escape', 'ArrowDown')
|
||||
key: Key to press (e.g., 'Enter', 'Tab', 'Escape', 'ArrowDown',
|
||||
or a character like 'a')
|
||||
selector: Focus element first (optional)
|
||||
tab_id: Chrome tab ID (default: active tab)
|
||||
profile: Browser profile name (default: "default")
|
||||
modifiers: Hold these modifier keys while pressing ``key``. Accepted
|
||||
values (case-insensitive): "alt", "ctrl"/"control", "meta"/"cmd",
|
||||
"shift". Examples: ``modifiers=["ctrl"], key="a"`` = Ctrl+A
|
||||
(select all); ``modifiers=["shift"], key="Tab"`` = Shift+Tab;
|
||||
``modifiers=["meta"], key="Enter"`` = Cmd+Enter.
|
||||
|
||||
Returns:
|
||||
Dict with press result
|
||||
"""
|
||||
start = time.perf_counter()
|
||||
params = {"key": key, "selector": selector, "tab_id": tab_id, "profile": profile}
|
||||
params = {
|
||||
"key": key,
|
||||
"selector": selector,
|
||||
"tab_id": tab_id,
|
||||
"profile": profile,
|
||||
"modifiers": modifiers,
|
||||
}
|
||||
|
||||
bridge = get_bridge()
|
||||
if not bridge or not bridge.is_connected:
|
||||
@@ -305,7 +345,9 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
return result
|
||||
|
||||
try:
|
||||
press_result = await bridge.press_key(target_tab, key, selector=selector)
|
||||
press_result = await bridge.press_key(
|
||||
target_tab, key, selector=selector, modifiers=modifiers
|
||||
)
|
||||
log_tool_call(
|
||||
"browser_press",
|
||||
params,
|
||||
|
||||
@@ -0,0 +1,206 @@
|
||||
"""Tests for aden_tools.file_state_cache and its integration with file_ops.
|
||||
|
||||
These tests cover the stale-edit guard added for Gap 4:
|
||||
- read_file records a per-file hash snapshot
|
||||
- edit_file / write_file / hashline_edit refuse to run when the on-disk
|
||||
file has diverged from the last recorded read
|
||||
- write_file is allowed without a prior read when the target doesn't
|
||||
exist yet (brand-new file, nothing to clobber)
|
||||
- re-recording after a successful write keeps chained edits working
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from fastmcp import FastMCP
|
||||
|
||||
from aden_tools import file_state_cache
|
||||
from aden_tools.file_ops import register_file_tools
|
||||
|
||||
|
||||
def _find_tool(mcp: FastMCP, name: str):
|
||||
"""Pull a tool function out of an MCP registration for direct testing."""
|
||||
# fastmcp stores tools in a ToolManager. We reach through it to grab
|
||||
# the underlying callable so tests can invoke tools directly without
|
||||
# a full MCP round-trip.
|
||||
manager = getattr(mcp, "_tool_manager", None) or getattr(mcp, "tool_manager", None)
|
||||
assert manager is not None, "could not locate fastmcp tool manager"
|
||||
tools = getattr(manager, "_tools", None) or getattr(manager, "tools", None)
|
||||
assert tools is not None, "could not locate fastmcp tools dict"
|
||||
tool = tools[name]
|
||||
return getattr(tool, "fn", None) or getattr(tool, "func", None) or tool
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sandbox(tmp_path: Path):
|
||||
"""A sandbox directory the tools are allowed to read/write within."""
|
||||
file_state_cache.reset_all()
|
||||
return tmp_path
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def tools(sandbox: Path):
|
||||
"""Register file_ops onto a fresh FastMCP and return the tool callables."""
|
||||
mcp = FastMCP("test-server")
|
||||
|
||||
def resolve(path: str) -> str:
|
||||
# Absolute paths under the sandbox are fine; relative paths
|
||||
# resolve against the sandbox root.
|
||||
if os.path.isabs(path):
|
||||
return os.path.abspath(path)
|
||||
return str(sandbox / path)
|
||||
|
||||
register_file_tools(mcp, resolve_path=resolve)
|
||||
|
||||
return {
|
||||
"read_file": _find_tool(mcp, "read_file"),
|
||||
"write_file": _find_tool(mcp, "write_file"),
|
||||
"edit_file": _find_tool(mcp, "edit_file"),
|
||||
"hashline_edit": _find_tool(mcp, "hashline_edit"),
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Cache primitives
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_check_fresh_returns_unread_when_never_recorded(sandbox: Path):
|
||||
target = sandbox / "nope.txt"
|
||||
target.write_text("hi")
|
||||
result = file_state_cache.check_fresh(None, str(target))
|
||||
assert result.status is file_state_cache.Freshness.UNREAD
|
||||
|
||||
|
||||
def test_record_then_check_returns_fresh(sandbox: Path):
|
||||
target = sandbox / "a.txt"
|
||||
target.write_text("one")
|
||||
file_state_cache.record_read(None, str(target), content_bytes=b"one")
|
||||
result = file_state_cache.check_fresh(None, str(target))
|
||||
assert result.status is file_state_cache.Freshness.FRESH
|
||||
|
||||
|
||||
def test_external_write_makes_check_return_stale(sandbox: Path):
|
||||
target = sandbox / "b.txt"
|
||||
target.write_text("original")
|
||||
file_state_cache.record_read(None, str(target), content_bytes=b"original")
|
||||
|
||||
# Simulate an external editor save with different content. Sleep
|
||||
# briefly to ensure mtime moves (some filesystems have 1s resolution
|
||||
# but most Linux fs have ns; this is belt-and-braces).
|
||||
time.sleep(0.01)
|
||||
target.write_text("hijacked by the user")
|
||||
os.utime(str(target), None) # bump mtime in case the write was too fast
|
||||
|
||||
result = file_state_cache.check_fresh(None, str(target))
|
||||
assert result.status is file_state_cache.Freshness.STALE
|
||||
assert "changed on disk" in result.detail or "differs" in result.detail
|
||||
|
||||
|
||||
def test_identical_content_rewrite_stays_fresh(sandbox: Path):
|
||||
"""Editors that rewrite a file without changing its bytes shouldn't
|
||||
be reported as stale even though mtime moved."""
|
||||
target = sandbox / "c.txt"
|
||||
target.write_text("same")
|
||||
file_state_cache.record_read(None, str(target), content_bytes=b"same")
|
||||
|
||||
time.sleep(0.01)
|
||||
target.write_text("same") # different mtime, same content
|
||||
os.utime(str(target), None)
|
||||
|
||||
result = file_state_cache.check_fresh(None, str(target))
|
||||
assert result.status is file_state_cache.Freshness.FRESH
|
||||
|
||||
|
||||
def test_agent_scopes_are_isolated(sandbox: Path):
|
||||
target = sandbox / "d.txt"
|
||||
target.write_text("xyz")
|
||||
file_state_cache.record_read("agent-A", str(target), content_bytes=b"xyz")
|
||||
|
||||
# Another agent hasn't read this file yet.
|
||||
result = file_state_cache.check_fresh("agent-B", str(target))
|
||||
assert result.status is file_state_cache.Freshness.UNREAD
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# file_ops integration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_edit_file_refuses_without_prior_read(sandbox: Path, tools):
|
||||
target = sandbox / "e.py"
|
||||
target.write_text("print('hello')\n")
|
||||
# Clear the cache first so there's definitely no recorded read.
|
||||
file_state_cache.reset_all()
|
||||
|
||||
result = tools["edit_file"]("e.py", "hello", "world")
|
||||
assert "Refusing to edit" in result
|
||||
assert "read_file" in result
|
||||
|
||||
|
||||
def test_edit_file_proceeds_after_read(sandbox: Path, tools):
|
||||
target = sandbox / "f.py"
|
||||
target.write_text("print('hello')\n")
|
||||
file_state_cache.reset_all()
|
||||
|
||||
tools["read_file"]("f.py")
|
||||
result = tools["edit_file"]("f.py", "hello", "world")
|
||||
assert "Replaced" in result
|
||||
assert target.read_text() == "print('world')\n"
|
||||
|
||||
|
||||
def test_edit_file_refuses_when_file_changed_between_read_and_edit(
|
||||
sandbox: Path, tools
|
||||
):
|
||||
target = sandbox / "g.py"
|
||||
target.write_text("print('hello')\n")
|
||||
file_state_cache.reset_all()
|
||||
|
||||
tools["read_file"]("g.py")
|
||||
|
||||
# Simulate the user editing the file outside the agent.
|
||||
time.sleep(0.01)
|
||||
target.write_text("print('bye')\n")
|
||||
os.utime(str(target), None)
|
||||
|
||||
result = tools["edit_file"]("g.py", "hello", "world")
|
||||
assert "Refusing to edit" in result
|
||||
assert "Re-read" in result
|
||||
|
||||
|
||||
def test_write_file_allowed_for_new_file_without_prior_read(sandbox: Path, tools):
|
||||
file_state_cache.reset_all()
|
||||
result = tools["write_file"]("brand_new.txt", "first contents\n")
|
||||
assert "Created" in result
|
||||
assert (sandbox / "brand_new.txt").read_text() == "first contents\n"
|
||||
|
||||
|
||||
def test_write_file_refuses_overwrite_without_prior_read(sandbox: Path, tools):
|
||||
target = sandbox / "existing.txt"
|
||||
target.write_text("do not clobber\n")
|
||||
file_state_cache.reset_all()
|
||||
|
||||
result = tools["write_file"]("existing.txt", "clobbered\n")
|
||||
assert "Refusing to overwrite" in result
|
||||
assert target.read_text() == "do not clobber\n" # unchanged
|
||||
|
||||
|
||||
def test_chained_edits_in_same_turn_do_not_self_invalidate(
|
||||
sandbox: Path, tools
|
||||
):
|
||||
target = sandbox / "chained.py"
|
||||
target.write_text("print('a')\nprint('b')\n")
|
||||
file_state_cache.reset_all()
|
||||
|
||||
tools["read_file"]("chained.py")
|
||||
r1 = tools["edit_file"]("chained.py", "a", "A")
|
||||
assert "Replaced" in r1
|
||||
# Immediate second edit must NOT trip the stale guard because
|
||||
# edit_file re-records the post-write state.
|
||||
r2 = tools["edit_file"]("chained.py", "b", "B")
|
||||
assert "Replaced" in r2
|
||||
assert target.read_text() == "print('A')\nprint('B')\n"
|
||||
@@ -11,6 +11,21 @@ from fastmcp import FastMCP
|
||||
from aden_tools.hashline import compute_line_hash
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _bypass_stale_edit_guard():
|
||||
"""These tests exercise edit logic directly without a prior read_file,
|
||||
so the Gap 4 stale-edit guard would reject every call. Force
|
||||
check_fresh to always return FRESH here; the cache itself is
|
||||
covered by ``tools/tests/test_file_state_cache.py``.
|
||||
"""
|
||||
from aden_tools.file_state_cache import FreshResult, Freshness
|
||||
with patch(
|
||||
"aden_tools.file_ops.check_fresh",
|
||||
return_value=FreshResult(Freshness.FRESH),
|
||||
):
|
||||
yield
|
||||
|
||||
|
||||
def _anchor(line_num, line_text):
|
||||
"""Build an anchor string N:hhhh."""
|
||||
return f"{line_num}:{compute_line_hash(line_text)}"
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
"""Tests for file_system_toolkits tools (FastMCP)."""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
from unittest.mock import patch
|
||||
@@ -8,6 +9,22 @@ import pytest
|
||||
from fastmcp import FastMCP
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _bypass_stale_edit_guard():
|
||||
"""These tests exercise edit logic directly without a prior read_file,
|
||||
so the Gap 4 stale-edit guard would reject every call. Force
|
||||
check_fresh to always return FRESH here; the cache itself is
|
||||
covered by ``tools/tests/test_file_state_cache.py``.
|
||||
"""
|
||||
from aden_tools.file_state_cache import FreshResult, Freshness
|
||||
with patch(
|
||||
"aden_tools.tools.file_system_toolkits.hashline_edit."
|
||||
"hashline_edit.check_fresh",
|
||||
return_value=FreshResult(Freshness.FRESH),
|
||||
):
|
||||
yield
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mcp():
|
||||
"""Create a FastMCP instance."""
|
||||
@@ -336,51 +353,222 @@ class TestExecuteCommandTool:
|
||||
register_tools(mcp)
|
||||
return mcp._tool_manager._tools["execute_command_tool"].fn
|
||||
|
||||
def test_execute_simple_command(self, execute_command_fn, mock_workspace, mock_secure_path):
|
||||
async def test_execute_simple_command(
|
||||
self, execute_command_fn, mock_workspace, mock_secure_path
|
||||
):
|
||||
"""Executing a simple command returns output."""
|
||||
result = execute_command_fn(command="echo 'Hello World'", **mock_workspace)
|
||||
result = await execute_command_fn(command="echo 'Hello World'", **mock_workspace)
|
||||
|
||||
assert result["success"] is True
|
||||
assert result["return_code"] == 0
|
||||
assert "Hello World" in result["stdout"]
|
||||
|
||||
def test_execute_failing_command(self, execute_command_fn, mock_workspace, mock_secure_path):
|
||||
async def test_execute_failing_command(
|
||||
self, execute_command_fn, mock_workspace, mock_secure_path
|
||||
):
|
||||
"""Executing a failing command returns non-zero exit code."""
|
||||
result = execute_command_fn(command="exit 1", **mock_workspace)
|
||||
result = await execute_command_fn(command="exit 1", **mock_workspace)
|
||||
|
||||
assert result["success"] is True
|
||||
assert result["return_code"] == 1
|
||||
|
||||
def test_execute_command_with_stderr(
|
||||
async def test_execute_command_with_stderr(
|
||||
self, execute_command_fn, mock_workspace, mock_secure_path
|
||||
):
|
||||
"""Executing a command that writes to stderr captures it."""
|
||||
result = execute_command_fn(command="echo 'error message' >&2", **mock_workspace)
|
||||
result = await execute_command_fn(
|
||||
command="echo 'error message' >&2", **mock_workspace
|
||||
)
|
||||
|
||||
assert result["success"] is True
|
||||
assert "error message" in result.get("stderr", "")
|
||||
|
||||
def test_execute_command_list_files(
|
||||
async def test_execute_command_list_files(
|
||||
self, execute_command_fn, mock_workspace, mock_secure_path, tmp_path
|
||||
):
|
||||
"""Executing ls command lists files."""
|
||||
# Create a test file
|
||||
(tmp_path / "testfile.txt").write_text("content", encoding="utf-8")
|
||||
|
||||
result = execute_command_fn(command=f"ls {tmp_path}", **mock_workspace)
|
||||
result = await execute_command_fn(command=f"ls {tmp_path}", **mock_workspace)
|
||||
|
||||
assert result["success"] is True
|
||||
assert result["return_code"] == 0
|
||||
assert "testfile.txt" in result["stdout"]
|
||||
|
||||
def test_execute_command_with_pipe(self, execute_command_fn, mock_workspace, mock_secure_path):
|
||||
async def test_execute_command_with_pipe(
|
||||
self, execute_command_fn, mock_workspace, mock_secure_path
|
||||
):
|
||||
"""Executing a command with pipe works correctly."""
|
||||
result = execute_command_fn(command="echo 'hello world' | tr 'a-z' 'A-Z'", **mock_workspace)
|
||||
result = await execute_command_fn(
|
||||
command="echo 'hello world' | tr 'a-z' 'A-Z'", **mock_workspace
|
||||
)
|
||||
|
||||
assert result["success"] is True
|
||||
assert result["return_code"] == 0
|
||||
assert "HELLO WORLD" in result["stdout"]
|
||||
|
||||
# ── Gap 3: async, per-call timeout, background jobs ──────────────
|
||||
|
||||
@pytest.fixture
|
||||
def bash_output_fn(self, mcp):
|
||||
from aden_tools.tools.file_system_toolkits.execute_command_tool import (
|
||||
register_tools,
|
||||
)
|
||||
|
||||
register_tools(mcp)
|
||||
return mcp._tool_manager._tools["bash_output"].fn
|
||||
|
||||
@pytest.fixture
|
||||
def bash_kill_fn(self, mcp):
|
||||
from aden_tools.tools.file_system_toolkits.execute_command_tool import (
|
||||
register_tools,
|
||||
)
|
||||
|
||||
register_tools(mcp)
|
||||
return mcp._tool_manager._tools["bash_kill"].fn
|
||||
|
||||
async def test_per_call_timeout_overrides_default(
|
||||
self, execute_command_fn, mock_workspace, mock_secure_path
|
||||
):
|
||||
"""A per-call timeout under the default kills the command early."""
|
||||
import time
|
||||
|
||||
start = time.monotonic()
|
||||
result = await execute_command_fn(
|
||||
command="sleep 10",
|
||||
timeout_seconds=1,
|
||||
**mock_workspace,
|
||||
)
|
||||
elapsed = time.monotonic() - start
|
||||
|
||||
assert result.get("timed_out") is True
|
||||
assert "1 seconds" in result.get("error", "")
|
||||
# Must include the watchdog grace but stay well under 10s.
|
||||
assert elapsed < 5, f"timeout did not kill the command promptly ({elapsed:.2f}s)"
|
||||
|
||||
async def test_timeout_is_clamped_upwards(
|
||||
self, execute_command_fn, mock_workspace, mock_secure_path
|
||||
):
|
||||
"""A timeout above the 600s ceiling is silently clamped."""
|
||||
# We don't actually sleep 600s - we just run a quick command
|
||||
# with a nonsense timeout to prove the clamp doesn't raise.
|
||||
result = await execute_command_fn(
|
||||
command="echo fast",
|
||||
timeout_seconds=99999,
|
||||
**mock_workspace,
|
||||
)
|
||||
assert result["success"] is True
|
||||
assert "fast" in result["stdout"]
|
||||
|
||||
async def test_event_loop_unblocked_while_command_runs(
|
||||
self, execute_command_fn, mock_workspace, mock_secure_path
|
||||
):
|
||||
"""The event loop keeps servicing other tasks while a bash
|
||||
command is running, unlike the old blocking subprocess.run."""
|
||||
ticks = 0
|
||||
|
||||
async def ticker():
|
||||
nonlocal ticks
|
||||
for _ in range(20):
|
||||
await asyncio.sleep(0.05)
|
||||
ticks += 1
|
||||
|
||||
ticker_task = asyncio.create_task(ticker())
|
||||
# A 0.5s command: if the event loop were blocked, ticks would
|
||||
# stay at 0 until it returned. We expect several ticks to land.
|
||||
result = await execute_command_fn(command="sleep 0.5", **mock_workspace)
|
||||
await ticker_task
|
||||
|
||||
assert result["success"] is True
|
||||
assert ticks >= 5, (
|
||||
f"event loop looked blocked during subprocess "
|
||||
f"(only {ticks} ticks in 1s)"
|
||||
)
|
||||
|
||||
async def test_background_job_start_poll_and_complete(
|
||||
self,
|
||||
execute_command_fn,
|
||||
bash_output_fn,
|
||||
mock_workspace,
|
||||
mock_secure_path,
|
||||
):
|
||||
"""A run_in_background job can be started, polled, and reports
|
||||
its exit status once the command finishes."""
|
||||
start_result = await execute_command_fn(
|
||||
command=(
|
||||
"python -c 'import time,sys;"
|
||||
"print(\"one\");sys.stdout.flush();time.sleep(0.1);"
|
||||
"print(\"two\");sys.stdout.flush();time.sleep(0.1);"
|
||||
"print(\"three\")'"
|
||||
),
|
||||
run_in_background=True,
|
||||
**mock_workspace,
|
||||
)
|
||||
assert start_result["background"] is True
|
||||
job_id = start_result["id"]
|
||||
|
||||
# Wait for the command to finish.
|
||||
deadline = asyncio.get_event_loop().time() + 5.0
|
||||
seen_text = ""
|
||||
while asyncio.get_event_loop().time() < deadline:
|
||||
poll = await bash_output_fn(id=job_id, **mock_workspace)
|
||||
seen_text += poll["stdout"]
|
||||
if poll["status"].startswith("exited"):
|
||||
break
|
||||
await asyncio.sleep(0.05)
|
||||
|
||||
assert "one" in seen_text
|
||||
assert "two" in seen_text
|
||||
assert "three" in seen_text
|
||||
assert poll["status"] == "exited(0)"
|
||||
|
||||
async def test_background_job_kill(
|
||||
self,
|
||||
execute_command_fn,
|
||||
bash_output_fn,
|
||||
bash_kill_fn,
|
||||
mock_workspace,
|
||||
mock_secure_path,
|
||||
):
|
||||
"""bash_kill terminates a long-running background job."""
|
||||
start_result = await execute_command_fn(
|
||||
command="sleep 30",
|
||||
run_in_background=True,
|
||||
**mock_workspace,
|
||||
)
|
||||
job_id = start_result["id"]
|
||||
|
||||
kill_result = await bash_kill_fn(id=job_id, **mock_workspace)
|
||||
assert kill_result["id"] == job_id
|
||||
assert (
|
||||
"terminated" in kill_result["status"]
|
||||
or "killed" in kill_result["status"]
|
||||
)
|
||||
|
||||
# Job id should be deregistered after kill.
|
||||
poll = await bash_output_fn(id=job_id, **mock_workspace)
|
||||
assert "no background job" in poll.get("error", "")
|
||||
|
||||
async def test_bash_output_isolated_across_agents(
|
||||
self, execute_command_fn, bash_output_fn, mock_secure_path
|
||||
):
|
||||
"""Agent A's job id is not reachable from agent B."""
|
||||
start = await execute_command_fn(
|
||||
command="sleep 5",
|
||||
run_in_background=True,
|
||||
agent_id="agent-A",
|
||||
)
|
||||
poll_b = await bash_output_fn(id=start["id"], agent_id="agent-B")
|
||||
assert "no background job" in poll_b.get("error", "")
|
||||
|
||||
# Clean up.
|
||||
from aden_tools.tools.file_system_toolkits.execute_command_tool import (
|
||||
background_jobs,
|
||||
)
|
||||
|
||||
await background_jobs.clear_agent("agent-A")
|
||||
|
||||
|
||||
class TestApplyDiffTool:
|
||||
"""Tests for apply_diff tool."""
|
||||
|
||||
@@ -11,6 +11,22 @@ from fastmcp import FastMCP
|
||||
from aden_tools.hashline import compute_line_hash
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _bypass_stale_edit_guard():
|
||||
"""These tests exercise edit logic directly without a prior read_file,
|
||||
so the Gap 4 file-state cache would reject every single call. Patch
|
||||
the imported ``check_fresh`` symbol to always return FRESH here; the
|
||||
cache itself is covered by ``tests/test_file_state_cache.py``.
|
||||
"""
|
||||
from aden_tools.file_state_cache import FreshResult, Freshness
|
||||
with patch(
|
||||
"aden_tools.tools.file_system_toolkits.hashline_edit."
|
||||
"hashline_edit.check_fresh",
|
||||
return_value=FreshResult(Freshness.FRESH),
|
||||
):
|
||||
yield
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mcp():
|
||||
"""Create a FastMCP instance."""
|
||||
|
||||
Reference in New Issue
Block a user