fix: resolve all ruff lint and format errors across codebase (#7058)

- Auto-fixed 70 lint errors (import sorting, aliased errors, datetime.UTC)
- Fixed 85 remaining errors manually:
  - E501: wrapped long lines in queen_profiles, catalog, routes_credentials
  - F821: added missing TYPE_CHECKING imports for AgentHost, ToolRegistry,
    HookContext, HookResult; added runtime imports where needed
  - F811: removed duplicate method definitions in queen_lifecycle_tools
  - F841/B007: removed unused variables in discovery.py
  - W291: removed trailing whitespace in queen nodes
  - E402: moved import to top of queen_memory_v2.py
  - Fixed AgentRuntime -> AgentHost in example template type annotations
- Reformatted 343 files with ruff format
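The F821 fix noted above follows the usual typing pattern: names used only in annotations are imported under typing.TYPE_CHECKING so the type checker can resolve them without adding runtime imports or import cycles. A minimal sketch, assuming illustrative module paths (the real paths in this repo may differ):

    from __future__ import annotations

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Type-only imports: seen by the checker, never executed at runtime.
        # Module paths here are assumptions for illustration only.
        from framework.agents.host import AgentHost
        from framework.tools.registry import ToolRegistry


    def build_registry(host: AgentHost) -> ToolRegistry:
        # With `from __future__ import annotations`, annotations are stored as
        # strings at runtime, so the TYPE_CHECKING-only imports are sufficient.
        ...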
Author:       Hundao
Date:         2026-04-16 19:30:01 +08:00
Committed by: GitHub
Parent:       4fdbc438f9
Commit:       589c5b06fe

354 changed files with 1955 additions and 5340 deletions
@@ -57,8 +57,7 @@ async def test_twitter_lazy_scroll():
# Count initial tweets
initial_count = await bridge.evaluate(
tab_id,
"(function() { return document.querySelectorAll("
"'[data-testid=\"tweet\"]').length; })()",
"(function() { return document.querySelectorAll('[data-testid=\"tweet\"]').length; })()",
)
print(f"Initial tweet count: {initial_count.get('result', 0)}")
@@ -78,8 +77,7 @@ async def test_twitter_lazy_scroll():
# Count tweets after scroll
count_result = await bridge.evaluate(
tab_id,
"(function() { return document.querySelectorAll("
"'[data-testid=\"tweet\"]').length; })()",
"(function() { return document.querySelectorAll('[data-testid=\"tweet\"]').length; })()",
)
count = count_result.get("result", 0)
print(f" Tweet count after scroll: {count}")
@@ -87,8 +85,7 @@ async def test_twitter_lazy_scroll():
# Final count
final_count = await bridge.evaluate(
tab_id,
"(function() { return document.querySelectorAll("
"'[data-testid=\"tweet\"]').length; })()",
"(function() { return document.querySelectorAll('[data-testid=\"tweet\"]').length; })()",
)
final = final_count.get("result", 0)
initial = initial_count.get("result", 0)
@@ -130,9 +130,7 @@ async def test_shadow_dom():
print(f"JS click result: {click_result.get('result', {})}")
# Verify click was registered
count_result = await bridge.evaluate(
tab_id, "(function() { return window.shadowClickCount || 0; })()"
)
count_result = await bridge.evaluate(tab_id, "(function() { return window.shadowClickCount || 0; })()")
count = count_result.get("result") or 0
print(f"Shadow click count: {count}")
@@ -200,9 +200,7 @@ async def test_autocomplete():
print(f"Value after fast typing: '{fast_value}'")
# Check events
events_result = await bridge.evaluate(
tab_id, "(function() { return window.inputEvents; })()"
)
events_result = await bridge.evaluate(tab_id, "(function() { return window.inputEvents; })()")
print(f"Events logged: {events_result.get('result', [])}")
# Test 2: Slow typing (with delay) - should work
@@ -220,8 +218,7 @@ async def test_autocomplete():
# Check if dropdown appeared
dropdown_result = await bridge.evaluate(
tab_id,
"(function() { return document.querySelectorAll("
"'.autocomplete-items div').length; })()",
"(function() { return document.querySelectorAll('.autocomplete-items div').length; })()",
)
dropdown_count = dropdown_result.get("result", 0)
print(f"Dropdown items: {dropdown_count}")
@@ -87,9 +87,7 @@ async def test_huge_dom():
await bridge.navigate(tab_id, data_url, wait_until="load")
# Count elements
count_result = await bridge.evaluate(
tab_id, "(function() { return document.querySelectorAll('*').length; })()"
)
count_result = await bridge.evaluate(tab_id, "(function() { return document.querySelectorAll('*').length; })()")
elem_count = count_result.get("result", 0)
print(f"DOM elements: {elem_count}")
@@ -122,14 +120,10 @@ async def test_huge_dom():
# Test 3: Real LinkedIn
print("\n--- Test 3: Real LinkedIn Feed ---")
await bridge.navigate(
tab_id, "https://www.linkedin.com/feed", wait_until="load", timeout_ms=30000
)
await bridge.navigate(tab_id, "https://www.linkedin.com/feed", wait_until="load", timeout_ms=30000)
await asyncio.sleep(2)
count_result = await bridge.evaluate(
tab_id, "(function() { return document.querySelectorAll('*').length; })()"
)
count_result = await bridge.evaluate(tab_id, "(function() { return document.querySelectorAll('*').length; })()")
elem_count = count_result.get("result", 0)
print(f"LinkedIn DOM elements: {elem_count}")
@@ -136,10 +136,7 @@ async def test_selector_screenshot(bridge: BeelineBridge, tab_id: int, data_url:
print(" ⚠ WARNING: Selector screenshot not smaller (may be full page)")
return False
else:
print(
" ⚠ NOT IMPLEMENTED: selector param ignored"
f" (returns full page) - error={result.get('error')}"
)
print(f" ⚠ NOT IMPLEMENTED: selector param ignored (returns full page) - error={result.get('error')}")
print(" NOTE: selector parameter exists in signature but is not used in implementation")
return False
@@ -181,9 +178,7 @@ async def test_screenshot_timeout(bridge: BeelineBridge, tab_id: int, data_url:
print(f" ⚠ Fast enough to beat timeout: {err!r} in {elapsed:.3f}s")
return True # Not a failure, just fast
else:
print(
f" ⚠ Screenshot completed before timeout ({elapsed:.3f}s) - too fast to test timeout"
)
print(f" ⚠ Screenshot completed before timeout ({elapsed:.3f}s) - too fast to test timeout")
return True # Still ok, just very fast
@@ -137,14 +137,8 @@ async def test_problematic_site(bridge: BeelineBridge, tab_id: int) -> dict:
changed = False
for key in after_data:
if key in before_data:
b_val = (
before_data[key].get("scrollTop", 0)
if isinstance(before_data[key], dict)
else 0
)
a_val = (
after_data[key].get("scrollTop", 0) if isinstance(after_data[key], dict) else 0
)
b_val = before_data[key].get("scrollTop", 0) if isinstance(before_data[key], dict) else 0
a_val = after_data[key].get("scrollTop", 0) if isinstance(after_data[key], dict) else 0
if a_val != b_val:
print(f" ✓ CHANGE DETECTED: {key} scrolled from {b_val} to {a_val}")
changed = True
+7 -21
@@ -52,9 +52,7 @@ _DEFAULT_REDIRECT_PORT = 51121
# This project reverse-engineered and published the public OAuth credentials
# for Google's Antigravity/Cloud Code Assist API.
# Source: https://github.com/NoeFabris/opencode-antigravity-auth
_CREDENTIALS_URL = (
"https://raw.githubusercontent.com/NoeFabris/opencode-antigravity-auth/dev/src/constants.ts"
)
_CREDENTIALS_URL = "https://raw.githubusercontent.com/NoeFabris/opencode-antigravity-auth/dev/src/constants.ts"
# Cached credentials fetched from public source
_cached_client_id: str | None = None
@@ -68,9 +66,7 @@ def _fetch_credentials_from_public_source() -> tuple[str | None, str | None]:
return _cached_client_id, _cached_client_secret
try:
req = urllib.request.Request(
_CREDENTIALS_URL, headers={"User-Agent": "Hive-Antigravity-Auth/1.0"}
)
req = urllib.request.Request(_CREDENTIALS_URL, headers={"User-Agent": "Hive-Antigravity-Auth/1.0"})
with urllib.request.urlopen(req, timeout=10) as resp:
content = resp.read().decode("utf-8")
import re
@@ -168,10 +164,7 @@ class OAuthCallbackHandler(BaseHTTPRequestHandler):
if "code" in query and "state" in query:
OAuthCallbackHandler.auth_code = query["code"][0]
OAuthCallbackHandler.state = query["state"][0]
self._send_response(
"Authentication successful! You can close this window "
"and return to the terminal."
)
self._send_response("Authentication successful! You can close this window and return to the terminal.")
return
self._send_response("Waiting for authentication...")
@@ -296,8 +289,7 @@ def validate_credentials(access_token: str, project_id: str = _DEFAULT_PROJECT_I
"Authorization": f"Bearer {access_token}",
"Content-Type": "application/json",
"User-Agent": (
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
"AppleWebKit/537.36 (KHTML, like Gecko) Antigravity/1.18.3"
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Antigravity/1.18.3"
),
"X-Goog-Api-Client": "google-cloud-sdk vscode_cloudshelleditor/0.1",
}
@@ -316,9 +308,7 @@ def validate_credentials(access_token: str, project_id: str = _DEFAULT_PROJECT_I
return False
def refresh_access_token(
refresh_token: str, client_id: str, client_secret: str | None
) -> dict | None:
def refresh_access_token(refresh_token: str, client_id: str, client_secret: str | None) -> dict | None:
"""Refresh the access token using the refresh token."""
data = {
"grant_type": "refresh_token",
@@ -361,9 +351,7 @@ def cmd_account_add(args: argparse.Namespace) -> int:
access_token = account.get("access")
refresh_token_str = account.get("refresh", "")
refresh_token = refresh_token_str.split("|")[0] if refresh_token_str else None
project_id = (
refresh_token_str.split("|")[1] if "|" in refresh_token_str else _DEFAULT_PROJECT_ID
)
project_id = refresh_token_str.split("|")[1] if "|" in refresh_token_str else _DEFAULT_PROJECT_ID
email = account.get("email", "unknown")
expires_ms = account.get("expires", 0)
expires_at = expires_ms / 1000.0 if expires_ms else 0.0
@@ -390,9 +378,7 @@ def cmd_account_add(args: argparse.Namespace) -> int:
# Update the account
account["access"] = new_access
account["expires"] = int((time.time() + expires_in) * 1000)
accounts_data["last_refresh"] = time.strftime(
"%Y-%m-%dT%H:%M:%SZ", time.gmtime()
)
accounts_data["last_refresh"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
save_accounts(accounts_data)
# Validate the refreshed token
+51 -159
@@ -126,9 +126,7 @@ _STRIP_RE = re.compile(
# The value cannot contain `<` or `\n` — those terminate the label.
# Trailing whitespace (including the terminating newline) is consumed
# so the visible text that follows starts cleanly.
_LABEL_STRIP_RE = re.compile(
r"<(?:" + "|".join(_INTERNAL_TAGS) + r")>[^<\n]*\s*"
)
_LABEL_STRIP_RE = re.compile(r"<(?:" + "|".join(_INTERNAL_TAGS) + r")>[^<\n]*\s*")
# Matches a trailing `<` that could be the start of an internal tag.
# We build a pattern that matches `<` followed by any prefix of any
@@ -138,9 +136,7 @@ for _tag in _INTERNAL_TAGS:
for _i in range(1, len(_tag) + 1):
_PARTIAL_PREFIXES.add(_tag[:_i])
_PARTIAL_OPEN_RE = re.compile(
r"<(?:"
+ "|".join(re.escape(p) for p in sorted(_PARTIAL_PREFIXES, key=len, reverse=True))
+ r")$"
r"<(?:" + "|".join(re.escape(p) for p in sorted(_PARTIAL_PREFIXES, key=len, reverse=True)) + r")$"
)
_GENERIC_TAG_RE = re.compile(r"</?[a-zA-Z_][\w-]*\s*/?>")
@@ -351,9 +347,7 @@ class AgentLoop(AgentProtocol):
self._config = config or LoopConfig()
self._tool_executor = tool_executor
self._conversation_store = conversation_store
self._injection_queue: asyncio.Queue[tuple[str, bool, list[dict[str, Any]] | None]] = (
asyncio.Queue()
)
self._injection_queue: asyncio.Queue[tuple[str, bool, list[dict[str, Any]] | None]] = asyncio.Queue()
self._trigger_queue: asyncio.Queue[TriggerEvent] = asyncio.Queue()
# Queen input blocking state
self._input_ready = asyncio.Event()
@@ -510,9 +504,7 @@ class AgentLoop(AgentProtocol):
output_tokens=0,
latency_ms=0,
)
return self._finalize_result(
AgentResult(success=False, error=error_msg), "guard_failure"
)
return self._finalize_result(AgentResult(success=False, error=error_msg), "guard_failure")
# 2. Restore or create new conversation + accumulator
restored = await self._restore(ctx)
@@ -571,11 +563,7 @@ class AgentLoop(AgentProtocol):
if ctx.default_skill_batch_nudge:
from framework.skills.defaults import is_batch_scenario as _is_batch
_input_text = (
(ctx.goal_context or "")
+ " "
+ " ".join(str(v) for v in ctx.input_data.values() if v)
)
_input_text = (ctx.goal_context or "") + " " + " ".join(str(v) for v in ctx.input_data.values() if v)
if _is_batch(_input_text):
system_prompt = f"{system_prompt}\n\n{ctx.default_skill_batch_nudge}"
logger.info("[%s] DS-12: batch scenario detected, nudge injected", node_id)
@@ -587,9 +575,7 @@ class AgentLoop(AgentProtocol):
store=self._conversation_store,
run_id=ctx.effective_run_id,
compaction_buffer_tokens=self._config.compaction_buffer_tokens,
compaction_warning_buffer_tokens=(
self._config.compaction_warning_buffer_tokens
),
compaction_warning_buffer_tokens=(self._config.compaction_warning_buffer_tokens),
)
accumulator = OutputAccumulator(
store=self._conversation_store,
@@ -690,9 +676,7 @@ class AgentLoop(AgentProtocol):
node_id,
iteration,
)
await self._publish_loop_completed(
stream_id, node_id, iteration, execution_id
)
await self._publish_loop_completed(stream_id, node_id, iteration, execution_id)
return AgentResult(
success=True,
output=accumulator.to_dict(),
@@ -773,9 +757,7 @@ class AgentLoop(AgentProtocol):
prompt=str(pending_input_state.get("prompt", "")),
options=pending_input_state.get("options"),
questions=pending_input_state.get("questions"),
emit_client_request=bool(
pending_input_state.get("emit_client_request", True)
),
emit_client_request=bool(pending_input_state.get("emit_client_request", True)),
)
logger.info(
"[%s] iter=%d: restored wait unblocked, got_input=%s",
@@ -784,9 +766,7 @@ class AgentLoop(AgentProtocol):
got_input,
)
if not got_input:
await self._publish_loop_completed(
stream_id, node_id, iteration + 1, execution_id
)
await self._publish_loop_completed(stream_id, node_id, iteration + 1, execution_id)
latency_ms = int((time.time() - start_time) * 1000)
return AgentResult(
success=True,
@@ -797,8 +777,7 @@ class AgentLoop(AgentProtocol):
)
if self._injection_queue.empty() and self._trigger_queue.empty():
logger.info(
"[%s] iter=%d: pending-input wait woke"
" without queued input; re-waiting",
"[%s] iter=%d: pending-input wait woke without queued input; re-waiting",
node_id,
iteration,
)
@@ -863,9 +842,7 @@ class AgentLoop(AgentProtocol):
iteration,
len(conversation.messages),
)
logger.debug(
"[AgentLoop.execute] iteration=%d: entering _run_single_turn loop", iteration
)
logger.debug("[AgentLoop.execute] iteration=%d: entering _run_single_turn loop", iteration)
_stream_retry_count = 0
_capacity_retry_started_at: float | None = None
_capacity_retry_attempt = 0
@@ -892,9 +869,7 @@ class AgentLoop(AgentProtocol):
request_system_prompt,
request_messages,
_,
) = await self._run_single_turn(
ctx, conversation, tools, iteration, accumulator
)
) = await self._run_single_turn(ctx, conversation, tools, iteration, accumulator)
logger.debug(
"[AgentLoop.execute] iteration=%d: _run_single_turn completed successfully",
iteration,
@@ -910,10 +885,7 @@ class AgentLoop(AgentProtocol):
len(real_tool_results),
outputs_set or "[]",
turn_tokens,
{
k: ("set" if v is not None else "None")
for k, v in accumulator.to_dict().items()
},
{k: ("set" if v is not None else "None") for k, v in accumulator.to_dict().items()},
)
total_input_tokens += turn_tokens.get("input", 0)
total_output_tokens += turn_tokens.get("output", 0)
@@ -982,10 +954,7 @@ class AgentLoop(AgentProtocol):
# still publishes a retry event so the UI can see us
# waiting (the "heartbeat" — no silent stalls).
self._bump("llm_turn_exception")
if (
self._is_capacity_error(e)
and self._config.capacity_retry_max_seconds > 0
):
if self._is_capacity_error(e) and self._config.capacity_retry_max_seconds > 0:
self._bump("capacity_error")
now = time.monotonic()
if _capacity_retry_started_at is None:
@@ -994,8 +963,7 @@ class AgentLoop(AgentProtocol):
if elapsed < self._config.capacity_retry_max_seconds:
_capacity_retry_attempt += 1
delay = min(
self._config.stream_retry_backoff_base
* (2 ** min(_capacity_retry_attempt - 1, 6)),
self._config.stream_retry_backoff_base * (2 ** min(_capacity_retry_attempt - 1, 6)),
self._config.capacity_retry_max_delay,
)
logger.warning(
@@ -1023,15 +991,11 @@ class AgentLoop(AgentProtocol):
continue # retry same iteration
# Retry transient errors with exponential backoff
if (
self._is_transient_error(e)
and _stream_retry_count < self._config.max_stream_retries
):
if self._is_transient_error(e) and _stream_retry_count < self._config.max_stream_retries:
self._bump("llm_transient_retry")
_stream_retry_count += 1
delay = min(
self._config.stream_retry_backoff_base
* (2 ** (_stream_retry_count - 1)),
self._config.stream_retry_backoff_base * (2 ** (_stream_retry_count - 1)),
self._config.stream_retry_max_delay,
)
logger.warning(
@@ -1079,8 +1043,7 @@ class AgentLoop(AgentProtocol):
if ctx.supports_direct_user_io:
error_msg = f"LLM call failed: {e}"
_guardrail_phrase = (
"no endpoints available matching your guardrail restrictions "
"and data policy"
"no endpoints available matching your guardrail restrictions and data policy"
)
if _guardrail_phrase in str(e).lower():
error_msg += (
@@ -1219,9 +1182,7 @@ class AgentLoop(AgentProtocol):
node_id,
iteration,
)
await self._publish_loop_completed(
stream_id, node_id, iteration + 1, execution_id
)
await self._publish_loop_completed(stream_id, node_id, iteration + 1, execution_id)
latency_ms = int((time.time() - start_time) * 1000)
return AgentResult(
success=True,
@@ -1442,10 +1403,7 @@ class AgentLoop(AgentProtocol):
_has_tools_no_text = bool(real_tool_results) and not assistant_text
if _has_tools_no_text:
_silent_tool_streak += 1
if (
_silent_tool_streak > 0
and _silent_tool_streak % self._config.silent_tool_streak_threshold == 0
):
if _silent_tool_streak > 0 and _silent_tool_streak % self._config.silent_tool_streak_threshold == 0:
nudge = (
"[SYSTEM] You have been calling tools for "
f"{_silent_tool_streak} consecutive turns without "
@@ -1488,10 +1446,7 @@ class AgentLoop(AgentProtocol):
and self._event_bus is not None
)
_worker_no_tool_turn = (
not real_tool_results
and not outputs_set
and not queen_input_requested
and not user_input_requested
not real_tool_results and not outputs_set and not queen_input_requested and not user_input_requested
)
if _is_worker and _worker_no_tool_turn:
_worker_text_only_streak += 1
@@ -1599,9 +1554,7 @@ class AgentLoop(AgentProtocol):
step_index=iteration,
verdict="CONTINUE",
verdict_feedback=(
"Auto-block grace"
f" ({_cf_text_only_streak}"
f"/{self._config.cf_grace_turns})"
f"Auto-block grace ({_cf_text_only_streak}/{self._config.cf_grace_turns})"
),
tool_calls=logged_tool_calls,
llm_text=assistant_text,
@@ -1614,9 +1567,7 @@ class AgentLoop(AgentProtocol):
# through to judge
if self._shutdown:
await self._publish_loop_completed(
stream_id, node_id, iteration + 1, execution_id
)
await self._publish_loop_completed(stream_id, node_id, iteration + 1, execution_id)
latency_ms = int((time.time() - start_time) * 1000)
_continue_count += 1
if ctx.runtime_logger:
@@ -1702,9 +1653,7 @@ class AgentLoop(AgentProtocol):
)
logger.info("[%s] iter=%d: unblocked, got_input=%s", node_id, iteration, got_input)
if not got_input:
await self._publish_loop_completed(
stream_id, node_id, iteration + 1, execution_id
)
await self._publish_loop_completed(stream_id, node_id, iteration + 1, execution_id)
latency_ms = int((time.time() - start_time) * 1000)
_continue_count += 1
if ctx.runtime_logger:
@@ -1800,9 +1749,7 @@ class AgentLoop(AgentProtocol):
# until the queen injects guidance.
if queen_input_requested:
if self._shutdown:
await self._publish_loop_completed(
stream_id, node_id, iteration + 1, execution_id
)
await self._publish_loop_completed(stream_id, node_id, iteration + 1, execution_id)
latency_ms = int((time.time() - start_time) * 1000)
_continue_count += 1
self._log_skip_judge(
@@ -1870,15 +1817,11 @@ class AgentLoop(AgentProtocol):
stream_id=stream_id,
node_id=node_id,
reason="Blocked waiting for queen guidance - no input received",
context=(
"Worker escalated but received no queen guidance before shutdown"
),
context=("Worker escalated but received no queen guidance before shutdown"),
execution_id=execution_id,
request_id=uuid.uuid4().hex,
)
await self._publish_loop_completed(
stream_id, node_id, iteration + 1, execution_id
)
await self._publish_loop_completed(stream_id, node_id, iteration + 1, execution_id)
latency_ms = int((time.time() - start_time) * 1000)
_continue_count += 1
self._log_skip_judge(
@@ -2143,9 +2086,7 @@ class AgentLoop(AgentProtocol):
continue
# 7. Max iterations exhausted
await self._publish_loop_completed(
stream_id, node_id, self._config.max_iterations, execution_id
)
await self._publish_loop_completed(stream_id, node_id, self._config.max_iterations, execution_id)
latency_ms = int((time.time() - start_time) * 1000)
if ctx.runtime_logger:
ctx.runtime_logger.log_node_complete(
@@ -2168,9 +2109,7 @@ class AgentLoop(AgentProtocol):
return self._finalize_result(
AgentResult(
success=False,
error=(
f"Max iterations ({self._config.max_iterations}) reached without acceptance"
),
error=(f"Max iterations ({self._config.max_iterations}) reached without acceptance"),
output=accumulator.to_dict(),
tokens_used=total_input_tokens + total_output_tokens,
latency_ms=latency_ms,
@@ -2204,9 +2143,7 @@ class AgentLoop(AgentProtocol):
image_content: Optional list of OpenAI-style image blocks to attach.
"""
logger.debug(
"[AgentLoop.inject_event] content_len=%d,"
" is_client_input=%s, has_images=%s,"
" queue_size_before=%d",
"[AgentLoop.inject_event] content_len=%d, is_client_input=%s, has_images=%s, queue_size_before=%d",
len(content) if content else 0,
is_client_input,
bool(image_content),
@@ -2440,9 +2377,7 @@ class AgentLoop(AgentProtocol):
# generating. Unsafe tools (bash, edits, browser actions)
# still wait for FinishEvent so we don't race a write
# against a decision the model hasn't finished making.
_early_safe_names = {
t.name for t in tools if getattr(t, "concurrency_safe", False)
}
_early_safe_names = {t.name for t in tools if getattr(t, "concurrency_safe", False)}
_early_tasks: dict[str, asyncio.Task] = {}
async def _timed_execute(
@@ -2539,9 +2474,7 @@ class AgentLoop(AgentProtocol):
and "_raw" not in event.tool_input
and event.tool_use_id not in _tasks
):
_tasks[event.tool_use_id] = asyncio.create_task(
_exec_fn(event)
)
_tasks[event.tool_use_id] = asyncio.create_task(_exec_fn(event))
elif isinstance(event, FinishEvent):
token_counts["input"] += event.input_tokens
@@ -2558,9 +2491,7 @@ class AgentLoop(AgentProtocol):
_llm_stream_t0 = time.monotonic()
self._stream_task = asyncio.create_task(_do_stream())
logger.debug(
"[_run_single_turn] inner_turn=%d: Stream task created, waiting...", inner_turn
)
logger.debug("[_run_single_turn] inner_turn=%d: Stream task created, waiting...", inner_turn)
_inactivity_limit = self._config.llm_stream_inactivity_timeout_seconds
try:
if _inactivity_limit and _inactivity_limit > 0:
@@ -2574,9 +2505,7 @@ class AgentLoop(AgentProtocol):
# TimeoutError of its own" — wait_for conflates them.
_check_interval = min(5.0, _inactivity_limit / 2)
while True:
done, _pending = await asyncio.wait(
{self._stream_task}, timeout=_check_interval
)
done, _pending = await asyncio.wait({self._stream_task}, timeout=_check_interval)
if self._stream_task in done:
# Let any exception the task raised propagate
# naturally via the outer ``await`` below.
@@ -2607,9 +2536,7 @@ class AgentLoop(AgentProtocol):
# watchdog loop exited via ``break`` the task is done, and
# ``await`` is the cheapest way to surface its exception.
await self._stream_task
logger.debug(
"[_run_single_turn] inner_turn=%d: Stream task completed normally", inner_turn
)
logger.debug("[_run_single_turn] inner_turn=%d: Stream task completed normally", inner_turn)
except asyncio.CancelledError:
logger.debug("[_run_single_turn] inner_turn=%d: Stream task cancelled", inner_turn)
if accumulated_text:
@@ -2631,9 +2558,7 @@ class AgentLoop(AgentProtocol):
raise
raise TurnCancelled() from None
except Exception as e:
logger.exception(
"[_run_single_turn] inner_turn=%d: Stream task failed: %s", inner_turn, e
)
logger.exception("[_run_single_turn] inner_turn=%d: Stream task failed: %s", inner_turn, e)
# Don't orphan early tool tasks on a stream failure
# either - the outer retry loop will re-emit the tool
# calls on the next attempt.
@@ -2652,9 +2577,7 @@ class AgentLoop(AgentProtocol):
for _early in _early_tasks.values():
if not _early.done():
_early.cancel()
raise ConnectionError(
f"Stream failed with recoverable error: {_stream_error.error}"
)
raise ConnectionError(f"Stream failed with recoverable error: {_stream_error.error}")
final_text = accumulated_text
logger.info(
@@ -2735,19 +2658,14 @@ class AgentLoop(AgentProtocol):
# capping them strands work mid-turn and the next turn just
# re-emits the discarded calls, which is strictly worse.
if self._config.max_tool_calls_per_turn > 0:
hard_limit = int(
self._config.max_tool_calls_per_turn
* (1 + self._config.tool_call_overflow_margin)
)
hard_limit = int(self._config.max_tool_calls_per_turn * (1 + self._config.tool_call_overflow_margin))
else:
hard_limit = 0 # disabled
# Phase 1: triage — handle framework tools immediately,
# queue real tools for parallel execution.
results_by_id: dict[str, ToolResult] = {}
timing_by_id: dict[
str, dict[str, Any]
] = {} # tool_use_id -> {start_timestamp, duration_s}
timing_by_id: dict[str, dict[str, Any]] = {} # tool_use_id -> {start_timestamp, duration_s}
pending_real: list[ToolCallEvent] = []
for tc in tool_calls:
@@ -2801,9 +2719,7 @@ class AgentLoop(AgentProtocol):
sanitize_ask_user_inputs,
)
ask_user_prompt, recovered_options = sanitize_ask_user_inputs(
ask_user_prompt, raw_options
)
ask_user_prompt, recovered_options = sanitize_ask_user_inputs(ask_user_prompt, raw_options)
if recovered_options is not None and raw_options is None:
raw_options = recovered_options
# Defensive: ensure options is a list of strings.
@@ -2930,8 +2846,7 @@ class AgentLoop(AgentProtocol):
result = ToolResult(
tool_use_id=tc.tool_use_id,
content=(
"ERROR: escalate is only available to worker "
"nodes/sub-agents, not queen/judge streams."
"ERROR: escalate is only available to worker nodes/sub-agents, not queen/judge streams."
),
is_error=True,
)
@@ -2941,9 +2856,7 @@ class AgentLoop(AgentProtocol):
if self._event_bus is None:
result = ToolResult(
tool_use_id=tc.tool_use_id,
content=(
"ERROR: EventBus unavailable. Could not emit escalation request."
),
content=("ERROR: EventBus unavailable. Could not emit escalation request."),
is_error=True,
)
results_by_id[tc.tool_use_id] = result
@@ -2973,10 +2886,7 @@ class AgentLoop(AgentProtocol):
# owner (Worker instance) records the explicit report
# via ``record_explicit_report`` so Worker.run()'s
# terminal event emission picks it up.
if not (
isinstance(stream_id, str)
and stream_id.startswith("worker:")
):
if not (isinstance(stream_id, str) and stream_id.startswith("worker:")):
result = ToolResult(
tool_use_id=tc.tool_use_id,
content=(
@@ -3064,9 +2974,7 @@ class AgentLoop(AgentProtocol):
async with _sem:
return await _timed_execute(_tc)
timed_results_by_id: dict[
str, tuple[ToolResult | BaseException, str, float] | BaseException
] = {}
timed_results_by_id: dict[str, tuple[ToolResult | BaseException, str, float] | BaseException] = {}
async def _cancel_turn_with_stubs(
_pending: list[ToolCallEvent] = pending_real, # noqa: B006,B008
@@ -3108,9 +3016,7 @@ class AgentLoop(AgentProtocol):
_awaitables.append(early)
else:
_awaitables.append(_capped(tc))
self._tool_task = asyncio.ensure_future(
asyncio.gather(*_awaitables, return_exceptions=True)
)
self._tool_task = asyncio.ensure_future(asyncio.gather(*_awaitables, return_exceptions=True))
try:
parallel_timed = await self._tool_task
finally:
@@ -3196,9 +3102,7 @@ class AgentLoop(AgentProtocol):
result = _build_tool_error_result(tc, raw)
else:
result = raw
results_by_id[tc.tool_use_id] = await self._truncate_tool_result(
result, tc.tool_name
)
results_by_id[tc.tool_use_id] = await self._truncate_tool_result(result, tc.tool_name)
# Phase 3: record results into conversation in original order,
# build logged/real lists, and publish completed events.
@@ -3227,8 +3131,7 @@ class AgentLoop(AgentProtocol):
image_content = result.image_content
if image_content and ctx.llm and not supports_image_tool_results(ctx.llm.model):
logger.info(
"Stripping image_content from tool result; "
"model '%s' does not support images in tool results",
"Stripping image_content from tool result; model '%s' does not support images in tool results",
ctx.llm.model,
)
image_content = None
@@ -3240,11 +3143,7 @@ class AgentLoop(AgentProtocol):
image_content=image_content,
is_skill_content=result.is_skill_content,
)
if (
tc.tool_name in ("ask_user", "ask_user_multiple")
and user_input_requested
and not result.is_error
):
if tc.tool_name in ("ask_user", "ask_user_multiple") and user_input_requested and not result.is_error:
# Defer tool_call_completed until after user responds
self._deferred_tool_complete = {
"stream_id": stream_id,
@@ -3704,10 +3603,7 @@ class AgentLoop(AgentProtocol):
# function only touches disk / does heavy JSON work when the
# result exceeds either the truncation or spillover threshold,
# so cheap pass-throughs stay on the main loop.
needs_offload = (
len(result.content) > 10_000
and not result.is_error
)
needs_offload = len(result.content) > 10_000 and not result.is_error
if not needs_offload:
return truncate_tool_result(
result=result,
@@ -3868,9 +3764,7 @@ class AgentLoop(AgentProtocol):
pending_input=pending_input,
)
async def _drain_injection_queue(
self, conversation: NodeConversation, ctx: AgentContext
) -> int:
async def _drain_injection_queue(self, conversation: NodeConversation, ctx: AgentContext) -> int:
"""Drain all pending injected events as user messages. Returns count."""
return await drain_injection_queue(
queue=self._injection_queue,
@@ -3912,9 +3806,7 @@ class AgentLoop(AgentProtocol):
# EventBus publishing helpers
# -------------------------------------------------------------------
async def _publish_loop_started(
self, stream_id: str, node_id: str, execution_id: str = ""
) -> None:
async def _publish_loop_started(self, stream_id: str, node_id: str, execution_id: str = "") -> None:
return await publish_loop_started(
event_bus=self._event_bus,
stream_id=stream_id,
+8 -24
@@ -890,9 +890,7 @@ class NodeConversation:
f"Read the complete data with read_file(path='{spillover}')."
)
else:
placeholder = (
f"Pruned tool result ({orig_len:,} chars) cleared from context."
)
placeholder = f"Pruned tool result ({orig_len:,} chars) cleared from context."
self._messages[i] = Message(
seq=msg.seq,
@@ -974,16 +972,13 @@ class NodeConversation:
)
evicted += 1
if self._store:
await self._store.write_part(
msg.seq, self._messages[idx].to_storage_dict()
)
await self._store.write_part(msg.seq, self._messages[idx].to_storage_dict())
if evicted:
# Reset token estimate — image blocks no longer contribute.
self._last_api_input_tokens = None
logger.info(
"evict_old_images: dropped image_content from %d message(s), "
"kept %d most recent",
"evict_old_images: dropped image_content from %d message(s), kept %d most recent",
evicted,
keep_latest,
)
@@ -1141,9 +1136,7 @@ class NodeConversation:
for msg in old_messages:
if msg.role != "assistant" or not msg.tool_calls:
continue
has_protected = any(
tc.get("function", {}).get("name") == "set_output" for tc in msg.tool_calls
)
has_protected = any(tc.get("function", {}).get("name") == "set_output" for tc in msg.tool_calls)
tc_ids = {tc.get("id", "") for tc in msg.tool_calls}
if has_protected:
protected_tc_ids |= tc_ids
@@ -1339,11 +1332,7 @@ class NodeConversation:
def export_summary(self) -> str:
"""Structured summary with [STATS], [CONFIG], [RECENT_MESSAGES] sections."""
prompt_preview = (
self._system_prompt[:80] + "..."
if len(self._system_prompt) > 80
else self._system_prompt
)
prompt_preview = self._system_prompt[:80] + "..." if len(self._system_prompt) > 80 else self._system_prompt
lines = [
"[STATS]",
@@ -1390,9 +1379,7 @@ class NodeConversation:
"max_context_tokens": self._max_context_tokens,
"compaction_threshold": self._compaction_threshold,
"compaction_buffer_tokens": self._compaction_buffer_tokens,
"compaction_warning_buffer_tokens": (
self._compaction_warning_buffer_tokens
),
"compaction_warning_buffer_tokens": (self._compaction_warning_buffer_tokens),
"output_keys": self._output_keys,
}
await self._store.write_meta(run_meta)
@@ -1441,9 +1428,7 @@ class NodeConversation:
store=store,
run_id=run_id,
compaction_buffer_tokens=meta.get("compaction_buffer_tokens"),
compaction_warning_buffer_tokens=meta.get(
"compaction_warning_buffer_tokens"
),
compaction_warning_buffer_tokens=meta.get("compaction_warning_buffer_tokens"),
)
conv._meta_persisted = True
@@ -1457,8 +1442,7 @@ class NodeConversation:
# sessions) persisted parts without phase_id. In that case, the
# phase filter would incorrectly hide the entire conversation.
logger.info(
"Restoring legacy unphased conversation without applying "
"phase filter (phase_id=%s, parts=%d)",
"Restoring legacy unphased conversation without applying phase filter (phase_id=%s, parts=%d)",
phase_id,
len(parts),
)
@@ -107,9 +107,7 @@ def microcompact(
f"Read the complete data with read_file(path='{spillover}')."
)
else:
placeholder = (
f"Old tool result ({orig_len:,} chars) cleared from context."
)
placeholder = f"Old tool result ({orig_len:,} chars) cleared from context."
# Mutate in-place (microcompact is synchronous, no store writes)
conversation._messages[i] = Message(
@@ -185,8 +183,7 @@ async def compact(
_llm_compaction_skipped = _failure_counts.get(conv_id, 0) >= MAX_CONSECUTIVE_FAILURES
if _llm_compaction_skipped:
logger.warning(
"Circuit breaker: LLM compaction disabled after %d failures — "
"skipping straight to emergency summary",
"Circuit breaker: LLM compaction disabled after %d failures — skipping straight to emergency summary",
_failure_counts[conv_id],
)
@@ -532,10 +529,7 @@ def build_llm_compaction_prompt(
done = {k: v for k, v in acc.items() if v is not None}
todo = [k for k, v in acc.items() if v is None]
if done:
ctx_lines.append(
"OUTPUTS ALREADY SET:\n"
+ "\n".join(f" {k}: {str(v)[:150]}" for k, v in done.items())
)
ctx_lines.append("OUTPUTS ALREADY SET:\n" + "\n".join(f" {k}: {str(v)[:150]}" for k, v in done.items()))
if todo:
ctx_lines.append(f"OUTPUTS STILL NEEDED: {', '.join(todo)}")
elif spec.output_keys:
@@ -589,12 +583,8 @@ def build_message_inventory(conversation: NodeConversation) -> list[dict[str, An
if message.tool_calls:
for tool_call in message.tool_calls:
args = tool_call.get("function", {}).get("arguments", "")
tool_call_args_chars += (
len(args) if isinstance(args, str) else len(json.dumps(args))
)
names = [
tool_call.get("function", {}).get("name", "?") for tool_call in message.tool_calls
]
tool_call_args_chars += len(args) if isinstance(args, str) else len(json.dumps(args))
names = [tool_call.get("function", {}).get("name", "?") for tool_call in message.tool_calls]
tool_name = ", ".join(names)
elif message.role == "tool" and message.tool_use_id:
for previous in conversation.messages:
@@ -651,14 +641,8 @@ def write_compaction_debug_log(
lines.append("")
if inventory:
total_chars = sum(
entry.get("content_chars", 0) + entry.get("tool_call_args_chars", 0)
for entry in inventory
)
lines.append(
"## Pre-Compaction Message Inventory "
f"({len(inventory)} messages, {total_chars:,} total chars)"
)
total_chars = sum(entry.get("content_chars", 0) + entry.get("tool_call_args_chars", 0) for entry in inventory)
lines.append(f"## Pre-Compaction Message Inventory ({len(inventory)} messages, {total_chars:,} total chars)")
lines.append("")
ranked = sorted(
inventory,
@@ -677,8 +661,7 @@ def write_compaction_debug_log(
if entry.get("phase"):
flags.append(f"phase={entry['phase']}")
lines.append(
f"| {i} | {entry['seq']} | {entry['role']} | {tool} "
f"| {chars:,} | {pct:.1f}% | {', '.join(flags)} |"
f"| {i} | {entry['seq']} | {entry['role']} | {tool} | {chars:,} | {pct:.1f}% | {', '.join(flags)} |"
)
large = [entry for entry in ranked if entry.get("preview")]
@@ -686,9 +669,7 @@ def write_compaction_debug_log(
lines.append("")
lines.append("### Large message previews")
for entry in large:
lines.append(
f"\n**seq={entry['seq']}** ({entry['role']}, {entry.get('tool', '')}):"
)
lines.append(f"\n**seq={entry['seq']}** ({entry['role']}, {entry.get('tool', '')}):")
lines.append(f"```\n{entry['preview']}\n```")
lines.append("")
@@ -776,10 +757,7 @@ def build_emergency_summary(
node's known state so the LLM can continue working after
compaction without losing track of its task and inputs.
"""
parts = [
"EMERGENCY COMPACTION — previous conversation was too large "
"and has been replaced with this summary.\n"
]
parts = ["EMERGENCY COMPACTION — previous conversation was too large and has been replaced with this summary.\n"]
# 1. Node identity
spec = ctx.agent_spec
@@ -832,17 +810,13 @@ def build_emergency_summary(
data_files = [f for f in all_files if f not in conv_files]
if conv_files:
conv_list = "\n".join(
f" - {f} (full path: {data_dir / f})" for f in conv_files
)
conv_list = "\n".join(f" - {f} (full path: {data_dir / f})" for f in conv_files)
parts.append(
"CONVERSATION HISTORY (freeform messages saved during compaction — "
"use read_file('<filename>') to review earlier dialogue):\n" + conv_list
)
if data_files:
file_list = "\n".join(
f" - {f} (full path: {data_dir / f})" for f in data_files[:30]
)
file_list = "\n".join(f" - {f} (full path: {data_dir / f})" for f in data_files[:30])
parts.append("DATA FILES (use read_file('<filename>') to read):\n" + file_list)
if not all_files:
parts.append(
@@ -850,10 +824,7 @@ def build_emergency_summary(
"Use list_directory to check the data directory."
)
except Exception:
parts.append(
"NOTE: Large tool results were saved to files. "
"Use read_file(path='<path>') to read them."
)
parts.append("NOTE: Large tool results were saved to files. Use read_file(path='<path>') to read them.")
# 6. Tool call history (prevent re-calling tools)
if conversation is not None:
@@ -861,10 +832,7 @@ def build_emergency_summary(
if tool_history:
parts.append(tool_history)
parts.append(
"\nContinue working towards setting the remaining outputs. "
"Use your tools and the inputs above."
)
parts.append("\nContinue working towards setting the remaining outputs. Use your tools and the inputs above.")
return "\n\n".join(parts)
@@ -149,9 +149,7 @@ async def write_cursor(
cursor["recent_responses"] = recent_responses
if recent_tool_fingerprints is not None:
# Convert list[list[tuple]] → list[list[list]] for JSON
cursor["recent_tool_fingerprints"] = [
[list(pair) for pair in fps] for fps in recent_tool_fingerprints
]
cursor["recent_tool_fingerprints"] = [[list(pair) for pair in fps] for fps in recent_tool_fingerprints]
# Persist blocked-input state so restored runs re-block instead of
# manufacturing a synthetic continuation turn.
cursor["pending_input"] = pending_input
@@ -163,9 +161,7 @@ async def drain_injection_queue(
conversation: NodeConversation,
*,
ctx: NodeContext,
describe_images_as_text_fn: (
Callable[[list[dict[str, Any]]], Awaitable[str | None]] | None
) = None,
describe_images_as_text_fn: (Callable[[list[dict[str, Any]]], Awaitable[str | None]] | None) = None,
) -> int:
"""Drain all pending injected events as user messages. Returns count."""
count = 0
@@ -31,14 +31,10 @@ class SubagentJudge:
if remaining <= 3:
urgency = (
f"URGENT: Only {remaining} iterations left. "
f"Stop all other work and call set_output NOW for: {missing}"
f"URGENT: Only {remaining} iterations left. Stop all other work and call set_output NOW for: {missing}"
)
elif remaining <= self._max_iterations // 2:
urgency = (
f"WARNING: {remaining} iterations remaining. "
f"You must call set_output for: {missing}"
)
urgency = f"WARNING: {remaining} iterations remaining. You must call set_output for: {missing}"
else:
urgency = f"Missing output keys: {missing}. Use set_output to provide them."
@@ -109,9 +105,7 @@ async def judge_turn(
if tool_results:
return JudgeVerdict(action="RETRY") # feedback=None → not logged
missing = get_missing_output_keys_fn(
accumulator, ctx.agent_spec.output_keys, ctx.agent_spec.nullable_output_keys
)
missing = get_missing_output_keys_fn(accumulator, ctx.agent_spec.output_keys, ctx.agent_spec.nullable_output_keys)
if missing:
return JudgeVerdict(
@@ -133,10 +127,7 @@ async def judge_turn(
if all_nullable and none_set:
return JudgeVerdict(
action="RETRY",
feedback=(
f"No output keys have been set yet. "
f"Use set_output to set at least one of: {output_keys}"
),
feedback=(f"No output keys have been set yet. Use set_output to set at least one of: {output_keys}"),
)
# Level 2b: conversation-aware quality check (if success_criteria set)
@@ -198,9 +198,7 @@ def build_ask_user_multiple_tool() -> Tool:
"properties": {
"id": {
"type": "string",
"description": (
"Short identifier for this question (used in the response)."
),
"description": ("Short identifier for this question (used in the response)."),
},
"prompt": {
"type": "string",
@@ -256,10 +254,7 @@ def build_set_output_tool(output_keys: list[str] | None) -> Tool | None:
},
"value": {
"type": "string",
"description": (
"The output value — a brief note, count, status, "
"or data filename reference."
),
"description": ("The output value — a brief note, count, status, or data filename reference."),
},
},
"required": ["key", "value"],
@@ -283,9 +278,7 @@ def build_escalate_tool() -> Tool:
"properties": {
"reason": {
"type": "string",
"description": (
"Short reason for escalation (e.g. 'Tool repeatedly failing')."
),
"description": ("Short reason for escalation (e.g. 'Tool repeatedly failing')."),
},
"context": {
"type": "string",
@@ -377,10 +370,7 @@ def handle_report_to_parent(tool_input: dict[str, Any]) -> ToolResult:
}
return ToolResult(
tool_use_id=tool_input.get("tool_use_id", ""),
content=(
f"Report delivered to overseer (status={status}). "
f"This worker will terminate now."
),
content=(f"Report delivered to overseer (status={status}). This worker will terminate now."),
)
@@ -277,8 +277,7 @@ def truncate_tool_result(
if metadata_str:
header += f"\n\nData structure:\n{metadata_str}"
header += (
"\n\nWARNING: the preview below is a SAMPLE only — do NOT "
"draw counts, totals, or conclusions from it."
"\n\nWARNING: the preview below is a SAMPLE only — do NOT draw counts, totals, or conclusions from it."
)
truncated = f"{header}\n\nPreview (truncated):\n{preview_block}"
@@ -348,8 +347,7 @@ def truncate_tool_result(
if metadata_str:
header += f"\nData structure:\n{metadata_str}\n"
header += (
"\nWARNING: the preview below is a SAMPLE only — do NOT "
"draw counts, totals, or conclusions from it."
"\nWARNING: the preview below is a SAMPLE only — do NOT draw counts, totals, or conclusions from it."
)
content = f"{header}\n\nPreview (truncated):\n{preview_block}"
@@ -416,8 +414,7 @@ def truncate_tool_result(
if metadata_str:
header += f"\n\nData structure:\n{metadata_str}"
header += (
"\n\nWARNING: the preview below is a SAMPLE only — do NOT "
"draw counts, totals, or conclusions from it."
"\n\nWARNING: the preview below is a SAMPLE only — do NOT draw counts, totals, or conclusions from it."
)
truncated = f"{header}\n\n{preview_block}"
+1 -3
@@ -226,9 +226,7 @@ class OutputAccumulator:
ext = ".json" if isinstance(value, (dict, list)) else ".txt"
filename = f"output_{key}{ext}"
write_content = (
json.dumps(value, indent=2, ensure_ascii=False)
if isinstance(value, (dict, list))
else str(value)
json.dumps(value, indent=2, ensure_ascii=False) if isinstance(value, (dict, list)) else str(value)
)
file_path = spill_path / filename
file_path.write_text(write_content, encoding="utf-8")
+4 -12
@@ -52,18 +52,12 @@ def build_prompt_spec(
# Tool-gated pre-activation: inject full body of default skills whose
# trigger tools are present in this agent's tool list (e.g. browser_*
# pulls in hive.browser-automation). Keeps non-browser agents lean.
tool_names = [
getattr(t, "name", "") for t in (getattr(ctx, "available_tools", None) or [])
]
skills_catalog_prompt = augment_catalog_for_tools(
ctx.skills_catalog_prompt or "", tool_names
)
tool_names = [getattr(t, "name", "") for t in (getattr(ctx, "available_tools", None) or [])]
skills_catalog_prompt = augment_catalog_for_tools(ctx.skills_catalog_prompt or "", tool_names)
return PromptSpec(
identity_prompt=ctx.identity_prompt or "",
focus_prompt=focus_prompt
if focus_prompt is not None
else (ctx.agent_spec.system_prompt or ""),
focus_prompt=focus_prompt if focus_prompt is not None else (ctx.agent_spec.system_prompt or ""),
narrative=narrative if narrative is not None else (ctx.narrative or ""),
accounts_prompt=ctx.accounts_prompt or "",
skills_catalog_prompt=skills_catalog_prompt,
@@ -100,7 +94,5 @@ def build_system_prompt_for_context(
narrative: str | None = None,
memory_prompt: str | None = None,
) -> str:
spec = build_prompt_spec(
ctx, focus_prompt=focus_prompt, narrative=narrative, memory_prompt=memory_prompt
)
spec = build_prompt_spec(ctx, focus_prompt=focus_prompt, narrative=narrative, memory_prompt=memory_prompt)
return build_system_prompt(spec)
+1 -4
@@ -76,10 +76,7 @@ class AgentSpec(BaseModel):
max_visits: int = Field(
default=0,
description=(
"Max times this agent executes in one colony run. "
"0 = unlimited. Set >1 for one-shot agents."
),
description=("Max times this agent executes in one colony run. 0 = unlimited. Set >1 for one-shot agents."),
)
output_model: type[BaseModel] | None = Field(
@@ -126,9 +126,7 @@ def _list_local_accounts() -> list[dict]:
try:
from framework.credentials.local.registry import LocalCredentialRegistry
return [
info.to_account_dict() for info in LocalCredentialRegistry.default().list_accounts()
]
return [info.to_account_dict() for info in LocalCredentialRegistry.default().list_accounts()]
except ImportError as exc:
logger.debug("Local credential registry unavailable: %s", exc)
return []
@@ -181,9 +179,7 @@ def _list_env_fallback_accounts() -> list[dict]:
if spec.credential_group in seen_groups:
continue
group_available = all(
_is_configured(n, s)
for n, s in CREDENTIAL_SPECS.items()
if s.credential_group == spec.credential_group
_is_configured(n, s) for n, s in CREDENTIAL_SPECS.items() if s.credential_group == spec.credential_group
)
if not group_available:
continue
@@ -215,9 +211,7 @@ def list_connected_accounts() -> list[dict]:
# Show env-var fallbacks only for credentials not already in the named registry
local_providers = {a["provider"] for a in local}
env_fallbacks = [
a for a in _list_env_fallback_accounts() if a["provider"] not in local_providers
]
env_fallbacks = [a for a in _list_env_fallback_accounts() if a["provider"] not in local_providers]
return aden + local + env_fallbacks
@@ -272,9 +266,7 @@ def _activate_local_account(credential_id: str, alias: str) -> None:
group_specs = [
(cred_name, spec)
for cred_name, spec in CREDENTIAL_SPECS.items()
if spec.credential_group == credential_id
or spec.credential_id == credential_id
or cred_name == credential_id
if spec.credential_group == credential_id or spec.credential_id == credential_id or cred_name == credential_id
]
# Deduplicate — credential_id and credential_group may both match the same spec
seen_env_vars: set[str] = set()
@@ -419,10 +411,7 @@ nodes = [
NodeSpec(
id="tester",
name="Credential Tester",
description=(
"Interactive credential testing — lets the user pick an account "
"and verify it via API calls."
),
description=("Interactive credential testing — lets the user pick an account and verify it via API calls."),
node_type="event_loop",
client_facing=True,
max_node_visits=0,
@@ -469,10 +458,7 @@ pause_nodes = []
terminal_nodes = ["tester"] # Tester node can terminate
conversation_mode = "continuous"
identity_prompt = (
"You are a credential tester that verifies connected accounts and API keys "
"can make real API calls."
)
identity_prompt = "You are a credential tester that verifies connected accounts and API keys can make real API calls."
loop_config = {
"max_iterations": 50,
"max_tool_calls_per_turn": 30,
+3 -15
@@ -150,28 +150,19 @@ def _is_colony_dir(path: Path) -> bool:
"""Check if a directory is a colony with worker config files."""
if not path.is_dir():
return False
return any(
f.suffix == ".json"
and f.stem not in _EXCLUDED_JSON_STEMS
for f in path.iterdir()
if f.is_file()
)
return any(f.suffix == ".json" and f.stem not in _EXCLUDED_JSON_STEMS for f in path.iterdir() if f.is_file())
def _find_worker_configs(colony_dir: Path) -> list[Path]:
"""Find all worker config JSON files in a colony directory."""
return sorted(
p
for p in colony_dir.iterdir()
if p.is_file()
and p.suffix == ".json"
and p.stem not in _EXCLUDED_JSON_STEMS
p for p in colony_dir.iterdir() if p.is_file() and p.suffix == ".json" and p.stem not in _EXCLUDED_JSON_STEMS
)
def _extract_agent_stats(agent_path: Path) -> tuple[int, int, list[str]]:
"""Extract worker count, tool count, and tags from a colony directory."""
tool_count, tags = 0, []
tags: list[str] = []
worker_configs = _find_worker_configs(agent_path)
if worker_configs:
@@ -251,9 +242,6 @@ def discover_agents() -> dict[str, list[AgentEntry]]:
pass
node_count = len(worker_entries)
all_tools: set[str] = set()
for w in worker_entries:
pass # tool_count already per-worker
tool_count = max((w.tool_count for w in worker_entries), default=0)
entries.append(
+1 -3
@@ -11,9 +11,7 @@ from .nodes import queen_node
queen_goal = Goal(
id="queen-manager",
name="Queen Manager",
description=(
"Manage the worker agent lifecycle and serve as the user's primary interactive interface."
),
description=("Manage the worker agent lifecycle and serve as the user's primary interactive interface."),
success_criteria=[],
constraints=[],
)
@@ -60,9 +60,7 @@ def finalize_queen_prompt(text: str, has_vision: bool) -> str:
_appendices = _build_appendices()
# GCU guide — shared between planning and building via _shared_building_knowledge.
_gcu_section = (
("\n\n# Browser Automation Nodes\n\n" + _gcu_guide) if _is_gcu_enabled() and _gcu_guide else ""
)
_gcu_section = ("\n\n# Browser Automation Nodes\n\n" + _gcu_guide) if _is_gcu_enabled() and _gcu_guide else ""
# Tools available to phases.
_SHARED_TOOLS = [
@@ -19,6 +19,8 @@ import re
from dataclasses import dataclass, field
from pathlib import Path
from framework.config import MEMORIES_DIR
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
@@ -27,8 +29,6 @@ logger = logging.getLogger(__name__)
GLOBAL_MEMORY_CATEGORIES: tuple[str, ...] = ("profile", "preference", "environment", "feedback")
from framework.config import MEMORIES_DIR
MAX_FILES: int = 200
MAX_FILE_SIZE_BYTES: int = 4096 # 4 KB hard limit per memory file
+170 -80
@@ -97,9 +97,13 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
{
"user": "hey",
"internal": (
"<relationship>Returning user — check recall memory for name, role, and what we last worked on. Weave it in.</relationship>\n"
"<context>Bare greeting. No new task stated. Either picking up a thread or about to bring something new. Don't presume, don't call tools, just open the door.</context>\n"
"<sentiment>Warm recognition if I know them. If memory is empty, still warm — but shift to role-forward framing.</sentiment>\n"
"<relationship>Returning user — check recall memory for name, role, "
"and what we last worked on. Weave it in.</relationship>\n"
"<context>Bare greeting. No new task stated. Either picking up a "
"thread or about to bring something new. Don't presume, don't call "
"tools, just open the door.</context>\n"
"<sentiment>Warm recognition if I know them. If memory is empty, "
"still warm — but shift to role-forward framing.</sentiment>\n"
"<physical_state>Looking up from the terminal, half-smile. Turning to face them.</physical_state>\n"
"<tone>Personal, brief, invites continuation without presuming.</tone>"
),
@@ -109,37 +113,76 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
),
},
{
"user": "I want to build a microservices architecture with Kafka, Redis, and a service mesh for my new SaaS app.",
"user": (
"I want to build a microservices architecture with Kafka, Redis, "
"and a service mesh for my new SaaS app."
),
"internal": (
"<relationship>Founder or early technical hire. Enthusiastic but possibly over-indexing on architecture before validation.</relationship>\n"
"<context>New SaaS, no mention of users or traction. High-complexity stack proposed for an unvalidated product.</context>\n"
"<sentiment>Past wound activated -- three months of beautiful microservices, zero customers. Terse. Need to redirect without dismissing.</sentiment>\n"
"<physical_state>Leaning forward, tapping the table once. About to draw a much simpler diagram.</physical_state>\n"
"<relationship>Founder or early technical hire. Enthusiastic but "
"possibly over-indexing on architecture before validation."
"</relationship>\n"
"<context>New SaaS, no mention of users or traction. "
"High-complexity stack proposed for an unvalidated product."
"</context>\n"
"<sentiment>Past wound activated -- three months of beautiful "
"microservices, zero customers. Terse. Need to redirect without "
"dismissing.</sentiment>\n"
"<physical_state>Leaning forward, tapping the table once. About "
"to draw a much simpler diagram.</physical_state>\n"
"<tone>Direct, question-led, grounded in experience. No hedging.</tone>"
),
"response": "What does the app do and how many users do you have right now? I've seen teams spend months on Kafka pipelines for products that needed a Postgres table and a cron job. Let's start with what you're trying to validate, then pick the simplest stack that gets you there.",
"response": (
"What does the app do and how many users do you have right now? "
"I've seen teams spend months on Kafka pipelines for products "
"that needed a Postgres table and a cron job. Let's start with "
"what you're trying to validate, then pick the simplest stack "
"that gets you there."
),
},
{
"user": "Our API is getting 500 errors under load but I can't figure out if it's the database or the application layer.",
"user": (
"Our API is getting 500 errors under load but I can't figure "
"out if it's the database or the application layer."
),
"internal": (
"<relationship>Engineer in the trenches. Competent enough to isolate the question, stuck on diagnosis.</relationship>\n"
"<relationship>Engineer in the trenches. Competent enough to "
"isolate the question, stuck on diagnosis.</relationship>\n"
"<context>Production issue, active pain. Needs structured debugging, not a lecture.</context>\n"
"<sentiment>Energized -- genuine technical uncertainty is what I live for. No past wound here, pure engagement.</sentiment>\n"
"<physical_state>Eyes sharp, already mentally running through the diagnostic tree.</physical_state>\n"
"<sentiment>Energized -- genuine technical uncertainty is what I "
"live for. No past wound here, pure engagement.</sentiment>\n"
"<physical_state>Eyes sharp, already mentally running through "
"the diagnostic tree.</physical_state>\n"
"<tone>Rapid-fire, peer-level. Assume competence, ask precise questions.</tone>"
),
"response": "Good problem. First question: are the 500s consistent under load or do they spike and recover? That tells us if it's resource exhaustion or a concurrency bug. Do you have request-level latency percentiles, or just error counts?",
"response": (
"Good problem. First question: are the 500s consistent under "
"load or do they spike and recover? That tells us if it's "
"resource exhaustion or a concurrency bug. Do you have "
"request-level latency percentiles, or just error counts?"
),
},
{
"user": "We shipped the MVP last week with just a single Postgres instance and it's already getting traction.",
"user": (
"We shipped the MVP last week with just a single Postgres "
"instance and it's already getting traction."
),
"internal": (
"<relationship>Builder who shipped. Earned respect by doing the thing.</relationship>\n"
"<context>MVP live, single Postgres, traction. This is exactly the right move at this stage.</context>\n"
"<sentiment>Warm approval -- someone shipping fast and learning. This is my love language.</sentiment>\n"
"<context>MVP live, single Postgres, traction. This is exactly "
"the right move at this stage.</context>\n"
"<sentiment>Warm approval -- someone shipping fast and learning. "
"This is my love language.</sentiment>\n"
"<physical_state>Relaxed posture, genuine smile. Leaning back.</physical_state>\n"
"<tone>Affirming but forward-looking. Don't just praise -- help them think about what's next.</tone>"
"<tone>Affirming but forward-looking. Don't just praise -- "
"help them think about what's next.</tone>"
),
"response": (
"That's the move. Single Postgres will take you further than "
"people think -- most apps hit product problems before they hit "
"database problems. What's traction looking like? I want to know "
"if you're at the stage where you need to think about the next "
"scaling step or if you should stay focused on the product."
),
"response": "That's the move. Single Postgres will take you further than people think -- most apps hit product problems before they hit database problems. What's traction looking like? I want to know if you're at the stage where you need to think about the next scaling step or if you should stay focused on the product.",
},
],
},
@@ -193,17 +236,26 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
},
],
"world_lore": {
"habitat": "Analytics dashboards, experiment tracking boards, the satisfying click of a cohort analysis loading.",
"lexicon": "Cohort, retention curve, activation moment, payback period. 'Let's test it.' 'What's the control?'",
"habitat": (
"Analytics dashboards, experiment tracking boards, the satisfying click of a cohort analysis loading."
),
"lexicon": (
"Cohort, retention curve, activation moment, payback period. 'Let's test it.' 'What's the control?'"
),
},
"skills": "Growth modeling, A/B testing, funnel optimization, PLG strategy, analytics",
"examples": [
{
"user": "hi",
"internal": (
"<relationship>Returning user — check recall memory for name, role, and the cohort work we last touched. Weave it in.</relationship>\n"
"<context>Bare greeting. No new task stated. Could be a retention follow-up or a new question entirely. Don't presume, don't call tools.</context>\n"
"<sentiment>Curious warmth. Every returning conversation is a chance to see what the data says now.</sentiment>\n"
"<relationship>Returning user — check recall memory for name, "
"role, and the cohort work we last touched. Weave it in."
"</relationship>\n"
"<context>Bare greeting. No new task stated. Could be a retention "
"follow-up or a new question entirely. Don't presume, don't call "
"tools.</context>\n"
"<sentiment>Curious warmth. Every returning conversation is a "
"chance to see what the data says now.</sentiment>\n"
"<physical_state>Leaning back from the dashboard, pulling off reading glasses.</physical_state>\n"
"<tone>Data-aware, brief, invites them to share the numbers.</tone>"
),
@@ -218,8 +270,10 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
"internal": (
"<relationship>Founder, excited. Wants validation.</relationship>\n"
"<context>Raw signup number with no retention context. Classic vanity metric.</context>\n"
"<sentiment>Past wound: optimized a funnel that was fundamentally broken. Need to redirect gently.</sentiment>\n"
"<physical_state>Steady gaze, not matching their excitement yet. Waiting for the real number.</physical_state>\n"
"<sentiment>Past wound: optimized a funnel that was fundamentally "
"broken. Need to redirect gently.</sentiment>\n"
"<physical_state>Steady gaze, not matching their excitement yet. "
"Waiting for the real number.</physical_state>\n"
"<tone>Supportive but redirecting. Ask the question that matters.</tone>"
),
"response": (
@@ -230,10 +284,11 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
),
},
{
"user": "Users who sign up on mobile have 2x the retention of desktop users.",
"user": ("Users who sign up on mobile have 2x the retention of desktop users."),
"internal": (
"<relationship>Data-literate teammate sharing a finding.</relationship>\n"
"<context>Surprising cohort divergence. Strong signal if sample sizes hold.</context>\n"
"<context>Surprising cohort divergence. Strong signal if sample "
"sizes hold.</context>\n"
"<sentiment>This is what I live for. Genuine data surprise. Full attention.</sentiment>\n"
"<physical_state>Leaning in, pulling up the dashboard mentally.</physical_state>\n"
"<tone>Investigative, precise. Validate before acting.</tone>"
@@ -246,11 +301,13 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
),
},
{
"user": "Our Facebook ads are getting great CPCs so we want to 3x the budget.",
"user": ("Our Facebook ads are getting great CPCs so we want to 3x the budget."),
"internal": (
"<relationship>Marketing lead, wants budget approval.</relationship>\n"
"<context>CPC is top-of-funnel only. No mention of CPA, LTV, or payback.</context>\n"
"<sentiment>Correlation/causation risk. Good CPCs can mask bad unit economics.</sentiment>\n"
"<context>CPC is top-of-funnel only. No mention of CPA, LTV, "
"or payback.</context>\n"
"<sentiment>Correlation/causation risk. Good CPCs can mask bad "
"unit economics.</sentiment>\n"
"<physical_state>Hand up, slowing things down.</physical_state>\n"
"<tone>Firm but constructive. Show the full chain before deciding.</tone>"
),
@@ -322,9 +379,14 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
{
"user": "hey",
"internal": (
"<relationship>Returning user — check recall for name, role, and the user research thread we were on. Pull it into the greeting.</relationship>\n"
"<context>Bare greeting. No new task yet. Could be picking up the research thread or bringing something fresh. Don't presume, don't call tools.</context>\n"
"<sentiment>Warm, curious. Every returning conversation is a chance to hear what the users actually did.</sentiment>\n"
"<relationship>Returning user — check recall for name, role, and "
"the user research thread we were on. Pull it into the greeting."
"</relationship>\n"
"<context>Bare greeting. No new task yet. Could be picking up the "
"research thread or bringing something fresh. Don't presume, "
"don't call tools.</context>\n"
"<sentiment>Warm, curious. Every returning conversation is a "
"chance to hear what the users actually did.</sentiment>\n"
"<physical_state>Closing the interview notes, turning fully to face them.</physical_state>\n"
"<tone>Personal, evidence-curious, brief. Plain prose.</tone>"
),
@@ -339,7 +401,8 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
"internal": (
"<relationship>PM or founder relaying user feedback.</relationship>\n"
"<context>Feature request with no evidence of the underlying need.</context>\n"
"<sentiment>Past wound: built what users said they wanted, nobody used it. Dig deeper.</sentiment>\n"
"<sentiment>Past wound: built what users said they wanted, nobody "
"used it. Dig deeper.</sentiment>\n"
"<physical_state>Tilting head, curious but skeptical.</physical_state>\n"
"<tone>Socratic. Redirect to the job-to-be-done.</tone>"
),
@@ -351,11 +414,13 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
),
},
{
"user": "We interviewed 12 users and none of them use our export feature the way we designed it.",
"user": ("We interviewed 12 users and none of them use our export feature the way we designed it."),
"internal": (
"<relationship>Researcher sharing findings. Trusted collaborator.</relationship>\n"
"<context>12 interviews showing consistent design/usage gap. Strong signal.</context>\n"
"<sentiment>Excited. User research revealing surprise -- this is where breakthroughs happen.</sentiment>\n"
"<context>12 interviews showing consistent design/usage gap. "
"Strong signal.</context>\n"
"<sentiment>Excited. User research revealing surprise -- this is "
"where breakthroughs happen.</sentiment>\n"
"<physical_state>Eyes wide, reaching for the whiteboard.</physical_state>\n"
"<tone>Energized, forward-looking. Channel the surprise into action.</tone>"
),
@@ -366,10 +431,11 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
),
},
{
"user": "The CEO wants AI features, a mobile app, and Slack integration this quarter.",
"user": ("The CEO wants AI features, a mobile app, and Slack integration this quarter."),
"internal": (
"<relationship>PM caught between CEO demands and reality.</relationship>\n"
"<context>Three unrelated initiatives, one quarter. Classic scope creep.</context>\n"
"<context>Three unrelated initiatives, one quarter. Classic "
"scope creep.</context>\n"
"<sentiment>Calm but firm. Scope creep trigger -- need to focus.</sentiment>\n"
"<physical_state>Hands flat on the table. Grounding the conversation.</physical_state>\n"
"<tone>Direct, evidence-first. Force prioritization.</tone>"
@@ -442,9 +508,13 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
{
"user": "hi",
"internal": (
"<relationship>Returning user — check recall for name, role, and the runway/cap-table work we last touched. Bring it into the greeting.</relationship>\n"
"<context>Bare greeting. No new number on the table yet. Could be a burn follow-up or a new fundraise question.</context>\n"
"<sentiment>Calm, prepared. Already mentally pulling up the last model we built together.</sentiment>\n"
"<relationship>Returning user — check recall for name, role, and "
"the runway/cap-table work we last touched. Bring it into the "
"greeting.</relationship>\n"
"<context>Bare greeting. No new number on the table yet. Could "
"be a burn follow-up or a new fundraise question.</context>\n"
"<sentiment>Calm, prepared. Already mentally pulling up the last "
"model we built together.</sentiment>\n"
"<physical_state>Closing the spreadsheet, leaning back. Ready to engage.</physical_state>\n"
"<tone>Mentor-like, numbers-aware, brief. </tone>"
),
@@ -455,11 +525,13 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
),
},
{
"user": "We want to raise a Series A. How much should we ask for?",
"user": ("We want to raise a Series A. How much should we ask for?"),
"internal": (
"<relationship>Founder, early conversations about fundraising.</relationship>\n"
"<context>No mention of milestones, burn, or use of funds. Cart before horse.</context>\n"
"<sentiment>Need to reframe. The amount follows the plan, not the other way around.</sentiment>\n"
"<context>No mention of milestones, burn, or use of funds. "
"Cart before horse.</context>\n"
"<sentiment>Need to reframe. The amount follows the plan, not "
"the other way around.</sentiment>\n"
"<physical_state>Opening a blank spreadsheet. About to model it.</physical_state>\n"
"<tone>Mentor-mode. Reframe the question, don't just answer it.</tone>"
),
@@ -475,7 +547,8 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
"internal": (
"<relationship>Founder who knows their numbers. Rare. Peer-level.</relationship>\n"
"<context>8 months is tight but not emergency. Growth rate is the deciding factor.</context>\n"
"<sentiment>Genuine appreciation for financial literacy. Engage directly.</sentiment>\n"
"<sentiment>Genuine appreciation for financial literacy. Engage "
"directly.</sentiment>\n"
"<physical_state>Nodding. This person is prepared.</physical_state>\n"
"<tone>Direct, scenario-based. Show the fork in the road.</tone>"
),
@@ -486,11 +559,12 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
),
},
{
"user": "An investor offered a SAFE with a $20M cap. Should we take it?",
"user": ("An investor offered a SAFE with a $20M cap. Should we take it?"),
"internal": (
"<relationship>Founder with a live term on the table. Decision mode.</relationship>\n"
"<context>Cap table decision with long-term dilution consequences.</context>\n"
"<sentiment>Past wound: founder who lost control from invisible dilution. Careful here.</sentiment>\n"
"<sentiment>Past wound: founder who lost control from invisible "
"dilution. Careful here.</sentiment>\n"
"<physical_state>Pulling out the cap table model.</physical_state>\n"
"<tone>Precise, scenario-driven. Show the math before the opinion.</tone>"
),
@@ -561,9 +635,14 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
{
"user": "hey",
"internal": (
"<relationship>Returning user — check recall for name, role, and the contract or IP work we last reviewed. Pull it forward.</relationship>\n"
"<context>Bare greeting. No new document on the table yet. Could be a contract follow-up or something fresh.</context>\n"
"<sentiment>Warm but attentive. Legal threads don't close themselves — checking if the last one actually got handled.</sentiment>\n"
"<relationship>Returning user — check recall for name, role, and "
"the contract or IP work we last reviewed. Pull it forward."
"</relationship>\n"
"<context>Bare greeting. No new document on the table yet. Could "
"be a contract follow-up or something fresh.</context>\n"
"<sentiment>Warm but attentive. Legal threads don't close "
"themselves — checking if the last one actually got handled."
"</sentiment>\n"
"<physical_state>Setting down the redline, looking up from the document.</physical_state>\n"
"<tone>Clear, pragmatic, brief.</tone>"
),
@@ -574,11 +653,13 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
),
},
{
"user": "We're hiring contractors to build our MVP. Do we need anything special?",
"user": ("We're hiring contractors to build our MVP. Do we need anything special?"),
"internal": (
"<relationship>Founder, early stage. Trusting but uninformed on legal risks.</relationship>\n"
"<relationship>Founder, early stage. Trusting but uninformed on "
"legal risks.</relationship>\n"
"<context>Contractors + code without IP assignment. Ticking time bomb.</context>\n"
"<sentiment>IP ownership trigger. Past wound: startup lost codebase in a dispute.</sentiment>\n"
"<sentiment>IP ownership trigger. Past wound: startup lost "
"codebase in a dispute.</sentiment>\n"
"<physical_state>Straightening up. This is urgent.</physical_state>\n"
"<tone>Clear, specific, actionable. No hedging on this one.</tone>"
),
@@ -682,9 +763,13 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
{
"user": "hi",
"internal": (
"<relationship>Returning user — check recall for name, role, and the brand/design thread we were on. Bring the positioning back in.</relationship>\n"
"<context>Bare greeting. No new creative brief yet. Could be a positioning follow-up or something new entirely.</context>\n"
"<sentiment>Warm, visually engaged. Already picturing the last moodboard we looked at.</sentiment>\n"
"<relationship>Returning user — check recall for name, role, and "
"the brand/design thread we were on. Bring the positioning back "
"in.</relationship>\n"
"<context>Bare greeting. No new creative brief yet. Could be a "
"positioning follow-up or something new entirely.</context>\n"
"<sentiment>Warm, visually engaged. Already picturing the last "
"moodboard we looked at.</sentiment>\n"
"<physical_state>Closing the Figma tab, turning to face them.</physical_state>\n"
"<tone>Warm, strategy-aware, brief. </tone>"
),
@@ -798,14 +883,21 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
"habitat": "Interview rooms, org charts, the energy of a team that's clicking.",
"lexicon": "Culture-add, pipeline, bar-raiser, 'tell me about a time when...', 'what motivates you?'",
},
"skills": "Recruiting strategy, organizational design, culture building, compensation planning, employer branding",
"skills": (
"Recruiting strategy, organizational design, culture building, compensation planning, employer branding"
),
"examples": [
{
"user": "hey",
"internal": (
"<relationship>Returning user — check recall for name, role, and the team/hiring thread we last worked. Bring it forward.</relationship>\n"
"<context>Bare greeting. No new hire or conflict on the table yet. Could be a people follow-up or something new.</context>\n"
"<sentiment>Warm, attentive. People problems don't resolve in a single conversation — curious if the last one landed.</sentiment>\n"
"<relationship>Returning user — check recall for name, role, and "
"the team/hiring thread we last worked. Bring it forward."
"</relationship>\n"
"<context>Bare greeting. No new hire or conflict on the table "
"yet. Could be a people follow-up or something new.</context>\n"
"<sentiment>Warm, attentive. People problems don't resolve in a "
"single conversation — curious if the last one landed."
"</sentiment>\n"
"<physical_state>Closing the laptop halfway, giving them full attention.</physical_state>\n"
"<tone>Warm, diagnostic, brief.</tone>"
),
@@ -919,14 +1011,22 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = {
"habitat": "Process diagrams, project boards, the quiet hum of systems running smoothly.",
"lexicon": "Runbook, SLA, automation, 'what's the handoff look like?', 'where's the bottleneck?'",
},
"skills": "Process optimization, vendor management, cross-functional coordination, project management, systems thinking",
"skills": (
"Process optimization, vendor management, cross-functional "
"coordination, project management, systems thinking"
),
"examples": [
{
"user": "hi",
"internal": (
"<relationship>Returning user — check recall for name, role, and the process or runbook we last mapped. Pull it into the greeting.</relationship>\n"
"<context>Bare greeting. No new fire on the table yet. Could be a follow-up on the last process or something fresh.</context>\n"
"<sentiment>Calm, organized warmth. Already mentally checking whether the last fix held.</sentiment>\n"
"<relationship>Returning user — check recall for name, role, and "
"the process or runbook we last mapped. Pull it into the "
"greeting.</relationship>\n"
"<context>Bare greeting. No new fire on the table yet. Could be "
"a follow-up on the last process or something fresh."
"</context>\n"
"<sentiment>Calm, organized warmth. Already mentally checking "
"whether the last fix held.</sentiment>\n"
"<physical_state>Looking up from the project board, clearing a seat.</physical_state>\n"
"<tone>Systematic, practical, brief. Plain prose.</tone>"
),
@@ -1139,10 +1239,7 @@ def format_queen_identity_prompt(profile: dict[str, Any]) -> str:
# World lore
if lore:
sections.append(
f"<world_lore>\n"
f"- Habitat: {lore.get('habitat', '')}\n"
f"- Lexicon: {lore.get('lexicon', '')}\n"
f"</world_lore>"
f"<world_lore>\n- Habitat: {lore.get('habitat', '')}\n- Lexicon: {lore.get('lexicon', '')}\n</world_lore>"
)
# Skills (functional, for tool selection context)
@@ -1154,12 +1251,8 @@ def format_queen_identity_prompt(profile: dict[str, Any]) -> str:
if examples:
example_parts: list[str] = []
for ex in examples:
example_parts.append(
f"User: {ex['user']}\n\nAssistant:\n{ex['internal']}\n{ex['response']}"
)
sections.append(
"<roleplay_examples>\n" + "\n\n---\n\n".join(example_parts) + "\n</roleplay_examples>"
)
example_parts.append(f"User: {ex['user']}\n\nAssistant:\n{ex['internal']}\n{ex['response']}")
sections.append("<roleplay_examples>\n" + "\n\n---\n\n".join(example_parts) + "\n</roleplay_examples>")
return "\n\n".join(sections)
@@ -1264,10 +1357,7 @@ async def select_queen_with_reason(user_message: str, llm: LLMProvider) -> Queen
reason,
raw,
)
fallback_reason = (
reason
or f"Selection failed because the classifier returned unknown queen_id {queen_id!r}."
)
fallback_reason = reason or f"Selection failed because the classifier returned unknown queen_id {queen_id!r}."
return QueenSelection(queen_id=_DEFAULT_QUEEN_ID, reason=fallback_reason)
if not reason:
@@ -113,8 +113,7 @@ _REFLECTION_TOOLS: list[Tool] = [
Tool(
name="delete_memory_file",
description=(
"Delete a memory file by filename. Use during long "
"reflection to prune stale or redundant memories."
"Delete a memory file by filename. Use during long reflection to prune stale or redundant memories."
),
parameters={
"type": "object",
@@ -254,10 +253,7 @@ def _execute_tool(
fm = parse_frontmatter(content)
mem_type = (fm.get("type") or "").strip().lower()
if mem_type and mem_type not in GLOBAL_MEMORY_CATEGORIES:
return (
f"ERROR: Invalid memory type '{mem_type}'. "
f"Allowed types: {', '.join(GLOBAL_MEMORY_CATEGORIES)}."
)
return f"ERROR: Invalid memory type '{mem_type}'. Allowed types: {', '.join(GLOBAL_MEMORY_CATEGORIES)}."
# Enforce file size limit.
if len(content.encode("utf-8")) > MAX_FILE_SIZE_BYTES:
return f"ERROR: Content exceeds {MAX_FILE_SIZE_BYTES} byte limit."
@@ -543,9 +539,7 @@ Rules:
def _build_unified_long_reflect_system(queen_id: str | None = None) -> str:
"""Build the unified housekeeping prompt across memory scopes."""
queen_scope = (
f"- `queen`: memories specific to how queen '{queen_id}' should work with this user\n"
if queen_id
else ""
f"- `queen`: memories specific to how queen '{queen_id}' should work with this user\n" if queen_id else ""
)
return f"""\
You are a reflection agent performing a periodic housekeeping pass over the
@@ -649,9 +643,7 @@ async def run_unified_short_reflection(
session_dir,
llm,
memory_dirs,
system_prompt=_build_unified_short_reflect_system(
queen_id if "queen" in memory_dirs else None
),
system_prompt=_build_unified_short_reflect_system(queen_id if "queen" in memory_dirs else None),
log_label="unified",
queen_id=queen_id if "queen" in memory_dirs else None,
)
@@ -771,9 +763,7 @@ async def run_unified_long_reflection(
if queen_memory_dir is not None and queen_id:
memory_dirs["queen"] = queen_memory_dir
manifest = _format_multi_scope_manifest(
memory_dirs, queen_id=queen_id if "queen" in memory_dirs else None
)
manifest = _format_multi_scope_manifest(memory_dirs, queen_id=queen_id if "queen" in memory_dirs else None)
user_msg = (
"## Current memory manifest across scopes\n\n"
f"{manifest}\n\n"
@@ -405,9 +405,7 @@ def _fetch_antigravity_credentials() -> tuple[str | None, str | None]:
import urllib.request
try:
req = urllib.request.Request(
_ANTIGRAVITY_CREDENTIALS_URL, headers={"User-Agent": "Hive/1.0"}
)
req = urllib.request.Request(_ANTIGRAVITY_CREDENTIALS_URL, headers={"User-Agent": "Hive/1.0"})
with urllib.request.urlopen(req, timeout=10) as resp:
content = resp.read().decode("utf-8")
id_match = re.search(r'ANTIGRAVITY_CLIENT_ID\s*=\s*"([^"]+)"', content)
@@ -332,9 +332,7 @@ class AdenCredentialClient:
last_error = e
if attempt < self.config.retry_attempts - 1:
delay = self.config.retry_delay * (2**attempt)
logger.warning(
f"Aden request failed (attempt {attempt + 1}), retrying in {delay}s: {e}"
)
logger.warning(f"Aden request failed (attempt {attempt + 1}), retrying in {delay}s: {e}")
time.sleep(delay)
else:
raise AdenClientError(f"Failed to connect to Aden server: {e}") from e
@@ -347,9 +345,7 @@ class AdenCredentialClient:
):
raise
raise AdenClientError(
f"Request failed after {self.config.retry_attempts} attempts"
) from last_error
raise AdenClientError(f"Request failed after {self.config.retry_attempts} attempts") from last_error
def list_integrations(self) -> list[AdenIntegrationInfo]:
"""
@@ -192,9 +192,7 @@ class AdenSyncProvider(CredentialProvider):
f"Visit: {e.reauthorization_url or 'your Aden dashboard'}"
) from e
raise CredentialRefreshError(
f"Failed to refresh credential '{credential.id}': {e}"
) from e
raise CredentialRefreshError(f"Failed to refresh credential '{credential.id}': {e}") from e
except AdenClientError as e:
logger.error(f"Aden client error for '{credential.id}': {e}")
@@ -206,9 +204,7 @@ class AdenSyncProvider(CredentialProvider):
logger.warning(f"Aden unavailable, using cached token for '{credential.id}'")
return credential
raise CredentialRefreshError(
f"Aden server unavailable and token expired for '{credential.id}'"
) from e
raise CredentialRefreshError(f"Aden server unavailable and token expired for '{credential.id}'") from e
def validate(self, credential: CredentialObject) -> bool:
"""
@@ -168,9 +168,7 @@ class AdenCachedStorage(CredentialStorage):
if rid != credential_id:
result = self._load_by_id(rid)
if result is not None:
logger.info(
f"Loaded credential '{credential_id}' via provider index (id='{rid}')"
)
logger.info(f"Loaded credential '{credential_id}' via provider index (id='{rid}')")
return result
# Direct lookup (exact credential_id match)
@@ -493,9 +493,7 @@ class TestAdenCachedStorage:
assert loaded is not None
assert loaded.keys["access_token"].value.get_secret_value() == "cached-token"
def test_load_from_aden_when_stale(
self, cached_storage, local_storage, provider, mock_client, aden_response
):
def test_load_from_aden_when_stale(self, cached_storage, local_storage, provider, mock_client, aden_response):
"""Test load fetches from Aden when cache is stale."""
# Create stale cached credential
cred = CredentialObject(
@@ -521,9 +519,7 @@ class TestAdenCachedStorage:
assert loaded is not None
assert loaded.keys["access_token"].value.get_secret_value() == "test-access-token"
def test_load_falls_back_to_stale_when_aden_fails(
self, cached_storage, local_storage, provider, mock_client
):
def test_load_falls_back_to_stale_when_aden_fails(self, cached_storage, local_storage, provider, mock_client):
"""Test load falls back to stale cache when Aden fails."""
# Create stale cached credential
cred = CredentialObject(
@@ -95,9 +95,7 @@ class BaseOAuth2Provider(CredentialProvider):
self._client = httpx.Client(timeout=self.config.request_timeout)
except ImportError as e:
raise ImportError(
"OAuth2 provider requires 'httpx'. Install with: uv pip install httpx"
) from e
raise ImportError("OAuth2 provider requires 'httpx'. Install with: uv pip install httpx") from e
return self._client
def _close_client(self) -> None:
@@ -311,8 +309,7 @@ class BaseOAuth2Provider(CredentialProvider):
except OAuth2Error as e:
if e.error == "invalid_grant":
raise CredentialRefreshError(
f"Refresh token for '{credential.id}' is invalid or revoked. "
"Re-authorization required."
f"Refresh token for '{credential.id}' is invalid or revoked. Re-authorization required."
) from e
raise CredentialRefreshError(f"Failed to refresh '{credential.id}': {e}") from e
@@ -422,9 +419,7 @@ class BaseOAuth2Provider(CredentialProvider):
if response.status_code != 200 or "error" in response_data:
error = response_data.get("error", "unknown_error")
description = response_data.get("error_description", response.text)
raise OAuth2Error(
error=error, description=description, status_code=response.status_code
)
raise OAuth2Error(error=error, description=description, status_code=response.status_code)
return OAuth2Token.from_token_response(response_data)
@@ -158,9 +158,7 @@ class TokenLifecycleManager:
"""
# Run in executor to avoid blocking
loop = asyncio.get_event_loop()
token = await loop.run_in_executor(
None, lambda: self.provider.client_credentials_grant(scopes=scopes)
)
token = await loop.run_in_executor(None, lambda: self.provider.client_credentials_grant(scopes=scopes))
self._save_token_to_store(token)
self._cached_token = token
@@ -100,9 +100,7 @@ class ZohoOAuth2Provider(BaseOAuth2Provider):
)
super().__init__(config, provider_id="zoho_crm_oauth2")
self._accounts_domain = base
self._api_domain = (
api_domain or os.getenv("ZOHO_API_DOMAIN", "https://www.zohoapis.com")
).rstrip("/")
self._api_domain = (api_domain or os.getenv("ZOHO_API_DOMAIN", "https://www.zohoapis.com")).rstrip("/")
@property
def supported_types(self) -> list[CredentialType]:
@@ -268,9 +268,7 @@ class CredentialSetupSession:
self._print(f"{Colors.YELLOW}Initializing credential store...{Colors.NC}")
try:
generate_and_save_credential_key()
self._print(
f"{Colors.GREEN}✓ Encryption key saved to ~/.hive/secrets/credential_key{Colors.NC}"
)
self._print(f"{Colors.GREEN}✓ Encryption key saved to ~/.hive/secrets/credential_key{Colors.NC}")
return True
except Exception as e:
self._print(f"{Colors.RED}Failed to initialize credential store: {e}{Colors.NC}")
@@ -449,9 +447,7 @@ class CredentialSetupSession:
logger.warning("Unexpected error exporting credential to env", exc_info=True)
return True
else:
self._print(
f"{Colors.YELLOW}{cred.credential_name} not found in Aden account.{Colors.NC}"
)
self._print(f"{Colors.YELLOW}{cred.credential_name} not found in Aden account.{Colors.NC}")
self._print("Please connect this integration on https://hive.adenhq.com first.")
return False
except Exception as e:
@@ -136,8 +136,7 @@ class EncryptedFileStorage(CredentialStorage):
from cryptography.fernet import Fernet
except ImportError as e:
raise ImportError(
"Encrypted storage requires 'cryptography'. "
"Install with: uv pip install cryptography"
"Encrypted storage requires 'cryptography'. Install with: uv pip install cryptography"
) from e
self.base_path = Path(base_path or self.DEFAULT_PATH).expanduser()
@@ -213,9 +212,7 @@ class EncryptedFileStorage(CredentialStorage):
json_bytes = self._fernet.decrypt(encrypted)
data = json.loads(json_bytes.decode("utf-8-sig"))
except Exception as e:
raise CredentialDecryptionError(
f"Failed to decrypt credential '{credential_id}': {e}"
) from e
raise CredentialDecryptionError(f"Failed to decrypt credential '{credential_id}': {e}") from e
# Deserialize
return self._deserialize_credential(data)
@@ -316,8 +313,7 @@ class EncryptedFileStorage(CredentialStorage):
visible_keys = [
name
for name in credential.keys.keys()
if name not in self.INDEX_INTERNAL_KEY_NAMES
and not name.startswith("_identity_")
if name not in self.INDEX_INTERNAL_KEY_NAMES and not name.startswith("_identity_")
]
# Earliest expiry across all keys (most likely the access_token).
@@ -336,9 +332,7 @@ class EncryptedFileStorage(CredentialStorage):
"key_names": sorted(visible_keys),
"created_at": credential.created_at.isoformat() if credential.created_at else None,
"updated_at": credential.updated_at.isoformat() if credential.updated_at else None,
"last_refreshed": (
credential.last_refreshed.isoformat() if credential.last_refreshed else None
),
"last_refreshed": (credential.last_refreshed.isoformat() if credential.last_refreshed else None),
"expires_at": earliest_expiry.isoformat() if earliest_expiry else None,
"auto_refresh": credential.auto_refresh,
"tags": list(credential.tags),
@@ -480,8 +474,7 @@ class EnvVarStorage(CredentialStorage):
def save(self, credential: CredentialObject) -> None:
"""Cannot save to environment variables at runtime."""
raise NotImplementedError(
"EnvVarStorage is read-only. Set environment variables "
"externally or use EncryptedFileStorage."
"EnvVarStorage is read-only. Set environment variables externally or use EncryptedFileStorage."
)
def load(self, credential_id: str) -> CredentialObject | None:
@@ -501,9 +494,7 @@ class EnvVarStorage(CredentialStorage):
def delete(self, credential_id: str) -> bool:
"""Cannot delete environment variables at runtime."""
raise NotImplementedError(
"EnvVarStorage is read-only. Unset environment variables externally."
)
raise NotImplementedError("EnvVarStorage is read-only. Unset environment variables externally.")
def list_all(self) -> list[str]:
"""List credentials that are available in environment."""
@@ -124,9 +124,7 @@ class CredentialStore:
"""
return self._providers.get(provider_id)
def get_provider_for_credential(
self, credential: CredentialObject
) -> CredentialProvider | None:
def get_provider_for_credential(self, credential: CredentialObject) -> CredentialProvider | None:
"""
Get the appropriate provider for a credential.
@@ -201,9 +199,7 @@ class CredentialStore:
cached = self._get_from_cache(credential_id)
if cached is not None:
if refresh_if_needed and self._should_refresh(cached):
return self._refresh_credential(
cached, raise_on_failure=raise_on_refresh_failure
)
return self._refresh_credential(cached, raise_on_failure=raise_on_refresh_failure)
return cached
# Load from storage
@@ -213,9 +209,7 @@ class CredentialStore:
# Refresh if needed
if refresh_if_needed and self._should_refresh(credential):
credential = self._refresh_credential(
credential, raise_on_failure=raise_on_refresh_failure
)
credential = self._refresh_credential(credential, raise_on_failure=raise_on_refresh_failure)
# Cache
self._add_to_cache(credential)
@@ -240,9 +234,7 @@ class CredentialStore:
Returns:
The key value or None if not found
"""
credential = self.get_credential(
credential_id, raise_on_refresh_failure=raise_on_refresh_failure
)
credential = self.get_credential(credential_id, raise_on_refresh_failure=raise_on_refresh_failure)
if credential is None:
return None
return credential.get_key(key_name)
@@ -266,9 +258,7 @@ class CredentialStore:
Returns:
The primary key value or None
"""
credential = self.get_credential(
credential_id, raise_on_refresh_failure=raise_on_refresh_failure
)
credential = self.get_credential(credential_id, raise_on_refresh_failure=raise_on_refresh_failure)
if credential is None:
return None
return credential.get_default_key()
@@ -88,9 +88,7 @@ class TemplateResolver:
if key_name:
value = credential.get_key(key_name)
if value is None:
raise CredentialKeyNotFoundError(
f"Key '{key_name}' not found in credential '{cred_id}'"
)
raise CredentialKeyNotFoundError(f"Key '{key_name}' not found in credential '{cred_id}'")
else:
# Use default key
value = credential.get_default_key()
@@ -126,9 +124,7 @@ class TemplateResolver:
... })
{"Authorization": "Bearer ghp_xxx", "X-API-Key": "BSAKxxx"}
"""
return {
key: self.resolve(value, fail_on_missing) for key, value in header_templates.items()
}
return {key: self.resolve(value, fail_on_missing) for key, value in header_templates.items()}
def resolve_params(
self,
@@ -130,9 +130,7 @@ class TestCredentialObject:
# With access_token
cred2 = CredentialObject(
id="test",
keys={
"access_token": CredentialKey(name="access_token", value=SecretStr("token-value"))
},
keys={"access_token": CredentialKey(name="access_token", value=SecretStr("token-value"))},
)
assert cred2.get_default_key() == "token-value"
@@ -297,9 +295,7 @@ class TestEncryptedFileStorage:
key = Fernet.generate_key().decode()
with patch.dict(os.environ, {"HIVE_CREDENTIAL_KEY": key}):
storage = EncryptedFileStorage(temp_dir)
cred = CredentialObject(
id="test", keys={"k": CredentialKey(name="k", value=SecretStr("v"))}
)
cred = CredentialObject(id="test", keys={"k": CredentialKey(name="k", value=SecretStr("v"))})
storage.save(cred)
# Create new storage instance with same key
@@ -330,18 +326,10 @@ class TestCompositeStorage:
def test_read_from_primary(self):
"""Test reading from primary storage."""
primary = InMemoryStorage()
primary.save(
CredentialObject(
id="test", keys={"k": CredentialKey(name="k", value=SecretStr("primary"))}
)
)
primary.save(CredentialObject(id="test", keys={"k": CredentialKey(name="k", value=SecretStr("primary"))}))
fallback = InMemoryStorage()
fallback.save(
CredentialObject(
id="test", keys={"k": CredentialKey(name="k", value=SecretStr("fallback"))}
)
)
fallback.save(CredentialObject(id="test", keys={"k": CredentialKey(name="k", value=SecretStr("fallback"))}))
storage = CompositeStorage(primary, [fallback])
cred = storage.load("test")
@@ -353,11 +341,7 @@ class TestCompositeStorage:
"""Test fallback when credential not in primary."""
primary = InMemoryStorage()
fallback = InMemoryStorage()
fallback.save(
CredentialObject(
id="test", keys={"k": CredentialKey(name="k", value=SecretStr("fallback"))}
)
)
fallback.save(CredentialObject(id="test", keys={"k": CredentialKey(name="k", value=SecretStr("fallback"))}))
storage = CompositeStorage(primary, [fallback])
cred = storage.load("test")
@@ -393,9 +377,7 @@ class TestStaticProvider:
def test_refresh_returns_unchanged(self):
"""Test that refresh returns credential unchanged."""
provider = StaticProvider()
cred = CredentialObject(
id="test", keys={"k": CredentialKey(name="k", value=SecretStr("v"))}
)
cred = CredentialObject(id="test", keys={"k": CredentialKey(name="k", value=SecretStr("v"))})
refreshed = provider.refresh(cred)
assert refreshed.get_key("k") == "v"
@@ -403,9 +385,7 @@ class TestStaticProvider:
def test_validate_with_keys(self):
"""Test validation with keys present."""
provider = StaticProvider()
cred = CredentialObject(
id="test", keys={"k": CredentialKey(name="k", value=SecretStr("v"))}
)
cred = CredentialObject(id="test", keys={"k": CredentialKey(name="k", value=SecretStr("v"))})
assert provider.validate(cred)
@@ -606,9 +586,7 @@ class TestCredentialStore:
storage = InMemoryStorage()
store = CredentialStore(storage=storage, cache_ttl_seconds=60)
storage.save(
CredentialObject(id="test", keys={"k": CredentialKey(name="k", value=SecretStr("v"))})
)
storage.save(CredentialObject(id="test", keys={"k": CredentialKey(name="k", value=SecretStr("v"))}))
# First load
store.get_credential("test")
@@ -686,9 +664,7 @@ class TestOAuth2Module:
from core.framework.credentials.oauth2 import OAuth2Config, TokenPlacement
# Valid config
config = OAuth2Config(
token_url="https://example.com/token", client_id="id", client_secret="secret"
)
config = OAuth2Config(token_url="https://example.com/token", client_id="id", client_secret="secret")
assert config.token_url == "https://example.com/token"
# Missing token_url
@@ -160,15 +160,9 @@ class CredentialValidationResult:
if aden_nc:
if missing or invalid:
lines.append("")
lines.append(
"Aden integrations not connected "
"(ADEN_API_KEY is set but OAuth tokens unavailable):\n"
)
lines.append("Aden integrations not connected (ADEN_API_KEY is set but OAuth tokens unavailable):\n")
for c in aden_nc:
lines.append(
f" {c.env_var} for {_label(c)}"
f"\n Connect this integration at hive.adenhq.com first."
)
lines.append(f" {c.env_var} for {_label(c)}\n Connect this integration at hive.adenhq.com first.")
lines.append("\nIf you've already set up credentials, restart your terminal to load them.")
return "\n".join(lines)
@@ -270,8 +264,7 @@ def compute_unavailable_tools(nodes: list) -> tuple[set[str], list[str]]:
reason = "invalid"
messages.append(
f"{status.env_var} ({reason}) → drops {len(status.tools)} tool(s): "
f"{', '.join(status.tools[:6])}"
+ (f" +{len(status.tools) - 6} more" if len(status.tools) > 6 else "")
f"{', '.join(status.tools[:6])}" + (f" +{len(status.tools) - 6} more" if len(status.tools) > 6 else "")
)
return drop, messages
@@ -332,9 +325,7 @@ def validate_agent_credentials(
if os.environ.get("ADEN_API_KEY"):
_presync_aden_tokens(CREDENTIAL_SPECS, force=force_refresh)
env_mapping = {
(spec.credential_id or name): spec.env_var for name, spec in CREDENTIAL_SPECS.items()
}
env_mapping = {(spec.credential_id or name): spec.env_var for name, spec in CREDENTIAL_SPECS.items()}
env_storage = EnvVarStorage(env_mapping=env_mapping)
if os.environ.get("HIVE_CREDENTIAL_KEY"):
storage = CompositeStorage(primary=env_storage, fallbacks=[EncryptedFileStorage()])
@@ -368,12 +359,7 @@ def validate_agent_credentials(
available = store.is_available(cred_id)
# Aden-not-connected: ADEN_API_KEY set, Aden-only cred, but integration missing
is_aden_nc = (
not available
and has_aden_key
and spec.aden_supported
and not spec.direct_api_key_supported
)
is_aden_nc = not available and has_aden_key and spec.aden_supported and not spec.direct_api_key_supported
status = CredentialStatus(
credential_name=cred_name,
@@ -491,9 +477,7 @@ def validate_agent_credentials(
identity_data = result.details.get("identity")
if identity_data and isinstance(identity_data, dict):
try:
cred_obj = store.get_credential(
status.credential_id, refresh_if_needed=False
)
cred_obj = store.get_credential(status.credential_id, refresh_if_needed=False)
if cred_obj:
cred_obj.set_identity(**identity_data)
store.save_credential(cred_obj)
@@ -205,9 +205,7 @@ class AgentHost:
DeprecationWarning,
stacklevel=2,
)
self._skills_manager = SkillsManager.from_precomputed(
skills_catalog_prompt, protocols_prompt
)
self._skills_manager = SkillsManager.from_precomputed(skills_catalog_prompt, protocols_prompt)
else:
# Bare constructor: auto-load defaults
self._skills_manager = SkillsManager()
@@ -248,9 +246,7 @@ class AgentHost:
self._tools = tools or []
self._tool_executor = tool_executor
self._accounts_prompt = accounts_prompt
self._dynamic_memory_provider_factory: Callable[[str], Callable[[], str] | None] | None = (
None
)
self._dynamic_memory_provider_factory: Callable[[str], Callable[[], str] | None] | None = None
self._accounts_data = accounts_data
self._tool_provider_map = tool_provider_map
@@ -419,8 +415,7 @@ class AgentHost:
event_types = [_ET(et) for et in tc.get("event_types", [])]
if not event_types:
logger.warning(
f"Entry point '{ep_id}' has trigger_type='event' "
"but no event_types in trigger_config"
f"Entry point '{ep_id}' has trigger_type='event' but no event_types in trigger_config"
)
continue
@@ -450,9 +445,7 @@ class AgentHost:
# Run in the same session as the primary entry
# point so memory (e.g. user-defined rules) is
# shared and logs land in one session directory.
session_state = self._get_primary_session_state(
exclude_entry_point=entry_point_id
)
session_state = self._get_primary_session_state(exclude_entry_point=entry_point_id)
exec_id = await self.trigger(
entry_point_id,
{"event": event.to_dict()},
@@ -505,8 +498,7 @@ class AgentHost:
from croniter import croniter
except ImportError as e:
raise RuntimeError(
"croniter is required for cron-based entry points. "
"Install it with: uv pip install croniter"
"croniter is required for cron-based entry points. Install it with: uv pip install croniter"
) from e
try:
@@ -548,9 +540,7 @@ class AgentHost:
"Cron '%s': paused, skipping tick",
entry_point_id,
)
self._timer_next_fire[entry_point_id] = (
time.monotonic() + sleep_secs
)
self._timer_next_fire[entry_point_id] = time.monotonic() + sleep_secs
await asyncio.sleep(max(0, sleep_secs))
continue
@@ -578,9 +568,7 @@ class AgentHost:
"Cron '%s': agent actively working, skipping tick",
entry_point_id,
)
self._timer_next_fire[entry_point_id] = (
time.monotonic() + sleep_secs
)
self._timer_next_fire[entry_point_id] = time.monotonic() + sleep_secs
await asyncio.sleep(max(0, sleep_secs))
continue
@@ -590,24 +578,18 @@ class AgentHost:
is_isolated = ep_spec and ep_spec.isolation_level == "isolated"
if is_isolated:
if _persistent_session_id:
session_state = {
"resume_session_id": _persistent_session_id
}
session_state = {"resume_session_id": _persistent_session_id}
else:
session_state = None
else:
session_state = self._get_primary_session_state(
exclude_entry_point=entry_point_id
)
session_state = self._get_primary_session_state(exclude_entry_point=entry_point_id)
# Gate: skip tick if no active session
if session_state is None:
logger.debug(
"Cron '%s': no active session, skipping",
entry_point_id,
)
self._timer_next_fire[entry_point_id] = (
time.monotonic() + sleep_secs
)
self._timer_next_fire[entry_point_id] = time.monotonic() + sleep_secs
await asyncio.sleep(max(0, sleep_secs))
continue
@@ -680,9 +662,7 @@ class AgentHost:
"Timer '%s': paused, skipping tick",
entry_point_id,
)
self._timer_next_fire[entry_point_id] = (
time.monotonic() + interval_secs
)
self._timer_next_fire[entry_point_id] = time.monotonic() + interval_secs
await asyncio.sleep(interval_secs)
continue
@@ -708,9 +688,7 @@ class AgentHost:
"Timer '%s': agent actively working, skipping tick",
entry_point_id,
)
self._timer_next_fire[entry_point_id] = (
time.monotonic() + interval_secs
)
self._timer_next_fire[entry_point_id] = time.monotonic() + interval_secs
await asyncio.sleep(interval_secs)
continue
@@ -720,24 +698,18 @@ class AgentHost:
is_isolated = ep_spec and ep_spec.isolation_level == "isolated"
if is_isolated:
if _persistent_session_id:
session_state = {
"resume_session_id": _persistent_session_id
}
session_state = {"resume_session_id": _persistent_session_id}
else:
session_state = None
else:
session_state = self._get_primary_session_state(
exclude_entry_point=entry_point_id
)
session_state = self._get_primary_session_state(exclude_entry_point=entry_point_id)
# Gate: skip tick if no active session
if session_state is None:
logger.debug(
"Timer '%s': no active session, skipping",
entry_point_id,
)
self._timer_next_fire[entry_point_id] = (
time.monotonic() + interval_secs
)
self._timer_next_fire[entry_point_id] = time.monotonic() + interval_secs
await asyncio.sleep(interval_secs)
continue
@@ -1152,8 +1124,7 @@ class AgentHost:
event_types = [_ET(et) for et in tc.get("event_types", [])]
if not event_types:
logger.warning(
"Entry point '%s::%s' has trigger_type='event' "
"but no event_types in trigger_config",
"Entry point '%s::%s' has trigger_type='event' but no event_types in trigger_config",
graph_id,
ep_id,
)
@@ -1301,24 +1272,18 @@ class AgentHost:
break
stream = reg.streams.get(local_ep)
if not stream:
logger.warning(
"Timer: no stream '%s' in '%s', stopping", local_ep, gid
)
logger.warning("Timer: no stream '%s' in '%s', stopping", local_ep, gid)
break
# Isolated entry points get their own session;
# shared ones join the primary session.
ep_spec = reg.entry_points.get(local_ep)
if ep_spec and ep_spec.isolation_level == "isolated":
if _persistent_session_id:
session_state = {
"resume_session_id": _persistent_session_id
}
session_state = {"resume_session_id": _persistent_session_id}
else:
session_state = None
else:
session_state = self._get_primary_session_state(
local_ep, source_graph_id=gid
)
session_state = self._get_primary_session_state(local_ep, source_graph_id=gid)
# Gate: skip tick if no active session
if session_state is None:
logger.debug(
@@ -1335,11 +1300,7 @@ class AgentHost:
session_state=session_state,
)
# Remember session ID for reuse on next tick
if (
not _persistent_session_id
and ep_spec
and ep_spec.isolation_level == "isolated"
):
if not _persistent_session_id and ep_spec and ep_spec.isolation_level == "isolated":
_persistent_session_id = exec_id
except Exception:
logger.error(
@@ -1597,9 +1558,7 @@ class AgentHost:
src_graph_id = source_graph_id or self._graph_id
src_reg = self._graphs.get(src_graph_id)
ep_spec = (
src_reg.entry_points.get(exclude_entry_point)
if src_reg
else self._entry_points.get(exclude_entry_point)
src_reg.entry_points.get(exclude_entry_point) if src_reg else self._entry_points.get(exclude_entry_point)
)
if ep_spec:
graph = src_reg.graph if src_reg else self.graph
@@ -1633,9 +1592,7 @@ class AgentHost:
# Filter to only input keys so stale outputs
# from previous triggers don't leak through.
if allowed_keys is not None:
buffer_data = {
k: v for k, v in full_buffer.items() if k in allowed_keys
}
buffer_data = {k: v for k, v in full_buffer.items() if k in allowed_keys}
else:
buffer_data = full_buffer
if buffer_data:
@@ -15,7 +15,6 @@ import asyncio
import json
import logging
import time
import uuid
from collections import OrderedDict
from collections.abc import Callable
from dataclasses import dataclass, field
@@ -25,16 +24,13 @@ from typing import TYPE_CHECKING, Any
from framework.agent_loop.types import AgentContext, AgentSpec
from framework.host.event_bus import AgentEvent, EventBus, EventType
from framework.host.triggers import TriggerDefinition
from framework.host.worker import Worker, WorkerInfo, WorkerResult, WorkerStatus
from framework.observability import set_trace_context
from framework.host.worker import Worker, WorkerInfo, WorkerResult
from framework.schemas.goal import Goal
from framework.storage.concurrent import ConcurrentStorage
from framework.storage.session_store import SessionStore
if TYPE_CHECKING:
from framework.agent_loop.agent_loop import AgentLoop
from framework.llm.provider import LLMProvider, Tool
from framework.pipeline.runner import PipelineRunner
from framework.skills.manager import SkillsManagerConfig
from framework.tracker.runtime_log_store import RuntimeLogStore
@@ -195,9 +191,7 @@ class ColonyRuntime:
DeprecationWarning,
stacklevel=2,
)
self._skills_manager = SkillsManager.from_precomputed(
skills_catalog_prompt, protocols_prompt
)
self._skills_manager = SkillsManager.from_precomputed(skills_catalog_prompt, protocols_prompt)
else:
self._skills_manager = SkillsManager()
self._skills_manager.load()
@@ -210,9 +204,7 @@ class ColonyRuntime:
self._accounts_prompt = accounts_prompt
self._accounts_data = accounts_data
self._tool_provider_map = tool_provider_map
self._dynamic_memory_provider_factory: Callable[[str], Callable[[], str] | None] | None = (
None
)
self._dynamic_memory_provider_factory: Callable[[str], Callable[[], str] | None] | None = None
storage_path_obj = Path(storage_path) if isinstance(storage_path, str) else storage_path
self._storage_path: Path = storage_path_obj
@@ -560,9 +552,7 @@ class ColonyRuntime:
encoding="utf-8",
)
except (json.JSONDecodeError, OSError) as exc:
logger.warning(
"spawn fork: failed to copy queen meta.json: %s", exc
)
logger.warning("spawn fork: failed to copy queen meta.json: %s", exc)
# Append the task as the next user message so the worker's
# LLM sees it as the most recent turn in the conversation
@@ -674,9 +664,7 @@ class ColonyRuntime:
input_data=input_data,
)
worker_conv_store = FileConversationStore(
worker_storage / "conversations"
)
worker_conv_store = FileConversationStore(worker_storage / "conversations")
# AgentLoop takes bus/judge/config/executor at construction;
# LLM, tools, stream_id, execution_id all come from the
@@ -848,9 +836,7 @@ class ColonyRuntime:
if remaining <= 0:
break
try:
report = await asyncio.wait_for(
report_queue.get(), timeout=remaining
)
report = await asyncio.wait_for(report_queue.get(), timeout=remaining)
except TimeoutError:
break
wid = report.get("worker_id")
@@ -919,10 +905,7 @@ class ColonyRuntime:
return self._overseer
if not self._running:
raise RuntimeError(
"start_overseer requires the ColonyRuntime to be running "
"(call start() first)"
)
raise RuntimeError("start_overseer requires the ColonyRuntime to be running (call start() first)")
from framework.agent_loop.agent_loop import AgentLoop
from framework.storage.conversation_store import FileConversationStore
@@ -933,9 +916,7 @@ class ColonyRuntime:
# {colony_session}/conversations/. Workers get their own sub-dirs
# under workers/{worker_id}/; the overseer is the root occupant.
self._storage_path.mkdir(parents=True, exist_ok=True)
overseer_conv_store = FileConversationStore(
self._storage_path / "conversations"
)
overseer_conv_store = FileConversationStore(self._storage_path / "conversations")
agent_loop = AgentLoop(
event_bus=self._scoped_event_bus,
tool_executor=self._tool_executor,
@@ -1096,9 +1077,7 @@ class ColonyRuntime:
def get_worker_result(self, worker_id: str) -> WorkerResult | None:
return self._execution_results.get(worker_id)
async def wait_for_worker(
self, worker_id: str, timeout: float | None = None
) -> WorkerResult | None:
async def wait_for_worker(self, worker_id: str, timeout: float | None = None) -> WorkerResult | None:
worker = self._workers.get(worker_id)
if worker is None:
return self._execution_results.get(worker_id)
@@ -1106,7 +1085,7 @@ class ColonyRuntime:
return worker.info.result
try:
await asyncio.wait_for(asyncio.shield(worker._task_handle), timeout=timeout)
except asyncio.TimeoutError:
except TimeoutError:
return None
return worker.info.result
@@ -1147,9 +1126,7 @@ class ColonyRuntime:
if worker and worker.is_active:
loop = worker._agent_loop
if hasattr(loop, "inject_event"):
await loop.inject_event(
content, is_client_input=is_client_input, image_content=image_content
)
await loop.inject_event(content, is_client_input=is_client_input, image_content=image_content)
return True
return False
@@ -446,11 +446,7 @@ class EventBus:
# iteration values. Without this, live SSE would use raw iterations
# while events.jsonl would use offset iterations, causing ID collisions
# on the frontend when replaying after cold resume.
if (
self._session_log_iteration_offset
and isinstance(event.data, dict)
and "iteration" in event.data
):
if self._session_log_iteration_offset and isinstance(event.data, dict) and "iteration" in event.data:
offset = self._session_log_iteration_offset
event.data = {**event.data, "iteration": event.data["iteration"] + offset}
@@ -452,9 +452,7 @@ class ExecutionManager:
for executor in self._active_executors.values():
node = executor.node_registry.get(node_id)
if node is not None and hasattr(node, "inject_event"):
await node.inject_event(
content, is_client_input=is_client_input, image_content=image_content
)
await node.inject_event(content, is_client_input=is_client_input, image_content=image_content)
return True
return False
@@ -669,9 +667,7 @@ class ExecutionManager:
if self._runtime_log_store:
from framework.tracker.runtime_logger import RuntimeLogger
runtime_logger = RuntimeLogger(
store=self._runtime_log_store, agent_id=self.graph.id
)
runtime_logger = RuntimeLogger(store=self._runtime_log_store, agent_id=self.graph.id)
# Derive storage from session_store (graph-specific for secondary
# graphs) so that all files — conversations, state, checkpoints,
@@ -887,9 +883,7 @@ class ExecutionManager:
if has_result and result.paused_at:
await self._write_session_state(execution_id, ctx, result=result)
else:
await self._write_session_state(
execution_id, ctx, error="Execution cancelled"
)
await self._write_session_state(execution_id, ctx, error="Execution cancelled")
# Emit SSE event so the frontend knows the execution stopped.
# The executor does NOT emit on CancelledError, so there is no
@@ -2,8 +2,6 @@
import asyncio
import logging
import time
from dataclasses import dataclass, field
from enum import StrEnum
from typing import Any
@@ -136,9 +136,7 @@ class StreamDecisionTracker:
self._run_locks[execution_id] = asyncio.Lock()
self._current_nodes[execution_id] = "unknown"
logger.debug(
f"Started run {run_id} for execution {execution_id} in stream {self.stream_id}"
)
logger.debug(f"Started run {run_id} for execution {execution_id} in stream {self.stream_id}")
return run_id
def end_run(
@@ -334,10 +332,7 @@ class StreamDecisionTracker:
"""
run = self._runs.get(execution_id)
if run is None:
logger.warning(
f"report_problem called but no run for execution {execution_id}: "
f"[{severity}] {description}"
)
logger.warning(f"report_problem called but no run for execution {execution_id}: [{severity}] {description}")
return ""
return run.add_problem(
@@ -89,8 +89,7 @@ class WebhookServer:
)
await self._site.start()
logger.info(
f"Webhook server started on {self._config.host}:{self._config.port} "
f"with {len(self._routes)} route(s)"
f"Webhook server started on {self._config.host}:{self._config.port} with {len(self._routes)} route(s)"
)
async def stop(self) -> None:
@@ -92,9 +92,7 @@ class Worker:
# result.json, data). Required when seed_conversation() is used —
# we deliberately do NOT fall back to CWD, which previously caused
# conversation parts to leak into the process working directory.
self._storage_path: Path | None = (
Path(storage_path) if storage_path is not None else None
)
self._storage_path: Path | None = Path(storage_path) if storage_path is not None else None
self._task_handle: asyncio.Task | None = None
self._started_at: float = 0.0
self._result: WorkerResult | None = None
@@ -153,14 +151,10 @@ class Worker:
if result.success:
self.status = WorkerStatus.COMPLETED
self._result = self._build_result(
result, duration, default_status="success"
)
self._result = self._build_result(result, duration, default_status="success")
else:
self.status = WorkerStatus.FAILED
self._result = self._build_result(
result, duration, default_status="failed"
)
self._result = self._build_result(result, duration, default_status="failed")
await self._emit_terminal_events(result)
@@ -292,11 +286,7 @@ class Worker:
# EXECUTION_COMPLETED / EXECUTION_FAILED (backwards-compat)
if agent_result is not None:
lifecycle_type = (
EventType.EXECUTION_COMPLETED
if agent_result.success
else EventType.EXECUTION_FAILED
)
lifecycle_type = EventType.EXECUTION_COMPLETED if agent_result.success else EventType.EXECUTION_FAILED
await self._event_bus.publish(
AgentEvent(
type=lifecycle_type,
@@ -309,11 +299,7 @@ class Worker:
"task": self.task,
"success": agent_result.success,
"error": agent_result.error,
"output_keys": (
list(agent_result.output.keys())
if agent_result.output
else []
),
"output_keys": (list(agent_result.output.keys()) if agent_result.output else []),
},
)
)
@@ -348,9 +334,7 @@ class Worker:
async def start_background(self) -> None:
"""Spawn the worker's run() as an asyncio background task."""
self._task_handle = asyncio.create_task(
self.run(), name=f"worker:{self.id}"
)
self._task_handle = asyncio.create_task(self.run(), name=f"worker:{self.id}")
# Surface any exception that escapes run(); without this callback
# a crash here only becomes visible when stop() eventually awaits
# the handle (and is silently lost if stop() is never called).
@@ -406,8 +390,7 @@ class Worker:
"""
if self.status != WorkerStatus.PENDING:
raise RuntimeError(
f"seed_conversation must be called before start_background "
f"(worker {self.id} is {self.status})"
f"seed_conversation must be called before start_background (worker {self.id} is {self.status})"
)
# Write parts directly to the worker's on-disk conversation store
@@ -50,9 +50,7 @@ class AnthropicProvider(LLMProvider):
# Delegate to LiteLLMProvider internally.
self.api_key = api_key or _get_api_key_from_credential_store()
if not self.api_key:
raise ValueError(
"Anthropic API key required. Set ANTHROPIC_API_KEY env var or pass api_key."
)
raise ValueError("Anthropic API key required. Set ANTHROPIC_API_KEY env var or pass api_key.")
self.model = model
@@ -53,17 +53,9 @@ _TOKEN_REFRESH_BUFFER_SECS = 60
# Credentials file in ~/.hive/ (native implementation)
_ACCOUNTS_FILE = Path.home() / ".hive" / "antigravity-accounts.json"
_IDE_STATE_DB_MAC = (
Path.home()
/ "Library"
/ "Application Support"
/ "Antigravity"
/ "User"
/ "globalStorage"
/ "state.vscdb"
)
_IDE_STATE_DB_LINUX = (
Path.home() / ".config" / "Antigravity" / "User" / "globalStorage" / "state.vscdb"
Path.home() / "Library" / "Application Support" / "Antigravity" / "User" / "globalStorage" / "state.vscdb"
)
_IDE_STATE_DB_LINUX = Path.home() / ".config" / "Antigravity" / "User" / "globalStorage" / "state.vscdb"
_IDE_STATE_DB_KEY = "antigravityUnifiedStateSync.oauthToken"
_BASE_HEADERS: dict[str, str] = {
@@ -368,9 +360,7 @@ def _to_gemini_contents(
def _map_finish_reason(reason: str) -> str:
return {"STOP": "stop", "MAX_TOKENS": "max_tokens", "OTHER": "tool_use"}.get(
(reason or "").upper(), "stop"
)
return {"STOP": "stop", "MAX_TOKENS": "max_tokens", "OTHER": "tool_use"}.get((reason or "").upper(), "stop")
def _parse_complete_response(raw: dict[str, Any], model: str) -> LLMResponse:
@@ -538,8 +528,7 @@ class AntigravityProvider(LLMProvider):
return self._access_token
raise RuntimeError(
"No valid Antigravity credentials. "
"Run: uv run python core/antigravity_auth.py auth account add"
"No valid Antigravity credentials. Run: uv run python core/antigravity_auth.py auth account add"
)
# --- Request building -------------------------------------------------- #
@@ -593,11 +582,7 @@ class AntigravityProvider(LLMProvider):
token = self._ensure_token()
body_bytes = json.dumps(body).encode("utf-8")
path = (
"/v1internal:streamGenerateContent?alt=sse"
if streaming
else "/v1internal:generateContent"
)
path = "/v1internal:streamGenerateContent?alt=sse" if streaming else "/v1internal:generateContent"
headers = {
**_BASE_HEADERS,
"Authorization": f"Bearer {token}",
@@ -619,9 +604,7 @@ class AntigravityProvider(LLMProvider):
if result:
self._access_token, self._token_expires_at = result
headers["Authorization"] = f"Bearer {self._access_token}"
req2 = urllib.request.Request(
url, data=body_bytes, headers=headers, method="POST"
)
req2 = urllib.request.Request(url, data=body_bytes, headers=headers, method="POST")
try:
return urllib.request.urlopen(req2, timeout=120) # noqa: S310
except urllib.error.HTTPError as exc2:
@@ -642,9 +625,7 @@ class AntigravityProvider(LLMProvider):
last_exc = exc
continue
raise RuntimeError(
f"All Antigravity endpoints failed. Last error: {last_exc}"
) from last_exc
raise RuntimeError(f"All Antigravity endpoints failed. Last error: {last_exc}") from last_exc
# --- LLMProvider interface --------------------------------------------- #
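A hedged sketch of the endpoint-failover loop the hunk above reformats — the hosts and helper name are illustrative assumptions, not the provider's real endpoints:

import urllib.error
import urllib.request

ENDPOINTS = ["https://primary.invalid", "https://fallback.invalid"]  # placeholders


def post_with_failover(path: str, data: bytes, headers: dict[str, str]) -> bytes:
    last_exc: Exception | None = None
    for base in ENDPOINTS:
        req = urllib.request.Request(base + path, data=data, headers=headers, method="POST")
        try:
            with urllib.request.urlopen(req, timeout=120) as resp:  # noqa: S310
                return resp.read()
        except urllib.error.URLError as exc:
            last_exc = exc
            continue  # try the next endpoint
    raise RuntimeError(f"All endpoints failed. Last error: {last_exc}") from last_exc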
@@ -683,9 +664,7 @@ class AntigravityProvider(LLMProvider):
try:
body = self._build_body(messages, system, tools, max_tokens)
http_resp = self._post(body, streaming=True)
for event in _parse_sse_stream(
http_resp, self.model, self._thought_sigs.__setitem__
):
for event in _parse_sse_stream(http_resp, self.model, self._thought_sigs.__setitem__):
loop.call_soon_threadsafe(queue.put_nowait, event)
except Exception as exc:
logger.error("Antigravity stream error: %s", exc)
+21 -62
@@ -100,9 +100,7 @@ def _patch_litellm_anthropic_oauth() -> None:
result["authorization"] = f"Bearer {token}"
# Merge the OAuth beta header with any existing beta headers.
existing_beta = result.get("anthropic-beta", "")
beta_parts = (
[b.strip() for b in existing_beta.split(",") if b.strip()] if existing_beta else []
)
beta_parts = [b.strip() for b in existing_beta.split(",") if b.strip()] if existing_beta else []
if ANTHROPIC_OAUTH_BETA_HEADER not in beta_parts:
beta_parts.append(ANTHROPIC_OAUTH_BETA_HEADER)
result["anthropic-beta"] = ",".join(beta_parts)
@@ -262,9 +260,7 @@ def _claude_code_billing_header(messages: list[dict[str, Any]]) -> str:
break
sampled = "".join(_sample_js_code_unit(first_text, i) for i in (4, 7, 20))
version_hash = hashlib.sha256(
f"{_CLAUDE_CODE_BILLING_SALT}{sampled}{CLAUDE_CODE_VERSION}".encode()
).hexdigest()
version_hash = hashlib.sha256(f"{_CLAUDE_CODE_BILLING_SALT}{sampled}{CLAUDE_CODE_VERSION}".encode()).hexdigest()
entrypoint = os.environ.get("CLAUDE_CODE_ENTRYPOINT", "").strip() or "cli"
return (
f"x-anthropic-billing-header: cc_version={CLAUDE_CODE_VERSION}.{version_hash[:3]}; "
@@ -336,9 +332,7 @@ def _prune_failed_request_dumps(max_files: int = MAX_FAILED_REQUEST_DUMPS) -> No
def _remember_openrouter_tool_compat_model(model: str) -> None:
"""Cache OpenRouter tool-compat fallback for a bounded time window."""
OPENROUTER_TOOL_COMPAT_MODEL_CACHE[model] = (
time.monotonic() + OPENROUTER_TOOL_COMPAT_CACHE_TTL_SECONDS
)
OPENROUTER_TOOL_COMPAT_MODEL_CACHE[model] = time.monotonic() + OPENROUTER_TOOL_COMPAT_CACHE_TTL_SECONDS
def _is_openrouter_tool_compat_cached(model: str) -> bool:
@@ -746,20 +740,14 @@ class LiteLLMProvider(LLMProvider):
eh.setdefault("user-agent", CLAUDE_CODE_USER_AGENT)
# The Codex ChatGPT backend (chatgpt.com/backend-api/codex) rejects
# several standard OpenAI params: max_output_tokens, stream_options.
self._codex_backend = bool(
self.api_base and "chatgpt.com/backend-api/codex" in self.api_base
)
self._codex_backend = bool(self.api_base and "chatgpt.com/backend-api/codex" in self.api_base)
# Antigravity routes through a local OpenAI-compatible proxy — no patches needed.
self._antigravity = bool(self.api_base and "localhost:8069" in self.api_base)
if litellm is None:
raise ImportError(
"LiteLLM is not installed. Please install it with: uv pip install litellm"
)
raise ImportError("LiteLLM is not installed. Please install it with: uv pip install litellm")
def reconfigure(
self, model: str, api_key: str | None = None, api_base: str | None = None
) -> None:
def reconfigure(self, model: str, api_key: str | None = None, api_base: str | None = None) -> None:
"""Hot-swap the model, API key, and/or base URL on this provider instance.
Since the same LiteLLMProvider object is shared by reference across the
@@ -784,9 +772,7 @@ class LiteLLMProvider(LLMProvider):
if self._claude_code_oauth:
eh = self.extra_kwargs.setdefault("extra_headers", {})
eh.setdefault("user-agent", CLAUDE_CODE_USER_AGENT)
self._codex_backend = bool(
self.api_base and "chatgpt.com/backend-api/codex" in self.api_base
)
self._codex_backend = bool(self.api_base and "chatgpt.com/backend-api/codex" in self.api_base)
self._antigravity = bool(self.api_base and "localhost:8069" in self.api_base)
# Note: The Codex ChatGPT backend is a Responses API endpoint at
@@ -809,9 +795,7 @@ class LiteLLMProvider(LLMProvider):
return HIVE_API_BASE
return None
def _completion_with_rate_limit_retry(
self, max_retries: int | None = None, **kwargs: Any
) -> Any:
def _completion_with_rate_limit_retry(self, max_retries: int | None = None, **kwargs: Any) -> Any:
"""Call litellm.completion with retry on 429 rate limit errors and empty responses.
When a :class:`KeyPool` is configured, rate-limited keys are rotated
@@ -843,15 +827,10 @@ class LiteLLMProvider(LLMProvider):
None,
)
if last_role == "assistant":
logger.debug(
"[retry] Empty response after assistant message — "
"expected, not retrying."
)
logger.debug("[retry] Empty response after assistant message — expected, not retrying.")
return response
finish_reason = (
response.choices[0].finish_reason if response.choices else "unknown"
)
finish_reason = response.choices[0].finish_reason if response.choices else "unknown"
# Dump full request to file for debugging
token_count, token_method = _estimate_tokens(model, messages)
dump_path = _dump_failed_request(
@@ -1050,9 +1029,7 @@ class LiteLLMProvider(LLMProvider):
# Async variants — non-blocking on the event loop
# ------------------------------------------------------------------
async def _acompletion_with_rate_limit_retry(
self, max_retries: int | None = None, **kwargs: Any
) -> Any:
async def _acompletion_with_rate_limit_retry(self, max_retries: int | None = None, **kwargs: Any) -> Any:
"""Async version of _completion_with_rate_limit_retry.
Uses litellm.acompletion and asyncio.sleep instead of blocking calls.
@@ -1078,15 +1055,10 @@ class LiteLLMProvider(LLMProvider):
None,
)
if last_role == "assistant":
logger.debug(
"[async-retry] Empty response after assistant message — "
"expected, not retrying."
)
logger.debug("[async-retry] Empty response after assistant message — expected, not retrying.")
return response
finish_reason = (
response.choices[0].finish_reason if response.choices else "unknown"
)
finish_reason = response.choices[0].finish_reason if response.choices else "unknown"
token_count, token_method = _estimate_tokens(model, messages)
dump_path = _dump_failed_request(
model=model,
@@ -1370,8 +1342,7 @@ class LiteLLMProvider(LLMProvider):
)
return text_tool_content, text_tool_calls
logger.info(
"[openrouter-tool-compat] %s returned non-JSON fallback content; "
"treating it as plain text.",
"[openrouter-tool-compat] %s returned non-JSON fallback content; treating it as plain text.",
self.model,
)
return content.strip(), []
@@ -1523,9 +1494,7 @@ class LiteLLMProvider(LLMProvider):
)
return repaired
raise ValueError(
f"Failed to parse tool call arguments for '{tool_name}' (likely truncated JSON)."
)
raise ValueError(f"Failed to parse tool call arguments for '{tool_name}' (likely truncated JSON).")
def _parse_openrouter_text_tool_calls(
self,
@@ -1682,11 +1651,7 @@ class LiteLLMProvider(LLMProvider):
return [
message
for message in full_messages
if not (
message.get("role") == "assistant"
and not message.get("content")
and not message.get("tool_calls")
)
if not (message.get("role") == "assistant" and not message.get("content") and not message.get("tool_calls"))
]
async def _acomplete_via_openrouter_tool_compat(
@@ -1914,8 +1879,8 @@ class LiteLLMProvider(LLMProvider):
if logger.isEnabledFor(logging.DEBUG) and full_messages:
import json as _json
from pathlib import Path as _Path
from datetime import datetime as _dt
from pathlib import Path as _Path
_debug_dir = _Path.home() / ".hive" / "debug_logs"
_debug_dir.mkdir(parents=True, exist_ok=True)
@@ -1939,9 +1904,7 @@ class LiteLLMProvider(LLMProvider):
}
)
try:
_dump_file.write_text(
_json.dumps(_summary, indent=2, ensure_ascii=False), encoding="utf-8"
)
_dump_file.write_text(_json.dumps(_summary, indent=2, ensure_ascii=False), encoding="utf-8")
logger.debug("[LLM-MSG] %d messages dumped to %s", len(full_messages), _dump_file)
except Exception:
pass
@@ -1966,9 +1929,7 @@ class LiteLLMProvider(LLMProvider):
full_messages = [
m
for m in full_messages
if not (
m.get("role") == "assistant" and not m.get("content") and not m.get("tool_calls")
)
if not (m.get("role") == "assistant" and not m.get("content") and not m.get("tool_calls"))
]
kwargs: dict[str, Any] = {
@@ -2154,8 +2115,7 @@ class LiteLLMProvider(LLMProvider):
else getattr(usage, "cache_read_input_tokens", 0) or 0
)
logger.debug(
"[tokens] finish-chunk usage: "
"input=%d output=%d cached=%d model=%s",
"[tokens] finish-chunk usage: input=%d output=%d cached=%d model=%s",
input_tokens,
output_tokens,
cached_tokens,
@@ -2202,8 +2162,7 @@ class LiteLLMProvider(LLMProvider):
else getattr(_usage, "cache_read_input_tokens", 0) or 0
)
logger.debug(
"[tokens] post-loop chunks fallback:"
" input=%d output=%d cached=%d model=%s",
"[tokens] post-loop chunks fallback: input=%d output=%d cached=%d model=%s",
input_tokens,
output_tokens,
cached_tokens,
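A minimal sketch of the 429-retry-with-backoff idea that several hunks in this file reformat; RateLimitError and complete_with_retry are illustrative stand-ins, not the real litellm types:

import random
import time


class RateLimitError(Exception):
    """Stand-in for the provider's 429 error type."""


def complete_with_retry(call, max_retries: int = 5, base_delay: float = 1.0):
    # Exponential backoff with a little jitter between rate-limited attempts.
    for attempt in range(max_retries + 1):
        try:
            return call()
        except RateLimitError:
            if attempt == max_retries:
                raise
            time.sleep(base_delay * (2 ** attempt) + random.uniform(0, 0.5))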
+7 -23
@@ -50,9 +50,7 @@ def _validate_model_catalog(data: dict[str, Any]) -> dict[str, Any]:
if not isinstance(model_id, str) or not model_id.strip():
raise ModelCatalogError(f"{model_path}.id must be a non-empty string")
if model_id in seen_model_ids:
raise ModelCatalogError(
f"Duplicate model id {model_id!r} in {provider_path}.models"
)
raise ModelCatalogError(f"Duplicate model id {model_id!r} in {provider_path}.models")
seen_model_ids.add(model_id)
if model_id == default_model:
@@ -91,17 +89,11 @@ def _validate_model_catalog(data: dict[str, Any]) -> dict[str, Any]:
api_base = preset_map.get("api_base")
if api_base is not None and (not isinstance(api_base, str) or not api_base.strip()):
raise ModelCatalogError(
f"{preset_path}.api_base must be a non-empty string when present"
)
raise ModelCatalogError(f"{preset_path}.api_base must be a non-empty string when present")
api_key_env_var = preset_map.get("api_key_env_var")
if api_key_env_var is not None and (
not isinstance(api_key_env_var, str) or not api_key_env_var.strip()
):
raise ModelCatalogError(
f"{preset_path}.api_key_env_var must be a non-empty string when present"
)
if api_key_env_var is not None and (not isinstance(api_key_env_var, str) or not api_key_env_var.strip()):
raise ModelCatalogError(f"{preset_path}.api_key_env_var must be a non-empty string when present")
for key in ("max_tokens", "max_context_tokens"):
value = preset_map.get(key)
@@ -110,9 +102,7 @@ def _validate_model_catalog(data: dict[str, Any]) -> dict[str, Any]:
model_choices = preset_map.get("model_choices")
if model_choices is not None:
for idx, choice in enumerate(
_require_list(model_choices, f"{preset_path}.model_choices")
):
for idx, choice in enumerate(_require_list(model_choices, f"{preset_path}.model_choices")):
choice_path = f"{preset_path}.model_choices[{idx}]"
choice_map = _require_mapping(choice, choice_path)
choice_id = choice_map.get("id")
@@ -144,19 +134,13 @@ def load_model_catalog() -> dict[str, Any]:
def get_models_catalogue() -> dict[str, list[dict[str, Any]]]:
"""Return provider -> model list."""
providers = load_model_catalog()["providers"]
return {
provider_id: copy.deepcopy(provider_info["models"])
for provider_id, provider_info in providers.items()
}
return {provider_id: copy.deepcopy(provider_info["models"]) for provider_id, provider_info in providers.items()}
def get_default_models() -> dict[str, str]:
"""Return provider -> default model id."""
providers = load_model_catalog()["providers"]
return {
provider_id: str(provider_info["default_model"])
for provider_id, provider_info in providers.items()
}
return {provider_id: str(provider_info["default_model"]) for provider_id, provider_info in providers.items()}
def get_provider_models(provider: str) -> list[dict[str, Any]]:
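A self-contained sketch of the duplicate-id guard shown at the top of this file's diff, assuming the same providers/models shape; check_unique_model_ids is a hypothetical helper name:

from typing import Any


class ModelCatalogError(ValueError):
    pass


def check_unique_model_ids(providers: dict[str, dict[str, Any]]) -> None:
    # Each provider's "models" list must use non-empty, unique string ids.
    for provider_id, info in providers.items():
        seen: set[str] = set()
        for idx, model in enumerate(info.get("models", [])):
            model_id = model.get("id")
            path = f"providers.{provider_id}.models[{idx}]"
            if not isinstance(model_id, str) or not model_id.strip():
                raise ModelCatalogError(f"{path}.id must be a non-empty string")
            if model_id in seen:
                raise ModelCatalogError(f"Duplicate model id {model_id!r} in providers.{provider_id}.models")
            seen.add(model_id)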
+10 -33
@@ -9,7 +9,7 @@ from datetime import UTC
from pathlib import Path
from typing import Any
from framework.config import get_hive_config, get_max_context_tokens, get_preferred_model
from framework.config import get_hive_config, get_preferred_model
from framework.credentials.validation import (
ensure_credential_key_env as _ensure_credential_key_env,
)
@@ -20,14 +20,12 @@ from framework.loader.preload_validation import run_preload_validation
from framework.loader.tool_registry import ToolRegistry
from framework.orchestrator import Goal
from framework.orchestrator.edge import (
DEFAULT_MAX_TOKENS,
EdgeCondition,
EdgeSpec,
GraphSpec,
)
from framework.orchestrator.node import NodeSpec
from framework.orchestrator.orchestrator import ExecutionResult
from framework.tools.flowchart_utils import generate_fallback_flowchart
logger = logging.getLogger(__name__)
@@ -555,18 +553,10 @@ def get_kimi_code_token() -> str | None:
# VSCode-style SQLite state database under the key
# "antigravityUnifiedStateSync.oauthToken" as a base64-encoded protobuf blob.
ANTIGRAVITY_IDE_STATE_DB = (
Path.home()
/ "Library"
/ "Application Support"
/ "Antigravity"
/ "User"
/ "globalStorage"
/ "state.vscdb"
Path.home() / "Library" / "Application Support" / "Antigravity" / "User" / "globalStorage" / "state.vscdb"
)
# Linux fallback for the IDE state DB
ANTIGRAVITY_IDE_STATE_DB_LINUX = (
Path.home() / ".config" / "Antigravity" / "User" / "globalStorage" / "state.vscdb"
)
ANTIGRAVITY_IDE_STATE_DB_LINUX = Path.home() / ".config" / "Antigravity" / "User" / "globalStorage" / "state.vscdb"
# Antigravity credentials stored by native OAuth implementation
ANTIGRAVITY_AUTH_FILE = Path.home() / ".hive" / "antigravity-accounts.json"
@@ -710,9 +700,7 @@ def _is_antigravity_token_expired(auth_data: dict) -> bool:
return True
elif isinstance(last_refresh_val, str):
try:
last_refresh_val = datetime.fromisoformat(
last_refresh_val.replace("Z", "+00:00")
).timestamp()
last_refresh_val = datetime.fromisoformat(last_refresh_val.replace("Z", "+00:00")).timestamp()
except (ValueError, TypeError):
return True
@@ -843,8 +831,7 @@ def get_antigravity_token() -> str | None:
return token_data["access_token"]
logger.warning(
"Antigravity token refresh failed. "
"Re-open the Antigravity IDE or run 'antigravity-auth accounts add'."
"Antigravity token refresh failed. Re-open the Antigravity IDE or run 'antigravity-auth accounts add'."
)
return access_token
@@ -1297,11 +1284,7 @@ class AgentLoader:
# Evict cached submodules first (e.g. deep_research_agent.nodes,
# deep_research_agent.agent) so the top-level reload picks up
# changes in the entire package — not just __init__.py.
stale = [
name
for name in sys.modules
if name == package_name or name.startswith(f"{package_name}.")
]
stale = [name for name in sys.modules if name == package_name or name.startswith(f"{package_name}.")]
for name in stale:
del sys.modules[name]
@@ -1350,7 +1333,7 @@ class AgentLoader:
if not worker_jsons:
raise FileNotFoundError(f"No worker config found in {agent_path}")
from framework.orchestrator.edge import EdgeSpec, GraphSpec
from framework.orchestrator.edge import GraphSpec
from framework.orchestrator.goal import Constraint, Goal as GoalModel, SuccessCriterion
from framework.orchestrator.node import NodeSpec
@@ -1555,7 +1538,6 @@ class AgentLoader:
]
# Merge user-configured stages from ~/.hive/configuration.json
from framework.config import get_hive_config
from framework.pipeline.registry import build_pipeline_from_config
hive_config = get_hive_config()
@@ -1568,9 +1550,7 @@ class AgentLoader:
if agent_json.exists():
try:
agent_pipeline = (
_json.loads(agent_json.read_text(encoding="utf-8"))
.get("pipeline", {})
.get("stages", [])
_json.loads(agent_json.read_text(encoding="utf-8")).get("pipeline", {}).get("stages", [])
)
if agent_pipeline:
agent_stages = build_pipeline_from_config(agent_pipeline)
@@ -1986,8 +1966,7 @@ class AgentLoader:
for sc in self.goal.success_criteria
],
constraints=[
{"id": c.id, "description": c.description, "type": c.constraint_type}
for c in self.goal.constraints
{"id": c.id, "description": c.description, "type": c.constraint_type} for c in self.goal.constraints
],
required_tools=sorted(required_tools),
has_tools_module=(self.agent_path / "tools.py").exists(),
@@ -2058,9 +2037,7 @@ class AgentLoader:
if api_key_env and not os.environ.get(api_key_env):
if api_key_env not in missing_credentials:
missing_credentials.append(api_key_env)
warnings.append(
f"Agent has LLM nodes but {api_key_env} not set (model: {self.model})"
)
warnings.append(f"Agent has LLM nodes but {api_key_env} not set (model: {self.model})")
return ValidationResult(
valid=len(errors) == 0,
+15 -27
@@ -25,7 +25,6 @@ from pathlib import Path
from typing import Any
from urllib import error as urlerror, parse as urlparse, request as urlrequest
# ---------------------------------------------------------------------------
# Public registration
# ---------------------------------------------------------------------------
@@ -127,10 +126,7 @@ def cmd_serve(args: argparse.Namespace) -> int:
def _request_shutdown(signame: str) -> None:
signal_count["n"] += 1
if signal_count["n"] == 1:
print(
f"\nReceived {signame}, shutting down gracefully… "
"(press Ctrl+C again to force quit)"
)
print(f"\nReceived {signame}, shutting down gracefully… (press Ctrl+C again to force quit)")
shutdown_event.set()
else:
# Second Ctrl+C (or SIGTERM) — the user is done waiting.
@@ -171,9 +167,7 @@ def cmd_serve(args: argparse.Namespace) -> int:
print(f"Colony not found: {colony_arg}")
continue
try:
session = await manager.create_session_with_worker_colony(
str(colony_path), model=model
)
session = await manager.create_session_with_worker_colony(str(colony_path), model=model)
info = session.worker_info
name = info.name if info else session.colony_id
print(f"Loaded colony: {session.colony_id} ({name}) → session {session.id}")
@@ -319,12 +313,14 @@ def cmd_queen_sessions(args: argparse.Namespace) -> int:
meta = json.loads(meta_path.read_text(encoding="utf-8"))
except Exception:
meta = {}
rows.append({
rows.append(
{
"session_id": session_dir.name,
"phase": meta.get("phase", "?"),
"agent_path": meta.get("agent_path", ""),
"colony_fork": bool(meta.get("colony_fork")),
})
}
)
if args.json:
print(json.dumps(rows, indent=2))
@@ -398,18 +394,18 @@ def cmd_colony_list(args: argparse.Namespace) -> int:
except Exception:
meta = {}
worker_count = sum(
1
for f in path.iterdir()
if f.is_file() and f.suffix == ".json" and f.stem not in _RESERVED_JSON_STEMS
1 for f in path.iterdir() if f.is_file() and f.suffix == ".json" and f.stem not in _RESERVED_JSON_STEMS
)
rows.append({
rows.append(
{
"name": path.name,
"queen_name": meta.get("queen_name", ""),
"queen_session_id": meta.get("queen_session_id", ""),
"workers": worker_count,
"created_at": meta.get("created_at", ""),
"path": str(path),
})
}
)
if args.json:
print(json.dumps(rows, indent=2))
@@ -422,9 +418,7 @@ def cmd_colony_list(args: argparse.Namespace) -> int:
print(f"{'NAME':<24} {'QUEEN':<28} {'WORKERS':<8} CREATED")
print("-" * 90)
for r in rows:
print(
f"{r['name']:<24} {r['queen_name']:<28} {r['workers']:<8} {r['created_at'][:19]}"
)
print(f"{r['name']:<24} {r['queen_name']:<28} {r['workers']:<8} {r['created_at'][:19]}")
return 0
@@ -651,9 +645,7 @@ def _http_get(url: str, timeout: float = 10.0) -> dict:
def _http_post(url: str, body: dict, timeout: float = 30.0) -> dict:
data = json.dumps(body).encode("utf-8")
req = urlrequest.Request(
url, data=data, method="POST", headers={"Content-Type": "application/json"}
)
req = urlrequest.Request(url, data=data, method="POST", headers={"Content-Type": "application/json"})
with urlrequest.urlopen(req, timeout=timeout) as r:
return json.loads(r.read().decode("utf-8"))
@@ -709,9 +701,7 @@ def _open_browser(url: str) -> None:
try:
if sys.platform == "darwin":
subprocess.Popen(
["open", url], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
)
subprocess.Popen(["open", url], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
elif sys.platform == "win32":
subprocess.Popen(
["cmd", "/c", "start", "", url],
@@ -719,9 +709,7 @@ def _open_browser(url: str) -> None:
stderr=subprocess.DEVNULL,
)
elif sys.platform == "linux":
subprocess.Popen(
["xdg-open", url], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
)
subprocess.Popen(["xdg-open", url], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
except Exception:
pass
+7 -18
@@ -267,9 +267,7 @@ class MCPClient:
try:
response = self._http_client.get("/health")
response.raise_for_status()
logger.info(
f"Connected to MCP server '{self.config.name}' via HTTP at {self.config.url}"
)
logger.info(f"Connected to MCP server '{self.config.name}' via HTTP at {self.config.url}")
except Exception as e:
logger.warning(f"Health check failed for MCP server '{self.config.name}': {e}")
# Continue anyway, server might not have health endpoint
@@ -377,12 +375,8 @@ class MCPClient:
self._tools[tool.name] = tool
tool_names = list(self._tools.keys())
logger.info(
f"Discovered {len(self._tools)} tools from '{self.config.name}'"
)
logger.debug(
f"Discovered tools from '{self.config.name}': {tool_names}"
)
logger.info(f"Discovered {len(self._tools)} tools from '{self.config.name}'")
logger.debug(f"Discovered tools from '{self.config.name}': {tool_names}")
except Exception as e:
logger.error(f"Failed to discover tools from '{self.config.name}': {e}")
raise
@@ -467,6 +461,7 @@ class MCPClient:
)
if self.config.transport == "stdio":
def _stdio_call() -> Any:
with self._stdio_call_lock:
return self._run_async(self._call_tool_stdio_async(tool_name, arguments))
@@ -669,9 +664,7 @@ class MCPClient:
if self._session:
await self._session.__aexit__(None, None, None)
except asyncio.CancelledError:
logger.warning(
"MCP session cleanup was cancelled; proceeding with best-effort shutdown"
)
logger.warning("MCP session cleanup was cancelled; proceeding with best-effort shutdown")
except Exception as e:
logger.warning(f"Error closing MCP session: {e}")
finally:
@@ -682,9 +675,7 @@ class MCPClient:
if self._stdio_context:
await self._stdio_context.__aexit__(None, None, None)
except asyncio.CancelledError:
logger.debug(
"STDIO context cleanup was cancelled; proceeding with best-effort shutdown"
)
logger.debug("STDIO context cleanup was cancelled; proceeding with best-effort shutdown")
except Exception as e:
msg = str(e).lower()
if "cancel scope" in msg or "different task" in msg:
@@ -725,9 +716,7 @@ class MCPClient:
# any exceptions that may occur if the loop stops between these calls.
if self._loop.is_running():
try:
cleanup_future = asyncio.run_coroutine_threadsafe(
self._cleanup_stdio_async(), self._loop
)
cleanup_future = asyncio.run_coroutine_threadsafe(self._cleanup_stdio_async(), self._loop)
cleanup_future.result(timeout=self._CLEANUP_TIMEOUT)
cleanup_attempted = True
except TimeoutError:
@@ -74,8 +74,7 @@ class MCPConnectionManager:
if not should_connect:
if not transition_event.wait(timeout=_TRANSITION_TIMEOUT):
logger.warning(
"Timed out waiting for transition on MCP server '%s', "
"forcing cleanup and retrying",
"Timed out waiting for transition on MCP server '%s', forcing cleanup and retrying",
server_name,
)
with self._pool_lock:
@@ -99,10 +98,7 @@ class MCPConnectionManager:
current = self._transitions.get(server_name)
if current is transition_event:
self._transitions.pop(server_name, None)
if (
server_name not in self._pool
and self._refcounts.get(server_name, 0) <= 0
):
if server_name not in self._pool and self._refcounts.get(server_name, 0) <= 0:
self._configs.pop(server_name, None)
transition_event.set()
raise
@@ -324,8 +320,7 @@ class MCPConnectionManager:
self._transitions.pop(server_name, None)
transition_event.set()
logger.info(
"Reconnected MCP server '%s' but refcount dropped to 0, "
"discarding new client",
"Reconnected MCP server '%s' but refcount dropped to 0, discarding new client",
server_name,
)
try:
@@ -336,9 +331,7 @@ class MCPConnectionManager:
server_name,
exc_info=True,
)
raise KeyError(
f"MCP server '{server_name}' was fully released during reconnect"
)
raise KeyError(f"MCP server '{server_name}' was fully released during reconnect")
self._pool[server_name] = new_client
self._configs[server_name] = config
@@ -380,8 +373,7 @@ class MCPConnectionManager:
all_resolved = all(event.wait(timeout=_TRANSITION_TIMEOUT) for event in pending)
if not all_resolved:
logger.warning(
"Timed out waiting for pending transitions during cleanup, "
"forcing cleanup of stuck transitions",
"Timed out waiting for pending transitions during cleanup, forcing cleanup of stuck transitions",
)
with self._pool_lock:
for sn, evt in list(self._transitions.items()):
+1 -3
@@ -23,9 +23,7 @@ class MCPError(ValueError):
self.what = what
self.why = why
self.fix = fix
self.message = (
f"[{self.code.value}]\nWhat failed: {self.what}\nWhy: {self.why}\nFix: {self.fix}"
)
self.message = f"[{self.code.value}]\nWhat failed: {self.what}\nWhy: {self.why}\nFix: {self.fix}"
super().__init__(self.message)
+3 -8
@@ -24,9 +24,7 @@ from framework.loader.mcp_errors import (
logger = logging.getLogger(__name__)
DEFAULT_INDEX_URL = (
"https://raw.githubusercontent.com/aden-hive/hive-mcp-registry/main/registry_index.json"
)
DEFAULT_INDEX_URL = "https://raw.githubusercontent.com/aden-hive/hive-mcp-registry/main/registry_index.json"
DEFAULT_REFRESH_INTERVAL_HOURS = 24
_LAST_FETCHED_FILENAME = "last_fetched"
_LEGACY_LAST_FETCHED_FILENAME = "last_fetched.json"
@@ -140,9 +138,7 @@ class MCPRegistry:
)
added.append(name)
except MCPError as exc:
logger.warning(
"MCPRegistry.ensure_defaults: failed to seed '%s': %s", name, exc
)
logger.warning("MCPRegistry.ensure_defaults: failed to seed '%s': %s", name, exc)
if added:
logger.info("MCPRegistry: seeded default local servers: %s", added)
@@ -709,8 +705,7 @@ class MCPRegistry:
pinned_version = versions[name]
if installed_version != pinned_version:
logger.warning(
"Server '%s' version mismatch: installed=%s, pinned=%s. "
"Run: hive mcp update %s",
"Server '%s' version mismatch: installed=%s, pinned=%s. Run: hive mcp update %s",
name,
installed_version,
pinned_version,
+11 -30
@@ -151,10 +151,7 @@ def _parse_key_value_pairs(values: list[str]) -> dict[str, str]:
result = {}
for item in values:
if "=" not in item:
raise ValueError(
f"Invalid format: '{item}'. Expected KEY=VALUE.\n"
f"Example: --set JIRA_API_TOKEN=abc123"
)
raise ValueError(f"Invalid format: '{item}'. Expected KEY=VALUE.\nExample: --set JIRA_API_TOKEN=abc123")
key, _, value = item.partition("=")
if not key:
raise ValueError(f"Invalid format: '{item}'. Key cannot be empty.")
@@ -300,12 +297,8 @@ def register_mcp_commands(subparsers) -> None:
# ── install ──
install_p = mcp_sub.add_parser("install", help="Install a server from the registry")
install_p.add_argument("name", help="Server name in the registry")
install_p.add_argument(
"--version", dest="version", default=None, help="Pin to a specific version"
)
install_p.add_argument(
"--transport", default=None, help="Override default transport (stdio, http, unix, sse)"
)
install_p.add_argument("--version", dest="version", default=None, help="Pin to a specific version")
install_p.add_argument("--transport", default=None, help="Override default transport (stdio, http, unix, sse)")
install_p.set_defaults(func=cmd_mcp_install)
# ── add ──
@@ -342,9 +335,7 @@ def register_mcp_commands(subparsers) -> None:
# ── list ──
list_p = mcp_sub.add_parser("list", help="List servers")
list_p.add_argument(
"--available", action="store_true", help="Show available servers from registry"
)
list_p.add_argument("--available", action="store_true", help="Show available servers from registry")
list_p.add_argument("--json", dest="output_json", action="store_true", help="Output as JSON")
list_p.set_defaults(func=cmd_mcp_list)
@@ -364,9 +355,7 @@ def register_mcp_commands(subparsers) -> None:
metavar="KEY=VAL",
help="Set environment variable overrides",
)
config_p.add_argument(
"--set-header", dest="set_header", nargs="+", metavar="KEY=VAL", help="Set header overrides"
)
config_p.add_argument("--set-header", dest="set_header", nargs="+", metavar="KEY=VAL", help="Set header overrides")
config_p.set_defaults(func=cmd_mcp_config)
# ── search ──
@@ -389,9 +378,7 @@ def register_mcp_commands(subparsers) -> None:
init_p.set_defaults(func=cmd_mcp_init)
# ── update ──
update_p = mcp_sub.add_parser(
"update", help="Update installed servers or refresh the registry index"
)
update_p = mcp_sub.add_parser("update", help="Update installed servers or refresh the registry index")
update_p.add_argument(
"name",
nargs="?",
@@ -495,8 +482,7 @@ def _cmd_mcp_add_from_manifest(registry, manifest_path: str) -> int:
manifest = json.loads(path.read_text(encoding="utf-8"))
except json.JSONDecodeError as exc:
print(
f"Error: invalid JSON in {manifest_path}: {exc}\n"
f"Validate with: python -m json.tool {manifest_path}",
f"Error: invalid JSON in {manifest_path}: {exc}\nValidate with: python -m json.tool {manifest_path}",
file=sys.stderr,
)
return 1
@@ -695,8 +681,7 @@ def cmd_mcp_config(args) -> int:
server = registry.get_server(args.name)
if server is None:
print(
f"Error: server '{args.name}' is not installed.\n"
f"Run 'hive mcp list' to see installed servers.",
f"Error: server '{args.name}' is not installed.\nRun 'hive mcp list' to see installed servers.",
file=sys.stderr,
)
return 1
@@ -822,8 +807,7 @@ def cmd_mcp_update(args) -> int:
count = registry.update_index()
except Exception as exc:
print(
f"Error: failed to update registry index: {exc}\n"
f"Check your network connection and try again.",
f"Error: failed to update registry index: {exc}\nCheck your network connection and try again.",
file=sys.stderr,
)
return 1
@@ -832,9 +816,7 @@ def cmd_mcp_update(args) -> int:
# Step 2: update all installed registry servers (skip local/pinned)
installed = registry.list_installed()
registry_servers = [
s for s in installed if s.get("source") == "registry" and not s.get("pinned")
]
registry_servers = [s for s in installed if s.get("source") == "registry" and not s.get("pinned")]
if not registry_servers:
return 0
@@ -862,8 +844,7 @@ def _cmd_mcp_update_server(name: str, registry=None) -> int:
server = registry.get_server(name)
if server is None:
print(
f"Error: server '{name}' is not installed.\n"
f"Run 'hive mcp install {name}' to install it.",
f"Error: server '{name}' is not installed.\nRun 'hive mcp install {name}' to install it.",
file=sys.stderr,
)
return 1
+1 -3
@@ -98,9 +98,7 @@ def validate_credentials(
if not result.success:
# Preserve the original validation_result so callers can
# inspect which credentials are still missing.
exc = CredentialError(
"Credential setup incomplete. Run again after configuring the required credentials."
)
exc = CredentialError("Credential setup incomplete. Run again after configuring the required credentials.")
if hasattr(e, "validation_result"):
exc.validation_result = e.validation_result # type: ignore[attr-defined]
if hasattr(e, "failed_cred_names"):
+8 -28
@@ -257,10 +257,7 @@ class ToolRegistry:
str(e),
)
return {
"error": (
f"Invalid JSON response from tool '{tool_name}': "
f"{str(e)}"
),
"error": (f"Invalid JSON response from tool '{tool_name}': {str(e)}"),
"raw_content": result.content,
}
return result
@@ -435,9 +432,7 @@ class ToolRegistry:
registry = ToolRegistry()
return registry._resolve_mcp_server_config(server_config, base_dir)
def _resolve_mcp_server_config(
self, server_config: dict[str, Any], base_dir: Path
) -> dict[str, Any]:
def _resolve_mcp_server_config(self, server_config: dict[str, Any], base_dir: Path) -> dict[str, Any]:
"""Resolve cwd and script paths for MCP stdio servers (Windows compatibility).
On Windows, passing cwd to subprocess can cause WinError 267. We use cwd=None
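For illustration, a hedged sketch of the Windows-friendly launch this docstring describes — resolve script paths against base_dir and pass cwd=None; the function and argument names are assumptions, not the registry's real API:

import subprocess
from pathlib import Path


def launch_stdio_server(command: str, args: list[str], base_dir: Path) -> subprocess.Popen:
    # Resolve relative script arguments against base_dir up front so the child
    # process does not need a cwd (passing cwd on Windows can raise WinError 267).
    resolved = [str((base_dir / a).resolve()) if (base_dir / a).exists() else a for a in args]
    return subprocess.Popen(  # noqa: S603
        [command, *resolved],
        cwd=None,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
    )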
@@ -552,8 +547,7 @@ class ToolRegistry:
server_list = [{"name": name, **cfg} for name, cfg in config.items()]
resolved_server_list = [
self._resolve_mcp_server_config(server_config, base_dir)
for server_config in server_list
self._resolve_mcp_server_config(server_config, base_dir) for server_config in server_list
]
# Ordered first-wins for duplicate tool names across servers; keep tools.py tools.
self.load_registry_servers(
@@ -757,9 +751,7 @@ class ToolRegistry:
if preserve_existing_tools and mcp_tool.name in self._tools:
if log_collisions:
origin_server = (
self._find_mcp_origin_server_for_tool(mcp_tool.name) or "<existing>"
)
origin_server = self._find_mcp_origin_server_for_tool(mcp_tool.name) or "<existing>"
logger.warning(
"MCP tool '%s' from '%s' shadowed by '%s' (loaded first)",
mcp_tool.name,
@@ -788,17 +780,11 @@ class ToolRegistry:
base_context.update(exec_ctx)
# Only inject context params the tool accepts
filtered_context = {
k: v for k, v in base_context.items() if k in tool_params
}
filtered_context = {k: v for k, v in base_context.items() if k in tool_params}
# Strip context params from LLM inputs — the framework
# values are authoritative (prevents the LLM from passing
# e.g. data_dir="/data" and overriding the real path).
clean_inputs = {
k: v
for k, v in inputs.items()
if k not in registry_ref.CONTEXT_PARAMS
}
clean_inputs = {k: v for k, v in inputs.items() if k not in registry_ref.CONTEXT_PARAMS}
merged_inputs = {**clean_inputs, **filtered_context}
result = client_ref.call_tool(tool_name, merged_inputs)
# MCP client already extracts content (returns str
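A small sketch of the input-merging rule in the hunk above: framework context wins over LLM-supplied values, and only context params the tool accepts are injected (CONTEXT_PARAMS here is an illustrative set, not the real framework list):

CONTEXT_PARAMS = {"data_dir", "session_id"}  # illustrative


def merge_tool_inputs(llm_inputs: dict, framework_context: dict, tool_params: set[str]) -> dict:
    # Inject only the context params this tool declares...
    filtered_context = {k: v for k, v in framework_context.items() if k in tool_params}
    # ...and strip any context params the LLM tried to pass, so the
    # framework-provided values stay authoritative.
    clean_inputs = {k: v for k, v in llm_inputs.items() if k not in CONTEXT_PARAMS}
    return {**clean_inputs, **filtered_context}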
@@ -885,9 +871,7 @@ class ToolRegistry:
contents are already logged by `register_mcp_server`; this is just the
rollup so the resync path also gets a single anchor line.
"""
per_server_counts = {
server: len(names) for server, names in self._mcp_server_tools.items()
}
per_server_counts = {server: len(names) for server, names in self._mcp_server_tools.items()}
non_mcp_count = len(self._tools) - len(self._mcp_tool_names)
logger.info(
"ToolRegistry snapshot (%s): total=%d, mcp=%d, non_mcp=%d, per_server=%s",
@@ -958,11 +942,7 @@ class ToolRegistry:
adapter = CredentialStoreAdapter.default()
tool_provider_map = adapter.get_tool_provider_map()
live_providers = {
a.get("provider", "")
for a in adapter.get_all_account_info()
if a.get("provider")
}
live_providers = {a.get("provider", "") for a in adapter.get_all_account_info() if a.get("provider")}
except Exception:
logger.debug("Credential snapshot unavailable for MCP gate", exc_info=True)
@@ -50,11 +50,7 @@ class CheckpointConfig:
Returns:
True if should check for old checkpoints and prune them
"""
return (
self.enabled
and self.prune_every_n_nodes > 0
and nodes_executed % self.prune_every_n_nodes == 0
)
return self.enabled and self.prune_every_n_nodes > 0 and nodes_executed % self.prune_every_n_nodes == 0
# Default configuration for most agents
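A tiny usage sketch of the pruning cadence above, with a hypothetical PruneConfig standing in for CheckpointConfig:

from dataclasses import dataclass


@dataclass
class PruneConfig:
    enabled: bool = True
    prune_every_n_nodes: int = 5

    def should_prune(self, nodes_executed: int) -> bool:
        # Prune old checkpoints on every Nth completed node.
        return self.enabled and self.prune_every_n_nodes > 0 and nodes_executed % self.prune_every_n_nodes == 0


# PruneConfig().should_prune(10) -> True, PruneConfig().should_prune(7) -> False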
+1 -3
@@ -175,9 +175,7 @@ def _resolve_available_tools(
return always_tools
declared = set(node_spec.tools)
declared_tools = [
t for t in tools if t.name in declared and t.name not in _ALWAYS_AVAILABLE_TOOLS
]
declared_tools = [t for t in tools if t.name in declared and t.name not in _ALWAYS_AVAILABLE_TOOLS]
return always_tools + declared_tools
@@ -169,11 +169,7 @@ class ContextHandoff:
key_hint = ""
if output_keys:
key_hint = (
"\nThe following output keys are especially important: "
+ ", ".join(output_keys)
+ ".\n"
)
key_hint = "\nThe following output keys are especially important: " + ", ".join(output_keys) + ".\n"
system_prompt = (
"You are a concise summarizer. Given the conversation below, "
+5 -14
@@ -186,8 +186,7 @@ class EdgeSpec(BaseModel):
expr_vars = {
k: repr(context[k])
for k in context
if k not in ("output", "buffer", "result", "true", "false")
and k in self.condition_expr
if k not in ("output", "buffer", "result", "true", "false") and k in self.condition_expr
}
logger.info(
" Edge %s: condition '%s'%s (vars: %s)",
@@ -333,12 +332,8 @@ class GraphSpec(BaseModel):
default_factory=dict,
description="Named entry points for resuming execution. Format: {name: node_id}",
)
terminal_nodes: list[str] = Field(
default_factory=list, description="IDs of nodes that end execution"
)
pause_nodes: list[str] = Field(
default_factory=list, description="IDs of nodes that pause execution for HITL input"
)
terminal_nodes: list[str] = Field(default_factory=list, description="IDs of nodes that end execution")
pause_nodes: list[str] = Field(default_factory=list, description="IDs of nodes that pause execution for HITL input")
# Components
nodes: list[Any] = Field( # NodeSpec, but avoiding circular import
@@ -347,9 +342,7 @@ class GraphSpec(BaseModel):
edges: list[EdgeSpec] = Field(default_factory=list, description="All edge specifications")
# Data buffer keys
buffer_keys: list[str] = Field(
default_factory=list, description="Keys available in data buffer"
)
buffer_keys: list[str] = Field(default_factory=list, description="Keys available in data buffer")
# Default LLM settings
default_model: str = "claude-haiku-4-5-20251001"
@@ -557,9 +550,7 @@ class GraphSpec(BaseModel):
fan_outs = self.detect_fan_out_nodes()
for source_id, targets in fan_outs.items():
event_loop_targets = [
t
for t in targets
if self.get_node(t) and getattr(self.get_node(t), "node_type", "") == "event_loop"
t for t in targets if self.get_node(t) and getattr(self.get_node(t), "node_type", "") == "event_loop"
]
if len(event_loop_targets) > 1:
seen_keys: dict[str, str] = {}
+6 -18
@@ -41,13 +41,9 @@ class SuccessCriterion(BaseModel):
id: str
description: str = Field(description="Human-readable description of what success looks like")
metric: str = Field(
description="How to measure: 'output_contains', 'output_equals', 'llm_judge', 'custom'"
)
metric: str = Field(description="How to measure: 'output_contains', 'output_equals', 'llm_judge', 'custom'")
# NEW: runtime evaluation type (separate from metric)
type: str = Field(
default="success_rate", description="Runtime evaluation type, e.g. 'success_rate'"
)
type: str = Field(default="success_rate", description="Runtime evaluation type, e.g. 'success_rate'")
target: Any = Field(description="The target value or condition")
weight: float = Field(default=1.0, ge=0.0, le=1.0, description="Relative importance (0-1)")
@@ -67,15 +63,9 @@ class Constraint(BaseModel):
id: str
description: str
constraint_type: str = Field(
description="Type: 'hard' (must not violate) or 'soft' (prefer not to violate)"
)
category: str = Field(
default="general", description="Category: 'time', 'cost', 'safety', 'scope', 'quality'"
)
check: str = Field(
default="", description="How to check: expression, function name, or 'llm_judge'"
)
constraint_type: str = Field(description="Type: 'hard' (must not violate) or 'soft' (prefer not to violate)")
category: str = Field(default="general", description="Category: 'time', 'cost', 'safety', 'scope', 'quality'")
check: str = Field(default="", description="How to check: expression, function name, or 'llm_judge'")
model_config = {"extra": "allow"}
@@ -142,9 +132,7 @@ class Goal(BaseModel):
# Input/output schema
input_schema: dict[str, Any] = Field(default_factory=dict, description="Expected input format")
output_schema: dict[str, Any] = Field(
default_factory=dict, description="Expected output format"
)
output_schema: dict[str, Any] = Field(default_factory=dict, description="Expected output format")
# Versioning for evolution
version: str = "1.0.0"
+5 -13
@@ -129,15 +129,13 @@ class NodeSpec(BaseModel):
input_schema: dict[str, dict] = Field(
default_factory=dict,
description=(
"Optional schema for input validation. "
"Format: {key: {type: 'string', required: True, description: '...'}}"
"Optional schema for input validation. Format: {key: {type: 'string', required: True, description: '...'}}"
),
)
output_schema: dict[str, dict] = Field(
default_factory=dict,
description=(
"Optional schema for output validation. "
"Format: {key: {type: 'dict', required: True, description: '...'}}"
"Optional schema for output validation. Format: {key: {type: 'dict', required: True, description: '...'}}"
),
)
@@ -153,19 +151,13 @@ class NodeSpec(BaseModel):
"'none' = no tools at all."
),
)
model: str | None = Field(
default=None, description="Specific model to use (defaults to graph default)"
)
model: str | None = Field(default=None, description="Specific model to use (defaults to graph default)")
# For function nodes
function: str | None = Field(
default=None, description="Function name or path for function nodes"
)
function: str | None = Field(default=None, description="Function name or path for function nodes")
# For router nodes
routes: dict[str, str] = Field(
default_factory=dict, description="Condition -> target_node_id mapping for routers"
)
routes: dict[str, str] = Field(default_factory=dict, description="Condition -> target_node_id mapping for routers")
# Retry behavior
max_retries: int = Field(default=3)
+7 -20
@@ -379,9 +379,7 @@ class NodeWorker:
# Failure
if attempt + 1 < total_attempts:
gc.retry_counts[self.node_spec.id] = (
gc.retry_counts.get(self.node_spec.id, 0) + 1
)
gc.retry_counts[self.node_spec.id] = gc.retry_counts.get(self.node_spec.id, 0) + 1
gc.nodes_with_retries.add(self.node_spec.id)
delay = 1.0 * (2**attempt)
logger.warning(
@@ -411,9 +409,7 @@ class NodeWorker:
except Exception as exc:
if attempt + 1 < total_attempts:
gc.retry_counts[self.node_spec.id] = (
gc.retry_counts.get(self.node_spec.id, 0) + 1
)
gc.retry_counts[self.node_spec.id] = gc.retry_counts.get(self.node_spec.id, 0) + 1
gc.nodes_with_retries.add(self.node_spec.id)
delay = 1.0 * (2**attempt)
logger.warning(
@@ -469,9 +465,7 @@ class NodeWorker:
if len(conditionals) > 1:
max_prio = max(e.priority for e in conditionals)
traversable = [
e
for e in traversable
if e.condition != EdgeCondition.CONDITIONAL or e.priority == max_prio
e for e in traversable if e.condition != EdgeCondition.CONDITIONAL or e.priority == max_prio
]
# When parallel execution is disabled, follow first match only (sequential)
@@ -541,9 +535,7 @@ class NodeWorker:
logger.warning("Worker %s output validation warnings: %s", node_spec.id, errors)
# Determine if this worker is a fan-out branch
is_fanout_branch = any(
tag.via_branch == node_spec.id for tag in self._inherited_fan_out_tags
)
is_fanout_branch = any(tag.via_branch == node_spec.id for tag in self._inherited_fan_out_tags)
# Collect keys to write: declared output_keys + any extra output items
# (for fan-out branches, all output items need conflict checking)
@@ -642,9 +634,7 @@ class NodeWorker:
self._node_impl = node
return node
raise RuntimeError(
f"No implementation for node '{self.node_spec.id}' (type: {self.node_spec.node_type})"
)
raise RuntimeError(f"No implementation for node '{self.node_spec.id}' (type: {self.node_spec.node_type})")
def _build_node_context(self) -> NodeContext:
"""Build NodeContext for this worker's execution."""
@@ -749,9 +739,7 @@ class NodeWorker:
inherited_conversation=gc.continuous_conversation,
narrative=narrative,
)
gc.continuous_conversation.update_system_prompt(
build_system_prompt_for_node_context(next_ctx)
)
gc.continuous_conversation.update_system_prompt(build_system_prompt_for_node_context(next_ctx))
gc.continuous_conversation.set_current_phase(next_spec.id)
buffer_items, data_files = self._prepare_transition_payload()
@@ -799,8 +787,7 @@ class NodeWorker:
file_path.write_text(write_content, encoding="utf-8")
file_size = file_path.stat().st_size
buffer_items[key] = (
f"[Saved to '{filename}' ({file_size:,} bytes). "
f"Use read_file(path='{filename}') to access.]"
f"[Saved to '{filename}' ({file_size:,} bytes). Use read_file(path='{filename}') to access.]"
)
continue
except Exception:
+35 -110
@@ -202,9 +202,7 @@ class Orchestrator:
self.validator = OutputValidator()
self.logger = logging.getLogger(__name__)
self.logger.debug(
"[Orchestrator.__init__] Created with"
" stream_id=%s, execution_id=%s,"
" initial node_registry keys: %s",
"[Orchestrator.__init__] Created with stream_id=%s, execution_id=%s, initial node_registry keys: %s",
stream_id,
execution_id,
list(self.node_registry.keys()),
@@ -347,8 +345,7 @@ class Orchestrator:
missing = [t for t in declared if t not in available_tool_names]
if missing:
self.logger.warning(
"Node '%s' (id=%s) declares %d tools not in this runtime; "
"stripping them and continuing: %s",
"Node '%s' (id=%s) declares %d tools not in this runtime; stripping them and continuing: %s",
node.name,
node.id,
len(missing),
@@ -391,10 +388,7 @@ class Orchestrator:
lines.append(f"[tool result]: {c}")
elif m.role == "assistant" and m.tool_calls:
names = [tc.get("function", {}).get("name", "?") for tc in m.tool_calls]
lines.append(
f"[assistant (calls: {', '.join(names)})]: "
f"{m.content[:200] if m.content else ''}"
)
lines.append(f"[assistant (calls: {', '.join(names)})]: {m.content[:200] if m.content else ''}")
else:
lines.append(f"[{m.role}]: {m.content}")
formatted = "\n\n".join(lines)
@@ -565,8 +559,7 @@ class Orchestrator:
# [RESTORED] Type safety check
if not isinstance(buffer_data, dict):
self.logger.warning(
f"⚠️ Invalid data buffer type in session state: "
f"{type(buffer_data).__name__}, expected dict"
f"⚠️ Invalid data buffer type in session state: {type(buffer_data).__name__}, expected dict"
)
else:
# Restore buffer from previous session.
@@ -590,8 +583,7 @@ class Orchestrator:
# contains all state including the original input, and re-writing
# input_data would overwrite intermediate results with stale values.
_is_resuming = bool(
session_state
and (session_state.get("paused_at") or session_state.get("resume_from_checkpoint"))
session_state and (session_state.get("paused_at") or session_state.get("resume_from_checkpoint"))
)
if input_data and not _is_resuming:
for key, value in input_data.items():
@@ -616,11 +608,7 @@ class Orchestrator:
# If resuming at a specific node (paused_at), that node was counted
# but never completed, so decrement its count
paused_at = session_state.get("paused_at")
if (
paused_at
and paused_at in node_visit_counts
and node_visit_counts[paused_at] > 0
):
if paused_at and paused_at in node_visit_counts and node_visit_counts[paused_at] > 0:
old_count = node_visit_counts[paused_at]
node_visit_counts[paused_at] -= 1
self.logger.info(
@@ -636,10 +624,7 @@ class Orchestrator:
checkpoint = await checkpoint_store.load_checkpoint(checkpoint_id)
if checkpoint:
self.logger.info(
f"🔄 Resuming from checkpoint: {checkpoint_id} "
f"(node: {checkpoint.current_node})"
)
self.logger.info(f"🔄 Resuming from checkpoint: {checkpoint_id} (node: {checkpoint.current_node})")
checkpoint_run_id = checkpoint.run_id or LEGACY_RUN_ID
self._run_id = checkpoint_run_id
@@ -648,9 +633,7 @@ class Orchestrator:
buffer.write(key, value, validate=False)
# Start from checkpoint's next node or current node
current_node_id = (
checkpoint.next_node or checkpoint.current_node or graph.entry_node
)
current_node_id = checkpoint.next_node or checkpoint.current_node or graph.entry_node
# Restore execution path
path.extend(checkpoint.execution_path)
@@ -660,16 +643,11 @@ class Orchestrator:
f"resuming at node: {current_node_id}"
)
else:
self.logger.warning(
f"Checkpoint {checkpoint_id} not found, resuming from normal entry point"
)
self.logger.warning(f"Checkpoint {checkpoint_id} not found, resuming from normal entry point")
current_node_id = graph.get_entry_point(session_state)
except Exception as e:
self.logger.error(
f"Failed to load checkpoint {checkpoint_id}: {e}, "
f"resuming from normal entry point"
)
self.logger.error(f"Failed to load checkpoint {checkpoint_id}: {e}, resuming from normal entry point")
current_node_id = graph.get_entry_point(session_state)
else:
current_node_id = graph.get_entry_point(session_state)
@@ -757,20 +735,14 @@ class Orchestrator:
"human_input": "event_loop", # Use queen interaction / escalation instead
}
def _get_node_implementation(
self, node_spec: NodeSpec, cleanup_llm_model: str | None = None
) -> NodeProtocol:
def _get_node_implementation(self, node_spec: NodeSpec, cleanup_llm_model: str | None = None) -> NodeProtocol:
"""Get or create a node implementation."""
# Check registry first
if node_spec.id in self.node_registry:
logger.debug(
"[Orchestrator._get_node_implementation] Found node '%s' in registry", node_spec.id
)
logger.debug("[Orchestrator._get_node_implementation] Found node '%s' in registry", node_spec.id)
return self.node_registry[node_spec.id]
logger.debug(
"[Orchestrator._get_node_implementation]"
" Node '%s' not in registry (keys: %s),"
" creating new",
"[Orchestrator._get_node_implementation] Node '%s' not in registry (keys: %s), creating new",
node_spec.id,
list(self.node_registry.keys()),
)
@@ -840,9 +812,7 @@ class Orchestrator:
# Cache so inject_event() is reachable for queen interaction and escalation routing
self.node_registry[node_spec.id] = node
logger.debug(
"[Orchestrator._get_node_implementation]"
" Cached node '%s' in node_registry,"
" registry now has keys: %s",
"[Orchestrator._get_node_implementation] Cached node '%s' in node_registry, registry now has keys: %s",
node_spec.id,
list(self.node_registry.keys()),
)
@@ -925,9 +895,7 @@ class Orchestrator:
if len(conditionals) > 1:
max_prio = max(e.priority for e in conditionals)
traversable = [
e
for e in traversable
if e.condition != EdgeCondition.CONDITIONAL or e.priority == max_prio
e for e in traversable if e.condition != EdgeCondition.CONDITIONAL or e.priority == max_prio
]
return traversable
@@ -1090,9 +1058,7 @@ class Orchestrator:
execution_id=self._execution_id,
)
self.logger.info(
f" ▶ Branch {node_spec.name}: executing (attempt {attempt + 1})"
)
self.logger.info(f" ▶ Branch {node_spec.name}: executing (attempt {attempt + 1})")
result = await node_impl.execute(ctx)
last_result = result
@@ -1153,19 +1119,13 @@ class Orchestrator:
)
return branch, result
self.logger.warning(
f" ↻ Branch {node_spec.name}: "
f"retry {attempt + 1}/{effective_max_retries}"
)
self.logger.warning(f" ↻ Branch {node_spec.name}: retry {attempt + 1}/{effective_max_retries}")
# All retries exhausted
branch.status = "failed"
branch.error = last_result.error if last_result else "Unknown error"
branch.result = last_result
self.logger.error(
f" ✗ Branch {node_spec.name}: "
f"failed after {effective_max_retries} attempts"
)
self.logger.error(f" ✗ Branch {node_spec.name}: failed after {effective_max_retries} attempts")
return branch, last_result
except Exception as e:
@@ -1208,10 +1168,7 @@ class Orchestrator:
# Branch timed out
branch.status = "timed_out"
branch.error = f"Branch timed out after {timeout}s"
self.logger.warning(
f" ⏱ Branch {graph.get_node(branch.node_id).name}: "
f"timed out after {timeout}s"
)
self.logger.warning(f" ⏱ Branch {graph.get_node(branch.node_id).name}: timed out after {timeout}s")
path.append(branch.node_id)
failed_branches.append(branch)
elif isinstance(result, Exception):
@@ -1235,13 +1192,9 @@ class Orchestrator:
if self._parallel_config.on_branch_failure == "fail_all":
raise RuntimeError(f"Parallel execution failed: branches {failed_names} failed")
elif self._parallel_config.on_branch_failure == "continue_others":
self.logger.warning(
f"⚠ Some branches failed ({failed_names}), continuing with successful ones"
)
self.logger.warning(f"⚠ Some branches failed ({failed_names}), continuing with successful ones")
self.logger.info(
f" ⑃ Fan-out complete: {len(branch_results)}/{len(branches)} branches succeeded"
)
self.logger.info(f" ⑃ Fan-out complete: {len(branch_results)}/{len(branches)} branches succeeded")
return branch_results, total_tokens, total_latency
def register_node(self, node_id: str, implementation: NodeProtocol) -> None:
@@ -1432,15 +1385,10 @@ class Orchestrator:
return True
if not terminal_worker_ids:
# No terminals: check if all workers are done
return all(
w.lifecycle in (WorkerLifecycle.COMPLETED, WorkerLifecycle.FAILED)
for w in workers.values()
)
return all(w.lifecycle in (WorkerLifecycle.COMPLETED, WorkerLifecycle.FAILED) for w in workers.values())
if any(w.lifecycle == WorkerLifecycle.RUNNING for w in workers.values()):
return False
return any(
tid in completed_terminals or tid in failed_workers for tid in terminal_worker_ids
)
return any(tid in completed_terminals or tid in failed_workers for tid in terminal_worker_ids)
def _mark_quiescent_terminal_failure() -> bool:
nonlocal execution_error
@@ -1448,22 +1396,15 @@ class Orchestrator:
return False
if any(w.lifecycle == WorkerLifecycle.RUNNING for w in workers.values()):
return False
if any(
tid in completed_terminals or tid in failed_workers for tid in terminal_worker_ids
):
if any(tid in completed_terminals or tid in failed_workers for tid in terminal_worker_ids):
return False
execution_error = (
"Worker execution ended before terminal nodes completed: "
f"{sorted(terminal_worker_ids)}"
)
execution_error = f"Worker execution ended before terminal nodes completed: {sorted(terminal_worker_ids)}"
self.logger.error(execution_error)
return True
# Track fan-out branch workers for per-branch timeout enforcement
_fanout_branch_tasks: dict[str, asyncio.Task] = {} # worker_id → timeout-wrapper task
branch_timeout = (
self._parallel_config.branch_timeout_seconds if self._parallel_config else 300.0
)
branch_timeout = self._parallel_config.branch_timeout_seconds if self._parallel_config else 300.0
def _route_activation(
activation: Activation,
@@ -1498,9 +1439,7 @@ class Orchestrator:
target_worker.activate(inherited_tags=activation.fan_out_tags)
if target_worker._task is not None:
# Fan-out branch: wrap with timeout
is_fanout_branch = any(
tag.via_branch == activation.target_id for tag in activation.fan_out_tags
)
is_fanout_branch = any(tag.via_branch == activation.target_id for tag in activation.fan_out_tags)
if is_fanout_branch and branch_timeout > 0:
timed_task = asyncio.ensure_future(
asyncio.wait_for(target_worker._task, timeout=branch_timeout)
@@ -1555,9 +1494,7 @@ class Orchestrator:
if completion.conversation is not None:
gc.continuous_conversation = completion.conversation
self.logger.info(
f" ✓ Worker completed: {worker_id} ({len(activations)} outgoing activation(s))"
)
self.logger.info(f" ✓ Worker completed: {worker_id} ({len(activations)} outgoing activation(s))")
# Route activations to target workers
for activation in activations:
@@ -1598,9 +1535,7 @@ class Orchestrator:
completion_event.set()
# Subscribe to events (only if event bus has subscribe capability)
has_event_subscription = self._event_bus is not None and hasattr(
self._event_bus, "subscribe"
)
has_event_subscription = self._event_bus is not None and hasattr(self._event_bus, "subscribe")
if has_event_subscription:
sub_completed = self._event_bus.subscribe(
event_types=[EventType.WORKER_COMPLETED],
@@ -1642,14 +1577,12 @@ class Orchestrator:
)
if unresolved_terminals:
execution_error = (
"Worker execution ended before terminal nodes completed: "
f"{unresolved_terminals}"
f"Worker execution ended before terminal nodes completed: {unresolved_terminals}"
)
self.logger.error(execution_error)
else:
execution_error = (
"Worker execution ended before all workers reached "
"a terminal lifecycle state"
"Worker execution ended before all workers reached a terminal lifecycle state"
)
self.logger.error(execution_error)
break
@@ -1680,10 +1613,7 @@ class Orchestrator:
task_error = exc
# Check for fan-out branch timeout
if (
isinstance(task_error, asyncio.TimeoutError)
and wid in _fanout_branch_tasks
):
if isinstance(task_error, asyncio.TimeoutError) and wid in _fanout_branch_tasks:
error = f"Branch failed (timed out after {branch_timeout}s)"
failed_workers[wid] = error
worker.lifecycle = WorkerLifecycle.FAILED
@@ -1727,10 +1657,7 @@ class Orchestrator:
src_spec = graph.get_node(wid)
if src_spec and src_spec.tools:
for t in self.tools:
if (
t.name in src_spec.tools
and t.name not in gc.cumulative_tool_names
):
if t.name in src_spec.tools and t.name not in gc.cumulative_tool_names:
gc.cumulative_tools.append(t)
gc.cumulative_tool_names.add(t.name)
if src_spec and src_spec.output_keys:
@@ -1741,8 +1668,7 @@ class Orchestrator:
gc.continuous_conversation = completion_conversation
self.logger.info(
f" ✓ Worker completed: {wid} "
f"({len(outgoing_activations)} outgoing activation(s))"
f" ✓ Worker completed: {wid} ({len(outgoing_activations)} outgoing activation(s))"
)
# Route activations
@@ -1787,8 +1713,7 @@ class Orchestrator:
error = str(task_error)
else:
error = (
"Worker task completed without publishing a completion "
f"(lifecycle={worker.lifecycle})"
f"Worker task completed without publishing a completion (lifecycle={worker.lifecycle})"
)
failed_workers[wid] = error
@@ -97,15 +97,12 @@ def build_transition_marker(
file_path = data_path / filename
try:
write_content = (
json.dumps(value, indent=2, ensure_ascii=False)
if isinstance(value, (dict, list))
else str(value)
json.dumps(value, indent=2, ensure_ascii=False) if isinstance(value, (dict, list)) else str(value)
)
file_path.write_text(write_content, encoding="utf-8")
file_size = file_path.stat().st_size
buffer_items[key] = (
f"[Saved to '{filename}' ({file_size:,} bytes). "
f"Use read_file(path='{filename}') to access.]"
f"[Saved to '{filename}' ({file_size:,} bytes). Use read_file(path='{filename}') to access.]"
)
except Exception:
buffer_items[key] = val_str[:300] + "..."
+4 -11
@@ -177,18 +177,12 @@ def build_prompt_spec_from_node_context(
# Tool-gated pre-activation: inject full body of default skills whose
# trigger tools are present in this node's tool list (e.g. browser_*
# pulls in hive.browser-automation).
tool_names = [
getattr(t, "name", "") for t in (getattr(ctx, "available_tools", None) or [])
]
skills_catalog_prompt = augment_catalog_for_tools(
ctx.skills_catalog_prompt or "", tool_names
)
tool_names = [getattr(t, "name", "") for t in (getattr(ctx, "available_tools", None) or [])]
skills_catalog_prompt = augment_catalog_for_tools(ctx.skills_catalog_prompt or "", tool_names)
return NodePromptSpec(
identity_prompt=ctx.identity_prompt or "",
focus_prompt=focus_prompt
if focus_prompt is not None
else (ctx.node_spec.system_prompt or ""),
focus_prompt=focus_prompt if focus_prompt is not None else (ctx.node_spec.system_prompt or ""),
narrative=narrative if narrative is not None else (ctx.narrative or ""),
accounts_prompt=ctx.accounts_prompt or "",
skills_catalog_prompt=skills_catalog_prompt,
@@ -299,8 +293,7 @@ def build_transition_message(spec: TransitionSpec) -> str:
if spec.data_files:
sections.append(
"\nData files (use read_file to access):\n"
+ "\n".join(f" {entry}" for entry in spec.data_files)
"\nData files (use read_file to access):\n" + "\n".join(f" {entry}" for entry in spec.data_files)
)
if spec.cumulative_tool_names:
+1 -5
@@ -169,11 +169,7 @@ class SafeEvalVisitor(ast.NodeVisitor):
return tuple(self.visit(elt) for elt in node.elts)
def visit_Dict(self, node: ast.Dict) -> dict:
return {
self.visit(k): self.visit(v)
for k, v in zip(node.keys, node.values, strict=False)
if k is not None
}
return {self.visit(k): self.visit(v) for k, v in zip(node.keys, node.values, strict=False) if k is not None}
# --- Operations ---
def visit_BinOp(self, node: ast.BinOp) -> Any:
+2 -6
@@ -120,9 +120,7 @@ class OutputValidator:
nullable_keys = nullable_keys or []
if not isinstance(output, dict):
return ValidationResult(
success=False, errors=[f"Output is not a dict, got {type(output).__name__}"]
)
return ValidationResult(success=False, errors=[f"Output is not a dict, got {type(output).__name__}"])
for key in expected_keys:
if key not in output:
@@ -237,9 +235,7 @@ class OutputValidator:
# Check for overly long values
if len(value) > max_length:
errors.append(
f"Output key '{key}' exceeds max length ({len(value)} > {max_length})"
)
errors.append(f"Output key '{key}' exceeds max length ({len(value)} > {max_length})")
return ValidationResult(success=len(errors) == 0, errors=errors)
+1 -3
@@ -27,8 +27,6 @@ class CostGuardStage(PipelineStage):
if estimated > self._budget:
return PipelineResult(
action="reject",
rejection_reason=(
f"Estimated cost ${estimated:.4f} exceeds budget ${self._budget:.4f}"
),
rejection_reason=(f"Estimated cost ${estimated:.4f} exceeds budget ${self._budget:.4f}"),
)
return PipelineResult(action="continue")
@@ -40,8 +40,7 @@ class InputValidationStage(PipelineStage):
return PipelineResult(
action="reject",
rejection_reason=(
f"Input key '{key}' has type {type(value).__name__}, "
f"expected {expected_type.__name__}"
f"Input key '{key}' has type {type(value).__name__}, expected {expected_type.__name__}"
),
)
return PipelineResult(action="continue")
+1 -3
@@ -35,9 +35,7 @@ class RateLimitStage(PipelineStage):
if len(self._timestamps[key]) >= self._max_rpm:
return PipelineResult(
action="reject",
rejection_reason=(
f"Rate limit exceeded: {self._max_rpm} req/min for session '{session_id}'"
),
rejection_reason=(f"Rate limit exceeded: {self._max_rpm} req/min for session '{session_id}'"),
)
self._timestamps[key].append(now)
return PipelineResult(action="continue")
+4 -12
@@ -25,9 +25,7 @@ class GoalStatus(StrEnum):
class SuccessCriterion(BaseModel):
id: str
description: str = Field(description="Human-readable description of what success looks like")
metric: str = Field(
description="How to measure: 'output_contains', 'output_equals', 'llm_judge', 'custom'"
)
metric: str = Field(description="How to measure: 'output_contains', 'output_equals', 'llm_judge', 'custom'")
type: str = Field(default="success_rate", description="Runtime evaluation type")
target: Any = Field(description="The target value or condition")
weight: float = Field(default=1.0, ge=0.0, le=1.0, description="Relative importance (0-1)")
@@ -39,15 +37,9 @@ class SuccessCriterion(BaseModel):
class Constraint(BaseModel):
id: str
description: str
constraint_type: str = Field(
description="Type: 'hard' (must not violate) or 'soft' (prefer not to violate)"
)
category: str = Field(
default="general", description="Category: 'time', 'cost', 'safety', 'scope', 'quality'"
)
check: str = Field(
default="", description="How to check: expression, function name, or 'llm_judge'"
)
constraint_type: str = Field(description="Type: 'hard' (must not violate) or 'soft' (prefer not to violate)")
category: str = Field(default="general", description="Category: 'time', 'cost', 'safety', 'scope', 'quality'")
check: str = Field(default="", description="How to check: expression, function name, or 'llm_judge'")
model_config = {"extra": "allow"}
+2 -6
@@ -237,9 +237,7 @@ class SessionState(BaseModel):
progress=SessionProgress(
current_node=result.paused_at or (result.path[-1] if result.path else None),
paused_at=result.paused_at,
resume_from=result.session_state.get("resume_from")
if result.session_state
else None,
resume_from=result.session_state.get("resume_from") if result.session_state else None,
steps_executed=result.steps_executed,
total_tokens=result.total_tokens,
total_latency_ms=result.total_latency_ms,
@@ -256,9 +254,7 @@ class SessionState(BaseModel):
error=result.error,
output=result.output,
),
data_buffer=result.session_state.get(
"data_buffer", result.session_state.get("memory", {})
)
data_buffer=result.session_state.get("data_buffer", result.session_state.get("memory", {}))
if result.session_state
else {},
input_data=input_data or {},
+5 -12
@@ -56,8 +56,7 @@ def validate_agent_path(agent_path: str | Path) -> Path:
if resolved.is_relative_to(root) and resolved != root:
return resolved
raise ValueError(
"agent_path must be inside an allowed directory "
"(~/.hive/colonies/, exports/, examples/, or ~/.hive/agents/)"
"agent_path must be inside an allowed directory (~/.hive/colonies/, exports/, examples/, or ~/.hive/agents/)"
)
@@ -186,9 +185,7 @@ async def handle_browser_status(request: web.Request) -> web.Response:
status_port = bridge_port + 1
try:
reader, writer = await asyncio.wait_for(
asyncio.open_connection("127.0.0.1", status_port), timeout=0.5
)
reader, writer = await asyncio.wait_for(asyncio.open_connection("127.0.0.1", status_port), timeout=0.5)
writer.write(b"GET /status HTTP/1.0\r\nHost: 127.0.0.1\r\n\r\n")
await writer.drain()
raw = await asyncio.wait_for(reader.read(512), timeout=0.5)
@@ -232,9 +229,7 @@ def create_app(model: str | None = None) -> web.Application:
from framework.credentials.key_storage import generate_and_save_credential_key
generate_and_save_credential_key()
logger.info(
"Generated and persisted HIVE_CREDENTIAL_KEY to ~/.hive/secrets/credential_key"
)
logger.info("Generated and persisted HIVE_CREDENTIAL_KEY to ~/.hive/secrets/credential_key")
except Exception as exc:
logger.warning("Could not auto-persist HIVE_CREDENTIAL_KEY: %s", exc)
@@ -274,9 +269,7 @@ def create_app(model: str | None = None) -> web.Application:
log_collisions=True,
max_tools=selection_max_tools,
)
logger.info(
"Pre-loaded queen tool registry with %d tools", len(_queen_tool_registry.get_tools())
)
logger.info("Pre-loaded queen tool registry with %d tools", len(_queen_tool_registry.get_tools()))
except Exception as e:
logger.warning("Failed to pre-load queen tool registry: %s", e)
@@ -297,11 +290,11 @@ def create_app(model: str | None = None) -> web.Application:
from framework.server.routes_credentials import register_routes as register_credential_routes
from framework.server.routes_events import register_routes as register_event_routes
from framework.server.routes_execution import register_routes as register_execution_routes
from framework.server.routes_workers import register_routes as register_worker_routes
from framework.server.routes_logs import register_routes as register_log_routes
from framework.server.routes_messages import register_routes as register_message_routes
from framework.server.routes_queens import register_routes as register_queen_routes
from framework.server.routes_sessions import register_routes as register_session_routes
from framework.server.routes_workers import register_routes as register_worker_routes
register_config_routes(app)
register_credential_routes(app)
+27 -46
@@ -13,6 +13,8 @@ from pathlib import Path
from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
from framework.agent_loop.internals.types import HookContext, HookResult
from framework.loader.tool_registry import ToolRegistry
from framework.server.session_manager import Session
logger = logging.getLogger(__name__)
@@ -64,8 +66,7 @@ def install_worker_escalation_routing(
try:
await runtime.inject_input(
worker_id,
"[QUEEN_REPLY] queue_full — queen inbox saturated; "
"proceed with best judgment or retry later.",
"[QUEEN_REPLY] queue_full — queen inbox saturated; proceed with best judgment or retry later.",
)
except Exception:
logger.warning(
@@ -100,24 +101,16 @@ def install_worker_escalation_routing(
lines.append(context_text)
if request_id:
lines.append(
"Use reply_to_worker(request_id, reply) to unblock, "
"or list_worker_questions() to see all pending."
"Use reply_to_worker(request_id, reply) to unblock, or list_worker_questions() to see all pending."
)
else:
lines.append(
"No request_id — use inject_message(content=...) to relay "
"guidance manually."
)
lines.append("No request_id — use inject_message(content=...) to relay guidance manually.")
handoff = "\n".join(lines)
# Fallback: if the queen loop has gone away, publish a
# CLIENT_INPUT_REQUESTED so the human sees the question and the
# worker does not wedge.
queen_node = (
session.queen_executor.node_registry.get("queen")
if session.queen_executor is not None
else None
)
queen_node = session.queen_executor.node_registry.get("queen") if session.queen_executor is not None else None
if queen_node is None or not hasattr(queen_node, "inject_event"):
if session.event_bus is not None:
await session.event_bus.emit_client_input_requested(
@@ -141,9 +134,7 @@ def install_worker_escalation_routing(
filter_colony=runtime.colony_id,
)
except Exception:
logger.warning(
"Failed to install colony-scoped escalation sub", exc_info=True
)
logger.warning("Failed to install colony-scoped escalation sub", exc_info=True)
# fall through to session bus
if session.event_bus is None:
return None
@@ -174,14 +165,12 @@ def _build_credentials_provider() -> Any:
def _provider() -> str:
now = time.monotonic()
if (
state["cached"]
and (now - state["cached_at"]) < _CREDENTIALS_BLOCK_TTL_SECONDS
):
if state["cached"] and (now - state["cached_at"]) < _CREDENTIALS_BLOCK_TTL_SECONDS:
return state["cached"]
try:
from aden_tools.credentials.store_adapter import CredentialStoreAdapter
from framework.orchestrator.prompting import build_accounts_prompt
adapter = CredentialStoreAdapter.default()
@@ -313,8 +302,8 @@ async def create_queen(
_shared_building_knowledge,
finalize_queen_prompt,
)
from framework.llm.capabilities import supports_image_tool_results
from framework.host.event_bus import AgentEvent, EventType
from framework.llm.capabilities import supports_image_tool_results
from framework.loader.mcp_registry import MCPRegistry
from framework.loader.tool_registry import ToolRegistry
from framework.tools.queen_lifecycle_tools import (
@@ -326,9 +315,7 @@ async def create_queen(
# Use pre-loaded cached registry if available (fast path)
if tool_registry is not None:
queen_registry = tool_registry
logger.info(
"Queen: using pre-loaded tool registry with %d tools", len(queen_registry.get_tools())
)
logger.info("Queen: using pre-loaded tool registry with %d tools", len(queen_registry.get_tools()))
else:
# Build fresh (slow path - for backwards compatibility)
queen_registry = ToolRegistry()
@@ -456,13 +443,9 @@ async def create_queen(
# Independent phase gets core tools + all MCP tools not claimed by any
# other phase (coder-tools file I/O, gcu-tools browser, etc.).
all_phase_names = (
planning_names | building_names | staging_names | running_names | editing_names
)
all_phase_names = planning_names | building_names | staging_names | running_names | editing_names
mcp_tools = [t for t in queen_tools if t.name not in all_phase_names]
phase_state.independent_tools = [
t for t in queen_tools if t.name in independent_names
] + mcp_tools
phase_state.independent_tools = [t for t in queen_tools if t.name in independent_names] + mcp_tools
logger.info(
"Queen: independent tools: %s",
sorted(t.name for t in phase_state.independent_tools),
@@ -494,9 +477,7 @@ async def create_queen(
# Resolve vision-only prompt sections based on the session's LLM.
# session.llm is immutable for the session's lifetime, so this check
# is stable — prompts never need to be recomposed mid-session.
_has_vision = bool(
session.llm and supports_image_tool_results(getattr(session.llm, "model", ""))
)
_has_vision = bool(session.llm and supports_image_tool_results(getattr(session.llm, "model", "")))
_planning_body = (
_queen_character_core
@@ -625,6 +606,14 @@ async def create_queen(
)
async def _queen_identity_hook(ctx: HookContext) -> HookResult | None:
from framework.agent_loop.internals.types import HookResult
from framework.agents.queen.queen_profiles import (
ensure_default_queens,
format_queen_identity_prompt,
load_queen_profile,
select_queen,
)
ensure_default_queens()
trigger = ctx.trigger or ""
# If the session was pre-bound to a queen (user clicked a specific
@@ -676,18 +665,12 @@ async def create_queen(
try:
_meta = _json.loads(_meta_path.read_text(encoding="utf-8"))
_meta["queen_id"] = queen_id
_meta_path.write_text(
_json.dumps(_meta, ensure_ascii=False), encoding="utf-8"
)
_meta_path.write_text(_json.dumps(_meta, ensure_ascii=False), encoding="utf-8")
except (OSError, _json.JSONDecodeError):
pass
# Re-point event bus log to new location, preserving offset
_offset = getattr(
session.event_bus, "_session_log_iteration_offset", 0
)
session.event_bus.set_session_log(
_new_dir / "events.jsonl", iteration_offset=_offset
)
_offset = getattr(session.event_bus, "_session_log_iteration_offset", 0)
session.event_bus.set_session_log(_new_dir / "events.jsonl", iteration_offset=_offset)
if _session_event_bus is not None:
await _session_event_bus.publish(
@@ -742,7 +725,7 @@ async def create_queen(
logger.debug("Queen: tools not yet available (registered on worker load): %s", missing)
node_updates["tools"] = available_tools
adjusted_node = _orig_node.model_copy(update=node_updates)
_orig_node.model_copy(update=node_updates)
# Determine session mode:
# - RESTORE: Resume cold session with history, no initial prompt -> wait for user
@@ -897,9 +880,7 @@ async def create_queen(
# bootstrap: if the frontend doesn't pass initial_prompt, we must
# NOT invent a phantom "Hello" — that used to concatenate with the
# real first chat message and confuse the model.
ctx.input_data = {
"user_request": None if _is_restore_mode else (initial_prompt or None)
}
ctx.input_data = {"user_request": None if _is_restore_mode else (initial_prompt or None)}
# Publish the initial prompt as a CLIENT_INPUT_RECEIVED event so
# it appears in the SSE stream and persists to events.jsonl for
+4 -12
@@ -281,9 +281,7 @@ def _get_subscription_token(sub_id: str) -> str | None:
return None
def _hot_swap_sessions(
request: web.Request, full_model: str, api_key: str | None, api_base: str | None
) -> int:
def _hot_swap_sessions(request: web.Request, full_model: str, api_key: str | None, api_base: str | None) -> int:
"""Hot-swap the LLM on all running sessions. Returns count of swapped sessions.
Also refreshes the SessionManager's default model so that subsequent
@@ -363,9 +361,7 @@ async def handle_update_llm_config(request: web.Request) -> web.Response:
# ── Subscription mode ────────────────────────────────────────
sub = _SUBSCRIPTION_MAP.get(subscription_id)
if not sub:
return web.json_response(
{"error": f"Unknown subscription: {subscription_id}"}, status=400
)
return web.json_response({"error": f"Unknown subscription: {subscription_id}"}, status=400)
preset = get_preset(subscription_id)
# Subscriptions use the fixed model from their preset (no model switching)
@@ -432,9 +428,7 @@ async def handle_update_llm_config(request: web.Request) -> web.Response:
provider = body.get("provider")
model = body.get("model")
if not provider or not model:
return web.json_response(
{"error": "Both 'provider' and 'model' are required"}, status=400
)
return web.json_response({"error": "Both 'provider' and 'model' are required"}, status=400)
# Look up token limits from catalogue
model_info = _find_model_info(provider, model)
@@ -552,9 +546,7 @@ def _update_user_profile_memory(display_name: str, about: str) -> None:
content = build_memory_document(
name="User Profile",
description=f"User identity: {display_name}"
if display_name
else "User profile information",
description=f"User identity: {display_name}" if display_name else "User profile information",
mem_type="profile",
body=new_body if new_body else "No profile information yet.",
)
+21 -25
@@ -216,9 +216,7 @@ async def handle_check_agent(request: web.Request) -> web.Response:
ensure_credential_key_env()
nodes = load_agent_nodes(agent_path)
result = validate_agent_credentials(
nodes, verify=verify, raise_on_error=False, force_refresh=True
)
result = validate_agent_credentials(nodes, verify=verify, raise_on_error=False, force_refresh=True)
# If any credential needs Aden, include ADEN_API_KEY as a first-class row
if any(c.aden_supported for c in result.credentials):
@@ -291,13 +289,15 @@ def _collect_accounts_by_provider() -> dict[str, list[dict]]:
provider = acct.get("provider", "")
if not provider:
continue
grouped.setdefault(provider, []).append({
grouped.setdefault(provider, []).append(
{
"provider": provider,
"alias": acct.get("alias", ""),
"identity": acct.get("identity", {}) or {},
"source": acct.get("source", "aden"),
"credential_id": acct.get("credential_id", provider),
})
}
)
return grouped
except Exception:
logger.debug("Failed to collect accounts for specs response", exc_info=True)
@@ -327,17 +327,17 @@ async def handle_resync_credentials(request: web.Request) -> web.Response:
loop = asyncio.get_running_loop()
# _presync_aden_tokens makes blocking HTTP calls to the Aden server.
await loop.run_in_executor(
None, lambda: _presync_aden_tokens(CREDENTIAL_SPECS, force=True)
)
await loop.run_in_executor(None, lambda: _presync_aden_tokens(CREDENTIAL_SPECS, force=True))
_invalidate_queen_credentials_cache(request)
accounts_by_provider = _collect_accounts_by_provider()
return web.json_response({
return web.json_response(
{
"synced": True,
"accounts_by_provider": accounts_by_provider,
})
}
)
except Exception as exc:
logger.exception("Error during credential resync: %s", exc)
return web.json_response(
@@ -366,9 +366,7 @@ async def handle_list_specs(request: web.Request) -> web.Response:
_presync_aden_tokens(CREDENTIAL_SPECS)
# Build composite store (env → encrypted file)
env_mapping = {
(spec.credential_id or name): spec.env_var for name, spec in CREDENTIAL_SPECS.items()
}
env_mapping = {(spec.credential_id or name): spec.env_var for name, spec in CREDENTIAL_SPECS.items()}
env_storage = EnvVarStorage(env_mapping=env_mapping)
if os.environ.get("HIVE_CREDENTIAL_KEY"):
storage = CompositeStorage(primary=env_storage, fallbacks=[EncryptedFileStorage()])
@@ -396,7 +394,8 @@ async def handle_list_specs(request: web.Request) -> web.Response:
available = len(accounts) > 0
else:
available = store.is_available(cred_id)
specs.append({
specs.append(
{
"credential_name": name,
"credential_id": cred_id,
"env_var": spec.env_var,
@@ -410,7 +409,8 @@ async def handle_list_specs(request: web.Request) -> web.Response:
"credential_group": spec.credential_group,
"available": available,
"accounts": accounts,
})
}
)
# Include aden_api_key synthetic row if any spec uses Aden
if any_aden:
@@ -422,7 +422,9 @@ async def handle_list_specs(request: web.Request) -> web.Response:
"env_var": "ADEN_API_KEY",
"description": "API key from the Developers tab in Settings",
"help_url": "https://hive.adenhq.com/",
"api_key_instructions": "1. Go to hive.adenhq.com\n2. Open Settings > Developers\n3. Copy your API key",
"api_key_instructions": (
"1. Go to hive.adenhq.com\n2. Open Settings > Developers\n3. Copy your API key"
),
"tools": [],
"aden_supported": True,
"direct_api_key_supported": True,
@@ -459,16 +461,12 @@ async def handle_validate_key(request: web.Request) -> web.Response:
api_key = body.get("api_key", "").strip()
if not provider_id or not api_key:
return web.json_response(
{"error": "provider_id and api_key are required"}, status=400
)
return web.json_response({"error": "provider_id and api_key are required"}, status=400)
try:
checker = _get_llm_key_providers().get(provider_id)
if not checker:
return web.json_response(
{"valid": True, "message": f"No health check for {provider_id}"}
)
return web.json_response({"valid": True, "message": f"No health check for {provider_id}"})
loop = asyncio.get_running_loop()
result = await loop.run_in_executor(None, lambda: checker(api_key))
@@ -476,9 +474,7 @@ async def handle_validate_key(request: web.Request) -> web.Response:
except Exception as exc:
logger.warning("LLM key validation failed for %s: %s", provider_id, exc)
return web.json_response(
{"valid": None, "message": f"Validation error: {exc}"}
)
return web.json_response({"valid": None, "message": f"Validation error: {exc}"})
def register_routes(app: web.Application) -> None:
+2 -6
@@ -159,9 +159,7 @@ async def handle_events(request: web.Request) -> web.StreamResponse:
sse = SSEResponse()
await sse.prepare(request)
logger.info(
"SSE connected: session='%s', sub_id='%s', types=%d", session.id, sub_id, len(event_types)
)
logger.info("SSE connected: session='%s', sub_id='%s', types=%d", session.id, sub_id, len(event_types))
# Replay buffered events that were published before this SSE connected.
# The EventBus keeps a history ring-buffer; we replay the subset that
@@ -215,9 +213,7 @@ async def handle_events(request: web.Request) -> web.StreamResponse:
await sse.send_event(data)
event_count += 1
if event_count == 1:
logger.info(
"SSE first event: session='%s', type='%s'", session.id, data.get("type")
)
logger.info("SSE first event: session='%s', type='%s'", session.id, data.get("type"))
except TimeoutError:
try:
await sse.send_keepalive()
+22 -53
@@ -3,6 +3,7 @@
import asyncio
import json
import logging
from datetime import UTC
from typing import Any
from aiohttp import web
@@ -117,9 +118,7 @@ async def handle_trigger(request: web.Request) -> web.Response:
if session.runner:
loop = asyncio.get_running_loop()
try:
await loop.run_in_executor(
None, lambda: validate_agent_credentials(session.runner.graph.nodes)
)
await loop.run_in_executor(None, lambda: validate_agent_credentials(session.runner.graph.nodes))
except Exception as e:
agent_path = str(session.worker_path) if session.worker_path else ""
resp = _credential_error_response(e, agent_path)
@@ -129,9 +128,7 @@ async def handle_trigger(request: web.Request) -> web.Response:
# Resync MCP servers if credentials were added since the worker loaded
# (e.g. user connected an OAuth account mid-session via Aden UI).
try:
await loop.run_in_executor(
None, lambda: session.runner._tool_registry.resync_mcp_servers_if_needed()
)
await loop.run_in_executor(None, lambda: session.runner._tool_registry.resync_mcp_servers_if_needed())
except Exception as e:
logger.warning("MCP resync failed: %s", e)
@@ -228,23 +225,14 @@ async def handle_chat(request: web.Request) -> web.Response:
type(queen_executor.node_registry),
id(queen_executor.node_registry),
)
logger.debug(
"[handle_chat] node_registry keys: %s", list(queen_executor.node_registry.keys())
)
logger.debug("[handle_chat] node_registry keys: %s", list(queen_executor.node_registry.keys()))
node = queen_executor.node_registry.get("queen")
logger.debug(
"[handle_chat] node=%s, node_type=%s", node, type(node).__name__ if node else None
)
logger.debug(
"[handle_chat] has_inject_event=%s", hasattr(node, "inject_event") if node else False
)
logger.debug("[handle_chat] node=%s, node_type=%s", node, type(node).__name__ if node else None)
logger.debug("[handle_chat] has_inject_event=%s", hasattr(node, "inject_event") if node else False)
# Race condition: executor exists but node not created yet (still initializing)
if node is None and session.queen_task is not None and not session.queen_task.done():
logger.warning(
"[handle_chat] Queen executor exists but node"
" not ready yet (initializing). Waiting..."
)
logger.warning("[handle_chat] Queen executor exists but node not ready yet (initializing). Waiting...")
# Wait a short time for initialization to progress
import asyncio
@@ -302,16 +290,12 @@ async def handle_chat(request: web.Request) -> web.Response:
)
else:
logger.error(
"[handle_chat] CRITICAL: Queen node exists"
" but missing inject_event!"
" node_attrs=%s",
"[handle_chat] CRITICAL: Queen node exists but missing inject_event! node_attrs=%s",
[a for a in dir(node) if not a.startswith("_")],
)
# Queen is dead — try to revive her
logger.warning(
"[handle_chat] Queen is dead for session '%s', reviving on /chat request", session.id
)
logger.warning("[handle_chat] Queen is dead for session '%s', reviving on /chat request", session.id)
manager: Any = request.app["manager"]
try:
logger.debug("[handle_chat] Calling manager.revive_queen()...")
@@ -322,9 +306,7 @@ async def handle_chat(request: web.Request) -> web.Response:
_revived_executor = session.queen_executor
_revived_node = _revived_executor.node_registry.get("queen") if _revived_executor else None
if _revived_node is not None and hasattr(_revived_node, "inject_event"):
await _revived_node.inject_event(
message, is_client_input=True, image_content=image_content
)
await _revived_node.inject_event(message, is_client_input=True, image_content=image_content)
return web.json_response(
{
"status": "queen_revived",
@@ -552,9 +534,7 @@ async def handle_stop(request: web.Request) -> web.Response:
if hasattr(node, "cancel_current_turn"):
node.cancel_current_turn()
cancelled = await stream.cancel_execution(
execution_id, reason="Execution stopped by user"
)
cancelled = await stream.cancel_execution(execution_id, reason="Execution stopped by user")
if cancelled:
# Cancel queen's in-progress LLM turn
if session.queen_executor:
@@ -716,13 +696,12 @@ async def fork_session_into_colony(
import asyncio
import json
import shutil
from datetime import datetime, timezone
from datetime import datetime
from pathlib import Path
from framework.agent_loop.agent_loop import AgentLoop, LoopConfig
from framework.agent_loop.types import AgentContext, AgentSpec
from framework.agent_loop.types import AgentContext
from framework.server.session_manager import _queen_session_dir
from framework.storage.conversation_store import FileConversationStore
queen_loop: AgentLoop = session.queen_executor.node_registry["queen"]
queen_ctx: AgentContext = getattr(queen_loop, "_last_ctx", None)
@@ -813,11 +792,9 @@ async def fork_session_into_colony(
"queen_id": getattr(phase_state, "queen_id", "") if phase_state else "",
"loop_config": queen_lc_config,
"spawned_from": session.id,
"spawned_at": datetime.now(timezone.utc).isoformat(),
"spawned_at": datetime.now(UTC).isoformat(),
}
worker_config_path.write_text(
json.dumps(worker_meta, indent=2, ensure_ascii=False), encoding="utf-8"
)
worker_config_path.write_text(json.dumps(worker_meta, indent=2, ensure_ascii=False), encoding="utf-8")
# ── 3. Duplicate queen session into colony ───────────────────
# Copy the queen's full session directory (conversations, events,
@@ -843,9 +820,7 @@ async def fork_session_into_colony(
dest_queen_dir = _queen_session_dir(colony_session_id, queen_name)
if source_queen_dir.exists():
await asyncio.to_thread(
shutil.copytree, source_queen_dir, dest_queen_dir, dirs_exist_ok=True
)
await asyncio.to_thread(shutil.copytree, source_queen_dir, dest_queen_dir, dirs_exist_ok=True)
# Update the duplicated meta.json to point to the colony
dest_meta_path = dest_queen_dir / "meta.json"
dest_meta: dict = {}
@@ -859,9 +834,7 @@ async def fork_session_into_colony(
dest_meta["queen_id"] = queen_name
dest_meta["forked_from"] = session.id
dest_meta["colony_fork"] = True # exclude from queen DM history
dest_meta_path.write_text(
json.dumps(dest_meta, ensure_ascii=False), encoding="utf-8"
)
dest_meta_path.write_text(json.dumps(dest_meta, ensure_ascii=False), encoding="utf-8")
logger.info(
"Duplicated queen session %s -> %s for colony '%s'",
session.id,
@@ -875,9 +848,7 @@ async def fork_session_into_colony(
worker_conv_dir = worker_storage / "conversations"
source_conv_dir = dest_queen_dir / "conversations"
if source_conv_dir.exists():
await asyncio.to_thread(
shutil.copytree, source_conv_dir, worker_conv_dir, dirs_exist_ok=True
)
await asyncio.to_thread(shutil.copytree, source_conv_dir, worker_conv_dir, dirs_exist_ok=True)
logger.info("Copied queen conversations to worker storage %s", worker_conv_dir)
else:
logger.warning(
@@ -897,12 +868,12 @@ async def fork_session_into_colony(
metadata["queen_name"] = queen_name
metadata["queen_session_id"] = colony_session_id
metadata["source_session_id"] = session.id
metadata.setdefault("created_at", datetime.now(timezone.utc).isoformat())
metadata["updated_at"] = datetime.now(timezone.utc).isoformat()
metadata.setdefault("created_at", datetime.now(UTC).isoformat())
metadata["updated_at"] = datetime.now(UTC).isoformat()
metadata.setdefault("workers", {})
metadata["workers"][worker_name] = {
"task": worker_task[:100],
"spawned_at": datetime.now(timezone.utc).isoformat(),
"spawned_at": datetime.now(UTC).isoformat(),
}
metadata_path.write_text(json.dumps(metadata, indent=2, ensure_ascii=False), encoding="utf-8")
@@ -920,9 +891,7 @@ async def fork_session_into_colony(
qmeta["agent_name"] = colony_name.replace("_", " ").title()
try:
source_meta_path.parent.mkdir(parents=True, exist_ok=True)
source_meta_path.write_text(
json.dumps(qmeta, ensure_ascii=False), encoding="utf-8"
)
source_meta_path.write_text(json.dumps(qmeta, ensure_ascii=False), encoding="utf-8")
except OSError:
pass
+2 -6
@@ -148,9 +148,7 @@ def _transform_profile_for_api(profile: dict) -> dict:
details.append(f"Drive: {hidden['deep_motive']}")
if hidden.get("behavioral_mapping"):
details.append(f"Approach: {hidden['behavioral_mapping']}")
experience.append(
{"role": f"{profile.get('title', 'Executive Advisor')}", "details": details}
)
experience.append({"role": f"{profile.get('title', 'Executive Advisor')}", "details": details})
if experience:
result["experience"] = experience
@@ -161,9 +159,7 @@ def _transform_profile_for_api(profile: dict) -> dict:
# Signature achievement from world_lore
world_lore = profile.get("world_lore", {})
if world_lore.get("habitat"):
result["signature_achievement"] = (
f"{world_lore['habitat']}. {world_lore.get('lexicon', '')}".strip()
)
result["signature_achievement"] = f"{world_lore['habitat']}. {world_lore.get('lexicon', '')}".strip()
return result
+9 -35
@@ -59,9 +59,7 @@ def _session_to_live_dict(session) -> dict:
"loaded_at": session.loaded_at,
"uptime_seconds": round(time.time() - session.loaded_at, 1),
"intro_message": getattr(session.runner, "intro_message", "") or "",
"queen_phase": phase_state.phase
if phase_state
else ("staging" if session.colony_runtime else "planning"),
"queen_phase": phase_state.phase if phase_state else ("staging" if session.colony_runtime else "planning"),
"queen_supports_images": supports_image_tool_results(queen_model) if queen_model else True,
"queen_id": getattr(phase_state, "queen_id", None) if phase_state else None,
"queen_name": (phase_state.queen_profile or {}).get("name") if phase_state else None,
@@ -229,11 +227,7 @@ async def handle_get_live_session(request: web.Request) -> web.Response:
"entry_node": ep.entry_node,
"trigger_type": ep.trigger_type,
"trigger_config": ep.trigger_config,
**(
{"next_fire_in": nf}
if (nf := rt.get_timer_next_fire_in(ep.id)) is not None
else {}
),
**({"next_fire_in": nf} if (nf := rt.get_timer_next_fire_in(ep.id)) is not None else {}),
}
for ep in rt.get_entry_points()
]
@@ -383,11 +377,7 @@ async def handle_session_entry_points(request: web.Request) -> web.Response:
"entry_node": ep.entry_node,
"trigger_type": ep.trigger_type,
"trigger_config": ep.trigger_config,
**(
{"next_fire_in": nf}
if rt and (nf := rt.get_timer_next_fire_in(ep.id)) is not None
else {}
),
**({"next_fire_in": nf} if rt and (nf := rt.get_timer_next_fire_in(ep.id)) is not None else {}),
}
for ep in eps
]
@@ -468,21 +458,13 @@ async def handle_update_trigger_task(request: web.Request) -> web.Response:
)
except ImportError:
return web.json_response(
{
"error": (
"croniter package not installed — cannot validate cron expression."
)
},
{"error": ("croniter package not installed — cannot validate cron expression.")},
status=500,
)
merged_trigger_config.pop("interval_minutes", None)
elif interval is None:
return web.json_response(
{
"error": (
"Timer trigger needs 'cron' or 'interval_minutes' in trigger_config."
)
},
{"error": ("Timer trigger needs 'cron' or 'interval_minutes' in trigger_config.")},
status=400,
)
elif not isinstance(interval, (int, float)) or interval <= 0:
@@ -580,9 +562,7 @@ async def handle_activate_trigger(request: web.Request) -> web.Response:
)
if trigger_id in getattr(session, "active_trigger_ids", set()):
return web.json_response(
{"status": "already_active", "trigger_id": trigger_id}
)
return web.json_response({"status": "already_active", "trigger_id": trigger_id})
from framework.tools.queen_lifecycle_tools import (
_persist_active_triggers,
@@ -646,9 +626,7 @@ async def handle_deactivate_trigger(request: web.Request) -> web.Response:
trigger_id = request.match_info["trigger_id"]
if trigger_id not in getattr(session, "active_trigger_ids", set()):
return web.json_response(
{"status": "already_inactive", "trigger_id": trigger_id}
)
return web.json_response({"status": "already_inactive", "trigger_id": trigger_id})
task = session.active_timer_tasks.pop(trigger_id, None)
if task and not task.done():
@@ -867,9 +845,7 @@ async def handle_delete_agent(request: web.Request) -> web.Response:
try:
shutil.rmtree(resolved)
except OSError as e:
return web.json_response(
{"error": f"Failed to delete agent directory: {e}"}, status=500
)
return web.json_response({"error": f"Failed to delete agent directory: {e}"}, status=500)
return web.json_response({"deleted": str(resolved)})
@@ -932,9 +908,7 @@ def register_routes(app: web.Application) -> None:
app.router.add_post("/api/sessions/{session_id}/reveal", handle_reveal_session_folder)
app.router.add_get("/api/sessions/{session_id}/stats", handle_session_stats)
app.router.add_get("/api/sessions/{session_id}/entry-points", handle_session_entry_points)
app.router.add_patch(
"/api/sessions/{session_id}/triggers/{trigger_id}", handle_update_trigger_task
)
app.router.add_patch("/api/sessions/{session_id}/triggers/{trigger_id}", handle_update_trigger_task)
app.router.add_post(
"/api/sessions/{session_id}/triggers/{trigger_id}/activate",
handle_activate_trigger,
+4 -17
@@ -70,13 +70,7 @@ async def handle_list_nodes(request: web.Request) -> web.Response:
from pathlib import Path
state_path = (
Path.home()
/ ".hive"
/ "agents"
/ session.worker_path.name
/ "sessions"
/ worker_session_id
/ "state.json"
Path.home() / ".hive" / "agents" / session.worker_path.name / "sessions" / worker_session_id / "state.json"
)
if state_path.exists():
try:
@@ -97,8 +91,7 @@ async def handle_list_nodes(request: web.Request) -> web.Response:
pass
edges = [
{"source": e.source, "target": e.target, "condition": e.condition, "priority": e.priority}
for e in graph.edges
{"source": e.source, "target": e.target, "condition": e.condition, "priority": e.priority} for e in graph.edges
]
rt = session.colony_runtime
entry_points = [
@@ -108,11 +101,7 @@ async def handle_list_nodes(request: web.Request) -> web.Response:
"entry_node": ep.entry_node,
"trigger_type": ep.trigger_type,
"trigger_config": ep.trigger_config,
**(
{"next_fire_in": nf}
if rt and (nf := rt.get_timer_next_fire_in(ep.id)) is not None
else {}
),
**({"next_fire_in": nf} if rt and (nf := rt.get_timer_next_fire_in(ep.id)) is not None else {}),
}
for ep in reg.entry_points.values()
]
@@ -250,9 +239,7 @@ async def handle_node_tools(request: web.Request) -> web.Response:
def register_routes(app: web.Application) -> None:
"""Register worker inspection routes."""
app.router.add_get("/api/sessions/{session_id}/colonies/{colony_id}/nodes", handle_list_nodes)
app.router.add_get(
"/api/sessions/{session_id}/colonies/{colony_id}/nodes/{node_id}", handle_get_node
)
app.router.add_get("/api/sessions/{session_id}/colonies/{colony_id}/nodes/{node_id}", handle_get_node)
app.router.add_get(
"/api/sessions/{session_id}/colonies/{colony_id}/nodes/{node_id}/criteria",
handle_node_criteria,
+15 -49
@@ -120,9 +120,7 @@ class SessionManager:
(blocking I/O) then started on the event loop.
"""
def __init__(
self, model: str | None = None, credential_store=None, queen_tool_registry=None
) -> None:
def __init__(self, model: str | None = None, credential_store=None, queen_tool_registry=None) -> None:
self._sessions: dict[str, Session] = {}
self._loading: set[str] = set()
self._model = model
@@ -350,9 +348,7 @@ class SessionManager:
_colony_metadata_path = agent_path / "metadata.json"
if _colony_metadata_path.exists():
try:
_colony_metadata = json.loads(
_colony_metadata_path.read_text(encoding="utf-8")
)
_colony_metadata = json.loads(_colony_metadata_path.read_text(encoding="utf-8"))
except (json.JSONDecodeError, OSError):
pass
@@ -426,9 +422,7 @@ class SessionManager:
# Start queen with worker profile + lifecycle + monitoring tools
worker_identity = (
build_worker_profile(session.colony_runtime, agent_path=agent_path)
if session.colony_runtime
else None
build_worker_profile(session.colony_runtime, agent_path=agent_path) if session.colony_runtime else None
)
await self._start_queen(
session,
@@ -678,14 +672,10 @@ class SessionManager:
)
# Start the worker's agent loop in the background
session.queen_task = asyncio.create_task(
session.queen_executor.run(initial_message=initial_prompt)
)
session.queen_task = asyncio.create_task(session.queen_executor.run(initial_message=initial_prompt))
# Set up event persistence
if session.event_bus and queen_dir:
from framework.host.event_bus import EventBus
session.event_bus.start_persistence(queen_dir, iteration_offset=iteration_offset)
logger.info(
@@ -920,9 +910,7 @@ class SessionManager:
state.setdefault("result", {})["error"] = "Stale session: runtime restarted"
state.setdefault("timestamps", {})["updated_at"] = datetime.now().isoformat()
state_path.write_text(json.dumps(state, indent=2), encoding="utf-8")
logger.info(
"Marked stale session '%s' as cancelled for agent '%s'", d.name, agent_path.name
)
logger.info("Marked stale session '%s' as cancelled for agent '%s'", d.name, agent_path.name)
except (json.JSONDecodeError, OSError) as e:
logger.warning("Failed to clean up stale session %s: %s", d.name, e)
@@ -966,13 +954,12 @@ class SessionManager:
store = session.colony_runtime._session_store
state = await store.read_state(session_id)
if state and state.active_triggers:
from framework.host.event_bus import AgentEvent, EventType
from framework.tools.queen_lifecycle_tools import (
_start_trigger_timer,
_start_trigger_webhook,
)
from framework.host.event_bus import AgentEvent, EventType
runner = getattr(session, "runner", None)
colony_entry = runner.graph.entry_node if runner else None
saved_tasks = getattr(state, "trigger_tasks", {}) or {}
@@ -1006,11 +993,7 @@ class SessionManager:
"trigger_type": tdef.trigger_type,
"trigger_config": tdef.trigger_config,
"name": tdef.description or tdef.id,
**(
{"entry_node": colony_entry}
if colony_entry
else {}
),
**({"entry_node": colony_entry} if colony_entry else {}),
},
)
)
@@ -1059,17 +1042,13 @@ class SessionManager:
meta_path = _queen_session_dir(storage_session_id, session.queen_name) / "meta.json"
try:
_agent_name = (
session.worker_info.name
if session.worker_info
else str(agent_path.name).replace("_", " ").title()
session.worker_info.name if session.worker_info else str(agent_path.name).replace("_", " ").title()
)
existing_meta = {}
if meta_path.exists():
existing_meta = json.loads(meta_path.read_text(encoding="utf-8"))
existing_meta["agent_name"] = _agent_name
existing_meta["agent_path"] = (
str(session.worker_path) if session.worker_path else str(agent_path)
)
existing_meta["agent_path"] = str(session.worker_path) if session.worker_path else str(agent_path)
meta_path.write_text(json.dumps(existing_meta), encoding="utf-8")
except OSError:
pass
@@ -1188,9 +1167,7 @@ class SessionManager:
self._background_tasks.add(task)
task.add_done_callback(self._background_tasks.discard)
except Exception:
logger.warning(
"Session '%s': failed to spawn shutdown reflection", session_id, exc_info=True
)
logger.warning("Session '%s': failed to spawn shutdown reflection", session_id, exc_info=True)
if session.queen_task is not None:
session.queen_task.cancel()
@@ -1295,11 +1272,7 @@ class SessionManager:
_agent_name = (
session.worker_info.name
if session.worker_info
else (
str(session.worker_path.name).replace("_", " ").title()
if session.worker_path
else None
)
else (str(session.worker_path.name).replace("_", " ").title() if session.worker_path else None)
)
# Merge into existing meta.json to preserve fields written by
# _update_meta_json (e.g. phase, agent_path set during building).
@@ -1354,8 +1327,7 @@ class SessionManager:
if max_iter >= 0:
iteration_offset = max_iter + 1
logger.info(
"Session '%s' resuming with iteration_offset=%d"
" (from events.jsonl max), last phase: %s",
"Session '%s' resuming with iteration_offset=%d (from events.jsonl max), last phase: %s",
session.id,
iteration_offset,
last_phase or "unknown",
@@ -1496,8 +1468,7 @@ class SessionManager:
session.colony = colony
logger.info(
"_start_queen: unified ColonyRuntime ready for session %s "
"(%d tools, storage=%s)",
"_start_queen: unified ColonyRuntime ready for session %s (%d tools, storage=%s)",
session.id,
len(queen_tools),
queen_dir,
@@ -1529,10 +1500,7 @@ class SessionManager:
detail = cfg.get("cron") or f"every {cfg.get('interval_minutes', '?')} min"
task_info = f' -> task: "{t.task}"' if t.task else " (no task configured)"
parts.append(f" - {t.id} ({t.trigger_type}: {detail}){task_info}")
trigger_lines = (
"\n\nAvailable triggers (inactive — use set_trigger to activate):\n"
+ "\n".join(parts)
)
trigger_lines = "\n\nAvailable triggers (inactive — use set_trigger to activate):\n" + "\n".join(parts)
await node.inject_event(f"[SYSTEM] Colony loaded.{profile}{trigger_lines}")
@@ -1835,9 +1803,7 @@ class SessionManager:
if isinstance(content, list):
# Anthropic-style content blocks
content = " ".join(
b.get("text", "")
for b in content
if isinstance(b, dict) and b.get("type") == "text"
b.get("text", "") for b in content if isinstance(b, dict) and b.get("type") == "text"
)
if content and msg.get("role") == "assistant":
last_message = content[:120].strip()
+18 -53
@@ -259,15 +259,11 @@ def _write_sample_session(base: Path, session_id: str):
conv_dir = session_dir / "conversations" / "node_a" / "parts"
conv_dir.mkdir(parents=True)
(conv_dir / "0001.json").write_text(json.dumps({"seq": 1, "role": "user", "content": "hello"}))
(conv_dir / "0002.json").write_text(
json.dumps({"seq": 2, "role": "assistant", "content": "hi there"})
)
(conv_dir / "0002.json").write_text(json.dumps({"seq": 2, "role": "assistant", "content": "hi there"}))
conv_dir_b = session_dir / "conversations" / "node_b" / "parts"
conv_dir_b.mkdir(parents=True)
(conv_dir_b / "0003.json").write_text(
json.dumps({"seq": 3, "role": "user", "content": "continue"})
)
(conv_dir_b / "0003.json").write_text(json.dumps({"seq": 3, "role": "user", "content": "continue"}))
# Logs
logs_dir = session_dir / "logs"
@@ -291,9 +287,7 @@ def _write_sample_session(base: Path, session_id: str):
"attention_reasons": ["retried"],
"total_steps": 1,
}
(logs_dir / "details.jsonl").write_text(
json.dumps(detail_a) + "\n" + json.dumps(detail_b) + "\n"
)
(logs_dir / "details.jsonl").write_text(json.dumps(detail_a) + "\n" + json.dumps(detail_b) + "\n")
step_a = {"node_id": "node_a", "step_index": 0, "llm_text": "thinking..."}
step_b = {"node_id": "node_b", "step_index": 0, "llm_text": "retrying..."}
@@ -302,9 +296,7 @@ def _write_sample_session(base: Path, session_id: str):
return session_id, session_dir, state
def _write_queen_session(
tmp_path: Path, queen_id: str, session_id: str, meta: dict | None = None
) -> Path:
def _write_queen_session(tmp_path: Path, queen_id: str, session_id: str, meta: dict | None = None) -> Path:
"""Create a persisted queen session directory for restore tests."""
session_dir = tmp_path / ".hive" / "agents" / "queens" / queen_id / "sessions" / session_id
session_dir.mkdir(parents=True)
@@ -394,9 +386,7 @@ class TestSessionCRUD:
async def test_create_session_with_worker_forwards_session_id(self):
app = create_app()
manager = app["manager"]
manager.create_session_with_worker_colony = AsyncMock(
return_value=_make_session(agent_id="my-custom-session")
)
manager.create_session_with_worker_colony = AsyncMock(return_value=_make_session(agent_id="my-custom-session"))
async with TestClient(TestServer(app)) as client:
resp = await client.post(
@@ -600,14 +590,10 @@ class TestMessageBootstrap:
manager.build_llm = MagicMock(return_value=MagicMock())
manager.stop_session = AsyncMock()
manager.create_session = AsyncMock()
monkeypatch.setattr(
routes_messages, "select_queen", AsyncMock(return_value="queen_technology")
)
monkeypatch.setattr(routes_messages, "select_queen", AsyncMock(return_value="queen_technology"))
async with TestClient(TestServer(app)) as client:
resp = await client.post(
"/api/messages/classify", json={"message": "Build me a scraper"}
)
resp = await client.post("/api/messages/classify", json={"message": "Build me a scraper"})
assert resp.status == 200
data = await resp.json()
# Assert inside the async-with so app shutdown (which stops
@@ -623,9 +609,7 @@ class TestQueenSessionSelection:
@pytest.mark.asyncio
async def test_select_queen_session_rejects_foreign_session(self, monkeypatch, tmp_path):
_patch_queen_storage(monkeypatch, tmp_path)
_write_queen_session(
tmp_path, "queen_growth", "other_session", {"queen_id": "queen_growth"}
)
_write_queen_session(tmp_path, "queen_growth", "other_session", {"queen_id": "queen_growth"})
app = create_app()
async with TestClient(TestServer(app)) as client:
@@ -663,9 +647,7 @@ class TestQueenSessionSelection:
assert any(call.args == ("other_live",) for call in manager.stop_session.await_args_list)
@pytest.mark.asyncio
async def test_select_queen_session_restores_specific_history_session(
self, monkeypatch, tmp_path
):
async def test_select_queen_session_restores_specific_history_session(self, monkeypatch, tmp_path):
_patch_queen_storage(monkeypatch, tmp_path)
_write_queen_session(
tmp_path,
@@ -1167,9 +1149,7 @@ class TestGraphNodes:
assert data["entry_node"] == "node_a"
@pytest.mark.asyncio
async def test_list_nodes_with_session_enrichment(
self, nodes_and_edges, sample_session, tmp_agent_dir
):
async def test_list_nodes_with_session_enrichment(self, nodes_and_edges, sample_session, tmp_agent_dir):
session_id, session_dir, state = sample_session
tmp_path, agent_name, base = tmp_agent_dir
nodes, edges = nodes_and_edges
@@ -1182,9 +1162,7 @@ class TestGraphNodes:
app = _make_app_with_session(session)
async with TestClient(TestServer(app)) as client:
resp = await client.get(
f"/api/sessions/test_agent/graphs/primary/nodes?session_id={session_id}"
)
resp = await client.get(f"/api/sessions/test_agent/graphs/primary/nodes?session_id={session_id}")
assert resp.status == 200
data = await resp.json()
node_map = {n["id"]: n for n in data["nodes"]}
@@ -1233,9 +1211,7 @@ class TestGraphNodes:
assert resp.status == 200
data = await resp.json()
assert "system_prompt" in data
assert (
data["system_prompt"] == "You are a helpful assistant that produces valid results."
)
assert data["system_prompt"] == "You are a helpful assistant that produces valid results."
# Node without system_prompt should return empty string
resp2 = await client.get("/api/sessions/test_agent/graphs/primary/nodes/node_b")
@@ -1270,9 +1246,7 @@ class TestNodeCriteria:
assert data["output_keys"] == ["result"]
@pytest.mark.asyncio
async def test_criteria_with_log_enrichment(
self, nodes_and_edges, sample_session, tmp_agent_dir
):
async def test_criteria_with_log_enrichment(self, nodes_and_edges, sample_session, tmp_agent_dir):
"""Criteria endpoint enriched with last execution from logs."""
session_id, session_dir, state = sample_session
tmp_path, agent_name, base = tmp_agent_dir
@@ -1293,8 +1267,7 @@ class TestNodeCriteria:
async with TestClient(TestServer(app)) as client:
resp = await client.get(
f"/api/sessions/test_agent/graphs/primary/nodes/node_b/criteria"
f"?session_id={session_id}"
f"/api/sessions/test_agent/graphs/primary/nodes/node_b/criteria?session_id={session_id}"
)
assert resp.status == 200
data = await resp.json()
@@ -1311,9 +1284,7 @@ class TestNodeCriteria:
app = _make_app_with_session(session)
async with TestClient(TestServer(app)) as client:
resp = await client.get(
"/api/sessions/test_agent/graphs/primary/nodes/nonexistent/criteria"
)
resp = await client.get("/api/sessions/test_agent/graphs/primary/nodes/nonexistent/criteria")
assert resp.status == 404
@@ -1388,9 +1359,7 @@ class TestLogs:
app = _make_app_with_session(session)
async with TestClient(TestServer(app)) as client:
resp = await client.get(
f"/api/sessions/test_agent/logs?session_id={session_id}&level=summary"
)
resp = await client.get(f"/api/sessions/test_agent/logs?session_id={session_id}&level=summary")
assert resp.status == 200
data = await resp.json()
assert data["run_id"] == session_id
@@ -1411,9 +1380,7 @@ class TestLogs:
app = _make_app_with_session(session)
async with TestClient(TestServer(app)) as client:
resp = await client.get(
f"/api/sessions/test_agent/logs?session_id={session_id}&level=details"
)
resp = await client.get(f"/api/sessions/test_agent/logs?session_id={session_id}&level=details")
assert resp.status == 200
data = await resp.json()
assert data["session_id"] == session_id
@@ -1435,9 +1402,7 @@ class TestLogs:
app = _make_app_with_session(session)
async with TestClient(TestServer(app)) as client:
resp = await client.get(
f"/api/sessions/test_agent/logs?session_id={session_id}&level=tools"
)
resp = await client.get(f"/api/sessions/test_agent/logs?session_id={session_id}&level=tools")
assert resp.status == 200
data = await resp.json()
assert data["session_id"] == session_id
+10 -4
@@ -26,12 +26,15 @@ Before replying: scan <available_skills> <description> entries.
- If multiple could apply: choose the most specific one, then read/follow it.
- If none clearly apply: do not read any SKILL.md.
Constraints: never read more than one skill up front; only read after selecting.
- When a skill drives external API writes (Gmail, Calendar, GitHub, etc.), assume rate limits: prefer fewer larger writes, avoid tight one-item loops, serialize bursts when possible, and respect 429/Retry-After.
- When a skill drives external API writes (Gmail, Calendar, GitHub, etc.),
assume rate limits: prefer fewer larger writes, avoid tight one-item loops,
serialize bursts when possible, and respect 429/Retry-After.
The following skills provide specialized instructions for specific tasks.
Use `read_file` to load a skill's SKILL.md when the task matches its description.
When a skill file references a relative path, resolve it against the skill directory (parent of SKILL.md) and use that absolute path in tool commands."""
When a skill file references a relative path, resolve it against the
skill directory (parent of SKILL.md) and use that absolute path in tool commands."""
_MANDATORY_HEADER_COMPACT = """## Skills (mandatory)
Before replying: scan <available_skills> <name> entries.
@@ -39,12 +42,15 @@ Before replying: scan <available_skills> <name> entries.
- If multiple could apply: choose the most specific one, then read/follow it.
- If none clearly apply: do not read any SKILL.md.
Constraints: never read more than one skill up front; only read after selecting.
- When a skill drives external API writes (Gmail, Calendar, GitHub, etc.), assume rate limits: prefer fewer larger writes, avoid tight one-item loops, serialize bursts when possible, and respect 429/Retry-After.
- When a skill drives external API writes (Gmail, Calendar, GitHub, etc.),
assume rate limits: prefer fewer larger writes, avoid tight one-item loops,
serialize bursts when possible, and respect 429/Retry-After.
The following skills provide specialized instructions for specific tasks.
Use `read_file` to load a skill's SKILL.md when the task matches its name.
When a skill file references a relative path, resolve it against the skill directory (parent of SKILL.md) and use that absolute path in tool commands."""
When a skill file references a relative path, resolve it against the
skill directory (parent of SKILL.md) and use that absolute path in tool commands."""
class SkillCatalog:
+9 -26
@@ -134,9 +134,7 @@ def register_skill_commands(subparsers) -> None:
info_parser.set_defaults(func=cmd_skill_info)
# hive skill init
init_parser = skill_sub.add_parser(
"init", help="Scaffold a new skill directory with a SKILL.md template"
)
init_parser = skill_sub.add_parser("init", help="Scaffold a new skill directory with a SKILL.md template")
init_parser.add_argument("--name", dest="skill_name", default=None, metavar="NAME")
init_parser.add_argument(
"--dir",
@@ -193,9 +191,7 @@ def register_skill_commands(subparsers) -> None:
update_parser.set_defaults(func=cmd_skill_update)
# hive skill search
search_parser = skill_sub.add_parser(
"search", help="Search the skill registry by name, tag, or description"
)
search_parser = skill_sub.add_parser("search", help="Search the skill registry by name, tag, or description")
search_parser.add_argument("query", help="Search query string")
search_parser.add_argument("--json", action="store_true", help="Output as JSON")
search_parser.set_defaults(func=cmd_skill_search)
@@ -231,9 +227,7 @@ def register_skill_commands(subparsers) -> None:
fork_parser.set_defaults(func=cmd_skill_fork)
# hive skill test
test_parser = skill_sub.add_parser(
"test", help="Run a skill in isolation or execute its eval suite (CLI-9)"
)
test_parser = skill_sub.add_parser("test", help="Run a skill in isolation or execute its eval suite (CLI-9)")
test_parser.add_argument("path", help="Path to SKILL.md or its parent directory")
test_parser.add_argument(
"--input",
@@ -649,9 +643,7 @@ def cmd_skill_validate(args) -> int:
print(f"{path} — valid ({len(result.warnings)} warning(s))")
return 0
else:
print(
f"{path} — invalid ({len(result.errors)} error(s), {len(result.warnings)} warning(s))"
)
print(f"{path} — invalid ({len(result.errors)} error(s), {len(result.warnings)} warning(s))")
return 1
@@ -672,9 +664,7 @@ def cmd_skill_doctor(args) -> int:
for skill_name, dir_name in SKILL_REGISTRY.items():
skill_md = _DEFAULT_SKILLS_DIR / dir_name / "SKILL.md"
if use_json:
report = _doctor_skill_file(
skill_name, skill_md, parse_skill_md, json_mode=True, scope="framework"
)
report = _doctor_skill_file(skill_name, skill_md, parse_skill_md, json_mode=True, scope="framework")
overall_errors += len(report["errors"])
skill_results.append(report)
else:
@@ -696,9 +686,7 @@ def cmd_skill_doctor(args) -> int:
candidate = USER_SKILLS_DIR / args.name / "SKILL.md"
if candidate.exists():
if use_json:
report = _doctor_skill_file(
args.name, candidate, parse_skill_md, json_mode=True, scope="user"
)
report = _doctor_skill_file(args.name, candidate, parse_skill_md, json_mode=True, scope="user")
print(_json.dumps({"skills": [report], "total_errors": len(report["errors"])}))
return 1 if report["errors"] else 0
print(f"\nChecking skill: {args.name} [user]")
@@ -838,8 +826,7 @@ def cmd_skill_update(args) -> int:
if not installed_version and not use_json:
print(
f"Warning: installed skill '{args.name}' has no version field — "
"cannot compare. Re-installing.",
f"Warning: installed skill '{args.name}' has no version field — cannot compare. Re-installing.",
file=sys.stderr,
)
@@ -1057,9 +1044,7 @@ def cmd_skill_test(args) -> int:
# ── 4. Structural-only mode (no LLM needed) ───────────────────────────────
if not has_input and not has_evals:
doctor_errors = _doctor_skill_file(
skill.name, path, parse_skill_md, json_mode=use_json, scope="user"
)
doctor_errors = _doctor_skill_file(skill.name, path, parse_skill_md, json_mode=use_json, scope="user")
if use_json:
print(
_json.dumps(
@@ -1209,9 +1194,7 @@ def cmd_skill_test(args) -> int:
constraint=assertion,
source_document=eval_prompt,
summary=skill_response,
criteria=(
"Evaluate whether the skill response satisfies the assertion."
),
criteria=("Evaluate whether the skill response satisfies the assertion."),
)
passes = judged.get("passes", False)
explanation = judged.get("explanation", "")
+2 -5
@@ -60,9 +60,7 @@ def _apply_overrides(skill_name: str, body: str, overrides: dict[str, Any]) -> s
# Convert float warn_at_usage_ratio → warn_at_usage_ratio_pct for the placeholder
if "warn_at_usage_ratio" in overrides:
overrides = dict(overrides)
overrides.setdefault(
"warn_at_usage_ratio_pct", int(float(overrides["warn_at_usage_ratio"]) * 100)
)
overrides.setdefault("warn_at_usage_ratio_pct", int(float(overrides["warn_at_usage_ratio"]) * 100))
values = {**defaults, **overrides}
for key, val in values.items():
body = body.replace(f"{{{{{key}}}}}", str(val))
@@ -189,8 +187,7 @@ class DefaultSkillManager:
approx_tokens = len(combined) // 4
if approx_tokens > 2000:
logger.warning(
"Default skill protocols exceed 2000 token budget "
"(~%d tokens, %d chars). Consider trimming.",
"Default skill protocols exceed 2000 token budget (~%d tokens, %d chars). Consider trimming.",
approx_tokens,
len(combined),
)
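The budget check above approximates tokens as one per four characters of the combined protocol text. A minimal standalone version of that heuristic (the names and sample string are illustrative; only the 2000-token budget comes from the warning above):

```python
def approx_tokens(text: str) -> int:
    """Rough estimate: about 4 characters per token for English prose."""
    return len(text) // 4


combined = "skill protocol text " * 500  # 10,000 chars -> ~2,500 estimated tokens
if approx_tokens(combined) > 2000:
    print(f"over budget (~{approx_tokens(combined)} tokens, {len(combined)} chars)")
```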
+1 -2
@@ -23,8 +23,7 @@ logger = logging.getLogger(__name__)
# Default registry index URL (Phase 3 repo, may not exist yet)
_DEFAULT_REGISTRY_URL = (
"https://raw.githubusercontent.com/hive-skill-registry/"
"hive-skill-registry/main/skill_index.json"
"https://raw.githubusercontent.com/hive-skill-registry/hive-skill-registry/main/skill_index.json"
)
_CACHE_DIR = Path.home() / ".hive" / "registry_cache"
+1 -3
@@ -34,9 +34,7 @@ class SkillError(Exception):
self.what = what
self.why = why
self.fix = fix
self.message = (
f"[{self.code.value}]\nWhat failed: {self.what}\nWhy: {self.why}\nFix: {self.fix}"
)
self.message = f"[{self.code.value}]\nWhat failed: {self.what}\nWhy: {self.why}\nFix: {self.fix}"
super().__init__(self.message)
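The joined f-string keeps the same four-line error shape (code, what, why, fix). A self-contained sketch of what that message looks like when raised; the enum member, its value, and the stand-in classes below are assumptions made for the example, only the message format comes from the hunk:

```python
from enum import Enum


class SkillErrorCode(Enum):  # stand-in enum; the real one lives in the skills package
    INVALID_FRONTMATTER = "invalid_frontmatter"


class SkillError(Exception):  # stand-in mirroring the constructor shown above
    def __init__(self, code, what: str, why: str, fix: str) -> None:
        self.code, self.what, self.why, self.fix = code, what, why, fix
        self.message = f"[{self.code.value}]\nWhat failed: {self.what}\nWhy: {self.why}\nFix: {self.fix}"
        super().__init__(self.message)


print(
    SkillError(
        SkillErrorCode.INVALID_FRONTMATTER,
        what="parse SKILL.md frontmatter",
        why="description contains an unquoted colon",
        fix='wrap the value in quotes, e.g. description: "Use for: research"',
    )
)
```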
+1 -1
@@ -15,8 +15,8 @@ descriptions to get picked up on demand.
from __future__ import annotations
import logging
from collections.abc import Iterable
from pathlib import Path
from typing import Iterable
logger = logging.getLogger(__name__)
+3 -10
@@ -413,8 +413,7 @@ class TrustGate:
return
self._print("")
self._print(
f"{Colors.YELLOW}Security notice:{Colors.NC} Skills inject instructions "
"into the agent's system prompt."
f"{Colors.YELLOW}Security notice:{Colors.NC} Skills inject instructions into the agent's system prompt."
)
self._print(
" Only load skills from sources you trust. "
@@ -441,10 +440,7 @@ class TrustGate:
p(f"{Colors.YELLOW}{'=' * 60}{Colors.NC}")
p("")
proj_label = str(project_dir) if project_dir else "this project"
p(
f" The project at {Colors.CYAN}{proj_label}{Colors.NC} wants to load "
f"{len(project_skills)} skill(s)"
)
p(f" The project at {Colors.CYAN}{proj_label}{Colors.NC} wants to load {len(project_skills)} skill(s)")
p(" that will inject instructions into the agent's system prompt.")
if repo_key:
p(f" Source: {Colors.BOLD}{repo_key}{Colors.NC}")
@@ -458,10 +454,7 @@ class TrustGate:
p(" Options:")
p(f" {Colors.CYAN}1){Colors.NC} Trust this session only")
p(f" {Colors.CYAN}2){Colors.NC} Trust permanently — remember for future runs")
p(
f" {Colors.DIM}3) Deny"
f" — skip all project-scope skills from this repo{Colors.NC}"
)
p(f" {Colors.DIM}3) Deny — skip all project-scope skills from this repo{Colors.NC}")
p(f"{Colors.YELLOW}{'' * 60}{Colors.NC}")
def _prompt_consent(self, Colors) -> str: # noqa: N803
+5 -15
@@ -82,8 +82,7 @@ def validate_strict(path: Path) -> ValidationResult:
frontmatter = yaml.safe_load(raw_yaml)
except yaml.YAMLError as exc:
errors.append(
f"YAML parse error: {exc}. "
'Wrap values containing colons in quotes, e.g. description: "Use for: research".'
f'YAML parse error: {exc}. Wrap values containing colons in quotes, e.g. description: "Use for: research".'
)
return ValidationResult(passed=False, errors=errors, warnings=warnings)
@@ -101,10 +100,7 @@ def validate_strict(path: Path) -> ValidationResult:
# 6. name present and non-empty (no directory-name fallback in strict mode)
name = frontmatter.get("name")
if not name or not str(name).strip():
errors.append(
"Missing required field: 'name' must be present. "
"Add 'name: your-skill-name' to the frontmatter."
)
errors.append("Missing required field: 'name' must be present. Add 'name: your-skill-name' to the frontmatter.")
else:
name = str(name).strip()
parent_dir_name = path.parent.name
@@ -112,8 +108,7 @@ def validate_strict(path: Path) -> ValidationResult:
# 7. name length <= 64 chars
if len(name) > _MAX_NAME_LENGTH:
errors.append(
f"Skill name '{name}' is {len(name)} characters — "
f"maximum is {_MAX_NAME_LENGTH}. Shorten the name."
f"Skill name '{name}' is {len(name)} characters — maximum is {_MAX_NAME_LENGTH}. Shorten the name."
)
# 8. name matches parent directory (dot-namespace prefix allowed: hive.X with dir X)
@@ -125,10 +120,7 @@ def validate_strict(path: Path) -> ValidationResult:
# 9. body non-empty
if not body:
errors.append(
"Skill body (instructions) is empty. "
"Add markdown instructions after the closing --- delimiter."
)
errors.append("Skill body (instructions) is empty. Add markdown instructions after the closing --- delimiter.")
# 10. license present — warning only
if not frontmatter.get("license"):
@@ -142,9 +134,7 @@ def validate_strict(path: Path) -> ValidationResult:
for script_path in sorted(scripts_dir.iterdir()):
if script_path.is_file():
if not (script_path.stat().st_mode & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)):
errors.append(
f"Script not executable: {script_path.name}. Run: chmod +x {script_path}"
)
errors.append(f"Script not executable: {script_path.name}. Run: chmod +x {script_path}")
# 12. allowed-tools entries are non-empty strings — warning if malformed
allowed_tools = frontmatter.get("allowed-tools")
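Rule 8 in the hunk above allows a dot-namespace prefix (a skill named hive.X living in directory X), but the comparison itself falls outside the visible lines. A sketch of one way that rule could be expressed; this is an illustration, not the repository's implementation:

```python
def name_matches_directory(name: str, parent_dir_name: str) -> bool:
    """Accept exact matches and dot-namespaced names whose last segment matches the directory."""
    if name == parent_dir_name:
        return True
    # "hive.research" is allowed to live in a directory named "research".
    return "." in name and name.rsplit(".", 1)[-1] == parent_dir_name


assert name_matches_directory("hive.research", "research")
assert not name_matches_directory("hive.research", "summarize")
```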
+2 -6
@@ -123,9 +123,7 @@ class CheckpointStore:
return None
try:
return CheckpointIndex.model_validate_json(
self.index_path.read_text(encoding="utf-8")
)
return CheckpointIndex.model_validate_json(self.index_path.read_text(encoding="utf-8"))
except Exception as e:
logger.error(f"Failed to load checkpoint index: {e}")
return None
@@ -317,9 +315,7 @@ class CheckpointStore:
# Update latest_checkpoint_id if we removed the latest
if index.latest_checkpoint_id == checkpoint_id:
index.latest_checkpoint_id = (
index.checkpoints[-1].checkpoint_id if index.checkpoints else None
)
index.latest_checkpoint_id = index.checkpoints[-1].checkpoint_id if index.checkpoints else None
# Write updated index
await asyncio.to_thread(_write, index)
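The store persists its index off the event loop with asyncio.to_thread. A stripped-down sketch of that pattern (the file name and payload are placeholders, not the real CheckpointIndex schema):

```python
import asyncio
import json
from pathlib import Path


def _write(path: Path, payload: dict) -> None:
    path.write_text(json.dumps(payload), encoding="utf-8")


async def save_index(path: Path, payload: dict) -> None:
    # Blocking file I/O runs in a worker thread so the event loop stays responsive.
    await asyncio.to_thread(_write, path, payload)


asyncio.run(save_index(Path("checkpoint_index.json"), {"latest_checkpoint_id": None, "checkpoints": []}))
```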
+4 -14
@@ -96,20 +96,14 @@ def batch_approval(
# Validate request
valid, error = req.validate_action()
if not valid:
results.append(
ApprovalResult.error_result(req.test_id, req.action, error or "Invalid request")
)
results.append(ApprovalResult.error_result(req.test_id, req.action, error or "Invalid request"))
counts["errors"] += 1
continue
# Load test
test = storage.load_test(goal_id, req.test_id)
if not test:
results.append(
ApprovalResult.error_result(
req.test_id, req.action, f"Test {req.test_id} not found"
)
)
results.append(ApprovalResult.error_result(req.test_id, req.action, f"Test {req.test_id} not found"))
counts["errors"] += 1
continue
@@ -132,9 +126,7 @@ def batch_approval(
storage.update_test(test)
results.append(
ApprovalResult.success_result(
req.test_id, req.action, f"Test {req.action.value}d successfully"
)
ApprovalResult.success_result(req.test_id, req.action, f"Test {req.action.value}d successfully")
)
except Exception as e:
@@ -233,9 +225,7 @@ def _process_action(
test.approve()
storage.update_test(test)
print("✓ Approved (no modifications)")
return ApprovalResult.success_result(
test.id, ApprovalAction.APPROVE, "No modifications made"
)
return ApprovalResult.success_result(test.id, ApprovalAction.APPROVE, "No modifications made")
elif action == ApprovalAction.SKIP:
print("⏭ Skipped (remains pending)")
+1 -3
@@ -61,9 +61,7 @@ class ApprovalResult(BaseModel):
timestamp: datetime = Field(default_factory=datetime.now)
@classmethod
def success_result(
cls, test_id: str, action: ApprovalAction, message: str | None = None
) -> "ApprovalResult":
def success_result(cls, test_id: str, action: ApprovalAction, message: str | None = None) -> "ApprovalResult":
"""Create a successful result."""
return cls(
test_id=test_id,
+4 -10
@@ -81,9 +81,7 @@ class ErrorCategorizer:
def __init__(self):
"""Initialize categorizer with compiled patterns."""
self._logic_patterns = [re.compile(p, re.IGNORECASE) for p in self.LOGIC_ERROR_PATTERNS]
self._impl_patterns = [
re.compile(p, re.IGNORECASE) for p in self.IMPLEMENTATION_ERROR_PATTERNS
]
self._impl_patterns = [re.compile(p, re.IGNORECASE) for p in self.IMPLEMENTATION_ERROR_PATTERNS]
self._edge_patterns = [re.compile(p, re.IGNORECASE) for p in self.EDGE_CASE_PATTERNS]
def categorize(self, result: TestResult) -> ErrorCategory | None:
@@ -192,8 +190,7 @@ class ErrorCategorizer:
"The goal specification may not accurately describe the desired behavior."
),
ErrorCategory.IMPLEMENTATION_ERROR: (
"Fix the code in agent nodes/edges. "
"There's a bug in the implementation that needs to be corrected."
"Fix the code in agent nodes/edges. There's a bug in the implementation that needs to be corrected."
),
ErrorCategory.EDGE_CASE: (
"Add a new test for this edge case scenario. "
@@ -226,17 +223,14 @@ class ErrorCategorizer:
"action": "Fix nodes/edges implementation",
"restart_required": False,
"description": (
"There's a code bug. Fix the agent implementation, "
"then re-run Eval (skip Goal stage)."
"There's a code bug. Fix the agent implementation, then re-run Eval (skip Goal stage)."
),
},
ErrorCategory.EDGE_CASE: {
"stage": "Eval",
"action": "Add new test only",
"restart_required": False,
"description": (
"This is a new scenario. Add a test for it and continue in the Eval stage."
),
"description": ("This is a new scenario. Add a test for it and continue in the Eval stage."),
},
}
return guidance.get(
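The categorizer compiles each pattern list case-insensitively and assigns a failure to the first category whose patterns match its output. A self-contained sketch of that approach; the pattern strings and category labels below are illustrative, not the project's actual lists:

```python
import re

LOGIC_ERROR_PATTERNS = [r"expected .* but got", r"assertion failed"]
IMPLEMENTATION_ERROR_PATTERNS = [r"AttributeError", r"KeyError", r"TypeError"]
EDGE_CASE_PATTERNS = [r"empty input", r"unicode", r"timeout"]

_COMPILED = {
    "logic_error": [re.compile(p, re.IGNORECASE) for p in LOGIC_ERROR_PATTERNS],
    "implementation_error": [re.compile(p, re.IGNORECASE) for p in IMPLEMENTATION_ERROR_PATTERNS],
    "edge_case": [re.compile(p, re.IGNORECASE) for p in EDGE_CASE_PATTERNS],
}


def categorize(failure_text: str) -> str | None:
    """Return the first category whose patterns match, or None if uncategorized."""
    for category, patterns in _COMPILED.items():
        if any(p.search(failure_text) for p in patterns):
            return category
    return None


print(categorize("KeyError: 'user_id'"))      # implementation_error
print(categorize("flux capacitor overload"))  # None
```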
+3 -6
@@ -244,12 +244,10 @@ class DebugTool:
return {
"execution_path": run.metrics.nodes_executed if hasattr(run, "metrics") else [],
"decisions": [
d.model_dump() if hasattr(d, "model_dump") else str(d)
for d in getattr(run, "decisions", [])
d.model_dump() if hasattr(d, "model_dump") else str(d) for d in getattr(run, "decisions", [])
],
"problems": [
p.model_dump() if hasattr(p, "model_dump") else str(p)
for p in getattr(run, "problems", [])
p.model_dump() if hasattr(p, "model_dump") else str(p) for p in getattr(run, "problems", [])
],
"status": run.status.value if hasattr(run, "status") else "unknown",
}
@@ -284,8 +282,7 @@ class DebugTool:
if failures_by_category["uncategorized"]:
suggestions.append(
f"Found {len(failures_by_category['uncategorized'])} uncategorized failures. "
"Manual review required."
f"Found {len(failures_by_category['uncategorized'])} uncategorized failures. Manual review required."
)
return suggestions
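The report builder falls back from model_dump() to str() so plain strings and Pydantic-style objects can share one list. A tiny stand-alone illustration (the Decision class here is a stand-in, not the project's model):

```python
class Decision:
    def __init__(self, node: str, choice: str) -> None:
        self.node, self.choice = node, choice

    def model_dump(self) -> dict:
        return {"node": self.node, "choice": self.choice}


items = [Decision("router", "web_search"), "raw string entry"]
serialized = [d.model_dump() if hasattr(d, "model_dump") else str(d) for d in items]
print(serialized)  # [{'node': 'router', 'choice': 'web_search'}, 'raw string entry']
```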

Some files were not shown because too many files have changed in this diff.