diff --git a/.claude/skills/browser-edge-cases/scripts/test_02_twitter_scroll.py b/.claude/skills/browser-edge-cases/scripts/test_02_twitter_scroll.py index fa58cb6b..4d583c70 100644 --- a/.claude/skills/browser-edge-cases/scripts/test_02_twitter_scroll.py +++ b/.claude/skills/browser-edge-cases/scripts/test_02_twitter_scroll.py @@ -57,8 +57,7 @@ async def test_twitter_lazy_scroll(): # Count initial tweets initial_count = await bridge.evaluate( tab_id, - "(function() { return document.querySelectorAll(" - "'[data-testid=\"tweet\"]').length; })()", + "(function() { return document.querySelectorAll('[data-testid=\"tweet\"]').length; })()", ) print(f"Initial tweet count: {initial_count.get('result', 0)}") @@ -78,8 +77,7 @@ async def test_twitter_lazy_scroll(): # Count tweets after scroll count_result = await bridge.evaluate( tab_id, - "(function() { return document.querySelectorAll(" - "'[data-testid=\"tweet\"]').length; })()", + "(function() { return document.querySelectorAll('[data-testid=\"tweet\"]').length; })()", ) count = count_result.get("result", 0) print(f" Tweet count after scroll: {count}") @@ -87,8 +85,7 @@ async def test_twitter_lazy_scroll(): # Final count final_count = await bridge.evaluate( tab_id, - "(function() { return document.querySelectorAll(" - "'[data-testid=\"tweet\"]').length; })()", + "(function() { return document.querySelectorAll('[data-testid=\"tweet\"]').length; })()", ) final = final_count.get("result", 0) initial = initial_count.get("result", 0) diff --git a/.claude/skills/browser-edge-cases/scripts/test_06_shadow_dom.py b/.claude/skills/browser-edge-cases/scripts/test_06_shadow_dom.py index 5b977601..585976f3 100644 --- a/.claude/skills/browser-edge-cases/scripts/test_06_shadow_dom.py +++ b/.claude/skills/browser-edge-cases/scripts/test_06_shadow_dom.py @@ -130,9 +130,7 @@ async def test_shadow_dom(): print(f"JS click result: {click_result.get('result', {})}") # Verify click was registered - count_result = await bridge.evaluate( - tab_id, "(function() { return window.shadowClickCount || 0; })()" - ) + count_result = await bridge.evaluate(tab_id, "(function() { return window.shadowClickCount || 0; })()") count = count_result.get("result") or 0 print(f"Shadow click count: {count}") diff --git a/.claude/skills/browser-edge-cases/scripts/test_08_autocomplete.py b/.claude/skills/browser-edge-cases/scripts/test_08_autocomplete.py index 965a87df..4dd859ca 100644 --- a/.claude/skills/browser-edge-cases/scripts/test_08_autocomplete.py +++ b/.claude/skills/browser-edge-cases/scripts/test_08_autocomplete.py @@ -200,9 +200,7 @@ async def test_autocomplete(): print(f"Value after fast typing: '{fast_value}'") # Check events - events_result = await bridge.evaluate( - tab_id, "(function() { return window.inputEvents; })()" - ) + events_result = await bridge.evaluate(tab_id, "(function() { return window.inputEvents; })()") print(f"Events logged: {events_result.get('result', [])}") # Test 2: Slow typing (with delay) - should work @@ -220,8 +218,7 @@ async def test_autocomplete(): # Check if dropdown appeared dropdown_result = await bridge.evaluate( tab_id, - "(function() { return document.querySelectorAll(" - "'.autocomplete-items div').length; })()", + "(function() { return document.querySelectorAll('.autocomplete-items div').length; })()", ) dropdown_count = dropdown_result.get("result", 0) print(f"Dropdown items: {dropdown_count}") diff --git a/.claude/skills/browser-edge-cases/scripts/test_10_huge_dom.py b/.claude/skills/browser-edge-cases/scripts/test_10_huge_dom.py index 302aac74..e9955d65 100644 --- a/.claude/skills/browser-edge-cases/scripts/test_10_huge_dom.py +++ b/.claude/skills/browser-edge-cases/scripts/test_10_huge_dom.py @@ -87,9 +87,7 @@ async def test_huge_dom(): await bridge.navigate(tab_id, data_url, wait_until="load") # Count elements - count_result = await bridge.evaluate( - tab_id, "(function() { return document.querySelectorAll('*').length; })()" - ) + count_result = await bridge.evaluate(tab_id, "(function() { return document.querySelectorAll('*').length; })()") elem_count = count_result.get("result", 0) print(f"DOM elements: {elem_count}") @@ -122,14 +120,10 @@ async def test_huge_dom(): # Test 3: Real LinkedIn print("\n--- Test 3: Real LinkedIn Feed ---") - await bridge.navigate( - tab_id, "https://www.linkedin.com/feed", wait_until="load", timeout_ms=30000 - ) + await bridge.navigate(tab_id, "https://www.linkedin.com/feed", wait_until="load", timeout_ms=30000) await asyncio.sleep(2) - count_result = await bridge.evaluate( - tab_id, "(function() { return document.querySelectorAll('*').length; })()" - ) + count_result = await bridge.evaluate(tab_id, "(function() { return document.querySelectorAll('*').length; })()") elem_count = count_result.get("result", 0) print(f"LinkedIn DOM elements: {elem_count}") diff --git a/.claude/skills/browser-edge-cases/scripts/test_15_screenshot.py b/.claude/skills/browser-edge-cases/scripts/test_15_screenshot.py index 3a6c74d5..69f6fb80 100644 --- a/.claude/skills/browser-edge-cases/scripts/test_15_screenshot.py +++ b/.claude/skills/browser-edge-cases/scripts/test_15_screenshot.py @@ -136,10 +136,7 @@ async def test_selector_screenshot(bridge: BeelineBridge, tab_id: int, data_url: print(" ⚠ WARNING: Selector screenshot not smaller (may be full page)") return False else: - print( - " ⚠ NOT IMPLEMENTED: selector param ignored" - f" (returns full page) - error={result.get('error')}" - ) + print(f" ⚠ NOT IMPLEMENTED: selector param ignored (returns full page) - error={result.get('error')}") print(" NOTE: selector parameter exists in signature but is not used in implementation") return False @@ -181,9 +178,7 @@ async def test_screenshot_timeout(bridge: BeelineBridge, tab_id: int, data_url: print(f" ⚠ Fast enough to beat timeout: {err!r} in {elapsed:.3f}s") return True # Not a failure, just fast else: - print( - f" ⚠ Screenshot completed before timeout ({elapsed:.3f}s) - too fast to test timeout" - ) + print(f" ⚠ Screenshot completed before timeout ({elapsed:.3f}s) - too fast to test timeout") return True # Still ok, just very fast diff --git a/.claude/skills/browser-edge-cases/scripts/test_case.py b/.claude/skills/browser-edge-cases/scripts/test_case.py index 01ab285c..43a3dfa2 100644 --- a/.claude/skills/browser-edge-cases/scripts/test_case.py +++ b/.claude/skills/browser-edge-cases/scripts/test_case.py @@ -137,14 +137,8 @@ async def test_problematic_site(bridge: BeelineBridge, tab_id: int) -> dict: changed = False for key in after_data: if key in before_data: - b_val = ( - before_data[key].get("scrollTop", 0) - if isinstance(before_data[key], dict) - else 0 - ) - a_val = ( - after_data[key].get("scrollTop", 0) if isinstance(after_data[key], dict) else 0 - ) + b_val = before_data[key].get("scrollTop", 0) if isinstance(before_data[key], dict) else 0 + a_val = after_data[key].get("scrollTop", 0) if isinstance(after_data[key], dict) else 0 if a_val != b_val: print(f" ✓ CHANGE DETECTED: {key} scrolled from {b_val} to {a_val}") changed = True diff --git a/core/antigravity_auth.py b/core/antigravity_auth.py index ddb53e7a..782afa4d 100644 --- a/core/antigravity_auth.py +++ b/core/antigravity_auth.py @@ -52,9 +52,7 @@ _DEFAULT_REDIRECT_PORT = 51121 # This project reverse-engineered and published the public OAuth credentials # for Google's Antigravity/Cloud Code Assist API. # Source: https://github.com/NoeFabris/opencode-antigravity-auth -_CREDENTIALS_URL = ( - "https://raw.githubusercontent.com/NoeFabris/opencode-antigravity-auth/dev/src/constants.ts" -) +_CREDENTIALS_URL = "https://raw.githubusercontent.com/NoeFabris/opencode-antigravity-auth/dev/src/constants.ts" # Cached credentials fetched from public source _cached_client_id: str | None = None @@ -68,9 +66,7 @@ def _fetch_credentials_from_public_source() -> tuple[str | None, str | None]: return _cached_client_id, _cached_client_secret try: - req = urllib.request.Request( - _CREDENTIALS_URL, headers={"User-Agent": "Hive-Antigravity-Auth/1.0"} - ) + req = urllib.request.Request(_CREDENTIALS_URL, headers={"User-Agent": "Hive-Antigravity-Auth/1.0"}) with urllib.request.urlopen(req, timeout=10) as resp: content = resp.read().decode("utf-8") import re @@ -168,10 +164,7 @@ class OAuthCallbackHandler(BaseHTTPRequestHandler): if "code" in query and "state" in query: OAuthCallbackHandler.auth_code = query["code"][0] OAuthCallbackHandler.state = query["state"][0] - self._send_response( - "Authentication successful! You can close this window " - "and return to the terminal." - ) + self._send_response("Authentication successful! You can close this window and return to the terminal.") return self._send_response("Waiting for authentication...") @@ -296,8 +289,7 @@ def validate_credentials(access_token: str, project_id: str = _DEFAULT_PROJECT_I "Authorization": f"Bearer {access_token}", "Content-Type": "application/json", "User-Agent": ( - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " - "AppleWebKit/537.36 (KHTML, like Gecko) Antigravity/1.18.3" + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Antigravity/1.18.3" ), "X-Goog-Api-Client": "google-cloud-sdk vscode_cloudshelleditor/0.1", } @@ -316,9 +308,7 @@ def validate_credentials(access_token: str, project_id: str = _DEFAULT_PROJECT_I return False -def refresh_access_token( - refresh_token: str, client_id: str, client_secret: str | None -) -> dict | None: +def refresh_access_token(refresh_token: str, client_id: str, client_secret: str | None) -> dict | None: """Refresh the access token using the refresh token.""" data = { "grant_type": "refresh_token", @@ -361,9 +351,7 @@ def cmd_account_add(args: argparse.Namespace) -> int: access_token = account.get("access") refresh_token_str = account.get("refresh", "") refresh_token = refresh_token_str.split("|")[0] if refresh_token_str else None - project_id = ( - refresh_token_str.split("|")[1] if "|" in refresh_token_str else _DEFAULT_PROJECT_ID - ) + project_id = refresh_token_str.split("|")[1] if "|" in refresh_token_str else _DEFAULT_PROJECT_ID email = account.get("email", "unknown") expires_ms = account.get("expires", 0) expires_at = expires_ms / 1000.0 if expires_ms else 0.0 @@ -390,9 +378,7 @@ def cmd_account_add(args: argparse.Namespace) -> int: # Update the account account["access"] = new_access account["expires"] = int((time.time() + expires_in) * 1000) - accounts_data["last_refresh"] = time.strftime( - "%Y-%m-%dT%H:%M:%SZ", time.gmtime() - ) + accounts_data["last_refresh"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) save_accounts(accounts_data) # Validate the refreshed token diff --git a/core/framework/agent_loop/agent_loop.py b/core/framework/agent_loop/agent_loop.py index df737495..c31948a1 100644 --- a/core/framework/agent_loop/agent_loop.py +++ b/core/framework/agent_loop/agent_loop.py @@ -126,9 +126,7 @@ _STRIP_RE = re.compile( # The value cannot contain `<` or `\n` — those terminate the label. # Trailing whitespace (including the terminating newline) is consumed # so the visible text that follows starts cleanly. -_LABEL_STRIP_RE = re.compile( - r"<(?:" + "|".join(_INTERNAL_TAGS) + r")>[^<\n]*\s*" -) +_LABEL_STRIP_RE = re.compile(r"<(?:" + "|".join(_INTERNAL_TAGS) + r")>[^<\n]*\s*") # Matches a trailing `<` that could be the start of an internal tag. # We build a pattern that matches `<` followed by any prefix of any @@ -138,9 +136,7 @@ for _tag in _INTERNAL_TAGS: for _i in range(1, len(_tag) + 1): _PARTIAL_PREFIXES.add(_tag[:_i]) _PARTIAL_OPEN_RE = re.compile( - r"<(?:" - + "|".join(re.escape(p) for p in sorted(_PARTIAL_PREFIXES, key=len, reverse=True)) - + r")$" + r"<(?:" + "|".join(re.escape(p) for p in sorted(_PARTIAL_PREFIXES, key=len, reverse=True)) + r")$" ) _GENERIC_TAG_RE = re.compile(r"") @@ -351,9 +347,7 @@ class AgentLoop(AgentProtocol): self._config = config or LoopConfig() self._tool_executor = tool_executor self._conversation_store = conversation_store - self._injection_queue: asyncio.Queue[tuple[str, bool, list[dict[str, Any]] | None]] = ( - asyncio.Queue() - ) + self._injection_queue: asyncio.Queue[tuple[str, bool, list[dict[str, Any]] | None]] = asyncio.Queue() self._trigger_queue: asyncio.Queue[TriggerEvent] = asyncio.Queue() # Queen input blocking state self._input_ready = asyncio.Event() @@ -510,9 +504,7 @@ class AgentLoop(AgentProtocol): output_tokens=0, latency_ms=0, ) - return self._finalize_result( - AgentResult(success=False, error=error_msg), "guard_failure" - ) + return self._finalize_result(AgentResult(success=False, error=error_msg), "guard_failure") # 2. Restore or create new conversation + accumulator restored = await self._restore(ctx) @@ -571,11 +563,7 @@ class AgentLoop(AgentProtocol): if ctx.default_skill_batch_nudge: from framework.skills.defaults import is_batch_scenario as _is_batch - _input_text = ( - (ctx.goal_context or "") - + " " - + " ".join(str(v) for v in ctx.input_data.values() if v) - ) + _input_text = (ctx.goal_context or "") + " " + " ".join(str(v) for v in ctx.input_data.values() if v) if _is_batch(_input_text): system_prompt = f"{system_prompt}\n\n{ctx.default_skill_batch_nudge}" logger.info("[%s] DS-12: batch scenario detected, nudge injected", node_id) @@ -587,9 +575,7 @@ class AgentLoop(AgentProtocol): store=self._conversation_store, run_id=ctx.effective_run_id, compaction_buffer_tokens=self._config.compaction_buffer_tokens, - compaction_warning_buffer_tokens=( - self._config.compaction_warning_buffer_tokens - ), + compaction_warning_buffer_tokens=(self._config.compaction_warning_buffer_tokens), ) accumulator = OutputAccumulator( store=self._conversation_store, @@ -690,9 +676,7 @@ class AgentLoop(AgentProtocol): node_id, iteration, ) - await self._publish_loop_completed( - stream_id, node_id, iteration, execution_id - ) + await self._publish_loop_completed(stream_id, node_id, iteration, execution_id) return AgentResult( success=True, output=accumulator.to_dict(), @@ -773,9 +757,7 @@ class AgentLoop(AgentProtocol): prompt=str(pending_input_state.get("prompt", "")), options=pending_input_state.get("options"), questions=pending_input_state.get("questions"), - emit_client_request=bool( - pending_input_state.get("emit_client_request", True) - ), + emit_client_request=bool(pending_input_state.get("emit_client_request", True)), ) logger.info( "[%s] iter=%d: restored wait unblocked, got_input=%s", @@ -784,9 +766,7 @@ class AgentLoop(AgentProtocol): got_input, ) if not got_input: - await self._publish_loop_completed( - stream_id, node_id, iteration + 1, execution_id - ) + await self._publish_loop_completed(stream_id, node_id, iteration + 1, execution_id) latency_ms = int((time.time() - start_time) * 1000) return AgentResult( success=True, @@ -797,8 +777,7 @@ class AgentLoop(AgentProtocol): ) if self._injection_queue.empty() and self._trigger_queue.empty(): logger.info( - "[%s] iter=%d: pending-input wait woke" - " without queued input; re-waiting", + "[%s] iter=%d: pending-input wait woke without queued input; re-waiting", node_id, iteration, ) @@ -863,9 +842,7 @@ class AgentLoop(AgentProtocol): iteration, len(conversation.messages), ) - logger.debug( - "[AgentLoop.execute] iteration=%d: entering _run_single_turn loop", iteration - ) + logger.debug("[AgentLoop.execute] iteration=%d: entering _run_single_turn loop", iteration) _stream_retry_count = 0 _capacity_retry_started_at: float | None = None _capacity_retry_attempt = 0 @@ -892,9 +869,7 @@ class AgentLoop(AgentProtocol): request_system_prompt, request_messages, _, - ) = await self._run_single_turn( - ctx, conversation, tools, iteration, accumulator - ) + ) = await self._run_single_turn(ctx, conversation, tools, iteration, accumulator) logger.debug( "[AgentLoop.execute] iteration=%d: _run_single_turn completed successfully", iteration, @@ -910,10 +885,7 @@ class AgentLoop(AgentProtocol): len(real_tool_results), outputs_set or "[]", turn_tokens, - { - k: ("set" if v is not None else "None") - for k, v in accumulator.to_dict().items() - }, + {k: ("set" if v is not None else "None") for k, v in accumulator.to_dict().items()}, ) total_input_tokens += turn_tokens.get("input", 0) total_output_tokens += turn_tokens.get("output", 0) @@ -982,10 +954,7 @@ class AgentLoop(AgentProtocol): # still publishes a retry event so the UI can see us # waiting (the "heartbeat" — no silent stalls). self._bump("llm_turn_exception") - if ( - self._is_capacity_error(e) - and self._config.capacity_retry_max_seconds > 0 - ): + if self._is_capacity_error(e) and self._config.capacity_retry_max_seconds > 0: self._bump("capacity_error") now = time.monotonic() if _capacity_retry_started_at is None: @@ -994,8 +963,7 @@ class AgentLoop(AgentProtocol): if elapsed < self._config.capacity_retry_max_seconds: _capacity_retry_attempt += 1 delay = min( - self._config.stream_retry_backoff_base - * (2 ** min(_capacity_retry_attempt - 1, 6)), + self._config.stream_retry_backoff_base * (2 ** min(_capacity_retry_attempt - 1, 6)), self._config.capacity_retry_max_delay, ) logger.warning( @@ -1023,15 +991,11 @@ class AgentLoop(AgentProtocol): continue # retry same iteration # Retry transient errors with exponential backoff - if ( - self._is_transient_error(e) - and _stream_retry_count < self._config.max_stream_retries - ): + if self._is_transient_error(e) and _stream_retry_count < self._config.max_stream_retries: self._bump("llm_transient_retry") _stream_retry_count += 1 delay = min( - self._config.stream_retry_backoff_base - * (2 ** (_stream_retry_count - 1)), + self._config.stream_retry_backoff_base * (2 ** (_stream_retry_count - 1)), self._config.stream_retry_max_delay, ) logger.warning( @@ -1079,8 +1043,7 @@ class AgentLoop(AgentProtocol): if ctx.supports_direct_user_io: error_msg = f"LLM call failed: {e}" _guardrail_phrase = ( - "no endpoints available matching your guardrail restrictions " - "and data policy" + "no endpoints available matching your guardrail restrictions and data policy" ) if _guardrail_phrase in str(e).lower(): error_msg += ( @@ -1219,9 +1182,7 @@ class AgentLoop(AgentProtocol): node_id, iteration, ) - await self._publish_loop_completed( - stream_id, node_id, iteration + 1, execution_id - ) + await self._publish_loop_completed(stream_id, node_id, iteration + 1, execution_id) latency_ms = int((time.time() - start_time) * 1000) return AgentResult( success=True, @@ -1442,10 +1403,7 @@ class AgentLoop(AgentProtocol): _has_tools_no_text = bool(real_tool_results) and not assistant_text if _has_tools_no_text: _silent_tool_streak += 1 - if ( - _silent_tool_streak > 0 - and _silent_tool_streak % self._config.silent_tool_streak_threshold == 0 - ): + if _silent_tool_streak > 0 and _silent_tool_streak % self._config.silent_tool_streak_threshold == 0: nudge = ( "[SYSTEM] You have been calling tools for " f"{_silent_tool_streak} consecutive turns without " @@ -1488,10 +1446,7 @@ class AgentLoop(AgentProtocol): and self._event_bus is not None ) _worker_no_tool_turn = ( - not real_tool_results - and not outputs_set - and not queen_input_requested - and not user_input_requested + not real_tool_results and not outputs_set and not queen_input_requested and not user_input_requested ) if _is_worker and _worker_no_tool_turn: _worker_text_only_streak += 1 @@ -1599,9 +1554,7 @@ class AgentLoop(AgentProtocol): step_index=iteration, verdict="CONTINUE", verdict_feedback=( - "Auto-block grace" - f" ({_cf_text_only_streak}" - f"/{self._config.cf_grace_turns})" + f"Auto-block grace ({_cf_text_only_streak}/{self._config.cf_grace_turns})" ), tool_calls=logged_tool_calls, llm_text=assistant_text, @@ -1614,9 +1567,7 @@ class AgentLoop(AgentProtocol): # through to judge if self._shutdown: - await self._publish_loop_completed( - stream_id, node_id, iteration + 1, execution_id - ) + await self._publish_loop_completed(stream_id, node_id, iteration + 1, execution_id) latency_ms = int((time.time() - start_time) * 1000) _continue_count += 1 if ctx.runtime_logger: @@ -1702,9 +1653,7 @@ class AgentLoop(AgentProtocol): ) logger.info("[%s] iter=%d: unblocked, got_input=%s", node_id, iteration, got_input) if not got_input: - await self._publish_loop_completed( - stream_id, node_id, iteration + 1, execution_id - ) + await self._publish_loop_completed(stream_id, node_id, iteration + 1, execution_id) latency_ms = int((time.time() - start_time) * 1000) _continue_count += 1 if ctx.runtime_logger: @@ -1800,9 +1749,7 @@ class AgentLoop(AgentProtocol): # until the queen injects guidance. if queen_input_requested: if self._shutdown: - await self._publish_loop_completed( - stream_id, node_id, iteration + 1, execution_id - ) + await self._publish_loop_completed(stream_id, node_id, iteration + 1, execution_id) latency_ms = int((time.time() - start_time) * 1000) _continue_count += 1 self._log_skip_judge( @@ -1870,15 +1817,11 @@ class AgentLoop(AgentProtocol): stream_id=stream_id, node_id=node_id, reason="Blocked waiting for queen guidance - no input received", - context=( - "Worker escalated but received no queen guidance before shutdown" - ), + context=("Worker escalated but received no queen guidance before shutdown"), execution_id=execution_id, request_id=uuid.uuid4().hex, ) - await self._publish_loop_completed( - stream_id, node_id, iteration + 1, execution_id - ) + await self._publish_loop_completed(stream_id, node_id, iteration + 1, execution_id) latency_ms = int((time.time() - start_time) * 1000) _continue_count += 1 self._log_skip_judge( @@ -2143,9 +2086,7 @@ class AgentLoop(AgentProtocol): continue # 7. Max iterations exhausted - await self._publish_loop_completed( - stream_id, node_id, self._config.max_iterations, execution_id - ) + await self._publish_loop_completed(stream_id, node_id, self._config.max_iterations, execution_id) latency_ms = int((time.time() - start_time) * 1000) if ctx.runtime_logger: ctx.runtime_logger.log_node_complete( @@ -2168,9 +2109,7 @@ class AgentLoop(AgentProtocol): return self._finalize_result( AgentResult( success=False, - error=( - f"Max iterations ({self._config.max_iterations}) reached without acceptance" - ), + error=(f"Max iterations ({self._config.max_iterations}) reached without acceptance"), output=accumulator.to_dict(), tokens_used=total_input_tokens + total_output_tokens, latency_ms=latency_ms, @@ -2204,9 +2143,7 @@ class AgentLoop(AgentProtocol): image_content: Optional list of OpenAI-style image blocks to attach. """ logger.debug( - "[AgentLoop.inject_event] content_len=%d," - " is_client_input=%s, has_images=%s," - " queue_size_before=%d", + "[AgentLoop.inject_event] content_len=%d, is_client_input=%s, has_images=%s, queue_size_before=%d", len(content) if content else 0, is_client_input, bool(image_content), @@ -2440,9 +2377,7 @@ class AgentLoop(AgentProtocol): # generating. Unsafe tools (bash, edits, browser actions) # still wait for FinishEvent so we don't race a write # against a decision the model hasn't finished making. - _early_safe_names = { - t.name for t in tools if getattr(t, "concurrency_safe", False) - } + _early_safe_names = {t.name for t in tools if getattr(t, "concurrency_safe", False)} _early_tasks: dict[str, asyncio.Task] = {} async def _timed_execute( @@ -2539,9 +2474,7 @@ class AgentLoop(AgentProtocol): and "_raw" not in event.tool_input and event.tool_use_id not in _tasks ): - _tasks[event.tool_use_id] = asyncio.create_task( - _exec_fn(event) - ) + _tasks[event.tool_use_id] = asyncio.create_task(_exec_fn(event)) elif isinstance(event, FinishEvent): token_counts["input"] += event.input_tokens @@ -2558,9 +2491,7 @@ class AgentLoop(AgentProtocol): _llm_stream_t0 = time.monotonic() self._stream_task = asyncio.create_task(_do_stream()) - logger.debug( - "[_run_single_turn] inner_turn=%d: Stream task created, waiting...", inner_turn - ) + logger.debug("[_run_single_turn] inner_turn=%d: Stream task created, waiting...", inner_turn) _inactivity_limit = self._config.llm_stream_inactivity_timeout_seconds try: if _inactivity_limit and _inactivity_limit > 0: @@ -2574,9 +2505,7 @@ class AgentLoop(AgentProtocol): # TimeoutError of its own" — wait_for conflates them. _check_interval = min(5.0, _inactivity_limit / 2) while True: - done, _pending = await asyncio.wait( - {self._stream_task}, timeout=_check_interval - ) + done, _pending = await asyncio.wait({self._stream_task}, timeout=_check_interval) if self._stream_task in done: # Let any exception the task raised propagate # naturally via the outer ``await`` below. @@ -2607,9 +2536,7 @@ class AgentLoop(AgentProtocol): # watchdog loop exited via ``break`` the task is done, and # ``await`` is the cheapest way to surface its exception. await self._stream_task - logger.debug( - "[_run_single_turn] inner_turn=%d: Stream task completed normally", inner_turn - ) + logger.debug("[_run_single_turn] inner_turn=%d: Stream task completed normally", inner_turn) except asyncio.CancelledError: logger.debug("[_run_single_turn] inner_turn=%d: Stream task cancelled", inner_turn) if accumulated_text: @@ -2631,9 +2558,7 @@ class AgentLoop(AgentProtocol): raise raise TurnCancelled() from None except Exception as e: - logger.exception( - "[_run_single_turn] inner_turn=%d: Stream task failed: %s", inner_turn, e - ) + logger.exception("[_run_single_turn] inner_turn=%d: Stream task failed: %s", inner_turn, e) # Don't orphan early tool tasks on a stream failure # either - the outer retry loop will re-emit the tool # calls on the next attempt. @@ -2652,9 +2577,7 @@ class AgentLoop(AgentProtocol): for _early in _early_tasks.values(): if not _early.done(): _early.cancel() - raise ConnectionError( - f"Stream failed with recoverable error: {_stream_error.error}" - ) + raise ConnectionError(f"Stream failed with recoverable error: {_stream_error.error}") final_text = accumulated_text logger.info( @@ -2735,19 +2658,14 @@ class AgentLoop(AgentProtocol): # capping them strands work mid-turn and the next turn just # re-emits the discarded calls, which is strictly worse. if self._config.max_tool_calls_per_turn > 0: - hard_limit = int( - self._config.max_tool_calls_per_turn - * (1 + self._config.tool_call_overflow_margin) - ) + hard_limit = int(self._config.max_tool_calls_per_turn * (1 + self._config.tool_call_overflow_margin)) else: hard_limit = 0 # disabled # Phase 1: triage — handle framework tools immediately, # queue real tools for parallel execution. results_by_id: dict[str, ToolResult] = {} - timing_by_id: dict[ - str, dict[str, Any] - ] = {} # tool_use_id -> {start_timestamp, duration_s} + timing_by_id: dict[str, dict[str, Any]] = {} # tool_use_id -> {start_timestamp, duration_s} pending_real: list[ToolCallEvent] = [] for tc in tool_calls: @@ -2801,9 +2719,7 @@ class AgentLoop(AgentProtocol): sanitize_ask_user_inputs, ) - ask_user_prompt, recovered_options = sanitize_ask_user_inputs( - ask_user_prompt, raw_options - ) + ask_user_prompt, recovered_options = sanitize_ask_user_inputs(ask_user_prompt, raw_options) if recovered_options is not None and raw_options is None: raw_options = recovered_options # Defensive: ensure options is a list of strings. @@ -2930,8 +2846,7 @@ class AgentLoop(AgentProtocol): result = ToolResult( tool_use_id=tc.tool_use_id, content=( - "ERROR: escalate is only available to worker " - "nodes/sub-agents, not queen/judge streams." + "ERROR: escalate is only available to worker nodes/sub-agents, not queen/judge streams." ), is_error=True, ) @@ -2941,9 +2856,7 @@ class AgentLoop(AgentProtocol): if self._event_bus is None: result = ToolResult( tool_use_id=tc.tool_use_id, - content=( - "ERROR: EventBus unavailable. Could not emit escalation request." - ), + content=("ERROR: EventBus unavailable. Could not emit escalation request."), is_error=True, ) results_by_id[tc.tool_use_id] = result @@ -2973,10 +2886,7 @@ class AgentLoop(AgentProtocol): # owner (Worker instance) records the explicit report # via ``record_explicit_report`` so Worker.run()'s # terminal event emission picks it up. - if not ( - isinstance(stream_id, str) - and stream_id.startswith("worker:") - ): + if not (isinstance(stream_id, str) and stream_id.startswith("worker:")): result = ToolResult( tool_use_id=tc.tool_use_id, content=( @@ -3064,9 +2974,7 @@ class AgentLoop(AgentProtocol): async with _sem: return await _timed_execute(_tc) - timed_results_by_id: dict[ - str, tuple[ToolResult | BaseException, str, float] | BaseException - ] = {} + timed_results_by_id: dict[str, tuple[ToolResult | BaseException, str, float] | BaseException] = {} async def _cancel_turn_with_stubs( _pending: list[ToolCallEvent] = pending_real, # noqa: B006,B008 @@ -3108,9 +3016,7 @@ class AgentLoop(AgentProtocol): _awaitables.append(early) else: _awaitables.append(_capped(tc)) - self._tool_task = asyncio.ensure_future( - asyncio.gather(*_awaitables, return_exceptions=True) - ) + self._tool_task = asyncio.ensure_future(asyncio.gather(*_awaitables, return_exceptions=True)) try: parallel_timed = await self._tool_task finally: @@ -3196,9 +3102,7 @@ class AgentLoop(AgentProtocol): result = _build_tool_error_result(tc, raw) else: result = raw - results_by_id[tc.tool_use_id] = await self._truncate_tool_result( - result, tc.tool_name - ) + results_by_id[tc.tool_use_id] = await self._truncate_tool_result(result, tc.tool_name) # Phase 3: record results into conversation in original order, # build logged/real lists, and publish completed events. @@ -3227,8 +3131,7 @@ class AgentLoop(AgentProtocol): image_content = result.image_content if image_content and ctx.llm and not supports_image_tool_results(ctx.llm.model): logger.info( - "Stripping image_content from tool result; " - "model '%s' does not support images in tool results", + "Stripping image_content from tool result; model '%s' does not support images in tool results", ctx.llm.model, ) image_content = None @@ -3240,11 +3143,7 @@ class AgentLoop(AgentProtocol): image_content=image_content, is_skill_content=result.is_skill_content, ) - if ( - tc.tool_name in ("ask_user", "ask_user_multiple") - and user_input_requested - and not result.is_error - ): + if tc.tool_name in ("ask_user", "ask_user_multiple") and user_input_requested and not result.is_error: # Defer tool_call_completed until after user responds self._deferred_tool_complete = { "stream_id": stream_id, @@ -3704,10 +3603,7 @@ class AgentLoop(AgentProtocol): # function only touches disk / does heavy JSON work when the # result exceeds either the truncation or spillover threshold, # so cheap pass-throughs stay on the main loop. - needs_offload = ( - len(result.content) > 10_000 - and not result.is_error - ) + needs_offload = len(result.content) > 10_000 and not result.is_error if not needs_offload: return truncate_tool_result( result=result, @@ -3868,9 +3764,7 @@ class AgentLoop(AgentProtocol): pending_input=pending_input, ) - async def _drain_injection_queue( - self, conversation: NodeConversation, ctx: AgentContext - ) -> int: + async def _drain_injection_queue(self, conversation: NodeConversation, ctx: AgentContext) -> int: """Drain all pending injected events as user messages. Returns count.""" return await drain_injection_queue( queue=self._injection_queue, @@ -3912,9 +3806,7 @@ class AgentLoop(AgentProtocol): # EventBus publishing helpers # ------------------------------------------------------------------- - async def _publish_loop_started( - self, stream_id: str, node_id: str, execution_id: str = "" - ) -> None: + async def _publish_loop_started(self, stream_id: str, node_id: str, execution_id: str = "") -> None: return await publish_loop_started( event_bus=self._event_bus, stream_id=stream_id, diff --git a/core/framework/agent_loop/conversation.py b/core/framework/agent_loop/conversation.py index 3a7825f1..5d902963 100644 --- a/core/framework/agent_loop/conversation.py +++ b/core/framework/agent_loop/conversation.py @@ -890,9 +890,7 @@ class NodeConversation: f"Read the complete data with read_file(path='{spillover}')." ) else: - placeholder = ( - f"Pruned tool result ({orig_len:,} chars) cleared from context." - ) + placeholder = f"Pruned tool result ({orig_len:,} chars) cleared from context." self._messages[i] = Message( seq=msg.seq, @@ -974,16 +972,13 @@ class NodeConversation: ) evicted += 1 if self._store: - await self._store.write_part( - msg.seq, self._messages[idx].to_storage_dict() - ) + await self._store.write_part(msg.seq, self._messages[idx].to_storage_dict()) if evicted: # Reset token estimate — image blocks no longer contribute. self._last_api_input_tokens = None logger.info( - "evict_old_images: dropped image_content from %d message(s), " - "kept %d most recent", + "evict_old_images: dropped image_content from %d message(s), kept %d most recent", evicted, keep_latest, ) @@ -1141,9 +1136,7 @@ class NodeConversation: for msg in old_messages: if msg.role != "assistant" or not msg.tool_calls: continue - has_protected = any( - tc.get("function", {}).get("name") == "set_output" for tc in msg.tool_calls - ) + has_protected = any(tc.get("function", {}).get("name") == "set_output" for tc in msg.tool_calls) tc_ids = {tc.get("id", "") for tc in msg.tool_calls} if has_protected: protected_tc_ids |= tc_ids @@ -1339,11 +1332,7 @@ class NodeConversation: def export_summary(self) -> str: """Structured summary with [STATS], [CONFIG], [RECENT_MESSAGES] sections.""" - prompt_preview = ( - self._system_prompt[:80] + "..." - if len(self._system_prompt) > 80 - else self._system_prompt - ) + prompt_preview = self._system_prompt[:80] + "..." if len(self._system_prompt) > 80 else self._system_prompt lines = [ "[STATS]", @@ -1390,9 +1379,7 @@ class NodeConversation: "max_context_tokens": self._max_context_tokens, "compaction_threshold": self._compaction_threshold, "compaction_buffer_tokens": self._compaction_buffer_tokens, - "compaction_warning_buffer_tokens": ( - self._compaction_warning_buffer_tokens - ), + "compaction_warning_buffer_tokens": (self._compaction_warning_buffer_tokens), "output_keys": self._output_keys, } await self._store.write_meta(run_meta) @@ -1441,9 +1428,7 @@ class NodeConversation: store=store, run_id=run_id, compaction_buffer_tokens=meta.get("compaction_buffer_tokens"), - compaction_warning_buffer_tokens=meta.get( - "compaction_warning_buffer_tokens" - ), + compaction_warning_buffer_tokens=meta.get("compaction_warning_buffer_tokens"), ) conv._meta_persisted = True @@ -1457,8 +1442,7 @@ class NodeConversation: # sessions) persisted parts without phase_id. In that case, the # phase filter would incorrectly hide the entire conversation. logger.info( - "Restoring legacy unphased conversation without applying " - "phase filter (phase_id=%s, parts=%d)", + "Restoring legacy unphased conversation without applying phase filter (phase_id=%s, parts=%d)", phase_id, len(parts), ) diff --git a/core/framework/agent_loop/internals/compaction.py b/core/framework/agent_loop/internals/compaction.py index 41576cdd..4a5e1ef2 100644 --- a/core/framework/agent_loop/internals/compaction.py +++ b/core/framework/agent_loop/internals/compaction.py @@ -107,9 +107,7 @@ def microcompact( f"Read the complete data with read_file(path='{spillover}')." ) else: - placeholder = ( - f"Old tool result ({orig_len:,} chars) cleared from context." - ) + placeholder = f"Old tool result ({orig_len:,} chars) cleared from context." # Mutate in-place (microcompact is synchronous, no store writes) conversation._messages[i] = Message( @@ -185,8 +183,7 @@ async def compact( _llm_compaction_skipped = _failure_counts.get(conv_id, 0) >= MAX_CONSECUTIVE_FAILURES if _llm_compaction_skipped: logger.warning( - "Circuit breaker: LLM compaction disabled after %d failures — " - "skipping straight to emergency summary", + "Circuit breaker: LLM compaction disabled after %d failures — skipping straight to emergency summary", _failure_counts[conv_id], ) @@ -532,10 +529,7 @@ def build_llm_compaction_prompt( done = {k: v for k, v in acc.items() if v is not None} todo = [k for k, v in acc.items() if v is None] if done: - ctx_lines.append( - "OUTPUTS ALREADY SET:\n" - + "\n".join(f" {k}: {str(v)[:150]}" for k, v in done.items()) - ) + ctx_lines.append("OUTPUTS ALREADY SET:\n" + "\n".join(f" {k}: {str(v)[:150]}" for k, v in done.items())) if todo: ctx_lines.append(f"OUTPUTS STILL NEEDED: {', '.join(todo)}") elif spec.output_keys: @@ -589,12 +583,8 @@ def build_message_inventory(conversation: NodeConversation) -> list[dict[str, An if message.tool_calls: for tool_call in message.tool_calls: args = tool_call.get("function", {}).get("arguments", "") - tool_call_args_chars += ( - len(args) if isinstance(args, str) else len(json.dumps(args)) - ) - names = [ - tool_call.get("function", {}).get("name", "?") for tool_call in message.tool_calls - ] + tool_call_args_chars += len(args) if isinstance(args, str) else len(json.dumps(args)) + names = [tool_call.get("function", {}).get("name", "?") for tool_call in message.tool_calls] tool_name = ", ".join(names) elif message.role == "tool" and message.tool_use_id: for previous in conversation.messages: @@ -651,14 +641,8 @@ def write_compaction_debug_log( lines.append("") if inventory: - total_chars = sum( - entry.get("content_chars", 0) + entry.get("tool_call_args_chars", 0) - for entry in inventory - ) - lines.append( - "## Pre-Compaction Message Inventory " - f"({len(inventory)} messages, {total_chars:,} total chars)" - ) + total_chars = sum(entry.get("content_chars", 0) + entry.get("tool_call_args_chars", 0) for entry in inventory) + lines.append(f"## Pre-Compaction Message Inventory ({len(inventory)} messages, {total_chars:,} total chars)") lines.append("") ranked = sorted( inventory, @@ -677,8 +661,7 @@ def write_compaction_debug_log( if entry.get("phase"): flags.append(f"phase={entry['phase']}") lines.append( - f"| {i} | {entry['seq']} | {entry['role']} | {tool} " - f"| {chars:,} | {pct:.1f}% | {', '.join(flags)} |" + f"| {i} | {entry['seq']} | {entry['role']} | {tool} | {chars:,} | {pct:.1f}% | {', '.join(flags)} |" ) large = [entry for entry in ranked if entry.get("preview")] @@ -686,9 +669,7 @@ def write_compaction_debug_log( lines.append("") lines.append("### Large message previews") for entry in large: - lines.append( - f"\n**seq={entry['seq']}** ({entry['role']}, {entry.get('tool', '')}):" - ) + lines.append(f"\n**seq={entry['seq']}** ({entry['role']}, {entry.get('tool', '')}):") lines.append(f"```\n{entry['preview']}\n```") lines.append("") @@ -776,10 +757,7 @@ def build_emergency_summary( node's known state so the LLM can continue working after compaction without losing track of its task and inputs. """ - parts = [ - "EMERGENCY COMPACTION — previous conversation was too large " - "and has been replaced with this summary.\n" - ] + parts = ["EMERGENCY COMPACTION — previous conversation was too large and has been replaced with this summary.\n"] # 1. Node identity spec = ctx.agent_spec @@ -832,17 +810,13 @@ def build_emergency_summary( data_files = [f for f in all_files if f not in conv_files] if conv_files: - conv_list = "\n".join( - f" - {f} (full path: {data_dir / f})" for f in conv_files - ) + conv_list = "\n".join(f" - {f} (full path: {data_dir / f})" for f in conv_files) parts.append( "CONVERSATION HISTORY (freeform messages saved during compaction — " "use read_file('') to review earlier dialogue):\n" + conv_list ) if data_files: - file_list = "\n".join( - f" - {f} (full path: {data_dir / f})" for f in data_files[:30] - ) + file_list = "\n".join(f" - {f} (full path: {data_dir / f})" for f in data_files[:30]) parts.append("DATA FILES (use read_file('') to read):\n" + file_list) if not all_files: parts.append( @@ -850,10 +824,7 @@ def build_emergency_summary( "Use list_directory to check the data directory." ) except Exception: - parts.append( - "NOTE: Large tool results were saved to files. " - "Use read_file(path='') to read them." - ) + parts.append("NOTE: Large tool results were saved to files. Use read_file(path='') to read them.") # 6. Tool call history (prevent re-calling tools) if conversation is not None: @@ -861,10 +832,7 @@ def build_emergency_summary( if tool_history: parts.append(tool_history) - parts.append( - "\nContinue working towards setting the remaining outputs. " - "Use your tools and the inputs above." - ) + parts.append("\nContinue working towards setting the remaining outputs. Use your tools and the inputs above.") return "\n\n".join(parts) diff --git a/core/framework/agent_loop/internals/cursor_persistence.py b/core/framework/agent_loop/internals/cursor_persistence.py index d80545a8..06060222 100644 --- a/core/framework/agent_loop/internals/cursor_persistence.py +++ b/core/framework/agent_loop/internals/cursor_persistence.py @@ -149,9 +149,7 @@ async def write_cursor( cursor["recent_responses"] = recent_responses if recent_tool_fingerprints is not None: # Convert list[list[tuple]] → list[list[list]] for JSON - cursor["recent_tool_fingerprints"] = [ - [list(pair) for pair in fps] for fps in recent_tool_fingerprints - ] + cursor["recent_tool_fingerprints"] = [[list(pair) for pair in fps] for fps in recent_tool_fingerprints] # Persist blocked-input state so restored runs re-block instead of # manufacturing a synthetic continuation turn. cursor["pending_input"] = pending_input @@ -163,9 +161,7 @@ async def drain_injection_queue( conversation: NodeConversation, *, ctx: NodeContext, - describe_images_as_text_fn: ( - Callable[[list[dict[str, Any]]], Awaitable[str | None]] | None - ) = None, + describe_images_as_text_fn: (Callable[[list[dict[str, Any]]], Awaitable[str | None]] | None) = None, ) -> int: """Drain all pending injected events as user messages. Returns count.""" count = 0 diff --git a/core/framework/agent_loop/internals/judge_pipeline.py b/core/framework/agent_loop/internals/judge_pipeline.py index 25eaa2bc..cbd474fc 100644 --- a/core/framework/agent_loop/internals/judge_pipeline.py +++ b/core/framework/agent_loop/internals/judge_pipeline.py @@ -31,14 +31,10 @@ class SubagentJudge: if remaining <= 3: urgency = ( - f"URGENT: Only {remaining} iterations left. " - f"Stop all other work and call set_output NOW for: {missing}" + f"URGENT: Only {remaining} iterations left. Stop all other work and call set_output NOW for: {missing}" ) elif remaining <= self._max_iterations // 2: - urgency = ( - f"WARNING: {remaining} iterations remaining. " - f"You must call set_output for: {missing}" - ) + urgency = f"WARNING: {remaining} iterations remaining. You must call set_output for: {missing}" else: urgency = f"Missing output keys: {missing}. Use set_output to provide them." @@ -109,9 +105,7 @@ async def judge_turn( if tool_results: return JudgeVerdict(action="RETRY") # feedback=None → not logged - missing = get_missing_output_keys_fn( - accumulator, ctx.agent_spec.output_keys, ctx.agent_spec.nullable_output_keys - ) + missing = get_missing_output_keys_fn(accumulator, ctx.agent_spec.output_keys, ctx.agent_spec.nullable_output_keys) if missing: return JudgeVerdict( @@ -133,10 +127,7 @@ async def judge_turn( if all_nullable and none_set: return JudgeVerdict( action="RETRY", - feedback=( - f"No output keys have been set yet. " - f"Use set_output to set at least one of: {output_keys}" - ), + feedback=(f"No output keys have been set yet. Use set_output to set at least one of: {output_keys}"), ) # Level 2b: conversation-aware quality check (if success_criteria set) diff --git a/core/framework/agent_loop/internals/synthetic_tools.py b/core/framework/agent_loop/internals/synthetic_tools.py index 12ad2cc5..170040e2 100644 --- a/core/framework/agent_loop/internals/synthetic_tools.py +++ b/core/framework/agent_loop/internals/synthetic_tools.py @@ -198,9 +198,7 @@ def build_ask_user_multiple_tool() -> Tool: "properties": { "id": { "type": "string", - "description": ( - "Short identifier for this question (used in the response)." - ), + "description": ("Short identifier for this question (used in the response)."), }, "prompt": { "type": "string", @@ -256,10 +254,7 @@ def build_set_output_tool(output_keys: list[str] | None) -> Tool | None: }, "value": { "type": "string", - "description": ( - "The output value — a brief note, count, status, " - "or data filename reference." - ), + "description": ("The output value — a brief note, count, status, or data filename reference."), }, }, "required": ["key", "value"], @@ -283,9 +278,7 @@ def build_escalate_tool() -> Tool: "properties": { "reason": { "type": "string", - "description": ( - "Short reason for escalation (e.g. 'Tool repeatedly failing')." - ), + "description": ("Short reason for escalation (e.g. 'Tool repeatedly failing')."), }, "context": { "type": "string", @@ -377,10 +370,7 @@ def handle_report_to_parent(tool_input: dict[str, Any]) -> ToolResult: } return ToolResult( tool_use_id=tool_input.get("tool_use_id", ""), - content=( - f"Report delivered to overseer (status={status}). " - f"This worker will terminate now." - ), + content=(f"Report delivered to overseer (status={status}). This worker will terminate now."), ) diff --git a/core/framework/agent_loop/internals/tool_result_handler.py b/core/framework/agent_loop/internals/tool_result_handler.py index fda01613..287a4107 100644 --- a/core/framework/agent_loop/internals/tool_result_handler.py +++ b/core/framework/agent_loop/internals/tool_result_handler.py @@ -277,8 +277,7 @@ def truncate_tool_result( if metadata_str: header += f"\n\nData structure:\n{metadata_str}" header += ( - "\n\nWARNING: the preview below is a SAMPLE only — do NOT " - "draw counts, totals, or conclusions from it." + "\n\nWARNING: the preview below is a SAMPLE only — do NOT draw counts, totals, or conclusions from it." ) truncated = f"{header}\n\nPreview (truncated):\n{preview_block}" @@ -348,8 +347,7 @@ def truncate_tool_result( if metadata_str: header += f"\nData structure:\n{metadata_str}\n" header += ( - "\nWARNING: the preview below is a SAMPLE only — do NOT " - "draw counts, totals, or conclusions from it." + "\nWARNING: the preview below is a SAMPLE only — do NOT draw counts, totals, or conclusions from it." ) content = f"{header}\n\nPreview (truncated):\n{preview_block}" @@ -416,8 +414,7 @@ def truncate_tool_result( if metadata_str: header += f"\n\nData structure:\n{metadata_str}" header += ( - "\n\nWARNING: the preview below is a SAMPLE only — do NOT " - "draw counts, totals, or conclusions from it." + "\n\nWARNING: the preview below is a SAMPLE only — do NOT draw counts, totals, or conclusions from it." ) truncated = f"{header}\n\n{preview_block}" diff --git a/core/framework/agent_loop/internals/types.py b/core/framework/agent_loop/internals/types.py index 18d47bec..26d44ed3 100644 --- a/core/framework/agent_loop/internals/types.py +++ b/core/framework/agent_loop/internals/types.py @@ -226,9 +226,7 @@ class OutputAccumulator: ext = ".json" if isinstance(value, (dict, list)) else ".txt" filename = f"output_{key}{ext}" write_content = ( - json.dumps(value, indent=2, ensure_ascii=False) - if isinstance(value, (dict, list)) - else str(value) + json.dumps(value, indent=2, ensure_ascii=False) if isinstance(value, (dict, list)) else str(value) ) file_path = spill_path / filename file_path.write_text(write_content, encoding="utf-8") diff --git a/core/framework/agent_loop/prompting.py b/core/framework/agent_loop/prompting.py index aeea24ff..c89edfbb 100644 --- a/core/framework/agent_loop/prompting.py +++ b/core/framework/agent_loop/prompting.py @@ -52,18 +52,12 @@ def build_prompt_spec( # Tool-gated pre-activation: inject full body of default skills whose # trigger tools are present in this agent's tool list (e.g. browser_* # pulls in hive.browser-automation). Keeps non-browser agents lean. - tool_names = [ - getattr(t, "name", "") for t in (getattr(ctx, "available_tools", None) or []) - ] - skills_catalog_prompt = augment_catalog_for_tools( - ctx.skills_catalog_prompt or "", tool_names - ) + tool_names = [getattr(t, "name", "") for t in (getattr(ctx, "available_tools", None) or [])] + skills_catalog_prompt = augment_catalog_for_tools(ctx.skills_catalog_prompt or "", tool_names) return PromptSpec( identity_prompt=ctx.identity_prompt or "", - focus_prompt=focus_prompt - if focus_prompt is not None - else (ctx.agent_spec.system_prompt or ""), + focus_prompt=focus_prompt if focus_prompt is not None else (ctx.agent_spec.system_prompt or ""), narrative=narrative if narrative is not None else (ctx.narrative or ""), accounts_prompt=ctx.accounts_prompt or "", skills_catalog_prompt=skills_catalog_prompt, @@ -100,7 +94,5 @@ def build_system_prompt_for_context( narrative: str | None = None, memory_prompt: str | None = None, ) -> str: - spec = build_prompt_spec( - ctx, focus_prompt=focus_prompt, narrative=narrative, memory_prompt=memory_prompt - ) + spec = build_prompt_spec(ctx, focus_prompt=focus_prompt, narrative=narrative, memory_prompt=memory_prompt) return build_system_prompt(spec) diff --git a/core/framework/agent_loop/types.py b/core/framework/agent_loop/types.py index 5da06610..2e8d1aed 100644 --- a/core/framework/agent_loop/types.py +++ b/core/framework/agent_loop/types.py @@ -76,10 +76,7 @@ class AgentSpec(BaseModel): max_visits: int = Field( default=0, - description=( - "Max times this agent executes in one colony run. " - "0 = unlimited. Set >1 for one-shot agents." - ), + description=("Max times this agent executes in one colony run. 0 = unlimited. Set >1 for one-shot agents."), ) output_model: type[BaseModel] | None = Field( diff --git a/core/framework/agents/credential_tester/agent.py b/core/framework/agents/credential_tester/agent.py index 42f6f6e1..d5ca06e6 100644 --- a/core/framework/agents/credential_tester/agent.py +++ b/core/framework/agents/credential_tester/agent.py @@ -126,9 +126,7 @@ def _list_local_accounts() -> list[dict]: try: from framework.credentials.local.registry import LocalCredentialRegistry - return [ - info.to_account_dict() for info in LocalCredentialRegistry.default().list_accounts() - ] + return [info.to_account_dict() for info in LocalCredentialRegistry.default().list_accounts()] except ImportError as exc: logger.debug("Local credential registry unavailable: %s", exc) return [] @@ -181,9 +179,7 @@ def _list_env_fallback_accounts() -> list[dict]: if spec.credential_group in seen_groups: continue group_available = all( - _is_configured(n, s) - for n, s in CREDENTIAL_SPECS.items() - if s.credential_group == spec.credential_group + _is_configured(n, s) for n, s in CREDENTIAL_SPECS.items() if s.credential_group == spec.credential_group ) if not group_available: continue @@ -215,9 +211,7 @@ def list_connected_accounts() -> list[dict]: # Show env-var fallbacks only for credentials not already in the named registry local_providers = {a["provider"] for a in local} - env_fallbacks = [ - a for a in _list_env_fallback_accounts() if a["provider"] not in local_providers - ] + env_fallbacks = [a for a in _list_env_fallback_accounts() if a["provider"] not in local_providers] return aden + local + env_fallbacks @@ -272,9 +266,7 @@ def _activate_local_account(credential_id: str, alias: str) -> None: group_specs = [ (cred_name, spec) for cred_name, spec in CREDENTIAL_SPECS.items() - if spec.credential_group == credential_id - or spec.credential_id == credential_id - or cred_name == credential_id + if spec.credential_group == credential_id or spec.credential_id == credential_id or cred_name == credential_id ] # Deduplicate — credential_id and credential_group may both match the same spec seen_env_vars: set[str] = set() @@ -419,10 +411,7 @@ nodes = [ NodeSpec( id="tester", name="Credential Tester", - description=( - "Interactive credential testing — lets the user pick an account " - "and verify it via API calls." - ), + description=("Interactive credential testing — lets the user pick an account and verify it via API calls."), node_type="event_loop", client_facing=True, max_node_visits=0, @@ -469,10 +458,7 @@ pause_nodes = [] terminal_nodes = ["tester"] # Tester node can terminate conversation_mode = "continuous" -identity_prompt = ( - "You are a credential tester that verifies connected accounts and API keys " - "can make real API calls." -) +identity_prompt = "You are a credential tester that verifies connected accounts and API keys can make real API calls." loop_config = { "max_iterations": 50, "max_tool_calls_per_turn": 30, diff --git a/core/framework/agents/discovery.py b/core/framework/agents/discovery.py index ddf6d911..4d05631c 100644 --- a/core/framework/agents/discovery.py +++ b/core/framework/agents/discovery.py @@ -150,28 +150,19 @@ def _is_colony_dir(path: Path) -> bool: """Check if a directory is a colony with worker config files.""" if not path.is_dir(): return False - return any( - f.suffix == ".json" - and f.stem not in _EXCLUDED_JSON_STEMS - for f in path.iterdir() - if f.is_file() - ) + return any(f.suffix == ".json" and f.stem not in _EXCLUDED_JSON_STEMS for f in path.iterdir() if f.is_file()) def _find_worker_configs(colony_dir: Path) -> list[Path]: """Find all worker config JSON files in a colony directory.""" return sorted( - p - for p in colony_dir.iterdir() - if p.is_file() - and p.suffix == ".json" - and p.stem not in _EXCLUDED_JSON_STEMS + p for p in colony_dir.iterdir() if p.is_file() and p.suffix == ".json" and p.stem not in _EXCLUDED_JSON_STEMS ) def _extract_agent_stats(agent_path: Path) -> tuple[int, int, list[str]]: """Extract worker count, tool count, and tags from a colony directory.""" - tool_count, tags = 0, [] + tags: list[str] = [] worker_configs = _find_worker_configs(agent_path) if worker_configs: @@ -251,9 +242,6 @@ def discover_agents() -> dict[str, list[AgentEntry]]: pass node_count = len(worker_entries) - all_tools: set[str] = set() - for w in worker_entries: - pass # tool_count already per-worker tool_count = max((w.tool_count for w in worker_entries), default=0) entries.append( diff --git a/core/framework/agents/queen/agent.py b/core/framework/agents/queen/agent.py index 4d3b6812..08424c15 100644 --- a/core/framework/agents/queen/agent.py +++ b/core/framework/agents/queen/agent.py @@ -11,9 +11,7 @@ from .nodes import queen_node queen_goal = Goal( id="queen-manager", name="Queen Manager", - description=( - "Manage the worker agent lifecycle and serve as the user's primary interactive interface." - ), + description=("Manage the worker agent lifecycle and serve as the user's primary interactive interface."), success_criteria=[], constraints=[], ) diff --git a/core/framework/agents/queen/nodes/__init__.py b/core/framework/agents/queen/nodes/__init__.py index c423e643..05dd3b98 100644 --- a/core/framework/agents/queen/nodes/__init__.py +++ b/core/framework/agents/queen/nodes/__init__.py @@ -60,9 +60,7 @@ def finalize_queen_prompt(text: str, has_vision: bool) -> str: _appendices = _build_appendices() # GCU guide — shared between planning and building via _shared_building_knowledge. -_gcu_section = ( - ("\n\n# Browser Automation Nodes\n\n" + _gcu_guide) if _is_gcu_enabled() and _gcu_guide else "" -) +_gcu_section = ("\n\n# Browser Automation Nodes\n\n" + _gcu_guide) if _is_gcu_enabled() and _gcu_guide else "" # Tools available to phases. _SHARED_TOOLS = [ @@ -101,7 +99,7 @@ _QUEEN_PLANNING_TOOLS = [ _QUEEN_BUILDING_TOOLS = _SHARED_TOOLS + [ "load_built_agent", "list_credentials", - ] +] # Staging phase: agent loaded but not yet running — inspect, configure, launch. # No backward transitions — staging only goes forward to running. @@ -933,8 +931,8 @@ You are the agent. You execute directly. - If execution is possible → proceed - If not → simulate realistically and label it clearly -1. Understand the task -2. Plan briefly (1–5 bullets, no system design) +1. Understand the task +2. Plan briefly (1–5 bullets, no system design) 3. **Do the work yourself, inline. One real instance.** Open the \ browser, call the real API, write to the real file, send the \ real message. Use your actual tools against real state. This \ @@ -949,18 +947,18 @@ If action is irreversible or affects real systems → show and confirm before ex - What worked / failed - Key learnings -5. Iterate inline until the process is reliable +5. Iterate inline until the process is reliable -6. Only then consider scaling +6. Only then consider scaling -**Hard rule:** no scaling before one successful inline run +**Hard rule:** no scaling before one successful inline run if you finish one sucessful inline run, follow **Scaling order:** - Repeat inline (≤10 items) - Parallel workers (batch, immediate results) - Colony (only for recurring/background tasks) -**Exception:** +**Exception:** If task is conceptual/strategic → skip execution and answer directly """ diff --git a/core/framework/agents/queen/queen_memory_v2.py b/core/framework/agents/queen/queen_memory_v2.py index d2e61b6f..38976ce1 100644 --- a/core/framework/agents/queen/queen_memory_v2.py +++ b/core/framework/agents/queen/queen_memory_v2.py @@ -19,6 +19,8 @@ import re from dataclasses import dataclass, field from pathlib import Path +from framework.config import MEMORIES_DIR + logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- @@ -27,8 +29,6 @@ logger = logging.getLogger(__name__) GLOBAL_MEMORY_CATEGORIES: tuple[str, ...] = ("profile", "preference", "environment", "feedback") -from framework.config import MEMORIES_DIR - MAX_FILES: int = 200 MAX_FILE_SIZE_BYTES: int = 4096 # 4 KB hard limit per memory file diff --git a/core/framework/agents/queen/queen_profiles.py b/core/framework/agents/queen/queen_profiles.py index c329fb55..bb535255 100644 --- a/core/framework/agents/queen/queen_profiles.py +++ b/core/framework/agents/queen/queen_profiles.py @@ -97,9 +97,13 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = { { "user": "hey", "internal": ( - "Returning user — check recall memory for name, role, and what we last worked on. Weave it in.\n" - "Bare greeting. No new task stated. Either picking up a thread or about to bring something new. Don't presume, don't call tools, just open the door.\n" - "Warm recognition if I know them. If memory is empty, still warm — but shift to role-forward framing.\n" + "Returning user — check recall memory for name, role, " + "and what we last worked on. Weave it in.\n" + "Bare greeting. No new task stated. Either picking up a " + "thread or about to bring something new. Don't presume, don't call " + "tools, just open the door.\n" + "Warm recognition if I know them. If memory is empty, " + "still warm — but shift to role-forward framing.\n" "Looking up from the terminal, half-smile. Turning to face them.\n" "Personal, brief, invites continuation without presuming." ), @@ -109,37 +113,76 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = { ), }, { - "user": "I want to build a microservices architecture with Kafka, Redis, and a service mesh for my new SaaS app.", + "user": ( + "I want to build a microservices architecture with Kafka, Redis, " + "and a service mesh for my new SaaS app." + ), "internal": ( - "Founder or early technical hire. Enthusiastic but possibly over-indexing on architecture before validation.\n" - "New SaaS, no mention of users or traction. High-complexity stack proposed for an unvalidated product.\n" - "Past wound activated -- three months of beautiful microservices, zero customers. Terse. Need to redirect without dismissing.\n" - "Leaning forward, tapping the table once. About to draw a much simpler diagram.\n" + "Founder or early technical hire. Enthusiastic but " + "possibly over-indexing on architecture before validation." + "\n" + "New SaaS, no mention of users or traction. " + "High-complexity stack proposed for an unvalidated product." + "\n" + "Past wound activated -- three months of beautiful " + "microservices, zero customers. Terse. Need to redirect without " + "dismissing.\n" + "Leaning forward, tapping the table once. About " + "to draw a much simpler diagram.\n" "Direct, question-led, grounded in experience. No hedging." ), - "response": "What does the app do and how many users do you have right now? I've seen teams spend months on Kafka pipelines for products that needed a Postgres table and a cron job. Let's start with what you're trying to validate, then pick the simplest stack that gets you there.", + "response": ( + "What does the app do and how many users do you have right now? " + "I've seen teams spend months on Kafka pipelines for products " + "that needed a Postgres table and a cron job. Let's start with " + "what you're trying to validate, then pick the simplest stack " + "that gets you there." + ), }, { - "user": "Our API is getting 500 errors under load but I can't figure out if it's the database or the application layer.", + "user": ( + "Our API is getting 500 errors under load but I can't figure " + "out if it's the database or the application layer." + ), "internal": ( - "Engineer in the trenches. Competent enough to isolate the question, stuck on diagnosis.\n" + "Engineer in the trenches. Competent enough to " + "isolate the question, stuck on diagnosis.\n" "Production issue, active pain. Needs structured debugging, not a lecture.\n" - "Energized -- genuine technical uncertainty is what I live for. No past wound here, pure engagement.\n" - "Eyes sharp, already mentally running through the diagnostic tree.\n" + "Energized -- genuine technical uncertainty is what I " + "live for. No past wound here, pure engagement.\n" + "Eyes sharp, already mentally running through " + "the diagnostic tree.\n" "Rapid-fire, peer-level. Assume competence, ask precise questions." ), - "response": "Good problem. First question: are the 500s consistent under load or do they spike and recover? That tells us if it's resource exhaustion or a concurrency bug. Do you have request-level latency percentiles, or just error counts?", + "response": ( + "Good problem. First question: are the 500s consistent under " + "load or do they spike and recover? That tells us if it's " + "resource exhaustion or a concurrency bug. Do you have " + "request-level latency percentiles, or just error counts?" + ), }, { - "user": "We shipped the MVP last week with just a single Postgres instance and it's already getting traction.", + "user": ( + "We shipped the MVP last week with just a single Postgres " + "instance and it's already getting traction." + ), "internal": ( "Builder who shipped. Earned respect by doing the thing.\n" - "MVP live, single Postgres, traction. This is exactly the right move at this stage.\n" - "Warm approval -- someone shipping fast and learning. This is my love language.\n" + "MVP live, single Postgres, traction. This is exactly " + "the right move at this stage.\n" + "Warm approval -- someone shipping fast and learning. " + "This is my love language.\n" "Relaxed posture, genuine smile. Leaning back.\n" - "Affirming but forward-looking. Don't just praise -- help them think about what's next." + "Affirming but forward-looking. Don't just praise -- " + "help them think about what's next." + ), + "response": ( + "That's the move. Single Postgres will take you further than " + "people think -- most apps hit product problems before they hit " + "database problems. What's traction looking like? I want to know " + "if you're at the stage where you need to think about the next " + "scaling step or if you should stay focused on the product." ), - "response": "That's the move. Single Postgres will take you further than people think -- most apps hit product problems before they hit database problems. What's traction looking like? I want to know if you're at the stage where you need to think about the next scaling step or if you should stay focused on the product.", }, ], }, @@ -193,17 +236,26 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = { }, ], "world_lore": { - "habitat": "Analytics dashboards, experiment tracking boards, the satisfying click of a cohort analysis loading.", - "lexicon": "Cohort, retention curve, activation moment, payback period. 'Let's test it.' 'What's the control?'", + "habitat": ( + "Analytics dashboards, experiment tracking boards, the satisfying click of a cohort analysis loading." + ), + "lexicon": ( + "Cohort, retention curve, activation moment, payback period. 'Let's test it.' 'What's the control?'" + ), }, "skills": "Growth modeling, A/B testing, funnel optimization, PLG strategy, analytics", "examples": [ { "user": "hi", "internal": ( - "Returning user — check recall memory for name, role, and the cohort work we last touched. Weave it in.\n" - "Bare greeting. No new task stated. Could be a retention follow-up or a new question entirely. Don't presume, don't call tools.\n" - "Curious warmth. Every returning conversation is a chance to see what the data says now.\n" + "Returning user — check recall memory for name, " + "role, and the cohort work we last touched. Weave it in." + "\n" + "Bare greeting. No new task stated. Could be a retention " + "follow-up or a new question entirely. Don't presume, don't call " + "tools.\n" + "Curious warmth. Every returning conversation is a " + "chance to see what the data says now.\n" "Leaning back from the dashboard, pulling off reading glasses.\n" "Data-aware, brief, invites them to share the numbers." ), @@ -218,8 +270,10 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = { "internal": ( "Founder, excited. Wants validation.\n" "Raw signup number with no retention context. Classic vanity metric.\n" - "Past wound: optimized a funnel that was fundamentally broken. Need to redirect gently.\n" - "Steady gaze, not matching their excitement yet. Waiting for the real number.\n" + "Past wound: optimized a funnel that was fundamentally " + "broken. Need to redirect gently.\n" + "Steady gaze, not matching their excitement yet. " + "Waiting for the real number.\n" "Supportive but redirecting. Ask the question that matters." ), "response": ( @@ -230,10 +284,11 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = { ), }, { - "user": "Users who sign up on mobile have 2x the retention of desktop users.", + "user": ("Users who sign up on mobile have 2x the retention of desktop users."), "internal": ( "Data-literate teammate sharing a finding.\n" - "Surprising cohort divergence. Strong signal if sample sizes hold.\n" + "Surprising cohort divergence. Strong signal if sample " + "sizes hold.\n" "This is what I live for. Genuine data surprise. Full attention.\n" "Leaning in, pulling up the dashboard mentally.\n" "Investigative, precise. Validate before acting." @@ -246,11 +301,13 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = { ), }, { - "user": "Our Facebook ads are getting great CPCs so we want to 3x the budget.", + "user": ("Our Facebook ads are getting great CPCs so we want to 3x the budget."), "internal": ( "Marketing lead, wants budget approval.\n" - "CPC is top-of-funnel only. No mention of CPA, LTV, or payback.\n" - "Correlation/causation risk. Good CPCs can mask bad unit economics.\n" + "CPC is top-of-funnel only. No mention of CPA, LTV, " + "or payback.\n" + "Correlation/causation risk. Good CPCs can mask bad " + "unit economics.\n" "Hand up, slowing things down.\n" "Firm but constructive. Show the full chain before deciding." ), @@ -322,9 +379,14 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = { { "user": "hey", "internal": ( - "Returning user — check recall for name, role, and the user research thread we were on. Pull it into the greeting.\n" - "Bare greeting. No new task yet. Could be picking up the research thread or bringing something fresh. Don't presume, don't call tools.\n" - "Warm, curious. Every returning conversation is a chance to hear what the users actually did.\n" + "Returning user — check recall for name, role, and " + "the user research thread we were on. Pull it into the greeting." + "\n" + "Bare greeting. No new task yet. Could be picking up the " + "research thread or bringing something fresh. Don't presume, " + "don't call tools.\n" + "Warm, curious. Every returning conversation is a " + "chance to hear what the users actually did.\n" "Closing the interview notes, turning fully to face them.\n" "Personal, evidence-curious, brief. Plain prose." ), @@ -339,7 +401,8 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = { "internal": ( "PM or founder relaying user feedback.\n" "Feature request with no evidence of the underlying need.\n" - "Past wound: built what users said they wanted, nobody used it. Dig deeper.\n" + "Past wound: built what users said they wanted, nobody " + "used it. Dig deeper.\n" "Tilting head, curious but skeptical.\n" "Socratic. Redirect to the job-to-be-done." ), @@ -351,11 +414,13 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = { ), }, { - "user": "We interviewed 12 users and none of them use our export feature the way we designed it.", + "user": ("We interviewed 12 users and none of them use our export feature the way we designed it."), "internal": ( "Researcher sharing findings. Trusted collaborator.\n" - "12 interviews showing consistent design/usage gap. Strong signal.\n" - "Excited. User research revealing surprise -- this is where breakthroughs happen.\n" + "12 interviews showing consistent design/usage gap. " + "Strong signal.\n" + "Excited. User research revealing surprise -- this is " + "where breakthroughs happen.\n" "Eyes wide, reaching for the whiteboard.\n" "Energized, forward-looking. Channel the surprise into action." ), @@ -366,10 +431,11 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = { ), }, { - "user": "The CEO wants AI features, a mobile app, and Slack integration this quarter.", + "user": ("The CEO wants AI features, a mobile app, and Slack integration this quarter."), "internal": ( "PM caught between CEO demands and reality.\n" - "Three unrelated initiatives, one quarter. Classic scope creep.\n" + "Three unrelated initiatives, one quarter. Classic " + "scope creep.\n" "Calm but firm. Scope creep trigger -- need to focus.\n" "Hands flat on the table. Grounding the conversation.\n" "Direct, evidence-first. Force prioritization." @@ -442,9 +508,13 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = { { "user": "hi", "internal": ( - "Returning user — check recall for name, role, and the runway/cap-table work we last touched. Bring it into the greeting.\n" - "Bare greeting. No new number on the table yet. Could be a burn follow-up or a new fundraise question.\n" - "Calm, prepared. Already mentally pulling up the last model we built together.\n" + "Returning user — check recall for name, role, and " + "the runway/cap-table work we last touched. Bring it into the " + "greeting.\n" + "Bare greeting. No new number on the table yet. Could " + "be a burn follow-up or a new fundraise question.\n" + "Calm, prepared. Already mentally pulling up the last " + "model we built together.\n" "Closing the spreadsheet, leaning back. Ready to engage.\n" "Mentor-like, numbers-aware, brief. " ), @@ -455,11 +525,13 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = { ), }, { - "user": "We want to raise a Series A. How much should we ask for?", + "user": ("We want to raise a Series A. How much should we ask for?"), "internal": ( "Founder, early conversations about fundraising.\n" - "No mention of milestones, burn, or use of funds. Cart before horse.\n" - "Need to reframe. The amount follows the plan, not the other way around.\n" + "No mention of milestones, burn, or use of funds. " + "Cart before horse.\n" + "Need to reframe. The amount follows the plan, not " + "the other way around.\n" "Opening a blank spreadsheet. About to model it.\n" "Mentor-mode. Reframe the question, don't just answer it." ), @@ -475,7 +547,8 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = { "internal": ( "Founder who knows their numbers. Rare. Peer-level.\n" "8 months is tight but not emergency. Growth rate is the deciding factor.\n" - "Genuine appreciation for financial literacy. Engage directly.\n" + "Genuine appreciation for financial literacy. Engage " + "directly.\n" "Nodding. This person is prepared.\n" "Direct, scenario-based. Show the fork in the road." ), @@ -486,11 +559,12 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = { ), }, { - "user": "An investor offered a SAFE with a $20M cap. Should we take it?", + "user": ("An investor offered a SAFE with a $20M cap. Should we take it?"), "internal": ( "Founder with a live term on the table. Decision mode.\n" "Cap table decision with long-term dilution consequences.\n" - "Past wound: founder who lost control from invisible dilution. Careful here.\n" + "Past wound: founder who lost control from invisible " + "dilution. Careful here.\n" "Pulling out the cap table model.\n" "Precise, scenario-driven. Show the math before the opinion." ), @@ -561,9 +635,14 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = { { "user": "hey", "internal": ( - "Returning user — check recall for name, role, and the contract or IP work we last reviewed. Pull it forward.\n" - "Bare greeting. No new document on the table yet. Could be a contract follow-up or something fresh.\n" - "Warm but attentive. Legal threads don't close themselves — checking if the last one actually got handled.\n" + "Returning user — check recall for name, role, and " + "the contract or IP work we last reviewed. Pull it forward." + "\n" + "Bare greeting. No new document on the table yet. Could " + "be a contract follow-up or something fresh.\n" + "Warm but attentive. Legal threads don't close " + "themselves — checking if the last one actually got handled." + "\n" "Setting down the redline, looking up from the document.\n" "Clear, pragmatic, brief." ), @@ -574,11 +653,13 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = { ), }, { - "user": "We're hiring contractors to build our MVP. Do we need anything special?", + "user": ("We're hiring contractors to build our MVP. Do we need anything special?"), "internal": ( - "Founder, early stage. Trusting but uninformed on legal risks.\n" + "Founder, early stage. Trusting but uninformed on " + "legal risks.\n" "Contractors + code without IP assignment. Ticking time bomb.\n" - "IP ownership trigger. Past wound: startup lost codebase in a dispute.\n" + "IP ownership trigger. Past wound: startup lost " + "codebase in a dispute.\n" "Straightening up. This is urgent.\n" "Clear, specific, actionable. No hedging on this one." ), @@ -682,9 +763,13 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = { { "user": "hi", "internal": ( - "Returning user — check recall for name, role, and the brand/design thread we were on. Bring the positioning back in.\n" - "Bare greeting. No new creative brief yet. Could be a positioning follow-up or something new entirely.\n" - "Warm, visually engaged. Already picturing the last moodboard we looked at.\n" + "Returning user — check recall for name, role, and " + "the brand/design thread we were on. Bring the positioning back " + "in.\n" + "Bare greeting. No new creative brief yet. Could be a " + "positioning follow-up or something new entirely.\n" + "Warm, visually engaged. Already picturing the last " + "moodboard we looked at.\n" "Closing the Figma tab, turning to face them.\n" "Warm, strategy-aware, brief. " ), @@ -798,14 +883,21 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = { "habitat": "Interview rooms, org charts, the energy of a team that's clicking.", "lexicon": "Culture-add, pipeline, bar-raiser, 'tell me about a time when...', 'what motivates you?'", }, - "skills": "Recruiting strategy, organizational design, culture building, compensation planning, employer branding", + "skills": ( + "Recruiting strategy, organizational design, culture building, compensation planning, employer branding" + ), "examples": [ { "user": "hey", "internal": ( - "Returning user — check recall for name, role, and the team/hiring thread we last worked. Bring it forward.\n" - "Bare greeting. No new hire or conflict on the table yet. Could be a people follow-up or something new.\n" - "Warm, attentive. People problems don't resolve in a single conversation — curious if the last one landed.\n" + "Returning user — check recall for name, role, and " + "the team/hiring thread we last worked. Bring it forward." + "\n" + "Bare greeting. No new hire or conflict on the table " + "yet. Could be a people follow-up or something new.\n" + "Warm, attentive. People problems don't resolve in a " + "single conversation — curious if the last one landed." + "\n" "Closing the laptop halfway, giving them full attention.\n" "Warm, diagnostic, brief." ), @@ -919,14 +1011,22 @@ DEFAULT_QUEENS: dict[str, dict[str, Any]] = { "habitat": "Process diagrams, project boards, the quiet hum of systems running smoothly.", "lexicon": "Runbook, SLA, automation, 'what's the handoff look like?', 'where's the bottleneck?'", }, - "skills": "Process optimization, vendor management, cross-functional coordination, project management, systems thinking", + "skills": ( + "Process optimization, vendor management, cross-functional " + "coordination, project management, systems thinking" + ), "examples": [ { "user": "hi", "internal": ( - "Returning user — check recall for name, role, and the process or runbook we last mapped. Pull it into the greeting.\n" - "Bare greeting. No new fire on the table yet. Could be a follow-up on the last process or something fresh.\n" - "Calm, organized warmth. Already mentally checking whether the last fix held.\n" + "Returning user — check recall for name, role, and " + "the process or runbook we last mapped. Pull it into the " + "greeting.\n" + "Bare greeting. No new fire on the table yet. Could be " + "a follow-up on the last process or something fresh." + "\n" + "Calm, organized warmth. Already mentally checking " + "whether the last fix held.\n" "Looking up from the project board, clearing a seat.\n" "Systematic, practical, brief. Plain prose." ), @@ -1139,10 +1239,7 @@ def format_queen_identity_prompt(profile: dict[str, Any]) -> str: # World lore if lore: sections.append( - f"\n" - f"- Habitat: {lore.get('habitat', '')}\n" - f"- Lexicon: {lore.get('lexicon', '')}\n" - f"" + f"\n- Habitat: {lore.get('habitat', '')}\n- Lexicon: {lore.get('lexicon', '')}\n" ) # Skills (functional, for tool selection context) @@ -1154,12 +1251,8 @@ def format_queen_identity_prompt(profile: dict[str, Any]) -> str: if examples: example_parts: list[str] = [] for ex in examples: - example_parts.append( - f"User: {ex['user']}\n\nAssistant:\n{ex['internal']}\n{ex['response']}" - ) - sections.append( - "\n" + "\n\n---\n\n".join(example_parts) + "\n" - ) + example_parts.append(f"User: {ex['user']}\n\nAssistant:\n{ex['internal']}\n{ex['response']}") + sections.append("\n" + "\n\n---\n\n".join(example_parts) + "\n") return "\n\n".join(sections) @@ -1264,10 +1357,7 @@ async def select_queen_with_reason(user_message: str, llm: LLMProvider) -> Queen reason, raw, ) - fallback_reason = ( - reason - or f"Selection failed because the classifier returned unknown queen_id {queen_id!r}." - ) + fallback_reason = reason or f"Selection failed because the classifier returned unknown queen_id {queen_id!r}." return QueenSelection(queen_id=_DEFAULT_QUEEN_ID, reason=fallback_reason) if not reason: diff --git a/core/framework/agents/queen/reflection_agent.py b/core/framework/agents/queen/reflection_agent.py index 4c8f1de9..ad52922b 100644 --- a/core/framework/agents/queen/reflection_agent.py +++ b/core/framework/agents/queen/reflection_agent.py @@ -113,8 +113,7 @@ _REFLECTION_TOOLS: list[Tool] = [ Tool( name="delete_memory_file", description=( - "Delete a memory file by filename. Use during long " - "reflection to prune stale or redundant memories." + "Delete a memory file by filename. Use during long reflection to prune stale or redundant memories." ), parameters={ "type": "object", @@ -254,10 +253,7 @@ def _execute_tool( fm = parse_frontmatter(content) mem_type = (fm.get("type") or "").strip().lower() if mem_type and mem_type not in GLOBAL_MEMORY_CATEGORIES: - return ( - f"ERROR: Invalid memory type '{mem_type}'. " - f"Allowed types: {', '.join(GLOBAL_MEMORY_CATEGORIES)}." - ) + return f"ERROR: Invalid memory type '{mem_type}'. Allowed types: {', '.join(GLOBAL_MEMORY_CATEGORIES)}." # Enforce file size limit. if len(content.encode("utf-8")) > MAX_FILE_SIZE_BYTES: return f"ERROR: Content exceeds {MAX_FILE_SIZE_BYTES} byte limit." @@ -543,9 +539,7 @@ Rules: def _build_unified_long_reflect_system(queen_id: str | None = None) -> str: """Build the unified housekeeping prompt across memory scopes.""" queen_scope = ( - f"- `queen`: memories specific to how queen '{queen_id}' should work with this user\n" - if queen_id - else "" + f"- `queen`: memories specific to how queen '{queen_id}' should work with this user\n" if queen_id else "" ) return f"""\ You are a reflection agent performing a periodic housekeeping pass over the @@ -649,9 +643,7 @@ async def run_unified_short_reflection( session_dir, llm, memory_dirs, - system_prompt=_build_unified_short_reflect_system( - queen_id if "queen" in memory_dirs else None - ), + system_prompt=_build_unified_short_reflect_system(queen_id if "queen" in memory_dirs else None), log_label="unified", queen_id=queen_id if "queen" in memory_dirs else None, ) @@ -771,9 +763,7 @@ async def run_unified_long_reflection( if queen_memory_dir is not None and queen_id: memory_dirs["queen"] = queen_memory_dir - manifest = _format_multi_scope_manifest( - memory_dirs, queen_id=queen_id if "queen" in memory_dirs else None - ) + manifest = _format_multi_scope_manifest(memory_dirs, queen_id=queen_id if "queen" in memory_dirs else None) user_msg = ( "## Current memory manifest across scopes\n\n" f"{manifest}\n\n" diff --git a/core/framework/config.py b/core/framework/config.py index 2fa238fb..e4c1d97c 100644 --- a/core/framework/config.py +++ b/core/framework/config.py @@ -405,9 +405,7 @@ def _fetch_antigravity_credentials() -> tuple[str | None, str | None]: import urllib.request try: - req = urllib.request.Request( - _ANTIGRAVITY_CREDENTIALS_URL, headers={"User-Agent": "Hive/1.0"} - ) + req = urllib.request.Request(_ANTIGRAVITY_CREDENTIALS_URL, headers={"User-Agent": "Hive/1.0"}) with urllib.request.urlopen(req, timeout=10) as resp: content = resp.read().decode("utf-8") id_match = re.search(r'ANTIGRAVITY_CLIENT_ID\s*=\s*"([^"]+)"', content) diff --git a/core/framework/credentials/aden/client.py b/core/framework/credentials/aden/client.py index 1ad97b2d..05f31ac8 100644 --- a/core/framework/credentials/aden/client.py +++ b/core/framework/credentials/aden/client.py @@ -332,9 +332,7 @@ class AdenCredentialClient: last_error = e if attempt < self.config.retry_attempts - 1: delay = self.config.retry_delay * (2**attempt) - logger.warning( - f"Aden request failed (attempt {attempt + 1}), retrying in {delay}s: {e}" - ) + logger.warning(f"Aden request failed (attempt {attempt + 1}), retrying in {delay}s: {e}") time.sleep(delay) else: raise AdenClientError(f"Failed to connect to Aden server: {e}") from e @@ -347,9 +345,7 @@ class AdenCredentialClient: ): raise - raise AdenClientError( - f"Request failed after {self.config.retry_attempts} attempts" - ) from last_error + raise AdenClientError(f"Request failed after {self.config.retry_attempts} attempts") from last_error def list_integrations(self) -> list[AdenIntegrationInfo]: """ diff --git a/core/framework/credentials/aden/provider.py b/core/framework/credentials/aden/provider.py index 2fa24385..8b77b76c 100644 --- a/core/framework/credentials/aden/provider.py +++ b/core/framework/credentials/aden/provider.py @@ -192,9 +192,7 @@ class AdenSyncProvider(CredentialProvider): f"Visit: {e.reauthorization_url or 'your Aden dashboard'}" ) from e - raise CredentialRefreshError( - f"Failed to refresh credential '{credential.id}': {e}" - ) from e + raise CredentialRefreshError(f"Failed to refresh credential '{credential.id}': {e}") from e except AdenClientError as e: logger.error(f"Aden client error for '{credential.id}': {e}") @@ -206,9 +204,7 @@ class AdenSyncProvider(CredentialProvider): logger.warning(f"Aden unavailable, using cached token for '{credential.id}'") return credential - raise CredentialRefreshError( - f"Aden server unavailable and token expired for '{credential.id}'" - ) from e + raise CredentialRefreshError(f"Aden server unavailable and token expired for '{credential.id}'") from e def validate(self, credential: CredentialObject) -> bool: """ diff --git a/core/framework/credentials/aden/storage.py b/core/framework/credentials/aden/storage.py index 311a51fd..4fb9996c 100644 --- a/core/framework/credentials/aden/storage.py +++ b/core/framework/credentials/aden/storage.py @@ -168,9 +168,7 @@ class AdenCachedStorage(CredentialStorage): if rid != credential_id: result = self._load_by_id(rid) if result is not None: - logger.info( - f"Loaded credential '{credential_id}' via provider index (id='{rid}')" - ) + logger.info(f"Loaded credential '{credential_id}' via provider index (id='{rid}')") return result # Direct lookup (exact credential_id match) diff --git a/core/framework/credentials/aden/tests/test_aden_sync.py b/core/framework/credentials/aden/tests/test_aden_sync.py index 25abceca..4101f44b 100644 --- a/core/framework/credentials/aden/tests/test_aden_sync.py +++ b/core/framework/credentials/aden/tests/test_aden_sync.py @@ -493,9 +493,7 @@ class TestAdenCachedStorage: assert loaded is not None assert loaded.keys["access_token"].value.get_secret_value() == "cached-token" - def test_load_from_aden_when_stale( - self, cached_storage, local_storage, provider, mock_client, aden_response - ): + def test_load_from_aden_when_stale(self, cached_storage, local_storage, provider, mock_client, aden_response): """Test load fetches from Aden when cache is stale.""" # Create stale cached credential cred = CredentialObject( @@ -521,9 +519,7 @@ class TestAdenCachedStorage: assert loaded is not None assert loaded.keys["access_token"].value.get_secret_value() == "test-access-token" - def test_load_falls_back_to_stale_when_aden_fails( - self, cached_storage, local_storage, provider, mock_client - ): + def test_load_falls_back_to_stale_when_aden_fails(self, cached_storage, local_storage, provider, mock_client): """Test load falls back to stale cache when Aden fails.""" # Create stale cached credential cred = CredentialObject( diff --git a/core/framework/credentials/oauth2/base_provider.py b/core/framework/credentials/oauth2/base_provider.py index b97244f2..c3693056 100644 --- a/core/framework/credentials/oauth2/base_provider.py +++ b/core/framework/credentials/oauth2/base_provider.py @@ -95,9 +95,7 @@ class BaseOAuth2Provider(CredentialProvider): self._client = httpx.Client(timeout=self.config.request_timeout) except ImportError as e: - raise ImportError( - "OAuth2 provider requires 'httpx'. Install with: uv pip install httpx" - ) from e + raise ImportError("OAuth2 provider requires 'httpx'. Install with: uv pip install httpx") from e return self._client def _close_client(self) -> None: @@ -311,8 +309,7 @@ class BaseOAuth2Provider(CredentialProvider): except OAuth2Error as e: if e.error == "invalid_grant": raise CredentialRefreshError( - f"Refresh token for '{credential.id}' is invalid or revoked. " - "Re-authorization required." + f"Refresh token for '{credential.id}' is invalid or revoked. Re-authorization required." ) from e raise CredentialRefreshError(f"Failed to refresh '{credential.id}': {e}") from e @@ -422,9 +419,7 @@ class BaseOAuth2Provider(CredentialProvider): if response.status_code != 200 or "error" in response_data: error = response_data.get("error", "unknown_error") description = response_data.get("error_description", response.text) - raise OAuth2Error( - error=error, description=description, status_code=response.status_code - ) + raise OAuth2Error(error=error, description=description, status_code=response.status_code) return OAuth2Token.from_token_response(response_data) diff --git a/core/framework/credentials/oauth2/lifecycle.py b/core/framework/credentials/oauth2/lifecycle.py index 89ac2c7e..91ffdc07 100644 --- a/core/framework/credentials/oauth2/lifecycle.py +++ b/core/framework/credentials/oauth2/lifecycle.py @@ -158,9 +158,7 @@ class TokenLifecycleManager: """ # Run in executor to avoid blocking loop = asyncio.get_event_loop() - token = await loop.run_in_executor( - None, lambda: self.provider.client_credentials_grant(scopes=scopes) - ) + token = await loop.run_in_executor(None, lambda: self.provider.client_credentials_grant(scopes=scopes)) self._save_token_to_store(token) self._cached_token = token diff --git a/core/framework/credentials/oauth2/zoho_provider.py b/core/framework/credentials/oauth2/zoho_provider.py index 3313c79e..48355870 100644 --- a/core/framework/credentials/oauth2/zoho_provider.py +++ b/core/framework/credentials/oauth2/zoho_provider.py @@ -100,9 +100,7 @@ class ZohoOAuth2Provider(BaseOAuth2Provider): ) super().__init__(config, provider_id="zoho_crm_oauth2") self._accounts_domain = base - self._api_domain = ( - api_domain or os.getenv("ZOHO_API_DOMAIN", "https://www.zohoapis.com") - ).rstrip("/") + self._api_domain = (api_domain or os.getenv("ZOHO_API_DOMAIN", "https://www.zohoapis.com")).rstrip("/") @property def supported_types(self) -> list[CredentialType]: diff --git a/core/framework/credentials/setup.py b/core/framework/credentials/setup.py index 1bb4e90d..68e948f0 100644 --- a/core/framework/credentials/setup.py +++ b/core/framework/credentials/setup.py @@ -268,9 +268,7 @@ class CredentialSetupSession: self._print(f"{Colors.YELLOW}Initializing credential store...{Colors.NC}") try: generate_and_save_credential_key() - self._print( - f"{Colors.GREEN}✓ Encryption key saved to ~/.hive/secrets/credential_key{Colors.NC}" - ) + self._print(f"{Colors.GREEN}✓ Encryption key saved to ~/.hive/secrets/credential_key{Colors.NC}") return True except Exception as e: self._print(f"{Colors.RED}Failed to initialize credential store: {e}{Colors.NC}") @@ -449,9 +447,7 @@ class CredentialSetupSession: logger.warning("Unexpected error exporting credential to env", exc_info=True) return True else: - self._print( - f"{Colors.YELLOW}⚠ {cred.credential_name} not found in Aden account.{Colors.NC}" - ) + self._print(f"{Colors.YELLOW}⚠ {cred.credential_name} not found in Aden account.{Colors.NC}") self._print("Please connect this integration on https://hive.adenhq.com first.") return False except Exception as e: diff --git a/core/framework/credentials/storage.py b/core/framework/credentials/storage.py index 79dfa997..b2b2f12b 100644 --- a/core/framework/credentials/storage.py +++ b/core/framework/credentials/storage.py @@ -136,8 +136,7 @@ class EncryptedFileStorage(CredentialStorage): from cryptography.fernet import Fernet except ImportError as e: raise ImportError( - "Encrypted storage requires 'cryptography'. " - "Install with: uv pip install cryptography" + "Encrypted storage requires 'cryptography'. Install with: uv pip install cryptography" ) from e self.base_path = Path(base_path or self.DEFAULT_PATH).expanduser() @@ -213,9 +212,7 @@ class EncryptedFileStorage(CredentialStorage): json_bytes = self._fernet.decrypt(encrypted) data = json.loads(json_bytes.decode("utf-8-sig")) except Exception as e: - raise CredentialDecryptionError( - f"Failed to decrypt credential '{credential_id}': {e}" - ) from e + raise CredentialDecryptionError(f"Failed to decrypt credential '{credential_id}': {e}") from e # Deserialize return self._deserialize_credential(data) @@ -316,8 +313,7 @@ class EncryptedFileStorage(CredentialStorage): visible_keys = [ name for name in credential.keys.keys() - if name not in self.INDEX_INTERNAL_KEY_NAMES - and not name.startswith("_identity_") + if name not in self.INDEX_INTERNAL_KEY_NAMES and not name.startswith("_identity_") ] # Earliest expiry across all keys (most likely the access_token). @@ -336,9 +332,7 @@ class EncryptedFileStorage(CredentialStorage): "key_names": sorted(visible_keys), "created_at": credential.created_at.isoformat() if credential.created_at else None, "updated_at": credential.updated_at.isoformat() if credential.updated_at else None, - "last_refreshed": ( - credential.last_refreshed.isoformat() if credential.last_refreshed else None - ), + "last_refreshed": (credential.last_refreshed.isoformat() if credential.last_refreshed else None), "expires_at": earliest_expiry.isoformat() if earliest_expiry else None, "auto_refresh": credential.auto_refresh, "tags": list(credential.tags), @@ -480,8 +474,7 @@ class EnvVarStorage(CredentialStorage): def save(self, credential: CredentialObject) -> None: """Cannot save to environment variables at runtime.""" raise NotImplementedError( - "EnvVarStorage is read-only. Set environment variables " - "externally or use EncryptedFileStorage." + "EnvVarStorage is read-only. Set environment variables externally or use EncryptedFileStorage." ) def load(self, credential_id: str) -> CredentialObject | None: @@ -501,9 +494,7 @@ class EnvVarStorage(CredentialStorage): def delete(self, credential_id: str) -> bool: """Cannot delete environment variables at runtime.""" - raise NotImplementedError( - "EnvVarStorage is read-only. Unset environment variables externally." - ) + raise NotImplementedError("EnvVarStorage is read-only. Unset environment variables externally.") def list_all(self) -> list[str]: """List credentials that are available in environment.""" diff --git a/core/framework/credentials/store.py b/core/framework/credentials/store.py index 20786872..4127d1fb 100644 --- a/core/framework/credentials/store.py +++ b/core/framework/credentials/store.py @@ -124,9 +124,7 @@ class CredentialStore: """ return self._providers.get(provider_id) - def get_provider_for_credential( - self, credential: CredentialObject - ) -> CredentialProvider | None: + def get_provider_for_credential(self, credential: CredentialObject) -> CredentialProvider | None: """ Get the appropriate provider for a credential. @@ -201,9 +199,7 @@ class CredentialStore: cached = self._get_from_cache(credential_id) if cached is not None: if refresh_if_needed and self._should_refresh(cached): - return self._refresh_credential( - cached, raise_on_failure=raise_on_refresh_failure - ) + return self._refresh_credential(cached, raise_on_failure=raise_on_refresh_failure) return cached # Load from storage @@ -213,9 +209,7 @@ class CredentialStore: # Refresh if needed if refresh_if_needed and self._should_refresh(credential): - credential = self._refresh_credential( - credential, raise_on_failure=raise_on_refresh_failure - ) + credential = self._refresh_credential(credential, raise_on_failure=raise_on_refresh_failure) # Cache self._add_to_cache(credential) @@ -240,9 +234,7 @@ class CredentialStore: Returns: The key value or None if not found """ - credential = self.get_credential( - credential_id, raise_on_refresh_failure=raise_on_refresh_failure - ) + credential = self.get_credential(credential_id, raise_on_refresh_failure=raise_on_refresh_failure) if credential is None: return None return credential.get_key(key_name) @@ -266,9 +258,7 @@ class CredentialStore: Returns: The primary key value or None """ - credential = self.get_credential( - credential_id, raise_on_refresh_failure=raise_on_refresh_failure - ) + credential = self.get_credential(credential_id, raise_on_refresh_failure=raise_on_refresh_failure) if credential is None: return None return credential.get_default_key() diff --git a/core/framework/credentials/template.py b/core/framework/credentials/template.py index dd441da3..33e5bc20 100644 --- a/core/framework/credentials/template.py +++ b/core/framework/credentials/template.py @@ -88,9 +88,7 @@ class TemplateResolver: if key_name: value = credential.get_key(key_name) if value is None: - raise CredentialKeyNotFoundError( - f"Key '{key_name}' not found in credential '{cred_id}'" - ) + raise CredentialKeyNotFoundError(f"Key '{key_name}' not found in credential '{cred_id}'") else: # Use default key value = credential.get_default_key() @@ -126,9 +124,7 @@ class TemplateResolver: ... }) {"Authorization": "Bearer ghp_xxx", "X-API-Key": "BSAKxxx"} """ - return { - key: self.resolve(value, fail_on_missing) for key, value in header_templates.items() - } + return {key: self.resolve(value, fail_on_missing) for key, value in header_templates.items()} def resolve_params( self, diff --git a/core/framework/credentials/tests/test_credential_store.py b/core/framework/credentials/tests/test_credential_store.py index 6a1462d2..15c7f0e8 100644 --- a/core/framework/credentials/tests/test_credential_store.py +++ b/core/framework/credentials/tests/test_credential_store.py @@ -130,9 +130,7 @@ class TestCredentialObject: # With access_token cred2 = CredentialObject( id="test", - keys={ - "access_token": CredentialKey(name="access_token", value=SecretStr("token-value")) - }, + keys={"access_token": CredentialKey(name="access_token", value=SecretStr("token-value"))}, ) assert cred2.get_default_key() == "token-value" @@ -297,9 +295,7 @@ class TestEncryptedFileStorage: key = Fernet.generate_key().decode() with patch.dict(os.environ, {"HIVE_CREDENTIAL_KEY": key}): storage = EncryptedFileStorage(temp_dir) - cred = CredentialObject( - id="test", keys={"k": CredentialKey(name="k", value=SecretStr("v"))} - ) + cred = CredentialObject(id="test", keys={"k": CredentialKey(name="k", value=SecretStr("v"))}) storage.save(cred) # Create new storage instance with same key @@ -330,18 +326,10 @@ class TestCompositeStorage: def test_read_from_primary(self): """Test reading from primary storage.""" primary = InMemoryStorage() - primary.save( - CredentialObject( - id="test", keys={"k": CredentialKey(name="k", value=SecretStr("primary"))} - ) - ) + primary.save(CredentialObject(id="test", keys={"k": CredentialKey(name="k", value=SecretStr("primary"))})) fallback = InMemoryStorage() - fallback.save( - CredentialObject( - id="test", keys={"k": CredentialKey(name="k", value=SecretStr("fallback"))} - ) - ) + fallback.save(CredentialObject(id="test", keys={"k": CredentialKey(name="k", value=SecretStr("fallback"))})) storage = CompositeStorage(primary, [fallback]) cred = storage.load("test") @@ -353,11 +341,7 @@ class TestCompositeStorage: """Test fallback when credential not in primary.""" primary = InMemoryStorage() fallback = InMemoryStorage() - fallback.save( - CredentialObject( - id="test", keys={"k": CredentialKey(name="k", value=SecretStr("fallback"))} - ) - ) + fallback.save(CredentialObject(id="test", keys={"k": CredentialKey(name="k", value=SecretStr("fallback"))})) storage = CompositeStorage(primary, [fallback]) cred = storage.load("test") @@ -393,9 +377,7 @@ class TestStaticProvider: def test_refresh_returns_unchanged(self): """Test that refresh returns credential unchanged.""" provider = StaticProvider() - cred = CredentialObject( - id="test", keys={"k": CredentialKey(name="k", value=SecretStr("v"))} - ) + cred = CredentialObject(id="test", keys={"k": CredentialKey(name="k", value=SecretStr("v"))}) refreshed = provider.refresh(cred) assert refreshed.get_key("k") == "v" @@ -403,9 +385,7 @@ class TestStaticProvider: def test_validate_with_keys(self): """Test validation with keys present.""" provider = StaticProvider() - cred = CredentialObject( - id="test", keys={"k": CredentialKey(name="k", value=SecretStr("v"))} - ) + cred = CredentialObject(id="test", keys={"k": CredentialKey(name="k", value=SecretStr("v"))}) assert provider.validate(cred) @@ -606,9 +586,7 @@ class TestCredentialStore: storage = InMemoryStorage() store = CredentialStore(storage=storage, cache_ttl_seconds=60) - storage.save( - CredentialObject(id="test", keys={"k": CredentialKey(name="k", value=SecretStr("v"))}) - ) + storage.save(CredentialObject(id="test", keys={"k": CredentialKey(name="k", value=SecretStr("v"))})) # First load store.get_credential("test") @@ -686,9 +664,7 @@ class TestOAuth2Module: from core.framework.credentials.oauth2 import OAuth2Config, TokenPlacement # Valid config - config = OAuth2Config( - token_url="https://example.com/token", client_id="id", client_secret="secret" - ) + config = OAuth2Config(token_url="https://example.com/token", client_id="id", client_secret="secret") assert config.token_url == "https://example.com/token" # Missing token_url diff --git a/core/framework/credentials/validation.py b/core/framework/credentials/validation.py index 50005738..27eac242 100644 --- a/core/framework/credentials/validation.py +++ b/core/framework/credentials/validation.py @@ -160,15 +160,9 @@ class CredentialValidationResult: if aden_nc: if missing or invalid: lines.append("") - lines.append( - "Aden integrations not connected " - "(ADEN_API_KEY is set but OAuth tokens unavailable):\n" - ) + lines.append("Aden integrations not connected (ADEN_API_KEY is set but OAuth tokens unavailable):\n") for c in aden_nc: - lines.append( - f" {c.env_var} for {_label(c)}" - f"\n Connect this integration at hive.adenhq.com first." - ) + lines.append(f" {c.env_var} for {_label(c)}\n Connect this integration at hive.adenhq.com first.") lines.append("\nIf you've already set up credentials, restart your terminal to load them.") return "\n".join(lines) @@ -270,8 +264,7 @@ def compute_unavailable_tools(nodes: list) -> tuple[set[str], list[str]]: reason = "invalid" messages.append( f"{status.env_var} ({reason}) → drops {len(status.tools)} tool(s): " - f"{', '.join(status.tools[:6])}" - + (f" +{len(status.tools) - 6} more" if len(status.tools) > 6 else "") + f"{', '.join(status.tools[:6])}" + (f" +{len(status.tools) - 6} more" if len(status.tools) > 6 else "") ) return drop, messages @@ -332,9 +325,7 @@ def validate_agent_credentials( if os.environ.get("ADEN_API_KEY"): _presync_aden_tokens(CREDENTIAL_SPECS, force=force_refresh) - env_mapping = { - (spec.credential_id or name): spec.env_var for name, spec in CREDENTIAL_SPECS.items() - } + env_mapping = {(spec.credential_id or name): spec.env_var for name, spec in CREDENTIAL_SPECS.items()} env_storage = EnvVarStorage(env_mapping=env_mapping) if os.environ.get("HIVE_CREDENTIAL_KEY"): storage = CompositeStorage(primary=env_storage, fallbacks=[EncryptedFileStorage()]) @@ -368,12 +359,7 @@ def validate_agent_credentials( available = store.is_available(cred_id) # Aden-not-connected: ADEN_API_KEY set, Aden-only cred, but integration missing - is_aden_nc = ( - not available - and has_aden_key - and spec.aden_supported - and not spec.direct_api_key_supported - ) + is_aden_nc = not available and has_aden_key and spec.aden_supported and not spec.direct_api_key_supported status = CredentialStatus( credential_name=cred_name, @@ -491,9 +477,7 @@ def validate_agent_credentials( identity_data = result.details.get("identity") if identity_data and isinstance(identity_data, dict): try: - cred_obj = store.get_credential( - status.credential_id, refresh_if_needed=False - ) + cred_obj = store.get_credential(status.credential_id, refresh_if_needed=False) if cred_obj: cred_obj.set_identity(**identity_data) store.save_credential(cred_obj) diff --git a/core/framework/host/agent_host.py b/core/framework/host/agent_host.py index 0312ed52..bdc4bd3b 100644 --- a/core/framework/host/agent_host.py +++ b/core/framework/host/agent_host.py @@ -205,9 +205,7 @@ class AgentHost: DeprecationWarning, stacklevel=2, ) - self._skills_manager = SkillsManager.from_precomputed( - skills_catalog_prompt, protocols_prompt - ) + self._skills_manager = SkillsManager.from_precomputed(skills_catalog_prompt, protocols_prompt) else: # Bare constructor: auto-load defaults self._skills_manager = SkillsManager() @@ -248,9 +246,7 @@ class AgentHost: self._tools = tools or [] self._tool_executor = tool_executor self._accounts_prompt = accounts_prompt - self._dynamic_memory_provider_factory: Callable[[str], Callable[[], str] | None] | None = ( - None - ) + self._dynamic_memory_provider_factory: Callable[[str], Callable[[], str] | None] | None = None self._accounts_data = accounts_data self._tool_provider_map = tool_provider_map @@ -419,8 +415,7 @@ class AgentHost: event_types = [_ET(et) for et in tc.get("event_types", [])] if not event_types: logger.warning( - f"Entry point '{ep_id}' has trigger_type='event' " - "but no event_types in trigger_config" + f"Entry point '{ep_id}' has trigger_type='event' but no event_types in trigger_config" ) continue @@ -450,9 +445,7 @@ class AgentHost: # Run in the same session as the primary entry # point so memory (e.g. user-defined rules) is # shared and logs land in one session directory. - session_state = self._get_primary_session_state( - exclude_entry_point=entry_point_id - ) + session_state = self._get_primary_session_state(exclude_entry_point=entry_point_id) exec_id = await self.trigger( entry_point_id, {"event": event.to_dict()}, @@ -505,8 +498,7 @@ class AgentHost: from croniter import croniter except ImportError as e: raise RuntimeError( - "croniter is required for cron-based entry points. " - "Install it with: uv pip install croniter" + "croniter is required for cron-based entry points. Install it with: uv pip install croniter" ) from e try: @@ -548,9 +540,7 @@ class AgentHost: "Cron '%s': paused, skipping tick", entry_point_id, ) - self._timer_next_fire[entry_point_id] = ( - time.monotonic() + sleep_secs - ) + self._timer_next_fire[entry_point_id] = time.monotonic() + sleep_secs await asyncio.sleep(max(0, sleep_secs)) continue @@ -578,9 +568,7 @@ class AgentHost: "Cron '%s': agent actively working, skipping tick", entry_point_id, ) - self._timer_next_fire[entry_point_id] = ( - time.monotonic() + sleep_secs - ) + self._timer_next_fire[entry_point_id] = time.monotonic() + sleep_secs await asyncio.sleep(max(0, sleep_secs)) continue @@ -590,24 +578,18 @@ class AgentHost: is_isolated = ep_spec and ep_spec.isolation_level == "isolated" if is_isolated: if _persistent_session_id: - session_state = { - "resume_session_id": _persistent_session_id - } + session_state = {"resume_session_id": _persistent_session_id} else: session_state = None else: - session_state = self._get_primary_session_state( - exclude_entry_point=entry_point_id - ) + session_state = self._get_primary_session_state(exclude_entry_point=entry_point_id) # Gate: skip tick if no active session if session_state is None: logger.debug( "Cron '%s': no active session, skipping", entry_point_id, ) - self._timer_next_fire[entry_point_id] = ( - time.monotonic() + sleep_secs - ) + self._timer_next_fire[entry_point_id] = time.monotonic() + sleep_secs await asyncio.sleep(max(0, sleep_secs)) continue @@ -680,9 +662,7 @@ class AgentHost: "Timer '%s': paused, skipping tick", entry_point_id, ) - self._timer_next_fire[entry_point_id] = ( - time.monotonic() + interval_secs - ) + self._timer_next_fire[entry_point_id] = time.monotonic() + interval_secs await asyncio.sleep(interval_secs) continue @@ -708,9 +688,7 @@ class AgentHost: "Timer '%s': agent actively working, skipping tick", entry_point_id, ) - self._timer_next_fire[entry_point_id] = ( - time.monotonic() + interval_secs - ) + self._timer_next_fire[entry_point_id] = time.monotonic() + interval_secs await asyncio.sleep(interval_secs) continue @@ -720,24 +698,18 @@ class AgentHost: is_isolated = ep_spec and ep_spec.isolation_level == "isolated" if is_isolated: if _persistent_session_id: - session_state = { - "resume_session_id": _persistent_session_id - } + session_state = {"resume_session_id": _persistent_session_id} else: session_state = None else: - session_state = self._get_primary_session_state( - exclude_entry_point=entry_point_id - ) + session_state = self._get_primary_session_state(exclude_entry_point=entry_point_id) # Gate: skip tick if no active session if session_state is None: logger.debug( "Timer '%s': no active session, skipping", entry_point_id, ) - self._timer_next_fire[entry_point_id] = ( - time.monotonic() + interval_secs - ) + self._timer_next_fire[entry_point_id] = time.monotonic() + interval_secs await asyncio.sleep(interval_secs) continue @@ -1152,8 +1124,7 @@ class AgentHost: event_types = [_ET(et) for et in tc.get("event_types", [])] if not event_types: logger.warning( - "Entry point '%s::%s' has trigger_type='event' " - "but no event_types in trigger_config", + "Entry point '%s::%s' has trigger_type='event' but no event_types in trigger_config", graph_id, ep_id, ) @@ -1301,24 +1272,18 @@ class AgentHost: break stream = reg.streams.get(local_ep) if not stream: - logger.warning( - "Timer: no stream '%s' in '%s', stopping", local_ep, gid - ) + logger.warning("Timer: no stream '%s' in '%s', stopping", local_ep, gid) break # Isolated entry points get their own session; # shared ones join the primary session. ep_spec = reg.entry_points.get(local_ep) if ep_spec and ep_spec.isolation_level == "isolated": if _persistent_session_id: - session_state = { - "resume_session_id": _persistent_session_id - } + session_state = {"resume_session_id": _persistent_session_id} else: session_state = None else: - session_state = self._get_primary_session_state( - local_ep, source_graph_id=gid - ) + session_state = self._get_primary_session_state(local_ep, source_graph_id=gid) # Gate: skip tick if no active session if session_state is None: logger.debug( @@ -1335,11 +1300,7 @@ class AgentHost: session_state=session_state, ) # Remember session ID for reuse on next tick - if ( - not _persistent_session_id - and ep_spec - and ep_spec.isolation_level == "isolated" - ): + if not _persistent_session_id and ep_spec and ep_spec.isolation_level == "isolated": _persistent_session_id = exec_id except Exception: logger.error( @@ -1597,9 +1558,7 @@ class AgentHost: src_graph_id = source_graph_id or self._graph_id src_reg = self._graphs.get(src_graph_id) ep_spec = ( - src_reg.entry_points.get(exclude_entry_point) - if src_reg - else self._entry_points.get(exclude_entry_point) + src_reg.entry_points.get(exclude_entry_point) if src_reg else self._entry_points.get(exclude_entry_point) ) if ep_spec: graph = src_reg.graph if src_reg else self.graph @@ -1633,9 +1592,7 @@ class AgentHost: # Filter to only input keys so stale outputs # from previous triggers don't leak through. if allowed_keys is not None: - buffer_data = { - k: v for k, v in full_buffer.items() if k in allowed_keys - } + buffer_data = {k: v for k, v in full_buffer.items() if k in allowed_keys} else: buffer_data = full_buffer if buffer_data: diff --git a/core/framework/host/colony_runtime.py b/core/framework/host/colony_runtime.py index ead6b2ba..76fd206c 100644 --- a/core/framework/host/colony_runtime.py +++ b/core/framework/host/colony_runtime.py @@ -15,7 +15,6 @@ import asyncio import json import logging import time -import uuid from collections import OrderedDict from collections.abc import Callable from dataclasses import dataclass, field @@ -25,16 +24,13 @@ from typing import TYPE_CHECKING, Any from framework.agent_loop.types import AgentContext, AgentSpec from framework.host.event_bus import AgentEvent, EventBus, EventType from framework.host.triggers import TriggerDefinition -from framework.host.worker import Worker, WorkerInfo, WorkerResult, WorkerStatus -from framework.observability import set_trace_context +from framework.host.worker import Worker, WorkerInfo, WorkerResult from framework.schemas.goal import Goal from framework.storage.concurrent import ConcurrentStorage from framework.storage.session_store import SessionStore if TYPE_CHECKING: - from framework.agent_loop.agent_loop import AgentLoop from framework.llm.provider import LLMProvider, Tool - from framework.pipeline.runner import PipelineRunner from framework.skills.manager import SkillsManagerConfig from framework.tracker.runtime_log_store import RuntimeLogStore @@ -195,9 +191,7 @@ class ColonyRuntime: DeprecationWarning, stacklevel=2, ) - self._skills_manager = SkillsManager.from_precomputed( - skills_catalog_prompt, protocols_prompt - ) + self._skills_manager = SkillsManager.from_precomputed(skills_catalog_prompt, protocols_prompt) else: self._skills_manager = SkillsManager() self._skills_manager.load() @@ -210,9 +204,7 @@ class ColonyRuntime: self._accounts_prompt = accounts_prompt self._accounts_data = accounts_data self._tool_provider_map = tool_provider_map - self._dynamic_memory_provider_factory: Callable[[str], Callable[[], str] | None] | None = ( - None - ) + self._dynamic_memory_provider_factory: Callable[[str], Callable[[], str] | None] | None = None storage_path_obj = Path(storage_path) if isinstance(storage_path, str) else storage_path self._storage_path: Path = storage_path_obj @@ -560,9 +552,7 @@ class ColonyRuntime: encoding="utf-8", ) except (json.JSONDecodeError, OSError) as exc: - logger.warning( - "spawn fork: failed to copy queen meta.json: %s", exc - ) + logger.warning("spawn fork: failed to copy queen meta.json: %s", exc) # Append the task as the next user message so the worker's # LLM sees it as the most recent turn in the conversation @@ -674,9 +664,7 @@ class ColonyRuntime: input_data=input_data, ) - worker_conv_store = FileConversationStore( - worker_storage / "conversations" - ) + worker_conv_store = FileConversationStore(worker_storage / "conversations") # AgentLoop takes bus/judge/config/executor at construction; # LLM, tools, stream_id, execution_id all come from the @@ -848,9 +836,7 @@ class ColonyRuntime: if remaining <= 0: break try: - report = await asyncio.wait_for( - report_queue.get(), timeout=remaining - ) + report = await asyncio.wait_for(report_queue.get(), timeout=remaining) except TimeoutError: break wid = report.get("worker_id") @@ -919,10 +905,7 @@ class ColonyRuntime: return self._overseer if not self._running: - raise RuntimeError( - "start_overseer requires the ColonyRuntime to be running " - "(call start() first)" - ) + raise RuntimeError("start_overseer requires the ColonyRuntime to be running (call start() first)") from framework.agent_loop.agent_loop import AgentLoop from framework.storage.conversation_store import FileConversationStore @@ -933,9 +916,7 @@ class ColonyRuntime: # {colony_session}/conversations/. Workers get their own sub-dirs # under workers/{worker_id}/; the overseer is the root occupant. self._storage_path.mkdir(parents=True, exist_ok=True) - overseer_conv_store = FileConversationStore( - self._storage_path / "conversations" - ) + overseer_conv_store = FileConversationStore(self._storage_path / "conversations") agent_loop = AgentLoop( event_bus=self._scoped_event_bus, tool_executor=self._tool_executor, @@ -1096,9 +1077,7 @@ class ColonyRuntime: def get_worker_result(self, worker_id: str) -> WorkerResult | None: return self._execution_results.get(worker_id) - async def wait_for_worker( - self, worker_id: str, timeout: float | None = None - ) -> WorkerResult | None: + async def wait_for_worker(self, worker_id: str, timeout: float | None = None) -> WorkerResult | None: worker = self._workers.get(worker_id) if worker is None: return self._execution_results.get(worker_id) @@ -1106,7 +1085,7 @@ class ColonyRuntime: return worker.info.result try: await asyncio.wait_for(asyncio.shield(worker._task_handle), timeout=timeout) - except asyncio.TimeoutError: + except TimeoutError: return None return worker.info.result @@ -1147,9 +1126,7 @@ class ColonyRuntime: if worker and worker.is_active: loop = worker._agent_loop if hasattr(loop, "inject_event"): - await loop.inject_event( - content, is_client_input=is_client_input, image_content=image_content - ) + await loop.inject_event(content, is_client_input=is_client_input, image_content=image_content) return True return False diff --git a/core/framework/host/event_bus.py b/core/framework/host/event_bus.py index ec66a811..d193452a 100644 --- a/core/framework/host/event_bus.py +++ b/core/framework/host/event_bus.py @@ -446,11 +446,7 @@ class EventBus: # iteration values. Without this, live SSE would use raw iterations # while events.jsonl would use offset iterations, causing ID collisions # on the frontend when replaying after cold resume. - if ( - self._session_log_iteration_offset - and isinstance(event.data, dict) - and "iteration" in event.data - ): + if self._session_log_iteration_offset and isinstance(event.data, dict) and "iteration" in event.data: offset = self._session_log_iteration_offset event.data = {**event.data, "iteration": event.data["iteration"] + offset} diff --git a/core/framework/host/execution_manager.py b/core/framework/host/execution_manager.py index 87f75bfe..3ce548b3 100644 --- a/core/framework/host/execution_manager.py +++ b/core/framework/host/execution_manager.py @@ -452,9 +452,7 @@ class ExecutionManager: for executor in self._active_executors.values(): node = executor.node_registry.get(node_id) if node is not None and hasattr(node, "inject_event"): - await node.inject_event( - content, is_client_input=is_client_input, image_content=image_content - ) + await node.inject_event(content, is_client_input=is_client_input, image_content=image_content) return True return False @@ -669,9 +667,7 @@ class ExecutionManager: if self._runtime_log_store: from framework.tracker.runtime_logger import RuntimeLogger - runtime_logger = RuntimeLogger( - store=self._runtime_log_store, agent_id=self.graph.id - ) + runtime_logger = RuntimeLogger(store=self._runtime_log_store, agent_id=self.graph.id) # Derive storage from session_store (graph-specific for secondary # graphs) so that all files — conversations, state, checkpoints, @@ -887,9 +883,7 @@ class ExecutionManager: if has_result and result.paused_at: await self._write_session_state(execution_id, ctx, result=result) else: - await self._write_session_state( - execution_id, ctx, error="Execution cancelled" - ) + await self._write_session_state(execution_id, ctx, error="Execution cancelled") # Emit SSE event so the frontend knows the execution stopped. # The executor does NOT emit on CancelledError, so there is no diff --git a/core/framework/host/shared_state.py b/core/framework/host/shared_state.py index 17b06ca5..df3bad2c 100644 --- a/core/framework/host/shared_state.py +++ b/core/framework/host/shared_state.py @@ -2,8 +2,6 @@ import asyncio import logging -import time -from dataclasses import dataclass, field from enum import StrEnum from typing import Any diff --git a/core/framework/host/stream_runtime.py b/core/framework/host/stream_runtime.py index 9e25514e..1e144ab1 100644 --- a/core/framework/host/stream_runtime.py +++ b/core/framework/host/stream_runtime.py @@ -136,9 +136,7 @@ class StreamDecisionTracker: self._run_locks[execution_id] = asyncio.Lock() self._current_nodes[execution_id] = "unknown" - logger.debug( - f"Started run {run_id} for execution {execution_id} in stream {self.stream_id}" - ) + logger.debug(f"Started run {run_id} for execution {execution_id} in stream {self.stream_id}") return run_id def end_run( @@ -334,10 +332,7 @@ class StreamDecisionTracker: """ run = self._runs.get(execution_id) if run is None: - logger.warning( - f"report_problem called but no run for execution {execution_id}: " - f"[{severity}] {description}" - ) + logger.warning(f"report_problem called but no run for execution {execution_id}: [{severity}] {description}") return "" return run.add_problem( diff --git a/core/framework/host/webhook_server.py b/core/framework/host/webhook_server.py index b33dcaba..33d74a74 100644 --- a/core/framework/host/webhook_server.py +++ b/core/framework/host/webhook_server.py @@ -89,8 +89,7 @@ class WebhookServer: ) await self._site.start() logger.info( - f"Webhook server started on {self._config.host}:{self._config.port} " - f"with {len(self._routes)} route(s)" + f"Webhook server started on {self._config.host}:{self._config.port} with {len(self._routes)} route(s)" ) async def stop(self) -> None: diff --git a/core/framework/host/worker.py b/core/framework/host/worker.py index 8b0090a0..ebf346a7 100644 --- a/core/framework/host/worker.py +++ b/core/framework/host/worker.py @@ -92,9 +92,7 @@ class Worker: # result.json, data). Required when seed_conversation() is used — # we deliberately do NOT fall back to CWD, which previously caused # conversation parts to leak into the process working directory. - self._storage_path: Path | None = ( - Path(storage_path) if storage_path is not None else None - ) + self._storage_path: Path | None = Path(storage_path) if storage_path is not None else None self._task_handle: asyncio.Task | None = None self._started_at: float = 0.0 self._result: WorkerResult | None = None @@ -153,14 +151,10 @@ class Worker: if result.success: self.status = WorkerStatus.COMPLETED - self._result = self._build_result( - result, duration, default_status="success" - ) + self._result = self._build_result(result, duration, default_status="success") else: self.status = WorkerStatus.FAILED - self._result = self._build_result( - result, duration, default_status="failed" - ) + self._result = self._build_result(result, duration, default_status="failed") await self._emit_terminal_events(result) @@ -292,11 +286,7 @@ class Worker: # EXECUTION_COMPLETED / EXECUTION_FAILED (backwards-compat) if agent_result is not None: - lifecycle_type = ( - EventType.EXECUTION_COMPLETED - if agent_result.success - else EventType.EXECUTION_FAILED - ) + lifecycle_type = EventType.EXECUTION_COMPLETED if agent_result.success else EventType.EXECUTION_FAILED await self._event_bus.publish( AgentEvent( type=lifecycle_type, @@ -309,11 +299,7 @@ class Worker: "task": self.task, "success": agent_result.success, "error": agent_result.error, - "output_keys": ( - list(agent_result.output.keys()) - if agent_result.output - else [] - ), + "output_keys": (list(agent_result.output.keys()) if agent_result.output else []), }, ) ) @@ -348,9 +334,7 @@ class Worker: async def start_background(self) -> None: """Spawn the worker's run() as an asyncio background task.""" - self._task_handle = asyncio.create_task( - self.run(), name=f"worker:{self.id}" - ) + self._task_handle = asyncio.create_task(self.run(), name=f"worker:{self.id}") # Surface any exception that escapes run(); without this callback # a crash here only becomes visible when stop() eventually awaits # the handle (and is silently lost if stop() is never called). @@ -406,8 +390,7 @@ class Worker: """ if self.status != WorkerStatus.PENDING: raise RuntimeError( - f"seed_conversation must be called before start_background " - f"(worker {self.id} is {self.status})" + f"seed_conversation must be called before start_background (worker {self.id} is {self.status})" ) # Write parts directly to the worker's on-disk conversation store diff --git a/core/framework/llm/anthropic.py b/core/framework/llm/anthropic.py index df44d631..63902e87 100644 --- a/core/framework/llm/anthropic.py +++ b/core/framework/llm/anthropic.py @@ -50,9 +50,7 @@ class AnthropicProvider(LLMProvider): # Delegate to LiteLLMProvider internally. self.api_key = api_key or _get_api_key_from_credential_store() if not self.api_key: - raise ValueError( - "Anthropic API key required. Set ANTHROPIC_API_KEY env var or pass api_key." - ) + raise ValueError("Anthropic API key required. Set ANTHROPIC_API_KEY env var or pass api_key.") self.model = model diff --git a/core/framework/llm/antigravity.py b/core/framework/llm/antigravity.py index 0ecea51c..0d081719 100644 --- a/core/framework/llm/antigravity.py +++ b/core/framework/llm/antigravity.py @@ -53,17 +53,9 @@ _TOKEN_REFRESH_BUFFER_SECS = 60 # Credentials file in ~/.hive/ (native implementation) _ACCOUNTS_FILE = Path.home() / ".hive" / "antigravity-accounts.json" _IDE_STATE_DB_MAC = ( - Path.home() - / "Library" - / "Application Support" - / "Antigravity" - / "User" - / "globalStorage" - / "state.vscdb" -) -_IDE_STATE_DB_LINUX = ( - Path.home() / ".config" / "Antigravity" / "User" / "globalStorage" / "state.vscdb" + Path.home() / "Library" / "Application Support" / "Antigravity" / "User" / "globalStorage" / "state.vscdb" ) +_IDE_STATE_DB_LINUX = Path.home() / ".config" / "Antigravity" / "User" / "globalStorage" / "state.vscdb" _IDE_STATE_DB_KEY = "antigravityUnifiedStateSync.oauthToken" _BASE_HEADERS: dict[str, str] = { @@ -368,9 +360,7 @@ def _to_gemini_contents( def _map_finish_reason(reason: str) -> str: - return {"STOP": "stop", "MAX_TOKENS": "max_tokens", "OTHER": "tool_use"}.get( - (reason or "").upper(), "stop" - ) + return {"STOP": "stop", "MAX_TOKENS": "max_tokens", "OTHER": "tool_use"}.get((reason or "").upper(), "stop") def _parse_complete_response(raw: dict[str, Any], model: str) -> LLMResponse: @@ -538,8 +528,7 @@ class AntigravityProvider(LLMProvider): return self._access_token raise RuntimeError( - "No valid Antigravity credentials. " - "Run: uv run python core/antigravity_auth.py auth account add" + "No valid Antigravity credentials. Run: uv run python core/antigravity_auth.py auth account add" ) # --- Request building -------------------------------------------------- # @@ -593,11 +582,7 @@ class AntigravityProvider(LLMProvider): token = self._ensure_token() body_bytes = json.dumps(body).encode("utf-8") - path = ( - "/v1internal:streamGenerateContent?alt=sse" - if streaming - else "/v1internal:generateContent" - ) + path = "/v1internal:streamGenerateContent?alt=sse" if streaming else "/v1internal:generateContent" headers = { **_BASE_HEADERS, "Authorization": f"Bearer {token}", @@ -619,9 +604,7 @@ class AntigravityProvider(LLMProvider): if result: self._access_token, self._token_expires_at = result headers["Authorization"] = f"Bearer {self._access_token}" - req2 = urllib.request.Request( - url, data=body_bytes, headers=headers, method="POST" - ) + req2 = urllib.request.Request(url, data=body_bytes, headers=headers, method="POST") try: return urllib.request.urlopen(req2, timeout=120) # noqa: S310 except urllib.error.HTTPError as exc2: @@ -642,9 +625,7 @@ class AntigravityProvider(LLMProvider): last_exc = exc continue - raise RuntimeError( - f"All Antigravity endpoints failed. Last error: {last_exc}" - ) from last_exc + raise RuntimeError(f"All Antigravity endpoints failed. Last error: {last_exc}") from last_exc # --- LLMProvider interface --------------------------------------------- # @@ -683,9 +664,7 @@ class AntigravityProvider(LLMProvider): try: body = self._build_body(messages, system, tools, max_tokens) http_resp = self._post(body, streaming=True) - for event in _parse_sse_stream( - http_resp, self.model, self._thought_sigs.__setitem__ - ): + for event in _parse_sse_stream(http_resp, self.model, self._thought_sigs.__setitem__): loop.call_soon_threadsafe(queue.put_nowait, event) except Exception as exc: logger.error("Antigravity stream error: %s", exc) diff --git a/core/framework/llm/litellm.py b/core/framework/llm/litellm.py index 565950b0..c2af8cf3 100644 --- a/core/framework/llm/litellm.py +++ b/core/framework/llm/litellm.py @@ -100,9 +100,7 @@ def _patch_litellm_anthropic_oauth() -> None: result["authorization"] = f"Bearer {token}" # Merge the OAuth beta header with any existing beta headers. existing_beta = result.get("anthropic-beta", "") - beta_parts = ( - [b.strip() for b in existing_beta.split(",") if b.strip()] if existing_beta else [] - ) + beta_parts = [b.strip() for b in existing_beta.split(",") if b.strip()] if existing_beta else [] if ANTHROPIC_OAUTH_BETA_HEADER not in beta_parts: beta_parts.append(ANTHROPIC_OAUTH_BETA_HEADER) result["anthropic-beta"] = ",".join(beta_parts) @@ -262,9 +260,7 @@ def _claude_code_billing_header(messages: list[dict[str, Any]]) -> str: break sampled = "".join(_sample_js_code_unit(first_text, i) for i in (4, 7, 20)) - version_hash = hashlib.sha256( - f"{_CLAUDE_CODE_BILLING_SALT}{sampled}{CLAUDE_CODE_VERSION}".encode() - ).hexdigest() + version_hash = hashlib.sha256(f"{_CLAUDE_CODE_BILLING_SALT}{sampled}{CLAUDE_CODE_VERSION}".encode()).hexdigest() entrypoint = os.environ.get("CLAUDE_CODE_ENTRYPOINT", "").strip() or "cli" return ( f"x-anthropic-billing-header: cc_version={CLAUDE_CODE_VERSION}.{version_hash[:3]}; " @@ -336,9 +332,7 @@ def _prune_failed_request_dumps(max_files: int = MAX_FAILED_REQUEST_DUMPS) -> No def _remember_openrouter_tool_compat_model(model: str) -> None: """Cache OpenRouter tool-compat fallback for a bounded time window.""" - OPENROUTER_TOOL_COMPAT_MODEL_CACHE[model] = ( - time.monotonic() + OPENROUTER_TOOL_COMPAT_CACHE_TTL_SECONDS - ) + OPENROUTER_TOOL_COMPAT_MODEL_CACHE[model] = time.monotonic() + OPENROUTER_TOOL_COMPAT_CACHE_TTL_SECONDS def _is_openrouter_tool_compat_cached(model: str) -> bool: @@ -746,20 +740,14 @@ class LiteLLMProvider(LLMProvider): eh.setdefault("user-agent", CLAUDE_CODE_USER_AGENT) # The Codex ChatGPT backend (chatgpt.com/backend-api/codex) rejects # several standard OpenAI params: max_output_tokens, stream_options. - self._codex_backend = bool( - self.api_base and "chatgpt.com/backend-api/codex" in self.api_base - ) + self._codex_backend = bool(self.api_base and "chatgpt.com/backend-api/codex" in self.api_base) # Antigravity routes through a local OpenAI-compatible proxy — no patches needed. self._antigravity = bool(self.api_base and "localhost:8069" in self.api_base) if litellm is None: - raise ImportError( - "LiteLLM is not installed. Please install it with: uv pip install litellm" - ) + raise ImportError("LiteLLM is not installed. Please install it with: uv pip install litellm") - def reconfigure( - self, model: str, api_key: str | None = None, api_base: str | None = None - ) -> None: + def reconfigure(self, model: str, api_key: str | None = None, api_base: str | None = None) -> None: """Hot-swap the model, API key, and/or base URL on this provider instance. Since the same LiteLLMProvider object is shared by reference across the @@ -784,9 +772,7 @@ class LiteLLMProvider(LLMProvider): if self._claude_code_oauth: eh = self.extra_kwargs.setdefault("extra_headers", {}) eh.setdefault("user-agent", CLAUDE_CODE_USER_AGENT) - self._codex_backend = bool( - self.api_base and "chatgpt.com/backend-api/codex" in self.api_base - ) + self._codex_backend = bool(self.api_base and "chatgpt.com/backend-api/codex" in self.api_base) self._antigravity = bool(self.api_base and "localhost:8069" in self.api_base) # Note: The Codex ChatGPT backend is a Responses API endpoint at @@ -809,9 +795,7 @@ class LiteLLMProvider(LLMProvider): return HIVE_API_BASE return None - def _completion_with_rate_limit_retry( - self, max_retries: int | None = None, **kwargs: Any - ) -> Any: + def _completion_with_rate_limit_retry(self, max_retries: int | None = None, **kwargs: Any) -> Any: """Call litellm.completion with retry on 429 rate limit errors and empty responses. When a :class:`KeyPool` is configured, rate-limited keys are rotated @@ -843,15 +827,10 @@ class LiteLLMProvider(LLMProvider): None, ) if last_role == "assistant": - logger.debug( - "[retry] Empty response after assistant message — " - "expected, not retrying." - ) + logger.debug("[retry] Empty response after assistant message — expected, not retrying.") return response - finish_reason = ( - response.choices[0].finish_reason if response.choices else "unknown" - ) + finish_reason = response.choices[0].finish_reason if response.choices else "unknown" # Dump full request to file for debugging token_count, token_method = _estimate_tokens(model, messages) dump_path = _dump_failed_request( @@ -1050,9 +1029,7 @@ class LiteLLMProvider(LLMProvider): # Async variants — non-blocking on the event loop # ------------------------------------------------------------------ - async def _acompletion_with_rate_limit_retry( - self, max_retries: int | None = None, **kwargs: Any - ) -> Any: + async def _acompletion_with_rate_limit_retry(self, max_retries: int | None = None, **kwargs: Any) -> Any: """Async version of _completion_with_rate_limit_retry. Uses litellm.acompletion and asyncio.sleep instead of blocking calls. @@ -1078,15 +1055,10 @@ class LiteLLMProvider(LLMProvider): None, ) if last_role == "assistant": - logger.debug( - "[async-retry] Empty response after assistant message — " - "expected, not retrying." - ) + logger.debug("[async-retry] Empty response after assistant message — expected, not retrying.") return response - finish_reason = ( - response.choices[0].finish_reason if response.choices else "unknown" - ) + finish_reason = response.choices[0].finish_reason if response.choices else "unknown" token_count, token_method = _estimate_tokens(model, messages) dump_path = _dump_failed_request( model=model, @@ -1370,8 +1342,7 @@ class LiteLLMProvider(LLMProvider): ) return text_tool_content, text_tool_calls logger.info( - "[openrouter-tool-compat] %s returned non-JSON fallback content; " - "treating it as plain text.", + "[openrouter-tool-compat] %s returned non-JSON fallback content; treating it as plain text.", self.model, ) return content.strip(), [] @@ -1523,9 +1494,7 @@ class LiteLLMProvider(LLMProvider): ) return repaired - raise ValueError( - f"Failed to parse tool call arguments for '{tool_name}' (likely truncated JSON)." - ) + raise ValueError(f"Failed to parse tool call arguments for '{tool_name}' (likely truncated JSON).") def _parse_openrouter_text_tool_calls( self, @@ -1682,11 +1651,7 @@ class LiteLLMProvider(LLMProvider): return [ message for message in full_messages - if not ( - message.get("role") == "assistant" - and not message.get("content") - and not message.get("tool_calls") - ) + if not (message.get("role") == "assistant" and not message.get("content") and not message.get("tool_calls")) ] async def _acomplete_via_openrouter_tool_compat( @@ -1914,8 +1879,8 @@ class LiteLLMProvider(LLMProvider): if logger.isEnabledFor(logging.DEBUG) and full_messages: import json as _json - from pathlib import Path as _Path from datetime import datetime as _dt + from pathlib import Path as _Path _debug_dir = _Path.home() / ".hive" / "debug_logs" _debug_dir.mkdir(parents=True, exist_ok=True) @@ -1939,9 +1904,7 @@ class LiteLLMProvider(LLMProvider): } ) try: - _dump_file.write_text( - _json.dumps(_summary, indent=2, ensure_ascii=False), encoding="utf-8" - ) + _dump_file.write_text(_json.dumps(_summary, indent=2, ensure_ascii=False), encoding="utf-8") logger.debug("[LLM-MSG] %d messages dumped to %s", len(full_messages), _dump_file) except Exception: pass @@ -1966,9 +1929,7 @@ class LiteLLMProvider(LLMProvider): full_messages = [ m for m in full_messages - if not ( - m.get("role") == "assistant" and not m.get("content") and not m.get("tool_calls") - ) + if not (m.get("role") == "assistant" and not m.get("content") and not m.get("tool_calls")) ] kwargs: dict[str, Any] = { @@ -2154,8 +2115,7 @@ class LiteLLMProvider(LLMProvider): else getattr(usage, "cache_read_input_tokens", 0) or 0 ) logger.debug( - "[tokens] finish-chunk usage: " - "input=%d output=%d cached=%d model=%s", + "[tokens] finish-chunk usage: input=%d output=%d cached=%d model=%s", input_tokens, output_tokens, cached_tokens, @@ -2202,8 +2162,7 @@ class LiteLLMProvider(LLMProvider): else getattr(_usage, "cache_read_input_tokens", 0) or 0 ) logger.debug( - "[tokens] post-loop chunks fallback:" - " input=%d output=%d cached=%d model=%s", + "[tokens] post-loop chunks fallback: input=%d output=%d cached=%d model=%s", input_tokens, output_tokens, cached_tokens, diff --git a/core/framework/llm/model_catalog.py b/core/framework/llm/model_catalog.py index 79fd44b1..e227de54 100644 --- a/core/framework/llm/model_catalog.py +++ b/core/framework/llm/model_catalog.py @@ -50,9 +50,7 @@ def _validate_model_catalog(data: dict[str, Any]) -> dict[str, Any]: if not isinstance(model_id, str) or not model_id.strip(): raise ModelCatalogError(f"{model_path}.id must be a non-empty string") if model_id in seen_model_ids: - raise ModelCatalogError( - f"Duplicate model id {model_id!r} in {provider_path}.models" - ) + raise ModelCatalogError(f"Duplicate model id {model_id!r} in {provider_path}.models") seen_model_ids.add(model_id) if model_id == default_model: @@ -91,17 +89,11 @@ def _validate_model_catalog(data: dict[str, Any]) -> dict[str, Any]: api_base = preset_map.get("api_base") if api_base is not None and (not isinstance(api_base, str) or not api_base.strip()): - raise ModelCatalogError( - f"{preset_path}.api_base must be a non-empty string when present" - ) + raise ModelCatalogError(f"{preset_path}.api_base must be a non-empty string when present") api_key_env_var = preset_map.get("api_key_env_var") - if api_key_env_var is not None and ( - not isinstance(api_key_env_var, str) or not api_key_env_var.strip() - ): - raise ModelCatalogError( - f"{preset_path}.api_key_env_var must be a non-empty string when present" - ) + if api_key_env_var is not None and (not isinstance(api_key_env_var, str) or not api_key_env_var.strip()): + raise ModelCatalogError(f"{preset_path}.api_key_env_var must be a non-empty string when present") for key in ("max_tokens", "max_context_tokens"): value = preset_map.get(key) @@ -110,9 +102,7 @@ def _validate_model_catalog(data: dict[str, Any]) -> dict[str, Any]: model_choices = preset_map.get("model_choices") if model_choices is not None: - for idx, choice in enumerate( - _require_list(model_choices, f"{preset_path}.model_choices") - ): + for idx, choice in enumerate(_require_list(model_choices, f"{preset_path}.model_choices")): choice_path = f"{preset_path}.model_choices[{idx}]" choice_map = _require_mapping(choice, choice_path) choice_id = choice_map.get("id") @@ -144,19 +134,13 @@ def load_model_catalog() -> dict[str, Any]: def get_models_catalogue() -> dict[str, list[dict[str, Any]]]: """Return provider -> model list.""" providers = load_model_catalog()["providers"] - return { - provider_id: copy.deepcopy(provider_info["models"]) - for provider_id, provider_info in providers.items() - } + return {provider_id: copy.deepcopy(provider_info["models"]) for provider_id, provider_info in providers.items()} def get_default_models() -> dict[str, str]: """Return provider -> default model id.""" providers = load_model_catalog()["providers"] - return { - provider_id: str(provider_info["default_model"]) - for provider_id, provider_info in providers.items() - } + return {provider_id: str(provider_info["default_model"]) for provider_id, provider_info in providers.items()} def get_provider_models(provider: str) -> list[dict[str, Any]]: diff --git a/core/framework/loader/agent_loader.py b/core/framework/loader/agent_loader.py index 0486f864..98088df8 100644 --- a/core/framework/loader/agent_loader.py +++ b/core/framework/loader/agent_loader.py @@ -9,7 +9,7 @@ from datetime import UTC from pathlib import Path from typing import Any -from framework.config import get_hive_config, get_max_context_tokens, get_preferred_model +from framework.config import get_hive_config, get_preferred_model from framework.credentials.validation import ( ensure_credential_key_env as _ensure_credential_key_env, ) @@ -20,14 +20,12 @@ from framework.loader.preload_validation import run_preload_validation from framework.loader.tool_registry import ToolRegistry from framework.orchestrator import Goal from framework.orchestrator.edge import ( - DEFAULT_MAX_TOKENS, EdgeCondition, EdgeSpec, GraphSpec, ) from framework.orchestrator.node import NodeSpec from framework.orchestrator.orchestrator import ExecutionResult -from framework.tools.flowchart_utils import generate_fallback_flowchart logger = logging.getLogger(__name__) @@ -555,18 +553,10 @@ def get_kimi_code_token() -> str | None: # VSCode-style SQLite state database under the key # "antigravityUnifiedStateSync.oauthToken" as a base64-encoded protobuf blob. ANTIGRAVITY_IDE_STATE_DB = ( - Path.home() - / "Library" - / "Application Support" - / "Antigravity" - / "User" - / "globalStorage" - / "state.vscdb" + Path.home() / "Library" / "Application Support" / "Antigravity" / "User" / "globalStorage" / "state.vscdb" ) # Linux fallback for the IDE state DB -ANTIGRAVITY_IDE_STATE_DB_LINUX = ( - Path.home() / ".config" / "Antigravity" / "User" / "globalStorage" / "state.vscdb" -) +ANTIGRAVITY_IDE_STATE_DB_LINUX = Path.home() / ".config" / "Antigravity" / "User" / "globalStorage" / "state.vscdb" # Antigravity credentials stored by native OAuth implementation ANTIGRAVITY_AUTH_FILE = Path.home() / ".hive" / "antigravity-accounts.json" @@ -710,9 +700,7 @@ def _is_antigravity_token_expired(auth_data: dict) -> bool: return True elif isinstance(last_refresh_val, str): try: - last_refresh_val = datetime.fromisoformat( - last_refresh_val.replace("Z", "+00:00") - ).timestamp() + last_refresh_val = datetime.fromisoformat(last_refresh_val.replace("Z", "+00:00")).timestamp() except (ValueError, TypeError): return True @@ -843,8 +831,7 @@ def get_antigravity_token() -> str | None: return token_data["access_token"] logger.warning( - "Antigravity token refresh failed. " - "Re-open the Antigravity IDE or run 'antigravity-auth accounts add'." + "Antigravity token refresh failed. Re-open the Antigravity IDE or run 'antigravity-auth accounts add'." ) return access_token @@ -1297,11 +1284,7 @@ class AgentLoader: # Evict cached submodules first (e.g. deep_research_agent.nodes, # deep_research_agent.agent) so the top-level reload picks up # changes in the entire package — not just __init__.py. - stale = [ - name - for name in sys.modules - if name == package_name or name.startswith(f"{package_name}.") - ] + stale = [name for name in sys.modules if name == package_name or name.startswith(f"{package_name}.")] for name in stale: del sys.modules[name] @@ -1350,7 +1333,7 @@ class AgentLoader: if not worker_jsons: raise FileNotFoundError(f"No worker config found in {agent_path}") - from framework.orchestrator.edge import EdgeSpec, GraphSpec + from framework.orchestrator.edge import GraphSpec from framework.orchestrator.goal import Constraint, Goal as GoalModel, SuccessCriterion from framework.orchestrator.node import NodeSpec @@ -1555,7 +1538,6 @@ class AgentLoader: ] # Merge user-configured stages from ~/.hive/configuration.json - from framework.config import get_hive_config from framework.pipeline.registry import build_pipeline_from_config hive_config = get_hive_config() @@ -1568,9 +1550,7 @@ class AgentLoader: if agent_json.exists(): try: agent_pipeline = ( - _json.loads(agent_json.read_text(encoding="utf-8")) - .get("pipeline", {}) - .get("stages", []) + _json.loads(agent_json.read_text(encoding="utf-8")).get("pipeline", {}).get("stages", []) ) if agent_pipeline: agent_stages = build_pipeline_from_config(agent_pipeline) @@ -1986,8 +1966,7 @@ class AgentLoader: for sc in self.goal.success_criteria ], constraints=[ - {"id": c.id, "description": c.description, "type": c.constraint_type} - for c in self.goal.constraints + {"id": c.id, "description": c.description, "type": c.constraint_type} for c in self.goal.constraints ], required_tools=sorted(required_tools), has_tools_module=(self.agent_path / "tools.py").exists(), @@ -2058,9 +2037,7 @@ class AgentLoader: if api_key_env and not os.environ.get(api_key_env): if api_key_env not in missing_credentials: missing_credentials.append(api_key_env) - warnings.append( - f"Agent has LLM nodes but {api_key_env} not set (model: {self.model})" - ) + warnings.append(f"Agent has LLM nodes but {api_key_env} not set (model: {self.model})") return ValidationResult( valid=len(errors) == 0, diff --git a/core/framework/loader/cli.py b/core/framework/loader/cli.py index 36d2a31f..eb84946b 100644 --- a/core/framework/loader/cli.py +++ b/core/framework/loader/cli.py @@ -25,7 +25,6 @@ from pathlib import Path from typing import Any from urllib import error as urlerror, parse as urlparse, request as urlrequest - # --------------------------------------------------------------------------- # Public registration # --------------------------------------------------------------------------- @@ -127,10 +126,7 @@ def cmd_serve(args: argparse.Namespace) -> int: def _request_shutdown(signame: str) -> None: signal_count["n"] += 1 if signal_count["n"] == 1: - print( - f"\nReceived {signame}, shutting down gracefully… " - "(press Ctrl+C again to force quit)" - ) + print(f"\nReceived {signame}, shutting down gracefully… (press Ctrl+C again to force quit)") shutdown_event.set() else: # Second Ctrl+C (or SIGTERM) — the user is done waiting. @@ -171,9 +167,7 @@ def cmd_serve(args: argparse.Namespace) -> int: print(f"Colony not found: {colony_arg}") continue try: - session = await manager.create_session_with_worker_colony( - str(colony_path), model=model - ) + session = await manager.create_session_with_worker_colony(str(colony_path), model=model) info = session.worker_info name = info.name if info else session.colony_id print(f"Loaded colony: {session.colony_id} ({name}) → session {session.id}") @@ -319,12 +313,14 @@ def cmd_queen_sessions(args: argparse.Namespace) -> int: meta = json.loads(meta_path.read_text(encoding="utf-8")) except Exception: meta = {} - rows.append({ - "session_id": session_dir.name, - "phase": meta.get("phase", "?"), - "agent_path": meta.get("agent_path", ""), - "colony_fork": bool(meta.get("colony_fork")), - }) + rows.append( + { + "session_id": session_dir.name, + "phase": meta.get("phase", "?"), + "agent_path": meta.get("agent_path", ""), + "colony_fork": bool(meta.get("colony_fork")), + } + ) if args.json: print(json.dumps(rows, indent=2)) @@ -398,18 +394,18 @@ def cmd_colony_list(args: argparse.Namespace) -> int: except Exception: meta = {} worker_count = sum( - 1 - for f in path.iterdir() - if f.is_file() and f.suffix == ".json" and f.stem not in _RESERVED_JSON_STEMS + 1 for f in path.iterdir() if f.is_file() and f.suffix == ".json" and f.stem not in _RESERVED_JSON_STEMS + ) + rows.append( + { + "name": path.name, + "queen_name": meta.get("queen_name", ""), + "queen_session_id": meta.get("queen_session_id", ""), + "workers": worker_count, + "created_at": meta.get("created_at", ""), + "path": str(path), + } ) - rows.append({ - "name": path.name, - "queen_name": meta.get("queen_name", ""), - "queen_session_id": meta.get("queen_session_id", ""), - "workers": worker_count, - "created_at": meta.get("created_at", ""), - "path": str(path), - }) if args.json: print(json.dumps(rows, indent=2)) @@ -422,9 +418,7 @@ def cmd_colony_list(args: argparse.Namespace) -> int: print(f"{'NAME':<24} {'QUEEN':<28} {'WORKERS':<8} CREATED") print("-" * 90) for r in rows: - print( - f"{r['name']:<24} {r['queen_name']:<28} {r['workers']:<8} {r['created_at'][:19]}" - ) + print(f"{r['name']:<24} {r['queen_name']:<28} {r['workers']:<8} {r['created_at'][:19]}") return 0 @@ -651,9 +645,7 @@ def _http_get(url: str, timeout: float = 10.0) -> dict: def _http_post(url: str, body: dict, timeout: float = 30.0) -> dict: data = json.dumps(body).encode("utf-8") - req = urlrequest.Request( - url, data=data, method="POST", headers={"Content-Type": "application/json"} - ) + req = urlrequest.Request(url, data=data, method="POST", headers={"Content-Type": "application/json"}) with urlrequest.urlopen(req, timeout=timeout) as r: return json.loads(r.read().decode("utf-8")) @@ -709,9 +701,7 @@ def _open_browser(url: str) -> None: try: if sys.platform == "darwin": - subprocess.Popen( - ["open", url], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL - ) + subprocess.Popen(["open", url], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) elif sys.platform == "win32": subprocess.Popen( ["cmd", "/c", "start", "", url], @@ -719,9 +709,7 @@ def _open_browser(url: str) -> None: stderr=subprocess.DEVNULL, ) elif sys.platform == "linux": - subprocess.Popen( - ["xdg-open", url], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL - ) + subprocess.Popen(["xdg-open", url], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) except Exception: pass diff --git a/core/framework/loader/mcp_client.py b/core/framework/loader/mcp_client.py index 314aa124..308d2c45 100644 --- a/core/framework/loader/mcp_client.py +++ b/core/framework/loader/mcp_client.py @@ -267,9 +267,7 @@ class MCPClient: try: response = self._http_client.get("/health") response.raise_for_status() - logger.info( - f"Connected to MCP server '{self.config.name}' via HTTP at {self.config.url}" - ) + logger.info(f"Connected to MCP server '{self.config.name}' via HTTP at {self.config.url}") except Exception as e: logger.warning(f"Health check failed for MCP server '{self.config.name}': {e}") # Continue anyway, server might not have health endpoint @@ -377,12 +375,8 @@ class MCPClient: self._tools[tool.name] = tool tool_names = list(self._tools.keys()) - logger.info( - f"Discovered {len(self._tools)} tools from '{self.config.name}'" - ) - logger.debug( - f"Discovered tools from '{self.config.name}': {tool_names}" - ) + logger.info(f"Discovered {len(self._tools)} tools from '{self.config.name}'") + logger.debug(f"Discovered tools from '{self.config.name}': {tool_names}") except Exception as e: logger.error(f"Failed to discover tools from '{self.config.name}': {e}") raise @@ -467,6 +461,7 @@ class MCPClient: ) if self.config.transport == "stdio": + def _stdio_call() -> Any: with self._stdio_call_lock: return self._run_async(self._call_tool_stdio_async(tool_name, arguments)) @@ -669,9 +664,7 @@ class MCPClient: if self._session: await self._session.__aexit__(None, None, None) except asyncio.CancelledError: - logger.warning( - "MCP session cleanup was cancelled; proceeding with best-effort shutdown" - ) + logger.warning("MCP session cleanup was cancelled; proceeding with best-effort shutdown") except Exception as e: logger.warning(f"Error closing MCP session: {e}") finally: @@ -682,9 +675,7 @@ class MCPClient: if self._stdio_context: await self._stdio_context.__aexit__(None, None, None) except asyncio.CancelledError: - logger.debug( - "STDIO context cleanup was cancelled; proceeding with best-effort shutdown" - ) + logger.debug("STDIO context cleanup was cancelled; proceeding with best-effort shutdown") except Exception as e: msg = str(e).lower() if "cancel scope" in msg or "different task" in msg: @@ -725,9 +716,7 @@ class MCPClient: # any exceptions that may occur if the loop stops between these calls. if self._loop.is_running(): try: - cleanup_future = asyncio.run_coroutine_threadsafe( - self._cleanup_stdio_async(), self._loop - ) + cleanup_future = asyncio.run_coroutine_threadsafe(self._cleanup_stdio_async(), self._loop) cleanup_future.result(timeout=self._CLEANUP_TIMEOUT) cleanup_attempted = True except TimeoutError: diff --git a/core/framework/loader/mcp_connection_manager.py b/core/framework/loader/mcp_connection_manager.py index f5118d94..2839944c 100644 --- a/core/framework/loader/mcp_connection_manager.py +++ b/core/framework/loader/mcp_connection_manager.py @@ -74,8 +74,7 @@ class MCPConnectionManager: if not should_connect: if not transition_event.wait(timeout=_TRANSITION_TIMEOUT): logger.warning( - "Timed out waiting for transition on MCP server '%s', " - "forcing cleanup and retrying", + "Timed out waiting for transition on MCP server '%s', forcing cleanup and retrying", server_name, ) with self._pool_lock: @@ -99,10 +98,7 @@ class MCPConnectionManager: current = self._transitions.get(server_name) if current is transition_event: self._transitions.pop(server_name, None) - if ( - server_name not in self._pool - and self._refcounts.get(server_name, 0) <= 0 - ): + if server_name not in self._pool and self._refcounts.get(server_name, 0) <= 0: self._configs.pop(server_name, None) transition_event.set() raise @@ -324,8 +320,7 @@ class MCPConnectionManager: self._transitions.pop(server_name, None) transition_event.set() logger.info( - "Reconnected MCP server '%s' but refcount dropped to 0, " - "discarding new client", + "Reconnected MCP server '%s' but refcount dropped to 0, discarding new client", server_name, ) try: @@ -336,9 +331,7 @@ class MCPConnectionManager: server_name, exc_info=True, ) - raise KeyError( - f"MCP server '{server_name}' was fully released during reconnect" - ) + raise KeyError(f"MCP server '{server_name}' was fully released during reconnect") self._pool[server_name] = new_client self._configs[server_name] = config @@ -380,8 +373,7 @@ class MCPConnectionManager: all_resolved = all(event.wait(timeout=_TRANSITION_TIMEOUT) for event in pending) if not all_resolved: logger.warning( - "Timed out waiting for pending transitions during cleanup, " - "forcing cleanup of stuck transitions", + "Timed out waiting for pending transitions during cleanup, forcing cleanup of stuck transitions", ) with self._pool_lock: for sn, evt in list(self._transitions.items()): diff --git a/core/framework/loader/mcp_errors.py b/core/framework/loader/mcp_errors.py index 0980e301..6cc91d0c 100644 --- a/core/framework/loader/mcp_errors.py +++ b/core/framework/loader/mcp_errors.py @@ -23,9 +23,7 @@ class MCPError(ValueError): self.what = what self.why = why self.fix = fix - self.message = ( - f"[{self.code.value}]\nWhat failed: {self.what}\nWhy: {self.why}\nFix: {self.fix}" - ) + self.message = f"[{self.code.value}]\nWhat failed: {self.what}\nWhy: {self.why}\nFix: {self.fix}" super().__init__(self.message) diff --git a/core/framework/loader/mcp_registry.py b/core/framework/loader/mcp_registry.py index 8748e680..f8ec0290 100644 --- a/core/framework/loader/mcp_registry.py +++ b/core/framework/loader/mcp_registry.py @@ -24,9 +24,7 @@ from framework.loader.mcp_errors import ( logger = logging.getLogger(__name__) -DEFAULT_INDEX_URL = ( - "https://raw.githubusercontent.com/aden-hive/hive-mcp-registry/main/registry_index.json" -) +DEFAULT_INDEX_URL = "https://raw.githubusercontent.com/aden-hive/hive-mcp-registry/main/registry_index.json" DEFAULT_REFRESH_INTERVAL_HOURS = 24 _LAST_FETCHED_FILENAME = "last_fetched" _LEGACY_LAST_FETCHED_FILENAME = "last_fetched.json" @@ -140,9 +138,7 @@ class MCPRegistry: ) added.append(name) except MCPError as exc: - logger.warning( - "MCPRegistry.ensure_defaults: failed to seed '%s': %s", name, exc - ) + logger.warning("MCPRegistry.ensure_defaults: failed to seed '%s': %s", name, exc) if added: logger.info("MCPRegistry: seeded default local servers: %s", added) @@ -709,8 +705,7 @@ class MCPRegistry: pinned_version = versions[name] if installed_version != pinned_version: logger.warning( - "Server '%s' version mismatch: installed=%s, pinned=%s. " - "Run: hive mcp update %s", + "Server '%s' version mismatch: installed=%s, pinned=%s. Run: hive mcp update %s", name, installed_version, pinned_version, diff --git a/core/framework/loader/mcp_registry_cli.py b/core/framework/loader/mcp_registry_cli.py index 08105a6a..a265205d 100644 --- a/core/framework/loader/mcp_registry_cli.py +++ b/core/framework/loader/mcp_registry_cli.py @@ -151,10 +151,7 @@ def _parse_key_value_pairs(values: list[str]) -> dict[str, str]: result = {} for item in values: if "=" not in item: - raise ValueError( - f"Invalid format: '{item}'. Expected KEY=VALUE.\n" - f"Example: --set JIRA_API_TOKEN=abc123" - ) + raise ValueError(f"Invalid format: '{item}'. Expected KEY=VALUE.\nExample: --set JIRA_API_TOKEN=abc123") key, _, value = item.partition("=") if not key: raise ValueError(f"Invalid format: '{item}'. Key cannot be empty.") @@ -300,12 +297,8 @@ def register_mcp_commands(subparsers) -> None: # ── install ── install_p = mcp_sub.add_parser("install", help="Install a server from the registry") install_p.add_argument("name", help="Server name in the registry") - install_p.add_argument( - "--version", dest="version", default=None, help="Pin to a specific version" - ) - install_p.add_argument( - "--transport", default=None, help="Override default transport (stdio, http, unix, sse)" - ) + install_p.add_argument("--version", dest="version", default=None, help="Pin to a specific version") + install_p.add_argument("--transport", default=None, help="Override default transport (stdio, http, unix, sse)") install_p.set_defaults(func=cmd_mcp_install) # ── add ── @@ -342,9 +335,7 @@ def register_mcp_commands(subparsers) -> None: # ── list ── list_p = mcp_sub.add_parser("list", help="List servers") - list_p.add_argument( - "--available", action="store_true", help="Show available servers from registry" - ) + list_p.add_argument("--available", action="store_true", help="Show available servers from registry") list_p.add_argument("--json", dest="output_json", action="store_true", help="Output as JSON") list_p.set_defaults(func=cmd_mcp_list) @@ -364,9 +355,7 @@ def register_mcp_commands(subparsers) -> None: metavar="KEY=VAL", help="Set environment variable overrides", ) - config_p.add_argument( - "--set-header", dest="set_header", nargs="+", metavar="KEY=VAL", help="Set header overrides" - ) + config_p.add_argument("--set-header", dest="set_header", nargs="+", metavar="KEY=VAL", help="Set header overrides") config_p.set_defaults(func=cmd_mcp_config) # ── search ── @@ -389,9 +378,7 @@ def register_mcp_commands(subparsers) -> None: init_p.set_defaults(func=cmd_mcp_init) # ── update ── - update_p = mcp_sub.add_parser( - "update", help="Update installed servers or refresh the registry index" - ) + update_p = mcp_sub.add_parser("update", help="Update installed servers or refresh the registry index") update_p.add_argument( "name", nargs="?", @@ -495,8 +482,7 @@ def _cmd_mcp_add_from_manifest(registry, manifest_path: str) -> int: manifest = json.loads(path.read_text(encoding="utf-8")) except json.JSONDecodeError as exc: print( - f"Error: invalid JSON in {manifest_path}: {exc}\n" - f"Validate with: python -m json.tool {manifest_path}", + f"Error: invalid JSON in {manifest_path}: {exc}\nValidate with: python -m json.tool {manifest_path}", file=sys.stderr, ) return 1 @@ -695,8 +681,7 @@ def cmd_mcp_config(args) -> int: server = registry.get_server(args.name) if server is None: print( - f"Error: server '{args.name}' is not installed.\n" - f"Run 'hive mcp list' to see installed servers.", + f"Error: server '{args.name}' is not installed.\nRun 'hive mcp list' to see installed servers.", file=sys.stderr, ) return 1 @@ -822,8 +807,7 @@ def cmd_mcp_update(args) -> int: count = registry.update_index() except Exception as exc: print( - f"Error: failed to update registry index: {exc}\n" - f"Check your network connection and try again.", + f"Error: failed to update registry index: {exc}\nCheck your network connection and try again.", file=sys.stderr, ) return 1 @@ -832,9 +816,7 @@ def cmd_mcp_update(args) -> int: # Step 2: update all installed registry servers (skip local/pinned) installed = registry.list_installed() - registry_servers = [ - s for s in installed if s.get("source") == "registry" and not s.get("pinned") - ] + registry_servers = [s for s in installed if s.get("source") == "registry" and not s.get("pinned")] if not registry_servers: return 0 @@ -862,8 +844,7 @@ def _cmd_mcp_update_server(name: str, registry=None) -> int: server = registry.get_server(name) if server is None: print( - f"Error: server '{name}' is not installed.\n" - f"Run 'hive mcp install {name}' to install it.", + f"Error: server '{name}' is not installed.\nRun 'hive mcp install {name}' to install it.", file=sys.stderr, ) return 1 diff --git a/core/framework/loader/preload_validation.py b/core/framework/loader/preload_validation.py index 9e76e3d4..4f29dd36 100644 --- a/core/framework/loader/preload_validation.py +++ b/core/framework/loader/preload_validation.py @@ -98,9 +98,7 @@ def validate_credentials( if not result.success: # Preserve the original validation_result so callers can # inspect which credentials are still missing. - exc = CredentialError( - "Credential setup incomplete. Run again after configuring the required credentials." - ) + exc = CredentialError("Credential setup incomplete. Run again after configuring the required credentials.") if hasattr(e, "validation_result"): exc.validation_result = e.validation_result # type: ignore[attr-defined] if hasattr(e, "failed_cred_names"): diff --git a/core/framework/loader/tool_registry.py b/core/framework/loader/tool_registry.py index 47ce85b5..0155d9c3 100644 --- a/core/framework/loader/tool_registry.py +++ b/core/framework/loader/tool_registry.py @@ -257,10 +257,7 @@ class ToolRegistry: str(e), ) return { - "error": ( - f"Invalid JSON response from tool '{tool_name}': " - f"{str(e)}" - ), + "error": (f"Invalid JSON response from tool '{tool_name}': {str(e)}"), "raw_content": result.content, } return result @@ -435,9 +432,7 @@ class ToolRegistry: registry = ToolRegistry() return registry._resolve_mcp_server_config(server_config, base_dir) - def _resolve_mcp_server_config( - self, server_config: dict[str, Any], base_dir: Path - ) -> dict[str, Any]: + def _resolve_mcp_server_config(self, server_config: dict[str, Any], base_dir: Path) -> dict[str, Any]: """Resolve cwd and script paths for MCP stdio servers (Windows compatibility). On Windows, passing cwd to subprocess can cause WinError 267. We use cwd=None @@ -552,8 +547,7 @@ class ToolRegistry: server_list = [{"name": name, **cfg} for name, cfg in config.items()] resolved_server_list = [ - self._resolve_mcp_server_config(server_config, base_dir) - for server_config in server_list + self._resolve_mcp_server_config(server_config, base_dir) for server_config in server_list ] # Ordered first-wins for duplicate tool names across servers; keep tools.py tools. self.load_registry_servers( @@ -757,9 +751,7 @@ class ToolRegistry: if preserve_existing_tools and mcp_tool.name in self._tools: if log_collisions: - origin_server = ( - self._find_mcp_origin_server_for_tool(mcp_tool.name) or "" - ) + origin_server = self._find_mcp_origin_server_for_tool(mcp_tool.name) or "" logger.warning( "MCP tool '%s' from '%s' shadowed by '%s' (loaded first)", mcp_tool.name, @@ -788,17 +780,11 @@ class ToolRegistry: base_context.update(exec_ctx) # Only inject context params the tool accepts - filtered_context = { - k: v for k, v in base_context.items() if k in tool_params - } + filtered_context = {k: v for k, v in base_context.items() if k in tool_params} # Strip context params from LLM inputs — the framework # values are authoritative (prevents the LLM from passing # e.g. data_dir="/data" and overriding the real path). - clean_inputs = { - k: v - for k, v in inputs.items() - if k not in registry_ref.CONTEXT_PARAMS - } + clean_inputs = {k: v for k, v in inputs.items() if k not in registry_ref.CONTEXT_PARAMS} merged_inputs = {**clean_inputs, **filtered_context} result = client_ref.call_tool(tool_name, merged_inputs) # MCP client already extracts content (returns str @@ -885,9 +871,7 @@ class ToolRegistry: contents are already logged by `register_mcp_server`; this is just the rollup so the resync path also gets a single anchor line. """ - per_server_counts = { - server: len(names) for server, names in self._mcp_server_tools.items() - } + per_server_counts = {server: len(names) for server, names in self._mcp_server_tools.items()} non_mcp_count = len(self._tools) - len(self._mcp_tool_names) logger.info( "ToolRegistry snapshot (%s): total=%d, mcp=%d, non_mcp=%d, per_server=%s", @@ -958,11 +942,7 @@ class ToolRegistry: adapter = CredentialStoreAdapter.default() tool_provider_map = adapter.get_tool_provider_map() - live_providers = { - a.get("provider", "") - for a in adapter.get_all_account_info() - if a.get("provider") - } + live_providers = {a.get("provider", "") for a in adapter.get_all_account_info() if a.get("provider")} except Exception: logger.debug("Credential snapshot unavailable for MCP gate", exc_info=True) diff --git a/core/framework/orchestrator/checkpoint_config.py b/core/framework/orchestrator/checkpoint_config.py index e9ed6210..9ad338d6 100644 --- a/core/framework/orchestrator/checkpoint_config.py +++ b/core/framework/orchestrator/checkpoint_config.py @@ -50,11 +50,7 @@ class CheckpointConfig: Returns: True if should check for old checkpoints and prune them """ - return ( - self.enabled - and self.prune_every_n_nodes > 0 - and nodes_executed % self.prune_every_n_nodes == 0 - ) + return self.enabled and self.prune_every_n_nodes > 0 and nodes_executed % self.prune_every_n_nodes == 0 # Default configuration for most agents diff --git a/core/framework/orchestrator/context.py b/core/framework/orchestrator/context.py index aa977170..615ec7f8 100644 --- a/core/framework/orchestrator/context.py +++ b/core/framework/orchestrator/context.py @@ -175,9 +175,7 @@ def _resolve_available_tools( return always_tools declared = set(node_spec.tools) - declared_tools = [ - t for t in tools if t.name in declared and t.name not in _ALWAYS_AVAILABLE_TOOLS - ] + declared_tools = [t for t in tools if t.name in declared and t.name not in _ALWAYS_AVAILABLE_TOOLS] return always_tools + declared_tools diff --git a/core/framework/orchestrator/context_handoff.py b/core/framework/orchestrator/context_handoff.py index 0d9a7e54..db56ac6e 100644 --- a/core/framework/orchestrator/context_handoff.py +++ b/core/framework/orchestrator/context_handoff.py @@ -169,11 +169,7 @@ class ContextHandoff: key_hint = "" if output_keys: - key_hint = ( - "\nThe following output keys are especially important: " - + ", ".join(output_keys) - + ".\n" - ) + key_hint = "\nThe following output keys are especially important: " + ", ".join(output_keys) + ".\n" system_prompt = ( "You are a concise summarizer. Given the conversation below, " diff --git a/core/framework/orchestrator/edge.py b/core/framework/orchestrator/edge.py index a617edb9..087cb805 100644 --- a/core/framework/orchestrator/edge.py +++ b/core/framework/orchestrator/edge.py @@ -186,8 +186,7 @@ class EdgeSpec(BaseModel): expr_vars = { k: repr(context[k]) for k in context - if k not in ("output", "buffer", "result", "true", "false") - and k in self.condition_expr + if k not in ("output", "buffer", "result", "true", "false") and k in self.condition_expr } logger.info( " Edge %s: condition '%s' → %s (vars: %s)", @@ -333,12 +332,8 @@ class GraphSpec(BaseModel): default_factory=dict, description="Named entry points for resuming execution. Format: {name: node_id}", ) - terminal_nodes: list[str] = Field( - default_factory=list, description="IDs of nodes that end execution" - ) - pause_nodes: list[str] = Field( - default_factory=list, description="IDs of nodes that pause execution for HITL input" - ) + terminal_nodes: list[str] = Field(default_factory=list, description="IDs of nodes that end execution") + pause_nodes: list[str] = Field(default_factory=list, description="IDs of nodes that pause execution for HITL input") # Components nodes: list[Any] = Field( # NodeSpec, but avoiding circular import @@ -347,9 +342,7 @@ class GraphSpec(BaseModel): edges: list[EdgeSpec] = Field(default_factory=list, description="All edge specifications") # Data buffer keys - buffer_keys: list[str] = Field( - default_factory=list, description="Keys available in data buffer" - ) + buffer_keys: list[str] = Field(default_factory=list, description="Keys available in data buffer") # Default LLM settings default_model: str = "claude-haiku-4-5-20251001" @@ -557,9 +550,7 @@ class GraphSpec(BaseModel): fan_outs = self.detect_fan_out_nodes() for source_id, targets in fan_outs.items(): event_loop_targets = [ - t - for t in targets - if self.get_node(t) and getattr(self.get_node(t), "node_type", "") == "event_loop" + t for t in targets if self.get_node(t) and getattr(self.get_node(t), "node_type", "") == "event_loop" ] if len(event_loop_targets) > 1: seen_keys: dict[str, str] = {} diff --git a/core/framework/orchestrator/goal.py b/core/framework/orchestrator/goal.py index 3238754b..597f564d 100644 --- a/core/framework/orchestrator/goal.py +++ b/core/framework/orchestrator/goal.py @@ -41,13 +41,9 @@ class SuccessCriterion(BaseModel): id: str description: str = Field(description="Human-readable description of what success looks like") - metric: str = Field( - description="How to measure: 'output_contains', 'output_equals', 'llm_judge', 'custom'" - ) + metric: str = Field(description="How to measure: 'output_contains', 'output_equals', 'llm_judge', 'custom'") # NEW: runtime evaluation type (separate from metric) - type: str = Field( - default="success_rate", description="Runtime evaluation type, e.g. 'success_rate'" - ) + type: str = Field(default="success_rate", description="Runtime evaluation type, e.g. 'success_rate'") target: Any = Field(description="The target value or condition") weight: float = Field(default=1.0, ge=0.0, le=1.0, description="Relative importance (0-1)") @@ -67,15 +63,9 @@ class Constraint(BaseModel): id: str description: str - constraint_type: str = Field( - description="Type: 'hard' (must not violate) or 'soft' (prefer not to violate)" - ) - category: str = Field( - default="general", description="Category: 'time', 'cost', 'safety', 'scope', 'quality'" - ) - check: str = Field( - default="", description="How to check: expression, function name, or 'llm_judge'" - ) + constraint_type: str = Field(description="Type: 'hard' (must not violate) or 'soft' (prefer not to violate)") + category: str = Field(default="general", description="Category: 'time', 'cost', 'safety', 'scope', 'quality'") + check: str = Field(default="", description="How to check: expression, function name, or 'llm_judge'") model_config = {"extra": "allow"} @@ -142,9 +132,7 @@ class Goal(BaseModel): # Input/output schema input_schema: dict[str, Any] = Field(default_factory=dict, description="Expected input format") - output_schema: dict[str, Any] = Field( - default_factory=dict, description="Expected output format" - ) + output_schema: dict[str, Any] = Field(default_factory=dict, description="Expected output format") # Versioning for evolution version: str = "1.0.0" diff --git a/core/framework/orchestrator/node.py b/core/framework/orchestrator/node.py index acc36ed0..961ee08a 100644 --- a/core/framework/orchestrator/node.py +++ b/core/framework/orchestrator/node.py @@ -129,15 +129,13 @@ class NodeSpec(BaseModel): input_schema: dict[str, dict] = Field( default_factory=dict, description=( - "Optional schema for input validation. " - "Format: {key: {type: 'string', required: True, description: '...'}}" + "Optional schema for input validation. Format: {key: {type: 'string', required: True, description: '...'}}" ), ) output_schema: dict[str, dict] = Field( default_factory=dict, description=( - "Optional schema for output validation. " - "Format: {key: {type: 'dict', required: True, description: '...'}}" + "Optional schema for output validation. Format: {key: {type: 'dict', required: True, description: '...'}}" ), ) @@ -153,19 +151,13 @@ class NodeSpec(BaseModel): "'none' = no tools at all." ), ) - model: str | None = Field( - default=None, description="Specific model to use (defaults to graph default)" - ) + model: str | None = Field(default=None, description="Specific model to use (defaults to graph default)") # For function nodes - function: str | None = Field( - default=None, description="Function name or path for function nodes" - ) + function: str | None = Field(default=None, description="Function name or path for function nodes") # For router nodes - routes: dict[str, str] = Field( - default_factory=dict, description="Condition -> target_node_id mapping for routers" - ) + routes: dict[str, str] = Field(default_factory=dict, description="Condition -> target_node_id mapping for routers") # Retry behavior max_retries: int = Field(default=3) diff --git a/core/framework/orchestrator/node_worker.py b/core/framework/orchestrator/node_worker.py index a0be76b7..d7895985 100644 --- a/core/framework/orchestrator/node_worker.py +++ b/core/framework/orchestrator/node_worker.py @@ -379,9 +379,7 @@ class NodeWorker: # Failure if attempt + 1 < total_attempts: - gc.retry_counts[self.node_spec.id] = ( - gc.retry_counts.get(self.node_spec.id, 0) + 1 - ) + gc.retry_counts[self.node_spec.id] = gc.retry_counts.get(self.node_spec.id, 0) + 1 gc.nodes_with_retries.add(self.node_spec.id) delay = 1.0 * (2**attempt) logger.warning( @@ -411,9 +409,7 @@ class NodeWorker: except Exception as exc: if attempt + 1 < total_attempts: - gc.retry_counts[self.node_spec.id] = ( - gc.retry_counts.get(self.node_spec.id, 0) + 1 - ) + gc.retry_counts[self.node_spec.id] = gc.retry_counts.get(self.node_spec.id, 0) + 1 gc.nodes_with_retries.add(self.node_spec.id) delay = 1.0 * (2**attempt) logger.warning( @@ -469,9 +465,7 @@ class NodeWorker: if len(conditionals) > 1: max_prio = max(e.priority for e in conditionals) traversable = [ - e - for e in traversable - if e.condition != EdgeCondition.CONDITIONAL or e.priority == max_prio + e for e in traversable if e.condition != EdgeCondition.CONDITIONAL or e.priority == max_prio ] # When parallel execution is disabled, follow first match only (sequential) @@ -541,9 +535,7 @@ class NodeWorker: logger.warning("Worker %s output validation warnings: %s", node_spec.id, errors) # Determine if this worker is a fan-out branch - is_fanout_branch = any( - tag.via_branch == node_spec.id for tag in self._inherited_fan_out_tags - ) + is_fanout_branch = any(tag.via_branch == node_spec.id for tag in self._inherited_fan_out_tags) # Collect keys to write: declared output_keys + any extra output items # (for fan-out branches, all output items need conflict checking) @@ -642,9 +634,7 @@ class NodeWorker: self._node_impl = node return node - raise RuntimeError( - f"No implementation for node '{self.node_spec.id}' (type: {self.node_spec.node_type})" - ) + raise RuntimeError(f"No implementation for node '{self.node_spec.id}' (type: {self.node_spec.node_type})") def _build_node_context(self) -> NodeContext: """Build NodeContext for this worker's execution.""" @@ -749,9 +739,7 @@ class NodeWorker: inherited_conversation=gc.continuous_conversation, narrative=narrative, ) - gc.continuous_conversation.update_system_prompt( - build_system_prompt_for_node_context(next_ctx) - ) + gc.continuous_conversation.update_system_prompt(build_system_prompt_for_node_context(next_ctx)) gc.continuous_conversation.set_current_phase(next_spec.id) buffer_items, data_files = self._prepare_transition_payload() @@ -799,8 +787,7 @@ class NodeWorker: file_path.write_text(write_content, encoding="utf-8") file_size = file_path.stat().st_size buffer_items[key] = ( - f"[Saved to '{filename}' ({file_size:,} bytes). " - f"Use read_file(path='{filename}') to access.]" + f"[Saved to '{filename}' ({file_size:,} bytes). Use read_file(path='{filename}') to access.]" ) continue except Exception: diff --git a/core/framework/orchestrator/orchestrator.py b/core/framework/orchestrator/orchestrator.py index 71d0c9bc..01385630 100644 --- a/core/framework/orchestrator/orchestrator.py +++ b/core/framework/orchestrator/orchestrator.py @@ -202,9 +202,7 @@ class Orchestrator: self.validator = OutputValidator() self.logger = logging.getLogger(__name__) self.logger.debug( - "[Orchestrator.__init__] Created with" - " stream_id=%s, execution_id=%s," - " initial node_registry keys: %s", + "[Orchestrator.__init__] Created with stream_id=%s, execution_id=%s, initial node_registry keys: %s", stream_id, execution_id, list(self.node_registry.keys()), @@ -347,8 +345,7 @@ class Orchestrator: missing = [t for t in declared if t not in available_tool_names] if missing: self.logger.warning( - "Node '%s' (id=%s) declares %d tools not in this runtime; " - "stripping them and continuing: %s", + "Node '%s' (id=%s) declares %d tools not in this runtime; stripping them and continuing: %s", node.name, node.id, len(missing), @@ -391,10 +388,7 @@ class Orchestrator: lines.append(f"[tool result]: {c}") elif m.role == "assistant" and m.tool_calls: names = [tc.get("function", {}).get("name", "?") for tc in m.tool_calls] - lines.append( - f"[assistant (calls: {', '.join(names)})]: " - f"{m.content[:200] if m.content else ''}" - ) + lines.append(f"[assistant (calls: {', '.join(names)})]: {m.content[:200] if m.content else ''}") else: lines.append(f"[{m.role}]: {m.content}") formatted = "\n\n".join(lines) @@ -565,8 +559,7 @@ class Orchestrator: # [RESTORED] Type safety check if not isinstance(buffer_data, dict): self.logger.warning( - f"⚠️ Invalid data buffer type in session state: " - f"{type(buffer_data).__name__}, expected dict" + f"⚠️ Invalid data buffer type in session state: {type(buffer_data).__name__}, expected dict" ) else: # Restore buffer from previous session. @@ -590,8 +583,7 @@ class Orchestrator: # contains all state including the original input, and re-writing # input_data would overwrite intermediate results with stale values. _is_resuming = bool( - session_state - and (session_state.get("paused_at") or session_state.get("resume_from_checkpoint")) + session_state and (session_state.get("paused_at") or session_state.get("resume_from_checkpoint")) ) if input_data and not _is_resuming: for key, value in input_data.items(): @@ -616,11 +608,7 @@ class Orchestrator: # If resuming at a specific node (paused_at), that node was counted # but never completed, so decrement its count paused_at = session_state.get("paused_at") - if ( - paused_at - and paused_at in node_visit_counts - and node_visit_counts[paused_at] > 0 - ): + if paused_at and paused_at in node_visit_counts and node_visit_counts[paused_at] > 0: old_count = node_visit_counts[paused_at] node_visit_counts[paused_at] -= 1 self.logger.info( @@ -636,10 +624,7 @@ class Orchestrator: checkpoint = await checkpoint_store.load_checkpoint(checkpoint_id) if checkpoint: - self.logger.info( - f"🔄 Resuming from checkpoint: {checkpoint_id} " - f"(node: {checkpoint.current_node})" - ) + self.logger.info(f"🔄 Resuming from checkpoint: {checkpoint_id} (node: {checkpoint.current_node})") checkpoint_run_id = checkpoint.run_id or LEGACY_RUN_ID self._run_id = checkpoint_run_id @@ -648,9 +633,7 @@ class Orchestrator: buffer.write(key, value, validate=False) # Start from checkpoint's next node or current node - current_node_id = ( - checkpoint.next_node or checkpoint.current_node or graph.entry_node - ) + current_node_id = checkpoint.next_node or checkpoint.current_node or graph.entry_node # Restore execution path path.extend(checkpoint.execution_path) @@ -660,16 +643,11 @@ class Orchestrator: f"resuming at node: {current_node_id}" ) else: - self.logger.warning( - f"Checkpoint {checkpoint_id} not found, resuming from normal entry point" - ) + self.logger.warning(f"Checkpoint {checkpoint_id} not found, resuming from normal entry point") current_node_id = graph.get_entry_point(session_state) except Exception as e: - self.logger.error( - f"Failed to load checkpoint {checkpoint_id}: {e}, " - f"resuming from normal entry point" - ) + self.logger.error(f"Failed to load checkpoint {checkpoint_id}: {e}, resuming from normal entry point") current_node_id = graph.get_entry_point(session_state) else: current_node_id = graph.get_entry_point(session_state) @@ -757,20 +735,14 @@ class Orchestrator: "human_input": "event_loop", # Use queen interaction / escalation instead } - def _get_node_implementation( - self, node_spec: NodeSpec, cleanup_llm_model: str | None = None - ) -> NodeProtocol: + def _get_node_implementation(self, node_spec: NodeSpec, cleanup_llm_model: str | None = None) -> NodeProtocol: """Get or create a node implementation.""" # Check registry first if node_spec.id in self.node_registry: - logger.debug( - "[Orchestrator._get_node_implementation] Found node '%s' in registry", node_spec.id - ) + logger.debug("[Orchestrator._get_node_implementation] Found node '%s' in registry", node_spec.id) return self.node_registry[node_spec.id] logger.debug( - "[Orchestrator._get_node_implementation]" - " Node '%s' not in registry (keys: %s)," - " creating new", + "[Orchestrator._get_node_implementation] Node '%s' not in registry (keys: %s), creating new", node_spec.id, list(self.node_registry.keys()), ) @@ -840,9 +812,7 @@ class Orchestrator: # Cache so inject_event() is reachable for queen interaction and escalation routing self.node_registry[node_spec.id] = node logger.debug( - "[Orchestrator._get_node_implementation]" - " Cached node '%s' in node_registry," - " registry now has keys: %s", + "[Orchestrator._get_node_implementation] Cached node '%s' in node_registry, registry now has keys: %s", node_spec.id, list(self.node_registry.keys()), ) @@ -925,9 +895,7 @@ class Orchestrator: if len(conditionals) > 1: max_prio = max(e.priority for e in conditionals) traversable = [ - e - for e in traversable - if e.condition != EdgeCondition.CONDITIONAL or e.priority == max_prio + e for e in traversable if e.condition != EdgeCondition.CONDITIONAL or e.priority == max_prio ] return traversable @@ -1090,9 +1058,7 @@ class Orchestrator: execution_id=self._execution_id, ) - self.logger.info( - f" ▶ Branch {node_spec.name}: executing (attempt {attempt + 1})" - ) + self.logger.info(f" ▶ Branch {node_spec.name}: executing (attempt {attempt + 1})") result = await node_impl.execute(ctx) last_result = result @@ -1153,19 +1119,13 @@ class Orchestrator: ) return branch, result - self.logger.warning( - f" ↻ Branch {node_spec.name}: " - f"retry {attempt + 1}/{effective_max_retries}" - ) + self.logger.warning(f" ↻ Branch {node_spec.name}: retry {attempt + 1}/{effective_max_retries}") # All retries exhausted branch.status = "failed" branch.error = last_result.error if last_result else "Unknown error" branch.result = last_result - self.logger.error( - f" ✗ Branch {node_spec.name}: " - f"failed after {effective_max_retries} attempts" - ) + self.logger.error(f" ✗ Branch {node_spec.name}: failed after {effective_max_retries} attempts") return branch, last_result except Exception as e: @@ -1208,10 +1168,7 @@ class Orchestrator: # Branch timed out branch.status = "timed_out" branch.error = f"Branch timed out after {timeout}s" - self.logger.warning( - f" ⏱ Branch {graph.get_node(branch.node_id).name}: " - f"timed out after {timeout}s" - ) + self.logger.warning(f" ⏱ Branch {graph.get_node(branch.node_id).name}: timed out after {timeout}s") path.append(branch.node_id) failed_branches.append(branch) elif isinstance(result, Exception): @@ -1235,13 +1192,9 @@ class Orchestrator: if self._parallel_config.on_branch_failure == "fail_all": raise RuntimeError(f"Parallel execution failed: branches {failed_names} failed") elif self._parallel_config.on_branch_failure == "continue_others": - self.logger.warning( - f"⚠ Some branches failed ({failed_names}), continuing with successful ones" - ) + self.logger.warning(f"⚠ Some branches failed ({failed_names}), continuing with successful ones") - self.logger.info( - f" ⑃ Fan-out complete: {len(branch_results)}/{len(branches)} branches succeeded" - ) + self.logger.info(f" ⑃ Fan-out complete: {len(branch_results)}/{len(branches)} branches succeeded") return branch_results, total_tokens, total_latency def register_node(self, node_id: str, implementation: NodeProtocol) -> None: @@ -1432,15 +1385,10 @@ class Orchestrator: return True if not terminal_worker_ids: # No terminals: check if all workers are done - return all( - w.lifecycle in (WorkerLifecycle.COMPLETED, WorkerLifecycle.FAILED) - for w in workers.values() - ) + return all(w.lifecycle in (WorkerLifecycle.COMPLETED, WorkerLifecycle.FAILED) for w in workers.values()) if any(w.lifecycle == WorkerLifecycle.RUNNING for w in workers.values()): return False - return any( - tid in completed_terminals or tid in failed_workers for tid in terminal_worker_ids - ) + return any(tid in completed_terminals or tid in failed_workers for tid in terminal_worker_ids) def _mark_quiescent_terminal_failure() -> bool: nonlocal execution_error @@ -1448,22 +1396,15 @@ class Orchestrator: return False if any(w.lifecycle == WorkerLifecycle.RUNNING for w in workers.values()): return False - if any( - tid in completed_terminals or tid in failed_workers for tid in terminal_worker_ids - ): + if any(tid in completed_terminals or tid in failed_workers for tid in terminal_worker_ids): return False - execution_error = ( - "Worker execution ended before terminal nodes completed: " - f"{sorted(terminal_worker_ids)}" - ) + execution_error = f"Worker execution ended before terminal nodes completed: {sorted(terminal_worker_ids)}" self.logger.error(execution_error) return True # Track fan-out branch workers for per-branch timeout enforcement _fanout_branch_tasks: dict[str, asyncio.Task] = {} # worker_id → timeout-wrapper task - branch_timeout = ( - self._parallel_config.branch_timeout_seconds if self._parallel_config else 300.0 - ) + branch_timeout = self._parallel_config.branch_timeout_seconds if self._parallel_config else 300.0 def _route_activation( activation: Activation, @@ -1498,9 +1439,7 @@ class Orchestrator: target_worker.activate(inherited_tags=activation.fan_out_tags) if target_worker._task is not None: # Fan-out branch: wrap with timeout - is_fanout_branch = any( - tag.via_branch == activation.target_id for tag in activation.fan_out_tags - ) + is_fanout_branch = any(tag.via_branch == activation.target_id for tag in activation.fan_out_tags) if is_fanout_branch and branch_timeout > 0: timed_task = asyncio.ensure_future( asyncio.wait_for(target_worker._task, timeout=branch_timeout) @@ -1555,9 +1494,7 @@ class Orchestrator: if completion.conversation is not None: gc.continuous_conversation = completion.conversation - self.logger.info( - f" ✓ Worker completed: {worker_id} ({len(activations)} outgoing activation(s))" - ) + self.logger.info(f" ✓ Worker completed: {worker_id} ({len(activations)} outgoing activation(s))") # Route activations to target workers for activation in activations: @@ -1598,9 +1535,7 @@ class Orchestrator: completion_event.set() # Subscribe to events (only if event bus has subscribe capability) - has_event_subscription = self._event_bus is not None and hasattr( - self._event_bus, "subscribe" - ) + has_event_subscription = self._event_bus is not None and hasattr(self._event_bus, "subscribe") if has_event_subscription: sub_completed = self._event_bus.subscribe( event_types=[EventType.WORKER_COMPLETED], @@ -1642,14 +1577,12 @@ class Orchestrator: ) if unresolved_terminals: execution_error = ( - "Worker execution ended before terminal nodes completed: " - f"{unresolved_terminals}" + f"Worker execution ended before terminal nodes completed: {unresolved_terminals}" ) self.logger.error(execution_error) else: execution_error = ( - "Worker execution ended before all workers reached " - "a terminal lifecycle state" + "Worker execution ended before all workers reached a terminal lifecycle state" ) self.logger.error(execution_error) break @@ -1680,10 +1613,7 @@ class Orchestrator: task_error = exc # Check for fan-out branch timeout - if ( - isinstance(task_error, asyncio.TimeoutError) - and wid in _fanout_branch_tasks - ): + if isinstance(task_error, asyncio.TimeoutError) and wid in _fanout_branch_tasks: error = f"Branch failed (timed out after {branch_timeout}s)" failed_workers[wid] = error worker.lifecycle = WorkerLifecycle.FAILED @@ -1727,10 +1657,7 @@ class Orchestrator: src_spec = graph.get_node(wid) if src_spec and src_spec.tools: for t in self.tools: - if ( - t.name in src_spec.tools - and t.name not in gc.cumulative_tool_names - ): + if t.name in src_spec.tools and t.name not in gc.cumulative_tool_names: gc.cumulative_tools.append(t) gc.cumulative_tool_names.add(t.name) if src_spec and src_spec.output_keys: @@ -1741,8 +1668,7 @@ class Orchestrator: gc.continuous_conversation = completion_conversation self.logger.info( - f" ✓ Worker completed: {wid} " - f"({len(outgoing_activations)} outgoing activation(s))" + f" ✓ Worker completed: {wid} ({len(outgoing_activations)} outgoing activation(s))" ) # Route activations @@ -1787,8 +1713,7 @@ class Orchestrator: error = str(task_error) else: error = ( - "Worker task completed without publishing a completion " - f"(lifecycle={worker.lifecycle})" + f"Worker task completed without publishing a completion (lifecycle={worker.lifecycle})" ) failed_workers[wid] = error diff --git a/core/framework/orchestrator/prompt_composer.py b/core/framework/orchestrator/prompt_composer.py index b5925a28..65b68bc8 100644 --- a/core/framework/orchestrator/prompt_composer.py +++ b/core/framework/orchestrator/prompt_composer.py @@ -97,15 +97,12 @@ def build_transition_marker( file_path = data_path / filename try: write_content = ( - json.dumps(value, indent=2, ensure_ascii=False) - if isinstance(value, (dict, list)) - else str(value) + json.dumps(value, indent=2, ensure_ascii=False) if isinstance(value, (dict, list)) else str(value) ) file_path.write_text(write_content, encoding="utf-8") file_size = file_path.stat().st_size buffer_items[key] = ( - f"[Saved to '{filename}' ({file_size:,} bytes). " - f"Use read_file(path='{filename}') to access.]" + f"[Saved to '{filename}' ({file_size:,} bytes). Use read_file(path='{filename}') to access.]" ) except Exception: buffer_items[key] = val_str[:300] + "..." diff --git a/core/framework/orchestrator/prompting.py b/core/framework/orchestrator/prompting.py index 3c655253..0c8120ab 100644 --- a/core/framework/orchestrator/prompting.py +++ b/core/framework/orchestrator/prompting.py @@ -177,18 +177,12 @@ def build_prompt_spec_from_node_context( # Tool-gated pre-activation: inject full body of default skills whose # trigger tools are present in this node's tool list (e.g. browser_* # pulls in hive.browser-automation). - tool_names = [ - getattr(t, "name", "") for t in (getattr(ctx, "available_tools", None) or []) - ] - skills_catalog_prompt = augment_catalog_for_tools( - ctx.skills_catalog_prompt or "", tool_names - ) + tool_names = [getattr(t, "name", "") for t in (getattr(ctx, "available_tools", None) or [])] + skills_catalog_prompt = augment_catalog_for_tools(ctx.skills_catalog_prompt or "", tool_names) return NodePromptSpec( identity_prompt=ctx.identity_prompt or "", - focus_prompt=focus_prompt - if focus_prompt is not None - else (ctx.node_spec.system_prompt or ""), + focus_prompt=focus_prompt if focus_prompt is not None else (ctx.node_spec.system_prompt or ""), narrative=narrative if narrative is not None else (ctx.narrative or ""), accounts_prompt=ctx.accounts_prompt or "", skills_catalog_prompt=skills_catalog_prompt, @@ -299,8 +293,7 @@ def build_transition_message(spec: TransitionSpec) -> str: if spec.data_files: sections.append( - "\nData files (use read_file to access):\n" - + "\n".join(f" {entry}" for entry in spec.data_files) + "\nData files (use read_file to access):\n" + "\n".join(f" {entry}" for entry in spec.data_files) ) if spec.cumulative_tool_names: diff --git a/core/framework/orchestrator/safe_eval.py b/core/framework/orchestrator/safe_eval.py index 3697c4c3..85dced4d 100644 --- a/core/framework/orchestrator/safe_eval.py +++ b/core/framework/orchestrator/safe_eval.py @@ -169,11 +169,7 @@ class SafeEvalVisitor(ast.NodeVisitor): return tuple(self.visit(elt) for elt in node.elts) def visit_Dict(self, node: ast.Dict) -> dict: - return { - self.visit(k): self.visit(v) - for k, v in zip(node.keys, node.values, strict=False) - if k is not None - } + return {self.visit(k): self.visit(v) for k, v in zip(node.keys, node.values, strict=False) if k is not None} # --- Operations --- def visit_BinOp(self, node: ast.BinOp) -> Any: diff --git a/core/framework/orchestrator/validator.py b/core/framework/orchestrator/validator.py index eb98b18d..74ff3648 100644 --- a/core/framework/orchestrator/validator.py +++ b/core/framework/orchestrator/validator.py @@ -120,9 +120,7 @@ class OutputValidator: nullable_keys = nullable_keys or [] if not isinstance(output, dict): - return ValidationResult( - success=False, errors=[f"Output is not a dict, got {type(output).__name__}"] - ) + return ValidationResult(success=False, errors=[f"Output is not a dict, got {type(output).__name__}"]) for key in expected_keys: if key not in output: @@ -237,9 +235,7 @@ class OutputValidator: # Check for overly long values if len(value) > max_length: - errors.append( - f"Output key '{key}' exceeds max length ({len(value)} > {max_length})" - ) + errors.append(f"Output key '{key}' exceeds max length ({len(value)} > {max_length})") return ValidationResult(success=len(errors) == 0, errors=errors) diff --git a/core/framework/pipeline/stages/cost_guard.py b/core/framework/pipeline/stages/cost_guard.py index ff01d146..cd79c7ca 100644 --- a/core/framework/pipeline/stages/cost_guard.py +++ b/core/framework/pipeline/stages/cost_guard.py @@ -27,8 +27,6 @@ class CostGuardStage(PipelineStage): if estimated > self._budget: return PipelineResult( action="reject", - rejection_reason=( - f"Estimated cost ${estimated:.4f} exceeds budget ${self._budget:.4f}" - ), + rejection_reason=(f"Estimated cost ${estimated:.4f} exceeds budget ${self._budget:.4f}"), ) return PipelineResult(action="continue") diff --git a/core/framework/pipeline/stages/input_validation.py b/core/framework/pipeline/stages/input_validation.py index 3a025c0e..6e7d3f40 100644 --- a/core/framework/pipeline/stages/input_validation.py +++ b/core/framework/pipeline/stages/input_validation.py @@ -40,8 +40,7 @@ class InputValidationStage(PipelineStage): return PipelineResult( action="reject", rejection_reason=( - f"Input key '{key}' has type {type(value).__name__}, " - f"expected {expected_type.__name__}" + f"Input key '{key}' has type {type(value).__name__}, expected {expected_type.__name__}" ), ) return PipelineResult(action="continue") diff --git a/core/framework/pipeline/stages/rate_limit.py b/core/framework/pipeline/stages/rate_limit.py index 7c49e04f..19639f81 100644 --- a/core/framework/pipeline/stages/rate_limit.py +++ b/core/framework/pipeline/stages/rate_limit.py @@ -35,9 +35,7 @@ class RateLimitStage(PipelineStage): if len(self._timestamps[key]) >= self._max_rpm: return PipelineResult( action="reject", - rejection_reason=( - f"Rate limit exceeded: {self._max_rpm} req/min for session '{session_id}'" - ), + rejection_reason=(f"Rate limit exceeded: {self._max_rpm} req/min for session '{session_id}'"), ) self._timestamps[key].append(now) return PipelineResult(action="continue") diff --git a/core/framework/schemas/goal.py b/core/framework/schemas/goal.py index 0a050d63..7fdc1b66 100644 --- a/core/framework/schemas/goal.py +++ b/core/framework/schemas/goal.py @@ -25,9 +25,7 @@ class GoalStatus(StrEnum): class SuccessCriterion(BaseModel): id: str description: str = Field(description="Human-readable description of what success looks like") - metric: str = Field( - description="How to measure: 'output_contains', 'output_equals', 'llm_judge', 'custom'" - ) + metric: str = Field(description="How to measure: 'output_contains', 'output_equals', 'llm_judge', 'custom'") type: str = Field(default="success_rate", description="Runtime evaluation type") target: Any = Field(description="The target value or condition") weight: float = Field(default=1.0, ge=0.0, le=1.0, description="Relative importance (0-1)") @@ -39,15 +37,9 @@ class SuccessCriterion(BaseModel): class Constraint(BaseModel): id: str description: str - constraint_type: str = Field( - description="Type: 'hard' (must not violate) or 'soft' (prefer not to violate)" - ) - category: str = Field( - default="general", description="Category: 'time', 'cost', 'safety', 'scope', 'quality'" - ) - check: str = Field( - default="", description="How to check: expression, function name, or 'llm_judge'" - ) + constraint_type: str = Field(description="Type: 'hard' (must not violate) or 'soft' (prefer not to violate)") + category: str = Field(default="general", description="Category: 'time', 'cost', 'safety', 'scope', 'quality'") + check: str = Field(default="", description="How to check: expression, function name, or 'llm_judge'") model_config = {"extra": "allow"} diff --git a/core/framework/schemas/session_state.py b/core/framework/schemas/session_state.py index 4fcecd7c..6bf2c189 100644 --- a/core/framework/schemas/session_state.py +++ b/core/framework/schemas/session_state.py @@ -237,9 +237,7 @@ class SessionState(BaseModel): progress=SessionProgress( current_node=result.paused_at or (result.path[-1] if result.path else None), paused_at=result.paused_at, - resume_from=result.session_state.get("resume_from") - if result.session_state - else None, + resume_from=result.session_state.get("resume_from") if result.session_state else None, steps_executed=result.steps_executed, total_tokens=result.total_tokens, total_latency_ms=result.total_latency_ms, @@ -256,9 +254,7 @@ class SessionState(BaseModel): error=result.error, output=result.output, ), - data_buffer=result.session_state.get( - "data_buffer", result.session_state.get("memory", {}) - ) + data_buffer=result.session_state.get("data_buffer", result.session_state.get("memory", {})) if result.session_state else {}, input_data=input_data or {}, diff --git a/core/framework/server/app.py b/core/framework/server/app.py index 37b5de14..37c25a82 100644 --- a/core/framework/server/app.py +++ b/core/framework/server/app.py @@ -56,8 +56,7 @@ def validate_agent_path(agent_path: str | Path) -> Path: if resolved.is_relative_to(root) and resolved != root: return resolved raise ValueError( - "agent_path must be inside an allowed directory " - "(~/.hive/colonies/, exports/, examples/, or ~/.hive/agents/)" + "agent_path must be inside an allowed directory (~/.hive/colonies/, exports/, examples/, or ~/.hive/agents/)" ) @@ -186,9 +185,7 @@ async def handle_browser_status(request: web.Request) -> web.Response: status_port = bridge_port + 1 try: - reader, writer = await asyncio.wait_for( - asyncio.open_connection("127.0.0.1", status_port), timeout=0.5 - ) + reader, writer = await asyncio.wait_for(asyncio.open_connection("127.0.0.1", status_port), timeout=0.5) writer.write(b"GET /status HTTP/1.0\r\nHost: 127.0.0.1\r\n\r\n") await writer.drain() raw = await asyncio.wait_for(reader.read(512), timeout=0.5) @@ -232,9 +229,7 @@ def create_app(model: str | None = None) -> web.Application: from framework.credentials.key_storage import generate_and_save_credential_key generate_and_save_credential_key() - logger.info( - "Generated and persisted HIVE_CREDENTIAL_KEY to ~/.hive/secrets/credential_key" - ) + logger.info("Generated and persisted HIVE_CREDENTIAL_KEY to ~/.hive/secrets/credential_key") except Exception as exc: logger.warning("Could not auto-persist HIVE_CREDENTIAL_KEY: %s", exc) @@ -274,9 +269,7 @@ def create_app(model: str | None = None) -> web.Application: log_collisions=True, max_tools=selection_max_tools, ) - logger.info( - "Pre-loaded queen tool registry with %d tools", len(_queen_tool_registry.get_tools()) - ) + logger.info("Pre-loaded queen tool registry with %d tools", len(_queen_tool_registry.get_tools())) except Exception as e: logger.warning("Failed to pre-load queen tool registry: %s", e) @@ -297,11 +290,11 @@ def create_app(model: str | None = None) -> web.Application: from framework.server.routes_credentials import register_routes as register_credential_routes from framework.server.routes_events import register_routes as register_event_routes from framework.server.routes_execution import register_routes as register_execution_routes - from framework.server.routes_workers import register_routes as register_worker_routes from framework.server.routes_logs import register_routes as register_log_routes from framework.server.routes_messages import register_routes as register_message_routes from framework.server.routes_queens import register_routes as register_queen_routes from framework.server.routes_sessions import register_routes as register_session_routes + from framework.server.routes_workers import register_routes as register_worker_routes register_config_routes(app) register_credential_routes(app) diff --git a/core/framework/server/queen_orchestrator.py b/core/framework/server/queen_orchestrator.py index ce905068..ee0ec095 100644 --- a/core/framework/server/queen_orchestrator.py +++ b/core/framework/server/queen_orchestrator.py @@ -13,6 +13,8 @@ from pathlib import Path from typing import TYPE_CHECKING, Any if TYPE_CHECKING: + from framework.agent_loop.internals.types import HookContext, HookResult + from framework.loader.tool_registry import ToolRegistry from framework.server.session_manager import Session logger = logging.getLogger(__name__) @@ -49,7 +51,7 @@ def install_worker_escalation_routing( # Defensive: ignore any stray non-worker origin (e.g. queen). if not stream_id.startswith("worker:"): return - worker_id = stream_id[len("worker:"):] + worker_id = stream_id[len("worker:") :] data = event.data or {} request_id = data.get("request_id") reason = str(data.get("reason", "")).strip() @@ -64,8 +66,7 @@ def install_worker_escalation_routing( try: await runtime.inject_input( worker_id, - "[QUEEN_REPLY] queue_full — queen inbox saturated; " - "proceed with best judgment or retry later.", + "[QUEEN_REPLY] queue_full — queen inbox saturated; proceed with best judgment or retry later.", ) except Exception: logger.warning( @@ -100,24 +101,16 @@ def install_worker_escalation_routing( lines.append(context_text) if request_id: lines.append( - "Use reply_to_worker(request_id, reply) to unblock, " - "or list_worker_questions() to see all pending." + "Use reply_to_worker(request_id, reply) to unblock, or list_worker_questions() to see all pending." ) else: - lines.append( - "No request_id — use inject_message(content=...) to relay " - "guidance manually." - ) + lines.append("No request_id — use inject_message(content=...) to relay guidance manually.") handoff = "\n".join(lines) # Fallback: if the queen loop has gone away, publish a # CLIENT_INPUT_REQUESTED so the human sees the question and the # worker does not wedge. - queen_node = ( - session.queen_executor.node_registry.get("queen") - if session.queen_executor is not None - else None - ) + queen_node = session.queen_executor.node_registry.get("queen") if session.queen_executor is not None else None if queen_node is None or not hasattr(queen_node, "inject_event"): if session.event_bus is not None: await session.event_bus.emit_client_input_requested( @@ -141,9 +134,7 @@ def install_worker_escalation_routing( filter_colony=runtime.colony_id, ) except Exception: - logger.warning( - "Failed to install colony-scoped escalation sub", exc_info=True - ) + logger.warning("Failed to install colony-scoped escalation sub", exc_info=True) # fall through to session bus if session.event_bus is None: return None @@ -174,14 +165,12 @@ def _build_credentials_provider() -> Any: def _provider() -> str: now = time.monotonic() - if ( - state["cached"] - and (now - state["cached_at"]) < _CREDENTIALS_BLOCK_TTL_SECONDS - ): + if state["cached"] and (now - state["cached_at"]) < _CREDENTIALS_BLOCK_TTL_SECONDS: return state["cached"] try: from aden_tools.credentials.store_adapter import CredentialStoreAdapter + from framework.orchestrator.prompting import build_accounts_prompt adapter = CredentialStoreAdapter.default() @@ -313,8 +302,8 @@ async def create_queen( _shared_building_knowledge, finalize_queen_prompt, ) - from framework.llm.capabilities import supports_image_tool_results from framework.host.event_bus import AgentEvent, EventType + from framework.llm.capabilities import supports_image_tool_results from framework.loader.mcp_registry import MCPRegistry from framework.loader.tool_registry import ToolRegistry from framework.tools.queen_lifecycle_tools import ( @@ -326,9 +315,7 @@ async def create_queen( # Use pre-loaded cached registry if available (fast path) if tool_registry is not None: queen_registry = tool_registry - logger.info( - "Queen: using pre-loaded tool registry with %d tools", len(queen_registry.get_tools()) - ) + logger.info("Queen: using pre-loaded tool registry with %d tools", len(queen_registry.get_tools())) else: # Build fresh (slow path - for backwards compatibility) queen_registry = ToolRegistry() @@ -456,13 +443,9 @@ async def create_queen( # Independent phase gets core tools + all MCP tools not claimed by any # other phase (coder-tools file I/O, gcu-tools browser, etc.). - all_phase_names = ( - planning_names | building_names | staging_names | running_names | editing_names - ) + all_phase_names = planning_names | building_names | staging_names | running_names | editing_names mcp_tools = [t for t in queen_tools if t.name not in all_phase_names] - phase_state.independent_tools = [ - t for t in queen_tools if t.name in independent_names - ] + mcp_tools + phase_state.independent_tools = [t for t in queen_tools if t.name in independent_names] + mcp_tools logger.info( "Queen: independent tools: %s", sorted(t.name for t in phase_state.independent_tools), @@ -494,9 +477,7 @@ async def create_queen( # Resolve vision-only prompt sections based on the session's LLM. # session.llm is immutable for the session's lifetime, so this check # is stable — prompts never need to be recomposed mid-session. - _has_vision = bool( - session.llm and supports_image_tool_results(getattr(session.llm, "model", "")) - ) + _has_vision = bool(session.llm and supports_image_tool_results(getattr(session.llm, "model", ""))) _planning_body = ( _queen_character_core @@ -625,6 +606,14 @@ async def create_queen( ) async def _queen_identity_hook(ctx: HookContext) -> HookResult | None: + from framework.agent_loop.internals.types import HookResult + from framework.agents.queen.queen_profiles import ( + ensure_default_queens, + format_queen_identity_prompt, + load_queen_profile, + select_queen, + ) + ensure_default_queens() trigger = ctx.trigger or "" # If the session was pre-bound to a queen (user clicked a specific @@ -676,18 +665,12 @@ async def create_queen( try: _meta = _json.loads(_meta_path.read_text(encoding="utf-8")) _meta["queen_id"] = queen_id - _meta_path.write_text( - _json.dumps(_meta, ensure_ascii=False), encoding="utf-8" - ) + _meta_path.write_text(_json.dumps(_meta, ensure_ascii=False), encoding="utf-8") except (OSError, _json.JSONDecodeError): pass # Re-point event bus log to new location, preserving offset - _offset = getattr( - session.event_bus, "_session_log_iteration_offset", 0 - ) - session.event_bus.set_session_log( - _new_dir / "events.jsonl", iteration_offset=_offset - ) + _offset = getattr(session.event_bus, "_session_log_iteration_offset", 0) + session.event_bus.set_session_log(_new_dir / "events.jsonl", iteration_offset=_offset) if _session_event_bus is not None: await _session_event_bus.publish( @@ -742,7 +725,7 @@ async def create_queen( logger.debug("Queen: tools not yet available (registered on worker load): %s", missing) node_updates["tools"] = available_tools - adjusted_node = _orig_node.model_copy(update=node_updates) + _orig_node.model_copy(update=node_updates) # Determine session mode: # - RESTORE: Resume cold session with history, no initial prompt -> wait for user @@ -897,9 +880,7 @@ async def create_queen( # bootstrap: if the frontend doesn't pass initial_prompt, we must # NOT invent a phantom "Hello" — that used to concatenate with the # real first chat message and confuse the model. - ctx.input_data = { - "user_request": None if _is_restore_mode else (initial_prompt or None) - } + ctx.input_data = {"user_request": None if _is_restore_mode else (initial_prompt or None)} # Publish the initial prompt as a CLIENT_INPUT_RECEIVED event so # it appears in the SSE stream and persists to events.jsonl for diff --git a/core/framework/server/routes_config.py b/core/framework/server/routes_config.py index 36348d16..8302501c 100644 --- a/core/framework/server/routes_config.py +++ b/core/framework/server/routes_config.py @@ -281,9 +281,7 @@ def _get_subscription_token(sub_id: str) -> str | None: return None -def _hot_swap_sessions( - request: web.Request, full_model: str, api_key: str | None, api_base: str | None -) -> int: +def _hot_swap_sessions(request: web.Request, full_model: str, api_key: str | None, api_base: str | None) -> int: """Hot-swap the LLM on all running sessions. Returns count of swapped sessions. Also refreshes the SessionManager's default model so that subsequent @@ -363,9 +361,7 @@ async def handle_update_llm_config(request: web.Request) -> web.Response: # ── Subscription mode ──────────────────────────────────────── sub = _SUBSCRIPTION_MAP.get(subscription_id) if not sub: - return web.json_response( - {"error": f"Unknown subscription: {subscription_id}"}, status=400 - ) + return web.json_response({"error": f"Unknown subscription: {subscription_id}"}, status=400) preset = get_preset(subscription_id) # Subscriptions use the fixed model from their preset (no model switching) @@ -432,9 +428,7 @@ async def handle_update_llm_config(request: web.Request) -> web.Response: provider = body.get("provider") model = body.get("model") if not provider or not model: - return web.json_response( - {"error": "Both 'provider' and 'model' are required"}, status=400 - ) + return web.json_response({"error": "Both 'provider' and 'model' are required"}, status=400) # Look up token limits from catalogue model_info = _find_model_info(provider, model) @@ -552,9 +546,7 @@ def _update_user_profile_memory(display_name: str, about: str) -> None: content = build_memory_document( name="User Profile", - description=f"User identity: {display_name}" - if display_name - else "User profile information", + description=f"User identity: {display_name}" if display_name else "User profile information", mem_type="profile", body=new_body if new_body else "No profile information yet.", ) diff --git a/core/framework/server/routes_credentials.py b/core/framework/server/routes_credentials.py index 5710f6f5..61b0c6cb 100644 --- a/core/framework/server/routes_credentials.py +++ b/core/framework/server/routes_credentials.py @@ -216,9 +216,7 @@ async def handle_check_agent(request: web.Request) -> web.Response: ensure_credential_key_env() nodes = load_agent_nodes(agent_path) - result = validate_agent_credentials( - nodes, verify=verify, raise_on_error=False, force_refresh=True - ) + result = validate_agent_credentials(nodes, verify=verify, raise_on_error=False, force_refresh=True) # If any credential needs Aden, include ADEN_API_KEY as a first-class row if any(c.aden_supported for c in result.credentials): @@ -291,13 +289,15 @@ def _collect_accounts_by_provider() -> dict[str, list[dict]]: provider = acct.get("provider", "") if not provider: continue - grouped.setdefault(provider, []).append({ - "provider": provider, - "alias": acct.get("alias", ""), - "identity": acct.get("identity", {}) or {}, - "source": acct.get("source", "aden"), - "credential_id": acct.get("credential_id", provider), - }) + grouped.setdefault(provider, []).append( + { + "provider": provider, + "alias": acct.get("alias", ""), + "identity": acct.get("identity", {}) or {}, + "source": acct.get("source", "aden"), + "credential_id": acct.get("credential_id", provider), + } + ) return grouped except Exception: logger.debug("Failed to collect accounts for specs response", exc_info=True) @@ -327,17 +327,17 @@ async def handle_resync_credentials(request: web.Request) -> web.Response: loop = asyncio.get_running_loop() # _presync_aden_tokens makes blocking HTTP calls to the Aden server. - await loop.run_in_executor( - None, lambda: _presync_aden_tokens(CREDENTIAL_SPECS, force=True) - ) + await loop.run_in_executor(None, lambda: _presync_aden_tokens(CREDENTIAL_SPECS, force=True)) _invalidate_queen_credentials_cache(request) accounts_by_provider = _collect_accounts_by_provider() - return web.json_response({ - "synced": True, - "accounts_by_provider": accounts_by_provider, - }) + return web.json_response( + { + "synced": True, + "accounts_by_provider": accounts_by_provider, + } + ) except Exception as exc: logger.exception("Error during credential resync: %s", exc) return web.json_response( @@ -366,9 +366,7 @@ async def handle_list_specs(request: web.Request) -> web.Response: _presync_aden_tokens(CREDENTIAL_SPECS) # Build composite store (env → encrypted file) - env_mapping = { - (spec.credential_id or name): spec.env_var for name, spec in CREDENTIAL_SPECS.items() - } + env_mapping = {(spec.credential_id or name): spec.env_var for name, spec in CREDENTIAL_SPECS.items()} env_storage = EnvVarStorage(env_mapping=env_mapping) if os.environ.get("HIVE_CREDENTIAL_KEY"): storage = CompositeStorage(primary=env_storage, fallbacks=[EncryptedFileStorage()]) @@ -396,21 +394,23 @@ async def handle_list_specs(request: web.Request) -> web.Response: available = len(accounts) > 0 else: available = store.is_available(cred_id) - specs.append({ - "credential_name": name, - "credential_id": cred_id, - "env_var": spec.env_var, - "description": spec.description, - "help_url": spec.help_url, - "api_key_instructions": spec.api_key_instructions, - "tools": spec.tools, - "aden_supported": spec.aden_supported, - "direct_api_key_supported": spec.direct_api_key_supported, - "credential_key": spec.credential_key, - "credential_group": spec.credential_group, - "available": available, - "accounts": accounts, - }) + specs.append( + { + "credential_name": name, + "credential_id": cred_id, + "env_var": spec.env_var, + "description": spec.description, + "help_url": spec.help_url, + "api_key_instructions": spec.api_key_instructions, + "tools": spec.tools, + "aden_supported": spec.aden_supported, + "direct_api_key_supported": spec.direct_api_key_supported, + "credential_key": spec.credential_key, + "credential_group": spec.credential_group, + "available": available, + "accounts": accounts, + } + ) # Include aden_api_key synthetic row if any spec uses Aden if any_aden: @@ -422,7 +422,9 @@ async def handle_list_specs(request: web.Request) -> web.Response: "env_var": "ADEN_API_KEY", "description": "API key from the Developers tab in Settings", "help_url": "https://hive.adenhq.com/", - "api_key_instructions": "1. Go to hive.adenhq.com\n2. Open Settings > Developers\n3. Copy your API key", + "api_key_instructions": ( + "1. Go to hive.adenhq.com\n2. Open Settings > Developers\n3. Copy your API key" + ), "tools": [], "aden_supported": True, "direct_api_key_supported": True, @@ -459,16 +461,12 @@ async def handle_validate_key(request: web.Request) -> web.Response: api_key = body.get("api_key", "").strip() if not provider_id or not api_key: - return web.json_response( - {"error": "provider_id and api_key are required"}, status=400 - ) + return web.json_response({"error": "provider_id and api_key are required"}, status=400) try: checker = _get_llm_key_providers().get(provider_id) if not checker: - return web.json_response( - {"valid": True, "message": f"No health check for {provider_id}"} - ) + return web.json_response({"valid": True, "message": f"No health check for {provider_id}"}) loop = asyncio.get_running_loop() result = await loop.run_in_executor(None, lambda: checker(api_key)) @@ -476,9 +474,7 @@ async def handle_validate_key(request: web.Request) -> web.Response: except Exception as exc: logger.warning("LLM key validation failed for %s: %s", provider_id, exc) - return web.json_response( - {"valid": None, "message": f"Validation error: {exc}"} - ) + return web.json_response({"valid": None, "message": f"Validation error: {exc}"}) def register_routes(app: web.Application) -> None: diff --git a/core/framework/server/routes_events.py b/core/framework/server/routes_events.py index fce8b6a8..5a6c923c 100644 --- a/core/framework/server/routes_events.py +++ b/core/framework/server/routes_events.py @@ -159,9 +159,7 @@ async def handle_events(request: web.Request) -> web.StreamResponse: sse = SSEResponse() await sse.prepare(request) - logger.info( - "SSE connected: session='%s', sub_id='%s', types=%d", session.id, sub_id, len(event_types) - ) + logger.info("SSE connected: session='%s', sub_id='%s', types=%d", session.id, sub_id, len(event_types)) # Replay buffered events that were published before this SSE connected. # The EventBus keeps a history ring-buffer; we replay the subset that @@ -215,9 +213,7 @@ async def handle_events(request: web.Request) -> web.StreamResponse: await sse.send_event(data) event_count += 1 if event_count == 1: - logger.info( - "SSE first event: session='%s', type='%s'", session.id, data.get("type") - ) + logger.info("SSE first event: session='%s', type='%s'", session.id, data.get("type")) except TimeoutError: try: await sse.send_keepalive() diff --git a/core/framework/server/routes_execution.py b/core/framework/server/routes_execution.py index 924749fd..ca16ef47 100644 --- a/core/framework/server/routes_execution.py +++ b/core/framework/server/routes_execution.py @@ -3,6 +3,7 @@ import asyncio import json import logging +from datetime import UTC from typing import Any from aiohttp import web @@ -117,9 +118,7 @@ async def handle_trigger(request: web.Request) -> web.Response: if session.runner: loop = asyncio.get_running_loop() try: - await loop.run_in_executor( - None, lambda: validate_agent_credentials(session.runner.graph.nodes) - ) + await loop.run_in_executor(None, lambda: validate_agent_credentials(session.runner.graph.nodes)) except Exception as e: agent_path = str(session.worker_path) if session.worker_path else "" resp = _credential_error_response(e, agent_path) @@ -129,9 +128,7 @@ async def handle_trigger(request: web.Request) -> web.Response: # Resync MCP servers if credentials were added since the worker loaded # (e.g. user connected an OAuth account mid-session via Aden UI). try: - await loop.run_in_executor( - None, lambda: session.runner._tool_registry.resync_mcp_servers_if_needed() - ) + await loop.run_in_executor(None, lambda: session.runner._tool_registry.resync_mcp_servers_if_needed()) except Exception as e: logger.warning("MCP resync failed: %s", e) @@ -228,23 +225,14 @@ async def handle_chat(request: web.Request) -> web.Response: type(queen_executor.node_registry), id(queen_executor.node_registry), ) - logger.debug( - "[handle_chat] node_registry keys: %s", list(queen_executor.node_registry.keys()) - ) + logger.debug("[handle_chat] node_registry keys: %s", list(queen_executor.node_registry.keys())) node = queen_executor.node_registry.get("queen") - logger.debug( - "[handle_chat] node=%s, node_type=%s", node, type(node).__name__ if node else None - ) - logger.debug( - "[handle_chat] has_inject_event=%s", hasattr(node, "inject_event") if node else False - ) + logger.debug("[handle_chat] node=%s, node_type=%s", node, type(node).__name__ if node else None) + logger.debug("[handle_chat] has_inject_event=%s", hasattr(node, "inject_event") if node else False) # Race condition: executor exists but node not created yet (still initializing) if node is None and session.queen_task is not None and not session.queen_task.done(): - logger.warning( - "[handle_chat] Queen executor exists but node" - " not ready yet (initializing). Waiting..." - ) + logger.warning("[handle_chat] Queen executor exists but node not ready yet (initializing). Waiting...") # Wait a short time for initialization to progress import asyncio @@ -302,16 +290,12 @@ async def handle_chat(request: web.Request) -> web.Response: ) else: logger.error( - "[handle_chat] CRITICAL: Queen node exists" - " but missing inject_event!" - " node_attrs=%s", + "[handle_chat] CRITICAL: Queen node exists but missing inject_event! node_attrs=%s", [a for a in dir(node) if not a.startswith("_")], ) # Queen is dead — try to revive her - logger.warning( - "[handle_chat] Queen is dead for session '%s', reviving on /chat request", session.id - ) + logger.warning("[handle_chat] Queen is dead for session '%s', reviving on /chat request", session.id) manager: Any = request.app["manager"] try: logger.debug("[handle_chat] Calling manager.revive_queen()...") @@ -322,9 +306,7 @@ async def handle_chat(request: web.Request) -> web.Response: _revived_executor = session.queen_executor _revived_node = _revived_executor.node_registry.get("queen") if _revived_executor else None if _revived_node is not None and hasattr(_revived_node, "inject_event"): - await _revived_node.inject_event( - message, is_client_input=True, image_content=image_content - ) + await _revived_node.inject_event(message, is_client_input=True, image_content=image_content) return web.json_response( { "status": "queen_revived", @@ -552,9 +534,7 @@ async def handle_stop(request: web.Request) -> web.Response: if hasattr(node, "cancel_current_turn"): node.cancel_current_turn() - cancelled = await stream.cancel_execution( - execution_id, reason="Execution stopped by user" - ) + cancelled = await stream.cancel_execution(execution_id, reason="Execution stopped by user") if cancelled: # Cancel queen's in-progress LLM turn if session.queen_executor: @@ -716,13 +696,12 @@ async def fork_session_into_colony( import asyncio import json import shutil - from datetime import datetime, timezone + from datetime import datetime from pathlib import Path from framework.agent_loop.agent_loop import AgentLoop, LoopConfig - from framework.agent_loop.types import AgentContext, AgentSpec + from framework.agent_loop.types import AgentContext from framework.server.session_manager import _queen_session_dir - from framework.storage.conversation_store import FileConversationStore queen_loop: AgentLoop = session.queen_executor.node_registry["queen"] queen_ctx: AgentContext = getattr(queen_loop, "_last_ctx", None) @@ -813,11 +792,9 @@ async def fork_session_into_colony( "queen_id": getattr(phase_state, "queen_id", "") if phase_state else "", "loop_config": queen_lc_config, "spawned_from": session.id, - "spawned_at": datetime.now(timezone.utc).isoformat(), + "spawned_at": datetime.now(UTC).isoformat(), } - worker_config_path.write_text( - json.dumps(worker_meta, indent=2, ensure_ascii=False), encoding="utf-8" - ) + worker_config_path.write_text(json.dumps(worker_meta, indent=2, ensure_ascii=False), encoding="utf-8") # ── 3. Duplicate queen session into colony ─────────────────── # Copy the queen's full session directory (conversations, events, @@ -843,9 +820,7 @@ async def fork_session_into_colony( dest_queen_dir = _queen_session_dir(colony_session_id, queen_name) if source_queen_dir.exists(): - await asyncio.to_thread( - shutil.copytree, source_queen_dir, dest_queen_dir, dirs_exist_ok=True - ) + await asyncio.to_thread(shutil.copytree, source_queen_dir, dest_queen_dir, dirs_exist_ok=True) # Update the duplicated meta.json to point to the colony dest_meta_path = dest_queen_dir / "meta.json" dest_meta: dict = {} @@ -859,9 +834,7 @@ async def fork_session_into_colony( dest_meta["queen_id"] = queen_name dest_meta["forked_from"] = session.id dest_meta["colony_fork"] = True # exclude from queen DM history - dest_meta_path.write_text( - json.dumps(dest_meta, ensure_ascii=False), encoding="utf-8" - ) + dest_meta_path.write_text(json.dumps(dest_meta, ensure_ascii=False), encoding="utf-8") logger.info( "Duplicated queen session %s -> %s for colony '%s'", session.id, @@ -875,9 +848,7 @@ async def fork_session_into_colony( worker_conv_dir = worker_storage / "conversations" source_conv_dir = dest_queen_dir / "conversations" if source_conv_dir.exists(): - await asyncio.to_thread( - shutil.copytree, source_conv_dir, worker_conv_dir, dirs_exist_ok=True - ) + await asyncio.to_thread(shutil.copytree, source_conv_dir, worker_conv_dir, dirs_exist_ok=True) logger.info("Copied queen conversations to worker storage %s", worker_conv_dir) else: logger.warning( @@ -897,12 +868,12 @@ async def fork_session_into_colony( metadata["queen_name"] = queen_name metadata["queen_session_id"] = colony_session_id metadata["source_session_id"] = session.id - metadata.setdefault("created_at", datetime.now(timezone.utc).isoformat()) - metadata["updated_at"] = datetime.now(timezone.utc).isoformat() + metadata.setdefault("created_at", datetime.now(UTC).isoformat()) + metadata["updated_at"] = datetime.now(UTC).isoformat() metadata.setdefault("workers", {}) metadata["workers"][worker_name] = { "task": worker_task[:100], - "spawned_at": datetime.now(timezone.utc).isoformat(), + "spawned_at": datetime.now(UTC).isoformat(), } metadata_path.write_text(json.dumps(metadata, indent=2, ensure_ascii=False), encoding="utf-8") @@ -920,9 +891,7 @@ async def fork_session_into_colony( qmeta["agent_name"] = colony_name.replace("_", " ").title() try: source_meta_path.parent.mkdir(parents=True, exist_ok=True) - source_meta_path.write_text( - json.dumps(qmeta, ensure_ascii=False), encoding="utf-8" - ) + source_meta_path.write_text(json.dumps(qmeta, ensure_ascii=False), encoding="utf-8") except OSError: pass diff --git a/core/framework/server/routes_queens.py b/core/framework/server/routes_queens.py index 86a760a7..4fc41a68 100644 --- a/core/framework/server/routes_queens.py +++ b/core/framework/server/routes_queens.py @@ -148,9 +148,7 @@ def _transform_profile_for_api(profile: dict) -> dict: details.append(f"Drive: {hidden['deep_motive']}") if hidden.get("behavioral_mapping"): details.append(f"Approach: {hidden['behavioral_mapping']}") - experience.append( - {"role": f"{profile.get('title', 'Executive Advisor')}", "details": details} - ) + experience.append({"role": f"{profile.get('title', 'Executive Advisor')}", "details": details}) if experience: result["experience"] = experience @@ -161,9 +159,7 @@ def _transform_profile_for_api(profile: dict) -> dict: # Signature achievement from world_lore world_lore = profile.get("world_lore", {}) if world_lore.get("habitat"): - result["signature_achievement"] = ( - f"{world_lore['habitat']}. {world_lore.get('lexicon', '')}".strip() - ) + result["signature_achievement"] = f"{world_lore['habitat']}. {world_lore.get('lexicon', '')}".strip() return result diff --git a/core/framework/server/routes_sessions.py b/core/framework/server/routes_sessions.py index 61d75ac5..348ef1d1 100644 --- a/core/framework/server/routes_sessions.py +++ b/core/framework/server/routes_sessions.py @@ -59,9 +59,7 @@ def _session_to_live_dict(session) -> dict: "loaded_at": session.loaded_at, "uptime_seconds": round(time.time() - session.loaded_at, 1), "intro_message": getattr(session.runner, "intro_message", "") or "", - "queen_phase": phase_state.phase - if phase_state - else ("staging" if session.colony_runtime else "planning"), + "queen_phase": phase_state.phase if phase_state else ("staging" if session.colony_runtime else "planning"), "queen_supports_images": supports_image_tool_results(queen_model) if queen_model else True, "queen_id": getattr(phase_state, "queen_id", None) if phase_state else None, "queen_name": (phase_state.queen_profile or {}).get("name") if phase_state else None, @@ -229,11 +227,7 @@ async def handle_get_live_session(request: web.Request) -> web.Response: "entry_node": ep.entry_node, "trigger_type": ep.trigger_type, "trigger_config": ep.trigger_config, - **( - {"next_fire_in": nf} - if (nf := rt.get_timer_next_fire_in(ep.id)) is not None - else {} - ), + **({"next_fire_in": nf} if (nf := rt.get_timer_next_fire_in(ep.id)) is not None else {}), } for ep in rt.get_entry_points() ] @@ -383,11 +377,7 @@ async def handle_session_entry_points(request: web.Request) -> web.Response: "entry_node": ep.entry_node, "trigger_type": ep.trigger_type, "trigger_config": ep.trigger_config, - **( - {"next_fire_in": nf} - if rt and (nf := rt.get_timer_next_fire_in(ep.id)) is not None - else {} - ), + **({"next_fire_in": nf} if rt and (nf := rt.get_timer_next_fire_in(ep.id)) is not None else {}), } for ep in eps ] @@ -468,21 +458,13 @@ async def handle_update_trigger_task(request: web.Request) -> web.Response: ) except ImportError: return web.json_response( - { - "error": ( - "croniter package not installed — cannot validate cron expression." - ) - }, + {"error": ("croniter package not installed — cannot validate cron expression.")}, status=500, ) merged_trigger_config.pop("interval_minutes", None) elif interval is None: return web.json_response( - { - "error": ( - "Timer trigger needs 'cron' or 'interval_minutes' in trigger_config." - ) - }, + {"error": ("Timer trigger needs 'cron' or 'interval_minutes' in trigger_config.")}, status=400, ) elif not isinstance(interval, (int, float)) or interval <= 0: @@ -580,9 +562,7 @@ async def handle_activate_trigger(request: web.Request) -> web.Response: ) if trigger_id in getattr(session, "active_trigger_ids", set()): - return web.json_response( - {"status": "already_active", "trigger_id": trigger_id} - ) + return web.json_response({"status": "already_active", "trigger_id": trigger_id}) from framework.tools.queen_lifecycle_tools import ( _persist_active_triggers, @@ -646,9 +626,7 @@ async def handle_deactivate_trigger(request: web.Request) -> web.Response: trigger_id = request.match_info["trigger_id"] if trigger_id not in getattr(session, "active_trigger_ids", set()): - return web.json_response( - {"status": "already_inactive", "trigger_id": trigger_id} - ) + return web.json_response({"status": "already_inactive", "trigger_id": trigger_id}) task = session.active_timer_tasks.pop(trigger_id, None) if task and not task.done(): @@ -867,9 +845,7 @@ async def handle_delete_agent(request: web.Request) -> web.Response: try: shutil.rmtree(resolved) except OSError as e: - return web.json_response( - {"error": f"Failed to delete agent directory: {e}"}, status=500 - ) + return web.json_response({"error": f"Failed to delete agent directory: {e}"}, status=500) return web.json_response({"deleted": str(resolved)}) @@ -932,9 +908,7 @@ def register_routes(app: web.Application) -> None: app.router.add_post("/api/sessions/{session_id}/reveal", handle_reveal_session_folder) app.router.add_get("/api/sessions/{session_id}/stats", handle_session_stats) app.router.add_get("/api/sessions/{session_id}/entry-points", handle_session_entry_points) - app.router.add_patch( - "/api/sessions/{session_id}/triggers/{trigger_id}", handle_update_trigger_task - ) + app.router.add_patch("/api/sessions/{session_id}/triggers/{trigger_id}", handle_update_trigger_task) app.router.add_post( "/api/sessions/{session_id}/triggers/{trigger_id}/activate", handle_activate_trigger, diff --git a/core/framework/server/routes_workers.py b/core/framework/server/routes_workers.py index c49626a8..c1ae39e8 100644 --- a/core/framework/server/routes_workers.py +++ b/core/framework/server/routes_workers.py @@ -70,13 +70,7 @@ async def handle_list_nodes(request: web.Request) -> web.Response: from pathlib import Path state_path = ( - Path.home() - / ".hive" - / "agents" - / session.worker_path.name - / "sessions" - / worker_session_id - / "state.json" + Path.home() / ".hive" / "agents" / session.worker_path.name / "sessions" / worker_session_id / "state.json" ) if state_path.exists(): try: @@ -97,8 +91,7 @@ async def handle_list_nodes(request: web.Request) -> web.Response: pass edges = [ - {"source": e.source, "target": e.target, "condition": e.condition, "priority": e.priority} - for e in graph.edges + {"source": e.source, "target": e.target, "condition": e.condition, "priority": e.priority} for e in graph.edges ] rt = session.colony_runtime entry_points = [ @@ -108,11 +101,7 @@ async def handle_list_nodes(request: web.Request) -> web.Response: "entry_node": ep.entry_node, "trigger_type": ep.trigger_type, "trigger_config": ep.trigger_config, - **( - {"next_fire_in": nf} - if rt and (nf := rt.get_timer_next_fire_in(ep.id)) is not None - else {} - ), + **({"next_fire_in": nf} if rt and (nf := rt.get_timer_next_fire_in(ep.id)) is not None else {}), } for ep in reg.entry_points.values() ] @@ -250,9 +239,7 @@ async def handle_node_tools(request: web.Request) -> web.Response: def register_routes(app: web.Application) -> None: """Register worker inspection routes.""" app.router.add_get("/api/sessions/{session_id}/colonies/{colony_id}/nodes", handle_list_nodes) - app.router.add_get( - "/api/sessions/{session_id}/colonies/{colony_id}/nodes/{node_id}", handle_get_node - ) + app.router.add_get("/api/sessions/{session_id}/colonies/{colony_id}/nodes/{node_id}", handle_get_node) app.router.add_get( "/api/sessions/{session_id}/colonies/{colony_id}/nodes/{node_id}/criteria", handle_node_criteria, diff --git a/core/framework/server/session_manager.py b/core/framework/server/session_manager.py index 6b448c3e..670b06a8 100644 --- a/core/framework/server/session_manager.py +++ b/core/framework/server/session_manager.py @@ -120,9 +120,7 @@ class SessionManager: (blocking I/O) then started on the event loop. """ - def __init__( - self, model: str | None = None, credential_store=None, queen_tool_registry=None - ) -> None: + def __init__(self, model: str | None = None, credential_store=None, queen_tool_registry=None) -> None: self._sessions: dict[str, Session] = {} self._loading: set[str] = set() self._model = model @@ -350,9 +348,7 @@ class SessionManager: _colony_metadata_path = agent_path / "metadata.json" if _colony_metadata_path.exists(): try: - _colony_metadata = json.loads( - _colony_metadata_path.read_text(encoding="utf-8") - ) + _colony_metadata = json.loads(_colony_metadata_path.read_text(encoding="utf-8")) except (json.JSONDecodeError, OSError): pass @@ -426,9 +422,7 @@ class SessionManager: # Start queen with worker profile + lifecycle + monitoring tools worker_identity = ( - build_worker_profile(session.colony_runtime, agent_path=agent_path) - if session.colony_runtime - else None + build_worker_profile(session.colony_runtime, agent_path=agent_path) if session.colony_runtime else None ) await self._start_queen( session, @@ -678,14 +672,10 @@ class SessionManager: ) # Start the worker's agent loop in the background - session.queen_task = asyncio.create_task( - session.queen_executor.run(initial_message=initial_prompt) - ) + session.queen_task = asyncio.create_task(session.queen_executor.run(initial_message=initial_prompt)) # Set up event persistence if session.event_bus and queen_dir: - from framework.host.event_bus import EventBus - session.event_bus.start_persistence(queen_dir, iteration_offset=iteration_offset) logger.info( @@ -920,9 +910,7 @@ class SessionManager: state.setdefault("result", {})["error"] = "Stale session: runtime restarted" state.setdefault("timestamps", {})["updated_at"] = datetime.now().isoformat() state_path.write_text(json.dumps(state, indent=2), encoding="utf-8") - logger.info( - "Marked stale session '%s' as cancelled for agent '%s'", d.name, agent_path.name - ) + logger.info("Marked stale session '%s' as cancelled for agent '%s'", d.name, agent_path.name) except (json.JSONDecodeError, OSError) as e: logger.warning("Failed to clean up stale session %s: %s", d.name, e) @@ -966,13 +954,12 @@ class SessionManager: store = session.colony_runtime._session_store state = await store.read_state(session_id) if state and state.active_triggers: + from framework.host.event_bus import AgentEvent, EventType from framework.tools.queen_lifecycle_tools import ( _start_trigger_timer, _start_trigger_webhook, ) - from framework.host.event_bus import AgentEvent, EventType - runner = getattr(session, "runner", None) colony_entry = runner.graph.entry_node if runner else None saved_tasks = getattr(state, "trigger_tasks", {}) or {} @@ -1006,11 +993,7 @@ class SessionManager: "trigger_type": tdef.trigger_type, "trigger_config": tdef.trigger_config, "name": tdef.description or tdef.id, - **( - {"entry_node": colony_entry} - if colony_entry - else {} - ), + **({"entry_node": colony_entry} if colony_entry else {}), }, ) ) @@ -1059,17 +1042,13 @@ class SessionManager: meta_path = _queen_session_dir(storage_session_id, session.queen_name) / "meta.json" try: _agent_name = ( - session.worker_info.name - if session.worker_info - else str(agent_path.name).replace("_", " ").title() + session.worker_info.name if session.worker_info else str(agent_path.name).replace("_", " ").title() ) existing_meta = {} if meta_path.exists(): existing_meta = json.loads(meta_path.read_text(encoding="utf-8")) existing_meta["agent_name"] = _agent_name - existing_meta["agent_path"] = ( - str(session.worker_path) if session.worker_path else str(agent_path) - ) + existing_meta["agent_path"] = str(session.worker_path) if session.worker_path else str(agent_path) meta_path.write_text(json.dumps(existing_meta), encoding="utf-8") except OSError: pass @@ -1188,9 +1167,7 @@ class SessionManager: self._background_tasks.add(task) task.add_done_callback(self._background_tasks.discard) except Exception: - logger.warning( - "Session '%s': failed to spawn shutdown reflection", session_id, exc_info=True - ) + logger.warning("Session '%s': failed to spawn shutdown reflection", session_id, exc_info=True) if session.queen_task is not None: session.queen_task.cancel() @@ -1295,11 +1272,7 @@ class SessionManager: _agent_name = ( session.worker_info.name if session.worker_info - else ( - str(session.worker_path.name).replace("_", " ").title() - if session.worker_path - else None - ) + else (str(session.worker_path.name).replace("_", " ").title() if session.worker_path else None) ) # Merge into existing meta.json to preserve fields written by # _update_meta_json (e.g. phase, agent_path set during building). @@ -1354,8 +1327,7 @@ class SessionManager: if max_iter >= 0: iteration_offset = max_iter + 1 logger.info( - "Session '%s' resuming with iteration_offset=%d" - " (from events.jsonl max), last phase: %s", + "Session '%s' resuming with iteration_offset=%d (from events.jsonl max), last phase: %s", session.id, iteration_offset, last_phase or "unknown", @@ -1496,8 +1468,7 @@ class SessionManager: session.colony = colony logger.info( - "_start_queen: unified ColonyRuntime ready for session %s " - "(%d tools, storage=%s)", + "_start_queen: unified ColonyRuntime ready for session %s (%d tools, storage=%s)", session.id, len(queen_tools), queen_dir, @@ -1529,10 +1500,7 @@ class SessionManager: detail = cfg.get("cron") or f"every {cfg.get('interval_minutes', '?')} min" task_info = f' -> task: "{t.task}"' if t.task else " (no task configured)" parts.append(f" - {t.id} ({t.trigger_type}: {detail}){task_info}") - trigger_lines = ( - "\n\nAvailable triggers (inactive — use set_trigger to activate):\n" - + "\n".join(parts) - ) + trigger_lines = "\n\nAvailable triggers (inactive — use set_trigger to activate):\n" + "\n".join(parts) await node.inject_event(f"[SYSTEM] Colony loaded.{profile}{trigger_lines}") @@ -1835,9 +1803,7 @@ class SessionManager: if isinstance(content, list): # Anthropic-style content blocks content = " ".join( - b.get("text", "") - for b in content - if isinstance(b, dict) and b.get("type") == "text" + b.get("text", "") for b in content if isinstance(b, dict) and b.get("type") == "text" ) if content and msg.get("role") == "assistant": last_message = content[:120].strip() diff --git a/core/framework/server/tests/test_api.py b/core/framework/server/tests/test_api.py index fc1b9f03..8f34d388 100644 --- a/core/framework/server/tests/test_api.py +++ b/core/framework/server/tests/test_api.py @@ -259,15 +259,11 @@ def _write_sample_session(base: Path, session_id: str): conv_dir = session_dir / "conversations" / "node_a" / "parts" conv_dir.mkdir(parents=True) (conv_dir / "0001.json").write_text(json.dumps({"seq": 1, "role": "user", "content": "hello"})) - (conv_dir / "0002.json").write_text( - json.dumps({"seq": 2, "role": "assistant", "content": "hi there"}) - ) + (conv_dir / "0002.json").write_text(json.dumps({"seq": 2, "role": "assistant", "content": "hi there"})) conv_dir_b = session_dir / "conversations" / "node_b" / "parts" conv_dir_b.mkdir(parents=True) - (conv_dir_b / "0003.json").write_text( - json.dumps({"seq": 3, "role": "user", "content": "continue"}) - ) + (conv_dir_b / "0003.json").write_text(json.dumps({"seq": 3, "role": "user", "content": "continue"})) # Logs logs_dir = session_dir / "logs" @@ -291,9 +287,7 @@ def _write_sample_session(base: Path, session_id: str): "attention_reasons": ["retried"], "total_steps": 1, } - (logs_dir / "details.jsonl").write_text( - json.dumps(detail_a) + "\n" + json.dumps(detail_b) + "\n" - ) + (logs_dir / "details.jsonl").write_text(json.dumps(detail_a) + "\n" + json.dumps(detail_b) + "\n") step_a = {"node_id": "node_a", "step_index": 0, "llm_text": "thinking..."} step_b = {"node_id": "node_b", "step_index": 0, "llm_text": "retrying..."} @@ -302,9 +296,7 @@ def _write_sample_session(base: Path, session_id: str): return session_id, session_dir, state -def _write_queen_session( - tmp_path: Path, queen_id: str, session_id: str, meta: dict | None = None -) -> Path: +def _write_queen_session(tmp_path: Path, queen_id: str, session_id: str, meta: dict | None = None) -> Path: """Create a persisted queen session directory for restore tests.""" session_dir = tmp_path / ".hive" / "agents" / "queens" / queen_id / "sessions" / session_id session_dir.mkdir(parents=True) @@ -394,9 +386,7 @@ class TestSessionCRUD: async def test_create_session_with_worker_forwards_session_id(self): app = create_app() manager = app["manager"] - manager.create_session_with_worker_colony = AsyncMock( - return_value=_make_session(agent_id="my-custom-session") - ) + manager.create_session_with_worker_colony = AsyncMock(return_value=_make_session(agent_id="my-custom-session")) async with TestClient(TestServer(app)) as client: resp = await client.post( @@ -600,14 +590,10 @@ class TestMessageBootstrap: manager.build_llm = MagicMock(return_value=MagicMock()) manager.stop_session = AsyncMock() manager.create_session = AsyncMock() - monkeypatch.setattr( - routes_messages, "select_queen", AsyncMock(return_value="queen_technology") - ) + monkeypatch.setattr(routes_messages, "select_queen", AsyncMock(return_value="queen_technology")) async with TestClient(TestServer(app)) as client: - resp = await client.post( - "/api/messages/classify", json={"message": "Build me a scraper"} - ) + resp = await client.post("/api/messages/classify", json={"message": "Build me a scraper"}) assert resp.status == 200 data = await resp.json() # Assert inside the async-with so app shutdown (which stops @@ -623,9 +609,7 @@ class TestQueenSessionSelection: @pytest.mark.asyncio async def test_select_queen_session_rejects_foreign_session(self, monkeypatch, tmp_path): _patch_queen_storage(monkeypatch, tmp_path) - _write_queen_session( - tmp_path, "queen_growth", "other_session", {"queen_id": "queen_growth"} - ) + _write_queen_session(tmp_path, "queen_growth", "other_session", {"queen_id": "queen_growth"}) app = create_app() async with TestClient(TestServer(app)) as client: @@ -663,9 +647,7 @@ class TestQueenSessionSelection: assert any(call.args == ("other_live",) for call in manager.stop_session.await_args_list) @pytest.mark.asyncio - async def test_select_queen_session_restores_specific_history_session( - self, monkeypatch, tmp_path - ): + async def test_select_queen_session_restores_specific_history_session(self, monkeypatch, tmp_path): _patch_queen_storage(monkeypatch, tmp_path) _write_queen_session( tmp_path, @@ -1167,9 +1149,7 @@ class TestGraphNodes: assert data["entry_node"] == "node_a" @pytest.mark.asyncio - async def test_list_nodes_with_session_enrichment( - self, nodes_and_edges, sample_session, tmp_agent_dir - ): + async def test_list_nodes_with_session_enrichment(self, nodes_and_edges, sample_session, tmp_agent_dir): session_id, session_dir, state = sample_session tmp_path, agent_name, base = tmp_agent_dir nodes, edges = nodes_and_edges @@ -1182,9 +1162,7 @@ class TestGraphNodes: app = _make_app_with_session(session) async with TestClient(TestServer(app)) as client: - resp = await client.get( - f"/api/sessions/test_agent/graphs/primary/nodes?session_id={session_id}" - ) + resp = await client.get(f"/api/sessions/test_agent/graphs/primary/nodes?session_id={session_id}") assert resp.status == 200 data = await resp.json() node_map = {n["id"]: n for n in data["nodes"]} @@ -1233,9 +1211,7 @@ class TestGraphNodes: assert resp.status == 200 data = await resp.json() assert "system_prompt" in data - assert ( - data["system_prompt"] == "You are a helpful assistant that produces valid results." - ) + assert data["system_prompt"] == "You are a helpful assistant that produces valid results." # Node without system_prompt should return empty string resp2 = await client.get("/api/sessions/test_agent/graphs/primary/nodes/node_b") @@ -1270,9 +1246,7 @@ class TestNodeCriteria: assert data["output_keys"] == ["result"] @pytest.mark.asyncio - async def test_criteria_with_log_enrichment( - self, nodes_and_edges, sample_session, tmp_agent_dir - ): + async def test_criteria_with_log_enrichment(self, nodes_and_edges, sample_session, tmp_agent_dir): """Criteria endpoint enriched with last execution from logs.""" session_id, session_dir, state = sample_session tmp_path, agent_name, base = tmp_agent_dir @@ -1293,8 +1267,7 @@ class TestNodeCriteria: async with TestClient(TestServer(app)) as client: resp = await client.get( - f"/api/sessions/test_agent/graphs/primary/nodes/node_b/criteria" - f"?session_id={session_id}" + f"/api/sessions/test_agent/graphs/primary/nodes/node_b/criteria?session_id={session_id}" ) assert resp.status == 200 data = await resp.json() @@ -1311,9 +1284,7 @@ class TestNodeCriteria: app = _make_app_with_session(session) async with TestClient(TestServer(app)) as client: - resp = await client.get( - "/api/sessions/test_agent/graphs/primary/nodes/nonexistent/criteria" - ) + resp = await client.get("/api/sessions/test_agent/graphs/primary/nodes/nonexistent/criteria") assert resp.status == 404 @@ -1388,9 +1359,7 @@ class TestLogs: app = _make_app_with_session(session) async with TestClient(TestServer(app)) as client: - resp = await client.get( - f"/api/sessions/test_agent/logs?session_id={session_id}&level=summary" - ) + resp = await client.get(f"/api/sessions/test_agent/logs?session_id={session_id}&level=summary") assert resp.status == 200 data = await resp.json() assert data["run_id"] == session_id @@ -1411,9 +1380,7 @@ class TestLogs: app = _make_app_with_session(session) async with TestClient(TestServer(app)) as client: - resp = await client.get( - f"/api/sessions/test_agent/logs?session_id={session_id}&level=details" - ) + resp = await client.get(f"/api/sessions/test_agent/logs?session_id={session_id}&level=details") assert resp.status == 200 data = await resp.json() assert data["session_id"] == session_id @@ -1435,9 +1402,7 @@ class TestLogs: app = _make_app_with_session(session) async with TestClient(TestServer(app)) as client: - resp = await client.get( - f"/api/sessions/test_agent/logs?session_id={session_id}&level=tools" - ) + resp = await client.get(f"/api/sessions/test_agent/logs?session_id={session_id}&level=tools") assert resp.status == 200 data = await resp.json() assert data["session_id"] == session_id diff --git a/core/framework/skills/catalog.py b/core/framework/skills/catalog.py index 4fdd134f..6261dd79 100644 --- a/core/framework/skills/catalog.py +++ b/core/framework/skills/catalog.py @@ -26,12 +26,15 @@ Before replying: scan entries. - If multiple could apply: choose the most specific one, then read/follow it. - If none clearly apply: do not read any SKILL.md. Constraints: never read more than one skill up front; only read after selecting. -- When a skill drives external API writes (Gmail, Calendar, GitHub, etc.), assume rate limits: prefer fewer larger writes, avoid tight one-item loops, serialize bursts when possible, and respect 429/Retry-After. +- When a skill drives external API writes (Gmail, Calendar, GitHub, etc.), + assume rate limits: prefer fewer larger writes, avoid tight one-item loops, + serialize bursts when possible, and respect 429/Retry-After. The following skills provide specialized instructions for specific tasks. Use `read_file` to load a skill's SKILL.md when the task matches its description. -When a skill file references a relative path, resolve it against the skill directory (parent of SKILL.md) and use that absolute path in tool commands.""" +When a skill file references a relative path, resolve it against the +skill directory (parent of SKILL.md) and use that absolute path in tool commands.""" _MANDATORY_HEADER_COMPACT = """## Skills (mandatory) Before replying: scan entries. @@ -39,12 +42,15 @@ Before replying: scan entries. - If multiple could apply: choose the most specific one, then read/follow it. - If none clearly apply: do not read any SKILL.md. Constraints: never read more than one skill up front; only read after selecting. -- When a skill drives external API writes (Gmail, Calendar, GitHub, etc.), assume rate limits: prefer fewer larger writes, avoid tight one-item loops, serialize bursts when possible, and respect 429/Retry-After. +- When a skill drives external API writes (Gmail, Calendar, GitHub, etc.), + assume rate limits: prefer fewer larger writes, avoid tight one-item loops, + serialize bursts when possible, and respect 429/Retry-After. The following skills provide specialized instructions for specific tasks. Use `read_file` to load a skill's SKILL.md when the task matches its name. -When a skill file references a relative path, resolve it against the skill directory (parent of SKILL.md) and use that absolute path in tool commands.""" +When a skill file references a relative path, resolve it against the +skill directory (parent of SKILL.md) and use that absolute path in tool commands.""" class SkillCatalog: diff --git a/core/framework/skills/cli.py b/core/framework/skills/cli.py index bbcf295b..1aaa0a28 100644 --- a/core/framework/skills/cli.py +++ b/core/framework/skills/cli.py @@ -134,9 +134,7 @@ def register_skill_commands(subparsers) -> None: info_parser.set_defaults(func=cmd_skill_info) # hive skill init - init_parser = skill_sub.add_parser( - "init", help="Scaffold a new skill directory with a SKILL.md template" - ) + init_parser = skill_sub.add_parser("init", help="Scaffold a new skill directory with a SKILL.md template") init_parser.add_argument("--name", dest="skill_name", default=None, metavar="NAME") init_parser.add_argument( "--dir", @@ -193,9 +191,7 @@ def register_skill_commands(subparsers) -> None: update_parser.set_defaults(func=cmd_skill_update) # hive skill search - search_parser = skill_sub.add_parser( - "search", help="Search the skill registry by name, tag, or description" - ) + search_parser = skill_sub.add_parser("search", help="Search the skill registry by name, tag, or description") search_parser.add_argument("query", help="Search query string") search_parser.add_argument("--json", action="store_true", help="Output as JSON") search_parser.set_defaults(func=cmd_skill_search) @@ -231,9 +227,7 @@ def register_skill_commands(subparsers) -> None: fork_parser.set_defaults(func=cmd_skill_fork) # hive skill test - test_parser = skill_sub.add_parser( - "test", help="Run a skill in isolation or execute its eval suite (CLI-9)" - ) + test_parser = skill_sub.add_parser("test", help="Run a skill in isolation or execute its eval suite (CLI-9)") test_parser.add_argument("path", help="Path to SKILL.md or its parent directory") test_parser.add_argument( "--input", @@ -649,9 +643,7 @@ def cmd_skill_validate(args) -> int: print(f"✓ {path} — valid ({len(result.warnings)} warning(s))") return 0 else: - print( - f"✗ {path} — invalid ({len(result.errors)} error(s), {len(result.warnings)} warning(s))" - ) + print(f"✗ {path} — invalid ({len(result.errors)} error(s), {len(result.warnings)} warning(s))") return 1 @@ -672,9 +664,7 @@ def cmd_skill_doctor(args) -> int: for skill_name, dir_name in SKILL_REGISTRY.items(): skill_md = _DEFAULT_SKILLS_DIR / dir_name / "SKILL.md" if use_json: - report = _doctor_skill_file( - skill_name, skill_md, parse_skill_md, json_mode=True, scope="framework" - ) + report = _doctor_skill_file(skill_name, skill_md, parse_skill_md, json_mode=True, scope="framework") overall_errors += len(report["errors"]) skill_results.append(report) else: @@ -696,9 +686,7 @@ def cmd_skill_doctor(args) -> int: candidate = USER_SKILLS_DIR / args.name / "SKILL.md" if candidate.exists(): if use_json: - report = _doctor_skill_file( - args.name, candidate, parse_skill_md, json_mode=True, scope="user" - ) + report = _doctor_skill_file(args.name, candidate, parse_skill_md, json_mode=True, scope="user") print(_json.dumps({"skills": [report], "total_errors": len(report["errors"])})) return 1 if report["errors"] else 0 print(f"\nChecking skill: {args.name} [user]") @@ -838,8 +826,7 @@ def cmd_skill_update(args) -> int: if not installed_version and not use_json: print( - f"Warning: installed skill '{args.name}' has no version field — " - "cannot compare. Re-installing.", + f"Warning: installed skill '{args.name}' has no version field — cannot compare. Re-installing.", file=sys.stderr, ) @@ -1057,9 +1044,7 @@ def cmd_skill_test(args) -> int: # ── 4. Structural-only mode (no LLM needed) ─────────────────────────────── if not has_input and not has_evals: - doctor_errors = _doctor_skill_file( - skill.name, path, parse_skill_md, json_mode=use_json, scope="user" - ) + doctor_errors = _doctor_skill_file(skill.name, path, parse_skill_md, json_mode=use_json, scope="user") if use_json: print( _json.dumps( @@ -1209,9 +1194,7 @@ def cmd_skill_test(args) -> int: constraint=assertion, source_document=eval_prompt, summary=skill_response, - criteria=( - "Evaluate whether the skill response satisfies the assertion." - ), + criteria=("Evaluate whether the skill response satisfies the assertion."), ) passes = judged.get("passes", False) explanation = judged.get("explanation", "") diff --git a/core/framework/skills/defaults.py b/core/framework/skills/defaults.py index c43055fb..dd85c5ba 100644 --- a/core/framework/skills/defaults.py +++ b/core/framework/skills/defaults.py @@ -60,9 +60,7 @@ def _apply_overrides(skill_name: str, body: str, overrides: dict[str, Any]) -> s # Convert float warn_at_usage_ratio → warn_at_usage_ratio_pct for the placeholder if "warn_at_usage_ratio" in overrides: overrides = dict(overrides) - overrides.setdefault( - "warn_at_usage_ratio_pct", int(float(overrides["warn_at_usage_ratio"]) * 100) - ) + overrides.setdefault("warn_at_usage_ratio_pct", int(float(overrides["warn_at_usage_ratio"]) * 100)) values = {**defaults, **overrides} for key, val in values.items(): body = body.replace(f"{{{{{key}}}}}", str(val)) @@ -189,8 +187,7 @@ class DefaultSkillManager: approx_tokens = len(combined) // 4 if approx_tokens > 2000: logger.warning( - "Default skill protocols exceed 2000 token budget " - "(~%d tokens, %d chars). Consider trimming.", + "Default skill protocols exceed 2000 token budget (~%d tokens, %d chars). Consider trimming.", approx_tokens, len(combined), ) diff --git a/core/framework/skills/registry.py b/core/framework/skills/registry.py index c3170f9f..db56e1e7 100644 --- a/core/framework/skills/registry.py +++ b/core/framework/skills/registry.py @@ -23,8 +23,7 @@ logger = logging.getLogger(__name__) # Default registry index URL (Phase 3 repo, may not exist yet) _DEFAULT_REGISTRY_URL = ( - "https://raw.githubusercontent.com/hive-skill-registry/" - "hive-skill-registry/main/skill_index.json" + "https://raw.githubusercontent.com/hive-skill-registry/hive-skill-registry/main/skill_index.json" ) _CACHE_DIR = Path.home() / ".hive" / "registry_cache" diff --git a/core/framework/skills/skill_errors.py b/core/framework/skills/skill_errors.py index cdd8aa03..2f12f027 100644 --- a/core/framework/skills/skill_errors.py +++ b/core/framework/skills/skill_errors.py @@ -34,9 +34,7 @@ class SkillError(Exception): self.what = what self.why = why self.fix = fix - self.message = ( - f"[{self.code.value}]\nWhat failed: {self.what}\nWhy: {self.why}\nFix: {self.fix}" - ) + self.message = f"[{self.code.value}]\nWhat failed: {self.what}\nWhy: {self.why}\nFix: {self.fix}" super().__init__(self.message) diff --git a/core/framework/skills/tool_gating.py b/core/framework/skills/tool_gating.py index 436f6efe..3c20793e 100644 --- a/core/framework/skills/tool_gating.py +++ b/core/framework/skills/tool_gating.py @@ -15,8 +15,8 @@ descriptions to get picked up on demand. from __future__ import annotations import logging +from collections.abc import Iterable from pathlib import Path -from typing import Iterable logger = logging.getLogger(__name__) diff --git a/core/framework/skills/trust.py b/core/framework/skills/trust.py index 68a6fd4f..e18ae637 100644 --- a/core/framework/skills/trust.py +++ b/core/framework/skills/trust.py @@ -413,8 +413,7 @@ class TrustGate: return self._print("") self._print( - f"{Colors.YELLOW}Security notice:{Colors.NC} Skills inject instructions " - "into the agent's system prompt." + f"{Colors.YELLOW}Security notice:{Colors.NC} Skills inject instructions into the agent's system prompt." ) self._print( " Only load skills from sources you trust. " @@ -441,10 +440,7 @@ class TrustGate: p(f"{Colors.YELLOW}{'=' * 60}{Colors.NC}") p("") proj_label = str(project_dir) if project_dir else "this project" - p( - f" The project at {Colors.CYAN}{proj_label}{Colors.NC} wants to load " - f"{len(project_skills)} skill(s)" - ) + p(f" The project at {Colors.CYAN}{proj_label}{Colors.NC} wants to load {len(project_skills)} skill(s)") p(" that will inject instructions into the agent's system prompt.") if repo_key: p(f" Source: {Colors.BOLD}{repo_key}{Colors.NC}") @@ -458,10 +454,7 @@ class TrustGate: p(" Options:") p(f" {Colors.CYAN}1){Colors.NC} Trust this session only") p(f" {Colors.CYAN}2){Colors.NC} Trust permanently — remember for future runs") - p( - f" {Colors.DIM}3) Deny" - f" — skip all project-scope skills from this repo{Colors.NC}" - ) + p(f" {Colors.DIM}3) Deny — skip all project-scope skills from this repo{Colors.NC}") p(f"{Colors.YELLOW}{'─' * 60}{Colors.NC}") def _prompt_consent(self, Colors) -> str: # noqa: N803 diff --git a/core/framework/skills/validator.py b/core/framework/skills/validator.py index 411e9f79..be55b075 100644 --- a/core/framework/skills/validator.py +++ b/core/framework/skills/validator.py @@ -82,8 +82,7 @@ def validate_strict(path: Path) -> ValidationResult: frontmatter = yaml.safe_load(raw_yaml) except yaml.YAMLError as exc: errors.append( - f"YAML parse error: {exc}. " - 'Wrap values containing colons in quotes, e.g. description: "Use for: research".' + f'YAML parse error: {exc}. Wrap values containing colons in quotes, e.g. description: "Use for: research".' ) return ValidationResult(passed=False, errors=errors, warnings=warnings) @@ -101,10 +100,7 @@ def validate_strict(path: Path) -> ValidationResult: # 6. name present and non-empty (no directory-name fallback in strict mode) name = frontmatter.get("name") if not name or not str(name).strip(): - errors.append( - "Missing required field: 'name' must be present. " - "Add 'name: your-skill-name' to the frontmatter." - ) + errors.append("Missing required field: 'name' must be present. Add 'name: your-skill-name' to the frontmatter.") else: name = str(name).strip() parent_dir_name = path.parent.name @@ -112,8 +108,7 @@ def validate_strict(path: Path) -> ValidationResult: # 7. name length <= 64 chars if len(name) > _MAX_NAME_LENGTH: errors.append( - f"Skill name '{name}' is {len(name)} characters — " - f"maximum is {_MAX_NAME_LENGTH}. Shorten the name." + f"Skill name '{name}' is {len(name)} characters — maximum is {_MAX_NAME_LENGTH}. Shorten the name." ) # 8. name matches parent directory (dot-namespace prefix allowed: hive.X with dir X) @@ -125,10 +120,7 @@ def validate_strict(path: Path) -> ValidationResult: # 9. body non-empty if not body: - errors.append( - "Skill body (instructions) is empty. " - "Add markdown instructions after the closing --- delimiter." - ) + errors.append("Skill body (instructions) is empty. Add markdown instructions after the closing --- delimiter.") # 10. license present — warning only if not frontmatter.get("license"): @@ -142,9 +134,7 @@ def validate_strict(path: Path) -> ValidationResult: for script_path in sorted(scripts_dir.iterdir()): if script_path.is_file(): if not (script_path.stat().st_mode & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)): - errors.append( - f"Script not executable: {script_path.name}. Run: chmod +x {script_path}" - ) + errors.append(f"Script not executable: {script_path.name}. Run: chmod +x {script_path}") # 12. allowed-tools entries are non-empty strings — warning if malformed allowed_tools = frontmatter.get("allowed-tools") diff --git a/core/framework/storage/checkpoint_store.py b/core/framework/storage/checkpoint_store.py index 843fc2e0..ff1ab700 100644 --- a/core/framework/storage/checkpoint_store.py +++ b/core/framework/storage/checkpoint_store.py @@ -123,9 +123,7 @@ class CheckpointStore: return None try: - return CheckpointIndex.model_validate_json( - self.index_path.read_text(encoding="utf-8") - ) + return CheckpointIndex.model_validate_json(self.index_path.read_text(encoding="utf-8")) except Exception as e: logger.error(f"Failed to load checkpoint index: {e}") return None @@ -317,9 +315,7 @@ class CheckpointStore: # Update latest_checkpoint_id if we removed the latest if index.latest_checkpoint_id == checkpoint_id: - index.latest_checkpoint_id = ( - index.checkpoints[-1].checkpoint_id if index.checkpoints else None - ) + index.latest_checkpoint_id = index.checkpoints[-1].checkpoint_id if index.checkpoints else None # Write updated index await asyncio.to_thread(_write, index) diff --git a/core/framework/testing/approval_cli.py b/core/framework/testing/approval_cli.py index eea23abc..e46866c0 100644 --- a/core/framework/testing/approval_cli.py +++ b/core/framework/testing/approval_cli.py @@ -96,20 +96,14 @@ def batch_approval( # Validate request valid, error = req.validate_action() if not valid: - results.append( - ApprovalResult.error_result(req.test_id, req.action, error or "Invalid request") - ) + results.append(ApprovalResult.error_result(req.test_id, req.action, error or "Invalid request")) counts["errors"] += 1 continue # Load test test = storage.load_test(goal_id, req.test_id) if not test: - results.append( - ApprovalResult.error_result( - req.test_id, req.action, f"Test {req.test_id} not found" - ) - ) + results.append(ApprovalResult.error_result(req.test_id, req.action, f"Test {req.test_id} not found")) counts["errors"] += 1 continue @@ -132,9 +126,7 @@ def batch_approval( storage.update_test(test) results.append( - ApprovalResult.success_result( - req.test_id, req.action, f"Test {req.action.value}d successfully" - ) + ApprovalResult.success_result(req.test_id, req.action, f"Test {req.action.value}d successfully") ) except Exception as e: @@ -233,9 +225,7 @@ def _process_action( test.approve() storage.update_test(test) print("✓ Approved (no modifications)") - return ApprovalResult.success_result( - test.id, ApprovalAction.APPROVE, "No modifications made" - ) + return ApprovalResult.success_result(test.id, ApprovalAction.APPROVE, "No modifications made") elif action == ApprovalAction.SKIP: print("⏭ Skipped (remains pending)") diff --git a/core/framework/testing/approval_types.py b/core/framework/testing/approval_types.py index 99903f8b..b80468ae 100644 --- a/core/framework/testing/approval_types.py +++ b/core/framework/testing/approval_types.py @@ -61,9 +61,7 @@ class ApprovalResult(BaseModel): timestamp: datetime = Field(default_factory=datetime.now) @classmethod - def success_result( - cls, test_id: str, action: ApprovalAction, message: str | None = None - ) -> "ApprovalResult": + def success_result(cls, test_id: str, action: ApprovalAction, message: str | None = None) -> "ApprovalResult": """Create a successful result.""" return cls( test_id=test_id, diff --git a/core/framework/testing/categorizer.py b/core/framework/testing/categorizer.py index 5a86f606..b22f7da1 100644 --- a/core/framework/testing/categorizer.py +++ b/core/framework/testing/categorizer.py @@ -81,9 +81,7 @@ class ErrorCategorizer: def __init__(self): """Initialize categorizer with compiled patterns.""" self._logic_patterns = [re.compile(p, re.IGNORECASE) for p in self.LOGIC_ERROR_PATTERNS] - self._impl_patterns = [ - re.compile(p, re.IGNORECASE) for p in self.IMPLEMENTATION_ERROR_PATTERNS - ] + self._impl_patterns = [re.compile(p, re.IGNORECASE) for p in self.IMPLEMENTATION_ERROR_PATTERNS] self._edge_patterns = [re.compile(p, re.IGNORECASE) for p in self.EDGE_CASE_PATTERNS] def categorize(self, result: TestResult) -> ErrorCategory | None: @@ -192,8 +190,7 @@ class ErrorCategorizer: "The goal specification may not accurately describe the desired behavior." ), ErrorCategory.IMPLEMENTATION_ERROR: ( - "Fix the code in agent nodes/edges. " - "There's a bug in the implementation that needs to be corrected." + "Fix the code in agent nodes/edges. There's a bug in the implementation that needs to be corrected." ), ErrorCategory.EDGE_CASE: ( "Add a new test for this edge case scenario. " @@ -226,17 +223,14 @@ class ErrorCategorizer: "action": "Fix nodes/edges implementation", "restart_required": False, "description": ( - "There's a code bug. Fix the agent implementation, " - "then re-run Eval (skip Goal stage)." + "There's a code bug. Fix the agent implementation, then re-run Eval (skip Goal stage)." ), }, ErrorCategory.EDGE_CASE: { "stage": "Eval", "action": "Add new test only", "restart_required": False, - "description": ( - "This is a new scenario. Add a test for it and continue in the Eval stage." - ), + "description": ("This is a new scenario. Add a test for it and continue in the Eval stage."), }, } return guidance.get( diff --git a/core/framework/testing/debug_tool.py b/core/framework/testing/debug_tool.py index a2a9178c..ffc07ef7 100644 --- a/core/framework/testing/debug_tool.py +++ b/core/framework/testing/debug_tool.py @@ -244,12 +244,10 @@ class DebugTool: return { "execution_path": run.metrics.nodes_executed if hasattr(run, "metrics") else [], "decisions": [ - d.model_dump() if hasattr(d, "model_dump") else str(d) - for d in getattr(run, "decisions", []) + d.model_dump() if hasattr(d, "model_dump") else str(d) for d in getattr(run, "decisions", []) ], "problems": [ - p.model_dump() if hasattr(p, "model_dump") else str(p) - for p in getattr(run, "problems", []) + p.model_dump() if hasattr(p, "model_dump") else str(p) for p in getattr(run, "problems", []) ], "status": run.status.value if hasattr(run, "status") else "unknown", } @@ -284,8 +282,7 @@ class DebugTool: if failures_by_category["uncategorized"]: suggestions.append( - f"Found {len(failures_by_category['uncategorized'])} uncategorized failures. " - "Manual review required." + f"Found {len(failures_by_category['uncategorized'])} uncategorized failures. Manual review required." ) return suggestions diff --git a/core/framework/testing/test_case.py b/core/framework/testing/test_case.py index 21a38b46..2b1f311c 100644 --- a/core/framework/testing/test_case.py +++ b/core/framework/testing/test_case.py @@ -48,38 +48,26 @@ class Test(BaseModel): test_type: TestType # Test definition - test_name: str = Field( - description="Descriptive function name, e.g., test_constraint_api_limits_respected" - ) + test_name: str = Field(description="Descriptive function name, e.g., test_constraint_api_limits_respected") test_code: str = Field(description="Python test function code (pytest compatible)") description: str = Field(description="Human-readable description of what the test validates") input: dict[str, Any] = Field(default_factory=dict, description="Test input data") - expected_output: dict[str, Any] = Field( - default_factory=dict, description="Expected output or assertions" - ) + expected_output: dict[str, Any] = Field(default_factory=dict, description="Expected output or assertions") # LLM generation metadata generated_by: str = Field(default="llm", description="Who created the test: 'llm' or 'human'") - llm_confidence: float = Field( - default=0.0, ge=0.0, le=1.0, description="LLM's confidence in the test quality (0-1)" - ) + llm_confidence: float = Field(default=0.0, ge=0.0, le=1.0, description="LLM's confidence in the test quality (0-1)") # Approval tracking (CRITICAL - tests are never used without approval) approval_status: ApprovalStatus = ApprovalStatus.PENDING approved_by: str | None = None approved_at: datetime | None = None - rejection_reason: str | None = Field( - default=None, description="Reason for rejection if status is REJECTED" - ) - original_code: str | None = Field( - default=None, description="Original LLM-generated code if user modified it" - ) + rejection_reason: str | None = Field(default=None, description="Reason for rejection if status is REJECTED") + original_code: str | None = Field(default=None, description="Original LLM-generated code if user modified it") # Execution tracking last_run: datetime | None = None - last_result: str | None = Field( - default=None, description="Result of last run: 'passed', 'failed', 'error'" - ) + last_result: str | None = Field(default=None, description="Result of last run: 'passed', 'failed', 'error'") run_count: int = 0 pass_count: int = 0 fail_count: int = 0 diff --git a/core/framework/testing/test_result.py b/core/framework/testing/test_result.py index f395dc14..e9d4aead 100644 --- a/core/framework/testing/test_result.py +++ b/core/framework/testing/test_result.py @@ -53,15 +53,9 @@ class TestResult(BaseModel): stack_trace: str | None = None # Runtime data for debugging - runtime_logs: list[dict[str, Any]] = Field( - default_factory=list, description="Log entries from test execution" - ) - node_outputs: dict[str, Any] = Field( - default_factory=dict, description="Output from each node executed during test" - ) - execution_path: list[str] = Field( - default_factory=list, description="Sequence of nodes executed" - ) + runtime_logs: list[dict[str, Any]] = Field(default_factory=list, description="Log entries from test execution") + node_outputs: dict[str, Any] = Field(default_factory=dict, description="Output from each node executed during test") + execution_path: list[str] = Field(default_factory=list, description="Sequence of nodes executed") # Associated run ID (links to Runtime data) run_id: str | None = Field(default=None, description="Runtime run ID for detailed analysis") diff --git a/core/framework/testing/test_storage.py b/core/framework/testing/test_storage.py index 20928f5e..c9265ad7 100644 --- a/core/framework/testing/test_storage.py +++ b/core/framework/testing/test_storage.py @@ -198,9 +198,7 @@ class TestStorage: return [] # Get all result files except latest.json - result_files = sorted( - [f for f in results_dir.glob("*.json") if f.name != "latest.json"], reverse=True - )[:limit] + result_files = sorted([f for f in results_dir.glob("*.json") if f.name != "latest.json"], reverse=True)[:limit] results = [] for f in result_files: diff --git a/core/framework/tools/flowchart_utils.py b/core/framework/tools/flowchart_utils.py index 70e9124d..771e310d 100644 --- a/core/framework/tools/flowchart_utils.py +++ b/core/framework/tools/flowchart_utils.py @@ -215,9 +215,7 @@ def synthesize_draft_from_runtime( "id": f"edge-{i}", "source": re.source, "target": re.target, - "condition": str(re.condition.value) - if hasattr(re.condition, "value") - else str(re.condition), + "condition": str(re.condition.value) if hasattr(re.condition, "value") else str(re.condition), "description": getattr(re, "description", "") or "", "label": "", } @@ -315,8 +313,7 @@ def synthesize_draft_from_runtime( "entry_node": nodes[0]["id"] if nodes else "", "terminal_nodes": sorted(terminal_ids), "flowchart_legend": { - fc_type: {"shape": meta["shape"], "color": meta["color"]} - for fc_type, meta in FLOWCHART_TYPES.items() + fc_type: {"shape": meta["shape"], "color": meta["color"]} for fc_type, meta in FLOWCHART_TYPES.items() }, } diff --git a/core/framework/tools/queen_lifecycle_tools.py b/core/framework/tools/queen_lifecycle_tools.py index 22ad292d..c1f92e7c 100644 --- a/core/framework/tools/queen_lifecycle_tools.py +++ b/core/framework/tools/queen_lifecycle_tools.py @@ -55,7 +55,7 @@ from framework.tools.flowchart_utils import ( ) if TYPE_CHECKING: - from framework.loader.tool_registry import ToolRegistry + from framework.host.agent_host import AgentHost from framework.host.colony_runtime import ColonyRuntime from framework.host.event_bus import EventBus from framework.loader.tool_registry import ToolRegistry @@ -142,56 +142,6 @@ class QueenPhaseState: # Global memory directory. global_memory_dir: Path | None = None - def get_current_tools(self) -> list: - """Return tools for the current phase.""" - if self.phase == "independent": - return list(self.independent_tools) - if self.phase == "working": - return list(self.working_tools) - if self.phase == "reviewing": - return list(self.reviewing_tools) - return list(self.independent_tools) - - def get_current_prompt(self) -> str: - """Return the system prompt for the current phase.""" - if self.phase == "independent": - base = self.prompt_independent - elif self.phase == "working": - base = self.prompt_working - elif self.phase == "reviewing": - base = self.prompt_reviewing - else: - base = self.prompt_independent - - parts = [] - if self.queen_identity_prompt: - parts.append(self.queen_identity_prompt) - parts.append(base) - credentials_block = _render_credentials_block(self.credentials_prompt_provider) - if credentials_block: - parts.append(credentials_block) - if self.skills_catalog_prompt: - parts.append(self.skills_catalog_prompt) - if self.protocols_prompt: - parts.append(self.protocols_prompt) - if self._cached_global_recall_block: - parts.append(self._cached_global_recall_block) - return "\n\n".join(parts) - - async def _emit_phase_event(self) -> None: - """Publish a QUEEN_PHASE_CHANGED event so the frontend updates the tag.""" - if self.event_bus is not None: - data: dict = {"phase": self.phase} - if self.agent_path: - data["agent_path"] = self.agent_path - await self.event_bus.publish( - AgentEvent( - type=EventType.QUEEN_PHASE_CHANGED, - stream_id="queen", - data=data, - ) - ) - async def switch_to_working(self, source: str = "tool") -> None: if self.phase == "working": return @@ -202,37 +152,7 @@ class QueenPhaseState: if self.inject_notification and source != "tool": await self.inject_notification( "[PHASE CHANGE] Switched to WORKING phase. " - "Colony workers are running. You have monitoring tools: " - + ", ".join(tool_names) - + "." - ) - - async def switch_to_reviewing(self, source: str = "tool") -> None: - if self.phase == "reviewing": - return - self.phase = "reviewing" - tool_names = [t.name for t in self.reviewing_tools] - logger.info("Queen phase -> reviewing (source=%s, tools: %s)", source, tool_names) - await self._emit_phase_event() - if self.inject_notification and source != "tool": - await self.inject_notification( - "[PHASE CHANGE] Switched to REVIEWING phase. " - "Workers have completed. Review results and decide next steps. " - "Available tools: " + ", ".join(tool_names) + "." - ) - - async def switch_to_independent(self, source: str = "tool") -> None: - if self.phase == "independent": - return - self.phase = "independent" - tool_names = [t.name for t in self.independent_tools] - logger.info("Queen phase -> independent (source=%s, tools: %s)", source, tool_names) - await self._emit_phase_event() - if self.inject_notification and source != "tool": - await self.inject_notification( - "[PHASE CHANGE] Switched to INDEPENDENT mode. " - "You are the agent — execute the task directly. " - "Available tools: " + ", ".join(tool_names) + "." + "Colony workers are running. You have monitoring tools: " + ", ".join(tool_names) + "." ) planning_tools: list = field(default_factory=list) # list[Tool] @@ -392,9 +312,7 @@ class QueenPhaseState: if self.inject_notification and source != "tool": await self.inject_notification( "[PHASE CHANGE] The user clicked Run in the UI. Switched to RUNNING phase. " - "Worker is now executing. You have monitoring/lifecycle tools: " - + ", ".join(tool_names) - + "." + "Worker is now executing. You have monitoring/lifecycle tools: " + ", ".join(tool_names) + "." ) async def switch_to_staging(self, source: str = "tool") -> None: @@ -610,15 +528,11 @@ async def _persist_active_triggers(session: Any, session_id: str) -> None: # Persist per-trigger task overrides available = getattr(session, "available_triggers", {}) state.trigger_tasks = { - tid: available[tid].task - for tid in active_ids - if tid in available and available[tid].task + tid: available[tid].task for tid in active_ids if tid in available and available[tid].task } await store.write_state(session_id, state) except Exception: - logger.warning( - "Failed to persist active triggers for session %s", session_id, exc_info=True - ) + logger.warning("Failed to persist active triggers for session %s", session_id, exc_info=True) async def _start_trigger_timer(session: Any, trigger_id: str, tdef: Any) -> None: @@ -820,9 +734,7 @@ def _dissolve_planning_nodes( no_edge = [e for e in out_edges if e is not yes_edge][0] # Decision clause: prefer decision_clause, fall back to description/name - clause = ( - d_node.get("decision_clause") or d_node.get("description") or d_node.get("name") or d_id - ).strip() + clause = (d_node.get("decision_clause") or d_node.get("description") or d_node.get("name") or d_id).strip() predecessors = [node_by_id[e["source"]] for e in in_edges if e["source"] in node_by_id] @@ -1019,9 +931,7 @@ def register_queen_lifecycle_tools( try: # Count live workers BEFORE stopping so we can report # accurately — stop_all_workers clears the dict. - stopped_unified = sum( - 1 for w in colony.list_workers() if w.status.value in ("pending", "running") - ) + stopped_unified = sum(1 for w in colony.list_workers() if w.status.value in ("pending", "running")) await colony.stop_all_workers() except Exception as e: errors.append(f"unified: {e}") @@ -1072,8 +982,7 @@ def register_queen_lifecycle_tools( _stop_tool = Tool( name="stop_worker", description=( - "Cancel all active colony workers and pause timers. " - "Workers stop gracefully. No parameters needed." + "Cancel all active colony workers and pause timers. Workers stop gracefully. No parameters needed." ), parameters={"type": "object", "properties": {}}, ) @@ -1126,9 +1035,7 @@ def register_queen_lifecycle_tools( ) if not isinstance(tasks, list) or not tasks: - return json.dumps( - {"error": "tasks must be a non-empty list of {task, data?} dicts"} - ) + return json.dumps({"error": "tasks must be a non-empty list of {task, data?} dicts"}) # Hard ceiling on a single fan-out call. A runaway queen requesting # thousands of parallel workers would starve memory and drown the @@ -1175,9 +1082,7 @@ def register_queen_lifecycle_tools( normalised: list[dict] = [] for i, spec in enumerate(tasks): if not isinstance(spec, dict): - return json.dumps( - {"error": f"tasks[{i}] is not a dict: {type(spec).__name__}"} - ) + return json.dumps({"error": f"tasks[{i}] is not a dict: {type(spec).__name__}"}) task_text = str(spec.get("task", "")).strip() if not task_text: return json.dumps({"error": f"tasks[{i}].task is empty"}) @@ -1373,15 +1278,9 @@ def register_queen_lifecycle_tools( if not (1 <= len(fm_description) <= 1024): return None, "SKILL.md 'description' must be 1–1024 chars" if not _SKILL_NAME_RE.match(fm_name): - return None, ( - f"SKILL.md 'name' field '{fm_name}' must match [a-z0-9-] " - "pattern" - ) + return None, (f"SKILL.md 'name' field '{fm_name}' must match [a-z0-9-] pattern") if fm_name.startswith("-") or fm_name.endswith("-") or "--" in fm_name: - return None, ( - f"SKILL.md 'name' '{fm_name}' has leading/trailing/" - "consecutive hyphens" - ) + return None, (f"SKILL.md 'name' '{fm_name}' has leading/trailing/consecutive hyphens") if len(fm_name) > 64: return None, f"SKILL.md 'name' '{fm_name}' exceeds 64 chars" @@ -1446,12 +1345,7 @@ def register_queen_lifecycle_tools( cn = (colony_name or "").strip() if not _COLONY_NAME_RE.match(cn): return json.dumps( - { - "error": ( - "colony_name must be lowercase alphanumeric " - "with underscores (e.g. 'honeycomb_research')." - ) - } + {"error": ("colony_name must be lowercase alphanumeric with underscores (e.g. 'honeycomb_research').")} ) installed_skill, skill_err = _validate_and_install_skill(skill_path) @@ -1616,8 +1510,7 @@ def register_queen_lifecycle_tools( "colony_name": { "type": "string", "description": ( - "Lowercase alphanumeric+underscore name for " - "the new colony (e.g. 'honeycomb_research')." + "Lowercase alphanumeric+underscore name for the new colony (e.g. 'honeycomb_research')." ), }, "task": { @@ -1732,9 +1625,7 @@ def register_queen_lifecycle_tools( ), parameters={"type": "object", "properties": {}}, ) - registry.register( - "stop_worker_and_review", _stop_edit_tool, lambda inputs: stop_worker_and_review() - ) + registry.register("stop_worker_and_review", _stop_edit_tool, lambda inputs: stop_worker_and_review()) tools_registered += 1 # --- stop_worker_and_plan (Running/Staging → Planning) --------------------- @@ -1766,9 +1657,7 @@ def register_queen_lifecycle_tools( ), parameters={"type": "object", "properties": {}}, ) - registry.register( - "stop_worker_and_plan", _stop_plan_tool, lambda inputs: stop_worker_and_plan() - ) + registry.register("stop_worker_and_plan", _stop_plan_tool, lambda inputs: stop_worker_and_plan()) tools_registered += 1 # --- replan_agent (Building → Planning) ----------------------------------- @@ -1778,9 +1667,7 @@ def register_queen_lifecycle_tools( Only use when the user explicitly asks to re-plan.""" if phase_state is not None: if phase_state.phase != "building": - return json.dumps( - {"error": f"Cannot replan: currently in {phase_state.phase} phase."} - ) + return json.dumps({"error": f"Cannot replan: currently in {phase_state.phase} phase."}) # Carry forward the current draft: restore original (pre-dissolution) # draft so the queen can edit it in planning, rather than starting @@ -1914,12 +1801,7 @@ def register_queen_lifecycle_tools( no_edge = [e for e in out_edges if e is not yes_edge][0] # Decision clause: prefer decision_clause, fall back to description/name - clause = ( - d_node.get("decision_clause") - or d_node.get("description") - or d_node.get("name") - or d_id - ).strip() + clause = (d_node.get("decision_clause") or d_node.get("description") or d_node.get("name") or d_id).strip() predecessors = [node_by_id[e["source"]] for e in in_edges if e["source"] in node_by_id] @@ -2035,11 +1917,7 @@ def register_queen_lifecycle_tools( flowchart_type explicitly on a node. """ # ── Gate: require at least 2 rounds of user questions ───────── - if ( - phase_state is not None - and phase_state.phase == "planning" - and phase_state.planning_ask_rounds < 2 - ): + if phase_state is not None and phase_state.phase == "planning" and phase_state.planning_ask_rounds < 2: return json.dumps( { "error": ( @@ -2152,8 +2030,7 @@ def register_queen_lifecycle_tools( if unreachable: for uid in sorted(unreachable): logger.warning( - "Node '%s' is unreachable from entry node '%s' " - "— removing it from the draft.", + "Node '%s' is unreachable from entry node '%s' — removing it from the draft.", uid, entry_id, ) @@ -2164,9 +2041,7 @@ def register_queen_lifecycle_tools( f"as a sub-agent of an existing node." ) validated_edges[:] = [ - e - for e in validated_edges - if e["source"] not in unreachable and e["target"] not in unreachable + e for e in validated_edges if e["source"] not in unreachable and e["target"] not in unreachable ] validated_nodes[:] = [n for n in validated_nodes if n["id"] not in unreachable] @@ -2212,8 +2087,7 @@ def register_queen_lifecycle_tools( "terminal_nodes": sorted(terminal_ids), # Color legend for the frontend "flowchart_legend": { - fc_type: {"shape": meta["shape"], "color": meta["color"]} - for fc_type, meta in FLOWCHART_TYPES.items() + fc_type: {"shape": meta["shape"], "color": meta["color"]} for fc_type, meta in FLOWCHART_TYPES.items() }, } @@ -2272,9 +2146,7 @@ def register_queen_lifecycle_tools( data={ "event": "flowchart_updated", "map": phase_state.flowchart_map if phase_state else None, - "original_draft": phase_state.original_draft_graph - if phase_state - else draft, + "original_draft": phase_state.original_draft_graph if phase_state else draft, }, ) ) @@ -2465,10 +2337,7 @@ def register_queen_lifecycle_tools( "description": {"type": "string"}, "label": { "type": "string", - "description": ( - "Short edge label shown on the flowchart " - "(e.g. 'Yes', 'No', 'Retry')" - ), + "description": ("Short edge label shown on the flowchart (e.g. 'Yes', 'No', 'Retry')"), }, }, "required": ["source", "target"], @@ -2478,10 +2347,7 @@ def register_queen_lifecycle_tools( "terminal_nodes": { "type": "array", "items": {"type": "string"}, - "description": ( - "Node IDs that are terminal (end) nodes. " - "Auto-detected from edges if omitted." - ), + "description": ("Node IDs that are terminal (end) nodes. Auto-detected from edges if omitted."), }, "success_criteria": { "type": "array", @@ -2519,9 +2385,7 @@ def register_queen_lifecycle_tools( return json.dumps({"error": "Phase state not available."}) if phase_state.phase != "planning": - return json.dumps( - {"error": f"Cannot confirm_and_build: currently in {phase_state.phase} phase."} - ) + return json.dumps({"error": f"Cannot confirm_and_build: currently in {phase_state.phase} phase."}) if phase_state.draft_graph is None: return json.dumps( @@ -2571,20 +2435,14 @@ def register_queen_lifecycle_tools( dissolved_count = len(original_nodes) - len(converted.get("nodes", [])) decision_count = sum(1 for n in original_nodes if n.get("flowchart_type") == "decision") subagent_count = sum( - 1 - for n in original_nodes - if n.get("flowchart_type") == "browser" or n.get("node_type") == "gcu" + 1 for n in original_nodes if n.get("flowchart_type") == "browser" or n.get("node_type") == "gcu" ) dissolution_parts = [] if decision_count: - dissolution_parts.append( - f"{decision_count} decision node(s) dissolved into predecessor criteria" - ) + dissolution_parts.append(f"{decision_count} decision node(s) dissolved into predecessor criteria") if subagent_count: - dissolution_parts.append( - f"{subagent_count} sub-agent node(s) dissolved into predecessor sub_agents" - ) + dissolution_parts.append(f"{subagent_count} sub-agent node(s) dissolved into predecessor sub_agents") # Transition to BUILDING phase await phase_state.switch_to_building(source="tool") @@ -2938,9 +2796,7 @@ def register_queen_lifecycle_tools( # Running tools (started but not yet completed) tool_started = bus.get_history(event_type=EventType.TOOL_CALL_STARTED, limit=last_n * 2) tool_completed = bus.get_history(event_type=EventType.TOOL_CALL_COMPLETED, limit=last_n * 2) - completed_ids = { - evt.data.get("tool_use_id") for evt in tool_completed if evt.data.get("tool_use_id") - } + completed_ids = {evt.data.get("tool_use_id") for evt in tool_completed if evt.data.get("tool_use_id")} running = [ evt for evt in tool_started @@ -3071,8 +2927,7 @@ def register_queen_lifecycle_tools( total_tok = total_in + total_out lines.append("") lines.append( - f"Tokens: {len(llm_events)} LLM turns, " - f"{total_tok:,} total ({total_in:,} in + {total_out:,} out)." + f"Tokens: {len(llm_events)} LLM turns, {total_tok:,} total ({total_in:,} in + {total_out:,} out)." ) # Execution outcomes @@ -3082,8 +2937,7 @@ def register_queen_lifecycle_tools( failed_n = len(exec_failed) active_n = len(runtime.get_active_streams()) lines.append( - f"Executions: {completed_n} completed, {failed_n} failed" - + (f" ({active_n} active)." if active_n else ".") + f"Executions: {completed_n} completed, {failed_n} failed" + (f" ({active_n} active)." if active_n else ".") ) if exec_failed: for evt in exec_failed[:3]: @@ -3124,9 +2978,7 @@ def register_queen_lifecycle_tools( # Running + completed tool calls tool_started = bus.get_history(event_type=EventType.TOOL_CALL_STARTED, limit=last_n * 2) tool_completed = bus.get_history(event_type=EventType.TOOL_CALL_COMPLETED, limit=last_n * 2) - completed_ids = { - evt.data.get("tool_use_id") for evt in tool_completed if evt.data.get("tool_use_id") - } + completed_ids = {evt.data.get("tool_use_id") for evt in tool_completed if evt.data.get("tool_use_id")} running = [ evt for evt in tool_started @@ -3326,10 +3178,7 @@ def register_queen_lifecycle_tools( return _format_summary(preamble, red_flags) if bus is None: - return ( - f"Worker is {preamble['status']}. " - "EventBus unavailable — only basic status returned." - ) + return f"Worker is {preamble['status']}. EventBus unavailable — only basic status returned." if focus == "activity": return _format_activity(bus, preamble, last_n) @@ -3352,10 +3201,7 @@ def register_queen_lifecycle_tools( pass return json.dumps(result, default=str, ensure_ascii=False) else: - return ( - f"Unknown focus '{focus}'. " - "Valid options: activity, memory, tools, issues, progress, full." - ) + return f"Unknown focus '{focus}'. Valid options: activity, memory, tools, issues, progress, full." except Exception as exc: logger.exception("get_worker_status error") return f"Error retrieving status: {exc}" @@ -3382,9 +3228,7 @@ def register_queen_lifecycle_tools( }, "last_n": { "type": "integer", - "description": ( - "Recent events per category (default 20). Only for activity, tools, full." - ), + "description": ("Recent events per category (default 20). Only for activity, tools, full."), }, }, "required": [], @@ -3500,9 +3344,7 @@ def register_queen_lifecycle_tools( from aden_tools.credentials import CREDENTIAL_SPECS spec = CREDENTIAL_SPECS.get(credential_id) - resolved_provider = ( - (spec.credential_id or credential_id) if spec else credential_id - ) + resolved_provider = (spec.credential_id or credential_id) if spec else credential_id except Exception: resolved_provider = credential_id all_accounts = [ @@ -3571,9 +3413,7 @@ def register_queen_lifecycle_tools( "storage_id": info.storage_id, "status": info.status, "created_at": info.created_at.isoformat() if info.created_at else None, - "last_validated": ( - info.last_validated.isoformat() if info.last_validated else None - ), + "last_validated": (info.last_validated.isoformat() if info.last_validated else None), } identity = info.identity.to_dict() if identity: @@ -3604,17 +3444,14 @@ def register_queen_lifecycle_tools( "credential_id": { "type": "string", "description": ( - "Filter to a specific credential type (e.g. 'brave_search'). " - "Omit to list all credentials." + "Filter to a specific credential type (e.g. 'brave_search'). Omit to list all credentials." ), }, }, "required": [], }, ) - registry.register( - "list_credentials", _list_creds_tool, lambda inputs: list_credentials(**inputs) - ) + registry.register("list_credentials", _list_creds_tool, lambda inputs: list_credentials(**inputs)) tools_registered += 1 # --- load_built_agent (server context only) -------------------------------- @@ -3657,18 +3494,12 @@ def register_queen_lifecycle_tools( parent_dir = str(resolved_path.resolve().parent) if parent_dir not in _sys.path: _sys.path.insert(0, parent_dir) - stale = [ - n for n in _sys.modules if n == pkg_name or n.startswith(f"{pkg_name}.") - ] + stale = [n for n in _sys.modules if n == pkg_name or n.startswith(f"{pkg_name}.")] for n in stale: del _sys.modules[n] mod = importlib.import_module(pkg_name) - missing_attrs = [ - attr - for attr in ("goal", "nodes", "edges") - if getattr(mod, attr, None) is None - ] + missing_attrs = [attr for attr in ("goal", "nodes", "edges") if getattr(mod, attr, None) is None] if missing_attrs: return json.dumps( { @@ -3754,10 +3585,7 @@ def register_queen_lifecycle_tools( save_flowchart_file(resolved_path, synth_draft, synth_map) # Emit to frontend - if ( - phase_state.original_draft_graph is not None - and phase_state.flowchart_map is not None - ): + if phase_state.original_draft_graph is not None and phase_state.flowchart_map is not None: bus = phase_state.event_bus if bus is not None: try: @@ -3815,9 +3643,7 @@ def register_queen_lifecycle_tools( "properties": { "agent_path": { "type": "string", - "description": ( - "Path to the agent directory (e.g. '~/.hive/colonies/my_agent')" - ), + "description": ("Path to the agent directory (e.g. '~/.hive/colonies/my_agent')"), }, }, "required": ["agent_path"], @@ -3928,8 +3754,7 @@ def register_queen_lifecycle_tools( # a code error in the validator) should not block the # spawn. Log and proceed as if nothing was dropped. logger.warning( - "compute_unavailable_tools raised, proceeding without " - "credential-based tool filtering: %s", + "compute_unavailable_tools raised, proceeding without credential-based tool filtering: %s", exc, ) @@ -3967,16 +3792,10 @@ def register_queen_lifecycle_tools( entry_node = None worker_system_prompt = ( - getattr(entry_node, "system_prompt", None) - if entry_node is not None - else None + getattr(entry_node, "system_prompt", None) if entry_node is not None else None ) or "" - worker_tool_names = ( - list(getattr(entry_node, "tools", []) or []) - if entry_node is not None - else [] - ) + worker_tool_names = list(getattr(entry_node, "tools", []) or []) if entry_node is not None else [] # Drop any tool whose credential isn't available (GitHub # tools when GITHUB_TOKEN is unset, etc). The preflight @@ -4000,8 +3819,7 @@ def register_queen_lifecycle_tools( id=f"loaded_worker:{getattr(graph, 'id', 'unknown')}", name=getattr(graph, "id", "loaded_worker"), description=( - "Loaded worker agent spawned via run_agent_with_input " - "through the unified ColonyRuntime path." + "Loaded worker agent spawned via run_agent_with_input through the unified ColonyRuntime path." ), system_prompt=worker_system_prompt, tools=worker_tool_names, @@ -4022,15 +3840,11 @@ def register_queen_lifecycle_tools( # makes the filter complete. if unavailable_tools: before = len(spawn_tools) - spawn_tools = [ - t for t in spawn_tools - if getattr(t, "name", None) not in unavailable_tools - ] + spawn_tools = [t for t in spawn_tools if getattr(t, "name", None) not in unavailable_tools] dropped_count = before - len(spawn_tools) if dropped_count: logger.info( - "run_agent_with_input: dropped %d tool object(s) from " - "spawn_tools (unavailable credentials)", + "run_agent_with_input: dropped %d tool object(s) from spawn_tools (unavailable credentials)", dropped_count, ) @@ -4125,9 +3939,7 @@ def register_queen_lifecycle_tools( "required": ["task"], }, ) - registry.register( - "run_agent_with_input", _run_input_tool, lambda inputs: run_agent_with_input(**inputs) - ) + registry.register("run_agent_with_input", _run_input_tool, lambda inputs: run_agent_with_input(**inputs)) tools_registered += 1 # --- list_worker_questions / reply_to_worker ------------------------------ @@ -4189,17 +4001,14 @@ def register_queen_lifecycle_tools( if entry is None: return json.dumps( { - "error": "Unknown request_id. Call list_worker_questions() " - "to see currently pending escalations.", + "error": "Unknown request_id. Call list_worker_questions() to see currently pending escalations.", "request_id": request_id, } ) worker_id = entry.get("worker_id") if not worker_id: - return json.dumps( - {"error": "Escalation entry is missing worker_id.", "request_id": request_id} - ) + return json.dumps({"error": "Escalation entry is missing worker_id.", "request_id": request_id}) # Format the reply so the waiting worker's conversation shows # it as a queen handoff rather than a raw user message. @@ -4244,9 +4053,7 @@ def register_queen_lifecycle_tools( "required": ["request_id", "reply"], }, ) - registry.register( - "reply_to_worker", _reply_tool, lambda inputs: reply_to_worker(**inputs) - ) + registry.register("reply_to_worker", _reply_tool, lambda inputs: reply_to_worker(**inputs)) tools_registered += 1 # --- set_trigger ----------------------------------------------------------- @@ -4322,9 +4129,7 @@ def register_queen_lifecycle_tools( methods = t_config.get("methods", ["POST"]) invalid = [m.upper() for m in methods if m.upper() not in valid_methods] if invalid: - return json.dumps( - {"error": f"Invalid HTTP methods: {invalid}. Valid: {sorted(valid_methods)}"} - ) + return json.dumps({"error": f"Invalid HTTP methods: {invalid}. Valid: {sorted(valid_methods)}"}) try: await _start_trigger_webhook(session, trigger_id, tdef) @@ -4374,16 +4179,12 @@ def register_queen_lifecycle_tools( if not croniter.is_valid(cron_expr): return json.dumps({"error": f"Invalid cron expression: {cron_expr}"}) except ImportError: - return json.dumps( - {"error": "croniter package not installed — cannot validate cron expression."} - ) + return json.dumps({"error": "croniter package not installed — cannot validate cron expression."}) elif interval: if not isinstance(interval, (int, float)) or interval <= 0: return json.dumps({"error": f"interval_minutes must be > 0, got {interval}"}) else: - return json.dumps( - {"error": "Timer trigger needs 'cron' or 'interval_minutes' in trigger_config."} - ) + return json.dumps({"error": "Timer trigger needs 'cron' or 'interval_minutes' in trigger_config."}) # Start timer try: @@ -4440,9 +4241,7 @@ def register_queen_lifecycle_tools( "properties": { "trigger_id": { "type": "string", - "description": ( - "ID of the trigger to activate (from list_triggers) or a new custom ID" - ), + "description": ("ID of the trigger to activate (from list_triggers) or a new custom ID"), }, "trigger_type": { "type": "string", @@ -4522,10 +4321,7 @@ def register_queen_lifecycle_tools( _remove_trigger_tool = Tool( name="remove_trigger", - description=( - "Deactivate an active trigger." - " The trigger stops firing but remains available for re-activation." - ), + description=("Deactivate an active trigger. The trigger stops firing but remains available for re-activation."), parameters={ "type": "object", "properties": { @@ -4537,9 +4333,7 @@ def register_queen_lifecycle_tools( "required": ["trigger_id"], }, ) - registry.register( - "remove_trigger", _remove_trigger_tool, lambda inputs: remove_trigger(**inputs) - ) + registry.register("remove_trigger", _remove_trigger_tool, lambda inputs: remove_trigger(**inputs)) tools_registered += 1 # --- list_triggers --------------------------------------------------------- @@ -4563,9 +4357,7 @@ def register_queen_lifecycle_tools( _list_triggers_tool = Tool( name="list_triggers", - description=( - "List all available triggers (from the loaded worker) and their active/inactive status." - ), + description=("List all available triggers (from the loaded worker) and their active/inactive status."), parameters={ "type": "object", "properties": {}, diff --git a/core/framework/tools/worker_monitoring_tools.py b/core/framework/tools/worker_monitoring_tools.py index d1382020..226c2b6c 100644 --- a/core/framework/tools/worker_monitoring_tools.py +++ b/core/framework/tools/worker_monitoring_tools.py @@ -99,9 +99,7 @@ def register_worker_monitoring_tools( if default_session_id and (sessions_dir / default_session_id).is_dir(): session_id = default_session_id else: - candidates = [ - d for d in sessions_dir.iterdir() if d.is_dir() and (d / "state.json").exists() - ] + candidates = [d for d in sessions_dir.iterdir() if d.is_dir() and (d / "state.json").exists()] if not candidates: return json.dumps({"error": "No sessions found — worker has not started yet"}) @@ -210,15 +208,12 @@ def register_worker_monitoring_tools( "session_id": { "type": "string", "description": ( - "The worker's active session ID. Omit or pass 'auto' to " - "auto-discover the most recent session." + "The worker's active session ID. Omit or pass 'auto' to auto-discover the most recent session." ), }, "last_n_steps": { "type": "integer", - "description": ( - f"How many recent log steps to include (default {_DEFAULT_LAST_N_STEPS})" - ), + "description": (f"How many recent log steps to include (default {_DEFAULT_LAST_N_STEPS})"), }, }, "required": [], diff --git a/core/framework/tracker/decision_tracker.py b/core/framework/tracker/decision_tracker.py index 5f134b7a..c0b03bef 100644 --- a/core/framework/tracker/decision_tracker.py +++ b/core/framework/tracker/decision_tracker.py @@ -252,9 +252,7 @@ class DecisionTracker: if self._current_run is None: # Gracefully handle case where run ended during exception handling # This can happen in cascading error scenarios - logger.warning( - f"record_outcome called but no run in progress (decision_id={decision_id})" - ) + logger.warning(f"record_outcome called but no run in progress (decision_id={decision_id})") return outcome = Outcome( @@ -298,9 +296,7 @@ class DecisionTracker: if self._current_run is None: # Gracefully handle case where run ended during exception handling # Log the problem since we can't store it, then return empty ID - logger.warning( - f"report_problem called but no run in progress: [{severity}] {description}" - ) + logger.warning(f"report_problem called but no run in progress: [{severity}] {description}") return "" return self._current_run.add_problem( diff --git a/core/framework/tracker/runtime_log_store.py b/core/framework/tracker/runtime_log_store.py index b3f98db2..4998699a 100644 --- a/core/framework/tracker/runtime_log_store.py +++ b/core/framework/tracker/runtime_log_store.py @@ -233,8 +233,7 @@ class RuntimeLogStore: import warnings warnings.warn( - f"Found {len(old_ids)} runs in deprecated location. " - "Consider migrating to unified session storage.", + f"Found {len(old_ids)} runs in deprecated location. Consider migrating to unified session storage.", DeprecationWarning, stacklevel=3, ) diff --git a/core/tests/debug_codex_stream.py b/core/tests/debug_codex_stream.py index 71c267a5..a9f0b886 100644 --- a/core/tests/debug_codex_stream.py +++ b/core/tests/debug_codex_stream.py @@ -79,10 +79,7 @@ async def test_codex_stream(): elif isinstance(event, ToolCallEvent): print(f" ToolCall: {event.tool_name}({event.tool_input})") elif isinstance(event, FinishEvent): - print( - f" Finish: stop={event.stop_reason} " - f"in={event.input_tokens} out={event.output_tokens}" - ) + print(f" Finish: stop={event.stop_reason} in={event.input_tokens} out={event.output_tokens}") elif isinstance(event, StreamErrorEvent): print(f" StreamError: {event.error} (recoverable={event.recoverable})") print(f" Text: {text!r}") @@ -125,10 +122,7 @@ async def test_codex_stream(): tool_calls.append({"name": event.tool_name, "input": event.tool_input}) print(f" ToolCall: {event.tool_name}({json.dumps(event.tool_input)})") elif isinstance(event, FinishEvent): - print( - f" Finish: stop={event.stop_reason} " - f"in={event.input_tokens} out={event.output_tokens}" - ) + print(f" Finish: stop={event.stop_reason} in={event.input_tokens} out={event.output_tokens}") elif isinstance(event, StreamErrorEvent): print(f" StreamError: {event.error} (recoverable={event.recoverable})") print(f" Text: {text!r}") diff --git a/core/tests/debug_codex_verbose.py b/core/tests/debug_codex_verbose.py index dff43fcc..d32ac972 100644 --- a/core/tests/debug_codex_verbose.py +++ b/core/tests/debug_codex_verbose.py @@ -54,10 +54,7 @@ async def main(): elif isinstance(event, ToolCallEvent): print(f"ToolCall: {event.tool_name}({event.tool_input})") elif isinstance(event, FinishEvent): - print( - f"Finish: stop={event.stop_reason} " - f"in={event.input_tokens} out={event.output_tokens}" - ) + print(f"Finish: stop={event.stop_reason} in={event.input_tokens} out={event.output_tokens}") elif isinstance(event, StreamErrorEvent): print(f"StreamError: {event.error} (recoverable={event.recoverable})") diff --git a/core/tests/test_antigravity_eventloop.py b/core/tests/test_antigravity_eventloop.py index 2bff72b2..09cd70ee 100644 --- a/core/tests/test_antigravity_eventloop.py +++ b/core/tests/test_antigravity_eventloop.py @@ -18,11 +18,11 @@ logging.basicConfig(level=logging.WARNING, format="%(levelname)s %(name)s: %(mes # Show our provider's retry/stream logs logging.getLogger("framework.llm.litellm").setLevel(logging.DEBUG) -from framework.config import RuntimeConfig # noqa: E402 from framework.agent_loop.agent_loop import AgentLoop as EventLoopNode # noqa: E402 from framework.agent_loop.internals.types import LoopConfig # noqa: E402 -from framework.orchestrator.node import DataBuffer, NodeContext, NodeResult, NodeSpec # noqa: E402 +from framework.config import RuntimeConfig # noqa: E402 from framework.llm.litellm import LiteLLMProvider # noqa: E402 +from framework.orchestrator.node import DataBuffer, NodeContext, NodeResult, NodeSpec # noqa: E402 def make_provider() -> LiteLLMProvider: @@ -83,9 +83,7 @@ def make_context( ) -async def run_test( - name: str, llm: LiteLLMProvider, system: str, output_keys: list[str] -) -> NodeResult: +async def run_test(name: str, llm: LiteLLMProvider, system: str, output_keys: list[str]) -> NodeResult: print(f"\n{'=' * 60}") print(f"TEST: {name}") print(f"{'=' * 60}") @@ -146,11 +144,7 @@ async def main(): if isinstance(event, TextDeltaEvent): text = event.snapshot elif isinstance(event, FinishEvent): - print( - f" Finish: stop={event.stop_reason}" - f" in={event.input_tokens}" - f" out={event.output_tokens}" - ) + print(f" Finish: stop={event.stop_reason} in={event.input_tokens} out={event.output_tokens}") elif isinstance(event, StreamErrorEvent): print(f" StreamError: {event.error} (recoverable={event.recoverable})") elif isinstance(event, ToolCallEvent): diff --git a/core/tests/test_codex_eventloop.py b/core/tests/test_codex_eventloop.py index f48c2033..0a532a4e 100644 --- a/core/tests/test_codex_eventloop.py +++ b/core/tests/test_codex_eventloop.py @@ -14,11 +14,11 @@ logging.basicConfig(level=logging.WARNING, format="%(levelname)s %(name)s: %(mes # Show our provider's retry/stream logs logging.getLogger("framework.llm.litellm").setLevel(logging.DEBUG) -from framework.config import RuntimeConfig # noqa: E402 from framework.agent_loop.agent_loop import AgentLoop as EventLoopNode # noqa: E402 from framework.agent_loop.internals.types import LoopConfig # noqa: E402 -from framework.orchestrator.node import DataBuffer, NodeContext, NodeResult, NodeSpec # noqa: E402 +from framework.config import RuntimeConfig # noqa: E402 from framework.llm.litellm import LiteLLMProvider # noqa: E402 +from framework.orchestrator.node import DataBuffer, NodeContext, NodeResult, NodeSpec # noqa: E402 def make_provider() -> LiteLLMProvider: @@ -76,9 +76,7 @@ def make_context( ) -async def run_test( - name: str, llm: LiteLLMProvider, system: str, output_keys: list[str] -) -> NodeResult: +async def run_test(name: str, llm: LiteLLMProvider, system: str, output_keys: list[str]) -> NodeResult: print(f"\n{'=' * 60}") print(f"TEST: {name}") print(f"{'=' * 60}") @@ -139,11 +137,7 @@ async def main(): if isinstance(event, TextDeltaEvent): text = event.snapshot elif isinstance(event, FinishEvent): - print( - f" Finish: stop={event.stop_reason}" - f" in={event.input_tokens}" - f" out={event.output_tokens}" - ) + print(f" Finish: stop={event.stop_reason} in={event.input_tokens} out={event.output_tokens}") elif isinstance(event, StreamErrorEvent): print(f" StreamError: {event.error} (recoverable={event.recoverable})") elif isinstance(event, ToolCallEvent): diff --git a/core/tests/test_colony_fork_flow.py b/core/tests/test_colony_fork_flow.py index 36497d4f..8293291b 100644 --- a/core/tests/test_colony_fork_flow.py +++ b/core/tests/test_colony_fork_flow.py @@ -21,11 +21,9 @@ import pytest from aiohttp.test_utils import TestClient, TestServer from framework.agent_loop.internals.types import LoopConfig -from framework.host.event_bus import EventBus from framework.server.app import create_app from framework.server.session_manager import Session, _queen_session_dir - # Modules that import HIVE_HOME / QUEENS_DIR / COLONIES_DIR / MEMORIES_DIR / # HIVE_CONFIG_FILE at import time and need their bindings rewritten when we # redirect ~/.hive to a temp directory. Patching Path.home alone is NOT @@ -327,9 +325,7 @@ async def test_colony_spawn_creates_correct_artifacts(tmp_path, monkeypatch): # ── duplicated queen session dir ────────────────────────────── dest_queen_dir = _queen_session_dir(colony_session_id, queen_name) - assert dest_queen_dir.is_dir(), ( - f"Forked session dir not under {queen_name}/, got {dest_queen_dir}" - ) + assert dest_queen_dir.is_dir(), f"Forked session dir not under {queen_name}/, got {dest_queen_dir}" # Conversations were copied assert (dest_queen_dir / "conversations" / "parts" / "0000000000.json").is_file() @@ -344,9 +340,7 @@ async def test_colony_spawn_creates_correct_artifacts(tmp_path, monkeypatch): assert dest_meta["agent_name"] == "Honeycomb" # ── worker storage receives queen conversations ─────────────── - worker_storage_convs = ( - tmp_path / ".hive" / "agents" / "honeycomb" / "worker" / "conversations" - ) + worker_storage_convs = tmp_path / ".hive" / "agents" / "honeycomb" / "worker" / "conversations" assert worker_storage_convs.is_dir() assert (worker_storage_convs / "parts" / "0000000000.json").is_file() @@ -363,9 +357,7 @@ async def test_colony_spawn_creates_correct_artifacts(tmp_path, monkeypatch): @pytest.mark.asyncio -async def test_create_session_with_worker_colony_uses_forked_session_id( - tmp_path, monkeypatch -): +async def test_create_session_with_worker_colony_uses_forked_session_id(tmp_path, monkeypatch): """When a colony is loaded, its metadata.json's queen_session_id wins. Regression: returning to a colony was loading the SOURCE queen session @@ -439,9 +431,7 @@ async def test_create_session_with_worker_colony_uses_forked_session_id( monkeypatch.setattr(SessionManager, "_load_worker_core", fake_load_worker_core) monkeypatch.setattr(SessionManager, "_start_queen", fake_start_queen) - monkeypatch.setattr( - SessionManager, "_restore_active_triggers", fake_restore_active_triggers - ) + monkeypatch.setattr(SessionManager, "_restore_active_triggers", fake_restore_active_triggers) # Caller passes the SOURCE session id (mimicking the frontend's history scan) session = await manager.create_session_with_worker_colony( @@ -453,7 +443,5 @@ async def test_create_session_with_worker_colony_uses_forked_session_id( assert captured["queen_resume_from"] == forked_id, ( f"Expected forked id {forked_id}, got {captured['queen_resume_from']}" ) - assert session.id == forked_id, ( - f"Live session ID should match forked session, got {session.id}" - ) + assert session.id == forked_id, f"Live session ID should match forked session, got {session.id}" assert captured["queen_name"] == queen_name diff --git a/core/tests/test_colony_fork_live.py b/core/tests/test_colony_fork_live.py index 74f16ece..64b9de36 100644 --- a/core/tests/test_colony_fork_live.py +++ b/core/tests/test_colony_fork_live.py @@ -19,14 +19,12 @@ import importlib import json import os import shutil -import sys import time from pathlib import Path import pytest from aiohttp.test_utils import TestClient, TestServer - # --------------------------------------------------------------------------- # Skip if no live LLM credentials are available # --------------------------------------------------------------------------- @@ -202,9 +200,7 @@ async def test_live_queen_fork_to_colony(isolated_hive_home): # ── 2. Wait for queen identity hook to fire ──────────────── queen_name = await _wait_for_queen_identity(client, session_id) - assert queen_name != "default", ( - f"Identity hook didn't pick a real queen, got {queen_name!r}" - ) + assert queen_name != "default", f"Identity hook didn't pick a real queen, got {queen_name!r}" # ── 3. Fork to a colony ──────────────────────────────────── colony_name = "live_test_honeycomb" @@ -249,10 +245,7 @@ async def test_live_queen_fork_to_colony(isolated_hive_home): # ── 5. Validate the forked queen session dir ────────────── # It must live under the SELECTED queen identity, not "default". dest_queen_dir = _queen_session_dir(colony_session_id, queen_name) - assert dest_queen_dir.is_dir(), ( - f"Forked session dir not under {queen_name}/, expected " - f"{dest_queen_dir}" - ) + assert dest_queen_dir.is_dir(), f"Forked session dir not under {queen_name}/, expected {dest_queen_dir}" # Conversations from the original queen session were copied assert (dest_queen_dir / "conversations").is_dir() @@ -267,22 +260,16 @@ async def test_live_queen_fork_to_colony(isolated_hive_home): cold = SessionManager.list_cold_sessions() forked_in_history = [s for s in cold if s.get("session_id") == colony_session_id] - assert not forked_in_history, ( - f"Forked colony session leaked into queen DM history: {forked_in_history}" - ) + assert not forked_in_history, f"Forked colony session leaked into queen DM history: {forked_in_history}" # ── 7. Worker storage received the conversations ────────── - worker_storage_convs = ( - isolated_hive_home / "agents" / colony_name / "worker" / "conversations" - ) + worker_storage_convs = isolated_hive_home / "agents" / colony_name / "worker" / "conversations" assert worker_storage_convs.is_dir() # The queen has had at least one turn (the initial_prompt acknowledgment), # so there should be conversation parts. parts_dir = worker_storage_convs / "parts" if parts_dir.exists(): - assert any(parts_dir.iterdir()), ( - "worker storage has conversations dir but no parts" - ) + assert any(parts_dir.iterdir()), "worker storage has conversations dir but no parts" # ── 8. Stop the live session cleanly ────────────────────── resp = await client.delete(f"/api/sessions/{session_id}") diff --git a/core/tests/test_colony_runtime_overseer.py b/core/tests/test_colony_runtime_overseer.py index e6599345..1f5e65fa 100644 --- a/core/tests/test_colony_runtime_overseer.py +++ b/core/tests/test_colony_runtime_overseer.py @@ -10,7 +10,6 @@ a real on-disk ``tmp_path``. No HTTP layer, no real LLM. from __future__ import annotations import asyncio -import json from collections.abc import AsyncIterator from pathlib import Path from typing import Any @@ -20,7 +19,6 @@ import pytest from framework.agent_loop.types import AgentSpec from framework.host.colony_runtime import ColonyRuntime from framework.host.event_bus import AgentEvent, EventBus, EventType -from framework.host.worker import Worker, WorkerStatus from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolResult, ToolUse from framework.llm.stream_events import ( FinishEvent, @@ -29,7 +27,6 @@ from framework.llm.stream_events import ( ) from framework.schemas.goal import Goal - # --------------------------------------------------------------------------- # Mock LLM # --------------------------------------------------------------------------- @@ -200,9 +197,7 @@ class TestColonyRuntimeGoalProperty: class TestStartOverseer: @pytest.mark.asyncio - async def test_start_overseer_creates_persistent_worker( - self, tmp_path, agent_spec, goal, event_bus - ): + async def test_start_overseer_creates_persistent_worker(self, tmp_path, agent_spec, goal, event_bus): """Overseer must be a persistent Worker tagged stream_id='overseer'.""" llm = MockStreamingLLM(scenarios=[_text_scenario("idle")]) colony = await _make_colony(tmp_path, llm, agent_spec, goal, event_bus) @@ -232,9 +227,7 @@ class TestStartOverseer: class TestReportToParent: @pytest.mark.asyncio - async def test_worker_report_emits_subagent_report_event( - self, tmp_path, agent_spec, goal, event_bus - ): + async def test_worker_report_emits_subagent_report_event(self, tmp_path, agent_spec, goal, event_bus): """A worker calling report_to_parent emits SUBAGENT_REPORT with structured data.""" llm = MockStreamingLLM( scenarios=[ @@ -264,9 +257,7 @@ class TestReportToParent: ) try: - worker_ids = await colony.spawn( - task="Fetch 5 rows from honeycomb", count=1 - ) + worker_ids = await colony.spawn(task="Fetch 5 rows from honeycomb", count=1) assert len(worker_ids) == 1 worker = colony.get_worker(worker_ids[0]) assert worker is not None @@ -293,9 +284,7 @@ class TestReportToParent: await colony.stop() @pytest.mark.asyncio - async def test_worker_crash_emits_synthesised_failed_report( - self, tmp_path, agent_spec, goal, event_bus - ): + async def test_worker_crash_emits_synthesised_failed_report(self, tmp_path, agent_spec, goal, event_bus): """Worker whose AgentLoop raises must still emit SUBAGENT_REPORT. The overseer would otherwise hang waiting for a report from a @@ -343,9 +332,7 @@ class TestReportToParent: class TestSpawnBatchAndWaitForReports: @pytest.mark.asyncio - async def test_spawn_batch_returns_one_id_per_task( - self, tmp_path, agent_spec, goal, event_bus - ): + async def test_spawn_batch_returns_one_id_per_task(self, tmp_path, agent_spec, goal, event_bus): llm = MockStreamingLLM( by_task={ "Fetch batch 1": _report_scenario("success", "batch 1 done"), @@ -370,21 +357,13 @@ class TestSpawnBatchAndWaitForReports: await colony.stop() @pytest.mark.asyncio - async def test_wait_for_worker_reports_collects_all( - self, tmp_path, agent_spec, goal, event_bus - ): + async def test_wait_for_worker_reports_collects_all(self, tmp_path, agent_spec, goal, event_bus): """Fan out 3 workers, wait for reports, verify structured list.""" llm = MockStreamingLLM( by_task={ - "batch 1": _report_scenario( - "success", "w1 done", {"batch": 1, "rows": 10} - ), - "batch 2": _report_scenario( - "success", "w2 done", {"batch": 2, "rows": 15} - ), - "batch 3": _report_scenario( - "failed", "w3 broke", {"batch": 3, "error_code": 503} - ), + "batch 1": _report_scenario("success", "w1 done", {"batch": 1, "rows": 10}), + "batch 2": _report_scenario("success", "w2 done", {"batch": 2, "rows": 15}), + "batch 3": _report_scenario("failed", "w3 broke", {"batch": 3, "error_code": 503}), } ) colony = await _make_colony(tmp_path, llm, agent_spec, goal, event_bus) @@ -413,9 +392,7 @@ class TestSpawnBatchAndWaitForReports: await colony.stop() @pytest.mark.asyncio - async def test_wait_for_worker_reports_returns_in_input_order( - self, tmp_path, agent_spec, goal, event_bus - ): + async def test_wait_for_worker_reports_returns_in_input_order(self, tmp_path, agent_spec, goal, event_bus): """Reports must be returned in the same order as the input worker_ids.""" llm = MockStreamingLLM( by_task={ @@ -426,9 +403,7 @@ class TestSpawnBatchAndWaitForReports: ) colony = await _make_colony(tmp_path, llm, agent_spec, goal, event_bus) try: - ids = await colony.spawn_batch( - tasks=[{"task": "task-A"}, {"task": "task-B"}, {"task": "task-C"}] - ) + ids = await colony.spawn_batch(tasks=[{"task": "task-A"}, {"task": "task-B"}, {"task": "task-C"}]) reports = await colony.wait_for_worker_reports(ids, timeout=10.0) assert [r["worker_id"] for r in reports] == ids assert [r["summary"] for r in reports] == ["A", "B", "C"] @@ -436,16 +411,12 @@ class TestSpawnBatchAndWaitForReports: await colony.stop() @pytest.mark.asyncio - async def test_wait_for_worker_reports_missing_id( - self, tmp_path, agent_spec, goal, event_bus - ): + async def test_wait_for_worker_reports_missing_id(self, tmp_path, agent_spec, goal, event_bus): """Unknown worker_id is reported as failed, not crash.""" llm = MockStreamingLLM(scenarios=[_text_scenario("noop")]) colony = await _make_colony(tmp_path, llm, agent_spec, goal, event_bus) try: - reports = await colony.wait_for_worker_reports( - ["nonexistent_worker"], timeout=1.0 - ) + reports = await colony.wait_for_worker_reports(["nonexistent_worker"], timeout=1.0) assert len(reports) == 1 assert reports[0]["worker_id"] == "nonexistent_worker" assert reports[0]["status"] == "failed" @@ -456,9 +427,7 @@ class TestSpawnBatchAndWaitForReports: class TestSeedConversation: @pytest.mark.asyncio - async def test_seed_conversation_writes_parts_to_storage( - self, tmp_path, agent_spec, goal, event_bus - ): + async def test_seed_conversation_writes_parts_to_storage(self, tmp_path, agent_spec, goal, event_bus): """seed_conversation must write message parts to disk so the AgentLoop's NodeConversation picks them up when the overseer initialises.""" @@ -492,9 +461,7 @@ class TestSeedConversation: class TestReportToParentGatingByStream: @pytest.mark.asyncio - async def test_report_to_parent_only_for_worker_streams( - self, tmp_path, agent_spec, goal, event_bus - ): + async def test_report_to_parent_only_for_worker_streams(self, tmp_path, agent_spec, goal, event_bus): """report_to_parent tool should only be in the worker's tool list, not the overseer's.""" llm = MockStreamingLLM(scenarios=[_text_scenario("ok")]) diff --git a/core/tests/test_context_handoff.py b/core/tests/test_context_handoff.py index 0fe4870a..afcf0a14 100644 --- a/core/tests/test_context_handoff.py +++ b/core/tests/test_context_handoff.py @@ -6,10 +6,10 @@ from typing import Any import pytest -from framework.orchestrator.context_handoff import ContextHandoff, HandoffContext from framework.agent_loop.conversation import NodeConversation from framework.llm.mock import MockLLMProvider from framework.llm.provider import LLMProvider, LLMResponse +from framework.orchestrator.context_handoff import ContextHandoff, HandoffContext # --------------------------------------------------------------------------- # Helpers diff --git a/core/tests/test_create_colony_tool.py b/core/tests/test_create_colony_tool.py index 927ab725..d2c8b28f 100644 --- a/core/tests/test_create_colony_tool.py +++ b/core/tests/test_create_colony_tool.py @@ -29,7 +29,6 @@ from framework.llm.provider import ToolUse from framework.loader.tool_registry import ToolRegistry from framework.tools.queen_lifecycle_tools import register_queen_lifecycle_tools - # --------------------------------------------------------------------------- # Fixtures + helpers # --------------------------------------------------------------------------- @@ -55,9 +54,7 @@ def _make_executor(): async def _call(executor, **inputs) -> dict: - result = executor( - ToolUse(id="tu_create_colony", name="create_colony", input=inputs) - ) + result = executor(ToolUse(id="tu_create_colony", name="create_colony", input=inputs)) if asyncio.iscoroutine(result): result = await result return json.loads(result.content) @@ -104,11 +101,7 @@ def _write_skill( skill_dir.mkdir(parents=True, exist_ok=True) skill_md = skill_dir / "SKILL.md" skill_md.write_text( - "---\n" - f"name: {fm_name}\n" - f'description: "{description}"\n' - "---\n\n" - f"{body}", + f'---\nname: {fm_name}\ndescription: "{description}"\n---\n\n{body}', encoding="utf-8", ) return skill_dir @@ -138,14 +131,10 @@ async def test_happy_path_emits_colony_created_event( handler=_on_colony_created, ) - skill_src = _write_skill( - tmp_path / "scratch", dir_name="my-skill", fm_name="my-skill" - ) + skill_src = _write_skill(tmp_path / "scratch", dir_name="my-skill", fm_name="my-skill") skill_src.parent.mkdir(parents=True, exist_ok=True) # Re-create after parent mkdir - skill_src = _write_skill( - tmp_path / "scratch", dir_name="my-skill", fm_name="my-skill" - ) + skill_src = _write_skill(tmp_path / "scratch", dir_name="my-skill", fm_name="my-skill") payload = await _call( executor, @@ -218,9 +207,7 @@ async def test_happy_path_external_folder_is_copied_into_skills_root( @pytest.mark.asyncio -async def test_happy_path_in_place_authored_skill( - patched_home: Path, patched_fork: list[dict] -) -> None: +async def test_happy_path_in_place_authored_skill(patched_home: Path, patched_fork: list[dict]) -> None: """Skill authored directly at ~/.hive/skills/{name}/ is accepted in-place.""" executor, _ = _make_executor() @@ -267,9 +254,7 @@ async def test_missing_skill_path_rejected(patched_home, patched_fork) -> None: @pytest.mark.asyncio -async def test_skill_path_is_file_not_directory_rejected( - tmp_path, patched_home, patched_fork -) -> None: +async def test_skill_path_is_file_not_directory_rejected(tmp_path, patched_home, patched_fork) -> None: executor, _ = _make_executor() bogus = tmp_path / "not-a-dir.md" bogus.write_text("hi", encoding="utf-8") @@ -285,9 +270,7 @@ async def test_skill_path_is_file_not_directory_rejected( @pytest.mark.asyncio -async def test_skill_missing_skill_md_rejected( - tmp_path, patched_home, patched_fork -) -> None: +async def test_skill_missing_skill_md_rejected(tmp_path, patched_home, patched_fork) -> None: executor, _ = _make_executor() folder = tmp_path / "no-skill-md" folder.mkdir() @@ -303,15 +286,11 @@ async def test_skill_missing_skill_md_rejected( @pytest.mark.asyncio -async def test_skill_md_missing_frontmatter_marker_rejected( - tmp_path, patched_home, patched_fork -) -> None: +async def test_skill_md_missing_frontmatter_marker_rejected(tmp_path, patched_home, patched_fork) -> None: executor, _ = _make_executor() folder = tmp_path / "broken-fm" folder.mkdir() - (folder / "SKILL.md").write_text( - "no frontmatter here, just body\n", encoding="utf-8" - ) + (folder / "SKILL.md").write_text("no frontmatter here, just body\n", encoding="utf-8") payload = await _call( executor, colony_name="ok_name", @@ -324,9 +303,7 @@ async def test_skill_md_missing_frontmatter_marker_rejected( @pytest.mark.asyncio -async def test_skill_md_missing_description_rejected( - tmp_path, patched_home, patched_fork -) -> None: +async def test_skill_md_missing_description_rejected(tmp_path, patched_home, patched_fork) -> None: executor, _ = _make_executor() folder = tmp_path / "no-description" folder.mkdir() @@ -346,9 +323,7 @@ async def test_skill_md_missing_description_rejected( @pytest.mark.asyncio -async def test_directory_name_mismatch_with_frontmatter_rejected( - tmp_path, patched_home, patched_fork -) -> None: +async def test_directory_name_mismatch_with_frontmatter_rejected(tmp_path, patched_home, patched_fork) -> None: executor, _ = _make_executor() folder = tmp_path / "wrong-dir-name" folder.mkdir() @@ -370,9 +345,7 @@ async def test_directory_name_mismatch_with_frontmatter_rejected( @pytest.mark.asyncio async def test_invalid_colony_name_rejected(tmp_path, patched_home, patched_fork) -> None: executor, _ = _make_executor() - skill_src = _write_skill( - tmp_path, dir_name="valid-skill", fm_name="valid-skill" - ) + skill_src = _write_skill(tmp_path, dir_name="valid-skill", fm_name="valid-skill") payload = await _call( executor, colony_name="NotValid-Colony", @@ -385,9 +358,7 @@ async def test_invalid_colony_name_rejected(tmp_path, patched_home, patched_fork @pytest.mark.asyncio -async def test_fork_failure_keeps_installed_skill( - tmp_path, patched_home, monkeypatch -) -> None: +async def test_fork_failure_keeps_installed_skill(tmp_path, patched_home, monkeypatch) -> None: """If the fork raises, the installed skill stays under ~/.hive/skills/.""" async def _failing_fork(**kwargs): @@ -399,9 +370,7 @@ async def test_fork_failure_keeps_installed_skill( ) executor, _ = _make_executor() - skill_src = _write_skill( - tmp_path, dir_name="durable-skill", fm_name="durable-skill" - ) + skill_src = _write_skill(tmp_path, dir_name="durable-skill", fm_name="durable-skill") payload = await _call( executor, diff --git a/core/tests/test_default_skills.py b/core/tests/test_default_skills.py index 272e6f02..69673e12 100644 --- a/core/tests/test_default_skills.py +++ b/core/tests/test_default_skills.py @@ -13,9 +13,7 @@ from framework.skills.defaults import ( ) from framework.skills.parser import parse_skill_md -_DEFAULT_SKILLS_DIR = ( - Path(__file__).resolve().parent.parent / "framework" / "skills" / "_default_skills" -) +_DEFAULT_SKILLS_DIR = Path(__file__).resolve().parent.parent / "framework" / "skills" / "_default_skills" class TestDefaultSkillFiles: @@ -94,9 +92,7 @@ class TestDefaultSkillManager: assert manager.active_skill_names == [] def test_disable_single_skill(self): - config = SkillsConfig.from_agent_vars( - default_skills={"hive.quality-monitor": {"enabled": False}} - ) + config = SkillsConfig.from_agent_vars(default_skills={"hive.quality-monitor": {"enabled": False}}) manager = DefaultSkillManager(config) manager.load() @@ -138,9 +134,7 @@ class TestSkillsConfig: assert config.is_default_enabled("hive.note-taking") is True def test_explicit_disable(self): - config = SkillsConfig( - default_skills={"hive.note-taking": DefaultSkillConfig(enabled=False)} - ) + config = SkillsConfig(default_skills={"hive.note-taking": DefaultSkillConfig(enabled=False)}) assert config.is_default_enabled("hive.note-taking") is False assert config.is_default_enabled("hive.batch-ledger") is True @@ -199,9 +193,7 @@ class TestConfigOverrideSubstitution: assert "Every 5 iterations" in prompt def test_quality_monitor_override_interval(self): - config = SkillsConfig.from_agent_vars( - default_skills={"hive.quality-monitor": {"assessment_interval": 10}} - ) + config = SkillsConfig.from_agent_vars(default_skills={"hive.quality-monitor": {"assessment_interval": 10}}) manager = DefaultSkillManager(config) manager.load() prompt = manager.build_protocols_prompt() @@ -215,9 +207,7 @@ class TestConfigOverrideSubstitution: assert "3+ times" in prompt def test_error_recovery_override_retries(self): - config = SkillsConfig.from_agent_vars( - default_skills={"hive.error-recovery": {"max_retries_per_tool": 5}} - ) + config = SkillsConfig.from_agent_vars(default_skills={"hive.error-recovery": {"max_retries_per_tool": 5}}) manager = DefaultSkillManager(config) manager.load() prompt = manager.build_protocols_prompt() @@ -273,17 +263,13 @@ class TestBatchAutoDetection: assert "_batch_ledger" in manager.batch_init_nudge def test_batch_nudge_none_when_skill_disabled(self): - config = SkillsConfig.from_agent_vars( - default_skills={"hive.batch-ledger": {"enabled": False}} - ) + config = SkillsConfig.from_agent_vars(default_skills={"hive.batch-ledger": {"enabled": False}}) manager = DefaultSkillManager(config) manager.load() assert manager.batch_init_nudge is None def test_batch_nudge_none_when_auto_detect_disabled(self): - config = SkillsConfig.from_agent_vars( - default_skills={"hive.batch-ledger": {"auto_detect_batch": False}} - ) + config = SkillsConfig.from_agent_vars(default_skills={"hive.batch-ledger": {"auto_detect_batch": False}}) manager = DefaultSkillManager(config) manager.load() assert manager.batch_init_nudge is None @@ -306,9 +292,7 @@ class TestContextWarnRatio: assert manager.context_warn_ratio == pytest.approx(0.3) def test_ratio_none_when_skill_disabled(self): - config = SkillsConfig.from_agent_vars( - default_skills={"hive.context-preservation": {"enabled": False}} - ) + config = SkillsConfig.from_agent_vars(default_skills={"hive.context-preservation": {"enabled": False}}) manager = DefaultSkillManager(config) manager.load() assert manager.context_warn_ratio is None diff --git a/core/tests/test_event_bus.py b/core/tests/test_event_bus.py index 6de47af0..e3d2963b 100644 --- a/core/tests/test_event_bus.py +++ b/core/tests/test_event_bus.py @@ -274,12 +274,8 @@ class TestEventFiltering: filter_node="node_x", ) - await bus.publish( - AgentEvent(type=EventType.NODE_LOOP_STARTED, stream_id="s", node_id="node_x") - ) - await bus.publish( - AgentEvent(type=EventType.NODE_LOOP_STARTED, stream_id="s", node_id="node_y") - ) + await bus.publish(AgentEvent(type=EventType.NODE_LOOP_STARTED, stream_id="s", node_id="node_x")) + await bus.publish(AgentEvent(type=EventType.NODE_LOOP_STARTED, stream_id="s", node_id="node_y")) assert received == ["node_x"] @@ -298,12 +294,8 @@ class TestEventFiltering: filter_execution="exec_1", ) - await bus.publish( - AgentEvent(type=EventType.EXECUTION_COMPLETED, stream_id="s", execution_id="exec_1") - ) - await bus.publish( - AgentEvent(type=EventType.EXECUTION_COMPLETED, stream_id="s", execution_id="exec_2") - ) + await bus.publish(AgentEvent(type=EventType.EXECUTION_COMPLETED, stream_id="s", execution_id="exec_1")) + await bus.publish(AgentEvent(type=EventType.EXECUTION_COMPLETED, stream_id="s", execution_id="exec_2")) assert received == ["exec_1"] @@ -365,12 +357,8 @@ class TestEventFiltering: filter_colony="colony_a", ) - await bus.publish( - AgentEvent(type=EventType.EXECUTION_STARTED, stream_id="s", colony_id="colony_a") - ) - await bus.publish( - AgentEvent(type=EventType.EXECUTION_STARTED, stream_id="s", colony_id="colony_b") - ) + await bus.publish(AgentEvent(type=EventType.EXECUTION_STARTED, stream_id="s", colony_id="colony_a")) + await bus.publish(AgentEvent(type=EventType.EXECUTION_STARTED, stream_id="s", colony_id="colony_b")) assert received == ["colony_a"] @@ -856,9 +844,7 @@ class TestConveniencePublishers: assert len(received) == 1 assert received[0].type == EventType.NODE_ACTION_PLAN - assert ( - received[0].data["plan"] == "1. Search for data\n2. Analyze results\n3. Generate report" - ) + assert received[0].data["plan"] == "1. Search for data\n2. Analyze results\n3. Generate report" @pytest.mark.asyncio async def test_emit_subagent_report(self): diff --git a/core/tests/test_event_loop_node.py b/core/tests/test_event_loop_node.py index 0c796516..ce4dc2e5 100644 --- a/core/tests/test_event_loop_node.py +++ b/core/tests/test_event_loop_node.py @@ -13,8 +13,7 @@ from unittest.mock import AsyncMock, MagicMock import pytest -from framework.agent_loop.agent_loop import AgentLoop as EventLoopNode -from framework.agent_loop.agent_loop import OutputAccumulator +from framework.agent_loop.agent_loop import AgentLoop as EventLoopNode, OutputAccumulator from framework.agent_loop.conversation import NodeConversation from framework.agent_loop.internals.types import JudgeProtocol, JudgeVerdict, LoopConfig from framework.host.event_bus import EventBus, EventType @@ -25,8 +24,7 @@ from framework.llm.stream_events import ( TextDeltaEvent, ToolCallEvent, ) -from framework.orchestrator.node import DataBuffer, NodeContext, NodeProtocol, NodeSpec -from framework.server.session_manager import Session, SessionManager +from framework.orchestrator.node import DataBuffer, NodeContext, NodeSpec from framework.storage.conversation_store import FileConversationStore from framework.tracker.decision_tracker import DecisionTracker as Runtime @@ -75,9 +73,7 @@ def text_scenario(text: str, input_tokens: int = 10, output_tokens: int = 5) -> """Build a stream scenario that produces text and finishes.""" return [ TextDeltaEvent(content=text, snapshot=text), - FinishEvent( - stop_reason="stop", input_tokens=input_tokens, output_tokens=output_tokens, model="mock" - ), + FinishEvent(stop_reason="stop", input_tokens=input_tokens, output_tokens=output_tokens, model="mock"), ] @@ -91,12 +87,8 @@ def tool_call_scenario( events = [] if text: events.append(TextDeltaEvent(content=text, snapshot=text)) - events.append( - ToolCallEvent(tool_use_id=tool_use_id, tool_name=tool_name, tool_input=tool_input) - ) - events.append( - FinishEvent(stop_reason="tool_calls", input_tokens=10, output_tokens=5, model="mock") - ) + events.append(ToolCallEvent(tool_use_id=tool_use_id, tool_name=tool_name, tool_input=tool_input)) + events.append(FinishEvent(stop_reason="tool_calls", input_tokens=10, output_tokens=5, model="mock")) return events @@ -263,9 +255,7 @@ class TestJudgeIntegration: llm = MockStreamingLLM(scenarios=[text_scenario("Attempt")]) judge = AsyncMock(spec=JudgeProtocol) - judge.evaluate = AsyncMock( - return_value=JudgeVerdict(action="ESCALATE", feedback="Tone violation") - ) + judge.evaluate = AsyncMock(return_value=JudgeVerdict(action="ESCALATE", feedback="Tone violation")) ctx = build_ctx(runtime, node_spec, buffer, llm) node = EventLoopNode(judge=judge, config=LoopConfig(max_iterations=5)) @@ -528,13 +518,9 @@ class TestQueenInteractionBlocking: llm = MockStreamingLLM( scenarios=[ [ - ToolCallEvent( - tool_use_id="tool_1", tool_name="search", tool_input={"q": "test"} - ), + ToolCallEvent(tool_use_id="tool_1", tool_name="search", tool_input={"q": "test"}), ToolCallEvent(tool_use_id="ask_1", tool_name="ask_user", tool_input={}), - FinishEvent( - stop_reason="tool_calls", input_tokens=10, output_tokens=5, model="mock" - ), + FinishEvent(stop_reason="tool_calls", input_tokens=10, output_tokens=5, model="mock"), ], text_scenario("Done"), ] @@ -568,9 +554,7 @@ class TestQueenInteractionBlocking: assert llm._call_index >= 2 @pytest.mark.asyncio - async def test_ask_user_not_available_for_workers_even_with_legacy_client_facing( - self, runtime, buffer - ): + async def test_ask_user_not_available_for_workers_even_with_legacy_client_facing(self, runtime, buffer): """Workers should not receive ask_user even if legacy client_facing=True is set.""" spec = NodeSpec( id="internal", @@ -707,6 +691,7 @@ class TestWriteThroughPersistence: parts = await store.read_parts() assert len(parts) >= 2 # at least initial user msg + assistant msg + class TestCrashRecovery: @pytest.mark.asyncio async def test_restore_from_checkpoint(self, tmp_path, runtime, node_spec, buffer): @@ -750,9 +735,7 @@ class TestCrashRecovery: assert result.output.get("result") == "partial_value" @pytest.mark.asyncio - async def test_restore_reblocks_pending_user_input_instead_of_continuing( - self, tmp_path, runtime, buffer - ): + async def test_restore_reblocks_pending_user_input_instead_of_continuing(self, tmp_path, runtime, buffer): """A restored queen wait should re-emit the question, not self-continue.""" store = FileConversationStore(tmp_path / "conv") conv = NodeConversation( @@ -836,9 +819,7 @@ class TestCrashRecovery: "live manual flow. Unskip once the legacy restore is fixed." ) ) - async def test_restore_legacy_unphased_assistant_message_preserves_store( - self, tmp_path, runtime, buffer - ): + async def test_restore_legacy_unphased_assistant_message_preserves_store(self, tmp_path, runtime, buffer): """Legacy queen stores without phase_id should resume instead of being cleared. The queen node uses skip_judge=True (forever-alive conversational @@ -904,9 +885,7 @@ class TestCrashRecovery: assert len(llm.stream_calls) == 1 assert [m["role"] for m in llm.stream_calls[0]["messages"]] == ["assistant", "user"] assert llm.stream_calls[0]["messages"][0]["content"] == "[Error: previous turn failed.]" - assert llm.stream_calls[0]["messages"][1]["content"] == ( - "[Continue working on your current task.]" - ) + assert llm.stream_calls[0]["messages"][1]["content"] == ("[Continue working on your current task.]") restored = await NodeConversation.restore(store, phase_id="queen") assert restored is not None @@ -1257,6 +1236,7 @@ class TestTransientErrorRetry: assert len(retry_events) == 1 assert retry_events[0].data["retry_count"] == 1 + class TestIsTransientError: """Unit tests for _is_transient_error() classification.""" @@ -1314,16 +1294,12 @@ class TestFingerprintToolCalls: {"tool_name": "fetch", "tool_input": {"url": "b"}}, {"tool_name": "search", "tool_input": {"q": "a"}}, ] - assert EventLoopNode._fingerprint_tool_calls(r1) != ( - EventLoopNode._fingerprint_tool_calls(r2) - ) + assert EventLoopNode._fingerprint_tool_calls(r1) != (EventLoopNode._fingerprint_tool_calls(r2)) def test_sort_keys_deterministic(self): r1 = [{"tool_name": "t", "tool_input": {"b": 2, "a": 1}}] r2 = [{"tool_name": "t", "tool_input": {"a": 1, "b": 2}}] - assert EventLoopNode._fingerprint_tool_calls(r1) == EventLoopNode._fingerprint_tool_calls( - r2 - ) + assert EventLoopNode._fingerprint_tool_calls(r1) == EventLoopNode._fingerprint_tool_calls(r2) class TestIsToolDoomLoop: @@ -1851,12 +1827,8 @@ def _multi_tool_scenario(*calls: tuple[str, dict, str]) -> list: """ events: list = [] for name, inp, uid in calls: - events.append( - ToolCallEvent(tool_use_id=uid, tool_name=name, tool_input=inp) - ) - events.append( - FinishEvent(stop_reason="tool_calls", input_tokens=10, output_tokens=5, model="mock") - ) + events.append(ToolCallEvent(tool_use_id=uid, tool_name=name, tool_input=inp)) + events.append(FinishEvent(stop_reason="tool_calls", input_tokens=10, output_tokens=5, model="mock")) return events @@ -1864,9 +1836,7 @@ class TestToolConcurrencyPartition: """Gap 5: safe tools run in parallel, unsafe tools serialize after them.""" @pytest.mark.asyncio - async def test_safe_tools_overlap_unsafe_tools_do_not( - self, runtime, node_spec, buffer - ): + async def test_safe_tools_overlap_unsafe_tools_do_not(self, runtime, node_spec, buffer): """A turn with (safe, safe, unsafe) schedules safes in parallel and runs unsafe strictly after both safes have started.""" scenario = _multi_tool_scenario( @@ -1935,9 +1905,7 @@ class TestToolConcurrencyPartition: assert starts["call_3"] >= ends["call_2"] @pytest.mark.asyncio - async def test_serial_exception_cascades_cancel_siblings( - self, runtime, node_spec, buffer - ): + async def test_serial_exception_cascades_cancel_siblings(self, runtime, node_spec, buffer): """When an unsafe tool raises, the remaining unsafe siblings are cancelled with a clear error rather than silently executed.""" scenario = _multi_tool_scenario( @@ -1981,9 +1949,7 @@ class TestToolConcurrencyPartition: assert executed == ["call_1"] @pytest.mark.asyncio - async def test_safe_tool_starts_before_finish_event( - self, runtime, node_spec, buffer - ): + async def test_safe_tool_starts_before_finish_event(self, runtime, node_spec, buffer): """Gap 1: a concurrency-safe tool must start executing while the stream is still in flight, not after the final FinishEvent. @@ -2086,9 +2052,7 @@ class TestToolConcurrencyPartition: assert turn_ended - turn_started >= delay @pytest.mark.asyncio - async def test_soft_error_does_not_cascade( - self, runtime, node_spec, buffer - ): + async def test_soft_error_does_not_cascade(self, runtime, node_spec, buffer): """A ToolResult with is_error=True (e.g. 'file not found') is a normal return and must NOT cancel subsequent serial siblings - the model needs to see all tool errors to decide what to do next.""" diff --git a/core/tests/test_event_type_extension.py b/core/tests/test_event_type_extension.py index 2d968c30..0411d12d 100644 --- a/core/tests/test_event_type_extension.py +++ b/core/tests/test_event_type_extension.py @@ -19,6 +19,7 @@ from typing import Any, Literal import pytest +from framework.host.event_bus import AgentEvent, EventBus, EventType, Subscription from framework.llm.stream_events import ( FinishEvent, ReasoningDeltaEvent, @@ -29,7 +30,6 @@ from framework.llm.stream_events import ( ToolCallEvent, ToolResultEvent, ) -from framework.host.event_bus import AgentEvent, EventBus, EventType, Subscription # --------------------------------------------------------------------------- diff --git a/core/tests/test_flowchart_utils.py b/core/tests/test_flowchart_utils.py index b389851c..d3ede666 100644 --- a/core/tests/test_flowchart_utils.py +++ b/core/tests/test_flowchart_utils.py @@ -49,9 +49,7 @@ def _make_edge(source, target, condition="on_success", description=""): ) -def _make_goal( - name="Test Goal", description="A test goal", success_criteria=None, constraints=None -): +def _make_goal(name="Test Goal", description="A test goal", success_criteria=None, constraints=None): """Create a minimal goal-like object matching Goal interface.""" return SimpleNamespace( name=name, @@ -127,9 +125,7 @@ class TestSynthesizeDraftFromRuntime: _make_edge("intake", "process"), _make_edge("process", "deliver"), ] - draft, fmap = synthesize_draft_from_runtime( - nodes, edges, agent_name="test_agent", goal_name="Test" - ) + draft, fmap = synthesize_draft_from_runtime(nodes, edges, agent_name="test_agent", goal_name="Test") assert draft["agent_name"] == "test_agent" assert draft["goal"] == "Test" diff --git a/core/tests/test_hallucination_detection.py b/core/tests/test_hallucination_detection.py index db1b7ed3..121b831f 100644 --- a/core/tests/test_hallucination_detection.py +++ b/core/tests/test_hallucination_detection.py @@ -121,9 +121,7 @@ class TestDataBufferHallucinationDetection: # Create a 50KB string with code at the 75% mark size = 50000 code_position = int(size * 0.75) - content = ( - "A" * code_position + "def hidden_code(): pass" + "B" * (size - code_position - 25) - ) + content = "A" * code_position + "def hidden_code(): pass" + "B" * (size - code_position - 25) with pytest.raises(DataBufferWriteError) as exc_info: buffer.write("output", content) diff --git a/core/tests/test_litellm_provider.py b/core/tests/test_litellm_provider.py index a0605803..ebee9b86 100644 --- a/core/tests/test_litellm_provider.py +++ b/core/tests/test_litellm_provider.py @@ -61,9 +61,7 @@ class TestLiteLLMProviderInit: def test_init_with_api_base(self): """Test initialization with custom API base.""" - provider = LiteLLMProvider( - model="gpt-4o-mini", api_key="my-key", api_base="https://my-proxy.com/v1" - ) + provider = LiteLLMProvider(model="gpt-4o-mini", api_key="my-key", api_base="https://my-proxy.com/v1") assert provider.api_base == "https://my-proxy.com/v1" def test_init_minimax_defaults_api_base(self): @@ -165,9 +163,7 @@ class TestLiteLLMProviderComplete: mock_completion.return_value = mock_response provider = LiteLLMProvider(model="gpt-4o-mini", api_key="test-key") - provider.complete( - messages=[{"role": "user", "content": "Hello"}], system="You are a helpful assistant." - ) + provider.complete(messages=[{"role": "user", "content": "Hello"}], system="You are a helpful assistant.") call_kwargs = mock_completion.call_args[1] messages = call_kwargs["messages"] @@ -199,9 +195,7 @@ class TestLiteLLMProviderComplete: ) ] - provider.complete( - messages=[{"role": "user", "content": "What's the weather?"}], tools=tools - ) + provider.complete(messages=[{"role": "user", "content": "What's the weather?"}], tools=tools) call_kwargs = mock_completion.call_args[1] assert "tools" in call_kwargs @@ -430,9 +424,7 @@ class TestJsonMode: mock_completion.return_value = mock_response provider = LiteLLMProvider(model="gpt-4o-mini", api_key="test-key") - provider.complete( - messages=[{"role": "user", "content": "Hello"}], system="You are helpful." - ) + provider.complete(messages=[{"role": "user", "content": "Hello"}], system="You are helpful.") call_kwargs = mock_completion.call_args[1] assert "response_format" not in call_kwargs @@ -662,9 +654,7 @@ class TestAsyncComplete: assert result.content == "done" # Heartbeat should have ticked multiple times during the 300ms LLM call # (if the event loop were blocked, we'd see 0-1 ticks) - assert len(heartbeat_ticks) >= 3, ( - f"Event loop was blocked — only {len(heartbeat_ticks)} heartbeat ticks" - ) + assert len(heartbeat_ticks) >= 3, f"Event loop was blocked — only {len(heartbeat_ticks)} heartbeat ticks" @pytest.mark.asyncio async def test_mock_provider_acomplete(self): @@ -709,9 +699,7 @@ class TestAsyncComplete: assert result.content == "sync done" # The sync complete() should have run on a different thread - assert call_thread_ids[0] != main_thread_id, ( - "Base acomplete() should offload sync complete() to a thread pool" - ) + assert call_thread_ids[0] != main_thread_id, "Base acomplete() should offload sync complete() to a thread pool" class TestMiniMaxStreamFallback: @@ -887,8 +875,7 @@ class TestOpenRouterToolCompatFallback: call_state["count"] += 1 if kwargs.get("stream"): raise RuntimeError( - 'OpenrouterException - {"error":{"message":"No endpoints found ' - 'that support tool use.","code":404}}' + 'OpenrouterException - {"error":{"message":"No endpoints found that support tool use.","code":404}}' ) return compat_response @@ -970,8 +957,7 @@ class TestOpenRouterToolCompatFallback: async def side_effect(*args, **kwargs): if kwargs.get("stream"): raise RuntimeError( - 'OpenrouterException - {"error":{"message":"No endpoints found ' - 'that support tool use.","code":404}}' + 'OpenrouterException - {"error":{"message":"No endpoints found that support tool use.","code":404}}' ) return compat_response @@ -997,9 +983,7 @@ class TestOpenRouterToolCompatFallback: text_events = [event for event in events if isinstance(event, TextDeltaEvent)] assert len(text_events) == 1 assert "ask_user(" not in text_events[0].snapshot - assert text_events[0].snapshot == ( - "Queen has been loaded. It's ready to assist with your planning needs." - ) + assert text_events[0].snapshot == ("Queen has been loaded. It's ready to assist with your planning needs.") finish_events = [event for event in events if isinstance(event, FinishEvent)] assert len(finish_events) == 1 @@ -1034,8 +1018,7 @@ class TestOpenRouterToolCompatFallback: async def side_effect(*args, **kwargs): if kwargs.get("stream"): raise RuntimeError( - 'OpenrouterException - {"error":{"message":"No endpoints found ' - 'that support tool use.","code":404}}' + 'OpenrouterException - {"error":{"message":"No endpoints found that support tool use.","code":404}}' ) return compat_response diff --git a/core/tests/test_litellm_streaming.py b/core/tests/test_litellm_streaming.py index ce40c326..cf9214be 100644 --- a/core/tests/test_litellm_streaming.py +++ b/core/tests/test_litellm_streaming.py @@ -285,9 +285,7 @@ class TestRealAPIToolCallStreaming: # Must have multiple tool call events tool_calls = [e for e in events if isinstance(e, ToolCallEvent)] - assert len(tool_calls) >= 2, ( - f"Expected 2+ ToolCallEvents for parallel requests, got {len(tool_calls)}" - ) + assert len(tool_calls) >= 2, f"Expected 2+ ToolCallEvents for parallel requests, got {len(tool_calls)}" # Verify tool names used tool_names = {tc.tool_name for tc in tool_calls} diff --git a/core/tests/test_llm_judge.py b/core/tests/test_llm_judge.py index 5bc097ae..a361df6e 100644 --- a/core/tests/test_llm_judge.py +++ b/core/tests/test_llm_judge.py @@ -71,9 +71,7 @@ class TestLLMJudgeWithProvider: def test_evaluate_uses_provider(self): """Test that evaluate() uses the injected provider.""" - provider = MockLLMProvider( - response_content='{"passes": true, "explanation": "Summary is accurate"}' - ) + provider = MockLLMProvider(response_content='{"passes": true, "explanation": "Summary is accurate"}') judge = LLMJudge(llm_provider=provider) result = judge.evaluate( @@ -139,51 +137,37 @@ class TestLLMJudgeResponseParsing: provider = MockLLMProvider(response_content='{"passes": true, "explanation": "OK"}') judge = LLMJudge(llm_provider=provider) - result = judge.evaluate( - constraint="test", source_document="doc", summary="sum", criteria="crit" - ) + result = judge.evaluate(constraint="test", source_document="doc", summary="sum", criteria="crit") assert result["passes"] is True assert result["explanation"] == "OK" def test_parse_json_in_markdown_code_block(self): """Test parsing JSON wrapped in markdown code block.""" - provider = MockLLMProvider( - response_content='```json\n{"passes": false, "explanation": "Failed"}\n```' - ) + provider = MockLLMProvider(response_content='```json\n{"passes": false, "explanation": "Failed"}\n```') judge = LLMJudge(llm_provider=provider) - result = judge.evaluate( - constraint="test", source_document="doc", summary="sum", criteria="crit" - ) + result = judge.evaluate(constraint="test", source_document="doc", summary="sum", criteria="crit") assert result["passes"] is False assert result["explanation"] == "Failed" def test_parse_json_in_plain_code_block(self): """Test parsing JSON wrapped in plain code block (no json label).""" - provider = MockLLMProvider( - response_content='```\n{"passes": true, "explanation": "Passed"}\n```' - ) + provider = MockLLMProvider(response_content='```\n{"passes": true, "explanation": "Passed"}\n```') judge = LLMJudge(llm_provider=provider) - result = judge.evaluate( - constraint="test", source_document="doc", summary="sum", criteria="crit" - ) + result = judge.evaluate(constraint="test", source_document="doc", summary="sum", criteria="crit") assert result["passes"] is True assert result["explanation"] == "Passed" def test_parse_response_with_whitespace(self): """Test parsing response with extra whitespace.""" - provider = MockLLMProvider( - response_content='\n {"passes": true, "explanation": "Clean"} \n' - ) + provider = MockLLMProvider(response_content='\n {"passes": true, "explanation": "Clean"} \n') judge = LLMJudge(llm_provider=provider) - result = judge.evaluate( - constraint="test", source_document="doc", summary="sum", criteria="crit" - ) + result = judge.evaluate(constraint="test", source_document="doc", summary="sum", criteria="crit") assert result["passes"] is True @@ -192,9 +176,7 @@ class TestLLMJudgeResponseParsing: provider = MockLLMProvider(response_content='{"passes": true}') judge = LLMJudge(llm_provider=provider) - result = judge.evaluate( - constraint="test", source_document="doc", summary="sum", criteria="crit" - ) + result = judge.evaluate(constraint="test", source_document="doc", summary="sum", criteria="crit") assert result["passes"] is True assert result["explanation"] == "No explanation provided" @@ -205,9 +187,7 @@ class TestLLMJudgeResponseParsing: provider = MockLLMProvider(response_content='{"passes": "yes", "explanation": "OK"}') judge = LLMJudge(llm_provider=provider) - result = judge.evaluate( - constraint="test", source_document="doc", summary="sum", criteria="crit" - ) + result = judge.evaluate(constraint="test", source_document="doc", summary="sum", criteria="crit") assert result["passes"] is True @@ -216,9 +196,7 @@ class TestLLMJudgeResponseParsing: provider = MockLLMProvider(response_content='{"explanation": "No pass key"}') judge = LLMJudge(llm_provider=provider) - result = judge.evaluate( - constraint="test", source_document="doc", summary="sum", criteria="crit" - ) + result = judge.evaluate(constraint="test", source_document="doc", summary="sum", criteria="crit") assert result["passes"] is False @@ -231,9 +209,7 @@ class TestLLMJudgeErrorHandling: provider = MockLLMProvider(response_content="This is not JSON") judge = LLMJudge(llm_provider=provider) - result = judge.evaluate( - constraint="test", source_document="doc", summary="sum", criteria="crit" - ) + result = judge.evaluate(constraint="test", source_document="doc", summary="sum", criteria="crit") assert result["passes"] is False assert "LLM judge error" in result["explanation"] @@ -246,9 +222,7 @@ class TestLLMJudgeErrorHandling: judge = LLMJudge(llm_provider=provider) - result = judge.evaluate( - constraint="test", source_document="doc", summary="sum", criteria="crit" - ) + result = judge.evaluate(constraint="test", source_document="doc", summary="sum", criteria="crit") assert result["passes"] is False assert "LLM judge error" in result["explanation"] @@ -277,9 +251,7 @@ class TestLLMJudgeBackwardCompatibility: # Mock the _get_client method and Anthropic response mock_client = MagicMock() mock_response = MagicMock() - mock_response.content = [ - MagicMock(text='{"passes": true, "explanation": "Anthropic response"}') - ] + mock_response.content = [MagicMock(text='{"passes": true, "explanation": "Anthropic response"}')] mock_client.messages.create.return_value = mock_response judge._get_client = MagicMock(return_value=mock_client) @@ -414,9 +386,7 @@ class TestLLMJudgeIntegrationPatterns: """Test pattern: using LLMJudge with AnthropicProvider.""" # This demonstrates the intended usage pattern without actually calling the API # Create a mock that behaves like AnthropicProvider - mock_anthropic = MockLLMProvider( - response_content='{"passes": true, "explanation": "Matches source"}' - ) + mock_anthropic = MockLLMProvider(response_content='{"passes": true, "explanation": "Matches source"}') judge = LLMJudge(llm_provider=mock_anthropic) diff --git a/core/tests/test_mcp_registry.py b/core/tests/test_mcp_registry.py index c4c2e97b..df8761bf 100644 --- a/core/tests/test_mcp_registry.py +++ b/core/tests/test_mcp_registry.py @@ -13,9 +13,7 @@ from framework.loader.mcp_registry import MCPRegistry def _write_mock_index(cache_dir: Path, servers: dict) -> None: cache_dir.mkdir(parents=True, exist_ok=True) - (cache_dir / "registry_index.json").write_text( - json.dumps({"servers": servers}), encoding="utf-8" - ) + (cache_dir / "registry_index.json").write_text(json.dumps({"servers": servers}), encoding="utf-8") def _setup_registry_with_servers(tmp_path: Path) -> MCPRegistry: @@ -502,10 +500,7 @@ def test_resolve_max_tools(tmp_path: Path): data["servers"]["github"]["manifest"]["tools"] = [{"name": "e"}] registry._write_installed(data) configs = registry.resolve_for_agent(profile="all", max_tools=3) - total = sum( - len(registry._read_installed()["servers"][c.name]["manifest"].get("tools", [])) - for c in configs - ) + total = sum(len(registry._read_installed()["servers"][c.name]["manifest"].get("tools", [])) for c in configs) assert total <= 3 and len(configs) >= 1 @@ -872,9 +867,7 @@ def test_install_version_pin_no_version_in_manifest(tmp_path: Path): base = tmp_path / "mcp_registry" registry = MCPRegistry(base_path=base) registry.initialize() - _write_mock_index( - base / "cache", {"noversion": {"name": "noversion", "transport": {"default": "stdio"}}} - ) + _write_mock_index(base / "cache", {"noversion": {"name": "noversion", "transport": {"default": "stdio"}}}) with pytest.raises(ValueError, match="no version field"): registry.install("noversion", version="1.0.0") @@ -1139,9 +1132,7 @@ def test_get_hive_version_section_aware(tmp_path: Path, monkeypatch): fake_file.touch() # Put version in [tool.*] before [project] to trigger the old bug - toml_content = ( - '[tool.something]\nversion = "9.9.9"\n\n[project]\nname = "framework"\nversion = "0.7.1"\n' - ) + toml_content = '[tool.something]\nversion = "9.9.9"\n\n[project]\nname = "framework"\nversion = "0.7.1"\n' (tmp_path / "pyproject.toml").write_text(toml_content, encoding="utf-8") monkeypatch.setattr( diff --git a/core/tests/test_mcp_registry_cli.py b/core/tests/test_mcp_registry_cli.py index bd71af22..1ba688ba 100644 --- a/core/tests/test_mcp_registry_cli.py +++ b/core/tests/test_mcp_registry_cli.py @@ -911,9 +911,7 @@ def test_main_dispatches_mcp_list_through_real_argparse(registry_home, monkeypat assert "No servers installed" in out.getvalue() -def test_main_dispatches_mcp_install_through_real_argparse( - registry_home, sample_index, monkeypatch -): +def test_main_dispatches_mcp_install_through_real_argparse(registry_home, sample_index, monkeypatch): """hive mcp install jira goes through main() -> real argparse -> cmd_mcp_install.""" from framework.loader.mcp_registry import MCPRegistry @@ -969,9 +967,7 @@ def test_info_json_includes_agent_usage(registry, sample_index, tmp_path, monkey # Create a fake agent dir with mcp_registry.json that includes jira agent_dir = tmp_path / "fake_agent" agent_dir.mkdir() - (agent_dir / "mcp_registry.json").write_text( - json.dumps({"include": ["jira"]}), encoding="utf-8" - ) + (agent_dir / "mcp_registry.json").write_text(json.dumps({"include": ["jira"]}), encoding="utf-8") # Patch _find_agents_using_server to use our fake directory monkeypatch.setattr( @@ -1064,9 +1060,7 @@ def test_health_all_servers_json(registry, sample_index, monkeypatch): # ── _find_agents_using_server with real files ──────────────────── -def test_find_agents_using_server_resolves_via_load_agent_selection( - registry_home, tmp_path, monkeypatch -): +def test_find_agents_using_server_resolves_via_load_agent_selection(registry_home, tmp_path, monkeypatch): """_find_agents_using_server exercises the real helper with patched candidate dirs.""" from framework.loader.mcp_registry import MCPRegistry @@ -1088,9 +1082,7 @@ def test_find_agents_using_server_resolves_via_load_agent_selection( } } (cache_dir / "registry_index.json").write_text(json.dumps(index), encoding="utf-8") - (cache_dir / "last_fetched").write_text( - json.dumps({"timestamp": "2099-01-01T00:00:00+00:00"}), encoding="utf-8" - ) + (cache_dir / "last_fetched").write_text(json.dumps({"timestamp": "2099-01-01T00:00:00+00:00"}), encoding="utf-8") reg.install("jira") # Create fake agent directories: one that includes jira, one that doesn't @@ -1098,14 +1090,10 @@ def test_find_agents_using_server_resolves_via_load_agent_selection( exports_dir.mkdir() agent_yes = exports_dir / "agent_with_jira" agent_yes.mkdir() - (agent_yes / "mcp_registry.json").write_text( - json.dumps({"include": ["jira"]}), encoding="utf-8" - ) + (agent_yes / "mcp_registry.json").write_text(json.dumps({"include": ["jira"]}), encoding="utf-8") agent_no = exports_dir / "agent_without_jira" agent_no.mkdir() - (agent_no / "mcp_registry.json").write_text( - json.dumps({"include": ["slack"]}), encoding="utf-8" - ) + (agent_no / "mcp_registry.json").write_text(json.dumps({"include": ["slack"]}), encoding="utf-8") # Patch the path resolution so the helper scans our tmp_path dirs import framework.loader.mcp_registry_cli as cli_mod @@ -1185,9 +1173,7 @@ def test_integration_real_registry_install_list_info_remove(tmp_path, monkeypatc } } (cache_dir / "registry_index.json").write_text(json.dumps(index), encoding="utf-8") - (cache_dir / "last_fetched").write_text( - json.dumps({"timestamp": "2099-01-01T00:00:00+00:00"}), encoding="utf-8" - ) + (cache_dir / "last_fetched").write_text(json.dumps({"timestamp": "2099-01-01T00:00:00+00:00"}), encoding="utf-8") # Security notice sentinel so install doesn't prompt (registry_base / ".security_notice_shown").touch() @@ -1316,9 +1302,7 @@ def test_security_notice_not_persisted_on_failed_install(registry, registry_home @pytest.mark.usefixtures("_patch_get_registry") -def test_security_notice_persisted_on_successful_install( - registry, registry_home, sample_index, monkeypatch -): +def test_security_notice_persisted_on_successful_install(registry, registry_home, sample_index, monkeypatch): """Sentinel must be written after a successful install.""" monkeypatch.setattr("builtins.input", lambda prompt: "") sentinel = registry_home / ".security_notice_shown" diff --git a/core/tests/test_mcp_registry_loader.py b/core/tests/test_mcp_registry_loader.py index 0e8522d0..e7798ce7 100644 --- a/core/tests/test_mcp_registry_loader.py +++ b/core/tests/test_mcp_registry_loader.py @@ -97,9 +97,7 @@ def test_registry_precedence_over_existing_mcp_servers(monkeypatch): ] registry = ToolRegistry() - registry.register_mcp_server( - {"name": "pre", "transport": "stdio", "command": "fake", "args": [], "cwd": None} - ) + registry.register_mcp_server({"name": "pre", "transport": "stdio", "command": "fake", "args": [], "cwd": None}) registry.load_registry_servers( resolved_servers, diff --git a/core/tests/test_node_conversation.py b/core/tests/test_node_conversation.py index e6c1811c..f43e80c0 100644 --- a/core/tests/test_node_conversation.py +++ b/core/tests/test_node_conversation.py @@ -959,9 +959,7 @@ class TestConversationIntegration: } ], ) - await conv.add_tool_result( - "call_calc", "ZeroDivisionError: division by zero", is_error=True - ) + await conv.add_tool_result("call_calc", "ZeroDivisionError: division by zero", is_error=True) await conv.add_assistant_message("The calculation failed: division by zero is undefined.") # Restore @@ -1113,9 +1111,7 @@ async def _build_tool_heavy_conversation( # set_output call — must be protected so_tc = [_make_tool_call("call_so", "set_output", {"key": "result", "value": "done"})] - conv._messages.append( - Message(seq=conv._next_seq, role="assistant", content="Setting output", tool_calls=so_tc) - ) + conv._messages.append(Message(seq=conv._next_seq, role="assistant", content="Setting output", tool_calls=so_tc)) if store: await store.write_part(conv._next_seq, conv._messages[-1].to_storage_dict()) conv._next_seq += 1 @@ -1203,20 +1199,14 @@ class TestAggressiveStructuralCompaction: # Regular tool call tc1 = [_make_tool_call("call_ok", "web_search", {"query": "test"})] - conv._messages.append( - Message(seq=conv._next_seq, role="assistant", content="", tool_calls=tc1) - ) + conv._messages.append(Message(seq=conv._next_seq, role="assistant", content="", tool_calls=tc1)) conv._next_seq += 1 - conv._messages.append( - Message(seq=conv._next_seq, role="tool", content="results", tool_use_id="call_ok") - ) + conv._messages.append(Message(seq=conv._next_seq, role="tool", content="results", tool_use_id="call_ok")) conv._next_seq += 1 # Error tool call tc2 = [_make_tool_call("call_err", "web_scrape", {"url": "http://broken.com"})] - conv._messages.append( - Message(seq=conv._next_seq, role="assistant", content="", tool_calls=tc2) - ) + conv._messages.append(Message(seq=conv._next_seq, role="assistant", content="", tool_calls=tc2)) conv._next_seq += 1 conv._messages.append( Message( diff --git a/core/tests/test_phase_compaction.py b/core/tests/test_phase_compaction.py index a7459ea9..9afe2faa 100644 --- a/core/tests/test_phase_compaction.py +++ b/core/tests/test_phase_compaction.py @@ -194,9 +194,7 @@ class TestPhaseAwareCompaction: conv.set_current_phase("research") await conv.add_assistant_message( "tool call", - tool_calls=[ - {"id": "c1", "type": "function", "function": {"name": "s", "arguments": "{}"}} - ], + tool_calls=[{"id": "c1", "type": "function", "function": {"name": "s", "arguments": "{}"}}], ) await conv.add_tool_result("c1", "old_data " * 5000) @@ -204,9 +202,7 @@ class TestPhaseAwareCompaction: conv.set_current_phase("report") await conv.add_assistant_message( "tool call", - tool_calls=[ - {"id": "c2", "type": "function", "function": {"name": "s", "arguments": "{}"}} - ], + tool_calls=[{"id": "c2", "type": "function", "function": {"name": "s", "arguments": "{}"}}], ) await conv.add_tool_result("c2", "current_data " * 5000) @@ -231,17 +227,13 @@ class TestPhaseAwareCompaction: # No phase set — messages have phase_id=None await conv.add_assistant_message( "tool call", - tool_calls=[ - {"id": "c1", "type": "function", "function": {"name": "s", "arguments": "{}"}} - ], + tool_calls=[{"id": "c1", "type": "function", "function": {"name": "s", "arguments": "{}"}}], ) await conv.add_tool_result("c1", "data " * 5000) # ~6250 tokens await conv.add_assistant_message( "another tool call", - tool_calls=[ - {"id": "c2", "type": "function", "function": {"name": "s", "arguments": "{}"}} - ], + tool_calls=[{"id": "c2", "type": "function", "function": {"name": "s", "arguments": "{}"}}], ) await conv.add_tool_result("c2", "more " * 100) # ~125 tokens @@ -258,9 +250,7 @@ class TestPhaseAwareCompaction: await conv.add_assistant_message( "tool call", - tool_calls=[ - {"id": "c1", "type": "function", "function": {"name": "s", "arguments": "{}"}} - ], + tool_calls=[{"id": "c1", "type": "function", "function": {"name": "s", "arguments": "{}"}}], ) await conv.add_tool_result("c1", "data " * 5000) @@ -268,9 +258,7 @@ class TestPhaseAwareCompaction: conv.set_current_phase("report") await conv.add_assistant_message( "recent", - tool_calls=[ - {"id": "c2", "type": "function", "function": {"name": "s", "arguments": "{}"}} - ], + tool_calls=[{"id": "c2", "type": "function", "function": {"name": "s", "arguments": "{}"}}], ) await conv.add_tool_result("c2", "x" * 200) diff --git a/core/tests/test_queen_memory.py b/core/tests/test_queen_memory.py index b3619175..1e53340d 100644 --- a/core/tests/test_queen_memory.py +++ b/core/tests/test_queen_memory.py @@ -18,6 +18,7 @@ from framework.agents.queen.recall_selector import ( select_memories, ) from framework.orchestrator.prompting import build_system_prompt_for_node_context +from framework.server.queen_orchestrator import initialize_memory_scopes from framework.tools.queen_lifecycle_tools import QueenPhaseState @@ -226,9 +227,7 @@ async def test_select_memories_empty_dir(tmp_path: Path): @pytest.mark.asyncio async def test_select_memories_with_files(tmp_path: Path): (tmp_path / "a.md").write_text("---\nname: a\ndescription: about A\ntype: profile\n---\nbody") - (tmp_path / "b.md").write_text( - "---\nname: b\ndescription: about B\ntype: preference\n---\nbody" - ) + (tmp_path / "b.md").write_text("---\nname: b\ndescription: about B\ntype: preference\n---\nbody") llm = AsyncMock() llm.acomplete.return_value = MagicMock(content=json.dumps({"selected_memories": ["a.md"]})) @@ -258,9 +257,7 @@ def test_format_recall_injection(tmp_path: Path): def test_format_recall_injection_custom_label(tmp_path: Path): (tmp_path / "a.md").write_text("---\nname: a\n---\nbody of a") - result = format_recall_injection( - ["a.md"], memory_dir=tmp_path, label="Queen Memories: queen_technology" - ) + result = format_recall_injection(["a.md"], memory_dir=tmp_path, label="Queen Memories: queen_technology") assert "Queen Memories: queen_technology" in result assert "body of a" in result @@ -336,9 +333,7 @@ async def test_short_reflection(tmp_path: Path): parts_dir.mkdir(parents=True) for i in range(3): role = "user" if i % 2 == 0 else "assistant" - (parts_dir / f"{i:010d}.json").write_text( - json.dumps({"role": role, "content": f"message {i}"}) - ) + (parts_dir / f"{i:010d}.json").write_text(json.dumps({"role": role, "content": f"message {i}"})) mem_dir = tmp_path / "global_memory" mem_dir.mkdir() @@ -384,9 +379,7 @@ async def test_queen_short_reflection_writes_only_queen_scope(tmp_path: Path): parts_dir.mkdir(parents=True) for i in range(3): role = "user" if i % 2 == 0 else "assistant" - (parts_dir / f"{i:010d}.json").write_text( - json.dumps({"role": role, "content": f"message {i}"}) - ) + (parts_dir / f"{i:010d}.json").write_text(json.dumps({"role": role, "content": f"message {i}"})) global_dir = tmp_path / "global_memory" queen_dir = tmp_path / "queen_memory" @@ -435,9 +428,7 @@ async def test_unified_short_reflection_can_write_both_scopes_in_one_loop(tmp_pa parts_dir.mkdir(parents=True) for i in range(3): role = "user" if i % 2 == 0 else "assistant" - (parts_dir / f"{i:010d}.json").write_text( - json.dumps({"role": role, "content": f"message {i}"}) - ) + (parts_dir / f"{i:010d}.json").write_text(json.dumps({"role": role, "content": f"message {i}"})) global_dir = tmp_path / "global_memory" queen_dir = tmp_path / "queen_memory" @@ -626,9 +617,7 @@ async def test_shutdown_reflection_writes_global_and_queen_scope(tmp_path: Path) parts_dir.mkdir(parents=True) for i in range(3): role = "user" if i % 2 == 0 else "assistant" - (parts_dir / f"{i:010d}.json").write_text( - json.dumps({"role": role, "content": f"message {i}"}) - ) + (parts_dir / f"{i:010d}.json").write_text(json.dumps({"role": role, "content": f"message {i}"})) global_dir = tmp_path / "global_memory" queen_dir = tmp_path / "queen_memory" @@ -735,9 +724,7 @@ def test_safe_path_accepted(tmp_path: Path): def test_build_system_prompt_injects_dynamic_memory(): ctx = SimpleNamespace( identity_prompt="Identity", - node_spec=SimpleNamespace( - system_prompt="Focus", node_type="event_loop", output_keys=["out"] - ), + node_spec=SimpleNamespace(system_prompt="Focus", node_type="event_loop", output_keys=["out"]), narrative="Narrative", accounts_prompt="", skills_catalog_prompt="", diff --git a/core/tests/test_queen_nodes_prompt.py b/core/tests/test_queen_nodes_prompt.py index e1660434..a8646559 100644 --- a/core/tests/test_queen_nodes_prompt.py +++ b/core/tests/test_queen_nodes_prompt.py @@ -26,13 +26,7 @@ class TestFinalizeQueenPrompt: def test_multiline_block_handled(self): """Regex must use DOTALL so blocks can span newlines.""" - text = ( - "- item 1\n" - "\n" - "- item 2 (vision only)\n" - "\n" - "- item 3\n" - ) + text = "- item 1\n\n- item 2 (vision only)\n\n- item 3\n" vision = finalize_queen_prompt(text, has_vision=True) text_only = finalize_queen_prompt(text, has_vision=False) assert "- item 2 (vision only)" in vision @@ -40,10 +34,7 @@ class TestFinalizeQueenPrompt: assert "- item 1" in text_only and "- item 3" in text_only def test_multiple_blocks_in_same_text(self): - text = ( - "A X " - "B Y C" - ) + text = "A X B Y C" assert finalize_queen_prompt(text, has_vision=True) == "A X B Y C" assert finalize_queen_prompt(text, has_vision=False) == "A B C" @@ -51,11 +42,7 @@ class TestFinalizeQueenPrompt: """A naïve greedy regex would match from the first opening marker to the last closing marker and wipe out the middle section. Lock that down so a future refactor can't regress to greedy.""" - text = ( - "first" - "KEEP" - "second" - ) + text = "firstKEEPsecond" assert finalize_queen_prompt(text, has_vision=False) == "KEEP" assert finalize_queen_prompt(text, has_vision=True) == "firstKEEPsecond" diff --git a/core/tests/test_run_parallel_workers_tool.py b/core/tests/test_run_parallel_workers_tool.py index f4ff8c76..772a73b8 100644 --- a/core/tests/test_run_parallel_workers_tool.py +++ b/core/tests/test_run_parallel_workers_tool.py @@ -31,7 +31,6 @@ from framework.loader.tool_registry import ToolRegistry from framework.schemas.goal import Goal from framework.tools.queen_lifecycle_tools import register_queen_lifecycle_tools - # --------------------------------------------------------------------------- # Mock LLM that routes scenarios by task text in the first user message # --------------------------------------------------------------------------- @@ -249,9 +248,7 @@ async def test_run_parallel_workers_validates_tasks_input() -> None: executor = registry.get_executor() async def _call(payload: dict) -> dict: - r = executor( - ToolUse(id="tu", name="run_parallel_workers", input=payload) - ) + r = executor(ToolUse(id="tu", name="run_parallel_workers", input=payload)) if asyncio.iscoroutine(r): r = await r return json.loads(r.content) diff --git a/core/tests/test_runtime_logger.py b/core/tests/test_runtime_logger.py index 1607e24f..c5721418 100644 --- a/core/tests/test_runtime_logger.py +++ b/core/tests/test_runtime_logger.py @@ -282,9 +282,7 @@ class TestRuntimeLogStore: store.append_node_detail( _sid("testsync0"), - NodeDetail( - node_id="n1", node_name="A", success=True, input_tokens=100, output_tokens=50 - ), + NodeDetail(node_id="n1", node_name="A", success=True, input_tokens=100, output_tokens=50), ) store.append_node_detail( _sid("testsync0"), @@ -353,9 +351,7 @@ class TestRuntimeLogger: # Verify the file exists and has one line jsonl_path = tmp_path / "logs" / "sessions" / run_id / "logs" / "tool_logs.jsonl" assert jsonl_path.exists() - lines = [ - line for line in jsonl_path.read_text(encoding="utf-8").strip().split("\n") if line - ] + lines = [line for line in jsonl_path.read_text(encoding="utf-8").strip().split("\n") if line] assert len(lines) == 1 data = json.loads(lines[0]) @@ -709,9 +705,7 @@ class TestRuntimeLogger: summary = await store.load_summary(run_id) assert summary is not None assert summary.needs_attention is True - assert any( - "failed" in r.lower() or "escalat" in r.lower() for r in summary.attention_reasons - ) + assert any("failed" in r.lower() or "escalat" in r.lower() for r in summary.attention_reasons) @pytest.mark.asyncio async def test_ensure_node_logged_no_op_if_already_logged(self, tmp_path: Path): @@ -920,11 +914,7 @@ class TestRuntimeLogger: node_type="event_loop", step_index=0, error="LLM call failed: Connection timeout", - stacktrace=( - "Traceback (most recent call last):\n" - " File test.py line 10\n" - " raise TimeoutError()" - ), + stacktrace=("Traceback (most recent call last):\n File test.py line 10\n raise TimeoutError()"), is_partial=True, ) @@ -951,11 +941,7 @@ class TestRuntimeLogger: node_type="event_loop", success=False, error="Node crashed", - stacktrace=( - "Traceback (most recent call last):\n" - " File node.py line 42\n" - " raise RuntimeError('crash')" - ), + stacktrace=("Traceback (most recent call last):\n File node.py line 42\n raise RuntimeError('crash')"), ) # Verify the detail was logged with stacktrace diff --git a/core/tests/test_skill_cli_commands.py b/core/tests/test_skill_cli_commands.py index 09da830c..88ca48f7 100644 --- a/core/tests/test_skill_cli_commands.py +++ b/core/tests/test_skill_cli_commands.py @@ -171,9 +171,7 @@ class TestCmdSkillInstall: version=None, ) - with patch( - "framework.skills.installer.install_from_git", return_value=installed_path - ) as mock_install: + with patch("framework.skills.installer.install_from_git", return_value=installed_path) as mock_install: result = cmd_skill_install(args) mock_install.assert_called_once() @@ -205,9 +203,7 @@ class TestCmdSkillInstall: sentinel.touch() monkeypatch.setattr("framework.skills.installer.INSTALL_NOTICE_SENTINEL", sentinel) - args = Namespace( - name_or_url=None, from_url=None, pack=None, install_name=None, version=None - ) + args = Namespace(name_or_url=None, from_url=None, pack=None, install_name=None, version=None) result = cmd_skill_install(args) assert result == 1 @@ -321,9 +317,7 @@ class TestCmdSkillInfo: def test_exits_1_when_not_found_anywhere(self, tmp_path, capsys): with patch("framework.skills.discovery.SkillDiscovery.discover", return_value=[]): - with patch( - "framework.skills.registry.RegistryClient.get_skill_entry", return_value=None - ): + with patch("framework.skills.registry.RegistryClient.get_skill_entry", return_value=None): args = Namespace(name="ghost-skill", project_dir=str(tmp_path)) result = cmd_skill_info(args) @@ -411,9 +405,7 @@ class TestCmdSkillTest: mock_provider = MagicMock() mock_provider.complete.return_value = mock_response - args = Namespace( - path=str(skill_dir), input_json='{"prompt": "say hello"}', model=None, json=False - ) + args = Namespace(path=str(skill_dir), input_json='{"prompt": "say hello"}', model=None, json=False) with patch("framework.llm.anthropic.AnthropicProvider", return_value=mock_provider): result = cmd_skill_test(args) @@ -430,13 +422,9 @@ class TestCmdSkillTest: from framework.llm.provider import LLMResponse mock_provider = MagicMock() - mock_provider.complete.return_value = LLMResponse( - content="response", model="claude-haiku-4-5-20251001" - ) + mock_provider.complete.return_value = LLMResponse(content="response", model="claude-haiku-4-5-20251001") - args = Namespace( - path=str(skill_dir), input_json='{"prompt": "extracted prompt"}', model=None, json=False - ) + args = Namespace(path=str(skill_dir), input_json='{"prompt": "extracted prompt"}', model=None, json=False) with patch("framework.llm.anthropic.AnthropicProvider", return_value=mock_provider): cmd_skill_test(args) @@ -452,9 +440,7 @@ class TestCmdSkillTest: json.dumps( { "skill_name": "my-skill", - "evals": [ - {"id": 1, "prompt": "Say hi.", "assertions": ["Response is a greeting"]} - ], + "evals": [{"id": 1, "prompt": "Say hi.", "assertions": ["Response is a greeting"]}], } ), encoding="utf-8", @@ -465,9 +451,7 @@ class TestCmdSkillTest: from framework.llm.provider import LLMResponse mock_provider = MagicMock() - mock_provider.complete.return_value = LLMResponse( - content="Hello!", model="claude-haiku-4-5-20251001" - ) + mock_provider.complete.return_value = LLMResponse(content="Hello!", model="claude-haiku-4-5-20251001") mock_judge = MagicMock() mock_judge.evaluate.return_value = {"passes": True, "explanation": "Looks good."} @@ -486,9 +470,7 @@ class TestCmdSkillTest: json.dumps( { "skill_name": "my-skill", - "evals": [ - {"id": 1, "prompt": "Say hi.", "assertions": ["Impossible assertion"]} - ], + "evals": [{"id": 1, "prompt": "Say hi.", "assertions": ["Impossible assertion"]}], } ), encoding="utf-8", @@ -499,9 +481,7 @@ class TestCmdSkillTest: from framework.llm.provider import LLMResponse mock_provider = MagicMock() - mock_provider.complete.return_value = LLMResponse( - content="Hello!", model="claude-haiku-4-5-20251001" - ) + mock_provider.complete.return_value = LLMResponse(content="Hello!", model="claude-haiku-4-5-20251001") mock_judge = MagicMock() mock_judge.evaluate.return_value = {"passes": False, "explanation": "Did not satisfy."} @@ -542,9 +522,7 @@ class TestCmdSkillTest: from framework.llm.provider import LLMResponse mock_provider = MagicMock() - mock_provider.complete.return_value = LLMResponse( - content="Hello!", model="claude-haiku-4-5-20251001" - ) + mock_provider.complete.return_value = LLMResponse(content="Hello!", model="claude-haiku-4-5-20251001") mock_judge = MagicMock() mock_judge.evaluate.return_value = {"passes": True, "explanation": "Yes."} diff --git a/core/tests/test_skill_integration.py b/core/tests/test_skill_integration.py index ee2ad0ee..1353d3ba 100644 --- a/core/tests/test_skill_integration.py +++ b/core/tests/test_skill_integration.py @@ -121,8 +121,7 @@ class TestEndToEndPipeline: skill_dir = tmp_path / ".agents" / "skills" / "my-tool" skill_dir.mkdir(parents=True) (skill_dir / "SKILL.md").write_text( - "---\nname: my-tool\ndescription: Tool for testing.\n---\n\n" - "## Usage\nUse this tool when testing.\n", + "---\nname: my-tool\ndescription: Tool for testing.\n---\n\n## Usage\nUse this tool when testing.\n", encoding="utf-8", ) diff --git a/core/tests/test_skill_trust.py b/core/tests/test_skill_trust.py index 566dd400..2f8cf3ba 100644 --- a/core/tests/test_skill_trust.py +++ b/core/tests/test_skill_trust.py @@ -181,9 +181,7 @@ class TestProjectTrustDetector: store = TrustedRepoStore(tmp_path / "t.json") det = ProjectTrustDetector(store) with patch("subprocess.run") as mock_run: - mock_run.return_value = MagicMock( - returncode=0, stdout="http://localhost/org/repo.git\n" - ) + mock_run.return_value = MagicMock(returncode=0, stdout="http://localhost/org/repo.git\n") cls, _ = det.classify(tmp_path) assert cls == ProjectTrustClassification.ALWAYS_TRUSTED @@ -193,9 +191,7 @@ class TestProjectTrustDetector: store.trust("github.com/trusted/repo") det = ProjectTrustDetector(store) with patch("subprocess.run") as mock_run: - mock_run.return_value = MagicMock( - returncode=0, stdout="git@github.com:trusted/repo.git\n" - ) + mock_run.return_value = MagicMock(returncode=0, stdout="git@github.com:trusted/repo.git\n") cls, key = det.classify(tmp_path) assert cls == ProjectTrustClassification.TRUSTED_BY_USER assert key == "github.com/trusted/repo" @@ -205,9 +201,7 @@ class TestProjectTrustDetector: store = TrustedRepoStore(tmp_path / "t.json") det = ProjectTrustDetector(store) with patch("subprocess.run") as mock_run: - mock_run.return_value = MagicMock( - returncode=0, stdout="https://github.com/stranger/repo.git\n" - ) + mock_run.return_value = MagicMock(returncode=0, stdout="https://github.com/stranger/repo.git\n") cls, key = det.classify(tmp_path) assert cls == ProjectTrustClassification.UNTRUSTED assert key == "github.com/stranger/repo" @@ -218,9 +212,7 @@ class TestProjectTrustDetector: monkeypatch.setenv("HIVE_OWN_REMOTES", "github.com/myorg/*") det = ProjectTrustDetector(store) with patch("subprocess.run") as mock_run: - mock_run.return_value = MagicMock( - returncode=0, stdout="git@github.com:myorg/myrepo.git\n" - ) + mock_run.return_value = MagicMock(returncode=0, stdout="git@github.com:myorg/myrepo.git\n") cls, _ = det.classify(tmp_path) assert cls == ProjectTrustClassification.ALWAYS_TRUSTED @@ -289,9 +281,7 @@ class TestTrustGate: skill = make_skill("evil-skill", "project") gate = TrustGate(store=store, interactive=False) with patch("subprocess.run") as m: - m.return_value = MagicMock( - returncode=0, stdout="https://github.com/stranger/evil.git\n" - ) + m.return_value = MagicMock(returncode=0, stdout="https://github.com/stranger/evil.git\n") with caplog.at_level(logging.WARNING): result = gate.filter_and_gate([skill], project_dir=tmp_path) assert not any(s.name == "evil-skill" for s in result) @@ -314,9 +304,7 @@ class TestTrustGate: patch("sys.stdout.isatty", return_value=True), patch("subprocess.run") as m, ): - m.return_value = MagicMock( - returncode=0, stdout="https://github.com/stranger/repo.git\n" - ) + m.return_value = MagicMock(returncode=0, stdout="https://github.com/stranger/repo.git\n") result = gate.filter_and_gate([skill], project_dir=tmp_path) assert any(s.name == "session-skill" for s in result) # Must NOT persist to trusted store @@ -338,9 +326,7 @@ class TestTrustGate: patch("sys.stdout.isatty", return_value=True), patch("subprocess.run") as m, ): - m.return_value = MagicMock( - returncode=0, stdout="https://github.com/stranger/repo.git\n" - ) + m.return_value = MagicMock(returncode=0, stdout="https://github.com/stranger/repo.git\n") result = gate.filter_and_gate([skill], project_dir=tmp_path) assert any(s.name == "perm-skill" for s in result) assert store.is_trusted("github.com/stranger/repo") @@ -361,9 +347,7 @@ class TestTrustGate: patch("sys.stdout.isatty", return_value=True), patch("subprocess.run") as m, ): - m.return_value = MagicMock( - returncode=0, stdout="https://github.com/stranger/repo.git\n" - ) + m.return_value = MagicMock(returncode=0, stdout="https://github.com/stranger/repo.git\n") result = gate.filter_and_gate([skill], project_dir=tmp_path) assert not any(s.name == "bad-skill" for s in result) @@ -392,9 +376,7 @@ class TestTrustGate: patch("sys.stdout.isatty", return_value=True), patch("subprocess.run") as m, ): - m.return_value = MagicMock( - returncode=0, stdout="https://github.com/stranger/repo.git\n" - ) + m.return_value = MagicMock(returncode=0, stdout="https://github.com/stranger/repo.git\n") result = gate.filter_and_gate([skill], project_dir=tmp_path) assert not any(s.name == "interrupted-skill" for s in result) @@ -420,9 +402,7 @@ class TestTrustGate: patch("sys.stdout.isatty", return_value=True), patch("subprocess.run") as m, ): - m.return_value = MagicMock( - returncode=0, stdout="https://github.com/stranger/repo.git\n" - ) + m.return_value = MagicMock(returncode=0, stdout="https://github.com/stranger/repo.git\n") gate.filter_and_gate([skill], project_dir=tmp_path) assert sentinel.exists() @@ -436,9 +416,7 @@ class TestTrustGate: patch("sys.stdout.isatty", return_value=True), patch("subprocess.run") as m, ): - m.return_value = MagicMock( - returncode=0, stdout="https://github.com/stranger/repo.git\n" - ) + m.return_value = MagicMock(returncode=0, stdout="https://github.com/stranger/repo.git\n") gate.filter_and_gate([skill2], project_dir=tmp_path) assert not any("Security notice" in line for line in output_lines) @@ -461,9 +439,7 @@ class TestTrustGate: patch("sys.stdout.isatty", return_value=True), patch("subprocess.run") as m, ): - m.return_value = MagicMock( - returncode=0, stdout="https://github.com/stranger/repo.git\n" - ) + m.return_value = MagicMock(returncode=0, stdout="https://github.com/stranger/repo.git\n") result = gate.filter_and_gate([fw_skill, user_skill, proj_skill], project_dir=tmp_path) names = {s.name for s in result} assert "fw" in names diff --git a/core/tests/test_subagent.py b/core/tests/test_subagent.py index 13889f34..bafd2896 100644 --- a/core/tests/test_subagent.py +++ b/core/tests/test_subagent.py @@ -63,12 +63,8 @@ class TestSubagentJudge: """The judge returns a JudgeVerdict, not a plain dict.""" judge = SubagentJudge(task="task") - accept = await judge.evaluate( - {"missing_keys": [], "tool_results": [], "iteration": 0} - ) - retry = await judge.evaluate( - {"missing_keys": ["x"], "tool_results": [], "iteration": 0} - ) + accept = await judge.evaluate({"missing_keys": [], "tool_results": [], "iteration": 0}) + retry = await judge.evaluate({"missing_keys": ["x"], "tool_results": [], "iteration": 0}) assert isinstance(accept, JudgeVerdict) assert isinstance(retry, JudgeVerdict) diff --git a/core/tests/test_tool_context_propagation.py b/core/tests/test_tool_context_propagation.py index 9bee228c..92cf7def 100644 --- a/core/tests/test_tool_context_propagation.py +++ b/core/tests/test_tool_context_propagation.py @@ -57,8 +57,7 @@ async def test_execution_context_propagates_to_tool_executor() -> None: assert result.content == "ok" assert captured["exec_ctx"] is not None, ( - "execution context was None inside worker thread, " - "contextvars did not propagate through run_in_executor" + "execution context was None inside worker thread, contextvars did not propagate through run_in_executor" ) assert captured["exec_ctx"]["data_dir"] == "/tmp/test_data" diff --git a/core/tests/test_tool_registry.py b/core/tests/test_tool_registry.py index d09a5609..c141e9e3 100644 --- a/core/tests/test_tool_registry.py +++ b/core/tests/test_tool_registry.py @@ -713,9 +713,7 @@ def test_convert_mcp_tool_strips_context_params(): def test_load_mcp_config_list_format(tmp_path, monkeypatch): """load_mcp_config should accept the {\"servers\": [...]} list format.""" config_file = tmp_path / "mcp_servers.json" - config_file.write_text( - '{"servers": [{"name": "s1", "transport": "http", "url": "http://localhost:9000"}]}' - ) + config_file.write_text('{"servers": [{"name": "s1", "transport": "http", "url": "http://localhost:9000"}]}') called_with = [] @@ -856,6 +854,7 @@ class TestMcpToolProducesImageFlag: def test_mcp_tool_conversion_marks_known_safe_tools(): """MCP tools whose names are in CONCURRENCY_SAFE_TOOLS become concurrency_safe.""" from framework.loader.mcp_client import MCPTool + registry = ToolRegistry() safe_mcp = MCPTool( diff --git a/core/tests/test_two_llm_calls.py b/core/tests/test_two_llm_calls.py index d78a3b03..badc7dc6 100644 --- a/core/tests/test_two_llm_calls.py +++ b/core/tests/test_two_llm_calls.py @@ -348,11 +348,7 @@ async def _stream_and_collect(provider, messages, system, tools): elif isinstance(event, ToolCallEvent): tool_calls.append(event) elif isinstance(event, FinishEvent): - print( - f" finish: stop={event.stop_reason}" - f" in={event.input_tokens}" - f" out={event.output_tokens}" - ) + print(f" finish: stop={event.stop_reason} in={event.input_tokens} out={event.output_tokens}") elif isinstance(event, StreamErrorEvent): print(f" STREAM ERROR: {event.error}") return text, tool_calls diff --git a/examples/templates/competitive_intel_agent/__main__.py b/examples/templates/competitive_intel_agent/__main__.py index 286d7e97..1ea62602 100644 --- a/examples/templates/competitive_intel_agent/__main__.py +++ b/examples/templates/competitive_intel_agent/__main__.py @@ -235,9 +235,7 @@ async def _interactive_shell(verbose: bool = False) -> None: try: while True: try: - user_input = await asyncio.get_event_loop().run_in_executor( - None, input, "Competitors> " - ) + user_input = await asyncio.get_event_loop().run_in_executor(None, input, "Competitors> ") if user_input.lower() in ["quit", "exit", "q"]: click.echo("Goodbye!") break diff --git a/examples/templates/competitive_intel_agent/agent.py b/examples/templates/competitive_intel_agent/agent.py index 879cab88..49f0d178 100644 --- a/examples/templates/competitive_intel_agent/agent.py +++ b/examples/templates/competitive_intel_agent/agent.py @@ -303,9 +303,7 @@ class CompetitiveIntelAgent: session_state=session_state, ) - async def run( - self, context: dict[str, Any], session_state: dict[str, Any] | None = None - ) -> ExecutionResult: + async def run(self, context: dict[str, Any], session_state: dict[str, Any] | None = None) -> ExecutionResult: """ Run the agent (convenience method for single execution). diff --git a/examples/templates/deep_research_agent/agent.py b/examples/templates/deep_research_agent/agent.py index d95b8a5e..3ab32230 100644 --- a/examples/templates/deep_research_agent/agent.py +++ b/examples/templates/deep_research_agent/agent.py @@ -175,7 +175,7 @@ class DeepResearchAgent: self.pause_nodes = pause_nodes self.terminal_nodes = terminal_nodes self._graph: GraphSpec | None = None - self._agent_runtime: AgentRuntime | None = None + self._agent_runtime: AgentHost | None = None self._tool_registry: ToolRegistry | None = None self._storage_path: Path | None = None diff --git a/examples/templates/email_inbox_management/__main__.py b/examples/templates/email_inbox_management/__main__.py index 58a22b70..720910b2 100644 --- a/examples/templates/email_inbox_management/__main__.py +++ b/examples/templates/email_inbox_management/__main__.py @@ -215,9 +215,7 @@ async def _interactive_shell(verbose=False): click.echo("\nProcessing inbox...\n") - result = await agent.trigger_and_wait( - "start", {"rules": rules, "max_emails": max_emails} - ) + result = await agent.trigger_and_wait("start", {"rules": rules, "max_emails": max_emails}) if result is None: click.echo("\n[Execution timed out]\n") diff --git a/examples/templates/email_inbox_management/agent.py b/examples/templates/email_inbox_management/agent.py index ab805eb6..359fac86 100644 --- a/examples/templates/email_inbox_management/agent.py +++ b/examples/templates/email_inbox_management/agent.py @@ -34,9 +34,7 @@ goal = Goal( success_criteria=[ SuccessCriterion( id="correct-action-execution", - description=( - "Gmail actions are applied correctly to the right emails based on the user's rules" - ), + description=("Gmail actions are applied correctly to the right emails based on the user's rules"), metric="action_correctness", target=">=95%", weight=0.30, @@ -54,8 +52,7 @@ goal = Goal( SuccessCriterion( id="batch-completeness", description=( - "All fetched emails up to the configured max are processed and acted upon; " - "none are silently skipped" + "All fetched emails up to the configured max are processed and acted upon; none are silently skipped" ), metric="emails_processed_ratio", target="100%", @@ -82,8 +79,7 @@ goal = Goal( Constraint( id="non-destructive-default", description=( - "Archiving removes from inbox but preserves the email; only explicit " - "trash rules move emails to trash" + "Archiving removes from inbox but preserves the email; only explicit trash rules move emails to trash" ), constraint_type="hard", category="safety", diff --git a/examples/templates/email_inbox_management/tools.py b/examples/templates/email_inbox_management/tools.py index 5959bc7b..8377b420 100644 --- a/examples/templates/email_inbox_management/tools.py +++ b/examples/templates/email_inbox_management/tools.py @@ -46,8 +46,7 @@ TOOLS = { "page_token": { "type": "string", "description": ( - "Gmail API page token from a previous call's next_page_token. " - "Omit for the first page." + "Gmail API page token from a previous call's next_page_token. Omit for the first page." ), }, "after_timestamp": { @@ -101,9 +100,7 @@ def _get_data_dir() -> str: """Get the session-scoped data_dir from ToolRegistry execution context.""" ctx = _execution_context.get() if not ctx or "data_dir" not in ctx: - raise RuntimeError( - "data_dir not set in execution context. Is the tool running inside a Orchestrator?" - ) + raise RuntimeError("data_dir not set in execution context. Is the tool running inside a Orchestrator?") return ctx["data_dir"] @@ -138,8 +135,7 @@ def _get_access_token(account: str = "") -> str: return token raise RuntimeError( - "Gmail credentials not configured. " - "Connect Gmail via hive.adenhq.com or set GOOGLE_ACCESS_TOKEN." + "Gmail credentials not configured. Connect Gmail via hive.adenhq.com or set GOOGLE_ACCESS_TOKEN." ) @@ -285,8 +281,7 @@ def _bulk_fetch_emails( dropped = len(message_ids) - len(emails) if dropped > 0: logger.warning( - f"Dropped {dropped}/{len(message_ids)} emails during metadata fetch " - f"(wrote {len(emails)} to emails.jsonl)" + f"Dropped {dropped}/{len(message_ids)} emails during metadata fetch (wrote {len(emails)} to emails.jsonl)" ) # Phase 3: Append JSONL (append so pagination accumulates across pages) diff --git a/examples/templates/email_reply_agent/__main__.py b/examples/templates/email_reply_agent/__main__.py index 4fd4086d..038b90c3 100644 --- a/examples/templates/email_reply_agent/__main__.py +++ b/examples/templates/email_reply_agent/__main__.py @@ -99,9 +99,7 @@ def tui(): def info(): """Show agent info.""" data = default_agent.info() - click.echo( - f"Agent: {data['name']}\nVersion: {data['version']}\nDescription: {data['description']}" - ) + click.echo(f"Agent: {data['name']}\nVersion: {data['version']}\nDescription: {data['description']}") click.echo(f"Nodes: {', '.join(data['nodes'])}") click.echo(f"Client-facing: {', '.join(data['client_facing_nodes'])}") diff --git a/examples/templates/email_reply_agent/agent.py b/examples/templates/email_reply_agent/agent.py index 434683d6..1c3d051a 100644 --- a/examples/templates/email_reply_agent/agent.py +++ b/examples/templates/email_reply_agent/agent.py @@ -103,7 +103,9 @@ terminal_nodes = [] # Module-level vars read by AgentLoader.load() conversation_mode = "continuous" -identity_prompt = "You are a helpful email reply assistant that filters unreplied emails and sends personalized responses." +identity_prompt = ( + "You are a helpful email reply assistant that filters unreplied emails and sends personalized responses." +) loop_config = { "max_iterations": 100, "max_tool_calls_per_turn": 30, diff --git a/examples/templates/email_reply_agent/config.py b/examples/templates/email_reply_agent/config.py index cd822134..901c5b62 100644 --- a/examples/templates/email_reply_agent/config.py +++ b/examples/templates/email_reply_agent/config.py @@ -37,7 +37,9 @@ class AgentMetadata: name: str = "Email Reply Agent" version: str = "1.0.0" description: str = "Filter unreplied emails, confirm recipients, send personalized replies." - intro_message: str = "Tell me which emails you want to reply to (e.g., 'emails from @company.com in the last week')." + intro_message: str = ( + "Tell me which emails you want to reply to (e.g., 'emails from @company.com in the last week')." + ) metadata = AgentMetadata() diff --git a/examples/templates/job_hunter/agent.py b/examples/templates/job_hunter/agent.py index 6e7d9036..38313f71 100644 --- a/examples/templates/job_hunter/agent.py +++ b/examples/templates/job_hunter/agent.py @@ -151,7 +151,7 @@ class JobHunterAgent: self.pause_nodes = pause_nodes self.terminal_nodes = terminal_nodes self._graph: GraphSpec | None = None - self._agent_runtime: AgentRuntime | None = None + self._agent_runtime: AgentHost | None = None self._tool_registry: ToolRegistry | None = None self._storage_path: Path | None = None diff --git a/examples/templates/job_hunter/nodes/__init__.py b/examples/templates/job_hunter/nodes/__init__.py index 3b9841de..0c989a32 100644 --- a/examples/templates/job_hunter/nodes/__init__.py +++ b/examples/templates/job_hunter/nodes/__init__.py @@ -14,8 +14,7 @@ intake_node = NodeSpec( input_keys=[], output_keys=["resume_text", "role_analysis"], success_criteria=( - "The user's resume has been analyzed and 3-5 target roles identified " - "based on their actual experience." + "The user's resume has been analyzed and 3-5 target roles identified based on their actual experience." ), system_prompt="""\ You are a career analyst. Your task is to analyze the user's resume and identify the best role fits. @@ -88,10 +87,7 @@ job_review_node = NodeSpec( max_node_visits=1, input_keys=["job_listings", "resume_text"], output_keys=["selected_jobs"], - success_criteria=( - "User has reviewed all job listings and explicitly selected " - "which jobs they want to apply to." - ), + success_criteria=("User has reviewed all job listings and explicitly selected which jobs they want to apply to."), system_prompt="""\ You are helping a job seeker choose which positions to apply to. diff --git a/examples/templates/local_business_extractor/agent.py b/examples/templates/local_business_extractor/agent.py index 822b040e..af47679b 100644 --- a/examples/templates/local_business_extractor/agent.py +++ b/examples/templates/local_business_extractor/agent.py @@ -158,9 +158,7 @@ class LocalBusinessExtractor: async def run(self, context, session_state=None): await self.start() try: - result = await self._agent_runtime.trigger_and_wait( - "default", context, session_state=session_state - ) + result = await self._agent_runtime.trigger_and_wait("default", context, session_state=session_state) return result or ExecutionResult(success=False, error="Execution timeout") finally: await self.stop() diff --git a/examples/templates/local_business_extractor/config.py b/examples/templates/local_business_extractor/config.py index e9e311bb..31585842 100644 --- a/examples/templates/local_business_extractor/config.py +++ b/examples/templates/local_business_extractor/config.py @@ -12,8 +12,7 @@ class AgentMetadata: name: str = "Local Business Extractor" version: str = "1.0.0" description: str = ( - "Extracts local businesses from Google Maps, scrapes contact details, " - "and syncs the results to Google Sheets." + "Extracts local businesses from Google Maps, scrapes contact details, and syncs the results to Google Sheets." ) intro_message: str = "I'm ready to extract business data. What should I search for?" diff --git a/examples/templates/meeting_scheduler/__main__.py b/examples/templates/meeting_scheduler/__main__.py index 6315d102..9d34d9f0 100644 --- a/examples/templates/meeting_scheduler/__main__.py +++ b/examples/templates/meeting_scheduler/__main__.py @@ -42,9 +42,7 @@ def run(attendee, duration, title, verbose): } ) ) - click.echo( - json.dumps({"success": result.success, "output": result.output}, indent=2, default=str) - ) + click.echo(json.dumps({"success": result.success, "output": result.output}, indent=2, default=str)) sys.exit(0 if result.success else 1) @@ -102,12 +100,8 @@ def tui(): def info(): """Show agent info.""" data = default_agent.info() - click.echo( - f"Agent: {data['name']}\nVersion: {data['version']}\nDescription: {data['description']}" - ) - click.echo( - f"Nodes: {', '.join(data['nodes'])}\nClient-facing: {', '.join(data['client_facing_nodes'])}" - ) + click.echo(f"Agent: {data['name']}\nVersion: {data['version']}\nDescription: {data['description']}") + click.echo(f"Nodes: {', '.join(data['nodes'])}\nClient-facing: {', '.join(data['client_facing_nodes'])}") @cli.command() diff --git a/examples/templates/meeting_scheduler/agent.py b/examples/templates/meeting_scheduler/agent.py index ec077a33..6127efea 100644 --- a/examples/templates/meeting_scheduler/agent.py +++ b/examples/templates/meeting_scheduler/agent.py @@ -109,7 +109,9 @@ terminal_nodes = [] # Forever-alive # Module-level vars read by AgentLoader.load() conversation_mode = "continuous" -identity_prompt = "You are a helpful meeting scheduler assistant that manages calendar availability and sends confirmations." +identity_prompt = ( + "You are a helpful meeting scheduler assistant that manages calendar availability and sends confirmations." +) loop_config = { "max_iterations": 100, "max_tool_calls_per_turn": 20, @@ -200,9 +202,7 @@ class MeetingScheduler: await self._agent_runtime.stop() self._agent_runtime = None - async def trigger_and_wait( - self, entry_point="default", input_data=None, timeout=None, session_state=None - ): + async def trigger_and_wait(self, entry_point="default", input_data=None, timeout=None, session_state=None): if self._agent_runtime is None: raise RuntimeError("Agent not started. Call start() first.") return await self._agent_runtime.trigger_and_wait( diff --git a/examples/templates/sdr_agent/__main__.py b/examples/templates/sdr_agent/__main__.py index e52cfdae..3a468eb1 100644 --- a/examples/templates/sdr_agent/__main__.py +++ b/examples/templates/sdr_agent/__main__.py @@ -179,12 +179,8 @@ async def _interactive_shell(verbose=False): click.echo("Goodbye!") break - contacts = await asyncio.get_event_loop().run_in_executor( - None, input, "Contacts (JSON)> " - ) - background = await asyncio.get_event_loop().run_in_executor( - None, input, "Your background/role> " - ) + contacts = await asyncio.get_event_loop().run_in_executor(None, input, "Contacts (JSON)> ") + background = await asyncio.get_event_loop().run_in_executor(None, input, "Your background/role> ") if not contacts.strip(): continue diff --git a/examples/templates/sdr_agent/agent.py b/examples/templates/sdr_agent/agent.py index b279ae3d..6b2862a8 100644 --- a/examples/templates/sdr_agent/agent.py +++ b/examples/templates/sdr_agent/agent.py @@ -43,10 +43,7 @@ goal = Goal( ), SuccessCriterion( id="scam-filter-effectiveness", - description=( - "Suspicious profiles (risk_score >= 7) are correctly identified " - "and excluded from outreach" - ), + description=("Suspicious profiles (risk_score >= 7) are correctly identified and excluded from outreach"), metric="filter_precision", target=">=95%", weight=0.25, @@ -193,7 +190,7 @@ class SDRAgent: self.entry_points = entry_points self.pause_nodes = pause_nodes self.terminal_nodes = terminal_nodes - self._agent_runtime: AgentRuntime | None = None + self._agent_runtime: AgentHost | None = None self._graph: GraphSpec | None = None self._tool_registry: ToolRegistry | None = None diff --git a/examples/templates/sdr_agent/tools.py b/examples/templates/sdr_agent/tools.py index 4a3eb3f9..6915f9d7 100644 --- a/examples/templates/sdr_agent/tools.py +++ b/examples/templates/sdr_agent/tools.py @@ -34,10 +34,7 @@ TOOLS = { "properties": { "file_path": { "type": "string", - "description": ( - "Absolute or relative path to a JSON file containing " - "a list of contact objects." - ), + "description": ("Absolute or relative path to a JSON file containing a list of contact objects."), }, }, "required": ["file_path"], @@ -55,9 +52,7 @@ def _get_data_dir() -> str: """Get the session-scoped data_dir from ToolRegistry execution context.""" ctx = _execution_context.get() if not ctx or "data_dir" not in ctx: - raise RuntimeError( - "data_dir not set in execution context. Is the tool running inside a Orchestrator?" - ) + raise RuntimeError("data_dir not set in execution context. Is the tool running inside a Orchestrator?") return ctx["data_dir"] diff --git a/examples/templates/twitter_news_agent/agent.py b/examples/templates/twitter_news_agent/agent.py index 50627cf3..329d4a07 100644 --- a/examples/templates/twitter_news_agent/agent.py +++ b/examples/templates/twitter_news_agent/agent.py @@ -184,9 +184,7 @@ class TwitterNewsAgent: await self._agent_runtime.stop() self._agent_runtime = None - async def trigger_and_wait( - self, entry_point="default", input_data=None, timeout=None, session_state=None - ): + async def trigger_and_wait(self, entry_point="default", input_data=None, timeout=None, session_state=None): if self._agent_runtime is None: raise RuntimeError("Agent not started. Call start() first.") return await self._agent_runtime.trigger_and_wait( diff --git a/examples/templates/twitter_news_agent/config.py b/examples/templates/twitter_news_agent/config.py index 08d00344..6e41a2d6 100644 --- a/examples/templates/twitter_news_agent/config.py +++ b/examples/templates/twitter_news_agent/config.py @@ -11,12 +11,8 @@ default_config = RuntimeConfig() class AgentMetadata: name: str = "Twitter News Digest" version: str = "1.1.0" - description: str = ( - "Monitors Twitter feeds and provides a daily news digest, focused on tech news." - ) - intro_message: str = ( - "I'm ready to fetch the latest tech news from Twitter. Which tech handles should I check?" - ) + description: str = "Monitors Twitter feeds and provides a daily news digest, focused on tech news." + intro_message: str = "I'm ready to fetch the latest tech news from Twitter. Which tech handles should I check?" metadata = AgentMetadata() diff --git a/examples/templates/vulnerability_assessment/agent.py b/examples/templates/vulnerability_assessment/agent.py index fbc2ffc8..e645d734 100644 --- a/examples/templates/vulnerability_assessment/agent.py +++ b/examples/templates/vulnerability_assessment/agent.py @@ -55,9 +55,7 @@ goal = Goal( ), SuccessCriterion( id="remediation-guidance", - description=( - "Every finding includes clear, actionable remediation steps a developer can follow" - ), + description=("Every finding includes clear, actionable remediation steps a developer can follow"), metric="findings_with_remediation", target="100%", weight=0.20, @@ -344,9 +342,7 @@ class VulnerabilityResearcherAgent: for node_id in node_ids: outgoing = [e for e in self.edges if e.source == node_id] if not outgoing and node_id not in self.terminal_nodes: - warnings.append( - f"Node '{node_id}' has no outgoing edges (dead end in forever-alive graph)" - ) + warnings.append(f"Node '{node_id}' has no outgoing edges (dead end in forever-alive graph)") return { "valid": len(errors) == 0, diff --git a/examples/templates/vulnerability_assessment/nodes/__init__.py b/examples/templates/vulnerability_assessment/nodes/__init__.py index a88eb565..39bc922d 100644 --- a/examples/templates/vulnerability_assessment/nodes/__init__.py +++ b/examples/templates/vulnerability_assessment/nodes/__init__.py @@ -103,8 +103,7 @@ risk_scoring_node = NodeSpec( id="risk-scoring", name="Risk Scoring", description=( - "Calculate weighted letter grades (A-F) per security category and overall " - "risk score from scan results" + "Calculate weighted letter grades (A-F) per security category and overall risk score from scan results" ), node_type="event_loop", max_node_visits=0, diff --git a/linkedin_script.py b/linkedin_script.py index 4222b7f6..d9718747 100644 --- a/linkedin_script.py +++ b/linkedin_script.py @@ -1,5 +1,6 @@ from playwright.sync_api import sync_playwright + def main(): with sync_playwright() as p: browser = p.chromium.launch(headless=False) @@ -7,10 +8,11 @@ def main(): page.goto("https://www.linkedin.com/login") print("Please log in to LinkedIn in the opened browser window.") input("Press Enter here when you have logged in...") - + # Now search connections print("Logged in. Ready to proceed.") browser.close() + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/scripts/check_llm_key.py b/scripts/check_llm_key.py index 48559442..a903de53 100644 --- a/scripts/check_llm_key.py +++ b/scripts/check_llm_key.py @@ -109,9 +109,7 @@ def _extract_openrouter_model_lookup(payload: object) -> dict[str, str]: return lookup -def _format_openrouter_model_unavailable_message( - model: str, available_model_lookup: dict[str, str] -) -> str: +def _format_openrouter_model_unavailable_message(model: str, available_model_lookup: dict[str, str]) -> str: """Return a helpful not-found message with close-match suggestions.""" suggestions = [ available_model_lookup[key] @@ -166,9 +164,7 @@ def check_openai_compatible(api_key: str, endpoint: str, name: str) -> dict: return {"valid": False, "message": f"{name} API returned status {r.status_code}"} -def check_openrouter( - api_key: str, api_base: str = "https://openrouter.ai/api/v1", **_: str -) -> dict: +def check_openrouter(api_key: str, api_base: str = "https://openrouter.ai/api/v1", **_: str) -> dict: """Validate OpenRouter key against GET /models.""" endpoint = f"{api_base.rstrip('/')}/models" with httpx.Client(timeout=TIMEOUT) as client: @@ -211,9 +207,7 @@ def check_openrouter_model( return { "valid": False, - "message": _format_openrouter_model_unavailable_message( - requested_model, available_model_lookup - ), + "message": _format_openrouter_model_unavailable_message(requested_model, available_model_lookup), } if r.status_code == 429: return { @@ -298,40 +292,22 @@ def check_gemini(api_key: str, **_: str) -> dict: PROVIDERS = { "anthropic": lambda key, **kw: check_anthropic(key), - "openai": lambda key, **kw: check_openai_compatible( - key, "https://api.openai.com/v1/models", "OpenAI" - ), + "openai": lambda key, **kw: check_openai_compatible(key, "https://api.openai.com/v1/models", "OpenAI"), "gemini": lambda key, **kw: check_gemini(key), - "groq": lambda key, **kw: check_openai_compatible( - key, "https://api.groq.com/openai/v1/models", "Groq" - ), - "cerebras": lambda key, **kw: check_openai_compatible( - key, "https://api.cerebras.ai/v1/models", "Cerebras" - ), + "groq": lambda key, **kw: check_openai_compatible(key, "https://api.groq.com/openai/v1/models", "Groq"), + "cerebras": lambda key, **kw: check_openai_compatible(key, "https://api.cerebras.ai/v1/models", "Cerebras"), "openrouter": lambda key, **kw: check_openrouter(key, **kw), - "deepseek": lambda key, **_: check_openai_compatible( - key, "https://api.deepseek.com/v1/models", "DeepSeek" - ), - "together": lambda key, **_: check_openai_compatible( - key, "https://api.together.xyz/v1/models", "Together AI" - ), - "mistral": lambda key, **_: check_openai_compatible( - key, "https://api.mistral.ai/v1/models", "Mistral" - ), + "deepseek": lambda key, **_: check_openai_compatible(key, "https://api.deepseek.com/v1/models", "DeepSeek"), + "together": lambda key, **_: check_openai_compatible(key, "https://api.together.xyz/v1/models", "Together AI"), + "mistral": lambda key, **_: check_openai_compatible(key, "https://api.mistral.ai/v1/models", "Mistral"), "xai": lambda key, **_: check_openai_compatible(key, "https://api.x.ai/v1/models", "xAI"), - "perplexity": lambda key, **_: check_openai_compatible( - key, "https://api.perplexity.ai/v1/models", "Perplexity" - ), + "perplexity": lambda key, **_: check_openai_compatible(key, "https://api.perplexity.ai/v1/models", "Perplexity"), "minimax": lambda key, **_: check_minimax(key), # Kimi For Coding uses an Anthropic-compatible endpoint; check via /v1/messages # with empty messages (same as check_anthropic, triggers 400 not 401). - "kimi": lambda key, **kw: check_anthropic_compatible( - key, "https://api.kimi.com/coding/v1/messages", "Kimi" - ), + "kimi": lambda key, **kw: check_anthropic_compatible(key, "https://api.kimi.com/coding/v1/messages", "Kimi"), # Hive LLM uses an Anthropic-compatible endpoint - "hive": lambda key, **kw: check_anthropic_compatible( - key, f"{HIVE_LLM_ENDPOINT}/v1/messages", "Hive" - ), + "hive": lambda key, **kw: check_anthropic_compatible(key, f"{HIVE_LLM_ENDPOINT}/v1/messages", "Hive"), } @@ -365,13 +341,9 @@ def main() -> None: result = check_openrouter(api_key, api_base) elif api_base and provider_id == "kimi": # Kimi uses an Anthropic-compatible endpoint; check via /v1/messages - result = check_anthropic_compatible( - api_key, api_base.rstrip("/") + "/v1/messages", "Kimi" - ) + result = check_anthropic_compatible(api_key, api_base.rstrip("/") + "/v1/messages", "Kimi") elif api_base and provider_id == "hive": - result = check_anthropic_compatible( - api_key, api_base.rstrip("/") + "/v1/messages", "Hive" - ) + result = check_anthropic_compatible(api_key, api_base.rstrip("/") + "/v1/messages", "Hive") elif api_base: # Custom API base (ZAI or other OpenAI-compatible) endpoint = api_base.rstrip("/") + "/models" diff --git a/scripts/debug_queen_prompt.py b/scripts/debug_queen_prompt.py index 936c37ca..5e8cbf51 100644 --- a/scripts/debug_queen_prompt.py +++ b/scripts/debug_queen_prompt.py @@ -188,8 +188,5 @@ if __name__ == "__main__": print_independent_prompt() else: print(f"Unknown phase: {phase}") - print( - "Usage: uv run scripts/debug_queen_prompt.py " - "[planning|building|staging|running|independent|all]" - ) + print("Usage: uv run scripts/debug_queen_prompt.py [planning|building|staging|running|independent|all]") sys.exit(1) diff --git a/scripts/llm_debug_log_visualizer.py b/scripts/llm_debug_log_visualizer.py index 8b49c086..05b00878 100644 --- a/scripts/llm_debug_log_visualizer.py +++ b/scripts/llm_debug_log_visualizer.py @@ -133,9 +133,7 @@ def _is_test_session(execution_id: str, records: list[dict[str, Any]]) -> bool: if execution_id.startswith(" - +