""" Event Bus - Pub/sub event system for inter-stream communication. Allows streams to: - Publish events about their execution - Subscribe to events from other streams - Coordinate based on shared state changes """ import asyncio import json import logging import os from collections.abc import Awaitable, Callable from dataclasses import dataclass, field from datetime import datetime from enum import StrEnum from pathlib import Path from typing import IO, Any logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # HIVE_DEBUG_EVENTS — write every published event to a JSONL file. # # Set the env var to any truthy value to enable: # HIVE_DEBUG_EVENTS=1 → writes to ~/.hive/event_logs/.jsonl # HIVE_DEBUG_EVENTS=/tmp/ev → writes to that exact directory # # Each line is a full JSON serialisation of the AgentEvent. # The file is opened lazily on first publish and flushed after every write. # --------------------------------------------------------------------------- _DEBUG_EVENTS_RAW = os.environ.get("HIVE_DEBUG_EVENTS", "").strip() _DEBUG_EVENTS_ENABLED = _DEBUG_EVENTS_RAW.lower() in ("1", "true", "full") or ( bool(_DEBUG_EVENTS_RAW) and _DEBUG_EVENTS_RAW.lower() not in ("0", "false", "") ) def _open_event_log() -> IO[str] | None: """Open a JSONL event log file. Returns None if disabled.""" if not _DEBUG_EVENTS_ENABLED: return None raw = _DEBUG_EVENTS_RAW if raw.lower() in ("1", "true", "full"): log_dir = Path.home() / ".hive" / "event_logs" else: log_dir = Path(raw) log_dir.mkdir(parents=True, exist_ok=True) ts = datetime.now().strftime("%Y%m%d_%H%M%S") path = log_dir / f"{ts}.jsonl" logger.info("Event debug log → %s", path) return open(path, "a", encoding="utf-8") # noqa: SIM115 _event_log_file: IO[str] | None = None _event_log_ready = False # lazy init guard class EventType(StrEnum): """Types of events that can be published.""" # Execution lifecycle EXECUTION_STARTED = "execution_started" EXECUTION_COMPLETED = "execution_completed" EXECUTION_FAILED = "execution_failed" EXECUTION_PAUSED = "execution_paused" EXECUTION_RESUMED = "execution_resumed" # State changes STATE_CHANGED = "state_changed" STATE_CONFLICT = "state_conflict" # Goal tracking GOAL_PROGRESS = "goal_progress" GOAL_ACHIEVED = "goal_achieved" CONSTRAINT_VIOLATION = "constraint_violation" # Stream lifecycle STREAM_STARTED = "stream_started" STREAM_STOPPED = "stream_stopped" # Node event-loop lifecycle NODE_LOOP_STARTED = "node_loop_started" NODE_LOOP_ITERATION = "node_loop_iteration" NODE_LOOP_COMPLETED = "node_loop_completed" NODE_ACTION_PLAN = "node_action_plan" # LLM streaming observability LLM_TEXT_DELTA = "llm_text_delta" LLM_REASONING_DELTA = "llm_reasoning_delta" LLM_TURN_COMPLETE = "llm_turn_complete" # Tool lifecycle TOOL_CALL_STARTED = "tool_call_started" TOOL_CALL_COMPLETED = "tool_call_completed" # Queen/user interaction events CLIENT_OUTPUT_DELTA = "client_output_delta" CLIENT_INPUT_REQUESTED = "client_input_requested" CLIENT_INPUT_RECEIVED = "client_input_received" # Internal node observability NODE_INTERNAL_OUTPUT = "node_internal_output" NODE_INPUT_BLOCKED = "node_input_blocked" NODE_STALLED = "node_stalled" NODE_TOOL_DOOM_LOOP = "node_tool_doom_loop" # Judge decisions (implicit judge in event loop nodes) JUDGE_VERDICT = "judge_verdict" # Retry tracking NODE_RETRY = "node_retry" # Worker agent lifecycle WORKER_COMPLETED = "worker_completed" WORKER_FAILED = "worker_failed" # Context management CONTEXT_COMPACTED = "context_compacted" CONTEXT_USAGE_UPDATED = "context_usage_updated" # External triggers WEBHOOK_RECEIVED = "webhook_received" # Custom events CUSTOM = "custom" # Escalation (agent requests handoff to queen) ESCALATION_REQUESTED = "escalation_requested" # Execution resurrection (auto-restart on non-fatal failure) EXECUTION_RESURRECTED = "execution_resurrected" # Colony lifecycle (session manager → frontend) WORKER_COLONY_LOADED = "worker_colony_loaded" # Queen create_colony tool finished forking; carries colony_name + # path so the frontend can render a system message linking to the # new colony page at /colony/{colony_name}. COLONY_CREATED = "colony_created" CREDENTIALS_REQUIRED = "credentials_required" # Queen phase changes (working <-> reviewing) QUEEN_PHASE_CHANGED = "queen_phase_changed" # Queen identity — which queen profile was selected for this session QUEEN_IDENTITY_SELECTED = "queen_identity_selected" # Subagent reports (one-way progress updates from sub-agents) SUBAGENT_REPORT = "subagent_report" # Trigger lifecycle (queen-level triggers / heartbeats) TRIGGER_AVAILABLE = "trigger_available" TRIGGER_ACTIVATED = "trigger_activated" TRIGGER_DEACTIVATED = "trigger_deactivated" TRIGGER_FIRED = "trigger_fired" TRIGGER_REMOVED = "trigger_removed" TRIGGER_UPDATED = "trigger_updated" @dataclass class AgentEvent: """An event in the agent system.""" type: EventType stream_id: str node_id: str | None = None # Which node emitted this event execution_id: str | None = None data: dict[str, Any] = field(default_factory=dict) timestamp: datetime = field(default_factory=datetime.now) correlation_id: str | None = None # For tracking related events colony_id: str | None = None # Which colony emitted this event run_id: str | None = None # Unique ID per trigger() invocation — used for run dividers def to_dict(self) -> dict: """Convert to dictionary for serialization.""" d = { "type": self.type.value, "stream_id": self.stream_id, "node_id": self.node_id, "execution_id": self.execution_id, "data": self.data, "timestamp": self.timestamp.isoformat(), "correlation_id": self.correlation_id, "colony_id": self.colony_id, } if self.run_id is not None: d["run_id"] = self.run_id return d # Type for event handlers EventHandler = Callable[[AgentEvent], Awaitable[None]] @dataclass class Subscription: """A subscription to events.""" id: str event_types: set[EventType] handler: EventHandler filter_stream: str | None = None # Only receive events from this stream filter_node: str | None = None # Only receive events from this node filter_execution: str | None = None # Only receive events from this execution filter_colony: str | None = None # Only receive events from this colony class EventBus: """ Pub/sub event bus for inter-stream communication. Features: - Async event handling - Type-based subscriptions - Stream/execution filtering - Event history for debugging Example: bus = EventBus() # Subscribe to execution events async def on_execution_complete(event: AgentEvent): print(f"Execution {event.execution_id} completed") bus.subscribe( event_types=[EventType.EXECUTION_COMPLETED], handler=on_execution_complete, ) # Publish an event await bus.publish(AgentEvent( type=EventType.EXECUTION_COMPLETED, stream_id="webhook", execution_id="exec_123", data={"result": "success"}, )) """ def __init__( self, max_history: int = 1000, max_concurrent_handlers: int = 10, ): """ Initialize event bus. Args: max_history: Maximum events to keep in history max_concurrent_handlers: Maximum concurrent handler executions """ self._subscriptions: dict[str, Subscription] = {} self._event_history: list[AgentEvent] = [] self._max_history = max_history self._semaphore = asyncio.Semaphore(max_concurrent_handlers) self._subscription_counter = 0 self._lock = asyncio.Lock() # Per-session persistent event log (always-on, survives restarts) self._session_log: IO[str] | None = None self._session_log_iteration_offset: int = 0 # Accumulator for client_output_delta snapshots — flushed on llm_turn_complete. # Key: (stream_id, node_id, execution_id, iteration, inner_turn) → latest AgentEvent self._pending_output_snapshots: dict[tuple, AgentEvent] = {} def set_session_log(self, path: Path, *, iteration_offset: int = 0) -> None: """Enable per-session event persistence to a JSONL file. Called once when the queen starts so that all events survive server restarts and can be replayed to reconstruct the frontend state. ``iteration_offset`` is added to the ``iteration`` field in logged events so that cold-resumed sessions produce monotonically increasing iteration values — preventing frontend message ID collisions between the original run and resumed runs. """ if self._session_log is not None: try: self._session_log.close() except Exception: pass path.parent.mkdir(parents=True, exist_ok=True) self._session_log = open(path, "a", encoding="utf-8") # noqa: SIM115 self._session_log_iteration_offset = iteration_offset logger.info("Session event log → %s (iteration_offset=%d)", path, iteration_offset) def close_session_log(self) -> None: """Close the per-session event log file.""" # Flush any pending output snapshots before closing self._flush_pending_snapshots() if self._session_log is not None: try: self._session_log.close() except Exception: pass self._session_log = None # Event types that are high-frequency streaming deltas — accumulated rather # than written individually to the session log. _STREAMING_DELTA_TYPES = frozenset( { EventType.CLIENT_OUTPUT_DELTA, EventType.LLM_TEXT_DELTA, EventType.LLM_REASONING_DELTA, } ) def _write_session_log_event(self, event: AgentEvent) -> None: """Write an event to the per-session log with streaming coalescing. Streaming deltas (client_output_delta, llm_text_delta) are accumulated in memory. When llm_turn_complete fires, any pending snapshots for that (stream_id, node_id, execution_id) are flushed as single consolidated events before the turn-complete event itself is written. Note: iteration offset is already applied in publish() before this is called, so events here already have correct iteration values. """ if self._session_log is None: return if event.type in self._STREAMING_DELTA_TYPES: # Accumulate — keep only the latest event (which carries the full snapshot) key = ( event.stream_id, event.node_id, event.execution_id, event.data.get("iteration"), event.data.get("inner_turn", 0), ) self._pending_output_snapshots[key] = event return # On turn-complete, flush accumulated snapshots for this stream first if event.type == EventType.LLM_TURN_COMPLETE: self._flush_pending_snapshots( stream_id=event.stream_id, node_id=event.node_id, execution_id=event.execution_id, ) line = json.dumps(event.to_dict(), default=str) self._session_log.write(line + "\n") self._session_log.flush() def _flush_pending_snapshots( self, stream_id: str | None = None, node_id: str | None = None, execution_id: str | None = None, ) -> None: """Flush accumulated streaming snapshots to the session log. When called with filters, only matching entries are flushed. When called without filters (e.g. on close), everything is flushed. """ if self._session_log is None or not self._pending_output_snapshots: return to_flush: list[tuple] = [] for key, _evt in self._pending_output_snapshots.items(): if stream_id is not None: k_stream, k_node, k_exec, _, _ = key if k_stream != stream_id or k_node != node_id or k_exec != execution_id: continue to_flush.append(key) for key in to_flush: evt = self._pending_output_snapshots.pop(key) try: line = json.dumps(evt.to_dict(), default=str) self._session_log.write(line + "\n") except Exception: pass if to_flush: try: self._session_log.flush() except Exception: pass def subscribe( self, event_types: list[EventType], handler: EventHandler, filter_stream: str | None = None, filter_node: str | None = None, filter_execution: str | None = None, filter_colony: str | None = None, ) -> str: """ Subscribe to events. Args: event_types: Types of events to receive handler: Async function to call when event occurs filter_stream: Only receive events from this stream filter_node: Only receive events from this node filter_execution: Only receive events from this execution filter_colony: Only receive events from this colony Returns: Subscription ID (use to unsubscribe) """ self._subscription_counter += 1 sub_id = f"sub_{self._subscription_counter}" subscription = Subscription( id=sub_id, event_types=set(event_types), handler=handler, filter_stream=filter_stream, filter_node=filter_node, filter_execution=filter_execution, filter_colony=filter_colony, ) self._subscriptions[sub_id] = subscription logger.debug(f"Subscription {sub_id} registered for {event_types}") return sub_id def unsubscribe(self, subscription_id: str) -> bool: """ Unsubscribe from events. Args: subscription_id: ID returned from subscribe() Returns: True if subscription was found and removed """ if subscription_id in self._subscriptions: del self._subscriptions[subscription_id] logger.debug(f"Subscription {subscription_id} removed") return True return False async def publish(self, event: AgentEvent) -> None: """ Publish an event to all matching subscribers. Args: event: Event to publish """ # Apply iteration offset at the source so ALL consumers (SSE subscribers, # event history, session log) see the same monotonically increasing # iteration values. Without this, live SSE would use raw iterations # while events.jsonl would use offset iterations, causing ID collisions # on the frontend when replaying after cold resume. if self._session_log_iteration_offset and isinstance(event.data, dict) and "iteration" in event.data: offset = self._session_log_iteration_offset event.data = {**event.data, "iteration": event.data["iteration"] + offset} # Add to history async with self._lock: self._event_history.append(event) if len(self._event_history) > self._max_history: self._event_history = self._event_history[-self._max_history :] # Write event to JSONL file (gated by HIVE_DEBUG_EVENTS env var) if _DEBUG_EVENTS_ENABLED: global _event_log_file, _event_log_ready # noqa: PLW0603 if not _event_log_ready: _event_log_file = _open_event_log() _event_log_ready = True if _event_log_file is not None: try: line = json.dumps(event.to_dict(), default=str) _event_log_file.write(line + "\n") _event_log_file.flush() except Exception: pass # never break event delivery # Per-session persistent log (always-on when set_session_log was called). # Streaming deltas are coalesced: client_output_delta and llm_text_delta # are accumulated and flushed as a single snapshot event on llm_turn_complete. if self._session_log is not None: try: self._write_session_log_event(event) except Exception: pass # never break event delivery # Find matching subscriptions matching_handlers: list[EventHandler] = [] for subscription in self._subscriptions.values(): if self._matches(subscription, event): matching_handlers.append(subscription.handler) # Execute handlers concurrently if matching_handlers: await self._execute_handlers(event, matching_handlers) def _matches(self, subscription: Subscription, event: AgentEvent) -> bool: """Check if a subscription matches an event.""" # Check event type if event.type not in subscription.event_types: return False # Check stream filter if subscription.filter_stream and subscription.filter_stream != event.stream_id: return False # Check node filter if subscription.filter_node and subscription.filter_node != event.node_id: return False # Check execution filter if subscription.filter_execution and subscription.filter_execution != event.execution_id: return False # Check colony filter if subscription.filter_colony and subscription.filter_colony != event.colony_id: return False return True # Per-handler wall-clock timeout. A subscriber that deadlocks or # blocks on slow I/O would otherwise freeze the publisher (and via # ``await publish(...)`` any coroutine that emits events) indefinitely. # 15 s is generous for legitimate handlers and cheap to tune later. _HANDLER_TIMEOUT_SECONDS: float = 15.0 async def _execute_handlers( self, event: AgentEvent, handlers: list[EventHandler], ) -> None: """Execute handlers concurrently with rate limiting + hard timeout.""" async def run_handler(handler: EventHandler) -> None: async with self._semaphore: try: await asyncio.wait_for( handler(event), timeout=self._HANDLER_TIMEOUT_SECONDS, ) except TimeoutError: handler_name = getattr(handler, "__qualname__", repr(handler)) logger.error( "EventBus handler %s exceeded %.0fs on event %s — dropping; " "fix the handler or the publisher will stall", handler_name, self._HANDLER_TIMEOUT_SECONDS, getattr(event.type, "name", event.type), ) except Exception: logger.exception(f"Handler error for {event.type}") # Run all handlers concurrently await asyncio.gather(*[run_handler(h) for h in handlers], return_exceptions=True) # === CONVENIENCE PUBLISHERS === async def emit_execution_started( self, stream_id: str, execution_id: str, input_data: dict[str, Any] | None = None, correlation_id: str | None = None, run_id: str | None = None, ) -> None: """Emit execution started event.""" await self.publish( AgentEvent( type=EventType.EXECUTION_STARTED, stream_id=stream_id, execution_id=execution_id, data={"input": input_data or {}}, correlation_id=correlation_id, run_id=run_id, ) ) async def emit_execution_completed( self, stream_id: str, execution_id: str, output: dict[str, Any] | None = None, correlation_id: str | None = None, run_id: str | None = None, ) -> None: """Emit execution completed event.""" await self.publish( AgentEvent( type=EventType.EXECUTION_COMPLETED, stream_id=stream_id, execution_id=execution_id, data={"output": output or {}}, correlation_id=correlation_id, run_id=run_id, ) ) async def emit_execution_failed( self, stream_id: str, execution_id: str, error: str, correlation_id: str | None = None, run_id: str | None = None, ) -> None: """Emit execution failed event.""" await self.publish( AgentEvent( type=EventType.EXECUTION_FAILED, stream_id=stream_id, execution_id=execution_id, data={"error": error}, correlation_id=correlation_id, run_id=run_id, ) ) async def emit_goal_progress( self, stream_id: str, progress: float, criteria_status: dict[str, Any], ) -> None: """Emit goal progress event.""" await self.publish( AgentEvent( type=EventType.GOAL_PROGRESS, stream_id=stream_id, data={ "progress": progress, "criteria_status": criteria_status, }, ) ) async def emit_constraint_violation( self, stream_id: str, execution_id: str, constraint_id: str, description: str, ) -> None: """Emit constraint violation event.""" await self.publish( AgentEvent( type=EventType.CONSTRAINT_VIOLATION, stream_id=stream_id, execution_id=execution_id, data={ "constraint_id": constraint_id, "description": description, }, ) ) async def emit_state_changed( self, stream_id: str, execution_id: str, key: str, old_value: Any, new_value: Any, scope: str, ) -> None: """Emit state changed event.""" await self.publish( AgentEvent( type=EventType.STATE_CHANGED, stream_id=stream_id, execution_id=execution_id, data={ "key": key, "old_value": old_value, "new_value": new_value, "scope": scope, }, ) ) # === NODE EVENT-LOOP PUBLISHERS === async def emit_node_loop_started( self, stream_id: str, node_id: str, execution_id: str | None = None, max_iterations: int | None = None, ) -> None: """Emit node loop started event.""" await self.publish( AgentEvent( type=EventType.NODE_LOOP_STARTED, stream_id=stream_id, node_id=node_id, execution_id=execution_id, data={"max_iterations": max_iterations}, ) ) async def emit_node_loop_iteration( self, stream_id: str, node_id: str, iteration: int, execution_id: str | None = None, extra_data: dict[str, Any] | None = None, ) -> None: """Emit node loop iteration event.""" data: dict[str, Any] = {"iteration": iteration} if extra_data: data.update(extra_data) await self.publish( AgentEvent( type=EventType.NODE_LOOP_ITERATION, stream_id=stream_id, node_id=node_id, execution_id=execution_id, data=data, ) ) async def emit_node_loop_completed( self, stream_id: str, node_id: str, iterations: int, execution_id: str | None = None, ) -> None: """Emit node loop completed event.""" await self.publish( AgentEvent( type=EventType.NODE_LOOP_COMPLETED, stream_id=stream_id, node_id=node_id, execution_id=execution_id, data={"iterations": iterations}, ) ) async def emit_node_action_plan( self, stream_id: str, node_id: str, plan: str, execution_id: str | None = None, ) -> None: """Emit node action plan event.""" await self.publish( AgentEvent( type=EventType.NODE_ACTION_PLAN, stream_id=stream_id, node_id=node_id, execution_id=execution_id, data={"plan": plan}, ) ) # === LLM STREAMING PUBLISHERS === async def emit_llm_text_delta( self, stream_id: str, node_id: str, content: str, snapshot: str, execution_id: str | None = None, inner_turn: int = 0, ) -> None: """Emit LLM text delta event.""" await self.publish( AgentEvent( type=EventType.LLM_TEXT_DELTA, stream_id=stream_id, node_id=node_id, execution_id=execution_id, data={"content": content, "snapshot": snapshot, "inner_turn": inner_turn}, ) ) async def emit_llm_reasoning_delta( self, stream_id: str, node_id: str, content: str, execution_id: str | None = None, ) -> None: """Emit LLM reasoning delta event.""" await self.publish( AgentEvent( type=EventType.LLM_REASONING_DELTA, stream_id=stream_id, node_id=node_id, execution_id=execution_id, data={"content": content}, ) ) async def emit_llm_turn_complete( self, stream_id: str, node_id: str, stop_reason: str, model: str, input_tokens: int, output_tokens: int, cached_tokens: int = 0, execution_id: str | None = None, iteration: int | None = None, ) -> None: """Emit LLM turn completion with stop reason and model metadata.""" data: dict = { "stop_reason": stop_reason, "model": model, "input_tokens": input_tokens, "output_tokens": output_tokens, "cached_tokens": cached_tokens, } if iteration is not None: data["iteration"] = iteration await self.publish( AgentEvent( type=EventType.LLM_TURN_COMPLETE, stream_id=stream_id, node_id=node_id, execution_id=execution_id, data=data, ) ) # === TOOL LIFECYCLE PUBLISHERS === async def emit_tool_call_started( self, stream_id: str, node_id: str, tool_use_id: str, tool_name: str, tool_input: dict[str, Any] | None = None, execution_id: str | None = None, ) -> None: """Emit tool call started event.""" await self.publish( AgentEvent( type=EventType.TOOL_CALL_STARTED, stream_id=stream_id, node_id=node_id, execution_id=execution_id, data={ "tool_use_id": tool_use_id, "tool_name": tool_name, "tool_input": tool_input or {}, }, ) ) async def emit_tool_call_completed( self, stream_id: str, node_id: str, tool_use_id: str, tool_name: str, result: str = "", is_error: bool = False, execution_id: str | None = None, ) -> None: """Emit tool call completed event.""" await self.publish( AgentEvent( type=EventType.TOOL_CALL_COMPLETED, stream_id=stream_id, node_id=node_id, execution_id=execution_id, data={ "tool_use_id": tool_use_id, "tool_name": tool_name, "result": result, "is_error": is_error, }, ) ) # === CLIENT I/O PUBLISHERS === async def emit_client_output_delta( self, stream_id: str, node_id: str, content: str, snapshot: str, execution_id: str | None = None, iteration: int | None = None, inner_turn: int = 0, ) -> None: """Emit user-facing output delta for interactive queen turns.""" data: dict = {"content": content, "snapshot": snapshot, "inner_turn": inner_turn} if iteration is not None: data["iteration"] = iteration await self.publish( AgentEvent( type=EventType.CLIENT_OUTPUT_DELTA, stream_id=stream_id, node_id=node_id, execution_id=execution_id, data=data, ) ) async def emit_client_input_requested( self, stream_id: str, node_id: str, prompt: str = "", execution_id: str | None = None, options: list[str] | None = None, questions: list[dict] | None = None, ) -> None: """Emit a user-input request for interactive queen turns. Args: options: Optional predefined choices for the user (1-3 items). The frontend appends an "Other" free-text option automatically. questions: Optional list of question dicts for multi-question batches (from ask_user_multiple). Each dict has id, prompt, and optional options. """ data: dict[str, Any] = {"prompt": prompt} if options: data["options"] = options if questions: data["questions"] = questions await self.publish( AgentEvent( type=EventType.CLIENT_INPUT_REQUESTED, stream_id=stream_id, node_id=node_id, execution_id=execution_id, data=data, ) ) # === INTERNAL NODE PUBLISHERS === async def emit_node_internal_output( self, stream_id: str, node_id: str, content: str, execution_id: str | None = None, ) -> None: """Emit node internal output for non-user-facing execution.""" await self.publish( AgentEvent( type=EventType.NODE_INTERNAL_OUTPUT, stream_id=stream_id, node_id=node_id, execution_id=execution_id, data={"content": content}, ) ) async def emit_node_stalled( self, stream_id: str, node_id: str, reason: str = "", execution_id: str | None = None, ) -> None: """Emit node stalled event.""" await self.publish( AgentEvent( type=EventType.NODE_STALLED, stream_id=stream_id, node_id=node_id, execution_id=execution_id, data={"reason": reason}, ) ) async def emit_tool_doom_loop( self, stream_id: str, node_id: str, description: str = "", execution_id: str | None = None, ) -> None: """Emit tool doom loop detection event.""" await self.publish( AgentEvent( type=EventType.NODE_TOOL_DOOM_LOOP, stream_id=stream_id, node_id=node_id, execution_id=execution_id, data={"description": description}, ) ) async def emit_node_input_blocked( self, stream_id: str, node_id: str, prompt: str = "", execution_id: str | None = None, ) -> None: """Emit node input blocked event.""" await self.publish( AgentEvent( type=EventType.NODE_INPUT_BLOCKED, stream_id=stream_id, node_id=node_id, execution_id=execution_id, data={"prompt": prompt}, ) ) # === JUDGE / OUTPUT / RETRY / EDGE PUBLISHERS === async def emit_judge_verdict( self, stream_id: str, node_id: str, action: str, feedback: str = "", judge_type: str = "implicit", iteration: int = 0, execution_id: str | None = None, ) -> None: """Emit judge verdict event.""" await self.publish( AgentEvent( type=EventType.JUDGE_VERDICT, stream_id=stream_id, node_id=node_id, execution_id=execution_id, data={ "action": action, "feedback": feedback, "judge_type": judge_type, "iteration": iteration, }, ) ) async def emit_node_retry( self, stream_id: str, node_id: str, retry_count: int, max_retries: int, error: str = "", execution_id: str | None = None, ) -> None: """Emit node retry event.""" await self.publish( AgentEvent( type=EventType.NODE_RETRY, stream_id=stream_id, node_id=node_id, execution_id=execution_id, data={ "retry_count": retry_count, "max_retries": max_retries, "error": error, }, ) ) async def emit_worker_completed( self, stream_id: str, node_id: str, worker_id: str, success: bool, output: dict[str, Any], activations: list[dict[str, Any]] | None = None, execution_id: str | None = None, **extra_data: Any, ) -> None: """Emit worker completed event with outgoing activations.""" data: dict[str, Any] = { "worker_id": worker_id, "success": success, "output": output, "activations": activations or [], **extra_data, } await self.publish( AgentEvent( type=EventType.WORKER_COMPLETED, stream_id=stream_id, node_id=node_id, execution_id=execution_id, data=data, ) ) async def emit_worker_failed( self, stream_id: str, node_id: str, worker_id: str, error: str, execution_id: str | None = None, ) -> None: """Emit worker failed event.""" await self.publish( AgentEvent( type=EventType.WORKER_FAILED, stream_id=stream_id, node_id=node_id, execution_id=execution_id, data={"worker_id": worker_id, "error": error}, ) ) async def emit_execution_paused( self, stream_id: str, node_id: str, reason: str = "", execution_id: str | None = None, ) -> None: """Emit execution paused event.""" await self.publish( AgentEvent( type=EventType.EXECUTION_PAUSED, stream_id=stream_id, node_id=node_id, execution_id=execution_id, data={"reason": reason}, ) ) async def emit_execution_resumed( self, stream_id: str, node_id: str, execution_id: str | None = None, ) -> None: """Emit execution resumed event.""" await self.publish( AgentEvent( type=EventType.EXECUTION_RESUMED, stream_id=stream_id, node_id=node_id, execution_id=execution_id, data={}, ) ) async def emit_webhook_received( self, source_id: str, path: str, method: str, headers: dict[str, str], payload: dict[str, Any], query_params: dict[str, str] | None = None, ) -> None: """Emit webhook received event.""" await self.publish( AgentEvent( type=EventType.WEBHOOK_RECEIVED, stream_id=source_id, data={ "path": path, "method": method, "headers": headers, "payload": payload, "query_params": query_params or {}, }, ) ) async def emit_escalation_requested( self, stream_id: str, node_id: str, reason: str = "", context: str = "", execution_id: str | None = None, request_id: str | None = None, ) -> None: """Emit escalation requested event (agent wants queen). ``request_id`` is a caller-supplied handle used by the queen to address its reply back to the specific escalation. When omitted the event still fires but the queen cannot route a targeted reply. """ await self.publish( AgentEvent( type=EventType.ESCALATION_REQUESTED, stream_id=stream_id, node_id=node_id, execution_id=execution_id, data={ "request_id": request_id, "reason": reason, "context": context, }, ) ) async def emit_subagent_report( self, stream_id: str, node_id: str, subagent_id: str, message: str, data: dict[str, Any] | None = None, execution_id: str | None = None, ) -> None: """Emit a one-way progress report from a sub-agent.""" await self.publish( AgentEvent( type=EventType.SUBAGENT_REPORT, stream_id=stream_id, node_id=node_id, execution_id=execution_id, data={ "subagent_id": subagent_id, "message": message, "data": data, }, ) ) # === QUERY OPERATIONS === def get_history( self, event_type: EventType | None = None, stream_id: str | None = None, execution_id: str | None = None, limit: int = 100, ) -> list[AgentEvent]: """ Get event history with optional filtering. Args: event_type: Filter by event type stream_id: Filter by stream execution_id: Filter by execution limit: Maximum events to return Returns: List of matching events (most recent first) """ events = self._event_history[::-1] # Reverse for most recent first # Apply filters if event_type: events = [e for e in events if e.type == event_type] if stream_id: events = [e for e in events if e.stream_id == stream_id] if execution_id: events = [e for e in events if e.execution_id == execution_id] return events[:limit] def get_stats(self) -> dict: """Get event bus statistics.""" type_counts = {} for event in self._event_history: type_counts[event.type.value] = type_counts.get(event.type.value, 0) + 1 return { "total_events": len(self._event_history), "subscriptions": len(self._subscriptions), "events_by_type": type_counts, } # === WAITING OPERATIONS === async def wait_for( self, event_type: EventType, stream_id: str | None = None, node_id: str | None = None, execution_id: str | None = None, colony_id: str | None = None, timeout: float | None = None, ) -> AgentEvent | None: """ Wait for a specific event to occur. Args: event_type: Type of event to wait for stream_id: Filter by stream node_id: Filter by node execution_id: Filter by execution colony_id: Filter by colony timeout: Maximum time to wait (seconds) Returns: The event if received, None if timeout """ result: AgentEvent | None = None event_received = asyncio.Event() async def handler(event: AgentEvent) -> None: nonlocal result result = event event_received.set() # Subscribe sub_id = self.subscribe( event_types=[event_type], handler=handler, filter_stream=stream_id, filter_node=node_id, filter_execution=execution_id, filter_colony=colony_id, ) try: # Wait with timeout if timeout: try: await asyncio.wait_for(event_received.wait(), timeout=timeout) except TimeoutError: return None else: await event_received.wait() return result finally: self.unsubscribe(sub_id)