"""SSE event streaming route."""

import asyncio
import logging

from aiohttp import web
from aiohttp.client_exceptions import ClientConnectionResetError as _AiohttpConnReset

from framework.host.event_bus import EventType
from framework.server.app import resolve_session

logger = logging.getLogger(__name__)

# Default event types streamed to clients.
# _parse_event_types() falls back to this list when the ``types`` query
# parameter is missing, empty, or contains no recognised names.
DEFAULT_EVENT_TYPES = [
    EventType.CLIENT_OUTPUT_DELTA,
    EventType.CLIENT_INPUT_REQUESTED,
    EventType.CLIENT_INPUT_RECEIVED,
    EventType.LLM_TEXT_DELTA,
    EventType.TOOL_CALL_STARTED,
    EventType.TOOL_CALL_COMPLETED,
    EventType.EXECUTION_STARTED,
    EventType.EXECUTION_COMPLETED,
    EventType.EXECUTION_FAILED,
    EventType.EXECUTION_PAUSED,
    EventType.NODE_LOOP_STARTED,
    EventType.NODE_LOOP_ITERATION,
    EventType.NODE_LOOP_COMPLETED,
    EventType.LLM_TURN_COMPLETE,
    EventType.NODE_ACTION_PLAN,
    EventType.GOAL_PROGRESS,
    EventType.NODE_INTERNAL_OUTPUT,
    EventType.NODE_STALLED,
    EventType.NODE_RETRY,
    EventType.NODE_TOOL_DOOM_LOOP,
    EventType.CONTEXT_COMPACTED,
    EventType.CONTEXT_USAGE_UPDATED,
    EventType.WORKER_COLONY_LOADED,
    EventType.COLONY_CREATED,
    EventType.CREDENTIALS_REQUIRED,
    EventType.SUBAGENT_REPORT,
    EventType.QUEEN_PHASE_CHANGED,
    EventType.TRIGGER_AVAILABLE,
    EventType.TRIGGER_ACTIVATED,
    EventType.TRIGGER_DEACTIVATED,
    EventType.TRIGGER_FIRED,
    EventType.TRIGGER_REMOVED,
    EventType.TRIGGER_UPDATED,
]

# Keepalive interval in seconds: the longest handle_events() waits for a
# queued event before sending a keepalive on the stream instead.
KEEPALIVE_INTERVAL = 15.0

# Phase 5 SSE filter: parallel-worker streams (stream_id="worker:{uuid}")
# publish high-frequency LLM deltas / tool calls that would flood the
# user's queen DM chat. We let only this small allowlist of worker
# events through to the queen-chat SSE so the frontend can render
# fan-out lifecycle and structured fan-in reports without seeing the
# raw worker chatter. Per-worker SSE panels (Phase 5b) bypass this
# filter via a dedicated /workers/{worker_id}/events route.
#
# The set stores the enum ``.value`` of each type because it is compared
# against the "type" entry of an event's ``to_dict()`` output.
_WORKER_EVENT_ALLOWLIST = {
    EventType.SUBAGENT_REPORT.value,
    EventType.EXECUTION_COMPLETED.value,
    EventType.EXECUTION_FAILED.value,
}


def _is_worker_noise(evt_dict: dict) -> bool:
    """Decide whether a serialized event is worker chatter to be dropped.

    An event is noise when its ``stream_id`` begins with ``"worker:"`` and
    its ``type`` is not listed in ``_WORKER_EVENT_ALLOWLIST``. Events with a
    missing or empty ``stream_id`` are never treated as noise.
    """
    origin = evt_dict.get("stream_id") or ""
    from_worker = origin.startswith("worker:")
    # Short-circuit keeps non-worker events from ever consulting the allowlist.
    return from_worker and evt_dict.get("type") not in _WORKER_EVENT_ALLOWLIST


def _parse_event_types(query_param: str | None) -> list[EventType]:
    """Parse comma-separated event type names into EventType values.

    Blank entries (e.g. from a trailing comma) are skipped silently, names
    that are not valid ``EventType`` values are logged and skipped, and
    repeated names are collapsed so each type appears once, in first-seen
    order.

    Args:
        query_param: Raw value of the ``types`` query parameter, or None.

    Returns:
        A new list of the recognised event types. When the parameter is
        missing, empty, or contains no recognised name, a copy of
        ``DEFAULT_EVENT_TYPES`` is returned so callers can never mutate the
        module-level default.
    """
    if not query_param:
        return list(DEFAULT_EVENT_TYPES)

    # A dict is used as an insertion-ordered set to drop duplicates.
    parsed = {}
    for raw_name in query_param.split(","):
        name = raw_name.strip()
        if not name:
            # Empty token such as "a,,b" or "a," — not worth a warning.
            continue
        try:
            parsed[EventType(name)] = None
        except ValueError:
            logger.warning("Unknown event type filter: %s", name)

    return list(parsed) or list(DEFAULT_EVENT_TYPES)


async def handle_events(request: web.Request) -> web.StreamResponse:
    """SSE event stream for a session.

    Subscribes to the session's EventBus, replays a subset of the bus
    history, then forwards queued events to the client until the client
    goes away, falls too far behind, or the handler is cancelled. A
    keepalive is sent whenever no event arrives within
    ``KEEPALIVE_INTERVAL`` seconds. The EventBus subscription is always
    released, including when preparing the response or replaying history
    fails.

    Query params:
        types: Comma-separated event type names to filter (optional).

    Returns:
        The error response from ``resolve_session`` when it reports one,
        otherwise the underlying SSE response once streaming has ended.
    """
    session, err = resolve_session(request)
    if err:
        return err

    # Session always has an event_bus — no runtime guard needed
    event_bus = session.event_bus
    event_types = _parse_event_types(request.query.get("types"))

    # Per-client buffer queue
    queue: asyncio.Queue = asyncio.Queue(maxsize=1000)

    # Lifecycle events drive frontend state transitions and must never be lost.
    # NOTE(review): "worker_graph_loaded" has no matching EventType referenced
    # in this module (the default stream list uses WORKER_COLONY_LOADED) —
    # confirm this string still matches an emitted event type.
    _CRITICAL_EVENTS = {
        "execution_started",
        "execution_completed",
        "execution_failed",
        "execution_paused",
        "client_input_requested",
        "client_input_received",
        "node_loop_iteration",
        "node_loop_started",
        "credentials_required",
        "worker_graph_loaded",
        "queen_phase_changed",
    }

    # Set when a critical event could not be queued; ends the send loop.
    client_disconnected = asyncio.Event()

    async def on_event(event) -> None:
        """Push event dict into queue; drop non-critical events if full."""
        if client_disconnected.is_set():
            return

        evt_dict = event.to_dict()
        if _is_worker_noise(evt_dict):
            return
        try:
            queue.put_nowait(evt_dict)
        except asyncio.QueueFull:
            if evt_dict.get("type") in _CRITICAL_EVENTS:
                # Losing a critical event would desync the frontend, so the
                # stream is torn down rather than continuing with a gap.
                logger.warning(
                    "SSE client queue full on critical event; disconnecting session='%s'",
                    session.id,
                )
                client_disconnected.set()
            # Otherwise: high-frequency events can be dropped; client will catch up

    # Subscribe to EventBus
    from framework.server.sse import SSEResponse

    sse = SSEResponse()
    sub_id = event_bus.subscribe(
        event_types=event_types,
        handler=on_event,
    )

    event_count = 0
    close_reason = "setup_failed"
    # Everything after subscribe() sits inside this try so the subscription
    # is released even if prepare() or the replay raises or is cancelled.
    try:
        await sse.prepare(request)
        logger.info("SSE connected: session='%s', sub_id='%s', types=%d", session.id, sub_id, len(event_types))

        # Replay buffered events that were published before this SSE connected.
        # The EventBus keeps a history ring-buffer; we replay the subset that
        # produces visible chat messages so the frontend never misses early
        # queen output.  Most execution/node lifecycle events are NOT replayed
        # to avoid duplicate state transitions (turn counter increments, etc.).
        # NOTE(review): EXECUTION_STARTED is in the set below despite being a
        # lifecycle event — confirm that exception is intentional.
        #
        # Trigger lifecycle events ARE replayed: they're idempotent state
        # setters (this trigger exists / is active / was deactivated) and
        # they're published during session load — BEFORE the frontend's
        # SSE subscription is established. Without replay, a freshly-opened
        # colony would never see its own triggers.
        _REPLAY_TYPES = {
            EventType.CLIENT_OUTPUT_DELTA.value,
            EventType.EXECUTION_STARTED.value,
            EventType.CLIENT_INPUT_REQUESTED.value,
            EventType.CLIENT_INPUT_RECEIVED.value,
            EventType.TRIGGER_AVAILABLE.value,
            EventType.TRIGGER_ACTIVATED.value,
            EventType.TRIGGER_DEACTIVATED.value,
            EventType.TRIGGER_REMOVED.value,
            EventType.TRIGGER_UPDATED.value,
        }
        # Only replay types the client actually asked for.
        event_type_values = {et.value for et in event_types}
        replay_types = _REPLAY_TYPES & event_type_values
        replayed = 0
        for past_event in event_bus._event_history:
            if past_event.type.value in replay_types:
                past_dict = past_event.to_dict()
                if _is_worker_noise(past_dict):
                    continue
                try:
                    queue.put_nowait(past_dict)
                    replayed += 1
                except asyncio.QueueFull:
                    break
        if replayed:
            logger.info("SSE replayed %d buffered events for session='%s'", replayed, session.id)

        # Live status is surfaced via the EventBus ring-buffer replay above
        # (executed earlier in this handler).  The old graph-executor snapshot
        # injection was removed when graph execution was retired -- the
        # AgentLoop publishes its own lifecycle events to the EventBus.

        close_reason = "unknown"
        try:
            while not client_disconnected.is_set():
                try:
                    data = await asyncio.wait_for(queue.get(), timeout=KEEPALIVE_INTERVAL)
                    await sse.send_event(data)
                    event_count += 1
                    if event_count == 1:
                        logger.info("SSE first event: session='%s', type='%s'", session.id, data.get("type"))
                except TimeoutError:
                    # asyncio.wait_for raises the builtin TimeoutError on
                    # Python 3.11+ (asyncio.TimeoutError is an alias there);
                    # older interpreters would not match this clause.
                    try:
                        await sse.send_keepalive()
                    except (ConnectionResetError, ConnectionError, _AiohttpConnReset):
                        close_reason = "client_disconnected"
                        break
                    except Exception as exc:
                        close_reason = f"keepalive_error: {exc}"
                        break
                except (ConnectionResetError, ConnectionError, _AiohttpConnReset):
                    close_reason = "client_disconnected"
                    break
                except RuntimeError as exc:
                    # A RuntimeError mentioning "closing transport" is treated
                    # as an ordinary client disconnect.
                    if "closing transport" in str(exc).lower():
                        close_reason = "client_disconnected"
                    else:
                        close_reason = f"error: {exc}"
                    break
                except Exception as exc:
                    close_reason = f"error: {exc}"
                    break

            # The flag is only set by on_event when a critical event overflowed.
            if client_disconnected.is_set() and close_reason == "unknown":
                close_reason = "slow_client"
        except asyncio.CancelledError:
            close_reason = "cancelled"
    finally:
        try:
            event_bus.unsubscribe(sub_id)
        except Exception:
            # Best-effort cleanup: never mask the original outcome, but leave
            # a trace so a misbehaving bus can be diagnosed.
            logger.debug(
                "SSE unsubscribe failed: session='%s', sub_id='%s'",
                session.id,
                sub_id,
                exc_info=True,
            )
        logger.info(
            "SSE disconnected: session='%s', events_sent=%d, reason='%s'",
            session.id,
            event_count,
            close_reason,
        )

    return sse.response


def register_routes(app: web.Application) -> None:
    """Attach the SSE event streaming endpoint to *app*'s router."""
    # Session-primary route: GET stream keyed by the session id in the path.
    events_path = "/api/sessions/{session_id}/events"
    app.router.add_get(events_path, handle_events)