"""Session lifecycle and session info routes.
Session-primary routes:
- POST /api/sessions — create session (with or without worker)
- GET /api/sessions — list all active sessions
- GET /api/sessions/{session_id} — session detail
- DELETE /api/sessions/{session_id} — stop session entirely
- POST /api/sessions/{session_id}/colony — load a colony into session
- DELETE /api/sessions/{session_id}/colony — unload colony from session
- GET /api/sessions/{session_id}/stats — runtime statistics
- GET /api/sessions/{session_id}/entry-points — list entry points
- PATCH /api/sessions/{session_id}/triggers/{id} — update trigger task/config
- POST /api/sessions/{session_id}/triggers/{id}/run — fire trigger once (manual)
- POST /api/sessions/{session_id}/triggers/{id}/activate — start a trigger
- POST /api/sessions/{session_id}/triggers/{id}/deactivate — stop a trigger
- GET /api/sessions/{session_id}/colonies — list colony IDs
- GET /api/sessions/{session_id}/events/history — persisted eventbus log (for replay)
- GET /api/sessions/history — all queen sessions on disk (live + cold)
- DELETE /api/sessions/history/{session_id} — permanently remove a session
"""
import asyncio
import contextlib
import json
import logging
import shutil
import subprocess
import sys
import time
from pathlib import Path
from aiohttp import web
from framework.server.app import (
resolve_session,
validate_agent_path,
)
from framework.server.session_manager import SessionManager
logger = logging.getLogger(__name__)
def _get_manager(request: web.Request) -> SessionManager:
return request.app["manager"]
def _session_to_live_dict(session) -> dict:
"""Serialize a live Session to the session-primary JSON shape."""
from framework.llm.capabilities import supports_image_tool_results
info = session.worker_info
phase_state = getattr(session, "phase_state", None)
queen_model: str = getattr(getattr(session, "runner", None), "model", "") or ""
return {
"session_id": session.id,
"colony_id": session.colony_id,
"colony_name": info.name if info else session.colony_id,
"has_worker": session.colony_runtime is not None,
"agent_path": str(session.worker_path) if session.worker_path else "",
"description": info.description if info else "",
"goal": info.goal_name if info else "",
"node_count": info.node_count if info else 0,
"loaded_at": session.loaded_at,
"uptime_seconds": round(time.time() - session.loaded_at, 1),
"intro_message": getattr(session.runner, "intro_message", "") or "",
"queen_phase": phase_state.phase if phase_state else ("staging" if session.colony_runtime else "planning"),
"queen_supports_images": supports_image_tool_results(queen_model) if queen_model else True,
"queen_id": getattr(phase_state, "queen_id", None) if phase_state else None,
"queen_name": (phase_state.queen_profile or {}).get("name") if phase_state else None,
"colony_spawned": getattr(session, "colony_spawned", False),
"spawned_colony_name": getattr(session, "spawned_colony_name", None),
}
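# Illustrative output (field values are made up): a queen-only session with no
# phase_state serializes with "has_worker": false and "queen_phase": "planning",
# while a session that has a colony loaded but no phase_state yet reports
# "queen_phase": "staging".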
def _credential_error_response(exc: Exception, agent_path: str | None) -> web.Response | None:
"""If *exc* is a CredentialError, return a 424 with structured credential info.
Returns None if *exc* is not a credential error (caller should handle it).
Uses the CredentialValidationResult attached by validate_agent_credentials.
"""
from framework.credentials.models import CredentialError
if not isinstance(exc, CredentialError):
return None
from framework.server.routes_credentials import _status_to_dict
# Prefer the structured validation result attached to the exception
validation_result = getattr(exc, "validation_result", None)
if validation_result is not None:
required = [_status_to_dict(c) for c in validation_result.failed]
else:
# Fallback for exceptions without a validation result
required = []
return web.json_response(
{
"error": "credentials_required",
"message": str(exc),
"agent_path": agent_path or "",
"required": required,
},
status=424,
)
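# Illustrative 424 payload (the shape of each "required" entry is whatever
# routes_credentials._status_to_dict produces; the fields shown below are
# hypothetical):
#   {
#       "error": "credentials_required",
#       "message": "Missing API key for provider 'x'",
#       "agent_path": "exports/my_agent",
#       "required": [{"name": "X_API_KEY", "valid": false}]
#   }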
# ------------------------------------------------------------------
# Session lifecycle
# ------------------------------------------------------------------
async def handle_create_session(request: web.Request) -> web.Response:
"""POST /api/sessions — create a session.
Body: {
"agent_path": "..." (optional — if provided, creates session with colony),
"agent_id": "..." (optional — colony ID override),
"session_id": "..." (optional — custom session ID),
"model": "..." (optional),
"initial_prompt": "..." (optional — first user message for the queen),
"initial_phase": "..." (optional — "independent" for standalone queen),
}
When agent_path is provided, creates a session with a colony in one step
(equivalent to the old POST /api/agents). Otherwise creates a queen-only
session that can later have a colony loaded via POST /sessions/{id}/colony.
"""
from framework.agents.queen.queen_profiles import ensure_default_queens, load_queen_profile
from framework.tools.queen_lifecycle_tools import QUEEN_PHASES
manager = _get_manager(request)
if request.can_read_body:
try:
body = await request.json()
except json.JSONDecodeError:
return web.json_response({"error": "Invalid JSON body"}, status=400)
if not isinstance(body, dict):
return web.json_response({"error": "Request body must be a JSON object"}, status=400)
else:
body = {}
agent_path = body.get("agent_path")
agent_id = body.get("agent_id")
session_id = body.get("session_id")
model = body.get("model")
initial_prompt = body.get("initial_prompt")
queen_resume_from = body.get("queen_resume_from")
queen_name = body.get("queen_name")
initial_phase = body.get("initial_phase")
worker_name = body.get("worker_name")
if initial_phase is not None and initial_phase not in QUEEN_PHASES:
return web.json_response(
{
"error": f"Invalid initial_phase '{initial_phase}'",
"valid": sorted(QUEEN_PHASES),
},
status=400,
)
if queen_name:
ensure_default_queens()
try:
load_queen_profile(queen_name)
except FileNotFoundError:
return web.json_response({"error": f"Queen '{queen_name}' not found"}, status=404)
if agent_path:
try:
agent_path = str(validate_agent_path(agent_path))
except ValueError as e:
return web.json_response({"error": str(e)}, status=400)
try:
if agent_path:
session = await manager.create_session_with_worker_colony(
agent_path,
agent_id=agent_id,
session_id=session_id,
model=model,
initial_prompt=initial_prompt,
queen_resume_from=queen_resume_from,
queen_name=queen_name,
initial_phase=initial_phase,
worker_name=worker_name,
)
else:
# Queen-only session
session = await manager.create_session(
session_id=session_id,
model=model,
initial_prompt=initial_prompt,
queen_resume_from=queen_resume_from,
queen_name=queen_name,
initial_phase=initial_phase,
)
except ValueError as e:
msg = str(e)
if "currently loading" in msg:
resolved_id = agent_id or (Path(agent_path).name if agent_path else "")
return web.json_response(
{"error": msg, "colony_id": resolved_id, "loading": True},
status=409,
)
return web.json_response({"error": msg}, status=409)
except FileNotFoundError:
return web.json_response(
{"error": f"Agent not found: {agent_path or 'no path'}"},
status=404,
)
except Exception as e:
resp = _credential_error_response(e, agent_path)
if resp is not None:
return resp
logger.exception("Error creating session: %s", e)
return web.json_response({"error": "Internal server error"}, status=500)
return web.json_response(_session_to_live_dict(session), status=201)
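# Illustrative request (values are made up):
#   POST /api/sessions
#   {"agent_path": "exports/my_agent", "initial_prompt": "Summarize the repo"}
# -> 201 with the _session_to_live_dict payload on success; 409 on a ValueError
#    from the manager (e.g. the colony is already loading), 404 if the agent
#    path doesn't exist, 424 if required credentials are missing.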
async def handle_list_live_sessions(request: web.Request) -> web.Response:
"""GET /api/sessions — list all active sessions."""
manager = _get_manager(request)
sessions = [_session_to_live_dict(s) for s in manager.list_sessions()]
return web.json_response({"sessions": sessions})
async def handle_get_live_session(request: web.Request) -> web.Response:
"""GET /api/sessions/{session_id} — get session detail.
Falls back to cold session metadata (HTTP 200 with ``cold: true``) when the
session is not alive in memory but queen conversation files exist on disk.
This lets the frontend detect a server restart and restore message history.
"""
manager = _get_manager(request)
session_id = request.match_info["session_id"]
session = manager.get_session(session_id)
if session is None:
if manager.is_loading(session_id):
return web.json_response(
{"session_id": session_id, "loading": True},
status=202,
)
# Check if conversation files survived on disk (post-restart scenario)
cold_info = SessionManager.get_cold_session_info(session_id)
if cold_info is not None:
return web.json_response(cold_info)
return web.json_response(
{"error": f"Session '{session_id}' not found"},
status=404,
)
data = _session_to_live_dict(session)
if session.colony_runtime:
rt = session.colony_runtime
data["entry_points"] = [
{
"id": ep.id,
"name": ep.name,
"entry_node": ep.entry_node,
"trigger_type": ep.trigger_type,
"trigger_config": ep.trigger_config,
**({"next_fire_in": nf} if (nf := rt.get_timer_next_fire_in(ep.id)) is not None else {}),
}
for ep in rt.get_entry_points()
]
# Append triggers from triggers.json (stored on session)
runner = getattr(session, "runner", None)
colony_entry = runner.graph.entry_node if runner else ""
for t in getattr(session, "available_triggers", {}).values():
entry = {
"id": t.id,
"name": t.description or t.id,
"entry_node": colony_entry,
"trigger_type": t.trigger_type,
"trigger_config": t.trigger_config,
"task": t.task,
}
mono = getattr(session, "trigger_next_fire", {}).get(t.id)
if mono is not None:
remaining = max(0.0, mono - time.monotonic())
entry["next_fire_in"] = remaining
entry["next_fire_at"] = int((time.time() + remaining) * 1000)
stats = getattr(session, "trigger_fire_stats", {}).get(t.id)
if stats:
entry["fire_count"] = stats.get("fire_count", 0)
if stats.get("last_fired_at") is not None:
entry["last_fired_at"] = stats["last_fired_at"]
data["entry_points"].append(entry)
data["colonies"] = session.colony_runtime.list_graphs()
return web.json_response(data)
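# Note on the trigger timing fields (numbers are illustrative): "next_fire_in"
# is seconds from now, while "next_fire_at" is an absolute epoch-millisecond
# timestamp, e.g. next_fire_in=90.0 pairs with next_fire_at of roughly
# int((time.time() + 90.0) * 1000).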
async def handle_stop_session(request: web.Request) -> web.Response:
"""DELETE /api/sessions/{session_id} — stop a session entirely."""
manager = _get_manager(request)
session_id = request.match_info["session_id"]
stopped = await manager.stop_session(session_id)
if not stopped:
return web.json_response(
{"error": f"Session '{session_id}' not found"},
status=404,
)
return web.json_response({"session_id": session_id, "stopped": True})
# ------------------------------------------------------------------
# Colony lifecycle
# ------------------------------------------------------------------
async def handle_load_colony(request: web.Request) -> web.Response:
"""POST /api/sessions/{session_id}/colony — load a colony into a session.
Body: {"agent_path": "...", "colony_id": "..." (optional), "model": "..." (optional)}
"""
manager = _get_manager(request)
session_id = request.match_info["session_id"]
body = await request.json()
agent_path = body.get("agent_path")
if not agent_path:
return web.json_response({"error": "agent_path is required"}, status=400)
try:
agent_path = str(validate_agent_path(agent_path))
except ValueError as e:
return web.json_response({"error": str(e)}, status=400)
colony_id = body.get("colony_id")
model = body.get("model")
try:
session = await manager.load_colony(
session_id,
agent_path,
colony_id=colony_id,
model=model,
)
except ValueError as e:
return web.json_response({"error": str(e)}, status=409)
except FileNotFoundError:
return web.json_response({"error": f"Agent not found: {agent_path}"}, status=404)
except Exception as e:
resp = _credential_error_response(e, agent_path)
if resp is not None:
return resp
logger.exception("Error loading colony: %s", e)
return web.json_response({"error": "Internal server error"}, status=500)
return web.json_response(_session_to_live_dict(session))
async def handle_unload_colony(request: web.Request) -> web.Response:
"""DELETE /api/sessions/{session_id}/colony — unload colony, keep queen alive."""
manager = _get_manager(request)
session_id = request.match_info["session_id"]
removed = await manager.unload_colony(session_id)
if not removed:
session = manager.get_session(session_id)
if session is None:
return web.json_response(
{"error": f"Session '{session_id}' not found"},
status=404,
)
return web.json_response(
{"error": "No colony loaded in this session"},
status=409,
)
return web.json_response({"session_id": session_id, "colony_unloaded": True})
# ------------------------------------------------------------------
# Session info (worker details)
# ------------------------------------------------------------------
async def handle_session_stats(request: web.Request) -> web.Response:
"""GET /api/sessions/{session_id}/stats — runtime statistics."""
manager = _get_manager(request)
session_id = request.match_info["session_id"]
session = manager.get_session(session_id)
if session is None:
return web.json_response(
{"error": f"Session '{session_id}' not found"},
status=404,
)
stats = session.colony_runtime.get_stats() if session.colony_runtime else {}
return web.json_response(stats)
async def handle_session_entry_points(request: web.Request) -> web.Response:
"""GET /api/sessions/{session_id}/entry-points — list entry points."""
manager = _get_manager(request)
session_id = request.match_info["session_id"]
session = manager.get_session(session_id)
if session is None:
return web.json_response(
{"error": f"Session '{session_id}' not found"},
status=404,
)
rt = session.colony_runtime
eps = rt.get_entry_points() if rt else []
entry_points = [
{
"id": ep.id,
"name": ep.name,
"entry_node": ep.entry_node,
"trigger_type": ep.trigger_type,
"trigger_config": ep.trigger_config,
**({"next_fire_in": nf} if rt and (nf := rt.get_timer_next_fire_in(ep.id)) is not None else {}),
}
for ep in eps
]
# Append triggers from triggers.json (stored on session)
runner = getattr(session, "runner", None)
colony_entry = runner.graph.entry_node if runner else ""
for t in getattr(session, "available_triggers", {}).values():
entry = {
"id": t.id,
"name": t.description or t.id,
"entry_node": colony_entry,
"trigger_type": t.trigger_type,
"trigger_config": t.trigger_config,
"task": t.task,
}
mono = getattr(session, "trigger_next_fire", {}).get(t.id)
if mono is not None:
remaining = max(0.0, mono - time.monotonic())
entry["next_fire_in"] = remaining
entry["next_fire_at"] = int((time.time() + remaining) * 1000)
stats = getattr(session, "trigger_fire_stats", {}).get(t.id)
if stats:
entry["fire_count"] = stats.get("fire_count", 0)
if stats.get("last_fired_at") is not None:
entry["last_fired_at"] = stats["last_fired_at"]
entry_points.append(entry)
return web.json_response({"entry_points": entry_points})
async def handle_update_trigger_task(request: web.Request) -> web.Response:
"""PATCH /api/sessions/{session_id}/triggers/{trigger_id} — update trigger fields."""
session, err = resolve_session(request)
if err:
return err
trigger_id = request.match_info["trigger_id"]
available = getattr(session, "available_triggers", {})
tdef = available.get(trigger_id)
if tdef is None:
return web.json_response(
{"error": f"Trigger '{trigger_id}' not found"},
status=404,
)
try:
body = await request.json()
except Exception:
return web.json_response({"error": "Invalid JSON body"}, status=400)
updates: dict[str, object] = {}
if "task" in body:
task = body.get("task")
if not isinstance(task, str):
return web.json_response({"error": "'task' must be a string"}, status=400)
tdef.task = task
updates["task"] = tdef.task
trigger_config_update = body.get("trigger_config")
if trigger_config_update is not None:
if not isinstance(trigger_config_update, dict):
return web.json_response(
{"error": "'trigger_config' must be an object"},
status=400,
)
merged_trigger_config = dict(tdef.trigger_config)
merged_trigger_config.update(trigger_config_update)
if tdef.trigger_type == "timer":
cron_expr = merged_trigger_config.get("cron")
interval = merged_trigger_config.get("interval_minutes")
if cron_expr is not None and not isinstance(cron_expr, str):
return web.json_response(
{"error": "'trigger_config.cron' must be a string"},
status=400,
)
if cron_expr:
try:
from croniter import croniter
if not croniter.is_valid(cron_expr):
return web.json_response(
{"error": f"Invalid cron expression: {cron_expr}"},
status=400,
)
except ImportError:
return web.json_response(
{"error": ("croniter package not installed — cannot validate cron expression.")},
status=500,
)
merged_trigger_config.pop("interval_minutes", None)
elif interval is None:
return web.json_response(
{"error": ("Timer trigger needs 'cron' or 'interval_minutes' in trigger_config.")},
status=400,
)
elif not isinstance(interval, (int, float)) or interval <= 0:
return web.json_response(
{"error": "'trigger_config.interval_minutes' must be > 0"},
status=400,
)
tdef.trigger_config = merged_trigger_config
updates["trigger_config"] = tdef.trigger_config
if not updates:
return web.json_response(
{"error": "Provide at least one of 'task' or 'trigger_config'"},
status=400,
)
# Persist to session state and agent definition
from framework.tools.queen_lifecycle_tools import (
_persist_active_triggers,
_save_trigger_to_agent,
_start_trigger_timer,
_start_trigger_webhook,
)
if "trigger_config" in updates and trigger_id in getattr(session, "active_trigger_ids", set()):
task = session.active_timer_tasks.pop(trigger_id, None)
if task and not task.done():
task.cancel()
with contextlib.suppress(asyncio.CancelledError):
await task
getattr(session, "trigger_next_fire", {}).pop(trigger_id, None)
webhook_subs = getattr(session, "active_webhook_subs", {})
if sub_id := webhook_subs.pop(trigger_id, None):
with contextlib.suppress(Exception):
session.event_bus.unsubscribe(sub_id)
if tdef.trigger_type == "timer":
await _start_trigger_timer(session, trigger_id, tdef)
elif tdef.trigger_type == "webhook":
await _start_trigger_webhook(session, trigger_id, tdef)
if trigger_id in getattr(session, "active_trigger_ids", set()):
session_id = request.match_info["session_id"]
await _persist_active_triggers(session, session_id)
_save_trigger_to_agent(session, trigger_id, tdef)
# Emit SSE event so the frontend updates the colony and detail panel
bus = getattr(session, "event_bus", None)
if bus:
from framework.host.event_bus import AgentEvent, EventType
await bus.publish(
AgentEvent(
type=EventType.TRIGGER_UPDATED,
stream_id="queen",
data={
"trigger_id": trigger_id,
"task": tdef.task,
"trigger_config": tdef.trigger_config,
"trigger_type": tdef.trigger_type,
"name": tdef.description or trigger_id,
"entry_node": getattr(
getattr(getattr(session, "runner", None), "graph", None),
"entry_node",
None,
),
},
)
)
return web.json_response(
{
"trigger_id": trigger_id,
"task": tdef.task,
"trigger_config": tdef.trigger_config,
}
)
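# Illustrative PATCH body (values are made up): switching a timer trigger from
# an interval to a cron schedule:
#   {"task": "Check the feed", "trigger_config": {"cron": "*/15 * * * *"}}
# The body is merged into the existing trigger_config; when a valid "cron" is
# supplied, any "interval_minutes" key is dropped, and an already-active
# trigger is restarted with the new schedule.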
async def handle_run_trigger(request: web.Request) -> web.Response:
"""POST /api/sessions/{session_id}/triggers/{trigger_id}/run — fire the trigger once.
Manual invocation for testing. Works whether the trigger is active or
inactive; does not change active state and does not reset the scheduled
next-fire time of an active timer.
"""
session, err = resolve_session(request)
if err:
return err
trigger_id = request.match_info["trigger_id"]
tdef = getattr(session, "available_triggers", {}).get(trigger_id)
if tdef is None:
return web.json_response(
{"error": f"Trigger '{trigger_id}' not found"},
status=404,
)
if getattr(session, "colony_runtime", None) is None:
return web.json_response({"error": "Colony not loaded"}, status=409)
executor = getattr(session, "queen_executor", None)
queen_node = getattr(executor, "node_registry", {}).get("queen") if executor else None
if queen_node is None:
return web.json_response({"error": "Queen not ready"}, status=409)
from framework.agent_loop.agent_loop import TriggerEvent
try:
await queen_node.inject_trigger(
TriggerEvent(
trigger_type=tdef.trigger_type,
source_id=trigger_id,
payload={
"task": tdef.task or "",
"trigger_config": tdef.trigger_config,
"forced": True,
},
)
)
except Exception as exc: # noqa: BLE001
return web.json_response(
{"error": f"Failed to fire trigger: {exc}"},
status=500,
)
from framework.tools.queen_lifecycle_tools import _emit_trigger_fired
await _emit_trigger_fired(session, trigger_id, tdef.trigger_type)
return web.json_response({"status": "fired", "trigger_id": trigger_id})
async def handle_activate_trigger(request: web.Request) -> web.Response:
"""POST /api/sessions/{session_id}/triggers/{trigger_id}/activate — start a trigger."""
session, err = resolve_session(request)
if err:
return err
trigger_id = request.match_info["trigger_id"]
available = getattr(session, "available_triggers", {})
tdef = available.get(trigger_id)
if tdef is None:
return web.json_response(
{"error": f"Trigger '{trigger_id}' not found"},
status=404,
)
if trigger_id in getattr(session, "active_trigger_ids", set()):
return web.json_response({"status": "already_active", "trigger_id": trigger_id})
from framework.tools.queen_lifecycle_tools import (
_persist_active_triggers,
_start_trigger_timer,
_start_trigger_webhook,
)
try:
if tdef.trigger_type == "timer":
await _start_trigger_timer(session, trigger_id, tdef)
elif tdef.trigger_type == "webhook":
await _start_trigger_webhook(session, trigger_id, tdef)
else:
return web.json_response(
{"error": f"Unsupported trigger type: {tdef.trigger_type}"},
status=400,
)
except Exception as exc: # noqa: BLE001
return web.json_response(
{"error": f"Failed to start trigger: {exc}"},
status=500,
)
tdef.active = True
session.active_trigger_ids.add(trigger_id)
session_id = request.match_info["session_id"]
await _persist_active_triggers(session, session_id)
bus = getattr(session, "event_bus", None)
if bus:
from framework.host.event_bus import AgentEvent, EventType
runner = getattr(session, "runner", None)
colony_entry = runner.graph.entry_node if runner else None
config_out = dict(tdef.trigger_config)
mono = getattr(session, "trigger_next_fire", {}).get(trigger_id)
if mono is not None:
remaining = max(0.0, mono - time.monotonic())
config_out["next_fire_in"] = remaining
config_out["next_fire_at"] = int((time.time() + remaining) * 1000)
stats = getattr(session, "trigger_fire_stats", {}).get(trigger_id)
if stats:
config_out["fire_count"] = stats.get("fire_count", 0)
if stats.get("last_fired_at") is not None:
config_out["last_fired_at"] = stats["last_fired_at"]
await bus.publish(
AgentEvent(
type=EventType.TRIGGER_ACTIVATED,
stream_id="queen",
data={
"trigger_id": trigger_id,
"trigger_type": tdef.trigger_type,
"trigger_config": config_out,
"name": tdef.description or trigger_id,
**({"entry_node": colony_entry} if colony_entry else {}),
},
)
)
return web.json_response({"status": "activated", "trigger_id": trigger_id})
async def handle_deactivate_trigger(request: web.Request) -> web.Response:
"""POST /api/sessions/{session_id}/triggers/{trigger_id}/deactivate — stop a trigger.
Cancels the running timer / webhook subscription but KEEPS the trigger
definition in triggers.json so the user can re-activate later.
"""
session, err = resolve_session(request)
if err:
return err
trigger_id = request.match_info["trigger_id"]
if trigger_id not in getattr(session, "active_trigger_ids", set()):
return web.json_response({"status": "already_inactive", "trigger_id": trigger_id})
task = session.active_timer_tasks.pop(trigger_id, None)
if task and not task.done():
task.cancel()
with contextlib.suppress(asyncio.CancelledError):
await task
getattr(session, "trigger_next_fire", {}).pop(trigger_id, None)
webhook_subs = getattr(session, "active_webhook_subs", {})
if sub_id := webhook_subs.pop(trigger_id, None):
with contextlib.suppress(Exception):
session.event_bus.unsubscribe(sub_id)
session.active_trigger_ids.discard(trigger_id)
available = getattr(session, "available_triggers", {})
tdef = available.get(trigger_id)
if tdef:
tdef.active = False
from framework.tools.queen_lifecycle_tools import _persist_active_triggers
session_id = request.match_info["session_id"]
await _persist_active_triggers(session, session_id)
bus = getattr(session, "event_bus", None)
if bus:
from framework.host.event_bus import AgentEvent, EventType
await bus.publish(
AgentEvent(
type=EventType.TRIGGER_DEACTIVATED,
stream_id="queen",
data={
"trigger_id": trigger_id,
"name": (tdef.description or trigger_id) if tdef else trigger_id,
},
)
)
return web.json_response({"status": "deactivated", "trigger_id": trigger_id})
async def handle_session_colonies(request: web.Request) -> web.Response:
"""GET /api/sessions/{session_id}/colonies — list loaded colonies."""
manager = _get_manager(request)
session_id = request.match_info["session_id"]
session = manager.get_session(session_id)
if session is None:
return web.json_response(
{"error": f"Session '{session_id}' not found"},
status=404,
)
colonies = session.colony_runtime.list_graphs() if session.colony_runtime else []
return web.json_response({"colonies": colonies})
_EVENTS_HISTORY_DEFAULT_LIMIT = 2000
_EVENTS_HISTORY_MAX_LIMIT = 10000
# Files at or below this size use the simple forward-scan path (cheap enough
# that the seek-backward dance isn't worth it). Above this threshold we read
# the tail directly from end-of-file so a 50 MB log doesn't have to be paged
# through entirely just to surface the last 2000 lines.
_EVENTS_HISTORY_REVERSE_TAIL_THRESHOLD_BYTES = 1 << 20 # 1 MB
_EVENTS_HISTORY_REVERSE_TAIL_CHUNK_BYTES = 64 * 1024
def _read_events_tail(events_path: Path, limit: int) -> tuple[list[dict], int, bool]:
"""Read the tail of an append-only JSONL events log.
Returns ``(events, total, truncated)``. ``events`` is at most ``limit``
lines, oldest-first. ``total`` is the number of lines in the file: an exact
non-blank count on the small-file path, and a fast newline count on the
large-file path (blank lines, if any, are included in that count).
Two paths:
- Small files (< ~1 MB): forward scan. Cheap, and gives an exact total for
free. ``json.loads`` is deferred until after the bounded deque, so a line
that is about to be dropped is never parsed.
- Large files: seek to EOF and read backward in 64 KB chunks until we have
at least ``limit`` complete lines. Parses only the tail. ``total`` is
counted by a separate forward byte-scan that just counts newlines —
no JSON parse — so it stays cheap even for huge files.
Without these optimizations, mounting the chat for a long-running queen
with a ~50k-event log used to spend most of its time inside ``json.loads``
on the server thread (and block the event loop while doing it).
"""
from collections import deque
file_size = events_path.stat().st_size
if file_size <= _EVENTS_HISTORY_REVERSE_TAIL_THRESHOLD_BYTES:
tail_raw: deque[str] = deque(maxlen=limit)
total = 0
with open(events_path, encoding="utf-8") as f:
for line in f:
line = line.strip()
if not line:
continue
total += 1
tail_raw.append(line)
events: list[dict] = []
for raw in tail_raw:
try:
events.append(json.loads(raw))
except json.JSONDecodeError:
continue
return events, total, total > len(events)
# Large-file path: read backward until we have enough lines.
import os as _os
chunk_size = _EVENTS_HISTORY_REVERSE_TAIL_CHUNK_BYTES
pieces: list[bytes] = []
newline_count = 0
with open(events_path, "rb") as fb:
fb.seek(0, _os.SEEK_END)
pos = fb.tell()
while pos > 0 and newline_count <= limit:
read_size = min(chunk_size, pos)
pos -= read_size
fb.seek(pos)
chunk = fb.read(read_size)
newline_count += chunk.count(b"\n")
pieces.append(chunk)
pieces.reverse()
blob = b"".join(pieces)
# Drop the leading partial line unless we read from offset 0.
raw_lines = blob.split(b"\n")
if pos > 0 and raw_lines:
raw_lines = raw_lines[1:]
decoded = [ln.decode("utf-8", errors="replace").strip() for ln in raw_lines]
decoded = [ln for ln in decoded if ln]
if len(decoded) > limit:
decoded = decoded[-limit:]
events = []
for raw in decoded:
try:
events.append(json.loads(raw))
except json.JSONDecodeError:
continue
# Separate fast pass for total: count newlines only, no JSON parse.
total = 0
with open(events_path, "rb") as fb:
while True:
chunk = fb.read(1 << 20)
if not chunk:
break
total += chunk.count(b"\n")
# File may end without a trailing newline; if so, the last non-empty line
# was missed. Count it.
if file_size > 0:
with open(events_path, "rb") as fb:
fb.seek(-1, _os.SEEK_END)
if fb.read(1) != b"\n":
total += 1
return events, total, total > len(events)
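# Illustrative call (path and numbers are hypothetical): for a 120k-line
# events.jsonl and limit=2000,
#   events, total, truncated = _read_events_tail(events_path, 2000)
# returns the newest 2000 parsed events with total == 120000 and
# truncated == True, assuming every tail line parses as JSON.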
async def handle_session_events_history(request: web.Request) -> web.Response:
"""GET /api/sessions/{session_id}/events/history — persisted eventbus log.
Reads ``events.jsonl`` from the session directory on disk so it works for
both live sessions and cold (post-server-restart) sessions. The frontend
replays these events through ``sseEventToChatMessage`` to fully reconstruct
the UI state on resume.
Query params:
limit: maximum number of events to return (default 2000, max 10000).
The TAIL of the file is returned — i.e. the most recent N events.
Older events are dropped and ``truncated`` is set to True.
Response shape::
{
"events": [...], # up to ``limit`` events, oldest-first
"session_id": "...",
"total": 12345, # total events in the file
"returned": 2000, # len(events)
"truncated": true, # total > returned
"limit": 2000, # the effective limit used
}
``events.jsonl`` is append-only chronological, so "last N lines" == "most
recent N events". Long-running colonies have produced files with 50k+
events; before this cap, restoring on page-mount shipped the whole thing
down the wire and blocked the UI for seconds.
The actual file read runs in a worker thread via ``asyncio.to_thread`` so
it doesn't block the event loop while other requests are in flight.
"""
session_id = request.match_info["session_id"]
try:
limit = int(request.query.get("limit", str(_EVENTS_HISTORY_DEFAULT_LIMIT)))
except ValueError:
limit = _EVENTS_HISTORY_DEFAULT_LIMIT
limit = max(1, min(limit, _EVENTS_HISTORY_MAX_LIMIT))
from framework.server.session_manager import _find_queen_session_dir
queen_dir = _find_queen_session_dir(session_id)
events_path = queen_dir / "events.jsonl"
if not events_path.exists():
return web.json_response(
{
"events": [],
"session_id": session_id,
"total": 0,
"returned": 0,
"truncated": False,
"limit": limit,
}
)
try:
events, total, truncated = await asyncio.to_thread(_read_events_tail, events_path, limit)
except OSError:
return web.json_response(
{
"events": [],
"session_id": session_id,
"total": 0,
"returned": 0,
"truncated": False,
"limit": limit,
}
)
return web.json_response(
{
"events": events,
"session_id": session_id,
"total": total,
"returned": len(events),
"truncated": truncated,
"limit": limit,
}
)
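# Illustrative request/response (numbers are made up):
#   GET /api/sessions/{session_id}/events/history?limit=500
#   -> {"events": [...], "session_id": "...", "total": 48210,
#       "returned": 500, "truncated": true, "limit": 500}
# A non-integer limit falls back to the default of 2000; numeric values are
# clamped to the range [1, 10000].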
async def handle_session_history(request: web.Request) -> web.Response:
"""GET /api/sessions/history — all queen sessions on disk (live + cold).
Returns every queen session directory on disk, newest first.
Live sessions have ``live: true, cold: false``; sessions that survived a
server restart have ``live: false, cold: true``.
"""
manager = _get_manager(request)
live_sessions = {s.id: s for s in manager.list_sessions()}
disk_sessions = SessionManager.list_cold_sessions()
for s in disk_sessions:
if s["session_id"] in live_sessions:
live = live_sessions[s["session_id"]]
s["cold"] = False
s["live"] = True
# Fill in agent_name from live memory if meta.json wasn't written yet
if not s.get("agent_name") and live.worker_info:
s["agent_name"] = live.worker_info.name
if not s.get("agent_path") and live.worker_path:
s["agent_path"] = str(live.worker_path)
return web.json_response({"sessions": disk_sessions})
async def handle_delete_history_session(request: web.Request) -> web.Response:
"""DELETE /api/sessions/history/{session_id} — permanently remove a session.
Stops the live session (if still running) and deletes the queen session
directory from disk.
This is the frontend 'delete from history' action.
"""
manager = _get_manager(request)
session_id = request.match_info["session_id"]
# Stop the live session if it exists (best-effort)
if manager.get_session(session_id):
await manager.stop_session(session_id)
# Delete the queen session directory from disk
from framework.server.session_manager import _find_queen_session_dir
queen_session_dir = _find_queen_session_dir(session_id)
if queen_session_dir.exists() and queen_session_dir.is_dir():
try:
shutil.rmtree(queen_session_dir)
except OSError as e:
logger.warning("Failed to delete session directory %s: %s", queen_session_dir, e)
return web.json_response({"error": f"Failed to delete session: {e}"}, status=500)
return web.json_response({"deleted": session_id})
# ------------------------------------------------------------------
# Agent discovery (not session-specific)
# ------------------------------------------------------------------
async def handle_discover(request: web.Request) -> web.Response:
"""GET /api/discover — discover agents from filesystem."""
from framework.agents.discovery import discover_agents
manager = _get_manager(request)
loaded_paths = {str(s.worker_path) for s in manager.list_sessions() if s.worker_path}
groups = discover_agents()
result = {}
for category, entries in groups.items():
result[category] = [
{
"path": str(entry.path.resolve()),
"name": entry.name,
"description": entry.description,
"category": entry.category,
"session_count": entry.session_count,
"run_count": entry.run_count,
"node_count": entry.node_count,
"tool_count": entry.tool_count,
"tags": entry.tags,
"last_active": entry.last_active,
"created_at": entry.created_at,
"icon": entry.icon,
"is_loaded": str(entry.path.resolve()) in loaded_paths,
"workers": [w.to_dict() for w in entry.workers],
}
for entry in entries
]
return web.json_response(result)
async def handle_delete_agent(request: web.Request) -> web.Response:
"""DELETE /api/agents — permanently remove an agent from disk.
Body: {"agent_path": "exports/my_agent"}
Stops any live sessions for this agent, then deletes the agent
directory so it no longer appears in /discover.
"""
manager = _get_manager(request)
body = await request.json()
agent_path = body.get("agent_path")
if not agent_path:
return web.json_response({"error": "agent_path is required"}, status=400)
try:
resolved = validate_agent_path(agent_path)
except ValueError as exc:
return web.json_response({"error": str(exc)}, status=400)
# Reject deletion of framework agents ($HIVE_HOME/agents/) — those are internal
from framework.config import HIVE_HOME
hive_agents_dir = HIVE_HOME / "agents"
if resolved.is_relative_to(hive_agents_dir):
return web.json_response({"error": "Cannot delete framework agents"}, status=403)
# Stop any live sessions that use this agent
for session in list(manager.list_sessions()):
if session.worker_path and str(session.worker_path) == str(resolved):
try:
await manager.stop_session(session.id)
except Exception:
pass
# Delete the agent directory from disk
if resolved.exists() and resolved.is_dir():
try:
shutil.rmtree(resolved)
except OSError as e:
return web.json_response({"error": f"Failed to delete agent directory: {e}"}, status=500)
return web.json_response({"deleted": str(resolved)})
async def handle_reveal_session_folder(request: web.Request) -> web.Response:
"""POST /api/sessions/{session_id}/reveal — open session data folder in the OS file manager."""
manager: SessionManager = request.app["manager"]
session_id = request.match_info["session_id"]
session = manager.get_session(session_id)
storage_session_id = (session.queen_resume_from or session.id) if session else session_id
if session:
from framework.server.session_manager import _queen_session_dir
folder = _queen_session_dir(storage_session_id, session.queen_name)
else:
from framework.server.session_manager import _find_queen_session_dir
folder = _find_queen_session_dir(storage_session_id)
folder.mkdir(parents=True, exist_ok=True)
try:
if sys.platform == "darwin":
subprocess.Popen(["open", str(folder)])
elif sys.platform == "win32":
subprocess.Popen(["explorer", str(folder)])
else:
subprocess.Popen(["xdg-open", str(folder)])
except Exception as exc:
return web.json_response({"error": str(exc)}, status=500)
return web.json_response({"path": str(folder)})
async def handle_update_colony_metadata(request: web.Request) -> web.Response:
"""PATCH /api/agents/metadata — update colony metadata (e.g. icon).
Body: {"agent_path": "...", "icon": "rocket"}
"""
try:
body = await request.json()
except Exception:
return web.json_response({"error": "Invalid JSON body"}, status=400)
agent_path = body.get("agent_path")
if not agent_path:
return web.json_response({"error": "agent_path is required"}, status=400)
try:
resolved = validate_agent_path(agent_path)
except ValueError as exc:
return web.json_response({"error": str(exc)}, status=400)
metadata_path = resolved / "metadata.json"
metadata: dict = {}
if metadata_path.exists():
try:
metadata = json.loads(metadata_path.read_text(encoding="utf-8"))
except Exception:
pass
if "icon" in body:
metadata["icon"] = body["icon"]
metadata_path.write_text(json.dumps(metadata, indent=2, ensure_ascii=False), encoding="utf-8")
return web.json_response({"ok": True})
# ------------------------------------------------------------------
# Route registration
# ------------------------------------------------------------------
def register_routes(app: web.Application) -> None:
"""Register session routes."""
# Discovery & agent management
app.router.add_get("/api/discover", handle_discover)
app.router.add_delete("/api/agents", handle_delete_agent)
app.router.add_patch("/api/agents/metadata", handle_update_colony_metadata)
# Session lifecycle
app.router.add_post("/api/sessions", handle_create_session)
app.router.add_get("/api/sessions", handle_list_live_sessions)
# history must be registered before {session_id} so it takes priority
app.router.add_get("/api/sessions/history", handle_session_history)
app.router.add_delete("/api/sessions/history/{session_id}", handle_delete_history_session)
app.router.add_get("/api/sessions/{session_id}", handle_get_live_session)
app.router.add_delete("/api/sessions/{session_id}", handle_stop_session)
# Colony lifecycle
app.router.add_post("/api/sessions/{session_id}/colony", handle_load_colony)
app.router.add_delete("/api/sessions/{session_id}/colony", handle_unload_colony)
# Session info
app.router.add_post("/api/sessions/{session_id}/reveal", handle_reveal_session_folder)
app.router.add_get("/api/sessions/{session_id}/stats", handle_session_stats)
app.router.add_get("/api/sessions/{session_id}/entry-points", handle_session_entry_points)
app.router.add_patch("/api/sessions/{session_id}/triggers/{trigger_id}", handle_update_trigger_task)
app.router.add_post(
"/api/sessions/{session_id}/triggers/{trigger_id}/activate",
handle_activate_trigger,
)
app.router.add_post(
"/api/sessions/{session_id}/triggers/{trigger_id}/deactivate",
handle_deactivate_trigger,
)
app.router.add_post(
"/api/sessions/{session_id}/triggers/{trigger_id}/run",
handle_run_trigger,
)
app.router.add_get("/api/sessions/{session_id}/colonies", handle_session_colonies)
app.router.add_get("/api/sessions/{session_id}/events/history", handle_session_events_history)