hive/core/framework/agents/discovery.py

"""Agent discovery — scan known directories and return categorised AgentEntry lists."""

from __future__ import annotations

import json
from dataclasses import dataclass, field
from pathlib import Path


@dataclass
class AgentEntry:
    """Lightweight agent metadata for the picker / API discover endpoint."""

    path: Path
    name: str
    description: str
    category: str
    session_count: int = 0
    run_count: int = 0
    node_count: int = 0
    tool_count: int = 0
    tags: list[str] = field(default_factory=list)
    last_active: str | None = None


def _get_last_active(agent_path: Path) -> str | None:
    """Return the most recent updated_at timestamp across all sessions.

    Checks both worker sessions (``~/.hive/agents/{name}/sessions/``) and
    queen sessions (``~/.hive/agents/queens/default/sessions/``) whose
    ``meta.json`` references the same *agent_path*.
    """
    from datetime import datetime

    agent_name = agent_path.name
    latest: str | None = None

    # 1. Worker sessions
    sessions_dir = Path.home() / ".hive" / "agents" / agent_name / "sessions"
    if sessions_dir.exists():
        for session_dir in sessions_dir.iterdir():
            if not session_dir.is_dir() or not session_dir.name.startswith("session_"):
                continue
            state_file = session_dir / "state.json"
            if not state_file.exists():
                continue
            try:
                data = json.loads(state_file.read_text(encoding="utf-8"))
                ts = data.get("timestamps", {}).get("updated_at")
                if ts and (latest is None or ts > latest):
                    latest = ts
            except Exception:
                continue

    # 2. Queen sessions (scan all queen identity directories)
    from framework.config import QUEENS_DIR

    if QUEENS_DIR.exists():
        resolved = agent_path.resolve()
        for queen_dir in QUEENS_DIR.iterdir():
            if not queen_dir.is_dir():
                continue
            sessions_dir = queen_dir / "sessions"
            if not sessions_dir.exists():
                continue
            for d in sessions_dir.iterdir():
                if not d.is_dir():
                    continue
                meta_file = d / "meta.json"
                if not meta_file.exists():
                    continue
                try:
                    meta = json.loads(meta_file.read_text(encoding="utf-8"))
                    stored = meta.get("agent_path")
                    if not stored or Path(stored).resolve() != resolved:
                        continue
                    ts = datetime.fromtimestamp(d.stat().st_mtime).isoformat()
                    if latest is None or ts > latest:
                        latest = ts
                except Exception:
                    continue

    return latest


def _count_sessions(agent_name: str) -> int:
    """Count session directories under ~/.hive/agents/{agent_name}/sessions/."""
    sessions_dir = Path.home() / ".hive" / "agents" / agent_name / "sessions"
    if not sessions_dir.exists():
        return 0
    return sum(1 for d in sessions_dir.iterdir() if d.is_dir() and d.name.startswith("session_"))


def _count_runs(agent_name: str) -> int:
    """Count unique run_ids across all sessions for an agent."""
    sessions_dir = Path.home() / ".hive" / "agents" / agent_name / "sessions"
    if not sessions_dir.exists():
        return 0
    run_ids: set[str] = set()
    for session_dir in sessions_dir.iterdir():
        if not session_dir.is_dir() or not session_dir.name.startswith("session_"):
            continue
        # runs.jsonl lives inside workspace subdirectories
        for runs_file in session_dir.rglob("runs.jsonl"):
            try:
                for line in runs_file.read_text(encoding="utf-8").splitlines():
                    line = line.strip()
                    if not line:
                        continue
                    record = json.loads(line)
                    rid = record.get("run_id")
                    if rid:
                        run_ids.add(rid)
            except Exception:
                continue
    return len(run_ids)


def _extract_agent_stats(agent_path: Path) -> tuple[int, int, list[str]]:
    """Extract node count, tool count, and tags from an agent directory.

    Checks agent.json (declarative) first, then agent.py (legacy).
    """
    import ast

    node_count, tool_count, tags = 0, 0, []

    # Declarative JSON agents (preferred)
    agent_json = agent_path / "agent.json"
    if agent_json.exists():
        try:
            data = json.loads(agent_json.read_text(encoding="utf-8"))
            if isinstance(data, dict):
                json_nodes = data.get("nodes", [])
                node_count = len(json_nodes)
                tools: set[str] = set()
                for n in json_nodes:
                    node_tools = n.get("tools", {})
                    if isinstance(node_tools, dict):
                        tools.update(node_tools.get("allowed", []))
                    elif isinstance(node_tools, list):
                        tools.update(node_tools)
                tool_count = len(tools)
                return node_count, tool_count, tags
        except Exception:
            pass

    # Legacy: agent.py (AST-parsed)
    agent_py = agent_path / "agent.py"
    if agent_py.exists():
        try:
            tree = ast.parse(agent_py.read_text(encoding="utf-8"))
            for node in ast.walk(tree):
                if isinstance(node, ast.Assign):
                    for target in node.targets:
                        if isinstance(target, ast.Name) and target.id == "nodes":
                            if isinstance(node.value, ast.List):
                                node_count = len(node.value.elts)
        except Exception:
            pass

    return node_count, tool_count, tags


def discover_agents() -> dict[str, list[AgentEntry]]:
    """Discover agents from all known sources grouped by category."""
    from framework.config import COLONIES_DIR
    from framework.loader.cli import (
        _extract_python_agent_metadata,
        _get_framework_agents_dir,
        _is_valid_agent_dir,
    )

    groups: dict[str, list[AgentEntry]] = {}
    sources = [
        ("Your Agents", COLONIES_DIR),
        ("Your Agents", Path("exports")),  # compat fallback
        ("Framework", _get_framework_agents_dir()),
        ("Examples", Path("examples/templates")),
    ]

    # Track seen agent directory names to avoid duplicates when the same
    # agent exists in both colonies/ and exports/ (colonies takes priority).
    _seen_agent_names: set[str] = set()

    for category, base_dir in sources:
        if not base_dir.exists():
            continue
        entries: list[AgentEntry] = []
        for path in sorted(base_dir.iterdir(), key=lambda p: p.name):
            if not _is_valid_agent_dir(path):
                continue
            if path.name in _seen_agent_names:
                continue
            _seen_agent_names.add(path.name)

            name, desc = _extract_python_agent_metadata(path)
            config_fallback_name = path.name.replace("_", " ").title()
            used_config = name != config_fallback_name

            node_count, tool_count, tags = _extract_agent_stats(path)
            if not used_config:
                # Try agent.json (declarative) for metadata
                agent_json_path = path / "agent.json"
                if agent_json_path.exists():
                    try:
                        data = json.loads(
                            agent_json_path.read_text(encoding="utf-8"),
                        )
                        if isinstance(data, dict):
                            raw_name = data.get("name", name)
                            if "-" in raw_name and " " not in raw_name:
                                raw_name = raw_name.replace("-", " ").title()
                            name = raw_name
                            desc = data.get("description", desc)
                    except Exception:
                        pass

            entries.append(
                AgentEntry(
                    path=path,
                    name=name,
                    description=desc,
                    category=category,
                    session_count=_count_sessions(path.name),
                    run_count=_count_runs(path.name),
                    node_count=node_count,
                    tool_count=tool_count,
                    tags=tags,
                    last_active=_get_last_active(path),
                )
            )
        if entries:
            existing = groups.get(category, [])
            existing.extend(entries)
            groups[category] = existing

    return groups