feat: add runtime config and model switching API

This commit is contained in:
Bryan
2026-04-07 18:42:08 -07:00
parent c92662bdb1
commit e7506fcd25
4 changed files with 555 additions and 3 deletions
+48 -2
View File
@@ -42,6 +42,48 @@ def get_hive_config() -> dict[str, Any]:
return {}
# ---------------------------------------------------------------------------
# Credential store helpers (for BYOK keys)
# ---------------------------------------------------------------------------
# Provider name → credential store ID mapping.
# Keys are lowercase provider names as they appear in the llm config section;
# values are the IDs under which BYOK keys are saved in the credential store.
_PROVIDER_CRED_MAP: dict[str, str] = {
    "anthropic": "anthropic",
    "openai": "openai",
    "gemini": "gemini",
    "google": "gemini",  # alias: "google" shares the gemini store entry
    "minimax": "minimax",
    "groq": "groq",
    "cerebras": "cerebras",
    "openrouter": "openrouter",
    "mistral": "mistral",
    "together": "together",
    "together_ai": "together",  # alias: shares the together store entry
    "deepseek": "deepseek",
    "kimi": "kimi",
    "hive": "hive",
}
def _get_api_key_from_credential_store(provider: str) -> str | None:
    """Fetch a BYOK API key for *provider* from the encrypted credential store.

    Returns None when the master key is absent, the provider is unknown, or
    the credential store cannot be opened/read.
    """
    # Without the master key the encrypted store cannot be decrypted at all,
    # so skip the (potentially expensive) store access entirely.
    if not os.environ.get("HIVE_CREDENTIAL_KEY"):
        return None
    store_id = _PROVIDER_CRED_MAP.get(provider.lower())
    if not store_id:
        return None
    try:
        from framework.credentials import CredentialStore

        return CredentialStore.with_encrypted_storage().get(store_id)
    except Exception:
        # Best-effort lookup: any store failure is treated as "no key found".
        return None
# ---------------------------------------------------------------------------
# Derived helpers
# ---------------------------------------------------------------------------
@@ -280,8 +322,12 @@ def get_api_key() -> str | None:
# Standard env-var path (covers ZAI Code and all API-key providers)
api_key_env_var = llm.get("api_key_env_var")
if api_key_env_var:
return os.environ.get(api_key_env_var)
return None
key = os.environ.get(api_key_env_var)
if key:
return key
# Credential store fallback — BYOK keys stored via the UI
return _get_api_key_from_credential_store(llm.get("provider", ""))
# OAuth credentials for Antigravity are fetched from the opencode-antigravity-auth project.
+30
View File
@@ -612,6 +612,36 @@ class LiteLLMProvider(LLMProvider):
"LiteLLM is not installed. Please install it with: uv pip install litellm"
)
def reconfigure(self, model: str, api_key: str | None = None, api_base: str | None = None) -> None:
    """Hot-swap the model, API key, and/or base URL on this provider instance.

    Since the same LiteLLMProvider object is shared by reference across the
    session, queen runner, agent runtime, and execution streams, mutating
    these attributes in-place propagates to all callers on the next LLM call.

    Args:
        model: New model identifier, possibly with a provider prefix
            (e.g. "kimi/...", "hive/...", or an Ollama model name).
        api_key: API key or OAuth token to use, or None.
        api_base: Explicit base URL; when None, a default is derived from
            the original (un-rewritten) model name.
    """
    # Keep the caller-supplied name: the default api_base is derived from it
    # below, before any provider-prefix rewriting.
    _original_model = model
    if _is_ollama_model(model):
        model = _ensure_ollama_chat_prefix(model)
    elif model.lower().startswith("kimi/"):
        # Kimi Code is served through an Anthropic-compatible endpoint.
        model = "anthropic/" + model[len("kimi/"):]
        # Strip a trailing "/v1" — presumably the Anthropic-style client
        # appends its own version path (TODO confirm against the endpoint).
        if api_base and api_base.rstrip("/").endswith("/v1"):
            api_base = api_base.rstrip("/")[:-3]
    elif model.lower().startswith("hive/"):
        # Hive keys proxy to an Anthropic-compatible endpoint as well.
        model = "anthropic/" + model[len("hive/"):]
        if api_base and api_base.rstrip("/").endswith("/v1"):
            api_base = api_base.rstrip("/")[:-3]
    self.model = model
    self.api_key = api_key
    self.api_base = api_base or self._default_api_base_for_model(_original_model)
    # Claude Code OAuth tokens use the "sk-ant-oat" prefix; such requests
    # must carry the Claude Code user agent header.
    self._claude_code_oauth = bool(api_key and api_key.startswith("sk-ant-oat"))
    if self._claude_code_oauth:
        eh = self.extra_kwargs.setdefault("extra_headers", {})
        eh.setdefault("user-agent", CLAUDE_CODE_USER_AGENT)
    # Flag the special backends by URL so request shaping can adapt.
    self._codex_backend = bool(
        self.api_base and "chatgpt.com/backend-api/codex" in self.api_base
    )
    self._antigravity = bool(self.api_base and "localhost:8069" in self.api_base)
# Note: The Codex ChatGPT backend is a Responses API endpoint at
# chatgpt.com/backend-api/codex/responses. LiteLLM's model registry
# correctly marks codex models with mode="responses", so we do NOT
+3 -1
View File
@@ -124,7 +124,7 @@ async def cors_middleware(request: web.Request, handler):
if _is_cors_allowed(origin):
response.headers["Access-Control-Allow-Origin"] = origin
response.headers["Access-Control-Allow-Methods"] = "GET, POST, DELETE, OPTIONS"
response.headers["Access-Control-Allow-Methods"] = "GET, POST, PUT, PATCH, DELETE, OPTIONS"
response.headers["Access-Control-Allow-Headers"] = "Content-Type"
response.headers["Access-Control-Max-Age"] = "3600"
@@ -250,6 +250,7 @@ def create_app(model: str | None = None) -> web.Application:
app.router.add_get("/api/browser/status", handle_browser_status)
# Register route modules
from framework.server.routes_config import register_routes as register_config_routes
from framework.server.routes_credentials import register_routes as register_credential_routes
from framework.server.routes_events import register_routes as register_event_routes
from framework.server.routes_execution import register_routes as register_execution_routes
@@ -257,6 +258,7 @@ def create_app(model: str | None = None) -> web.Application:
from framework.server.routes_logs import register_routes as register_log_routes
from framework.server.routes_sessions import register_routes as register_session_routes
register_config_routes(app)
register_credential_routes(app)
register_execution_routes(app)
register_event_routes(app)
+474
View File
@@ -0,0 +1,474 @@
"""LLM configuration routes — BYOK key management, subscriptions, and model selection.
Routes:
- GET /api/config/llm current active LLM configuration
- PUT /api/config/llm update active provider + model (hot-swaps running sessions)
- GET /api/config/models   curated provider→models list
"""
import json
import logging
import os
import tempfile
from pathlib import Path
from aiohttp import web
from framework.config import (
HIVE_CONFIG_FILE,
OPENROUTER_API_BASE,
_PROVIDER_CRED_MAP,
get_hive_config,
)
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Provider metadata (mirrors quickstart.sh)
# ---------------------------------------------------------------------------
# Environment variable name per provider (lowercase provider → env var).
# NOTE(review): "kimi" and "hive" are deliberately absent — presumably they
# authenticate via subscription tokens / the credential store only; confirm.
PROVIDER_ENV_VARS: dict[str, str] = {
    "anthropic": "ANTHROPIC_API_KEY",
    "openai": "OPENAI_API_KEY",
    "gemini": "GEMINI_API_KEY",
    "google": "GOOGLE_API_KEY",
    "minimax": "MINIMAX_API_KEY",
    "groq": "GROQ_API_KEY",
    "cerebras": "CEREBRAS_API_KEY",
    "openrouter": "OPENROUTER_API_KEY",
    "mistral": "MISTRAL_API_KEY",
    "together": "TOGETHER_API_KEY",
    "together_ai": "TOGETHER_API_KEY",  # alias: same env var as "together"
    "deepseek": "DEEPSEEK_API_KEY",
}
# ---------------------------------------------------------------------------
# Subscription metadata (mirrors quickstart.sh subscription modes)
# ---------------------------------------------------------------------------
# Each entry: stable "id" used by the HTTP API, display metadata, the config
# "flag" that marks the subscription active in ~/.hive config, and the model
# used when the client does not request one explicitly.
SUBSCRIPTIONS: list[dict] = [
    {
        "id": "claude_code",
        "name": "Claude Code Subscription",
        "description": "Use your Claude Max/Pro plan",
        "provider": "anthropic",
        "flag": "use_claude_code_subscription",
        "default_model": "claude-sonnet-4-20250514",
    },
    {
        "id": "codex",
        "name": "OpenAI Codex Subscription",
        "description": "Use your Codex/ChatGPT Plus plan",
        "provider": "openai",
        "flag": "use_codex_subscription",
        "default_model": "gpt-5-mini",
        # Codex is the only subscription that pins an explicit api_base.
        "api_base": "https://chatgpt.com/backend-api/codex",
    },
    {
        "id": "kimi_code",
        "name": "Kimi Code Subscription",
        "description": "Use your Kimi Code plan",
        "provider": "kimi",
        "flag": "use_kimi_code_subscription",
        "default_model": "kimi/moonshot-v1",
    },
    {
        "id": "antigravity",
        "name": "Antigravity Subscription",
        "description": "Use your Google/Gemini plan",
        "provider": "antigravity",
        "flag": "use_antigravity_subscription",
        "default_model": "antigravity/gemini-2.5-pro",
    },
]

# All subscription config flags (used to clear inactive ones on update).
_ALL_SUBSCRIPTION_FLAGS: list[str] = [s["flag"] for s in SUBSCRIPTIONS]

# Map subscription ID → subscription metadata for O(1) lookup.
_SUBSCRIPTION_MAP: dict[str, dict] = {s["id"]: s for s in SUBSCRIPTIONS}
# Model catalogue — mirrors quickstart.sh MODEL_CHOICES_*.
# Per provider: model "id" (as passed to the provider), display "label",
# whether it is the "recommended" default choice in the UI, and the token
# limits ("max_tokens" output / "max_context_tokens" context window) that
# are written into the llm config section when the model is selected.
MODELS_CATALOGUE: dict[str, list[dict]] = {
    "anthropic": [
        {"id": "claude-haiku-4-5-20251001", "label": "Haiku 4.5 - Fast + cheap", "recommended": True, "max_tokens": 8192, "max_context_tokens": 180000},
        {"id": "claude-sonnet-4-20250514", "label": "Sonnet 4 - Fast + capable", "recommended": False, "max_tokens": 8192, "max_context_tokens": 180000},
        {"id": "claude-sonnet-4-5-20250929", "label": "Sonnet 4.5 - Best balance", "recommended": False, "max_tokens": 16384, "max_context_tokens": 180000},
        {"id": "claude-opus-4-6", "label": "Opus 4.6 - Most capable", "recommended": False, "max_tokens": 32768, "max_context_tokens": 180000},
    ],
    "openai": [
        {"id": "gpt-5-mini", "label": "GPT-5 Mini - Fast + cheap", "recommended": True, "max_tokens": 16384, "max_context_tokens": 120000},
        {"id": "gpt-5.2", "label": "GPT-5.2 - Most capable", "recommended": False, "max_tokens": 16384, "max_context_tokens": 120000},
    ],
    "gemini": [
        {"id": "gemini-3-flash-preview", "label": "Gemini 3 Flash - Fast", "recommended": True, "max_tokens": 8192, "max_context_tokens": 900000},
        {"id": "gemini-3.1-pro-preview", "label": "Gemini 3.1 Pro - Best quality", "recommended": False, "max_tokens": 8192, "max_context_tokens": 900000},
    ],
    "groq": [
        {"id": "moonshotai/kimi-k2-instruct-0905", "label": "Kimi K2 - Best quality", "recommended": True, "max_tokens": 8192, "max_context_tokens": 120000},
        {"id": "openai/gpt-oss-120b", "label": "GPT-OSS 120B - Fast reasoning", "recommended": False, "max_tokens": 8192, "max_context_tokens": 120000},
    ],
    "cerebras": [
        {"id": "zai-glm-4.7", "label": "ZAI-GLM 4.7 - Best quality", "recommended": True, "max_tokens": 8192, "max_context_tokens": 120000},
        {"id": "qwen3-235b-a22b-instruct-2507", "label": "Qwen3 235B - Frontier reasoning", "recommended": False, "max_tokens": 8192, "max_context_tokens": 120000},
    ],
    "minimax": [
        {"id": "MiniMax-M2.5", "label": "MiniMax-M2.5", "recommended": True, "max_tokens": 8192, "max_context_tokens": 120000},
    ],
    "mistral": [
        {"id": "mistral-large-latest", "label": "Mistral Large", "recommended": True, "max_tokens": 8192, "max_context_tokens": 120000},
    ],
    "together": [
        {"id": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "label": "Llama 3.3 70B Turbo", "recommended": True, "max_tokens": 8192, "max_context_tokens": 120000},
    ],
    "deepseek": [
        {"id": "deepseek-chat", "label": "DeepSeek Chat", "recommended": True, "max_tokens": 8192, "max_context_tokens": 120000},
    ],
    "openrouter": [
        {"id": "google/gemini-2.5-pro", "label": "Gemini 2.5 Pro", "recommended": True, "max_tokens": 8192, "max_context_tokens": 900000},
        {"id": "google/gemini-2.5-flash", "label": "Gemini 2.5 Flash", "recommended": False, "max_tokens": 8192, "max_context_tokens": 900000},
        {"id": "anthropic/claude-sonnet-4", "label": "Claude Sonnet 4 (via OR)", "recommended": False, "max_tokens": 8192, "max_context_tokens": 180000},
        {"id": "deepseek/deepseek-r1", "label": "DeepSeek R1", "recommended": False, "max_tokens": 8192, "max_context_tokens": 120000},
    ],
}

# Default model per provider (matches quickstart DEFAULT_MODELS).
# Each value should be the "recommended" entry of that provider's catalogue.
DEFAULT_MODELS: dict[str, str] = {
    "anthropic": "claude-haiku-4-5-20251001",
    "openai": "gpt-5-mini",
    "minimax": "MiniMax-M2.5",
    "gemini": "gemini-3-flash-preview",
    "groq": "moonshotai/kimi-k2-instruct-0905",
    "cerebras": "zai-glm-4.7",
    "mistral": "mistral-large-latest",
    "together": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
    "deepseek": "deepseek-chat",
    "openrouter": "google/gemini-2.5-pro",
}
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _get_api_base_for_provider(provider: str) -> str | None:
    """Return the api_base URL *provider* requires, or None for the default."""
    # OpenRouter is the only catalogue provider needing an explicit base URL.
    return OPENROUTER_API_BASE if provider.lower() == "openrouter" else None
def _find_model_info(provider: str, model_id: str) -> dict | None:
    """Return the catalogue entry for *model_id* under *provider*, if any.

    The entry carries the token limits; None means the model is unknown.
    """
    entries = MODELS_CATALOGUE.get(provider, [])
    return next((entry for entry in entries if entry["id"] == model_id), None)
def _write_config_atomic(config: dict) -> None:
    """Atomically persist *config* to ~/.hive/configuration.json.

    The JSON is written to a temp file in the same directory and then
    renamed over the target, so concurrent readers never observe a
    partially written file. The temp file is removed on failure.
    """
    target = HIVE_CONFIG_FILE
    target.parent.mkdir(parents=True, exist_ok=True)
    # mkstemp in the destination directory keeps the rename on one filesystem.
    fd, tmp_path = tempfile.mkstemp(dir=str(target.parent), suffix=".tmp")
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as handle:
            json.dump(config, handle, indent=2, ensure_ascii=False)
            handle.write("\n")
        Path(tmp_path).replace(target)
    except Exception:
        Path(tmp_path).unlink(missing_ok=True)
        raise
def _resolve_api_key(provider: str, request: web.Request) -> str | None:
    """Resolve *provider*'s API key: credential store first, then env var."""
    name = provider.lower()
    # BYOK keys saved through the UI take precedence over environment vars.
    cred_id = _PROVIDER_CRED_MAP.get(name)
    if cred_id:
        try:
            key = request.app["credential_store"].get(cred_id)
        except Exception:
            key = None
        if key:
            return key
    env_var = PROVIDER_ENV_VARS.get(name)
    return os.environ.get(env_var) if env_var else None
def _detect_subscriptions() -> list[str]:
    """Detect which subscription credentials are available on the system.

    Probes each known subscription's token getter in framework.runner.runner.
    A getter that raises (module missing, auth not set up) or returns a falsy
    token simply means that subscription is not available.

    Returns:
        Subscription IDs (see SUBSCRIPTIONS) whose credentials were found,
        in the fixed probe order below.
    """
    # (subscription id, token-getter name) — replaces four copy-pasted
    # try/except blocks with one data-driven loop; behavior is identical.
    probes = (
        ("claude_code", "get_claude_code_token"),
        ("codex", "get_codex_token"),
        ("kimi_code", "get_kimi_code_token"),
        ("antigravity", "get_antigravity_token"),
    )
    detected: list[str] = []
    for sub_id, getter_name in probes:
        try:
            import importlib

            runner = importlib.import_module("framework.runner.runner")
            if getattr(runner, getter_name)():
                detected.append(sub_id)
        except Exception:
            # Missing module/getter or a failed token fetch → not detected.
            pass
    return detected
def _get_active_subscription(llm_config: dict) -> str | None:
    """Return the ID of the subscription whose flag is set in *llm_config*.

    Returns None when no subscription flag is truthy.
    """
    active = (sub["id"] for sub in SUBSCRIPTIONS if llm_config.get(sub["flag"]))
    return next(active, None)
def _get_subscription_token(sub_id: str) -> str | None:
    """Fetch the token for subscription *sub_id*; None for unknown IDs."""
    if sub_id == "claude_code":
        from framework.runner.runner import get_claude_code_token as getter
    elif sub_id == "codex":
        from framework.runner.runner import get_codex_token as getter
    elif sub_id == "kimi_code":
        from framework.runner.runner import get_kimi_code_token as getter
    elif sub_id == "antigravity":
        from framework.runner.runner import get_antigravity_token as getter
    else:
        return None
    return getter()
def _hot_swap_sessions(request: web.Request, full_model: str, api_key: str | None, api_base: str | None) -> int:
    """Reconfigure the LLM provider on every running session in-place.

    Returns:
        Number of sessions whose provider exposed reconfigure() and was swapped.
    """
    from framework.server.session_manager import SessionManager

    manager: SessionManager = request.app["manager"]
    swapped_count = 0
    for sess in manager.list_sessions():
        provider = getattr(sess, "llm", None)
        if not provider or not hasattr(provider, "reconfigure"):
            continue
        provider.reconfigure(full_model, api_key=api_key, api_base=api_base)
        swapped_count += 1
    return swapped_count
# ------------------------------------------------------------------
# Handlers
# ------------------------------------------------------------------
async def handle_get_llm_config(request: web.Request) -> web.Response:
    """GET /api/config/llm — current active LLM configuration."""
    llm = get_hive_config().get("llm", {})
    provider = llm.get("provider", "")

    # Aliases share a key with their canonical provider; don't list them twice.
    aliases = ("google", "together_ai")
    connected = [
        pid
        for pid in PROVIDER_ENV_VARS
        if pid not in aliases and _resolve_api_key(pid, request) is not None
    ]

    payload = {
        "provider": provider,
        "model": llm.get("model", ""),
        # True when a key is resolvable (env var or credential store).
        "has_api_key": _resolve_api_key(provider, request) is not None,
        "max_tokens": llm.get("max_tokens"),
        "max_context_tokens": llm.get("max_context_tokens"),
        "connected_providers": connected,
        "active_subscription": _get_active_subscription(llm),
        "detected_subscriptions": _detect_subscriptions(),
        "subscriptions": SUBSCRIPTIONS,
    }
    return web.json_response(payload)
def _lookup_token_limits(provider: str, model: str, *, search_all: bool = False) -> tuple[int, int]:
    """Return (max_tokens, max_context_tokens) for *model*.

    Looks in *provider*'s catalogue first. When *search_all* is set, falls
    back to scanning every provider's catalogue (subscriptions reuse models
    from their underlying provider, e.g. claude_code → anthropic). Unknown
    models get conservative defaults.
    """
    model_info = _find_model_info(provider, model)
    if not model_info and search_all:
        for models in MODELS_CATALOGUE.values():
            model_info = next((m for m in models if m["id"] == model), None)
            if model_info:
                break
    if not model_info:
        return 8192, 120000
    return model_info["max_tokens"], model_info["max_context_tokens"]


def _apply_subscription_config(request: web.Request, body: dict) -> web.Response:
    """Activate a subscription mode, persist config, hot-swap sessions."""
    subscription_id = body["subscription"]
    sub = _SUBSCRIPTION_MAP.get(subscription_id)
    if not sub:
        return web.json_response(
            {"error": f"Unknown subscription: {subscription_id}"}, status=400
        )
    model = body.get("model") or sub["default_model"]
    provider = sub["provider"]
    api_base = sub.get("api_base")
    max_tokens, max_context_tokens = _lookup_token_limits(
        provider, model, search_all=True
    )

    config = get_hive_config()
    llm_section = config.setdefault("llm", {})
    llm_section["provider"] = provider
    llm_section["model"] = model
    llm_section["max_tokens"] = max_tokens
    llm_section["max_context_tokens"] = max_context_tokens
    # Exactly one subscription flag may be active at a time.
    for flag in _ALL_SUBSCRIPTION_FLAGS:
        llm_section.pop(flag, None)
    llm_section[sub["flag"]] = True
    # Subscriptions authenticate via OAuth tokens, not env-var API keys.
    llm_section.pop("api_key_env_var", None)
    if api_base:
        llm_section["api_base"] = api_base
    else:
        llm_section.pop("api_base", None)
    _write_config_atomic(config)

    # Hot-swap running sessions using the subscription token.
    token = _get_subscription_token(subscription_id)
    swapped = _hot_swap_sessions(
        request, f"{provider}/{model}", api_key=token, api_base=api_base
    )
    logger.info(
        "LLM config updated: subscription=%s model=%s, hot-swapped %d session(s)",
        subscription_id, model, swapped,
    )
    return web.json_response({
        "provider": provider,
        "model": model,
        "has_api_key": token is not None,
        "max_tokens": max_tokens,
        "max_context_tokens": max_context_tokens,
        "sessions_swapped": swapped,
        "active_subscription": subscription_id,
    })


def _apply_api_key_config(request: web.Request, body: dict) -> web.Response:
    """Switch to direct API-key mode, persist config, hot-swap sessions."""
    provider = body.get("provider")
    model = body.get("model")
    if not provider or not model:
        return web.json_response(
            {"error": "Both 'provider' and 'model' are required"}, status=400
        )
    max_tokens, max_context_tokens = _lookup_token_limits(provider, model)
    env_var = PROVIDER_ENV_VARS.get(provider.lower(), "")
    api_base = _get_api_base_for_provider(provider)

    config = get_hive_config()
    llm_section = config.setdefault("llm", {})
    llm_section["provider"] = provider
    llm_section["model"] = model
    llm_section["max_tokens"] = max_tokens
    llm_section["max_context_tokens"] = max_context_tokens
    if env_var:
        llm_section["api_key_env_var"] = env_var
    else:
        # BUGFIX: drop a stale env-var name left by a previous provider.
        # Providers without an env-var mapping (e.g. "kimi", "hive") fall back
        # to the credential store in get_api_key(); a leftover api_key_env_var
        # would otherwise make get_api_key() return the OLD provider's key.
        llm_section.pop("api_key_env_var", None)
    if api_base:
        llm_section["api_base"] = api_base
    else:
        llm_section.pop("api_base", None)
    # Switching to direct API-key mode deactivates any subscription.
    for flag in _ALL_SUBSCRIPTION_FLAGS:
        llm_section.pop(flag, None)
    _write_config_atomic(config)

    api_key = _resolve_api_key(provider, request)
    swapped = _hot_swap_sessions(
        request, f"{provider}/{model}", api_key=api_key, api_base=api_base
    )
    logger.info(
        "LLM config updated: provider=%s model=%s, hot-swapped %d session(s)",
        provider, model, swapped,
    )
    return web.json_response({
        "provider": provider,
        "model": model,
        "has_api_key": api_key is not None,
        "max_tokens": max_tokens,
        "max_context_tokens": max_context_tokens,
        "sessions_swapped": swapped,
        "active_subscription": None,
    })


async def handle_update_llm_config(request: web.Request) -> web.Response:
    """PUT /api/config/llm — set active provider + model, hot-swap running sessions.

    Accepts two modes:
      1. API key mode:      {"provider": "anthropic", "model": "claude-sonnet-4-20250514"}
      2. Subscription mode: {"subscription": "claude_code", "model": "claude-sonnet-4-20250514"}
    """
    try:
        body = await request.json()
    except Exception:
        return web.json_response({"error": "Invalid JSON body"}, status=400)
    if body.get("subscription"):
        return _apply_subscription_config(request, body)
    return _apply_api_key_config(request, body)
async def handle_get_models(request: web.Request) -> web.Response:
    """GET /api/config/models — curated provider→models list."""
    payload = {"models": MODELS_CATALOGUE}
    return web.json_response(payload)
# ------------------------------------------------------------------
# Route registration
# ------------------------------------------------------------------
def register_routes(app: web.Application) -> None:
"""Register LLM config routes."""
app.router.add_get("/api/config/llm", handle_get_llm_config)
app.router.add_put("/api/config/llm", handle_update_llm_config)
app.router.add_get("/api/config/models", handle_get_models)