feat: consolidate model config

Richard Tang
2026-04-09 09:53:05 -07:00
parent c65b43c21b
commit 7a2752eb42
12 changed files with 1223 additions and 2756 deletions
+18
@@ -2202,6 +2202,24 @@ class AgentLoop(NodeProtocol):
len(messages),
len(tools),
)
logger.debug(
"[_run_single_turn] inner_turn=%d: request context node=%s roles=%s system_chars=%d max_tokens=%d",
inner_turn,
node_id,
[m.get("role") for m in messages],
len(conversation.system_prompt or ""),
ctx.max_tokens,
)
if not messages:
logger.warning(
"[_run_single_turn] inner_turn=%d: no non-system conversation messages "
"before LLM call for node=%s model=%s api_base=%s. "
"This will produce a system-only payload, which some providers reject.",
inner_turn,
node_id,
getattr(ctx.llm, "model", type(ctx.llm).__name__),
getattr(ctx.llm, "api_base", None),
)
# Stream LLM response in a child task so cancel_current_turn()
# can kill it instantly without terminating the queen's main loop.
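A minimal sketch of the child-task pattern described in this comment (illustrative only; the real task wiring and cancel_current_turn() plumbing live outside this hunk):

import asyncio

async def run_stream_cancellable(stream_coro):
    # Run the LLM stream as a child task so cancelling it does not
    # tear down the caller's own loop. Names here are hypothetical.
    task = asyncio.create_task(stream_coro)
    try:
        return await task
    except asyncio.CancelledError:
        if task.cancelled():
            # Only the child was cancelled (e.g. by cancel_current_turn());
            # the caller's main loop keeps running.
            return None
        task.cancel()  # the caller itself was cancelled; stop the child too
        raise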
+154
@@ -363,10 +363,15 @@ def _dump_failed_request(
"attempt": attempt,
"estimated_tokens": _estimate_tokens(model, messages),
"num_messages": len(messages),
"api_base": kwargs.get("api_base"),
"request_keys": sorted(kwargs.keys()),
"messages": messages,
"tools": kwargs.get("tools"),
"max_tokens": kwargs.get("max_tokens"),
"temperature": kwargs.get("temperature"),
"stream": kwargs.get("stream"),
"tool_choice": kwargs.get("tool_choice"),
"response_format": kwargs.get("response_format"),
}
with open(filepath, "w", encoding="utf-8") as f:
@@ -381,6 +386,108 @@ def _dump_failed_request(
return "log_write_failed"
def _summarize_message_content(content: Any) -> dict[str, Any]:
"""Return a structural summary of one message content payload."""
if isinstance(content, str):
return {
"content_kind": "string",
"text_chars": len(content),
}
if isinstance(content, list):
block_types: list[str] = []
text_chars = 0
for block in content:
if isinstance(block, dict):
block_type = str(block.get("type", "unknown"))
block_types.append(block_type)
if block_type == "text":
text_chars += len(str(block.get("text", "")))
elif block_type == "tool_result":
block_content = block.get("content")
if isinstance(block_content, str):
text_chars += len(block_content)
elif isinstance(block_content, list):
for inner in block_content:
if isinstance(inner, dict) and inner.get("type") == "text":
text_chars += len(str(inner.get("text", "")))
else:
block_types.append(type(block).__name__)
return {
"content_kind": "list",
"blocks": len(content),
"block_types": block_types,
"text_chars": text_chars,
}
return {
"content_kind": type(content).__name__,
}
def _summarize_messages_for_log(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
"""Build a high-signal, no-secret summary of the outgoing messages payload."""
summary: list[dict[str, Any]] = []
for idx, message in enumerate(messages):
item: dict[str, Any] = {
"idx": idx,
"role": message.get("role"),
"keys": sorted(message.keys()),
}
item.update(_summarize_message_content(message.get("content")))
tool_calls = message.get("tool_calls")
if isinstance(tool_calls, list):
item["tool_calls"] = len(tool_calls)
tool_names = []
for tc in tool_calls:
if isinstance(tc, dict):
fn = tc.get("function")
if isinstance(fn, dict) and fn.get("name"):
tool_names.append(str(fn["name"]))
if tool_names:
item["tool_call_names"] = tool_names
if message.get("cache_control"):
item["cache_control"] = True
if message.get("tool_call_id"):
item["tool_call_id"] = str(message.get("tool_call_id"))
summary.append(item)
return summary
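# Worked example of the resulting shape (hand-derived from the helpers
# above, not captured from a live run):
#
#   messages = [
#       {"role": "system", "content": "You are helpful."},
#       {"role": "assistant",
#        "content": [{"type": "text", "text": "Reading the file."}],
#        "tool_calls": [{"function": {"name": "read_file", "arguments": "{}"}}]},
#   ]
#   _summarize_messages_for_log(messages) == [
#       {"idx": 0, "role": "system", "keys": ["content", "role"],
#        "content_kind": "string", "text_chars": 16},
#       {"idx": 1, "role": "assistant",
#        "keys": ["content", "role", "tool_calls"],
#        "content_kind": "list", "blocks": 1, "block_types": ["text"],
#        "text_chars": 17, "tool_calls": 1, "tool_call_names": ["read_file"]},
#   ]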
def _summarize_request_for_log(kwargs: dict[str, Any]) -> dict[str, Any]:
"""Return a compact structural summary of a LiteLLM request payload."""
tools = kwargs.get("tools")
tool_names: list[str] = []
if isinstance(tools, list):
for tool in tools:
if isinstance(tool, dict):
fn = tool.get("function")
if isinstance(fn, dict) and fn.get("name"):
tool_names.append(str(fn["name"]))
messages = kwargs.get("messages", [])
if isinstance(messages, list):
non_system_roles = [m.get("role") for m in messages if m.get("role") != "system"]
else:
non_system_roles = []
return {
"model": kwargs.get("model"),
"api_base": kwargs.get("api_base"),
"stream": kwargs.get("stream"),
"max_tokens": kwargs.get("max_tokens"),
"tool_count": len(tools) if isinstance(tools, list) else 0,
"tool_names": tool_names,
"tool_choice": kwargs.get("tool_choice"),
"response_format": bool(kwargs.get("response_format")),
"message_count": len(messages) if isinstance(messages, list) else 0,
"non_system_message_count": len(non_system_roles),
"first_non_system_role": non_system_roles[0] if non_system_roles else None,
"last_non_system_role": non_system_roles[-1] if non_system_roles else None,
"system_only": bool(messages) and not non_system_roles,
"messages": _summarize_messages_for_log(messages if isinstance(messages, list) else []),
}
def _compute_retry_delay(
attempt: int,
exception: BaseException | None = None,
@@ -1156,6 +1263,12 @@ class LiteLLMProvider(LLMProvider):
api_base = (self.api_base or "").lower()
return "openrouter.ai/api/v1" in api_base
def _is_zai_openai_backend(self) -> bool:
"""Return True when using Z-AI's OpenAI-compatible chat endpoint."""
model = (self.model or "").lower()
api_base = (self.api_base or "").lower()
return "api.z.ai" in api_base or model.startswith("openai/glm-") or model == "glm-5"
def _should_use_openrouter_tool_compat(
self,
error: BaseException,
@@ -1816,6 +1929,33 @@ class LiteLLMProvider(LLMProvider):
kwargs.pop("max_tokens", None)
kwargs.pop("stream_options", None)
request_summary = _summarize_request_for_log(kwargs)
logger.debug(
"[stream] prepared request: %s",
json.dumps(request_summary, default=str),
)
if request_summary["system_only"]:
logger.warning(
"[stream] %s request has no non-system chat messages "
"(api_base=%s tools=%d system_chars=%d). "
"Some chat-completions backends reject system-only payloads.",
self.model,
self.api_base,
request_summary["tool_count"],
sum(
message.get("text_chars", 0)
for message in request_summary["messages"]
if message.get("role") == "system"
),
)
if self._is_zai_openai_backend():
logger.warning(
"[stream] %s appears to be using Z-AI/GLM's OpenAI-compatible backend. "
"This backend has rejected system-only payloads with "
"'The messages parameter is illegal.' in prior requests.",
self.model,
)
for attempt in range(RATE_LIMIT_MAX_RETRIES + 1):
# Post-stream events (ToolCall, TextEnd, Finish) are buffered
# because they depend on the full stream. TextDeltaEvents are
@@ -2179,6 +2319,20 @@ class LiteLLMProvider(LLMProvider):
)
await asyncio.sleep(wait)
continue
dump_path = _dump_failed_request(
model=self.model,
kwargs=kwargs,
error_type=f"stream_exception_{type(e).__name__.lower()}",
attempt=attempt,
)
logger.error(
"[stream] %s request failed with %s: %s | request=%s | dump=%s",
self.model,
type(e).__name__,
e,
json.dumps(_summarize_request_for_log(kwargs), default=str),
dump_path,
)
recoverable = _is_stream_transient_error(e)
yield StreamErrorEvent(error=str(e), recoverable=recoverable)
return
+271
@@ -0,0 +1,271 @@
{
"schema_version": 1,
"providers": {
"anthropic": {
"default_model": "claude-haiku-4-5-20251001",
"models": [
{
"id": "claude-haiku-4-5-20251001",
"label": "Haiku 4.5 - Fast + cheap",
"recommended": false,
"max_tokens": 64000,
"max_context_tokens": 136000
},
{
"id": "claude-sonnet-4-5-20250929",
"label": "Sonnet 4.5 - Best balance",
"recommended": false,
"max_tokens": 64000,
"max_context_tokens": 136000
},
{
"id": "claude-opus-4-6",
"label": "Opus 4.6 - Most capable",
"recommended": true,
"max_tokens": 128000,
"max_context_tokens": 872000
}
]
},
"openai": {
"default_model": "gpt-5.4",
"models": [
{
"id": "gpt-5.4",
"label": "GPT-5.4 - Best intelligence",
"recommended": true,
"max_tokens": 128000,
"max_context_tokens": 960000
},
{
"id": "gpt-5.4-mini",
"label": "GPT-5.4 Mini - Faster + cheaper",
"recommended": false,
"max_tokens": 128000,
"max_context_tokens": 400000
},
{
"id": "gpt-5.4-nano",
"label": "GPT-5.4 Nano - Cheapest high-volume",
"recommended": false,
"max_tokens": 128000,
"max_context_tokens": 400000
}
]
},
"gemini": {
"default_model": "gemini-3-flash-preview",
"models": [
{
"id": "gemini-3-flash-preview",
"label": "Gemini 3 Flash - Fast",
"recommended": false,
"max_tokens": 32768,
"max_context_tokens": 900000
},
{
"id": "gemini-3.1-pro-preview",
"label": "Gemini 3.1 Pro - Best quality",
"recommended": true,
"max_tokens": 32768,
"max_context_tokens": 900000
}
]
},
"groq": {
"default_model": "moonshotai/kimi-k2-instruct-0905",
"models": [
{
"id": "moonshotai/kimi-k2-instruct-0905",
"label": "Kimi K2 - Best quality",
"recommended": true,
"max_tokens": 8192,
"max_context_tokens": 120000
},
{
"id": "openai/gpt-oss-120b",
"label": "GPT-OSS 120B - Fast reasoning",
"recommended": false,
"max_tokens": 8192,
"max_context_tokens": 120000
}
]
},
"cerebras": {
"default_model": "zai-glm-4.7",
"models": [
{
"id": "zai-glm-4.7",
"label": "ZAI-GLM 4.7 - Best quality",
"recommended": true,
"max_tokens": 8192,
"max_context_tokens": 120000
},
{
"id": "qwen3-235b-a22b-instruct-2507",
"label": "Qwen3 235B - Frontier reasoning",
"recommended": false,
"max_tokens": 8192,
"max_context_tokens": 120000
}
]
},
"minimax": {
"default_model": "MiniMax-M2.5",
"models": [
{
"id": "MiniMax-M2.5",
"label": "MiniMax-M2.5",
"recommended": true,
"max_tokens": 8192,
"max_context_tokens": 120000
}
]
},
"mistral": {
"default_model": "mistral-large-latest",
"models": [
{
"id": "mistral-large-latest",
"label": "Mistral Large",
"recommended": true,
"max_tokens": 8192,
"max_context_tokens": 120000
}
]
},
"together": {
"default_model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
"models": [
{
"id": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
"label": "Llama 3.3 70B Turbo",
"recommended": false,
"max_tokens": 8192,
"max_context_tokens": 120000
}
]
},
"deepseek": {
"default_model": "deepseek-chat",
"models": [
{
"id": "deepseek-chat",
"label": "DeepSeek Chat",
"recommended": false,
"max_tokens": 8192,
"max_context_tokens": 120000
}
]
},
"openrouter": {
"default_model": "google/gemini-2.5-pro",
"models": [
{
"id": "google/gemini-2.5-pro",
"label": "Gemini 2.5 Pro",
"recommended": true,
"max_tokens": 8192,
"max_context_tokens": 900000
},
{
"id": "google/gemini-2.5-flash",
"label": "Gemini 2.5 Flash",
"recommended": false,
"max_tokens": 8192,
"max_context_tokens": 900000
},
{
"id": "anthropic/claude-sonnet-4",
"label": "Claude Sonnet 4 (via OR)",
"recommended": false,
"max_tokens": 8192,
"max_context_tokens": 180000
},
{
"id": "deepseek/deepseek-r1",
"label": "DeepSeek R1",
"recommended": false,
"max_tokens": 8192,
"max_context_tokens": 120000
}
]
}
},
"presets": {
"claude_code": {
"provider": "anthropic",
"model": "claude-opus-4-6",
"max_tokens": 128000,
"max_context_tokens": 872000
},
"zai_code": {
"provider": "openai",
"api_key_env_var": "ZAI_API_KEY",
"model": "glm-5",
"max_tokens": 32768,
"max_context_tokens": 180000,
"api_base": "https://api.z.ai/api/coding/paas/v4"
},
"codex": {
"provider": "openai",
"model": "gpt-5.3-codex",
"max_tokens": 16384,
"max_context_tokens": 120000,
"api_base": "https://chatgpt.com/backend-api/codex"
},
"minimax_code": {
"provider": "minimax",
"api_key_env_var": "MINIMAX_API_KEY",
"model": "MiniMax-M2.5",
"max_tokens": 32768,
"max_context_tokens": 900000,
"api_base": "https://api.minimax.io/v1"
},
"kimi_code": {
"provider": "kimi",
"api_key_env_var": "KIMI_API_KEY",
"model": "kimi-k2.5",
"max_tokens": 32768,
"max_context_tokens": 240000,
"api_base": "https://api.kimi.com/coding"
},
"hive_llm": {
"provider": "hive",
"api_key_env_var": "HIVE_API_KEY",
"model": "queen",
"max_tokens": 32768,
"max_context_tokens": 180000,
"api_base": "https://api.adenhq.com",
"model_choices": [
{
"id": "queen",
"label": "queen",
"recommended": true
},
{
"id": "kimi-2.5",
"label": "kimi-2.5",
"recommended": false
},
{
"id": "GLM-5",
"label": "GLM-5",
"recommended": false
}
]
},
"antigravity": {
"provider": "openai",
"model": "gemini-3-flash",
"max_tokens": 32768,
"max_context_tokens": 1000000
},
"ollama_local": {
"provider": "ollama",
"max_tokens": 8192,
"max_context_tokens": 16384,
"api_base": "http://localhost:11434"
}
}
}
+201
@@ -0,0 +1,201 @@
"""Shared curated model metadata loaded from ``model_catalog.json``."""
from __future__ import annotations
import copy
import json
from functools import lru_cache
from pathlib import Path
from typing import Any
MODEL_CATALOG_PATH = Path(__file__).with_name("model_catalog.json")
class ModelCatalogError(RuntimeError):
"""Raised when the curated model catalogue is missing or malformed."""
def _require_mapping(value: Any, path: str) -> dict[str, Any]:
if not isinstance(value, dict):
raise ModelCatalogError(f"{path} must be an object")
return value
def _require_list(value: Any, path: str) -> list[Any]:
if not isinstance(value, list):
raise ModelCatalogError(f"{path} must be an array")
return value
def _validate_model_catalog(data: dict[str, Any]) -> dict[str, Any]:
providers = _require_mapping(data.get("providers"), "providers")
for provider_id, provider_info in providers.items():
provider_path = f"providers.{provider_id}"
provider_map = _require_mapping(provider_info, provider_path)
default_model = provider_map.get("default_model")
if not isinstance(default_model, str) or not default_model.strip():
raise ModelCatalogError(f"{provider_path}.default_model must be a non-empty string")
models = _require_list(provider_map.get("models"), f"{provider_path}.models")
if not models:
raise ModelCatalogError(f"{provider_path}.models must not be empty")
seen_model_ids: set[str] = set()
default_found = False
for idx, model in enumerate(models):
model_path = f"{provider_path}.models[{idx}]"
model_map = _require_mapping(model, model_path)
model_id = model_map.get("id")
if not isinstance(model_id, str) or not model_id.strip():
raise ModelCatalogError(f"{model_path}.id must be a non-empty string")
if model_id in seen_model_ids:
raise ModelCatalogError(f"Duplicate model id {model_id!r} in {provider_path}.models")
seen_model_ids.add(model_id)
if model_id == default_model:
default_found = True
label = model_map.get("label")
if not isinstance(label, str) or not label.strip():
raise ModelCatalogError(f"{model_path}.label must be a non-empty string")
recommended = model_map.get("recommended")
if not isinstance(recommended, bool):
raise ModelCatalogError(f"{model_path}.recommended must be a boolean")
for key in ("max_tokens", "max_context_tokens"):
value = model_map.get(key)
if not isinstance(value, int) or value <= 0:
raise ModelCatalogError(f"{model_path}.{key} must be a positive integer")
if not default_found:
raise ModelCatalogError(
f"{provider_path}.default_model={default_model!r} is not present in {provider_path}.models"
)
presets = _require_mapping(data.get("presets"), "presets")
for preset_id, preset_info in presets.items():
preset_path = f"presets.{preset_id}"
preset_map = _require_mapping(preset_info, preset_path)
provider = preset_map.get("provider")
if not isinstance(provider, str) or not provider.strip():
raise ModelCatalogError(f"{preset_path}.provider must be a non-empty string")
model = preset_map.get("model")
if model is not None and (not isinstance(model, str) or not model.strip()):
raise ModelCatalogError(f"{preset_path}.model must be a non-empty string when present")
api_base = preset_map.get("api_base")
if api_base is not None and (not isinstance(api_base, str) or not api_base.strip()):
raise ModelCatalogError(f"{preset_path}.api_base must be a non-empty string when present")
api_key_env_var = preset_map.get("api_key_env_var")
if api_key_env_var is not None and (
not isinstance(api_key_env_var, str) or not api_key_env_var.strip()
):
raise ModelCatalogError(
f"{preset_path}.api_key_env_var must be a non-empty string when present"
)
for key in ("max_tokens", "max_context_tokens"):
value = preset_map.get(key)
if not isinstance(value, int) or value <= 0:
raise ModelCatalogError(f"{preset_path}.{key} must be a positive integer")
model_choices = preset_map.get("model_choices")
if model_choices is not None:
for idx, choice in enumerate(_require_list(model_choices, f"{preset_path}.model_choices")):
choice_path = f"{preset_path}.model_choices[{idx}]"
choice_map = _require_mapping(choice, choice_path)
choice_id = choice_map.get("id")
if not isinstance(choice_id, str) or not choice_id.strip():
raise ModelCatalogError(f"{choice_path}.id must be a non-empty string")
label = choice_map.get("label")
if not isinstance(label, str) or not label.strip():
raise ModelCatalogError(f"{choice_path}.label must be a non-empty string")
recommended = choice_map.get("recommended")
if not isinstance(recommended, bool):
raise ModelCatalogError(f"{choice_path}.recommended must be a boolean")
return data
@lru_cache(maxsize=1)
def load_model_catalog() -> dict[str, Any]:
"""Load and validate the curated model catalogue."""
try:
raw = json.loads(MODEL_CATALOG_PATH.read_text(encoding="utf-8"))
except FileNotFoundError as exc:
raise ModelCatalogError(f"Model catalogue not found: {MODEL_CATALOG_PATH}") from exc
except json.JSONDecodeError as exc:
raise ModelCatalogError(f"Model catalogue JSON is invalid: {exc}") from exc
return _validate_model_catalog(_require_mapping(raw, "root"))
def get_models_catalogue() -> dict[str, list[dict[str, Any]]]:
"""Return provider -> model list."""
providers = load_model_catalog()["providers"]
return {provider_id: copy.deepcopy(provider_info["models"]) for provider_id, provider_info in providers.items()}
def get_default_models() -> dict[str, str]:
"""Return provider -> default model id."""
providers = load_model_catalog()["providers"]
return {provider_id: str(provider_info["default_model"]) for provider_id, provider_info in providers.items()}
def get_provider_models(provider: str) -> list[dict[str, Any]]:
"""Return the curated models for one provider."""
provider_info = load_model_catalog()["providers"].get(provider)
if not provider_info:
return []
return copy.deepcopy(provider_info["models"])
def get_default_model(provider: str) -> str | None:
"""Return the curated default model id for one provider."""
provider_info = load_model_catalog()["providers"].get(provider)
if not provider_info:
return None
return str(provider_info["default_model"])
def find_model(provider: str, model_id: str) -> dict[str, Any] | None:
"""Return one model entry for a provider, if present."""
for model in load_model_catalog()["providers"].get(provider, {}).get("models", []):
if model["id"] == model_id:
return copy.deepcopy(model)
return None
def find_model_any_provider(model_id: str) -> tuple[str, dict[str, Any]] | None:
"""Return the first curated provider/model entry matching a model id."""
for provider_id, provider_info in load_model_catalog()["providers"].items():
for model in provider_info["models"]:
if model["id"] == model_id:
return provider_id, copy.deepcopy(model)
return None
def get_model_limits(provider: str, model_id: str) -> tuple[int, int] | None:
"""Return ``(max_tokens, max_context_tokens)`` for one provider/model pair."""
model = find_model(provider, model_id)
if not model:
return None
return int(model["max_tokens"]), int(model["max_context_tokens"])
def get_preset(preset_id: str) -> dict[str, Any] | None:
"""Return one preset entry."""
preset = load_model_catalog()["presets"].get(preset_id)
if not preset:
return None
return copy.deepcopy(preset)
def get_presets() -> dict[str, dict[str, Any]]:
"""Return all preset entries."""
return copy.deepcopy(load_model_catalog()["presets"])
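A short usage sketch of these helpers against the catalogue shipped in this commit (expected values are read off model_catalog.json above):

from framework.llm.model_catalog import (
    get_default_model,
    get_model_limits,
    get_preset,
)

assert get_default_model("anthropic") == "claude-haiku-4-5-20251001"
assert get_model_limits("openai", "gpt-5.4") == (128000, 960000)
assert get_preset("codex")["api_base"] == "https://chatgpt.com/backend-api/codex"
assert get_model_limits("openai", "no-such-model") is None  # unknown ids return None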
+57 -87
@@ -20,6 +20,12 @@ from framework.config import (
_PROVIDER_CRED_MAP,
get_hive_config,
)
from framework.llm.model_catalog import (
find_model,
find_model_any_provider,
get_models_catalogue,
get_preset,
)
from framework.agents.queen.queen_memory_v2 import (
global_memory_dir,
build_memory_document,
@@ -47,111 +53,67 @@ PROVIDER_ENV_VARS: dict[str, str] = {
"deepseek": "DEEPSEEK_API_KEY",
}
# ---------------------------------------------------------------------------
# Subscription metadata (mirrors quickstart.sh subscription modes)
# ---------------------------------------------------------------------------
SUBSCRIPTIONS: list[dict] = [
_SUBSCRIPTION_DEFINITIONS: list[dict[str, str]] = [
{
"id": "claude_code",
"name": "Claude Code Subscription",
"description": "Use your Claude Max/Pro plan",
"provider": "anthropic",
"flag": "use_claude_code_subscription",
"default_model": "claude-sonnet-4-20250514",
},
{
"id": "codex",
"name": "OpenAI Codex Subscription",
"description": "Use your Codex/ChatGPT Plus plan",
"provider": "openai",
"flag": "use_codex_subscription",
"default_model": "gpt-5.4",
"api_base": "https://chatgpt.com/backend-api/codex",
},
{
"id": "kimi_code",
"name": "Kimi Code Subscription",
"description": "Use your Kimi Code plan",
"provider": "kimi",
"flag": "use_kimi_code_subscription",
"default_model": "kimi/moonshot-v1",
},
{
"id": "antigravity",
"name": "Antigravity Subscription",
"description": "Use your Google/Gemini plan",
"provider": "antigravity",
"flag": "use_antigravity_subscription",
"default_model": "antigravity/gemini-2.5-pro",
},
]
def _build_subscriptions() -> list[dict]:
subscriptions: list[dict] = []
for definition in _SUBSCRIPTION_DEFINITIONS:
preset = get_preset(definition["id"])
if not preset:
raise RuntimeError(f"Missing preset for subscription {definition['id']}")
subscriptions.append({
"id": definition["id"],
"name": definition["name"],
"description": definition["description"],
"provider": preset["provider"],
"flag": definition["flag"],
"default_model": preset.get("model", ""),
**({"api_base": preset["api_base"]} if preset.get("api_base") else {}),
})
return subscriptions
# ---------------------------------------------------------------------------
# Subscription metadata (mirrors quickstart subscription modes)
# ---------------------------------------------------------------------------
SUBSCRIPTIONS: list[dict] = _build_subscriptions()
# All subscription config flags
_ALL_SUBSCRIPTION_FLAGS = [s["flag"] for s in SUBSCRIPTIONS]
# Map subscription ID → subscription metadata
_SUBSCRIPTION_MAP = {s["id"]: s for s in SUBSCRIPTIONS}
# Model catalogue — mirrors quickstart.sh MODEL_CHOICES_*
MODELS_CATALOGUE: dict[str, list[dict]] = {
"anthropic": [
{"id": "claude-haiku-4-5-20251001", "label": "Haiku 4.5 - Fast + cheap", "recommended": False, "max_tokens": 8192, "max_context_tokens": 180000},
{"id": "claude-sonnet-4-20250514", "label": "Sonnet 4 - Fast + capable", "recommended": False, "max_tokens": 8192, "max_context_tokens": 180000},
{"id": "claude-sonnet-4-5-20250929", "label": "Sonnet 4.5 - Best balance", "recommended": False, "max_tokens": 16384, "max_context_tokens": 180000},
{"id": "claude-opus-4-6", "label": "Opus 4.6 - Most capable", "recommended": True, "max_tokens": 32768, "max_context_tokens": 180000},
],
"openai": [
{"id": "gpt-5.4", "label": "GPT-5.4 - Best intelligence", "recommended": True, "max_tokens": 128000, "max_context_tokens": 960000},
{"id": "gpt-5.4-mini", "label": "GPT-5.4 Mini - Faster + cheaper", "recommended": False, "max_tokens": 128000, "max_context_tokens": 400000},
{"id": "gpt-5.4-nano", "label": "GPT-5.4 Nano - Cheapest high-volume", "recommended": False, "max_tokens": 128000, "max_context_tokens": 400000},
],
"gemini": [
{"id": "gemini-3-flash-preview", "label": "Gemini 3 Flash - Fast", "recommended": False, "max_tokens": 8192, "max_context_tokens": 900000},
{"id": "gemini-3.1-pro-preview", "label": "Gemini 3.1 Pro - Best quality", "recommended": True, "max_tokens": 8192, "max_context_tokens": 900000},
],
"groq": [
{"id": "moonshotai/kimi-k2-instruct-0905", "label": "Kimi K2 - Best quality", "recommended": True, "max_tokens": 8192, "max_context_tokens": 120000},
{"id": "openai/gpt-oss-120b", "label": "GPT-OSS 120B - Fast reasoning", "recommended": False, "max_tokens": 8192, "max_context_tokens": 120000},
],
"cerebras": [
{"id": "zai-glm-4.7", "label": "ZAI-GLM 4.7 - Best quality", "recommended": True, "max_tokens": 8192, "max_context_tokens": 120000},
{"id": "qwen3-235b-a22b-instruct-2507", "label": "Qwen3 235B - Frontier reasoning", "recommended": False, "max_tokens": 8192, "max_context_tokens": 120000},
],
"minimax": [
{"id": "MiniMax-M2.5", "label": "MiniMax-M2.5", "recommended": True, "max_tokens": 8192, "max_context_tokens": 120000},
],
"mistral": [
{"id": "mistral-large-latest", "label": "Mistral Large", "recommended": True, "max_tokens": 8192, "max_context_tokens": 120000},
],
"together": [
{"id": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "label": "Llama 3.3 70B Turbo", "recommended": False, "max_tokens": 8192, "max_context_tokens": 120000},
],
"deepseek": [
{"id": "deepseek-chat", "label": "DeepSeek Chat", "recommended": False, "max_tokens": 8192, "max_context_tokens": 120000},
],
"openrouter": [
{"id": "google/gemini-2.5-pro", "label": "Gemini 2.5 Pro", "recommended": True, "max_tokens": 8192, "max_context_tokens": 900000},
{"id": "google/gemini-2.5-flash", "label": "Gemini 2.5 Flash", "recommended": False, "max_tokens": 8192, "max_context_tokens": 900000},
{"id": "anthropic/claude-sonnet-4", "label": "Claude Sonnet 4 (via OR)", "recommended": False, "max_tokens": 8192, "max_context_tokens": 180000},
{"id": "deepseek/deepseek-r1", "label": "DeepSeek R1", "recommended": False, "max_tokens": 8192, "max_context_tokens": 120000},
],
}
# Default model per provider (matches quickstart DEFAULT_MODELS)
DEFAULT_MODELS: dict[str, str] = {
"anthropic": "claude-haiku-4-5-20251001",
"openai": "gpt-5.4",
"minimax": "MiniMax-M2.5",
"gemini": "gemini-3-flash-preview",
"groq": "moonshotai/kimi-k2-instruct-0905",
"cerebras": "zai-glm-4.7",
"mistral": "mistral-large-latest",
"together": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
"deepseek": "deepseek-chat",
"openrouter": "google/gemini-2.5-pro",
}
# Model catalogue loaded from the shared JSON source of truth.
MODELS_CATALOGUE: dict[str, list[dict]] = get_models_catalogue()
# ---------------------------------------------------------------------------
# Helpers
@@ -167,10 +129,7 @@ def _get_api_base_for_provider(provider: str) -> str | None:
def _find_model_info(provider: str, model_id: str) -> dict | None:
"""Look up a model in the catalogue to get its token limits."""
for m in MODELS_CATALOGUE.get(provider, []):
if m["id"] == model_id:
return m
return None
return find_model(provider, model_id)
def _write_config_atomic(config: dict) -> None:
@@ -348,21 +307,32 @@ async def handle_update_llm_config(request: web.Request) -> web.Response:
{"error": f"Unknown subscription: {subscription_id}"}, status=400
)
preset = get_preset(subscription_id)
model = body.get("model") or sub["default_model"]
provider = sub["provider"]
api_base = sub.get("api_base")
# Look up token limits
# Subscriptions use same models as their provider (e.g., claude_code → anthropic)
model_info = _find_model_info(provider, model)
if not model_info:
# Try looking up in the mapped provider's catalogue
for prov_id, models in MODELS_CATALOGUE.items():
model_info = next((m for m in models if m["id"] == model), None)
if model_info:
break
max_tokens = model_info["max_tokens"] if model_info else 8192
max_context_tokens = model_info["max_context_tokens"] if model_info else 120000
max_tokens: int | None = None
max_context_tokens: int | None = None
if preset and preset.get("model") == model:
max_tokens = int(preset["max_tokens"])
max_context_tokens = int(preset["max_context_tokens"])
else:
# Subscriptions may use the same curated models as their provider.
model_info = _find_model_info(provider, model)
if not model_info:
# Some subscriptions point at curated models owned by a different provider.
match = find_model_any_provider(model)
if match:
_, model_info = match
if model_info:
max_tokens = int(model_info["max_tokens"])
max_context_tokens = int(model_info["max_context_tokens"])
if max_tokens is None or max_context_tokens is None:
max_tokens = 8192
max_context_tokens = 120000
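    # Worked example of this resolution order (values from model_catalog.json):
    # subscription_id="codex" with model "gpt-5.3-codex" matches the preset, so
    # the limits are (16384, 120000); claude_code with an explicit model of
    # "claude-sonnet-4-5-20250929" falls through to the anthropic catalogue and
    # gets (64000, 136000); a model unknown to every catalogue gets the
    # conservative fallback (8192, 120000).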
# Update config: activate this subscription, clear others
config = get_hive_config()
+32
@@ -15,6 +15,7 @@ import pytest
from aiohttp.test_utils import TestClient, TestServer
from framework.host.triggers import TriggerDefinition
from framework.llm.model_catalog import get_models_catalogue
from framework.server.app import create_app
from framework.server import routes_messages, routes_queens
from framework.server import session_manager as session_manager_module
@@ -1591,6 +1592,37 @@ class TestCredentials:
assert store.get_key("test_cred", "api_key") == "new-value"
class TestConfigRoutes:
"""Tests for LLM configuration endpoints."""
@pytest.mark.asyncio
async def test_get_models_uses_shared_model_catalogue(self):
app = create_app()
async with TestClient(TestServer(app)) as client:
resp = await client.get("/api/config/models")
data = await resp.json()
assert resp.status == 200
assert data["models"] == get_models_catalogue()
@pytest.mark.asyncio
async def test_get_llm_config_exposes_subscription_defaults_from_presets(self):
app = create_app()
app["credential_store"] = MagicMock()
app["credential_store"].get.return_value = None
async with TestClient(TestServer(app)) as client:
resp = await client.get("/api/config/llm")
data = await resp.json()
assert resp.status == 200
subscriptions = {subscription["id"]: subscription for subscription in data["subscriptions"]}
assert subscriptions["codex"]["default_model"] == "gpt-5.3-codex"
assert subscriptions["codex"]["api_base"] == "https://chatgpt.com/backend-api/codex"
assert subscriptions["kimi_code"]["default_model"] == "kimi-k2.5"
class TestSSEFormat:
"""Tests for SSE event wire format -- events must be unnamed (data-only)
so the frontend's es.onmessage handler receives them."""
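For context, a data-only SSE frame is one with no event: field, which is exactly what makes EventSource.onmessage fire; a named event would need addEventListener on the frontend instead. The payload schema below is assumed for illustration:

import json

frame = "data: " + json.dumps({"type": "text_delta", "text": "hi"}) + "\n\n"
# A named frame ("event: delta\ndata: ...") would NOT reach es.onmessage.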
+20
@@ -26,6 +26,7 @@ from framework.llm.litellm import (
_compute_retry_delay,
_ensure_ollama_chat_prefix,
_is_ollama_model,
_summarize_request_for_log,
)
from framework.llm.provider import LLMProvider, LLMResponse, Tool
@@ -100,6 +101,25 @@ class TestLiteLLMProviderInit:
provider = LiteLLMProvider(model="ollama/llama3")
assert provider.model == "ollama_chat/llama3"
def test_summarize_request_flags_system_only_payload(self):
"""Request summaries should make system-only payloads obvious in logs."""
summary = _summarize_request_for_log(
{
"model": "openai/glm-5",
"api_base": "https://api.z.ai/api/coding/paas/v4",
"messages": [{"role": "system", "content": "You are helpful."}],
"tools": [{"type": "function", "function": {"name": "read_file"}}],
"stream": True,
"max_tokens": 8192,
}
)
assert summary["message_count"] == 1
assert summary["non_system_message_count"] == 0
assert summary["first_non_system_role"] is None
assert summary["last_non_system_role"] is None
assert summary["system_only"] is True
class TestLiteLLMProviderComplete:
"""Test LiteLLMProvider.complete() method."""
+97
@@ -0,0 +1,97 @@
"""Tests for the shared curated LLM model catalogue."""
import json
import pytest
from framework.llm import model_catalog
@pytest.fixture(autouse=True)
def clear_model_catalog_cache():
model_catalog.load_model_catalog.cache_clear()
yield
model_catalog.load_model_catalog.cache_clear()
def test_default_models_exist_in_each_provider_catalogue():
defaults = model_catalog.get_default_models()
catalogue = model_catalog.get_models_catalogue()
for provider_id, default_model in defaults.items():
assert provider_id in catalogue
assert any(model["id"] == default_model for model in catalogue[provider_id])
def test_find_model_returns_curated_token_limits():
model = model_catalog.find_model("openai", "gpt-5.4")
assert model is not None
assert model["label"] == "GPT-5.4 - Best intelligence"
assert model["max_tokens"] == 128000
assert model["max_context_tokens"] == 960000
def test_anthropic_curated_limits_track_documented_caps_with_safe_input_budget():
haiku = model_catalog.find_model("anthropic", "claude-haiku-4-5-20251001")
sonnet_45 = model_catalog.find_model("anthropic", "claude-sonnet-4-5-20250929")
opus_46 = model_catalog.find_model("anthropic", "claude-opus-4-6")
assert haiku["max_tokens"] == 64000
assert haiku["max_context_tokens"] == 136000
assert sonnet_45["max_tokens"] == 64000
assert sonnet_45["max_context_tokens"] == 136000
assert opus_46["max_tokens"] == 128000
assert opus_46["max_context_tokens"] == 872000
def test_find_model_any_provider_returns_provider_and_model():
provider_id, model = model_catalog.find_model_any_provider("google/gemini-2.5-pro")
assert provider_id == "openrouter"
assert model["max_context_tokens"] == 900000
def test_get_preset_returns_subscription_specific_limits():
preset = model_catalog.get_preset("kimi_code")
assert preset is not None
assert preset["provider"] == "kimi"
assert preset["model"] == "kimi-k2.5"
assert preset["max_tokens"] == 32768
assert preset["max_context_tokens"] == 240000
assert preset["api_base"] == "https://api.kimi.com/coding"
def test_load_model_catalog_rejects_duplicate_model_ids(tmp_path, monkeypatch):
bad_catalog = {
"schema_version": 1,
"providers": {
"anthropic": {
"default_model": "dup-model",
"models": [
{
"id": "dup-model",
"label": "First",
"recommended": True,
"max_tokens": 1,
"max_context_tokens": 1,
},
{
"id": "dup-model",
"label": "Second",
"recommended": False,
"max_tokens": 1,
"max_context_tokens": 1,
},
],
}
},
}
bad_path = tmp_path / "model_catalog.json"
bad_path.write_text(json.dumps(bad_catalog), encoding="utf-8")
monkeypatch.setattr(model_catalog, "MODEL_CATALOG_PATH", bad_path)
with pytest.raises(model_catalog.ModelCatalogError, match="Duplicate model id"):
model_catalog.load_model_catalog()
+142 -92
@@ -829,42 +829,95 @@ $ProviderMap = [ordered]@{
DEEPSEEK_API_KEY = @{ Name = "DeepSeek"; Id = "deepseek" }
}
$DefaultModels = @{
anthropic = "claude-haiku-4-5-20251001"
openai = "gpt-5-mini"
minimax = "MiniMax-M2.5"
gemini = "gemini-3-flash-preview"
groq = "moonshotai/kimi-k2-instruct-0905"
cerebras = "zai-glm-4.7"
mistral = "mistral-large-latest"
together_ai = "meta-llama/Llama-3.3-70B-Instruct-Turbo"
deepseek = "deepseek-chat"
$ModelCatalogPath = Join-Path $ScriptDir "core\framework\llm\model_catalog.json"
$script:ModelCatalog = $null
function Initialize-ModelCatalog {
try {
$script:ModelCatalog = Get-Content -Path $ModelCatalogPath -Raw | ConvertFrom-Json
return $true
} catch {
return $false
}
}
# Model choices: array of hashtables per provider
$ModelChoices = @{
anthropic = @(
@{ Id = "claude-haiku-4-5-20251001"; Label = "Haiku 4.5 - Fast + cheap (recommended)"; MaxTokens = 8192; MaxContextTokens = 180000 },
@{ Id = "claude-sonnet-4-20250514"; Label = "Sonnet 4 - Fast + capable"; MaxTokens = 8192; MaxContextTokens = 180000 },
@{ Id = "claude-sonnet-4-5-20250929"; Label = "Sonnet 4.5 - Best balance"; MaxTokens = 16384; MaxContextTokens = 180000 },
@{ Id = "claude-opus-4-6"; Label = "Opus 4.6 - Most capable"; MaxTokens = 32768; MaxContextTokens = 180000 }
)
openai = @(
@{ Id = "gpt-5-mini"; Label = "GPT-5 Mini - Fast + cheap (recommended)"; MaxTokens = 16384; MaxContextTokens = 120000 },
@{ Id = "gpt-5.2"; Label = "GPT-5.2 - Most capable"; MaxTokens = 16384; MaxContextTokens = 120000 }
)
gemini = @(
@{ Id = "gemini-3-flash-preview"; Label = "Gemini 3 Flash - Fast (recommended)"; MaxTokens = 8192; MaxContextTokens = 900000 },
@{ Id = "gemini-3.1-pro-preview"; Label = "Gemini 3.1 Pro - Best quality"; MaxTokens = 8192; MaxContextTokens = 900000 }
)
groq = @(
@{ Id = "moonshotai/kimi-k2-instruct-0905"; Label = "Kimi K2 - Best quality (recommended)"; MaxTokens = 8192; MaxContextTokens = 120000 },
@{ Id = "openai/gpt-oss-120b"; Label = "GPT-OSS 120B - Fast reasoning"; MaxTokens = 8192; MaxContextTokens = 120000 }
)
cerebras = @(
@{ Id = "zai-glm-4.7"; Label = "ZAI-GLM 4.7 - Best quality (recommended)"; MaxTokens = 8192; MaxContextTokens = 120000 },
@{ Id = "qwen3-235b-a22b-instruct-2507"; Label = "Qwen3 235B - Frontier reasoning"; MaxTokens = 8192; MaxContextTokens = 120000 }
)
function Get-ProviderCatalog {
param([string]$ProviderId)
if (-not $script:ModelCatalog -or -not $script:ModelCatalog.providers) {
return $null
}
$providerProp = $script:ModelCatalog.providers.PSObject.Properties[$ProviderId]
if ($providerProp) {
return $providerProp.Value
}
return $null
}
function Get-DefaultModel {
param([string]$ProviderId)
$providerCatalog = Get-ProviderCatalog $ProviderId
if ($providerCatalog) {
return [string]$providerCatalog.default_model
}
return ""
}
function Get-ModelChoices {
param([string]$ProviderId)
$providerCatalog = Get-ProviderCatalog $ProviderId
if (-not $providerCatalog -or -not $providerCatalog.models) {
return @()
}
return @($providerCatalog.models)
}
function Get-PresetConfig {
param([string]$PresetId)
if (-not $script:ModelCatalog -or -not $script:ModelCatalog.presets) {
return $null
}
$presetProp = $script:ModelCatalog.presets.PSObject.Properties[$PresetId]
if ($presetProp) {
return $presetProp.Value
}
return $null
}
function Apply-Preset {
param([string]$PresetId)
$preset = Get-PresetConfig $PresetId
if (-not $preset) {
throw "Missing preset: $PresetId"
}
$script:SelectedProviderId = [string]$preset.provider
$script:SelectedModel = if ($preset.model) { [string]$preset.model } else { "" }
$script:SelectedMaxTokens = [int]$preset.max_tokens
$script:SelectedMaxContextTokens = [int]$preset.max_context_tokens
$script:SelectedEnvVar = if ($preset.api_key_env_var) { [string]$preset.api_key_env_var } else { "" }
$script:SelectedApiBase = if ($preset.api_base) { [string]$preset.api_base } else { "" }
}
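# Example (values from model_catalog.json): Apply-Preset "zai_code" sets
# $SelectedProviderId = "openai", $SelectedModel = "glm-5",
# $SelectedMaxTokens = 32768, $SelectedMaxContextTokens = 180000,
# $SelectedEnvVar = "ZAI_API_KEY", and
# $SelectedApiBase = "https://api.z.ai/api/coding/paas/v4".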
function Get-PresetModelChoices {
param([string]$PresetId)
$preset = Get-PresetConfig $PresetId
if (-not $preset -or -not $preset.model_choices) {
return @()
}
return @($preset.model_choices)
}
function Normalize-OpenRouterModelId {
@@ -942,9 +995,9 @@ function Get-ModelSelection {
}
}
$choices = $ModelChoices[$ProviderId]
$choices = Get-ModelChoices $ProviderId
if (-not $choices -or $choices.Count -eq 0) {
return @{ Model = $DefaultModels[$ProviderId]; MaxTokens = 8192; MaxContextTokens = 120000 }
return @{ Model = (Get-DefaultModel $ProviderId); MaxTokens = 8192; MaxContextTokens = 120000 }
}
if ($choices.Count -eq 1) {
return @{ Model = $choices[0].Id; MaxTokens = $choices[0].MaxTokens; MaxContextTokens = $choices[0].MaxContextTokens }
@@ -1059,6 +1112,13 @@ try {
if ($LASTEXITCODE -eq 0) { $OllamaDetected = $true }
} catch { }
if (-not (Initialize-ModelCatalog)) {
Write-Fail "Failed to load core/framework/llm/model_catalog.json."
Write-Host "   Please ensure the repository checkout is complete, then rerun quickstart."
Write-Host ""
exit 1
}
# ── Read previous configuration (if any) ──────────────────────
$PrevProvider = ""
$PrevModel = ""
@@ -1261,21 +1321,14 @@ switch ($num) {
exit 1
}
$SubscriptionMode = "claude_code"
$SelectedProviderId = "anthropic"
$SelectedModel = "claude-opus-4-6"
$SelectedMaxTokens = 32768
$SelectedMaxContextTokens = 180000
Apply-Preset "claude_code"
Write-Host ""
Write-Ok "Using Claude Code subscription"
}
2 {
# ZAI Code Subscription
$SubscriptionMode = "zai_code"
$SelectedProviderId = "openai"
$SelectedEnvVar = "ZAI_API_KEY"
$SelectedModel = "glm-5"
$SelectedMaxTokens = 32768
$SelectedMaxContextTokens = 120000
Apply-Preset "zai_code"
Write-Host ""
Write-Ok "Using ZAI Code subscription"
Write-Color -Text " Model: glm-5 | API: api.z.ai" -Color DarkGray
@@ -1305,10 +1358,7 @@ switch ($num) {
}
if ($CodexCredDetected) {
$SubscriptionMode = "codex"
$SelectedProviderId = "openai"
$SelectedModel = "gpt-5.3-codex"
$SelectedMaxTokens = 16384
$SelectedMaxContextTokens = 120000
Apply-Preset "codex"
Write-Host ""
Write-Ok "Using OpenAI Codex subscription"
}
@@ -1316,12 +1366,7 @@ switch ($num) {
4 {
# MiniMax Coding Key
$SubscriptionMode = "minimax_code"
$SelectedProviderId = "minimax"
$SelectedEnvVar = "MINIMAX_API_KEY"
$SelectedModel = "MiniMax-M2.5"
$SelectedMaxTokens = 32768
$SelectedMaxContextTokens = 900000
$SelectedApiBase = "https://api.minimax.io/v1"
Apply-Preset "minimax_code"
Write-Host ""
Write-Ok "Using MiniMax coding key"
Write-Color -Text " Model: MiniMax-M2.5 | API: api.minimax.io" -Color DarkGray
@@ -1329,11 +1374,7 @@ switch ($num) {
5 {
# Kimi Code Subscription
$SubscriptionMode = "kimi_code"
$SelectedProviderId = "kimi"
$SelectedEnvVar = "KIMI_API_KEY"
$SelectedModel = "kimi-k2.5"
$SelectedMaxTokens = 32768
$SelectedMaxContextTokens = 120000
Apply-Preset "kimi_code"
Write-Host ""
Write-Ok "Using Kimi Code subscription"
Write-Color -Text " Model: kimi-k2.5 | API: api.kimi.com/coding" -Color DarkGray
@@ -1341,24 +1382,37 @@ switch ($num) {
6 {
# Hive LLM
$SubscriptionMode = "hive_llm"
$SelectedProviderId = "hive"
$SelectedEnvVar = "HIVE_API_KEY"
$SelectedMaxTokens = 32768
$SelectedMaxContextTokens = 120000
Apply-Preset "hive_llm"
Write-Host ""
Write-Ok "Using Hive LLM"
Write-Host ""
Write-Host " Select a model:"
Write-Host " " -NoNewline; Write-Color -Text "1)" -Color Cyan -NoNewline; Write-Host " queen " -NoNewline; Write-Color -Text "(default - Hive flagship)" -Color DarkGray
Write-Host " " -NoNewline; Write-Color -Text "2)" -Color Cyan -NoNewline; Write-Host " kimi-2.5"
Write-Host " " -NoNewline; Write-Color -Text "3)" -Color Cyan -NoNewline; Write-Host " GLM-5"
$hiveChoices = Get-PresetModelChoices "hive_llm"
$hiveDefaultChoice = "1"
for ($i = 0; $i -lt $hiveChoices.Count; $i++) {
Write-Host " " -NoNewline
Write-Color -Text "$($i + 1))" -Color Cyan -NoNewline
Write-Host " $($hiveChoices[$i].label)" -NoNewline
if ($hiveChoices[$i].recommended -eq $true) {
$hiveDefaultChoice = [string]($i + 1)
Write-Host " " -NoNewline
Write-Color -Text "(default - Hive flagship)" -Color DarkGray
} else {
Write-Host ""
}
}
Write-Host ""
$hiveModelChoice = Read-Host " Enter model choice (1-3) [1]"
if (-not $hiveModelChoice) { $hiveModelChoice = "1" }
switch ($hiveModelChoice) {
"2" { $SelectedModel = "kimi-2.5" }
"3" { $SelectedModel = "GLM-5" }
default { $SelectedModel = "queen" }
while ($true) {
$hiveModelChoice = Read-Host " Enter model choice (1-$($hiveChoices.Count)) [$hiveDefaultChoice]"
if (-not $hiveModelChoice) { $hiveModelChoice = $hiveDefaultChoice }
if ($hiveModelChoice -match '^\d+$') {
$choiceNum = [int]$hiveModelChoice
if ($choiceNum -ge 1 -and $choiceNum -le $hiveChoices.Count) {
$SelectedModel = [string]$hiveChoices[$choiceNum - 1].id
break
}
}
Write-Color -Text "Invalid choice. Please enter 1-$($hiveChoices.Count)" -Color Red
}
Write-Color -Text " Model: $SelectedModel | API: $HiveLlmEndpoint" -Color DarkGray
}
@@ -1390,10 +1444,7 @@ switch ($num) {
if ($AntigravityCredDetected) {
$SubscriptionMode = "antigravity"
$SelectedProviderId = "openai"
$SelectedModel = "gemini-3-flash"
$SelectedMaxTokens = 32768
$SelectedMaxContextTokens = 1000000
Apply-Preset "antigravity"
Write-Host ""
Write-Warn "Using Antigravity can technically cause your account suspension. Please use at your own risk."
Write-Host ""
@@ -1543,9 +1594,10 @@ switch ($num) {
$SelectedModel = $ollamaModels[$num - 1]
Write-Host ""
Write-Ok "Model: $SelectedModel"
$SelectedMaxTokens = 8192
$SelectedMaxContextTokens = 16384
$SelectedApiBase = "http://localhost:11434"
$ollamaPreset = Get-PresetConfig "ollama_local"
$SelectedMaxTokens = [int]$ollamaPreset.max_tokens
$SelectedMaxContextTokens = [int]$ollamaPreset.max_context_tokens
$SelectedApiBase = [string]$ollamaPreset.api_base
break
}
}
@@ -1592,7 +1644,7 @@ if ($SubscriptionMode -eq "minimax_code") {
# Health check the new key
Write-Host " Verifying MiniMax API key... " -NoNewline
try {
$hcResult = & $UvCmd run python (Join-Path $ScriptDir "scripts/check_llm_key.py") "minimax" $apiKey "https://api.minimax.io/v1" 2>$null
$hcResult = & $UvCmd run python (Join-Path $ScriptDir "scripts/check_llm_key.py") "minimax" $apiKey $SelectedApiBase 2>$null
$hcJson = $hcResult | ConvertFrom-Json
if ($hcJson.valid -eq $true) {
Write-Color -Text "ok" -Color Green
@@ -1653,7 +1705,7 @@ if ($SubscriptionMode -eq "zai_code") {
# Health check the new key
Write-Host " Verifying ZAI API key... " -NoNewline
try {
$hcResult = & $UvCmd run python (Join-Path $ScriptDir "scripts/check_llm_key.py") "zai" $apiKey "https://api.z.ai/api/coding/paas/v4" 2>$null
$hcResult = & $UvCmd run python (Join-Path $ScriptDir "scripts/check_llm_key.py") "zai" $apiKey $SelectedApiBase 2>$null
$hcJson = $hcResult | ConvertFrom-Json
if ($hcJson.valid -eq $true) {
Write-Color -Text "ok" -Color Green
@@ -1721,7 +1773,7 @@ if ($SubscriptionMode -eq "kimi_code") {
# Health check the new key
Write-Host " Verifying Kimi API key... " -NoNewline
try {
$hcResult = & $UvCmd run python (Join-Path $ScriptDir "scripts/check_llm_key.py") "kimi" $apiKey "https://api.kimi.com/coding" 2>$null
$hcResult = & $UvCmd run python (Join-Path $ScriptDir "scripts/check_llm_key.py") "kimi" $apiKey $SelectedApiBase 2>$null
$hcJson = $hcResult | ConvertFrom-Json
if ($hcJson.valid -eq $true) {
Write-Color -Text "ok" -Color Green
@@ -1832,7 +1884,7 @@ if ($SelectedProviderId -and -not $SelectedModel) {
# Save configuration
if ($SelectedProviderId) {
if (-not $SelectedModel) {
$SelectedModel = $DefaultModels[$SelectedProviderId]
$SelectedModel = Get-DefaultModel $SelectedProviderId
}
Write-Host ""
Write-Host " Saving configuration... " -NoNewline
@@ -1855,25 +1907,26 @@ if ($SelectedProviderId) {
$config.llm["use_claude_code_subscription"] = $true
} elseif ($SubscriptionMode -eq "codex") {
$config.llm["use_codex_subscription"] = $true
if ($SelectedApiBase) { $config.llm["api_base"] = $SelectedApiBase }
} elseif ($SubscriptionMode -eq "antigravity") {
$config.llm["use_antigravity_subscription"] = $true
} elseif ($SubscriptionMode -eq "zai_code") {
$config.llm["api_base"] = "https://api.z.ai/api/coding/paas/v4"
$config.llm["api_base"] = $SelectedApiBase
$config.llm["api_key_env_var"] = $SelectedEnvVar
} elseif ($SubscriptionMode -eq "minimax_code") {
$config.llm["api_base"] = $SelectedApiBase
$config.llm["api_key_env_var"] = $SelectedEnvVar
} elseif ($SubscriptionMode -eq "kimi_code") {
$config.llm["api_base"] = "https://api.kimi.com/coding"
$config.llm["api_base"] = $SelectedApiBase
$config.llm["api_key_env_var"] = $SelectedEnvVar
} elseif ($SubscriptionMode -eq "hive_llm") {
$config.llm["api_base"] = $HiveLlmEndpoint
$config.llm["api_base"] = $SelectedApiBase
$config.llm["api_key_env_var"] = $SelectedEnvVar
} elseif ($SelectedProviderId -eq "openrouter") {
$config.llm["api_base"] = "https://openrouter.ai/api/v1"
$config.llm["api_key_env_var"] = $SelectedEnvVar
} elseif ($SelectedProviderId -eq "ollama") {
$config.llm["api_base"] = "http://localhost:11434"
$config.llm["api_base"] = $SelectedApiBase
$config.llm.Remove("api_key_env_var")
} elseif ($SelectedEnvVar) {
$config.llm["api_key_env_var"] = $SelectedEnvVar
@@ -2166,7 +2219,7 @@ Write-Host ""
# Show configured provider
if ($SelectedProviderId) {
if (-not $SelectedModel) { $SelectedModel = $DefaultModels[$SelectedProviderId] }
if (-not $SelectedModel) { $SelectedModel = Get-DefaultModel $SelectedProviderId }
Write-Color -Text "Default LLM:" -Color White
if ($SubscriptionMode -eq "claude_code") {
Write-Ok "Claude Code Subscription -> $SelectedModel"
@@ -2190,9 +2243,6 @@ if ($SelectedProviderId) {
Write-Host " -> " -NoNewline
Write-Color -Text $SelectedModel -Color DarkGray
}
Write-Color -Text " To use a different model for worker agents, run:" -Color DarkGray
Write-Host " " -NoNewline
Write-Color -Text ".\scripts\setup_worker_model.ps1" -Color Cyan
Write-Host ""
}
+231 -269
@@ -452,90 +452,6 @@ if [ "$USE_ASSOC_ARRAYS" = true ]; then
["DEEPSEEK_API_KEY"]="deepseek"
)
declare -A DEFAULT_MODELS=(
["anthropic"]="claude-haiku-4-5-20251001"
["openai"]="gpt-5-mini"
["minimax"]="MiniMax-M2.5"
["gemini"]="gemini-3-flash-preview"
["groq"]="moonshotai/kimi-k2-instruct-0905"
["cerebras"]="zai-glm-4.7"
["mistral"]="mistral-large-latest"
["together_ai"]="meta-llama/Llama-3.3-70B-Instruct-Turbo"
["deepseek"]="deepseek-chat"
)
# Model choices per provider: composite-key associative arrays
# Keys: "provider:index" -> value
declare -A MODEL_CHOICES_ID=(
["anthropic:0"]="claude-haiku-4-5-20251001"
["anthropic:1"]="claude-sonnet-4-20250514"
["anthropic:2"]="claude-sonnet-4-5-20250929"
["anthropic:3"]="claude-opus-4-6"
["openai:0"]="gpt-5-mini"
["openai:1"]="gpt-5.2"
["gemini:0"]="gemini-3-flash-preview"
["gemini:1"]="gemini-3.1-pro-preview"
["groq:0"]="moonshotai/kimi-k2-instruct-0905"
["groq:1"]="openai/gpt-oss-120b"
["cerebras:0"]="zai-glm-4.7"
["cerebras:1"]="qwen3-235b-a22b-instruct-2507"
)
declare -A MODEL_CHOICES_LABEL=(
["anthropic:0"]="Haiku 4.5 - Fast + cheap (recommended)"
["anthropic:1"]="Sonnet 4 - Fast + capable"
["anthropic:2"]="Sonnet 4.5 - Best balance"
["anthropic:3"]="Opus 4.6 - Most capable"
["openai:0"]="GPT-5 Mini - Fast + cheap (recommended)"
["openai:1"]="GPT-5.2 - Most capable"
["gemini:0"]="Gemini 3 Flash - Fast (recommended)"
["gemini:1"]="Gemini 3.1 Pro - Best quality"
["groq:0"]="Kimi K2 - Best quality (recommended)"
["groq:1"]="GPT-OSS 120B - Fast reasoning"
["cerebras:0"]="ZAI-GLM 4.7 - Best quality (recommended)"
["cerebras:1"]="Qwen3 235B - Frontier reasoning"
)
declare -A MODEL_CHOICES_MAXTOKENS=(
["anthropic:0"]=8192
["anthropic:1"]=8192
["anthropic:2"]=16384
["anthropic:3"]=32768
["openai:0"]=16384
["openai:1"]=16384
["gemini:0"]=8192
["gemini:1"]=8192
["groq:0"]=8192
["groq:1"]=8192
["cerebras:0"]=8192
["cerebras:1"]=8192
)
# Max context tokens (input history budget) per model, based on actual context windows.
# Leave ~10% headroom for system prompt and output tokens.
declare -A MODEL_CHOICES_MAXCONTEXTTOKENS=(
["anthropic:0"]=180000 # Claude Haiku 4.5 — 200k context window
["anthropic:1"]=180000 # Claude Sonnet 4 — 200k context window
["anthropic:2"]=180000 # Claude Sonnet 4.5 — 200k context window
["anthropic:3"]=180000 # Claude Opus 4.6 — 200k context window
["openai:0"]=120000 # GPT-5 Mini — 128k context window
["openai:1"]=120000 # GPT-5.2 — 128k context window
["gemini:0"]=900000 # Gemini 3 Flash — 1M context window
["gemini:1"]=900000 # Gemini 3.1 Pro — 1M context window
["groq:0"]=120000 # Kimi K2 — 128k context window
["groq:1"]=120000 # GPT-OSS 120B — 128k context window
["cerebras:0"]=120000 # ZAI-GLM 4.7 — 128k context window
["cerebras:1"]=120000 # Qwen3 235B — 128k context window
)
declare -A MODEL_CHOICES_COUNT=(
["anthropic"]=4
["openai"]=2
["gemini"]=2
["groq"]=2
["cerebras"]=2
)
# Helper functions for Bash 4+
get_provider_name() {
echo "${PROVIDER_NAMES[$1]}"
@@ -544,40 +460,12 @@ if [ "$USE_ASSOC_ARRAYS" = true ]; then
get_provider_id() {
echo "${PROVIDER_IDS[$1]}"
}
get_default_model() {
echo "${DEFAULT_MODELS[$1]}"
}
get_model_choice_count() {
echo "${MODEL_CHOICES_COUNT[$1]:-0}"
}
get_model_choice_id() {
echo "${MODEL_CHOICES_ID[$1:$2]}"
}
get_model_choice_label() {
echo "${MODEL_CHOICES_LABEL[$1:$2]}"
}
get_model_choice_maxtokens() {
echo "${MODEL_CHOICES_MAXTOKENS[$1:$2]}"
}
get_model_choice_maxcontexttokens() {
echo "${MODEL_CHOICES_MAXCONTEXTTOKENS[$1:$2]}"
}
else
# Bash 3.2 - use parallel indexed arrays
PROVIDER_ENV_VARS=(ANTHROPIC_API_KEY OPENAI_API_KEY MINIMAX_API_KEY GEMINI_API_KEY GOOGLE_API_KEY GROQ_API_KEY CEREBRAS_API_KEY OPENROUTER_API_KEY MISTRAL_API_KEY TOGETHER_API_KEY DEEPSEEK_API_KEY)
PROVIDER_DISPLAY_NAMES=("Anthropic (Claude)" "OpenAI (GPT)" "MiniMax" "Google Gemini" "Google AI" "Groq" "Cerebras" "OpenRouter" "Mistral" "Together AI" "DeepSeek")
PROVIDER_ID_LIST=(anthropic openai minimax gemini google groq cerebras openrouter mistral together deepseek)
# Default models by provider id (parallel arrays)
MODEL_PROVIDER_IDS=(anthropic openai minimax gemini groq cerebras mistral together_ai deepseek)
MODEL_DEFAULTS=("claude-haiku-4-5-20251001" "gpt-5-mini" "MiniMax-M2.5" "gemini-3-flash-preview" "moonshotai/kimi-k2-instruct-0905" "zai-glm-4.7" "mistral-large-latest" "meta-llama/Llama-3.3-70B-Instruct-Turbo" "deepseek-chat")
# Helper: get provider display name for an env var
get_provider_name() {
local env_var="$1"
@@ -603,116 +491,199 @@ else
i=$((i + 1))
done
}
fi
# Helper: get default model for a provider id
get_default_model() {
local provider_id="$1"
local i=0
while [ $i -lt ${#MODEL_PROVIDER_IDS[@]} ]; do
if [ "${MODEL_PROVIDER_IDS[$i]}" = "$provider_id" ]; then
echo "${MODEL_DEFAULTS[$i]}"
MODEL_DEFAULT_ROWS=""
MODEL_CHOICE_ROWS=""
PRESET_ROWS=""
PRESET_MODEL_CHOICE_ROWS=""
load_model_catalog_rows() {
# Bash 3.2 has no native JSON parser, so we materialize the shared catalogue
# into simple tab-separated rows once and reuse them for the interactive flow.
local catalog_lines=""
catalog_lines="$(uv run python -c '
from framework.llm.model_catalog import get_default_models, get_models_catalogue, get_presets
for provider_id, default_model in sorted(get_default_models().items()):
print(f"DEFAULT\t{provider_id}\t{default_model}")
for provider_id, models in sorted(get_models_catalogue().items()):
for model in models:
print(
"MODEL\t{provider}\t{id}\t{label}\t{max_tokens}\t{max_context_tokens}".format(
provider=provider_id,
id=model["id"],
label=model["label"],
max_tokens=model["max_tokens"],
max_context_tokens=model["max_context_tokens"],
)
)
for preset_id, preset in sorted(get_presets().items()):
print(
"PRESET\t{preset_id}\t{provider}\t{model}\t{max_tokens}\t{max_context_tokens}\t{api_key_env_var}\t{api_base}".format(
preset_id=preset_id,
provider=preset["provider"],
model=preset.get("model", ""),
max_tokens=preset["max_tokens"],
max_context_tokens=preset["max_context_tokens"],
api_key_env_var=preset.get("api_key_env_var", ""),
api_base=preset.get("api_base", ""),
)
)
for choice in preset.get("model_choices", []):
print(
"PRESET_MODEL\t{preset_id}\t{id}\t{label}\t{recommended}".format(
preset_id=preset_id,
id=choice["id"],
label=choice["label"],
recommended=str(choice["recommended"]).lower(),
)
)
' 2>/dev/null)" || return 1
MODEL_DEFAULT_ROWS=""
MODEL_CHOICE_ROWS=""
PRESET_ROWS=""
PRESET_MODEL_CHOICE_ROWS=""
while IFS=$'\t' read -r row_type field1 field2 field3 field4 field5 field6 field7; do
[ -n "$row_type" ] || continue
if [ "$row_type" = "DEFAULT" ]; then
MODEL_DEFAULT_ROWS+="${field1}"$'\t'"${field2}"$'\n'
elif [ "$row_type" = "MODEL" ]; then
MODEL_CHOICE_ROWS+="${field1}"$'\t'"${field2}"$'\t'"${field3}"$'\t'"${field4}"$'\t'"${field5}"$'\n'
elif [ "$row_type" = "PRESET" ]; then
PRESET_ROWS+="${field1}"$'\t'"${field2}"$'\t'"${field3}"$'\t'"${field4}"$'\t'"${field5}"$'\t'"${field6}"$'\t'"${field7}"$'\n'
elif [ "$row_type" = "PRESET_MODEL" ]; then
PRESET_MODEL_CHOICE_ROWS+="${field1}"$'\t'"${field2}"$'\t'"${field3}"$'\t'"${field4}"$'\n'
fi
done <<< "$catalog_lines"
}
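# Illustrative sample of the emitted rows (tab-separated; values taken from
# model_catalog.json, spacing widened here for readability):
#   DEFAULT       anthropic  claude-haiku-4-5-20251001
#   MODEL         openai     gpt-5.4  GPT-5.4 - Best intelligence  128000  960000
#   PRESET        zai_code   openai   glm-5  32768  180000  ZAI_API_KEY  https://api.z.ai/api/coding/paas/v4
#   PRESET_MODEL  hive_llm   queen    queen  true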
get_default_model() {
local provider_id="$1"
while IFS=$'\t' read -r row_provider row_model; do
[ -n "$row_provider" ] || continue
if [ "$row_provider" = "$provider_id" ]; then
echo "$row_model"
return
fi
done <<< "$MODEL_DEFAULT_ROWS"
}
get_model_choice_count() {
local provider_id="$1"
local count=0
while IFS=$'\t' read -r row_provider _; do
[ -n "$row_provider" ] || continue
if [ "$row_provider" = "$provider_id" ]; then
count=$((count + 1))
fi
done <<< "$MODEL_CHOICE_ROWS"
echo "$count"
}
get_model_choice_field() {
local provider_id="$1"
local idx="$2"
local field="$3"
local count=0
while IFS=$'\t' read -r row_provider row_id row_label row_max_tokens row_max_context_tokens; do
[ -n "$row_provider" ] || continue
if [ "$row_provider" = "$provider_id" ]; then
if [ "$count" -eq "$idx" ]; then
case "$field" in
id) echo "$row_id" ;;
label) echo "$row_label" ;;
max_tokens) echo "$row_max_tokens" ;;
max_context_tokens) echo "$row_max_context_tokens" ;;
esac
return
fi
i=$((i + 1))
done
}
count=$((count + 1))
fi
done <<< "$MODEL_CHOICE_ROWS"
}
# Model choices per provider - flat parallel arrays with provider offsets
# Provider order: anthropic(4), openai(2), gemini(2), groq(2), cerebras(2)
MC_PROVIDERS=(anthropic anthropic anthropic anthropic openai openai gemini gemini groq groq cerebras cerebras)
MC_IDS=("claude-haiku-4-5-20251001" "claude-sonnet-4-20250514" "claude-sonnet-4-5-20250929" "claude-opus-4-6" "gpt-5-mini" "gpt-5.2" "gemini-3-flash-preview" "gemini-3.1-pro-preview" "moonshotai/kimi-k2-instruct-0905" "openai/gpt-oss-120b" "zai-glm-4.7" "qwen3-235b-a22b-instruct-2507")
MC_LABELS=("Haiku 4.5 - Fast + cheap (recommended)" "Sonnet 4 - Fast + capable" "Sonnet 4.5 - Best balance" "Opus 4.6 - Most capable" "GPT-5 Mini - Fast + cheap (recommended)" "GPT-5.2 - Most capable" "Gemini 3 Flash - Fast (recommended)" "Gemini 3.1 Pro - Best quality" "Kimi K2 - Best quality (recommended)" "GPT-OSS 120B - Fast reasoning" "ZAI-GLM 4.7 - Best quality (recommended)" "Qwen3 235B - Frontier reasoning")
MC_MAXTOKENS=(8192 8192 16384 32768 16384 16384 8192 8192 8192 8192 8192 8192)
# Max context tokens per model (same order as MC_PROVIDERS/MC_IDS above)
# Based on actual context windows with ~10% headroom for system prompt + output.
MC_MAXCONTEXTTOKENS=(180000 180000 180000 180000 120000 120000 900000 900000 120000 120000 120000 120000)
get_model_choice_id() {
get_model_choice_field "$1" "$2" "id"
}
# Helper: get number of model choices for a provider
get_model_choice_count() {
local provider_id="$1"
local count=0
local i=0
while [ $i -lt ${#MC_PROVIDERS[@]} ]; do
if [ "${MC_PROVIDERS[$i]}" = "$provider_id" ]; then
count=$((count + 1))
fi
i=$((i + 1))
done
echo "$count"
}
get_model_choice_label() {
get_model_choice_field "$1" "$2" "label"
}
get_model_choice_maxtokens() {
get_model_choice_field "$1" "$2" "max_tokens"
}
get_model_choice_maxcontexttokens() {
get_model_choice_field "$1" "$2" "max_context_tokens"
}
get_preset_field() {
local preset_id="$1"
local field="$2"
while IFS=$'\t' read -r row_preset_id row_provider row_model row_max_tokens row_max_context_tokens row_env_var row_api_base; do
[ -n "$row_preset_id" ] || continue
if [ "$row_preset_id" = "$preset_id" ]; then
case "$field" in
provider) echo "$row_provider" ;;
model) echo "$row_model" ;;
max_tokens) echo "$row_max_tokens" ;;
max_context_tokens) echo "$row_max_context_tokens" ;;
api_key_env_var) echo "$row_env_var" ;;
api_base) echo "$row_api_base" ;;
esac
return
fi
done <<< "$PRESET_ROWS"
}
apply_preset() {
local preset_id="$1"
SELECTED_PROVIDER_ID="$(get_preset_field "$preset_id" "provider")"
SELECTED_MODEL="$(get_preset_field "$preset_id" "model")"
SELECTED_MAX_TOKENS="$(get_preset_field "$preset_id" "max_tokens")"
SELECTED_MAX_CONTEXT_TOKENS="$(get_preset_field "$preset_id" "max_context_tokens")"
SELECTED_ENV_VAR="$(get_preset_field "$preset_id" "api_key_env_var")"
SELECTED_API_BASE="$(get_preset_field "$preset_id" "api_base")"
}
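A minimal sketch of apply_preset in use (the echoed values depend entirely on the catalog JSON, not on this script):

apply_preset "zai_code"
echo "provider=$SELECTED_PROVIDER_ID model=$SELECTED_MODEL"
echo "max_tokens=$SELECTED_MAX_TOKENS api_base=$SELECTED_API_BASE"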
get_preset_model_choice_count() {
local preset_id="$1"
local count=0
while IFS=$'\t' read -r row_preset_id _; do
[ -n "$row_preset_id" ] || continue
if [ "$row_preset_id" = "$preset_id" ]; then
count=$((count + 1))
fi
done <<< "$PRESET_MODEL_CHOICE_ROWS"
echo "$count"
}
get_preset_model_choice_field() {
local preset_id="$1"
local idx="$2"
local field="$3"
local count=0
while IFS=$'\t' read -r row_preset_id row_id row_label row_recommended; do
[ -n "$row_preset_id" ] || continue
if [ "$row_preset_id" = "$preset_id" ]; then
if [ "$count" -eq "$idx" ]; then
case "$field" in
id) echo "$row_id" ;;
label) echo "$row_label" ;;
recommended) echo "$row_recommended" ;;
esac
return
fi
count=$((count + 1))
fi
done <<< "$PRESET_MODEL_CHOICE_ROWS"
}
# Helper: get model choice id by provider and index (0-based within provider)
get_model_choice_id() {
local provider_id="$1"
local idx="$2"
local count=0
local i=0
while [ $i -lt ${#MC_PROVIDERS[@]} ]; do
if [ "${MC_PROVIDERS[$i]}" = "$provider_id" ]; then
if [ $count -eq "$idx" ]; then
echo "${MC_IDS[$i]}"
return
fi
count=$((count + 1))
fi
i=$((i + 1))
done
}
# Helper: get model choice label by provider and index
get_model_choice_label() {
local provider_id="$1"
local idx="$2"
local count=0
local i=0
while [ $i -lt ${#MC_PROVIDERS[@]} ]; do
if [ "${MC_PROVIDERS[$i]}" = "$provider_id" ]; then
if [ $count -eq "$idx" ]; then
echo "${MC_LABELS[$i]}"
return
fi
count=$((count + 1))
fi
i=$((i + 1))
done
}
# Helper: get model choice max_tokens by provider and index
get_model_choice_maxtokens() {
local provider_id="$1"
local idx="$2"
local count=0
local i=0
while [ $i -lt ${#MC_PROVIDERS[@]} ]; do
if [ "${MC_PROVIDERS[$i]}" = "$provider_id" ]; then
if [ $count -eq "$idx" ]; then
echo "${MC_MAXTOKENS[$i]}"
return
fi
count=$((count + 1))
fi
i=$((i + 1))
done
}
# Helper: get model choice max_context_tokens by provider and index
get_model_choice_maxcontexttokens() {
local provider_id="$1"
local idx="$2"
local count=0
local i=0
while [ $i -lt ${#MC_PROVIDERS[@]} ]; do
if [ "${MC_PROVIDERS[$i]}" = "$provider_id" ]; then
if [ $count -eq "$idx" ]; then
echo "${MC_MAXCONTEXTTOKENS[$i]}"
return
fi
count=$((count + 1))
fi
i=$((i + 1))
done
}
# Configuration directory
HIVE_CONFIG_DIR="$HOME/.hive"
@@ -1100,6 +1071,12 @@ if ollama list >/dev/null 2>&1; then
OLLAMA_DETECTED=true
fi
if ! load_model_catalog_rows; then
echo -e "${RED}Failed to load core/framework/llm/model_catalog.json.${NC}"
echo -e "${YELLOW}Please ensure your Python environment is set up, then rerun quickstart.${NC}"
exit 1
fi
# Detect API key providers
if [ "$USE_ASSOC_ARRAYS" = true ]; then
for env_var in "${!PROVIDER_NAMES[@]}"; do
@@ -1344,10 +1321,7 @@ case $choice in
exit 1
else
SUBSCRIPTION_MODE="claude_code"
SELECTED_PROVIDER_ID="anthropic"
SELECTED_MODEL="claude-opus-4-6"
SELECTED_MAX_TOKENS=32768
SELECTED_MAX_CONTEXT_TOKENS=960000 # Claude — 1M context window
apply_preset "claude_code"
echo ""
echo -e "${GREEN}${NC} Using Claude Code subscription"
fi
@@ -1355,11 +1329,7 @@ case $choice in
2)
# ZAI Code Subscription
SUBSCRIPTION_MODE="zai_code"
SELECTED_PROVIDER_ID="openai"
SELECTED_ENV_VAR="ZAI_API_KEY"
SELECTED_MODEL="glm-5"
SELECTED_MAX_TOKENS=32768
SELECTED_MAX_CONTEXT_TOKENS=180000 # GLM-5 — 200k context window
apply_preset "zai_code"
PROVIDER_NAME="ZAI"
echo ""
echo -e "${GREEN}${NC} Using ZAI Code subscription"
@@ -1387,10 +1357,7 @@ case $choice in
fi
if [ "$CODEX_CRED_DETECTED" = true ]; then
SUBSCRIPTION_MODE="codex"
SELECTED_PROVIDER_ID="openai"
SELECTED_MODEL="gpt-5.3-codex"
SELECTED_MAX_TOKENS=16384
SELECTED_MAX_CONTEXT_TOKENS=120000 # GPT Codex — 128k context window
apply_preset "codex"
echo ""
echo -e "${GREEN}${NC} Using OpenAI Codex subscription"
fi
@@ -1398,12 +1365,7 @@ case $choice in
4)
# MiniMax Coding Key
SUBSCRIPTION_MODE="minimax_code"
SELECTED_ENV_VAR="MINIMAX_API_KEY"
SELECTED_PROVIDER_ID="minimax"
SELECTED_MODEL="MiniMax-M2.5"
SELECTED_MAX_TOKENS=32768
SELECTED_MAX_CONTEXT_TOKENS=900000 # MiniMax M2.5 — 1M context window
SELECTED_API_BASE="https://api.minimax.io/v1"
apply_preset "minimax_code"
PROVIDER_NAME="MiniMax"
SIGNUP_URL="https://platform.minimax.io/user-center/basic-information/interface-key"
echo ""
@@ -1413,12 +1375,7 @@ case $choice in
5)
# Kimi Code Subscription
SUBSCRIPTION_MODE="kimi_code"
SELECTED_PROVIDER_ID="kimi"
SELECTED_ENV_VAR="KIMI_API_KEY"
SELECTED_MODEL="kimi-k2.5"
SELECTED_MAX_TOKENS=32768
SELECTED_MAX_CONTEXT_TOKENS=240000 # Kimi K2.5 — 256k context window
SELECTED_API_BASE="https://api.kimi.com/coding"
apply_preset "kimi_code"
PROVIDER_NAME="Kimi"
SIGNUP_URL="https://www.kimi.com/code"
echo ""
@@ -1428,28 +1385,38 @@ case $choice in
6)
# Hive LLM
SUBSCRIPTION_MODE="hive_llm"
SELECTED_PROVIDER_ID="hive"
SELECTED_ENV_VAR="HIVE_API_KEY"
SELECTED_MAX_TOKENS=32768
SELECTED_MAX_CONTEXT_TOKENS=180000
SELECTED_API_BASE="$HIVE_LLM_ENDPOINT"
apply_preset "hive_llm"
PROVIDER_NAME="Hive"
SIGNUP_URL="https://discord.com/invite/hQdU7QDkgR"
echo ""
echo -e "${GREEN}${NC} Using Hive LLM"
echo ""
echo -e " Select a model:"
echo -e " ${CYAN}1)${NC} queen ${DIM}(default — Hive flagship)${NC}"
echo -e " ${CYAN}2)${NC} kimi-2.5"
echo -e " ${CYAN}3)${NC} GLM-5"
hive_choice_count="$(get_preset_model_choice_count "hive_llm")"
hive_default_choice=1
hive_idx=0
while [ "$hive_idx" -lt "$hive_choice_count" ]; do
hive_num=$((hive_idx + 1))
hive_model_id="$(get_preset_model_choice_field "hive_llm" "$hive_idx" "id")"
hive_recommended="$(get_preset_model_choice_field "hive_llm" "$hive_idx" "recommended")"
if [ "$hive_recommended" = "true" ]; then
echo -e " ${CYAN}${hive_num})${NC} ${hive_model_id} ${DIM}(default — Hive flagship)${NC}"
hive_default_choice="$hive_num"
else
echo -e " ${CYAN}${hive_num})${NC} ${hive_model_id}"
fi
hive_idx=$((hive_idx + 1))
done
echo ""
read -r -p " Enter model choice (1-3) [1]: " hive_model_choice || true
hive_model_choice="${hive_model_choice:-1}"
case "$hive_model_choice" in
2) SELECTED_MODEL="kimi-2.5" ;;
3) SELECTED_MODEL="GLM-5" ;;
*) SELECTED_MODEL="queen" ;;
esac
while true; do
read -r -p " Enter model choice (1-$hive_choice_count) [$hive_default_choice]: " hive_model_choice || true
hive_model_choice="${hive_model_choice:-$hive_default_choice}"
if [[ "$hive_model_choice" =~ ^[0-9]+$ ]] && [ "$hive_model_choice" -ge 1 ] && [ "$hive_model_choice" -le "$hive_choice_count" ]; then
SELECTED_MODEL="$(get_preset_model_choice_field "hive_llm" "$((hive_model_choice - 1))" "id")"
break
fi
echo -e "${RED}Invalid choice. Please enter 1-$hive_choice_count${NC}"
done
echo -e " ${DIM}Model: $SELECTED_MODEL | API: ${HIVE_LLM_ENDPOINT}${NC}"
;;
7)
@@ -1480,10 +1447,7 @@ case $choice in
if [ "$ANTIGRAVITY_CRED_DETECTED" = true ]; then
SUBSCRIPTION_MODE="antigravity"
SELECTED_PROVIDER_ID="openai"
SELECTED_MODEL="gemini-3-flash"
SELECTED_MAX_TOKENS=32768
SELECTED_MAX_CONTEXT_TOKENS=1000000 # Gemini 3 Flash — 1M context window
apply_preset "antigravity"
echo ""
echo -e "${YELLOW} ⚠ Using Antigravity can technically cause your account suspension. Please use at your own risk.${NC}"
echo ""
@@ -1540,8 +1504,8 @@ case $choice in
fi
SELECTED_PROVIDER_ID="ollama"
SELECTED_ENV_VAR=""
SELECTED_MAX_TOKENS=8192
SELECTED_MAX_CONTEXT_TOKENS=16384
SELECTED_MAX_TOKENS="$(get_preset_field "ollama_local" "max_tokens")"
SELECTED_MAX_CONTEXT_TOKENS="$(get_preset_field "ollama_local" "max_context_tokens")"
OLLAMA_MODELS=()
while IFS= read -r line; do
[ -n "$line" ] && OLLAMA_MODELS+=("$line")
@@ -1559,7 +1523,7 @@ case $choice in
read -r -p "Enter choice (1-${#OLLAMA_MODELS[@]}): " model_choice
if [[ "$model_choice" =~ ^[0-9]+$ ]] && [ "$model_choice" -ge 1 ] && [ "$model_choice" -le ${#OLLAMA_MODELS[@]} ]; then
SELECTED_MODEL="${OLLAMA_MODELS[$((model_choice - 1))]}"
SELECTED_API_BASE="http://localhost:11434"
SELECTED_API_BASE="$(get_preset_field "ollama_local" "api_base")"
break
fi
echo -e "${RED}Invalid choice. Please enter 1-${#OLLAMA_MODELS[@]}${NC}"
@@ -1684,7 +1648,7 @@ if [ "$SUBSCRIPTION_MODE" = "zai_code" ]; then
echo -e "${GREEN}${NC} ZAI API key saved to $SHELL_RC_FILE"
# Health check the new key
echo -n " Verifying ZAI API key... "
HC_RESULT=$(uv run python "$SCRIPT_DIR/scripts/check_llm_key.py" "zai" "$API_KEY" "https://api.z.ai/api/coding/paas/v4" 2>/dev/null) || true
HC_RESULT=$(uv run python "$SCRIPT_DIR/scripts/check_llm_key.py" "zai" "$API_KEY" "$SELECTED_API_BASE" 2>/dev/null) || true
HC_VALID=$(echo "$HC_RESULT" | $PYTHON_CMD -c "import json,sys; print(json.loads(sys.stdin.read()).get('valid',''))" 2>/dev/null) || true
HC_MSG=$(echo "$HC_RESULT" | $PYTHON_CMD -c "import json,sys; print(json.loads(sys.stdin.read()).get('message',''))" 2>/dev/null) || true
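# Note: the Python one-liners above print JSON booleans as Python's "True"/"False",
# which is why the check below compares against the string "True" rather than "true".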
if [ "$HC_VALID" = "True" ]; then
@@ -1735,11 +1699,11 @@ if [ -n "$SELECTED_PROVIDER_ID" ]; then
if [ "$SUBSCRIPTION_MODE" = "claude_code" ]; then
save_configuration "$SELECTED_PROVIDER_ID" "" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "$SELECTED_MAX_CONTEXT_TOKENS" "true" "" > /dev/null || SAVE_OK=false
elif [ "$SUBSCRIPTION_MODE" = "codex" ]; then
save_configuration "$SELECTED_PROVIDER_ID" "" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "$SELECTED_MAX_CONTEXT_TOKENS" "" "" "true" > /dev/null || SAVE_OK=false
save_configuration "$SELECTED_PROVIDER_ID" "" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "$SELECTED_MAX_CONTEXT_TOKENS" "" "$SELECTED_API_BASE" "true" > /dev/null || SAVE_OK=false
elif [ "$SUBSCRIPTION_MODE" = "antigravity" ]; then
save_configuration "$SELECTED_PROVIDER_ID" "" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "$SELECTED_MAX_CONTEXT_TOKENS" "" "" "" "true" > /dev/null || SAVE_OK=false
elif [ "$SUBSCRIPTION_MODE" = "zai_code" ]; then
save_configuration "$SELECTED_PROVIDER_ID" "$SELECTED_ENV_VAR" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "$SELECTED_MAX_CONTEXT_TOKENS" "" "https://api.z.ai/api/coding/paas/v4" > /dev/null || SAVE_OK=false
save_configuration "$SELECTED_PROVIDER_ID" "$SELECTED_ENV_VAR" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "$SELECTED_MAX_CONTEXT_TOKENS" "" "$SELECTED_API_BASE" > /dev/null || SAVE_OK=false
elif [ "$SUBSCRIPTION_MODE" = "minimax_code" ]; then
save_configuration "$SELECTED_PROVIDER_ID" "$SELECTED_ENV_VAR" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "$SELECTED_MAX_CONTEXT_TOKENS" "" "$SELECTED_API_BASE" > /dev/null || SAVE_OK=false
elif [ "$SUBSCRIPTION_MODE" = "kimi_code" ]; then
@@ -1751,7 +1715,7 @@ if [ -n "$SELECTED_PROVIDER_ID" ]; then
elif [ "$SELECTED_PROVIDER_ID" = "ollama" ]; then
# Pass api_base explicitly — LiteLLM requires this to route ollama/* models
# to the local Ollama server instead of trying to reach a remote endpoint.
save_configuration "ollama" "" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "$SELECTED_MAX_CONTEXT_TOKENS" "" "http://localhost:11434" > /dev/null || SAVE_OK=false
save_configuration "ollama" "" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "$SELECTED_MAX_CONTEXT_TOKENS" "" "$SELECTED_API_BASE" > /dev/null || SAVE_OK=false
else
save_configuration "$SELECTED_PROVIDER_ID" "$SELECTED_ENV_VAR" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "$SELECTED_MAX_CONTEXT_TOKENS" > /dev/null || SAVE_OK=false
fi
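# Positional signature of save_configuration as inferred from the call sites
# above (the definition itself is outside this diff, so treat this as an assumption):
#   save_configuration PROVIDER ENV_VAR MODEL MAX_TOKENS MAX_CONTEXT_TOKENS \
#                      CLAUDE_CODE_FLAG API_BASE CODEX_FLAG ANTIGRAVITY_FLAG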
@@ -2128,8 +2092,6 @@ if [ -n "$SELECTED_PROVIDER_ID" ]; then
else
echo -e " ${CYAN}$SELECTED_PROVIDER_ID${NC}${DIM}$SELECTED_MODEL${NC}"
fi
echo -e " ${DIM}To use a different model for worker agents, run:${NC}"
echo -e " ${CYAN}./scripts/setup_worker_model.sh${NC}"
echo ""
fi
File diff suppressed because it is too large
File diff suppressed because it is too large