feat: consolidate model config
This commit is contained in:
@@ -2202,6 +2202,24 @@ class AgentLoop(NodeProtocol):
|
||||
len(messages),
|
||||
len(tools),
|
||||
)
|
||||
logger.debug(
|
||||
"[_run_single_turn] inner_turn=%d: request context node=%s roles=%s system_chars=%d max_tokens=%d",
|
||||
inner_turn,
|
||||
node_id,
|
||||
[m.get("role") for m in messages],
|
||||
len(conversation.system_prompt or ""),
|
||||
ctx.max_tokens,
|
||||
)
|
||||
if not messages:
|
||||
logger.warning(
|
||||
"[_run_single_turn] inner_turn=%d: no non-system conversation messages "
|
||||
"before LLM call for node=%s model=%s api_base=%s. "
|
||||
"This will produce a system-only payload, which some providers reject.",
|
||||
inner_turn,
|
||||
node_id,
|
||||
getattr(ctx.llm, "model", type(ctx.llm).__name__),
|
||||
getattr(ctx.llm, "api_base", None),
|
||||
)
|
||||
|
||||
# Stream LLM response in a child task so cancel_current_turn()
|
||||
# can kill it instantly without terminating the queen's main loop.
|
||||
|
||||
@@ -363,10 +363,15 @@ def _dump_failed_request(
|
||||
"attempt": attempt,
|
||||
"estimated_tokens": _estimate_tokens(model, messages),
|
||||
"num_messages": len(messages),
|
||||
"api_base": kwargs.get("api_base"),
|
||||
"request_keys": sorted(kwargs.keys()),
|
||||
"messages": messages,
|
||||
"tools": kwargs.get("tools"),
|
||||
"max_tokens": kwargs.get("max_tokens"),
|
||||
"temperature": kwargs.get("temperature"),
|
||||
"stream": kwargs.get("stream"),
|
||||
"tool_choice": kwargs.get("tool_choice"),
|
||||
"response_format": kwargs.get("response_format"),
|
||||
}
|
||||
|
||||
with open(filepath, "w", encoding="utf-8") as f:
|
||||
@@ -381,6 +386,108 @@ def _dump_failed_request(
|
||||
return "log_write_failed"
|
||||
|
||||
|
||||
def _summarize_message_content(content: Any) -> dict[str, Any]:
|
||||
"""Return a structural summary of one message content payload."""
|
||||
if isinstance(content, str):
|
||||
return {
|
||||
"content_kind": "string",
|
||||
"text_chars": len(content),
|
||||
}
|
||||
|
||||
if isinstance(content, list):
|
||||
block_types: list[str] = []
|
||||
text_chars = 0
|
||||
for block in content:
|
||||
if isinstance(block, dict):
|
||||
block_type = str(block.get("type", "unknown"))
|
||||
block_types.append(block_type)
|
||||
if block_type == "text":
|
||||
text_chars += len(str(block.get("text", "")))
|
||||
elif block_type == "tool_result":
|
||||
block_content = block.get("content")
|
||||
if isinstance(block_content, str):
|
||||
text_chars += len(block_content)
|
||||
elif isinstance(block_content, list):
|
||||
for inner in block_content:
|
||||
if isinstance(inner, dict) and inner.get("type") == "text":
|
||||
text_chars += len(str(inner.get("text", "")))
|
||||
else:
|
||||
block_types.append(type(block).__name__)
|
||||
return {
|
||||
"content_kind": "list",
|
||||
"blocks": len(content),
|
||||
"block_types": block_types,
|
||||
"text_chars": text_chars,
|
||||
}
|
||||
|
||||
return {
|
||||
"content_kind": type(content).__name__,
|
||||
}
|
||||
|
||||
|
||||
def _summarize_messages_for_log(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Build a high-signal, no-secret summary of the outgoing messages payload.

    Each entry records the message index, role, and key set, plus the content
    summary from ``_summarize_message_content``; tool-call counts/names,
    ``cache_control`` and ``tool_call_id`` appear only when present.
    """
    out: list[dict[str, Any]] = []
    for position, msg in enumerate(messages):
        entry: dict[str, Any] = {
            "idx": position,
            "role": msg.get("role"),
            "keys": sorted(msg.keys()),
        }
        entry.update(_summarize_message_content(msg.get("content")))

        calls = msg.get("tool_calls")
        if isinstance(calls, list):
            entry["tool_calls"] = len(calls)
            # Collect only well-formed function names; skip malformed entries.
            names = [
                str(call["function"]["name"])
                for call in calls
                if isinstance(call, dict)
                and isinstance(call.get("function"), dict)
                and call["function"].get("name")
            ]
            if names:
                entry["tool_call_names"] = names

        if msg.get("cache_control"):
            entry["cache_control"] = True
        if msg.get("tool_call_id"):
            entry["tool_call_id"] = str(msg.get("tool_call_id"))
        out.append(entry)
    return out
|
||||
|
||||
|
||||
def _summarize_request_for_log(kwargs: dict[str, Any]) -> dict[str, Any]:
    """Return a compact structural summary of a LiteLLM request payload.

    Summarizes tool names, message counts/roles (excluding system messages),
    and key request parameters without copying any secret content.
    """
    tools = kwargs.get("tools")
    tool_names: list[str] = []
    if isinstance(tools, list):
        tool_names = [
            str(tool["function"]["name"])
            for tool in tools
            if isinstance(tool, dict)
            and isinstance(tool.get("function"), dict)
            and tool["function"].get("name")
        ]

    raw_messages = kwargs.get("messages", [])
    message_list = raw_messages if isinstance(raw_messages, list) else []
    chat_roles = [m.get("role") for m in message_list if m.get("role") != "system"]

    return {
        "model": kwargs.get("model"),
        "api_base": kwargs.get("api_base"),
        "stream": kwargs.get("stream"),
        "max_tokens": kwargs.get("max_tokens"),
        "tool_count": len(tools) if isinstance(tools, list) else 0,
        "tool_names": tool_names,
        "tool_choice": kwargs.get("tool_choice"),
        "response_format": bool(kwargs.get("response_format")),
        "message_count": len(message_list) if isinstance(raw_messages, list) else 0,
        "non_system_message_count": len(chat_roles),
        "first_non_system_role": chat_roles[0] if chat_roles else None,
        "last_non_system_role": chat_roles[-1] if chat_roles else None,
        # NOTE: uses the raw value so a truthy non-list still flags system_only,
        # matching the original behavior.
        "system_only": bool(raw_messages) and not chat_roles,
        "messages": _summarize_messages_for_log(message_list),
    }
|
||||
|
||||
|
||||
def _compute_retry_delay(
|
||||
attempt: int,
|
||||
exception: BaseException | None = None,
|
||||
@@ -1156,6 +1263,12 @@ class LiteLLMProvider(LLMProvider):
|
||||
api_base = (self.api_base or "").lower()
|
||||
return "openrouter.ai/api/v1" in api_base
|
||||
|
||||
def _is_zai_openai_backend(self) -> bool:
|
||||
"""Return True when using Z-AI's OpenAI-compatible chat endpoint."""
|
||||
model = (self.model or "").lower()
|
||||
api_base = (self.api_base or "").lower()
|
||||
return "api.z.ai" in api_base or model.startswith("openai/glm-") or model == "glm-5"
|
||||
|
||||
def _should_use_openrouter_tool_compat(
|
||||
self,
|
||||
error: BaseException,
|
||||
@@ -1816,6 +1929,33 @@ class LiteLLMProvider(LLMProvider):
|
||||
kwargs.pop("max_tokens", None)
|
||||
kwargs.pop("stream_options", None)
|
||||
|
||||
request_summary = _summarize_request_for_log(kwargs)
|
||||
logger.debug(
|
||||
"[stream] prepared request: %s",
|
||||
json.dumps(request_summary, default=str),
|
||||
)
|
||||
if request_summary["system_only"]:
|
||||
logger.warning(
|
||||
"[stream] %s request has no non-system chat messages "
|
||||
"(api_base=%s tools=%d system_chars=%d). "
|
||||
"Some chat-completions backends reject system-only payloads.",
|
||||
self.model,
|
||||
self.api_base,
|
||||
request_summary["tool_count"],
|
||||
sum(
|
||||
message.get("text_chars", 0)
|
||||
for message in request_summary["messages"]
|
||||
if message.get("role") == "system"
|
||||
),
|
||||
)
|
||||
if self._is_zai_openai_backend():
|
||||
logger.warning(
|
||||
"[stream] %s appears to be using Z-AI/GLM's OpenAI-compatible backend. "
|
||||
"This backend has rejected system-only payloads with "
|
||||
"'The messages parameter is illegal.' in prior requests.",
|
||||
self.model,
|
||||
)
|
||||
|
||||
for attempt in range(RATE_LIMIT_MAX_RETRIES + 1):
|
||||
# Post-stream events (ToolCall, TextEnd, Finish) are buffered
|
||||
# because they depend on the full stream. TextDeltaEvents are
|
||||
@@ -2179,6 +2319,20 @@ class LiteLLMProvider(LLMProvider):
|
||||
)
|
||||
await asyncio.sleep(wait)
|
||||
continue
|
||||
dump_path = _dump_failed_request(
|
||||
model=self.model,
|
||||
kwargs=kwargs,
|
||||
error_type=f"stream_exception_{type(e).__name__.lower()}",
|
||||
attempt=attempt,
|
||||
)
|
||||
logger.error(
|
||||
"[stream] %s request failed with %s: %s | request=%s | dump=%s",
|
||||
self.model,
|
||||
type(e).__name__,
|
||||
e,
|
||||
json.dumps(_summarize_request_for_log(kwargs), default=str),
|
||||
dump_path,
|
||||
)
|
||||
recoverable = _is_stream_transient_error(e)
|
||||
yield StreamErrorEvent(error=str(e), recoverable=recoverable)
|
||||
return
|
||||
|
||||
@@ -0,0 +1,271 @@
|
||||
{
|
||||
"schema_version": 1,
|
||||
"providers": {
|
||||
"anthropic": {
|
||||
"default_model": "claude-haiku-4-5-20251001",
|
||||
"models": [
|
||||
{
|
||||
"id": "claude-haiku-4-5-20251001",
|
||||
"label": "Haiku 4.5 - Fast + cheap",
|
||||
"recommended": false,
|
||||
"max_tokens": 64000,
|
||||
"max_context_tokens": 136000
|
||||
},
|
||||
{
|
||||
"id": "claude-sonnet-4-5-20250929",
|
||||
"label": "Sonnet 4.5 - Best balance",
|
||||
"recommended": false,
|
||||
"max_tokens": 64000,
|
||||
"max_context_tokens": 136000
|
||||
},
|
||||
{
|
||||
"id": "claude-opus-4-6",
|
||||
"label": "Opus 4.6 - Most capable",
|
||||
"recommended": true,
|
||||
"max_tokens": 128000,
|
||||
"max_context_tokens": 872000
|
||||
}
|
||||
]
|
||||
},
|
||||
"openai": {
|
||||
"default_model": "gpt-5.4",
|
||||
"models": [
|
||||
{
|
||||
"id": "gpt-5.4",
|
||||
"label": "GPT-5.4 - Best intelligence",
|
||||
"recommended": true,
|
||||
"max_tokens": 128000,
|
||||
"max_context_tokens": 960000
|
||||
},
|
||||
{
|
||||
"id": "gpt-5.4-mini",
|
||||
"label": "GPT-5.4 Mini - Faster + cheaper",
|
||||
"recommended": false,
|
||||
"max_tokens": 128000,
|
||||
"max_context_tokens": 400000
|
||||
},
|
||||
{
|
||||
"id": "gpt-5.4-nano",
|
||||
"label": "GPT-5.4 Nano - Cheapest high-volume",
|
||||
"recommended": false,
|
||||
"max_tokens": 128000,
|
||||
"max_context_tokens": 400000
|
||||
}
|
||||
]
|
||||
},
|
||||
"gemini": {
|
||||
"default_model": "gemini-3-flash-preview",
|
||||
"models": [
|
||||
{
|
||||
"id": "gemini-3-flash-preview",
|
||||
"label": "Gemini 3 Flash - Fast",
|
||||
"recommended": false,
|
||||
"max_tokens": 32768,
|
||||
"max_context_tokens": 900000
|
||||
},
|
||||
{
|
||||
"id": "gemini-3.1-pro-preview",
|
||||
"label": "Gemini 3.1 Pro - Best quality",
|
||||
"recommended": true,
|
||||
"max_tokens": 32768,
|
||||
"max_context_tokens": 900000
|
||||
}
|
||||
]
|
||||
},
|
||||
"groq": {
|
||||
"default_model": "moonshotai/kimi-k2-instruct-0905",
|
||||
"models": [
|
||||
{
|
||||
"id": "moonshotai/kimi-k2-instruct-0905",
|
||||
"label": "Kimi K2 - Best quality",
|
||||
"recommended": true,
|
||||
"max_tokens": 8192,
|
||||
"max_context_tokens": 120000
|
||||
},
|
||||
{
|
||||
"id": "openai/gpt-oss-120b",
|
||||
"label": "GPT-OSS 120B - Fast reasoning",
|
||||
"recommended": false,
|
||||
"max_tokens": 8192,
|
||||
"max_context_tokens": 120000
|
||||
}
|
||||
]
|
||||
},
|
||||
"cerebras": {
|
||||
"default_model": "zai-glm-4.7",
|
||||
"models": [
|
||||
{
|
||||
"id": "zai-glm-4.7",
|
||||
"label": "ZAI-GLM 4.7 - Best quality",
|
||||
"recommended": true,
|
||||
"max_tokens": 8192,
|
||||
"max_context_tokens": 120000
|
||||
},
|
||||
{
|
||||
"id": "qwen3-235b-a22b-instruct-2507",
|
||||
"label": "Qwen3 235B - Frontier reasoning",
|
||||
"recommended": false,
|
||||
"max_tokens": 8192,
|
||||
"max_context_tokens": 120000
|
||||
}
|
||||
]
|
||||
},
|
||||
"minimax": {
|
||||
"default_model": "MiniMax-M2.5",
|
||||
"models": [
|
||||
{
|
||||
"id": "MiniMax-M2.5",
|
||||
"label": "MiniMax-M2.5",
|
||||
"recommended": true,
|
||||
"max_tokens": 8192,
|
||||
"max_context_tokens": 120000
|
||||
}
|
||||
]
|
||||
},
|
||||
"mistral": {
|
||||
"default_model": "mistral-large-latest",
|
||||
"models": [
|
||||
{
|
||||
"id": "mistral-large-latest",
|
||||
"label": "Mistral Large",
|
||||
"recommended": true,
|
||||
"max_tokens": 8192,
|
||||
"max_context_tokens": 120000
|
||||
}
|
||||
]
|
||||
},
|
||||
"together": {
|
||||
"default_model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
|
||||
"models": [
|
||||
{
|
||||
"id": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
|
||||
"label": "Llama 3.3 70B Turbo",
|
||||
"recommended": false,
|
||||
"max_tokens": 8192,
|
||||
"max_context_tokens": 120000
|
||||
}
|
||||
]
|
||||
},
|
||||
"deepseek": {
|
||||
"default_model": "deepseek-chat",
|
||||
"models": [
|
||||
{
|
||||
"id": "deepseek-chat",
|
||||
"label": "DeepSeek Chat",
|
||||
"recommended": false,
|
||||
"max_tokens": 8192,
|
||||
"max_context_tokens": 120000
|
||||
}
|
||||
]
|
||||
},
|
||||
"openrouter": {
|
||||
"default_model": "google/gemini-2.5-pro",
|
||||
"models": [
|
||||
{
|
||||
"id": "google/gemini-2.5-pro",
|
||||
"label": "Gemini 2.5 Pro",
|
||||
"recommended": true,
|
||||
"max_tokens": 8192,
|
||||
"max_context_tokens": 900000
|
||||
},
|
||||
{
|
||||
"id": "google/gemini-2.5-flash",
|
||||
"label": "Gemini 2.5 Flash",
|
||||
"recommended": false,
|
||||
"max_tokens": 8192,
|
||||
"max_context_tokens": 900000
|
||||
},
|
||||
{
|
||||
"id": "anthropic/claude-sonnet-4",
|
||||
"label": "Claude Sonnet 4 (via OR)",
|
||||
"recommended": false,
|
||||
"max_tokens": 8192,
|
||||
"max_context_tokens": 180000
|
||||
},
|
||||
{
|
||||
"id": "deepseek/deepseek-r1",
|
||||
"label": "DeepSeek R1",
|
||||
"recommended": false,
|
||||
"max_tokens": 8192,
|
||||
"max_context_tokens": 120000
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"presets": {
|
||||
"claude_code": {
|
||||
"provider": "anthropic",
|
||||
"model": "claude-opus-4-6",
|
||||
"max_tokens": 128000,
|
||||
"max_context_tokens": 872000
|
||||
},
|
||||
"zai_code": {
|
||||
"provider": "openai",
|
||||
"api_key_env_var": "ZAI_API_KEY",
|
||||
"model": "glm-5",
|
||||
"max_tokens": 32768,
|
||||
"max_context_tokens": 180000,
|
||||
"api_base": "https://api.z.ai/api/coding/paas/v4"
|
||||
},
|
||||
"codex": {
|
||||
"provider": "openai",
|
||||
"model": "gpt-5.3-codex",
|
||||
"max_tokens": 16384,
|
||||
"max_context_tokens": 120000,
|
||||
"api_base": "https://chatgpt.com/backend-api/codex"
|
||||
},
|
||||
"minimax_code": {
|
||||
"provider": "minimax",
|
||||
"api_key_env_var": "MINIMAX_API_KEY",
|
||||
"model": "MiniMax-M2.5",
|
||||
"max_tokens": 32768,
|
||||
"max_context_tokens": 900000,
|
||||
"api_base": "https://api.minimax.io/v1"
|
||||
},
|
||||
"kimi_code": {
|
||||
"provider": "kimi",
|
||||
"api_key_env_var": "KIMI_API_KEY",
|
||||
"model": "kimi-k2.5",
|
||||
"max_tokens": 32768,
|
||||
"max_context_tokens": 240000,
|
||||
"api_base": "https://api.kimi.com/coding"
|
||||
},
|
||||
"hive_llm": {
|
||||
"provider": "hive",
|
||||
"api_key_env_var": "HIVE_API_KEY",
|
||||
"model": "queen",
|
||||
"max_tokens": 32768,
|
||||
"max_context_tokens": 180000,
|
||||
"api_base": "https://api.adenhq.com",
|
||||
"model_choices": [
|
||||
{
|
||||
"id": "queen",
|
||||
"label": "queen",
|
||||
"recommended": true
|
||||
},
|
||||
{
|
||||
"id": "kimi-2.5",
|
||||
"label": "kimi-2.5",
|
||||
"recommended": false
|
||||
},
|
||||
{
|
||||
"id": "GLM-5",
|
||||
"label": "GLM-5",
|
||||
"recommended": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"antigravity": {
|
||||
"provider": "openai",
|
||||
"model": "gemini-3-flash",
|
||||
"max_tokens": 32768,
|
||||
"max_context_tokens": 1000000
|
||||
},
|
||||
"ollama_local": {
|
||||
"provider": "ollama",
|
||||
"max_tokens": 8192,
|
||||
"max_context_tokens": 16384,
|
||||
"api_base": "http://localhost:11434"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,201 @@
|
||||
"""Shared curated model metadata loaded from ``model_catalog.json``."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
import json
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
MODEL_CATALOG_PATH = Path(__file__).with_name("model_catalog.json")
|
||||
|
||||
|
||||
class ModelCatalogError(RuntimeError):
    """Raised when the curated model catalogue is missing or malformed.

    Covers a missing/unreadable ``model_catalog.json``, invalid JSON, and any
    structural validation failure (wrong types, empty fields, duplicate ids).
    """
|
||||
|
||||
|
||||
def _require_mapping(value: Any, path: str) -> dict[str, Any]:
|
||||
if not isinstance(value, dict):
|
||||
raise ModelCatalogError(f"{path} must be an object")
|
||||
return value
|
||||
|
||||
|
||||
def _require_list(value: Any, path: str) -> list[Any]:
|
||||
if not isinstance(value, list):
|
||||
raise ModelCatalogError(f"{path} must be an array")
|
||||
return value
|
||||
|
||||
|
||||
def _validate_model_catalog(data: dict[str, Any]) -> dict[str, Any]:
    """Validate the parsed catalogue structure and return it unchanged.

    Two top-level sections are checked:

    * ``providers`` — each provider needs a non-empty ``default_model`` and a
      non-empty ``models`` array; every model entry needs a unique non-empty
      ``id``, a non-empty ``label``, a boolean ``recommended``, and positive
      integer ``max_tokens``/``max_context_tokens``. The provider's
      ``default_model`` must appear among its model ids.
    * ``presets`` — each preset needs a non-empty ``provider`` and positive
      token limits; ``model``, ``api_base``, ``api_key_env_var`` and
      ``model_choices`` are optional but type-checked when present.

    Raises:
        ModelCatalogError: on the first structural problem found.
    """
    providers = _require_mapping(data.get("providers"), "providers")

    for provider_id, provider_info in providers.items():
        provider_path = f"providers.{provider_id}"
        provider_map = _require_mapping(provider_info, provider_path)
        default_model = provider_map.get("default_model")
        if not isinstance(default_model, str) or not default_model.strip():
            raise ModelCatalogError(f"{provider_path}.default_model must be a non-empty string")

        models = _require_list(provider_map.get("models"), f"{provider_path}.models")
        if not models:
            raise ModelCatalogError(f"{provider_path}.models must not be empty")

        # Track seen ids so duplicates are rejected and the declared default
        # can be verified to exist once the loop finishes.
        seen_model_ids: set[str] = set()
        default_found = False
        for idx, model in enumerate(models):
            model_path = f"{provider_path}.models[{idx}]"
            model_map = _require_mapping(model, model_path)
            model_id = model_map.get("id")
            if not isinstance(model_id, str) or not model_id.strip():
                raise ModelCatalogError(f"{model_path}.id must be a non-empty string")
            if model_id in seen_model_ids:
                raise ModelCatalogError(f"Duplicate model id {model_id!r} in {provider_path}.models")
            seen_model_ids.add(model_id)

            if model_id == default_model:
                default_found = True

            label = model_map.get("label")
            if not isinstance(label, str) or not label.strip():
                raise ModelCatalogError(f"{model_path}.label must be a non-empty string")

            recommended = model_map.get("recommended")
            if not isinstance(recommended, bool):
                raise ModelCatalogError(f"{model_path}.recommended must be a boolean")

            # Both token limits must be positive integers.
            for key in ("max_tokens", "max_context_tokens"):
                value = model_map.get(key)
                if not isinstance(value, int) or value <= 0:
                    raise ModelCatalogError(f"{model_path}.{key} must be a positive integer")

        if not default_found:
            raise ModelCatalogError(
                f"{provider_path}.default_model={default_model!r} is not present in {provider_path}.models"
            )

    presets = _require_mapping(data.get("presets"), "presets")
    for preset_id, preset_info in presets.items():
        preset_path = f"presets.{preset_id}"
        preset_map = _require_mapping(preset_info, preset_path)

        provider = preset_map.get("provider")
        if not isinstance(provider, str) or not provider.strip():
            raise ModelCatalogError(f"{preset_path}.provider must be a non-empty string")

        # Optional string fields: absent is fine; present-but-empty is not.
        model = preset_map.get("model")
        if model is not None and (not isinstance(model, str) or not model.strip()):
            raise ModelCatalogError(f"{preset_path}.model must be a non-empty string when present")

        api_base = preset_map.get("api_base")
        if api_base is not None and (not isinstance(api_base, str) or not api_base.strip()):
            raise ModelCatalogError(f"{preset_path}.api_base must be a non-empty string when present")

        api_key_env_var = preset_map.get("api_key_env_var")
        if api_key_env_var is not None and (
            not isinstance(api_key_env_var, str) or not api_key_env_var.strip()
        ):
            raise ModelCatalogError(
                f"{preset_path}.api_key_env_var must be a non-empty string when present"
            )

        for key in ("max_tokens", "max_context_tokens"):
            value = preset_map.get(key)
            if not isinstance(value, int) or value <= 0:
                raise ModelCatalogError(f"{preset_path}.{key} must be a positive integer")

        # model_choices is optional; when present each entry needs id/label
        # strings and a boolean recommended flag.
        model_choices = preset_map.get("model_choices")
        if model_choices is not None:
            for idx, choice in enumerate(_require_list(model_choices, f"{preset_path}.model_choices")):
                choice_path = f"{preset_path}.model_choices[{idx}]"
                choice_map = _require_mapping(choice, choice_path)
                choice_id = choice_map.get("id")
                if not isinstance(choice_id, str) or not choice_id.strip():
                    raise ModelCatalogError(f"{choice_path}.id must be a non-empty string")
                label = choice_map.get("label")
                if not isinstance(label, str) or not label.strip():
                    raise ModelCatalogError(f"{choice_path}.label must be a non-empty string")
                recommended = choice_map.get("recommended")
                if not isinstance(recommended, bool):
                    raise ModelCatalogError(f"{choice_path}.recommended must be a boolean")

    return data
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
def load_model_catalog() -> dict[str, Any]:
    """Load and validate the curated model catalogue (cached after first read)."""
    try:
        text = MODEL_CATALOG_PATH.read_text(encoding="utf-8")
    except FileNotFoundError as exc:
        raise ModelCatalogError(f"Model catalogue not found: {MODEL_CATALOG_PATH}") from exc
    try:
        raw = json.loads(text)
    except json.JSONDecodeError as exc:
        raise ModelCatalogError(f"Model catalogue JSON is invalid: {exc}") from exc

    return _validate_model_catalog(_require_mapping(raw, "root"))
|
||||
|
||||
|
||||
def get_models_catalogue() -> dict[str, list[dict[str, Any]]]:
    """Return provider -> model list (deep-copied so callers can mutate freely)."""
    catalogue: dict[str, list[dict[str, Any]]] = {}
    for provider_id, provider_info in load_model_catalog()["providers"].items():
        catalogue[provider_id] = copy.deepcopy(provider_info["models"])
    return catalogue
|
||||
|
||||
|
||||
def get_default_models() -> dict[str, str]:
    """Return provider -> default model id."""
    defaults: dict[str, str] = {}
    for provider_id, provider_info in load_model_catalog()["providers"].items():
        defaults[provider_id] = str(provider_info["default_model"])
    return defaults
|
||||
|
||||
|
||||
def get_provider_models(provider: str) -> list[dict[str, Any]]:
    """Return the curated models for one provider (empty list when unknown)."""
    info = load_model_catalog()["providers"].get(provider)
    return copy.deepcopy(info["models"]) if info else []
|
||||
|
||||
|
||||
def get_default_model(provider: str) -> str | None:
    """Return the curated default model id for one provider, or None if unknown."""
    info = load_model_catalog()["providers"].get(provider)
    return str(info["default_model"]) if info else None
|
||||
|
||||
|
||||
def find_model(provider: str, model_id: str) -> dict[str, Any] | None:
    """Return one curated model entry for a provider, deep-copied, if present."""
    provider_entry = load_model_catalog()["providers"].get(provider, {})
    match = next(
        (m for m in provider_entry.get("models", []) if m["id"] == model_id),
        None,
    )
    return copy.deepcopy(match) if match is not None else None
|
||||
|
||||
|
||||
def find_model_any_provider(model_id: str) -> tuple[str, dict[str, Any]] | None:
    """Return the first curated (provider_id, model) pair matching a model id."""
    for provider_id, provider_info in load_model_catalog()["providers"].items():
        match = next(
            (m for m in provider_info["models"] if m["id"] == model_id),
            None,
        )
        if match is not None:
            return provider_id, copy.deepcopy(match)
    return None
|
||||
|
||||
|
||||
def get_model_limits(provider: str, model_id: str) -> tuple[int, int] | None:
    """Return ``(max_tokens, max_context_tokens)`` for one provider/model pair."""
    entry = find_model(provider, model_id)
    if entry is None:
        return None
    return int(entry["max_tokens"]), int(entry["max_context_tokens"])
|
||||
|
||||
|
||||
def get_preset(preset_id: str) -> dict[str, Any] | None:
    """Return one preset entry (deep-copied), or None if unknown."""
    entry = load_model_catalog()["presets"].get(preset_id)
    return copy.deepcopy(entry) if entry else None
|
||||
|
||||
|
||||
def get_presets() -> dict[str, dict[str, Any]]:
    """Return all preset entries, deep-copied so callers can mutate freely."""
    catalog = load_model_catalog()
    return copy.deepcopy(catalog["presets"])
|
||||
@@ -20,6 +20,12 @@ from framework.config import (
|
||||
_PROVIDER_CRED_MAP,
|
||||
get_hive_config,
|
||||
)
|
||||
from framework.llm.model_catalog import (
|
||||
find_model,
|
||||
find_model_any_provider,
|
||||
get_models_catalogue,
|
||||
get_preset,
|
||||
)
|
||||
from framework.agents.queen.queen_memory_v2 import (
|
||||
global_memory_dir,
|
||||
build_memory_document,
|
||||
@@ -47,111 +53,67 @@ PROVIDER_ENV_VARS: dict[str, str] = {
|
||||
"deepseek": "DEEPSEEK_API_KEY",
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Subscription metadata (mirrors quickstart.sh subscription modes)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
SUBSCRIPTIONS: list[dict] = [
|
||||
_SUBSCRIPTION_DEFINITIONS: list[dict[str, str]] = [
|
||||
{
|
||||
"id": "claude_code",
|
||||
"name": "Claude Code Subscription",
|
||||
"description": "Use your Claude Max/Pro plan",
|
||||
"provider": "anthropic",
|
||||
"flag": "use_claude_code_subscription",
|
||||
"default_model": "claude-sonnet-4-20250514",
|
||||
},
|
||||
{
|
||||
"id": "codex",
|
||||
"name": "OpenAI Codex Subscription",
|
||||
"description": "Use your Codex/ChatGPT Plus plan",
|
||||
"provider": "openai",
|
||||
"flag": "use_codex_subscription",
|
||||
"default_model": "gpt-5.4",
|
||||
"api_base": "https://chatgpt.com/backend-api/codex",
|
||||
},
|
||||
{
|
||||
"id": "kimi_code",
|
||||
"name": "Kimi Code Subscription",
|
||||
"description": "Use your Kimi Code plan",
|
||||
"provider": "kimi",
|
||||
"flag": "use_kimi_code_subscription",
|
||||
"default_model": "kimi/moonshot-v1",
|
||||
},
|
||||
{
|
||||
"id": "antigravity",
|
||||
"name": "Antigravity Subscription",
|
||||
"description": "Use your Google/Gemini plan",
|
||||
"provider": "antigravity",
|
||||
"flag": "use_antigravity_subscription",
|
||||
"default_model": "antigravity/gemini-2.5-pro",
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
def _build_subscriptions() -> list[dict]:
    """Materialize subscription metadata by joining static definitions with presets.

    Each subscription definition is merged with its matching catalogue preset;
    a missing preset is a configuration error and raises RuntimeError.
    """
    built: list[dict] = []
    for definition in _SUBSCRIPTION_DEFINITIONS:
        preset = get_preset(definition["id"])
        if not preset:
            raise RuntimeError(f"Missing preset for subscription {definition['id']}")

        entry: dict = {
            "id": definition["id"],
            "name": definition["name"],
            "description": definition["description"],
            "provider": preset["provider"],
            "flag": definition["flag"],
            "default_model": preset.get("model", ""),
        }
        # api_base is included only when the preset declares one.
        if preset.get("api_base"):
            entry["api_base"] = preset["api_base"]
        built.append(entry)
    return built
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Subscription metadata (mirrors quickstart subscription modes)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
SUBSCRIPTIONS: list[dict] = _build_subscriptions()
|
||||
|
||||
# All subscription config flags
|
||||
_ALL_SUBSCRIPTION_FLAGS = [s["flag"] for s in SUBSCRIPTIONS]
|
||||
|
||||
# Map subscription ID → subscription metadata
|
||||
_SUBSCRIPTION_MAP = {s["id"]: s for s in SUBSCRIPTIONS}
|
||||
|
||||
# Model catalogue — mirrors quickstart.sh MODEL_CHOICES_*
|
||||
MODELS_CATALOGUE: dict[str, list[dict]] = {
|
||||
"anthropic": [
|
||||
{"id": "claude-haiku-4-5-20251001", "label": "Haiku 4.5 - Fast + cheap", "recommended": False, "max_tokens": 8192, "max_context_tokens": 180000},
|
||||
{"id": "claude-sonnet-4-20250514", "label": "Sonnet 4 - Fast + capable", "recommended": False, "max_tokens": 8192, "max_context_tokens": 180000},
|
||||
{"id": "claude-sonnet-4-5-20250929", "label": "Sonnet 4.5 - Best balance", "recommended": False, "max_tokens": 16384, "max_context_tokens": 180000},
|
||||
{"id": "claude-opus-4-6", "label": "Opus 4.6 - Most capable", "recommended": True, "max_tokens": 32768, "max_context_tokens": 180000},
|
||||
],
|
||||
"openai": [
|
||||
{"id": "gpt-5.4", "label": "GPT-5.4 - Best intelligence", "recommended": True, "max_tokens": 128000, "max_context_tokens": 960000},
|
||||
{"id": "gpt-5.4-mini", "label": "GPT-5.4 Mini - Faster + cheaper", "recommended": False, "max_tokens": 128000, "max_context_tokens": 400000},
|
||||
{"id": "gpt-5.4-nano", "label": "GPT-5.4 Nano - Cheapest high-volume", "recommended": False, "max_tokens": 128000, "max_context_tokens": 400000},
|
||||
],
|
||||
"gemini": [
|
||||
{"id": "gemini-3-flash-preview", "label": "Gemini 3 Flash - Fast", "recommended": False, "max_tokens": 8192, "max_context_tokens": 900000},
|
||||
{"id": "gemini-3.1-pro-preview", "label": "Gemini 3.1 Pro - Best quality", "recommended": True, "max_tokens": 8192, "max_context_tokens": 900000},
|
||||
],
|
||||
"groq": [
|
||||
{"id": "moonshotai/kimi-k2-instruct-0905", "label": "Kimi K2 - Best quality", "recommended": True, "max_tokens": 8192, "max_context_tokens": 120000},
|
||||
{"id": "openai/gpt-oss-120b", "label": "GPT-OSS 120B - Fast reasoning", "recommended": False, "max_tokens": 8192, "max_context_tokens": 120000},
|
||||
],
|
||||
"cerebras": [
|
||||
{"id": "zai-glm-4.7", "label": "ZAI-GLM 4.7 - Best quality", "recommended": True, "max_tokens": 8192, "max_context_tokens": 120000},
|
||||
{"id": "qwen3-235b-a22b-instruct-2507", "label": "Qwen3 235B - Frontier reasoning", "recommended": False, "max_tokens": 8192, "max_context_tokens": 120000},
|
||||
],
|
||||
"minimax": [
|
||||
{"id": "MiniMax-M2.5", "label": "MiniMax-M2.5", "recommended": True, "max_tokens": 8192, "max_context_tokens": 120000},
|
||||
],
|
||||
"mistral": [
|
||||
{"id": "mistral-large-latest", "label": "Mistral Large", "recommended": True, "max_tokens": 8192, "max_context_tokens": 120000},
|
||||
],
|
||||
"together": [
|
||||
{"id": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "label": "Llama 3.3 70B Turbo", "recommended": False, "max_tokens": 8192, "max_context_tokens": 120000},
|
||||
],
|
||||
"deepseek": [
|
||||
{"id": "deepseek-chat", "label": "DeepSeek Chat", "recommended": False, "max_tokens": 8192, "max_context_tokens": 120000},
|
||||
],
|
||||
"openrouter": [
|
||||
{"id": "google/gemini-2.5-pro", "label": "Gemini 2.5 Pro", "recommended": True, "max_tokens": 8192, "max_context_tokens": 900000},
|
||||
{"id": "google/gemini-2.5-flash", "label": "Gemini 2.5 Flash", "recommended": False, "max_tokens": 8192, "max_context_tokens": 900000},
|
||||
{"id": "anthropic/claude-sonnet-4", "label": "Claude Sonnet 4 (via OR)", "recommended": False, "max_tokens": 8192, "max_context_tokens": 180000},
|
||||
{"id": "deepseek/deepseek-r1", "label": "DeepSeek R1", "recommended": False, "max_tokens": 8192, "max_context_tokens": 120000},
|
||||
],
|
||||
}
|
||||
|
||||
# Default model per provider (matches quickstart DEFAULT_MODELS)
|
||||
DEFAULT_MODELS: dict[str, str] = {
|
||||
"anthropic": "claude-haiku-4-5-20251001",
|
||||
"openai": "gpt-5.4",
|
||||
"minimax": "MiniMax-M2.5",
|
||||
"gemini": "gemini-3-flash-preview",
|
||||
"groq": "moonshotai/kimi-k2-instruct-0905",
|
||||
"cerebras": "zai-glm-4.7",
|
||||
"mistral": "mistral-large-latest",
|
||||
"together": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
|
||||
"deepseek": "deepseek-chat",
|
||||
"openrouter": "google/gemini-2.5-pro",
|
||||
}
|
||||
|
||||
# Model catalogue loaded from the shared JSON source of truth.
|
||||
MODELS_CATALOGUE: dict[str, list[dict]] = get_models_catalogue()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
@@ -167,10 +129,7 @@ def _get_api_base_for_provider(provider: str) -> str | None:
|
||||
|
||||
def _find_model_info(provider: str, model_id: str) -> dict | None:
|
||||
"""Look up a model in the catalogue to get its token limits."""
|
||||
for m in MODELS_CATALOGUE.get(provider, []):
|
||||
if m["id"] == model_id:
|
||||
return m
|
||||
return None
|
||||
return find_model(provider, model_id)
|
||||
|
||||
|
||||
def _write_config_atomic(config: dict) -> None:
|
||||
@@ -348,21 +307,32 @@ async def handle_update_llm_config(request: web.Request) -> web.Response:
|
||||
{"error": f"Unknown subscription: {subscription_id}"}, status=400
|
||||
)
|
||||
|
||||
preset = get_preset(subscription_id)
|
||||
model = body.get("model") or sub["default_model"]
|
||||
provider = sub["provider"]
|
||||
api_base = sub.get("api_base")
|
||||
|
||||
# Look up token limits
|
||||
# Subscriptions use same models as their provider (e.g., claude_code → anthropic)
|
||||
model_info = _find_model_info(provider, model)
|
||||
if not model_info:
|
||||
# Try looking up in the mapped provider's catalogue
|
||||
for prov_id, models in MODELS_CATALOGUE.items():
|
||||
model_info = next((m for m in models if m["id"] == model), None)
|
||||
if model_info:
|
||||
break
|
||||
max_tokens = model_info["max_tokens"] if model_info else 8192
|
||||
max_context_tokens = model_info["max_context_tokens"] if model_info else 120000
|
||||
max_tokens: int | None = None
|
||||
max_context_tokens: int | None = None
|
||||
if preset and preset.get("model") == model:
|
||||
max_tokens = int(preset["max_tokens"])
|
||||
max_context_tokens = int(preset["max_context_tokens"])
|
||||
else:
|
||||
# Subscriptions may use the same curated models as their provider.
|
||||
model_info = _find_model_info(provider, model)
|
||||
if not model_info:
|
||||
# Some subscriptions point at curated models owned by a different provider.
|
||||
match = find_model_any_provider(model)
|
||||
if match:
|
||||
_, model_info = match
|
||||
if model_info:
|
||||
max_tokens = int(model_info["max_tokens"])
|
||||
max_context_tokens = int(model_info["max_context_tokens"])
|
||||
|
||||
if max_tokens is None or max_context_tokens is None:
|
||||
max_tokens = 8192
|
||||
max_context_tokens = 120000
|
||||
|
||||
# Update config: activate this subscription, clear others
|
||||
config = get_hive_config()
|
||||
|
||||
@@ -15,6 +15,7 @@ import pytest
|
||||
from aiohttp.test_utils import TestClient, TestServer
|
||||
|
||||
from framework.host.triggers import TriggerDefinition
|
||||
from framework.llm.model_catalog import get_models_catalogue
|
||||
from framework.server.app import create_app
|
||||
from framework.server import routes_messages, routes_queens
|
||||
from framework.server import session_manager as session_manager_module
|
||||
@@ -1591,6 +1592,37 @@ class TestCredentials:
|
||||
assert store.get_key("test_cred", "api_key") == "new-value"
|
||||
|
||||
|
||||
class TestConfigRoutes:
|
||||
"""Tests for LLM configuration endpoints."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_models_uses_shared_model_catalogue(self):
|
||||
app = create_app()
|
||||
|
||||
async with TestClient(TestServer(app)) as client:
|
||||
resp = await client.get("/api/config/models")
|
||||
data = await resp.json()
|
||||
|
||||
assert resp.status == 200
|
||||
assert data["models"] == get_models_catalogue()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_llm_config_exposes_subscription_defaults_from_presets(self):
|
||||
app = create_app()
|
||||
app["credential_store"] = MagicMock()
|
||||
app["credential_store"].get.return_value = None
|
||||
|
||||
async with TestClient(TestServer(app)) as client:
|
||||
resp = await client.get("/api/config/llm")
|
||||
data = await resp.json()
|
||||
|
||||
assert resp.status == 200
|
||||
subscriptions = {subscription["id"]: subscription for subscription in data["subscriptions"]}
|
||||
assert subscriptions["codex"]["default_model"] == "gpt-5.3-codex"
|
||||
assert subscriptions["codex"]["api_base"] == "https://chatgpt.com/backend-api/codex"
|
||||
assert subscriptions["kimi_code"]["default_model"] == "kimi-k2.5"
|
||||
|
||||
|
||||
class TestSSEFormat:
|
||||
"""Tests for SSE event wire format -- events must be unnamed (data-only)
|
||||
so the frontend's es.onmessage handler receives them."""
|
||||
|
||||
@@ -26,6 +26,7 @@ from framework.llm.litellm import (
|
||||
_compute_retry_delay,
|
||||
_ensure_ollama_chat_prefix,
|
||||
_is_ollama_model,
|
||||
_summarize_request_for_log,
|
||||
)
|
||||
from framework.llm.provider import LLMProvider, LLMResponse, Tool
|
||||
|
||||
@@ -100,6 +101,25 @@ class TestLiteLLMProviderInit:
|
||||
provider = LiteLLMProvider(model="ollama/llama3")
|
||||
assert provider.model == "ollama_chat/llama3"
|
||||
|
||||
def test_summarize_request_flags_system_only_payload(self):
|
||||
"""Request summaries should make system-only payloads obvious in logs."""
|
||||
summary = _summarize_request_for_log(
|
||||
{
|
||||
"model": "openai/glm-5",
|
||||
"api_base": "https://api.z.ai/api/coding/paas/v4",
|
||||
"messages": [{"role": "system", "content": "You are helpful."}],
|
||||
"tools": [{"type": "function", "function": {"name": "read_file"}}],
|
||||
"stream": True,
|
||||
"max_tokens": 8192,
|
||||
}
|
||||
)
|
||||
|
||||
assert summary["message_count"] == 1
|
||||
assert summary["non_system_message_count"] == 0
|
||||
assert summary["first_non_system_role"] is None
|
||||
assert summary["last_non_system_role"] is None
|
||||
assert summary["system_only"] is True
|
||||
|
||||
|
||||
class TestLiteLLMProviderComplete:
|
||||
"""Test LiteLLMProvider.complete() method."""
|
||||
|
||||
@@ -0,0 +1,97 @@
|
||||
"""Tests for the shared curated LLM model catalogue."""
|
||||
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from framework.llm import model_catalog
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def clear_model_catalog_cache():
|
||||
model_catalog.load_model_catalog.cache_clear()
|
||||
yield
|
||||
model_catalog.load_model_catalog.cache_clear()
|
||||
|
||||
|
||||
def test_default_models_exist_in_each_provider_catalogue():
|
||||
defaults = model_catalog.get_default_models()
|
||||
catalogue = model_catalog.get_models_catalogue()
|
||||
|
||||
for provider_id, default_model in defaults.items():
|
||||
assert provider_id in catalogue
|
||||
assert any(model["id"] == default_model for model in catalogue[provider_id])
|
||||
|
||||
|
||||
def test_find_model_returns_curated_token_limits():
|
||||
model = model_catalog.find_model("openai", "gpt-5.4")
|
||||
|
||||
assert model is not None
|
||||
assert model["label"] == "GPT-5.4 - Best intelligence"
|
||||
assert model["max_tokens"] == 128000
|
||||
assert model["max_context_tokens"] == 960000
|
||||
|
||||
|
||||
def test_anthropic_curated_limits_track_documented_caps_with_safe_input_budget():
|
||||
haiku = model_catalog.find_model("anthropic", "claude-haiku-4-5-20251001")
|
||||
sonnet_45 = model_catalog.find_model("anthropic", "claude-sonnet-4-5-20250929")
|
||||
opus_46 = model_catalog.find_model("anthropic", "claude-opus-4-6")
|
||||
|
||||
assert haiku["max_tokens"] == 64000
|
||||
assert haiku["max_context_tokens"] == 136000
|
||||
assert sonnet_45["max_tokens"] == 64000
|
||||
assert sonnet_45["max_context_tokens"] == 136000
|
||||
assert opus_46["max_tokens"] == 128000
|
||||
assert opus_46["max_context_tokens"] == 872000
|
||||
|
||||
|
||||
def test_find_model_any_provider_returns_provider_and_model():
|
||||
provider_id, model = model_catalog.find_model_any_provider("google/gemini-2.5-pro")
|
||||
|
||||
assert provider_id == "openrouter"
|
||||
assert model["max_context_tokens"] == 900000
|
||||
|
||||
|
||||
def test_get_preset_returns_subscription_specific_limits():
|
||||
preset = model_catalog.get_preset("kimi_code")
|
||||
|
||||
assert preset is not None
|
||||
assert preset["provider"] == "kimi"
|
||||
assert preset["model"] == "kimi-k2.5"
|
||||
assert preset["max_tokens"] == 32768
|
||||
assert preset["max_context_tokens"] == 240000
|
||||
assert preset["api_base"] == "https://api.kimi.com/coding"
|
||||
|
||||
|
||||
def test_load_model_catalog_rejects_duplicate_model_ids(tmp_path, monkeypatch):
|
||||
bad_catalog = {
|
||||
"schema_version": 1,
|
||||
"providers": {
|
||||
"anthropic": {
|
||||
"default_model": "dup-model",
|
||||
"models": [
|
||||
{
|
||||
"id": "dup-model",
|
||||
"label": "First",
|
||||
"recommended": True,
|
||||
"max_tokens": 1,
|
||||
"max_context_tokens": 1,
|
||||
},
|
||||
{
|
||||
"id": "dup-model",
|
||||
"label": "Second",
|
||||
"recommended": False,
|
||||
"max_tokens": 1,
|
||||
"max_context_tokens": 1,
|
||||
},
|
||||
],
|
||||
}
|
||||
},
|
||||
}
|
||||
bad_path = tmp_path / "model_catalog.json"
|
||||
bad_path.write_text(json.dumps(bad_catalog), encoding="utf-8")
|
||||
|
||||
monkeypatch.setattr(model_catalog, "MODEL_CATALOG_PATH", bad_path)
|
||||
|
||||
with pytest.raises(model_catalog.ModelCatalogError, match="Duplicate model id"):
|
||||
model_catalog.load_model_catalog()
|
||||
+142
-92
@@ -829,42 +829,95 @@ $ProviderMap = [ordered]@{
|
||||
DEEPSEEK_API_KEY = @{ Name = "DeepSeek"; Id = "deepseek" }
|
||||
}
|
||||
|
||||
$DefaultModels = @{
|
||||
anthropic = "claude-haiku-4-5-20251001"
|
||||
openai = "gpt-5-mini"
|
||||
minimax = "MiniMax-M2.5"
|
||||
gemini = "gemini-3-flash-preview"
|
||||
groq = "moonshotai/kimi-k2-instruct-0905"
|
||||
cerebras = "zai-glm-4.7"
|
||||
mistral = "mistral-large-latest"
|
||||
together_ai = "meta-llama/Llama-3.3-70B-Instruct-Turbo"
|
||||
deepseek = "deepseek-chat"
|
||||
$ModelCatalogPath = Join-Path $ScriptDir "core\framework\llm\model_catalog.json"
|
||||
$script:ModelCatalog = $null
|
||||
|
||||
function Initialize-ModelCatalog {
|
||||
try {
|
||||
$script:ModelCatalog = Get-Content -Path $ModelCatalogPath -Raw | ConvertFrom-Json
|
||||
return $true
|
||||
} catch {
|
||||
return $false
|
||||
}
|
||||
}
|
||||
|
||||
# Model choices: array of hashtables per provider
|
||||
$ModelChoices = @{
|
||||
anthropic = @(
|
||||
@{ Id = "claude-haiku-4-5-20251001"; Label = "Haiku 4.5 - Fast + cheap (recommended)"; MaxTokens = 8192; MaxContextTokens = 180000 },
|
||||
@{ Id = "claude-sonnet-4-20250514"; Label = "Sonnet 4 - Fast + capable"; MaxTokens = 8192; MaxContextTokens = 180000 },
|
||||
@{ Id = "claude-sonnet-4-5-20250929"; Label = "Sonnet 4.5 - Best balance"; MaxTokens = 16384; MaxContextTokens = 180000 },
|
||||
@{ Id = "claude-opus-4-6"; Label = "Opus 4.6 - Most capable"; MaxTokens = 32768; MaxContextTokens = 180000 }
|
||||
)
|
||||
openai = @(
|
||||
@{ Id = "gpt-5-mini"; Label = "GPT-5 Mini - Fast + cheap (recommended)"; MaxTokens = 16384; MaxContextTokens = 120000 },
|
||||
@{ Id = "gpt-5.2"; Label = "GPT-5.2 - Most capable"; MaxTokens = 16384; MaxContextTokens = 120000 }
|
||||
)
|
||||
gemini = @(
|
||||
@{ Id = "gemini-3-flash-preview"; Label = "Gemini 3 Flash - Fast (recommended)"; MaxTokens = 8192; MaxContextTokens = 900000 },
|
||||
@{ Id = "gemini-3.1-pro-preview"; Label = "Gemini 3.1 Pro - Best quality"; MaxTokens = 8192; MaxContextTokens = 900000 }
|
||||
)
|
||||
groq = @(
|
||||
@{ Id = "moonshotai/kimi-k2-instruct-0905"; Label = "Kimi K2 - Best quality (recommended)"; MaxTokens = 8192; MaxContextTokens = 120000 },
|
||||
@{ Id = "openai/gpt-oss-120b"; Label = "GPT-OSS 120B - Fast reasoning"; MaxTokens = 8192; MaxContextTokens = 120000 }
|
||||
)
|
||||
cerebras = @(
|
||||
@{ Id = "zai-glm-4.7"; Label = "ZAI-GLM 4.7 - Best quality (recommended)"; MaxTokens = 8192; MaxContextTokens = 120000 },
|
||||
@{ Id = "qwen3-235b-a22b-instruct-2507"; Label = "Qwen3 235B - Frontier reasoning"; MaxTokens = 8192; MaxContextTokens = 120000 }
|
||||
)
|
||||
function Get-ProviderCatalog {
|
||||
param([string]$ProviderId)
|
||||
|
||||
if (-not $script:ModelCatalog -or -not $script:ModelCatalog.providers) {
|
||||
return $null
|
||||
}
|
||||
|
||||
$providerProp = $script:ModelCatalog.providers.PSObject.Properties[$ProviderId]
|
||||
if ($providerProp) {
|
||||
return $providerProp.Value
|
||||
}
|
||||
|
||||
return $null
|
||||
}
|
||||
|
||||
function Get-DefaultModel {
|
||||
param([string]$ProviderId)
|
||||
|
||||
$providerCatalog = Get-ProviderCatalog $ProviderId
|
||||
if ($providerCatalog) {
|
||||
return [string]$providerCatalog.default_model
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
function Get-ModelChoices {
|
||||
param([string]$ProviderId)
|
||||
|
||||
$providerCatalog = Get-ProviderCatalog $ProviderId
|
||||
if (-not $providerCatalog -or -not $providerCatalog.models) {
|
||||
return @()
|
||||
}
|
||||
|
||||
return @($providerCatalog.models)
|
||||
}
|
||||
|
||||
function Get-PresetConfig {
|
||||
param([string]$PresetId)
|
||||
|
||||
if (-not $script:ModelCatalog -or -not $script:ModelCatalog.presets) {
|
||||
return $null
|
||||
}
|
||||
|
||||
$presetProp = $script:ModelCatalog.presets.PSObject.Properties[$PresetId]
|
||||
if ($presetProp) {
|
||||
return $presetProp.Value
|
||||
}
|
||||
|
||||
return $null
|
||||
}
|
||||
|
||||
function Apply-Preset {
|
||||
param([string]$PresetId)
|
||||
|
||||
$preset = Get-PresetConfig $PresetId
|
||||
if (-not $preset) {
|
||||
throw "Missing preset: $PresetId"
|
||||
}
|
||||
|
||||
$script:SelectedProviderId = [string]$preset.provider
|
||||
$script:SelectedModel = if ($preset.model) { [string]$preset.model } else { "" }
|
||||
$script:SelectedMaxTokens = [int]$preset.max_tokens
|
||||
$script:SelectedMaxContextTokens = [int]$preset.max_context_tokens
|
||||
$script:SelectedEnvVar = if ($preset.api_key_env_var) { [string]$preset.api_key_env_var } else { "" }
|
||||
$script:SelectedApiBase = if ($preset.api_base) { [string]$preset.api_base } else { "" }
|
||||
}
|
||||
|
||||
function Get-PresetModelChoices {
|
||||
param([string]$PresetId)
|
||||
|
||||
$preset = Get-PresetConfig $PresetId
|
||||
if (-not $preset -or -not $preset.model_choices) {
|
||||
return @()
|
||||
}
|
||||
|
||||
return @($preset.model_choices)
|
||||
}
|
||||
|
||||
function Normalize-OpenRouterModelId {
|
||||
@@ -942,9 +995,9 @@ function Get-ModelSelection {
|
||||
}
|
||||
}
|
||||
|
||||
$choices = $ModelChoices[$ProviderId]
|
||||
$choices = Get-ModelChoices $ProviderId
|
||||
if (-not $choices -or $choices.Count -eq 0) {
|
||||
return @{ Model = $DefaultModels[$ProviderId]; MaxTokens = 8192; MaxContextTokens = 120000 }
|
||||
return @{ Model = (Get-DefaultModel $ProviderId); MaxTokens = 8192; MaxContextTokens = 120000 }
|
||||
}
|
||||
if ($choices.Count -eq 1) {
|
||||
return @{ Model = $choices[0].Id; MaxTokens = $choices[0].MaxTokens; MaxContextTokens = $choices[0].MaxContextTokens }
|
||||
@@ -1059,6 +1112,13 @@ try {
|
||||
if ($LASTEXITCODE -eq 0) { $OllamaDetected = $true }
|
||||
} catch { }
|
||||
|
||||
if (-not (Initialize-ModelCatalog)) {
|
||||
Write-Fail "Failed to load core/framework/llm/model_catalog.json."
|
||||
Write-Host " Please ensure your Python environment is set up, then rerun quickstart."
|
||||
Write-Host ""
|
||||
exit 1
|
||||
}
|
||||
|
||||
# ── Read previous configuration (if any) ──────────────────────
|
||||
$PrevProvider = ""
|
||||
$PrevModel = ""
|
||||
@@ -1261,21 +1321,14 @@ switch ($num) {
|
||||
exit 1
|
||||
}
|
||||
$SubscriptionMode = "claude_code"
|
||||
$SelectedProviderId = "anthropic"
|
||||
$SelectedModel = "claude-opus-4-6"
|
||||
$SelectedMaxTokens = 32768
|
||||
$SelectedMaxContextTokens = 180000
|
||||
Apply-Preset "claude_code"
|
||||
Write-Host ""
|
||||
Write-Ok "Using Claude Code subscription"
|
||||
}
|
||||
2 {
|
||||
# ZAI Code Subscription
|
||||
$SubscriptionMode = "zai_code"
|
||||
$SelectedProviderId = "openai"
|
||||
$SelectedEnvVar = "ZAI_API_KEY"
|
||||
$SelectedModel = "glm-5"
|
||||
$SelectedMaxTokens = 32768
|
||||
$SelectedMaxContextTokens = 120000
|
||||
Apply-Preset "zai_code"
|
||||
Write-Host ""
|
||||
Write-Ok "Using ZAI Code subscription"
|
||||
Write-Color -Text " Model: glm-5 | API: api.z.ai" -Color DarkGray
|
||||
@@ -1305,10 +1358,7 @@ switch ($num) {
|
||||
}
|
||||
if ($CodexCredDetected) {
|
||||
$SubscriptionMode = "codex"
|
||||
$SelectedProviderId = "openai"
|
||||
$SelectedModel = "gpt-5.3-codex"
|
||||
$SelectedMaxTokens = 16384
|
||||
$SelectedMaxContextTokens = 120000
|
||||
Apply-Preset "codex"
|
||||
Write-Host ""
|
||||
Write-Ok "Using OpenAI Codex subscription"
|
||||
}
|
||||
@@ -1316,12 +1366,7 @@ switch ($num) {
|
||||
4 {
|
||||
# MiniMax Coding Key
|
||||
$SubscriptionMode = "minimax_code"
|
||||
$SelectedProviderId = "minimax"
|
||||
$SelectedEnvVar = "MINIMAX_API_KEY"
|
||||
$SelectedModel = "MiniMax-M2.5"
|
||||
$SelectedMaxTokens = 32768
|
||||
$SelectedMaxContextTokens = 900000
|
||||
$SelectedApiBase = "https://api.minimax.io/v1"
|
||||
Apply-Preset "minimax_code"
|
||||
Write-Host ""
|
||||
Write-Ok "Using MiniMax coding key"
|
||||
Write-Color -Text " Model: MiniMax-M2.5 | API: api.minimax.io" -Color DarkGray
|
||||
@@ -1329,11 +1374,7 @@ switch ($num) {
|
||||
5 {
|
||||
# Kimi Code Subscription
|
||||
$SubscriptionMode = "kimi_code"
|
||||
$SelectedProviderId = "kimi"
|
||||
$SelectedEnvVar = "KIMI_API_KEY"
|
||||
$SelectedModel = "kimi-k2.5"
|
||||
$SelectedMaxTokens = 32768
|
||||
$SelectedMaxContextTokens = 120000
|
||||
Apply-Preset "kimi_code"
|
||||
Write-Host ""
|
||||
Write-Ok "Using Kimi Code subscription"
|
||||
Write-Color -Text " Model: kimi-k2.5 | API: api.kimi.com/coding" -Color DarkGray
|
||||
@@ -1341,24 +1382,37 @@ switch ($num) {
|
||||
6 {
|
||||
# Hive LLM
|
||||
$SubscriptionMode = "hive_llm"
|
||||
$SelectedProviderId = "hive"
|
||||
$SelectedEnvVar = "HIVE_API_KEY"
|
||||
$SelectedMaxTokens = 32768
|
||||
$SelectedMaxContextTokens = 120000
|
||||
Apply-Preset "hive_llm"
|
||||
Write-Host ""
|
||||
Write-Ok "Using Hive LLM"
|
||||
Write-Host ""
|
||||
Write-Host " Select a model:"
|
||||
Write-Host " " -NoNewline; Write-Color -Text "1)" -Color Cyan -NoNewline; Write-Host " queen " -NoNewline; Write-Color -Text "(default - Hive flagship)" -Color DarkGray
|
||||
Write-Host " " -NoNewline; Write-Color -Text "2)" -Color Cyan -NoNewline; Write-Host " kimi-2.5"
|
||||
Write-Host " " -NoNewline; Write-Color -Text "3)" -Color Cyan -NoNewline; Write-Host " GLM-5"
|
||||
$hiveChoices = Get-PresetModelChoices "hive_llm"
|
||||
$hiveDefaultChoice = "1"
|
||||
for ($i = 0; $i -lt $hiveChoices.Count; $i++) {
|
||||
Write-Host " " -NoNewline
|
||||
Write-Color -Text "$($i + 1))" -Color Cyan -NoNewline
|
||||
Write-Host " $($hiveChoices[$i].label)" -NoNewline
|
||||
if ($hiveChoices[$i].recommended -eq $true) {
|
||||
$hiveDefaultChoice = [string]($i + 1)
|
||||
Write-Host " " -NoNewline
|
||||
Write-Color -Text "(default - Hive flagship)" -Color DarkGray
|
||||
} else {
|
||||
Write-Host ""
|
||||
}
|
||||
}
|
||||
Write-Host ""
|
||||
$hiveModelChoice = Read-Host " Enter model choice (1-3) [1]"
|
||||
if (-not $hiveModelChoice) { $hiveModelChoice = "1" }
|
||||
switch ($hiveModelChoice) {
|
||||
"2" { $SelectedModel = "kimi-2.5" }
|
||||
"3" { $SelectedModel = "GLM-5" }
|
||||
default { $SelectedModel = "queen" }
|
||||
while ($true) {
|
||||
$hiveModelChoice = Read-Host " Enter model choice (1-$($hiveChoices.Count)) [$hiveDefaultChoice]"
|
||||
if (-not $hiveModelChoice) { $hiveModelChoice = $hiveDefaultChoice }
|
||||
if ($hiveModelChoice -match '^\d+$') {
|
||||
$choiceNum = [int]$hiveModelChoice
|
||||
if ($choiceNum -ge 1 -and $choiceNum -le $hiveChoices.Count) {
|
||||
$SelectedModel = [string]$hiveChoices[$choiceNum - 1].id
|
||||
break
|
||||
}
|
||||
}
|
||||
Write-Color -Text "Invalid choice. Please enter 1-$($hiveChoices.Count)" -Color Red
|
||||
}
|
||||
Write-Color -Text " Model: $SelectedModel | API: $HiveLlmEndpoint" -Color DarkGray
|
||||
}
|
||||
@@ -1390,10 +1444,7 @@ switch ($num) {
|
||||
|
||||
if ($AntigravityCredDetected) {
|
||||
$SubscriptionMode = "antigravity"
|
||||
$SelectedProviderId = "openai"
|
||||
$SelectedModel = "gemini-3-flash"
|
||||
$SelectedMaxTokens = 32768
|
||||
$SelectedMaxContextTokens = 1000000
|
||||
Apply-Preset "antigravity"
|
||||
Write-Host ""
|
||||
Write-Warn "Using Antigravity can technically cause your account suspension. Please use at your own risk."
|
||||
Write-Host ""
|
||||
@@ -1543,9 +1594,10 @@ switch ($num) {
|
||||
$SelectedModel = $ollamaModels[$num - 1]
|
||||
Write-Host ""
|
||||
Write-Ok "Model: $SelectedModel"
|
||||
$SelectedMaxTokens = 8192
|
||||
$SelectedMaxContextTokens = 16384
|
||||
$SelectedApiBase = "http://localhost:11434"
|
||||
$ollamaPreset = Get-PresetConfig "ollama_local"
|
||||
$SelectedMaxTokens = [int]$ollamaPreset.max_tokens
|
||||
$SelectedMaxContextTokens = [int]$ollamaPreset.max_context_tokens
|
||||
$SelectedApiBase = [string]$ollamaPreset.api_base
|
||||
break
|
||||
}
|
||||
}
|
||||
@@ -1592,7 +1644,7 @@ if ($SubscriptionMode -eq "minimax_code") {
|
||||
# Health check the new key
|
||||
Write-Host " Verifying MiniMax API key... " -NoNewline
|
||||
try {
|
||||
$hcResult = & $UvCmd run python (Join-Path $ScriptDir "scripts/check_llm_key.py") "minimax" $apiKey "https://api.minimax.io/v1" 2>$null
|
||||
$hcResult = & $UvCmd run python (Join-Path $ScriptDir "scripts/check_llm_key.py") "minimax" $apiKey $SelectedApiBase 2>$null
|
||||
$hcJson = $hcResult | ConvertFrom-Json
|
||||
if ($hcJson.valid -eq $true) {
|
||||
Write-Color -Text "ok" -Color Green
|
||||
@@ -1653,7 +1705,7 @@ if ($SubscriptionMode -eq "zai_code") {
|
||||
# Health check the new key
|
||||
Write-Host " Verifying ZAI API key... " -NoNewline
|
||||
try {
|
||||
$hcResult = & $UvCmd run python (Join-Path $ScriptDir "scripts/check_llm_key.py") "zai" $apiKey "https://api.z.ai/api/coding/paas/v4" 2>$null
|
||||
$hcResult = & $UvCmd run python (Join-Path $ScriptDir "scripts/check_llm_key.py") "zai" $apiKey $SelectedApiBase 2>$null
|
||||
$hcJson = $hcResult | ConvertFrom-Json
|
||||
if ($hcJson.valid -eq $true) {
|
||||
Write-Color -Text "ok" -Color Green
|
||||
@@ -1721,7 +1773,7 @@ if ($SubscriptionMode -eq "kimi_code") {
|
||||
# Health check the new key
|
||||
Write-Host " Verifying Kimi API key... " -NoNewline
|
||||
try {
|
||||
$hcResult = & $UvCmd run python (Join-Path $ScriptDir "scripts/check_llm_key.py") "kimi" $apiKey "https://api.kimi.com/coding" 2>$null
|
||||
$hcResult = & $UvCmd run python (Join-Path $ScriptDir "scripts/check_llm_key.py") "kimi" $apiKey $SelectedApiBase 2>$null
|
||||
$hcJson = $hcResult | ConvertFrom-Json
|
||||
if ($hcJson.valid -eq $true) {
|
||||
Write-Color -Text "ok" -Color Green
|
||||
@@ -1832,7 +1884,7 @@ if ($SelectedProviderId -and -not $SelectedModel) {
|
||||
# Save configuration
|
||||
if ($SelectedProviderId) {
|
||||
if (-not $SelectedModel) {
|
||||
$SelectedModel = $DefaultModels[$SelectedProviderId]
|
||||
$SelectedModel = Get-DefaultModel $SelectedProviderId
|
||||
}
|
||||
Write-Host ""
|
||||
Write-Host " Saving configuration... " -NoNewline
|
||||
@@ -1855,25 +1907,26 @@ if ($SelectedProviderId) {
|
||||
$config.llm["use_claude_code_subscription"] = $true
|
||||
} elseif ($SubscriptionMode -eq "codex") {
|
||||
$config.llm["use_codex_subscription"] = $true
|
||||
if ($SelectedApiBase) { $config.llm["api_base"] = $SelectedApiBase }
|
||||
} elseif ($SubscriptionMode -eq "antigravity") {
|
||||
$config.llm["use_antigravity_subscription"] = $true
|
||||
} elseif ($SubscriptionMode -eq "zai_code") {
|
||||
$config.llm["api_base"] = "https://api.z.ai/api/coding/paas/v4"
|
||||
$config.llm["api_base"] = $SelectedApiBase
|
||||
$config.llm["api_key_env_var"] = $SelectedEnvVar
|
||||
} elseif ($SubscriptionMode -eq "minimax_code") {
|
||||
$config.llm["api_base"] = $SelectedApiBase
|
||||
$config.llm["api_key_env_var"] = $SelectedEnvVar
|
||||
} elseif ($SubscriptionMode -eq "kimi_code") {
|
||||
$config.llm["api_base"] = "https://api.kimi.com/coding"
|
||||
$config.llm["api_base"] = $SelectedApiBase
|
||||
$config.llm["api_key_env_var"] = $SelectedEnvVar
|
||||
} elseif ($SubscriptionMode -eq "hive_llm") {
|
||||
$config.llm["api_base"] = $HiveLlmEndpoint
|
||||
$config.llm["api_base"] = $SelectedApiBase
|
||||
$config.llm["api_key_env_var"] = $SelectedEnvVar
|
||||
} elseif ($SelectedProviderId -eq "openrouter") {
|
||||
$config.llm["api_base"] = "https://openrouter.ai/api/v1"
|
||||
$config.llm["api_key_env_var"] = $SelectedEnvVar
|
||||
} elseif ($SelectedProviderId -eq "ollama") {
|
||||
$config.llm["api_base"] = "http://localhost:11434"
|
||||
$config.llm["api_base"] = $SelectedApiBase
|
||||
$config.llm.Remove("api_key_env_var")
|
||||
} elseif ($SelectedEnvVar) {
|
||||
$config.llm["api_key_env_var"] = $SelectedEnvVar
|
||||
@@ -2166,7 +2219,7 @@ Write-Host ""
|
||||
|
||||
# Show configured provider
|
||||
if ($SelectedProviderId) {
|
||||
if (-not $SelectedModel) { $SelectedModel = $DefaultModels[$SelectedProviderId] }
|
||||
if (-not $SelectedModel) { $SelectedModel = Get-DefaultModel $SelectedProviderId }
|
||||
Write-Color -Text "Default LLM:" -Color White
|
||||
if ($SubscriptionMode -eq "claude_code") {
|
||||
Write-Ok "Claude Code Subscription -> $SelectedModel"
|
||||
@@ -2190,9 +2243,6 @@ if ($SelectedProviderId) {
|
||||
Write-Host " -> " -NoNewline
|
||||
Write-Color -Text $SelectedModel -Color DarkGray
|
||||
}
|
||||
Write-Color -Text " To use a different model for worker agents, run:" -Color DarkGray
|
||||
Write-Host " " -NoNewline
|
||||
Write-Color -Text ".\scripts\setup_worker_model.ps1" -Color Cyan
|
||||
Write-Host ""
|
||||
}
|
||||
|
||||
|
||||
+231
-269
@@ -452,90 +452,6 @@ if [ "$USE_ASSOC_ARRAYS" = true ]; then
|
||||
["DEEPSEEK_API_KEY"]="deepseek"
|
||||
)
|
||||
|
||||
declare -A DEFAULT_MODELS=(
|
||||
["anthropic"]="claude-haiku-4-5-20251001"
|
||||
["openai"]="gpt-5-mini"
|
||||
["minimax"]="MiniMax-M2.5"
|
||||
["gemini"]="gemini-3-flash-preview"
|
||||
["groq"]="moonshotai/kimi-k2-instruct-0905"
|
||||
["cerebras"]="zai-glm-4.7"
|
||||
["mistral"]="mistral-large-latest"
|
||||
["together_ai"]="meta-llama/Llama-3.3-70B-Instruct-Turbo"
|
||||
["deepseek"]="deepseek-chat"
|
||||
)
|
||||
|
||||
# Model choices per provider: composite-key associative arrays
|
||||
# Keys: "provider:index" -> value
|
||||
declare -A MODEL_CHOICES_ID=(
|
||||
["anthropic:0"]="claude-haiku-4-5-20251001"
|
||||
["anthropic:1"]="claude-sonnet-4-20250514"
|
||||
["anthropic:2"]="claude-sonnet-4-5-20250929"
|
||||
["anthropic:3"]="claude-opus-4-6"
|
||||
["openai:0"]="gpt-5-mini"
|
||||
["openai:1"]="gpt-5.2"
|
||||
["gemini:0"]="gemini-3-flash-preview"
|
||||
["gemini:1"]="gemini-3.1-pro-preview"
|
||||
["groq:0"]="moonshotai/kimi-k2-instruct-0905"
|
||||
["groq:1"]="openai/gpt-oss-120b"
|
||||
["cerebras:0"]="zai-glm-4.7"
|
||||
["cerebras:1"]="qwen3-235b-a22b-instruct-2507"
|
||||
)
|
||||
|
||||
declare -A MODEL_CHOICES_LABEL=(
|
||||
["anthropic:0"]="Haiku 4.5 - Fast + cheap (recommended)"
|
||||
["anthropic:1"]="Sonnet 4 - Fast + capable"
|
||||
["anthropic:2"]="Sonnet 4.5 - Best balance"
|
||||
["anthropic:3"]="Opus 4.6 - Most capable"
|
||||
["openai:0"]="GPT-5 Mini - Fast + cheap (recommended)"
|
||||
["openai:1"]="GPT-5.2 - Most capable"
|
||||
["gemini:0"]="Gemini 3 Flash - Fast (recommended)"
|
||||
["gemini:1"]="Gemini 3.1 Pro - Best quality"
|
||||
["groq:0"]="Kimi K2 - Best quality (recommended)"
|
||||
["groq:1"]="GPT-OSS 120B - Fast reasoning"
|
||||
["cerebras:0"]="ZAI-GLM 4.7 - Best quality (recommended)"
|
||||
["cerebras:1"]="Qwen3 235B - Frontier reasoning"
|
||||
)
|
||||
|
||||
declare -A MODEL_CHOICES_MAXTOKENS=(
|
||||
["anthropic:0"]=8192
|
||||
["anthropic:1"]=8192
|
||||
["anthropic:2"]=16384
|
||||
["anthropic:3"]=32768
|
||||
["openai:0"]=16384
|
||||
["openai:1"]=16384
|
||||
["gemini:0"]=8192
|
||||
["gemini:1"]=8192
|
||||
["groq:0"]=8192
|
||||
["groq:1"]=8192
|
||||
["cerebras:0"]=8192
|
||||
["cerebras:1"]=8192
|
||||
)
|
||||
|
||||
# Max context tokens (input history budget) per model, based on actual context windows.
|
||||
# Leave ~10% headroom for system prompt and output tokens.
|
||||
declare -A MODEL_CHOICES_MAXCONTEXTTOKENS=(
|
||||
["anthropic:0"]=180000 # Claude Haiku 4.5 — 200k context window
|
||||
["anthropic:1"]=180000 # Claude Sonnet 4 — 200k context window
|
||||
["anthropic:2"]=180000 # Claude Sonnet 4.5 — 200k context window
|
||||
["anthropic:3"]=180000 # Claude Opus 4.6 — 200k context window
|
||||
["openai:0"]=120000 # GPT-5 Mini — 128k context window
|
||||
["openai:1"]=120000 # GPT-5.2 — 128k context window
|
||||
["gemini:0"]=900000 # Gemini 3 Flash — 1M context window
|
||||
["gemini:1"]=900000 # Gemini 3.1 Pro — 1M context window
|
||||
["groq:0"]=120000 # Kimi K2 — 128k context window
|
||||
["groq:1"]=120000 # GPT-OSS 120B — 128k context window
|
||||
["cerebras:0"]=120000 # ZAI-GLM 4.7 — 128k context window
|
||||
["cerebras:1"]=120000 # Qwen3 235B — 128k context window
|
||||
)
|
||||
|
||||
declare -A MODEL_CHOICES_COUNT=(
|
||||
["anthropic"]=4
|
||||
["openai"]=2
|
||||
["gemini"]=2
|
||||
["groq"]=2
|
||||
["cerebras"]=2
|
||||
)
|
||||
|
||||
# Helper functions for Bash 4+
|
||||
get_provider_name() {
|
||||
echo "${PROVIDER_NAMES[$1]}"
|
||||
@@ -544,40 +460,12 @@ if [ "$USE_ASSOC_ARRAYS" = true ]; then
|
||||
get_provider_id() {
|
||||
echo "${PROVIDER_IDS[$1]}"
|
||||
}
|
||||
|
||||
get_default_model() {
|
||||
echo "${DEFAULT_MODELS[$1]}"
|
||||
}
|
||||
|
||||
get_model_choice_count() {
|
||||
echo "${MODEL_CHOICES_COUNT[$1]:-0}"
|
||||
}
|
||||
|
||||
get_model_choice_id() {
|
||||
echo "${MODEL_CHOICES_ID[$1:$2]}"
|
||||
}
|
||||
|
||||
get_model_choice_label() {
|
||||
echo "${MODEL_CHOICES_LABEL[$1:$2]}"
|
||||
}
|
||||
|
||||
get_model_choice_maxtokens() {
|
||||
echo "${MODEL_CHOICES_MAXTOKENS[$1:$2]}"
|
||||
}
|
||||
|
||||
get_model_choice_maxcontexttokens() {
|
||||
echo "${MODEL_CHOICES_MAXCONTEXTTOKENS[$1:$2]}"
|
||||
}
|
||||
else
|
||||
# Bash 3.2 - use parallel indexed arrays
|
||||
PROVIDER_ENV_VARS=(ANTHROPIC_API_KEY OPENAI_API_KEY MINIMAX_API_KEY GEMINI_API_KEY GOOGLE_API_KEY GROQ_API_KEY CEREBRAS_API_KEY OPENROUTER_API_KEY MISTRAL_API_KEY TOGETHER_API_KEY DEEPSEEK_API_KEY)
|
||||
PROVIDER_DISPLAY_NAMES=("Anthropic (Claude)" "OpenAI (GPT)" "MiniMax" "Google Gemini" "Google AI" "Groq" "Cerebras" "OpenRouter" "Mistral" "Together AI" "DeepSeek")
|
||||
PROVIDER_ID_LIST=(anthropic openai minimax gemini google groq cerebras openrouter mistral together deepseek)
|
||||
|
||||
# Default models by provider id (parallel arrays)
|
||||
MODEL_PROVIDER_IDS=(anthropic openai minimax gemini groq cerebras mistral together_ai deepseek)
|
||||
MODEL_DEFAULTS=("claude-haiku-4-5-20251001" "gpt-5-mini" "MiniMax-M2.5" "gemini-3-flash-preview" "moonshotai/kimi-k2-instruct-0905" "zai-glm-4.7" "mistral-large-latest" "meta-llama/Llama-3.3-70B-Instruct-Turbo" "deepseek-chat")
|
||||
|
||||
# Helper: get provider display name for an env var
|
||||
get_provider_name() {
|
||||
local env_var="$1"
|
||||
@@ -603,116 +491,199 @@ else
|
||||
i=$((i + 1))
|
||||
done
|
||||
}
|
||||
fi
|
||||
|
||||
# Helper: get default model for a provider id
|
||||
get_default_model() {
|
||||
local provider_id="$1"
|
||||
local i=0
|
||||
while [ $i -lt ${#MODEL_PROVIDER_IDS[@]} ]; do
|
||||
if [ "${MODEL_PROVIDER_IDS[$i]}" = "$provider_id" ]; then
|
||||
echo "${MODEL_DEFAULTS[$i]}"
|
||||
MODEL_DEFAULT_ROWS=""
|
||||
MODEL_CHOICE_ROWS=""
|
||||
PRESET_ROWS=""
|
||||
PRESET_MODEL_CHOICE_ROWS=""
|
||||
|
||||
load_model_catalog_rows() {
|
||||
# Bash 3.2 has no native JSON parser, so we materialize the shared catalogue
|
||||
# into simple tab-separated rows once and reuse them for the interactive flow.
|
||||
local catalog_lines=""
|
||||
catalog_lines="$(uv run python -c '
|
||||
from framework.llm.model_catalog import get_default_models, get_models_catalogue, get_presets
|
||||
|
||||
for provider_id, default_model in sorted(get_default_models().items()):
|
||||
print(f"DEFAULT\t{provider_id}\t{default_model}")
|
||||
|
||||
for provider_id, models in sorted(get_models_catalogue().items()):
|
||||
for model in models:
|
||||
print(
|
||||
"MODEL\t{provider}\t{id}\t{label}\t{max_tokens}\t{max_context_tokens}".format(
|
||||
provider=provider_id,
|
||||
id=model["id"],
|
||||
label=model["label"],
|
||||
max_tokens=model["max_tokens"],
|
||||
max_context_tokens=model["max_context_tokens"],
|
||||
)
|
||||
)
|
||||
|
||||
for preset_id, preset in sorted(get_presets().items()):
|
||||
print(
|
||||
"PRESET\t{preset_id}\t{provider}\t{model}\t{max_tokens}\t{max_context_tokens}\t{api_key_env_var}\t{api_base}".format(
|
||||
preset_id=preset_id,
|
||||
provider=preset["provider"],
|
||||
model=preset.get("model", ""),
|
||||
max_tokens=preset["max_tokens"],
|
||||
max_context_tokens=preset["max_context_tokens"],
|
||||
api_key_env_var=preset.get("api_key_env_var", ""),
|
||||
api_base=preset.get("api_base", ""),
|
||||
)
|
||||
)
|
||||
for choice in preset.get("model_choices", []):
|
||||
print(
|
||||
"PRESET_MODEL\t{preset_id}\t{id}\t{label}\t{recommended}".format(
|
||||
preset_id=preset_id,
|
||||
id=choice["id"],
|
||||
label=choice["label"],
|
||||
recommended=str(choice["recommended"]).lower(),
|
||||
)
|
||||
)
|
||||
' 2>/dev/null)" || return 1
|
||||
|
||||
MODEL_DEFAULT_ROWS=""
|
||||
MODEL_CHOICE_ROWS=""
|
||||
PRESET_ROWS=""
|
||||
PRESET_MODEL_CHOICE_ROWS=""
|
||||
|
||||
while IFS=$'\t' read -r row_type field1 field2 field3 field4 field5 field6 field7; do
|
||||
[ -n "$row_type" ] || continue
|
||||
if [ "$row_type" = "DEFAULT" ]; then
|
||||
MODEL_DEFAULT_ROWS+="${field1}"$'\t'"${field2}"$'\n'
|
||||
elif [ "$row_type" = "MODEL" ]; then
|
||||
MODEL_CHOICE_ROWS+="${field1}"$'\t'"${field2}"$'\t'"${field3}"$'\t'"${field4}"$'\t'"${field5}"$'\n'
|
||||
elif [ "$row_type" = "PRESET" ]; then
|
||||
PRESET_ROWS+="${field1}"$'\t'"${field2}"$'\t'"${field3}"$'\t'"${field4}"$'\t'"${field5}"$'\t'"${field6}"$'\t'"${field7}"$'\n'
|
||||
elif [ "$row_type" = "PRESET_MODEL" ]; then
|
||||
PRESET_MODEL_CHOICE_ROWS+="${field1}"$'\t'"${field2}"$'\t'"${field3}"$'\t'"${field4}"$'\n'
|
||||
fi
|
||||
done <<< "$catalog_lines"
|
||||
}
|
||||
|
||||
get_default_model() {
|
||||
local provider_id="$1"
|
||||
while IFS=$'\t' read -r row_provider row_model; do
|
||||
[ -n "$row_provider" ] || continue
|
||||
if [ "$row_provider" = "$provider_id" ]; then
|
||||
echo "$row_model"
|
||||
return
|
||||
fi
|
||||
done <<< "$MODEL_DEFAULT_ROWS"
|
||||
}
|
||||
|
||||
get_model_choice_count() {
|
||||
local provider_id="$1"
|
||||
local count=0
|
||||
while IFS=$'\t' read -r row_provider _; do
|
||||
[ -n "$row_provider" ] || continue
|
||||
if [ "$row_provider" = "$provider_id" ]; then
|
||||
count=$((count + 1))
|
||||
fi
|
||||
done <<< "$MODEL_CHOICE_ROWS"
|
||||
echo "$count"
|
||||
}
|
||||
|
||||
get_model_choice_field() {
|
||||
local provider_id="$1"
|
||||
local idx="$2"
|
||||
local field="$3"
|
||||
local count=0
|
||||
while IFS=$'\t' read -r row_provider row_id row_label row_max_tokens row_max_context_tokens; do
|
||||
[ -n "$row_provider" ] || continue
|
||||
if [ "$row_provider" = "$provider_id" ]; then
|
||||
if [ "$count" -eq "$idx" ]; then
|
||||
case "$field" in
|
||||
id) echo "$row_id" ;;
|
||||
label) echo "$row_label" ;;
|
||||
max_tokens) echo "$row_max_tokens" ;;
|
||||
max_context_tokens) echo "$row_max_context_tokens" ;;
|
||||
esac
|
||||
return
|
||||
fi
|
||||
i=$((i + 1))
|
||||
done
|
||||
}
|
||||
count=$((count + 1))
|
||||
fi
|
||||
done <<< "$MODEL_CHOICE_ROWS"
|
||||
}
|
||||
|
||||
# Model choices per provider - flat parallel arrays with provider offsets
|
||||
# Provider order: anthropic(4), openai(2), gemini(2), groq(2), cerebras(2)
|
||||
MC_PROVIDERS=(anthropic anthropic anthropic anthropic openai openai gemini gemini groq groq cerebras cerebras)
|
||||
MC_IDS=("claude-haiku-4-5-20251001" "claude-sonnet-4-20250514" "claude-sonnet-4-5-20250929" "claude-opus-4-6" "gpt-5-mini" "gpt-5.2" "gemini-3-flash-preview" "gemini-3.1-pro-preview" "moonshotai/kimi-k2-instruct-0905" "openai/gpt-oss-120b" "zai-glm-4.7" "qwen3-235b-a22b-instruct-2507")
|
||||
MC_LABELS=("Haiku 4.5 - Fast + cheap (recommended)" "Sonnet 4 - Fast + capable" "Sonnet 4.5 - Best balance" "Opus 4.6 - Most capable" "GPT-5 Mini - Fast + cheap (recommended)" "GPT-5.2 - Most capable" "Gemini 3 Flash - Fast (recommended)" "Gemini 3.1 Pro - Best quality" "Kimi K2 - Best quality (recommended)" "GPT-OSS 120B - Fast reasoning" "ZAI-GLM 4.7 - Best quality (recommended)" "Qwen3 235B - Frontier reasoning")
|
||||
MC_MAXTOKENS=(8192 8192 16384 32768 16384 16384 8192 8192 8192 8192 8192 8192)
|
||||
# Max context tokens per model (same order as MC_PROVIDERS/MC_IDS above)
|
||||
# Based on actual context windows with ~10% headroom for system prompt + output.
|
||||
MC_MAXCONTEXTTOKENS=(180000 180000 180000 180000 120000 120000 900000 900000 120000 120000 120000 120000)
|
||||
get_model_choice_id() {
|
||||
get_model_choice_field "$1" "$2" "id"
|
||||
}
|
||||
|
||||
# Helper: get number of model choices for a provider
|
||||
get_model_choice_count() {
|
||||
local provider_id="$1"
|
||||
local count=0
|
||||
local i=0
|
||||
while [ $i -lt ${#MC_PROVIDERS[@]} ]; do
|
||||
if [ "${MC_PROVIDERS[$i]}" = "$provider_id" ]; then
|
||||
count=$((count + 1))
|
||||
get_model_choice_label() {
|
||||
get_model_choice_field "$1" "$2" "label"
|
||||
}
|
||||
|
||||
get_model_choice_maxtokens() {
|
||||
get_model_choice_field "$1" "$2" "max_tokens"
|
||||
}
|
||||
|
||||
get_model_choice_maxcontexttokens() {
|
||||
get_model_choice_field "$1" "$2" "max_context_tokens"
|
||||
}
|
||||
|
||||
get_preset_field() {
|
||||
local preset_id="$1"
|
||||
local field="$2"
|
||||
while IFS=$'\t' read -r row_preset_id row_provider row_model row_max_tokens row_max_context_tokens row_env_var row_api_base; do
|
||||
[ -n "$row_preset_id" ] || continue
|
||||
if [ "$row_preset_id" = "$preset_id" ]; then
|
||||
case "$field" in
|
||||
provider) echo "$row_provider" ;;
|
||||
model) echo "$row_model" ;;
|
||||
max_tokens) echo "$row_max_tokens" ;;
|
||||
max_context_tokens) echo "$row_max_context_tokens" ;;
|
||||
api_key_env_var) echo "$row_env_var" ;;
|
||||
api_base) echo "$row_api_base" ;;
|
||||
esac
|
||||
return
|
||||
fi
|
||||
done <<< "$PRESET_ROWS"
|
||||
}
|
||||
|
||||
apply_preset() {
|
||||
local preset_id="$1"
|
||||
SELECTED_PROVIDER_ID="$(get_preset_field "$preset_id" "provider")"
|
||||
SELECTED_MODEL="$(get_preset_field "$preset_id" "model")"
|
||||
SELECTED_MAX_TOKENS="$(get_preset_field "$preset_id" "max_tokens")"
|
||||
SELECTED_MAX_CONTEXT_TOKENS="$(get_preset_field "$preset_id" "max_context_tokens")"
|
||||
SELECTED_ENV_VAR="$(get_preset_field "$preset_id" "api_key_env_var")"
|
||||
SELECTED_API_BASE="$(get_preset_field "$preset_id" "api_base")"
|
||||
}
|
||||
|
||||
get_preset_model_choice_count() {
|
||||
local preset_id="$1"
|
||||
local count=0
|
||||
while IFS=$'\t' read -r row_preset_id _; do
|
||||
[ -n "$row_preset_id" ] || continue
|
||||
if [ "$row_preset_id" = "$preset_id" ]; then
|
||||
count=$((count + 1))
|
||||
fi
|
||||
done <<< "$PRESET_MODEL_CHOICE_ROWS"
|
||||
echo "$count"
|
||||
}
|
||||
|
||||
get_preset_model_choice_field() {
|
||||
local preset_id="$1"
|
||||
local idx="$2"
|
||||
local field="$3"
|
||||
local count=0
|
||||
while IFS=$'\t' read -r row_preset_id row_id row_label row_recommended; do
|
||||
[ -n "$row_preset_id" ] || continue
|
||||
if [ "$row_preset_id" = "$preset_id" ]; then
|
||||
if [ "$count" -eq "$idx" ]; then
|
||||
case "$field" in
|
||||
id) echo "$row_id" ;;
|
||||
label) echo "$row_label" ;;
|
||||
recommended) echo "$row_recommended" ;;
|
||||
esac
|
||||
return
|
||||
fi
|
||||
i=$((i + 1))
|
||||
done
|
||||
echo "$count"
|
||||
}
|
||||
|
||||
# Helper: get model choice id by provider and index (0-based within provider)
|
||||
get_model_choice_id() {
|
||||
local provider_id="$1"
|
||||
local idx="$2"
|
||||
local count=0
|
||||
local i=0
|
||||
while [ $i -lt ${#MC_PROVIDERS[@]} ]; do
|
||||
if [ "${MC_PROVIDERS[$i]}" = "$provider_id" ]; then
|
||||
if [ $count -eq "$idx" ]; then
|
||||
echo "${MC_IDS[$i]}"
|
||||
return
|
||||
fi
|
||||
count=$((count + 1))
|
||||
fi
|
||||
i=$((i + 1))
|
||||
done
|
||||
}
|
||||
|
||||
# Helper: get model choice label by provider and index
|
||||
get_model_choice_label() {
|
||||
local provider_id="$1"
|
||||
local idx="$2"
|
||||
local count=0
|
||||
local i=0
|
||||
while [ $i -lt ${#MC_PROVIDERS[@]} ]; do
|
||||
if [ "${MC_PROVIDERS[$i]}" = "$provider_id" ]; then
|
||||
if [ $count -eq "$idx" ]; then
|
||||
echo "${MC_LABELS[$i]}"
|
||||
return
|
||||
fi
|
||||
count=$((count + 1))
|
||||
fi
|
||||
i=$((i + 1))
|
||||
done
|
||||
}
|
||||
|
||||
# Helper: get model choice max_tokens by provider and index
|
||||
get_model_choice_maxtokens() {
|
||||
local provider_id="$1"
|
||||
local idx="$2"
|
||||
local count=0
|
||||
local i=0
|
||||
while [ $i -lt ${#MC_PROVIDERS[@]} ]; do
|
||||
if [ "${MC_PROVIDERS[$i]}" = "$provider_id" ]; then
|
||||
if [ $count -eq "$idx" ]; then
|
||||
echo "${MC_MAXTOKENS[$i]}"
|
||||
return
|
||||
fi
|
||||
count=$((count + 1))
|
||||
fi
|
||||
i=$((i + 1))
|
||||
done
|
||||
}
|
||||
|
||||
# Helper: get model choice max_context_tokens by provider and index
|
||||
get_model_choice_maxcontexttokens() {
|
||||
local provider_id="$1"
|
||||
local idx="$2"
|
||||
local count=0
|
||||
local i=0
|
||||
while [ $i -lt ${#MC_PROVIDERS[@]} ]; do
|
||||
if [ "${MC_PROVIDERS[$i]}" = "$provider_id" ]; then
|
||||
if [ $count -eq "$idx" ]; then
|
||||
echo "${MC_MAXCONTEXTTOKENS[$i]}"
|
||||
return
|
||||
fi
|
||||
count=$((count + 1))
|
||||
fi
|
||||
i=$((i + 1))
|
||||
done
|
||||
}
|
||||
fi
|
||||
count=$((count + 1))
|
||||
fi
|
||||
done <<< "$PRESET_MODEL_CHOICE_ROWS"
|
||||
}
|
||||
|
||||
# Configuration directory
|
||||
HIVE_CONFIG_DIR="$HOME/.hive"
|
||||
@@ -1100,6 +1071,12 @@ if ollama list >/dev/null 2>&1; then
|
||||
OLLAMA_DETECTED=true
|
||||
fi
|
||||
|
||||
if ! load_model_catalog_rows; then
|
||||
echo -e "${RED}Failed to load core/framework/llm/model_catalog.json.${NC}"
|
||||
echo -e "${YELLOW}Please ensure your Python environment is set up, then rerun quickstart.${NC}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Detect API key providers
|
||||
if [ "$USE_ASSOC_ARRAYS" = true ]; then
|
||||
for env_var in "${!PROVIDER_NAMES[@]}"; do
|
||||
@@ -1344,10 +1321,7 @@ case $choice in
|
||||
exit 1
|
||||
else
|
||||
SUBSCRIPTION_MODE="claude_code"
|
||||
SELECTED_PROVIDER_ID="anthropic"
|
||||
SELECTED_MODEL="claude-opus-4-6"
|
||||
SELECTED_MAX_TOKENS=32768
|
||||
SELECTED_MAX_CONTEXT_TOKENS=960000 # Claude — 1M context window
|
||||
apply_preset "claude_code"
|
||||
echo ""
|
||||
echo -e "${GREEN}⬢${NC} Using Claude Code subscription"
|
||||
fi
|
||||
@@ -1355,11 +1329,7 @@ case $choice in
|
||||
2)
|
||||
# ZAI Code Subscription
|
||||
SUBSCRIPTION_MODE="zai_code"
|
||||
SELECTED_PROVIDER_ID="openai"
|
||||
SELECTED_ENV_VAR="ZAI_API_KEY"
|
||||
SELECTED_MODEL="glm-5"
|
||||
SELECTED_MAX_TOKENS=32768
|
||||
SELECTED_MAX_CONTEXT_TOKENS=180000 # GLM-5 — 200k context window
|
||||
apply_preset "zai_code"
|
||||
PROVIDER_NAME="ZAI"
|
||||
echo ""
|
||||
echo -e "${GREEN}⬢${NC} Using ZAI Code subscription"
|
||||
@@ -1387,10 +1357,7 @@ case $choice in
|
||||
fi
|
||||
if [ "$CODEX_CRED_DETECTED" = true ]; then
|
||||
SUBSCRIPTION_MODE="codex"
|
||||
SELECTED_PROVIDER_ID="openai"
|
||||
SELECTED_MODEL="gpt-5.3-codex"
|
||||
SELECTED_MAX_TOKENS=16384
|
||||
SELECTED_MAX_CONTEXT_TOKENS=120000 # GPT Codex — 128k context window
|
||||
apply_preset "codex"
|
||||
echo ""
|
||||
echo -e "${GREEN}⬢${NC} Using OpenAI Codex subscription"
|
||||
fi
|
||||
@@ -1398,12 +1365,7 @@ case $choice in
|
||||
4)
|
||||
# MiniMax Coding Key
|
||||
SUBSCRIPTION_MODE="minimax_code"
|
||||
SELECTED_ENV_VAR="MINIMAX_API_KEY"
|
||||
SELECTED_PROVIDER_ID="minimax"
|
||||
SELECTED_MODEL="MiniMax-M2.5"
|
||||
SELECTED_MAX_TOKENS=32768
|
||||
SELECTED_MAX_CONTEXT_TOKENS=900000 # MiniMax M2.5 — 1M context window
|
||||
SELECTED_API_BASE="https://api.minimax.io/v1"
|
||||
apply_preset "minimax_code"
|
||||
PROVIDER_NAME="MiniMax"
|
||||
SIGNUP_URL="https://platform.minimax.io/user-center/basic-information/interface-key"
|
||||
echo ""
|
||||
@@ -1413,12 +1375,7 @@ case $choice in
|
||||
5)
|
||||
# Kimi Code Subscription
|
||||
SUBSCRIPTION_MODE="kimi_code"
|
||||
SELECTED_PROVIDER_ID="kimi"
|
||||
SELECTED_ENV_VAR="KIMI_API_KEY"
|
||||
SELECTED_MODEL="kimi-k2.5"
|
||||
SELECTED_MAX_TOKENS=32768
|
||||
SELECTED_MAX_CONTEXT_TOKENS=240000 # Kimi K2.5 — 256k context window
|
||||
SELECTED_API_BASE="https://api.kimi.com/coding"
|
||||
apply_preset "kimi_code"
|
||||
PROVIDER_NAME="Kimi"
|
||||
SIGNUP_URL="https://www.kimi.com/code"
|
||||
echo ""
|
||||
@@ -1428,28 +1385,38 @@ case $choice in
|
||||
6)
|
||||
# Hive LLM
|
||||
SUBSCRIPTION_MODE="hive_llm"
|
||||
SELECTED_PROVIDER_ID="hive"
|
||||
SELECTED_ENV_VAR="HIVE_API_KEY"
|
||||
SELECTED_MAX_TOKENS=32768
|
||||
SELECTED_MAX_CONTEXT_TOKENS=180000
|
||||
SELECTED_API_BASE="$HIVE_LLM_ENDPOINT"
|
||||
apply_preset "hive_llm"
|
||||
PROVIDER_NAME="Hive"
|
||||
SIGNUP_URL="https://discord.com/invite/hQdU7QDkgR"
|
||||
echo ""
|
||||
echo -e "${GREEN}⬢${NC} Using Hive LLM"
|
||||
echo ""
|
||||
echo -e " Select a model:"
|
||||
echo -e " ${CYAN}1)${NC} queen ${DIM}(default — Hive flagship)${NC}"
|
||||
echo -e " ${CYAN}2)${NC} kimi-2.5"
|
||||
echo -e " ${CYAN}3)${NC} GLM-5"
|
||||
hive_choice_count="$(get_preset_model_choice_count "hive_llm")"
|
||||
hive_default_choice=1
|
||||
hive_idx=0
|
||||
while [ "$hive_idx" -lt "$hive_choice_count" ]; do
|
||||
hive_num=$((hive_idx + 1))
|
||||
hive_model_id="$(get_preset_model_choice_field "hive_llm" "$hive_idx" "id")"
|
||||
hive_recommended="$(get_preset_model_choice_field "hive_llm" "$hive_idx" "recommended")"
|
||||
if [ "$hive_recommended" = "true" ]; then
|
||||
echo -e " ${CYAN}${hive_num})${NC} ${hive_model_id} ${DIM}(default — Hive flagship)${NC}"
|
||||
hive_default_choice="$hive_num"
|
||||
else
|
||||
echo -e " ${CYAN}${hive_num})${NC} ${hive_model_id}"
|
||||
fi
|
||||
hive_idx=$((hive_idx + 1))
|
||||
done
|
||||
echo ""
|
||||
read -r -p " Enter model choice (1-3) [1]: " hive_model_choice || true
|
||||
hive_model_choice="${hive_model_choice:-1}"
|
||||
case "$hive_model_choice" in
|
||||
2) SELECTED_MODEL="kimi-2.5" ;;
|
||||
3) SELECTED_MODEL="GLM-5" ;;
|
||||
*) SELECTED_MODEL="queen" ;;
|
||||
esac
|
||||
while true; do
|
||||
read -r -p " Enter model choice (1-$hive_choice_count) [$hive_default_choice]: " hive_model_choice || true
|
||||
hive_model_choice="${hive_model_choice:-$hive_default_choice}"
|
||||
if [[ "$hive_model_choice" =~ ^[0-9]+$ ]] && [ "$hive_model_choice" -ge 1 ] && [ "$hive_model_choice" -le "$hive_choice_count" ]; then
|
||||
SELECTED_MODEL="$(get_preset_model_choice_field "hive_llm" "$((hive_model_choice - 1))" "id")"
|
||||
break
|
||||
fi
|
||||
echo -e "${RED}Invalid choice. Please enter 1-$hive_choice_count${NC}"
|
||||
done
|
||||
echo -e " ${DIM}Model: $SELECTED_MODEL | API: ${HIVE_LLM_ENDPOINT}${NC}"
|
||||
;;
|
||||
7)
|
||||
@@ -1480,10 +1447,7 @@ case $choice in
|
||||
|
||||
if [ "$ANTIGRAVITY_CRED_DETECTED" = true ]; then
|
||||
SUBSCRIPTION_MODE="antigravity"
|
||||
SELECTED_PROVIDER_ID="openai"
|
||||
SELECTED_MODEL="gemini-3-flash"
|
||||
SELECTED_MAX_TOKENS=32768
|
||||
SELECTED_MAX_CONTEXT_TOKENS=1000000 # Gemini 3 Flash — 1M context window
|
||||
apply_preset "antigravity"
|
||||
echo ""
|
||||
echo -e "${YELLOW} ⚠ Using Antigravity can technically cause your account suspension. Please use at your own risk.${NC}"
|
||||
echo ""
|
||||
@@ -1540,8 +1504,8 @@ case $choice in
|
||||
fi
|
||||
SELECTED_PROVIDER_ID="ollama"
|
||||
SELECTED_ENV_VAR=""
|
||||
SELECTED_MAX_TOKENS=8192
|
||||
SELECTED_MAX_CONTEXT_TOKENS=16384
|
||||
SELECTED_MAX_TOKENS="$(get_preset_field "ollama_local" "max_tokens")"
|
||||
SELECTED_MAX_CONTEXT_TOKENS="$(get_preset_field "ollama_local" "max_context_tokens")"
|
||||
OLLAMA_MODELS=()
|
||||
while IFS= read -r line; do
|
||||
[ -n "$line" ] && OLLAMA_MODELS+=("$line")
|
||||
@@ -1559,7 +1523,7 @@ case $choice in
|
||||
read -r -p "Enter choice (1-${#OLLAMA_MODELS[@]}): " model_choice
|
||||
if [[ "$model_choice" =~ ^[0-9]+$ ]] && [ "$model_choice" -ge 1 ] && [ "$model_choice" -le ${#OLLAMA_MODELS[@]} ]; then
|
||||
SELECTED_MODEL="${OLLAMA_MODELS[$((model_choice - 1))]}"
|
||||
SELECTED_API_BASE="http://localhost:11434"
|
||||
SELECTED_API_BASE="$(get_preset_field "ollama_local" "api_base")"
|
||||
break
|
||||
fi
|
||||
echo -e "${RED}Invalid choice. Please enter 1-${#OLLAMA_MODELS[@]}${NC}"
|
||||
@@ -1684,7 +1648,7 @@ if [ "$SUBSCRIPTION_MODE" = "zai_code" ]; then
|
||||
echo -e "${GREEN}⬢${NC} ZAI API key saved to $SHELL_RC_FILE"
|
||||
# Health check the new key
|
||||
echo -n " Verifying ZAI API key... "
|
||||
HC_RESULT=$(uv run python "$SCRIPT_DIR/scripts/check_llm_key.py" "zai" "$API_KEY" "https://api.z.ai/api/coding/paas/v4" 2>/dev/null) || true
|
||||
HC_RESULT=$(uv run python "$SCRIPT_DIR/scripts/check_llm_key.py" "zai" "$API_KEY" "$SELECTED_API_BASE" 2>/dev/null) || true
|
||||
HC_VALID=$(echo "$HC_RESULT" | $PYTHON_CMD -c "import json,sys; print(json.loads(sys.stdin.read()).get('valid',''))" 2>/dev/null) || true
|
||||
HC_MSG=$(echo "$HC_RESULT" | $PYTHON_CMD -c "import json,sys; print(json.loads(sys.stdin.read()).get('message',''))" 2>/dev/null) || true
|
||||
if [ "$HC_VALID" = "True" ]; then
|
||||
@@ -1735,11 +1699,11 @@ if [ -n "$SELECTED_PROVIDER_ID" ]; then
|
||||
if [ "$SUBSCRIPTION_MODE" = "claude_code" ]; then
|
||||
save_configuration "$SELECTED_PROVIDER_ID" "" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "$SELECTED_MAX_CONTEXT_TOKENS" "true" "" > /dev/null || SAVE_OK=false
|
||||
elif [ "$SUBSCRIPTION_MODE" = "codex" ]; then
|
||||
save_configuration "$SELECTED_PROVIDER_ID" "" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "$SELECTED_MAX_CONTEXT_TOKENS" "" "" "true" > /dev/null || SAVE_OK=false
|
||||
save_configuration "$SELECTED_PROVIDER_ID" "" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "$SELECTED_MAX_CONTEXT_TOKENS" "" "$SELECTED_API_BASE" "true" > /dev/null || SAVE_OK=false
|
||||
elif [ "$SUBSCRIPTION_MODE" = "antigravity" ]; then
|
||||
save_configuration "$SELECTED_PROVIDER_ID" "" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "$SELECTED_MAX_CONTEXT_TOKENS" "" "" "" "true" > /dev/null || SAVE_OK=false
|
||||
elif [ "$SUBSCRIPTION_MODE" = "zai_code" ]; then
|
||||
save_configuration "$SELECTED_PROVIDER_ID" "$SELECTED_ENV_VAR" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "$SELECTED_MAX_CONTEXT_TOKENS" "" "https://api.z.ai/api/coding/paas/v4" > /dev/null || SAVE_OK=false
|
||||
save_configuration "$SELECTED_PROVIDER_ID" "$SELECTED_ENV_VAR" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "$SELECTED_MAX_CONTEXT_TOKENS" "" "$SELECTED_API_BASE" > /dev/null || SAVE_OK=false
|
||||
elif [ "$SUBSCRIPTION_MODE" = "minimax_code" ]; then
|
||||
save_configuration "$SELECTED_PROVIDER_ID" "$SELECTED_ENV_VAR" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "$SELECTED_MAX_CONTEXT_TOKENS" "" "$SELECTED_API_BASE" > /dev/null || SAVE_OK=false
|
||||
elif [ "$SUBSCRIPTION_MODE" = "kimi_code" ]; then
|
||||
@@ -1751,7 +1715,7 @@ if [ -n "$SELECTED_PROVIDER_ID" ]; then
|
||||
elif [ "$SELECTED_PROVIDER_ID" = "ollama" ]; then
|
||||
# Pass api_base explicitly — LiteLLM requires this to route ollama/* models
|
||||
# to the local Ollama server instead of trying to reach a remote endpoint.
|
||||
save_configuration "ollama" "" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "$SELECTED_MAX_CONTEXT_TOKENS" "" "http://localhost:11434" > /dev/null || SAVE_OK=false
|
||||
save_configuration "ollama" "" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "$SELECTED_MAX_CONTEXT_TOKENS" "" "$SELECTED_API_BASE" > /dev/null || SAVE_OK=false
|
||||
else
|
||||
save_configuration "$SELECTED_PROVIDER_ID" "$SELECTED_ENV_VAR" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "$SELECTED_MAX_CONTEXT_TOKENS" > /dev/null || SAVE_OK=false
|
||||
fi
|
||||
@@ -2128,8 +2092,6 @@ if [ -n "$SELECTED_PROVIDER_ID" ]; then
|
||||
else
|
||||
echo -e " ${CYAN}$SELECTED_PROVIDER_ID${NC} → ${DIM}$SELECTED_MODEL${NC}"
|
||||
fi
|
||||
echo -e " ${DIM}To use a different model for worker agents, run:${NC}"
|
||||
echo -e " ${CYAN}./scripts/setup_worker_model.sh${NC}"
|
||||
echo ""
|
||||
fi
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user