feat: refresh provider model catalogs, defaults, and limits

This commit is contained in:
Richard Tang
2026-04-09 11:59:41 -07:00
parent 0bf4d8b9fa
commit acca008772
5 changed files with 293 additions and 63 deletions
+159 -54
View File
@@ -73,76 +73,167 @@
]
},
"groq": {
"default_model": "moonshotai/kimi-k2-instruct-0905",
"default_model": "openai/gpt-oss-120b",
"models": [
{
"id": "moonshotai/kimi-k2-instruct-0905",
"label": "Kimi K2 - Best quality",
"id": "openai/gpt-oss-120b",
"label": "GPT-OSS 120B - Best reasoning",
"recommended": true,
"max_tokens": 8192,
"max_context_tokens": 120000
"max_tokens": 65536,
"max_context_tokens": 131072
},
{
"id": "openai/gpt-oss-120b",
"label": "GPT-OSS 120B - Fast reasoning",
"id": "openai/gpt-oss-20b",
"label": "GPT-OSS 20B - Fast + cheaper",
"recommended": false,
"max_tokens": 8192,
"max_context_tokens": 120000
"max_tokens": 65536,
"max_context_tokens": 131072
},
{
"id": "llama-3.3-70b-versatile",
"label": "Llama 3.3 70B - General purpose",
"recommended": false,
"max_tokens": 32768,
"max_context_tokens": 131072
},
{
"id": "llama-3.1-8b-instant",
"label": "Llama 3.1 8B - Fastest",
"recommended": false,
"max_tokens": 131072,
"max_context_tokens": 131072
}
]
},
"cerebras": {
"default_model": "zai-glm-4.7",
"default_model": "gpt-oss-120b",
"models": [
{
"id": "zai-glm-4.7",
"label": "ZAI-GLM 4.7 - Best quality",
"id": "gpt-oss-120b",
"label": "GPT-OSS 120B - Best production reasoning",
"recommended": true,
"max_tokens": 8192,
"max_context_tokens": 120000
"max_tokens": 40960,
"max_context_tokens": 131072
},
{
"id": "qwen3-235b-a22b-instruct-2507",
"label": "Qwen3 235B - Frontier reasoning",
"id": "llama3.1-8b",
"label": "Llama 3.1 8B - Fastest production",
"recommended": false,
"max_tokens": 8192,
"max_context_tokens": 120000
"max_context_tokens": 32768
},
{
"id": "zai-glm-4.7",
"label": "Z.ai GLM 4.7 - Strong coding preview",
"recommended": true,
"max_tokens": 40960,
"max_context_tokens": 131072
},
{
"id": "qwen-3-235b-a22b-instruct-2507",
"label": "Qwen 3 235B Instruct - Frontier preview",
"recommended": false,
"max_tokens": 40960,
"max_context_tokens": 131072
}
]
},
"minimax": {
"default_model": "MiniMax-M2.5",
"default_model": "MiniMax-M2.7",
"models": [
{
"id": "MiniMax-M2.5",
"label": "MiniMax-M2.5",
"id": "MiniMax-M2.7",
"label": "MiniMax M2.7 - Best coding quality",
"recommended": true,
"max_tokens": 8192,
"max_context_tokens": 120000
"max_tokens": 32768,
"max_context_tokens": 204800
},
{
"id": "MiniMax-M2.7-highspeed",
"label": "MiniMax M2.7 Highspeed - Faster",
"recommended": false,
"max_tokens": 32768,
"max_context_tokens": 204800
},
{
"id": "MiniMax-M2.5",
"label": "MiniMax M2.5 - Strong value",
"recommended": false,
"max_tokens": 32768,
"max_context_tokens": 204800
},
{
"id": "MiniMax-M2.5-highspeed",
"label": "MiniMax M2.5 Highspeed - Faster",
"recommended": false,
"max_tokens": 32768,
"max_context_tokens": 204800
}
]
},
"mistral": {
"default_model": "mistral-large-latest",
"default_model": "mistral-large-2512",
"models": [
{
"id": "mistral-large-latest",
"label": "Mistral Large",
"id": "mistral-large-2512",
"label": "Mistral Large 3 - Best quality",
"recommended": true,
"max_tokens": 8192,
"max_context_tokens": 120000
"max_tokens": 32768,
"max_context_tokens": 256000
},
{
"id": "mistral-medium-2508",
"label": "Mistral Medium 3.1 - Balanced",
"recommended": false,
"max_tokens": 32768,
"max_context_tokens": 128000
},
{
"id": "mistral-small-2603",
"label": "Mistral Small 4 - Fast + capable",
"recommended": false,
"max_tokens": 32768,
"max_context_tokens": 256000
},
{
"id": "codestral-2508",
"label": "Codestral - Coding specialist",
"recommended": false,
"max_tokens": 32768,
"max_context_tokens": 128000
}
]
},
"together": {
"default_model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
"default_model": "deepseek-ai/DeepSeek-V3.1",
"models": [
{
"id": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
"label": "Llama 3.3 70B Turbo",
"id": "deepseek-ai/DeepSeek-V3.1",
"label": "DeepSeek V3.1 - Best general coding",
"recommended": true,
"max_tokens": 32768,
"max_context_tokens": 128000
},
{
"id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8",
"label": "Qwen3 Coder 480B - Advanced coding",
"recommended": false,
"max_tokens": 8192,
"max_context_tokens": 120000
"max_tokens": 32768,
"max_context_tokens": 262144
},
{
"id": "openai/gpt-oss-120b",
"label": "GPT-OSS 120B - Strong reasoning",
"recommended": false,
"max_tokens": 32768,
"max_context_tokens": 128000
},
{
"id": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
"label": "Llama 3.3 70B Turbo - Fast baseline",
"recommended": false,
"max_tokens": 32768,
"max_context_tokens": 131072
}
]
},
@@ -151,43 +242,57 @@
"models": [
{
"id": "deepseek-chat",
"label": "DeepSeek Chat",
"recommended": false,
"label": "DeepSeek Chat - Fast default",
"recommended": true,
"max_tokens": 8192,
"max_context_tokens": 120000
"max_context_tokens": 128000
},
{
"id": "deepseek-reasoner",
"label": "DeepSeek Reasoner - Deep thinking",
"recommended": false,
"max_tokens": 64000,
"max_context_tokens": 128000
}
]
},
"openrouter": {
"default_model": "google/gemini-2.5-pro",
"default_model": "openai/gpt-5.4",
"models": [
{
"id": "google/gemini-2.5-pro",
"label": "Gemini 2.5 Pro",
"id": "openai/gpt-5.4",
"label": "GPT-5.4 - Best overall",
"recommended": true,
"max_tokens": 8192,
"max_context_tokens": 900000
"max_tokens": 128000,
"max_context_tokens": 922000
},
{
"id": "google/gemini-2.5-flash",
"label": "Gemini 2.5 Flash",
"id": "anthropic/claude-sonnet-4.6",
"label": "Claude Sonnet 4.6 - Best coding balance",
"recommended": false,
"max_tokens": 8192,
"max_context_tokens": 900000
"max_tokens": 64000,
"max_context_tokens": 936000
},
{
"id": "anthropic/claude-sonnet-4",
"label": "Claude Sonnet 4 (via OR)",
"id": "anthropic/claude-opus-4.6",
"label": "Claude Opus 4.6 - Most capable",
"recommended": false,
"max_tokens": 8192,
"max_context_tokens": 180000
"max_tokens": 128000,
"max_context_tokens": 872000
},
{
"id": "deepseek/deepseek-r1",
"label": "DeepSeek R1",
"id": "google/gemini-3.1-pro-preview",
"label": "Gemini 3.1 Pro Preview - Long-context reasoning",
"recommended": false,
"max_tokens": 8192,
"max_context_tokens": 120000
"max_tokens": 32768,
"max_context_tokens": 1048576
},
{
"id": "deepseek/deepseek-v3.2",
"label": "DeepSeek V3.2 - Best value",
"recommended": false,
"max_tokens": 32768,
"max_context_tokens": 163840
}
]
}
@@ -217,9 +322,9 @@
"minimax_code": {
"provider": "minimax",
"api_key_env_var": "MINIMAX_API_KEY",
"model": "MiniMax-M2.5",
"model": "MiniMax-M2.7",
"max_tokens": 32768,
"max_context_tokens": 900000,
"max_context_tokens": 204800,
"api_base": "https://api.minimax.io/v1"
},
"kimi_code": {
+7 -7
View File
@@ -43,19 +43,19 @@ API_KEY_PROVIDERS = [
(
"GROQ_API_KEY",
"Groq",
"moonshotai/kimi-k2-instruct-0905",
"moonshotai/kimi-k2-instruct-0905",
"openai/gpt-oss-120b",
"openai/gpt-oss-120b",
),
("MISTRAL_API_KEY", "Mistral", "mistral-large-latest", "mistral-large-latest"),
("CEREBRAS_API_KEY", "Cerebras", "cerebras/zai-glm-4.7", "cerebras/zai-glm-4.7"),
("MISTRAL_API_KEY", "Mistral", "mistral-large-2512", "mistral-large-2512"),
("CEREBRAS_API_KEY", "Cerebras", "cerebras/gpt-oss-120b", "cerebras/gpt-oss-120b"),
(
"TOGETHER_API_KEY",
"Together AI",
"together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo",
"together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo",
"together_ai/deepseek-ai/DeepSeek-V3.1",
"together_ai/deepseek-ai/DeepSeek-V3.1",
),
("DEEPSEEK_API_KEY", "DeepSeek", "deepseek-chat", "deepseek-chat"),
("MINIMAX_API_KEY", "MiniMax", "MiniMax-M2.5", "MiniMax-M2.5"),
("MINIMAX_API_KEY", "MiniMax", "MiniMax-M2.7", "MiniMax-M2.7"),
("HIVE_API_KEY", "Hive LLM", "hive/queen", "hive/queen"),
]
+125
View File
@@ -45,6 +45,122 @@ def test_anthropic_curated_limits_track_documented_caps_with_safe_input_budget()
assert opus_46["max_context_tokens"] == 872000
def test_groq_catalog_tracks_current_production_models():
groq_default = model_catalog.get_default_models()["groq"]
groq_models = model_catalog.get_models_catalogue()["groq"]
assert groq_default == "openai/gpt-oss-120b"
assert [model["id"] for model in groq_models] == [
"openai/gpt-oss-120b",
"openai/gpt-oss-20b",
"llama-3.3-70b-versatile",
"llama-3.1-8b-instant",
]
assert groq_models[0]["max_tokens"] == 65536
assert groq_models[0]["max_context_tokens"] == 131072
def test_cerebras_catalog_tracks_public_models_endpoint():
cerebras_default = model_catalog.get_default_models()["cerebras"]
cerebras_models = model_catalog.get_models_catalogue()["cerebras"]
assert cerebras_default == "gpt-oss-120b"
assert [model["id"] for model in cerebras_models] == [
"gpt-oss-120b",
"llama3.1-8b",
"zai-glm-4.7",
"qwen-3-235b-a22b-instruct-2507",
]
assert cerebras_models[0]["max_tokens"] == 40960
assert cerebras_models[0]["max_context_tokens"] == 131072
assert cerebras_models[1]["max_tokens"] == 8192
assert cerebras_models[1]["max_context_tokens"] == 32768
def test_minimax_catalog_tracks_current_non_legacy_text_models():
minimax_default = model_catalog.get_default_models()["minimax"]
minimax_models = model_catalog.get_models_catalogue()["minimax"]
assert minimax_default == "MiniMax-M2.7"
assert [model["id"] for model in minimax_models] == [
"MiniMax-M2.7",
"MiniMax-M2.7-highspeed",
"MiniMax-M2.5",
"MiniMax-M2.5-highspeed",
]
assert all(model["max_context_tokens"] == 204800 for model in minimax_models)
assert all(model["max_tokens"] == 32768 for model in minimax_models)
def test_mistral_catalog_tracks_current_curated_models():
mistral_default = model_catalog.get_default_models()["mistral"]
mistral_models = model_catalog.get_models_catalogue()["mistral"]
assert mistral_default == "mistral-large-2512"
assert [model["id"] for model in mistral_models] == [
"mistral-large-2512",
"mistral-medium-2508",
"mistral-small-2603",
"codestral-2508",
]
assert mistral_models[0]["max_context_tokens"] == 256000
assert mistral_models[1]["max_context_tokens"] == 128000
assert mistral_models[2]["max_context_tokens"] == 256000
assert mistral_models[3]["max_context_tokens"] == 128000
def test_together_catalog_tracks_current_serverless_recommendations():
together_default = model_catalog.get_default_models()["together"]
together_models = model_catalog.get_models_catalogue()["together"]
assert together_default == "deepseek-ai/DeepSeek-V3.1"
assert [model["id"] for model in together_models] == [
"deepseek-ai/DeepSeek-V3.1",
"Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8",
"openai/gpt-oss-120b",
"meta-llama/Llama-3.3-70B-Instruct-Turbo",
]
assert together_models[0]["max_context_tokens"] == 128000
assert together_models[1]["max_context_tokens"] == 262144
assert together_models[2]["max_context_tokens"] == 128000
assert together_models[3]["max_context_tokens"] == 131072
def test_deepseek_catalog_tracks_current_api_models():
deepseek_default = model_catalog.get_default_models()["deepseek"]
deepseek_models = model_catalog.get_models_catalogue()["deepseek"]
assert deepseek_default == "deepseek-chat"
assert [model["id"] for model in deepseek_models] == [
"deepseek-chat",
"deepseek-reasoner",
]
assert deepseek_models[0]["max_tokens"] == 8192
assert deepseek_models[0]["max_context_tokens"] == 128000
assert deepseek_models[1]["max_tokens"] == 64000
assert deepseek_models[1]["max_context_tokens"] == 128000
def test_openrouter_catalog_tracks_current_frontier_set():
openrouter_default = model_catalog.get_default_models()["openrouter"]
openrouter_models = model_catalog.get_models_catalogue()["openrouter"]
assert openrouter_default == "openai/gpt-5.4"
assert [model["id"] for model in openrouter_models] == [
"openai/gpt-5.4",
"anthropic/claude-sonnet-4.6",
"anthropic/claude-opus-4.6",
"google/gemini-3.1-pro-preview",
"deepseek/deepseek-v3.2",
]
assert openrouter_models[0]["max_tokens"] == 128000
assert openrouter_models[0]["max_context_tokens"] == 922000
assert openrouter_models[1]["max_context_tokens"] == 936000
assert openrouter_models[2]["max_context_tokens"] == 872000
assert openrouter_models[3]["max_context_tokens"] == 1048576
assert openrouter_models[4]["max_context_tokens"] == 163840
def test_find_model_any_provider_returns_provider_and_model():
provider_id, model = model_catalog.find_model_any_provider("google/gemini-2.5-pro")
@@ -63,6 +179,15 @@ def test_get_preset_returns_subscription_specific_limits():
assert preset["api_base"] == "https://api.kimi.com/coding"
def test_minimax_preset_uses_current_default_model():
preset = model_catalog.get_preset("minimax_code")
assert preset is not None
assert preset["model"] == "MiniMax-M2.7"
assert preset["max_tokens"] == 32768
assert preset["max_context_tokens"] == 204800
def test_load_model_catalog_rejects_duplicate_model_ids(tmp_path, monkeypatch):
bad_catalog = {
"schema_version": 1,
+1 -1
View File
@@ -1369,7 +1369,7 @@ switch ($num) {
Apply-Preset "minimax_code"
Write-Host ""
Write-Ok "Using MiniMax coding key"
Write-Color -Text " Model: MiniMax-M2.5 | API: api.minimax.io" -Color DarkGray
Write-Color -Text " Model: MiniMax-M2.7 | API: api.minimax.io" -Color DarkGray
}
5 {
# Kimi Code Subscription
+1 -1
View File
@@ -1370,7 +1370,7 @@ case $choice in
SIGNUP_URL="https://platform.minimax.io/user-center/basic-information/interface-key"
echo ""
echo -e "${GREEN}${NC} Using MiniMax coding key"
echo -e " ${DIM}Model: MiniMax-M2.5 | API: api.minimax.io${NC}"
echo -e " ${DIM}Model: MiniMax-M2.7 | API: api.minimax.io${NC}"
;;
5)
# Kimi Code Subscription