From acca008772dc0a52608ada29e60284c07ad9710d Mon Sep 17 00:00:00 2001 From: Richard Tang Date: Thu, 9 Apr 2026 11:59:41 -0700 Subject: [PATCH] feat: update provider config --- core/framework/llm/model_catalog.json | 213 +++++++++++++++++++------- core/tests/dummy_agents/run_all.py | 14 +- core/tests/test_model_catalog.py | 125 +++++++++++++++ quickstart.ps1 | 2 +- quickstart.sh | 2 +- 5 files changed, 293 insertions(+), 63 deletions(-) diff --git a/core/framework/llm/model_catalog.json b/core/framework/llm/model_catalog.json index 7f033017..47d6209f 100644 --- a/core/framework/llm/model_catalog.json +++ b/core/framework/llm/model_catalog.json @@ -73,76 +73,167 @@ ] }, "groq": { - "default_model": "moonshotai/kimi-k2-instruct-0905", + "default_model": "openai/gpt-oss-120b", "models": [ { - "id": "moonshotai/kimi-k2-instruct-0905", - "label": "Kimi K2 - Best quality", + "id": "openai/gpt-oss-120b", + "label": "GPT-OSS 120B - Best reasoning", "recommended": true, - "max_tokens": 8192, - "max_context_tokens": 120000 + "max_tokens": 65536, + "max_context_tokens": 131072 }, { - "id": "openai/gpt-oss-120b", - "label": "GPT-OSS 120B - Fast reasoning", + "id": "openai/gpt-oss-20b", + "label": "GPT-OSS 20B - Fast + cheaper", "recommended": false, - "max_tokens": 8192, - "max_context_tokens": 120000 + "max_tokens": 65536, + "max_context_tokens": 131072 + }, + { + "id": "llama-3.3-70b-versatile", + "label": "Llama 3.3 70B - General purpose", + "recommended": false, + "max_tokens": 32768, + "max_context_tokens": 131072 + }, + { + "id": "llama-3.1-8b-instant", + "label": "Llama 3.1 8B - Fastest", + "recommended": false, + "max_tokens": 131072, + "max_context_tokens": 131072 } ] }, "cerebras": { - "default_model": "zai-glm-4.7", + "default_model": "gpt-oss-120b", "models": [ { - "id": "zai-glm-4.7", - "label": "ZAI-GLM 4.7 - Best quality", + "id": "gpt-oss-120b", + "label": "GPT-OSS 120B - Best production reasoning", "recommended": true, - "max_tokens": 8192, - "max_context_tokens": 120000 + "max_tokens": 40960, + "max_context_tokens": 131072 }, { - "id": "qwen3-235b-a22b-instruct-2507", - "label": "Qwen3 235B - Frontier reasoning", + "id": "llama3.1-8b", + "label": "Llama 3.1 8B - Fastest production", "recommended": false, "max_tokens": 8192, - "max_context_tokens": 120000 + "max_context_tokens": 32768 + }, + { + "id": "zai-glm-4.7", + "label": "Z.ai GLM 4.7 - Strong coding preview", + "recommended": true, + "max_tokens": 40960, + "max_context_tokens": 131072 + }, + { + "id": "qwen-3-235b-a22b-instruct-2507", + "label": "Qwen 3 235B Instruct - Frontier preview", + "recommended": false, + "max_tokens": 40960, + "max_context_tokens": 131072 } ] }, "minimax": { - "default_model": "MiniMax-M2.5", + "default_model": "MiniMax-M2.7", "models": [ { - "id": "MiniMax-M2.5", - "label": "MiniMax-M2.5", + "id": "MiniMax-M2.7", + "label": "MiniMax M2.7 - Best coding quality", "recommended": true, - "max_tokens": 8192, - "max_context_tokens": 120000 + "max_tokens": 32768, + "max_context_tokens": 204800 + }, + { + "id": "MiniMax-M2.7-highspeed", + "label": "MiniMax M2.7 Highspeed - Faster", + "recommended": false, + "max_tokens": 32768, + "max_context_tokens": 204800 + }, + { + "id": "MiniMax-M2.5", + "label": "MiniMax M2.5 - Strong value", + "recommended": false, + "max_tokens": 32768, + "max_context_tokens": 204800 + }, + { + "id": "MiniMax-M2.5-highspeed", + "label": "MiniMax M2.5 Highspeed - Faster", + "recommended": false, + "max_tokens": 32768, + "max_context_tokens": 204800 } ] }, "mistral": { - "default_model": "mistral-large-latest", + "default_model": "mistral-large-2512", "models": [ { - "id": "mistral-large-latest", - "label": "Mistral Large", + "id": "mistral-large-2512", + "label": "Mistral Large 3 - Best quality", "recommended": true, - "max_tokens": 8192, - "max_context_tokens": 120000 + "max_tokens": 32768, + "max_context_tokens": 256000 + }, + { + "id": "mistral-medium-2508", + "label": "Mistral Medium 3.1 - Balanced", + "recommended": false, + "max_tokens": 32768, + "max_context_tokens": 128000 + }, + { + "id": "mistral-small-2603", + "label": "Mistral Small 4 - Fast + capable", + "recommended": false, + "max_tokens": 32768, + "max_context_tokens": 256000 + }, + { + "id": "codestral-2508", + "label": "Codestral - Coding specialist", + "recommended": false, + "max_tokens": 32768, + "max_context_tokens": 128000 } ] }, "together": { - "default_model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "default_model": "deepseek-ai/DeepSeek-V3.1", "models": [ { - "id": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "label": "Llama 3.3 70B Turbo", + "id": "deepseek-ai/DeepSeek-V3.1", + "label": "DeepSeek V3.1 - Best general coding", + "recommended": true, + "max_tokens": 32768, + "max_context_tokens": 128000 + }, + { + "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8", + "label": "Qwen3 Coder 480B - Advanced coding", "recommended": false, - "max_tokens": 8192, - "max_context_tokens": 120000 + "max_tokens": 32768, + "max_context_tokens": 262144 + }, + { + "id": "openai/gpt-oss-120b", + "label": "GPT-OSS 120B - Strong reasoning", + "recommended": false, + "max_tokens": 32768, + "max_context_tokens": 128000 + }, + { + "id": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "label": "Llama 3.3 70B Turbo - Fast baseline", + "recommended": false, + "max_tokens": 32768, + "max_context_tokens": 131072 } ] }, @@ -151,43 +242,57 @@ "models": [ { "id": "deepseek-chat", - "label": "DeepSeek Chat", - "recommended": false, + "label": "DeepSeek Chat - Fast default", + "recommended": true, "max_tokens": 8192, - "max_context_tokens": 120000 + "max_context_tokens": 128000 + }, + { + "id": "deepseek-reasoner", + "label": "DeepSeek Reasoner - Deep thinking", + "recommended": false, + "max_tokens": 64000, + "max_context_tokens": 128000 } ] }, "openrouter": { - "default_model": "google/gemini-2.5-pro", + "default_model": "openai/gpt-5.4", "models": [ { - "id": "google/gemini-2.5-pro", - "label": "Gemini 2.5 Pro", + "id": "openai/gpt-5.4", + "label": "GPT-5.4 - Best overall", "recommended": true, - "max_tokens": 8192, - "max_context_tokens": 900000 + "max_tokens": 128000, + "max_context_tokens": 922000 }, { - "id": "google/gemini-2.5-flash", - "label": "Gemini 2.5 Flash", + "id": "anthropic/claude-sonnet-4.6", + "label": "Claude Sonnet 4.6 - Best coding balance", "recommended": false, - "max_tokens": 8192, - "max_context_tokens": 900000 + "max_tokens": 64000, + "max_context_tokens": 936000 }, { - "id": "anthropic/claude-sonnet-4", - "label": "Claude Sonnet 4 (via OR)", + "id": "anthropic/claude-opus-4.6", + "label": "Claude Opus 4.6 - Most capable", "recommended": false, - "max_tokens": 8192, - "max_context_tokens": 180000 + "max_tokens": 128000, + "max_context_tokens": 872000 }, { - "id": "deepseek/deepseek-r1", - "label": "DeepSeek R1", + "id": "google/gemini-3.1-pro-preview", + "label": "Gemini 3.1 Pro Preview - Long-context reasoning", "recommended": false, - "max_tokens": 8192, - "max_context_tokens": 120000 + "max_tokens": 32768, + "max_context_tokens": 1048576 + }, + { + "id": "deepseek/deepseek-v3.2", + "label": "DeepSeek V3.2 - Best value", + "recommended": false, + "max_tokens": 32768, + "max_context_tokens": 163840 } ] } @@ -217,9 +322,9 @@ "minimax_code": { "provider": "minimax", "api_key_env_var": "MINIMAX_API_KEY", - "model": "MiniMax-M2.5", + "model": "MiniMax-M2.7", "max_tokens": 32768, - "max_context_tokens": 900000, + "max_context_tokens": 204800, "api_base": "https://api.minimax.io/v1" }, "kimi_code": { diff --git a/core/tests/dummy_agents/run_all.py b/core/tests/dummy_agents/run_all.py index f28c67e4..02427c69 100644 --- a/core/tests/dummy_agents/run_all.py +++ b/core/tests/dummy_agents/run_all.py @@ -43,19 +43,19 @@ API_KEY_PROVIDERS = [ ( "GROQ_API_KEY", "Groq", - "moonshotai/kimi-k2-instruct-0905", - "moonshotai/kimi-k2-instruct-0905", + "openai/gpt-oss-120b", + "openai/gpt-oss-120b", ), - ("MISTRAL_API_KEY", "Mistral", "mistral-large-latest", "mistral-large-latest"), - ("CEREBRAS_API_KEY", "Cerebras", "cerebras/zai-glm-4.7", "cerebras/zai-glm-4.7"), + ("MISTRAL_API_KEY", "Mistral", "mistral-large-2512", "mistral-large-2512"), + ("CEREBRAS_API_KEY", "Cerebras", "cerebras/gpt-oss-120b", "cerebras/gpt-oss-120b"), ( "TOGETHER_API_KEY", "Together AI", - "together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo", - "together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo", + "together_ai/deepseek-ai/DeepSeek-V3.1", + "together_ai/deepseek-ai/DeepSeek-V3.1", ), ("DEEPSEEK_API_KEY", "DeepSeek", "deepseek-chat", "deepseek-chat"), - ("MINIMAX_API_KEY", "MiniMax", "MiniMax-M2.5", "MiniMax-M2.5"), + ("MINIMAX_API_KEY", "MiniMax", "MiniMax-M2.7", "MiniMax-M2.7"), ("HIVE_API_KEY", "Hive LLM", "hive/queen", "hive/queen"), ] diff --git a/core/tests/test_model_catalog.py b/core/tests/test_model_catalog.py index d189d363..92ff9107 100644 --- a/core/tests/test_model_catalog.py +++ b/core/tests/test_model_catalog.py @@ -45,6 +45,122 @@ def test_anthropic_curated_limits_track_documented_caps_with_safe_input_budget() assert opus_46["max_context_tokens"] == 872000 +def test_groq_catalog_tracks_current_production_models(): + groq_default = model_catalog.get_default_models()["groq"] + groq_models = model_catalog.get_models_catalogue()["groq"] + + assert groq_default == "openai/gpt-oss-120b" + assert [model["id"] for model in groq_models] == [ + "openai/gpt-oss-120b", + "openai/gpt-oss-20b", + "llama-3.3-70b-versatile", + "llama-3.1-8b-instant", + ] + assert groq_models[0]["max_tokens"] == 65536 + assert groq_models[0]["max_context_tokens"] == 131072 + + +def test_cerebras_catalog_tracks_public_models_endpoint(): + cerebras_default = model_catalog.get_default_models()["cerebras"] + cerebras_models = model_catalog.get_models_catalogue()["cerebras"] + + assert cerebras_default == "gpt-oss-120b" + assert [model["id"] for model in cerebras_models] == [ + "gpt-oss-120b", + "llama3.1-8b", + "zai-glm-4.7", + "qwen-3-235b-a22b-instruct-2507", + ] + assert cerebras_models[0]["max_tokens"] == 40960 + assert cerebras_models[0]["max_context_tokens"] == 131072 + assert cerebras_models[1]["max_tokens"] == 8192 + assert cerebras_models[1]["max_context_tokens"] == 32768 + + +def test_minimax_catalog_tracks_current_non_legacy_text_models(): + minimax_default = model_catalog.get_default_models()["minimax"] + minimax_models = model_catalog.get_models_catalogue()["minimax"] + + assert minimax_default == "MiniMax-M2.7" + assert [model["id"] for model in minimax_models] == [ + "MiniMax-M2.7", + "MiniMax-M2.7-highspeed", + "MiniMax-M2.5", + "MiniMax-M2.5-highspeed", + ] + assert all(model["max_context_tokens"] == 204800 for model in minimax_models) + assert all(model["max_tokens"] == 32768 for model in minimax_models) + + +def test_mistral_catalog_tracks_current_curated_models(): + mistral_default = model_catalog.get_default_models()["mistral"] + mistral_models = model_catalog.get_models_catalogue()["mistral"] + + assert mistral_default == "mistral-large-2512" + assert [model["id"] for model in mistral_models] == [ + "mistral-large-2512", + "mistral-medium-2508", + "mistral-small-2603", + "codestral-2508", + ] + assert mistral_models[0]["max_context_tokens"] == 256000 + assert mistral_models[1]["max_context_tokens"] == 128000 + assert mistral_models[2]["max_context_tokens"] == 256000 + assert mistral_models[3]["max_context_tokens"] == 128000 + + +def test_together_catalog_tracks_current_serverless_recommendations(): + together_default = model_catalog.get_default_models()["together"] + together_models = model_catalog.get_models_catalogue()["together"] + + assert together_default == "deepseek-ai/DeepSeek-V3.1" + assert [model["id"] for model in together_models] == [ + "deepseek-ai/DeepSeek-V3.1", + "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8", + "openai/gpt-oss-120b", + "meta-llama/Llama-3.3-70B-Instruct-Turbo", + ] + assert together_models[0]["max_context_tokens"] == 128000 + assert together_models[1]["max_context_tokens"] == 262144 + assert together_models[2]["max_context_tokens"] == 128000 + assert together_models[3]["max_context_tokens"] == 131072 + + +def test_deepseek_catalog_tracks_current_api_models(): + deepseek_default = model_catalog.get_default_models()["deepseek"] + deepseek_models = model_catalog.get_models_catalogue()["deepseek"] + + assert deepseek_default == "deepseek-chat" + assert [model["id"] for model in deepseek_models] == [ + "deepseek-chat", + "deepseek-reasoner", + ] + assert deepseek_models[0]["max_tokens"] == 8192 + assert deepseek_models[0]["max_context_tokens"] == 128000 + assert deepseek_models[1]["max_tokens"] == 64000 + assert deepseek_models[1]["max_context_tokens"] == 128000 + + +def test_openrouter_catalog_tracks_current_frontier_set(): + openrouter_default = model_catalog.get_default_models()["openrouter"] + openrouter_models = model_catalog.get_models_catalogue()["openrouter"] + + assert openrouter_default == "openai/gpt-5.4" + assert [model["id"] for model in openrouter_models] == [ + "openai/gpt-5.4", + "anthropic/claude-sonnet-4.6", + "anthropic/claude-opus-4.6", + "google/gemini-3.1-pro-preview", + "deepseek/deepseek-v3.2", + ] + assert openrouter_models[0]["max_tokens"] == 128000 + assert openrouter_models[0]["max_context_tokens"] == 922000 + assert openrouter_models[1]["max_context_tokens"] == 936000 + assert openrouter_models[2]["max_context_tokens"] == 872000 + assert openrouter_models[3]["max_context_tokens"] == 1048576 + assert openrouter_models[4]["max_context_tokens"] == 163840 + + def test_find_model_any_provider_returns_provider_and_model(): provider_id, model = model_catalog.find_model_any_provider("google/gemini-2.5-pro") @@ -63,6 +179,15 @@ def test_get_preset_returns_subscription_specific_limits(): assert preset["api_base"] == "https://api.kimi.com/coding" +def test_minimax_preset_uses_current_default_model(): + preset = model_catalog.get_preset("minimax_code") + + assert preset is not None + assert preset["model"] == "MiniMax-M2.7" + assert preset["max_tokens"] == 32768 + assert preset["max_context_tokens"] == 204800 + + def test_load_model_catalog_rejects_duplicate_model_ids(tmp_path, monkeypatch): bad_catalog = { "schema_version": 1, diff --git a/quickstart.ps1 b/quickstart.ps1 index dc19682c..01eaff40 100644 --- a/quickstart.ps1 +++ b/quickstart.ps1 @@ -1369,7 +1369,7 @@ switch ($num) { Apply-Preset "minimax_code" Write-Host "" Write-Ok "Using MiniMax coding key" - Write-Color -Text " Model: MiniMax-M2.5 | API: api.minimax.io" -Color DarkGray + Write-Color -Text " Model: MiniMax-M2.7 | API: api.minimax.io" -Color DarkGray } 5 { # Kimi Code Subscription diff --git a/quickstart.sh b/quickstart.sh index 165e69d9..d825a0ae 100755 --- a/quickstart.sh +++ b/quickstart.sh @@ -1370,7 +1370,7 @@ case $choice in SIGNUP_URL="https://platform.minimax.io/user-center/basic-information/interface-key" echo "" echo -e "${GREEN}⬢${NC} Using MiniMax coding key" - echo -e " ${DIM}Model: MiniMax-M2.5 | API: api.minimax.io${NC}" + echo -e " ${DIM}Model: MiniMax-M2.7 | API: api.minimax.io${NC}" ;; 5) # Kimi Code Subscription