feat: model support
This commit is contained in:
@@ -228,22 +228,16 @@ def _vision_fallback_active(model: str | None) -> bool:
|
|||||||
"""Return True if tool-result images for *model* should be routed
|
"""Return True if tool-result images for *model* should be routed
|
||||||
through the vision-fallback chain rather than sent to the model.
|
through the vision-fallback chain rather than sent to the model.
|
||||||
|
|
||||||
Trigger: the model appears in Hive's curated text-only deny list
|
Trigger: the model's catalog entry has ``supports_vision: false``
|
||||||
(``capabilities.supports_image_tool_results`` returns False).
|
(resolved via :func:`capabilities.supports_image_tool_results`,
|
||||||
That list is the only reliable signal — LiteLLM's
|
which reads ``model_catalog.json``). Unknown models default to
|
||||||
``supports_vision`` returns False for any unknown model
|
vision-capable, so the fallback only fires when the catalog
|
||||||
(including custom-served vision-capable models like Jackrong/Qwopus3.5)
|
explicitly says the model is text-only.
|
||||||
so it cannot be used as a gate; and LiteLLM's openai chat
|
|
||||||
transformer doesn't strip image blocks anyway, so passing them
|
|
||||||
through to a vision-capable but litellm-unrecognised model still
|
|
||||||
works end-to-end.
|
|
||||||
|
|
||||||
The ``vision_fallback`` config block is the *substitution* model —
|
The ``vision_fallback`` config block is the *substitution* model —
|
||||||
it doesn't widen the trigger. To force fallback for a model the
|
it doesn't widen the trigger. To force fallback for a model that
|
||||||
deny list doesn't cover yet, add it to
|
isn't catalogued yet, add an entry to ``model_catalog.json`` with
|
||||||
``capabilities._TEXT_ONLY_MODEL_BARE_PREFIXES`` /
|
``supports_vision: false`` rather than relying on a runtime config.
|
||||||
``_TEXT_ONLY_PROVIDER_PREFIXES`` rather than relying on a runtime
|
|
||||||
config.
|
|
||||||
"""
|
"""
|
||||||
if not model:
|
if not model:
|
||||||
return False
|
return False
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
"""Vision-fallback subagent for tool-result images on text-only LLMs.
|
"""Vision-fallback subagent for tool-result images on text-only LLMs.
|
||||||
|
|
||||||
When a tool returns image content but the main agent's model can't
|
When a tool returns image content but the main agent's model can't
|
||||||
accept image blocks (per ``supports_image_tool_results``), the framework
|
accept image blocks (i.e. its catalog entry has ``supports_vision: false``),
|
||||||
strips the images before they ever reach the LLM. Without this module,
|
the framework strips the images before they ever reach the LLM. Without
|
||||||
the agent then sees only the tool's text envelope (URL, dimensions,
|
this module, the agent then sees only the tool's text envelope (URL,
|
||||||
size) and is blind to whatever the image actually shows.
|
dimensions, size) and is blind to whatever the image actually shows.
|
||||||
|
|
||||||
This module provides:
|
This module provides:
|
||||||
|
|
||||||
|
|||||||
@@ -1,114 +1,32 @@
|
|||||||
"""Model capability checks for LLM providers.
|
"""Model capability checks for LLM providers.
|
||||||
|
|
||||||
Vision support rules are derived from official vendor documentation:
|
Vision support is sourced from the curated ``model_catalog.json``. Each model
|
||||||
- ZAI (z.ai): docs.z.ai/guides/vlm — GLM-4.6V variants are vision; GLM-5/4.6/4.7 are text-only
|
entry carries an optional ``supports_vision`` boolean; unknown models default
|
||||||
- MiniMax: platform.minimax.io/docs — minimax-vl-01 is vision; M2.x are text-only
|
to vision-capable so hosted frontier models work out of the box. To toggle
|
||||||
- DeepSeek: api-docs.deepseek.com — deepseek-vl2 is vision; chat/reasoner are text-only
|
support for a model, edit its catalog entry rather than this file.
|
||||||
- Cerebras: inference-docs.cerebras.ai — no vision models at all
|
|
||||||
- Groq: console.groq.com/docs/vision — vision capable; treat as supported by default
|
|
||||||
- Ollama/LM Studio/vLLM/llama.cpp: local runners denied by default; model names
|
|
||||||
don't reliably indicate vision support, so users must configure explicitly
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
from framework.llm.model_catalog import model_supports_vision
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from framework.llm.provider import Tool
|
from framework.llm.provider import Tool
|
||||||
|
|
||||||
|
|
||||||
def _model_name(model: str) -> str:
|
|
||||||
"""Return the bare model name after stripping any 'provider/' prefix."""
|
|
||||||
if "/" in model:
|
|
||||||
return model.split("/", 1)[1]
|
|
||||||
return model
|
|
||||||
|
|
||||||
|
|
||||||
# Step 1: explicit vision allow-list — these always support images regardless
|
|
||||||
# of what the provider-level rules say. Checked first so that e.g. glm-4.6v
|
|
||||||
# is allowed even though glm-4.6 is denied.
|
|
||||||
_VISION_ALLOW_BARE_PREFIXES: tuple[str, ...] = (
|
|
||||||
# ZAI/GLM vision models (docs.z.ai/guides/vlm)
|
|
||||||
"glm-4v", # GLM-4V series (legacy)
|
|
||||||
"glm-4.6v", # GLM-4.6V, GLM-4.6V-flash, GLM-4.6V-flashx
|
|
||||||
# DeepSeek vision models
|
|
||||||
"deepseek-vl", # deepseek-vl2, deepseek-vl2-small, deepseek-vl2-tiny
|
|
||||||
# MiniMax vision model
|
|
||||||
"minimax-vl", # minimax-vl-01
|
|
||||||
)
|
|
||||||
|
|
||||||
# Step 2: provider-level deny — every model from this provider is text-only.
|
|
||||||
_TEXT_ONLY_PROVIDER_PREFIXES: tuple[str, ...] = (
|
|
||||||
# Cerebras: inference-docs.cerebras.ai lists only text models
|
|
||||||
"cerebras/",
|
|
||||||
# Local runners: model names don't reliably indicate vision support
|
|
||||||
"ollama/",
|
|
||||||
"ollama_chat/",
|
|
||||||
"lm_studio/",
|
|
||||||
"vllm/",
|
|
||||||
"llamacpp/",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Step 3: per-model deny — text-only models within otherwise mixed providers.
|
|
||||||
# Matched against the bare model name (provider prefix stripped, lower-cased).
|
|
||||||
# The vision allow-list above is checked first, so vision variants of the same
|
|
||||||
# family are already handled before these deny patterns are reached.
|
|
||||||
_TEXT_ONLY_MODEL_BARE_PREFIXES: tuple[str, ...] = (
|
|
||||||
# --- ZAI / GLM family ---
|
|
||||||
# text-only: glm-5, glm-4.6, glm-4.7, glm-4.5, zai-glm-*
|
|
||||||
# vision: glm-4v, glm-4.6v (caught by allow-list above)
|
|
||||||
"glm-5",
|
|
||||||
"glm-4.6", # bare glm-4.6 is text-only; glm-4.6v is caught by allow-list
|
|
||||||
"glm-4.7",
|
|
||||||
"glm-4.5",
|
|
||||||
"zai-glm",
|
|
||||||
# --- DeepSeek ---
|
|
||||||
# text-only: deepseek-chat, deepseek-coder, deepseek-reasoner
|
|
||||||
# vision: deepseek-vl2 (caught by allow-list above)
|
|
||||||
# Note: LiteLLM's deepseek handler may flatten content lists for some models;
|
|
||||||
# VL models are allowed through and rely on LiteLLM's native VL support.
|
|
||||||
"deepseek-chat",
|
|
||||||
"deepseek-coder",
|
|
||||||
"deepseek-reasoner",
|
|
||||||
# --- MiniMax ---
|
|
||||||
# text-only: minimax-m2.*, minimax-text-*, abab* (legacy)
|
|
||||||
# vision: minimax-vl-01 (caught by allow-list above)
|
|
||||||
"minimax-m2",
|
|
||||||
"minimax-text",
|
|
||||||
"abab",
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def supports_image_tool_results(model: str) -> bool:
|
def supports_image_tool_results(model: str) -> bool:
|
||||||
"""Return whether *model* can receive image content in messages.
|
"""Return whether *model* can receive image content in messages.
|
||||||
|
|
||||||
Used to gate both user-message images and tool-result image blocks.
|
Thin wrapper over :func:`model_supports_vision` so existing call sites
|
||||||
|
keep working. Used to gate both user-message images and tool-result
|
||||||
Logic (checked in order):
|
image blocks. Empty model strings are treated as capable so the default
|
||||||
1. Vision allow-list → True (known vision model, skip all denies)
|
code path doesn't strip images before a provider is selected.
|
||||||
2. Provider deny → False (entire provider is text-only)
|
|
||||||
3. Model deny → False (specific text-only model within a mixed provider)
|
|
||||||
4. Default → True (assume capable; unknown providers and models)
|
|
||||||
"""
|
"""
|
||||||
model_lower = model.lower()
|
if not model:
|
||||||
bare = _model_name(model_lower)
|
|
||||||
|
|
||||||
# 1. Explicit vision allow — takes priority over all denies
|
|
||||||
if any(bare.startswith(p) for p in _VISION_ALLOW_BARE_PREFIXES):
|
|
||||||
return True
|
return True
|
||||||
|
return model_supports_vision(model)
|
||||||
# 2. Provider-level deny (all models from this provider are text-only)
|
|
||||||
if any(model_lower.startswith(p) for p in _TEXT_ONLY_PROVIDER_PREFIXES):
|
|
||||||
return False
|
|
||||||
|
|
||||||
# 3. Per-model deny (text-only variants within mixed-capability families)
|
|
||||||
if any(bare.startswith(p) for p in _TEXT_ONLY_MODEL_BARE_PREFIXES):
|
|
||||||
return False
|
|
||||||
|
|
||||||
# 5. Default: assume vision capable
|
|
||||||
# Covers: OpenAI, Anthropic, Google, Mistral, Kimi, and other hosted providers
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
def filter_tools_for_model(tools: list[Tool], model: str) -> tuple[list[Tool], list[str]]:
|
def filter_tools_for_model(tools: list[Tool], model: str) -> tuple[list[Tool], list[str]]:
|
||||||
|
|||||||
@@ -9,47 +9,65 @@
|
|||||||
"label": "Haiku 4.5 - Fast + cheap",
|
"label": "Haiku 4.5 - Fast + cheap",
|
||||||
"recommended": false,
|
"recommended": false,
|
||||||
"max_tokens": 64000,
|
"max_tokens": 64000,
|
||||||
"max_context_tokens": 136000
|
"max_context_tokens": 136000,
|
||||||
|
"supports_vision": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "claude-sonnet-4-5-20250929",
|
"id": "claude-sonnet-4-5-20250929",
|
||||||
"label": "Sonnet 4.5 - Best balance",
|
"label": "Sonnet 4.5 - Best balance",
|
||||||
"recommended": false,
|
"recommended": false,
|
||||||
"max_tokens": 64000,
|
"max_tokens": 64000,
|
||||||
"max_context_tokens": 136000
|
"max_context_tokens": 136000,
|
||||||
|
"supports_vision": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "claude-opus-4-6",
|
"id": "claude-opus-4-6",
|
||||||
"label": "Opus 4.6 - Most capable",
|
"label": "Opus 4.6 - Most capable",
|
||||||
"recommended": true,
|
"recommended": true,
|
||||||
"max_tokens": 128000,
|
"max_tokens": 128000,
|
||||||
"max_context_tokens": 872000
|
"max_context_tokens": 872000,
|
||||||
|
"supports_vision": true
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"openai": {
|
"openai": {
|
||||||
"default_model": "gpt-5.4",
|
"default_model": "gpt-5.5",
|
||||||
"models": [
|
"models": [
|
||||||
{
|
{
|
||||||
"id": "gpt-5.4",
|
"id": "gpt-5.5",
|
||||||
"label": "GPT-5.4 - Best intelligence",
|
"label": "GPT-5.5 - Frontier coding + reasoning",
|
||||||
"recommended": true,
|
"recommended": true,
|
||||||
"max_tokens": 128000,
|
"max_tokens": 128000,
|
||||||
"max_context_tokens": 960000
|
"max_context_tokens": 1050000,
|
||||||
|
"pricing_usd_per_mtok": {
|
||||||
|
"input": 5.00,
|
||||||
|
"output": 30.00
|
||||||
|
},
|
||||||
|
"supports_vision": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "gpt-5.4",
|
||||||
|
"label": "GPT-5.4 - Previous flagship",
|
||||||
|
"recommended": false,
|
||||||
|
"max_tokens": 128000,
|
||||||
|
"max_context_tokens": 960000,
|
||||||
|
"supports_vision": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "gpt-5.4-mini",
|
"id": "gpt-5.4-mini",
|
||||||
"label": "GPT-5.4 Mini - Faster + cheaper",
|
"label": "GPT-5.4 Mini - Faster + cheaper",
|
||||||
"recommended": false,
|
"recommended": false,
|
||||||
"max_tokens": 128000,
|
"max_tokens": 128000,
|
||||||
"max_context_tokens": 400000
|
"max_context_tokens": 400000,
|
||||||
|
"supports_vision": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "gpt-5.4-nano",
|
"id": "gpt-5.4-nano",
|
||||||
"label": "GPT-5.4 Nano - Cheapest high-volume",
|
"label": "GPT-5.4 Nano - Cheapest high-volume",
|
||||||
"recommended": false,
|
"recommended": false,
|
||||||
"max_tokens": 128000,
|
"max_tokens": 128000,
|
||||||
"max_context_tokens": 400000
|
"max_context_tokens": 400000,
|
||||||
|
"supports_vision": true
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -61,14 +79,16 @@
|
|||||||
"label": "Gemini 3 Flash - Fast",
|
"label": "Gemini 3 Flash - Fast",
|
||||||
"recommended": false,
|
"recommended": false,
|
||||||
"max_tokens": 32768,
|
"max_tokens": 32768,
|
||||||
"max_context_tokens": 240000
|
"max_context_tokens": 240000,
|
||||||
|
"supports_vision": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "gemini-3.1-pro-preview-customtools",
|
"id": "gemini-3.1-pro-preview-customtools",
|
||||||
"label": "Gemini 3.1 Pro - Best quality",
|
"label": "Gemini 3.1 Pro - Best quality",
|
||||||
"recommended": true,
|
"recommended": true,
|
||||||
"max_tokens": 32768,
|
"max_tokens": 32768,
|
||||||
"max_context_tokens": 240000
|
"max_context_tokens": 240000,
|
||||||
|
"supports_vision": true
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -80,28 +100,32 @@
|
|||||||
"label": "GPT-OSS 120B - Best reasoning",
|
"label": "GPT-OSS 120B - Best reasoning",
|
||||||
"recommended": true,
|
"recommended": true,
|
||||||
"max_tokens": 65536,
|
"max_tokens": 65536,
|
||||||
"max_context_tokens": 131072
|
"max_context_tokens": 131072,
|
||||||
|
"supports_vision": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "openai/gpt-oss-20b",
|
"id": "openai/gpt-oss-20b",
|
||||||
"label": "GPT-OSS 20B - Fast + cheaper",
|
"label": "GPT-OSS 20B - Fast + cheaper",
|
||||||
"recommended": false,
|
"recommended": false,
|
||||||
"max_tokens": 65536,
|
"max_tokens": 65536,
|
||||||
"max_context_tokens": 131072
|
"max_context_tokens": 131072,
|
||||||
|
"supports_vision": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "llama-3.3-70b-versatile",
|
"id": "llama-3.3-70b-versatile",
|
||||||
"label": "Llama 3.3 70B - General purpose",
|
"label": "Llama 3.3 70B - General purpose",
|
||||||
"recommended": false,
|
"recommended": false,
|
||||||
"max_tokens": 32768,
|
"max_tokens": 32768,
|
||||||
"max_context_tokens": 131072
|
"max_context_tokens": 131072,
|
||||||
|
"supports_vision": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "llama-3.1-8b-instant",
|
"id": "llama-3.1-8b-instant",
|
||||||
"label": "Llama 3.1 8B - Fastest",
|
"label": "Llama 3.1 8B - Fastest",
|
||||||
"recommended": false,
|
"recommended": false,
|
||||||
"max_tokens": 131072,
|
"max_tokens": 131072,
|
||||||
"max_context_tokens": 131072
|
"max_context_tokens": 131072,
|
||||||
|
"supports_vision": false
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -113,21 +137,24 @@
|
|||||||
"label": "GPT-OSS 120B - Best production reasoning",
|
"label": "GPT-OSS 120B - Best production reasoning",
|
||||||
"recommended": true,
|
"recommended": true,
|
||||||
"max_tokens": 40960,
|
"max_tokens": 40960,
|
||||||
"max_context_tokens": 131072
|
"max_context_tokens": 131072,
|
||||||
|
"supports_vision": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "zai-glm-4.7",
|
"id": "zai-glm-4.7",
|
||||||
"label": "Z.ai GLM 4.7 - Strong coding preview",
|
"label": "Z.ai GLM 4.7 - Strong coding preview",
|
||||||
"recommended": true,
|
"recommended": true,
|
||||||
"max_tokens": 40960,
|
"max_tokens": 40960,
|
||||||
"max_context_tokens": 131072
|
"max_context_tokens": 131072,
|
||||||
|
"supports_vision": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "qwen-3-235b-a22b-instruct-2507",
|
"id": "qwen-3-235b-a22b-instruct-2507",
|
||||||
"label": "Qwen 3 235B Instruct - Frontier preview",
|
"label": "Qwen 3 235B Instruct - Frontier preview",
|
||||||
"recommended": false,
|
"recommended": false,
|
||||||
"max_tokens": 40960,
|
"max_tokens": 40960,
|
||||||
"max_context_tokens": 131072
|
"max_context_tokens": 131072,
|
||||||
|
"supports_vision": false
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -143,14 +170,16 @@
|
|||||||
"pricing_usd_per_mtok": {
|
"pricing_usd_per_mtok": {
|
||||||
"input": 0.30,
|
"input": 0.30,
|
||||||
"output": 1.20
|
"output": 1.20
|
||||||
}
|
},
|
||||||
|
"supports_vision": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "MiniMax-M2.5",
|
"id": "MiniMax-M2.5",
|
||||||
"label": "MiniMax M2.5 - Strong value",
|
"label": "MiniMax M2.5 - Strong value",
|
||||||
"recommended": false,
|
"recommended": false,
|
||||||
"max_tokens": 40960,
|
"max_tokens": 40960,
|
||||||
"max_context_tokens": 180000
|
"max_context_tokens": 180000,
|
||||||
|
"supports_vision": false
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -162,28 +191,32 @@
|
|||||||
"label": "Mistral Large 3 - Best quality",
|
"label": "Mistral Large 3 - Best quality",
|
||||||
"recommended": true,
|
"recommended": true,
|
||||||
"max_tokens": 32768,
|
"max_tokens": 32768,
|
||||||
"max_context_tokens": 256000
|
"max_context_tokens": 256000,
|
||||||
|
"supports_vision": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "mistral-medium-2508",
|
"id": "mistral-medium-2508",
|
||||||
"label": "Mistral Medium 3.1 - Balanced",
|
"label": "Mistral Medium 3.1 - Balanced",
|
||||||
"recommended": false,
|
"recommended": false,
|
||||||
"max_tokens": 32768,
|
"max_tokens": 32768,
|
||||||
"max_context_tokens": 128000
|
"max_context_tokens": 128000,
|
||||||
|
"supports_vision": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "mistral-small-2603",
|
"id": "mistral-small-2603",
|
||||||
"label": "Mistral Small 4 - Fast + capable",
|
"label": "Mistral Small 4 - Fast + capable",
|
||||||
"recommended": false,
|
"recommended": false,
|
||||||
"max_tokens": 32768,
|
"max_tokens": 32768,
|
||||||
"max_context_tokens": 256000
|
"max_context_tokens": 256000,
|
||||||
|
"supports_vision": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "codestral-2508",
|
"id": "codestral-2508",
|
||||||
"label": "Codestral - Coding specialist",
|
"label": "Codestral - Coding specialist",
|
||||||
"recommended": false,
|
"recommended": false,
|
||||||
"max_tokens": 32768,
|
"max_tokens": 32768,
|
||||||
"max_context_tokens": 128000
|
"max_context_tokens": 128000,
|
||||||
|
"supports_vision": false
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -195,47 +228,71 @@
|
|||||||
"label": "DeepSeek V3.1 - Best general coding",
|
"label": "DeepSeek V3.1 - Best general coding",
|
||||||
"recommended": true,
|
"recommended": true,
|
||||||
"max_tokens": 32768,
|
"max_tokens": 32768,
|
||||||
"max_context_tokens": 128000
|
"max_context_tokens": 128000,
|
||||||
|
"supports_vision": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8",
|
"id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8",
|
||||||
"label": "Qwen3 Coder 480B - Advanced coding",
|
"label": "Qwen3 Coder 480B - Advanced coding",
|
||||||
"recommended": false,
|
"recommended": false,
|
||||||
"max_tokens": 32768,
|
"max_tokens": 32768,
|
||||||
"max_context_tokens": 262144
|
"max_context_tokens": 262144,
|
||||||
|
"supports_vision": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "openai/gpt-oss-120b",
|
"id": "openai/gpt-oss-120b",
|
||||||
"label": "GPT-OSS 120B - Strong reasoning",
|
"label": "GPT-OSS 120B - Strong reasoning",
|
||||||
"recommended": false,
|
"recommended": false,
|
||||||
"max_tokens": 32768,
|
"max_tokens": 32768,
|
||||||
"max_context_tokens": 128000
|
"max_context_tokens": 128000,
|
||||||
|
"supports_vision": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
|
"id": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
|
||||||
"label": "Llama 3.3 70B Turbo - Fast baseline",
|
"label": "Llama 3.3 70B Turbo - Fast baseline",
|
||||||
"recommended": false,
|
"recommended": false,
|
||||||
"max_tokens": 32768,
|
"max_tokens": 32768,
|
||||||
"max_context_tokens": 131072
|
"max_context_tokens": 131072,
|
||||||
|
"supports_vision": false
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"deepseek": {
|
"deepseek": {
|
||||||
"default_model": "deepseek-chat",
|
"default_model": "deepseek-v4-pro",
|
||||||
"models": [
|
"models": [
|
||||||
{
|
{
|
||||||
"id": "deepseek-chat",
|
"id": "deepseek-v4-pro",
|
||||||
"label": "DeepSeek Chat - Fast default",
|
"label": "DeepSeek V4 Pro - Most capable",
|
||||||
"recommended": true,
|
"recommended": true,
|
||||||
"max_tokens": 8192,
|
"max_tokens": 384000,
|
||||||
"max_context_tokens": 128000
|
"max_context_tokens": 1000000,
|
||||||
|
"pricing_usd_per_mtok": {
|
||||||
|
"input": 1.74,
|
||||||
|
"output": 3.48,
|
||||||
|
"cache_read": 0.145
|
||||||
|
},
|
||||||
|
"supports_vision": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "deepseek-v4-flash",
|
||||||
|
"label": "DeepSeek V4 Flash - Fast + cheap",
|
||||||
|
"recommended": true,
|
||||||
|
"max_tokens": 384000,
|
||||||
|
"max_context_tokens": 1000000,
|
||||||
|
"pricing_usd_per_mtok": {
|
||||||
|
"input": 0.14,
|
||||||
|
"output": 0.28,
|
||||||
|
"cache_read": 0.028
|
||||||
|
},
|
||||||
|
"supports_vision": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "deepseek-reasoner",
|
"id": "deepseek-reasoner",
|
||||||
"label": "DeepSeek Reasoner - Deep thinking",
|
"label": "DeepSeek Reasoner - Legacy (deprecating)",
|
||||||
"recommended": false,
|
"recommended": false,
|
||||||
"max_tokens": 64000,
|
"max_tokens": 64000,
|
||||||
"max_context_tokens": 128000
|
"max_context_tokens": 128000,
|
||||||
|
"supports_vision": false
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -252,7 +309,8 @@
|
|||||||
"input": 0.60,
|
"input": 0.60,
|
||||||
"output": 2.50,
|
"output": 2.50,
|
||||||
"cache_read": 0.15
|
"cache_read": 0.15
|
||||||
}
|
},
|
||||||
|
"supports_vision": true
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -264,14 +322,16 @@
|
|||||||
"label": "Queen - Hive native",
|
"label": "Queen - Hive native",
|
||||||
"recommended": true,
|
"recommended": true,
|
||||||
"max_tokens": 32768,
|
"max_tokens": 32768,
|
||||||
"max_context_tokens": 180000
|
"max_context_tokens": 180000,
|
||||||
|
"supports_vision": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "kimi-2.5",
|
"id": "kimi-2.5",
|
||||||
"label": "Kimi 2.5 - Via Hive",
|
"label": "Kimi 2.5 - Via Hive",
|
||||||
"recommended": false,
|
"recommended": false,
|
||||||
"max_tokens": 32768,
|
"max_tokens": 32768,
|
||||||
"max_context_tokens": 240000
|
"max_context_tokens": 240000,
|
||||||
|
"supports_vision": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "glm-5.1",
|
"id": "glm-5.1",
|
||||||
@@ -284,7 +344,8 @@
|
|||||||
"output": 4.40,
|
"output": 4.40,
|
||||||
"cache_read": 0.26,
|
"cache_read": 0.26,
|
||||||
"cache_creation": 0.0
|
"cache_creation": 0.0
|
||||||
}
|
},
|
||||||
|
"supports_vision": false
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -296,42 +357,48 @@
|
|||||||
"label": "GPT-5.4 - Best overall",
|
"label": "GPT-5.4 - Best overall",
|
||||||
"recommended": true,
|
"recommended": true,
|
||||||
"max_tokens": 128000,
|
"max_tokens": 128000,
|
||||||
"max_context_tokens": 872000
|
"max_context_tokens": 872000,
|
||||||
|
"supports_vision": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "anthropic/claude-sonnet-4.6",
|
"id": "anthropic/claude-sonnet-4.6",
|
||||||
"label": "Claude Sonnet 4.6 - Best coding balance",
|
"label": "Claude Sonnet 4.6 - Best coding balance",
|
||||||
"recommended": false,
|
"recommended": false,
|
||||||
"max_tokens": 64000,
|
"max_tokens": 64000,
|
||||||
"max_context_tokens": 872000
|
"max_context_tokens": 872000,
|
||||||
|
"supports_vision": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "anthropic/claude-opus-4.6",
|
"id": "anthropic/claude-opus-4.6",
|
||||||
"label": "Claude Opus 4.6 - Most capable",
|
"label": "Claude Opus 4.6 - Most capable",
|
||||||
"recommended": false,
|
"recommended": false,
|
||||||
"max_tokens": 128000,
|
"max_tokens": 128000,
|
||||||
"max_context_tokens": 872000
|
"max_context_tokens": 872000,
|
||||||
|
"supports_vision": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "google/gemini-3.1-pro-preview-customtools",
|
"id": "google/gemini-3.1-pro-preview-customtools",
|
||||||
"label": "Gemini 3.1 Pro Preview - Long-context reasoning",
|
"label": "Gemini 3.1 Pro Preview - Long-context reasoning",
|
||||||
"recommended": false,
|
"recommended": false,
|
||||||
"max_tokens": 32768,
|
"max_tokens": 32768,
|
||||||
"max_context_tokens": 872000
|
"max_context_tokens": 872000,
|
||||||
|
"supports_vision": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "qwen/qwen3.6-plus",
|
"id": "qwen/qwen3.6-plus",
|
||||||
"label": "Qwen 3.6 Plus - Strong reasoning",
|
"label": "Qwen 3.6 Plus - Strong reasoning",
|
||||||
"recommended": true,
|
"recommended": true,
|
||||||
"max_tokens": 32768,
|
"max_tokens": 32768,
|
||||||
"max_context_tokens": 240000
|
"max_context_tokens": 240000,
|
||||||
|
"supports_vision": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "z-ai/glm-5v-turbo",
|
"id": "z-ai/glm-5v-turbo",
|
||||||
"label": "GLM-5V Turbo - Vision capable",
|
"label": "GLM-5V Turbo - Vision capable",
|
||||||
"recommended": true,
|
"recommended": true,
|
||||||
"max_tokens": 32768,
|
"max_tokens": 32768,
|
||||||
"max_context_tokens": 192000
|
"max_context_tokens": 192000,
|
||||||
|
"supports_vision": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "z-ai/glm-5.1",
|
"id": "z-ai/glm-5.1",
|
||||||
@@ -344,7 +411,8 @@
|
|||||||
"output": 4.40,
|
"output": 4.40,
|
||||||
"cache_read": 0.26,
|
"cache_read": 0.26,
|
||||||
"cache_creation": 0.0
|
"cache_creation": 0.0
|
||||||
}
|
},
|
||||||
|
"supports_vision": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "minimax/minimax-m2.7",
|
"id": "minimax/minimax-m2.7",
|
||||||
@@ -355,14 +423,16 @@
|
|||||||
"pricing_usd_per_mtok": {
|
"pricing_usd_per_mtok": {
|
||||||
"input": 0.30,
|
"input": 0.30,
|
||||||
"output": 1.20
|
"output": 1.20
|
||||||
}
|
},
|
||||||
|
"supports_vision": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "xiaomi/mimo-v2-pro",
|
"id": "xiaomi/mimo-v2-pro",
|
||||||
"label": "MiMo V2 Pro - Xiaomi multimodal",
|
"label": "MiMo V2 Pro - Xiaomi multimodal",
|
||||||
"recommended": true,
|
"recommended": true,
|
||||||
"max_tokens": 64000,
|
"max_tokens": 64000,
|
||||||
"max_context_tokens": 240000
|
"max_context_tokens": 240000,
|
||||||
|
"supports_vision": true
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -95,6 +95,10 @@ def _validate_model_catalog(data: dict[str, Any]) -> dict[str, Any]:
|
|||||||
if pricing is not None:
|
if pricing is not None:
|
||||||
_validate_pricing(pricing, f"{model_path}.pricing_usd_per_mtok")
|
_validate_pricing(pricing, f"{model_path}.pricing_usd_per_mtok")
|
||||||
|
|
||||||
|
supports_vision = model_map.get("supports_vision")
|
||||||
|
if supports_vision is not None and not isinstance(supports_vision, bool):
|
||||||
|
raise ModelCatalogError(f"{model_path}.supports_vision must be a boolean when present")
|
||||||
|
|
||||||
if not default_found:
|
if not default_found:
|
||||||
raise ModelCatalogError(
|
raise ModelCatalogError(
|
||||||
f"{provider_path}.default_model={default_model!r} is not present in {provider_path}.models"
|
f"{provider_path}.default_model={default_model!r} is not present in {provider_path}.models"
|
||||||
@@ -229,6 +233,34 @@ def get_model_pricing(model_id: str) -> dict[str, float] | None:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def model_supports_vision(model_id: str) -> bool:
    """Return whether *model_id* supports image inputs per the curated catalog.

    Two forms of the id are tried, in order: the id exactly as given, then
    the id with its first ``provider/`` segment stripped. Each form is
    compared against every catalog entry's ``id`` across all providers.

    Matching is case-insensitive: catalog ids are mixed-case (for example
    ``MiniMax-M2.5``), and a caller that passes a lower-cased id must still
    hit the entry instead of silently falling through to the default.

    Args:
        model_id: Model identifier, optionally prefixed with ``provider/``.

    Returns:
        The matching entry's ``supports_vision`` flag when it is a boolean.
        ``True`` when *model_id* is empty, when no entry matches, or when the
        matching entry carries no boolean flag — unknown models are assumed
        vision-capable so the captioning fallback only fires when the catalog
        explicitly marks a model as text-only.
    """
    if not model_id:
        return True

    # Full id first, then the provider-stripped form; only the first "/" is
    # split so ids that themselves contain a slash (e.g. "openai/gpt-oss-120b")
    # survive the stripping of an outer provider prefix.
    candidates = [model_id.casefold()]
    if "/" in model_id:
        candidates.append(model_id.split("/", 1)[1].casefold())

    # Fetch the catalog once; it does not change between iterations.
    providers = load_model_catalog()["providers"]

    for candidate in candidates:
        for provider_info in providers.values():
            for model in provider_info["models"]:
                if model["id"].casefold() != candidate:
                    continue
                flag = model.get("supports_vision")
                # First matching entry decides; a missing flag means capable.
                return flag if isinstance(flag, bool) else True

    return True
|
||||||
|
|
||||||
|
|
||||||
def get_preset(preset_id: str) -> dict[str, Any] | None:
|
def get_preset(preset_id: str) -> dict[str, Any] | None:
|
||||||
"""Return one preset entry."""
|
"""Return one preset entry."""
|
||||||
preset = load_model_catalog()["presets"].get(preset_id)
|
preset = load_model_catalog()["presets"].get(preset_id)
|
||||||
|
|||||||
@@ -24,12 +24,12 @@ def test_default_models_exist_in_each_provider_catalogue():
|
|||||||
|
|
||||||
|
|
||||||
def test_find_model_returns_curated_token_limits():
|
def test_find_model_returns_curated_token_limits():
|
||||||
model = model_catalog.find_model("openai", "gpt-5.4")
|
model = model_catalog.find_model("openai", "gpt-5.5")
|
||||||
|
|
||||||
assert model is not None
|
assert model is not None
|
||||||
assert model["label"] == "GPT-5.4 - Best intelligence"
|
assert model["label"] == "GPT-5.5 - Frontier coding + reasoning"
|
||||||
assert model["max_tokens"] == 128000
|
assert model["max_tokens"] == 128000
|
||||||
assert model["max_context_tokens"] == 960000
|
assert model["max_context_tokens"] == 1050000
|
||||||
|
|
||||||
|
|
||||||
def test_anthropic_curated_limits_track_documented_caps_with_safe_input_budget():
|
def test_anthropic_curated_limits_track_documented_caps_with_safe_input_budget():
|
||||||
@@ -125,15 +125,22 @@ def test_deepseek_catalog_tracks_current_api_models():
|
|||||||
deepseek_default = model_catalog.get_default_models()["deepseek"]
|
deepseek_default = model_catalog.get_default_models()["deepseek"]
|
||||||
deepseek_models = model_catalog.get_models_catalogue()["deepseek"]
|
deepseek_models = model_catalog.get_models_catalogue()["deepseek"]
|
||||||
|
|
||||||
assert deepseek_default == "deepseek-chat"
|
assert deepseek_default == "deepseek-v4-pro"
|
||||||
assert [model["id"] for model in deepseek_models] == [
|
assert [model["id"] for model in deepseek_models] == [
|
||||||
"deepseek-chat",
|
"deepseek-v4-pro",
|
||||||
|
"deepseek-v4-flash",
|
||||||
"deepseek-reasoner",
|
"deepseek-reasoner",
|
||||||
]
|
]
|
||||||
assert deepseek_models[0]["max_tokens"] == 8192
|
# V4 family — 1M context, 384k max output, mirrors api-docs.deepseek.com pricing.
|
||||||
assert deepseek_models[0]["max_context_tokens"] == 128000
|
assert deepseek_models[0]["max_tokens"] == 384000
|
||||||
assert deepseek_models[1]["max_tokens"] == 64000
|
assert deepseek_models[0]["max_context_tokens"] == 1000000
|
||||||
assert deepseek_models[1]["max_context_tokens"] == 128000
|
assert deepseek_models[0]["pricing_usd_per_mtok"]["input"] == 1.74
|
||||||
|
assert deepseek_models[0]["pricing_usd_per_mtok"]["output"] == 3.48
|
||||||
|
assert deepseek_models[1]["pricing_usd_per_mtok"]["input"] == 0.14
|
||||||
|
assert deepseek_models[1]["pricing_usd_per_mtok"]["output"] == 0.28
|
||||||
|
# Legacy reasoner kept for back-compat while users migrate.
|
||||||
|
assert deepseek_models[2]["max_tokens"] == 64000
|
||||||
|
assert deepseek_models[2]["max_context_tokens"] == 128000
|
||||||
|
|
||||||
|
|
||||||
def test_openrouter_catalog_tracks_current_frontier_set():
|
def test_openrouter_catalog_tracks_current_frontier_set():
|
||||||
|
|||||||
+65
-17
@@ -1352,9 +1352,11 @@ fi
|
|||||||
echo ""
|
echo ""
|
||||||
echo -e " ${CYAN}${BOLD}API key providers:${NC}"
|
echo -e " ${CYAN}${BOLD}API key providers:${NC}"
|
||||||
|
|
||||||
# 8-13) API key providers — show (credential detected) if key already set
|
# 8-N) API key providers — show (credential detected) if key already set.
|
||||||
PROVIDER_MENU_ENVS=(ANTHROPIC_API_KEY OPENAI_API_KEY GEMINI_API_KEY GROQ_API_KEY CEREBRAS_API_KEY OPENROUTER_API_KEY)
|
# Order is reflected directly in the menu numbering; the case dispatcher
|
||||||
PROVIDER_MENU_NAMES=("Anthropic (Claude) - Recommended" "OpenAI (GPT)" "Google Gemini - Free tier available" "Groq - Fast, free tier" "Cerebras - Fast, free tier" "OpenRouter - Bring any OpenRouter model")
|
# below matches literal choice numbers (8 + index_in_arrays); keep them in sync.
|
||||||
|
PROVIDER_MENU_ENVS=(ANTHROPIC_API_KEY OPENAI_API_KEY GEMINI_API_KEY GROQ_API_KEY CEREBRAS_API_KEY OPENROUTER_API_KEY DEEPSEEK_API_KEY)
|
||||||
|
PROVIDER_MENU_NAMES=("Anthropic (Claude) - Recommended" "OpenAI (GPT)" "Google Gemini - Free tier available" "Groq - Fast, free tier" "Cerebras - Fast, free tier" "OpenRouter - Bring any OpenRouter model" "DeepSeek - V4 family")
|
||||||
for idx in "${!PROVIDER_MENU_ENVS[@]}"; do
|
for idx in "${!PROVIDER_MENU_ENVS[@]}"; do
|
||||||
num=$((idx + 8))
|
num=$((idx + 8))
|
||||||
env_var="${PROVIDER_MENU_ENVS[$idx]}"
|
env_var="${PROVIDER_MENU_ENVS[$idx]}"
|
||||||
@@ -1365,14 +1367,16 @@ for idx in "${!PROVIDER_MENU_ENVS[@]}"; do
|
|||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
# 14) Local (Ollama) — no API key needed
|
# Local (Ollama) — slot computed from the provider list so adding/removing
|
||||||
|
# API-key providers above doesn't require renumbering by hand.
|
||||||
|
OLLAMA_CHOICE=$((8 + ${#PROVIDER_MENU_ENVS[@]}))
|
||||||
if [ "$OLLAMA_DETECTED" = true ]; then
|
if [ "$OLLAMA_DETECTED" = true ]; then
|
||||||
echo -e " ${CYAN}14)${NC} Local (Ollama) - No API key needed ${GREEN}(ollama detected)${NC}"
|
echo -e " ${CYAN}$OLLAMA_CHOICE)${NC} Local (Ollama) - No API key needed ${GREEN}(ollama detected)${NC}"
|
||||||
else
|
else
|
||||||
echo -e " ${CYAN}14)${NC} Local (Ollama) - No API key needed"
|
echo -e " ${CYAN}$OLLAMA_CHOICE)${NC} Local (Ollama) - No API key needed"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
SKIP_CHOICE=$((8 + ${#PROVIDER_MENU_ENVS[@]} + 1))
|
SKIP_CHOICE=$((OLLAMA_CHOICE + 1))
|
||||||
echo -e " ${CYAN}$SKIP_CHOICE)${NC} Skip for now"
|
echo -e " ${CYAN}$SKIP_CHOICE)${NC} Skip for now"
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
@@ -1578,6 +1582,13 @@ case $choice in
|
|||||||
SIGNUP_URL="https://openrouter.ai/keys"
|
SIGNUP_URL="https://openrouter.ai/keys"
|
||||||
;;
|
;;
|
||||||
14)
|
14)
|
||||||
|
SELECTED_ENV_VAR="DEEPSEEK_API_KEY"
|
||||||
|
SELECTED_PROVIDER_ID="deepseek"
|
||||||
|
SELECTED_API_BASE="https://api.deepseek.com"
|
||||||
|
PROVIDER_NAME="DeepSeek"
|
||||||
|
SIGNUP_URL="https://platform.deepseek.com/api_keys"
|
||||||
|
;;
|
||||||
|
"$OLLAMA_CHOICE")
|
||||||
# Local (Ollama) — no API key; pick model from ollama list
|
# Local (Ollama) — no API key; pick model from ollama list
|
||||||
if [ "$OLLAMA_DETECTED" != true ]; then
|
if [ "$OLLAMA_DETECTED" != true ]; then
|
||||||
echo ""
|
echo ""
|
||||||
@@ -1824,12 +1835,29 @@ echo ""
|
|||||||
# image through a separate VLM subagent that returns a text caption,
|
# image through a separate VLM subagent that returns a text caption,
|
||||||
# preserving the agent's ability to reason about visual state.
|
# preserving the agent's ability to reason about visual state.
|
||||||
#
|
#
|
||||||
# We always offer the prompt — even for vision-capable main models —
|
# Skip entirely when the chosen main model already supports vision per
|
||||||
# so the user gets a working fallback if they ever swap to a text-only
|
# the catalog's ``supports_vision`` flag — the fallback would never fire
|
||||||
# model. The block is dormant for vision-capable mains (the gating
|
# in that case, and prompting for it just adds friction. For text-only
|
||||||
# in agent_loop only fires for models on Hive's deny list).
|
# mains we still offer the prompt so the user can wire up a captioning
|
||||||
|
# subagent.
|
||||||
|
|
||||||
if [ -n "$SELECTED_PROVIDER_ID" ]; then
|
MAIN_MODEL_HAS_VISION="false"
|
||||||
|
if [ -n "$SELECTED_MODEL" ]; then
|
||||||
|
MAIN_MODEL_HAS_VISION=$(uv run python - "$SELECTED_MODEL" <<'PY' 2>/dev/null || echo "false"
|
||||||
|
import sys
|
||||||
|
from framework.llm.model_catalog import model_supports_vision
|
||||||
|
print("true" if model_supports_vision(sys.argv[1]) else "false")
|
||||||
|
PY
|
||||||
|
)
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -n "$SELECTED_PROVIDER_ID" ] && [ "$MAIN_MODEL_HAS_VISION" = "true" ]; then
|
||||||
|
# Drop any stale vision_fallback block so the config reflects the
|
||||||
|
# current main model's capabilities.
|
||||||
|
save_vision_fallback "" "" "" "" > /dev/null 2>&1 || true
|
||||||
|
echo -e "${GREEN}⬢${NC} Vision fallback ${DIM}skipped — ${SELECTED_MODEL} already supports vision${NC}"
|
||||||
|
echo ""
|
||||||
|
elif [ -n "$SELECTED_PROVIDER_ID" ]; then
|
||||||
echo -e "${YELLOW}⬢${NC} ${BLUE}${BOLD}Vision fallback subagent${NC}"
|
echo -e "${YELLOW}⬢${NC} ${BLUE}${BOLD}Vision fallback subagent${NC}"
|
||||||
echo ""
|
echo ""
|
||||||
echo -e " ${DIM}When a screenshot/image tool is called from a text-only model,${NC}"
|
echo -e " ${DIM}When a screenshot/image tool is called from a text-only model,${NC}"
|
||||||
@@ -1840,9 +1868,13 @@ if [ -n "$SELECTED_PROVIDER_ID" ]; then
|
|||||||
|
|
||||||
# Build the candidate list from the same model_catalog.json the main
|
# Build the candidate list from the same model_catalog.json the main
|
||||||
# LLM step uses — never hardcode model IDs in this script. For each
|
# LLM step uses — never hardcode model IDs in this script. For each
|
||||||
# provider in the catalogue, take the catalogue's default model and
|
# provider in the catalogue, pick a model whose ``supports_vision``
|
||||||
# the env var name it expects, then keep only providers the user
|
# flag is true (since the fallback subagent's whole purpose is to
|
||||||
# already has an API key for. Output one TSV row per candidate:
|
# caption images — a text-only candidate would be useless). Prefer
|
||||||
|
# the provider's default when it supports vision, otherwise fall
|
||||||
|
# back to the first vision-capable model in the provider's list.
|
||||||
|
# Skip the provider entirely if no model in its catalog supports
|
||||||
|
# vision. Output one TSV row per candidate:
|
||||||
# provider_id<TAB>model<TAB>env_var<TAB>display_name
|
# provider_id<TAB>model<TAB>env_var<TAB>display_name
|
||||||
VISION_CANDIDATES_TSV=$(uv run python - <<'PY'
|
VISION_CANDIDATES_TSV=$(uv run python - <<'PY'
|
||||||
import os
|
import os
|
||||||
@@ -1879,9 +1911,25 @@ for provider_id, default_model in sorted(defaults.items()):
|
|||||||
env = "GOOGLE_API_KEY"
|
env = "GOOGLE_API_KEY"
|
||||||
if not has_key:
|
if not has_key:
|
||||||
continue
|
continue
|
||||||
|
# Pick a vision-capable model: prefer the catalog default if it has
|
||||||
|
# supports_vision=true, else the first vision-capable model in the
|
||||||
|
# provider's list. Skip the provider if none exist.
|
||||||
|
models = catalog.get(provider_id, [])
|
||||||
|
chosen = None
|
||||||
|
for m in models:
|
||||||
|
if m["id"] == default_model and m.get("supports_vision") is True:
|
||||||
|
chosen = m["id"]
|
||||||
|
break
|
||||||
|
if chosen is None:
|
||||||
|
for m in models:
|
||||||
|
if m.get("supports_vision") is True:
|
||||||
|
chosen = m["id"]
|
||||||
|
break
|
||||||
|
if chosen is None:
|
||||||
|
continue
|
||||||
# Display name: provider/model from the catalogue verbatim
|
# Display name: provider/model from the catalogue verbatim
|
||||||
display = f"{provider_id}/{default_model}"
|
display = f"{provider_id}/{chosen}"
|
||||||
print(f"{provider_id}\t{default_model}\t{env}\t{display}")
|
print(f"{provider_id}\t{chosen}\t{env}\t{display}")
|
||||||
PY
|
PY
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user