Files
hive/core/tests/test_capabilities.py
T
Hundao de8d6f0946 fix(tests): unblock main CI (#7141)
Two unrelated test failures were keeping main red:

- test_capabilities.py: fixtures referenced deprecated model identifiers
  no longer in model_catalog.json. After the catalog refactor, unknown
  models default to vision-capable, so 12 "expect False" assertions
  flipped to True. Replace fixtures with current catalog entries that
  carry an explicit supports_vision flag.

- test_colony_runtime_overseer.py: a 200ms hard sleep racing the
  background worker was flaky on Windows CI. Poll for llm.stream_calls
  with a 5s deadline instead.
2026-04-26 21:34:21 +08:00

109 lines
4.1 KiB
Python

"""Tests for LLM model capability checks."""
from __future__ import annotations
import pytest
from framework.llm.capabilities import filter_tools_for_model, supports_image_tool_results
from framework.llm.provider import Tool
class TestSupportsImageToolResults:
    """Check ``supports_image_tool_results`` for vision and text-only model ids."""

    # Identifiers for which the helper is expected to return True.
    _VISION_MODELS = (
        # Catalog entries with supports_vision=true
        "claude-haiku-4-5-20251001",
        "claude-sonnet-4-5-20250929",
        "claude-opus-4-6",
        "gpt-5.4",
        "gpt-5.4-mini",
        "gemini-3-flash-preview",
        "kimi-k2.5",
        # Provider-prefixed catalog entries
        "openrouter/openai/gpt-5.4",
        "openrouter/anthropic/claude-sonnet-4.6",
        # Unknown models default to True (hosted frontier assumption)
        "some-future-model",
        "azure/gpt-5",
    )

    # Identifiers for which the helper is expected to return False.
    _TEXT_ONLY_MODELS = (
        # Catalog entries with supports_vision=false
        "deepseek-reasoner",
        "deepseek-v4-pro",
        "deepseek-v4-flash",
        "glm-5.1",
        "queen",
        "MiniMax-M2.7",
        "codestral-2508",
        "llama-3.3-70b-versatile",
        # Provider-prefixed forms resolve to the same catalog entry
        "deepseek/deepseek-reasoner",
        "hive/glm-5.1",
        "groq/llama-3.3-70b-versatile",
    )

    @pytest.mark.parametrize("model", _VISION_MODELS)
    def test_supported_models(self, model: str):
        """Each vision-capable (or unknown) id must yield exactly ``True``."""
        result = supports_image_tool_results(model)
        assert result is True

    @pytest.mark.parametrize("model", _TEXT_ONLY_MODELS)
    def test_unsupported_models(self, model: str):
        """Each text-only id, bare or provider-prefixed, must yield exactly ``False``."""
        result = supports_image_tool_results(model)
        assert result is False
class TestFilterToolsForModel:
    """Verify ``filter_tools_for_model`` — the real helper used by AgentLoop.

    The helper is called as ``filter_tools_for_model(tools, model)`` and is
    expected to return a ``(filtered_tools, hidden_names)`` pair.
    """

    def test_hides_image_tool_from_text_only_model(self):
        """An image-producing tool is dropped and reported for a text-only model."""
        tools = [
            Tool(name="read_file", description="read a file"),
            Tool(name="browser_screenshot", description="take a screenshot", produces_image=True),
            Tool(name="browser_snapshot", description="get page content"),
        ]
        filtered, hidden = filter_tools_for_model(tools, "glm-5.1")
        names = [t.name for t in filtered]
        assert "browser_screenshot" not in names
        assert "read_file" in names
        assert "browser_snapshot" in names
        assert hidden == ["browser_screenshot"]

    def test_keeps_image_tool_for_vision_model(self):
        """A vision-capable model keeps every tool and nothing is reported hidden."""
        tools = [
            Tool(name="read_file", description="read a file"),
            Tool(name="browser_screenshot", description="take a screenshot", produces_image=True),
        ]
        filtered, hidden = filter_tools_for_model(tools, "claude-sonnet-4-5-20250929")
        assert {t.name for t in filtered} == {"read_file", "browser_screenshot"}
        assert hidden == []

    def test_default_tools_are_not_filtered(self):
        """Tools without produces_image (default False) are kept for all models."""
        tools = [
            Tool(name="read_file", description="read a file"),
            Tool(name="web_search", description="search the web"),
        ]
        text_only, text_hidden = filter_tools_for_model(tools, "glm-5.1")
        vision, vision_hidden = filter_tools_for_model(tools, "claude-sonnet-4-5-20250929")
        assert len(text_only) == 2 and text_hidden == []
        assert len(vision) == 2 and vision_hidden == []

    def test_empty_model_string_returns_tools_unchanged(self):
        """Guards the ctx.llm-missing path where model is empty."""
        tools = [
            Tool(name="browser_screenshot", description="", produces_image=True),
        ]
        filtered, hidden = filter_tools_for_model(tools, "")
        assert len(filtered) == 1
        assert hidden == []

    def test_returned_list_is_a_copy(self):
        """Caller should be free to mutate the filtered list without affecting input."""
        tools = [Tool(name="read_file", description="")]
        # Reuse the vision-model fixture from the other tests in this class so
        # the check does not depend on how an id outside that fixture set is
        # resolved.
        filtered, _ = filter_tools_for_model(tools, "claude-sonnet-4-5-20250929")
        filtered.append(Tool(name="extra", description=""))
        assert len(tools) == 1