feat: strip image content for non-vision models

This commit is contained in:
bryan
2026-03-18 12:40:30 -07:00
parent 9dadb5264d
commit c0da3bec02
3 changed files with 104 additions and 1 deletion
+12 -1
View File
@@ -24,6 +24,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
from framework.graph.conversation import ConversationStore, NodeConversation
from framework.graph.node import NodeContext, NodeProtocol, NodeResult
from framework.llm.capabilities import supports_image_tool_results
from framework.llm.provider import Tool, ToolResult, ToolUse
from framework.llm.stream_events import (
FinishEvent,
@@ -2703,11 +2704,21 @@ class EventLoopNode(NodeProtocol):
real_tool_results.append(tool_entry)
logged_tool_calls.append(tool_entry)
# Strip image content for models that can't handle it
image_content = result.image_content
if image_content and ctx.llm and not supports_image_tool_results(ctx.llm.model):
logger.info(
"Stripping image_content from tool result — model '%s' "
"does not support images in tool results",
ctx.llm.model,
)
image_content = None
await conversation.add_tool_result(
tool_use_id=tc.tool_use_id,
content=result.content,
is_error=result.is_error,
image_content=result.image_content,
image_content=image_content,
)
if tc.tool_name in ("ask_user", "ask_user_multiple"):
# Defer tool_call_completed until after user responds
+34
View File
@@ -0,0 +1,34 @@
"""Model capability checks for LLM providers."""
from __future__ import annotations
# Model/provider prefixes known to be UNABLE to accept image content blocks
# inside tool result messages. This is deliberately a deny-list rather than an
# allow-list: the vast majority of OpenAI-compatible providers forward content
# lists to the API untouched, and only a handful are known to break on (or
# silently drop) image blocks.
_IMAGE_TOOL_RESULT_DENY_PREFIXES: tuple[str, ...] = (
    # DeepSeek: LiteLLM explicitly flattens all content lists to strings,
    # silently dropping image blocks.
    "deepseek/",
    "deepseek-",
    # Local model providers: most models lack vision support, and those that
    # do typically handle images in user messages only, not tool results.
    "ollama/",
    "ollama_chat/",
    "lm_studio/",
    "vllm/",
    "llamacpp/",
    # Cerebras: no known vision/multimodal support.
    "cerebras/",
)


def supports_image_tool_results(model: str) -> bool:
    """Return whether *model* can receive image content in tool result messages.

    Models on the deny-list are known to either silently strip images or lack
    vision support entirely. Everything else is assumed to work (OpenAI,
    Anthropic, Gemini, Mistral, Groq, etc. all handle it correctly via LiteLLM).
    """
    # str.startswith accepts a tuple of prefixes, so a single call covers the
    # whole deny-list; lowercasing first makes the match case-insensitive.
    return not model.lower().startswith(_IMAGE_TOOL_RESULT_DENY_PREFIXES)
+58
View File
@@ -0,0 +1,58 @@
"""Tests for LLM model capability checks."""
from __future__ import annotations
import pytest
from framework.llm.capabilities import supports_image_tool_results
class TestSupportsImageToolResults:
    """Verify the deny-list correctly identifies models that can't handle images."""

    # Models/providers expected to accept image blocks in tool results.
    _VISION_OK = [
        "gpt-4o",
        "gpt-4o-mini",
        "gpt-4-turbo",
        "openai/gpt-4o",
        "anthropic/claude-sonnet-4-20250514",
        "claude-haiku-4-5-20251001",
        "gemini/gemini-1.5-pro",
        "google/gemini-1.5-flash",
        "mistral/mistral-large",
        "groq/llama3-70b",
        "together/meta-llama/Llama-3-70b",
        "fireworks_ai/llama-v3-70b",
        "azure/gpt-4o",
        "kimi/claude-sonnet-4-20250514",
        "hive/claude-sonnet-4-20250514",
    ]

    # Deny-listed models/providers: images must be stripped for these.
    _VISION_DENIED = [
        "deepseek/deepseek-chat",
        "deepseek/deepseek-coder",
        "deepseek-chat",
        "deepseek-reasoner",
        "ollama/llama3",
        "ollama/mistral",
        "ollama_chat/llama3",
        "lm_studio/my-model",
        "vllm/meta-llama/Llama-3-70b",
        "llamacpp/model",
        "cerebras/llama3-70b",
    ]

    @pytest.mark.parametrize("model", _VISION_OK)
    def test_supported_models(self, model: str):
        assert supports_image_tool_results(model) is True

    @pytest.mark.parametrize("model", _VISION_DENIED)
    def test_unsupported_models(self, model: str):
        assert supports_image_tool_results(model) is False

    def test_case_insensitive(self):
        # Prefix matching must ignore case in the incoming model string.
        assert supports_image_tool_results("DeepSeek/deepseek-chat") is False
        assert supports_image_tool_results("OLLAMA/llama3") is False
        assert supports_image_tool_results("GPT-4o") is True