Merge pull request #6682 from aden-hive/feat/image-capabilities
Release / Create Release (push) Waiting to run
Release / Create Release (push) Waiting to run
feat: image capabilities — upload, screenshot passthrough, vision detection & fallback, aria refs
This commit is contained in:
@@ -702,6 +702,15 @@ stop_worker() to return to STAGING phase.
|
||||
_queen_behavior_always = """
|
||||
# Behavior
|
||||
|
||||
## Images attached by the user
|
||||
|
||||
Users can attach images directly to their chat messages. When you see an \
|
||||
image in the conversation, analyze it using your native vision capability — \
|
||||
do NOT say you cannot see images or that you lack access to files. The image \
|
||||
is embedded in the message; no tool call is needed to view it. Describe what \
|
||||
you see, answer questions about it, and use the visual content to inform your \
|
||||
response just as you would text.
|
||||
|
||||
## CRITICAL RULE — ask_user / ask_user_multiple
|
||||
|
||||
Every response that ends with a question, a prompt, or expects user \
|
||||
|
||||
@@ -150,7 +150,7 @@ Call all three subagents in a single response to run them in parallel:
|
||||
|
||||
## GCU Anti-Patterns
|
||||
|
||||
- Using `browser_screenshot` to read text (use `browser_snapshot`)
|
||||
- Using `browser_screenshot` to read text (use `browser_snapshot` instead; screenshots are for visual context only)
|
||||
- Re-navigating after scrolling (resets scroll position)
|
||||
- Attempting login on auth walls
|
||||
- Forgetting `target_id` in multi-tab scenarios
|
||||
|
||||
@@ -33,12 +33,20 @@ class Message:
|
||||
is_transition_marker: bool = False
|
||||
# True when this message is real human input (from /chat), not a system prompt
|
||||
is_client_input: bool = False
|
||||
# Optional image content blocks (e.g. from browser_screenshot)
|
||||
image_content: list[dict[str, Any]] | None = None
|
||||
# True when message contains an activated skill body (AS-10: never prune)
|
||||
is_skill_content: bool = False
|
||||
|
||||
def to_llm_dict(self) -> dict[str, Any]:
|
||||
"""Convert to OpenAI-format message dict."""
|
||||
if self.role == "user":
|
||||
if self.image_content:
|
||||
blocks: list[dict[str, Any]] = []
|
||||
if self.content:
|
||||
blocks.append({"type": "text", "text": self.content})
|
||||
blocks.extend(self.image_content)
|
||||
return {"role": "user", "content": blocks}
|
||||
return {"role": "user", "content": self.content}
|
||||
|
||||
if self.role == "assistant":
|
||||
@@ -49,6 +57,15 @@ class Message:
|
||||
|
||||
# role == "tool"
|
||||
content = f"ERROR: {self.content}" if self.is_error else self.content
|
||||
if self.image_content:
|
||||
# Multimodal tool result: text + image content blocks
|
||||
blocks: list[dict[str, Any]] = [{"type": "text", "text": content}]
|
||||
blocks.extend(self.image_content)
|
||||
return {
|
||||
"role": "tool",
|
||||
"tool_call_id": self.tool_use_id,
|
||||
"content": blocks,
|
||||
}
|
||||
return {
|
||||
"role": "tool",
|
||||
"tool_call_id": self.tool_use_id,
|
||||
@@ -74,6 +91,8 @@ class Message:
|
||||
d["is_transition_marker"] = self.is_transition_marker
|
||||
if self.is_client_input:
|
||||
d["is_client_input"] = self.is_client_input
|
||||
if self.image_content is not None:
|
||||
d["image_content"] = self.image_content
|
||||
return d
|
||||
|
||||
@classmethod
|
||||
@@ -89,6 +108,7 @@ class Message:
|
||||
phase_id=data.get("phase_id"),
|
||||
is_transition_marker=data.get("is_transition_marker", False),
|
||||
is_client_input=data.get("is_client_input", False),
|
||||
image_content=data.get("image_content"),
|
||||
)
|
||||
|
||||
|
||||
@@ -375,6 +395,7 @@ class NodeConversation:
|
||||
*,
|
||||
is_transition_marker: bool = False,
|
||||
is_client_input: bool = False,
|
||||
image_content: list[dict[str, Any]] | None = None,
|
||||
) -> Message:
|
||||
msg = Message(
|
||||
seq=self._next_seq,
|
||||
@@ -383,6 +404,7 @@ class NodeConversation:
|
||||
phase_id=self._current_phase,
|
||||
is_transition_marker=is_transition_marker,
|
||||
is_client_input=is_client_input,
|
||||
image_content=image_content,
|
||||
)
|
||||
self._messages.append(msg)
|
||||
self._next_seq += 1
|
||||
@@ -411,6 +433,7 @@ class NodeConversation:
|
||||
tool_use_id: str,
|
||||
content: str,
|
||||
is_error: bool = False,
|
||||
image_content: list[dict[str, Any]] | None = None,
|
||||
is_skill_content: bool = False,
|
||||
) -> Message:
|
||||
msg = Message(
|
||||
@@ -420,6 +443,7 @@ class NodeConversation:
|
||||
tool_use_id=tool_use_id,
|
||||
is_error=is_error,
|
||||
phase_id=self._current_phase,
|
||||
image_content=image_content,
|
||||
is_skill_content=is_skill_content,
|
||||
)
|
||||
self._messages.append(msg)
|
||||
|
||||
@@ -14,6 +14,7 @@ from __future__ import annotations
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from collections.abc import Awaitable, Callable
|
||||
@@ -24,6 +25,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
|
||||
|
||||
from framework.graph.conversation import ConversationStore, NodeConversation
|
||||
from framework.graph.node import NodeContext, NodeProtocol, NodeResult
|
||||
from framework.llm.capabilities import supports_image_tool_results
|
||||
from framework.llm.provider import Tool, ToolResult, ToolUse
|
||||
from framework.llm.stream_events import (
|
||||
FinishEvent,
|
||||
@@ -37,6 +39,56 @@ from framework.runtime.llm_debug_logger import log_llm_turn
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def _describe_images_as_text(image_content: list[dict[str, Any]]) -> str | None:
    """Describe images using the best available vision model.

    Called when the queen's model lacks vision support. Tries vision-capable
    models in priority order based on available API keys and returns a bracketed
    description to inject into the message text, or None if no vision model is
    reachable.

    Args:
        image_content: OpenAI-format ``image_url`` content blocks to describe.

    Returns:
        A ``"[<label> attached — description: ...]"`` string suitable for
        inlining into a text-only message, or None when no vision model
        produced a description.
    """
    # Ordered candidates based on available env vars. Built FIRST so that when
    # no key is configured we can bail out immediately — this also skips the
    # heavy `litellm` import in a path that could only ever return None.
    candidates: list[str] = []
    if os.environ.get("OPENAI_API_KEY"):
        candidates.append("gpt-4o-mini")
    if os.environ.get("ANTHROPIC_API_KEY"):
        candidates.append("claude-3-haiku-20240307")
    if os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY"):
        candidates.append("gemini/gemini-1.5-flash")
    if not candidates:
        return None

    import litellm  # deferred: only needed once we know a provider key exists

    # Build content blocks: instruction prompt followed by all images.
    blocks: list[dict[str, Any]] = [
        {
            "type": "text",
            "text": (
                "Describe the following image(s) concisely but with enough detail "
                "that a text-only AI assistant can understand the content and context."
            ),
        }
    ]
    blocks.extend(image_content)

    for model in candidates:
        try:
            response = await litellm.acompletion(
                model=model,
                messages=[{"role": "user", "content": blocks}],
                max_tokens=512,
            )
            description = (response.choices[0].message.content or "").strip()
            if description:
                count = len(image_content)
                label = "image" if count == 1 else f"{count} images"
                return f"[{label} attached — description: {description}]"
        except Exception as exc:  # best-effort fallback: try the next candidate
            logger.debug("Vision fallback model '%s' failed: %s", model, exc)
            continue

    return None
|
||||
|
||||
|
||||
@dataclass
|
||||
class TriggerEvent:
|
||||
"""A framework-level trigger signal (timer tick or webhook hit).
|
||||
@@ -90,7 +142,13 @@ class _EscalationReceiver:
|
||||
self._response: str | None = None
|
||||
self._awaiting_input = True # So inject_worker_message() can prefer us
|
||||
|
||||
async def inject_event(self, content: str, *, is_client_input: bool = False) -> None:
|
||||
async def inject_event(
|
||||
self,
|
||||
content: str,
|
||||
*,
|
||||
is_client_input: bool = False,
|
||||
image_content: list[dict] | None = None,
|
||||
) -> None:
|
||||
"""Called by ExecutionStream.inject_input() when the user responds."""
|
||||
self._response = content
|
||||
self._event.set()
|
||||
@@ -426,7 +484,9 @@ class EventLoopNode(NodeProtocol):
|
||||
self._config = config or LoopConfig()
|
||||
self._tool_executor = tool_executor
|
||||
self._conversation_store = conversation_store
|
||||
self._injection_queue: asyncio.Queue[tuple[str, bool]] = asyncio.Queue()
|
||||
self._injection_queue: asyncio.Queue[tuple[str, bool, list[dict[str, Any]] | None]] = (
|
||||
asyncio.Queue()
|
||||
)
|
||||
self._trigger_queue: asyncio.Queue[TriggerEvent] = asyncio.Queue()
|
||||
# Client-facing input blocking state
|
||||
self._input_ready = asyncio.Event()
|
||||
@@ -784,7 +844,7 @@ class EventLoopNode(NodeProtocol):
|
||||
)
|
||||
|
||||
# 6b. Drain injection queue
|
||||
await self._drain_injection_queue(conversation)
|
||||
await self._drain_injection_queue(conversation, ctx)
|
||||
# 6b1. Drain trigger queue (framework-level signals)
|
||||
await self._drain_trigger_queue(conversation)
|
||||
|
||||
@@ -1910,7 +1970,13 @@ class EventLoopNode(NodeProtocol):
|
||||
conversation=conversation if _is_continuous else None,
|
||||
)
|
||||
|
||||
async def inject_event(self, content: str, *, is_client_input: bool = False) -> None:
|
||||
async def inject_event(
|
||||
self,
|
||||
content: str,
|
||||
*,
|
||||
is_client_input: bool = False,
|
||||
image_content: list[dict[str, Any]] | None = None,
|
||||
) -> None:
|
||||
"""Inject an external event or user input into the running loop.
|
||||
|
||||
The content becomes a user message prepended to the next iteration.
|
||||
@@ -1926,8 +1992,10 @@ class EventLoopNode(NodeProtocol):
|
||||
human user (e.g. /chat endpoint), False for external events
|
||||
(e.g. worker question forwarded by the frontend). Controls
|
||||
message formatting in _drain_injection_queue, not wake behavior.
|
||||
image_content: Optional list of image content blocks (OpenAI
|
||||
image_url format) to include alongside the text.
|
||||
"""
|
||||
await self._injection_queue.put((content, is_client_input))
|
||||
await self._injection_queue.put((content, is_client_input, image_content))
|
||||
self._input_ready.set()
|
||||
|
||||
async def inject_trigger(self, trigger: TriggerEvent) -> None:
|
||||
@@ -2101,6 +2169,24 @@ class EventLoopNode(NodeProtocol):
|
||||
|
||||
messages = conversation.to_llm_messages()
|
||||
|
||||
# Debug: log whether the last user message contains image blocks
|
||||
for _m in reversed(messages):
|
||||
if _m.get("role") == "user":
|
||||
_content = _m.get("content")
|
||||
if isinstance(_content, list):
|
||||
_img_count = sum(
|
||||
1
|
||||
for _b in _content
|
||||
if isinstance(_b, dict) and _b.get("type") == "image_url"
|
||||
)
|
||||
if _img_count:
|
||||
logger.info(
|
||||
"[%s] LLM call: last user message has %d image block(s)",
|
||||
node_id,
|
||||
_img_count,
|
||||
)
|
||||
break
|
||||
|
||||
# Defensive guard: ensure messages don't end with an assistant
|
||||
# message. The Anthropic API rejects "assistant message prefill"
|
||||
# (conversations must end with a user or tool message). This can
|
||||
@@ -2770,10 +2856,21 @@ class EventLoopNode(NodeProtocol):
|
||||
real_tool_results.append(tool_entry)
|
||||
logged_tool_calls.append(tool_entry)
|
||||
|
||||
# Strip image content for models that can't handle it
|
||||
image_content = result.image_content
|
||||
if image_content and ctx.llm and not supports_image_tool_results(ctx.llm.model):
|
||||
logger.info(
|
||||
"Stripping image_content from tool result — model '%s' "
|
||||
"does not support images in tool results",
|
||||
ctx.llm.model,
|
||||
)
|
||||
image_content = None
|
||||
|
||||
await conversation.add_tool_result(
|
||||
tool_use_id=tc.tool_use_id,
|
||||
content=result.content,
|
||||
is_error=result.is_error,
|
||||
image_content=image_content,
|
||||
is_skill_content=result.is_skill_content,
|
||||
)
|
||||
if (
|
||||
@@ -3914,6 +4011,7 @@ class EventLoopNode(NodeProtocol):
|
||||
tool_use_id=result.tool_use_id,
|
||||
content=truncated,
|
||||
is_error=False,
|
||||
image_content=result.image_content,
|
||||
)
|
||||
|
||||
spill_dir = self._config.spillover_dir
|
||||
@@ -3986,6 +4084,7 @@ class EventLoopNode(NodeProtocol):
|
||||
tool_use_id=result.tool_use_id,
|
||||
content=content,
|
||||
is_error=False,
|
||||
image_content=result.image_content,
|
||||
)
|
||||
|
||||
# No spillover_dir — truncate in-place if needed
|
||||
@@ -4028,6 +4127,7 @@ class EventLoopNode(NodeProtocol):
|
||||
tool_use_id=result.tool_use_id,
|
||||
content=truncated,
|
||||
is_error=False,
|
||||
image_content=result.image_content,
|
||||
)
|
||||
|
||||
return result
|
||||
@@ -4698,20 +4798,37 @@ class EventLoopNode(NodeProtocol):
|
||||
]
|
||||
await self._conversation_store.write_cursor(cursor)
|
||||
|
||||
async def _drain_injection_queue(self, conversation: NodeConversation) -> int:
|
||||
async def _drain_injection_queue(self, conversation: NodeConversation, ctx: NodeContext) -> int:
|
||||
"""Drain all pending injected events as user messages. Returns count."""
|
||||
count = 0
|
||||
while not self._injection_queue.empty():
|
||||
try:
|
||||
content, is_client_input = self._injection_queue.get_nowait()
|
||||
content, is_client_input, image_content = self._injection_queue.get_nowait()
|
||||
logger.info(
|
||||
"[drain] injected message (client_input=%s): %s",
|
||||
"[drain] injected message (client_input=%s, images=%d): %s",
|
||||
is_client_input,
|
||||
len(image_content) if image_content else 0,
|
||||
content[:200] if content else "(empty)",
|
||||
)
|
||||
# For models that don't support images, fall back to a text description
|
||||
if image_content and ctx.llm:
|
||||
if not supports_image_tool_results(ctx.llm.model):
|
||||
logger.info(
|
||||
"Model '%s' does not support images — attempting vision fallback",
|
||||
ctx.llm.model,
|
||||
)
|
||||
description = await _describe_images_as_text(image_content)
|
||||
if description:
|
||||
content = f"{content}\n\n{description}" if content else description
|
||||
logger.info("[drain] image described as text via vision fallback")
|
||||
else:
|
||||
logger.info("[drain] no vision fallback available — images dropped")
|
||||
image_content = None
|
||||
# Real user input is stored as-is; external events get a prefix
|
||||
if is_client_input:
|
||||
await conversation.add_user_message(content, is_client_input=True)
|
||||
await conversation.add_user_message(
|
||||
content, is_client_input=True, image_content=image_content
|
||||
)
|
||||
else:
|
||||
await conversation.add_user_message(f"[External event]: {content}")
|
||||
count += 1
|
||||
|
||||
@@ -43,8 +43,11 @@ Follow these rules for reliable, efficient browser interaction.
|
||||
`browser_snapshot` separately after every action.
|
||||
Only call `browser_snapshot` when you need a fresh view without
|
||||
performing an action, or after setting `auto_snapshot=false`.
|
||||
- Do NOT use `browser_screenshot` for reading text content
|
||||
— it produces huge base64 images with no searchable text.
|
||||
- Do NOT use `browser_screenshot` to read text — use
|
||||
`browser_snapshot` for that (compact, searchable, fast).
|
||||
- DO use `browser_screenshot` when you need visual context:
|
||||
charts, images, canvas elements, layout verification, or when
|
||||
the snapshot doesn't capture what you need.
|
||||
- Only fall back to `browser_get_text` for extracting specific
|
||||
small elements by CSS selector.
|
||||
|
||||
|
||||
@@ -0,0 +1,106 @@
|
||||
"""Model capability checks for LLM providers.
|
||||
|
||||
Vision support rules are derived from official vendor documentation:
|
||||
- ZAI (z.ai): docs.z.ai/guides/vlm — GLM-4.6V variants are vision; GLM-5/4.6/4.7 are text-only
|
||||
- MiniMax: platform.minimax.io/docs — minimax-vl-01 is vision; M2.x are text-only
|
||||
- DeepSeek: api-docs.deepseek.com — deepseek-vl2 is vision; chat/reasoner are text-only
|
||||
- Cerebras: inference-docs.cerebras.ai — no vision models at all
|
||||
- Groq: console.groq.com/docs/vision — vision capable; treat as supported by default
|
||||
- Ollama/LM Studio/vLLM/llama.cpp: local runners denied by default; model names
|
||||
don't reliably indicate vision support, so users must configure explicitly
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
def _model_name(model: str) -> str:
|
||||
"""Return the bare model name after stripping any 'provider/' prefix."""
|
||||
if "/" in model:
|
||||
return model.split("/", 1)[1]
|
||||
return model
|
||||
|
||||
|
||||
# Step 1: explicit vision allow-list — these always support images regardless
|
||||
# of what the provider-level rules say. Checked first so that e.g. glm-4.6v
|
||||
# is allowed even though glm-4.6 is denied.
|
||||
_VISION_ALLOW_BARE_PREFIXES: tuple[str, ...] = (
|
||||
# ZAI/GLM vision models (docs.z.ai/guides/vlm)
|
||||
"glm-4v", # GLM-4V series (legacy)
|
||||
"glm-4.6v", # GLM-4.6V, GLM-4.6V-flash, GLM-4.6V-flashx
|
||||
# DeepSeek vision models
|
||||
"deepseek-vl", # deepseek-vl2, deepseek-vl2-small, deepseek-vl2-tiny
|
||||
# MiniMax vision model
|
||||
"minimax-vl", # minimax-vl-01
|
||||
)
|
||||
|
||||
# Step 2: provider-level deny — every model from this provider is text-only.
|
||||
_TEXT_ONLY_PROVIDER_PREFIXES: tuple[str, ...] = (
|
||||
# Cerebras: inference-docs.cerebras.ai lists only text models
|
||||
"cerebras/",
|
||||
# Local runners: model names don't reliably indicate vision support
|
||||
"ollama/",
|
||||
"ollama_chat/",
|
||||
"lm_studio/",
|
||||
"vllm/",
|
||||
"llamacpp/",
|
||||
)
|
||||
|
||||
# Step 3: per-model deny — text-only models within otherwise mixed providers.
|
||||
# Matched against the bare model name (provider prefix stripped, lower-cased).
|
||||
# The vision allow-list above is checked first, so vision variants of the same
|
||||
# family are already handled before these deny patterns are reached.
|
||||
_TEXT_ONLY_MODEL_BARE_PREFIXES: tuple[str, ...] = (
|
||||
# --- ZAI / GLM family ---
|
||||
# text-only: glm-5, glm-4.6, glm-4.7, glm-4.5, zai-glm-*
|
||||
# vision: glm-4v, glm-4.6v (caught by allow-list above)
|
||||
"glm-5",
|
||||
"glm-4.6", # bare glm-4.6 is text-only; glm-4.6v is caught by allow-list
|
||||
"glm-4.7",
|
||||
"glm-4.5",
|
||||
"zai-glm",
|
||||
# --- DeepSeek ---
|
||||
# text-only: deepseek-chat, deepseek-coder, deepseek-reasoner
|
||||
# vision: deepseek-vl2 (caught by allow-list above)
|
||||
# Note: LiteLLM's deepseek handler may flatten content lists for some models;
|
||||
# VL models are allowed through and rely on LiteLLM's native VL support.
|
||||
"deepseek-chat",
|
||||
"deepseek-coder",
|
||||
"deepseek-reasoner",
|
||||
# --- MiniMax ---
|
||||
# text-only: minimax-m2.*, minimax-text-*, abab* (legacy)
|
||||
# vision: minimax-vl-01 (caught by allow-list above)
|
||||
"minimax-m2",
|
||||
"minimax-text",
|
||||
"abab",
|
||||
)
|
||||
|
||||
|
||||
def supports_image_tool_results(model: str) -> bool:
|
||||
"""Return whether *model* can receive image content in messages.
|
||||
|
||||
Used to gate both user-message images and tool-result image blocks.
|
||||
|
||||
Logic (checked in order):
|
||||
1. Vision allow-list → True (known vision model, skip all denies)
|
||||
2. Provider deny → False (entire provider is text-only)
|
||||
3. Model deny → False (specific text-only model within a mixed provider)
|
||||
4. Default → True (assume capable; unknown providers and models)
|
||||
"""
|
||||
model_lower = model.lower()
|
||||
bare = _model_name(model_lower)
|
||||
|
||||
# 1. Explicit vision allow — takes priority over all denies
|
||||
if any(bare.startswith(p) for p in _VISION_ALLOW_BARE_PREFIXES):
|
||||
return True
|
||||
|
||||
# 2. Provider-level deny (all models from this provider are text-only)
|
||||
if any(model_lower.startswith(p) for p in _TEXT_ONLY_PROVIDER_PREFIXES):
|
||||
return False
|
||||
|
||||
# 3. Per-model deny (text-only variants within mixed-capability families)
|
||||
if any(bare.startswith(p) for p in _TEXT_ONLY_MODEL_BARE_PREFIXES):
|
||||
return False
|
||||
|
||||
# 5. Default: assume vision capable
|
||||
# Covers: OpenAI, Anthropic, Google, Mistral, Kimi, and other hosted providers
|
||||
return True
|
||||
@@ -45,6 +45,7 @@ class ToolResult:
|
||||
tool_use_id: str
|
||||
content: str
|
||||
is_error: bool = False
|
||||
image_content: list[dict[str, Any]] | None = None
|
||||
is_skill_content: bool = False # AS-10: marks activated skill body, protected from pruning
|
||||
|
||||
|
||||
|
||||
@@ -509,17 +509,30 @@ class MCPClient:
|
||||
error_text = content_item.text
|
||||
raise RuntimeError(f"MCP tool '{tool_name}' failed: {error_text}")
|
||||
|
||||
# Extract content
|
||||
# Extract content — preserve image blocks alongside text
|
||||
if result.content:
|
||||
# MCP returns content as a list of content items
|
||||
if len(result.content) > 0:
|
||||
content_item = result.content[0]
|
||||
# Check if it's a text content item
|
||||
if hasattr(content_item, "text"):
|
||||
return content_item.text
|
||||
elif hasattr(content_item, "data"):
|
||||
return content_item.data
|
||||
return result.content
|
||||
text_parts: list[str] = []
|
||||
image_parts: list[dict[str, Any]] = []
|
||||
for item in result.content:
|
||||
if hasattr(item, "text"):
|
||||
text_parts.append(item.text)
|
||||
elif hasattr(item, "data") and hasattr(item, "mimeType"):
|
||||
# MCP ImageContent — preserve as structured image block
|
||||
image_parts.append(
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": f"data:{item.mimeType};base64,{item.data}",
|
||||
},
|
||||
}
|
||||
)
|
||||
elif hasattr(item, "data"):
|
||||
text_parts.append(str(item.data))
|
||||
|
||||
text = "\n".join(text_parts) if text_parts else ""
|
||||
if image_parts:
|
||||
return {"_text": text, "_images": image_parts}
|
||||
return text if text else None
|
||||
|
||||
return None
|
||||
|
||||
|
||||
@@ -245,6 +245,13 @@ class ToolRegistry:
|
||||
def _wrap_result(tool_use_id: str, result: Any) -> ToolResult:
|
||||
if isinstance(result, ToolResult):
|
||||
return result
|
||||
# MCP client returns dict with _images when image content is present
|
||||
if isinstance(result, dict) and "_images" in result:
|
||||
return ToolResult(
|
||||
tool_use_id=tool_use_id,
|
||||
content=result.get("_text", ""),
|
||||
image_content=result["_images"],
|
||||
)
|
||||
return ToolResult(
|
||||
tool_use_id=tool_use_id,
|
||||
content=json.dumps(result) if not isinstance(result, str) else result,
|
||||
@@ -572,7 +579,9 @@ class ToolRegistry:
|
||||
}
|
||||
merged_inputs = {**clean_inputs, **filtered_context}
|
||||
result = client_ref.call_tool(tool_name, merged_inputs)
|
||||
# MCP tools return content array, extract the result
|
||||
# MCP client already extracts content (returns str
|
||||
# or {"_text": ..., "_images": ...} for image results).
|
||||
# Handle legacy list format from HTTP transport.
|
||||
if isinstance(result, list) and len(result) > 0:
|
||||
if isinstance(result[0], dict) and "text" in result[0]:
|
||||
return result[0]["text"]
|
||||
|
||||
@@ -1474,6 +1474,7 @@ class AgentRuntime:
|
||||
graph_id: str | None = None,
|
||||
*,
|
||||
is_client_input: bool = False,
|
||||
image_content: list[dict[str, Any]] | None = None,
|
||||
) -> bool:
|
||||
"""Inject user input into a running client-facing node.
|
||||
|
||||
@@ -1486,6 +1487,8 @@ class AgentRuntime:
|
||||
graph_id: Optional graph to search first (defaults to active graph)
|
||||
is_client_input: True when the message originates from a real
|
||||
human user (e.g. /chat endpoint), False for external events.
|
||||
image_content: Optional list of image content blocks (OpenAI
|
||||
image_url format) to include alongside the text.
|
||||
|
||||
Returns:
|
||||
True if input was delivered, False if no matching node found
|
||||
@@ -1497,7 +1500,9 @@ class AgentRuntime:
|
||||
target = graph_id or self._active_graph_id
|
||||
if target in self._graphs:
|
||||
for stream in self._graphs[target].streams.values():
|
||||
if await stream.inject_input(node_id, content, is_client_input=is_client_input):
|
||||
if await stream.inject_input(
|
||||
node_id, content, is_client_input=is_client_input, image_content=image_content
|
||||
):
|
||||
return True
|
||||
|
||||
# Then search all other graphs
|
||||
@@ -1505,7 +1510,9 @@ class AgentRuntime:
|
||||
if gid == target:
|
||||
continue
|
||||
for stream in reg.streams.values():
|
||||
if await stream.inject_input(node_id, content, is_client_input=is_client_input):
|
||||
if await stream.inject_input(
|
||||
node_id, content, is_client_input=is_client_input, image_content=image_content
|
||||
):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
@@ -433,6 +433,7 @@ class ExecutionStream:
|
||||
content: str,
|
||||
*,
|
||||
is_client_input: bool = False,
|
||||
image_content: list[dict[str, Any]] | None = None,
|
||||
) -> bool:
|
||||
"""Inject user input into a running client-facing EventLoopNode.
|
||||
|
||||
@@ -444,7 +445,9 @@ class ExecutionStream:
|
||||
for executor in self._active_executors.values():
|
||||
node = executor.node_registry.get(node_id)
|
||||
if node is not None and hasattr(node, "inject_event"):
|
||||
await node.inject_event(content, is_client_input=is_client_input)
|
||||
await node.inject_event(
|
||||
content, is_client_input=is_client_input, image_content=image_content
|
||||
)
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
@@ -108,7 +108,10 @@ async def handle_chat(request: web.Request) -> web.Response:
|
||||
The input box is permanently connected to the queen agent.
|
||||
Worker input is handled separately via /worker-input.
|
||||
|
||||
Body: {"message": "hello"}
|
||||
Body: {"message": "hello", "images": [{"type": "image_url", "image_url": {"url": "data:..."}}]}
|
||||
|
||||
The optional ``images`` field accepts a list of OpenAI-format image_url
|
||||
content blocks. The frontend encodes images as base64 data URIs.
|
||||
"""
|
||||
session, err = resolve_session(request)
|
||||
if err:
|
||||
@@ -116,15 +119,16 @@ async def handle_chat(request: web.Request) -> web.Response:
|
||||
|
||||
body = await request.json()
|
||||
message = body.get("message", "")
|
||||
image_content = body.get("images") or None # list[dict] | None
|
||||
|
||||
if not message:
|
||||
if not message and not image_content:
|
||||
return web.json_response({"error": "message is required"}, status=400)
|
||||
|
||||
queen_executor = session.queen_executor
|
||||
if queen_executor is not None:
|
||||
node = queen_executor.node_registry.get("queen")
|
||||
if node is not None and hasattr(node, "inject_event"):
|
||||
await node.inject_event(message, is_client_input=True)
|
||||
await node.inject_event(message, is_client_input=True, image_content=image_content)
|
||||
# Publish to EventBus so the session event log captures user messages
|
||||
from framework.runtime.event_bus import AgentEvent, EventType
|
||||
|
||||
@@ -134,7 +138,10 @@ async def handle_chat(request: web.Request) -> web.Response:
|
||||
stream_id="queen",
|
||||
node_id="queen",
|
||||
execution_id=session.id,
|
||||
data={"content": message},
|
||||
data={
|
||||
"content": message,
|
||||
"image_count": len(image_content) if image_content else 0,
|
||||
},
|
||||
)
|
||||
)
|
||||
return web.json_response(
|
||||
|
||||
@@ -28,6 +28,8 @@ import contextlib
|
||||
import json
|
||||
import logging
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
@@ -51,8 +53,11 @@ def _get_manager(request: web.Request) -> SessionManager:
|
||||
|
||||
def _session_to_live_dict(session) -> dict:
|
||||
"""Serialize a live Session to the session-primary JSON shape."""
|
||||
from framework.llm.capabilities import supports_image_tool_results
|
||||
|
||||
info = session.worker_info
|
||||
phase_state = getattr(session, "phase_state", None)
|
||||
queen_model: str = getattr(getattr(session, "runner", None), "model", "") or ""
|
||||
return {
|
||||
"session_id": session.id,
|
||||
"worker_id": session.worker_id,
|
||||
@@ -68,6 +73,7 @@ def _session_to_live_dict(session) -> dict:
|
||||
"queen_phase": phase_state.phase
|
||||
if phase_state
|
||||
else ("staging" if session.worker_runtime else "planning"),
|
||||
"queen_supports_images": supports_image_tool_results(queen_model) if queen_model else True,
|
||||
}
|
||||
|
||||
|
||||
@@ -978,6 +984,29 @@ async def handle_discover(request: web.Request) -> web.Response:
|
||||
return web.json_response(result)
|
||||
|
||||
|
||||
async def handle_reveal_session_folder(request: web.Request) -> web.Response:
    """POST /api/sessions/{session_id}/reveal — open session data folder in the OS file manager."""
    manager: SessionManager = request.app["manager"]
    session_id = request.match_info["session_id"]

    # Resumed sessions keep their data under the original session id, so
    # prefer queen_resume_from when the live session exposes one.
    session = manager.get_session(session_id)
    storage_session_id = (session.queen_resume_from or session.id) if session else session_id
    folder = Path.home() / ".hive" / "queen" / "session" / storage_session_id
    folder.mkdir(parents=True, exist_ok=True)

    # Pick the platform file-manager command: macOS `open`, Windows
    # `explorer`, everything else falls back to `xdg-open`.
    opener = {"darwin": "open", "win32": "explorer"}.get(sys.platform, "xdg-open")
    try:
        subprocess.Popen([opener, str(folder)])
    except Exception as exc:
        return web.json_response({"error": str(exc)}, status=500)

    return web.json_response({"path": str(folder)})
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Route registration
|
||||
# ------------------------------------------------------------------
|
||||
@@ -1002,6 +1031,7 @@ def register_routes(app: web.Application) -> None:
|
||||
app.router.add_delete("/api/sessions/{session_id}/worker", handle_unload_worker)
|
||||
|
||||
# Session info
|
||||
app.router.add_post("/api/sessions/{session_id}/reveal", handle_reveal_session_folder)
|
||||
app.router.add_get("/api/sessions/{session_id}/stats", handle_session_stats)
|
||||
app.router.add_get("/api/sessions/{session_id}/entry-points", handle_session_entry_points)
|
||||
app.router.add_patch(
|
||||
|
||||
@@ -34,8 +34,8 @@ export const executionApi = {
|
||||
graph_id: graphId,
|
||||
}),
|
||||
|
||||
chat: (sessionId: string, message: string) =>
|
||||
api.post<ChatResult>(`/sessions/${sessionId}/chat`, { message }),
|
||||
chat: (sessionId: string, message: string, images?: { type: string; image_url: { url: string } }[]) =>
|
||||
api.post<ChatResult>(`/sessions/${sessionId}/chat`, { message, ...(images?.length ? { images } : {}) }),
|
||||
|
||||
/** Queue context for the queen without triggering an LLM response. */
|
||||
queenContext: (sessionId: string, message: string) =>
|
||||
|
||||
@@ -81,6 +81,10 @@ export const sessionsApi = {
|
||||
eventsHistory: (sessionId: string) =>
|
||||
api.get<{ events: AgentEvent[]; session_id: string }>(`/sessions/${sessionId}/events/history`),
|
||||
|
||||
/** Open the session's data folder in the OS file manager. */
|
||||
revealFolder: (sessionId: string) =>
|
||||
api.post<{ path: string }>(`/sessions/${sessionId}/reveal`),
|
||||
|
||||
/** List all queen sessions on disk — live + cold (post-restart). */
|
||||
history: () =>
|
||||
api.get<{ sessions: Array<{ session_id: string; cold: boolean; live: boolean; has_messages: boolean; created_at: number; agent_name?: string | null; agent_path?: string | null }> }>("/sessions/history"),
|
||||
|
||||
@@ -14,6 +14,8 @@ export interface LiveSession {
|
||||
intro_message?: string;
|
||||
/** Queen operating phase — "planning", "building", "staging", or "running" */
|
||||
queen_phase?: "planning" | "building" | "staging" | "running";
|
||||
/** Whether the queen's LLM supports image content in messages */
|
||||
queen_supports_images?: boolean;
|
||||
/** Present in 409 conflict responses when worker is still loading */
|
||||
loading?: boolean;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,19 @@
|
||||
import { memo, useState, useRef, useEffect, useMemo } from "react";
|
||||
import { Send, Square, Crown, Cpu, Check, Loader2 } from "lucide-react";
|
||||
import {
|
||||
Send,
|
||||
Square,
|
||||
Crown,
|
||||
Cpu,
|
||||
Check,
|
||||
Loader2,
|
||||
Paperclip,
|
||||
X,
|
||||
} from "lucide-react";
|
||||
|
||||
export interface ImageContent {
|
||||
type: "image_url";
|
||||
image_url: { url: string };
|
||||
}
|
||||
|
||||
export interface ContextUsageEntry {
|
||||
usagePct: number;
|
||||
@@ -10,7 +24,9 @@ export interface ContextUsageEntry {
|
||||
import MarkdownContent from "@/components/MarkdownContent";
|
||||
import QuestionWidget from "@/components/QuestionWidget";
|
||||
import MultiQuestionWidget from "@/components/MultiQuestionWidget";
|
||||
import ParallelSubagentBubble, { type SubagentGroup } from "@/components/ParallelSubagentBubble";
|
||||
import ParallelSubagentBubble, {
|
||||
type SubagentGroup,
|
||||
} from "@/components/ParallelSubagentBubble";
|
||||
|
||||
export interface ChatMessage {
|
||||
id: string;
|
||||
@@ -18,7 +34,13 @@ export interface ChatMessage {
|
||||
agentColor: string;
|
||||
content: string;
|
||||
timestamp: string;
|
||||
type?: "system" | "agent" | "user" | "tool_status" | "worker_input_request" | "run_divider";
|
||||
type?:
|
||||
| "system"
|
||||
| "agent"
|
||||
| "user"
|
||||
| "tool_status"
|
||||
| "worker_input_request"
|
||||
| "run_divider";
|
||||
role?: "queen" | "worker";
|
||||
/** Which worker thread this message belongs to (worker agent name) */
|
||||
thread?: string;
|
||||
@@ -26,6 +48,8 @@ export interface ChatMessage {
|
||||
createdAt?: number;
|
||||
/** Queen phase active when this message was created */
|
||||
phase?: "planning" | "building" | "staging" | "running";
|
||||
/** Images attached to a user message */
|
||||
images?: ImageContent[];
|
||||
/** Backend node_id that produced this message — used for subagent grouping */
|
||||
nodeId?: string;
|
||||
/** Backend execution_id for this message */
|
||||
@@ -34,7 +58,7 @@ export interface ChatMessage {
|
||||
|
||||
interface ChatPanelProps {
|
||||
messages: ChatMessage[];
|
||||
onSend: (message: string, thread: string) => void;
|
||||
onSend: (message: string, thread: string, images?: ImageContent[]) => void;
|
||||
isWaiting?: boolean;
|
||||
/** When true a worker is thinking (not yet streaming) */
|
||||
isWorkerWaiting?: boolean;
|
||||
@@ -43,6 +67,8 @@ interface ChatPanelProps {
|
||||
activeThread: string;
|
||||
/** When true, the input is disabled (e.g. during loading) */
|
||||
disabled?: boolean;
|
||||
/** When false, the image attach button is hidden (model lacks vision support) */
|
||||
supportsImages?: boolean;
|
||||
/** Called when user clicks the stop button to cancel the queen's current turn */
|
||||
onCancel?: () => void;
|
||||
/** Pending question from ask_user — replaces textarea when present */
|
||||
@@ -50,7 +76,9 @@ interface ChatPanelProps {
|
||||
/** Options for the pending question */
|
||||
pendingOptions?: string[] | null;
|
||||
/** Multiple questions from ask_user_multiple */
|
||||
pendingQuestions?: { id: string; prompt: string; options?: string[] }[] | null;
|
||||
pendingQuestions?:
|
||||
| { id: string; prompt: string; options?: string[] }[]
|
||||
| null;
|
||||
/** Called when user submits an answer to the pending question */
|
||||
onQuestionSubmit?: (answer: string, isOther: boolean) => void;
|
||||
/** Called when user submits answers to multiple questions */
|
||||
@@ -86,7 +114,8 @@ const TOOL_HEX = [
|
||||
|
||||
function toolHex(name: string): string {
|
||||
let hash = 0;
|
||||
for (let i = 0; i < name.length; i++) hash = (hash * 31 + name.charCodeAt(i)) | 0;
|
||||
for (let i = 0; i < name.length; i++)
|
||||
hash = (hash * 31 + name.charCodeAt(i)) | 0;
|
||||
return TOOL_HEX[Math.abs(hash) % TOOL_HEX.length];
|
||||
}
|
||||
|
||||
@@ -134,12 +163,18 @@ function ToolActivityRow({ content }: { content: string }) {
|
||||
<span
|
||||
key={`run-${p.name}`}
|
||||
className="inline-flex items-center gap-1 text-[11px] px-2.5 py-0.5 rounded-full"
|
||||
style={{ color: hex, backgroundColor: `${hex}18`, border: `1px solid ${hex}35` }}
|
||||
style={{
|
||||
color: hex,
|
||||
backgroundColor: `${hex}18`,
|
||||
border: `1px solid ${hex}35`,
|
||||
}}
|
||||
>
|
||||
<Loader2 className="w-2.5 h-2.5 animate-spin" />
|
||||
{p.name}
|
||||
{p.count > 1 && (
|
||||
<span className="text-[10px] font-medium opacity-70">×{p.count}</span>
|
||||
<span className="text-[10px] font-medium opacity-70">
|
||||
×{p.count}
|
||||
</span>
|
||||
)}
|
||||
</span>
|
||||
);
|
||||
@@ -150,7 +185,11 @@ function ToolActivityRow({ content }: { content: string }) {
|
||||
<span
|
||||
key={`done-${p.name}`}
|
||||
className="inline-flex items-center gap-1 text-[11px] px-2.5 py-0.5 rounded-full"
|
||||
style={{ color: hex, backgroundColor: `${hex}18`, border: `1px solid ${hex}35` }}
|
||||
style={{
|
||||
color: hex,
|
||||
backgroundColor: `${hex}18`,
|
||||
border: `1px solid ${hex}35`,
|
||||
}}
|
||||
>
|
||||
<Check className="w-2.5 h-2.5" />
|
||||
{p.name}
|
||||
@@ -165,103 +204,157 @@ function ToolActivityRow({ content }: { content: string }) {
|
||||
);
|
||||
}
|
||||
|
||||
const MessageBubble = memo(function MessageBubble({ msg, queenPhase }: { msg: ChatMessage; queenPhase?: "planning" | "building" | "staging" | "running" }) {
|
||||
const isUser = msg.type === "user";
|
||||
const isQueen = msg.role === "queen";
|
||||
const color = getColor(msg.agent, msg.role);
|
||||
const MessageBubble = memo(
|
||||
function MessageBubble({
|
||||
msg,
|
||||
queenPhase,
|
||||
}: {
|
||||
msg: ChatMessage;
|
||||
queenPhase?: "planning" | "building" | "staging" | "running";
|
||||
}) {
|
||||
const isUser = msg.type === "user";
|
||||
const isQueen = msg.role === "queen";
|
||||
const color = getColor(msg.agent, msg.role);
|
||||
|
||||
if (msg.type === "run_divider") {
|
||||
return (
|
||||
<div className="flex items-center gap-3 py-2 my-1">
|
||||
<div className="flex-1 h-px bg-border/60" />
|
||||
<span className="text-[10px] text-muted-foreground font-medium uppercase tracking-wider">
|
||||
{msg.content}
|
||||
</span>
|
||||
<div className="flex-1 h-px bg-border/60" />
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
if (msg.type === "system") {
|
||||
return (
|
||||
<div className="flex justify-center py-1">
|
||||
<span className="text-[11px] text-muted-foreground bg-muted/60 px-3 py-1.5 rounded-full">
|
||||
{msg.content}
|
||||
</span>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
if (msg.type === "tool_status") {
|
||||
return <ToolActivityRow content={msg.content} />;
|
||||
}
|
||||
|
||||
if (isUser) {
|
||||
return (
|
||||
<div className="flex justify-end">
|
||||
<div className="max-w-[75%] bg-primary text-primary-foreground text-sm leading-relaxed rounded-2xl rounded-br-md px-4 py-3">
|
||||
<p className="whitespace-pre-wrap break-words">{msg.content}</p>
|
||||
if (msg.type === "run_divider") {
|
||||
return (
|
||||
<div className="flex items-center gap-3 py-2 my-1">
|
||||
<div className="flex-1 h-px bg-border/60" />
|
||||
<span className="text-[10px] text-muted-foreground font-medium uppercase tracking-wider">
|
||||
{msg.content}
|
||||
</span>
|
||||
<div className="flex-1 h-px bg-border/60" />
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="flex gap-3">
|
||||
<div
|
||||
className={`flex-shrink-0 ${isQueen ? "w-9 h-9" : "w-7 h-7"} rounded-xl flex items-center justify-center`}
|
||||
style={{
|
||||
backgroundColor: `${color}18`,
|
||||
border: `1.5px solid ${color}35`,
|
||||
boxShadow: isQueen ? `0 0 12px ${color}20` : undefined,
|
||||
}}
|
||||
>
|
||||
{isQueen ? (
|
||||
<Crown className="w-4 h-4" style={{ color }} />
|
||||
) : (
|
||||
<Cpu className="w-3.5 h-3.5" style={{ color }} />
|
||||
)}
|
||||
</div>
|
||||
<div className={`flex-1 min-w-0 ${isQueen ? "max-w-[85%]" : "max-w-[75%]"}`}>
|
||||
<div className="flex items-center gap-2 mb-1">
|
||||
<span className={`font-medium ${isQueen ? "text-sm" : "text-xs"}`} style={{ color }}>
|
||||
{msg.agent}
|
||||
</span>
|
||||
<span
|
||||
className={`text-[10px] font-medium px-1.5 py-0.5 rounded-md ${
|
||||
isQueen ? "bg-primary/15 text-primary" : "bg-muted text-muted-foreground"
|
||||
}`}
|
||||
>
|
||||
{isQueen
|
||||
? ((msg.phase ?? queenPhase) === "running"
|
||||
? "running"
|
||||
: (msg.phase ?? queenPhase) === "staging"
|
||||
? "staging"
|
||||
: (msg.phase ?? queenPhase) === "planning"
|
||||
? "planning"
|
||||
: "building")
|
||||
: "Worker"}
|
||||
if (msg.type === "system") {
|
||||
return (
|
||||
<div className="flex justify-center py-1">
|
||||
<span className="text-[11px] text-muted-foreground bg-muted/60 px-3 py-1.5 rounded-full">
|
||||
{msg.content}
|
||||
</span>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
if (msg.type === "tool_status") {
|
||||
return <ToolActivityRow content={msg.content} />;
|
||||
}
|
||||
|
||||
if (isUser) {
|
||||
return (
|
||||
<div className="flex justify-end">
|
||||
<div className="max-w-[75%] bg-primary text-primary-foreground text-sm leading-relaxed rounded-2xl rounded-br-md px-4 py-3">
|
||||
{msg.images && msg.images.length > 0 && (
|
||||
<div className="flex flex-wrap gap-2 mb-2">
|
||||
{msg.images.map((img, i) => (
|
||||
<img
|
||||
key={i}
|
||||
src={img.image_url.url}
|
||||
alt={`attachment ${i + 1}`}
|
||||
className="max-h-48 max-w-full rounded-lg object-contain"
|
||||
/>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
{msg.content && (
|
||||
<p className="whitespace-pre-wrap break-words">{msg.content}</p>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="flex gap-3">
|
||||
<div
|
||||
className={`flex-shrink-0 ${isQueen ? "w-9 h-9" : "w-7 h-7"} rounded-xl flex items-center justify-center`}
|
||||
style={{
|
||||
backgroundColor: `${color}18`,
|
||||
border: `1.5px solid ${color}35`,
|
||||
boxShadow: isQueen ? `0 0 12px ${color}20` : undefined,
|
||||
}}
|
||||
>
|
||||
{isQueen ? (
|
||||
<Crown className="w-4 h-4" style={{ color }} />
|
||||
) : (
|
||||
<Cpu className="w-3.5 h-3.5" style={{ color }} />
|
||||
)}
|
||||
</div>
|
||||
<div
|
||||
className={`text-sm leading-relaxed rounded-2xl rounded-tl-md px-4 py-3 ${
|
||||
isQueen ? "border border-primary/20 bg-primary/5" : "bg-muted/60"
|
||||
}`}
|
||||
className={`flex-1 min-w-0 ${isQueen ? "max-w-[85%]" : "max-w-[75%]"}`}
|
||||
>
|
||||
<MarkdownContent content={msg.content} />
|
||||
<div className="flex items-center gap-2 mb-1">
|
||||
<span
|
||||
className={`font-medium ${isQueen ? "text-sm" : "text-xs"}`}
|
||||
style={{ color }}
|
||||
>
|
||||
{msg.agent}
|
||||
</span>
|
||||
<span
|
||||
className={`text-[10px] font-medium px-1.5 py-0.5 rounded-md ${
|
||||
isQueen
|
||||
? "bg-primary/15 text-primary"
|
||||
: "bg-muted text-muted-foreground"
|
||||
}`}
|
||||
>
|
||||
{isQueen
|
||||
? (msg.phase ?? queenPhase) === "running"
|
||||
? "running"
|
||||
: (msg.phase ?? queenPhase) === "staging"
|
||||
? "staging"
|
||||
: (msg.phase ?? queenPhase) === "planning"
|
||||
? "planning"
|
||||
: "building"
|
||||
: "Worker"}
|
||||
</span>
|
||||
</div>
|
||||
<div
|
||||
className={`text-sm leading-relaxed rounded-2xl rounded-tl-md px-4 py-3 ${
|
||||
isQueen ? "border border-primary/20 bg-primary/5" : "bg-muted/60"
|
||||
}`}
|
||||
>
|
||||
<MarkdownContent content={msg.content} />
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}, (prev, next) => prev.msg.id === next.msg.id && prev.msg.content === next.msg.content && prev.msg.phase === next.msg.phase && prev.queenPhase === next.queenPhase);
|
||||
);
|
||||
},
|
||||
(prev, next) =>
|
||||
prev.msg.id === next.msg.id &&
|
||||
prev.msg.content === next.msg.content &&
|
||||
prev.msg.phase === next.msg.phase &&
|
||||
prev.queenPhase === next.queenPhase,
|
||||
);
|
||||
|
||||
export default function ChatPanel({ messages, onSend, isWaiting, isWorkerWaiting, isBusy, activeThread, disabled, onCancel, pendingQuestion, pendingOptions, pendingQuestions, onQuestionSubmit, onMultiQuestionSubmit, onQuestionDismiss, queenPhase, contextUsage }: ChatPanelProps) {
|
||||
export default function ChatPanel({
|
||||
messages,
|
||||
onSend,
|
||||
isWaiting,
|
||||
isWorkerWaiting,
|
||||
isBusy,
|
||||
activeThread,
|
||||
disabled,
|
||||
onCancel,
|
||||
pendingQuestion,
|
||||
pendingOptions,
|
||||
pendingQuestions,
|
||||
onQuestionSubmit,
|
||||
onMultiQuestionSubmit,
|
||||
onQuestionDismiss,
|
||||
queenPhase,
|
||||
contextUsage,
|
||||
supportsImages = true,
|
||||
}: ChatPanelProps) {
|
||||
const [input, setInput] = useState("");
|
||||
const [pendingImages, setPendingImages] = useState<ImageContent[]>([]);
|
||||
const [readMap, setReadMap] = useState<Record<string, number>>({});
|
||||
const bottomRef = useRef<HTMLDivElement>(null);
|
||||
const scrollRef = useRef<HTMLDivElement>(null);
|
||||
const stickToBottom = useRef(true);
|
||||
const textareaRef = useRef<HTMLTextAreaElement>(null);
|
||||
const fileInputRef = useRef<HTMLInputElement>(null);
|
||||
|
||||
const threadMessages = messages.filter((m) => {
|
||||
if (m.type === "system" && !m.thread) return false;
|
||||
@@ -270,7 +363,8 @@ export default function ChatPanel({ messages, onSend, isWaiting, isWorkerWaiting
|
||||
// tool-use-only turns that have no visible text. During live operation
|
||||
// tool pills provide context, but on resume the pills are gone so
|
||||
// the empty bubble is meaningless.
|
||||
if (m.role === "queen" && !m.type && (!m.content || !m.content.trim())) return false;
|
||||
if (m.role === "queen" && !m.type && (!m.content || !m.content.trim()))
|
||||
return false;
|
||||
return true;
|
||||
});
|
||||
|
||||
@@ -317,7 +411,8 @@ export default function ChatPanel({ messages, onSend, isWaiting, isWorkerWaiting
|
||||
// Worker message from a non-subagent node means the graph has
|
||||
// moved on to the next stage. Close the bubble even if some
|
||||
// subagents are still streaming in the background.
|
||||
if (m.role === "worker" && m.nodeId && !m.nodeId.includes(":subagent:")) break;
|
||||
if (m.role === "worker" && m.nodeId && !m.nodeId.includes(":subagent:"))
|
||||
break;
|
||||
|
||||
// Soft interruption (queen output, system, tool_status without
|
||||
// nodeId) — render it normally but keep the subagent run going
|
||||
@@ -382,31 +477,63 @@ export default function ChatPanel({ messages, onSend, isWaiting, isWorkerWaiting
|
||||
|
||||
const handleSubmit = (e: React.FormEvent) => {
|
||||
e.preventDefault();
|
||||
if (!input.trim()) return;
|
||||
onSend(input.trim(), activeThread);
|
||||
if (!input.trim() && pendingImages.length === 0) return;
|
||||
onSend(
|
||||
input.trim(),
|
||||
activeThread,
|
||||
pendingImages.length > 0 ? pendingImages : undefined,
|
||||
);
|
||||
setInput("");
|
||||
setPendingImages([]);
|
||||
if (textareaRef.current) textareaRef.current.style.height = "auto";
|
||||
};
|
||||
|
||||
const handleFileChange = (e: React.ChangeEvent<HTMLInputElement>) => {
|
||||
const files = Array.from(e.target.files ?? []);
|
||||
if (files.length === 0) return;
|
||||
files.forEach((file) => {
|
||||
const reader = new FileReader();
|
||||
reader.onload = (ev) => {
|
||||
const url = ev.target?.result as string;
|
||||
setPendingImages((prev) => [
|
||||
...prev,
|
||||
{ type: "image_url", image_url: { url } },
|
||||
]);
|
||||
};
|
||||
reader.readAsDataURL(file);
|
||||
});
|
||||
// Reset so the same file can be re-selected
|
||||
e.target.value = "";
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="flex flex-col h-full min-w-0">
|
||||
{/* Compact sub-header */}
|
||||
<div className="px-5 pt-4 pb-2 flex items-center gap-2">
|
||||
<p className="text-[11px] text-muted-foreground font-medium uppercase tracking-wider">Conversation</p>
|
||||
<p className="text-[11px] text-muted-foreground font-medium uppercase tracking-wider">
|
||||
Conversation
|
||||
</p>
|
||||
</div>
|
||||
|
||||
{/* Messages */}
|
||||
<div ref={scrollRef} onScroll={handleScroll} className="flex-1 overflow-auto px-5 py-4 space-y-3">
|
||||
<div
|
||||
ref={scrollRef}
|
||||
onScroll={handleScroll}
|
||||
className="flex-1 overflow-auto px-5 py-4 space-y-3"
|
||||
>
|
||||
{renderItems.map((item) =>
|
||||
item.kind === "parallel" ? (
|
||||
<div key={item.groupId}>
|
||||
<ParallelSubagentBubble groupId={item.groupId} groups={item.groups} />
|
||||
<ParallelSubagentBubble
|
||||
groupId={item.groupId}
|
||||
groups={item.groups}
|
||||
/>
|
||||
</div>
|
||||
) : (
|
||||
<div key={item.msg.id}>
|
||||
<MessageBubble msg={item.msg} queenPhase={queenPhase} />
|
||||
</div>
|
||||
)
|
||||
),
|
||||
)}
|
||||
|
||||
{/* Show typing indicator while waiting for first queen response (disabled + empty chat) */}
|
||||
@@ -424,9 +551,18 @@ export default function ChatPanel({ messages, onSend, isWaiting, isWorkerWaiting
|
||||
</div>
|
||||
<div className="border border-primary/20 bg-primary/5 rounded-2xl rounded-tl-md px-4 py-3">
|
||||
<div className="flex gap-1.5">
|
||||
<span className="w-1.5 h-1.5 rounded-full bg-muted-foreground animate-bounce" style={{ animationDelay: "0ms" }} />
|
||||
<span className="w-1.5 h-1.5 rounded-full bg-muted-foreground animate-bounce" style={{ animationDelay: "150ms" }} />
|
||||
<span className="w-1.5 h-1.5 rounded-full bg-muted-foreground animate-bounce" style={{ animationDelay: "300ms" }} />
|
||||
<span
|
||||
className="w-1.5 h-1.5 rounded-full bg-muted-foreground animate-bounce"
|
||||
style={{ animationDelay: "0ms" }}
|
||||
/>
|
||||
<span
|
||||
className="w-1.5 h-1.5 rounded-full bg-muted-foreground animate-bounce"
|
||||
style={{ animationDelay: "150ms" }}
|
||||
/>
|
||||
<span
|
||||
className="w-1.5 h-1.5 rounded-full bg-muted-foreground animate-bounce"
|
||||
style={{ animationDelay: "300ms" }}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -444,9 +580,18 @@ export default function ChatPanel({ messages, onSend, isWaiting, isWorkerWaiting
|
||||
</div>
|
||||
<div className="bg-muted/60 rounded-2xl rounded-tl-md px-4 py-3">
|
||||
<div className="flex gap-1.5">
|
||||
<span className="w-1.5 h-1.5 rounded-full bg-muted-foreground animate-bounce" style={{ animationDelay: "0ms" }} />
|
||||
<span className="w-1.5 h-1.5 rounded-full bg-muted-foreground animate-bounce" style={{ animationDelay: "150ms" }} />
|
||||
<span className="w-1.5 h-1.5 rounded-full bg-muted-foreground animate-bounce" style={{ animationDelay: "300ms" }} />
|
||||
<span
|
||||
className="w-1.5 h-1.5 rounded-full bg-muted-foreground animate-bounce"
|
||||
style={{ animationDelay: "0ms" }}
|
||||
/>
|
||||
<span
|
||||
className="w-1.5 h-1.5 rounded-full bg-muted-foreground animate-bounce"
|
||||
style={{ animationDelay: "150ms" }}
|
||||
/>
|
||||
<span
|
||||
className="w-1.5 h-1.5 rounded-full bg-muted-foreground animate-bounce"
|
||||
style={{ animationDelay: "300ms" }}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -458,46 +603,84 @@ export default function ChatPanel({ messages, onSend, isWaiting, isWorkerWaiting
|
||||
{(() => {
|
||||
if (!contextUsage) return null;
|
||||
const queenUsage = contextUsage["__queen__"];
|
||||
const workerEntries = Object.entries(contextUsage).filter(([k]) => k !== "__queen__");
|
||||
const workerUsage = workerEntries.length > 0
|
||||
? workerEntries.reduce((best, [, v]) => (v.usagePct > best.usagePct ? v : best), workerEntries[0][1])
|
||||
: undefined;
|
||||
const workerEntries = Object.entries(contextUsage).filter(
|
||||
([k]) => k !== "__queen__",
|
||||
);
|
||||
const workerUsage =
|
||||
workerEntries.length > 0
|
||||
? workerEntries.reduce(
|
||||
(best, [, v]) => (v.usagePct > best.usagePct ? v : best),
|
||||
workerEntries[0][1],
|
||||
)
|
||||
: undefined;
|
||||
if (!queenUsage && !workerUsage) return null;
|
||||
return (
|
||||
<div className="flex items-center gap-3 mx-4 px-3 py-1 rounded-lg bg-muted/30 border border-border/20 group/ctx flex-shrink-0">
|
||||
{queenUsage && (
|
||||
<div className="flex items-center gap-2 flex-1 min-w-0" title={`Queen: ${(queenUsage.estimatedTokens / 1000).toFixed(1)}k / ${(queenUsage.maxTokens / 1000).toFixed(0)}k tokens \u00b7 ${queenUsage.messageCount} messages`}>
|
||||
<Crown className="w-3 h-3 flex-shrink-0" style={{ color: "hsl(45,95%,58%)" }} />
|
||||
<div
|
||||
className="flex items-center gap-2 flex-1 min-w-0"
|
||||
title={`Queen: ${(queenUsage.estimatedTokens / 1000).toFixed(1)}k / ${(queenUsage.maxTokens / 1000).toFixed(0)}k tokens \u00b7 ${queenUsage.messageCount} messages`}
|
||||
>
|
||||
<Crown
|
||||
className="w-3 h-3 flex-shrink-0"
|
||||
style={{ color: "hsl(45,95%,58%)" }}
|
||||
/>
|
||||
<div className="flex-1 h-1.5 rounded-full bg-muted/50 overflow-hidden min-w-[60px]">
|
||||
<div
|
||||
className="h-full rounded-full transition-all duration-500 ease-out"
|
||||
style={{
|
||||
width: `${Math.min(queenUsage.usagePct, 100)}%`,
|
||||
backgroundColor: queenUsage.usagePct >= 90 ? "hsl(0,65%,55%)" : queenUsage.usagePct >= 70 ? "hsl(35,90%,55%)" : "hsl(45,95%,58%)",
|
||||
backgroundColor:
|
||||
queenUsage.usagePct >= 90
|
||||
? "hsl(0,65%,55%)"
|
||||
: queenUsage.usagePct >= 70
|
||||
? "hsl(35,90%,55%)"
|
||||
: "hsl(45,95%,58%)",
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
<span className="text-[10px] text-muted-foreground/70 flex-shrink-0 tabular-nums">
|
||||
<span className="group-hover/ctx:hidden">{queenUsage.usagePct}%</span>
|
||||
<span className="hidden group-hover/ctx:inline">{(queenUsage.estimatedTokens / 1000).toFixed(1)}k / {(queenUsage.maxTokens / 1000).toFixed(0)}k</span>
|
||||
<span className="group-hover/ctx:hidden">
|
||||
{queenUsage.usagePct}%
|
||||
</span>
|
||||
<span className="hidden group-hover/ctx:inline">
|
||||
{(queenUsage.estimatedTokens / 1000).toFixed(1)}k /{" "}
|
||||
{(queenUsage.maxTokens / 1000).toFixed(0)}k
|
||||
</span>
|
||||
</span>
|
||||
</div>
|
||||
)}
|
||||
{workerUsage && (
|
||||
<div className="flex items-center gap-2 flex-1 min-w-0" title={`Worker: ${(workerUsage.estimatedTokens / 1000).toFixed(1)}k / ${(workerUsage.maxTokens / 1000).toFixed(0)}k tokens \u00b7 ${workerUsage.messageCount} messages`}>
|
||||
<Cpu className="w-3 h-3 flex-shrink-0" style={{ color: "hsl(220,60%,55%)" }} />
|
||||
<div
|
||||
className="flex items-center gap-2 flex-1 min-w-0"
|
||||
title={`Worker: ${(workerUsage.estimatedTokens / 1000).toFixed(1)}k / ${(workerUsage.maxTokens / 1000).toFixed(0)}k tokens \u00b7 ${workerUsage.messageCount} messages`}
|
||||
>
|
||||
<Cpu
|
||||
className="w-3 h-3 flex-shrink-0"
|
||||
style={{ color: "hsl(220,60%,55%)" }}
|
||||
/>
|
||||
<div className="flex-1 h-1.5 rounded-full bg-muted/50 overflow-hidden min-w-[60px]">
|
||||
<div
|
||||
className="h-full rounded-full transition-all duration-500 ease-out"
|
||||
style={{
|
||||
width: `${Math.min(workerUsage.usagePct, 100)}%`,
|
||||
backgroundColor: workerUsage.usagePct >= 90 ? "hsl(0,65%,55%)" : workerUsage.usagePct >= 70 ? "hsl(35,90%,55%)" : "hsl(220,60%,55%)",
|
||||
backgroundColor:
|
||||
workerUsage.usagePct >= 90
|
||||
? "hsl(0,65%,55%)"
|
||||
: workerUsage.usagePct >= 70
|
||||
? "hsl(35,90%,55%)"
|
||||
: "hsl(220,60%,55%)",
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
<span className="text-[10px] text-muted-foreground/70 flex-shrink-0 tabular-nums">
|
||||
<span className="group-hover/ctx:hidden">{workerUsage.usagePct}%</span>
|
||||
<span className="hidden group-hover/ctx:inline">{(workerUsage.estimatedTokens / 1000).toFixed(1)}k / {(workerUsage.maxTokens / 1000).toFixed(0)}k</span>
|
||||
<span className="group-hover/ctx:hidden">
|
||||
{workerUsage.usagePct}%
|
||||
</span>
|
||||
<span className="hidden group-hover/ctx:inline">
|
||||
{(workerUsage.estimatedTokens / 1000).toFixed(1)}k /{" "}
|
||||
{(workerUsage.maxTokens / 1000).toFixed(0)}k
|
||||
</span>
|
||||
</span>
|
||||
</div>
|
||||
)}
|
||||
@@ -506,7 +689,9 @@ export default function ChatPanel({ messages, onSend, isWaiting, isWorkerWaiting
|
||||
})()}
|
||||
|
||||
{/* Input area — question widget replaces textarea when a question is pending */}
|
||||
{pendingQuestions && pendingQuestions.length >= 2 && onMultiQuestionSubmit ? (
|
||||
{pendingQuestions &&
|
||||
pendingQuestions.length >= 2 &&
|
||||
onMultiQuestionSubmit ? (
|
||||
<MultiQuestionWidget
|
||||
questions={pendingQuestions}
|
||||
onSubmit={onMultiQuestionSubmit}
|
||||
@@ -521,7 +706,47 @@ export default function ChatPanel({ messages, onSend, isWaiting, isWorkerWaiting
|
||||
/>
|
||||
) : (
|
||||
<form onSubmit={handleSubmit} className="p-4">
|
||||
{/* Image preview strip */}
|
||||
{pendingImages.length > 0 && (
|
||||
<div className="flex flex-wrap gap-2 mb-2 px-1">
|
||||
{pendingImages.map((img, i) => (
|
||||
<div key={i} className="relative group">
|
||||
<img
|
||||
src={img.image_url.url}
|
||||
alt={`preview ${i + 1}`}
|
||||
className="h-16 w-16 object-cover rounded-lg border border-border"
|
||||
/>
|
||||
<button
|
||||
type="button"
|
||||
onClick={() =>
|
||||
setPendingImages((prev) => prev.filter((_, j) => j !== i))
|
||||
}
|
||||
className="absolute -top-1.5 -right-1.5 w-4 h-4 rounded-full bg-destructive text-destructive-foreground flex items-center justify-center opacity-0 group-hover:opacity-100 transition-opacity"
|
||||
>
|
||||
<X className="w-2.5 h-2.5" />
|
||||
</button>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
<div className="flex items-center gap-3 bg-muted/40 rounded-xl px-4 py-2.5 border border-border focus-within:border-primary/40 transition-colors">
|
||||
<input
|
||||
ref={fileInputRef}
|
||||
type="file"
|
||||
accept="image/*"
|
||||
multiple
|
||||
className="hidden"
|
||||
onChange={handleFileChange}
|
||||
/>
|
||||
<button
|
||||
type="button"
|
||||
disabled={disabled || !supportsImages}
|
||||
onClick={() => supportsImages && fileInputRef.current?.click()}
|
||||
className="flex-shrink-0 p-1 rounded-md text-muted-foreground hover:text-foreground disabled:opacity-30 transition-colors"
|
||||
title={supportsImages ? "Attach image" : "Image not supported by the current model"}
|
||||
>
|
||||
<Paperclip className="w-4 h-4" />
|
||||
</button>
|
||||
<textarea
|
||||
ref={textareaRef}
|
||||
rows={1}
|
||||
@@ -538,7 +763,9 @@ export default function ChatPanel({ messages, onSend, isWaiting, isWorkerWaiting
|
||||
handleSubmit(e);
|
||||
}
|
||||
}}
|
||||
placeholder={disabled ? "Connecting to agent..." : "Message Queen Bee..."}
|
||||
placeholder={
|
||||
disabled ? "Connecting to agent..." : "Message Queen Bee..."
|
||||
}
|
||||
disabled={disabled}
|
||||
className="flex-1 bg-transparent text-sm text-foreground outline-none placeholder:text-muted-foreground disabled:opacity-50 disabled:cursor-not-allowed resize-none overflow-y-auto"
|
||||
/>
|
||||
@@ -553,7 +780,9 @@ export default function ChatPanel({ messages, onSend, isWaiting, isWorkerWaiting
|
||||
) : (
|
||||
<button
|
||||
type="submit"
|
||||
disabled={!input.trim() || disabled}
|
||||
disabled={
|
||||
(!input.trim() && pendingImages.length === 0) || disabled
|
||||
}
|
||||
className="p-2 rounded-lg bg-primary text-primary-foreground disabled:opacity-30 hover:opacity-90 transition-opacity"
|
||||
>
|
||||
<Send className="w-4 h-4" />
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { useState, useCallback, useRef, useEffect, useMemo } from "react";
|
||||
import ReactDOM from "react-dom";
|
||||
import { useSearchParams, useNavigate } from "react-router-dom";
|
||||
import { Plus, KeyRound, Sparkles, Layers, ChevronLeft, Bot, Loader2, WifiOff, X } from "lucide-react";
|
||||
import { Plus, KeyRound, Sparkles, Layers, ChevronLeft, Bot, Loader2, WifiOff, X, FolderOpen } from "lucide-react";
|
||||
import type { GraphNode, NodeStatus } from "@/components/graph-types";
|
||||
import DraftGraph from "@/components/DraftGraph";
|
||||
import ChatPanel, { type ChatMessage } from "@/components/ChatPanel";
|
||||
@@ -354,6 +354,8 @@ interface AgentBackendState {
|
||||
pendingQuestionSource: "queen" | "worker" | null;
|
||||
/** Per-node context window usage (from context_usage_updated events) */
|
||||
contextUsage: Record<string, { usagePct: number; messageCount: number; estimatedTokens: number; maxTokens: number }>;
|
||||
/** Whether the queen's LLM supports image content — false disables the attach button */
|
||||
queenSupportsImages: boolean;
|
||||
}
|
||||
|
||||
function defaultAgentState(): AgentBackendState {
|
||||
@@ -392,6 +394,7 @@ function defaultAgentState(): AgentBackendState {
|
||||
pendingQuestions: null,
|
||||
pendingQuestionSource: null,
|
||||
contextUsage: {},
|
||||
queenSupportsImages: true,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -923,6 +926,7 @@ export default function Workspace() {
|
||||
queenReady: true,
|
||||
queenPhase: qPhase,
|
||||
queenBuilding: qPhase === "building",
|
||||
queenSupportsImages: liveSession.queen_supports_images !== false,
|
||||
// Restore flowchart overlay from persisted events
|
||||
...(restoredFlowchartMap ? { flowchartMap: restoredFlowchartMap } : {}),
|
||||
...(restoredOriginalDraft ? { originalDraft: restoredOriginalDraft, draftGraph: null } : {}),
|
||||
@@ -1122,6 +1126,7 @@ export default function Workspace() {
|
||||
displayName,
|
||||
queenPhase: initialPhase,
|
||||
queenBuilding: initialPhase === "building",
|
||||
queenSupportsImages: session.queen_supports_images !== false,
|
||||
// Restore flowchart overlay from persisted events
|
||||
...(restoredFlowchartMap ? { flowchartMap: restoredFlowchartMap } : {}),
|
||||
...(restoredOriginalDraft ? { originalDraft: restoredOriginalDraft, draftGraph: null } : {}),
|
||||
@@ -2613,7 +2618,7 @@ export default function Workspace() {
|
||||
});
|
||||
|
||||
// --- handleSend ---
|
||||
const handleSend = useCallback((text: string, thread: string) => {
|
||||
const handleSend = useCallback((text: string, thread: string, images?: import("@/components/ChatPanel").ImageContent[]) => {
|
||||
if (!activeSession) return;
|
||||
const state = agentStates[activeWorker];
|
||||
|
||||
@@ -2679,6 +2684,7 @@ export default function Workspace() {
|
||||
const userMsg: ChatMessage = {
|
||||
id: makeId(), agent: "You", agentColor: "",
|
||||
content: text, timestamp: "", type: "user", thread, createdAt: Date.now(),
|
||||
images,
|
||||
};
|
||||
setSessionsByAgent(prev => ({
|
||||
...prev,
|
||||
@@ -2690,7 +2696,7 @@ export default function Workspace() {
|
||||
updateAgentState(activeWorker, { isTyping: true, queenIsTyping: true });
|
||||
|
||||
if (state?.sessionId && state?.ready) {
|
||||
executionApi.chat(state.sessionId, text).catch((err: unknown) => {
|
||||
executionApi.chat(state.sessionId, text, images).catch((err: unknown) => {
|
||||
const errMsg = err instanceof Error ? err.message : String(err);
|
||||
const errorChatMsg: ChatMessage = {
|
||||
id: makeId(), agent: "System", agentColor: "",
|
||||
@@ -3106,6 +3112,16 @@ export default function Workspace() {
|
||||
<KeyRound className="w-3.5 h-3.5" />
|
||||
Credentials
|
||||
</button>
|
||||
{activeAgentState?.sessionId && (
|
||||
<button
|
||||
onClick={() => sessionsApi.revealFolder(activeAgentState.sessionId!).catch(() => {})}
|
||||
className="flex items-center gap-1.5 px-3 py-1.5 rounded-md text-xs font-medium text-muted-foreground hover:text-foreground hover:bg-muted/50 transition-colors flex-shrink-0"
|
||||
title="Open session data folder"
|
||||
>
|
||||
<FolderOpen className="w-3.5 h-3.5" />
|
||||
Data
|
||||
</button>
|
||||
)}
|
||||
</TopBar>
|
||||
|
||||
{/* Main content area */}
|
||||
@@ -3224,6 +3240,7 @@ export default function Workspace() {
|
||||
onMultiQuestionSubmit={handleMultiQuestionAnswer}
|
||||
onQuestionDismiss={handleQuestionDismiss}
|
||||
contextUsage={activeAgentState?.contextUsage}
|
||||
supportsImages={activeAgentState?.queenSupportsImages ?? true}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -0,0 +1,58 @@
|
||||
"""Tests for LLM model capability checks."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from framework.llm.capabilities import supports_image_tool_results
|
||||
|
||||
|
||||
class TestSupportsImageToolResults:
    """Verify the deny-list correctly identifies models that can't handle images."""

    # Vision-capable models, including provider-prefixed routing variants.
    _VISION_CAPABLE = [
        "gpt-4o",
        "gpt-4o-mini",
        "gpt-4-turbo",
        "openai/gpt-4o",
        "anthropic/claude-sonnet-4-20250514",
        "claude-haiku-4-5-20251001",
        "gemini/gemini-1.5-pro",
        "google/gemini-1.5-flash",
        "mistral/mistral-large",
        "groq/llama3-70b",
        "together/meta-llama/Llama-3-70b",
        "fireworks_ai/llama-v3-70b",
        "azure/gpt-4o",
        "kimi/claude-sonnet-4-20250514",
        "hive/claude-sonnet-4-20250514",
    ]

    # Text-only backends and local runtimes that must not receive images.
    _TEXT_ONLY = [
        "deepseek/deepseek-chat",
        "deepseek/deepseek-coder",
        "deepseek-chat",
        "deepseek-reasoner",
        "ollama/llama3",
        "ollama/mistral",
        "ollama_chat/llama3",
        "lm_studio/my-model",
        "vllm/meta-llama/Llama-3-70b",
        "llamacpp/model",
        "cerebras/llama3-70b",
    ]

    @pytest.mark.parametrize("model", _VISION_CAPABLE)
    def test_supported_models(self, model: str):
        assert supports_image_tool_results(model) is True

    @pytest.mark.parametrize("model", _TEXT_ONLY)
    def test_unsupported_models(self, model: str):
        assert supports_image_tool_results(model) is False

    def test_case_insensitive(self):
        # The check must ignore case in both provider prefix and model name.
        for model, expected in (
            ("DeepSeek/deepseek-chat", False),
            ("OLLAMA/llama3", False),
            ("GPT-4o", True),
        ):
            assert supports_image_tool_results(model) is expected
|
||||
@@ -48,6 +48,9 @@ dev = [
|
||||
sandbox = [
|
||||
"RestrictedPython>=7.0",
|
||||
]
|
||||
browser = [
|
||||
"pillow>=10.0.0",
|
||||
]
|
||||
ocr = [
|
||||
"pytesseract>=0.3.10",
|
||||
"pillow>=10.0.0",
|
||||
|
||||
@@ -0,0 +1,192 @@
|
||||
"""Ref system for aria snapshots.
|
||||
|
||||
Assigns short `[ref=eN]` markers to interactive elements in Playwright's
|
||||
aria_snapshot() output so the LLM can reference elements by ref instead of
|
||||
constructing fragile CSS selectors.
|
||||
|
||||
Usage:
|
||||
annotated, ref_map = annotate_snapshot(raw_snapshot)
|
||||
# ... later, when the LLM says selector="e5" ...
|
||||
playwright_selector = resolve_ref("e5", ref_map)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .session import BrowserSession
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Role sets (matching Playwright's aria roles that matter for interaction)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Roles that accept direct user interaction — these always receive refs.
INTERACTIVE_ROLES: frozenset[str] = frozenset(
    (
        "button",
        "checkbox",
        "combobox",
        "link",
        "listbox",
        "menuitem",
        "menuitemcheckbox",
        "menuitemradio",
        "option",
        "radio",
        "scrollbar",
        "searchbox",
        "slider",
        "spinbutton",
        "switch",
        "tab",
        "textbox",
        "treeitem",
    )
)

# Content roles that only get refs when they carry an accessible name.
NAMED_CONTENT_ROLES: frozenset[str] = frozenset(("cell", "heading", "img"))

# Regex: captures indent, role, optional quoted name, and trailing text.
# Example line: "  - button \"Submit\" [disabled]"
#   group(1)=indent/bullet, group(2)=role "button",
#   group(3)=name "Submit" (or None), group(4)=rest " [disabled]"
_LINE_RE = re.compile(r"^(\s*-\s+)(\w+)(?:\s+\"([^\"]*)\")?(.*?)$")
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Data types
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class RefEntry:
    """A single ref entry mapping to a Playwright role selector."""

    # Aria role of the element (e.g. "button", "link").
    role: str
    # Accessible name captured from the snapshot line, or None when the
    # line had no quoted name.
    name: str | None
    # 0-based index among elements sharing the same (role, name) pair,
    # used for ">> nth=" disambiguation when the ref is resolved.
    nth: int


# ref_id (e.g. "e0") -> RefEntry
RefMap = dict[str, RefEntry]
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# annotate_snapshot
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def annotate_snapshot(snapshot: str) -> tuple[str, RefMap]:
    """Inject ``[ref=eN]`` markers into an aria snapshot.

    Refs are assigned in line order: interactive roles always qualify,
    named-content roles qualify only when they carry a non-empty name.

    Returns:
        (annotated_text, ref_map) where ref_map maps ref ids to RefEntry.
    """
    out_lines = snapshot.split("\n")

    ref_map: RefMap = {}
    # Running occurrence count per (role, name) pair, for nth disambiguation.
    occurrences: dict[tuple[str, str | None], int] = {}

    for idx, raw in enumerate(out_lines):
        match = _LINE_RE.match(raw)
        if match is None:
            continue

        role = match.group(2)
        name = match.group(3)  # None if no quoted name

        if role not in INTERACTIVE_ROLES and not (name and role in NAMED_CONTENT_ROLES):
            continue

        key = (role, name)
        nth = occurrences.get(key, 0)
        occurrences[key] = nth + 1

        # Refs are numbered sequentially in assignment order.
        ref_id = f"e{len(ref_map)}"
        ref_map[ref_id] = RefEntry(role=role, name=name, nth=nth)

        # Append the marker after stripping trailing whitespace.
        out_lines[idx] = raw.rstrip() + f" [ref={ref_id}]"

    return "\n".join(out_lines), ref_map
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# resolve_ref
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# A ref id is the letter "e" followed by one or more digits (e.g. "e12").
_REF_PATTERN = re.compile(r"^e\d+$")


def resolve_ref(selector: str, ref_map: RefMap | None) -> str:
    """Resolve a ref id (e.g. ``"e5"``) to a Playwright role selector.

    If *selector* doesn't look like a ref (``e\\d+``), it's returned as-is
    so that plain CSS selectors keep working.

    Raises:
        ValueError: If the ref is not found or no snapshot has been taken.
    """
    # Anything not shaped like a ref passes through untouched
    # (CSS / XPath / role= selectors).
    if _REF_PATTERN.match(selector) is None:
        return selector

    if ref_map is None:
        raise ValueError(
            f"Ref '{selector}' used but no snapshot has been taken yet. "
            "Call browser_snapshot first."
        )

    if selector not in ref_map:
        valid = ", ".join(sorted(ref_map.keys(), key=lambda k: int(k[1:])))
        raise ValueError(
            f"Ref '{selector}' not found. Valid refs: {valid}. "
            "The page may have changed — take a new snapshot."
        )

    entry = ref_map[selector]

    # Build the Playwright role selector, quoting the name when present.
    if entry.name is None:
        base = f"role={entry.role}"
    else:
        quoted = entry.name.replace("\\", "\\\\").replace('"', '\\"')
        base = f'role={entry.role}[name="{quoted}"]'

    # Always include nth to disambiguate repeated (role, name) pairs.
    return base + f" >> nth={entry.nth}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Convenience wrapper
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def resolve_selector(
    selector: str,
    session: BrowserSession,
    target_id: str | None,
) -> str:
    """Resolve a selector that might be a ref, using the session's ref maps.

    Args:
        selector: A CSS selector or ref id (e.g. ``"e5"``).
        session: The current BrowserSession.
        target_id: The target page id (falls back to session.active_page_id).
    """
    page_id = target_id if target_id else session.active_page_id
    # No page id means no snapshot could have been stored for this target.
    current_map = session.ref_maps.get(page_id) if page_id else None
    return resolve_ref(selector, current_map)
|
||||
@@ -353,6 +353,7 @@ class BrowserSession:
|
||||
active_page_id: str | None = None
|
||||
console_messages: dict[str, list[dict]] = field(default_factory=dict)
|
||||
page_meta: dict[str, TabMeta] = field(default_factory=dict)
|
||||
ref_maps: dict[str, dict] = field(default_factory=dict) # target_id → RefMap
|
||||
_playwright: Any = None
|
||||
_lock: asyncio.Lock = field(default_factory=asyncio.Lock)
|
||||
|
||||
@@ -447,6 +448,7 @@ class BrowserSession:
|
||||
self.active_page_id = None
|
||||
self.console_messages.clear()
|
||||
self.page_meta.clear()
|
||||
self.ref_maps.clear()
|
||||
|
||||
async def start(self, headless: bool = True, persistent: bool = True) -> dict:
|
||||
"""
|
||||
@@ -623,6 +625,7 @@ class BrowserSession:
|
||||
self.active_page_id = None
|
||||
self.console_messages.clear()
|
||||
self.page_meta.clear()
|
||||
self.ref_maps.clear()
|
||||
self.user_data_dir = None
|
||||
self.persistent = False
|
||||
|
||||
@@ -801,6 +804,7 @@ class BrowserSession:
|
||||
self.pages.pop(target_id, None)
|
||||
self.console_messages.pop(target_id, None)
|
||||
self.page_meta.pop(target_id, None)
|
||||
self.ref_maps.pop(target_id, None)
|
||||
|
||||
if self.active_page_id == target_id:
|
||||
self.active_page_id = next(iter(self.pages), None)
|
||||
|
||||
@@ -16,6 +16,7 @@ from playwright.async_api import (
|
||||
)
|
||||
|
||||
from ..highlight import highlight_element
|
||||
from ..refs import resolve_selector
|
||||
from ..session import DEFAULT_TIMEOUT_MS, get_session
|
||||
|
||||
|
||||
@@ -52,6 +53,10 @@ def register_advanced_tools(mcp: FastMCP) -> None:
|
||||
return {"ok": False, "error": "No active tab"}
|
||||
|
||||
if selector:
|
||||
try:
|
||||
selector = resolve_selector(selector, session, target_id)
|
||||
except ValueError as e:
|
||||
return {"ok": False, "error": str(e)}
|
||||
await page.wait_for_selector(selector, timeout=timeout_ms)
|
||||
return {"ok": True, "action": "wait", "condition": "selector", "selector": selector}
|
||||
elif text:
|
||||
@@ -122,6 +127,11 @@ def register_advanced_tools(mcp: FastMCP) -> None:
|
||||
if not page:
|
||||
return {"ok": False, "error": "No active tab"}
|
||||
|
||||
try:
|
||||
selector = resolve_selector(selector, session, target_id)
|
||||
except ValueError as e:
|
||||
return {"ok": False, "error": str(e)}
|
||||
|
||||
element = await page.wait_for_selector(selector, timeout=timeout_ms)
|
||||
if not element:
|
||||
return {"ok": False, "error": f"Element not found: {selector}"}
|
||||
@@ -160,6 +170,11 @@ def register_advanced_tools(mcp: FastMCP) -> None:
|
||||
if not page:
|
||||
return {"ok": False, "error": "No active tab"}
|
||||
|
||||
try:
|
||||
selector = resolve_selector(selector, session, target_id)
|
||||
except ValueError as e:
|
||||
return {"ok": False, "error": str(e)}
|
||||
|
||||
element = await page.wait_for_selector(selector, timeout=timeout_ms)
|
||||
if not element:
|
||||
return {"ok": False, "error": f"Element not found: {selector}"}
|
||||
@@ -238,6 +253,11 @@ def register_advanced_tools(mcp: FastMCP) -> None:
|
||||
if not Path(path).exists():
|
||||
return {"ok": False, "error": f"File not found: {path}"}
|
||||
|
||||
try:
|
||||
selector = resolve_selector(selector, session, target_id)
|
||||
except ValueError as e:
|
||||
return {"ok": False, "error": str(e)}
|
||||
|
||||
await highlight_element(page, selector)
|
||||
|
||||
element = await page.wait_for_selector(selector, timeout=timeout_ms)
|
||||
|
||||
@@ -7,14 +7,113 @@ Tools for extracting content and capturing page state.
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Literal
|
||||
|
||||
from fastmcp import FastMCP
|
||||
from mcp.types import ImageContent, TextContent
|
||||
from playwright.async_api import Error as PlaywrightError
|
||||
|
||||
from ..session import get_session
|
||||
|
||||
logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Screenshot normalization
# ---------------------------------------------------------------------------

# JPEG qualities tried at each candidate size, best first.
_QUALITY_STEPS = (85, 70, 50)
# Never shrink the longest side below this many pixels.
_MIN_DIMENSION = 400
# Step between candidate sizes when searching downward.
_DIMENSION_STEP = 200


def _normalize_screenshot(
    raw_bytes: bytes,
    image_type: str,
    *,
    max_dimension: int = 2000,
    max_bytes: int = 5_000_000,
) -> tuple[bytes, str]:
    """Normalize a screenshot to fit within size and dimension limits.

    Progressively resizes and compresses to JPEG until the image fits
    under *max_bytes* and *max_dimension*. If Pillow is not installed
    the original bytes are returned unchanged.

    Args:
        raw_bytes: Raw PNG or JPEG image bytes from Playwright.
        image_type: Original format (``"png"`` or ``"jpeg"``).
        max_dimension: Maximum width or height in pixels.
        max_bytes: Maximum file size in bytes.

    Returns:
        ``(normalized_bytes, image_type)`` where *image_type* may change
        to ``"jpeg"`` if compression was applied.
    """
    try:
        from PIL import Image
    except ImportError:
        # Best-effort feature: without Pillow we simply pass bytes through.
        logger.debug("Pillow not installed — skipping screenshot normalization")
        return raw_bytes, image_type

    try:
        probe = Image.open(io.BytesIO(raw_bytes))
        longest = max(probe.size)

        # Nothing to do when both constraints are already satisfied.
        if len(raw_bytes) <= max_bytes and longest <= max_dimension:
            return raw_bytes, image_type

        # Candidate target sizes, largest first; never upscale.
        sides = [
            side
            for side in range(max_dimension, _MIN_DIMENSION - 1, -_DIMENSION_STEP)
            if side < longest
        ]
        # An image that is small enough in pixels but too big in bytes is
        # first re-encoded at its current size.
        if longest <= max_dimension:
            sides.insert(0, longest)

        best: tuple[bytes, int] | None = None

        for side in sides:
            # thumbnail() mutates the image, so decode a fresh copy per candidate.
            resized = Image.open(io.BytesIO(raw_bytes))
            resized.thumbnail((side, side), Image.LANCZOS)

            # JPEG has no alpha channel.
            if resized.mode in ("RGBA", "LA", "P"):
                resized = resized.convert("RGB")

            for quality in _QUALITY_STEPS:
                sink = io.BytesIO()
                resized.save(sink, format="JPEG", quality=quality, optimize=True)
                encoded = sink.getvalue()

                if best is None or len(encoded) < best[1]:
                    best = (encoded, len(encoded))

                if len(encoded) <= max_bytes:
                    return encoded, "jpeg"

        # Nothing fit — fall back to the smallest encoding we produced.
        if best is not None:
            logger.warning(
                "Screenshot normalization: could not fit under %d bytes (best: %d bytes)",
                max_bytes,
                best[1],
            )
            return best[0], "jpeg"

        return raw_bytes, image_type

    except Exception:
        # Any decode/encode failure degrades gracefully to the original bytes.
        logger.warning("Screenshot normalization failed — returning original", exc_info=True)
        return raw_bytes, image_type
|
||||
|
||||
|
||||
def _format_ax_tree(nodes: list[dict[str, Any]]) -> str:
|
||||
"""Format a CDP Accessibility.getFullAXTree result into an indented text tree.
|
||||
@@ -102,10 +201,13 @@ def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
full_page: bool = False,
|
||||
selector: str | None = None,
|
||||
image_type: Literal["png", "jpeg"] = "png",
|
||||
) -> dict:
|
||||
) -> list:
|
||||
"""
|
||||
Take a screenshot of the current page.
|
||||
|
||||
Returns the screenshot as an image the LLM can see, alongside
|
||||
text metadata (URL, size, etc.).
|
||||
|
||||
Args:
|
||||
target_id: Tab ID (default: active tab)
|
||||
profile: Browser profile name (default: "default")
|
||||
@@ -114,18 +216,32 @@ def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
image_type: Image format - png or jpeg (default: png)
|
||||
|
||||
Returns:
|
||||
Dict with screenshot data (base64 encoded) and metadata
|
||||
List of content blocks: text metadata + image
|
||||
"""
|
||||
try:
|
||||
session = get_session(profile)
|
||||
page = session.get_page(target_id)
|
||||
if not page:
|
||||
return {"ok": False, "error": "No active tab"}
|
||||
return [
|
||||
TextContent(
|
||||
type="text", text=json.dumps({"ok": False, "error": "No active tab"})
|
||||
)
|
||||
]
|
||||
|
||||
if selector:
|
||||
from ..refs import resolve_selector
|
||||
|
||||
selector = resolve_selector(selector, session, target_id)
|
||||
element = await page.query_selector(selector)
|
||||
if not element:
|
||||
return {"ok": False, "error": f"Element not found: {selector}"}
|
||||
return [
|
||||
TextContent(
|
||||
type="text",
|
||||
text=json.dumps(
|
||||
{"ok": False, "error": f"Element not found: {selector}"}
|
||||
),
|
||||
)
|
||||
]
|
||||
screenshot_bytes = await element.screenshot(type=image_type)
|
||||
else:
|
||||
screenshot_bytes = await page.screenshot(
|
||||
@@ -133,16 +249,31 @@ def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
type=image_type,
|
||||
)
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
"targetId": target_id or session.active_page_id,
|
||||
"url": page.url,
|
||||
"imageType": image_type,
|
||||
"imageBase64": base64.b64encode(screenshot_bytes).decode(),
|
||||
"size": len(screenshot_bytes),
|
||||
}
|
||||
normalized_bytes, normalized_type = _normalize_screenshot(screenshot_bytes, image_type)
|
||||
meta = json.dumps(
|
||||
{
|
||||
"ok": True,
|
||||
"targetId": target_id or session.active_page_id,
|
||||
"url": page.url,
|
||||
"imageType": normalized_type,
|
||||
"size": len(normalized_bytes),
|
||||
"originalSize": len(screenshot_bytes),
|
||||
}
|
||||
)
|
||||
return [
|
||||
TextContent(type="text", text=meta),
|
||||
ImageContent(
|
||||
type="image",
|
||||
data=base64.b64encode(normalized_bytes).decode(),
|
||||
mimeType=f"image/{normalized_type}",
|
||||
),
|
||||
]
|
||||
except PlaywrightError as e:
|
||||
return {"ok": False, "error": f"Browser error: {e!s}"}
|
||||
return [
|
||||
TextContent(
|
||||
type="text", text=json.dumps({"ok": False, "error": f"Browser error: {e!s}"})
|
||||
)
|
||||
]
|
||||
|
||||
@mcp.tool()
|
||||
async def browser_snapshot(
|
||||
@@ -196,6 +327,13 @@ def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
await cdp.detach()
|
||||
else:
|
||||
snapshot = await page.locator(":root").aria_snapshot()
|
||||
# Annotate with [ref=eN] markers for interactive elements
|
||||
from ..refs import annotate_snapshot
|
||||
|
||||
snapshot, ref_map = annotate_snapshot(snapshot)
|
||||
tid = target_id or session.active_page_id
|
||||
if tid:
|
||||
session.ref_maps[tid] = ref_map
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
|
||||
@@ -17,7 +17,8 @@ from playwright.async_api import (
|
||||
)
|
||||
|
||||
from ..highlight import highlight_coordinate, highlight_element
|
||||
from ..session import DEFAULT_TIMEOUT_MS, get_session
|
||||
from ..refs import annotate_snapshot, resolve_selector
|
||||
from ..session import DEFAULT_TIMEOUT_MS, BrowserSession, get_session
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -27,6 +28,8 @@ _AUTO_SNAPSHOT_MAX_CHARS = 4000
|
||||
async def _auto_snapshot(
|
||||
page: Page,
|
||||
*,
|
||||
session: BrowserSession | None = None,
|
||||
target_id: str | None = None,
|
||||
wait_for_nav: bool = False,
|
||||
max_chars: int = _AUTO_SNAPSHOT_MAX_CHARS,
|
||||
) -> str | None:
|
||||
@@ -34,6 +37,8 @@ async def _auto_snapshot(
|
||||
|
||||
Args:
|
||||
page: Playwright Page instance.
|
||||
session: BrowserSession to store ref maps in.
|
||||
target_id: Target page id for ref map storage.
|
||||
wait_for_nav: If True, briefly wait for any in-flight navigation to
|
||||
settle before snapshotting. Used after click actions that may
|
||||
trigger page navigation.
|
||||
@@ -48,6 +53,14 @@ async def _auto_snapshot(
|
||||
except Exception:
|
||||
pass # No navigation happened — that's fine
|
||||
snapshot = await page.locator(":root").aria_snapshot()
|
||||
|
||||
# Annotate with refs before truncation so the full RefMap is captured
|
||||
if snapshot and session:
|
||||
snapshot, ref_map = annotate_snapshot(snapshot)
|
||||
tid = target_id or session.active_page_id
|
||||
if tid:
|
||||
session.ref_maps[tid] = ref_map
|
||||
|
||||
if snapshot and max_chars > 0 and len(snapshot) > max_chars:
|
||||
snapshot = (
|
||||
snapshot[:max_chars]
|
||||
@@ -96,6 +109,11 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
if not page:
|
||||
return {"ok": False, "error": "No active tab"}
|
||||
|
||||
try:
|
||||
selector = resolve_selector(selector, session, target_id)
|
||||
except ValueError as e:
|
||||
return {"ok": False, "error": str(e)}
|
||||
|
||||
await highlight_element(page, selector)
|
||||
|
||||
if double_click:
|
||||
@@ -105,7 +123,12 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
|
||||
result: dict = {"ok": True, "action": "click", "selector": selector}
|
||||
if auto_snapshot:
|
||||
snapshot = await _auto_snapshot(page, wait_for_nav=True)
|
||||
snapshot = await _auto_snapshot(
|
||||
page,
|
||||
session=session,
|
||||
target_id=target_id,
|
||||
wait_for_nav=True,
|
||||
)
|
||||
if snapshot:
|
||||
result["snapshot"] = snapshot
|
||||
result["url"] = page.url
|
||||
@@ -151,7 +174,12 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
await page.mouse.click(x, y, button=button)
|
||||
result: dict = {"ok": True, "action": "click_coordinate", "x": x, "y": y}
|
||||
if auto_snapshot:
|
||||
snapshot = await _auto_snapshot(page, wait_for_nav=True)
|
||||
snapshot = await _auto_snapshot(
|
||||
page,
|
||||
session=session,
|
||||
target_id=target_id,
|
||||
wait_for_nav=True,
|
||||
)
|
||||
if snapshot:
|
||||
result["snapshot"] = snapshot
|
||||
result["url"] = page.url
|
||||
@@ -194,6 +222,11 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
if not page:
|
||||
return {"ok": False, "error": "No active tab"}
|
||||
|
||||
try:
|
||||
selector = resolve_selector(selector, session, target_id)
|
||||
except ValueError as e:
|
||||
return {"ok": False, "error": str(e)}
|
||||
|
||||
await highlight_element(page, selector)
|
||||
|
||||
if clear_first:
|
||||
@@ -202,7 +235,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
await page.type(selector, text, delay=delay_ms, timeout=timeout_ms)
|
||||
result: dict = {"ok": True, "action": "type", "selector": selector, "length": len(text)}
|
||||
if auto_snapshot:
|
||||
snapshot = await _auto_snapshot(page)
|
||||
snapshot = await _auto_snapshot(page, session=session, target_id=target_id)
|
||||
if snapshot:
|
||||
result["snapshot"] = snapshot
|
||||
result["url"] = page.url
|
||||
@@ -244,12 +277,17 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
if not page:
|
||||
return {"ok": False, "error": "No active tab"}
|
||||
|
||||
try:
|
||||
selector = resolve_selector(selector, session, target_id)
|
||||
except ValueError as e:
|
||||
return {"ok": False, "error": str(e)}
|
||||
|
||||
await highlight_element(page, selector)
|
||||
|
||||
await page.fill(selector, value, timeout=timeout_ms)
|
||||
result: dict = {"ok": True, "action": "fill", "selector": selector}
|
||||
if auto_snapshot:
|
||||
snapshot = await _auto_snapshot(page)
|
||||
snapshot = await _auto_snapshot(page, session=session, target_id=target_id)
|
||||
if snapshot:
|
||||
result["snapshot"] = snapshot
|
||||
result["url"] = page.url
|
||||
@@ -287,6 +325,10 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
return {"ok": False, "error": "No active tab"}
|
||||
|
||||
if selector:
|
||||
try:
|
||||
selector = resolve_selector(selector, session, target_id)
|
||||
except ValueError as e:
|
||||
return {"ok": False, "error": str(e)}
|
||||
await page.press(selector, key, timeout=timeout_ms)
|
||||
else:
|
||||
await page.keyboard.press(key)
|
||||
@@ -322,6 +364,11 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
if not page:
|
||||
return {"ok": False, "error": "No active tab"}
|
||||
|
||||
try:
|
||||
selector = resolve_selector(selector, session, target_id)
|
||||
except ValueError as e:
|
||||
return {"ok": False, "error": str(e)}
|
||||
|
||||
await page.hover(selector, timeout=timeout_ms)
|
||||
return {"ok": True, "action": "hover", "selector": selector}
|
||||
except PlaywrightTimeout:
|
||||
@@ -360,6 +407,11 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
if not page:
|
||||
return {"ok": False, "error": "No active tab"}
|
||||
|
||||
try:
|
||||
selector = resolve_selector(selector, session, target_id)
|
||||
except ValueError as e:
|
||||
return {"ok": False, "error": str(e)}
|
||||
|
||||
selected = await page.select_option(selector, values, timeout=timeout_ms)
|
||||
result: dict = {
|
||||
"ok": True,
|
||||
@@ -368,7 +420,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
"selected": selected,
|
||||
}
|
||||
if auto_snapshot:
|
||||
snapshot = await _auto_snapshot(page)
|
||||
snapshot = await _auto_snapshot(page, session=session, target_id=target_id)
|
||||
if snapshot:
|
||||
result["snapshot"] = snapshot
|
||||
result["url"] = page.url
|
||||
@@ -422,6 +474,10 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
delta_x = -amount
|
||||
|
||||
if selector:
|
||||
try:
|
||||
selector = resolve_selector(selector, session, target_id)
|
||||
except ValueError as e:
|
||||
return {"ok": False, "error": str(e)}
|
||||
element = await page.query_selector(selector)
|
||||
if element:
|
||||
await element.evaluate(f"e => e.scrollBy({delta_x}, {delta_y})")
|
||||
@@ -435,7 +491,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
"amount": amount,
|
||||
}
|
||||
if auto_snapshot:
|
||||
snapshot = await _auto_snapshot(page)
|
||||
snapshot = await _auto_snapshot(page, session=session, target_id=target_id)
|
||||
if snapshot:
|
||||
result["snapshot"] = snapshot
|
||||
result["url"] = page.url
|
||||
@@ -474,6 +530,12 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
if not page:
|
||||
return {"ok": False, "error": "No active tab"}
|
||||
|
||||
try:
|
||||
start_selector = resolve_selector(start_selector, session, target_id)
|
||||
end_selector = resolve_selector(end_selector, session, target_id)
|
||||
except ValueError as e:
|
||||
return {"ok": False, "error": str(e)}
|
||||
|
||||
await page.drag_and_drop(
|
||||
start_selector,
|
||||
end_selector,
|
||||
@@ -486,7 +548,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
"to": end_selector,
|
||||
}
|
||||
if auto_snapshot:
|
||||
snapshot = await _auto_snapshot(page)
|
||||
snapshot = await _auto_snapshot(page, session=session, target_id=target_id)
|
||||
if snapshot:
|
||||
result["snapshot"] = snapshot
|
||||
result["url"] = page.url
|
||||
|
||||
@@ -0,0 +1,187 @@
|
||||
"""Tests for the browser ref system (annotate_snapshot / resolve_ref)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from gcu.browser.refs import (
|
||||
RefEntry,
|
||||
annotate_snapshot,
|
||||
resolve_ref,
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# annotate_snapshot
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
SAMPLE_SNAPSHOT = """\
|
||||
- navigation "Main":
|
||||
- link "Home"
|
||||
- link "About"
|
||||
- main:
|
||||
- heading "Welcome"
|
||||
- textbox "Search"
|
||||
- button "Submit"
|
||||
- paragraph: some text here
|
||||
- img "Logo"
|
||||
- list:
|
||||
- listitem:
|
||||
- link "Item 1"
|
||||
- listitem:
|
||||
- link "Item 2\""""
|
||||
|
||||
|
||||
class TestAnnotateSnapshot:
|
||||
def test_assigns_refs_to_interactive_roles(self):
|
||||
annotated, ref_map = annotate_snapshot(SAMPLE_SNAPSHOT)
|
||||
# link, textbox, button should all get refs
|
||||
assert "[ref=e" in annotated
|
||||
# Check that specific interactive elements got refs
|
||||
roles_in_map = {entry.role for entry in ref_map.values()}
|
||||
assert "link" in roles_in_map
|
||||
assert "textbox" in roles_in_map
|
||||
assert "button" in roles_in_map
|
||||
|
||||
def test_skips_structural_roles(self):
|
||||
annotated, ref_map = annotate_snapshot(SAMPLE_SNAPSHOT)
|
||||
roles_in_map = {entry.role for entry in ref_map.values()}
|
||||
# navigation, main, list, listitem, paragraph are structural — no refs
|
||||
assert "navigation" not in roles_in_map
|
||||
assert "main" not in roles_in_map
|
||||
assert "list" not in roles_in_map
|
||||
assert "listitem" not in roles_in_map
|
||||
assert "paragraph" not in roles_in_map
|
||||
|
||||
def test_named_content_roles_get_refs(self):
|
||||
annotated, ref_map = annotate_snapshot(SAMPLE_SNAPSHOT)
|
||||
roles_in_map = {entry.role for entry in ref_map.values()}
|
||||
# heading and img have names, so they should get refs
|
||||
assert "heading" in roles_in_map
|
||||
assert "img" in roles_in_map
|
||||
|
||||
def test_unnamed_content_roles_skip(self):
|
||||
snapshot = "- heading\n- img"
|
||||
_, ref_map = annotate_snapshot(snapshot)
|
||||
# No names → no refs for content roles
|
||||
assert len(ref_map) == 0
|
||||
|
||||
def test_preserves_non_matching_lines(self):
|
||||
snapshot = 'some random text\n- button "OK"\nanother line'
|
||||
annotated, _ = annotate_snapshot(snapshot)
|
||||
lines = annotated.split("\n")
|
||||
assert lines[0] == "some random text"
|
||||
assert lines[2] == "another line"
|
||||
|
||||
def test_nth_disambiguation(self):
|
||||
snapshot = '- button "Save"\n- button "Save"\n- button "Cancel"'
|
||||
annotated, ref_map = annotate_snapshot(snapshot)
|
||||
|
||||
# Two "Save" buttons should have nth=0 and nth=1
|
||||
save_entries = [
|
||||
(rid, e) for rid, e in ref_map.items() if e.role == "button" and e.name == "Save"
|
||||
]
|
||||
assert len(save_entries) == 2
|
||||
nths = sorted(e.nth for _, e in save_entries)
|
||||
assert nths == [0, 1]
|
||||
|
||||
# "Cancel" should have nth=0
|
||||
cancel_entries = [e for e in ref_map.values() if e.role == "button" and e.name == "Cancel"]
|
||||
assert len(cancel_entries) == 1
|
||||
assert cancel_entries[0].nth == 0
|
||||
|
||||
def test_sequential_ref_ids(self):
    """Ref ids are assigned as e0, e1, e2, ... in document order."""
    _, ref_map = annotate_snapshot('- link "A"\n- link "B"\n- link "C"')
    assert sorted(ref_map) == ["e0", "e1", "e2"]
def test_empty_snapshot(self):
    """An empty snapshot yields an empty annotation and an empty map."""
    annotated, ref_map = annotate_snapshot("")
    assert (annotated, ref_map) == ("", {})
# ---------------------------------------------------------------------------
|
||||
# resolve_ref
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestResolveRef:
    """resolve_ref: eN lookup plus verbatim pass-through of real selectors."""

    def test_resolves_valid_ref(self):
        refs = {"e0": RefEntry(role="button", name="Submit", nth=0)}
        assert resolve_ref("e0", refs) == 'role=button[name="Submit"] >> nth=0'

    def test_passes_through_css_selectors(self):
        refs = {"e0": RefEntry(role="button", name="OK", nth=0)}
        # Anything that is not an eN token is returned unchanged.
        for selector in ("#my-button", ".btn-primary", "div > button"):
            assert resolve_ref(selector, refs) == selector

    def test_passes_through_role_selectors(self):
        refs = {"e0": RefEntry(role="button", name="OK", nth=0)}
        selector = 'role=button[name="OK"]'
        assert resolve_ref(selector, refs) == selector

    def test_raises_on_unknown_ref(self):
        refs = {"e0": RefEntry(role="button", name="OK", nth=0)}
        with pytest.raises(ValueError, match="not found"):
            resolve_ref("e99", refs)

    def test_raises_when_no_ref_map(self):
        # No snapshot taken yet -> nothing to resolve against.
        with pytest.raises(ValueError, match="no snapshot"):
            resolve_ref("e0", None)

    def test_escapes_quotes_in_name(self):
        refs = {"e0": RefEntry(role="button", name='Say "Hello"', nth=0)}
        assert resolve_ref("e0", refs) == 'role=button[name="Say \\"Hello\\""] >> nth=0'

    def test_no_name_produces_role_only_selector(self):
        refs = {"e0": RefEntry(role="textbox", name=None, nth=0)}
        assert resolve_ref("e0", refs) == "role=textbox >> nth=0"

    def test_empty_name(self):
        # An empty-but-present name still appears in the selector.
        refs = {"e0": RefEntry(role="button", name="", nth=0)}
        assert resolve_ref("e0", refs) == 'role=button[name=""] >> nth=0'

    def test_nth_in_selector(self):
        refs = {"e0": RefEntry(role="link", name="Next", nth=2)}
        assert resolve_ref("e0", refs) == 'role=link[name="Next"] >> nth=2'
|
||||
# ---------------------------------------------------------------------------
|
||||
# Round-trip: annotate → resolve
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRoundTrip:
    """annotate_snapshot output feeds resolve_ref without loss."""

    def test_annotate_then_resolve(self):
        _, ref_map = annotate_snapshot(
            '- button "Submit"\n- textbox "Email"\n- link "Home"'
        )

        # Every generated ref must resolve to a well-formed role selector
        # carrying the same role, name, and nth recorded in the map.
        for ref_id, entry in ref_map.items():
            selector = resolve_ref(ref_id, ref_map)
            assert selector.startswith(f"role={entry.role}")
            if entry.name is not None:
                assert f'name="{entry.name}"' in selector
            assert f"nth={entry.nth}" in selector

    def test_css_selectors_still_work_after_annotate(self):
        _, ref_map = annotate_snapshot('- button "OK"')
        # A populated ref_map must not hijack plain CSS selectors.
        assert resolve_ref("#submit-btn", ref_map) == "#submit-btn"
|
||||
@@ -0,0 +1,159 @@
|
||||
"""Tests for screenshot normalization.
|
||||
|
||||
Requires the ``browser`` extra (Pillow). Skipped automatically when
|
||||
Pillow is not installed.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
Image = pytest.importorskip(
|
||||
"PIL.Image", reason="Pillow not installed (install with: pip install pillow)"
|
||||
)
|
||||
|
||||
from gcu.browser.tools.inspection import _normalize_screenshot # noqa: E402
|
||||
|
||||
|
||||
def _make_png(width: int, height: int, *, mode: str = "RGB") -> bytes:
    """Return the PNG encoding of a solid-color image of the given size."""
    # RGBA gets a half-transparent alpha so conversion paths are exercised.
    fill = (100, 150, 200) if mode == "RGB" else (100, 150, 200, 128)
    image = Image.new(mode, (width, height), color=fill)
    out = io.BytesIO()
    image.save(out, format="PNG")
    return out.getvalue()
||||
def _make_large_png(width: int, height: int, min_bytes: int) -> bytes:
    """Return a *width* x *height* PNG intended to exceed *min_bytes*.

    A gradient-with-noise fill compresses poorly, which normally pushes
    the encoded size past the target. *min_bytes* is advisory only and is
    not enforced — if the encoding comes in under target that's acceptable;
    the tests mainly rely on the large pixel dimensions.
    """
    image = Image.new("RGB", (width, height))
    # Row-major pixel list (y outer, x inner) matches putdata's fill order.
    image.putdata(
        [
            ((x * 7 + y * 13) % 256, (x * 11 + y * 3) % 256, (x * 5 + y * 17) % 256)
            for y in range(height)
            for x in range(width)
        ]
    )
    out = io.BytesIO()
    image.save(out, format="PNG")
    return out.getvalue()
||||
|
||||
class TestPassthrough:
    """Images already within limits should pass through unchanged."""

    def test_small_image_unchanged(self):
        original = _make_png(100, 100)
        data, media_type = _normalize_screenshot(original, "png")
        # Identity, not just equality: no re-encode may happen at all.
        assert data is original
        assert media_type == "png"

    def test_within_dimension_and_size_unchanged(self):
        original = _make_png(1920, 1080)
        data, media_type = _normalize_screenshot(original, "png")
        assert data is original
        assert media_type == "png"
|
||||
class TestDimensionResize:
    """Images exceeding max_dimension should be resized."""

    def test_large_dimension_gets_resized(self):
        data, media_type = _normalize_screenshot(_make_png(4000, 3000), "png")

        # Normalization re-encodes as JPEG...
        assert media_type == "jpeg"
        # ...and clamps the longest edge to the default 2000px limit.
        resized = Image.open(io.BytesIO(data))
        assert max(resized.size) <= 2000

    def test_custom_max_dimension(self):
        data, media_type = _normalize_screenshot(
            _make_png(2000, 1500), "png", max_dimension=800
        )
        assert media_type == "jpeg"
        assert max(Image.open(io.BytesIO(data)).size) <= 800

    def test_aspect_ratio_preserved(self):
        data, _ = _normalize_screenshot(_make_png(4000, 2000), "png")  # 2:1 input

        width, height = Image.open(io.BytesIO(data)).size
        # Small tolerance for integer rounding of the scaled edges.
        assert abs(width / height - 2.0) < 0.1
|
||||
class TestSizeCompression:
    """Images exceeding max_bytes should be compressed."""

    def test_custom_max_bytes(self):
        oversized = _make_large_png(1500, 1500, min_bytes=100_000)
        data, media_type = _normalize_screenshot(oversized, "png", max_bytes=50_000)
        assert media_type == "jpeg"
        assert len(data) <= 50_000

    def test_over_size_within_dimension_compresses(self):
        """Image within dimension limit but over byte limit gets JPEG-compressed."""
        oversized = _make_large_png(1800, 1800, min_bytes=100_000)
        data, media_type = _normalize_screenshot(oversized, "png", max_bytes=50_000)
        assert media_type == "jpeg"
        assert len(data) <= 50_000
|
||||
class TestAlphaChannel:
    """RGBA images should be converted to RGB for JPEG output."""

    def test_rgba_to_rgb(self):
        data, media_type = _normalize_screenshot(
            _make_png(4000, 3000, mode="RGBA"), "png"
        )

        assert media_type == "jpeg"
        # JPEG has no alpha channel, so normalization must drop it.
        assert Image.open(io.BytesIO(data)).mode == "RGB"
|
||||
class TestGracefulDegradation:
    """Normalization should never break screenshots.

    Whatever goes wrong — Pillow missing, corrupt bytes, empty input —
    _normalize_screenshot must fall back to returning its input untouched.
    """

    def test_pillow_not_available(self):
        raw = _make_png(4000, 3000)
        # Evict any cached PIL modules AND make a fresh import of PIL fail,
        # so the function's lazy import raises ImportError.
        with patch.dict("sys.modules", {"PIL": None, "PIL.Image": None}):
            # FIX: the previous version reached through ``__builtins__``,
            # which is a CPython implementation detail (a module in
            # ``__main__`` but a plain dict in imported modules, where the
            # ``hasattr`` guard silently failed). The ``builtins`` module is
            # the documented, reliable way to get the real __import__.
            import builtins

            real_import = builtins.__import__

            def failing_import(name, *args, **kwargs):
                if name == "PIL" or name.startswith("PIL."):
                    raise ImportError("No module named 'PIL'")
                return real_import(name, *args, **kwargs)

            with patch("builtins.__import__", side_effect=failing_import):
                data, media_type = _normalize_screenshot(raw, "png")

        # Should return original unchanged
        assert data is raw
        assert media_type == "png"

    def test_corrupt_bytes_returns_original(self):
        raw = b"not an image at all"
        data, media_type = _normalize_screenshot(raw, "png")

        assert data is raw
        assert media_type == "png"

    def test_empty_bytes_returns_original(self):
        raw = b""
        data, media_type = _normalize_screenshot(raw, "png")

        assert data is raw
        assert media_type == "png"
||||
@@ -3523,6 +3523,9 @@ all = [
|
||||
bigquery = [
|
||||
{ name = "google-cloud-bigquery" },
|
||||
]
|
||||
browser = [
|
||||
{ name = "pillow" },
|
||||
]
|
||||
databricks = [
|
||||
{ name = "databricks-mcp" },
|
||||
{ name = "databricks-sdk" },
|
||||
@@ -3577,6 +3580,7 @@ requires-dist = [
|
||||
{ name = "openpyxl", marker = "extra == 'excel'", specifier = ">=3.1.0" },
|
||||
{ name = "pandas", specifier = ">=2.0.0" },
|
||||
{ name = "pillow", marker = "extra == 'all'", specifier = ">=10.0.0" },
|
||||
{ name = "pillow", marker = "extra == 'browser'", specifier = ">=10.0.0" },
|
||||
{ name = "pillow", marker = "extra == 'ocr'", specifier = ">=10.0.0" },
|
||||
{ name = "playwright", specifier = ">=1.40.0" },
|
||||
{ name = "playwright-stealth", specifier = ">=1.0.5" },
|
||||
@@ -3594,7 +3598,7 @@ requires-dist = [
|
||||
{ name = "restrictedpython", marker = "extra == 'sandbox'", specifier = ">=7.0" },
|
||||
{ name = "stripe", specifier = ">=14.3.0" },
|
||||
]
|
||||
provides-extras = ["dev", "sandbox", "browser", "ocr", "excel", "sql", "bigquery", "databricks", "all"]
|
||||
|
||||
[package.metadata.requires-dev]
|
||||
dev = [
|
||||
|
||||
Reference in New Issue
Block a user