fix: improve tool_registry error handling with stack traces and context (#6518)
* fix: improve tool_registry error handling with stack traces and context When tool execution fails, errors now include: - Stack traces for debugging - Tool name, tool_use_id, and inputs in error logs - Same behavior for both sync and async tools Fixes #2447 * fix: use exc_info=True and truncate inputs in tool error logs - Replace traceback.format_exc() with exc_info=True (codebase convention) - Truncate tool inputs to 500 chars to prevent log flooding - Add test for input truncation
This commit is contained in:
@@ -16,6 +16,8 @@ from framework.llm.provider import Tool, ToolResult, ToolUse
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_INPUT_LOG_MAX_LEN = 500
|
||||
|
||||
# Per-execution context overrides. Each asyncio task (and thus each
|
||||
# concurrent graph execution) gets its own copy, so there are no races
|
||||
# when multiple ExecutionStreams run in parallel.
|
||||
@@ -278,6 +280,17 @@ class ToolRegistry:
|
||||
r = await result
|
||||
return _wrap_result(tool_use.id, r)
|
||||
except Exception as exc:
|
||||
inputs_str = json.dumps(tool_use.input, default=str)
|
||||
if len(inputs_str) > _INPUT_LOG_MAX_LEN:
|
||||
inputs_str = inputs_str[:_INPUT_LOG_MAX_LEN] + "...(truncated)"
|
||||
logger.error(
|
||||
"Async tool '%s' failed (tool_use_id=%s): %s\nInputs: %s",
|
||||
tool_use.name,
|
||||
tool_use.id,
|
||||
exc,
|
||||
inputs_str,
|
||||
exc_info=True,
|
||||
)
|
||||
return ToolResult(
|
||||
tool_use_id=tool_use.id,
|
||||
content=json.dumps({"error": str(exc)}),
|
||||
@@ -288,6 +301,17 @@ class ToolRegistry:
|
||||
|
||||
return _wrap_result(tool_use.id, result)
|
||||
except Exception as e:
|
||||
inputs_str = json.dumps(tool_use.input, default=str)
|
||||
if len(inputs_str) > _INPUT_LOG_MAX_LEN:
|
||||
inputs_str = inputs_str[:_INPUT_LOG_MAX_LEN] + "...(truncated)"
|
||||
logger.error(
|
||||
"Tool '%s' execution failed for tool_use_id=%s: %s\nInputs: %s",
|
||||
tool_use.name,
|
||||
tool_use.id,
|
||||
e,
|
||||
inputs_str,
|
||||
exc_info=True,
|
||||
)
|
||||
return ToolResult(
|
||||
tool_use_id=tool_use.id,
|
||||
content=json.dumps({"error": str(e)}),
|
||||
@@ -588,7 +612,16 @@ class ToolRegistry:
|
||||
return result[0]
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.error(f"MCP tool '{tool_name}' execution failed: {e}")
|
||||
inputs_str = json.dumps(inputs, default=str)
|
||||
if len(inputs_str) > _INPUT_LOG_MAX_LEN:
|
||||
inputs_str = inputs_str[:_INPUT_LOG_MAX_LEN] + "...(truncated)"
|
||||
logger.error(
|
||||
"MCP tool '%s' execution failed: %s\nInputs: %s",
|
||||
tool_name,
|
||||
e,
|
||||
inputs_str,
|
||||
exc_info=True,
|
||||
)
|
||||
return {"error": str(e)}
|
||||
|
||||
return executor
|
||||
|
||||
@@ -6,10 +6,12 @@ ToolResult instances. Historically, invalid JSON in ToolResult.content
|
||||
could cause a json.JSONDecodeError and crash execution.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import textwrap
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
|
||||
from framework.llm.provider import Tool, ToolUse
|
||||
from framework.runner.tool_registry import ToolRegistry
|
||||
|
||||
|
||||
@@ -206,3 +208,111 @@ def test_register_mcp_server_direct_client_when_manager_disabled(monkeypatch):
|
||||
registry.cleanup()
|
||||
|
||||
assert created_clients[0].disconnect_calls == 1
|
||||
|
||||
|
||||
def test_tool_execution_error_logs_stack_trace_and_context(caplog):
|
||||
"""ToolRegistry should log stack traces and context when tool execution fails."""
|
||||
registry = ToolRegistry()
|
||||
|
||||
def failing_executor(inputs: dict) -> None:
|
||||
raise ValueError("Intentional test failure")
|
||||
|
||||
tool = Tool(
|
||||
name="failing_tool",
|
||||
description="A tool that always fails",
|
||||
parameters={"type": "object", "properties": {}},
|
||||
)
|
||||
registry.register("failing_tool", tool, failing_executor)
|
||||
|
||||
tool_use = ToolUse(
|
||||
id="test_call_123",
|
||||
name="failing_tool",
|
||||
input={"param": "value"},
|
||||
)
|
||||
|
||||
with caplog.at_level(logging.ERROR):
|
||||
executor = registry.get_executor()
|
||||
result = executor(tool_use)
|
||||
|
||||
assert result.is_error is True
|
||||
assert "Intentional test failure" in result.content
|
||||
|
||||
assert any("failing_tool" in record.message for record in caplog.records)
|
||||
assert any("test_call_123" in record.message for record in caplog.records)
|
||||
assert any(record.exc_info is not None for record in caplog.records)
|
||||
|
||||
|
||||
def test_tool_execution_error_logs_inputs(caplog):
|
||||
"""ToolRegistry should log tool inputs when execution fails."""
|
||||
registry = ToolRegistry()
|
||||
|
||||
def failing_executor(inputs: dict) -> None:
|
||||
raise RuntimeError("Tool failed")
|
||||
|
||||
tool = Tool(
|
||||
name="input_logging_tool",
|
||||
description="Tests input logging",
|
||||
parameters={"type": "object", "properties": {"foo": {"type": "string"}}},
|
||||
)
|
||||
registry.register("input_logging_tool", tool, failing_executor)
|
||||
|
||||
tool_use = ToolUse(
|
||||
id="call_456",
|
||||
name="input_logging_tool",
|
||||
input={"foo": "bar", "nested": {"key": "value"}},
|
||||
)
|
||||
|
||||
with caplog.at_level(logging.ERROR):
|
||||
executor = registry.get_executor()
|
||||
executor(tool_use)
|
||||
|
||||
log_messages = [record.message for record in caplog.records]
|
||||
full_log = " ".join(log_messages)
|
||||
assert '"foo": "bar"' in full_log or "'foo': 'bar'" in full_log
|
||||
|
||||
|
||||
def test_unknown_tool_error_returns_proper_result():
|
||||
"""ToolRegistry should return proper error for unknown tools."""
|
||||
registry = ToolRegistry()
|
||||
tool_use = ToolUse(
|
||||
id="unknown_call",
|
||||
name="nonexistent_tool",
|
||||
input={},
|
||||
)
|
||||
|
||||
executor = registry.get_executor()
|
||||
result = executor(tool_use)
|
||||
|
||||
assert result.is_error is True
|
||||
assert "Unknown tool" in result.content
|
||||
assert "nonexistent_tool" in result.content
|
||||
|
||||
|
||||
def test_tool_execution_error_truncates_large_inputs(caplog):
|
||||
"""ToolRegistry should truncate large inputs in error logs."""
|
||||
registry = ToolRegistry()
|
||||
|
||||
def failing_executor(inputs: dict) -> None:
|
||||
raise RuntimeError("Tool failed")
|
||||
|
||||
tool = Tool(
|
||||
name="large_input_tool",
|
||||
description="Tests input truncation",
|
||||
parameters={"type": "object", "properties": {}},
|
||||
)
|
||||
registry.register("large_input_tool", tool, failing_executor)
|
||||
|
||||
large_input = {"data": "x" * 1000}
|
||||
tool_use = ToolUse(
|
||||
id="call_789",
|
||||
name="large_input_tool",
|
||||
input=large_input,
|
||||
)
|
||||
|
||||
with caplog.at_level(logging.ERROR):
|
||||
executor = registry.get_executor()
|
||||
executor(tool_use)
|
||||
|
||||
log_messages = [record.message for record in caplog.records]
|
||||
full_log = " ".join(log_messages)
|
||||
assert "...(truncated)" in full_log
|
||||
|
||||
Reference in New Issue
Block a user