fix: improve tool_registry error handling with stack traces and context (#6518)

* fix: improve tool_registry error handling with stack traces and context

When tool execution fails, errors now include:
- Stack traces for debugging
- Tool name, tool_use_id, and inputs in error logs
- Same behavior for both sync and async tools

Fixes #2447

* fix: use exc_info=True and truncate inputs in tool error logs

- Replace traceback.format_exc() with exc_info=True (codebase convention)
- Truncate tool inputs to 500 chars to prevent log flooding
- Add test for input truncation
This commit is contained in:
Samer Attrah
2026-03-22 13:01:28 +03:00
committed by GitHub
parent 27d5061d97
commit 88ec4b7e64
2 changed files with 144 additions and 1 deletions
+34 -1
View File
@@ -16,6 +16,8 @@ from framework.llm.provider import Tool, ToolResult, ToolUse
logger = logging.getLogger(__name__)
_INPUT_LOG_MAX_LEN = 500
# Per-execution context overrides. Each asyncio task (and thus each
# concurrent graph execution) gets its own copy, so there are no races
# when multiple ExecutionStreams run in parallel.
@@ -278,6 +280,17 @@ class ToolRegistry:
r = await result
return _wrap_result(tool_use.id, r)
except Exception as exc:
inputs_str = json.dumps(tool_use.input, default=str)
if len(inputs_str) > _INPUT_LOG_MAX_LEN:
inputs_str = inputs_str[:_INPUT_LOG_MAX_LEN] + "...(truncated)"
logger.error(
"Async tool '%s' failed (tool_use_id=%s): %s\nInputs: %s",
tool_use.name,
tool_use.id,
exc,
inputs_str,
exc_info=True,
)
return ToolResult(
tool_use_id=tool_use.id,
content=json.dumps({"error": str(exc)}),
@@ -288,6 +301,17 @@ class ToolRegistry:
return _wrap_result(tool_use.id, result)
except Exception as e:
inputs_str = json.dumps(tool_use.input, default=str)
if len(inputs_str) > _INPUT_LOG_MAX_LEN:
inputs_str = inputs_str[:_INPUT_LOG_MAX_LEN] + "...(truncated)"
logger.error(
"Tool '%s' execution failed for tool_use_id=%s: %s\nInputs: %s",
tool_use.name,
tool_use.id,
e,
inputs_str,
exc_info=True,
)
return ToolResult(
tool_use_id=tool_use.id,
content=json.dumps({"error": str(e)}),
@@ -588,7 +612,16 @@ class ToolRegistry:
return result[0]
return result
except Exception as e:
logger.error(f"MCP tool '{tool_name}' execution failed: {e}")
inputs_str = json.dumps(inputs, default=str)
if len(inputs_str) > _INPUT_LOG_MAX_LEN:
inputs_str = inputs_str[:_INPUT_LOG_MAX_LEN] + "...(truncated)"
logger.error(
"MCP tool '%s' execution failed: %s\nInputs: %s",
tool_name,
e,
inputs_str,
exc_info=True,
)
return {"error": str(e)}
return executor
+110
View File
@@ -6,10 +6,12 @@ ToolResult instances. Historically, invalid JSON in ToolResult.content
could cause a json.JSONDecodeError and crash execution.
"""
import logging
import textwrap
from pathlib import Path
from types import SimpleNamespace
from framework.llm.provider import Tool, ToolUse
from framework.runner.tool_registry import ToolRegistry
@@ -206,3 +208,111 @@ def test_register_mcp_server_direct_client_when_manager_disabled(monkeypatch):
registry.cleanup()
assert created_clients[0].disconnect_calls == 1
def test_tool_execution_error_logs_stack_trace_and_context(caplog):
"""ToolRegistry should log stack traces and context when tool execution fails."""
registry = ToolRegistry()
def failing_executor(inputs: dict) -> None:
raise ValueError("Intentional test failure")
tool = Tool(
name="failing_tool",
description="A tool that always fails",
parameters={"type": "object", "properties": {}},
)
registry.register("failing_tool", tool, failing_executor)
tool_use = ToolUse(
id="test_call_123",
name="failing_tool",
input={"param": "value"},
)
with caplog.at_level(logging.ERROR):
executor = registry.get_executor()
result = executor(tool_use)
assert result.is_error is True
assert "Intentional test failure" in result.content
assert any("failing_tool" in record.message for record in caplog.records)
assert any("test_call_123" in record.message for record in caplog.records)
assert any(record.exc_info is not None for record in caplog.records)
def test_tool_execution_error_logs_inputs(caplog):
"""ToolRegistry should log tool inputs when execution fails."""
registry = ToolRegistry()
def failing_executor(inputs: dict) -> None:
raise RuntimeError("Tool failed")
tool = Tool(
name="input_logging_tool",
description="Tests input logging",
parameters={"type": "object", "properties": {"foo": {"type": "string"}}},
)
registry.register("input_logging_tool", tool, failing_executor)
tool_use = ToolUse(
id="call_456",
name="input_logging_tool",
input={"foo": "bar", "nested": {"key": "value"}},
)
with caplog.at_level(logging.ERROR):
executor = registry.get_executor()
executor(tool_use)
log_messages = [record.message for record in caplog.records]
full_log = " ".join(log_messages)
assert '"foo": "bar"' in full_log or "'foo': 'bar'" in full_log
def test_unknown_tool_error_returns_proper_result():
"""ToolRegistry should return proper error for unknown tools."""
registry = ToolRegistry()
tool_use = ToolUse(
id="unknown_call",
name="nonexistent_tool",
input={},
)
executor = registry.get_executor()
result = executor(tool_use)
assert result.is_error is True
assert "Unknown tool" in result.content
assert "nonexistent_tool" in result.content
def test_tool_execution_error_truncates_large_inputs(caplog):
"""ToolRegistry should truncate large inputs in error logs."""
registry = ToolRegistry()
def failing_executor(inputs: dict) -> None:
raise RuntimeError("Tool failed")
tool = Tool(
name="large_input_tool",
description="Tests input truncation",
parameters={"type": "object", "properties": {}},
)
registry.register("large_input_tool", tool, failing_executor)
large_input = {"data": "x" * 1000}
tool_use = ToolUse(
id="call_789",
name="large_input_tool",
input=large_input,
)
with caplog.at_level(logging.ERROR):
executor = registry.get_executor()
executor(tool_use)
log_messages = [record.message for record in caplog.records]
full_log = " ".join(log_messages)
assert "...(truncated)" in full_log