fix: improve tool_registry error handling with stack traces and context (#6518)

* fix: improve tool_registry error handling with stack traces and context When tool execution fails, errors now include: - Stack traces for debugging - Tool name, tool_use_id, and inputs in error logs - Same behavior for both sync and async tools Fixes #2447 * fix: use exc_info=True and truncate inputs in tool error logs - Replace traceback.format_exc() with exc_info=True (codebase convention) - Truncate tool inputs to 500 chars to prevent log flooding - Add test for input truncation
2026-03-22 13:01:28 +03:00
parent 27d5061d97
commit 88ec4b7e64
2 changed files with 144 additions and 1 deletions
@@ -16,6 +16,8 @@ from framework.llm.provider import Tool, ToolResult, ToolUse

 logger = logging.getLogger(__name__)

+_INPUT_LOG_MAX_LEN = 500
+
 # Per-execution context overrides.  Each asyncio task (and thus each
 # concurrent graph execution) gets its own copy, so there are no races
 # when multiple ExecutionStreams run in parallel.
@@ -278,6 +280,17 @@ class ToolRegistry:
                            r = await result
                            return _wrap_result(tool_use.id, r)
                        except Exception as exc:
+                            inputs_str = json.dumps(tool_use.input, default=str)
+                            if len(inputs_str) > _INPUT_LOG_MAX_LEN:
+                                inputs_str = inputs_str[:_INPUT_LOG_MAX_LEN] + "...(truncated)"
+                            logger.error(
+                                "Async tool '%s' failed (tool_use_id=%s): %s\nInputs: %s",
+                                tool_use.name,
+                                tool_use.id,
+                                exc,
+                                inputs_str,
+                                exc_info=True,
+                            )
                            return ToolResult(
                                tool_use_id=tool_use.id,
                                content=json.dumps({"error": str(exc)}),
@@ -288,6 +301,17 @@ class ToolRegistry:

                return _wrap_result(tool_use.id, result)
            except Exception as e:
+                inputs_str = json.dumps(tool_use.input, default=str)
+                if len(inputs_str) > _INPUT_LOG_MAX_LEN:
+                    inputs_str = inputs_str[:_INPUT_LOG_MAX_LEN] + "...(truncated)"
+                logger.error(
+                    "Tool '%s' execution failed for tool_use_id=%s: %s\nInputs: %s",
+                    tool_use.name,
+                    tool_use.id,
+                    e,
+                    inputs_str,
+                    exc_info=True,
+                )
                return ToolResult(
                    tool_use_id=tool_use.id,
                    content=json.dumps({"error": str(e)}),
@@ -588,7 +612,16 @@ class ToolRegistry:
                                return result[0]
                            return result
                        except Exception as e:
-                            logger.error(f"MCP tool '{tool_name}' execution failed: {e}")
+                            inputs_str = json.dumps(inputs, default=str)
+                            if len(inputs_str) > _INPUT_LOG_MAX_LEN:
+                                inputs_str = inputs_str[:_INPUT_LOG_MAX_LEN] + "...(truncated)"
+                            logger.error(
+                                "MCP tool '%s' execution failed: %s\nInputs: %s",
+                                tool_name,
+                                e,
+                                inputs_str,
+                                exc_info=True,
+                            )
                            return {"error": str(e)}

                    return executor
@@ -6,10 +6,12 @@ ToolResult instances. Historically, invalid JSON in ToolResult.content
 could cause a json.JSONDecodeError and crash execution.
 """

+import logging
 import textwrap
 from pathlib import Path
 from types import SimpleNamespace

+from framework.llm.provider import Tool, ToolUse
 from framework.runner.tool_registry import ToolRegistry


@@ -206,3 +208,111 @@ def test_register_mcp_server_direct_client_when_manager_disabled(monkeypatch):
    registry.cleanup()

    assert created_clients[0].disconnect_calls == 1
+
+
+def test_tool_execution_error_logs_stack_trace_and_context(caplog):
+    """ToolRegistry should log stack traces and context when tool execution fails."""
+    registry = ToolRegistry()
+
+    def failing_executor(inputs: dict) -> None:
+        raise ValueError("Intentional test failure")
+
+    tool = Tool(
+        name="failing_tool",
+        description="A tool that always fails",
+        parameters={"type": "object", "properties": {}},
+    )
+    registry.register("failing_tool", tool, failing_executor)
+
+    tool_use = ToolUse(
+        id="test_call_123",
+        name="failing_tool",
+        input={"param": "value"},
+    )
+
+    with caplog.at_level(logging.ERROR):
+        executor = registry.get_executor()
+        result = executor(tool_use)
+
+    assert result.is_error is True
+    assert "Intentional test failure" in result.content
+
+    assert any("failing_tool" in record.message for record in caplog.records)
+    assert any("test_call_123" in record.message for record in caplog.records)
+    assert any(record.exc_info is not None for record in caplog.records)
+
+
+def test_tool_execution_error_logs_inputs(caplog):
+    """ToolRegistry should log tool inputs when execution fails."""
+    registry = ToolRegistry()
+
+    def failing_executor(inputs: dict) -> None:
+        raise RuntimeError("Tool failed")
+
+    tool = Tool(
+        name="input_logging_tool",
+        description="Tests input logging",
+        parameters={"type": "object", "properties": {"foo": {"type": "string"}}},
+    )
+    registry.register("input_logging_tool", tool, failing_executor)
+
+    tool_use = ToolUse(
+        id="call_456",
+        name="input_logging_tool",
+        input={"foo": "bar", "nested": {"key": "value"}},
+    )
+
+    with caplog.at_level(logging.ERROR):
+        executor = registry.get_executor()
+        executor(tool_use)
+
+    log_messages = [record.message for record in caplog.records]
+    full_log = " ".join(log_messages)
+    assert '"foo": "bar"' in full_log or "'foo': 'bar'" in full_log
+
+
+def test_unknown_tool_error_returns_proper_result():
+    """ToolRegistry should return proper error for unknown tools."""
+    registry = ToolRegistry()
+    tool_use = ToolUse(
+        id="unknown_call",
+        name="nonexistent_tool",
+        input={},
+    )
+
+    executor = registry.get_executor()
+    result = executor(tool_use)
+
+    assert result.is_error is True
+    assert "Unknown tool" in result.content
+    assert "nonexistent_tool" in result.content
+
+
+def test_tool_execution_error_truncates_large_inputs(caplog):
+    """ToolRegistry should truncate large inputs in error logs."""
+    registry = ToolRegistry()
+
+    def failing_executor(inputs: dict) -> None:
+        raise RuntimeError("Tool failed")
+
+    tool = Tool(
+        name="large_input_tool",
+        description="Tests input truncation",
+        parameters={"type": "object", "properties": {}},
+    )
+    registry.register("large_input_tool", tool, failing_executor)
+
+    large_input = {"data": "x" * 1000}
+    tool_use = ToolUse(
+        id="call_789",
+        name="large_input_tool",
+        input=large_input,
+    )
+
+    with caplog.at_level(logging.ERROR):
+        executor = registry.get_executor()
+        executor(tool_use)
+
+    log_messages = [record.message for record in caplog.records]
+    full_log = " ".join(log_messages)
+    assert "...(truncated)" in full_log