From 6cb7b43591471c26196e0ca9ef4099f39f431224 Mon Sep 17 00:00:00 2001
From: Fabian Schindler
Date: Tue, 30 Sep 2025 11:17:40 +0200
Subject: [PATCH 1/8] fix(openai-agents): also emit spans for MCP tool calls
 done by the LLM

---
 .../openai_agents/spans/ai_client.py | 2 ++
 .../integrations/openai_agents/utils.py | 20 ++++++++++++++++++-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/sentry_sdk/integrations/openai_agents/spans/ai_client.py b/sentry_sdk/integrations/openai_agents/spans/ai_client.py
index d325ae86e3..cf99adc5fb 100644
--- a/sentry_sdk/integrations/openai_agents/spans/ai_client.py
+++ b/sentry_sdk/integrations/openai_agents/spans/ai_client.py
@@ -7,6 +7,7 @@
     _set_input_data,
     _set_output_data,
     _set_usage_data,
+    _create_mcp_execute_tool_spans,
 )
 
 from typing import TYPE_CHECKING
@@ -37,3 +38,4 @@ def update_ai_client_span(span, agent, get_response_kwargs, result):
     _set_usage_data(span, result.usage)
     _set_input_data(span, get_response_kwargs)
     _set_output_data(span, result)
+    _create_mcp_execute_tool_spans(span, result)
diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py
index 73d2858e7f..5f77f37990 100644
--- a/sentry_sdk/integrations/openai_agents/utils.py
+++ b/sentry_sdk/integrations/openai_agents/utils.py
@@ -1,6 +1,6 @@
 import sentry_sdk
 from sentry_sdk.ai.utils import set_data_normalized
-from sentry_sdk.consts import SPANDATA
+from sentry_sdk.consts import SPANDATA, SPANSTATUS, OP
 from sentry_sdk.integrations import DidNotEnable
 from sentry_sdk.scope import should_send_default_pii
 from sentry_sdk.tracing_utils import set_span_errored
@@ -156,3 +156,21 @@ def _set_output_data(span, result):
         set_data_normalized(
             span, SPANDATA.GEN_AI_RESPONSE_TEXT, output_messages["response"]
         )
+
+
+def _create_mcp_execute_tool_spans(span, result):
+    # type: (sentry_sdk.tracing.Span, agents.Result) -> None
+    for output in result.output:
+        if output.__class__.__name__ == "McpCall":
+            with sentry_sdk.start_span(
+                op=OP.GEN_AI_EXECUTE_TOOL,
+                description=f"execute_tool {output.name}",
+                start_timestamp=span.start_timestamp,
+            ) as span:
+                span.set_tag(SPANDATA.GEN_AI_TOOL_TYPE, "mcp")
+                span.set_tag(SPANDATA.GEN_AI_TOOL_NAME, output.name)
+                if should_send_default_pii():
+                    span.set_data(SPANDATA.GEN_AI_TOOL_INPUT, output.arguments)
+                    span.set_data(SPANDATA.GEN_AI_TOOL_OUTPUT, output.output)
+                if output.error:
+                    span.set_status(SPANSTATUS.ERROR)

From 96df8c1c08768d7f1d1369e66a9c4e7f6ebfc04c Mon Sep 17 00:00:00 2001
From: Fabian Schindler
Date: Tue, 30 Sep 2025 11:28:04 +0200
Subject: [PATCH 2/8] fix(openai-agents): improve span handling in MCP tool
 execution

---
 sentry_sdk/integrations/openai_agents/utils.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py
index 5f77f37990..951510317e 100644
--- a/sentry_sdk/integrations/openai_agents/utils.py
+++ b/sentry_sdk/integrations/openai_agents/utils.py
@@ -166,11 +166,13 @@ def _create_mcp_execute_tool_spans(span, result):
                 op=OP.GEN_AI_EXECUTE_TOOL,
                 description=f"execute_tool {output.name}",
                 start_timestamp=span.start_timestamp,
-            ) as span:
-                span.set_tag(SPANDATA.GEN_AI_TOOL_TYPE, "mcp")
-                span.set_tag(SPANDATA.GEN_AI_TOOL_NAME, output.name)
+            ) as execute_tool_span:
+                execute_tool_span.set_tag(SPANDATA.GEN_AI_TOOL_TYPE, "mcp")
+                execute_tool_span.set_tag(SPANDATA.GEN_AI_TOOL_NAME, output.name)
                 if should_send_default_pii():
-                    span.set_data(SPANDATA.GEN_AI_TOOL_INPUT, output.arguments)
+                    execute_tool_span.set_data(
+                        SPANDATA.GEN_AI_TOOL_INPUT, output.arguments
+                    )
                     span.set_data(SPANDATA.GEN_AI_TOOL_OUTPUT, output.output)
                 if output.error:
-                    span.set_status(SPANSTATUS.ERROR)
+                    execute_tool_span.set_status(SPANSTATUS.ERROR)
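
A note on PATCH 2/8: in patch 1, `with sentry_sdk.start_span(...) as span:` rebinds the function's own `span` parameter, so once the first tool span closes, any later reference to the parent AI client span silently points at a child span instead. A minimal, self-contained sketch of that hazard (plain Python with a stand-in context manager, not the Sentry SDK API):

    # Stand-in sketch of the name-shadowing bug fixed in PATCH 2/8.
    # start_span() here is a toy context manager, not sentry_sdk.start_span.
    from contextlib import contextmanager

    @contextmanager
    def start_span(**attrs):
        yield dict(attrs)  # a dict plays the role of a span object

    def process(span, outputs):
        for output in outputs:
            # BUG (patch 1 shape): "as span" rebinds the parameter, so after
            # this block the parent span is no longer reachable by name.
            with start_span(description=f"execute_tool {output}") as span:
                span["tool"] = output
        return span  # returns the last *child*, not the parent

    parent = {"description": "ai_client"}
    print(process(parent, ["lookup"])["description"])  # -> "execute_tool lookup"

Renaming the context variable to execute_tool_span removes the rebinding, which is why patches 3 and 4 can keep distinguishing the parent `span` from the child.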

From 99b1ddb643c217449a2eaa8f5e11cd6ee474b45d Mon Sep 17 00:00:00 2001
From: Fabian Schindler
Date: Wed, 1 Oct 2025 11:17:40 +0200
Subject: [PATCH 3/8] fix(openai-agents): correct span data assignment for
 tool execution output

---
 sentry_sdk/integrations/openai_agents/utils.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py
index 951510317e..adb7e0bbe8 100644
--- a/sentry_sdk/integrations/openai_agents/utils.py
+++ b/sentry_sdk/integrations/openai_agents/utils.py
@@ -173,6 +173,8 @@ def _create_mcp_execute_tool_spans(span, result):
                     execute_tool_span.set_data(
                         SPANDATA.GEN_AI_TOOL_INPUT, output.arguments
                     )
-                    span.set_data(SPANDATA.GEN_AI_TOOL_OUTPUT, output.output)
+                    execute_tool_span.set_data(
+                        SPANDATA.GEN_AI_TOOL_OUTPUT, output.output
+                    )
                 if output.error:
                     execute_tool_span.set_status(SPANSTATUS.ERROR)

From b219cd8ae4c52c5eff6793c0efa6846a3419bd31 Mon Sep 17 00:00:00 2001
From: Fabian Schindler
Date: Wed, 1 Oct 2025 11:41:09 +0200
Subject: [PATCH 4/8] fix(openai-agents): refactor span data assignment to use
 set_data_normalized for tool execution

---
 sentry_sdk/integrations/openai_agents/utils.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py
index adb7e0bbe8..b0ad6bf903 100644
--- a/sentry_sdk/integrations/openai_agents/utils.py
+++ b/sentry_sdk/integrations/openai_agents/utils.py
@@ -167,8 +167,10 @@ def _create_mcp_execute_tool_spans(span, result):
                 description=f"execute_tool {output.name}",
                 start_timestamp=span.start_timestamp,
             ) as execute_tool_span:
-                execute_tool_span.set_tag(SPANDATA.GEN_AI_TOOL_TYPE, "mcp")
-                execute_tool_span.set_tag(SPANDATA.GEN_AI_TOOL_NAME, output.name)
+                set_data_normalized(execute_tool_span, SPANDATA.GEN_AI_TOOL_TYPE, "mcp")
+                set_data_normalized(
+                    execute_tool_span, SPANDATA.GEN_AI_TOOL_NAME, output.name
+                )
                 if should_send_default_pii():
                     execute_tool_span.set_data(
                         SPANDATA.GEN_AI_TOOL_INPUT, output.arguments
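
A note on PATCH 4/8: `set_tag` values are serialized under the span's `tags`, while `set_data_normalized` (the SDK helper already imported at the top of utils.py) writes into the span's `data`, which is where the tests added in PATCH 5/8 look (`span["data"]["gen_ai.tool.type"]`). A rough sketch of the difference, using a simplified stand-in for the SDK's span internals (real serialization differs in detail):

    # FakeSpan is illustrative only; it mimics where the two setters land.
    class FakeSpan:
        def __init__(self):
            self.tags = {}
            self.data = {}

        def set_tag(self, key, value):
            self.tags[key] = value   # serialized under span["tags"]

        def set_data(self, key, value):
            self.data[key] = value   # serialized under span["data"]

    span = FakeSpan()
    span.set_tag("gen_ai.tool.type", "mcp")   # patches 1-3: lands in tags
    span.set_data("gen_ai.tool.type", "mcp")  # patch 4 (via set_data_normalized): lands in data
    assert span.data["gen_ai.tool.type"] == "mcp"  # what the tests assert on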
+ """ + + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + # Create a mock McpCall object + mcp_call = MagicMock() + mcp_call.__class__.__name__ = "McpCall" + mcp_call.name = "test_mcp_tool" + mcp_call.arguments = '{"query": "search term"}' + mcp_call.output = "MCP tool executed successfully" + mcp_call.error = None + + # Create a ModelResponse with an McpCall in the output + mcp_response = ModelResponse( + output=[mcp_call], + usage=Usage( + requests=1, + input_tokens=10, + output_tokens=5, + total_tokens=15, + ), + response_id="resp_mcp_123", + ) + + # Final response after MCP tool execution + final_response = ModelResponse( + output=[ + ResponseOutputMessage( + id="msg_final", + type="message", + status="completed", + content=[ + ResponseOutputText( + text="Task completed using MCP tool", + type="output_text", + annotations=[], + ) + ], + role="assistant", + ) + ], + usage=Usage( + requests=1, + input_tokens=15, + output_tokens=10, + total_tokens=25, + ), + response_id="resp_final_123", + ) + + mock_get_response.side_effect = [mcp_response, final_response] + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + ) + + events = capture_events() + + await agents.Runner.run( + test_agent, + "Please use MCP tool", + run_config=test_run_config, + ) + + (transaction,) = events + spans = transaction["spans"] + + # Find the MCP execute_tool span + mcp_tool_span = None + for span in spans: + if ( + span.get("description") == "execute_tool test_mcp_tool" + and span.get("data", {}).get("gen_ai.tool.type") == "mcp" + ): + mcp_tool_span = span + break + + # Verify the MCP tool span was created + assert mcp_tool_span is not None, "MCP execute_tool span was not created" + assert mcp_tool_span["description"] == "execute_tool test_mcp_tool" + assert mcp_tool_span["data"]["gen_ai.tool.type"] == "mcp" + assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool" + assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "search term"}' + assert ( + mcp_tool_span["data"]["gen_ai.tool.output"] == "MCP tool executed successfully" + ) + + # Verify no error status since error was None + assert mcp_tool_span.get("status") != "error" + + +@pytest.mark.asyncio +async def test_mcp_tool_execution_with_error(sentry_init, capture_events, test_agent): + """ + Test that MCP tool calls with errors are tracked with error status. 
+ """ + + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + # Create a mock McpCall object with an error + mcp_call_with_error = MagicMock() + mcp_call_with_error.__class__.__name__ = "McpCall" + mcp_call_with_error.name = "failing_mcp_tool" + mcp_call_with_error.arguments = '{"query": "test"}' + mcp_call_with_error.output = None + mcp_call_with_error.error = "MCP tool execution failed" + + # Create a ModelResponse with a failing McpCall + mcp_response = ModelResponse( + output=[mcp_call_with_error], + usage=Usage( + requests=1, + input_tokens=10, + output_tokens=5, + total_tokens=15, + ), + response_id="resp_mcp_error_123", + ) + + # Final response after error + final_response = ModelResponse( + output=[ + ResponseOutputMessage( + id="msg_final", + type="message", + status="completed", + content=[ + ResponseOutputText( + text="The MCP tool encountered an error", + type="output_text", + annotations=[], + ) + ], + role="assistant", + ) + ], + usage=Usage( + requests=1, + input_tokens=15, + output_tokens=10, + total_tokens=25, + ), + response_id="resp_final_error_123", + ) + + mock_get_response.side_effect = [mcp_response, final_response] + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + ) + + events = capture_events() + + await agents.Runner.run( + test_agent, + "Please use failing MCP tool", + run_config=test_run_config, + ) + + (transaction,) = events + spans = transaction["spans"] + + # Find the MCP execute_tool span with error + mcp_tool_span = None + for span in spans: + if ( + span.get("description") == "execute_tool failing_mcp_tool" + and span.get("data", {}).get("gen_ai.tool.type") == "mcp" + ): + mcp_tool_span = span + break + + # Verify the MCP tool span was created with error status + assert mcp_tool_span is not None, "MCP execute_tool span was not created" + assert mcp_tool_span["description"] == "execute_tool failing_mcp_tool" + assert mcp_tool_span["data"]["gen_ai.tool.type"] == "mcp" + assert mcp_tool_span["data"]["gen_ai.tool.name"] == "failing_mcp_tool" + assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "test"}' + assert mcp_tool_span["data"]["gen_ai.tool.output"] is None + + # Verify error status was set + assert mcp_tool_span["status"] == "error" + + +@pytest.mark.asyncio +async def test_mcp_tool_execution_without_pii(sentry_init, capture_events, test_agent): + """ + Test that MCP tool input/output are not included when send_default_pii is False. 
+ """ + + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + # Create a mock McpCall object + mcp_call = MagicMock() + mcp_call.__class__.__name__ = "McpCall" + mcp_call.name = "test_mcp_tool" + mcp_call.arguments = '{"query": "sensitive data"}' + mcp_call.output = "Result with sensitive info" + mcp_call.error = None + + # Create a ModelResponse with an McpCall + mcp_response = ModelResponse( + output=[mcp_call], + usage=Usage( + requests=1, + input_tokens=10, + output_tokens=5, + total_tokens=15, + ), + response_id="resp_mcp_123", + ) + + # Final response + final_response = ModelResponse( + output=[ + ResponseOutputMessage( + id="msg_final", + type="message", + status="completed", + content=[ + ResponseOutputText( + text="Task completed", + type="output_text", + annotations=[], + ) + ], + role="assistant", + ) + ], + usage=Usage( + requests=1, + input_tokens=15, + output_tokens=10, + total_tokens=25, + ), + response_id="resp_final_123", + ) + + mock_get_response.side_effect = [mcp_response, final_response] + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=False, # PII disabled + ) + + events = capture_events() + + await agents.Runner.run( + test_agent, + "Please use MCP tool", + run_config=test_run_config, + ) + + (transaction,) = events + spans = transaction["spans"] + + # Find the MCP execute_tool span + mcp_tool_span = None + for span in spans: + if ( + span.get("description") == "execute_tool test_mcp_tool" + and span.get("data", {}).get("gen_ai.tool.type") == "mcp" + ): + mcp_tool_span = span + break + + # Verify the MCP tool span was created but without input/output + assert mcp_tool_span is not None, "MCP execute_tool span was not created" + assert mcp_tool_span["description"] == "execute_tool test_mcp_tool" + assert mcp_tool_span["data"]["gen_ai.tool.type"] == "mcp" + assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool" + + # Verify input and output are not included when send_default_pii is False + assert "gen_ai.tool.input" not in mcp_tool_span["data"] + assert "gen_ai.tool.output" not in mcp_tool_span["data"] + + @pytest.mark.asyncio async def test_multiple_agents_asyncio( sentry_init, capture_events, test_agent, mock_model_response From 94a369519dfd075009c0cb59fe17e3849b9e57a8 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 2 Oct 2025 13:01:44 +0200 Subject: [PATCH 6/8] test(openai-agents): fix some tests --- .../openai_agents/test_openai_agents.py | 58 +++++++++++-------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 33392d9c64..e7548b1b2a 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -15,6 +15,7 @@ ModelSettings, ) from agents.items import ( + McpCall, ResponseOutputMessage, ResponseOutputText, ResponseFunctionToolCall, @@ -452,6 +453,8 @@ def simple_test_tool(message: str) -> str: "on_invoke_tool": "._create_function_tool.._on_invoke_tool>", "strict_json_schema": True, "is_enabled": True, + "tool_input_guardrails": None, + "tool_output_guardrails": None, } ] ) @@ -694,13 +697,16 @@ async def test_mcp_tool_execution_spans(sentry_init, capture_events, test_agent) with patch( "agents.models.openai_responses.OpenAIResponsesModel.get_response" ) as 

From 94a369519dfd075009c0cb59fe17e3849b9e57a8 Mon Sep 17 00:00:00 2001
From: Fabian Schindler
Date: Thu, 2 Oct 2025 13:01:44 +0200
Subject: [PATCH 6/8] test(openai-agents): fix some tests

---
 .../openai_agents/test_openai_agents.py | 58 +++++++++++--------
 1 file changed, 35 insertions(+), 23 deletions(-)

diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
index 33392d9c64..e7548b1b2a 100644
--- a/tests/integrations/openai_agents/test_openai_agents.py
+++ b/tests/integrations/openai_agents/test_openai_agents.py
@@ -15,6 +15,7 @@
     ModelSettings,
 )
 from agents.items import (
+    McpCall,
     ResponseOutputMessage,
     ResponseOutputText,
     ResponseFunctionToolCall,
@@ -452,6 +453,8 @@ def simple_test_tool(message: str) -> str:
                 "on_invoke_tool": "._create_function_tool.._on_invoke_tool>",
                 "strict_json_schema": True,
                 "is_enabled": True,
+                "tool_input_guardrails": None,
+                "tool_output_guardrails": None,
             }
         ]
     )
@@ -694,13 +697,16 @@ async def test_mcp_tool_execution_spans(sentry_init, capture_events, test_agent)
         with patch(
             "agents.models.openai_responses.OpenAIResponsesModel.get_response"
         ) as mock_get_response:
-            # Create a mock McpCall object
-            mcp_call = MagicMock()
-            mcp_call.__class__.__name__ = "McpCall"
-            mcp_call.name = "test_mcp_tool"
-            mcp_call.arguments = '{"query": "search term"}'
-            mcp_call.output = "MCP tool executed successfully"
-            mcp_call.error = None
+            # Create a McpCall object
+            mcp_call = McpCall(
+                id="mcp_call_123",
+                name="test_mcp_tool",
+                arguments='{"query": "search term"}',
+                output="MCP tool executed successfully",
+                error=None,
+                type="mcp_call",
+                server_label="test_server",
+            )
 
             # Create a ModelResponse with an McpCall in the output
             mcp_response = ModelResponse(
@@ -780,7 +786,7 @@ async def test_mcp_tool_execution_spans(sentry_init, capture_events, test_agent)
             )
 
             # Verify no error status since error was None
-            assert mcp_tool_span.get("status") != "error"
+            assert mcp_tool_span.get("tags", {}).get("status") != "error"
 
 
 @pytest.mark.asyncio
@@ -793,13 +799,16 @@ async def test_mcp_tool_execution_with_error(sentry_init, capture_events, test_a
         with patch(
             "agents.models.openai_responses.OpenAIResponsesModel.get_response"
        ) as mock_get_response:
-            # Create a mock McpCall object with an error
-            mcp_call_with_error = MagicMock()
-            mcp_call_with_error.__class__.__name__ = "McpCall"
-            mcp_call_with_error.name = "failing_mcp_tool"
-            mcp_call_with_error.arguments = '{"query": "test"}'
-            mcp_call_with_error.output = None
-            mcp_call_with_error.error = "MCP tool execution failed"
+            # Create a McpCall object with an error
+            mcp_call_with_error = McpCall(
+                id="mcp_call_error_123",
+                name="failing_mcp_tool",
+                arguments='{"query": "test"}',
+                output=None,
+                error="MCP tool execution failed",
+                type="mcp_call",
+                server_label="test_server",
+            )
 
             # Create a ModelResponse with a failing McpCall
             mcp_response = ModelResponse(
@@ -877,7 +886,7 @@ async def test_mcp_tool_execution_with_error(sentry_init, capture_events, test_a
             assert mcp_tool_span["data"]["gen_ai.tool.output"] is None
 
             # Verify error status was set
-            assert mcp_tool_span["status"] == "error"
+            assert mcp_tool_span["tags"]["status"] == "error"
 
 
 @pytest.mark.asyncio
@@ -890,13 +899,16 @@ async def test_mcp_tool_execution_without_pii(sentry_init, capture_events, test_
         with patch(
             "agents.models.openai_responses.OpenAIResponsesModel.get_response"
         ) as mock_get_response:
-            # Create a mock McpCall object
-            mcp_call = MagicMock()
-            mcp_call.__class__.__name__ = "McpCall"
-            mcp_call.name = "test_mcp_tool"
-            mcp_call.arguments = '{"query": "sensitive data"}'
-            mcp_call.output = "Result with sensitive info"
-            mcp_call.error = None
+            # Create a McpCall object
+            mcp_call = McpCall(
+                id="mcp_call_pii_123",
+                name="test_mcp_tool",
+                arguments='{"query": "sensitive data"}',
+                output="Result with sensitive info",
+                error=None,
+                type="mcp_call",
+                server_label="test_server",
+            )
 
             # Create a ModelResponse with an McpCall
             mcp_response = ModelResponse(

From 58027509fd39fe7838e6da78ccd8a4baf20a0956 Mon Sep 17 00:00:00 2001
From: Fabian Schindler
Date: Thu, 2 Oct 2025 13:25:34 +0200
Subject: [PATCH 7/8] test(openai-agents): fix tests

---
 tests/integrations/openai_agents/test_openai_agents.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
index e7548b1b2a..17d521af3d 100644
--- a/tests/integrations/openai_agents/test_openai_agents.py
+++ b/tests/integrations/openai_agents/test_openai_agents.py
@@ -453,8 +453,6 @@ def simple_test_tool(message: str) -> str:
                 "on_invoke_tool": "._create_function_tool.._on_invoke_tool>",
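
A note on PATCH 6/8: it replaces the MagicMock stand-ins with real McpCall objects from agents.items, which exercises the integration's class-name check against the genuine type. Since the three tests now build near-identical McpCall objects by hand, a small factory could cut the repetition; a possible follow-up refactor (the helper name and defaults are mine, not part of this series):

    # Hypothetical test helper, not part of the patches above.
    from agents.items import McpCall

    def make_mcp_call(name, arguments="{}", output=None, error=None):
        # Mirrors the construction used by all three MCP tests.
        return McpCall(
            id=f"mcp_call_{name}",
            name=name,
            arguments=arguments,
            output=output,
            error=error,
            type="mcp_call",
            server_label="test_server",
        )

    # e.g.:
    # make_mcp_call("failing_mcp_tool", '{"query": "test"}',
    #               error="MCP tool execution failed")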
"strict_json_schema": True, "is_enabled": True, - "tool_input_guardrails": None, - "tool_output_guardrails": None, } ] ) From c485fde4732d4418679221cabdf663b12b1306cb Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 2 Oct 2025 16:25:26 +0200 Subject: [PATCH 8/8] Update tests/integrations/openai_agents/test_openai_agents.py Co-authored-by: Ivana Kellyer --- tests/integrations/openai_agents/test_openai_agents.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 17d521af3d..1768971c99 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -688,7 +688,6 @@ async def test_span_status_error(sentry_init, capture_events, test_agent): async def test_mcp_tool_execution_spans(sentry_init, capture_events, test_agent): """ Test that MCP (Model Context Protocol) tool calls create execute_tool spans. - This tests the functionality added in the PR for MCP tool execution tracking. """ with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):