2 changes: 2 additions & 0 deletions sentry_sdk/integrations/openai_agents/spans/ai_client.py
@@ -7,6 +7,7 @@
_set_input_data,
_set_output_data,
_set_usage_data,
_create_mcp_execute_tool_spans,
)

from typing import TYPE_CHECKING
@@ -37,3 +38,4 @@ def update_ai_client_span(span, agent, get_response_kwargs, result):
_set_usage_data(span, result.usage)
_set_input_data(span, get_response_kwargs)
_set_output_data(span, result)
_create_mcp_execute_tool_spans(span, result)
26 changes: 25 additions & 1 deletion sentry_sdk/integrations/openai_agents/utils.py
@@ -1,6 +1,6 @@
import sentry_sdk
from sentry_sdk.ai.utils import set_data_normalized
-from sentry_sdk.consts import SPANDATA
+from sentry_sdk.consts import SPANDATA, SPANSTATUS, OP
from sentry_sdk.integrations import DidNotEnable
from sentry_sdk.scope import should_send_default_pii
from sentry_sdk.tracing_utils import set_span_errored
@@ -156,3 +156,27 @@ def _set_output_data(span, result):
set_data_normalized(
span, SPANDATA.GEN_AI_RESPONSE_TEXT, output_messages["response"]
)


def _create_mcp_execute_tool_spans(span, result):
# type: (sentry_sdk.tracing.Span, agents.Result) -> None
for output in result.output:
if output.__class__.__name__ == "McpCall":
with sentry_sdk.start_span(
op=OP.GEN_AI_EXECUTE_TOOL,
description=f"execute_tool {output.name}",
start_timestamp=span.start_timestamp,
) as execute_tool_span:
set_data_normalized(execute_tool_span, SPANDATA.GEN_AI_TOOL_TYPE, "mcp")
set_data_normalized(
execute_tool_span, SPANDATA.GEN_AI_TOOL_NAME, output.name
)
if should_send_default_pii():
execute_tool_span.set_data(
SPANDATA.GEN_AI_TOOL_INPUT, output.arguments
)
execute_tool_span.set_data(
SPANDATA.GEN_AI_TOOL_OUTPUT, output.output
)
if output.error:
execute_tool_span.set_status(SPANSTATUS.ERROR)

Bug: MCP Tool Spans Lack Agent Context Data

The _create_mcp_execute_tool_spans function creates execute_tool spans for MCP calls without setting any agent context data. These spans are missing fields such as gen_ai.agent.name, gen_ai.system, and gen_ai.request.model, making them inconsistent with regular execute_tool spans. This happens because the function never receives the agent; a possible fix is sketched below.
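A minimal sketch of one way to close this gap, threading the agent through from update_ai_client_span (which already receives it). The _set_agent_data helper is an assumption here, standing in for whichever helper the integration uses to populate agent fields on regular execute_tool spans:

def _create_mcp_execute_tool_spans(span, agent, result):
    # type: (sentry_sdk.tracing.Span, agents.Agent, agents.Result) -> None
    for output in result.output:
        if output.__class__.__name__ == "McpCall":
            with sentry_sdk.start_span(
                op=OP.GEN_AI_EXECUTE_TOOL,
                description=f"execute_tool {output.name}",
                start_timestamp=span.start_timestamp,
            ) as execute_tool_span:
                # Assumed helper: sets gen_ai.agent.name, gen_ai.system, and
                # gen_ai.request.model so MCP spans match regular tool spans.
                _set_agent_data(execute_tool_span, agent)
                set_data_normalized(execute_tool_span, SPANDATA.GEN_AI_TOOL_TYPE, "mcp")
                set_data_normalized(
                    execute_tool_span, SPANDATA.GEN_AI_TOOL_NAME, output.name
                )
                # ... PII-gated input/output and error handling as above ...

The call site in update_ai_client_span would then pass along the agent it already has:

    _create_mcp_execute_tool_spans(span, agent, result)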


302 changes: 302 additions & 0 deletions tests/integrations/openai_agents/test_openai_agents.py
@@ -15,6 +15,7 @@
ModelSettings,
)
from agents.items import (
McpCall,
ResponseOutputMessage,
ResponseOutputText,
ResponseFunctionToolCall,
@@ -683,6 +684,307 @@ async def test_span_status_error(sentry_init, capture_events, test_agent):
assert transaction["contexts"]["trace"]["status"] == "error"


@pytest.mark.asyncio
async def test_mcp_tool_execution_spans(sentry_init, capture_events, test_agent):
"""
Test that MCP (Model Context Protocol) tool calls create execute_tool spans.
"""

with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
with patch(
"agents.models.openai_responses.OpenAIResponsesModel.get_response"
) as mock_get_response:
# Create a McpCall object
mcp_call = McpCall(
id="mcp_call_123",
name="test_mcp_tool",
arguments='{"query": "search term"}',
output="MCP tool executed successfully",
error=None,
type="mcp_call",
server_label="test_server",
)

# Create a ModelResponse with an McpCall in the output
mcp_response = ModelResponse(
output=[mcp_call],
usage=Usage(
requests=1,
input_tokens=10,
output_tokens=5,
total_tokens=15,
),
response_id="resp_mcp_123",
)

# Final response after MCP tool execution
final_response = ModelResponse(
output=[
ResponseOutputMessage(
id="msg_final",
type="message",
status="completed",
content=[
ResponseOutputText(
text="Task completed using MCP tool",
type="output_text",
annotations=[],
)
],
role="assistant",
)
],
usage=Usage(
requests=1,
input_tokens=15,
output_tokens=10,
total_tokens=25,
),
response_id="resp_final_123",
)

mock_get_response.side_effect = [mcp_response, final_response]

sentry_init(
integrations=[OpenAIAgentsIntegration()],
traces_sample_rate=1.0,
send_default_pii=True,
)

events = capture_events()

await agents.Runner.run(
test_agent,
"Please use MCP tool",
run_config=test_run_config,
)

(transaction,) = events
spans = transaction["spans"]

# Find the MCP execute_tool span
mcp_tool_span = None
for span in spans:
if (
span.get("description") == "execute_tool test_mcp_tool"
and span.get("data", {}).get("gen_ai.tool.type") == "mcp"
):
mcp_tool_span = span
break

# Verify the MCP tool span was created
assert mcp_tool_span is not None, "MCP execute_tool span was not created"
assert mcp_tool_span["description"] == "execute_tool test_mcp_tool"
assert mcp_tool_span["data"]["gen_ai.tool.type"] == "mcp"
assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool"
assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "search term"}'
assert (
mcp_tool_span["data"]["gen_ai.tool.output"] == "MCP tool executed successfully"
)

# Verify no error status since error was None
assert mcp_tool_span.get("tags", {}).get("status") != "error"


@pytest.mark.asyncio
async def test_mcp_tool_execution_with_error(sentry_init, capture_events, test_agent):
"""
Test that MCP tool calls with errors are tracked with error status.
"""

with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
with patch(
"agents.models.openai_responses.OpenAIResponsesModel.get_response"
) as mock_get_response:
# Create a McpCall object with an error
mcp_call_with_error = McpCall(
id="mcp_call_error_123",
name="failing_mcp_tool",
arguments='{"query": "test"}',
output=None,
error="MCP tool execution failed",
type="mcp_call",
server_label="test_server",
)

# Create a ModelResponse with a failing McpCall
mcp_response = ModelResponse(
output=[mcp_call_with_error],
usage=Usage(
requests=1,
input_tokens=10,
output_tokens=5,
total_tokens=15,
),
response_id="resp_mcp_error_123",
)

# Final response after error
final_response = ModelResponse(
output=[
ResponseOutputMessage(
id="msg_final",
type="message",
status="completed",
content=[
ResponseOutputText(
text="The MCP tool encountered an error",
type="output_text",
annotations=[],
)
],
role="assistant",
)
],
usage=Usage(
requests=1,
input_tokens=15,
output_tokens=10,
total_tokens=25,
),
response_id="resp_final_error_123",
)

mock_get_response.side_effect = [mcp_response, final_response]

sentry_init(
integrations=[OpenAIAgentsIntegration()],
traces_sample_rate=1.0,
send_default_pii=True,
)

events = capture_events()

await agents.Runner.run(
test_agent,
"Please use failing MCP tool",
run_config=test_run_config,
)

(transaction,) = events
spans = transaction["spans"]

# Find the MCP execute_tool span with error
mcp_tool_span = None
for span in spans:
if (
span.get("description") == "execute_tool failing_mcp_tool"
and span.get("data", {}).get("gen_ai.tool.type") == "mcp"
):
mcp_tool_span = span
break

# Verify the MCP tool span was created with error status
assert mcp_tool_span is not None, "MCP execute_tool span was not created"
assert mcp_tool_span["description"] == "execute_tool failing_mcp_tool"
assert mcp_tool_span["data"]["gen_ai.tool.type"] == "mcp"
assert mcp_tool_span["data"]["gen_ai.tool.name"] == "failing_mcp_tool"
assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "test"}'
assert mcp_tool_span["data"]["gen_ai.tool.output"] is None

# Verify error status was set
assert mcp_tool_span["tags"]["status"] == "error"


@pytest.mark.asyncio
async def test_mcp_tool_execution_without_pii(sentry_init, capture_events, test_agent):
"""
Test that MCP tool input/output are not included when send_default_pii is False.
"""

with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
with patch(
"agents.models.openai_responses.OpenAIResponsesModel.get_response"
) as mock_get_response:
# Create a McpCall object
mcp_call = McpCall(
id="mcp_call_pii_123",
name="test_mcp_tool",
arguments='{"query": "sensitive data"}',
output="Result with sensitive info",
error=None,
type="mcp_call",
server_label="test_server",
)

# Create a ModelResponse with an McpCall
mcp_response = ModelResponse(
output=[mcp_call],
usage=Usage(
requests=1,
input_tokens=10,
output_tokens=5,
total_tokens=15,
),
response_id="resp_mcp_123",
)

# Final response
final_response = ModelResponse(
output=[
ResponseOutputMessage(
id="msg_final",
type="message",
status="completed",
content=[
ResponseOutputText(
text="Task completed",
type="output_text",
annotations=[],
)
],
role="assistant",
)
],
usage=Usage(
requests=1,
input_tokens=15,
output_tokens=10,
total_tokens=25,
),
response_id="resp_final_123",
)

mock_get_response.side_effect = [mcp_response, final_response]

sentry_init(
integrations=[OpenAIAgentsIntegration()],
traces_sample_rate=1.0,
send_default_pii=False, # PII disabled
)

events = capture_events()

await agents.Runner.run(
test_agent,
"Please use MCP tool",
run_config=test_run_config,
)

(transaction,) = events
spans = transaction["spans"]

# Find the MCP execute_tool span
mcp_tool_span = None
for span in spans:
if (
span.get("description") == "execute_tool test_mcp_tool"
and span.get("data", {}).get("gen_ai.tool.type") == "mcp"
):
mcp_tool_span = span
break

# Verify the MCP tool span was created but without input/output
assert mcp_tool_span is not None, "MCP execute_tool span was not created"
assert mcp_tool_span["description"] == "execute_tool test_mcp_tool"
assert mcp_tool_span["data"]["gen_ai.tool.type"] == "mcp"
assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool"

# Verify input and output are not included when send_default_pii is False
assert "gen_ai.tool.input" not in mcp_tool_span["data"]
assert "gen_ai.tool.output" not in mcp_tool_span["data"]


@pytest.mark.asyncio
async def test_multiple_agents_asyncio(
sentry_init, capture_events, test_agent, mock_model_response