Skip to content
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions sentry_sdk/ai/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from .utils import (
set_data_normalized,
GEN_AI_MESSAGE_ROLE_MAPPING,
GEN_AI_MESSAGE_ROLE_REVERSE_MAPPING,
normalize_message_role,
normalize_message_roles,
) # noqa: F401
48 changes: 48 additions & 0 deletions sentry_sdk/ai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,26 @@
from sentry_sdk.utils import logger


class GEN_AI_ALLOWED_MESSAGE_ROLES:
    """
    Canonical role values used for the "role" key of gen_ai request messages.

    Integrations normalize provider-specific role spellings to these values
    before attaching messages to a span.
    """

    SYSTEM = "system"
    USER = "user"
    ASSISTANT = "assistant"
    # The role value is "tool"; it is used both for tool calls and for tool
    # outputs (e.g. "function_call_output" items), so the neutral name TOOL
    # describes it best.
    TOOL = "tool"
    # Backward-compatible alias: existing code refers to this role as
    # TOOL_CALL. It must stay identical to TOOL.
    TOOL_CALL = TOOL


# Canonical role -> every source spelling that is folded into that role.
# This is the table to edit when a new alias has to be supported.
GEN_AI_MESSAGE_ROLE_REVERSE_MAPPING = {
    GEN_AI_ALLOWED_MESSAGE_ROLES.SYSTEM: ["system"],
    GEN_AI_ALLOWED_MESSAGE_ROLES.USER: ["user", "human"],
    GEN_AI_ALLOWED_MESSAGE_ROLES.ASSISTANT: ["assistant", "ai"],
    GEN_AI_ALLOWED_MESSAGE_ROLES.TOOL_CALL: ["tool", "tool_call"],
}

# Source spelling -> canonical role, derived by inverting the table above.
# Built with a comprehension so that no loop variables are left behind as
# module-level names.
GEN_AI_MESSAGE_ROLE_MAPPING = {
    source_role: target_role
    for target_role, source_roles in GEN_AI_MESSAGE_ROLE_REVERSE_MAPPING.items()
    for source_role in source_roles
}


def _normalize_data(data, unpack=True):
# type: (Any, bool) -> Any
# convert pydantic data (e.g. OpenAI v1+) to json compatible format
Expand Down Expand Up @@ -40,6 +60,34 @@ def set_data_normalized(span, key, value, unpack=True):
span.set_data(key, json.dumps(normalized))


def normalize_message_role(role):
    # type: (str) -> str
    """
    Normalize a message role towards one of the 4 allowed gen_ai role values.

    Roles listed in GEN_AI_MESSAGE_ROLE_MAPPING are replaced by their
    canonical value ("human" -> "user", "ai" -> "assistant",
    "tool_call" -> "tool"; canonical roles map to themselves). Any role
    without an entry in the mapping is returned unchanged.
    """
    try:
        return GEN_AI_MESSAGE_ROLE_MAPPING.get(role, role)
    except TypeError:
        # An unhashable role (e.g. a list or dict) cannot be looked up in the
        # mapping. Pass it through unchanged rather than raising from
        # instrumentation code.
        return role


def normalize_message_roles(messages):
    # type: (list[dict[str, Any]]) -> list[dict[str, Any]]
    """
    Normalize roles in a list of messages to use standard gen_ai role values.

    Returns a new list. Every dict message is shallow-copied before its
    "role" entry is rewritten, so neither the input list nor the input dicts
    are modified. The copy is shallow: nested values (such as "content") are
    the same objects as in the original message, not duplicates. Entries that
    are not dicts are placed in the result as is.
    """
    normalized_messages = []
    for message in messages:
        if not isinstance(message, dict):
            # Nothing to normalize on non-dict entries; keep them untouched.
            normalized_messages.append(message)
            continue
        # Copy first so that the role rewrite never touches the caller's dict.
        normalized_message = message.copy()
        if "role" in message:
            normalized_message["role"] = normalize_message_role(message["role"])
        normalized_messages.append(normalized_message)

    return normalized_messages
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Shallow Copy Bug in Message Normalization

The normalize_message_roles function performs a shallow copy of messages. If messages contain nested mutable objects, changes to the normalized version will inadvertently modify the original message data. The docstring also incorrectly states a deep copy is made.

Fix in Cursor Fix in Web



def get_start_span_function():
# type: () -> Callable[..., Any]
current_span = sentry_sdk.get_current_span()
Expand Down
12 changes: 10 additions & 2 deletions sentry_sdk/integrations/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@

import sentry_sdk
from sentry_sdk.ai.monitoring import record_token_usage
from sentry_sdk.ai.utils import set_data_normalized, get_start_span_function
from sentry_sdk.ai.utils import (
set_data_normalized,
normalize_message_roles,
get_start_span_function,
)
from sentry_sdk.consts import OP, SPANDATA, SPANSTATUS
from sentry_sdk.integrations import _check_minimum_version, DidNotEnable, Integration
from sentry_sdk.scope import should_send_default_pii
Expand Down Expand Up @@ -140,8 +144,12 @@ def _set_input_data(span, kwargs, integration):
else:
normalized_messages.append(message)

role_normalized_messages = normalize_message_roles(normalized_messages)
set_data_normalized(
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, normalized_messages, unpack=False
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
role_normalized_messages,
unpack=False,
)

set_data_normalized(
Expand Down
33 changes: 29 additions & 4 deletions sentry_sdk/integrations/langchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,12 @@

import sentry_sdk
from sentry_sdk.ai.monitoring import set_ai_pipeline_name
from sentry_sdk.ai.utils import set_data_normalized, get_start_span_function
from sentry_sdk.ai.utils import (
GEN_AI_ALLOWED_MESSAGE_ROLES,
normalize_message_roles,
set_data_normalized,
get_start_span_function,
)
from sentry_sdk.consts import OP, SPANDATA
from sentry_sdk.integrations import DidNotEnable, Integration
from sentry_sdk.scope import should_send_default_pii
Expand Down Expand Up @@ -209,8 +214,18 @@ def on_llm_start(
_set_tools_on_span(span, all_params.get("tools"))

if should_send_default_pii() and self.include_prompts:
normalized_messages = [
{
"role": GEN_AI_ALLOWED_MESSAGE_ROLES.USER,
"content": {"type": "text", "text": prompt},
}
for prompt in prompts
]
set_data_normalized(
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, prompts, unpack=False
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
normalized_messages,
unpack=False,
)

def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs):
Expand Down Expand Up @@ -262,6 +277,8 @@ def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs):
normalized_messages.append(
self._normalize_langchain_message(message)
)
normalized_messages = normalize_message_roles(normalized_messages)

set_data_normalized(
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
Expand Down Expand Up @@ -740,8 +757,12 @@ def new_invoke(self, *args, **kwargs):
and should_send_default_pii()
and integration.include_prompts
):
normalized_messages = normalize_message_roles([input])
set_data_normalized(
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, [input], unpack=False
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
normalized_messages,
unpack=False,
)

output = result.get("output")
Expand Down Expand Up @@ -791,8 +812,12 @@ def new_stream(self, *args, **kwargs):
and should_send_default_pii()
and integration.include_prompts
):
normalized_messages = normalize_message_roles([input])
set_data_normalized(
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, [input], unpack=False
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
normalized_messages,
unpack=False,
)

# Run the agent
Expand Down
8 changes: 5 additions & 3 deletions sentry_sdk/integrations/langgraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import Any, Callable, List, Optional

import sentry_sdk
from sentry_sdk.ai.utils import set_data_normalized
from sentry_sdk.ai.utils import set_data_normalized, normalize_message_roles
from sentry_sdk.consts import OP, SPANDATA
from sentry_sdk.integrations import DidNotEnable, Integration
from sentry_sdk.scope import should_send_default_pii
Expand Down Expand Up @@ -180,10 +180,11 @@ def new_invoke(self, *args, **kwargs):
):
input_messages = _parse_langgraph_messages(args[0])
if input_messages:
normalized_input_messages = normalize_message_roles(input_messages)
set_data_normalized(
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
input_messages,
normalized_input_messages,
unpack=False,
)

Expand Down Expand Up @@ -230,10 +231,11 @@ async def new_ainvoke(self, *args, **kwargs):
):
input_messages = _parse_langgraph_messages(args[0])
if input_messages:
normalized_input_messages = normalize_message_roles(input_messages)
set_data_normalized(
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
input_messages,
normalized_input_messages,
unpack=False,
)

Expand Down
5 changes: 3 additions & 2 deletions sentry_sdk/integrations/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import sentry_sdk
from sentry_sdk import consts
from sentry_sdk.ai.monitoring import record_token_usage
from sentry_sdk.ai.utils import set_data_normalized
from sentry_sdk.ai.utils import set_data_normalized, normalize_message_roles
from sentry_sdk.consts import SPANDATA
from sentry_sdk.integrations import DidNotEnable, Integration
from sentry_sdk.scope import should_send_default_pii
Expand Down Expand Up @@ -182,8 +182,9 @@ def _set_input_data(span, kwargs, operation, integration):
and should_send_default_pii()
and integration.include_prompts
):
normalized_messages = normalize_message_roles(messages)
set_data_normalized(
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages, unpack=False
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, normalized_messages, unpack=False
)

# Input attributes: Common
Expand Down
12 changes: 10 additions & 2 deletions sentry_sdk/integrations/openai_agents/spans/invoke_agent.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import sentry_sdk
from sentry_sdk.ai.utils import get_start_span_function, set_data_normalized
from sentry_sdk.ai.utils import (
get_start_span_function,
set_data_normalized,
normalize_message_roles,
)
from sentry_sdk.consts import OP, SPANDATA
from sentry_sdk.scope import should_send_default_pii
from sentry_sdk.utils import safe_serialize
Expand Down Expand Up @@ -56,8 +60,12 @@ def invoke_agent_span(context, agent, kwargs):
)

if len(messages) > 0:
normalized_messages = normalize_message_roles(messages)
set_data_normalized(
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages, unpack=False
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
normalized_messages,
unpack=False,
)

_set_agent_data(span, agent)
Expand Down
53 changes: 35 additions & 18 deletions sentry_sdk/integrations/openai_agents/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import sentry_sdk
from sentry_sdk.ai.utils import set_data_normalized
from sentry_sdk.ai.utils import (
GEN_AI_ALLOWED_MESSAGE_ROLES,
normalize_message_roles,
set_data_normalized,
normalize_message_role,
)
from sentry_sdk.consts import SPANDATA, SPANSTATUS, OP
from sentry_sdk.integrations import DidNotEnable
from sentry_sdk.scope import should_send_default_pii
Expand Down Expand Up @@ -94,35 +99,47 @@ def _set_input_data(span, get_response_kwargs):
# type: (sentry_sdk.tracing.Span, dict[str, Any]) -> None
if not should_send_default_pii():
return
request_messages = []

messages_by_role = {
"system": [],
"user": [],
"assistant": [],
"tool": [],
} # type: (dict[str, list[Any]])
system_instructions = get_response_kwargs.get("system_instructions")
if system_instructions:
messages_by_role["system"].append({"type": "text", "text": system_instructions})
request_messages.append(
{
"role": GEN_AI_ALLOWED_MESSAGE_ROLES.SYSTEM,
"content": [{"type": "text", "text": system_instructions}],
}
)

for message in get_response_kwargs.get("input", []):
if "role" in message:
messages_by_role[message.get("role")].append(
{"type": "text", "text": message.get("content")}
normalized_role = normalize_message_role(message.get("role"))
request_messages.append(
{
"role": normalized_role,
"content": [{"type": "text", "text": message.get("content")}],
}
)
else:
if message.get("type") == "function_call":
messages_by_role["assistant"].append(message)
request_messages.append(
{
"role": GEN_AI_ALLOWED_MESSAGE_ROLES.ASSISTANT,
"content": [message],
}
)
elif message.get("type") == "function_call_output":
messages_by_role["tool"].append(message)

request_messages = []
for role, messages in messages_by_role.items():
if len(messages) > 0:
request_messages.append({"role": role, "content": messages})
request_messages.append(
{
"role": GEN_AI_ALLOWED_MESSAGE_ROLES.TOOL_CALL,
"content": [message],
}
)

set_data_normalized(
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, request_messages, unpack=False
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
normalize_message_roles(request_messages),
unpack=False,
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Role Normalization and Semantic Mismatch

Message roles are normalized redundantly, first during message construction and again on the final list. Additionally, GEN_AI_ALLOWED_MESSAGE_ROLES.TOOL_CALL (value 'tool') is used for function_call_output messages, creating a semantic mismatch with the constant's name TOOL_CALL.

Fix in Cursor Fix in Web

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Refactoring Breaks Sentry Data, Introduces Confusion

The _set_input_data refactoring alters the request_messages structure from grouped-by-role to individual messages, altering Sentry data and potentially impacting dashboards. It also introduces redundant message role normalization and a semantically confusing TOOL_CALL constant that holds the value 'tool'.

Fix in Cursor Fix in Web

)


Expand Down
Loading
Loading