-
Notifications
You must be signed in to change notification settings - Fork 562
fix(ai): add mapping for gen_ai message roles #4884
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 14 commits
ae8e359
f680921
906da3d
0081cf8
e7bf105
ab4dec0
0d7102e
8810ee6
96b3ad6
16342b1
3286292
eb8c05d
0473c7f
526acf3
8be69b2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
from .utils import ( | ||
set_data_normalized, | ||
GEN_AI_MESSAGE_ROLE_MAPPING, | ||
GEN_AI_MESSAGE_ROLE_REVERSE_MAPPING, | ||
normalize_message_role, | ||
normalize_message_roles, | ||
) # noqa: F401 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,6 +10,26 @@ | |
from sentry_sdk.utils import logger | ||
|
||
|
||
class GEN_AI_ALLOWED_MESSAGE_ROLES:
    """Namespace of the canonical role strings used for gen_ai messages."""

    SYSTEM = "system"
    USER = "user"
    ASSISTANT = "assistant"
    # NOTE: the attribute is named TOOL_CALL, but the role value it holds is "tool".
    TOOL_CALL = "tool"
|
||
|
||
# Canonical role -> every source spelling that is normalized to that role.
GEN_AI_MESSAGE_ROLE_REVERSE_MAPPING = {
    GEN_AI_ALLOWED_MESSAGE_ROLES.SYSTEM: ["system"],
    GEN_AI_ALLOWED_MESSAGE_ROLES.USER: ["user", "human"],
    GEN_AI_ALLOWED_MESSAGE_ROLES.ASSISTANT: ["assistant", "ai"],
    GEN_AI_ALLOWED_MESSAGE_ROLES.TOOL_CALL: ["tool", "tool_call"],
}

# Flat lookup table (source spelling -> canonical role) derived from the
# reverse mapping above. Built with a comprehension so that no loop variables
# are left behind in the module namespace.
GEN_AI_MESSAGE_ROLE_MAPPING = {
    source_role: target_role
    for target_role, source_roles in GEN_AI_MESSAGE_ROLE_REVERSE_MAPPING.items()
    for source_role in source_roles
}
|
||
|
||
def _normalize_data(data, unpack=True): | ||
# type: (Any, bool) -> Any | ||
# convert pydantic data (e.g. OpenAI v1+) to json compatible format | ||
|
@@ -40,6 +60,34 @@ def set_data_normalized(span, key, value, unpack=True): | |
span.set_data(key, json.dumps(normalized)) | ||
|
||
|
||
def normalize_message_role(role):
    # type: (str) -> str
    """
    Normalize a message role to its canonical gen_ai role value.

    Known spellings are translated through GEN_AI_MESSAGE_ROLE_MAPPING
    (for example "ai" -> "assistant", "human" -> "user" and
    "tool_call" -> "tool"). A role that is not present in the mapping is
    returned unchanged, so the result is not guaranteed to be one of the
    allowed roles.
    """
    try:
        return GEN_AI_MESSAGE_ROLE_MAPPING.get(role, role)
    except TypeError:
        # An unhashable role (e.g. a list or dict) cannot be looked up in the
        # mapping; pass it through untouched instead of raising.
        return role
|
||
|
||
def normalize_message_roles(messages):
    # type: (list[dict[str, Any]]) -> list[dict[str, Any]]
    """
    Return a new list of messages whose roles use standard gen_ai role values.

    Every dict message is shallow-copied before its "role" entry is rewritten,
    so neither the input list nor the input dicts are modified. Nested values
    (such as a "content" list) are NOT copied and remain shared with the
    original message. Entries that are not dicts are passed through as-is.
    """
    normalized_messages = []
    for message in messages:
        if not isinstance(message, dict):
            # Not a dict-shaped message: nothing to normalize.
            normalized_messages.append(message)
            continue

        # Shallow copy: only the top-level "role" key is ever replaced.
        normalized_message = message.copy()
        if "role" in message:
            normalized_message["role"] = normalize_message_role(message["role"])
        normalized_messages.append(normalized_message)

    return normalized_messages
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Bug: Shallow Copy Bug in Message Normalization. The |
||
|
||
|
||
def get_start_span_function(): | ||
# type: () -> Callable[..., Any] | ||
current_span = sentry_sdk.get_current_span() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,10 @@ | ||
import sentry_sdk | ||
from sentry_sdk.ai.utils import set_data_normalized | ||
from sentry_sdk.ai.utils import ( | ||
GEN_AI_ALLOWED_MESSAGE_ROLES, | ||
normalize_message_roles, | ||
set_data_normalized, | ||
normalize_message_role, | ||
) | ||
from sentry_sdk.consts import SPANDATA, SPANSTATUS, OP | ||
from sentry_sdk.integrations import DidNotEnable | ||
from sentry_sdk.scope import should_send_default_pii | ||
|
@@ -94,35 +99,47 @@ def _set_input_data(span, get_response_kwargs): | |
# type: (sentry_sdk.tracing.Span, dict[str, Any]) -> None | ||
if not should_send_default_pii(): | ||
return | ||
request_messages = [] | ||
|
||
messages_by_role = { | ||
"system": [], | ||
"user": [], | ||
"assistant": [], | ||
"tool": [], | ||
} # type: (dict[str, list[Any]]) | ||
system_instructions = get_response_kwargs.get("system_instructions") | ||
if system_instructions: | ||
messages_by_role["system"].append({"type": "text", "text": system_instructions}) | ||
request_messages.append( | ||
{ | ||
"role": GEN_AI_ALLOWED_MESSAGE_ROLES.SYSTEM, | ||
"content": [{"type": "text", "text": system_instructions}], | ||
} | ||
) | ||
|
||
for message in get_response_kwargs.get("input", []): | ||
if "role" in message: | ||
messages_by_role[message.get("role")].append( | ||
{"type": "text", "text": message.get("content")} | ||
normalized_role = normalize_message_role(message.get("role")) | ||
request_messages.append( | ||
{ | ||
"role": normalized_role, | ||
"content": [{"type": "text", "text": message.get("content")}], | ||
} | ||
) | ||
else: | ||
if message.get("type") == "function_call": | ||
messages_by_role["assistant"].append(message) | ||
request_messages.append( | ||
{ | ||
"role": GEN_AI_ALLOWED_MESSAGE_ROLES.ASSISTANT, | ||
"content": [message], | ||
} | ||
) | ||
elif message.get("type") == "function_call_output": | ||
messages_by_role["tool"].append(message) | ||
|
||
request_messages = [] | ||
for role, messages in messages_by_role.items(): | ||
if len(messages) > 0: | ||
request_messages.append({"role": role, "content": messages}) | ||
request_messages.append( | ||
{ | ||
"role": GEN_AI_ALLOWED_MESSAGE_ROLES.TOOL_CALL, | ||
"content": [message], | ||
} | ||
) | ||
|
||
set_data_normalized( | ||
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, request_messages, unpack=False | ||
span, | ||
SPANDATA.GEN_AI_REQUEST_MESSAGES, | ||
normalize_message_roles(request_messages), | ||
unpack=False, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Bug: Role Normalization and Semantic Mismatch. Message roles are normalized redundantly, first during message construction and again on the final list. Additionally, There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Bug: Refactoring Breaks Sentry Data, Introduces Confusion. The |
||
) | ||
|
||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.