1 change: 1 addition & 0 deletions sentry_sdk/integrations/__init__.py
@@ -97,6 +97,7 @@ def iter_default_integrations(with_auto_enabling_integrations):
     "sentry_sdk.integrations.langchain.LangchainIntegration",
     "sentry_sdk.integrations.langgraph.LanggraphIntegration",
     "sentry_sdk.integrations.litestar.LitestarIntegration",
+    "sentry_sdk.integrations.litellm.LiteLLMIntegration",
     "sentry_sdk.integrations.loguru.LoguruIntegration",
     "sentry_sdk.integrations.openai.OpenAIIntegration",
     "sentry_sdk.integrations.pymongo.PyMongoIntegration",
281 changes: 281 additions & 0 deletions sentry_sdk/integrations/litellm.py
@@ -0,0 +1,281 @@
from typing import TYPE_CHECKING

import sentry_sdk
from sentry_sdk import consts
from sentry_sdk.ai.monitoring import record_token_usage
from sentry_sdk.ai.utils import get_start_span_function, set_data_normalized
from sentry_sdk.consts import SPANDATA
from sentry_sdk.integrations import DidNotEnable, Integration
from sentry_sdk.scope import should_send_default_pii
from sentry_sdk.utils import event_from_exception

if TYPE_CHECKING:
    from typing import Any, Dict
    from datetime import datetime

try:
    import litellm
except ImportError:
    raise DidNotEnable("LiteLLM not installed")


def _get_provider_from_model(model):
    # type: (str) -> str
    """Extract the provider name from a model string, falling back to LiteLLM's own detection."""
    if not model:
        return "unknown"

    # Common provider prefixes/patterns
    if model.startswith("gpt-") or model.startswith("o1-") or "openai/" in model:
        return "openai"
    elif model.startswith("claude-") or "anthropic/" in model:
        return "anthropic"
    elif (
        model.startswith("gemini-")
        or "google/" in model
        or model.startswith("vertex_ai/")
    ):
        return "google"
    elif "cohere/" in model or model.startswith("command-"):
        return "cohere"
    elif "azure/" in model:
        return "azure"
    elif "bedrock/" in model:
        return "bedrock"
    elif "ollama/" in model:
        return "ollama"
    else:
        # Fall back to LiteLLM's internal provider detection if available
        try:
            if hasattr(litellm, "get_llm_provider"):
                provider_info = litellm.get_llm_provider(model)
                if isinstance(provider_info, tuple) and len(provider_info) > 1:
                    return provider_info[1] or "unknown"
            return "unknown"
        except Exception:
            return "unknown"


def _input_callback(
    kwargs,  # type: Dict[str, Any]
):
    # type: (...) -> None
    """Handle the start of a request."""
    integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)

    if integration is None:
        return

    # Get key parameters
    model = kwargs.get("model", "")
    messages = kwargs.get("messages", [])
    operation = "chat" if messages else "embeddings"

    # Start a new span/transaction
    span = get_start_span_function()(
        op=(
            consts.OP.GEN_AI_CHAT
            if operation == "chat"
            else consts.OP.GEN_AI_EMBEDDINGS
        ),
        name=f"{operation} {model}",
        origin=LiteLLMIntegration.origin,
    )
    span.__enter__()

    # Store span for later
    kwargs["_sentry_span"] = span

    # Set basic data
    set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "litellm")
    set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, operation)
    set_data_normalized(
        span, "gen_ai.litellm.provider", _get_provider_from_model(model)
    )

    # Record messages if allowed
    if messages and should_send_default_pii() and integration.include_prompts:
        set_data_normalized(
            span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages, unpack=False
        )

    # Record other parameters
    params = {
        "model": SPANDATA.GEN_AI_REQUEST_MODEL,
        "stream": SPANDATA.GEN_AI_RESPONSE_STREAMING,
        "max_tokens": SPANDATA.GEN_AI_REQUEST_MAX_TOKENS,
        "presence_penalty": SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY,
        "frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY,
        "temperature": SPANDATA.GEN_AI_REQUEST_TEMPERATURE,
        "top_p": SPANDATA.GEN_AI_REQUEST_TOP_P,
    }
Comment on lines +80 to +88

Contributor Author:
It is not clear where to actually put these parameters in the arguments to `completion`.

Contributor:
I don't understand this comment; can you elaborate? What do the params have to do with `completion`?

Contributor Author:
`completion` takes quite generic kwargs that are then passed on to the model provider API. The parameters above are the ones used for OpenAI (at least I suspect that this is where and how we retrieve them).
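
For illustration, a minimal sketch (hypothetical call, not part of the diff) of how these OpenAI-style parameters travel: they are passed to `litellm.completion` as plain keyword arguments, forwarded to the provider API, and read back out of `kwargs` by `_input_callback`, which maps them to the SPANDATA attributes above.

```python
import litellm

# Hypothetical example call; model name and values are placeholders.
response = litellm.completion(
    model="gpt-4o-mini",                               # -> GEN_AI_REQUEST_MODEL
    messages=[{"role": "user", "content": "Hello!"}],  # -> GEN_AI_REQUEST_MESSAGES (PII-gated)
    temperature=0.2,                                   # -> GEN_AI_REQUEST_TEMPERATURE
    top_p=0.9,                                         # -> GEN_AI_REQUEST_TOP_P
    max_tokens=64,                                     # -> GEN_AI_REQUEST_MAX_TOKENS
)
```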

    for key, attribute in params.items():
        value = kwargs.get(key)
        if value is not None:
            set_data_normalized(span, attribute, value)

    # Record LiteLLM-specific parameters
    litellm_params = {
        "api_base": kwargs.get("api_base"),
        "api_version": kwargs.get("api_version"),
        "custom_llm_provider": kwargs.get("custom_llm_provider"),
    }
    for key, value in litellm_params.items():
        if value is not None:
            set_data_normalized(span, f"gen_ai.litellm.{key}", value)


def _success_callback(
    kwargs,  # type: Dict[str, Any]
    completion_response,  # type: Any
    start_time,  # type: datetime
    end_time,  # type: datetime
):
    # type: (...) -> None
    """Handle successful completion."""

    span = kwargs.get("_sentry_span")
    if span is None:
        return

    integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)
    if integration is None:
        return

    try:
        # Record model information
        if hasattr(completion_response, "model"):
            set_data_normalized(
                span, SPANDATA.GEN_AI_RESPONSE_MODEL, completion_response.model
            )

        # Record response content if allowed
        if should_send_default_pii() and integration.include_prompts:
            if hasattr(completion_response, "choices"):
                response_messages = []
                for choice in completion_response.choices:
                    if hasattr(choice, "message"):
                        if hasattr(choice.message, "model_dump"):
                            response_messages.append(choice.message.model_dump())
                        elif hasattr(choice.message, "dict"):
                            response_messages.append(choice.message.dict())
                        else:
                            # Fallback for basic message objects
                            msg = {}
                            if hasattr(choice.message, "role"):
                                msg["role"] = choice.message.role
                            if hasattr(choice.message, "content"):
                                msg["content"] = choice.message.content
                            if hasattr(choice.message, "tool_calls"):
                                msg["tool_calls"] = choice.message.tool_calls
                            response_messages.append(msg)

                if response_messages:
                    set_data_normalized(
                        span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_messages
                    )

        # Record token usage
        if hasattr(completion_response, "usage"):
            usage = completion_response.usage
            record_token_usage(
                span,
                input_tokens=getattr(usage, "prompt_tokens", None),
                output_tokens=getattr(usage, "completion_tokens", None),
                total_tokens=getattr(usage, "total_tokens", None),
            )

    finally:
        # Always finish the span and clean up
        span.__exit__(None, None, None)


def _failure_callback(
    kwargs,  # type: Dict[str, Any]
    exception,  # type: Exception
    start_time,  # type: datetime
    end_time,  # type: datetime
):
    # type: (...) -> None
    """Handle request failure."""
    span = kwargs.get("_sentry_span")

    try:
        # Capture the exception
        event, hint = event_from_exception(
            exception,
            client_options=sentry_sdk.get_client().options,
            mechanism={"type": "litellm", "handled": False},
        )
        sentry_sdk.capture_event(event, hint=hint)
    finally:
        # Always finish the span and clean up, if the input callback created one
        if span is not None:
            span.__exit__(None, None, None)


class LiteLLMIntegration(Integration):
    """
    LiteLLM integration for Sentry.

    This integration automatically captures LiteLLM API calls and sends them to Sentry
    for monitoring and error tracking. It supports all 100+ LLM providers that LiteLLM
    supports, including OpenAI, Anthropic, Google, Cohere, and many others.

    Features:
    - Automatic exception capture for all LiteLLM calls
    - Token usage tracking across all providers
    - Provider detection and attribution
    - Input/output message capture (configurable)
    - Streaming response support
    - Cost tracking integration

    Usage:
    ```python
    import litellm
    import sentry_sdk
    from sentry_sdk.integrations.litellm import LiteLLMIntegration

    # Initialize Sentry with the LiteLLM integration
    sentry_sdk.init(
        dsn="your-dsn",
        integrations=[
            LiteLLMIntegration(
                include_prompts=True  # Set to False to exclude message content
            )
        ]
    )

    # All LiteLLM calls will now be monitored
    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello!"}]
    )
    ```

    Configuration:
    - include_prompts (bool): Whether to include prompts and responses in spans.
      Defaults to True. Set to False to exclude potentially sensitive data.
    """

    identifier = "litellm"
    origin = f"auto.ai.{identifier}"

    def __init__(self, include_prompts=True):
        # type: (LiteLLMIntegration, bool) -> None
        self.include_prompts = include_prompts

    @staticmethod
    def setup_once():
        # type: () -> None
        """Set up LiteLLM callbacks for monitoring."""
        litellm.input_callback = litellm.input_callback or []
        if _input_callback not in litellm.input_callback:
            litellm.input_callback.append(_input_callback)

        litellm.success_callback = litellm.success_callback or []
        if _success_callback not in litellm.success_callback:
            litellm.success_callback.append(_success_callback)

        litellm.failure_callback = litellm.failure_callback or []
        if _failure_callback not in litellm.failure_callback:
            litellm.failure_callback.append(_failure_callback)
Comment on lines +245 to +251

Contributor Author:
It seems as if both success_callback and failure_callback are run in a thread, which might finish after completion returns. As the span is closed in either callback, it may happen that the span is finished after the surrounding transaction has finished, resulting in it being absent completely. This should definitely be pointed out somewhere.

Contributor:
There is definitely the potential for a timing issue but I don't see a way around it at the moment since the LiteLLM integration might not be in control of the overarching transaction.

From your testing when developing this, was this a real issue when something like a web framework was managing the transaction?

Contributor Author:
It was only an issue when writing code like this:

with sentry_sdk.start_transaction(...):
    result = completion(...)

When using it in a framework (tried with FastAPI) I could not reproduce this error.
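
To make the race described above concrete, here is a minimal sketch (hypothetical code, not part of the diff) of the manually managed transaction pattern where the span can be lost:

```python
import litellm
import sentry_sdk
from sentry_sdk.integrations.litellm import LiteLLMIntegration

sentry_sdk.init(
    dsn="your-dsn",
    traces_sample_rate=1.0,
    integrations=[LiteLLMIntegration()],
)

with sentry_sdk.start_transaction(op="task", name="litellm-demo"):
    result = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello!"}],
    )
    # litellm.completion has returned, but the success callback that closes the
    # gen_ai span may still be running on LiteLLM's callback thread. If it
    # finishes after this `with` block exits, the span is dropped from the
    # transaction.
```

In the framework-managed case (the FastAPI test above), the transaction apparently stays open long enough that the issue was not observed.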
