43 changes: 34 additions & 9 deletions sentry_sdk/integrations/litellm.py
@@ -77,15 +77,40 @@ def _input_callback(kwargs):
set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, provider)
set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, operation)

# Record messages if allowed
messages = kwargs.get("messages", [])
if messages and should_send_default_pii() and integration.include_prompts:
scope = sentry_sdk.get_current_scope()
messages_data = truncate_and_annotate_messages(messages, span, scope)
if messages_data is not None:
set_data_normalized(
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
)
# Record input/messages if allowed
if should_send_default_pii() and integration.include_prompts:
if operation == "embeddings":
# For embeddings, look for the 'input' parameter
embedding_input = kwargs.get("input")
if embedding_input:
scope = sentry_sdk.get_current_scope()
# Normalize to list format
input_list = (
embedding_input
if isinstance(embedding_input, list)
else [embedding_input]
)
messages_data = truncate_and_annotate_messages(input_list, span, scope)
if messages_data is not None:
set_data_normalized(
span,
SPANDATA.GEN_AI_EMBEDDINGS_INPUT,
messages_data,
unpack=False,
)
else:
# For chat, look for the 'messages' parameter
messages = kwargs.get("messages", [])
if messages:
scope = sentry_sdk.get_current_scope()
messages_data = truncate_and_annotate_messages(messages, span, scope)
if messages_data is not None:
set_data_normalized(
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
messages_data,
unpack=False,
)

# Record other parameters
params = {
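For orientation (not part of the diff): a minimal usage sketch of the new embeddings branch. Assumptions: Sentry is initialized with the LiteLLM integration as in the tests below, a real provider API key is available to litellm, and both send_default_pii=True and include_prompts=True are set, since the code above only records the input when both checks pass.

import sentry_sdk
import litellm
from sentry_sdk.integrations.litellm import LiteLLMIntegration

# Both flags below are required for the input to be recorded on the span,
# mirroring the should_send_default_pii() / include_prompts check in the diff.
sentry_sdk.init(
    dsn="...",  # your DSN
    traces_sample_rate=1.0,
    send_default_pii=True,
    integrations=[LiteLLMIntegration(include_prompts=True)],
)

with sentry_sdk.start_transaction(name="embeddings demo"):
    # A plain string input is normalized to a one-element list before being
    # stored on the span under SPANDATA.GEN_AI_EMBEDDINGS_INPUT; a list input
    # is stored as-is. Assumes OPENAI_API_KEY (or equivalent) is configured.
    litellm.embedding(model="text-embedding-ada-002", input="Hello, world!")

With send_default_pii=False or include_prompts=False, the same call still produces the embeddings span but without the input attribute, as test_embeddings_no_pii below verifies.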
198 changes: 173 additions & 25 deletions tests/integrations/litellm/test_litellm.py
@@ -1,5 +1,6 @@
import json
import pytest
import time
from unittest import mock
from datetime import datetime

@@ -17,6 +18,7 @@ async def __call__(self, *args, **kwargs):
except ImportError:
pytest.skip("litellm not installed", allow_module_level=True)

import sentry_sdk
from sentry_sdk import start_transaction
from sentry_sdk.consts import OP, SPANDATA
from sentry_sdk.integrations.litellm import (
@@ -31,6 +33,36 @@ async def __call__(self, *args, **kwargs):
LITELLM_VERSION = package_version("litellm")


@pytest.fixture
def clear_litellm_cache():
"""
Clear litellm's client cache and reset integration state to ensure test isolation.

The LiteLLM integration uses setup_once() which only runs once per Python process.
This fixture ensures the integration is properly re-initialized for each test.
"""

# Stop all existing mocks
mock.patch.stopall()

# Clear client cache
if (
hasattr(litellm, "in_memory_llm_clients_cache")
and litellm.in_memory_llm_clients_cache
):
litellm.in_memory_llm_clients_cache.flush_cache()

yield

# Clean up after test as well
mock.patch.stopall()
if (
hasattr(litellm, "in_memory_llm_clients_cache")
and litellm.in_memory_llm_clients_cache
):
litellm.in_memory_llm_clients_cache.flush_cache()


# Mock response objects
class MockMessage:
def __init__(self, role="assistant", content="Test response"):
@@ -87,6 +119,21 @@ def __init__(self, model="text-embedding-ada-002", data=None, usage=None):
)
self.object = "list"

def model_dump(self):
return {
"model": self.model,
"data": [
{"embedding": d.embedding, "index": d.index, "object": d.object}
for d in self.data
],
"usage": {
"prompt_tokens": self.usage.prompt_tokens,
"completion_tokens": self.usage.completion_tokens,
"total_tokens": self.usage.total_tokens,
},
"object": self.object,
}


@pytest.mark.parametrize(
"send_default_pii, include_prompts",
@@ -201,44 +248,145 @@ def test_streaming_chat_completion(
assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True


def test_embeddings_create(sentry_init, capture_events):
def test_embeddings_create(sentry_init, capture_events, clear_litellm_cache):
"""
Test that litellm.embedding() calls are properly instrumented.

This test calls the actual litellm.embedding() function (not just callbacks)
to ensure proper integration testing.
"""
sentry_init(
integrations=[LiteLLMIntegration(include_prompts=True)],
traces_sample_rate=1.0,
send_default_pii=True,
)
events = capture_events()

messages = [{"role": "user", "content": "Some text to test embeddings"}]
mock_response = MockEmbeddingResponse()

with start_transaction(name="litellm test"):
kwargs = {
"model": "text-embedding-ada-002",
"input": "Hello!",
"messages": messages,
"call_type": "embedding",
}
# Mock within the test to ensure proper ordering with cache clearing
with mock.patch(
"litellm.openai_chat_completions.make_sync_openai_embedding_request"
) as mock_http:
# The function returns (headers, response)
mock_http.return_value = ({}, mock_response)

with start_transaction(name="litellm test"):
response = litellm.embedding(
model="text-embedding-ada-002",
input="Hello, world!",
api_key="test-key", # Provide a fake API key to avoid authentication errors
)
# Allow time for callbacks to complete (they may run in separate threads)
time.sleep(0.1)

# Response is processed by litellm, so just check it exists
assert response is not None
assert len(events) == 1
(event,) = events

assert event["type"] == "transaction"
assert len(event["spans"]) == 1
(span,) = event["spans"]

assert span["op"] == OP.GEN_AI_EMBEDDINGS
assert span["description"] == "embeddings text-embedding-ada-002"
assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings"
assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5
assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-ada-002"
# Check that embeddings input is captured (it's JSON serialized)
embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]
assert json.loads(embeddings_input) == ["Hello, world!"]


def test_embeddings_create_with_list_input(
sentry_init, capture_events, clear_litellm_cache
):
"""Test embedding with list input."""
sentry_init(
integrations=[LiteLLMIntegration(include_prompts=True)],
traces_sample_rate=1.0,
send_default_pii=True,
)
events = capture_events()

_input_callback(kwargs)
_success_callback(
kwargs,
mock_response,
datetime.now(),
datetime.now(),
)
mock_response = MockEmbeddingResponse()

assert len(events) == 1
(event,) = events
# Mock within the test to ensure proper ordering with cache clearing
with mock.patch(
"litellm.openai_chat_completions.make_sync_openai_embedding_request"
) as mock_http:
# The function returns (headers, response)
mock_http.return_value = ({}, mock_response)

with start_transaction(name="litellm test"):
response = litellm.embedding(
model="text-embedding-ada-002",
input=["First text", "Second text", "Third text"],
api_key="test-key", # Provide a fake API key to avoid authentication errors
)
# Allow time for callbacks to complete (they may run in separate threads)
time.sleep(0.1)

# Response is processed by litellm, so just check it exists
assert response is not None
assert len(events) == 1
(event,) = events

assert event["type"] == "transaction"
assert len(event["spans"]) == 1
(span,) = event["spans"]

assert span["op"] == OP.GEN_AI_EMBEDDINGS
assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings"
# Check that list of embeddings input is captured (it's JSON serialized)
embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]
assert json.loads(embeddings_input) == [
"First text",
"Second text",
"Third text",
]


def test_embeddings_no_pii(sentry_init, capture_events, clear_litellm_cache):
"""Test that PII is not captured when disabled."""
sentry_init(
integrations=[LiteLLMIntegration(include_prompts=True)],
traces_sample_rate=1.0,
send_default_pii=False, # PII disabled
)
events = capture_events()

assert event["type"] == "transaction"
assert len(event["spans"]) == 1
(span,) = event["spans"]
mock_response = MockEmbeddingResponse()

# Mock within the test to ensure proper ordering with cache clearing
with mock.patch(
"litellm.openai_chat_completions.make_sync_openai_embedding_request"
) as mock_http:
# The function returns (headers, response)
mock_http.return_value = ({}, mock_response)

with start_transaction(name="litellm test"):
response = litellm.embedding(
model="text-embedding-ada-002",
input="Hello, world!",
api_key="test-key", # Provide a fake API key to avoid authentication errors
)
# Allow time for callbacks to complete (they may run in separate threads)
time.sleep(0.1)

# Response is processed by litellm, so just check it exists
assert response is not None
assert len(events) == 1
(event,) = events

assert event["type"] == "transaction"
assert len(event["spans"]) == 1
(span,) = event["spans"]

assert span["op"] == OP.GEN_AI_EMBEDDINGS
assert span["description"] == "embeddings text-embedding-ada-002"
assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings"
assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5
assert span["op"] == OP.GEN_AI_EMBEDDINGS
# Check that embeddings input is NOT captured when PII is disabled
assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"]


def test_exception_handling(sentry_init, capture_events):
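For comparison with the new end-to-end tests above (which call litellm.embedding() against a mocked HTTP layer), the removed version of test_embeddings_create drove the integration's callbacks directly. A rough sketch of that callback-level style, assuming _input_callback and _success_callback are importable from sentry_sdk.integrations.litellm as in this test module, and reusing the module's MockEmbeddingResponse plus an events list from capture_events():

from datetime import datetime

from sentry_sdk import start_transaction
from sentry_sdk.consts import SPANDATA
from sentry_sdk.integrations.litellm import _input_callback, _success_callback

def _callback_level_embedding_check(events):
    # Hypothetical helper, not part of the PR: drives the callbacks directly
    # with an embedding-style kwargs dict, as the removed test version did.
    kwargs = {
        "model": "text-embedding-ada-002",
        "input": "Hello!",
        "call_type": "embedding",
    }
    with start_transaction(name="litellm test"):
        _input_callback(kwargs)
        _success_callback(kwargs, MockEmbeddingResponse(), datetime.now(), datetime.now())

    (event,) = events
    (span,) = event["spans"]
    assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings"

The callback-level approach avoids mocking litellm's HTTP layer, but the new tests exercise the real litellm.embedding() entry point, which is why they need the clear_litellm_cache fixture and the short sleep for callback threads.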