diff --git a/sentry_sdk/integrations/litellm.py b/sentry_sdk/integrations/litellm.py
index 43661e2432..35fb1b1048 100644
--- a/sentry_sdk/integrations/litellm.py
+++ b/sentry_sdk/integrations/litellm.py
@@ -77,15 +77,40 @@ def _input_callback(kwargs):
     set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, provider)
     set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, operation)
 
-    # Record messages if allowed
-    messages = kwargs.get("messages", [])
-    if messages and should_send_default_pii() and integration.include_prompts:
-        scope = sentry_sdk.get_current_scope()
-        messages_data = truncate_and_annotate_messages(messages, span, scope)
-        if messages_data is not None:
-            set_data_normalized(
-                span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
-            )
+    # Record input/messages if allowed
+    if should_send_default_pii() and integration.include_prompts:
+        if operation == "embeddings":
+            # For embeddings, look for the 'input' parameter
+            embedding_input = kwargs.get("input")
+            if embedding_input:
+                scope = sentry_sdk.get_current_scope()
+                # Normalize to list format
+                input_list = (
+                    embedding_input
+                    if isinstance(embedding_input, list)
+                    else [embedding_input]
+                )
+                messages_data = truncate_and_annotate_messages(input_list, span, scope)
+                if messages_data is not None:
+                    set_data_normalized(
+                        span,
+                        SPANDATA.GEN_AI_EMBEDDINGS_INPUT,
+                        messages_data,
+                        unpack=False,
+                    )
+        else:
+            # For chat, look for the 'messages' parameter
+            messages = kwargs.get("messages", [])
+            if messages:
+                scope = sentry_sdk.get_current_scope()
+                messages_data = truncate_and_annotate_messages(messages, span, scope)
+                if messages_data is not None:
+                    set_data_normalized(
+                        span,
+                        SPANDATA.GEN_AI_REQUEST_MESSAGES,
+                        messages_data,
+                        unpack=False,
+                    )
 
     # Record other parameters
     params = {
diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py
index 8e1ad21254..1b925fb61f 100644
--- a/tests/integrations/litellm/test_litellm.py
+++ b/tests/integrations/litellm/test_litellm.py
@@ -1,5 +1,6 @@
 import json
 import pytest
+import time
 from unittest import mock
 from datetime import datetime
 
@@ -17,6 +18,7 @@ async def __call__(self, *args, **kwargs):
 except ImportError:
     pytest.skip("litellm not installed", allow_module_level=True)
 
+import sentry_sdk
 from sentry_sdk import start_transaction
 from sentry_sdk.consts import OP, SPANDATA
 from sentry_sdk.integrations.litellm import (
@@ -31,6 +33,36 @@ async def __call__(self, *args, **kwargs):
 LITELLM_VERSION = package_version("litellm")
 
 
+@pytest.fixture
+def clear_litellm_cache():
+    """
+    Clear litellm's client cache and reset integration state to ensure test isolation.
+
+    The LiteLLM integration uses setup_once() which only runs once per Python process.
+    This fixture ensures the integration is properly re-initialized for each test.
+    """
+
+    # Stop all existing mocks
+    mock.patch.stopall()
+
+    # Clear client cache
+    if (
+        hasattr(litellm, "in_memory_llm_clients_cache")
+        and litellm.in_memory_llm_clients_cache
+    ):
+        litellm.in_memory_llm_clients_cache.flush_cache()
+
+    yield
+
+    # Clean up after test as well
+    mock.patch.stopall()
+    if (
+        hasattr(litellm, "in_memory_llm_clients_cache")
+        and litellm.in_memory_llm_clients_cache
+    ):
+        litellm.in_memory_llm_clients_cache.flush_cache()
+
+
 # Mock response objects
 class MockMessage:
     def __init__(self, role="assistant", content="Test response"):
@@ -87,6 +119,21 @@ def __init__(self, model="text-embedding-ada-002", data=None, usage=None):
         )
         self.object = "list"
 
+    def model_dump(self):
+        return {
+            "model": self.model,
+            "data": [
+                {"embedding": d.embedding, "index": d.index, "object": d.object}
+                for d in self.data
+            ],
+            "usage": {
+                "prompt_tokens": self.usage.prompt_tokens,
+                "completion_tokens": self.usage.completion_tokens,
+                "total_tokens": self.usage.total_tokens,
+            },
+            "object": self.object,
+        }
+
 
 @pytest.mark.parametrize(
     "send_default_pii, include_prompts",
@@ -201,7 +248,13 @@ def test_streaming_chat_completion(
     assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True
 
 
-def test_embeddings_create(sentry_init, capture_events):
+def test_embeddings_create(sentry_init, capture_events, clear_litellm_cache):
+    """
+    Test that litellm.embedding() calls are properly instrumented.
+
+    This test calls the actual litellm.embedding() function (not just callbacks)
+    to ensure proper integration testing.
+    """
     sentry_init(
         integrations=[LiteLLMIntegration(include_prompts=True)],
         traces_sample_rate=1.0,
@@ -209,36 +262,131 @@ def test_embeddings_create(sentry_init, capture_events):
     )
     events = capture_events()
 
-    messages = [{"role": "user", "content": "Some text to test embeddings"}]
     mock_response = MockEmbeddingResponse()
 
-    with start_transaction(name="litellm test"):
-        kwargs = {
-            "model": "text-embedding-ada-002",
-            "input": "Hello!",
-            "messages": messages,
-            "call_type": "embedding",
-        }
+    # Mock within the test to ensure proper ordering with cache clearing
+    with mock.patch(
+        "litellm.openai_chat_completions.make_sync_openai_embedding_request"
+    ) as mock_http:
+        # The function returns (headers, response)
+        mock_http.return_value = ({}, mock_response)
+
+        with start_transaction(name="litellm test"):
+            response = litellm.embedding(
+                model="text-embedding-ada-002",
+                input="Hello, world!",
+                api_key="test-key",  # Provide a fake API key to avoid authentication errors
+            )
+            # Allow time for callbacks to complete (they may run in separate threads)
+            time.sleep(0.1)
+
+    # Response is processed by litellm, so just check it exists
+    assert response is not None
+    assert len(events) == 1
+    (event,) = events
+
+    assert event["type"] == "transaction"
+    assert len(event["spans"]) == 1
+    (span,) = event["spans"]
+
+    assert span["op"] == OP.GEN_AI_EMBEDDINGS
+    assert span["description"] == "embeddings text-embedding-ada-002"
+    assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings"
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5
+    assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-ada-002"
+    # Check that embeddings input is captured (it's JSON serialized)
+    embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]
+    assert json.loads(embeddings_input) == ["Hello, world!"]
+
+
+def test_embeddings_create_with_list_input(
+    sentry_init, capture_events, clear_litellm_cache
+):
+    """Test embedding with list input."""
+    sentry_init(
+        integrations=[LiteLLMIntegration(include_prompts=True)],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+    events = capture_events()
 
-    _input_callback(kwargs)
-    _success_callback(
-        kwargs,
-        mock_response,
-        datetime.now(),
-        datetime.now(),
-    )
+    mock_response = MockEmbeddingResponse()
 
-    assert len(events) == 1
-    (event,) = events
+    # Mock within the test to ensure proper ordering with cache clearing
+    with mock.patch(
+        "litellm.openai_chat_completions.make_sync_openai_embedding_request"
+    ) as mock_http:
+        # The function returns (headers, response)
+        mock_http.return_value = ({}, mock_response)
+
+        with start_transaction(name="litellm test"):
+            response = litellm.embedding(
+                model="text-embedding-ada-002",
+                input=["First text", "Second text", "Third text"],
+                api_key="test-key",  # Provide a fake API key to avoid authentication errors
+            )
+            # Allow time for callbacks to complete (they may run in separate threads)
+            time.sleep(0.1)
+
+    # Response is processed by litellm, so just check it exists
+    assert response is not None
+    assert len(events) == 1
+    (event,) = events
+
+    assert event["type"] == "transaction"
+    assert len(event["spans"]) == 1
+    (span,) = event["spans"]
+
+    assert span["op"] == OP.GEN_AI_EMBEDDINGS
+    assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings"
+    # Check that list of embeddings input is captured (it's JSON serialized)
+    embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]
+    assert json.loads(embeddings_input) == [
+        "First text",
+        "Second text",
+        "Third text",
+    ]
+
+
+def test_embeddings_no_pii(sentry_init, capture_events, clear_litellm_cache):
+    """Test that PII is not captured when disabled."""
+    sentry_init(
+        integrations=[LiteLLMIntegration(include_prompts=True)],
+        traces_sample_rate=1.0,
+        send_default_pii=False,  # PII disabled
+    )
+    events = capture_events()
 
-    assert event["type"] == "transaction"
-    assert len(event["spans"]) == 1
-    (span,) = event["spans"]
+    mock_response = MockEmbeddingResponse()
+
+    # Mock within the test to ensure proper ordering with cache clearing
+    with mock.patch(
+        "litellm.openai_chat_completions.make_sync_openai_embedding_request"
+    ) as mock_http:
+        # The function returns (headers, response)
+        mock_http.return_value = ({}, mock_response)
+
+        with start_transaction(name="litellm test"):
+            response = litellm.embedding(
+                model="text-embedding-ada-002",
+                input="Hello, world!",
+                api_key="test-key",  # Provide a fake API key to avoid authentication errors
+            )
+            # Allow time for callbacks to complete (they may run in separate threads)
+            time.sleep(0.1)
+
+    # Response is processed by litellm, so just check it exists
+    assert response is not None
+    assert len(events) == 1
+    (event,) = events
+
+    assert event["type"] == "transaction"
+    assert len(event["spans"]) == 1
+    (span,) = event["spans"]
 
-    assert span["op"] == OP.GEN_AI_EMBEDDINGS
-    assert span["description"] == "embeddings text-embedding-ada-002"
-    assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings"
-    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5
+    assert span["op"] == OP.GEN_AI_EMBEDDINGS
+    # Check that embeddings input is NOT captured when PII is disabled
+    assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"]
 
 
 def test_exception_handling(sentry_init, capture_events):
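
For context, here is a minimal sketch of how the new embeddings-input capture would be exercised from application code rather than through the test fixtures. The DSN is a placeholder, and the flags mirror the settings the tests above assert against; this is an illustration, not part of the patch:

```python
import litellm
import sentry_sdk
from sentry_sdk.integrations.litellm import LiteLLMIntegration

# Input capture requires both send_default_pii=True on the SDK and
# include_prompts=True on the integration; with either disabled, the
# GEN_AI_EMBEDDINGS_INPUT attribute is omitted (see test_embeddings_no_pii).
sentry_sdk.init(
    dsn="https://examplePublicKey@o0.ingest.sentry.io/0",  # placeholder DSN
    traces_sample_rate=1.0,
    send_default_pii=True,
    integrations=[LiteLLMIntegration(include_prompts=True)],
)

with sentry_sdk.start_transaction(name="embedding-demo"):
    # A plain string input is normalized to a one-element list before being
    # recorded, so the captured attribute is always a JSON array.
    litellm.embedding(model="text-embedding-ada-002", input="Hello, world!")
```

Normalizing a bare string to a single-element list keeps the captured attribute shape identical for single and batched inputs, which is the behavior the list-input test pins down.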