From a716949d1c28341b2fb9af75f43b541ddeac9335 Mon Sep 17 00:00:00 2001
From: Drew Robbins <drew@drewby.com>
Date: Thu, 16 Jan 2025 07:24:35 +0900
Subject: [PATCH] Add metrics to the Python OpenAI instrumentation (#3180)

---
 .../CHANGELOG.md                              |   1 +
 .../README.rst                                |  45 ++++-
 .../instrumentation/openai_v2/__init__.py     |  18 +-
 .../instrumentation/openai_v2/instruments.py  |  11 +
 .../instrumentation/openai_v2/patch.py        | 107 +++++++++-
 .../test_async_chat_completion_metrics.yaml   | 133 ++++++++++++
 .../test_chat_completion_metrics.yaml         | 135 +++++++++++++
 .../tests/conftest.py                         |  83 +++++++-
 .../tests/test_chat_completions.py            |  49 ++++-
 .../tests/test_chat_metrics.py                | 190 ++++++++++++++++++
 10 files changed, 763 insertions(+), 9 deletions(-)
 create mode 100644 instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/instruments.py
 create mode 100644 instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_async_chat_completion_metrics.yaml
 create mode 100644 instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_metrics.yaml
 create mode 100644 instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_metrics.py

diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md
index 4644ee3dc5..ed27904e63 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md
@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Add example to `opentelemetry-instrumentation-openai-v2`
   ([#3006](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3006))
 - Support for `AsyncOpenAI/AsyncCompletions`
   ([#2984](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2984))
+- Add metrics ([#3180](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3180))

 ## Version 2.0b0 (2024-11-08)

diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/README.rst b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/README.rst
index d2cb0b5724..c402b30bc0 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/README.rst
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/README.rst
@@ -7,7 +7,8 @@ OpenTelemetry OpenAI Instrumentation
    :target: https://pypi.org/project/opentelemetry-instrumentation-openai-v2/

 This library allows tracing LLM requests and logging of messages made by the
-`OpenAI Python API library <https://pypi.org/project/openai/>`_.
+`OpenAI Python API library <https://pypi.org/project/openai/>`_. It also captures
+the duration of operations and the number of tokens used as metrics.


 Installation
@@ -74,6 +75,48 @@ To uninstrument clients, call the uninstrument method:

     # Uninstrument all clients
     OpenAIInstrumentor().uninstrument()

+Bucket Boundaries
+-----------------
+
+The GenAI semantic conventions recommend explicit histogram bucket boundaries for the token usage and operation duration metrics. You can apply them by registering Views with your ``MeterProvider``, as shown below.
+
+The bucket boundaries are defined as follows:
+
+- For `gen_ai.client.token.usage`: [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]
+- For `gen_ai.client.operation.duration`: [0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12, 10.24, 20.48, 40.96, 81.92]
+
+To implement these bucket boundaries, you can create Views in your OpenTelemetry SDK setup. Here is an example:
+
+.. code-block:: python
+
+    from opentelemetry.metrics import set_meter_provider
+    from opentelemetry.sdk.metrics import MeterProvider
+    from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
+    from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
+    from opentelemetry.sdk.metrics.view import ExplicitBucketHistogramAggregation, View
+
+    views = [
+        View(
+            instrument_name="gen_ai.client.token.usage",
+            aggregation=ExplicitBucketHistogramAggregation([1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]),
+        ),
+        View(
+            instrument_name="gen_ai.client.operation.duration",
+            aggregation=ExplicitBucketHistogramAggregation([0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12, 10.24, 20.48, 40.96, 81.92]),
+        ),
+    ]
+
+    metric_exporter = OTLPMetricExporter(endpoint="http://localhost:4317")
+    metric_reader = PeriodicExportingMetricReader(metric_exporter)
+    provider = MeterProvider(
+        metric_readers=[metric_reader],
+        views=views,
+    )
+
+    set_meter_provider(provider)
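+
+To check the configuration locally before wiring up an exporter, one option (a
+minimal sketch, reusing the ``views`` list defined above) is to swap the OTLP
+exporter for the SDK's in-memory reader:
+
+.. code-block:: python
+
+    from opentelemetry.sdk.metrics import MeterProvider
+    from opentelemetry.sdk.metrics.export import InMemoryMetricReader
+
+    # Collects metrics in memory so the bucket boundaries can be inspected
+    # without running a collector.
+    reader = InMemoryMetricReader()
+    provider = MeterProvider(metric_readers=[reader], views=views)
+
+    # ... make instrumented OpenAI calls, then inspect the recorded histograms:
+    data = reader.get_metrics_data()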
+
+For more details, refer to the `OpenTelemetry GenAI Metrics documentation <https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/>`_.
+
 References
 ----------
 * `OpenTelemetry OpenAI Instrumentation <https://opentelemetry-python-contrib.readthedocs.io/en/latest/instrumentation/openai/openai.html>`_

diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/__init__.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/__init__.py
index ee3bbfdb73..ab4b6f9d7b 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/__init__.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/__init__.py
@@ -49,13 +49,18 @@
 from opentelemetry.instrumentation.openai_v2.package import _instruments
 from opentelemetry.instrumentation.openai_v2.utils import is_content_enabled
 from opentelemetry.instrumentation.utils import unwrap
+from opentelemetry.metrics import get_meter
 from opentelemetry.semconv.schemas import Schemas
 from opentelemetry.trace import get_tracer

+from .instruments import Instruments
 from .patch import async_chat_completions_create, chat_completions_create


 class OpenAIInstrumentor(BaseInstrumentor):
+    def __init__(self):
+        self._meter = None
+
     def instrumentation_dependencies(self) -> Collection[str]:
         return _instruments

@@ -75,12 +80,21 @@ def _instrument(self, **kwargs):
             schema_url=Schemas.V1_28_0.value,
             event_logger_provider=event_logger_provider,
         )
+        meter_provider = kwargs.get("meter_provider")
+        self._meter = get_meter(
+            __name__,
+            "",
+            meter_provider,
+            schema_url=Schemas.V1_28_0.value,
+        )
+
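+        # Instruments (see instruments.py) bundles the two semconv histograms,
+        # gen_ai.client.operation.duration and gen_ai.client.token.usage.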
+        instruments = Instruments(self._meter)
+
         wrap_function_wrapper(
             module="openai.resources.chat.completions",
             name="Completions.create",
             wrapper=chat_completions_create(
-                tracer, event_logger, is_content_enabled()
+                tracer, event_logger, instruments, is_content_enabled()
             ),
         )

@@ -88,7 +102,7 @@ def _instrument(self, **kwargs):
             module="openai.resources.chat.completions",
             name="AsyncCompletions.create",
             wrapper=async_chat_completions_create(
-                tracer, event_logger, is_content_enabled()
+                tracer, event_logger, instruments, is_content_enabled()
             ),
         )

diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/instruments.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/instruments.py
new file mode 100644
index 0000000000..d1e184ac84
--- /dev/null
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/instruments.py
@@ -0,0 +1,11 @@
+from opentelemetry.semconv._incubating.metrics import gen_ai_metrics
+
+
+class Instruments:
+    def __init__(self, meter):
+        self.operation_duration_histogram = (
+            gen_ai_metrics.create_gen_ai_client_operation_duration(meter)
+        )
+        self.token_usage_histogram = (
+            gen_ai_metrics.create_gen_ai_client_token_usage(meter)
+        )

diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py
index cd284473ce..307b312fca 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py
@@ -13,6 +13,7 @@
 #  limitations under the License.

+from timeit import default_timer
 from typing import Optional

 from openai import Stream
@@ -21,8 +22,12 @@
 from opentelemetry.semconv._incubating.attributes import (
     gen_ai_attributes as GenAIAttributes,
 )
+from opentelemetry.semconv._incubating.attributes import (
+    server_attributes as ServerAttributes,
+)
 from opentelemetry.trace import Span, SpanKind, Tracer

+from .instruments import Instruments
 from .utils import (
     choice_to_event,
     get_llm_request_attributes,
@@ -34,7 +39,10 @@


 def chat_completions_create(
-    tracer: Tracer, event_logger: EventLogger, capture_content: bool
+    tracer: Tracer,
+    event_logger: EventLogger,
+    instruments: Instruments,
+    capture_content: bool,
 ):
     """Wrap the `create` method of the `ChatCompletion` class to trace it."""

@@ -54,6 +62,9 @@ def traced_method(wrapped, instance, args, kwargs):
                     message_to_event(message, capture_content)
                 )

+        start = default_timer()
+        result = None
+        error_type = None
         try:
             result = wrapped(*args, **kwargs)
             if is_streaming(kwargs):
@@ -69,14 +80,27 @@ def traced_method(wrapped, instance, args, kwargs):

             return result

         except Exception as error:
+            error_type = type(error).__qualname__
             handle_span_exception(span, error)
             raise
+        finally:
+            duration = max((default_timer() - start), 0)
+            _record_metrics(
+                instruments,
+                duration,
+                result,
+                span_attributes,
+                error_type,
+            )

     return traced_method


 def async_chat_completions_create(
-    tracer: Tracer, event_logger: EventLogger, capture_content: bool
+    tracer: Tracer,
+    event_logger: EventLogger,
+    instruments: Instruments,
+    capture_content: bool,
 ):
     """Wrap the `create` method of the `AsyncChatCompletion` class to trace it."""

@@ -96,6 +120,9 @@ async def traced_method(wrapped, instance, args, kwargs):
                     message_to_event(message, capture_content)
                 )

+        start = default_timer()
+        result = None
+        error_type = None
         try:
             result = await wrapped(*args, **kwargs)
             if is_streaming(kwargs):
@@ -111,12 +138,88 @@ async def traced_method(wrapped, instance, args, kwargs):

             return result

         except Exception as error:
+            error_type = type(error).__qualname__
             handle_span_exception(span, error)
             raise
+        finally:
+            duration = max((default_timer() - start), 0)
+            _record_metrics(
+                instruments,
+                duration,
+                result,
+                span_attributes,
+                error_type,
+            )

     return traced_method

+
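+# _record_metrics is shared by the sync and async wrappers. It runs inside a
+# finally block, so the duration histogram receives a data point, tagged with
+# error.type on failure, even when the request raises.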
+def _record_metrics(
+    instruments: Instruments,
+    duration: float,
+    result,
+    span_attributes: dict,
+    error_type: Optional[str],
+):
+    common_attributes = {
+        GenAIAttributes.GEN_AI_OPERATION_NAME: GenAIAttributes.GenAiOperationNameValues.CHAT.value,
+        GenAIAttributes.GEN_AI_SYSTEM: GenAIAttributes.GenAiSystemValues.OPENAI.value,
+        GenAIAttributes.GEN_AI_REQUEST_MODEL: span_attributes[
+            GenAIAttributes.GEN_AI_REQUEST_MODEL
+        ],
+    }
+
+    if error_type:
+        common_attributes["error.type"] = error_type
+
+    if result and getattr(result, "model", None):
+        common_attributes[GenAIAttributes.GEN_AI_RESPONSE_MODEL] = result.model
+
+    if result and getattr(result, "service_tier", None):
+        common_attributes[
+            GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER
+        ] = result.service_tier
+
+    if result and getattr(result, "system_fingerprint", None):
+        common_attributes["gen_ai.openai.response.system_fingerprint"] = (
+            result.system_fingerprint
+        )
+
+    if ServerAttributes.SERVER_ADDRESS in span_attributes:
+        common_attributes[ServerAttributes.SERVER_ADDRESS] = span_attributes[
+            ServerAttributes.SERVER_ADDRESS
+        ]
+
+    if ServerAttributes.SERVER_PORT in span_attributes:
+        common_attributes[ServerAttributes.SERVER_PORT] = span_attributes[
+            ServerAttributes.SERVER_PORT
+        ]
+
+    instruments.operation_duration_histogram.record(
+        duration,
+        attributes=common_attributes,
+    )
+
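+    # Token counts are recorded only when the response carries a usage block;
+    # one data point per token type (input and completion) is emitted.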
+    if result and getattr(result, "usage", None):
+        input_attributes = {
+            **common_attributes,
+            GenAIAttributes.GEN_AI_TOKEN_TYPE: GenAIAttributes.GenAiTokenTypeValues.INPUT.value,
+        }
+        instruments.token_usage_histogram.record(
+            result.usage.prompt_tokens,
+            attributes=input_attributes,
+        )
+
+        completion_attributes = {
+            **common_attributes,
+            GenAIAttributes.GEN_AI_TOKEN_TYPE: GenAIAttributes.GenAiTokenTypeValues.COMPLETION.value,
+        }
+        instruments.token_usage_histogram.record(
+            result.usage.completion_tokens,
+            attributes=completion_attributes,
+        )
+
+
 def _set_response_attributes(
     span, result, event_logger: EventLogger, capture_content: bool
 ):

diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_async_chat_completion_metrics.yaml b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_async_chat_completion_metrics.yaml
new file mode 100644
index 0000000000..e771e93cbe
--- /dev/null
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_async_chat_completion_metrics.yaml
@@ -0,0 +1,133 @@
+interactions:
+- request:
+    body: |-
+      {
+        "messages": [
+          {
+            "role": "user",
+            "content": "Say this is a test"
+          }
+        ],
+        "model": "gpt-4o-mini",
+        "stream": false
+      }
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      authorization:
+      - Bearer test_openai_api_key
+      connection:
+      - keep-alive
+      content-length:
+      - '106'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      user-agent:
+      - AsyncOpenAI/Python 1.26.0
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - async:asyncio
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.26.0
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.5
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: |-
+        {
+          "id": "chatcmpl-ASv9R2E7Yhb2e7bj4Xl0qm9s3J42Y",
+          "object": "chat.completion",
+          "created": 1731456237,
+          "model": "gpt-4o-mini-2024-07-18",
+          "choices": [
+            {
+              "index": 0,
+              "message": {
+                "role": "assistant",
+                "content": "This is a test. How can I assist you further?",
+                "refusal": null
+              },
+              "logprobs": null,
+              "finish_reason": "stop"
+            }
+          ],
+          "service_tier": "default",
+          "usage": {
+            "prompt_tokens": 12,
+            "completion_tokens": 12,
+            "total_tokens": 24,
+            "prompt_tokens_details": {
+              "cached_tokens": 0,
+              "audio_tokens": 0
+            },
+            "completion_tokens_details": {
+              "reasoning_tokens": 0,
+              "audio_tokens": 0,
+              "accepted_prediction_tokens": 0,
+              "rejected_prediction_tokens": 0
+            }
+          },
+          "system_fingerprint": "fp_0ba0d124f1"
+        }
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-RAY:
+      - 8e1a80679a8311a6-MRS
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 13 Nov 2024 00:03:58 GMT
+      Server:
+      - cloudflare
+      Set-Cookie: test_set_cookie
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      content-length:
+      - '796'
+      openai-organization: test_openai_org_id
+      openai-processing-ms:
+      - '359'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999978'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_41ea134c1fc450d4ca4cf8d0c6a7c53a
+    status:
+      code: 200
+      message: OK
+version: 1

diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_metrics.yaml b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_metrics.yaml
new file mode 100644
index 0000000000..1c6c11c858
--- /dev/null
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_metrics.yaml
@@ -0,0 +1,135 @@
+interactions:
+- request:
+    body: |-
+      {
+        "messages": [
+          {
+            "role": "user",
+            "content": "Say this is a test"
+          }
+        ],
+        "model": "gpt-4o-mini",
+        "stream": false
+      }
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      authorization:
+      - Bearer test_openai_api_key
+      connection:
+      - keep-alive
+      content-length:
+      - '106'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.54.3
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.54.3
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.6
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: |-
+        {
+          "id": "chatcmpl-ASYMQRl3A3DXL9FWCK9tnGRcKIO7q",
+          "object": "chat.completion",
+          "created": 1731368630,
+          "model": "gpt-4o-mini-2024-07-18",
+          "choices": [
+            {
+              "index": 0,
+              "message": {
+                "role": "assistant",
+                "content": "This is a test.",
+                "refusal": null
+              },
+              "logprobs": null,
+              "finish_reason": "stop"
+            }
+          ],
+          "service_tier": "default",
+          "usage": {
+            "prompt_tokens": 12,
"completion_tokens": 5, + "total_tokens": 17, + "prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "system_fingerprint": "fp_0ba0d124f1" + } + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e122593ff368bc8-SIN + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Mon, 11 Nov 2024 23:43:50 GMT + Server: + - cloudflare + Set-Cookie: test_set_cookie + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + content-length: + - '765' + openai-organization: test_openai_org_id + openai-processing-ms: + - '287' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - '9999' + x-ratelimit-remaining-tokens: + - '199977' + x-ratelimit-reset-requests: + - 8.64s + x-ratelimit-reset-tokens: + - 6ms + x-request-id: + - req_58cff97afd0e7c0bba910ccf0b044a6f + status: + code: 200 + message: OK +version: 1 diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/conftest.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/conftest.py index 18e6582dff..51521dbadd 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/conftest.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/conftest.py @@ -17,6 +17,17 @@ InMemoryLogExporter, SimpleLogRecordProcessor, ) +from opentelemetry.sdk.metrics import ( + Histogram, + MeterProvider, +) +from opentelemetry.sdk.metrics.export import ( + InMemoryMetricReader, +) +from opentelemetry.sdk.metrics.view import ( + ExplicitBucketHistogramAggregation, + View, +) from opentelemetry.sdk.trace import TracerProvider from opentelemetry.sdk.trace.export import SimpleSpanProcessor from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( @@ -36,6 +47,12 @@ def fixture_log_exporter(): yield exporter +@pytest.fixture(scope="function", name="metric_reader") +def fixture_metric_reader(): + exporter = InMemoryMetricReader() + yield exporter + + @pytest.fixture(scope="function", name="tracer_provider") def fixture_tracer_provider(span_exporter): provider = TracerProvider() @@ -52,6 +69,62 @@ def fixture_event_logger_provider(log_exporter): return event_logger_provider +@pytest.fixture(scope="function", name="meter_provider") +def fixture_meter_provider(metric_reader): + token_usage_histogram_view = View( + instrument_type=Histogram, + instrument_name="gen_ai.client.token.usage", + aggregation=ExplicitBucketHistogramAggregation( + boundaries=[ + 1, + 4, + 16, + 64, + 256, + 1024, + 4096, + 16384, + 65536, + 262144, + 1048576, + 4194304, + 16777216, + 67108864, + ] + ), + ) + + duration_histogram_view = View( + instrument_type=Histogram, + instrument_name="gen_ai.client.operation.duration", + aggregation=ExplicitBucketHistogramAggregation( + boundaries=[ + 0.01, + 0.02, + 0.04, + 0.08, + 0.16, + 0.32, + 0.64, + 1.28, + 2.56, + 5.12, + 10.24, + 20.48, + 40.96, + 81.92, + ] + ), + ) + + meter_provider = MeterProvider( + metric_readers=[metric_reader], + views=[token_usage_histogram_view, duration_histogram_view], + ) + + return meter_provider + + @pytest.fixture(autouse=True) def environment(): if not 
os.getenv("OPENAI_API_KEY"): @@ -83,7 +156,9 @@ def vcr_config(): @pytest.fixture(scope="function") -def instrument_no_content(tracer_provider, event_logger_provider): +def instrument_no_content( + tracer_provider, event_logger_provider, meter_provider +): os.environ.update( {OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "False"} ) @@ -92,6 +167,7 @@ def instrument_no_content(tracer_provider, event_logger_provider): instrumentor.instrument( tracer_provider=tracer_provider, event_logger_provider=event_logger_provider, + meter_provider=meter_provider, ) yield instrumentor @@ -100,7 +176,9 @@ def instrument_no_content(tracer_provider, event_logger_provider): @pytest.fixture(scope="function") -def instrument_with_content(tracer_provider, event_logger_provider): +def instrument_with_content( + tracer_provider, event_logger_provider, meter_provider +): os.environ.update( {OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "True"} ) @@ -108,6 +186,7 @@ def instrument_with_content(tracer_provider, event_logger_provider): instrumentor.instrument( tracer_provider=tracer_provider, event_logger_provider=event_logger_provider, + meter_provider=meter_provider, ) yield instrumentor diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_completions.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_completions.py index 4677b7cb95..9685903603 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_completions.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_completions.py @@ -32,6 +32,7 @@ from opentelemetry.semconv._incubating.attributes import ( server_attributes as ServerAttributes, ) +from opentelemetry.semconv._incubating.metrics import gen_ai_metrics @pytest.mark.vcr() @@ -94,7 +95,9 @@ def test_chat_completion_no_content( assert_message_in_logs(logs[1], "gen_ai.choice", choice_event, spans[0]) -def test_chat_completion_bad_endpoint(span_exporter, instrument_no_content): +def test_chat_completion_bad_endpoint( + span_exporter, metric_reader, instrument_no_content +): llm_model_value = "gpt-4o-mini" messages_value = [{"role": "user", "content": "Say this is a test"}] @@ -116,10 +119,31 @@ def test_chat_completion_bad_endpoint(span_exporter, instrument_no_content): "APIConnectionError" == spans[0].attributes[ErrorAttributes.ERROR_TYPE] ) + metrics = metric_reader.get_metrics_data().resource_metrics + assert len(metrics) == 1 + + metric_data = metrics[0].scope_metrics[0].metrics + duration_metric = next( + ( + m + for m in metric_data + if m.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION + ), + None, + ) + assert duration_metric is not None + assert duration_metric.data.data_points[0].sum > 0 + assert ( + duration_metric.data.data_points[0].attributes[ + ErrorAttributes.ERROR_TYPE + ] + == "APIConnectionError" + ) + @pytest.mark.vcr() def test_chat_completion_404( - span_exporter, openai_client, instrument_no_content + span_exporter, openai_client, metric_reader, instrument_no_content ): llm_model_value = "this-model-does-not-exist" messages_value = [{"role": "user", "content": "Say this is a test"}] @@ -135,6 +159,27 @@ def test_chat_completion_404( assert_all_attributes(spans[0], llm_model_value) assert "NotFoundError" == spans[0].attributes[ErrorAttributes.ERROR_TYPE] + metrics = metric_reader.get_metrics_data().resource_metrics + assert len(metrics) == 1 + + metric_data = metrics[0].scope_metrics[0].metrics + duration_metric = next( + ( + m + 
+            for m in metric_data
+            if m.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION
+        ),
+        None,
+    )
+    assert duration_metric is not None
+    assert duration_metric.data.data_points[0].sum > 0
+    assert (
+        duration_metric.data.data_points[0].attributes[
+            ErrorAttributes.ERROR_TYPE
+        ]
+        == "NotFoundError"
+    )
+

 @pytest.mark.vcr()
 def test_chat_completion_extra_params(

diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_metrics.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_metrics.py
new file mode 100644
index 0000000000..d0f7c5a596
--- /dev/null
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_metrics.py
@@ -0,0 +1,190 @@
+import pytest
+
+from opentelemetry.semconv._incubating.attributes import (
+    gen_ai_attributes as GenAIAttributes,
+)
+from opentelemetry.semconv._incubating.attributes import (
+    server_attributes as ServerAttributes,
+)
+from opentelemetry.semconv._incubating.metrics import gen_ai_metrics
+
+
+def assert_all_metric_attributes(data_point):
+    assert GenAIAttributes.GEN_AI_OPERATION_NAME in data_point.attributes
+    assert (
+        data_point.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]
+        == GenAIAttributes.GenAiOperationNameValues.CHAT.value
+    )
+    assert GenAIAttributes.GEN_AI_SYSTEM in data_point.attributes
+    assert (
+        data_point.attributes[GenAIAttributes.GEN_AI_SYSTEM]
+        == GenAIAttributes.GenAiSystemValues.OPENAI.value
+    )
+    assert GenAIAttributes.GEN_AI_REQUEST_MODEL in data_point.attributes
+    assert (
+        data_point.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL]
+        == "gpt-4o-mini"
+    )
+    assert GenAIAttributes.GEN_AI_RESPONSE_MODEL in data_point.attributes
+    assert (
+        data_point.attributes[GenAIAttributes.GEN_AI_RESPONSE_MODEL]
+        == "gpt-4o-mini-2024-07-18"
+    )
+    assert "gen_ai.openai.response.system_fingerprint" in data_point.attributes
+    assert (
+        data_point.attributes["gen_ai.openai.response.system_fingerprint"]
+        == "fp_0ba0d124f1"
+    )
+    assert (
+        GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER
+        in data_point.attributes
+    )
+    assert (
+        data_point.attributes[
+            GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER
+        ]
+        == "default"
+    )
+    assert (
+        data_point.attributes[ServerAttributes.SERVER_ADDRESS]
+        == "api.openai.com"
+    )
+
+
+@pytest.mark.vcr()
+def test_chat_completion_metrics(
+    metric_reader, openai_client, instrument_with_content
+):
+    llm_model_value = "gpt-4o-mini"
+    messages_value = [{"role": "user", "content": "Say this is a test"}]
+
+    openai_client.chat.completions.create(
+        messages=messages_value, model=llm_model_value, stream=False
+    )
+
+    metrics = metric_reader.get_metrics_data().resource_metrics
+    assert len(metrics) == 1
+
+    metric_data = metrics[0].scope_metrics[0].metrics
+    assert len(metric_data) == 2
+
+    duration_metric = next(
+        (
+            m
+            for m in metric_data
+            if m.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION
+        ),
+        None,
+    )
+    assert duration_metric is not None
+    assert duration_metric.data.data_points[0].sum > 0
+    assert_all_metric_attributes(duration_metric.data.data_points[0])
+
+    token_usage_metric = next(
+        (
+            m
+            for m in metric_data
+            if m.name == gen_ai_metrics.GEN_AI_CLIENT_TOKEN_USAGE
+        ),
+        None,
+    )
+    assert token_usage_metric is not None
+
+    input_token_usage = next(
+        (
+            d
+            for d in token_usage_metric.data.data_points
+            if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE]
+            == GenAIAttributes.GenAiTokenTypeValues.INPUT.value
+        ),
+        None,
+    )
+    assert input_token_usage is not None
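+    # With boundaries [1, 4, 16, ...], token counts of 5 and 12 both fall
+    # into bucket index 2, i.e. the (4, 16] bucket, which is what the
+    # bucket_counts[2] checks below assert.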
+    assert input_token_usage.sum == 12
+    # assert against buckets [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]
+    assert input_token_usage.bucket_counts[2] == 1
+    assert_all_metric_attributes(input_token_usage)
+
+    output_token_usage = next(
+        (
+            d
+            for d in token_usage_metric.data.data_points
+            if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE]
+            == GenAIAttributes.GenAiTokenTypeValues.COMPLETION.value
+        ),
+        None,
+    )
+    assert output_token_usage is not None
+    assert output_token_usage.sum == 5
+    # assert against buckets [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]
+    assert output_token_usage.bucket_counts[2] == 1
+    assert_all_metric_attributes(output_token_usage)
+
+
+@pytest.mark.vcr()
+@pytest.mark.asyncio()
+async def test_async_chat_completion_metrics(
+    metric_reader, async_openai_client, instrument_with_content
+):
+    llm_model_value = "gpt-4o-mini"
+    messages_value = [{"role": "user", "content": "Say this is a test"}]
+
+    await async_openai_client.chat.completions.create(
+        messages=messages_value, model=llm_model_value, stream=False
+    )
+
+    metrics = metric_reader.get_metrics_data().resource_metrics
+    assert len(metrics) == 1
+
+    metric_data = metrics[0].scope_metrics[0].metrics
+    assert len(metric_data) == 2
+
+    duration_metric = next(
+        (
+            m
+            for m in metric_data
+            if m.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION
+        ),
+        None,
+    )
+    assert duration_metric is not None
+    assert duration_metric.data.data_points[0].sum > 0
+    assert_all_metric_attributes(duration_metric.data.data_points[0])
+
+    token_usage_metric = next(
+        (
+            m
+            for m in metric_data
+            if m.name == gen_ai_metrics.GEN_AI_CLIENT_TOKEN_USAGE
+        ),
+        None,
+    )
+    assert token_usage_metric is not None
+
+    input_token_usage = next(
+        (
+            d
+            for d in token_usage_metric.data.data_points
+            if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE]
+            == GenAIAttributes.GenAiTokenTypeValues.INPUT.value
+        ),
+        None,
+    )
+
+    assert input_token_usage is not None
+    assert input_token_usage.sum == 12
+    assert_all_metric_attributes(input_token_usage)
+
+    output_token_usage = next(
+        (
+            d
+            for d in token_usage_metric.data.data_points
+            if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE]
+            == GenAIAttributes.GenAiTokenTypeValues.COMPLETION.value
+        ),
+        None,
+    )
+
+    assert output_token_usage is not None
+    assert output_token_usage.sum == 12
+    assert_all_metric_attributes(output_token_usage)