Skip to content

Commit

Permalink
Add metrics to the Python OpenAI instrumentation (#3180)
Browse files Browse the repository at this point in the history
  • Loading branch information
drewby authored Jan 15, 2025
1 parent 07c97ea commit a716949
Show file tree
Hide file tree
Showing 10 changed files with 763 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add example to `opentelemetry-instrumentation-openai-v2`
([#3006](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3006))
- Support for `AsyncOpenAI/AsyncCompletions` ([#2984](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2984))
- Add metrics ([#3180](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3180))

## Version 2.0b0 (2024-11-08)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ OpenTelemetry OpenAI Instrumentation
:target: https://pypi.org/project/opentelemetry-instrumentation-openai-v2/

This library allows tracing LLM requests and logging of messages made by the
`OpenAI Python API library <https://pypi.org/project/openai/>`_.
`OpenAI Python API library <https://pypi.org/project/openai/>`_. It also captures
the duration of the operations and the number of tokens used as metrics.


Installation
Expand Down Expand Up @@ -74,6 +75,48 @@ To uninstrument clients, call the uninstrument method:
# Uninstrument all clients
OpenAIInstrumentor().uninstrument()
Bucket Boundaries
-----------------

This section lists the explicit bucket boundaries recommended by the GenAI semantic conventions for the token-usage and operation-duration histograms, and shows how to configure SDK Views that apply them.

The bucket boundaries are defined as follows:

- For `gen_ai.client.token.usage`: [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]
- For `gen_ai.client.operation.duration`: [0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12, 10.24, 20.48, 40.96, 81.92]

To implement these bucket boundaries, you can create Views in your OpenTelemetry SDK setup. Here is an example:

.. code-block:: python

from opentelemetry.sdk.metrics import MeterProvider, View
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
from opentelemetry.sdk.metrics.aggregation import ExplicitBucketHistogramAggregation
views = [
View(
instrument_name="gen_ai.client.token.usage",
aggregation=ExplicitBucketHistogramAggregation([1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]),
),
View(
instrument_name="gen_ai.client.operation.duration",
aggregation=ExplicitBucketHistogramAggregation([0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12, 10.24, 20.48, 40.96, 81.92]),
),
]
metric_exporter = OTLPMetricExporter(endpoint="http://localhost:4317")
metric_reader = PeriodicExportingMetricReader(metric_exporter)
provider = MeterProvider(
metric_readers=[metric_reader],
views=views
)
from opentelemetry.metrics import set_meter_provider
set_meter_provider(provider)
For more details, refer to the `OpenTelemetry GenAI Metrics documentation <https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/>`_.

References
----------
* `OpenTelemetry OpenAI Instrumentation <https://opentelemetry-python-contrib.readthedocs.io/en/latest/instrumentation/openai/openai.html>`_
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,18 @@
from opentelemetry.instrumentation.openai_v2.package import _instruments
from opentelemetry.instrumentation.openai_v2.utils import is_content_enabled
from opentelemetry.instrumentation.utils import unwrap
from opentelemetry.metrics import get_meter
from opentelemetry.semconv.schemas import Schemas
from opentelemetry.trace import get_tracer

from .instruments import Instruments
from .patch import async_chat_completions_create, chat_completions_create


class OpenAIInstrumentor(BaseInstrumentor):
def __init__(self):
self._meter = None

def instrumentation_dependencies(self) -> Collection[str]:
return _instruments

Expand All @@ -75,20 +80,29 @@ def _instrument(self, **kwargs):
schema_url=Schemas.V1_28_0.value,
event_logger_provider=event_logger_provider,
)
meter_provider = kwargs.get("meter_provider")
self._meter = get_meter(
__name__,
"",
meter_provider,
schema_url=Schemas.V1_28_0.value,
)

instruments = Instruments(self._meter)

wrap_function_wrapper(
module="openai.resources.chat.completions",
name="Completions.create",
wrapper=chat_completions_create(
tracer, event_logger, is_content_enabled()
tracer, event_logger, instruments, is_content_enabled()
),
)

wrap_function_wrapper(
module="openai.resources.chat.completions",
name="AsyncCompletions.create",
wrapper=async_chat_completions_create(
tracer, event_logger, is_content_enabled()
tracer, event_logger, instruments, is_content_enabled()
),
)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from opentelemetry.semconv._incubating.metrics import gen_ai_metrics


class Instruments:
    """Holds the GenAI client metric instruments created from one meter.

    Exposes the operation-duration and token-usage histograms defined by
    the GenAI semantic conventions so the patch wrappers can record
    measurements against them.
    """

    def __init__(self, meter):
        # Create each semconv-defined histogram exactly once, up front.
        make_duration = gen_ai_metrics.create_gen_ai_client_operation_duration
        make_usage = gen_ai_metrics.create_gen_ai_client_token_usage
        self.operation_duration_histogram = make_duration(meter)
        self.token_usage_histogram = make_usage(meter)
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.


from timeit import default_timer
from typing import Optional

from openai import Stream
Expand All @@ -21,8 +22,12 @@
from opentelemetry.semconv._incubating.attributes import (
gen_ai_attributes as GenAIAttributes,
)
from opentelemetry.semconv._incubating.attributes import (
server_attributes as ServerAttributes,
)
from opentelemetry.trace import Span, SpanKind, Tracer

from .instruments import Instruments
from .utils import (
choice_to_event,
get_llm_request_attributes,
Expand All @@ -34,7 +39,10 @@


def chat_completions_create(
tracer: Tracer, event_logger: EventLogger, capture_content: bool
tracer: Tracer,
event_logger: EventLogger,
instruments: Instruments,
capture_content: bool,
):
"""Wrap the `create` method of the `ChatCompletion` class to trace it."""

Expand All @@ -54,6 +62,9 @@ def traced_method(wrapped, instance, args, kwargs):
message_to_event(message, capture_content)
)

start = default_timer()
result = None
error_type = None
try:
result = wrapped(*args, **kwargs)
if is_streaming(kwargs):
Expand All @@ -69,14 +80,27 @@ def traced_method(wrapped, instance, args, kwargs):
return result

except Exception as error:
error_type = type(error).__qualname__
handle_span_exception(span, error)
raise
finally:
duration = max((default_timer() - start), 0)
_record_metrics(
instruments,
duration,
result,
span_attributes,
error_type,
)

return traced_method


def async_chat_completions_create(
tracer: Tracer, event_logger: EventLogger, capture_content: bool
tracer: Tracer,
event_logger: EventLogger,
instruments: Instruments,
capture_content: bool,
):
"""Wrap the `create` method of the `AsyncChatCompletion` class to trace it."""

Expand All @@ -96,6 +120,9 @@ async def traced_method(wrapped, instance, args, kwargs):
message_to_event(message, capture_content)
)

start = default_timer()
result = None
error_type = None
try:
result = await wrapped(*args, **kwargs)
if is_streaming(kwargs):
Expand All @@ -111,12 +138,88 @@ async def traced_method(wrapped, instance, args, kwargs):
return result

except Exception as error:
error_type = type(error).__qualname__
handle_span_exception(span, error)
raise
finally:
duration = max((default_timer() - start), 0)
_record_metrics(
instruments,
duration,
result,
span_attributes,
error_type,
)

return traced_method


def _record_metrics(
    instruments: Instruments,
    duration: float,
    result,
    span_attributes: dict,
    error_type: Optional[str],
):
    """Record operation-duration and token-usage metrics for one API call.

    Invoked from the wrapper's ``finally`` block, so it must not raise:
    an exception here would mask the original error from the OpenAI call.

    Args:
        instruments: Holder of the pre-created GenAI histograms.
        duration: Elapsed wall-clock time of the operation, in seconds.
        result: The response object from OpenAI, or ``None`` on failure.
        span_attributes: Request attributes already computed for the span.
        error_type: Qualified exception name if the call failed, else ``None``.
    """
    common_attributes = {
        GenAIAttributes.GEN_AI_OPERATION_NAME: GenAIAttributes.GenAiOperationNameValues.CHAT.value,
        GenAIAttributes.GEN_AI_SYSTEM: GenAIAttributes.GenAiSystemValues.OPENAI.value,
    }

    # The request model may be absent from the span attributes (e.g. no
    # ``model`` kwarg was passed). Unconditional indexing would raise
    # KeyError inside ``finally`` and mask the original exception, so
    # copy it only when present.
    if GenAIAttributes.GEN_AI_REQUEST_MODEL in span_attributes:
        common_attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] = span_attributes[
            GenAIAttributes.GEN_AI_REQUEST_MODEL
        ]

    if error_type:
        common_attributes["error.type"] = error_type

    if result and getattr(result, "model", None):
        common_attributes[GenAIAttributes.GEN_AI_RESPONSE_MODEL] = result.model

    if result and getattr(result, "service_tier", None):
        common_attributes[
            GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER
        ] = result.service_tier

    if result and getattr(result, "system_fingerprint", None):
        common_attributes["gen_ai.openai.response.system_fingerprint"] = (
            result.system_fingerprint
        )

    # Propagate server address/port from the span attributes when present.
    for server_attr in (
        ServerAttributes.SERVER_ADDRESS,
        ServerAttributes.SERVER_PORT,
    ):
        if server_attr in span_attributes:
            common_attributes[server_attr] = span_attributes[server_attr]

    instruments.operation_duration_histogram.record(
        duration,
        attributes=common_attributes,
    )

    # Token usage is only available on a successful response that carries
    # a ``usage`` block; record input and completion tokens separately,
    # distinguished by the token-type attribute.
    if result and getattr(result, "usage", None):
        input_attributes = {
            **common_attributes,
            GenAIAttributes.GEN_AI_TOKEN_TYPE: GenAIAttributes.GenAiTokenTypeValues.INPUT.value,
        }
        instruments.token_usage_histogram.record(
            result.usage.prompt_tokens,
            attributes=input_attributes,
        )

        completion_attributes = {
            **common_attributes,
            GenAIAttributes.GEN_AI_TOKEN_TYPE: GenAIAttributes.GenAiTokenTypeValues.COMPLETION.value,
        }
        instruments.token_usage_histogram.record(
            result.usage.completion_tokens,
            attributes=completion_attributes,
        )


def _set_response_attributes(
span, result, event_logger: EventLogger, capture_content: bool
):
Expand Down
Loading

0 comments on commit a716949

Please sign in to comment.