Skip to content

Commit

Permalink
Add metrics to the Python OpenAI instrumentation (#3180)
Browse files Browse the repository at this point in the history
  • Loading branch information
drewby authored Jan 15, 2025
1 parent 07c97ea commit a716949
Show file tree
Hide file tree
Showing 10 changed files with 763 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add example to `opentelemetry-instrumentation-openai-v2`
([#3006](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3006))
- Support for `AsyncOpenAI/AsyncCompletions` ([#2984](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2984))
- Add metrics ([#3180](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3180))

## Version 2.0b0 (2024-11-08)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ OpenTelemetry OpenAI Instrumentation
:target: https://pypi.org/project/opentelemetry-instrumentation-openai-v2/

This library allows tracing LLM requests and logging of messages made by the
`OpenAI Python API library <https://pypi.org/project/openai/>`_.
`OpenAI Python API library <https://pypi.org/project/openai/>`_. It also captures
the duration of the operations and the number of tokens used as metrics.


Installation
Expand Down Expand Up @@ -74,6 +75,48 @@ To uninstrument clients, call the uninstrument method:
# Uninstrument all clients
OpenAIInstrumentor().uninstrument()
Bucket Boundaries
-----------------

This section lists the explicit bucket boundaries recommended by the GenAI semantic conventions for the token-usage and operation-duration histograms, and shows how to configure SDK Views that apply them.

The bucket boundaries are defined as follows:

- For `gen_ai.client.token.usage`: [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]
- For `gen_ai.client.operation.duration`: [0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12, 10.24, 20.48, 40.96, 81.92]

To implement these bucket boundaries, you can create Views in your OpenTelemetry SDK setup. Here is an example:

.. code-block:: python

from opentelemetry.sdk.metrics import MeterProvider, View
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
from opentelemetry.sdk.metrics.aggregation import ExplicitBucketHistogramAggregation
views = [
View(
instrument_name="gen_ai.client.token.usage",
aggregation=ExplicitBucketHistogramAggregation([1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]),
),
View(
instrument_name="gen_ai.client.operation.duration",
aggregation=ExplicitBucketHistogramAggregation([0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12, 10.24, 20.48, 40.96, 81.92]),
),
]
metric_exporter = OTLPMetricExporter(endpoint="http://localhost:4317")
metric_reader = PeriodicExportingMetricReader(metric_exporter)
provider = MeterProvider(
metric_readers=[metric_reader],
views=views
)
from opentelemetry.metrics import set_meter_provider
set_meter_provider(provider)
For more details, refer to the `OpenTelemetry GenAI Metrics documentation <https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/>`_.

References
----------
* `OpenTelemetry OpenAI Instrumentation <https://opentelemetry-python-contrib.readthedocs.io/en/latest/instrumentation/openai/openai.html>`_
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,18 @@
from opentelemetry.instrumentation.openai_v2.package import _instruments
from opentelemetry.instrumentation.openai_v2.utils import is_content_enabled
from opentelemetry.instrumentation.utils import unwrap
from opentelemetry.metrics import get_meter
from opentelemetry.semconv.schemas import Schemas
from opentelemetry.trace import get_tracer

from .instruments import Instruments
from .patch import async_chat_completions_create, chat_completions_create


class OpenAIInstrumentor(BaseInstrumentor):
def __init__(self):
self._meter = None

def instrumentation_dependencies(self) -> Collection[str]:
return _instruments

Expand All @@ -75,20 +80,29 @@ def _instrument(self, **kwargs):
schema_url=Schemas.V1_28_0.value,
event_logger_provider=event_logger_provider,
)
meter_provider = kwargs.get("meter_provider")
self._meter = get_meter(
__name__,
"",
meter_provider,
schema_url=Schemas.V1_28_0.value,
)

instruments = Instruments(self._meter)

wrap_function_wrapper(
module="openai.resources.chat.completions",
name="Completions.create",
wrapper=chat_completions_create(
tracer, event_logger, is_content_enabled()
tracer, event_logger, instruments, is_content_enabled()
),
)

wrap_function_wrapper(
module="openai.resources.chat.completions",
name="AsyncCompletions.create",
wrapper=async_chat_completions_create(
tracer, event_logger, is_content_enabled()
tracer, event_logger, instruments, is_content_enabled()
),
)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from opentelemetry.semconv._incubating.metrics import gen_ai_metrics


class Instruments:
    """Holds the GenAI client metric instruments created from one meter.

    Exposes the operation-duration and token-usage histograms defined by
    the GenAI semantic conventions so the patch wrappers can record
    measurements against them.
    """

    def __init__(self, meter):
        # Create each semconv-defined histogram exactly once, up front.
        make_duration = gen_ai_metrics.create_gen_ai_client_operation_duration
        make_usage = gen_ai_metrics.create_gen_ai_client_token_usage
        self.operation_duration_histogram = make_duration(meter)
        self.token_usage_histogram = make_usage(meter)
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.


from timeit import default_timer
from typing import Optional

from openai import Stream
Expand All @@ -21,8 +22,12 @@
from opentelemetry.semconv._incubating.attributes import (
gen_ai_attributes as GenAIAttributes,
)
from opentelemetry.semconv._incubating.attributes import (
server_attributes as ServerAttributes,
)
from opentelemetry.trace import Span, SpanKind, Tracer

from .instruments import Instruments
from .utils import (
choice_to_event,
get_llm_request_attributes,
Expand All @@ -34,7 +39,10 @@


def chat_completions_create(
tracer: Tracer, event_logger: EventLogger, capture_content: bool
tracer: Tracer,
event_logger: EventLogger,
instruments: Instruments,
capture_content: bool,
):
"""Wrap the `create` method of the `ChatCompletion` class to trace it."""

Expand All @@ -54,6 +62,9 @@ def traced_method(wrapped, instance, args, kwargs):
message_to_event(message, capture_content)
)

start = default_timer()
result = None
error_type = None
try:
result = wrapped(*args, **kwargs)
if is_streaming(kwargs):
Expand All @@ -69,14 +80,27 @@ def traced_method(wrapped, instance, args, kwargs):
return result

except Exception as error:
error_type = type(error).__qualname__
handle_span_exception(span, error)
raise
finally:
duration = max((default_timer() - start), 0)
_record_metrics(
instruments,
duration,
result,
span_attributes,
error_type,
)

return traced_method


def async_chat_completions_create(
tracer: Tracer, event_logger: EventLogger, capture_content: bool
tracer: Tracer,
event_logger: EventLogger,
instruments: Instruments,
capture_content: bool,
):
"""Wrap the `create` method of the `AsyncChatCompletion` class to trace it."""

Expand All @@ -96,6 +120,9 @@ async def traced_method(wrapped, instance, args, kwargs):
message_to_event(message, capture_content)
)

start = default_timer()
result = None
error_type = None
try:
result = await wrapped(*args, **kwargs)
if is_streaming(kwargs):
Expand All @@ -111,12 +138,88 @@ async def traced_method(wrapped, instance, args, kwargs):
return result

except Exception as error:
error_type = type(error).__qualname__
handle_span_exception(span, error)
raise
finally:
duration = max((default_timer() - start), 0)
_record_metrics(
instruments,
duration,
result,
span_attributes,
error_type,
)

return traced_method


def _record_metrics(
    instruments: Instruments,
    duration: float,
    result,
    span_attributes: dict,
    error_type: Optional[str],
):
    """Record operation-duration and token-usage metrics for one API call.

    Invoked from the wrapper's ``finally`` block, so it must not raise:
    an exception here would mask the original error from the OpenAI call.

    Args:
        instruments: Holder of the pre-created GenAI histograms.
        duration: Elapsed wall-clock time of the operation, in seconds.
        result: The response object from OpenAI, or ``None`` on failure.
        span_attributes: Request attributes already computed for the span.
        error_type: Qualified exception name if the call failed, else ``None``.
    """
    common_attributes = {
        GenAIAttributes.GEN_AI_OPERATION_NAME: GenAIAttributes.GenAiOperationNameValues.CHAT.value,
        GenAIAttributes.GEN_AI_SYSTEM: GenAIAttributes.GenAiSystemValues.OPENAI.value,
    }

    # The request model may be absent from the span attributes (e.g. no
    # ``model`` kwarg was passed). Unconditional indexing would raise
    # KeyError inside ``finally`` and mask the original exception, so
    # copy it only when present.
    if GenAIAttributes.GEN_AI_REQUEST_MODEL in span_attributes:
        common_attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] = span_attributes[
            GenAIAttributes.GEN_AI_REQUEST_MODEL
        ]

    if error_type:
        common_attributes["error.type"] = error_type

    if result and getattr(result, "model", None):
        common_attributes[GenAIAttributes.GEN_AI_RESPONSE_MODEL] = result.model

    if result and getattr(result, "service_tier", None):
        common_attributes[
            GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER
        ] = result.service_tier

    if result and getattr(result, "system_fingerprint", None):
        common_attributes["gen_ai.openai.response.system_fingerprint"] = (
            result.system_fingerprint
        )

    # Propagate server address/port from the span attributes when present.
    for server_attr in (
        ServerAttributes.SERVER_ADDRESS,
        ServerAttributes.SERVER_PORT,
    ):
        if server_attr in span_attributes:
            common_attributes[server_attr] = span_attributes[server_attr]

    instruments.operation_duration_histogram.record(
        duration,
        attributes=common_attributes,
    )

    # Token usage is only available on a successful response that carries
    # a ``usage`` block; record input and completion tokens separately,
    # distinguished by the token-type attribute.
    if result and getattr(result, "usage", None):
        input_attributes = {
            **common_attributes,
            GenAIAttributes.GEN_AI_TOKEN_TYPE: GenAIAttributes.GenAiTokenTypeValues.INPUT.value,
        }
        instruments.token_usage_histogram.record(
            result.usage.prompt_tokens,
            attributes=input_attributes,
        )

        completion_attributes = {
            **common_attributes,
            GenAIAttributes.GEN_AI_TOKEN_TYPE: GenAIAttributes.GenAiTokenTypeValues.COMPLETION.value,
        }
        instruments.token_usage_histogram.record(
            result.usage.completion_tokens,
            attributes=completion_attributes,
        )


def _set_response_attributes(
span, result, event_logger: EventLogger, capture_content: bool
):
Expand Down
Loading

0 comments on commit a716949

Please sign in to comment.