diff --git a/docs/en/observability/cloud-monitoring/azure/monitor-azure-openai-apm.asciidoc b/docs/en/observability/cloud-monitoring/azure/monitor-azure-openai-apm.asciidoc index 0f0f6ee2f3..09583dfbf8 100644 --- a/docs/en/observability/cloud-monitoring/azure/monitor-azure-openai-apm.asciidoc +++ b/docs/en/observability/cloud-monitoring/azure/monitor-azure-openai-apm.asciidoc @@ -14,7 +14,7 @@ For this tutorial, we'll be using an https://github.com/mdbirnstiehl/AzureOpenAI To start collecting APM data for your Azure OpenAI applications, gather the OpenTelemetry OTLP exporter endpoint and authentication header from your {ecloud} instance: -. From the {kib} homepage, click **Add integrations**. +. From the {kib} homepage, select **Add integrations**. . Select the **APM** integration. . Scroll down to **APM Agents** and select the **OpenTelemetry** tab. . Make note of the configuration values for the following configuration settings: @@ -28,25 +28,20 @@ With the configuration values from the APM integration and your https://learn.mi export AZURE_OPENAI_API_KEY="your-Azure-OpenAI-API-key" export AZURE_OPENAI_ENDPOINT="your-Azure-OpenAI-endpoint" export OPENAI_API_VERSION="your_api_version" -export OTEL_EXPORTER_OTLP_AUTH_HEADER="your-otel-exporter-auth-header" +export OTEL_EXPORTER_OTLP_HEADERS="Authorization=Bearer%20your-otel-exporter-auth-header" export OTEL_EXPORTER_OTLP_ENDPOINT="your-otel-exporter-endpoint" +export OTEL_RESOURCE_ATTRIBUTES=service.name=your-service-name ---- [discrete] [[azure-openai-apm-python-libraries]] ==== Download Python libraries -Install the following Python libraries using these commands: +Install the necessary Python libraries using this command: [source,bash] ---- -pip3 install opentelemetry-api -pip3 install opentelemetry-sdk -pip3 install opentelemetry-exporter-otlp -pip3 install opentelemetry-instrumentation -pip3 install opentelemetry-instrumentation-requests -pip3 install openai -pip3 install flask +pip3 install openai flask opentelemetry-distro[otlp] 
opentelemetry-instrumentation ---- [discrete] @@ -60,74 +55,59 @@ The app we're using in this tutorial is a simple example that calls Azure OpenAI [source,python] ---- -from openai import AzureOpenAI -import openai -from flask import Flask -import monitor # Import the module -from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter -import urllib import os +from flask import Flask +from openai import AzureOpenAI from opentelemetry import trace -from opentelemetry.sdk.resources import SERVICE_NAME, Resource -from opentelemetry.sdk.trace import TracerProvider -from opentelemetry.sdk.trace.export import BatchSpanProcessor -from opentelemetry.instrumentation.requests import RequestsInstrumentor - -# Service name is required for most backends -resource = Resource(attributes={ - SERVICE_NAME: "your-service-name" -}) -provider = TracerProvider(resource=resource) -processor = BatchSpanProcessor(OTLPSpanExporter(endpoint=os.getenv('OTEL_EXPORTER_OTLP_ENDPOINT'), - headers="Authorization=Bearer%20"+os.getenv('OTEL_EXPORTER_OTLP_AUTH_HEADER'))) +from monitor import count_completion_requests_and_tokens -provider.add_span_processor(processor) -trace.set_tracer_provider(provider) -tracer = trace.get_tracer(__name__) -RequestsInstrumentor().instrument() - - -# Initialize Flask app and instrument it +# Initialize Flask app app = Flask(__name__) # Set OpenAI API key client = AzureOpenAI( api_key=os.getenv("AZURE_OPENAI_API_KEY"), api_version=os.getenv("OPENAI_API_VERSION"), - azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT") + azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"), +) + +# Monkey-patch the client's chat.completions.create method +client.chat.completions.create = count_completion_requests_and_tokens( + client.chat.completions.create ) +tracer = trace.get_tracer("counter") + + @app.route("/completion") -@tracer.start_as_current_span("do_work") +@tracer.start_as_current_span("completion") def completion(): - response = openai.chat.completions.create( + 
response = client.chat.completions.create( model="gpt-4", messages=[ - {"role": "user", "content": "How do I send my APM data to Elastic Observability?"} + { + "role": "user", + "content": "How do I send my APM data to Elastic Observability?", + } ], max_tokens=20, - temperature=0 + temperature=0, ) - - return(response.choices[0].message.content.strip()) - -if __name__ == "__main__": - app.run(host="localhost", port=8000, debug=True) + return response.choices[0].message.content.strip() ---- +The code uses monkey patching, a Python technique that changes the behavior of a class or module at runtime by replacing its attributes or methods, to wrap the `chat.completions` call so we can add the response metrics to the OpenTelemetry spans. The https://github.com/mdbirnstiehl/AzureOpenAIAPMmonitoringOtel/blob/main/monitor.py[`monitor.py` file] in the example application provides this instrumentation, and you can reuse it to instrument your own applications. -The `monitor.py` code uses monkey patching, a technique in Python that dynamically modifies the behavior of a class or module at runtime by modifying its attributes or methods, to modify the behavior of the `chat.completions` call so we can add the response metrics to the OpenTelemetry spans: - [source,python] ---- def count_completion_requests_and_tokens(func): @wraps(func) def wrapper(*args, **kwargs): - counters['completion_count'] += 1 + counters["completion_count"] += 1 response = func(*args, **kwargs) token_count = response.usage.total_tokens @@ -139,7 +119,7 @@ def count_completion_requests_and_tokens(func): # Set OpenTelemetry attributes span = trace.get_current_span() if span: - span.set_attribute("completion_count", counters['completion_count']) + span.set_attribute("completion_count", counters["completion_count"]) span.set_attribute("token_count", token_count) span.set_attribute("prompt_tokens", prompt_tokens) span.set_attribute("completion_tokens", completion_tokens) @@ -147,10 
+127,8 @@ def count_completion_requests_and_tokens(func): span.set_attribute("cost", cost) span.set_attribute("response", strResponse) return response - return wrapper -# Monkey-patch the openai.Completion.create function -openai.chat.completions.create = count_completion_requests_and_tokens(openai.chat.completions.create) + return wrapper ---- Adding this data to our span lets us send it to our OTLP endpoint, so you can search for the data in {observability} and build dashboards and visualizations. @@ -160,19 +138,25 @@ Implementing the following function allows you to calculate the cost of a single [source,python] ---- def calculate_cost(response): - if response.model in ['gpt-4', 'gpt-4-0314']: - cost = (response.usage.prompt_tokens * 0.03 + response.usage.completion_tokens * 0.06) / 1000 - elif response.model in ['gpt-4-32k', 'gpt-4-32k-0314']: - cost = (response.usage.prompt_tokens * 0.06 + response.usage.completion_tokens * 0.12) / 1000 - elif 'gpt-3.5-turbo' in response.model: + if response.model in ["gpt-4", "gpt-4-0314"]: + cost = ( + response.usage.prompt_tokens * 0.03 + + response.usage.completion_tokens * 0.06 + ) / 1000 + elif response.model in ["gpt-4-32k", "gpt-4-32k-0314"]: + cost = ( + response.usage.prompt_tokens * 0.06 + + response.usage.completion_tokens * 0.12 + ) / 1000 + elif "gpt-3.5-turbo" in response.model: cost = response.usage.total_tokens * 0.002 / 1000 - elif 'davinci' in response.model: + elif "davinci" in response.model: cost = response.usage.total_tokens * 0.02 / 1000 - elif 'curie' in response.model: + elif "curie" in response.model: cost = response.usage.total_tokens * 0.002 / 1000 - elif 'babbage' in response.model: + elif "babbage" in response.model: cost = response.usage.total_tokens * 0.0005 / 1000 - elif 'ada' in response.model: + elif "ada" in response.model: cost = response.usage.total_tokens * 0.0004 / 1000 else: cost = 0