Update the Azure OpenAI docs to include example app updates (#4042)
mdbirnstiehl authored Jun 27, 2024
1 parent 3f8b605 commit 36af2f0
Showing 1 changed file with 45 additions and 61 deletions.
For this tutorial, we'll be using an https://github.com/mdbirnstiehl/AzureOpenAIAPMmonitoringOtel[example application].

To start collecting APM data for your Azure OpenAI applications, gather the OpenTelemetry OTLP exporter endpoint and authentication header from your {ecloud} instance:

. From the {kib} homepage, select **Add integrations**.
. Select the **APM** integration.
. Scroll down to **APM Agents** and select the **OpenTelemetry** tab.
. Make note of the values for the following configuration settings:
With the configuration values from the APM integration and your https://learn.mi

[source,bash]
----
export AZURE_OPENAI_API_KEY="your-Azure-OpenAI-API-key"
export AZURE_OPENAI_ENDPOINT="your-Azure-OpenAI-endpoint"
export OPENAI_API_VERSION="your_api_version"
export OTEL_EXPORTER_OTLP_HEADERS="Authorization=Bearer%20<your-otel-exporter-auth-header>"
export OTEL_EXPORTER_OTLP_ENDPOINT="your-otel-exporter-endpoint"
export OTEL_RESOURCE_ATTRIBUTES=service.name=your-service-name
----
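Before starting the app, it can help to confirm these variables are actually set in your shell. The following is an optional, minimal sketch; the `missing_vars` helper is illustrative and not part of the tutorial application:

```python
import os

# Variable names taken from the exports above
REQUIRED_VARS = [
    "AZURE_OPENAI_API_KEY",
    "AZURE_OPENAI_ENDPOINT",
    "OPENAI_API_VERSION",
    "OTEL_EXPORTER_OTLP_HEADERS",
    "OTEL_EXPORTER_OTLP_ENDPOINT",
    "OTEL_RESOURCE_ATTRIBUTES",
]


def missing_vars(environ=None):
    """Return the names of required variables that are unset or empty."""
    if environ is None:
        environ = os.environ
    return [name for name in REQUIRED_VARS if not environ.get(name)]


# Report anything still missing in the current environment
print(missing_vars())
```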

[discrete]
[[azure-openai-apm-python-libraries]]
==== Download Python libraries

Install the necessary Python libraries using this command:

[source,bash]
----
pip3 install openai flask "opentelemetry-distro[otlp]" opentelemetry-instrumentation
----

[discrete]
The app we're using in this tutorial is a simple example that calls Azure OpenAI:
[source,python]
----
import os

from flask import Flask
from openai import AzureOpenAI
from opentelemetry import trace

from monitor import count_completion_requests_and_tokens

# Initialize Flask app
app = Flask(__name__)

# Create the Azure OpenAI client from environment variables
client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version=os.getenv("OPENAI_API_VERSION"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
)

# Monkey-patch the chat completions call so each request is counted
client.chat.completions.create = count_completion_requests_and_tokens(
    client.chat.completions.create
)

tracer = trace.get_tracer("counter")


@app.route("/completion")
@tracer.start_as_current_span("completion")
def completion():
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {
                "role": "user",
                "content": "How do I send my APM data to Elastic Observability?",
            }
        ],
        max_tokens=20,
        temperature=0,
    )
    return response.choices[0].message.content.strip()
----
The https://github.com/mdbirnstiehl/AzureOpenAIAPMmonitoringOtel/blob/main/monitor.py[`monitor.py` file] in the example application instruments the application and can be used to instrument your own applications.

The `monitor.py` code uses monkey patching, a Python technique that dynamically modifies the behavior of a class or module at runtime, to wrap the `chat.completions.create` call so the response metrics can be added to the OpenTelemetry spans:

[source,python]
----
def count_completion_requests_and_tokens(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        counters["completion_count"] += 1
        response = func(*args, **kwargs)
        token_count = response.usage.total_tokens
        prompt_tokens = response.usage.prompt_tokens
        completion_tokens = response.usage.completion_tokens
        cost = calculate_cost(response)
        strResponse = str(response)
        # Set OpenTelemetry attributes
        span = trace.get_current_span()
        if span:
            span.set_attribute("completion_count", counters["completion_count"])
            span.set_attribute("token_count", token_count)
            span.set_attribute("prompt_tokens", prompt_tokens)
            span.set_attribute("completion_tokens", completion_tokens)
            span.set_attribute("model", response.model)
            span.set_attribute("cost", cost)
            span.set_attribute("response", strResponse)
        return response
    return wrapper
----

Adding this data to the spans lets us send it to the OTLP endpoint, so you can search for the data in {observability} and use it to build dashboards and visualizations.
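To see what the monkey-patched wrapper does in isolation, here is a minimal runnable sketch of the same counting pattern; the `fake_create` stub and the `counters` dictionary contents are illustrative stand-ins for the real client call, not part of the tutorial app:

```python
from functools import wraps
from types import SimpleNamespace

counters = {"completion_count": 0, "token_count": 0}


def count_completion_requests_and_tokens(func):
    # Same shape as the tutorial's wrapper: count the call, run it, read usage
    @wraps(func)
    def wrapper(*args, **kwargs):
        counters["completion_count"] += 1
        response = func(*args, **kwargs)
        counters["token_count"] = response.usage.total_tokens
        return response

    return wrapper


def fake_create(**kwargs):
    # Stand-in for client.chat.completions.create
    return SimpleNamespace(
        model="gpt-4",
        usage=SimpleNamespace(prompt_tokens=20, completion_tokens=10, total_tokens=30),
    )


patched = count_completion_requests_and_tokens(fake_create)
patched(model="gpt-4")
patched(model="gpt-4")
print(counters)  # {'completion_count': 2, 'token_count': 30}
```

Because the wrapper preserves the wrapped function's signature and return value, the rest of the application can keep calling `client.chat.completions.create` unchanged.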

Implementing the following function allows you to calculate the cost of a single request:
[source,python]
----
def calculate_cost(response):
    if response.model in ["gpt-4", "gpt-4-0314"]:
        cost = (
            response.usage.prompt_tokens * 0.03
            + response.usage.completion_tokens * 0.06
        ) / 1000
    elif response.model in ["gpt-4-32k", "gpt-4-32k-0314"]:
        cost = (
            response.usage.prompt_tokens * 0.06
            + response.usage.completion_tokens * 0.12
        ) / 1000
    elif "gpt-3.5-turbo" in response.model:
        cost = response.usage.total_tokens * 0.002 / 1000
    elif "davinci" in response.model:
        cost = response.usage.total_tokens * 0.02 / 1000
    elif "curie" in response.model:
        cost = response.usage.total_tokens * 0.002 / 1000
    elif "babbage" in response.model:
        cost = response.usage.total_tokens * 0.0005 / 1000
    elif "ada" in response.model:
        cost = response.usage.total_tokens * 0.0004 / 1000
    else:
        cost = 0
    return cost
----
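As a quick sanity check on the arithmetic, here is a self-contained example exercising only the `gpt-4` branch; the `SimpleNamespace` response object is a stand-in for a real completion response, and the per-1K-token rates are the tutorial's illustrative prices, not current Azure pricing:

```python
from types import SimpleNamespace


def gpt4_cost(response):
    # Trimmed to the gpt-4 branch of the cost function above
    if response.model in ["gpt-4", "gpt-4-0314"]:
        return (
            response.usage.prompt_tokens * 0.03
            + response.usage.completion_tokens * 0.06
        ) / 1000
    return 0


response = SimpleNamespace(
    model="gpt-4",
    usage=SimpleNamespace(prompt_tokens=20, completion_tokens=10, total_tokens=30),
)

# (20 * 0.03 + 10 * 0.06) / 1000, i.e. roughly 0.0012
print(gpt4_cost(response))
```

A request with 20 prompt tokens and 10 completion tokens therefore costs about $0.0012 at these rates, which matches the `cost` attribute the monkey-patched wrapper attaches to each span.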
