From 0a8d2f1ff97ffdf5ff01c7c52fcae1969be67cda Mon Sep 17 00:00:00 2001 From: Thomas Pierce Date: Mon, 22 Apr 2024 16:41:18 +0000 Subject: [PATCH 1/3] Handle issues introduced by OTLP gRPC protocol In this commit, we are handling issues that arise from gRPC. Essentially, if we build gRPC artifacts into our Docker image, it causes the Docker image to only be compatible with applications built using the same Python version. To solve this, we are doing two things: 1) we are removing gRPC artifacts from the docker image and 2) we are changing the default OTLP protocol to be HTTP. --- Dockerfile | 16 +++-- .../distro/aws_opentelemetry_configurator.py | 12 +++- .../distro/aws_opentelemetry_distro.py | 29 +++++++-- .../test_aws_opentelementry_configurator.py | 65 +++++++++++++++++-- 4 files changed, 100 insertions(+), 22 deletions(-) diff --git a/Dockerfile b/Dockerfile index a0192e5be..f447597f0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,13 +3,7 @@ # The packages are installed in the `/autoinstrumentation` directory. This is required as when instrumenting the pod by CWOperator, # one init container will be created to copy all the content in `/autoinstrumentation` directory to app's container. Then # update the `PYTHONPATH` environment variable accordingly. Then in the second stage, copy the directory to `/autoinstrumentation`. - -# Using Python 3.10 because we are utilizing the opentelemetry-exporter-otlp-proto-grpc exporter, -# which relies on grpcio as a dependency. grpcio has strict dependencies on the OS and Python version. -# Also mentioned in Docker build template in the upstream repository: -# https://github.com/open-telemetry/opentelemetry-operator/blob/b5bb0ae34720d4be2d229dafecb87b61b37699b0/autoinstrumentation/python/requirements.txt#L2 -# For further details, please refer to: https://github.com/MicrosoftDocs/azure-docs/blob/main/articles/azure-functions/recover-python-functions.md#the-python-interpre[…]tions-python-worker -FROM python:3.10 AS build +FROM python:3.11 AS build WORKDIR /operator-build @@ -17,6 +11,14 @@ ADD aws-opentelemetry-distro/ ./aws-opentelemetry-distro/ RUN mkdir workspace && pip install --target workspace ./aws-opentelemetry-distro +# Remove opentelemetry-exporter-otlp-proto-grpc and grpcio, as grpcio has strict dependencies on the Python version and +# will cause confusing failures if gRPC protocol is used. Now if gRPC protocol is requested by the user, instrumentation +# will complain that grpc is not installed, which is more understandable. References: +# * https://github.com/open-telemetry/opentelemetry-operator/blob/b5bb0ae34720d4be2d229dafecb87b61b37699b0/autoinstrumentation/python/requirements.txt#L2 +# * https://github.com/MicrosoftDocs/azure-docs/blob/main/articles/azure-functions/recover-python-functions.md#troubleshoot-cannot-import-cygrpc +RUN pip uninstall opentelemetry-exporter-otlp-proto-grpc -y +RUN pip uninstall grpcio -y + FROM public.ecr.aws/amazonlinux/amazonlinux:minimal # Required to copy attribute files to distributed docker images diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index 2e9963e81..f996d5b22 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -17,7 +17,6 @@ ) from amazon.opentelemetry.distro.aws_span_metrics_processor_builder import AwsSpanMetricsProcessorBuilder from amazon.opentelemetry.distro.sampler.aws_xray_remote_sampler import AwsXRayRemoteSampler -from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter as OTLPGrpcOTLPMetricExporter from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter as OTLPHttpOTLPMetricExporter from opentelemetry.sdk._configuration import ( _get_exporter_names, @@ -274,13 +273,13 @@ def __new__(cls, *args, **kwargs): # pylint: disable=no-self-use def create_exporter(self): protocol = os.environ.get( - OTEL_EXPORTER_OTLP_METRICS_PROTOCOL, os.environ.get(OTEL_EXPORTER_OTLP_PROTOCOL, "grpc") + OTEL_EXPORTER_OTLP_METRICS_PROTOCOL, os.environ.get(OTEL_EXPORTER_OTLP_PROTOCOL, "http/protobuf") ) _logger.debug("AWS Application Signals export protocol: %s", protocol) application_signals_endpoint = os.environ.get( APPLICATION_SIGNALS_EXPORTER_ENDPOINT_CONFIG, - os.environ.get(APP_SIGNALS_EXPORTER_ENDPOINT_CONFIG, "http://localhost:4315"), + os.environ.get(APP_SIGNALS_EXPORTER_ENDPOINT_CONFIG, "http://localhost:4316"), ) _logger.debug("AWS Application Signals export endpoint: %s", application_signals_endpoint) @@ -302,6 +301,13 @@ def create_exporter(self): endpoint=application_signals_endpoint, preferred_temporality=temporality_dict ) if protocol == "grpc": + # pylint: disable=import-outside-toplevel + # Delay import to only occur if gRPC specifically requested. Vended Docker image will not have gRPC bundled, + # so importing it at the class level can cause runtime failures. + from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import ( + OTLPMetricExporter as OTLPGrpcOTLPMetricExporter, + ) + return OTLPGrpcOTLPMetricExporter( endpoint=application_signals_endpoint, preferred_temporality=temporality_dict ) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_distro.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_distro.py index 9d7ff43ab..11c2fc36d 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_distro.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_distro.py @@ -5,12 +5,29 @@ from amazon.opentelemetry.distro.patches._instrumentation_patch import apply_instrumentation_patches from opentelemetry.distro import OpenTelemetryDistro from opentelemetry.environment_variables import OTEL_PROPAGATORS, OTEL_PYTHON_ID_GENERATOR -from opentelemetry.sdk.environment_variables import OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION +from opentelemetry.sdk.environment_variables import ( + OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION, + OTEL_EXPORTER_OTLP_PROTOCOL, +) class AwsOpenTelemetryDistro(OpenTelemetryDistro): def _configure(self, **kwargs): - """ + """Sets up default environment variables and apply patches + + Set default OTEL_EXPORTER_OTLP_PROTOCOL to be HTTP. This must be run before super(), which attempts to set the + default to gRPC. If we run afterwards, we don't know if the default was set by base OpenTelemetryDistro or if it + was set by the user. We are setting to HTTP as gRPC does not work out of the box for the vended docker image, + due to gRPC having a strict dependency on the Python version the artifact was built for (OTEL observed this: + https://github.com/open-telemetry/opentelemetry-operator/blob/461ba68e80e8ac6bf2603eb353547cd026119ed2/autoinstrumentation/python/requirements.txt#L2-L3) + + Also sets default OTEL_PROPAGATORS, OTEL_PYTHON_ID_GENERATOR, and + OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION to ensure good compatibility with X-Ray and Application + Signals. + + Also applies patches to upstream instrumentation - usually these are stopgap measures until we can contribute + long-term changes to upstream. + kwargs: apply_patches: bool - apply patches to upstream instrumentation. Default is True. @@ -19,13 +36,15 @@ def _configure(self, **kwargs): OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION environment variable. Need to work with upstream to make it to be configurable. """ + os.environ.setdefault(OTEL_EXPORTER_OTLP_PROTOCOL, "http/protobuf") + super(AwsOpenTelemetryDistro, self)._configure() + + os.environ.setdefault(OTEL_PROPAGATORS, "xray,tracecontext,b3,b3multi") + os.environ.setdefault(OTEL_PYTHON_ID_GENERATOR, "xray") os.environ.setdefault( OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION, "base2_exponential_bucket_histogram" ) - os.environ.setdefault(OTEL_PROPAGATORS, "xray,tracecontext,b3,b3multi") - os.environ.setdefault(OTEL_PYTHON_ID_GENERATOR, "xray") - # Apply patches to upstream instrumentation - usually stopgap measures until we can contribute long-term changes if kwargs.get("apply_patches", True): apply_instrumentation_patches() diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py index e7d946c38..69e453392 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py @@ -9,6 +9,7 @@ from amazon.opentelemetry.distro.attribute_propagating_span_processor import AttributePropagatingSpanProcessor from amazon.opentelemetry.distro.aws_metric_attributes_span_exporter import AwsMetricAttributesSpanExporter from amazon.opentelemetry.distro.aws_opentelemetry_configurator import ( + ApplicationSignalsExporterProvider, AwsOpenTelemetryConfigurator, _custom_import_sampler, _customize_exporter, @@ -21,6 +22,9 @@ from amazon.opentelemetry.distro.sampler._aws_xray_sampling_client import _AwsXRaySamplingClient from amazon.opentelemetry.distro.sampler.aws_xray_remote_sampler import AwsXRayRemoteSampler from opentelemetry.environment_variables import OTEL_LOGS_EXPORTER, OTEL_METRICS_EXPORTER, OTEL_TRACES_EXPORTER +from opentelemetry.exporter.otlp.proto.common._internal.metrics_encoder import OTLPMetricExporterMixin +from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter as OTLPGrpcOTLPMetricExporter +from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter as OTLPHttpOTLPMetricExporter from opentelemetry.sdk.environment_variables import OTEL_TRACES_SAMPLER, OTEL_TRACES_SAMPLER_ARG from opentelemetry.sdk.resources import Resource from opentelemetry.sdk.trace import Span, SpanProcessor, Tracer, TracerProvider @@ -29,18 +33,28 @@ from opentelemetry.trace import get_tracer_provider -# This class setup Tracer Provider Globally, which can only set once -# if there is another setup for tracer provider, may cause issue class TestAwsOpenTelemetryConfigurator(TestCase): + """Tests AwsOpenTelemetryConfigurator and AwsOpenTelemetryDistro + + NOTE: This class setup Tracer Provider Globally, which can only be set once. If there is another setup for tracer + provider, it may cause issues for those tests. + """ + @classmethod def setUpClass(cls): - os.environ.setdefault(OTEL_TRACES_EXPORTER, "none") - os.environ.setdefault(OTEL_METRICS_EXPORTER, "none") - os.environ.setdefault(OTEL_LOGS_EXPORTER, "none") - os.environ.setdefault(OTEL_TRACES_SAMPLER, "traceidratio") - os.environ.setdefault(OTEL_TRACES_SAMPLER_ARG, "0.01") + # Run AwsOpenTelemetryDistro to set up environment, then validate expected env values. aws_open_telemetry_distro: AwsOpenTelemetryDistro = AwsOpenTelemetryDistro() aws_open_telemetry_distro.configure(apply_patches=False) + validate_distro_environ() + + # Overwrite exporter configs to keep tests clean, set sampler configs for tests + os.environ[OTEL_TRACES_EXPORTER] = "none" + os.environ[OTEL_METRICS_EXPORTER] = "none" + os.environ[OTEL_LOGS_EXPORTER] = "none" + os.environ[OTEL_TRACES_SAMPLER] = "traceidratio" + os.environ[OTEL_TRACES_SAMPLER_ARG] = "0.01" + + # Run configurator and get trace provider aws_otel_configurator: AwsOpenTelemetryConfigurator = AwsOpenTelemetryConfigurator() aws_otel_configurator.configure() cls.tracer_provider: TracerProvider = get_tracer_provider() @@ -249,3 +263,40 @@ def test_customize_span_processors(self): second_processor: SpanProcessor = mock_tracer_provider.add_span_processor.call_args_list[1].args[0] self.assertIsInstance(second_processor, AwsSpanMetricsProcessor) os.environ.pop("OTEL_AWS_APPLICATION_SIGNALS_ENABLED", None) + + def test_application_signals_exporter_provider(self): + # Check default protocol - HTTP, as specified by AwsOpenTelemetryDistro + exporter: OTLPMetricExporterMixin = ApplicationSignalsExporterProvider().create_exporter() + self.assertIsInstance(exporter, OTLPHttpOTLPMetricExporter) + self.assertEqual("http://localhost:4316", exporter._endpoint) + + # Overwrite protocol to gRPC. Note that this causes `http://` to be stripped from endpoint + os.environ["OTEL_EXPORTER_OTLP_PROTOCOL"] = "grpc" + exporter: SpanExporter = ApplicationSignalsExporterProvider().create_exporter() + self.assertIsInstance(exporter, OTLPGrpcOTLPMetricExporter) + self.assertEqual("localhost:4316", exporter._endpoint) + + # Overwrite protocol back to HTTP. Note that `http://` comes back to endpoint + os.environ["OTEL_EXPORTER_OTLP_PROTOCOL"] = "http/protobuf" + exporter: SpanExporter = ApplicationSignalsExporterProvider().create_exporter() + self.assertIsInstance(exporter, OTLPHttpOTLPMetricExporter) + self.assertEqual("http://localhost:4316", exporter._endpoint) + + +def validate_distro_environ(): + tc: TestCase = TestCase() + # Set by OpenTelemetryDistro + tc.assertEqual("otlp", os.environ.get("OTEL_TRACES_EXPORTER")) + tc.assertEqual("otlp", os.environ.get("OTEL_METRICS_EXPORTER")) + + # Set by AwsOpenTelemetryDistro + tc.assertEqual("http/protobuf", os.environ.get("OTEL_EXPORTER_OTLP_PROTOCOL")) + tc.assertEqual( + "base2_exponential_bucket_histogram", os.environ.get("OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION") + ) + tc.assertEqual("xray,tracecontext,b3,b3multi", os.environ.get("OTEL_PROPAGATORS")) + tc.assertEqual("xray", os.environ.get("OTEL_PYTHON_ID_GENERATOR")) + + # Not set + tc.assertEqual(None, os.environ.get("OTEL_TRACES_SAMPLER")) + tc.assertEqual(None, os.environ.get("OTEL_TRACES_SAMPLER_ARG")) From 4c5774ccf2b74398bcfd2b7436637a97c3575e4d Mon Sep 17 00:00:00 2001 From: Thomas Pierce Date: Mon, 22 Apr 2024 17:02:55 +0000 Subject: [PATCH 2/3] Fix contract tests --- contract-tests/tests/test/amazon/base/contract_test_base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/contract-tests/tests/test/amazon/base/contract_test_base.py b/contract-tests/tests/test/amazon/base/contract_test_base.py index 262933d99..8364bd830 100644 --- a/contract-tests/tests/test/amazon/base/contract_test_base.py +++ b/contract-tests/tests/test/amazon/base/contract_test_base.py @@ -90,6 +90,7 @@ def setUp(self) -> None: .with_env("OTEL_METRIC_EXPORT_INTERVAL", "50") .with_env("OTEL_AWS_APPLICATION_SIGNALS_ENABLED", "true") .with_env("OTEL_METRICS_EXPORTER", "none") + .with_env("OTEL_EXPORTER_OTLP_PROTOCOL", "grpc") .with_env("OTEL_BSP_SCHEDULE_DELAY", "1") .with_env("OTEL_AWS_APPLICATION_SIGNALS_EXPORTER_ENDPOINT", f"http://collector:{_MOCK_COLLECTOR_PORT}") .with_env("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", f"http://collector:{_MOCK_COLLECTOR_PORT}") From 0183eabd0c7ac80da0fbd8ac2c06e9781e47276a Mon Sep 17 00:00:00 2001 From: Thomas Pierce Date: Mon, 22 Apr 2024 18:16:06 +0000 Subject: [PATCH 3/3] Fix endpoints --- .../distro/aws_opentelemetry_configurator.py | 17 ++++++++++------- .../test_aws_opentelementry_configurator.py | 12 ++++++------ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index f996d5b22..db86703b7 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -277,13 +277,6 @@ def create_exporter(self): ) _logger.debug("AWS Application Signals export protocol: %s", protocol) - application_signals_endpoint = os.environ.get( - APPLICATION_SIGNALS_EXPORTER_ENDPOINT_CONFIG, - os.environ.get(APP_SIGNALS_EXPORTER_ENDPOINT_CONFIG, "http://localhost:4316"), - ) - - _logger.debug("AWS Application Signals export endpoint: %s", application_signals_endpoint) - temporality_dict: Dict[type, AggregationTemporality] = {} for typ in [ Counter, @@ -297,6 +290,11 @@ def create_exporter(self): temporality_dict[typ] = AggregationTemporality.DELTA if protocol == "http/protobuf": + application_signals_endpoint = os.environ.get( + APPLICATION_SIGNALS_EXPORTER_ENDPOINT_CONFIG, + os.environ.get(APP_SIGNALS_EXPORTER_ENDPOINT_CONFIG, "http://localhost:4316/v1/metrics"), + ) + _logger.debug("AWS Application Signals export endpoint: %s", application_signals_endpoint) return OTLPHttpOTLPMetricExporter( endpoint=application_signals_endpoint, preferred_temporality=temporality_dict ) @@ -308,6 +306,11 @@ def create_exporter(self): OTLPMetricExporter as OTLPGrpcOTLPMetricExporter, ) + application_signals_endpoint = os.environ.get( + APPLICATION_SIGNALS_EXPORTER_ENDPOINT_CONFIG, + os.environ.get(APP_SIGNALS_EXPORTER_ENDPOINT_CONFIG, "localhost:4315"), + ) + _logger.debug("AWS Application Signals export endpoint: %s", application_signals_endpoint) return OTLPGrpcOTLPMetricExporter( endpoint=application_signals_endpoint, preferred_temporality=temporality_dict ) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py index 69e453392..d3e8cf872 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py @@ -265,22 +265,22 @@ def test_customize_span_processors(self): os.environ.pop("OTEL_AWS_APPLICATION_SIGNALS_ENABLED", None) def test_application_signals_exporter_provider(self): - # Check default protocol - HTTP, as specified by AwsOpenTelemetryDistro + # Check default protocol - HTTP, as specified by AwsOpenTelemetryDistro. exporter: OTLPMetricExporterMixin = ApplicationSignalsExporterProvider().create_exporter() self.assertIsInstance(exporter, OTLPHttpOTLPMetricExporter) - self.assertEqual("http://localhost:4316", exporter._endpoint) + self.assertEqual("http://localhost:4316/v1/metrics", exporter._endpoint) - # Overwrite protocol to gRPC. Note that this causes `http://` to be stripped from endpoint + # Overwrite protocol to gRPC. os.environ["OTEL_EXPORTER_OTLP_PROTOCOL"] = "grpc" exporter: SpanExporter = ApplicationSignalsExporterProvider().create_exporter() self.assertIsInstance(exporter, OTLPGrpcOTLPMetricExporter) - self.assertEqual("localhost:4316", exporter._endpoint) + self.assertEqual("localhost:4315", exporter._endpoint) - # Overwrite protocol back to HTTP. Note that `http://` comes back to endpoint + # Overwrite protocol back to HTTP. os.environ["OTEL_EXPORTER_OTLP_PROTOCOL"] = "http/protobuf" exporter: SpanExporter = ApplicationSignalsExporterProvider().create_exporter() self.assertIsInstance(exporter, OTLPHttpOTLPMetricExporter) - self.assertEqual("http://localhost:4316", exporter._endpoint) + self.assertEqual("http://localhost:4316/v1/metrics", exporter._endpoint) def validate_distro_environ():