From 6bab9db0980301a825a63e847f0298fca7b1bd4c Mon Sep 17 00:00:00 2001 From: zzhlogin Date: Thu, 18 Apr 2024 10:57:03 -0700 Subject: [PATCH 1/8] Use new created cluster to run EKS E2E test (#167) Use new created cluster to run EKS E2E test preparing for test migration to [aws-application-signals-test-framework](https://github.com/aws-observability/aws-application-signals-test-framework) repo. Test workflow: EC2: https://github.com/aws-observability/aws-otel-python-instrumentation/actions/runs/8741157281 EKS: https://github.com/aws-observability/aws-otel-python-instrumentation/actions/runs/8741152630 By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. --- .github/workflows/appsignals-python-e2e-eks-canary-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/appsignals-python-e2e-eks-canary-test.yml b/.github/workflows/appsignals-python-e2e-eks-canary-test.yml index 4af7b29b9..2028df78d 100644 --- a/.github/workflows/appsignals-python-e2e-eks-canary-test.yml +++ b/.github/workflows/appsignals-python-e2e-eks-canary-test.yml @@ -33,5 +33,5 @@ jobs: secrets: inherit with: aws-region: ${{ matrix.aws-region }} - test-cluster-name: 'e2e-python-canary-test' + test-cluster-name: 'e2e-python-second-test' caller-workflow-name: 'appsignals-python-e2e-eks-canary-test' From b1bad4eb34a0249d8e872639aea88f8e477fde0e Mon Sep 17 00:00:00 2001 From: Thomas Pierce Date: Fri, 19 Apr 2024 17:46:00 -0700 Subject: [PATCH 2/8] Remove gRPC dependency for Docker - temporary (#168) Temporary PR, will be reverted, merging to quickly test E2E on main build. By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. 
--- .github/workflows/main_build.yml | 4 ++++ Dockerfile | 2 ++ .../opentelemetry/distro/aws_opentelemetry_configurator.py | 7 ++++++- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main_build.yml b/.github/workflows/main_build.yml index e574f8bf6..56f365786 100644 --- a/.github/workflows/main_build.yml +++ b/.github/workflows/main_build.yml @@ -5,6 +5,10 @@ on: branches: - main - "release/v*" + pull_request: + branches: + - main + env: AWS_DEFAULT_REGION: us-east-1 STAGING_ECR_REGISTRY: 637423224110.dkr.ecr.us-east-1.amazonaws.com diff --git a/Dockerfile b/Dockerfile index a0192e5be..8d7fab445 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,6 +16,8 @@ WORKDIR /operator-build ADD aws-opentelemetry-distro/ ./aws-opentelemetry-distro/ RUN mkdir workspace && pip install --target workspace ./aws-opentelemetry-distro +RUN pip uninstall opentelemetry-exporter-otlp-proto-grpc -y +RUN pip uninstall grpcio -y FROM public.ecr.aws/amazonlinux/amazonlinux:minimal diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index 2e9963e81..6c234a0b5 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -17,7 +17,6 @@ ) from amazon.opentelemetry.distro.aws_span_metrics_processor_builder import AwsSpanMetricsProcessorBuilder from amazon.opentelemetry.distro.sampler.aws_xray_remote_sampler import AwsXRayRemoteSampler -from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter as OTLPGrpcOTLPMetricExporter from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter as OTLPHttpOTLPMetricExporter from opentelemetry.sdk._configuration import ( _get_exporter_names, @@ -302,6 +301,12 @@ def create_exporter(self): 
endpoint=application_signals_endpoint, preferred_temporality=temporality_dict ) if protocol == "grpc": + # pylint: disable=import-outside-toplevel + # Delay import to only occur if grpc required. + from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import ( + OTLPMetricExporter as OTLPGrpcOTLPMetricExporter, + ) + return OTLPGrpcOTLPMetricExporter( endpoint=application_signals_endpoint, preferred_temporality=temporality_dict ) From 1219b5d7f40ca6ebc40a86c8eafcfb37c1374811 Mon Sep 17 00:00:00 2001 From: Thomas Pierce Date: Fri, 19 Apr 2024 17:59:09 -0700 Subject: [PATCH 3/8] Revert "Remove gRPC dependency for Docker - temporary (#168)" (#169) This reverts commit b1bad4eb34a0249d8e872639aea88f8e477fde0e. *Issue #, if available:* *Description of changes:* By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. --- .github/workflows/main_build.yml | 4 ---- Dockerfile | 2 -- .../opentelemetry/distro/aws_opentelemetry_configurator.py | 7 +------ 3 files changed, 1 insertion(+), 12 deletions(-) diff --git a/.github/workflows/main_build.yml b/.github/workflows/main_build.yml index 56f365786..e574f8bf6 100644 --- a/.github/workflows/main_build.yml +++ b/.github/workflows/main_build.yml @@ -5,10 +5,6 @@ on: branches: - main - "release/v*" - pull_request: - branches: - - main - env: AWS_DEFAULT_REGION: us-east-1 STAGING_ECR_REGISTRY: 637423224110.dkr.ecr.us-east-1.amazonaws.com diff --git a/Dockerfile b/Dockerfile index 8d7fab445..a0192e5be 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,8 +16,6 @@ WORKDIR /operator-build ADD aws-opentelemetry-distro/ ./aws-opentelemetry-distro/ RUN mkdir workspace && pip install --target workspace ./aws-opentelemetry-distro -RUN pip uninstall opentelemetry-exporter-otlp-proto-grpc -y -RUN pip uninstall grpcio -y FROM public.ecr.aws/amazonlinux/amazonlinux:minimal diff --git 
a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index 6c234a0b5..2e9963e81 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -17,6 +17,7 @@ ) from amazon.opentelemetry.distro.aws_span_metrics_processor_builder import AwsSpanMetricsProcessorBuilder from amazon.opentelemetry.distro.sampler.aws_xray_remote_sampler import AwsXRayRemoteSampler +from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter as OTLPGrpcOTLPMetricExporter from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter as OTLPHttpOTLPMetricExporter from opentelemetry.sdk._configuration import ( _get_exporter_names, @@ -301,12 +302,6 @@ def create_exporter(self): endpoint=application_signals_endpoint, preferred_temporality=temporality_dict ) if protocol == "grpc": - # pylint: disable=import-outside-toplevel - # Delay import to only occur if grpc required. - from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import ( - OTLPMetricExporter as OTLPGrpcOTLPMetricExporter, - ) - return OTLPGrpcOTLPMetricExporter( endpoint=application_signals_endpoint, preferred_temporality=temporality_dict ) From 80be26b1102bedf3d760341536a0c2e8a07c23b5 Mon Sep 17 00:00:00 2001 From: Thomas Pierce Date: Mon, 22 Apr 2024 11:35:40 -0700 Subject: [PATCH 4/8] Handle issues introduced by OTLP gRPC protocol (#170) In this commit, we are handling issues that arise from gRPC. Essentially, if we build gRPC artifacts into our Docker image, it causes the Docker image to only be compatible with applications built using the same Python version. 
To solve this, we are doing two things: 1) we are removing gRPC artifacts from the docker image and 2) we are changing the default OTLP protocol to be HTTP. If customers attempt to set the protocol as gRPC for ApplicationSignals, we will set the default endpoint correctly. Also we are changing Docker image to build with Python 3.11, which is what we were originally doing when we encountered this issue (reference: https://github.com/aws-observability/aws-otel-python-instrumentation/commit/5b3ed74eb8fd93a7810380dcd82234c6028423a4). This is what the upstream does (see [autoinstrumentation/python/Dockerfile](https://github.com/open-telemetry/opentelemetry-operator/blob/b5bb0ae34720d4be2d229dafecb87b61b37699b0/autoinstrumentation/python/Dockerfile)), and having parity here is beneficial to us. Testing: * Create `app.py`: ``` from time import sleep import boto3 try: boto3.client('s3').list_buckets() except Exception: sleep(100) ``` * Run `./scripts/build_and_install_distro.sh` * Run: ``` export OTEL_PYTHON_DISTRO="aws_distro" export OTEL_PYTHON_CONFIGURATOR="aws_configurator" export OTEL_METRICS_EXPORTER="none" unset OTEL_EXPORTER_OTLP_PROTOCOL unset OTEL_AWS_APPLICATION_SIGNALS_ENABLED ``` * Run `opentelemetry-instrument python ./app.py` ``` urllib3.exceptions.MaxRetryError: HTTPConnectionPool(host='localhost', port=4318): Max retries exceeded with url: /v1/traces (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused')) ``` * Run `export OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf; opentelemetry-instrument python ./app.py` ``` requests.exceptions.ConnectionError: HTTPConnectionPool(host='localhost', port=4318): Max retries exceeded with url: /v1/traces (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused')) ``` * Run `export OTEL_EXPORTER_OTLP_PROTOCOL=grpc; opentelemetry-instrument python ./app.py` ``` Transient error StatusCode.UNAVAILABLE encountered while exporting traces 
to localhost:4317, retrying in 1s. ``` * Run ``` unset OTEL_EXPORTER_OTLP_PROTOCOL export OTEL_METRIC_EXPORT_INTERVAL=1000 export OTEL_AWS_APPLICATION_SIGNALS_ENABLED=True ``` * Run `opentelemetry-instrument python ./app.py` ``` urllib3.exceptions.MaxRetryError: HTTPConnectionPool(host='localhost', port=4316): Max retries exceeded with url: /v1/metrics (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused')) urllib3.exceptions.MaxRetryError: HTTPConnectionPool(host='localhost', port=4318): Max retries exceeded with url: /v1/traces (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused')) ``` * Run `export OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf; opentelemetry-instrument python ./app.py` ``` urllib3.exceptions.MaxRetryError: HTTPConnectionPool(host='localhost', port=4318): Max retries exceeded with url: /v1/traces (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused')) requests.exceptions.ConnectionError: HTTPConnectionPool(host='localhost', port=4318): Max retries exceeded with url: /v1/traces (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused')) ``` * Run `export OTEL_EXPORTER_OTLP_PROTOCOL=grpc; opentelemetry-instrument python ./app.py` ``` Transient error StatusCode.UNAVAILABLE encountered while exporting metrics to localhost:4315, retrying in 1s. Transient error StatusCode.UNAVAILABLE encountered while exporting traces to localhost:4317, retrying in 1s. ``` By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. 
--- Dockerfile | 16 +++-- .../distro/aws_opentelemetry_configurator.py | 27 +++++--- .../distro/aws_opentelemetry_distro.py | 29 +++++++-- .../test_aws_opentelementry_configurator.py | 65 +++++++++++++++++-- .../test/amazon/base/contract_test_base.py | 1 + 5 files changed, 110 insertions(+), 28 deletions(-) diff --git a/Dockerfile b/Dockerfile index a0192e5be..f447597f0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,13 +3,7 @@ # The packages are installed in the `/autoinstrumentation` directory. This is required as when instrumenting the pod by CWOperator, # one init container will be created to copy all the content in `/autoinstrumentation` directory to app's container. Then # update the `PYTHONPATH` environment variable accordingly. Then in the second stage, copy the directory to `/autoinstrumentation`. - -# Using Python 3.10 because we are utilizing the opentelemetry-exporter-otlp-proto-grpc exporter, -# which relies on grpcio as a dependency. grpcio has strict dependencies on the OS and Python version. -# Also mentioned in Docker build template in the upstream repository: -# https://github.com/open-telemetry/opentelemetry-operator/blob/b5bb0ae34720d4be2d229dafecb87b61b37699b0/autoinstrumentation/python/requirements.txt#L2 -# For further details, please refer to: https://github.com/MicrosoftDocs/azure-docs/blob/main/articles/azure-functions/recover-python-functions.md#the-python-interpre[…]tions-python-worker -FROM python:3.10 AS build +FROM python:3.11 AS build WORKDIR /operator-build @@ -17,6 +11,14 @@ ADD aws-opentelemetry-distro/ ./aws-opentelemetry-distro/ RUN mkdir workspace && pip install --target workspace ./aws-opentelemetry-distro +# Remove opentelemetry-exporter-otlp-proto-grpc and grpcio, as grpcio has strict dependencies on the Python version and +# will cause confusing failures if gRPC protocol is used. Now if gRPC protocol is requested by the user, instrumentation +# will complain that grpc is not installed, which is more understandable. 
References: +# * https://github.com/open-telemetry/opentelemetry-operator/blob/b5bb0ae34720d4be2d229dafecb87b61b37699b0/autoinstrumentation/python/requirements.txt#L2 +# * https://github.com/MicrosoftDocs/azure-docs/blob/main/articles/azure-functions/recover-python-functions.md#troubleshoot-cannot-import-cygrpc +RUN pip uninstall opentelemetry-exporter-otlp-proto-grpc -y +RUN pip uninstall grpcio -y + FROM public.ecr.aws/amazonlinux/amazonlinux:minimal # Required to copy attribute files to distributed docker images diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index 2e9963e81..db86703b7 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -17,7 +17,6 @@ ) from amazon.opentelemetry.distro.aws_span_metrics_processor_builder import AwsSpanMetricsProcessorBuilder from amazon.opentelemetry.distro.sampler.aws_xray_remote_sampler import AwsXRayRemoteSampler -from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter as OTLPGrpcOTLPMetricExporter from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter as OTLPHttpOTLPMetricExporter from opentelemetry.sdk._configuration import ( _get_exporter_names, @@ -274,17 +273,10 @@ def __new__(cls, *args, **kwargs): # pylint: disable=no-self-use def create_exporter(self): protocol = os.environ.get( - OTEL_EXPORTER_OTLP_METRICS_PROTOCOL, os.environ.get(OTEL_EXPORTER_OTLP_PROTOCOL, "grpc") + OTEL_EXPORTER_OTLP_METRICS_PROTOCOL, os.environ.get(OTEL_EXPORTER_OTLP_PROTOCOL, "http/protobuf") ) _logger.debug("AWS Application Signals export protocol: %s", protocol) - application_signals_endpoint = os.environ.get( - APPLICATION_SIGNALS_EXPORTER_ENDPOINT_CONFIG, - 
os.environ.get(APP_SIGNALS_EXPORTER_ENDPOINT_CONFIG, "http://localhost:4315"), - ) - - _logger.debug("AWS Application Signals export endpoint: %s", application_signals_endpoint) - temporality_dict: Dict[type, AggregationTemporality] = {} for typ in [ Counter, @@ -298,10 +290,27 @@ def create_exporter(self): temporality_dict[typ] = AggregationTemporality.DELTA if protocol == "http/protobuf": + application_signals_endpoint = os.environ.get( + APPLICATION_SIGNALS_EXPORTER_ENDPOINT_CONFIG, + os.environ.get(APP_SIGNALS_EXPORTER_ENDPOINT_CONFIG, "http://localhost:4316/v1/metrics"), + ) + _logger.debug("AWS Application Signals export endpoint: %s", application_signals_endpoint) return OTLPHttpOTLPMetricExporter( endpoint=application_signals_endpoint, preferred_temporality=temporality_dict ) if protocol == "grpc": + # pylint: disable=import-outside-toplevel + # Delay import to only occur if gRPC specifically requested. Vended Docker image will not have gRPC bundled, + # so importing it at the class level can cause runtime failures. 
+ from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import ( + OTLPMetricExporter as OTLPGrpcOTLPMetricExporter, + ) + + application_signals_endpoint = os.environ.get( + APPLICATION_SIGNALS_EXPORTER_ENDPOINT_CONFIG, + os.environ.get(APP_SIGNALS_EXPORTER_ENDPOINT_CONFIG, "localhost:4315"), + ) + _logger.debug("AWS Application Signals export endpoint: %s", application_signals_endpoint) return OTLPGrpcOTLPMetricExporter( endpoint=application_signals_endpoint, preferred_temporality=temporality_dict ) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_distro.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_distro.py index 9d7ff43ab..11c2fc36d 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_distro.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_distro.py @@ -5,12 +5,29 @@ from amazon.opentelemetry.distro.patches._instrumentation_patch import apply_instrumentation_patches from opentelemetry.distro import OpenTelemetryDistro from opentelemetry.environment_variables import OTEL_PROPAGATORS, OTEL_PYTHON_ID_GENERATOR -from opentelemetry.sdk.environment_variables import OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION +from opentelemetry.sdk.environment_variables import ( + OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION, + OTEL_EXPORTER_OTLP_PROTOCOL, +) class AwsOpenTelemetryDistro(OpenTelemetryDistro): def _configure(self, **kwargs): - """ + """Sets up default environment variables and apply patches + + Set default OTEL_EXPORTER_OTLP_PROTOCOL to be HTTP. This must be run before super(), which attempts to set the + default to gRPC. If we run afterwards, we don't know if the default was set by base OpenTelemetryDistro or if it + was set by the user. 
We are setting to HTTP as gRPC does not work out of the box for the vended docker image, + due to gRPC having a strict dependency on the Python version the artifact was built for (OTEL observed this: + https://github.com/open-telemetry/opentelemetry-operator/blob/461ba68e80e8ac6bf2603eb353547cd026119ed2/autoinstrumentation/python/requirements.txt#L2-L3) + + Also sets default OTEL_PROPAGATORS, OTEL_PYTHON_ID_GENERATOR, and + OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION to ensure good compatibility with X-Ray and Application + Signals. + + Also applies patches to upstream instrumentation - usually these are stopgap measures until we can contribute + long-term changes to upstream. + kwargs: apply_patches: bool - apply patches to upstream instrumentation. Default is True. @@ -19,13 +36,15 @@ def _configure(self, **kwargs): OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION environment variable. Need to work with upstream to make it to be configurable. """ + os.environ.setdefault(OTEL_EXPORTER_OTLP_PROTOCOL, "http/protobuf") + super(AwsOpenTelemetryDistro, self)._configure() + + os.environ.setdefault(OTEL_PROPAGATORS, "xray,tracecontext,b3,b3multi") + os.environ.setdefault(OTEL_PYTHON_ID_GENERATOR, "xray") os.environ.setdefault( OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION, "base2_exponential_bucket_histogram" ) - os.environ.setdefault(OTEL_PROPAGATORS, "xray,tracecontext,b3,b3multi") - os.environ.setdefault(OTEL_PYTHON_ID_GENERATOR, "xray") - # Apply patches to upstream instrumentation - usually stopgap measures until we can contribute long-term changes if kwargs.get("apply_patches", True): apply_instrumentation_patches() diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py index e7d946c38..d3e8cf872 100644 --- 
a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py @@ -9,6 +9,7 @@ from amazon.opentelemetry.distro.attribute_propagating_span_processor import AttributePropagatingSpanProcessor from amazon.opentelemetry.distro.aws_metric_attributes_span_exporter import AwsMetricAttributesSpanExporter from amazon.opentelemetry.distro.aws_opentelemetry_configurator import ( + ApplicationSignalsExporterProvider, AwsOpenTelemetryConfigurator, _custom_import_sampler, _customize_exporter, @@ -21,6 +22,9 @@ from amazon.opentelemetry.distro.sampler._aws_xray_sampling_client import _AwsXRaySamplingClient from amazon.opentelemetry.distro.sampler.aws_xray_remote_sampler import AwsXRayRemoteSampler from opentelemetry.environment_variables import OTEL_LOGS_EXPORTER, OTEL_METRICS_EXPORTER, OTEL_TRACES_EXPORTER +from opentelemetry.exporter.otlp.proto.common._internal.metrics_encoder import OTLPMetricExporterMixin +from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter as OTLPGrpcOTLPMetricExporter +from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter as OTLPHttpOTLPMetricExporter from opentelemetry.sdk.environment_variables import OTEL_TRACES_SAMPLER, OTEL_TRACES_SAMPLER_ARG from opentelemetry.sdk.resources import Resource from opentelemetry.sdk.trace import Span, SpanProcessor, Tracer, TracerProvider @@ -29,18 +33,28 @@ from opentelemetry.trace import get_tracer_provider -# This class setup Tracer Provider Globally, which can only set once -# if there is another setup for tracer provider, may cause issue class TestAwsOpenTelemetryConfigurator(TestCase): + """Tests AwsOpenTelemetryConfigurator and AwsOpenTelemetryDistro + + NOTE: This class setup Tracer Provider Globally, which can only be set once. 
If there is another setup for tracer + provider, it may cause issues for those tests. + """ + @classmethod def setUpClass(cls): - os.environ.setdefault(OTEL_TRACES_EXPORTER, "none") - os.environ.setdefault(OTEL_METRICS_EXPORTER, "none") - os.environ.setdefault(OTEL_LOGS_EXPORTER, "none") - os.environ.setdefault(OTEL_TRACES_SAMPLER, "traceidratio") - os.environ.setdefault(OTEL_TRACES_SAMPLER_ARG, "0.01") + # Run AwsOpenTelemetryDistro to set up environment, then validate expected env values. aws_open_telemetry_distro: AwsOpenTelemetryDistro = AwsOpenTelemetryDistro() aws_open_telemetry_distro.configure(apply_patches=False) + validate_distro_environ() + + # Overwrite exporter configs to keep tests clean, set sampler configs for tests + os.environ[OTEL_TRACES_EXPORTER] = "none" + os.environ[OTEL_METRICS_EXPORTER] = "none" + os.environ[OTEL_LOGS_EXPORTER] = "none" + os.environ[OTEL_TRACES_SAMPLER] = "traceidratio" + os.environ[OTEL_TRACES_SAMPLER_ARG] = "0.01" + + # Run configurator and get trace provider aws_otel_configurator: AwsOpenTelemetryConfigurator = AwsOpenTelemetryConfigurator() aws_otel_configurator.configure() cls.tracer_provider: TracerProvider = get_tracer_provider() @@ -249,3 +263,40 @@ def test_customize_span_processors(self): second_processor: SpanProcessor = mock_tracer_provider.add_span_processor.call_args_list[1].args[0] self.assertIsInstance(second_processor, AwsSpanMetricsProcessor) os.environ.pop("OTEL_AWS_APPLICATION_SIGNALS_ENABLED", None) + + def test_application_signals_exporter_provider(self): + # Check default protocol - HTTP, as specified by AwsOpenTelemetryDistro. + exporter: OTLPMetricExporterMixin = ApplicationSignalsExporterProvider().create_exporter() + self.assertIsInstance(exporter, OTLPHttpOTLPMetricExporter) + self.assertEqual("http://localhost:4316/v1/metrics", exporter._endpoint) + + # Overwrite protocol to gRPC. 
+ os.environ["OTEL_EXPORTER_OTLP_PROTOCOL"] = "grpc" + exporter: SpanExporter = ApplicationSignalsExporterProvider().create_exporter() + self.assertIsInstance(exporter, OTLPGrpcOTLPMetricExporter) + self.assertEqual("localhost:4315", exporter._endpoint) + + # Overwrite protocol back to HTTP. + os.environ["OTEL_EXPORTER_OTLP_PROTOCOL"] = "http/protobuf" + exporter: SpanExporter = ApplicationSignalsExporterProvider().create_exporter() + self.assertIsInstance(exporter, OTLPHttpOTLPMetricExporter) + self.assertEqual("http://localhost:4316/v1/metrics", exporter._endpoint) + + +def validate_distro_environ(): + tc: TestCase = TestCase() + # Set by OpenTelemetryDistro + tc.assertEqual("otlp", os.environ.get("OTEL_TRACES_EXPORTER")) + tc.assertEqual("otlp", os.environ.get("OTEL_METRICS_EXPORTER")) + + # Set by AwsOpenTelemetryDistro + tc.assertEqual("http/protobuf", os.environ.get("OTEL_EXPORTER_OTLP_PROTOCOL")) + tc.assertEqual( + "base2_exponential_bucket_histogram", os.environ.get("OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION") + ) + tc.assertEqual("xray,tracecontext,b3,b3multi", os.environ.get("OTEL_PROPAGATORS")) + tc.assertEqual("xray", os.environ.get("OTEL_PYTHON_ID_GENERATOR")) + + # Not set + tc.assertEqual(None, os.environ.get("OTEL_TRACES_SAMPLER")) + tc.assertEqual(None, os.environ.get("OTEL_TRACES_SAMPLER_ARG")) diff --git a/contract-tests/tests/test/amazon/base/contract_test_base.py b/contract-tests/tests/test/amazon/base/contract_test_base.py index 262933d99..8364bd830 100644 --- a/contract-tests/tests/test/amazon/base/contract_test_base.py +++ b/contract-tests/tests/test/amazon/base/contract_test_base.py @@ -90,6 +90,7 @@ def setUp(self) -> None: .with_env("OTEL_METRIC_EXPORT_INTERVAL", "50") .with_env("OTEL_AWS_APPLICATION_SIGNALS_ENABLED", "true") .with_env("OTEL_METRICS_EXPORTER", "none") + .with_env("OTEL_EXPORTER_OTLP_PROTOCOL", "grpc") .with_env("OTEL_BSP_SCHEDULE_DELAY", "1") .with_env("OTEL_AWS_APPLICATION_SIGNALS_EXPORTER_ENDPOINT", 
f"http://collector:{_MOCK_COLLECTOR_PORT}") .with_env("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", f"http://collector:{_MOCK_COLLECTOR_PORT}") From 67c05e13efc7f4830d6571a56d98a1cfd9b3bc28 Mon Sep 17 00:00:00 2001 From: Thomas Pierce Date: Mon, 22 Apr 2024 13:12:44 -0700 Subject: [PATCH 5/8] Fix removing gRPC in Dockerfile (#172) We are installing using `pip install --target`. To fix this we must set PYTHONPATH so pip can find and uninstall these packages. By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index f447597f0..39fdce615 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,6 +16,7 @@ RUN mkdir workspace && pip install --target workspace ./aws-opentelemetry-distro # will complain that grpc is not installed, which is more understandable. References: # * https://github.com/open-telemetry/opentelemetry-operator/blob/b5bb0ae34720d4be2d229dafecb87b61b37699b0/autoinstrumentation/python/requirements.txt#L2 # * https://github.com/MicrosoftDocs/azure-docs/blob/main/articles/azure-functions/recover-python-functions.md#troubleshoot-cannot-import-cygrpc +ENV PYTHONPATH=/operator-build/workspace RUN pip uninstall opentelemetry-exporter-otlp-proto-grpc -y RUN pip uninstall grpcio -y From 1f13ce8202fd72fe9edb421ec2b4948709b26195 Mon Sep 17 00:00:00 2001 From: Thomas Pierce Date: Mon, 22 Apr 2024 14:36:43 -0700 Subject: [PATCH 6/8] Fix removing gRPC from Dockerfile (#173) The previous fix (https://github.com/aws-observability/aws-otel-python-instrumentation/pull/172) was not working as deleting the files after installation caused dangling dependencies - the instrumentation expected a dependency, but it was not found, causing instrumentation to fail. By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. 
--- Dockerfile | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index 39fdce615..e8aa35db1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,16 +9,14 @@ WORKDIR /operator-build ADD aws-opentelemetry-distro/ ./aws-opentelemetry-distro/ -RUN mkdir workspace && pip install --target workspace ./aws-opentelemetry-distro - # Remove opentelemetry-exporter-otlp-proto-grpc and grpcio, as grpcio has strict dependencies on the Python version and # will cause confusing failures if gRPC protocol is used. Now if gRPC protocol is requested by the user, instrumentation # will complain that grpc is not installed, which is more understandable. References: # * https://github.com/open-telemetry/opentelemetry-operator/blob/b5bb0ae34720d4be2d229dafecb87b61b37699b0/autoinstrumentation/python/requirements.txt#L2 # * https://github.com/MicrosoftDocs/azure-docs/blob/main/articles/azure-functions/recover-python-functions.md#troubleshoot-cannot-import-cygrpc -ENV PYTHONPATH=/operator-build/workspace -RUN pip uninstall opentelemetry-exporter-otlp-proto-grpc -y -RUN pip uninstall grpcio -y +RUN sed -i "/opentelemetry-exporter-otlp-proto-grpc/d" ./aws-opentelemetry-distro/pyproject.toml + +RUN mkdir workspace && pip install --target workspace ./aws-opentelemetry-distro FROM public.ecr.aws/amazonlinux/amazonlinux:minimal From 61542d4180de27c055604fba349eb514aa0fcf03 Mon Sep 17 00:00:00 2001 From: Thomas Pierce Date: Tue, 23 Apr 2024 10:36:14 -0700 Subject: [PATCH 7/8] Post release 0.1.1: Update version to 0.1.1.dev0 (#177) This PR prepares the main branch for the next development cycle by updating the version to 0.1.1.dev0 and updating the image version to be scanned to the latest released. This PR should only be merge when release for version v0.1.1 is successful. By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. 
--- .github/workflows/daily_scan.yml | 4 ++-- .../src/amazon/opentelemetry/distro/version.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/daily_scan.yml b/.github/workflows/daily_scan.yml index fae785788..8c5ccbe99 100644 --- a/.github/workflows/daily_scan.yml +++ b/.github/workflows/daily_scan.yml @@ -82,7 +82,7 @@ jobs: id: high_scan uses: ./.github/actions/image_scan with: - image-ref: "public.ecr.aws/aws-observability/adot-autoinstrumentation-python:v0.1.0" + image-ref: "public.ecr.aws/aws-observability/adot-autoinstrumentation-python:v0.1.1" severity: 'CRITICAL,HIGH' - name: Perform low image scan @@ -90,7 +90,7 @@ jobs: id: low_scan uses: ./.github/actions/image_scan with: - image-ref: "public.ecr.aws/aws-observability/adot-autoinstrumentation-python:v0.1.0" + image-ref: "public.ecr.aws/aws-observability/adot-autoinstrumentation-python:v0.1.1" severity: 'MEDIUM,LOW,UNKNOWN' - name: Configure AWS Credentials for emitting metrics diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/version.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/version.py index 41afb9a0d..41e946c01 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/version.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/version.py @@ -1,4 +1,4 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 -__version__ = "0.1.0.dev0" +__version__ = "0.1.1.dev0" From e042cfeea7a3ffee31d08c971a9f8623b8dccc0d Mon Sep 17 00:00:00 2001 From: XinRan Zhang Date: Wed, 24 Apr 2024 14:39:09 -0700 Subject: [PATCH 8/8] Implemented Misc Contract Tests (#163) *Description of changes:* Implement resource and configuration contract test comparable to [java tests](https://github.com/aws-observability/aws-otel-java-instrumentation/tree/main/appsignals-tests/contract-tests/src/test/java/software/amazon/opentelemetry/appsignals/test/misc) By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. --- .../test/amazon/misc/configuration_test.py | 87 +++++++++++++++++++ .../misc/resource_attributes_test_base.py | 78 +++++++++++++++++ .../misc/service_name_in_env_var_test.py | 25 ++++++ ...ervice_name_in_resource_attributes_test.py | 21 +++++ .../amazon/misc/unknown_service_name_test.py | 20 +++++ 5 files changed, 231 insertions(+) create mode 100644 contract-tests/tests/test/amazon/misc/configuration_test.py create mode 100644 contract-tests/tests/test/amazon/misc/resource_attributes_test_base.py create mode 100644 contract-tests/tests/test/amazon/misc/service_name_in_env_var_test.py create mode 100644 contract-tests/tests/test/amazon/misc/service_name_in_resource_attributes_test.py create mode 100644 contract-tests/tests/test/amazon/misc/unknown_service_name_test.py diff --git a/contract-tests/tests/test/amazon/misc/configuration_test.py b/contract-tests/tests/test/amazon/misc/configuration_test.py new file mode 100644 index 000000000..1810cd2ba --- /dev/null +++ b/contract-tests/tests/test/amazon/misc/configuration_test.py @@ -0,0 +1,87 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+# SPDX-License-Identifier: Apache-2.0 +import time +from typing import List + +from mock_collector_client import ResourceScopeMetric, ResourceScopeSpan +from requests import Response, request +from typing_extensions import override + +from amazon.base.contract_test_base import ContractTestBase +from amazon.utils.application_signals_constants import ERROR_METRIC, FAULT_METRIC, LATENCY_METRIC +from opentelemetry.sdk.metrics.export import AggregationTemporality + +# Tests in this class are supposed to validate that the SDK was configured in the correct way: It +# uses the X-Ray ID format. Metrics are deltaPreferred. Type of the metrics are exponentialHistogram + + +class ConfigurationTest(ContractTestBase): + @override + def get_application_image_name(self) -> str: + return "aws-application-signals-tests-django-app" + + @override + def get_application_wait_pattern(self) -> str: + return "Quit the server with CONTROL-C." + + @override + def get_application_extra_environment_variables(self): + return {"DJANGO_SETTINGS_MODULE": "django_server.settings"} + + def test_configuration_metrics(self): + address: str = self.application.get_container_host_ip() + port: str = self.application.get_exposed_port(self.get_application_port()) + url: str = f"http://{address}:{port}/success" + response: Response = request("GET", url, timeout=20) + self.assertEqual(200, response.status_code) + metrics: List[ResourceScopeMetric] = self.mock_collector_client.get_metrics( + {LATENCY_METRIC, ERROR_METRIC, FAULT_METRIC} + ) + + self.assertEqual(len(metrics), 3) + for metric in metrics: + self.assertIsNotNone(metric.metric.exponential_histogram) + self.assertEqual(metric.metric.exponential_histogram.aggregation_temporality, AggregationTemporality.DELTA) + + def test_xray_id_format(self): + """ + We are testing here that the X-Ray id format is always used by inspecting the traceid that + was in the span received by the collector, which should be consistent across multiple spans. 
+ We are testing the following properties: + 1. Traceid is random + 2. First 32 bits of traceid is a timestamp + It is important to remember that the X-Ray traceId format had to be adapted to fit into the + definition of the OpenTelemetry traceid: + https://opentelemetry.io/docs/specs/otel/trace/api/#retrieving-the-traceid-and-spanid + Specifically for an X-Ray traceid to be a valid Otel traceId, the version digit had to be + dropped. Reference: + https://github.com/open-telemetry/opentelemetry-python-contrib/blob/main/sdk-extension/opentelemetry-sdk-extension-aws/src/opentelemetry/sdk/extension/aws/trace/aws_xray_id_generator.py + """ + + seen: List[str] = [] + for _ in range(100): + address: str = self.application.get_container_host_ip() + port: str = self.application.get_exposed_port(self.get_application_port()) + url: str = f"http://{address}:{port}/success" + response: Response = request("GET", url, timeout=20) + self.assertEqual(200, response.status_code) + + # Since we just made the request, the time in epoch registered in the traceid should be + # approximately equal to the current time in the test, since both run on the same host.
+ start_time_sec: int = int(time.time()) + + resource_scope_spans: List[ResourceScopeSpan] = self.mock_collector_client.get_traces() + target_span: ResourceScopeSpan = resource_scope_spans[0] + self.assertEqual(target_span.span.name, "GET success") + + self.assertTrue(target_span.span.trace_id.hex() not in seen) + seen.append(target_span.span.trace_id.hex()) + + # trace_id is bytes, so we convert it to a hex string and pick the first 8 hex characters + # (4 bytes = 32 bits) that represent the timestamp, then convert them to an int timestamp in seconds + trace_id_time_stamp_int: int = int(target_span.span.trace_id.hex()[:8], 16) + + # Give 2 minutes time range of tolerance for the trace timestamp + self.assertGreater(trace_id_time_stamp_int, start_time_sec - 60) + self.assertGreater(start_time_sec + 60, trace_id_time_stamp_int) + self.mock_collector_client.clear_signals() diff --git a/contract-tests/tests/test/amazon/misc/resource_attributes_test_base.py b/contract-tests/tests/test/amazon/misc/resource_attributes_test_base.py new file mode 100644 index 000000000..cdb8327b9 --- /dev/null +++ b/contract-tests/tests/test/amazon/misc/resource_attributes_test_base.py @@ -0,0 +1,78 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0 +from typing import Dict, List + +from mock_collector_client import ResourceScopeMetric, ResourceScopeSpan +from requests import Response, request +from typing_extensions import override + +from amazon.base.contract_test_base import ContractTestBase +from amazon.utils.application_signals_constants import ERROR_METRIC, FAULT_METRIC, LATENCY_METRIC +from opentelemetry.proto.common.v1.common_pb2 import AnyValue +from opentelemetry.proto.metrics.v1.metrics_pb2 import Metric +from opentelemetry.proto.trace.v1.trace_pb2 import Span + + +def _get_k8s_attributes(): + return { + "k8s.namespace.name": "namespace-name", + "k8s.pod.name": "pod-name", + "k8s.deployment.name": "deployment-name", + } + + +# Tests consuming this class are supposed to validate that the agent is able to get the resource +# attributes through the environment variables OTEL_RESOURCE_ATTRIBUTES and OTEL_SERVICE_NAME +# +# These tests are structured as separate subclasses (one per test file) since it is only possible to +# change the resource attributes during the initialization of the OpenTelemetry SDK. + + +class ResourceAttributesTest(ContractTestBase): + @override + def get_application_image_name(self) -> str: + return "aws-application-signals-tests-django-app" + + @override + def get_application_wait_pattern(self) -> str: + return "Quit the server with CONTROL-C."
+ + @override + def get_application_extra_environment_variables(self): + return {"DJANGO_SETTINGS_MODULE": "django_server.settings"} + + def do_test_resource_attributes(self, service_name): + address: str = self.application.get_container_host_ip() + port: str = self.application.get_exposed_port(self.get_application_port()) + url: str = f"http://{address}:{port}/success" + response: Response = request("GET", url, timeout=20) + self.assertEqual(200, response.status_code) + self.assert_resource_attributes(service_name) + + def assert_resource_attributes(self, service_name): + resource_scope_spans: List[ResourceScopeSpan] = self.mock_collector_client.get_traces() + metrics: List[ResourceScopeMetric] = self.mock_collector_client.get_metrics( + {LATENCY_METRIC, ERROR_METRIC, FAULT_METRIC} + ) + target_spans: List[Span] = [] + for resource_scope_span in resource_scope_spans: + # pylint: disable=no-member + if resource_scope_span.span.name == "GET success": + target_spans.append(resource_scope_span.resource_spans) + + self.assertEqual(len(target_spans), 1) + attributes_dict: Dict[str, AnyValue] = self._get_attributes_dict(target_spans[0].resource.attributes) + for key, value in _get_k8s_attributes().items(): + self._assert_str_attribute(attributes_dict, key, value) + self._assert_str_attribute(attributes_dict, "service.name", service_name) + + target_metrics: List[Metric] = [] + for resource_scope_metric in metrics: + if resource_scope_metric.metric.name in ["Error", "Fault", "Latency"]: + target_metrics.append(resource_scope_metric.resource_metrics) + self.assertEqual(len(target_metrics), 3) + for target_metric in target_metrics: + metric_attributes_dict: Dict[str, AnyValue] = self._get_attributes_dict(target_metric.resource.attributes) + for key, value in _get_k8s_attributes().items(): + self._assert_str_attribute(metric_attributes_dict, key, value) + self._assert_str_attribute(metric_attributes_dict, "service.name", service_name) diff --git 
a/contract-tests/tests/test/amazon/misc/service_name_in_env_var_test.py b/contract-tests/tests/test/amazon/misc/service_name_in_env_var_test.py new file mode 100644 index 000000000..f8270d0ce --- /dev/null +++ b/contract-tests/tests/test/amazon/misc/service_name_in_env_var_test.py @@ -0,0 +1,25 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 +from typing import List + +from resource_attributes_test_base import ResourceAttributesTest, _get_k8s_attributes +from typing_extensions import override + + +class ServiceNameInEnvVarTest(ResourceAttributesTest): + + @override + # pylint: disable=no-self-use + def get_application_extra_environment_variables(self) -> str: + return {"DJANGO_SETTINGS_MODULE": "django_server.settings", "OTEL_SERVICE_NAME": "service-name-test"} + + @override + # pylint: disable=no-self-use + def get_application_otel_resource_attributes(self) -> str: + pairlist: List[str] = [] + for key, value in _get_k8s_attributes().items(): + pairlist.append(key + "=" + value) + return ",".join(pairlist) + + def test_service(self) -> None: + self.do_test_resource_attributes("service-name-test") diff --git a/contract-tests/tests/test/amazon/misc/service_name_in_resource_attributes_test.py b/contract-tests/tests/test/amazon/misc/service_name_in_resource_attributes_test.py new file mode 100644 index 000000000..4449ee5b7 --- /dev/null +++ b/contract-tests/tests/test/amazon/misc/service_name_in_resource_attributes_test.py @@ -0,0 +1,21 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+# SPDX-License-Identifier: Apache-2.0 +from typing import List + +from resource_attributes_test_base import ResourceAttributesTest, _get_k8s_attributes +from typing_extensions import override + + +class ServiceNameInResourceAttributesTest(ResourceAttributesTest): + + @override + # pylint: disable=no-self-use + def get_application_otel_resource_attributes(self) -> str: + pairlist: List[str] = [] + for key, value in _get_k8s_attributes().items(): + pairlist.append(key + "=" + value) + pairlist.append("service.name=service-name") + return ",".join(pairlist) + + def test_service(self) -> None: + self.do_test_resource_attributes("service-name") diff --git a/contract-tests/tests/test/amazon/misc/unknown_service_name_test.py b/contract-tests/tests/test/amazon/misc/unknown_service_name_test.py new file mode 100644 index 000000000..b9834c74a --- /dev/null +++ b/contract-tests/tests/test/amazon/misc/unknown_service_name_test.py @@ -0,0 +1,20 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 +from typing import List + +from resource_attributes_test_base import ResourceAttributesTest, _get_k8s_attributes +from typing_extensions import override + + +class UnknownServiceNameTest(ResourceAttributesTest): + + @override + # pylint: disable=no-self-use + def get_application_otel_resource_attributes(self) -> str: + pairlist: List[str] = [] + for key, value in _get_k8s_attributes().items(): + pairlist.append(key + "=" + value) + return ",".join(pairlist) + + def test_service(self) -> None: + self.do_test_resource_attributes("unknown_service")