Skip to content

Commit

Permalink
Merge branch 'main' into post-release-bot
Browse files Browse the repository at this point in the history
  • Loading branch information
XinRanZhAWS authored Apr 26, 2024
2 parents 39a8aff + e042cfe commit 46e802b
Show file tree
Hide file tree
Showing 13 changed files with 344 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,5 @@ jobs:
secrets: inherit
with:
aws-region: ${{ matrix.aws-region }}
test-cluster-name: 'e2e-python-canary-test'
test-cluster-name: 'e2e-python-second-test'
caller-workflow-name: 'appsignals-python-e2e-eks-canary-test'
4 changes: 2 additions & 2 deletions .github/workflows/daily_scan.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,15 +82,15 @@ jobs:
id: high_scan
uses: ./.github/actions/image_scan
with:
image-ref: "public.ecr.aws/aws-observability/adot-autoinstrumentation-python:v0.1.0"
image-ref: "public.ecr.aws/aws-observability/adot-autoinstrumentation-python:v0.1.1"
severity: 'CRITICAL,HIGH'

- name: Perform low image scan
if: always()
id: low_scan
uses: ./.github/actions/image_scan
with:
image-ref: "public.ecr.aws/aws-observability/adot-autoinstrumentation-python:v0.1.0"
image-ref: "public.ecr.aws/aws-observability/adot-autoinstrumentation-python:v0.1.1"
severity: 'MEDIUM,LOW,UNKNOWN'

- name: Configure AWS Credentials for emitting metrics
Expand Down
15 changes: 8 additions & 7 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,19 @@
# The packages are installed in the `/autoinstrumentation` directory. This is required as when instrumenting the pod by CWOperator,
# one init container will be created to copy all the content in `/autoinstrumentation` directory to app's container. Then
# update the `PYTHONPATH` environment variable accordingly. Then in the second stage, copy the directory to `/autoinstrumentation`.

# Using Python 3.10 because we are utilizing the opentelemetry-exporter-otlp-proto-grpc exporter,
# which relies on grpcio as a dependency. grpcio has strict dependencies on the OS and Python version.
# Also mentioned in Docker build template in the upstream repository:
# https://github.com/open-telemetry/opentelemetry-operator/blob/b5bb0ae34720d4be2d229dafecb87b61b37699b0/autoinstrumentation/python/requirements.txt#L2
# For further details, please refer to: https://github.com/MicrosoftDocs/azure-docs/blob/main/articles/azure-functions/recover-python-functions.md#the-python-interpre[…]tions-python-worker
FROM python:3.10 AS build
FROM python:3.11 AS build

WORKDIR /operator-build

ADD aws-opentelemetry-distro/ ./aws-opentelemetry-distro/

# Remove opentelemetry-exporter-otlp-proto-grpc and grpcio, as grpcio has strict dependencies on the Python version and
# will cause confusing failures if gRPC protocol is used. Now if gRPC protocol is requested by the user, instrumentation
# will complain that grpc is not installed, which is more understandable. References:
# * https://github.com/open-telemetry/opentelemetry-operator/blob/b5bb0ae34720d4be2d229dafecb87b61b37699b0/autoinstrumentation/python/requirements.txt#L2
# * https://github.com/MicrosoftDocs/azure-docs/blob/main/articles/azure-functions/recover-python-functions.md#troubleshoot-cannot-import-cygrpc
RUN sed -i "/opentelemetry-exporter-otlp-proto-grpc/d" ./aws-opentelemetry-distro/pyproject.toml

RUN mkdir workspace && pip install --target workspace ./aws-opentelemetry-distro

FROM public.ecr.aws/amazonlinux/amazonlinux:minimal
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
)
from amazon.opentelemetry.distro.aws_span_metrics_processor_builder import AwsSpanMetricsProcessorBuilder
from amazon.opentelemetry.distro.sampler.aws_xray_remote_sampler import AwsXRayRemoteSampler
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter as OTLPGrpcOTLPMetricExporter
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter as OTLPHttpOTLPMetricExporter
from opentelemetry.sdk._configuration import (
_get_exporter_names,
Expand Down Expand Up @@ -274,17 +273,10 @@ def __new__(cls, *args, **kwargs):
# pylint: disable=no-self-use
def create_exporter(self):
protocol = os.environ.get(
OTEL_EXPORTER_OTLP_METRICS_PROTOCOL, os.environ.get(OTEL_EXPORTER_OTLP_PROTOCOL, "grpc")
OTEL_EXPORTER_OTLP_METRICS_PROTOCOL, os.environ.get(OTEL_EXPORTER_OTLP_PROTOCOL, "http/protobuf")
)
_logger.debug("AWS Application Signals export protocol: %s", protocol)

application_signals_endpoint = os.environ.get(
APPLICATION_SIGNALS_EXPORTER_ENDPOINT_CONFIG,
os.environ.get(APP_SIGNALS_EXPORTER_ENDPOINT_CONFIG, "http://localhost:4315"),
)

_logger.debug("AWS Application Signals export endpoint: %s", application_signals_endpoint)

temporality_dict: Dict[type, AggregationTemporality] = {}
for typ in [
Counter,
Expand All @@ -298,10 +290,27 @@ def create_exporter(self):
temporality_dict[typ] = AggregationTemporality.DELTA

if protocol == "http/protobuf":
application_signals_endpoint = os.environ.get(
APPLICATION_SIGNALS_EXPORTER_ENDPOINT_CONFIG,
os.environ.get(APP_SIGNALS_EXPORTER_ENDPOINT_CONFIG, "http://localhost:4316/v1/metrics"),
)
_logger.debug("AWS Application Signals export endpoint: %s", application_signals_endpoint)
return OTLPHttpOTLPMetricExporter(
endpoint=application_signals_endpoint, preferred_temporality=temporality_dict
)
if protocol == "grpc":
# pylint: disable=import-outside-toplevel
# Delay import to only occur if gRPC specifically requested. Vended Docker image will not have gRPC bundled,
# so importing it at the class level can cause runtime failures.
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import (
OTLPMetricExporter as OTLPGrpcOTLPMetricExporter,
)

application_signals_endpoint = os.environ.get(
APPLICATION_SIGNALS_EXPORTER_ENDPOINT_CONFIG,
os.environ.get(APP_SIGNALS_EXPORTER_ENDPOINT_CONFIG, "localhost:4315"),
)
_logger.debug("AWS Application Signals export endpoint: %s", application_signals_endpoint)
return OTLPGrpcOTLPMetricExporter(
endpoint=application_signals_endpoint, preferred_temporality=temporality_dict
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,29 @@
from amazon.opentelemetry.distro.patches._instrumentation_patch import apply_instrumentation_patches
from opentelemetry.distro import OpenTelemetryDistro
from opentelemetry.environment_variables import OTEL_PROPAGATORS, OTEL_PYTHON_ID_GENERATOR
from opentelemetry.sdk.environment_variables import OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION
from opentelemetry.sdk.environment_variables import (
OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION,
OTEL_EXPORTER_OTLP_PROTOCOL,
)


class AwsOpenTelemetryDistro(OpenTelemetryDistro):
def _configure(self, **kwargs):
"""
"""Sets up default environment variables and apply patches
Set default OTEL_EXPORTER_OTLP_PROTOCOL to be HTTP. This must be run before super(), which attempts to set the
default to gRPC. If we run afterwards, we don't know if the default was set by base OpenTelemetryDistro or if it
was set by the user. We are setting to HTTP as gRPC does not work out of the box for the vended docker image,
due to gRPC having a strict dependency on the Python version the artifact was built for (OTEL observed this:
https://github.com/open-telemetry/opentelemetry-operator/blob/461ba68e80e8ac6bf2603eb353547cd026119ed2/autoinstrumentation/python/requirements.txt#L2-L3)
Also sets default OTEL_PROPAGATORS, OTEL_PYTHON_ID_GENERATOR, and
OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION to ensure good compatibility with X-Ray and Application
Signals.
Also applies patches to upstream instrumentation - usually these are stopgap measures until we can contribute
long-term changes to upstream.
kwargs:
apply_patches: bool - apply patches to upstream instrumentation. Default is True.
Expand All @@ -19,13 +36,15 @@ def _configure(self, **kwargs):
OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION environment variable. Need to work with upstream to
make it to be configurable.
"""
os.environ.setdefault(OTEL_EXPORTER_OTLP_PROTOCOL, "http/protobuf")

super(AwsOpenTelemetryDistro, self)._configure()

os.environ.setdefault(OTEL_PROPAGATORS, "xray,tracecontext,b3,b3multi")
os.environ.setdefault(OTEL_PYTHON_ID_GENERATOR, "xray")
os.environ.setdefault(
OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION, "base2_exponential_bucket_histogram"
)
os.environ.setdefault(OTEL_PROPAGATORS, "xray,tracecontext,b3,b3multi")
os.environ.setdefault(OTEL_PYTHON_ID_GENERATOR, "xray")

# Apply patches to upstream instrumentation - usually stopgap measures until we can contribute long-term changes
if kwargs.get("apply_patches", True):
apply_instrumentation_patches()
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

__version__ = "0.1.0.dev0"
__version__ = "0.1.1.dev0"
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from amazon.opentelemetry.distro.attribute_propagating_span_processor import AttributePropagatingSpanProcessor
from amazon.opentelemetry.distro.aws_metric_attributes_span_exporter import AwsMetricAttributesSpanExporter
from amazon.opentelemetry.distro.aws_opentelemetry_configurator import (
ApplicationSignalsExporterProvider,
AwsOpenTelemetryConfigurator,
_custom_import_sampler,
_customize_exporter,
Expand All @@ -21,6 +22,9 @@
from amazon.opentelemetry.distro.sampler._aws_xray_sampling_client import _AwsXRaySamplingClient
from amazon.opentelemetry.distro.sampler.aws_xray_remote_sampler import AwsXRayRemoteSampler
from opentelemetry.environment_variables import OTEL_LOGS_EXPORTER, OTEL_METRICS_EXPORTER, OTEL_TRACES_EXPORTER
from opentelemetry.exporter.otlp.proto.common._internal.metrics_encoder import OTLPMetricExporterMixin
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter as OTLPGrpcOTLPMetricExporter
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter as OTLPHttpOTLPMetricExporter
from opentelemetry.sdk.environment_variables import OTEL_TRACES_SAMPLER, OTEL_TRACES_SAMPLER_ARG
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import Span, SpanProcessor, Tracer, TracerProvider
Expand All @@ -29,18 +33,28 @@
from opentelemetry.trace import get_tracer_provider


# This class setup Tracer Provider Globally, which can only set once
# if there is another setup for tracer provider, may cause issue
class TestAwsOpenTelemetryConfigurator(TestCase):
"""Tests AwsOpenTelemetryConfigurator and AwsOpenTelemetryDistro
NOTE: This class setup Tracer Provider Globally, which can only be set once. If there is another setup for tracer
provider, it may cause issues for those tests.
"""

@classmethod
def setUpClass(cls):
os.environ.setdefault(OTEL_TRACES_EXPORTER, "none")
os.environ.setdefault(OTEL_METRICS_EXPORTER, "none")
os.environ.setdefault(OTEL_LOGS_EXPORTER, "none")
os.environ.setdefault(OTEL_TRACES_SAMPLER, "traceidratio")
os.environ.setdefault(OTEL_TRACES_SAMPLER_ARG, "0.01")
# Run AwsOpenTelemetryDistro to set up environment, then validate expected env values.
aws_open_telemetry_distro: AwsOpenTelemetryDistro = AwsOpenTelemetryDistro()
aws_open_telemetry_distro.configure(apply_patches=False)
validate_distro_environ()

# Overwrite exporter configs to keep tests clean, set sampler configs for tests
os.environ[OTEL_TRACES_EXPORTER] = "none"
os.environ[OTEL_METRICS_EXPORTER] = "none"
os.environ[OTEL_LOGS_EXPORTER] = "none"
os.environ[OTEL_TRACES_SAMPLER] = "traceidratio"
os.environ[OTEL_TRACES_SAMPLER_ARG] = "0.01"

# Run configurator and get trace provider
aws_otel_configurator: AwsOpenTelemetryConfigurator = AwsOpenTelemetryConfigurator()
aws_otel_configurator.configure()
cls.tracer_provider: TracerProvider = get_tracer_provider()
Expand Down Expand Up @@ -249,3 +263,40 @@ def test_customize_span_processors(self):
second_processor: SpanProcessor = mock_tracer_provider.add_span_processor.call_args_list[1].args[0]
self.assertIsInstance(second_processor, AwsSpanMetricsProcessor)
os.environ.pop("OTEL_AWS_APPLICATION_SIGNALS_ENABLED", None)

def test_application_signals_exporter_provider(self):
# Check default protocol - HTTP, as specified by AwsOpenTelemetryDistro.
exporter: OTLPMetricExporterMixin = ApplicationSignalsExporterProvider().create_exporter()
self.assertIsInstance(exporter, OTLPHttpOTLPMetricExporter)
self.assertEqual("http://localhost:4316/v1/metrics", exporter._endpoint)

# Overwrite protocol to gRPC.
os.environ["OTEL_EXPORTER_OTLP_PROTOCOL"] = "grpc"
exporter: SpanExporter = ApplicationSignalsExporterProvider().create_exporter()
self.assertIsInstance(exporter, OTLPGrpcOTLPMetricExporter)
self.assertEqual("localhost:4315", exporter._endpoint)

# Overwrite protocol back to HTTP.
os.environ["OTEL_EXPORTER_OTLP_PROTOCOL"] = "http/protobuf"
exporter: SpanExporter = ApplicationSignalsExporterProvider().create_exporter()
self.assertIsInstance(exporter, OTLPHttpOTLPMetricExporter)
self.assertEqual("http://localhost:4316/v1/metrics", exporter._endpoint)


def validate_distro_environ():
tc: TestCase = TestCase()
# Set by OpenTelemetryDistro
tc.assertEqual("otlp", os.environ.get("OTEL_TRACES_EXPORTER"))
tc.assertEqual("otlp", os.environ.get("OTEL_METRICS_EXPORTER"))

# Set by AwsOpenTelemetryDistro
tc.assertEqual("http/protobuf", os.environ.get("OTEL_EXPORTER_OTLP_PROTOCOL"))
tc.assertEqual(
"base2_exponential_bucket_histogram", os.environ.get("OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION")
)
tc.assertEqual("xray,tracecontext,b3,b3multi", os.environ.get("OTEL_PROPAGATORS"))
tc.assertEqual("xray", os.environ.get("OTEL_PYTHON_ID_GENERATOR"))

# Not set
tc.assertEqual(None, os.environ.get("OTEL_TRACES_SAMPLER"))
tc.assertEqual(None, os.environ.get("OTEL_TRACES_SAMPLER_ARG"))
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def setUp(self) -> None:
.with_env("OTEL_METRIC_EXPORT_INTERVAL", "50")
.with_env("OTEL_AWS_APPLICATION_SIGNALS_ENABLED", "true")
.with_env("OTEL_METRICS_EXPORTER", "none")
.with_env("OTEL_EXPORTER_OTLP_PROTOCOL", "grpc")
.with_env("OTEL_BSP_SCHEDULE_DELAY", "1")
.with_env("OTEL_AWS_APPLICATION_SIGNALS_EXPORTER_ENDPOINT", f"http://collector:{_MOCK_COLLECTOR_PORT}")
.with_env("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", f"http://collector:{_MOCK_COLLECTOR_PORT}")
Expand Down
87 changes: 87 additions & 0 deletions contract-tests/tests/test/amazon/misc/configuration_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
import time
from typing import List

from mock_collector_client import ResourceScopeMetric, ResourceScopeSpan
from requests import Response, request
from typing_extensions import override

from amazon.base.contract_test_base import ContractTestBase
from amazon.utils.application_signals_constants import ERROR_METRIC, FAULT_METRIC, LATENCY_METRIC
from opentelemetry.sdk.metrics.export import AggregationTemporality

# Tests in this class are supposed to validate that the SDK was configured in the correct way: It
# uses the X-Ray ID format. Metrics are deltaPreferred. Type of the metrics are exponentialHistogram


class ConfigurationTest(ContractTestBase):
@override
def get_application_image_name(self) -> str:
return "aws-application-signals-tests-django-app"

@override
def get_application_wait_pattern(self) -> str:
return "Quit the server with CONTROL-C."

@override
def get_application_extra_environment_variables(self):
return {"DJANGO_SETTINGS_MODULE": "django_server.settings"}

def test_configuration_metrics(self):
address: str = self.application.get_container_host_ip()
port: str = self.application.get_exposed_port(self.get_application_port())
url: str = f"http://{address}:{port}/success"
response: Response = request("GET", url, timeout=20)
self.assertEqual(200, response.status_code)
metrics: List[ResourceScopeMetric] = self.mock_collector_client.get_metrics(
{LATENCY_METRIC, ERROR_METRIC, FAULT_METRIC}
)

self.assertEqual(len(metrics), 3)
for metric in metrics:
self.assertIsNotNone(metric.metric.exponential_histogram)
self.assertEqual(metric.metric.exponential_histogram.aggregation_temporality, AggregationTemporality.DELTA)

def test_xray_id_format(self):
"""
We are testing here that the X-Ray id format is always used by inspecting the traceid that
was in the span received by the collector, which should be consistent across multiple spans.
We are testing the following properties:
1. Traceid is random
2. First 32 bits of traceid is a timestamp
It is important to remember that the X-Ray traceId format had to be adapted to fit into the
definition of the OpenTelemetry traceid:
https://opentelemetry.io/docs/specs/otel/trace/api/#retrieving-the-traceid-and-spanid
Specifically for an X-Ray traceid to be a valid Otel traceId, the version digit had to be
dropped. Reference:
https://github.com/open-telemetry/opentelemetry-python-contrib/blob/main/sdk-extension/opentelemetry-sdk-extension-aws/src/opentelemetry/sdk/extension/aws/trace/aws_xray_id_generator.py
"""

seen: List[str] = []
for _ in range(100):
address: str = self.application.get_container_host_ip()
port: str = self.application.get_exposed_port(self.get_application_port())
url: str = f"http://{address}:{port}/success"
response: Response = request("GET", url, timeout=20)
self.assertEqual(200, response.status_code)

# Since we just made the request, the time in epoch registered in the traceid should be
# approximate equal to the current time in the test, since both run on the same host.
start_time_sec: int = int(time.time())

resource_scope_spans: List[ResourceScopeSpan] = self.mock_collector_client.get_traces()
target_span: ResourceScopeSpan = resource_scope_spans[0]
self.assertEqual(target_span.span.name, "GET success")

self.assertTrue(target_span.span.trace_id.hex() not in seen)
seen.append(target_span.span.trace_id.hex())

# trace_id is bytes, so we convert it to hex string and pick the first 8 byte
# that represent the timestamp, then convert it to int for timestamp in second
trace_id_time_stamp_int: int = int(target_span.span.trace_id.hex()[:8], 16)

# Give 2 minutes time range of tolerance for the trace timestamp
self.assertGreater(trace_id_time_stamp_int, start_time_sec - 60)
self.assertGreater(start_time_sec + 60, trace_id_time_stamp_int)
self.mock_collector_client.clear_signals()
Loading

0 comments on commit 46e802b

Please sign in to comment.