diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_span_processing_util.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_span_processing_util.py
new file mode 100644
index 000000000..1d4060bd7
--- /dev/null
+++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_span_processing_util.py
@@ -0,0 +1,173 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""Utility module designed to support shared logic across AWS Span Processors."""
+from amazon.opentelemetry.distro._aws_attribute_keys import AwsAttributeKeys
+from opentelemetry.sdk.trace import InstrumentationScope, ReadableSpan, SpanContext
+from opentelemetry.semconv.trace import MessagingOperationValues, SpanAttributes
+from opentelemetry.trace import SpanKind
+
+# Default attribute values if no valid span attribute value is identified
+UNKNOWN_SERVICE: str = "UnknownService"
+UNKNOWN_OPERATION: str = "UnknownOperation"
+UNKNOWN_REMOTE_SERVICE: str = "UnknownRemoteService"
+UNKNOWN_REMOTE_OPERATION: str = "UnknownRemoteOperation"
+INTERNAL_OPERATION: str = "InternalOperation"
+LOCAL_ROOT: str = "LOCAL_ROOT"
+
+# Useful constants
+_SQS_RECEIVE_MESSAGE_SPAN_NAME: str = "Sqs.ReceiveMessage"
+_AWS_SDK_INSTRUMENTATION_SCOPE_PREFIX: str = "io.opentelemetry.aws-sdk-"
+
+
+def get_ingress_operation(span: ReadableSpan) -> str:
+    """
+    Return the ingress operation (i.e. the operation for Server and Consumer spans).
+
+    Local roots that are not SERVER spans get InternalOperation. Otherwise the span name is used, unless it is
+    None, UnknownOperation or the http.method value, in which case the operation is generated from
+    "http.method + the first part of http.target".
+    """
+    operation: str = span.name
+    if should_use_internal_operation(span):
+        operation = INTERNAL_OPERATION
+    elif not _is_valid_operation(span, operation):
+        operation = _generate_ingress_operation(span)
+    return operation
+
+
+def get_egress_operation(span: ReadableSpan) -> str:
+    """
+    Return InternalOperation for local roots that are not SERVER spans, otherwise the value of the span's
+    AWS_LOCAL_OPERATION attribute (None when that attribute is not set).
+    """
+    if should_use_internal_operation(span):
+        return INTERNAL_OPERATION
+    return span.attributes.get(AwsAttributeKeys.AWS_LOCAL_OPERATION)
+
+
+def extract_api_path_value(http_target: str) -> str:
+    """Extract the first part from API http target if it exists
+
+    Args
+        http_target - http request target string value. Eg, /payment/1234
+    Returns
+        the first part from the http target. Eg, /payment
+        "/" when http_target is None, empty or contains no "/" separator
+    """
+    if not http_target:
+        return "/"
+    paths = http_target.split("/")
+    if len(paths) > 1:
+        return "/" + paths[1]
+    return "/"
+
+
+def is_key_present(span: ReadableSpan, key: str) -> bool:
+    """Return True when the span has a non-None attribute value for key."""
+    return span.attributes.get(key) is not None
+
+
+def is_aws_sdk_span(span: ReadableSpan) -> bool:
+    # https://opentelemetry.io/docs/specs/otel/trace/semantic_conventions/instrumentation/aws-sdk/#common-attributes
+    return "aws-api" == span.attributes.get(SpanAttributes.RPC_SYSTEM)
+
+
+def should_generate_service_metric_attributes(span: ReadableSpan) -> bool:
+    """True for SERVER spans and for local roots that are not SQS ReceiveMessage consumer spans."""
+    return (is_local_root(span) and not _is_sqs_receive_message_consumer_span(span)) or SpanKind.SERVER == span.kind
+
+
+def should_generate_dependency_metric_attributes(span: ReadableSpan) -> bool:
+    """True for CLIENT and PRODUCER spans, and for dependency CONSUMER spans other than SQS ReceiveMessage."""
+    return (
+        SpanKind.CLIENT == span.kind
+        or SpanKind.PRODUCER == span.kind
+        or (_is_dependency_consumer_span(span) and not _is_sqs_receive_message_consumer_span(span))
+    )
+
+
+def is_consumer_process_span(span: ReadableSpan) -> bool:
+    """True for CONSUMER spans whose MESSAGING_OPERATION attribute equals MessagingOperationValues.PROCESS."""
+    # NOTE(review): the attribute is compared against the enum member itself, not its string value - confirm that
+    # spans carrying the plain string form of the operation are matched as intended.
+    messaging_operation: str = span.attributes.get(SpanAttributes.MESSAGING_OPERATION)
+    return SpanKind.CONSUMER == span.kind and MessagingOperationValues.PROCESS == messaging_operation
+
+
+def should_use_internal_operation(span: ReadableSpan) -> bool:
+    """
+    Any spans that are Local Roots and also not SERVER should have aws.local.operation renamed to InternalOperation.
+    """
+    return is_local_root(span) and SpanKind.SERVER != span.kind
+
+
+def is_local_root(span: ReadableSpan) -> bool:
+    """
+    A span is a local root if it has no parent or if the parent is remote. This function checks the parent context
+    and returns true if it is a local root.
+    """
+    return span.parent is None or not span.parent.is_valid or span.parent.is_remote
+
+
+def _is_sqs_receive_message_consumer_span(span: ReadableSpan) -> bool:
+    """To identify the SQS consumer spans produced by AWS SDK instrumentation"""
+    messaging_operation: str = span.attributes.get(SpanAttributes.MESSAGING_OPERATION)
+    instrumentation_scope: InstrumentationScope = span.instrumentation_scope
+
+    return (
+        _SQS_RECEIVE_MESSAGE_SPAN_NAME.casefold() == span.name.casefold()
+        and SpanKind.CONSUMER == span.kind
+        and instrumentation_scope is not None
+        and instrumentation_scope.name.startswith(_AWS_SDK_INSTRUMENTATION_SCOPE_PREFIX)
+        and (messaging_operation is None or messaging_operation == MessagingOperationValues.PROCESS)
+    )
+
+
+def _is_dependency_consumer_span(span: ReadableSpan) -> bool:
+    """
+    True for CONSUMER spans, except "process" consumer spans that are not local roots and whose
+    AWS_CONSUMER_PARENT_SPAN_KIND attribute equals SpanKind.CONSUMER.
+    """
+    if SpanKind.CONSUMER != span.kind:
+        return False
+
+    if is_consumer_process_span(span):
+        if is_local_root(span):
+            return True
+        parent_span_kind: str = span.attributes.get(AwsAttributeKeys.AWS_CONSUMER_PARENT_SPAN_KIND)
+        return SpanKind.CONSUMER != parent_span_kind
+
+    return True
+
+
+def _is_valid_operation(span: ReadableSpan, operation: str) -> bool:
+    """
+    When Span name is null, UnknownOperation or HttpMethod value, it will be treated as invalid local operation value
+    that needs to be further processed
+    """
+    if operation is None or operation == UNKNOWN_OPERATION:
+        return False
+
+    if is_key_present(span, SpanAttributes.HTTP_METHOD):
+        http_method: str = span.attributes.get(SpanAttributes.HTTP_METHOD)
+        return operation != http_method
+
+    return True
+
+
+def _generate_ingress_operation(span: ReadableSpan) -> str:
+    """
+    When span name is not meaningful (null, unknown or http_method value) as operation name for http use cases, try
+    to extract the operation name from the http target string.
+    """
+    operation: str = UNKNOWN_OPERATION
+    http_target: str = span.attributes.get(SpanAttributes.HTTP_TARGET)
+    if http_target is not None:
+        # get the first part from API path string as operation value
+        # the more levels/parts we get from API path the higher chance for getting high cardinality data
+        operation = extract_api_path_value(http_target)
+        http_method: str = span.attributes.get(SpanAttributes.HTTP_METHOD)
+        if http_method is not None:
+            operation = http_method + " " + operation
+
+    return operation
diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_span_processing_util.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_span_processing_util.py
new file mode 100644
index 000000000..84fb6c7fb
--- /dev/null
+++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_span_processing_util.py
@@ -0,0 +1,19 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+from unittest import TestCase
+
+from amazon.opentelemetry.distro._aws_span_processing_util import extract_api_path_value, is_key_present
+from opentelemetry.sdk.trace import ReadableSpan
+
+
+class TestAwsSpanProcessingUtil(TestCase):
+    def test_basic(self):
+        span: ReadableSpan = ReadableSpan(name="test")
+        self.assertFalse(is_key_present(span, "test"))
+
+    def test_extract_api_path_value(self):
+        self.assertEqual(extract_api_path_value(None), "/")
+        self.assertEqual(extract_api_path_value(""), "/")
+        self.assertEqual(extract_api_path_value("payment"), "/")
+        self.assertEqual(extract_api_path_value("/payment"), "/payment")
+        self.assertEqual(extract_api_path_value("/payment/1234"), "/payment")