From 26e62a66f1f58c3c1fe096c614e5d851c772f539 Mon Sep 17 00:00:00 2001 From: Thomas Pierce Date: Thu, 9 May 2024 10:37:42 -0700 Subject: [PATCH] Metric Schema changes (#150) In this commit, we are removing RemoteTarget and replacing with RemoteResourceIdentifier and RemoteResourceType. Further, we are formatting RemoteService, and the content of the RemoteResource attributes such that they align with AWS Cloud Control resource names. By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --- .../distro/_aws_attribute_keys.py | 3 +- .../distro/_aws_metric_attribute_generator.py | 78 +++++---- .../opentelemetry/distro/sqs_url_parser.py | 147 +++-------------- .../test_aws_metric_attribute_generator.py | 153 ++++-------------- .../distro/test_sqs_url_parsers.py | 53 ++++++ .../test/amazon/botocore/botocore_test.py | 116 ++++++++----- .../utils/application_signals_constants.py | 3 +- 7 files changed, 229 insertions(+), 324 deletions(-) create mode 100644 aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_sqs_url_parsers.py diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_attribute_keys.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_attribute_keys.py index ef340ed6a..f6498ac76 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_attribute_keys.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_attribute_keys.py @@ -6,7 +6,8 @@ AWS_LOCAL_OPERATION: str = "aws.local.operation" AWS_REMOTE_SERVICE: str = "aws.remote.service" AWS_REMOTE_OPERATION: str = "aws.remote.operation" -AWS_REMOTE_TARGET: str = "aws.remote.target" +AWS_REMOTE_RESOURCE_TYPE: str = "aws.remote.resource.type" +AWS_REMOTE_RESOURCE_IDENTIFIER: str = "aws.remote.resource.identifier" AWS_SDK_DESCENDANT: str = "aws.sdk.descendant" AWS_CONSUMER_PARENT_SPAN_KIND: str = "aws.consumer.parent.span.kind" diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_metric_attribute_generator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_metric_attribute_generator.py index 9313e5a4a..51fd01e68 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_metric_attribute_generator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_metric_attribute_generator.py @@ -11,8 +11,9 @@ AWS_QUEUE_NAME, AWS_QUEUE_URL, AWS_REMOTE_OPERATION, + AWS_REMOTE_RESOURCE_IDENTIFIER, + AWS_REMOTE_RESOURCE_TYPE, AWS_REMOTE_SERVICE, - AWS_REMOTE_TARGET, AWS_SPAN_KIND, AWS_STREAM_NAME, ) @@ -65,6 +66,12 @@ _AWS_TABLE_NAMES: str = SpanAttributes.AWS_DYNAMODB_TABLE_NAMES _AWS_BUCKET_NAME: str = SpanAttributes.AWS_S3_BUCKET +# Normalized remote service names for supported AWS services +_NORMALIZED_DYNAMO_DB_SERVICE_NAME: str = "AWS::DynamoDB" +_NORMALIZED_KINESIS_SERVICE_NAME: str = "AWS::Kinesis" +_NORMALIZED_S3_SERVICE_NAME: str = "AWS::S3" +_NORMALIZED_SQS_SERVICE_NAME: str = "AWS::SQS" + # Special DEPENDENCY attribute value if GRAPHQL_OPERATION_TYPE attribute key is present. _GRAPHQL: str = "graphql" @@ -110,7 +117,7 @@ def _generate_dependency_metric_attributes(span: ReadableSpan, resource: Resourc _set_service(resource, span, attributes) _set_egress_operation(span, attributes) _set_remote_service_and_operation(span, attributes) - _set_remote_target(span, attributes) + _set_remote_type_and_identifier(span, attributes) _set_span_kind_for_dependency(span, attributes) return attributes @@ -198,7 +205,7 @@ def _set_remote_service_and_operation(span: ReadableSpan, attributes: BoundedAtt remote_service = _get_remote_service(span, AWS_REMOTE_SERVICE) remote_operation = _get_remote_operation(span, AWS_REMOTE_OPERATION) elif is_key_present(span, _RPC_SERVICE) or is_key_present(span, _RPC_METHOD): - remote_service = _normalize_service_name(span, _get_remote_service(span, _RPC_SERVICE)) + remote_service = _normalize_remote_service_name(span, _get_remote_service(span, _RPC_SERVICE)) remote_operation = _get_remote_operation(span, _RPC_METHOD) elif is_key_present(span, _DB_SYSTEM) or is_key_present(span, _DB_OPERATION) or is_key_present(span, _DB_STATEMENT): remote_service = _get_remote_service(span, _DB_SYSTEM) @@ -268,10 +275,14 @@ def _get_db_statement_remote_operation(span: ReadableSpan, statement_key: str) - return remote_operation -def _normalize_service_name(span: ReadableSpan, service_name: str) -> str: +def _normalize_remote_service_name(span: ReadableSpan, service_name: str) -> str: + """ + If the span is an AWS SDK span, normalize the name to align with AWS Cloud Control + resource format as much as possible. Long term, we would like to normalize service name in the upstream. + """ if is_aws_sdk_span(span): - return "AWS.SDK." + service_name - + return "AWS::" + service_name return service_name @@ -320,38 +331,39 @@ def _generate_remote_operation(span: ReadableSpan) -> str: return remote_operation -def _set_remote_target(span: ReadableSpan, attributes: BoundedAttributes) -> None: - remote_target: Optional[str] = _get_remote_target(span) - if remote_target is not None: - attributes[AWS_REMOTE_TARGET] = remote_target - - -def _get_remote_target(span: ReadableSpan) -> Optional[str]: +def _set_remote_type_and_identifier(span: ReadableSpan, attributes: BoundedAttributes) -> None: """ - RemoteTarget attribute AWS_REMOTE_TARGET is used to store the resource - name of the remote invokes, such as S3 bucket name, mysql table name, etc. - TODO: currently only support AWS resource name, will be extended to support - the general remote targets, such as ActiveMQ name, etc. - """ - if is_key_present(span, _AWS_BUCKET_NAME): - return "::s3:::" + span.attributes.get(_AWS_BUCKET_NAME) - - if is_key_present(span, AWS_QUEUE_URL): - arn = SqsUrlParser.get_sqs_remote_target(span.attributes.get(AWS_QUEUE_URL)) - if arn: - return arn - - if is_key_present(span, AWS_QUEUE_NAME): - return "::sqs:::" + span.attributes.get(AWS_QUEUE_NAME) + Remote resource attributes {@link AwsAttributeKeys#AWS_REMOTE_RESOURCE_TYPE} and {@link + AwsAttributeKeys#AWS_REMOTE_RESOURCE_IDENTIFIER} are used to store information about the resource associated with + the remote invocation, such as S3 bucket name, etc. We should only ever set both type and identifier or neither. - if is_key_present(span, AWS_STREAM_NAME): - return "::kinesis:::stream/" + span.attributes.get(AWS_STREAM_NAME) + AWS resources type and identifier adhere to AWS Cloud Control + resource format. + """ + remote_resource_type: Optional[str] = None + remote_resource_identifier: Optional[str] = None # Only extract the table name when _AWS_TABLE_NAMES has size equals to one if is_key_present(span, _AWS_TABLE_NAMES) and len(span.attributes.get(_AWS_TABLE_NAMES)) == 1: - return "::dynamodb:::table/" + span.attributes.get(_AWS_TABLE_NAMES)[0] - - return None + remote_resource_type = _NORMALIZED_DYNAMO_DB_SERVICE_NAME + "::Table" + remote_resource_identifier = span.attributes.get(_AWS_TABLE_NAMES)[0] + elif is_key_present(span, AWS_STREAM_NAME): + remote_resource_type = _NORMALIZED_KINESIS_SERVICE_NAME + "::Stream" + remote_resource_identifier = span.attributes.get(AWS_STREAM_NAME) + elif is_key_present(span, _AWS_BUCKET_NAME): + remote_resource_type = _NORMALIZED_S3_SERVICE_NAME + "::Bucket" + remote_resource_identifier = span.attributes.get(_AWS_BUCKET_NAME) + elif is_key_present(span, AWS_QUEUE_NAME): + remote_resource_type = _NORMALIZED_SQS_SERVICE_NAME + "::Queue" + remote_resource_identifier = span.attributes.get(AWS_QUEUE_NAME) + elif is_key_present(span, AWS_QUEUE_URL): + remote_resource_type = _NORMALIZED_SQS_SERVICE_NAME + "::Queue" + remote_resource_identifier = SqsUrlParser.get_queue_name(span.attributes.get(AWS_QUEUE_URL)) + + if remote_resource_type is not None and remote_resource_identifier is not None: + attributes[AWS_REMOTE_RESOURCE_TYPE] = remote_resource_type + attributes[AWS_REMOTE_RESOURCE_IDENTIFIER] = remote_resource_identifier def _set_span_kind_for_dependency(span: ReadableSpan, attributes: BoundedAttributes) -> None: diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/sqs_url_parser.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/sqs_url_parser.py index 28c8d4a73..4cc5e2935 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/sqs_url_parser.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/sqs_url_parser.py @@ -2,106 +2,30 @@ # SPDX-License-Identifier: Apache-2.0 from typing import List, Optional -_ARN_DELIMETER: str = ":" _HTTP_SCHEMA: str = "http://" _HTTPS_SCHEMA: str = "https://" class SqsUrlParser: @staticmethod - def get_sqs_remote_target(sqs_url: str) -> Optional[str]: - sqs_url: str = _strip_schema_from_url(sqs_url) - - if not _is_sqs_url(sqs_url) and not _is_legacy_sqs_url(sqs_url) and not _is_custom_url(sqs_url): + def get_queue_name(url: str) -> Optional[str]: + """ + Best-effort logic to extract queue name from an HTTP url. This method should only be used with a string that is, + with reasonably high confidence, an SQS queue URL. Handles new/legacy/some custom URLs. Essentially, we require + that the URL should have exactly three parts, delimited by /'s (excluding schema), the second part should be a + 12-digit account id, and the third part should be a valid queue name, per SQS naming conventions. + """ + if url is None: return None - - region: str = _get_region(sqs_url) - account_id: str = _get_account_id(sqs_url) - partition: str = _get_partition(sqs_url) - queue_name: str = _get_queue_name(sqs_url) - - remote_target: List[Optional[str]] = [] - - if all((region, account_id, partition, queue_name)): - remote_target.append("arn") - - remote_target.extend( - [ - _ARN_DELIMETER, - _null_to_empty(partition), - _ARN_DELIMETER, - "sqs", - _ARN_DELIMETER, - _null_to_empty(region), - _ARN_DELIMETER, - _null_to_empty(account_id), - _ARN_DELIMETER, - queue_name, - ] - ) - - return "".join(remote_target) - - -def _strip_schema_from_url(url: str) -> str: - return url.replace(_HTTP_SCHEMA, "").replace(_HTTPS_SCHEMA, "") - - -def _get_region(sqs_url: str) -> Optional[str]: - if sqs_url is None: + url = url.replace(_HTTP_SCHEMA, "").replace(_HTTPS_SCHEMA, "") + split_url: List[Optional[str]] = url.split("/") + if len(split_url) == 3 and _is_account_id(split_url[1]) and _is_valid_queue_name(split_url[2]): + return split_url[2] return None - if sqs_url.startswith("queue.amazonaws.com/"): - return "us-east-1" - - if _is_sqs_url(sqs_url): - return _get_region_from_sqs_url(sqs_url) - - if _is_legacy_sqs_url(sqs_url): - return _get_region_from_legacy_sqs_url(sqs_url) - - return None - - -def _is_sqs_url(sqs_url: str) -> bool: - split: List[Optional[str]] = sqs_url.split("/") - return ( - len(split) == 3 - and split[0].startswith("sqs.") - and split[0].endswith(".amazonaws.com") - and _is_account_id(split[1]) - and _is_valid_queue_name(split[2]) - ) - - -def _is_legacy_sqs_url(sqs_url: str) -> bool: - split: List[Optional[str]] = sqs_url.split("/") - return ( - len(split) == 3 - and split[0].endswith(".queue.amazonaws.com") - and _is_account_id(split[1]) - and _is_valid_queue_name(split[2]) - ) - - -def _is_custom_url(sqs_url: str) -> bool: - split: List[Optional[str]] = sqs_url.split("/") - return len(split) == 3 and _is_account_id(split[1]) and _is_valid_queue_name(split[2]) - - -def _is_valid_queue_name(input_str: str) -> bool: - if len(input_str) == 0 or len(input_str) > 80: - return False - - for char in input_str: - if char != "_" and char != "-" and not char.isalpha() and not char.isdigit(): - return False - - return True - def _is_account_id(input_str: str) -> bool: - if len(input_str) != 12: + if input_str is None or len(input_str) != 12: return False try: @@ -112,43 +36,12 @@ def _is_account_id(input_str: str) -> bool: return True -def _get_region_from_sqs_url(sqs_url: str) -> Optional[str]: - split: List[Optional[str]] = sqs_url.split(".") - return split[1] if len(split) >= 2 else None - - -def _get_region_from_legacy_sqs_url(sqs_url: str) -> Optional[str]: - split: List[Optional[str]] = sqs_url.split(".") - return split[0] - - -def _get_account_id(sqs_url: str) -> Optional[str]: - if sqs_url is None: - return None - - split: List[Optional[str]] = sqs_url.split("/") - return split[1] if len(split) >= 2 else None - - -def _get_partition(sqs_url: str) -> Optional[str]: - region: Optional[str] = _get_region(sqs_url) - - if region is None: - return None - - if region.startswith("us-gov-"): - return "aws-us-gov" - - if region.startswith("cn-"): - return "aws-cn" - - return "aws" - - -def _get_queue_name(sqs_url: str) -> Optional[str]: - split: List[Optional[str]] = sqs_url.split("/") - return split[2] if len(split) >= 3 else None +def _is_valid_queue_name(input_str: str) -> bool: + if input_str is None or len(input_str) == 0 or len(input_str) > 80: + return False + for char in input_str: + if char != "_" and char != "-" and not char.isalpha() and not char.isdigit(): + return False -def _null_to_empty(input_str: str) -> str: - return input_str if input_str is not None else "" + return True diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_metric_attribute_generator.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_metric_attribute_generator.py index d98dc8a4e..2909c0ff9 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_metric_attribute_generator.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_metric_attribute_generator.py @@ -14,8 +14,9 @@ AWS_QUEUE_NAME, AWS_QUEUE_URL, AWS_REMOTE_OPERATION, + AWS_REMOTE_RESOURCE_IDENTIFIER, + AWS_REMOTE_RESOURCE_TYPE, AWS_REMOTE_SERVICE, - AWS_REMOTE_TARGET, AWS_SPAN_KIND, AWS_STREAM_NAME, ) @@ -805,7 +806,7 @@ def test_non_local_root_boto3_span(self): # boto3sqs spans shouldn't generate aws dependency attributes self.assertIsNone(dependency_attributes) - def test_normalize_service_name_non_aws_sdk_span(self): + def test_normalize_remote_service_name_no_normalization(self): service_name: str = "non aws service" self._mock_attribute([SpanAttributes.RPC_SERVICE], [service_name]) self.span_mock.kind = SpanKind.CLIENT @@ -815,14 +816,20 @@ def test_normalize_service_name_non_aws_sdk_span(self): ).get(DEPENDENCY_METRIC) self.assertEqual(actual_attributes.get(AWS_REMOTE_SERVICE), service_name) - def test_normalize_service_name_aws_sdk_span(self): - self._mock_attribute([SpanAttributes.RPC_SYSTEM, SpanAttributes.RPC_SERVICE], ["aws-api", "EC2"]) + def test_normalize_remote_service_name_aws_sdk(self): + self.validate_aws_sdk_service_normalization("DynamoDB", "AWS::DynamoDB") + self.validate_aws_sdk_service_normalization("Kinesis", "AWS::Kinesis") + self.validate_aws_sdk_service_normalization("S3", "AWS::S3") + self.validate_aws_sdk_service_normalization("SQS", "AWS::SQS") + + def validate_aws_sdk_service_normalization(self, service_name: str, expected_remote_service: str): + self._mock_attribute([SpanAttributes.RPC_SYSTEM, SpanAttributes.RPC_SERVICE], ["aws-api", service_name]) self.span_mock.kind = SpanKind.CLIENT actual_attributes: Attributes = _GENERATOR.generate_metric_attributes_dict_from_span( self.span_mock, self.resource ).get(DEPENDENCY_METRIC) - self.assertEqual(actual_attributes.get(AWS_REMOTE_SERVICE), "AWS.SDK.EC2") + self.assertEqual(actual_attributes.get(AWS_REMOTE_SERVICE), expected_remote_service) def _update_resource_with_service_name(self) -> None: self.resource: Resource = Resource(attributes={SERVICE_NAME: _SERVICE_NAME_VALUE}) @@ -904,173 +911,81 @@ def _validate_peer_service_does_override(self, remote_service_key: str) -> None: self._mock_attribute([remote_service_key, SpanAttributes.PEER_SERVICE], [None, None]) - def test_client_span_with_remote_target_attributes(self): - keys: List[str] = [ - SpanAttributes.AWS_S3_BUCKET, - AWS_QUEUE_NAME, - AWS_QUEUE_URL, - AWS_STREAM_NAME, - SpanAttributes.AWS_DYNAMODB_TABLE_NAMES, - ] - values: List[str] = [ - "TestString", - "TestString", - "TestString", - "TestString", - "TestString", - ] - self._mock_attribute(keys, values) - + def test_client_span_with_remote_resource_attributes(self): # Validate behaviour of aws bucket name attribute, then remove it. self._mock_attribute([SpanAttributes.AWS_S3_BUCKET], ["aws_s3_bucket_name"]) - self._validate_remote_target_attributes(AWS_REMOTE_TARGET, "::s3:::aws_s3_bucket_name") + self._validate_remote_resource_attributes("AWS::S3::Bucket", "aws_s3_bucket_name") self._mock_attribute([SpanAttributes.AWS_S3_BUCKET], [None]) # Validate behaviour of AWS_QUEUE_NAME attribute, then remove it self._mock_attribute([AWS_QUEUE_NAME], ["aws_queue_name"]) - self._validate_remote_target_attributes(AWS_REMOTE_TARGET, "::sqs:::aws_queue_name") + self._validate_remote_resource_attributes("AWS::SQS::Queue", "aws_queue_name") self._mock_attribute([AWS_QUEUE_NAME], [None]) - # Validate behaviour of having both AWS_QUEUE_NAME and AWS_QUEUE_URL attribute, then remove them + # Validate behaviour of having both AWS_QUEUE_NAME and AWS_QUEUE_URL attribute, then remove them. Queue name is + # more reliable than queue URL, so we prefer to use name over URL. self._mock_attribute( [AWS_QUEUE_URL, AWS_QUEUE_NAME], ["https://sqs.us-east-2.amazonaws.com/123456789012/Queue", "aws_queue_name"], ) - self._validate_remote_target_attributes(AWS_REMOTE_TARGET, "arn:aws:sqs:us-east-2:123456789012:Queue") + self._validate_remote_resource_attributes("AWS::SQS::Queue", "aws_queue_name") self._mock_attribute([AWS_QUEUE_URL, AWS_QUEUE_NAME], [None, None]) # Valid queue name with invalid queue URL, we should default to using the queue name. self._mock_attribute([AWS_QUEUE_URL, AWS_QUEUE_NAME], ["invalidUrl", "aws_queue_name"]) - self._validate_remote_target_attributes(AWS_REMOTE_TARGET, "::sqs:::aws_queue_name") + self._validate_remote_resource_attributes("AWS::SQS::Queue", "aws_queue_name") self._mock_attribute([AWS_QUEUE_URL, AWS_QUEUE_NAME], [None, None]) # Validate behaviour of AWS_STREAM_NAME attribute, then remove it. self._mock_attribute([AWS_STREAM_NAME], ["aws_stream_name"]) - self._validate_remote_target_attributes(AWS_REMOTE_TARGET, "::kinesis:::stream/aws_stream_name") + self._validate_remote_resource_attributes("AWS::Kinesis::Stream", "aws_stream_name") self._mock_attribute([AWS_STREAM_NAME], [None]) # Validate behaviour of SpanAttributes.AWS_DYNAMODB_TABLE_NAMES attribute with one table name, then remove it. self._mock_attribute([SpanAttributes.AWS_DYNAMODB_TABLE_NAMES], [["aws_table_name"]]) - self._validate_remote_target_attributes(AWS_REMOTE_TARGET, "::dynamodb:::table/aws_table_name") + self._validate_remote_resource_attributes("AWS::DynamoDB::Table", "aws_table_name") self._mock_attribute([SpanAttributes.AWS_DYNAMODB_TABLE_NAMES], [None]) # Validate behaviour of SpanAttributes.AWS_DYNAMODB_TABLE_NAMES attribute with no table name, then remove it. self._mock_attribute([SpanAttributes.AWS_DYNAMODB_TABLE_NAMES], [[]]) - self._validate_remote_target_attributes(AWS_REMOTE_TARGET, None) + self._validate_remote_resource_attributes(None, None) self._mock_attribute([SpanAttributes.AWS_DYNAMODB_TABLE_NAMES], [None]) # Validate behaviour of SpanAttributes.AWS_DYNAMODB_TABLE_NAMES attribute with two table names, then remove it. self._mock_attribute([SpanAttributes.AWS_DYNAMODB_TABLE_NAMES], [["aws_table_name1", "aws_table_name1"]]) - self._validate_remote_target_attributes(AWS_REMOTE_TARGET, None) + self._validate_remote_resource_attributes(None, None) self._mock_attribute([SpanAttributes.AWS_DYNAMODB_TABLE_NAMES], [None]) - def test_sqs_client_span_basic_urls(self): - self._test_sqs_url( - "https://sqs.us-east-1.amazonaws.com/123412341234/Q_Name-5", "arn:aws:sqs:us-east-1:123412341234:Q_Name-5" - ) - self._test_sqs_url( - "https://sqs.af-south-1.amazonaws.com/999999999999/-_ThisIsValid", - "arn:aws:sqs:af-south-1:999999999999:-_ThisIsValid", - ) - self._test_sqs_url( - "http://sqs.eu-west-3.amazonaws.com/000000000000/FirstQueue", - "arn:aws:sqs:eu-west-3:000000000000:FirstQueue", - ) - self._test_sqs_url( - "sqs.sa-east-1.amazonaws.com/123456781234/SecondQueue", "arn:aws:sqs:sa-east-1:123456781234:SecondQueue" - ) - - def test_sqs_client_span_us_gov_urls(self): - self._test_sqs_url( - "https://sqs.us-gov-east-1.amazonaws.com/123456789012/MyQueue", - "arn:aws-us-gov:sqs:us-gov-east-1:123456789012:MyQueue", - ) - self._test_sqs_url( - "sqs.us-gov-west-1.amazonaws.com/112233445566/Queue", "arn:aws-us-gov:sqs:us-gov-west-1:112233445566:Queue" - ) - - def test_sqs_client_span_legacy_format_urls(self): - self._test_sqs_url( - "https://ap-northeast-2.queue.amazonaws.com/123456789012/MyQueue", - "arn:aws:sqs:ap-northeast-2:123456789012:MyQueue", - ) - self._test_sqs_url( - "http://cn-northwest-1.queue.amazonaws.com/123456789012/MyQueue", - "arn:aws-cn:sqs:cn-northwest-1:123456789012:MyQueue", - ) - self._test_sqs_url( - "http://cn-north-1.queue.amazonaws.com/123456789012/MyQueue", - "arn:aws-cn:sqs:cn-north-1:123456789012:MyQueue", - ) - self._test_sqs_url( - "ap-south-1.queue.amazonaws.com/123412341234/MyLongerQueueNameHere", - "arn:aws:sqs:ap-south-1:123412341234:MyLongerQueueNameHere", - ) - self._test_sqs_url( - "https://us-gov-east-1.queue.amazonaws.com/123456789012/MyQueue", - "arn:aws-us-gov:sqs:us-gov-east-1:123456789012:MyQueue", - ) - - def test_sqs_client_span_north_virginia_legacy_url(self): - self._test_sqs_url( - "https://queue.amazonaws.com/123456789012/MyQueue", "arn:aws:sqs:us-east-1:123456789012:MyQueue" - ) - - def test_sqs_client_span_custom_urls(self): - self._test_sqs_url("http://127.0.0.1:1212/123456789012/MyQueue", "::sqs::123456789012:MyQueue") - self._test_sqs_url("https://127.0.0.1:1212/123412341234/RRR", "::sqs::123412341234:RRR") - self._test_sqs_url("127.0.0.1:1212/123412341234/QQ", "::sqs::123412341234:QQ") - self._test_sqs_url("https://amazon.com/123412341234/BB", "::sqs::123412341234:BB") - - def test_sqs_client_span_long_urls(self): - queue_name = "a" * 80 - self._test_sqs_url("http://127.0.0.1:1212/123456789012/" + queue_name, "::sqs::123456789012:" + queue_name) - - queue_name_too_long = "a" * 81 - self._test_sqs_url("http://127.0.0.1:1212/123456789012/" + queue_name_too_long, None) - - def test_client_span_sqs_invalid_or_empty_urls(self): - self._test_sqs_url(None, None) - self._test_sqs_url("", None) - self._test_sqs_url("invalidUrl", None) - self._test_sqs_url("https://www.amazon.com", None) - self._test_sqs_url("https://sqs.us-east-1.amazonaws.com/123412341234/.", None) - self._test_sqs_url("https://sqs.us-east-1.amazonaws.com/12/Queue", None) - self._test_sqs_url("https://sqs.us-east-1.amazonaws.com/A/A", None) - self._test_sqs_url("https://sqs.us-east-1.amazonaws.com/123412341234/A/ThisShouldNotBeHere", None) - - def _test_sqs_url(self, sqs_url, expected_remote_target): - self._mock_attribute([AWS_QUEUE_URL], [sqs_url]) - self._validate_remote_target_attributes(AWS_REMOTE_TARGET, expected_remote_target) - self._mock_attribute([AWS_QUEUE_URL], [None]) - - def _validate_remote_target_attributes(self, remote_target_key, remote_target) -> None: - # Client, Producer, and Consumer spans should generate the expected RemoteTarget attribute + def _validate_remote_resource_attributes(self, expected_type: str, expected_identifier: str) -> None: + # Client, Producer, and Consumer spans should generate the expected remote resource attribute self.span_mock.kind = SpanKind.CLIENT actual_attributes = _GENERATOR.generate_metric_attributes_dict_from_span(self.span_mock, self.resource).get( DEPENDENCY_METRIC ) - self.assertEqual(actual_attributes.get(remote_target_key), remote_target) + self.assertEqual(expected_type, actual_attributes.get(AWS_REMOTE_RESOURCE_TYPE)) + self.assertEqual(expected_identifier, actual_attributes.get(AWS_REMOTE_RESOURCE_IDENTIFIER)) self.span_mock.kind = SpanKind.PRODUCER actual_attributes = _GENERATOR.generate_metric_attributes_dict_from_span(self.span_mock, self.resource).get( DEPENDENCY_METRIC ) - self.assertEqual(actual_attributes.get(remote_target_key), remote_target) + self.assertEqual(expected_type, actual_attributes.get(AWS_REMOTE_RESOURCE_TYPE)) + self.assertEqual(expected_identifier, actual_attributes.get(AWS_REMOTE_RESOURCE_IDENTIFIER)) self.span_mock.kind = SpanKind.CONSUMER actual_attributes = _GENERATOR.generate_metric_attributes_dict_from_span(self.span_mock, self.resource).get( DEPENDENCY_METRIC ) - self.assertEqual(actual_attributes.get(remote_target_key), remote_target) + self.assertEqual(expected_type, actual_attributes.get(AWS_REMOTE_RESOURCE_TYPE)) + self.assertEqual(expected_identifier, actual_attributes.get(AWS_REMOTE_RESOURCE_IDENTIFIER)) - # Server span should not generate RemoteTarget attribute + # Server span should not generate remote resource attribute self.span_mock.kind = SpanKind.SERVER actual_attributes = _GENERATOR.generate_metric_attributes_dict_from_span(self.span_mock, self.resource).get( SERVICE_METRIC ) - - self.assertNotIn(remote_target_key, actual_attributes) + self.assertNotIn(AWS_REMOTE_RESOURCE_TYPE, actual_attributes) + self.assertNotIn(AWS_REMOTE_RESOURCE_IDENTIFIER, actual_attributes) def _validate_attributes_produced_for_non_local_root_span_of_kind( self, expected_attributes: Attributes, kind: SpanKind diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_sqs_url_parsers.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_sqs_url_parsers.py new file mode 100644 index 000000000..564024a58 --- /dev/null +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_sqs_url_parsers.py @@ -0,0 +1,53 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +from unittest import TestCase + +from amazon.opentelemetry.distro.sqs_url_parser import SqsUrlParser + + +class TestSqsUrlParser(TestCase): + def test_sqs_client_span_basic_urls(self): + self.validate("https://sqs.us-east-1.amazonaws.com/123412341234/Q_Name-5", "Q_Name-5") + self.validate("https://sqs.af-south-1.amazonaws.com/999999999999/-_ThisIsValid", "-_ThisIsValid") + self.validate("http://sqs.eu-west-3.amazonaws.com/000000000000/FirstQueue", "FirstQueue") + self.validate("sqs.sa-east-1.amazonaws.com/123456781234/SecondQueue", "SecondQueue") + + def test_sqs_client_span_legacy_format_urls(self): + self.validate("https://ap-northeast-2.queue.amazonaws.com/123456789012/MyQueue", "MyQueue") + self.validate("http://cn-northwest-1.queue.amazonaws.com/123456789012/MyQueue", "MyQueue") + self.validate("http://cn-north-1.queue.amazonaws.com/123456789012/MyQueue", "MyQueue") + self.validate("ap-south-1.queue.amazonaws.com/123412341234/MyLongerQueueNameHere", "MyLongerQueueNameHere") + self.validate("https://queue.amazonaws.com/123456789012/MyQueue", "MyQueue") + + def test_sqs_client_span_custom_urls(self): + self.validate("http://127.0.0.1:1212/123456789012/MyQueue", "MyQueue") + self.validate("https://127.0.0.1:1212/123412341234/RRR", "RRR") + self.validate("127.0.0.1:1212/123412341234/QQ", "QQ") + self.validate("https://amazon.com/123412341234/BB", "BB") + + def test_sqs_client_span_long_urls(self): + queue_name = "a" * 80 + self.validate("http://127.0.0.1:1212/123456789012/" + queue_name, queue_name) + + queue_name_too_long = "a" * 81 + self.validate("http://127.0.0.1:1212/123456789012/" + queue_name_too_long, None) + + def test_client_span_sqs_invalid_or_empty_urls(self): + self.validate(None, None) + self.validate("", None) + self.validate(" ", None) + self.validate("/", None) + self.validate("//", None) + self.validate("///", None) + self.validate("//asdf", None) + self.validate("/123412341234/as?df", None) + self.validate("invalidUrl", None) + self.validate("https://www.amazon.com", None) + self.validate("https://sqs.us-east-1.amazonaws.com/123412341234/.", None) + self.validate("https://sqs.us-east-1.amazonaws.com/12/Queue", None) + self.validate("https://sqs.us-east-1.amazonaws.com/A/A", None) + self.validate("https://sqs.us-east-1.amazonaws.com/123412341234/A/ThisShouldNotBeHere", None) + + def validate(self, url, expected_name): + self.assertEqual(SqsUrlParser.get_queue_name(url), expected_name) diff --git a/contract-tests/tests/test/amazon/botocore/botocore_test.py b/contract-tests/tests/test/amazon/botocore/botocore_test.py index 19b6fa4e4..f309f343b 100644 --- a/contract-tests/tests/test/amazon/botocore/botocore_test.py +++ b/contract-tests/tests/test/amazon/botocore/botocore_test.py @@ -13,8 +13,9 @@ AWS_LOCAL_OPERATION, AWS_LOCAL_SERVICE, AWS_REMOTE_OPERATION, + AWS_REMOTE_RESOURCE_IDENTIFIER, + AWS_REMOTE_RESOURCE_TYPE, AWS_REMOTE_SERVICE, - AWS_REMOTE_TARGET, AWS_SPAN_KIND, ) from opentelemetry.proto.common.v1.common_pb2 import AnyValue, KeyValue @@ -88,9 +89,10 @@ def test_s3_create_bucket(self): 200, 0, 0, - remote_service="AWS.SDK.S3", + remote_service="AWS::S3", remote_operation="CreateBucket", - remote_target="::s3:::test-bucket-name", + remote_resource_type="AWS::S3::Bucket", + remote_resource_identifier="test-bucket-name", request_specific_attributes={ SpanAttributes.AWS_S3_BUCKET: "test-bucket-name", }, @@ -104,9 +106,10 @@ def test_s3_create_object(self): 200, 0, 0, - remote_service="AWS.SDK.S3", + remote_service="AWS::S3", remote_operation="PutObject", - remote_target="::s3:::test-put-object-bucket-name", + remote_resource_type="AWS::S3::Bucket", + remote_resource_identifier="test-put-object-bucket-name", request_specific_attributes={ SpanAttributes.AWS_S3_BUCKET: "test-put-object-bucket-name", }, @@ -120,9 +123,10 @@ def test_s3_get_object(self): 200, 0, 0, - remote_service="AWS.SDK.S3", + remote_service="AWS::S3", remote_operation="GetObject", - remote_target="::s3:::test-get-object-bucket-name", + remote_resource_type="AWS::S3::Bucket", + remote_resource_identifier="test-get-object-bucket-name", request_specific_attributes={ SpanAttributes.AWS_S3_BUCKET: "test-get-object-bucket-name", }, @@ -136,9 +140,10 @@ def test_s3_error(self): 400, 1, 0, - remote_service="AWS.SDK.S3", + remote_service="AWS::S3", remote_operation="CreateBucket", - remote_target="::s3:::-", + remote_resource_type="AWS::S3::Bucket", + remote_resource_identifier="-", request_specific_attributes={ SpanAttributes.AWS_S3_BUCKET: "-", }, @@ -152,9 +157,10 @@ def test_s3_fault(self): 500, 0, 1, - remote_service="AWS.SDK.S3", + remote_service="AWS::S3", remote_operation="CreateBucket", - remote_target="::s3:::valid-bucket-name", + remote_resource_type="AWS::S3::Bucket", + remote_resource_identifier="valid-bucket-name", request_specific_attributes={ SpanAttributes.AWS_S3_BUCKET: "valid-bucket-name", }, @@ -168,9 +174,10 @@ def test_dynamodb_create_table(self): 200, 0, 0, - remote_service="AWS.SDK.DynamoDB", + remote_service="AWS::DynamoDB", remote_operation="CreateTable", - remote_target="::dynamodb:::table/test_table", + remote_resource_type="AWS::DynamoDB::Table", + remote_resource_identifier="test_table", request_specific_attributes={ SpanAttributes.AWS_DYNAMODB_TABLE_NAMES: ["test_table"], }, @@ -184,9 +191,10 @@ def test_dynamodb_put_item(self): 200, 0, 0, - remote_service="AWS.SDK.DynamoDB", + remote_service="AWS::DynamoDB", remote_operation="PutItem", - remote_target="::dynamodb:::table/put_test_table", + remote_resource_type="AWS::DynamoDB::Table", + remote_resource_identifier="put_test_table", request_specific_attributes={ SpanAttributes.AWS_DYNAMODB_TABLE_NAMES: ["put_test_table"], }, @@ -200,9 +208,10 @@ def test_dynamodb_error(self): 400, 1, 0, - remote_service="AWS.SDK.DynamoDB", + remote_service="AWS::DynamoDB", remote_operation="PutItem", - remote_target="::dynamodb:::table/invalid_table", + remote_resource_type="AWS::DynamoDB::Table", + remote_resource_identifier="invalid_table", request_specific_attributes={ SpanAttributes.AWS_DYNAMODB_TABLE_NAMES: ["invalid_table"], }, @@ -216,9 +225,10 @@ def test_dynamodb_fault(self): 500, 0, 1, - remote_service="AWS.SDK.DynamoDB", + remote_service="AWS::DynamoDB", remote_operation="PutItem", - remote_target="::dynamodb:::table/invalid_table", + remote_resource_type="AWS::DynamoDB::Table", + remote_resource_identifier="invalid_table", request_specific_attributes={ SpanAttributes.AWS_DYNAMODB_TABLE_NAMES: ["invalid_table"], }, @@ -232,9 +242,10 @@ def test_sqs_create_queue(self): 200, 0, 0, - remote_service="AWS.SDK.SQS", + remote_service="AWS::SQS", remote_operation="CreateQueue", - remote_target="::sqs:::test_queue", + remote_resource_type="AWS::SQS::Queue", + remote_resource_identifier="test_queue", request_specific_attributes={ _AWS_QUEUE_NAME: "test_queue", }, @@ -248,9 +259,10 @@ def test_sqs_send_message(self): 200, 0, 0, - remote_service="AWS.SDK.SQS", + remote_service="AWS::SQS", remote_operation="SendMessage", - remote_target="::sqs::000000000000:test_put_get_queue", + remote_resource_type="AWS::SQS::Queue", + remote_resource_identifier="test_put_get_queue", request_specific_attributes={ _AWS_QUEUE_URL: "http://localstack:4566/000000000000/test_put_get_queue", }, @@ -264,9 +276,10 @@ def test_sqs_receive_message(self): 200, 0, 0, - remote_service="AWS.SDK.SQS", + remote_service="AWS::SQS", remote_operation="ReceiveMessage", - remote_target="::sqs::000000000000:test_put_get_queue", + remote_resource_type="AWS::SQS::Queue", + remote_resource_identifier="test_put_get_queue", request_specific_attributes={ _AWS_QUEUE_URL: "http://localstack:4566/000000000000/test_put_get_queue", }, @@ -280,9 +293,10 @@ def test_sqs_error(self): 400, 1, 0, - remote_service="AWS.SDK.SQS", + remote_service="AWS::SQS", remote_operation="SendMessage", - remote_target="::sqs::000000000000:sqserror", + remote_resource_type="AWS::SQS::Queue", + remote_resource_identifier="sqserror", request_specific_attributes={ _AWS_QUEUE_URL: "http://error.test:8080/000000000000/sqserror", }, @@ -296,9 +310,10 @@ def test_sqs_fault(self): 500, 0, 1, - remote_service="AWS.SDK.SQS", + remote_service="AWS::SQS", remote_operation="CreateQueue", - remote_target="::sqs:::invalid_test", + remote_resource_type="AWS::SQS::Queue", + remote_resource_identifier="invalid_test", request_specific_attributes={ _AWS_QUEUE_NAME: "invalid_test", }, @@ -312,9 +327,10 @@ def test_kinesis_put_record(self): 200, 0, 0, - remote_service="AWS.SDK.Kinesis", + remote_service="AWS::Kinesis", remote_operation="PutRecord", - remote_target="::kinesis:::stream/test_stream", + remote_resource_type="AWS::Kinesis::Stream", + remote_resource_identifier="test_stream", request_specific_attributes={ _AWS_STREAM_NAME: "test_stream", }, @@ -328,9 +344,10 @@ def test_kinesis_error(self): 400, 1, 0, - remote_service="AWS.SDK.Kinesis", + remote_service="AWS::Kinesis", remote_operation="PutRecord", - remote_target="::kinesis:::stream/invalid_stream", + remote_resource_type="AWS::Kinesis::Stream", + remote_resource_identifier="invalid_stream", request_specific_attributes={ _AWS_STREAM_NAME: "invalid_stream", }, @@ -344,9 +361,10 @@ def test_kinesis_fault(self): 500, 0, 1, - remote_service="AWS.SDK.Kinesis", + remote_service="AWS::Kinesis", remote_operation="PutRecord", - remote_target="::kinesis:::stream/test_stream", + remote_resource_type="AWS::Kinesis::Stream", + remote_resource_identifier="test_stream", request_specific_attributes={ _AWS_STREAM_NAME: "test_stream", }, @@ -367,11 +385,18 @@ def _assert_aws_span_attributes(self, resource_scope_spans: List[ResourceScopeSp kwargs.get("remote_service"), kwargs.get("remote_operation"), "LOCAL_ROOT", - kwargs.get("remote_target", "None"), + kwargs.get("remote_resource_type", "None"), + kwargs.get("remote_resource_identifier", "None"), ) def _assert_aws_attributes( - self, attributes_list: List[KeyValue], service: str, operation: str, span_kind: str, remote_target: str + self, + attributes_list: List[KeyValue], + service: str, + operation: str, + span_kind: str, + remote_resource_type: str, + remote_resource_identifier: str, ) -> None: attributes_dict: Dict[str, AnyValue] = self._get_attributes_dict(attributes_list) self._assert_str_attribute(attributes_dict, AWS_LOCAL_SERVICE, self.get_application_otel_service_name()) @@ -380,8 +405,10 @@ def _assert_aws_attributes( self._assert_str_attribute(attributes_dict, AWS_LOCAL_OPERATION, "InternalOperation") self._assert_str_attribute(attributes_dict, AWS_REMOTE_SERVICE, service) self._assert_str_attribute(attributes_dict, AWS_REMOTE_OPERATION, operation) - if remote_target != "None": - self._assert_str_attribute(attributes_dict, AWS_REMOTE_TARGET, remote_target) + if remote_resource_type != "None": + self._assert_str_attribute(attributes_dict, AWS_REMOTE_RESOURCE_TYPE, remote_resource_type) + if remote_resource_identifier != "None": + self._assert_str_attribute(attributes_dict, AWS_REMOTE_RESOURCE_IDENTIFIER, remote_resource_identifier) # See comment above AWS_LOCAL_OPERATION self._assert_str_attribute(attributes_dict, AWS_SPAN_KIND, span_kind) @@ -417,7 +444,7 @@ def _assert_semantic_conventions_attributes( attributes_dict: Dict[str, AnyValue] = self._get_attributes_dict(attributes_list) self._assert_str_attribute(attributes_dict, SpanAttributes.RPC_METHOD, operation) self._assert_str_attribute(attributes_dict, SpanAttributes.RPC_SYSTEM, "aws-api") - self._assert_str_attribute(attributes_dict, SpanAttributes.RPC_SERVICE, service.split(".")[-1]) + self._assert_str_attribute(attributes_dict, SpanAttributes.RPC_SERVICE, service.split("::")[-1]) self._assert_int_attribute(attributes_dict, SpanAttributes.HTTP_STATUS_CODE, status_code) # TODO: botocore instrumentation is not respecting PEER_SERVICE # self._assert_str_attribute(attributes_dict, SpanAttributes.PEER_SERVICE, "backend:8080") @@ -459,9 +486,12 @@ def _assert_metric_attributes( self._assert_str_attribute(attribute_dict, AWS_REMOTE_SERVICE, kwargs.get("remote_service")) self._assert_str_attribute(attribute_dict, AWS_REMOTE_OPERATION, kwargs.get("remote_operation")) self._assert_str_attribute(attribute_dict, AWS_SPAN_KIND, "CLIENT") - remote_target = kwargs.get("remote_target", "None") - if remote_target != "None": - self._assert_str_attribute(attribute_dict, AWS_REMOTE_TARGET, remote_target) + remote_resource_type = kwargs.get("remote_resource_type", "None") + remote_resource_identifier = kwargs.get("remote_resource_identifier", "None") + if remote_resource_type != "None": + self._assert_str_attribute(attribute_dict, AWS_REMOTE_RESOURCE_TYPE, remote_resource_type) + if remote_resource_identifier != "None": + self._assert_str_attribute(attribute_dict, AWS_REMOTE_RESOURCE_IDENTIFIER, remote_resource_identifier) self.check_sum(metric_name, dependency_dp.sum, expected_sum) attribute_dict: Dict[str, AnyValue] = self._get_attributes_dict(service_dp.attributes) diff --git a/contract-tests/tests/test/amazon/utils/application_signals_constants.py b/contract-tests/tests/test/amazon/utils/application_signals_constants.py index 94115e8a8..b75b1f9f7 100644 --- a/contract-tests/tests/test/amazon/utils/application_signals_constants.py +++ b/contract-tests/tests/test/amazon/utils/application_signals_constants.py @@ -14,5 +14,6 @@ AWS_LOCAL_OPERATION: str = "aws.local.operation" AWS_REMOTE_SERVICE: str = "aws.remote.service" AWS_REMOTE_OPERATION: str = "aws.remote.operation" -AWS_REMOTE_TARGET: str = "aws.remote.target" +AWS_REMOTE_RESOURCE_TYPE: str = "aws.remote.resource.type" +AWS_REMOTE_RESOURCE_IDENTIFIER: str = "aws.remote.resource.identifier" AWS_SPAN_KIND: str = "aws.span.kind"