Skip to content

Commit

Permalink
Metric Schema changes (#150)
Browse files Browse the repository at this point in the history
In this commit, we are removing RemoteTarget and replacing with
RemoteResourceIdentifier and RemoteResourceType. Further, we are
formatting RemoteService, and the content of the RemoteResource
attributes such that they align with AWS Cloud Control resource names.

By submitting this pull request, I confirm that my contribution is made
under the terms of the Apache 2.0 license.
  • Loading branch information
thpierce authored May 9, 2024
1 parent d7052e0 commit 26e62a6
Show file tree
Hide file tree
Showing 7 changed files with 229 additions and 324 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
AWS_LOCAL_OPERATION: str = "aws.local.operation"
AWS_REMOTE_SERVICE: str = "aws.remote.service"
AWS_REMOTE_OPERATION: str = "aws.remote.operation"
AWS_REMOTE_TARGET: str = "aws.remote.target"
AWS_REMOTE_RESOURCE_TYPE: str = "aws.remote.resource.type"
AWS_REMOTE_RESOURCE_IDENTIFIER: str = "aws.remote.resource.identifier"
AWS_SDK_DESCENDANT: str = "aws.sdk.descendant"
AWS_CONSUMER_PARENT_SPAN_KIND: str = "aws.consumer.parent.span.kind"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@
AWS_QUEUE_NAME,
AWS_QUEUE_URL,
AWS_REMOTE_OPERATION,
AWS_REMOTE_RESOURCE_IDENTIFIER,
AWS_REMOTE_RESOURCE_TYPE,
AWS_REMOTE_SERVICE,
AWS_REMOTE_TARGET,
AWS_SPAN_KIND,
AWS_STREAM_NAME,
)
Expand Down Expand Up @@ -65,6 +66,12 @@
_AWS_TABLE_NAMES: str = SpanAttributes.AWS_DYNAMODB_TABLE_NAMES
_AWS_BUCKET_NAME: str = SpanAttributes.AWS_S3_BUCKET

# Normalized remote service names for supported AWS services
_NORMALIZED_DYNAMO_DB_SERVICE_NAME: str = "AWS::DynamoDB"
_NORMALIZED_KINESIS_SERVICE_NAME: str = "AWS::Kinesis"
_NORMALIZED_S3_SERVICE_NAME: str = "AWS::S3"
_NORMALIZED_SQS_SERVICE_NAME: str = "AWS::SQS"

# Special DEPENDENCY attribute value if GRAPHQL_OPERATION_TYPE attribute key is present.
_GRAPHQL: str = "graphql"

Expand Down Expand Up @@ -110,7 +117,7 @@ def _generate_dependency_metric_attributes(span: ReadableSpan, resource: Resourc
_set_service(resource, span, attributes)
_set_egress_operation(span, attributes)
_set_remote_service_and_operation(span, attributes)
_set_remote_target(span, attributes)
_set_remote_type_and_identifier(span, attributes)
_set_span_kind_for_dependency(span, attributes)
return attributes

Expand Down Expand Up @@ -198,7 +205,7 @@ def _set_remote_service_and_operation(span: ReadableSpan, attributes: BoundedAtt
remote_service = _get_remote_service(span, AWS_REMOTE_SERVICE)
remote_operation = _get_remote_operation(span, AWS_REMOTE_OPERATION)
elif is_key_present(span, _RPC_SERVICE) or is_key_present(span, _RPC_METHOD):
remote_service = _normalize_service_name(span, _get_remote_service(span, _RPC_SERVICE))
remote_service = _normalize_remote_service_name(span, _get_remote_service(span, _RPC_SERVICE))
remote_operation = _get_remote_operation(span, _RPC_METHOD)
elif is_key_present(span, _DB_SYSTEM) or is_key_present(span, _DB_OPERATION) or is_key_present(span, _DB_STATEMENT):
remote_service = _get_remote_service(span, _DB_SYSTEM)
Expand Down Expand Up @@ -268,10 +275,14 @@ def _get_db_statement_remote_operation(span: ReadableSpan, statement_key: str) -
return remote_operation


def _normalize_service_name(span: ReadableSpan, service_name: str) -> str:
def _normalize_remote_service_name(span: ReadableSpan, service_name: str) -> str:
"""
If the span is an AWS SDK span, normalize the name to align with <a
href="https://docs.aws.amazon.com/cloudcontrolapi/latest/userguide/supported-resources.html">AWS Cloud Control
resource format</a> as much as possible. Long term, we would like to normalize service name in the upstream.
"""
if is_aws_sdk_span(span):
return "AWS.SDK." + service_name

return "AWS::" + service_name
return service_name


Expand Down Expand Up @@ -320,38 +331,39 @@ def _generate_remote_operation(span: ReadableSpan) -> str:
return remote_operation


def _set_remote_target(span: ReadableSpan, attributes: BoundedAttributes) -> None:
remote_target: Optional[str] = _get_remote_target(span)
if remote_target is not None:
attributes[AWS_REMOTE_TARGET] = remote_target


def _get_remote_target(span: ReadableSpan) -> Optional[str]:
def _set_remote_type_and_identifier(span: ReadableSpan, attributes: BoundedAttributes) -> None:
"""
RemoteTarget attribute AWS_REMOTE_TARGET is used to store the resource
name of the remote invokes, such as S3 bucket name, mysql table name, etc.
TODO: currently only support AWS resource name, will be extended to support
the general remote targets, such as ActiveMQ name, etc.
"""
if is_key_present(span, _AWS_BUCKET_NAME):
return "::s3:::" + span.attributes.get(_AWS_BUCKET_NAME)

if is_key_present(span, AWS_QUEUE_URL):
arn = SqsUrlParser.get_sqs_remote_target(span.attributes.get(AWS_QUEUE_URL))
if arn:
return arn

if is_key_present(span, AWS_QUEUE_NAME):
return "::sqs:::" + span.attributes.get(AWS_QUEUE_NAME)
Remote resource attributes {@link AwsAttributeKeys#AWS_REMOTE_RESOURCE_TYPE} and {@link
AwsAttributeKeys#AWS_REMOTE_RESOURCE_IDENTIFIER} are used to store information about the resource associated with
the remote invocation, such as S3 bucket name, etc. We should only ever set both type and identifier or neither.
if is_key_present(span, AWS_STREAM_NAME):
return "::kinesis:::stream/" + span.attributes.get(AWS_STREAM_NAME)
AWS resources type and identifier adhere to <a
href="https://docs.aws.amazon.com/cloudcontrolapi/latest/userguide/supported-resources.html">AWS Cloud Control
resource format</a>.
"""
remote_resource_type: Optional[str] = None
remote_resource_identifier: Optional[str] = None

# Only extract the table name when _AWS_TABLE_NAMES has size equals to one
if is_key_present(span, _AWS_TABLE_NAMES) and len(span.attributes.get(_AWS_TABLE_NAMES)) == 1:
return "::dynamodb:::table/" + span.attributes.get(_AWS_TABLE_NAMES)[0]

return None
remote_resource_type = _NORMALIZED_DYNAMO_DB_SERVICE_NAME + "::Table"
remote_resource_identifier = span.attributes.get(_AWS_TABLE_NAMES)[0]
elif is_key_present(span, AWS_STREAM_NAME):
remote_resource_type = _NORMALIZED_KINESIS_SERVICE_NAME + "::Stream"
remote_resource_identifier = span.attributes.get(AWS_STREAM_NAME)
elif is_key_present(span, _AWS_BUCKET_NAME):
remote_resource_type = _NORMALIZED_S3_SERVICE_NAME + "::Bucket"
remote_resource_identifier = span.attributes.get(_AWS_BUCKET_NAME)
elif is_key_present(span, AWS_QUEUE_NAME):
remote_resource_type = _NORMALIZED_SQS_SERVICE_NAME + "::Queue"
remote_resource_identifier = span.attributes.get(AWS_QUEUE_NAME)
elif is_key_present(span, AWS_QUEUE_URL):
remote_resource_type = _NORMALIZED_SQS_SERVICE_NAME + "::Queue"
remote_resource_identifier = SqsUrlParser.get_queue_name(span.attributes.get(AWS_QUEUE_URL))

if remote_resource_type is not None and remote_resource_identifier is not None:
attributes[AWS_REMOTE_RESOURCE_TYPE] = remote_resource_type
attributes[AWS_REMOTE_RESOURCE_IDENTIFIER] = remote_resource_identifier


def _set_span_kind_for_dependency(span: ReadableSpan, attributes: BoundedAttributes) -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,106 +2,30 @@
# SPDX-License-Identifier: Apache-2.0
from typing import List, Optional

_ARN_DELIMETER: str = ":"
_HTTP_SCHEMA: str = "http://"
_HTTPS_SCHEMA: str = "https://"


class SqsUrlParser:
@staticmethod
def get_sqs_remote_target(sqs_url: str) -> Optional[str]:
sqs_url: str = _strip_schema_from_url(sqs_url)

if not _is_sqs_url(sqs_url) and not _is_legacy_sqs_url(sqs_url) and not _is_custom_url(sqs_url):
def get_queue_name(url: str) -> Optional[str]:
"""
Best-effort logic to extract queue name from an HTTP url. This method should only be used with a string that is,
with reasonably high confidence, an SQS queue URL. Handles new/legacy/some custom URLs. Essentially, we require
that the URL should have exactly three parts, delimited by /'s (excluding schema), the second part should be a
12-digit account id, and the third part should be a valid queue name, per SQS naming conventions.
"""
if url is None:
return None

region: str = _get_region(sqs_url)
account_id: str = _get_account_id(sqs_url)
partition: str = _get_partition(sqs_url)
queue_name: str = _get_queue_name(sqs_url)

remote_target: List[Optional[str]] = []

if all((region, account_id, partition, queue_name)):
remote_target.append("arn")

remote_target.extend(
[
_ARN_DELIMETER,
_null_to_empty(partition),
_ARN_DELIMETER,
"sqs",
_ARN_DELIMETER,
_null_to_empty(region),
_ARN_DELIMETER,
_null_to_empty(account_id),
_ARN_DELIMETER,
queue_name,
]
)

return "".join(remote_target)


def _strip_schema_from_url(url: str) -> str:
return url.replace(_HTTP_SCHEMA, "").replace(_HTTPS_SCHEMA, "")


def _get_region(sqs_url: str) -> Optional[str]:
if sqs_url is None:
url = url.replace(_HTTP_SCHEMA, "").replace(_HTTPS_SCHEMA, "")
split_url: List[Optional[str]] = url.split("/")
if len(split_url) == 3 and _is_account_id(split_url[1]) and _is_valid_queue_name(split_url[2]):
return split_url[2]
return None

if sqs_url.startswith("queue.amazonaws.com/"):
return "us-east-1"

if _is_sqs_url(sqs_url):
return _get_region_from_sqs_url(sqs_url)

if _is_legacy_sqs_url(sqs_url):
return _get_region_from_legacy_sqs_url(sqs_url)

return None


def _is_sqs_url(sqs_url: str) -> bool:
split: List[Optional[str]] = sqs_url.split("/")
return (
len(split) == 3
and split[0].startswith("sqs.")
and split[0].endswith(".amazonaws.com")
and _is_account_id(split[1])
and _is_valid_queue_name(split[2])
)


def _is_legacy_sqs_url(sqs_url: str) -> bool:
split: List[Optional[str]] = sqs_url.split("/")
return (
len(split) == 3
and split[0].endswith(".queue.amazonaws.com")
and _is_account_id(split[1])
and _is_valid_queue_name(split[2])
)


def _is_custom_url(sqs_url: str) -> bool:
split: List[Optional[str]] = sqs_url.split("/")
return len(split) == 3 and _is_account_id(split[1]) and _is_valid_queue_name(split[2])


def _is_valid_queue_name(input_str: str) -> bool:
if len(input_str) == 0 or len(input_str) > 80:
return False

for char in input_str:
if char != "_" and char != "-" and not char.isalpha() and not char.isdigit():
return False

return True


def _is_account_id(input_str: str) -> bool:
if len(input_str) != 12:
if input_str is None or len(input_str) != 12:
return False

try:
Expand All @@ -112,43 +36,12 @@ def _is_account_id(input_str: str) -> bool:
return True


def _get_region_from_sqs_url(sqs_url: str) -> Optional[str]:
split: List[Optional[str]] = sqs_url.split(".")
return split[1] if len(split) >= 2 else None


def _get_region_from_legacy_sqs_url(sqs_url: str) -> Optional[str]:
split: List[Optional[str]] = sqs_url.split(".")
return split[0]


def _get_account_id(sqs_url: str) -> Optional[str]:
if sqs_url is None:
return None

split: List[Optional[str]] = sqs_url.split("/")
return split[1] if len(split) >= 2 else None


def _get_partition(sqs_url: str) -> Optional[str]:
region: Optional[str] = _get_region(sqs_url)

if region is None:
return None

if region.startswith("us-gov-"):
return "aws-us-gov"

if region.startswith("cn-"):
return "aws-cn"

return "aws"


def _get_queue_name(sqs_url: str) -> Optional[str]:
split: List[Optional[str]] = sqs_url.split("/")
return split[2] if len(split) >= 3 else None
def _is_valid_queue_name(input_str: str) -> bool:
if input_str is None or len(input_str) == 0 or len(input_str) > 80:
return False

for char in input_str:
if char != "_" and char != "-" and not char.isalpha() and not char.isdigit():
return False

def _null_to_empty(input_str: str) -> str:
return input_str if input_str is not None else ""
return True
Loading

0 comments on commit 26e62a6

Please sign in to comment.