From 4b0886b333e8c4ec8a75683c43046127f759b10b Mon Sep 17 00:00:00 2001 From: John Knollmeyer Date: Thu, 12 Oct 2023 11:59:19 -0700 Subject: [PATCH] [exporter/awsxray] Add aws sdk http error events to x-ray subsegment and strip prefix AWS.SDK. from aws remote service name (#113) * add aws sdk http error events to x-ray subsegment * Pull Request review changes for #27232 - Define "aws-api" as a const - Rename some variables in cause.go to make the intended behavior a little more clear - PR: https://github.com/open-telemetry/opentelemetry-collector-contrib/pull/27232 * Change the time used for X-Ray exception events to unix epoch time - timestamp.String() generates a string like "2023-10-10 01:58:24.675761 +0000 UTC", which is not a standardized format - Under the hood, it is just calling AsTime().String() https://github.com/open-telemetry/opentelemetry-collector/blob/pdata/v0.66.0/pdata/pcommon/timestamp.go#L36 - Time.String() has the warning "The returned string is meant for debugging; for a stable serialized representation, use t.MarshalText, t.MarshalBinary, or t.Format with an explicit format string." https://pkg.go.dev/time#Time.String * Change the time used for X-Ray exception events to microsecond precision - This matches X-Ray guidance for timestamps - "Microsecond reoslution is recommended when available" https://docs.aws.amazon.com/xray/latest/devguide/xray-api-segmentdocuments.html#api-segmentdocuments-fields --------- Co-authored-by: Ping Xiang --- .chloggen/add-aws-http-error-event.yaml | 27 ++++++++++++++ .../internal/translator/cause.go | 35 +++++++++++++++++-- .../internal/translator/cause_test.go | 33 +++++++++++++++++ .../internal/translator/segment.go | 20 ++++++++--- .../internal/translator/segment_test.go | 28 +++++++++++++++ 5 files changed, 137 insertions(+), 6 deletions(-) create mode 100644 .chloggen/add-aws-http-error-event.yaml diff --git a/.chloggen/add-aws-http-error-event.yaml b/.chloggen/add-aws-http-error-event.yaml new file mode 100644 index 000000000000..a92317bec3e3 --- /dev/null +++ b/.chloggen/add-aws-http-error-event.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: awsxrayexporter + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Convert individual HTTP error events into exceptions within subsegments for AWS SDK spans and strip AWS.SDK prefix from remote aws service name + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [27232] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [] diff --git a/exporter/awsxrayexporter/internal/translator/cause.go b/exporter/awsxrayexporter/internal/translator/cause.go index b5d477a0e282..3f26c84d7b75 100644 --- a/exporter/awsxrayexporter/internal/translator/cause.go +++ b/exporter/awsxrayexporter/internal/translator/cause.go @@ -22,6 +22,10 @@ import ( // ExceptionEventName the name of the exception event. // TODO: Remove this when collector defines this semantic convention. const ExceptionEventName = "exception" +const AwsIndividualHTTPEventName = "HTTP request failure" +const AwsIndividualHTTPErrorEventType = "aws.http.error.event" +const AwsIndividualHTTPErrorCodeAttr = "http.response.status_code" +const AwsIndividualHTTPErrorMsgAttr = "aws.http.error_message" func makeCause(span ptrace.Span, attributes map[string]pcommon.Value, resource pcommon.Resource) (isError, isFault, isThrottle bool, filtered map[string]pcommon.Value, cause *awsxray.CauseData) { @@ -34,14 +38,21 @@ func makeCause(span ptrace.Span, attributes map[string]pcommon.Value, resource p errorKind string ) - hasExceptions := false + isAwsSdkSpan := isAwsSdkSpan(span) + hasExceptionEvents := false + hasAwsIndividualHTTPError := false for i := 0; i < span.Events().Len(); i++ { event := span.Events().At(i) if event.Name() == ExceptionEventName { - hasExceptions = true + hasExceptionEvents = true + break + } + if isAwsSdkSpan && event.Name() == AwsIndividualHTTPEventName { + hasAwsIndividualHTTPError = true break } } + hasExceptions := hasExceptionEvents || hasAwsIndividualHTTPError switch { case hasExceptions: @@ -76,6 +87,26 @@ func makeCause(span ptrace.Span, attributes map[string]pcommon.Value, resource p parsed := parseException(exceptionType, message, stacktrace, isRemote, language) exceptions = append(exceptions, parsed...) + } else if isAwsSdkSpan && event.Name() == AwsIndividualHTTPEventName { + errorCode, ok1 := event.Attributes().Get(AwsIndividualHTTPErrorCodeAttr) + errorMessage, ok2 := event.Attributes().Get(AwsIndividualHTTPErrorMsgAttr) + if ok1 && ok2 { + eventEpochTime := event.Timestamp().AsTime().UnixMicro() + strs := []string{ + errorCode.AsString(), + strconv.FormatFloat(float64(eventEpochTime)/1_000_000, 'f', 6, 64), + errorMessage.Str(), + } + message = strings.Join(strs, "@") + segmentID := newSegmentID() + exception := awsxray.Exception{ + ID: aws.String(hex.EncodeToString(segmentID[:])), + Type: aws.String(AwsIndividualHTTPErrorEventType), + Remote: aws.Bool(true), + Message: aws.String(message), + } + exceptions = append(exceptions, exception) + } } } cause = &awsxray.CauseData{ diff --git a/exporter/awsxrayexporter/internal/translator/cause_test.go b/exporter/awsxrayexporter/internal/translator/cause_test.go index 3a6466f2028f..0d9d4388ef7f 100644 --- a/exporter/awsxrayexporter/internal/translator/cause_test.go +++ b/exporter/awsxrayexporter/internal/translator/cause_test.go @@ -59,6 +59,39 @@ Caused by: java.lang.IllegalArgumentException: bad argument`) assert.Empty(t, cause.Exceptions[2].Message) } +func TestMakeCauseAwsSdkSpan(t *testing.T) { + errorMsg := "this is a test" + attributeMap := make(map[string]interface{}) + attributeMap[conventions.AttributeRPCSystem] = "aws-api" + span := constructExceptionServerSpan(attributeMap, ptrace.StatusCodeError) + span.Status().SetMessage(errorMsg) + + event1 := span.Events().AppendEmpty() + event1.SetName(AwsIndividualHTTPEventName) + event1.Attributes().PutStr(AwsIndividualHTTPErrorCodeAttr, "503") + event1.Attributes().PutStr(AwsIndividualHTTPErrorMsgAttr, "service is temporarily unavailable") + timestamp := pcommon.NewTimestampFromTime(time.UnixMicro(1696954761000001)) + event1.SetTimestamp(timestamp) + + res := pcommon.NewResource() + isError, isFault, isThrottle, _, cause := makeCause(span, nil, res) + + assert.False(t, isError) + assert.True(t, isFault) + assert.False(t, isThrottle) + assert.NotNil(t, cause) + + assert.Equal(t, 1, len(cause.CauseObject.Exceptions)) + exception := cause.CauseObject.Exceptions[0] + assert.Equal(t, AwsIndividualHTTPErrorEventType, *exception.Type) + assert.True(t, *exception.Remote) + + messageParts := strings.SplitN(*exception.Message, "@", 3) + assert.Equal(t, "503", messageParts[0]) + assert.Equal(t, "1696954761.000001", messageParts[1]) + assert.Equal(t, "service is temporarily unavailable", messageParts[2]) +} + func TestCauseExceptionWithoutError(t *testing.T) { var nonErrorStatusCodes = []ptrace.StatusCode{ptrace.StatusCodeUnset, ptrace.StatusCodeOk} diff --git a/exporter/awsxrayexporter/internal/translator/segment.go b/exporter/awsxrayexporter/internal/translator/segment.go index 682b83444979..efda3ed22f2e 100644 --- a/exporter/awsxrayexporter/internal/translator/segment.go +++ b/exporter/awsxrayexporter/internal/translator/segment.go @@ -58,6 +58,8 @@ const ( defaultSegmentName = "span" // maxSegmentNameLength the maximum length of a Segment name maxSegmentNameLength = 200 + // rpc.system value for AWS service remotes + awsAPIRPCSystem = "aws-api" ) const ( @@ -300,6 +302,14 @@ func MakeDocumentFromSegment(segment *awsxray.Segment) (string, error) { return jsonStr, nil } +func isAwsSdkSpan(span ptrace.Span) bool { + attributes := span.Attributes() + if rpcSystem, ok := attributes.Get(conventions.AttributeRPCSystem); ok { + return rpcSystem.Str() == awsAPIRPCSystem + } + return false +} + // MakeSegment converts an OpenTelemetry Span to an X-Ray Segment func MakeSegment(span ptrace.Span, resource pcommon.Resource, indexedAttrs []string, indexAllAttrs bool, logGroupNames []string, skipTimestampValidation bool) (*awsxray.Segment, error) { var segmentType string @@ -359,6 +369,10 @@ func MakeSegment(span ptrace.Span, resource pcommon.Resource, indexedAttrs []str if span.Kind() == ptrace.SpanKindClient || span.Kind() == ptrace.SpanKindProducer || span.Kind() == ptrace.SpanKindConsumer { if remoteServiceName, ok := attributes.Get(awsRemoteService); ok { name = remoteServiceName.Str() + // only strip the prefix for AWS spans + if isAwsSdkSpan(span) && strings.HasPrefix(name, "AWS.SDK.") { + name = strings.TrimPrefix(name, "AWS.SDK.") + } } } @@ -371,10 +385,8 @@ func MakeSegment(span ptrace.Span, resource pcommon.Resource, indexedAttrs []str } if namespace == "" { - if rpcSystem, ok := attributes.Get(conventions.AttributeRPCSystem); ok { - if rpcSystem.Str() == "aws-api" { - namespace = conventions.AttributeCloudProviderAWS - } + if isAwsSdkSpan(span) { + namespace = conventions.AttributeCloudProviderAWS } } diff --git a/exporter/awsxrayexporter/internal/translator/segment_test.go b/exporter/awsxrayexporter/internal/translator/segment_test.go index 4c5e5b470951..dd5978c61309 100644 --- a/exporter/awsxrayexporter/internal/translator/segment_test.go +++ b/exporter/awsxrayexporter/internal/translator/segment_test.go @@ -992,6 +992,34 @@ func TestClientSpanWithAwsRemoteServiceName(t *testing.T) { assert.False(t, strings.Contains(jsonStr, "user")) } +func TestAwsSdkSpanWithAwsRemoteServiceName(t *testing.T) { + spanName := "DynamoDB.PutItem" + parentSpanID := newSegmentID() + user := "testingT" + attributes := make(map[string]interface{}) + attributes[conventions.AttributeRPCSystem] = "aws-api" + attributes[conventions.AttributeHTTPMethod] = "POST" + attributes[conventions.AttributeHTTPScheme] = "https" + attributes[conventions.AttributeRPCService] = "DynamoDb" + attributes[awsRemoteService] = "AWS.SDK.DynamoDb" + + resource := constructDefaultResource() + span := constructClientSpan(parentSpanID, spanName, 0, "OK", attributes) + + segment, _ := MakeSegment(span, resource, nil, false, nil, false) + assert.Equal(t, "DynamoDb", *segment.Name) + assert.Equal(t, "subsegment", *segment.Type) + + jsonStr, err := MakeSegmentDocumentString(span, resource, nil, false, nil, false) + + assert.NotNil(t, jsonStr) + assert.Nil(t, err) + assert.True(t, strings.Contains(jsonStr, "DynamoDb")) + assert.False(t, strings.Contains(jsonStr, "DynamoDb.PutItem")) + assert.False(t, strings.Contains(jsonStr, user)) + assert.False(t, strings.Contains(jsonStr, "user")) +} + func TestProducerSpanWithAwsRemoteServiceName(t *testing.T) { spanName := "ABC.payment" parentSpanID := newSegmentID()