Skip to content

Commit

Permalink
[Application Signals] Improve metric schema for EKS, Native K8s, EC2 (#…
Browse files Browse the repository at this point in the history
…1179)

Co-authored-by: Harry <[email protected]>
Co-authored-by: Lisa Guo <[email protected]>
  • Loading branch information
3 people authored May 20, 2024
1 parent fb7f27f commit 4193fa1
Show file tree
Hide file tree
Showing 76 changed files with 2,602 additions and 1,314 deletions.
168 changes: 168 additions & 0 deletions .github/workflows/application-signals-java-e2e-ec2-asg-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
## Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
## SPDX-License-Identifier: Apache-2.0

# This is a reusable workflow for running the E2E test for App Signals.
# It is meant to be called from another workflow.
# Read more about reusable workflows: https://docs.github.com/en/actions/using-workflows/reusing-workflows#overview
name: App Signals Enablement E2E Testing - EC2 ASG Use Case
on:
workflow_call:

permissions:
id-token: write
contents: read

env:
# The presence of this env var is required for use by terraform and AWS CLI commands
# It is not redundant
AWS_DEFAULT_REGION: us-east-1
APP_SIGNALS_E2E_TEST_ACCOUNT_ID: ${{ secrets.APP_SIGNALS_E2E_TEST_ACCOUNT_ID }}
SAMPLE_APP_FRONTEND_SERVICE_JAR: "s3://aws-appsignals-sample-app-prod-us-east-1/main-service.jar"
SAMPLE_APP_REMOTE_SERVICE_JAR: "s3://aws-appsignals-sample-app-prod-us-east-1/remote-service.jar"
GET_ADOT_JAR_COMMAND: "aws s3 cp s3://adot-main-build-staging-jar/aws-opentelemetry-agent.jar ./adot.jar"
GET_CW_AGENT_RPM_COMMAND: "aws s3 cp s3://${{ secrets.S3_INTEGRATION_BUCKET }}/integration-test/binary/${{ github.sha }}/amazon_linux/amd64/latest/amazon-cloudwatch-agent.rpm ./cw-agent.rpm"
METRIC_NAMESPACE: ApplicationSignals
LOG_GROUP_NAME: /aws/application-signals/data

jobs:
e2e-ec2-single-asg-test:
runs-on: ubuntu-latest
steps:
- name: Get testing resources from aws-application-signals-test-framework
uses: actions/checkout@v4
with:
repository: aws-observability/aws-application-signals-test-framework
ref: add-ec2-platform-support

- name: Generate testing id
run: echo TESTING_ID="java-asg-${{ github.run_id }}-${{ github.run_number }}" >> $GITHUB_ENV

- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::${{ env.APP_SIGNALS_E2E_TEST_ACCOUNT_ID }}:role/${{ secrets.APP_SIGNALS_E2E_TEST_ROLE_NAME }}
aws-region: ${{ env.AWS_DEFAULT_REGION }}

- name: Set up terraform
uses: hashicorp/setup-terraform@v3
with:
terraform_wrapper: false

- name: Deploy sample app via terraform
working-directory: terraform/ec2/asg
run: |
terraform init
terraform validate
terraform apply -auto-approve \
-var="aws_region=${{ env.AWS_DEFAULT_REGION }}" \
-var="test_id=${{ env.TESTING_ID }}" \
-var="sample_app_jar=${{ env.SAMPLE_APP_FRONTEND_SERVICE_JAR }}" \
-var="sample_remote_app_jar=${{ env.SAMPLE_APP_REMOTE_SERVICE_JAR }}" \
-var="get_cw_agent_rpm_command=${{ env.GET_CW_AGENT_RPM_COMMAND }}" \
-var="get_adot_jar_command=${{ env.GET_ADOT_JAR_COMMAND }}"
- name: Get sample app and EC2 instance information
working-directory: terraform/ec2/asg
run: |
main_service_instance_id=$(aws autoscaling describe-auto-scaling-groups --auto-scaling-group-names ec2-single-asg-${{ env.TESTING_ID }} --region ${{ env.AWS_DEFAULT_REGION }} --query "AutoScalingGroups[].Instances[0].InstanceId" --output text)
main_service_public_ip=$(aws ec2 describe-instances --instance-ids $main_service_instance_id --region ${{ env.AWS_DEFAULT_REGION }} --query "Reservations[].Instances[].PublicIpAddress" --output text)
main_service_private_dns_name=$(aws ec2 describe-instances --instance-ids $main_service_instance_id --region ${{ env.AWS_DEFAULT_REGION }} --query "Reservations[].Instances[].PrivateDnsName" --output text)
echo "INSTANCE_ID=$main_service_instance_id" >> $GITHUB_ENV
echo "MAIN_SERVICE_ENDPOINT=$main_service_public_ip:8080" >> $GITHUB_ENV
echo "PRIVATE_DNS_NAME=$main_service_private_dns_name" >> $GITHUB_ENV
echo "EC2_INSTANCE_AMI=$(terraform output ec2_instance_ami)" >> $GITHUB_ENV
echo "REMOTE_SERVICE_IP=$(terraform output sample_app_remote_service_public_ip)" >> $GITHUB_ENV
- name: Wait for app endpoint to come online
id: endpoint-check
run: |
attempt_counter=0
max_attempts=30
until $(curl --output /dev/null --silent --head --fail http://${{ env.MAIN_SERVICE_ENDPOINT }}); do
if [ ${attempt_counter} -eq ${max_attempts} ];then
echo "Max attempts reached"
exit 1
fi
printf '.'
attempt_counter=$(($attempt_counter+1))
sleep 10
done
# This steps increases the speed of the validation by creating the telemetry data in advance
- name: Call all test APIs
continue-on-error: true
run: |
curl -S -s "http://${{ env.MAIN_SERVICE_ENDPOINT }}/outgoing-http-call"
curl -S -s "http://${{ env.MAIN_SERVICE_ENDPOINT }}/aws-sdk-call?ip=${{ env.REMOTE_SERVICE_IP }}&testingId=${{ env.TESTING_ID }}"
curl -S -s "http://${{ env.MAIN_SERVICE_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_IP }}&testingId=${{ env.TESTING_ID }}"
curl -S -s "http://${{ env.MAIN_SERVICE_ENDPOINT }}/client-call"
# Validation for pulse telemetry data
- name: Validate generated EMF logs
id: log-validation
run: ./gradlew validator:run --args='-c java/ec2/asg/log-validation.yml
--testing-id ${{ env.TESTING_ID }}
--endpoint http://${{ env.MAIN_SERVICE_ENDPOINT }}
--remote-service-deployment-name ${{ env.REMOTE_SERVICE_IP }}:8080
--region ${{ env.AWS_DEFAULT_REGION }}
--account-id ${{ env.APP_SIGNALS_E2E_TEST_ACCOUNT_ID }}
--metric-namespace ${{ env.METRIC_NAMESPACE }}
--log-group ${{ env.LOG_GROUP_NAME }}
--service-name sample-application-${{ env.TESTING_ID }}
--remote-service-name sample-remote-application-${{ env.TESTING_ID }}
--instance-ami ${{ env.EC2_INSTANCE_AMI }}
--platform-info ec2-single-asg-${{ env.TESTING_ID }}
--instance-id ${{ env.INSTANCE_ID }}
--private-dns-name ${{ env.PRIVATE_DNS_NAME }}
--query-string ip=${{ env.REMOTE_SERVICE_IP }}&testingId=${{ env.TESTING_ID }}
--rollup'

- name: Validate generated metrics
id: metric-validation
if: (success() || steps.log-validation-1.outcome == 'failure') && !cancelled()
run: ./gradlew validator:run --args='-c java/ec2/asg/metric-validation.yml
--testing-id ${{ env.TESTING_ID }}
--endpoint http://${{ env.MAIN_SERVICE_ENDPOINT }}
--remote-service-deployment-name ${{ env.REMOTE_SERVICE_IP }}:8080
--region ${{ env.AWS_DEFAULT_REGION }}
--account-id ${{ env.APP_SIGNALS_E2E_TEST_ACCOUNT_ID }}
--metric-namespace ${{ env.METRIC_NAMESPACE }}
--log-group ${{ env.LOG_GROUP_NAME }}
--service-name sample-application-${{ env.TESTING_ID }}
--remote-service-name sample-remote-application-${{ env.TESTING_ID }}
--instance-ami ${{ env.EC2_INSTANCE_AMI }}
--platform-info ec2-single-asg-${{ env.TESTING_ID }}
--instance-id ${{ env.INSTANCE_ID }}
--private-dns-name ${{ env.PRIVATE_DNS_NAME }}
--query-string ip=${{ env.REMOTE_SERVICE_IP }}&testingId=${{ env.TESTING_ID }}
--rollup'

- name: Validate generated traces
id: trace-validation
if: (success() || steps.log-validation-1.outcome == 'failure' || steps.metric-validation-1.outcome == 'failure') && !cancelled()
run: ./gradlew validator:run --args='-c java/ec2/asg/trace-validation.yml
--testing-id ${{ env.TESTING_ID }}
--endpoint http://${{ env.MAIN_SERVICE_ENDPOINT }}
--remote-service-deployment-name ${{ env.REMOTE_SERVICE_IP }}:8080
--region ${{ env.AWS_DEFAULT_REGION }}
--account-id ${{ env.APP_SIGNALS_E2E_TEST_ACCOUNT_ID }}
--metric-namespace ${{ env.METRIC_NAMESPACE }}
--log-group ${{ env.LOG_GROUP_NAME }}
--service-name sample-application-${{ env.TESTING_ID }}
--remote-service-name sample-remote-application-${{ env.TESTING_ID }}
--instance-ami ${{ env.EC2_INSTANCE_AMI }}
--platform-info ec2-single-asg-${{ env.TESTING_ID }}
--instance-id ${{ env.INSTANCE_ID }}
--private-dns-name ${{ env.PRIVATE_DNS_NAME }}
--query-string ip=${{ env.REMOTE_SERVICE_IP }}&testingId=${{ env.TESTING_ID }}
--rollup'

# Clean up Procedures
- name: Terraform destroy
if: always()
continue-on-error: true
working-directory: terraform/ec2/asg
run: |
terraform destroy -auto-approve \
-var="test_id=${{ env.TESTING_ID }}"
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ env:
APP_SIGNALS_E2E_TEST_ACCOUNT_ID: ${{ secrets.APP_SIGNALS_E2E_TEST_ACCOUNT_ID }}
SAMPLE_APP_FRONTEND_SERVICE_JAR: "s3://aws-appsignals-sample-app-prod-us-east-1/main-service.jar"
SAMPLE_APP_REMOTE_SERVICE_JAR: "s3://aws-appsignals-sample-app-prod-us-east-1/remote-service.jar"
GET_ADOT_JAR_COMMAND: "wget -O adot.jar https://github.com/aws-observability/aws-otel-java-instrumentation/releases/latest/download/aws-opentelemetry-agent.jar"
GET_ADOT_JAR_COMMAND: "aws s3 cp s3://adot-main-build-staging-jar/aws-opentelemetry-agent.jar ./adot.jar"
GET_CW_AGENT_RPM_COMMAND: "aws s3 cp s3://${{ secrets.S3_INTEGRATION_BUCKET }}/integration-test/binary/${{ github.sha }}/amazon_linux/amd64/latest/amazon-cloudwatch-agent.rpm ./cw-agent.rpm"
METRIC_NAMESPACE: AppSignals
LOG_GROUP_NAME: /aws/appsignals/generic
METRIC_NAMESPACE: ApplicationSignals
LOG_GROUP_NAME: /aws/application-signals/data

jobs:
e2e-ec2-test:
Expand All @@ -31,7 +31,7 @@ jobs:
uses: actions/checkout@v4
with:
repository: aws-observability/aws-application-signals-test-framework
ref: main
ref: ga-release

- name: Generate testing id
run: echo TESTING_ID="${{ github.run_id }}-${{ github.run_number }}" >> $GITHUB_ENV
Expand Down Expand Up @@ -70,6 +70,7 @@ jobs:
run: |
echo "MAIN_SERVICE_ENDPOINT=$(terraform output sample_app_main_service_public_dns):8080" >> $GITHUB_ENV
echo "REMOTE_SERVICE_IP=$(terraform output sample_app_remote_service_public_ip)" >> $GITHUB_ENV
echo "MAIN_SERVICE_INSTANCE_ID=$(terraform output main_service_instance_id)" >> $GITHUB_ENV
- name: Wait for app endpoint to come online
id: endpoint-check
Expand All @@ -91,10 +92,10 @@ jobs:
- name: Call all test APIs
continue-on-error: true
run: |
curl -S -s -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/outgoing-http-call/; echo
curl -S -s -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/aws-sdk-call/; echo
curl -S -s -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_IP }}/; echo
curl -S -s -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/client-call/; echo
curl -S -s "http://${{ env.MAIN_SERVICE_ENDPOINT }}/outgoing-http-call"
curl -S -s "http://${{ env.MAIN_SERVICE_ENDPOINT }}/aws-sdk-call?ip=${{ env.REMOTE_SERVICE_IP }}&testingId=${{ env.TESTING_ID }}"
curl -S -s "http://${{ env.MAIN_SERVICE_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_IP }}&testingId=${{ env.TESTING_ID }}"
curl -S -s "http://${{ env.MAIN_SERVICE_ENDPOINT }}/client-call"
- name: Build Gradle
working-directory: ${{ env.TEST_RESOURCES_FOLDER }}
Expand All @@ -113,8 +114,9 @@ jobs:
--log-group ${{ env.LOG_GROUP_NAME }}
--service-name sample-application-${{ env.TESTING_ID }}
--remote-service-name sample-remote-application-${{ env.TESTING_ID }}
--request-body ip=${{ env.REMOTE_SERVICE_IP }}
--query-string ip=${{ env.REMOTE_SERVICE_IP }}&testingId=${{ env.TESTING_ID }}
--instance-ami ${{ env.EC2_INSTANCE_AMI }}
--instance-id ${{ env.MAIN_SERVICE_INSTANCE_ID }}
--rollup'

- name: Validate generated metrics
Expand All @@ -130,8 +132,9 @@ jobs:
--log-group ${{ env.LOG_GROUP_NAME }}
--service-name sample-application-${{ env.TESTING_ID }}
--remote-service-name sample-remote-application-${{ env.TESTING_ID }}
--request-body ip=${{ env.REMOTE_SERVICE_IP }}
--query-string ip=${{ env.REMOTE_SERVICE_IP }}&testingId=${{ env.TESTING_ID }}
--instance-ami ${{ env.EC2_INSTANCE_AMI }}
--instance-id ${{ env.MAIN_SERVICE_INSTANCE_ID }}
--rollup'

- name: Validate generated traces
Expand All @@ -147,8 +150,9 @@ jobs:
--log-group ${{ env.LOG_GROUP_NAME }}
--service-name sample-application-${{ env.TESTING_ID }}
--remote-service-name sample-remote-application-${{ env.TESTING_ID }}
--request-body ip=${{ env.REMOTE_SERVICE_IP }}
--query-string ip=${{ env.REMOTE_SERVICE_IP }}&testingId=${{ env.TESTING_ID }}
--instance-ami ${{ env.EC2_INSTANCE_AMI }}
--instance-id ${{ env.MAIN_SERVICE_INSTANCE_ID }}
--rollup'

# Clean up Procedures
Expand All @@ -159,4 +163,3 @@ jobs:
run: |
terraform destroy -auto-approve \
-var="test_id=${{ env.TESTING_ID }}"
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@ env:
SAMPLE_APP_NAMESPACE: sample-app-namespace
SAMPLE_APP_FRONTEND_SERVICE_IMAGE: ${{ secrets.APP_SIGNALS_E2E_SAMPLE_APP_FRONTEND_SVC_IMG }}
SAMPLE_APP_REMOTE_SERVICE_IMAGE: ${{ secrets.APP_SIGNALS_E2E_SAMPLE_APP_REMOTE_SVC_IMG }}
METRIC_NAMESPACE: AppSignals
LOG_GROUP_NAME: /aws/appsignals/eks
METRIC_NAMESPACE: ApplicationSignals
LOG_GROUP_NAME: /aws/application-signals/data
ECR_INTEGRATION_TEST_REPO: "cwagent-integration-test"
APPLICATION_SIGNALS_ADOT_IMAGE: 611364707713.dkr.ecr.us-west-2.amazonaws.com/adot-autoinstrumentation-java-operator-staging:1.33.0-SNAPSHOT-91cbba8

jobs:
appsignals-e2e-test:
Expand All @@ -36,7 +37,7 @@ jobs:
uses: actions/checkout@v4
with:
repository: aws-observability/aws-application-signals-test-framework
ref: main
ref: ga-release

- name: Download enablement script
uses: actions/checkout@v4
Expand Down Expand Up @@ -131,6 +132,15 @@ jobs:
run: |
kubectl patch amazoncloudwatchagents -n amazon-cloudwatch cloudwatch-agent --type='json' -p='[{"op": "replace", "path": "/spec/image", "value": ${{ secrets.AWS_ECR_PRIVATE_REGISTRY }}/${{ env.ECR_INTEGRATION_TEST_REPO }}:${{ github.sha }}}]'
kubectl delete pods --all -n amazon-cloudwatch
sleep 10
kubectl wait --for=condition=Ready pod --all -n amazon-cloudwatch
- name: Patch the ADOT image and restart CloudWatch pods
run: |
kubectl patch deploy -namazon-cloudwatch amazon-cloudwatch-observability-controller-manager --type='json' \
-p='[{"op": "replace", "path": "/spec/template/spec/containers/0/args/1", "value": "--auto-instrumentation-java-image=${{ env.APPLICATION_SIGNALS_ADOT_IMAGE }}"}]'
kubectl delete pods --all -n amazon-cloudwatch
sleep 10
kubectl wait --for=condition=Ready pod --all -n amazon-cloudwatch
# Application pods need to be restarted for the
Expand Down Expand Up @@ -202,10 +212,10 @@ jobs:
- name: Call all test APIs
continue-on-error: true
run: |
curl -S -s -o /dev/null http://${{ env.APP_ENDPOINT }}/outgoing-http-call/; echo
curl -S -s -o /dev/null http://${{ env.APP_ENDPOINT }}/aws-sdk-call/; echo
curl -S -s -o /dev/null http://${{ env.APP_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_POD_IP }}/; echo
curl -S -s -o /dev/null http://${{ env.APP_ENDPOINT }}/client-call/; echo
curl -S -s "http://${{ env.APP_ENDPOINT }}/outgoing-http-call"
curl -S -s "http://${{ env.APP_ENDPOINT }}/aws-sdk-call?ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}"
curl -S -s "http://${{ env.APP_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}"
curl -S -s "http://${{ env.APP_ENDPOINT }}/client-call"
- name: Build Gradle
working-directory: ${{ env.TEST_RESOURCES_FOLDER }}
Expand All @@ -226,7 +236,7 @@ jobs:
--platform-info ${{ inputs.test-cluster-name }}
--service-name sample-application-${{ env.TESTING_ID }}
--remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }}
--request-body ip=${{ env.REMOTE_SERVICE_POD_IP }}
--query-string ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}
--rollup'

- name: Call endpoints and validate generated metrics
Expand All @@ -244,7 +254,7 @@ jobs:
--service-name sample-application-${{ env.TESTING_ID }}
--remote-service-name sample-remote-application-${{ env.TESTING_ID }}
--remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }}
--request-body ip=${{ env.REMOTE_SERVICE_POD_IP }}
--query-string ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}
--rollup'

- name: Call endpoints and validate generated traces
Expand All @@ -261,7 +271,7 @@ jobs:
--platform-info ${{ inputs.test-cluster-name }}
--service-name sample-application-${{ env.TESTING_ID }}
--remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }}
--request-body ip=${{ env.REMOTE_SERVICE_POD_IP }}
--query-string ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}
--rollup'

# Clean up Procedures
Expand Down Expand Up @@ -312,4 +322,4 @@ jobs:
--name service-account-${{ env.TESTING_ID }} \
--namespace ${{ env.SAMPLE_APP_NAMESPACE }} \
--cluster ${{ inputs.test-cluster-name }} \
--region ${{ env.AWS_DEFAULT_REGION }}
--region ${{ env.AWS_DEFAULT_REGION }}
Loading

0 comments on commit 4193fa1

Please sign in to comment.