Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Application Signals] Improve metric schema for EKS, Native K8s, EC2 #1179

Merged
merged 12 commits into from
May 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 168 additions & 0 deletions .github/workflows/application-signals-java-e2e-ec2-asg-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
## Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
## SPDX-License-Identifier: Apache-2.0

# This is a reusable workflow for running the E2E test for App Signals.
# It is meant to be called from another workflow.
# Read more about reusable workflows: https://docs.github.com/en/actions/using-workflows/reusing-workflows#overview
name: App Signals Enablement E2E Testing - EC2 ASG Use Case
on:
workflow_call:

permissions:
id-token: write
contents: read

env:
# The presence of this env var is required for use by terraform and AWS CLI commands
# It is not redundant
AWS_DEFAULT_REGION: us-east-1
APP_SIGNALS_E2E_TEST_ACCOUNT_ID: ${{ secrets.APP_SIGNALS_E2E_TEST_ACCOUNT_ID }}
SAMPLE_APP_FRONTEND_SERVICE_JAR: "s3://aws-appsignals-sample-app-prod-us-east-1/main-service.jar"
SAMPLE_APP_REMOTE_SERVICE_JAR: "s3://aws-appsignals-sample-app-prod-us-east-1/remote-service.jar"
GET_ADOT_JAR_COMMAND: "aws s3 cp s3://adot-main-build-staging-jar/aws-opentelemetry-agent.jar ./adot.jar"
GET_CW_AGENT_RPM_COMMAND: "aws s3 cp s3://${{ secrets.S3_INTEGRATION_BUCKET }}/integration-test/binary/${{ github.sha }}/amazon_linux/amd64/latest/amazon-cloudwatch-agent.rpm ./cw-agent.rpm"
METRIC_NAMESPACE: ApplicationSignals
LOG_GROUP_NAME: /aws/application-signals/data

jobs:
e2e-ec2-single-asg-test:
runs-on: ubuntu-latest
steps:
- name: Get testing resources from aws-application-signals-test-framework
uses: actions/checkout@v4
with:
repository: aws-observability/aws-application-signals-test-framework
ref: add-ec2-platform-support

- name: Generate testing id
run: echo TESTING_ID="java-asg-${{ github.run_id }}-${{ github.run_number }}" >> $GITHUB_ENV

- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::${{ env.APP_SIGNALS_E2E_TEST_ACCOUNT_ID }}:role/${{ secrets.APP_SIGNALS_E2E_TEST_ROLE_NAME }}
aws-region: ${{ env.AWS_DEFAULT_REGION }}

- name: Set up terraform
uses: hashicorp/setup-terraform@v3
with:
terraform_wrapper: false

- name: Deploy sample app via terraform
working-directory: terraform/ec2/asg
run: |
terraform init
terraform validate
terraform apply -auto-approve \
-var="aws_region=${{ env.AWS_DEFAULT_REGION }}" \
-var="test_id=${{ env.TESTING_ID }}" \
-var="sample_app_jar=${{ env.SAMPLE_APP_FRONTEND_SERVICE_JAR }}" \
-var="sample_remote_app_jar=${{ env.SAMPLE_APP_REMOTE_SERVICE_JAR }}" \
-var="get_cw_agent_rpm_command=${{ env.GET_CW_AGENT_RPM_COMMAND }}" \
-var="get_adot_jar_command=${{ env.GET_ADOT_JAR_COMMAND }}"

- name: Get sample app and EC2 instance information
working-directory: terraform/ec2/asg
run: |
main_service_instance_id=$(aws autoscaling describe-auto-scaling-groups --auto-scaling-group-names ec2-single-asg-${{ env.TESTING_ID }} --region ${{ env.AWS_DEFAULT_REGION }} --query "AutoScalingGroups[].Instances[0].InstanceId" --output text)
main_service_public_ip=$(aws ec2 describe-instances --instance-ids $main_service_instance_id --region ${{ env.AWS_DEFAULT_REGION }} --query "Reservations[].Instances[].PublicIpAddress" --output text)
main_service_private_dns_name=$(aws ec2 describe-instances --instance-ids $main_service_instance_id --region ${{ env.AWS_DEFAULT_REGION }} --query "Reservations[].Instances[].PrivateDnsName" --output text)
echo "INSTANCE_ID=$main_service_instance_id" >> $GITHUB_ENV
echo "MAIN_SERVICE_ENDPOINT=$main_service_public_ip:8080" >> $GITHUB_ENV
echo "PRIVATE_DNS_NAME=$main_service_private_dns_name" >> $GITHUB_ENV
echo "EC2_INSTANCE_AMI=$(terraform output ec2_instance_ami)" >> $GITHUB_ENV
echo "REMOTE_SERVICE_IP=$(terraform output sample_app_remote_service_public_ip)" >> $GITHUB_ENV

- name: Wait for app endpoint to come online
id: endpoint-check
run: |
attempt_counter=0
max_attempts=30
until $(curl --output /dev/null --silent --head --fail http://${{ env.MAIN_SERVICE_ENDPOINT }}); do
if [ ${attempt_counter} -eq ${max_attempts} ];then
echo "Max attempts reached"
exit 1
fi

printf '.'
attempt_counter=$(($attempt_counter+1))
sleep 10
done

# This steps increases the speed of the validation by creating the telemetry data in advance
- name: Call all test APIs
continue-on-error: true
run: |
curl -S -s "http://${{ env.MAIN_SERVICE_ENDPOINT }}/outgoing-http-call"
curl -S -s "http://${{ env.MAIN_SERVICE_ENDPOINT }}/aws-sdk-call?ip=${{ env.REMOTE_SERVICE_IP }}&testingId=${{ env.TESTING_ID }}"
curl -S -s "http://${{ env.MAIN_SERVICE_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_IP }}&testingId=${{ env.TESTING_ID }}"
curl -S -s "http://${{ env.MAIN_SERVICE_ENDPOINT }}/client-call"

# Validation for pulse telemetry data
- name: Validate generated EMF logs
id: log-validation
run: ./gradlew validator:run --args='-c java/ec2/asg/log-validation.yml
--testing-id ${{ env.TESTING_ID }}
--endpoint http://${{ env.MAIN_SERVICE_ENDPOINT }}
--remote-service-deployment-name ${{ env.REMOTE_SERVICE_IP }}:8080
--region ${{ env.AWS_DEFAULT_REGION }}
--account-id ${{ env.APP_SIGNALS_E2E_TEST_ACCOUNT_ID }}
--metric-namespace ${{ env.METRIC_NAMESPACE }}
--log-group ${{ env.LOG_GROUP_NAME }}
--service-name sample-application-${{ env.TESTING_ID }}
--remote-service-name sample-remote-application-${{ env.TESTING_ID }}
--instance-ami ${{ env.EC2_INSTANCE_AMI }}
--platform-info ec2-single-asg-${{ env.TESTING_ID }}
--instance-id ${{ env.INSTANCE_ID }}
--private-dns-name ${{ env.PRIVATE_DNS_NAME }}
--query-string ip=${{ env.REMOTE_SERVICE_IP }}&testingId=${{ env.TESTING_ID }}
--rollup'

- name: Validate generated metrics
id: metric-validation
if: (success() || steps.log-validation-1.outcome == 'failure') && !cancelled()
run: ./gradlew validator:run --args='-c java/ec2/asg/metric-validation.yml
--testing-id ${{ env.TESTING_ID }}
--endpoint http://${{ env.MAIN_SERVICE_ENDPOINT }}
--remote-service-deployment-name ${{ env.REMOTE_SERVICE_IP }}:8080
--region ${{ env.AWS_DEFAULT_REGION }}
--account-id ${{ env.APP_SIGNALS_E2E_TEST_ACCOUNT_ID }}
--metric-namespace ${{ env.METRIC_NAMESPACE }}
--log-group ${{ env.LOG_GROUP_NAME }}
--service-name sample-application-${{ env.TESTING_ID }}
--remote-service-name sample-remote-application-${{ env.TESTING_ID }}
--instance-ami ${{ env.EC2_INSTANCE_AMI }}
--platform-info ec2-single-asg-${{ env.TESTING_ID }}
--instance-id ${{ env.INSTANCE_ID }}
--private-dns-name ${{ env.PRIVATE_DNS_NAME }}
--query-string ip=${{ env.REMOTE_SERVICE_IP }}&testingId=${{ env.TESTING_ID }}
--rollup'

- name: Validate generated traces
id: trace-validation
if: (success() || steps.log-validation-1.outcome == 'failure' || steps.metric-validation-1.outcome == 'failure') && !cancelled()
run: ./gradlew validator:run --args='-c java/ec2/asg/trace-validation.yml
--testing-id ${{ env.TESTING_ID }}
--endpoint http://${{ env.MAIN_SERVICE_ENDPOINT }}
--remote-service-deployment-name ${{ env.REMOTE_SERVICE_IP }}:8080
--region ${{ env.AWS_DEFAULT_REGION }}
--account-id ${{ env.APP_SIGNALS_E2E_TEST_ACCOUNT_ID }}
--metric-namespace ${{ env.METRIC_NAMESPACE }}
--log-group ${{ env.LOG_GROUP_NAME }}
--service-name sample-application-${{ env.TESTING_ID }}
--remote-service-name sample-remote-application-${{ env.TESTING_ID }}
--instance-ami ${{ env.EC2_INSTANCE_AMI }}
--platform-info ec2-single-asg-${{ env.TESTING_ID }}
--instance-id ${{ env.INSTANCE_ID }}
--private-dns-name ${{ env.PRIVATE_DNS_NAME }}
--query-string ip=${{ env.REMOTE_SERVICE_IP }}&testingId=${{ env.TESTING_ID }}
--rollup'

# Clean up Procedures
- name: Terraform destroy
if: always()
continue-on-error: true
working-directory: terraform/ec2/asg
run: |
terraform destroy -auto-approve \
-var="test_id=${{ env.TESTING_ID }}"
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ env:
APP_SIGNALS_E2E_TEST_ACCOUNT_ID: ${{ secrets.APP_SIGNALS_E2E_TEST_ACCOUNT_ID }}
SAMPLE_APP_FRONTEND_SERVICE_JAR: "s3://aws-appsignals-sample-app-prod-us-east-1/main-service.jar"
SAMPLE_APP_REMOTE_SERVICE_JAR: "s3://aws-appsignals-sample-app-prod-us-east-1/remote-service.jar"
GET_ADOT_JAR_COMMAND: "wget -O adot.jar https://github.com/aws-observability/aws-otel-java-instrumentation/releases/latest/download/aws-opentelemetry-agent.jar"
GET_ADOT_JAR_COMMAND: "aws s3 cp s3://adot-main-build-staging-jar/aws-opentelemetry-agent.jar ./adot.jar"
GET_CW_AGENT_RPM_COMMAND: "aws s3 cp s3://${{ secrets.S3_INTEGRATION_BUCKET }}/integration-test/binary/${{ github.sha }}/amazon_linux/amd64/latest/amazon-cloudwatch-agent.rpm ./cw-agent.rpm"
METRIC_NAMESPACE: AppSignals
LOG_GROUP_NAME: /aws/appsignals/generic
METRIC_NAMESPACE: ApplicationSignals
LOG_GROUP_NAME: /aws/application-signals/data

jobs:
e2e-ec2-test:
Expand All @@ -31,7 +31,7 @@ jobs:
uses: actions/checkout@v4
with:
repository: aws-observability/aws-application-signals-test-framework
ref: main
ref: ga-release

- name: Generate testing id
run: echo TESTING_ID="${{ github.run_id }}-${{ github.run_number }}" >> $GITHUB_ENV
Expand Down Expand Up @@ -70,6 +70,7 @@ jobs:
run: |
echo "MAIN_SERVICE_ENDPOINT=$(terraform output sample_app_main_service_public_dns):8080" >> $GITHUB_ENV
echo "REMOTE_SERVICE_IP=$(terraform output sample_app_remote_service_public_ip)" >> $GITHUB_ENV
echo "MAIN_SERVICE_INSTANCE_ID=$(terraform output main_service_instance_id)" >> $GITHUB_ENV

- name: Wait for app endpoint to come online
id: endpoint-check
Expand All @@ -91,10 +92,10 @@ jobs:
- name: Call all test APIs
continue-on-error: true
run: |
curl -S -s -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/outgoing-http-call/; echo
curl -S -s -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/aws-sdk-call/; echo
curl -S -s -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_IP }}/; echo
curl -S -s -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/client-call/; echo
curl -S -s "http://${{ env.MAIN_SERVICE_ENDPOINT }}/outgoing-http-call"
curl -S -s "http://${{ env.MAIN_SERVICE_ENDPOINT }}/aws-sdk-call?ip=${{ env.REMOTE_SERVICE_IP }}&testingId=${{ env.TESTING_ID }}"
curl -S -s "http://${{ env.MAIN_SERVICE_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_IP }}&testingId=${{ env.TESTING_ID }}"
curl -S -s "http://${{ env.MAIN_SERVICE_ENDPOINT }}/client-call"

- name: Build Gradle
working-directory: ${{ env.TEST_RESOURCES_FOLDER }}
Expand All @@ -113,8 +114,9 @@ jobs:
--log-group ${{ env.LOG_GROUP_NAME }}
--service-name sample-application-${{ env.TESTING_ID }}
--remote-service-name sample-remote-application-${{ env.TESTING_ID }}
--request-body ip=${{ env.REMOTE_SERVICE_IP }}
--query-string ip=${{ env.REMOTE_SERVICE_IP }}&testingId=${{ env.TESTING_ID }}
--instance-ami ${{ env.EC2_INSTANCE_AMI }}
--instance-id ${{ env.MAIN_SERVICE_INSTANCE_ID }}
--rollup'

- name: Validate generated metrics
Expand All @@ -130,8 +132,9 @@ jobs:
--log-group ${{ env.LOG_GROUP_NAME }}
--service-name sample-application-${{ env.TESTING_ID }}
--remote-service-name sample-remote-application-${{ env.TESTING_ID }}
--request-body ip=${{ env.REMOTE_SERVICE_IP }}
--query-string ip=${{ env.REMOTE_SERVICE_IP }}&testingId=${{ env.TESTING_ID }}
--instance-ami ${{ env.EC2_INSTANCE_AMI }}
--instance-id ${{ env.MAIN_SERVICE_INSTANCE_ID }}
--rollup'

- name: Validate generated traces
Expand All @@ -147,8 +150,9 @@ jobs:
--log-group ${{ env.LOG_GROUP_NAME }}
--service-name sample-application-${{ env.TESTING_ID }}
--remote-service-name sample-remote-application-${{ env.TESTING_ID }}
--request-body ip=${{ env.REMOTE_SERVICE_IP }}
--query-string ip=${{ env.REMOTE_SERVICE_IP }}&testingId=${{ env.TESTING_ID }}
--instance-ami ${{ env.EC2_INSTANCE_AMI }}
--instance-id ${{ env.MAIN_SERVICE_INSTANCE_ID }}
--rollup'

# Clean up Procedures
Expand All @@ -159,4 +163,3 @@ jobs:
run: |
terraform destroy -auto-approve \
-var="test_id=${{ env.TESTING_ID }}"

Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@ env:
SAMPLE_APP_NAMESPACE: sample-app-namespace
SAMPLE_APP_FRONTEND_SERVICE_IMAGE: ${{ secrets.APP_SIGNALS_E2E_SAMPLE_APP_FRONTEND_SVC_IMG }}
SAMPLE_APP_REMOTE_SERVICE_IMAGE: ${{ secrets.APP_SIGNALS_E2E_SAMPLE_APP_REMOTE_SVC_IMG }}
METRIC_NAMESPACE: AppSignals
LOG_GROUP_NAME: /aws/appsignals/eks
METRIC_NAMESPACE: ApplicationSignals
LOG_GROUP_NAME: /aws/application-signals/data
ECR_INTEGRATION_TEST_REPO: "cwagent-integration-test"
APPLICATION_SIGNALS_ADOT_IMAGE: 611364707713.dkr.ecr.us-west-2.amazonaws.com/adot-autoinstrumentation-java-operator-staging:1.33.0-SNAPSHOT-91cbba8

jobs:
appsignals-e2e-test:
Expand All @@ -36,7 +37,7 @@ jobs:
uses: actions/checkout@v4
with:
repository: aws-observability/aws-application-signals-test-framework
ref: main
ref: ga-release

- name: Download enablement script
uses: actions/checkout@v4
Expand Down Expand Up @@ -131,6 +132,15 @@ jobs:
run: |
kubectl patch amazoncloudwatchagents -n amazon-cloudwatch cloudwatch-agent --type='json' -p='[{"op": "replace", "path": "/spec/image", "value": ${{ secrets.AWS_ECR_PRIVATE_REGISTRY }}/${{ env.ECR_INTEGRATION_TEST_REPO }}:${{ github.sha }}}]'
kubectl delete pods --all -n amazon-cloudwatch
sleep 10
kubectl wait --for=condition=Ready pod --all -n amazon-cloudwatch

- name: Patch the ADOT image and restart CloudWatch pods
run: |
kubectl patch deploy -namazon-cloudwatch amazon-cloudwatch-observability-controller-manager --type='json' \
-p='[{"op": "replace", "path": "/spec/template/spec/containers/0/args/1", "value": "--auto-instrumentation-java-image=${{ env.APPLICATION_SIGNALS_ADOT_IMAGE }}"}]'
kubectl delete pods --all -n amazon-cloudwatch
sleep 10
kubectl wait --for=condition=Ready pod --all -n amazon-cloudwatch

# Application pods need to be restarted for the
Expand Down Expand Up @@ -202,10 +212,10 @@ jobs:
- name: Call all test APIs
continue-on-error: true
run: |
curl -S -s -o /dev/null http://${{ env.APP_ENDPOINT }}/outgoing-http-call/; echo
curl -S -s -o /dev/null http://${{ env.APP_ENDPOINT }}/aws-sdk-call/; echo
curl -S -s -o /dev/null http://${{ env.APP_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_POD_IP }}/; echo
curl -S -s -o /dev/null http://${{ env.APP_ENDPOINT }}/client-call/; echo
curl -S -s "http://${{ env.APP_ENDPOINT }}/outgoing-http-call"
curl -S -s "http://${{ env.APP_ENDPOINT }}/aws-sdk-call?ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}"
curl -S -s "http://${{ env.APP_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}"
curl -S -s "http://${{ env.APP_ENDPOINT }}/client-call"

- name: Build Gradle
working-directory: ${{ env.TEST_RESOURCES_FOLDER }}
Expand All @@ -226,7 +236,7 @@ jobs:
--platform-info ${{ inputs.test-cluster-name }}
--service-name sample-application-${{ env.TESTING_ID }}
--remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }}
--request-body ip=${{ env.REMOTE_SERVICE_POD_IP }}
--query-string ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}
--rollup'

- name: Call endpoints and validate generated metrics
Expand All @@ -244,7 +254,7 @@ jobs:
--service-name sample-application-${{ env.TESTING_ID }}
--remote-service-name sample-remote-application-${{ env.TESTING_ID }}
--remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }}
--request-body ip=${{ env.REMOTE_SERVICE_POD_IP }}
--query-string ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}
--rollup'

- name: Call endpoints and validate generated traces
Expand All @@ -261,7 +271,7 @@ jobs:
--platform-info ${{ inputs.test-cluster-name }}
--service-name sample-application-${{ env.TESTING_ID }}
--remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }}
--request-body ip=${{ env.REMOTE_SERVICE_POD_IP }}
--query-string ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}
--rollup'

# Clean up Procedures
Expand Down Expand Up @@ -312,4 +322,4 @@ jobs:
--name service-account-${{ env.TESTING_ID }} \
--namespace ${{ env.SAMPLE_APP_NAMESPACE }} \
--cluster ${{ inputs.test-cluster-name }} \
--region ${{ env.AWS_DEFAULT_REGION }}
--region ${{ env.AWS_DEFAULT_REGION }}
Loading
Loading