Update Release Testing With Latest Changes
Showing 3 changed files with 750 additions and 0 deletions.
.github/workflows/application-signals-python-e2e-ec2-test.yml (256 additions, 0 deletions)
@@ -0,0 +1,256 @@
## Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
## SPDX-License-Identifier: Apache-2.0

# This is a reusable workflow for running the Python E2E Canary test for Application Signals.
# It is meant to be called from another workflow.
# Read more about reusable workflows: https://docs.github.com/en/actions/using-workflows/reusing-workflows#overview
name: Application Signals Enablement E2E Testing - Python EC2 Use Case
on:
  workflow_call:
    inputs:
      aws-region:
        required: true
        type: string
      staging_wheel_name:
        required: false
        default: 'aws-opentelemetry-distro'
        type: string
      caller-workflow-name:
        required: true
        type: string

permissions:
  id-token: write
  contents: read

env:
  SAMPLE_APP_ZIP: s3://${{ secrets.APP_SIGNALS_E2E_EC2_JAR }}-prod-${{ inputs.aws-region }}/python-sample-app.zip
  METRIC_NAMESPACE: AppSignals
  LOG_GROUP_NAME: /aws/appsignals/generic
  ADOT_WHEEL_NAME: ${{ inputs.staging_wheel_name }}
  TEST_RESOURCES_FOLDER: ${GITHUB_WORKSPACE}
  GET_CW_AGENT_RPM_COMMAND: "wget -O cw-agent.rpm https://amazoncloudwatch-agent-${{ inputs.aws-region }}.s3.${{ inputs.aws-region }}.amazonaws.com/amazon_linux/amd64/1.300035.0b547/amazon-cloudwatch-agent.rpm"
jobs:
  python-e2e-ec2-test:
    runs-on: ubuntu-latest
    container:
      image: public.ecr.aws/h6o3z5z9/aws-application-signals-test-framework-workflow-container:latest
    steps:
      - uses: actions/checkout@v4
        with:
          repository: ${{ inputs.caller-workflow-name == 'main-build' && 'aws-observability/aws-application-signals-test-framework' || github.repository }}
          ref: ${{ inputs.caller-workflow-name == 'main-build' && 'adot-pending-release' || github.ref }}
          fetch-depth: 0

      - name: Generate testing id
        run: echo TESTING_ID="${{ github.job }}-${{ env.AWS_DEFAULT_REGION }}-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}" >> $GITHUB_ENV

      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.E2E_SECRET_TEST_ROLE_ARN }}
          aws-region: us-east-1

      - name: Retrieve account
        uses: aws-actions/aws-secretsmanager-get-secrets@v1
        with:
          secret-ids:
            ACCOUNT_ID, region-account/${{ inputs.aws-region }}

      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::${{ env.ACCOUNT_ID }}:role/${{ secrets.E2E_TEST_ROLE_ARN }}
          aws-region: ${{ inputs.aws-region }}

      - uses: actions/download-artifact@v3
        if: inputs.caller-workflow-name == 'main-build'
        with:
          name: ${{ env.ADOT_WHEEL_NAME }}

      - name: Upload main-build adot.whl to s3
        if: inputs.caller-workflow-name == 'main-build'
        run: aws s3 cp ${{ env.ADOT_WHEEL_NAME }} s3://adot-main-build-staging-jar/${{ env.ADOT_WHEEL_NAME }}

      - name: Set Get ADOT Wheel command environment variable
        working-directory: terraform/python/ec2
        run: |
          if [ "${{ inputs.caller-workflow-name }}" = "main-build" ]; then
            # Reusing the adot-main-build-staging-jar bucket to store the python wheel file
            echo GET_ADOT_WHEEL_COMMAND="aws s3 cp s3://adot-main-build-staging-jar/${{ env.ADOT_WHEEL_NAME }} ./${{ env.ADOT_WHEEL_NAME }} && python3.9 -m pip install ${{ env.ADOT_WHEEL_NAME }}" >> $GITHUB_ENV
          else
            echo GET_ADOT_WHEEL_COMMAND="python3.9 -m pip install ${{ env.ADOT_WHEEL_NAME }}==0.1.1" >> $GITHUB_ENV
          fi

      - name: Initiate Terraform
        uses: ./.github/workflows/actions/execute_and_retry
        with:
          command: "cd ${{ env.TEST_RESOURCES_FOLDER }}/terraform/python/ec2 && terraform init && terraform validate"
          cleanup: "rm -rf .terraform && rm -rf .terraform.lock.hcl"
      - name: Deploy sample app via terraform and wait for endpoint to come online
        working-directory: terraform/python/ec2
        run: |
          # Attempt to deploy the sample app on an EC2 instance and wait for its endpoint to come online.
          # There may be occasional failures due to transient issues, so try up to 2 times.
          # deployment_failed of 0 indicates that both the terraform deployment and the endpoint came up
          # successfully, while a non-zero value indicates a failure at some point.
          retry_counter=0
          max_retry=2
          while [ $retry_counter -lt $max_retry ]; do
            echo "Attempt $retry_counter"
            deployment_failed=0
            terraform apply -auto-approve \
              -var="aws_region=${{ inputs.aws-region }}" \
              -var="test_id=${{ env.TESTING_ID }}" \
              -var="sample_app_zip=${{ env.SAMPLE_APP_ZIP }}" \
              -var="get_cw_agent_rpm_command=${{ env.GET_CW_AGENT_RPM_COMMAND }}" \
              -var="get_adot_wheel_command=${{ env.GET_ADOT_WHEEL_COMMAND }}" \
              || deployment_failed=$?
            if [ $deployment_failed -ne 0 ]; then
              echo "Terraform deployment was unsuccessful. Will attempt to retry deployment."
            fi

            # If deployment_failed is still 0, the terraform deployment succeeded; now try to connect to the
            # endpoint. Attempts to connect will be made for up to 10 minutes.
            if [ $deployment_failed -eq 0 ]; then
              echo "Attempting to connect to the endpoint"
              sample_app_endpoint=http://$(terraform output sample_app_main_service_public_dns):8000
              attempt_counter=0
              max_attempts=60
              until curl --output /dev/null --silent --head --fail $(echo "$sample_app_endpoint" | tr -d '"'); do
                if [ ${attempt_counter} -eq ${max_attempts} ]; then
                  echo "Failed to connect to endpoint. Will attempt to redeploy sample app."
                  deployment_failed=1
                  break
                fi
                printf '.'
                attempt_counter=$(($attempt_counter+1))
                sleep 10
              done
            fi

            # If deployment_failed is non-zero, either the terraform deployment or the endpoint connection
            # failed, so first destroy the resources created by terraform and try again.
            if [ $deployment_failed -ne 0 ]; then
              echo "Destroying terraform"
              terraform destroy -auto-approve \
                -var="test_id=${{ env.TESTING_ID }}"
              retry_counter=$(($retry_counter+1))
            else
              # The deployment succeeded, so exit the loop.
              break
            fi

            if [ $retry_counter -eq $max_retry ]; then
              echo "Max retry reached; failed to deploy terraform and connect to the endpoint. Exiting."
              exit 1
            fi
          done
      - name: Get the ec2 instance ami id
        run: |
          echo "EC2_INSTANCE_AMI=$(terraform output ec2_instance_ami)" >> $GITHUB_ENV
        working-directory: terraform/python/ec2

      - name: Get the sample app endpoint
        run: |
          echo "MAIN_SERVICE_ENDPOINT=$(terraform output sample_app_main_service_public_dns):8000" >> $GITHUB_ENV
          echo "REMOTE_SERVICE_IP=$(terraform output sample_app_remote_service_public_ip)" >> $GITHUB_ENV
        working-directory: terraform/python/ec2
      # This step increases the speed of the validation by creating the telemetry data in advance
      - name: Call all test APIs
        continue-on-error: true
        run: |
          curl -S -s -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/outgoing-http-call; echo
          curl -S -s -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/aws-sdk-call?testingId=${{ env.TESTING_ID }}; echo
          curl -S -s -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_IP }}; echo
          curl -S -s -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/client-call; echo
      - name: Initiate Gradlew Daemon
        uses: ./.github/workflows/actions/execute_and_retry
        with:
          command: "./gradlew"
          cleanup: "./gradlew clean"
          max_retry: 4
          sleep_time: 30

      # Validation for pulse telemetry data
      - name: Validate generated EMF logs
        id: log-validation
        run: ./gradlew validator:run --args='-c python/ec2/log-validation.yml
          --testing-id ${{ env.TESTING_ID }}
          --endpoint http://${{ env.MAIN_SERVICE_ENDPOINT }}
          --remote-service-deployment-name ${{ env.REMOTE_SERVICE_IP }}:8001
          --region ${{ inputs.aws-region }}
          --metric-namespace ${{ env.METRIC_NAMESPACE }}
          --log-group ${{ env.LOG_GROUP_NAME }}
          --service-name python-sample-application-${{ env.TESTING_ID }}
          --remote-service-name python-sample-remote-application-${{ env.TESTING_ID }}
          --query-string ip=${{ env.REMOTE_SERVICE_IP }}
          --instance-ami ${{ env.EC2_INSTANCE_AMI }}
          --rollup'

      - name: Validate generated metrics
        id: metric-validation
        if: (success() || steps.log-validation.outcome == 'failure') && !cancelled()
        run: ./gradlew validator:run --args='-c python/ec2/metric-validation.yml
          --testing-id ${{ env.TESTING_ID }}
          --endpoint http://${{ env.MAIN_SERVICE_ENDPOINT }}
          --remote-service-deployment-name ${{ env.REMOTE_SERVICE_IP }}:8001
          --region ${{ inputs.aws-region }}
          --metric-namespace ${{ env.METRIC_NAMESPACE }}
          --log-group ${{ env.LOG_GROUP_NAME }}
          --service-name python-sample-application-${{ env.TESTING_ID }}
          --remote-service-name python-sample-remote-application-${{ env.TESTING_ID }}
          --query-string ip=${{ env.REMOTE_SERVICE_IP }}&testingId=${{ env.TESTING_ID }}
          --instance-ami ${{ env.EC2_INSTANCE_AMI }}
          --rollup'

      - name: Validate generated traces
        id: trace-validation
        if: (success() || steps.log-validation.outcome == 'failure' || steps.metric-validation.outcome == 'failure') && !cancelled()
        run: ./gradlew validator:run --args='-c python/ec2/trace-validation.yml
          --testing-id ${{ env.TESTING_ID }}
          --endpoint http://${{ env.MAIN_SERVICE_ENDPOINT }}
          --remote-service-deployment-name ${{ env.REMOTE_SERVICE_IP }}:8001
          --region ${{ inputs.aws-region }}
          --account-id ${{ env.ACCOUNT_ID }}
          --metric-namespace ${{ env.METRIC_NAMESPACE }}
          --log-group ${{ env.LOG_GROUP_NAME }}
          --service-name python-sample-application-${{ env.TESTING_ID }}
          --remote-service-name python-sample-remote-application-${{ env.TESTING_ID }}
          --query-string ip=${{ env.REMOTE_SERVICE_IP }}
          --instance-ami ${{ env.EC2_INSTANCE_AMI }}
          --rollup'
      - name: Publish metric on test result
        if: always()
        run: |
          if [ "${{ steps.log-validation.outcome }}" = "success" ] && [ "${{ steps.metric-validation.outcome }}" = "success" ] && [ "${{ steps.trace-validation.outcome }}" = "success" ]; then
            aws cloudwatch put-metric-data --namespace 'ADOT/GitHubActions' \
              --metric-name Failure \
              --dimensions repository=${{ github.repository }},branch=${{ github.ref_name }},workflow=${{ inputs.caller-workflow-name }} \
              --value 0.0 \
              --region ${{ inputs.aws-region }}
          else
            aws cloudwatch put-metric-data --namespace 'ADOT/GitHubActions' \
              --metric-name Failure \
              --dimensions repository=${{ github.repository }},branch=${{ github.ref_name }},workflow=${{ inputs.caller-workflow-name }} \
              --value 1.0 \
              --region ${{ inputs.aws-region }}
          fi

      # Clean up Procedures
      - name: Terraform destroy
        if: always()
        continue-on-error: true
        working-directory: terraform/python/ec2
        run: |
          terraform destroy -auto-approve \
            -var="test_id=${{ env.TESTING_ID }}"