From a4a0c33eec902e0c3b0c5d9f0ced002a58e71fd5 Mon Sep 17 00:00:00 2001 From: zzhlogin Date: Wed, 1 May 2024 15:30:29 -0700 Subject: [PATCH] Remove Python E2E canary tests. (#178) *Issue #, if available:* *Description of changes:* Remove Python E2E canary tests since it is already migrated to [aws-application-signals-test-framework](https://github.com/aws-observability/aws-application-signals-test-framework) repo. Main build workflow tested: https://github.com/aws-observability/aws-otel-python-instrumentation/actions/runs/8914090859 By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. --- .github/actions/execute_and_retry/action.yml | 60 --- ...pplication-signals-python-e2e-ec2-test.yml | 255 ------------ ...pplication-signals-python-e2e-eks-test.yml | 367 ------------------ .../appsignals-python-e2e-ec2-canary-test.yml | 31 -- .../appsignals-python-e2e-eks-canary-test.yml | 37 -- .github/workflows/main_build.yml | 5 +- 6 files changed, 3 insertions(+), 752 deletions(-) delete mode 100644 .github/actions/execute_and_retry/action.yml delete mode 100644 .github/workflows/application-signals-python-e2e-ec2-test.yml delete mode 100644 .github/workflows/application-signals-python-e2e-eks-test.yml delete mode 100644 .github/workflows/appsignals-python-e2e-ec2-canary-test.yml delete mode 100644 .github/workflows/appsignals-python-e2e-eks-canary-test.yml diff --git a/.github/actions/execute_and_retry/action.yml b/.github/actions/execute_and_retry/action.yml deleted file mode 100644 index ae96d17e8..000000000 --- a/.github/actions/execute_and_retry/action.yml +++ /dev/null @@ -1,60 +0,0 @@ -# Reusable Action for executing commands and retrying them if it fails -name: Command Retry Logic - -inputs: - # (Optional) Command to run before the retry command. To be used for environment setup, etc - pre-command: - required: false - type: string - # (Optional) Number of retries to perform. Default is 2 - max_retry: - required: false - type: number - default: 2 - # (Required) Command to execute with the retry mechanism - command: - required: true - type: string - # (Required) Command to clean up resources before retrying the main command - cleanup: - required: false - type: string - # (Optional) Follow-up command after the main command is finished. - post-command: - required: false - type: string - -runs: - using: "composite" - steps: - - name: Run command - shell: bash - env: - PRE_COMMAND: ${{ inputs.pre-command }} - MAX_RETRY: ${{ inputs.max_retry }} - COMMAND: ${{ inputs.command }} - CLEANUP: ${{ inputs.cleanup }} - POST_COMMAND: ${{ inputs.post-command }} - run: | - eval "$PRE_COMMAND" - - retry_counter=0 - while [ $retry_counter -lt $MAX_RETRY ]; do - attempt_failed=0 - eval "$COMMAND" || attempt_failed=$? - - if [ $attempt_failed -ne 0 ]; then - eval "$CLEANUP" - retry_counter=$(($retry_counter+1)) - sleep 5 - else - break - fi - - if [ $retry_counter -eq $max_retry ]; then - echo "Max retry reached, command failed to execute properly. Exiting code" - exit 1 - fi - done - - eval "$POST_COMMAND" \ No newline at end of file diff --git a/.github/workflows/application-signals-python-e2e-ec2-test.yml b/.github/workflows/application-signals-python-e2e-ec2-test.yml deleted file mode 100644 index c446f1594..000000000 --- a/.github/workflows/application-signals-python-e2e-ec2-test.yml +++ /dev/null @@ -1,255 +0,0 @@ -## Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -## SPDX-License-Identifier: Apache-2.0 - -# This is a reusable workflow for running the Python E2E Canary test for Application Signals. -# It is meant to be called from another workflow. -# Read more about reusable workflows: https://docs.github.com/en/actions/using-workflows/reusing-workflows#overview -name: Application Signals Enablement E2E Testing - Python EC2 Use Case -on: - workflow_call: - inputs: - aws-region: - required: true - type: string - staging_wheel_name: - required: false - default: 'aws-opentelemetry-distro' - type: string - caller-workflow-name: - required: true - type: string - -permissions: - id-token: write - contents: read - -env: - # The precense of this env var is required for use by terraform and AWS CLI commands - # It is not redundant - AWS_DEFAULT_REGION: ${{ inputs.aws-region }} - TEST_ACCOUNT: ${{ secrets.APP_SIGNALS_E2E_TEST_ACC }} - SAMPLE_APP_ZIP: s3://${{ secrets.E2E_TEST_BUCKET }}-prod-${{ inputs.aws-region }}/python-sample-app.zip - METRIC_NAMESPACE: AppSignals - LOG_GROUP_NAME: /aws/appsignals/generic - ADOT_WHEEL_NAME: ${{ inputs.staging_wheel_name }} - -jobs: - python-e2e-ec2-test: - runs-on: ubuntu-latest - container: - image: public.ecr.aws/h6o3z5z9/aws-application-signals-test-framework-workflow-container:latest - steps: - - name: Get testing resources from aws-application-signals-test-framework - uses: actions/checkout@v4 - with: - repository: aws-observability/aws-application-signals-test-framework - ref: main - - - name: Set CW Agent RPM environment variable - run: echo GET_CW_AGENT_RPM_COMMAND="wget -O cw-agent.rpm https://amazoncloudwatch-agent-${{ inputs.aws-region }}.s3.${{ inputs.aws-region }}.amazonaws.com/amazon_linux/amd64/latest/amazon-cloudwatch-agent.rpm" >> $GITHUB_ENV - - - name: Generate testing id - run: echo TESTING_ID="${{ github.run_id }}-${{ github.run_number }}" >> $GITHUB_ENV - - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: ${{ secrets.E2E_SECRET_TEST_ROLE_ARN }} - aws-region: us-east-1 - - - name: Retrieve account - uses: aws-actions/aws-secretsmanager-get-secrets@v1 - with: - secret-ids: - ACCOUNT_ID, region-account/${{ inputs.aws-region }} - - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: arn:aws:iam::${{ env.ACCOUNT_ID }}:role/${{ secrets.E2E_TEST_ROLE_ARN }} - aws-region: ${{ inputs.aws-region }} - - - uses: actions/download-artifact@v3 - if: inputs.caller-workflow-name == 'main-build' - with: - name: ${{ inputs.staging_wheel_name }} - - - name: Upload main-build adot.whl to s3 - if: inputs.caller-workflow-name == 'main-build' - run: aws s3 cp ${{ inputs.staging_wheel_name }} s3://adot-main-build-staging-jar/${{ env.ADOT_WHEEL_NAME }} - - - name: Set Get ADOT Wheel command environment variable - working-directory: terraform/python/ec2 - run: | - if [ ${{ inputs.caller-workflow-name }} == "main-build" ]; then - # Reusing the adot-main-build-staging-jar bucket to store the python wheel file - echo GET_ADOT_WHEEL_COMMAND="aws s3 cp s3://adot-main-build-staging-jar/${{ env.ADOT_WHEEL_NAME }} ./${{ env.ADOT_WHEEL_NAME }} && python3.9 -m pip install ${{ env.ADOT_WHEEL_NAME }}" >> $GITHUB_ENV - else - echo GET_ADOT_WHEEL_COMMAND="python3.9 -m pip install aws-opentelemetry-distro" >> $GITHUB_ENV - fi - - - name: Initiate Terraform - uses: ./.github/workflows/actions/execute_and_retry - with: - command: "cd terraform/python/ec2 && terraform init && terraform validate" - cleanup: "rm -rf .terraform && rm -rf .terraform.lock.hcl" - - - name: Deploy sample app via terraform and wait for endpoint to come online - working-directory: terraform/python/ec2 - run: | - # Attempt to deploy the sample app on an EC2 instance and wait for its endpoint to come online. - # There may be occasional failures due to transitivity issues, so try up to 2 times. - # deployment_failed of 0 indicates that both the terraform deployment and the endpoint are running, while 1 indicates - # that it failed at some point - retry_counter=0 - max_retry=2 - while [ $retry_counter -lt $max_retry ]; do - echo "Attempt $retry_counter" - deployment_failed=0 - terraform apply -auto-approve \ - -var="aws_region=${{ inputs.aws-region }}" \ - -var="test_id=${{ env.TESTING_ID }}" \ - -var="sample_app_zip=${{ env.SAMPLE_APP_ZIP }}" \ - -var="get_cw_agent_rpm_command=${{ env.GET_CW_AGENT_RPM_COMMAND }}" \ - -var="get_adot_wheel_command=${{ env.GET_ADOT_WHEEL_COMMAND }}" \ - || deployment_failed=$? - - if [ $deployment_failed -eq 1 ]; then - echo "Terraform deployment was unsuccessful. Will attempt to retry deployment." - fi - - # If the deployment_failed is still 0, then the terraform deployment succeeded and now try to connect to the endpoint. - # Attempts to connect will be made for up to 10 minutes - if [ $deployment_failed -eq 0 ]; then - echo "Attempting to connect to the endpoint" - sample_app_endpoint=http://$(terraform output sample_app_main_service_public_dns):8000 - attempt_counter=0 - max_attempts=60 - until $(curl --output /dev/null --silent --head --fail $(echo "$sample_app_endpoint" | tr -d '"')); do - if [ ${attempt_counter} -eq ${max_attempts} ];then - echo "Failed to connect to endpoint. Will attempt to redeploy sample app." - deployment_failed=1 - break - fi - - printf '.' - attempt_counter=$(($attempt_counter+1)) - sleep 10 - done - fi - - # If the success is 1 then either the terraform deployment or the endpoint connection failed, so first destroy the - # resources created from terraform and try again. - if [ $deployment_failed -eq 1 ]; then - echo "Destroying terraform" - terraform destroy -auto-approve \ - -var="test_id=${{ env.TESTING_ID }}" - - retry_counter=$(($retry_counter+1)) - else - # If deployment succeeded, then exit the loop - break - fi - - if [ $retry_counter -eq $max_retry ]; then - echo "Max retry reached, failed to deploy terraform and connect to the endpoint. Exiting code" - exit 1 - fi - done - - - name: Get the ec2 instance ami id - run: | - echo "EC2_INSTANCE_AMI=$(terraform output ec2_instance_ami)" >> $GITHUB_ENV - working-directory: terraform/python/ec2 - - - name: Get the sample app endpoint - run: | - echo "MAIN_SERVICE_ENDPOINT=$(terraform output sample_app_main_service_public_dns):8000" >> $GITHUB_ENV - echo "REMOTE_SERVICE_IP=$(terraform output sample_app_remote_service_public_ip)" >> $GITHUB_ENV - working-directory: terraform/python/ec2 - - # This steps increases the speed of the validation by creating the telemetry data in advance - - name: Call all test APIs - continue-on-error: true - run: | - curl -S -s -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/outgoing-http-call - curl -S -s -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/aws-sdk-call - curl -S -s -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_IP }} - curl -S -s -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/client-call - - # Validation for pulse telemetry data - - name: Validate generated EMF logs - id: log-validation - run: ./gradlew validator:run --args='-c python/ec2/log-validation.yml - --testing-id ${{ env.TESTING_ID }} - --endpoint http://${{ env.MAIN_SERVICE_ENDPOINT }} - --remote-service-deployment-name ${{ env.REMOTE_SERVICE_IP }}:8001 - --region ${{ inputs.aws-region }} - --account-id ${{ env.ACCOUNT_ID }} - --metric-namespace ${{ env.METRIC_NAMESPACE }} - --log-group ${{ env.LOG_GROUP_NAME }} - --service-name python-sample-application-${{ env.TESTING_ID }} - --remote-service-name python-sample-remote-application-${{ env.TESTING_ID }} - --request-body ip=${{ env.REMOTE_SERVICE_IP }} - --instance-ami ${{ env.EC2_INSTANCE_AMI }} - --rollup' - - - name: Validate generated metrics - id: metric-validation - if: (success() || steps.log-validation.outcome == 'failure') && !cancelled() - run: ./gradlew validator:run --args='-c python/ec2/metric-validation.yml - --testing-id ${{ env.TESTING_ID }} - --endpoint http://${{ env.MAIN_SERVICE_ENDPOINT }} - --remote-service-deployment-name ${{ env.REMOTE_SERVICE_IP }}:8001 - --region ${{ inputs.aws-region }} - --account-id ${{ env.ACCOUNT_ID }} - --metric-namespace ${{ env.METRIC_NAMESPACE }} - --log-group ${{ env.LOG_GROUP_NAME }} - --service-name python-sample-application-${{ env.TESTING_ID }} - --remote-service-name python-sample-remote-application-${{ env.TESTING_ID }} - --request-body ip=${{ env.REMOTE_SERVICE_IP }} - --instance-ami ${{ env.EC2_INSTANCE_AMI }} - --rollup' - - - name: Validate generated traces - id: trace-validation - if: (success() || steps.log-validation.outcome == 'failure' || steps.metric-validation.outcome == 'failure') && !cancelled() - run: ./gradlew validator:run --args='-c python/ec2/trace-validation.yml - --testing-id ${{ env.TESTING_ID }} - --endpoint http://${{ env.MAIN_SERVICE_ENDPOINT }} - --remote-service-deployment-name ${{ env.REMOTE_SERVICE_IP }}:8001 - --region ${{ inputs.aws-region }} - --account-id ${{ env.ACCOUNT_ID }} - --metric-namespace ${{ env.METRIC_NAMESPACE }} - --log-group ${{ env.LOG_GROUP_NAME }} - --service-name python-sample-application-${{ env.TESTING_ID }} - --remote-service-name python-sample-remote-application-${{ env.TESTING_ID }} - --request-body ip=${{ env.REMOTE_SERVICE_IP }} - --instance-ami ${{ env.EC2_INSTANCE_AMI }} - --rollup' - - - name: Publish metric on test result - if: always() - run: | - if [ "${{ steps.log-validation.outcome }}" = "success" ] && [ "${{ steps.metric-validation.outcome }}" = "success" ] && [ "${{ steps.trace-validation.outcome }}" = "success" ]; then - aws cloudwatch put-metric-data --namespace 'ADOT/GitHubActions' \ - --metric-name Failure \ - --dimensions repository=${{ github.repository }},branch=${{ github.ref_name }},workflow=${{ inputs.caller-workflow-name }} \ - --value 0.0 \ - --region ${{ inputs.aws-region }} - else - aws cloudwatch put-metric-data --namespace 'ADOT/GitHubActions' \ - --metric-name Failure \ - --dimensions repository=${{ github.repository }},branch=${{ github.ref_name }},workflow=${{ inputs.caller-workflow-name }} \ - --value 1.0 \ - --region ${{ inputs.aws-region }} - fi - - # Clean up Procedures - - name: Terraform destroy - if: always() - continue-on-error: true - working-directory: terraform/python/ec2 - run: | - terraform destroy -auto-approve \ - -var="test_id=${{ env.TESTING_ID }}" \ No newline at end of file diff --git a/.github/workflows/application-signals-python-e2e-eks-test.yml b/.github/workflows/application-signals-python-e2e-eks-test.yml deleted file mode 100644 index 86fb24db8..000000000 --- a/.github/workflows/application-signals-python-e2e-eks-test.yml +++ /dev/null @@ -1,367 +0,0 @@ -## Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -## SPDX-License-Identifier: Apache-2.0 - -# This is a reusable workflow for running the E2E test for Application Signals. -# It is meant to be called from another workflow. -# Read more about reusable workflows: https://docs.github.com/en/actions/using-workflows/reusing-workflows#overview -name: Application Signals Enablement E2E Testing - Python EKS -on: - workflow_call: - inputs: - aws-region: - required: true - type: string - test-cluster-name: - required: true - type: string - application-signals-adot-image: - required: false - type: string - application-signals-adot-image-tag: - required: false - type: string - caller-workflow-name: - required: true - type: string - -permissions: - id-token: write - contents: read - -env: - # The precense of this env var is required for use by terraform and AWS CLI commands - # It is not redundant - AWS_DEFAULT_REGION: ${{ inputs.aws-region }} - METRIC_NAMESPACE: AppSignals - LOG_GROUP_NAME: /aws/appsignals/eks - -jobs: - python-e2e-eks-test: - runs-on: ubuntu-latest - container: - image: public.ecr.aws/h6o3z5z9/aws-application-signals-test-framework-workflow-container:latest - steps: - - uses: actions/checkout@v4 - with: - repository: aws-observability/aws-application-signals-test-framework - ref: main - fetch-depth: 0 - - - name: Download enablement script - uses: ./.github/workflows/actions/execute_and_retry - with: - pre-command: "mkdir enablement-script && cd enablement-script" - command: "wget https://raw.githubusercontent.com/aws-observability/application-signals-demo/main/scripts/eks/appsignals/enable-app-signals.sh - && wget https://raw.githubusercontent.com/aws-observability/application-signals-demo/main/scripts/eks/appsignals/clean-app-signals.sh" - cleanup: "rm -f enable-app-signals.sh && rm -f clean-app-signals.sh" - post-command: "chmod +x enable-app-signals.sh && chmod +x clean-app-signals.sh" - - - name: Remove log group deletion command - if: always() - working-directory: enablement-script - run: | - delete_log_group="aws logs delete-log-group --log-group-name '${{ env.LOG_GROUP_NAME }}' --region \$REGION" - sed -i "s#$delete_log_group##g" clean-app-signals.sh - - - name: Generate testing id and python sample app namespace - run: | - echo TESTING_ID="${{ inputs.aws-region }}-${{ github.run_id }}-${{ github.run_number }}" >> $GITHUB_ENV - echo PYTHON_SAMPLE_APP_NAMESPACE="ns-${{ github.run_id }}-${{ github.run_number }}" >> $GITHUB_ENV - - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: ${{ secrets.E2E_SECRET_TEST_ROLE_ARN }} - aws-region: us-east-1 - - - name: Retrieve account - uses: aws-actions/aws-secretsmanager-get-secrets@v1 - with: - secret-ids: - ACCOUNT_ID, region-account/${{ inputs.aws-region }} - - # ADOT_E2E_TEST_ROLE_ARN is used to access main build e2e test cluster - # E2E_TEST_ROLE_ARN is used to access canary e2e test cluster - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: arn:aws:iam::${{ env.ACCOUNT_ID }}:role/${{ inputs['caller-workflow-name'] == 'main-build' && secrets.ADOT_E2E_TEST_ROLE_ARN || secrets.E2E_TEST_ROLE_ARN }} - aws-region: ${{ inputs.aws-region }} - - - name: Set up kubeconfig - run: aws eks update-kubeconfig --name ${{ inputs.test-cluster-name }} --region ${{ inputs.aws-region }} - - - name: Add eksctl to Github Path - run: | - echo "${{ github.workspace }}/eksctl" >> $GITHUB_PATH - - - name: Create role for AWS access from the sample app - id: create_service_account - uses: ./.github/workflows/actions/execute_and_retry - with: - command: "eksctl create iamserviceaccount \ - --name service-account-${{ env.TESTING_ID }} \ - --namespace ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }} \ - --cluster ${{ inputs.test-cluster-name }} \ - --role-name eks-s3-access-${{ env.TESTING_ID }} \ - --attach-policy-arn arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess \ - --region ${{ inputs.aws-region }} \ - --approve" - - - name: Initiate Terraform - uses: ./.github/workflows/actions/execute_and_retry - with: - command: "cd terraform/python/eks && terraform init && terraform validate" - cleanup: "rm -rf .terraform && rm -rf .terraform.lock.hcl" - - - name: Deploy sample app via terraform and wait for the endpoint to come online - id: deploy-python-app - working-directory: terraform/python/eks - run: | - # Attempt to deploy the sample app on an EKS instance and wait for its endpoint to come online. - # There may be occasional failures due to transitivity issues, so try up to 2 times. - # deployment_failed of 0 indicates that both the terraform deployment and the endpoint are running, while 1 indicates - # that it failed at some point - retry_counter=0 - max_retry=2 - while [ $retry_counter -lt $max_retry ]; do - echo "Attempt $retry_counter" - deployment_failed=0 - terraform apply -auto-approve \ - -var="test_id=${{ env.TESTING_ID }}" \ - -var="aws_region=${{ inputs.aws-region }}" \ - -var="kube_directory_path=${{ github.workspace }}/.kube" \ - -var="eks_cluster_name=${{ inputs.test-cluster-name }}" \ - -var="eks_cluster_context_name=$(kubectl config current-context)" \ - -var="test_namespace=${{ env.PYTHON_SAMPLE_APP_NAMESPACE }}" \ - -var="service_account_aws_access=service-account-${{ env.TESTING_ID }}" \ - -var="python_app_image=${{ env.ACCOUNT_ID }}.dkr.ecr.${{ inputs.aws-region }}.amazonaws.com/${{ secrets.APP_SIGNALS_PYTHON_E2E_FE_SA_IMG }}" \ - -var="python_remote_app_image=${{ env.ACCOUNT_ID }}.dkr.ecr.${{ inputs.aws-region }}.amazonaws.com/${{ secrets.APP_SIGNALS_PYTHON_E2E_RE_SA_IMG }}" \ - || deployment_failed=$? - - if [ $deployment_failed -eq 1 ]; then - echo "Terraform deployment was unsuccessful. Will attempt to retry deployment." - fi - - # If the deployment_failed is still 0, then the terraform deployment succeeded and now try to connect to the endpoint - # after installing Application Signals. Attempts to connect will be made for up to 10 minutes - if [ $deployment_failed -eq 0 ]; then - echo "Installing application signals to the sample app" - . ${GITHUB_WORKSPACE}/.github/workflows/util/execute_and_retry.sh - execute_and_retry 2 \ - "${GITHUB_WORKSPACE}/enablement-script/enable-app-signals.sh \ - ${{ inputs.test-cluster-name }} \ - ${{ inputs.aws-region }} \ - ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }}" \ - "${GITHUB_WORKSPACE}/enablement-script/clean-app-signals.sh \ - ${{ inputs.test-cluster-name }} \ - ${{ inputs.aws-region }} \ - ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }} && \ - aws eks update-kubeconfig --name ${{ inputs.test-cluster-name }} --region ${{ inputs.aws-region }}" - - if [ "${{ inputs.caller-workflow-name }}" = "main-build" ]; then - echo "Patching staging adot image for main build:" - execute_and_retry 2 "kubectl patch deploy -namazon-cloudwatch amazon-cloudwatch-observability-controller-manager --type='json' \ - -p='[{"op": \"replace\", \"path\": \"/spec/template/spec/containers/0/args/2\", \"value\": \"--auto-instrumentation-python-image=${{ inputs.application-signals-adot-image }}:${{ inputs.application-signals-adot-image-tag }}\"}]'" - execute_and_retry 2 "kubectl delete pods --all -n amazon-cloudwatch" - execute_and_retry 2 "kubectl wait --for=condition=Ready pod --all -n amazon-cloudwatch" - fi - - execute_and_retry 2 "kubectl delete pods --all -n ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }}" - execute_and_retry 2 "kubectl wait --for=condition=Ready --request-timeout '5m' pod --all -n ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }}" - - echo "Attempting to connect to the main sample app endpoint" - python_app_endpoint=http://$(terraform output python_app_endpoint) - attempt_counter=0 - max_attempts=60 - until $(curl --output /dev/null --silent --head --fail $(echo "$python_app_endpoint" | tr -d '"')); do - if [ ${attempt_counter} -eq ${max_attempts} ];then - echo "Failed to connect to endpoint. Will attempt to redeploy sample app." - deployment_failed=1 - break - fi - - printf '.' - attempt_counter=$(($attempt_counter+1)) - sleep 10 - done - fi - - # If the deployment_failed is 1 then either the terraform deployment or the endpoint connection failed, so first destroy the - # resources created from terraform and try again. - if [ $deployment_failed -eq 1 ]; then - echo "Cleaning up Application Signal" - ${GITHUB_WORKSPACE}/enablement-script/clean-app-signals.sh \ - ${{ inputs.test-cluster-name }} \ - ${{ inputs.aws-region }} \ - ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }} - - # Running clean-app-signal.sh removes the current cluster from the config. Update the cluster again for subsequent runs. - aws eks update-kubeconfig --name ${{ inputs.test-cluster-name }} --region ${{ inputs.aws-region }} - - echo "Destroying terraform" - terraform destroy -auto-approve \ - -var="test_id=${{ env.TESTING_ID }}" \ - -var="aws_region=${{ inputs.aws-region }}" \ - -var="kube_directory_path=${{ github.workspace }}/.kube" \ - -var="eks_cluster_name=${{ inputs.test-cluster-name }}" \ - -var="eks_cluster_context_name=$(kubectl config current-context)" \ - -var="test_namespace=${{ env.PYTHON_SAMPLE_APP_NAMESPACE }}" \ - -var="service_account_aws_access=service-account-${{ env.TESTING_ID }}" \ - -var="python_app_image=${{ env.ACCOUNT_ID }}.dkr.ecr.${{ inputs.aws-region }}.amazonaws.com/${{ secrets.APP_SIGNALS_PYTHON_E2E_FE_SA_IMG }}" \ - -var="python_remote_app_image=${{ env.ACCOUNT_ID }}.dkr.ecr.${{ inputs.aws-region }}.amazonaws.com/${{ secrets.APP_SIGNALS_PYTHON_E2E_RE_SA_IMG }}" - - retry_counter=$(($retry_counter+1)) - else - # If deployment succeeded, then exit the loop - break - fi - - if [ $retry_counter -eq $max_retry ]; then - echo "Max retry reached, failed to deploy terraform and connect to the endpoint. Exiting code" - exit 1 - fi - done - - - name: Get remote service pod name and IP - run: | - echo "REMOTE_SERVICE_DEPLOYMENT_NAME=$(kubectl get deployments -n ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }} --selector=app=remote-app -o jsonpath='{.items[0].metadata.name}')" >> $GITHUB_ENV - echo "REMOTE_SERVICE_POD_IP=$(kubectl get pods -n ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }} --selector=app=remote-app -o jsonpath='{.items[0].status.podIP}')" >> $GITHUB_ENV - - - name: Verify pod Adot image - run: | - kubectl get pods -n ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }} --output json | \ - jq '.items[0].status.initContainerStatuses[0].imageID' - - - name: Verify pod CWAgent image - run: | - kubectl get pods -n amazon-cloudwatch --output json | \ - jq '.items[0].status.containerStatuses[0].imageID' - - - name: Get the sample app endpoint - run: echo "APP_ENDPOINT=$(terraform output python_app_endpoint)" >> $GITHUB_ENV - working-directory: terraform/python/eks - - # This steps increases the speed of the validation by creating the telemetry data in advance - - name: Call all test APIs - continue-on-error: true - run: | - curl -S -s -o /dev/null http://${{ env.APP_ENDPOINT }}/outgoing-http-call - curl -S -s -o /dev/null http://${{ env.APP_ENDPOINT }}/aws-sdk-call - curl -S -s -o /dev/null http://${{ env.APP_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_POD_IP }} - curl -S -s -o /dev/null http://${{ env.APP_ENDPOINT }}/client-call - - # Validation for application signals telemetry data - - name: Call endpoint and validate generated EMF logs - id: log-validation - if: steps.deploy-python-app.outcome == 'success' && !cancelled() - run: ./gradlew validator:run --args='-c python/eks/log-validation.yml - --testing-id ${{ env.TESTING_ID }} - --endpoint http://${{ env.APP_ENDPOINT }} - --region ${{ inputs.aws-region }} - --account-id ${{ env.ACCOUNT_ID }} - --metric-namespace ${{ env.METRIC_NAMESPACE }} - --log-group ${{ env.LOG_GROUP_NAME }} - --app-namespace ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }} - --platform-info ${{ inputs.test-cluster-name }} - --service-name python-application-${{ env.TESTING_ID }} - --remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }} - --request-body ip=${{ env.REMOTE_SERVICE_POD_IP }} - --rollup' - - - name: Call endpoints and validate generated metrics - id: metric-validation - if: (steps.deploy-python-app.outcome == 'success' || steps.log-validation.outcome == 'failure') && !cancelled() - run: ./gradlew validator:run --args='-c python/eks/metric-validation.yml - --testing-id ${{ env.TESTING_ID }} - --endpoint http://${{ env.APP_ENDPOINT }} - --region ${{ inputs.aws-region }} - --account-id ${{ env.ACCOUNT_ID }} - --metric-namespace ${{ env.METRIC_NAMESPACE }} - --log-group ${{ env.LOG_GROUP_NAME }} - --app-namespace ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }} - --platform-info ${{ inputs.test-cluster-name }} - --service-name python-application-${{ env.TESTING_ID }} - --remote-service-name python-remote-application-${{ env.TESTING_ID }} - --remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }} - --request-body ip=${{ env.REMOTE_SERVICE_POD_IP }} - --rollup' - - - name: Call endpoints and validate generated traces - id: trace-validation - if: (steps.deploy-python-app.outcome == 'success' || steps.log-validation.outcome == 'failure' || steps.metric-validation.outcome == 'failure') && !cancelled() - run: ./gradlew validator:run --args='-c python/eks/trace-validation.yml - --testing-id ${{ env.TESTING_ID }} - --endpoint http://${{ env.APP_ENDPOINT }} - --region ${{ inputs.aws-region }} - --account-id ${{ env.ACCOUNT_ID }} - --metric-namespace ${{ env.METRIC_NAMESPACE }} - --log-group ${{ env.LOG_GROUP_NAME }} - --app-namespace ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }} - --platform-info ${{ inputs.test-cluster-name }} - --service-name python-application-${{ env.TESTING_ID }} - --remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }} - --request-body ip=${{ env.REMOTE_SERVICE_POD_IP }} - --rollup' - - - name: Publish metric on test result - if: always() - run: | - if [ "${{ steps.log-validation.outcome }}" = "success" ] && [ "${{ steps.metric-validation.outcome }}" = "success" ] && [ "${{ steps.trace-validation.outcome }}" = "success" ]; then - aws cloudwatch put-metric-data --namespace 'ADOT/GitHubActions' \ - --metric-name Failure \ - --dimensions repository=${{ github.repository }},branch=${{ github.ref_name }},workflow=${{ inputs.caller-workflow-name }} \ - --value 0.0 \ - --region ${{ inputs.aws-region }} - else - aws cloudwatch put-metric-data --namespace 'ADOT/GitHubActions' \ - --metric-name Failure \ - --dimensions repository=${{ github.repository }},branch=${{ github.ref_name }},workflow=${{ inputs.caller-workflow-name }} \ - --value 1.0 \ - --region ${{ inputs.aws-region }} - fi - - # Clean up Procedures - - - name: Clean Up Application Signals - if: always() - continue-on-error: true - working-directory: enablement-script - run: | - ./clean-app-signals.sh \ - ${{ inputs.test-cluster-name }} \ - ${{ inputs.aws-region }} \ - ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }} - - # This step also deletes lingering resources from previous test runs - - name: Delete all sample app resources - if: always() - continue-on-error: true - timeout-minutes: 10 - run: kubectl delete namespace ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }} - - - name: Terraform destroy - if: always() - continue-on-error: true - timeout-minutes: 5 - working-directory: terraform/python/eks - run: | - terraform destroy -auto-approve \ - -var="test_id=${{ env.TESTING_ID }}" \ - -var="aws_region=${{ inputs.aws-region }}" \ - -var="kube_directory_path=${{ github.workspace }}/.kube" \ - -var="eks_cluster_name=${{ inputs.test-cluster-name }}" \ - -var="test_namespace=${{ env.PYTHON_SAMPLE_APP_NAMESPACE }}" \ - -var="service_account_aws_access=service-account-${{ env.TESTING_ID }}" \ - -var="python_app_image=${{ env.ACCOUNT_ID }}.dkr.ecr.${{ inputs.aws-region }}.amazonaws.com/${{ secrets.APP_SIGNALS_PYTHON_E2E_FE_SA_IMG }}" \ - -var="python_remote_app_image=${{ env.ACCOUNT_ID }}.dkr.ecr.${{ inputs.aws-region }}.amazonaws.com/${{ secrets.APP_SIGNALS_PYTHON_E2E_RE_SA_IMG }}" - - - name: Remove aws access service account - if: always() - continue-on-error: true - run: | - eksctl delete iamserviceaccount \ - --name service-account-${{ env.TESTING_ID }} \ - --namespace ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }} \ - --cluster ${{ inputs.test-cluster-name }} \ - --region ${{ inputs.aws-region }} diff --git a/.github/workflows/appsignals-python-e2e-ec2-canary-test.yml b/.github/workflows/appsignals-python-e2e-ec2-canary-test.yml deleted file mode 100644 index f97f72183..000000000 --- a/.github/workflows/appsignals-python-e2e-ec2-canary-test.yml +++ /dev/null @@ -1,31 +0,0 @@ -## Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -## SPDX-License-Identifier: Apache-2.0 - -## This workflow aims to run the Application Signals Python end-to-end tests as a canary to -## test the artifacts for Application Signals enablement. It will deploy a sample app and remote -## service on two EC2 instances, call the APIs, and validate the generated telemetry, -## including logs, metrics, and traces. -name: App Signals Enablement - Python E2E EC2 Canary Testing -on: - schedule: - - cron: '*/15 * * * *' # run the workflow every 15 minutes - workflow_dispatch: # be able to run the workflow on demand - -permissions: - id-token: write - contents: read - -jobs: - python-e2e-ec2-test: - strategy: - fail-fast: false - matrix: - aws-region: ['af-south-1','ap-east-1','ap-northeast-1','ap-northeast-2','ap-northeast-3','ap-south-1','ap-south-2','ap-southeast-1', - 'ap-southeast-2','ap-southeast-3','ap-southeast-4','ca-central-1','eu-central-1','eu-central-2','eu-north-1', - 'eu-south-1','eu-south-2','eu-west-1','eu-west-2','eu-west-3','il-central-1','me-central-1','me-south-1', 'sa-east-1', - 'us-east-1','us-east-2', 'us-west-1', 'us-west-2'] - uses: ./.github/workflows/application-signals-python-e2e-ec2-test.yml - secrets: inherit - with: - aws-region: ${{ matrix.aws-region }} - caller-workflow-name: 'appsignals-python-e2e-ec2-canary-test' \ No newline at end of file diff --git a/.github/workflows/appsignals-python-e2e-eks-canary-test.yml b/.github/workflows/appsignals-python-e2e-eks-canary-test.yml deleted file mode 100644 index 2028df78d..000000000 --- a/.github/workflows/appsignals-python-e2e-eks-canary-test.yml +++ /dev/null @@ -1,37 +0,0 @@ -## Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -## SPDX-License-Identifier: Apache-2.0 - -## This workflow aims to run the Application Signals Python end-to-end tests as a canary to -## test the artifacts for Application Signals enablement. It will deploy a sample app and remote -## service onto an EKS cluster, call the APIs, and validate the generated telemetry, -## including logs, metrics, and traces. -name: App Signals Enablement - Python E2E EKS Canary Testing -on: - schedule: - - cron: '*/15 * * * *' # run the workflow every 15 minutes - workflow_dispatch: # be able to run the workflow on demand - -concurrency: - group: ${{ github.workflow }} - cancel-in-progress: false - -permissions: - id-token: write - contents: read - - -jobs: - python-e2e-eks-test: - strategy: - fail-fast: false - matrix: - aws-region: ['af-south-1','ap-east-1','ap-northeast-1','ap-northeast-2','ap-northeast-3','ap-south-1','ap-south-2','ap-southeast-1', - 'ap-southeast-2','ap-southeast-3','ap-southeast-4','ca-central-1','eu-central-1','eu-central-2','eu-north-1', - 'eu-south-1','eu-south-2','eu-west-1','eu-west-2','eu-west-3','il-central-1','me-central-1','me-south-1', 'sa-east-1', - 'us-east-1','us-east-2', 'us-west-1', 'us-west-2'] - uses: ./.github/workflows/application-signals-python-e2e-eks-test.yml - secrets: inherit - with: - aws-region: ${{ matrix.aws-region }} - test-cluster-name: 'e2e-python-second-test' - caller-workflow-name: 'appsignals-python-e2e-eks-canary-test' diff --git a/.github/workflows/main_build.yml b/.github/workflows/main_build.yml index e574f8bf6..55282f911 100644 --- a/.github/workflows/main_build.yml +++ b/.github/workflows/main_build.yml @@ -5,6 +5,7 @@ on: branches: - main - "release/v*" + - e2e-parallel env: AWS_DEFAULT_REGION: us-east-1 STAGING_ECR_REGISTRY: 637423224110.dkr.ecr.us-east-1.amazonaws.com @@ -89,7 +90,7 @@ jobs: # Application Signals specific e2e eks tests application-signals-python-e2e-eks-test: needs: [build] - uses: ./.github/workflows/application-signals-python-e2e-eks-test.yml + uses: aws-observability/aws-application-signals-test-framework/.github/workflows/application-signals-python-e2e-eks-test.yml@main secrets: inherit with: aws-region: ${{ needs.build.outputs.aws_default_region }} @@ -101,7 +102,7 @@ jobs: # Application Signals specific e2e tests for ec2 application-signals-python-e2e-ec2-test: needs: [ build ] - uses: ./.github/workflows/application-signals-python-e2e-ec2-test.yml + uses: aws-observability/aws-application-signals-test-framework/.github/workflows/application-signals-python-e2e-ec2-test.yml@main secrets: inherit with: aws-region: ${{ needs.build.outputs.aws_default_region }}