Skip to content

Run Integration Tests #1132

Run Integration Tests

Run Integration Tests #1132

# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT
name: Run Integration Tests
env:
PRIVATE_KEY: ${{ secrets.AWS_PRIVATE_KEY }}
TERRAFORM_AWS_ASSUME_ROLE: ${{ vars.TERRAFORM_AWS_ASSUME_ROLE }}
TERRAFORM_AWS_ASSUME_ROLE_DURATION: 14400 # 4 hours
S3_INTEGRATION_BUCKET: ${{ vars.S3_INTEGRATION_BUCKET }}
KEY_NAME: ${{ secrets.KEY_NAME }}
CF_IAM_ROLE: ${{ secrets.CF_IAM_ROLE }}
CF_KEY_NAME: ${{ secrets.CF_KEY_NAME }}
ECR_INTEGRATION_TEST_REPO: "cwagent-integration-test"
CWA_GITHUB_TEST_REPO_NAME: "aws/amazon-cloudwatch-agent-test"
CWA_GITHUB_TEST_REPO_URL: "https://github.com/aws/amazon-cloudwatch-agent-test.git"
CWA_GITHUB_TEST_REPO_BRANCH: "gpuE2eTest"
TERRAFORM_AWS_ASSUME_ROLE_ITAR: ${{ vars.TERRAFORM_AWS_ASSUME_ROLE_ITAR }}
S3_INTEGRATION_BUCKET_ITAR: ${{ vars.S3_INTEGRATION_BUCKET_ITAR }}
TERRAFORM_AWS_ASSUME_ROLE_CN: ${{ vars.TERRAFORM_AWS_ASSUME_ROLE_CN }}
S3_INTEGRATION_BUCKET_CN: ${{ vars.S3_INTEGRATION_BUCKET_CN }}
on:
push:
branches:
- main*
paths-ignore:
- '**/*.md'
- 'NOTICE'
- 'RELEASE_NOTES'
- 'THIRD-PARTY'
- 'LICENSE'
- '.github/**'
- '!.github/workflows/integration-test.yml'
workflow_dispatch:
inputs:
plugins:
description: 'Comma delimited list of plugins to test. Default is empty, and tests everything'
required: false
default: ''
type: string
concurrency:
group: ${{ github.workflow }}-${{ github.ref_name }}
cancel-in-progress: true
jobs:
BuildAndUpload:
uses: ./.github/workflows/test-build.yml
secrets: inherit
permissions:
id-token: write
contents: read
with:
BucketKey: "integration-test/binary/${{ github.sha }}"
PackageBucketKey: "integration-test/packaging/${{ github.sha }}"
TerraformAWSAssumeRole: ${{ vars.TERRAFORM_AWS_ASSUME_ROLE }}
Bucket: ${{ vars.S3_INTEGRATION_BUCKET }}
BuildAndUploadPackages:
uses: ./.github/workflows/test-build-packages.yml
needs: [BuildAndUpload]
secrets: inherit
permissions:
id-token: write
contents: read
with:
BucketKey: "integration-test/binary/${{ github.sha }}"
PackageBucketKey: "integration-test/packaging/${{ github.sha }}"
TerraformAWSAssumeRole: ${{ vars.TERRAFORM_AWS_ASSUME_ROLE }}
Bucket: ${{ vars.S3_INTEGRATION_BUCKET }}
BuildDocker:
needs: [BuildAndUpload]
uses: ./.github/workflows/test-build-docker.yml
secrets: inherit
permissions:
id-token: write
contents: read
with:
ContainerRepositoryNameAndTag: "cwagent-integration-test:${{ github.sha }}"
BucketKey: "integration-test/binary/${{ github.sha }}"
PackageBucketKey: "integration-test/packaging/${{ github.sha }}"
BuildAndUploadITAR:
uses: ./.github/workflows/test-build.yml
secrets: inherit
permissions:
id-token: write
contents: read
with:
BucketKey: "integration-test/binary/${{ github.sha }}"
PackageBucketKey: "integration-test/packaging/${{ github.sha }}"
Region: "us-gov-east-1"
TerraformAWSAssumeRole: ${{ vars.TERRAFORM_AWS_ASSUME_ROLE_ITAR }}
Bucket: ${{ vars.S3_INTEGRATION_BUCKET_ITAR }}
BuildAndUploadCN:
uses: ./.github/workflows/test-build.yml
secrets: inherit
permissions:
id-token: write
contents: read
with:
BucketKey: "integration-test/binary/${{ github.sha }}"
PackageBucketKey: "integration-test/packaging/${{ github.sha }}"
Region: "cn-north-1"
TerraformAWSAssumeRole: ${{ vars.TERRAFORM_AWS_ASSUME_ROLE_CN }}
Bucket: ${{ vars.S3_INTEGRATION_BUCKET_CN }}
GenerateTestMatrix:
name: 'GenerateTestMatrix'
runs-on: ubuntu-latest
outputs:
ec2_gpu_matrix: ${{ steps.set-matrix.outputs.ec2_gpu_matrix }}
ec2_linux_matrix: ${{ steps.set-matrix.outputs.ec2_linux_matrix }}
ec2_windows_matrix: ${{ steps.set-matrix.outputs.ec2_windows_matrix }}
ec2_mac_matrix: ${{ steps.set-matrix.outputs.ec2_mac_matrix }}
ec2_performance_matrix: ${{steps.set-matrix.outputs.ec2_performance_matrix}}
ec2_windows_performance_matrix: ${{steps.set-matrix.outputs.ec2_windows_performance_matrix}}
ec2_stress_matrix: ${{steps.set-matrix.outputs.ec2_stress_matrix}}
ec2_windows_stress_matrix: ${{steps.set-matrix.outputs.ec2_windows_stress_matrix}}
ecs_ec2_launch_daemon_matrix: ${{ steps.set-matrix.outputs.ecs_ec2_launch_daemon_matrix }}
ecs_fargate_matrix: ${{ steps.set-matrix.outputs.ecs_fargate_matrix }}
eks_daemon_matrix: ${{ steps.set-matrix.outputs.eks_daemon_matrix }}
eks_deployment_matrix: ${{ steps.set-matrix.outputs.eks_deployment_matrix }}
ec2_linux_itar_matrix: ${{ steps.set-matrix.outputs.ec2_linux_itar_matrix }}
ec2_linux_china_matrix: ${{ steps.set-matrix.outputs.ec2_linux_china_matrix }}
eks_addon_matrix: ${{ steps.set-matrix.outputs.eks_addon_matrix }}
steps:
- uses: actions/checkout@v3
with:
repository: ${{env.CWA_GITHUB_TEST_REPO_NAME}}
ref: ${{env.CWA_GITHUB_TEST_REPO_BRANCH}}
- name: Set up Go 1.x
uses: actions/setup-go@v4
with:
go-version: ~1.22.2
- name: Generate matrix
id: set-matrix
run: |
go run --tags=generator generator/test_case_generator.go
echo "::set-output name=ec2_gpu_matrix::$(echo $(cat generator/resources/ec2_gpu_complete_test_matrix.json))"
echo "::set-output name=eks_addon_matrix::$(echo $(cat generator/resources/eks_addon_complete_test_matrix.json))"
echo "::set-output name=ec2_linux_matrix::$(echo $(cat generator/resources/ec2_linux_complete_test_matrix.json))"
echo "::set-output name=ec2_windows_matrix::$(echo $(cat generator/resources/ec2_windows_complete_test_matrix.json))"
echo "::set-output name=ec2_mac_matrix::$(echo $(cat generator/resources/ec2_mac_complete_test_matrix.json))"
echo "::set-output name=ec2_performance_matrix::$(echo $(cat generator/resources/ec2_performance_complete_test_matrix.json))"
echo "::set-output name=ec2_windows_performance_matrix::$(echo $(cat generator/resources/ec2_windows_performance_complete_test_matrix.json))"
echo "::set-output name=ec2_stress_matrix::$(echo $(cat generator/resources/ec2_stress_complete_test_matrix.json))"
echo "::set-output name=ec2_windows_stress_matrix::$(echo $(cat generator/resources/ec2_windows_stress_complete_test_matrix.json))"
echo "::set-output name=ecs_ec2_launch_daemon_matrix::$(echo $(cat generator/resources/ecs_ec2_daemon_complete_test_matrix.json))"
echo "::set-output name=ecs_fargate_matrix::$(echo $(cat generator/resources/ecs_fargate_complete_test_matrix.json))"
echo "::set-output name=eks_daemon_matrix::$(echo $(cat generator/resources/eks_daemon_complete_test_matrix.json))"
echo "::set-output name=eks_deployment_matrix::$(echo $(cat generator/resources/eks_deployment_complete_test_matrix.json))"
echo "::set-output name=ec2_linux_itar_matrix::$(echo $(cat generator/resources/ec2_linux_itar_complete_test_matrix.json))"
echo "::set-output name=ec2_linux_china_matrix::$(echo $(cat generator/resources/ec2_linux_china_complete_test_matrix.json))"
- name: Echo test plan matrix
run: |
echo "ec2_gpu_matrix: ${{ steps.set-matrix.outputs.ec2_gpu_matrix }}"
echo "eks_addon_matrix: ${{ steps.set-matrix.outputs.eks_addon_matrix }}"
echo "ec2_linux_matrix: ${{ steps.set-matrix.outputs.ec2_linux_matrix }}"
echo "ec2_windows_matrix: ${{ steps.set-matrix.outputs.ec2_windows_matrix }}"
echo "ec2_mac_matrix: ${{ steps.set-matrix.outputs.ec2_mac_matrix }}"
echo "ec2_performance_matrix: ${{ steps.set-matrix.outputs.ec2_performance_matrix}}"
echo "ec2_windows_performance_matrix: ${{ steps.set-matrix.outputs.ec2_windows_performance_matrix}}"
echo "ec2_stress_matrix: ${{ steps.set-matrix.outputs.ec2_stress_matrix}}"
echo "ec2_windows_stress_matrix: ${{ steps.set-matrix.outputs.ec2_windows_stress_matrix}}"
echo "ecs_ec2_launch_daemon_matrix: ${{ steps.set-matrix.outputs.ecs_ec2_launch_daemon_matrix }}"
echo "ecs_fargate_matrix: ${{ steps.set-matrix.outputs.ecs_fargate_matrix }}"
echo "eks_daemon_matrix: ${{ steps.set-matrix.outputs.eks_daemon_matrix }}"
echo "eks_deployment_matrix: ${{ steps.set-matrix.outputs.eks_deployment_matrix }}"
echo "ec2_linux_itar_matrix: ${{ steps.set-matrix.outputs.ec2_linux_itar_matrix }}"
echo "ec2_linux_china_matrix: ${{ steps.set-matrix.outputs.ec2_linux_china_matrix }}"
CloudformationTest:
needs: [BuildAndUpload, GenerateTestMatrix]
name: 'CFTest'
runs-on: ubuntu-latest
strategy:
fail-fast: false
permissions:
id-token: write
contents: read
steps:
- uses: actions/checkout@v3
with:
repository: ${{env.CWA_GITHUB_TEST_REPO_NAME}}
ref: ${{env.CWA_GITHUB_TEST_REPO_BRANCH}}
path: test
- name: Set up Go 1.x
uses: actions/setup-go@v2
with:
go-version: ~1.22.2
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v2
with:
role-to-assume: ${{ env.TERRAFORM_AWS_ASSUME_ROLE }}
aws-region: us-west-2
role-duration-seconds: ${{ env.TERRAFORM_AWS_ASSUME_ROLE_DURATION }}
- name: Cache if success
id: cf-integration-test
uses: actions/cache@v2
with:
path: go.mod
key: "cf-integration-${{ github.sha }}-test"
- name: Test cf
if: steps.ec2-linux-integration-test.outputs.cache-hit != 'true'
run: |
cd test/test/cloudformation
go test -timeout 1h -package_path=s3://${S3_INTEGRATION_BUCKET}/integration-test/binary/${{ github.sha }}/linux/amd64/amazon-cloudwatch-agent.rpm -iam_role=${CF_IAM_ROLE} -key_name=${CF_KEY_NAME} -metric_name=mem_used_percent
StartLocalStack:
name: 'StartLocalStack'
needs: [OutputEnvVariables]
uses: ./.github/workflows/start-localstack.yml
secrets: inherit
permissions:
id-token: write
contents: read
with:
region: us-west-2
test_repo_name: ${{ needs.OutputEnvVariables.outputs.CWA_GITHUB_TEST_REPO_NAME }}
test_repo_branch: ${{ needs.OutputEnvVariables.outputs.CWA_GITHUB_TEST_REPO_BRANCH }}
terraform_assume_role: ${{ vars.TERRAFORM_AWS_ASSUME_ROLE }}
test_repo_url: ${{ needs.OutputEnvVariables.outputs.CWA_GITHUB_TEST_REPO_URL }}
github_sha: ${{github.sha}}
s3_integration_bucket: ${{ vars.S3_INTEGRATION_BUCKET }}
StartLocalStackITAR:
name: 'StartLocalStackITAR'
needs: [OutputEnvVariables]
uses: ./.github/workflows/start-localstack.yml
secrets: inherit
permissions:
id-token: write
contents: read
with:
region: us-gov-east-1
test_repo_name: ${{ needs.OutputEnvVariables.outputs.CWA_GITHUB_TEST_REPO_NAME }}
test_repo_branch: ${{ needs.OutputEnvVariables.outputs.CWA_GITHUB_TEST_REPO_BRANCH }}
terraform_assume_role: ${{ vars.TERRAFORM_AWS_ASSUME_ROLE_ITAR }}
test_repo_url: ${{ needs.OutputEnvVariables.outputs.CWA_GITHUB_TEST_REPO_URL }}
github_sha: ${{github.sha}}
s3_integration_bucket: ${{ vars.S3_INTEGRATION_BUCKET_ITAR }}
StartLocalStackCN:
name: 'StartLocalStackCN'
needs: [ OutputEnvVariables ]
uses: ./.github/workflows/start-localstack.yml
secrets: inherit
permissions:
id-token: write
contents: read
with:
region: cn-north-1
test_repo_name: ${{ needs.OutputEnvVariables.outputs.CWA_GITHUB_TEST_REPO_NAME }}
test_repo_branch: ${{ needs.OutputEnvVariables.outputs.CWA_GITHUB_TEST_REPO_BRANCH }}
terraform_assume_role: ${{ vars.TERRAFORM_AWS_ASSUME_ROLE_CN }}
test_repo_url: ${{ needs.OutputEnvVariables.outputs.CWA_GITHUB_TEST_REPO_URL }}
github_sha: ${{github.sha}}
s3_integration_bucket: ${{ vars.S3_INTEGRATION_BUCKET_CN }}
EC2NvidiaGPUIntegrationTest:
needs: [ BuildAndUpload, StartLocalStack, GenerateTestMatrix ]
name: 'EC2NVIDIAGPUIntegrationTest'
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
arrays: ${{ fromJson(needs.GenerateTestMatrix.outputs.ec2_gpu_matrix) }}
permissions:
id-token: write
contents: read
steps:
- uses: actions/checkout@v3
with:
repository: ${{env.CWA_GITHUB_TEST_REPO_NAME}}
ref: ${{env.CWA_GITHUB_TEST_REPO_BRANCH}}
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v2
with:
role-to-assume: ${{ env.TERRAFORM_AWS_ASSUME_ROLE }}
aws-region: us-west-2
role-duration-seconds: ${{ env.TERRAFORM_AWS_ASSUME_ROLE_DURATION }}
- name: Cache if success
id: ec2-linux-integration-test
uses: actions/cache@v3
with:
path: go.mod
key: ec2-nvidia-integration-test-${{ github.sha }}-${{ matrix.arrays.os }}-${{ matrix.arrays.arc }}-${{ matrix.arrays.test_dir }}
- name: Echo Test Info
run: echo run on ec2 instance os ${{ matrix.arrays.os }} arc ${{ matrix.arrays.arc }} test dir ${{ matrix.arrays.test_dir }}
- name: Verify Terraform version
run: terraform --version
# nick-fields/retry@v2 starts at base dir
- name: Terraform apply
if: ${{ matrix.arrays.family == 'linux' && steps.ec2-nvidia-integration-test.outputs.cache-hit != 'true' }}
uses: nick-fields/retry@v2
with:
max_attempts: 3
timeout_minutes: 30
retry_wait_seconds: 5
command: |
if [ "${{ matrix.arrays.terraform_dir }}" != "" ]; then
cd "${{ matrix.arrays.terraform_dir }}"
else
cd terraform/ec2/linux
fi
terraform init
if terraform apply --auto-approve \
-var="ssh_key_value=${PRIVATE_KEY}" -var="github_test_repo=${{env.CWA_GITHUB_TEST_REPO_URL}}" \
-var="test_name=${{ matrix.arrays.os }}" \
-var="cwa_github_sha=${GITHUB_SHA}" -var="install_agent=${{ matrix.arrays.installAgentCommand }}" \
-var="github_test_repo_branch=${{env.CWA_GITHUB_TEST_REPO_BRANCH}}" \
-var="ec2_instance_type=${{ matrix.arrays.instanceType }}" \
-var="user=${{ matrix.arrays.username }}" \
-var="ami=${{ matrix.arrays.ami }}" \
-var="ca_cert_path=${{ matrix.arrays.caCertPath }}" \
-var="arc=${{ matrix.arrays.arc }}" \
-var="binary_name=${{ matrix.arrays.binaryName }}" \
-var="local_stack_host_name=${{ needs.StartLocalStack.outputs.local_stack_host_name }}" \
-var="s3_bucket=${S3_INTEGRATION_BUCKET}" \
-var="ssh_key_name=${KEY_NAME}" \
-var="test_dir=${{ matrix.arrays.test_dir }}" ; then terraform destroy -auto-approve
else
terraform destroy -auto-approve && exit 1
fi
- name: Terraform apply
if: ${{ matrix.arrays.family == 'window' && steps.ec2-nvidia-integration-test.outputs.cache-hit != 'true' }}
uses: nick-fields/retry@v2
with:
max_attempts: 3
timeout_minutes: 30
retry_wait_seconds: 5
command: |
if [ "${{ matrix.arrays.terraform_dir }}" != "" ]; then
cd "${{ matrix.arrays.terraform_dir }}"
else
cd terraform/ec2/win
fi
terraform init
if terraform apply --auto-approve \
-var="ssh_key_value=${PRIVATE_KEY}" -var="ssh_key_name=${KEY_NAME}" \
-var="github_repo=${{env.CWA_GITHUB_TEST_REPO_URL}}" \
-var="cwa_github_sha=${GITHUB_SHA}" -var="ami=${{ matrix.arrays.ami }}" \
-var="test_dir=${{ matrix.arrays.test_dir }}" \
-var="ec2_instance_type=${{ matrix.arrays.instanceType }}" \
-var="github_test_repo=${{env.CWA_GITHUB_TEST_REPO_URL}}" \
-var="github_test_repo_branch=${{env.CWA_GITHUB_TEST_REPO_BRANCH}}" \
-var="s3_bucket=${S3_INTEGRATION_BUCKET}" ; then terraform destroy -auto-approve
else
terraform destroy -auto-approve && exit 1
fi
#This is here just in case workflow cancel
- name: Terraform destroy
if: ${{ cancelled() || failure() }}
uses: nick-fields/retry@v2
with:
max_attempts: 3
timeout_minutes: 8
retry_wait_seconds: 5
command: |
if [ "${{ matrix.arrays.terraform_dir }}" != "" ]; then
cd "${{ matrix.arrays.terraform_dir }}"
elif if "${{ matrix.arrays.os }}" == window; then
cd terraform/ec2/win
else
cd terraform/ec2/linux
fi
terraform destroy --auto-approve
OutputEnvVariables:
name: 'OutputEnvVariables'
runs-on: ubuntu-latest
outputs:
CWA_GITHUB_TEST_REPO_NAME: ${{ steps.set-outputs.outputs.CWA_GITHUB_TEST_REPO_NAME }}
CWA_GITHUB_TEST_REPO_URL: ${{ steps.set-outputs.outputs.CWA_GITHUB_TEST_REPO_URL }}
CWA_GITHUB_TEST_REPO_BRANCH: ${{ steps.set-outputs.outputs.CWA_GITHUB_TEST_REPO_BRANCH }}
steps:
- uses: actions/checkout@v3
with:
repository: ${{env.CWA_GITHUB_TEST_REPO_NAME}}
ref: ${{env.CWA_GITHUB_TEST_REPO_BRANCH}}
- name: Set up Go 1.x
uses: actions/setup-go@v4
with:
go-version: ~1.22.2
- name: SetOutputs
id: set-outputs
run: |
echo "::set-output name=CWA_GITHUB_TEST_REPO_NAME::${{ env.CWA_GITHUB_TEST_REPO_NAME }}"
echo "::set-output name=CWA_GITHUB_TEST_REPO_URL::${{ env.CWA_GITHUB_TEST_REPO_URL }}"
echo "::set-output name=CWA_GITHUB_TEST_REPO_BRANCH::${{ env.CWA_GITHUB_TEST_REPO_BRANCH }}"
- name: Echo test variables
run: |
echo "CWA_GITHUB_TEST_REPO_NAME: ${{ steps.set-outputs.outputs.CWA_GITHUB_TEST_REPO_NAME }}"
echo "CWA_GITHUB_TEST_REPO_URL: ${{ steps.set-outputs.outputs.CWA_GITHUB_TEST_REPO_URL }}"
echo "CWA_GITHUB_TEST_REPO_BRANCH: ${{ steps.set-outputs.outputs.CWA_GITHUB_TEST_REPO_BRANCH }}"
EC2LinuxIntegrationTest:
needs: [ BuildAndUpload, StartLocalStack, GenerateTestMatrix, OutputEnvVariables ]
name: 'EC2Linux'
uses: ./.github/workflows/ec2-integration-test.yml
with:
github_sha: ${{github.sha}}
test_dir: terraform/ec2/linux
job_id: ec2-linux-integration-test
test_props: ${{needs.GenerateTestMatrix.outputs.ec2_linux_matrix}}
test_repo_name: ${{ needs.OutputEnvVariables.outputs.CWA_GITHUB_TEST_REPO_NAME }}
test_repo_url: ${{ needs.OutputEnvVariables.outputs.CWA_GITHUB_TEST_REPO_URL }}
test_repo_branch: ${{ needs.OutputEnvVariables.outputs.CWA_GITHUB_TEST_REPO_BRANCH }}
localstack_host: ${{needs.StartLocalStack.outputs.local_stack_host_name}}
region: us-west-2
terraform_assume_role: ${{ vars.TERRAFORM_AWS_ASSUME_ROLE }}
s3_integration_bucket: ${{ vars.S3_INTEGRATION_BUCKET }}
secrets: inherit
LinuxOnPremIntegrationTest:
needs: [BuildAndUpload, StartLocalStack, GenerateTestMatrix, OutputEnvVariables]
name: 'OnpremLinux'
uses: ./.github/workflows/ec2-integration-test.yml
with:
github_sha: ${{github.sha}}
test_dir: terraform/ec2/linux_onprem
job_id: linux-onprem-integration-test
test_props: ${{needs.GenerateTestMatrix.outputs.ec2_linux_onprem_matrix}}
test_repo_name: ${{ needs.OutputEnvVariables.outputs.CWA_GITHUB_TEST_REPO_NAME }}
test_repo_url: ${{ needs.OutputEnvVariables.outputs.CWA_GITHUB_TEST_REPO_URL }}
test_repo_branch: ${{ needs.OutputEnvVariables.outputs.CWA_GITHUB_TEST_REPO_BRANCH }}
localstack_host: ${{needs.StartLocalStack.outputs.local_stack_host_name}}
region: us-west-2
secrets: inherit
StopLocalStack:
name: 'StopLocalStack'
if: ${{ always() }}
needs: [ StartLocalStack, EC2LinuxIntegrationTest, LinuxOnPremIntegrationTest, OutputEnvVariables ]
uses: ./.github/workflows/stop-localstack.yml
secrets: inherit
permissions:
id-token: write
contents: read
with:
region: us-west-2
test_repo_name: ${{ needs.OutputEnvVariables.outputs.CWA_GITHUB_TEST_REPO_NAME }}
test_repo_branch: ${{ needs.OutputEnvVariables.outputs.CWA_GITHUB_TEST_REPO_BRANCH }}
terraform_assume_role: ${{ vars.TERRAFORM_AWS_ASSUME_ROLE }}
github_sha: ${{github.sha}}
s3_integration_bucket: ${{ vars.S3_INTEGRATION_BUCKET }}
GPUE2E:
name: "GPUE2E"
needs: [ BuildAndUpload, BuildAndUploadPackages, GenerateTestMatrix ]
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
arrays: ${{ fromJson(needs.GenerateTestMatrix.outputs.eks_addon_matrix) }}
permissions:
id-token: write
contents: read
steps:
- uses: actions/checkout@v3
with:
repository: ${{env.CWA_GITHUB_TEST_REPO_NAME}}
ref: ${{env.CWA_GITHUB_TEST_REPO_BRANCH}}
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v2
with:
role-to-assume: ${{ env.TERRAFORM_AWS_ASSUME_ROLE }}
aws-region: us-west-2
role-duration-seconds: ${{ env.TERRAFORM_AWS_ASSUME_ROLE_DURATION }}
- name: Verify Terraform version
run: terraform --version
- name: Terraform apply
uses: nick-fields/retry@v2
with:
max_attempts: 3
timeout_minutes: 60
retry_wait_seconds: 5
command: |
if [ "${{ matrix.arrays.terraform_dir }}" != "" ]; then
cd "${{ matrix.arrays.terraform_dir }}"
else
cd terraform/eks/addon/gpu
fi
terraform init
if terraform apply --auto-approve \
-var="aws_ecr_private_registry=${{ secrets.AWS_ECR_PRIVATE_REGISTRY }}" \
-var="ecr_integration_test_repo=${{ env.ECR_INTEGRATION_TEST_REPO }}" \
-var="github_sha=${{ github.sha }}" \
-var="test_dir=${{ matrix.arrays.test_dir }}"\
-var="cwagent_image_repo=${{ steps.login-ecr.outputs.registry }}/${{ env.ECR_INTEGRATION_TEST_REPO }}" \
-var="cwagent_image_tag=${{ github.sha }}" \
-var="beta=true" \
-var="addon_name=amazon-cloudwatch-observability" \
-var="addon_version=v1.6.0-eksbuild.1" \
-var="k8s_version=1.29" ; then
echo "Terraform apply successful."
# Capture the output
echo "Getting EKS cluster name"
EKS_CLUSTER_NAME=$(terraform output -raw eks_cluster_name)
echo "Cluster name is ${EKS_CLUSTER_NAME}"
kubectl get pods -A
kubectl apply -f ./gpuBurner.yaml
kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.15.0/deployments/static/nvidia-device-plugin.yml
kubectl patch amazoncloudwatchagents -n amazon-cloudwatch cloudwatch-agent --type='json' -p='[{"op": "replace", "path": "/spec/image", "value": ${{ secrets.AWS_ECR_PRIVATE_REGISTRY }}/${{ env.ECR_INTEGRATION_TEST_REPO }}:${{ github.sha }}}]'
if go test ../../../../test/gpu -eksClusterName ${EKS_CLUSTER_NAME} -computeType=EKS -v -eksDeploymentStrategy=DAEMON -eksGpuType=nvidia; then
echo "Tests passed"
else
echo "Tests failed"
exit 1
fi
else
terraform destroy -var="beta=${{ github.event.inputs.run_in_beta }}" -auto-approve && exit 1
fi
- name: Terraform destroy
if: ${{ cancelled() || failure() }}
uses: nick-fields/retry@v2
with:
max_attempts: 3
timeout_minutes: 8
retry_wait_seconds: 5
command: cd terraform/eks/addon/gpu && terraform destroy --auto-approve