diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index 7c964ca13e..46d9364409 100644 --- a/.github/workflows/integration-test.yml +++ b/.github/workflows/integration-test.yml @@ -13,7 +13,7 @@ env: ECR_INTEGRATION_TEST_REPO: "cwagent-integration-test" CWA_GITHUB_TEST_REPO_NAME: "aws/amazon-cloudwatch-agent-test" CWA_GITHUB_TEST_REPO_URL: "https://github.com/aws/amazon-cloudwatch-agent-test.git" - CWA_GITHUB_TEST_REPO_BRANCH: "gpuE2eTest" + CWA_GITHUB_TEST_REPO_BRANCH: "main" TERRAFORM_AWS_ASSUME_ROLE_ITAR: ${{ vars.TERRAFORM_AWS_ASSUME_ROLE_ITAR }} S3_INTEGRATION_BUCKET_ITAR: ${{ vars.S3_INTEGRATION_BUCKET_ITAR }} TERRAFORM_AWS_ASSUME_ROLE_CN: ${{ vars.TERRAFORM_AWS_ASSUME_ROLE_CN }} @@ -127,6 +127,7 @@ jobs: ec2_linux_china_matrix: ${{ steps.set-matrix.outputs.ec2_linux_china_matrix }} eks_addon_matrix: ${{ steps.set-matrix.outputs.eks_addon_matrix }} + steps: - uses: actions/checkout@v3 with: @@ -175,7 +176,7 @@ jobs: echo "eks_deployment_matrix: ${{ steps.set-matrix.outputs.eks_deployment_matrix }}" echo "ec2_linux_itar_matrix: ${{ steps.set-matrix.outputs.ec2_linux_itar_matrix }}" echo "ec2_linux_china_matrix: ${{ steps.set-matrix.outputs.ec2_linux_china_matrix }}" - + CloudformationTest: needs: [BuildAndUpload, GenerateTestMatrix] name: 'CFTest' @@ -1358,81 +1359,7 @@ jobs: id-token: write contents: read secrets: inherit - 

GPUE2E: - name: "GPUE2E" - needs: [ BuildAndUpload, BuildAndUploadPackages, GenerateTestMatrix ] - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - arrays: ${{ fromJson(needs.GenerateTestMatrix.outputs.eks_addon_matrix) }} - permissions: - id-token: write - contents: read - steps: - - uses: actions/checkout@v3 - with: - repository: ${{env.CWA_GITHUB_TEST_REPO_NAME}} - ref: ${{env.CWA_GITHUB_TEST_REPO_BRANCH}} - - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v2 - with: - role-to-assume: ${{ env.TERRAFORM_AWS_ASSUME_ROLE }} - aws-region: us-west-2 - role-duration-seconds: ${{ env.TERRAFORM_AWS_ASSUME_ROLE_DURATION }} - - - - name: Verify Terraform version - run: terraform --version - - - name: Terraform apply - uses: nick-fields/retry@v2 - with: - max_attempts: 3 - timeout_minutes: 60 - retry_wait_seconds: 5 - command: | - if [ "${{ matrix.arrays.terraform_dir }}" != "" ]; then - cd "${{ matrix.arrays.terraform_dir }}" - else - cd terraform/eks/addon/gpu - fi - - terraform init - if terraform apply --auto-approve \ - -var="beta=true" \ - -var="addon_name=amazon-cloudwatch-observability" \ - -var="addon_version=v1.6.0-eksbuild.1" \ - -var="k8s_version=1.29" ; then - echo "Terraform apply successful." - - # Capture the output - echo "Getting EKS cluster name" - EKS_CLUSTER_NAME=$(terraform output -raw eks_cluster_name) - echo "Cluster name is ${EKS_CLUSTER_NAME}" - kubectl get pods -A - kubectl apply -f ./gpuBurner.yaml - kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.15.0/deployments/static/nvidia-device-plugin.yml - kubectl patch amazoncloudwatchagents -n amazon-cloudwatch cloudwatch-agent --type='json' -p='[{"op": "replace", "path": "/spec/image", "value": ${{ secrets.AWS_ECR_PRIVATE_REGISTRY }}/${{ env.ECR_INTEGRATION_TEST_REPO }}:${{ github.sha }}}]' - if go test ${{ matrix.arrays.test_dir }} -eksClusterName ${EKS_CLUSTER_NAME} -computeType=EKS -v -eksDeploymentStrategy=DAEMON -eksGpuType=nvidia; then - echo "Tests passed" - else - echo "Tests failed" - exit 1 - fi - else - terraform destroy -var="beta=${{ github.event.inputs.run_in_beta }}" -auto-approve && exit 1 - fi - - name: Terraform destroy - if: ${{ cancelled() || failure() }} - uses: nick-fields/retry@v2 - with: - max_attempts: 3 - timeout_minutes: 8 - retry_wait_seconds: 5 - command: cd terraform/eks/addon/gpu && terraform destroy --auto-approve GPUE2E: name: "GPUE2E" @@ -1474,7 +1401,7 @@ jobs: else cd terraform/eks/addon/gpu fi - + terraform init if terraform apply --auto-approve \ -var="beta=true" \ @@ -1482,7 +1409,7 @@ jobs: -var="addon_version=v1.6.0-eksbuild.1" \ -var="k8s_version=1.29" ; then echo "Terraform apply successful." - + # Capture the output echo "Getting EKS cluster name" EKS_CLUSTER_NAME=$(terraform output -raw eks_cluster_name) @@ -1508,6 +1435,4 @@ jobs: max_attempts: 3 timeout_minutes: 8 retry_wait_seconds: 5 - command: cd terraform/eks/addon/gpu && terraform destroy --auto-approve - - + command: cd terraform/eks/addon/gpu && terraform destroy --auto-approve \ No newline at end of file