forked from aws/amazon-cloudwatch-agent-operator
-
Notifications
You must be signed in to change notification settings - Fork 0
130 lines (113 loc) · 4.73 KB
/
gpu-e2e-test.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
name: Run GPU E2E Test
env:
TERRAFORM_AWS_ASSUME_ROLE: ${{ secrets.TERRAFORM_AWS_ASSUME_ROLE }}
on:
workflow_dispatch:
inputs:
addon_name:
required: true
type: string
default: "amazon-cloudwatch-observability"
description: "GPU E2E Test"
addon_version:
required: true
type: string
default: "v1.1.0-eksbuild.1"
description: "EKS addon version"
run_in_beta:
required: true
type: boolean
default: true
description: "Run in EKS Addon Beta environment"
concurrency:
group: ${{ github.workflow }}-${{ github.ref_name }}
cancel-in-progress: true
permissions:
id-token: write
contents: read
jobs:
GenerateTestMatrix:
name: 'GenerateTestMatrix'
runs-on: ubuntu-latest
outputs:
eks_addon_matrix: ${{ steps.set-matrix.outputs.eks_addon_matrix }}
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v2
with:
role-to-assume: ${{ env.TERRAFORM_AWS_ASSUME_ROLE }}
aws-region: us-west-2
- name: Generate matrix
id: set-matrix
run: |
echo "::set-output name=eks_addon_matrix::$(echo $(cat integration-tests/generator/k8s_versions_matrix.json))"
- name: Echo test plan matrix
run: |
echo "eks_addon_matrix: ${{ steps.set-matrix.outputs.eks_addon_matrix }}"
echo "Addon name ${{ github.event.inputs.addon_name }}, addon version ${{ github.event.inputs.addon_version }} "
GPUE2ETest:
needs: [GenerateTestMatrix]
name: GPUE2ETest
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
arrays: ${{ fromJson(needs.GenerateTestMatrix.outputs.eks_addon_matrix) }}
permissions:
id-token: write
contents: read
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v2
with:
role-to-assume: ${{ env.TERRAFORM_AWS_ASSUME_ROLE }}
aws-region: us-west-2
- name: Confirm EKS Version Support
run: |
if [[
$(go list -m k8s.io/client-go | cut -d ' ' -f 2 | cut -d '.' -f 2) -lt $( echo ${{ matrix.arrays.k8sVersion }} | cut -d '.' -f 2)
|| $(go list -m k8s.io/apimachinery | cut -d ' ' -f 2 | cut -d '.' -f 2) -lt $( echo ${{ matrix.arrays.k8sVersion }} | cut -d '.' -f 2)
|| $(go list -m k8s.io/component-base | cut -d ' ' -f 2 | cut -d '.' -f 2) -lt $( echo ${{ matrix.arrays.k8sVersion }} | cut -d '.' -f 2)
|| $(go list -m k8s.io/kubectl | cut -d ' ' -f 2 | cut -d '.' -f 2) -lt $( echo ${{ matrix.arrays.k8sVersion }} | cut -d '.' -f 2)
]]; then
echo k8s.io/client-go $(go list -m k8s.io/client-go) is less than ${{ matrix.arrays.k8sVersion }}
echo or k8s.io/apimachinery $(go list -m k8s.io/apimachinery) is less than ${{ matrix.arrays.k8sVersion }}
echo or k8s.io/component-base $(go list -m k8s.io/component-base) is less than ${{ matrix.arrays.k8sVersion }}
echo or k8s.io/kubectl $(go list -m k8s.io/kubectl) is less than ${{ matrix.arrays.k8sVersion }}, fail test
echo "please run go get -u && go mod tidy"
exit 1;
fi
- name: Verify Terraform version
run: terraform --version
- name: Terraform apply
uses: nick-fields/retry@v2
with:
max_attempts: 1
timeout_minutes: 60 # EKS takes about 20 minutes to spin up a cluster and service on the cluster
retry_wait_seconds: 5
command: |
cd integration-tests/terraform/gpu
terraform init
if terraform apply -var="beta=${{ github.event.inputs.run_in_beta }}" -var="addon_name=${{ github.event.inputs.addon_name }}" -var="addon_version=${{ github.event.inputs.addon_version }}" -var="k8s_version=${{ matrix.arrays.k8sVersion }}" --auto-approve; then
terraform destroy -var="beta=${{ github.event.inputs.run_in_beta }}" -auto-approve
else
terraform destroy -var="beta=${{ github.event.inputs.run_in_beta }}" -auto-approve && exit 1
fi
- name: Terraform destroy
if: ${{ cancelled() || failure() }}
uses: nick-fields/retry@v2
with:
max_attempts: 3
timeout_minutes: 8
retry_wait_seconds: 5
command: |
cd integration-tests/terraform/gpu
terraform destroy -var="beta=${{ github.event.inputs.run_in_beta }}" --auto-approve