-
Notifications
You must be signed in to change notification settings - Fork 0
101 lines (88 loc) · 3.61 KB
/
deploy-to-aks.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
name: Create AKS cluster, deploy CKF and run bundle test
on:
  # Manual trigger. The caller chooses which bundle versions to test.
  workflow_dispatch:
    inputs:
      bundle_version:
        # The value is spliced between [ and ] and parsed as a JSON array by
        # the job matrix, so every version must be individually double-quoted.
        description: 'Comma-separated list of bundle versions e.g. "1.7","1.8"'
        default: '"1.8"'
        required: true
  # Scheduled trigger (00:23 every Tuesday), currently disabled.
  # schedule:
  #   - cron: "23 0 * * 2"
jobs:
  # For each requested bundle version: create an AKS cluster, bootstrap a Juju
  # controller on it, run the bundle deployment test, collect diagnostics on
  # failure, and always delete the Azure resource group at the end.
  deploy-ckf-to-aks:
    runs-on: ubuntu-22.04
    strategy:
      matrix:
        # The input string is wrapped in [] and parsed as JSON to produce the
        # list of versions. When no input is available, the fallback list
        # "1.7","1.8" is used.
        bundle_version: ${{ fromJSON(format('[{0}]', inputs.bundle_version || '"1.7","1.8"')) }}
      # A failure for one bundle version must not cancel the others.
      fail-fast: false
    env:
      AZURE_CORE_OUTPUT: none
      # Kubernetes and Juju versions are looked up from the bundle version.
      # Steps must not redefine these names in their own `env`, because a
      # step-level entry shadows the job-level value.
      K8S_VERSION: ${{ fromJSON('{"1.7":"1.24","1.8":"1.26"}')[matrix.bundle_version] }}
      JUJU_VERSION: ${{ fromJSON('{"1.7":"2.9","1.8":"3.1"}')[ matrix.bundle_version ] }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v2

      # JUJU_VERSION is taken from the job-level env above.
      - name: Install CLI tools tox charmcraft juju
        run: |
          python -m pip install --upgrade pip
          pip install tox
          sudo snap install juju --classic --channel=${{ env.JUJU_VERSION }}/stable
          sudo snap install charmcraft --classic
          juju version

      - uses: azure/login@v1
        with:
          creds: ${{ secrets.AZURE_CREDENTIALS }}

      # K8S_VERSION is taken from the job-level env above. NAME is exported
      # through GITHUB_ENV so that later steps (including cleanup) can derive
      # the resource group and cluster names from it.
      - name: Create resource group and cluster
        run: |
          # We need to remove the dot from version
          # due to cluster naming restrictions
          version=${{ matrix.bundle_version }}
          NAME="kf${version//.}"
          echo "NAME=${NAME}" >> $GITHUB_ENV
          az group create --name ${NAME}ResourceGroup --location westeurope
          # Standard_D8s_v3
          az aks create \
            --resource-group ${NAME}ResourceGroup \
            --name ${NAME}AKSCluster \
            --kubernetes-version ${{ env.K8S_VERSION }} \
            --node-count 2 \
            --node-vm-size Standard_DS2_v2 \
            --node-osdisk-size 100 \
            --node-osdisk-type Managed \
            --os-sku Ubuntu \
            --no-ssh-key

      - name: Add AKS cloud to juju and bootstrap controller
        run: |
          az aks get-credentials --resource-group ${NAME}ResourceGroup --name ${NAME}AKSCluster --admin
          juju add-k8s aks --client
          juju bootstrap aks aks-controller
          juju add-model kubeflow

      - name: Test bundle deployment
        run: |
          tox -vve test_bundle_deployment-${{ matrix.bundle_version }} -- --model kubeflow --keep-models -vv -s

      # On failure, capture debugging resources
      - name: Get juju status
        run: juju status
        if: failure()

      - name: Get juju debug logs
        run: juju debug-log --replay --no-tail
        if: failure()

      - name: Get all kubernetes resources
        run: kubectl get all -A
        if: failure()

      # In the three steps below, --no-run-if-empty stops xargs from invoking
      # `kubectl logs` without a pod name when no pod matches the status.
      - name: Get logs from pods with status = Pending
        run: |
          kubectl -n kubeflow get pods | tail -n +2 | grep Pending | awk '{print $1}' \
            | xargs --no-run-if-empty -n1 kubectl -n kubeflow logs --all-containers=true --tail 100
        if: failure()

      - name: Get logs from pods with status = Failed
        run: |
          kubectl -n kubeflow get pods | tail -n +2 | grep Failed | awk '{print $1}' \
            | xargs --no-run-if-empty -n1 kubectl -n kubeflow logs --all-containers=true --tail 100
        if: failure()

      - name: Get logs from pods with status = CrashLoopBackOff
        run: |
          kubectl -n kubeflow get pods | tail -n +2 | grep CrashLoopBackOff | awk '{print $1}' \
            | xargs --no-run-if-empty -n1 kubectl -n kubeflow logs --all-containers=true --tail 100
        if: failure()

      # Runs regardless of the outcome of earlier steps so the cluster is not
      # left behind. NAME is the value exported by the cluster-creation step.
      - name: Delete AKS resources
        if: always()
        run: az group delete --name ${NAME}ResourceGroup --yes