# cluster-autoscaler: Upgrade to k8s 1.29 - take two #3809
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# End-to-end test workflow. Triggers: pull requests, pushes to main, manual
# dispatch, and invocation from another workflow.
name: e2e-test

on:
  pull_request:
  push:
    branches:
      - main

  # Manual runs may supply a prebuilt kernel image and pick the cluster flavour.
  workflow_dispatch:
    inputs:
      kernel-image:
        type: string
        description: 'The kernel image to use for the VMs. If not specified, a kernel will be built from source'
        required: false
      cluster:
        type: choice
        description: 'The cluster to run the tests on'
        options:
          - k3d
          - kind
        default: k3d

  # Callers may pass an existing image tag (which skips building) and may ask
  # for the rendered manifests to be uploaded as an artifact.
  workflow_call:
    inputs:
      tag:
        type: string
        description: 'Tag to use for images, skipping building'
        required: false
      push-yamls:
        type: boolean
        description: 'If true, pushes a tarball containing the rendered yaml manifests as an artifact'
        required: false
env:
  # Name the e2e test VM image is re-tagged to before it is loaded into the
  # cluster (see the "load e2e test vm image" step).
  # NOTE(review): this tag says 15 while that step pulls the
  # `vm-postgres-16-bullseye` output of build-test-vm. The re-tag makes this
  # workable, but confirm the version mismatch in the name is intentional.
  IMG_E2E_TEST: 'vm-postgres:15-bullseye'
# Every `run:` step uses bash with errexit (-e), nounset (-u) and pipefail, so
# a failing command anywhere in a script fails the step.
defaults:
  run:
    shell: bash -euo pipefail {0}
jobs:
  # Resolves the image tag used by the rest of the workflow: the caller-provided
  # `inputs.tag` if set, otherwise "<first 7 chars of the commit SHA>.<run id>".
  get-tag:
    outputs:
      tag: ${{ inputs.tag || steps.get-tag.outputs.tag }}
    runs-on: ubuntu-latest
    steps:
      - name: get tag
        if: ${{ inputs.tag == '' }}
        id: get-tag
        env:
          SHA: ${{ github.event.pull_request.head.sha || github.sha }}
        run: |
          test -n "$SHA"
          sha="${SHA::7}"
          echo "tag=$sha.$GITHUB_RUN_ID" | tee -a "$GITHUB_OUTPUT"

  # Builds the component images via the reusable workflow; skipped when a tag
  # was supplied by the caller.
  build-images:
    needs: get-tag
    uses: ./.github/workflows/build-images.yaml
    with:
      skip: ${{ inputs.tag != '' }}
      tag: ${{ inputs.tag || needs.get-tag.outputs.tag }}
      kernel-image: ${{ inputs.kernel-image }}
      # note: setting to preserve runner pods will mean that if !skip, they'll be built with those
      # settings and used properly in the tests. But if skip (because inputs.tag != ''), then this
      # setting will have no effect and the release images will be normal.
      controller-preserve-runner-pods: true
    secrets: inherit

  # Builds the VM image used by the tests via the reusable workflow; skipped
  # when a tag was supplied by the caller.
  build-test-vm:
    needs: get-tag
    uses: ./.github/workflows/build-test-vm.yaml
    with:
      skip: ${{ inputs.tag != '' }}
      tag: ${{ inputs.tag || needs.get-tag.outputs.tag }}
    secrets: inherit

  # Creates a local cluster, deploys the rendered manifests, runs `make e2e`,
  # then always collects logs/events and tears the cluster down.
  e2e-tests:
    needs: [build-images, build-test-vm]
    strategy:
      fail-fast: false
      matrix:
        # Single-entry matrix: the dispatch input if given, otherwise k3d.
        cluster:
          - ${{ inputs.cluster || 'k3d' }}
    runs-on: [self-hosted, gen3, large]
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0 # fetch all, so that we also include tags

      - uses: actions/setup-go@v5
        with:
          go-version-file: 'go.mod'
          # Disable cache on self-hosted runners to avoid /usr/bin/tar errors, see https://github.com/actions/setup-go/issues/403
          cache: false
        # Sometimes setup-go gets stuck. Without this, it'll keep going until the job gets killed
        timeout-minutes: 10

      - name: Install dependencies
        run: |
          sudo apt install -y python3-venv
          make e2e-tools
          echo "$(pwd)/bin" >> "$GITHUB_PATH"

      - name: Check dependencies
        run: |
          kubectl version --client --output=yaml
          k3d version
          kind version
          kuttl version
          docker version

      - run: make render-release
        env:
          IMG_CONTROLLER: ${{ needs.build-images.outputs.controller }}
          IMG_VXLAN_CONTROLLER: ${{ needs.build-images.outputs.vxlan-controller }}
          IMG_RUNNER: ${{ needs.build-images.outputs.runner }}
          IMG_SCHEDULER: ${{ needs.build-images.outputs.scheduler }}
          IMG_AUTOSCALER_AGENT: ${{ needs.build-images.outputs.autoscaler-agent }}

      - name: upload manifests
        # nb: use format(..) to catch both inputs.push-yamls = true AND inputs.push-yamls = 'true'.
        if: ${{ format('{0}', inputs.push-yamls) == 'true' }}
        uses: actions/upload-artifact@v4
        with:
          name: rendered_manifests
          # nb: prefix before wildcard is removed from the uploaded files, so the artifact should
          # contain e.g.
          #   - autoscale-scheduler.yaml
          #   - autoscaler-agent.yaml
          #   ...
          # ref https://github.com/actions/upload-artifact#upload-using-multiple-paths-and-exclusions
          path: rendered_manifests/*
          if-no-files-found: error
          # Minimum is 1 day; 0 (the action's default) means "use the repository's default
          # retention". These are only used temporarily.
          retention-days: 2

      - name: set custom docker config directory
        uses: neondatabase/dev-actions/set-docker-config-dir@6094485bf440001c94a94a3f9e221e81ff6b6193

      - uses: docker/login-action@v3
        with:
          username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
          password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

      # https://docs.k3s.io/installation/private-registry#registries-configuration-file
      # https://github.com/neondatabase/autoscaling/issues/975
      - name: set k3d registries.yaml
        # TODO: Implement an equivalent for kind?
        # Relevant docs seem to be here: https://kind.sigs.k8s.io/docs/user/private-registries
        if: ${{ matrix.cluster == 'k3d' }}
        env:
          DOCKERHUB_USERNAME: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
          DOCKERHUB_PASSWORD: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
        run: |
          # The indentation inside these strings is significant: `auth` must be nested under the
          # registry host, and the credentials under `auth`, per the k3s registries.yaml format.
          {
            echo "configs:"
            echo "  registry-1.docker.io:"
            echo "    auth:"
            echo "      username: $DOCKERHUB_USERNAME"
            echo "      password: $DOCKERHUB_PASSWORD"
          } >> "$(pwd)/k3d/registries.yaml"

      - run: make ${{ matrix.cluster }}-setup
        env:
          USE_REGISTRIES_FILE: true

      - name: deploy components
        timeout-minutes: 3
        run: |
          # Apply each rendered manifest, then wait for its rollout before moving to the next one.
          rendered () { echo "rendered_manifests/$1"; }

          kubectl apply -f $(rendered multus.yaml)
          kubectl -n kube-system rollout status daemonset kube-multus-ds

          kubectl apply -f $(rendered whereabouts.yaml)
          kubectl -n kube-system rollout status daemonset whereabouts

          kubectl apply -f $(rendered neonvm-runner-image-loader.yaml)
          kubectl -n neonvm-system rollout status daemonset neonvm-runner-image-loader

          kubectl apply -f $(rendered neonvm.yaml)
          kubectl -n neonvm-system rollout status daemonset neonvm-device-plugin

          kubectl apply -f $(rendered neonvm-controller.yaml)
          kubectl -n neonvm-system rollout status deployment neonvm-controller

          kubectl apply -f $(rendered neonvm-vxlan-controller.yaml)
          kubectl -n neonvm-system rollout status daemonset neonvm-vxlan-controller

          kubectl apply -f $(rendered autoscale-scheduler.yaml)
          kubectl -n kube-system rollout status deployment autoscale-scheduler

          kubectl apply -f $(rendered autoscaler-agent.yaml)
          kubectl -n kube-system rollout status daemonset autoscaler-agent

      - name: load e2e test vm image
        env:
          # NOTE(review): this output is named ...-16-bullseye while IMG_E2E_TEST is tagged
          # 15-bullseye. The re-tag below makes that work; confirm the naming is intentional.
          TEST_IMAGE: ${{ needs.build-test-vm.outputs.vm-postgres-16-bullseye }}
        timeout-minutes: 2
        run: |
          # Pull the docker image so we can re-tag it, because using a consistent tag inside the
          # cluster means we can avoid dynamically editing the image used in the kuttl files.
          docker pull "$TEST_IMAGE"
          docker image tag "$TEST_IMAGE" "$IMG_E2E_TEST"
          make load-example-vms

      - run: make e2e
        timeout-minutes: 15

      - name: Get k8s logs and events
        if: always()
        run: |
          if ! kubectl config current-context; then
            echo "skipping cluster logs because no cluster found in kubectl context"
            exit 0
          fi

          namespaces=$(kubectl get namespaces -o jsonpath='{.items[*].metadata.name}')
          for namespace in $namespaces; do
            # Everything is printed to the job log; only the namespaces of interest are also
            # appended to the step summary.
            if [[ "$namespace" == "neonvm-system" ]] || [[ "$namespace" == kuttl-test-* ]]; then
              tee_if_needed=$GITHUB_STEP_SUMMARY
            else
              tee_if_needed=/dev/null
            fi

            {
              echo "<details>"
              echo "<summary>Namespace=$namespace</summary>"
            } | tee -a "$tee_if_needed"

            pods=$(kubectl get pods -n "$namespace" -o jsonpath='{.items[*].metadata.name}')
            for pod in $pods; do
              {
                echo "<details>"
                echo "<summary>- Namespace=$namespace Pod=$pod Logs</summary>"
                echo "<pre>"
              } | tee -a "$tee_if_needed"

              restarts=$(
                kubectl get pod -n "$namespace" "$pod" -o jsonpath='{.status.containerStatuses[0].restartCount}' || echo '0'
              )
              # kubectl can succeed with empty output (no containerStatuses yet); treat as 0 so
              # the numeric comparison below is always well-formed.
              restarts="${restarts:-0}"
              {
                if [ "$restarts" -ne 0 ]; then
                  echo "CONTAINER RESTARTED $restarts TIME(S)"
                  echo "Previous logs:"
                  kubectl logs -n "$namespace" -p "$pod" || echo 'Error getting logs'
                  echo "Current logs:"
                  kubectl logs -n "$namespace" "$pod" || echo 'Error getting logs'
                else
                  echo "Logs:"
                  kubectl logs -n "$namespace" "$pod" || echo 'Error getting logs'
                fi
              } | tee -a "$tee_if_needed"
              {
                echo "</pre>"
                echo "</details>"
              } | tee -a "$tee_if_needed"

              {
                echo "<details>"
                echo "<summary>- Namespace=$namespace Pod=$pod Events</summary>"
                echo "<pre>"
              } | tee -a "$tee_if_needed"

              (kubectl get events --namespace "$namespace" --field-selector "involvedObject.name=$pod" || echo 'Error getting events') | tee -a "$tee_if_needed"

              # Exactly one </pre> to match the single <pre> opened for the events section.
              {
                echo "</pre>"
                echo "</details>"
              } | tee -a "$tee_if_needed"
            done

            echo "</details>" | tee -a "$tee_if_needed"
          done

      - name: Cleanup
        if: always()
        run: make ${{ matrix.cluster }}-destroy