From 02f31e0dee82b5e67944a44b7be084c7a1d470d9 Mon Sep 17 00:00:00 2001
From: Mark Laing
Date: Wed, 21 Aug 2024 17:18:41 +0100
Subject: [PATCH] WIP

---
 .github/workflows/tests.yml | 119 ++++++------------------------
 tests/devlxd-container      |   3 +-
 tests/vm-migration          | 107 ++++++++++++++++++++------------
 3 files changed, 88 insertions(+), 141 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 904efbc8a..1dc842c81 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -83,103 +83,11 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: ${{ fromJSON(inputs.ubuntu-releases || '["20.04", "22.04", "24.04"]') }}
-        track: ${{ fromJSON(inputs.snap-tracks || '["latest/edge", "5.21/edge", "5.0/edge", "4.0/edge"]') }}
+        os: ${{ fromJSON(inputs.ubuntu-releases || '["24.04"]') }}
+        track: ${{ fromJSON(inputs.snap-tracks || '["latest/edge"]') }}
         test:
-          - cgroup
-          - cluster
-          - container
-          - container-copy
-          - conversion
-          - cpu-vm
-          - devlxd-vm
-          - devlxd-container
-          - docker
-          - efi-vars-editor-vm
-          - interception
-          - pylxd
-          - network-bridge-firewall
-          - network-ovn
-          - network-routed
-          - snapd
-          - storage-buckets
-          - storage-disks-vm
-          - "storage-vm dir"
-          - "storage-vm btrfs"
-          - "storage-vm ceph"
-          - "storage-vm lvm"
-          - "storage-vm lvm-thin"
-          - "storage-vm zfs"
-          - storage-volumes-vm
-          - tpm-vm
-          - vm-nesting
           - vm-migration
-        include:
-          - test: qemu-external-vm
-            track: "latest/edge"
-            os: "24.04"
-        exclude:
-          # not compatible with 4.0/*
-          - test: container-copy
-            track: "4.0/edge"
-          - test: conversion
-            track: "4.0/edge"
-          - test: cpu-vm
-            track: "4.0/edge"
-          - test: devlxd-vm
-            track: "4.0/edge"
-          - test: efi-vars-editor-vm
-            track: "4.0/edge"
-          - test: network-bridge-firewall
-            os: 20.04
-            track: "4.0/edge"
-          - test: network-ovn
-            track: "4.0/edge"
-          # https://github.com/canonical/pylxd/issues/590
-          - test: pylxd
-            track: "4.0/edge"
-          - test: storage-buckets
-            track: "4.0/edge"
-          - test: storage-disks-vm
-            track: "4.0/edge"
-          - test: "storage-vm dir"
-            track: "4.0/edge"
-          - test: "storage-vm btrfs"
-            track: "4.0/edge"
-          - test: "storage-vm ceph"
-            track: "4.0/edge"
-          - test: "storage-vm lvm"
-            track: "4.0/edge"
-          - test: "storage-vm lvm-thin"
-            track: "4.0/edge"
-          - test: "storage-vm zfs"
-            track: "4.0/edge"
-          - test: storage-volumes-vm
-            track: "4.0/edge"
-          - test: tpm-vm
-            track: "4.0/edge"
-          # not compatible with 5.0/*
-          - test: efi-vars-editor-vm # not compatible with 5.0/*
-            track: "5.0/edge"
-          # waiting for integration with microceph
-          - test: "storage-vm ceph"
-          # skip track/os combinaisons that are too far appart
-          - track: "4.0/edge"
-            os: "24.04"
-          - track: "5.0/edge"
-            os: "24.04"
-          - track: "5.0/edge"
-            os: "20.04"
-          - track: "5.21/edge"
-            os: "20.04"
-          - track: "latest/edge"
-            os: "20.04"
-          - track: "latest/edge"
-            os: "22.04"
-          - test: "vm-migration"
-            track: "4.0/edge"
-          - test: "vm-migration"
-            track: "5.0/edge"
+          - devlxd-container
 
     steps:
       - name: Performance tuning
@@ -195,7 +103,7 @@ jobs:
           echo "force-unsafe-io" | sudo tee /etc/dpkg/dpkg.cfg.d/force-unsafe-io
 
       - name: Reclaim some space (storage tests only)
-        if: ${{ startsWith(matrix.test, 'storage') || matrix.test == 'vm-nesting' || matrix.test == 'conversion' }}
+        if: ${{ startsWith(matrix.test, 'storage') || matrix.test == 'vm-nesting' || matrix.test == 'conversion' || matrix.test == 'vm-migration' }}
         run: |
           set -eux
           df -h
@@ -225,6 +133,16 @@ jobs:
           sudo rm -rf /opt/ghc
           df -h
 
+      - name: Reclaim some memory (VM migration tests only)
+        if: ${{ matrix.test == 'vm-migration' }}
+        run: |
+          set -eux
+
+          free -mt
+          sudo systemctl stop dpkg-db-backup.timer e2scrub_all.timer fstrim.timer logrotate.timer man-db.timer motd-news.timer phpsessionclean.timer update-notifier-download.timer update-notifier-motd.timer
+          sudo systemctl stop iscsid.socket multipathd.socket
+          free -mt
+
       - name: Remove docker
         run: |
           set -eux
@@ -255,10 +173,6 @@ jobs:
         run: |
           set -eux
 
-          # XXX: prevent accidental usage of `images:` in CI test jobs.
-          # All tests should be done using officially supported images.
-          echo '127.0.0.1 images.lxd.canonical.com' | sudo tee /etc/hosts
-
           TEST_SCRIPT="$(echo ${{ matrix.test }} | cut -d " " -f 1)"
           EXTRA_ARGS="$(echo ${{ matrix.test }} | cut -d " " -f 2- --only-delimited)"
           if [ "${TEST_SCRIPT}" = "cluster" ]; then
@@ -266,7 +180,10 @@ jobs:
            src_track="$(echo "${dst_track}" | cut -d/ -f1)/stable"
            EXTRA_ARGS="${EXTRA_ARGS:-3} ${src_track} ${{ matrix.track }}"
           fi
-          sudo --preserve-env=PURGE_LXD,TEST_IMG ./bin/local-run "tests/${TEST_SCRIPT}" ${{ matrix.track }} ${EXTRA_ARGS:-}
+          sudo --preserve-env=PURGE_LXD,TEST_IMG,GITHUB_ACTIONS ./bin/local-run "tests/${TEST_SCRIPT}" ${{ matrix.track }} ${EXTRA_ARGS:-}
+
+      - name: Setup tmate session
+        uses: mxschmitt/action-tmate@v3
 
       # always update cache as we have our own logic of
       # cache invalidation and updates in addition to a date check
diff --git a/tests/devlxd-container b/tests/devlxd-container
index bb0d57afd..5aab8a24f 100755
--- a/tests/devlxd-container
+++ b/tests/devlxd-container
@@ -83,7 +83,8 @@ if hasNeededAPIExtension instance_ready_state; then
   [ "$(lxc config get c1 volatile.last_state.ready)" = "false" ]
 
   lxc start c1
-  waitInstanceBooted c1
+  waitInstanceReady c1
+  lxc exec c1 -- systemctl start snapd # This will wait for snapd to start if it hasn't already.
 else
   echo "Skipping instance Ready state tests, not supported"
 fi
diff --git a/tests/vm-migration b/tests/vm-migration
index 7bdd40aec..c3655f7c2 100644
--- a/tests/vm-migration
+++ b/tests/vm-migration
@@ -12,7 +12,7 @@ lxc network create lxdbr0
 lxc profile device add default eth0 nic network=lxdbr0
 
 poolName="ctpool$$"
-poolDriver=dir
+poolDriver=zfs
 
 echo "==> Create storage pool using driver ${poolDriver}"
 lxc storage create "${poolName}" "${poolDriver}"
@@ -20,8 +20,32 @@ lxc profile device add default root disk path="/" pool="${poolName}"
 
 # Create ceph node
 lxc init "${TEST_IMG:-ubuntu-minimal-daily:24.04}" ceph --vm -c limits.cpu=2 -c limits.memory=4GiB
-lxc storage volume create "${poolName}" ceph-disk size=20GiB --type=block
-lxc config device add ceph ceph-disk disk pool="${poolName}" source=ceph-disk
+if [ -n "${GITHUB_ACTIONS:-}" ]; then
+  # If the rootfs and the ephemeral partition are on the same physical disk, giving the whole
+  # disk to microceph would wipe our rootfs. Since it is pretty rare for GitHub Actions
+  # runners to have a single disk, we immediately bail rather than trying to gracefully
+  # handle it. Once snapd releases with https://github.com/snapcore/snapd/pull/13150,
+  # we will be able to stop worrying about that special case.
+  if [ "$(stat -c '%d' /)" = "$(stat -c '%d' /mnt)" ]; then
+    echo "FAIL: rootfs and ephemeral part on the same disk, aborting"
+    exit 1
+  fi
+
+  # Free up the ephemeral disk to use it as ceph OSD.
+  # https://github.com/canonical/microceph/issues/288 and https://github.com/canonical/microceph/issues/289
+  swapoff /mnt/swapfile
+  ephemeral_disk="$(findmnt --noheadings --output SOURCE --target /mnt | sed 's/[0-9]\+$//')"
+  umount /mnt
+
+  lxc config device add ceph ceph-disk disk source="${ephemeral_disk}" path=/dev/sdb
+else
+  lxc storage volume create "${poolName}" ceph-disk size=20GiB --type=block
+  lxc config device add ceph ceph-disk disk pool="${poolName}" source=ceph-disk
+fi
+
+
+# Disable vGPU to save RAM
+lxc config set ceph raw.qemu.conf='[device "qemu_gpu"]'
 lxc start ceph
 
 # Wait for snap in ceph instance.
@@ -40,7 +64,7 @@ lxc exec ceph -- microceph.ceph osd crush rule create-replicated replicated defa
 for flag in nosnaptrim noscrub nobackfill norebalance norecover noscrub nodeep-scrub; do
   lxc exec ceph -- microceph.ceph osd set "${flag}"
 done
-lxc exec ceph -- microceph disk add /dev/sdb
+lxc exec ceph -- microceph disk add /dev/sdb --wipe
 lxc exec ceph -- microceph.ceph osd pool create cephfs_meta 32
 lxc exec ceph -- microceph.ceph osd pool create cephfs_data 32
 lxc exec ceph -- microceph.ceph fs new cephfs cephfs_meta cephfs_data
@@ -53,22 +77,26 @@ for _ in $(seq 60); do
   fi
 done
 
-# Launch two instances for our LXD cluster and wait for them to be ready. If the host supports `devlxd_images_vm` then
-# set `security.devlxd.images=true` so that we don't have to download the image again.
+# Initialise two instances for our LXD cluster. If the host supports `devlxd_images_vm` then set
+# `security.devlxd.images=true` so that we don't have to download the image again.
 if hasNeededAPIExtension devlxd_images_vm; then
-  lxc launch "${TEST_IMG:-ubuntu-minimal-daily:24.04}" member1 --vm -c limits.memory=2GiB -c security.devlxd.images=true
-  lxc launch "${TEST_IMG:-ubuntu-minimal-daily:24.04}" member2 --vm -c limits.memory=2GiB -c security.devlxd.images=true
+  lxc init "${TEST_IMG:-ubuntu-minimal-daily:24.04}" member1 --vm -c limits.cpu=2 -c limits.memory=4GiB -c security.devlxd.images=true
+  lxc init "${TEST_IMG:-ubuntu-minimal-daily:24.04}" member2 --vm -c limits.cpu=2 -c limits.memory=4GiB -c security.devlxd.images=true
 else
-  lxc launch "${TEST_IMG:-ubuntu-minimal-daily:24.04}" member1 --vm -c limits.memory=2GiB
-  lxc launch "${TEST_IMG:-ubuntu-minimal-daily:24.04}" member2 --vm -c limits.memory=2GiB
+  lxc init "${TEST_IMG:-ubuntu-minimal-daily:24.04}" member1 --vm -c limits.cpu=2 -c limits.memory=4GiB
+  lxc init "${TEST_IMG:-ubuntu-minimal-daily:24.04}" member2 --vm -c limits.cpu=2 -c limits.memory=4GiB
 fi
 
+# Disable vGPU to save RAM
+lxc config set member1 raw.qemu.conf='[device "qemu_gpu"]'
+lxc config set member2 raw.qemu.conf='[device "qemu_gpu"]'
+
+# Start the instances and wait for member1 to be ready.
+lxc start member1
+lxc start member2
 waitInstanceReady member1
-waitInstanceReady member2
 # shellcheck disable=SC3044 # Ignore "declare is undefined" shellcheck error.
 lxc exec member1 -- sh -c "$(declare -f waitSnapdSeed); waitSnapdSeed"
-# shellcheck disable=SC3044 # Ignore "declare is undefined" shellcheck error.
-lxc exec member2 -- sh -c "$(declare -f waitSnapdSeed); waitSnapdSeed"
 
 # Install LXD in the first member.
 lxc exec member1 -- snap remove --purge lxd || true
@@ -91,6 +119,11 @@ lxc exec member1 -- lxc config set core.https_address="${member1Address}:8443"
 lxc exec member1 -- lxc cluster enable member1
 joinToken="$(lxc exec member1 -- lxc cluster add member2 --quiet)"
 
+# Ensure member2 is ready.
+waitInstanceReady member2
+# shellcheck disable=SC3044 # Ignore "declare is undefined" shellcheck error.
+lxc exec member2 -- sh -c "$(declare -f waitSnapdSeed); waitSnapdSeed"
+
 # Install LXD on the second member.
 lxc exec member2 -- snap remove --purge lxd || true
 lxc exec member2 -- snap install lxd --channel="${LXD_SNAP_CHANNEL}"
@@ -136,35 +169,31 @@ lxc exec member1 -- lxc storage create ceph ceph
 lxc exec member1 -- lxc storage volume create ceph vol1 --type=block size=500MiB
 
 # Create a VM in the cluster, on member1.
-lxc exec member1 -- lxc init "${TEST_IMG:-ubuntu-minimal-daily:24.04}" v1 --vm --storage ceph --target member1 -c migration.stateful=true -c limits.memory=1GiB
+lxc exec member1 -- lxc init images:alpine/3.20 v1 --vm --storage ceph --target member1 -c migration.stateful=true -c limits.cpu=1 -c limits.memory=1GiB -c security.secureboot=false
 
 # Add vol1 as a disk device to the VM.
 lxc exec member1 -- lxc config device add v1 vol1-disk disk pool=ceph source=vol1
 
-# Start the VM.
-lxc exec member1 -- lxc start v1
-
-# Wait for a long time for it to boot (doubly nested VM takes a while).
-while [ "$(lxc exec member1 -- lxc info v1 | awk '{if ($1 == "Processes:") print $2}')" -le 1 ]; do
-  sleep 30
-done
-
-# vol1 should be available as /dev/sdb. Format it as ext4. Then mount it and create a file.
-lxc exec member1 -- lxc exec v1 -- mkfs -t ext4 /dev/sdb
-lxc exec member1 -- lxc exec v1 -- mkdir /mnt/vol1
-lxc exec member1 -- lxc exec v1 -- mount -t ext4 /dev/sdb /mnt/vol1
-lxc exec member1 -- lxc exec v1 -- cp /etc/hostname /mnt/vol1/bar
-
-# Move the instance
-lxc exec member1 -- lxc move v1 --target member2
-
-# The VM is slow. So the agent isn't immediately available after the live migration.
-while [ "$(lxc exec member1 -- lxc info v1 | awk '{if ($1 == "Processes:") print $2}')" -le 1 ]; do
-  sleep 5
-done
-
-# The volume should be functional, still mounted, and the file we created should still be there with the same contents.
-[ "$(lxc exec member2 -- lxc exec v1 -- cat /mnt/vol1/bar)" = "v1" ]
-
+## Start the VM.
+#lxc exec member1 -- lxc start v1
+#
+## Wait for a long time for it to boot (doubly nested VM takes a while).
+#lxc exec member1 -- sh -c 'while [ "$(lxc info v1 | awk '"'"'{if ($1 == "Processes:") print $2}'"'"')" -le 1 ]; do echo "Instance v1 still not booted, waiting 60s..." && sleep 60; done'
+#
+## vol1 should be available as /dev/sdb. Format it as ext4. Then mount it and create a file.
+#lxc exec member1 -- lxc exec v1 -- mkfs -t ext4 /dev/sdb
+#lxc exec member1 -- lxc exec v1 -- mkdir /mnt/vol1
+#lxc exec member1 -- lxc exec v1 -- mount -t ext4 /dev/sdb /mnt/vol1
+#lxc exec member1 -- lxc exec v1 -- cp /etc/hostname /mnt/vol1/bar
+#
+## Move the instance
+#lxc exec member1 -- lxc move v1 --target member2
+#
+## The VM is slow. So the agent isn't immediately available after the live migration.
+#lxc exec member1 -- sh -c 'while [ "$(lxc info v1 | awk '"'"'{if ($1 == "Processes:") print $2}'"'"')" -le 1 ]; do echo "Instance v1 still not booted, waiting 60s..." && sleep 60; done'
+#
+## The volume should be functional, still mounted, and the file we created should still be there with the same contents.
+#[ "$(lxc exec member2 -- lxc exec v1 -- cat /mnt/vol1/bar)" = "v1" ]
+#
 # shellcheck disable=SC2034
 FAIL=0
\ No newline at end of file
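
Not part of the patch: the GITHUB_ACTIONS branch added to tests/vm-migration can be exercised on its own with the minimal sketch below. It assumes an Ubuntu GitHub Actions runner whose ephemeral disk is mounted at /mnt with a swapfile at /mnt/swapfile, exactly as the hunk above does, and it only uses the commands that appear in that hunk; the shebang, the error message wording and the trailing echo are added here for illustration.

#!/bin/sh
set -eux

# Refuse to continue if /mnt is not a separate filesystem from /: in that case
# there is no dedicated ephemeral disk to hand over, and wiping it would wipe
# the rootfs.
if [ "$(stat -c '%d' /)" = "$(stat -c '%d' /mnt)" ]; then
    echo "rootfs and ephemeral partition share a filesystem, aborting" >&2
    exit 1
fi

# Drop the swapfile kept on the ephemeral partition, work out the parent disk
# of the partition mounted at /mnt, then unmount it so the whole disk can be
# attached to the ceph VM as a raw block device.
swapoff /mnt/swapfile
ephemeral_disk="$(findmnt --noheadings --output SOURCE --target /mnt | sed 's/[0-9]\+$//')"
umount /mnt

echo "ephemeral disk free to attach: ${ephemeral_disk}"

The sed expression only strips a trailing partition number, so this assumes /dev/sdXN-style device names as exposed by the current runner images; NVMe-style names such as /dev/nvme0n1p1 would need a different transformation.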