tests: Check that volume is functional after live migration. #256

Open · wants to merge 1 commit into main
114 changes: 17 additions & 97 deletions .github/workflows/tests.yml
@@ -83,103 +83,10 @@ jobs:
strategy:
fail-fast: false
matrix:
os: ${{ fromJSON(inputs.ubuntu-releases || '["20.04", "22.04", "24.04"]') }}
track: ${{ fromJSON(inputs.snap-tracks || '["latest/edge", "5.21/edge", "5.0/edge", "4.0/edge"]') }}
os: ${{ fromJSON(inputs.ubuntu-releases || '["24.04"]') }}
track: ${{ fromJSON(inputs.snap-tracks || '["latest/edge"]') }}
test:
- cgroup
- cluster
- container
- container-copy
- conversion
- cpu-vm
- devlxd-container
- devlxd-vm
- docker
- efi-vars-editor-vm
- interception
- network-bridge-firewall
- network-ovn
- network-routed
- pylxd
- snapd
- storage-buckets
- storage-disks-vm
- "storage-vm btrfs"
- "storage-vm ceph"
- "storage-vm dir"
- "storage-vm lvm"
- "storage-vm lvm-thin"
- "storage-vm zfs"
- storage-volumes-vm
- tpm-vm
- vm-migration
- vm-nesting
include:
- test: qemu-external-vm
track: "latest/edge"
os: "24.04"
exclude:
# not compatible with 4.0/*
- test: container-copy
track: "4.0/edge"
- test: conversion
track: "4.0/edge"
- test: cpu-vm
track: "4.0/edge"
- test: devlxd-vm
track: "4.0/edge"
- test: efi-vars-editor-vm
track: "4.0/edge"
- test: network-bridge-firewall
os: 20.04
track: "4.0/edge"
- test: network-ovn
track: "4.0/edge"
# https://github.com/canonical/pylxd/issues/590
- test: pylxd
track: "4.0/edge"
- test: storage-buckets
track: "4.0/edge"
- test: storage-disks-vm
track: "4.0/edge"
- test: "storage-vm dir"
track: "4.0/edge"
- test: "storage-vm btrfs"
track: "4.0/edge"
- test: "storage-vm ceph"
track: "4.0/edge"
- test: "storage-vm lvm"
track: "4.0/edge"
- test: "storage-vm lvm-thin"
track: "4.0/edge"
- test: "storage-vm zfs"
track: "4.0/edge"
- test: storage-volumes-vm
track: "4.0/edge"
- test: tpm-vm
track: "4.0/edge"
# not compatible with 5.0/*
- test: efi-vars-editor-vm # not compatible with 5.0/*
track: "5.0/edge"
# waiting for integration with microceph
- test: "storage-vm ceph"
# skip track/os combinations that are too far apart
- track: "4.0/edge"
os: "24.04"
- track: "5.0/edge"
os: "24.04"
- track: "5.0/edge"
os: "20.04"
- track: "5.21/edge"
os: "20.04"
- track: "latest/edge"
os: "20.04"
- track: "latest/edge"
os: "22.04"
- test: "vm-migration"
track: "4.0/edge"
- test: "vm-migration"
track: "5.0/edge"

steps:
- name: Performance tuning
@@ -195,7 +102,7 @@ jobs:
echo "force-unsafe-io" | sudo tee /etc/dpkg/dpkg.cfg.d/force-unsafe-io

- name: Reclaim some space (storage tests only)
if: ${{ startsWith(matrix.test, 'storage') || matrix.test == 'vm-nesting' || matrix.test == 'conversion' }}
if: ${{ startsWith(matrix.test, 'storage') || matrix.test == 'vm-nesting' || matrix.test == 'conversion' || matrix.test == 'vm-migration' }}
run: |
set -eux
df -h
@@ -225,6 +132,16 @@ jobs:
sudo rm -rf /opt/ghc
df -h

- name: Reclaim some memory (VM migration tests only)
if: ${{ matrix.test == 'vm-migration' }}
run: |
set -eux

free -mt
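# Stop timers and sockets that are not needed during the test so their services don't start and consume memory.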
sudo systemctl stop dpkg-db-backup.timer e2scrub_all.timer fstrim.timer logrotate.timer man-db.timer motd-news.timer phpsessionclean.timer update-notifier-download.timer update-notifier-motd.timer
sudo systemctl stop iscsid.socket multipathd.socket
free -mt

- name: Remove docker
run: |
set -eux
@@ -266,7 +183,10 @@ jobs:
src_track="$(echo "${dst_track}" | cut -d/ -f1)/stable"
EXTRA_ARGS="${EXTRA_ARGS:-3} ${src_track} ${{ matrix.track }}"
fi
sudo --preserve-env=PURGE_LXD,TEST_IMG ./bin/local-run "tests/${TEST_SCRIPT}" ${{ matrix.track }} ${EXTRA_ARGS:-}
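# GITHUB_ACTIONS is passed through so tests/vm-migration can tell it is running on a hosted runner and reuse the ephemeral disk.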
sudo --preserve-env=PURGE_LXD,TEST_IMG,GITHUB_ACTIONS ./bin/local-run "tests/${TEST_SCRIPT}" ${{ matrix.track }} ${EXTRA_ARGS:-}

- name: Setup tmate session
uses: mxschmitt/action-tmate@v3

# always update cache as we have our own logic of
# cache invalidation and updates in addition to a date check
122 changes: 106 additions & 16 deletions tests/vm-migration
@@ -12,16 +12,51 @@
lxc profile device add default eth0 nic network=lxdbr0

poolName="ctpool$$"
poolDriver=dir
poolDriver=zfs

echo "==> Create storage pool using driver ${poolDriver}"
lxc storage create "${poolName}" "${poolDriver}"
lxc profile device add default root disk path="/" pool="${poolName}"

LOCAL_LOOP_DEVICE=""
cleanup() {
if [ -n "${LOCAL_LOOP_DEVICE:-}" ]; then
losetup --detach "${LOCAL_LOOP_DEVICE}"
fi
}

trap cleanup EXIT HUP INT TERM

# Create ceph node
lxc init "${TEST_IMG:-ubuntu-minimal-daily:24.04}" ceph --vm -c limits.cpu=2 -c limits.memory=4GiB
lxc storage volume create "${poolName}" ceph-disk size=20GiB --type=block
lxc config device add ceph ceph-disk disk pool="${poolName}" source=ceph-disk
lxc init "${TEST_IMG:-ubuntu-minimal-daily:24.04}" ceph --vm -c limits.cpu=4 -c limits.memory=4GiB
if [ -n "${GITHUB_ACTIONS:-}" ]; then
# If the rootfs and the ephemeral part are on the same physical disk, giving the whole
# disk to microceph would wipe our rootfs. Since it is pretty rare for GitHub Actions
# runners to have a single disk, we immediately bail rather than trying to gracefully
# handle it. Once snapd releases with https://github.com/snapcore/snapd/pull/13150,
# we will be able to stop worrying about that special case.
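# stat %d prints the device ID of the filesystem backing each path; identical IDs mean /mnt is not a separate ephemeral filesystem.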
if [ "$(stat -c '%d' /)" = "$(stat -c '%d' /mnt)" ]; then
echo "FAIL: rootfs and ephemeral part on the same disk, aborting"
exit 1
fi

# Free up the ephemeral disk to use it as a Ceph OSD.
# https://github.com/canonical/microceph/issues/288 and https://github.com/canonical/microceph/issues/289
swapoff /mnt/swapfile
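# findmnt reports the partition backing /mnt (e.g. /dev/sdb1); strip the trailing partition number to get the whole-disk device.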
ephemeral_disk="$(findmnt --noheadings --output SOURCE --target /mnt | sed 's/[0-9]\+$//')"
umount /mnt

# lxc config device add ceph ceph-disk unix-block source="${ephemeral_disk}" path=/dev/sdb
lxc config device add ceph ceph-disk disk source="${ephemeral_disk}" path=/dev/sdb
else
lxc storage volume create "${poolName}" ceph-disk size=20GiB --type=block
lxc config device add ceph ceph-disk disk pool="${poolName}" source=ceph-disk
# dd if=/dev/zero of=blockfile count=20480 bs=1M # 20GiB
# LOCAL_LOOP_DEVICE="$(losetup --find)"
# losetup "${LOCAL_LOOP_DEVICE}" blockfile
# lxc config device add ceph ceph-disk unix-block source="${LOCAL_LOOP_DEVICE}" path=/dev/sdb
fi

lxc start ceph

# Wait for snap in ceph instance.
@@ -40,7 +75,7 @@
for flag in nosnaptrim noscrub nobackfill norebalance norecover noscrub nodeep-scrub; do
lxc exec ceph -- microceph.ceph osd set "${flag}"
done
lxc exec ceph -- microceph disk add /dev/sdb
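# --wipe clears any pre-existing data on the disk (the GitHub runner's ephemeral disk comes formatted and mounted at /mnt).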
lxc exec ceph -- microceph disk add /dev/sdb --wipe
lxc exec ceph -- microceph.ceph osd pool create cephfs_meta 32
lxc exec ceph -- microceph.ceph osd pool create cephfs_data 32
lxc exec ceph -- microceph.ceph fs new cephfs cephfs_meta cephfs_data
@@ -53,15 +88,39 @@
fi
done

# Launch two instances for our LXD cluster and wait for them to be ready.
lxc launch "${TEST_IMG:-ubuntu-minimal-daily:24.04}" member1 --vm -c limits.memory=2GiB
lxc launch "${TEST_IMG:-ubuntu-minimal-daily:24.04}" member2 --vm -c limits.memory=2GiB
cat << EOF | lxc profile create kvm
config:
limits.cpu: 4
limits.memory: 4GiB
linux.kernel_modules: kvm,vhost_net,vhost_vsock,rbd
security.devlxd.images: "true"
security.idmap.isolated: "false"
security.nesting: "true"
devices:
kvm:
source: /dev/kvm
type: unix-char
vhost-net:
source: /dev/vhost-net
type: unix-char
vhost-vsock:
source: /dev/vhost-vsock
type: unix-char
vsock:
mode: "0666"
source: /dev/vsock
type: unix-char
EOF

lxc init "${TEST_IMG:-ubuntu-minimal-daily:24.04}" member1 --profile default --profile kvm
lxc init "${TEST_IMG:-ubuntu-minimal-daily:24.04}" member2 --profile default --profile kvm

# Start the instances and wait for member1 to be ready.
lxc start member1
lxc start member2
waitInstanceReady member1
waitInstanceReady member2
# shellcheck disable=SC3044 # Ignore "declare is undefined" shellcheck error.
lxc exec member1 -- sh -c "$(declare -f waitSnapdSeed); waitSnapdSeed"
# shellcheck disable=SC3044 # Ignore "declare is undefined" shellcheck error.
lxc exec member2 -- sh -c "$(declare -f waitSnapdSeed); waitSnapdSeed"

# Install LXD in the first member.
lxc exec member1 -- snap remove --purge lxd || true
@@ -71,14 +130,24 @@
lxc file push "${LXD_SIDELOAD_PATH}" member1/var/snap/lxd/common/lxd.debug
lxc exec member1 -- systemctl restart snap.lxd.daemon
fi
if [ -n "${LXD_AGENT_SIDELOAD_PATH:-}" ]; then
lxc file push "${LXD_AGENT_SIDELOAD_PATH}" "member1/root/$(basename "${LXD_AGENT_SIDELOAD_PATH}")"
lxc exec member1 -- mount --bind "$(basename "${LXD_AGENT_SIDELOAD_PATH}")" /snap/lxd/current/bin/lxd-agent
lxc exec member1 -- systemctl restart snap.lxd.daemon
fi

# Initialise and configure LXD in the first member.
lxc exec member1 -- lxd init --auto
member1Address="$(lxc query /1.0/instances/member1?recursion=2 | jq -r ".state.network.enp5s0.addresses[0].address")"
member1Address="$(lxc query /1.0/instances/member1?recursion=2 | jq -r ".state.network.eth0.addresses[0].address")"
lxc exec member1 -- lxc config set core.https_address="${member1Address}:8443"
lxc exec member1 -- lxc cluster enable member1
joinToken="$(lxc exec member1 -- lxc cluster add member2 --quiet)"

# Ensure member2 is ready.
waitInstanceReady member2
# shellcheck disable=SC3044 # Ignore "declare is undefined" shellcheck error.
lxc exec member2 -- sh -c "$(declare -f waitSnapdSeed); waitSnapdSeed"

# Install LXD on the second member.
lxc exec member2 -- snap remove --purge lxd || true
lxc exec member2 -- snap install lxd --channel="${LXD_SNAP_CHANNEL}"
@@ -87,9 +156,14 @@
lxc file push "${LXD_SIDELOAD_PATH}" member2/var/snap/lxd/common/lxd.debug
lxc exec member2 -- systemctl restart snap.lxd.daemon
fi
if [ -n "${LXD_AGENT_SIDELOAD_PATH:-}" ]; then
lxc file push "${LXD_AGENT_SIDELOAD_PATH}" "member2/root/$(basename "${LXD_AGENT_SIDELOAD_PATH}")"
lxc exec member2 -- mount --bind "$(basename "${LXD_AGENT_SIDELOAD_PATH}")" /snap/lxd/current/bin/lxd-agent
lxc exec member2 -- systemctl restart snap.lxd.daemon
fi

# Create a preseed file for member2 to join member1.
member2Address="$(lxc query /1.0/instances/member2?recursion=2 | jq -r ".state.network.enp5s0.addresses[0].address")"
member2Address="$(lxc query /1.0/instances/member2?recursion=2 | jq -r ".state.network.eth0.addresses[0].address")"
preseed="$(
cat <<EOF
cluster:
@@ -119,15 +193,31 @@
lxc exec member1 -- lxc storage volume create ceph vol1 --type=block size=500MiB
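# vol1 is a Ceph block volume; it is attached to the VM below and verified again after the live migration.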

# Create a VM in the cluster, on member1.
lxc exec member1 -- lxc init "${TEST_IMG:-ubuntu-minimal-daily:24.04}" v1 --vm --storage ceph --target member1 -c migration.stateful=true -c limits.memory=512MiB
lxc exec member1 -- lxc init "${TEST_IMG:-ubuntu-minimal-daily:24.04}" v1 --vm --storage ceph --target member1 -c migration.stateful=true -c limits.memory=2GiB

# Add vol1 as a disk device to the VM.
lxc exec member1 -- lxc config device add v1 vol1-disk disk pool=ceph source=vol1

# Start the VM.
lxc exec member1 -- lxc start v1
sleep 60

# Wait for a long time for it to boot (doubly nested VM takes a while).
lxc exec member1 -- sh -c 'while [ "$(lxc info v1 | awk '"'"'{if ($1 == "Processes:") print $2}'"'"')" -le 1 ]; do echo "Waiting for instance to boot (retry 30s)" && sleep 30; done'

# vol1 should be available as /dev/sdb. Format it as ext4. Then mount it and create a file.
lxc exec member1 -- lxc exec v1 -- mkfs -t ext4 /dev/sdb
lxc exec member1 -- lxc exec v1 -- mkdir /mnt/vol1
lxc exec member1 -- lxc exec v1 -- mount -t ext4 /dev/sdb /mnt/vol1
lxc exec member1 -- lxc exec v1 -- cp /etc/hostname /mnt/vol1/bar
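# The guest's /etc/hostname should contain "v1" (the instance name); the post-migration check below relies on that.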

# Live-migrate the instance to member2.
lxc exec member1 -- lxc move v1 --target member2

# The VM is slow, so the agent isn't immediately available after the live migration.
lxc exec member2 -- sh -c 'while [ "$(lxc info v1 | awk '"'"'{if ($1 == "Processes:") print $2}'"'"')" -le 1 ]; do echo "Waiting for instance to boot (retry 5s)" && sleep 5; done'

# The volume should be functional, still mounted, and the file we created should still be there with the same contents.
[ "$(lxc exec member2 -- lxc exec v1 -- cat /mnt/vol1/bar)" = "v1" ]

# shellcheck disable=SC2034
FAIL=0