Commit a4b95cf

WIP

markylaing committed Aug 22, 2024
1 parent 139525c commit a4b95cf
Showing 2 changed files with 88 additions and 141 deletions.
117 changes: 17 additions & 100 deletions .github/workflows/tests.yml
@@ -83,103 +83,11 @@ jobs:
strategy:
fail-fast: false
matrix:
- os: ${{ fromJSON(inputs.ubuntu-releases || '["20.04", "22.04", "24.04"]') }}
- track: ${{ fromJSON(inputs.snap-tracks || '["latest/edge", "5.21/edge", "5.0/edge", "4.0/edge"]') }}
+ os: ${{ fromJSON(inputs.ubuntu-releases || '["24.04"]') }}
+ track: ${{ fromJSON(inputs.snap-tracks || '["latest/edge"]') }}
test:
- - cgroup
- - cluster
- - container
- - container-copy
- - conversion
- - cpu-vm
- - devlxd-vm
- - devlxd-container
- - docker
- - efi-vars-editor-vm
- - interception
- - pylxd
- - network-bridge-firewall
- - network-ovn
- - network-routed
- - snapd
- - storage-buckets
- - storage-disks-vm
- - "storage-vm dir"
- - "storage-vm btrfs"
- - "storage-vm ceph"
- - "storage-vm lvm"
- - "storage-vm lvm-thin"
- - "storage-vm zfs"
- - storage-volumes-vm
- - tpm-vm
- - vm-nesting
- - vm-migration
- include:
- - test: qemu-external-vm
- track: "latest/edge"
- os: "24.04"
- exclude:
- # not compatible with 4.0/*
- - test: container-copy
- track: "4.0/edge"
- - test: conversion
- track: "4.0/edge"
- - test: cpu-vm
- track: "4.0/edge"
- - test: devlxd-vm
- track: "4.0/edge"
- - test: efi-vars-editor-vm
- track: "4.0/edge"
- - test: network-bridge-firewall
- os: 20.04
- track: "4.0/edge"
- - test: network-ovn
- track: "4.0/edge"
- # https://github.com/canonical/pylxd/issues/590
- - test: pylxd
- track: "4.0/edge"
- - test: storage-buckets
- track: "4.0/edge"
- - test: storage-disks-vm
- track: "4.0/edge"
- - test: "storage-vm dir"
- track: "4.0/edge"
- - test: "storage-vm btrfs"
- track: "4.0/edge"
- - test: "storage-vm ceph"
- track: "4.0/edge"
- - test: "storage-vm lvm"
- track: "4.0/edge"
- - test: "storage-vm lvm-thin"
- track: "4.0/edge"
- - test: "storage-vm zfs"
- track: "4.0/edge"
- - test: storage-volumes-vm
- track: "4.0/edge"
- - test: tpm-vm
- track: "4.0/edge"
- # not compatible with 5.0/*
- - test: efi-vars-editor-vm # not compatible with 5.0/*
- track: "5.0/edge"
- # waiting for integration with microceph
- - test: "storage-vm ceph"
- # skip track/os combinations that are too far apart
- - track: "4.0/edge"
- os: "24.04"
- - track: "5.0/edge"
- os: "24.04"
- - track: "5.0/edge"
- os: "20.04"
- - track: "5.21/edge"
- os: "20.04"
- - track: "latest/edge"
- os: "20.04"
- - track: "latest/edge"
- os: "22.04"
- - test: "vm-migration"
- track: "4.0/edge"
- - test: "vm-migration"
- track: "5.0/edge"
+ - devlxd-container

steps:
- name: Performance tuning
@@ -195,7 +103,7 @@ jobs:
echo "force-unsafe-io" | sudo tee /etc/dpkg/dpkg.cfg.d/force-unsafe-io
- name: Reclaim some space (storage tests only)
- if: ${{ startsWith(matrix.test, 'storage') || matrix.test == 'vm-nesting' || matrix.test == 'conversion' }}
+ if: ${{ startsWith(matrix.test, 'storage') || matrix.test == 'vm-nesting' || matrix.test == 'conversion' || matrix.test == 'vm-migration' }}
run: |
set -eux
df -h
@@ -225,6 +133,16 @@ jobs:
sudo rm -rf /opt/ghc
df -h
+ - name: Reclaim some memory (VM migration tests only)
+   if: ${{ matrix.test == 'vm-migration' }}
+   run: |
+     set -eux
+     free -mt
+     sudo systemctl stop dpkg-db-backup.timer e2scrub_all.timer fstrim.timer logrotate.timer man-db.timer motd-news.timer phpsessionclean.timer update-notifier-download.timer update-notifier-motd.timer
+     sudo systemctl stop iscsid.socket multipathd.socket
+     free -mt
- name: Remove docker
run: |
set -eux
@@ -255,10 +173,6 @@ jobs:
run: |
set -eux
- # XXX: prevent accidental usage of `images:` in CI test jobs.
- # All tests should be done using officially supported images.
- echo '127.0.0.1 images.lxd.canonical.com' | sudo tee /etc/hosts
TEST_SCRIPT="$(echo ${{ matrix.test }} | cut -d " " -f 1)"
EXTRA_ARGS="$(echo ${{ matrix.test }} | cut -d " " -f 2- --only-delimited)"
if [ "${TEST_SCRIPT}" = "cluster" ]; then
@@ -268,6 +182,9 @@ jobs:
fi
sudo --preserve-env=PURGE_LXD,TEST_IMG ./bin/local-run "tests/${TEST_SCRIPT}" ${{ matrix.track }} ${EXTRA_ARGS:-}
+ - name: Setup tmate session
+   uses: mxschmitt/action-tmate@v3

# always update cache as we have our own logic of
# cache invalidation and updates in addition to a date check
- name: Delete previous cache
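
The run step above splits each matrix entry (for example "storage-vm zfs") into a test script
name and optional extra arguments using `cut`. A minimal standalone sketch of that parsing,
assuming GNU coreutils `cut`; the sample entries are illustrative:

#!/bin/sh
# Split a matrix entry into the test script and its extra arguments,
# mirroring the TEST_SCRIPT/EXTRA_ARGS lines in the workflow above.
set -eu

for entry in "cluster" "storage-vm zfs" "vm-migration"; do
    TEST_SCRIPT="$(echo "${entry}" | cut -d " " -f 1)"
    # --only-delimited prints nothing for entries without a space,
    # so single-word entries leave EXTRA_ARGS empty.
    EXTRA_ARGS="$(echo "${entry}" | cut -d " " -f 2- --only-delimited)"
    echo "script=tests/${TEST_SCRIPT} args=${EXTRA_ARGS:-}"
done
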
112 changes: 71 additions & 41 deletions tests/vm-migration
@@ -12,16 +12,40 @@
lxc network create lxdbr0
lxc profile device add default eth0 nic network=lxdbr0

poolName="ctpool$$"
- poolDriver=dir
+ poolDriver=zfs

echo "==> Create storage pool using driver ${poolDriver}"
lxc storage create "${poolName}" "${poolDriver}"
lxc profile device add default root disk path="/" pool="${poolName}"

# Create ceph node
lxc init "${TEST_IMG:-ubuntu-minimal-daily:24.04}" ceph --vm -c limits.cpu=2 -c limits.memory=4GiB
- lxc storage volume create "${poolName}" ceph-disk size=20GiB --type=block
- lxc config device add ceph ceph-disk disk pool="${poolName}" source=ceph-disk
+ if [ -n "${GITHUB_ACTIONS:-}" ]; then
+   # If the rootfs and the ephemeral partition are on the same physical disk, giving the whole
+   # disk to microceph would wipe our rootfs. Since it is pretty rare for GitHub Actions
+   # runners to have a single disk, we immediately bail rather than trying to gracefully
+   # handle it. Once a snapd release includes https://github.com/snapcore/snapd/pull/13150,
+   # we will be able to stop worrying about that special case.
+   if [ "$(stat -c '%d' /)" = "$(stat -c '%d' /mnt)" ]; then
+     echo "FAIL: rootfs and ephemeral partition on the same disk, aborting"
+     exit 1
+   fi
+
+   # Free up the ephemeral disk to use it as a ceph OSD.
+   # https://github.com/canonical/microceph/issues/288 and https://github.com/canonical/microceph/issues/289
+   swapoff /mnt/swapfile
+   ephemeral_disk="$(findmnt --noheadings --output SOURCE --target /mnt | sed 's/[0-9]\+$//')"
+   umount /mnt
+
+   lxc config device add ceph ceph-disk disk source="${ephemeral_disk}"
+ else
+   lxc storage volume create "${poolName}" ceph-disk size=20GiB --type=block
+   lxc config device add ceph ceph-disk disk pool="${poolName}" source=ceph-disk
+ fi


+ # Disable vGPU to save RAM
+ lxc config set ceph raw.qemu.conf='[device "qemu_gpu"]'
lxc start ceph

# Wait for snap in ceph instance.
@@ -40,7 +64,7 @@
lxc exec ceph -- microceph.ceph osd crush rule create-replicated replicated default osd
for flag in nosnaptrim noscrub nobackfill norebalance norecover nodeep-scrub; do
lxc exec ceph -- microceph.ceph osd set "${flag}"
done
- lxc exec ceph -- microceph disk add /dev/sdb
+ lxc exec ceph -- microceph disk add /dev/sdb --wipe
lxc exec ceph -- microceph.ceph osd pool create cephfs_meta 32
lxc exec ceph -- microceph.ceph osd pool create cephfs_data 32
lxc exec ceph -- microceph.ceph fs new cephfs cephfs_meta cephfs_data
@@ -53,22 +77,26 @@
for _ in $(seq 60); do
fi
done

- # Launch two instances for our LXD cluster and wait for them to be ready. If the host supports `devlxd_images_vm` then
- # set `security.devlxd.images=true` so that we don't have to download the image again.
+ # Initialise two instances for our LXD cluster. If the host supports `devlxd_images_vm` then set
+ # `security.devlxd.images=true` so that we don't have to download the image again.
if hasNeededAPIExtension devlxd_images_vm; then
- lxc launch "${TEST_IMG:-ubuntu-minimal-daily:24.04}" member1 --vm -c limits.memory=2GiB -c security.devlxd.images=true
- lxc launch "${TEST_IMG:-ubuntu-minimal-daily:24.04}" member2 --vm -c limits.memory=2GiB -c security.devlxd.images=true
+ lxc init "${TEST_IMG:-ubuntu-minimal-daily:24.04}" member1 --vm -c limits.cpu=2 -c limits.memory=4GiB -c security.devlxd.images=true
+ lxc init "${TEST_IMG:-ubuntu-minimal-daily:24.04}" member2 --vm -c limits.cpu=2 -c limits.memory=4GiB -c security.devlxd.images=true
else
- lxc launch "${TEST_IMG:-ubuntu-minimal-daily:24.04}" member1 --vm -c limits.memory=2GiB
- lxc launch "${TEST_IMG:-ubuntu-minimal-daily:24.04}" member2 --vm -c limits.memory=2GiB
+ lxc init "${TEST_IMG:-ubuntu-minimal-daily:24.04}" member1 --vm -c limits.cpu=2 -c limits.memory=4GiB
+ lxc init "${TEST_IMG:-ubuntu-minimal-daily:24.04}" member2 --vm -c limits.cpu=2 -c limits.memory=4GiB
fi

+ # Disable vGPU to save RAM
+ lxc config set member1 raw.qemu.conf='[device "qemu_gpu"]'
+ lxc config set member2 raw.qemu.conf='[device "qemu_gpu"]'

+ # Start the instances and wait for member1 to be ready.
+ lxc start member1
+ lxc start member2
waitInstanceReady member1
- waitInstanceReady member2
# shellcheck disable=SC3044 # Ignore "declare is undefined" shellcheck error.
lxc exec member1 -- sh -c "$(declare -f waitSnapdSeed); waitSnapdSeed"
- # shellcheck disable=SC3044 # Ignore "declare is undefined" shellcheck error.
- lxc exec member2 -- sh -c "$(declare -f waitSnapdSeed); waitSnapdSeed"

# Install LXD in the first member.
lxc exec member1 -- snap remove --purge lxd || true
@@ -91,6 +119,11 @@
lxc exec member1 -- lxc config set core.https_address="${member1Address}:8443"
lxc exec member1 -- lxc cluster enable member1
joinToken="$(lxc exec member1 -- lxc cluster add member2 --quiet)"

+ # Ensure member2 is ready.
+ waitInstanceReady member2
+ # shellcheck disable=SC3044 # Ignore "declare is undefined" shellcheck error.
+ lxc exec member2 -- sh -c "$(declare -f waitSnapdSeed); waitSnapdSeed"

# Install LXD on the second member.
lxc exec member2 -- snap remove --purge lxd || true
lxc exec member2 -- snap install lxd --channel="${LXD_SNAP_CHANNEL}"
@@ -136,35 +169,32 @@
lxc exec member1 -- lxc storage create ceph ceph
lxc exec member1 -- lxc storage volume create ceph vol1 --type=block size=500MiB

# Create a VM in the cluster, on member1.
- lxc exec member1 -- lxc init "${TEST_IMG:-ubuntu-minimal-daily:24.04}" v1 --vm --storage ceph --target member1 -c migration.stateful=true -c limits.memory=1GiB
+ lxc exec member1 -- lxc init images:alpine/3.20 v1 --vm --storage ceph --target member1 -c migration.stateful=true -c limits.cpu=1 -c limits.memory=1GiB -c security.secureboot=false

# Add vol1 as a disk device to the VM.
lxc exec member1 -- lxc config device add v1 vol1-disk disk pool=ceph source=vol1

- # Start the VM.
- lxc exec member1 -- lxc start v1

- # Wait for a long time for it to boot (doubly nested VM takes a while).
- while [ "$(lxc exec member1 -- lxc info v1 | awk '{if ($1 == "Processes:") print $2}')" -le 1 ]; do
-   sleep 30
- done

- # vol1 should be available as /dev/sdb. Format it as ext4. Then mount it and create a file.
- lxc exec member1 -- lxc exec v1 -- mkfs -t ext4 /dev/sdb
- lxc exec member1 -- lxc exec v1 -- mkdir /mnt/vol1
- lxc exec member1 -- lxc exec v1 -- mount -t ext4 /dev/sdb /mnt/vol1
- lxc exec member1 -- lxc exec v1 -- cp /etc/hostname /mnt/vol1/bar

- # Move the instance
- lxc exec member1 -- lxc move v1 --target member2

- # The VM is slow. So the agent isn't immediately available after the live migration.
- while [ "$(lxc exec member1 -- lxc info v1 | awk '{if ($1 == "Processes:") print $2}')" -le 1 ]; do
-   sleep 5
- done

- # The volume should be functional, still mounted, and the file we created should still be there with the same contents.
- [ "$(lxc exec member2 -- lxc exec v1 -- cat /mnt/vol1/bar)" = "v1" ]

# shellcheck disable=SC2034
- FAIL=0
+ FAIL=1

+ ## Start the VM.
+ #lxc exec member1 -- lxc start v1
+ #
+ ## Wait for a long time for it to boot (doubly nested VM takes a while).
+ #lxc exec member1 -- sh -c 'while [ "$(lxc info v1 | awk '"'"'{if ($1 == "Processes:") print $2}'"'"')" -le 1 ]; do echo "Instance v1 still not booted, waiting 60s..." && sleep 60; done'
+ #
+ ## vol1 should be available as /dev/sdb. Format it as ext4. Then mount it and create a file.
+ #lxc exec member1 -- lxc exec v1 -- mkfs -t ext4 /dev/sdb
+ #lxc exec member1 -- lxc exec v1 -- mkdir /mnt/vol1
+ #lxc exec member1 -- lxc exec v1 -- mount -t ext4 /dev/sdb /mnt/vol1
+ #lxc exec member1 -- lxc exec v1 -- cp /etc/hostname /mnt/vol1/bar
+ #
+ ## Move the instance
+ #lxc exec member1 -- lxc move v1 --target member2
+ #
+ ## The VM is slow. So the agent isn't immediately available after the live migration.
+ #lxc exec member1 -- sh -c 'while [ "$(lxc info v1 | awk '"'"'{if ($1 == "Processes:") print $2}'"'"')" -le 1 ]; do echo "Instance v1 still not booted, waiting 60s..." && sleep 60; done'
+ #
+ ## The volume should be functional, still mounted, and the file we created should still be there with the same contents.
+ #[ "$(lxc exec member2 -- lxc exec v1 -- cat /mnt/vol1/bar)" = "v1" ]
+ #
+ ## shellcheck disable=SC2034
+ #FAIL=0
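
The GITHUB_ACTIONS branch earlier in this file guards against runners where /mnt is not a
separate filesystem before handing the ephemeral disk to microceph. A minimal standalone
sketch of that guard, under the same assumptions as the diff; the final echo is illustrative:

#!/bin/sh
# Guard from tests/vm-migration: if / and /mnt report the same device number,
# /mnt is not a separate filesystem, so there is no dedicated ephemeral disk
# that can be wiped and handed to microceph as an OSD.
set -eu

if [ "$(stat -c '%d' /)" = "$(stat -c '%d' /mnt)" ]; then
    echo "FAIL: rootfs and ephemeral partition on the same disk, aborting" >&2
    exit 1
fi

# findmnt prints the backing device (e.g. /dev/sdb1); the sed call strips the
# trailing partition number to get the whole disk (e.g. /dev/sdb).
ephemeral_disk="$(findmnt --noheadings --output SOURCE --target /mnt | sed 's/[0-9]\+$//')"
echo "ephemeral disk: ${ephemeral_disk}"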

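
The boot checks (now commented out above) rely on polling `lxc info`: the LXD agent is
considered up once the reported process count exceeds one. A sketch of that loop under the
same assumptions (instance name v1 and the 30s interval are taken from the removed lines):

#!/bin/sh
# Poll until the LXD agent inside VM v1 is reachable; `lxc info` reports a
# process count of -1 until the agent responds, so wait for a value above 1.
set -eu

while [ "$(lxc info v1 | awk '{if ($1 == "Processes:") print $2}')" -le 1 ]; do
    echo "Instance v1 still not booted, waiting 30s..."
    sleep 30
done
echo "Instance v1 is ready"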
0 comments on commit a4b95cf

Please sign in to comment.
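The script prepares both members with `sh -c "$(declare -f waitSnapdSeed); waitSnapdSeed"`,
which serialises a bash function defined on the host and replays it inside the guest shell.
A sketch of that pattern; the helper body below is hypothetical (the real waitSnapdSeed
lives in lxd-ci's shared helpers), and `bash -c` stands in for `lxc exec <instance> -- sh -c`:

#!/bin/bash
# Serialise a host-defined bash function with `declare -f` and run it in a
# child shell that never sourced the file defining it.
set -eu

waitSnapdSeed() {
    # Hypothetical body: poll until snapd reports that seeding finished.
    for _ in $(seq 60); do
        snap wait system seed.loaded 2>/dev/null && return 0
        sleep 1
    done
    echo "snapd not seeded in time" >&2
    return 1
}

# `declare -f waitSnapdSeed` prints the function's source text, so the child
# shell can define it locally and then call it.
bash -c "$(declare -f waitSnapdSeed); waitSnapdSeed"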