-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathBackupVm.sh
executable file
·373 lines (270 loc) · 11.6 KB
/
BackupVm.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
#!/bin/bash
# This script performs a cold backup of a libvirt virtual machine.
#
# Version 1.09.
#
# Usage:
# ./BackupVm.sh <VM ID> <destination directory>
#
# If the virtual machine is already running, it shuts it down, and restarts it afterwards.
#
# Before using this script, consider an alternative backup strategy based on LVM or Btrfs snapshots:
# - Stop the VM
# - Take a LVM snapshot
# - Restart the VM
# - Back up from the LVM snapshot
# - Delete the LVM snapshot (do this soon for performance reasons)
#
# The script assumes that all virtual disks are using QEMU's qcow2 format.
# Disk images are copied with qemu-img, so the resulting copy will usually shrink. Therefore,
# the target filesystem needs no sparse file support.
#
# I had to change the file permissions for this script to be able to access the VM disk without
# running as root. The original permissions are normally:
#
# If the VM is not running:
# -rw------- root root YourVmDisk.qcow2
#
# If the VM is running:
# -rw------- libvirt-qemu kvm YourVmDisk.qcow2
#
# I changed the group and its permissions like this:
# sudo chgrp libvirt /var/lib/libvirt/images/YourVmDisk.qcow2
# sudo chmod g+r /var/lib/libvirt/images/YourVmDisk.qcow2
#
# This change can be problematic, because the file permissions change automatically,
# see libvirt's dynamic_ownership mode.
#
# Alternatively, this script could use "virsh vol-download", but:
# 1) Process "virsh vol-download" uses a lot of memory, as of January 2020, due to an internal
# streaming architectural issue discussed here:
# virsh vol-download uses a lot of memory
# https://www.redhat.com/archives/libvirt-users/2020-January/msg00056.html
# 2) The XML parser should extract the pool name too, for the --pool parameter.
#
# The libvirt snapshot metadata is not backed up yet. Therefore, if you restore the VM,
# you may not realise that your qcow2 file contains snapshots that are perhaps wasting disk space,
# because the Virtual Machine Manager does not show them.
#
# The script's implementation is more robust than comparable scripts from the Internet. The whole script
# runs with error detection enabled. The elapsed time is printed after shutting down the virtual machine
# and after backing up a virtual disk, which helps measure performance.
#
# This script requires tool 'xmlstarlet'.
#
# Copyright (c) 2019-2020 R. Diez - Licensed under the GNU AGPLv3
# Strict mode: stop on the first failing command, on any use of an unset
# variable, and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
# set -x # Enable tracing of this script.

# Exit code used by abort().
readonly EXIT_CODE_ERROR=1

# libvirt connection URI passed to every virsh invocation.
readonly CONNECTION_URI="qemu:///system"
# Prints an empty line followed by an error message on stderr, then terminates
# the script with exit code $EXIT_CODE_ERROR.
#
# Arguments: all arguments are joined into the error message text.
abort ()
{
  local -r MESSAGE="Error in script \"$0\": $*"

  # The message is only written if the leading empty line could be written.
  if echo >&2; then
    echo "$MESSAGE" >&2
  fi

  exit "$EXIT_CODE_ERROR"
}
# Reads the system uptime from /proc/uptime and stores its integer part
# (whole seconds) in the global variable UPTIME.
#
# Aborts the script if the first field of /proc/uptime does not look like
# "123" or "123.45".
read_uptime_as_integer ()
{
  local RAW_TEXT
  RAW_TEXT="$(</proc/uptime)"

  # Split on blanks; the first field is the uptime in seconds.
  local -a FIELDS
  IFS=$' \t' read -r -a FIELDS <<< "$RAW_TEXT"

  local FIRST_FIELD=${FIELDS[0]}

  # The /proc/uptime format is not exactly documented, so the decimal part
  # is treated as optional. Only the integer part is captured.
  # Values like ".12" or "12." are not supported and are rejected below.
  local REGEXP="^([0-9]+)(\\.[0-9]+)?\$"

  if [[ $FIRST_FIELD =~ $REGEXP ]]; then
    UPTIME=${BASH_REMATCH[1]}
  else
    abort "Error parsing this uptime value: $FIRST_FIELD"
  fi
}
# Converts a number of seconds into a human-friendly string and stores it
# in the global variable ELAPSED_TIME_STR.
#
# Arguments:
#   $1 - elapsed time in whole seconds.
#
# Examples of the resulting text:
#   "42 seconds"
#   "2 minutes, 5 seconds (125 seconds)"
#   "1 hours, 1 minutes, 1 seconds (3,661 seconds)"
# The thousands separator in the parenthesised total depends on the current locale.
get_human_friendly_elapsed_time ()
{
  # Do not call this variable SECONDS: that name belongs to Bash's built-in
  # timer variable, and shadowing it is confusing at best.
  local -i TOTAL_SECONDS="$1"

  if (( TOTAL_SECONDS <= 59 )); then
    ELAPSED_TIME_STR="$TOTAL_SECONDS seconds"
    return
  fi

  # V holds what is left to convert: first seconds, then minutes, then hours.
  local -i V="$TOTAL_SECONDS"

  ELAPSED_TIME_STR="$(( V % 60 )) seconds"
  V="$(( V / 60 ))"

  ELAPSED_TIME_STR="$(( V % 60 )) minutes, $ELAPSED_TIME_STR"
  V="$(( V / 60 ))"

  if (( V > 0 )); then
    ELAPSED_TIME_STR="$V hours, $ELAPSED_TIME_STR"
  fi

  # Format flag ' asks printf for the locale's thousands grouping.
  printf -v ELAPSED_TIME_STR "%s (%'d seconds)" "$ELAPSED_TIME_STR" "$TOTAL_SECONDS"
}
# Aborts the script with an explanatory message if a required tool cannot
# be found.
#
# Arguments:
#   $1 - name of the tool to look for.
#   $2 - name of the Debian/Ubuntu package mentioned in the error message.
verify_tool_is_installed ()
{
  local -r WANTED_TOOL="$1"
  local -r PACKAGE_HINT="$2"

  if command -v "$WANTED_TOOL" >/dev/null 2>&1; then
    return 0
  fi

  abort "Tool '$WANTED_TOOL' is not installed. You may have to install it with your Operating System's package manager. For example, under Ubuntu/Debian the corresponding package is called \"$PACKAGE_HINT\"."
}
# Queries the state of virtual machine $VM_ID with "virsh domstate" and
# stores 'true' or 'false' in the global variable IS_VM_RUNNING.
#
# States "running" and "in shutdown" count as running, "shut off" counts
# as not running. Any other state aborts the script.
#
# Globals read:    VM_ID, CONNECTION_URI
# Globals written: IS_VM_RUNNING
check_if_vm_is_running ()
{
  # Force the C locale, so that the status you get is "running", etc. in English,
  # and not some localised/translated version.
  # LC_ALL is used instead of LANG because LC_ALL and LC_MESSAGES take
  # precedence over LANG, so setting only LANG has no effect if the caller's
  # environment already defines one of those.
  #
  # By the way, we are parsing a string meant for the user, which is not very robust.
  # But there does not seem to be any other easy way to query the status from a shell script.
  echo "Checking whether virtual machine \"$VM_ID\" is running..."

  local CMD
  printf -v CMD "env LC_ALL=C virsh --connect %q domstate %q" "$CONNECTION_URI" "$VM_ID"
  echo "$CMD"

  local STATUS
  STATUS="$(eval "$CMD")"

  case "$STATUS" in
    "running")     IS_VM_RUNNING=true;;
    "in shutdown") IS_VM_RUNNING=true;;
    "shut off")    IS_VM_RUNNING=false;;
    *) abort "Unexpected status of \"$STATUS\".";;
  esac
}
# Starts virtual machine $VM_ID with "virsh start".
# The command line is printed before it is executed.
#
# Globals read: VM_ID, CONNECTION_URI
start_vm ()
{
  printf '%s\n' "Starting virtual machine \"$VM_ID\"..."

  # Build a shell-quoted command line, so that the echoed text is exactly
  # what gets executed.
  local START_CMD
  printf -v START_CMD "virsh --connect %q start %q" "$CONNECTION_URI" "$VM_ID"

  printf '%s\n' "$START_CMD"
  eval "$START_CMD"
}
# Requests an ACPI shutdown of virtual machine $VM_ID and polls once per
# second until the VM is no longer running. Aborts the script if the VM
# has not stopped within the timeout. Prints how long the shutdown took.
#
# Globals read:    VM_ID, CONNECTION_URI
# Globals written: IS_VM_RUNNING, UPTIME, ELAPSED_TIME_STR (through the helpers called)
stop_vm ()
{
  echo "Shutting down virtual machine \"$VM_ID\"..."

  # Command 'shutdown' via ACPI will not work if the VM is stuck in the GRUB bootloader.
  local SHUTDOWN_CMD
  printf -v SHUTDOWN_CMD "virsh --connect %q shutdown --mode acpi %q" "$CONNECTION_URI" "$VM_ID"
  echo "$SHUTDOWN_CMD"
  eval "$SHUTDOWN_CMD"

  # It can take a long time to shutdown a Linux virtual machine. Sometimes systemd
  # waits quite a long time for some services to stop.
  local -r -i MAX_WAIT_SECONDS=120

  # virsh command "shutdown" has no option to wait until the virtual machine
  # has stopped, hence the polling loop below.
  local -i WAITED_SECONDS

  read_uptime_as_integer
  local -r -i WAIT_START="$UPTIME"

  while :; do
    check_if_vm_is_running

    # Always refresh the elapsed time, because it is reported after the loop.
    read_uptime_as_integer
    WAITED_SECONDS=$(( UPTIME - WAIT_START ))

    $IS_VM_RUNNING || break

    (( WAITED_SECONDS < MAX_WAIT_SECONDS )) || abort "Timeout waiting for the virtual machine \"$VM_ID\" to stop."

    sleep 1
  done

  get_human_friendly_elapsed_time "$WAITED_SECONDS"
  local -r WAITED_STR="$ELAPSED_TIME_STR"

  get_human_friendly_elapsed_time "$MAX_WAIT_SECONDS"
  local -r MAX_WAIT_STR="$ELAPSED_TIME_STR"

  echo "Virtual machine \"$VM_ID\" has been shutdown,"
  echo "time to shutdown: $WAITED_STR, $(( WAITED_SECONDS * 100 / MAX_WAIT_SECONDS )) % of limit $MAX_WAIT_STR."
}
# Copies one virtual disk image into the destination directory with
# "qemu-img convert", and prints how long the copy took.
#
# Arguments:
#   $1 - full path of the disk image to back up.
#
# Globals read:    DEST_DIRNAME
# Globals written: UPTIME, ELAPSED_TIME_STR (through the helpers called)
#
# The copy is placed directly in $DEST_DIRNAME under the image's base name.
backup_up_vm_disk ()
{
  local -r DISK_FILENAME="$1"

  echo "Backing up VM disk \"$DISK_FILENAME\"..."

  read_uptime_as_integer
  local -r START_UPTIME="$UPTIME"

  # Most disks will be in .qcow2 format, which tend to be heavily sparse.
  # Tool 'cp' normally automatically detects sparse files and can copy them efficiently. Otherwise, try adding option "--sparse=always".
  # Note that some filesystems, like eCryptfs, do not support sparse files.
  # Using "qemu-img convert" to copy the image has the nice side-effect that the copy is optimised (shrinks),
  # no matter whether the underlying filesystem supports sparse files or not.
  local -r NAME_ONLY="${DISK_FILENAME##*/}"

  # CMD must be local. Otherwise, this function would overwrite the global
  # variable of the same name that the main script uses.
  local CMD

  # This would use the 'cp' command:
  # printf -v CMD "cp -- %q %q" "$DISK_FILENAME" "$DEST_DIRNAME/$NAME_ONLY"

  # Option '-c' would compress the data blocks. Compression is not very good (based on zlib).
  # Option '-p' shows a progress indication.
  # Options -T and -t set the cache mode for source and destination respectively.
  # Mode 'none' means O_DIRECT. We are bypassing the Linux "page cache" (the filesystem cache) because otherwise the whole cache would
  # get flushed when copying such a huge file. This cache pollution can have a severe performance impact on the rest of the system.
  # I hope the Linux cache system gets smarter some day, and this kind of manual workaround is no longer necessary.
  printf -v CMD "qemu-img convert -p -t none -T none -O qcow2 -- %q %q" "$DISK_FILENAME" "$DEST_DIRNAME/$NAME_ONLY"
  echo "$CMD"
  eval "$CMD"

  read_uptime_as_integer
  get_human_friendly_elapsed_time "$(( UPTIME - START_UPTIME ))"

  echo "Finished backing up the VM disk, time to backup: $ELAPSED_TIME_STR"
}
# ----------- Entry point -----------

# When developing this script, sometimes you want to skip backing up the disks,
# because it can be very slow.
declare -r SKIP_DISK_BACKUPS=false

if (( $# != 2 )); then
  abort "Invalid number of command-line arguments."
fi

declare -r VM_ID="$1"
declare -r DEST_DIRNAME="$2"

# Check all preconditions that do not depend on the VM state before touching the VM.
# Otherwise, a trivial problem like a missing tool or a wrong destination directory
# would only be detected after the VM has been shut down, and the VM would stay stopped.
verify_tool_is_installed "xmlstarlet" "xmlstarlet"

if ! $SKIP_DISK_BACKUPS; then
  verify_tool_is_installed "qemu-img" "qemu-utils"
fi

if [[ ! -d "$DEST_DIRNAME" ]]; then
  abort "Destination directory \"$DEST_DIRNAME\" does not exist."
fi

if [[ ! -w "$DEST_DIRNAME" ]]; then
  abort "Cannot write to destination directory \"$DEST_DIRNAME\", check the file permissions."
fi

# We are doing a 'cold' backup: shutdown the VM if already running, back it up, and restart the VM if it was running.
# There are alternative ways to do such backups, see options --atomic and --quiesce, and also command 'virsh domfsfreeze'.
# However, cold backups have advantages:
# 1) You make sure the operating system restarts every now and then, which tends to help reliability.
# 2) You can change the VM configuration (virtual hardware), and the backups will mostly start fine.
#    If you save the running state to RAM, all virtual hardware must be the same,
#    which can be inconvenient in some situations.

check_if_vm_is_running

declare -r WAS_VM_RUNNING="$IS_VM_RUNNING"

if $WAS_VM_RUNNING; then
  if $SKIP_DISK_BACKUPS; then
    echo "If we were not skipping the disk backups, we would stop the VM at this point."
  else
    stop_vm
  fi
fi

echo "Backing up VM configuration..."

declare -r XML_FILENAME="$DEST_DIRNAME/$VM_ID.xml"

printf -v CMD "virsh --connect %q dumpxml %q >%q" "$CONNECTION_URI" "$VM_ID" "$XML_FILENAME"
echo "$CMD"
eval "$CMD"

# This script does not copy the necessary snapshot metadata yet.
# To implement this, look at commands "virsh snapshot-dumpxml --security-info"
# and "virsh snapshot-create --redefine". You may also want to backup and restore which snapshot
# is the current one.

# The following code parses the virtual machine XML configuration file.
# Alternatively, we could parse the output of "virsh domblklist --details". For example: awk '/^[[:space:]]*file[[:space:]]+disk/ {print "imgs["$3"]="$4}'

echo "Extracting disk files..."

# This search expression could no doubt be improved.
declare -r SEARCH_EXPRESSION="/domain/devices/disk[@device='disk']/source/@file"

printf -v CMD \
       "xmlstarlet sel --template --value-of %q %q" \
       "$SEARCH_EXPRESSION" \
       "$XML_FILENAME"

echo "$CMD"

# NOTE(review): "xmlstarlet sel" is expected to exit with status 1 when the expression
# matches nothing, and with a higher status on real errors - confirm against the
# installed xmlstarlet version. "No match" is handled below instead of letting
# 'errexit' terminate the script silently.
declare -i XMLSTARLET_EXIT_CODE=0
DISK_FILENAMES_AS_TEXT="$(eval "$CMD")" || XMLSTARLET_EXIT_CODE=$?

if (( XMLSTARLET_EXIT_CODE != 0 )); then
  if (( XMLSTARLET_EXIT_CODE != 1 )) || [[ -n "$DISK_FILENAMES_AS_TEXT" ]]; then
    abort "xmlstarlet failed with exit code $XMLSTARLET_EXIT_CODE."
  fi
fi

# A here-string always delivers at least one (possibly empty) line, so only
# fill the array if there is something to parse. Otherwise, a VM without
# file-backed disks would yield one bogus, empty filename.
declare -a DISK_FILENAMES=()

if [[ -n "$DISK_FILENAMES_AS_TEXT" ]]; then
  readarray -t DISK_FILENAMES <<<"$DISK_FILENAMES_AS_TEXT"
fi

if (( ${#DISK_FILENAMES[@]} == 0 )); then
  echo "Warning: Virtual machine \"$VM_ID\" has no file-backed disks, so no disk images were backed up." >&2
else
  for FILENAME in "${DISK_FILENAMES[@]}"
  do
    if $SKIP_DISK_BACKUPS; then
      echo "Skipping backup of \"$FILENAME\"."
    else
      # There is often a file permission problem with the VM disk files, and "qemu-img convert" apparently does not generate
      # a good error message, so check beforehand if we can read the file at all.
      # We can only check when the VM is stopped, because the VM disk files usually have other permissions when
      # the associated VM is running. Remember that we are not stopping the VMs if SKIP_DISK_BACKUPS is 'true'.
      if [[ ! -r "$FILENAME" ]]; then
        abort "Cannot read from \"$FILENAME\", check the file permissions."
      fi

      backup_up_vm_disk "$FILENAME"
    fi
  done
fi

echo "Finished backing up VM disks."

if $WAS_VM_RUNNING; then
  if $SKIP_DISK_BACKUPS; then
    echo "If we were not skipping the disk backups, we would start the VM at this point."
  else
    start_vm
  fi
fi